datacite-mapping 0.2.2 → 0.2.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGES.md +5 -0
- data/lib/datacite/mapping/empty_filtering_nodes.rb +32 -0
- data/lib/datacite/mapping/identifier.rb +5 -2
- data/lib/datacite/mapping/module_info.rb +1 -1
- data/lib/datacite/mapping/resource.rb +2 -2
- data/spec/unit/datacite/mapping/resource_spec.rb +117 -13
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 94b1cd9d6fa3c9f73d4e01b8932735ed5531de96
|
4
|
+
data.tar.gz: 1f641d5ea6c21711480d806a4fc6e07b07df2697
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8958bbce65cce405ec02251e111902de276955f4f822a60409ef62c0532cc3315869db205f2491b95d8701579a623da711513c1938f637ab9c59ea84646668e9
|
7
|
+
data.tar.gz: 91bfefea1df89cf6067fa6f9a680c885ee49f76ff700849e88ada850016f42626fe3eec484da19efd1e1520ea16480dbea41afcbf5c75ec447d9b36237a8bbfe
|
data/CHANGES.md
CHANGED
@@ -1,3 +1,8 @@
|
|
1
|
+
## 0.2.3 (5 October 2016)
|
2
|
+
|
3
|
+
- Allow empty `<identifier/>` tags on read, but not write
|
4
|
+
- Allow but ignore empty `<subject/>` and `<description/>` tags on read
|
5
|
+
|
1
6
|
## 0.2.2 (4 October 2016)
|
2
7
|
|
3
8
|
- Fixed issue where `<geoLocation>` child elements would be written in Datacite 4
|
data/lib/datacite/mapping/empty_filtering_nodes.rb
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
require 'xml/mapping_extensions'
|
2
|
+
|
3
|
+
module Datacite
|
4
|
+
module Mapping
|
5
|
+
|
6
|
+
module EmptyNodeUtils
|
7
|
+
def not_empty(element)
|
8
|
+
return unless element
|
9
|
+
text = element.text
|
10
|
+
empty = text.nil? || text.strip.empty?
|
11
|
+
warn "Ignoring empty element #{element}" if empty
|
12
|
+
!empty
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
16
|
+
# An {XML::Mapping::ArrayNode} that ignores empty tags, including tags
|
17
|
+
# containing only blank text.
|
18
|
+
class EmptyFilteringArrayNode < XML::Mapping::ArrayNode
|
19
|
+
include EmptyNodeUtils
|
20
|
+
def extract_attr_value(xml)
|
21
|
+
elements = default_when_xpath_err { @reader_path.all(xml) }
|
22
|
+
non_empty_elements = elements.select { |e| not_empty(e) }
|
23
|
+
non_empty_elements.map { |e| unmarshal(e) }
|
24
|
+
end
|
25
|
+
|
26
|
+
def unmarshal(element)
|
27
|
+
@unmarshaller.call(element)
|
28
|
+
end
|
29
|
+
end
|
30
|
+
XML::Mapping.add_node_class EmptyFilteringArrayNode
|
31
|
+
end
|
32
|
+
end
|
data/lib/datacite/mapping/identifier.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
require 'xml/mapping'
|
2
|
+
require 'datacite/mapping/empty_filtering_nodes'
|
2
3
|
|
3
4
|
module Datacite
|
4
5
|
module Mapping
|
@@ -60,8 +61,10 @@ module Datacite
|
|
60
61
|
# Custom node to warn (but not blow up) if we read an XML `<resource/>` that's
|
61
62
|
# missing its `<identifier/>`.
|
62
63
|
class IdentifierNode < XML::Mapping::ObjectNode
|
64
|
+
include EmptyNodeUtils
|
63
65
|
def xml_to_obj(_obj, xml)
|
64
|
-
super if has_element?(xml)
|
66
|
+
return super if (element = has_element?(xml)) && not_empty(element)
|
67
|
+
warn 'Identifier not found; add a valid Identifier to the Resource before saving'
|
65
68
|
end
|
66
69
|
|
67
70
|
private
|
@@ -69,7 +72,7 @@ module Datacite
|
|
69
72
|
def has_element?(xml) # rubocop:disable Style/PredicateName
|
70
73
|
@path.first(xml)
|
71
74
|
rescue XML::XXPathError
|
72
|
-
|
75
|
+
false
|
73
76
|
end
|
74
77
|
end
|
75
78
|
XML::Mapping.add_node_class IdentifierNode
|
data/lib/datacite/mapping/resource.rb
CHANGED
@@ -184,7 +184,7 @@ module Datacite
|
|
184
184
|
|
185
185
|
# @!attribute [rw] subjects
|
186
186
|
# @return [Array<Subject>] subjects, keywords, classification codes, or key phrases describing the resource.
|
187
|
-
|
187
|
+
empty_filtering_array_node :subjects, 'subjects', 'subject', class: Subject, default_value: []
|
188
188
|
|
189
189
|
# @!attribute [rw] fundingReferences
|
190
190
|
# @return [Array<FundingReference>] information about financial support (funding) for the resource being registered.
|
@@ -232,7 +232,7 @@ module Datacite
|
|
232
232
|
|
233
233
|
# @!attribute [rw] descriptions
|
234
234
|
# @return [Array<Description>] all additional information that does not fit in any of the other categories.
|
235
|
-
|
235
|
+
empty_filtering_array_node :descriptions, 'descriptions', 'description', class: Description, default_value: []
|
236
236
|
|
237
237
|
# @!attribute [rw] geo_locations
|
238
238
|
# @return [Array<GeoLocations>] spatial region or named place where the data was gathered or about which the data is focused.
|
data/spec/unit/datacite/mapping/resource_spec.rb
CHANGED
@@ -938,7 +938,8 @@ module Datacite
|
|
938
938
|
def normalize(xml_str)
|
939
939
|
r0 = xml_str
|
940
940
|
r1 = r0.gsub(%r{<br\s+/>}, '<br/>') # entity-de-escape <br/> tags
|
941
|
-
r2 = r1.gsub(%r{<(?!br)[^>]+/>}, '') # remove empty tags
|
941
|
+
# r2 = r1.gsub(%r{<(?!br)[^>]+/>}, '') # remove empty tags
|
942
|
+
r2 = r1
|
942
943
|
r3 = r2.gsub(/<resource (xmlns:xsi="[^"]+")\s+(xsi:schemaLocation="[^"]+")>/, "<resource \\2 \\1 xmlns=\"http://datacite.org/schema/kernel-3\">") # fix missing namespace
|
943
944
|
r4 = r3.gsub(%r{(<identifier[^>]+>)\s*([^ ]+)\s*(</identifier>)}, '\\1\\2\\3') # trim identifiers
|
944
945
|
r5 = r4.gsub(%r{<([^>]+tude)>([0-9.-]+?)(0?)0+</\1>}, '<\\1>\\2\\3</\\1>') # strip trailing coordinate zeroes
|
@@ -966,21 +967,12 @@ module Datacite
|
|
966
967
|
# - missing DOI
|
967
968
|
# - empty tags
|
968
969
|
# - nested contributors instead of contributorNames
|
969
|
-
# - empty descriptions
|
970
|
-
# TODO: handle empty descriptions like empty subjects
|
971
970
|
|
972
971
|
r0 = xml_str
|
973
972
|
r1 = r0.gsub(%r{<(?!br)[^>]+/>}, '') # remove empty tags
|
974
|
-
r2 = r1.gsub(%r{<
|
975
|
-
r3 = r2.gsub(%r{<
|
976
|
-
|
977
|
-
r5 = r4.gsub(%r{(<date[^>]*>)(\d{4})-(\d{4})(</date>)}, '\\1\\2/\\3\\4') # fix date ranges
|
978
|
-
r6 = r5.gsub(%r{(<contributor[^>/]+>\s*)<contributor>([^<]+)</contributor>(\s*</contributor>)}, '\\1<contributorName>\\2</contributorName>\\3') # fix broken contributors
|
979
|
-
# if r6.include?('<br')
|
980
|
-
# trace = [r0, r1, r2, r3, r4, r5, r6].map { |r| r.include?('<br') }
|
981
|
-
# puts trace
|
982
|
-
# end
|
983
|
-
r6
|
973
|
+
r2 = r1.gsub(%r{<([A-Za-z]*)[^>]*>\s*</\1>}, '') # remove empty tag pairs
|
974
|
+
r3 = r2.gsub(%r{(<date[^>]*>)(\d{4})-(\d{4})(</date>)}, '\\1\\2/\\3\\4') # fix date ranges
|
975
|
+
r3.gsub(%r{(<contributor[^>/]+>\s*)<contributor>([^<]+)</contributor>(\s*</contributor>)}, '\\1<contributorName>\\2</contributorName>\\3') # fix broken contributors
|
984
976
|
end
|
985
977
|
|
986
978
|
def it_round_trips(file:, mapping: :_default, fix_dash1: false) # rubocop:disable Metrics/AbcSize
|
@@ -1165,6 +1157,118 @@ module Datacite
|
|
1165
1157
|
end
|
1166
1158
|
end
|
1167
1159
|
end
|
1160
|
+
|
1161
|
+
describe '#save_to_xml' do
|
1162
|
+
|
1163
|
+
attr_reader :resource
|
1164
|
+
|
1165
|
+
before(:each) do
|
1166
|
+
@identifier = Identifier.new(value: '10.14749/1407399495')
|
1167
|
+
|
1168
|
+
@creators = [
|
1169
|
+
Creator.new(
|
1170
|
+
name: 'Hedy Lamarr',
|
1171
|
+
identifier: NameIdentifier.new(scheme: 'ISNI', scheme_uri: URI('http://isni.org/'), value: '0000-0001-1690-159X'),
|
1172
|
+
affiliations: ['United Artists', 'Metro-Goldwyn-Mayer']
|
1173
|
+
),
|
1174
|
+
Creator.new(
|
1175
|
+
name: 'Herschlag, Natalie',
|
1176
|
+
identifier: NameIdentifier.new(scheme: 'ISNI', scheme_uri: URI('http://isni.org/'), value: '0000-0001-0907-8419'),
|
1177
|
+
affiliations: ['Gaumont Buena Vista International', '20th Century Fox']
|
1178
|
+
)
|
1179
|
+
]
|
1180
|
+
|
1181
|
+
@titles = [
|
1182
|
+
Title.new(value: 'An Account of a Very Odd Monstrous Calf', language: 'en-emodeng'),
|
1183
|
+
Title.new(type: TitleType::SUBTITLE, value: 'And a Contest between Two Artists about Optick Glasses, &c', language: 'en-emodeng')
|
1184
|
+
]
|
1185
|
+
|
1186
|
+
@publisher = 'California Digital Library'
|
1187
|
+
@publication_year = 2015
|
1188
|
+
|
1189
|
+
@resource = Resource.new(
|
1190
|
+
identifier: identifier,
|
1191
|
+
creators: creators,
|
1192
|
+
titles: titles,
|
1193
|
+
publisher: publisher,
|
1194
|
+
publication_year: publication_year
|
1195
|
+
)
|
1196
|
+
end
|
1197
|
+
|
1198
|
+
it 'sets the DC4 namespace by default' do
|
1199
|
+
xml = resource.save_to_xml
|
1200
|
+
expect(xml).to be_a(REXML::Element)
|
1201
|
+
expect(xml.namespace).to eq(DATACITE_4_NAMESPACE.uri)
|
1202
|
+
end
|
1203
|
+
|
1204
|
+
it 'sets the DC3 namespace for the :datacite_3 mapping' do
|
1205
|
+
xml = resource.save_to_xml(mapping: :datacite_3)
|
1206
|
+
expect(xml).to be_a(REXML::Element)
|
1207
|
+
expect(xml.namespace).to eq(DATACITE_3_NAMESPACE.uri)
|
1208
|
+
end
|
1209
|
+
|
1210
|
+
it 'fails on nil identifiers' do
|
1211
|
+
resource.instance_variable_set(:@identifier, nil)
|
1212
|
+
expect { resource.save_to_xml }.to raise_error(XML::MappingError)
|
1213
|
+
end
|
1214
|
+
end
|
1215
|
+
|
1216
|
+
describe '#parse_xml' do
|
1217
|
+
|
1218
|
+
attr_reader :xml_text
|
1219
|
+
|
1220
|
+
before(:each) do
|
1221
|
+
@xml_text = "<resource xsi:schemaLocation='http://datacite.org/schema/kernel-4 http://schema.datacite.org/meta/kernel-4/metadata.xsd' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' xmlns='http://datacite.org/schema/kernel-4'>
|
1222
|
+
<identifier identifierType='DOI'>10.14749/1407399495</identifier>
|
1223
|
+
<creators>
|
1224
|
+
<creator>
|
1225
|
+
<creatorName>Hedy Lamarr</creatorName>
|
1226
|
+
<nameIdentifier nameIdentifierScheme='ISNI' schemeURI='http://isni.org/'>0000-0001-1690-159X</nameIdentifier>
|
1227
|
+
<affiliation>United Artists</affiliation>
|
1228
|
+
<affiliation>Metro-Goldwyn-Mayer</affiliation>
|
1229
|
+
</creator>
|
1230
|
+
<creator>
|
1231
|
+
<creatorName>Herschlag, Natalie</creatorName>
|
1232
|
+
<nameIdentifier nameIdentifierScheme='ISNI' schemeURI='http://isni.org/'>0000-0001-0907-8419</nameIdentifier>
|
1233
|
+
<affiliation>Gaumont Buena Vista International</affiliation>
|
1234
|
+
<affiliation>20th Century Fox</affiliation>
|
1235
|
+
</creator>
|
1236
|
+
</creators>
|
1237
|
+
<titles>
|
1238
|
+
<title xml:lang='en-emodeng'>An Account of a Very Odd Monstrous Calf</title>
|
1239
|
+
<title xml:lang='en-emodeng' titleType='Subtitle'>And a Contest between Two Artists about Optick Glasses, &c</title>
|
1240
|
+
</titles>
|
1241
|
+
<publisher>California Digital Library</publisher>
|
1242
|
+
<publicationYear>2015</publicationYear>
|
1243
|
+
<subjects>
|
1244
|
+
<subject xml:lang='en-us' schemeURI='http://id.loc.gov/authorities/subjects' subjectScheme='LCSH'>Mammals--Embryology</subject>
|
1245
|
+
</subjects>
|
1246
|
+
<descriptions>
|
1247
|
+
<description xml:lang='en-us' descriptionType='Abstract'>foo</description>
|
1248
|
+
</descriptions>
|
1249
|
+
</resource>"
|
1250
|
+
end
|
1251
|
+
|
1252
|
+
it 'skips empty identifiers' do
|
1253
|
+
sketchy_xml = xml_text.gsub(%r{<identifier.*/identifier>}, '<identifier/>')
|
1254
|
+
resource = Resource.parse_xml(sketchy_xml)
|
1255
|
+
expect(resource).to be_a(Resource)
|
1256
|
+
end
|
1257
|
+
|
1258
|
+
it 'skips empty subjects' do
|
1259
|
+
sketchy_xml = xml_text.gsub(%r{>[^<]+</subject>}, '/>')
|
1260
|
+
resource = Resource.parse_xml(sketchy_xml)
|
1261
|
+
expect(resource).to be_a(Resource)
|
1262
|
+
expect(resource.subjects).to eq([])
|
1263
|
+
end
|
1264
|
+
|
1265
|
+
it 'skips empty descriptions' do
|
1266
|
+
sketchy_xml = xml_text.gsub(%r{>[^<]+</description>}, '/>')
|
1267
|
+
resource = Resource.parse_xml(sketchy_xml)
|
1268
|
+
expect(resource).to be_a(Resource)
|
1269
|
+
expect(resource.descriptions).to eq([])
|
1270
|
+
end
|
1271
|
+
end
|
1168
1272
|
end
|
1169
1273
|
end
|
1170
1274
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: datacite-mapping
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.2
|
4
|
+
version: 0.2.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- David Moles
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-10-04 00:00:00.000000000 Z
|
11
|
+
date: 2016-10-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: typesafe_enum
|
@@ -217,6 +217,7 @@ files:
|
|
217
217
|
- lib/datacite/mapping/date.rb
|
218
218
|
- lib/datacite/mapping/date_value.rb
|
219
219
|
- lib/datacite/mapping/description.rb
|
220
|
+
- lib/datacite/mapping/empty_filtering_nodes.rb
|
220
221
|
- lib/datacite/mapping/funding_reference.rb
|
221
222
|
- lib/datacite/mapping/geo_location.rb
|
222
223
|
- lib/datacite/mapping/geo_location_box.rb
|