datacite-mapping 0.2.2 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGES.md +5 -0
- data/lib/datacite/mapping/empty_filtering_nodes.rb +32 -0
- data/lib/datacite/mapping/identifier.rb +5 -2
- data/lib/datacite/mapping/module_info.rb +1 -1
- data/lib/datacite/mapping/resource.rb +2 -2
- data/spec/unit/datacite/mapping/resource_spec.rb +117 -13
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 94b1cd9d6fa3c9f73d4e01b8932735ed5531de96
|
4
|
+
data.tar.gz: 1f641d5ea6c21711480d806a4fc6e07b07df2697
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8958bbce65cce405ec02251e111902de276955f4f822a60409ef62c0532cc3315869db205f2491b95d8701579a623da711513c1938f637ab9c59ea84646668e9
|
7
|
+
data.tar.gz: 91bfefea1df89cf6067fa6f9a680c885ee49f76ff700849e88ada850016f42626fe3eec484da19efd1e1520ea16480dbea41afcbf5c75ec447d9b36237a8bbfe
|
data/CHANGES.md
CHANGED
@@ -1,3 +1,8 @@
|
|
1
|
+
## 0.2.3 (5 October 2016)
|
2
|
+
|
3
|
+
- Allow empty `<identifier/>` tags on read, but not write
|
4
|
+
- Allow but ignore empty `<subject/>` and `<description/>` tags on read
|
5
|
+
|
1
6
|
## 0.2.2 (4 October 2016)
|
2
7
|
|
3
8
|
- Fixed issue where `<geoLocation>` child elements would be written in Datacite 4
|
@@ -0,0 +1,32 @@
|
|
1
|
+
require 'xml/mapping_extensions'
|
2
|
+
|
3
|
+
module Datacite
|
4
|
+
module Mapping
|
5
|
+
|
6
|
+
module EmptyNodeUtils
|
7
|
+
def not_empty(element)
|
8
|
+
return unless element
|
9
|
+
text = element.text
|
10
|
+
empty = text.nil? || text.strip.empty?
|
11
|
+
warn "Ignoring empty element #{element}" if empty
|
12
|
+
!empty
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
16
|
+
# An {XML::Mapping::ArrayNode} that ignores empty tags, including tags
|
17
|
+
# containing only blank text.
|
18
|
+
class EmptyFilteringArrayNode < XML::Mapping::ArrayNode
|
19
|
+
include EmptyNodeUtils
|
20
|
+
def extract_attr_value(xml)
|
21
|
+
elements = default_when_xpath_err { @reader_path.all(xml) }
|
22
|
+
non_empty_elements = elements.select { |e| not_empty(e) }
|
23
|
+
non_empty_elements.map { |e| unmarshal(e) }
|
24
|
+
end
|
25
|
+
|
26
|
+
def unmarshal(element)
|
27
|
+
@unmarshaller.call(element)
|
28
|
+
end
|
29
|
+
end
|
30
|
+
XML::Mapping.add_node_class EmptyFilteringArrayNode
|
31
|
+
end
|
32
|
+
end
|
@@ -1,4 +1,5 @@
|
|
1
1
|
require 'xml/mapping'
|
2
|
+
require 'datacite/mapping/empty_filtering_nodes'
|
2
3
|
|
3
4
|
module Datacite
|
4
5
|
module Mapping
|
@@ -60,8 +61,10 @@ module Datacite
|
|
60
61
|
# Custom node to warn (but not blow up) if we read an XML `<resource/>` that's
|
61
62
|
# missing its `<identifier/>`.
|
62
63
|
class IdentifierNode < XML::Mapping::ObjectNode
|
64
|
+
include EmptyNodeUtils
|
63
65
|
def xml_to_obj(_obj, xml)
|
64
|
-
super if has_element?(xml)
|
66
|
+
return super if (element = has_element?(xml)) && not_empty(element)
|
67
|
+
warn 'Identifier not found; add a valid Identifier to the Resource before saving'
|
65
68
|
end
|
66
69
|
|
67
70
|
private
|
@@ -69,7 +72,7 @@ module Datacite
|
|
69
72
|
def has_element?(xml) # rubocop:disable Style/PredicateName
|
70
73
|
@path.first(xml)
|
71
74
|
rescue XML::XXPathError
|
72
|
-
|
75
|
+
false
|
73
76
|
end
|
74
77
|
end
|
75
78
|
XML::Mapping.add_node_class IdentifierNode
|
@@ -184,7 +184,7 @@ module Datacite
|
|
184
184
|
|
185
185
|
# @!attribute [rw] subjects
|
186
186
|
# @return [Array<Subject>] subjects, keywords, classification codes, or key phrases describing the resource.
|
187
|
-
|
187
|
+
empty_filtering_array_node :subjects, 'subjects', 'subject', class: Subject, default_value: []
|
188
188
|
|
189
189
|
# @!attribute [rw] fundingReferences
|
190
190
|
# @return [Array<FundingReference>] information about financial support (funding) for the resource being registered.
|
@@ -232,7 +232,7 @@ module Datacite
|
|
232
232
|
|
233
233
|
# @!attribute [rw] descriptions
|
234
234
|
# @return [Array<Description>] all additional information that does not fit in any of the other categories.
|
235
|
-
|
235
|
+
empty_filtering_array_node :descriptions, 'descriptions', 'description', class: Description, default_value: []
|
236
236
|
|
237
237
|
# @!attribute [rw] geo_locations
|
238
238
|
# @return [Array<GeoLocations>] spatial region or named place where the data was gathered or about which the data is focused.
|
@@ -938,7 +938,8 @@ module Datacite
|
|
938
938
|
def normalize(xml_str)
|
939
939
|
r0 = xml_str
|
940
940
|
r1 = r0.gsub(%r{<br\s+/>}, '<br/>') # entity-de-escape <br/> tags
|
941
|
-
r2 = r1.gsub(%r{<(?!br)[^>]+/>}, '') # remove empty tags
|
941
|
+
# r2 = r1.gsub(%r{<(?!br)[^>]+/>}, '') # remove empty tags
|
942
|
+
r2 = r1
|
942
943
|
r3 = r2.gsub(/<resource (xmlns:xsi="[^"]+")\s+(xsi:schemaLocation="[^"]+")>/, "<resource \\2 \\1 xmlns=\"http://datacite.org/schema/kernel-3\">") # fix missing namespace
|
943
944
|
r4 = r3.gsub(%r{(<identifier[^>]+>)\s*([^ ]+)\s*(</identifier>)}, '\\1\\2\\3') # trim identifiers
|
944
945
|
r5 = r4.gsub(%r{<([^>]+tude)>([0-9.-]+?)(0?)0+</\1>}, '<\\1>\\2\\3</\\1>') # strip trailing coordinate zeroes
|
@@ -966,21 +967,12 @@ module Datacite
|
|
966
967
|
# - missing DOI
|
967
968
|
# - empty tags
|
968
969
|
# - nested contributors instead of contributorNames
|
969
|
-
# - empty descriptions
|
970
|
-
# TODO: handle empty descriptions like empty subjects
|
971
970
|
|
972
971
|
r0 = xml_str
|
973
972
|
r1 = r0.gsub(%r{<(?!br)[^>]+/>}, '') # remove empty tags
|
974
|
-
r2 = r1.gsub(%r{<
|
975
|
-
r3 = r2.gsub(%r{<
|
976
|
-
|
977
|
-
r5 = r4.gsub(%r{(<date[^>]*>)(\d{4})-(\d{4})(</date>)}, '\\1\\2/\\3\\4') # fix date ranges
|
978
|
-
r6 = r5.gsub(%r{(<contributor[^>/]+>\s*)<contributor>([^<]+)</contributor>(\s*</contributor>)}, '\\1<contributorName>\\2</contributorName>\\3') # fix broken contributors
|
979
|
-
# if r6.include?('<br')
|
980
|
-
# trace = [r0, r1, r2, r3, r4, r5, r6].map { |r| r.include?('<br') }
|
981
|
-
# puts trace
|
982
|
-
# end
|
983
|
-
r6
|
973
|
+
r2 = r1.gsub(%r{<([A-Za-z]*)[^>]*>\s*</\1>}, '') # remove empty tag pairs
|
974
|
+
r3 = r2.gsub(%r{(<date[^>]*>)(\d{4})-(\d{4})(</date>)}, '\\1\\2/\\3\\4') # fix date ranges
|
975
|
+
r3.gsub(%r{(<contributor[^>/]+>\s*)<contributor>([^<]+)</contributor>(\s*</contributor>)}, '\\1<contributorName>\\2</contributorName>\\3') # fix broken contributors
|
984
976
|
end
|
985
977
|
|
986
978
|
def it_round_trips(file:, mapping: :_default, fix_dash1: false) # rubocop:disable Metrics/AbcSize
|
@@ -1165,6 +1157,118 @@ module Datacite
|
|
1165
1157
|
end
|
1166
1158
|
end
|
1167
1159
|
end
|
1160
|
+
|
1161
|
+
describe '#save_to_xml' do
|
1162
|
+
|
1163
|
+
attr_reader :resource
|
1164
|
+
|
1165
|
+
before(:each) do
|
1166
|
+
@identifier = Identifier.new(value: '10.14749/1407399495')
|
1167
|
+
|
1168
|
+
@creators = [
|
1169
|
+
Creator.new(
|
1170
|
+
name: 'Hedy Lamarr',
|
1171
|
+
identifier: NameIdentifier.new(scheme: 'ISNI', scheme_uri: URI('http://isni.org/'), value: '0000-0001-1690-159X'),
|
1172
|
+
affiliations: ['United Artists', 'Metro-Goldwyn-Mayer']
|
1173
|
+
),
|
1174
|
+
Creator.new(
|
1175
|
+
name: 'Herschlag, Natalie',
|
1176
|
+
identifier: NameIdentifier.new(scheme: 'ISNI', scheme_uri: URI('http://isni.org/'), value: '0000-0001-0907-8419'),
|
1177
|
+
affiliations: ['Gaumont Buena Vista International', '20th Century Fox']
|
1178
|
+
)
|
1179
|
+
]
|
1180
|
+
|
1181
|
+
@titles = [
|
1182
|
+
Title.new(value: 'An Account of a Very Odd Monstrous Calf', language: 'en-emodeng'),
|
1183
|
+
Title.new(type: TitleType::SUBTITLE, value: 'And a Contest between Two Artists about Optick Glasses, &c', language: 'en-emodeng')
|
1184
|
+
]
|
1185
|
+
|
1186
|
+
@publisher = 'California Digital Library'
|
1187
|
+
@publication_year = 2015
|
1188
|
+
|
1189
|
+
@resource = Resource.new(
|
1190
|
+
identifier: identifier,
|
1191
|
+
creators: creators,
|
1192
|
+
titles: titles,
|
1193
|
+
publisher: publisher,
|
1194
|
+
publication_year: publication_year
|
1195
|
+
)
|
1196
|
+
end
|
1197
|
+
|
1198
|
+
it 'sets the DC4 namespace by default' do
|
1199
|
+
xml = resource.save_to_xml
|
1200
|
+
expect(xml).to be_a(REXML::Element)
|
1201
|
+
expect(xml.namespace).to eq(DATACITE_4_NAMESPACE.uri)
|
1202
|
+
end
|
1203
|
+
|
1204
|
+
it 'sets the DC3 namespace for the :datacite_3 mapping' do
|
1205
|
+
xml = resource.save_to_xml(mapping: :datacite_3)
|
1206
|
+
expect(xml).to be_a(REXML::Element)
|
1207
|
+
expect(xml.namespace).to eq(DATACITE_3_NAMESPACE.uri)
|
1208
|
+
end
|
1209
|
+
|
1210
|
+
it 'fails on nil identifiers' do
|
1211
|
+
resource.instance_variable_set(:@identifier, nil)
|
1212
|
+
expect { resource.save_to_xml }.to raise_error(XML::MappingError)
|
1213
|
+
end
|
1214
|
+
end
|
1215
|
+
|
1216
|
+
describe '#parse_xml' do
|
1217
|
+
|
1218
|
+
attr_reader :xml_text
|
1219
|
+
|
1220
|
+
before(:each) do
|
1221
|
+
@xml_text = "<resource xsi:schemaLocation='http://datacite.org/schema/kernel-4 http://schema.datacite.org/meta/kernel-4/metadata.xsd' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' xmlns='http://datacite.org/schema/kernel-4'>
|
1222
|
+
<identifier identifierType='DOI'>10.14749/1407399495</identifier>
|
1223
|
+
<creators>
|
1224
|
+
<creator>
|
1225
|
+
<creatorName>Hedy Lamarr</creatorName>
|
1226
|
+
<nameIdentifier nameIdentifierScheme='ISNI' schemeURI='http://isni.org/'>0000-0001-1690-159X</nameIdentifier>
|
1227
|
+
<affiliation>United Artists</affiliation>
|
1228
|
+
<affiliation>Metro-Goldwyn-Mayer</affiliation>
|
1229
|
+
</creator>
|
1230
|
+
<creator>
|
1231
|
+
<creatorName>Herschlag, Natalie</creatorName>
|
1232
|
+
<nameIdentifier nameIdentifierScheme='ISNI' schemeURI='http://isni.org/'>0000-0001-0907-8419</nameIdentifier>
|
1233
|
+
<affiliation>Gaumont Buena Vista International</affiliation>
|
1234
|
+
<affiliation>20th Century Fox</affiliation>
|
1235
|
+
</creator>
|
1236
|
+
</creators>
|
1237
|
+
<titles>
|
1238
|
+
<title xml:lang='en-emodeng'>An Account of a Very Odd Monstrous Calf</title>
|
1239
|
+
<title xml:lang='en-emodeng' titleType='Subtitle'>And a Contest between Two Artists about Optick Glasses, &c</title>
|
1240
|
+
</titles>
|
1241
|
+
<publisher>California Digital Library</publisher>
|
1242
|
+
<publicationYear>2015</publicationYear>
|
1243
|
+
<subjects>
|
1244
|
+
<subject xml:lang='en-us' schemeURI='http://id.loc.gov/authorities/subjects' subjectScheme='LCSH'>Mammals--Embryology</subject>
|
1245
|
+
</subjects>
|
1246
|
+
<descriptions>
|
1247
|
+
<description xml:lang='en-us' descriptionType='Abstract'>foo</description>
|
1248
|
+
</descriptions>
|
1249
|
+
</resource>"
|
1250
|
+
end
|
1251
|
+
|
1252
|
+
it 'skips empty identifiers' do
|
1253
|
+
sketchy_xml = xml_text.gsub(%r{<identifier.*/identifier>}, '<identifier/>')
|
1254
|
+
resource = Resource.parse_xml(sketchy_xml)
|
1255
|
+
expect(resource).to be_a(Resource)
|
1256
|
+
end
|
1257
|
+
|
1258
|
+
it 'skips empty subjects' do
|
1259
|
+
sketchy_xml = xml_text.gsub(%r{>[^<]+</subject>}, '/>')
|
1260
|
+
resource = Resource.parse_xml(sketchy_xml)
|
1261
|
+
expect(resource).to be_a(Resource)
|
1262
|
+
expect(resource.subjects).to eq([])
|
1263
|
+
end
|
1264
|
+
|
1265
|
+
it 'skips empty descriptions' do
|
1266
|
+
sketchy_xml = xml_text.gsub(%r{>[^<]+</description>}, '/>')
|
1267
|
+
resource = Resource.parse_xml(sketchy_xml)
|
1268
|
+
expect(resource).to be_a(Resource)
|
1269
|
+
expect(resource.descriptions).to eq([])
|
1270
|
+
end
|
1271
|
+
end
|
1168
1272
|
end
|
1169
1273
|
end
|
1170
1274
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: datacite-mapping
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.2
|
4
|
+
version: 0.2.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- David Moles
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-10-04 00:00:00.000000000 Z
|
11
|
+
date: 2016-10-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: typesafe_enum
|
@@ -217,6 +217,7 @@ files:
|
|
217
217
|
- lib/datacite/mapping/date.rb
|
218
218
|
- lib/datacite/mapping/date_value.rb
|
219
219
|
- lib/datacite/mapping/description.rb
|
220
|
+
- lib/datacite/mapping/empty_filtering_nodes.rb
|
220
221
|
- lib/datacite/mapping/funding_reference.rb
|
221
222
|
- lib/datacite/mapping/geo_location.rb
|
222
223
|
- lib/datacite/mapping/geo_location_box.rb
|