datacite-mapping 0.2.2 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 6962dc851d06101de0b83fbc18ccba19cdb56c02
4
- data.tar.gz: 2a11a09bb26c835049db959b4451aeebb9cabde2
3
+ metadata.gz: 94b1cd9d6fa3c9f73d4e01b8932735ed5531de96
4
+ data.tar.gz: 1f641d5ea6c21711480d806a4fc6e07b07df2697
5
5
  SHA512:
6
- metadata.gz: 67d2bb08cd1a9d50f65ba8daa5308513a8b4363f72815599208423e620fe4a3666adb3c8fdd8a20ed3e70315bbdcbddb023d7885c97ae4f0a4a5bceea7e117f1
7
- data.tar.gz: 6d3af828854f9caffbb9b56f5f92c2074600bb956f940febfefd2776aec878e8e078aa6901772c833e7ad21f7a3958cad7587a4ad667734da81a0a3f11766ef1
6
+ metadata.gz: 8958bbce65cce405ec02251e111902de276955f4f822a60409ef62c0532cc3315869db205f2491b95d8701579a623da711513c1938f637ab9c59ea84646668e9
7
+ data.tar.gz: 91bfefea1df89cf6067fa6f9a680c885ee49f76ff700849e88ada850016f42626fe3eec484da19efd1e1520ea16480dbea41afcbf5c75ec447d9b36237a8bbfe
data/CHANGES.md CHANGED
@@ -1,3 +1,8 @@
1
+ ## 0.2.3 (5 October 2016)
2
+
3
+ - Allow empty `<identifier/>` tags on read, but not write
4
+ - Allow but ignore empty `<subject/>` and `<description/>` tags on read
5
+
1
6
  ## 0.2.2 (4 October 2016)
2
7
 
3
8
  - Fixed issue where `<geoLocation>` child elements would be written in Datacite 4
@@ -0,0 +1,32 @@
1
+ require 'xml/mapping_extensions'
2
+
3
+ module Datacite
4
+ module Mapping
5
+
6
+ module EmptyNodeUtils
7
+ def not_empty(element)
8
+ return unless element
9
+ text = element.text
10
+ empty = text.nil? || text.strip.empty?
11
+ warn "Ignoring empty element #{element}" if empty
12
+ !empty
13
+ end
14
+ end
15
+
16
+ # An {XML::Mapping::ArrayNode} that ignores empty tags, including tags
17
+ # containing only blank text.
18
+ class EmptyFilteringArrayNode < XML::Mapping::ArrayNode
19
+ include EmptyNodeUtils
20
+ def extract_attr_value(xml)
21
+ elements = default_when_xpath_err { @reader_path.all(xml) }
22
+ non_empty_elements = elements.select { |e| not_empty(e) }
23
+ non_empty_elements.map { |e| unmarshal(e) }
24
+ end
25
+
26
+ def unmarshal(element)
27
+ @unmarshaller.call(element)
28
+ end
29
+ end
30
+ XML::Mapping.add_node_class EmptyFilteringArrayNode
31
+ end
32
+ end
@@ -1,4 +1,5 @@
1
1
  require 'xml/mapping'
2
+ require 'datacite/mapping/empty_filtering_nodes'
2
3
 
3
4
  module Datacite
4
5
  module Mapping
@@ -60,8 +61,10 @@ module Datacite
60
61
  # Custom node to warn (but not blow up) if we read an XML `<resource/>` that's
61
62
  # missing its `<identifier/>`.
62
63
  class IdentifierNode < XML::Mapping::ObjectNode
64
+ include EmptyNodeUtils
63
65
  def xml_to_obj(_obj, xml)
64
- super if has_element?(xml)
66
+ return super if (element = has_element?(xml)) && not_empty(element)
67
+ warn 'Identifier not found; add a valid Identifier to the Resource before saving'
65
68
  end
66
69
 
67
70
  private
@@ -69,7 +72,7 @@ module Datacite
69
72
  def has_element?(xml) # rubocop:disable Style/PredicateName
70
73
  @path.first(xml)
71
74
  rescue XML::XXPathError
72
- warn '<identifier/> not found; add a valid Identifier to the Resource before saving'
75
+ false
73
76
  end
74
77
  end
75
78
  XML::Mapping.add_node_class IdentifierNode
@@ -4,7 +4,7 @@ module Datacite
4
4
  NAME = 'datacite-mapping'.freeze
5
5
 
6
6
  # The version of this gem
7
- VERSION = '0.2.2'.freeze
7
+ VERSION = '0.2.3'.freeze
8
8
 
9
9
  # The copyright notice for this gem
10
10
  COPYRIGHT = 'Copyright (c) 2016 The Regents of the University of California'.freeze
@@ -184,7 +184,7 @@ module Datacite
184
184
 
185
185
  # @!attribute [rw] subjects
186
186
  # @return [Array<Subject>] subjects, keywords, classification codes, or key phrases describing the resource.
187
- array_node :subjects, 'subjects', 'subject', class: Subject, default_value: []
187
+ empty_filtering_array_node :subjects, 'subjects', 'subject', class: Subject, default_value: []
188
188
 
189
189
  # @!attribute [rw] fundingReferences
190
190
  # @return [Array<FundingReference>] information about financial support (funding) for the resource being registered.
@@ -232,7 +232,7 @@ module Datacite
232
232
 
233
233
  # @!attribute [rw] descriptions
234
234
  # @return [Array<Description>] all additional information that does not fit in any of the other categories.
235
- array_node :descriptions, 'descriptions', 'description', class: Description, default_value: []
235
+ empty_filtering_array_node :descriptions, 'descriptions', 'description', class: Description, default_value: []
236
236
 
237
237
  # @!attribute [rw] geo_locations
238
238
  # @return [Array<GeoLocation>] spatial region or named place where the data was gathered or about which the data is focused.
@@ -938,7 +938,8 @@ module Datacite
938
938
  def normalize(xml_str)
939
939
  r0 = xml_str
940
940
  r1 = r0.gsub(%r{&lt;br\s+/&gt;}, '<br/>') # entity-de-escape <br/> tags
941
- r2 = r1.gsub(%r{<(?!br)[^>]+/>}, '') # remove empty tags
941
+ # r2 = r1.gsub(%r{<(?!br)[^>]+/>}, '') # remove empty tags
942
+ r2 = r1
942
943
  r3 = r2.gsub(/<resource (xmlns:xsi="[^"]+")\s+(xsi:schemaLocation="[^"]+")>/, "<resource \\2 \\1 xmlns=\"http://datacite.org/schema/kernel-3\">") # fix missing namespace
943
944
  r4 = r3.gsub(%r{(<identifier[^>]+>)\s*([^ ]+)\s*(</identifier>)}, '\\1\\2\\3') # trim identifiers
944
945
  r5 = r4.gsub(%r{<([^>]+tude)>([0-9.-]+?)(0?)0+</\1>}, '<\\1>\\2\\3</\\1>') # strip trailing coordinate zeroes
@@ -966,21 +967,12 @@ module Datacite
966
967
  # - missing DOI
967
968
  # - empty tags
968
969
  # - nested contributors instead of contributorNames
969
- # - empty descriptions
970
- # TODO: handle empty descriptions like empty subjects
971
970
 
972
971
  r0 = xml_str
973
972
  r1 = r0.gsub(%r{<(?!br)[^>]+/>}, '') # remove empty tags
974
- r2 = r1.gsub(%r{<description[^/>]*/>}, '') # strip empty descriptions
975
- r3 = r2.gsub(%r{<description[^/>]*></description>}, '') # strip empty descriptions
976
- r4 = r3.gsub(%r{<([A-Za-z]*)[^>]*>\s*</\1>}, '') # remove empty tag pairs
977
- r5 = r4.gsub(%r{(<date[^>]*>)(\d{4})-(\d{4})(</date>)}, '\\1\\2/\\3\\4') # fix date ranges
978
- r6 = r5.gsub(%r{(<contributor[^>/]+>\s*)<contributor>([^<]+)</contributor>(\s*</contributor>)}, '\\1<contributorName>\\2</contributorName>\\3') # fix broken contributors
979
- # if r6.include?('&lt;br')
980
- # trace = [r0, r1, r2, r3, r4, r5, r6].map { |r| r.include?('&lt;br') }
981
- # puts trace
982
- # end
983
- r6
973
+ r2 = r1.gsub(%r{<([A-Za-z]*)[^>]*>\s*</\1>}, '') # remove empty tag pairs
974
+ r3 = r2.gsub(%r{(<date[^>]*>)(\d{4})-(\d{4})(</date>)}, '\\1\\2/\\3\\4') # fix date ranges
975
+ r3.gsub(%r{(<contributor[^>/]+>\s*)<contributor>([^<]+)</contributor>(\s*</contributor>)}, '\\1<contributorName>\\2</contributorName>\\3') # fix broken contributors
984
976
  end
985
977
 
986
978
  def it_round_trips(file:, mapping: :_default, fix_dash1: false) # rubocop:disable Metrics/AbcSize
@@ -1165,6 +1157,118 @@ module Datacite
1165
1157
  end
1166
1158
  end
1167
1159
  end
1160
+
1161
+ describe '#save_to_xml' do
1162
+
1163
+ attr_reader :resource
1164
+
1165
+ before(:each) do
1166
+ @identifier = Identifier.new(value: '10.14749/1407399495')
1167
+
1168
+ @creators = [
1169
+ Creator.new(
1170
+ name: 'Hedy Lamarr',
1171
+ identifier: NameIdentifier.new(scheme: 'ISNI', scheme_uri: URI('http://isni.org/'), value: '0000-0001-1690-159X'),
1172
+ affiliations: ['United Artists', 'Metro-Goldwyn-Mayer']
1173
+ ),
1174
+ Creator.new(
1175
+ name: 'Herschlag, Natalie',
1176
+ identifier: NameIdentifier.new(scheme: 'ISNI', scheme_uri: URI('http://isni.org/'), value: '0000-0001-0907-8419'),
1177
+ affiliations: ['Gaumont Buena Vista International', '20th Century Fox']
1178
+ )
1179
+ ]
1180
+
1181
+ @titles = [
1182
+ Title.new(value: 'An Account of a Very Odd Monstrous Calf', language: 'en-emodeng'),
1183
+ Title.new(type: TitleType::SUBTITLE, value: 'And a Contest between Two Artists about Optick Glasses, &c', language: 'en-emodeng')
1184
+ ]
1185
+
1186
+ @publisher = 'California Digital Library'
1187
+ @publication_year = 2015
1188
+
1189
+ @resource = Resource.new(
1190
+ identifier: identifier,
1191
+ creators: creators,
1192
+ titles: titles,
1193
+ publisher: publisher,
1194
+ publication_year: publication_year
1195
+ )
1196
+ end
1197
+
1198
+ it 'sets the DC4 namespace by default' do
1199
+ xml = resource.save_to_xml
1200
+ expect(xml).to be_a(REXML::Element)
1201
+ expect(xml.namespace).to eq(DATACITE_4_NAMESPACE.uri)
1202
+ end
1203
+
1204
+ it 'sets the DC3 namespace for the :datacite_3 mapping' do
1205
+ xml = resource.save_to_xml(mapping: :datacite_3)
1206
+ expect(xml).to be_a(REXML::Element)
1207
+ expect(xml.namespace).to eq(DATACITE_3_NAMESPACE.uri)
1208
+ end
1209
+
1210
+ it 'fails on nil identifiers' do
1211
+ resource.instance_variable_set(:@identifier, nil)
1212
+ expect { resource.save_to_xml }.to raise_error(XML::MappingError)
1213
+ end
1214
+ end
1215
+
1216
+ describe '#parse_xml' do
1217
+
1218
+ attr_reader :xml_text
1219
+
1220
+ before(:each) do
1221
+ @xml_text = "<resource xsi:schemaLocation='http://datacite.org/schema/kernel-4 http://schema.datacite.org/meta/kernel-4/metadata.xsd' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' xmlns='http://datacite.org/schema/kernel-4'>
1222
+ <identifier identifierType='DOI'>10.14749/1407399495</identifier>
1223
+ <creators>
1224
+ <creator>
1225
+ <creatorName>Hedy Lamarr</creatorName>
1226
+ <nameIdentifier nameIdentifierScheme='ISNI' schemeURI='http://isni.org/'>0000-0001-1690-159X</nameIdentifier>
1227
+ <affiliation>United Artists</affiliation>
1228
+ <affiliation>Metro-Goldwyn-Mayer</affiliation>
1229
+ </creator>
1230
+ <creator>
1231
+ <creatorName>Herschlag, Natalie</creatorName>
1232
+ <nameIdentifier nameIdentifierScheme='ISNI' schemeURI='http://isni.org/'>0000-0001-0907-8419</nameIdentifier>
1233
+ <affiliation>Gaumont Buena Vista International</affiliation>
1234
+ <affiliation>20th Century Fox</affiliation>
1235
+ </creator>
1236
+ </creators>
1237
+ <titles>
1238
+ <title xml:lang='en-emodeng'>An Account of a Very Odd Monstrous Calf</title>
1239
+ <title xml:lang='en-emodeng' titleType='Subtitle'>And a Contest between Two Artists about Optick Glasses, &amp;c</title>
1240
+ </titles>
1241
+ <publisher>California Digital Library</publisher>
1242
+ <publicationYear>2015</publicationYear>
1243
+ <subjects>
1244
+ <subject xml:lang='en-us' schemeURI='http://id.loc.gov/authorities/subjects' subjectScheme='LCSH'>Mammals--Embryology</subject>
1245
+ </subjects>
1246
+ <descriptions>
1247
+ <description xml:lang='en-us' descriptionType='Abstract'>foo</description>
1248
+ </descriptions>
1249
+ </resource>"
1250
+ end
1251
+
1252
+ it 'skips empty identifiers' do
1253
+ sketchy_xml = xml_text.gsub(%r{<identifier.*/identifier>}, '<identifier/>')
1254
+ resource = Resource.parse_xml(sketchy_xml)
1255
+ expect(resource).to be_a(Resource)
1256
+ end
1257
+
1258
+ it 'skips empty subjects' do
1259
+ sketchy_xml = xml_text.gsub(%r{>[^<]+</subject>}, '/>')
1260
+ resource = Resource.parse_xml(sketchy_xml)
1261
+ expect(resource).to be_a(Resource)
1262
+ expect(resource.subjects).to eq([])
1263
+ end
1264
+
1265
+ it 'skips empty descriptions' do
1266
+ sketchy_xml = xml_text.gsub(%r{>[^<]+</description>}, '/>')
1267
+ resource = Resource.parse_xml(sketchy_xml)
1268
+ expect(resource).to be_a(Resource)
1269
+ expect(resource.descriptions).to eq([])
1270
+ end
1271
+ end
1168
1272
  end
1169
1273
  end
1170
1274
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: datacite-mapping
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.2
4
+ version: 0.2.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - David Moles
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-10-04 00:00:00.000000000 Z
11
+ date: 2016-10-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: typesafe_enum
@@ -217,6 +217,7 @@ files:
217
217
  - lib/datacite/mapping/date.rb
218
218
  - lib/datacite/mapping/date_value.rb
219
219
  - lib/datacite/mapping/description.rb
220
+ - lib/datacite/mapping/empty_filtering_nodes.rb
220
221
  - lib/datacite/mapping/funding_reference.rb
221
222
  - lib/datacite/mapping/geo_location.rb
222
223
  - lib/datacite/mapping/geo_location_box.rb