datacite-mapping 0.2.2 → 0.2.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 6962dc851d06101de0b83fbc18ccba19cdb56c02
4
- data.tar.gz: 2a11a09bb26c835049db959b4451aeebb9cabde2
3
+ metadata.gz: 94b1cd9d6fa3c9f73d4e01b8932735ed5531de96
4
+ data.tar.gz: 1f641d5ea6c21711480d806a4fc6e07b07df2697
5
5
  SHA512:
6
- metadata.gz: 67d2bb08cd1a9d50f65ba8daa5308513a8b4363f72815599208423e620fe4a3666adb3c8fdd8a20ed3e70315bbdcbddb023d7885c97ae4f0a4a5bceea7e117f1
7
- data.tar.gz: 6d3af828854f9caffbb9b56f5f92c2074600bb956f940febfefd2776aec878e8e078aa6901772c833e7ad21f7a3958cad7587a4ad667734da81a0a3f11766ef1
6
+ metadata.gz: 8958bbce65cce405ec02251e111902de276955f4f822a60409ef62c0532cc3315869db205f2491b95d8701579a623da711513c1938f637ab9c59ea84646668e9
7
+ data.tar.gz: 91bfefea1df89cf6067fa6f9a680c885ee49f76ff700849e88ada850016f42626fe3eec484da19efd1e1520ea16480dbea41afcbf5c75ec447d9b36237a8bbfe
data/CHANGES.md CHANGED
@@ -1,3 +1,8 @@
1
+ ## 0.2.3 (5 October 2016)
2
+
3
+ - Allow empty `<identifier/>` tags on read, but not write
4
+ - Allow but ignore empty `<subject/>` and `<description/>` tags on read
5
+
1
6
  ## 0.2.2 (4 October 2016)
2
7
 
3
8
  - Fixed issue where `<geoLocation>` child elements would be written in Datacite 4
@@ -0,0 +1,32 @@
1
+ require 'xml/mapping_extensions'
2
+
3
+ module Datacite
4
+ module Mapping
5
+
6
# Helpers for recognising XML elements that carry no meaningful text.
module EmptyNodeUtils
  # Reports whether an element has any non-blank text content, emitting a
  # warning when it does not.
  #
  # Every direct text child is examined rather than only the first one, so
  # mixed content such as "<description>\n  <br/>Some text</description>"
  # (whose first text node is pure whitespace) still counts as non-empty.
  #
  # @param element [#texts, nil] the element to check; presumably a
  #   REXML::Element, whose `texts` returns its direct text children
  # @return [Boolean, nil] `true` if any text child is non-blank, `false` if
  #   the element has no text or only blank text, `nil` if `element` is nil
  def not_empty(element)
    return unless element
    empty = element.texts.all? { |text| text.value.strip.empty? }
    warn "Ignoring empty element #{element}" if empty
    !empty
  end
end
15
+
16
# An {XML::Mapping::ArrayNode} that skips empty tags, including tags whose
# text is only whitespace, when reading.
class EmptyFilteringArrayNode < XML::Mapping::ArrayNode
  include EmptyNodeUtils

  # Converts one matched element by handing it to this node's unmarshaller.
  def unmarshal(element)
    @unmarshaller.call(element)
  end

  # Collects everything the reader path finds in +xml+, discards the
  # elements that +not_empty+ rejects, then unmarshals the remainder.
  # Filtering finishes completely before any element is unmarshalled.
  def extract_attr_value(xml)
    default_when_xpath_err { @reader_path.all(xml) }
      .select { |candidate| not_empty(candidate) }
      .map { |candidate| unmarshal(candidate) }
  end
end
30
+ XML::Mapping.add_node_class EmptyFilteringArrayNode
31
+ end
32
+ end
@@ -1,4 +1,5 @@
1
1
  require 'xml/mapping'
2
+ require 'datacite/mapping/empty_filtering_nodes'
2
3
 
3
4
  module Datacite
4
5
  module Mapping
@@ -60,8 +61,10 @@ module Datacite
60
61
  # Custom node to warn (but not blow up) if we read an XML `<resource/>` that's
61
62
  # missing its `<identifier/>`.
62
63
  class IdentifierNode < XML::Mapping::ObjectNode
64
+ include EmptyNodeUtils
63
65
  def xml_to_obj(_obj, xml)
64
- super if has_element?(xml)
66
+ return super if (element = has_element?(xml)) && not_empty(element)
67
+ warn 'Identifier not found; add a valid Identifier to the Resource before saving'
65
68
  end
66
69
 
67
70
  private
@@ -69,7 +72,7 @@ module Datacite
69
72
  def has_element?(xml) # rubocop:disable Style/PredicateName
70
73
  @path.first(xml)
71
74
  rescue XML::XXPathError
72
- warn '<identifier/> not found; add a valid Identifier to the Resource before saving'
75
+ false
73
76
  end
74
77
  end
75
78
  XML::Mapping.add_node_class IdentifierNode
@@ -4,7 +4,7 @@ module Datacite
4
4
  NAME = 'datacite-mapping'.freeze
5
5
 
6
6
  # The version of this gem
7
- VERSION = '0.2.2'.freeze
7
+ VERSION = '0.2.3'.freeze
8
8
 
9
9
  # The copyright notice for this gem
10
10
  COPYRIGHT = 'Copyright (c) 2016 The Regents of the University of California'.freeze
@@ -184,7 +184,7 @@ module Datacite
184
184
 
185
185
  # @!attribute [rw] subjects
186
186
  # @return [Array<Subject>] subjects, keywords, classification codes, or key phrases describing the resource.
187
- array_node :subjects, 'subjects', 'subject', class: Subject, default_value: []
187
+ empty_filtering_array_node :subjects, 'subjects', 'subject', class: Subject, default_value: []
188
188
 
189
189
  # @!attribute [rw] fundingReferences
190
190
  # @return [Array<FundingReference>] information about financial support (funding) for the resource being registered.
@@ -232,7 +232,7 @@ module Datacite
232
232
 
233
233
  # @!attribute [rw] descriptions
234
234
  # @return [Array<Description>] all additional information that does not fit in any of the other categories.
235
- array_node :descriptions, 'descriptions', 'description', class: Description, default_value: []
235
+ empty_filtering_array_node :descriptions, 'descriptions', 'description', class: Description, default_value: []
236
236
 
237
237
  # @!attribute [rw] geo_locations
238
238
  # @return [Array<GeoLocation>] spatial region or named place where the data was gathered or about which the data is focused.
@@ -938,7 +938,8 @@ module Datacite
938
938
  def normalize(xml_str)
939
939
  r0 = xml_str
940
940
  r1 = r0.gsub(%r{&lt;br\s+/&gt;}, '<br/>') # entity-de-escape <br/> tags
941
- r2 = r1.gsub(%r{<(?!br)[^>]+/>}, '') # remove empty tags
941
+ # r2 = r1.gsub(%r{<(?!br)[^>]+/>}, '') # remove empty tags
942
+ r2 = r1
942
943
  r3 = r2.gsub(/<resource (xmlns:xsi="[^"]+")\s+(xsi:schemaLocation="[^"]+")>/, "<resource \\2 \\1 xmlns=\"http://datacite.org/schema/kernel-3\">") # fix missing namespace
943
944
  r4 = r3.gsub(%r{(<identifier[^>]+>)\s*([^ ]+)\s*(</identifier>)}, '\\1\\2\\3') # trim identifiers
944
945
  r5 = r4.gsub(%r{<([^>]+tude)>([0-9.-]+?)(0?)0+</\1>}, '<\\1>\\2\\3</\\1>') # strip trailing coordinate zeroes
@@ -966,21 +967,12 @@ module Datacite
966
967
  # - missing DOI
967
968
  # - empty tags
968
969
  # - nested contributors instead of contributorNames
969
- # - empty descriptions
970
- # TODO: handle empty descriptions like empty subjects
971
970
 
972
971
  r0 = xml_str
973
972
  r1 = r0.gsub(%r{<(?!br)[^>]+/>}, '') # remove empty tags
974
- r2 = r1.gsub(%r{<description[^/>]*/>}, '') # strip empty descriptions
975
- r3 = r2.gsub(%r{<description[^/>]*></description>}, '') # strip empty descriptions
976
- r4 = r3.gsub(%r{<([A-Za-z]*)[^>]*>\s*</\1>}, '') # remove empty tag pairs
977
- r5 = r4.gsub(%r{(<date[^>]*>)(\d{4})-(\d{4})(</date>)}, '\\1\\2/\\3\\4') # fix date ranges
978
- r6 = r5.gsub(%r{(<contributor[^>/]+>\s*)<contributor>([^<]+)</contributor>(\s*</contributor>)}, '\\1<contributorName>\\2</contributorName>\\3') # fix broken contributors
979
- # if r6.include?('&lt;br')
980
- # trace = [r0, r1, r2, r3, r4, r5, r6].map { |r| r.include?('&lt;br') }
981
- # puts trace
982
- # end
983
- r6
973
+ r2 = r1.gsub(%r{<([A-Za-z]*)[^>]*>\s*</\1>}, '') # remove empty tag pairs
974
+ r3 = r2.gsub(%r{(<date[^>]*>)(\d{4})-(\d{4})(</date>)}, '\\1\\2/\\3\\4') # fix date ranges
975
+ r3.gsub(%r{(<contributor[^>/]+>\s*)<contributor>([^<]+)</contributor>(\s*</contributor>)}, '\\1<contributorName>\\2</contributorName>\\3') # fix broken contributors
984
976
  end
985
977
 
986
978
  def it_round_trips(file:, mapping: :_default, fix_dash1: false) # rubocop:disable Metrics/AbcSize
@@ -1165,6 +1157,118 @@ module Datacite
1165
1157
  end
1166
1158
  end
1167
1159
  end
1160
+
1161
+ describe '#save_to_xml' do
1162
+
1163
+ attr_reader :resource
1164
+
1165
+ before(:each) do
1166
+ @identifier = Identifier.new(value: '10.14749/1407399495')
1167
+
1168
+ @creators = [
1169
+ Creator.new(
1170
+ name: 'Hedy Lamarr',
1171
+ identifier: NameIdentifier.new(scheme: 'ISNI', scheme_uri: URI('http://isni.org/'), value: '0000-0001-1690-159X'),
1172
+ affiliations: ['United Artists', 'Metro-Goldwyn-Mayer']
1173
+ ),
1174
+ Creator.new(
1175
+ name: 'Herschlag, Natalie',
1176
+ identifier: NameIdentifier.new(scheme: 'ISNI', scheme_uri: URI('http://isni.org/'), value: '0000-0001-0907-8419'),
1177
+ affiliations: ['Gaumont Buena Vista International', '20th Century Fox']
1178
+ )
1179
+ ]
1180
+
1181
+ @titles = [
1182
+ Title.new(value: 'An Account of a Very Odd Monstrous Calf', language: 'en-emodeng'),
1183
+ Title.new(type: TitleType::SUBTITLE, value: 'And a Contest between Two Artists about Optick Glasses, &c', language: 'en-emodeng')
1184
+ ]
1185
+
1186
+ @publisher = 'California Digital Library'
1187
+ @publication_year = 2015
1188
+
1189
+ @resource = Resource.new(
1190
+ identifier: identifier,
1191
+ creators: creators,
1192
+ titles: titles,
1193
+ publisher: publisher,
1194
+ publication_year: publication_year
1195
+ )
1196
+ end
1197
+
1198
+ it 'sets the DC4 namespace by default' do
1199
+ xml = resource.save_to_xml
1200
+ expect(xml).to be_a(REXML::Element)
1201
+ expect(xml.namespace).to eq(DATACITE_4_NAMESPACE.uri)
1202
+ end
1203
+
1204
+ it 'sets the DC3 namespace for the :datacite_3 mapping' do
1205
+ xml = resource.save_to_xml(mapping: :datacite_3)
1206
+ expect(xml).to be_a(REXML::Element)
1207
+ expect(xml.namespace).to eq(DATACITE_3_NAMESPACE.uri)
1208
+ end
1209
+
1210
+ it 'fails on nil identifiers' do
1211
+ resource.instance_variable_set(:@identifier, nil)
1212
+ expect { resource.save_to_xml }.to raise_error(XML::MappingError)
1213
+ end
1214
+ end
1215
+
1216
+ describe '#parse_xml' do
1217
+
1218
+ attr_reader :xml_text
1219
+
1220
+ before(:each) do
1221
+ @xml_text = "<resource xsi:schemaLocation='http://datacite.org/schema/kernel-4 http://schema.datacite.org/meta/kernel-4/metadata.xsd' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' xmlns='http://datacite.org/schema/kernel-4'>
1222
+ <identifier identifierType='DOI'>10.14749/1407399495</identifier>
1223
+ <creators>
1224
+ <creator>
1225
+ <creatorName>Hedy Lamarr</creatorName>
1226
+ <nameIdentifier nameIdentifierScheme='ISNI' schemeURI='http://isni.org/'>0000-0001-1690-159X</nameIdentifier>
1227
+ <affiliation>United Artists</affiliation>
1228
+ <affiliation>Metro-Goldwyn-Mayer</affiliation>
1229
+ </creator>
1230
+ <creator>
1231
+ <creatorName>Herschlag, Natalie</creatorName>
1232
+ <nameIdentifier nameIdentifierScheme='ISNI' schemeURI='http://isni.org/'>0000-0001-0907-8419</nameIdentifier>
1233
+ <affiliation>Gaumont Buena Vista International</affiliation>
1234
+ <affiliation>20th Century Fox</affiliation>
1235
+ </creator>
1236
+ </creators>
1237
+ <titles>
1238
+ <title xml:lang='en-emodeng'>An Account of a Very Odd Monstrous Calf</title>
1239
+ <title xml:lang='en-emodeng' titleType='Subtitle'>And a Contest between Two Artists about Optick Glasses, &amp;c</title>
1240
+ </titles>
1241
+ <publisher>California Digital Library</publisher>
1242
+ <publicationYear>2015</publicationYear>
1243
+ <subjects>
1244
+ <subject xml:lang='en-us' schemeURI='http://id.loc.gov/authorities/subjects' subjectScheme='LCSH'>Mammals--Embryology</subject>
1245
+ </subjects>
1246
+ <descriptions>
1247
+ <description xml:lang='en-us' descriptionType='Abstract'>foo</description>
1248
+ </descriptions>
1249
+ </resource>"
1250
+ end
1251
+
1252
+ it 'skips empty identifiers' do
1253
+ sketchy_xml = xml_text.gsub(%r{<identifier.*/identifier>}, '<identifier/>')
1254
+ resource = Resource.parse_xml(sketchy_xml)
1255
+ expect(resource).to be_a(Resource)
1256
+ end
1257
+
1258
+ it 'skips empty subjects' do
1259
+ sketchy_xml = xml_text.gsub(%r{>[^<]+</subject>}, '/>')
1260
+ resource = Resource.parse_xml(sketchy_xml)
1261
+ expect(resource).to be_a(Resource)
1262
+ expect(resource.subjects).to eq([])
1263
+ end
1264
+
1265
+ it 'skips empty descriptions' do
1266
+ sketchy_xml = xml_text.gsub(%r{>[^<]+</description>}, '/>')
1267
+ resource = Resource.parse_xml(sketchy_xml)
1268
+ expect(resource).to be_a(Resource)
1269
+ expect(resource.descriptions).to eq([])
1270
+ end
1271
+ end
1168
1272
  end
1169
1273
  end
1170
1274
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: datacite-mapping
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.2
4
+ version: 0.2.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - David Moles
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-10-04 00:00:00.000000000 Z
11
+ date: 2016-10-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: typesafe_enum
@@ -217,6 +217,7 @@ files:
217
217
  - lib/datacite/mapping/date.rb
218
218
  - lib/datacite/mapping/date_value.rb
219
219
  - lib/datacite/mapping/description.rb
220
+ - lib/datacite/mapping/empty_filtering_nodes.rb
220
221
  - lib/datacite/mapping/funding_reference.rb
221
222
  - lib/datacite/mapping/geo_location.rb
222
223
  - lib/datacite/mapping/geo_location_box.rb