bolognese 1.11.5 → 2.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (59) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile.lock +5 -4
  3. data/bolognese.gemspec +1 -0
  4. data/lib/bolognese/datacite_utils.rb +17 -9
  5. data/lib/bolognese/metadata.rb +1 -1
  6. data/lib/bolognese/metadata_utils.rb +1 -1
  7. data/lib/bolognese/readers/bibtex_reader.rb +2 -2
  8. data/lib/bolognese/readers/citeproc_reader.rb +1 -1
  9. data/lib/bolognese/readers/codemeta_reader.rb +2 -2
  10. data/lib/bolognese/readers/crosscite_reader.rb +4 -1
  11. data/lib/bolognese/readers/crossref_reader.rb +3 -4
  12. data/lib/bolognese/readers/datacite_json_reader.rb +4 -1
  13. data/lib/bolognese/readers/datacite_reader.rb +19 -3
  14. data/lib/bolognese/readers/npm_reader.rb +1 -1
  15. data/lib/bolognese/readers/ris_reader.rb +2 -2
  16. data/lib/bolognese/readers/schema_org_reader.rb +3 -2
  17. data/lib/bolognese/utils.rb +16 -4
  18. data/lib/bolognese/version.rb +1 -1
  19. data/lib/bolognese/writers/bibtex_writer.rb +1 -1
  20. data/lib/bolognese/writers/codemeta_writer.rb +1 -1
  21. data/lib/bolognese/writers/csv_writer.rb +1 -1
  22. data/lib/bolognese/writers/datacite_json_writer.rb +6 -1
  23. data/lib/bolognese/writers/jats_writer.rb +3 -3
  24. data/lib/bolognese/writers/ris_writer.rb +1 -1
  25. data/lib/bolognese/writers/schema_org_writer.rb +1 -1
  26. data/resources/kernel-4/include/datacite-relationType-v4.xsd +2 -0
  27. data/resources/kernel-4/include/datacite-resourceType-v4.xsd +2 -0
  28. data/resources/kernel-4/metadata.xsd +11 -7
  29. data/resources/kernel-4.5/include/datacite-contributorType-v4.xsd +35 -0
  30. data/resources/kernel-4.5/include/datacite-dateType-v4.xsd +25 -0
  31. data/resources/kernel-4.5/include/datacite-descriptionType-v4.xsd +19 -0
  32. data/resources/kernel-4.5/include/datacite-funderIdentifierType-v4.xsd +16 -0
  33. data/resources/kernel-4.5/include/datacite-nameType-v4.xsd +10 -0
  34. data/resources/kernel-4.5/include/datacite-numberType-v4.xsd +12 -0
  35. data/resources/kernel-4.5/include/datacite-relatedIdentifierType-v4.xsd +34 -0
  36. data/resources/kernel-4.5/include/datacite-relationType-v4.xsd +53 -0
  37. data/resources/kernel-4.5/include/datacite-resourceType-v4.xsd +45 -0
  38. data/resources/kernel-4.5/include/datacite-titleType-v4.xsd +14 -0
  39. data/resources/kernel-4.5/include/xml.xsd +286 -0
  40. data/resources/kernel-4.5/metadata.xsd +711 -0
  41. data/spec/fixtures/datacite-example-full-v4.5.xml +255 -0
  42. data/spec/fixtures/datacite-seriesinformation.xml +7 -2
  43. data/spec/readers/bibtex_reader_spec.rb +2 -0
  44. data/spec/readers/citeproc_reader_spec.rb +3 -0
  45. data/spec/readers/codemeta_reader_spec.rb +4 -4
  46. data/spec/readers/crosscite_reader_spec.rb +2 -0
  47. data/spec/readers/crossref_reader_spec.rb +41 -41
  48. data/spec/readers/datacite_json_reader_spec.rb +2 -0
  49. data/spec/readers/datacite_reader_spec.rb +73 -42
  50. data/spec/readers/npm_reader_spec.rb +2 -0
  51. data/spec/readers/ris_reader_spec.rb +3 -0
  52. data/spec/readers/schema_org_reader_spec.rb +11 -11
  53. data/spec/writers/crosscite_writer_spec.rb +7 -0
  54. data/spec/writers/datacite_json_writer_spec.rb +38 -0
  55. data/spec/writers/datacite_writer_spec.rb +84 -5
  56. data/spec/writers/jats_writer_spec.rb +8 -0
  57. data/spec/writers/rdf_xml_writer_spec.rb +7 -0
  58. data/spec/writers/turtle_writer_spec.rb +18 -0
  59. metadata +29 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 56e889d65ba92e22edbe2dfa559d112281809b5bc3c6828c745af4b2f412e98d
4
- data.tar.gz: 5b487af779c0c50a9e87e9e50427fe4575efa68945204659380c7da056333545
3
+ metadata.gz: df7445354dfea2520078a43cc8eec9128c91c67759e4d4ffe0378223eba52750
4
+ data.tar.gz: 6673463c01a3a114bdeff5e8ed2f6af7c25f565d0d1e39c35c6e1d1a5b7c5d57
5
5
  SHA512:
6
- metadata.gz: 613d6c58f9cb704d430012e4640d69e4abce8ed7e411b9d4ce8263c2688f9ee26a8c94039ef1803e5c3af8072682716a752f09bf54888c3215f9c0b74f897d96
7
- data.tar.gz: f6ac57bbbc80d5091b72799bba17c8cbcd301e872198a82e705d2642d878a15fe0c2447a2ace3da8d4bcca5eb14ec14bebb7462b9cecf0739a1e2a7e1af9cf27
6
+ metadata.gz: e7ea1cab22c2a8e6e26979547c6a8a87f83c2e6bb346bad254154087d70534735b53d4fa89798db7688ddecf3945cf11e23ddcedda295154304c3f7ff55ca24a
7
+ data.tar.gz: 29aee296393f1338d5ab048a10abd4ad760a82dff8abe3a2b5d9a006149b34e1251ab8b883a7205b1eb25f0ded55fb1b476830d6214cff904b26cec81fd8a469
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- bolognese (1.11.5)
4
+ bolognese (2.0.1)
5
5
  activesupport (>= 4.2.5)
6
6
  benchmark_methods (~> 0.7)
7
7
  bibtex-ruby (>= 5.1.0)
@@ -95,10 +95,10 @@ GEM
95
95
  concurrent-ruby (~> 1.0)
96
96
  iso8601 (0.9.1)
97
97
  json (2.6.2)
98
- json-canonicalization (0.3.2)
99
- json-ld (3.2.5)
98
+ json-canonicalization (0.3.1)
99
+ json-ld (3.2.4)
100
100
  htmlentities (~> 4.3)
101
- json-canonicalization (~> 0.3, >= 0.3.2)
101
+ json-canonicalization (~> 0.3)
102
102
  link_header (~> 0.0, >= 0.0.8)
103
103
  multi_json (~> 1.15)
104
104
  rack (>= 2.2, < 4)
@@ -225,6 +225,7 @@ DEPENDENCIES
225
225
  bundler (>= 1.0)
226
226
  byebug
227
227
  hashdiff (>= 1.0.0.beta1, < 2.0.0)
228
+ json-canonicalization (= 0.3.1)
228
229
  rack-test (~> 0)
229
230
  rake (~> 12.0)
230
231
  rspec (~> 3.4)
data/bolognese.gemspec CHANGED
@@ -50,6 +50,7 @@ Gem::Specification.new do |s|
50
50
  s.add_development_dependency 'simplecov', '0.17.1'
51
51
  s.add_development_dependency 'hashdiff', ['>= 1.0.0.beta1', '< 2.0.0']
52
52
  s.add_development_dependency 'byebug'
53
+ s.add_development_dependency 'json-canonicalization', '0.3.1'
53
54
 
54
55
  s.require_paths = ["lib"]
55
56
  s.files = `git ls-files`.split($/)
@@ -106,9 +106,19 @@ module Bolognese
106
106
  end
107
107
  end
108
108
  end
109
-
109
+
110
110
  def insert_publisher(xml)
111
- xml.publisher(publisher || container && container["title"])
111
+ if publisher.is_a?(Hash)
112
+ attributes = {
113
+ 'publisherIdentifier' => publisher["publisherIdentifier"],
114
+ 'publisherIdentifierScheme' => publisher["publisherIdentifierScheme"],
115
+ 'schemeURI' => publisher["schemeUri"],
116
+ "xml:lang" => publisher["lang"]
117
+ }.compact
118
+ xml.publisher(publisher["name"] || container && container["title"], attributes)
119
+ else
120
+ xml.publisher(publisher || container && container["title"])
121
+ end
112
122
  end
113
123
 
114
124
  def insert_publication_year(xml)
@@ -375,13 +385,11 @@ module Bolognese
375
385
  end
376
386
  end
377
387
  if geo_location["geoLocationPolygon"]
378
- geo_location["geoLocationPolygon"].each do |geo_location_polygon|
379
- xml.geoLocationPolygon do
380
- geo_location_polygon.each do |polygon_point|
381
- xml.polygonPoint do
382
- xml.pointLatitude(polygon_point.dig("polygonPoint", "pointLatitude"))
383
- xml.pointLongitude(polygon_point.dig("polygonPoint", "pointLongitude"))
384
- end
388
+ xml.geoLocationPolygon do
389
+ Array.wrap(geo_location["geoLocationPolygon"]).each do |polygon_point|
390
+ xml.polygonPoint do
391
+ xml.pointLatitude(polygon_point.dig("polygonPoint", "pointLatitude"))
392
+ xml.pointLongitude(polygon_point.dig("polygonPoint", "pointLongitude"))
385
393
  end
386
394
  end
387
395
  end
@@ -217,7 +217,7 @@ module Bolognese
217
217
  end
218
218
 
219
219
  def publisher
220
- @publisher ||= meta.fetch("publisher", nil)
220
+ @publisher ||= normalize_publisher(meta["publisher"]) if meta.fetch("publisher", nil).present?
221
221
  end
222
222
 
223
223
  def identifiers
@@ -161,7 +161,7 @@ module Bolognese
161
161
  "volume" => container.to_h["volume"],
162
162
  "issue" => container.to_h["issue"],
163
163
  "page" => page,
164
- "publisher" => publisher,
164
+ "publisher" => publisher["name"],
165
165
  "title" => parse_attributes(titles, content: "title", first: true),
166
166
  "URL" => url,
167
167
  "copyright" => Array.wrap(rights_list).map { |l| l["rights"] }.first,
@@ -86,11 +86,11 @@ module Bolognese
86
86
  "titles" => meta.try(:title).present? ? [{ "title" => meta.try(:title).to_s }] : [],
87
87
  "creators" => creators,
88
88
  "container" => container,
89
- "publisher" => meta.try(:publisher).to_s.presence,
89
+ "publisher" => meta.try(:publisher).present? ? { "name" => meta.publisher.to_s } : nil,
90
90
  "related_identifiers" => related_identifiers,
91
91
  "dates" => dates,
92
92
  "publication_year" => publication_year,
93
- "descriptions" => meta.try(:abstract).present? ? [{ "description" => meta.try(:abstract) && sanitize(meta.abstract.to_s).presence, "descriptionType" => "Abstract" }] : [],
93
+ "descriptions" => meta.try(:abstract).present? ? [{ "description" => meta.try(:abstract) && sanitize(meta.abstract.to_s, new_line: true).presence, "descriptionType" => "Abstract" }] : [],
94
94
  "rights_list" => rights_list,
95
95
  "state" => state
96
96
  }.merge(read_options)
@@ -107,7 +107,7 @@ module Bolognese
107
107
  "related_identifiers" => related_identifiers,
108
108
  "dates" => dates,
109
109
  "publication_year" => publication_year,
110
- "descriptions" => meta.fetch("abstract", nil).present? ? [{ "description" => sanitize(meta.fetch("abstract")), "descriptionType" => "Abstract" }] : [],
110
+ "descriptions" => meta.fetch("abstract", nil).present? ? [{ "description" => sanitize(meta.fetch("abstract"), new_line: true), "descriptionType" => "Abstract" }] : [],
111
111
  "rights_list" => rights_list,
112
112
  "version_info" => meta.fetch("version", nil),
113
113
  "subjects" => subjects,
@@ -43,7 +43,7 @@ module Bolognese
43
43
  dates << { "date" => meta.fetch("dateCreated"), "dateType" => "Created" } if meta.fetch("dateCreated", nil).present?
44
44
  dates << { "date" => meta.fetch("dateModified"), "dateType" => "Updated" } if meta.fetch("dateModified", nil).present?
45
45
  publication_year = meta.fetch("datePublished")[0..3] if meta.fetch("datePublished", nil).present?
46
- publisher = meta.fetch("publisher", nil)
46
+ publisher = { "name" => meta.fetch("publisher", nil) } if meta.fetch("publisher", nil).present?
47
47
  state = meta.present? || read_options.present? ? "findable" : "not_found"
48
48
  schema_org = meta.fetch("@type", nil)
49
49
  types = {
@@ -76,7 +76,7 @@ module Bolognese
76
76
  #{}"is_part_of" => is_part_of,
77
77
  "dates" => dates,
78
78
  "publication_year" => publication_year,
79
- "descriptions" => meta.fetch("description", nil).present? ? [{ "description" => sanitize(meta.fetch("description")), "descriptionType" => "Abstract" }] : nil,
79
+ "descriptions" => meta.fetch("description", nil).present? ? [{ "description" => sanitize(meta.fetch("description"), new_line: true), "descriptionType" => "Abstract" }] : nil,
80
80
  "rights_list" => rights_list,
81
81
  "version_info" => meta.fetch("version", nil),
82
82
  "subjects" => subjects,
@@ -7,7 +7,10 @@ module Bolognese
7
7
  errors = jsonlint(string)
8
8
  return { "errors" => errors } if errors.present?
9
9
 
10
- string.present? ? Maremma.from_json(string) : {}
10
+ crosscite = string.present? ? Maremma.from_json(string) : {}
11
+ crosscite["publisher"] = normalize_publisher(crosscite["publisher"]) if crosscite.fetch("publisher", nil).present?
12
+
13
+ crosscite
11
14
  end
12
15
  end
13
16
  end
@@ -40,8 +40,7 @@ module Bolognese
40
40
  journal_metadata = nil
41
41
  journal_issue = {}
42
42
  journal_metadata = nil
43
- publisher = query.dig("crm_item", 0)
44
- publisher = nil unless publisher.is_a?(String)
43
+ publisher = query.dig("crm_item", 0).is_a?(String) ? { "name" => query.dig("crm_item", 0) } : nil
45
44
 
46
45
  case model
47
46
  when "book"
@@ -232,11 +231,11 @@ module Bolognese
232
231
 
233
232
  def crossref_description(bibliographic_metadata)
234
233
  abstract = Array.wrap(bibliographic_metadata.dig("abstract")).map do |r|
235
- { "descriptionType" => "Abstract", "description" => sanitize(parse_attributes(r, content: 'p')) }.compact
234
+ { "descriptionType" => "Abstract", "description" => sanitize(parse_attributes(r, content: 'p'), new_line: true) }.compact
236
235
  end
237
236
 
238
237
  description = Array.wrap(bibliographic_metadata.dig("description")).map do |r|
239
- { "descriptionType" => "Other", "description" => sanitize(parse_attributes(r)) }.compact
238
+ { "descriptionType" => "Other", "description" => sanitize(parse_attributes(r), new_line: true) }.compact
240
239
  end
241
240
 
242
241
  (abstract + description)
@@ -7,7 +7,10 @@ module Bolognese
7
7
  errors = jsonlint(string)
8
8
  return { "errors" => errors } if errors.present?
9
9
 
10
- string.present? ? Maremma.from_json(string).transform_keys! { |key| key.underscore } : {}
10
+ datacite_json = string.present? ? Maremma.from_json(string).transform_keys! { |key| key.underscore } : {}
11
+ datacite_json["publisher"] = normalize_publisher(datacite_json["publisher"]) if datacite_json.fetch("publisher", nil).present?
12
+
13
+ datacite_json
11
14
  end
12
15
  end
13
16
  end
@@ -94,13 +94,29 @@ module Bolognese
94
94
 
95
95
  titles = get_titles(meta)
96
96
 
97
+ publisher = Array.wrap(meta.dig("publisher")).map do |r|
98
+ if r.blank?
99
+ nil
100
+ elsif r.is_a?(String)
101
+ { "name" => r.strip }
102
+ elsif r.is_a?(Hash)
103
+ {
104
+ "name" => r["__content__"].strip,
105
+ "publisherIdentifier" => r["publisherIdentifierScheme"] == "ROR" ? normalize_ror(r["publisherIdentifier"]) : r["publisherIdentifier"],
106
+ "publisherIdentifierScheme" => r["publisherIdentifierScheme"],
107
+ "schemeUri" => r["schemeURI"],
108
+ "lang" => r["lang"],
109
+ }.compact
110
+ end
111
+ end.compact.first
112
+
97
113
  descriptions = Array.wrap(meta.dig("descriptions", "description")).map do |r|
98
114
  if r.blank?
99
115
  nil
100
116
  elsif r.is_a?(String)
101
- { "description" => sanitize(r), "descriptionType" => "Abstract" }
117
+ { "description" => sanitize(r, new_line: true), "descriptionType" => "Abstract" }
102
118
  elsif r.is_a?(Hash)
103
- { "description" => sanitize(r["__content__"]), "descriptionType" => r["descriptionType"], "lang" => r["lang"] }.compact
119
+ { "description" => sanitize(r["__content__"], new_line: true), "descriptionType" => r["descriptionType"], "lang" => r["lang"] }.compact
104
120
  end
105
121
  end.compact
106
122
  rights_list = Array.wrap(meta.dig("rightsList", "rights")).map do |r|
@@ -287,7 +303,7 @@ module Bolognese
287
303
  "creators" => get_authors(Array.wrap(meta.dig("creators", "creator"))),
288
304
  "contributors" => get_authors(Array.wrap(meta.dig("contributors", "contributor"))),
289
305
  "container" => set_container(meta),
290
- "publisher" => parse_attributes(meta.fetch("publisher", nil), first: true).to_s.strip.presence,
306
+ "publisher" => publisher,
291
307
  "agency" => "datacite",
292
308
  "funding_references" => funding_references,
293
309
  "dates" => dates,
@@ -103,7 +103,7 @@ module Bolognese
103
103
  #"related_identifiers" => related_identifiers,
104
104
  #"dates" => dates,
105
105
  #"publication_year" => publication_year,
106
- "descriptions" => meta.fetch("description", nil).present? ? [{ "description" => sanitize(meta.fetch("description")), "descriptionType" => "Abstract" }] : [],
106
+ "descriptions" => meta.fetch("description", nil).present? ? [{ "description" => sanitize(meta.fetch("description"), new_line: true), "descriptionType" => "Abstract" }] : [],
107
107
  "rights_list" => rights_list,
108
108
  "version_info" => meta.fetch("version", nil),
109
109
  "subjects" => subjects
@@ -89,12 +89,12 @@ module Bolognese
89
89
  "url" => meta.fetch("UR", nil),
90
90
  "titles" => meta.fetch("T1", nil).present? ? [{ "title" => meta.fetch("T1", nil) }] : nil,
91
91
  "creators" => get_authors(author),
92
- "publisher" => meta.fetch("PB", "(:unav)"),
92
+ "publisher" => { "name" => meta.fetch("PB", "(:unav)") },
93
93
  "container" => container,
94
94
  "related_identifiers" => related_identifiers,
95
95
  "dates" => dates,
96
96
  "publication_year" => publication_year,
97
- "descriptions" => meta.fetch("AB", nil).present? ? [{ "description" => sanitize(meta.fetch("AB")), "descriptionType" => "Abstract" }] : nil,
97
+ "descriptions" => meta.fetch("AB", nil).present? ? [{ "description" => sanitize(meta.fetch("AB"), new_line: true), "descriptionType" => "Abstract" }] : nil,
98
98
  "subjects" => subjects,
99
99
  "language" => meta.fetch("LA", nil),
100
100
  "state" => state
@@ -74,7 +74,8 @@ module Bolognese
74
74
  creators = get_authors(from_schema_org_creators(Array.wrap(authors)))
75
75
  end
76
76
  contributors = get_authors(from_schema_org_contributors(Array.wrap(meta.fetch("editor", nil))))
77
- publisher = parse_attributes(meta.fetch("publisher", nil), content: "name", first: true)
77
+ publisher_name = parse_attributes(meta.fetch("publisher", nil), content: "name", first: true)
78
+ publisher = { "name" => publisher_name } if publisher_name.present?
78
79
 
79
80
  ct = (schema_org == "Dataset") ? "includedInDataCatalog" : "Periodical"
80
81
  container = if meta.fetch(ct, nil).present?
@@ -180,7 +181,7 @@ module Bolognese
180
181
  "related_identifiers" => related_identifiers,
181
182
  "publication_year" => publication_year,
182
183
  "dates" => dates,
183
- "descriptions" => meta.fetch("description", nil).present? ? [{ "description" => sanitize(meta.fetch("description")), "descriptionType" => "Abstract" }] : nil,
184
+ "descriptions" => meta.fetch("description", nil).present? ? [{ "description" => sanitize(meta.fetch("description"), new_line: true), "descriptionType" => "Abstract" }] : nil,
184
185
  "rights_list" => rights_list,
185
186
  "version_info" => meta.fetch("version", nil).to_s.presence,
186
187
  "subjects" => subjects,
@@ -773,6 +773,14 @@ module Bolognese
773
773
  nil
774
774
  end
775
775
 
776
+ def normalize_publisher(publisher)
777
+ if publisher.respond_to?(:to_hash)
778
+ publisher
779
+ elsif publisher.respond_to?(:to_str)
780
+ { "name" => publisher }
781
+ end
782
+ end
783
+
776
784
  def to_datacite_json(element, options={})
777
785
  a = Array.wrap(element).map do |e|
778
786
  e.inject({}) {|h, (k,v)| h[k.dasherize] = v; h }
@@ -1057,12 +1065,16 @@ module Bolognese
1057
1065
  custom_scrubber = Bolognese::WhitelistScrubber.new(options)
1058
1066
 
1059
1067
  if text.is_a?(String)
1060
- # remove excessive internal whitespace with squish
1061
- Loofah.scrub_fragment(text, custom_scrubber).to_s.squish
1068
+ if options[:new_line]
1069
+ # Remove multiple spaces, tabs, and other whitespace characters while preserving single spaces and new lines
1070
+ Loofah.scrub_fragment(text, custom_scrubber).to_s.gsub(/[ \t]+/, ' ').strip
1071
+ else
1072
+ Loofah.scrub_fragment(text, custom_scrubber).to_s.squish
1073
+ end
1062
1074
  elsif text.is_a?(Hash)
1063
- sanitize(text.fetch(content, nil))
1075
+ sanitize(text.fetch(content, nil), new_line: options[:new_line])
1064
1076
  elsif text.is_a?(Array)
1065
- a = text.map { |e| e.is_a?(Hash) ? sanitize(e.fetch(content, nil)) : sanitize(e) }.uniq
1077
+ a = text.map { |e| e.is_a?(Hash) ? sanitize(e.fetch(content, nil), new_line: options[:new_line]) : sanitize(e, new_line: options[:new_line]) }.uniq
1066
1078
  a = options[:first] ? a.first : a.unwrap
1067
1079
  else
1068
1080
  nil
@@ -1,3 +1,3 @@
1
1
  module Bolognese
2
- VERSION = "1.11.5"
2
+ VERSION = "2.0.1"
3
3
  end
@@ -21,7 +21,7 @@ module Bolognese
21
21
  volume: container.to_h["volume"],
22
22
  issue: container.to_h["issue"],
23
23
  pages: pages,
24
- publisher: publisher,
24
+ publisher: publisher["name"],
25
25
  year: publication_year,
26
26
  copyright: Array.wrap(rights_list).map { |l| l["rights"] }.first,
27
27
  }.compact
@@ -19,7 +19,7 @@ module Bolognese
19
19
  "tags" => subjects.present? ? Array.wrap(subjects).map { |k| parse_attributes(k, content: "subject", first: true) } : nil,
20
20
  "datePublished" => get_date(dates, "Issued") || publication_year,
21
21
  "dateModified" => get_date(dates, "Updated"),
22
- "publisher" => publisher,
22
+ "publisher" => publisher["name"],
23
23
  "license" => Array.wrap(rights_list).map { |l| l["rightsUri"] }.compact.unwrap,
24
24
  }.compact
25
25
  JSON.pretty_generate hsh.presence
@@ -15,7 +15,7 @@ module Bolognese
15
15
  resource_type: types["resourceType"],
16
16
  title: parse_attributes(titles, content: "title", first: true),
17
17
  author: authors_as_string(creators),
18
- publisher: publisher,
18
+ publisher: publisher["name"],
19
19
  publication_year: publication_year
20
20
  }.values
21
21
 
@@ -4,7 +4,12 @@ module Bolognese
4
4
  module Writers
5
5
  module DataciteJsonWriter
6
6
  def datacite_json
7
- JSON.pretty_generate crosscite_hsh.transform_keys! { |key| key.camelcase(uppercase_first_letter = :lower) } if crosscite_hsh.present?
7
+ # Remove the following change for the schema 4.5 release
8
+ if crosscite_hsh.present?
9
+ datacite_json_hsh = crosscite_hsh
10
+ datacite_json_hsh['publisher'] = self.publisher['name'] if self.publisher&.respond_to?(:to_hash) && self.publisher.has_key?('name') && !self.publisher['name'].blank?
11
+ JSON.pretty_generate datacite_json_hsh.transform_keys! { |key| key.camelcase(uppercase_first_letter = :lower) }
12
+ end
8
13
  end
9
14
  end
10
15
  end
@@ -77,16 +77,16 @@ module Bolognese
77
77
 
78
78
  def insert_source(xml)
79
79
  if is_chapter?
80
- xml.source(publisher)
80
+ xml.source(publisher["name"])
81
81
  elsif is_article? || is_data?
82
- xml.source(container && container["title"] || publisher)
82
+ xml.source(container && container["title"] || publisher["name"])
83
83
  else
84
84
  xml.source(parse_attributes(titles, content: "title", first: true))
85
85
  end
86
86
  end
87
87
 
88
88
  def insert_publisher_name(xml)
89
- xml.send("publisher-name", publisher)
89
+ xml.send("publisher-name", publisher["name"])
90
90
  end
91
91
 
92
92
  def insert_publication_date(xml)
@@ -14,7 +14,7 @@ module Bolognese
14
14
  "AB" => parse_attributes(abstract_description, content: "description", first: true),
15
15
  "KW" => Array.wrap(subjects).map { |k| parse_attributes(k, content: "subject", first: true) }.presence,
16
16
  "PY" => publication_year,
17
- "PB" => publisher,
17
+ "PB" => publisher["name"],
18
18
  "LA" => language,
19
19
  "VL" => container.to_h["volume"],
20
20
  "IS" => container.to_h["issue"],
@@ -37,7 +37,7 @@ module Bolognese
37
37
  "schemaVersion" => schema_version,
38
38
  "periodical" => types.present? ? ((types["schemaOrg"] != "Dataset") && container.present? ? to_schema_org(container) : nil) : nil,
39
39
  "includedInDataCatalog" => types.present? ? ((types["schemaOrg"] == "Dataset") && container.present? ? to_schema_org_container(container, type: "Dataset") : nil) : nil,
40
- "publisher" => publisher.present? ? { "@type" => "Organization", "name" => publisher } : nil,
40
+ "publisher" => publisher.present? ? { "@type" => "Organization", "name" => publisher["name"] } : nil,
41
41
  "funder" => to_schema_org_funder(funding_references),
42
42
  "provider" => agency.present? ? { "@type" => "Organization", "name" => agency } : nil
43
43
  }.compact.presence
@@ -46,6 +46,8 @@
46
46
  <xs:enumeration value="IsRequiredBy" />
47
47
  <xs:enumeration value="Obsoletes" />
48
48
  <xs:enumeration value="IsObsoletedBy" />
49
+ <xs:enumeration value="Collects" />
50
+ <xs:enumeration value="IsCollectedBy" />
49
51
  </xs:restriction>
50
52
  </xs:simpleType>
51
53
  </xs:schema>
@@ -22,6 +22,7 @@
22
22
  <xs:enumeration value="Dissertation" />
23
23
  <xs:enumeration value="Event" />
24
24
  <xs:enumeration value="Image" />
25
+ <xs:enumeration value="Instrument" />
25
26
  <xs:enumeration value="InteractiveResource" />
26
27
  <xs:enumeration value="Journal" />
27
28
  <xs:enumeration value="JournalArticle" />
@@ -35,6 +36,7 @@
35
36
  <xs:enumeration value="Software" />
36
37
  <xs:enumeration value="Sound" />
37
38
  <xs:enumeration value="Standard" />
39
+ <xs:enumeration value="StudyRegistration" />
38
40
  <xs:enumeration value="Text" />
39
41
  <xs:enumeration value="Workflow" />
40
42
  <xs:enumeration value="Other" />
@@ -2,7 +2,7 @@
2
2
  <!-- Revision history
3
3
  2010-08-26 Complete revision according to new common specification by the metadata work group after review. AJH, DTIC
4
4
  2010-11-17 Revised to current state of kernel review, FZ, TIB
5
- 2011-01-17 Complete revsion after community review. FZ, TIB
5
+ 2011-01-17 Complete revision after community review. FZ, TIB
6
6
  2011-03-17 Release of v2.1: added a namespace; mandatory properties got minLength; changes in the definitions of relationTypes IsDocumentedBy/Documents and isCompiledBy/Compiles; changes type of property "Date" from xs:date to xs:string. FZ, TIB
7
7
  2011-06-27 v2.2: namespace: kernel-2.2, additions to controlled lists "resourceType", "contributorType", "relatedIdentifierType", and "descriptionType". Removal of intermediate include-files.
8
8
  2013-07-24 v3.0: namespace: kernel-3.0; delete LastMetadataUpdate & MetadateVersionNumber; additions to controlled lists "contributorType", "dateType", "descriptionType", "relationType", "relatedIdentifierType" & "resourceType"; deletion of "StartDate" & "EndDate" from list "dateType" and "Film" from "resourceType"; allow arbitrary order of elements; allow optional wrapper elements to be empty; include xml:lang attribute for title, subject & description; include attribute schemeURI for nameIdentifier of creator, contributor & subject; added new attributes "relatedMetadataScheme", "schemeURI" & "schemeType" to relatedIdentifier; included new property "geoLocation"
@@ -12,7 +12,8 @@
12
12
  2018-09-08 v4.1.1 Make schema 4.1 backwards compatible to 4.0 by allowing geolocation elements in any order
13
13
  2019-02-14 v4.2: Addition of dateType value "Withdrawn", relationType values "Obsoletes", "isObsoletedBy", addition of new subproperties for Rights: rightsIdentifier, rightsIdentifierScheme, schemeURI, addition of the XML language attribute to the properties Creator, Contributor and Publisher for organizational names, don't check format of DOI
14
14
  2019-07-13 v4.3: Addition of new subproperties for Affiliation: "affiliationIdentifier", "affiliationIdentifierScheme", "schemeURI", addition of new sub-property for funderIdentifier: "schemeURI", addition of new funderIdentifierScheme: "ROR", added documentation for nameIdentifier
15
- 2021-03-08 v4.4: Addition of new property relatedItem, relationType value "isPublishedIn", subject subproperty "classificationCode", controlled list "numberType", additional 13 properties for controlled list "resourceType" -->
15
+ 2021-03-08 v4.4: Addition of new property relatedItem, relationType value "isPublishedIn", subject subproperty "classificationCode", controlled list "numberType", additional 13 properties for controlled list "resourceType"
16
+ 2023-??-?? v4.5: Addition of new subproperties for publisher: "publisherIdentifier", "publisherIdentifierScheme", and "schemeURI"; addition of new resourceTypeGeneral values "Instrument" and "StudyRegistration"; addition of new relationType values "Collects" and "IsCollectedBy".-->
16
17
  <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://datacite.org/schema/kernel-4" targetNamespace="http://datacite.org/schema/kernel-4" elementFormDefault="qualified" xml:lang="EN">
17
18
  <xs:import namespace="http://www.w3.org/XML/1998/namespace" schemaLocation="include/xml.xsd" />
18
19
  <xs:include schemaLocation="include/datacite-titleType-v4.xsd" />
@@ -105,6 +106,9 @@
105
106
  <xs:complexType>
106
107
  <xs:simpleContent>
107
108
  <xs:extension base="nonemptycontentStringType">
109
+ <xs:attribute name="publisherIdentifier" type="xs:string" use="optional" />
110
+ <xs:attribute name="publisherIdentifierScheme" type="xs:string" use="optional" />
111
+ <xs:attribute name="schemeURI" type="xs:anyURI" use="optional" />
108
112
  <xs:attribute ref="xml:lang" />
109
113
  </xs:extension>
110
114
  </xs:simpleContent>
@@ -161,7 +165,7 @@
161
165
  <xs:sequence>
162
166
  <xs:element name="contributor" minOccurs="0" maxOccurs="unbounded">
163
167
  <xs:annotation>
164
- <xs:documentation>The institution or person responsible for collecting, creating, or otherwise contributing to the developement of the dataset.</xs:documentation>
168
+ <xs:documentation>The institution or person responsible for collecting, creating, or otherwise contributing to the development of the dataset.</xs:documentation>
165
169
  <xs:documentation>The personal name format should be: Family, Given.</xs:documentation>
166
170
  </xs:annotation>
167
171
  <xs:complexType>
@@ -524,7 +528,7 @@ Use the complete title of a license and include version information if applicabl
524
528
  </xs:element>
525
529
  <xs:element name="number" minOccurs="0">
526
530
  <xs:annotation>
527
- <xs:documentation>Issue number or name of the related item.</xs:documentation>
531
+ <xs:documentation>Number of the related item e.g. report number or article number.</xs:documentation>
528
532
  </xs:annotation>
529
533
  <xs:complexType>
530
534
  <xs:simpleContent>
@@ -641,19 +645,19 @@ Use the complete title of a license and include version information if applicabl
641
645
  <xs:pattern value="\d{2}(\d{2}|\?\?|\d(\d|\?))(-(\d{2}|\?\?))?~?\??" />
642
646
  <!--
643
647
  The following pattern is for yearMonthDay - yyyymmdd, where 'dd' may be '??' so '200412??' means "some day during the month of 12/2004".
644
- The whole string may be followed by '?' or '~' to mean "questionable" or "approximate". Hypens are not allowed for this pattern.
648
+ The whole string may be followed by '?' or '~' to mean "questionable" or "approximate". Hyphens are not allowed for this pattern.
645
649
  -->
646
650
  <xs:pattern value="\d{6}(\d{2}|\?\?)~?\??" />
647
651
  <!--
648
652
 
649
653
  The following pattern is for date and time with T separator:'yyyymmddThhmmss'.
650
- Hypens in date and colons in time not allowed for this pattern.
654
+ Hyphens in date and colons in time not allowed for this pattern.
651
655
  -->
652
656
  <xs:pattern value="\d{8}T\d{6}" />
653
657
  <!--
654
658
 
655
659
  The following pattern is for a date range. in years: 'yyyy/yyyy'; or year/month: yyyy-mm/yyyy-mm, or year/month/day: yyyy-mm-dd/yyyy-mm-dd. Beginning or end of range value may be 'unknown'. End of range value may be 'open'.
656
- Hypens mandatory when month is present.
660
+ Hyphens mandatory when month is present.
657
661
  -->
658
662
  <xs:pattern value="((-)?(\d{4}(-\d{2})?(-\d{2})?)|unknown)/((-)?(\d{4}(-\d{2})?(-\d{2})?)|unknown|open)" />
659
663
  </xs:restriction>
@@ -0,0 +1,35 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <!-- Version 1.0 - Created 2011-01-13 - FZ, TIB, Germany
3
+ 2013-05 v3.0: Addition of ID to simpleType element, added values "ResearchGroup" & "Other"
4
+ 2014-08-20 v3.1: Addition of value "DataCurator"
5
+ 2015-05-14 v4.0 dropped value "Funder", use new "funderReference" -->
6
+ <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns="http://datacite.org/schema/kernel-4" targetNamespace="http://datacite.org/schema/kernel-4" elementFormDefault="qualified">
7
+ <xs:simpleType name="contributorType" id="contributorType">
8
+ <xs:annotation>
9
+ <xs:documentation>The type of contributor of the resource.</xs:documentation>
10
+ </xs:annotation>
11
+ <xs:restriction base="xs:string">
12
+ <xs:enumeration value="ContactPerson" />
13
+ <xs:enumeration value="DataCollector" />
14
+ <xs:enumeration value="DataCurator" />
15
+ <xs:enumeration value="DataManager" />
16
+ <xs:enumeration value="Distributor" />
17
+ <xs:enumeration value="Editor" />
18
+ <xs:enumeration value="HostingInstitution" />
19
+ <xs:enumeration value="Other" />
20
+ <xs:enumeration value="Producer" />
21
+ <xs:enumeration value="ProjectLeader" />
22
+ <xs:enumeration value="ProjectManager" />
23
+ <xs:enumeration value="ProjectMember" />
24
+ <xs:enumeration value="RegistrationAgency" />
25
+ <xs:enumeration value="RegistrationAuthority" />
26
+ <xs:enumeration value="RelatedPerson" />
27
+ <xs:enumeration value="ResearchGroup" />
28
+ <xs:enumeration value="RightsHolder" />
29
+ <xs:enumeration value="Researcher" />
30
+ <xs:enumeration value="Sponsor" />
31
+ <xs:enumeration value="Supervisor" />
32
+ <xs:enumeration value="WorkPackageLeader" />
33
+ </xs:restriction>
34
+ </xs:simpleType>
35
+ </xs:schema>
@@ -0,0 +1,25 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <!-- Version 1.0 - Created 2011-01-13 - FZ, TIB, Germany
3
+ 2013-05 v3.0: Addition of ID to simpleType element; addition of value "Collected"; deleted "StartDate" & "EndDate"
4
+ 2017-10-23 v4.1: Addition of value "Other"
5
+ 2019-02-14 v4.2: Addition of value "Withdrawn"-->
6
+ <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns="http://datacite.org/schema/kernel-4" targetNamespace="http://datacite.org/schema/kernel-4" elementFormDefault="qualified">
7
+ <xs:simpleType name="dateType" id="dateType">
8
+ <xs:annotation>
9
+ <xs:documentation>The type of date. Use RKMS‐ISO8601 standard for depicting date ranges. To indicate the end of an embargo period, use Available. To indicate the start of an embargo period, use Submitted or Accepted, as appropriate.</xs:documentation>
10
+ </xs:annotation>
11
+ <xs:restriction base="xs:string">
12
+ <xs:enumeration value="Accepted" />
13
+ <xs:enumeration value="Available" />
14
+ <xs:enumeration value="Collected" />
15
+ <xs:enumeration value="Copyrighted" />
16
+ <xs:enumeration value="Created" />
17
+ <xs:enumeration value="Issued" />
18
+ <xs:enumeration value="Other" />
19
+ <xs:enumeration value="Submitted" />
20
+ <xs:enumeration value="Updated" />
21
+ <xs:enumeration value="Valid" />
22
+ <xs:enumeration value="Withdrawn" />
23
+ </xs:restriction>
24
+ </xs:simpleType>
25
+ </xs:schema>
@@ -0,0 +1,19 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <!-- Version 1.0 - Created 2011-01-13 - FZ, TIB, Germany
3
+ 2013-05 v3.0: Addition of ID to simpleType element, addition of value "Methods"
4
+ 2015-02-12 v4.0: Addition of value "TechnicalInfo"-->
5
+ <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns="http://datacite.org/schema/kernel-4" targetNamespace="http://datacite.org/schema/kernel-4" elementFormDefault="qualified">
6
+ <xs:simpleType name="descriptionType" id="descriptionType">
7
+ <xs:annotation>
8
+ <xs:documentation>The type of the description.</xs:documentation>
9
+ </xs:annotation>
10
+ <xs:restriction base="xs:string">
11
+ <xs:enumeration value="Abstract" />
12
+ <xs:enumeration value="Methods" />
13
+ <xs:enumeration value="SeriesInformation" />
14
+ <xs:enumeration value="TableOfContents" />
15
+ <xs:enumeration value="TechnicalInfo" />
16
+ <xs:enumeration value="Other" />
17
+ </xs:restriction>
18
+ </xs:simpleType>
19
+ </xs:schema>