bolognese 1.11.3 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ci.yml +1 -1
  3. data/.github/workflows/release.yml +2 -2
  4. data/Gemfile.lock +15 -12
  5. data/bolognese.gemspec +5 -3
  6. data/lib/bolognese/author_utils.rb +4 -3
  7. data/lib/bolognese/datacite_utils.rb +18 -10
  8. data/lib/bolognese/metadata.rb +1 -6
  9. data/lib/bolognese/metadata_utils.rb +2 -2
  10. data/lib/bolognese/readers/bibtex_reader.rb +2 -2
  11. data/lib/bolognese/readers/citeproc_reader.rb +1 -1
  12. data/lib/bolognese/readers/codemeta_reader.rb +2 -2
  13. data/lib/bolognese/readers/crosscite_reader.rb +4 -1
  14. data/lib/bolognese/readers/crossref_reader.rb +3 -4
  15. data/lib/bolognese/readers/datacite_json_reader.rb +4 -1
  16. data/lib/bolognese/readers/datacite_reader.rb +19 -3
  17. data/lib/bolognese/readers/npm_reader.rb +1 -1
  18. data/lib/bolognese/readers/ris_reader.rb +2 -2
  19. data/lib/bolognese/readers/schema_org_reader.rb +6 -2
  20. data/lib/bolognese/utils.rb +23 -6
  21. data/lib/bolognese/version.rb +1 -1
  22. data/lib/bolognese/writers/bibtex_writer.rb +1 -1
  23. data/lib/bolognese/writers/codemeta_writer.rb +1 -1
  24. data/lib/bolognese/writers/csv_writer.rb +1 -1
  25. data/lib/bolognese/writers/datacite_json_writer.rb +3 -1
  26. data/lib/bolognese/writers/jats_writer.rb +6 -3
  27. data/lib/bolognese/writers/ris_writer.rb +1 -1
  28. data/lib/bolognese/writers/schema_org_writer.rb +1 -1
  29. data/resources/kernel-4/include/datacite-relationType-v4.xsd +2 -0
  30. data/resources/kernel-4/include/datacite-resourceType-v4.xsd +2 -0
  31. data/resources/kernel-4/metadata.xsd +11 -7
  32. data/resources/kernel-4.5/include/datacite-contributorType-v4.xsd +35 -0
  33. data/resources/kernel-4.5/include/datacite-dateType-v4.xsd +25 -0
  34. data/resources/kernel-4.5/include/datacite-descriptionType-v4.xsd +19 -0
  35. data/resources/kernel-4.5/include/datacite-funderIdentifierType-v4.xsd +16 -0
  36. data/resources/kernel-4.5/include/datacite-nameType-v4.xsd +10 -0
  37. data/resources/kernel-4.5/include/datacite-numberType-v4.xsd +12 -0
  38. data/resources/kernel-4.5/include/datacite-relatedIdentifierType-v4.xsd +34 -0
  39. data/resources/kernel-4.5/include/datacite-relationType-v4.xsd +53 -0
  40. data/resources/kernel-4.5/include/datacite-resourceType-v4.xsd +45 -0
  41. data/resources/kernel-4.5/include/datacite-titleType-v4.xsd +14 -0
  42. data/resources/kernel-4.5/include/xml.xsd +286 -0
  43. data/resources/kernel-4.5/metadata.xsd +711 -0
  44. data/spec/author_utils_spec.rb +33 -4
  45. data/spec/datacite_utils_spec.rb +5 -1
  46. data/spec/fixtures/datacite-example-ROR-nameIdentifiers.xml +18 -2
  47. data/spec/fixtures/datacite-example-full-v4.5.xml +255 -0
  48. data/spec/fixtures/datacite-seriesinformation.xml +7 -2
  49. data/spec/fixtures/datacite-xml-lang.xml +1 -1
  50. data/spec/fixtures/datacite_blank_name_identifier.xml +22 -0
  51. data/spec/fixtures/datacite_blank_publisher.xml +18 -0
  52. data/spec/fixtures/datacite_journal_article.xml +64 -0
  53. data/spec/fixtures/schema_org.json +1 -0
  54. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/insert_subjects/insert.yml +32 -14
  55. data/spec/readers/bibtex_reader_spec.rb +2 -0
  56. data/spec/readers/citeproc_reader_spec.rb +3 -0
  57. data/spec/readers/codemeta_reader_spec.rb +4 -4
  58. data/spec/readers/crosscite_reader_spec.rb +2 -0
  59. data/spec/readers/crossref_reader_spec.rb +41 -41
  60. data/spec/readers/datacite_json_reader_spec.rb +2 -0
  61. data/spec/readers/datacite_reader_spec.rb +123 -42
  62. data/spec/readers/npm_reader_spec.rb +2 -0
  63. data/spec/readers/ris_reader_spec.rb +3 -0
  64. data/spec/readers/schema_org_reader_spec.rb +11 -11
  65. data/spec/spec_helper.rb +1 -0
  66. data/spec/writers/citation_writer_spec.rb +9 -0
  67. data/spec/writers/crosscite_writer_spec.rb +7 -0
  68. data/spec/writers/datacite_json_writer_spec.rb +22 -0
  69. data/spec/writers/datacite_writer_spec.rb +84 -5
  70. data/spec/writers/jats_writer_spec.rb +15 -0
  71. data/spec/writers/rdf_xml_writer_spec.rb +7 -0
  72. data/spec/writers/schema_org_writer_spec.rb +13 -0
  73. data/spec/writers/turtle_writer_spec.rb +18 -0
  74. metadata +60 -22
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 84e06f3ca24650902cd1d96f777b2103a242fc44346d9995667961f090c1e300
4
- data.tar.gz: 047d14ab54b8a9aa0569bdaeb90806a8a835809f399693c2b29c0a97e9eb0658
3
+ metadata.gz: ce52f2eb08396d0e56bdccf06702901fe7c04f1925cacafc71f315a1edda91c3
4
+ data.tar.gz: 29fb5a1d987a95220bbc7a3912aa24735f54924f47c8a74d61d1f51e25b75314
5
5
  SHA512:
6
- metadata.gz: 2547f9e0c41915163269e3802c32c2f73abf6977987748eb70eec7d324585487585a3952a8874bb40e35e4d65d748e6968c0d1f7d92f76992ee3068b2cd718dd
7
- data.tar.gz: 3beb48ca299e9b0a1ab95e7e74fabd99e127c374a513b9b775644695e12ac489f3d35851bc1553bc01a593c2ce3c84bc37e8f5e2bd452f6ac561d431edd4f669
6
+ metadata.gz: 4d528d2597900dfb90410504833a83e79b1bda1974d9f77102071cecd3f71c3e1f899d8e59abe24cf1110469807cc0c6382c50a5a62571ceceb9278e7cfa37b7
7
+ data.tar.gz: 341d76ed37284899ea1e74e331026ba6d042d92037a260d7591b9a48b38b7c8a0b06a6908e97917b2c345c5be8b3ac6475cc63a5478891589d624f248137b315
@@ -7,7 +7,7 @@ jobs:
7
7
  strategy:
8
8
  fail-fast: false
9
9
  matrix:
10
- ruby: ["2.6", "2.7", "3.0", "3.1"]
10
+ ruby: ["3.0", "3.1", "3.2", "3.3"]
11
11
  runs-on: ubuntu-latest
12
12
  steps:
13
13
  - uses: actions/checkout@v3
@@ -11,10 +11,10 @@ jobs:
11
11
  runs-on: ubuntu-latest
12
12
  steps:
13
13
  - uses: actions/checkout@v3
14
- - name: Set up Ruby 2.6
14
+ - name: Set up Ruby 3.1.4
15
15
  uses: ruby/setup-ruby@v1
16
16
  with:
17
- ruby-version: "2.6"
17
+ ruby-version: "3.1.4"
18
18
 
19
19
  - name: Build
20
20
  run: |
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- bolognese (1.11.3)
4
+ bolognese (2.2.0)
5
5
  activesupport (>= 4.2.5)
6
6
  benchmark_methods (~> 0.7)
7
7
  bibtex-ruby (>= 5.1.0)
@@ -16,9 +16,9 @@ PATH
16
16
  json-ld-preloaded (~> 3.1, >= 3.1.3)
17
17
  jsonlint (~> 0.3.0)
18
18
  loofah (~> 2.0, >= 2.0.3)
19
- maremma (>= 4.9.4, < 5)
19
+ maremma (~> 5.0)
20
20
  namae (~> 1.0)
21
- nokogiri (>= 1.13.2, < 1.14)
21
+ nokogiri (~> 1.16, >= 1.16.2)
22
22
  oj (~> 3.10)
23
23
  oj_mimic_json (~> 1.0, >= 1.0.1)
24
24
  postrank-uri (~> 1.0, >= 1.0.18)
@@ -41,6 +41,7 @@ GEM
41
41
  bibtex-ruby (6.0.0)
42
42
  latex-decode (~> 0.0)
43
43
  builder (3.2.4)
44
+ byebug (11.1.3)
44
45
  citeproc (1.0.10)
45
46
  namae (~> 1.0)
46
47
  citeproc-ruby (1.1.14)
@@ -94,10 +95,10 @@ GEM
94
95
  concurrent-ruby (~> 1.0)
95
96
  iso8601 (0.9.1)
96
97
  json (2.6.2)
97
- json-canonicalization (0.3.2)
98
- json-ld (3.2.5)
98
+ json-canonicalization (0.3.1)
99
+ json-ld (3.2.4)
99
100
  htmlentities (~> 4.3)
100
- json-canonicalization (~> 0.3, >= 0.3.2)
101
+ json-canonicalization (~> 0.3)
101
102
  link_header (~> 0.0, >= 0.0.8)
102
103
  multi_json (~> 1.15)
103
104
  rack (>= 2.2, < 4)
@@ -113,7 +114,7 @@ GEM
113
114
  loofah (2.21.3)
114
115
  crass (~> 1.0.2)
115
116
  nokogiri (>= 1.12.0)
116
- maremma (4.9.9)
117
+ maremma (5.0.0)
117
118
  activesupport (>= 4.2.5)
118
119
  addressable (>= 2.3.6)
119
120
  builder (~> 3.2, >= 3.2.2)
@@ -124,17 +125,17 @@ GEM
124
125
  faraday-follow_redirects (~> 0.3.0)
125
126
  faraday-gzip (~> 0.1.0)
126
127
  faraday-multipart (~> 1.0.4)
127
- nokogiri (>= 1.13.1, < 1.14.0)
128
+ nokogiri (~> 1.16, >= 1.16.2)
128
129
  oj (>= 2.8.3)
129
130
  oj_mimic_json (~> 1.0, >= 1.0.1)
130
131
  matrix (0.4.2)
131
- mini_portile2 (2.8.0)
132
+ mini_portile2 (2.8.5)
132
133
  minitest (5.18.0)
133
134
  multi_json (1.15.0)
134
135
  multipart-post (2.3.0)
135
136
  namae (1.1.1)
136
- nokogiri (1.13.9)
137
- mini_portile2 (~> 2.8.0)
137
+ nokogiri (1.16.2)
138
+ mini_portile2 (~> 2.8.2)
138
139
  racc (~> 1.4)
139
140
  oj (3.14.2)
140
141
  oj_mimic_json (1.0.1)
@@ -222,7 +223,9 @@ PLATFORMS
222
223
  DEPENDENCIES
223
224
  bolognese!
224
225
  bundler (>= 1.0)
226
+ byebug
225
227
  hashdiff (>= 1.0.0.beta1, < 2.0.0)
228
+ json-canonicalization (= 0.3.1)
226
229
  rack-test (~> 0)
227
230
  rake (~> 12.0)
228
231
  rspec (~> 3.4)
@@ -232,4 +235,4 @@ DEPENDENCIES
232
235
  webmock (~> 3.0, >= 3.0.1)
233
236
 
234
237
  BUNDLED WITH
235
- 2.4.10
238
+ 2.5.5
data/bolognese.gemspec CHANGED
@@ -13,12 +13,12 @@ Gem::Specification.new do |s|
13
13
  s.version = Bolognese::VERSION
14
14
  s.extra_rdoc_files = ["README.md"]
15
15
  s.license = 'MIT'
16
- s.required_ruby_version = ['>=2.3']
16
+ s.required_ruby_version = ['>=3.0']
17
17
 
18
18
  # Declare dependencies here, rather than in the Gemfile
19
- s.add_dependency 'maremma', '>= 4.9.4', '< 5'
19
+ s.add_dependency 'maremma', '~> 5.0'
20
20
  #s.add_dependency 'faraday', '~> 0.17.3'
21
- s.add_dependency 'nokogiri', '>= 1.13.2', '< 1.14'
21
+ s.add_dependency 'nokogiri', '~> 1.16', '>= 1.16.2'
22
22
  s.add_dependency 'loofah', '~> 2.0', '>= 2.0.3'
23
23
  s.add_dependency 'builder', '~> 3.2', '>= 3.2.2'
24
24
  s.add_dependency 'activesupport', '>= 4.2.5'
@@ -49,6 +49,8 @@ Gem::Specification.new do |s|
49
49
  s.add_development_dependency 'webmock', '~> 3.0', '>= 3.0.1'
50
50
  s.add_development_dependency 'simplecov', '0.17.1'
51
51
  s.add_development_dependency 'hashdiff', ['>= 1.0.0.beta1', '< 2.0.0']
52
+ s.add_development_dependency 'byebug'
53
+ s.add_development_dependency 'json-canonicalization', '0.3.1'
52
54
 
53
55
  s.require_paths = ["lib"]
54
56
  s.files = `git ls-files`.split($/)
@@ -30,19 +30,20 @@ module Bolognese
30
30
  name_type = parse_attributes(author.fetch("creatorName", nil), content: "nameType", first: true) || parse_attributes(author.fetch("contributorName", nil), content: "nameType", first: true)
31
31
 
32
32
  name_identifiers = Array.wrap(author.fetch("nameIdentifier", nil)).map do |ni|
33
+ name_identifier = ni["__content__"].strip if ni["__content__"].present?
33
34
  if ni["nameIdentifierScheme"] == "ORCID"
34
35
  {
35
- "nameIdentifier" => normalize_orcid(ni["__content__"]),
36
+ "nameIdentifier" => normalize_orcid(name_identifier),
36
37
  "schemeUri" => "https://orcid.org",
37
38
  "nameIdentifierScheme" => "ORCID" }.compact
38
39
  elsif ni["nameIdentifierScheme"] == "ROR"
39
40
  {
40
- "nameIdentifier" => normalize_ror(ni["__content__"]),
41
+ "nameIdentifier" => normalize_ror(name_identifier),
41
42
  "schemeUri" => "https://ror.org",
42
43
  "nameIdentifierScheme" => "ROR" }.compact
43
44
  else
44
45
  {
45
- "nameIdentifier" => ni["__content__"],
46
+ "nameIdentifier" => name_identifier,
46
47
  "schemeUri" => ni.fetch("schemeURI", nil),
47
48
  "nameIdentifierScheme" => ni["nameIdentifierScheme"] }.compact
48
49
  end
@@ -106,9 +106,19 @@ module Bolognese
106
106
  end
107
107
  end
108
108
  end
109
-
109
+
110
110
  def insert_publisher(xml)
111
- xml.publisher(publisher || container && container["title"])
111
+ if publisher.is_a?(Hash)
112
+ attributes = {
113
+ 'publisherIdentifier' => publisher["publisherIdentifier"],
114
+ 'publisherIdentifierScheme' => publisher["publisherIdentifierScheme"],
115
+ 'schemeURI' => publisher["schemeUri"],
116
+ "xml:lang" => publisher["lang"]
117
+ }.compact
118
+ xml.publisher(publisher["name"] || container && container["title"], attributes)
119
+ else
120
+ xml.publisher(publisher || container && container["title"])
121
+ end
112
122
  end
113
123
 
114
124
  def insert_publication_year(xml)
@@ -171,7 +181,7 @@ module Bolognese
171
181
  s["subject"] = subject
172
182
  end
173
183
 
174
- attributes = { "subjectScheme" => s["subjectScheme"], "schemeURI" => s["schemeUri"], "valueURI" => s["valueUri"], "xml:lang" => s["lang"] }.compact
184
+ attributes = { "subjectScheme" => s["subjectScheme"], "schemeURI" => s["schemeUri"], "valueURI" => s["valueUri"], "classificationCode" => s["classificationCode"], "xml:lang" => s["lang"] }.compact
175
185
 
176
186
  xml.subject(s["subject"], attributes)
177
187
  end
@@ -375,13 +385,11 @@ module Bolognese
375
385
  end
376
386
  end
377
387
  if geo_location["geoLocationPolygon"]
378
- geo_location["geoLocationPolygon"].each do |geo_location_polygon|
379
- xml.geoLocationPolygon do
380
- geo_location_polygon.each do |polygon_point|
381
- xml.polygonPoint do
382
- xml.pointLatitude(polygon_point.dig("polygonPoint", "pointLatitude"))
383
- xml.pointLongitude(polygon_point.dig("polygonPoint", "pointLongitude"))
384
- end
388
+ xml.geoLocationPolygon do
389
+ Array.wrap(geo_location["geoLocationPolygon"]).each do |polygon_point|
390
+ xml.polygonPoint do
391
+ xml.pointLatitude(polygon_point.dig("polygonPoint", "pointLatitude"))
392
+ xml.pointLongitude(polygon_point.dig("polygonPoint", "pointLongitude"))
385
393
  end
386
394
  end
387
395
  end
@@ -156,11 +156,6 @@ module Bolognese
156
156
  @descriptions ||= meta.fetch("descriptions", nil)
157
157
  end
158
158
 
159
- def abstract_description
160
- # Fetch the first description with descriptionType "Abstract"
161
- @abstract_description ||= descriptions&.find { |d| d["descriptionType"] == "Abstract" }
162
- end
163
-
164
159
  def rights_list
165
160
  @rights_list ||= meta.fetch("rights_list", nil)
166
161
  end
@@ -222,7 +217,7 @@ module Bolognese
222
217
  end
223
218
 
224
219
  def publisher
225
- @publisher ||= meta.fetch("publisher", nil)
220
+ @publisher ||= normalize_publisher(meta["publisher"]) if meta.fetch("publisher", nil).present?
226
221
  end
227
222
 
228
223
  def identifiers
@@ -96,7 +96,7 @@ module Bolognese
96
96
  if container.present?
97
97
  container["title"]
98
98
  elsif types["citeproc"] == "article-journal"
99
- publisher
99
+ publisher["name"] if publisher.present?
100
100
  else
101
101
  nil
102
102
  end
@@ -161,7 +161,7 @@ module Bolognese
161
161
  "volume" => container.to_h["volume"],
162
162
  "issue" => container.to_h["issue"],
163
163
  "page" => page,
164
- "publisher" => publisher,
164
+ "publisher" => publisher["name"],
165
165
  "title" => parse_attributes(titles, content: "title", first: true),
166
166
  "URL" => url,
167
167
  "copyright" => Array.wrap(rights_list).map { |l| l["rights"] }.first,
@@ -86,11 +86,11 @@ module Bolognese
86
86
  "titles" => meta.try(:title).present? ? [{ "title" => meta.try(:title).to_s }] : [],
87
87
  "creators" => creators,
88
88
  "container" => container,
89
- "publisher" => meta.try(:publisher).to_s.presence,
89
+ "publisher" => meta.try(:publisher).present? ? { "name" => meta.publisher.to_s } : nil,
90
90
  "related_identifiers" => related_identifiers,
91
91
  "dates" => dates,
92
92
  "publication_year" => publication_year,
93
- "descriptions" => meta.try(:abstract).present? ? [{ "description" => meta.try(:abstract) && sanitize(meta.abstract.to_s).presence, "descriptionType" => "Abstract" }] : [],
93
+ "descriptions" => meta.try(:abstract).present? ? [{ "description" => meta.try(:abstract) && sanitize(meta.abstract.to_s, new_line: true).presence, "descriptionType" => "Abstract" }] : [],
94
94
  "rights_list" => rights_list,
95
95
  "state" => state
96
96
  }.merge(read_options)
@@ -107,7 +107,7 @@ module Bolognese
107
107
  "related_identifiers" => related_identifiers,
108
108
  "dates" => dates,
109
109
  "publication_year" => publication_year,
110
- "descriptions" => meta.fetch("abstract", nil).present? ? [{ "description" => sanitize(meta.fetch("abstract")), "descriptionType" => "Abstract" }] : [],
110
+ "descriptions" => meta.fetch("abstract", nil).present? ? [{ "description" => sanitize(meta.fetch("abstract"), new_line: true), "descriptionType" => "Abstract" }] : [],
111
111
  "rights_list" => rights_list,
112
112
  "version_info" => meta.fetch("version", nil),
113
113
  "subjects" => subjects,
@@ -43,7 +43,7 @@ module Bolognese
43
43
  dates << { "date" => meta.fetch("dateCreated"), "dateType" => "Created" } if meta.fetch("dateCreated", nil).present?
44
44
  dates << { "date" => meta.fetch("dateModified"), "dateType" => "Updated" } if meta.fetch("dateModified", nil).present?
45
45
  publication_year = meta.fetch("datePublished")[0..3] if meta.fetch("datePublished", nil).present?
46
- publisher = meta.fetch("publisher", nil)
46
+ publisher = { "name" => meta.fetch("publisher", nil) } if meta.fetch("publisher", nil).present?
47
47
  state = meta.present? || read_options.present? ? "findable" : "not_found"
48
48
  schema_org = meta.fetch("@type", nil)
49
49
  types = {
@@ -76,7 +76,7 @@ module Bolognese
76
76
  #{}"is_part_of" => is_part_of,
77
77
  "dates" => dates,
78
78
  "publication_year" => publication_year,
79
- "descriptions" => meta.fetch("description", nil).present? ? [{ "description" => sanitize(meta.fetch("description")), "descriptionType" => "Abstract" }] : nil,
79
+ "descriptions" => meta.fetch("description", nil).present? ? [{ "description" => sanitize(meta.fetch("description"), new_line: true), "descriptionType" => "Abstract" }] : nil,
80
80
  "rights_list" => rights_list,
81
81
  "version_info" => meta.fetch("version", nil),
82
82
  "subjects" => subjects,
@@ -7,7 +7,10 @@ module Bolognese
7
7
  errors = jsonlint(string)
8
8
  return { "errors" => errors } if errors.present?
9
9
 
10
- string.present? ? Maremma.from_json(string) : {}
10
+ crosscite = string.present? ? Maremma.from_json(string) : {}
11
+ crosscite["publisher"] = normalize_publisher(crosscite["publisher"]) if crosscite.fetch("publisher", nil).present?
12
+
13
+ crosscite
11
14
  end
12
15
  end
13
16
  end
@@ -40,8 +40,7 @@ module Bolognese
40
40
  journal_metadata = nil
41
41
  journal_issue = {}
42
42
  journal_metadata = nil
43
- publisher = query.dig("crm_item", 0)
44
- publisher = nil unless publisher.is_a?(String)
43
+ publisher = query.dig("crm_item", 0).is_a?(String) ? { "name" => query.dig("crm_item", 0) } : nil
45
44
 
46
45
  case model
47
46
  when "book"
@@ -232,11 +231,11 @@ module Bolognese
232
231
 
233
232
  def crossref_description(bibliographic_metadata)
234
233
  abstract = Array.wrap(bibliographic_metadata.dig("abstract")).map do |r|
235
- { "descriptionType" => "Abstract", "description" => sanitize(parse_attributes(r, content: 'p')) }.compact
234
+ { "descriptionType" => "Abstract", "description" => sanitize(parse_attributes(r, content: 'p'), new_line: true) }.compact
236
235
  end
237
236
 
238
237
  description = Array.wrap(bibliographic_metadata.dig("description")).map do |r|
239
- { "descriptionType" => "Other", "description" => sanitize(parse_attributes(r)) }.compact
238
+ { "descriptionType" => "Other", "description" => sanitize(parse_attributes(r), new_line: true) }.compact
240
239
  end
241
240
 
242
241
  (abstract + description)
@@ -7,7 +7,10 @@ module Bolognese
7
7
  errors = jsonlint(string)
8
8
  return { "errors" => errors } if errors.present?
9
9
 
10
- string.present? ? Maremma.from_json(string).transform_keys! { |key| key.underscore } : {}
10
+ datacite_json = string.present? ? Maremma.from_json(string).transform_keys! { |key| key.underscore } : {}
11
+ datacite_json["publisher"] = normalize_publisher(datacite_json["publisher"]) if datacite_json.fetch("publisher", nil).present?
12
+
13
+ datacite_json
11
14
  end
12
15
  end
13
16
  end
@@ -94,13 +94,29 @@ module Bolognese
94
94
 
95
95
  titles = get_titles(meta)
96
96
 
97
+ publisher = Array.wrap(meta.dig("publisher")).map do |r|
98
+ if r.blank?
99
+ nil
100
+ elsif r.is_a?(String)
101
+ { "name" => r.strip }
102
+ elsif r.is_a?(Hash)
103
+ {
104
+ "name" => r["__content__"].present? ? r["__content__"].strip : nil,
105
+ "publisherIdentifier" => r["publisherIdentifierScheme"] == "ROR" ? normalize_ror(r["publisherIdentifier"]) : r["publisherIdentifier"],
106
+ "publisherIdentifierScheme" => r["publisherIdentifierScheme"],
107
+ "schemeUri" => r["schemeURI"],
108
+ "lang" => r["lang"],
109
+ }.compact
110
+ end
111
+ end.compact.first
112
+
97
113
  descriptions = Array.wrap(meta.dig("descriptions", "description")).map do |r|
98
114
  if r.blank?
99
115
  nil
100
116
  elsif r.is_a?(String)
101
- { "description" => sanitize(r), "descriptionType" => "Abstract" }
117
+ { "description" => sanitize(r, new_line: true), "descriptionType" => "Abstract" }
102
118
  elsif r.is_a?(Hash)
103
- { "description" => sanitize(r["__content__"]), "descriptionType" => r["descriptionType"], "lang" => r["lang"] }.compact
119
+ { "description" => sanitize(r["__content__"], new_line: true), "descriptionType" => r["descriptionType"], "lang" => r["lang"] }.compact
104
120
  end
105
121
  end.compact
106
122
  rights_list = Array.wrap(meta.dig("rightsList", "rights")).map do |r|
@@ -287,7 +303,7 @@ module Bolognese
287
303
  "creators" => get_authors(Array.wrap(meta.dig("creators", "creator"))),
288
304
  "contributors" => get_authors(Array.wrap(meta.dig("contributors", "contributor"))),
289
305
  "container" => set_container(meta),
290
- "publisher" => parse_attributes(meta.fetch("publisher", nil), first: true).to_s.strip.presence,
306
+ "publisher" => publisher,
291
307
  "agency" => "datacite",
292
308
  "funding_references" => funding_references,
293
309
  "dates" => dates,
@@ -103,7 +103,7 @@ module Bolognese
103
103
  #"related_identifiers" => related_identifiers,
104
104
  #"dates" => dates,
105
105
  #"publication_year" => publication_year,
106
- "descriptions" => meta.fetch("description", nil).present? ? [{ "description" => sanitize(meta.fetch("description")), "descriptionType" => "Abstract" }] : [],
106
+ "descriptions" => meta.fetch("description", nil).present? ? [{ "description" => sanitize(meta.fetch("description"), new_line: true), "descriptionType" => "Abstract" }] : [],
107
107
  "rights_list" => rights_list,
108
108
  "version_info" => meta.fetch("version", nil),
109
109
  "subjects" => subjects
@@ -89,12 +89,12 @@ module Bolognese
89
89
  "url" => meta.fetch("UR", nil),
90
90
  "titles" => meta.fetch("T1", nil).present? ? [{ "title" => meta.fetch("T1", nil) }] : nil,
91
91
  "creators" => get_authors(author),
92
- "publisher" => meta.fetch("PB", "(:unav)"),
92
+ "publisher" => { "name" => meta.fetch("PB", "(:unav)") },
93
93
  "container" => container,
94
94
  "related_identifiers" => related_identifiers,
95
95
  "dates" => dates,
96
96
  "publication_year" => publication_year,
97
- "descriptions" => meta.fetch("AB", nil).present? ? [{ "description" => sanitize(meta.fetch("AB")), "descriptionType" => "Abstract" }] : nil,
97
+ "descriptions" => meta.fetch("AB", nil).present? ? [{ "description" => sanitize(meta.fetch("AB"), new_line: true), "descriptionType" => "Abstract" }] : nil,
98
98
  "subjects" => subjects,
99
99
  "language" => meta.fetch("LA", nil),
100
100
  "state" => state
@@ -74,7 +74,11 @@ module Bolognese
74
74
  creators = get_authors(from_schema_org_creators(Array.wrap(authors)))
75
75
  end
76
76
  contributors = get_authors(from_schema_org_contributors(Array.wrap(meta.fetch("editor", nil))))
77
- publisher = parse_attributes(meta.fetch("publisher", nil), content: "name", first: true)
77
+
78
+ publisher = {
79
+ "name" => parse_attributes(meta.fetch("publisher", nil), content: "name", first: true),
80
+ "publisherIdentifier" => parse_attributes(meta.fetch("publisher", nil), content: "@id", first: true),
81
+ }.compact if meta.fetch("publisher", nil).present?
78
82
 
79
83
  ct = (schema_org == "Dataset") ? "includedInDataCatalog" : "Periodical"
80
84
  container = if meta.fetch(ct, nil).present?
@@ -180,7 +184,7 @@ module Bolognese
180
184
  "related_identifiers" => related_identifiers,
181
185
  "publication_year" => publication_year,
182
186
  "dates" => dates,
183
- "descriptions" => meta.fetch("description", nil).present? ? [{ "description" => sanitize(meta.fetch("description")), "descriptionType" => "Abstract" }] : nil,
187
+ "descriptions" => meta.fetch("description", nil).present? ? [{ "description" => sanitize(meta.fetch("description"), new_line: true), "descriptionType" => "Abstract" }] : nil,
184
188
  "rights_list" => rights_list,
185
189
  "version_info" => meta.fetch("version", nil).to_s.presence,
186
190
  "subjects" => subjects,
@@ -600,12 +600,12 @@ module Bolognese
600
600
  end
601
601
 
602
602
  def validate_orcid(orcid)
603
- orcid = Array(/\A(?:(?:http|https):\/\/(?:(?:www|sandbox)?\.)?orcid\.org\/)?(\d{4}[[:space:]-]\d{4}[[:space:]-]\d{4}[[:space:]-]\d{3}[0-9X]+)\z/.match(orcid)).last
603
+ orcid = Array(/\A(?:(?:http|https):\/\/(?:(?:www|sandbox)?\.)?orcid\.org\/)?(\d{4}[[:space:]-]\d{4}[[:space:]-]\d{4}[[:space:]-]\d{3}[0-9X]+)\/{0,1}\z/.match(orcid)).last
604
604
  orcid.gsub(/[[:space:]]/, "-") if orcid.present?
605
605
  end
606
606
 
607
607
  def validate_ror(ror)
608
- Array(/^(?:(?:(?:http|https):\/\/)?ror\.org\/)?(0\w{6}\d{2})$/.match(ror)).last
608
+ Array(/^(?:(?:(?:http|https):\/\/)?ror\.org\/)?(0\w{6}\d{2})\/{0,1}$/.match(ror)).last
609
609
  end
610
610
 
611
611
  def validate_orcid_scheme(orcid_scheme)
@@ -773,6 +773,14 @@ module Bolognese
773
773
  nil
774
774
  end
775
775
 
776
+ def normalize_publisher(publisher)
777
+ if publisher.respond_to?(:to_hash)
778
+ publisher
779
+ elsif publisher.respond_to?(:to_str)
780
+ { "name" => publisher }
781
+ end
782
+ end
783
+
776
784
  def to_datacite_json(element, options={})
777
785
  a = Array.wrap(element).map do |e|
778
786
  e.inject({}) {|h, (k,v)| h[k.dasherize] = v; h }
@@ -1057,12 +1065,16 @@ module Bolognese
1057
1065
  custom_scrubber = Bolognese::WhitelistScrubber.new(options)
1058
1066
 
1059
1067
  if text.is_a?(String)
1060
- # remove excessive internal whitespace with squish
1061
- Loofah.scrub_fragment(text, custom_scrubber).to_s.squish
1068
+ if options[:new_line]
1069
+ # Remove multiple spaces, tabs, and other whitespace characters while preserving single spaces and new lines
1070
+ Loofah.scrub_fragment(text, custom_scrubber).to_s.gsub(/[ \t]+/, ' ').strip
1071
+ else
1072
+ Loofah.scrub_fragment(text, custom_scrubber).to_s.squish
1073
+ end
1062
1074
  elsif text.is_a?(Hash)
1063
- sanitize(text.fetch(content, nil))
1075
+ sanitize(text.fetch(content, nil), new_line: options[:new_line])
1064
1076
  elsif text.is_a?(Array)
1065
- a = text.map { |e| e.is_a?(Hash) ? sanitize(e.fetch(content, nil)) : sanitize(e) }.uniq
1077
+ a = text.map { |e| e.is_a?(Hash) ? sanitize(e.fetch(content, nil), new_line: options[:new_line]) : sanitize(e, new_line: options[:new_line]) }.uniq
1066
1078
  a = options[:first] ? a.first : a.unwrap
1067
1079
  else
1068
1080
  nil
@@ -1410,5 +1422,10 @@ module Bolognese
1410
1422
  }
1411
1423
  end
1412
1424
  end
1425
+
1426
+ def abstract_description
1427
+ # Fetch the first description with descriptionType "Abstract"
1428
+ descriptions&.find { |d| d["descriptionType"] == "Abstract" }
1429
+ end
1413
1430
  end
1414
1431
  end
@@ -1,3 +1,3 @@
1
1
  module Bolognese
2
- VERSION = "1.11.3"
2
+ VERSION = "2.2.0"
3
3
  end
@@ -21,7 +21,7 @@ module Bolognese
21
21
  volume: container.to_h["volume"],
22
22
  issue: container.to_h["issue"],
23
23
  pages: pages,
24
- publisher: publisher,
24
+ publisher: publisher["name"],
25
25
  year: publication_year,
26
26
  copyright: Array.wrap(rights_list).map { |l| l["rights"] }.first,
27
27
  }.compact
@@ -19,7 +19,7 @@ module Bolognese
19
19
  "tags" => subjects.present? ? Array.wrap(subjects).map { |k| parse_attributes(k, content: "subject", first: true) } : nil,
20
20
  "datePublished" => get_date(dates, "Issued") || publication_year,
21
21
  "dateModified" => get_date(dates, "Updated"),
22
- "publisher" => publisher,
22
+ "publisher" => publisher["name"],
23
23
  "license" => Array.wrap(rights_list).map { |l| l["rightsUri"] }.compact.unwrap,
24
24
  }.compact
25
25
  JSON.pretty_generate hsh.presence
@@ -15,7 +15,7 @@ module Bolognese
15
15
  resource_type: types["resourceType"],
16
16
  title: parse_attributes(titles, content: "title", first: true),
17
17
  author: authors_as_string(creators),
18
- publisher: publisher,
18
+ publisher: publisher["name"],
19
19
  publication_year: publication_year
20
20
  }.values
21
21
 
@@ -4,7 +4,9 @@ module Bolognese
4
4
  module Writers
5
5
  module DataciteJsonWriter
6
6
  def datacite_json
7
- JSON.pretty_generate crosscite_hsh.transform_keys! { |key| key.camelcase(uppercase_first_letter = :lower) } if crosscite_hsh.present?
7
+ if crosscite_hsh.present?
8
+ JSON.pretty_generate crosscite_hsh.transform_keys! { |key| key.camelcase(uppercase_first_letter = :lower) }
9
+ end
8
10
  end
9
11
  end
10
12
  end
@@ -77,16 +77,19 @@ module Bolognese
77
77
 
78
78
  def insert_source(xml)
79
79
  if is_chapter?
80
- xml.source(publisher)
80
+ xml.source(publisher["name"])
81
81
  elsif is_article? || is_data?
82
- xml.source(container && container["title"] || publisher)
82
+ xml.source(container && container["title"] || publisher["name"])
83
83
  else
84
84
  xml.source(parse_attributes(titles, content: "title", first: true))
85
85
  end
86
86
  end
87
87
 
88
88
  def insert_publisher_name(xml)
89
- xml.send("publisher-name", publisher)
89
+ attributes = {
90
+ "xml:lang" => publisher["lang"]
91
+ }.compact
92
+ xml.send("publisher-name", attributes, publisher["name"])
90
93
  end
91
94
 
92
95
  def insert_publication_date(xml)
@@ -14,7 +14,7 @@ module Bolognese
14
14
  "AB" => parse_attributes(abstract_description, content: "description", first: true),
15
15
  "KW" => Array.wrap(subjects).map { |k| parse_attributes(k, content: "subject", first: true) }.presence,
16
16
  "PY" => publication_year,
17
- "PB" => publisher,
17
+ "PB" => publisher["name"],
18
18
  "LA" => language,
19
19
  "VL" => container.to_h["volume"],
20
20
  "IS" => container.to_h["issue"],
@@ -37,7 +37,7 @@ module Bolognese
37
37
  "schemaVersion" => schema_version,
38
38
  "periodical" => types.present? ? ((types["schemaOrg"] != "Dataset") && container.present? ? to_schema_org(container) : nil) : nil,
39
39
  "includedInDataCatalog" => types.present? ? ((types["schemaOrg"] == "Dataset") && container.present? ? to_schema_org_container(container, type: "Dataset") : nil) : nil,
40
- "publisher" => publisher.present? ? { "@type" => "Organization", "name" => publisher } : nil,
40
+ "publisher" => publisher.present? ? { "@type" => "Organization", "@id" => publisher["publisherIdentifier"], "name" => publisher["name"] }.compact : nil,
41
41
  "funder" => to_schema_org_funder(funding_references),
42
42
  "provider" => agency.present? ? { "@type" => "Organization", "name" => agency } : nil
43
43
  }.compact.presence
@@ -46,6 +46,8 @@
46
46
  <xs:enumeration value="IsRequiredBy" />
47
47
  <xs:enumeration value="Obsoletes" />
48
48
  <xs:enumeration value="IsObsoletedBy" />
49
+ <xs:enumeration value="Collects" />
50
+ <xs:enumeration value="IsCollectedBy" />
49
51
  </xs:restriction>
50
52
  </xs:simpleType>
51
53
  </xs:schema>
@@ -22,6 +22,7 @@
22
22
  <xs:enumeration value="Dissertation" />
23
23
  <xs:enumeration value="Event" />
24
24
  <xs:enumeration value="Image" />
25
+ <xs:enumeration value="Instrument" />
25
26
  <xs:enumeration value="InteractiveResource" />
26
27
  <xs:enumeration value="Journal" />
27
28
  <xs:enumeration value="JournalArticle" />
@@ -35,6 +36,7 @@
35
36
  <xs:enumeration value="Software" />
36
37
  <xs:enumeration value="Sound" />
37
38
  <xs:enumeration value="Standard" />
39
+ <xs:enumeration value="StudyRegistration" />
38
40
  <xs:enumeration value="Text" />
39
41
  <xs:enumeration value="Workflow" />
40
42
  <xs:enumeration value="Other" />