bolognese 1.10.0 → 2.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/build.yml +0 -14
  3. data/.github/workflows/ci.yml +1 -1
  4. data/.github/workflows/release.yml +2 -12
  5. data/CHANGELOG.md +2 -2
  6. data/Gemfile.lock +69 -53
  7. data/bolognese.gemspec +6 -4
  8. data/lib/bolognese/author_utils.rb +17 -4
  9. data/lib/bolognese/datacite_utils.rb +19 -11
  10. data/lib/bolognese/metadata.rb +1 -1
  11. data/lib/bolognese/metadata_utils.rb +3 -2
  12. data/lib/bolognese/readers/bibtex_reader.rb +2 -2
  13. data/lib/bolognese/readers/citeproc_reader.rb +7 -1
  14. data/lib/bolognese/readers/codemeta_reader.rb +2 -2
  15. data/lib/bolognese/readers/crosscite_reader.rb +4 -1
  16. data/lib/bolognese/readers/crossref_reader.rb +40 -7
  17. data/lib/bolognese/readers/datacite_json_reader.rb +4 -1
  18. data/lib/bolognese/readers/datacite_reader.rb +25 -9
  19. data/lib/bolognese/readers/npm_reader.rb +1 -1
  20. data/lib/bolognese/readers/ris_reader.rb +2 -2
  21. data/lib/bolognese/readers/schema_org_reader.rb +27 -4
  22. data/lib/bolognese/utils.rb +36 -14
  23. data/lib/bolognese/version.rb +1 -1
  24. data/lib/bolognese/writers/bibtex_writer.rb +1 -1
  25. data/lib/bolognese/writers/codemeta_writer.rb +1 -1
  26. data/lib/bolognese/writers/csv_writer.rb +1 -1
  27. data/lib/bolognese/writers/datacite_json_writer.rb +3 -1
  28. data/lib/bolognese/writers/jats_writer.rb +6 -3
  29. data/lib/bolognese/writers/ris_writer.rb +2 -2
  30. data/lib/bolognese/writers/schema_org_writer.rb +6 -2
  31. data/resources/kernel-4/include/datacite-contributorType-v4.xsd +3 -1
  32. data/resources/kernel-4/include/datacite-dateType-v4.xsd +3 -1
  33. data/resources/kernel-4/include/datacite-relatedIdentifierType-v4.xsd +5 -2
  34. data/resources/kernel-4/include/datacite-relationType-v4.xsd +9 -3
  35. data/resources/kernel-4/include/datacite-resourceType-v4.xsd +7 -1
  36. data/resources/kernel-4/include/datacite-titleType-v4.xsd +1 -1
  37. data/resources/kernel-4/metadata.xsd +12 -7
  38. data/resources/kernel-4.5/include/datacite-contributorType-v4.xsd +35 -0
  39. data/resources/kernel-4.5/include/datacite-dateType-v4.xsd +25 -0
  40. data/resources/kernel-4.5/include/datacite-descriptionType-v4.xsd +19 -0
  41. data/resources/kernel-4.5/include/datacite-funderIdentifierType-v4.xsd +16 -0
  42. data/resources/kernel-4.5/include/datacite-nameType-v4.xsd +10 -0
  43. data/resources/kernel-4.5/include/datacite-numberType-v4.xsd +12 -0
  44. data/resources/kernel-4.5/include/datacite-relatedIdentifierType-v4.xsd +34 -0
  45. data/resources/kernel-4.5/include/datacite-relationType-v4.xsd +53 -0
  46. data/resources/kernel-4.5/include/datacite-resourceType-v4.xsd +45 -0
  47. data/resources/kernel-4.5/include/datacite-titleType-v4.xsd +14 -0
  48. data/resources/kernel-4.5/include/xml.xsd +286 -0
  49. data/resources/kernel-4.5/metadata.xsd +711 -0
  50. data/resources/kernel-4.6/include/datacite-contributorType-v4.xsd +37 -0
  51. data/resources/kernel-4.6/include/datacite-dateType-v4.xsd +27 -0
  52. data/resources/kernel-4.6/include/datacite-descriptionType-v4.xsd +19 -0
  53. data/resources/kernel-4.6/include/datacite-funderIdentifierType-v4.xsd +16 -0
  54. data/resources/kernel-4.6/include/datacite-nameType-v4.xsd +10 -0
  55. data/resources/kernel-4.6/include/datacite-numberType-v4.xsd +12 -0
  56. data/resources/kernel-4.6/include/datacite-relatedIdentifierType-v4.xsd +37 -0
  57. data/resources/kernel-4.6/include/datacite-relationType-v4.xsd +57 -0
  58. data/resources/kernel-4.6/include/datacite-resourceType-v4.xsd +49 -0
  59. data/resources/kernel-4.6/include/datacite-titleType-v4.xsd +14 -0
  60. data/resources/kernel-4.6/include/xml.xsd +286 -0
  61. data/resources/kernel-4.6/metadata.xsd +712 -0
  62. data/spec/author_utils_spec.rb +59 -6
  63. data/spec/datacite_utils_spec.rb +156 -2
  64. data/spec/fixtures/citeproc.json +7 -1
  65. data/spec/fixtures/crossref_schema_4.6_values.xml +183 -0
  66. data/spec/fixtures/datacite-example-ROR-nameIdentifiers.xml +24 -1
  67. data/spec/fixtures/datacite-example-full-v4.5.xml +255 -0
  68. data/spec/fixtures/datacite-example-full-v4.6.xml +114 -0
  69. data/spec/fixtures/datacite-example-relateditems-with-attributes.xml +61 -0
  70. data/spec/fixtures/datacite-seriesinformation.xml +7 -2
  71. data/spec/fixtures/datacite-xml-lang.xml +1 -1
  72. data/spec/fixtures/datacite.json +9 -3
  73. data/spec/fixtures/datacite_blank_name_identifier.xml +22 -0
  74. data/spec/fixtures/datacite_blank_publisher.xml +18 -0
  75. data/spec/fixtures/datacite_journal_article.xml +64 -0
  76. data/spec/fixtures/schema_org.json +1 -0
  77. data/spec/fixtures/schema_org_4.6_attributes.json +108 -0
  78. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/insert_contributors_Translator/supports_Translator_contributorType.yml +71 -0
  79. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/insert_dates_with_Coverage/inserts_date_with_dateType_Coverage.yml +71 -0
  80. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/insert_related_identifiers_CSTR/supports_CSTR_relatedIdentifierType.yml +71 -0
  81. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/insert_related_identifiers_HasTranslation/supports_HasTranslation_relationType.yml +71 -0
  82. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/insert_related_identifiers_RRID/supports_RRID_relatedIdentifierType.yml +71 -0
  83. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/insert_resource_type_with_Award/supports_Award_as_resourceTypeGeneral.yml +71 -0
  84. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/insert_resource_type_with_Project/supports_Project_as_resourceTypeGeneral.yml +71 -0
  85. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/insert_subjects/insert.yml +32 -14
  86. data/spec/readers/bibtex_reader_spec.rb +2 -0
  87. data/spec/readers/citeproc_reader_spec.rb +4 -0
  88. data/spec/readers/codemeta_reader_spec.rb +4 -4
  89. data/spec/readers/crosscite_reader_spec.rb +2 -0
  90. data/spec/readers/crossref_reader_spec.rb +72 -41
  91. data/spec/readers/datacite_json_reader_spec.rb +2 -0
  92. data/spec/readers/datacite_reader_spec.rb +209 -44
  93. data/spec/readers/npm_reader_spec.rb +2 -0
  94. data/spec/readers/ris_reader_spec.rb +3 -0
  95. data/spec/readers/schema_org_reader_spec.rb +38 -11
  96. data/spec/spec_helper.rb +1 -0
  97. data/spec/writers/citation_writer_spec.rb +9 -0
  98. data/spec/writers/citeproc_writer_spec.rb +9 -0
  99. data/spec/writers/crosscite_writer_spec.rb +7 -0
  100. data/spec/writers/datacite_json_writer_spec.rb +22 -0
  101. data/spec/writers/datacite_writer_spec.rb +155 -5
  102. data/spec/writers/jats_writer_spec.rb +16 -1
  103. data/spec/writers/rdf_xml_writer_spec.rb +7 -0
  104. data/spec/writers/schema_org_writer_spec.rb +49 -0
  105. data/spec/writers/turtle_writer_spec.rb +18 -0
  106. metadata +84 -37
@@ -4,6 +4,12 @@ module Bolognese
4
4
  module Readers
5
5
  module CrossrefReader
6
6
  # CrossRef types from https://api.crossref.org/types
7
+
8
+ CR_TO_DC_CONTRIBUTOR_TYPES = {
9
+ "editor" => "Editor",
10
+ "translator" => "Translator",
11
+ }
12
+
7
13
  def get_crossref(id: nil, **options)
8
14
  return { "string" => nil, "state" => "not_found" } unless id.present?
9
15
 
@@ -40,8 +46,7 @@ module Bolognese
40
46
  journal_metadata = nil
41
47
  journal_issue = {}
42
48
  journal_metadata = nil
43
- publisher = query.dig("crm_item", 0)
44
- publisher = nil unless publisher.is_a?(String)
49
+ publisher = query.dig("crm_item", 0).is_a?(String) ? { "name" => query.dig("crm_item", 0) } : nil
45
50
 
46
51
  case model
47
52
  when "book"
@@ -139,7 +144,7 @@ module Bolognese
139
144
 
140
145
  state = meta.present? || read_options.present? ? "findable" : "not_found"
141
146
 
142
- related_identifiers = Array.wrap(crossref_is_part_of(journal_metadata)) + Array.wrap(crossref_references(bibliographic_metadata))
147
+ related_identifiers = Array.wrap(crossref_is_part_of(journal_metadata)) + Array.wrap(crossref_references(bibliographic_metadata)) + Array.wrap(crossref_has_translation(program_metadata)) + Array.wrap(crossref_is_translation_of(program_metadata))
143
148
 
144
149
  container = if journal_metadata.present?
145
150
  issn = normalize_issn(journal_metadata.to_h.fetch("issn", nil))
@@ -188,7 +193,7 @@ module Bolognese
188
193
  "titles" => titles,
189
194
  "identifiers" => identifiers,
190
195
  "creators" => crossref_people(bibliographic_metadata, "author"),
191
- "contributors" => crossref_people(bibliographic_metadata, "editor"),
196
+ "contributors" => crossref_people(bibliographic_metadata, "editor") + crossref_people(bibliographic_metadata, "translator"),
192
197
  "funding_references" => crossref_funding_reference(program_metadata),
193
198
  "publisher" => publisher,
194
199
  "container" => container,
@@ -232,11 +237,11 @@ module Bolognese
232
237
 
233
238
  def crossref_description(bibliographic_metadata)
234
239
  abstract = Array.wrap(bibliographic_metadata.dig("abstract")).map do |r|
235
- { "descriptionType" => "Abstract", "description" => sanitize(parse_attributes(r, content: 'p')) }.compact
240
+ { "descriptionType" => "Abstract", "description" => sanitize(parse_attributes(r, content: 'p'), new_line: true) }.compact
236
241
  end
237
242
 
238
243
  description = Array.wrap(bibliographic_metadata.dig("description")).map do |r|
239
- { "descriptionType" => "Other", "description" => sanitize(parse_attributes(r)) }.compact
244
+ { "descriptionType" => "Other", "description" => sanitize(parse_attributes(r), new_line: true) }.compact
240
245
  end
241
246
 
242
247
  (abstract + description)
@@ -277,13 +282,15 @@ module Bolognese
277
282
  end
278
283
  end.compact
279
284
 
285
+ contributor_type = CR_TO_DC_CONTRIBUTOR_TYPES[a["contributor_role"]]
286
+
280
287
  { "nameType" => "Personal",
281
288
  "nameIdentifiers" => name_identifiers,
282
289
  "name" => [family_name, given_name].compact.join(", "),
283
290
  "givenName" => given_name,
284
291
  "familyName" => family_name,
285
292
  "affiliation" => affiliation.presence,
286
- "contributorType" => contributor_role == "editor" ? "Editor" : nil }.compact
293
+ "contributorType" => contributor_type }.compact
287
294
  else
288
295
  { "nameType" => "Organizational",
289
296
  "name" => a["name"] || a["__content__"] }
@@ -363,6 +370,32 @@ module Bolognese
363
370
  end
364
371
  end.compact.unwrap
365
372
  end
373
+
374
+ def crossref_has_translation(program_metadata)
375
+ refs = program_metadata.dig("related_item") if program_metadata.is_a?(Hash)
376
+ Array.wrap(refs).select { |a| a.dig("intra_work_relation", "relationship_type") == "hasTranslation" }.map do |c|
377
+ if c.dig("intra_work_relation", "identifier_type") == "doi"
378
+ { "relatedIdentifier" => parse_attributes(c["intra_work_relation"]).downcase,
379
+ "relationType" => "HasTranslation",
380
+ "relatedIdentifierType" => "DOI" }.compact
381
+ else
382
+ nil
383
+ end
384
+ end.compact.unwrap
385
+ end
386
+
387
+ def crossref_is_translation_of(program_metadata)
388
+ refs = program_metadata.dig("related_item") if program_metadata.is_a?(Hash)
389
+ Array.wrap(refs).select { |a| a.dig("intra_work_relation", "relationship_type") == "isTranslationOf" }.map do |c|
390
+ if c.dig("intra_work_relation", "identifier_type") == "doi"
391
+ { "relatedIdentifier" => parse_attributes(c["intra_work_relation"]).downcase,
392
+ "relationType" => "IsTranslationOf",
393
+ "relatedIdentifierType" => "DOI" }.compact
394
+ else
395
+ nil
396
+ end
397
+ end.compact.unwrap
398
+ end
366
399
  end
367
400
  end
368
401
  end
@@ -7,7 +7,10 @@ module Bolognese
7
7
  errors = jsonlint(string)
8
8
  return { "errors" => errors } if errors.present?
9
9
 
10
- string.present? ? Maremma.from_json(string).transform_keys! { |key| key.underscore } : {}
10
+ datacite_json = string.present? ? Maremma.from_json(string).transform_keys! { |key| key.underscore } : {}
11
+ datacite_json["publisher"] = normalize_publisher(datacite_json["publisher"]) if datacite_json.fetch("publisher", nil).present?
12
+
13
+ datacite_json
11
14
  end
12
15
  end
13
16
  end
@@ -94,13 +94,29 @@ module Bolognese
94
94
 
95
95
  titles = get_titles(meta)
96
96
 
97
+ publisher = Array.wrap(meta.dig("publisher")).map do |r|
98
+ if r.blank?
99
+ nil
100
+ elsif r.is_a?(String)
101
+ { "name" => r.strip }
102
+ elsif r.is_a?(Hash)
103
+ {
104
+ "name" => r["__content__"].present? ? r["__content__"].strip : nil,
105
+ "publisherIdentifier" => r["publisherIdentifierScheme"] == "ROR" ? normalize_ror(r["publisherIdentifier"]) : r["publisherIdentifier"],
106
+ "publisherIdentifierScheme" => r["publisherIdentifierScheme"],
107
+ "schemeUri" => r["schemeURI"],
108
+ "lang" => r["lang"],
109
+ }.compact
110
+ end
111
+ end.compact.first
112
+
97
113
  descriptions = Array.wrap(meta.dig("descriptions", "description")).map do |r|
98
114
  if r.blank?
99
115
  nil
100
116
  elsif r.is_a?(String)
101
- { "description" => sanitize(r), "descriptionType" => "Abstract" }
117
+ { "description" => sanitize(r, new_line: true), "descriptionType" => "Abstract" }
102
118
  elsif r.is_a?(Hash)
103
- { "description" => sanitize(r["__content__"]), "descriptionType" => r["descriptionType"], "lang" => r["lang"] }.compact
119
+ { "description" => sanitize(r["__content__"], new_line: true), "descriptionType" => r["descriptionType"], "lang" => r["lang"] }.compact
104
120
  end
105
121
  end.compact
106
122
  rights_list = Array.wrap(meta.dig("rightsList", "rights")).map do |r|
@@ -230,14 +246,14 @@ module Bolognese
230
246
  "creators" => get_authors(Array.wrap(ri.dig("creators", "creator"))),
231
247
  "titles" => get_titles(ri),
232
248
  "publicationYear" => ri["publicationYear"],
233
- "volume" => ri["volume"],
234
- "issue" => ri["issue"],
249
+ "volume" => parse_attributes(ri["volume"]),
250
+ "issue" => parse_attributes(ri["issue"]),
235
251
  "number" => number,
236
252
  "numberType" => numberType,
237
- "firstPage" => ri["firstPage"],
238
- "lastPage" => ri["lastPage"],
239
- "publisher" => ri["publisher"],
240
- "edition" => ri["edition"],
253
+ "firstPage" => parse_attributes(ri["firstPage"]),
254
+ "lastPage" => parse_attributes(ri["lastPage"]),
255
+ "publisher" => parse_attributes(ri["publisher"]),
256
+ "edition" => parse_attributes(ri["edition"]),
241
257
  "contributors" => get_authors(Array.wrap(ri.dig("contributors", "contributor"))),
242
258
  }.compact
243
259
  end
@@ -287,7 +303,7 @@ module Bolognese
287
303
  "creators" => get_authors(Array.wrap(meta.dig("creators", "creator"))),
288
304
  "contributors" => get_authors(Array.wrap(meta.dig("contributors", "contributor"))),
289
305
  "container" => set_container(meta),
290
- "publisher" => parse_attributes(meta.fetch("publisher", nil), first: true).to_s.strip.presence,
306
+ "publisher" => publisher,
291
307
  "agency" => "datacite",
292
308
  "funding_references" => funding_references,
293
309
  "dates" => dates,
@@ -103,7 +103,7 @@ module Bolognese
103
103
  #"related_identifiers" => related_identifiers,
104
104
  #"dates" => dates,
105
105
  #"publication_year" => publication_year,
106
- "descriptions" => meta.fetch("description", nil).present? ? [{ "description" => sanitize(meta.fetch("description")), "descriptionType" => "Abstract" }] : [],
106
+ "descriptions" => meta.fetch("description", nil).present? ? [{ "description" => sanitize(meta.fetch("description"), new_line: true), "descriptionType" => "Abstract" }] : [],
107
107
  "rights_list" => rights_list,
108
108
  "version_info" => meta.fetch("version", nil),
109
109
  "subjects" => subjects
@@ -89,12 +89,12 @@ module Bolognese
89
89
  "url" => meta.fetch("UR", nil),
90
90
  "titles" => meta.fetch("T1", nil).present? ? [{ "title" => meta.fetch("T1", nil) }] : nil,
91
91
  "creators" => get_authors(author),
92
- "publisher" => meta.fetch("PB", "(:unav)"),
92
+ "publisher" => { "name" => meta.fetch("PB", "(:unav)") },
93
93
  "container" => container,
94
94
  "related_identifiers" => related_identifiers,
95
95
  "dates" => dates,
96
96
  "publication_year" => publication_year,
97
- "descriptions" => meta.fetch("AB", nil).present? ? [{ "description" => sanitize(meta.fetch("AB")), "descriptionType" => "Abstract" }] : nil,
97
+ "descriptions" => meta.fetch("AB", nil).present? ? [{ "description" => sanitize(meta.fetch("AB"), new_line: true), "descriptionType" => "Abstract" }] : nil,
98
98
  "subjects" => subjects,
99
99
  "language" => meta.fetch("LA", nil),
100
100
  "state" => state
@@ -10,7 +10,9 @@ module Bolognese
10
10
  "isPartOf" => "IsPartOf",
11
11
  "hasPart" => "HasPart",
12
12
  "isPredecessor" => "IsPreviousVersionOf",
13
- "isSuccessor" => "IsNewVersionOf"
13
+ "isSuccessor" => "IsNewVersionOf",
14
+ "workTranslation" => "HasTranslation",
15
+ "translationOfWork" => "IsTranslationOf"
14
16
  }
15
17
 
16
18
  SO_TO_DC_REVERSE_RELATION_TYPES = {
@@ -74,7 +76,17 @@ module Bolognese
74
76
  creators = get_authors(from_schema_org_creators(Array.wrap(authors)))
75
77
  end
76
78
  contributors = get_authors(from_schema_org_contributors(Array.wrap(meta.fetch("editor", nil))))
77
- publisher = parse_attributes(meta.fetch("publisher", nil), content: "name", first: true)
79
+ translators = get_authors(from_schema_org_contributors(Array.wrap(meta.fetch("translator", nil))))
80
+ translators.map! do |translator|
81
+ translator["contributorType"] = "Translator"
82
+ translator
83
+ end
84
+ contributors += translators
85
+
86
+ publisher = {
87
+ "name" => parse_attributes(meta.fetch("publisher", nil), content: "name", first: true),
88
+ "publisherIdentifier" => parse_attributes(meta.fetch("publisher", nil), content: "@id", first: true),
89
+ }.compact if meta.fetch("publisher", nil).present?
78
90
 
79
91
  ct = (schema_org == "Dataset") ? "includedInDataCatalog" : "Periodical"
80
92
  container = if meta.fetch(ct, nil).present?
@@ -102,7 +114,9 @@ module Bolognese
102
114
  Array.wrap(schema_org_references(meta)) +
103
115
  Array.wrap(schema_org_is_referenced_by(meta)) +
104
116
  Array.wrap(schema_org_is_supplement_to(meta)) +
105
- Array.wrap(schema_org_is_supplemented_by(meta))
117
+ Array.wrap(schema_org_is_supplemented_by(meta)) +
118
+ Array.wrap(schema_org_has_translation(meta)) +
119
+ Array.wrap(schema_org_is_translation_of(meta))
106
120
 
107
121
  rights_list = Array.wrap(meta.fetch("license", nil)).compact.map do |rl|
108
122
  hsh_to_spdx("__content__" => rl["name"], "rightsURI" => rl["id"])
@@ -123,6 +137,7 @@ module Bolognese
123
137
  dates << { "date" => meta.fetch("datePublished"), "dateType" => "Issued" } if Date.edtf(meta.fetch("datePublished", nil)).present?
124
138
  dates << { "date" => meta.fetch("dateCreated"), "dateType" => "Created" } if Date.edtf(meta.fetch("dateCreated", nil)).present?
125
139
  dates << { "date" => meta.fetch("dateModified"), "dateType" => "Updated" } if Date.edtf(meta.fetch("dateModified", nil)).present?
140
+ dates << { "date" => meta.fetch("temporalCoverage"), "dateType" => "Coverage" } if Date.edtf(meta.fetch("temporalCoverage", nil)).present?
126
141
  publication_year = meta.fetch("datePublished")[0..3] if meta.fetch("datePublished", nil).present?
127
142
 
128
143
  if meta.fetch("inLanguage", nil).is_a?(String)
@@ -180,7 +195,7 @@ module Bolognese
180
195
  "related_identifiers" => related_identifiers,
181
196
  "publication_year" => publication_year,
182
197
  "dates" => dates,
183
- "descriptions" => meta.fetch("description", nil).present? ? [{ "description" => sanitize(meta.fetch("description")), "descriptionType" => "Abstract" }] : nil,
198
+ "descriptions" => meta.fetch("description", nil).present? ? [{ "description" => sanitize(meta.fetch("description"), new_line: true), "descriptionType" => "Abstract" }] : nil,
184
199
  "rights_list" => rights_list,
185
200
  "version_info" => meta.fetch("version", nil).to_s.presence,
186
201
  "subjects" => subjects,
@@ -236,6 +251,14 @@ module Bolognese
236
251
  schema_org_related_identifier(meta, relation_type: "isBasedOn")
237
252
  end
238
253
 
254
+ def schema_org_has_translation(meta)
255
+ schema_org_related_identifier(meta, relation_type: "workTranslation", )
256
+ end
257
+
258
+ def schema_org_is_translation_of(meta)
259
+ schema_org_related_identifier(meta, relation_type: "translationOfWork")
260
+ end
261
+
239
262
  end
240
263
  end
241
264
  end
@@ -78,7 +78,9 @@ module Bolognese
78
78
  "Other" => "CreativeWork",
79
79
  # not part of DataCite schema, but used internally
80
80
  "Periodical" => "Periodical",
81
- "DataCatalog" => "DataCatalog"
81
+ "DataCatalog" => "DataCatalog",
82
+ "Award" => "Grant",
83
+ "Project" => "Project"
82
84
  }
83
85
 
84
86
  DC_TO_CP_TRANSLATIONS = {
@@ -600,12 +602,12 @@ module Bolognese
600
602
  end
601
603
 
602
604
  def validate_orcid(orcid)
603
- orcid = Array(/\A(?:(?:http|https):\/\/(?:(?:www|sandbox)?\.)?orcid\.org\/)?(\d{4}[[:space:]-]\d{4}[[:space:]-]\d{4}[[:space:]-]\d{3}[0-9X]+)\z/.match(orcid)).last
605
+ orcid = Array(/\A(?:(?:http|https):\/\/(?:(?:www|sandbox)?\.)?orcid\.org\/)?(\d{4}[[:space:]-]\d{4}[[:space:]-]\d{4}[[:space:]-]\d{3}[0-9X]+)\/{0,1}\z/.match(orcid)).last
604
606
  orcid.gsub(/[[:space:]]/, "-") if orcid.present?
605
607
  end
606
608
 
607
609
  def validate_ror(ror)
608
- Array(/^(?:(?:(?:http|https):\/\/)?ror\.org\/)?(0\w{6}\d{2})$/.match(ror)).last
610
+ Array(/^(?:(?:(?:http|https):\/\/)?ror\.org\/)?(0\w{6}\d{2})\/{0,1}$/.match(ror)).last
609
611
  end
610
612
 
611
613
  def validate_orcid_scheme(orcid_scheme)
@@ -773,6 +775,14 @@ module Bolognese
773
775
  nil
774
776
  end
775
777
 
778
+ def normalize_publisher(publisher)
779
+ if publisher.respond_to?(:to_hash)
780
+ publisher
781
+ elsif publisher.respond_to?(:to_str)
782
+ { "name" => publisher }
783
+ end
784
+ end
785
+
776
786
  def to_datacite_json(element, options={})
777
787
  a = Array.wrap(element).map do |e|
778
788
  e.inject({}) {|h, (k,v)| h[k.dasherize] = v; h }
@@ -817,7 +827,8 @@ module Bolognese
817
827
 
818
828
  def to_schema_org_contributors(element)
819
829
  element = Array.wrap(element).map do |c|
820
- c["affiliation"] = Array.wrap(c["affiliation"]).map do |a|
830
+ transformed_c = c.dup
831
+ transformed_c["affiliation"] = Array.wrap(c["affiliation"]).map do |a|
821
832
  if a.is_a?(String)
822
833
  name = a
823
834
  affiliation_identifier = nil
@@ -831,10 +842,10 @@ module Bolognese
831
842
  "@id" => affiliation_identifier,
832
843
  "name" => name }.compact
833
844
  end.unwrap
834
- c["@type"] = c["nameType"].present? ? c["nameType"][0..-3] : nil
835
- c["@id"] = Array.wrap(c["nameIdentifiers"]).first.to_h.fetch("nameIdentifier", nil)
836
- c["name"] = c["familyName"].present? ? [c["givenName"], c["familyName"]].join(" ") : c["name"]
837
- c.except("nameIdentifiers", "nameType").compact
845
+ transformed_c["@type"] = c["nameType"].present? ? c["nameType"][0..-3] : nil
846
+ transformed_c["@id"] = Array.wrap(c["nameIdentifiers"]).first.to_h.fetch("nameIdentifier", nil)
847
+ transformed_c["name"] = c["familyName"].present? ? [c["givenName"], c["familyName"]].join(" ") : c["name"]
848
+ transformed_c.except("nameIdentifiers", "nameType").compact
838
849
  end.unwrap
839
850
  end
840
851
 
@@ -1021,7 +1032,7 @@ module Bolognese
1021
1032
  Array.wrap(element).map do |a|
1022
1033
  if a["literal"].present?
1023
1034
  a["@type"] = "Organization"
1024
- a["name"] = a["literal"]
1035
+ a["creatorName"] = a["literal"]
1025
1036
  else
1026
1037
  a["@type"] = "Person"
1027
1038
  a["name"] = [a["given"], a["family"]].compact.join(" ")
@@ -1057,12 +1068,16 @@ module Bolognese
1057
1068
  custom_scrubber = Bolognese::WhitelistScrubber.new(options)
1058
1069
 
1059
1070
  if text.is_a?(String)
1060
- # remove excessive internal whitespace with squish
1061
- Loofah.scrub_fragment(text, custom_scrubber).to_s.squish
1071
+ if options[:new_line]
1072
+ # Remove multiple spaces, tabs, and other whitespace characters while preserving single spaces and new lines
1073
+ Loofah.scrub_fragment(text, custom_scrubber).to_s.gsub(/[ \t]+/, ' ').strip
1074
+ else
1075
+ Loofah.scrub_fragment(text, custom_scrubber).to_s.squish
1076
+ end
1062
1077
  elsif text.is_a?(Hash)
1063
- sanitize(text.fetch(content, nil))
1078
+ sanitize(text.fetch(content, nil), new_line: options[:new_line])
1064
1079
  elsif text.is_a?(Array)
1065
- a = text.map { |e| e.is_a?(Hash) ? sanitize(e.fetch(content, nil)) : sanitize(e) }.uniq
1080
+ a = text.map { |e| e.is_a?(Hash) ? sanitize(e.fetch(content, nil), new_line: options[:new_line]) : sanitize(e, new_line: options[:new_line]) }.uniq
1066
1081
  a = options[:first] ? a.first : a.unwrap
1067
1082
  else
1068
1083
  nil
@@ -1222,7 +1237,9 @@ module Bolognese
1222
1237
  "urn" => "URN",
1223
1238
  "md5" => "md5",
1224
1239
  "minid" => "minid",
1225
- "dataguid" => "dataguid"
1240
+ "dataguid" => "dataguid",
1241
+ "cstr" => "CSTR",
1242
+ "rrid" => "RRID"
1226
1243
  }
1227
1244
 
1228
1245
  identifierTypes[identifier_type.downcase] || identifier_type
@@ -1410,5 +1427,10 @@ module Bolognese
1410
1427
  }
1411
1428
  end
1412
1429
  end
1430
+
1431
+ def abstract_description
1432
+ # Fetch the first description with descriptionType "Abstract"
1433
+ descriptions&.find { |d| d["descriptionType"] == "Abstract" }
1434
+ end
1413
1435
  end
1414
1436
  end
@@ -1,3 +1,3 @@
1
1
  module Bolognese
2
- VERSION = "1.10.0"
2
+ VERSION = "2.3.3"
3
3
  end
@@ -21,7 +21,7 @@ module Bolognese
21
21
  volume: container.to_h["volume"],
22
22
  issue: container.to_h["issue"],
23
23
  pages: pages,
24
- publisher: publisher,
24
+ publisher: publisher["name"],
25
25
  year: publication_year,
26
26
  copyright: Array.wrap(rights_list).map { |l| l["rights"] }.first,
27
27
  }.compact
@@ -19,7 +19,7 @@ module Bolognese
19
19
  "tags" => subjects.present? ? Array.wrap(subjects).map { |k| parse_attributes(k, content: "subject", first: true) } : nil,
20
20
  "datePublished" => get_date(dates, "Issued") || publication_year,
21
21
  "dateModified" => get_date(dates, "Updated"),
22
- "publisher" => publisher,
22
+ "publisher" => publisher["name"],
23
23
  "license" => Array.wrap(rights_list).map { |l| l["rightsUri"] }.compact.unwrap,
24
24
  }.compact
25
25
  JSON.pretty_generate hsh.presence
@@ -15,7 +15,7 @@ module Bolognese
15
15
  resource_type: types["resourceType"],
16
16
  title: parse_attributes(titles, content: "title", first: true),
17
17
  author: authors_as_string(creators),
18
- publisher: publisher,
18
+ publisher: publisher["name"],
19
19
  publication_year: publication_year
20
20
  }.values
21
21
 
@@ -4,7 +4,9 @@ module Bolognese
4
4
  module Writers
5
5
  module DataciteJsonWriter
6
6
  def datacite_json
7
- JSON.pretty_generate crosscite_hsh.transform_keys! { |key| key.camelcase(uppercase_first_letter = :lower) } if crosscite_hsh.present?
7
+ if crosscite_hsh.present?
8
+ JSON.pretty_generate crosscite_hsh.transform_keys! { |key| key.camelcase(uppercase_first_letter = :lower) }
9
+ end
8
10
  end
9
11
  end
10
12
  end
@@ -77,16 +77,19 @@ module Bolognese
77
77
 
78
78
  def insert_source(xml)
79
79
  if is_chapter?
80
- xml.source(publisher)
80
+ xml.source(publisher["name"])
81
81
  elsif is_article? || is_data?
82
- xml.source(container && container["title"] || publisher)
82
+ xml.source(container && container["title"] || publisher["name"])
83
83
  else
84
84
  xml.source(parse_attributes(titles, content: "title", first: true))
85
85
  end
86
86
  end
87
87
 
88
88
  def insert_publisher_name(xml)
89
- xml.send("publisher-name", publisher)
89
+ attributes = {
90
+ "xml:lang" => publisher["lang"]
91
+ }.compact
92
+ xml.send("publisher-name", attributes, publisher["name"])
90
93
  end
91
94
 
92
95
  def insert_publication_date(xml)
@@ -11,10 +11,10 @@ module Bolognese
11
11
  "AU" => to_ris(creators),
12
12
  "DO" => doi,
13
13
  "UR" => url,
14
- "AB" => parse_attributes(descriptions, content: "description", first: true),
14
+ "AB" => parse_attributes(abstract_description, content: "description", first: true),
15
15
  "KW" => Array.wrap(subjects).map { |k| parse_attributes(k, content: "subject", first: true) }.presence,
16
16
  "PY" => publication_year,
17
- "PB" => publisher,
17
+ "PB" => publisher["name"],
18
18
  "LA" => language,
19
19
  "VL" => container.to_h["volume"],
20
20
  "IS" => container.to_h["issue"],
@@ -13,7 +13,8 @@ module Bolognese
13
13
  "name" => parse_attributes(titles, content: "title", first: true),
14
14
  "author" => to_schema_org_creators(creators),
15
15
  "editor" => to_schema_org_contributors(contributors),
16
- "description" => parse_attributes(descriptions, content: "description", first: true),
16
+ "translator" => contributors ? to_schema_org_contributors(contributors.select { |c| c["contributorType"] == "Translator" }) : nil,
17
+ "description" => parse_attributes(abstract_description, content: "description", first: true),
17
18
  "license" => Array.wrap(rights_list).map { |l| l["rightsUri"] }.compact.unwrap,
18
19
  "version" => version_info,
19
20
  "keywords" => subjects.present? ? Array.wrap(subjects).map { |k| parse_attributes(k, content: "subject", first: true) }.join(", ") : nil,
@@ -23,6 +24,7 @@ module Bolognese
23
24
  "dateCreated" => get_date(dates, "Created"),
24
25
  "datePublished" => get_date(dates, "Issued") || publication_year,
25
26
  "dateModified" => get_date(dates, "Updated"),
27
+ "temporalCoverage" => get_date(dates, "Coverage"),
26
28
  "pageStart" => container.to_h["firstPage"],
27
29
  "pageEnd" => container.to_h["lastPage"],
28
30
  "spatialCoverage" => to_schema_org_spatial_coverage(geo_locations),
@@ -32,12 +34,14 @@ module Bolognese
32
34
  "predecessor_of" => to_schema_org_relation(related_identifiers: related_identifiers, relation_type: "IsPreviousVersionOf"),
33
35
  "successor_of" => to_schema_org_relation(related_identifiers: related_identifiers, relation_type: "IsNewVersionOf"),
34
36
  "citation" => to_schema_org_relation(related_identifiers: related_identifiers, relation_type: "References"),
37
+ "workTranslation" => to_schema_org_relation(related_identifiers: related_identifiers, relation_type: "HasTranslation"),
38
+ "translationOfWork" => to_schema_org_relation(related_identifiers: related_identifiers, relation_type: "IsTranslationOf"),
35
39
  "@reverse" => reverse.presence,
36
40
  "contentUrl" => Array.wrap(content_url).unwrap,
37
41
  "schemaVersion" => schema_version,
38
42
  "periodical" => types.present? ? ((types["schemaOrg"] != "Dataset") && container.present? ? to_schema_org(container) : nil) : nil,
39
43
  "includedInDataCatalog" => types.present? ? ((types["schemaOrg"] == "Dataset") && container.present? ? to_schema_org_container(container, type: "Dataset") : nil) : nil,
40
- "publisher" => publisher.present? ? { "@type" => "Organization", "name" => publisher } : nil,
44
+ "publisher" => publisher.present? ? { "@type" => "Organization", "@id" => publisher["publisherIdentifier"], "name" => publisher["name"] }.compact : nil,
41
45
  "funder" => to_schema_org_funder(funding_references),
42
46
  "provider" => agency.present? ? { "@type" => "Organization", "name" => agency } : nil
43
47
  }.compact.presence
@@ -2,7 +2,8 @@
2
2
  <!-- Version 1.0 - Created 2011-01-13 - FZ, TIB, Germany
3
3
  2013-05 v3.0: Addition of ID to simpleType element, added values "ResearchGroup" & "Other"
4
4
  2014-08-20 v3.1: Addition of value "DataCurator"
5
- 2015-05-14 v4.0 dropped value "Funder", use new "funderReference" -->
5
+ 2015-05-14 v4.0 dropped value "Funder", use new "funderReference"
6
+ 2024-12-31 v4.6: Addition of value "Translator" -->
6
7
  <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns="http://datacite.org/schema/kernel-4" targetNamespace="http://datacite.org/schema/kernel-4" elementFormDefault="qualified">
7
8
  <xs:simpleType name="contributorType" id="contributorType">
8
9
  <xs:annotation>
@@ -29,6 +30,7 @@
29
30
  <xs:enumeration value="Researcher" />
30
31
  <xs:enumeration value="Sponsor" />
31
32
  <xs:enumeration value="Supervisor" />
33
+ <xs:enumeration value="Translator" />
32
34
  <xs:enumeration value="WorkPackageLeader" />
33
35
  </xs:restriction>
34
36
  </xs:simpleType>
@@ -2,7 +2,8 @@
2
2
  <!-- Version 1.0 - Created 2011-01-13 - FZ, TIB, Germany
3
3
  2013-05 v3.0: Addition of ID to simpleType element; addition of value "Collected"; deleted "StartDate" & "EndDate"
4
4
  2017-10-23 v4.1: Addition of value "Other"
5
- 2019-02-14 v4.2: Addition of value "Withdrawn"-->
5
+ 2019-02-14 v4.2: Addition of value "Withdrawn"
6
+ 2024-12-31 v4.6: Addition of value "Coverage"-->
6
7
  <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns="http://datacite.org/schema/kernel-4" targetNamespace="http://datacite.org/schema/kernel-4" elementFormDefault="qualified">
7
8
  <xs:simpleType name="dateType" id="dateType">
8
9
  <xs:annotation>
@@ -13,6 +14,7 @@
13
14
  <xs:enumeration value="Available" />
14
15
  <xs:enumeration value="Collected" />
15
16
  <xs:enumeration value="Copyrighted" />
17
+ <xs:enumeration value="Coverage" />
16
18
  <xs:enumeration value="Created" />
17
19
  <xs:enumeration value="Issued" />
18
20
  <xs:enumeration value="Other" />
@@ -2,8 +2,9 @@
2
2
  <!-- Version 1.0 - Created 2011-01-13 - FZ, TIB, Germany
3
3
  2013-05 v3.0: Addition of ID to simpleType element; addition of value "PMID"
4
4
  2014-08-20 v3.1: Addition of values "arxiv" and "bibcode"
5
- 2015-02-12 v4.0 Addition of value "IGSN"
6
- 2019-02-14 v4.2 Addition of value "w3id" -->
5
+ 2015-02-12 v4.0: Addition of value "IGSN"
6
+ 2019-02-14 v4.2: Addition of value "w3id"
7
+ 2024-12-31 v4.6: Addition of values "CSTR", "RRID" -->
7
8
  <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns="http://datacite.org/schema/kernel-4" targetNamespace="http://datacite.org/schema/kernel-4" elementFormDefault="qualified">
8
9
  <xs:simpleType name="relatedIdentifierType" id="relatedIdentifierType">
9
10
  <xs:annotation>
@@ -13,6 +14,7 @@
13
14
  <xs:enumeration value="ARK" />
14
15
  <xs:enumeration value="arXiv" />
15
16
  <xs:enumeration value="bibcode" />
17
+ <xs:enumeration value="CSTR" />
16
18
  <xs:enumeration value="DOI" />
17
19
  <xs:enumeration value="EAN13" />
18
20
  <xs:enumeration value="EISSN" />
@@ -25,6 +27,7 @@
25
27
  <xs:enumeration value="LSID" />
26
28
  <xs:enumeration value="PMID" />
27
29
  <xs:enumeration value="PURL" />
30
+ <xs:enumeration value="RRID" />
28
31
  <xs:enumeration value="UPC" />
29
32
  <xs:enumeration value="URL" />
30
33
  <xs:enumeration value="URN" />
@@ -3,9 +3,11 @@
3
3
  2011-01-13 v1.0 - FZ, TIB, Germany
4
4
  2013-05 v3.0: Addition of ID to simpleType element, addition of values "IsIdenticalTo", "HasMetadata" & "IsMetadataFor"
5
5
  2014-08-20 v3.1: Addition of values "Reviews" & "IsReviewedBy" and "IsDerivedFrom" & "IsSourceOf"
6
- 2017-10-23 v.4.1: Addition of values "Describes", "IsDescribedBy", "HasVersion", "IsVersionOf", "Requires", "IsRequiredBy"
7
- 2019-02-14 v.4.2: Addition of values "Obsoletes", "IsObsoletedBy"
8
- 2021-03-05 v.4.4: Addition of value "IsPublishedIn" -->
6
+ 2017-10-23 v4.1: Addition of values "Describes", "IsDescribedBy", "HasVersion", "IsVersionOf", "Requires", "IsRequiredBy"
7
+ 2019-02-14 v4.2: Addition of values "Obsoletes", "IsObsoletedBy"
8
+ 2021-03-05 v4.4: Addition of value "IsPublishedIn"
9
+ 2024-01-22 v4.5: Addition of values "Collects", "IsCollectedBy"
10
+ 2024-12-31 v4.6: Addition of values "HasTranslation", "IsTranslationOf"-->
9
11
  <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns="http://datacite.org/schema/kernel-4" targetNamespace="http://datacite.org/schema/kernel-4" elementFormDefault="qualified">
10
12
  <xs:simpleType name="relationType" id="relationType">
11
13
  <xs:annotation>
@@ -46,6 +48,10 @@
46
48
  <xs:enumeration value="IsRequiredBy" />
47
49
  <xs:enumeration value="Obsoletes" />
48
50
  <xs:enumeration value="IsObsoletedBy" />
51
+ <xs:enumeration value="Collects" />
52
+ <xs:enumeration value="IsCollectedBy" />
53
+ <xs:enumeration value="HasTranslation" />
54
+ <xs:enumeration value="IsTranslationOf" />
49
55
  </xs:restriction>
50
56
  </xs:simpleType>
51
57
  </xs:schema>