bolognese 1.8.12 → 1.9.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (65) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/build.yml +37 -0
  3. data/.github/workflows/changelog.yml +36 -0
  4. data/.github/workflows/release.yml +47 -0
  5. data/CHANGELOG.md +1716 -0
  6. data/Gemfile.lock +63 -59
  7. data/README.md +1 -1
  8. data/bolognese.gemspec +1 -1
  9. data/lib/bolognese/author_utils.rb +6 -5
  10. data/lib/bolognese/datacite_utils.rb +76 -9
  11. data/lib/bolognese/metadata.rb +10 -5
  12. data/lib/bolognese/metadata_utils.rb +3 -2
  13. data/lib/bolognese/readers/bibtex_reader.rb +5 -4
  14. data/lib/bolognese/readers/citeproc_reader.rb +1 -1
  15. data/lib/bolognese/readers/crossref_reader.rb +4 -5
  16. data/lib/bolognese/readers/datacite_reader.rb +57 -11
  17. data/lib/bolognese/readers/ris_reader.rb +3 -3
  18. data/lib/bolognese/utils.rb +163 -46
  19. data/lib/bolognese/version.rb +1 -1
  20. data/lib/bolognese/writers/bibtex_writer.rb +1 -1
  21. data/resources/kernel-4.4/include/datacite-contributorType-v4.xsd +35 -0
  22. data/resources/kernel-4.4/include/datacite-dateType-v4.xsd +25 -0
  23. data/resources/kernel-4.4/include/datacite-descriptionType-v4.xsd +19 -0
  24. data/resources/kernel-4.4/include/datacite-funderIdentifierType-v4.xsd +16 -0
  25. data/resources/kernel-4.4/include/datacite-nameType-v4.xsd +10 -0
  26. data/resources/kernel-4.4/include/datacite-numberType-v4.xsd +12 -0
  27. data/resources/kernel-4.4/include/datacite-relatedIdentifierType-v4.xsd +34 -0
  28. data/resources/kernel-4.4/include/datacite-relationType-v4.xsd +51 -0
  29. data/resources/kernel-4.4/include/datacite-resourceType-v4.xsd +43 -0
  30. data/resources/kernel-4.4/include/datacite-titleType-v4.xsd +14 -0
  31. data/resources/kernel-4.4/include/xml.xsd +286 -0
  32. data/resources/kernel-4.4/metadata.xsd +707 -0
  33. data/resources/kernel-4/include/datacite-contributorType-v4.xsd +21 -21
  34. data/resources/kernel-4/include/datacite-dateType-v4.xsd +11 -11
  35. data/resources/kernel-4/include/datacite-descriptionType-v4.xsd +6 -6
  36. data/resources/kernel-4/include/datacite-funderIdentifierType-v4.xsd +5 -5
  37. data/resources/kernel-4/include/datacite-nameType-v4.xsd +2 -2
  38. data/resources/kernel-4/include/datacite-numberType-v4.xsd +12 -0
  39. data/resources/kernel-4/include/datacite-relatedIdentifierType-v4.xsd +19 -19
  40. data/resources/kernel-4/include/datacite-relationType-v4.xsd +36 -34
  41. data/resources/kernel-4/include/datacite-resourceType-v4.xsd +32 -17
  42. data/resources/kernel-4/include/datacite-titleType-v4.xsd +4 -4
  43. data/resources/kernel-4/metadata.xsd +297 -105
  44. data/spec/fixtures/datacite-example-dissertation-v4.4.xml +56 -0
  45. data/spec/fixtures/datacite-example-full-v4.4.xml +114 -0
  46. data/spec/fixtures/datacite-example-relateditems.xml +61 -0
  47. data/spec/fixtures/datacite-example-xs-string.xml +28 -0
  48. data/spec/fixtures/datacite-geolocationpolygons-multiple.xml +56 -0
  49. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_crossref_metadata/article_id_as_page_number.yml +100 -0
  50. data/spec/metadata_spec.rb +1 -1
  51. data/spec/readers/bibtex_reader_spec.rb +2 -2
  52. data/spec/readers/crossref_reader_spec.rb +56 -38
  53. data/spec/readers/datacite_json_reader_spec.rb +1 -1
  54. data/spec/readers/datacite_reader_spec.rb +255 -4
  55. data/spec/readers/ris_reader_spec.rb +3 -3
  56. data/spec/writers/bibtex_writer_spec.rb +1 -1
  57. data/spec/writers/citation_writer_spec.rb +2 -2
  58. data/spec/writers/crosscite_writer_spec.rb +3 -3
  59. data/spec/writers/csv_writer_spec.rb +4 -4
  60. data/spec/writers/datacite_json_writer_spec.rb +3 -3
  61. data/spec/writers/datacite_writer_spec.rb +5 -5
  62. data/spec/writers/ris_writer_spec.rb +3 -2
  63. data/spec/writers/schema_org_writer_spec.rb +4 -4
  64. metadata +30 -8
  65. data/.travis.yml +0 -35
@@ -108,12 +108,12 @@ module Bolognese
108
108
  end
109
109
 
110
110
  def reverse
111
- { "citation" => Array.wrap(related_identifiers).select { |ri| ri["relationType"] == "IsReferencedBy" }.map do |r|
111
+ { "citation" => Array.wrap(related_identifiers).select { |ri| ri["relationType"] == "IsReferencedBy" }.map do |r|
112
112
  { "@id" => normalize_doi(r["relatedIdentifier"]),
113
113
  "@type" => r["resourceTypeGeneral"] || "ScholarlyArticle",
114
114
  "identifier" => r["relatedIdentifierType"] == "DOI" ? nil : to_identifier(r) }.compact
115
115
  end.unwrap,
116
- "isBasedOn" => Array.wrap(related_identifiers).select { |ri| ri["relationType"] == "IsSupplementTo" }.map do |r|
116
+ "isBasedOn" => Array.wrap(related_identifiers).select { |ri| ri["relationType"] == "IsSupplementTo" }.map do |r|
117
117
  { "@id" => normalize_doi(r["relatedIdentifier"]),
118
118
  "@type" => r["resourceTypeGeneral"] || "ScholarlyArticle",
119
119
  "identifier" => r["relatedIdentifierType"] == "DOI" ? nil : to_identifier(r) }.compact
@@ -193,6 +193,7 @@ module Bolognese
193
193
  "geo_locations" => geo_locations,
194
194
  "funding_references" => funding_references,
195
195
  "related_identifiers" => related_identifiers,
196
+ "related_items" => related_items,
196
197
  "schema_version" => schema_version,
197
198
  "provider_id" => provider_id,
198
199
  "client_id" => client_id,
@@ -31,12 +31,13 @@ module Bolognese
31
31
 
32
32
  meta = string.present? ? BibTeX.parse(string).first : OpenStruct.new
33
33
 
34
- schema_org = BIB_TO_SO_TRANSLATIONS[meta.try(:type).to_s] || "ScholarlyArticle"
34
+ bibtex_type = meta.try(:type).to_s
35
+ schema_org = BIB_TO_SO_TRANSLATIONS[bibtex_type] || "ScholarlyArticle"
35
36
  types = {
36
- "resourceTypeGeneral" => Metadata::SO_TO_DC_TRANSLATIONS[schema_org],
37
+ "resourceTypeGeneral" => Metadata::BIB_TO_DC_TRANSLATIONS[bibtex_type],
37
38
  "resourceType" => Bolognese::Utils::BIB_TO_CR_TRANSLATIONS[meta.try(:type).to_s] || meta.try(:type).to_s,
38
39
  "schemaOrg" => schema_org,
39
- "bibtex" => meta.type.to_s,
40
+ "bibtex" => bibtex_type,
40
41
  "citeproc" => BIB_TO_CP_TRANSLATIONS[meta.try(:type).to_s] || "misc",
41
42
  "ris" => BIB_TO_RIS_TRANSLATIONS[meta.try(:type).to_s] || "GEN"
42
43
  }.compact
@@ -60,7 +61,7 @@ module Bolognese
60
61
  container = if meta.try(:journal).present?
61
62
  first_page = meta.try(:pages).present? ? meta.try(:pages).split("-").map(&:strip)[0] : nil
62
63
  last_page = meta.try(:pages).present? ? meta.try(:pages).split("-").map(&:strip)[1] : nil
63
-
64
+
64
65
  { "type" => "Journal",
65
66
  "title" => meta.journal.to_s,
66
67
  "identifier" => meta.try(:issn).to_s.presence,
@@ -38,7 +38,7 @@ module Bolognese
38
38
  citeproc_type = meta.fetch("type", nil)
39
39
  schema_org = CP_TO_SO_TRANSLATIONS[citeproc_type] || "CreativeWork"
40
40
  types = {
41
- "resourceTypeGeneral" => Bolognese::Utils::SO_TO_DC_TRANSLATIONS[schema_org],
41
+ "resourceTypeGeneral" => Bolognese::Utils::CP_TO_DC_TRANSLATIONS[citeproc_type],
42
42
  "reourceType" => meta.fetch("additionalType", nil),
43
43
  "schemaOrg" => schema_org,
44
44
  "citeproc" => citeproc_type,
@@ -64,7 +64,6 @@ module Bolognese
64
64
  journal_article = meta.dig("crossref", "journal", "journal_article") || {}
65
65
  bibliographic_metadata = journal_article.presence || journal_issue.presence || journal_metadata
66
66
  program_metadata = bibliographic_metadata.dig("crossmark", "custom_metadata", "program") || bibliographic_metadata.dig("program")
67
-
68
67
  resource_type = if journal_article.present?
69
68
  "journal_article"
70
69
  elsif journal_issue.present?
@@ -94,7 +93,7 @@ module Bolognese
94
93
  resource_type = (resource_type || model).to_s.underscore.camelcase.presence
95
94
  schema_org = Bolognese::Utils::CR_TO_SO_TRANSLATIONS[resource_type] || "ScholarlyArticle"
96
95
  types = {
97
- "resourceTypeGeneral" => Bolognese::Utils::SO_TO_DC_TRANSLATIONS[schema_org],
96
+ "resourceTypeGeneral" => Bolognese::Utils::CR_TO_DC_TRANSLATIONS[resource_type],
98
97
  "resourceType" => resource_type,
99
98
  "schemaOrg" => schema_org,
100
99
  "citeproc" => Bolognese::Utils::CR_TO_CP_TRANSLATIONS[resource_type] || "article-journal",
@@ -134,10 +133,10 @@ module Bolognese
134
133
  # check that date is valid iso8601 date
135
134
  date_published = nil unless Date.edtf(date_published.to_h["date"]).present?
136
135
  date_updated = nil unless Date.edtf(date_updated.to_h["date"]).present?
137
-
136
+
138
137
  dates = [date_published, date_updated].compact
139
138
  publication_year = date_published.to_h.fetch("date", "")[0..3].presence
140
-
139
+
141
140
  state = meta.present? || read_options.present? ? "findable" : "not_found"
142
141
 
143
142
  related_identifiers = Array.wrap(crossref_is_part_of(journal_metadata)) + Array.wrap(crossref_references(bibliographic_metadata))
@@ -150,7 +149,7 @@ module Bolognese
150
149
  "title" => parse_attributes(journal_metadata.to_h["full_title"]),
151
150
  "volume" => parse_attributes(journal_issue.dig("journal_volume", "volume")),
152
151
  "issue" => parse_attributes(journal_issue.dig("issue")),
153
- "firstPage" => bibliographic_metadata.dig("pages", "first_page"),
152
+ "firstPage" => bibliographic_metadata.dig("pages", "first_page") || parse_attributes(journal_article.to_h.dig("publisher_item", "item_number"), first: true),
154
153
  "lastPage" => bibliographic_metadata.dig("pages", "last_page") }.compact
155
154
  elsif book_series_metadata.to_h.fetch("series_metadata", nil).present?
156
155
  issn = normalize_issn(book_series_metadata.dig("series_metadata", "issn"))
@@ -92,15 +92,7 @@ module Bolognese
92
92
  "ris" => Bolognese::Utils::CR_TO_RIS_TRANSLATIONS[resource_type.to_s.underscore.camelcase] || Bolognese::Utils::DC_TO_RIS_TRANSLATIONS[resource_type_general.to_s.dasherize] || "GEN"
93
93
  }.compact
94
94
 
95
- titles = Array.wrap(meta.dig("titles", "title")).map do |r|
96
- if r.blank?
97
- nil
98
- elsif r.is_a?(String)
99
- { "title" => sanitize(r) }
100
- else
101
- { "title" => sanitize(r["__content__"]), "titleType" => r["titleType"], "lang" => r["lang"] }.compact
102
- end
103
- end.compact
95
+ titles = get_titles(meta)
104
96
 
105
97
  descriptions = Array.wrap(meta.dig("descriptions", "description")).map do |r|
106
98
  if r.blank?
@@ -120,7 +112,7 @@ module Bolognese
120
112
  hsh_to_spdx(r)
121
113
  end
122
114
  end.compact
123
-
115
+
124
116
  subjects = Array.wrap(meta.dig("subjects", "subject")).reduce([]) do |sum, subject|
125
117
  if subject.is_a?(String)
126
118
  sum += name_to_fos(subject)
@@ -194,6 +186,42 @@ module Bolognese
194
186
  "schemeType" => ri["schemeType"]
195
187
  }.compact
196
188
  end
189
+
190
+ related_items = Array.wrap(meta.dig("relatedItems", "relatedItem")).map do |ri|
191
+ rii = ri["relatedItemIdentifier"]
192
+ if rii["relatedItemIdentifierType"] == "DOI"
193
+ rid = validate_doi(rii["__content__"].to_s.downcase)
194
+ else
195
+ rid = rii["__content__"]
196
+ end
197
+
198
+ relatedItemIdentifier = {
199
+ "relatedItemIdentifier" => rid,
200
+ "relatedItemIdentifierType" => rii["relatedItemIdentifierType"],
201
+ "relatedMetadataScheme" => rii["relatedMetadataScheme"],
202
+ "schemeURI" => rii["schemeURI"],
203
+ "schemeType" => rii["schemeType"]
204
+ }.compact
205
+
206
+ {
207
+ "relationType" => ri["relationType"],
208
+ "relatedItemType" => ri["relatedItemType"],
209
+ "relatedItemIdentifier" => relatedItemIdentifier,
210
+ "creators" => get_authors(Array.wrap(ri.dig("creators", "creator"))),
211
+ "titles" => get_titles(ri),
212
+ "publicationYear" => ri["publicationYear"],
213
+ "volume" => ri["volume"],
214
+ "issue" => ri["issue"],
215
+ "number" => ri.dig("number", "__content__"),
216
+ "numberType" => ri.dig("number", "numberType"),
217
+ "firstPage" => ri["firstPage"],
218
+ "lastPage" => ri["lastPage"],
219
+ "publisher" => ri["publisher"],
220
+ "edition" => ri["edition"],
221
+ "contributors" => get_authors(Array.wrap(ri.dig("contributors", "contributor"))),
222
+ }.compact
223
+ end
224
+
197
225
  geo_locations = Array.wrap(meta.dig("geoLocations", "geoLocation")).map do |gl|
198
226
  if !gl.is_a?(Hash) || gl["geoLocationPoint"].is_a?(String) || gl["geoLocationBox"].is_a?(String) || gl["geoLocationPolygon"].is_a?(String)
199
227
  nil
@@ -209,7 +237,9 @@ module Bolognese
209
237
  "southBoundLatitude" => gl.dig("geoLocationBox", "southBoundLatitude"),
210
238
  "northBoundLatitude" => gl.dig("geoLocationBox", "northBoundLatitude")
211
239
  }.compact.presence,
212
- "geoLocationPolygon" => Array.wrap(gl.dig("geoLocationPolygon", "polygonPoint")).map { |glp| { "polygonPoint" => glp } }.compact.presence,
240
+ "geoLocationPolygon" => Array.wrap(gl.dig("geoLocationPolygon")).map do |glp|
241
+ Array.wrap(glp.dig("polygonPoint")).map { |glpp| { "polygonPoint" => glpp } }.compact.presence
242
+ end.compact.presence,
213
243
  "geoLocationPlace" => parse_attributes(gl["geoLocationPlace"], first: true).to_s.strip.presence
214
244
  }.compact
215
245
  end
@@ -238,6 +268,7 @@ module Bolognese
238
268
  "language" => parse_attributes(meta.fetch("language", nil), first: true).to_s.strip.presence,
239
269
  "geo_locations" => geo_locations,
240
270
  "related_identifiers" => related_identifiers,
271
+ "related_items" => related_items,
241
272
  "formats" => formats,
242
273
  "sizes" => sizes,
243
274
  "schema_version" => schema_version,
@@ -266,6 +297,21 @@ module Bolognese
266
297
  {}
267
298
  end
268
299
  end
300
+
301
+ def get_titles(meta)
302
+ titles = Array.wrap(meta.dig("titles", "title")).map do |r|
303
+ if r.blank?
304
+ nil
305
+ elsif r.is_a?(String)
306
+ { "title" => sanitize(r) }
307
+ else
308
+ { "title" => sanitize(r["__content__"]), "titleType" => r["titleType"], "lang" => r["lang"] }.compact
309
+ end
310
+ end.compact
311
+
312
+ titles
313
+ end
314
+
269
315
  end
270
316
  end
271
317
  end
@@ -41,14 +41,14 @@ module Bolognese
41
41
  ris_type = meta.fetch("TY", nil) || "GEN"
42
42
  schema_org = RIS_TO_SO_TRANSLATIONS[ris_type] || "CreativeWork"
43
43
  types = {
44
- "resourceTypeGeneral" => Metadata::SO_TO_DC_TRANSLATIONS[schema_org],
44
+ "resourceTypeGeneral" => Metadata::RIS_TO_DC_TRANSLATIONS[ris_type],
45
45
  "schemaOrg" => schema_org,
46
46
  "citeproc" => RIS_TO_CP_TRANSLATIONS[schema_org] || "misc",
47
47
  "ris" => ris_type
48
48
  }.compact
49
49
 
50
50
  id = normalize_doi(options[:doi] || meta.fetch("DO", nil))
51
-
51
+
52
52
  author = Array.wrap(meta.fetch("AU", nil)).map { |a| { "creatorName" => a } }
53
53
  date_parts = meta.fetch("PY", nil).to_s.split("/")
54
54
  created_date_parts = meta.fetch("Y1", nil).to_s.split("/")
@@ -67,7 +67,7 @@ module Bolognese
67
67
  end
68
68
  container = if meta.fetch("T2", nil).present?
69
69
  { "type" => "Journal",
70
- "title" => meta.fetch("T2", nil),
70
+ "title" => meta.fetch("T2", nil),
71
71
  "identifier" => meta.fetch("SN", nil),
72
72
  "volume" => meta.fetch("VL", nil),
73
73
  "issue" => meta.fetch("IS", nil),
@@ -45,16 +45,30 @@ module Bolognese
45
45
 
46
46
  DC_TO_SO_TRANSLATIONS = {
47
47
  "Audiovisual" => "MediaObject",
48
+ "Book" => "Book",
49
+ "BookChapter" => "Chapter",
48
50
  "Collection" => "Collection",
51
+ "ComputationalNotebook" => "SoftwareSourceCode",
52
+ "ConferencePaper" => "Article",
53
+ "ConferenceProceeding" => "Periodical",
54
+ "DataPaper" => "Article",
49
55
  "Dataset" => "Dataset",
56
+ "Dissertation" => "Thesis",
50
57
  "Event" => "Event",
51
58
  "Image" => "ImageObject",
52
59
  "InteractiveResource" => nil,
60
+ "Journal" => "Periodical",
61
+ "JournalArticle" => "ScholarlyArticle",
53
62
  "Model" => nil,
63
+ "OutputManagementPlan" => nil,
64
+ "PeerReview" => "Review",
54
65
  "PhysicalObject" => nil,
66
+ "Preprint" => nil,
67
+ "Report" => "Report",
55
68
  "Service" => "Service",
56
69
  "Software" => "SoftwareSourceCode",
57
70
  "Sound" => "AudioObject",
71
+ "Standard" => nil,
58
72
  "Text" => "ScholarlyArticle",
59
73
  "Workflow" => nil,
60
74
  "Other" => "CreativeWork",
@@ -65,15 +79,29 @@ module Bolognese
65
79
 
66
80
  DC_TO_CP_TRANSLATIONS = {
67
81
  "Audiovisual" => "motion_picture",
82
+ "Book" => "book",
83
+ "BookChapter" => "chapter",
68
84
  "Collection" => nil,
85
+ "ComputationalNotebook" => nil,
86
+ "ConferencePaper" => "paper_conference",
87
+ "ConferenceProceeding" => nil,
88
+ "DataPaper" => "report",
69
89
  "Dataset" => "dataset",
90
+ "Dissertation" => nil,
70
91
  "Event" => nil,
71
92
  "Image" => "graphic",
72
93
  "InteractiveResource" => nil,
94
+ "Journal" => nil,
95
+ "JournalArticle" => "article-journal",
73
96
  "Model" => nil,
97
+ "OutputManagementPlan" => nil,
98
+ "PeerReview" => "review",
74
99
  "PhysicalObject" => nil,
100
+ "Preprint" => nil,
101
+ "Report" => "report",
75
102
  "Service" => nil,
76
103
  "Sound" => "song",
104
+ "Standard" => nil,
77
105
  "Text" => "report",
78
106
  "Workflow" => nil,
79
107
  "Other" => nil
@@ -202,13 +230,44 @@ module Bolognese
202
230
  "BookSet" => "book"
203
231
  }
204
232
 
233
+ CR_TO_DC_TRANSLATIONS = {
234
+ "Proceedings" => nil,
235
+ "ReferenceBook" => nil,
236
+ "JournalIssue" => "Text",
237
+ "ProceedingsArticle" => "ConferencePaper",
238
+ "Other" => "Other",
239
+ "Dissertation" => "Dissertation",
240
+ "Dataset" => "Dataset",
241
+ "EditedBook" => "Book",
242
+ "JournalArticle" => "JournalArticle",
243
+ "Journal" => "Journal",
244
+ "Report" => "Report",
245
+ "BookSeries" => nil,
246
+ "ReportSeries" => nil,
247
+ "BookTrack" => nil,
248
+ "Standard" => "Standard",
249
+ "BookSection" => "BookChapter",
250
+ "BookPart" => nil,
251
+ "Book" => "Book",
252
+ "BookChapter" => "BookChapter",
253
+ "SaComponent" => "Text",
254
+ "StandardSeries" => "Standard",
255
+ "Monograph" => "book",
256
+ "Component" => nil,
257
+ "ReferenceEntry" => nil,
258
+ "JournalVolume" => nil,
259
+ "BookSet" => nil,
260
+ "PostedContent" => "JournalArticle",
261
+ "PeerReview" => "PeerReview"
262
+ }
263
+
205
264
  SO_TO_DC_TRANSLATIONS = {
206
265
  "Article" => "Text",
207
266
  "AudioObject" => "Sound",
208
267
  "Blog" => "Text",
209
268
  "BlogPosting" => "Text",
210
- "Book" => "Text",
211
- "Chapter" => "Text",
269
+ "Book" => "Book",
270
+ "Chapter" => "BookChapter",
212
271
  "Collection" => "Collection",
213
272
  "DataCatalog" => "Dataset",
214
273
  "Dataset" => "Dataset",
@@ -216,11 +275,11 @@ module Bolognese
216
275
  "ImageObject" => "Image",
217
276
  "Movie" => "Audiovisual",
218
277
  "PublicationIssue" => "Text",
219
- "Report" => "Text",
278
+ "Report" => "Report",
220
279
  "ScholarlyArticle" => "Text",
221
280
  "Thesis" => "Text",
222
281
  "Service" => "Service",
223
- "Review" => "Text",
282
+ "Review" => "PeerReview",
224
283
  "SoftwareSourceCode" => "Software",
225
284
  "VideoObject" => "Audiovisual",
226
285
  "WebPage" => "Text",
@@ -323,21 +382,73 @@ module Bolognese
323
382
 
324
383
  DC_TO_RIS_TRANSLATIONS = {
325
384
  "Audiovisual" => "MPCT",
385
+ "Book" => "BOOK",
386
+ "BookChapter" => "CHAP",
326
387
  "Collection" => nil,
388
+ "ComputationalNotebook" => "COMP",
389
+ "ConferencePaper" => nil,
390
+ "ConferenceProceeding" => nil,
391
+ "DataPaper" => nil,
327
392
  "Dataset" => "DATA",
393
+ "Dissertation" => "THES",
328
394
  "Event" => nil,
329
395
  "Image" => "FIGURE",
330
396
  "InteractiveResource" => nil,
397
+ "Journal" => nil,
398
+ "JournalArticle" => "JOUR",
331
399
  "Model" => nil,
400
+ "OutputManagementPlan" => nil,
401
+ "PeerReview" => nil,
332
402
  "PhysicalObject" => nil,
403
+ "Preprint" => nil,
404
+ "Report" => "RRPT",
333
405
  "Service" => nil,
334
406
  "Software" => "COMP",
335
407
  "Sound" => "SOUND",
408
+ "Standard" => nil,
336
409
  "Text" => "RPRT",
337
410
  "Workflow" => nil,
338
411
  "Other" => nil
339
412
  }
340
413
 
414
+ RIS_TO_DC_TRANSLATIONS = {
415
+ "BLOG" => "Text",
416
+ "GEN" => "Text",
417
+ "CTLG" => "Collection",
418
+ "DATA" => "Dataset",
419
+ "FIGURE" => "Image",
420
+ "THES" => "Dissertation",
421
+ "MPCT" => "Audiovisual",
422
+ "JOUR" => "JournalArticle",
423
+ "COMP" => "Software",
424
+ "VIDEO" => "Audiovisual",
425
+ "ELEC" => "Text"
426
+ }
427
+
428
+ BIB_TO_DC_TRANSLATIONS = {
429
+ "article" => "JournalArticle",
430
+ "book" => "Book",
431
+ "inbook" => "BookChapter",
432
+ "inproceedings" => nil,
433
+ "manual" => nil,
434
+ "misc" => "Other",
435
+ "phdthesis" => "Dissertation",
436
+ "proceedings" => "ConferenceProceeding",
437
+ "techreport" => "Report",
438
+ "unpublished" => nil
439
+ }
440
+
441
+ CP_TO_DC_TRANSLATIONS = {
442
+ "song" => "Audiovisual",
443
+ "post-weblog" => "Text",
444
+ "dataset" => "Dataset",
445
+ "graphic" => "Image",
446
+ "motion_picture" => "Audiovisual",
447
+ "article-journal" => "JournalArticle",
448
+ "broadcast" => "Audiovisual",
449
+ "webpage" => "Text"
450
+ }
451
+
341
452
  SO_TO_BIB_TRANSLATIONS = {
342
453
  "Article" => "article",
343
454
  "AudioObject" => "misc",
@@ -415,7 +526,7 @@ module Bolognese
415
526
  "ris"
416
527
  elsif options[:ext] == ".xml" && Maremma.from_xml(string).to_h.dig("crossref_result", "query_result", "body", "query", "doi_record", "crossref")
417
528
  "crossref"
418
- elsif options[:ext] == ".xml" && Nokogiri::XML(string, nil, 'UTF-8', &:noblanks).collect_namespaces.find { |k, v| v.start_with?("http://datacite.org/schema/kernel") }
529
+ elsif options[:ext] == ".xml" && Nokogiri::XML(string, nil, 'UTF-8', &:noblanks).collect_namespaces.find { |k, v| v.start_with?("http://datacite.org/schema/kernel") }
419
530
  "datacite"
420
531
  elsif options[:ext] == ".json" && Maremma.from_json(string).to_h.dig("@context").to_s.start_with?("http://schema.org", "https://schema.org")
421
532
  "schema_org"
@@ -423,7 +534,7 @@ module Bolognese
423
534
  "codemeta"
424
535
  elsif options[:ext] == ".json" && Maremma.from_json(string).to_h.dig("schemaVersion").to_s.start_with?("http://datacite.org/schema/kernel")
425
536
  "datacite_json"
426
- elsif options[:ext] == ".json" && Maremma.from_json(string).to_h.dig("types")
537
+ elsif options[:ext] == ".json" && Maremma.from_json(string).to_h.dig("types") && Maremma.from_json(string).to_h.dig("publication_year").present?
427
538
  "crosscite"
428
539
  elsif options[:ext] == ".json" && Maremma.from_json(string).to_h.dig("issued", "date-parts").present?
429
540
  "citeproc"
@@ -433,7 +544,7 @@ module Bolognese
433
544
  def find_from_format_by_string(string)
434
545
  if Maremma.from_xml(string).to_h.dig("crossref_result", "query_result", "body", "query", "doi_record", "crossref").present?
435
546
  "crossref"
436
- elsif Nokogiri::XML(string, nil, 'UTF-8', &:noblanks).collect_namespaces.find { |k, v| v.start_with?("http://datacite.org/schema/kernel") }
547
+ elsif Nokogiri::XML(string, nil, 'UTF-8', &:noblanks).collect_namespaces.find { |k, v| v.start_with?("http://datacite.org/schema/kernel") }
437
548
  "datacite"
438
549
  elsif Maremma.from_json(string).to_h.dig("@context").to_s.start_with?("http://schema.org", "https://schema.org")
439
550
  "schema_org"
@@ -441,7 +552,7 @@ module Bolognese
441
552
  "codemeta"
442
553
  elsif Maremma.from_json(string).to_h.dig("schema-version").to_s.start_with?("http://datacite.org/schema/kernel")
443
554
  "datacite_json"
444
- elsif Maremma.from_json(string).to_h.dig("types").present?
555
+ elsif Maremma.from_json(string).to_h.dig("types").present? && Maremma.from_json(string).to_h.dig("publication_year").present?
445
556
  "crosscite"
446
557
  elsif Maremma.from_json(string).to_h.dig("issued", "date-parts").present?
447
558
  "citeproc"
@@ -523,7 +634,7 @@ module Bolognese
523
634
  uri = Addressable::URI.parse(id)
524
635
 
525
636
  return nil unless uri && uri.host && %w(http https ftp).include?(uri.scheme)
526
-
637
+
527
638
  # optionally turn into https URL
528
639
  uri.scheme = "https" if options[:https]
529
640
 
@@ -654,8 +765,8 @@ module Bolognese
654
765
  affiliation_identifier = a["affiliationIdentifier"]
655
766
  end
656
767
 
657
- {
658
- "@type" => "Organization",
768
+ {
769
+ "@type" => "Organization",
659
770
  "@id" => affiliation_identifier,
660
771
  "name" => name }.compact
661
772
  end.unwrap
@@ -677,8 +788,8 @@ module Bolognese
677
788
  affiliation_identifier = a["affiliationIdentifier"]
678
789
  end
679
790
 
680
- {
681
- "@type" => "Organization",
791
+ {
792
+ "@type" => "Organization",
682
793
  "@id" => affiliation_identifier,
683
794
  "name" => name }.compact
684
795
  end.unwrap
@@ -692,7 +803,7 @@ module Bolognese
692
803
  def to_schema_org_container(element, options={})
693
804
  return nil unless (element.is_a?(Hash) || (element.nil? && options[:container_title].present?))
694
805
 
695
- {
806
+ {
696
807
  "@id" => element["identifier"],
697
808
  "@type" => (options[:type] == "Dataset") ? "DataCatalog" : "Periodical",
698
809
  "name" => element["title"] || options[:container_title] }.compact
@@ -700,7 +811,7 @@ module Bolognese
700
811
 
701
812
  def to_schema_org_identifiers(element, options={})
702
813
  Array.wrap(element).map do |ai|
703
- {
814
+ {
704
815
  "@type" => "PropertyValue",
705
816
  "propertyID" => ai["identifierType"],
706
817
  "value" => ai["identifier"] }
@@ -710,7 +821,7 @@ module Bolognese
710
821
  def to_schema_org_relation(related_identifiers: nil, relation_type: nil)
711
822
  return nil unless related_identifiers.present? && relation_type.present?
712
823
 
713
- relation_type = relation_type == "References" ? ["References", "Cites", "Documents"] : [relation_type]
824
+ relation_type = relation_type == "References" ? ["References", "Cites", "Documents"] : [relation_type]
714
825
 
715
826
  Array.wrap(related_identifiers).select { |ri| relation_type.include?(ri["relationType"]) }.map do |r|
716
827
  if r["relatedIdentifierType"] == "ISSN" && r["relationType"] == "IsPartOf"
@@ -741,7 +852,7 @@ module Bolognese
741
852
 
742
853
  Array.wrap(geo_location).reduce([]) do |sum, gl|
743
854
  if gl.fetch("geoLocationPoint", nil)
744
- sum << {
855
+ sum << {
745
856
  "@type" => "Place",
746
857
  "geo" => {
747
858
  "@type" => "GeoCoordinates",
@@ -752,7 +863,7 @@ module Bolognese
752
863
  end
753
864
 
754
865
  if gl.fetch("geoLocationBox", nil)
755
- sum << {
866
+ sum << {
756
867
  "@type" => "Place",
757
868
  "geo" => {
758
869
  "@type" => "GeoShape",
@@ -765,19 +876,22 @@ module Bolognese
765
876
  end
766
877
 
767
878
  if gl.fetch("geoLocationPolygon", nil)
768
- sum << {
879
+ sum << {
769
880
  "@type" => "Place",
770
881
  "geo" => {
771
882
  "@type" => "GeoShape",
772
883
  "address" => gl["geoLocationPlace"],
773
- "polygon" => Array.wrap(gl.dig("geoLocationPolygon")).map do |glp|
774
- [glp.dig("polygonPoint", "pointLongitude"), glp.dig("polygonPoint", "pointLatitude")].compact
775
- end.compact }
884
+ "polygon" => Array.wrap(gl.dig("geoLocationPolygon")).map do |glp|
885
+ Array.wrap(glp).map do |glpp|
886
+ [glpp.dig("polygonPoint", "pointLongitude"), glpp.dig("polygonPoint", "pointLatitude")].compact
887
+ end.compact
888
+ end.compact.presence,
889
+ }
776
890
  }
777
891
  end
778
892
 
779
893
  if gl.fetch("geoLocationPlace", nil) && !gl.fetch("geoLocationPoint", nil) && !gl.fetch("geoLocationBox", nil) && !gl.fetch("geoLocationPolygon", nil)
780
- sum << {
894
+ sum << {
781
895
  "@type" => "Place",
782
896
  "geo" => {
783
897
  "@type" => "GeoCoordinates",
@@ -992,7 +1106,7 @@ module Bolognese
992
1106
 
993
1107
  def get_iso8601_date(iso8601_time)
994
1108
  return nil if iso8601_time.nil?
995
-
1109
+
996
1110
  iso8601_time[0..9]
997
1111
  end
998
1112
 
@@ -1089,7 +1203,7 @@ module Bolognese
1089
1203
  first_page = pages.present? ? pages.split("-").map(&:strip)[0] : nil
1090
1204
  last_page = pages.present? ? pages.split("-").map(&:strip)[1] : nil
1091
1205
 
1092
- {
1206
+ {
1093
1207
  "title" => title,
1094
1208
  "volume" => volume,
1095
1209
  "issue" => issue,
@@ -1148,11 +1262,11 @@ module Bolognese
1148
1262
  def name_to_fos(name)
1149
1263
  # first find subject in Fields of Science (OECD)
1150
1264
  fos = JSON.load(File.read(File.expand_path('../../../resources/oecd/fos-mappings.json', __FILE__))).fetch("fosFields")
1151
-
1265
+
1152
1266
  subject = fos.find { |l| l["fosLabel"] == name || "FOS: " + l["fosLabel"] == name }
1153
1267
 
1154
1268
  if subject
1155
- return [{
1269
+ return [{
1156
1270
  "subject" => sanitize(name) },
1157
1271
  {
1158
1272
  "subject" => "FOS: " + subject["fosLabel"],
@@ -1166,12 +1280,12 @@ module Bolognese
1166
1280
  fores = JSON.load(File.read(File.expand_path('../../../resources/oecd/for-mappings.json', __FILE__)))
1167
1281
  for_fields = fores.fetch("forFields")
1168
1282
  for_disciplines = fores.fetch("forDisciplines")
1169
-
1283
+
1170
1284
  subject = for_fields.find { |l| l["forLabel"] == name } ||
1171
1285
  for_disciplines.find { |l| l["forLabel"] == name }
1172
1286
 
1173
1287
  if subject
1174
- [{
1288
+ [{
1175
1289
  "subject" => sanitize(name) },
1176
1290
  {
1177
1291
  "subject" => "FOS: " + subject["fosLabel"],
@@ -1189,16 +1303,17 @@ module Bolognese
1189
1303
  subject = fos.find { |l| l["fosLabel"] == hsh["__content__"] || "FOS: " + l["fosLabel"] == hsh["__content__"] || l["fosLabel"] == hsh["subject"]}
1190
1304
 
1191
1305
  if subject
1192
- return [{
1193
- "subject" => sanitize(hsh["__content__"] || hsh["subject"]),
1306
+ return [{
1307
+ "subject" => sanitize(hsh["__content__"] || hsh["subject"]),
1194
1308
  "subjectScheme" => hsh["subjectScheme"],
1195
- "schemeUri" => hsh["schemeURI"],
1196
- "valueUri" => hsh["valueURI"],
1309
+ "schemeUri" => hsh["schemeURI"],
1310
+ "valueUri" => hsh["valueURI"],
1311
+ "classificationCode" => hsh["classificationCode"],
1197
1312
  "lang" => hsh["lang"] }.compact,
1198
- {
1313
+ {
1199
1314
  "subject" => "FOS: " + subject["fosLabel"],
1200
1315
  "subjectScheme" => "Fields of Science and Technology (FOS)",
1201
- "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf" }.compact]
1316
+ "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf" }.compact]
1202
1317
  end
1203
1318
 
1204
1319
  # if not found, look in Fields of Research (Australian and New Zealand Standard Research Classification)
@@ -1210,8 +1325,8 @@ module Bolognese
1210
1325
  # try to extract forId
1211
1326
  if hsh["subjectScheme"] == "FOR"
1212
1327
  for_id = hsh["__content__"].to_s.split(" ").first || hsh["subject"].to_s.split(" ").first
1213
- for_id = for_id.rjust(6, "0")
1214
-
1328
+ for_id = for_id.rjust(6, "0")
1329
+
1215
1330
  subject = for_fields.find { |l| l["forId"] == for_id } ||
1216
1331
  for_disciplines.find { |l| l["forId"] == for_id[0..3] }
1217
1332
  else
@@ -1220,11 +1335,12 @@ module Bolognese
1220
1335
  end
1221
1336
 
1222
1337
  if subject
1223
- [{
1224
- "subject" => sanitize(hsh["__content__"] || hsh["subject"]),
1338
+ [{
1339
+ "subject" => sanitize(hsh["__content__"] || hsh["subject"]),
1225
1340
  "subjectScheme" => hsh["subjectScheme"],
1226
- "schemeUri" => hsh["schemeURI"],
1227
- "valueUri" => hsh["valueURI"],
1341
+ "classificationCode" => hsh["classificationCode"],
1342
+ "schemeUri" => hsh["schemeURI"],
1343
+ "valueUri" => hsh["valueURI"],
1228
1344
  "lang" => hsh["lang"] }.compact,
1229
1345
  {
1230
1346
  "subject" => "FOS: " + subject["fosLabel"],
@@ -1232,12 +1348,13 @@ module Bolognese
1232
1348
  "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf"
1233
1349
  }]
1234
1350
  else
1235
- [{
1236
- "subject" => sanitize(hsh["__content__"] || hsh["subject"]),
1351
+ [{
1352
+ "subject" => sanitize(hsh["__content__"] || hsh["subject"]),
1237
1353
  "subjectScheme" => hsh["subjectScheme"],
1238
- "schemeUri" => hsh["schemeURI"],
1239
- "valueUri" => hsh["valueURI"],
1240
- "lang" => hsh["lang"] }.compact]
1354
+ "classificationCode" => hsh["classificationCode"],
1355
+ "schemeUri" => hsh["schemeURI"],
1356
+ "valueUri" => hsh["valueURI"],
1357
+ "lang" => hsh["lang"] }.compact]
1241
1358
  end
1242
1359
  end
1243
1360
  end