bolognese 1.8.17 → 1.9.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/build.yml +37 -0
- data/.github/workflows/changelog.yml +36 -0
- data/.github/workflows/release.yml +17 -3
- data/CHANGELOG.md +1737 -0
- data/Gemfile.lock +59 -53
- data/README.md +1 -1
- data/bolognese.gemspec +2 -2
- data/lib/bolognese/author_utils.rb +6 -5
- data/lib/bolognese/datacite_utils.rb +76 -9
- data/lib/bolognese/metadata.rb +10 -5
- data/lib/bolognese/metadata_utils.rb +3 -2
- data/lib/bolognese/readers/bibtex_reader.rb +5 -4
- data/lib/bolognese/readers/citeproc_reader.rb +1 -1
- data/lib/bolognese/readers/crossref_reader.rb +3 -3
- data/lib/bolognese/readers/datacite_reader.rb +68 -11
- data/lib/bolognese/readers/ris_reader.rb +3 -3
- data/lib/bolognese/utils.rb +163 -46
- data/lib/bolognese/version.rb +1 -1
- data/resources/kernel-4.4/include/datacite-contributorType-v4.xsd +35 -0
- data/resources/kernel-4.4/include/datacite-dateType-v4.xsd +25 -0
- data/resources/kernel-4.4/include/datacite-descriptionType-v4.xsd +19 -0
- data/resources/kernel-4.4/include/datacite-funderIdentifierType-v4.xsd +16 -0
- data/resources/kernel-4.4/include/datacite-nameType-v4.xsd +10 -0
- data/resources/kernel-4.4/include/datacite-numberType-v4.xsd +12 -0
- data/resources/kernel-4.4/include/datacite-relatedIdentifierType-v4.xsd +34 -0
- data/resources/kernel-4.4/include/datacite-relationType-v4.xsd +51 -0
- data/resources/kernel-4.4/include/datacite-resourceType-v4.xsd +43 -0
- data/resources/kernel-4.4/include/datacite-titleType-v4.xsd +14 -0
- data/resources/kernel-4.4/include/xml.xsd +286 -0
- data/resources/kernel-4.4/metadata.xsd +707 -0
- data/resources/kernel-4/include/datacite-contributorType-v4.xsd +21 -21
- data/resources/kernel-4/include/datacite-dateType-v4.xsd +11 -11
- data/resources/kernel-4/include/datacite-descriptionType-v4.xsd +6 -6
- data/resources/kernel-4/include/datacite-funderIdentifierType-v4.xsd +5 -5
- data/resources/kernel-4/include/datacite-nameType-v4.xsd +2 -2
- data/resources/kernel-4/include/datacite-numberType-v4.xsd +12 -0
- data/resources/kernel-4/include/datacite-relatedIdentifierType-v4.xsd +19 -19
- data/resources/kernel-4/include/datacite-relationType-v4.xsd +36 -34
- data/resources/kernel-4/include/datacite-resourceType-v4.xsd +32 -17
- data/resources/kernel-4/include/datacite-titleType-v4.xsd +4 -4
- data/resources/kernel-4/metadata.xsd +297 -105
- data/spec/fixtures/datacite-example-dissertation-v4.4.xml +56 -0
- data/spec/fixtures/datacite-example-full-v4.4.xml +114 -0
- data/spec/fixtures/datacite-example-relateditems.xml +61 -0
- data/spec/fixtures/datacite-example-xs-string.xml +28 -0
- data/spec/fixtures/datacite-geolocationpolygons-multiple.xml +76 -0
- data/spec/metadata_spec.rb +1 -1
- data/spec/readers/bibtex_reader_spec.rb +2 -2
- data/spec/readers/crossref_reader_spec.rb +37 -37
- data/spec/readers/datacite_json_reader_spec.rb +1 -1
- data/spec/readers/datacite_reader_spec.rb +262 -3
- data/spec/readers/ris_reader_spec.rb +3 -3
- data/spec/writers/crosscite_writer_spec.rb +3 -3
- data/spec/writers/csv_writer_spec.rb +4 -4
- data/spec/writers/datacite_json_writer_spec.rb +3 -3
- data/spec/writers/datacite_writer_spec.rb +4 -4
- data/spec/writers/schema_org_writer_spec.rb +2 -2
- metadata +29 -12
- data/.github/workflows/deploy.yml +0 -23
- data/.travis.yml +0 -35
- data/lib/bolognese/.github/workflows/build.yml +0 -28
- data/lib/bolognese/.github/workflows/release.yml +0 -39
@@ -108,12 +108,12 @@ module Bolognese
|
|
108
108
|
end
|
109
109
|
|
110
110
|
def reverse
|
111
|
-
{ "citation" => Array.wrap(related_identifiers).select { |ri| ri["relationType"] == "IsReferencedBy" }.map do |r|
|
111
|
+
{ "citation" => Array.wrap(related_identifiers).select { |ri| ri["relationType"] == "IsReferencedBy" }.map do |r|
|
112
112
|
{ "@id" => normalize_doi(r["relatedIdentifier"]),
|
113
113
|
"@type" => r["resourceTypeGeneral"] || "ScholarlyArticle",
|
114
114
|
"identifier" => r["relatedIdentifierType"] == "DOI" ? nil : to_identifier(r) }.compact
|
115
115
|
end.unwrap,
|
116
|
-
"isBasedOn" => Array.wrap(related_identifiers).select { |ri| ri["relationType"] == "IsSupplementTo" }.map do |r|
|
116
|
+
"isBasedOn" => Array.wrap(related_identifiers).select { |ri| ri["relationType"] == "IsSupplementTo" }.map do |r|
|
117
117
|
{ "@id" => normalize_doi(r["relatedIdentifier"]),
|
118
118
|
"@type" => r["resourceTypeGeneral"] || "ScholarlyArticle",
|
119
119
|
"identifier" => r["relatedIdentifierType"] == "DOI" ? nil : to_identifier(r) }.compact
|
@@ -193,6 +193,7 @@ module Bolognese
|
|
193
193
|
"geo_locations" => geo_locations,
|
194
194
|
"funding_references" => funding_references,
|
195
195
|
"related_identifiers" => related_identifiers,
|
196
|
+
"related_items" => related_items,
|
196
197
|
"schema_version" => schema_version,
|
197
198
|
"provider_id" => provider_id,
|
198
199
|
"client_id" => client_id,
|
@@ -31,12 +31,13 @@ module Bolognese
|
|
31
31
|
|
32
32
|
meta = string.present? ? BibTeX.parse(string).first : OpenStruct.new
|
33
33
|
|
34
|
-
|
34
|
+
bibtex_type = meta.try(:type).to_s
|
35
|
+
schema_org = BIB_TO_SO_TRANSLATIONS[bibtex_type] || "ScholarlyArticle"
|
35
36
|
types = {
|
36
|
-
"resourceTypeGeneral" => Metadata::
|
37
|
+
"resourceTypeGeneral" => Metadata::BIB_TO_DC_TRANSLATIONS[bibtex_type],
|
37
38
|
"resourceType" => Bolognese::Utils::BIB_TO_CR_TRANSLATIONS[meta.try(:type).to_s] || meta.try(:type).to_s,
|
38
39
|
"schemaOrg" => schema_org,
|
39
|
-
"bibtex" =>
|
40
|
+
"bibtex" => bibtex_type,
|
40
41
|
"citeproc" => BIB_TO_CP_TRANSLATIONS[meta.try(:type).to_s] || "misc",
|
41
42
|
"ris" => BIB_TO_RIS_TRANSLATIONS[meta.try(:type).to_s] || "GEN"
|
42
43
|
}.compact
|
@@ -60,7 +61,7 @@ module Bolognese
|
|
60
61
|
container = if meta.try(:journal).present?
|
61
62
|
first_page = meta.try(:pages).present? ? meta.try(:pages).split("-").map(&:strip)[0] : nil
|
62
63
|
last_page = meta.try(:pages).present? ? meta.try(:pages).split("-").map(&:strip)[1] : nil
|
63
|
-
|
64
|
+
|
64
65
|
{ "type" => "Journal",
|
65
66
|
"title" => meta.journal.to_s,
|
66
67
|
"identifier" => meta.try(:issn).to_s.presence,
|
@@ -38,7 +38,7 @@ module Bolognese
|
|
38
38
|
citeproc_type = meta.fetch("type", nil)
|
39
39
|
schema_org = CP_TO_SO_TRANSLATIONS[citeproc_type] || "CreativeWork"
|
40
40
|
types = {
|
41
|
-
"resourceTypeGeneral" => Bolognese::Utils::
|
41
|
+
"resourceTypeGeneral" => Bolognese::Utils::CP_TO_DC_TRANSLATIONS[citeproc_type],
|
42
42
|
"reourceType" => meta.fetch("additionalType", nil),
|
43
43
|
"schemaOrg" => schema_org,
|
44
44
|
"citeproc" => citeproc_type,
|
@@ -93,7 +93,7 @@ module Bolognese
|
|
93
93
|
resource_type = (resource_type || model).to_s.underscore.camelcase.presence
|
94
94
|
schema_org = Bolognese::Utils::CR_TO_SO_TRANSLATIONS[resource_type] || "ScholarlyArticle"
|
95
95
|
types = {
|
96
|
-
"resourceTypeGeneral" => Bolognese::Utils::
|
96
|
+
"resourceTypeGeneral" => Bolognese::Utils::CR_TO_DC_TRANSLATIONS[resource_type],
|
97
97
|
"resourceType" => resource_type,
|
98
98
|
"schemaOrg" => schema_org,
|
99
99
|
"citeproc" => Bolognese::Utils::CR_TO_CP_TRANSLATIONS[resource_type] || "article-journal",
|
@@ -133,10 +133,10 @@ module Bolognese
|
|
133
133
|
# check that date is valid iso8601 date
|
134
134
|
date_published = nil unless Date.edtf(date_published.to_h["date"]).present?
|
135
135
|
date_updated = nil unless Date.edtf(date_updated.to_h["date"]).present?
|
136
|
-
|
136
|
+
|
137
137
|
dates = [date_published, date_updated].compact
|
138
138
|
publication_year = date_published.to_h.fetch("date", "")[0..3].presence
|
139
|
-
|
139
|
+
|
140
140
|
state = meta.present? || read_options.present? ? "findable" : "not_found"
|
141
141
|
|
142
142
|
related_identifiers = Array.wrap(crossref_is_part_of(journal_metadata)) + Array.wrap(crossref_references(bibliographic_metadata))
|
@@ -92,15 +92,7 @@ module Bolognese
|
|
92
92
|
"ris" => Bolognese::Utils::CR_TO_RIS_TRANSLATIONS[resource_type.to_s.underscore.camelcase] || Bolognese::Utils::DC_TO_RIS_TRANSLATIONS[resource_type_general.to_s.dasherize] || "GEN"
|
93
93
|
}.compact
|
94
94
|
|
95
|
-
titles =
|
96
|
-
if r.blank?
|
97
|
-
nil
|
98
|
-
elsif r.is_a?(String)
|
99
|
-
{ "title" => sanitize(r) }
|
100
|
-
else
|
101
|
-
{ "title" => sanitize(r["__content__"]), "titleType" => r["titleType"], "lang" => r["lang"] }.compact
|
102
|
-
end
|
103
|
-
end.compact
|
95
|
+
titles = get_titles(meta)
|
104
96
|
|
105
97
|
descriptions = Array.wrap(meta.dig("descriptions", "description")).map do |r|
|
106
98
|
if r.blank?
|
@@ -120,7 +112,7 @@ module Bolognese
|
|
120
112
|
hsh_to_spdx(r)
|
121
113
|
end
|
122
114
|
end.compact
|
123
|
-
|
115
|
+
|
124
116
|
subjects = Array.wrap(meta.dig("subjects", "subject")).reduce([]) do |sum, subject|
|
125
117
|
if subject.is_a?(String)
|
126
118
|
sum += name_to_fos(subject)
|
@@ -194,10 +186,59 @@ module Bolognese
|
|
194
186
|
"schemeType" => ri["schemeType"]
|
195
187
|
}.compact
|
196
188
|
end
|
189
|
+
|
190
|
+
related_items = Array.wrap(meta.dig("relatedItems", "relatedItem")).map do |ri|
|
191
|
+
rii = ri["relatedItemIdentifier"]
|
192
|
+
if rii["relatedItemIdentifierType"] == "DOI"
|
193
|
+
rid = validate_doi(rii["__content__"].to_s.downcase)
|
194
|
+
else
|
195
|
+
rid = rii["__content__"]
|
196
|
+
end
|
197
|
+
|
198
|
+
relatedItemIdentifier = {
|
199
|
+
"relatedItemIdentifier" => rid,
|
200
|
+
"relatedItemIdentifierType" => rii["relatedItemIdentifierType"],
|
201
|
+
"relatedMetadataScheme" => rii["relatedMetadataScheme"],
|
202
|
+
"schemeURI" => rii["schemeURI"],
|
203
|
+
"schemeType" => rii["schemeType"]
|
204
|
+
}.compact
|
205
|
+
|
206
|
+
{
|
207
|
+
"relationType" => ri["relationType"],
|
208
|
+
"relatedItemType" => ri["relatedItemType"],
|
209
|
+
"relatedItemIdentifier" => relatedItemIdentifier,
|
210
|
+
"creators" => get_authors(Array.wrap(ri.dig("creators", "creator"))),
|
211
|
+
"titles" => get_titles(ri),
|
212
|
+
"publicationYear" => ri["publicationYear"],
|
213
|
+
"volume" => ri["volume"],
|
214
|
+
"issue" => ri["issue"],
|
215
|
+
"number" => ri.dig("number", "__content__"),
|
216
|
+
"numberType" => ri.dig("number", "numberType"),
|
217
|
+
"firstPage" => ri["firstPage"],
|
218
|
+
"lastPage" => ri["lastPage"],
|
219
|
+
"publisher" => ri["publisher"],
|
220
|
+
"edition" => ri["edition"],
|
221
|
+
"contributors" => get_authors(Array.wrap(ri.dig("contributors", "contributor"))),
|
222
|
+
}.compact
|
223
|
+
end
|
224
|
+
|
197
225
|
geo_locations = Array.wrap(meta.dig("geoLocations", "geoLocation")).map do |gl|
|
198
226
|
if !gl.is_a?(Hash) || gl["geoLocationPoint"].is_a?(String) || gl["geoLocationBox"].is_a?(String) || gl["geoLocationPolygon"].is_a?(String)
|
199
227
|
nil
|
200
228
|
else
|
229
|
+
|
230
|
+
# Handle scenario where multiple geoLocationPolygons are allowed within a single geoLocation
|
231
|
+
# we want to return an array if it's already an array (i.e. multiple geoLocationPolygons)
|
232
|
+
# vs if it's singular just return the object
|
233
|
+
# This is for backwards compatibility to allow both scenarios.
|
234
|
+
if gl.dig("geoLocationPolygon").kind_of?(Array)
|
235
|
+
geoLocationPolygon = gl.dig("geoLocationPolygon").map do |glp|
|
236
|
+
Array.wrap(glp.dig("polygonPoint")).map { |glpp| { "polygonPoint" => glpp } }.compact.presence
|
237
|
+
end.compact.presence
|
238
|
+
else
|
239
|
+
geoLocationPolygon = Array.wrap(gl.dig("geoLocationPolygon", "polygonPoint")).map { |glp| { "polygonPoint" => glp } }.compact.presence
|
240
|
+
end
|
241
|
+
|
201
242
|
{
|
202
243
|
"geoLocationPoint" => {
|
203
244
|
"pointLatitude" => gl.dig("geoLocationPoint", "pointLatitude"),
|
@@ -209,7 +250,7 @@ module Bolognese
|
|
209
250
|
"southBoundLatitude" => gl.dig("geoLocationBox", "southBoundLatitude"),
|
210
251
|
"northBoundLatitude" => gl.dig("geoLocationBox", "northBoundLatitude")
|
211
252
|
}.compact.presence,
|
212
|
-
"geoLocationPolygon" =>
|
253
|
+
"geoLocationPolygon" => geoLocationPolygon,
|
213
254
|
"geoLocationPlace" => parse_attributes(gl["geoLocationPlace"], first: true).to_s.strip.presence
|
214
255
|
}.compact
|
215
256
|
end
|
@@ -238,6 +279,7 @@ module Bolognese
|
|
238
279
|
"language" => parse_attributes(meta.fetch("language", nil), first: true).to_s.strip.presence,
|
239
280
|
"geo_locations" => geo_locations,
|
240
281
|
"related_identifiers" => related_identifiers,
|
282
|
+
"related_items" => related_items,
|
241
283
|
"formats" => formats,
|
242
284
|
"sizes" => sizes,
|
243
285
|
"schema_version" => schema_version,
|
@@ -266,6 +308,21 @@ module Bolognese
|
|
266
308
|
{}
|
267
309
|
end
|
268
310
|
end
|
311
|
+
|
312
|
+
def get_titles(meta)
|
313
|
+
titles = Array.wrap(meta.dig("titles", "title")).map do |r|
|
314
|
+
if r.blank?
|
315
|
+
nil
|
316
|
+
elsif r.is_a?(String)
|
317
|
+
{ "title" => sanitize(r) }
|
318
|
+
else
|
319
|
+
{ "title" => sanitize(r["__content__"]), "titleType" => r["titleType"], "lang" => r["lang"] }.compact
|
320
|
+
end
|
321
|
+
end.compact
|
322
|
+
|
323
|
+
titles
|
324
|
+
end
|
325
|
+
|
269
326
|
end
|
270
327
|
end
|
271
328
|
end
|
@@ -41,14 +41,14 @@ module Bolognese
|
|
41
41
|
ris_type = meta.fetch("TY", nil) || "GEN"
|
42
42
|
schema_org = RIS_TO_SO_TRANSLATIONS[ris_type] || "CreativeWork"
|
43
43
|
types = {
|
44
|
-
"resourceTypeGeneral" => Metadata::
|
44
|
+
"resourceTypeGeneral" => Metadata::RIS_TO_DC_TRANSLATIONS[ris_type],
|
45
45
|
"schemaOrg" => schema_org,
|
46
46
|
"citeproc" => RIS_TO_CP_TRANSLATIONS[schema_org] || "misc",
|
47
47
|
"ris" => ris_type
|
48
48
|
}.compact
|
49
49
|
|
50
50
|
id = normalize_doi(options[:doi] || meta.fetch("DO", nil))
|
51
|
-
|
51
|
+
|
52
52
|
author = Array.wrap(meta.fetch("AU", nil)).map { |a| { "creatorName" => a } }
|
53
53
|
date_parts = meta.fetch("PY", nil).to_s.split("/")
|
54
54
|
created_date_parts = meta.fetch("Y1", nil).to_s.split("/")
|
@@ -67,7 +67,7 @@ module Bolognese
|
|
67
67
|
end
|
68
68
|
container = if meta.fetch("T2", nil).present?
|
69
69
|
{ "type" => "Journal",
|
70
|
-
"title" => meta.fetch("T2", nil),
|
70
|
+
"title" => meta.fetch("T2", nil),
|
71
71
|
"identifier" => meta.fetch("SN", nil),
|
72
72
|
"volume" => meta.fetch("VL", nil),
|
73
73
|
"issue" => meta.fetch("IS", nil),
|
data/lib/bolognese/utils.rb
CHANGED
@@ -45,16 +45,30 @@ module Bolognese
|
|
45
45
|
|
46
46
|
DC_TO_SO_TRANSLATIONS = {
|
47
47
|
"Audiovisual" => "MediaObject",
|
48
|
+
"Book" => "Book",
|
49
|
+
"BookChapter" => "Chapter",
|
48
50
|
"Collection" => "Collection",
|
51
|
+
"ComputationalNotebook" => "SoftwareSourceCode",
|
52
|
+
"ConferencePaper" => "Article",
|
53
|
+
"ConferenceProceeding" => "Periodical",
|
54
|
+
"DataPaper" => "Article",
|
49
55
|
"Dataset" => "Dataset",
|
56
|
+
"Dissertation" => "Thesis",
|
50
57
|
"Event" => "Event",
|
51
58
|
"Image" => "ImageObject",
|
52
59
|
"InteractiveResource" => nil,
|
60
|
+
"Journal" => "Periodical",
|
61
|
+
"JournalArticle" => "ScholarlyArticle",
|
53
62
|
"Model" => nil,
|
63
|
+
"OutputManagementPlan" => nil,
|
64
|
+
"PeerReview" => "Review",
|
54
65
|
"PhysicalObject" => nil,
|
66
|
+
"Preprint" => nil,
|
67
|
+
"Report" => "Report",
|
55
68
|
"Service" => "Service",
|
56
69
|
"Software" => "SoftwareSourceCode",
|
57
70
|
"Sound" => "AudioObject",
|
71
|
+
"Standard" => nil,
|
58
72
|
"Text" => "ScholarlyArticle",
|
59
73
|
"Workflow" => nil,
|
60
74
|
"Other" => "CreativeWork",
|
@@ -65,15 +79,29 @@ module Bolognese
|
|
65
79
|
|
66
80
|
DC_TO_CP_TRANSLATIONS = {
|
67
81
|
"Audiovisual" => "motion_picture",
|
82
|
+
"Book" => "book",
|
83
|
+
"BookChapter" => "chapter",
|
68
84
|
"Collection" => nil,
|
85
|
+
"ComputationalNotebook" => nil,
|
86
|
+
"ConferencePaper" => "paper_conference",
|
87
|
+
"ConferenceProceeding" => nil,
|
88
|
+
"DataPaper" => "report",
|
69
89
|
"Dataset" => "dataset",
|
90
|
+
"Dissertation" => nil,
|
70
91
|
"Event" => nil,
|
71
92
|
"Image" => "graphic",
|
72
93
|
"InteractiveResource" => nil,
|
94
|
+
"Journal" => nil,
|
95
|
+
"JournalArticle" => "article-journal",
|
73
96
|
"Model" => nil,
|
97
|
+
"OutputManagementPlan" => nil,
|
98
|
+
"PeerReview" => "review",
|
74
99
|
"PhysicalObject" => nil,
|
100
|
+
"Preprint" => nil,
|
101
|
+
"Report" => "report",
|
75
102
|
"Service" => nil,
|
76
103
|
"Sound" => "song",
|
104
|
+
"Standard" => nil,
|
77
105
|
"Text" => "report",
|
78
106
|
"Workflow" => nil,
|
79
107
|
"Other" => nil
|
@@ -202,13 +230,44 @@ module Bolognese
|
|
202
230
|
"BookSet" => "book"
|
203
231
|
}
|
204
232
|
|
233
|
+
CR_TO_DC_TRANSLATIONS = {
|
234
|
+
"Proceedings" => nil,
|
235
|
+
"ReferenceBook" => nil,
|
236
|
+
"JournalIssue" => "Text",
|
237
|
+
"ProceedingsArticle" => "ConferencePaper",
|
238
|
+
"Other" => "Other",
|
239
|
+
"Dissertation" => "Dissertation",
|
240
|
+
"Dataset" => "Dataset",
|
241
|
+
"EditedBook" => "Book",
|
242
|
+
"JournalArticle" => "JournalArticle",
|
243
|
+
"Journal" => "Journal",
|
244
|
+
"Report" => "Report",
|
245
|
+
"BookSeries" => nil,
|
246
|
+
"ReportSeries" => nil,
|
247
|
+
"BookTrack" => nil,
|
248
|
+
"Standard" => "Standard",
|
249
|
+
"BookSection" => "BookChapter",
|
250
|
+
"BookPart" => nil,
|
251
|
+
"Book" => "Book",
|
252
|
+
"BookChapter" => "BookChapter",
|
253
|
+
"SaComponent" => "Text",
|
254
|
+
"StandardSeries" => "Standard",
|
255
|
+
"Monograph" => "book",
|
256
|
+
"Component" => nil,
|
257
|
+
"ReferenceEntry" => nil,
|
258
|
+
"JournalVolume" => nil,
|
259
|
+
"BookSet" => nil,
|
260
|
+
"PostedContent" => "JournalArticle",
|
261
|
+
"PeerReview" => "PeerReview"
|
262
|
+
}
|
263
|
+
|
205
264
|
SO_TO_DC_TRANSLATIONS = {
|
206
265
|
"Article" => "Text",
|
207
266
|
"AudioObject" => "Sound",
|
208
267
|
"Blog" => "Text",
|
209
268
|
"BlogPosting" => "Text",
|
210
|
-
"Book" => "
|
211
|
-
"Chapter" => "
|
269
|
+
"Book" => "Book",
|
270
|
+
"Chapter" => "BookChapter",
|
212
271
|
"Collection" => "Collection",
|
213
272
|
"DataCatalog" => "Dataset",
|
214
273
|
"Dataset" => "Dataset",
|
@@ -216,11 +275,11 @@ module Bolognese
|
|
216
275
|
"ImageObject" => "Image",
|
217
276
|
"Movie" => "Audiovisual",
|
218
277
|
"PublicationIssue" => "Text",
|
219
|
-
"Report" => "
|
278
|
+
"Report" => "Report",
|
220
279
|
"ScholarlyArticle" => "Text",
|
221
280
|
"Thesis" => "Text",
|
222
281
|
"Service" => "Service",
|
223
|
-
"Review" => "
|
282
|
+
"Review" => "PeerReview",
|
224
283
|
"SoftwareSourceCode" => "Software",
|
225
284
|
"VideoObject" => "Audiovisual",
|
226
285
|
"WebPage" => "Text",
|
@@ -323,21 +382,73 @@ module Bolognese
|
|
323
382
|
|
324
383
|
DC_TO_RIS_TRANSLATIONS = {
|
325
384
|
"Audiovisual" => "MPCT",
|
385
|
+
"Book" => "BOOK",
|
386
|
+
"BookChapter" => "CHAP",
|
326
387
|
"Collection" => nil,
|
388
|
+
"ComputationalNotebook" => "COMP",
|
389
|
+
"ConferencePaper" => nil,
|
390
|
+
"ConferenceProceeding" => nil,
|
391
|
+
"DataPaper" => nil,
|
327
392
|
"Dataset" => "DATA",
|
393
|
+
"Dissertation" => "THES",
|
328
394
|
"Event" => nil,
|
329
395
|
"Image" => "FIGURE",
|
330
396
|
"InteractiveResource" => nil,
|
397
|
+
"Journal" => nil,
|
398
|
+
"JournalArticle" => "JOUR",
|
331
399
|
"Model" => nil,
|
400
|
+
"OutputManagementPlan" => nil,
|
401
|
+
"PeerReview" => nil,
|
332
402
|
"PhysicalObject" => nil,
|
403
|
+
"Preprint" => nil,
|
404
|
+
"Report" => "RRPT",
|
333
405
|
"Service" => nil,
|
334
406
|
"Software" => "COMP",
|
335
407
|
"Sound" => "SOUND",
|
408
|
+
"Standard" => nil,
|
336
409
|
"Text" => "RPRT",
|
337
410
|
"Workflow" => nil,
|
338
411
|
"Other" => nil
|
339
412
|
}
|
340
413
|
|
414
|
+
RIS_TO_DC_TRANSLATIONS = {
|
415
|
+
"BLOG" => "Text",
|
416
|
+
"GEN" => "Text",
|
417
|
+
"CTLG" => "Collection",
|
418
|
+
"DATA" => "Dataset",
|
419
|
+
"FIGURE" => "Image",
|
420
|
+
"THES" => "Dissertation",
|
421
|
+
"MPCT" => "Audiovisual",
|
422
|
+
"JOUR" => "JournalArticle",
|
423
|
+
"COMP" => "Software",
|
424
|
+
"VIDEO" => "Audiovisual",
|
425
|
+
"ELEC" => "Text"
|
426
|
+
}
|
427
|
+
|
428
|
+
BIB_TO_DC_TRANSLATIONS = {
|
429
|
+
"article" => "JournalArticle",
|
430
|
+
"book" => "Book",
|
431
|
+
"inbook" => "BookChapter",
|
432
|
+
"inproceedings" => nil,
|
433
|
+
"manual" => nil,
|
434
|
+
"misc" => "Other",
|
435
|
+
"phdthesis" => "Dissertation",
|
436
|
+
"proceedings" => "ConferenceProceeding",
|
437
|
+
"techreport" => "Report",
|
438
|
+
"unpublished" => nil
|
439
|
+
}
|
440
|
+
|
441
|
+
CP_TO_DC_TRANSLATIONS = {
|
442
|
+
"song" => "Audiovisual",
|
443
|
+
"post-weblog" => "Text",
|
444
|
+
"dataset" => "Dataset",
|
445
|
+
"graphic" => "Image",
|
446
|
+
"motion_picture" => "Audiovisual",
|
447
|
+
"article-journal" => "JournalArticle",
|
448
|
+
"broadcast" => "Audiovisual",
|
449
|
+
"webpage" => "Text"
|
450
|
+
}
|
451
|
+
|
341
452
|
SO_TO_BIB_TRANSLATIONS = {
|
342
453
|
"Article" => "article",
|
343
454
|
"AudioObject" => "misc",
|
@@ -415,7 +526,7 @@ module Bolognese
|
|
415
526
|
"ris"
|
416
527
|
elsif options[:ext] == ".xml" && Maremma.from_xml(string).to_h.dig("crossref_result", "query_result", "body", "query", "doi_record", "crossref")
|
417
528
|
"crossref"
|
418
|
-
elsif options[:ext] == ".xml" && Nokogiri::XML(string, nil, 'UTF-8', &:noblanks).collect_namespaces.find { |k, v| v.start_with?("http://datacite.org/schema/kernel") }
|
529
|
+
elsif options[:ext] == ".xml" && Nokogiri::XML(string, nil, 'UTF-8', &:noblanks).collect_namespaces.find { |k, v| v.start_with?("http://datacite.org/schema/kernel") }
|
419
530
|
"datacite"
|
420
531
|
elsif options[:ext] == ".json" && Maremma.from_json(string).to_h.dig("@context").to_s.start_with?("http://schema.org", "https://schema.org")
|
421
532
|
"schema_org"
|
@@ -423,7 +534,7 @@ module Bolognese
|
|
423
534
|
"codemeta"
|
424
535
|
elsif options[:ext] == ".json" && Maremma.from_json(string).to_h.dig("schemaVersion").to_s.start_with?("http://datacite.org/schema/kernel")
|
425
536
|
"datacite_json"
|
426
|
-
elsif options[:ext] == ".json" && Maremma.from_json(string).to_h.dig("types")
|
537
|
+
elsif options[:ext] == ".json" && Maremma.from_json(string).to_h.dig("types") && Maremma.from_json(string).to_h.dig("publication_year").present?
|
427
538
|
"crosscite"
|
428
539
|
elsif options[:ext] == ".json" && Maremma.from_json(string).to_h.dig("issued", "date-parts").present?
|
429
540
|
"citeproc"
|
@@ -433,7 +544,7 @@ module Bolognese
|
|
433
544
|
def find_from_format_by_string(string)
|
434
545
|
if Maremma.from_xml(string).to_h.dig("crossref_result", "query_result", "body", "query", "doi_record", "crossref").present?
|
435
546
|
"crossref"
|
436
|
-
elsif Nokogiri::XML(string, nil, 'UTF-8', &:noblanks).collect_namespaces.find { |k, v| v.start_with?("http://datacite.org/schema/kernel") }
|
547
|
+
elsif Nokogiri::XML(string, nil, 'UTF-8', &:noblanks).collect_namespaces.find { |k, v| v.start_with?("http://datacite.org/schema/kernel") }
|
437
548
|
"datacite"
|
438
549
|
elsif Maremma.from_json(string).to_h.dig("@context").to_s.start_with?("http://schema.org", "https://schema.org")
|
439
550
|
"schema_org"
|
@@ -441,7 +552,7 @@ module Bolognese
|
|
441
552
|
"codemeta"
|
442
553
|
elsif Maremma.from_json(string).to_h.dig("schema-version").to_s.start_with?("http://datacite.org/schema/kernel")
|
443
554
|
"datacite_json"
|
444
|
-
elsif Maremma.from_json(string).to_h.dig("types").present?
|
555
|
+
elsif Maremma.from_json(string).to_h.dig("types").present? && Maremma.from_json(string).to_h.dig("publication_year").present?
|
445
556
|
"crosscite"
|
446
557
|
elsif Maremma.from_json(string).to_h.dig("issued", "date-parts").present?
|
447
558
|
"citeproc"
|
@@ -523,7 +634,7 @@ module Bolognese
|
|
523
634
|
uri = Addressable::URI.parse(id)
|
524
635
|
|
525
636
|
return nil unless uri && uri.host && %w(http https ftp).include?(uri.scheme)
|
526
|
-
|
637
|
+
|
527
638
|
# optionally turn into https URL
|
528
639
|
uri.scheme = "https" if options[:https]
|
529
640
|
|
@@ -654,8 +765,8 @@ module Bolognese
|
|
654
765
|
affiliation_identifier = a["affiliationIdentifier"]
|
655
766
|
end
|
656
767
|
|
657
|
-
{
|
658
|
-
"@type" => "Organization",
|
768
|
+
{
|
769
|
+
"@type" => "Organization",
|
659
770
|
"@id" => affiliation_identifier,
|
660
771
|
"name" => name }.compact
|
661
772
|
end.unwrap
|
@@ -677,8 +788,8 @@ module Bolognese
|
|
677
788
|
affiliation_identifier = a["affiliationIdentifier"]
|
678
789
|
end
|
679
790
|
|
680
|
-
{
|
681
|
-
"@type" => "Organization",
|
791
|
+
{
|
792
|
+
"@type" => "Organization",
|
682
793
|
"@id" => affiliation_identifier,
|
683
794
|
"name" => name }.compact
|
684
795
|
end.unwrap
|
@@ -692,7 +803,7 @@ module Bolognese
|
|
692
803
|
def to_schema_org_container(element, options={})
|
693
804
|
return nil unless (element.is_a?(Hash) || (element.nil? && options[:container_title].present?))
|
694
805
|
|
695
|
-
{
|
806
|
+
{
|
696
807
|
"@id" => element["identifier"],
|
697
808
|
"@type" => (options[:type] == "Dataset") ? "DataCatalog" : "Periodical",
|
698
809
|
"name" => element["title"] || options[:container_title] }.compact
|
@@ -700,7 +811,7 @@ module Bolognese
|
|
700
811
|
|
701
812
|
def to_schema_org_identifiers(element, options={})
|
702
813
|
Array.wrap(element).map do |ai|
|
703
|
-
{
|
814
|
+
{
|
704
815
|
"@type" => "PropertyValue",
|
705
816
|
"propertyID" => ai["identifierType"],
|
706
817
|
"value" => ai["identifier"] }
|
@@ -710,7 +821,7 @@ module Bolognese
|
|
710
821
|
def to_schema_org_relation(related_identifiers: nil, relation_type: nil)
|
711
822
|
return nil unless related_identifiers.present? && relation_type.present?
|
712
823
|
|
713
|
-
relation_type = relation_type == "References" ? ["References", "Cites", "Documents"] : [relation_type]
|
824
|
+
relation_type = relation_type == "References" ? ["References", "Cites", "Documents"] : [relation_type]
|
714
825
|
|
715
826
|
Array.wrap(related_identifiers).select { |ri| relation_type.include?(ri["relationType"]) }.map do |r|
|
716
827
|
if r["relatedIdentifierType"] == "ISSN" && r["relationType"] == "IsPartOf"
|
@@ -741,7 +852,7 @@ module Bolognese
|
|
741
852
|
|
742
853
|
Array.wrap(geo_location).reduce([]) do |sum, gl|
|
743
854
|
if gl.fetch("geoLocationPoint", nil)
|
744
|
-
sum << {
|
855
|
+
sum << {
|
745
856
|
"@type" => "Place",
|
746
857
|
"geo" => {
|
747
858
|
"@type" => "GeoCoordinates",
|
@@ -752,7 +863,7 @@ module Bolognese
|
|
752
863
|
end
|
753
864
|
|
754
865
|
if gl.fetch("geoLocationBox", nil)
|
755
|
-
sum << {
|
866
|
+
sum << {
|
756
867
|
"@type" => "Place",
|
757
868
|
"geo" => {
|
758
869
|
"@type" => "GeoShape",
|
@@ -765,19 +876,22 @@ module Bolognese
|
|
765
876
|
end
|
766
877
|
|
767
878
|
if gl.fetch("geoLocationPolygon", nil)
|
768
|
-
sum << {
|
879
|
+
sum << {
|
769
880
|
"@type" => "Place",
|
770
881
|
"geo" => {
|
771
882
|
"@type" => "GeoShape",
|
772
883
|
"address" => gl["geoLocationPlace"],
|
773
|
-
"polygon" => Array.wrap(gl.dig("geoLocationPolygon")).map do |glp|
|
774
|
-
|
775
|
-
|
884
|
+
"polygon" => Array.wrap(gl.dig("geoLocationPolygon")).map do |glp|
|
885
|
+
Array.wrap(glp).map do |glpp|
|
886
|
+
[glpp.dig("polygonPoint", "pointLongitude"), glpp.dig("polygonPoint", "pointLatitude")].compact
|
887
|
+
end.compact
|
888
|
+
end.compact.presence,
|
889
|
+
}
|
776
890
|
}
|
777
891
|
end
|
778
892
|
|
779
893
|
if gl.fetch("geoLocationPlace", nil) && !gl.fetch("geoLocationPoint", nil) && !gl.fetch("geoLocationBox", nil) && !gl.fetch("geoLocationPolygon", nil)
|
780
|
-
sum << {
|
894
|
+
sum << {
|
781
895
|
"@type" => "Place",
|
782
896
|
"geo" => {
|
783
897
|
"@type" => "GeoCoordinates",
|
@@ -992,7 +1106,7 @@ module Bolognese
|
|
992
1106
|
|
993
1107
|
def get_iso8601_date(iso8601_time)
|
994
1108
|
return nil if iso8601_time.nil?
|
995
|
-
|
1109
|
+
|
996
1110
|
iso8601_time[0..9]
|
997
1111
|
end
|
998
1112
|
|
@@ -1089,7 +1203,7 @@ module Bolognese
|
|
1089
1203
|
first_page = pages.present? ? pages.split("-").map(&:strip)[0] : nil
|
1090
1204
|
last_page = pages.present? ? pages.split("-").map(&:strip)[1] : nil
|
1091
1205
|
|
1092
|
-
{
|
1206
|
+
{
|
1093
1207
|
"title" => title,
|
1094
1208
|
"volume" => volume,
|
1095
1209
|
"issue" => issue,
|
@@ -1148,11 +1262,11 @@ module Bolognese
|
|
1148
1262
|
def name_to_fos(name)
|
1149
1263
|
# first find subject in Fields of Science (OECD)
|
1150
1264
|
fos = JSON.load(File.read(File.expand_path('../../../resources/oecd/fos-mappings.json', __FILE__))).fetch("fosFields")
|
1151
|
-
|
1265
|
+
|
1152
1266
|
subject = fos.find { |l| l["fosLabel"] == name || "FOS: " + l["fosLabel"] == name }
|
1153
1267
|
|
1154
1268
|
if subject
|
1155
|
-
return [{
|
1269
|
+
return [{
|
1156
1270
|
"subject" => sanitize(name) },
|
1157
1271
|
{
|
1158
1272
|
"subject" => "FOS: " + subject["fosLabel"],
|
@@ -1166,12 +1280,12 @@ module Bolognese
|
|
1166
1280
|
fores = JSON.load(File.read(File.expand_path('../../../resources/oecd/for-mappings.json', __FILE__)))
|
1167
1281
|
for_fields = fores.fetch("forFields")
|
1168
1282
|
for_disciplines = fores.fetch("forDisciplines")
|
1169
|
-
|
1283
|
+
|
1170
1284
|
subject = for_fields.find { |l| l["forLabel"] == name } ||
|
1171
1285
|
for_disciplines.find { |l| l["forLabel"] == name }
|
1172
1286
|
|
1173
1287
|
if subject
|
1174
|
-
[{
|
1288
|
+
[{
|
1175
1289
|
"subject" => sanitize(name) },
|
1176
1290
|
{
|
1177
1291
|
"subject" => "FOS: " + subject["fosLabel"],
|
@@ -1189,16 +1303,17 @@ module Bolognese
|
|
1189
1303
|
subject = fos.find { |l| l["fosLabel"] == hsh["__content__"] || "FOS: " + l["fosLabel"] == hsh["__content__"] || l["fosLabel"] == hsh["subject"]}
|
1190
1304
|
|
1191
1305
|
if subject
|
1192
|
-
return [{
|
1193
|
-
"subject" => sanitize(hsh["__content__"] || hsh["subject"]),
|
1306
|
+
return [{
|
1307
|
+
"subject" => sanitize(hsh["__content__"] || hsh["subject"]),
|
1194
1308
|
"subjectScheme" => hsh["subjectScheme"],
|
1195
|
-
"schemeUri" => hsh["schemeURI"],
|
1196
|
-
"valueUri" => hsh["valueURI"],
|
1309
|
+
"schemeUri" => hsh["schemeURI"],
|
1310
|
+
"valueUri" => hsh["valueURI"],
|
1311
|
+
"classificationCode" => hsh["classificationCode"],
|
1197
1312
|
"lang" => hsh["lang"] }.compact,
|
1198
|
-
{
|
1313
|
+
{
|
1199
1314
|
"subject" => "FOS: " + subject["fosLabel"],
|
1200
1315
|
"subjectScheme" => "Fields of Science and Technology (FOS)",
|
1201
|
-
"schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf" }.compact]
|
1316
|
+
"schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf" }.compact]
|
1202
1317
|
end
|
1203
1318
|
|
1204
1319
|
# if not found, look in Fields of Research (Australian and New Zealand Standard Research Classification)
|
@@ -1210,8 +1325,8 @@ module Bolognese
|
|
1210
1325
|
# try to extract forId
|
1211
1326
|
if hsh["subjectScheme"] == "FOR"
|
1212
1327
|
for_id = hsh["__content__"].to_s.split(" ").first || hsh["subject"].to_s.split(" ").first
|
1213
|
-
for_id = for_id.rjust(6, "0")
|
1214
|
-
|
1328
|
+
for_id = for_id.rjust(6, "0")
|
1329
|
+
|
1215
1330
|
subject = for_fields.find { |l| l["forId"] == for_id } ||
|
1216
1331
|
for_disciplines.find { |l| l["forId"] == for_id[0..3] }
|
1217
1332
|
else
|
@@ -1220,11 +1335,12 @@ module Bolognese
|
|
1220
1335
|
end
|
1221
1336
|
|
1222
1337
|
if subject
|
1223
|
-
[{
|
1224
|
-
"subject" => sanitize(hsh["__content__"] || hsh["subject"]),
|
1338
|
+
[{
|
1339
|
+
"subject" => sanitize(hsh["__content__"] || hsh["subject"]),
|
1225
1340
|
"subjectScheme" => hsh["subjectScheme"],
|
1226
|
-
"
|
1227
|
-
"
|
1341
|
+
"classificationCode" => hsh["classificationCode"],
|
1342
|
+
"schemeUri" => hsh["schemeURI"],
|
1343
|
+
"valueUri" => hsh["valueURI"],
|
1228
1344
|
"lang" => hsh["lang"] }.compact,
|
1229
1345
|
{
|
1230
1346
|
"subject" => "FOS: " + subject["fosLabel"],
|
@@ -1232,12 +1348,13 @@ module Bolognese
|
|
1232
1348
|
"schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf"
|
1233
1349
|
}]
|
1234
1350
|
else
|
1235
|
-
[{
|
1236
|
-
"subject" => sanitize(hsh["__content__"] || hsh["subject"]),
|
1351
|
+
[{
|
1352
|
+
"subject" => sanitize(hsh["__content__"] || hsh["subject"]),
|
1237
1353
|
"subjectScheme" => hsh["subjectScheme"],
|
1238
|
-
"
|
1239
|
-
"
|
1240
|
-
"
|
1354
|
+
"classificationCode" => hsh["classificationCode"],
|
1355
|
+
"schemeUri" => hsh["schemeURI"],
|
1356
|
+
"valueUri" => hsh["valueURI"],
|
1357
|
+
"lang" => hsh["lang"] }.compact]
|
1241
1358
|
end
|
1242
1359
|
end
|
1243
1360
|
end
|