bolognese 1.11.3 → 2.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ci.yml +1 -1
- data/.github/workflows/release.yml +2 -2
- data/Gemfile.lock +15 -12
- data/bolognese.gemspec +5 -3
- data/lib/bolognese/author_utils.rb +4 -3
- data/lib/bolognese/datacite_utils.rb +19 -11
- data/lib/bolognese/metadata.rb +1 -6
- data/lib/bolognese/metadata_utils.rb +3 -2
- data/lib/bolognese/readers/bibtex_reader.rb +2 -2
- data/lib/bolognese/readers/citeproc_reader.rb +7 -1
- data/lib/bolognese/readers/codemeta_reader.rb +2 -2
- data/lib/bolognese/readers/crosscite_reader.rb +4 -1
- data/lib/bolognese/readers/crossref_reader.rb +40 -7
- data/lib/bolognese/readers/datacite_json_reader.rb +4 -1
- data/lib/bolognese/readers/datacite_reader.rb +19 -3
- data/lib/bolognese/readers/npm_reader.rb +1 -1
- data/lib/bolognese/readers/ris_reader.rb +2 -2
- data/lib/bolognese/readers/schema_org_reader.rb +27 -4
- data/lib/bolognese/utils.rb +35 -13
- data/lib/bolognese/version.rb +1 -1
- data/lib/bolognese/writers/bibtex_writer.rb +1 -1
- data/lib/bolognese/writers/codemeta_writer.rb +1 -1
- data/lib/bolognese/writers/csv_writer.rb +1 -1
- data/lib/bolognese/writers/datacite_json_writer.rb +3 -1
- data/lib/bolognese/writers/jats_writer.rb +6 -3
- data/lib/bolognese/writers/ris_writer.rb +1 -1
- data/lib/bolognese/writers/schema_org_writer.rb +5 -1
- data/resources/kernel-4/include/datacite-contributorType-v4.xsd +3 -1
- data/resources/kernel-4/include/datacite-dateType-v4.xsd +3 -1
- data/resources/kernel-4/include/datacite-relatedIdentifierType-v4.xsd +5 -2
- data/resources/kernel-4/include/datacite-relationType-v4.xsd +9 -3
- data/resources/kernel-4/include/datacite-resourceType-v4.xsd +7 -1
- data/resources/kernel-4/include/datacite-titleType-v4.xsd +1 -1
- data/resources/kernel-4/metadata.xsd +12 -7
- data/resources/kernel-4.5/include/datacite-contributorType-v4.xsd +35 -0
- data/resources/kernel-4.5/include/datacite-dateType-v4.xsd +25 -0
- data/resources/kernel-4.5/include/datacite-descriptionType-v4.xsd +19 -0
- data/resources/kernel-4.5/include/datacite-funderIdentifierType-v4.xsd +16 -0
- data/resources/kernel-4.5/include/datacite-nameType-v4.xsd +10 -0
- data/resources/kernel-4.5/include/datacite-numberType-v4.xsd +12 -0
- data/resources/kernel-4.5/include/datacite-relatedIdentifierType-v4.xsd +34 -0
- data/resources/kernel-4.5/include/datacite-relationType-v4.xsd +53 -0
- data/resources/kernel-4.5/include/datacite-resourceType-v4.xsd +45 -0
- data/resources/kernel-4.5/include/datacite-titleType-v4.xsd +14 -0
- data/resources/kernel-4.5/include/xml.xsd +286 -0
- data/resources/kernel-4.5/metadata.xsd +711 -0
- data/resources/kernel-4.6/include/datacite-contributorType-v4.xsd +37 -0
- data/resources/kernel-4.6/include/datacite-dateType-v4.xsd +27 -0
- data/resources/kernel-4.6/include/datacite-descriptionType-v4.xsd +19 -0
- data/resources/kernel-4.6/include/datacite-funderIdentifierType-v4.xsd +16 -0
- data/resources/kernel-4.6/include/datacite-nameType-v4.xsd +10 -0
- data/resources/kernel-4.6/include/datacite-numberType-v4.xsd +12 -0
- data/resources/kernel-4.6/include/datacite-relatedIdentifierType-v4.xsd +37 -0
- data/resources/kernel-4.6/include/datacite-relationType-v4.xsd +57 -0
- data/resources/kernel-4.6/include/datacite-resourceType-v4.xsd +49 -0
- data/resources/kernel-4.6/include/datacite-titleType-v4.xsd +14 -0
- data/resources/kernel-4.6/include/xml.xsd +286 -0
- data/resources/kernel-4.6/metadata.xsd +712 -0
- data/spec/author_utils_spec.rb +33 -4
- data/spec/datacite_utils_spec.rb +156 -2
- data/spec/fixtures/citeproc.json +6 -2
- data/spec/fixtures/crossref_schema_4.6_values.xml +183 -0
- data/spec/fixtures/datacite-example-ROR-nameIdentifiers.xml +18 -2
- data/spec/fixtures/datacite-example-full-v4.5.xml +255 -0
- data/spec/fixtures/datacite-example-full-v4.6.xml +114 -0
- data/spec/fixtures/datacite-seriesinformation.xml +7 -2
- data/spec/fixtures/datacite-xml-lang.xml +1 -1
- data/spec/fixtures/datacite_blank_name_identifier.xml +22 -0
- data/spec/fixtures/datacite_blank_publisher.xml +18 -0
- data/spec/fixtures/datacite_journal_article.xml +64 -0
- data/spec/fixtures/schema_org.json +1 -0
- data/spec/fixtures/schema_org_4.6_attributes.json +108 -0
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/insert_contributors_Translator/supports_Translator_contributorType.yml +71 -0
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/insert_dates_with_Coverage/inserts_date_with_dateType_Coverage.yml +71 -0
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/insert_related_identifiers_CSTR/supports_CSTR_relatedIdentifierType.yml +71 -0
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/insert_related_identifiers_HasTranslation/supports_HasTranslation_relationType.yml +71 -0
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/insert_related_identifiers_RRID/supports_RRID_relatedIdentifierType.yml +71 -0
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/insert_resource_type_with_Award/supports_Award_as_resourceTypeGeneral.yml +71 -0
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/insert_resource_type_with_Project/supports_Project_as_resourceTypeGeneral.yml +71 -0
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/insert_subjects/insert.yml +32 -14
- data/spec/readers/bibtex_reader_spec.rb +2 -0
- data/spec/readers/citeproc_reader_spec.rb +4 -0
- data/spec/readers/codemeta_reader_spec.rb +4 -4
- data/spec/readers/crosscite_reader_spec.rb +2 -0
- data/spec/readers/crossref_reader_spec.rb +72 -41
- data/spec/readers/datacite_json_reader_spec.rb +2 -0
- data/spec/readers/datacite_reader_spec.rb +164 -42
- data/spec/readers/npm_reader_spec.rb +2 -0
- data/spec/readers/ris_reader_spec.rb +3 -0
- data/spec/readers/schema_org_reader_spec.rb +38 -11
- data/spec/spec_helper.rb +1 -0
- data/spec/writers/citation_writer_spec.rb +9 -0
- data/spec/writers/citeproc_writer_spec.rb +9 -0
- data/spec/writers/crosscite_writer_spec.rb +7 -0
- data/spec/writers/datacite_json_writer_spec.rb +22 -0
- data/spec/writers/datacite_writer_spec.rb +155 -5
- data/spec/writers/jats_writer_spec.rb +16 -1
- data/spec/writers/rdf_xml_writer_spec.rb +7 -0
- data/spec/writers/schema_org_writer_spec.rb +49 -0
- data/spec/writers/turtle_writer_spec.rb +18 -0
- metadata +85 -25
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 1a86efd4d59883de399bfae917077f97c5f5e33bd698bc70fc4a3daa40736acc
|
|
4
|
+
data.tar.gz: da3ac310e177b6ad9733ff56cb9893cfe107c0beb930167b7b8f392dfb2332f4
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: ee269b568dd16e08da5c10f00155a3e0b55e9725c1d6aa114c6c3a5020dfe05c2df89388a9bc394a6274e86ca81567f4f756dd31bcaf5faa12dff2a7df275308
|
|
7
|
+
data.tar.gz: 26245d3b2e4553616ba0708047b542f4092240e4cc251ae578de502010ba69a3d6700a5df48f59cd427b7f0eaf498a15cc043cd78ad799572bd589dfcbad6784
|
data/.github/workflows/ci.yml
CHANGED
data/Gemfile.lock
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
PATH
|
|
2
2
|
remote: .
|
|
3
3
|
specs:
|
|
4
|
-
bolognese (
|
|
4
|
+
bolognese (2.3.3)
|
|
5
5
|
activesupport (>= 4.2.5)
|
|
6
6
|
benchmark_methods (~> 0.7)
|
|
7
7
|
bibtex-ruby (>= 5.1.0)
|
|
@@ -16,9 +16,9 @@ PATH
|
|
|
16
16
|
json-ld-preloaded (~> 3.1, >= 3.1.3)
|
|
17
17
|
jsonlint (~> 0.3.0)
|
|
18
18
|
loofah (~> 2.0, >= 2.0.3)
|
|
19
|
-
maremma (
|
|
19
|
+
maremma (~> 5.0)
|
|
20
20
|
namae (~> 1.0)
|
|
21
|
-
nokogiri (
|
|
21
|
+
nokogiri (~> 1.16, >= 1.16.2)
|
|
22
22
|
oj (~> 3.10)
|
|
23
23
|
oj_mimic_json (~> 1.0, >= 1.0.1)
|
|
24
24
|
postrank-uri (~> 1.0, >= 1.0.18)
|
|
@@ -41,6 +41,7 @@ GEM
|
|
|
41
41
|
bibtex-ruby (6.0.0)
|
|
42
42
|
latex-decode (~> 0.0)
|
|
43
43
|
builder (3.2.4)
|
|
44
|
+
byebug (11.1.3)
|
|
44
45
|
citeproc (1.0.10)
|
|
45
46
|
namae (~> 1.0)
|
|
46
47
|
citeproc-ruby (1.1.14)
|
|
@@ -94,10 +95,10 @@ GEM
|
|
|
94
95
|
concurrent-ruby (~> 1.0)
|
|
95
96
|
iso8601 (0.9.1)
|
|
96
97
|
json (2.6.2)
|
|
97
|
-
json-canonicalization (0.3.
|
|
98
|
-
json-ld (3.2.
|
|
98
|
+
json-canonicalization (0.3.1)
|
|
99
|
+
json-ld (3.2.4)
|
|
99
100
|
htmlentities (~> 4.3)
|
|
100
|
-
json-canonicalization (~> 0.3
|
|
101
|
+
json-canonicalization (~> 0.3)
|
|
101
102
|
link_header (~> 0.0, >= 0.0.8)
|
|
102
103
|
multi_json (~> 1.15)
|
|
103
104
|
rack (>= 2.2, < 4)
|
|
@@ -113,7 +114,7 @@ GEM
|
|
|
113
114
|
loofah (2.21.3)
|
|
114
115
|
crass (~> 1.0.2)
|
|
115
116
|
nokogiri (>= 1.12.0)
|
|
116
|
-
maremma (
|
|
117
|
+
maremma (5.0.0)
|
|
117
118
|
activesupport (>= 4.2.5)
|
|
118
119
|
addressable (>= 2.3.6)
|
|
119
120
|
builder (~> 3.2, >= 3.2.2)
|
|
@@ -124,17 +125,17 @@ GEM
|
|
|
124
125
|
faraday-follow_redirects (~> 0.3.0)
|
|
125
126
|
faraday-gzip (~> 0.1.0)
|
|
126
127
|
faraday-multipart (~> 1.0.4)
|
|
127
|
-
nokogiri (
|
|
128
|
+
nokogiri (~> 1.16, >= 1.16.2)
|
|
128
129
|
oj (>= 2.8.3)
|
|
129
130
|
oj_mimic_json (~> 1.0, >= 1.0.1)
|
|
130
131
|
matrix (0.4.2)
|
|
131
|
-
mini_portile2 (2.8.
|
|
132
|
+
mini_portile2 (2.8.5)
|
|
132
133
|
minitest (5.18.0)
|
|
133
134
|
multi_json (1.15.0)
|
|
134
135
|
multipart-post (2.3.0)
|
|
135
136
|
namae (1.1.1)
|
|
136
|
-
nokogiri (1.
|
|
137
|
-
mini_portile2 (~> 2.8.
|
|
137
|
+
nokogiri (1.16.2)
|
|
138
|
+
mini_portile2 (~> 2.8.2)
|
|
138
139
|
racc (~> 1.4)
|
|
139
140
|
oj (3.14.2)
|
|
140
141
|
oj_mimic_json (1.0.1)
|
|
@@ -222,7 +223,9 @@ PLATFORMS
|
|
|
222
223
|
DEPENDENCIES
|
|
223
224
|
bolognese!
|
|
224
225
|
bundler (>= 1.0)
|
|
226
|
+
byebug
|
|
225
227
|
hashdiff (>= 1.0.0.beta1, < 2.0.0)
|
|
228
|
+
json-canonicalization (= 0.3.1)
|
|
226
229
|
rack-test (~> 0)
|
|
227
230
|
rake (~> 12.0)
|
|
228
231
|
rspec (~> 3.4)
|
|
@@ -232,4 +235,4 @@ DEPENDENCIES
|
|
|
232
235
|
webmock (~> 3.0, >= 3.0.1)
|
|
233
236
|
|
|
234
237
|
BUNDLED WITH
|
|
235
|
-
2.
|
|
238
|
+
2.5.5
|
data/bolognese.gemspec
CHANGED
|
@@ -13,12 +13,12 @@ Gem::Specification.new do |s|
|
|
|
13
13
|
s.version = Bolognese::VERSION
|
|
14
14
|
s.extra_rdoc_files = ["README.md"]
|
|
15
15
|
s.license = 'MIT'
|
|
16
|
-
s.required_ruby_version = ['>=
|
|
16
|
+
s.required_ruby_version = ['>=3.0']
|
|
17
17
|
|
|
18
18
|
# Declare dependencies here, rather than in the Gemfile
|
|
19
|
-
s.add_dependency 'maremma', '
|
|
19
|
+
s.add_dependency 'maremma', '~> 5.0'
|
|
20
20
|
#s.add_dependency 'faraday', '~> 0.17.3'
|
|
21
|
-
s.add_dependency 'nokogiri', '
|
|
21
|
+
s.add_dependency 'nokogiri', '~> 1.16', '>= 1.16.2'
|
|
22
22
|
s.add_dependency 'loofah', '~> 2.0', '>= 2.0.3'
|
|
23
23
|
s.add_dependency 'builder', '~> 3.2', '>= 3.2.2'
|
|
24
24
|
s.add_dependency 'activesupport', '>= 4.2.5'
|
|
@@ -49,6 +49,8 @@ Gem::Specification.new do |s|
|
|
|
49
49
|
s.add_development_dependency 'webmock', '~> 3.0', '>= 3.0.1'
|
|
50
50
|
s.add_development_dependency 'simplecov', '0.17.1'
|
|
51
51
|
s.add_development_dependency 'hashdiff', ['>= 1.0.0.beta1', '< 2.0.0']
|
|
52
|
+
s.add_development_dependency 'byebug'
|
|
53
|
+
s.add_development_dependency 'json-canonicalization', '0.3.1'
|
|
52
54
|
|
|
53
55
|
s.require_paths = ["lib"]
|
|
54
56
|
s.files = `git ls-files`.split($/)
|
|
@@ -30,19 +30,20 @@ module Bolognese
|
|
|
30
30
|
name_type = parse_attributes(author.fetch("creatorName", nil), content: "nameType", first: true) || parse_attributes(author.fetch("contributorName", nil), content: "nameType", first: true)
|
|
31
31
|
|
|
32
32
|
name_identifiers = Array.wrap(author.fetch("nameIdentifier", nil)).map do |ni|
|
|
33
|
+
name_identifier = ni["__content__"].strip if ni["__content__"].present?
|
|
33
34
|
if ni["nameIdentifierScheme"] == "ORCID"
|
|
34
35
|
{
|
|
35
|
-
"nameIdentifier" => normalize_orcid(
|
|
36
|
+
"nameIdentifier" => normalize_orcid(name_identifier),
|
|
36
37
|
"schemeUri" => "https://orcid.org",
|
|
37
38
|
"nameIdentifierScheme" => "ORCID" }.compact
|
|
38
39
|
elsif ni["nameIdentifierScheme"] == "ROR"
|
|
39
40
|
{
|
|
40
|
-
"nameIdentifier" => normalize_ror(
|
|
41
|
+
"nameIdentifier" => normalize_ror(name_identifier),
|
|
41
42
|
"schemeUri" => "https://ror.org",
|
|
42
43
|
"nameIdentifierScheme" => "ROR" }.compact
|
|
43
44
|
else
|
|
44
45
|
{
|
|
45
|
-
"nameIdentifier" =>
|
|
46
|
+
"nameIdentifier" => name_identifier,
|
|
46
47
|
"schemeUri" => ni.fetch("schemeURI", nil),
|
|
47
48
|
"nameIdentifierScheme" => ni["nameIdentifierScheme"] }.compact
|
|
48
49
|
end
|
|
@@ -106,9 +106,19 @@ module Bolognese
|
|
|
106
106
|
end
|
|
107
107
|
end
|
|
108
108
|
end
|
|
109
|
-
|
|
109
|
+
|
|
110
110
|
def insert_publisher(xml)
|
|
111
|
-
|
|
111
|
+
if publisher.is_a?(Hash)
|
|
112
|
+
attributes = {
|
|
113
|
+
'publisherIdentifier' => publisher["publisherIdentifier"],
|
|
114
|
+
'publisherIdentifierScheme' => publisher["publisherIdentifierScheme"],
|
|
115
|
+
'schemeURI' => publisher["schemeUri"],
|
|
116
|
+
"xml:lang" => publisher["lang"]
|
|
117
|
+
}.compact
|
|
118
|
+
xml.publisher(publisher["name"] || container && container["title"], attributes)
|
|
119
|
+
else
|
|
120
|
+
xml.publisher(publisher || container && container["title"])
|
|
121
|
+
end
|
|
112
122
|
end
|
|
113
123
|
|
|
114
124
|
def insert_publication_year(xml)
|
|
@@ -118,7 +128,7 @@ module Bolognese
|
|
|
118
128
|
def insert_resource_type(xml)
|
|
119
129
|
return xml unless types.is_a?(Hash) && (types["schemaOrg"].present? || types["resourceTypeGeneral"])
|
|
120
130
|
|
|
121
|
-
xml.resourceType(types["resourceType"]
|
|
131
|
+
xml.resourceType(types["resourceType"],
|
|
122
132
|
'resourceTypeGeneral' => types["resourceTypeGeneral"] || Metadata::SO_TO_DC_TRANSLATIONS[types["schemaOrg"]] || "Other")
|
|
123
133
|
end
|
|
124
134
|
|
|
@@ -171,7 +181,7 @@ module Bolognese
|
|
|
171
181
|
s["subject"] = subject
|
|
172
182
|
end
|
|
173
183
|
|
|
174
|
-
attributes = { "subjectScheme" => s["subjectScheme"], "schemeURI" => s["schemeUri"], "valueURI" => s["valueUri"], "xml:lang" => s["lang"] }.compact
|
|
184
|
+
attributes = { "subjectScheme" => s["subjectScheme"], "schemeURI" => s["schemeUri"], "valueURI" => s["valueUri"], "classificationCode" => s["classificationCode"], "xml:lang" => s["lang"] }.compact
|
|
175
185
|
|
|
176
186
|
xml.subject(s["subject"], attributes)
|
|
177
187
|
end
|
|
@@ -375,13 +385,11 @@ module Bolognese
|
|
|
375
385
|
end
|
|
376
386
|
end
|
|
377
387
|
if geo_location["geoLocationPolygon"]
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
xml.polygonPoint
|
|
382
|
-
|
|
383
|
-
xml.pointLongitude(polygon_point.dig("polygonPoint", "pointLongitude"))
|
|
384
|
-
end
|
|
388
|
+
xml.geoLocationPolygon do
|
|
389
|
+
Array.wrap(geo_location["geoLocationPolygon"]).each do |polygon_point|
|
|
390
|
+
xml.polygonPoint do
|
|
391
|
+
xml.pointLatitude(polygon_point.dig("polygonPoint", "pointLatitude"))
|
|
392
|
+
xml.pointLongitude(polygon_point.dig("polygonPoint", "pointLongitude"))
|
|
385
393
|
end
|
|
386
394
|
end
|
|
387
395
|
end
|
data/lib/bolognese/metadata.rb
CHANGED
|
@@ -156,11 +156,6 @@ module Bolognese
|
|
|
156
156
|
@descriptions ||= meta.fetch("descriptions", nil)
|
|
157
157
|
end
|
|
158
158
|
|
|
159
|
-
def abstract_description
|
|
160
|
-
# Fetch the first description with descriptionType "Abstract"
|
|
161
|
-
@abstract_description ||= descriptions&.find { |d| d["descriptionType"] == "Abstract" }
|
|
162
|
-
end
|
|
163
|
-
|
|
164
159
|
def rights_list
|
|
165
160
|
@rights_list ||= meta.fetch("rights_list", nil)
|
|
166
161
|
end
|
|
@@ -222,7 +217,7 @@ module Bolognese
|
|
|
222
217
|
end
|
|
223
218
|
|
|
224
219
|
def publisher
|
|
225
|
-
@publisher ||= meta.fetch("publisher", nil)
|
|
220
|
+
@publisher ||= normalize_publisher(meta["publisher"]) if meta.fetch("publisher", nil).present?
|
|
226
221
|
end
|
|
227
222
|
|
|
228
223
|
def identifiers
|
|
@@ -96,7 +96,7 @@ module Bolognese
|
|
|
96
96
|
if container.present?
|
|
97
97
|
container["title"]
|
|
98
98
|
elsif types["citeproc"] == "article-journal"
|
|
99
|
-
publisher
|
|
99
|
+
publisher["name"] if publisher.present?
|
|
100
100
|
else
|
|
101
101
|
nil
|
|
102
102
|
end
|
|
@@ -153,6 +153,7 @@ module Bolognese
|
|
|
153
153
|
"language" => language,
|
|
154
154
|
"author" => author,
|
|
155
155
|
"contributor" => to_citeproc(contributors),
|
|
156
|
+
"translator" => contributors ? to_citeproc(contributors.select { |c| c["contributorType"] == "Translator" }) : nil,
|
|
156
157
|
"issued" => get_date(dates, "Issued") ? get_date_parts(get_date(dates, "Issued")) : get_date_parts(publication_year.to_s),
|
|
157
158
|
"submitted" => Array.wrap(dates).find { |d| d["dateType"] == "Submitted" }.to_h.fetch("__content__", nil),
|
|
158
159
|
"abstract" => parse_attributes(descriptions, content: "description", first: true),
|
|
@@ -161,7 +162,7 @@ module Bolognese
|
|
|
161
162
|
"volume" => container.to_h["volume"],
|
|
162
163
|
"issue" => container.to_h["issue"],
|
|
163
164
|
"page" => page,
|
|
164
|
-
"publisher" => publisher,
|
|
165
|
+
"publisher" => publisher["name"],
|
|
165
166
|
"title" => parse_attributes(titles, content: "title", first: true),
|
|
166
167
|
"URL" => url,
|
|
167
168
|
"copyright" => Array.wrap(rights_list).map { |l| l["rights"] }.first,
|
|
@@ -86,11 +86,11 @@ module Bolognese
|
|
|
86
86
|
"titles" => meta.try(:title).present? ? [{ "title" => meta.try(:title).to_s }] : [],
|
|
87
87
|
"creators" => creators,
|
|
88
88
|
"container" => container,
|
|
89
|
-
"publisher" => meta.try(:publisher).to_s
|
|
89
|
+
"publisher" => meta.try(:publisher).present? ? { "name" => meta.publisher.to_s } : nil,
|
|
90
90
|
"related_identifiers" => related_identifiers,
|
|
91
91
|
"dates" => dates,
|
|
92
92
|
"publication_year" => publication_year,
|
|
93
|
-
"descriptions" => meta.try(:abstract).present? ? [{ "description" => meta.try(:abstract) && sanitize(meta.abstract.to_s).presence, "descriptionType" => "Abstract" }] : [],
|
|
93
|
+
"descriptions" => meta.try(:abstract).present? ? [{ "description" => meta.try(:abstract) && sanitize(meta.abstract.to_s, new_line: true).presence, "descriptionType" => "Abstract" }] : [],
|
|
94
94
|
"rights_list" => rights_list,
|
|
95
95
|
"state" => state
|
|
96
96
|
}.merge(read_options)
|
|
@@ -52,6 +52,12 @@ module Bolognese
|
|
|
52
52
|
[{ "nameType" => "Organizational", "name" => ":(unav)" }]
|
|
53
53
|
end
|
|
54
54
|
contributors = get_authors(from_citeproc(Array.wrap(meta.fetch("editor", nil))))
|
|
55
|
+
translators = get_authors(from_citeproc(Array.wrap(meta.fetch("translator", nil))))
|
|
56
|
+
translators.each do |translator|
|
|
57
|
+
translator["contributorType"] = "Translator"
|
|
58
|
+
end
|
|
59
|
+
contributors += translators
|
|
60
|
+
|
|
55
61
|
dates = if date = get_date_from_date_parts(meta.fetch("issued", nil))
|
|
56
62
|
if Date.edtf(date).present?
|
|
57
63
|
[{ "date" => date,
|
|
@@ -107,7 +113,7 @@ module Bolognese
|
|
|
107
113
|
"related_identifiers" => related_identifiers,
|
|
108
114
|
"dates" => dates,
|
|
109
115
|
"publication_year" => publication_year,
|
|
110
|
-
"descriptions" => meta.fetch("abstract", nil).present? ? [{ "description" => sanitize(meta.fetch("abstract")), "descriptionType" => "Abstract" }] : [],
|
|
116
|
+
"descriptions" => meta.fetch("abstract", nil).present? ? [{ "description" => sanitize(meta.fetch("abstract"), new_line: true), "descriptionType" => "Abstract" }] : [],
|
|
111
117
|
"rights_list" => rights_list,
|
|
112
118
|
"version_info" => meta.fetch("version", nil),
|
|
113
119
|
"subjects" => subjects,
|
|
@@ -43,7 +43,7 @@ module Bolognese
|
|
|
43
43
|
dates << { "date" => meta.fetch("dateCreated"), "dateType" => "Created" } if meta.fetch("dateCreated", nil).present?
|
|
44
44
|
dates << { "date" => meta.fetch("dateModified"), "dateType" => "Updated" } if meta.fetch("dateModified", nil).present?
|
|
45
45
|
publication_year = meta.fetch("datePublished")[0..3] if meta.fetch("datePublished", nil).present?
|
|
46
|
-
publisher = meta.fetch("publisher", nil)
|
|
46
|
+
publisher = { "name" => meta.fetch("publisher", nil) } if meta.fetch("publisher", nil).present?
|
|
47
47
|
state = meta.present? || read_options.present? ? "findable" : "not_found"
|
|
48
48
|
schema_org = meta.fetch("@type", nil)
|
|
49
49
|
types = {
|
|
@@ -76,7 +76,7 @@ module Bolognese
|
|
|
76
76
|
#{}"is_part_of" => is_part_of,
|
|
77
77
|
"dates" => dates,
|
|
78
78
|
"publication_year" => publication_year,
|
|
79
|
-
"descriptions" => meta.fetch("description", nil).present? ? [{ "description" => sanitize(meta.fetch("description")), "descriptionType" => "Abstract" }] : nil,
|
|
79
|
+
"descriptions" => meta.fetch("description", nil).present? ? [{ "description" => sanitize(meta.fetch("description"), new_line: true), "descriptionType" => "Abstract" }] : nil,
|
|
80
80
|
"rights_list" => rights_list,
|
|
81
81
|
"version_info" => meta.fetch("version", nil),
|
|
82
82
|
"subjects" => subjects,
|
|
@@ -7,7 +7,10 @@ module Bolognese
|
|
|
7
7
|
errors = jsonlint(string)
|
|
8
8
|
return { "errors" => errors } if errors.present?
|
|
9
9
|
|
|
10
|
-
string.present? ? Maremma.from_json(string) : {}
|
|
10
|
+
crosscite = string.present? ? Maremma.from_json(string) : {}
|
|
11
|
+
crosscite["publisher"] = normalize_publisher(crosscite["publisher"]) if crosscite.fetch("publisher", nil).present?
|
|
12
|
+
|
|
13
|
+
crosscite
|
|
11
14
|
end
|
|
12
15
|
end
|
|
13
16
|
end
|
|
@@ -4,6 +4,12 @@ module Bolognese
|
|
|
4
4
|
module Readers
|
|
5
5
|
module CrossrefReader
|
|
6
6
|
# CrossRef types from https://api.crossref.org/types
|
|
7
|
+
|
|
8
|
+
CR_TO_DC_CONTRIBUTOR_TYPES = {
|
|
9
|
+
"editor" => "Editor",
|
|
10
|
+
"translator" => "Translator",
|
|
11
|
+
}
|
|
12
|
+
|
|
7
13
|
def get_crossref(id: nil, **options)
|
|
8
14
|
return { "string" => nil, "state" => "not_found" } unless id.present?
|
|
9
15
|
|
|
@@ -40,8 +46,7 @@ module Bolognese
|
|
|
40
46
|
journal_metadata = nil
|
|
41
47
|
journal_issue = {}
|
|
42
48
|
journal_metadata = nil
|
|
43
|
-
publisher = query.dig("crm_item", 0)
|
|
44
|
-
publisher = nil unless publisher.is_a?(String)
|
|
49
|
+
publisher = query.dig("crm_item", 0).is_a?(String) ? { "name" => query.dig("crm_item", 0) } : nil
|
|
45
50
|
|
|
46
51
|
case model
|
|
47
52
|
when "book"
|
|
@@ -139,7 +144,7 @@ module Bolognese
|
|
|
139
144
|
|
|
140
145
|
state = meta.present? || read_options.present? ? "findable" : "not_found"
|
|
141
146
|
|
|
142
|
-
related_identifiers = Array.wrap(crossref_is_part_of(journal_metadata)) + Array.wrap(crossref_references(bibliographic_metadata))
|
|
147
|
+
related_identifiers = Array.wrap(crossref_is_part_of(journal_metadata)) + Array.wrap(crossref_references(bibliographic_metadata)) + Array.wrap(crossref_has_translation(program_metadata)) + Array.wrap(crossref_is_translation_of(program_metadata))
|
|
143
148
|
|
|
144
149
|
container = if journal_metadata.present?
|
|
145
150
|
issn = normalize_issn(journal_metadata.to_h.fetch("issn", nil))
|
|
@@ -188,7 +193,7 @@ module Bolognese
|
|
|
188
193
|
"titles" => titles,
|
|
189
194
|
"identifiers" => identifiers,
|
|
190
195
|
"creators" => crossref_people(bibliographic_metadata, "author"),
|
|
191
|
-
"contributors" => crossref_people(bibliographic_metadata, "editor"),
|
|
196
|
+
"contributors" => crossref_people(bibliographic_metadata, "editor") + crossref_people(bibliographic_metadata, "translator"),
|
|
192
197
|
"funding_references" => crossref_funding_reference(program_metadata),
|
|
193
198
|
"publisher" => publisher,
|
|
194
199
|
"container" => container,
|
|
@@ -232,11 +237,11 @@ module Bolognese
|
|
|
232
237
|
|
|
233
238
|
def crossref_description(bibliographic_metadata)
|
|
234
239
|
abstract = Array.wrap(bibliographic_metadata.dig("abstract")).map do |r|
|
|
235
|
-
{ "descriptionType" => "Abstract", "description" => sanitize(parse_attributes(r, content: 'p')) }.compact
|
|
240
|
+
{ "descriptionType" => "Abstract", "description" => sanitize(parse_attributes(r, content: 'p'), new_line: true) }.compact
|
|
236
241
|
end
|
|
237
242
|
|
|
238
243
|
description = Array.wrap(bibliographic_metadata.dig("description")).map do |r|
|
|
239
|
-
{ "descriptionType" => "Other", "description" => sanitize(parse_attributes(r)) }.compact
|
|
244
|
+
{ "descriptionType" => "Other", "description" => sanitize(parse_attributes(r), new_line: true) }.compact
|
|
240
245
|
end
|
|
241
246
|
|
|
242
247
|
(abstract + description)
|
|
@@ -277,13 +282,15 @@ module Bolognese
|
|
|
277
282
|
end
|
|
278
283
|
end.compact
|
|
279
284
|
|
|
285
|
+
contributor_type = CR_TO_DC_CONTRIBUTOR_TYPES[a["contributor_role"]]
|
|
286
|
+
|
|
280
287
|
{ "nameType" => "Personal",
|
|
281
288
|
"nameIdentifiers" => name_identifiers,
|
|
282
289
|
"name" => [family_name, given_name].compact.join(", "),
|
|
283
290
|
"givenName" => given_name,
|
|
284
291
|
"familyName" => family_name,
|
|
285
292
|
"affiliation" => affiliation.presence,
|
|
286
|
-
"contributorType" =>
|
|
293
|
+
"contributorType" => contributor_type }.compact
|
|
287
294
|
else
|
|
288
295
|
{ "nameType" => "Organizational",
|
|
289
296
|
"name" => a["name"] || a["__content__"] }
|
|
@@ -363,6 +370,32 @@ module Bolognese
|
|
|
363
370
|
end
|
|
364
371
|
end.compact.unwrap
|
|
365
372
|
end
|
|
373
|
+
|
|
374
|
+
def crossref_has_translation(program_metadata)
|
|
375
|
+
refs = program_metadata.dig("related_item") if program_metadata.is_a?(Hash)
|
|
376
|
+
Array.wrap(refs).select { |a| a.dig("intra_work_relation", "relationship_type") == "hasTranslation" }.map do |c|
|
|
377
|
+
if c.dig("intra_work_relation", "identifier_type") == "doi"
|
|
378
|
+
{ "relatedIdentifier" => parse_attributes(c["intra_work_relation"]).downcase,
|
|
379
|
+
"relationType" => "HasTranslation",
|
|
380
|
+
"relatedIdentifierType" => "DOI" }.compact
|
|
381
|
+
else
|
|
382
|
+
nil
|
|
383
|
+
end
|
|
384
|
+
end.compact.unwrap
|
|
385
|
+
end
|
|
386
|
+
|
|
387
|
+
def crossref_is_translation_of(program_metadata)
|
|
388
|
+
refs = program_metadata.dig("related_item") if program_metadata.is_a?(Hash)
|
|
389
|
+
Array.wrap(refs).select { |a| a.dig("intra_work_relation", "relationship_type") == "isTranslationOf" }.map do |c|
|
|
390
|
+
if c.dig("intra_work_relation", "identifier_type") == "doi"
|
|
391
|
+
{ "relatedIdentifier" => parse_attributes(c["intra_work_relation"]).downcase,
|
|
392
|
+
"relationType" => "IsTranslationOf",
|
|
393
|
+
"relatedIdentifierType" => "DOI" }.compact
|
|
394
|
+
else
|
|
395
|
+
nil
|
|
396
|
+
end
|
|
397
|
+
end.compact.unwrap
|
|
398
|
+
end
|
|
366
399
|
end
|
|
367
400
|
end
|
|
368
401
|
end
|
|
@@ -7,7 +7,10 @@ module Bolognese
|
|
|
7
7
|
errors = jsonlint(string)
|
|
8
8
|
return { "errors" => errors } if errors.present?
|
|
9
9
|
|
|
10
|
-
string.present? ? Maremma.from_json(string).transform_keys! { |key| key.underscore } : {}
|
|
10
|
+
datacite_json = string.present? ? Maremma.from_json(string).transform_keys! { |key| key.underscore } : {}
|
|
11
|
+
datacite_json["publisher"] = normalize_publisher(datacite_json["publisher"]) if datacite_json.fetch("publisher", nil).present?
|
|
12
|
+
|
|
13
|
+
datacite_json
|
|
11
14
|
end
|
|
12
15
|
end
|
|
13
16
|
end
|
|
@@ -94,13 +94,29 @@ module Bolognese
|
|
|
94
94
|
|
|
95
95
|
titles = get_titles(meta)
|
|
96
96
|
|
|
97
|
+
publisher = Array.wrap(meta.dig("publisher")).map do |r|
|
|
98
|
+
if r.blank?
|
|
99
|
+
nil
|
|
100
|
+
elsif r.is_a?(String)
|
|
101
|
+
{ "name" => r.strip }
|
|
102
|
+
elsif r.is_a?(Hash)
|
|
103
|
+
{
|
|
104
|
+
"name" => r["__content__"].present? ? r["__content__"].strip : nil,
|
|
105
|
+
"publisherIdentifier" => r["publisherIdentifierScheme"] == "ROR" ? normalize_ror(r["publisherIdentifier"]) : r["publisherIdentifier"],
|
|
106
|
+
"publisherIdentifierScheme" => r["publisherIdentifierScheme"],
|
|
107
|
+
"schemeUri" => r["schemeURI"],
|
|
108
|
+
"lang" => r["lang"],
|
|
109
|
+
}.compact
|
|
110
|
+
end
|
|
111
|
+
end.compact.first
|
|
112
|
+
|
|
97
113
|
descriptions = Array.wrap(meta.dig("descriptions", "description")).map do |r|
|
|
98
114
|
if r.blank?
|
|
99
115
|
nil
|
|
100
116
|
elsif r.is_a?(String)
|
|
101
|
-
{ "description" => sanitize(r), "descriptionType" => "Abstract" }
|
|
117
|
+
{ "description" => sanitize(r, new_line: true), "descriptionType" => "Abstract" }
|
|
102
118
|
elsif r.is_a?(Hash)
|
|
103
|
-
{ "description" => sanitize(r["__content__"]), "descriptionType" => r["descriptionType"], "lang" => r["lang"] }.compact
|
|
119
|
+
{ "description" => sanitize(r["__content__"], new_line: true), "descriptionType" => r["descriptionType"], "lang" => r["lang"] }.compact
|
|
104
120
|
end
|
|
105
121
|
end.compact
|
|
106
122
|
rights_list = Array.wrap(meta.dig("rightsList", "rights")).map do |r|
|
|
@@ -287,7 +303,7 @@ module Bolognese
|
|
|
287
303
|
"creators" => get_authors(Array.wrap(meta.dig("creators", "creator"))),
|
|
288
304
|
"contributors" => get_authors(Array.wrap(meta.dig("contributors", "contributor"))),
|
|
289
305
|
"container" => set_container(meta),
|
|
290
|
-
"publisher" =>
|
|
306
|
+
"publisher" => publisher,
|
|
291
307
|
"agency" => "datacite",
|
|
292
308
|
"funding_references" => funding_references,
|
|
293
309
|
"dates" => dates,
|
|
@@ -103,7 +103,7 @@ module Bolognese
|
|
|
103
103
|
#"related_identifiers" => related_identifiers,
|
|
104
104
|
#"dates" => dates,
|
|
105
105
|
#"publication_year" => publication_year,
|
|
106
|
-
"descriptions" => meta.fetch("description", nil).present? ? [{ "description" => sanitize(meta.fetch("description")), "descriptionType" => "Abstract" }] : [],
|
|
106
|
+
"descriptions" => meta.fetch("description", nil).present? ? [{ "description" => sanitize(meta.fetch("description"), new_line: true), "descriptionType" => "Abstract" }] : [],
|
|
107
107
|
"rights_list" => rights_list,
|
|
108
108
|
"version_info" => meta.fetch("version", nil),
|
|
109
109
|
"subjects" => subjects
|
|
@@ -89,12 +89,12 @@ module Bolognese
|
|
|
89
89
|
"url" => meta.fetch("UR", nil),
|
|
90
90
|
"titles" => meta.fetch("T1", nil).present? ? [{ "title" => meta.fetch("T1", nil) }] : nil,
|
|
91
91
|
"creators" => get_authors(author),
|
|
92
|
-
"publisher" => meta.fetch("PB", "(:unav)"),
|
|
92
|
+
"publisher" => { "name" => meta.fetch("PB", "(:unav)") },
|
|
93
93
|
"container" => container,
|
|
94
94
|
"related_identifiers" => related_identifiers,
|
|
95
95
|
"dates" => dates,
|
|
96
96
|
"publication_year" => publication_year,
|
|
97
|
-
"descriptions" => meta.fetch("AB", nil).present? ? [{ "description" => sanitize(meta.fetch("AB")), "descriptionType" => "Abstract" }] : nil,
|
|
97
|
+
"descriptions" => meta.fetch("AB", nil).present? ? [{ "description" => sanitize(meta.fetch("AB"), new_line: true), "descriptionType" => "Abstract" }] : nil,
|
|
98
98
|
"subjects" => subjects,
|
|
99
99
|
"language" => meta.fetch("LA", nil),
|
|
100
100
|
"state" => state
|
|
@@ -10,7 +10,9 @@ module Bolognese
|
|
|
10
10
|
"isPartOf" => "IsPartOf",
|
|
11
11
|
"hasPart" => "HasPart",
|
|
12
12
|
"isPredecessor" => "IsPreviousVersionOf",
|
|
13
|
-
"isSuccessor" => "IsNewVersionOf"
|
|
13
|
+
"isSuccessor" => "IsNewVersionOf",
|
|
14
|
+
"workTranslation" => "HasTranslation",
|
|
15
|
+
"translationOfWork" => "IsTranslationOf"
|
|
14
16
|
}
|
|
15
17
|
|
|
16
18
|
SO_TO_DC_REVERSE_RELATION_TYPES = {
|
|
@@ -74,7 +76,17 @@ module Bolognese
|
|
|
74
76
|
creators = get_authors(from_schema_org_creators(Array.wrap(authors)))
|
|
75
77
|
end
|
|
76
78
|
contributors = get_authors(from_schema_org_contributors(Array.wrap(meta.fetch("editor", nil))))
|
|
77
|
-
|
|
79
|
+
translators = get_authors(from_schema_org_contributors(Array.wrap(meta.fetch("translator", nil))))
|
|
80
|
+
translators.map! do |translator|
|
|
81
|
+
translator["contributorType"] = "Translator"
|
|
82
|
+
translator
|
|
83
|
+
end
|
|
84
|
+
contributors += translators
|
|
85
|
+
|
|
86
|
+
publisher = {
|
|
87
|
+
"name" => parse_attributes(meta.fetch("publisher", nil), content: "name", first: true),
|
|
88
|
+
"publisherIdentifier" => parse_attributes(meta.fetch("publisher", nil), content: "@id", first: true),
|
|
89
|
+
}.compact if meta.fetch("publisher", nil).present?
|
|
78
90
|
|
|
79
91
|
ct = (schema_org == "Dataset") ? "includedInDataCatalog" : "Periodical"
|
|
80
92
|
container = if meta.fetch(ct, nil).present?
|
|
@@ -102,7 +114,9 @@ module Bolognese
|
|
|
102
114
|
Array.wrap(schema_org_references(meta)) +
|
|
103
115
|
Array.wrap(schema_org_is_referenced_by(meta)) +
|
|
104
116
|
Array.wrap(schema_org_is_supplement_to(meta)) +
|
|
105
|
-
Array.wrap(schema_org_is_supplemented_by(meta))
|
|
117
|
+
Array.wrap(schema_org_is_supplemented_by(meta)) +
|
|
118
|
+
Array.wrap(schema_org_has_translation(meta)) +
|
|
119
|
+
Array.wrap(schema_org_is_translation_of(meta))
|
|
106
120
|
|
|
107
121
|
rights_list = Array.wrap(meta.fetch("license", nil)).compact.map do |rl|
|
|
108
122
|
hsh_to_spdx("__content__" => rl["name"], "rightsURI" => rl["id"])
|
|
@@ -123,6 +137,7 @@ module Bolognese
|
|
|
123
137
|
dates << { "date" => meta.fetch("datePublished"), "dateType" => "Issued" } if Date.edtf(meta.fetch("datePublished", nil)).present?
|
|
124
138
|
dates << { "date" => meta.fetch("dateCreated"), "dateType" => "Created" } if Date.edtf(meta.fetch("dateCreated", nil)).present?
|
|
125
139
|
dates << { "date" => meta.fetch("dateModified"), "dateType" => "Updated" } if Date.edtf(meta.fetch("dateModified", nil)).present?
|
|
140
|
+
dates << { "date" => meta.fetch("temporalCoverage"), "dateType" => "Coverage" } if Date.edtf(meta.fetch("temporalCoverage", nil)).present?
|
|
126
141
|
publication_year = meta.fetch("datePublished")[0..3] if meta.fetch("datePublished", nil).present?
|
|
127
142
|
|
|
128
143
|
if meta.fetch("inLanguage", nil).is_a?(String)
|
|
@@ -180,7 +195,7 @@ module Bolognese
|
|
|
180
195
|
"related_identifiers" => related_identifiers,
|
|
181
196
|
"publication_year" => publication_year,
|
|
182
197
|
"dates" => dates,
|
|
183
|
-
"descriptions" => meta.fetch("description", nil).present? ? [{ "description" => sanitize(meta.fetch("description")), "descriptionType" => "Abstract" }] : nil,
|
|
198
|
+
"descriptions" => meta.fetch("description", nil).present? ? [{ "description" => sanitize(meta.fetch("description"), new_line: true), "descriptionType" => "Abstract" }] : nil,
|
|
184
199
|
"rights_list" => rights_list,
|
|
185
200
|
"version_info" => meta.fetch("version", nil).to_s.presence,
|
|
186
201
|
"subjects" => subjects,
|
|
@@ -236,6 +251,14 @@ module Bolognese
|
|
|
236
251
|
schema_org_related_identifier(meta, relation_type: "isBasedOn")
|
|
237
252
|
end
|
|
238
253
|
|
|
254
|
+
def schema_org_has_translation(meta)
|
|
255
|
+
schema_org_related_identifier(meta, relation_type: "workTranslation", )
|
|
256
|
+
end
|
|
257
|
+
|
|
258
|
+
def schema_org_is_translation_of(meta)
|
|
259
|
+
schema_org_related_identifier(meta, relation_type: "translationOfWork")
|
|
260
|
+
end
|
|
261
|
+
|
|
239
262
|
end
|
|
240
263
|
end
|
|
241
264
|
end
|