bolognese 1.11.3 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ci.yml +1 -1
- data/.github/workflows/release.yml +2 -2
- data/Gemfile.lock +15 -12
- data/bolognese.gemspec +5 -3
- data/lib/bolognese/author_utils.rb +4 -3
- data/lib/bolognese/datacite_utils.rb +18 -10
- data/lib/bolognese/metadata.rb +1 -6
- data/lib/bolognese/metadata_utils.rb +2 -2
- data/lib/bolognese/readers/bibtex_reader.rb +2 -2
- data/lib/bolognese/readers/citeproc_reader.rb +1 -1
- data/lib/bolognese/readers/codemeta_reader.rb +2 -2
- data/lib/bolognese/readers/crosscite_reader.rb +4 -1
- data/lib/bolognese/readers/crossref_reader.rb +3 -4
- data/lib/bolognese/readers/datacite_json_reader.rb +4 -1
- data/lib/bolognese/readers/datacite_reader.rb +19 -3
- data/lib/bolognese/readers/npm_reader.rb +1 -1
- data/lib/bolognese/readers/ris_reader.rb +2 -2
- data/lib/bolognese/readers/schema_org_reader.rb +6 -2
- data/lib/bolognese/utils.rb +23 -6
- data/lib/bolognese/version.rb +1 -1
- data/lib/bolognese/writers/bibtex_writer.rb +1 -1
- data/lib/bolognese/writers/codemeta_writer.rb +1 -1
- data/lib/bolognese/writers/csv_writer.rb +1 -1
- data/lib/bolognese/writers/datacite_json_writer.rb +3 -1
- data/lib/bolognese/writers/jats_writer.rb +6 -3
- data/lib/bolognese/writers/ris_writer.rb +1 -1
- data/lib/bolognese/writers/schema_org_writer.rb +1 -1
- data/resources/kernel-4/include/datacite-relationType-v4.xsd +2 -0
- data/resources/kernel-4/include/datacite-resourceType-v4.xsd +2 -0
- data/resources/kernel-4/metadata.xsd +11 -7
- data/resources/kernel-4.5/include/datacite-contributorType-v4.xsd +35 -0
- data/resources/kernel-4.5/include/datacite-dateType-v4.xsd +25 -0
- data/resources/kernel-4.5/include/datacite-descriptionType-v4.xsd +19 -0
- data/resources/kernel-4.5/include/datacite-funderIdentifierType-v4.xsd +16 -0
- data/resources/kernel-4.5/include/datacite-nameType-v4.xsd +10 -0
- data/resources/kernel-4.5/include/datacite-numberType-v4.xsd +12 -0
- data/resources/kernel-4.5/include/datacite-relatedIdentifierType-v4.xsd +34 -0
- data/resources/kernel-4.5/include/datacite-relationType-v4.xsd +53 -0
- data/resources/kernel-4.5/include/datacite-resourceType-v4.xsd +45 -0
- data/resources/kernel-4.5/include/datacite-titleType-v4.xsd +14 -0
- data/resources/kernel-4.5/include/xml.xsd +286 -0
- data/resources/kernel-4.5/metadata.xsd +711 -0
- data/spec/author_utils_spec.rb +33 -4
- data/spec/datacite_utils_spec.rb +5 -1
- data/spec/fixtures/datacite-example-ROR-nameIdentifiers.xml +18 -2
- data/spec/fixtures/datacite-example-full-v4.5.xml +255 -0
- data/spec/fixtures/datacite-seriesinformation.xml +7 -2
- data/spec/fixtures/datacite-xml-lang.xml +1 -1
- data/spec/fixtures/datacite_blank_name_identifier.xml +22 -0
- data/spec/fixtures/datacite_blank_publisher.xml +18 -0
- data/spec/fixtures/datacite_journal_article.xml +64 -0
- data/spec/fixtures/schema_org.json +1 -0
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/insert_subjects/insert.yml +32 -14
- data/spec/readers/bibtex_reader_spec.rb +2 -0
- data/spec/readers/citeproc_reader_spec.rb +3 -0
- data/spec/readers/codemeta_reader_spec.rb +4 -4
- data/spec/readers/crosscite_reader_spec.rb +2 -0
- data/spec/readers/crossref_reader_spec.rb +41 -41
- data/spec/readers/datacite_json_reader_spec.rb +2 -0
- data/spec/readers/datacite_reader_spec.rb +123 -42
- data/spec/readers/npm_reader_spec.rb +2 -0
- data/spec/readers/ris_reader_spec.rb +3 -0
- data/spec/readers/schema_org_reader_spec.rb +11 -11
- data/spec/spec_helper.rb +1 -0
- data/spec/writers/citation_writer_spec.rb +9 -0
- data/spec/writers/crosscite_writer_spec.rb +7 -0
- data/spec/writers/datacite_json_writer_spec.rb +22 -0
- data/spec/writers/datacite_writer_spec.rb +84 -5
- data/spec/writers/jats_writer_spec.rb +15 -0
- data/spec/writers/rdf_xml_writer_spec.rb +7 -0
- data/spec/writers/schema_org_writer_spec.rb +13 -0
- data/spec/writers/turtle_writer_spec.rb +18 -0
- metadata +60 -22
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: ce52f2eb08396d0e56bdccf06702901fe7c04f1925cacafc71f315a1edda91c3
|
|
4
|
+
data.tar.gz: 29fb5a1d987a95220bbc7a3912aa24735f54924f47c8a74d61d1f51e25b75314
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 4d528d2597900dfb90410504833a83e79b1bda1974d9f77102071cecd3f71c3e1f899d8e59abe24cf1110469807cc0c6382c50a5a62571ceceb9278e7cfa37b7
|
|
7
|
+
data.tar.gz: 341d76ed37284899ea1e74e331026ba6d042d92037a260d7591b9a48b38b7c8a0b06a6908e97917b2c345c5be8b3ac6475cc63a5478891589d624f248137b315
|
data/.github/workflows/ci.yml
CHANGED
data/Gemfile.lock
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
PATH
|
|
2
2
|
remote: .
|
|
3
3
|
specs:
|
|
4
|
-
bolognese (
|
|
4
|
+
bolognese (2.2.0)
|
|
5
5
|
activesupport (>= 4.2.5)
|
|
6
6
|
benchmark_methods (~> 0.7)
|
|
7
7
|
bibtex-ruby (>= 5.1.0)
|
|
@@ -16,9 +16,9 @@ PATH
|
|
|
16
16
|
json-ld-preloaded (~> 3.1, >= 3.1.3)
|
|
17
17
|
jsonlint (~> 0.3.0)
|
|
18
18
|
loofah (~> 2.0, >= 2.0.3)
|
|
19
|
-
maremma (
|
|
19
|
+
maremma (~> 5.0)
|
|
20
20
|
namae (~> 1.0)
|
|
21
|
-
nokogiri (
|
|
21
|
+
nokogiri (~> 1.16, >= 1.16.2)
|
|
22
22
|
oj (~> 3.10)
|
|
23
23
|
oj_mimic_json (~> 1.0, >= 1.0.1)
|
|
24
24
|
postrank-uri (~> 1.0, >= 1.0.18)
|
|
@@ -41,6 +41,7 @@ GEM
|
|
|
41
41
|
bibtex-ruby (6.0.0)
|
|
42
42
|
latex-decode (~> 0.0)
|
|
43
43
|
builder (3.2.4)
|
|
44
|
+
byebug (11.1.3)
|
|
44
45
|
citeproc (1.0.10)
|
|
45
46
|
namae (~> 1.0)
|
|
46
47
|
citeproc-ruby (1.1.14)
|
|
@@ -94,10 +95,10 @@ GEM
|
|
|
94
95
|
concurrent-ruby (~> 1.0)
|
|
95
96
|
iso8601 (0.9.1)
|
|
96
97
|
json (2.6.2)
|
|
97
|
-
json-canonicalization (0.3.
|
|
98
|
-
json-ld (3.2.
|
|
98
|
+
json-canonicalization (0.3.1)
|
|
99
|
+
json-ld (3.2.4)
|
|
99
100
|
htmlentities (~> 4.3)
|
|
100
|
-
json-canonicalization (~> 0.3
|
|
101
|
+
json-canonicalization (~> 0.3)
|
|
101
102
|
link_header (~> 0.0, >= 0.0.8)
|
|
102
103
|
multi_json (~> 1.15)
|
|
103
104
|
rack (>= 2.2, < 4)
|
|
@@ -113,7 +114,7 @@ GEM
|
|
|
113
114
|
loofah (2.21.3)
|
|
114
115
|
crass (~> 1.0.2)
|
|
115
116
|
nokogiri (>= 1.12.0)
|
|
116
|
-
maremma (
|
|
117
|
+
maremma (5.0.0)
|
|
117
118
|
activesupport (>= 4.2.5)
|
|
118
119
|
addressable (>= 2.3.6)
|
|
119
120
|
builder (~> 3.2, >= 3.2.2)
|
|
@@ -124,17 +125,17 @@ GEM
|
|
|
124
125
|
faraday-follow_redirects (~> 0.3.0)
|
|
125
126
|
faraday-gzip (~> 0.1.0)
|
|
126
127
|
faraday-multipart (~> 1.0.4)
|
|
127
|
-
nokogiri (
|
|
128
|
+
nokogiri (~> 1.16, >= 1.16.2)
|
|
128
129
|
oj (>= 2.8.3)
|
|
129
130
|
oj_mimic_json (~> 1.0, >= 1.0.1)
|
|
130
131
|
matrix (0.4.2)
|
|
131
|
-
mini_portile2 (2.8.
|
|
132
|
+
mini_portile2 (2.8.5)
|
|
132
133
|
minitest (5.18.0)
|
|
133
134
|
multi_json (1.15.0)
|
|
134
135
|
multipart-post (2.3.0)
|
|
135
136
|
namae (1.1.1)
|
|
136
|
-
nokogiri (1.
|
|
137
|
-
mini_portile2 (~> 2.8.
|
|
137
|
+
nokogiri (1.16.2)
|
|
138
|
+
mini_portile2 (~> 2.8.2)
|
|
138
139
|
racc (~> 1.4)
|
|
139
140
|
oj (3.14.2)
|
|
140
141
|
oj_mimic_json (1.0.1)
|
|
@@ -222,7 +223,9 @@ PLATFORMS
|
|
|
222
223
|
DEPENDENCIES
|
|
223
224
|
bolognese!
|
|
224
225
|
bundler (>= 1.0)
|
|
226
|
+
byebug
|
|
225
227
|
hashdiff (>= 1.0.0.beta1, < 2.0.0)
|
|
228
|
+
json-canonicalization (= 0.3.1)
|
|
226
229
|
rack-test (~> 0)
|
|
227
230
|
rake (~> 12.0)
|
|
228
231
|
rspec (~> 3.4)
|
|
@@ -232,4 +235,4 @@ DEPENDENCIES
|
|
|
232
235
|
webmock (~> 3.0, >= 3.0.1)
|
|
233
236
|
|
|
234
237
|
BUNDLED WITH
|
|
235
|
-
2.
|
|
238
|
+
2.5.5
|
data/bolognese.gemspec
CHANGED
|
@@ -13,12 +13,12 @@ Gem::Specification.new do |s|
|
|
|
13
13
|
s.version = Bolognese::VERSION
|
|
14
14
|
s.extra_rdoc_files = ["README.md"]
|
|
15
15
|
s.license = 'MIT'
|
|
16
|
-
s.required_ruby_version = ['>=
|
|
16
|
+
s.required_ruby_version = ['>=3.0']
|
|
17
17
|
|
|
18
18
|
# Declare dependencies here, rather than in the Gemfile
|
|
19
|
-
s.add_dependency 'maremma', '
|
|
19
|
+
s.add_dependency 'maremma', '~> 5.0'
|
|
20
20
|
#s.add_dependency 'faraday', '~> 0.17.3'
|
|
21
|
-
s.add_dependency 'nokogiri', '
|
|
21
|
+
s.add_dependency 'nokogiri', '~> 1.16', '>= 1.16.2'
|
|
22
22
|
s.add_dependency 'loofah', '~> 2.0', '>= 2.0.3'
|
|
23
23
|
s.add_dependency 'builder', '~> 3.2', '>= 3.2.2'
|
|
24
24
|
s.add_dependency 'activesupport', '>= 4.2.5'
|
|
@@ -49,6 +49,8 @@ Gem::Specification.new do |s|
|
|
|
49
49
|
s.add_development_dependency 'webmock', '~> 3.0', '>= 3.0.1'
|
|
50
50
|
s.add_development_dependency 'simplecov', '0.17.1'
|
|
51
51
|
s.add_development_dependency 'hashdiff', ['>= 1.0.0.beta1', '< 2.0.0']
|
|
52
|
+
s.add_development_dependency 'byebug'
|
|
53
|
+
s.add_development_dependency 'json-canonicalization', '0.3.1'
|
|
52
54
|
|
|
53
55
|
s.require_paths = ["lib"]
|
|
54
56
|
s.files = `git ls-files`.split($/)
|
|
@@ -30,19 +30,20 @@ module Bolognese
|
|
|
30
30
|
name_type = parse_attributes(author.fetch("creatorName", nil), content: "nameType", first: true) || parse_attributes(author.fetch("contributorName", nil), content: "nameType", first: true)
|
|
31
31
|
|
|
32
32
|
name_identifiers = Array.wrap(author.fetch("nameIdentifier", nil)).map do |ni|
|
|
33
|
+
name_identifier = ni["__content__"].strip if ni["__content__"].present?
|
|
33
34
|
if ni["nameIdentifierScheme"] == "ORCID"
|
|
34
35
|
{
|
|
35
|
-
"nameIdentifier" => normalize_orcid(
|
|
36
|
+
"nameIdentifier" => normalize_orcid(name_identifier),
|
|
36
37
|
"schemeUri" => "https://orcid.org",
|
|
37
38
|
"nameIdentifierScheme" => "ORCID" }.compact
|
|
38
39
|
elsif ni["nameIdentifierScheme"] == "ROR"
|
|
39
40
|
{
|
|
40
|
-
"nameIdentifier" => normalize_ror(
|
|
41
|
+
"nameIdentifier" => normalize_ror(name_identifier),
|
|
41
42
|
"schemeUri" => "https://ror.org",
|
|
42
43
|
"nameIdentifierScheme" => "ROR" }.compact
|
|
43
44
|
else
|
|
44
45
|
{
|
|
45
|
-
"nameIdentifier" =>
|
|
46
|
+
"nameIdentifier" => name_identifier,
|
|
46
47
|
"schemeUri" => ni.fetch("schemeURI", nil),
|
|
47
48
|
"nameIdentifierScheme" => ni["nameIdentifierScheme"] }.compact
|
|
48
49
|
end
|
|
@@ -106,9 +106,19 @@ module Bolognese
|
|
|
106
106
|
end
|
|
107
107
|
end
|
|
108
108
|
end
|
|
109
|
-
|
|
109
|
+
|
|
110
110
|
def insert_publisher(xml)
|
|
111
|
-
|
|
111
|
+
if publisher.is_a?(Hash)
|
|
112
|
+
attributes = {
|
|
113
|
+
'publisherIdentifier' => publisher["publisherIdentifier"],
|
|
114
|
+
'publisherIdentifierScheme' => publisher["publisherIdentifierScheme"],
|
|
115
|
+
'schemeURI' => publisher["schemeUri"],
|
|
116
|
+
"xml:lang" => publisher["lang"]
|
|
117
|
+
}.compact
|
|
118
|
+
xml.publisher(publisher["name"] || container && container["title"], attributes)
|
|
119
|
+
else
|
|
120
|
+
xml.publisher(publisher || container && container["title"])
|
|
121
|
+
end
|
|
112
122
|
end
|
|
113
123
|
|
|
114
124
|
def insert_publication_year(xml)
|
|
@@ -171,7 +181,7 @@ module Bolognese
|
|
|
171
181
|
s["subject"] = subject
|
|
172
182
|
end
|
|
173
183
|
|
|
174
|
-
attributes = { "subjectScheme" => s["subjectScheme"], "schemeURI" => s["schemeUri"], "valueURI" => s["valueUri"], "xml:lang" => s["lang"] }.compact
|
|
184
|
+
attributes = { "subjectScheme" => s["subjectScheme"], "schemeURI" => s["schemeUri"], "valueURI" => s["valueUri"], "classificationCode" => s["classificationCode"], "xml:lang" => s["lang"] }.compact
|
|
175
185
|
|
|
176
186
|
xml.subject(s["subject"], attributes)
|
|
177
187
|
end
|
|
@@ -375,13 +385,11 @@ module Bolognese
|
|
|
375
385
|
end
|
|
376
386
|
end
|
|
377
387
|
if geo_location["geoLocationPolygon"]
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
xml.polygonPoint
|
|
382
|
-
|
|
383
|
-
xml.pointLongitude(polygon_point.dig("polygonPoint", "pointLongitude"))
|
|
384
|
-
end
|
|
388
|
+
xml.geoLocationPolygon do
|
|
389
|
+
Array.wrap(geo_location["geoLocationPolygon"]).each do |polygon_point|
|
|
390
|
+
xml.polygonPoint do
|
|
391
|
+
xml.pointLatitude(polygon_point.dig("polygonPoint", "pointLatitude"))
|
|
392
|
+
xml.pointLongitude(polygon_point.dig("polygonPoint", "pointLongitude"))
|
|
385
393
|
end
|
|
386
394
|
end
|
|
387
395
|
end
|
data/lib/bolognese/metadata.rb
CHANGED
|
@@ -156,11 +156,6 @@ module Bolognese
|
|
|
156
156
|
@descriptions ||= meta.fetch("descriptions", nil)
|
|
157
157
|
end
|
|
158
158
|
|
|
159
|
-
def abstract_description
|
|
160
|
-
# Fetch the first description with descriptionType "Abstract"
|
|
161
|
-
@abstract_description ||= descriptions&.find { |d| d["descriptionType"] == "Abstract" }
|
|
162
|
-
end
|
|
163
|
-
|
|
164
159
|
def rights_list
|
|
165
160
|
@rights_list ||= meta.fetch("rights_list", nil)
|
|
166
161
|
end
|
|
@@ -222,7 +217,7 @@ module Bolognese
|
|
|
222
217
|
end
|
|
223
218
|
|
|
224
219
|
def publisher
|
|
225
|
-
@publisher ||= meta.fetch("publisher", nil)
|
|
220
|
+
@publisher ||= normalize_publisher(meta["publisher"]) if meta.fetch("publisher", nil).present?
|
|
226
221
|
end
|
|
227
222
|
|
|
228
223
|
def identifiers
|
|
@@ -96,7 +96,7 @@ module Bolognese
|
|
|
96
96
|
if container.present?
|
|
97
97
|
container["title"]
|
|
98
98
|
elsif types["citeproc"] == "article-journal"
|
|
99
|
-
publisher
|
|
99
|
+
publisher["name"] if publisher.present?
|
|
100
100
|
else
|
|
101
101
|
nil
|
|
102
102
|
end
|
|
@@ -161,7 +161,7 @@ module Bolognese
|
|
|
161
161
|
"volume" => container.to_h["volume"],
|
|
162
162
|
"issue" => container.to_h["issue"],
|
|
163
163
|
"page" => page,
|
|
164
|
-
"publisher" => publisher,
|
|
164
|
+
"publisher" => publisher["name"],
|
|
165
165
|
"title" => parse_attributes(titles, content: "title", first: true),
|
|
166
166
|
"URL" => url,
|
|
167
167
|
"copyright" => Array.wrap(rights_list).map { |l| l["rights"] }.first,
|
|
@@ -86,11 +86,11 @@ module Bolognese
|
|
|
86
86
|
"titles" => meta.try(:title).present? ? [{ "title" => meta.try(:title).to_s }] : [],
|
|
87
87
|
"creators" => creators,
|
|
88
88
|
"container" => container,
|
|
89
|
-
"publisher" => meta.try(:publisher).to_s
|
|
89
|
+
"publisher" => meta.try(:publisher).present? ? { "name" => meta.publisher.to_s } : nil,
|
|
90
90
|
"related_identifiers" => related_identifiers,
|
|
91
91
|
"dates" => dates,
|
|
92
92
|
"publication_year" => publication_year,
|
|
93
|
-
"descriptions" => meta.try(:abstract).present? ? [{ "description" => meta.try(:abstract) && sanitize(meta.abstract.to_s).presence, "descriptionType" => "Abstract" }] : [],
|
|
93
|
+
"descriptions" => meta.try(:abstract).present? ? [{ "description" => meta.try(:abstract) && sanitize(meta.abstract.to_s, new_line: true).presence, "descriptionType" => "Abstract" }] : [],
|
|
94
94
|
"rights_list" => rights_list,
|
|
95
95
|
"state" => state
|
|
96
96
|
}.merge(read_options)
|
|
@@ -107,7 +107,7 @@ module Bolognese
|
|
|
107
107
|
"related_identifiers" => related_identifiers,
|
|
108
108
|
"dates" => dates,
|
|
109
109
|
"publication_year" => publication_year,
|
|
110
|
-
"descriptions" => meta.fetch("abstract", nil).present? ? [{ "description" => sanitize(meta.fetch("abstract")), "descriptionType" => "Abstract" }] : [],
|
|
110
|
+
"descriptions" => meta.fetch("abstract", nil).present? ? [{ "description" => sanitize(meta.fetch("abstract"), new_line: true), "descriptionType" => "Abstract" }] : [],
|
|
111
111
|
"rights_list" => rights_list,
|
|
112
112
|
"version_info" => meta.fetch("version", nil),
|
|
113
113
|
"subjects" => subjects,
|
|
@@ -43,7 +43,7 @@ module Bolognese
|
|
|
43
43
|
dates << { "date" => meta.fetch("dateCreated"), "dateType" => "Created" } if meta.fetch("dateCreated", nil).present?
|
|
44
44
|
dates << { "date" => meta.fetch("dateModified"), "dateType" => "Updated" } if meta.fetch("dateModified", nil).present?
|
|
45
45
|
publication_year = meta.fetch("datePublished")[0..3] if meta.fetch("datePublished", nil).present?
|
|
46
|
-
publisher = meta.fetch("publisher", nil)
|
|
46
|
+
publisher = { "name" => meta.fetch("publisher", nil) } if meta.fetch("publisher", nil).present?
|
|
47
47
|
state = meta.present? || read_options.present? ? "findable" : "not_found"
|
|
48
48
|
schema_org = meta.fetch("@type", nil)
|
|
49
49
|
types = {
|
|
@@ -76,7 +76,7 @@ module Bolognese
|
|
|
76
76
|
#{}"is_part_of" => is_part_of,
|
|
77
77
|
"dates" => dates,
|
|
78
78
|
"publication_year" => publication_year,
|
|
79
|
-
"descriptions" => meta.fetch("description", nil).present? ? [{ "description" => sanitize(meta.fetch("description")), "descriptionType" => "Abstract" }] : nil,
|
|
79
|
+
"descriptions" => meta.fetch("description", nil).present? ? [{ "description" => sanitize(meta.fetch("description"), new_line: true), "descriptionType" => "Abstract" }] : nil,
|
|
80
80
|
"rights_list" => rights_list,
|
|
81
81
|
"version_info" => meta.fetch("version", nil),
|
|
82
82
|
"subjects" => subjects,
|
|
@@ -7,7 +7,10 @@ module Bolognese
|
|
|
7
7
|
errors = jsonlint(string)
|
|
8
8
|
return { "errors" => errors } if errors.present?
|
|
9
9
|
|
|
10
|
-
string.present? ? Maremma.from_json(string) : {}
|
|
10
|
+
crosscite = string.present? ? Maremma.from_json(string) : {}
|
|
11
|
+
crosscite["publisher"] = normalize_publisher(crosscite["publisher"]) if crosscite.fetch("publisher", nil).present?
|
|
12
|
+
|
|
13
|
+
crosscite
|
|
11
14
|
end
|
|
12
15
|
end
|
|
13
16
|
end
|
|
@@ -40,8 +40,7 @@ module Bolognese
|
|
|
40
40
|
journal_metadata = nil
|
|
41
41
|
journal_issue = {}
|
|
42
42
|
journal_metadata = nil
|
|
43
|
-
publisher = query.dig("crm_item", 0)
|
|
44
|
-
publisher = nil unless publisher.is_a?(String)
|
|
43
|
+
publisher = query.dig("crm_item", 0).is_a?(String) ? { "name" => query.dig("crm_item", 0) } : nil
|
|
45
44
|
|
|
46
45
|
case model
|
|
47
46
|
when "book"
|
|
@@ -232,11 +231,11 @@ module Bolognese
|
|
|
232
231
|
|
|
233
232
|
def crossref_description(bibliographic_metadata)
|
|
234
233
|
abstract = Array.wrap(bibliographic_metadata.dig("abstract")).map do |r|
|
|
235
|
-
{ "descriptionType" => "Abstract", "description" => sanitize(parse_attributes(r, content: 'p')) }.compact
|
|
234
|
+
{ "descriptionType" => "Abstract", "description" => sanitize(parse_attributes(r, content: 'p'), new_line: true) }.compact
|
|
236
235
|
end
|
|
237
236
|
|
|
238
237
|
description = Array.wrap(bibliographic_metadata.dig("description")).map do |r|
|
|
239
|
-
{ "descriptionType" => "Other", "description" => sanitize(parse_attributes(r)) }.compact
|
|
238
|
+
{ "descriptionType" => "Other", "description" => sanitize(parse_attributes(r), new_line: true) }.compact
|
|
240
239
|
end
|
|
241
240
|
|
|
242
241
|
(abstract + description)
|
|
@@ -7,7 +7,10 @@ module Bolognese
|
|
|
7
7
|
errors = jsonlint(string)
|
|
8
8
|
return { "errors" => errors } if errors.present?
|
|
9
9
|
|
|
10
|
-
string.present? ? Maremma.from_json(string).transform_keys! { |key| key.underscore } : {}
|
|
10
|
+
datacite_json = string.present? ? Maremma.from_json(string).transform_keys! { |key| key.underscore } : {}
|
|
11
|
+
datacite_json["publisher"] = normalize_publisher(datacite_json["publisher"]) if datacite_json.fetch("publisher", nil).present?
|
|
12
|
+
|
|
13
|
+
datacite_json
|
|
11
14
|
end
|
|
12
15
|
end
|
|
13
16
|
end
|
|
@@ -94,13 +94,29 @@ module Bolognese
|
|
|
94
94
|
|
|
95
95
|
titles = get_titles(meta)
|
|
96
96
|
|
|
97
|
+
publisher = Array.wrap(meta.dig("publisher")).map do |r|
|
|
98
|
+
if r.blank?
|
|
99
|
+
nil
|
|
100
|
+
elsif r.is_a?(String)
|
|
101
|
+
{ "name" => r.strip }
|
|
102
|
+
elsif r.is_a?(Hash)
|
|
103
|
+
{
|
|
104
|
+
"name" => r["__content__"].present? ? r["__content__"].strip : nil,
|
|
105
|
+
"publisherIdentifier" => r["publisherIdentifierScheme"] == "ROR" ? normalize_ror(r["publisherIdentifier"]) : r["publisherIdentifier"],
|
|
106
|
+
"publisherIdentifierScheme" => r["publisherIdentifierScheme"],
|
|
107
|
+
"schemeUri" => r["schemeURI"],
|
|
108
|
+
"lang" => r["lang"],
|
|
109
|
+
}.compact
|
|
110
|
+
end
|
|
111
|
+
end.compact.first
|
|
112
|
+
|
|
97
113
|
descriptions = Array.wrap(meta.dig("descriptions", "description")).map do |r|
|
|
98
114
|
if r.blank?
|
|
99
115
|
nil
|
|
100
116
|
elsif r.is_a?(String)
|
|
101
|
-
{ "description" => sanitize(r), "descriptionType" => "Abstract" }
|
|
117
|
+
{ "description" => sanitize(r, new_line: true), "descriptionType" => "Abstract" }
|
|
102
118
|
elsif r.is_a?(Hash)
|
|
103
|
-
{ "description" => sanitize(r["__content__"]), "descriptionType" => r["descriptionType"], "lang" => r["lang"] }.compact
|
|
119
|
+
{ "description" => sanitize(r["__content__"], new_line: true), "descriptionType" => r["descriptionType"], "lang" => r["lang"] }.compact
|
|
104
120
|
end
|
|
105
121
|
end.compact
|
|
106
122
|
rights_list = Array.wrap(meta.dig("rightsList", "rights")).map do |r|
|
|
@@ -287,7 +303,7 @@ module Bolognese
|
|
|
287
303
|
"creators" => get_authors(Array.wrap(meta.dig("creators", "creator"))),
|
|
288
304
|
"contributors" => get_authors(Array.wrap(meta.dig("contributors", "contributor"))),
|
|
289
305
|
"container" => set_container(meta),
|
|
290
|
-
"publisher" =>
|
|
306
|
+
"publisher" => publisher,
|
|
291
307
|
"agency" => "datacite",
|
|
292
308
|
"funding_references" => funding_references,
|
|
293
309
|
"dates" => dates,
|
|
@@ -103,7 +103,7 @@ module Bolognese
|
|
|
103
103
|
#"related_identifiers" => related_identifiers,
|
|
104
104
|
#"dates" => dates,
|
|
105
105
|
#"publication_year" => publication_year,
|
|
106
|
-
"descriptions" => meta.fetch("description", nil).present? ? [{ "description" => sanitize(meta.fetch("description")), "descriptionType" => "Abstract" }] : [],
|
|
106
|
+
"descriptions" => meta.fetch("description", nil).present? ? [{ "description" => sanitize(meta.fetch("description"), new_line: true), "descriptionType" => "Abstract" }] : [],
|
|
107
107
|
"rights_list" => rights_list,
|
|
108
108
|
"version_info" => meta.fetch("version", nil),
|
|
109
109
|
"subjects" => subjects
|
|
@@ -89,12 +89,12 @@ module Bolognese
|
|
|
89
89
|
"url" => meta.fetch("UR", nil),
|
|
90
90
|
"titles" => meta.fetch("T1", nil).present? ? [{ "title" => meta.fetch("T1", nil) }] : nil,
|
|
91
91
|
"creators" => get_authors(author),
|
|
92
|
-
"publisher" => meta.fetch("PB", "(:unav)"),
|
|
92
|
+
"publisher" => { "name" => meta.fetch("PB", "(:unav)") },
|
|
93
93
|
"container" => container,
|
|
94
94
|
"related_identifiers" => related_identifiers,
|
|
95
95
|
"dates" => dates,
|
|
96
96
|
"publication_year" => publication_year,
|
|
97
|
-
"descriptions" => meta.fetch("AB", nil).present? ? [{ "description" => sanitize(meta.fetch("AB")), "descriptionType" => "Abstract" }] : nil,
|
|
97
|
+
"descriptions" => meta.fetch("AB", nil).present? ? [{ "description" => sanitize(meta.fetch("AB"), new_line: true), "descriptionType" => "Abstract" }] : nil,
|
|
98
98
|
"subjects" => subjects,
|
|
99
99
|
"language" => meta.fetch("LA", nil),
|
|
100
100
|
"state" => state
|
|
@@ -74,7 +74,11 @@ module Bolognese
|
|
|
74
74
|
creators = get_authors(from_schema_org_creators(Array.wrap(authors)))
|
|
75
75
|
end
|
|
76
76
|
contributors = get_authors(from_schema_org_contributors(Array.wrap(meta.fetch("editor", nil))))
|
|
77
|
-
|
|
77
|
+
|
|
78
|
+
publisher = {
|
|
79
|
+
"name" => parse_attributes(meta.fetch("publisher", nil), content: "name", first: true),
|
|
80
|
+
"publisherIdentifier" => parse_attributes(meta.fetch("publisher", nil), content: "@id", first: true),
|
|
81
|
+
}.compact if meta.fetch("publisher", nil).present?
|
|
78
82
|
|
|
79
83
|
ct = (schema_org == "Dataset") ? "includedInDataCatalog" : "Periodical"
|
|
80
84
|
container = if meta.fetch(ct, nil).present?
|
|
@@ -180,7 +184,7 @@ module Bolognese
|
|
|
180
184
|
"related_identifiers" => related_identifiers,
|
|
181
185
|
"publication_year" => publication_year,
|
|
182
186
|
"dates" => dates,
|
|
183
|
-
"descriptions" => meta.fetch("description", nil).present? ? [{ "description" => sanitize(meta.fetch("description")), "descriptionType" => "Abstract" }] : nil,
|
|
187
|
+
"descriptions" => meta.fetch("description", nil).present? ? [{ "description" => sanitize(meta.fetch("description"), new_line: true), "descriptionType" => "Abstract" }] : nil,
|
|
184
188
|
"rights_list" => rights_list,
|
|
185
189
|
"version_info" => meta.fetch("version", nil).to_s.presence,
|
|
186
190
|
"subjects" => subjects,
|
data/lib/bolognese/utils.rb
CHANGED
|
@@ -600,12 +600,12 @@ module Bolognese
|
|
|
600
600
|
end
|
|
601
601
|
|
|
602
602
|
def validate_orcid(orcid)
|
|
603
|
-
orcid = Array(/\A(?:(?:http|https):\/\/(?:(?:www|sandbox)?\.)?orcid\.org\/)?(\d{4}[[:space:]-]\d{4}[[:space:]-]\d{4}[[:space:]-]\d{3}[0-9X]+)\z/.match(orcid)).last
|
|
603
|
+
orcid = Array(/\A(?:(?:http|https):\/\/(?:(?:www|sandbox)?\.)?orcid\.org\/)?(\d{4}[[:space:]-]\d{4}[[:space:]-]\d{4}[[:space:]-]\d{3}[0-9X]+)\/{0,1}\z/.match(orcid)).last
|
|
604
604
|
orcid.gsub(/[[:space:]]/, "-") if orcid.present?
|
|
605
605
|
end
|
|
606
606
|
|
|
607
607
|
def validate_ror(ror)
|
|
608
|
-
Array(/^(?:(?:(?:http|https):\/\/)?ror\.org\/)?(0\w{6}\d{2})$/.match(ror)).last
|
|
608
|
+
Array(/^(?:(?:(?:http|https):\/\/)?ror\.org\/)?(0\w{6}\d{2})\/{0,1}$/.match(ror)).last
|
|
609
609
|
end
|
|
610
610
|
|
|
611
611
|
def validate_orcid_scheme(orcid_scheme)
|
|
@@ -773,6 +773,14 @@ module Bolognese
|
|
|
773
773
|
nil
|
|
774
774
|
end
|
|
775
775
|
|
|
776
|
+
def normalize_publisher(publisher)
|
|
777
|
+
if publisher.respond_to?(:to_hash)
|
|
778
|
+
publisher
|
|
779
|
+
elsif publisher.respond_to?(:to_str)
|
|
780
|
+
{ "name" => publisher }
|
|
781
|
+
end
|
|
782
|
+
end
|
|
783
|
+
|
|
776
784
|
def to_datacite_json(element, options={})
|
|
777
785
|
a = Array.wrap(element).map do |e|
|
|
778
786
|
e.inject({}) {|h, (k,v)| h[k.dasherize] = v; h }
|
|
@@ -1057,12 +1065,16 @@ module Bolognese
|
|
|
1057
1065
|
custom_scrubber = Bolognese::WhitelistScrubber.new(options)
|
|
1058
1066
|
|
|
1059
1067
|
if text.is_a?(String)
|
|
1060
|
-
|
|
1061
|
-
|
|
1068
|
+
if options[:new_line]
|
|
1069
|
+
# Remove multiple spaces, tabs, and other whitespace characters while preserving single spaces and new lines
|
|
1070
|
+
Loofah.scrub_fragment(text, custom_scrubber).to_s.gsub(/[ \t]+/, ' ').strip
|
|
1071
|
+
else
|
|
1072
|
+
Loofah.scrub_fragment(text, custom_scrubber).to_s.squish
|
|
1073
|
+
end
|
|
1062
1074
|
elsif text.is_a?(Hash)
|
|
1063
|
-
sanitize(text.fetch(content, nil))
|
|
1075
|
+
sanitize(text.fetch(content, nil), new_line: options[:new_line])
|
|
1064
1076
|
elsif text.is_a?(Array)
|
|
1065
|
-
a = text.map { |e| e.is_a?(Hash) ? sanitize(e.fetch(content, nil)) : sanitize(e) }.uniq
|
|
1077
|
+
a = text.map { |e| e.is_a?(Hash) ? sanitize(e.fetch(content, nil), new_line: options[:new_line]) : sanitize(e, new_line: options[:new_line]) }.uniq
|
|
1066
1078
|
a = options[:first] ? a.first : a.unwrap
|
|
1067
1079
|
else
|
|
1068
1080
|
nil
|
|
@@ -1410,5 +1422,10 @@ module Bolognese
|
|
|
1410
1422
|
}
|
|
1411
1423
|
end
|
|
1412
1424
|
end
|
|
1425
|
+
|
|
1426
|
+
def abstract_description
|
|
1427
|
+
# Fetch the first description with descriptionType "Abstract"
|
|
1428
|
+
descriptions&.find { |d| d["descriptionType"] == "Abstract" }
|
|
1429
|
+
end
|
|
1413
1430
|
end
|
|
1414
1431
|
end
|
data/lib/bolognese/version.rb
CHANGED
|
@@ -21,7 +21,7 @@ module Bolognese
|
|
|
21
21
|
volume: container.to_h["volume"],
|
|
22
22
|
issue: container.to_h["issue"],
|
|
23
23
|
pages: pages,
|
|
24
|
-
publisher: publisher,
|
|
24
|
+
publisher: publisher["name"],
|
|
25
25
|
year: publication_year,
|
|
26
26
|
copyright: Array.wrap(rights_list).map { |l| l["rights"] }.first,
|
|
27
27
|
}.compact
|
|
@@ -19,7 +19,7 @@ module Bolognese
|
|
|
19
19
|
"tags" => subjects.present? ? Array.wrap(subjects).map { |k| parse_attributes(k, content: "subject", first: true) } : nil,
|
|
20
20
|
"datePublished" => get_date(dates, "Issued") || publication_year,
|
|
21
21
|
"dateModified" => get_date(dates, "Updated"),
|
|
22
|
-
"publisher" => publisher,
|
|
22
|
+
"publisher" => publisher["name"],
|
|
23
23
|
"license" => Array.wrap(rights_list).map { |l| l["rightsUri"] }.compact.unwrap,
|
|
24
24
|
}.compact
|
|
25
25
|
JSON.pretty_generate hsh.presence
|
|
@@ -15,7 +15,7 @@ module Bolognese
|
|
|
15
15
|
resource_type: types["resourceType"],
|
|
16
16
|
title: parse_attributes(titles, content: "title", first: true),
|
|
17
17
|
author: authors_as_string(creators),
|
|
18
|
-
publisher: publisher,
|
|
18
|
+
publisher: publisher["name"],
|
|
19
19
|
publication_year: publication_year
|
|
20
20
|
}.values
|
|
21
21
|
|
|
@@ -4,7 +4,9 @@ module Bolognese
|
|
|
4
4
|
module Writers
|
|
5
5
|
module DataciteJsonWriter
|
|
6
6
|
def datacite_json
|
|
7
|
-
|
|
7
|
+
if crosscite_hsh.present?
|
|
8
|
+
JSON.pretty_generate crosscite_hsh.transform_keys! { |key| key.camelcase(uppercase_first_letter = :lower) }
|
|
9
|
+
end
|
|
8
10
|
end
|
|
9
11
|
end
|
|
10
12
|
end
|
|
@@ -77,16 +77,19 @@ module Bolognese
|
|
|
77
77
|
|
|
78
78
|
def insert_source(xml)
|
|
79
79
|
if is_chapter?
|
|
80
|
-
xml.source(publisher)
|
|
80
|
+
xml.source(publisher["name"])
|
|
81
81
|
elsif is_article? || is_data?
|
|
82
|
-
xml.source(container && container["title"] || publisher)
|
|
82
|
+
xml.source(container && container["title"] || publisher["name"])
|
|
83
83
|
else
|
|
84
84
|
xml.source(parse_attributes(titles, content: "title", first: true))
|
|
85
85
|
end
|
|
86
86
|
end
|
|
87
87
|
|
|
88
88
|
def insert_publisher_name(xml)
|
|
89
|
-
|
|
89
|
+
attributes = {
|
|
90
|
+
"xml:lang" => publisher["lang"]
|
|
91
|
+
}.compact
|
|
92
|
+
xml.send("publisher-name", attributes, publisher["name"])
|
|
90
93
|
end
|
|
91
94
|
|
|
92
95
|
def insert_publication_date(xml)
|
|
@@ -14,7 +14,7 @@ module Bolognese
|
|
|
14
14
|
"AB" => parse_attributes(abstract_description, content: "description", first: true),
|
|
15
15
|
"KW" => Array.wrap(subjects).map { |k| parse_attributes(k, content: "subject", first: true) }.presence,
|
|
16
16
|
"PY" => publication_year,
|
|
17
|
-
"PB" => publisher,
|
|
17
|
+
"PB" => publisher["name"],
|
|
18
18
|
"LA" => language,
|
|
19
19
|
"VL" => container.to_h["volume"],
|
|
20
20
|
"IS" => container.to_h["issue"],
|
|
@@ -37,7 +37,7 @@ module Bolognese
|
|
|
37
37
|
"schemaVersion" => schema_version,
|
|
38
38
|
"periodical" => types.present? ? ((types["schemaOrg"] != "Dataset") && container.present? ? to_schema_org(container) : nil) : nil,
|
|
39
39
|
"includedInDataCatalog" => types.present? ? ((types["schemaOrg"] == "Dataset") && container.present? ? to_schema_org_container(container, type: "Dataset") : nil) : nil,
|
|
40
|
-
"publisher" => publisher.present? ? { "@type" => "Organization", "name" => publisher } : nil,
|
|
40
|
+
"publisher" => publisher.present? ? { "@type" => "Organization", "@id" => publisher["publisherIdentifier"], "name" => publisher["name"] }.compact : nil,
|
|
41
41
|
"funder" => to_schema_org_funder(funding_references),
|
|
42
42
|
"provider" => agency.present? ? { "@type" => "Organization", "name" => agency } : nil
|
|
43
43
|
}.compact.presence
|
|
@@ -46,6 +46,8 @@
|
|
|
46
46
|
<xs:enumeration value="IsRequiredBy" />
|
|
47
47
|
<xs:enumeration value="Obsoletes" />
|
|
48
48
|
<xs:enumeration value="IsObsoletedBy" />
|
|
49
|
+
<xs:enumeration value="Collects" />
|
|
50
|
+
<xs:enumeration value="IsCollectedBy" />
|
|
49
51
|
</xs:restriction>
|
|
50
52
|
</xs:simpleType>
|
|
51
53
|
</xs:schema>
|
|
@@ -22,6 +22,7 @@
|
|
|
22
22
|
<xs:enumeration value="Dissertation" />
|
|
23
23
|
<xs:enumeration value="Event" />
|
|
24
24
|
<xs:enumeration value="Image" />
|
|
25
|
+
<xs:enumeration value="Instrument" />
|
|
25
26
|
<xs:enumeration value="InteractiveResource" />
|
|
26
27
|
<xs:enumeration value="Journal" />
|
|
27
28
|
<xs:enumeration value="JournalArticle" />
|
|
@@ -35,6 +36,7 @@
|
|
|
35
36
|
<xs:enumeration value="Software" />
|
|
36
37
|
<xs:enumeration value="Sound" />
|
|
37
38
|
<xs:enumeration value="Standard" />
|
|
39
|
+
<xs:enumeration value="StudyRegistration" />
|
|
38
40
|
<xs:enumeration value="Text" />
|
|
39
41
|
<xs:enumeration value="Workflow" />
|
|
40
42
|
<xs:enumeration value="Other" />
|