bolognese 2.6.1 → 2.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/lib/bolognese/author_utils.rb +1 -40
- data/lib/bolognese/doi_utils.rb +4 -3
- data/lib/bolognese/readers/citeproc_reader.rb +1 -1
- data/lib/bolognese/readers/codemeta_reader.rb +1 -1
- data/lib/bolognese/readers/datacite_reader.rb +11 -14
- data/lib/bolognese/readers/ris_reader.rb +1 -1
- data/lib/bolognese/readers/schema_org_reader.rb +1 -1
- data/lib/bolognese/utils.rb +16 -101
- data/lib/bolognese/version.rb +1 -1
- data/lib/bolognese/writers/jats_writer.rb +3 -2
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 835ab3fe44434e73ae854befe4f6acb61ea005c5fdd95833103c7a692b19378c
|
|
4
|
+
data.tar.gz: 5b5085360ceae14323ae37e82d517dade2e87cc0c8b631a2a1dd8ea95eb5e7be
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 50d00d40e262c6175bee47c5443f8673dc2af21dce0bd97ee6e31abe62d0631073aa89bac32cd78610706e04662d6501aa1f4059466d2ba1f574353dc832de82
|
|
7
|
+
data.tar.gz: faa9c4ee7e4c5f1f9c4cd36139701dac10d6a4ec5f4a2bb13213618fff45bb32f14e7aebdee6d27cd27eac8524b731964d953ff45d3f95007e9f98abc3fbf39f
|
data/Gemfile.lock
CHANGED
|
@@ -21,7 +21,6 @@ module Bolognese
|
|
|
21
21
|
given_name = parse_attributes(author.fetch("givenName", nil))
|
|
22
22
|
family_name = parse_attributes(author.fetch("familyName", nil))
|
|
23
23
|
name = cleanup_author(name)
|
|
24
|
-
name = [family_name, given_name].join(", ") if family_name.present? && given_name.present?
|
|
25
24
|
contributor_type = parse_attributes(author.fetch("contributorType", nil))
|
|
26
25
|
|
|
27
26
|
name_type = parse_attributes(author.fetch("creatorName", nil), content: "nameType", first: true) || parse_attributes(author.fetch("contributorName", nil), content: "nameType", first: true)
|
|
@@ -46,55 +45,18 @@ module Bolognese
|
|
|
46
45
|
end
|
|
47
46
|
end.presence
|
|
48
47
|
|
|
49
|
-
|
|
48
|
+
{ "nameType" => name_type,
|
|
50
49
|
"name" => name,
|
|
51
50
|
"givenName" => given_name,
|
|
52
51
|
"familyName" => family_name,
|
|
53
52
|
"nameIdentifiers" => name_identifiers,
|
|
54
53
|
"affiliation" => get_affiliations(author.fetch("affiliation", nil)),
|
|
55
54
|
"contributorType" => contributor_type }.compact
|
|
56
|
-
|
|
57
|
-
return author if family_name.present?
|
|
58
|
-
|
|
59
|
-
if is_personal_name?(author)
|
|
60
|
-
Namae.options[:include_particle_in_family] = true
|
|
61
|
-
names = Namae.parse(name)
|
|
62
|
-
parsed_name = names.first
|
|
63
|
-
|
|
64
|
-
if parsed_name.present?
|
|
65
|
-
given_name = parsed_name.given
|
|
66
|
-
family_name = parsed_name.family
|
|
67
|
-
name = [family_name, given_name].join(", ")
|
|
68
|
-
else
|
|
69
|
-
given_name = nil
|
|
70
|
-
family_name = nil
|
|
71
|
-
end
|
|
72
|
-
|
|
73
|
-
{ "nameType" => "Personal",
|
|
74
|
-
"name" => name,
|
|
75
|
-
"givenName" => given_name,
|
|
76
|
-
"familyName" => family_name,
|
|
77
|
-
"nameIdentifiers" => Array.wrap(name_identifiers),
|
|
78
|
-
"affiliation" => Array.wrap(author.fetch("affiliation", nil)),
|
|
79
|
-
"contributorType" => contributor_type }.compact
|
|
80
|
-
else
|
|
81
|
-
{ "nameType" => name_type,
|
|
82
|
-
"name" => name,
|
|
83
|
-
"nameIdentifiers" => Array.wrap(name_identifiers),
|
|
84
|
-
"affiliation" => Array.wrap(author.fetch("affiliation", nil)),
|
|
85
|
-
"contributorType" => contributor_type }.compact
|
|
86
|
-
end
|
|
87
55
|
end
|
|
88
56
|
|
|
89
57
|
def cleanup_author(author)
|
|
90
58
|
return nil unless author.present?
|
|
91
59
|
|
|
92
|
-
# detect pattern "Smith J.", but not "Smith, John K."
|
|
93
|
-
author = author.gsub(/[[:space:]]([A-Z]\.)?(-?[A-Z]\.)$/, ', \1\2') unless author.include?(",")
|
|
94
|
-
|
|
95
|
-
# remove spaces around hyphens
|
|
96
|
-
author = author.gsub(" - ", "-")
|
|
97
|
-
|
|
98
60
|
# titleize strings
|
|
99
61
|
# remove non-standard space characters
|
|
100
62
|
author.gsub(/[[:space:]]/, ' ')
|
|
@@ -155,7 +117,6 @@ module Bolognese
|
|
|
155
117
|
# when `normalize_id` method could not normalize, it returns nil, hence we have following condition
|
|
156
118
|
if affiliation_identifier.nil?
|
|
157
119
|
if a["affiliationIdentifierScheme"] == "ROR"
|
|
158
|
-
scheme_uri = "https://ror.org"
|
|
159
120
|
affiliation_identifier = normalize_ror(a["affiliationIdentifier"])
|
|
160
121
|
else
|
|
161
122
|
affiliation_identifier = a["affiliationIdentifier"]
|
data/lib/bolognese/doi_utils.rb
CHANGED
|
@@ -13,11 +13,12 @@ module Bolognese
|
|
|
13
13
|
end
|
|
14
14
|
|
|
15
15
|
def validate_funder_doi(doi)
|
|
16
|
-
|
|
16
|
+
match = Array(/\A(?:(http|https):\/(\/)?(dx\.)?(doi.org|handle.stage.datacite.org|handle.test.datacite.org)\/)?(doi:)?(10\.13039\/)?([1-9]\d+)\z/.match(doi)).last
|
|
17
|
+
return doi unless match
|
|
17
18
|
|
|
18
19
|
# remove non-printing whitespace and downcase
|
|
19
|
-
if
|
|
20
|
-
doi.delete("\u200B").downcase
|
|
20
|
+
if match.present?
|
|
21
|
+
doi = match.delete("\u200B").downcase
|
|
21
22
|
"https://doi.org/10.13039/#{doi}"
|
|
22
23
|
end
|
|
23
24
|
end
|
|
@@ -96,7 +96,7 @@ module Bolognese
|
|
|
96
96
|
|
|
97
97
|
state = id.present? || read_options.present? ? "findable" : "not_found"
|
|
98
98
|
subjects = Array.wrap(meta.fetch("categories", nil)).reduce([]) do |sum, subject|
|
|
99
|
-
sum +=
|
|
99
|
+
sum += name_to_subject(subject)
|
|
100
100
|
|
|
101
101
|
sum
|
|
102
102
|
end
|
|
@@ -55,7 +55,7 @@ module Bolognese
|
|
|
55
55
|
"ris" => Bolognese::Utils::SO_TO_RIS_TRANSLATIONS[schema_org] || "GEN"
|
|
56
56
|
}.compact
|
|
57
57
|
subjects = Array.wrap(meta.fetch("tags", nil)).reduce([]) do |sum, subject|
|
|
58
|
-
sum +=
|
|
58
|
+
sum += name_to_subject(subject)
|
|
59
59
|
|
|
60
60
|
sum
|
|
61
61
|
end
|
|
@@ -133,9 +133,9 @@ module Bolognese
|
|
|
133
133
|
|
|
134
134
|
subjects = Array.wrap(meta.dig("subjects", "subject")).reduce([]) do |sum, subject|
|
|
135
135
|
if subject.is_a?(String)
|
|
136
|
-
sum +=
|
|
136
|
+
sum += name_to_subject(subject)
|
|
137
137
|
elsif subject.is_a?(Hash)
|
|
138
|
-
sum +=
|
|
138
|
+
sum += hsh_to_subject(subject)
|
|
139
139
|
end
|
|
140
140
|
|
|
141
141
|
sum
|
|
@@ -143,15 +143,12 @@ module Bolognese
|
|
|
143
143
|
|
|
144
144
|
dates = Array.wrap(meta.dig("dates", "date")).map do |r|
|
|
145
145
|
if r.is_a?(Hash) && date = sanitize(r["__content__"]).presence
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
}.compact
|
|
151
|
-
end
|
|
146
|
+
{ "date" => date,
|
|
147
|
+
"dateType" => parse_attributes(r, content: "dateType"),
|
|
148
|
+
"dateInformation" => parse_attributes(r, content: "dateInformation")
|
|
149
|
+
}.compact
|
|
152
150
|
end
|
|
153
151
|
end.compact
|
|
154
|
-
dates << { "date" => meta.fetch("publicationYear", nil), "dateType" => "Issued" } if meta.fetch("publicationYear", nil).present? && get_date(dates, "Issued").blank?
|
|
155
152
|
sizes = Array.wrap(meta.dig("sizes", "size")).map do |k|
|
|
156
153
|
if k.blank?
|
|
157
154
|
nil
|
|
@@ -179,8 +176,7 @@ module Bolognese
|
|
|
179
176
|
if funder_identifier_type == "Crossref Funder ID"
|
|
180
177
|
funder_identifier = validate_funder_doi(funder_identifier)
|
|
181
178
|
elsif funder_identifier_type == "ROR"
|
|
182
|
-
funder_identifier =
|
|
183
|
-
scheme_uri = "https://ror.org"
|
|
179
|
+
funder_identifier = normalize_ror(funder_identifier)
|
|
184
180
|
else
|
|
185
181
|
funder_identifier = normalize_id(funder_identifier) ? normalize_id(funder_identifier) : funder_identifier
|
|
186
182
|
end
|
|
@@ -196,7 +192,8 @@ module Bolognese
|
|
|
196
192
|
end
|
|
197
193
|
related_identifiers = Array.wrap(meta.dig("relatedIdentifiers", "relatedIdentifier")).map do |ri|
|
|
198
194
|
if ri["relatedIdentifierType"] == "DOI"
|
|
199
|
-
|
|
195
|
+
doi = ri["__content__"].to_s.downcase
|
|
196
|
+
rid = validate_doi(doi) || doi
|
|
200
197
|
else
|
|
201
198
|
rid = ri["__content__"]
|
|
202
199
|
end
|
|
@@ -219,7 +216,8 @@ module Bolognese
|
|
|
219
216
|
relatedItemIdentifier = nil
|
|
220
217
|
if rii
|
|
221
218
|
if rii["relatedItemIdentifierType"] == "DOI"
|
|
222
|
-
|
|
219
|
+
doi = rii["__content__"].to_s.downcase
|
|
220
|
+
rid = validate_doi(doi) || doi
|
|
223
221
|
else
|
|
224
222
|
rid = rii["__content__"]
|
|
225
223
|
end
|
|
@@ -236,7 +234,6 @@ module Bolognese
|
|
|
236
234
|
number = ri["number"]
|
|
237
235
|
if number.is_a?(String)
|
|
238
236
|
number = number
|
|
239
|
-
numberType = nil
|
|
240
237
|
else
|
|
241
238
|
number = ri.dig("number", "__content__")
|
|
242
239
|
numberType = ri.dig("number", "numberType")
|
|
@@ -78,7 +78,7 @@ module Bolognese
|
|
|
78
78
|
end
|
|
79
79
|
state = meta.fetch("DO", nil).present? || read_options.present? ? "findable" : "not_found"
|
|
80
80
|
subjects = Array.wrap(meta.fetch("KW", nil)).reduce([]) do |sum, subject|
|
|
81
|
-
sum +=
|
|
81
|
+
sum += name_to_subject(subject)
|
|
82
82
|
|
|
83
83
|
sum
|
|
84
84
|
end
|
|
@@ -174,7 +174,7 @@ module Bolognese
|
|
|
174
174
|
subjects = meta.fetch("keywords", nil)
|
|
175
175
|
subjects = subjects.to_s.split(", ") if subjects.is_a?(String)
|
|
176
176
|
subjects = Array.wrap(subjects).reduce([]) do |sum, subject|
|
|
177
|
-
sum +=
|
|
177
|
+
sum += name_to_subject(subject)
|
|
178
178
|
sum
|
|
179
179
|
end
|
|
180
180
|
|
data/lib/bolognese/utils.rb
CHANGED
|
@@ -699,19 +699,19 @@ module Bolognese
|
|
|
699
699
|
end
|
|
700
700
|
|
|
701
701
|
def normalize_orcid(orcid)
|
|
702
|
-
|
|
703
|
-
return
|
|
702
|
+
validated = validate_orcid(orcid)
|
|
703
|
+
return orcid unless validated.present?
|
|
704
704
|
|
|
705
705
|
# turn ORCID ID into URL
|
|
706
|
-
"https://orcid.org/" + Addressable::URI.encode(
|
|
706
|
+
"https://orcid.org/" + Addressable::URI.encode(validated)
|
|
707
707
|
end
|
|
708
708
|
|
|
709
709
|
def normalize_ror(ror)
|
|
710
|
-
|
|
711
|
-
return
|
|
710
|
+
validated = validate_ror(ror)
|
|
711
|
+
return ror unless validated.present?
|
|
712
712
|
|
|
713
713
|
# turn ROR into URL
|
|
714
|
-
"https://ror.org/" + Addressable::URI.encode(
|
|
714
|
+
"https://ror.org/" + Addressable::URI.encode(validated)
|
|
715
715
|
end
|
|
716
716
|
|
|
717
717
|
def normalize_ids(ids: nil, relation_type: nil)
|
|
@@ -1331,103 +1331,18 @@ module Bolognese
|
|
|
1331
1331
|
end
|
|
1332
1332
|
end
|
|
1333
1333
|
|
|
1334
|
-
def
|
|
1335
|
-
|
|
1336
|
-
fos = resource_json(:fos).fetch("fosFields")
|
|
1337
|
-
|
|
1338
|
-
subject = fos.find { |l| l["fosLabel"] == name || "FOS: " + l["fosLabel"] == name }
|
|
1339
|
-
|
|
1340
|
-
if subject
|
|
1341
|
-
return [{
|
|
1342
|
-
"subject" => sanitize(name) },
|
|
1343
|
-
{
|
|
1344
|
-
"subject" => "FOS: " + subject["fosLabel"],
|
|
1345
|
-
"subjectScheme" => "Fields of Science and Technology (FOS)",
|
|
1346
|
-
"schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf"
|
|
1347
|
-
}]
|
|
1348
|
-
end
|
|
1349
|
-
|
|
1350
|
-
# if not found, look in Fields of Research (Australian and New Zealand Standard Research Classification)
|
|
1351
|
-
# and map to Fields of Science. Add an extra entry for the latter
|
|
1352
|
-
fores = resource_json(:for)
|
|
1353
|
-
for_fields = fores.fetch("forFields")
|
|
1354
|
-
for_disciplines = fores.fetch("forDisciplines")
|
|
1355
|
-
|
|
1356
|
-
subject = for_fields.find { |l| l["forLabel"] == name } ||
|
|
1357
|
-
for_disciplines.find { |l| l["forLabel"] == name }
|
|
1358
|
-
|
|
1359
|
-
if subject
|
|
1360
|
-
[{
|
|
1361
|
-
"subject" => sanitize(name) },
|
|
1362
|
-
{
|
|
1363
|
-
"subject" => "FOS: " + subject["fosLabel"],
|
|
1364
|
-
"subjectScheme" => "Fields of Science and Technology (FOS)",
|
|
1365
|
-
"schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf"
|
|
1366
|
-
}]
|
|
1367
|
-
else
|
|
1368
|
-
[{ "subject" => sanitize(name) }]
|
|
1369
|
-
end
|
|
1334
|
+
def name_to_subject(name)
|
|
1335
|
+
[{ "subject" => sanitize(name) }]
|
|
1370
1336
|
end
|
|
1371
1337
|
|
|
1372
|
-
def
|
|
1373
|
-
|
|
1374
|
-
|
|
1375
|
-
|
|
1376
|
-
|
|
1377
|
-
|
|
1378
|
-
|
|
1379
|
-
|
|
1380
|
-
"subjectScheme" => hsh["subjectScheme"],
|
|
1381
|
-
"schemeUri" => hsh["schemeURI"] || hsh["schemeUri"],
|
|
1382
|
-
"valueUri" => hsh["valueURI"] || hsh["valueUri"],
|
|
1383
|
-
"classificationCode" => hsh["classificationCode"],
|
|
1384
|
-
"lang" => hsh["lang"] }.compact,
|
|
1385
|
-
{
|
|
1386
|
-
"subject" => "FOS: " + subject["fosLabel"],
|
|
1387
|
-
"subjectScheme" => "Fields of Science and Technology (FOS)",
|
|
1388
|
-
"schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf" }.compact]
|
|
1389
|
-
end
|
|
1390
|
-
|
|
1391
|
-
# if not found, look in Fields of Research (Australian and New Zealand Standard Research Classification)
|
|
1392
|
-
# and map to Fields of Science. Add an extra entry for the latter
|
|
1393
|
-
fores = resource_json(:for)
|
|
1394
|
-
for_fields = fores.fetch("forFields")
|
|
1395
|
-
for_disciplines = fores.fetch("forDisciplines")
|
|
1396
|
-
|
|
1397
|
-
# try to extract forId
|
|
1398
|
-
if hsh["subjectScheme"] == "FOR"
|
|
1399
|
-
for_id = hsh["__content__"].to_s.split(" ").first || hsh["subject"].to_s.split(" ").first
|
|
1400
|
-
for_id = for_id.rjust(6, "0")
|
|
1401
|
-
|
|
1402
|
-
subject = for_fields.find { |l| l["forId"] == for_id } ||
|
|
1403
|
-
for_disciplines.find { |l| l["forId"] == for_id[0..3] }
|
|
1404
|
-
else
|
|
1405
|
-
subject = for_fields.find { |l| l["forLabel"] == hsh["__content__"] || l["forLabel"] == hsh["subject"] } ||
|
|
1406
|
-
for_disciplines.find { |l| l["forLabel"] == hsh["__content__"] || l["forLabel"] == hsh["subject"] }
|
|
1407
|
-
end
|
|
1408
|
-
|
|
1409
|
-
if subject
|
|
1410
|
-
[{
|
|
1411
|
-
"subject" => sanitize(hsh["__content__"] || hsh["subject"]),
|
|
1412
|
-
"subjectScheme" => hsh["subjectScheme"],
|
|
1413
|
-
"classificationCode" => hsh["classificationCode"],
|
|
1414
|
-
"schemeUri" => hsh["schemeURI"] || hsh["schemeUri"],
|
|
1415
|
-
"valueUri" => hsh["valueURI"] || hsh["valueUri"],
|
|
1416
|
-
"lang" => hsh["lang"] }.compact,
|
|
1417
|
-
{
|
|
1418
|
-
"subject" => "FOS: " + subject["fosLabel"],
|
|
1419
|
-
"subjectScheme" => "Fields of Science and Technology (FOS)",
|
|
1420
|
-
"schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf"
|
|
1421
|
-
}]
|
|
1422
|
-
else
|
|
1423
|
-
[{
|
|
1424
|
-
"subject" => sanitize(hsh["__content__"] || hsh["subject"]),
|
|
1425
|
-
"subjectScheme" => hsh["subjectScheme"],
|
|
1426
|
-
"classificationCode" => hsh["classificationCode"],
|
|
1427
|
-
"schemeUri" => hsh["schemeURI"] || hsh["schemeUri"],
|
|
1428
|
-
"valueUri" => hsh["valueURI"] || hsh["valueUri"],
|
|
1429
|
-
"lang" => hsh["lang"] }.compact]
|
|
1430
|
-
end
|
|
1338
|
+
def hsh_to_subject(hsh)
|
|
1339
|
+
[{
|
|
1340
|
+
"subject" => sanitize(hsh["__content__"] || hsh["subject"]),
|
|
1341
|
+
"subjectScheme" => hsh["subjectScheme"],
|
|
1342
|
+
"classificationCode" => hsh["classificationCode"],
|
|
1343
|
+
"schemeUri" => hsh["schemeURI"] || hsh["schemeUri"],
|
|
1344
|
+
"valueUri" => hsh["valueURI"] || hsh["valueUri"],
|
|
1345
|
+
"lang" => hsh["lang"] }.compact]
|
|
1431
1346
|
end
|
|
1432
1347
|
|
|
1433
1348
|
def dfg_ids_to_fos(dfg_ids)
|
data/lib/bolognese/version.rb
CHANGED
|
@@ -93,9 +93,10 @@ module Bolognese
|
|
|
93
93
|
end
|
|
94
94
|
|
|
95
95
|
def insert_publication_date(xml)
|
|
96
|
-
|
|
96
|
+
date = get_date(dates, "Issued") || publication_year
|
|
97
|
+
year, month, day = get_date_parts(date).to_h.fetch("date-parts", []).first
|
|
97
98
|
|
|
98
|
-
xml.year(year, "iso-8601-date" =>
|
|
99
|
+
xml.year(year, "iso-8601-date" => date)
|
|
99
100
|
xml.month(month.to_s.rjust(2, '0')) if month.present?
|
|
100
101
|
xml.day(day.to_s.rjust(2, '0')) if day.present?
|
|
101
102
|
end
|
metadata
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: bolognese
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 2.
|
|
4
|
+
version: 2.7.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Martin Fenner
|
|
8
8
|
bindir: bin
|
|
9
9
|
cert_chain: []
|
|
10
|
-
date: 2026-
|
|
10
|
+
date: 2026-05-27 00:00:00.000000000 Z
|
|
11
11
|
dependencies:
|
|
12
12
|
- !ruby/object:Gem::Dependency
|
|
13
13
|
name: maremma
|