bolognese 2.6.1 → 2.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: af6403c24df2779189b8883a6ac2a649856eefc21021f2a93e5e6ebeee91592a
4
- data.tar.gz: 4fab74f8c6e1044c48a3d66fb64ff90ff134cf868b881dcbfc6a91e410fb3fa8
3
+ metadata.gz: 835ab3fe44434e73ae854befe4f6acb61ea005c5fdd95833103c7a692b19378c
4
+ data.tar.gz: 5b5085360ceae14323ae37e82d517dade2e87cc0c8b631a2a1dd8ea95eb5e7be
5
5
  SHA512:
6
- metadata.gz: edb53a6a6644b9184fec632cb1b6a128d630c403499d19df8ce3e0febbac3a51d3d85d86b3f8a124a5b4fa3f3573022b0d0b633c5b9ced8a9bc416638cd7dfd4
7
- data.tar.gz: a3f0b04d4ed508858ea7e7ec896f2182bdd9d7d5773e58e4e97faa8e52bd362cd0688a305b2efec9c614a51564b0927d8270b597ce976322e8650182cab5bc4a
6
+ metadata.gz: 50d00d40e262c6175bee47c5443f8673dc2af21dce0bd97ee6e31abe62d0631073aa89bac32cd78610706e04662d6501aa1f4059466d2ba1f574353dc832de82
7
+ data.tar.gz: faa9c4ee7e4c5f1f9c4cd36139701dac10d6a4ec5f4a2bb13213618fff45bb32f14e7aebdee6d27cd27eac8524b731964d953ff45d3f95007e9f98abc3fbf39f
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- bolognese (2.6.1)
4
+ bolognese (2.7.0)
5
5
  activesupport (~> 8.1, >= 8.1.2)
6
6
  bibtex-ruby (~> 6.2)
7
7
  builder (~> 3.3)
@@ -21,7 +21,6 @@ module Bolognese
21
21
  given_name = parse_attributes(author.fetch("givenName", nil))
22
22
  family_name = parse_attributes(author.fetch("familyName", nil))
23
23
  name = cleanup_author(name)
24
- name = [family_name, given_name].join(", ") if family_name.present? && given_name.present?
25
24
  contributor_type = parse_attributes(author.fetch("contributorType", nil))
26
25
 
27
26
  name_type = parse_attributes(author.fetch("creatorName", nil), content: "nameType", first: true) || parse_attributes(author.fetch("contributorName", nil), content: "nameType", first: true)
@@ -46,55 +45,18 @@ module Bolognese
46
45
  end
47
46
  end.presence
48
47
 
49
- author = { "nameType" => name_type,
48
+ { "nameType" => name_type,
50
49
  "name" => name,
51
50
  "givenName" => given_name,
52
51
  "familyName" => family_name,
53
52
  "nameIdentifiers" => name_identifiers,
54
53
  "affiliation" => get_affiliations(author.fetch("affiliation", nil)),
55
54
  "contributorType" => contributor_type }.compact
56
-
57
- return author if family_name.present?
58
-
59
- if is_personal_name?(author)
60
- Namae.options[:include_particle_in_family] = true
61
- names = Namae.parse(name)
62
- parsed_name = names.first
63
-
64
- if parsed_name.present?
65
- given_name = parsed_name.given
66
- family_name = parsed_name.family
67
- name = [family_name, given_name].join(", ")
68
- else
69
- given_name = nil
70
- family_name = nil
71
- end
72
-
73
- { "nameType" => "Personal",
74
- "name" => name,
75
- "givenName" => given_name,
76
- "familyName" => family_name,
77
- "nameIdentifiers" => Array.wrap(name_identifiers),
78
- "affiliation" => Array.wrap(author.fetch("affiliation", nil)),
79
- "contributorType" => contributor_type }.compact
80
- else
81
- { "nameType" => name_type,
82
- "name" => name,
83
- "nameIdentifiers" => Array.wrap(name_identifiers),
84
- "affiliation" => Array.wrap(author.fetch("affiliation", nil)),
85
- "contributorType" => contributor_type }.compact
86
- end
87
55
  end
88
56
 
89
57
  def cleanup_author(author)
90
58
  return nil unless author.present?
91
59
 
92
- # detect pattern "Smith J.", but not "Smith, John K."
93
- author = author.gsub(/[[:space:]]([A-Z]\.)?(-?[A-Z]\.)$/, ', \1\2') unless author.include?(",")
94
-
95
- # remove spaces around hyphens
96
- author = author.gsub(" - ", "-")
97
-
98
60
  # titleize strings
99
61
  # remove non-standard space characters
100
62
  author.gsub(/[[:space:]]/, ' ')
@@ -155,7 +117,6 @@ module Bolognese
155
117
  # when `normalize_id` method could not normalize, it returns nil, hence we have following condition
156
118
  if affiliation_identifier.nil?
157
119
  if a["affiliationIdentifierScheme"] == "ROR"
158
- scheme_uri = "https://ror.org"
159
120
  affiliation_identifier = normalize_ror(a["affiliationIdentifier"])
160
121
  else
161
122
  affiliation_identifier = a["affiliationIdentifier"]
@@ -13,11 +13,12 @@ module Bolognese
13
13
  end
14
14
 
15
15
  def validate_funder_doi(doi)
16
- doi = Array(/\A(?:(http|https):\/(\/)?(dx\.)?(doi.org|handle.stage.datacite.org|handle.test.datacite.org)\/)?(doi:)?(10\.13039\/)?([1-9]\d+)\z/.match(doi)).last
16
+ match = Array(/\A(?:(http|https):\/(\/)?(dx\.)?(doi.org|handle.stage.datacite.org|handle.test.datacite.org)\/)?(doi:)?(10\.13039\/)?([1-9]\d+)\z/.match(doi)).last
17
+ return doi unless match
17
18
 
18
19
  # remove non-printing whitespace and downcase
19
- if doi.present?
20
- doi.delete("\u200B").downcase
20
+ if match.present?
21
+ doi = match.delete("\u200B").downcase
21
22
  "https://doi.org/10.13039/#{doi}"
22
23
  end
23
24
  end
@@ -96,7 +96,7 @@ module Bolognese
96
96
 
97
97
  state = id.present? || read_options.present? ? "findable" : "not_found"
98
98
  subjects = Array.wrap(meta.fetch("categories", nil)).reduce([]) do |sum, subject|
99
- sum += name_to_fos(subject)
99
+ sum += name_to_subject(subject)
100
100
 
101
101
  sum
102
102
  end
@@ -55,7 +55,7 @@ module Bolognese
55
55
  "ris" => Bolognese::Utils::SO_TO_RIS_TRANSLATIONS[schema_org] || "GEN"
56
56
  }.compact
57
57
  subjects = Array.wrap(meta.fetch("tags", nil)).reduce([]) do |sum, subject|
58
- sum += name_to_fos(subject)
58
+ sum += name_to_subject(subject)
59
59
 
60
60
  sum
61
61
  end
@@ -133,9 +133,9 @@ module Bolognese
133
133
 
134
134
  subjects = Array.wrap(meta.dig("subjects", "subject")).reduce([]) do |sum, subject|
135
135
  if subject.is_a?(String)
136
- sum += name_to_fos(subject)
136
+ sum += name_to_subject(subject)
137
137
  elsif subject.is_a?(Hash)
138
- sum += hsh_to_fos(subject)
138
+ sum += hsh_to_subject(subject)
139
139
  end
140
140
 
141
141
  sum
@@ -143,15 +143,12 @@ module Bolognese
143
143
 
144
144
  dates = Array.wrap(meta.dig("dates", "date")).map do |r|
145
145
  if r.is_a?(Hash) && date = sanitize(r["__content__"]).presence
146
- if Date.edtf(date).present? || Bolognese::Utils::UNKNOWN_INFORMATION.key?(date)
147
- { "date" => date,
148
- "dateType" => parse_attributes(r, content: "dateType"),
149
- "dateInformation" => parse_attributes(r, content: "dateInformation")
150
- }.compact
151
- end
146
+ { "date" => date,
147
+ "dateType" => parse_attributes(r, content: "dateType"),
148
+ "dateInformation" => parse_attributes(r, content: "dateInformation")
149
+ }.compact
152
150
  end
153
151
  end.compact
154
- dates << { "date" => meta.fetch("publicationYear", nil), "dateType" => "Issued" } if meta.fetch("publicationYear", nil).present? && get_date(dates, "Issued").blank?
155
152
  sizes = Array.wrap(meta.dig("sizes", "size")).map do |k|
156
153
  if k.blank?
157
154
  nil
@@ -179,8 +176,7 @@ module Bolognese
179
176
  if funder_identifier_type == "Crossref Funder ID"
180
177
  funder_identifier = validate_funder_doi(funder_identifier)
181
178
  elsif funder_identifier_type == "ROR"
182
- funder_identifier = normalize_ror(funder_identifier)
183
- scheme_uri = "https://ror.org"
179
+ funder_identifier = normalize_ror(funder_identifier)
184
180
  else
185
181
  funder_identifier = normalize_id(funder_identifier) ? normalize_id(funder_identifier) : funder_identifier
186
182
  end
@@ -196,7 +192,8 @@ module Bolognese
196
192
  end
197
193
  related_identifiers = Array.wrap(meta.dig("relatedIdentifiers", "relatedIdentifier")).map do |ri|
198
194
  if ri["relatedIdentifierType"] == "DOI"
199
- rid = validate_doi(ri["__content__"].to_s.downcase)
195
+ doi = ri["__content__"].to_s.downcase
196
+ rid = validate_doi(doi) || doi
200
197
  else
201
198
  rid = ri["__content__"]
202
199
  end
@@ -219,7 +216,8 @@ module Bolognese
219
216
  relatedItemIdentifier = nil
220
217
  if rii
221
218
  if rii["relatedItemIdentifierType"] == "DOI"
222
- rid = validate_doi(rii["__content__"].to_s.downcase)
219
+ doi = rii["__content__"].to_s.downcase
220
+ rid = validate_doi(doi) || doi
223
221
  else
224
222
  rid = rii["__content__"]
225
223
  end
@@ -236,7 +234,6 @@ module Bolognese
236
234
  number = ri["number"]
237
235
  if number.is_a?(String)
238
236
  number = number
239
- numberType = nil
240
237
  else
241
238
  number = ri.dig("number", "__content__")
242
239
  numberType = ri.dig("number", "numberType")
@@ -78,7 +78,7 @@ module Bolognese
78
78
  end
79
79
  state = meta.fetch("DO", nil).present? || read_options.present? ? "findable" : "not_found"
80
80
  subjects = Array.wrap(meta.fetch("KW", nil)).reduce([]) do |sum, subject|
81
- sum += name_to_fos(subject)
81
+ sum += name_to_subject(subject)
82
82
 
83
83
  sum
84
84
  end
@@ -174,7 +174,7 @@ module Bolognese
174
174
  subjects = meta.fetch("keywords", nil)
175
175
  subjects = subjects.to_s.split(", ") if subjects.is_a?(String)
176
176
  subjects = Array.wrap(subjects).reduce([]) do |sum, subject|
177
- sum += name_to_fos(subject)
177
+ sum += name_to_subject(subject)
178
178
  sum
179
179
  end
180
180
 
@@ -699,19 +699,19 @@ module Bolognese
699
699
  end
700
700
 
701
701
  def normalize_orcid(orcid)
702
- orcid = validate_orcid(orcid)
703
- return nil unless orcid.present?
702
+ validated = validate_orcid(orcid)
703
+ return orcid unless validated.present?
704
704
 
705
705
  # turn ORCID ID into URL
706
- "https://orcid.org/" + Addressable::URI.encode(orcid)
706
+ "https://orcid.org/" + Addressable::URI.encode(validated)
707
707
  end
708
708
 
709
709
  def normalize_ror(ror)
710
- ror = validate_ror(ror)
711
- return nil unless ror.present?
710
+ validated = validate_ror(ror)
711
+ return ror unless validated.present?
712
712
 
713
713
  # turn ROR into URL
714
- "https://ror.org/" + Addressable::URI.encode(ror)
714
+ "https://ror.org/" + Addressable::URI.encode(validated)
715
715
  end
716
716
 
717
717
  def normalize_ids(ids: nil, relation_type: nil)
@@ -1331,103 +1331,18 @@ module Bolognese
1331
1331
  end
1332
1332
  end
1333
1333
 
1334
- def name_to_fos(name)
1335
- # first find subject in Fields of Science (OECD)
1336
- fos = resource_json(:fos).fetch("fosFields")
1337
-
1338
- subject = fos.find { |l| l["fosLabel"] == name || "FOS: " + l["fosLabel"] == name }
1339
-
1340
- if subject
1341
- return [{
1342
- "subject" => sanitize(name) },
1343
- {
1344
- "subject" => "FOS: " + subject["fosLabel"],
1345
- "subjectScheme" => "Fields of Science and Technology (FOS)",
1346
- "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf"
1347
- }]
1348
- end
1349
-
1350
- # if not found, look in Fields of Research (Australian and New Zealand Standard Research Classification)
1351
- # and map to Fields of Science. Add an extra entry for the latter
1352
- fores = resource_json(:for)
1353
- for_fields = fores.fetch("forFields")
1354
- for_disciplines = fores.fetch("forDisciplines")
1355
-
1356
- subject = for_fields.find { |l| l["forLabel"] == name } ||
1357
- for_disciplines.find { |l| l["forLabel"] == name }
1358
-
1359
- if subject
1360
- [{
1361
- "subject" => sanitize(name) },
1362
- {
1363
- "subject" => "FOS: " + subject["fosLabel"],
1364
- "subjectScheme" => "Fields of Science and Technology (FOS)",
1365
- "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf"
1366
- }]
1367
- else
1368
- [{ "subject" => sanitize(name) }]
1369
- end
1334
+ def name_to_subject(name)
1335
+ [{ "subject" => sanitize(name) }]
1370
1336
  end
1371
1337
 
1372
- def hsh_to_fos(hsh)
1373
- # first find subject in Fields of Science (OECD)
1374
- fos = resource_json(:fos).fetch("fosFields")
1375
- subject = fos.find { |l| l["fosLabel"] == hsh["__content__"] || "FOS: " + l["fosLabel"] == hsh["__content__"] || l["fosLabel"] == hsh["subject"]}
1376
-
1377
- if subject
1378
- return [{
1379
- "subject" => sanitize(hsh["__content__"] || hsh["subject"]),
1380
- "subjectScheme" => hsh["subjectScheme"],
1381
- "schemeUri" => hsh["schemeURI"] || hsh["schemeUri"],
1382
- "valueUri" => hsh["valueURI"] || hsh["valueUri"],
1383
- "classificationCode" => hsh["classificationCode"],
1384
- "lang" => hsh["lang"] }.compact,
1385
- {
1386
- "subject" => "FOS: " + subject["fosLabel"],
1387
- "subjectScheme" => "Fields of Science and Technology (FOS)",
1388
- "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf" }.compact]
1389
- end
1390
-
1391
- # if not found, look in Fields of Research (Australian and New Zealand Standard Research Classification)
1392
- # and map to Fields of Science. Add an extra entry for the latter
1393
- fores = resource_json(:for)
1394
- for_fields = fores.fetch("forFields")
1395
- for_disciplines = fores.fetch("forDisciplines")
1396
-
1397
- # try to extract forId
1398
- if hsh["subjectScheme"] == "FOR"
1399
- for_id = hsh["__content__"].to_s.split(" ").first || hsh["subject"].to_s.split(" ").first
1400
- for_id = for_id.rjust(6, "0")
1401
-
1402
- subject = for_fields.find { |l| l["forId"] == for_id } ||
1403
- for_disciplines.find { |l| l["forId"] == for_id[0..3] }
1404
- else
1405
- subject = for_fields.find { |l| l["forLabel"] == hsh["__content__"] || l["forLabel"] == hsh["subject"] } ||
1406
- for_disciplines.find { |l| l["forLabel"] == hsh["__content__"] || l["forLabel"] == hsh["subject"] }
1407
- end
1408
-
1409
- if subject
1410
- [{
1411
- "subject" => sanitize(hsh["__content__"] || hsh["subject"]),
1412
- "subjectScheme" => hsh["subjectScheme"],
1413
- "classificationCode" => hsh["classificationCode"],
1414
- "schemeUri" => hsh["schemeURI"] || hsh["schemeUri"],
1415
- "valueUri" => hsh["valueURI"] || hsh["valueUri"],
1416
- "lang" => hsh["lang"] }.compact,
1417
- {
1418
- "subject" => "FOS: " + subject["fosLabel"],
1419
- "subjectScheme" => "Fields of Science and Technology (FOS)",
1420
- "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf"
1421
- }]
1422
- else
1423
- [{
1424
- "subject" => sanitize(hsh["__content__"] || hsh["subject"]),
1425
- "subjectScheme" => hsh["subjectScheme"],
1426
- "classificationCode" => hsh["classificationCode"],
1427
- "schemeUri" => hsh["schemeURI"] || hsh["schemeUri"],
1428
- "valueUri" => hsh["valueURI"] || hsh["valueUri"],
1429
- "lang" => hsh["lang"] }.compact]
1430
- end
1338
+ def hsh_to_subject(hsh)
1339
+ [{
1340
+ "subject" => sanitize(hsh["__content__"] || hsh["subject"]),
1341
+ "subjectScheme" => hsh["subjectScheme"],
1342
+ "classificationCode" => hsh["classificationCode"],
1343
+ "schemeUri" => hsh["schemeURI"] || hsh["schemeUri"],
1344
+ "valueUri" => hsh["valueURI"] || hsh["valueUri"],
1345
+ "lang" => hsh["lang"] }.compact]
1431
1346
  end
1432
1347
 
1433
1348
  def dfg_ids_to_fos(dfg_ids)
@@ -1,3 +1,3 @@
1
1
  module Bolognese
2
- VERSION = "2.6.1"
2
+ VERSION = "2.7.0"
3
3
  end
@@ -93,9 +93,10 @@ module Bolognese
93
93
  end
94
94
 
95
95
  def insert_publication_date(xml)
96
- year, month, day = get_date_parts(get_date(dates, "Issued")).to_h.fetch("date-parts", []).first
96
+ date = get_date(dates, "Issued") || publication_year
97
+ year, month, day = get_date_parts(date).to_h.fetch("date-parts", []).first
97
98
 
98
- xml.year(year, "iso-8601-date" => get_date(dates, "Issued"))
99
+ xml.year(year, "iso-8601-date" => date)
99
100
  xml.month(month.to_s.rjust(2, '0')) if month.present?
100
101
  xml.day(day.to_s.rjust(2, '0')) if day.present?
101
102
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bolognese
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.6.1
4
+ version: 2.7.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Martin Fenner
8
8
  bindir: bin
9
9
  cert_chain: []
10
- date: 2026-04-29 00:00:00.000000000 Z
10
+ date: 2026-05-27 00:00:00.000000000 Z
11
11
  dependencies:
12
12
  - !ruby/object:Gem::Dependency
13
13
  name: maremma