bolognese 2.5.0 → 2.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ci.yml +1 -1
  3. data/.github/workflows/release.yml +1 -1
  4. data/Gemfile +1 -1
  5. data/Gemfile.lock +182 -162
  6. data/bolognese.gemspec +29 -34
  7. data/lib/bolognese/author_utils.rb +1 -43
  8. data/lib/bolognese/citeproc_extensions.rb +48 -0
  9. data/lib/bolognese/datacite_utils.rb +4 -1
  10. data/lib/bolognese/doi_utils.rb +4 -3
  11. data/lib/bolognese/metadata.rb +4 -4
  12. data/lib/bolognese/metadata_utils.rb +12 -4
  13. data/lib/bolognese/readers/citeproc_reader.rb +1 -1
  14. data/lib/bolognese/readers/codemeta_reader.rb +1 -1
  15. data/lib/bolognese/readers/crossref_reader.rb +14 -1
  16. data/lib/bolognese/readers/datacite_reader.rb +17 -16
  17. data/lib/bolognese/readers/ris_reader.rb +1 -1
  18. data/lib/bolognese/readers/schema_org_reader.rb +1 -1
  19. data/lib/bolognese/utils.rb +47 -109
  20. data/lib/bolognese/version.rb +1 -1
  21. data/lib/bolognese/writers/csv_writer.rb +3 -5
  22. data/lib/bolognese/writers/jats_writer.rb +3 -2
  23. data/lib/bolognese.rb +1 -4
  24. data/resources/kernel-4/include/datacite-contributorType-v4.xsd +1 -1
  25. data/resources/kernel-4/include/datacite-dateType-v4.xsd +1 -1
  26. data/resources/kernel-4/include/datacite-relatedIdentifierType-v4.xsd +3 -1
  27. data/resources/kernel-4/include/datacite-relationType-v4.xsd +4 -2
  28. data/resources/kernel-4/include/datacite-resourceType-v4.xsd +4 -1
  29. data/resources/kernel-4/metadata.xsd +4 -1
  30. data/resources/kernel-4.7/include/datacite-contributorType-v4.xsd +37 -0
  31. data/resources/kernel-4.7/include/datacite-dateType-v4.xsd +27 -0
  32. data/resources/kernel-4.7/include/datacite-descriptionType-v4.xsd +19 -0
  33. data/resources/kernel-4.7/include/datacite-funderIdentifierType-v4.xsd +16 -0
  34. data/resources/kernel-4.7/include/datacite-nameType-v4.xsd +10 -0
  35. data/resources/kernel-4.7/include/datacite-numberType-v4.xsd +12 -0
  36. data/resources/kernel-4.7/include/datacite-relatedIdentifierType-v4.xsd +39 -0
  37. data/resources/kernel-4.7/include/datacite-relationType-v4.xsd +59 -0
  38. data/resources/kernel-4.7/include/datacite-resourceType-v4.xsd +52 -0
  39. data/resources/kernel-4.7/include/datacite-titleType-v4.xsd +14 -0
  40. data/resources/kernel-4.7/include/xml.xsd +286 -0
  41. data/resources/kernel-4.7/metadata.xsd +715 -0
  42. metadata +129 -195
@@ -0,0 +1,48 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Minimal patch for csl-ruby and citeproc-ruby compatibility
4
+ # Root cause: 'contributor' is not recognized as a names variable in citeproc gem
5
+ # https://github.com/inukshuk/citeproc/blob/121fa4a950b9bd71960e42d20db96bcea1165201/lib/citeproc/variable.rb#L20-L24
6
+
7
module CiteProc
  # Registers `contributor` as a names variable and `accepted-date` as a date
  # variable by rebuilding the class-level lookup tables of CiteProc::Variable
  # (@fields, @types and @factories).
  #
  # The patch only runs when @fields is already set on the class, i.e. when
  # the table it extends exists at load time; otherwise it does nothing.
  class Variable
    if @fields
      # Work on a copy of the table; `+` and `uniq` below build new arrays,
      # so the existing name lists are never mutated in place.
      fields = @fields.dup
      fields[:names] = (@fields[:names] + [:contributor]).uniq
      fields[:date] = (@fields[:date] + [:'accepted-date']).uniq

      # field name => type key, built from the real type keys only
      # (not from aliases such as :all, :any, :name, :dates or :numbers)
      type_for_field = %i[date names number text].flat_map do |type|
        fields[type].map { |field_name| [field_name, type] }
      end.to_h

      @fields = fields
      @types = Hash.new { |h, k| h.fetch(k.to_sym, nil) }.merge(type_for_field).freeze

      # field name => class named after the capitalized type key
      # (Date, Names, Number, Text), looked up under CiteProc
      factory_for_field = @types.map do |field_name, type|
        [field_name, CiteProc.const_get(type.to_s.capitalize)]
      end.to_h
      @factories = Hash.new { |h, k| h.fetch(k.to_s.intern, CiteProc::Variable) }
                       .merge(factory_for_field)
                       .freeze

      # Aliases share the arrays of the canonical keys.
      @fields[:name] = @fields[:names]
      @fields[:dates] = @fields[:date]
      @fields[:numbers] = @fields[:number]

      # :all and :any share one sorted list of every field name.
      @fields[:all] = @fields[:any] =
        %i[date names text number].flat_map { |type| @fields[type] }.sort

      @fields.freeze
    end
  end
end
@@ -209,7 +209,9 @@ module Bolognese
209
209
  attributes = {
210
210
  'relatedIdentifierType' => related_identifier["relatedIdentifierType"],
211
211
  'relationType' => related_identifier["relationType"],
212
- 'resourceTypeGeneral' => related_identifier["resourceTypeGeneral"] }.compact
212
+ 'relationTypeInformation' => related_identifier["relationTypeInformation"],
213
+ 'resourceTypeGeneral' => related_identifier["resourceTypeGeneral",
214
+ ] }.compact
213
215
 
214
216
  attributes.merge({ 'relatedMetadataScheme' => related_identifier["relatedMetadataSchema"],
215
217
  'schemeURI' => related_identifier["schemeUri"],
@@ -228,6 +230,7 @@ module Bolognese
228
230
  attributes = {
229
231
  'relatedItemType' => related_item["relatedItemType"],
230
232
  'relationType' => related_item["relationType"],
233
+ 'relationTypeInformation' => related_item["relationTypeInformation"],
231
234
  }.compact
232
235
 
233
236
  xml.relatedItem(related_item["relatedItem"], attributes) do
@@ -13,11 +13,12 @@ module Bolognese
13
13
  end
14
14
 
15
15
  def validate_funder_doi(doi)
16
- doi = Array(/\A(?:(http|https):\/(\/)?(dx\.)?(doi.org|handle.stage.datacite.org|handle.test.datacite.org)\/)?(doi:)?(10\.13039\/)?([1-9]\d+)\z/.match(doi)).last
16
+ match = Array(/\A(?:(http|https):\/(\/)?(dx\.)?(doi.org|handle.stage.datacite.org|handle.test.datacite.org)\/)?(doi:)?(10\.13039\/)?([1-9]\d+)\z/.match(doi)).last
17
+ return doi unless match
17
18
 
18
19
  # remove non-printing whitespace and downcase
19
- if doi.present?
20
- doi.delete("\u200B").downcase
20
+ if match.present?
21
+ doi = match.delete("\u200B").downcase
21
22
  "https://doi.org/10.13039/#{doi}"
22
23
  end
23
24
  end
@@ -1,4 +1,4 @@
1
- # frozen_string_literal: true
1
+ # frozen_string_literal: false
2
2
 
3
3
  require_relative 'metadata_utils'
4
4
 
@@ -76,8 +76,8 @@ module Bolognese
76
76
  end
77
77
 
78
78
  # make sure input is encoded as utf8
79
- string = string.force_encoding("UTF-8") if string.present?
80
- @string = string
79
+ string1 = string.dup.force_encoding("UTF-8") if string.present?
80
+ @string = string1
81
81
 
82
82
  # input options for citation formatting
83
83
  @style = options[:style]
@@ -120,7 +120,7 @@ module Bolognese
120
120
 
121
121
  @regenerate = options[:regenerate] || read_options.present?
122
122
  # generate name for method to call dynamically
123
- opts = { string: string, sandbox: options[:sandbox], doi: options[:doi], id: id, ra: ra }.merge(read_options)
123
+ opts = { string: string1, sandbox: options[:sandbox], doi: options[:doi], id: id, ra: ra }.merge(read_options)
124
124
  @meta = @from.present? ? send("read_" + @from, **opts) : {}
125
125
  end
126
126
 
@@ -33,7 +33,6 @@ require_relative 'writers/turtle_writer'
33
33
 
34
34
  module Bolognese
35
35
  module MetadataUtils
36
- # include BenchmarkMethods
37
36
  include Bolognese::DoiUtils
38
37
  include Bolognese::AuthorUtils
39
38
  include Bolognese::DataciteUtils
@@ -140,19 +139,28 @@ module Bolognese
140
139
  author = to_citeproc(creators)
141
140
  end
142
141
 
143
- if types["resourceTypeGeneral"] == "Software" && version_info.present?
144
- type = "book"
142
+ if types["resourceTypeGeneral"] == "Software"
143
+ type = "software"
145
144
  else
146
145
  type = types["citeproc"]
147
146
  end
148
147
 
148
+ # Filter out contributors who are already creators, editors, or translators to avoid duplication
149
+ creator_names = Array.wrap(creators).map { |c| c["name"] || [c["givenName"], c["familyName"]].compact.join(" ") }.compact
150
+ unique_contributors = Array.wrap(contributors).reject do |c|
151
+ contributor_name = c["name"] || [c["givenName"], c["familyName"]].compact.join(" ")
152
+ creator_names.include?(contributor_name) ||
153
+ c["contributorType"] == "Editor" ||
154
+ c["contributorType"] == "Translator"
155
+ end
156
+
149
157
  {
150
158
  "type" => type,
151
159
  "id" => normalize_doi(doi),
152
160
  "categories" => Array.wrap(subjects).map { |k| parse_attributes(k, content: "subject", first: true) }.presence,
153
161
  "language" => language,
154
162
  "author" => author,
155
- "contributor" => to_citeproc(contributors),
163
+ "contributor" => unique_contributors.presence ? to_citeproc(unique_contributors) : nil,
156
164
  "editor" => contributors ? to_citeproc(contributors.select { |c| c["contributorType"] == "Editor" }) : nil,
157
165
  "translator" => contributors ? to_citeproc(contributors.select { |c| c["contributorType"] == "Translator" }) : nil,
158
166
  "issued" => get_date(dates, "Issued") ? get_date_parts(get_date(dates, "Issued")) : get_date_parts(publication_year.to_s),
@@ -96,7 +96,7 @@ module Bolognese
96
96
 
97
97
  state = id.present? || read_options.present? ? "findable" : "not_found"
98
98
  subjects = Array.wrap(meta.fetch("categories", nil)).reduce([]) do |sum, subject|
99
- sum += name_to_fos(subject)
99
+ sum += name_to_subject(subject)
100
100
 
101
101
  sum
102
102
  end
@@ -55,7 +55,7 @@ module Bolognese
55
55
  "ris" => Bolognese::Utils::SO_TO_RIS_TRANSLATIONS[schema_org] || "GEN"
56
56
  }.compact
57
57
  subjects = Array.wrap(meta.fetch("tags", nil)).reduce([]) do |sum, subject|
58
- sum += name_to_fos(subject)
58
+ sum += name_to_subject(subject)
59
59
 
60
60
  sum
61
61
  end
@@ -144,7 +144,7 @@ module Bolognese
144
144
 
145
145
  state = meta.present? || read_options.present? ? "findable" : "not_found"
146
146
 
147
- related_identifiers = Array.wrap(crossref_is_part_of(journal_metadata)) + Array.wrap(crossref_references(bibliographic_metadata)) + Array.wrap(crossref_has_translation(program_metadata)) + Array.wrap(crossref_is_translation_of(program_metadata))
147
+ related_identifiers = Array.wrap(crossref_is_part_of(journal_metadata)) + Array.wrap(crossref_references(bibliographic_metadata)) + Array.wrap(crossref_has_translation(program_metadata)) + Array.wrap(crossref_is_translation_of(program_metadata)) + Array.wrap(crossref_is_related_material(program_metadata))
148
148
 
149
149
  container = if journal_metadata.present?
150
150
  issn = normalize_issn(journal_metadata.to_h.fetch("issn", nil))
@@ -395,6 +395,19 @@ module Bolognese
395
395
  end
396
396
  end.compact.unwrap
397
397
  end
398
+
399
# Builds related identifiers for the "isRelatedMaterial" inter-work relations
# of a Crossref relations program that point at a DOI.
#
# The relationship type, the identifier type and the identifier itself are
# all read from the same "inter_work_relation" entry of each related item.
#
# @param program_metadata [Hash, Object] relations program; anything that is
#   not a Hash yields no relations
# @return the matching related-identifier hashes after `compact.unwrap`
def crossref_is_related_material(program_metadata)
  related_items = program_metadata.dig("related_item") if program_metadata.is_a?(Hash)

  Array.wrap(related_items).map do |item|
    next unless item.dig("inter_work_relation", "relationship_type") == "isRelatedMaterial"
    next unless item.dig("inter_work_relation", "identifier_type") == "doi"

    { "relatedIdentifier" => parse_attributes(item["inter_work_relation"]).downcase,
      "relationType" => "Other",
      "relatedIdentifierType" => "DOI" }
  end.compact.unwrap
end
398
411
  end
399
412
  end
400
413
  end
@@ -1,5 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'base64'
4
+
3
5
  module Bolognese
4
6
  module Readers
5
7
  module DataciteReader
@@ -131,9 +133,9 @@ module Bolognese
131
133
 
132
134
  subjects = Array.wrap(meta.dig("subjects", "subject")).reduce([]) do |sum, subject|
133
135
  if subject.is_a?(String)
134
- sum += name_to_fos(subject)
136
+ sum += name_to_subject(subject)
135
137
  elsif subject.is_a?(Hash)
136
- sum += hsh_to_fos(subject)
138
+ sum += hsh_to_subject(subject)
137
139
  end
138
140
 
139
141
  sum
@@ -141,15 +143,12 @@ module Bolognese
141
143
 
142
144
  dates = Array.wrap(meta.dig("dates", "date")).map do |r|
143
145
  if r.is_a?(Hash) && date = sanitize(r["__content__"]).presence
144
- if Date.edtf(date).present? || Bolognese::Utils::UNKNOWN_INFORMATION.key?(date)
145
- { "date" => date,
146
- "dateType" => parse_attributes(r, content: "dateType"),
147
- "dateInformation" => parse_attributes(r, content: "dateInformation")
148
- }.compact
149
- end
146
+ { "date" => date,
147
+ "dateType" => parse_attributes(r, content: "dateType"),
148
+ "dateInformation" => parse_attributes(r, content: "dateInformation")
149
+ }.compact
150
150
  end
151
151
  end.compact
152
- dates << { "date" => meta.fetch("publicationYear", nil), "dateType" => "Issued" } if meta.fetch("publicationYear", nil).present? && get_date(dates, "Issued").blank?
153
152
  sizes = Array.wrap(meta.dig("sizes", "size")).map do |k|
154
153
  if k.blank?
155
154
  nil
@@ -177,8 +176,7 @@ module Bolognese
177
176
  if funder_identifier_type == "Crossref Funder ID"
178
177
  funder_identifier = validate_funder_doi(funder_identifier)
179
178
  elsif funder_identifier_type == "ROR"
180
- funder_identifier = normalize_ror(funder_identifier)
181
- scheme_uri = "https://ror.org"
179
+ funder_identifier = normalize_ror(funder_identifier)
182
180
  else
183
181
  funder_identifier = normalize_id(funder_identifier) ? normalize_id(funder_identifier) : funder_identifier
184
182
  end
@@ -194,7 +192,8 @@ module Bolognese
194
192
  end
195
193
  related_identifiers = Array.wrap(meta.dig("relatedIdentifiers", "relatedIdentifier")).map do |ri|
196
194
  if ri["relatedIdentifierType"] == "DOI"
197
- rid = validate_doi(ri["__content__"].to_s.downcase)
195
+ doi = ri["__content__"].to_s.downcase
196
+ rid = validate_doi(doi) || doi
198
197
  else
199
198
  rid = ri["__content__"]
200
199
  end
@@ -206,7 +205,8 @@ module Bolognese
206
205
  "resourceTypeGeneral" => ri["resourceTypeGeneral"],
207
206
  "relatedMetadataScheme" => ri["relatedMetadataScheme"],
208
207
  "schemeUri" => ri["schemeURI"],
209
- "schemeType" => ri["schemeType"]
208
+ "schemeType" => ri["schemeType"],
209
+ "relationTypeInformation" => ri["relationTypeInformation"]
210
210
  }.compact
211
211
  end
212
212
 
@@ -216,7 +216,8 @@ module Bolognese
216
216
  relatedItemIdentifier = nil
217
217
  if rii
218
218
  if rii["relatedItemIdentifierType"] == "DOI"
219
- rid = validate_doi(rii["__content__"].to_s.downcase)
219
+ doi = rii["__content__"].to_s.downcase
220
+ rid = validate_doi(doi) || doi
220
221
  else
221
222
  rid = rii["__content__"]
222
223
  end
@@ -226,14 +227,13 @@ module Bolognese
226
227
  "relatedItemIdentifierType" => rii["relatedItemIdentifierType"],
227
228
  "relatedMetadataScheme" => rii["relatedMetadataScheme"],
228
229
  "schemeURI" => rii["schemeURI"],
229
- "schemeType" => rii["schemeType"]
230
+ "schemeType" => rii["schemeType"],
230
231
  }.compact
231
232
  end
232
233
 
233
234
  number = ri["number"]
234
235
  if number.is_a?(String)
235
236
  number = number
236
- numberType = nil
237
237
  else
238
238
  number = ri.dig("number", "__content__")
239
239
  numberType = ri.dig("number", "numberType")
@@ -241,6 +241,7 @@ module Bolognese
241
241
 
242
242
  a = {
243
243
  "relationType" => ri["relationType"],
244
+ "relationTypeInformation" => ri["relationTypeInformation"],
244
245
  "relatedItemType" => ri["relatedItemType"],
245
246
  "relatedItemIdentifier" => relatedItemIdentifier,
246
247
  "creators" => get_authors(Array.wrap(ri.dig("creators", "creator"))),
@@ -78,7 +78,7 @@ module Bolognese
78
78
  end
79
79
  state = meta.fetch("DO", nil).present? || read_options.present? ? "findable" : "not_found"
80
80
  subjects = Array.wrap(meta.fetch("KW", nil)).reduce([]) do |sum, subject|
81
- sum += name_to_fos(subject)
81
+ sum += name_to_subject(subject)
82
82
 
83
83
  sum
84
84
  end
@@ -174,7 +174,7 @@ module Bolognese
174
174
  subjects = meta.fetch("keywords", nil)
175
175
  subjects = subjects.to_s.split(", ") if subjects.is_a?(String)
176
176
  subjects = Array.wrap(subjects).reduce([]) do |sum, subject|
177
- sum += name_to_fos(subject)
177
+ sum += name_to_subject(subject)
178
178
  sum
179
179
  end
180
180
 
@@ -67,7 +67,9 @@ module Bolognese
67
67
  "OutputManagementPlan" => nil,
68
68
  "PeerReview" => "Review",
69
69
  "PhysicalObject" => nil,
70
+ "Poster" => "Poster",
70
71
  "Preprint" => nil,
72
+ "Presentation" => "PresentationDigitalDocument",
71
73
  "Report" => "Report",
72
74
  "Service" => "Service",
73
75
  "Software" => "SoftwareSourceCode",
@@ -103,7 +105,9 @@ module Bolognese
103
105
  "OutputManagementPlan" => nil,
104
106
  "PeerReview" => "review",
105
107
  "PhysicalObject" => nil,
108
+ "Poster" => "document",
106
109
  "Preprint" => nil,
110
+ "Presentation" => "presentation",
107
111
  "Report" => "report",
108
112
  "Service" => nil,
109
113
  "Sound" => "song",
@@ -280,6 +284,8 @@ module Bolognese
280
284
  "Event" => "Event",
281
285
  "ImageObject" => "Image",
282
286
  "Movie" => "Audiovisual",
287
+ "Poster" => "Poster",
288
+ "PresentationDigitalDocument" => "Presentation",
283
289
  "PublicationIssue" => "Text",
284
290
  "Report" => "Report",
285
291
  "ScholarlyArticle" => "Text",
@@ -326,6 +332,8 @@ module Bolognese
326
332
  "Event" => nil,
327
333
  "ImageObject" => "graphic",
328
334
  "Movie" => "motion_picture",
335
+ "Poster" => "document",
336
+ "PresentationDigitalDocument" => "presentation",
329
337
  "PublicationIssue" => nil,
330
338
  "Report" => "report",
331
339
  "ScholarlyArticle" => "article-journal",
@@ -348,8 +356,10 @@ module Bolognese
348
356
  "Event" => nil,
349
357
  "ImageObject" => "FIGURE",
350
358
  "Movie" => "MPCT",
351
- "Report" => "RPRT",
359
+ "Poster" => "GEN",
360
+ "PresentationDigitalDocument" => "SLIDE",
352
361
  "PublicationIssue" => nil,
362
+ "Report" => "RPRT",
353
363
  "ScholarlyArticle" => "JOUR",
354
364
  "Service" => nil,
355
365
  "SoftwareSourceCode" => "COMP",
@@ -406,7 +416,9 @@ module Bolognese
406
416
  "OutputManagementPlan" => nil,
407
417
  "PeerReview" => nil,
408
418
  "PhysicalObject" => nil,
419
+ "Poster" => "GEN",
409
420
  "Preprint" => nil,
421
+ "Presentation" => "SLIDE",
410
422
  "Report" => "RRPT",
411
423
  "Service" => nil,
412
424
  "Software" => "COMP",
@@ -419,7 +431,7 @@ module Bolognese
419
431
 
420
432
  RIS_TO_DC_TRANSLATIONS = {
421
433
  "BLOG" => "Text",
422
- "GEN" => "Text",
434
+ "GEN" => "Poster",
423
435
  "CTLG" => "Collection",
424
436
  "DATA" => "Dataset",
425
437
  "FIGURE" => "Image",
@@ -428,7 +440,8 @@ module Bolognese
428
440
  "JOUR" => "JournalArticle",
429
441
  "COMP" => "Software",
430
442
  "VIDEO" => "Audiovisual",
431
- "ELEC" => "Text"
443
+ "ELEC" => "Text",
444
+ "SLIDE" => "Presentation"
432
445
  }
433
446
 
434
447
  BIB_TO_DC_TRANSLATIONS = {
@@ -452,7 +465,9 @@ module Bolognese
452
465
  "motion_picture" => "Audiovisual",
453
466
  "article-journal" => "JournalArticle",
454
467
  "broadcast" => "Audiovisual",
455
- "webpage" => "Text"
468
+ "webpage" => "Text",
469
+ "document" => "Poster",
470
+ "presentation" => "Presentation"
456
471
  }
457
472
 
458
473
  SO_TO_BIB_TRANSLATIONS = {
@@ -468,6 +483,8 @@ module Bolognese
468
483
  "Event" => "misc",
469
484
  "ImageObject" => "misc",
470
485
  "Movie" => "misc",
486
+ "Poster" => "misc",
487
+ "PresentationDigitalDocument" => "misc",
471
488
  "PublicationIssue" => "misc",
472
489
  "ScholarlyArticle" => "article",
473
490
  "Service" => "misc",
@@ -651,7 +668,7 @@ module Bolognese
651
668
  return nil unless uri && uri.host && %w(http https).include?(uri.scheme)
652
669
 
653
670
  # clean up URL
654
- PostRank::URI.clean(id)
671
+ normalize_uri_with_path_cleanup(uri)
655
672
  rescue Addressable::URI::InvalidURIError
656
673
  nil
657
674
  end
@@ -671,9 +688,7 @@ module Bolognese
671
688
  uri.scheme = "https" if options[:https]
672
689
 
673
690
  # clean up URL
674
- uri.path = PostRank::URI.clean(uri.path)
675
-
676
- uri.to_s
691
+ normalize_uri_with_path_cleanup(uri)
677
692
  rescue Addressable::URI::InvalidURIError
678
693
  nil
679
694
  end
@@ -684,19 +699,19 @@ module Bolognese
684
699
  end
685
700
 
686
701
  def normalize_orcid(orcid)
687
- orcid = validate_orcid(orcid)
688
- return nil unless orcid.present?
702
+ validated = validate_orcid(orcid)
703
+ return orcid unless validated.present?
689
704
 
690
705
  # turn ORCID ID into URL
691
- "https://orcid.org/" + Addressable::URI.encode(orcid)
706
+ "https://orcid.org/" + Addressable::URI.encode(validated)
692
707
  end
693
708
 
694
709
  def normalize_ror(ror)
695
- ror = validate_ror(ror)
696
- return nil unless ror.present?
710
+ validated = validate_ror(ror)
711
+ return ror unless validated.present?
697
712
 
698
713
  # turn ROR into URL
699
- "https://ror.org/" + Addressable::URI.encode(ror)
714
+ "https://ror.org/" + Addressable::URI.encode(validated)
700
715
  end
701
716
 
702
717
  def normalize_ids(ids: nil, relation_type: nil)
@@ -1316,103 +1331,18 @@ module Bolognese
1316
1331
  end
1317
1332
  end
1318
1333
 
1319
- def name_to_fos(name)
1320
- # first find subject in Fields of Science (OECD)
1321
- fos = resource_json(:fos).fetch("fosFields")
1322
-
1323
- subject = fos.find { |l| l["fosLabel"] == name || "FOS: " + l["fosLabel"] == name }
1324
-
1325
- if subject
1326
- return [{
1327
- "subject" => sanitize(name) },
1328
- {
1329
- "subject" => "FOS: " + subject["fosLabel"],
1330
- "subjectScheme" => "Fields of Science and Technology (FOS)",
1331
- "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf"
1332
- }]
1333
- end
1334
-
1335
- # if not found, look in Fields of Research (Australian and New Zealand Standard Research Classification)
1336
- # and map to Fields of Science. Add an extra entry for the latter
1337
- fores = resource_json(:for)
1338
- for_fields = fores.fetch("forFields")
1339
- for_disciplines = fores.fetch("forDisciplines")
1340
-
1341
- subject = for_fields.find { |l| l["forLabel"] == name } ||
1342
- for_disciplines.find { |l| l["forLabel"] == name }
1343
-
1344
- if subject
1345
- [{
1346
- "subject" => sanitize(name) },
1347
- {
1348
- "subject" => "FOS: " + subject["fosLabel"],
1349
- "subjectScheme" => "Fields of Science and Technology (FOS)",
1350
- "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf"
1351
- }]
1352
- else
1353
- [{ "subject" => sanitize(name) }]
1354
- end
1334
+ def name_to_subject(name)
1335
+ [{ "subject" => sanitize(name) }]
1355
1336
  end
1356
1337
 
1357
- def hsh_to_fos(hsh)
1358
- # first find subject in Fields of Science (OECD)
1359
- fos = resource_json(:fos).fetch("fosFields")
1360
- subject = fos.find { |l| l["fosLabel"] == hsh["__content__"] || "FOS: " + l["fosLabel"] == hsh["__content__"] || l["fosLabel"] == hsh["subject"]}
1361
-
1362
- if subject
1363
- return [{
1364
- "subject" => sanitize(hsh["__content__"] || hsh["subject"]),
1365
- "subjectScheme" => hsh["subjectScheme"],
1366
- "schemeUri" => hsh["schemeURI"] || hsh["schemeUri"],
1367
- "valueUri" => hsh["valueURI"] || hsh["valueUri"],
1368
- "classificationCode" => hsh["classificationCode"],
1369
- "lang" => hsh["lang"] }.compact,
1370
- {
1371
- "subject" => "FOS: " + subject["fosLabel"],
1372
- "subjectScheme" => "Fields of Science and Technology (FOS)",
1373
- "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf" }.compact]
1374
- end
1375
-
1376
- # if not found, look in Fields of Research (Australian and New Zealand Standard Research Classification)
1377
- # and map to Fields of Science. Add an extra entry for the latter
1378
- fores = resource_json(:for)
1379
- for_fields = fores.fetch("forFields")
1380
- for_disciplines = fores.fetch("forDisciplines")
1381
-
1382
- # try to extract forId
1383
- if hsh["subjectScheme"] == "FOR"
1384
- for_id = hsh["__content__"].to_s.split(" ").first || hsh["subject"].to_s.split(" ").first
1385
- for_id = for_id.rjust(6, "0")
1386
-
1387
- subject = for_fields.find { |l| l["forId"] == for_id } ||
1388
- for_disciplines.find { |l| l["forId"] == for_id[0..3] }
1389
- else
1390
- subject = for_fields.find { |l| l["forLabel"] == hsh["__content__"] || l["forLabel"] == hsh["subject"] } ||
1391
- for_disciplines.find { |l| l["forLabel"] == hsh["__content__"] || l["forLabel"] == hsh["subject"] }
1392
- end
1393
-
1394
- if subject
1395
- [{
1396
- "subject" => sanitize(hsh["__content__"] || hsh["subject"]),
1397
- "subjectScheme" => hsh["subjectScheme"],
1398
- "classificationCode" => hsh["classificationCode"],
1399
- "schemeUri" => hsh["schemeURI"] || hsh["schemeUri"],
1400
- "valueUri" => hsh["valueURI"] || hsh["valueUri"],
1401
- "lang" => hsh["lang"] }.compact,
1402
- {
1403
- "subject" => "FOS: " + subject["fosLabel"],
1404
- "subjectScheme" => "Fields of Science and Technology (FOS)",
1405
- "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf"
1406
- }]
1407
- else
1408
- [{
1409
- "subject" => sanitize(hsh["__content__"] || hsh["subject"]),
1410
- "subjectScheme" => hsh["subjectScheme"],
1411
- "classificationCode" => hsh["classificationCode"],
1412
- "schemeUri" => hsh["schemeURI"] || hsh["schemeUri"],
1413
- "valueUri" => hsh["valueURI"] || hsh["valueUri"],
1414
- "lang" => hsh["lang"] }.compact]
1415
- end
1338
+ def hsh_to_subject(hsh)
1339
+ [{
1340
+ "subject" => sanitize(hsh["__content__"] || hsh["subject"]),
1341
+ "subjectScheme" => hsh["subjectScheme"],
1342
+ "classificationCode" => hsh["classificationCode"],
1343
+ "schemeUri" => hsh["schemeURI"] || hsh["schemeUri"],
1344
+ "valueUri" => hsh["valueURI"] || hsh["valueUri"],
1345
+ "lang" => hsh["lang"] }.compact]
1416
1346
  end
1417
1347
 
1418
1348
  def dfg_ids_to_fos(dfg_ids)
@@ -1476,5 +1406,13 @@ module Bolognese
1476
1406
  }.compact
1477
1407
  end
1478
1408
  end
1409
+
1410
+ private
1411
+
1412
# Normalizes a parsed URI and drops one trailing slash from its path.
#
# @param uri an object responding to #normalize whose result supports
#   #path, #path= and #to_s (the callers in this file pass the parsed `uri`)
# @return [String] the normalized URI as a string
def normalize_uri_with_path_cleanup(uri)
  normalized_uri = uri.normalize

  # Only reassign the path when there is a trailing slash to remove; an empty
  # or missing path is left untouched.
  path = normalized_uri.path.to_s
  normalized_uri.path = path.delete_suffix("/") if path.end_with?("/")

  normalized_uri.to_s
end
1479
1417
  end
1480
1418
  end
@@ -1,3 +1,3 @@
1
1
  module Bolognese
2
- VERSION = "2.5.0"
2
+ VERSION = "2.7.0"
3
3
  end
@@ -4,18 +4,16 @@ module Bolognese
4
4
  require "csv"
5
5
 
6
6
  def csv
7
- return nil unless valid?
8
-
9
7
  bib = {
10
8
  doi: doi,
11
9
  url: url,
12
10
  registered: get_iso8601_date(date_registered),
13
11
  state: state,
14
- resource_type_general: types["resourceTypeGeneral"],
15
- resource_type: types["resourceType"],
12
+ resource_type_general: types.respond_to?(:to_h) ? types.to_h["resourceTypeGeneral"] : nil,
13
+ resource_type: types.respond_to?(:to_h) ? types.to_h["resourceType"] : nil,
16
14
  title: parse_attributes(titles, content: "title", first: true),
17
15
  author: authors_as_string(creators),
18
- publisher: publisher["name"],
16
+ publisher: publisher.respond_to?(:to_h) ? publisher.to_h["name"] : nil,
19
17
  publication_year: publication_year
20
18
  }.values
21
19
 
@@ -93,9 +93,10 @@ module Bolognese
93
93
  end
94
94
 
95
95
  def insert_publication_date(xml)
96
- year, month, day = get_date_parts(get_date(dates, "Issued")).to_h.fetch("date-parts", []).first
96
+ date = get_date(dates, "Issued") || publication_year
97
+ year, month, day = get_date_parts(date).to_h.fetch("date-parts", []).first
97
98
 
98
- xml.year(year, "iso-8601-date" => get_date(dates, "Issued"))
99
+ xml.year(year, "iso-8601-date" => date)
99
100
  xml.month(month.to_s.rjust(2, '0')) if month.present?
100
101
  xml.day(day.to_s.rjust(2, '0')) if day.present?
101
102
  end
data/lib/bolognese.rb CHANGED
@@ -3,9 +3,7 @@
3
3
  require 'active_support/all'
4
4
  require 'nokogiri'
5
5
  require 'maremma'
6
- require 'postrank-uri'
7
6
  require 'bibtex'
8
- require 'colorize'
9
7
  require 'loofah'
10
8
  require 'json/ld'
11
9
  require 'rdf/turtle'
@@ -13,13 +11,12 @@ require 'rdf/rdfxml'
13
11
  require 'logger'
14
12
  require 'iso8601'
15
13
  require 'jsonlint'
16
- require 'benchmark_methods'
17
14
  require 'gender_detector'
18
- require 'citeproc/ruby'
19
15
  require 'citeproc'
20
16
  require 'csl/styles'
21
17
  require 'edtf'
22
18
 
19
+ require "bolognese/citeproc_extensions"
23
20
  require "bolognese/version"
24
21
  require "bolognese/metadata"
25
22
  require "bolognese/cli"