bolognese 2.5.0 → 2.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ci.yml +1 -1
- data/.github/workflows/release.yml +1 -1
- data/Gemfile +1 -1
- data/Gemfile.lock +182 -162
- data/bolognese.gemspec +29 -34
- data/lib/bolognese/author_utils.rb +1 -43
- data/lib/bolognese/citeproc_extensions.rb +48 -0
- data/lib/bolognese/datacite_utils.rb +4 -1
- data/lib/bolognese/doi_utils.rb +4 -3
- data/lib/bolognese/metadata.rb +4 -4
- data/lib/bolognese/metadata_utils.rb +12 -4
- data/lib/bolognese/readers/citeproc_reader.rb +1 -1
- data/lib/bolognese/readers/codemeta_reader.rb +1 -1
- data/lib/bolognese/readers/crossref_reader.rb +14 -1
- data/lib/bolognese/readers/datacite_reader.rb +17 -16
- data/lib/bolognese/readers/ris_reader.rb +1 -1
- data/lib/bolognese/readers/schema_org_reader.rb +1 -1
- data/lib/bolognese/utils.rb +47 -109
- data/lib/bolognese/version.rb +1 -1
- data/lib/bolognese/writers/csv_writer.rb +3 -5
- data/lib/bolognese/writers/jats_writer.rb +3 -2
- data/lib/bolognese.rb +1 -4
- data/resources/kernel-4/include/datacite-contributorType-v4.xsd +1 -1
- data/resources/kernel-4/include/datacite-dateType-v4.xsd +1 -1
- data/resources/kernel-4/include/datacite-relatedIdentifierType-v4.xsd +3 -1
- data/resources/kernel-4/include/datacite-relationType-v4.xsd +4 -2
- data/resources/kernel-4/include/datacite-resourceType-v4.xsd +4 -1
- data/resources/kernel-4/metadata.xsd +4 -1
- data/resources/kernel-4.7/include/datacite-contributorType-v4.xsd +37 -0
- data/resources/kernel-4.7/include/datacite-dateType-v4.xsd +27 -0
- data/resources/kernel-4.7/include/datacite-descriptionType-v4.xsd +19 -0
- data/resources/kernel-4.7/include/datacite-funderIdentifierType-v4.xsd +16 -0
- data/resources/kernel-4.7/include/datacite-nameType-v4.xsd +10 -0
- data/resources/kernel-4.7/include/datacite-numberType-v4.xsd +12 -0
- data/resources/kernel-4.7/include/datacite-relatedIdentifierType-v4.xsd +39 -0
- data/resources/kernel-4.7/include/datacite-relationType-v4.xsd +59 -0
- data/resources/kernel-4.7/include/datacite-resourceType-v4.xsd +52 -0
- data/resources/kernel-4.7/include/datacite-titleType-v4.xsd +14 -0
- data/resources/kernel-4.7/include/xml.xsd +286 -0
- data/resources/kernel-4.7/metadata.xsd +715 -0
- metadata +129 -195
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Minimal patch for csl-ruby and citeproc-ruby compatibility
|
|
4
|
+
# Root cause: 'contributor' is not recognized as a names variable in citeproc gem
|
|
5
|
+
# https://github.com/inukshuk/citeproc/blob/121fa4a950b9bd71960e42d20db96bcea1165201/lib/citeproc/variable.rb#L20-L24
|
|
6
|
+
|
|
7
|
+
# Compatibility patch for the class-level variable registry of CiteProc::Variable.
#
# Registers two additional variables:
#   * :contributor     as a names variable
#   * :'accepted-date' as a date variable
#
# The patch rebuilds @fields, @types and @factories from a copy of the existing
# @fields table. It is skipped entirely when @fields is not set on the class.
module CiteProc
  class Variable
    if @fields
      # @fields is replaced rather than mutated; dup yields an unfrozen shallow copy.
      patched_fields = @fields.dup

      # Array#+ builds new arrays, so the existing lists are never modified.
      # The rebuilt lists are frozen before they are stored.
      patched_fields[:names] = (@fields[:names] + [:contributor]).uniq.freeze
      patched_fields[:date] = (@fields[:date] + [:'accepted-date']).uniq.freeze

      # Map every field name to its type key. Only the four real type keys are
      # used here, not aliases such as :all, :any, :name, :dates or :numbers.
      type_by_field = %i[date names number text].flat_map do |type|
        patched_fields[type].map { |field| [field, type] }
      end.to_h

      @fields = patched_fields
      @types = Hash.new { |h, k| h.fetch(k.to_sym, nil) }.merge(type_by_field).freeze

      # Map every field name to the CiteProc class named after its type
      # (e.g. :names -> CiteProc::Names); unknown keys fall back to Variable.
      @factories = Hash.new { |h, k| h.fetch(k.to_s.intern, CiteProc::Variable) }.merge(
        @types.map { |field, type| [field, CiteProc.const_get(type.to_s.capitalize)] }.to_h
      ).freeze

      # Recreate the aliases so they point at the rebuilt lists.
      @fields[:name] = @fields[:names]
      @fields[:dates] = @fields[:date]
      @fields[:numbers] = @fields[:number]

      # Recreate :all and :any as the sorted union of every typed field list.
      @fields[:all] = @fields[:any] =
        %i[date names text number].reduce([]) { |all, type| all.concat(@fields[type]) }.sort

      @fields.freeze
    end
  end
end
|
|
@@ -209,7 +209,9 @@ module Bolognese
|
|
|
209
209
|
attributes = {
|
|
210
210
|
'relatedIdentifierType' => related_identifier["relatedIdentifierType"],
|
|
211
211
|
'relationType' => related_identifier["relationType"],
|
|
212
|
-
'
|
|
212
|
+
'relationTypeInformation' => related_identifier["relationTypeInformation"],
|
|
213
|
+
'resourceTypeGeneral' => related_identifier["resourceTypeGeneral",
|
|
214
|
+
] }.compact
|
|
213
215
|
|
|
214
216
|
attributes.merge({ 'relatedMetadataScheme' => related_identifier["relatedMetadataSchema"],
|
|
215
217
|
'schemeURI' => related_identifier["schemeUri"],
|
|
@@ -228,6 +230,7 @@ module Bolognese
|
|
|
228
230
|
attributes = {
|
|
229
231
|
'relatedItemType' => related_item["relatedItemType"],
|
|
230
232
|
'relationType' => related_item["relationType"],
|
|
233
|
+
'relationTypeInformation' => related_item["relationTypeInformation"],
|
|
231
234
|
}.compact
|
|
232
235
|
|
|
233
236
|
xml.relatedItem(related_item["relatedItem"], attributes) do
|
data/lib/bolognese/doi_utils.rb
CHANGED
|
@@ -13,11 +13,12 @@ module Bolognese
|
|
|
13
13
|
end
|
|
14
14
|
|
|
15
15
|
# Normalize a Crossref Funder ID to its DOI URL.
#
# Recognized inputs are a bare numeric funder id ("501100000780"), the same id
# prefixed with "10.13039/", optionally with a "doi:" prefix, or an http(s) URL
# on doi.org, dx.doi.org, handle.stage.datacite.org or handle.test.datacite.org.
#
# @param doi [String, nil] candidate funder identifier
# @return [String, nil] "https://doi.org/10.13039/<id>" when the input is
#   recognized, otherwise the input itself, unchanged
def validate_funder_doi(doi)
  # Host dots are escaped so that only the literal host names match.
  match = %r{\A(?:https?://?(?:dx\.)?(?:doi\.org|handle\.stage\.datacite\.org|handle\.test\.datacite\.org)/)?(?:doi:)?(?:10\.13039/)?([1-9]\d+)\z}.match(doi)
  return doi unless match

  # The capture consists of ASCII digits only, so no further cleanup is needed.
  "https://doi.org/10.13039/#{match[1]}"
end
|
data/lib/bolognese/metadata.rb
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# frozen_string_literal:
|
|
1
|
+
# frozen_string_literal: false
|
|
2
2
|
|
|
3
3
|
require_relative 'metadata_utils'
|
|
4
4
|
|
|
@@ -76,8 +76,8 @@ module Bolognese
|
|
|
76
76
|
end
|
|
77
77
|
|
|
78
78
|
# make sure input is encoded as utf8
|
|
79
|
-
|
|
80
|
-
@string =
|
|
79
|
+
string1 = string.dup.force_encoding("UTF-8") if string.present?
|
|
80
|
+
@string = string1
|
|
81
81
|
|
|
82
82
|
# input options for citation formatting
|
|
83
83
|
@style = options[:style]
|
|
@@ -120,7 +120,7 @@ module Bolognese
|
|
|
120
120
|
|
|
121
121
|
@regenerate = options[:regenerate] || read_options.present?
|
|
122
122
|
# generate name for method to call dynamically
|
|
123
|
-
opts = { string:
|
|
123
|
+
opts = { string: string1, sandbox: options[:sandbox], doi: options[:doi], id: id, ra: ra }.merge(read_options)
|
|
124
124
|
@meta = @from.present? ? send("read_" + @from, **opts) : {}
|
|
125
125
|
end
|
|
126
126
|
|
|
@@ -33,7 +33,6 @@ require_relative 'writers/turtle_writer'
|
|
|
33
33
|
|
|
34
34
|
module Bolognese
|
|
35
35
|
module MetadataUtils
|
|
36
|
-
# include BenchmarkMethods
|
|
37
36
|
include Bolognese::DoiUtils
|
|
38
37
|
include Bolognese::AuthorUtils
|
|
39
38
|
include Bolognese::DataciteUtils
|
|
@@ -140,19 +139,28 @@ module Bolognese
|
|
|
140
139
|
author = to_citeproc(creators)
|
|
141
140
|
end
|
|
142
141
|
|
|
143
|
-
if types["resourceTypeGeneral"] == "Software"
|
|
144
|
-
type = "
|
|
142
|
+
if types["resourceTypeGeneral"] == "Software"
|
|
143
|
+
type = "software"
|
|
145
144
|
else
|
|
146
145
|
type = types["citeproc"]
|
|
147
146
|
end
|
|
148
147
|
|
|
148
|
+
# Filter out contributors who are already creators, editors, or translators to avoid duplication
|
|
149
|
+
creator_names = Array.wrap(creators).map { |c| c["name"] || [c["givenName"], c["familyName"]].compact.join(" ") }.compact
|
|
150
|
+
unique_contributors = Array.wrap(contributors).reject do |c|
|
|
151
|
+
contributor_name = c["name"] || [c["givenName"], c["familyName"]].compact.join(" ")
|
|
152
|
+
creator_names.include?(contributor_name) ||
|
|
153
|
+
c["contributorType"] == "Editor" ||
|
|
154
|
+
c["contributorType"] == "Translator"
|
|
155
|
+
end
|
|
156
|
+
|
|
149
157
|
{
|
|
150
158
|
"type" => type,
|
|
151
159
|
"id" => normalize_doi(doi),
|
|
152
160
|
"categories" => Array.wrap(subjects).map { |k| parse_attributes(k, content: "subject", first: true) }.presence,
|
|
153
161
|
"language" => language,
|
|
154
162
|
"author" => author,
|
|
155
|
-
"contributor" => to_citeproc(
|
|
163
|
+
"contributor" => unique_contributors.presence ? to_citeproc(unique_contributors) : nil,
|
|
156
164
|
"editor" => contributors ? to_citeproc(contributors.select { |c| c["contributorType"] == "Editor" }) : nil,
|
|
157
165
|
"translator" => contributors ? to_citeproc(contributors.select { |c| c["contributorType"] == "Translator" }) : nil,
|
|
158
166
|
"issued" => get_date(dates, "Issued") ? get_date_parts(get_date(dates, "Issued")) : get_date_parts(publication_year.to_s),
|
|
@@ -96,7 +96,7 @@ module Bolognese
|
|
|
96
96
|
|
|
97
97
|
state = id.present? || read_options.present? ? "findable" : "not_found"
|
|
98
98
|
subjects = Array.wrap(meta.fetch("categories", nil)).reduce([]) do |sum, subject|
|
|
99
|
-
sum +=
|
|
99
|
+
sum += name_to_subject(subject)
|
|
100
100
|
|
|
101
101
|
sum
|
|
102
102
|
end
|
|
@@ -55,7 +55,7 @@ module Bolognese
|
|
|
55
55
|
"ris" => Bolognese::Utils::SO_TO_RIS_TRANSLATIONS[schema_org] || "GEN"
|
|
56
56
|
}.compact
|
|
57
57
|
subjects = Array.wrap(meta.fetch("tags", nil)).reduce([]) do |sum, subject|
|
|
58
|
-
sum +=
|
|
58
|
+
sum += name_to_subject(subject)
|
|
59
59
|
|
|
60
60
|
sum
|
|
61
61
|
end
|
|
@@ -144,7 +144,7 @@ module Bolognese
|
|
|
144
144
|
|
|
145
145
|
state = meta.present? || read_options.present? ? "findable" : "not_found"
|
|
146
146
|
|
|
147
|
-
related_identifiers = Array.wrap(crossref_is_part_of(journal_metadata)) + Array.wrap(crossref_references(bibliographic_metadata)) + Array.wrap(crossref_has_translation(program_metadata)) + Array.wrap(crossref_is_translation_of(program_metadata))
|
|
147
|
+
related_identifiers = Array.wrap(crossref_is_part_of(journal_metadata)) + Array.wrap(crossref_references(bibliographic_metadata)) + Array.wrap(crossref_has_translation(program_metadata)) + Array.wrap(crossref_is_translation_of(program_metadata)) + Array.wrap(crossref_is_related_material(program_metadata))
|
|
148
148
|
|
|
149
149
|
container = if journal_metadata.present?
|
|
150
150
|
issn = normalize_issn(journal_metadata.to_h.fetch("issn", nil))
|
|
@@ -395,6 +395,19 @@ module Bolognese
|
|
|
395
395
|
end
|
|
396
396
|
end.compact.unwrap
|
|
397
397
|
end
|
|
398
|
+
|
|
399
|
+
# Collect Crossref related items marked as "isRelatedMaterial".
#
# Reads program_metadata["related_item"], keeps the entries whose
# "inter_work_relation" has relationship_type "isRelatedMaterial", and turns
# those with identifier_type "doi" into related-identifier hashes with
# relationType "Other". Entries with another identifier type are dropped.
#
# @param program_metadata [Hash, Object] anything that is not a Hash yields no items
# @return the mapped hashes after compact.unwrap (unwrap is defined outside this view)
def crossref_is_related_material(program_metadata)
  refs = program_metadata.dig("related_item") if program_metadata.is_a?(Hash)

  # Filter on the same "inter_work_relation" key that is read below; filtering
  # on a differently spelled key would never match any item.
  related = Array.wrap(refs).select do |item|
    item.dig("inter_work_relation", "relationship_type") == "isRelatedMaterial"
  end

  related.map do |item|
    if item.dig("inter_work_relation", "identifier_type") == "doi"
      { "relatedIdentifier" => parse_attributes(item["inter_work_relation"]).downcase,
        "relationType" => "Other",
        "relatedIdentifierType" => "DOI" }.compact
    else
      nil
    end
  end.compact.unwrap
end
|
|
398
411
|
end
|
|
399
412
|
end
|
|
400
413
|
end
|
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require 'base64'
|
|
4
|
+
|
|
3
5
|
module Bolognese
|
|
4
6
|
module Readers
|
|
5
7
|
module DataciteReader
|
|
@@ -131,9 +133,9 @@ module Bolognese
|
|
|
131
133
|
|
|
132
134
|
subjects = Array.wrap(meta.dig("subjects", "subject")).reduce([]) do |sum, subject|
|
|
133
135
|
if subject.is_a?(String)
|
|
134
|
-
sum +=
|
|
136
|
+
sum += name_to_subject(subject)
|
|
135
137
|
elsif subject.is_a?(Hash)
|
|
136
|
-
sum +=
|
|
138
|
+
sum += hsh_to_subject(subject)
|
|
137
139
|
end
|
|
138
140
|
|
|
139
141
|
sum
|
|
@@ -141,15 +143,12 @@ module Bolognese
|
|
|
141
143
|
|
|
142
144
|
dates = Array.wrap(meta.dig("dates", "date")).map do |r|
|
|
143
145
|
if r.is_a?(Hash) && date = sanitize(r["__content__"]).presence
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
}.compact
|
|
149
|
-
end
|
|
146
|
+
{ "date" => date,
|
|
147
|
+
"dateType" => parse_attributes(r, content: "dateType"),
|
|
148
|
+
"dateInformation" => parse_attributes(r, content: "dateInformation")
|
|
149
|
+
}.compact
|
|
150
150
|
end
|
|
151
151
|
end.compact
|
|
152
|
-
dates << { "date" => meta.fetch("publicationYear", nil), "dateType" => "Issued" } if meta.fetch("publicationYear", nil).present? && get_date(dates, "Issued").blank?
|
|
153
152
|
sizes = Array.wrap(meta.dig("sizes", "size")).map do |k|
|
|
154
153
|
if k.blank?
|
|
155
154
|
nil
|
|
@@ -177,8 +176,7 @@ module Bolognese
|
|
|
177
176
|
if funder_identifier_type == "Crossref Funder ID"
|
|
178
177
|
funder_identifier = validate_funder_doi(funder_identifier)
|
|
179
178
|
elsif funder_identifier_type == "ROR"
|
|
180
|
-
funder_identifier =
|
|
181
|
-
scheme_uri = "https://ror.org"
|
|
179
|
+
funder_identifier = normalize_ror(funder_identifier)
|
|
182
180
|
else
|
|
183
181
|
funder_identifier = normalize_id(funder_identifier) ? normalize_id(funder_identifier) : funder_identifier
|
|
184
182
|
end
|
|
@@ -194,7 +192,8 @@ module Bolognese
|
|
|
194
192
|
end
|
|
195
193
|
related_identifiers = Array.wrap(meta.dig("relatedIdentifiers", "relatedIdentifier")).map do |ri|
|
|
196
194
|
if ri["relatedIdentifierType"] == "DOI"
|
|
197
|
-
|
|
195
|
+
doi = ri["__content__"].to_s.downcase
|
|
196
|
+
rid = validate_doi(doi) || doi
|
|
198
197
|
else
|
|
199
198
|
rid = ri["__content__"]
|
|
200
199
|
end
|
|
@@ -206,7 +205,8 @@ module Bolognese
|
|
|
206
205
|
"resourceTypeGeneral" => ri["resourceTypeGeneral"],
|
|
207
206
|
"relatedMetadataScheme" => ri["relatedMetadataScheme"],
|
|
208
207
|
"schemeUri" => ri["schemeURI"],
|
|
209
|
-
"schemeType" => ri["schemeType"]
|
|
208
|
+
"schemeType" => ri["schemeType"],
|
|
209
|
+
"relationTypeInformation" => ri["relationTypeInformation"]
|
|
210
210
|
}.compact
|
|
211
211
|
end
|
|
212
212
|
|
|
@@ -216,7 +216,8 @@ module Bolognese
|
|
|
216
216
|
relatedItemIdentifier = nil
|
|
217
217
|
if rii
|
|
218
218
|
if rii["relatedItemIdentifierType"] == "DOI"
|
|
219
|
-
|
|
219
|
+
doi = rii["__content__"].to_s.downcase
|
|
220
|
+
rid = validate_doi(doi) || doi
|
|
220
221
|
else
|
|
221
222
|
rid = rii["__content__"]
|
|
222
223
|
end
|
|
@@ -226,14 +227,13 @@ module Bolognese
|
|
|
226
227
|
"relatedItemIdentifierType" => rii["relatedItemIdentifierType"],
|
|
227
228
|
"relatedMetadataScheme" => rii["relatedMetadataScheme"],
|
|
228
229
|
"schemeURI" => rii["schemeURI"],
|
|
229
|
-
"schemeType" => rii["schemeType"]
|
|
230
|
+
"schemeType" => rii["schemeType"],
|
|
230
231
|
}.compact
|
|
231
232
|
end
|
|
232
233
|
|
|
233
234
|
number = ri["number"]
|
|
234
235
|
if number.is_a?(String)
|
|
235
236
|
number = number
|
|
236
|
-
numberType = nil
|
|
237
237
|
else
|
|
238
238
|
number = ri.dig("number", "__content__")
|
|
239
239
|
numberType = ri.dig("number", "numberType")
|
|
@@ -241,6 +241,7 @@ module Bolognese
|
|
|
241
241
|
|
|
242
242
|
a = {
|
|
243
243
|
"relationType" => ri["relationType"],
|
|
244
|
+
"relationTypeInformation" => ri["relationTypeInformation"],
|
|
244
245
|
"relatedItemType" => ri["relatedItemType"],
|
|
245
246
|
"relatedItemIdentifier" => relatedItemIdentifier,
|
|
246
247
|
"creators" => get_authors(Array.wrap(ri.dig("creators", "creator"))),
|
|
@@ -78,7 +78,7 @@ module Bolognese
|
|
|
78
78
|
end
|
|
79
79
|
state = meta.fetch("DO", nil).present? || read_options.present? ? "findable" : "not_found"
|
|
80
80
|
subjects = Array.wrap(meta.fetch("KW", nil)).reduce([]) do |sum, subject|
|
|
81
|
-
sum +=
|
|
81
|
+
sum += name_to_subject(subject)
|
|
82
82
|
|
|
83
83
|
sum
|
|
84
84
|
end
|
|
@@ -174,7 +174,7 @@ module Bolognese
|
|
|
174
174
|
subjects = meta.fetch("keywords", nil)
|
|
175
175
|
subjects = subjects.to_s.split(", ") if subjects.is_a?(String)
|
|
176
176
|
subjects = Array.wrap(subjects).reduce([]) do |sum, subject|
|
|
177
|
-
sum +=
|
|
177
|
+
sum += name_to_subject(subject)
|
|
178
178
|
sum
|
|
179
179
|
end
|
|
180
180
|
|
data/lib/bolognese/utils.rb
CHANGED
|
@@ -67,7 +67,9 @@ module Bolognese
|
|
|
67
67
|
"OutputManagementPlan" => nil,
|
|
68
68
|
"PeerReview" => "Review",
|
|
69
69
|
"PhysicalObject" => nil,
|
|
70
|
+
"Poster" => "Poster",
|
|
70
71
|
"Preprint" => nil,
|
|
72
|
+
"Presentation" => "PresentationDigitalDocument",
|
|
71
73
|
"Report" => "Report",
|
|
72
74
|
"Service" => "Service",
|
|
73
75
|
"Software" => "SoftwareSourceCode",
|
|
@@ -103,7 +105,9 @@ module Bolognese
|
|
|
103
105
|
"OutputManagementPlan" => nil,
|
|
104
106
|
"PeerReview" => "review",
|
|
105
107
|
"PhysicalObject" => nil,
|
|
108
|
+
"Poster" => "document",
|
|
106
109
|
"Preprint" => nil,
|
|
110
|
+
"Presentation" => "presentation",
|
|
107
111
|
"Report" => "report",
|
|
108
112
|
"Service" => nil,
|
|
109
113
|
"Sound" => "song",
|
|
@@ -280,6 +284,8 @@ module Bolognese
|
|
|
280
284
|
"Event" => "Event",
|
|
281
285
|
"ImageObject" => "Image",
|
|
282
286
|
"Movie" => "Audiovisual",
|
|
287
|
+
"Poster" => "Poster",
|
|
288
|
+
"PresentationDigitalDocument" => "Presentation",
|
|
283
289
|
"PublicationIssue" => "Text",
|
|
284
290
|
"Report" => "Report",
|
|
285
291
|
"ScholarlyArticle" => "Text",
|
|
@@ -326,6 +332,8 @@ module Bolognese
|
|
|
326
332
|
"Event" => nil,
|
|
327
333
|
"ImageObject" => "graphic",
|
|
328
334
|
"Movie" => "motion_picture",
|
|
335
|
+
"Poster" => "document",
|
|
336
|
+
"PresentationDigitalDocument" => "presentation",
|
|
329
337
|
"PublicationIssue" => nil,
|
|
330
338
|
"Report" => "report",
|
|
331
339
|
"ScholarlyArticle" => "article-journal",
|
|
@@ -348,8 +356,10 @@ module Bolognese
|
|
|
348
356
|
"Event" => nil,
|
|
349
357
|
"ImageObject" => "FIGURE",
|
|
350
358
|
"Movie" => "MPCT",
|
|
351
|
-
"
|
|
359
|
+
"Poster" => "GEN",
|
|
360
|
+
"PresentationDigitalDocument" => "SLIDE",
|
|
352
361
|
"PublicationIssue" => nil,
|
|
362
|
+
"Report" => "RPRT",
|
|
353
363
|
"ScholarlyArticle" => "JOUR",
|
|
354
364
|
"Service" => nil,
|
|
355
365
|
"SoftwareSourceCode" => "COMP",
|
|
@@ -406,7 +416,9 @@ module Bolognese
|
|
|
406
416
|
"OutputManagementPlan" => nil,
|
|
407
417
|
"PeerReview" => nil,
|
|
408
418
|
"PhysicalObject" => nil,
|
|
419
|
+
"Poster" => "GEN",
|
|
409
420
|
"Preprint" => nil,
|
|
421
|
+
"Presentation" => "SLIDE",
|
|
410
422
|
"Report" => "RRPT",
|
|
411
423
|
"Service" => nil,
|
|
412
424
|
"Software" => "COMP",
|
|
@@ -419,7 +431,7 @@ module Bolognese
|
|
|
419
431
|
|
|
420
432
|
RIS_TO_DC_TRANSLATIONS = {
|
|
421
433
|
"BLOG" => "Text",
|
|
422
|
-
"GEN" => "
|
|
434
|
+
"GEN" => "Poster",
|
|
423
435
|
"CTLG" => "Collection",
|
|
424
436
|
"DATA" => "Dataset",
|
|
425
437
|
"FIGURE" => "Image",
|
|
@@ -428,7 +440,8 @@ module Bolognese
|
|
|
428
440
|
"JOUR" => "JournalArticle",
|
|
429
441
|
"COMP" => "Software",
|
|
430
442
|
"VIDEO" => "Audiovisual",
|
|
431
|
-
"ELEC" => "Text"
|
|
443
|
+
"ELEC" => "Text",
|
|
444
|
+
"SLIDE" => "Presentation"
|
|
432
445
|
}
|
|
433
446
|
|
|
434
447
|
BIB_TO_DC_TRANSLATIONS = {
|
|
@@ -452,7 +465,9 @@ module Bolognese
|
|
|
452
465
|
"motion_picture" => "Audiovisual",
|
|
453
466
|
"article-journal" => "JournalArticle",
|
|
454
467
|
"broadcast" => "Audiovisual",
|
|
455
|
-
"webpage" => "Text"
|
|
468
|
+
"webpage" => "Text",
|
|
469
|
+
"document" => "Poster",
|
|
470
|
+
"presentation" => "Presentation"
|
|
456
471
|
}
|
|
457
472
|
|
|
458
473
|
SO_TO_BIB_TRANSLATIONS = {
|
|
@@ -468,6 +483,8 @@ module Bolognese
|
|
|
468
483
|
"Event" => "misc",
|
|
469
484
|
"ImageObject" => "misc",
|
|
470
485
|
"Movie" => "misc",
|
|
486
|
+
"Poster" => "misc",
|
|
487
|
+
"PresentationDigitalDocument" => "misc",
|
|
471
488
|
"PublicationIssue" => "misc",
|
|
472
489
|
"ScholarlyArticle" => "article",
|
|
473
490
|
"Service" => "misc",
|
|
@@ -651,7 +668,7 @@ module Bolognese
|
|
|
651
668
|
return nil unless uri && uri.host && %w(http https).include?(uri.scheme)
|
|
652
669
|
|
|
653
670
|
# clean up URL
|
|
654
|
-
|
|
671
|
+
normalize_uri_with_path_cleanup(uri)
|
|
655
672
|
rescue Addressable::URI::InvalidURIError
|
|
656
673
|
nil
|
|
657
674
|
end
|
|
@@ -671,9 +688,7 @@ module Bolognese
|
|
|
671
688
|
uri.scheme = "https" if options[:https]
|
|
672
689
|
|
|
673
690
|
# clean up URL
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
uri.to_s
|
|
691
|
+
normalize_uri_with_path_cleanup(uri)
|
|
677
692
|
rescue Addressable::URI::InvalidURIError
|
|
678
693
|
nil
|
|
679
694
|
end
|
|
@@ -684,19 +699,19 @@ module Bolognese
|
|
|
684
699
|
end
|
|
685
700
|
|
|
686
701
|
# Turn an ORCID identifier into its https://orcid.org/ URL.
#
# @param orcid candidate identifier, passed to validate_orcid
# @return the orcid.org URL when validate_orcid returns a present value,
#   otherwise the original argument, unchanged
def normalize_orcid(orcid)
  orcid_id = validate_orcid(orcid)

  if orcid_id.present?
    # turn ORCID ID into URL
    "https://orcid.org/" + Addressable::URI.encode(orcid_id)
  else
    orcid
  end
end
|
|
693
708
|
|
|
694
709
|
# Turn a ROR identifier into its https://ror.org/ URL.
#
# @param ror candidate identifier, passed to validate_ror
# @return the ror.org URL when validate_ror returns a present value,
#   otherwise the original argument, unchanged
def normalize_ror(ror)
  ror_id = validate_ror(ror)

  if ror_id.present?
    # turn ROR into URL
    "https://ror.org/" + Addressable::URI.encode(ror_id)
  else
    ror
  end
end
|
|
701
716
|
|
|
702
717
|
def normalize_ids(ids: nil, relation_type: nil)
|
|
@@ -1316,103 +1331,18 @@ module Bolognese
|
|
|
1316
1331
|
end
|
|
1317
1332
|
end
|
|
1318
1333
|
|
|
1319
|
-
def
|
|
1320
|
-
|
|
1321
|
-
fos = resource_json(:fos).fetch("fosFields")
|
|
1322
|
-
|
|
1323
|
-
subject = fos.find { |l| l["fosLabel"] == name || "FOS: " + l["fosLabel"] == name }
|
|
1324
|
-
|
|
1325
|
-
if subject
|
|
1326
|
-
return [{
|
|
1327
|
-
"subject" => sanitize(name) },
|
|
1328
|
-
{
|
|
1329
|
-
"subject" => "FOS: " + subject["fosLabel"],
|
|
1330
|
-
"subjectScheme" => "Fields of Science and Technology (FOS)",
|
|
1331
|
-
"schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf"
|
|
1332
|
-
}]
|
|
1333
|
-
end
|
|
1334
|
-
|
|
1335
|
-
# if not found, look in Fields of Research (Australian and New Zealand Standard Research Classification)
|
|
1336
|
-
# and map to Fields of Science. Add an extra entry for the latter
|
|
1337
|
-
fores = resource_json(:for)
|
|
1338
|
-
for_fields = fores.fetch("forFields")
|
|
1339
|
-
for_disciplines = fores.fetch("forDisciplines")
|
|
1340
|
-
|
|
1341
|
-
subject = for_fields.find { |l| l["forLabel"] == name } ||
|
|
1342
|
-
for_disciplines.find { |l| l["forLabel"] == name }
|
|
1343
|
-
|
|
1344
|
-
if subject
|
|
1345
|
-
[{
|
|
1346
|
-
"subject" => sanitize(name) },
|
|
1347
|
-
{
|
|
1348
|
-
"subject" => "FOS: " + subject["fosLabel"],
|
|
1349
|
-
"subjectScheme" => "Fields of Science and Technology (FOS)",
|
|
1350
|
-
"schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf"
|
|
1351
|
-
}]
|
|
1352
|
-
else
|
|
1353
|
-
[{ "subject" => sanitize(name) }]
|
|
1354
|
-
end
|
|
1334
|
+
# Wrap a plain subject name in the list-of-hashes subject format.
#
# @param name subject name, passed through sanitize
# @return [Array<Hash>] one-element list holding { "subject" => sanitized name }
def name_to_subject(name)
  subject = { "subject" => sanitize(name) }
  [subject]
end
|
|
1356
1337
|
|
|
1357
|
-
def
|
|
1358
|
-
|
|
1359
|
-
|
|
1360
|
-
|
|
1361
|
-
|
|
1362
|
-
|
|
1363
|
-
|
|
1364
|
-
|
|
1365
|
-
"subjectScheme" => hsh["subjectScheme"],
|
|
1366
|
-
"schemeUri" => hsh["schemeURI"] || hsh["schemeUri"],
|
|
1367
|
-
"valueUri" => hsh["valueURI"] || hsh["valueUri"],
|
|
1368
|
-
"classificationCode" => hsh["classificationCode"],
|
|
1369
|
-
"lang" => hsh["lang"] }.compact,
|
|
1370
|
-
{
|
|
1371
|
-
"subject" => "FOS: " + subject["fosLabel"],
|
|
1372
|
-
"subjectScheme" => "Fields of Science and Technology (FOS)",
|
|
1373
|
-
"schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf" }.compact]
|
|
1374
|
-
end
|
|
1375
|
-
|
|
1376
|
-
# if not found, look in Fields of Research (Australian and New Zealand Standard Research Classification)
|
|
1377
|
-
# and map to Fields of Science. Add an extra entry for the latter
|
|
1378
|
-
fores = resource_json(:for)
|
|
1379
|
-
for_fields = fores.fetch("forFields")
|
|
1380
|
-
for_disciplines = fores.fetch("forDisciplines")
|
|
1381
|
-
|
|
1382
|
-
# try to extract forId
|
|
1383
|
-
if hsh["subjectScheme"] == "FOR"
|
|
1384
|
-
for_id = hsh["__content__"].to_s.split(" ").first || hsh["subject"].to_s.split(" ").first
|
|
1385
|
-
for_id = for_id.rjust(6, "0")
|
|
1386
|
-
|
|
1387
|
-
subject = for_fields.find { |l| l["forId"] == for_id } ||
|
|
1388
|
-
for_disciplines.find { |l| l["forId"] == for_id[0..3] }
|
|
1389
|
-
else
|
|
1390
|
-
subject = for_fields.find { |l| l["forLabel"] == hsh["__content__"] || l["forLabel"] == hsh["subject"] } ||
|
|
1391
|
-
for_disciplines.find { |l| l["forLabel"] == hsh["__content__"] || l["forLabel"] == hsh["subject"] }
|
|
1392
|
-
end
|
|
1393
|
-
|
|
1394
|
-
if subject
|
|
1395
|
-
[{
|
|
1396
|
-
"subject" => sanitize(hsh["__content__"] || hsh["subject"]),
|
|
1397
|
-
"subjectScheme" => hsh["subjectScheme"],
|
|
1398
|
-
"classificationCode" => hsh["classificationCode"],
|
|
1399
|
-
"schemeUri" => hsh["schemeURI"] || hsh["schemeUri"],
|
|
1400
|
-
"valueUri" => hsh["valueURI"] || hsh["valueUri"],
|
|
1401
|
-
"lang" => hsh["lang"] }.compact,
|
|
1402
|
-
{
|
|
1403
|
-
"subject" => "FOS: " + subject["fosLabel"],
|
|
1404
|
-
"subjectScheme" => "Fields of Science and Technology (FOS)",
|
|
1405
|
-
"schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf"
|
|
1406
|
-
}]
|
|
1407
|
-
else
|
|
1408
|
-
[{
|
|
1409
|
-
"subject" => sanitize(hsh["__content__"] || hsh["subject"]),
|
|
1410
|
-
"subjectScheme" => hsh["subjectScheme"],
|
|
1411
|
-
"classificationCode" => hsh["classificationCode"],
|
|
1412
|
-
"schemeUri" => hsh["schemeURI"] || hsh["schemeUri"],
|
|
1413
|
-
"valueUri" => hsh["valueURI"] || hsh["valueUri"],
|
|
1414
|
-
"lang" => hsh["lang"] }.compact]
|
|
1415
|
-
end
|
|
1338
|
+
# Convert a subject hash into the list-of-hashes subject format.
#
# The subject text is taken from "__content__" or, failing that, "subject",
# and passed through sanitize. Both the "schemeURI"/"schemeUri" and the
# "valueURI"/"valueUri" spellings are accepted. Keys whose value is nil are
# dropped from the result.
#
# @param hsh [Hash] subject attributes
# @return [Array<Hash>] one-element list with the compacted subject hash
def hsh_to_subject(hsh)
  text = hsh["__content__"] || hsh["subject"]
  scheme_uri = hsh["schemeURI"] || hsh["schemeUri"]
  value_uri = hsh["valueURI"] || hsh["valueUri"]

  subject = {
    "subject" => sanitize(text),
    "subjectScheme" => hsh["subjectScheme"],
    "classificationCode" => hsh["classificationCode"],
    "schemeUri" => scheme_uri,
    "valueUri" => value_uri,
    "lang" => hsh["lang"]
  }

  [subject.compact]
end
|
|
1417
1347
|
|
|
1418
1348
|
def dfg_ids_to_fos(dfg_ids)
|
|
@@ -1476,5 +1406,13 @@ module Bolognese
|
|
|
1476
1406
|
}.compact
|
|
1477
1407
|
end
|
|
1478
1408
|
end
|
|
1409
|
+
|
|
1410
|
+
private
|
|
1411
|
+
|
|
1412
|
+
# Normalize a URI and strip a single trailing slash from its path.
#
# Works on the object returned by uri.normalize, then returns it as a string.
#
# @param uri [#normalize] URI object; the normalized result must respond to
#   path, path= and to_s
# @return [String] the normalized URI without a trailing slash on the path
def normalize_uri_with_path_cleanup(uri)
  cleaned = uri.normalize
  path = cleaned.path
  cleaned.path = path.sub(%r{/\z}, "") if path.present?
  cleaned.to_s
end
|
|
1479
1417
|
end
|
|
1480
1418
|
end
|
data/lib/bolognese/version.rb
CHANGED
|
@@ -4,18 +4,16 @@ module Bolognese
|
|
|
4
4
|
require "csv"
|
|
5
5
|
|
|
6
6
|
def csv
|
|
7
|
-
return nil unless valid?
|
|
8
|
-
|
|
9
7
|
bib = {
|
|
10
8
|
doi: doi,
|
|
11
9
|
url: url,
|
|
12
10
|
registered: get_iso8601_date(date_registered),
|
|
13
11
|
state: state,
|
|
14
|
-
resource_type_general: types["resourceTypeGeneral"],
|
|
15
|
-
resource_type: types["resourceType"],
|
|
12
|
+
resource_type_general: types.respond_to?(:to_h) ? types.to_h["resourceTypeGeneral"] : nil,
|
|
13
|
+
resource_type: types.respond_to?(:to_h) ? types.to_h["resourceType"] : nil,
|
|
16
14
|
title: parse_attributes(titles, content: "title", first: true),
|
|
17
15
|
author: authors_as_string(creators),
|
|
18
|
-
publisher: publisher["name"],
|
|
16
|
+
publisher: publisher.respond_to?(:to_h) ? publisher.to_h["name"] : nil,
|
|
19
17
|
publication_year: publication_year
|
|
20
18
|
}.values
|
|
21
19
|
|
|
@@ -93,9 +93,10 @@ module Bolognese
|
|
|
93
93
|
end
|
|
94
94
|
|
|
95
95
|
# Write the publication date as year, month and day elements.
#
# Uses the "Issued" date from dates when get_date finds one, otherwise falls
# back to publication_year. The year element always gets an "iso-8601-date"
# attribute; month and day are written only when present, zero-padded to two
# characters.
#
# @param xml XML builder that receives the year, month and day calls
def insert_publication_date(xml)
  # The fallback year is coerced to a String so get_date_parts receives the
  # same kind of argument as in the citeproc hash builder, which calls
  # get_date_parts(publication_year.to_s). A blank year stays nil.
  date = get_date(dates, "Issued") || publication_year.to_s.presence
  year, month, day = get_date_parts(date).to_h.fetch("date-parts", []).first

  xml.year(year, "iso-8601-date" => date)
  xml.month(month.to_s.rjust(2, '0')) if month.present?
  xml.day(day.to_s.rjust(2, '0')) if day.present?
end
|
data/lib/bolognese.rb
CHANGED
|
@@ -3,9 +3,7 @@
|
|
|
3
3
|
require 'active_support/all'
|
|
4
4
|
require 'nokogiri'
|
|
5
5
|
require 'maremma'
|
|
6
|
-
require 'postrank-uri'
|
|
7
6
|
require 'bibtex'
|
|
8
|
-
require 'colorize'
|
|
9
7
|
require 'loofah'
|
|
10
8
|
require 'json/ld'
|
|
11
9
|
require 'rdf/turtle'
|
|
@@ -13,13 +11,12 @@ require 'rdf/rdfxml'
|
|
|
13
11
|
require 'logger'
|
|
14
12
|
require 'iso8601'
|
|
15
13
|
require 'jsonlint'
|
|
16
|
-
require 'benchmark_methods'
|
|
17
14
|
require 'gender_detector'
|
|
18
|
-
require 'citeproc/ruby'
|
|
19
15
|
require 'citeproc'
|
|
20
16
|
require 'csl/styles'
|
|
21
17
|
require 'edtf'
|
|
22
18
|
|
|
19
|
+
require "bolognese/citeproc_extensions"
|
|
23
20
|
require "bolognese/version"
|
|
24
21
|
require "bolognese/metadata"
|
|
25
22
|
require "bolognese/cli"
|