bolognese 1.5.16 → 1.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/Gemfile.lock +17 -14
- data/lib/bolognese/datacite_utils.rb +22 -4
- data/lib/bolognese/metadata.rb +8 -5
- data/lib/bolognese/metadata_utils.rb +2 -2
- data/lib/bolognese/readers/citeproc_reader.rb +5 -3
- data/lib/bolognese/readers/codemeta_reader.rb +5 -3
- data/lib/bolognese/readers/crossref_reader.rb +3 -6
- data/lib/bolognese/readers/datacite_reader.rb +19 -14
- data/lib/bolognese/readers/npm_reader.rb +1 -1
- data/lib/bolognese/readers/ris_reader.rb +4 -2
- data/lib/bolognese/readers/schema_org_reader.rb +8 -3
- data/lib/bolognese/utils.rb +103 -0
- data/lib/bolognese/version.rb +1 -1
- data/lib/bolognese/writers/codemeta_writer.rb +1 -1
- data/lib/bolognese/writers/jats_writer.rb +2 -2
- data/lib/bolognese/writers/schema_org_writer.rb +1 -1
- data/resources/oecd/for-mappings.json +1101 -0
- data/resources/oecd/fos-mappings.json +198 -0
- data/spec/datacite_utils_spec.rb +27 -7
- data/spec/fixtures/datacite-funderIdentifier.xml +4 -0
- data/spec/fixtures/datacite_software_version.json +74 -0
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/fos/hsh_to_fos_for_match.yml +44 -0
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/fos/hsh_to_fos_match.yml +44 -0
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/fos/hsh_to_fos_no_match.yml +44 -0
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/fos/name_to_fos_for_match.yml +44 -0
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/fos/name_to_fos_match.yml +44 -0
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/fos/name_to_fos_no_match.yml +44 -0
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_crossref_metadata/invalid_date.yml +94 -0
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_datacite_metadata/even_more_subject_scheme_FOR.yml +97 -0
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_datacite_metadata/more_subject_scheme_FOR.yml +107 -0
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_datacite_metadata/subject_scheme_FOR.yml +110 -0
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_datetime_from_time/future.yml +44 -0
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_datetime_from_time/invalid.yml +44 -0
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_datetime_from_time/nil.yml +44 -0
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_datetime_from_time/past.yml +44 -0
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_datetime_from_time/present.yml +44 -0
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_datetime_from_timestamp/present.yml +44 -0
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/insert_dates/insert.yml +49 -0
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/insert_formats/insert.yml +49 -0
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/insert_person/creator_given_and_family_name.yml +49 -0
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/insert_sizes/insert.yml +49 -0
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/write_metadata_as_citeproc/software.yml +9 -9
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/write_metadata_as_citeproc/software_w/version.yml +52 -0
- data/spec/readers/codemeta_reader_spec.rb +1 -1
- data/spec/readers/crossref_reader_spec.rb +20 -0
- data/spec/readers/datacite_json_reader_spec.rb +1 -1
- data/spec/readers/datacite_reader_spec.rb +102 -2
- data/spec/readers/npm_reader_spec.rb +3 -3
- data/spec/readers/schema_org_reader_spec.rb +17 -3
- data/spec/utils_spec.rb +82 -0
- data/spec/writers/citeproc_writer_spec.rb +18 -0
- data/spec/writers/datacite_writer_spec.rb +9 -0
- data/spec/writers/schema_org_writer_spec.rb +2 -0
- metadata +26 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 49f153c7bf6a7dd69a2cf35844b9fe303574ebeb6f515a00581cea60955dc6d6
|
|
4
|
+
data.tar.gz: ebc11d65b789fb08e97dafa25c2e9e0b2ddfb6085011478498f2e619ebdd605c
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: d30e6e90b55e93bf4febdef8cf5fe3aa17e1821e2ee317f521957cdee0e811f8f94ed07f929b974ea967a8e5e4b7f0c8e7c08f47bc9bee681e72702f263ac80e
|
|
7
|
+
data.tar.gz: e30113a9e7ceddb8772a8e26310c7bc9ecdb349c93a33596c4d2ff0167c12650631e7dee7860c32a2e4dd8323453ed6acd7315c292060aef5a03954fbca61682
|
data/.gitignore
CHANGED
data/Gemfile.lock
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
PATH
|
|
2
2
|
remote: .
|
|
3
3
|
specs:
|
|
4
|
-
bolognese (1.5.16)
|
|
4
|
+
bolognese (1.6.2)
|
|
5
5
|
activesupport (>= 4.2.5)
|
|
6
6
|
benchmark_methods (~> 0.7)
|
|
7
7
|
bibtex-ruby (>= 5.1.0)
|
|
@@ -30,12 +30,12 @@ PATH
|
|
|
30
30
|
GEM
|
|
31
31
|
remote: https://rubygems.org/
|
|
32
32
|
specs:
|
|
33
|
-
activesupport (6.0.
|
|
33
|
+
activesupport (6.0.3.1)
|
|
34
34
|
concurrent-ruby (~> 1.0, >= 1.0.2)
|
|
35
35
|
i18n (>= 0.7, < 2)
|
|
36
36
|
minitest (~> 5.1)
|
|
37
37
|
tzinfo (~> 1.1)
|
|
38
|
-
zeitwerk (~> 2.2)
|
|
38
|
+
zeitwerk (~> 2.2, >= 2.2.2)
|
|
39
39
|
addressable (2.7.0)
|
|
40
40
|
public_suffix (>= 2.0.2, < 5.0)
|
|
41
41
|
benchmark_methods (0.7)
|
|
@@ -105,7 +105,7 @@ GEM
|
|
|
105
105
|
oj (>= 2.8.3)
|
|
106
106
|
oj_mimic_json (~> 1.0, >= 1.0.1)
|
|
107
107
|
mini_portile2 (2.4.0)
|
|
108
|
-
minitest (5.14.
|
|
108
|
+
minitest (5.14.1)
|
|
109
109
|
multi_json (1.14.1)
|
|
110
110
|
multipart-post (2.1.1)
|
|
111
111
|
namae (1.0.1)
|
|
@@ -113,7 +113,7 @@ GEM
|
|
|
113
113
|
mini_portile2 (~> 2.4.0)
|
|
114
114
|
oj (3.10.6)
|
|
115
115
|
oj_mimic_json (1.0.1)
|
|
116
|
-
optimist (3.0.
|
|
116
|
+
optimist (3.0.1)
|
|
117
117
|
postrank-uri (1.0.24)
|
|
118
118
|
addressable (>= 2.4.0)
|
|
119
119
|
nokogiri (>= 1.8.0)
|
|
@@ -123,40 +123,43 @@ GEM
|
|
|
123
123
|
rack-test (0.8.3)
|
|
124
124
|
rack (>= 1.0, < 3)
|
|
125
125
|
rake (12.3.3)
|
|
126
|
-
rdf (3.1.
|
|
126
|
+
rdf (3.1.2)
|
|
127
127
|
hamster (~> 3.0)
|
|
128
128
|
link_header (~> 0.0, >= 0.0.8)
|
|
129
129
|
rdf-aggregate-repo (3.1.0)
|
|
130
130
|
rdf (~> 3.1)
|
|
131
|
-
rdf-rdfa (3.1.
|
|
131
|
+
rdf-rdfa (3.1.1)
|
|
132
132
|
haml (~> 5.1)
|
|
133
133
|
htmlentities (~> 4.3)
|
|
134
|
-
rdf (~> 3.1)
|
|
134
|
+
rdf (~> 3.1, >= 3.1.2)
|
|
135
135
|
rdf-aggregate-repo (~> 3.1)
|
|
136
|
+
rdf-vocab (~> 3.1, >= 3.1.5)
|
|
136
137
|
rdf-xsd (~> 3.1)
|
|
137
138
|
rdf-rdfxml (3.1.0)
|
|
138
139
|
htmlentities (~> 4.3)
|
|
139
140
|
rdf (~> 3.1)
|
|
140
141
|
rdf-rdfa (~> 3.1)
|
|
141
142
|
rdf-xsd (~> 3.1)
|
|
142
|
-
rdf-turtle (3.1.
|
|
143
|
+
rdf-turtle (3.1.1)
|
|
143
144
|
ebnf (~> 1.2)
|
|
144
|
-
rdf (~> 3.1)
|
|
145
|
+
rdf (~> 3.1, >= 3.1.2)
|
|
146
|
+
rdf-vocab (3.1.5)
|
|
147
|
+
rdf (~> 3.1, >= 3.1.2)
|
|
145
148
|
rdf-xsd (3.1.0)
|
|
146
149
|
rdf (~> 3.1)
|
|
147
150
|
rspec (3.9.0)
|
|
148
151
|
rspec-core (~> 3.9.0)
|
|
149
152
|
rspec-expectations (~> 3.9.0)
|
|
150
153
|
rspec-mocks (~> 3.9.0)
|
|
151
|
-
rspec-core (3.9.
|
|
152
|
-
rspec-support (~> 3.9.
|
|
153
|
-
rspec-expectations (3.9.
|
|
154
|
+
rspec-core (3.9.2)
|
|
155
|
+
rspec-support (~> 3.9.3)
|
|
156
|
+
rspec-expectations (3.9.2)
|
|
154
157
|
diff-lcs (>= 1.2.0, < 2.0)
|
|
155
158
|
rspec-support (~> 3.9.0)
|
|
156
159
|
rspec-mocks (3.9.1)
|
|
157
160
|
diff-lcs (>= 1.2.0, < 2.0)
|
|
158
161
|
rspec-support (~> 3.9.0)
|
|
159
|
-
rspec-support (3.9.
|
|
162
|
+
rspec-support (3.9.3)
|
|
160
163
|
rspec-xsd (0.1.0)
|
|
161
164
|
nokogiri (~> 1.6)
|
|
162
165
|
rspec (~> 3)
|
data/lib/bolognese/datacite_utils.rb
CHANGED
|
@@ -28,12 +28,14 @@ module Bolognese
|
|
|
28
28
|
insert_publisher(xml)
|
|
29
29
|
insert_publication_year(xml)
|
|
30
30
|
insert_resource_type(xml)
|
|
31
|
-
insert_alternate_identifiers(xml)
|
|
32
31
|
insert_subjects(xml)
|
|
33
|
-
insert_language(xml)
|
|
34
32
|
insert_contributors(xml)
|
|
35
33
|
insert_dates(xml)
|
|
34
|
+
insert_language(xml)
|
|
35
|
+
insert_alternate_identifiers(xml)
|
|
36
36
|
insert_related_identifiers(xml)
|
|
37
|
+
insert_sizes(xml)
|
|
38
|
+
insert_formats(xml)
|
|
37
39
|
insert_version(xml)
|
|
38
40
|
insert_rights_list(xml)
|
|
39
41
|
insert_descriptions(xml)
|
|
@@ -170,9 +172,9 @@ module Bolognese
|
|
|
170
172
|
end
|
|
171
173
|
|
|
172
174
|
def insert_version(xml)
|
|
173
|
-
return xml unless
|
|
175
|
+
return xml unless version.present?
|
|
174
176
|
|
|
175
|
-
xml.version(
|
|
177
|
+
xml.version(version)
|
|
176
178
|
end
|
|
177
179
|
|
|
178
180
|
|
|
@@ -201,6 +203,22 @@ module Bolognese
|
|
|
201
203
|
end
|
|
202
204
|
end
|
|
203
205
|
|
|
206
|
+
def insert_sizes(xml)
|
|
207
|
+
xml.sizes do
|
|
208
|
+
Array.wrap(sizes).each do |s|
|
|
209
|
+
xml.size(s)
|
|
210
|
+
end
|
|
211
|
+
end
|
|
212
|
+
end
|
|
213
|
+
|
|
214
|
+
def insert_formats(xml)
|
|
215
|
+
xml.formats do
|
|
216
|
+
Array.wrap(formats).each do |f|
|
|
217
|
+
xml.format(f)
|
|
218
|
+
end
|
|
219
|
+
end
|
|
220
|
+
end
|
|
221
|
+
|
|
204
222
|
def insert_rights_list(xml)
|
|
205
223
|
return xml unless rights_list.present?
|
|
206
224
|
|
data/lib/bolognese/metadata.rb
CHANGED
|
@@ -9,7 +9,7 @@ module Bolognese
|
|
|
9
9
|
attr_accessor :string, :from, :sandbox, :meta, :regenerate, :issue, :show_errors
|
|
10
10
|
attr_reader :doc, :page_start, :page_end
|
|
11
11
|
attr_writer :id, :provider_id, :client_id, :doi, :identifiers, :creators, :contributors, :titles, :publisher,
|
|
12
|
-
:rights_list, :dates, :publication_year, :volume, :url, :
|
|
12
|
+
:rights_list, :dates, :publication_year, :volume, :url, :version,
|
|
13
13
|
:subjects, :contributor, :descriptions, :language, :sizes,
|
|
14
14
|
:formats, :schema_version, :meta, :container, :agency,
|
|
15
15
|
:format, :funding_references, :state, :geo_locations,
|
|
@@ -36,6 +36,7 @@ module Bolognese
|
|
|
36
36
|
# generate name for method to call dynamically
|
|
37
37
|
hsh = @from.present? ? send("get_" + @from, id: id, sandbox: options[:sandbox]) : {}
|
|
38
38
|
string = hsh.fetch("string", nil)
|
|
39
|
+
|
|
39
40
|
elsif input.present? && File.exist?(input)
|
|
40
41
|
filename = File.basename(input)
|
|
41
42
|
ext = File.extname(input)
|
|
@@ -105,7 +106,7 @@ module Bolognese
|
|
|
105
106
|
:publication_year,
|
|
106
107
|
:descriptions,
|
|
107
108
|
:rights_list,
|
|
108
|
-
:
|
|
109
|
+
:version,
|
|
109
110
|
:subjects,
|
|
110
111
|
:language,
|
|
111
112
|
:geo_locations,
|
|
@@ -115,7 +116,6 @@ module Bolognese
|
|
|
115
116
|
).compact
|
|
116
117
|
|
|
117
118
|
@regenerate = options[:regenerate] || read_options.present?
|
|
118
|
-
|
|
119
119
|
# generate name for method to call dynamically
|
|
120
120
|
@meta = @from.present? ? send("read_" + @from, { string: string, sandbox: options[:sandbox], doi: options[:doi], id: id, ra: ra }.merge(read_options)) : {}
|
|
121
121
|
end
|
|
@@ -189,10 +189,13 @@ module Bolognese
|
|
|
189
189
|
@url ||= meta.fetch("url", nil)
|
|
190
190
|
end
|
|
191
191
|
|
|
192
|
-
def
|
|
193
|
-
@
|
|
192
|
+
def version
|
|
193
|
+
@version ||= meta.fetch("version", nil)
|
|
194
194
|
end
|
|
195
195
|
|
|
196
|
+
# for backwards compatibility
|
|
197
|
+
alias_attribute :version_info, :version
|
|
198
|
+
|
|
196
199
|
def publication_year
|
|
197
200
|
@publication_year ||= meta.fetch("publication_year", nil)
|
|
198
201
|
end
|
data/lib/bolognese/metadata_utils.rb
CHANGED
|
@@ -158,7 +158,7 @@ module Bolognese
|
|
|
158
158
|
"publisher" => publisher,
|
|
159
159
|
"title" => parse_attributes(titles, content: "title", first: true),
|
|
160
160
|
"URL" => url,
|
|
161
|
-
"version" =>
|
|
161
|
+
"version" => version
|
|
162
162
|
}.compact.symbolize_keys
|
|
163
163
|
end
|
|
164
164
|
|
|
@@ -180,7 +180,7 @@ module Bolognese
|
|
|
180
180
|
"identifiers" => identifiers,
|
|
181
181
|
"sizes" => sizes,
|
|
182
182
|
"formats" => formats,
|
|
183
|
-
"version" =>
|
|
183
|
+
"version" => version,
|
|
184
184
|
"rights_list" => rights_list,
|
|
185
185
|
"descriptions" => descriptions,
|
|
186
186
|
"geo_locations" => geo_locations,
|
data/lib/bolognese/readers/citeproc_reader.rb
CHANGED
|
@@ -100,8 +100,10 @@ module Bolognese
|
|
|
100
100
|
doi = Array.wrap(identifiers).find { |r| r["identifierType"] == "DOI" }.to_h.fetch("identifier", nil)
|
|
101
101
|
|
|
102
102
|
state = id.present? || read_options.present? ? "findable" : "not_found"
|
|
103
|
-
subjects = Array.wrap(meta.fetch("categories", nil)).
|
|
104
|
-
|
|
103
|
+
subjects = Array.wrap(meta.fetch("categories", nil)).reduce([]) do |sum, subject|
|
|
104
|
+
sum += name_to_fos(subject)
|
|
105
|
+
|
|
106
|
+
sum
|
|
105
107
|
end
|
|
106
108
|
|
|
107
109
|
{ "id" => id,
|
|
@@ -119,7 +121,7 @@ module Bolognese
|
|
|
119
121
|
"publication_year" => publication_year,
|
|
120
122
|
"descriptions" => meta.fetch("abstract", nil).present? ? [{ "description" => sanitize(meta.fetch("abstract")), "descriptionType" => "Abstract" }] : [],
|
|
121
123
|
"rights_list" => rights_list,
|
|
122
|
-
"
|
|
124
|
+
"version" => meta.fetch("version", nil),
|
|
123
125
|
"subjects" => subjects,
|
|
124
126
|
"state" => state
|
|
125
127
|
}.merge(read_options)
|
data/lib/bolognese/readers/codemeta_reader.rb
CHANGED
|
@@ -57,8 +57,10 @@ module Bolognese
|
|
|
57
57
|
"bibtex" => Bolognese::Utils::SO_TO_BIB_TRANSLATIONS[schema_org] || "misc",
|
|
58
58
|
"ris" => Bolognese::Utils::SO_TO_RIS_TRANSLATIONS[schema_org] || "GEN"
|
|
59
59
|
}.compact
|
|
60
|
-
subjects = Array.wrap(meta.fetch("tags", nil)).
|
|
61
|
-
|
|
60
|
+
subjects = Array.wrap(meta.fetch("tags", nil)).reduce([]) do |sum, subject|
|
|
61
|
+
sum += name_to_fos(subject)
|
|
62
|
+
|
|
63
|
+
sum
|
|
62
64
|
end
|
|
63
65
|
|
|
64
66
|
has_title = meta.fetch("title", nil)
|
|
@@ -79,7 +81,7 @@ module Bolognese
|
|
|
79
81
|
"publication_year" => publication_year,
|
|
80
82
|
"descriptions" => meta.fetch("description", nil).present? ? [{ "description" => sanitize(meta.fetch("description")), "descriptionType" => "Abstract" }] : nil,
|
|
81
83
|
"rights_list" => [{ "rightsUri" => meta.fetch("license", nil) }.compact],
|
|
82
|
-
"
|
|
84
|
+
"version" => meta.fetch("version", nil),
|
|
83
85
|
"subjects" => subjects,
|
|
84
86
|
"state" => state
|
|
85
87
|
}.merge(read_options)
|
data/lib/bolognese/readers/crossref_reader.rb
CHANGED
|
@@ -117,11 +117,8 @@ module Bolognese
|
|
|
117
117
|
date_updated = { "date" => date_updated.fetch("__content__", nil), "dateType" => "Updated" } if date_updated.present?
|
|
118
118
|
|
|
119
119
|
date_registered = Array.wrap(query.to_h["crm_item"]).find { |cr| cr["name"] == "deposit-timestamp" }
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
else
|
|
123
|
-
date_registered = nil
|
|
124
|
-
end
|
|
120
|
+
date_registered = get_datetime_from_time(date_registered.fetch("__content__", nil)) if date_registered.present?
|
|
121
|
+
|
|
125
122
|
# check that date is valid iso8601 date
|
|
126
123
|
date_published = nil unless Date.edtf(date_published.to_h["date"]).present?
|
|
127
124
|
date_updated = nil unless Date.edtf(date_updated.to_h["date"]).present?
|
|
@@ -175,7 +172,7 @@ module Bolognese
|
|
|
175
172
|
"publication_year" => publication_year,
|
|
176
173
|
"descriptions" => crossref_description(bibliographic_metadata),
|
|
177
174
|
"rights_list" => crossref_license(program_metadata),
|
|
178
|
-
"
|
|
175
|
+
"version" => nil,
|
|
179
176
|
"subjects" => nil,
|
|
180
177
|
"language" => nil,
|
|
181
178
|
"sizes" => nil,
|
data/lib/bolognese/readers/datacite_reader.rb
CHANGED
|
@@ -93,7 +93,7 @@ module Bolognese
|
|
|
93
93
|
"bibtex" => Bolognese::Utils::CR_TO_BIB_TRANSLATIONS[resource_type.to_s.underscore.camelcase] || Bolognese::Utils::SO_TO_BIB_TRANSLATIONS[schema_org] || "misc",
|
|
94
94
|
"ris" => Bolognese::Utils::CR_TO_RIS_TRANSLATIONS[resource_type.to_s.underscore.camelcase] || Bolognese::Utils::DC_TO_RIS_TRANSLATIONS[resource_type_general.to_s.dasherize] || "GEN"
|
|
95
95
|
}.compact
|
|
96
|
-
|
|
96
|
+
|
|
97
97
|
titles = Array.wrap(meta.dig("titles", "title")).map do |r|
|
|
98
98
|
if r.blank?
|
|
99
99
|
nil
|
|
@@ -122,15 +122,17 @@ module Bolognese
|
|
|
122
122
|
{ "rights" => r["__content__"], "rightsUri" => normalize_url(r["rightsURI"]), "lang" => r["lang"] }.compact
|
|
123
123
|
end
|
|
124
124
|
end.compact
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
{ "subject" => sanitize(k["__content__"]), "subjectScheme" => k["subjectScheme"], "schemeUri" => k["schemeURI"], "valueUri" => k["valueURI"], "lang" => k["lang"] }.compact
|
|
125
|
+
|
|
126
|
+
subjects = Array.wrap(meta.dig("subjects", "subject")).reduce([]) do |sum, subject|
|
|
127
|
+
if subject.is_a?(String)
|
|
128
|
+
sum += name_to_fos(subject)
|
|
129
|
+
elsif subject.is_a?(Hash)
|
|
130
|
+
sum += hsh_to_fos(subject)
|
|
132
131
|
end
|
|
133
|
-
|
|
132
|
+
|
|
133
|
+
sum
|
|
134
|
+
end.uniq
|
|
135
|
+
|
|
134
136
|
dates = Array.wrap(meta.dig("dates", "date")).map do |r|
|
|
135
137
|
if r.is_a?(Hash) && date = sanitize(r["__content__"]).presence
|
|
136
138
|
if Date.edtf(date).present? || Bolognese::Utils::UNKNOWN_INFORMATION.key?(date)
|
|
@@ -164,12 +166,15 @@ module Bolognese
|
|
|
164
166
|
funding_references = Array.wrap(meta.dig("fundingReferences", "fundingReference")).compact.map do |fr|
|
|
165
167
|
scheme_uri = parse_attributes(fr["funderIdentifier"], content: "schemeURI")
|
|
166
168
|
funder_identifier = parse_attributes(fr["funderIdentifier"])
|
|
167
|
-
|
|
168
|
-
|
|
169
|
+
funder_identifier_type = parse_attributes(fr["funderIdentifier"], content: "funderIdentifierType")
|
|
170
|
+
if funder_identifier_type != "Other"
|
|
171
|
+
funder_identifier = !funder_identifier.to_s.start_with?("https://","http://") && scheme_uri.present? ? normalize_id(scheme_uri + funder_identifier) : normalize_id(funder_identifier)
|
|
172
|
+
end
|
|
173
|
+
|
|
169
174
|
{
|
|
170
175
|
"funderName" => fr["funderName"],
|
|
171
176
|
"funderIdentifier" => funder_identifier,
|
|
172
|
-
"funderIdentifierType" =>
|
|
177
|
+
"funderIdentifierType" => funder_identifier_type,
|
|
173
178
|
"awardNumber" => parse_attributes(fr["awardNumber"]),
|
|
174
179
|
"awardUri" => parse_attributes(fr["awardNumber"], content: "awardURI"),
|
|
175
180
|
"awardTitle" => fr["awardTitle"] }.compact
|
|
@@ -181,7 +186,7 @@ module Bolognese
|
|
|
181
186
|
rid = ri["__content__"]
|
|
182
187
|
end
|
|
183
188
|
|
|
184
|
-
{
|
|
189
|
+
{
|
|
185
190
|
"relatedIdentifier" => rid,
|
|
186
191
|
"relatedIdentifierType" => ri["relatedIdentifierType"],
|
|
187
192
|
"relationType" => ri["relationType"],
|
|
@@ -230,7 +235,7 @@ module Bolognese
|
|
|
230
235
|
"publication_year" => parse_attributes(meta.fetch("publicationYear", nil), first: true).to_s.strip.presence,
|
|
231
236
|
"descriptions" => descriptions,
|
|
232
237
|
"rights_list" => Array.wrap(rights_list),
|
|
233
|
-
"
|
|
238
|
+
"version" => meta.fetch("version", nil).to_s.presence,
|
|
234
239
|
"subjects" => subjects,
|
|
235
240
|
"language" => parse_attributes(meta.fetch("language", nil), first: true).to_s.strip.presence,
|
|
236
241
|
"geo_locations" => geo_locations,
|
data/lib/bolognese/readers/npm_reader.rb
CHANGED
|
@@ -105,7 +105,7 @@ module Bolognese
|
|
|
105
105
|
#"publication_year" => publication_year,
|
|
106
106
|
"descriptions" => meta.fetch("description", nil).present? ? [{ "description" => sanitize(meta.fetch("description")), "descriptionType" => "Abstract" }] : [],
|
|
107
107
|
"rights_list" => rights_list,
|
|
108
|
-
"
|
|
108
|
+
"version" => meta.fetch("version", nil),
|
|
109
109
|
"subjects" => subjects
|
|
110
110
|
#"state" => state
|
|
111
111
|
}.merge(read_options)
|
data/lib/bolognese/readers/ris_reader.rb
CHANGED
|
@@ -82,8 +82,10 @@ module Bolognese
|
|
|
82
82
|
nil
|
|
83
83
|
end
|
|
84
84
|
state = meta.fetch("DO", nil).present? || read_options.present? ? "findable" : "not_found"
|
|
85
|
-
subjects = Array.wrap(meta.fetch("KW", nil)).
|
|
86
|
-
|
|
85
|
+
subjects = Array.wrap(meta.fetch("KW", nil)).reduce([]) do |sum, subject|
|
|
86
|
+
sum += name_to_fos(subject)
|
|
87
|
+
|
|
88
|
+
sum
|
|
87
89
|
end
|
|
88
90
|
|
|
89
91
|
{ "id" => id,
|
data/lib/bolognese/readers/schema_org_reader.rb
CHANGED
|
@@ -150,8 +150,13 @@ module Bolognese
|
|
|
150
150
|
"geoLocationBox" => geo_location_box
|
|
151
151
|
}.compact
|
|
152
152
|
end
|
|
153
|
-
|
|
154
|
-
|
|
153
|
+
|
|
154
|
+
# handle keywords as array and as comma-separated string
|
|
155
|
+
subjects = meta.fetch("keywords", nil)
|
|
156
|
+
subjects = subjects.to_s.split(", ") if subjects.is_a?(String)
|
|
157
|
+
subjects = Array.wrap(subjects).reduce([]) do |sum, subject|
|
|
158
|
+
sum += name_to_fos(subject)
|
|
159
|
+
sum
|
|
155
160
|
end
|
|
156
161
|
|
|
157
162
|
{ "id" => id,
|
|
@@ -173,7 +178,7 @@ module Bolognese
|
|
|
173
178
|
"dates" => dates,
|
|
174
179
|
"descriptions" => meta.fetch("description", nil).present? ? [{ "description" => sanitize(meta.fetch("description")), "descriptionType" => "Abstract" }] : nil,
|
|
175
180
|
"rights_list" => rights_list,
|
|
176
|
-
"
|
|
181
|
+
"version" => meta.fetch("version", nil).to_s.presence,
|
|
177
182
|
"subjects" => subjects,
|
|
178
183
|
"state" => state,
|
|
179
184
|
"schema_version" => meta.fetch("schemaVersion", nil).to_s.presence,
|
data/lib/bolognese/utils.rb
CHANGED
|
@@ -974,6 +974,14 @@ module Bolognese
|
|
|
974
974
|
nil
|
|
975
975
|
end
|
|
976
976
|
|
|
977
|
+
# iso8601 datetime without hyphens and colons, used by Crossref
|
|
978
|
+
# return nil if invalid
|
|
979
|
+
def get_datetime_from_time(time)
|
|
980
|
+
DateTime.strptime(time.to_s, "%Y%m%d%H%M%S").strftime('%Y-%m-%dT%H:%M:%SZ')
|
|
981
|
+
rescue ArgumentError
|
|
982
|
+
nil
|
|
983
|
+
end
|
|
984
|
+
|
|
977
985
|
def get_date(dates, date_type)
|
|
978
986
|
dd = Array.wrap(dates).find { |d| d["dateType"] == date_type } || {}
|
|
979
987
|
dd.fetch("date", nil)
|
|
@@ -1048,5 +1056,100 @@ module Bolognese
|
|
|
1048
1056
|
error_array
|
|
1049
1057
|
end
|
|
1050
1058
|
|
|
1059
|
+
def name_to_fos(name)
|
|
1060
|
+
# first find subject in Fields of Science (OECD)
|
|
1061
|
+
fos = JSON.load(File.read(File.expand_path('../../../resources/oecd/fos-mappings.json', __FILE__))).fetch("fosFields")
|
|
1062
|
+
|
|
1063
|
+
subject = fos.find { |l| l["fosLabel"] == name || "FOS: " + l["fosLabel"] == name }
|
|
1064
|
+
|
|
1065
|
+
if subject
|
|
1066
|
+
return [{
|
|
1067
|
+
"subject" => sanitize(name) },
|
|
1068
|
+
{
|
|
1069
|
+
"subject" => "FOS: " + subject["fosLabel"],
|
|
1070
|
+
"subjectScheme" => "Fields of Science and Technology (FOS)",
|
|
1071
|
+
"schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf"
|
|
1072
|
+
}]
|
|
1073
|
+
end
|
|
1074
|
+
|
|
1075
|
+
# if not found, look in Fields of Research (Australian and New Zealand Standard Research Classification)
|
|
1076
|
+
# and map to Fields of Science. Add an extra entry for the latter
|
|
1077
|
+
fores = JSON.load(File.read(File.expand_path('../../../resources/oecd/for-mappings.json', __FILE__)))
|
|
1078
|
+
for_fields = fores.fetch("forFields")
|
|
1079
|
+
for_disciplines = fores.fetch("forDisciplines")
|
|
1080
|
+
|
|
1081
|
+
subject = for_fields.find { |l| l["forLabel"] == name } ||
|
|
1082
|
+
for_disciplines.find { |l| l["forLabel"] == name }
|
|
1083
|
+
|
|
1084
|
+
if subject
|
|
1085
|
+
[{
|
|
1086
|
+
"subject" => sanitize(name) },
|
|
1087
|
+
{
|
|
1088
|
+
"subject" => "FOS: " + subject["fosLabel"],
|
|
1089
|
+
"subjectScheme" => "Fields of Science and Technology (FOS)",
|
|
1090
|
+
"schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf"
|
|
1091
|
+
}]
|
|
1092
|
+
else
|
|
1093
|
+
[{ "subject" => sanitize(name) }]
|
|
1094
|
+
end
|
|
1095
|
+
end
|
|
1096
|
+
|
|
1097
|
+
def hsh_to_fos(hsh)
|
|
1098
|
+
# first find subject in Fields of Science (OECD)
|
|
1099
|
+
fos = JSON.load(File.read(File.expand_path('../../../resources/oecd/fos-mappings.json', __FILE__))).fetch("fosFields")
|
|
1100
|
+
subject = fos.find { |l| l["fosLabel"] == hsh["__content__"] || "FOS: " + l["fosLabel"] == hsh["__content__"] }
|
|
1101
|
+
|
|
1102
|
+
if subject
|
|
1103
|
+
return [{
|
|
1104
|
+
"subject" => sanitize(hsh["__content__"]),
|
|
1105
|
+
"subjectScheme" => hsh["subjectScheme"],
|
|
1106
|
+
"schemeUri" => hsh["schemeURI"],
|
|
1107
|
+
"valueUri" => hsh["valueURI"],
|
|
1108
|
+
"lang" => hsh["lang"] }.compact,
|
|
1109
|
+
{
|
|
1110
|
+
"subject" => "FOS: " + subject["fosLabel"],
|
|
1111
|
+
"subjectScheme" => "Fields of Science and Technology (FOS)",
|
|
1112
|
+
"schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf" }.compact]
|
|
1113
|
+
end
|
|
1114
|
+
|
|
1115
|
+
# if not found, look in Fields of Research (Australian and New Zealand Standard Research Classification)
|
|
1116
|
+
# and map to Fields of Science. Add an extra entry for the latter
|
|
1117
|
+
fores = JSON.load(File.read(File.expand_path('../../../resources/oecd/for-mappings.json', __FILE__)))
|
|
1118
|
+
for_fields = fores.fetch("forFields")
|
|
1119
|
+
for_disciplines = fores.fetch("forDisciplines")
|
|
1120
|
+
|
|
1121
|
+
# try to extract forId
|
|
1122
|
+
if hsh["subjectScheme"] == "FOR"
|
|
1123
|
+
for_id = hsh["__content__"].split(" ").first
|
|
1124
|
+
for_id = for_id.rjust(6, "0")
|
|
1125
|
+
|
|
1126
|
+
subject = for_fields.find { |l| l["forId"] == for_id } ||
|
|
1127
|
+
for_disciplines.find { |l| l["forId"] == for_id[0..3] }
|
|
1128
|
+
else
|
|
1129
|
+
subject = for_fields.find { |l| l["forLabel"] == hsh["__content__"] } ||
|
|
1130
|
+
for_disciplines.find { |l| l["forLabel"] == hsh["__content__"] }
|
|
1131
|
+
end
|
|
1132
|
+
|
|
1133
|
+
if subject
|
|
1134
|
+
[{
|
|
1135
|
+
"subject" => sanitize(hsh["__content__"]),
|
|
1136
|
+
"subjectScheme" => hsh["subjectScheme"],
|
|
1137
|
+
"schemeUri" => hsh["schemeURI"],
|
|
1138
|
+
"valueUri" => hsh["valueURI"],
|
|
1139
|
+
"lang" => hsh["lang"] }.compact,
|
|
1140
|
+
{
|
|
1141
|
+
"subject" => "FOS: " + subject["fosLabel"],
|
|
1142
|
+
"subjectScheme" => "Fields of Science and Technology (FOS)",
|
|
1143
|
+
"schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf"
|
|
1144
|
+
}]
|
|
1145
|
+
else
|
|
1146
|
+
[{
|
|
1147
|
+
"subject" => sanitize(hsh["__content__"]),
|
|
1148
|
+
"subjectScheme" => hsh["subjectScheme"],
|
|
1149
|
+
"schemeUri" => hsh["schemeURI"],
|
|
1150
|
+
"valueUri" => hsh["valueURI"],
|
|
1151
|
+
"lang" => hsh["lang"] }.compact]
|
|
1152
|
+
end
|
|
1153
|
+
end
|
|
1051
1154
|
end
|
|
1052
1155
|
end
|