bolognese 1.5.16 → 1.6.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/Gemfile.lock +17 -14
- data/lib/bolognese/datacite_utils.rb +22 -4
- data/lib/bolognese/metadata.rb +8 -5
- data/lib/bolognese/metadata_utils.rb +2 -2
- data/lib/bolognese/readers/citeproc_reader.rb +5 -3
- data/lib/bolognese/readers/codemeta_reader.rb +5 -3
- data/lib/bolognese/readers/crossref_reader.rb +3 -6
- data/lib/bolognese/readers/datacite_reader.rb +19 -14
- data/lib/bolognese/readers/npm_reader.rb +1 -1
- data/lib/bolognese/readers/ris_reader.rb +4 -2
- data/lib/bolognese/readers/schema_org_reader.rb +8 -3
- data/lib/bolognese/utils.rb +103 -0
- data/lib/bolognese/version.rb +1 -1
- data/lib/bolognese/writers/codemeta_writer.rb +1 -1
- data/lib/bolognese/writers/jats_writer.rb +2 -2
- data/lib/bolognese/writers/schema_org_writer.rb +1 -1
- data/resources/oecd/for-mappings.json +1101 -0
- data/resources/oecd/fos-mappings.json +198 -0
- data/spec/datacite_utils_spec.rb +27 -7
- data/spec/fixtures/datacite-funderIdentifier.xml +4 -0
- data/spec/fixtures/datacite_software_version.json +74 -0
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/fos/hsh_to_fos_for_match.yml +44 -0
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/fos/hsh_to_fos_match.yml +44 -0
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/fos/hsh_to_fos_no_match.yml +44 -0
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/fos/name_to_fos_for_match.yml +44 -0
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/fos/name_to_fos_match.yml +44 -0
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/fos/name_to_fos_no_match.yml +44 -0
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_crossref_metadata/invalid_date.yml +94 -0
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_datacite_metadata/even_more_subject_scheme_FOR.yml +97 -0
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_datacite_metadata/more_subject_scheme_FOR.yml +107 -0
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_datacite_metadata/subject_scheme_FOR.yml +110 -0
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_datetime_from_time/future.yml +44 -0
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_datetime_from_time/invalid.yml +44 -0
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_datetime_from_time/nil.yml +44 -0
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_datetime_from_time/past.yml +44 -0
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_datetime_from_time/present.yml +44 -0
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_datetime_from_timestamp/present.yml +44 -0
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/insert_dates/insert.yml +49 -0
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/insert_formats/insert.yml +49 -0
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/insert_person/creator_given_and_family_name.yml +49 -0
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/insert_sizes/insert.yml +49 -0
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/write_metadata_as_citeproc/software.yml +9 -9
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/write_metadata_as_citeproc/software_w/version.yml +52 -0
- data/spec/readers/codemeta_reader_spec.rb +1 -1
- data/spec/readers/crossref_reader_spec.rb +20 -0
- data/spec/readers/datacite_json_reader_spec.rb +1 -1
- data/spec/readers/datacite_reader_spec.rb +102 -2
- data/spec/readers/npm_reader_spec.rb +3 -3
- data/spec/readers/schema_org_reader_spec.rb +17 -3
- data/spec/utils_spec.rb +82 -0
- data/spec/writers/citeproc_writer_spec.rb +18 -0
- data/spec/writers/datacite_writer_spec.rb +9 -0
- data/spec/writers/schema_org_writer_spec.rb +2 -0
- metadata +26 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 49f153c7bf6a7dd69a2cf35844b9fe303574ebeb6f515a00581cea60955dc6d6
|
4
|
+
data.tar.gz: ebc11d65b789fb08e97dafa25c2e9e0b2ddfb6085011478498f2e619ebdd605c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d30e6e90b55e93bf4febdef8cf5fe3aa17e1821e2ee317f521957cdee0e811f8f94ed07f929b974ea967a8e5e4b7f0c8e7c08f47bc9bee681e72702f263ac80e
|
7
|
+
data.tar.gz: e30113a9e7ceddb8772a8e26310c7bc9ecdb349c93a33596c4d2ff0167c12650631e7dee7860c32a2e4dd8323453ed6acd7315c292060aef5a03954fbca61682
|
data/.gitignore
CHANGED
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
bolognese (1.
|
4
|
+
bolognese (1.6.2)
|
5
5
|
activesupport (>= 4.2.5)
|
6
6
|
benchmark_methods (~> 0.7)
|
7
7
|
bibtex-ruby (>= 5.1.0)
|
@@ -30,12 +30,12 @@ PATH
|
|
30
30
|
GEM
|
31
31
|
remote: https://rubygems.org/
|
32
32
|
specs:
|
33
|
-
activesupport (6.0.
|
33
|
+
activesupport (6.0.3.1)
|
34
34
|
concurrent-ruby (~> 1.0, >= 1.0.2)
|
35
35
|
i18n (>= 0.7, < 2)
|
36
36
|
minitest (~> 5.1)
|
37
37
|
tzinfo (~> 1.1)
|
38
|
-
zeitwerk (~> 2.2)
|
38
|
+
zeitwerk (~> 2.2, >= 2.2.2)
|
39
39
|
addressable (2.7.0)
|
40
40
|
public_suffix (>= 2.0.2, < 5.0)
|
41
41
|
benchmark_methods (0.7)
|
@@ -105,7 +105,7 @@ GEM
|
|
105
105
|
oj (>= 2.8.3)
|
106
106
|
oj_mimic_json (~> 1.0, >= 1.0.1)
|
107
107
|
mini_portile2 (2.4.0)
|
108
|
-
minitest (5.14.
|
108
|
+
minitest (5.14.1)
|
109
109
|
multi_json (1.14.1)
|
110
110
|
multipart-post (2.1.1)
|
111
111
|
namae (1.0.1)
|
@@ -113,7 +113,7 @@ GEM
|
|
113
113
|
mini_portile2 (~> 2.4.0)
|
114
114
|
oj (3.10.6)
|
115
115
|
oj_mimic_json (1.0.1)
|
116
|
-
optimist (3.0.
|
116
|
+
optimist (3.0.1)
|
117
117
|
postrank-uri (1.0.24)
|
118
118
|
addressable (>= 2.4.0)
|
119
119
|
nokogiri (>= 1.8.0)
|
@@ -123,40 +123,43 @@ GEM
|
|
123
123
|
rack-test (0.8.3)
|
124
124
|
rack (>= 1.0, < 3)
|
125
125
|
rake (12.3.3)
|
126
|
-
rdf (3.1.
|
126
|
+
rdf (3.1.2)
|
127
127
|
hamster (~> 3.0)
|
128
128
|
link_header (~> 0.0, >= 0.0.8)
|
129
129
|
rdf-aggregate-repo (3.1.0)
|
130
130
|
rdf (~> 3.1)
|
131
|
-
rdf-rdfa (3.1.
|
131
|
+
rdf-rdfa (3.1.1)
|
132
132
|
haml (~> 5.1)
|
133
133
|
htmlentities (~> 4.3)
|
134
|
-
rdf (~> 3.1)
|
134
|
+
rdf (~> 3.1, >= 3.1.2)
|
135
135
|
rdf-aggregate-repo (~> 3.1)
|
136
|
+
rdf-vocab (~> 3.1, >= 3.1.5)
|
136
137
|
rdf-xsd (~> 3.1)
|
137
138
|
rdf-rdfxml (3.1.0)
|
138
139
|
htmlentities (~> 4.3)
|
139
140
|
rdf (~> 3.1)
|
140
141
|
rdf-rdfa (~> 3.1)
|
141
142
|
rdf-xsd (~> 3.1)
|
142
|
-
rdf-turtle (3.1.
|
143
|
+
rdf-turtle (3.1.1)
|
143
144
|
ebnf (~> 1.2)
|
144
|
-
rdf (~> 3.1)
|
145
|
+
rdf (~> 3.1, >= 3.1.2)
|
146
|
+
rdf-vocab (3.1.5)
|
147
|
+
rdf (~> 3.1, >= 3.1.2)
|
145
148
|
rdf-xsd (3.1.0)
|
146
149
|
rdf (~> 3.1)
|
147
150
|
rspec (3.9.0)
|
148
151
|
rspec-core (~> 3.9.0)
|
149
152
|
rspec-expectations (~> 3.9.0)
|
150
153
|
rspec-mocks (~> 3.9.0)
|
151
|
-
rspec-core (3.9.
|
152
|
-
rspec-support (~> 3.9.
|
153
|
-
rspec-expectations (3.9.
|
154
|
+
rspec-core (3.9.2)
|
155
|
+
rspec-support (~> 3.9.3)
|
156
|
+
rspec-expectations (3.9.2)
|
154
157
|
diff-lcs (>= 1.2.0, < 2.0)
|
155
158
|
rspec-support (~> 3.9.0)
|
156
159
|
rspec-mocks (3.9.1)
|
157
160
|
diff-lcs (>= 1.2.0, < 2.0)
|
158
161
|
rspec-support (~> 3.9.0)
|
159
|
-
rspec-support (3.9.
|
162
|
+
rspec-support (3.9.3)
|
160
163
|
rspec-xsd (0.1.0)
|
161
164
|
nokogiri (~> 1.6)
|
162
165
|
rspec (~> 3)
|
@@ -28,12 +28,14 @@ module Bolognese
|
|
28
28
|
insert_publisher(xml)
|
29
29
|
insert_publication_year(xml)
|
30
30
|
insert_resource_type(xml)
|
31
|
-
insert_alternate_identifiers(xml)
|
32
31
|
insert_subjects(xml)
|
33
|
-
insert_language(xml)
|
34
32
|
insert_contributors(xml)
|
35
33
|
insert_dates(xml)
|
34
|
+
insert_language(xml)
|
35
|
+
insert_alternate_identifiers(xml)
|
36
36
|
insert_related_identifiers(xml)
|
37
|
+
insert_sizes(xml)
|
38
|
+
insert_formats(xml)
|
37
39
|
insert_version(xml)
|
38
40
|
insert_rights_list(xml)
|
39
41
|
insert_descriptions(xml)
|
@@ -170,9 +172,9 @@ module Bolognese
|
|
170
172
|
end
|
171
173
|
|
172
174
|
def insert_version(xml)
|
173
|
-
return xml unless
|
175
|
+
return xml unless version.present?
|
174
176
|
|
175
|
-
xml.version(
|
177
|
+
xml.version(version)
|
176
178
|
end
|
177
179
|
|
178
180
|
|
@@ -201,6 +203,22 @@ module Bolognese
|
|
201
203
|
end
|
202
204
|
end
|
203
205
|
|
206
|
+
def insert_sizes(xml)
|
207
|
+
xml.sizes do
|
208
|
+
Array.wrap(sizes).each do |s|
|
209
|
+
xml.size(s)
|
210
|
+
end
|
211
|
+
end
|
212
|
+
end
|
213
|
+
|
214
|
+
def insert_formats(xml)
|
215
|
+
xml.formats do
|
216
|
+
Array.wrap(formats).each do |f|
|
217
|
+
xml.format(f)
|
218
|
+
end
|
219
|
+
end
|
220
|
+
end
|
221
|
+
|
204
222
|
def insert_rights_list(xml)
|
205
223
|
return xml unless rights_list.present?
|
206
224
|
|
data/lib/bolognese/metadata.rb
CHANGED
@@ -9,7 +9,7 @@ module Bolognese
|
|
9
9
|
attr_accessor :string, :from, :sandbox, :meta, :regenerate, :issue, :show_errors
|
10
10
|
attr_reader :doc, :page_start, :page_end
|
11
11
|
attr_writer :id, :provider_id, :client_id, :doi, :identifiers, :creators, :contributors, :titles, :publisher,
|
12
|
-
:rights_list, :dates, :publication_year, :volume, :url, :
|
12
|
+
:rights_list, :dates, :publication_year, :volume, :url, :version,
|
13
13
|
:subjects, :contributor, :descriptions, :language, :sizes,
|
14
14
|
:formats, :schema_version, :meta, :container, :agency,
|
15
15
|
:format, :funding_references, :state, :geo_locations,
|
@@ -36,6 +36,7 @@ module Bolognese
|
|
36
36
|
# generate name for method to call dynamically
|
37
37
|
hsh = @from.present? ? send("get_" + @from, id: id, sandbox: options[:sandbox]) : {}
|
38
38
|
string = hsh.fetch("string", nil)
|
39
|
+
|
39
40
|
elsif input.present? && File.exist?(input)
|
40
41
|
filename = File.basename(input)
|
41
42
|
ext = File.extname(input)
|
@@ -105,7 +106,7 @@ module Bolognese
|
|
105
106
|
:publication_year,
|
106
107
|
:descriptions,
|
107
108
|
:rights_list,
|
108
|
-
:
|
109
|
+
:version,
|
109
110
|
:subjects,
|
110
111
|
:language,
|
111
112
|
:geo_locations,
|
@@ -115,7 +116,6 @@ module Bolognese
|
|
115
116
|
).compact
|
116
117
|
|
117
118
|
@regenerate = options[:regenerate] || read_options.present?
|
118
|
-
|
119
119
|
# generate name for method to call dynamically
|
120
120
|
@meta = @from.present? ? send("read_" + @from, { string: string, sandbox: options[:sandbox], doi: options[:doi], id: id, ra: ra }.merge(read_options)) : {}
|
121
121
|
end
|
@@ -189,10 +189,13 @@ module Bolognese
|
|
189
189
|
@url ||= meta.fetch("url", nil)
|
190
190
|
end
|
191
191
|
|
192
|
-
def
|
193
|
-
@
|
192
|
+
def version
|
193
|
+
@version ||= meta.fetch("version", nil)
|
194
194
|
end
|
195
195
|
|
196
|
+
# for backwards compatibility
|
197
|
+
alias_attribute :version_info, :version
|
198
|
+
|
196
199
|
def publication_year
|
197
200
|
@publication_year ||= meta.fetch("publication_year", nil)
|
198
201
|
end
|
@@ -158,7 +158,7 @@ module Bolognese
|
|
158
158
|
"publisher" => publisher,
|
159
159
|
"title" => parse_attributes(titles, content: "title", first: true),
|
160
160
|
"URL" => url,
|
161
|
-
"version" =>
|
161
|
+
"version" => version
|
162
162
|
}.compact.symbolize_keys
|
163
163
|
end
|
164
164
|
|
@@ -180,7 +180,7 @@ module Bolognese
|
|
180
180
|
"identifiers" => identifiers,
|
181
181
|
"sizes" => sizes,
|
182
182
|
"formats" => formats,
|
183
|
-
"version" =>
|
183
|
+
"version" => version,
|
184
184
|
"rights_list" => rights_list,
|
185
185
|
"descriptions" => descriptions,
|
186
186
|
"geo_locations" => geo_locations,
|
@@ -100,8 +100,10 @@ module Bolognese
|
|
100
100
|
doi = Array.wrap(identifiers).find { |r| r["identifierType"] == "DOI" }.to_h.fetch("identifier", nil)
|
101
101
|
|
102
102
|
state = id.present? || read_options.present? ? "findable" : "not_found"
|
103
|
-
subjects = Array.wrap(meta.fetch("categories", nil)).
|
104
|
-
|
103
|
+
subjects = Array.wrap(meta.fetch("categories", nil)).reduce([]) do |sum, subject|
|
104
|
+
sum += name_to_fos(subject)
|
105
|
+
|
106
|
+
sum
|
105
107
|
end
|
106
108
|
|
107
109
|
{ "id" => id,
|
@@ -119,7 +121,7 @@ module Bolognese
|
|
119
121
|
"publication_year" => publication_year,
|
120
122
|
"descriptions" => meta.fetch("abstract", nil).present? ? [{ "description" => sanitize(meta.fetch("abstract")), "descriptionType" => "Abstract" }] : [],
|
121
123
|
"rights_list" => rights_list,
|
122
|
-
"
|
124
|
+
"version" => meta.fetch("version", nil),
|
123
125
|
"subjects" => subjects,
|
124
126
|
"state" => state
|
125
127
|
}.merge(read_options)
|
@@ -57,8 +57,10 @@ module Bolognese
|
|
57
57
|
"bibtex" => Bolognese::Utils::SO_TO_BIB_TRANSLATIONS[schema_org] || "misc",
|
58
58
|
"ris" => Bolognese::Utils::SO_TO_RIS_TRANSLATIONS[schema_org] || "GEN"
|
59
59
|
}.compact
|
60
|
-
subjects = Array.wrap(meta.fetch("tags", nil)).
|
61
|
-
|
60
|
+
subjects = Array.wrap(meta.fetch("tags", nil)).reduce([]) do |sum, subject|
|
61
|
+
sum += name_to_fos(subject)
|
62
|
+
|
63
|
+
sum
|
62
64
|
end
|
63
65
|
|
64
66
|
has_title = meta.fetch("title", nil)
|
@@ -79,7 +81,7 @@ module Bolognese
|
|
79
81
|
"publication_year" => publication_year,
|
80
82
|
"descriptions" => meta.fetch("description", nil).present? ? [{ "description" => sanitize(meta.fetch("description")), "descriptionType" => "Abstract" }] : nil,
|
81
83
|
"rights_list" => [{ "rightsUri" => meta.fetch("license", nil) }.compact],
|
82
|
-
"
|
84
|
+
"version" => meta.fetch("version", nil),
|
83
85
|
"subjects" => subjects,
|
84
86
|
"state" => state
|
85
87
|
}.merge(read_options)
|
@@ -117,11 +117,8 @@ module Bolognese
|
|
117
117
|
date_updated = { "date" => date_updated.fetch("__content__", nil), "dateType" => "Updated" } if date_updated.present?
|
118
118
|
|
119
119
|
date_registered = Array.wrap(query.to_h["crm_item"]).find { |cr| cr["name"] == "deposit-timestamp" }
|
120
|
-
|
121
|
-
|
122
|
-
else
|
123
|
-
date_registered = nil
|
124
|
-
end
|
120
|
+
date_registered = get_datetime_from_time(date_registered.fetch("__content__", nil)) if date_registered.present?
|
121
|
+
|
125
122
|
# check that date is valid iso8601 date
|
126
123
|
date_published = nil unless Date.edtf(date_published.to_h["date"]).present?
|
127
124
|
date_updated = nil unless Date.edtf(date_updated.to_h["date"]).present?
|
@@ -175,7 +172,7 @@ module Bolognese
|
|
175
172
|
"publication_year" => publication_year,
|
176
173
|
"descriptions" => crossref_description(bibliographic_metadata),
|
177
174
|
"rights_list" => crossref_license(program_metadata),
|
178
|
-
"
|
175
|
+
"version" => nil,
|
179
176
|
"subjects" => nil,
|
180
177
|
"language" => nil,
|
181
178
|
"sizes" => nil,
|
@@ -93,7 +93,7 @@ module Bolognese
|
|
93
93
|
"bibtex" => Bolognese::Utils::CR_TO_BIB_TRANSLATIONS[resource_type.to_s.underscore.camelcase] || Bolognese::Utils::SO_TO_BIB_TRANSLATIONS[schema_org] || "misc",
|
94
94
|
"ris" => Bolognese::Utils::CR_TO_RIS_TRANSLATIONS[resource_type.to_s.underscore.camelcase] || Bolognese::Utils::DC_TO_RIS_TRANSLATIONS[resource_type_general.to_s.dasherize] || "GEN"
|
95
95
|
}.compact
|
96
|
-
|
96
|
+
|
97
97
|
titles = Array.wrap(meta.dig("titles", "title")).map do |r|
|
98
98
|
if r.blank?
|
99
99
|
nil
|
@@ -122,15 +122,17 @@ module Bolognese
|
|
122
122
|
{ "rights" => r["__content__"], "rightsUri" => normalize_url(r["rightsURI"]), "lang" => r["lang"] }.compact
|
123
123
|
end
|
124
124
|
end.compact
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
{ "subject" => sanitize(k["__content__"]), "subjectScheme" => k["subjectScheme"], "schemeUri" => k["schemeURI"], "valueUri" => k["valueURI"], "lang" => k["lang"] }.compact
|
125
|
+
|
126
|
+
subjects = Array.wrap(meta.dig("subjects", "subject")).reduce([]) do |sum, subject|
|
127
|
+
if subject.is_a?(String)
|
128
|
+
sum += name_to_fos(subject)
|
129
|
+
elsif subject.is_a?(Hash)
|
130
|
+
sum += hsh_to_fos(subject)
|
132
131
|
end
|
133
|
-
|
132
|
+
|
133
|
+
sum
|
134
|
+
end.uniq
|
135
|
+
|
134
136
|
dates = Array.wrap(meta.dig("dates", "date")).map do |r|
|
135
137
|
if r.is_a?(Hash) && date = sanitize(r["__content__"]).presence
|
136
138
|
if Date.edtf(date).present? || Bolognese::Utils::UNKNOWN_INFORMATION.key?(date)
|
@@ -164,12 +166,15 @@ module Bolognese
|
|
164
166
|
funding_references = Array.wrap(meta.dig("fundingReferences", "fundingReference")).compact.map do |fr|
|
165
167
|
scheme_uri = parse_attributes(fr["funderIdentifier"], content: "schemeURI")
|
166
168
|
funder_identifier = parse_attributes(fr["funderIdentifier"])
|
167
|
-
|
168
|
-
|
169
|
+
funder_identifier_type = parse_attributes(fr["funderIdentifier"], content: "funderIdentifierType")
|
170
|
+
if funder_identifier_type != "Other"
|
171
|
+
funder_identifier = !funder_identifier.to_s.start_with?("https://","http://") && scheme_uri.present? ? normalize_id(scheme_uri + funder_identifier) : normalize_id(funder_identifier)
|
172
|
+
end
|
173
|
+
|
169
174
|
{
|
170
175
|
"funderName" => fr["funderName"],
|
171
176
|
"funderIdentifier" => funder_identifier,
|
172
|
-
"funderIdentifierType" =>
|
177
|
+
"funderIdentifierType" => funder_identifier_type,
|
173
178
|
"awardNumber" => parse_attributes(fr["awardNumber"]),
|
174
179
|
"awardUri" => parse_attributes(fr["awardNumber"], content: "awardURI"),
|
175
180
|
"awardTitle" => fr["awardTitle"] }.compact
|
@@ -181,7 +186,7 @@ module Bolognese
|
|
181
186
|
rid = ri["__content__"]
|
182
187
|
end
|
183
188
|
|
184
|
-
{
|
189
|
+
{
|
185
190
|
"relatedIdentifier" => rid,
|
186
191
|
"relatedIdentifierType" => ri["relatedIdentifierType"],
|
187
192
|
"relationType" => ri["relationType"],
|
@@ -230,7 +235,7 @@ module Bolognese
|
|
230
235
|
"publication_year" => parse_attributes(meta.fetch("publicationYear", nil), first: true).to_s.strip.presence,
|
231
236
|
"descriptions" => descriptions,
|
232
237
|
"rights_list" => Array.wrap(rights_list),
|
233
|
-
"
|
238
|
+
"version" => meta.fetch("version", nil).to_s.presence,
|
234
239
|
"subjects" => subjects,
|
235
240
|
"language" => parse_attributes(meta.fetch("language", nil), first: true).to_s.strip.presence,
|
236
241
|
"geo_locations" => geo_locations,
|
@@ -105,7 +105,7 @@ module Bolognese
|
|
105
105
|
#"publication_year" => publication_year,
|
106
106
|
"descriptions" => meta.fetch("description", nil).present? ? [{ "description" => sanitize(meta.fetch("description")), "descriptionType" => "Abstract" }] : [],
|
107
107
|
"rights_list" => rights_list,
|
108
|
-
"
|
108
|
+
"version" => meta.fetch("version", nil),
|
109
109
|
"subjects" => subjects
|
110
110
|
#"state" => state
|
111
111
|
}.merge(read_options)
|
@@ -82,8 +82,10 @@ module Bolognese
|
|
82
82
|
nil
|
83
83
|
end
|
84
84
|
state = meta.fetch("DO", nil).present? || read_options.present? ? "findable" : "not_found"
|
85
|
-
subjects = Array.wrap(meta.fetch("KW", nil)).
|
86
|
-
|
85
|
+
subjects = Array.wrap(meta.fetch("KW", nil)).reduce([]) do |sum, subject|
|
86
|
+
sum += name_to_fos(subject)
|
87
|
+
|
88
|
+
sum
|
87
89
|
end
|
88
90
|
|
89
91
|
{ "id" => id,
|
@@ -150,8 +150,13 @@ module Bolognese
|
|
150
150
|
"geoLocationBox" => geo_location_box
|
151
151
|
}.compact
|
152
152
|
end
|
153
|
-
|
154
|
-
|
153
|
+
|
154
|
+
# handle keywords as array and as comma-separated string
|
155
|
+
subjects = meta.fetch("keywords", nil)
|
156
|
+
subjects = subjects.to_s.split(", ") if subjects.is_a?(String)
|
157
|
+
subjects = Array.wrap(subjects).reduce([]) do |sum, subject|
|
158
|
+
sum += name_to_fos(subject)
|
159
|
+
sum
|
155
160
|
end
|
156
161
|
|
157
162
|
{ "id" => id,
|
@@ -173,7 +178,7 @@ module Bolognese
|
|
173
178
|
"dates" => dates,
|
174
179
|
"descriptions" => meta.fetch("description", nil).present? ? [{ "description" => sanitize(meta.fetch("description")), "descriptionType" => "Abstract" }] : nil,
|
175
180
|
"rights_list" => rights_list,
|
176
|
-
"
|
181
|
+
"version" => meta.fetch("version", nil).to_s.presence,
|
177
182
|
"subjects" => subjects,
|
178
183
|
"state" => state,
|
179
184
|
"schema_version" => meta.fetch("schemaVersion", nil).to_s.presence,
|
data/lib/bolognese/utils.rb
CHANGED
@@ -974,6 +974,14 @@ module Bolognese
|
|
974
974
|
nil
|
975
975
|
end
|
976
976
|
|
977
|
+
# iso8601 datetime without hyphens and colons, used by Crossref
|
978
|
+
# return nil if invalid
|
979
|
+
def get_datetime_from_time(time)
|
980
|
+
DateTime.strptime(time.to_s, "%Y%m%d%H%M%S").strftime('%Y-%m-%dT%H:%M:%SZ')
|
981
|
+
rescue ArgumentError
|
982
|
+
nil
|
983
|
+
end
|
984
|
+
|
977
985
|
def get_date(dates, date_type)
|
978
986
|
dd = Array.wrap(dates).find { |d| d["dateType"] == date_type } || {}
|
979
987
|
dd.fetch("date", nil)
|
@@ -1048,5 +1056,100 @@ module Bolognese
|
|
1048
1056
|
error_array
|
1049
1057
|
end
|
1050
1058
|
|
1059
|
+
def name_to_fos(name)
|
1060
|
+
# first find subject in Fields of Science (OECD)
|
1061
|
+
fos = JSON.load(File.read(File.expand_path('../../../resources/oecd/fos-mappings.json', __FILE__))).fetch("fosFields")
|
1062
|
+
|
1063
|
+
subject = fos.find { |l| l["fosLabel"] == name || "FOS: " + l["fosLabel"] == name }
|
1064
|
+
|
1065
|
+
if subject
|
1066
|
+
return [{
|
1067
|
+
"subject" => sanitize(name) },
|
1068
|
+
{
|
1069
|
+
"subject" => "FOS: " + subject["fosLabel"],
|
1070
|
+
"subjectScheme" => "Fields of Science and Technology (FOS)",
|
1071
|
+
"schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf"
|
1072
|
+
}]
|
1073
|
+
end
|
1074
|
+
|
1075
|
+
# if not found, look in Fields of Research (Australian and New Zealand Standard Research Classification)
|
1076
|
+
# and map to Fields of Science. Add an extra entry for the latter
|
1077
|
+
fores = JSON.load(File.read(File.expand_path('../../../resources/oecd/for-mappings.json', __FILE__)))
|
1078
|
+
for_fields = fores.fetch("forFields")
|
1079
|
+
for_disciplines = fores.fetch("forDisciplines")
|
1080
|
+
|
1081
|
+
subject = for_fields.find { |l| l["forLabel"] == name } ||
|
1082
|
+
for_disciplines.find { |l| l["forLabel"] == name }
|
1083
|
+
|
1084
|
+
if subject
|
1085
|
+
[{
|
1086
|
+
"subject" => sanitize(name) },
|
1087
|
+
{
|
1088
|
+
"subject" => "FOS: " + subject["fosLabel"],
|
1089
|
+
"subjectScheme" => "Fields of Science and Technology (FOS)",
|
1090
|
+
"schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf"
|
1091
|
+
}]
|
1092
|
+
else
|
1093
|
+
[{ "subject" => sanitize(name) }]
|
1094
|
+
end
|
1095
|
+
end
|
1096
|
+
|
1097
|
+
def hsh_to_fos(hsh)
|
1098
|
+
# first find subject in Fields of Science (OECD)
|
1099
|
+
fos = JSON.load(File.read(File.expand_path('../../../resources/oecd/fos-mappings.json', __FILE__))).fetch("fosFields")
|
1100
|
+
subject = fos.find { |l| l["fosLabel"] == hsh["__content__"] || "FOS: " + l["fosLabel"] == hsh["__content__"] }
|
1101
|
+
|
1102
|
+
if subject
|
1103
|
+
return [{
|
1104
|
+
"subject" => sanitize(hsh["__content__"]),
|
1105
|
+
"subjectScheme" => hsh["subjectScheme"],
|
1106
|
+
"schemeUri" => hsh["schemeURI"],
|
1107
|
+
"valueUri" => hsh["valueURI"],
|
1108
|
+
"lang" => hsh["lang"] }.compact,
|
1109
|
+
{
|
1110
|
+
"subject" => "FOS: " + subject["fosLabel"],
|
1111
|
+
"subjectScheme" => "Fields of Science and Technology (FOS)",
|
1112
|
+
"schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf" }.compact]
|
1113
|
+
end
|
1114
|
+
|
1115
|
+
# if not found, look in Fields of Research (Australian and New Zealand Standard Research Classification)
|
1116
|
+
# and map to Fields of Science. Add an extra entry for the latter
|
1117
|
+
fores = JSON.load(File.read(File.expand_path('../../../resources/oecd/for-mappings.json', __FILE__)))
|
1118
|
+
for_fields = fores.fetch("forFields")
|
1119
|
+
for_disciplines = fores.fetch("forDisciplines")
|
1120
|
+
|
1121
|
+
# try to extract forId
|
1122
|
+
if hsh["subjectScheme"] == "FOR"
|
1123
|
+
for_id = hsh["__content__"].split(" ").first
|
1124
|
+
for_id = for_id.rjust(6, "0")
|
1125
|
+
|
1126
|
+
subject = for_fields.find { |l| l["forId"] == for_id } ||
|
1127
|
+
for_disciplines.find { |l| l["forId"] == for_id[0..3] }
|
1128
|
+
else
|
1129
|
+
subject = for_fields.find { |l| l["forLabel"] == hsh["__content__"] } ||
|
1130
|
+
for_disciplines.find { |l| l["forLabel"] == hsh["__content__"] }
|
1131
|
+
end
|
1132
|
+
|
1133
|
+
if subject
|
1134
|
+
[{
|
1135
|
+
"subject" => sanitize(hsh["__content__"]),
|
1136
|
+
"subjectScheme" => hsh["subjectScheme"],
|
1137
|
+
"schemeUri" => hsh["schemeURI"],
|
1138
|
+
"valueUri" => hsh["valueURI"],
|
1139
|
+
"lang" => hsh["lang"] }.compact,
|
1140
|
+
{
|
1141
|
+
"subject" => "FOS: " + subject["fosLabel"],
|
1142
|
+
"subjectScheme" => "Fields of Science and Technology (FOS)",
|
1143
|
+
"schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf"
|
1144
|
+
}]
|
1145
|
+
else
|
1146
|
+
[{
|
1147
|
+
"subject" => sanitize(hsh["__content__"]),
|
1148
|
+
"subjectScheme" => hsh["subjectScheme"],
|
1149
|
+
"schemeUri" => hsh["schemeURI"],
|
1150
|
+
"valueUri" => hsh["valueURI"],
|
1151
|
+
"lang" => hsh["lang"] }.compact]
|
1152
|
+
end
|
1153
|
+
end
|
1051
1154
|
end
|
1052
1155
|
end
|