bolognese 1.5.16 → 1.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -0
  3. data/Gemfile.lock +17 -14
  4. data/lib/bolognese/datacite_utils.rb +22 -4
  5. data/lib/bolognese/metadata.rb +8 -5
  6. data/lib/bolognese/metadata_utils.rb +2 -2
  7. data/lib/bolognese/readers/citeproc_reader.rb +5 -3
  8. data/lib/bolognese/readers/codemeta_reader.rb +5 -3
  9. data/lib/bolognese/readers/crossref_reader.rb +3 -6
  10. data/lib/bolognese/readers/datacite_reader.rb +19 -14
  11. data/lib/bolognese/readers/npm_reader.rb +1 -1
  12. data/lib/bolognese/readers/ris_reader.rb +4 -2
  13. data/lib/bolognese/readers/schema_org_reader.rb +8 -3
  14. data/lib/bolognese/utils.rb +103 -0
  15. data/lib/bolognese/version.rb +1 -1
  16. data/lib/bolognese/writers/codemeta_writer.rb +1 -1
  17. data/lib/bolognese/writers/jats_writer.rb +2 -2
  18. data/lib/bolognese/writers/schema_org_writer.rb +1 -1
  19. data/resources/oecd/for-mappings.json +1101 -0
  20. data/resources/oecd/fos-mappings.json +198 -0
  21. data/spec/datacite_utils_spec.rb +27 -7
  22. data/spec/fixtures/datacite-funderIdentifier.xml +4 -0
  23. data/spec/fixtures/datacite_software_version.json +74 -0
  24. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/fos/hsh_to_fos_for_match.yml +44 -0
  25. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/fos/hsh_to_fos_match.yml +44 -0
  26. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/fos/hsh_to_fos_no_match.yml +44 -0
  27. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/fos/name_to_fos_for_match.yml +44 -0
  28. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/fos/name_to_fos_match.yml +44 -0
  29. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/fos/name_to_fos_no_match.yml +44 -0
  30. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_crossref_metadata/invalid_date.yml +94 -0
  31. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_datacite_metadata/even_more_subject_scheme_FOR.yml +97 -0
  32. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_datacite_metadata/more_subject_scheme_FOR.yml +107 -0
  33. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_datacite_metadata/subject_scheme_FOR.yml +110 -0
  34. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_datetime_from_time/future.yml +44 -0
  35. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_datetime_from_time/invalid.yml +44 -0
  36. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_datetime_from_time/nil.yml +44 -0
  37. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_datetime_from_time/past.yml +44 -0
  38. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_datetime_from_time/present.yml +44 -0
  39. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_datetime_from_timestamp/present.yml +44 -0
  40. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/insert_dates/insert.yml +49 -0
  41. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/insert_formats/insert.yml +49 -0
  42. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/insert_person/creator_given_and_family_name.yml +49 -0
  43. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/insert_sizes/insert.yml +49 -0
  44. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/write_metadata_as_citeproc/software.yml +9 -9
  45. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/write_metadata_as_citeproc/software_w/version.yml +52 -0
  46. data/spec/readers/codemeta_reader_spec.rb +1 -1
  47. data/spec/readers/crossref_reader_spec.rb +20 -0
  48. data/spec/readers/datacite_json_reader_spec.rb +1 -1
  49. data/spec/readers/datacite_reader_spec.rb +102 -2
  50. data/spec/readers/npm_reader_spec.rb +3 -3
  51. data/spec/readers/schema_org_reader_spec.rb +17 -3
  52. data/spec/utils_spec.rb +82 -0
  53. data/spec/writers/citeproc_writer_spec.rb +18 -0
  54. data/spec/writers/datacite_writer_spec.rb +9 -0
  55. data/spec/writers/schema_org_writer_spec.rb +2 -0
  56. metadata +26 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9e25803b02c683fa2d39bce713e1f1db91bb5231e5f357a121aa718e920e03fe
4
- data.tar.gz: 614f8f6c57c4b5efff60553a8dbd65935c5d37680ec238945b5f8fdb94160905
3
+ metadata.gz: 49f153c7bf6a7dd69a2cf35844b9fe303574ebeb6f515a00581cea60955dc6d6
4
+ data.tar.gz: ebc11d65b789fb08e97dafa25c2e9e0b2ddfb6085011478498f2e619ebdd605c
5
5
  SHA512:
6
- metadata.gz: eae192302662c5d77e0b66aa4799349972baada6e941fe2a250d92fbaddb945e4547ff9a2409aae1acb35136f9b971c567ee9aba1f07e9f69c3b8a96f7b1a4cb
7
- data.tar.gz: ebe67deb2039574f9ba98489f1e8e990346fc15ae29418a3e40bb758db14ab4c72593acaff3fa0e131f253e75cbc7b9d6c93efc7c972b79db8a41fc60354b6f2
6
+ metadata.gz: d30e6e90b55e93bf4febdef8cf5fe3aa17e1821e2ee317f521957cdee0e811f8f94ed07f929b974ea967a8e5e4b7f0c8e7c08f47bc9bee681e72702f263ac80e
7
+ data.tar.gz: e30113a9e7ceddb8772a8e26310c7bc9ecdb349c93a33596c4d2ff0167c12650631e7dee7860c32a2e4dd8323453ed6acd7315c292060aef5a03954fbca61682
data/.gitignore CHANGED
@@ -55,3 +55,5 @@ coverage/
55
55
  .env.*
56
56
  !.env.example
57
57
  !.env.travis
58
+
59
+ .vscode
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- bolognese (1.5.16)
4
+ bolognese (1.6.2)
5
5
  activesupport (>= 4.2.5)
6
6
  benchmark_methods (~> 0.7)
7
7
  bibtex-ruby (>= 5.1.0)
@@ -30,12 +30,12 @@ PATH
30
30
  GEM
31
31
  remote: https://rubygems.org/
32
32
  specs:
33
- activesupport (6.0.2.2)
33
+ activesupport (6.0.3.1)
34
34
  concurrent-ruby (~> 1.0, >= 1.0.2)
35
35
  i18n (>= 0.7, < 2)
36
36
  minitest (~> 5.1)
37
37
  tzinfo (~> 1.1)
38
- zeitwerk (~> 2.2)
38
+ zeitwerk (~> 2.2, >= 2.2.2)
39
39
  addressable (2.7.0)
40
40
  public_suffix (>= 2.0.2, < 5.0)
41
41
  benchmark_methods (0.7)
@@ -105,7 +105,7 @@ GEM
105
105
  oj (>= 2.8.3)
106
106
  oj_mimic_json (~> 1.0, >= 1.0.1)
107
107
  mini_portile2 (2.4.0)
108
- minitest (5.14.0)
108
+ minitest (5.14.1)
109
109
  multi_json (1.14.1)
110
110
  multipart-post (2.1.1)
111
111
  namae (1.0.1)
@@ -113,7 +113,7 @@ GEM
113
113
  mini_portile2 (~> 2.4.0)
114
114
  oj (3.10.6)
115
115
  oj_mimic_json (1.0.1)
116
- optimist (3.0.0)
116
+ optimist (3.0.1)
117
117
  postrank-uri (1.0.24)
118
118
  addressable (>= 2.4.0)
119
119
  nokogiri (>= 1.8.0)
@@ -123,40 +123,43 @@ GEM
123
123
  rack-test (0.8.3)
124
124
  rack (>= 1.0, < 3)
125
125
  rake (12.3.3)
126
- rdf (3.1.1)
126
+ rdf (3.1.2)
127
127
  hamster (~> 3.0)
128
128
  link_header (~> 0.0, >= 0.0.8)
129
129
  rdf-aggregate-repo (3.1.0)
130
130
  rdf (~> 3.1)
131
- rdf-rdfa (3.1.0)
131
+ rdf-rdfa (3.1.1)
132
132
  haml (~> 5.1)
133
133
  htmlentities (~> 4.3)
134
- rdf (~> 3.1)
134
+ rdf (~> 3.1, >= 3.1.2)
135
135
  rdf-aggregate-repo (~> 3.1)
136
+ rdf-vocab (~> 3.1, >= 3.1.5)
136
137
  rdf-xsd (~> 3.1)
137
138
  rdf-rdfxml (3.1.0)
138
139
  htmlentities (~> 4.3)
139
140
  rdf (~> 3.1)
140
141
  rdf-rdfa (~> 3.1)
141
142
  rdf-xsd (~> 3.1)
142
- rdf-turtle (3.1.0)
143
+ rdf-turtle (3.1.1)
143
144
  ebnf (~> 1.2)
144
- rdf (~> 3.1)
145
+ rdf (~> 3.1, >= 3.1.2)
146
+ rdf-vocab (3.1.5)
147
+ rdf (~> 3.1, >= 3.1.2)
145
148
  rdf-xsd (3.1.0)
146
149
  rdf (~> 3.1)
147
150
  rspec (3.9.0)
148
151
  rspec-core (~> 3.9.0)
149
152
  rspec-expectations (~> 3.9.0)
150
153
  rspec-mocks (~> 3.9.0)
151
- rspec-core (3.9.1)
152
- rspec-support (~> 3.9.1)
153
- rspec-expectations (3.9.1)
154
+ rspec-core (3.9.2)
155
+ rspec-support (~> 3.9.3)
156
+ rspec-expectations (3.9.2)
154
157
  diff-lcs (>= 1.2.0, < 2.0)
155
158
  rspec-support (~> 3.9.0)
156
159
  rspec-mocks (3.9.1)
157
160
  diff-lcs (>= 1.2.0, < 2.0)
158
161
  rspec-support (~> 3.9.0)
159
- rspec-support (3.9.2)
162
+ rspec-support (3.9.3)
160
163
  rspec-xsd (0.1.0)
161
164
  nokogiri (~> 1.6)
162
165
  rspec (~> 3)
data/lib/bolognese/datacite_utils.rb CHANGED
@@ -28,12 +28,14 @@ module Bolognese
28
28
  insert_publisher(xml)
29
29
  insert_publication_year(xml)
30
30
  insert_resource_type(xml)
31
- insert_alternate_identifiers(xml)
32
31
  insert_subjects(xml)
33
- insert_language(xml)
34
32
  insert_contributors(xml)
35
33
  insert_dates(xml)
34
+ insert_language(xml)
35
+ insert_alternate_identifiers(xml)
36
36
  insert_related_identifiers(xml)
37
+ insert_sizes(xml)
38
+ insert_formats(xml)
37
39
  insert_version(xml)
38
40
  insert_rights_list(xml)
39
41
  insert_descriptions(xml)
@@ -170,9 +172,9 @@ module Bolognese
170
172
  end
171
173
 
172
174
  def insert_version(xml)
173
- return xml unless version_info.present?
175
+ return xml unless version.present?
174
176
 
175
- xml.version(version_info)
177
+ xml.version(version)
176
178
  end
177
179
 
178
180
 
@@ -201,6 +203,22 @@ module Bolognese
201
203
  end
202
204
  end
203
205
 
206
+ def insert_sizes(xml)
207
+ xml.sizes do
208
+ Array.wrap(sizes).each do |s|
209
+ xml.size(s)
210
+ end
211
+ end
212
+ end
213
+
214
+ def insert_formats(xml)
215
+ xml.formats do
216
+ Array.wrap(formats).each do |f|
217
+ xml.format(f)
218
+ end
219
+ end
220
+ end
221
+
204
222
  def insert_rights_list(xml)
205
223
  return xml unless rights_list.present?
206
224
 
data/lib/bolognese/metadata.rb CHANGED
@@ -9,7 +9,7 @@ module Bolognese
9
9
  attr_accessor :string, :from, :sandbox, :meta, :regenerate, :issue, :show_errors
10
10
  attr_reader :doc, :page_start, :page_end
11
11
  attr_writer :id, :provider_id, :client_id, :doi, :identifiers, :creators, :contributors, :titles, :publisher,
12
- :rights_list, :dates, :publication_year, :volume, :url, :version_info,
12
+ :rights_list, :dates, :publication_year, :volume, :url, :version,
13
13
  :subjects, :contributor, :descriptions, :language, :sizes,
14
14
  :formats, :schema_version, :meta, :container, :agency,
15
15
  :format, :funding_references, :state, :geo_locations,
@@ -36,6 +36,7 @@ module Bolognese
36
36
  # generate name for method to call dynamically
37
37
  hsh = @from.present? ? send("get_" + @from, id: id, sandbox: options[:sandbox]) : {}
38
38
  string = hsh.fetch("string", nil)
39
+
39
40
  elsif input.present? && File.exist?(input)
40
41
  filename = File.basename(input)
41
42
  ext = File.extname(input)
@@ -105,7 +106,7 @@ module Bolognese
105
106
  :publication_year,
106
107
  :descriptions,
107
108
  :rights_list,
108
- :version_info,
109
+ :version,
109
110
  :subjects,
110
111
  :language,
111
112
  :geo_locations,
@@ -115,7 +116,6 @@ module Bolognese
115
116
  ).compact
116
117
 
117
118
  @regenerate = options[:regenerate] || read_options.present?
118
-
119
119
  # generate name for method to call dynamically
120
120
  @meta = @from.present? ? send("read_" + @from, { string: string, sandbox: options[:sandbox], doi: options[:doi], id: id, ra: ra }.merge(read_options)) : {}
121
121
  end
@@ -189,10 +189,13 @@ module Bolognese
189
189
  @url ||= meta.fetch("url", nil)
190
190
  end
191
191
 
192
- def version_info
193
- @version_info ||= meta.fetch("version_info", nil)
192
+ def version
193
+ @version ||= meta.fetch("version", nil)
194
194
  end
195
195
 
196
+ # for backwards compatibility
197
+ alias_attribute :version_info, :version
198
+
196
199
  def publication_year
197
200
  @publication_year ||= meta.fetch("publication_year", nil)
198
201
  end
data/lib/bolognese/metadata_utils.rb CHANGED
@@ -158,7 +158,7 @@ module Bolognese
158
158
  "publisher" => publisher,
159
159
  "title" => parse_attributes(titles, content: "title", first: true),
160
160
  "URL" => url,
161
- "version" => version_info
161
+ "version" => version
162
162
  }.compact.symbolize_keys
163
163
  end
164
164
 
@@ -180,7 +180,7 @@ module Bolognese
180
180
  "identifiers" => identifiers,
181
181
  "sizes" => sizes,
182
182
  "formats" => formats,
183
- "version" => version_info,
183
+ "version" => version,
184
184
  "rights_list" => rights_list,
185
185
  "descriptions" => descriptions,
186
186
  "geo_locations" => geo_locations,
data/lib/bolognese/readers/citeproc_reader.rb CHANGED
@@ -100,8 +100,10 @@ module Bolognese
100
100
  doi = Array.wrap(identifiers).find { |r| r["identifierType"] == "DOI" }.to_h.fetch("identifier", nil)
101
101
 
102
102
  state = id.present? || read_options.present? ? "findable" : "not_found"
103
- subjects = Array.wrap(meta.fetch("categories", nil)).map do |s|
104
- { "subject" => s }
103
+ subjects = Array.wrap(meta.fetch("categories", nil)).reduce([]) do |sum, subject|
104
+ sum += name_to_fos(subject)
105
+
106
+ sum
105
107
  end
106
108
 
107
109
  { "id" => id,
@@ -119,7 +121,7 @@ module Bolognese
119
121
  "publication_year" => publication_year,
120
122
  "descriptions" => meta.fetch("abstract", nil).present? ? [{ "description" => sanitize(meta.fetch("abstract")), "descriptionType" => "Abstract" }] : [],
121
123
  "rights_list" => rights_list,
122
- "version_info" => meta.fetch("version", nil),
124
+ "version" => meta.fetch("version", nil),
123
125
  "subjects" => subjects,
124
126
  "state" => state
125
127
  }.merge(read_options)
data/lib/bolognese/readers/codemeta_reader.rb CHANGED
@@ -57,8 +57,10 @@ module Bolognese
57
57
  "bibtex" => Bolognese::Utils::SO_TO_BIB_TRANSLATIONS[schema_org] || "misc",
58
58
  "ris" => Bolognese::Utils::SO_TO_RIS_TRANSLATIONS[schema_org] || "GEN"
59
59
  }.compact
60
- subjects = Array.wrap(meta.fetch("tags", nil)).map do |s|
61
- { "subject" => s }
60
+ subjects = Array.wrap(meta.fetch("tags", nil)).reduce([]) do |sum, subject|
61
+ sum += name_to_fos(subject)
62
+
63
+ sum
62
64
  end
63
65
 
64
66
  has_title = meta.fetch("title", nil)
@@ -79,7 +81,7 @@ module Bolognese
79
81
  "publication_year" => publication_year,
80
82
  "descriptions" => meta.fetch("description", nil).present? ? [{ "description" => sanitize(meta.fetch("description")), "descriptionType" => "Abstract" }] : nil,
81
83
  "rights_list" => [{ "rightsUri" => meta.fetch("license", nil) }.compact],
82
- "version_info" => meta.fetch("version", nil),
84
+ "version" => meta.fetch("version", nil),
83
85
  "subjects" => subjects,
84
86
  "state" => state
85
87
  }.merge(read_options)
data/lib/bolognese/readers/crossref_reader.rb CHANGED
@@ -117,11 +117,8 @@ module Bolognese
117
117
  date_updated = { "date" => date_updated.fetch("__content__", nil), "dateType" => "Updated" } if date_updated.present?
118
118
 
119
119
  date_registered = Array.wrap(query.to_h["crm_item"]).find { |cr| cr["name"] == "deposit-timestamp" }
120
- if date_registered && date_registered.fetch("__content__", nil).to_i > 15000000000000 # check for valid input string that includes seconds
121
- date_registered = DateTime.strptime(date_registered.fetch("__content__", ""), "%Y%m%d%H%M%S").strftime('%Y-%m-%dT%H:%M:%SZ')
122
- else
123
- date_registered = nil
124
- end
120
+ date_registered = get_datetime_from_time(date_registered.fetch("__content__", nil)) if date_registered.present?
121
+
125
122
  # check that date is valid iso8601 date
126
123
  date_published = nil unless Date.edtf(date_published.to_h["date"]).present?
127
124
  date_updated = nil unless Date.edtf(date_updated.to_h["date"]).present?
@@ -175,7 +172,7 @@ module Bolognese
175
172
  "publication_year" => publication_year,
176
173
  "descriptions" => crossref_description(bibliographic_metadata),
177
174
  "rights_list" => crossref_license(program_metadata),
178
- "version_info" => nil,
175
+ "version" => nil,
179
176
  "subjects" => nil,
180
177
  "language" => nil,
181
178
  "sizes" => nil,
data/lib/bolognese/readers/datacite_reader.rb CHANGED
@@ -93,7 +93,7 @@ module Bolognese
93
93
  "bibtex" => Bolognese::Utils::CR_TO_BIB_TRANSLATIONS[resource_type.to_s.underscore.camelcase] || Bolognese::Utils::SO_TO_BIB_TRANSLATIONS[schema_org] || "misc",
94
94
  "ris" => Bolognese::Utils::CR_TO_RIS_TRANSLATIONS[resource_type.to_s.underscore.camelcase] || Bolognese::Utils::DC_TO_RIS_TRANSLATIONS[resource_type_general.to_s.dasherize] || "GEN"
95
95
  }.compact
96
-
96
+
97
97
  titles = Array.wrap(meta.dig("titles", "title")).map do |r|
98
98
  if r.blank?
99
99
  nil
@@ -122,15 +122,17 @@ module Bolognese
122
122
  { "rights" => r["__content__"], "rightsUri" => normalize_url(r["rightsURI"]), "lang" => r["lang"] }.compact
123
123
  end
124
124
  end.compact
125
- subjects = Array.wrap(meta.dig("subjects", "subject")).map do |k|
126
- if k.blank?
127
- nil
128
- elsif k.is_a?(String)
129
- { "subject" => sanitize(k) }
130
- elsif k.is_a?(Hash)
131
- { "subject" => sanitize(k["__content__"]), "subjectScheme" => k["subjectScheme"], "schemeUri" => k["schemeURI"], "valueUri" => k["valueURI"], "lang" => k["lang"] }.compact
125
+
126
+ subjects = Array.wrap(meta.dig("subjects", "subject")).reduce([]) do |sum, subject|
127
+ if subject.is_a?(String)
128
+ sum += name_to_fos(subject)
129
+ elsif subject.is_a?(Hash)
130
+ sum += hsh_to_fos(subject)
132
131
  end
133
- end.compact
132
+
133
+ sum
134
+ end.uniq
135
+
134
136
  dates = Array.wrap(meta.dig("dates", "date")).map do |r|
135
137
  if r.is_a?(Hash) && date = sanitize(r["__content__"]).presence
136
138
  if Date.edtf(date).present? || Bolognese::Utils::UNKNOWN_INFORMATION.key?(date)
@@ -164,12 +166,15 @@ module Bolognese
164
166
  funding_references = Array.wrap(meta.dig("fundingReferences", "fundingReference")).compact.map do |fr|
165
167
  scheme_uri = parse_attributes(fr["funderIdentifier"], content: "schemeURI")
166
168
  funder_identifier = parse_attributes(fr["funderIdentifier"])
167
- funder_identifier = !funder_identifier.to_s.start_with?("https://","http://") && scheme_uri.present? ? normalize_id(scheme_uri + funder_identifier) : normalize_id(funder_identifier)
168
-
169
+ funder_identifier_type = parse_attributes(fr["funderIdentifier"], content: "funderIdentifierType")
170
+ if funder_identifier_type != "Other"
171
+ funder_identifier = !funder_identifier.to_s.start_with?("https://","http://") && scheme_uri.present? ? normalize_id(scheme_uri + funder_identifier) : normalize_id(funder_identifier)
172
+ end
173
+
169
174
  {
170
175
  "funderName" => fr["funderName"],
171
176
  "funderIdentifier" => funder_identifier,
172
- "funderIdentifierType" => parse_attributes(fr["funderIdentifier"], content: "funderIdentifierType"),
177
+ "funderIdentifierType" => funder_identifier_type,
173
178
  "awardNumber" => parse_attributes(fr["awardNumber"]),
174
179
  "awardUri" => parse_attributes(fr["awardNumber"], content: "awardURI"),
175
180
  "awardTitle" => fr["awardTitle"] }.compact
@@ -181,7 +186,7 @@ module Bolognese
181
186
  rid = ri["__content__"]
182
187
  end
183
188
 
184
- {
189
+ {
185
190
  "relatedIdentifier" => rid,
186
191
  "relatedIdentifierType" => ri["relatedIdentifierType"],
187
192
  "relationType" => ri["relationType"],
@@ -230,7 +235,7 @@ module Bolognese
230
235
  "publication_year" => parse_attributes(meta.fetch("publicationYear", nil), first: true).to_s.strip.presence,
231
236
  "descriptions" => descriptions,
232
237
  "rights_list" => Array.wrap(rights_list),
233
- "version_info" => meta.fetch("version", nil).to_s.presence,
238
+ "version" => meta.fetch("version", nil).to_s.presence,
234
239
  "subjects" => subjects,
235
240
  "language" => parse_attributes(meta.fetch("language", nil), first: true).to_s.strip.presence,
236
241
  "geo_locations" => geo_locations,
data/lib/bolognese/readers/npm_reader.rb CHANGED
@@ -105,7 +105,7 @@ module Bolognese
105
105
  #"publication_year" => publication_year,
106
106
  "descriptions" => meta.fetch("description", nil).present? ? [{ "description" => sanitize(meta.fetch("description")), "descriptionType" => "Abstract" }] : [],
107
107
  "rights_list" => rights_list,
108
- "version_info" => meta.fetch("version", nil),
108
+ "version" => meta.fetch("version", nil),
109
109
  "subjects" => subjects
110
110
  #"state" => state
111
111
  }.merge(read_options)
data/lib/bolognese/readers/ris_reader.rb CHANGED
@@ -82,8 +82,10 @@ module Bolognese
82
82
  nil
83
83
  end
84
84
  state = meta.fetch("DO", nil).present? || read_options.present? ? "findable" : "not_found"
85
- subjects = Array.wrap(meta.fetch("KW", nil)).map do |s|
86
- { "subject" => s }
85
+ subjects = Array.wrap(meta.fetch("KW", nil)).reduce([]) do |sum, subject|
86
+ sum += name_to_fos(subject)
87
+
88
+ sum
87
89
  end
88
90
 
89
91
  { "id" => id,
data/lib/bolognese/readers/schema_org_reader.rb CHANGED
@@ -150,8 +150,13 @@ module Bolognese
150
150
  "geoLocationBox" => geo_location_box
151
151
  }.compact
152
152
  end
153
- subjects = Array.wrap(meta.fetch("keywords", nil).to_s.split(", ")).map do |s|
154
- { "subject" => s }
153
+
154
+ # handle keywords as array and as comma-separated string
155
+ subjects = meta.fetch("keywords", nil)
156
+ subjects = subjects.to_s.split(", ") if subjects.is_a?(String)
157
+ subjects = Array.wrap(subjects).reduce([]) do |sum, subject|
158
+ sum += name_to_fos(subject)
159
+ sum
155
160
  end
156
161
 
157
162
  { "id" => id,
@@ -173,7 +178,7 @@ module Bolognese
173
178
  "dates" => dates,
174
179
  "descriptions" => meta.fetch("description", nil).present? ? [{ "description" => sanitize(meta.fetch("description")), "descriptionType" => "Abstract" }] : nil,
175
180
  "rights_list" => rights_list,
176
- "version_info" => meta.fetch("version", nil).to_s.presence,
181
+ "version" => meta.fetch("version", nil).to_s.presence,
177
182
  "subjects" => subjects,
178
183
  "state" => state,
179
184
  "schema_version" => meta.fetch("schemaVersion", nil).to_s.presence,
data/lib/bolognese/utils.rb CHANGED
@@ -974,6 +974,14 @@ module Bolognese
974
974
  nil
975
975
  end
976
976
 
977
+ # iso8601 datetime without hyphens and colons, used by Crossref
978
+ # return nil if invalid
979
+ def get_datetime_from_time(time)
980
+ DateTime.strptime(time.to_s, "%Y%m%d%H%M%S").strftime('%Y-%m-%dT%H:%M:%SZ')
981
+ rescue ArgumentError
982
+ nil
983
+ end
984
+
977
985
  def get_date(dates, date_type)
978
986
  dd = Array.wrap(dates).find { |d| d["dateType"] == date_type } || {}
979
987
  dd.fetch("date", nil)
@@ -1048,5 +1056,100 @@ module Bolognese
1048
1056
  error_array
1049
1057
  end
1050
1058
 
1059
+ def name_to_fos(name)
1060
+ # first find subject in Fields of Science (OECD)
1061
+ fos = JSON.load(File.read(File.expand_path('../../../resources/oecd/fos-mappings.json', __FILE__))).fetch("fosFields")
1062
+
1063
+ subject = fos.find { |l| l["fosLabel"] == name || "FOS: " + l["fosLabel"] == name }
1064
+
1065
+ if subject
1066
+ return [{
1067
+ "subject" => sanitize(name) },
1068
+ {
1069
+ "subject" => "FOS: " + subject["fosLabel"],
1070
+ "subjectScheme" => "Fields of Science and Technology (FOS)",
1071
+ "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf"
1072
+ }]
1073
+ end
1074
+
1075
+ # if not found, look in Fields of Research (Australian and New Zealand Standard Research Classification)
1076
+ # and map to Fields of Science. Add an extra entry for the latter
1077
+ fores = JSON.load(File.read(File.expand_path('../../../resources/oecd/for-mappings.json', __FILE__)))
1078
+ for_fields = fores.fetch("forFields")
1079
+ for_disciplines = fores.fetch("forDisciplines")
1080
+
1081
+ subject = for_fields.find { |l| l["forLabel"] == name } ||
1082
+ for_disciplines.find { |l| l["forLabel"] == name }
1083
+
1084
+ if subject
1085
+ [{
1086
+ "subject" => sanitize(name) },
1087
+ {
1088
+ "subject" => "FOS: " + subject["fosLabel"],
1089
+ "subjectScheme" => "Fields of Science and Technology (FOS)",
1090
+ "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf"
1091
+ }]
1092
+ else
1093
+ [{ "subject" => sanitize(name) }]
1094
+ end
1095
+ end
1096
+
1097
+ def hsh_to_fos(hsh)
1098
+ # first find subject in Fields of Science (OECD)
1099
+ fos = JSON.load(File.read(File.expand_path('../../../resources/oecd/fos-mappings.json', __FILE__))).fetch("fosFields")
1100
+ subject = fos.find { |l| l["fosLabel"] == hsh["__content__"] || "FOS: " + l["fosLabel"] == hsh["__content__"] }
1101
+
1102
+ if subject
1103
+ return [{
1104
+ "subject" => sanitize(hsh["__content__"]),
1105
+ "subjectScheme" => hsh["subjectScheme"],
1106
+ "schemeUri" => hsh["schemeURI"],
1107
+ "valueUri" => hsh["valueURI"],
1108
+ "lang" => hsh["lang"] }.compact,
1109
+ {
1110
+ "subject" => "FOS: " + subject["fosLabel"],
1111
+ "subjectScheme" => "Fields of Science and Technology (FOS)",
1112
+ "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf" }.compact]
1113
+ end
1114
+
1115
+ # if not found, look in Fields of Research (Australian and New Zealand Standard Research Classification)
1116
+ # and map to Fields of Science. Add an extra entry for the latter
1117
+ fores = JSON.load(File.read(File.expand_path('../../../resources/oecd/for-mappings.json', __FILE__)))
1118
+ for_fields = fores.fetch("forFields")
1119
+ for_disciplines = fores.fetch("forDisciplines")
1120
+
1121
+ # try to extract forId
1122
+ if hsh["subjectScheme"] == "FOR"
1123
+ for_id = hsh["__content__"].split(" ").first
1124
+ for_id = for_id.rjust(6, "0")
1125
+
1126
+ subject = for_fields.find { |l| l["forId"] == for_id } ||
1127
+ for_disciplines.find { |l| l["forId"] == for_id[0..3] }
1128
+ else
1129
+ subject = for_fields.find { |l| l["forLabel"] == hsh["__content__"] } ||
1130
+ for_disciplines.find { |l| l["forLabel"] == hsh["__content__"] }
1131
+ end
1132
+
1133
+ if subject
1134
+ [{
1135
+ "subject" => sanitize(hsh["__content__"]),
1136
+ "subjectScheme" => hsh["subjectScheme"],
1137
+ "schemeUri" => hsh["schemeURI"],
1138
+ "valueUri" => hsh["valueURI"],
1139
+ "lang" => hsh["lang"] }.compact,
1140
+ {
1141
+ "subject" => "FOS: " + subject["fosLabel"],
1142
+ "subjectScheme" => "Fields of Science and Technology (FOS)",
1143
+ "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf"
1144
+ }]
1145
+ else
1146
+ [{
1147
+ "subject" => sanitize(hsh["__content__"]),
1148
+ "subjectScheme" => hsh["subjectScheme"],
1149
+ "schemeUri" => hsh["schemeURI"],
1150
+ "valueUri" => hsh["valueURI"],
1151
+ "lang" => hsh["lang"] }.compact]
1152
+ end
1153
+ end
1051
1154
  end
1052
1155
  end