bolognese 1.5.19 → 1.6.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -0
  3. data/Gemfile.lock +16 -13
  4. data/lib/bolognese/datacite_utils.rb +27 -3
  5. data/lib/bolognese/metadata_utils.rb +1 -0
  6. data/lib/bolognese/readers/bibtex_reader.rb +2 -1
  7. data/lib/bolognese/readers/citeproc_reader.rb +5 -3
  8. data/lib/bolognese/readers/codemeta_reader.rb +6 -4
  9. data/lib/bolognese/readers/crossref_reader.rb +1 -1
  10. data/lib/bolognese/readers/datacite_reader.rb +20 -15
  11. data/lib/bolognese/readers/ris_reader.rb +4 -2
  12. data/lib/bolognese/readers/schema_org_reader.rb +10 -6
  13. data/lib/bolognese/utils.rb +184 -9
  14. data/lib/bolognese/version.rb +1 -1
  15. data/lib/bolognese/writers/bibtex_writer.rb +2 -1
  16. data/lib/bolognese/writers/codemeta_writer.rb +2 -1
  17. data/resources/oecd/for-mappings.json +1101 -0
  18. data/resources/oecd/fos-mappings.json +198 -0
  19. data/resources/spdx/licenses.json +5297 -0
  20. data/spec/datacite_utils_spec.rb +32 -8
  21. data/spec/fixtures/datacite-example-affiliation.xml +1 -1
  22. data/spec/fixtures/datacite-funderIdentifier.xml +4 -0
  23. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/fos/hsh_to_fos_for_match.yml +44 -0
  24. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/fos/hsh_to_fos_match.yml +44 -0
  25. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/fos/hsh_to_fos_no_match.yml +44 -0
  26. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/fos/name_to_fos_for_match.yml +44 -0
  27. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/fos/name_to_fos_match.yml +44 -0
  28. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/fos/name_to_fos_no_match.yml +44 -0
  29. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_datacite_metadata/even_more_subject_scheme_FOR.yml +97 -0
  30. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_datacite_metadata/more_subject_scheme_FOR.yml +107 -0
  31. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_datacite_metadata/subject_scheme_FOR.yml +110 -0
  32. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/insert_dates/insert.yml +49 -0
  33. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/insert_formats/insert.yml +49 -0
  34. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/insert_person/creator_given_and_family_name.yml +49 -0
  35. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/insert_sizes/insert.yml +49 -0
  36. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/normalize_cc_url/not_found.yml +44 -0
  37. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/normalize_cc_url/with_trailing_slash.yml +44 -0
  38. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/normalize_cc_url/with_trailing_slash_and_to_https.yml +44 -0
  39. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/normalize_url/to_https.yml +44 -0
  40. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/normalize_url/with_trailing_slash_and_to_https.yml +44 -0
  41. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/spdx/hsh_to_spdx_id.yml +44 -0
  42. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/spdx/hsh_to_spdx_not_found.yml +44 -0
  43. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/spdx/hsh_to_spdx_url.yml +44 -0
  44. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/spdx/name_to_spdx_exists.yml +44 -0
  45. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/spdx/name_to_spdx_id.yml +44 -0
  46. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/spdx/name_to_spdx_not_found.yml +44 -0
  47. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/url_to_https/http.yml +44 -0
  48. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/url_to_https/uri.yml +44 -0
  49. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/url_to_https/with_trailing_slash.yml +44 -0
  50. data/spec/readers/bibtex_reader_spec.rb +5 -1
  51. data/spec/readers/codemeta_reader_spec.rb +20 -0
  52. data/spec/readers/crossref_reader_spec.rb +21 -5
  53. data/spec/readers/datacite_reader_spec.rb +150 -10
  54. data/spec/readers/schema_org_reader_spec.rb +15 -1
  55. data/spec/utils_spec.rb +108 -0
  56. data/spec/writers/bibtex_writer_spec.rb +6 -0
  57. data/spec/writers/citeproc_writer_spec.rb +10 -0
  58. data/spec/writers/crosscite_writer_spec.rb +5 -1
  59. data/spec/writers/datacite_json_writer_spec.rb +5 -1
  60. data/spec/writers/datacite_writer_spec.rb +30 -6
  61. data/spec/writers/schema_org_writer_spec.rb +11 -0
  62. metadata +32 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9f12f43e3a56ef075609d39fab403da351db40d2517ce8af31d945275a98f785
4
- data.tar.gz: d8f7e1240112ab75a299b5d750dd6218e2f03eb67d4ea14d5c978696d85984ec
3
+ metadata.gz: 42cbc43f26aa6dd2044b02555d97077aa592b2ad298a3a4381cd1a8ca8100f2b
4
+ data.tar.gz: a61172b20d5318dfb921429d5946fe807d71d9951bebc5a7397fe705105d1df9
5
5
  SHA512:
6
- metadata.gz: 35d704991da3b2b84b4d30d02d206adff835e45d5dc887ea5cb85fcab5568c5b0a95a214f2a23de770d47f8f731773286ae81ae33a9bd43fcd04ae403301789b
7
- data.tar.gz: ef9fbd252cbe2b1e7c868c574ea8518b75b38063f9d47e629cda4e119d8174dfaa39c1b08c7ab43ae43b83dbd1262d6ec91a5b7dae087005ca60ff995c410f70
6
+ metadata.gz: b378e7868ce5d7be3f304cf68016cd0c274a972941813973047d0e03f98a4efd5e4fafd660f576a42505fcad1c660a2c8cd8b51d88d9920d112a699e46f9882c
7
+ data.tar.gz: d29615c3446981b5f63da5c9aea4529ca40042fe89f723dddd27c4a46e3fa077075cb3113ee74d330c11dc01c7ae76122da37e7250f8da7a07e732e19d3144d0
data/.gitignore CHANGED
@@ -55,3 +55,5 @@ coverage/
55
55
  .env.*
56
56
  !.env.example
57
57
  !.env.travis
58
+
59
+ .vscode
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- bolognese (1.5.19)
4
+ bolognese (1.6.3)
5
5
  activesupport (>= 4.2.5)
6
6
  benchmark_methods (~> 0.7)
7
7
  bibtex-ruby (>= 5.1.0)
@@ -30,12 +30,12 @@ PATH
30
30
  GEM
31
31
  remote: https://rubygems.org/
32
32
  specs:
33
- activesupport (6.0.2.2)
33
+ activesupport (6.0.3.1)
34
34
  concurrent-ruby (~> 1.0, >= 1.0.2)
35
35
  i18n (>= 0.7, < 2)
36
36
  minitest (~> 5.1)
37
37
  tzinfo (~> 1.1)
38
- zeitwerk (~> 2.2)
38
+ zeitwerk (~> 2.2, >= 2.2.2)
39
39
  addressable (2.7.0)
40
40
  public_suffix (>= 2.0.2, < 5.0)
41
41
  benchmark_methods (0.7)
@@ -105,7 +105,7 @@ GEM
105
105
  oj (>= 2.8.3)
106
106
  oj_mimic_json (~> 1.0, >= 1.0.1)
107
107
  mini_portile2 (2.4.0)
108
- minitest (5.14.0)
108
+ minitest (5.14.1)
109
109
  multi_json (1.14.1)
110
110
  multipart-post (2.1.1)
111
111
  namae (1.0.1)
@@ -123,40 +123,43 @@ GEM
123
123
  rack-test (0.8.3)
124
124
  rack (>= 1.0, < 3)
125
125
  rake (12.3.3)
126
- rdf (3.1.1)
126
+ rdf (3.1.2)
127
127
  hamster (~> 3.0)
128
128
  link_header (~> 0.0, >= 0.0.8)
129
129
  rdf-aggregate-repo (3.1.0)
130
130
  rdf (~> 3.1)
131
- rdf-rdfa (3.1.0)
131
+ rdf-rdfa (3.1.1)
132
132
  haml (~> 5.1)
133
133
  htmlentities (~> 4.3)
134
- rdf (~> 3.1)
134
+ rdf (~> 3.1, >= 3.1.2)
135
135
  rdf-aggregate-repo (~> 3.1)
136
+ rdf-vocab (~> 3.1, >= 3.1.5)
136
137
  rdf-xsd (~> 3.1)
137
138
  rdf-rdfxml (3.1.0)
138
139
  htmlentities (~> 4.3)
139
140
  rdf (~> 3.1)
140
141
  rdf-rdfa (~> 3.1)
141
142
  rdf-xsd (~> 3.1)
142
- rdf-turtle (3.1.0)
143
+ rdf-turtle (3.1.1)
143
144
  ebnf (~> 1.2)
144
- rdf (~> 3.1)
145
+ rdf (~> 3.1, >= 3.1.2)
146
+ rdf-vocab (3.1.5)
147
+ rdf (~> 3.1, >= 3.1.2)
145
148
  rdf-xsd (3.1.0)
146
149
  rdf (~> 3.1)
147
150
  rspec (3.9.0)
148
151
  rspec-core (~> 3.9.0)
149
152
  rspec-expectations (~> 3.9.0)
150
153
  rspec-mocks (~> 3.9.0)
151
- rspec-core (3.9.1)
152
- rspec-support (~> 3.9.1)
153
- rspec-expectations (3.9.1)
154
+ rspec-core (3.9.2)
155
+ rspec-support (~> 3.9.3)
156
+ rspec-expectations (3.9.2)
154
157
  diff-lcs (>= 1.2.0, < 2.0)
155
158
  rspec-support (~> 3.9.0)
156
159
  rspec-mocks (3.9.1)
157
160
  diff-lcs (>= 1.2.0, < 2.0)
158
161
  rspec-support (~> 3.9.0)
159
- rspec-support (3.9.2)
162
+ rspec-support (3.9.3)
160
163
  rspec-xsd (0.1.0)
161
164
  nokogiri (~> 1.6)
162
165
  rspec (~> 3)
data/lib/bolognese/datacite_utils.rb CHANGED
@@ -28,12 +28,14 @@ module Bolognese
28
28
  insert_publisher(xml)
29
29
  insert_publication_year(xml)
30
30
  insert_resource_type(xml)
31
- insert_alternate_identifiers(xml)
32
31
  insert_subjects(xml)
33
- insert_language(xml)
34
32
  insert_contributors(xml)
35
33
  insert_dates(xml)
34
+ insert_language(xml)
35
+ insert_alternate_identifiers(xml)
36
36
  insert_related_identifiers(xml)
37
+ insert_sizes(xml)
38
+ insert_formats(xml)
37
39
  insert_version(xml)
38
40
  insert_rights_list(xml)
39
41
  insert_descriptions(xml)
@@ -201,6 +203,22 @@ module Bolognese
201
203
  end
202
204
  end
203
205
 
206
# Calls xml.sizes with a block that calls xml.size once for every entry
# of `sizes`. The outer xml.sizes call is made even when `sizes` is empty.
def insert_sizes(xml)
  xml.sizes { Array.wrap(sizes).each { |entry| xml.size(entry) } }
end
213
+
214
# Calls xml.formats with a block that calls xml.format once for every entry
# of `formats`. The outer xml.formats call is made even when `formats` is
# empty.
def insert_formats(xml)
  xml.formats { Array.wrap(formats).each { |entry| xml.format(entry) } }
end
221
+
204
222
  def insert_rights_list(xml)
205
223
  return xml unless rights_list.present?
206
224
 
@@ -214,7 +232,13 @@ module Bolognese
214
232
  r["rightsUri"] = normalize_id(rights)
215
233
  end
216
234
 
217
- attributes = { 'rightsURI' => r["rightsUri"], 'xml:lang' => r["lang"] }.compact
235
+ attributes = {
236
+ "rightsURI" => r["rightsUri"],
237
+ "rightsIdentifier" => r["rightsIdentifier"],
238
+ "rightsIdentifierScheme" => r["rightsIdentifierScheme"],
239
+ "schemeURI" => r["schemeUri"],
240
+ "xml:lang" => r["lang"]
241
+ }.compact
218
242
 
219
243
  xml.rights(r["rights"], attributes)
220
244
  end
data/lib/bolognese/metadata_utils.rb CHANGED
@@ -158,6 +158,7 @@ module Bolognese
158
158
  "publisher" => publisher,
159
159
  "title" => parse_attributes(titles, content: "title", first: true),
160
160
  "URL" => url,
161
+ "copyright" => Array.wrap(rights_list).map { |l| l["rights"] }.first,
161
162
  "version" => version_info
162
163
  }.compact.symbolize_keys
163
164
  end
data/lib/bolognese/readers/bibtex_reader.rb CHANGED
@@ -76,6 +76,7 @@ module Bolognese
76
76
  "dateType" => "Issued" }]
77
77
  end
78
78
  publication_year = meta.try(:date).present? ? meta.date.to_s[0..3] : nil
79
+ rights_list = meta.try(:copyright).present? ? [hsh_to_spdx("rightsURI" => meta[:copyright])] : []
79
80
 
80
81
  { "id" => normalize_doi(doi),
81
82
  "types" => types,
@@ -90,7 +91,7 @@ module Bolognese
90
91
  "dates" => dates,
91
92
  "publication_year" => publication_year,
92
93
  "descriptions" => meta.try(:abstract).present? ? [{ "description" => meta.try(:abstract) && sanitize(meta.abstract.to_s).presence, "descriptionType" => "Abstract" }] : [],
93
- "rights_list" => meta.try(:copyright).present? ? [{ "rightsUri" => meta.try(:copyright).to_s.presence }.compact] : [],
94
+ "rights_list" => rights_list,
94
95
  "state" => state
95
96
  }.merge(read_options)
96
97
  end
data/lib/bolognese/readers/citeproc_reader.rb CHANGED
@@ -60,7 +60,7 @@ module Bolognese
60
60
  end
61
61
  publication_year = get_date_from_date_parts(meta.fetch("issued", nil)).to_s[0..3]
62
62
  rights_list = if meta.fetch("copyright", nil)
63
- [{ "rightsUri" => normalize_url(meta.fetch("copyright")) }.compact]
63
+ [hsh_to_spdx("rightsURI" => meta.fetch("copyright"))]
64
64
  end
65
65
  related_identifiers = if meta.fetch("container-title", nil).present? && meta.fetch("ISSN", nil).present?
66
66
  [{ "type" => "Periodical",
@@ -100,8 +100,10 @@ module Bolognese
100
100
  doi = Array.wrap(identifiers).find { |r| r["identifierType"] == "DOI" }.to_h.fetch("identifier", nil)
101
101
 
102
102
  state = id.present? || read_options.present? ? "findable" : "not_found"
103
- subjects = Array.wrap(meta.fetch("categories", nil)).map do |s|
104
- { "subject" => s }
103
+ subjects = Array.wrap(meta.fetch("categories", nil)).reduce([]) do |sum, subject|
104
+ sum += name_to_fos(subject)
105
+
106
+ sum
105
107
  end
106
108
 
107
109
  { "id" => id,
data/lib/bolognese/readers/codemeta_reader.rb CHANGED
@@ -57,13 +57,15 @@ module Bolognese
57
57
  "bibtex" => Bolognese::Utils::SO_TO_BIB_TRANSLATIONS[schema_org] || "misc",
58
58
  "ris" => Bolognese::Utils::SO_TO_RIS_TRANSLATIONS[schema_org] || "GEN"
59
59
  }.compact
60
- subjects = Array.wrap(meta.fetch("tags", nil)).map do |s|
61
- { "subject" => s }
60
+ subjects = Array.wrap(meta.fetch("tags", nil)).reduce([]) do |sum, subject|
61
+ sum += name_to_fos(subject)
62
+
63
+ sum
62
64
  end
63
65
 
64
66
  has_title = meta.fetch("title", nil)
65
-
66
67
  titles = has_title.nil? ? [{ "title" => meta.fetch("name", nil) }] : [{ "title" => has_title }]
68
+ rights_list = meta.fetch("licenseId", nil).present? ? [hsh_to_spdx("rightsIdentifier" => meta.fetch("licenseId"))] : nil
67
69
 
68
70
  { "id" => id,
69
71
  "types" => types,
@@ -78,7 +80,7 @@ module Bolognese
78
80
  "dates" => dates,
79
81
  "publication_year" => publication_year,
80
82
  "descriptions" => meta.fetch("description", nil).present? ? [{ "description" => sanitize(meta.fetch("description")), "descriptionType" => "Abstract" }] : nil,
81
- "rights_list" => [{ "rightsUri" => meta.fetch("license", nil) }.compact],
83
+ "rights_list" => rights_list,
82
84
  "version_info" => meta.fetch("version", nil),
83
85
  "subjects" => subjects,
84
86
  "state" => state
data/lib/bolognese/readers/crossref_reader.rb CHANGED
@@ -211,7 +211,7 @@ module Bolognese
211
211
  access_indicator = Array.wrap(program_metadata).find { |m| m["name"] == "AccessIndicators" }
212
212
  if access_indicator.present?
213
213
  Array.wrap(access_indicator["license_ref"]).map do |license|
214
- { "rightsUri" => normalize_url(parse_attributes(license)) }
214
+ hsh_to_spdx("rightsURI" => parse_attributes(license))
215
215
  end.uniq
216
216
  else
217
217
  []
data/lib/bolognese/readers/datacite_reader.rb CHANGED
@@ -93,7 +93,7 @@ module Bolognese
93
93
  "bibtex" => Bolognese::Utils::CR_TO_BIB_TRANSLATIONS[resource_type.to_s.underscore.camelcase] || Bolognese::Utils::SO_TO_BIB_TRANSLATIONS[schema_org] || "misc",
94
94
  "ris" => Bolognese::Utils::CR_TO_RIS_TRANSLATIONS[resource_type.to_s.underscore.camelcase] || Bolognese::Utils::DC_TO_RIS_TRANSLATIONS[resource_type_general.to_s.dasherize] || "GEN"
95
95
  }.compact
96
-
96
+
97
97
  titles = Array.wrap(meta.dig("titles", "title")).map do |r|
98
98
  if r.blank?
99
99
  nil
@@ -117,20 +117,22 @@ module Bolognese
117
117
  if r.blank?
118
118
  nil
119
119
  elsif r.is_a?(String)
120
- { "rights" => r }
120
+ name_to_spdx(r)
121
121
  elsif r.is_a?(Hash)
122
- { "rights" => r["__content__"], "rightsUri" => normalize_url(r["rightsURI"]), "lang" => r["lang"] }.compact
122
+ hsh_to_spdx(r)
123
123
  end
124
124
  end.compact
125
- subjects = Array.wrap(meta.dig("subjects", "subject")).map do |k|
126
- if k.blank?
127
- nil
128
- elsif k.is_a?(String)
129
- { "subject" => sanitize(k) }
130
- elsif k.is_a?(Hash)
131
- { "subject" => sanitize(k["__content__"]), "subjectScheme" => k["subjectScheme"], "schemeUri" => k["schemeURI"], "valueUri" => k["valueURI"], "lang" => k["lang"] }.compact
125
+
126
+ subjects = Array.wrap(meta.dig("subjects", "subject")).reduce([]) do |sum, subject|
127
+ if subject.is_a?(String)
128
+ sum += name_to_fos(subject)
129
+ elsif subject.is_a?(Hash)
130
+ sum += hsh_to_fos(subject)
132
131
  end
133
- end.compact
132
+
133
+ sum
134
+ end.uniq
135
+
134
136
  dates = Array.wrap(meta.dig("dates", "date")).map do |r|
135
137
  if r.is_a?(Hash) && date = sanitize(r["__content__"]).presence
136
138
  if Date.edtf(date).present? || Bolognese::Utils::UNKNOWN_INFORMATION.key?(date)
@@ -164,12 +166,15 @@ module Bolognese
164
166
  funding_references = Array.wrap(meta.dig("fundingReferences", "fundingReference")).compact.map do |fr|
165
167
  scheme_uri = parse_attributes(fr["funderIdentifier"], content: "schemeURI")
166
168
  funder_identifier = parse_attributes(fr["funderIdentifier"])
167
- funder_identifier = !funder_identifier.to_s.start_with?("https://","http://") && scheme_uri.present? ? normalize_id(scheme_uri + funder_identifier) : normalize_id(funder_identifier)
168
-
169
+ funder_identifier_type = parse_attributes(fr["funderIdentifier"], content: "funderIdentifierType")
170
+ if funder_identifier_type != "Other"
171
+ funder_identifier = !funder_identifier.to_s.start_with?("https://","http://") && scheme_uri.present? ? normalize_id(scheme_uri + funder_identifier) : normalize_id(funder_identifier)
172
+ end
173
+
169
174
  {
170
175
  "funderName" => fr["funderName"],
171
176
  "funderIdentifier" => funder_identifier,
172
- "funderIdentifierType" => parse_attributes(fr["funderIdentifier"], content: "funderIdentifierType"),
177
+ "funderIdentifierType" => funder_identifier_type,
173
178
  "awardNumber" => parse_attributes(fr["awardNumber"]),
174
179
  "awardUri" => parse_attributes(fr["awardNumber"], content: "awardURI"),
175
180
  "awardTitle" => fr["awardTitle"] }.compact
@@ -181,7 +186,7 @@ module Bolognese
181
186
  rid = ri["__content__"]
182
187
  end
183
188
 
184
- {
189
+ {
185
190
  "relatedIdentifier" => rid,
186
191
  "relatedIdentifierType" => ri["relatedIdentifierType"],
187
192
  "relationType" => ri["relationType"],
data/lib/bolognese/readers/ris_reader.rb CHANGED
@@ -82,8 +82,10 @@ module Bolognese
82
82
  nil
83
83
  end
84
84
  state = meta.fetch("DO", nil).present? || read_options.present? ? "findable" : "not_found"
85
- subjects = Array.wrap(meta.fetch("KW", nil)).map do |s|
86
- { "subject" => s }
85
+ subjects = Array.wrap(meta.fetch("KW", nil)).reduce([]) do |sum, subject|
86
+ sum += name_to_fos(subject)
87
+
88
+ sum
87
89
  end
88
90
 
89
91
  { "id" => id,
data/lib/bolognese/readers/schema_org_reader.rb CHANGED
@@ -107,10 +107,9 @@ module Bolognese
107
107
  Array.wrap(schema_org_is_supplement_to(meta)) +
108
108
  Array.wrap(schema_org_is_supplemented_by(meta))
109
109
 
110
- rights_list = [{
111
- "rightsUri" => parse_attributes(meta.fetch("license", nil), content: "id", first: true),
112
- "rights" => parse_attributes(meta.fetch("license", nil), content: "name", first: true)
113
- }]
110
+ rights_list = Array.wrap(meta.fetch("license", nil)).compact.map do |rl|
111
+ hsh_to_spdx("__content__" => rl["name"], "rightsURI" => rl["id"])
112
+ end
114
113
 
115
114
  funding_references = Array.wrap(meta.fetch("funder", nil)).compact.map do |fr|
116
115
  if fr["@id"].present?
@@ -150,8 +149,13 @@ module Bolognese
150
149
  "geoLocationBox" => geo_location_box
151
150
  }.compact
152
151
  end
153
- subjects = Array.wrap(meta.fetch("keywords", nil).to_s.split(", ")).map do |s|
154
- { "subject" => s }
152
+
153
+ # handle keywords as array and as comma-separated string
154
+ subjects = meta.fetch("keywords", nil)
155
+ subjects = subjects.to_s.split(", ") if subjects.is_a?(String)
156
+ subjects = Array.wrap(subjects).reduce([]) do |sum, subject|
157
+ sum += name_to_fos(subject)
158
+ sum
155
159
  end
156
160
 
157
161
  { "id" => id,
data/lib/bolognese/utils.rb CHANGED
@@ -2,13 +2,44 @@
2
2
 
3
3
  module Bolognese
4
4
  module Utils
5
- LICENSE_NAMES = {
6
- "http://creativecommons.org/publicdomain/zero/1.0/" => "Public Domain (CC0 1.0)",
7
- "http://creativecommons.org/licenses/by/3.0/" => "Creative Commons Attribution 3.0 (CC-BY 3.0)",
8
- "http://creativecommons.org/licenses/by/4.0/" => "Creative Commons Attribution 4.0 (CC-BY 4.0)",
9
- "http://creativecommons.org/licenses/by-nc/4.0/" => "Creative Commons Attribution Noncommercial 4.0 (CC-BY-NC 4.0)",
10
- "http://creativecommons.org/licenses/by-sa/4.0/" => "Creative Commons Attribution Share Alike 4.0 (CC-BY-SA 4.0)",
11
- "http://creativecommons.org/licenses/by-nc-nd/4.0/" => "Creative Commons Attribution Noncommercial No Derivatives 4.0 (CC-BY-NC-ND 4.0)"
5
# Maps an https Creative Commons license URL without a trailing slash to the
# form used for lookups (the "/legalcode" page, except for the generic
# public-domain entry, which only gains a trailing slash). Consulted by
# normalize_cc_url; URLs that are not listed are passed through unchanged.
#
# Every key must map to the legal code of the *same* license and version:
# by-nd/4.0 previously pointed at the by-nd/2.0 legal code.
NORMALIZED_LICENSES = {
  "https://creativecommons.org/licenses/by/1.0" => "https://creativecommons.org/licenses/by/1.0/legalcode",
  "https://creativecommons.org/licenses/by/2.0" => "https://creativecommons.org/licenses/by/2.0/legalcode",
  "https://creativecommons.org/licenses/by/2.5" => "https://creativecommons.org/licenses/by/2.5/legalcode",
  "https://creativecommons.org/licenses/by/3.0" => "https://creativecommons.org/licenses/by/3.0/legalcode",
  "https://creativecommons.org/licenses/by/4.0" => "https://creativecommons.org/licenses/by/4.0/legalcode",
  "https://creativecommons.org/licenses/by-nc/1.0" => "https://creativecommons.org/licenses/by-nc/1.0/legalcode",
  "https://creativecommons.org/licenses/by-nc/2.0" => "https://creativecommons.org/licenses/by-nc/2.0/legalcode",
  "https://creativecommons.org/licenses/by-nc/2.5" => "https://creativecommons.org/licenses/by-nc/2.5/legalcode",
  "https://creativecommons.org/licenses/by-nc/3.0" => "https://creativecommons.org/licenses/by-nc/3.0/legalcode",
  "https://creativecommons.org/licenses/by-nc/4.0" => "https://creativecommons.org/licenses/by-nc/4.0/legalcode",
  "https://creativecommons.org/licenses/by-nd-nc/1.0" => "https://creativecommons.org/licenses/by-nd-nc/1.0/legalcode",
  "https://creativecommons.org/licenses/by-nd-nc/2.0" => "https://creativecommons.org/licenses/by-nd-nc/2.0/legalcode",
  "https://creativecommons.org/licenses/by-nd-nc/2.5" => "https://creativecommons.org/licenses/by-nd-nc/2.5/legalcode",
  "https://creativecommons.org/licenses/by-nd-nc/3.0" => "https://creativecommons.org/licenses/by-nd-nc/3.0/legalcode",
  "https://creativecommons.org/licenses/by-nd-nc/4.0" => "https://creativecommons.org/licenses/by-nd-nc/4.0/legalcode",
  "https://creativecommons.org/licenses/by-nc-sa/1.0" => "https://creativecommons.org/licenses/by-nc-sa/1.0/legalcode",
  "https://creativecommons.org/licenses/by-nc-sa/2.0" => "https://creativecommons.org/licenses/by-nc-sa/2.0/legalcode",
  "https://creativecommons.org/licenses/by-nc-sa/2.5" => "https://creativecommons.org/licenses/by-nc-sa/2.5/legalcode",
  "https://creativecommons.org/licenses/by-nc-sa/3.0" => "https://creativecommons.org/licenses/by-nc-sa/3.0/legalcode",
  "https://creativecommons.org/licenses/by-nc-sa/4.0" => "https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode",
  "https://creativecommons.org/licenses/by-nd/1.0" => "https://creativecommons.org/licenses/by-nd/1.0/legalcode",
  "https://creativecommons.org/licenses/by-nd/2.0" => "https://creativecommons.org/licenses/by-nd/2.0/legalcode",
  "https://creativecommons.org/licenses/by-nd/2.5" => "https://creativecommons.org/licenses/by-nd/2.5/legalcode",
  "https://creativecommons.org/licenses/by-nd/3.0" => "https://creativecommons.org/licenses/by-nd/3.0/legalcode",
  "https://creativecommons.org/licenses/by-nd/4.0" => "https://creativecommons.org/licenses/by-nd/4.0/legalcode",
  "https://creativecommons.org/licenses/by-sa/1.0" => "https://creativecommons.org/licenses/by-sa/1.0/legalcode",
  "https://creativecommons.org/licenses/by-sa/2.0" => "https://creativecommons.org/licenses/by-sa/2.0/legalcode",
  "https://creativecommons.org/licenses/by-sa/2.5" => "https://creativecommons.org/licenses/by-sa/2.5/legalcode",
  "https://creativecommons.org/licenses/by-sa/3.0" => "https://creativecommons.org/licenses/by-sa/3.0/legalcode",
  "https://creativecommons.org/licenses/by-sa/4.0" => "https://creativecommons.org/licenses/by-sa/4.0/legalcode",
  "https://creativecommons.org/licenses/by-nc-nd/1.0" => "https://creativecommons.org/licenses/by-nc-nd/1.0/legalcode",
  "https://creativecommons.org/licenses/by-nc-nd/2.0" => "https://creativecommons.org/licenses/by-nc-nd/2.0/legalcode",
  "https://creativecommons.org/licenses/by-nc-nd/2.5" => "https://creativecommons.org/licenses/by-nc-nd/2.5/legalcode",
  "https://creativecommons.org/licenses/by-nc-nd/3.0" => "https://creativecommons.org/licenses/by-nc-nd/3.0/legalcode",
  "https://creativecommons.org/licenses/by-nc-nd/4.0" => "https://creativecommons.org/licenses/by-nc-nd/4.0/legalcode",
  "https://creativecommons.org/licenses/publicdomain" => "https://creativecommons.org/licenses/publicdomain/",
  "https://creativecommons.org/publicdomain/zero/1.0" => "https://creativecommons.org/publicdomain/zero/1.0/legalcode",
}
13
44
 
14
45
  DC_TO_SO_TRANSLATIONS = {
@@ -475,7 +506,7 @@ module Bolognese
475
506
  nil
476
507
  end
477
508
 
478
- def normalize_url(id)
509
+ def normalize_url(id, options={})
479
510
  return nil unless id.present?
480
511
 
481
512
  # handle info URIs
@@ -485,13 +516,23 @@ module Bolognese
485
516
  uri = Addressable::URI.parse(id)
486
517
 
487
518
  return nil unless uri && uri.host && %w(http https ftp).include?(uri.scheme)
519
+
520
+ # optionally turn into https URL
521
+ uri.scheme = "https" if options[:https]
488
522
 
489
523
  # clean up URL
490
- PostRank::URI.clean(id)
524
+ uri.path = PostRank::URI.clean(uri.path)
525
+
526
+ uri.to_s
491
527
  rescue Addressable::URI::InvalidURIError
492
528
  nil
493
529
  end
494
530
 
531
# Runs `id` through normalize_url with the https option set, then looks the
# result up in NORMALIZED_LICENSES. When the table has no entry for it, the
# normalized URL itself is returned.
def normalize_cc_url(id)
  https_url = normalize_url(id, https: true)
  NORMALIZED_LICENSES.fetch(https_url) { https_url }
end
535
+
495
536
  def normalize_orcid(orcid)
496
537
  orcid = validate_orcid(orcid)
497
538
  return nil unless orcid.present?
@@ -1056,5 +1097,139 @@ module Bolognese
1056
1097
  error_array
1057
1098
  end
1058
1099
 
1100
# Looks up a license in the bundled SPDX list (resources/spdx/licenses.json)
# by license name, by SPDX licenseId, or by URL (compared against the first
# "seeAlso" entry after normalize_cc_url).
#
# name - the license name, SPDX identifier or license URL to look up.
#
# Returns a hash with "rights", "rightsUri", "rightsIdentifier",
# "rightsIdentifierScheme" and "schemeUri" taken from the matching SPDX
# entry, or { "rights" => name } when nothing matches.
def name_to_spdx(name)
  spdx = JSON.load(File.read(File.expand_path('../../../resources/spdx/licenses.json', __FILE__))).fetch("licenses")

  # Normalize once rather than once per license. The result is nil when
  # normalize_url rejects the input; in that case the URL comparison is
  # skipped so that a license without any "seeAlso" entry cannot match
  # through nil == nil.
  url = normalize_cc_url(name)

  license = spdx.find do |l|
    l["name"] == name ||
      l["licenseId"] == name ||
      (url.present? && Array.wrap(l["seeAlso"]).first == url)
  end

  if license
    {
      "rights" => license["name"],
      "rightsUri" => Array.wrap(license["seeAlso"]).first,
      "rightsIdentifier" => license["licenseId"],
      "rightsIdentifierScheme" => "SPDX",
      "schemeUri" => "https://spdx.org/licenses/" }.compact
  else
    { "rights" => name }
  end
end
1115
+
1116
# Looks up a license in the bundled SPDX list (resources/spdx/licenses.json)
# from a rights hash.
#
# hsh - a string-keyed hash. The lookup reads "rightsIdentifier" (compared to
#       the SPDX licenseId), "rightsURI" (compared to the first "seeAlso"
#       entry after normalize_cc_url) and "rights" (compared to the SPDX name
#       and, after normalize_cc_url, to the first "seeAlso" entry).
#
# Returns the SPDX-derived rights hash (plus "lang" from the input) when a
# license matches. Otherwise the input values are passed through under the
# internal key names, with nil values removed.
def hsh_to_spdx(hsh)
  spdx = JSON.load(File.read(File.expand_path('../../../resources/spdx/licenses.json', __FILE__))).fetch("licenses")

  # Normalize both URL candidates once rather than once per license, and
  # drop nils so that a license without any "seeAlso" entry cannot match
  # through nil == nil.
  urls = [normalize_cc_url(hsh["rightsURI"]), normalize_cc_url(hsh["rights"])].compact

  license = spdx.find do |l|
    l["licenseId"] == hsh["rightsIdentifier"] ||
      l["name"] == hsh["rights"] ||
      urls.include?(Array.wrap(l["seeAlso"]).first)
  end

  if license
    {
      "rights" => license["name"],
      "rightsUri" => Array.wrap(license["seeAlso"]).first,
      "rightsIdentifier" => license["licenseId"],
      "rightsIdentifierScheme" => "SPDX",
      "schemeUri" => "https://spdx.org/licenses/",
      "lang" => hsh["lang"] }.compact
  else
    # Accept both spellings of each key: the attribute-style names
    # ("__content__", "rightsURI", "schemeURI") and the internal names
    # ("rights", "rightsUri", "schemeUri"). The attribute-style name wins
    # when both are present, which keeps earlier results unchanged.
    {
      "rights" => hsh["__content__"] || hsh["rights"],
      "rightsUri" => hsh["rightsURI"] || hsh["rightsUri"],
      "rightsIdentifier" => hsh["rightsIdentifier"],
      "rightsIdentifierScheme" => hsh["rightsIdentifierScheme"],
      "schemeUri" => hsh["schemeURI"] || hsh["schemeUri"],
      "lang" => hsh["lang"] }.compact
  end
end
1138
+
1139
# Turns a plain subject name into a list of subject hashes.
#
# The name is first matched against the labels in
# resources/oecd/fos-mappings.json, with or without the "FOS: " prefix. If
# that fails, it is matched against the "forFields" and then the
# "forDisciplines" labels in resources/oecd/for-mappings.json.
#
# Returns [{ "subject" => sanitized name }], followed by a second hash
# carrying the "FOS: "-prefixed label and the FOS scheme details when a
# match was found.
def name_to_fos(name)
  fos_fields = JSON.load(File.read(File.expand_path('../../../resources/oecd/fos-mappings.json', __FILE__))).fetch("fosFields")
  match = fos_fields.find { |field| field["fosLabel"] == name || "FOS: " + field["fosLabel"] == name }

  # the FOR mappings file is only read when the FOS lookup found nothing
  unless match
    mappings = JSON.load(File.read(File.expand_path('../../../resources/oecd/for-mappings.json', __FILE__)))
    for_fields = mappings.fetch("forFields")
    for_disciplines = mappings.fetch("forDisciplines")

    match = for_fields.find { |field| field["forLabel"] == name }
    match ||= for_disciplines.find { |discipline| discipline["forLabel"] == name }
  end

  result = [{ "subject" => sanitize(name) }]
  return result unless match

  result << {
    "subject" => "FOS: " + match["fosLabel"],
    "subjectScheme" => "Fields of Science and Technology (FOS)",
    "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf"
  }
  result
end
1176
+
1177
# Turns a subject hash into a list of subject hashes.
#
# hsh - a string-keyed hash; reads "__content__", "subjectScheme",
#       "schemeURI", "valueURI" and "lang".
#
# The content is first matched against the labels in
# resources/oecd/fos-mappings.json, with or without the "FOS: " prefix. If
# that fails, resources/oecd/for-mappings.json is consulted: by forId when
# the subjectScheme is "FOR", otherwise by forLabel.
#
# Returns the input subject under the internal key names (nil values
# removed), followed by a second hash carrying the "FOS: "-prefixed label and
# the FOS scheme details when a match was found.
def hsh_to_fos(hsh)
  content = hsh["__content__"]

  # first find subject in Fields of Science (OECD)
  fos = JSON.load(File.read(File.expand_path('../../../resources/oecd/fos-mappings.json', __FILE__))).fetch("fosFields")
  subject = fos.find { |l| l["fosLabel"] == content || "FOS: " + l["fosLabel"] == content }

  # if not found, look in Fields of Research (Australian and New Zealand Standard Research Classification)
  # and map to Fields of Science
  unless subject
    fores = JSON.load(File.read(File.expand_path('../../../resources/oecd/for-mappings.json', __FILE__)))
    for_fields = fores.fetch("forFields")
    for_disciplines = fores.fetch("forDisciplines")

    if hsh["subjectScheme"] == "FOR"
      # The forId is taken from the first whitespace-separated token,
      # left-padded with zeros to six characters. The to_s calls keep a nil
      # or blank content from raising NoMethodError; it simply finds no match.
      for_id = content.to_s.split(" ").first.to_s.rjust(6, "0")

      # forFields are compared on the full id, forDisciplines on its first
      # four characters
      subject = for_fields.find { |l| l["forId"] == for_id } ||
                for_disciplines.find { |l| l["forId"] == for_id[0..3] }
    else
      subject = for_fields.find { |l| l["forLabel"] == content } ||
                for_disciplines.find { |l| l["forLabel"] == content }
    end
  end

  subjects = [{
    "subject" => sanitize(content),
    "subjectScheme" => hsh["subjectScheme"],
    "schemeUri" => hsh["schemeURI"],
    "valueUri" => hsh["valueURI"],
    "lang" => hsh["lang"] }.compact]

  # add an extra entry for the matched Field of Science
  if subject
    subjects << {
      "subject" => "FOS: " + subject["fosLabel"],
      "subjectScheme" => "Fields of Science and Technology (FOS)",
      "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf"
    }
  end

  subjects
end
1059
1234
  end
1060
1235
  end