bolognese 1.5.21 → 1.6.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -0
  3. data/Gemfile.lock +34 -26
  4. data/bolognese.gemspec +3 -3
  5. data/lib/bolognese/datacite_utils.rb +7 -1
  6. data/lib/bolognese/metadata.rb +2 -1
  7. data/lib/bolognese/metadata_utils.rb +1 -0
  8. data/lib/bolognese/readers/bibtex_reader.rb +2 -1
  9. data/lib/bolognese/readers/citeproc_reader.rb +5 -3
  10. data/lib/bolognese/readers/codemeta_reader.rb +6 -4
  11. data/lib/bolognese/readers/crossref_reader.rb +1 -1
  12. data/lib/bolognese/readers/datacite_reader.rb +20 -15
  13. data/lib/bolognese/readers/ris_reader.rb +4 -2
  14. data/lib/bolognese/readers/schema_org_reader.rb +10 -6
  15. data/lib/bolognese/utils.rb +184 -9
  16. data/lib/bolognese/version.rb +1 -1
  17. data/lib/bolognese/writers/bibtex_writer.rb +2 -1
  18. data/lib/bolognese/writers/codemeta_writer.rb +2 -1
  19. data/resources/oecd/for-mappings.json +1101 -0
  20. data/resources/oecd/fos-mappings.json +198 -0
  21. data/resources/spdx/licenses.json +5297 -0
  22. data/spec/datacite_utils_spec.rb +5 -1
  23. data/spec/fixtures/datacite-example-affiliation.xml +1 -1
  24. data/spec/fixtures/datacite-funderIdentifier.xml +4 -0
  25. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/fos/hsh_to_fos_for_match.yml +44 -0
  26. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/fos/hsh_to_fos_match.yml +44 -0
  27. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/fos/hsh_to_fos_no_match.yml +44 -0
  28. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/fos/name_to_fos_for_match.yml +44 -0
  29. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/fos/name_to_fos_match.yml +44 -0
  30. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/fos/name_to_fos_no_match.yml +44 -0
  31. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_datacite_metadata/even_more_subject_scheme_FOR.yml +97 -0
  32. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_datacite_metadata/more_subject_scheme_FOR.yml +107 -0
  33. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_datacite_metadata/subject_scheme_FOR.yml +110 -0
  34. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/normalize_cc_url/not_found.yml +44 -0
  35. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/normalize_cc_url/with_trailing_slash.yml +44 -0
  36. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/normalize_cc_url/with_trailing_slash_and_to_https.yml +44 -0
  37. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/normalize_url/to_https.yml +44 -0
  38. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/normalize_url/with_trailing_slash_and_to_https.yml +44 -0
  39. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/spdx/hsh_to_spdx_id.yml +44 -0
  40. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/spdx/hsh_to_spdx_not_found.yml +44 -0
  41. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/spdx/hsh_to_spdx_url.yml +44 -0
  42. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/spdx/name_to_spdx_exists.yml +44 -0
  43. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/spdx/name_to_spdx_id.yml +44 -0
  44. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/spdx/name_to_spdx_not_found.yml +44 -0
  45. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/url_to_https/http.yml +44 -0
  46. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/url_to_https/uri.yml +44 -0
  47. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/url_to_https/with_trailing_slash.yml +44 -0
  48. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/write_metadata_as_schema_org/Funding.yml +58 -10
  49. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/write_metadata_as_schema_org/Funding_OpenAIRE.yml +65 -28
  50. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/write_metadata_as_schema_org/Schema_org_JSON_Cyark.yml +98 -0
  51. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/write_metadata_as_schema_org/Schema_org_JSON_IsSupplementTo.yml +58 -10
  52. data/spec/readers/bibtex_reader_spec.rb +5 -1
  53. data/spec/readers/codemeta_reader_spec.rb +20 -0
  54. data/spec/readers/crossref_reader_spec.rb +21 -5
  55. data/spec/readers/datacite_reader_spec.rb +150 -10
  56. data/spec/readers/schema_org_reader_spec.rb +15 -1
  57. data/spec/utils_spec.rb +108 -0
  58. data/spec/writers/bibtex_writer_spec.rb +6 -0
  59. data/spec/writers/citeproc_writer_spec.rb +10 -0
  60. data/spec/writers/crosscite_writer_spec.rb +5 -1
  61. data/spec/writers/datacite_json_writer_spec.rb +5 -1
  62. data/spec/writers/datacite_writer_spec.rb +30 -6
  63. data/spec/writers/schema_org_writer_spec.rb +31 -3
  64. metadata +51 -12
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 18d7664e5e2bca8c2dbc780ccb7c867bdd0e4347614aa7299f04cb9827097ac2
4
- data.tar.gz: '08a98e78c849527bb4f502522dfb758a856ec6454d4c9be58fbd43bd3d826695'
3
+ metadata.gz: 4a576209bfe96feeeb1e6495da845b4e6526eb2b45304ca136abda3e71f21540
4
+ data.tar.gz: 10efc05b05f546cabdec118f38c39027ba4a0fb6c52cc03f28f75b3d5127243b
5
5
  SHA512:
6
- metadata.gz: 92fbd1272bd5bab24f59eecd979c3333b7864e961189f3ee9686692e2c4fe2d8a1662b91f5c42a41e6924b27fa33e7fa309f7344d8420244f32e8db2afaa1bd2
7
- data.tar.gz: 0cc8738b5dda31898f84a735df09e5eb97c7242d9cc8091071d6b5ed1c718eee2f61b0e7977e051118a9e74b2b0169140cefc04f31aa7dd71ff4763af7d59c14
6
+ metadata.gz: 7e0d9bc3bd09820071226ced2a898eca038282873a67b2b2be22024cc6abf205d428a57e2a01f119dc27aa8a21bee30654c0dd1cc36eab5872de78b482c5b405
7
+ data.tar.gz: 0333ac4f5981ddaa39904a4729c8553f0d67add5a10395b2f32f0d64b39ad58feed240e2ce6009841dc64f514879a31a7a00a64712f3f5bfbfaf161cfde37120
data/.gitignore CHANGED
@@ -55,3 +55,5 @@ coverage/
55
55
  .env.*
56
56
  !.env.example
57
57
  !.env.travis
58
+
59
+ .vscode
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- bolognese (1.5.21)
4
+ bolognese (1.6.5)
5
5
  activesupport (>= 4.2.5)
6
6
  benchmark_methods (~> 0.7)
7
7
  bibtex-ruby (>= 5.1.0)
@@ -11,10 +11,10 @@ PATH
11
11
  concurrent-ruby (~> 1.1, >= 1.1.5)
12
12
  csl-styles (~> 1.0, >= 1.0.1.8)
13
13
  edtf (~> 3.0, >= 3.0.4)
14
- faraday (= 0.17.0)
14
+ faraday (~> 1.0, >= 1.0.1)
15
15
  gender_detector (~> 0.1.2)
16
16
  iso8601 (~> 0.9.1)
17
- json-ld (~> 2.2, >= 2.2.1)
17
+ json-ld (~> 3.1, >= 3.1.4)
18
18
  jsonlint (~> 0.3.0)
19
19
  loofah (~> 2.0, >= 2.0.3)
20
20
  maremma (>= 4.3, < 5)
@@ -25,17 +25,17 @@ PATH
25
25
  postrank-uri (~> 1.0, >= 1.0.18)
26
26
  rdf-rdfxml (~> 3.1)
27
27
  rdf-turtle (~> 3.1)
28
- thor (~> 0.19)
28
+ thor (~> 1.0, >= 1.0.1)
29
29
 
30
30
  GEM
31
31
  remote: https://rubygems.org/
32
32
  specs:
33
- activesupport (6.0.2.2)
33
+ activesupport (6.0.3.2)
34
34
  concurrent-ruby (~> 1.0, >= 1.0.2)
35
35
  i18n (>= 0.7, < 2)
36
36
  minitest (~> 5.1)
37
37
  tzinfo (~> 1.1)
38
- zeitwerk (~> 2.2)
38
+ zeitwerk (~> 2.2, >= 2.2.2)
39
39
  addressable (2.7.0)
40
40
  public_suffix (>= 2.0.2, < 5.0)
41
41
  benchmark_methods (0.7)
@@ -64,12 +64,12 @@ GEM
64
64
  edtf (3.0.5)
65
65
  activesupport (>= 3.0, < 7.0)
66
66
  excon (0.71.1)
67
- faraday (0.17.0)
67
+ faraday (1.0.1)
68
68
  multipart-post (>= 1.2, < 3)
69
69
  faraday-encoding (0.0.5)
70
70
  faraday
71
- faraday_middleware (0.13.1)
72
- faraday (>= 0.7.4, < 1.0)
71
+ faraday_middleware (1.0.0)
72
+ faraday (~> 1.0)
73
73
  gender_detector (0.1.2)
74
74
  unicode_utils (>= 1.3.0)
75
75
  haml (5.1.2)
@@ -79,33 +79,38 @@ GEM
79
79
  concurrent-ruby (~> 1.0)
80
80
  hashdiff (1.0.1)
81
81
  htmlentities (4.3.4)
82
- i18n (1.8.2)
82
+ i18n (1.8.3)
83
83
  concurrent-ruby (~> 1.0)
84
84
  iso8601 (0.9.1)
85
- json-ld (2.2.1)
86
- multi_json (~> 1.12)
87
- rdf (>= 2.2.8, < 4.0)
85
+ json-canonicalization (0.2.0)
86
+ json-ld (3.1.4)
87
+ htmlentities (~> 4.3)
88
+ json-canonicalization (~> 0.2)
89
+ link_header (~> 0.0, >= 0.0.8)
90
+ multi_json (~> 1.14)
91
+ rack (~> 2.0)
92
+ rdf (~> 3.1)
88
93
  jsonlint (0.3.0)
89
94
  oj (~> 3)
90
95
  optimist (~> 3)
91
96
  latex-decode (0.3.1)
92
97
  link_header (0.0.8)
93
- loofah (2.5.0)
98
+ loofah (2.6.0)
94
99
  crass (~> 1.0.2)
95
100
  nokogiri (>= 1.5.9)
96
- maremma (4.7)
101
+ maremma (4.8)
97
102
  activesupport (>= 4.2.5)
98
103
  addressable (>= 2.3.6)
99
104
  builder (~> 3.2, >= 3.2.2)
100
105
  excon (~> 0.71.0)
101
- faraday (= 0.17.0)
106
+ faraday (~> 1.0, >= 1.0.1)
102
107
  faraday-encoding (~> 0.0.4)
103
- faraday_middleware (~> 0.13.1)
108
+ faraday_middleware (~> 1.0)
104
109
  nokogiri (~> 1.10.4)
105
110
  oj (>= 2.8.3)
106
111
  oj_mimic_json (~> 1.0, >= 1.0.1)
107
112
  mini_portile2 (2.4.0)
108
- minitest (5.14.0)
113
+ minitest (5.14.1)
109
114
  multi_json (1.14.1)
110
115
  multipart-post (2.1.1)
111
116
  namae (1.0.1)
@@ -119,29 +124,32 @@ GEM
119
124
  nokogiri (>= 1.8.0)
120
125
  public_suffix (>= 2.0.0, < 2.1)
121
126
  public_suffix (2.0.5)
122
- rack (2.2.2)
127
+ rack (2.2.3)
123
128
  rack-test (0.8.3)
124
129
  rack (>= 1.0, < 3)
125
130
  rake (12.3.3)
126
- rdf (3.1.1)
131
+ rdf (3.1.3)
127
132
  hamster (~> 3.0)
128
133
  link_header (~> 0.0, >= 0.0.8)
129
134
  rdf-aggregate-repo (3.1.0)
130
135
  rdf (~> 3.1)
131
- rdf-rdfa (3.1.0)
136
+ rdf-rdfa (3.1.1)
132
137
  haml (~> 5.1)
133
138
  htmlentities (~> 4.3)
134
- rdf (~> 3.1)
139
+ rdf (~> 3.1, >= 3.1.2)
135
140
  rdf-aggregate-repo (~> 3.1)
141
+ rdf-vocab (~> 3.1, >= 3.1.5)
136
142
  rdf-xsd (~> 3.1)
137
143
  rdf-rdfxml (3.1.0)
138
144
  htmlentities (~> 4.3)
139
145
  rdf (~> 3.1)
140
146
  rdf-rdfa (~> 3.1)
141
147
  rdf-xsd (~> 3.1)
142
- rdf-turtle (3.1.0)
148
+ rdf-turtle (3.1.1)
143
149
  ebnf (~> 1.2)
144
- rdf (~> 3.1)
150
+ rdf (~> 3.1, >= 3.1.2)
151
+ rdf-vocab (3.1.5)
152
+ rdf (~> 3.1, >= 3.1.2)
145
153
  rdf-xsd (3.1.0)
146
154
  rdf (~> 3.1)
147
155
  rspec (3.9.0)
@@ -150,7 +158,7 @@ GEM
150
158
  rspec-mocks (~> 3.9.0)
151
159
  rspec-core (3.9.2)
152
160
  rspec-support (~> 3.9.3)
153
- rspec-expectations (3.9.1)
161
+ rspec-expectations (3.9.2)
154
162
  diff-lcs (>= 1.2.0, < 2.0)
155
163
  rspec-support (~> 3.9.0)
156
164
  rspec-mocks (3.9.1)
@@ -168,7 +176,7 @@ GEM
168
176
  sxp (1.1.0)
169
177
  rdf (~> 3.1)
170
178
  temple (0.8.2)
171
- thor (0.20.3)
179
+ thor (1.0.1)
172
180
  thread_safe (0.3.6)
173
181
  tilt (2.0.10)
174
182
  tzinfo (1.2.7)
data/bolognese.gemspec CHANGED
@@ -17,13 +17,13 @@ Gem::Specification.new do |s|
17
17
 
18
18
  # Declary dependencies here, rather than in the Gemfile
19
19
  s.add_dependency 'maremma', '>= 4.3', '< 5'
20
- s.add_dependency 'faraday', '0.17.0'
20
+ s.add_dependency 'faraday', '~> 1.0', '>= 1.0.1'
21
21
  s.add_dependency 'nokogiri', '~> 1.10.4'
22
22
  s.add_dependency 'loofah', '~> 2.0', '>= 2.0.3'
23
23
  s.add_dependency 'builder', '~> 3.2', '>= 3.2.2'
24
24
  s.add_dependency 'activesupport', '>= 4.2.5'
25
25
  s.add_dependency 'bibtex-ruby', '>= 5.1.0'
26
- s.add_dependency 'thor', '~> 0.19'
26
+ s.add_dependency 'thor', '~> 1.0', '>= 1.0.1'
27
27
  s.add_dependency 'colorize', '~> 0.8.1'
28
28
  s.add_dependency 'namae', '~> 1.0'
29
29
  s.add_dependency 'edtf', '~> 3.0', '>= 3.0.4'
@@ -31,7 +31,7 @@ Gem::Specification.new do |s|
31
31
  s.add_dependency 'csl-styles', '~> 1.0', '>= 1.0.1.8'
32
32
  s.add_dependency 'iso8601', '~> 0.9.1'
33
33
  s.add_dependency 'postrank-uri', '~> 1.0', '>= 1.0.18'
34
- s.add_dependency 'json-ld', '~> 2.2', '>= 2.2.1'
34
+ s.add_dependency 'json-ld', '~> 3.1', '>= 3.1.4'
35
35
  s.add_dependency 'jsonlint', '~> 0.3.0'
36
36
  s.add_dependency 'oj', '~> 3.10'
37
37
  s.add_dependency "oj_mimic_json", "~> 1.0", ">= 1.0.1"
data/lib/bolognese/datacite_utils.rb CHANGED
@@ -232,7 +232,13 @@ module Bolognese
232
232
  r["rightsUri"] = normalize_id(rights)
233
233
  end
234
234
 
235
- attributes = { 'rightsURI' => r["rightsUri"], 'xml:lang' => r["lang"] }.compact
235
+ attributes = {
236
+ "rightsURI" => r["rightsUri"],
237
+ "rightsIdentifier" => r["rightsIdentifier"],
238
+ "rightsIdentifierScheme" => r["rightsIdentifierScheme"],
239
+ "schemeURI" => r["schemeUri"],
240
+ "xml:lang" => r["lang"]
241
+ }.compact
236
242
 
237
243
  xml.rights(r["rights"], attributes)
238
244
  end
data/lib/bolognese/metadata.rb CHANGED
@@ -117,7 +117,8 @@ module Bolognese
117
117
 
118
118
  @regenerate = options[:regenerate] || read_options.present?
119
119
  # generate name for method to call dynamically
120
- @meta = @from.present? ? send("read_" + @from, { string: string, sandbox: options[:sandbox], doi: options[:doi], id: id, ra: ra }.merge(read_options)) : {}
120
+ opts = { string: string, sandbox: options[:sandbox], doi: options[:doi], id: id, ra: ra }.merge(read_options)
121
+ @meta = @from.present? ? send("read_" + @from, **opts) : {}
121
122
  end
122
123
 
123
124
  def id
data/lib/bolognese/metadata_utils.rb CHANGED
@@ -158,6 +158,7 @@ module Bolognese
158
158
  "publisher" => publisher,
159
159
  "title" => parse_attributes(titles, content: "title", first: true),
160
160
  "URL" => url,
161
+ "copyright" => Array.wrap(rights_list).map { |l| l["rights"] }.first,
161
162
  "version" => version_info
162
163
  }.compact.symbolize_keys
163
164
  end
data/lib/bolognese/readers/bibtex_reader.rb CHANGED
@@ -76,6 +76,7 @@ module Bolognese
76
76
  "dateType" => "Issued" }]
77
77
  end
78
78
  publication_year = meta.try(:date).present? ? meta.date.to_s[0..3] : nil
79
+ rights_list = meta.try(:copyright).present? ? [hsh_to_spdx("rightsURI" => meta[:copyright])] : []
79
80
 
80
81
  { "id" => normalize_doi(doi),
81
82
  "types" => types,
@@ -90,7 +91,7 @@ module Bolognese
90
91
  "dates" => dates,
91
92
  "publication_year" => publication_year,
92
93
  "descriptions" => meta.try(:abstract).present? ? [{ "description" => meta.try(:abstract) && sanitize(meta.abstract.to_s).presence, "descriptionType" => "Abstract" }] : [],
93
- "rights_list" => meta.try(:copyright).present? ? [{ "rightsUri" => meta.try(:copyright).to_s.presence }.compact] : [],
94
+ "rights_list" => rights_list,
94
95
  "state" => state
95
96
  }.merge(read_options)
96
97
  end
data/lib/bolognese/readers/citeproc_reader.rb CHANGED
@@ -60,7 +60,7 @@ module Bolognese
60
60
  end
61
61
  publication_year = get_date_from_date_parts(meta.fetch("issued", nil)).to_s[0..3]
62
62
  rights_list = if meta.fetch("copyright", nil)
63
- [{ "rightsUri" => normalize_url(meta.fetch("copyright")) }.compact]
63
+ [hsh_to_spdx("rightsURI" => meta.fetch("copyright"))]
64
64
  end
65
65
  related_identifiers = if meta.fetch("container-title", nil).present? && meta.fetch("ISSN", nil).present?
66
66
  [{ "type" => "Periodical",
@@ -100,8 +100,10 @@ module Bolognese
100
100
  doi = Array.wrap(identifiers).find { |r| r["identifierType"] == "DOI" }.to_h.fetch("identifier", nil)
101
101
 
102
102
  state = id.present? || read_options.present? ? "findable" : "not_found"
103
- subjects = Array.wrap(meta.fetch("categories", nil)).map do |s|
104
- { "subject" => s }
103
+ subjects = Array.wrap(meta.fetch("categories", nil)).reduce([]) do |sum, subject|
104
+ sum += name_to_fos(subject)
105
+
106
+ sum
105
107
  end
106
108
 
107
109
  { "id" => id,
data/lib/bolognese/readers/codemeta_reader.rb CHANGED
@@ -57,13 +57,15 @@ module Bolognese
57
57
  "bibtex" => Bolognese::Utils::SO_TO_BIB_TRANSLATIONS[schema_org] || "misc",
58
58
  "ris" => Bolognese::Utils::SO_TO_RIS_TRANSLATIONS[schema_org] || "GEN"
59
59
  }.compact
60
- subjects = Array.wrap(meta.fetch("tags", nil)).map do |s|
61
- { "subject" => s }
60
+ subjects = Array.wrap(meta.fetch("tags", nil)).reduce([]) do |sum, subject|
61
+ sum += name_to_fos(subject)
62
+
63
+ sum
62
64
  end
63
65
 
64
66
  has_title = meta.fetch("title", nil)
65
-
66
67
  titles = has_title.nil? ? [{ "title" => meta.fetch("name", nil) }] : [{ "title" => has_title }]
68
+ rights_list = meta.fetch("licenseId", nil).present? ? [hsh_to_spdx("rightsIdentifier" => meta.fetch("licenseId"))] : nil
67
69
 
68
70
  { "id" => id,
69
71
  "types" => types,
@@ -78,7 +80,7 @@ module Bolognese
78
80
  "dates" => dates,
79
81
  "publication_year" => publication_year,
80
82
  "descriptions" => meta.fetch("description", nil).present? ? [{ "description" => sanitize(meta.fetch("description")), "descriptionType" => "Abstract" }] : nil,
81
- "rights_list" => [{ "rightsUri" => meta.fetch("license", nil) }.compact],
83
+ "rights_list" => rights_list,
82
84
  "version_info" => meta.fetch("version", nil),
83
85
  "subjects" => subjects,
84
86
  "state" => state
data/lib/bolognese/readers/crossref_reader.rb CHANGED
@@ -211,7 +211,7 @@ module Bolognese
211
211
  access_indicator = Array.wrap(program_metadata).find { |m| m["name"] == "AccessIndicators" }
212
212
  if access_indicator.present?
213
213
  Array.wrap(access_indicator["license_ref"]).map do |license|
214
- { "rightsUri" => normalize_url(parse_attributes(license)) }
214
+ hsh_to_spdx("rightsURI" => parse_attributes(license))
215
215
  end.uniq
216
216
  else
217
217
  []
data/lib/bolognese/readers/datacite_reader.rb CHANGED
@@ -93,7 +93,7 @@ module Bolognese
93
93
  "bibtex" => Bolognese::Utils::CR_TO_BIB_TRANSLATIONS[resource_type.to_s.underscore.camelcase] || Bolognese::Utils::SO_TO_BIB_TRANSLATIONS[schema_org] || "misc",
94
94
  "ris" => Bolognese::Utils::CR_TO_RIS_TRANSLATIONS[resource_type.to_s.underscore.camelcase] || Bolognese::Utils::DC_TO_RIS_TRANSLATIONS[resource_type_general.to_s.dasherize] || "GEN"
95
95
  }.compact
96
-
96
+
97
97
  titles = Array.wrap(meta.dig("titles", "title")).map do |r|
98
98
  if r.blank?
99
99
  nil
@@ -117,20 +117,22 @@ module Bolognese
117
117
  if r.blank?
118
118
  nil
119
119
  elsif r.is_a?(String)
120
- { "rights" => r }
120
+ name_to_spdx(r)
121
121
  elsif r.is_a?(Hash)
122
- { "rights" => r["__content__"], "rightsUri" => normalize_url(r["rightsURI"]), "lang" => r["lang"] }.compact
122
+ hsh_to_spdx(r)
123
123
  end
124
124
  end.compact
125
- subjects = Array.wrap(meta.dig("subjects", "subject")).map do |k|
126
- if k.blank?
127
- nil
128
- elsif k.is_a?(String)
129
- { "subject" => sanitize(k) }
130
- elsif k.is_a?(Hash)
131
- { "subject" => sanitize(k["__content__"]), "subjectScheme" => k["subjectScheme"], "schemeUri" => k["schemeURI"], "valueUri" => k["valueURI"], "lang" => k["lang"] }.compact
125
+
126
+ subjects = Array.wrap(meta.dig("subjects", "subject")).reduce([]) do |sum, subject|
127
+ if subject.is_a?(String)
128
+ sum += name_to_fos(subject)
129
+ elsif subject.is_a?(Hash)
130
+ sum += hsh_to_fos(subject)
132
131
  end
133
- end.compact
132
+
133
+ sum
134
+ end.uniq
135
+
134
136
  dates = Array.wrap(meta.dig("dates", "date")).map do |r|
135
137
  if r.is_a?(Hash) && date = sanitize(r["__content__"]).presence
136
138
  if Date.edtf(date).present? || Bolognese::Utils::UNKNOWN_INFORMATION.key?(date)
@@ -164,12 +166,15 @@ module Bolognese
164
166
  funding_references = Array.wrap(meta.dig("fundingReferences", "fundingReference")).compact.map do |fr|
165
167
  scheme_uri = parse_attributes(fr["funderIdentifier"], content: "schemeURI")
166
168
  funder_identifier = parse_attributes(fr["funderIdentifier"])
167
- funder_identifier = !funder_identifier.to_s.start_with?("https://","http://") && scheme_uri.present? ? normalize_id(scheme_uri + funder_identifier) : normalize_id(funder_identifier)
168
-
169
+ funder_identifier_type = parse_attributes(fr["funderIdentifier"], content: "funderIdentifierType")
170
+ if funder_identifier_type != "Other"
171
+ funder_identifier = !funder_identifier.to_s.start_with?("https://","http://") && scheme_uri.present? ? normalize_id(scheme_uri + funder_identifier) : normalize_id(funder_identifier)
172
+ end
173
+
169
174
  {
170
175
  "funderName" => fr["funderName"],
171
176
  "funderIdentifier" => funder_identifier,
172
- "funderIdentifierType" => parse_attributes(fr["funderIdentifier"], content: "funderIdentifierType"),
177
+ "funderIdentifierType" => funder_identifier_type,
173
178
  "awardNumber" => parse_attributes(fr["awardNumber"]),
174
179
  "awardUri" => parse_attributes(fr["awardNumber"], content: "awardURI"),
175
180
  "awardTitle" => fr["awardTitle"] }.compact
@@ -181,7 +186,7 @@ module Bolognese
181
186
  rid = ri["__content__"]
182
187
  end
183
188
 
184
- {
189
+ {
185
190
  "relatedIdentifier" => rid,
186
191
  "relatedIdentifierType" => ri["relatedIdentifierType"],
187
192
  "relationType" => ri["relationType"],
data/lib/bolognese/readers/ris_reader.rb CHANGED
@@ -82,8 +82,10 @@ module Bolognese
82
82
  nil
83
83
  end
84
84
  state = meta.fetch("DO", nil).present? || read_options.present? ? "findable" : "not_found"
85
- subjects = Array.wrap(meta.fetch("KW", nil)).map do |s|
86
- { "subject" => s }
85
+ subjects = Array.wrap(meta.fetch("KW", nil)).reduce([]) do |sum, subject|
86
+ sum += name_to_fos(subject)
87
+
88
+ sum
87
89
  end
88
90
 
89
91
  { "id" => id,
data/lib/bolognese/readers/schema_org_reader.rb CHANGED
@@ -107,10 +107,9 @@ module Bolognese
107
107
  Array.wrap(schema_org_is_supplement_to(meta)) +
108
108
  Array.wrap(schema_org_is_supplemented_by(meta))
109
109
 
110
- rights_list = [{
111
- "rightsUri" => parse_attributes(meta.fetch("license", nil), content: "id", first: true),
112
- "rights" => parse_attributes(meta.fetch("license", nil), content: "name", first: true)
113
- }]
110
+ rights_list = Array.wrap(meta.fetch("license", nil)).compact.map do |rl|
111
+ hsh_to_spdx("__content__" => rl["name"], "rightsURI" => rl["id"])
112
+ end
114
113
 
115
114
  funding_references = Array.wrap(meta.fetch("funder", nil)).compact.map do |fr|
116
115
  if fr["@id"].present?
@@ -150,8 +149,13 @@ module Bolognese
150
149
  "geoLocationBox" => geo_location_box
151
150
  }.compact
152
151
  end
153
- subjects = Array.wrap(meta.fetch("keywords", nil).to_s.split(", ")).map do |s|
154
- { "subject" => s }
152
+
153
+ # handle keywords as array and as comma-separated string
154
+ subjects = meta.fetch("keywords", nil)
155
+ subjects = subjects.to_s.split(", ") if subjects.is_a?(String)
156
+ subjects = Array.wrap(subjects).reduce([]) do |sum, subject|
157
+ sum += name_to_fos(subject)
158
+ sum
155
159
  end
156
160
 
157
161
  { "id" => id,
data/lib/bolognese/utils.rb CHANGED
@@ -2,13 +2,44 @@
2
2
 
3
3
  module Bolognese
4
4
  module Utils
5
- LICENSE_NAMES = {
6
- "http://creativecommons.org/publicdomain/zero/1.0/" => "Public Domain (CC0 1.0)",
7
- "http://creativecommons.org/licenses/by/3.0/" => "Creative Commons Attribution 3.0 (CC-BY 3.0)",
8
- "http://creativecommons.org/licenses/by/4.0/" => "Creative Commons Attribution 4.0 (CC-BY 4.0)",
9
- "http://creativecommons.org/licenses/by-nc/4.0/" => "Creative Commons Attribution Noncommercial 4.0 (CC-BY-NC 4.0)",
10
- "http://creativecommons.org/licenses/by-sa/4.0/" => "Creative Commons Attribution Share Alike 4.0 (CC-BY-SA 4.0)",
11
- "http://creativecommons.org/licenses/by-nc-nd/4.0/" => "Creative Commons Attribution Noncommercial No Derivatives 4.0 (CC-BY-NC-ND 4.0)"
5
+ NORMALIZED_LICENSES = {
6
+ "https://creativecommons.org/licenses/by/1.0" => "https://creativecommons.org/licenses/by/1.0/legalcode",
7
+ "https://creativecommons.org/licenses/by/2.0" => "https://creativecommons.org/licenses/by/2.0/legalcode",
8
+ "https://creativecommons.org/licenses/by/2.5" => "https://creativecommons.org/licenses/by/2.5/legalcode",
9
+ "https://creativecommons.org/licenses/by/3.0" => "https://creativecommons.org/licenses/by/3.0/legalcode",
10
+ "https://creativecommons.org/licenses/by/4.0" => "https://creativecommons.org/licenses/by/4.0/legalcode",
11
+ "https://creativecommons.org/licenses/by-nc/1.0" => "https://creativecommons.org/licenses/by-nc/1.0/legalcode",
12
+ "https://creativecommons.org/licenses/by-nc/2.0" => "https://creativecommons.org/licenses/by-nc/2.0/legalcode",
13
+ "https://creativecommons.org/licenses/by-nc/2.5" => "https://creativecommons.org/licenses/by-nc/2.5/legalcode",
14
+ "https://creativecommons.org/licenses/by-nc/3.0" => "https://creativecommons.org/licenses/by-nc/3.0/legalcode",
15
+ "https://creativecommons.org/licenses/by-nc/4.0" => "https://creativecommons.org/licenses/by-nc/4.0/legalcode",
16
+ "https://creativecommons.org/licenses/by-nd-nc/1.0" => "https://creativecommons.org/licenses/by-nd-nc/1.0/legalcode",
17
+ "https://creativecommons.org/licenses/by-nd-nc/2.0" => "https://creativecommons.org/licenses/by-nd-nc/2.0/legalcode",
18
+ "https://creativecommons.org/licenses/by-nd-nc/2.5" => "https://creativecommons.org/licenses/by-nd-nc/2.5/legalcode",
19
+ "https://creativecommons.org/licenses/by-nd-nc/3.0" => "https://creativecommons.org/licenses/by-nd-nc/3.0/legalcode",
20
+ "https://creativecommons.org/licenses/by-nd-nc/4.0" => "https://creativecommons.org/licenses/by-nd-nc/4.0/legalcode",
21
+ "https://creativecommons.org/licenses/by-nc-sa/1.0" => "https://creativecommons.org/licenses/by-nc-sa/1.0/legalcode",
22
+ "https://creativecommons.org/licenses/by-nc-sa/2.0" => "https://creativecommons.org/licenses/by-nc-sa/2.0/legalcode",
23
+ "https://creativecommons.org/licenses/by-nc-sa/2.5" => "https://creativecommons.org/licenses/by-nc-sa/2.5/legalcode",
24
+ "https://creativecommons.org/licenses/by-nc-sa/3.0" => "https://creativecommons.org/licenses/by-nc-sa/3.0/legalcode",
25
+ "https://creativecommons.org/licenses/by-nc-sa/4.0" => "https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode",
26
+ "https://creativecommons.org/licenses/by-nd/1.0" => "https://creativecommons.org/licenses/by-nd/1.0/legalcode",
27
+ "https://creativecommons.org/licenses/by-nd/2.0" => "https://creativecommons.org/licenses/by-nd/2.0/legalcode",
28
+ "https://creativecommons.org/licenses/by-nd/2.5" => "https://creativecommons.org/licenses/by-nd/2.5/legalcode",
29
+ "https://creativecommons.org/licenses/by-nd/3.0" => "https://creativecommons.org/licenses/by-nd/3.0/legalcode",
30
+ "https://creativecommons.org/licenses/by-nd/4.0" => "https://creativecommons.org/licenses/by-nd/2.0/legalcode",
31
+ "https://creativecommons.org/licenses/by-sa/1.0" => "https://creativecommons.org/licenses/by-sa/1.0/legalcode",
32
+ "https://creativecommons.org/licenses/by-sa/2.0" => "https://creativecommons.org/licenses/by-sa/2.0/legalcode",
33
+ "https://creativecommons.org/licenses/by-sa/2.5" => "https://creativecommons.org/licenses/by-sa/2.5/legalcode",
34
+ "https://creativecommons.org/licenses/by-sa/3.0" => "https://creativecommons.org/licenses/by-sa/3.0/legalcode",
35
+ "https://creativecommons.org/licenses/by-sa/4.0" => "https://creativecommons.org/licenses/by-sa/4.0/legalcode",
36
+ "https://creativecommons.org/licenses/by-nc-nd/1.0" => "https://creativecommons.org/licenses/by-nc-nd/1.0/legalcode",
37
+ "https://creativecommons.org/licenses/by-nc-nd/2.0" => "https://creativecommons.org/licenses/by-nc-nd/2.0/legalcode",
38
+ "https://creativecommons.org/licenses/by-nc-nd/2.5" => "https://creativecommons.org/licenses/by-nc-nd/2.5/legalcode",
39
+ "https://creativecommons.org/licenses/by-nc-nd/3.0" => "https://creativecommons.org/licenses/by-nc-nd/3.0/legalcode",
40
+ "https://creativecommons.org/licenses/by-nc-nd/4.0" => "https://creativecommons.org/licenses/by-nc-nd/4.0/legalcode",
41
+ "https://creativecommons.org/licenses/publicdomain" => "https://creativecommons.org/licenses/publicdomain/",
42
+ "https://creativecommons.org/publicdomain/zero/1.0" => "https://creativecommons.org/publicdomain/zero/1.0/legalcode",
12
43
  }
13
44
 
14
45
  DC_TO_SO_TRANSLATIONS = {
@@ -475,7 +506,7 @@ module Bolognese
475
506
  nil
476
507
  end
477
508
 
478
- def normalize_url(id)
509
+ def normalize_url(id, options={})
479
510
  return nil unless id.present?
480
511
 
481
512
  # handle info URIs
@@ -485,13 +516,23 @@ module Bolognese
485
516
  uri = Addressable::URI.parse(id)
486
517
 
487
518
  return nil unless uri && uri.host && %w(http https ftp).include?(uri.scheme)
519
+
520
+ # optionally turn into https URL
521
+ uri.scheme = "https" if options[:https]
488
522
 
489
523
  # clean up URL
490
- PostRank::URI.clean(id)
524
+ uri.path = PostRank::URI.clean(uri.path)
525
+
526
+ uri.to_s
491
527
  rescue Addressable::URI::InvalidURIError
492
528
  nil
493
529
  end
494
530
 
531
+ def normalize_cc_url(id)
532
+ id = normalize_url(id, https: true)
533
+ NORMALIZED_LICENSES.fetch(id, id)
534
+ end
535
+
495
536
  def normalize_orcid(orcid)
496
537
  orcid = validate_orcid(orcid)
497
538
  return nil unless orcid.present?
@@ -1056,5 +1097,139 @@ module Bolognese
1056
1097
  error_array
1057
1098
  end
1058
1099
 
1100
+ def name_to_spdx(name)
1101
+ spdx = JSON.load(File.read(File.expand_path('../../../resources/spdx/licenses.json', __FILE__))).fetch("licenses")
1102
+ license = spdx.find { |l| l["name"] == name || l["licenseId"] == name || l["seeAlso"].first == normalize_cc_url(name) }
1103
+
1104
+ if license
1105
+ {
1106
+ "rights" => license["name"],
1107
+ "rightsUri" => license["seeAlso"].first,
1108
+ "rightsIdentifier" => license["licenseId"],
1109
+ "rightsIdentifierScheme" => "SPDX",
1110
+ "schemeUri" => "https://spdx.org/licenses/" }.compact
1111
+ else
1112
+ { "rights" => name }
1113
+ end
1114
+ end
1115
+
1116
+ def hsh_to_spdx(hsh)
1117
+ spdx = JSON.load(File.read(File.expand_path('../../../resources/spdx/licenses.json', __FILE__))).fetch("licenses")
1118
+ license = spdx.find { |l| l["licenseId"] == hsh["rightsIdentifier"] || l["seeAlso"].first == normalize_cc_url(hsh["rightsURI"]) || l["name"] == hsh["rights"] || l["seeAlso"].first == normalize_cc_url(hsh["rights"]) }
1119
+
1120
+ if license
1121
+ {
1122
+ "rights" => license["name"],
1123
+ "rightsUri" => license["seeAlso"].first,
1124
+ "rightsIdentifier" => license["licenseId"],
1125
+ "rightsIdentifierScheme" => "SPDX",
1126
+ "schemeUri" => "https://spdx.org/licenses/",
1127
+ "lang" => hsh["lang"] }.compact
1128
+ else
1129
+ {
1130
+ "rights" => hsh["__content__"],
1131
+ "rightsUri" => hsh["rightsURI"],
1132
+ "rightsIdentifier" => hsh["rightsIdentifier"],
1133
+ "rightsIdentifierScheme" => hsh["rightsIdentifierScheme"],
1134
+ "schemeUri" => hsh["schemeUri"],
1135
+ "lang" => hsh["lang"] }.compact
1136
+ end
1137
+ end
1138
+
1139
+ def name_to_fos(name)
1140
+ # first find subject in Fields of Science (OECD)
1141
+ fos = JSON.load(File.read(File.expand_path('../../../resources/oecd/fos-mappings.json', __FILE__))).fetch("fosFields")
1142
+
1143
+ subject = fos.find { |l| l["fosLabel"] == name || "FOS: " + l["fosLabel"] == name }
1144
+
1145
+ if subject
1146
+ return [{
1147
+ "subject" => sanitize(name) },
1148
+ {
1149
+ "subject" => "FOS: " + subject["fosLabel"],
1150
+ "subjectScheme" => "Fields of Science and Technology (FOS)",
1151
+ "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf"
1152
+ }]
1153
+ end
1154
+
1155
+ # if not found, look in Fields of Research (Australian and New Zealand Standard Research Classification)
1156
+ # and map to Fields of Science. Add an extra entry for the latter
1157
+ fores = JSON.load(File.read(File.expand_path('../../../resources/oecd/for-mappings.json', __FILE__)))
1158
+ for_fields = fores.fetch("forFields")
1159
+ for_disciplines = fores.fetch("forDisciplines")
1160
+
1161
+ subject = for_fields.find { |l| l["forLabel"] == name } ||
1162
+ for_disciplines.find { |l| l["forLabel"] == name }
1163
+
1164
+ if subject
1165
+ [{
1166
+ "subject" => sanitize(name) },
1167
+ {
1168
+ "subject" => "FOS: " + subject["fosLabel"],
1169
+ "subjectScheme" => "Fields of Science and Technology (FOS)",
1170
+ "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf"
1171
+ }]
1172
+ else
1173
+ [{ "subject" => sanitize(name) }]
1174
+ end
1175
+ end
1176
+
1177
+ def hsh_to_fos(hsh)
1178
+ # first find subject in Fields of Science (OECD)
1179
+ fos = JSON.load(File.read(File.expand_path('../../../resources/oecd/fos-mappings.json', __FILE__))).fetch("fosFields")
1180
+ subject = fos.find { |l| l["fosLabel"] == hsh["__content__"] || "FOS: " + l["fosLabel"] == hsh["__content__"] }
1181
+
1182
+ if subject
1183
+ return [{
1184
+ "subject" => sanitize(hsh["__content__"]),
1185
+ "subjectScheme" => hsh["subjectScheme"],
1186
+ "schemeUri" => hsh["schemeURI"],
1187
+ "valueUri" => hsh["valueURI"],
1188
+ "lang" => hsh["lang"] }.compact,
1189
+ {
1190
+ "subject" => "FOS: " + subject["fosLabel"],
1191
+ "subjectScheme" => "Fields of Science and Technology (FOS)",
1192
+ "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf" }.compact]
1193
+ end
1194
+
1195
+ # if not found, look in Fields of Research (Australian and New Zealand Standard Research Classification)
1196
+ # and map to Fields of Science. Add an extra entry for the latter
1197
+ fores = JSON.load(File.read(File.expand_path('../../../resources/oecd/for-mappings.json', __FILE__)))
1198
+ for_fields = fores.fetch("forFields")
1199
+ for_disciplines = fores.fetch("forDisciplines")
1200
+
1201
+ # try to extract forId
1202
+ if hsh["subjectScheme"] == "FOR"
1203
+ for_id = hsh["__content__"].split(" ").first
1204
+ for_id = for_id.rjust(6, "0")
1205
+
1206
+ subject = for_fields.find { |l| l["forId"] == for_id } ||
1207
+ for_disciplines.find { |l| l["forId"] == for_id[0..3] }
1208
+ else
1209
+ subject = for_fields.find { |l| l["forLabel"] == hsh["__content__"] } ||
1210
+ for_disciplines.find { |l| l["forLabel"] == hsh["__content__"] }
1211
+ end
1212
+
1213
+ if subject
1214
+ [{
1215
+ "subject" => sanitize(hsh["__content__"]),
1216
+ "subjectScheme" => hsh["subjectScheme"],
1217
+ "schemeUri" => hsh["schemeURI"],
1218
+ "valueUri" => hsh["valueURI"],
1219
+ "lang" => hsh["lang"] }.compact,
1220
+ {
1221
+ "subject" => "FOS: " + subject["fosLabel"],
1222
+ "subjectScheme" => "Fields of Science and Technology (FOS)",
1223
+ "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf"
1224
+ }]
1225
+ else
1226
+ [{
1227
+ "subject" => sanitize(hsh["__content__"]),
1228
+ "subjectScheme" => hsh["subjectScheme"],
1229
+ "schemeUri" => hsh["schemeURI"],
1230
+ "valueUri" => hsh["valueURI"],
1231
+ "lang" => hsh["lang"] }.compact]
1232
+ end
1233
+ end
1059
1234
  end
1060
1235
  end