commonmeta-ruby 3.0.10 → 3.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +37 -36
- data/commonmeta.gemspec +1 -1
- data/lib/commonmeta/metadata_utils.rb +2 -0
- data/lib/commonmeta/readers/json_post_reader.rb +78 -0
- data/lib/commonmeta/utils.rb +19 -0
- data/lib/commonmeta/version.rb +1 -1
- data/spec/author_utils_spec.rb +10 -0
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_item_metadata/blogger_post.yml +94 -0
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_item_metadata/ghost_post.yml +117 -0
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_item_metadata/ghost_post_with_doi.yml +117 -0
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_item_metadata/jekyll_post.yml +170 -0
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_item_metadata/wordpress_post.yml +163 -0
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_post_metadata/blogger_post.yml +94 -0
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_post_metadata/ghost_post_with_doi.yml +117 -0
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_post_metadata/jekyll_post.yml +87 -0
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_post_metadata/wordpress_post.yml +163 -0
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_link/license.yml +221 -0
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_link/url.yml +221 -0
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/write_metadata_as_crossref/json_item_from_rogue_scholar_with_doi.yml +163 -0
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/write_metadata_as_crossref/json_item_from_upstream_blog.yml +243 -0
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/write_metadata_as_crossref/json_post_from_rogue_scholar_with_doi.yml +163 -0
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/write_metadata_as_crossref/json_post_from_upstream_blog.yml +243 -0
- data/spec/readers/json_post_reader_spec.rb +89 -0
- data/spec/utils_spec.rb +330 -314
- data/spec/writers/crossref_xml_writer_spec.rb +183 -137
- metadata +22 -5
data/spec/utils_spec.rb
CHANGED
@@ -1,649 +1,665 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require
|
3
|
+
require "spec_helper"
|
4
4
|
|
5
5
|
describe Commonmeta::Metadata, vcr: true do
|
6
|
-
subject { described_class.new(input: input, from:
|
6
|
+
subject { described_class.new(input: input, from: "crossref") }
|
7
7
|
|
8
|
-
let(:input) {
|
8
|
+
let(:input) { "https://doi.org/10.1101/097196" }
|
9
9
|
|
10
|
-
context
|
11
|
-
it
|
12
|
-
str =
|
10
|
+
context "validate url" do
|
11
|
+
it "DOI" do
|
12
|
+
str = "https://doi.org/10.5438/0000-00ss"
|
13
13
|
response = subject.validate_url(str)
|
14
|
-
expect(response).to eq(
|
14
|
+
expect(response).to eq("DOI")
|
15
15
|
end
|
16
16
|
|
17
|
-
it
|
18
|
-
str =
|
17
|
+
it "URL" do
|
18
|
+
str = "https://blog.datacite.org/eating-your-own-dog-food"
|
19
19
|
response = subject.validate_url(str)
|
20
|
-
expect(response).to eq(
|
20
|
+
expect(response).to eq("URL")
|
21
21
|
end
|
22
22
|
|
23
|
-
it
|
24
|
-
str =
|
23
|
+
it "ISSN" do
|
24
|
+
str = "ISSN 2050-084X"
|
25
25
|
response = subject.validate_url(str)
|
26
|
-
expect(response).to eq(
|
26
|
+
expect(response).to eq("ISSN")
|
27
27
|
end
|
28
28
|
|
29
|
-
it
|
30
|
-
str =
|
29
|
+
it "string" do
|
30
|
+
str = "eating-your-own-dog-food"
|
31
31
|
response = subject.validate_url(str)
|
32
32
|
expect(response.nil?).to be(true)
|
33
33
|
end
|
34
34
|
end
|
35
35
|
|
36
|
-
context
|
37
|
-
it
|
38
|
-
orcid =
|
36
|
+
context "validate_orcid" do
|
37
|
+
it "validate_orcid" do
|
38
|
+
orcid = "http://orcid.org/0000-0002-2590-225X"
|
39
39
|
response = subject.validate_orcid(orcid)
|
40
|
-
expect(response).to eq(
|
40
|
+
expect(response).to eq("0000-0002-2590-225X")
|
41
41
|
end
|
42
42
|
|
43
|
-
it
|
44
|
-
orcid =
|
43
|
+
it "validate_orcid https" do
|
44
|
+
orcid = "https://orcid.org/0000-0002-2590-225X"
|
45
45
|
response = subject.validate_orcid(orcid)
|
46
|
-
expect(response).to eq(
|
46
|
+
expect(response).to eq("0000-0002-2590-225X")
|
47
47
|
end
|
48
48
|
|
49
|
-
it
|
50
|
-
orcid =
|
49
|
+
it "validate_orcid id" do
|
50
|
+
orcid = "0000-0002-2590-225X"
|
51
51
|
response = subject.validate_orcid(orcid)
|
52
|
-
expect(response).to eq(
|
52
|
+
expect(response).to eq("0000-0002-2590-225X")
|
53
53
|
end
|
54
54
|
|
55
|
-
it
|
56
|
-
orcid =
|
55
|
+
it "validate_orcid www" do
|
56
|
+
orcid = "http://www.orcid.org/0000-0002-2590-225X"
|
57
57
|
response = subject.validate_orcid(orcid)
|
58
|
-
expect(response).to eq(
|
58
|
+
expect(response).to eq("0000-0002-2590-225X")
|
59
59
|
end
|
60
60
|
|
61
|
-
it
|
62
|
-
orcid =
|
61
|
+
it "validate_orcid with spaces" do
|
62
|
+
orcid = "0000 0002 1394 3097"
|
63
63
|
response = subject.validate_orcid(orcid)
|
64
|
-
expect(response).to eq(
|
64
|
+
expect(response).to eq("0000-0002-1394-3097")
|
65
65
|
end
|
66
66
|
|
67
|
-
it
|
68
|
-
orcid =
|
67
|
+
it "validate_orcid sandbox" do
|
68
|
+
orcid = "http://sandbox.orcid.org/0000-0002-2590-225X"
|
69
69
|
response = subject.validate_orcid(orcid)
|
70
|
-
expect(response).to eq(
|
70
|
+
expect(response).to eq("0000-0002-2590-225X")
|
71
71
|
end
|
72
72
|
|
73
|
-
it
|
74
|
-
orcid =
|
73
|
+
it "validate_orcid sandbox https" do
|
74
|
+
orcid = "https://sandbox.orcid.org/0000-0002-2590-225X"
|
75
75
|
response = subject.validate_orcid(orcid)
|
76
|
-
expect(response).to eq(
|
76
|
+
expect(response).to eq("0000-0002-2590-225X")
|
77
77
|
end
|
78
78
|
|
79
|
-
it
|
80
|
-
orcid =
|
79
|
+
it "validate_orcid wrong id" do
|
80
|
+
orcid = "0000-0002-1394-309"
|
81
81
|
response = subject.validate_orcid(orcid)
|
82
82
|
expect(response.nil?).to be(true)
|
83
83
|
end
|
84
84
|
end
|
85
85
|
|
86
|
-
context
|
87
|
-
it
|
88
|
-
orcid =
|
86
|
+
context "validate_orcid_scheme" do
|
87
|
+
it "validate_orcid_scheme" do
|
88
|
+
orcid = "http://orcid.org"
|
89
89
|
response = subject.validate_orcid_scheme(orcid)
|
90
|
-
expect(response).to eq(
|
90
|
+
expect(response).to eq("orcid.org")
|
91
91
|
end
|
92
92
|
|
93
|
-
it
|
94
|
-
orcid =
|
93
|
+
it "validate_orcid_scheme trailing slash" do
|
94
|
+
orcid = "http://orcid.org/"
|
95
95
|
response = subject.validate_orcid_scheme(orcid)
|
96
|
-
expect(response).to eq(
|
96
|
+
expect(response).to eq("orcid.org")
|
97
97
|
end
|
98
98
|
|
99
|
-
it
|
100
|
-
orcid =
|
99
|
+
it "validate_orcid_scheme https" do
|
100
|
+
orcid = "https://orcid.org"
|
101
101
|
response = subject.validate_orcid_scheme(orcid)
|
102
|
-
expect(response).to eq(
|
102
|
+
expect(response).to eq("orcid.org")
|
103
103
|
end
|
104
104
|
|
105
|
-
it
|
106
|
-
orcid =
|
105
|
+
it "validate_orcid_scheme www" do
|
106
|
+
orcid = "http://www.orcid.org"
|
107
107
|
response = subject.validate_orcid_scheme(orcid)
|
108
|
-
expect(response).to eq(
|
108
|
+
expect(response).to eq("orcid.org")
|
109
109
|
end
|
110
110
|
end
|
111
111
|
|
112
|
-
context
|
113
|
-
it
|
114
|
-
element =
|
112
|
+
context "parse attributes" do
|
113
|
+
it "string" do
|
114
|
+
element = "10.5061/DRYAD.8515"
|
115
115
|
response = subject.parse_attributes(element)
|
116
|
-
expect(response).to eq(
|
116
|
+
expect(response).to eq("10.5061/DRYAD.8515")
|
117
117
|
end
|
118
118
|
|
119
|
-
it
|
120
|
-
element = {
|
119
|
+
it "hash" do
|
120
|
+
element = { "__content__" => "10.5061/DRYAD.8515" }
|
121
121
|
response = subject.parse_attributes(element)
|
122
|
-
expect(response).to eq(
|
122
|
+
expect(response).to eq("10.5061/DRYAD.8515")
|
123
123
|
end
|
124
124
|
|
125
|
-
it
|
126
|
-
element = {
|
125
|
+
it "hash with array value" do
|
126
|
+
element = { "__content__" => ["10.5061/DRYAD.8515", "10.5061/DRYAD.8516"] }
|
127
127
|
response = subject.parse_attributes(element)
|
128
|
-
expect(response).to eq([
|
128
|
+
expect(response).to eq(["10.5061/DRYAD.8515", "10.5061/DRYAD.8516"])
|
129
129
|
end
|
130
130
|
|
131
|
-
it
|
132
|
-
element = [{
|
131
|
+
it "array" do
|
132
|
+
element = [{ "__content__" => "10.5061/DRYAD.8515" }]
|
133
133
|
response = subject.parse_attributes(element)
|
134
|
-
expect(response).to eq(
|
134
|
+
expect(response).to eq("10.5061/DRYAD.8515")
|
135
135
|
end
|
136
136
|
|
137
|
-
it
|
137
|
+
it "array of strings" do
|
138
138
|
element = %w[datacite doi metadata featured]
|
139
139
|
response = subject.parse_attributes(element)
|
140
140
|
expect(response).to eq(%w[datacite doi metadata featured])
|
141
141
|
end
|
142
142
|
|
143
|
-
it
|
143
|
+
it "nil" do
|
144
144
|
element = nil
|
145
145
|
response = subject.parse_attributes(element)
|
146
146
|
expect(response.nil?).to be(true)
|
147
147
|
end
|
148
148
|
|
149
|
-
it
|
150
|
-
element = [{
|
151
|
-
{
|
149
|
+
it "first" do
|
150
|
+
element = [{ "__content__" => "10.5061/DRYAD.8515/1" },
|
151
|
+
{ "__content__" => "10.5061/DRYAD.8515/2" }]
|
152
152
|
response = subject.parse_attributes(element, first: true)
|
153
|
-
expect(response).to eq(
|
153
|
+
expect(response).to eq("10.5061/DRYAD.8515/1")
|
154
154
|
end
|
155
155
|
end
|
156
156
|
|
157
|
-
context
|
158
|
-
it
|
159
|
-
doi =
|
157
|
+
context "normalize id" do
|
158
|
+
it "doi" do
|
159
|
+
doi = "10.5061/DRYAD.8515"
|
160
160
|
response = subject.normalize_id(doi)
|
161
|
-
expect(response).to eq(
|
161
|
+
expect(response).to eq("https://doi.org/10.5061/dryad.8515")
|
162
162
|
end
|
163
163
|
|
164
|
-
it
|
165
|
-
doi =
|
164
|
+
it "doi as url" do
|
165
|
+
doi = "http://dx.doi.org/10.5061/DRYAD.8515"
|
166
166
|
response = subject.normalize_id(doi)
|
167
|
-
expect(response).to eq(
|
167
|
+
expect(response).to eq("https://doi.org/10.5061/dryad.8515")
|
168
168
|
end
|
169
169
|
|
170
|
-
it
|
171
|
-
url =
|
170
|
+
it "url" do
|
171
|
+
url = "https://blog.datacite.org/eating-your-own-dog-food/"
|
172
172
|
response = subject.normalize_id(url)
|
173
|
-
expect(response).to eq(
|
173
|
+
expect(response).to eq("https://blog.datacite.org/eating-your-own-dog-food")
|
174
174
|
end
|
175
175
|
|
176
|
-
it
|
177
|
-
url =
|
176
|
+
it "url with utf-8" do
|
177
|
+
url = "http://www.詹姆斯.com/eating-your-own-dog-food/"
|
178
178
|
response = subject.normalize_id(url)
|
179
|
-
expect(response).to eq(
|
179
|
+
expect(response).to eq("http://www.xn--8ws00zhy3a.com/eating-your-own-dog-food")
|
180
180
|
end
|
181
181
|
|
182
|
-
it
|
183
|
-
url =
|
182
|
+
it "ftp" do
|
183
|
+
url = "ftp://blog.datacite.org/eating-your-own-dog-food/"
|
184
184
|
response = subject.normalize_id(url)
|
185
185
|
expect(response.nil?).to be(true)
|
186
186
|
end
|
187
187
|
|
188
|
-
it
|
189
|
-
url =
|
188
|
+
it "invalid url" do
|
189
|
+
url = "http://"
|
190
190
|
response = subject.normalize_id(url)
|
191
191
|
expect(response.nil?).to be(true)
|
192
192
|
end
|
193
193
|
|
194
|
-
it
|
195
|
-
url =
|
194
|
+
it "string" do
|
195
|
+
url = "eating-your-own-dog-food"
|
196
196
|
response = subject.normalize_id(url)
|
197
197
|
expect(response.nil?).to be(true)
|
198
198
|
end
|
199
199
|
|
200
|
-
it
|
201
|
-
url =
|
200
|
+
it "filename" do
|
201
|
+
url = "crossref.xml"
|
202
202
|
response = subject.normalize_id(url)
|
203
203
|
expect(response.nil?).to be(true)
|
204
204
|
end
|
205
205
|
|
206
|
-
it
|
207
|
-
url =
|
206
|
+
it "sandbox via url" do
|
207
|
+
url = "https://handle.stage.datacite.org/10.20375/0000-0001-ddb8-7"
|
208
208
|
response = subject.normalize_id(url)
|
209
|
-
expect(response).to eq(
|
209
|
+
expect(response).to eq("https://handle.stage.datacite.org/10.20375/0000-0001-ddb8-7")
|
210
210
|
end
|
211
211
|
|
212
|
-
it
|
213
|
-
url =
|
212
|
+
it "sandbox via options" do
|
213
|
+
url = "10.20375/0000-0001-ddb8-7"
|
214
214
|
response = subject.normalize_id(url, sandbox: true)
|
215
|
-
expect(response).to eq(
|
215
|
+
expect(response).to eq("https://handle.stage.datacite.org/10.20375/0000-0001-ddb8-7")
|
216
216
|
end
|
217
217
|
end
|
218
218
|
|
219
|
-
context
|
220
|
-
it
|
221
|
-
url =
|
219
|
+
context "normalize url" do
|
220
|
+
it "with trailing slash" do
|
221
|
+
url = "http://creativecommons.org/publicdomain/zero/1.0/"
|
222
222
|
response = subject.normalize_url(url)
|
223
|
-
expect(response).to eq(
|
223
|
+
expect(response).to eq("http://creativecommons.org/publicdomain/zero/1.0")
|
224
224
|
end
|
225
225
|
|
226
|
-
it
|
227
|
-
url =
|
226
|
+
it "with trailing slash and to https" do
|
227
|
+
url = "http://creativecommons.org/publicdomain/zero/1.0/"
|
228
228
|
response = subject.normalize_url(url, https: true)
|
229
|
-
expect(response).to eq(
|
229
|
+
expect(response).to eq("https://creativecommons.org/publicdomain/zero/1.0")
|
230
230
|
end
|
231
231
|
|
232
|
-
it
|
233
|
-
url =
|
232
|
+
it "uri" do
|
233
|
+
url = "info:eu-repo/semantics/openAccess"
|
234
234
|
response = subject.normalize_url(url)
|
235
|
-
expect(response).to eq(
|
235
|
+
expect(response).to eq("info:eu-repo/semantics/openAccess")
|
236
236
|
end
|
237
237
|
end
|
238
238
|
|
239
|
-
context
|
240
|
-
it
|
241
|
-
url =
|
239
|
+
context "normalize cc url" do
|
240
|
+
it "with trailing slash" do
|
241
|
+
url = "http://creativecommons.org/publicdomain/zero/1.0/"
|
242
242
|
response = subject.normalize_cc_url(url)
|
243
|
-
expect(response).to eq(
|
243
|
+
expect(response).to eq("https://creativecommons.org/publicdomain/zero/1.0/legalcode")
|
244
244
|
end
|
245
245
|
|
246
|
-
it
|
247
|
-
url =
|
246
|
+
it "with trailing slash and to https" do
|
247
|
+
url = "http://creativecommons.org/publicdomain/zero/1.0/"
|
248
248
|
response = subject.normalize_cc_url(url)
|
249
|
-
expect(response).to eq(
|
249
|
+
expect(response).to eq("https://creativecommons.org/publicdomain/zero/1.0/legalcode")
|
250
250
|
end
|
251
251
|
|
252
|
-
it
|
253
|
-
url =
|
252
|
+
it "not found" do
|
253
|
+
url = "http://creativecommons.org/publicdomain/zero/2.0/"
|
254
254
|
response = subject.normalize_cc_url(url)
|
255
|
-
expect(response).to eq(
|
255
|
+
expect(response).to eq("https://creativecommons.org/publicdomain/zero/2.0")
|
256
256
|
end
|
257
257
|
end
|
258
258
|
|
259
|
-
context
|
260
|
-
it
|
261
|
-
input = [{
|
262
|
-
{
|
259
|
+
context "normalize issn" do
|
260
|
+
it "from array" do
|
261
|
+
input = [{ "media_type" => "print", "__content__" => "13040855" },
|
262
|
+
{ "media_type" => "electronic", "__content__" => "21468427" }]
|
263
263
|
response = subject.normalize_issn(input)
|
264
|
-
expect(response).to eq(
|
264
|
+
expect(response).to eq("2146-8427")
|
265
265
|
end
|
266
266
|
|
267
|
-
it
|
267
|
+
it "from empty array" do
|
268
268
|
input = []
|
269
269
|
response = subject.normalize_issn(input)
|
270
270
|
expect(response.nil?).to be(true)
|
271
271
|
end
|
272
272
|
|
273
|
-
it
|
274
|
-
input = {
|
273
|
+
it "from hash" do
|
274
|
+
input = { "media_type" => "electronic", "__content__" => "21468427" }
|
275
275
|
response = subject.normalize_issn(input)
|
276
|
-
expect(response).to eq(
|
276
|
+
expect(response).to eq("2146-8427")
|
277
277
|
end
|
278
278
|
|
279
|
-
it
|
280
|
-
input =
|
279
|
+
it "from string" do
|
280
|
+
input = "2146-8427"
|
281
281
|
response = subject.normalize_issn(input)
|
282
|
-
expect(response).to eq(
|
282
|
+
expect(response).to eq("2146-8427")
|
283
283
|
end
|
284
284
|
end
|
285
285
|
|
286
|
-
context
|
287
|
-
it
|
288
|
-
author = {
|
286
|
+
context "to_schema_org" do
|
287
|
+
it "with id" do
|
288
|
+
author = { "type" => "Person", "id" => "http://orcid.org/0000-0003-1419-2405", "givenName" => "Martin", "familyName" => "Fenner", "name" => "Martin Fenner" }
|
289
289
|
response = subject.to_schema_org(author)
|
290
|
-
expect(response).to eq(
|
291
|
-
|
290
|
+
expect(response).to eq("givenName" => "Martin", "familyName" => "Fenner",
|
291
|
+
"name" => "Martin Fenner", "@type" => "Person", "@id" => "http://orcid.org/0000-0003-1419-2405")
|
292
292
|
end
|
293
293
|
end
|
294
294
|
|
295
|
-
context
|
296
|
-
it
|
297
|
-
author = {
|
295
|
+
context "from_schema_org" do
|
296
|
+
it "with @id" do
|
297
|
+
author = { "@type" => "Person", "@id" => "http://orcid.org/0000-0003-1419-2405", "givenName" => "Martin", "familyName" => "Fenner", "name" => "Martin Fenner" }
|
298
298
|
response = subject.from_schema_org(author)
|
299
|
-
expect(response).to eq(
|
300
|
-
|
299
|
+
expect(response).to eq("givenName" => "Martin", "familyName" => "Fenner",
|
300
|
+
"name" => "Martin Fenner", "type" => "Person", "id" => "http://orcid.org/0000-0003-1419-2405")
|
301
301
|
end
|
302
302
|
end
|
303
303
|
|
304
|
-
context
|
305
|
-
it
|
304
|
+
context "to_schema_org_identifiers" do
|
305
|
+
it "with identifiers" do
|
306
306
|
identifiers = [
|
307
|
-
{
|
308
|
-
|
307
|
+
{ "alternateIdentifier" => "https://doi.org/10.23725/8na3-9s47",
|
308
|
+
"alternateIdentifierType" => "DOI" }, { "alternateIdentifierType" => "md5", "alternateIdentifier" => "3b33f6b9338fccab0901b7d317577ea3" }, { "alternateIdentifierType" => "minid", "alternateIdentifier" => "ark:/99999/fk41CrU4eszeLUDe" }, { "alternateIdentifierType" => "dataguid", "alternateIdentifier" => "dg.4503/c3d66dc9-58da-411c-83c4-dd656aa3c4b7" },
|
309
309
|
]
|
310
|
-
response = subject.to_schema_org_identifiers(identifiers, type:
|
311
|
-
expect(response).to eq([{
|
312
|
-
{
|
313
|
-
|
314
|
-
|
315
|
-
{
|
316
|
-
|
317
|
-
|
318
|
-
{
|
319
|
-
|
320
|
-
|
310
|
+
response = subject.to_schema_org_identifiers(identifiers, type: "Dataset")
|
311
|
+
expect(response).to eq([{ "@type" => "PropertyValue", "propertyID" => "DOI", "value" => "https://doi.org/10.23725/8na3-9s47" },
|
312
|
+
{ "@type" => "PropertyValue",
|
313
|
+
"propertyID" => "md5",
|
314
|
+
"value" => "3b33f6b9338fccab0901b7d317577ea3" },
|
315
|
+
{ "@type" => "PropertyValue",
|
316
|
+
"propertyID" => "minid",
|
317
|
+
"value" => "ark:/99999/fk41CrU4eszeLUDe" },
|
318
|
+
{ "@type" => "PropertyValue",
|
319
|
+
"propertyID" => "dataguid",
|
320
|
+
"value" => "dg.4503/c3d66dc9-58da-411c-83c4-dd656aa3c4b7" }])
|
321
321
|
end
|
322
322
|
end
|
323
323
|
|
324
|
-
context
|
325
|
-
it
|
324
|
+
context "sanitize" do
|
325
|
+
it "removes a tags" do
|
326
326
|
text = 'In 1998 <strong>Tim Berners-Lee</strong> coined the term <a href="https://www.w3.org/Provider/Style/URI">cool URIs</a>'
|
327
327
|
content = subject.sanitize(text)
|
328
|
-
expect(content).to eq(
|
328
|
+
expect(content).to eq("In 1998 <strong>Tim Berners-Lee</strong> coined the term cool URIs")
|
329
329
|
end
|
330
330
|
|
331
|
-
it
|
331
|
+
it "onlies keep specific tags" do
|
332
332
|
text = 'In 1998 <strong>Tim Berners-Lee</strong> coined the term <a href="https://www.w3.org/Provider/Style/URI">cool URIs</a>'
|
333
|
-
content = subject.sanitize(text, tags: [
|
333
|
+
content = subject.sanitize(text, tags: ["a"])
|
334
334
|
expect(content).to eq('In 1998 Tim Berners-Lee coined the term <a href="https://www.w3.org/Provider/Style/URI">cool URIs</a>')
|
335
335
|
end
|
336
336
|
end
|
337
337
|
|
338
|
-
context
|
339
|
-
it
|
340
|
-
time =
|
338
|
+
context "get_datetime_from_time" do
|
339
|
+
it "present" do
|
340
|
+
time = "20200226071709"
|
341
341
|
response = subject.get_datetime_from_time(time)
|
342
|
-
expect(response).to eq(
|
342
|
+
expect(response).to eq("2020-02-26T07:17:09Z")
|
343
343
|
end
|
344
344
|
|
345
|
-
it
|
346
|
-
time =
|
345
|
+
it "past" do
|
346
|
+
time = "18770312071709"
|
347
347
|
response = subject.get_datetime_from_time(time)
|
348
|
-
expect(response).to eq(
|
348
|
+
expect(response).to eq("1877-03-12T07:17:09Z")
|
349
349
|
end
|
350
350
|
|
351
|
-
it
|
352
|
-
time =
|
351
|
+
it "future" do
|
352
|
+
time = "20970114071709"
|
353
353
|
response = subject.get_datetime_from_time(time)
|
354
|
-
expect(response).to eq(
|
354
|
+
expect(response).to eq("2097-01-14T07:17:09Z")
|
355
355
|
end
|
356
356
|
|
357
|
-
it
|
358
|
-
time =
|
357
|
+
it "invalid" do
|
358
|
+
time = "20201587168864794"
|
359
359
|
response = subject.get_datetime_from_time(time)
|
360
360
|
expect(response.nil?).to be(true)
|
361
361
|
end
|
362
362
|
|
363
|
-
it
|
363
|
+
it "nil" do
|
364
364
|
time = nil
|
365
365
|
response = subject.get_datetime_from_time(time)
|
366
366
|
expect(response.nil?).to be(true)
|
367
367
|
end
|
368
368
|
end
|
369
369
|
|
370
|
-
context
|
371
|
-
it
|
372
|
-
date =
|
370
|
+
context "get_date_parts" do
|
371
|
+
it "date" do
|
372
|
+
date = "2016-12-20"
|
373
373
|
response = subject.get_date_parts(date)
|
374
|
-
expect(response).to eq(
|
374
|
+
expect(response).to eq("date-parts" => [[2016, 12, 20]])
|
375
375
|
end
|
376
376
|
|
377
|
-
it
|
378
|
-
date =
|
377
|
+
it "year-month" do
|
378
|
+
date = "2016-12"
|
379
379
|
response = subject.get_date_parts(date)
|
380
|
-
expect(response).to eq(
|
380
|
+
expect(response).to eq("date-parts" => [[2016, 12]])
|
381
381
|
end
|
382
382
|
|
383
|
-
it
|
384
|
-
date =
|
383
|
+
it "year" do
|
384
|
+
date = "2016"
|
385
385
|
response = subject.get_date_parts(date)
|
386
|
-
expect(response).to eq(
|
386
|
+
expect(response).to eq("date-parts" => [[2016]])
|
387
387
|
end
|
388
388
|
end
|
389
389
|
|
390
|
-
context
|
391
|
-
it
|
390
|
+
context "get_date_from_parts" do
|
391
|
+
it "date" do
|
392
392
|
response = subject.get_date_from_parts(2016, 12, 20)
|
393
|
-
expect(response).to eq(
|
393
|
+
expect(response).to eq("2016-12-20")
|
394
394
|
end
|
395
395
|
|
396
|
-
it
|
396
|
+
it "year-month" do
|
397
397
|
response = subject.get_date_from_parts(2016, 12)
|
398
|
-
expect(response).to eq(
|
398
|
+
expect(response).to eq("2016-12")
|
399
399
|
end
|
400
400
|
|
401
|
-
it
|
401
|
+
it "year" do
|
402
402
|
response = subject.get_date_from_parts(2016)
|
403
|
-
expect(response).to eq(
|
403
|
+
expect(response).to eq("2016")
|
404
404
|
end
|
405
405
|
end
|
406
406
|
|
407
|
-
context
|
408
|
-
it
|
409
|
-
date_as_parts = {
|
407
|
+
context "get_date_from_date_parts" do
|
408
|
+
it "date" do
|
409
|
+
date_as_parts = { "date-parts" => [[2016, 12, 20]] }
|
410
410
|
response = subject.get_date_from_date_parts(date_as_parts)
|
411
|
-
expect(response).to eq(
|
411
|
+
expect(response).to eq("2016-12-20")
|
412
412
|
end
|
413
413
|
|
414
|
-
it
|
415
|
-
date_as_parts = {
|
414
|
+
it "year-month" do
|
415
|
+
date_as_parts = { "date-parts" => [[2016, 12]] }
|
416
416
|
response = subject.get_date_from_date_parts(date_as_parts)
|
417
|
-
expect(response).to eq(
|
417
|
+
expect(response).to eq("2016-12")
|
418
418
|
end
|
419
419
|
|
420
|
-
it
|
421
|
-
date_as_parts = {
|
420
|
+
it "year" do
|
421
|
+
date_as_parts = { "date-parts" => [[2016]] }
|
422
422
|
response = subject.get_date_from_date_parts(date_as_parts)
|
423
|
-
expect(response).to eq(
|
423
|
+
expect(response).to eq("2016")
|
424
424
|
end
|
425
425
|
end
|
426
426
|
|
427
|
-
context
|
428
|
-
it
|
429
|
-
dates = [{
|
430
|
-
response = subject.get_date(dates,
|
431
|
-
expect(response).to eq(
|
427
|
+
context "get_date" do
|
428
|
+
it "publication date" do
|
429
|
+
dates = [{ "date" => "2016-12-20", "dateType" => "Issued" }]
|
430
|
+
response = subject.get_date(dates, "Issued")
|
431
|
+
expect(response).to eq("2016-12-20")
|
432
432
|
end
|
433
433
|
end
|
434
434
|
|
435
|
-
context
|
436
|
-
|
435
|
+
context "get_link" do
|
436
|
+
links = [{ "rel" => "self", "type" => "application/atom+xml", "href" => "https://syldavia-gazette.org/atom/" },
|
437
|
+
{ "rel" => "alternate", "type" => "text/html", "href" => "https://syldavia-gazette.org" },
|
438
|
+
{ "rel" => "license", "type" => "text/html", "href" => "https://creativecommons.org/licenses/by/4.0/legalcode" }]
|
439
|
+
|
440
|
+
it "url" do
|
441
|
+
response = subject.get_link(links, "self")
|
442
|
+
expect(response).to eq("https://syldavia-gazette.org/atom/")
|
443
|
+
end
|
444
|
+
|
445
|
+
it "license" do
|
446
|
+
response = subject.get_link(links, "license")
|
447
|
+
expect(response).to eq("https://creativecommons.org/licenses/by/4.0/legalcode")
|
448
|
+
end
|
449
|
+
end
|
450
|
+
|
451
|
+
context "get_series_information" do
|
452
|
+
it "only title" do
|
437
453
|
str = nil
|
438
454
|
response = subject.get_series_information(str)
|
439
455
|
expect(response).to eq({})
|
440
456
|
end
|
441
457
|
|
442
|
-
it
|
443
|
-
str =
|
458
|
+
it "only title" do
|
459
|
+
str = "DataCite Blog"
|
444
460
|
response = subject.get_series_information(str)
|
445
|
-
expect(response).to eq(
|
461
|
+
expect(response).to eq("title" => "DataCite Blog")
|
446
462
|
end
|
447
463
|
|
448
|
-
it
|
449
|
-
str =
|
464
|
+
it "title and pages" do
|
465
|
+
str = "DataCite Blog, 1-3"
|
450
466
|
response = subject.get_series_information(str)
|
451
|
-
expect(response).to eq(
|
467
|
+
expect(response).to eq("firstPage" => "1", "lastPage" => "3", "title" => "DataCite Blog")
|
452
468
|
end
|
453
469
|
|
454
|
-
it
|
455
|
-
str =
|
470
|
+
it "title, volume and pages" do
|
471
|
+
str = "DataCite Blog, 7, 1-3"
|
456
472
|
response = subject.get_series_information(str)
|
457
|
-
expect(response).to eq(
|
458
|
-
|
473
|
+
expect(response).to eq("firstPage" => "1", "lastPage" => "3", "title" => "DataCite Blog",
|
474
|
+
"volume" => "7")
|
459
475
|
end
|
460
476
|
|
461
|
-
it
|
462
|
-
str =
|
477
|
+
it "title, volume, issue and pages" do
|
478
|
+
str = "DataCite Blog, 7(11), 1-3"
|
463
479
|
response = subject.get_series_information(str)
|
464
|
-
expect(response).to eq(
|
465
|
-
|
480
|
+
expect(response).to eq("firstPage" => "1", "issue" => "11", "lastPage" => "3",
|
481
|
+
"title" => "DataCite Blog", "volume" => "7")
|
466
482
|
end
|
467
483
|
end
|
468
484
|
|
469
|
-
context
|
470
|
-
it
|
471
|
-
url =
|
485
|
+
context "github" do
|
486
|
+
it "github_from_url" do
|
487
|
+
url = "https://github.com/datacite/bolognese"
|
472
488
|
response = subject.github_from_url(url)
|
473
|
-
expect(response).to eq(owner:
|
489
|
+
expect(response).to eq(owner: "datacite", repo: "bolognese")
|
474
490
|
end
|
475
491
|
|
476
|
-
it
|
477
|
-
url =
|
492
|
+
it "github_from_url file" do
|
493
|
+
url = "https://github.com/datacite/metadata-reports/blob/master/software/codemeta.json"
|
478
494
|
response = subject.github_from_url(url)
|
479
|
-
expect(response).to eq(owner:
|
480
|
-
path:
|
495
|
+
expect(response).to eq(owner: "datacite", repo: "metadata-reports", release: "master",
|
496
|
+
path: "software/codemeta.json")
|
481
497
|
end
|
482
498
|
|
483
|
-
it
|
484
|
-
url =
|
499
|
+
it "github_from_url cff file" do
|
500
|
+
url = "https://github.com/citation-file-format/ruby-cff/blob/main/CITATION.cff"
|
485
501
|
response = subject.github_from_url(url)
|
486
|
-
expect(response).to eq(owner:
|
487
|
-
release:
|
502
|
+
expect(response).to eq(owner: "citation-file-format", path: "CITATION.cff",
|
503
|
+
release: "main", repo: "ruby-cff")
|
488
504
|
end
|
489
505
|
|
490
|
-
it
|
491
|
-
url =
|
506
|
+
it "github_as_codemeta_url" do
|
507
|
+
url = "https://github.com/datacite/bolognese"
|
492
508
|
response = subject.github_as_codemeta_url(url)
|
493
|
-
expect(response).to eq(
|
509
|
+
expect(response).to eq("https://raw.githubusercontent.com/datacite/bolognese/master/codemeta.json")
|
494
510
|
end
|
495
511
|
|
496
|
-
it
|
497
|
-
url =
|
512
|
+
it "github_as_cff_url" do
|
513
|
+
url = "https://github.com/citation-file-format/ruby-cff"
|
498
514
|
response = subject.github_as_cff_url(url)
|
499
|
-
expect(response).to eq(
|
515
|
+
expect(response).to eq("https://raw.githubusercontent.com/citation-file-format/ruby-cff/main/CITATION.cff")
|
500
516
|
end
|
501
517
|
|
502
|
-
it
|
503
|
-
url =
|
518
|
+
it "github_from_url file" do
|
519
|
+
url = "https://github.com/datacite/metadata-reports/blob/master/software/codemeta.json"
|
504
520
|
response = subject.github_as_codemeta_url(url)
|
505
|
-
expect(response).to eq(
|
521
|
+
expect(response).to eq("https://raw.githubusercontent.com/datacite/metadata-reports/master/software/codemeta.json")
|
506
522
|
end
|
507
523
|
end
|
508
524
|
|
509
|
-
context
|
510
|
-
it
|
511
|
-
name =
|
525
|
+
context "spdx" do
|
526
|
+
it "name_to_spdx exists" do
|
527
|
+
name = "Creative Commons Attribution 4.0 International"
|
512
528
|
response = subject.name_to_spdx(name)
|
513
|
-
expect(response).to eq(
|
514
|
-
|
529
|
+
expect(response).to eq("id" => "CC-BY-4.0",
|
530
|
+
"url" => "https://creativecommons.org/licenses/by/4.0/legalcode")
|
515
531
|
end
|
516
532
|
|
517
|
-
it
|
518
|
-
name =
|
533
|
+
it "name_to_spdx id" do
|
534
|
+
name = "CC-BY-4.0"
|
519
535
|
response = subject.name_to_spdx(name)
|
520
|
-
expect(response).to eq(
|
521
|
-
|
536
|
+
expect(response).to eq("id" => "CC-BY-4.0",
|
537
|
+
"url" => "https://creativecommons.org/licenses/by/4.0/legalcode")
|
522
538
|
end
|
523
539
|
|
524
|
-
it
|
525
|
-
hsh = {
|
540
|
+
it "hsh_to_spdx id" do
|
541
|
+
hsh = { "rightsIdentifier" => "CC-BY-4.0" }
|
526
542
|
response = subject.hsh_to_spdx(hsh)
|
527
|
-
expect(response).to eq(
|
528
|
-
|
543
|
+
expect(response).to eq("id" => "CC-BY-4.0",
|
544
|
+
"url" => "https://creativecommons.org/licenses/by/4.0/legalcode")
|
529
545
|
end
|
530
546
|
|
531
|
-
it
|
532
|
-
hsh = {
|
547
|
+
it "hsh_to_spdx url" do
|
548
|
+
hsh = { "rightsURI" => "http://creativecommons.org/licenses/by-nc/4.0/legalcode" }
|
533
549
|
response = subject.hsh_to_spdx(hsh)
|
534
550
|
expect(response).to eq(
|
535
|
-
|
536
|
-
|
551
|
+
"id" => "CC-BY-NC-4.0",
|
552
|
+
"url" => "https://creativecommons.org/licenses/by-nc/4.0/legalcode",
|
537
553
|
)
|
538
554
|
end
|
539
555
|
|
540
|
-
it
|
541
|
-
hsh = {
|
556
|
+
it "hsh_to_spdx not found" do
|
557
|
+
hsh = { "rightsURI" => "info:eu-repo/semantics/openAccess" }
|
542
558
|
response = subject.hsh_to_spdx(hsh)
|
543
|
-
expect(response).to eq({
|
559
|
+
expect(response).to eq({ "url" => "info:eu-repo/semantics/openAccess" })
|
544
560
|
end
|
545
561
|
end
|
546
562
|
|
547
|
-
context
|
548
|
-
it
|
549
|
-
name =
|
563
|
+
context "fos" do
|
564
|
+
it "name_to_fos match" do
|
565
|
+
name = "Biological sciences"
|
550
566
|
response = subject.name_to_fos(name)
|
551
|
-
expect(response).to eq([{
|
552
|
-
{
|
553
|
-
|
554
|
-
|
567
|
+
expect(response).to eq([{ "subject" => "biological sciences" },
|
568
|
+
{ "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf",
|
569
|
+
"subject" => "FOS: Biological sciences",
|
570
|
+
"subjectScheme" => "Fields of Science and Technology (FOS)" }])
|
555
571
|
end
|
556
572
|
|
557
|
-
it
|
558
|
-
name =
|
573
|
+
it "name_to_fos for match" do
|
574
|
+
name = "Statistics"
|
559
575
|
response = subject.name_to_fos(name)
|
560
|
-
expect(response).to eq([{
|
561
|
-
{
|
562
|
-
|
563
|
-
|
576
|
+
expect(response).to eq([{ "subject" => "statistics" },
|
577
|
+
{ "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf",
|
578
|
+
"subject" => "FOS: Mathematics",
|
579
|
+
"subjectScheme" => "Fields of Science and Technology (FOS)" }])
|
564
580
|
end
|
565
581
|
|
566
|
-
it
|
567
|
-
name =
|
582
|
+
it "name_to_fos no match" do
|
583
|
+
name = "Random tag"
|
568
584
|
response = subject.name_to_fos(name)
|
569
|
-
expect(response).to eq([{
|
585
|
+
expect(response).to eq([{ "subject" => "random tag" }])
|
570
586
|
end
|
571
587
|
|
572
|
-
it
|
573
|
-
hsh = {
|
588
|
+
it "hsh_to_fos match" do
|
589
|
+
hsh = { "__content__" => "Biological sciences" }
|
574
590
|
response = subject.hsh_to_fos(hsh)
|
575
|
-
expect(response).to eq([{
|
576
|
-
{
|
577
|
-
|
578
|
-
|
591
|
+
expect(response).to eq([{ "subject" => "Biological sciences" },
|
592
|
+
{ "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf",
|
593
|
+
"subject" => "FOS: Biological sciences",
|
594
|
+
"subjectScheme" => "Fields of Science and Technology (FOS)" }])
|
579
595
|
end
|
580
596
|
|
581
|
-
it
|
582
|
-
hsh = {
|
597
|
+
it "hsh_to_fos for match" do
|
598
|
+
hsh = { "__content__" => "Statistics" }
|
583
599
|
response = subject.hsh_to_fos(hsh)
|
584
|
-
expect(response).to eq([{
|
585
|
-
{
|
586
|
-
|
587
|
-
|
600
|
+
expect(response).to eq([{ "subject" => "Statistics" },
|
601
|
+
{ "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf",
|
602
|
+
"subject" => "FOS: Mathematics",
|
603
|
+
"subjectScheme" => "Fields of Science and Technology (FOS)" }])
|
588
604
|
end
|
589
605
|
|
590
|
-
it
|
606
|
+
it "hsh_to_fos for with schemeUri in hash" do
|
591
607
|
hsh = {
|
592
|
-
|
593
|
-
|
594
|
-
|
608
|
+
"subject" => "FOS: Computer and information sciences",
|
609
|
+
"subjectScheme" => "Fields of Science and Technology (FOS)",
|
610
|
+
"schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf",
|
595
611
|
}
|
596
612
|
response = subject.hsh_to_fos(hsh)
|
597
613
|
expect(response).to eq([{
|
598
|
-
|
599
|
-
|
600
|
-
|
601
|
-
|
614
|
+
"subject" => "FOS: Computer and information sciences",
|
615
|
+
"subjectScheme" => "Fields of Science and Technology (FOS)",
|
616
|
+
"schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf",
|
617
|
+
}])
|
602
618
|
end
|
603
619
|
|
604
|
-
it
|
605
|
-
hsh = {
|
620
|
+
it "hsh_to_fos no match" do
|
621
|
+
hsh = { "__content__" => "Random tag" }
|
606
622
|
response = subject.hsh_to_fos(hsh)
|
607
|
-
expect(response).to eq([{
|
623
|
+
expect(response).to eq([{ "subject" => "Random tag" }])
|
608
624
|
end
|
609
625
|
end
|
610
626
|
|
611
|
-
context
|
612
|
-
it
|
613
|
-
prefix =
|
627
|
+
context "random doi" do
|
628
|
+
it "encode doi" do
|
629
|
+
prefix = "10.53731"
|
614
630
|
response = subject.encode_doi(prefix)
|
615
631
|
expect(response).to match(%r{#{prefix}/[a-z0-9]+})
|
616
632
|
expect(response.length).to eq(36)
|
617
633
|
end
|
618
634
|
|
619
|
-
it
|
620
|
-
doi =
|
635
|
+
it "decode doi" do
|
636
|
+
doi = "https://doi.org/10.53731/revzwnv-rpd913d-8drwz"
|
621
637
|
response = subject.decode_doi(doi)
|
622
638
|
expect(response).to eq(30_286_005_717_401_267_192_153_432_991)
|
623
639
|
end
|
624
640
|
|
625
|
-
it
|
626
|
-
doi =
|
641
|
+
it "decode another doi" do
|
642
|
+
doi = "https://doi.org/10.53731/rckvde5-tzg61kj-7zvc1"
|
627
643
|
response = subject.decode_doi(doi)
|
628
644
|
expect(response).to eq(30_198_793_950_250_854_133_601_922_433)
|
629
645
|
end
|
630
646
|
end
|
631
647
|
|
632
|
-
context
|
633
|
-
it
|
648
|
+
context "random id" do
|
649
|
+
it "encode id" do
|
634
650
|
response = subject.encode_container_id
|
635
651
|
expect(response).to match(%r{[a-z0-9]+})
|
636
652
|
expect(response.length).to eq(7)
|
637
653
|
end
|
638
654
|
|
639
|
-
it
|
640
|
-
id=
|
655
|
+
it "decode id" do
|
656
|
+
id = "4425y27"
|
641
657
|
response = subject.decode_container_id(id)
|
642
658
|
expect(response).to eq(4_431_476_807)
|
643
659
|
end
|
644
660
|
|
645
|
-
it
|
646
|
-
id =
|
661
|
+
it "decode another id" do
|
662
|
+
id = "gr1by89"
|
647
663
|
response = subject.decode_container_id(id)
|
648
664
|
expect(response).to eq(17_986_615_561)
|
649
665
|
end
|