cirneco 0.9.13 → 0.9.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +2 -2
- data/lib/cirneco/data_center.rb +5 -0
- data/lib/cirneco/doi.rb +6 -1
- data/lib/cirneco/file_utils.rb +368 -0
- data/lib/cirneco/media.rb +3 -1
- data/lib/cirneco/metadata.rb +2 -0
- data/lib/cirneco/utils.rb +5 -379
- data/lib/cirneco/version.rb +1 -1
- data/lib/cirneco/work.rb +2 -0
- data/spec/api_spec.rb +3 -3
- data/spec/doi_spec.rb +7 -7
- data/spec/file_utils_spec.rb +203 -0
- data/spec/fixtures/cool-dois-minted/index.html +1 -1
- data/spec/fixtures/cool-dois-minted.html.md +1 -1
- data/spec/fixtures/cool-dois.html.md +1 -1
- data/spec/fixtures/index-minted.html +1 -1
- data/spec/fixtures/index.html +2 -2
- data/spec/fixtures/index.html.erb +1 -1
- data/spec/fixtures/vcr_cassettes/Cirneco_DataCenter/jats/should_generate_jats_for_all_urls.yml +4 -4
- data/spec/fixtures/vcr_cassettes/Cirneco_DataCenter/mint_and_hide_DOIs/should_hide_for_all_urls.yml +3 -44
- data/spec/fixtures/vcr_cassettes/Cirneco_DataCenter/mint_and_hide_DOIs/should_mint_and_hide_for_all_urls.yml +4 -374
- data/spec/fixtures/vcr_cassettes/Cirneco_DataCenter/mint_and_hide_DOIs/should_mint_for_all_urls.yml +4 -4
- data/spec/fixtures/vcr_cassettes/Cirneco_Doi/jats/writes_jats_for_list_of_urls.yml +4 -4
- data/spec/fixtures/vcr_cassettes/Cirneco_Doi/mint_and_hide_DOIs/hides_dois_for_list_of_urls.yml +4 -47
- data/spec/fixtures/vcr_cassettes/Cirneco_Doi/mint_and_hide_DOIs/mints_and_hides_dois_for_list_of_urls.yml +4 -374
- data/spec/fixtures/vcr_cassettes/Cirneco_Doi/mint_and_hide_DOIs/mints_dois_for_list_of_urls.yml +4 -4
- data/spec/fixtures/vcr_cassettes/Cirneco_Work/DOI_API/get/should_get_all_dois.yml +16 -577
- data/spec/fixtures/vcr_cassettes/Cirneco_Work/DOI_API/get/should_get_doi.yml +15 -58
- data/spec/fixtures/vcr_cassettes/Cirneco_Work/DOI_API/get/should_get_doi_not_found.yml +16 -57
- data/spec/fixtures/vcr_cassettes/Cirneco_Work/DOI_API/get/username_missing.yml +16 -16
- data/spec/fixtures/vcr_cassettes/Cirneco_Work/DOI_API/put/should_put_doi.yml +16 -63
- data/spec/fixtures/vcr_cassettes/Cirneco_Work/Media_API/get/should_get_media.yml +16 -61
- data/spec/fixtures/vcr_cassettes/Cirneco_Work/Media_API/post/should_post_media.yml +17 -62
- data/spec/fixtures/vcr_cassettes/Cirneco_Work/Metadata_API/delete/should_delete_metadata.yml +17 -60
- data/spec/fixtures/vcr_cassettes/Cirneco_Work/Metadata_API/get/should_get_metadata.yml +17 -104
- data/spec/fixtures/vcr_cassettes/Cirneco_Work/Metadata_API/post/should_post_metadata.yml +17 -108
- data/spec/fixtures/vcr_cassettes/Cirneco_Work/media/includes_media.yml +17 -17
- data/spec/fixtures/vcr_cassettes/Cirneco_Work/schema/BlogPosting.yml +15 -15
- data/spec/utils_spec.rb +9 -186
- data/spec/work_spec.rb +1 -1
- metadata +4 -12
- data/spec/fixtures/vcr_cassettes/Cirneco_DataCenter/get/should_get_all_dois_by_prefix.yml +0 -562
- data/spec/fixtures/vcr_cassettes/Cirneco_DataCenter/mint_and_hide_DOIs/should_hide_for_url.yml +0 -46
- data/spec/fixtures/vcr_cassettes/Cirneco_DataCenter/mint_and_hide_DOIs/should_hide_metadata_for_work.yml +0 -46
- data/spec/fixtures/vcr_cassettes/Cirneco_DataCenter/mint_and_hide_DOIs/should_mint_and_hide_for_url.yml +0 -182
- data/spec/fixtures/vcr_cassettes/Cirneco_DataCenter/mint_and_hide_DOIs/should_post_metadata_for_work.yml +0 -139
- data/spec/fixtures/vcr_cassettes/Cirneco_Doi/MDS_DOI_API/get/should_get_all_dois.yml +0 -564
- data/spec/fixtures/vcr_cassettes/Cirneco_Doi/MDS_DOI_API/get/should_get_doi.yml +0 -46
- data/spec/fixtures/vcr_cassettes/Cirneco_Doi/MDS_DOI_API/put/should_put_doi.yml +0 -50
- data/spec/fixtures/vcr_cassettes/Cirneco_Doi/mint_and_hide_DOIs/hides_a_doi.yml +0 -46
- data/spec/fixtures/vcr_cassettes/Cirneco_Doi/mint_and_hide_DOIs/mints_and_hides_a_doi.yml +0 -182
data/lib/cirneco/utils.rb
CHANGED
@@ -9,13 +9,6 @@ module Cirneco
|
|
9
9
|
# "ZZZZZZZ" decoded as number
|
10
10
|
UPPER_LIMIT = 34359738367
|
11
11
|
|
12
|
-
JATS_SCHEMA = File.expand_path("../../../resources/jats-1.1/JATS-journalpublishing1.xsd", __FILE__)
|
13
|
-
|
14
|
-
LICENSES = {
|
15
|
-
"https://creativecommons.org/licenses/by/4.0/" => "Creative Commons Attribution (CC-BY 4.0)",
|
16
|
-
"https://creativecommons.org/publicdomain/zero/1.0/" => "Creative Commons Public Domain Dedication (CC0 1.0)"
|
17
|
-
}
|
18
|
-
|
19
12
|
def get_dois_by_prefix(prefix, options={})
|
20
13
|
response = get_dois(options)
|
21
14
|
|
@@ -33,7 +26,11 @@ module Cirneco
|
|
33
26
|
def encode_doi(prefix, options={})
|
34
27
|
number = options[:number].to_s.scan(/\d+/).first.to_i
|
35
28
|
number = SecureRandom.random_number(UPPER_LIMIT) unless number > 0
|
36
|
-
|
29
|
+
shoulder = options[:shoulder].to_s
|
30
|
+
|
31
|
+
length = shoulder.length > 0 ? 6 : 8
|
32
|
+
split = shoulder.length > 0 ? nil: 4
|
33
|
+
prefix.to_s + "/" + shoulder + Base32::Crockford.encode(number, split: split, length: length, checksum: true).downcase
|
37
34
|
end
|
38
35
|
|
39
36
|
def generate_accession_number(options={})
|
@@ -60,376 +57,5 @@ module Cirneco
|
|
60
57
|
|
61
58
|
namespace + number
|
62
59
|
end
|
63
|
-
|
64
|
-
def get_accession_number(filepath)
|
65
|
-
metadata = Bergamasco::Markdown.read_yaml_for_doi_metadata(filepath)
|
66
|
-
metadata.fetch("accession_number", nil).to_s.scan(/\d+/).first.to_i
|
67
|
-
end
|
68
|
-
|
69
|
-
def get_all_accession_numbers(folderpath)
|
70
|
-
Dir.glob("#{folderpath}/*.md").sort.map do |filepath|
|
71
|
-
get_accession_number(filepath)
|
72
|
-
end.select { |a| a > 0 }.sort
|
73
|
-
end
|
74
|
-
|
75
|
-
def update_accession_number(filepath, options={})
|
76
|
-
filename = File.basename(filepath)
|
77
|
-
return "File #{filename} ignored: not a markdown or html file" unless %w(.md .html .erb).include?(File.extname(filepath))
|
78
|
-
|
79
|
-
old_metadata = Bergamasco::Markdown.read_yaml_for_doi_metadata(filepath)
|
80
|
-
return "File #{filename} ignored: no yaml front matter" unless old_metadata.present?
|
81
|
-
|
82
|
-
return "File #{filename} ignored: no empty accession_number" if options[:opt_in] && !old_metadata.key?("accession_number")
|
83
|
-
|
84
|
-
return "Accession number #{old_metadata["accession_number"]} not changed for #{filename}" if old_metadata["accession_number"]
|
85
|
-
|
86
|
-
if old_metadata["doi"].present?
|
87
|
-
number = decode_doi(old_metadata["doi"])
|
88
|
-
options[:number] = number if number > 0
|
89
|
-
else
|
90
|
-
folderpath = File.dirname(filepath)
|
91
|
-
options[:registered_numbers] = get_all_accession_numbers(folderpath)
|
92
|
-
end
|
93
|
-
|
94
|
-
accession_number = generate_accession_number(options)
|
95
|
-
|
96
|
-
new_metadata = Bergamasco::Markdown.update_file(filepath, "accession_number" => accession_number)
|
97
|
-
"Accession number #{new_metadata["accession_number"]} generated for #{filename}"
|
98
|
-
end
|
99
|
-
|
100
|
-
def update_all_accession_numbers(folderpath, options={})
|
101
|
-
Dir.glob("#{folderpath}/*.md").sort.map do |filepath|
|
102
|
-
update_accession_number(filepath, options)
|
103
|
-
end
|
104
|
-
end
|
105
|
-
|
106
|
-
# fetch schema.org metadata in JSON-LD format to mint DOI
|
107
|
-
def mint_doi_for_url(url, options={})
|
108
|
-
filename, build_path, source_path = filepath_from_url(url, options)
|
109
|
-
|
110
|
-
json = get_json_ld_from_work(build_path, options)
|
111
|
-
metadata = JSON.parse(json)
|
112
|
-
return "DOI #{metadata["doi"]} not changed for #{filename}" if metadata["doi"] && metadata["dateModified"] == metadata["datePublished"] && options[:force].blank?
|
113
|
-
|
114
|
-
prefix = options[:prefix] || ENV['PREFIX']
|
115
|
-
doi = encode_doi(prefix, number: metadata["alternateName"]) if metadata["doi"].blank?
|
116
|
-
|
117
|
-
new_metadata = Bergamasco::Markdown.update_file(source_path, "doi" => doi, "date" => metadata["dateModified"])
|
118
|
-
"DOI #{new_metadata["doi"]} minted for #{filename}"
|
119
|
-
end
|
120
|
-
|
121
|
-
# fetch schema.org metadata in JSON-LD format to mint DOI
|
122
|
-
def mint_and_hide_doi_for_url(url, options={})
|
123
|
-
filename, build_path, source_path = filepath_from_url(url, options)
|
124
|
-
|
125
|
-
json = get_json_ld_from_work(build_path, options)
|
126
|
-
metadata = JSON.parse(json)
|
127
|
-
metadata["doi"] = doi_from_url(metadata["@id"])
|
128
|
-
return "DOI #{metadata["doi"]} not changed for #{filename}" if metadata["doi"] && metadata["dateModified"] == metadata["datePublished"] && options[:force].blank?
|
129
|
-
|
130
|
-
prefix = options[:prefix] || ENV['PREFIX']
|
131
|
-
metadata["doi"] = encode_doi(prefix, number: metadata["alternateName"]) if metadata["doi"].blank?
|
132
|
-
|
133
|
-
response = post_metadata_for_work(json, options.merge(doi: metadata["doi"]))
|
134
|
-
return "Errors for DOI #{metadata["doi"]}: #{response.body['errors'].first['title']}\n" if response.body['errors'].present?
|
135
|
-
|
136
|
-
response = hide_metadata_for_work(json, options.merge(doi: metadata["doi"]))
|
137
|
-
return "Errors for DOI #{metadata["doi"]}: #{response.body['errors'].first['title']}\n" if response.body['errors'].present?
|
138
|
-
|
139
|
-
new_metadata = Bergamasco::Markdown.update_file(source_path, "published" => false)
|
140
|
-
"DOI #{metadata["doi"]} minted and hidden for #{filename}"
|
141
|
-
end
|
142
|
-
|
143
|
-
# fetch schema.org metadata in JSON-LD format to mint DOI
|
144
|
-
# DOIs are never deleted, but we can remove the metadata from the DataCite index
|
145
|
-
def hide_doi_for_url(url, options={})
|
146
|
-
filename, build_path, source_path = filepath_from_url(url, options)
|
147
|
-
|
148
|
-
json = get_json_ld_from_work(build_path, options)
|
149
|
-
metadata = JSON.parse(json)
|
150
|
-
metadata["doi"] = doi_from_url(metadata["@id"])
|
151
|
-
return "No DOI for #{filename}" unless metadata["doi"]
|
152
|
-
return "DOI #{metadata["doi"]} not active for #{filename}" unless metadata["datePublished"] || options[:force].present?
|
153
|
-
|
154
|
-
response = hide_metadata_for_work(json, options)
|
155
|
-
return "Errors for DOI #{metadata["doi"]}: #{response.body['errors'].first['title']}\n" if response.body['errors'].present?
|
156
|
-
|
157
|
-
new_metadata = Bergamasco::Markdown.update_file(source_path, "published" => false)
|
158
|
-
"DOI #{metadata["doi"]} hidden for #{filename}"
|
159
|
-
end
|
160
|
-
|
161
|
-
def mint_dois_for_all_urls(url, options={})
|
162
|
-
urls = get_urls_for_works(url)
|
163
|
-
urls.map do |u|
|
164
|
-
mint_doi_for_url(u, options)
|
165
|
-
end.join("\n")
|
166
|
-
end
|
167
|
-
|
168
|
-
def mint_and_hide_dois_for_all_urls(url, options={})
|
169
|
-
urls = get_urls_for_works(url)
|
170
|
-
urls.map do |u|
|
171
|
-
mint_and_hide_doi_for_url(u, options)
|
172
|
-
end.join("\n")
|
173
|
-
end
|
174
|
-
|
175
|
-
def hide_dois_for_all_urls(url, options={})
|
176
|
-
urls = get_urls_for_works(url)
|
177
|
-
urls.map do |u|
|
178
|
-
hide_doi_for_url(u, options)
|
179
|
-
end.join("\n")
|
180
|
-
end
|
181
|
-
|
182
|
-
def get_urls_for_works(url)
|
183
|
-
doc = Nokogiri::HTML(open(url))
|
184
|
-
json = doc.at_xpath("//script[@type='application/ld+json']")
|
185
|
-
return [url] unless json.present?
|
186
|
-
|
187
|
-
metadata = ActiveSupport::JSON.decode(json.text)
|
188
|
-
metadata.fetch("hasPart", []).map { |p| p["@id"] } + [url]
|
189
|
-
end
|
190
|
-
|
191
|
-
def get_json_ld_from_work(url, options={})
|
192
|
-
doc = Nokogiri::HTML(open(url))
|
193
|
-
json = doc.at_xpath("//script[@type='application/ld+json']")
|
194
|
-
return { "error" => "Error: no schema.org metadata found" } unless json.present?
|
195
|
-
|
196
|
-
json.text
|
197
|
-
end
|
198
|
-
|
199
|
-
def post_metadata_for_work(input, options={})
|
200
|
-
metadata = JSON.parse(input)
|
201
|
-
|
202
|
-
prefix = options[:prefix] || ENV['PREFIX']
|
203
|
-
doi = encode_doi(prefix, number: metadata["alternateName"]) if metadata["doi"].blank?
|
204
|
-
|
205
|
-
work = Cirneco::Work.new(input: input, doi: doi)
|
206
|
-
return work.errors if work.errors.present?
|
207
|
-
|
208
|
-
response = work.post_metadata(work.datacite, options)
|
209
|
-
return response unless response.status == 201
|
210
|
-
|
211
|
-
response = work.put_doi(work.doi, options.merge(url: metadata["url"]))
|
212
|
-
return response unless response.status == 201
|
213
|
-
|
214
|
-
if work.media.present?
|
215
|
-
work.post_media(metadata.doi, options.merge(media: work.media))
|
216
|
-
else
|
217
|
-
response
|
218
|
-
end
|
219
|
-
end
|
220
|
-
|
221
|
-
def post_and_hide_metadata_for_work(input, options={})
|
222
|
-
metadata = JSON.parse(input)
|
223
|
-
|
224
|
-
prefix = options[:prefix] || ENV['PREFIX']
|
225
|
-
doi = encode_doi(prefix, number: metadata["alternateName"]) if metadata["doi"].blank?
|
226
|
-
|
227
|
-
work = Cirneco::Work.new(input: input, doi: doi)
|
228
|
-
return work.errors if work.errors.present?
|
229
|
-
|
230
|
-
response = work.post_metadata(work.datacite, options)
|
231
|
-
return response unless response.status == 201
|
232
|
-
|
233
|
-
response = work.put_doi(work.doi, options.merge(url: metadata["url"]))
|
234
|
-
return response unless response.status == 201
|
235
|
-
|
236
|
-
response = work.delete_metadata(doi, options)
|
237
|
-
return response unless response.status == 201
|
238
|
-
|
239
|
-
if work.media.present?
|
240
|
-
work.post_media(metadata["doi"], options.merge(media: work.media))
|
241
|
-
else
|
242
|
-
response
|
243
|
-
end
|
244
|
-
end
|
245
|
-
|
246
|
-
def hide_metadata_for_work(input, options={})
|
247
|
-
metadata = JSON.parse(input)
|
248
|
-
metadata["doi"] = doi_from_url(metadata["@id"])
|
249
|
-
|
250
|
-
prefix = options[:prefix] || ENV['PREFIX']
|
251
|
-
metadata["doi"] = encode_doi(prefix, number: metadata["alternateName"]) if metadata["doi"].blank?
|
252
|
-
|
253
|
-
work = Cirneco::Work.new(input: input, doi: metadata["doi"])
|
254
|
-
return work.errors if work.errors.present?
|
255
|
-
|
256
|
-
work.delete_metadata(metadata["doi"], options)
|
257
|
-
end
|
258
|
-
|
259
|
-
def generate_metadata_for_jats(url, options={})
|
260
|
-
doc = Nokogiri::HTML(open(url))
|
261
|
-
json = doc.at_xpath("//script[@type='application/ld+json']")
|
262
|
-
return { "error" => "Error: no schema.org metadata found" } unless json.present?
|
263
|
-
|
264
|
-
metadata = ActiveSupport::JSON.decode(json.text)
|
265
|
-
return { "error" => "Error: blog post not published" } if metadata["published"].to_s == "false"
|
266
|
-
return { "error" => "Error: required metadata missing" } unless ["name", "author", "publisher", "datePublished", "@type"].all? { |k| metadata.key? k }
|
267
|
-
|
268
|
-
# required metadata
|
269
|
-
if /(http|https):\/\/(dx\.)?doi\.org\/(\w+)/.match(metadata["@id"])
|
270
|
-
uri = Addressable::URI.parse(metadata["@id"])
|
271
|
-
metadata["doi"] = uri.path[1..-1].upcase
|
272
|
-
end
|
273
|
-
|
274
|
-
metadata["title"] = metadata["name"]
|
275
|
-
metadata["author"] = format_authors(metadata["author"]).map do |a|
|
276
|
-
{ "given_name" => a[:given_name],
|
277
|
-
"family_name" => a[:family_name],
|
278
|
-
"name" => a[:name],
|
279
|
-
"orcid" => a[:orcid] }.compact
|
280
|
-
end
|
281
|
-
|
282
|
-
metadata["publisher"] = metadata.fetch("publisher", {}).fetch("name", nil)
|
283
|
-
metadata["tags"] = metadata["keywords"].to_s.split(", ").select { |k| k != "featured" }
|
284
|
-
metadata["date"] = metadata.fetch("datePublished", "")
|
285
|
-
metadata["publication_year"] = metadata.fetch("date", "")[0..3].to_i
|
286
|
-
metadata["publication_month"] = metadata.fetch("date", "")[5..6].to_i
|
287
|
-
metadata["publication_day"] = metadata.fetch("date", "")[8..9].to_i
|
288
|
-
|
289
|
-
# metadata["accession_number"] = metadata["alternateName"]
|
290
|
-
metadata["journal_title"] = metadata.fetch("isPartOf", {}).fetch("name", nil)
|
291
|
-
|
292
|
-
if metadata["description"].present?
|
293
|
-
metadata["descriptions"] = [{ value: metadata["description"], description_type: "Abstract" }]
|
294
|
-
end
|
295
|
-
|
296
|
-
# use default version 1.0
|
297
|
-
metadata["version"] ||= "1.0"
|
298
|
-
|
299
|
-
# fetch reference metadata if available
|
300
|
-
metadata["related_identifiers"] = get_related_identifiers(metadata)
|
301
|
-
|
302
|
-
if metadata["license"].present?
|
303
|
-
metadata["license_name"] = LICENSES.fetch(metadata["license"], nil)
|
304
|
-
metadata["license_url"] = metadata["license"]
|
305
|
-
end
|
306
|
-
|
307
|
-
metadata = metadata.extract!(*%w(publisher doi tags title author date
|
308
|
-
publication_year publication_month publication_day license_name
|
309
|
-
license_url accession_number journal_title))
|
310
|
-
end
|
311
|
-
|
312
|
-
def generate_jats_for_url(url, options={})
|
313
|
-
filename, build_path, source_path = filepath_from_url(url, options)
|
314
|
-
metadata = generate_metadata_for_jats(build_path, options)
|
315
|
-
return "No JATS XML written for #{filename}" if metadata["published"].to_s == "false"
|
316
|
-
|
317
|
-
file = IO.read(source_path)
|
318
|
-
content = Bergamasco::Markdown.split_yaml_frontmatter(file).last
|
319
|
-
text = Bergamasco::Markdown.join_yaml_frontmatter(metadata, content)
|
320
|
-
|
321
|
-
xml = Bergamasco::Pandoc.convert_to_jats(text, options)
|
322
|
-
xmlname = filename.gsub(/\.html\.(erb|md)/, ".xml")
|
323
|
-
xmlpath = build_path.gsub("index.html", xmlname)
|
324
|
-
IO.write(xmlpath, xml)
|
325
|
-
|
326
|
-
"JATS XML written for #{filename}"
|
327
|
-
end
|
328
|
-
|
329
|
-
def generate_jats_for_all_urls(url, options={})
|
330
|
-
urls = get_urls_for_works(url)
|
331
|
-
urls.map do |u|
|
332
|
-
generate_jats_for_url(u, options)
|
333
|
-
end.join("\n")
|
334
|
-
end
|
335
|
-
|
336
|
-
def validate_jats(xml)
|
337
|
-
schema = Nokogiri::XML::Schema(open(JATS_SCHEMA))
|
338
|
-
puts schema.validate(Nokogiri::XML(xml))
|
339
|
-
|
340
|
-
OpenStruct.new(body: { "errors" => schema.validate(Nokogiri::XML(xml)).map { |error| { "title" => error.to_s } } })
|
341
|
-
end
|
342
|
-
|
343
|
-
def url_from_path(site_url, filepath)
|
344
|
-
site_url.to_s.chomp("\\") + "/" + File.basename(filepath)[0..-9] + "/"
|
345
|
-
end
|
346
|
-
|
347
|
-
def format_authors(authors)
|
348
|
-
Array(authors).map do |author|
|
349
|
-
orcid = orcid_from_url(author["@id"])
|
350
|
-
name = (author["givenName"].present? || author["familyName"].present?) ? nil : author["name"]
|
351
|
-
|
352
|
-
{ :given_name => author["givenName"],
|
353
|
-
:family_name => author["familyName"],
|
354
|
-
:name => name,
|
355
|
-
:orcid => orcid }.compact
|
356
|
-
end
|
357
|
-
end
|
358
|
-
|
359
|
-
def get_related_identifiers(metadata)
|
360
|
-
citations = Array(metadata["citation"])
|
361
|
-
parent = [metadata["isPartOf"]].compact.map { |r| r.merge("relation_type" => "IsPartOf") }
|
362
|
-
children = Array(metadata["hasPart"]).map { |r| r.merge("relation_type" => "HasPart") }
|
363
|
-
|
364
|
-
(citations + parent + children).map do |r|
|
365
|
-
id = r.fetch("@id", "")
|
366
|
-
relation_type = r.fetch("relation_type", "References")
|
367
|
-
|
368
|
-
if /(http|https):\/\/(dx\.)?doi\.org\/(\w+)/.match(id)
|
369
|
-
uri = Addressable::URI.parse(id)
|
370
|
-
value = uri.path[1..-1].upcase
|
371
|
-
type = "DOI"
|
372
|
-
elsif /(http|https):\/\//.match(id)
|
373
|
-
uri = Addressable::URI.parse(id)
|
374
|
-
value = uri.normalize.to_s
|
375
|
-
type = "URL"
|
376
|
-
else
|
377
|
-
type = nil
|
378
|
-
end
|
379
|
-
|
380
|
-
{
|
381
|
-
value: value,
|
382
|
-
related_identifier_type: type,
|
383
|
-
relation_type: relation_type
|
384
|
-
}
|
385
|
-
end.select { |t| t[:related_identifier_type].present? }
|
386
|
-
end
|
387
|
-
|
388
|
-
def format_media(metadata)
|
389
|
-
[metadata["encoding"]].compact.map do |m|
|
390
|
-
{ mime_type: m["fileFormat"],
|
391
|
-
url: m["@id"] }
|
392
|
-
end
|
393
|
-
end
|
394
|
-
|
395
|
-
def filepath_from_url(url, options={})
|
396
|
-
if doi_from_url(url)
|
397
|
-
response = Maremma.head(url, limit: 0)
|
398
|
-
url = response.headers.present? ? response.headers.fetch("Location", "") : ""
|
399
|
-
end
|
400
|
-
|
401
|
-
uri = Addressable::URI.parse(url.gsub(Dir.pwd + options[:build_dir].to_s, "")
|
402
|
-
.gsub(/index\.html$/, "")
|
403
|
-
.gsub(/\/$/, ""))
|
404
|
-
basename = File.basename(uri.path, ".html").presence || "index"
|
405
|
-
|
406
|
-
if basename.starts_with?("index")
|
407
|
-
filename = basename + ".html.erb"
|
408
|
-
source_path = Dir.pwd + options[:source_dir].to_s + filename
|
409
|
-
build_path = Dir.pwd + options[:build_dir].to_s + basename + ".html"
|
410
|
-
else
|
411
|
-
filename = basename + ".html.md"
|
412
|
-
source_path = Dir.pwd + options[:source_dir].to_s + options[:posts_dir].to_s + filename
|
413
|
-
build_path = Dir.pwd + options[:build_dir].to_s + basename + "/index.html"
|
414
|
-
end
|
415
|
-
[filename, build_path, source_path]
|
416
|
-
end
|
417
|
-
|
418
|
-
def doi_from_url(url)
|
419
|
-
return nil unless url.present?
|
420
|
-
|
421
|
-
if /(http|https):\/\/(dx\.)?doi\.org\/(\w+)/.match(url)
|
422
|
-
uri = Addressable::URI.parse(url)
|
423
|
-
uri.path[1..-1].upcase
|
424
|
-
elsif url.starts_with?("doi:")
|
425
|
-
url[4..-1].upcase
|
426
|
-
end
|
427
|
-
end
|
428
|
-
|
429
|
-
def orcid_from_url(url)
|
430
|
-
return nil unless url.present?
|
431
|
-
|
432
|
-
Array(/\Ahttp:\/\/orcid\.org\/(.+)/.match(url)).last
|
433
|
-
end
|
434
60
|
end
|
435
61
|
end
|
data/lib/cirneco/version.rb
CHANGED
data/lib/cirneco/work.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
require 'bolognese'
|
2
2
|
require_relative 'api'
|
3
3
|
require_relative 'utils'
|
4
|
+
require_relative 'file_utils'
|
4
5
|
require_relative 'base'
|
5
6
|
|
6
7
|
module Cirneco
|
@@ -8,6 +9,7 @@ module Cirneco
|
|
8
9
|
include Cirneco::Base
|
9
10
|
include Cirneco::Api
|
10
11
|
include Cirneco::Utils
|
12
|
+
include Cirneco::FileUtils
|
11
13
|
|
12
14
|
attr_reader :media, :username, :password
|
13
15
|
|
data/spec/api_spec.rb
CHANGED
@@ -18,9 +18,9 @@ describe Cirneco::Work, vcr: true, :order => :defined do
|
|
18
18
|
context "post" do
|
19
19
|
it 'should post metadata' do
|
20
20
|
response = subject.post_metadata(subject.datacite, options)
|
21
|
-
expect(response.body["data"]).to eq("OK (10.5438/
|
21
|
+
expect(response.body["data"]).to eq("OK (10.5438/0000-01hc)")
|
22
22
|
expect(response.status).to eq(201)
|
23
|
-
expect(response.headers["Location"]).to eq("http://mds.test.datacite.org/metadata/10.5438/
|
23
|
+
expect(response.headers["Location"]).to eq("http://mds.test.datacite.org/metadata/10.5438/0000-01hc")
|
24
24
|
end
|
25
25
|
end
|
26
26
|
|
@@ -54,7 +54,7 @@ describe Cirneco::Work, vcr: true, :order => :defined do
|
|
54
54
|
it 'should get all dois' do
|
55
55
|
response = subject.get_dois(options)
|
56
56
|
dois = response.body["data"]
|
57
|
-
expect(dois.length).to eq(
|
57
|
+
expect(dois.length).to eq(519)
|
58
58
|
expect(dois.first).to eq("10.23725/0000-03VC")
|
59
59
|
end
|
60
60
|
|
data/spec/doi_spec.rb
CHANGED
@@ -64,7 +64,7 @@ describe Cirneco::Doi do
|
|
64
64
|
context "base32" do
|
65
65
|
it 'generates a doi' do
|
66
66
|
subject.options = { number: number, prefix: prefix }
|
67
|
-
expect { subject.generate }.to output("10.5438/0000-
|
67
|
+
expect { subject.generate }.to output("10.5438/0000-03vc\n").to_stdout
|
68
68
|
end
|
69
69
|
|
70
70
|
it 'requires a prefix' do
|
@@ -89,37 +89,37 @@ describe Cirneco::Doi do
|
|
89
89
|
context "mint and hide DOIs", vcr: true, :order => :defined do
|
90
90
|
it 'mints a doi' do
|
91
91
|
subject.options = mint_options
|
92
|
-
expect { subject.mint filepath }.to output("DOI 10.5438/0000-
|
92
|
+
expect { subject.mint filepath }.to output("DOI 10.5438/0000-03vc minted for cool-dois.html.md\n").to_stdout
|
93
93
|
end
|
94
94
|
|
95
95
|
it 'hides a doi' do
|
96
96
|
filename = 'cool-dois-minted/index.html'
|
97
97
|
filepath = fixture_path + filename
|
98
98
|
subject.options = mint_options.merge(filepath: filepath)
|
99
|
-
expect { subject.hide filepath }.to output("DOI 10.5438/
|
99
|
+
expect { subject.hide filepath }.to output("DOI 10.5438/55e5-t5c0 hidden for cool-dois-minted.html.md\n").to_stdout
|
100
100
|
end
|
101
101
|
|
102
102
|
it 'mints and hides a doi' do
|
103
103
|
subject.options = mint_options
|
104
|
-
expect { subject.mint_and_hide filepath }.to output("DOI 10.5438/0000-
|
104
|
+
expect { subject.mint_and_hide filepath }.to output("DOI 10.5438/0000-03vc minted and hidden for cool-dois.html.md\n").to_stdout
|
105
105
|
end
|
106
106
|
|
107
107
|
it 'mints dois for list of urls' do
|
108
108
|
filepath = fixture_path + 'index.html'
|
109
109
|
subject.options = mint_options
|
110
|
-
expect { subject.mint filepath }.to output("DOI 10.5438/0000-
|
110
|
+
expect { subject.mint filepath }.to output("DOI 10.5438/0000-03vc minted for cool-dois.html.md\nDOI 10.5438/0000-00ss minted for index.html.erb\n").to_stdout
|
111
111
|
end
|
112
112
|
|
113
113
|
it 'hides dois for list of urls' do
|
114
114
|
filepath = fixture_path + 'index.html'
|
115
115
|
subject.options = mint_options
|
116
|
-
expect { subject.hide filepath }.to output("No DOI for cool-dois.html.md\nDOI 10.5438/0000-
|
116
|
+
expect { subject.hide filepath }.to output("No DOI for cool-dois.html.md\nDOI 10.5438/0000-00ss hidden for index.html.erb\n").to_stdout
|
117
117
|
end
|
118
118
|
|
119
119
|
it 'mints and hides dois for list of urls' do
|
120
120
|
filepath = fixture_path + 'index.html'
|
121
121
|
subject.options = mint_options
|
122
|
-
expect { subject.mint_and_hide filepath }.to output("DOI 10.5438/0000-
|
122
|
+
expect { subject.mint_and_hide filepath }.to output("DOI 10.5438/0000-03vc minted and hidden for cool-dois.html.md\nDOI 10.5438/0000-00ss minted and hidden for index.html.erb\n").to_stdout
|
123
123
|
end
|
124
124
|
end
|
125
125
|
|