cirneco 0.9.13 → 0.9.14

Sign up to get free protection for your applications and to get access to all the features.
Files changed (52) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile.lock +2 -2
  3. data/lib/cirneco/data_center.rb +5 -0
  4. data/lib/cirneco/doi.rb +6 -1
  5. data/lib/cirneco/file_utils.rb +368 -0
  6. data/lib/cirneco/media.rb +3 -1
  7. data/lib/cirneco/metadata.rb +2 -0
  8. data/lib/cirneco/utils.rb +5 -379
  9. data/lib/cirneco/version.rb +1 -1
  10. data/lib/cirneco/work.rb +2 -0
  11. data/spec/api_spec.rb +3 -3
  12. data/spec/doi_spec.rb +7 -7
  13. data/spec/file_utils_spec.rb +203 -0
  14. data/spec/fixtures/cool-dois-minted/index.html +1 -1
  15. data/spec/fixtures/cool-dois-minted.html.md +1 -1
  16. data/spec/fixtures/cool-dois.html.md +1 -1
  17. data/spec/fixtures/index-minted.html +1 -1
  18. data/spec/fixtures/index.html +2 -2
  19. data/spec/fixtures/index.html.erb +1 -1
  20. data/spec/fixtures/vcr_cassettes/Cirneco_DataCenter/jats/should_generate_jats_for_all_urls.yml +4 -4
  21. data/spec/fixtures/vcr_cassettes/Cirneco_DataCenter/mint_and_hide_DOIs/should_hide_for_all_urls.yml +3 -44
  22. data/spec/fixtures/vcr_cassettes/Cirneco_DataCenter/mint_and_hide_DOIs/should_mint_and_hide_for_all_urls.yml +4 -374
  23. data/spec/fixtures/vcr_cassettes/Cirneco_DataCenter/mint_and_hide_DOIs/should_mint_for_all_urls.yml +4 -4
  24. data/spec/fixtures/vcr_cassettes/Cirneco_Doi/jats/writes_jats_for_list_of_urls.yml +4 -4
  25. data/spec/fixtures/vcr_cassettes/Cirneco_Doi/mint_and_hide_DOIs/hides_dois_for_list_of_urls.yml +4 -47
  26. data/spec/fixtures/vcr_cassettes/Cirneco_Doi/mint_and_hide_DOIs/mints_and_hides_dois_for_list_of_urls.yml +4 -374
  27. data/spec/fixtures/vcr_cassettes/Cirneco_Doi/mint_and_hide_DOIs/mints_dois_for_list_of_urls.yml +4 -4
  28. data/spec/fixtures/vcr_cassettes/Cirneco_Work/DOI_API/get/should_get_all_dois.yml +16 -577
  29. data/spec/fixtures/vcr_cassettes/Cirneco_Work/DOI_API/get/should_get_doi.yml +15 -58
  30. data/spec/fixtures/vcr_cassettes/Cirneco_Work/DOI_API/get/should_get_doi_not_found.yml +16 -57
  31. data/spec/fixtures/vcr_cassettes/Cirneco_Work/DOI_API/get/username_missing.yml +16 -16
  32. data/spec/fixtures/vcr_cassettes/Cirneco_Work/DOI_API/put/should_put_doi.yml +16 -63
  33. data/spec/fixtures/vcr_cassettes/Cirneco_Work/Media_API/get/should_get_media.yml +16 -61
  34. data/spec/fixtures/vcr_cassettes/Cirneco_Work/Media_API/post/should_post_media.yml +17 -62
  35. data/spec/fixtures/vcr_cassettes/Cirneco_Work/Metadata_API/delete/should_delete_metadata.yml +17 -60
  36. data/spec/fixtures/vcr_cassettes/Cirneco_Work/Metadata_API/get/should_get_metadata.yml +17 -104
  37. data/spec/fixtures/vcr_cassettes/Cirneco_Work/Metadata_API/post/should_post_metadata.yml +17 -108
  38. data/spec/fixtures/vcr_cassettes/Cirneco_Work/media/includes_media.yml +17 -17
  39. data/spec/fixtures/vcr_cassettes/Cirneco_Work/schema/BlogPosting.yml +15 -15
  40. data/spec/utils_spec.rb +9 -186
  41. data/spec/work_spec.rb +1 -1
  42. metadata +4 -12
  43. data/spec/fixtures/vcr_cassettes/Cirneco_DataCenter/get/should_get_all_dois_by_prefix.yml +0 -562
  44. data/spec/fixtures/vcr_cassettes/Cirneco_DataCenter/mint_and_hide_DOIs/should_hide_for_url.yml +0 -46
  45. data/spec/fixtures/vcr_cassettes/Cirneco_DataCenter/mint_and_hide_DOIs/should_hide_metadata_for_work.yml +0 -46
  46. data/spec/fixtures/vcr_cassettes/Cirneco_DataCenter/mint_and_hide_DOIs/should_mint_and_hide_for_url.yml +0 -182
  47. data/spec/fixtures/vcr_cassettes/Cirneco_DataCenter/mint_and_hide_DOIs/should_post_metadata_for_work.yml +0 -139
  48. data/spec/fixtures/vcr_cassettes/Cirneco_Doi/MDS_DOI_API/get/should_get_all_dois.yml +0 -564
  49. data/spec/fixtures/vcr_cassettes/Cirneco_Doi/MDS_DOI_API/get/should_get_doi.yml +0 -46
  50. data/spec/fixtures/vcr_cassettes/Cirneco_Doi/MDS_DOI_API/put/should_put_doi.yml +0 -50
  51. data/spec/fixtures/vcr_cassettes/Cirneco_Doi/mint_and_hide_DOIs/hides_a_doi.yml +0 -46
  52. data/spec/fixtures/vcr_cassettes/Cirneco_Doi/mint_and_hide_DOIs/mints_and_hides_a_doi.yml +0 -182
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 26264768a88f4fa5359062dc47a64fd378baee79
4
- data.tar.gz: f6a57d8575940385bd04000497800080be5f6216
3
+ metadata.gz: ca288d421311719334b7a9d38849732549f5f2d3
4
+ data.tar.gz: 5d9b99265570d9ab485b463e9287f9d5e16fdd01
5
5
  SHA512:
6
- metadata.gz: 7ec100120cb41af67f14bdf17c70a82e819b24ccf7897f35238336c348ae6fab1773ecb582b42375a9daecc0905d25f758677bd06c672d320c8010e3c5813ada
7
- data.tar.gz: 120c30769c64f9f173e3d1a9d2239c17c2e288cd17f83986c85d501fa288d3c35945101b76f4d1f874d7d8ace745acc54c6f09dca76ce8ef48e0bd4bcbbf59d4
6
+ metadata.gz: c604f57226dbc7ff543575bf1226605ac887bfb070d0789755fcbe6f0d146d90370798f3618f82e9ec35002a0b520d6a84fb3ca6712698acfec0e4fe6696f305
7
+ data.tar.gz: 93f67ce51ff86fe67be5c24f61c9bae6a22318e747015e12f6a87978ef3179554bd88337f47c5c245a9570efd91844d32a4e0c1105555cbfeedcd23458f89263
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- cirneco (0.9.13)
4
+ cirneco (0.9.14)
5
5
  activesupport (>= 4.2.5, < 6)
6
6
  base32-crockford-checksum (~> 0.2.2)
7
7
  bergamasco (~> 0.3)
@@ -36,7 +36,7 @@ GEM
36
36
  safe_yaml (~> 1.0, >= 1.0.4)
37
37
  bibtex-ruby (4.4.4)
38
38
  latex-decode (~> 0.0)
39
- bolognese (0.9.32)
39
+ bolognese (0.9.34)
40
40
  activesupport (>= 4.2.5, < 6)
41
41
  benchmark_methods (~> 0.7)
42
42
  bibtex-ruby (~> 4.1)
data/lib/cirneco/data_center.rb CHANGED
@@ -1,8 +1,10 @@
1
1
  require 'active_support/all'
2
2
  require 'nokogiri'
3
+ require 'bolognese'
3
4
 
4
5
  require_relative 'api'
5
6
  require_relative 'utils'
7
+ require_relative 'file_utils'
6
8
  require_relative 'base'
7
9
 
8
10
  module Cirneco
@@ -10,6 +12,9 @@ module Cirneco
10
12
  include Cirneco::Base
11
13
  include Cirneco::Api
12
14
  include Cirneco::Utils
15
+ include Cirneco::FileUtils
16
+ include Bolognese::Utils
17
+ include Bolognese::DoiUtils
13
18
 
14
19
  attr_accessor :prefix, :username, :password
15
20
 
data/lib/cirneco/doi.rb CHANGED
@@ -1,7 +1,9 @@
1
- require "thor"
1
+ require 'thor'
2
+ require 'bolognese'
2
3
 
3
4
  require_relative 'api'
4
5
  require_relative 'utils'
6
+ require_relative 'file_utils'
5
7
  require_relative 'base'
6
8
 
7
9
  module Cirneco
@@ -9,6 +11,9 @@ module Cirneco
9
11
  include Cirneco::Base
10
12
  include Cirneco::Api
11
13
  include Cirneco::Utils
14
+ include Cirneco::FileUtils
15
+ include Bolognese::Utils
16
+ include Bolognese::DoiUtils
12
17
 
13
18
  desc "get DOI", "get handle url for DOI"
14
19
  method_option :username, :default => ENV['MDS_USERNAME']
data/lib/cirneco/file_utils.rb ADDED
@@ -0,0 +1,368 @@
1
+ require 'bergamasco'
2
+ require 'open-uri'
3
+ require 'time'
4
+
5
+ module Cirneco
6
+ module FileUtils
7
+ JATS_SCHEMA = File.expand_path("../../../resources/jats-1.1/JATS-journalpublishing1.xsd", __FILE__)
8
+
9
+ LICENSES = {
10
+ "https://creativecommons.org/licenses/by/4.0/" => "Creative Commons Attribution (CC-BY 4.0)",
11
+ "https://creativecommons.org/publicdomain/zero/1.0/" => "Creative Commons Public Domain Dedication (CC0 1.0)"
12
+ }
13
+
14
+ def get_accession_number(filepath)
15
+ metadata = Bergamasco::Markdown.read_yaml_for_doi_metadata(filepath)
16
+ metadata.fetch("accession_number", nil).to_s.scan(/\d+/).first.to_i
17
+ end
18
+
19
+ def get_all_accession_numbers(folderpath)
20
+ Dir.glob("#{folderpath}/*.md").sort.map do |filepath|
21
+ get_accession_number(filepath)
22
+ end.select { |a| a > 0 }.sort
23
+ end
24
+
25
+ def update_accession_number(filepath, options={})
26
+ filename = File.basename(filepath)
27
+ return "File #{filename} ignored: not a markdown or html file" unless %w(.md .html .erb).include?(File.extname(filepath))
28
+
29
+ old_metadata = Bergamasco::Markdown.read_yaml_for_doi_metadata(filepath)
30
+ return "File #{filename} ignored: no yaml front matter" unless old_metadata.present?
31
+
32
+ return "File #{filename} ignored: no empty accession_number" if options[:opt_in] && !old_metadata.key?("accession_number")
33
+
34
+ return "Accession number #{old_metadata["accession_number"]} not changed for #{filename}" if old_metadata["accession_number"]
35
+
36
+ if old_metadata["doi"].present?
37
+ number = decode_doi(old_metadata["doi"])
38
+ options[:number] = number if number > 0
39
+ else
40
+ folderpath = File.dirname(filepath)
41
+ options[:registered_numbers] = get_all_accession_numbers(folderpath)
42
+ end
43
+
44
+ accession_number = generate_accession_number(options)
45
+
46
+ new_metadata = Bergamasco::Markdown.update_file(filepath, "accession_number" => accession_number)
47
+ "Accession number #{new_metadata["accession_number"]} generated for #{filename}"
48
+ end
49
+
50
+ def update_all_accession_numbers(folderpath, options={})
51
+ Dir.glob("#{folderpath}/*.md").sort.map do |filepath|
52
+ update_accession_number(filepath, options)
53
+ end
54
+ end
55
+
56
+ # fetch schema.org metadata in JSON-LD format to mint DOI
57
+ def mint_doi_for_url(url, options={})
58
+ filename, build_path, source_path = filepath_from_url(url, options)
59
+
60
+ json = get_json_ld_from_work(build_path, options)
61
+ metadata = JSON.parse(json)
62
+ return "DOI #{metadata["doi"]} not changed for #{filename}" if metadata["doi"] && metadata["dateModified"] == metadata["datePublished"] && options[:force].blank?
63
+
64
+ prefix = options[:prefix] || ENV['PREFIX']
65
+ doi = encode_doi(prefix, number: metadata["alternateName"]) if metadata["doi"].blank?
66
+
67
+ new_metadata = Bergamasco::Markdown.update_file(source_path, "doi" => doi, "date" => metadata["dateModified"])
68
+ "DOI #{new_metadata["doi"]} minted for #{filename}"
69
+ end
70
+
71
+ # fetch schema.org metadata in JSON-LD format to mint DOI
72
+ def mint_and_hide_doi_for_url(url, options={})
73
+ filename, build_path, source_path = filepath_from_url(url, options)
74
+
75
+ json = get_json_ld_from_work(build_path, options)
76
+ metadata = JSON.parse(json)
77
+ metadata["doi"] = doi_from_url(metadata["@id"])
78
+ return "DOI #{metadata["doi"]} not changed for #{filename}" if metadata["doi"] && metadata["dateModified"] == metadata["datePublished"] && options[:force].blank?
79
+
80
+ prefix = options[:prefix] || ENV['PREFIX']
81
+ metadata["doi"] = encode_doi(prefix, number: metadata["alternateName"]) if metadata["doi"].blank?
82
+
83
+ response = post_metadata_for_work(json, options.merge(doi: metadata["doi"]))
84
+ return "Errors for DOI #{metadata["doi"]}: #{response.body['errors'].first['title']}\n" if response.body['errors'].present?
85
+
86
+ response = hide_metadata_for_work(json, options.merge(doi: metadata["doi"]))
87
+ return "Errors for DOI #{metadata["doi"]}: #{response.body['errors'].first['title']}\n" if response.body['errors'].present?
88
+
89
+ new_metadata = Bergamasco::Markdown.update_file(source_path, "published" => false)
90
+ "DOI #{metadata["doi"]} minted and hidden for #{filename}"
91
+ end
92
+
93
+ # fetch schema.org metadata in JSON-LD format to mint DOI
94
+ # DOIs are never deleted, but we can remove the metadata from the DataCite index
95
+ def hide_doi_for_url(url, options={})
96
+ filename, build_path, source_path = filepath_from_url(url, options)
97
+
98
+ json = get_json_ld_from_work(build_path, options)
99
+ metadata = JSON.parse(json)
100
+ metadata["doi"] = doi_from_url(metadata["@id"])
101
+ return "No DOI for #{filename}" unless metadata["doi"]
102
+ return "DOI #{metadata["doi"]} not active for #{filename}" unless metadata["datePublished"] || options[:force].present?
103
+
104
+ response = hide_metadata_for_work(json, options)
105
+ return "Errors for DOI #{metadata["doi"]}: #{response.body['errors'].first['title']}\n" if response.body['errors'].present?
106
+
107
+ new_metadata = Bergamasco::Markdown.update_file(source_path, "published" => false)
108
+ "DOI #{metadata["doi"]} hidden for #{filename}"
109
+ end
110
+
111
+ def mint_dois_for_all_urls(url, options={})
112
+ urls = get_urls_for_works(url)
113
+ urls.map do |u|
114
+ mint_doi_for_url(u, options)
115
+ end.join("\n")
116
+ end
117
+
118
+ def mint_and_hide_dois_for_all_urls(url, options={})
119
+ urls = get_urls_for_works(url)
120
+ urls.map do |u|
121
+ mint_and_hide_doi_for_url(u, options)
122
+ end.join("\n")
123
+ end
124
+
125
+ def hide_dois_for_all_urls(url, options={})
126
+ urls = get_urls_for_works(url)
127
+ urls.map do |u|
128
+ hide_doi_for_url(u, options)
129
+ end.join("\n")
130
+ end
131
+
132
+ def get_urls_for_works(url)
133
+ doc = Nokogiri::HTML(open(url))
134
+ json = doc.at_xpath("//script[@type='application/ld+json']")
135
+ return [url] unless json.present?
136
+
137
+ metadata = ActiveSupport::JSON.decode(json.text)
138
+ metadata.fetch("hasPart", []).map { |p| p["@id"] } + [url]
139
+ end
140
+
141
+ def get_json_ld_from_work(url, options={})
142
+ doc = Nokogiri::HTML(open(url))
143
+ json = doc.at_xpath("//script[@type='application/ld+json']")
144
+ return { "error" => "Error: no schema.org metadata found" } unless json.present?
145
+
146
+ json.text
147
+ end
148
+
149
+ def post_metadata_for_work(input, options={})
150
+ metadata = JSON.parse(input)
151
+
152
+ prefix = options[:prefix] || ENV['PREFIX']
153
+ doi = encode_doi(prefix, number: metadata["alternateName"]) if metadata["doi"].blank?
154
+
155
+ work = Cirneco::Work.new(input: input, doi: doi)
156
+ return work.errors if work.errors.present?
157
+
158
+ response = work.post_metadata(work.datacite, options)
159
+ return response unless response.status == 201
160
+
161
+ response = work.put_doi(work.doi, options.merge(url: metadata["url"]))
162
+ return response unless response.status == 201
163
+
164
+ if work.media.present?
165
+ work.post_media(metadata.doi, options.merge(media: work.media))
166
+ else
167
+ response
168
+ end
169
+ end
170
+
171
+ def post_and_hide_metadata_for_work(input, options={})
172
+ metadata = JSON.parse(input)
173
+
174
+ prefix = options[:prefix] || ENV['PREFIX']
175
+ doi = encode_doi(prefix, number: metadata["alternateName"]) if metadata["doi"].blank?
176
+
177
+ work = Cirneco::Work.new(input: input, doi: doi)
178
+ return work.errors if work.errors.present?
179
+
180
+ response = work.post_metadata(work.datacite, options)
181
+ return response unless response.status == 201
182
+
183
+ response = work.put_doi(work.doi, options.merge(url: metadata["url"]))
184
+ return response unless response.status == 201
185
+
186
+ response = work.delete_metadata(doi, options)
187
+ return response unless response.status == 201
188
+
189
+ if work.media.present?
190
+ work.post_media(metadata["doi"], options.merge(media: work.media))
191
+ else
192
+ response
193
+ end
194
+ end
195
+
196
+ def hide_metadata_for_work(input, options={})
197
+ metadata = JSON.parse(input)
198
+ metadata["doi"] = doi_from_url(metadata["@id"])
199
+
200
+ prefix = options[:prefix] || ENV['PREFIX']
201
+ metadata["doi"] = encode_doi(prefix, number: metadata["alternateName"]) if metadata["doi"].blank?
202
+
203
+ work = Cirneco::Work.new(input: input, doi: metadata["doi"])
204
+ return work.errors if work.errors.present?
205
+
206
+ work.delete_metadata(metadata["doi"], options)
207
+ end
208
+
209
+ def generate_metadata_for_jats(url, options={})
210
+ doc = Nokogiri::HTML(open(url))
211
+ json = doc.at_xpath("//script[@type='application/ld+json']")
212
+ return { "error" => "Error: no schema.org metadata found" } unless json.present?
213
+
214
+ metadata = ActiveSupport::JSON.decode(json.text)
215
+ return { "error" => "Error: blog post not published" } if metadata["published"].to_s == "false"
216
+ return { "error" => "Error: required metadata missing" } unless ["name", "author", "publisher", "datePublished", "@type"].all? { |k| metadata.key? k }
217
+
218
+ # required metadata
219
+ if /(http|https):\/\/(dx\.)?doi\.org\/(\w+)/.match(metadata["@id"])
220
+ uri = Addressable::URI.parse(metadata["@id"])
221
+ metadata["doi"] = uri.path[1..-1].upcase
222
+ end
223
+
224
+ metadata["title"] = metadata["name"]
225
+ metadata["author"] = format_authors(metadata["author"]).map do |a|
226
+ { "given_name" => a[:given_name],
227
+ "family_name" => a[:family_name],
228
+ "name" => a[:name],
229
+ "orcid" => a[:orcid] }.compact
230
+ end
231
+
232
+ metadata["publisher"] = metadata.fetch("publisher", {}).fetch("name", nil)
233
+ metadata["tags"] = metadata["keywords"].to_s.split(", ").select { |k| k != "featured" }
234
+ metadata["date"] = metadata.fetch("datePublished", "")
235
+ metadata["publication_year"] = metadata.fetch("date", "")[0..3].to_i
236
+ metadata["publication_month"] = metadata.fetch("date", "")[5..6].to_i
237
+ metadata["publication_day"] = metadata.fetch("date", "")[8..9].to_i
238
+
239
+ # metadata["accession_number"] = metadata["alternateName"]
240
+ metadata["journal_title"] = metadata.fetch("isPartOf", {}).fetch("name", nil)
241
+
242
+ if metadata["description"].present?
243
+ metadata["descriptions"] = [{ value: metadata["description"], description_type: "Abstract" }]
244
+ end
245
+
246
+ # use default version 1.0
247
+ metadata["version"] ||= "1.0"
248
+
249
+ # fetch reference metadata if available
250
+ metadata["related_identifiers"] = get_related_identifiers(metadata)
251
+
252
+ if metadata["license"].present?
253
+ metadata["license_name"] = LICENSES.fetch(metadata["license"], nil)
254
+ metadata["license_url"] = metadata["license"]
255
+ end
256
+
257
+ metadata = metadata.extract!(*%w(publisher doi tags title author date
258
+ publication_year publication_month publication_day license_name
259
+ license_url accession_number journal_title))
260
+ end
261
+
262
+ def generate_jats_for_url(url, options={})
263
+ filename, build_path, source_path = filepath_from_url(url, options)
264
+ metadata = generate_metadata_for_jats(build_path, options)
265
+ return "No JATS XML written for #{filename}" if metadata["published"].to_s == "false"
266
+
267
+ file = IO.read(source_path)
268
+ content = Bergamasco::Markdown.split_yaml_frontmatter(file).last
269
+ text = Bergamasco::Markdown.join_yaml_frontmatter(metadata, content)
270
+
271
+ xml = Bergamasco::Pandoc.convert_to_jats(text, options)
272
+ xmlname = filename.gsub(/\.html\.(erb|md)/, ".xml")
273
+ xmlpath = build_path.gsub("index.html", xmlname)
274
+ IO.write(xmlpath, xml)
275
+
276
+ "JATS XML written for #{filename}"
277
+ end
278
+
279
+ def generate_jats_for_all_urls(url, options={})
280
+ urls = get_urls_for_works(url)
281
+ urls.map do |u|
282
+ generate_jats_for_url(u, options)
283
+ end.join("\n")
284
+ end
285
+
286
+ def validate_jats(xml)
287
+ schema = Nokogiri::XML::Schema(open(JATS_SCHEMA))
288
+ puts schema.validate(Nokogiri::XML(xml))
289
+
290
+ OpenStruct.new(body: { "errors" => schema.validate(Nokogiri::XML(xml)).map { |error| { "title" => error.to_s } } })
291
+ end
292
+
293
+ def url_from_path(site_url, filepath)
294
+ site_url.to_s.chomp("\\") + "/" + File.basename(filepath)[0..-9] + "/"
295
+ end
296
+
297
+ def format_authors(authors)
298
+ Array(authors).map do |author|
299
+ orcid = validate_orcid(author["@id"])
300
+ name = (author["givenName"].present? || author["familyName"].present?) ? nil : author["name"]
301
+
302
+ { :given_name => author["givenName"],
303
+ :family_name => author["familyName"],
304
+ :name => name,
305
+ :orcid => orcid }.compact
306
+ end
307
+ end
308
+
309
+ def get_related_identifiers(metadata)
310
+ citations = Array(metadata["citation"])
311
+ parent = [metadata["isPartOf"]].compact.map { |r| r.merge("relation_type" => "IsPartOf") }
312
+ children = Array(metadata["hasPart"]).map { |r| r.merge("relation_type" => "HasPart") }
313
+
314
+ (citations + parent + children).map do |r|
315
+ id = r.fetch("@id", "")
316
+ relation_type = r.fetch("relation_type", "References")
317
+
318
+ if /(http|https):\/\/(dx\.)?doi\.org\/(\w+)/.match(id)
319
+ uri = Addressable::URI.parse(id)
320
+ value = uri.path[1..-1].upcase
321
+ type = "DOI"
322
+ elsif /(http|https):\/\//.match(id)
323
+ uri = Addressable::URI.parse(id)
324
+ value = uri.normalize.to_s
325
+ type = "URL"
326
+ else
327
+ type = nil
328
+ end
329
+
330
+ {
331
+ value: value,
332
+ related_identifier_type: type,
333
+ relation_type: relation_type
334
+ }
335
+ end.select { |t| t[:related_identifier_type].present? }
336
+ end
337
+
338
+ def format_media(metadata)
339
+ [metadata["encoding"]].compact.map do |m|
340
+ { mime_type: m["fileFormat"],
341
+ url: m["@id"] }
342
+ end
343
+ end
344
+
345
+ def filepath_from_url(url, options={})
346
+ if doi_from_url(url)
347
+ response = Maremma.head(url, limit: 0)
348
+ url = response.headers.present? ? response.headers.fetch("Location", "") : ""
349
+ end
350
+
351
+ uri = Addressable::URI.parse(url.gsub(Dir.pwd + options[:build_dir].to_s, "")
352
+ .gsub(/index\.html$/, "")
353
+ .gsub(/\/$/, ""))
354
+ basename = File.basename(uri.path, ".html").presence || "index"
355
+
356
+ if basename.starts_with?("index")
357
+ filename = basename + ".html.erb"
358
+ source_path = Dir.pwd + options[:source_dir].to_s + filename
359
+ build_path = Dir.pwd + options[:build_dir].to_s + basename + ".html"
360
+ else
361
+ filename = basename + ".html.md"
362
+ source_path = Dir.pwd + options[:source_dir].to_s + options[:posts_dir].to_s + filename
363
+ build_path = Dir.pwd + options[:build_dir].to_s + basename + "/index.html"
364
+ end
365
+ [filename, build_path, source_path]
366
+ end
367
+ end
368
+ end
data/lib/cirneco/media.rb CHANGED
@@ -3,6 +3,7 @@ require 'active_support/all'
3
3
 
4
4
  require_relative 'api'
5
5
  require_relative 'utils'
6
+ require_relative 'file_utils'
6
7
  require_relative 'base'
7
8
 
8
9
  module Cirneco
@@ -10,7 +11,8 @@ module Cirneco
10
11
  include Cirneco::Base
11
12
  include Cirneco::Api
12
13
  include Cirneco::Utils
13
-
14
+ include Cirneco::FileUtils
15
+
14
16
  desc "get DOI", "get media for DOI"
15
17
  method_option :username, :default => ENV['MDS_USERNAME']
16
18
  method_option :password, :default => ENV['MDS_PASSWORD']
data/lib/cirneco/metadata.rb CHANGED
@@ -3,6 +3,7 @@ require 'active_support/all'
3
3
 
4
4
  require_relative 'api'
5
5
  require_relative 'utils'
6
+ require_relative 'file_utils'
6
7
  require_relative 'base'
7
8
 
8
9
  module Cirneco
@@ -10,6 +11,7 @@ module Cirneco
10
11
  include Cirneco::Base
11
12
  include Cirneco::Api
12
13
  include Cirneco::Utils
14
+ include Cirneco::FileUtils
13
15
 
14
16
  desc "get DOI", "get metadata for DOI"
15
17
  method_option :username, :default => ENV['MDS_USERNAME']