cirneco 0.9.13 → 0.9.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile.lock +2 -2
  3. data/lib/cirneco/data_center.rb +5 -0
  4. data/lib/cirneco/doi.rb +6 -1
  5. data/lib/cirneco/file_utils.rb +368 -0
  6. data/lib/cirneco/media.rb +3 -1
  7. data/lib/cirneco/metadata.rb +2 -0
  8. data/lib/cirneco/utils.rb +5 -379
  9. data/lib/cirneco/version.rb +1 -1
  10. data/lib/cirneco/work.rb +2 -0
  11. data/spec/api_spec.rb +3 -3
  12. data/spec/doi_spec.rb +7 -7
  13. data/spec/file_utils_spec.rb +203 -0
  14. data/spec/fixtures/cool-dois-minted/index.html +1 -1
  15. data/spec/fixtures/cool-dois-minted.html.md +1 -1
  16. data/spec/fixtures/cool-dois.html.md +1 -1
  17. data/spec/fixtures/index-minted.html +1 -1
  18. data/spec/fixtures/index.html +2 -2
  19. data/spec/fixtures/index.html.erb +1 -1
  20. data/spec/fixtures/vcr_cassettes/Cirneco_DataCenter/jats/should_generate_jats_for_all_urls.yml +4 -4
  21. data/spec/fixtures/vcr_cassettes/Cirneco_DataCenter/mint_and_hide_DOIs/should_hide_for_all_urls.yml +3 -44
  22. data/spec/fixtures/vcr_cassettes/Cirneco_DataCenter/mint_and_hide_DOIs/should_mint_and_hide_for_all_urls.yml +4 -374
  23. data/spec/fixtures/vcr_cassettes/Cirneco_DataCenter/mint_and_hide_DOIs/should_mint_for_all_urls.yml +4 -4
  24. data/spec/fixtures/vcr_cassettes/Cirneco_Doi/jats/writes_jats_for_list_of_urls.yml +4 -4
  25. data/spec/fixtures/vcr_cassettes/Cirneco_Doi/mint_and_hide_DOIs/hides_dois_for_list_of_urls.yml +4 -47
  26. data/spec/fixtures/vcr_cassettes/Cirneco_Doi/mint_and_hide_DOIs/mints_and_hides_dois_for_list_of_urls.yml +4 -374
  27. data/spec/fixtures/vcr_cassettes/Cirneco_Doi/mint_and_hide_DOIs/mints_dois_for_list_of_urls.yml +4 -4
  28. data/spec/fixtures/vcr_cassettes/Cirneco_Work/DOI_API/get/should_get_all_dois.yml +16 -577
  29. data/spec/fixtures/vcr_cassettes/Cirneco_Work/DOI_API/get/should_get_doi.yml +15 -58
  30. data/spec/fixtures/vcr_cassettes/Cirneco_Work/DOI_API/get/should_get_doi_not_found.yml +16 -57
  31. data/spec/fixtures/vcr_cassettes/Cirneco_Work/DOI_API/get/username_missing.yml +16 -16
  32. data/spec/fixtures/vcr_cassettes/Cirneco_Work/DOI_API/put/should_put_doi.yml +16 -63
  33. data/spec/fixtures/vcr_cassettes/Cirneco_Work/Media_API/get/should_get_media.yml +16 -61
  34. data/spec/fixtures/vcr_cassettes/Cirneco_Work/Media_API/post/should_post_media.yml +17 -62
  35. data/spec/fixtures/vcr_cassettes/Cirneco_Work/Metadata_API/delete/should_delete_metadata.yml +17 -60
  36. data/spec/fixtures/vcr_cassettes/Cirneco_Work/Metadata_API/get/should_get_metadata.yml +17 -104
  37. data/spec/fixtures/vcr_cassettes/Cirneco_Work/Metadata_API/post/should_post_metadata.yml +17 -108
  38. data/spec/fixtures/vcr_cassettes/Cirneco_Work/media/includes_media.yml +17 -17
  39. data/spec/fixtures/vcr_cassettes/Cirneco_Work/schema/BlogPosting.yml +15 -15
  40. data/spec/utils_spec.rb +9 -186
  41. data/spec/work_spec.rb +1 -1
  42. metadata +4 -12
  43. data/spec/fixtures/vcr_cassettes/Cirneco_DataCenter/get/should_get_all_dois_by_prefix.yml +0 -562
  44. data/spec/fixtures/vcr_cassettes/Cirneco_DataCenter/mint_and_hide_DOIs/should_hide_for_url.yml +0 -46
  45. data/spec/fixtures/vcr_cassettes/Cirneco_DataCenter/mint_and_hide_DOIs/should_hide_metadata_for_work.yml +0 -46
  46. data/spec/fixtures/vcr_cassettes/Cirneco_DataCenter/mint_and_hide_DOIs/should_mint_and_hide_for_url.yml +0 -182
  47. data/spec/fixtures/vcr_cassettes/Cirneco_DataCenter/mint_and_hide_DOIs/should_post_metadata_for_work.yml +0 -139
  48. data/spec/fixtures/vcr_cassettes/Cirneco_Doi/MDS_DOI_API/get/should_get_all_dois.yml +0 -564
  49. data/spec/fixtures/vcr_cassettes/Cirneco_Doi/MDS_DOI_API/get/should_get_doi.yml +0 -46
  50. data/spec/fixtures/vcr_cassettes/Cirneco_Doi/MDS_DOI_API/put/should_put_doi.yml +0 -50
  51. data/spec/fixtures/vcr_cassettes/Cirneco_Doi/mint_and_hide_DOIs/hides_a_doi.yml +0 -46
  52. data/spec/fixtures/vcr_cassettes/Cirneco_Doi/mint_and_hide_DOIs/mints_and_hides_a_doi.yml +0 -182
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 26264768a88f4fa5359062dc47a64fd378baee79
4
- data.tar.gz: f6a57d8575940385bd04000497800080be5f6216
3
+ metadata.gz: ca288d421311719334b7a9d38849732549f5f2d3
4
+ data.tar.gz: 5d9b99265570d9ab485b463e9287f9d5e16fdd01
5
5
  SHA512:
6
- metadata.gz: 7ec100120cb41af67f14bdf17c70a82e819b24ccf7897f35238336c348ae6fab1773ecb582b42375a9daecc0905d25f758677bd06c672d320c8010e3c5813ada
7
- data.tar.gz: 120c30769c64f9f173e3d1a9d2239c17c2e288cd17f83986c85d501fa288d3c35945101b76f4d1f874d7d8ace745acc54c6f09dca76ce8ef48e0bd4bcbbf59d4
6
+ metadata.gz: c604f57226dbc7ff543575bf1226605ac887bfb070d0789755fcbe6f0d146d90370798f3618f82e9ec35002a0b520d6a84fb3ca6712698acfec0e4fe6696f305
7
+ data.tar.gz: 93f67ce51ff86fe67be5c24f61c9bae6a22318e747015e12f6a87978ef3179554bd88337f47c5c245a9570efd91844d32a4e0c1105555cbfeedcd23458f89263
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- cirneco (0.9.13)
4
+ cirneco (0.9.14)
5
5
  activesupport (>= 4.2.5, < 6)
6
6
  base32-crockford-checksum (~> 0.2.2)
7
7
  bergamasco (~> 0.3)
@@ -36,7 +36,7 @@ GEM
36
36
  safe_yaml (~> 1.0, >= 1.0.4)
37
37
  bibtex-ruby (4.4.4)
38
38
  latex-decode (~> 0.0)
39
- bolognese (0.9.32)
39
+ bolognese (0.9.34)
40
40
  activesupport (>= 4.2.5, < 6)
41
41
  benchmark_methods (~> 0.7)
42
42
  bibtex-ruby (~> 4.1)
@@ -1,8 +1,10 @@
1
1
  require 'active_support/all'
2
2
  require 'nokogiri'
3
+ require 'bolognese'
3
4
 
4
5
  require_relative 'api'
5
6
  require_relative 'utils'
7
+ require_relative 'file_utils'
6
8
  require_relative 'base'
7
9
 
8
10
  module Cirneco
@@ -10,6 +12,9 @@ module Cirneco
10
12
  include Cirneco::Base
11
13
  include Cirneco::Api
12
14
  include Cirneco::Utils
15
+ include Cirneco::FileUtils
16
+ include Bolognese::Utils
17
+ include Bolognese::DoiUtils
13
18
 
14
19
  attr_accessor :prefix, :username, :password
15
20
 
data/lib/cirneco/doi.rb CHANGED
@@ -1,7 +1,9 @@
1
- require "thor"
1
+ require 'thor'
2
+ require 'bolognese'
2
3
 
3
4
  require_relative 'api'
4
5
  require_relative 'utils'
6
+ require_relative 'file_utils'
5
7
  require_relative 'base'
6
8
 
7
9
  module Cirneco
@@ -9,6 +11,9 @@ module Cirneco
9
11
  include Cirneco::Base
10
12
  include Cirneco::Api
11
13
  include Cirneco::Utils
14
+ include Cirneco::FileUtils
15
+ include Bolognese::Utils
16
+ include Bolognese::DoiUtils
12
17
 
13
18
  desc "get DOI", "get handle url for DOI"
14
19
  method_option :username, :default => ENV['MDS_USERNAME']
@@ -0,0 +1,368 @@
1
+ require 'bergamasco'
2
+ require 'open-uri'
3
+ require 'time'
4
+
5
+ module Cirneco
6
+ module FileUtils
7
+ JATS_SCHEMA = File.expand_path("../../../resources/jats-1.1/JATS-journalpublishing1.xsd", __FILE__)
8
+
9
+ LICENSES = {
10
+ "https://creativecommons.org/licenses/by/4.0/" => "Creative Commons Attribution (CC-BY 4.0)",
11
+ "https://creativecommons.org/publicdomain/zero/1.0/" => "Creative Commons Public Domain Dedication (CC0 1.0)"
12
+ }
13
+
14
+ def get_accession_number(filepath)
15
+ metadata = Bergamasco::Markdown.read_yaml_for_doi_metadata(filepath)
16
+ metadata.fetch("accession_number", nil).to_s.scan(/\d+/).first.to_i
17
+ end
18
+
19
+ def get_all_accession_numbers(folderpath)
20
+ Dir.glob("#{folderpath}/*.md").sort.map do |filepath|
21
+ get_accession_number(filepath)
22
+ end.select { |a| a > 0 }.sort
23
+ end
24
+
25
+ def update_accession_number(filepath, options={})
26
+ filename = File.basename(filepath)
27
+ return "File #{filename} ignored: not a markdown or html file" unless %w(.md .html .erb).include?(File.extname(filepath))
28
+
29
+ old_metadata = Bergamasco::Markdown.read_yaml_for_doi_metadata(filepath)
30
+ return "File #{filename} ignored: no yaml front matter" unless old_metadata.present?
31
+
32
+ return "File #{filename} ignored: no empty accession_number" if options[:opt_in] && !old_metadata.key?("accession_number")
33
+
34
+ return "Accession number #{old_metadata["accession_number"]} not changed for #{filename}" if old_metadata["accession_number"]
35
+
36
+ if old_metadata["doi"].present?
37
+ number = decode_doi(old_metadata["doi"])
38
+ options[:number] = number if number > 0
39
+ else
40
+ folderpath = File.dirname(filepath)
41
+ options[:registered_numbers] = get_all_accession_numbers(folderpath)
42
+ end
43
+
44
+ accession_number = generate_accession_number(options)
45
+
46
+ new_metadata = Bergamasco::Markdown.update_file(filepath, "accession_number" => accession_number)
47
+ "Accession number #{new_metadata["accession_number"]} generated for #{filename}"
48
+ end
49
+
50
+ def update_all_accession_numbers(folderpath, options={})
51
+ Dir.glob("#{folderpath}/*.md").sort.map do |filepath|
52
+ update_accession_number(filepath, options)
53
+ end
54
+ end
55
+
56
+ # fetch schema.org metadata in JSON-LD format to mint DOI
57
+ def mint_doi_for_url(url, options={})
58
+ filename, build_path, source_path = filepath_from_url(url, options)
59
+
60
+ json = get_json_ld_from_work(build_path, options)
61
+ metadata = JSON.parse(json)
62
+ return "DOI #{metadata["doi"]} not changed for #{filename}" if metadata["doi"] && metadata["dateModified"] == metadata["datePublished"] && options[:force].blank?
63
+
64
+ prefix = options[:prefix] || ENV['PREFIX']
65
+ doi = encode_doi(prefix, number: metadata["alternateName"]) if metadata["doi"].blank?
66
+
67
+ new_metadata = Bergamasco::Markdown.update_file(source_path, "doi" => doi, "date" => metadata["dateModified"])
68
+ "DOI #{new_metadata["doi"]} minted for #{filename}"
69
+ end
70
+
71
+ # fetch schema.org metadata in JSON-LD format to mint DOI
72
+ def mint_and_hide_doi_for_url(url, options={})
73
+ filename, build_path, source_path = filepath_from_url(url, options)
74
+
75
+ json = get_json_ld_from_work(build_path, options)
76
+ metadata = JSON.parse(json)
77
+ metadata["doi"] = doi_from_url(metadata["@id"])
78
+ return "DOI #{metadata["doi"]} not changed for #{filename}" if metadata["doi"] && metadata["dateModified"] == metadata["datePublished"] && options[:force].blank?
79
+
80
+ prefix = options[:prefix] || ENV['PREFIX']
81
+ metadata["doi"] = encode_doi(prefix, number: metadata["alternateName"]) if metadata["doi"].blank?
82
+
83
+ response = post_metadata_for_work(json, options.merge(doi: metadata["doi"]))
84
+ return "Errors for DOI #{metadata["doi"]}: #{response.body['errors'].first['title']}\n" if response.body['errors'].present?
85
+
86
+ response = hide_metadata_for_work(json, options.merge(doi: metadata["doi"]))
87
+ return "Errors for DOI #{metadata["doi"]}: #{response.body['errors'].first['title']}\n" if response.body['errors'].present?
88
+
89
+ new_metadata = Bergamasco::Markdown.update_file(source_path, "published" => false)
90
+ "DOI #{metadata["doi"]} minted and hidden for #{filename}"
91
+ end
92
+
93
+ # fetch schema.org metadata in JSON-LD format to mint DOI
94
+ # DOIs are never deleted, but we can remove the metadata from the DataCite index
95
+ def hide_doi_for_url(url, options={})
96
+ filename, build_path, source_path = filepath_from_url(url, options)
97
+
98
+ json = get_json_ld_from_work(build_path, options)
99
+ metadata = JSON.parse(json)
100
+ metadata["doi"] = doi_from_url(metadata["@id"])
101
+ return "No DOI for #{filename}" unless metadata["doi"]
102
+ return "DOI #{metadata["doi"]} not active for #{filename}" unless metadata["datePublished"] || options[:force].present?
103
+
104
+ response = hide_metadata_for_work(json, options)
105
+ return "Errors for DOI #{metadata["doi"]}: #{response.body['errors'].first['title']}\n" if response.body['errors'].present?
106
+
107
+ new_metadata = Bergamasco::Markdown.update_file(source_path, "published" => false)
108
+ "DOI #{metadata["doi"]} hidden for #{filename}"
109
+ end
110
+
111
+ def mint_dois_for_all_urls(url, options={})
112
+ urls = get_urls_for_works(url)
113
+ urls.map do |u|
114
+ mint_doi_for_url(u, options)
115
+ end.join("\n")
116
+ end
117
+
118
+ def mint_and_hide_dois_for_all_urls(url, options={})
119
+ urls = get_urls_for_works(url)
120
+ urls.map do |u|
121
+ mint_and_hide_doi_for_url(u, options)
122
+ end.join("\n")
123
+ end
124
+
125
+ def hide_dois_for_all_urls(url, options={})
126
+ urls = get_urls_for_works(url)
127
+ urls.map do |u|
128
+ hide_doi_for_url(u, options)
129
+ end.join("\n")
130
+ end
131
+
132
+ def get_urls_for_works(url)
133
+ doc = Nokogiri::HTML(open(url))
134
+ json = doc.at_xpath("//script[@type='application/ld+json']")
135
+ return [url] unless json.present?
136
+
137
+ metadata = ActiveSupport::JSON.decode(json.text)
138
+ metadata.fetch("hasPart", []).map { |p| p["@id"] } + [url]
139
+ end
140
+
141
+ def get_json_ld_from_work(url, options={})
142
+ doc = Nokogiri::HTML(open(url))
143
+ json = doc.at_xpath("//script[@type='application/ld+json']")
144
+ return { "error" => "Error: no schema.org metadata found" } unless json.present?
145
+
146
+ json.text
147
+ end
148
+
149
+ def post_metadata_for_work(input, options={})
150
+ metadata = JSON.parse(input)
151
+
152
+ prefix = options[:prefix] || ENV['PREFIX']
153
+ doi = encode_doi(prefix, number: metadata["alternateName"]) if metadata["doi"].blank?
154
+
155
+ work = Cirneco::Work.new(input: input, doi: doi)
156
+ return work.errors if work.errors.present?
157
+
158
+ response = work.post_metadata(work.datacite, options)
159
+ return response unless response.status == 201
160
+
161
+ response = work.put_doi(work.doi, options.merge(url: metadata["url"]))
162
+ return response unless response.status == 201
163
+
164
+ if work.media.present?
165
+ work.post_media(metadata.doi, options.merge(media: work.media))
166
+ else
167
+ response
168
+ end
169
+ end
170
+
171
+ def post_and_hide_metadata_for_work(input, options={})
172
+ metadata = JSON.parse(input)
173
+
174
+ prefix = options[:prefix] || ENV['PREFIX']
175
+ doi = encode_doi(prefix, number: metadata["alternateName"]) if metadata["doi"].blank?
176
+
177
+ work = Cirneco::Work.new(input: input, doi: doi)
178
+ return work.errors if work.errors.present?
179
+
180
+ response = work.post_metadata(work.datacite, options)
181
+ return response unless response.status == 201
182
+
183
+ response = work.put_doi(work.doi, options.merge(url: metadata["url"]))
184
+ return response unless response.status == 201
185
+
186
+ response = work.delete_metadata(doi, options)
187
+ return response unless response.status == 201
188
+
189
+ if work.media.present?
190
+ work.post_media(metadata["doi"], options.merge(media: work.media))
191
+ else
192
+ response
193
+ end
194
+ end
195
+
196
+ def hide_metadata_for_work(input, options={})
197
+ metadata = JSON.parse(input)
198
+ metadata["doi"] = doi_from_url(metadata["@id"])
199
+
200
+ prefix = options[:prefix] || ENV['PREFIX']
201
+ metadata["doi"] = encode_doi(prefix, number: metadata["alternateName"]) if metadata["doi"].blank?
202
+
203
+ work = Cirneco::Work.new(input: input, doi: metadata["doi"])
204
+ return work.errors if work.errors.present?
205
+
206
+ work.delete_metadata(metadata["doi"], options)
207
+ end
208
+
209
+ def generate_metadata_for_jats(url, options={})
210
+ doc = Nokogiri::HTML(open(url))
211
+ json = doc.at_xpath("//script[@type='application/ld+json']")
212
+ return { "error" => "Error: no schema.org metadata found" } unless json.present?
213
+
214
+ metadata = ActiveSupport::JSON.decode(json.text)
215
+ return { "error" => "Error: blog post not published" } if metadata["published"].to_s == "false"
216
+ return { "error" => "Error: required metadata missing" } unless ["name", "author", "publisher", "datePublished", "@type"].all? { |k| metadata.key? k }
217
+
218
+ # required metadata
219
+ if /(http|https):\/\/(dx\.)?doi\.org\/(\w+)/.match(metadata["@id"])
220
+ uri = Addressable::URI.parse(metadata["@id"])
221
+ metadata["doi"] = uri.path[1..-1].upcase
222
+ end
223
+
224
+ metadata["title"] = metadata["name"]
225
+ metadata["author"] = format_authors(metadata["author"]).map do |a|
226
+ { "given_name" => a[:given_name],
227
+ "family_name" => a[:family_name],
228
+ "name" => a[:name],
229
+ "orcid" => a[:orcid] }.compact
230
+ end
231
+
232
+ metadata["publisher"] = metadata.fetch("publisher", {}).fetch("name", nil)
233
+ metadata["tags"] = metadata["keywords"].to_s.split(", ").select { |k| k != "featured" }
234
+ metadata["date"] = metadata.fetch("datePublished", "")
235
+ metadata["publication_year"] = metadata.fetch("date", "")[0..3].to_i
236
+ metadata["publication_month"] = metadata.fetch("date", "")[5..6].to_i
237
+ metadata["publication_day"] = metadata.fetch("date", "")[8..9].to_i
238
+
239
+ # metadata["accession_number"] = metadata["alternateName"]
240
+ metadata["journal_title"] = metadata.fetch("isPartOf", {}).fetch("name", nil)
241
+
242
+ if metadata["description"].present?
243
+ metadata["descriptions"] = [{ value: metadata["description"], description_type: "Abstract" }]
244
+ end
245
+
246
+ # use default version 1.0
247
+ metadata["version"] ||= "1.0"
248
+
249
+ # fetch reference metadata if available
250
+ metadata["related_identifiers"] = get_related_identifiers(metadata)
251
+
252
+ if metadata["license"].present?
253
+ metadata["license_name"] = LICENSES.fetch(metadata["license"], nil)
254
+ metadata["license_url"] = metadata["license"]
255
+ end
256
+
257
+ metadata = metadata.extract!(*%w(publisher doi tags title author date
258
+ publication_year publication_month publication_day license_name
259
+ license_url accession_number journal_title))
260
+ end
261
+
262
+ def generate_jats_for_url(url, options={})
263
+ filename, build_path, source_path = filepath_from_url(url, options)
264
+ metadata = generate_metadata_for_jats(build_path, options)
265
+ return "No JATS XML written for #{filename}" if metadata["published"].to_s == "false"
266
+
267
+ file = IO.read(source_path)
268
+ content = Bergamasco::Markdown.split_yaml_frontmatter(file).last
269
+ text = Bergamasco::Markdown.join_yaml_frontmatter(metadata, content)
270
+
271
+ xml = Bergamasco::Pandoc.convert_to_jats(text, options)
272
+ xmlname = filename.gsub(/\.html\.(erb|md)/, ".xml")
273
+ xmlpath = build_path.gsub("index.html", xmlname)
274
+ IO.write(xmlpath, xml)
275
+
276
+ "JATS XML written for #{filename}"
277
+ end
278
+
279
+ def generate_jats_for_all_urls(url, options={})
280
+ urls = get_urls_for_works(url)
281
+ urls.map do |u|
282
+ generate_jats_for_url(u, options)
283
+ end.join("\n")
284
+ end
285
+
286
+ def validate_jats(xml)
287
+ schema = Nokogiri::XML::Schema(open(JATS_SCHEMA))
288
+ puts schema.validate(Nokogiri::XML(xml))
289
+
290
+ OpenStruct.new(body: { "errors" => schema.validate(Nokogiri::XML(xml)).map { |error| { "title" => error.to_s } } })
291
+ end
292
+
293
+ def url_from_path(site_url, filepath)
294
+ site_url.to_s.chomp("\\") + "/" + File.basename(filepath)[0..-9] + "/"
295
+ end
296
+
297
+ def format_authors(authors)
298
+ Array(authors).map do |author|
299
+ orcid = validate_orcid(author["@id"])
300
+ name = (author["givenName"].present? || author["familyName"].present?) ? nil : author["name"]
301
+
302
+ { :given_name => author["givenName"],
303
+ :family_name => author["familyName"],
304
+ :name => name,
305
+ :orcid => orcid }.compact
306
+ end
307
+ end
308
+
309
+ def get_related_identifiers(metadata)
310
+ citations = Array(metadata["citation"])
311
+ parent = [metadata["isPartOf"]].compact.map { |r| r.merge("relation_type" => "IsPartOf") }
312
+ children = Array(metadata["hasPart"]).map { |r| r.merge("relation_type" => "HasPart") }
313
+
314
+ (citations + parent + children).map do |r|
315
+ id = r.fetch("@id", "")
316
+ relation_type = r.fetch("relation_type", "References")
317
+
318
+ if /(http|https):\/\/(dx\.)?doi\.org\/(\w+)/.match(id)
319
+ uri = Addressable::URI.parse(id)
320
+ value = uri.path[1..-1].upcase
321
+ type = "DOI"
322
+ elsif /(http|https):\/\//.match(id)
323
+ uri = Addressable::URI.parse(id)
324
+ value = uri.normalize.to_s
325
+ type = "URL"
326
+ else
327
+ type = nil
328
+ end
329
+
330
+ {
331
+ value: value,
332
+ related_identifier_type: type,
333
+ relation_type: relation_type
334
+ }
335
+ end.select { |t| t[:related_identifier_type].present? }
336
+ end
337
+
338
+ def format_media(metadata)
339
+ [metadata["encoding"]].compact.map do |m|
340
+ { mime_type: m["fileFormat"],
341
+ url: m["@id"] }
342
+ end
343
+ end
344
+
345
+ def filepath_from_url(url, options={})
346
+ if doi_from_url(url)
347
+ response = Maremma.head(url, limit: 0)
348
+ url = response.headers.present? ? response.headers.fetch("Location", "") : ""
349
+ end
350
+
351
+ uri = Addressable::URI.parse(url.gsub(Dir.pwd + options[:build_dir].to_s, "")
352
+ .gsub(/index\.html$/, "")
353
+ .gsub(/\/$/, ""))
354
+ basename = File.basename(uri.path, ".html").presence || "index"
355
+
356
+ if basename.starts_with?("index")
357
+ filename = basename + ".html.erb"
358
+ source_path = Dir.pwd + options[:source_dir].to_s + filename
359
+ build_path = Dir.pwd + options[:build_dir].to_s + basename + ".html"
360
+ else
361
+ filename = basename + ".html.md"
362
+ source_path = Dir.pwd + options[:source_dir].to_s + options[:posts_dir].to_s + filename
363
+ build_path = Dir.pwd + options[:build_dir].to_s + basename + "/index.html"
364
+ end
365
+ [filename, build_path, source_path]
366
+ end
367
+ end
368
+ end
data/lib/cirneco/media.rb CHANGED
@@ -3,6 +3,7 @@ require 'active_support/all'
3
3
 
4
4
  require_relative 'api'
5
5
  require_relative 'utils'
6
+ require_relative 'file_utils'
6
7
  require_relative 'base'
7
8
 
8
9
  module Cirneco
@@ -10,7 +11,8 @@ module Cirneco
10
11
  include Cirneco::Base
11
12
  include Cirneco::Api
12
13
  include Cirneco::Utils
13
-
14
+ include Cirneco::FileUtils
15
+
14
16
  desc "get DOI", "get media for DOI"
15
17
  method_option :username, :default => ENV['MDS_USERNAME']
16
18
  method_option :password, :default => ENV['MDS_PASSWORD']
@@ -3,6 +3,7 @@ require 'active_support/all'
3
3
 
4
4
  require_relative 'api'
5
5
  require_relative 'utils'
6
+ require_relative 'file_utils'
6
7
  require_relative 'base'
7
8
 
8
9
  module Cirneco
@@ -10,6 +11,7 @@ module Cirneco
10
11
  include Cirneco::Base
11
12
  include Cirneco::Api
12
13
  include Cirneco::Utils
14
+ include Cirneco::FileUtils
13
15
 
14
16
  desc "get DOI", "get metadata for DOI"
15
17
  method_option :username, :default => ENV['MDS_USERNAME']