cirneco 0.9.13 → 0.9.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +2 -2
- data/lib/cirneco/data_center.rb +5 -0
- data/lib/cirneco/doi.rb +6 -1
- data/lib/cirneco/file_utils.rb +368 -0
- data/lib/cirneco/media.rb +3 -1
- data/lib/cirneco/metadata.rb +2 -0
- data/lib/cirneco/utils.rb +5 -379
- data/lib/cirneco/version.rb +1 -1
- data/lib/cirneco/work.rb +2 -0
- data/spec/api_spec.rb +3 -3
- data/spec/doi_spec.rb +7 -7
- data/spec/file_utils_spec.rb +203 -0
- data/spec/fixtures/cool-dois-minted/index.html +1 -1
- data/spec/fixtures/cool-dois-minted.html.md +1 -1
- data/spec/fixtures/cool-dois.html.md +1 -1
- data/spec/fixtures/index-minted.html +1 -1
- data/spec/fixtures/index.html +2 -2
- data/spec/fixtures/index.html.erb +1 -1
- data/spec/fixtures/vcr_cassettes/Cirneco_DataCenter/jats/should_generate_jats_for_all_urls.yml +4 -4
- data/spec/fixtures/vcr_cassettes/Cirneco_DataCenter/mint_and_hide_DOIs/should_hide_for_all_urls.yml +3 -44
- data/spec/fixtures/vcr_cassettes/Cirneco_DataCenter/mint_and_hide_DOIs/should_mint_and_hide_for_all_urls.yml +4 -374
- data/spec/fixtures/vcr_cassettes/Cirneco_DataCenter/mint_and_hide_DOIs/should_mint_for_all_urls.yml +4 -4
- data/spec/fixtures/vcr_cassettes/Cirneco_Doi/jats/writes_jats_for_list_of_urls.yml +4 -4
- data/spec/fixtures/vcr_cassettes/Cirneco_Doi/mint_and_hide_DOIs/hides_dois_for_list_of_urls.yml +4 -47
- data/spec/fixtures/vcr_cassettes/Cirneco_Doi/mint_and_hide_DOIs/mints_and_hides_dois_for_list_of_urls.yml +4 -374
- data/spec/fixtures/vcr_cassettes/Cirneco_Doi/mint_and_hide_DOIs/mints_dois_for_list_of_urls.yml +4 -4
- data/spec/fixtures/vcr_cassettes/Cirneco_Work/DOI_API/get/should_get_all_dois.yml +16 -577
- data/spec/fixtures/vcr_cassettes/Cirneco_Work/DOI_API/get/should_get_doi.yml +15 -58
- data/spec/fixtures/vcr_cassettes/Cirneco_Work/DOI_API/get/should_get_doi_not_found.yml +16 -57
- data/spec/fixtures/vcr_cassettes/Cirneco_Work/DOI_API/get/username_missing.yml +16 -16
- data/spec/fixtures/vcr_cassettes/Cirneco_Work/DOI_API/put/should_put_doi.yml +16 -63
- data/spec/fixtures/vcr_cassettes/Cirneco_Work/Media_API/get/should_get_media.yml +16 -61
- data/spec/fixtures/vcr_cassettes/Cirneco_Work/Media_API/post/should_post_media.yml +17 -62
- data/spec/fixtures/vcr_cassettes/Cirneco_Work/Metadata_API/delete/should_delete_metadata.yml +17 -60
- data/spec/fixtures/vcr_cassettes/Cirneco_Work/Metadata_API/get/should_get_metadata.yml +17 -104
- data/spec/fixtures/vcr_cassettes/Cirneco_Work/Metadata_API/post/should_post_metadata.yml +17 -108
- data/spec/fixtures/vcr_cassettes/Cirneco_Work/media/includes_media.yml +17 -17
- data/spec/fixtures/vcr_cassettes/Cirneco_Work/schema/BlogPosting.yml +15 -15
- data/spec/utils_spec.rb +9 -186
- data/spec/work_spec.rb +1 -1
- metadata +4 -12
- data/spec/fixtures/vcr_cassettes/Cirneco_DataCenter/get/should_get_all_dois_by_prefix.yml +0 -562
- data/spec/fixtures/vcr_cassettes/Cirneco_DataCenter/mint_and_hide_DOIs/should_hide_for_url.yml +0 -46
- data/spec/fixtures/vcr_cassettes/Cirneco_DataCenter/mint_and_hide_DOIs/should_hide_metadata_for_work.yml +0 -46
- data/spec/fixtures/vcr_cassettes/Cirneco_DataCenter/mint_and_hide_DOIs/should_mint_and_hide_for_url.yml +0 -182
- data/spec/fixtures/vcr_cassettes/Cirneco_DataCenter/mint_and_hide_DOIs/should_post_metadata_for_work.yml +0 -139
- data/spec/fixtures/vcr_cassettes/Cirneco_Doi/MDS_DOI_API/get/should_get_all_dois.yml +0 -564
- data/spec/fixtures/vcr_cassettes/Cirneco_Doi/MDS_DOI_API/get/should_get_doi.yml +0 -46
- data/spec/fixtures/vcr_cassettes/Cirneco_Doi/MDS_DOI_API/put/should_put_doi.yml +0 -50
- data/spec/fixtures/vcr_cassettes/Cirneco_Doi/mint_and_hide_DOIs/hides_a_doi.yml +0 -46
- data/spec/fixtures/vcr_cassettes/Cirneco_Doi/mint_and_hide_DOIs/mints_and_hides_a_doi.yml +0 -182
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ca288d421311719334b7a9d38849732549f5f2d3
|
4
|
+
data.tar.gz: 5d9b99265570d9ab485b463e9287f9d5e16fdd01
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c604f57226dbc7ff543575bf1226605ac887bfb070d0789755fcbe6f0d146d90370798f3618f82e9ec35002a0b520d6a84fb3ca6712698acfec0e4fe6696f305
|
7
|
+
data.tar.gz: 93f67ce51ff86fe67be5c24f61c9bae6a22318e747015e12f6a87978ef3179554bd88337f47c5c245a9570efd91844d32a4e0c1105555cbfeedcd23458f89263
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
cirneco (0.9.13)
|
4
|
+
cirneco (0.9.14)
|
5
5
|
activesupport (>= 4.2.5, < 6)
|
6
6
|
base32-crockford-checksum (~> 0.2.2)
|
7
7
|
bergamasco (~> 0.3)
|
@@ -36,7 +36,7 @@ GEM
|
|
36
36
|
safe_yaml (~> 1.0, >= 1.0.4)
|
37
37
|
bibtex-ruby (4.4.4)
|
38
38
|
latex-decode (~> 0.0)
|
39
|
-
bolognese (0.9.
|
39
|
+
bolognese (0.9.34)
|
40
40
|
activesupport (>= 4.2.5, < 6)
|
41
41
|
benchmark_methods (~> 0.7)
|
42
42
|
bibtex-ruby (~> 4.1)
|
data/lib/cirneco/data_center.rb
CHANGED
@@ -1,8 +1,10 @@
|
|
1
1
|
require 'active_support/all'
|
2
2
|
require 'nokogiri'
|
3
|
+
require 'bolognese'
|
3
4
|
|
4
5
|
require_relative 'api'
|
5
6
|
require_relative 'utils'
|
7
|
+
require_relative 'file_utils'
|
6
8
|
require_relative 'base'
|
7
9
|
|
8
10
|
module Cirneco
|
@@ -10,6 +12,9 @@ module Cirneco
|
|
10
12
|
include Cirneco::Base
|
11
13
|
include Cirneco::Api
|
12
14
|
include Cirneco::Utils
|
15
|
+
include Cirneco::FileUtils
|
16
|
+
include Bolognese::Utils
|
17
|
+
include Bolognese::DoiUtils
|
13
18
|
|
14
19
|
attr_accessor :prefix, :username, :password
|
15
20
|
|
data/lib/cirneco/doi.rb
CHANGED
@@ -1,7 +1,9 @@
|
|
1
|
-
require
|
1
|
+
require 'thor'
|
2
|
+
require 'bolognese'
|
2
3
|
|
3
4
|
require_relative 'api'
|
4
5
|
require_relative 'utils'
|
6
|
+
require_relative 'file_utils'
|
5
7
|
require_relative 'base'
|
6
8
|
|
7
9
|
module Cirneco
|
@@ -9,6 +11,9 @@ module Cirneco
|
|
9
11
|
include Cirneco::Base
|
10
12
|
include Cirneco::Api
|
11
13
|
include Cirneco::Utils
|
14
|
+
include Cirneco::FileUtils
|
15
|
+
include Bolognese::Utils
|
16
|
+
include Bolognese::DoiUtils
|
12
17
|
|
13
18
|
desc "get DOI", "get handle url for DOI"
|
14
19
|
method_option :username, :default => ENV['MDS_USERNAME']
|
@@ -0,0 +1,368 @@
|
|
1
|
+
require 'bergamasco'
|
2
|
+
require 'open-uri'
|
3
|
+
require 'time'
|
4
|
+
|
5
|
+
module Cirneco
  # Mixin with helpers that read and update the source and build files of a
  # site (markdown/erb sources with YAML front matter, built HTML pages with
  # embedded schema.org JSON-LD) in order to assign accession numbers, mint
  # and hide DOIs, and write JATS XML.
  #
  # The methods rely on helpers that are not defined in this module and must
  # be supplied by the including class: decode_doi, encode_doi,
  # generate_accession_number, doi_from_url and validate_orcid.
  module FileUtils
    JATS_SCHEMA = File.expand_path("../../../resources/jats-1.1/JATS-journalpublishing1.xsd", __FILE__)

    # license URL => human-readable license name
    LICENSES = {
      "https://creativecommons.org/licenses/by/4.0/" => "Creative Commons Attribution (CC-BY 4.0)",
      "https://creativecommons.org/publicdomain/zero/1.0/" => "Creative Commons Public Domain Dedication (CC0 1.0)"
    }.freeze

    # Returns the first run of digits in the "accession_number" front matter
    # entry of filepath as an Integer, or 0 when there is none.
    def get_accession_number(filepath)
      metadata = Bergamasco::Markdown.read_yaml_for_doi_metadata(filepath)
      metadata.fetch("accession_number", nil).to_s.scan(/\d+/).first.to_i
    end

    # Returns the sorted list of all positive accession numbers found in the
    # *.md files directly inside folderpath.
    def get_all_accession_numbers(folderpath)
      Dir.glob("#{folderpath}/*.md").sort.map do |filepath|
        get_accession_number(filepath)
      end.select { |a| a > 0 }.sort
    end

    # Generates an accession number for filepath unless it already has one.
    #
    # options[:opt_in] - only handle files that already contain an
    #                    "accession_number" key in their front matter.
    # Remaining options are handed to generate_accession_number.
    #
    # Returns a String describing what was done.
    def update_accession_number(filepath, options={})
      filename = File.basename(filepath)
      return "File #{filename} ignored: not a markdown or html file" unless %w(.md .html .erb).include?(File.extname(filepath))

      old_metadata = Bergamasco::Markdown.read_yaml_for_doi_metadata(filepath)
      return "File #{filename} ignored: no yaml front matter" unless old_metadata.present?

      return "File #{filename} ignored: no empty accession_number" if options[:opt_in] && !old_metadata.key?("accession_number")

      return "Accession number #{old_metadata["accession_number"]} not changed for #{filename}" if old_metadata["accession_number"]

      # Work on a copy: update_all_accession_numbers passes the same hash for
      # every file, so writing :number or :registered_numbers into the
      # caller's hash would leak one file's values into the next file.
      options = options.dup

      if old_metadata["doi"].present?
        number = decode_doi(old_metadata["doi"])
        options[:number] = number if number > 0
      else
        folderpath = File.dirname(filepath)
        options[:registered_numbers] = get_all_accession_numbers(folderpath)
      end

      accession_number = generate_accession_number(options)

      new_metadata = Bergamasco::Markdown.update_file(filepath, "accession_number" => accession_number)
      "Accession number #{new_metadata["accession_number"]} generated for #{filename}"
    end

    # Runs update_accession_number for every *.md file directly inside
    # folderpath and returns the Array of result messages.
    def update_all_accession_numbers(folderpath, options={})
      Dir.glob("#{folderpath}/*.md").sort.map do |filepath|
        update_accession_number(filepath, options)
      end
    end

    # fetch schema.org metadata in JSON-LD format to mint DOI
    #
    # Stores the DOI (the existing one, or a new one encoded from the prefix
    # and the "alternateName" of the work) together with the modification
    # date in the front matter of the source file. Returns a message String.
    def mint_doi_for_url(url, options={})
      filename, build_path, source_path = filepath_from_url(url, options)

      json = get_json_ld_from_work(build_path, options)
      # get_json_ld_from_work returns an error Hash instead of a JSON String
      # when the page has no JSON-LD; JSON.parse would raise on it
      return "#{json["error"]} for #{filename}" if json.is_a?(Hash)

      metadata = JSON.parse(json)
      return "DOI #{metadata["doi"]} not changed for #{filename}" if metadata["doi"] && metadata["dateModified"] == metadata["datePublished"] && options[:force].blank?

      prefix = options[:prefix] || ENV['PREFIX']
      # keep an existing DOI rather than overwriting it with nil
      doi = metadata["doi"].presence || encode_doi(prefix, number: metadata["alternateName"])

      new_metadata = Bergamasco::Markdown.update_file(source_path, "doi" => doi, "date" => metadata["dateModified"])
      "DOI #{new_metadata["doi"]} minted for #{filename}"
    end

    # fetch schema.org metadata in JSON-LD format to mint DOI
    #
    # Posts the metadata for the work, hides it again and marks the source
    # file as unpublished. Returns a message String (an error message when
    # one of the responses carries errors).
    def mint_and_hide_doi_for_url(url, options={})
      filename, build_path, source_path = filepath_from_url(url, options)

      json = get_json_ld_from_work(build_path, options)
      # error Hash instead of JSON String: no JSON-LD found on the page
      return "#{json["error"]} for #{filename}" if json.is_a?(Hash)

      metadata = JSON.parse(json)
      metadata["doi"] = doi_from_url(metadata["@id"])
      return "DOI #{metadata["doi"]} not changed for #{filename}" if metadata["doi"] && metadata["dateModified"] == metadata["datePublished"] && options[:force].blank?

      prefix = options[:prefix] || ENV['PREFIX']
      metadata["doi"] = encode_doi(prefix, number: metadata["alternateName"]) if metadata["doi"].blank?

      response = post_metadata_for_work(json, options.merge(doi: metadata["doi"]))
      return "Errors for DOI #{metadata["doi"]}: #{response.body['errors'].first['title']}\n" if response.body['errors'].present?

      response = hide_metadata_for_work(json, options.merge(doi: metadata["doi"]))
      return "Errors for DOI #{metadata["doi"]}: #{response.body['errors'].first['title']}\n" if response.body['errors'].present?

      Bergamasco::Markdown.update_file(source_path, "published" => false)
      "DOI #{metadata["doi"]} minted and hidden for #{filename}"
    end

    # fetch schema.org metadata in JSON-LD format to mint DOI
    # DOIs are never deleted, but we can remove the metadata from the DataCite index
    #
    # Returns a message String.
    def hide_doi_for_url(url, options={})
      filename, build_path, source_path = filepath_from_url(url, options)

      json = get_json_ld_from_work(build_path, options)
      # error Hash instead of JSON String: no JSON-LD found on the page
      return "#{json["error"]} for #{filename}" if json.is_a?(Hash)

      metadata = JSON.parse(json)
      metadata["doi"] = doi_from_url(metadata["@id"])
      return "No DOI for #{filename}" unless metadata["doi"]
      return "DOI #{metadata["doi"]} not active for #{filename}" unless metadata["datePublished"] || options[:force].present?

      response = hide_metadata_for_work(json, options)
      return "Errors for DOI #{metadata["doi"]}: #{response.body['errors'].first['title']}\n" if response.body['errors'].present?

      Bergamasco::Markdown.update_file(source_path, "published" => false)
      "DOI #{metadata["doi"]} hidden for #{filename}"
    end

    # Runs mint_doi_for_url for url and all works listed on that page and
    # returns the messages joined by newlines.
    def mint_dois_for_all_urls(url, options={})
      urls = get_urls_for_works(url)
      urls.map do |u|
        mint_doi_for_url(u, options)
      end.join("\n")
    end

    # Runs mint_and_hide_doi_for_url for url and all works listed on that
    # page and returns the messages joined by newlines.
    def mint_and_hide_dois_for_all_urls(url, options={})
      urls = get_urls_for_works(url)
      urls.map do |u|
        mint_and_hide_doi_for_url(u, options)
      end.join("\n")
    end

    # Runs hide_doi_for_url for url and all works listed on that page and
    # returns the messages joined by newlines.
    def hide_dois_for_all_urls(url, options={})
      urls = get_urls_for_works(url)
      urls.map do |u|
        hide_doi_for_url(u, options)
      end.join("\n")
    end

    # Returns the "@id" of every "hasPart" entry in the JSON-LD of the page
    # at url, followed by url itself. Returns [url] when the page has no
    # JSON-LD script element.
    def get_urls_for_works(url)
      # NOTE(review): relies on open-uri extending Kernel#open; confirm this
      # still works on the Ruby versions the gem has to support.
      # block form closes the handle once the document is parsed
      doc = open(url) { |io| Nokogiri::HTML(io) }
      json = doc.at_xpath("//script[@type='application/ld+json']")
      return [url] unless json.present?

      metadata = ActiveSupport::JSON.decode(json.text)
      metadata.fetch("hasPart", []).map { |p| p["@id"] } + [url]
    end

    # Returns the raw JSON-LD String embedded in the page at url, or a Hash
    # with an "error" key when the page has no JSON-LD script element.
    def get_json_ld_from_work(url, options={})
      doc = open(url) { |io| Nokogiri::HTML(io) }
      json = doc.at_xpath("//script[@type='application/ld+json']")
      return { "error" => "Error: no schema.org metadata found" } unless json.present?

      json.text
    end

    # Registers the work described by the JSON-LD String input: posts the
    # metadata, registers the DOI with the work's URL and, when the work has
    # media, posts the media.
    #
    # Returns work.errors when the work is invalid, otherwise the response
    # of the last request that was made.
    def post_metadata_for_work(input, options={})
      metadata = JSON.parse(input)

      prefix = options[:prefix] || ENV['PREFIX']
      doi = encode_doi(prefix, number: metadata["alternateName"]) if metadata["doi"].blank?

      work = Cirneco::Work.new(input: input, doi: doi)
      return work.errors if work.errors.present?

      response = work.post_metadata(work.datacite, options)
      return response unless response.status == 201

      response = work.put_doi(work.doi, options.merge(url: metadata["url"]))
      return response unless response.status == 201

      if work.media.present?
        # metadata is a Hash and has no #doi method; use the DOI of the work,
        # as for put_doi above
        work.post_media(work.doi, options.merge(media: work.media))
      else
        response
      end
    end

    # Same as post_metadata_for_work, but deletes the metadata again after
    # the DOI has been registered.
    #
    # Returns work.errors when the work is invalid, otherwise the response
    # of the last request that was made.
    def post_and_hide_metadata_for_work(input, options={})
      metadata = JSON.parse(input)

      prefix = options[:prefix] || ENV['PREFIX']
      doi = encode_doi(prefix, number: metadata["alternateName"]) if metadata["doi"].blank?

      work = Cirneco::Work.new(input: input, doi: doi)
      return work.errors if work.errors.present?

      response = work.post_metadata(work.datacite, options)
      return response unless response.status == 201

      response = work.put_doi(work.doi, options.merge(url: metadata["url"]))
      return response unless response.status == 201

      # use work.doi throughout: the local doi is nil when the input already
      # carries a DOI, and metadata["doi"] is blank when it does not
      response = work.delete_metadata(work.doi, options)
      # NOTE(review): confirm that a successful delete really answers with
      # status 201; otherwise the media below is never posted.
      return response unless response.status == 201

      if work.media.present?
        work.post_media(work.doi, options.merge(media: work.media))
      else
        response
      end
    end

    # Deletes the metadata of the work described by the JSON-LD String
    # input. The DOI is taken from the "@id" of the work, or encoded from
    # the prefix and "alternateName" when "@id" yields none.
    #
    # Returns work.errors when the work is invalid, otherwise the response
    # of the delete request.
    def hide_metadata_for_work(input, options={})
      metadata = JSON.parse(input)
      metadata["doi"] = doi_from_url(metadata["@id"])

      prefix = options[:prefix] || ENV['PREFIX']
      metadata["doi"] = encode_doi(prefix, number: metadata["alternateName"]) if metadata["doi"].blank?

      work = Cirneco::Work.new(input: input, doi: metadata["doi"])
      return work.errors if work.errors.present?

      work.delete_metadata(metadata["doi"], options)
    end

    # Builds the front matter Hash used for the JATS conversion from the
    # JSON-LD embedded in the page at url.
    #
    # Returns a Hash with an "error" key when the page has no JSON-LD, the
    # post is not published or required metadata are missing.
    def generate_metadata_for_jats(url, options={})
      doc = open(url) { |io| Nokogiri::HTML(io) }
      json = doc.at_xpath("//script[@type='application/ld+json']")
      return { "error" => "Error: no schema.org metadata found" } unless json.present?

      metadata = ActiveSupport::JSON.decode(json.text)
      return { "error" => "Error: blog post not published" } if metadata["published"].to_s == "false"
      return { "error" => "Error: required metadata missing" } unless ["name", "author", "publisher", "datePublished", "@type"].all? { |k| metadata.key? k }

      # required metadata
      if /(http|https):\/\/(dx\.)?doi\.org\/(\w+)/.match(metadata["@id"])
        uri = Addressable::URI.parse(metadata["@id"])
        metadata["doi"] = uri.path[1..-1].upcase
      end

      metadata["title"] = metadata["name"]
      metadata["author"] = format_authors(metadata["author"]).map do |a|
        { "given_name" => a[:given_name],
          "family_name" => a[:family_name],
          "name" => a[:name],
          "orcid" => a[:orcid] }.compact
      end

      metadata["publisher"] = metadata.fetch("publisher", {}).fetch("name", nil)
      metadata["tags"] = metadata["keywords"].to_s.split(", ").select { |k| k != "featured" }
      metadata["date"] = metadata.fetch("datePublished", "")
      # date is expected to start with YYYY-MM-DD
      metadata["publication_year"] = metadata.fetch("date", "")[0..3].to_i
      metadata["publication_month"] = metadata.fetch("date", "")[5..6].to_i
      metadata["publication_day"] = metadata.fetch("date", "")[8..9].to_i

      # metadata["accession_number"] = metadata["alternateName"]
      metadata["journal_title"] = metadata.fetch("isPartOf", {}).fetch("name", nil)

      # NOTE(review): "descriptions", "version" and "related_identifiers" are
      # computed here but are not in the list of extracted keys below, so
      # they are not part of the returned Hash.
      if metadata["description"].present?
        metadata["descriptions"] = [{ value: metadata["description"], description_type: "Abstract" }]
      end

      # use default version 1.0
      metadata["version"] ||= "1.0"

      # fetch reference metadata if available
      metadata["related_identifiers"] = get_related_identifiers(metadata)

      if metadata["license"].present?
        metadata["license_name"] = LICENSES.fetch(metadata["license"], nil)
        metadata["license_url"] = metadata["license"]
      end

      # keep only the keys used for the JATS front matter
      metadata = metadata.extract!(*%w(publisher doi tags title author date
        publication_year publication_month publication_day license_name
        license_url accession_number journal_title))
    end

    # Converts the source file behind url to JATS XML and writes it next to
    # the built page. Returns a message String.
    def generate_jats_for_url(url, options={})
      filename, build_path, source_path = filepath_from_url(url, options)
      metadata = generate_metadata_for_jats(build_path, options)
      # NOTE(review): generate_metadata_for_jats never returns a "published"
      # key (it returns an "error" Hash for unpublished posts), so this guard
      # does not seem to trigger; confirm the intended behavior.
      return "No JATS XML written for #{filename}" if metadata["published"].to_s == "false"

      file = IO.read(source_path)
      content = Bergamasco::Markdown.split_yaml_frontmatter(file).last
      text = Bergamasco::Markdown.join_yaml_frontmatter(metadata, content)

      xml = Bergamasco::Pandoc.convert_to_jats(text, options)
      xmlname = filename.gsub(/\.html\.(erb|md)/, ".xml")
      xmlpath = build_path.gsub("index.html", xmlname)
      IO.write(xmlpath, xml)

      "JATS XML written for #{filename}"
    end

    # Runs generate_jats_for_url for url and all works listed on that page
    # and returns the messages joined by newlines.
    def generate_jats_for_all_urls(url, options={})
      urls = get_urls_for_works(url)
      urls.map do |u|
        generate_jats_for_url(u, options)
      end.join("\n")
    end

    # Validates the xml String against JATS_SCHEMA.
    #
    # Returns an OpenStruct whose body is { "errors" => [{ "title" => ... }] }
    # with one entry per validation error.
    def validate_jats(xml)
      # block form closes the schema file once it has been parsed
      schema = File.open(JATS_SCHEMA) { |file| Nokogiri::XML::Schema(file) }
      # validate once and reuse the result for output and return value
      errors = schema.validate(Nokogiri::XML(xml))
      puts errors

      OpenStruct.new(body: { "errors" => errors.map { |error| { "title" => error.to_s } } })
    end

    # Builds the URL of a post from the site URL and the path of its source
    # file. The last eight characters of the file name (e.g. ".html.md") are
    # dropped; a trailing slash or backslash of site_url is ignored.
    def url_from_path(site_url, filepath)
      # chomp("/") avoids a double slash for site URLs given with a trailing slash
      site_url.to_s.chomp("\\").chomp("/") + "/" + File.basename(filepath)[0..-9] + "/"
    end

    # Converts schema.org author entries into Hashes with the keys
    # :given_name, :family_name, :name and :orcid (nil values are left out).
    # :name is only set when neither givenName nor familyName is present.
    def format_authors(authors)
      Array(authors).map do |author|
        orcid = validate_orcid(author["@id"])
        name = (author["givenName"].present? || author["familyName"].present?) ? nil : author["name"]

        { :given_name => author["givenName"],
          :family_name => author["familyName"],
          :name => name,
          :orcid => orcid }.compact
      end
    end

    # Collects the "citation", "isPartOf" and "hasPart" entries of metadata
    # as related identifiers. Entries whose "@id" is neither a DOI URL nor
    # any other http(s) URL are dropped.
    def get_related_identifiers(metadata)
      citations = Array(metadata["citation"])
      parent = [metadata["isPartOf"]].compact.map { |r| r.merge("relation_type" => "IsPartOf") }
      children = Array(metadata["hasPart"]).map { |r| r.merge("relation_type" => "HasPart") }

      (citations + parent + children).map do |r|
        id = r.fetch("@id", "")
        # citations carry no relation_type and default to "References"
        relation_type = r.fetch("relation_type", "References")

        if /(http|https):\/\/(dx\.)?doi\.org\/(\w+)/.match(id)
          uri = Addressable::URI.parse(id)
          value = uri.path[1..-1].upcase
          type = "DOI"
        elsif /(http|https):\/\//.match(id)
          uri = Addressable::URI.parse(id)
          value = uri.normalize.to_s
          type = "URL"
        else
          value = nil
          type = nil
        end

        {
          value: value,
          related_identifier_type: type,
          relation_type: relation_type
        }
      end.select { |t| t[:related_identifier_type].present? }
    end

    # Returns the "encoding" entry of metadata as a one-element Array of
    # { mime_type:, url: }, or an empty Array when there is no such entry.
    def format_media(metadata)
      [metadata["encoding"]].compact.map do |m|
        { mime_type: m["fileFormat"],
          url: m["@id"] }
      end
    end

    # Maps the URL (or local build path) of a page to
    # [filename, build_path, source_path], based on the current working
    # directory and options[:build_dir], options[:source_dir] and
    # options[:posts_dir]. DOI URLs are resolved to their target first.
    def filepath_from_url(url, options={})
      if doi_from_url(url)
        # limit: 0 is presumably meant to stop redirects from being followed
        # so that the Location header can be read; verify against Maremma
        response = Maremma.head(url, limit: 0)
        url = response.headers.present? ? response.headers.fetch("Location", "") : ""
      end

      uri = Addressable::URI.parse(url.gsub(Dir.pwd + options[:build_dir].to_s, "")
                                      .gsub(/index\.html$/, "")
                                      .gsub(/\/$/, ""))
      basename = File.basename(uri.path, ".html").presence || "index"

      if basename.starts_with?("index")
        # index pages are erb templates at the top of the source folder
        filename = basename + ".html.erb"
        source_path = Dir.pwd + options[:source_dir].to_s + filename
        build_path = Dir.pwd + options[:build_dir].to_s + basename + ".html"
      else
        # all other pages are markdown posts built into their own folder
        filename = basename + ".html.md"
        source_path = Dir.pwd + options[:source_dir].to_s + options[:posts_dir].to_s + filename
        build_path = Dir.pwd + options[:build_dir].to_s + basename + "/index.html"
      end
      [filename, build_path, source_path]
    end
  end
end
|
data/lib/cirneco/media.rb
CHANGED
@@ -3,6 +3,7 @@ require 'active_support/all'
|
|
3
3
|
|
4
4
|
require_relative 'api'
|
5
5
|
require_relative 'utils'
|
6
|
+
require_relative 'file_utils'
|
6
7
|
require_relative 'base'
|
7
8
|
|
8
9
|
module Cirneco
|
@@ -10,7 +11,8 @@ module Cirneco
|
|
10
11
|
include Cirneco::Base
|
11
12
|
include Cirneco::Api
|
12
13
|
include Cirneco::Utils
|
13
|
-
|
14
|
+
include Cirneco::FileUtils
|
15
|
+
|
14
16
|
desc "get DOI", "get media for DOI"
|
15
17
|
method_option :username, :default => ENV['MDS_USERNAME']
|
16
18
|
method_option :password, :default => ENV['MDS_PASSWORD']
|
data/lib/cirneco/metadata.rb
CHANGED
@@ -3,6 +3,7 @@ require 'active_support/all'
|
|
3
3
|
|
4
4
|
require_relative 'api'
|
5
5
|
require_relative 'utils'
|
6
|
+
require_relative 'file_utils'
|
6
7
|
require_relative 'base'
|
7
8
|
|
8
9
|
module Cirneco
|
@@ -10,6 +11,7 @@ module Cirneco
|
|
10
11
|
include Cirneco::Base
|
11
12
|
include Cirneco::Api
|
12
13
|
include Cirneco::Utils
|
14
|
+
include Cirneco::FileUtils
|
13
15
|
|
14
16
|
desc "get DOI", "get metadata for DOI"
|
15
17
|
method_option :username, :default => ENV['MDS_USERNAME']
|