cirneco 0.7.4 → 0.8.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +4 -4
- data/lib/cirneco/api.rb +8 -8
- data/lib/cirneco/base.rb +1 -1
- data/lib/cirneco/doi.rb +20 -9
- data/lib/cirneco/utils.rb +204 -128
- data/lib/cirneco/version.rb +1 -1
- data/lib/cirneco/work.rb +19 -7
- data/spec/api_spec.rb +10 -10
- data/spec/doi_spec.rb +29 -27
- data/spec/fixtures/cool-dois-minted.html +404 -0
- data/spec/fixtures/cool-dois-minted.html.md +99 -0
- data/spec/fixtures/cool-dois-missing-metadata.html +356 -0
- data/spec/fixtures/cool-dois-no-json-ld.html +352 -0
- data/spec/fixtures/cool-dois.html +404 -0
- data/spec/fixtures/cool-dois.html.md +1 -0
- data/spec/fixtures/index.html +271 -0
- data/spec/fixtures/vcr_cassettes/Cirneco_DataCenter/get/should_get_all_dois_by_prefix.yml +499 -4
- data/spec/fixtures/vcr_cassettes/Cirneco_DataCenter/mint_and_hide_DOIs/should_hide_for_all_urls.yml +44 -0
- data/spec/fixtures/vcr_cassettes/Cirneco_DataCenter/mint_and_hide_DOIs/should_hide_for_url.yml +44 -0
- data/spec/fixtures/vcr_cassettes/Cirneco_DataCenter/mint_and_hide_DOIs/should_hide_metadata_for_work.yml +8 -4
- data/spec/fixtures/vcr_cassettes/Cirneco_DataCenter/mint_and_hide_DOIs/should_mint_and_hide_for_all_urls.yml +130 -0
- data/spec/fixtures/vcr_cassettes/Cirneco_DataCenter/mint_and_hide_DOIs/should_mint_and_hide_for_url.yml +130 -0
- data/spec/fixtures/vcr_cassettes/Cirneco_DataCenter/mint_and_hide_DOIs/should_mint_for_all_urls.yml +130 -0
- data/spec/fixtures/vcr_cassettes/Cirneco_DataCenter/mint_and_hide_DOIs/should_mint_for_url.yml +130 -0
- data/spec/fixtures/vcr_cassettes/Cirneco_DataCenter/mint_and_hide_DOIs/should_post_metadata_for_work.yml +25 -20
- data/spec/fixtures/vcr_cassettes/Cirneco_Doi/MDS_DOI_API/get/should_get_all_dois.yml +499 -4
- data/spec/fixtures/vcr_cassettes/Cirneco_Doi/MDS_DOI_API/get/should_get_doi.yml +8 -4
- data/spec/fixtures/vcr_cassettes/Cirneco_Doi/MDS_DOI_API/put/should_put_doi.yml +8 -4
- data/spec/fixtures/vcr_cassettes/Cirneco_Doi/mint_and_hide_DOIs/hides_a_doi.yml +44 -0
- data/spec/fixtures/vcr_cassettes/Cirneco_Doi/mint_and_hide_DOIs/hides_dois_for_list_of_urls.yml +44 -0
- data/spec/fixtures/vcr_cassettes/Cirneco_Doi/mint_and_hide_DOIs/mints_a_doi.yml +130 -0
- data/spec/fixtures/vcr_cassettes/Cirneco_Doi/mint_and_hide_DOIs/mints_and_hides_a_doi.yml +130 -0
- data/spec/fixtures/vcr_cassettes/Cirneco_Doi/mint_and_hide_DOIs/mints_and_hides_dois_for_list_of_urls.yml +130 -0
- data/spec/fixtures/vcr_cassettes/Cirneco_Doi/mint_and_hide_DOIs/mints_dois_for_list_of_urls.yml +130 -0
- data/spec/fixtures/vcr_cassettes/Cirneco_Work/DOI_API/get/should_get_all_dois.yml +499 -4
- data/spec/fixtures/vcr_cassettes/Cirneco_Work/DOI_API/get/should_get_doi.yml +8 -4
- data/spec/fixtures/vcr_cassettes/Cirneco_Work/DOI_API/put/should_put_doi.yml +8 -4
- data/spec/fixtures/vcr_cassettes/Cirneco_Work/Media_API/get/should_get_media.yml +8 -4
- data/spec/fixtures/vcr_cassettes/Cirneco_Work/Media_API/post/should_post_media.yml +8 -4
- data/spec/fixtures/vcr_cassettes/Cirneco_Work/Metadata_API/delete/should_delete_metadata.yml +8 -4
- data/spec/fixtures/vcr_cassettes/Cirneco_Work/Metadata_API/get/should_get_metadata.yml +10 -5
- data/spec/fixtures/vcr_cassettes/Cirneco_Work/Metadata_API/post/should_post_metadata.yml +11 -6
- data/spec/utils_spec.rb +105 -48
- data/spec/work_spec.rb +4 -4
- metadata +20 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0b25496604d7050477dbcd531561118874f76705
|
4
|
+
data.tar.gz: f3e38823524d96cd5382a5a54d3838daaf8c3b42
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8b521bfb2aceebdb52d8248dccd6de030976413720d392748358620e7527c33398224a4a33aea7d7a4dca234c8506da408aceb00f5ac1a47e82177e034392c70
|
7
|
+
data.tar.gz: ada9b59941a9c2f3182736cb3ba54b2c44d3b16226a20abc51be56dc0a5b6f0e6949311c6b5ca9a20e396fe624fd872c2d152abe6fd99f823d0f164d27f46b1c
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
cirneco (0.
|
4
|
+
cirneco (0.8.1)
|
5
5
|
activesupport (~> 4.2, >= 4.2.5)
|
6
6
|
base32-crockford-checksum (~> 0.2.2)
|
7
7
|
bergamasco (~> 0.3)
|
@@ -22,7 +22,7 @@ GEM
|
|
22
22
|
tzinfo (~> 1.1)
|
23
23
|
addressable (2.3.8)
|
24
24
|
base32-crockford-checksum (0.2.3)
|
25
|
-
bergamasco (0.3.
|
25
|
+
bergamasco (0.3.5)
|
26
26
|
activesupport (~> 4.2, >= 4.2.5)
|
27
27
|
addressable (~> 2.3.8)
|
28
28
|
builder (~> 3.2, >= 3.2.2)
|
@@ -33,7 +33,7 @@ GEM
|
|
33
33
|
pandoc-ruby (~> 2.0, >= 2.0.0)
|
34
34
|
safe_yaml (~> 1.0, >= 1.0.4)
|
35
35
|
builder (3.2.2)
|
36
|
-
codeclimate-test-reporter (1.0.
|
36
|
+
codeclimate-test-reporter (1.0.4)
|
37
37
|
simplecov
|
38
38
|
crack (0.4.3)
|
39
39
|
safe_yaml (~> 1.0.0)
|
@@ -47,7 +47,7 @@ GEM
|
|
47
47
|
faraday
|
48
48
|
faraday_middleware (0.10.1)
|
49
49
|
faraday (>= 0.7.4, < 1.0)
|
50
|
-
hashdiff (0.3.
|
50
|
+
hashdiff (0.3.2)
|
51
51
|
i18n (0.7.0)
|
52
52
|
json (1.8.3)
|
53
53
|
loofah (2.0.3)
|
data/lib/cirneco/api.rb
CHANGED
@@ -6,7 +6,7 @@ module Cirneco
|
|
6
6
|
def post_metadata(data, options={})
|
7
7
|
return OpenStruct.new(body: { "errors" => [{ "title" => "Username or password missing" }] }) unless options[:username].present? && options[:password].present?
|
8
8
|
|
9
|
-
mds_url = options[:sandbox] ? 'https://mds.
|
9
|
+
mds_url = options[:sandbox] ? 'https://mds-sandbox.datacite.org' : 'https://mds.datacite.org'
|
10
10
|
|
11
11
|
url = "#{mds_url}/metadata"
|
12
12
|
Maremma.post(url, content_type: 'application/xml;charset=UTF-8', data: data, username: options[:username], password: options[:password])
|
@@ -15,7 +15,7 @@ module Cirneco
|
|
15
15
|
def get_metadata(doi, options={})
|
16
16
|
return OpenStruct.new(body: { "errors" => [{ "title" => "Username or password missing" }] }) unless options[:username].present? && options[:password].present?
|
17
17
|
|
18
|
-
mds_url = options[:sandbox] ? 'https://mds.
|
18
|
+
mds_url = options[:sandbox] ? 'https://mds-sandbox.datacite.org' : 'https://mds.datacite.org'
|
19
19
|
|
20
20
|
url = "#{mds_url}/metadata/#{doi}"
|
21
21
|
Maremma.get(url, accept: 'application/xml', username: options[:username], password: options[:password], raw: true)
|
@@ -24,7 +24,7 @@ module Cirneco
|
|
24
24
|
def delete_metadata(doi, options={})
|
25
25
|
return OpenStruct.new(body: { "errors" => [{ "title" => "Username or password missing" }] }) unless options[:username].present? && options[:password].present?
|
26
26
|
|
27
|
-
mds_url = options[:sandbox] ? 'https://mds.
|
27
|
+
mds_url = options[:sandbox] ? 'https://mds-sandbox.datacite.org' : 'https://mds.datacite.org'
|
28
28
|
|
29
29
|
url = "#{mds_url}/metadata/#{doi}"
|
30
30
|
Maremma.delete(url, username: options[:username], password: options[:password])
|
@@ -35,7 +35,7 @@ module Cirneco
|
|
35
35
|
|
36
36
|
payload = "doi=#{doi}\nurl=#{options[:url]}"
|
37
37
|
|
38
|
-
mds_url = options[:sandbox] ? 'https://mds.
|
38
|
+
mds_url = options[:sandbox] ? 'https://mds-sandbox.datacite.org' : 'https://mds.datacite.org'
|
39
39
|
|
40
40
|
url = "#{mds_url}/doi/#{doi}"
|
41
41
|
Maremma.put(url, content_type: 'text/plain;charset=UTF-8', data: payload, username: options[:username], password: options[:password])
|
@@ -44,7 +44,7 @@ module Cirneco
|
|
44
44
|
def get_doi(doi, options={})
|
45
45
|
return OpenStruct.new(body: { "errors" => [{ "title" => "Username or password missing" }] }) unless options[:username].present? && options[:password].present?
|
46
46
|
|
47
|
-
mds_url = options[:sandbox] ? 'https://mds.
|
47
|
+
mds_url = options[:sandbox] ? 'https://mds-sandbox.datacite.org' : 'https://mds.datacite.org'
|
48
48
|
|
49
49
|
url = "#{mds_url}/doi/#{doi}"
|
50
50
|
Maremma.get(url, username: options[:username], password: options[:password])
|
@@ -53,7 +53,7 @@ module Cirneco
|
|
53
53
|
def get_dois(options={})
|
54
54
|
return OpenStruct.new(body: { "errors" => [{ "title" => "Username or password missing" }] }) unless options[:username].present? && options[:password].present?
|
55
55
|
|
56
|
-
mds_url = options[:sandbox] ? 'https://mds.
|
56
|
+
mds_url = options[:sandbox] ? 'https://mds-sandbox.datacite.org' : 'https://mds.datacite.org'
|
57
57
|
|
58
58
|
url = "#{mds_url}/doi"
|
59
59
|
response = Maremma.get(url, username: options[:username], password: options[:password])
|
@@ -66,7 +66,7 @@ module Cirneco
|
|
66
66
|
|
67
67
|
payload = options[:raw] ? options[:media] : options[:media].map { |m| "#{m[:mime_type]}=#{m[:url]}" }.join("\n")
|
68
68
|
|
69
|
-
mds_url = options[:sandbox] ? 'https://mds.
|
69
|
+
mds_url = options[:sandbox] ? 'https://mds-sandbox.datacite.org' : 'https://mds.datacite.org'
|
70
70
|
|
71
71
|
url = "#{mds_url}/media/#{doi}"
|
72
72
|
Maremma.post(url, content_type: 'text/plain;charset=UTF-8', data: payload, username: options[:username], password: options[:password])
|
@@ -75,7 +75,7 @@ module Cirneco
|
|
75
75
|
def get_media(doi, options={})
|
76
76
|
return OpenStruct.new(body: { "errors" => [{ "title" => "Username or password missing" }] }) unless options[:username].present? && options[:password].present?
|
77
77
|
|
78
|
-
mds_url = options[:sandbox] ? 'https://mds.
|
78
|
+
mds_url = options[:sandbox] ? 'https://mds-sandbox.datacite.org' : 'https://mds.datacite.org'
|
79
79
|
|
80
80
|
url = "#{mds_url}/media/#{doi}"
|
81
81
|
response = Maremma.get(url, accept: 'application/xml', username: options[:username], password: options[:password])
|
data/lib/cirneco/base.rb
CHANGED
data/lib/cirneco/doi.rb
CHANGED
@@ -14,6 +14,7 @@ module Cirneco
|
|
14
14
|
method_option :username, :default => ENV['MDS_USERNAME']
|
15
15
|
method_option :password, :default => ENV['MDS_PASSWORD']
|
16
16
|
method_option :sandbox, :type => :boolean, :force => false
|
17
|
+
method_option :limit, :type => :numeric, :default => 25
|
17
18
|
def get(doi)
|
18
19
|
if doi == "all"
|
19
20
|
response = get_dois(options)
|
@@ -23,6 +24,8 @@ module Cirneco
|
|
23
24
|
|
24
25
|
if response.body["errors"]
|
25
26
|
puts "Error: " + response.body["errors"].first.fetch("title", "An error occured")
|
27
|
+
elsif doi == "all"
|
28
|
+
puts response.body["data"][0...options[:limit]]
|
26
29
|
else
|
27
30
|
puts response.body["data"]
|
28
31
|
end
|
@@ -54,6 +57,14 @@ module Cirneco
|
|
54
57
|
end
|
55
58
|
end
|
56
59
|
|
60
|
+
desc "generate DOI", "generate a DOI name"
|
61
|
+
method_option :lower_limit, :type => :numeric, :default => 0
|
62
|
+
method_option :namespace, :default => 'MS-'
|
63
|
+
method_option :number, :type => :numeric, :aliases => '-n'
|
64
|
+
def accession_number
|
65
|
+
puts generate_accession_number(options)
|
66
|
+
end
|
67
|
+
|
57
68
|
desc "decode DOI", "decode DOI encoded using Crockford base32 algorithm"
|
58
69
|
def decode(doi)
|
59
70
|
number = decode_doi(doi)
|
@@ -86,10 +97,10 @@ module Cirneco
|
|
86
97
|
method_option :sandbox, :type => :boolean, :force => false
|
87
98
|
def mint(filepath)
|
88
99
|
|
89
|
-
if
|
90
|
-
response =
|
100
|
+
if filepath.is_a?(Array)
|
101
|
+
response = mint_dois_for_all_urls(filepath, options)
|
91
102
|
else
|
92
|
-
response =
|
103
|
+
response = mint_doi_for_url(filepath, options)
|
93
104
|
end
|
94
105
|
|
95
106
|
puts response
|
@@ -107,10 +118,10 @@ module Cirneco
|
|
107
118
|
method_option :sandbox, :type => :boolean, :force => false
|
108
119
|
def mint_and_hide(filepath)
|
109
120
|
|
110
|
-
if
|
111
|
-
response =
|
121
|
+
if filepath.is_a?(Array)
|
122
|
+
response = mint_and_hide_dois_for_all_urls(filepath, options)
|
112
123
|
else
|
113
|
-
response =
|
124
|
+
response = mint_and_hide_doi_for_url(filepath, options)
|
114
125
|
end
|
115
126
|
|
116
127
|
puts response
|
@@ -127,10 +138,10 @@ module Cirneco
|
|
127
138
|
method_option :sandbox, :type => :boolean, :force => false
|
128
139
|
def hide(filepath)
|
129
140
|
|
130
|
-
if
|
131
|
-
response =
|
141
|
+
if filepath.is_a?(Array)
|
142
|
+
response = hide_dois_for_all_urls(filepath, options)
|
132
143
|
else
|
133
|
-
response =
|
144
|
+
response = hide_doi_for_url(filepath, options)
|
134
145
|
end
|
135
146
|
|
136
147
|
puts response
|
data/lib/cirneco/utils.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
require 'base32/crockford'
|
2
2
|
require 'securerandom'
|
3
3
|
require 'bergamasco'
|
4
|
+
require 'open-uri'
|
4
5
|
require 'time'
|
5
6
|
|
6
7
|
module Cirneco
|
@@ -8,6 +9,11 @@ module Cirneco
|
|
8
9
|
# "ZZZZZZZ" decoded as number
|
9
10
|
UPPER_LIMIT = 34359738367
|
10
11
|
|
12
|
+
LICENSES = {
|
13
|
+
"https://creativecommons.org/licenses/by/4.0/" => "Creative Commons Attribution (CC-BY 4.0)",
|
14
|
+
"https://creativecommons.org/publicdomain/zero/1.0/" => "Creative Commons Public Domain Dedication (CC0 1.0)"
|
15
|
+
}
|
16
|
+
|
11
17
|
def get_dois_by_prefix(prefix, options={})
|
12
18
|
response = get_dois(options)
|
13
19
|
|
@@ -23,193 +29,207 @@ module Cirneco
|
|
23
29
|
end
|
24
30
|
|
25
31
|
def encode_doi(prefix, options={})
|
26
|
-
number = options[:number]
|
32
|
+
number = options[:number].to_s.scan(/\d+/).first.to_i
|
33
|
+
number = SecureRandom.random_number(UPPER_LIMIT) unless number > 0
|
27
34
|
prefix.to_s + "/" + Base32::Crockford.encode(number, split: 4, length: 8, checksum: true)
|
28
35
|
end
|
29
36
|
|
30
|
-
|
31
|
-
|
37
|
+
def generate_accession_number(options={})
|
38
|
+
lower_limit = options[:lower_limit] || 0
|
39
|
+
namespace = options[:namespace] || 'MS-'
|
40
|
+
registered_numbers = options[:registered_numbers] || []
|
41
|
+
|
42
|
+
if options[:number]
|
43
|
+
number = options[:number].to_s
|
44
|
+
else
|
45
|
+
begin
|
46
|
+
number = SecureRandom.random_number(1000000) + lower_limit
|
47
|
+
end while registered_numbers.include? number
|
48
|
+
number = number.to_s
|
49
|
+
end
|
50
|
+
|
51
|
+
number = number.to_s.rjust(options[:length], '0') if options[:length]
|
52
|
+
|
53
|
+
if options[:split]
|
54
|
+
number = number.reverse
|
55
|
+
number = number.scan(/.{1,#{options[:split]}}/).map { |x| x.reverse }
|
56
|
+
number = number.reverse.join("-")
|
57
|
+
end
|
58
|
+
|
59
|
+
namespace + number
|
60
|
+
end
|
61
|
+
|
62
|
+
def get_accession_number(filepath)
|
63
|
+
metadata = Bergamasco::Markdown.read_yaml_for_doi_metadata(filepath)
|
64
|
+
metadata.fetch("accession_number", nil).to_s.scan(/\d+/).first.to_i
|
65
|
+
end
|
66
|
+
|
67
|
+
def get_all_accession_numbers(folderpath)
|
68
|
+
Dir.glob("#{folderpath}/*.md").map do |filepath|
|
69
|
+
get_accession_number(filepath)
|
70
|
+
end.select { |a| a > 0 }.sort
|
71
|
+
end
|
72
|
+
|
73
|
+
def update_accession_number(filepath, options={})
|
32
74
|
filename = File.basename(filepath)
|
33
75
|
return "File #{filename} ignored: not a markdown file" unless File.extname(filepath) == ".md"
|
34
76
|
|
35
77
|
old_metadata = Bergamasco::Markdown.read_yaml_for_doi_metadata(filepath)
|
36
|
-
return "
|
78
|
+
return "Accession number #{old_metadata["accession_number"]} not changed for #{filename}" if old_metadata["accession_number"]
|
79
|
+
|
80
|
+
if old_metadata["doi"].present?
|
81
|
+
number = decode_doi(old_metadata["doi"])
|
82
|
+
options[:number] = number if number > 0
|
83
|
+
else
|
84
|
+
folderpath = File.dirname(filepath)
|
85
|
+
options[:registered_numbers] = get_all_accession_numbers(folderpath)
|
86
|
+
end
|
87
|
+
|
88
|
+
accession_number = generate_accession_number(options)
|
89
|
+
|
90
|
+
new_metadata = Bergamasco::Markdown.update_file(filepath, "accession_number" => accession_number)
|
91
|
+
"Accession number #{new_metadata["accession_number"]} generated for #{filename}"
|
92
|
+
end
|
93
|
+
|
94
|
+
def update_all_accession_numbers(folderpath)
|
95
|
+
Dir.glob("#{folderpath}/*.md").map do |filepath|
|
96
|
+
update_accession_number(filepath)
|
97
|
+
end
|
98
|
+
end
|
99
|
+
|
100
|
+
# fetch schema.org metadata in JSON-LD format to mint DOI
|
101
|
+
def mint_doi_for_url(url, options={})
|
102
|
+
filename = File.basename(url)
|
103
|
+
source_path = options[:source_path] || "/"
|
104
|
+
filepath = Dir.pwd + source_path + filename + ".md"
|
37
105
|
|
38
|
-
metadata = generate_metadata_for_work(
|
39
|
-
|
106
|
+
metadata = generate_metadata_for_work(url, options)
|
107
|
+
return "DOI #{metadata["doi"]} not changed for #{filename}" if metadata["doi"] && metadata["date_issued"]
|
40
108
|
|
41
|
-
|
109
|
+
response = post_metadata_for_work(metadata, options)
|
110
|
+
return "Errors for DOI #{metadata["doi"]}: #{response.body['errors'].first['title']}\n" if response.body['errors']
|
42
111
|
|
43
112
|
new_metadata = Bergamasco::Markdown.update_file(filepath, "doi" => metadata["doi"], "published" => true)
|
44
113
|
"DOI #{new_metadata["doi"]} minted for #{filename}"
|
45
114
|
end
|
46
115
|
|
47
|
-
#
|
48
|
-
def
|
49
|
-
filename = File.basename(
|
50
|
-
|
51
|
-
|
52
|
-
old_metadata = Bergamasco::Markdown.read_yaml_for_doi_metadata(filepath)
|
53
|
-
return "DOI #{old_metadata["doi"]} not changed for #{filename}" if old_metadata["doi"] && old_metadata["published"]
|
116
|
+
# fetch schema.org metadata in JSON-LD format to mint DOI
|
117
|
+
def mint_and_hide_doi_for_url(url, options={})
|
118
|
+
filename = File.basename(url)
|
119
|
+
source_path = options[:source_path] || "/"
|
120
|
+
filepath = Dir.pwd + source_path + filename + ".md"
|
54
121
|
|
55
|
-
metadata = generate_metadata_for_work(
|
56
|
-
|
122
|
+
metadata = generate_metadata_for_work(url, options)
|
123
|
+
return "DOI #{metadata["doi"]} not changed for #{filename}" if metadata["doi"] && metadata["date_issued"]
|
57
124
|
|
58
|
-
|
125
|
+
response = post_metadata_for_work(metadata, options)
|
126
|
+
return "Errors for DOI #{metadata["doi"]}: #{response.body['errors'].first['title']}\n" if response.body['errors']
|
59
127
|
|
60
128
|
new_metadata = Bergamasco::Markdown.update_file(filepath, "doi" => metadata["doi"], "published" => false)
|
61
129
|
"DOI #{new_metadata["doi"]} minted and hidden for #{filename}"
|
62
130
|
end
|
63
131
|
|
64
|
-
#
|
132
|
+
# fetch schema.org metadata in JSON-LD format to mint DOI
|
65
133
|
# DOIs are never deleted, but we can remove the metadata from the DataCite index
|
66
|
-
def
|
67
|
-
filename = File.basename(
|
68
|
-
|
69
|
-
|
70
|
-
old_metadata = Bergamasco::Markdown.read_yaml_for_doi_metadata(filepath)
|
71
|
-
return "DOI #{old_metadata["doi"]} not changed for #{filename}" unless old_metadata["doi"] && old_metadata["published"]
|
134
|
+
def hide_doi_for_url(url, options={})
|
135
|
+
filename = File.basename(url)
|
136
|
+
source_path = options[:source_path] || "/"
|
137
|
+
filepath = Dir.pwd + source_path + filename + ".md"
|
72
138
|
|
73
|
-
metadata = generate_metadata_for_work(
|
74
|
-
|
139
|
+
metadata = generate_metadata_for_work(url, options)
|
140
|
+
return "DOI #{metadata["doi"]} not changed for #{filename}" unless metadata["doi"] && metadata["date_issued"]
|
75
141
|
|
76
|
-
|
142
|
+
response = hide_metadata_for_work(metadata, options)
|
143
|
+
return "Errors for DOI #{metadata["doi"]}: #{response.body['errors'].first['title']}\n" if response.body['errors']
|
77
144
|
|
78
145
|
new_metadata = Bergamasco::Markdown.update_file(filepath, "published" => false)
|
79
|
-
"DOI #{
|
146
|
+
"DOI #{metadata["doi"]} hidden for #{filename}"
|
80
147
|
end
|
81
148
|
|
82
|
-
def
|
83
|
-
|
84
|
-
|
149
|
+
def mint_dois_for_all_urls(urls, options={})
|
150
|
+
urls.map do |url|
|
151
|
+
mint_doi_for_url(url, options)
|
85
152
|
end.join("\n")
|
86
153
|
end
|
87
154
|
|
88
|
-
def
|
89
|
-
|
90
|
-
|
155
|
+
def mint_and_hide_dois_for_all_urls(urls, options={})
|
156
|
+
urls.map do |url|
|
157
|
+
mint_and_hide_doi_for_url(url, options)
|
91
158
|
end.join("\n")
|
92
159
|
end
|
93
160
|
|
94
|
-
def
|
95
|
-
|
96
|
-
|
161
|
+
def hide_dois_for_all_urls(urls, options={})
|
162
|
+
urls.map do |url|
|
163
|
+
hide_doi_for_url(url, options)
|
97
164
|
end.join("\n")
|
98
165
|
end
|
99
166
|
|
100
|
-
def generate_metadata_for_work(
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
csl = options[:csl] || ENV['SITE_CSLPATH'] || "styles/apa.csl"
|
105
|
-
options = options.merge(csl: csl, bibliography: referencespath)
|
167
|
+
def generate_metadata_for_work(url, options={})
|
168
|
+
doc = Nokogiri::HTML(open(url))
|
169
|
+
json = doc.at_xpath("//script[@type='application/ld+json']")
|
170
|
+
return "Error: no schema.org metadata found" unless json.present?
|
106
171
|
|
107
|
-
metadata =
|
172
|
+
metadata = ActiveSupport::JSON.decode(json.text)
|
108
173
|
|
109
|
-
return "Error: required metadata missing" unless ["author", "
|
110
|
-
|
111
|
-
# read in optional yaml configuration files for site, author and references
|
112
|
-
site_options = Bergamasco::Markdown.read_yaml(sitepath) || {}
|
113
|
-
author_options = Bergamasco::Markdown.read_yaml(authorpath) || {}
|
114
|
-
references = Bergamasco::Markdown.read_yaml(referencespath) || {}
|
174
|
+
return "Error: required metadata missing" unless ["name", "author", "publisher", "datePublished", "@type"].all? { |k| metadata.key? k }
|
115
175
|
|
116
176
|
# required metadata
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
site_url = site_options["site_url"] || ENV['SITE_URL']
|
121
|
-
metadata["url"] ||= url_from_path(site_url, filepath)
|
122
|
-
|
123
|
-
metadata["creators"] = Array(metadata["author"]).map do |a|
|
124
|
-
author = author_options.fetch(a, {})
|
125
|
-
if author.present?
|
126
|
-
{ given_name: author["given"],
|
127
|
-
family_name: author["family"],
|
128
|
-
orcid: author["orcid"] }
|
129
|
-
else
|
130
|
-
{ literal: a }
|
131
|
-
end
|
177
|
+
if /(http|https):\/\/(dx\.)?doi\.org\/(\w+)/.match(metadata["@id"])
|
178
|
+
uri = Addressable::URI.parse(metadata["@id"])
|
179
|
+
metadata["doi"] = uri.path[1..-1].upcase
|
132
180
|
end
|
133
181
|
|
134
|
-
metadata["
|
135
|
-
|
182
|
+
metadata["title"] = metadata["name"]
|
183
|
+
|
184
|
+
metadata["creators"] = format_authors(metadata["author"])
|
185
|
+
|
186
|
+
metadata["publisher"] = metadata.fetch("publisher", {}).fetch("name", nil)
|
187
|
+
metadata["publication_year"] = metadata.fetch("datePublished", "")[0..3].to_i
|
136
188
|
|
137
|
-
metadata["type"]
|
138
|
-
resource_type_general = metadata["type"] == "Dataset" ? "Dataset" : "Text"
|
189
|
+
resource_type_general = metadata["@type"] == "Dataset" ? "Dataset" : "Text"
|
139
190
|
|
140
|
-
metadata["resource_type"] = { value: metadata["type"],
|
191
|
+
metadata["resource_type"] = { value: metadata["@type"],
|
141
192
|
resource_type_general: resource_type_general }
|
142
193
|
|
143
194
|
# recommended metadata
|
144
|
-
|
145
|
-
|
195
|
+
|
196
|
+
# use alternate_identifier to generate DOI
|
197
|
+
metadata["alternate_identifier"] = metadata["alternateName"]
|
198
|
+
|
199
|
+
if metadata["description"].present?
|
200
|
+
metadata["descriptions"] = [{ value: metadata["description"], description_type: "Abstract" }]
|
201
|
+
end
|
146
202
|
|
147
203
|
# use default version 1.0
|
148
204
|
metadata["version"] ||= "1.0"
|
149
205
|
|
150
206
|
# fetch reference metadata if available
|
151
|
-
metadata["related_identifiers"] =
|
152
|
-
reference = references.fetch(r, {})
|
153
|
-
if reference.present?
|
154
|
-
if reference["DOI"].present?
|
155
|
-
value = reference["DOI"].upcase
|
156
|
-
type = "DOI"
|
157
|
-
elsif /(http|https):\/\/(dx\.)?doi\.org\/(\w+)/.match(reference["URL"])
|
158
|
-
uri = Addressable::URI.parse(reference["URL"])
|
159
|
-
value = uri.path[1..-1].upcase
|
160
|
-
type = "DOI"
|
161
|
-
elsif reference["URL"].present?
|
162
|
-
value = reference["URL"]
|
163
|
-
type = "URL"
|
164
|
-
else
|
165
|
-
type = nil
|
166
|
-
end
|
167
|
-
else
|
168
|
-
if /(http|https):\/\/(dx\.)?doi\.org\/(\w+)/.match(r)
|
169
|
-
uri = Addressable::URI.parse(r)
|
170
|
-
value = uri.path[1..-1].upcase
|
171
|
-
type = "DOI"
|
172
|
-
elsif /(http|https):\/\//.match(r)
|
173
|
-
uri = Addressable::URI.parse(r)
|
174
|
-
value = uri.normalize.to_s
|
175
|
-
type = "URL"
|
176
|
-
else
|
177
|
-
type = nil
|
178
|
-
end
|
179
|
-
end
|
180
|
-
|
181
|
-
{
|
182
|
-
value: value,
|
183
|
-
related_identifier_type: type,
|
184
|
-
relation_type: "References"
|
185
|
-
}
|
186
|
-
end.select { |t| t[:related_identifier_type].present? }
|
187
|
-
|
188
|
-
metadata["license_name"] = site_options.fetch("license", {}).fetch("name", nil) || ENV['SITE_LICENCE_NAME'] || "Creative Commons Attribution"
|
189
|
-
metadata["license_url"] = site_options.fetch("license", {}).fetch("url", nil) || ENV['SITE_LICENCE_URL'] || "https://creativecommons.org/licenses/by/4.0/"
|
190
|
-
metadata["rights_list"] = [{ value: metadata["license_name"], rights_uri: metadata["license_url"] }]
|
207
|
+
metadata["related_identifiers"] = get_related_identifiers(metadata)
|
191
208
|
|
192
|
-
|
209
|
+
if metadata["license"].present?
|
210
|
+
value = LICENSES.fetch(metadata["license_url"], nil)
|
211
|
+
metadata["rights_list"] = [{ value: value, rights_uri: metadata["license"] }] if value.present?
|
212
|
+
end
|
193
213
|
|
194
|
-
|
195
|
-
|
214
|
+
if metadata["keywords"].present?
|
215
|
+
metadata["subjects"] = Array(metadata["keywords"].split(", ")).select { |k| k != "featured" }
|
216
|
+
end
|
196
217
|
|
197
|
-
metadata["
|
198
|
-
metadata["
|
199
|
-
metadata["
|
218
|
+
metadata["date_created"] = metadata["dateCreated"]
|
219
|
+
metadata["date_issued"] = metadata["datePublished"]
|
220
|
+
metadata["date_updated"] = metadata["dateModified"]
|
200
221
|
|
201
|
-
metadata = metadata.extract!(*%w(doi url creators title
|
202
|
-
publication_year
|
203
|
-
date_issued related_identifiers))
|
204
|
-
end
|
205
|
-
|
206
|
-
def url_from_path(site_url, filepath)
|
207
|
-
site_url.to_s.chomp("\\") + "/" + File.basename(filepath)[0..-9] + "/"
|
222
|
+
metadata = metadata.extract!(*%w(doi alternate_identifier url creators title
|
223
|
+
publisher publication_year resource_type descriptions version rights_list
|
224
|
+
subjects date_issued date_created date_updated related_identifiers))
|
208
225
|
end
|
209
226
|
|
210
227
|
def post_metadata_for_work(metadata, options={})
|
228
|
+
prefix = options[:prefix] || ENV['PREFIX']
|
229
|
+
metadata["doi"] ||= encode_doi(prefix, number: metadata["alternate_identifier"])
|
230
|
+
|
211
231
|
work = Cirneco::Work.new(metadata)
|
212
|
-
return work.validation_errors if work.validation_errors.present?
|
232
|
+
return work.validation_errors if work.validation_errors.body["errors"].present?
|
213
233
|
|
214
234
|
response = work.post_metadata(work.data, options)
|
215
235
|
return response unless response.status == 201
|
@@ -218,8 +238,11 @@ module Cirneco
|
|
218
238
|
end
|
219
239
|
|
220
240
|
def post_and_hide_metadata_for_work(metadata, options={})
|
241
|
+
prefix = options[:prefix] || ENV['PREFIX']
|
242
|
+
metadata["doi"] ||= encode_doi(prefix, number: metadata["alternate_identifier"])
|
243
|
+
|
221
244
|
work = Cirneco::Work.new(metadata)
|
222
|
-
return work.validation_errors if work.validation_errors.present?
|
245
|
+
return work.validation_errors if work.validation_errors.body["errors"].present?
|
223
246
|
|
224
247
|
response = work.post_metadata(work.data, options)
|
225
248
|
return response unless response.status == 201
|
@@ -231,8 +254,11 @@ module Cirneco
|
|
231
254
|
end
|
232
255
|
|
233
256
|
def hide_metadata_for_work(metadata, options={})
|
257
|
+
prefix = options[:prefix] || ENV['PREFIX']
|
258
|
+
metadata["doi"] ||= encode_doi(prefix, number: metadata["alternate_identifier"])
|
259
|
+
|
234
260
|
work = Cirneco::Work.new(metadata)
|
235
|
-
return work.validation_errors if work.validation_errors.present?
|
261
|
+
return work.validation_errors if work.validation_errors.body["errors"].present?
|
236
262
|
|
237
263
|
work.delete_metadata(metadata["doi"], options)
|
238
264
|
end
|
@@ -240,5 +266,55 @@ module Cirneco
|
|
240
266
|
def generate_jats(filepath, options={})
|
241
267
|
Bergamasco::Pandoc.write_jats(filepath, options)
|
242
268
|
end
|
269
|
+
|
270
|
+
def url_from_path(site_url, filepath)
|
271
|
+
site_url.to_s.chomp("\\") + "/" + File.basename(filepath)[0..-9] + "/"
|
272
|
+
end
|
273
|
+
|
274
|
+
def format_authors(authors)
|
275
|
+
Array(authors).map do |author|
|
276
|
+
orcid = orcid_from_url(author["@id"])
|
277
|
+
name = (author["givenName"].present? || author["familyName"].present?) ? nil : author["name"]
|
278
|
+
|
279
|
+
{ given_name: author["givenName"],
|
280
|
+
family_name: author["familyName"],
|
281
|
+
name: name,
|
282
|
+
orcid: orcid }.compact
|
283
|
+
end
|
284
|
+
end
|
285
|
+
|
286
|
+
def get_related_identifiers(metadata)
|
287
|
+
citations = Array(metadata["citation"])
|
288
|
+
parts = Array(metadata["IsPartOf"]).map { |r| r["relation_type"] = "IsPartOf" }
|
289
|
+
|
290
|
+
(citations + parts).map do |r|
|
291
|
+
id = r.fetch("@id", "")
|
292
|
+
relation_type = r.fetch("relation_type", "References")
|
293
|
+
|
294
|
+
if /(http|https):\/\/(dx\.)?doi\.org\/(\w+)/.match(id)
|
295
|
+
uri = Addressable::URI.parse(id)
|
296
|
+
value = uri.path[1..-1].upcase
|
297
|
+
type = "DOI"
|
298
|
+
elsif /(http|https):\/\//.match(id)
|
299
|
+
uri = Addressable::URI.parse(id)
|
300
|
+
value = uri.normalize.to_s
|
301
|
+
type = "URL"
|
302
|
+
else
|
303
|
+
type = nil
|
304
|
+
end
|
305
|
+
|
306
|
+
{
|
307
|
+
value: value,
|
308
|
+
related_identifier_type: type,
|
309
|
+
relation_type: relation_type
|
310
|
+
}
|
311
|
+
end.select { |t| t[:related_identifier_type].present? }
|
312
|
+
end
|
313
|
+
|
314
|
+
def orcid_from_url(url)
|
315
|
+
return nil unless url.present?
|
316
|
+
|
317
|
+
Array(/\Ahttp:\/\/orcid\.org\/(.+)/.match(url)).last
|
318
|
+
end
|
243
319
|
end
|
244
320
|
end
|
data/lib/cirneco/version.rb
CHANGED