cirneco 0.7.4 → 0.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +4 -4
- data/lib/cirneco/api.rb +8 -8
- data/lib/cirneco/base.rb +1 -1
- data/lib/cirneco/doi.rb +20 -9
- data/lib/cirneco/utils.rb +204 -128
- data/lib/cirneco/version.rb +1 -1
- data/lib/cirneco/work.rb +19 -7
- data/spec/api_spec.rb +10 -10
- data/spec/doi_spec.rb +29 -27
- data/spec/fixtures/cool-dois-minted.html +404 -0
- data/spec/fixtures/cool-dois-minted.html.md +99 -0
- data/spec/fixtures/cool-dois-missing-metadata.html +356 -0
- data/spec/fixtures/cool-dois-no-json-ld.html +352 -0
- data/spec/fixtures/cool-dois.html +404 -0
- data/spec/fixtures/cool-dois.html.md +1 -0
- data/spec/fixtures/index.html +271 -0
- data/spec/fixtures/vcr_cassettes/Cirneco_DataCenter/get/should_get_all_dois_by_prefix.yml +499 -4
- data/spec/fixtures/vcr_cassettes/Cirneco_DataCenter/mint_and_hide_DOIs/should_hide_for_all_urls.yml +44 -0
- data/spec/fixtures/vcr_cassettes/Cirneco_DataCenter/mint_and_hide_DOIs/should_hide_for_url.yml +44 -0
- data/spec/fixtures/vcr_cassettes/Cirneco_DataCenter/mint_and_hide_DOIs/should_hide_metadata_for_work.yml +8 -4
- data/spec/fixtures/vcr_cassettes/Cirneco_DataCenter/mint_and_hide_DOIs/should_mint_and_hide_for_all_urls.yml +130 -0
- data/spec/fixtures/vcr_cassettes/Cirneco_DataCenter/mint_and_hide_DOIs/should_mint_and_hide_for_url.yml +130 -0
- data/spec/fixtures/vcr_cassettes/Cirneco_DataCenter/mint_and_hide_DOIs/should_mint_for_all_urls.yml +130 -0
- data/spec/fixtures/vcr_cassettes/Cirneco_DataCenter/mint_and_hide_DOIs/should_mint_for_url.yml +130 -0
- data/spec/fixtures/vcr_cassettes/Cirneco_DataCenter/mint_and_hide_DOIs/should_post_metadata_for_work.yml +25 -20
- data/spec/fixtures/vcr_cassettes/Cirneco_Doi/MDS_DOI_API/get/should_get_all_dois.yml +499 -4
- data/spec/fixtures/vcr_cassettes/Cirneco_Doi/MDS_DOI_API/get/should_get_doi.yml +8 -4
- data/spec/fixtures/vcr_cassettes/Cirneco_Doi/MDS_DOI_API/put/should_put_doi.yml +8 -4
- data/spec/fixtures/vcr_cassettes/Cirneco_Doi/mint_and_hide_DOIs/hides_a_doi.yml +44 -0
- data/spec/fixtures/vcr_cassettes/Cirneco_Doi/mint_and_hide_DOIs/hides_dois_for_list_of_urls.yml +44 -0
- data/spec/fixtures/vcr_cassettes/Cirneco_Doi/mint_and_hide_DOIs/mints_a_doi.yml +130 -0
- data/spec/fixtures/vcr_cassettes/Cirneco_Doi/mint_and_hide_DOIs/mints_and_hides_a_doi.yml +130 -0
- data/spec/fixtures/vcr_cassettes/Cirneco_Doi/mint_and_hide_DOIs/mints_and_hides_dois_for_list_of_urls.yml +130 -0
- data/spec/fixtures/vcr_cassettes/Cirneco_Doi/mint_and_hide_DOIs/mints_dois_for_list_of_urls.yml +130 -0
- data/spec/fixtures/vcr_cassettes/Cirneco_Work/DOI_API/get/should_get_all_dois.yml +499 -4
- data/spec/fixtures/vcr_cassettes/Cirneco_Work/DOI_API/get/should_get_doi.yml +8 -4
- data/spec/fixtures/vcr_cassettes/Cirneco_Work/DOI_API/put/should_put_doi.yml +8 -4
- data/spec/fixtures/vcr_cassettes/Cirneco_Work/Media_API/get/should_get_media.yml +8 -4
- data/spec/fixtures/vcr_cassettes/Cirneco_Work/Media_API/post/should_post_media.yml +8 -4
- data/spec/fixtures/vcr_cassettes/Cirneco_Work/Metadata_API/delete/should_delete_metadata.yml +8 -4
- data/spec/fixtures/vcr_cassettes/Cirneco_Work/Metadata_API/get/should_get_metadata.yml +10 -5
- data/spec/fixtures/vcr_cassettes/Cirneco_Work/Metadata_API/post/should_post_metadata.yml +11 -6
- data/spec/utils_spec.rb +105 -48
- data/spec/work_spec.rb +4 -4
- metadata +20 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 0b25496604d7050477dbcd531561118874f76705
|
|
4
|
+
data.tar.gz: f3e38823524d96cd5382a5a54d3838daaf8c3b42
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 8b521bfb2aceebdb52d8248dccd6de030976413720d392748358620e7527c33398224a4a33aea7d7a4dca234c8506da408aceb00f5ac1a47e82177e034392c70
|
|
7
|
+
data.tar.gz: ada9b59941a9c2f3182736cb3ba54b2c44d3b16226a20abc51be56dc0a5b6f0e6949311c6b5ca9a20e396fe624fd872c2d152abe6fd99f823d0f164d27f46b1c
|
data/Gemfile.lock
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
PATH
|
|
2
2
|
remote: .
|
|
3
3
|
specs:
|
|
4
|
-
cirneco (0.
|
|
4
|
+
cirneco (0.8.1)
|
|
5
5
|
activesupport (~> 4.2, >= 4.2.5)
|
|
6
6
|
base32-crockford-checksum (~> 0.2.2)
|
|
7
7
|
bergamasco (~> 0.3)
|
|
@@ -22,7 +22,7 @@ GEM
|
|
|
22
22
|
tzinfo (~> 1.1)
|
|
23
23
|
addressable (2.3.8)
|
|
24
24
|
base32-crockford-checksum (0.2.3)
|
|
25
|
-
bergamasco (0.3.
|
|
25
|
+
bergamasco (0.3.5)
|
|
26
26
|
activesupport (~> 4.2, >= 4.2.5)
|
|
27
27
|
addressable (~> 2.3.8)
|
|
28
28
|
builder (~> 3.2, >= 3.2.2)
|
|
@@ -33,7 +33,7 @@ GEM
|
|
|
33
33
|
pandoc-ruby (~> 2.0, >= 2.0.0)
|
|
34
34
|
safe_yaml (~> 1.0, >= 1.0.4)
|
|
35
35
|
builder (3.2.2)
|
|
36
|
-
codeclimate-test-reporter (1.0.
|
|
36
|
+
codeclimate-test-reporter (1.0.4)
|
|
37
37
|
simplecov
|
|
38
38
|
crack (0.4.3)
|
|
39
39
|
safe_yaml (~> 1.0.0)
|
|
@@ -47,7 +47,7 @@ GEM
|
|
|
47
47
|
faraday
|
|
48
48
|
faraday_middleware (0.10.1)
|
|
49
49
|
faraday (>= 0.7.4, < 1.0)
|
|
50
|
-
hashdiff (0.3.
|
|
50
|
+
hashdiff (0.3.2)
|
|
51
51
|
i18n (0.7.0)
|
|
52
52
|
json (1.8.3)
|
|
53
53
|
loofah (2.0.3)
|
data/lib/cirneco/api.rb
CHANGED
|
@@ -6,7 +6,7 @@ module Cirneco
|
|
|
6
6
|
def post_metadata(data, options={})
|
|
7
7
|
return OpenStruct.new(body: { "errors" => [{ "title" => "Username or password missing" }] }) unless options[:username].present? && options[:password].present?
|
|
8
8
|
|
|
9
|
-
mds_url = options[:sandbox] ? 'https://mds.
|
|
9
|
+
mds_url = options[:sandbox] ? 'https://mds-sandbox.datacite.org' : 'https://mds.datacite.org'
|
|
10
10
|
|
|
11
11
|
url = "#{mds_url}/metadata"
|
|
12
12
|
Maremma.post(url, content_type: 'application/xml;charset=UTF-8', data: data, username: options[:username], password: options[:password])
|
|
@@ -15,7 +15,7 @@ module Cirneco
|
|
|
15
15
|
def get_metadata(doi, options={})
|
|
16
16
|
return OpenStruct.new(body: { "errors" => [{ "title" => "Username or password missing" }] }) unless options[:username].present? && options[:password].present?
|
|
17
17
|
|
|
18
|
-
mds_url = options[:sandbox] ? 'https://mds.
|
|
18
|
+
mds_url = options[:sandbox] ? 'https://mds-sandbox.datacite.org' : 'https://mds.datacite.org'
|
|
19
19
|
|
|
20
20
|
url = "#{mds_url}/metadata/#{doi}"
|
|
21
21
|
Maremma.get(url, accept: 'application/xml', username: options[:username], password: options[:password], raw: true)
|
|
@@ -24,7 +24,7 @@ module Cirneco
|
|
|
24
24
|
def delete_metadata(doi, options={})
|
|
25
25
|
return OpenStruct.new(body: { "errors" => [{ "title" => "Username or password missing" }] }) unless options[:username].present? && options[:password].present?
|
|
26
26
|
|
|
27
|
-
mds_url = options[:sandbox] ? 'https://mds.
|
|
27
|
+
mds_url = options[:sandbox] ? 'https://mds-sandbox.datacite.org' : 'https://mds.datacite.org'
|
|
28
28
|
|
|
29
29
|
url = "#{mds_url}/metadata/#{doi}"
|
|
30
30
|
Maremma.delete(url, username: options[:username], password: options[:password])
|
|
@@ -35,7 +35,7 @@ module Cirneco
|
|
|
35
35
|
|
|
36
36
|
payload = "doi=#{doi}\nurl=#{options[:url]}"
|
|
37
37
|
|
|
38
|
-
mds_url = options[:sandbox] ? 'https://mds.
|
|
38
|
+
mds_url = options[:sandbox] ? 'https://mds-sandbox.datacite.org' : 'https://mds.datacite.org'
|
|
39
39
|
|
|
40
40
|
url = "#{mds_url}/doi/#{doi}"
|
|
41
41
|
Maremma.put(url, content_type: 'text/plain;charset=UTF-8', data: payload, username: options[:username], password: options[:password])
|
|
@@ -44,7 +44,7 @@ module Cirneco
|
|
|
44
44
|
def get_doi(doi, options={})
|
|
45
45
|
return OpenStruct.new(body: { "errors" => [{ "title" => "Username or password missing" }] }) unless options[:username].present? && options[:password].present?
|
|
46
46
|
|
|
47
|
-
mds_url = options[:sandbox] ? 'https://mds.
|
|
47
|
+
mds_url = options[:sandbox] ? 'https://mds-sandbox.datacite.org' : 'https://mds.datacite.org'
|
|
48
48
|
|
|
49
49
|
url = "#{mds_url}/doi/#{doi}"
|
|
50
50
|
Maremma.get(url, username: options[:username], password: options[:password])
|
|
@@ -53,7 +53,7 @@ module Cirneco
|
|
|
53
53
|
def get_dois(options={})
|
|
54
54
|
return OpenStruct.new(body: { "errors" => [{ "title" => "Username or password missing" }] }) unless options[:username].present? && options[:password].present?
|
|
55
55
|
|
|
56
|
-
mds_url = options[:sandbox] ? 'https://mds.
|
|
56
|
+
mds_url = options[:sandbox] ? 'https://mds-sandbox.datacite.org' : 'https://mds.datacite.org'
|
|
57
57
|
|
|
58
58
|
url = "#{mds_url}/doi"
|
|
59
59
|
response = Maremma.get(url, username: options[:username], password: options[:password])
|
|
@@ -66,7 +66,7 @@ module Cirneco
|
|
|
66
66
|
|
|
67
67
|
payload = options[:raw] ? options[:media] : options[:media].map { |m| "#{m[:mime_type]}=#{m[:url]}" }.join("\n")
|
|
68
68
|
|
|
69
|
-
mds_url = options[:sandbox] ? 'https://mds.
|
|
69
|
+
mds_url = options[:sandbox] ? 'https://mds-sandbox.datacite.org' : 'https://mds.datacite.org'
|
|
70
70
|
|
|
71
71
|
url = "#{mds_url}/media/#{doi}"
|
|
72
72
|
Maremma.post(url, content_type: 'text/plain;charset=UTF-8', data: payload, username: options[:username], password: options[:password])
|
|
@@ -75,7 +75,7 @@ module Cirneco
|
|
|
75
75
|
def get_media(doi, options={})
|
|
76
76
|
return OpenStruct.new(body: { "errors" => [{ "title" => "Username or password missing" }] }) unless options[:username].present? && options[:password].present?
|
|
77
77
|
|
|
78
|
-
mds_url = options[:sandbox] ? 'https://mds.
|
|
78
|
+
mds_url = options[:sandbox] ? 'https://mds-sandbox.datacite.org' : 'https://mds.datacite.org'
|
|
79
79
|
|
|
80
80
|
url = "#{mds_url}/media/#{doi}"
|
|
81
81
|
response = Maremma.get(url, accept: 'application/xml', username: options[:username], password: options[:password])
|
data/lib/cirneco/base.rb
CHANGED
data/lib/cirneco/doi.rb
CHANGED
|
@@ -14,6 +14,7 @@ module Cirneco
|
|
|
14
14
|
method_option :username, :default => ENV['MDS_USERNAME']
|
|
15
15
|
method_option :password, :default => ENV['MDS_PASSWORD']
|
|
16
16
|
method_option :sandbox, :type => :boolean, :force => false
|
|
17
|
+
method_option :limit, :type => :numeric, :default => 25
|
|
17
18
|
def get(doi)
|
|
18
19
|
if doi == "all"
|
|
19
20
|
response = get_dois(options)
|
|
@@ -23,6 +24,8 @@ module Cirneco
|
|
|
23
24
|
|
|
24
25
|
if response.body["errors"]
|
|
25
26
|
puts "Error: " + response.body["errors"].first.fetch("title", "An error occured")
|
|
27
|
+
elsif doi == "all"
|
|
28
|
+
puts response.body["data"][0...options[:limit]]
|
|
26
29
|
else
|
|
27
30
|
puts response.body["data"]
|
|
28
31
|
end
|
|
@@ -54,6 +57,14 @@ module Cirneco
|
|
|
54
57
|
end
|
|
55
58
|
end
|
|
56
59
|
|
|
60
|
+
desc "generate DOI", "generate a DOI name"
|
|
61
|
+
method_option :lower_limit, :type => :numeric, :default => 0
|
|
62
|
+
method_option :namespace, :default => 'MS-'
|
|
63
|
+
method_option :number, :type => :numeric, :aliases => '-n'
|
|
64
|
+
def accession_number
|
|
65
|
+
puts generate_accession_number(options)
|
|
66
|
+
end
|
|
67
|
+
|
|
57
68
|
desc "decode DOI", "decode DOI encoded using Crockford base32 algorithm"
|
|
58
69
|
def decode(doi)
|
|
59
70
|
number = decode_doi(doi)
|
|
@@ -86,10 +97,10 @@ module Cirneco
|
|
|
86
97
|
method_option :sandbox, :type => :boolean, :force => false
|
|
87
98
|
def mint(filepath)
|
|
88
99
|
|
|
89
|
-
if
|
|
90
|
-
response =
|
|
100
|
+
if filepath.is_a?(Array)
|
|
101
|
+
response = mint_dois_for_all_urls(filepath, options)
|
|
91
102
|
else
|
|
92
|
-
response =
|
|
103
|
+
response = mint_doi_for_url(filepath, options)
|
|
93
104
|
end
|
|
94
105
|
|
|
95
106
|
puts response
|
|
@@ -107,10 +118,10 @@ module Cirneco
|
|
|
107
118
|
method_option :sandbox, :type => :boolean, :force => false
|
|
108
119
|
def mint_and_hide(filepath)
|
|
109
120
|
|
|
110
|
-
if
|
|
111
|
-
response =
|
|
121
|
+
if filepath.is_a?(Array)
|
|
122
|
+
response = mint_and_hide_dois_for_all_urls(filepath, options)
|
|
112
123
|
else
|
|
113
|
-
response =
|
|
124
|
+
response = mint_and_hide_doi_for_url(filepath, options)
|
|
114
125
|
end
|
|
115
126
|
|
|
116
127
|
puts response
|
|
@@ -127,10 +138,10 @@ module Cirneco
|
|
|
127
138
|
method_option :sandbox, :type => :boolean, :force => false
|
|
128
139
|
def hide(filepath)
|
|
129
140
|
|
|
130
|
-
if
|
|
131
|
-
response =
|
|
141
|
+
if filepath.is_a?(Array)
|
|
142
|
+
response = hide_dois_for_all_urls(filepath, options)
|
|
132
143
|
else
|
|
133
|
-
response =
|
|
144
|
+
response = hide_doi_for_url(filepath, options)
|
|
134
145
|
end
|
|
135
146
|
|
|
136
147
|
puts response
|
data/lib/cirneco/utils.rb
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
require 'base32/crockford'
|
|
2
2
|
require 'securerandom'
|
|
3
3
|
require 'bergamasco'
|
|
4
|
+
require 'open-uri'
|
|
4
5
|
require 'time'
|
|
5
6
|
|
|
6
7
|
module Cirneco
|
|
@@ -8,6 +9,11 @@ module Cirneco
|
|
|
8
9
|
# "ZZZZZZZ" decoded as number
|
|
9
10
|
UPPER_LIMIT = 34359738367
|
|
10
11
|
|
|
12
|
+
LICENSES = {
|
|
13
|
+
"https://creativecommons.org/licenses/by/4.0/" => "Creative Commons Attribution (CC-BY 4.0)",
|
|
14
|
+
"https://creativecommons.org/publicdomain/zero/1.0/" => "Creative Commons Public Domain Dedication (CC0 1.0)"
|
|
15
|
+
}
|
|
16
|
+
|
|
11
17
|
def get_dois_by_prefix(prefix, options={})
|
|
12
18
|
response = get_dois(options)
|
|
13
19
|
|
|
@@ -23,193 +29,207 @@ module Cirneco
|
|
|
23
29
|
end
|
|
24
30
|
|
|
25
31
|
def encode_doi(prefix, options={})
|
|
26
|
-
number = options[:number]
|
|
32
|
+
number = options[:number].to_s.scan(/\d+/).first.to_i
|
|
33
|
+
number = SecureRandom.random_number(UPPER_LIMIT) unless number > 0
|
|
27
34
|
prefix.to_s + "/" + Base32::Crockford.encode(number, split: 4, length: 8, checksum: true)
|
|
28
35
|
end
|
|
29
36
|
|
|
30
|
-
|
|
31
|
-
|
|
37
|
+
def generate_accession_number(options={})
|
|
38
|
+
lower_limit = options[:lower_limit] || 0
|
|
39
|
+
namespace = options[:namespace] || 'MS-'
|
|
40
|
+
registered_numbers = options[:registered_numbers] || []
|
|
41
|
+
|
|
42
|
+
if options[:number]
|
|
43
|
+
number = options[:number].to_s
|
|
44
|
+
else
|
|
45
|
+
begin
|
|
46
|
+
number = SecureRandom.random_number(1000000) + lower_limit
|
|
47
|
+
end while registered_numbers.include? number
|
|
48
|
+
number = number.to_s
|
|
49
|
+
end
|
|
50
|
+
|
|
51
|
+
number = number.to_s.rjust(options[:length], '0') if options[:length]
|
|
52
|
+
|
|
53
|
+
if options[:split]
|
|
54
|
+
number = number.reverse
|
|
55
|
+
number = number.scan(/.{1,#{options[:split]}}/).map { |x| x.reverse }
|
|
56
|
+
number = number.reverse.join("-")
|
|
57
|
+
end
|
|
58
|
+
|
|
59
|
+
namespace + number
|
|
60
|
+
end
|
|
61
|
+
|
|
62
|
+
def get_accession_number(filepath)
|
|
63
|
+
metadata = Bergamasco::Markdown.read_yaml_for_doi_metadata(filepath)
|
|
64
|
+
metadata.fetch("accession_number", nil).to_s.scan(/\d+/).first.to_i
|
|
65
|
+
end
|
|
66
|
+
|
|
67
|
+
def get_all_accession_numbers(folderpath)
|
|
68
|
+
Dir.glob("#{folderpath}/*.md").map do |filepath|
|
|
69
|
+
get_accession_number(filepath)
|
|
70
|
+
end.select { |a| a > 0 }.sort
|
|
71
|
+
end
|
|
72
|
+
|
|
73
|
+
def update_accession_number(filepath, options={})
|
|
32
74
|
filename = File.basename(filepath)
|
|
33
75
|
return "File #{filename} ignored: not a markdown file" unless File.extname(filepath) == ".md"
|
|
34
76
|
|
|
35
77
|
old_metadata = Bergamasco::Markdown.read_yaml_for_doi_metadata(filepath)
|
|
36
|
-
return "
|
|
78
|
+
return "Accession number #{old_metadata["accession_number"]} not changed for #{filename}" if old_metadata["accession_number"]
|
|
79
|
+
|
|
80
|
+
if old_metadata["doi"].present?
|
|
81
|
+
number = decode_doi(old_metadata["doi"])
|
|
82
|
+
options[:number] = number if number > 0
|
|
83
|
+
else
|
|
84
|
+
folderpath = File.dirname(filepath)
|
|
85
|
+
options[:registered_numbers] = get_all_accession_numbers(folderpath)
|
|
86
|
+
end
|
|
87
|
+
|
|
88
|
+
accession_number = generate_accession_number(options)
|
|
89
|
+
|
|
90
|
+
new_metadata = Bergamasco::Markdown.update_file(filepath, "accession_number" => accession_number)
|
|
91
|
+
"Accession number #{new_metadata["accession_number"]} generated for #{filename}"
|
|
92
|
+
end
|
|
93
|
+
|
|
94
|
+
def update_all_accession_numbers(folderpath)
|
|
95
|
+
Dir.glob("#{folderpath}/*.md").map do |filepath|
|
|
96
|
+
update_accession_number(filepath)
|
|
97
|
+
end
|
|
98
|
+
end
|
|
99
|
+
|
|
100
|
+
# fetch schema.org metadata in JSON-LD format to mint DOI
|
|
101
|
+
def mint_doi_for_url(url, options={})
|
|
102
|
+
filename = File.basename(url)
|
|
103
|
+
source_path = options[:source_path] || "/"
|
|
104
|
+
filepath = Dir.pwd + source_path + filename + ".md"
|
|
37
105
|
|
|
38
|
-
metadata = generate_metadata_for_work(
|
|
39
|
-
|
|
106
|
+
metadata = generate_metadata_for_work(url, options)
|
|
107
|
+
return "DOI #{metadata["doi"]} not changed for #{filename}" if metadata["doi"] && metadata["date_issued"]
|
|
40
108
|
|
|
41
|
-
|
|
109
|
+
response = post_metadata_for_work(metadata, options)
|
|
110
|
+
return "Errors for DOI #{metadata["doi"]}: #{response.body['errors'].first['title']}\n" if response.body['errors']
|
|
42
111
|
|
|
43
112
|
new_metadata = Bergamasco::Markdown.update_file(filepath, "doi" => metadata["doi"], "published" => true)
|
|
44
113
|
"DOI #{new_metadata["doi"]} minted for #{filename}"
|
|
45
114
|
end
|
|
46
115
|
|
|
47
|
-
#
|
|
48
|
-
def
|
|
49
|
-
filename = File.basename(
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
old_metadata = Bergamasco::Markdown.read_yaml_for_doi_metadata(filepath)
|
|
53
|
-
return "DOI #{old_metadata["doi"]} not changed for #{filename}" if old_metadata["doi"] && old_metadata["published"]
|
|
116
|
+
# fetch schema.org metadata in JSON-LD format to mint DOI
|
|
117
|
+
def mint_and_hide_doi_for_url(url, options={})
|
|
118
|
+
filename = File.basename(url)
|
|
119
|
+
source_path = options[:source_path] || "/"
|
|
120
|
+
filepath = Dir.pwd + source_path + filename + ".md"
|
|
54
121
|
|
|
55
|
-
metadata = generate_metadata_for_work(
|
|
56
|
-
|
|
122
|
+
metadata = generate_metadata_for_work(url, options)
|
|
123
|
+
return "DOI #{metadata["doi"]} not changed for #{filename}" if metadata["doi"] && metadata["date_issued"]
|
|
57
124
|
|
|
58
|
-
|
|
125
|
+
response = post_metadata_for_work(metadata, options)
|
|
126
|
+
return "Errors for DOI #{metadata["doi"]}: #{response.body['errors'].first['title']}\n" if response.body['errors']
|
|
59
127
|
|
|
60
128
|
new_metadata = Bergamasco::Markdown.update_file(filepath, "doi" => metadata["doi"], "published" => false)
|
|
61
129
|
"DOI #{new_metadata["doi"]} minted and hidden for #{filename}"
|
|
62
130
|
end
|
|
63
131
|
|
|
64
|
-
#
|
|
132
|
+
# fetch schema.org metadata in JSON-LD format to mint DOI
|
|
65
133
|
# DOIs are never deleted, but we can remove the metadata from the DataCite index
|
|
66
|
-
def
|
|
67
|
-
filename = File.basename(
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
old_metadata = Bergamasco::Markdown.read_yaml_for_doi_metadata(filepath)
|
|
71
|
-
return "DOI #{old_metadata["doi"]} not changed for #{filename}" unless old_metadata["doi"] && old_metadata["published"]
|
|
134
|
+
def hide_doi_for_url(url, options={})
|
|
135
|
+
filename = File.basename(url)
|
|
136
|
+
source_path = options[:source_path] || "/"
|
|
137
|
+
filepath = Dir.pwd + source_path + filename + ".md"
|
|
72
138
|
|
|
73
|
-
metadata = generate_metadata_for_work(
|
|
74
|
-
|
|
139
|
+
metadata = generate_metadata_for_work(url, options)
|
|
140
|
+
return "DOI #{metadata["doi"]} not changed for #{filename}" unless metadata["doi"] && metadata["date_issued"]
|
|
75
141
|
|
|
76
|
-
|
|
142
|
+
response = hide_metadata_for_work(metadata, options)
|
|
143
|
+
return "Errors for DOI #{metadata["doi"]}: #{response.body['errors'].first['title']}\n" if response.body['errors']
|
|
77
144
|
|
|
78
145
|
new_metadata = Bergamasco::Markdown.update_file(filepath, "published" => false)
|
|
79
|
-
"DOI #{
|
|
146
|
+
"DOI #{metadata["doi"]} hidden for #{filename}"
|
|
80
147
|
end
|
|
81
148
|
|
|
82
|
-
def
|
|
83
|
-
|
|
84
|
-
|
|
149
|
+
def mint_dois_for_all_urls(urls, options={})
|
|
150
|
+
urls.map do |url|
|
|
151
|
+
mint_doi_for_url(url, options)
|
|
85
152
|
end.join("\n")
|
|
86
153
|
end
|
|
87
154
|
|
|
88
|
-
def
|
|
89
|
-
|
|
90
|
-
|
|
155
|
+
def mint_and_hide_dois_for_all_urls(urls, options={})
|
|
156
|
+
urls.map do |url|
|
|
157
|
+
mint_and_hide_doi_for_url(url, options)
|
|
91
158
|
end.join("\n")
|
|
92
159
|
end
|
|
93
160
|
|
|
94
|
-
def
|
|
95
|
-
|
|
96
|
-
|
|
161
|
+
def hide_dois_for_all_urls(urls, options={})
|
|
162
|
+
urls.map do |url|
|
|
163
|
+
hide_doi_for_url(url, options)
|
|
97
164
|
end.join("\n")
|
|
98
165
|
end
|
|
99
166
|
|
|
100
|
-
def generate_metadata_for_work(
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
csl = options[:csl] || ENV['SITE_CSLPATH'] || "styles/apa.csl"
|
|
105
|
-
options = options.merge(csl: csl, bibliography: referencespath)
|
|
167
|
+
def generate_metadata_for_work(url, options={})
|
|
168
|
+
doc = Nokogiri::HTML(open(url))
|
|
169
|
+
json = doc.at_xpath("//script[@type='application/ld+json']")
|
|
170
|
+
return "Error: no schema.org metadata found" unless json.present?
|
|
106
171
|
|
|
107
|
-
metadata =
|
|
172
|
+
metadata = ActiveSupport::JSON.decode(json.text)
|
|
108
173
|
|
|
109
|
-
return "Error: required metadata missing" unless ["author", "
|
|
110
|
-
|
|
111
|
-
# read in optional yaml configuration files for site, author and references
|
|
112
|
-
site_options = Bergamasco::Markdown.read_yaml(sitepath) || {}
|
|
113
|
-
author_options = Bergamasco::Markdown.read_yaml(authorpath) || {}
|
|
114
|
-
references = Bergamasco::Markdown.read_yaml(referencespath) || {}
|
|
174
|
+
return "Error: required metadata missing" unless ["name", "author", "publisher", "datePublished", "@type"].all? { |k| metadata.key? k }
|
|
115
175
|
|
|
116
176
|
# required metadata
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
site_url = site_options["site_url"] || ENV['SITE_URL']
|
|
121
|
-
metadata["url"] ||= url_from_path(site_url, filepath)
|
|
122
|
-
|
|
123
|
-
metadata["creators"] = Array(metadata["author"]).map do |a|
|
|
124
|
-
author = author_options.fetch(a, {})
|
|
125
|
-
if author.present?
|
|
126
|
-
{ given_name: author["given"],
|
|
127
|
-
family_name: author["family"],
|
|
128
|
-
orcid: author["orcid"] }
|
|
129
|
-
else
|
|
130
|
-
{ literal: a }
|
|
131
|
-
end
|
|
177
|
+
if /(http|https):\/\/(dx\.)?doi\.org\/(\w+)/.match(metadata["@id"])
|
|
178
|
+
uri = Addressable::URI.parse(metadata["@id"])
|
|
179
|
+
metadata["doi"] = uri.path[1..-1].upcase
|
|
132
180
|
end
|
|
133
181
|
|
|
134
|
-
metadata["
|
|
135
|
-
|
|
182
|
+
metadata["title"] = metadata["name"]
|
|
183
|
+
|
|
184
|
+
metadata["creators"] = format_authors(metadata["author"])
|
|
185
|
+
|
|
186
|
+
metadata["publisher"] = metadata.fetch("publisher", {}).fetch("name", nil)
|
|
187
|
+
metadata["publication_year"] = metadata.fetch("datePublished", "")[0..3].to_i
|
|
136
188
|
|
|
137
|
-
metadata["type"]
|
|
138
|
-
resource_type_general = metadata["type"] == "Dataset" ? "Dataset" : "Text"
|
|
189
|
+
resource_type_general = metadata["@type"] == "Dataset" ? "Dataset" : "Text"
|
|
139
190
|
|
|
140
|
-
metadata["resource_type"] = { value: metadata["type"],
|
|
191
|
+
metadata["resource_type"] = { value: metadata["@type"],
|
|
141
192
|
resource_type_general: resource_type_general }
|
|
142
193
|
|
|
143
194
|
# recommended metadata
|
|
144
|
-
|
|
145
|
-
|
|
195
|
+
|
|
196
|
+
# use alternate_identifier to generate DOI
|
|
197
|
+
metadata["alternate_identifier"] = metadata["alternateName"]
|
|
198
|
+
|
|
199
|
+
if metadata["description"].present?
|
|
200
|
+
metadata["descriptions"] = [{ value: metadata["description"], description_type: "Abstract" }]
|
|
201
|
+
end
|
|
146
202
|
|
|
147
203
|
# use default version 1.0
|
|
148
204
|
metadata["version"] ||= "1.0"
|
|
149
205
|
|
|
150
206
|
# fetch reference metadata if available
|
|
151
|
-
metadata["related_identifiers"] =
|
|
152
|
-
reference = references.fetch(r, {})
|
|
153
|
-
if reference.present?
|
|
154
|
-
if reference["DOI"].present?
|
|
155
|
-
value = reference["DOI"].upcase
|
|
156
|
-
type = "DOI"
|
|
157
|
-
elsif /(http|https):\/\/(dx\.)?doi\.org\/(\w+)/.match(reference["URL"])
|
|
158
|
-
uri = Addressable::URI.parse(reference["URL"])
|
|
159
|
-
value = uri.path[1..-1].upcase
|
|
160
|
-
type = "DOI"
|
|
161
|
-
elsif reference["URL"].present?
|
|
162
|
-
value = reference["URL"]
|
|
163
|
-
type = "URL"
|
|
164
|
-
else
|
|
165
|
-
type = nil
|
|
166
|
-
end
|
|
167
|
-
else
|
|
168
|
-
if /(http|https):\/\/(dx\.)?doi\.org\/(\w+)/.match(r)
|
|
169
|
-
uri = Addressable::URI.parse(r)
|
|
170
|
-
value = uri.path[1..-1].upcase
|
|
171
|
-
type = "DOI"
|
|
172
|
-
elsif /(http|https):\/\//.match(r)
|
|
173
|
-
uri = Addressable::URI.parse(r)
|
|
174
|
-
value = uri.normalize.to_s
|
|
175
|
-
type = "URL"
|
|
176
|
-
else
|
|
177
|
-
type = nil
|
|
178
|
-
end
|
|
179
|
-
end
|
|
180
|
-
|
|
181
|
-
{
|
|
182
|
-
value: value,
|
|
183
|
-
related_identifier_type: type,
|
|
184
|
-
relation_type: "References"
|
|
185
|
-
}
|
|
186
|
-
end.select { |t| t[:related_identifier_type].present? }
|
|
187
|
-
|
|
188
|
-
metadata["license_name"] = site_options.fetch("license", {}).fetch("name", nil) || ENV['SITE_LICENCE_NAME'] || "Creative Commons Attribution"
|
|
189
|
-
metadata["license_url"] = site_options.fetch("license", {}).fetch("url", nil) || ENV['SITE_LICENCE_URL'] || "https://creativecommons.org/licenses/by/4.0/"
|
|
190
|
-
metadata["rights_list"] = [{ value: metadata["license_name"], rights_uri: metadata["license_url"] }]
|
|
207
|
+
metadata["related_identifiers"] = get_related_identifiers(metadata)
|
|
191
208
|
|
|
192
|
-
|
|
209
|
+
if metadata["license"].present?
|
|
210
|
+
value = LICENSES.fetch(metadata["license_url"], nil)
|
|
211
|
+
metadata["rights_list"] = [{ value: value, rights_uri: metadata["license"] }] if value.present?
|
|
212
|
+
end
|
|
193
213
|
|
|
194
|
-
|
|
195
|
-
|
|
214
|
+
if metadata["keywords"].present?
|
|
215
|
+
metadata["subjects"] = Array(metadata["keywords"].split(", ")).select { |k| k != "featured" }
|
|
216
|
+
end
|
|
196
217
|
|
|
197
|
-
metadata["
|
|
198
|
-
metadata["
|
|
199
|
-
metadata["
|
|
218
|
+
metadata["date_created"] = metadata["dateCreated"]
|
|
219
|
+
metadata["date_issued"] = metadata["datePublished"]
|
|
220
|
+
metadata["date_updated"] = metadata["dateModified"]
|
|
200
221
|
|
|
201
|
-
metadata = metadata.extract!(*%w(doi url creators title
|
|
202
|
-
publication_year
|
|
203
|
-
date_issued related_identifiers))
|
|
204
|
-
end
|
|
205
|
-
|
|
206
|
-
def url_from_path(site_url, filepath)
|
|
207
|
-
site_url.to_s.chomp("\\") + "/" + File.basename(filepath)[0..-9] + "/"
|
|
222
|
+
metadata = metadata.extract!(*%w(doi alternate_identifier url creators title
|
|
223
|
+
publisher publication_year resource_type descriptions version rights_list
|
|
224
|
+
subjects date_issued date_created date_updated related_identifiers))
|
|
208
225
|
end
|
|
209
226
|
|
|
210
227
|
def post_metadata_for_work(metadata, options={})
|
|
228
|
+
prefix = options[:prefix] || ENV['PREFIX']
|
|
229
|
+
metadata["doi"] ||= encode_doi(prefix, number: metadata["alternate_identifier"])
|
|
230
|
+
|
|
211
231
|
work = Cirneco::Work.new(metadata)
|
|
212
|
-
return work.validation_errors if work.validation_errors.present?
|
|
232
|
+
return work.validation_errors if work.validation_errors.body["errors"].present?
|
|
213
233
|
|
|
214
234
|
response = work.post_metadata(work.data, options)
|
|
215
235
|
return response unless response.status == 201
|
|
@@ -218,8 +238,11 @@ module Cirneco
|
|
|
218
238
|
end
|
|
219
239
|
|
|
220
240
|
def post_and_hide_metadata_for_work(metadata, options={})
|
|
241
|
+
prefix = options[:prefix] || ENV['PREFIX']
|
|
242
|
+
metadata["doi"] ||= encode_doi(prefix, number: metadata["alternate_identifier"])
|
|
243
|
+
|
|
221
244
|
work = Cirneco::Work.new(metadata)
|
|
222
|
-
return work.validation_errors if work.validation_errors.present?
|
|
245
|
+
return work.validation_errors if work.validation_errors.body["errors"].present?
|
|
223
246
|
|
|
224
247
|
response = work.post_metadata(work.data, options)
|
|
225
248
|
return response unless response.status == 201
|
|
@@ -231,8 +254,11 @@ module Cirneco
|
|
|
231
254
|
end
|
|
232
255
|
|
|
233
256
|
def hide_metadata_for_work(metadata, options={})
|
|
257
|
+
prefix = options[:prefix] || ENV['PREFIX']
|
|
258
|
+
metadata["doi"] ||= encode_doi(prefix, number: metadata["alternate_identifier"])
|
|
259
|
+
|
|
234
260
|
work = Cirneco::Work.new(metadata)
|
|
235
|
-
return work.validation_errors if work.validation_errors.present?
|
|
261
|
+
return work.validation_errors if work.validation_errors.body["errors"].present?
|
|
236
262
|
|
|
237
263
|
work.delete_metadata(metadata["doi"], options)
|
|
238
264
|
end
|
|
@@ -240,5 +266,55 @@ module Cirneco
|
|
|
240
266
|
def generate_jats(filepath, options={})
|
|
241
267
|
Bergamasco::Pandoc.write_jats(filepath, options)
|
|
242
268
|
end
|
|
269
|
+
|
|
270
|
+
def url_from_path(site_url, filepath)
|
|
271
|
+
site_url.to_s.chomp("\\") + "/" + File.basename(filepath)[0..-9] + "/"
|
|
272
|
+
end
|
|
273
|
+
|
|
274
|
+
def format_authors(authors)
|
|
275
|
+
Array(authors).map do |author|
|
|
276
|
+
orcid = orcid_from_url(author["@id"])
|
|
277
|
+
name = (author["givenName"].present? || author["familyName"].present?) ? nil : author["name"]
|
|
278
|
+
|
|
279
|
+
{ given_name: author["givenName"],
|
|
280
|
+
family_name: author["familyName"],
|
|
281
|
+
name: name,
|
|
282
|
+
orcid: orcid }.compact
|
|
283
|
+
end
|
|
284
|
+
end
|
|
285
|
+
|
|
286
|
+
def get_related_identifiers(metadata)
|
|
287
|
+
citations = Array(metadata["citation"])
|
|
288
|
+
parts = Array(metadata["IsPartOf"]).map { |r| r["relation_type"] = "IsPartOf" }
|
|
289
|
+
|
|
290
|
+
(citations + parts).map do |r|
|
|
291
|
+
id = r.fetch("@id", "")
|
|
292
|
+
relation_type = r.fetch("relation_type", "References")
|
|
293
|
+
|
|
294
|
+
if /(http|https):\/\/(dx\.)?doi\.org\/(\w+)/.match(id)
|
|
295
|
+
uri = Addressable::URI.parse(id)
|
|
296
|
+
value = uri.path[1..-1].upcase
|
|
297
|
+
type = "DOI"
|
|
298
|
+
elsif /(http|https):\/\//.match(id)
|
|
299
|
+
uri = Addressable::URI.parse(id)
|
|
300
|
+
value = uri.normalize.to_s
|
|
301
|
+
type = "URL"
|
|
302
|
+
else
|
|
303
|
+
type = nil
|
|
304
|
+
end
|
|
305
|
+
|
|
306
|
+
{
|
|
307
|
+
value: value,
|
|
308
|
+
related_identifier_type: type,
|
|
309
|
+
relation_type: relation_type
|
|
310
|
+
}
|
|
311
|
+
end.select { |t| t[:related_identifier_type].present? }
|
|
312
|
+
end
|
|
313
|
+
|
|
314
|
+
def orcid_from_url(url)
|
|
315
|
+
return nil unless url.present?
|
|
316
|
+
|
|
317
|
+
Array(/\Ahttp:\/\/orcid\.org\/(.+)/.match(url)).last
|
|
318
|
+
end
|
|
243
319
|
end
|
|
244
320
|
end
|
data/lib/cirneco/version.rb
CHANGED