toccatore 0.2.3 → 0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Dockerfile +6 -3
- data/Gemfile.lock +11 -12
- data/README.md +1 -1
- data/lib/toccatore.rb +2 -0
- data/lib/toccatore/base.rb +26 -184
- data/lib/toccatore/cli.rb +13 -1
- data/lib/toccatore/datacite_related.rb +77 -0
- data/lib/toccatore/version.rb +1 -1
- data/spec/base_spec.rb +49 -0
- data/spec/cli_spec.rb +58 -11
- data/spec/datacite_related_spec.rb +129 -0
- data/spec/fixtures/datacite_related.json +14792 -0
- data/spec/fixtures/datacite_related_nil.json +12 -0
- data/spec/fixtures/orcid_update_is_previous.json +24 -1
- data/spec/fixtures/vcr_cassettes/Toccatore_Base/get_doi_ra/crossref.yml +72 -0
- data/spec/fixtures/vcr_cassettes/Toccatore_Base/get_doi_ra/datacite.yml +74 -0
- data/spec/fixtures/vcr_cassettes/Toccatore_CLI/datacite_related/should_fail.yml +1408 -0
- data/spec/fixtures/vcr_cassettes/Toccatore_CLI/datacite_related/should_query_by_DOI.yml +1145 -0
- data/spec/fixtures/vcr_cassettes/Toccatore_CLI/datacite_related/should_query_by_related_identifier.yml +252 -0
- data/spec/fixtures/vcr_cassettes/Toccatore_CLI/datacite_related/should_succeed.yml +6348 -0
- data/spec/fixtures/vcr_cassettes/Toccatore_CLI/datacite_related/should_succeed_with_no_works.yml +38 -0
- data/spec/fixtures/vcr_cassettes/Toccatore_CLI/orcid_update/should_delete.yml +47 -49
- data/spec/fixtures/vcr_cassettes/Toccatore_CLI/orcid_update/should_fail.yml +21 -132
- data/spec/fixtures/vcr_cassettes/Toccatore_CLI/orcid_update/should_query_by_DOI.yml +731 -1016
- data/spec/fixtures/vcr_cassettes/Toccatore_CLI/orcid_update/should_query_by_ORCID_ID.yml +1880 -1889
- data/spec/fixtures/vcr_cassettes/Toccatore_CLI/orcid_update/should_succeed.yml +1576 -1591
- data/spec/fixtures/vcr_cassettes/Toccatore_CLI/orcid_update/should_succeed_with_no_works.yml +11 -11
- data/spec/fixtures/vcr_cassettes/Toccatore_DataciteRelated/get_data/should_allow_queries_by_DOI_of_the_Datacite_Metadata_Search_API.yml +38 -0
- data/spec/fixtures/vcr_cassettes/Toccatore_DataciteRelated/get_data/should_allow_queries_by_related_identifier_of_the_Datacite_Metadata_Search_API.yml +38 -0
- data/spec/fixtures/vcr_cassettes/Toccatore_DataciteRelated/get_data/should_report_if_there_are_no_works_returned_by_the_Datacite_Metadata_Search_API.yml +38 -0
- data/spec/fixtures/vcr_cassettes/Toccatore_DataciteRelated/get_data/should_report_if_there_are_works_returned_by_the_Datacite_Metadata_Search_API.yml +40 -0
- data/spec/fixtures/vcr_cassettes/{Toccatore_OrcidUpdate/get_data/should_catch_errors_with_the_Datacite_Metadata_Search_API.yml → Toccatore_DataciteRelated/get_total/with_no_works.yml} +10 -10
- data/spec/fixtures/vcr_cassettes/Toccatore_DataciteRelated/get_total/with_works.yml +38 -0
- data/spec/fixtures/vcr_cassettes/Toccatore_DataciteRelated/parse_data/should_report_if_there_are_works_returned_by_the_Datacite_Metadata_Search_API.yml +1326 -0
- data/spec/fixtures/vcr_cassettes/Toccatore_DataciteRelated/push_data/should_report_if_there_are_works_returned_by_the_Datacite_Metadata_Search_API.yml +6274 -0
- data/spec/fixtures/vcr_cassettes/Toccatore_OrcidUpdate/get_data/should_allow_queries_by_DOI_of_the_Datacite_Metadata_Search_API.yml +13 -458
- data/spec/fixtures/vcr_cassettes/Toccatore_OrcidUpdate/get_data/should_allow_queries_by_ORCID_ID_of_the_Datacite_Metadata_Search_API.yml +10 -143
- data/spec/fixtures/vcr_cassettes/Toccatore_OrcidUpdate/get_data/should_report_if_there_are_no_works_returned_by_the_Datacite_Metadata_Search_API.yml +9 -9
- data/spec/fixtures/vcr_cassettes/Toccatore_OrcidUpdate/get_data/should_report_if_there_are_works_returned_by_the_Datacite_Metadata_Search_API.yml +10 -121
- data/spec/fixtures/vcr_cassettes/Toccatore_OrcidUpdate/get_total/with_no_works.yml +10 -10
- data/spec/fixtures/vcr_cassettes/Toccatore_OrcidUpdate/get_total/with_works.yml +10 -10
- data/spec/fixtures/vcr_cassettes/Toccatore_OrcidUpdate/push_data/should_delete_claims.yml +27 -25
- data/spec/fixtures/vcr_cassettes/Toccatore_OrcidUpdate/push_data/should_report_if_there_are_works_returned_by_the_Datacite_Metadata_Search_API.yml +1506 -1404
- data/spec/fixtures/vcr_cassettes/Toccatore_OrcidUpdate/queue_jobs/should_report_if_there_are_no_works_returned_by_the_Datacite_Metadata_Search_API.yml +9 -9
- data/spec/fixtures/vcr_cassettes/Toccatore_OrcidUpdate/queue_jobs/should_report_if_there_are_works_returned_by_the_Datacite_Metadata_Search_API.yml +21 -182
- data/spec/orcid_update_spec.rb +8 -14
- data/spec/spec_helper.rb +1 -0
- data/toccatore.gemspec +1 -1
- metadata +24 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e67c0fc653ac9fc750cd7a75ecd0056c72dcdfec
|
4
|
+
data.tar.gz: cf181d9073928728a382e6740912a982cb02fd61
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4a1c3d00575f9289cc7938028829b49a1cddbb397419e17483360e4a6586566fbf3e20a1f760727e8a90df20b4dcfce06097ac87e2a96b09ffc12c567f555e16
|
7
|
+
data.tar.gz: 43ae2875879cf644783244d554d52a6bde83c615bd5dbea12a441ac9291c61d65b2bc7fb8bc03a597f880676499d76c19c908d43cd490e8fbeb1ac52705d177d
|
data/Dockerfile
CHANGED
@@ -1,7 +1,10 @@
|
|
1
|
-
FROM phusion/passenger-full:0.9.
|
1
|
+
FROM phusion/passenger-full:0.9.20
|
2
2
|
MAINTAINER Martin Fenner "mfenner@datacite.org"
|
3
3
|
|
4
|
-
|
4
|
+
# Install Ruby 2.3.3
|
5
|
+
RUN bash -lc 'rvm --default use ruby-2.3.3'
|
6
|
+
|
7
|
+
ENV PATH="/usr/local/rvm/gems/ruby-2.3.3/bin:${PATH}"
|
5
8
|
|
6
9
|
# Update installed APT packages, clean up APT when done.
|
7
10
|
RUN apt-get update && apt-get upgrade -y -o Dpkg::Options::="--force-confold" && \
|
@@ -10,4 +13,4 @@ RUN apt-get update && apt-get upgrade -y -o Dpkg::Options::="--force-confold" &&
|
|
10
13
|
# Install toccatore gem
|
11
14
|
RUN /sbin/setuser app gem install toccatore
|
12
15
|
|
13
|
-
CMD toccatore
|
16
|
+
CMD toccatore --version
|
data/Gemfile.lock
CHANGED
@@ -1,27 +1,26 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
toccatore (0.
|
4
|
+
toccatore (0.3)
|
5
5
|
activesupport (~> 4.2, >= 4.2.5)
|
6
6
|
dotenv (~> 2.1, >= 2.1.1)
|
7
7
|
gender_detector (~> 1.0)
|
8
|
-
maremma (~> 3.
|
8
|
+
maremma (~> 3.5)
|
9
9
|
namae (~> 0.11.0)
|
10
10
|
thor (~> 0.19)
|
11
11
|
|
12
12
|
GEM
|
13
13
|
remote: https://rubygems.org/
|
14
14
|
specs:
|
15
|
-
activesupport (4.2.
|
15
|
+
activesupport (4.2.8)
|
16
16
|
i18n (~> 0.7)
|
17
|
-
json (~> 1.7, >= 1.7.7)
|
18
17
|
minitest (~> 5.1)
|
19
18
|
thread_safe (~> 0.3, >= 0.3.4)
|
20
19
|
tzinfo (~> 1.1)
|
21
20
|
addressable (2.5.0)
|
22
21
|
public_suffix (~> 2.0, >= 2.0.2)
|
23
22
|
builder (3.2.3)
|
24
|
-
codeclimate-test-reporter (1.0.
|
23
|
+
codeclimate-test-reporter (1.0.7)
|
25
24
|
simplecov
|
26
25
|
crack (0.4.3)
|
27
26
|
safe_yaml (~> 1.0.0)
|
@@ -37,11 +36,11 @@ GEM
|
|
37
36
|
faraday (>= 0.7.4, < 1.0)
|
38
37
|
gender_detector (1.0.0)
|
39
38
|
hashdiff (0.3.2)
|
40
|
-
i18n (0.8.
|
41
|
-
json (
|
42
|
-
maremma (3.1
|
39
|
+
i18n (0.8.1)
|
40
|
+
json (2.0.3)
|
41
|
+
maremma (3.5.1)
|
43
42
|
activesupport (~> 4.2, >= 4.2.5)
|
44
|
-
addressable (
|
43
|
+
addressable (>= 2.3.6)
|
45
44
|
builder (~> 3.2, >= 3.2.2)
|
46
45
|
excon (~> 0.45.0)
|
47
46
|
faraday (~> 0.9.2)
|
@@ -49,7 +48,7 @@ GEM
|
|
49
48
|
faraday_middleware (~> 0.10.0)
|
50
49
|
multi_json (~> 1.11.2)
|
51
50
|
nokogiri (~> 1.6.7)
|
52
|
-
oj (~> 2.
|
51
|
+
oj (~> 2.18, >= 2.18.1)
|
53
52
|
mini_portile2 (2.1.0)
|
54
53
|
minitest (5.10.1)
|
55
54
|
multi_json (1.11.3)
|
@@ -57,7 +56,7 @@ GEM
|
|
57
56
|
namae (0.11.3)
|
58
57
|
nokogiri (1.6.8.1)
|
59
58
|
mini_portile2 (~> 2.1.0)
|
60
|
-
oj (2.
|
59
|
+
oj (2.18.2)
|
61
60
|
public_suffix (2.0.5)
|
62
61
|
rack (2.0.1)
|
63
62
|
rack-test (0.6.3)
|
@@ -83,7 +82,7 @@ GEM
|
|
83
82
|
simplecov-html (~> 0.10.0)
|
84
83
|
simplecov-html (0.10.0)
|
85
84
|
thor (0.19.4)
|
86
|
-
thread_safe (0.3.
|
85
|
+
thread_safe (0.3.6)
|
87
86
|
tzinfo (1.2.2)
|
88
87
|
thread_safe (~> 0.1)
|
89
88
|
vcr (3.0.3)
|
data/README.md
CHANGED
@@ -4,7 +4,7 @@
|
|
4
4
|
[![Code Climate](https://codeclimate.com/github/datacite/toccatore/badges/gpa.svg)](https://codeclimate.com/github/datacite/toccatore)
|
5
5
|
[![Test Coverage](https://codeclimate.com/github/datacite/toccatore/badges/coverage.svg)](https://codeclimate.com/github/datacite/toccatore/coverage)
|
6
6
|
|
7
|
-
Command-line client for finding ORCID IDs in DataCite metadata.
|
7
|
+
Command-line client for finding ORCID IDs and related identifiers in DataCite metadata.
|
8
8
|
|
9
9
|
## Development
|
10
10
|
|
data/lib/toccatore.rb
CHANGED
data/lib/toccatore/base.rb
CHANGED
@@ -28,6 +28,8 @@ module Toccatore
|
|
28
28
|
q = "doi:#{options[:doi]}"
|
29
29
|
elsif options[:orcid].present?
|
30
30
|
q = "nameIdentifier:ORCID\\:#{options[:orcid]}"
|
31
|
+
elsif options[:related_identifier].present?
|
32
|
+
q = "relatedIdentifier:DOI\\:#{options[:related_identifier]}"
|
31
33
|
elsif options[:query].present?
|
32
34
|
q = options[:query]
|
33
35
|
else
|
@@ -37,7 +39,7 @@ module Toccatore
|
|
37
39
|
params = { q: q,
|
38
40
|
start: options[:offset],
|
39
41
|
rows: options[:rows],
|
40
|
-
fl: "doi,
|
42
|
+
fl: "doi,resourceTypeGeneral,relatedIdentifier,nameIdentifier,minted,updated",
|
41
43
|
fq: fq,
|
42
44
|
wt: "json" }
|
43
45
|
url + URI.encode_www_form(params)
|
@@ -87,206 +89,46 @@ module Toccatore
|
|
87
89
|
Maremma.get(query_url, options)
|
88
90
|
end
|
89
91
|
|
90
|
-
def
|
91
|
-
|
92
|
-
|
93
|
-
items = result.fetch("data", {}).fetch('response', {}).fetch('docs', nil)
|
94
|
-
get_relations_with_related_works(items)
|
95
|
-
end
|
96
|
-
|
97
|
-
# push to Lagotto deposit API if no error and we have collected works
|
98
|
-
def push_data(items, options={})
|
99
|
-
if items.empty?
|
100
|
-
puts "No works found for date range #{options[:from_date]} - #{options[:until_date]}."
|
101
|
-
else
|
102
|
-
Array(items).map do |item|
|
103
|
-
relation = item.fetch(:relation, {})
|
104
|
-
deposit = { "deposit" => { "subj_id" => relation.fetch("subj_id", nil),
|
105
|
-
"obj_id" => relation.fetch("obj_id", nil),
|
106
|
-
"relation_type_id" => relation.fetch("relation_type_id", nil),
|
107
|
-
"source_id" => relation.fetch("source_id", nil),
|
108
|
-
"publisher_id" => relation.fetch("publisher_id", nil),
|
109
|
-
"subj" => item.fetch(:subj, {}),
|
110
|
-
"obj" => item.fetch(:obj, {}),
|
111
|
-
"message_type" => item.fetch(:message_type, "relation"),
|
112
|
-
"prefix" => item.fetch(:prefix, nil),
|
113
|
-
"source_token" => uuid } }
|
114
|
-
|
115
|
-
Maremma.post push_url, data: deposit.to_json, content_type: 'json', token: access_token
|
116
|
-
end
|
117
|
-
end
|
92
|
+
# Base URL of the DataCite search API. It ends in "?" because the query
# builder in this module appends the URL-encoded parameters directly to it.
def url
|
93
|
+
"https://search.datacite.org/api?"
|
118
94
|
end
|
119
95
|
|
120
|
-
def
|
121
|
-
|
122
|
-
doi = item.fetch("doi", nil)
|
123
|
-
prefix = doi[/^10\.\d{4,5}/]
|
124
|
-
pid = doi_as_url(doi)
|
125
|
-
type = item.fetch("resourceTypeGeneral", nil)
|
126
|
-
publisher_id = item.fetch("datacentre_symbol", nil)
|
127
|
-
|
128
|
-
xml = Base64.decode64(item.fetch('xml', "PGhzaD48L2hzaD4=\n"))
|
129
|
-
xml = Hash.from_xml(xml).fetch("resource", {})
|
130
|
-
authors = xml.fetch("creators", {}).fetch("creator", [])
|
131
|
-
authors = [authors] if authors.is_a?(Hash)
|
132
|
-
|
133
|
-
subj = { "pid" => pid,
|
134
|
-
"DOI" => doi,
|
135
|
-
"author" => get_hashed_authors(authors),
|
136
|
-
"title" => item.fetch("title", []).first,
|
137
|
-
"container-title" => item.fetch("publisher", nil),
|
138
|
-
"published" => item.fetch("publicationYear", nil),
|
139
|
-
"issued" => item.fetch("minted", nil),
|
140
|
-
"publisher_id" => publisher_id,
|
141
|
-
"registration_agency_id" => "datacite",
|
142
|
-
"tracked" => true,
|
143
|
-
"type" => type }
|
144
|
-
|
145
|
-
related_doi_identifiers = item.fetch('relatedIdentifier', []).select { |id| id =~ /:DOI:.+/ }
|
146
|
-
sum += get_doi_relations(subj, related_doi_identifiers)
|
147
|
-
|
148
|
-
related_github_identifiers = item.fetch('relatedIdentifier', []).select { |id| id =~ /:URL:https:\/\/github.com.+/ }
|
149
|
-
sum += get_github_relations(subj, related_github_identifiers)
|
150
|
-
|
151
|
-
name_identifiers = item.fetch('nameIdentifier', []).select { |id| id =~ /^ORCID:.+/ }
|
152
|
-
sum += get_contributions(subj, name_identifiers)
|
153
|
-
|
154
|
-
if source_id == "datacite_import"
|
155
|
-
sum += [{ prefix: prefix,
|
156
|
-
relation: { "subj_id" => subj["pid"],
|
157
|
-
"source_id" => source_id,
|
158
|
-
"publisher_id" => subj["publisher_id"],
|
159
|
-
"occurred_at" => subj["issued"] },
|
160
|
-
subj: subj }]
|
161
|
-
end
|
162
|
-
|
163
|
-
sum
|
164
|
-
end
|
96
|
+
# Returns the constant 120.
# NOTE(review): presumably a request timeout in seconds -- no caller is visible
# in this diff, confirm the unit where it is consumed.
def timeout
|
97
|
+
120
|
165
98
|
end
|
166
99
|
|
167
|
-
def
|
168
|
-
|
169
|
-
|
170
|
-
Array(items).reduce([]) do |sum, item|
|
171
|
-
raw_relation_type, _related_identifier_type, related_identifier = item.split(':', 3)
|
172
|
-
|
173
|
-
# get parent repo
|
174
|
-
# code from https://github.com/octokit/octokit.rb/blob/master/lib/octokit/repository.rb
|
175
|
-
related_identifier = PostRank::URI.clean(related_identifier)
|
176
|
-
github_hash = github_from_url(related_identifier)
|
177
|
-
owner_url = github_as_owner_url(github_hash)
|
178
|
-
repo_url = github_as_repo_url(github_hash)
|
179
|
-
|
180
|
-
sum << { prefix: prefix,
|
181
|
-
relation: { "subj_id" => subj["pid"],
|
182
|
-
"obj_id" => related_identifier,
|
183
|
-
"relation_type_id" => raw_relation_type.underscore,
|
184
|
-
"source_id" => source_id,
|
185
|
-
"publisher_id" => subj["publisher_id"],
|
186
|
-
"registration_agency_id" => "github",
|
187
|
-
"occurred_at" => subj["issued"] },
|
188
|
-
subj: subj }
|
189
|
-
|
190
|
-
# if relatedIdentifier is release URL rather than repo URL
|
191
|
-
if related_identifier != repo_url
|
192
|
-
sum << { relation: { "subj_id" => related_identifier,
|
193
|
-
"obj_id" => repo_url,
|
194
|
-
"relation_type_id" => "is_part_of",
|
195
|
-
"source_id" => source_id,
|
196
|
-
"publisher_id" => "github",
|
197
|
-
"registration_agency_id" => "github" } }
|
198
|
-
end
|
199
|
-
|
200
|
-
sum << { message_type: "contribution",
|
201
|
-
relation: { "subj_id" => owner_url,
|
202
|
-
"obj_id" => repo_url,
|
203
|
-
"source_id" => "github_contributor",
|
204
|
-
"registration_agency_id" => "github" }}
|
205
|
-
end
|
100
|
+
# Returns the constant 1000.
# NOTE(review): presumably the number of records handled per batch by
# queue_jobs -- the consumer is not visible in this diff, confirm there.
def job_batch_size
|
101
|
+
1000
|
206
102
|
end
|
207
103
|
|
208
|
-
def
|
209
|
-
|
210
|
-
|
211
|
-
Array(items).reduce([]) do |sum, item|
|
212
|
-
raw_relation_type, _related_identifier_type, related_identifier = item.split(':', 3)
|
213
|
-
doi = related_identifier.strip.upcase
|
214
|
-
registration_agency = get_doi_ra(doi)
|
215
|
-
|
216
|
-
if source_id == "datacite_crossref" && registration_agency == "datacite"
|
217
|
-
sum
|
218
|
-
else
|
219
|
-
_source_id = registration_agency == "crossref" ? "datacite_crossref" : "datacite_related"
|
220
|
-
pid = doi_as_url(doi)
|
221
|
-
|
222
|
-
sum << { prefix: prefix,
|
223
|
-
relation: { "subj_id" => subj["pid"],
|
224
|
-
"obj_id" => pid,
|
225
|
-
"relation_type_id" => raw_relation_type.underscore,
|
226
|
-
"source_id" => _source_id,
|
227
|
-
"publisher_id" => subj["publisher_id"],
|
228
|
-
"registration_agency_id" => registration_agency,
|
229
|
-
"occurred_at" => subj["issued"] },
|
230
|
-
subj: subj }
|
231
|
-
end
|
232
|
-
end
|
233
|
-
end
|
104
|
+
def get_doi_ra(prefix)
|
105
|
+
return nil if prefix.blank?
|
234
106
|
|
235
|
-
|
236
|
-
|
237
|
-
prefix = obj["DOI"][/^10\.\d{4,5}/]
|
238
|
-
|
239
|
-
Array(items).reduce([]) do |sum, item|
|
240
|
-
orcid = item.split(':', 2).last
|
241
|
-
orcid = validate_orcid(orcid)
|
242
|
-
|
243
|
-
return sum if orcid.nil?
|
244
|
-
|
245
|
-
sum << { prefix: prefix,
|
246
|
-
message_type: "contribution",
|
247
|
-
relation: { "subj_id" => orcid_as_url(orcid),
|
248
|
-
"obj_id" => obj["pid"],
|
249
|
-
"relation_type_id" => nil,
|
250
|
-
"source_id" => source_id,
|
251
|
-
"publisher_id" => obj["publisher_id"],
|
252
|
-
"registration_agency_id" => "datacite",
|
253
|
-
"occurred_at" => obj["issued"] },
|
254
|
-
obj: obj }
|
255
|
-
end
|
256
|
-
end
|
107
|
+
url = "https://api.datacite.org/prefixes/#{prefix}"
|
108
|
+
result = Maremma.get(url)
|
257
109
|
|
258
|
-
|
259
|
-
[:url, :push_url, :access_token]
|
260
|
-
end
|
110
|
+
return result.body.fetch("errors") if result.body.fetch("errors", nil).present?
|
261
111
|
|
262
|
-
|
263
|
-
"https://search.datacite.org/api?"
|
112
|
+
result.body.fetch("data", {}).fetch('attributes', {}).fetch('registration-agency', nil)
|
264
113
|
end
|
265
114
|
|
266
|
-
def
|
267
|
-
|
115
|
+
# Extracts the bare DOI from a plain DOI, a "doi:"-prefixed string or a
# doi.org / dx.doi.org resolver URL.
#
# @param doi [String, nil] candidate, e.g. "10.5061/dryad.8515",
#   "doi:10.5061/dryad.8515" or "https://doi.org/10.5061/dryad.8515"
# @return [String, nil] the DOI (prefix, slash and suffix) exactly as written
#   in the input, or nil when the input does not match
def validate_doi(doi)
  # The dots of the resolver host are escaped so that only doi.org and
  # dx.doi.org are accepted; an unescaped "." would match any character.
  # The DOI is the last capture group; Array(nil) is [] when nothing matches.
  Array(/\A(?:(http|https):\/\/(dx\.)?doi\.org\/)?(doi:)?(10\.\d{4,5}\/.+)\z/.match(doi)).last
end
|
269
118
|
|
270
|
-
def
|
271
|
-
|
119
|
+
# Extracts the DOI prefix (the "10.NNNN" part before the first slash) from a
# plain DOI, a "doi:"-prefixed string or a doi.org / dx.doi.org resolver URL.
#
# @param doi [String, nil] candidate, e.g. "10.5061/dryad.8515" or
#   "https://doi.org/10.5061/dryad.8515"
# @return [String, nil] the prefix, e.g. "10.5061", or nil when the input
#   does not match
def validate_prefix(doi)
  # The dots of the resolver host are escaped so that only doi.org and
  # dx.doi.org are accepted; an unescaped "." would match any character.
  # The prefix is the last capture group; Array(nil) is [] when nothing matches.
  Array(/\A(?:(http|https):\/\/(dx\.)?doi\.org\/)?(doi:)?(10\.\d{4,5})\/.+\z/.match(doi)).last
end
|
273
122
|
|
274
|
-
|
275
|
-
|
276
|
-
doi.
|
277
|
-
end
|
123
|
+
def normalize_doi(doi)
|
124
|
+
doi = validate_doi(doi)
|
125
|
+
return nil unless doi.present?
|
278
126
|
|
279
|
-
|
280
|
-
|
281
|
-
uri = Addressable::URI.parse(url)
|
282
|
-
uri.path[1..-1].upcase
|
283
|
-
elsif url.starts_with?("doi:")
|
284
|
-
url[4..-1].upcase
|
285
|
-
end
|
286
|
-
end
|
127
|
+
# remove non-printing whitespace and downcase
|
128
|
+
doi = doi.delete("\u200B").downcase
|
287
129
|
|
288
|
-
|
289
|
-
|
130
|
+
# turn DOI into URL, escape unsafe characters
|
131
|
+
"https://doi.org/" + Addressable::URI.encode(doi)
|
290
132
|
end
|
291
133
|
|
292
134
|
def orcid_from_url(url)
|
data/lib/toccatore/cli.rb
CHANGED
@@ -1,7 +1,6 @@
|
|
1
1
|
# encoding: UTF-8
|
2
2
|
|
3
3
|
require "thor"
|
4
|
-
require_relative 'orcid_update'
|
5
4
|
|
6
5
|
module Toccatore
|
7
6
|
class CLI < Thor
|
@@ -30,5 +29,18 @@ module Toccatore
|
|
30
29
|
orcid_update = Toccatore::OrcidUpdate.new
|
31
30
|
orcid_update.queue_jobs(orcid_update.unfreeze(options))
|
32
31
|
end
|
32
|
+
|
33
|
+
# Thor command "datacite_related": builds a Toccatore::DataciteRelated and
# passes the parsed command-line options to its queue_jobs method.
desc "datacite_related", "push non-DataCite DOIs from DataCite MDS to Event Data"
|
34
|
+
method_option :access_token, type: :string, required: true
|
35
|
+
method_option :push_url, type: :string
|
36
|
+
# The two date defaults are computed once, when this class body is evaluated:
# yesterday and today as ISO 8601 date strings.
method_option :from_date, type: :string, default: (Time.now.to_date - 1.day).iso8601
|
37
|
+
method_option :until_date, type: :string, default: Time.now.to_date.iso8601
|
38
|
+
# NOTE(review): the query builder in base.rb reads options[:query], while this
# option is declared as :q -- confirm that the value is mapped somewhere.
method_option :q, type: :string
|
39
|
+
method_option :related_identifier, type: :string
|
40
|
+
method_option :doi, type: :string
|
41
|
+
def datacite_related
|
42
|
+
datacite_related = Toccatore::DataciteRelated.new
|
43
|
+
# options is passed through unfreeze before being handed to queue_jobs
datacite_related.queue_jobs(datacite_related.unfreeze(options))
|
44
|
+
end
|
33
45
|
end
|
34
46
|
end
|
@@ -0,0 +1,77 @@
|
|
1
|
+
require_relative 'base'
|
2
|
+
|
3
|
+
module Toccatore
  # Turns links from DataCite records to related DOIs into events and posts
  # them to an Event Data endpoint. Links whose target prefix is registered
  # with DataCite are left out.
  class DataciteRelated < Base
    # @return [String] identifier of this source
    def source_id
      "datacite_related"
    end

    # @return [String] search expression matching records that carry at least
    #   one relatedIdentifier of type DOI
    def query
      "relatedIdentifier:DOI\\:*"
    end

    # Builds one event per related DOI found in the search result.
    #
    # @param result [#body] response whose body is a Hash; the records are read
    #   from body["data"]["response"]["docs"]
    # @param options [Hash] accepted for interface compatibility, not read here
    # @return [Array<Hash>, Object] event hashes with the keys "id",
    #   "message_action", "subj_id", "obj_id", "relation_type_id", "source_id"
    #   and "occurred_at"; when the body carries "errors", that value is
    #   returned instead
    def parse_data(result, options={})
      return result.body.fetch("errors") if result.body.fetch("errors", nil).present?

      items = result.body.fetch("data", {}).fetch('response', {}).fetch('docs', nil)

      # Registration agency per DOI prefix. Presence of the key (not truthiness
      # of the value) marks a prefix as looked up, so a lookup that yields nil
      # is not repeated for every further DOI with the same prefix.
      registration_agencies = {}

      Array.wrap(items).flat_map do |item|
        pid = normalize_doi(item.fetch("doi"))
        related_doi_identifiers = item.fetch('relatedIdentifier', []).select { |id| id =~ /:DOI:.+/ }

        related_doi_identifiers.each_with_object([]) do |identifier, events|
          # entries look like "<RelationType>:DOI:<doi>"
          raw_relation_type, _related_identifier_type, related_identifier = identifier.split(':', 3)
          related_identifier = related_identifier.strip.downcase
          prefix = validate_prefix(related_identifier)

          # A value that does not parse as a DOI has no prefix and would
          # normalize to a nil obj_id, so no event is generated for it.
          next if prefix.nil?

          registration_agencies[prefix] = get_doi_ra(prefix) unless registration_agencies.key?(prefix)

          # check whether this is a DataCite DOI
          next if registration_agencies[prefix] == "DataCite"

          events << { "id" => SecureRandom.uuid,
                      "message_action" => "create",
                      "subj_id" => pid,
                      "obj_id" => normalize_doi(related_identifier),
                      "relation_type_id" => raw_relation_type.underscore,
                      "source_id" => "datacite",
                      "occurred_at" => item.fetch("minted") }
        end
      end
    end

    # Pushes the collected events to the Event Data API, one request per event.
    # Prints a message and pushes nothing when there are no events or when no
    # access token was given.
    #
    # @param items [Array<Hash>] events as produced by parse_data
    # @param options [Hash] reads :access_token, :from_date and :until_date;
    #   is also handed on to push_item
    def push_data(items, options={})
      if items.empty?
        puts "No works found for date range #{options[:from_date]} - #{options[:until_date]}."
      elsif options[:access_token].blank?
        puts "An error occured: Access token missing."
      else
        Array(items).each { |item| push_item(item, options) }
      end
    end

    # Posts a single event as JSON to "<host>/events" and prints the outcome.
    #
    # @param item [Hash] event as produced by parse_data
    # @param options [Hash] :access_token is sent as bearer token; :push_url
    #   overrides the default host "https://bus.eventdata.crossref.org"
    # @return [OpenStruct, nil] an OpenStruct whose body carries an "errors"
    #   entry when the access token is missing, otherwise the result of the
    #   final print statement (nil)
    def push_item(item, options={})
      return OpenStruct.new(body: { "errors" => [{ "title" => "Access token missing." }] }) if options[:access_token].blank?

      host = options[:push_url].presence || "https://bus.eventdata.crossref.org"
      push_url = host + "/events"

      response = Maremma.post(push_url, data: item.to_json,
                                        bearer: options[:access_token],
                                        content_type: 'json',
                                        host: host)

      if response.status == 201
        puts "#{item['subj_id']} #{item['relation_type_id']} #{item['obj_id']} pushed to Event Data service."
      elsif response.body["errors"].present?
        puts "An error occured: #{response.body['errors'].first['title']}"
      end
    end
  end
end
|