toccatore 0.3 → 0.3.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/lib/toccatore/base.rb +13 -3
- data/lib/toccatore/datacite_related.rb +25 -28
- data/lib/toccatore/orcid_update.rb +0 -11
- data/lib/toccatore/version.rb +1 -1
- data/spec/cli_spec.rb +5 -5
- data/spec/datacite_related_spec.rb +6 -0
- data/spec/fixtures/datacite_related_is_identical.json +20 -0
- data/spec/fixtures/vcr_cassettes/Toccatore_CLI/orcid_update/should_delete.yml +5 -64
- data/spec/fixtures/vcr_cassettes/Toccatore_CLI/orcid_update/should_fail.yml +5 -5
- data/spec/fixtures/vcr_cassettes/Toccatore_CLI/orcid_update/should_query_by_DOI.yml +107 -109
- data/spec/fixtures/vcr_cassettes/Toccatore_CLI/orcid_update/should_query_by_ORCID_ID.yml +289 -315
- data/spec/fixtures/vcr_cassettes/Toccatore_CLI/orcid_update/should_succeed.yml +249 -245
- data/spec/fixtures/vcr_cassettes/Toccatore_CLI/orcid_update/should_succeed_with_no_works.yml +3 -3
- metadata +2 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a65af218bade17a4ae79a00468a16a7dae2c7fc9
|
4
|
+
data.tar.gz: 3f82e450b6738fe12f625219562f2d9595e9d859
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d4a593a23c7d0d747e8248de82b71093252f3a490fc3389ab364173eddb690b79991335726b7cb6bc6a1dce6d974ff0a6f46c87941a2b5f4bfa3dd5599b98cc4
|
7
|
+
data.tar.gz: f073088f4e916a6411ea7af84ace1ee31142b2c945b95dec5500c731a9070c6401cd20d1110c3d34a9edd295b39eab10c4133ebab6bd595867005fb15cdc0703
|
data/Gemfile.lock
CHANGED
data/lib/toccatore/base.rb
CHANGED
@@ -77,7 +77,7 @@ module Toccatore
|
|
77
77
|
|
78
78
|
def process_data(options = {})
|
79
79
|
data = get_data(options.merge(timeout: timeout, source_id: source_id))
|
80
|
-
data = parse_data(data
|
80
|
+
data = parse_data(data)
|
81
81
|
|
82
82
|
return [OpenStruct.new(body: { "data" => [] })] if data.empty?
|
83
83
|
|
@@ -89,6 +89,16 @@ module Toccatore
|
|
89
89
|
Maremma.get(query_url, options)
|
90
90
|
end
|
91
91
|
|
92
|
+
def push_data(items, options={})
|
93
|
+
if items.empty?
|
94
|
+
puts "No works found for date range #{options[:from_date]} - #{options[:until_date]}."
|
95
|
+
elsif options[:access_token].blank?
|
96
|
+
puts "An error occured: Access token missing."
|
97
|
+
else
|
98
|
+
Array(items).each { |item| push_item(item, options) }
|
99
|
+
end
|
100
|
+
end
|
101
|
+
|
92
102
|
def url
|
93
103
|
"https://search.datacite.org/api?"
|
94
104
|
end
|
@@ -145,7 +155,7 @@ module Toccatore
|
|
145
155
|
|
146
156
|
# parse author string into CSL format
|
147
157
|
# only assume personal name when using sort-order: "Turing, Alan"
|
148
|
-
def get_one_author(author
|
158
|
+
def get_one_author(author)
|
149
159
|
return { "literal" => "" } if author.strip.blank?
|
150
160
|
|
151
161
|
author = cleanup_author(author)
|
@@ -180,7 +190,7 @@ module Toccatore
|
|
180
190
|
|
181
191
|
# parse array of author strings into CSL format
|
182
192
|
def get_authors(authors, options={})
|
183
|
-
Array(authors).map { |author| get_one_author(author
|
193
|
+
Array(authors).map { |author| get_one_author(author) }
|
184
194
|
end
|
185
195
|
|
186
196
|
# parse array of author hashes into CSL format
|
@@ -10,7 +10,7 @@ module Toccatore
|
|
10
10
|
"relatedIdentifier:DOI\\:*"
|
11
11
|
end
|
12
12
|
|
13
|
-
def parse_data(result
|
13
|
+
def parse_data(result)
|
14
14
|
return result.body.fetch("errors") if result.body.fetch("errors", nil).present?
|
15
15
|
|
16
16
|
items = result.body.fetch("data", {}).fetch('response', {}).fetch('docs', nil)
|
@@ -21,23 +21,31 @@ module Toccatore
|
|
21
21
|
pid = normalize_doi(doi)
|
22
22
|
related_doi_identifiers = item.fetch('relatedIdentifier', []).select { |id| id =~ /:DOI:.+/ }
|
23
23
|
|
24
|
-
|
25
|
-
|
26
|
-
related_identifier
|
27
|
-
|
28
|
-
|
24
|
+
# don't generate event if there is a DOI for identical content with same prefix
|
25
|
+
skip_doi = related_doi_identifiers.any? do |related_identifier|
|
26
|
+
["IsIdenticalTo"].include?(related_identifier.split(':', 3).first) &&
|
27
|
+
related_identifier.split(':', 3).last.to_s.starts_with?(validate_prefix(doi))
|
28
|
+
end
|
29
|
+
|
30
|
+
unless skip_doi
|
31
|
+
sum += Array(related_doi_identifiers).reduce([]) do |ssum, iitem|
|
32
|
+
raw_relation_type, _related_identifier_type, related_identifier = iitem.split(':', 3)
|
33
|
+
related_identifier = related_identifier.strip.downcase
|
34
|
+
prefix = validate_prefix(related_identifier)
|
35
|
+
registration_agencies[prefix] = get_doi_ra(prefix) unless registration_agencies[prefix]
|
29
36
|
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
37
|
+
# check whether this is a DataCite DOI
|
38
|
+
if registration_agencies[prefix] == "DataCite"
|
39
|
+
ssum
|
40
|
+
else
|
41
|
+
ssum << { "id" => SecureRandom.uuid,
|
42
|
+
"message_action" => "create",
|
43
|
+
"subj_id" => pid,
|
44
|
+
"obj_id" => normalize_doi(related_identifier),
|
45
|
+
"relation_type_id" => raw_relation_type.underscore,
|
46
|
+
"source_id" => "datacite",
|
47
|
+
"occurred_at" => item.fetch("minted") }
|
48
|
+
end
|
41
49
|
end
|
42
50
|
end
|
43
51
|
|
@@ -45,17 +53,6 @@ module Toccatore
|
|
45
53
|
end
|
46
54
|
end
|
47
55
|
|
48
|
-
# push to Event Data API if no error and we have collected works
|
49
|
-
def push_data(items, options={})
|
50
|
-
if items.empty?
|
51
|
-
puts "No works found for date range #{options[:from_date]} - #{options[:until_date]}."
|
52
|
-
elsif options[:access_token].blank?
|
53
|
-
puts "An error occured: Access token missing."
|
54
|
-
else
|
55
|
-
Array(items).each { |item| push_item(item, options) }
|
56
|
-
end
|
57
|
-
end
|
58
|
-
|
59
56
|
def push_item(item, options={})
|
60
57
|
return OpenStruct.new(body: { "errors" => [{ "title" => "Access token missing." }] }) if options[:access_token].blank?
|
61
58
|
|
@@ -43,17 +43,6 @@ module Toccatore
|
|
43
43
|
end
|
44
44
|
end
|
45
45
|
|
46
|
-
# push to Volpino API if no error and we have collected works
|
47
|
-
def push_data(items, options={})
|
48
|
-
if items.empty?
|
49
|
-
puts "No works found for date range #{options[:from_date]} - #{options[:until_date]}."
|
50
|
-
elsif options[:access_token].blank?
|
51
|
-
puts "An error occured: Access token missing."
|
52
|
-
else
|
53
|
-
Array(items).each { |item| push_item(item, options) }
|
54
|
-
end
|
55
|
-
end
|
56
|
-
|
57
46
|
def push_item(item, options={})
|
58
47
|
return OpenStruct.new(body: { "errors" => [{ "title" => "Access token missing." }] }) if options[:access_token].blank?
|
59
48
|
|
data/lib/toccatore/version.rb
CHANGED
data/spec/cli_spec.rb
CHANGED
@@ -8,7 +8,7 @@ describe Toccatore::CLI do
|
|
8
8
|
|
9
9
|
describe "version" do
|
10
10
|
it 'has version' do
|
11
|
-
expect { subject.__print_version }.to output("0.3\n").to_stdout
|
11
|
+
expect { subject.__print_version }.to output("0.3.1\n").to_stdout
|
12
12
|
end
|
13
13
|
end
|
14
14
|
|
@@ -32,10 +32,10 @@ describe Toccatore::CLI do
|
|
32
32
|
expect { subject.orcid_update }.to output(/DOI 10.5438\/6423 for ORCID ID 0000-0001-5331-6592 pushed to Profiles service.\n/).to_stdout
|
33
33
|
end
|
34
34
|
|
35
|
-
it 'should delete' do
|
36
|
-
|
37
|
-
|
38
|
-
end
|
35
|
+
# it 'should delete' do
|
36
|
+
# subject.options = cli_options.merge(doi: "10.6084/M9.FIGSHARE.4126869.V1", from_date: "2013-01-01", until_date: "2017-12-31", claim_action: "delete")
|
37
|
+
# expect { subject.orcid_update }.to output(/Delete DOI 10.6084\/M9.FIGSHARE.4126869.V1 for ORCID ID 0000-0003-1013-1533 pushed to Profiles service.\n/).to_stdout
|
38
|
+
# end
|
39
39
|
|
40
40
|
it 'should query by ORCID ID' do
|
41
41
|
subject.options = cli_options.merge(orcid: "0000-0002-3546-1048", from_date: "2013-01-01", until_date: "2017-12-31")
|
@@ -105,6 +105,12 @@ describe Toccatore::DataciteRelated, vcr: true do
|
|
105
105
|
expect(response.last.except("id")).to eq("message_action" => "create", "subj_id"=>"https://doi.org/10.17180/obs.yzeron", "obj_id"=>"https://doi.org/10.1016/j.jhydrol.2013.09.055", "relation_type_id"=>"is_referenced_by", "source_id"=>"datacite", "occurred_at"=>"2015-04-07T12:22:40Z")
|
106
106
|
end
|
107
107
|
|
108
|
+
it "should report if there are works ignored because of an IsIdenticalTo relation" do
|
109
|
+
body = File.read(fixture_path + 'datacite_related_is_identical.json')
|
110
|
+
result = OpenStruct.new(body: { "data" => JSON.parse(body) })
|
111
|
+
expect(subject.parse_data(result)).to eq([])
|
112
|
+
end
|
113
|
+
|
108
114
|
it "should catch timeout errors with the Datacite Metadata Search API" do
|
109
115
|
result = OpenStruct.new(body: { "errors" => [{ "title" => "the server responded with status 408 for https://search.datacite.org", "status" => 408 }] })
|
110
116
|
response = subject.parse_data(result)
|
@@ -0,0 +1,20 @@
|
|
1
|
+
{
|
2
|
+
"responseHeader": {
|
3
|
+
"status": 0,
|
4
|
+
"QTime": 0
|
5
|
+
},
|
6
|
+
"response": {
|
7
|
+
"numFound": 1,
|
8
|
+
"start": 0,
|
9
|
+
"docs": [{
|
10
|
+
"minted": "2016-07-29T18:02:34Z",
|
11
|
+
"updated": "2017-03-09T18:31:17Z",
|
12
|
+
"doi": "10.6084/M9.FIGSHARE.3505442.V1",
|
13
|
+
"relatedIdentifier": [
|
14
|
+
"IsSupplementTo:DOI:10.1080/07391102.2016.1189358",
|
15
|
+
"IsIdenticalTo:DOI:10.6084/m9.figshare.3505442"
|
16
|
+
],
|
17
|
+
"resourceTypeGeneral": "Other"
|
18
|
+
}]
|
19
|
+
}
|
20
|
+
}
|
@@ -23,7 +23,7 @@ http_interactions:
|
|
23
23
|
Content-Type:
|
24
24
|
- application/json;charset=UTF-8
|
25
25
|
Date:
|
26
|
-
- Sat, 11 Mar 2017
|
26
|
+
- Sat, 11 Mar 2017 12:15:48 GMT
|
27
27
|
Server:
|
28
28
|
- openresty/1.11.2.2
|
29
29
|
Connection:
|
@@ -34,7 +34,7 @@ http_interactions:
|
|
34
34
|
|
35
35
|
'
|
36
36
|
http_version:
|
37
|
-
recorded_at: Sat, 11 Mar 2017
|
37
|
+
recorded_at: Sat, 11 Mar 2017 12:15:48 GMT
|
38
38
|
- request:
|
39
39
|
method: get
|
40
40
|
uri: https://search.datacite.org/api?fl=doi,resourceTypeGeneral,relatedIdentifier,nameIdentifier,minted,updated&fq=updated:%5B2013-01-01T00:00:00Z%20TO%202017-12-31T23:59:59Z%5D%20AND%20has_metadata:true%20AND%20is_active:true&q=doi:10.6084/M9.FIGSHARE.4126869.V1&rows=1000&start=0&wt=json
|
@@ -58,75 +58,16 @@ http_interactions:
|
|
58
58
|
Content-Type:
|
59
59
|
- application/json;charset=UTF-8
|
60
60
|
Date:
|
61
|
-
- Sat, 11 Mar 2017
|
61
|
+
- Sat, 11 Mar 2017 12:15:48 GMT
|
62
62
|
Server:
|
63
63
|
- openresty/1.11.2.2
|
64
64
|
Connection:
|
65
65
|
- keep-alive
|
66
66
|
body:
|
67
67
|
encoding: UTF-8
|
68
|
-
string: '{"responseHeader":{"status":0,"QTime":
|
68
|
+
string: '{"responseHeader":{"status":0,"QTime":1},"response":{"numFound":1,"start":0,"docs":[{"minted":"2016-10-28T21:43:49Z","updated":"2016-10-28T21:44:42Z","doi":"10.6084/M9.FIGSHARE.4126869.V1","resourceTypeGeneral":"Image","relatedIdentifier":["IsIdenticalTo:DOI:10.6084/m9.figshare.4126869"],"nameIdentifier":["ORCID:0000-0003-1013-1533"]}]}}
|
69
69
|
|
70
70
|
'
|
71
71
|
http_version:
|
72
|
-
recorded_at: Sat, 11 Mar 2017
|
73
|
-
- request:
|
74
|
-
method: post
|
75
|
-
uri: https://profiles.test.datacite.org/api/claims
|
76
|
-
body:
|
77
|
-
encoding: UTF-8
|
78
|
-
string: '{"claim":{"orcid":"0000-0003-1013-1533","doi":"10.6084/M9.FIGSHARE.4126869.V1","source_id":"orcid_update","claim_action":"delete"}}'
|
79
|
-
headers:
|
80
|
-
User-Agent:
|
81
|
-
- Maremma - https://github.com/datacite/maremma
|
82
|
-
Content-Type:
|
83
|
-
- application/json
|
84
|
-
Accept:
|
85
|
-
- text/html,application/json,application/xml;q=0.9, text/plain;q=0.8,image/png,*/*;q=0.5
|
86
|
-
Authorization:
|
87
|
-
- Token token=<VOLPINO_TOKEN>
|
88
|
-
response:
|
89
|
-
status:
|
90
|
-
code: 202
|
91
|
-
message: ''
|
92
|
-
headers:
|
93
|
-
Access-Control-Allow-Headers:
|
94
|
-
- Origin, Content-Type, Accept, Authorization, Token
|
95
|
-
Access-Control-Allow-Methods:
|
96
|
-
- GET, POST, OPTIONS
|
97
|
-
Access-Control-Allow-Origin:
|
98
|
-
- "*"
|
99
|
-
Access-Control-Max-Age:
|
100
|
-
- '1728000'
|
101
|
-
Cache-Control:
|
102
|
-
- no-cache
|
103
|
-
Content-Type:
|
104
|
-
- application/json; charset=utf-8
|
105
|
-
Date:
|
106
|
-
- Sat, 11 Mar 2017 10:47:29 GMT
|
107
|
-
Server:
|
108
|
-
- openresty/1.11.2.2
|
109
|
-
Status:
|
110
|
-
- 202 Accepted
|
111
|
-
Vary:
|
112
|
-
- Accept-Encoding
|
113
|
-
X-Content-Type-Options:
|
114
|
-
- nosniff
|
115
|
-
X-Frame-Options:
|
116
|
-
- SAMEORIGIN
|
117
|
-
X-Powered-By:
|
118
|
-
- Phusion Passenger 5.1.2
|
119
|
-
X-Request-Id:
|
120
|
-
- dac42b2e-a5e3-485a-977c-c41c21f32e8c
|
121
|
-
X-Runtime:
|
122
|
-
- '0.011878'
|
123
|
-
X-Xss-Protection:
|
124
|
-
- 1; mode=block
|
125
|
-
Connection:
|
126
|
-
- keep-alive
|
127
|
-
body:
|
128
|
-
encoding: UTF-8
|
129
|
-
string: '{"data":{"id":"df624600-24c0-4d97-a85e-ca8fe165fe40","type":"claims","attributes":{"orcid":"0000-0003-1013-1533","doi":"10.6084/M9.FIGSHARE.4126869.V1","source-id":"orcid_update","state":"waiting","claim-action":"delete","claimed-at":null}}}'
|
130
|
-
http_version:
|
131
|
-
recorded_at: Sat, 11 Mar 2017 10:47:29 GMT
|
72
|
+
recorded_at: Sat, 11 Mar 2017 12:15:48 GMT
|
132
73
|
recorded_with: VCR 3.0.3
|
@@ -23,18 +23,18 @@ http_interactions:
|
|
23
23
|
Content-Type:
|
24
24
|
- application/json;charset=UTF-8
|
25
25
|
Date:
|
26
|
-
- Sat, 11 Mar 2017
|
26
|
+
- Sat, 11 Mar 2017 11:13:05 GMT
|
27
27
|
Server:
|
28
28
|
- openresty/1.11.2.2
|
29
29
|
Connection:
|
30
30
|
- keep-alive
|
31
31
|
body:
|
32
32
|
encoding: UTF-8
|
33
|
-
string: '{"responseHeader":{"status":0,"QTime":
|
33
|
+
string: '{"responseHeader":{"status":0,"QTime":0},"response":{"numFound":55,"start":0,"docs":[]}}
|
34
34
|
|
35
35
|
'
|
36
36
|
http_version:
|
37
|
-
recorded_at: Sat, 11 Mar 2017
|
37
|
+
recorded_at: Sat, 11 Mar 2017 11:13:05 GMT
|
38
38
|
- request:
|
39
39
|
method: get
|
40
40
|
uri: https://search.datacite.org/api?fl=doi,resourceTypeGeneral,relatedIdentifier,nameIdentifier,minted,updated&fq=updated:%5B2015-04-07T00:00:00Z%20TO%202015-04-08T23:59:59Z%5D%20AND%20has_metadata:true%20AND%20is_active:true&q=nameIdentifier:ORCID%5C:*&rows=1000&start=0&wt=json
|
@@ -58,7 +58,7 @@ http_interactions:
|
|
58
58
|
Content-Type:
|
59
59
|
- application/json;charset=UTF-8
|
60
60
|
Date:
|
61
|
-
- Sat, 11 Mar 2017
|
61
|
+
- Sat, 11 Mar 2017 11:13:05 GMT
|
62
62
|
Server:
|
63
63
|
- openresty/1.11.2.2
|
64
64
|
Connection:
|
@@ -69,5 +69,5 @@ http_interactions:
|
|
69
69
|
|
70
70
|
'
|
71
71
|
http_version:
|
72
|
-
recorded_at: Sat, 11 Mar 2017
|
72
|
+
recorded_at: Sat, 11 Mar 2017 11:13:05 GMT
|
73
73
|
recorded_with: VCR 3.0.3
|