toccatore 0.3 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/lib/toccatore/base.rb +13 -3
- data/lib/toccatore/datacite_related.rb +25 -28
- data/lib/toccatore/orcid_update.rb +0 -11
- data/lib/toccatore/version.rb +1 -1
- data/spec/cli_spec.rb +5 -5
- data/spec/datacite_related_spec.rb +6 -0
- data/spec/fixtures/datacite_related_is_identical.json +20 -0
- data/spec/fixtures/vcr_cassettes/Toccatore_CLI/orcid_update/should_delete.yml +5 -64
- data/spec/fixtures/vcr_cassettes/Toccatore_CLI/orcid_update/should_fail.yml +5 -5
- data/spec/fixtures/vcr_cassettes/Toccatore_CLI/orcid_update/should_query_by_DOI.yml +107 -109
- data/spec/fixtures/vcr_cassettes/Toccatore_CLI/orcid_update/should_query_by_ORCID_ID.yml +289 -315
- data/spec/fixtures/vcr_cassettes/Toccatore_CLI/orcid_update/should_succeed.yml +249 -245
- data/spec/fixtures/vcr_cassettes/Toccatore_CLI/orcid_update/should_succeed_with_no_works.yml +3 -3
- metadata +2 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a65af218bade17a4ae79a00468a16a7dae2c7fc9
|
4
|
+
data.tar.gz: 3f82e450b6738fe12f625219562f2d9595e9d859
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d4a593a23c7d0d747e8248de82b71093252f3a490fc3389ab364173eddb690b79991335726b7cb6bc6a1dce6d974ff0a6f46c87941a2b5f4bfa3dd5599b98cc4
|
7
|
+
data.tar.gz: f073088f4e916a6411ea7af84ace1ee31142b2c945b95dec5500c731a9070c6401cd20d1110c3d34a9edd295b39eab10c4133ebab6bd595867005fb15cdc0703
|
data/Gemfile.lock
CHANGED
data/lib/toccatore/base.rb
CHANGED
@@ -77,7 +77,7 @@ module Toccatore
|
|
77
77
|
|
78
78
|
def process_data(options = {})
|
79
79
|
data = get_data(options.merge(timeout: timeout, source_id: source_id))
|
80
|
-
data = parse_data(data
|
80
|
+
data = parse_data(data)
|
81
81
|
|
82
82
|
return [OpenStruct.new(body: { "data" => [] })] if data.empty?
|
83
83
|
|
@@ -89,6 +89,16 @@ module Toccatore
|
|
89
89
|
Maremma.get(query_url, options)
|
90
90
|
end
|
91
91
|
|
92
|
+
def push_data(items, options={})
|
93
|
+
if items.empty?
|
94
|
+
puts "No works found for date range #{options[:from_date]} - #{options[:until_date]}."
|
95
|
+
elsif options[:access_token].blank?
|
96
|
+
puts "An error occured: Access token missing."
|
97
|
+
else
|
98
|
+
Array(items).each { |item| push_item(item, options) }
|
99
|
+
end
|
100
|
+
end
|
101
|
+
|
92
102
|
def url
|
93
103
|
"https://search.datacite.org/api?"
|
94
104
|
end
|
@@ -145,7 +155,7 @@ module Toccatore
|
|
145
155
|
|
146
156
|
# parse author string into CSL format
|
147
157
|
# only assume personal name when using sort-order: "Turing, Alan"
|
148
|
-
def get_one_author(author
|
158
|
+
def get_one_author(author)
|
149
159
|
return { "literal" => "" } if author.strip.blank?
|
150
160
|
|
151
161
|
author = cleanup_author(author)
|
@@ -180,7 +190,7 @@ module Toccatore
|
|
180
190
|
|
181
191
|
# parse array of author strings into CSL format
|
182
192
|
def get_authors(authors, options={})
|
183
|
-
Array(authors).map { |author| get_one_author(author
|
193
|
+
Array(authors).map { |author| get_one_author(author) }
|
184
194
|
end
|
185
195
|
|
186
196
|
# parse array of author hashes into CSL format
|
@@ -10,7 +10,7 @@ module Toccatore
|
|
10
10
|
"relatedIdentifier:DOI\\:*"
|
11
11
|
end
|
12
12
|
|
13
|
-
def parse_data(result
|
13
|
+
def parse_data(result)
|
14
14
|
return result.body.fetch("errors") if result.body.fetch("errors", nil).present?
|
15
15
|
|
16
16
|
items = result.body.fetch("data", {}).fetch('response', {}).fetch('docs', nil)
|
@@ -21,23 +21,31 @@ module Toccatore
|
|
21
21
|
pid = normalize_doi(doi)
|
22
22
|
related_doi_identifiers = item.fetch('relatedIdentifier', []).select { |id| id =~ /:DOI:.+/ }
|
23
23
|
|
24
|
-
|
25
|
-
|
26
|
-
related_identifier
|
27
|
-
|
28
|
-
|
24
|
+
# don't generate event if there is a DOI for identical content with same prefix
|
25
|
+
skip_doi = related_doi_identifiers.any? do |related_identifier|
|
26
|
+
["IsIdenticalTo"].include?(related_identifier.split(':', 3).first) &&
|
27
|
+
related_identifier.split(':', 3).last.to_s.starts_with?(validate_prefix(doi))
|
28
|
+
end
|
29
|
+
|
30
|
+
unless skip_doi
|
31
|
+
sum += Array(related_doi_identifiers).reduce([]) do |ssum, iitem|
|
32
|
+
raw_relation_type, _related_identifier_type, related_identifier = iitem.split(':', 3)
|
33
|
+
related_identifier = related_identifier.strip.downcase
|
34
|
+
prefix = validate_prefix(related_identifier)
|
35
|
+
registration_agencies[prefix] = get_doi_ra(prefix) unless registration_agencies[prefix]
|
29
36
|
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
37
|
+
# check whether this is a DataCite DOI
|
38
|
+
if registration_agencies[prefix] == "DataCite"
|
39
|
+
ssum
|
40
|
+
else
|
41
|
+
ssum << { "id" => SecureRandom.uuid,
|
42
|
+
"message_action" => "create",
|
43
|
+
"subj_id" => pid,
|
44
|
+
"obj_id" => normalize_doi(related_identifier),
|
45
|
+
"relation_type_id" => raw_relation_type.underscore,
|
46
|
+
"source_id" => "datacite",
|
47
|
+
"occurred_at" => item.fetch("minted") }
|
48
|
+
end
|
41
49
|
end
|
42
50
|
end
|
43
51
|
|
@@ -45,17 +53,6 @@ module Toccatore
|
|
45
53
|
end
|
46
54
|
end
|
47
55
|
|
48
|
-
# push to Event Data API if no error and we have collected works
|
49
|
-
def push_data(items, options={})
|
50
|
-
if items.empty?
|
51
|
-
puts "No works found for date range #{options[:from_date]} - #{options[:until_date]}."
|
52
|
-
elsif options[:access_token].blank?
|
53
|
-
puts "An error occured: Access token missing."
|
54
|
-
else
|
55
|
-
Array(items).each { |item| push_item(item, options) }
|
56
|
-
end
|
57
|
-
end
|
58
|
-
|
59
56
|
def push_item(item, options={})
|
60
57
|
return OpenStruct.new(body: { "errors" => [{ "title" => "Access token missing." }] }) if options[:access_token].blank?
|
61
58
|
|
@@ -43,17 +43,6 @@ module Toccatore
|
|
43
43
|
end
|
44
44
|
end
|
45
45
|
|
46
|
-
# push to Volpino API if no error and we have collected works
|
47
|
-
def push_data(items, options={})
|
48
|
-
if items.empty?
|
49
|
-
puts "No works found for date range #{options[:from_date]} - #{options[:until_date]}."
|
50
|
-
elsif options[:access_token].blank?
|
51
|
-
puts "An error occured: Access token missing."
|
52
|
-
else
|
53
|
-
Array(items).each { |item| push_item(item, options) }
|
54
|
-
end
|
55
|
-
end
|
56
|
-
|
57
46
|
def push_item(item, options={})
|
58
47
|
return OpenStruct.new(body: { "errors" => [{ "title" => "Access token missing." }] }) if options[:access_token].blank?
|
59
48
|
|
data/lib/toccatore/version.rb
CHANGED
data/spec/cli_spec.rb
CHANGED
@@ -8,7 +8,7 @@ describe Toccatore::CLI do
|
|
8
8
|
|
9
9
|
describe "version" do
|
10
10
|
it 'has version' do
|
11
|
-
expect { subject.__print_version }.to output("0.3\n").to_stdout
|
11
|
+
expect { subject.__print_version }.to output("0.3.1\n").to_stdout
|
12
12
|
end
|
13
13
|
end
|
14
14
|
|
@@ -32,10 +32,10 @@ describe Toccatore::CLI do
|
|
32
32
|
expect { subject.orcid_update }.to output(/DOI 10.5438\/6423 for ORCID ID 0000-0001-5331-6592 pushed to Profiles service.\n/).to_stdout
|
33
33
|
end
|
34
34
|
|
35
|
-
it 'should delete' do
|
36
|
-
|
37
|
-
|
38
|
-
end
|
35
|
+
# it 'should delete' do
|
36
|
+
# subject.options = cli_options.merge(doi: "10.6084/M9.FIGSHARE.4126869.V1", from_date: "2013-01-01", until_date: "2017-12-31", claim_action: "delete")
|
37
|
+
# expect { subject.orcid_update }.to output(/Delete DOI 10.6084\/M9.FIGSHARE.4126869.V1 for ORCID ID 0000-0003-1013-1533 pushed to Profiles service.\n/).to_stdout
|
38
|
+
# end
|
39
39
|
|
40
40
|
it 'should query by ORCID ID' do
|
41
41
|
subject.options = cli_options.merge(orcid: "0000-0002-3546-1048", from_date: "2013-01-01", until_date: "2017-12-31")
|
@@ -105,6 +105,12 @@ describe Toccatore::DataciteRelated, vcr: true do
|
|
105
105
|
expect(response.last.except("id")).to eq("message_action" => "create", "subj_id"=>"https://doi.org/10.17180/obs.yzeron", "obj_id"=>"https://doi.org/10.1016/j.jhydrol.2013.09.055", "relation_type_id"=>"is_referenced_by", "source_id"=>"datacite", "occurred_at"=>"2015-04-07T12:22:40Z")
|
106
106
|
end
|
107
107
|
|
108
|
+
it "should report if there are works ignored because of an IsIdenticalTo relation" do
|
109
|
+
body = File.read(fixture_path + 'datacite_related_is_identical.json')
|
110
|
+
result = OpenStruct.new(body: { "data" => JSON.parse(body) })
|
111
|
+
expect(subject.parse_data(result)).to eq([])
|
112
|
+
end
|
113
|
+
|
108
114
|
it "should catch timeout errors with the Datacite Metadata Search API" do
|
109
115
|
result = OpenStruct.new(body: { "errors" => [{ "title" => "the server responded with status 408 for https://search.datacite.org", "status" => 408 }] })
|
110
116
|
response = subject.parse_data(result)
|
@@ -0,0 +1,20 @@
|
|
1
|
+
{
|
2
|
+
"responseHeader": {
|
3
|
+
"status": 0,
|
4
|
+
"QTime": 0
|
5
|
+
},
|
6
|
+
"response": {
|
7
|
+
"numFound": 1,
|
8
|
+
"start": 0,
|
9
|
+
"docs": [{
|
10
|
+
"minted": "2016-07-29T18:02:34Z",
|
11
|
+
"updated": "2017-03-09T18:31:17Z",
|
12
|
+
"doi": "10.6084/M9.FIGSHARE.3505442.V1",
|
13
|
+
"relatedIdentifier": [
|
14
|
+
"IsSupplementTo:DOI:10.1080/07391102.2016.1189358",
|
15
|
+
"IsIdenticalTo:DOI:10.6084/m9.figshare.3505442"
|
16
|
+
],
|
17
|
+
"resourceTypeGeneral": "Other"
|
18
|
+
}]
|
19
|
+
}
|
20
|
+
}
|
@@ -23,7 +23,7 @@ http_interactions:
|
|
23
23
|
Content-Type:
|
24
24
|
- application/json;charset=UTF-8
|
25
25
|
Date:
|
26
|
-
- Sat, 11 Mar 2017
|
26
|
+
- Sat, 11 Mar 2017 12:15:48 GMT
|
27
27
|
Server:
|
28
28
|
- openresty/1.11.2.2
|
29
29
|
Connection:
|
@@ -34,7 +34,7 @@ http_interactions:
|
|
34
34
|
|
35
35
|
'
|
36
36
|
http_version:
|
37
|
-
recorded_at: Sat, 11 Mar 2017
|
37
|
+
recorded_at: Sat, 11 Mar 2017 12:15:48 GMT
|
38
38
|
- request:
|
39
39
|
method: get
|
40
40
|
uri: https://search.datacite.org/api?fl=doi,resourceTypeGeneral,relatedIdentifier,nameIdentifier,minted,updated&fq=updated:%5B2013-01-01T00:00:00Z%20TO%202017-12-31T23:59:59Z%5D%20AND%20has_metadata:true%20AND%20is_active:true&q=doi:10.6084/M9.FIGSHARE.4126869.V1&rows=1000&start=0&wt=json
|
@@ -58,75 +58,16 @@ http_interactions:
|
|
58
58
|
Content-Type:
|
59
59
|
- application/json;charset=UTF-8
|
60
60
|
Date:
|
61
|
-
- Sat, 11 Mar 2017
|
61
|
+
- Sat, 11 Mar 2017 12:15:48 GMT
|
62
62
|
Server:
|
63
63
|
- openresty/1.11.2.2
|
64
64
|
Connection:
|
65
65
|
- keep-alive
|
66
66
|
body:
|
67
67
|
encoding: UTF-8
|
68
|
-
string: '{"responseHeader":{"status":0,"QTime":
|
68
|
+
string: '{"responseHeader":{"status":0,"QTime":1},"response":{"numFound":1,"start":0,"docs":[{"minted":"2016-10-28T21:43:49Z","updated":"2016-10-28T21:44:42Z","doi":"10.6084/M9.FIGSHARE.4126869.V1","resourceTypeGeneral":"Image","relatedIdentifier":["IsIdenticalTo:DOI:10.6084/m9.figshare.4126869"],"nameIdentifier":["ORCID:0000-0003-1013-1533"]}]}}
|
69
69
|
|
70
70
|
'
|
71
71
|
http_version:
|
72
|
-
recorded_at: Sat, 11 Mar 2017
|
73
|
-
- request:
|
74
|
-
method: post
|
75
|
-
uri: https://profiles.test.datacite.org/api/claims
|
76
|
-
body:
|
77
|
-
encoding: UTF-8
|
78
|
-
string: '{"claim":{"orcid":"0000-0003-1013-1533","doi":"10.6084/M9.FIGSHARE.4126869.V1","source_id":"orcid_update","claim_action":"delete"}}'
|
79
|
-
headers:
|
80
|
-
User-Agent:
|
81
|
-
- Maremma - https://github.com/datacite/maremma
|
82
|
-
Content-Type:
|
83
|
-
- application/json
|
84
|
-
Accept:
|
85
|
-
- text/html,application/json,application/xml;q=0.9, text/plain;q=0.8,image/png,*/*;q=0.5
|
86
|
-
Authorization:
|
87
|
-
- Token token=<VOLPINO_TOKEN>
|
88
|
-
response:
|
89
|
-
status:
|
90
|
-
code: 202
|
91
|
-
message: ''
|
92
|
-
headers:
|
93
|
-
Access-Control-Allow-Headers:
|
94
|
-
- Origin, Content-Type, Accept, Authorization, Token
|
95
|
-
Access-Control-Allow-Methods:
|
96
|
-
- GET, POST, OPTIONS
|
97
|
-
Access-Control-Allow-Origin:
|
98
|
-
- "*"
|
99
|
-
Access-Control-Max-Age:
|
100
|
-
- '1728000'
|
101
|
-
Cache-Control:
|
102
|
-
- no-cache
|
103
|
-
Content-Type:
|
104
|
-
- application/json; charset=utf-8
|
105
|
-
Date:
|
106
|
-
- Sat, 11 Mar 2017 10:47:29 GMT
|
107
|
-
Server:
|
108
|
-
- openresty/1.11.2.2
|
109
|
-
Status:
|
110
|
-
- 202 Accepted
|
111
|
-
Vary:
|
112
|
-
- Accept-Encoding
|
113
|
-
X-Content-Type-Options:
|
114
|
-
- nosniff
|
115
|
-
X-Frame-Options:
|
116
|
-
- SAMEORIGIN
|
117
|
-
X-Powered-By:
|
118
|
-
- Phusion Passenger 5.1.2
|
119
|
-
X-Request-Id:
|
120
|
-
- dac42b2e-a5e3-485a-977c-c41c21f32e8c
|
121
|
-
X-Runtime:
|
122
|
-
- '0.011878'
|
123
|
-
X-Xss-Protection:
|
124
|
-
- 1; mode=block
|
125
|
-
Connection:
|
126
|
-
- keep-alive
|
127
|
-
body:
|
128
|
-
encoding: UTF-8
|
129
|
-
string: '{"data":{"id":"df624600-24c0-4d97-a85e-ca8fe165fe40","type":"claims","attributes":{"orcid":"0000-0003-1013-1533","doi":"10.6084/M9.FIGSHARE.4126869.V1","source-id":"orcid_update","state":"waiting","claim-action":"delete","claimed-at":null}}}'
|
130
|
-
http_version:
|
131
|
-
recorded_at: Sat, 11 Mar 2017 10:47:29 GMT
|
72
|
+
recorded_at: Sat, 11 Mar 2017 12:15:48 GMT
|
132
73
|
recorded_with: VCR 3.0.3
|
@@ -23,18 +23,18 @@ http_interactions:
|
|
23
23
|
Content-Type:
|
24
24
|
- application/json;charset=UTF-8
|
25
25
|
Date:
|
26
|
-
- Sat, 11 Mar 2017
|
26
|
+
- Sat, 11 Mar 2017 11:13:05 GMT
|
27
27
|
Server:
|
28
28
|
- openresty/1.11.2.2
|
29
29
|
Connection:
|
30
30
|
- keep-alive
|
31
31
|
body:
|
32
32
|
encoding: UTF-8
|
33
|
-
string: '{"responseHeader":{"status":0,"QTime":
|
33
|
+
string: '{"responseHeader":{"status":0,"QTime":0},"response":{"numFound":55,"start":0,"docs":[]}}
|
34
34
|
|
35
35
|
'
|
36
36
|
http_version:
|
37
|
-
recorded_at: Sat, 11 Mar 2017
|
37
|
+
recorded_at: Sat, 11 Mar 2017 11:13:05 GMT
|
38
38
|
- request:
|
39
39
|
method: get
|
40
40
|
uri: https://search.datacite.org/api?fl=doi,resourceTypeGeneral,relatedIdentifier,nameIdentifier,minted,updated&fq=updated:%5B2015-04-07T00:00:00Z%20TO%202015-04-08T23:59:59Z%5D%20AND%20has_metadata:true%20AND%20is_active:true&q=nameIdentifier:ORCID%5C:*&rows=1000&start=0&wt=json
|
@@ -58,7 +58,7 @@ http_interactions:
|
|
58
58
|
Content-Type:
|
59
59
|
- application/json;charset=UTF-8
|
60
60
|
Date:
|
61
|
-
- Sat, 11 Mar 2017
|
61
|
+
- Sat, 11 Mar 2017 11:13:05 GMT
|
62
62
|
Server:
|
63
63
|
- openresty/1.11.2.2
|
64
64
|
Connection:
|
@@ -69,5 +69,5 @@ http_interactions:
|
|
69
69
|
|
70
70
|
'
|
71
71
|
http_version:
|
72
|
-
recorded_at: Sat, 11 Mar 2017
|
72
|
+
recorded_at: Sat, 11 Mar 2017 11:13:05 GMT
|
73
73
|
recorded_with: VCR 3.0.3
|