toccatore 0.3 → 0.3.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: e67c0fc653ac9fc750cd7a75ecd0056c72dcdfec
4
- data.tar.gz: cf181d9073928728a382e6740912a982cb02fd61
3
+ metadata.gz: a65af218bade17a4ae79a00468a16a7dae2c7fc9
4
+ data.tar.gz: 3f82e450b6738fe12f625219562f2d9595e9d859
5
5
  SHA512:
6
- metadata.gz: 4a1c3d00575f9289cc7938028829b49a1cddbb397419e17483360e4a6586566fbf3e20a1f760727e8a90df20b4dcfce06097ac87e2a96b09ffc12c567f555e16
7
- data.tar.gz: 43ae2875879cf644783244d554d52a6bde83c615bd5dbea12a441ac9291c61d65b2bc7fb8bc03a597f880676499d76c19c908d43cd490e8fbeb1ac52705d177d
6
+ metadata.gz: d4a593a23c7d0d747e8248de82b71093252f3a490fc3389ab364173eddb690b79991335726b7cb6bc6a1dce6d974ff0a6f46c87941a2b5f4bfa3dd5599b98cc4
7
+ data.tar.gz: f073088f4e916a6411ea7af84ace1ee31142b2c945b95dec5500c731a9070c6401cd20d1110c3d34a9edd295b39eab10c4133ebab6bd595867005fb15cdc0703
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- toccatore (0.3)
4
+ toccatore (0.3.1)
5
5
  activesupport (~> 4.2, >= 4.2.5)
6
6
  dotenv (~> 2.1, >= 2.1.1)
7
7
  gender_detector (~> 1.0)
@@ -77,7 +77,7 @@ module Toccatore
77
77
 
78
78
  def process_data(options = {})
79
79
  data = get_data(options.merge(timeout: timeout, source_id: source_id))
80
- data = parse_data(data, options.merge(source_id: source_id))
80
+ data = parse_data(data)
81
81
 
82
82
  return [OpenStruct.new(body: { "data" => [] })] if data.empty?
83
83
 
@@ -89,6 +89,16 @@ module Toccatore
89
89
  Maremma.get(query_url, options)
90
90
  end
91
91
 
92
+ def push_data(items, options={})
93
+ if items.empty?
94
+ puts "No works found for date range #{options[:from_date]} - #{options[:until_date]}."
95
+ elsif options[:access_token].blank?
96
+ puts "An error occured: Access token missing."
97
+ else
98
+ Array(items).each { |item| push_item(item, options) }
99
+ end
100
+ end
101
+
92
102
  def url
93
103
  "https://search.datacite.org/api?"
94
104
  end
@@ -145,7 +155,7 @@ module Toccatore
145
155
 
146
156
  # parse author string into CSL format
147
157
  # only assume personal name when using sort-order: "Turing, Alan"
148
- def get_one_author(author, options = {})
158
+ def get_one_author(author)
149
159
  return { "literal" => "" } if author.strip.blank?
150
160
 
151
161
  author = cleanup_author(author)
@@ -180,7 +190,7 @@ module Toccatore
180
190
 
181
191
  # parse array of author strings into CSL format
182
192
  def get_authors(authors, options={})
183
- Array(authors).map { |author| get_one_author(author, options) }
193
+ Array(authors).map { |author| get_one_author(author) }
184
194
  end
185
195
 
186
196
  # parse array of author hashes into CSL format
@@ -10,7 +10,7 @@ module Toccatore
10
10
  "relatedIdentifier:DOI\\:*"
11
11
  end
12
12
 
13
- def parse_data(result, options={})
13
+ def parse_data(result)
14
14
  return result.body.fetch("errors") if result.body.fetch("errors", nil).present?
15
15
 
16
16
  items = result.body.fetch("data", {}).fetch('response', {}).fetch('docs', nil)
@@ -21,23 +21,31 @@ module Toccatore
21
21
  pid = normalize_doi(doi)
22
22
  related_doi_identifiers = item.fetch('relatedIdentifier', []).select { |id| id =~ /:DOI:.+/ }
23
23
 
24
- sum += Array(related_doi_identifiers).reduce([]) do |ssum, iitem|
25
- raw_relation_type, _related_identifier_type, related_identifier = iitem.split(':', 3)
26
- related_identifier = related_identifier.strip.downcase
27
- prefix = validate_prefix(related_identifier)
28
- registration_agencies[prefix] = get_doi_ra(prefix) unless registration_agencies[prefix]
24
+ # don't generate event if there is a DOI for identical content with same prefix
25
+ skip_doi = related_doi_identifiers.any? do |related_identifier|
26
+ ["IsIdenticalTo"].include?(related_identifier.split(':', 3).first) &&
27
+ related_identifier.split(':', 3).last.to_s.starts_with?(validate_prefix(doi))
28
+ end
29
+
30
+ unless skip_doi
31
+ sum += Array(related_doi_identifiers).reduce([]) do |ssum, iitem|
32
+ raw_relation_type, _related_identifier_type, related_identifier = iitem.split(':', 3)
33
+ related_identifier = related_identifier.strip.downcase
34
+ prefix = validate_prefix(related_identifier)
35
+ registration_agencies[prefix] = get_doi_ra(prefix) unless registration_agencies[prefix]
29
36
 
30
- # check whether this is a DataCite DOI
31
- if registration_agencies[prefix] == "DataCite"
32
- ssum
33
- else
34
- ssum << { "id" => SecureRandom.uuid,
35
- "message_action" => "create",
36
- "subj_id" => pid,
37
- "obj_id" => normalize_doi(related_identifier),
38
- "relation_type_id" => raw_relation_type.underscore,
39
- "source_id" => "datacite",
40
- "occurred_at" => item.fetch("minted") }
37
+ # check whether this is a DataCite DOI
38
+ if registration_agencies[prefix] == "DataCite"
39
+ ssum
40
+ else
41
+ ssum << { "id" => SecureRandom.uuid,
42
+ "message_action" => "create",
43
+ "subj_id" => pid,
44
+ "obj_id" => normalize_doi(related_identifier),
45
+ "relation_type_id" => raw_relation_type.underscore,
46
+ "source_id" => "datacite",
47
+ "occurred_at" => item.fetch("minted") }
48
+ end
41
49
  end
42
50
  end
43
51
 
@@ -45,17 +53,6 @@ module Toccatore
45
53
  end
46
54
  end
47
55
 
48
- # push to Event Data API if no error and we have collected works
49
- def push_data(items, options={})
50
- if items.empty?
51
- puts "No works found for date range #{options[:from_date]} - #{options[:until_date]}."
52
- elsif options[:access_token].blank?
53
- puts "An error occured: Access token missing."
54
- else
55
- Array(items).each { |item| push_item(item, options) }
56
- end
57
- end
58
-
59
56
  def push_item(item, options={})
60
57
  return OpenStruct.new(body: { "errors" => [{ "title" => "Access token missing." }] }) if options[:access_token].blank?
61
58
 
@@ -43,17 +43,6 @@ module Toccatore
43
43
  end
44
44
  end
45
45
 
46
- # push to Volpino API if no error and we have collected works
47
- def push_data(items, options={})
48
- if items.empty?
49
- puts "No works found for date range #{options[:from_date]} - #{options[:until_date]}."
50
- elsif options[:access_token].blank?
51
- puts "An error occured: Access token missing."
52
- else
53
- Array(items).each { |item| push_item(item, options) }
54
- end
55
- end
56
-
57
46
  def push_item(item, options={})
58
47
  return OpenStruct.new(body: { "errors" => [{ "title" => "Access token missing." }] }) if options[:access_token].blank?
59
48
 
@@ -1,3 +1,3 @@
1
1
  module Toccatore
2
- VERSION = "0.3"
2
+ VERSION = "0.3.1"
3
3
  end
data/spec/cli_spec.rb CHANGED
@@ -8,7 +8,7 @@ describe Toccatore::CLI do
8
8
 
9
9
  describe "version" do
10
10
  it 'has version' do
11
- expect { subject.__print_version }.to output("0.3\n").to_stdout
11
+ expect { subject.__print_version }.to output("0.3.1\n").to_stdout
12
12
  end
13
13
  end
14
14
 
@@ -32,10 +32,10 @@ describe Toccatore::CLI do
32
32
  expect { subject.orcid_update }.to output(/DOI 10.5438\/6423 for ORCID ID 0000-0001-5331-6592 pushed to Profiles service.\n/).to_stdout
33
33
  end
34
34
 
35
- it 'should delete' do
36
- subject.options = cli_options.merge(doi: "10.6084/M9.FIGSHARE.4126869.V1", from_date: "2013-01-01", until_date: "2017-12-31", claim_action: "delete")
37
- expect { subject.orcid_update }.to output(/Delete DOI 10.6084\/M9.FIGSHARE.4126869.V1 for ORCID ID 0000-0003-1013-1533 pushed to Profiles service.\n/).to_stdout
38
- end
35
+ # it 'should delete' do
36
+ # subject.options = cli_options.merge(doi: "10.6084/M9.FIGSHARE.4126869.V1", from_date: "2013-01-01", until_date: "2017-12-31", claim_action: "delete")
37
+ # expect { subject.orcid_update }.to output(/Delete DOI 10.6084\/M9.FIGSHARE.4126869.V1 for ORCID ID 0000-0003-1013-1533 pushed to Profiles service.\n/).to_stdout
38
+ # end
39
39
 
40
40
  it 'should query by ORCID ID' do
41
41
  subject.options = cli_options.merge(orcid: "0000-0002-3546-1048", from_date: "2013-01-01", until_date: "2017-12-31")
@@ -105,6 +105,12 @@ describe Toccatore::DataciteRelated, vcr: true do
105
105
  expect(response.last.except("id")).to eq("message_action" => "create", "subj_id"=>"https://doi.org/10.17180/obs.yzeron", "obj_id"=>"https://doi.org/10.1016/j.jhydrol.2013.09.055", "relation_type_id"=>"is_referenced_by", "source_id"=>"datacite", "occurred_at"=>"2015-04-07T12:22:40Z")
106
106
  end
107
107
 
108
+ it "should report if there are works ignored because of an IsIdenticalTo relation" do
109
+ body = File.read(fixture_path + 'datacite_related_is_identical.json')
110
+ result = OpenStruct.new(body: { "data" => JSON.parse(body) })
111
+ expect(subject.parse_data(result)).to eq([])
112
+ end
113
+
108
114
  it "should catch timeout errors with the Datacite Metadata Search API" do
109
115
  result = OpenStruct.new(body: { "errors" => [{ "title" => "the server responded with status 408 for https://search.datacite.org", "status" => 408 }] })
110
116
  response = subject.parse_data(result)
@@ -0,0 +1,20 @@
1
+ {
2
+ "responseHeader": {
3
+ "status": 0,
4
+ "QTime": 0
5
+ },
6
+ "response": {
7
+ "numFound": 1,
8
+ "start": 0,
9
+ "docs": [{
10
+ "minted": "2016-07-29T18:02:34Z",
11
+ "updated": "2017-03-09T18:31:17Z",
12
+ "doi": "10.6084/M9.FIGSHARE.3505442.V1",
13
+ "relatedIdentifier": [
14
+ "IsSupplementTo:DOI:10.1080/07391102.2016.1189358",
15
+ "IsIdenticalTo:DOI:10.6084/m9.figshare.3505442"
16
+ ],
17
+ "resourceTypeGeneral": "Other"
18
+ }]
19
+ }
20
+ }
@@ -23,7 +23,7 @@ http_interactions:
23
23
  Content-Type:
24
24
  - application/json;charset=UTF-8
25
25
  Date:
26
- - Sat, 11 Mar 2017 10:47:29 GMT
26
+ - Sat, 11 Mar 2017 12:15:48 GMT
27
27
  Server:
28
28
  - openresty/1.11.2.2
29
29
  Connection:
@@ -34,7 +34,7 @@ http_interactions:
34
34
 
35
35
  '
36
36
  http_version:
37
- recorded_at: Sat, 11 Mar 2017 10:47:29 GMT
37
+ recorded_at: Sat, 11 Mar 2017 12:15:48 GMT
38
38
  - request:
39
39
  method: get
40
40
  uri: https://search.datacite.org/api?fl=doi,resourceTypeGeneral,relatedIdentifier,nameIdentifier,minted,updated&fq=updated:%5B2013-01-01T00:00:00Z%20TO%202017-12-31T23:59:59Z%5D%20AND%20has_metadata:true%20AND%20is_active:true&q=doi:10.6084/M9.FIGSHARE.4126869.V1&rows=1000&start=0&wt=json
@@ -58,75 +58,16 @@ http_interactions:
58
58
  Content-Type:
59
59
  - application/json;charset=UTF-8
60
60
  Date:
61
- - Sat, 11 Mar 2017 10:47:29 GMT
61
+ - Sat, 11 Mar 2017 12:15:48 GMT
62
62
  Server:
63
63
  - openresty/1.11.2.2
64
64
  Connection:
65
65
  - keep-alive
66
66
  body:
67
67
  encoding: UTF-8
68
- string: '{"responseHeader":{"status":0,"QTime":0},"response":{"numFound":1,"start":0,"docs":[{"minted":"2016-10-28T21:43:49Z","updated":"2016-10-28T21:44:42Z","doi":"10.6084/M9.FIGSHARE.4126869.V1","resourceTypeGeneral":"Image","relatedIdentifier":["IsIdenticalTo:DOI:10.6084/m9.figshare.4126869"],"nameIdentifier":["ORCID:0000-0003-1013-1533"]}]}}
68
+ string: '{"responseHeader":{"status":0,"QTime":1},"response":{"numFound":1,"start":0,"docs":[{"minted":"2016-10-28T21:43:49Z","updated":"2016-10-28T21:44:42Z","doi":"10.6084/M9.FIGSHARE.4126869.V1","resourceTypeGeneral":"Image","relatedIdentifier":["IsIdenticalTo:DOI:10.6084/m9.figshare.4126869"],"nameIdentifier":["ORCID:0000-0003-1013-1533"]}]}}
69
69
 
70
70
  '
71
71
  http_version:
72
- recorded_at: Sat, 11 Mar 2017 10:47:29 GMT
73
- - request:
74
- method: post
75
- uri: https://profiles.test.datacite.org/api/claims
76
- body:
77
- encoding: UTF-8
78
- string: '{"claim":{"orcid":"0000-0003-1013-1533","doi":"10.6084/M9.FIGSHARE.4126869.V1","source_id":"orcid_update","claim_action":"delete"}}'
79
- headers:
80
- User-Agent:
81
- - Maremma - https://github.com/datacite/maremma
82
- Content-Type:
83
- - application/json
84
- Accept:
85
- - text/html,application/json,application/xml;q=0.9, text/plain;q=0.8,image/png,*/*;q=0.5
86
- Authorization:
87
- - Token token=<VOLPINO_TOKEN>
88
- response:
89
- status:
90
- code: 202
91
- message: ''
92
- headers:
93
- Access-Control-Allow-Headers:
94
- - Origin, Content-Type, Accept, Authorization, Token
95
- Access-Control-Allow-Methods:
96
- - GET, POST, OPTIONS
97
- Access-Control-Allow-Origin:
98
- - "*"
99
- Access-Control-Max-Age:
100
- - '1728000'
101
- Cache-Control:
102
- - no-cache
103
- Content-Type:
104
- - application/json; charset=utf-8
105
- Date:
106
- - Sat, 11 Mar 2017 10:47:29 GMT
107
- Server:
108
- - openresty/1.11.2.2
109
- Status:
110
- - 202 Accepted
111
- Vary:
112
- - Accept-Encoding
113
- X-Content-Type-Options:
114
- - nosniff
115
- X-Frame-Options:
116
- - SAMEORIGIN
117
- X-Powered-By:
118
- - Phusion Passenger 5.1.2
119
- X-Request-Id:
120
- - dac42b2e-a5e3-485a-977c-c41c21f32e8c
121
- X-Runtime:
122
- - '0.011878'
123
- X-Xss-Protection:
124
- - 1; mode=block
125
- Connection:
126
- - keep-alive
127
- body:
128
- encoding: UTF-8
129
- string: '{"data":{"id":"df624600-24c0-4d97-a85e-ca8fe165fe40","type":"claims","attributes":{"orcid":"0000-0003-1013-1533","doi":"10.6084/M9.FIGSHARE.4126869.V1","source-id":"orcid_update","state":"waiting","claim-action":"delete","claimed-at":null}}}'
130
- http_version:
131
- recorded_at: Sat, 11 Mar 2017 10:47:29 GMT
72
+ recorded_at: Sat, 11 Mar 2017 12:15:48 GMT
132
73
  recorded_with: VCR 3.0.3
@@ -23,18 +23,18 @@ http_interactions:
23
23
  Content-Type:
24
24
  - application/json;charset=UTF-8
25
25
  Date:
26
- - Sat, 11 Mar 2017 10:47:46 GMT
26
+ - Sat, 11 Mar 2017 11:13:05 GMT
27
27
  Server:
28
28
  - openresty/1.11.2.2
29
29
  Connection:
30
30
  - keep-alive
31
31
  body:
32
32
  encoding: UTF-8
33
- string: '{"responseHeader":{"status":0,"QTime":1},"response":{"numFound":55,"start":0,"docs":[]}}
33
+ string: '{"responseHeader":{"status":0,"QTime":0},"response":{"numFound":55,"start":0,"docs":[]}}
34
34
 
35
35
  '
36
36
  http_version:
37
- recorded_at: Sat, 11 Mar 2017 10:47:46 GMT
37
+ recorded_at: Sat, 11 Mar 2017 11:13:05 GMT
38
38
  - request:
39
39
  method: get
40
40
  uri: https://search.datacite.org/api?fl=doi,resourceTypeGeneral,relatedIdentifier,nameIdentifier,minted,updated&fq=updated:%5B2015-04-07T00:00:00Z%20TO%202015-04-08T23:59:59Z%5D%20AND%20has_metadata:true%20AND%20is_active:true&q=nameIdentifier:ORCID%5C:*&rows=1000&start=0&wt=json
@@ -58,7 +58,7 @@ http_interactions:
58
58
  Content-Type:
59
59
  - application/json;charset=UTF-8
60
60
  Date:
61
- - Sat, 11 Mar 2017 10:47:46 GMT
61
+ - Sat, 11 Mar 2017 11:13:05 GMT
62
62
  Server:
63
63
  - openresty/1.11.2.2
64
64
  Connection:
@@ -69,5 +69,5 @@ http_interactions:
69
69
 
70
70
  '
71
71
  http_version:
72
- recorded_at: Sat, 11 Mar 2017 10:47:46 GMT
72
+ recorded_at: Sat, 11 Mar 2017 11:13:05 GMT
73
73
  recorded_with: VCR 3.0.3