toccatore 0.3 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: e67c0fc653ac9fc750cd7a75ecd0056c72dcdfec
4
- data.tar.gz: cf181d9073928728a382e6740912a982cb02fd61
3
+ metadata.gz: a65af218bade17a4ae79a00468a16a7dae2c7fc9
4
+ data.tar.gz: 3f82e450b6738fe12f625219562f2d9595e9d859
5
5
  SHA512:
6
- metadata.gz: 4a1c3d00575f9289cc7938028829b49a1cddbb397419e17483360e4a6586566fbf3e20a1f760727e8a90df20b4dcfce06097ac87e2a96b09ffc12c567f555e16
7
- data.tar.gz: 43ae2875879cf644783244d554d52a6bde83c615bd5dbea12a441ac9291c61d65b2bc7fb8bc03a597f880676499d76c19c908d43cd490e8fbeb1ac52705d177d
6
+ metadata.gz: d4a593a23c7d0d747e8248de82b71093252f3a490fc3389ab364173eddb690b79991335726b7cb6bc6a1dce6d974ff0a6f46c87941a2b5f4bfa3dd5599b98cc4
7
+ data.tar.gz: f073088f4e916a6411ea7af84ace1ee31142b2c945b95dec5500c731a9070c6401cd20d1110c3d34a9edd295b39eab10c4133ebab6bd595867005fb15cdc0703
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- toccatore (0.3)
4
+ toccatore (0.3.1)
5
5
  activesupport (~> 4.2, >= 4.2.5)
6
6
  dotenv (~> 2.1, >= 2.1.1)
7
7
  gender_detector (~> 1.0)
@@ -77,7 +77,7 @@ module Toccatore
77
77
 
78
78
  def process_data(options = {})
79
79
  data = get_data(options.merge(timeout: timeout, source_id: source_id))
80
- data = parse_data(data, options.merge(source_id: source_id))
80
+ data = parse_data(data)
81
81
 
82
82
  return [OpenStruct.new(body: { "data" => [] })] if data.empty?
83
83
 
@@ -89,6 +89,16 @@ module Toccatore
89
89
  Maremma.get(query_url, options)
90
90
  end
91
91
 
92
+ def push_data(items, options={})
93
+ if items.empty?
94
+ puts "No works found for date range #{options[:from_date]} - #{options[:until_date]}."
95
+ elsif options[:access_token].blank?
96
+ puts "An error occured: Access token missing."
97
+ else
98
+ Array(items).each { |item| push_item(item, options) }
99
+ end
100
+ end
101
+
92
102
  def url
93
103
  "https://search.datacite.org/api?"
94
104
  end
@@ -145,7 +155,7 @@ module Toccatore
145
155
 
146
156
  # parse author string into CSL format
147
157
  # only assume personal name when using sort-order: "Turing, Alan"
148
- def get_one_author(author, options = {})
158
+ def get_one_author(author)
149
159
  return { "literal" => "" } if author.strip.blank?
150
160
 
151
161
  author = cleanup_author(author)
@@ -180,7 +190,7 @@ module Toccatore
180
190
 
181
191
  # parse array of author strings into CSL format
182
192
  def get_authors(authors, options={})
183
- Array(authors).map { |author| get_one_author(author, options) }
193
+ Array(authors).map { |author| get_one_author(author) }
184
194
  end
185
195
 
186
196
  # parse array of author hashes into CSL format
@@ -10,7 +10,7 @@ module Toccatore
10
10
  "relatedIdentifier:DOI\\:*"
11
11
  end
12
12
 
13
- def parse_data(result, options={})
13
+ def parse_data(result)
14
14
  return result.body.fetch("errors") if result.body.fetch("errors", nil).present?
15
15
 
16
16
  items = result.body.fetch("data", {}).fetch('response', {}).fetch('docs', nil)
@@ -21,23 +21,31 @@ module Toccatore
21
21
  pid = normalize_doi(doi)
22
22
  related_doi_identifiers = item.fetch('relatedIdentifier', []).select { |id| id =~ /:DOI:.+/ }
23
23
 
24
- sum += Array(related_doi_identifiers).reduce([]) do |ssum, iitem|
25
- raw_relation_type, _related_identifier_type, related_identifier = iitem.split(':', 3)
26
- related_identifier = related_identifier.strip.downcase
27
- prefix = validate_prefix(related_identifier)
28
- registration_agencies[prefix] = get_doi_ra(prefix) unless registration_agencies[prefix]
24
+ # don't generate event if there is a DOI for identical content with same prefix
25
+ skip_doi = related_doi_identifiers.any? do |related_identifier|
26
+ ["IsIdenticalTo"].include?(related_identifier.split(':', 3).first) &&
27
+ related_identifier.split(':', 3).last.to_s.starts_with?(validate_prefix(doi))
28
+ end
29
+
30
+ unless skip_doi
31
+ sum += Array(related_doi_identifiers).reduce([]) do |ssum, iitem|
32
+ raw_relation_type, _related_identifier_type, related_identifier = iitem.split(':', 3)
33
+ related_identifier = related_identifier.strip.downcase
34
+ prefix = validate_prefix(related_identifier)
35
+ registration_agencies[prefix] = get_doi_ra(prefix) unless registration_agencies[prefix]
29
36
 
30
- # check whether this is a DataCite DOI
31
- if registration_agencies[prefix] == "DataCite"
32
- ssum
33
- else
34
- ssum << { "id" => SecureRandom.uuid,
35
- "message_action" => "create",
36
- "subj_id" => pid,
37
- "obj_id" => normalize_doi(related_identifier),
38
- "relation_type_id" => raw_relation_type.underscore,
39
- "source_id" => "datacite",
40
- "occurred_at" => item.fetch("minted") }
37
+ # check whether this is a DataCite DOI
38
+ if registration_agencies[prefix] == "DataCite"
39
+ ssum
40
+ else
41
+ ssum << { "id" => SecureRandom.uuid,
42
+ "message_action" => "create",
43
+ "subj_id" => pid,
44
+ "obj_id" => normalize_doi(related_identifier),
45
+ "relation_type_id" => raw_relation_type.underscore,
46
+ "source_id" => "datacite",
47
+ "occurred_at" => item.fetch("minted") }
48
+ end
41
49
  end
42
50
  end
43
51
 
@@ -45,17 +53,6 @@ module Toccatore
45
53
  end
46
54
  end
47
55
 
48
- # push to Event Data API if no error and we have collected works
49
- def push_data(items, options={})
50
- if items.empty?
51
- puts "No works found for date range #{options[:from_date]} - #{options[:until_date]}."
52
- elsif options[:access_token].blank?
53
- puts "An error occured: Access token missing."
54
- else
55
- Array(items).each { |item| push_item(item, options) }
56
- end
57
- end
58
-
59
56
  def push_item(item, options={})
60
57
  return OpenStruct.new(body: { "errors" => [{ "title" => "Access token missing." }] }) if options[:access_token].blank?
61
58
 
@@ -43,17 +43,6 @@ module Toccatore
43
43
  end
44
44
  end
45
45
 
46
- # push to Volpino API if no error and we have collected works
47
- def push_data(items, options={})
48
- if items.empty?
49
- puts "No works found for date range #{options[:from_date]} - #{options[:until_date]}."
50
- elsif options[:access_token].blank?
51
- puts "An error occured: Access token missing."
52
- else
53
- Array(items).each { |item| push_item(item, options) }
54
- end
55
- end
56
-
57
46
  def push_item(item, options={})
58
47
  return OpenStruct.new(body: { "errors" => [{ "title" => "Access token missing." }] }) if options[:access_token].blank?
59
48
 
@@ -1,3 +1,3 @@
1
1
  module Toccatore
2
- VERSION = "0.3"
2
+ VERSION = "0.3.1"
3
3
  end
data/spec/cli_spec.rb CHANGED
@@ -8,7 +8,7 @@ describe Toccatore::CLI do
8
8
 
9
9
  describe "version" do
10
10
  it 'has version' do
11
- expect { subject.__print_version }.to output("0.3\n").to_stdout
11
+ expect { subject.__print_version }.to output("0.3.1\n").to_stdout
12
12
  end
13
13
  end
14
14
 
@@ -32,10 +32,10 @@ describe Toccatore::CLI do
32
32
  expect { subject.orcid_update }.to output(/DOI 10.5438\/6423 for ORCID ID 0000-0001-5331-6592 pushed to Profiles service.\n/).to_stdout
33
33
  end
34
34
 
35
- it 'should delete' do
36
- subject.options = cli_options.merge(doi: "10.6084/M9.FIGSHARE.4126869.V1", from_date: "2013-01-01", until_date: "2017-12-31", claim_action: "delete")
37
- expect { subject.orcid_update }.to output(/Delete DOI 10.6084\/M9.FIGSHARE.4126869.V1 for ORCID ID 0000-0003-1013-1533 pushed to Profiles service.\n/).to_stdout
38
- end
35
+ # it 'should delete' do
36
+ # subject.options = cli_options.merge(doi: "10.6084/M9.FIGSHARE.4126869.V1", from_date: "2013-01-01", until_date: "2017-12-31", claim_action: "delete")
37
+ # expect { subject.orcid_update }.to output(/Delete DOI 10.6084\/M9.FIGSHARE.4126869.V1 for ORCID ID 0000-0003-1013-1533 pushed to Profiles service.\n/).to_stdout
38
+ # end
39
39
 
40
40
  it 'should query by ORCID ID' do
41
41
  subject.options = cli_options.merge(orcid: "0000-0002-3546-1048", from_date: "2013-01-01", until_date: "2017-12-31")
@@ -105,6 +105,12 @@ describe Toccatore::DataciteRelated, vcr: true do
105
105
  expect(response.last.except("id")).to eq("message_action" => "create", "subj_id"=>"https://doi.org/10.17180/obs.yzeron", "obj_id"=>"https://doi.org/10.1016/j.jhydrol.2013.09.055", "relation_type_id"=>"is_referenced_by", "source_id"=>"datacite", "occurred_at"=>"2015-04-07T12:22:40Z")
106
106
  end
107
107
 
108
+ it "should report if there are works ignored because of an IsIdenticalTo relation" do
109
+ body = File.read(fixture_path + 'datacite_related_is_identical.json')
110
+ result = OpenStruct.new(body: { "data" => JSON.parse(body) })
111
+ expect(subject.parse_data(result)).to eq([])
112
+ end
113
+
108
114
  it "should catch timeout errors with the Datacite Metadata Search API" do
109
115
  result = OpenStruct.new(body: { "errors" => [{ "title" => "the server responded with status 408 for https://search.datacite.org", "status" => 408 }] })
110
116
  response = subject.parse_data(result)
@@ -0,0 +1,20 @@
1
+ {
2
+ "responseHeader": {
3
+ "status": 0,
4
+ "QTime": 0
5
+ },
6
+ "response": {
7
+ "numFound": 1,
8
+ "start": 0,
9
+ "docs": [{
10
+ "minted": "2016-07-29T18:02:34Z",
11
+ "updated": "2017-03-09T18:31:17Z",
12
+ "doi": "10.6084/M9.FIGSHARE.3505442.V1",
13
+ "relatedIdentifier": [
14
+ "IsSupplementTo:DOI:10.1080/07391102.2016.1189358",
15
+ "IsIdenticalTo:DOI:10.6084/m9.figshare.3505442"
16
+ ],
17
+ "resourceTypeGeneral": "Other"
18
+ }]
19
+ }
20
+ }
@@ -23,7 +23,7 @@ http_interactions:
23
23
  Content-Type:
24
24
  - application/json;charset=UTF-8
25
25
  Date:
26
- - Sat, 11 Mar 2017 10:47:29 GMT
26
+ - Sat, 11 Mar 2017 12:15:48 GMT
27
27
  Server:
28
28
  - openresty/1.11.2.2
29
29
  Connection:
@@ -34,7 +34,7 @@ http_interactions:
34
34
 
35
35
  '
36
36
  http_version:
37
- recorded_at: Sat, 11 Mar 2017 10:47:29 GMT
37
+ recorded_at: Sat, 11 Mar 2017 12:15:48 GMT
38
38
  - request:
39
39
  method: get
40
40
  uri: https://search.datacite.org/api?fl=doi,resourceTypeGeneral,relatedIdentifier,nameIdentifier,minted,updated&fq=updated:%5B2013-01-01T00:00:00Z%20TO%202017-12-31T23:59:59Z%5D%20AND%20has_metadata:true%20AND%20is_active:true&q=doi:10.6084/M9.FIGSHARE.4126869.V1&rows=1000&start=0&wt=json
@@ -58,75 +58,16 @@ http_interactions:
58
58
  Content-Type:
59
59
  - application/json;charset=UTF-8
60
60
  Date:
61
- - Sat, 11 Mar 2017 10:47:29 GMT
61
+ - Sat, 11 Mar 2017 12:15:48 GMT
62
62
  Server:
63
63
  - openresty/1.11.2.2
64
64
  Connection:
65
65
  - keep-alive
66
66
  body:
67
67
  encoding: UTF-8
68
- string: '{"responseHeader":{"status":0,"QTime":0},"response":{"numFound":1,"start":0,"docs":[{"minted":"2016-10-28T21:43:49Z","updated":"2016-10-28T21:44:42Z","doi":"10.6084/M9.FIGSHARE.4126869.V1","resourceTypeGeneral":"Image","relatedIdentifier":["IsIdenticalTo:DOI:10.6084/m9.figshare.4126869"],"nameIdentifier":["ORCID:0000-0003-1013-1533"]}]}}
68
+ string: '{"responseHeader":{"status":0,"QTime":1},"response":{"numFound":1,"start":0,"docs":[{"minted":"2016-10-28T21:43:49Z","updated":"2016-10-28T21:44:42Z","doi":"10.6084/M9.FIGSHARE.4126869.V1","resourceTypeGeneral":"Image","relatedIdentifier":["IsIdenticalTo:DOI:10.6084/m9.figshare.4126869"],"nameIdentifier":["ORCID:0000-0003-1013-1533"]}]}}
69
69
 
70
70
  '
71
71
  http_version:
72
- recorded_at: Sat, 11 Mar 2017 10:47:29 GMT
73
- - request:
74
- method: post
75
- uri: https://profiles.test.datacite.org/api/claims
76
- body:
77
- encoding: UTF-8
78
- string: '{"claim":{"orcid":"0000-0003-1013-1533","doi":"10.6084/M9.FIGSHARE.4126869.V1","source_id":"orcid_update","claim_action":"delete"}}'
79
- headers:
80
- User-Agent:
81
- - Maremma - https://github.com/datacite/maremma
82
- Content-Type:
83
- - application/json
84
- Accept:
85
- - text/html,application/json,application/xml;q=0.9, text/plain;q=0.8,image/png,*/*;q=0.5
86
- Authorization:
87
- - Token token=<VOLPINO_TOKEN>
88
- response:
89
- status:
90
- code: 202
91
- message: ''
92
- headers:
93
- Access-Control-Allow-Headers:
94
- - Origin, Content-Type, Accept, Authorization, Token
95
- Access-Control-Allow-Methods:
96
- - GET, POST, OPTIONS
97
- Access-Control-Allow-Origin:
98
- - "*"
99
- Access-Control-Max-Age:
100
- - '1728000'
101
- Cache-Control:
102
- - no-cache
103
- Content-Type:
104
- - application/json; charset=utf-8
105
- Date:
106
- - Sat, 11 Mar 2017 10:47:29 GMT
107
- Server:
108
- - openresty/1.11.2.2
109
- Status:
110
- - 202 Accepted
111
- Vary:
112
- - Accept-Encoding
113
- X-Content-Type-Options:
114
- - nosniff
115
- X-Frame-Options:
116
- - SAMEORIGIN
117
- X-Powered-By:
118
- - Phusion Passenger 5.1.2
119
- X-Request-Id:
120
- - dac42b2e-a5e3-485a-977c-c41c21f32e8c
121
- X-Runtime:
122
- - '0.011878'
123
- X-Xss-Protection:
124
- - 1; mode=block
125
- Connection:
126
- - keep-alive
127
- body:
128
- encoding: UTF-8
129
- string: '{"data":{"id":"df624600-24c0-4d97-a85e-ca8fe165fe40","type":"claims","attributes":{"orcid":"0000-0003-1013-1533","doi":"10.6084/M9.FIGSHARE.4126869.V1","source-id":"orcid_update","state":"waiting","claim-action":"delete","claimed-at":null}}}'
130
- http_version:
131
- recorded_at: Sat, 11 Mar 2017 10:47:29 GMT
72
+ recorded_at: Sat, 11 Mar 2017 12:15:48 GMT
132
73
  recorded_with: VCR 3.0.3
@@ -23,18 +23,18 @@ http_interactions:
23
23
  Content-Type:
24
24
  - application/json;charset=UTF-8
25
25
  Date:
26
- - Sat, 11 Mar 2017 10:47:46 GMT
26
+ - Sat, 11 Mar 2017 11:13:05 GMT
27
27
  Server:
28
28
  - openresty/1.11.2.2
29
29
  Connection:
30
30
  - keep-alive
31
31
  body:
32
32
  encoding: UTF-8
33
- string: '{"responseHeader":{"status":0,"QTime":1},"response":{"numFound":55,"start":0,"docs":[]}}
33
+ string: '{"responseHeader":{"status":0,"QTime":0},"response":{"numFound":55,"start":0,"docs":[]}}
34
34
 
35
35
  '
36
36
  http_version:
37
- recorded_at: Sat, 11 Mar 2017 10:47:46 GMT
37
+ recorded_at: Sat, 11 Mar 2017 11:13:05 GMT
38
38
  - request:
39
39
  method: get
40
40
  uri: https://search.datacite.org/api?fl=doi,resourceTypeGeneral,relatedIdentifier,nameIdentifier,minted,updated&fq=updated:%5B2015-04-07T00:00:00Z%20TO%202015-04-08T23:59:59Z%5D%20AND%20has_metadata:true%20AND%20is_active:true&q=nameIdentifier:ORCID%5C:*&rows=1000&start=0&wt=json
@@ -58,7 +58,7 @@ http_interactions:
58
58
  Content-Type:
59
59
  - application/json;charset=UTF-8
60
60
  Date:
61
- - Sat, 11 Mar 2017 10:47:46 GMT
61
+ - Sat, 11 Mar 2017 11:13:05 GMT
62
62
  Server:
63
63
  - openresty/1.11.2.2
64
64
  Connection:
@@ -69,5 +69,5 @@ http_interactions:
69
69
 
70
70
  '
71
71
  http_version:
72
- recorded_at: Sat, 11 Mar 2017 10:47:46 GMT
72
+ recorded_at: Sat, 11 Mar 2017 11:13:05 GMT
73
73
  recorded_with: VCR 3.0.3