toccatore 0.3.9 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (32) hide show
  1. checksums.yaml +5 -5
  2. data/Gemfile.lock +31 -18
  3. data/README.md +1 -1
  4. data/lib/toccatore.rb +2 -0
  5. data/lib/toccatore/cli.rb +12 -0
  6. data/lib/toccatore/queue.rb +50 -0
  7. data/lib/toccatore/usage_update.rb +170 -0
  8. data/lib/toccatore/version.rb +1 -1
  9. data/spec/cli_spec.rb +34 -1
  10. data/spec/fixtures/event_data_resp_1 +2 -0
  11. data/spec/fixtures/event_data_resp_2 +4 -0
  12. data/spec/fixtures/usage_event.json +1 -0
  13. data/spec/fixtures/usage_event_fail.json +17 -0
  14. data/spec/fixtures/usage_events.json +63 -0
  15. data/spec/fixtures/usage_update.json +101 -0
  16. data/spec/fixtures/usage_update_1.json +36738 -0
  17. data/spec/fixtures/usage_update_2.json +171 -0
  18. data/spec/fixtures/usage_update_3.json +176 -0
  19. data/spec/fixtures/usage_update_4.json +101 -0
  20. data/spec/fixtures/usage_update_nil.json +6 -0
  21. data/spec/fixtures/vcr_cassettes/Toccatore_CLI/usage_update/no_reports_in_the_queue/should_succeed_with_no_works.yml +150 -0
  22. data/spec/fixtures/vcr_cassettes/Toccatore_CLI/usage_update/should_fail.yml +150 -0
  23. data/spec/fixtures/vcr_cassettes/Toccatore_CLI/usage_update/should_succeed_with_no_works.yml +150 -0
  24. data/spec/fixtures/vcr_cassettes/Toccatore_UsageUpdate/get_data/when_there_are_messages/should_return_the_data_for_one_message.yml +52 -0
  25. data/spec/fixtures/vcr_cassettes/Toccatore_UsageUpdate/get_data/when_there_is_ONE_message/should_return_the_data_for_one_message.yml +52 -0
  26. data/spec/fixtures/vcr_cassettes/Toccatore_UsageUpdate/push_data/should_fail_if_format_of_the_event_is_wrong.yml +199 -0
  27. data/spec/fixtures/vcr_cassettes/Toccatore_UsageUpdate/push_data/should_work_with_DataCite_Event_Data.yml +199 -0
  28. data/spec/queque_spec.rb +61 -0
  29. data/spec/spec_helper.rb +14 -0
  30. data/spec/usage_update_spec.rb +156 -0
  31. data/toccatore.gemspec +3 -1
  32. metadata +43 -7
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: e3f403a9be0abc0a29474ba5afe14da96d2fac0e
4
- data.tar.gz: 7d518eaf6d26315f8291b7c71e92a66c0f2a6cae
2
+ SHA256:
3
+ metadata.gz: b4f98467d1df1b0ce5226b887a5cd1d5b9e3fb551bfc631e9857fca366ee7d29
4
+ data.tar.gz: fd8c901be0ee18936711f73e959e19cee8e9ecdf114df8b6372af83e2e8922e5
5
5
  SHA512:
6
- metadata.gz: bfa83ed964ebd3fdb4cfb6bbf6ecec63477c1d1f3917cecb1040840584468af911add82a09f8c6d079ab814caa8091e3441489dd22ecfab2b67ef24bb98679db
7
- data.tar.gz: b8dd0cdd1975f5a0bd1508e6e8711d4e6e1538a46a8cd4b1fad0ec6a2837bb2c61ba54595f370a7c5d25b1153a5b36b707f7e559dd0b9ebf954db32f77518dc2
6
+ metadata.gz: 5e3f4a9db657604571d0be6f45222e8bae2a7b7c6c5e0945cf7aeb6aec0099d0caaa0f210665d2fa7c9f88a8dc74e5c9e4f4e2326b29df0332d93fd3d93e51e7
7
+ data.tar.gz: f81581346057dbe1e0d0e25c37bdfa2be5cf4ba00c59f5da36cb606a0806085c7a6b47724939e5fb032a93aa153090cf17b6c55d426e8480b61ebb5a5e99bebe
data/Gemfile.lock CHANGED
@@ -1,33 +1,44 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- toccatore (0.3.9)
4
+ toccatore (0.4.0)
5
5
  activesupport (~> 4.2, >= 4.2.5)
6
+ aws-sdk-sqs
6
7
  dotenv (~> 2.1, >= 2.1.1)
7
8
  gender_detector (~> 1.0)
8
9
  maremma (~> 3.5)
9
10
  namae (~> 0.11.0)
10
- slack-notifier (~> 2.1)
11
+ slack-notifier (= 2.2.2)
11
12
  thor (~> 0.19)
12
13
 
13
14
  GEM
14
15
  remote: https://rubygems.org/
15
16
  specs:
16
- activesupport (4.2.9)
17
+ activesupport (4.2.10)
17
18
  i18n (~> 0.7)
18
19
  minitest (~> 5.1)
19
20
  thread_safe (~> 0.3, >= 0.3.4)
20
21
  tzinfo (~> 1.1)
21
22
  addressable (2.5.1)
22
23
  public_suffix (~> 2.0, >= 2.0.2)
24
+ aws-partitions (1.84.0)
25
+ aws-sdk-core (3.20.2)
26
+ aws-partitions (~> 1.0)
27
+ aws-sigv4 (~> 1.0)
28
+ jmespath (~> 1.0)
29
+ aws-sdk-sqs (1.3.0)
30
+ aws-sdk-core (~> 3)
31
+ aws-sigv4 (~> 1.0)
32
+ aws-sigv4 (1.0.2)
23
33
  builder (3.2.3)
24
34
  codeclimate-test-reporter (1.0.8)
25
35
  simplecov (<= 0.13)
36
+ concurrent-ruby (1.0.5)
26
37
  crack (0.4.3)
27
38
  safe_yaml (~> 1.0.0)
28
39
  diff-lcs (1.3)
29
40
  docile (1.1.5)
30
- dotenv (2.2.1)
41
+ dotenv (2.4.0)
31
42
  excon (0.45.4)
32
43
  faraday (0.9.2)
33
44
  multipart-post (>= 1.2, < 3)
@@ -37,10 +48,12 @@ GEM
37
48
  faraday (>= 0.7.4, < 1.0)
38
49
  gender_detector (1.0.0)
39
50
  hashdiff (0.3.4)
40
- i18n (0.8.4)
51
+ i18n (0.9.5)
52
+ concurrent-ruby (~> 1.0)
53
+ jmespath (1.4.0)
41
54
  json (2.1.0)
42
- maremma (3.5.8)
43
- activesupport (>= 4.2.5)
55
+ maremma (3.6.2)
56
+ activesupport (>= 4.2.5, < 6)
44
57
  addressable (>= 2.3.6)
45
58
  builder (~> 3.2, >= 3.2.2)
46
59
  excon (~> 0.45.0)
@@ -48,16 +61,16 @@ GEM
48
61
  faraday-encoding (~> 0.0.1)
49
62
  faraday_middleware (~> 0.10.0)
50
63
  multi_json (~> 1.12)
51
- nokogiri (~> 1.6, >= 1.6.8)
52
- oj (~> 2.18, >= 2.18.1)
53
- mini_portile2 (2.2.0)
54
- minitest (5.10.2)
55
- multi_json (1.12.1)
64
+ nokogiri (~> 1.8.1)
65
+ oj (>= 2.8.3)
66
+ mini_portile2 (2.3.0)
67
+ minitest (5.11.3)
68
+ multi_json (1.13.1)
56
69
  multipart-post (2.0.0)
57
70
  namae (0.11.3)
58
- nokogiri (1.8.0)
59
- mini_portile2 (~> 2.2.0)
60
- oj (2.18.5)
71
+ nokogiri (1.8.2)
72
+ mini_portile2 (~> 2.3.0)
73
+ oj (3.6.0)
61
74
  public_suffix (2.0.5)
62
75
  rack (2.0.3)
63
76
  rack-test (0.6.3)
@@ -83,9 +96,9 @@ GEM
83
96
  simplecov-html (~> 0.10.0)
84
97
  simplecov-html (0.10.1)
85
98
  slack-notifier (2.2.2)
86
- thor (0.19.4)
99
+ thor (0.20.0)
87
100
  thread_safe (0.3.6)
88
- tzinfo (1.2.3)
101
+ tzinfo (1.2.5)
89
102
  thread_safe (~> 0.1)
90
103
  vcr (3.0.3)
91
104
  webmock (1.24.6)
@@ -108,4 +121,4 @@ DEPENDENCIES
108
121
  webmock (~> 1.22, >= 1.22.3)
109
122
 
110
123
  BUNDLED WITH
111
- 1.12.5
124
+ 1.16.1
data/README.md CHANGED
@@ -4,7 +4,7 @@
4
4
  [![Code Climate](https://codeclimate.com/github/datacite/toccatore/badges/gpa.svg)](https://codeclimate.com/github/datacite/toccatore)
5
5
  [![Test Coverage](https://codeclimate.com/github/datacite/toccatore/badges/coverage.svg)](https://codeclimate.com/github/datacite/toccatore/coverage)
6
6
 
7
- Agent for Event Data service, providing links to ORCID IDs and DOIs not from DataCite as related identifiers in DataCite metadata.
7
+ Agent for Event Data service. Extracts links to ORCID IDs and to non-DataCite DOIs from DataCite metadata, and pushes them to other services.
8
8
 
9
9
  ## Installation and use
10
10
 
data/lib/toccatore.rb CHANGED
@@ -1,4 +1,6 @@
1
1
  require "toccatore/orcid_update"
2
2
  require "toccatore/datacite_related"
3
+ require "toccatore/queue"
4
+ require "toccatore/usage_update"
3
5
  require "toccatore/cli"
4
6
  require "toccatore/version"
data/lib/toccatore/cli.rb CHANGED
@@ -46,5 +46,17 @@ module Toccatore
46
46
  datacite_related = Toccatore::DataciteRelated.new
47
47
  datacite_related.queue_jobs(datacite_related.unfreeze(options))
48
48
  end
49
+
50
# Thor command: hands the command-line options to the usage-report agent,
# which drains the usage reports queue and pushes the events it finds.
desc "usage_update", "push DataCite DOIs usage from DLM Hub to Event Data"
method_option :access_token, type: :string, required: true
method_option :source_token, type: :string, required: true
method_option :push_url, type: :string
method_option :slack_webhook_url, type: :string
method_option :doi, type: :string
method_option :jsonapi, type: :boolean, force: true
def usage_update
  agent = Toccatore::UsageUpdate.new
  # Thor supplies frozen options; the agent adds keys to the hash it is given
  agent.queue_jobs(agent.unfreeze(options))
end
49
61
  end
50
62
  end
@@ -0,0 +1,50 @@
1
+ require 'aws-sdk-sqs'
2
+
3
module Toccatore
  # Helpers for talking to the SQS queue that announces new usage reports.
  #
  # Every method except +queue+ reads the client from the +@sqs+ instance
  # variable, so the including class must assign it first (for example
  # <tt>@sqs = queue</tt>).
  module Queue
    # Builds a new SQS client for the region named in ENV['AWS_REGION'].
    #
    # @param options [Hash] accepted for interface compatibility; not used
    # @return [Aws::SQS::Client]
    def queue(options = {})
      Aws::SQS::Client.new(region: ENV['AWS_REGION'].to_s, stub_responses: false)
    end

    # Returns the approximate number of messages waiting in the queue.
    # Messages that are currently in flight are not counted.
    #
    # @param options [Hash] accepted for interface compatibility; not used
    # @return [Integer]
    def get_total(options = {})
      response = @sqs.get_queue_attributes(
        queue_url: queue_url,
        attribute_names: %w[ApproximateNumberOfMessages ApproximateNumberOfMessagesNotVisible]
      )

      response.attributes['ApproximateNumberOfMessages'].to_i
    end

    # Receives at most one message, waiting up to one second for it.
    #
    # @param options [Hash] accepted for interface compatibility; not used
    # @return the client's receive_message result
    def get_message(options = {})
      @sqs.receive_message(queue_url: queue_url, max_number_of_messages: 1, wait_time_seconds: 1)
    end

    # Deletes the first message of a receive result from the queue.
    #
    # @param options the object returned by +get_message+ (despite the name,
    #   not an options hash); it must respond to +messages+
    # @return [Integer] 0 when the message was deleted, 1 otherwise
    #   (including when there is no message to delete)
    def delete_message(options = {})
      message = options.respond_to?(:messages) ? options.messages.first : nil

      # An empty receive result used to raise NoMethodError here.
      if message.nil?
        puts "Could NOT delete Message: no message was received"
        return 1
      end

      receipt_handle = message[:receipt_handle]
      response = @sqs.delete_message(queue_url: queue_url, receipt_handle: receipt_handle)

      if response.successful?
        puts "Message #{receipt_handle} deleted"
        0
      else
        puts "Could NOT delete Message #{receipt_handle}"
        1
      end
    end

    # Looks up the URL of the usage queue.
    #
    # The queue name defaults to "<ENV['ENVIROMENT']>_usage". The spelling of
    # that environment variable is kept as-is because deployments depend on it.
    #
    # @param options [Hash] may carry :queue_name to override the default name
    # @return [String] the queue URL reported by SQS
    def queue_url(options = {})
      queue_name = options[:queue_name] || "#{ENV['ENVIROMENT']}_usage"
      @sqs.get_queue_url(queue_name: queue_name).queue_url
    end
  end
end
@@ -0,0 +1,170 @@
1
+ require_relative 'base'
2
+
3
+
4
module Toccatore
  # Agent that reads usage-report notifications from the SQS queue, fetches
  # each announced report, turns every dataset/metric pair into an event and
  # pushes those events to the Event Data service.
  class UsageUpdate < Base
    include Toccatore::Queue

    LICENSE = "https://creativecommons.org/publicdomain/zero/1.0/".freeze

    # @param options [Hash] forwarded to Toccatore::Queue#queue
    def initialize(options = {})
      @sqs = queue(options)
    end

    # Processes the messages currently waiting in the queue, prints a summary
    # and optionally sends it to Slack.
    #
    # The queue is polled once per message reported by +get_total+. The
    # previous `while total > 0` loop never changed +total+, so it could not
    # terminate once the queue was non-empty.
    #
    # @param options [Hash] CLI options; :offset, :total, :level and :title are
    #   written into this hash
    # @return [Integer] the number of messages reported by the queue
    def queue_jobs(options = {})
      total = get_total(options)
      error_total = 0

      if total < 1
        text = "No works found for in the Usage Reports Queue."
      else
        options[:total] = total
        # each process_data call receives at most one message
        total.times do |index|
          options[:offset] = index
          error_total += process_data(options)
        end
        text = "#{total} works processed with #{error_total} errors for Usage Reports Queue"
      end

      puts text
      # send slack notification
      options[:level] = total > 0 ? "good" : "warning"
      options[:title] = "Report for #{source_id}"
      send_notification_to_slack(text, options) if options[:slack_webhook_url].present?

      # return number of works queued
      total
    end

    # Receives one message, fetches and parses the report it points to, pushes
    # the resulting events and deletes the message.
    #
    # The message is left in the queue when nothing was pushed (no message,
    # report could not be fetched, or the report produced no events).
    #
    # @param options [Hash] CLI options, forwarded to parse_data and push_data
    # @return [Integer] number of errors (always an Integer, because
    #   queue_jobs adds it up)
    def process_data(options = {})
      message = get_message
      # The queue count is approximate, so a receive may come back empty.
      return 0 if message.messages.empty?

      report = get_data(message)
      report_errors = report.body.fetch("errors", nil)
      if report_errors.present?
        puts "An error occured: usage report could not be retrieved: #{report_errors}"
        return 1
      end

      parsed = Array.wrap(parse_data(report, options))
      # parse_data marks datasets it rejects with an OpenStruct instead of an event hash
      events, rejected = parsed.partition { |entry| entry.is_a?(Hash) }
      rejected.each { |entry| puts "An error occured: #{entry.body['errors']}" }

      error_total = rejected.size
      return error_total if events.empty?

      error_total += push_data(events, options)
      error_total + delete_message(message)
    end

    # Fetches the report announced by the first message of a receive result.
    # The message body is expected to be JSON with a "report_id" key that
    # holds the URL of the report.
    #
    # @param response the object returned by Toccatore::Queue#get_message
    # @return the Maremma response for the report, or an OpenStruct whose body
    #   carries an "errors" entry when no message was received
    def get_data(response)
      return OpenStruct.new(body: { "errors" => "Queue is empty" }) if response.messages.empty?

      body = JSON.parse(response.messages.first.body)
      Maremma.get(body["report_id"])
    end

    # Pushes every event and counts the failures.
    #
    # @param items [Array<Hash>] events as built by format_event
    # @param options [Hash] needs :access_token
    # @return [Integer] number of errors
    def push_data(items, options = {})
      if items.empty?
        puts "No works found in the Queue."
        0
      elsif options[:access_token].blank?
        puts "An error occured: Access token missing."
        # fall back to the batch size so the result is never nil
        options[:total] || Array(items).size
      else
        Array(items).reduce(0) { |error_total, item| error_total + push_item(item, options) }
      end
    end

    # Base URL that report ids are appended to, read from ENV['SASHIMI_URL'].
    def metrics_url
      ENV['SASHIMI_URL']
    end

    # Name used in the title of the Slack notification.
    def source_id
      "usage_update"
    end

    # Builds the event payload for one metric of one dataset.
    #
    # @param type [String] value for "relation-type-id"
    # @param data [Hash] needs :report_id, :created, :count, :pid, :created_at
    # @param options [Hash] needs :source_token
    # @return [Hash] the event, with a freshly generated UUID as "id"
    def format_event(type, data, options)
      {
        "id" => SecureRandom.uuid,
        "message-action" => "add",
        "subj-id" => data[:report_id],
        "subj" => {
          "pid" => data[:report_id],
          "issued" => data[:created]
        },
        "total" => data[:count],
        "obj-id" => data[:pid],
        "relation-type-id" => type,
        "source-id" => "datacite-usage",
        "source-token" => options[:source_token],
        "occurred-at" => data[:created_at],
        "license" => LICENSE
      }
    end

    # Converts a fetched report into events, one per dataset instance.
    #
    # @param result a response whose body holds either "errors" or
    #   "data" => { "report" => { "id", "report-header", "report-datasets" } }
    # @param options [Hash] forwarded to format_event
    # @return the "errors" payload when the response carries one, otherwise an
    #   Array of event hashes. Parsing stops at the first dataset with more
    #   than 8 instances; the events built so far are returned, followed by an
    #   OpenStruct describing the error.
    def parse_data(result, options = {})
      errors = result.body.fetch("errors", nil)
      return errors if errors.present?

      items = result.body.dig("data", "report", "report-datasets")
      header = result.body.dig("data", "report", "report-header")
      report_id = metrics_url + "/" + result.body.dig("data", "report", "id")
      created = header.fetch("created")

      events = []
      Array.wrap(items).each do |item|
        doi = item["dataset-id"].first["value"]
        data = {
          doi: doi,
          pid: normalize_doi(doi),
          created: created,
          report_id: report_id,
          created_at: created
        }

        # wrap before counting so a missing or single instance does not raise
        instances = Array.wrap(item.dig("performance", 0, "instance"))
        if instances.size > 8
          events << OpenStruct.new(body: { "errors" => "There are too many instances. There can only be 8" })
          return events
        end

        instances.each do |instance|
          event_type = "#{instance['metric-type']}-#{instance['access-method']}"
          events << format_event(event_type, data.merge(count: instance["count"]), options)
        end
      end
      events
    end

    # Posts one event to the Event Data service.
    #
    # @param item [Hash] event as built by format_event
    # @param options [Hash] needs :access_token; :push_url overrides the
    #   default host; :jsonapi wraps the event in a JSON API envelope
    # @return [Integer] 0 if the service answered 201, 1 otherwise. When the
    #   access token is missing an OpenStruct with an "errors" body is
    #   returned instead (unchanged from the original; push_data checks the
    #   token before calling this method).
    def push_item(item, options = {})
      return OpenStruct.new(body: { "errors" => [{ "title" => "Access token missing." }] }) if options[:access_token].blank?

      host = options[:push_url].presence || "https://api.test.datacite.org"
      push_url = host + "/events"

      payload =
        if options[:jsonapi]
          { "data" => {
            "id" => item["id"],
            "type" => "events",
            "attributes" => item.except("id") } }
        else
          item
        end

      response = Maremma.post(push_url, data: payload.to_json,
                                        bearer: options[:access_token],
                                        content_type: 'json',
                                        host: host)

      description = "#{item['subj-id']} #{item['relation-type-id']} #{item['obj-id']}"

      # return 0 if successful, 1 if error
      if response.status == 201
        puts "#{description} pushed to Event Data service."
        0
      elsif response.body["errors"].present?
        puts "#{description} had an error:"
        puts "#{response.body['errors'].first['title']}"
        1
      else
        # Any other status used to fall through and return nil.
        puts "#{description} had an error:"
        puts "Unexpected response status #{response.status}"
        1
      end
    end
  end
end
@@ -1,3 +1,3 @@
1
1
  module Toccatore
2
- VERSION = "0.3.9"
2
+ VERSION = "0.4.0"
3
3
  end
data/spec/cli_spec.rb CHANGED
@@ -8,7 +8,7 @@ describe Toccatore::CLI do
8
8
 
9
9
  describe "version" do
10
10
  it 'has version' do
11
- expect { subject.__print_version }.to output("0.3.9\n").to_stdout
11
+ expect { subject.__print_version }.to output("0.4.0\n").to_stdout
12
12
  end
13
13
  end
14
14
 
@@ -106,4 +106,37 @@ describe Toccatore::CLI do
106
106
  expect { subject.datacite_related }.to output(/An error occured: Access token missing.\n/).to_stdout
107
107
  end
108
108
  end
109
+
110
# CLI specs for the usage_update command. The HTTP traffic is replayed from
# VCR cassettes, so the example descriptions must stay as they are.
describe "usage_update", vcr: true, order: :defined do
  let(:push_url) { ENV['LAGOTTINO_URL'] }
  let(:access_token) { ENV['LAGOTTO_TOKEN'] }
  let(:source_token) { ENV['SOURCE_TOKEN'] }
  let(:slack_webhook_url) { ENV['SLACK_WEBHOOK_URL'] }
  let(:cli_options) do
    {
      push_url: push_url,
      slack_webhook_url: slack_webhook_url,
      access_token: access_token,
      source_token: source_token
    }
  end

  context "no reports in the queue" do
    it 'should succeed with no works' do
      # same options as the full set, minus the source token
      subject.options = cli_options.reject { |key, _value| key == :source_token }
      expected_output = "No works found for in the Usage Reports Queue.\n"

      expect { subject.usage_update }.to output(expected_output).to_stdout
    end
  end

  context "with reports in the queue" do
    # These examples need a real, working queue, so they stay disabled.
    # it 'should succeed' do
    #   subject.options = cli_options
    #   expect { subject.usage_update }.to output(/https:\/\/doi.org\/10.5281\/zenodo.16396 is_supplement_to https:\/\/doi.org\/10.1007\/s11548-015-1180-7 pushed to Event Data service.\n/).to_stdout
    # end
    # it 'should fail' do
    #   subject.options = cli_options.except(:access_token)
    #   expect { subject.usage_update }.to output(/An error occured: Access token missing.\n/).to_stdout
    # end
  end
end
109
142
  end
@@ -0,0 +1,2 @@
1
+ https://metrics.test.datacite.org/reports/2018-3-Dash total-dataset-investigations-regular https://doi.org/10.7291/d1q94r pushed to Event Data service.
2
+ https://metrics.test.datacite.org/reports/2018-3-Dash unique-dataset-investigations-regular https://doi.org/10.7291/d1q94r pushed to Event Data service.
@@ -0,0 +1,4 @@
1
+ https://metrics.test.datacite.org/reports/2018-3-Dash total-dataset-investigations-regular https://doi.org/10.7291/d1q94r pushed to Event Data service.
2
+ https://metrics.test.datacite.org/reports/2018-3-Dash unique-dataset-investigations-regular https://doi.org/10.7291/d1q94r pushed to Event Data service.
3
+ https://metrics.test.datacite.org/reports/2018-3-Dash Total-Dataset-Requests-Machine https://doi.org/10.6071/z7wc73 pushed to Event Data service.
4
+ https://metrics.test.datacite.org/reports/2018-3-Dash Unique-Dataset-Requests-Machine https://doi.org/10.6071/z7wc73 pushed to Event Data service.
@@ -0,0 +1 @@
1
+ {"report_id":"https://metrics.test.datacite.org/reports/2018-3-DataONE"}
@@ -0,0 +1,17 @@
1
+ [{
2
+ "id": 12332423432432,
3
+ "message-action": "add",
4
+ "subj": {
5
+ "pid": "https://metrics.test.datacite.org/reports/2018-3-Dash",
6
+ "issued": "2128-04-09"
7
+ },
8
+ "total": "208",
9
+ "obj-id": "https://doi.org/10.6071/z7wc73",
10
+ "relation-type-id": "Unique-Dataset-Requests-Machine",
11
+ "source-id": "datacite",
12
+ "source-token": "28276d12-b320-41ba-9272-bb0adc3466ff",
13
+ "occurred-at": "2128-04-09",
14
+ "license": "https://creativecommons.org/publicdomain/zero/1.0/"
15
+ }
16
+ ]
17
+