toccatore 0.3.9 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32) hide show
  1. checksums.yaml +5 -5
  2. data/Gemfile.lock +31 -18
  3. data/README.md +1 -1
  4. data/lib/toccatore.rb +2 -0
  5. data/lib/toccatore/cli.rb +12 -0
  6. data/lib/toccatore/queue.rb +50 -0
  7. data/lib/toccatore/usage_update.rb +170 -0
  8. data/lib/toccatore/version.rb +1 -1
  9. data/spec/cli_spec.rb +34 -1
  10. data/spec/fixtures/event_data_resp_1 +2 -0
  11. data/spec/fixtures/event_data_resp_2 +4 -0
  12. data/spec/fixtures/usage_event.json +1 -0
  13. data/spec/fixtures/usage_event_fail.json +17 -0
  14. data/spec/fixtures/usage_events.json +63 -0
  15. data/spec/fixtures/usage_update.json +101 -0
  16. data/spec/fixtures/usage_update_1.json +36738 -0
  17. data/spec/fixtures/usage_update_2.json +171 -0
  18. data/spec/fixtures/usage_update_3.json +176 -0
  19. data/spec/fixtures/usage_update_4.json +101 -0
  20. data/spec/fixtures/usage_update_nil.json +6 -0
  21. data/spec/fixtures/vcr_cassettes/Toccatore_CLI/usage_update/no_reports_in_the_queue/should_succeed_with_no_works.yml +150 -0
  22. data/spec/fixtures/vcr_cassettes/Toccatore_CLI/usage_update/should_fail.yml +150 -0
  23. data/spec/fixtures/vcr_cassettes/Toccatore_CLI/usage_update/should_succeed_with_no_works.yml +150 -0
  24. data/spec/fixtures/vcr_cassettes/Toccatore_UsageUpdate/get_data/when_there_are_messages/should_return_the_data_for_one_message.yml +52 -0
  25. data/spec/fixtures/vcr_cassettes/Toccatore_UsageUpdate/get_data/when_there_is_ONE_message/should_return_the_data_for_one_message.yml +52 -0
  26. data/spec/fixtures/vcr_cassettes/Toccatore_UsageUpdate/push_data/should_fail_if_format_of_the_event_is_wrong.yml +199 -0
  27. data/spec/fixtures/vcr_cassettes/Toccatore_UsageUpdate/push_data/should_work_with_DataCite_Event_Data.yml +199 -0
  28. data/spec/queque_spec.rb +61 -0
  29. data/spec/spec_helper.rb +14 -0
  30. data/spec/usage_update_spec.rb +156 -0
  31. data/toccatore.gemspec +3 -1
  32. metadata +43 -7
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: e3f403a9be0abc0a29474ba5afe14da96d2fac0e
4
- data.tar.gz: 7d518eaf6d26315f8291b7c71e92a66c0f2a6cae
2
+ SHA256:
3
+ metadata.gz: b4f98467d1df1b0ce5226b887a5cd1d5b9e3fb551bfc631e9857fca366ee7d29
4
+ data.tar.gz: fd8c901be0ee18936711f73e959e19cee8e9ecdf114df8b6372af83e2e8922e5
5
5
  SHA512:
6
- metadata.gz: bfa83ed964ebd3fdb4cfb6bbf6ecec63477c1d1f3917cecb1040840584468af911add82a09f8c6d079ab814caa8091e3441489dd22ecfab2b67ef24bb98679db
7
- data.tar.gz: b8dd0cdd1975f5a0bd1508e6e8711d4e6e1538a46a8cd4b1fad0ec6a2837bb2c61ba54595f370a7c5d25b1153a5b36b707f7e559dd0b9ebf954db32f77518dc2
6
+ metadata.gz: 5e3f4a9db657604571d0be6f45222e8bae2a7b7c6c5e0945cf7aeb6aec0099d0caaa0f210665d2fa7c9f88a8dc74e5c9e4f4e2326b29df0332d93fd3d93e51e7
7
+ data.tar.gz: f81581346057dbe1e0d0e25c37bdfa2be5cf4ba00c59f5da36cb606a0806085c7a6b47724939e5fb032a93aa153090cf17b6c55d426e8480b61ebb5a5e99bebe
data/Gemfile.lock CHANGED
@@ -1,33 +1,44 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- toccatore (0.3.9)
4
+ toccatore (0.4.0)
5
5
  activesupport (~> 4.2, >= 4.2.5)
6
+ aws-sdk-sqs
6
7
  dotenv (~> 2.1, >= 2.1.1)
7
8
  gender_detector (~> 1.0)
8
9
  maremma (~> 3.5)
9
10
  namae (~> 0.11.0)
10
- slack-notifier (~> 2.1)
11
+ slack-notifier (= 2.2.2)
11
12
  thor (~> 0.19)
12
13
 
13
14
  GEM
14
15
  remote: https://rubygems.org/
15
16
  specs:
16
- activesupport (4.2.9)
17
+ activesupport (4.2.10)
17
18
  i18n (~> 0.7)
18
19
  minitest (~> 5.1)
19
20
  thread_safe (~> 0.3, >= 0.3.4)
20
21
  tzinfo (~> 1.1)
21
22
  addressable (2.5.1)
22
23
  public_suffix (~> 2.0, >= 2.0.2)
24
+ aws-partitions (1.84.0)
25
+ aws-sdk-core (3.20.2)
26
+ aws-partitions (~> 1.0)
27
+ aws-sigv4 (~> 1.0)
28
+ jmespath (~> 1.0)
29
+ aws-sdk-sqs (1.3.0)
30
+ aws-sdk-core (~> 3)
31
+ aws-sigv4 (~> 1.0)
32
+ aws-sigv4 (1.0.2)
23
33
  builder (3.2.3)
24
34
  codeclimate-test-reporter (1.0.8)
25
35
  simplecov (<= 0.13)
36
+ concurrent-ruby (1.0.5)
26
37
  crack (0.4.3)
27
38
  safe_yaml (~> 1.0.0)
28
39
  diff-lcs (1.3)
29
40
  docile (1.1.5)
30
- dotenv (2.2.1)
41
+ dotenv (2.4.0)
31
42
  excon (0.45.4)
32
43
  faraday (0.9.2)
33
44
  multipart-post (>= 1.2, < 3)
@@ -37,10 +48,12 @@ GEM
37
48
  faraday (>= 0.7.4, < 1.0)
38
49
  gender_detector (1.0.0)
39
50
  hashdiff (0.3.4)
40
- i18n (0.8.4)
51
+ i18n (0.9.5)
52
+ concurrent-ruby (~> 1.0)
53
+ jmespath (1.4.0)
41
54
  json (2.1.0)
42
- maremma (3.5.8)
43
- activesupport (>= 4.2.5)
55
+ maremma (3.6.2)
56
+ activesupport (>= 4.2.5, < 6)
44
57
  addressable (>= 2.3.6)
45
58
  builder (~> 3.2, >= 3.2.2)
46
59
  excon (~> 0.45.0)
@@ -48,16 +61,16 @@ GEM
48
61
  faraday-encoding (~> 0.0.1)
49
62
  faraday_middleware (~> 0.10.0)
50
63
  multi_json (~> 1.12)
51
- nokogiri (~> 1.6, >= 1.6.8)
52
- oj (~> 2.18, >= 2.18.1)
53
- mini_portile2 (2.2.0)
54
- minitest (5.10.2)
55
- multi_json (1.12.1)
64
+ nokogiri (~> 1.8.1)
65
+ oj (>= 2.8.3)
66
+ mini_portile2 (2.3.0)
67
+ minitest (5.11.3)
68
+ multi_json (1.13.1)
56
69
  multipart-post (2.0.0)
57
70
  namae (0.11.3)
58
- nokogiri (1.8.0)
59
- mini_portile2 (~> 2.2.0)
60
- oj (2.18.5)
71
+ nokogiri (1.8.2)
72
+ mini_portile2 (~> 2.3.0)
73
+ oj (3.6.0)
61
74
  public_suffix (2.0.5)
62
75
  rack (2.0.3)
63
76
  rack-test (0.6.3)
@@ -83,9 +96,9 @@ GEM
83
96
  simplecov-html (~> 0.10.0)
84
97
  simplecov-html (0.10.1)
85
98
  slack-notifier (2.2.2)
86
- thor (0.19.4)
99
+ thor (0.20.0)
87
100
  thread_safe (0.3.6)
88
- tzinfo (1.2.3)
101
+ tzinfo (1.2.5)
89
102
  thread_safe (~> 0.1)
90
103
  vcr (3.0.3)
91
104
  webmock (1.24.6)
@@ -108,4 +121,4 @@ DEPENDENCIES
108
121
  webmock (~> 1.22, >= 1.22.3)
109
122
 
110
123
  BUNDLED WITH
111
- 1.12.5
124
+ 1.16.1
data/README.md CHANGED
@@ -4,7 +4,7 @@
4
4
  [![Code Climate](https://codeclimate.com/github/datacite/toccatore/badges/gpa.svg)](https://codeclimate.com/github/datacite/toccatore)
5
5
  [![Test Coverage](https://codeclimate.com/github/datacite/toccatore/badges/coverage.svg)](https://codeclimate.com/github/datacite/toccatore/coverage)
6
6
 
7
- Agent for Event Data service, providing links to ORCID IDs and DOIs not from DataCite as related identifiers in DataCite metadata.
7
+ Agent for the Event Data service. Extracts links to ORCID IDs and to non-DataCite DOIs from DataCite metadata, and pushes them to other services.
8
8
 
9
9
  ## Installation and use
10
10
 
data/lib/toccatore.rb CHANGED
@@ -1,4 +1,6 @@
1
1
  require "toccatore/orcid_update"
2
2
  require "toccatore/datacite_related"
3
+ require "toccatore/queue"
4
+ require "toccatore/usage_update"
3
5
  require "toccatore/cli"
4
6
  require "toccatore/version"
data/lib/toccatore/cli.rb CHANGED
@@ -46,5 +46,17 @@ module Toccatore
46
46
  datacite_related = Toccatore::DataciteRelated.new
47
47
  datacite_related.queue_jobs(datacite_related.unfreeze(options))
48
48
  end
49
+
50
# Thor command: builds a Toccatore::UsageUpdate agent and hands it an
# unfrozen copy of the parsed command-line options.
desc "usage_update", "push DataCite DOIs usage from DLM Hub to Event Data"
method_option :access_token,      type: :string, required: true
method_option :source_token,      type: :string, required: true
method_option :push_url,          type: :string
method_option :slack_webhook_url, type: :string
method_option :doi,               type: :string
method_option :jsonapi,           type: :boolean, force: true
def usage_update
  agent = Toccatore::UsageUpdate.new
  job_options = agent.unfreeze(options)
  agent.queue_jobs(job_options)
end
49
61
  end
50
62
  end
@@ -0,0 +1,50 @@
1
+ require 'aws-sdk-sqs'
2
+
3
+ module Toccatore
4
+ module Queue
5
+
6
+ def queue options={}
7
+ Aws::SQS::Client.new(region: ENV['AWS_REGION'].to_s, stub_responses: false)
8
+ end
9
+
10
+ def get_total options={}
11
+ req = @sqs.get_queue_attributes(
12
+ {
13
+ queue_url: queue_url, attribute_names:
14
+ [
15
+ 'ApproximateNumberOfMessages',
16
+ 'ApproximateNumberOfMessagesNotVisible'
17
+ ]
18
+ }
19
+ )
20
+
21
+ msgs_available = req.attributes['ApproximateNumberOfMessages']
22
+ msgs_in_flight = req.attributes['ApproximateNumberOfMessagesNotVisible']
23
+ msgs_available.to_i
24
+ end
25
+
26
+ def get_message options={}
27
+ @sqs.receive_message(queue_url: queue_url, max_number_of_messages: 1, wait_time_seconds: 1)
28
+ end
29
+
30
+ def delete_message options={}
31
+ reponse = @sqs.delete_message({
32
+ queue_url: queue_url,
33
+ receipt_handle: options.messages[0][:receipt_handle]
34
+ })
35
+ if reponse.successful?
36
+ puts "Message #{options.messages[0][:receipt_handle]} deleted"
37
+ 0
38
+ else
39
+ puts "Could NOT delete Message #{options.messages[0][:receipt_handle]}"
40
+ 1
41
+ end
42
+
43
+ end
44
+
45
+ def queue_url options={}
46
+ queue_name = queue_name ||= "#{ENV['ENVIROMENT']}_usage"
47
+ @sqs.get_queue_url(queue_name: queue_name).queue_url
48
+ end
49
+ end
50
+ end
@@ -0,0 +1,170 @@
1
+ require_relative 'base'
2
+
3
+
4
module Toccatore
  # Agent that takes usage-report messages off the SQS queue (helpers from
  # Toccatore::Queue), fetches the report each message points to, turns its
  # per-dataset counts into events and POSTs them to the push URL.
  #
  # job_batch_size, normalize_doi and send_notification_to_slack are not
  # defined in this class; presumably they are inherited from Base -- confirm.
  class UsageUpdate < Base
    include Toccatore::Queue

    # License URL stamped on every event.
    LICENSE = "https://creativecommons.org/publicdomain/zero/1.0/".freeze

    # @param options [Hash] forwarded to Toccatore::Queue#queue
    def initialize(options = {})
      @sqs = queue(options)
    end

    # Works through the queue until SQS reports no more available messages,
    # prints a summary and optionally sends it to Slack.
    #
    # @param options [Hash] CLI options; :offset, :total, :level and :title
    #   are written into this hash while running
    # @return [Integer] number of messages the queue reported at the start
    def queue_jobs(options = {})
      total = get_total(options)

      if total < 1
        text = "No works found for in the Usage Reports Queue."
      else
        error_total = 0
        remaining = total

        # The count is re-read after every pass; the previous loop never
        # changed its own condition variable and therefore could not end.
        while remaining > 0
          # walk through paginated results
          total_pages = (remaining.to_f / job_batch_size).ceil

          (0...total_pages).each do |page|
            options[:offset] = page * job_batch_size
            options[:total] = remaining
            error_total += process_data(options)
          end

          remaining = get_total(options)
        end

        text = "#{total} works processed with #{error_total} errors for Usage Reports Queue"
      end

      puts text

      # send slack notification
      options[:level] = total > 0 ? "good" : "warning"
      options[:title] = "Report for #{source_id}"
      send_notification_to_slack(text, options) if options[:slack_webhook_url].present?

      # return number of works queued
      total
    end

    # Receives one message, pushes the events of the report it references
    # and deletes the message.
    #
    # @param options [Hash] passed on to parse_data and push_data
    # @return [Integer] number of errors (always an Integer, so the caller
    #   can add it to its running total)
    def process_data(options = {})
      message = get_message
      # Nothing was received: nothing to push and nothing to delete.
      return 0 if message.messages.empty?

      report = get_data(message)

      if report.body.fetch("errors", nil).present?
        # The report could not be fetched; count it instead of pushing the
        # error payload as if it were an event.
        puts "Could not fetch usage report: #{report.body['errors']}"
        error_total = 1
      else
        data = Array.wrap(parse_data(report, options))
        # As before, a report without datasets leaves its message untouched.
        return 0 if data.empty?

        # parse_data may append an error object after the events it built.
        events, rejected = data.partition { |entry| entry.is_a?(Hash) }
        rejected.each { |entry| puts "Usage report rejected: #{entry.body['errors']}" }
        error_total = rejected.size + push_data(events, options).to_i
      end

      # NOTE(review): as before, the message is deleted even when pushing
      # failed -- confirm this is the intended retry policy.
      error_total + delete_message(message)
    end

    # Fetches the report referenced by the first message of a receive result.
    #
    # @param reponse [#messages] object returned by get_message
    # @return [OpenStruct] body is { "errors" => "Queue is empty" } when no
    #   message was received; otherwise whatever Maremma.get returns for the
    #   "report_id" URL found in the message body
    def get_data(reponse)
      return OpenStruct.new(body: { "errors" => "Queue is empty" }) if reponse.messages.empty?

      body = JSON.parse(reponse.messages[0].body)
      Maremma.get(body["report_id"])
    end

    # Pushes every event to the Event Data service.
    #
    # @param items [Array<Hash>, Hash] events built by format_event
    # @param options [Hash] needs :access_token; see push_item
    # @return [Integer, nil] number of errors; options[:total] when the
    #   access token is missing
    def push_data(items, options = {})
      # Array.wrap keeps a single event Hash intact; Array() would split it
      # into key/value pairs.
      items = Array.wrap(items)

      if items.empty?
        puts "No works found in the Queue."
        0
      elsif options[:access_token].blank?
        puts "An error occured: Access token missing."
        options[:total]
      else
        items.sum { |item| push_item(item, options) }
      end
    end

    # @return [String, nil] value of ENV['SASHIMI_URL'], used as the prefix
    #   of report ids
    def metrics_url
      ENV['SASHIMI_URL']
    end

    # @return [String] identifier used in the notification title
    def source_id
      "usage_update"
    end

    # Builds one event hash.
    #
    # @param type [String] value for "relation-type-id"
    # @param data [Hash] reads :report_id, :created, :count, :pid, :created_at
    # @param options [Hash] reads :source_token
    # @return [Hash] event with string keys and a random UUID as "id"
    def format_event(type, data, options)
      { "id" => SecureRandom.uuid,
        "message-action" => "add",
        "subj-id" => data[:report_id],
        "subj" => {
          "pid" => data[:report_id],
          "issued" => data[:created]
        },
        "total" => data[:count],
        "obj-id" => data[:pid],
        "relation-type-id" => type,
        "source-id" => "datacite-usage",
        "source-token" => options[:source_token],
        "occurred-at" => data[:created_at],
        "license" => LICENSE }
    end

    # Converts a fetched report into events, one per performance instance of
    # every dataset in the report.
    #
    # @param result [#body] response whose body is the parsed report
    # @param options [Hash] passed on to format_event
    # @return [Object] the body's "errors" value when present; otherwise an
    #   Array of event hashes. When a dataset has more than 8 instances the
    #   events built so far are returned followed by one OpenStruct error and
    #   the remaining datasets are skipped.
    def parse_data(result, options = {})
      return result.body.fetch("errors") if result.body.fetch("errors", nil).present?

      items = result.body.dig("data", "report", "report-datasets")
      header = result.body.dig("data", "report", "report-header")
      report_id = metrics_url + "/" + result.body.dig("data", "report", "id")
      created = header.fetch("created")

      Array.wrap(items).reduce([]) do |events, item|
        doi = item.dig("dataset-id").first.dig("value")
        data = {
          doi: doi,
          pid: normalize_doi(doi),
          created: created,
          report_id: report_id,
          created_at: created
        }

        # Array.wrap: a dataset without performance data yields no events
        # instead of raising on nil.
        instances = Array.wrap(item.dig("performance", 0, "instance"))

        # NOTE(review): the limit checked is 8 but the message says 4; the
        # text is left untouched because callers may match on it.
        # `return` leaves parse_data itself, not just this block.
        return events + [OpenStruct.new(body: { "errors" => "There are too many instances. There can only be 4" })] if instances.size > 8

        dataset_events = instances.map do |instance|
          event_type = "#{instance.dig("metric-type")}-#{instance.dig("access-method")}"
          format_event(event_type, data.merge(count: instance.dig("count")), options)
        end

        events + dataset_events
      end
    end

    # POSTs one event to "<push_url>/events".
    #
    # @param item [Hash] event built by format_event
    # @param options [Hash] :access_token (bearer token), :push_url (defaults
    #   to https://api.test.datacite.org), :jsonapi (wrap the event in a
    #   JSON API "data" envelope)
    # @return [Integer, OpenStruct] 0 when the response status is 201,
    #   1 for any other response; an OpenStruct carrying an error when the
    #   access token is missing
    def push_item(item, options = {})
      return OpenStruct.new(body: { "errors" => [{ "title" => "Access token missing." }] }) if options[:access_token].blank?

      host = options[:push_url].presence || "https://api.test.datacite.org"
      push_url = host + "/events"

      payload =
        if options[:jsonapi]
          { "data" => {
            "id" => item["id"],
            "type" => "events",
            "attributes" => item.except("id") } }
        else
          item
        end

      response = Maremma.post(push_url, data: payload.to_json,
                                        bearer: options[:access_token],
                                        content_type: 'json',
                                        host: host)

      label = "#{item['subj-id']} #{item['relation-type-id']} #{item['obj-id']}"

      # return 0 if successful, 1 if error
      if response.status == 201
        puts "#{label} pushed to Event Data service."
        0
      else
        # Every non-201 response counts as one error, also when the body
        # carries no "errors" entry (previously nil was returned, which
        # broke the caller's sum).
        errors = response.body.is_a?(Hash) ? response.body["errors"] : nil
        first_error = Array.wrap(errors).first
        title = first_error.is_a?(Hash) ? first_error["title"] : first_error

        puts "#{label} had an error:"
        puts(title.present? ? title.to_s : "Unexpected response status #{response.status}")
        1
      end
    end
  end
end
@@ -1,3 +1,3 @@
1
1
module Toccatore
  # Release number of the gem; frozen so the shared constant cannot be
  # mutated in place at runtime.
  VERSION = "0.4.0".freeze
end
data/spec/cli_spec.rb CHANGED
@@ -8,7 +8,7 @@ describe Toccatore::CLI do
8
8
 
9
9
describe "version" do
  # The command must print the gem version followed by a newline.
  it 'has version' do
    print_version = -> { subject.__print_version }
    expect(&print_version).to output("0.4.0\n").to_stdout
  end
end
14
14
 
@@ -106,4 +106,37 @@ describe Toccatore::CLI do
106
106
  expect { subject.datacite_related }.to output(/An error occured: Access token missing.\n/).to_stdout
107
107
  end
108
108
  end
109
+
110
describe "usage_update", vcr: true, order: :defined do
  # Connection settings are read from the environment.
  let(:push_url)          { ENV['LAGOTTINO_URL'] }
  let(:access_token)      { ENV['LAGOTTO_TOKEN'] }
  let(:source_token)      { ENV['SOURCE_TOKEN'] }
  let(:slack_webhook_url) { ENV['SLACK_WEBHOOK_URL'] }
  let(:cli_options) do
    {
      push_url: push_url,
      slack_webhook_url: slack_webhook_url,
      access_token: access_token,
      source_token: source_token
    }
  end

  context "no reports in the queue" do
    it 'should succeed with no works' do
      # same options as cli_options, but without the source token
      subject.options = cli_options.reject { |name, _value| name == :source_token }
      expected_output = "No works found for in the Usage Reports Queue.\n"

      expect { subject.usage_update }.to output(expected_output).to_stdout
    end
  end

  context "with reports in the queue" do
    ## TO test this we need a real queue working
    # it 'should succeed' do
    #   subject.options = cli_options
    #   expect { subject.usage_update }.to output(/https:\/\/doi.org\/10.5281\/zenodo.16396 is_supplement_to https:\/\/doi.org\/10.1007\/s11548-015-1180-7 pushed to Event Data service.\n/).to_stdout
    # end
    # it 'should fail' do
    #   subject.options = cli_options.except(:access_token)
    #   expect { subject.usage_update }.to output(/An error occured: Access token missing.\n/).to_stdout
    # end
  end
end
109
142
  end
@@ -0,0 +1,2 @@
1
+ https://metrics.test.datacite.org/reports/2018-3-Dash total-dataset-investigations-regular https://doi.org/10.7291/d1q94r pushed to Event Data service.
2
+ https://metrics.test.datacite.org/reports/2018-3-Dash unique-dataset-investigations-regular https://doi.org/10.7291/d1q94r pushed to Event Data service.
@@ -0,0 +1,4 @@
1
+ https://metrics.test.datacite.org/reports/2018-3-Dash total-dataset-investigations-regular https://doi.org/10.7291/d1q94r pushed to Event Data service.
2
+ https://metrics.test.datacite.org/reports/2018-3-Dash unique-dataset-investigations-regular https://doi.org/10.7291/d1q94r pushed to Event Data service.
3
+ https://metrics.test.datacite.org/reports/2018-3-Dash Total-Dataset-Requests-Machine https://doi.org/10.6071/z7wc73 pushed to Event Data service.
4
+ https://metrics.test.datacite.org/reports/2018-3-Dash Unique-Dataset-Requests-Machine https://doi.org/10.6071/z7wc73 pushed to Event Data service.
@@ -0,0 +1 @@
1
+ {"report_id":"https://metrics.test.datacite.org/reports/2018-3-DataONE"}
@@ -0,0 +1,17 @@
1
+ [{
2
+ "id": 12332423432432,
3
+ "message-action": "add",
4
+ "subj": {
5
+ "pid": "https://metrics.test.datacite.org/reports/2018-3-Dash",
6
+ "issued": "2128-04-09"
7
+ },
8
+ "total": "208",
9
+ "obj-id": "https://doi.org/10.6071/z7wc73",
10
+ "relation-type-id": "Unique-Dataset-Requests-Machine",
11
+ "source-id": "datacite",
12
+ "source-token": "28276d12-b320-41ba-9272-bb0adc3466ff",
13
+ "occurred-at": "2128-04-09",
14
+ "license": "https://creativecommons.org/publicdomain/zero/1.0/"
15
+ }
16
+ ]
17
+