maltese 0.9.5 → 0.9.11

Sign up to get free protection for your applications and to get access to all the features.
Files changed (25) hide show
  1. checksums.yaml +4 -4
  2. data/Dockerfile +5 -5
  3. data/Gemfile.lock +49 -51
  4. data/lib/maltese/cli.rb +12 -0
  5. data/lib/maltese/datafile.rb +12 -0
  6. data/lib/maltese/sitemap.rb +13 -15
  7. data/lib/maltese/version.rb +1 -1
  8. data/maltese.gemspec +3 -3
  9. data/spec/cli_spec.rb +1 -1
  10. data/spec/fixtures/vcr_cassettes/Maltese_CLI/sitemap/should_succeed.yml +43 -94
  11. data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/get_data/should_report_if_there_are_works_returned_by_the_Datacite_REST_API.yml +13 -11
  12. data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/get_total/with_works.yml +13 -11
  13. data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/process_data/should_handle_bad_request_errors_with_the_Datacite_REST_API.yml +15 -70
  14. data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/process_data/should_handle_internal_server_errors_with_the_Datacite_REST_API.yml +59 -0
  15. data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/process_data/should_handle_timeout_errors_with_the_Datacite_REST_API.yml +15 -70
  16. data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/process_data/should_retry_2_times_for_bad_request_errors_with_the_Datacite_REST_API.yml +42 -41
  17. data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/push_data/should_report_if_there_are_no_works_returned_by_the_Datacite_REST_API.yml +15 -70
  18. data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/push_data/should_report_if_there_are_works_returned_by_the_Datacite_REST_API.yml +15 -15
  19. data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/queue_jobs/should_report_if_there_are_works_returned_by_the_Datacite_REST_API.yml +43 -94
  20. data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/send_notification_to_slack/send_error.yml +60 -0
  21. data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/send_notification_to_slack/send_info.yml +60 -0
  22. data/spec/sitemap_spec.rb +36 -8
  23. data/spec/spec_helper.rb +2 -0
  24. metadata +11 -9
  25. data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/process_data/should_catch_bad_request_errors_with_the_Datacite_REST_API.yml +0 -59
@@ -0,0 +1,60 @@
1
+ ---
2
+ http_interactions:
3
+ - request:
4
+ method: post
5
+ uri: "<SLACK_WEBHOOK_URL>"
6
+ body:
7
+ encoding: US-ASCII
8
+ string: payload=%7B%22username%22%3A%22Fabrica%22%2C%22icon_url%22%3A%22https%3A%2F%2Fraw.githubusercontent.com%2Fdatacite%2Fhomepage%2Fmaster%2Fsource%2Fimages%2Ffabrica.png%22%2C%22attachments%22%3A%5B%7B%22title%22%3A%22DataCite+Fabrica+Stage%3A+Sitemaps+Updated%22%2C%22color%22%3A%22good%22%2C%22fields%22%3A%5B%7B%22title%22%3A%22URL%22%2C%22value%22%3A%22https%3A%2F%2Fcommons.stage.datacite.org%2Fsitemaps%2Fsitemap.xml.gz%22%7D%2C%7B%22title%22%3A%22Number+of+DOIs%22%2C%22value%22%3A%22271%2C605%22%2C%22short%22%3Atrue%7D%2C%7B%22title%22%3A%22Number+of+Sitemaps%22%2C%22value%22%3A%226%22%2C%22short%22%3Atrue%7D%2C%7B%22title%22%3A%22Time+Taken%22%2C%22value%22%3A%2233+min%22%2C%22short%22%3Atrue%7D%5D%7D%5D%7D
9
+ headers:
10
+ Accept-Encoding:
11
+ - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
12
+ Accept:
13
+ - "*/*"
14
+ User-Agent:
15
+ - Ruby
16
+ Content-Type:
17
+ - application/x-www-form-urlencoded
18
+ response:
19
+ status:
20
+ code: 200
21
+ message: OK
22
+ headers:
23
+ Date:
24
+ - Sun, 01 Nov 2020 07:44:02 GMT
25
+ Server:
26
+ - Apache
27
+ Strict-Transport-Security:
28
+ - max-age=31536000; includeSubDomains; preload
29
+ X-Slack-Backend:
30
+ - r
31
+ Access-Control-Allow-Origin:
32
+ - "*"
33
+ X-Frame-Options:
34
+ - SAMEORIGIN
35
+ Vary:
36
+ - Accept-Encoding
37
+ Referrer-Policy:
38
+ - no-referrer
39
+ Content-Length:
40
+ - '22'
41
+ Content-Type:
42
+ - text/html
43
+ X-Envoy-Upstream-Service-Time:
44
+ - '38'
45
+ X-Backend:
46
+ - main_normal main_canary_with_overflow main_control_with_overflow
47
+ X-Server:
48
+ - 10.128.214.120:80
49
+ X-Via:
50
+ - envoy-www-iad-1mri, haproxy-edge-fra-44lk
51
+ X-Slack-Shared-Secret-Outcome:
52
+ - shared-secret
53
+ Via:
54
+ - envoy-www-iad-1mri
55
+ body:
56
+ encoding: ASCII-8BIT
57
+ string: ok
58
+ http_version:
59
+ recorded_at: Sun, 01 Nov 2020 07:44:02 GMT
60
+ recorded_with: VCR 3.0.3
@@ -3,32 +3,32 @@ require 'spec_helper'
3
3
  describe Maltese::Sitemap, vcr: true do
4
4
  subject { Maltese::Sitemap.new(rack_env: "test") }
5
5
 
6
- let(:doi) { "10.3280/ses2013-001015en" }
6
+ let(:doi) { "10.25601/2f56-s909" }
7
7
 
8
8
  context "get_query_url" do
9
9
  it "default" do
10
- expect(subject.get_query_url).to eq("https://api.test.datacite.org/dois?fields%5Bdois%5D=doi%2Cupdated&page%5Bscroll%5D=7m&page%5Bsize%5D=1000")
10
+ expect(subject.get_query_url).to eq("https://api.stage.datacite.org/dois?fields%5Bdois%5D=doi%2Cupdated&exclude-registration-agencies=true&page%5Bscroll%5D=7m&page%5Bsize%5D=1000")
11
11
  end
12
12
 
13
13
  it "with page[size] one" do
14
- expect(subject.get_query_url(size: 1)).to eq("https://api.test.datacite.org/dois?fields%5Bdois%5D=doi%2Cupdated&page%5Bscroll%5D=7m&page%5Bsize%5D=1")
14
+ expect(subject.get_query_url(size: 1)).to eq("https://api.stage.datacite.org/dois?fields%5Bdois%5D=doi%2Cupdated&exclude-registration-agencies=true&page%5Bscroll%5D=7m&page%5Bsize%5D=1")
15
15
  end
16
16
 
17
17
  it "with size" do
18
- expect(subject.get_query_url(size: 250)).to eq("https://api.test.datacite.org/dois?fields%5Bdois%5D=doi%2Cupdated&page%5Bscroll%5D=7m&page%5Bsize%5D=250")
18
+ expect(subject.get_query_url(size: 250)).to eq("https://api.stage.datacite.org/dois?fields%5Bdois%5D=doi%2Cupdated&exclude-registration-agencies=true&page%5Bscroll%5D=7m&page%5Bsize%5D=250")
19
19
  end
20
20
  end
21
21
 
22
22
  context "get_total" do
23
23
  it "with works" do
24
- expect(subject.get_total).to eq(846853)
24
+ expect(subject.get_total).to eq(207479)
25
25
  end
26
26
  end
27
27
 
28
28
  context "queue_jobs" do
29
29
  it "should report if there are works returned by the Datacite REST API" do
30
30
  response = subject.queue_jobs
31
- expect(response).to eq(895955)
31
+ expect(response).to eq(207479)
32
32
  end
33
33
  end
34
34
 
@@ -48,14 +48,20 @@ describe Maltese::Sitemap, vcr: true do
48
48
  it "should retry 2 times for bad request errors with the Datacite REST API" do
49
49
  stub = stub_request(:get, subject.get_query_url).and_return({ status: [502] }, { status: [502] }, { status: [502] })
50
50
  response = subject.process_data(total: 10, url: subject.get_query_url)
51
- expect(response).to eq(0)
51
+ expect(response).to eq(1)
52
+ end
53
+
54
+ it "should handle internal server errors with the Datacite REST API" do
55
+ stub = stub_request(:get, subject.get_query_url).and_return({ status: [500] }, { status: [200] })
56
+ response = subject.process_data(total: 10, url: subject.get_query_url)
57
+ expect(response).to eq(1)
52
58
  end
53
59
  end
54
60
 
55
61
  context "get_data" do
56
62
  it "should report if there are works returned by the Datacite REST API" do
57
63
  response = subject.get_data(subject.get_query_url)
58
- expect(response.body.dig("meta", "total")).to eq(846839)
64
+ expect(response.body.dig("meta", "total")).to eq(207479)
59
65
  expect(response.body.fetch("data", []).size).to eq(1000)
60
66
  doc = response.body.fetch("data", []).first
61
67
  expect(doc.dig("attributes", "doi")).to eq(doi)
@@ -97,4 +103,26 @@ describe Maltese::Sitemap, vcr: true do
97
103
  expect { subject.push_data }.to output(/1001 links/).to_stdout
98
104
  end
99
105
  end
106
+
107
+ context "send_notification_to_slack" do
108
+ it "send info" do
109
+ fields = [
110
+ { title: "URL", value: "https://commons.stage.datacite.org/sitemaps/sitemap.xml.gz" },
111
+ { title: "Number of DOIs", value: 271605.to_s(:delimited), short: true },
112
+ { title: "Number of Sitemaps", value: 6.to_s(:delimited), short: true },
113
+ { title: "Time Taken", value: "33 min", short: true }
114
+ ]
115
+ expect(subject.send_notification_to_slack(nil, title: subject.slack_title + ": Sitemaps Updated", level: "good", fields: fields)).to eq("ok")
116
+ end
117
+
118
+ it "send error" do
119
+ fields = [
120
+ { title: "Error", value: "Error: A bad gateway error occured for URL https://api.datacite.org/dois?page%5Bscroll%5D=7m&page%5Bsize%5D=1000&scroll-id=DnF1ZXJ5VGhlbkZldGNoBQAAAAACJPb5FjI5NUhiM3dqU0JleHRSWlJGTnhYaXcAAAAAAiT2-hYyOTVIYjN3alNCZXh0UlpSRk54WGl3AAAAAAIk9vsWMjk1SGIzd2pTQmV4dFJaUkZOeFhpdwAAAAACJPb8FjI5NUhiM3dqU0JleHRSWlJGTnhYaXcAAAAAAiT2_RYyOTVIYjN3alNCZXh0UlpSRk54WGl3." },
121
+ { title: "Number of DOIs", value: 141572.to_s(:delimited), short: true },
122
+ { title: "Number of Sitemaps", value: 3.to_s(:delimited), short: true },
123
+ { title: "Time Taken", value: "17 min", short: true }
124
+ ]
125
+ expect(subject.send_notification_to_slack(nil, title: subject.slack_title + ": Sitemaps Not Updated", level: "danger", fields: fields)).to eq("ok")
126
+ end
127
+ end
100
128
  end
@@ -90,6 +90,8 @@ VCR.configure do |c|
90
90
  c.configure_rspec_metadata!
91
91
  c.filter_sensitive_data("<AWS_ACCESS_KEY_ID>") { ENV["AWS_ACCESS_KEY_ID"] }
92
92
  c.filter_sensitive_data("<AWS_SECRET_ACCESS_KEY>") { ENV["AWS_SECRET_ACCESS_KEY"] }
93
+ c.filter_sensitive_data("<AWS_REGION>") { ENV["AWS_REGION"] }
93
94
  c.filter_sensitive_data("<FOG_DIRECTORY>") { ENV['FOG_DIRECTORY'] }
94
95
  c.filter_sensitive_data("<FOG_REGION>") { ENV["FOG_REGION"] }
96
+ c.filter_sensitive_data("<SLACK_WEBHOOK_URL>") { ENV["SLACK_WEBHOOK_URL"] }
95
97
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: maltese
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.5
4
+ version: 0.9.11
5
5
  platform: ruby
6
6
  authors:
7
7
  - Martin Fenner
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-12-12 00:00:00.000000000 Z
11
+ date: 2020-11-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: maremma
@@ -182,14 +182,14 @@ dependencies:
182
182
  requirements:
183
183
  - - "~>"
184
184
  - !ruby/object:Gem::Version
185
- version: '1.0'
185
+ version: '2.0'
186
186
  type: :development
187
187
  prerelease: false
188
188
  version_requirements: !ruby/object:Gem::Requirement
189
189
  requirements:
190
190
  - - "~>"
191
191
  - !ruby/object:Gem::Version
192
- version: '1.0'
192
+ version: '2.0'
193
193
  - !ruby/object:Gem::Dependency
194
194
  name: rspec
195
195
  requirement: !ruby/object:Gem::Requirement
@@ -306,7 +306,7 @@ dependencies:
306
306
  - - "~>"
307
307
  - !ruby/object:Gem::Version
308
308
  version: '0.1'
309
- description: Ruby library to generate sitemap for DataCite Search.
309
+ description: Ruby library to generate sitemap for DataCite Commons.
310
310
  email: mfenner@datacite.org
311
311
  executables:
312
312
  - maltese
@@ -327,6 +327,7 @@ files:
327
327
  - lib/maltese.rb
328
328
  - lib/maltese/.dockerignore
329
329
  - lib/maltese/cli.rb
330
+ - lib/maltese/datafile.rb
330
331
  - lib/maltese/sitemap.rb
331
332
  - lib/maltese/version.rb
332
333
  - maltese.gemspec
@@ -336,13 +337,15 @@ files:
336
337
  - spec/fixtures/vcr_cassettes/Maltese_CLI/sitemap/should_succeed.yml
337
338
  - spec/fixtures/vcr_cassettes/Maltese_Sitemap/get_data/should_report_if_there_are_works_returned_by_the_Datacite_REST_API.yml
338
339
  - spec/fixtures/vcr_cassettes/Maltese_Sitemap/get_total/with_works.yml
339
- - spec/fixtures/vcr_cassettes/Maltese_Sitemap/process_data/should_catch_bad_request_errors_with_the_Datacite_REST_API.yml
340
340
  - spec/fixtures/vcr_cassettes/Maltese_Sitemap/process_data/should_handle_bad_request_errors_with_the_Datacite_REST_API.yml
341
+ - spec/fixtures/vcr_cassettes/Maltese_Sitemap/process_data/should_handle_internal_server_errors_with_the_Datacite_REST_API.yml
341
342
  - spec/fixtures/vcr_cassettes/Maltese_Sitemap/process_data/should_handle_timeout_errors_with_the_Datacite_REST_API.yml
342
343
  - spec/fixtures/vcr_cassettes/Maltese_Sitemap/process_data/should_retry_2_times_for_bad_request_errors_with_the_Datacite_REST_API.yml
343
344
  - spec/fixtures/vcr_cassettes/Maltese_Sitemap/push_data/should_report_if_there_are_no_works_returned_by_the_Datacite_REST_API.yml
344
345
  - spec/fixtures/vcr_cassettes/Maltese_Sitemap/push_data/should_report_if_there_are_works_returned_by_the_Datacite_REST_API.yml
345
346
  - spec/fixtures/vcr_cassettes/Maltese_Sitemap/queue_jobs/should_report_if_there_are_works_returned_by_the_Datacite_REST_API.yml
347
+ - spec/fixtures/vcr_cassettes/Maltese_Sitemap/send_notification_to_slack/send_error.yml
348
+ - spec/fixtures/vcr_cassettes/Maltese_Sitemap/send_notification_to_slack/send_info.yml
346
349
  - spec/sitemap_spec.rb
347
350
  - spec/spec_helper.rb
348
351
  homepage: https://github.com/datacite/maltese
@@ -364,9 +367,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
364
367
  - !ruby/object:Gem::Version
365
368
  version: '0'
366
369
  requirements: []
367
- rubyforge_project:
368
- rubygems_version: 2.7.7
370
+ rubygems_version: 3.0.8
369
371
  signing_key:
370
372
  specification_version: 4
371
- summary: Ruby library to generate sitemap for DataCite Search
373
+ summary: Ruby library to generate sitemap for DataCite Commons
372
374
  test_files: []
@@ -1,59 +0,0 @@
1
- ---
2
- http_interactions:
3
- - request:
4
- method: put
5
- uri: https://s3.eu-west-1.amazonaws.com/search.test.datacite.org/sitemaps/sitemap.xml.gz
6
- body:
7
- encoding: ASCII-8BIT
8
- string: !binary |-
9
- H4sIANy18F0AA5WSTU+EMBCG/wrp1dAP3IOQbvfmaT25Jl5rqdCktMjUhf33dgtr0HjAG8w8z8xLGH6YOpud9QDGuz1imKJMO+Vr45o9ejk95g/oIPjnYEGHLLIOqgnMHrUh9BUh4zji8R77oSEFpYy8Ph2fVas7mRsHQTqlURb5ClLx6JUMadFKBxNiq4c0ZOaALEVCcZn9g7094wlqNMf9167FqUwnG/3DbLxvrMbKd7+9PMGEYXazz6bWfqud4LXt9Ahb5Su7zt35N2M3B59pkn767PfxS2Jn64AFX0+Y2tDZv86DlWVJUhelexLceiWuHEQQtBxUi4OGgGsZpIobksbJFeNWQuh8LQrKypwVOWMnWlbFrtrt7iitKI3cgnDVStfo90F/CGlHeQFOViXeD8YPJlxETM3J9xsnKRSZT118AY2t+lEZAwAA
10
- headers:
11
- Content-Type:
12
- - application/x-gzip
13
- Accept-Encoding:
14
- - ''
15
- User-Agent:
16
- - aws-sdk-ruby3/3.85.0 ruby/2.6.3 universal.x86_64-darwin19 aws-sdk-s3/1.59.0
17
- X-Amz-Acl:
18
- - public-read
19
- Cache-Control:
20
- - private, max-age=0, no-cache
21
- Expect:
22
- - 100-continue
23
- Content-Md5:
24
- - lxwQjxbjrf0xGMXvjTOX9g==
25
- X-Amz-Date:
26
- - 20191211T092444Z
27
- X-Amz-Content-Sha256:
28
- - 30bde17d18b8abe11ac80301be4511cf1e773a7c993caf77a08f9d216615e7b7
29
- Authorization:
30
- - AWS4-HMAC-SHA256 Credential=AKIAJAMMCXAR3IXMNCGQ/20191211/eu-west-1/s3/aws4_request,
31
- SignedHeaders=cache-control;content-md5;content-type;expect;host;user-agent;x-amz-acl;x-amz-content-sha256;x-amz-date,
32
- Signature=99527ec52cae5635e3928767cf9743f88757fc727cf3be085e99071c3959856b
33
- Content-Length:
34
- - '333'
35
- Accept:
36
- - "*/*"
37
- response:
38
- status:
39
- code: 200
40
- message: OK
41
- headers:
42
- X-Amz-Id-2:
43
- - 9SEmB+KZX4UNyualN/z8RPkrjhNZQOrxEREvMwCzXGB35F6gZty/R+QADhxlFJxORz9cYmp9jcs=
44
- X-Amz-Request-Id:
45
- - 2ED234E38358C809
46
- Date:
47
- - Wed, 11 Dec 2019 09:24:48 GMT
48
- Etag:
49
- - '"971c108f16e3adfd3118c5ef8d3397f6"'
50
- Content-Length:
51
- - '0'
52
- Server:
53
- - AmazonS3
54
- body:
55
- encoding: UTF-8
56
- string: ''
57
- http_version:
58
- recorded_at: Wed, 11 Dec 2019 09:24:48 GMT
59
- recorded_with: VCR 3.0.3