maltese 0.9.5 → 0.9.11
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Dockerfile +5 -5
- data/Gemfile.lock +49 -51
- data/lib/maltese/cli.rb +12 -0
- data/lib/maltese/datafile.rb +12 -0
- data/lib/maltese/sitemap.rb +13 -15
- data/lib/maltese/version.rb +1 -1
- data/maltese.gemspec +3 -3
- data/spec/cli_spec.rb +1 -1
- data/spec/fixtures/vcr_cassettes/Maltese_CLI/sitemap/should_succeed.yml +43 -94
- data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/get_data/should_report_if_there_are_works_returned_by_the_Datacite_REST_API.yml +13 -11
- data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/get_total/with_works.yml +13 -11
- data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/process_data/should_handle_bad_request_errors_with_the_Datacite_REST_API.yml +15 -70
- data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/process_data/should_handle_internal_server_errors_with_the_Datacite_REST_API.yml +59 -0
- data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/process_data/should_handle_timeout_errors_with_the_Datacite_REST_API.yml +15 -70
- data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/process_data/should_retry_2_times_for_bad_request_errors_with_the_Datacite_REST_API.yml +42 -41
- data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/push_data/should_report_if_there_are_no_works_returned_by_the_Datacite_REST_API.yml +15 -70
- data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/push_data/should_report_if_there_are_works_returned_by_the_Datacite_REST_API.yml +15 -15
- data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/queue_jobs/should_report_if_there_are_works_returned_by_the_Datacite_REST_API.yml +43 -94
- data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/send_notification_to_slack/send_error.yml +60 -0
- data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/send_notification_to_slack/send_info.yml +60 -0
- data/spec/sitemap_spec.rb +36 -8
- data/spec/spec_helper.rb +2 -0
- metadata +11 -9
- data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/process_data/should_catch_bad_request_errors_with_the_Datacite_REST_API.yml +0 -59
@@ -0,0 +1,60 @@
|
|
1
|
+
---
|
2
|
+
http_interactions:
|
3
|
+
- request:
|
4
|
+
method: post
|
5
|
+
uri: "<SLACK_WEBHOOK_URL>"
|
6
|
+
body:
|
7
|
+
encoding: US-ASCII
|
8
|
+
string: payload=%7B%22username%22%3A%22Fabrica%22%2C%22icon_url%22%3A%22https%3A%2F%2Fraw.githubusercontent.com%2Fdatacite%2Fhomepage%2Fmaster%2Fsource%2Fimages%2Ffabrica.png%22%2C%22attachments%22%3A%5B%7B%22title%22%3A%22DataCite+Fabrica+Stage%3A+Sitemaps+Updated%22%2C%22color%22%3A%22good%22%2C%22fields%22%3A%5B%7B%22title%22%3A%22URL%22%2C%22value%22%3A%22https%3A%2F%2Fcommons.stage.datacite.org%2Fsitemaps%2Fsitemap.xml.gz%22%7D%2C%7B%22title%22%3A%22Number+of+DOIs%22%2C%22value%22%3A%22271%2C605%22%2C%22short%22%3Atrue%7D%2C%7B%22title%22%3A%22Number+of+Sitemaps%22%2C%22value%22%3A%226%22%2C%22short%22%3Atrue%7D%2C%7B%22title%22%3A%22Time+Taken%22%2C%22value%22%3A%2233+min%22%2C%22short%22%3Atrue%7D%5D%7D%5D%7D
|
9
|
+
headers:
|
10
|
+
Accept-Encoding:
|
11
|
+
- gzip;q=1.0,deflate;q=0.6,identity;q=0.3
|
12
|
+
Accept:
|
13
|
+
- "*/*"
|
14
|
+
User-Agent:
|
15
|
+
- Ruby
|
16
|
+
Content-Type:
|
17
|
+
- application/x-www-form-urlencoded
|
18
|
+
response:
|
19
|
+
status:
|
20
|
+
code: 200
|
21
|
+
message: OK
|
22
|
+
headers:
|
23
|
+
Date:
|
24
|
+
- Sun, 01 Nov 2020 07:44:02 GMT
|
25
|
+
Server:
|
26
|
+
- Apache
|
27
|
+
Strict-Transport-Security:
|
28
|
+
- max-age=31536000; includeSubDomains; preload
|
29
|
+
X-Slack-Backend:
|
30
|
+
- r
|
31
|
+
Access-Control-Allow-Origin:
|
32
|
+
- "*"
|
33
|
+
X-Frame-Options:
|
34
|
+
- SAMEORIGIN
|
35
|
+
Vary:
|
36
|
+
- Accept-Encoding
|
37
|
+
Referrer-Policy:
|
38
|
+
- no-referrer
|
39
|
+
Content-Length:
|
40
|
+
- '22'
|
41
|
+
Content-Type:
|
42
|
+
- text/html
|
43
|
+
X-Envoy-Upstream-Service-Time:
|
44
|
+
- '38'
|
45
|
+
X-Backend:
|
46
|
+
- main_normal main_canary_with_overflow main_control_with_overflow
|
47
|
+
X-Server:
|
48
|
+
- 10.128.214.120:80
|
49
|
+
X-Via:
|
50
|
+
- envoy-www-iad-1mri, haproxy-edge-fra-44lk
|
51
|
+
X-Slack-Shared-Secret-Outcome:
|
52
|
+
- shared-secret
|
53
|
+
Via:
|
54
|
+
- envoy-www-iad-1mri
|
55
|
+
body:
|
56
|
+
encoding: ASCII-8BIT
|
57
|
+
string: ok
|
58
|
+
http_version:
|
59
|
+
recorded_at: Sun, 01 Nov 2020 07:44:02 GMT
|
60
|
+
recorded_with: VCR 3.0.3
|
data/spec/sitemap_spec.rb
CHANGED
@@ -3,32 +3,32 @@ require 'spec_helper'
|
|
3
3
|
describe Maltese::Sitemap, vcr: true do
|
4
4
|
subject { Maltese::Sitemap.new(rack_env: "test") }
|
5
5
|
|
6
|
-
let(:doi) { "10.
|
6
|
+
let(:doi) { "10.25601/2f56-s909" }
|
7
7
|
|
8
8
|
context "get_query_url" do
|
9
9
|
it "default" do
|
10
|
-
expect(subject.get_query_url).to eq("https://api.
|
10
|
+
expect(subject.get_query_url).to eq("https://api.stage.datacite.org/dois?fields%5Bdois%5D=doi%2Cupdated&exclude-registration-agencies=true&page%5Bscroll%5D=7m&page%5Bsize%5D=1000")
|
11
11
|
end
|
12
12
|
|
13
13
|
it "with page[size] one" do
|
14
|
-
expect(subject.get_query_url(size: 1)).to eq("https://api.
|
14
|
+
expect(subject.get_query_url(size: 1)).to eq("https://api.stage.datacite.org/dois?fields%5Bdois%5D=doi%2Cupdated&exclude-registration-agencies=true&page%5Bscroll%5D=7m&page%5Bsize%5D=1")
|
15
15
|
end
|
16
16
|
|
17
17
|
it "with size" do
|
18
|
-
expect(subject.get_query_url(size: 250)).to eq("https://api.
|
18
|
+
expect(subject.get_query_url(size: 250)).to eq("https://api.stage.datacite.org/dois?fields%5Bdois%5D=doi%2Cupdated&exclude-registration-agencies=true&page%5Bscroll%5D=7m&page%5Bsize%5D=250")
|
19
19
|
end
|
20
20
|
end
|
21
21
|
|
22
22
|
context "get_total" do
|
23
23
|
it "with works" do
|
24
|
-
expect(subject.get_total).to eq(
|
24
|
+
expect(subject.get_total).to eq(207479)
|
25
25
|
end
|
26
26
|
end
|
27
27
|
|
28
28
|
context "queue_jobs" do
|
29
29
|
it "should report if there are works returned by the Datacite REST API" do
|
30
30
|
response = subject.queue_jobs
|
31
|
-
expect(response).to eq(
|
31
|
+
expect(response).to eq(207479)
|
32
32
|
end
|
33
33
|
end
|
34
34
|
|
@@ -48,14 +48,20 @@ describe Maltese::Sitemap, vcr: true do
|
|
48
48
|
it "should retry 2 times for bad request errors with the Datacite REST API" do
|
49
49
|
stub = stub_request(:get, subject.get_query_url).and_return({ status: [502] }, { status: [502] }, { status: [502] })
|
50
50
|
response = subject.process_data(total: 10, url: subject.get_query_url)
|
51
|
-
expect(response).to eq(
|
51
|
+
expect(response).to eq(1)
|
52
|
+
end
|
53
|
+
|
54
|
+
it "should handle internal server errors with the Datacite REST API" do
|
55
|
+
stub = stub_request(:get, subject.get_query_url).and_return({ status: [500] }, { status: [200] })
|
56
|
+
response = subject.process_data(total: 10, url: subject.get_query_url)
|
57
|
+
expect(response).to eq(1)
|
52
58
|
end
|
53
59
|
end
|
54
60
|
|
55
61
|
context "get_data" do
|
56
62
|
it "should report if there are works returned by the Datacite REST API" do
|
57
63
|
response = subject.get_data(subject.get_query_url)
|
58
|
-
expect(response.body.dig("meta", "total")).to eq(
|
64
|
+
expect(response.body.dig("meta", "total")).to eq(207479)
|
59
65
|
expect(response.body.fetch("data", []).size).to eq(1000)
|
60
66
|
doc = response.body.fetch("data", []).first
|
61
67
|
expect(doc.dig("attributes", "doi")).to eq(doi)
|
@@ -97,4 +103,26 @@ describe Maltese::Sitemap, vcr: true do
|
|
97
103
|
expect { subject.push_data }.to output(/1001 links/).to_stdout
|
98
104
|
end
|
99
105
|
end
|
106
|
+
|
107
|
+
context "send_notification_to_slack" do
|
108
|
+
it "send info" do
|
109
|
+
fields = [
|
110
|
+
{ title: "URL", value: "https://commons.stage.datacite.org/sitemaps/sitemap.xml.gz" },
|
111
|
+
{ title: "Number of DOIs", value: 271605.to_s(:delimited), short: true },
|
112
|
+
{ title: "Number of Sitemaps", value: 6.to_s(:delimited), short: true },
|
113
|
+
{ title: "Time Taken", value: "33 min", short: true }
|
114
|
+
]
|
115
|
+
expect(subject.send_notification_to_slack(nil, title: subject.slack_title + ": Sitemaps Updated", level: "good", fields: fields)).to eq("ok")
|
116
|
+
end
|
117
|
+
|
118
|
+
it "send error" do
|
119
|
+
fields = [
|
120
|
+
{ title: "Error", value: "Error: A bad gateway error occured for URL https://api.datacite.org/dois?page%5Bscroll%5D=7m&page%5Bsize%5D=1000&scroll-id=DnF1ZXJ5VGhlbkZldGNoBQAAAAACJPb5FjI5NUhiM3dqU0JleHRSWlJGTnhYaXcAAAAAAiT2-hYyOTVIYjN3alNCZXh0UlpSRk54WGl3AAAAAAIk9vsWMjk1SGIzd2pTQmV4dFJaUkZOeFhpdwAAAAACJPb8FjI5NUhiM3dqU0JleHRSWlJGTnhYaXcAAAAAAiT2_RYyOTVIYjN3alNCZXh0UlpSRk54WGl3." },
|
121
|
+
{ title: "Number of DOIs", value: 141572.to_s(:delimited), short: true },
|
122
|
+
{ title: "Number of Sitemaps", value: 3.to_s(:delimited), short: true },
|
123
|
+
{ title: "Time Taken", value: "17 min", short: true }
|
124
|
+
]
|
125
|
+
expect(subject.send_notification_to_slack(nil, title: subject.slack_title + ": Sitemaps Not Updated", level: "danger", fields: fields)).to eq("ok")
|
126
|
+
end
|
127
|
+
end
|
100
128
|
end
|
data/spec/spec_helper.rb
CHANGED
@@ -90,6 +90,8 @@ VCR.configure do |c|
|
|
90
90
|
c.configure_rspec_metadata!
|
91
91
|
c.filter_sensitive_data("<AWS_ACCESS_KEY_ID>") { ENV["AWS_ACCESS_KEY_ID"] }
|
92
92
|
c.filter_sensitive_data("<AWS_SECRET_ACCESS_KEY>") { ENV["AWS_SECRET_ACCESS_KEY"] }
|
93
|
+
c.filter_sensitive_data("<AWS_REGION>") { ENV["AWS_REGION"] }
|
93
94
|
c.filter_sensitive_data("<FOG_DIRECTORY>") { ENV['FOG_DIRECTORY'] }
|
94
95
|
c.filter_sensitive_data("<FOG_REGION>") { ENV["FOG_REGION"] }
|
96
|
+
c.filter_sensitive_data("<SLACK_WEBHOOK_URL>") { ENV["SLACK_WEBHOOK_URL"] }
|
95
97
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: maltese
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.9.
|
4
|
+
version: 0.9.11
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Martin Fenner
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2020-11-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: maremma
|
@@ -182,14 +182,14 @@ dependencies:
|
|
182
182
|
requirements:
|
183
183
|
- - "~>"
|
184
184
|
- !ruby/object:Gem::Version
|
185
|
-
version: '
|
185
|
+
version: '2.0'
|
186
186
|
type: :development
|
187
187
|
prerelease: false
|
188
188
|
version_requirements: !ruby/object:Gem::Requirement
|
189
189
|
requirements:
|
190
190
|
- - "~>"
|
191
191
|
- !ruby/object:Gem::Version
|
192
|
-
version: '
|
192
|
+
version: '2.0'
|
193
193
|
- !ruby/object:Gem::Dependency
|
194
194
|
name: rspec
|
195
195
|
requirement: !ruby/object:Gem::Requirement
|
@@ -306,7 +306,7 @@ dependencies:
|
|
306
306
|
- - "~>"
|
307
307
|
- !ruby/object:Gem::Version
|
308
308
|
version: '0.1'
|
309
|
-
description: Ruby library to generate sitemap for DataCite
|
309
|
+
description: Ruby library to generate sitemap for DataCite Commons.
|
310
310
|
email: mfenner@datacite.org
|
311
311
|
executables:
|
312
312
|
- maltese
|
@@ -327,6 +327,7 @@ files:
|
|
327
327
|
- lib/maltese.rb
|
328
328
|
- lib/maltese/.dockerignore
|
329
329
|
- lib/maltese/cli.rb
|
330
|
+
- lib/maltese/datafile.rb
|
330
331
|
- lib/maltese/sitemap.rb
|
331
332
|
- lib/maltese/version.rb
|
332
333
|
- maltese.gemspec
|
@@ -336,13 +337,15 @@ files:
|
|
336
337
|
- spec/fixtures/vcr_cassettes/Maltese_CLI/sitemap/should_succeed.yml
|
337
338
|
- spec/fixtures/vcr_cassettes/Maltese_Sitemap/get_data/should_report_if_there_are_works_returned_by_the_Datacite_REST_API.yml
|
338
339
|
- spec/fixtures/vcr_cassettes/Maltese_Sitemap/get_total/with_works.yml
|
339
|
-
- spec/fixtures/vcr_cassettes/Maltese_Sitemap/process_data/should_catch_bad_request_errors_with_the_Datacite_REST_API.yml
|
340
340
|
- spec/fixtures/vcr_cassettes/Maltese_Sitemap/process_data/should_handle_bad_request_errors_with_the_Datacite_REST_API.yml
|
341
|
+
- spec/fixtures/vcr_cassettes/Maltese_Sitemap/process_data/should_handle_internal_server_errors_with_the_Datacite_REST_API.yml
|
341
342
|
- spec/fixtures/vcr_cassettes/Maltese_Sitemap/process_data/should_handle_timeout_errors_with_the_Datacite_REST_API.yml
|
342
343
|
- spec/fixtures/vcr_cassettes/Maltese_Sitemap/process_data/should_retry_2_times_for_bad_request_errors_with_the_Datacite_REST_API.yml
|
343
344
|
- spec/fixtures/vcr_cassettes/Maltese_Sitemap/push_data/should_report_if_there_are_no_works_returned_by_the_Datacite_REST_API.yml
|
344
345
|
- spec/fixtures/vcr_cassettes/Maltese_Sitemap/push_data/should_report_if_there_are_works_returned_by_the_Datacite_REST_API.yml
|
345
346
|
- spec/fixtures/vcr_cassettes/Maltese_Sitemap/queue_jobs/should_report_if_there_are_works_returned_by_the_Datacite_REST_API.yml
|
347
|
+
- spec/fixtures/vcr_cassettes/Maltese_Sitemap/send_notification_to_slack/send_error.yml
|
348
|
+
- spec/fixtures/vcr_cassettes/Maltese_Sitemap/send_notification_to_slack/send_info.yml
|
346
349
|
- spec/sitemap_spec.rb
|
347
350
|
- spec/spec_helper.rb
|
348
351
|
homepage: https://github.com/datacite/maltese
|
@@ -364,9 +367,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
364
367
|
- !ruby/object:Gem::Version
|
365
368
|
version: '0'
|
366
369
|
requirements: []
|
367
|
-
|
368
|
-
rubygems_version: 2.7.7
|
370
|
+
rubygems_version: 3.0.8
|
369
371
|
signing_key:
|
370
372
|
specification_version: 4
|
371
|
-
summary: Ruby library to generate sitemap for DataCite
|
373
|
+
summary: Ruby library to generate sitemap for DataCite Commons
|
372
374
|
test_files: []
|
@@ -1,59 +0,0 @@
|
|
1
|
-
---
|
2
|
-
http_interactions:
|
3
|
-
- request:
|
4
|
-
method: put
|
5
|
-
uri: https://s3.eu-west-1.amazonaws.com/search.test.datacite.org/sitemaps/sitemap.xml.gz
|
6
|
-
body:
|
7
|
-
encoding: ASCII-8BIT
|
8
|
-
string: !binary |-
|
9
|
-
H4sIANy18F0AA5WSTU+EMBCG/wrp1dAP3IOQbvfmaT25Jl5rqdCktMjUhf33dgtr0HjAG8w8z8xLGH6YOpud9QDGuz1imKJMO+Vr45o9ejk95g/oIPjnYEGHLLIOqgnMHrUh9BUh4zji8R77oSEFpYy8Ph2fVas7mRsHQTqlURb5ClLx6JUMadFKBxNiq4c0ZOaALEVCcZn9g7094wlqNMf9167FqUwnG/3DbLxvrMbKd7+9PMGEYXazz6bWfqud4LXt9Ahb5Su7zt35N2M3B59pkn767PfxS2Jn64AFX0+Y2tDZv86DlWVJUhelexLceiWuHEQQtBxUi4OGgGsZpIobksbJFeNWQuh8LQrKypwVOWMnWlbFrtrt7iitKI3cgnDVStfo90F/CGlHeQFOViXeD8YPJlxETM3J9xsnKRSZT118AY2t+lEZAwAA
|
10
|
-
headers:
|
11
|
-
Content-Type:
|
12
|
-
- application/x-gzip
|
13
|
-
Accept-Encoding:
|
14
|
-
- ''
|
15
|
-
User-Agent:
|
16
|
-
- aws-sdk-ruby3/3.85.0 ruby/2.6.3 universal.x86_64-darwin19 aws-sdk-s3/1.59.0
|
17
|
-
X-Amz-Acl:
|
18
|
-
- public-read
|
19
|
-
Cache-Control:
|
20
|
-
- private, max-age=0, no-cache
|
21
|
-
Expect:
|
22
|
-
- 100-continue
|
23
|
-
Content-Md5:
|
24
|
-
- lxwQjxbjrf0xGMXvjTOX9g==
|
25
|
-
X-Amz-Date:
|
26
|
-
- 20191211T092444Z
|
27
|
-
X-Amz-Content-Sha256:
|
28
|
-
- 30bde17d18b8abe11ac80301be4511cf1e773a7c993caf77a08f9d216615e7b7
|
29
|
-
Authorization:
|
30
|
-
- AWS4-HMAC-SHA256 Credential=AKIAJAMMCXAR3IXMNCGQ/20191211/eu-west-1/s3/aws4_request,
|
31
|
-
SignedHeaders=cache-control;content-md5;content-type;expect;host;user-agent;x-amz-acl;x-amz-content-sha256;x-amz-date,
|
32
|
-
Signature=99527ec52cae5635e3928767cf9743f88757fc727cf3be085e99071c3959856b
|
33
|
-
Content-Length:
|
34
|
-
- '333'
|
35
|
-
Accept:
|
36
|
-
- "*/*"
|
37
|
-
response:
|
38
|
-
status:
|
39
|
-
code: 200
|
40
|
-
message: OK
|
41
|
-
headers:
|
42
|
-
X-Amz-Id-2:
|
43
|
-
- 9SEmB+KZX4UNyualN/z8RPkrjhNZQOrxEREvMwCzXGB35F6gZty/R+QADhxlFJxORz9cYmp9jcs=
|
44
|
-
X-Amz-Request-Id:
|
45
|
-
- 2ED234E38358C809
|
46
|
-
Date:
|
47
|
-
- Wed, 11 Dec 2019 09:24:48 GMT
|
48
|
-
Etag:
|
49
|
-
- '"971c108f16e3adfd3118c5ef8d3397f6"'
|
50
|
-
Content-Length:
|
51
|
-
- '0'
|
52
|
-
Server:
|
53
|
-
- AmazonS3
|
54
|
-
body:
|
55
|
-
encoding: UTF-8
|
56
|
-
string: ''
|
57
|
-
http_version:
|
58
|
-
recorded_at: Wed, 11 Dec 2019 09:24:48 GMT
|
59
|
-
recorded_with: VCR 3.0.3
|