maltese 0.9.1 → 0.9.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Dockerfile +1 -1
- data/Gemfile.lock +3 -1
- data/lib/maltese/sitemap.rb +30 -13
- data/lib/maltese/version.rb +1 -1
- data/maltese.gemspec +1 -0
- data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/process_data/should_catch_bad_request_errors_with_the_Datacite_REST_API.yml +59 -0
- data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/process_data/should_handle_bad_request_errors_with_the_Datacite_REST_API.yml +59 -0
- data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/process_data/should_handle_timeout_errors_with_the_Datacite_REST_API.yml +59 -0
- data/spec/sitemap_spec.rb +14 -2
- metadata +18 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4b25d7aed1fa4b41a642a8bf8449e067c67fa28861cf319c54d24b4ffccbcd6d
|
4
|
+
data.tar.gz: af043d259c92a902d5a42c281a0746fc2cf65f356ebb2bec161cc707f89f9b3a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e1399ede091a478151153b5071b7881ae1bedd9068ac017ab34c1cfcf1931559a607f0f2ccba83b1018b3a251898e4498d8557267905478c7dd48e322b288722
|
7
|
+
data.tar.gz: 50cd6f3a1ee8a4b0c508805720efcc58db31208dd7842668b6785cf0684bd03059d2872113e002d4c89c5e501fb2924f6323ff24caa3db5227c86517fbbef7c3
|
data/Dockerfile
CHANGED
@@ -11,6 +11,6 @@ RUN apt-get update && apt-get upgrade -y -o Dpkg::Options::="--force-confold" &&
|
|
11
11
|
apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
|
12
12
|
|
13
13
|
# Install maltese gem
|
14
|
-
RUN /sbin/setuser app gem install maltese -v 0.9.1
|
14
|
+
RUN /sbin/setuser app gem install maltese -v 0.9.2
|
15
15
|
|
16
16
|
CMD maltese sitemap --sitemap_bucket $SITEMAP_BUCKET --rack_env $RACK_ENV --access_key $AWS_ACCESS_KEY_ID --secret_key $AWS_SECRET_ACCESS_KEY --region $AWS_REGION
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
maltese (0.9.1)
|
4
|
+
maltese (0.9.2)
|
5
5
|
activesupport (>= 4.2.5, < 6)
|
6
6
|
aws-sdk-s3 (~> 1.19)
|
7
7
|
dotenv (~> 2.1, >= 2.1.1)
|
@@ -9,6 +9,7 @@ PATH
|
|
9
9
|
logstash-logger (~> 0.26.1)
|
10
10
|
maremma (~> 4.1)
|
11
11
|
mime-types (~> 3.1)
|
12
|
+
retriable (~> 3.1)
|
12
13
|
sitemap_generator (~> 6.0)
|
13
14
|
thor (~> 0.19)
|
14
15
|
|
@@ -88,6 +89,7 @@ GEM
|
|
88
89
|
rack-test (0.8.3)
|
89
90
|
rack (>= 1.0, < 3)
|
90
91
|
rake (12.3.3)
|
92
|
+
retriable (3.1.2)
|
91
93
|
rspec (3.9.0)
|
92
94
|
rspec-core (~> 3.9.0)
|
93
95
|
rspec-expectations (~> 3.9.0)
|
data/lib/maltese/sitemap.rb
CHANGED
@@ -1,6 +1,9 @@
|
|
1
1
|
require 'logstash-logger'
|
2
|
+
require 'retriable'
|
2
3
|
|
3
4
|
module Maltese
|
5
|
+
class ::BadGatewayError < StandardError; end
|
6
|
+
|
4
7
|
class Sitemap
|
5
8
|
attr_reader :sitemap_bucket, :rack_env, :access_key, :secret_key, :region, :logger
|
6
9
|
|
@@ -105,21 +108,35 @@ module Maltese
|
|
105
108
|
|
106
109
|
# walk through paginated results
|
107
110
|
while options[:url] do
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
111
|
+
begin
|
112
|
+
response = nil
|
113
|
+
|
114
|
+
# retry on temporal errors (status codes 408 and 502)
|
115
|
+
Retriable.retriable(base_interval: 10, multiplier: 2) do
|
116
|
+
response = get_data(options[:url])
|
117
|
+
|
118
|
+
raise Timeout::Error, "A timeout error occured for URL #{options[:url]}." if response.status == 408
|
119
|
+
raise BadGatewayError, "A bad gateway error occured for URL #{options[:url]}." if response.status == 502
|
120
|
+
end
|
121
|
+
|
122
|
+
if response.status == 200
|
123
|
+
link_count = parse_data(response)
|
124
|
+
logger.info "#{link_count} DOIs parsed."
|
125
|
+
options[:url] = response.body.dig("links", "next")
|
126
|
+
else
|
127
|
+
logger.error "An error occured for URL #{options[:url]}."
|
128
|
+
logger.error "Error: #{response.body.fetch("errors").inspect}" if response.body.fetch("errors", nil).present?
|
129
|
+
error_count += 1
|
130
|
+
options[:url] = nil
|
131
|
+
end
|
132
|
+
rescue => exception
|
133
|
+
logger.error "Error: #{exception.message}."
|
117
134
|
error_count += 1
|
118
135
|
options[:url] = nil
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
136
|
+
ensure
|
137
|
+
# don't loop when testing
|
138
|
+
break if rack_env == "test"
|
139
|
+
end
|
123
140
|
end
|
124
141
|
|
125
142
|
return link_count if error_count > 0
|
data/lib/maltese/version.rb
CHANGED
data/maltese.gemspec
CHANGED
@@ -21,6 +21,7 @@ Gem::Specification.new do |s|
|
|
21
21
|
s.add_dependency 'activesupport', '>= 4.2.5', '< 6'
|
22
22
|
s.add_dependency 'dotenv', '~> 2.1', '>= 2.1.1'
|
23
23
|
s.add_dependency 'thor', '~> 0.19'
|
24
|
+
s.add_dependency 'retriable', '~> 3.1'
|
24
25
|
s.add_dependency 'sitemap_generator', '~> 6.0'
|
25
26
|
s.add_dependency 'aws-sdk-s3', '~> 1.19'
|
26
27
|
s.add_dependency 'mime-types', '~> 3.1'
|
@@ -0,0 +1,59 @@
|
|
1
|
+
---
|
2
|
+
http_interactions:
|
3
|
+
- request:
|
4
|
+
method: put
|
5
|
+
uri: https://s3.eu-west-1.amazonaws.com/search.test.datacite.org/sitemaps/sitemap.xml.gz
|
6
|
+
body:
|
7
|
+
encoding: ASCII-8BIT
|
8
|
+
string: !binary |-
|
9
|
+
H4sIANy18F0AA5WSTU+EMBCG/wrp1dAP3IOQbvfmaT25Jl5rqdCktMjUhf33dgtr0HjAG8w8z8xLGH6YOpud9QDGuz1imKJMO+Vr45o9ejk95g/oIPjnYEGHLLIOqgnMHrUh9BUh4zji8R77oSEFpYy8Ph2fVas7mRsHQTqlURb5ClLx6JUMadFKBxNiq4c0ZOaALEVCcZn9g7094wlqNMf9167FqUwnG/3DbLxvrMbKd7+9PMGEYXazz6bWfqud4LXt9Ahb5Su7zt35N2M3B59pkn767PfxS2Jn64AFX0+Y2tDZv86DlWVJUhelexLceiWuHEQQtBxUi4OGgGsZpIobksbJFeNWQuh8LQrKypwVOWMnWlbFrtrt7iitKI3cgnDVStfo90F/CGlHeQFOViXeD8YPJlxETM3J9xsnKRSZT118AY2t+lEZAwAA
|
10
|
+
headers:
|
11
|
+
Content-Type:
|
12
|
+
- application/x-gzip
|
13
|
+
Accept-Encoding:
|
14
|
+
- ''
|
15
|
+
User-Agent:
|
16
|
+
- aws-sdk-ruby3/3.85.0 ruby/2.6.3 universal.x86_64-darwin19 aws-sdk-s3/1.59.0
|
17
|
+
X-Amz-Acl:
|
18
|
+
- public-read
|
19
|
+
Cache-Control:
|
20
|
+
- private, max-age=0, no-cache
|
21
|
+
Expect:
|
22
|
+
- 100-continue
|
23
|
+
Content-Md5:
|
24
|
+
- lxwQjxbjrf0xGMXvjTOX9g==
|
25
|
+
X-Amz-Date:
|
26
|
+
- 20191211T092444Z
|
27
|
+
X-Amz-Content-Sha256:
|
28
|
+
- 30bde17d18b8abe11ac80301be4511cf1e773a7c993caf77a08f9d216615e7b7
|
29
|
+
Authorization:
|
30
|
+
- AWS4-HMAC-SHA256 Credential=AKIAJAMMCXAR3IXMNCGQ/20191211/eu-west-1/s3/aws4_request,
|
31
|
+
SignedHeaders=cache-control;content-md5;content-type;expect;host;user-agent;x-amz-acl;x-amz-content-sha256;x-amz-date,
|
32
|
+
Signature=99527ec52cae5635e3928767cf9743f88757fc727cf3be085e99071c3959856b
|
33
|
+
Content-Length:
|
34
|
+
- '333'
|
35
|
+
Accept:
|
36
|
+
- "*/*"
|
37
|
+
response:
|
38
|
+
status:
|
39
|
+
code: 200
|
40
|
+
message: OK
|
41
|
+
headers:
|
42
|
+
X-Amz-Id-2:
|
43
|
+
- 9SEmB+KZX4UNyualN/z8RPkrjhNZQOrxEREvMwCzXGB35F6gZty/R+QADhxlFJxORz9cYmp9jcs=
|
44
|
+
X-Amz-Request-Id:
|
45
|
+
- 2ED234E38358C809
|
46
|
+
Date:
|
47
|
+
- Wed, 11 Dec 2019 09:24:48 GMT
|
48
|
+
Etag:
|
49
|
+
- '"971c108f16e3adfd3118c5ef8d3397f6"'
|
50
|
+
Content-Length:
|
51
|
+
- '0'
|
52
|
+
Server:
|
53
|
+
- AmazonS3
|
54
|
+
body:
|
55
|
+
encoding: UTF-8
|
56
|
+
string: ''
|
57
|
+
http_version:
|
58
|
+
recorded_at: Wed, 11 Dec 2019 09:24:48 GMT
|
59
|
+
recorded_with: VCR 3.0.3
|
@@ -0,0 +1,59 @@
|
|
1
|
+
---
|
2
|
+
http_interactions:
|
3
|
+
- request:
|
4
|
+
method: put
|
5
|
+
uri: https://s3.eu-west-1.amazonaws.com/search.test.datacite.org/sitemaps/sitemap.xml.gz
|
6
|
+
body:
|
7
|
+
encoding: ASCII-8BIT
|
8
|
+
string: !binary |-
|
9
|
+
H4sIACe28F0AA5WSTU+EMBCG/wrp1dAPzCZCut2bp/XkmnitpUKT0iJTF/bf2y2sQeMBbzDzPDMvYfhh6mx21gMY7/aIYYoy7ZSvjWv26OX0mD+gg+CfgwUdssg6qCYwe9SG0FeEjOOIx3vsh4YUlDLy+nR8Vq3uZG4cBOmURlnkK0jFo1cypEUrHUyIrR7SkJkDshQJxWX2D/b2jCeo0Rz3X7sWpzKdbPQPs/G+sRor3/328gQThtnNPpta+612gte20yNsla/sOnfn34zdHHymSfrps9/HL4mdrQMWfD1hakNn/zoPVpYlSV2U7klw65W4chBB0HJQLQ4aAq5lkCpuSBonV4xbCaHztSgoK3NW5IydaFkVu2pX3lFaURq5BeGqla7R74P+ENKO8gKcrEq8H4wfTLiImJqT7zdOUigyn7r4AuvlHP0ZAwAA
|
10
|
+
headers:
|
11
|
+
Content-Type:
|
12
|
+
- application/x-gzip
|
13
|
+
Accept-Encoding:
|
14
|
+
- ''
|
15
|
+
User-Agent:
|
16
|
+
- aws-sdk-ruby3/3.85.0 ruby/2.6.3 universal.x86_64-darwin19 aws-sdk-s3/1.59.0
|
17
|
+
X-Amz-Acl:
|
18
|
+
- public-read
|
19
|
+
Cache-Control:
|
20
|
+
- private, max-age=0, no-cache
|
21
|
+
Expect:
|
22
|
+
- 100-continue
|
23
|
+
Content-Md5:
|
24
|
+
- 6kerbfCyNd6pPzaKoM2YfA==
|
25
|
+
X-Amz-Date:
|
26
|
+
- 20191211T092559Z
|
27
|
+
X-Amz-Content-Sha256:
|
28
|
+
- 8e44f92f3f60740ecea4fa2484527a432755024ddb3217a9594f7ad503653554
|
29
|
+
Authorization:
|
30
|
+
- AWS4-HMAC-SHA256 Credential=AKIAJAMMCXAR3IXMNCGQ/20191211/eu-west-1/s3/aws4_request,
|
31
|
+
SignedHeaders=cache-control;content-md5;content-type;expect;host;user-agent;x-amz-acl;x-amz-content-sha256;x-amz-date,
|
32
|
+
Signature=36ccdaef2b69793980e844b45fdd993ed3733c029b0ea41203666025dc6bfc7c
|
33
|
+
Content-Length:
|
34
|
+
- '333'
|
35
|
+
Accept:
|
36
|
+
- "*/*"
|
37
|
+
response:
|
38
|
+
status:
|
39
|
+
code: 200
|
40
|
+
message: OK
|
41
|
+
headers:
|
42
|
+
X-Amz-Id-2:
|
43
|
+
- n6qSeGOxeKGqA0EKgBzqo6Uerr9zBo5MWnpviel/xBg+wUWtujTI8mJy0jVhDBB6xrXD19QKMNc=
|
44
|
+
X-Amz-Request-Id:
|
45
|
+
- 4DEC13308960594B
|
46
|
+
Date:
|
47
|
+
- Wed, 11 Dec 2019 09:26:00 GMT
|
48
|
+
Etag:
|
49
|
+
- '"ea47ab6df0b235dea93f368aa0cd987c"'
|
50
|
+
Content-Length:
|
51
|
+
- '0'
|
52
|
+
Server:
|
53
|
+
- AmazonS3
|
54
|
+
body:
|
55
|
+
encoding: UTF-8
|
56
|
+
string: ''
|
57
|
+
http_version:
|
58
|
+
recorded_at: Wed, 11 Dec 2019 09:25:59 GMT
|
59
|
+
recorded_with: VCR 3.0.3
|
@@ -0,0 +1,59 @@
|
|
1
|
+
---
|
2
|
+
http_interactions:
|
3
|
+
- request:
|
4
|
+
method: put
|
5
|
+
uri: https://s3.eu-west-1.amazonaws.com/search.test.datacite.org/sitemaps/sitemap.xml.gz
|
6
|
+
body:
|
7
|
+
encoding: ASCII-8BIT
|
8
|
+
string: !binary |-
|
9
|
+
H4sIACi28F0AA5WSTU+EMBBA/wrp1dAPTIyQbvfmaT25Jl5rqdCktMjUhf33dgtr0HhgL6TMvDczTYfvp85mJz2A8W6HGKYo00752rhmh16PT/kj2gv+NVjQIYusg2oCs0NtCH1FyDiOeLzHfmhIQSkjb8+HF9XqTubGQZBOaZRFvoIUPHglQ2q00sGEmOohFZk5IEuQUFxmN7DXM56gRvO4N/VanMp0stG/zMb7xmqsfPfXyxNMGGZX+2Rq7bfaCV7bTo+wVb6w67k7/27s5sFnmqRHn/0+3iRmthZY8HWFqQ2d/W89WFmWJGVR2ifBrVfiwkEEQctBtThoCLiWQarYIWmcXDBuJYTO16KgrMxZkTN2pGVVPFSU3lEav5FbEK5a6Rr9MehPIe0oz8DJKsT7wfjBhLOIU3Py88dJGorMqy6+AQQYhhIZAwAA
|
10
|
+
headers:
|
11
|
+
Content-Type:
|
12
|
+
- application/x-gzip
|
13
|
+
Accept-Encoding:
|
14
|
+
- ''
|
15
|
+
User-Agent:
|
16
|
+
- aws-sdk-ruby3/3.85.0 ruby/2.6.3 universal.x86_64-darwin19 aws-sdk-s3/1.59.0
|
17
|
+
X-Amz-Acl:
|
18
|
+
- public-read
|
19
|
+
Cache-Control:
|
20
|
+
- private, max-age=0, no-cache
|
21
|
+
Expect:
|
22
|
+
- 100-continue
|
23
|
+
Content-Md5:
|
24
|
+
- K+m6HAwodaexD99LvSm33Q==
|
25
|
+
X-Amz-Date:
|
26
|
+
- 20191211T092600Z
|
27
|
+
X-Amz-Content-Sha256:
|
28
|
+
- fd101b8ee345e029935ddd61b64411b3985312a7e150c2f8c387d55fdabf9b70
|
29
|
+
Authorization:
|
30
|
+
- AWS4-HMAC-SHA256 Credential=AKIAJAMMCXAR3IXMNCGQ/20191211/eu-west-1/s3/aws4_request,
|
31
|
+
SignedHeaders=cache-control;content-md5;content-type;expect;host;user-agent;x-amz-acl;x-amz-content-sha256;x-amz-date,
|
32
|
+
Signature=0d11f0f3b8f9499eeaa40c80d540a173f3737a2ef7e452da15af1241f033f304
|
33
|
+
Content-Length:
|
34
|
+
- '333'
|
35
|
+
Accept:
|
36
|
+
- "*/*"
|
37
|
+
response:
|
38
|
+
status:
|
39
|
+
code: 200
|
40
|
+
message: OK
|
41
|
+
headers:
|
42
|
+
X-Amz-Id-2:
|
43
|
+
- pALe39nPehX2z3S/kU9vYTlDBVdZgLtZVs2kzkT+5gs/x1ubsVa3GbONCdTOG9CjJy2OH6SlfHU=
|
44
|
+
X-Amz-Request-Id:
|
45
|
+
- 4FA1F1258A28740E
|
46
|
+
Date:
|
47
|
+
- Wed, 11 Dec 2019 09:26:02 GMT
|
48
|
+
Etag:
|
49
|
+
- '"2be9ba1c0c2875a7b10fdf4bbd29b7dd"'
|
50
|
+
Content-Length:
|
51
|
+
- '0'
|
52
|
+
Server:
|
53
|
+
- AmazonS3
|
54
|
+
body:
|
55
|
+
encoding: UTF-8
|
56
|
+
string: ''
|
57
|
+
http_version:
|
58
|
+
recorded_at: Wed, 11 Dec 2019 09:26:01 GMT
|
59
|
+
recorded_with: VCR 3.0.3
|
data/spec/sitemap_spec.rb
CHANGED
@@ -33,8 +33,20 @@ describe Maltese::Sitemap, vcr: true do
|
|
33
33
|
end
|
34
34
|
|
35
35
|
context "process_data" do
|
36
|
-
it "should
|
37
|
-
stub = stub_request(:get, subject.get_query_url).
|
36
|
+
it "should handle timeout errors with the Datacite REST API" do
|
37
|
+
stub = stub_request(:get, subject.get_query_url).and_return({ status: [408] }, { status: [408] }, { status: [200] })
|
38
|
+
response = subject.process_data(total: 10, url: subject.get_query_url)
|
39
|
+
expect(response).to eq(1)
|
40
|
+
end
|
41
|
+
|
42
|
+
it "should handle bad request errors with the Datacite REST API" do
|
43
|
+
stub = stub_request(:get, subject.get_query_url).and_return({ status: [502] }, { status: [200] })
|
44
|
+
response = subject.process_data(total: 10, url: subject.get_query_url)
|
45
|
+
expect(response).to eq(1)
|
46
|
+
end
|
47
|
+
|
48
|
+
it "should retry 2 times for bad request errors with the Datacite REST API" do
|
49
|
+
stub = stub_request(:get, subject.get_query_url).and_return({ status: [502] }, { status: [502] }, { status: [502] })
|
38
50
|
response = subject.process_data(total: 10, url: subject.get_query_url)
|
39
51
|
expect(response).to eq(0)
|
40
52
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: maltese
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.9.1
|
4
|
+
version: 0.9.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Martin Fenner
|
@@ -106,6 +106,20 @@ dependencies:
|
|
106
106
|
- - "~>"
|
107
107
|
- !ruby/object:Gem::Version
|
108
108
|
version: '0.19'
|
109
|
+
- !ruby/object:Gem::Dependency
|
110
|
+
name: retriable
|
111
|
+
requirement: !ruby/object:Gem::Requirement
|
112
|
+
requirements:
|
113
|
+
- - "~>"
|
114
|
+
- !ruby/object:Gem::Version
|
115
|
+
version: '3.1'
|
116
|
+
type: :runtime
|
117
|
+
prerelease: false
|
118
|
+
version_requirements: !ruby/object:Gem::Requirement
|
119
|
+
requirements:
|
120
|
+
- - "~>"
|
121
|
+
- !ruby/object:Gem::Version
|
122
|
+
version: '3.1'
|
109
123
|
- !ruby/object:Gem::Dependency
|
110
124
|
name: sitemap_generator
|
111
125
|
requirement: !ruby/object:Gem::Requirement
|
@@ -308,6 +322,9 @@ files:
|
|
308
322
|
- spec/fixtures/vcr_cassettes/Maltese_CLI/sitemap/should_succeed.yml
|
309
323
|
- spec/fixtures/vcr_cassettes/Maltese_Sitemap/get_data/should_report_if_there_are_works_returned_by_the_Datacite_REST_API.yml
|
310
324
|
- spec/fixtures/vcr_cassettes/Maltese_Sitemap/get_total/with_works.yml
|
325
|
+
- spec/fixtures/vcr_cassettes/Maltese_Sitemap/process_data/should_catch_bad_request_errors_with_the_Datacite_REST_API.yml
|
326
|
+
- spec/fixtures/vcr_cassettes/Maltese_Sitemap/process_data/should_handle_bad_request_errors_with_the_Datacite_REST_API.yml
|
327
|
+
- spec/fixtures/vcr_cassettes/Maltese_Sitemap/process_data/should_handle_timeout_errors_with_the_Datacite_REST_API.yml
|
311
328
|
- spec/fixtures/vcr_cassettes/Maltese_Sitemap/push_data/should_report_if_there_are_no_works_returned_by_the_Datacite_REST_API.yml
|
312
329
|
- spec/fixtures/vcr_cassettes/Maltese_Sitemap/push_data/should_report_if_there_are_works_returned_by_the_Datacite_REST_API.yml
|
313
330
|
- spec/fixtures/vcr_cassettes/Maltese_Sitemap/queue_jobs/should_report_if_there_are_works_returned_by_the_Datacite_REST_API.yml
|