maltese 0.9.1 → 0.9.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7c8f0fb880fee42bab612c9a17aee2ac8c19bb5c1c320a87ed6b5efb3804b581
4
- data.tar.gz: 4825f893e0fcc9355e5c72b0597f73d471fc813491b1fd778e161633e7665815
3
+ metadata.gz: 4b25d7aed1fa4b41a642a8bf8449e067c67fa28861cf319c54d24b4ffccbcd6d
4
+ data.tar.gz: af043d259c92a902d5a42c281a0746fc2cf65f356ebb2bec161cc707f89f9b3a
5
5
  SHA512:
6
- metadata.gz: c30bd244311af98acd5e3b23353f9336a9abdce721fb730496e1e16e239dc8fc07173a23669fb98621588d45fa09817c45f671370192c54eb2f16c7b57023743
7
- data.tar.gz: c3ad6590a4ec6a16e245db103d5e9dcd0198284c282db98b848f47e7dae16bddc04c435a0e83db789049c5e13ad427f10aae703b3c6dbe597f29ff4b27f98b4f
6
+ metadata.gz: e1399ede091a478151153b5071b7881ae1bedd9068ac017ab34c1cfcf1931559a607f0f2ccba83b1018b3a251898e4498d8557267905478c7dd48e322b288722
7
+ data.tar.gz: 50cd6f3a1ee8a4b0c508805720efcc58db31208dd7842668b6785cf0684bd03059d2872113e002d4c89c5e501fb2924f6323ff24caa3db5227c86517fbbef7c3
data/Dockerfile CHANGED
@@ -11,6 +11,6 @@ RUN apt-get update && apt-get upgrade -y -o Dpkg::Options::="--force-confold" &&
11
11
  apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
12
12
 
13
13
  # Install maltese gem
14
- RUN /sbin/setuser app gem install maltese -v 0.9.1
14
+ RUN /sbin/setuser app gem install maltese -v 0.9.2
15
15
 
16
16
  CMD maltese sitemap --sitemap_bucket $SITEMAP_BUCKET --rack_env $RACK_ENV --access_key $AWS_ACCESS_KEY_ID --secret_key $AWS_SECRET_ACCESS_KEY --region $AWS_REGION
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- maltese (0.9.1)
4
+ maltese (0.9.2)
5
5
  activesupport (>= 4.2.5, < 6)
6
6
  aws-sdk-s3 (~> 1.19)
7
7
  dotenv (~> 2.1, >= 2.1.1)
@@ -9,6 +9,7 @@ PATH
9
9
  logstash-logger (~> 0.26.1)
10
10
  maremma (~> 4.1)
11
11
  mime-types (~> 3.1)
12
+ retriable (~> 3.1)
12
13
  sitemap_generator (~> 6.0)
13
14
  thor (~> 0.19)
14
15
 
@@ -88,6 +89,7 @@ GEM
88
89
  rack-test (0.8.3)
89
90
  rack (>= 1.0, < 3)
90
91
  rake (12.3.3)
92
+ retriable (3.1.2)
91
93
  rspec (3.9.0)
92
94
  rspec-core (~> 3.9.0)
93
95
  rspec-expectations (~> 3.9.0)
@@ -1,6 +1,9 @@
1
1
  require 'logstash-logger'
2
+ require 'retriable'
2
3
 
3
4
  module Maltese
5
+ class ::BadGatewayError < StandardError; end
6
+
4
7
  class Sitemap
5
8
  attr_reader :sitemap_bucket, :rack_env, :access_key, :secret_key, :region, :logger
6
9
 
@@ -105,21 +108,35 @@ module Maltese
105
108
 
106
109
  # walk through paginated results
107
110
  while options[:url] do
108
- response = get_data(options[:url])
109
-
110
- if response.status == 200
111
- link_count = parse_data(response)
112
- logger.info "#{link_count} DOIs parsed."
113
- options[:url] = response.body.dig("links", "next")
114
- else
115
- logger.error "An error occured for URL #{options[:url]}."
116
- logger.error "Error message: #{response.body.fetch("errors").inspect}" if response.body.fetch("errors", nil).present?
111
+ begin
112
+ response = nil
113
+
114
+ # retry on temporal errors (status codes 408 and 502)
115
+ Retriable.retriable(base_interval: 10, multiplier: 2) do
116
+ response = get_data(options[:url])
117
+
118
+ raise Timeout::Error, "A timeout error occured for URL #{options[:url]}." if response.status == 408
119
+ raise BadGatewayError, "A bad gateway error occured for URL #{options[:url]}." if response.status == 502
120
+ end
121
+
122
+ if response.status == 200
123
+ link_count = parse_data(response)
124
+ logger.info "#{link_count} DOIs parsed."
125
+ options[:url] = response.body.dig("links", "next")
126
+ else
127
+ logger.error "An error occured for URL #{options[:url]}."
128
+ logger.error "Error: #{response.body.fetch("errors").inspect}" if response.body.fetch("errors", nil).present?
129
+ error_count += 1
130
+ options[:url] = nil
131
+ end
132
+ rescue => exception
133
+ logger.error "Error: #{exception.message}."
117
134
  error_count += 1
118
135
  options[:url] = nil
119
- end
120
-
121
- # don't loop when testing
122
- break if rack_env == "test"
136
+ ensure
137
+ # don't loop when testing
138
+ break if rack_env == "test"
139
+ end
123
140
  end
124
141
 
125
142
  return link_count if error_count > 0
@@ -1,3 +1,3 @@
1
1
  module Maltese
2
- VERSION = "0.9.1"
2
+ VERSION = "0.9.2"
3
3
  end
data/maltese.gemspec CHANGED
@@ -21,6 +21,7 @@ Gem::Specification.new do |s|
21
21
  s.add_dependency 'activesupport', '>= 4.2.5', '< 6'
22
22
  s.add_dependency 'dotenv', '~> 2.1', '>= 2.1.1'
23
23
  s.add_dependency 'thor', '~> 0.19'
24
+ s.add_dependency 'retriable', '~> 3.1'
24
25
  s.add_dependency 'sitemap_generator', '~> 6.0'
25
26
  s.add_dependency 'aws-sdk-s3', '~> 1.19'
26
27
  s.add_dependency 'mime-types', '~> 3.1'
@@ -0,0 +1,59 @@
1
+ ---
2
+ http_interactions:
3
+ - request:
4
+ method: put
5
+ uri: https://s3.eu-west-1.amazonaws.com/search.test.datacite.org/sitemaps/sitemap.xml.gz
6
+ body:
7
+ encoding: ASCII-8BIT
8
+ string: !binary |-
9
+ H4sIANy18F0AA5WSTU+EMBCG/wrp1dAP3IOQbvfmaT25Jl5rqdCktMjUhf33dgtr0HjAG8w8z8xLGH6YOpud9QDGuz1imKJMO+Vr45o9ejk95g/oIPjnYEGHLLIOqgnMHrUh9BUh4zji8R77oSEFpYy8Ph2fVas7mRsHQTqlURb5ClLx6JUMadFKBxNiq4c0ZOaALEVCcZn9g7094wlqNMf9167FqUwnG/3DbLxvrMbKd7+9PMGEYXazz6bWfqud4LXt9Ahb5Su7zt35N2M3B59pkn767PfxS2Jn64AFX0+Y2tDZv86DlWVJUhelexLceiWuHEQQtBxUi4OGgGsZpIobksbJFeNWQuh8LQrKypwVOWMnWlbFrtrt7iitKI3cgnDVStfo90F/CGlHeQFOViXeD8YPJlxETM3J9xsnKRSZT118AY2t+lEZAwAA
10
+ headers:
11
+ Content-Type:
12
+ - application/x-gzip
13
+ Accept-Encoding:
14
+ - ''
15
+ User-Agent:
16
+ - aws-sdk-ruby3/3.85.0 ruby/2.6.3 universal.x86_64-darwin19 aws-sdk-s3/1.59.0
17
+ X-Amz-Acl:
18
+ - public-read
19
+ Cache-Control:
20
+ - private, max-age=0, no-cache
21
+ Expect:
22
+ - 100-continue
23
+ Content-Md5:
24
+ - lxwQjxbjrf0xGMXvjTOX9g==
25
+ X-Amz-Date:
26
+ - 20191211T092444Z
27
+ X-Amz-Content-Sha256:
28
+ - 30bde17d18b8abe11ac80301be4511cf1e773a7c993caf77a08f9d216615e7b7
29
+ Authorization:
30
+ - AWS4-HMAC-SHA256 Credential=AKIAJAMMCXAR3IXMNCGQ/20191211/eu-west-1/s3/aws4_request,
31
+ SignedHeaders=cache-control;content-md5;content-type;expect;host;user-agent;x-amz-acl;x-amz-content-sha256;x-amz-date,
32
+ Signature=99527ec52cae5635e3928767cf9743f88757fc727cf3be085e99071c3959856b
33
+ Content-Length:
34
+ - '333'
35
+ Accept:
36
+ - "*/*"
37
+ response:
38
+ status:
39
+ code: 200
40
+ message: OK
41
+ headers:
42
+ X-Amz-Id-2:
43
+ - 9SEmB+KZX4UNyualN/z8RPkrjhNZQOrxEREvMwCzXGB35F6gZty/R+QADhxlFJxORz9cYmp9jcs=
44
+ X-Amz-Request-Id:
45
+ - 2ED234E38358C809
46
+ Date:
47
+ - Wed, 11 Dec 2019 09:24:48 GMT
48
+ Etag:
49
+ - '"971c108f16e3adfd3118c5ef8d3397f6"'
50
+ Content-Length:
51
+ - '0'
52
+ Server:
53
+ - AmazonS3
54
+ body:
55
+ encoding: UTF-8
56
+ string: ''
57
+ http_version:
58
+ recorded_at: Wed, 11 Dec 2019 09:24:48 GMT
59
+ recorded_with: VCR 3.0.3
@@ -0,0 +1,59 @@
1
+ ---
2
+ http_interactions:
3
+ - request:
4
+ method: put
5
+ uri: https://s3.eu-west-1.amazonaws.com/search.test.datacite.org/sitemaps/sitemap.xml.gz
6
+ body:
7
+ encoding: ASCII-8BIT
8
+ string: !binary |-
9
+ H4sIACe28F0AA5WSTU+EMBCG/wrp1dAPzCZCut2bp/XkmnitpUKT0iJTF/bf2y2sQeMBbzDzPDMvYfhh6mx21gMY7/aIYYoy7ZSvjWv26OX0mD+gg+CfgwUdssg6qCYwe9SG0FeEjOOIx3vsh4YUlDLy+nR8Vq3uZG4cBOmURlnkK0jFo1cypEUrHUyIrR7SkJkDshQJxWX2D/b2jCeo0Rz3X7sWpzKdbPQPs/G+sRor3/328gQThtnNPpta+612gte20yNsla/sOnfn34zdHHymSfrps9/HL4mdrQMWfD1hakNn/zoPVpYlSV2U7klw65W4chBB0HJQLQ4aAq5lkCpuSBonV4xbCaHztSgoK3NW5IydaFkVu2pX3lFaURq5BeGqla7R74P+ENKO8gKcrEq8H4wfTLiImJqT7zdOUigyn7r4AuvlHP0ZAwAA
10
+ headers:
11
+ Content-Type:
12
+ - application/x-gzip
13
+ Accept-Encoding:
14
+ - ''
15
+ User-Agent:
16
+ - aws-sdk-ruby3/3.85.0 ruby/2.6.3 universal.x86_64-darwin19 aws-sdk-s3/1.59.0
17
+ X-Amz-Acl:
18
+ - public-read
19
+ Cache-Control:
20
+ - private, max-age=0, no-cache
21
+ Expect:
22
+ - 100-continue
23
+ Content-Md5:
24
+ - 6kerbfCyNd6pPzaKoM2YfA==
25
+ X-Amz-Date:
26
+ - 20191211T092559Z
27
+ X-Amz-Content-Sha256:
28
+ - 8e44f92f3f60740ecea4fa2484527a432755024ddb3217a9594f7ad503653554
29
+ Authorization:
30
+ - AWS4-HMAC-SHA256 Credential=AKIAJAMMCXAR3IXMNCGQ/20191211/eu-west-1/s3/aws4_request,
31
+ SignedHeaders=cache-control;content-md5;content-type;expect;host;user-agent;x-amz-acl;x-amz-content-sha256;x-amz-date,
32
+ Signature=36ccdaef2b69793980e844b45fdd993ed3733c029b0ea41203666025dc6bfc7c
33
+ Content-Length:
34
+ - '333'
35
+ Accept:
36
+ - "*/*"
37
+ response:
38
+ status:
39
+ code: 200
40
+ message: OK
41
+ headers:
42
+ X-Amz-Id-2:
43
+ - n6qSeGOxeKGqA0EKgBzqo6Uerr9zBo5MWnpviel/xBg+wUWtujTI8mJy0jVhDBB6xrXD19QKMNc=
44
+ X-Amz-Request-Id:
45
+ - 4DEC13308960594B
46
+ Date:
47
+ - Wed, 11 Dec 2019 09:26:00 GMT
48
+ Etag:
49
+ - '"ea47ab6df0b235dea93f368aa0cd987c"'
50
+ Content-Length:
51
+ - '0'
52
+ Server:
53
+ - AmazonS3
54
+ body:
55
+ encoding: UTF-8
56
+ string: ''
57
+ http_version:
58
+ recorded_at: Wed, 11 Dec 2019 09:25:59 GMT
59
+ recorded_with: VCR 3.0.3
@@ -0,0 +1,59 @@
1
+ ---
2
+ http_interactions:
3
+ - request:
4
+ method: put
5
+ uri: https://s3.eu-west-1.amazonaws.com/search.test.datacite.org/sitemaps/sitemap.xml.gz
6
+ body:
7
+ encoding: ASCII-8BIT
8
+ string: !binary |-
9
+ H4sIACi28F0AA5WSTU+EMBBA/wrp1dAPTIyQbvfmaT25Jl5rqdCktMjUhf33dgtr0HhgL6TMvDczTYfvp85mJz2A8W6HGKYo00752rhmh16PT/kj2gv+NVjQIYusg2oCs0NtCH1FyDiOeLzHfmhIQSkjb8+HF9XqTubGQZBOaZRFvoIUPHglQ2q00sGEmOohFZk5IEuQUFxmN7DXM56gRvO4N/VanMp0stG/zMb7xmqsfPfXyxNMGGZX+2Rq7bfaCV7bTo+wVb6w67k7/27s5sFnmqRHn/0+3iRmthZY8HWFqQ2d/W89WFmWJGVR2ifBrVfiwkEEQctBtThoCLiWQarYIWmcXDBuJYTO16KgrMxZkTN2pGVVPFSU3lEav5FbEK5a6Rr9MehPIe0oz8DJKsT7wfjBhLOIU3Py88dJGorMqy6+AQQYhhIZAwAA
10
+ headers:
11
+ Content-Type:
12
+ - application/x-gzip
13
+ Accept-Encoding:
14
+ - ''
15
+ User-Agent:
16
+ - aws-sdk-ruby3/3.85.0 ruby/2.6.3 universal.x86_64-darwin19 aws-sdk-s3/1.59.0
17
+ X-Amz-Acl:
18
+ - public-read
19
+ Cache-Control:
20
+ - private, max-age=0, no-cache
21
+ Expect:
22
+ - 100-continue
23
+ Content-Md5:
24
+ - K+m6HAwodaexD99LvSm33Q==
25
+ X-Amz-Date:
26
+ - 20191211T092600Z
27
+ X-Amz-Content-Sha256:
28
+ - fd101b8ee345e029935ddd61b64411b3985312a7e150c2f8c387d55fdabf9b70
29
+ Authorization:
30
+ - AWS4-HMAC-SHA256 Credential=AKIAJAMMCXAR3IXMNCGQ/20191211/eu-west-1/s3/aws4_request,
31
+ SignedHeaders=cache-control;content-md5;content-type;expect;host;user-agent;x-amz-acl;x-amz-content-sha256;x-amz-date,
32
+ Signature=0d11f0f3b8f9499eeaa40c80d540a173f3737a2ef7e452da15af1241f033f304
33
+ Content-Length:
34
+ - '333'
35
+ Accept:
36
+ - "*/*"
37
+ response:
38
+ status:
39
+ code: 200
40
+ message: OK
41
+ headers:
42
+ X-Amz-Id-2:
43
+ - pALe39nPehX2z3S/kU9vYTlDBVdZgLtZVs2kzkT+5gs/x1ubsVa3GbONCdTOG9CjJy2OH6SlfHU=
44
+ X-Amz-Request-Id:
45
+ - 4FA1F1258A28740E
46
+ Date:
47
+ - Wed, 11 Dec 2019 09:26:02 GMT
48
+ Etag:
49
+ - '"2be9ba1c0c2875a7b10fdf4bbd29b7dd"'
50
+ Content-Length:
51
+ - '0'
52
+ Server:
53
+ - AmazonS3
54
+ body:
55
+ encoding: UTF-8
56
+ string: ''
57
+ http_version:
58
+ recorded_at: Wed, 11 Dec 2019 09:26:01 GMT
59
+ recorded_with: VCR 3.0.3
data/spec/sitemap_spec.rb CHANGED
@@ -33,8 +33,20 @@ describe Maltese::Sitemap, vcr: true do
33
33
  end
34
34
 
35
35
  context "process_data" do
36
- it "should catch timeout errors with the Datacite REST API" do
37
- stub = stub_request(:get, subject.get_query_url).to_return(:status => [408])
36
+ it "should handle timeout errors with the Datacite REST API" do
37
+ stub = stub_request(:get, subject.get_query_url).and_return({ status: [408] }, { status: [408] }, { status: [200] })
38
+ response = subject.process_data(total: 10, url: subject.get_query_url)
39
+ expect(response).to eq(1)
40
+ end
41
+
42
+ it "should handle bad request errors with the Datacite REST API" do
43
+ stub = stub_request(:get, subject.get_query_url).and_return({ status: [502] }, { status: [200] })
44
+ response = subject.process_data(total: 10, url: subject.get_query_url)
45
+ expect(response).to eq(1)
46
+ end
47
+
48
+ it "should retry 2 times for bad request errors with the Datacite REST API" do
49
+ stub = stub_request(:get, subject.get_query_url).and_return({ status: [502] }, { status: [502] }, { status: [502] })
38
50
  response = subject.process_data(total: 10, url: subject.get_query_url)
39
51
  expect(response).to eq(0)
40
52
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: maltese
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.1
4
+ version: 0.9.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Martin Fenner
@@ -106,6 +106,20 @@ dependencies:
106
106
  - - "~>"
107
107
  - !ruby/object:Gem::Version
108
108
  version: '0.19'
109
+ - !ruby/object:Gem::Dependency
110
+ name: retriable
111
+ requirement: !ruby/object:Gem::Requirement
112
+ requirements:
113
+ - - "~>"
114
+ - !ruby/object:Gem::Version
115
+ version: '3.1'
116
+ type: :runtime
117
+ prerelease: false
118
+ version_requirements: !ruby/object:Gem::Requirement
119
+ requirements:
120
+ - - "~>"
121
+ - !ruby/object:Gem::Version
122
+ version: '3.1'
109
123
  - !ruby/object:Gem::Dependency
110
124
  name: sitemap_generator
111
125
  requirement: !ruby/object:Gem::Requirement
@@ -308,6 +322,9 @@ files:
308
322
  - spec/fixtures/vcr_cassettes/Maltese_CLI/sitemap/should_succeed.yml
309
323
  - spec/fixtures/vcr_cassettes/Maltese_Sitemap/get_data/should_report_if_there_are_works_returned_by_the_Datacite_REST_API.yml
310
324
  - spec/fixtures/vcr_cassettes/Maltese_Sitemap/get_total/with_works.yml
325
+ - spec/fixtures/vcr_cassettes/Maltese_Sitemap/process_data/should_catch_bad_request_errors_with_the_Datacite_REST_API.yml
326
+ - spec/fixtures/vcr_cassettes/Maltese_Sitemap/process_data/should_handle_bad_request_errors_with_the_Datacite_REST_API.yml
327
+ - spec/fixtures/vcr_cassettes/Maltese_Sitemap/process_data/should_handle_timeout_errors_with_the_Datacite_REST_API.yml
311
328
  - spec/fixtures/vcr_cassettes/Maltese_Sitemap/push_data/should_report_if_there_are_no_works_returned_by_the_Datacite_REST_API.yml
312
329
  - spec/fixtures/vcr_cassettes/Maltese_Sitemap/push_data/should_report_if_there_are_works_returned_by_the_Datacite_REST_API.yml
313
330
  - spec/fixtures/vcr_cassettes/Maltese_Sitemap/queue_jobs/should_report_if_there_are_works_returned_by_the_Datacite_REST_API.yml