maltese 0.9.1 → 0.9.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7c8f0fb880fee42bab612c9a17aee2ac8c19bb5c1c320a87ed6b5efb3804b581
4
- data.tar.gz: 4825f893e0fcc9355e5c72b0597f73d471fc813491b1fd778e161633e7665815
3
+ metadata.gz: 4b25d7aed1fa4b41a642a8bf8449e067c67fa28861cf319c54d24b4ffccbcd6d
4
+ data.tar.gz: af043d259c92a902d5a42c281a0746fc2cf65f356ebb2bec161cc707f89f9b3a
5
5
  SHA512:
6
- metadata.gz: c30bd244311af98acd5e3b23353f9336a9abdce721fb730496e1e16e239dc8fc07173a23669fb98621588d45fa09817c45f671370192c54eb2f16c7b57023743
7
- data.tar.gz: c3ad6590a4ec6a16e245db103d5e9dcd0198284c282db98b848f47e7dae16bddc04c435a0e83db789049c5e13ad427f10aae703b3c6dbe597f29ff4b27f98b4f
6
+ metadata.gz: e1399ede091a478151153b5071b7881ae1bedd9068ac017ab34c1cfcf1931559a607f0f2ccba83b1018b3a251898e4498d8557267905478c7dd48e322b288722
7
+ data.tar.gz: 50cd6f3a1ee8a4b0c508805720efcc58db31208dd7842668b6785cf0684bd03059d2872113e002d4c89c5e501fb2924f6323ff24caa3db5227c86517fbbef7c3
data/Dockerfile CHANGED
@@ -11,6 +11,6 @@ RUN apt-get update && apt-get upgrade -y -o Dpkg::Options::="--force-confold" &&
11
11
  apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
12
12
 
13
13
  # Install maltese gem
14
- RUN /sbin/setuser app gem install maltese -v 0.9.1
14
+ RUN /sbin/setuser app gem install maltese -v 0.9.2
15
15
 
16
16
  CMD maltese sitemap --sitemap_bucket $SITEMAP_BUCKET --rack_env $RACK_ENV --access_key $AWS_ACCESS_KEY_ID --secret_key $AWS_SECRET_ACCESS_KEY --region $AWS_REGION
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- maltese (0.9.1)
4
+ maltese (0.9.2)
5
5
  activesupport (>= 4.2.5, < 6)
6
6
  aws-sdk-s3 (~> 1.19)
7
7
  dotenv (~> 2.1, >= 2.1.1)
@@ -9,6 +9,7 @@ PATH
9
9
  logstash-logger (~> 0.26.1)
10
10
  maremma (~> 4.1)
11
11
  mime-types (~> 3.1)
12
+ retriable (~> 3.1)
12
13
  sitemap_generator (~> 6.0)
13
14
  thor (~> 0.19)
14
15
 
@@ -88,6 +89,7 @@ GEM
88
89
  rack-test (0.8.3)
89
90
  rack (>= 1.0, < 3)
90
91
  rake (12.3.3)
92
+ retriable (3.1.2)
91
93
  rspec (3.9.0)
92
94
  rspec-core (~> 3.9.0)
93
95
  rspec-expectations (~> 3.9.0)
@@ -1,6 +1,9 @@
1
1
  require 'logstash-logger'
2
+ require 'retriable'
2
3
 
3
4
  module Maltese
5
+ class ::BadGatewayError < StandardError; end
6
+
4
7
  class Sitemap
5
8
  attr_reader :sitemap_bucket, :rack_env, :access_key, :secret_key, :region, :logger
6
9
 
@@ -105,21 +108,35 @@ module Maltese
105
108
 
106
109
  # walk through paginated results
107
110
  while options[:url] do
108
- response = get_data(options[:url])
109
-
110
- if response.status == 200
111
- link_count = parse_data(response)
112
- logger.info "#{link_count} DOIs parsed."
113
- options[:url] = response.body.dig("links", "next")
114
- else
115
- logger.error "An error occured for URL #{options[:url]}."
116
- logger.error "Error message: #{response.body.fetch("errors").inspect}" if response.body.fetch("errors", nil).present?
111
+ begin
112
+ response = nil
113
+
114
+ # retry on temporal errors (status codes 408 and 502)
115
+ Retriable.retriable(base_interval: 10, multiplier: 2) do
116
+ response = get_data(options[:url])
117
+
118
+ raise Timeout::Error, "A timeout error occured for URL #{options[:url]}." if response.status == 408
119
+ raise BadGatewayError, "A bad gateway error occured for URL #{options[:url]}." if response.status == 502
120
+ end
121
+
122
+ if response.status == 200
123
+ link_count = parse_data(response)
124
+ logger.info "#{link_count} DOIs parsed."
125
+ options[:url] = response.body.dig("links", "next")
126
+ else
127
+ logger.error "An error occured for URL #{options[:url]}."
128
+ logger.error "Error: #{response.body.fetch("errors").inspect}" if response.body.fetch("errors", nil).present?
129
+ error_count += 1
130
+ options[:url] = nil
131
+ end
132
+ rescue => exception
133
+ logger.error "Error: #{exception.message}."
117
134
  error_count += 1
118
135
  options[:url] = nil
119
- end
120
-
121
- # don't loop when testing
122
- break if rack_env == "test"
136
+ ensure
137
+ # don't loop when testing
138
+ break if rack_env == "test"
139
+ end
123
140
  end
124
141
 
125
142
  return link_count if error_count > 0
@@ -1,3 +1,3 @@
1
1
  module Maltese
2
- VERSION = "0.9.1"
2
+ VERSION = "0.9.2"
3
3
  end
data/maltese.gemspec CHANGED
@@ -21,6 +21,7 @@ Gem::Specification.new do |s|
21
21
  s.add_dependency 'activesupport', '>= 4.2.5', '< 6'
22
22
  s.add_dependency 'dotenv', '~> 2.1', '>= 2.1.1'
23
23
  s.add_dependency 'thor', '~> 0.19'
24
+ s.add_dependency 'retriable', '~> 3.1'
24
25
  s.add_dependency 'sitemap_generator', '~> 6.0'
25
26
  s.add_dependency 'aws-sdk-s3', '~> 1.19'
26
27
  s.add_dependency 'mime-types', '~> 3.1'
@@ -0,0 +1,59 @@
1
+ ---
2
+ http_interactions:
3
+ - request:
4
+ method: put
5
+ uri: https://s3.eu-west-1.amazonaws.com/search.test.datacite.org/sitemaps/sitemap.xml.gz
6
+ body:
7
+ encoding: ASCII-8BIT
8
+ string: !binary |-
9
+ H4sIANy18F0AA5WSTU+EMBCG/wrp1dAP3IOQbvfmaT25Jl5rqdCktMjUhf33dgtr0HjAG8w8z8xLGH6YOpud9QDGuz1imKJMO+Vr45o9ejk95g/oIPjnYEGHLLIOqgnMHrUh9BUh4zji8R77oSEFpYy8Ph2fVas7mRsHQTqlURb5ClLx6JUMadFKBxNiq4c0ZOaALEVCcZn9g7094wlqNMf9167FqUwnG/3DbLxvrMbKd7+9PMGEYXazz6bWfqud4LXt9Ahb5Su7zt35N2M3B59pkn767PfxS2Jn64AFX0+Y2tDZv86DlWVJUhelexLceiWuHEQQtBxUi4OGgGsZpIobksbJFeNWQuh8LQrKypwVOWMnWlbFrtrt7iitKI3cgnDVStfo90F/CGlHeQFOViXeD8YPJlxETM3J9xsnKRSZT118AY2t+lEZAwAA
10
+ headers:
11
+ Content-Type:
12
+ - application/x-gzip
13
+ Accept-Encoding:
14
+ - ''
15
+ User-Agent:
16
+ - aws-sdk-ruby3/3.85.0 ruby/2.6.3 universal.x86_64-darwin19 aws-sdk-s3/1.59.0
17
+ X-Amz-Acl:
18
+ - public-read
19
+ Cache-Control:
20
+ - private, max-age=0, no-cache
21
+ Expect:
22
+ - 100-continue
23
+ Content-Md5:
24
+ - lxwQjxbjrf0xGMXvjTOX9g==
25
+ X-Amz-Date:
26
+ - 20191211T092444Z
27
+ X-Amz-Content-Sha256:
28
+ - 30bde17d18b8abe11ac80301be4511cf1e773a7c993caf77a08f9d216615e7b7
29
+ Authorization:
30
+ - AWS4-HMAC-SHA256 Credential=AKIAJAMMCXAR3IXMNCGQ/20191211/eu-west-1/s3/aws4_request,
31
+ SignedHeaders=cache-control;content-md5;content-type;expect;host;user-agent;x-amz-acl;x-amz-content-sha256;x-amz-date,
32
+ Signature=99527ec52cae5635e3928767cf9743f88757fc727cf3be085e99071c3959856b
33
+ Content-Length:
34
+ - '333'
35
+ Accept:
36
+ - "*/*"
37
+ response:
38
+ status:
39
+ code: 200
40
+ message: OK
41
+ headers:
42
+ X-Amz-Id-2:
43
+ - 9SEmB+KZX4UNyualN/z8RPkrjhNZQOrxEREvMwCzXGB35F6gZty/R+QADhxlFJxORz9cYmp9jcs=
44
+ X-Amz-Request-Id:
45
+ - 2ED234E38358C809
46
+ Date:
47
+ - Wed, 11 Dec 2019 09:24:48 GMT
48
+ Etag:
49
+ - '"971c108f16e3adfd3118c5ef8d3397f6"'
50
+ Content-Length:
51
+ - '0'
52
+ Server:
53
+ - AmazonS3
54
+ body:
55
+ encoding: UTF-8
56
+ string: ''
57
+ http_version:
58
+ recorded_at: Wed, 11 Dec 2019 09:24:48 GMT
59
+ recorded_with: VCR 3.0.3
@@ -0,0 +1,59 @@
1
+ ---
2
+ http_interactions:
3
+ - request:
4
+ method: put
5
+ uri: https://s3.eu-west-1.amazonaws.com/search.test.datacite.org/sitemaps/sitemap.xml.gz
6
+ body:
7
+ encoding: ASCII-8BIT
8
+ string: !binary |-
9
+ H4sIACe28F0AA5WSTU+EMBCG/wrp1dAPzCZCut2bp/XkmnitpUKT0iJTF/bf2y2sQeMBbzDzPDMvYfhh6mx21gMY7/aIYYoy7ZSvjWv26OX0mD+gg+CfgwUdssg6qCYwe9SG0FeEjOOIx3vsh4YUlDLy+nR8Vq3uZG4cBOmURlnkK0jFo1cypEUrHUyIrR7SkJkDshQJxWX2D/b2jCeo0Rz3X7sWpzKdbPQPs/G+sRor3/328gQThtnNPpta+612gte20yNsla/sOnfn34zdHHymSfrps9/HL4mdrQMWfD1hakNn/zoPVpYlSV2U7klw65W4chBB0HJQLQ4aAq5lkCpuSBonV4xbCaHztSgoK3NW5IydaFkVu2pX3lFaURq5BeGqla7R74P+ENKO8gKcrEq8H4wfTLiImJqT7zdOUigyn7r4AuvlHP0ZAwAA
10
+ headers:
11
+ Content-Type:
12
+ - application/x-gzip
13
+ Accept-Encoding:
14
+ - ''
15
+ User-Agent:
16
+ - aws-sdk-ruby3/3.85.0 ruby/2.6.3 universal.x86_64-darwin19 aws-sdk-s3/1.59.0
17
+ X-Amz-Acl:
18
+ - public-read
19
+ Cache-Control:
20
+ - private, max-age=0, no-cache
21
+ Expect:
22
+ - 100-continue
23
+ Content-Md5:
24
+ - 6kerbfCyNd6pPzaKoM2YfA==
25
+ X-Amz-Date:
26
+ - 20191211T092559Z
27
+ X-Amz-Content-Sha256:
28
+ - 8e44f92f3f60740ecea4fa2484527a432755024ddb3217a9594f7ad503653554
29
+ Authorization:
30
+ - AWS4-HMAC-SHA256 Credential=AKIAJAMMCXAR3IXMNCGQ/20191211/eu-west-1/s3/aws4_request,
31
+ SignedHeaders=cache-control;content-md5;content-type;expect;host;user-agent;x-amz-acl;x-amz-content-sha256;x-amz-date,
32
+ Signature=36ccdaef2b69793980e844b45fdd993ed3733c029b0ea41203666025dc6bfc7c
33
+ Content-Length:
34
+ - '333'
35
+ Accept:
36
+ - "*/*"
37
+ response:
38
+ status:
39
+ code: 200
40
+ message: OK
41
+ headers:
42
+ X-Amz-Id-2:
43
+ - n6qSeGOxeKGqA0EKgBzqo6Uerr9zBo5MWnpviel/xBg+wUWtujTI8mJy0jVhDBB6xrXD19QKMNc=
44
+ X-Amz-Request-Id:
45
+ - 4DEC13308960594B
46
+ Date:
47
+ - Wed, 11 Dec 2019 09:26:00 GMT
48
+ Etag:
49
+ - '"ea47ab6df0b235dea93f368aa0cd987c"'
50
+ Content-Length:
51
+ - '0'
52
+ Server:
53
+ - AmazonS3
54
+ body:
55
+ encoding: UTF-8
56
+ string: ''
57
+ http_version:
58
+ recorded_at: Wed, 11 Dec 2019 09:25:59 GMT
59
+ recorded_with: VCR 3.0.3
@@ -0,0 +1,59 @@
1
+ ---
2
+ http_interactions:
3
+ - request:
4
+ method: put
5
+ uri: https://s3.eu-west-1.amazonaws.com/search.test.datacite.org/sitemaps/sitemap.xml.gz
6
+ body:
7
+ encoding: ASCII-8BIT
8
+ string: !binary |-
9
+ H4sIACi28F0AA5WSTU+EMBBA/wrp1dAPTIyQbvfmaT25Jl5rqdCktMjUhf33dgtr0HhgL6TMvDczTYfvp85mJz2A8W6HGKYo00752rhmh16PT/kj2gv+NVjQIYusg2oCs0NtCH1FyDiOeLzHfmhIQSkjb8+HF9XqTubGQZBOaZRFvoIUPHglQ2q00sGEmOohFZk5IEuQUFxmN7DXM56gRvO4N/VanMp0stG/zMb7xmqsfPfXyxNMGGZX+2Rq7bfaCV7bTo+wVb6w67k7/27s5sFnmqRHn/0+3iRmthZY8HWFqQ2d/W89WFmWJGVR2ifBrVfiwkEEQctBtThoCLiWQarYIWmcXDBuJYTO16KgrMxZkTN2pGVVPFSU3lEav5FbEK5a6Rr9MehPIe0oz8DJKsT7wfjBhLOIU3Py88dJGorMqy6+AQQYhhIZAwAA
10
+ headers:
11
+ Content-Type:
12
+ - application/x-gzip
13
+ Accept-Encoding:
14
+ - ''
15
+ User-Agent:
16
+ - aws-sdk-ruby3/3.85.0 ruby/2.6.3 universal.x86_64-darwin19 aws-sdk-s3/1.59.0
17
+ X-Amz-Acl:
18
+ - public-read
19
+ Cache-Control:
20
+ - private, max-age=0, no-cache
21
+ Expect:
22
+ - 100-continue
23
+ Content-Md5:
24
+ - K+m6HAwodaexD99LvSm33Q==
25
+ X-Amz-Date:
26
+ - 20191211T092600Z
27
+ X-Amz-Content-Sha256:
28
+ - fd101b8ee345e029935ddd61b64411b3985312a7e150c2f8c387d55fdabf9b70
29
+ Authorization:
30
+ - AWS4-HMAC-SHA256 Credential=AKIAJAMMCXAR3IXMNCGQ/20191211/eu-west-1/s3/aws4_request,
31
+ SignedHeaders=cache-control;content-md5;content-type;expect;host;user-agent;x-amz-acl;x-amz-content-sha256;x-amz-date,
32
+ Signature=0d11f0f3b8f9499eeaa40c80d540a173f3737a2ef7e452da15af1241f033f304
33
+ Content-Length:
34
+ - '333'
35
+ Accept:
36
+ - "*/*"
37
+ response:
38
+ status:
39
+ code: 200
40
+ message: OK
41
+ headers:
42
+ X-Amz-Id-2:
43
+ - pALe39nPehX2z3S/kU9vYTlDBVdZgLtZVs2kzkT+5gs/x1ubsVa3GbONCdTOG9CjJy2OH6SlfHU=
44
+ X-Amz-Request-Id:
45
+ - 4FA1F1258A28740E
46
+ Date:
47
+ - Wed, 11 Dec 2019 09:26:02 GMT
48
+ Etag:
49
+ - '"2be9ba1c0c2875a7b10fdf4bbd29b7dd"'
50
+ Content-Length:
51
+ - '0'
52
+ Server:
53
+ - AmazonS3
54
+ body:
55
+ encoding: UTF-8
56
+ string: ''
57
+ http_version:
58
+ recorded_at: Wed, 11 Dec 2019 09:26:01 GMT
59
+ recorded_with: VCR 3.0.3
data/spec/sitemap_spec.rb CHANGED
@@ -33,8 +33,20 @@ describe Maltese::Sitemap, vcr: true do
33
33
  end
34
34
 
35
35
  context "process_data" do
36
- it "should catch timeout errors with the Datacite REST API" do
37
- stub = stub_request(:get, subject.get_query_url).to_return(:status => [408])
36
+ it "should handle timeout errors with the Datacite REST API" do
37
+ stub = stub_request(:get, subject.get_query_url).and_return({ status: [408] }, { status: [408] }, { status: [200] })
38
+ response = subject.process_data(total: 10, url: subject.get_query_url)
39
+ expect(response).to eq(1)
40
+ end
41
+
42
+ it "should handle bad request errors with the Datacite REST API" do
43
+ stub = stub_request(:get, subject.get_query_url).and_return({ status: [502] }, { status: [200] })
44
+ response = subject.process_data(total: 10, url: subject.get_query_url)
45
+ expect(response).to eq(1)
46
+ end
47
+
48
+ it "should retry 2 times for bad request errors with the Datacite REST API" do
49
+ stub = stub_request(:get, subject.get_query_url).and_return({ status: [502] }, { status: [502] }, { status: [502] })
38
50
  response = subject.process_data(total: 10, url: subject.get_query_url)
39
51
  expect(response).to eq(0)
40
52
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: maltese
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.1
4
+ version: 0.9.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Martin Fenner
@@ -106,6 +106,20 @@ dependencies:
106
106
  - - "~>"
107
107
  - !ruby/object:Gem::Version
108
108
  version: '0.19'
109
+ - !ruby/object:Gem::Dependency
110
+ name: retriable
111
+ requirement: !ruby/object:Gem::Requirement
112
+ requirements:
113
+ - - "~>"
114
+ - !ruby/object:Gem::Version
115
+ version: '3.1'
116
+ type: :runtime
117
+ prerelease: false
118
+ version_requirements: !ruby/object:Gem::Requirement
119
+ requirements:
120
+ - - "~>"
121
+ - !ruby/object:Gem::Version
122
+ version: '3.1'
109
123
  - !ruby/object:Gem::Dependency
110
124
  name: sitemap_generator
111
125
  requirement: !ruby/object:Gem::Requirement
@@ -308,6 +322,9 @@ files:
308
322
  - spec/fixtures/vcr_cassettes/Maltese_CLI/sitemap/should_succeed.yml
309
323
  - spec/fixtures/vcr_cassettes/Maltese_Sitemap/get_data/should_report_if_there_are_works_returned_by_the_Datacite_REST_API.yml
310
324
  - spec/fixtures/vcr_cassettes/Maltese_Sitemap/get_total/with_works.yml
325
+ - spec/fixtures/vcr_cassettes/Maltese_Sitemap/process_data/should_catch_bad_request_errors_with_the_Datacite_REST_API.yml
326
+ - spec/fixtures/vcr_cassettes/Maltese_Sitemap/process_data/should_handle_bad_request_errors_with_the_Datacite_REST_API.yml
327
+ - spec/fixtures/vcr_cassettes/Maltese_Sitemap/process_data/should_handle_timeout_errors_with_the_Datacite_REST_API.yml
311
328
  - spec/fixtures/vcr_cassettes/Maltese_Sitemap/push_data/should_report_if_there_are_no_works_returned_by_the_Datacite_REST_API.yml
312
329
  - spec/fixtures/vcr_cassettes/Maltese_Sitemap/push_data/should_report_if_there_are_works_returned_by_the_Datacite_REST_API.yml
313
330
  - spec/fixtures/vcr_cassettes/Maltese_Sitemap/queue_jobs/should_report_if_there_are_works_returned_by_the_Datacite_REST_API.yml