maltese 0.8.13 → 0.8.14
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Dockerfile +1 -1
- data/Gemfile.lock +4 -4
- data/lib/maltese/sitemap.rb +15 -8
- data/lib/maltese/version.rb +1 -1
- data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/process_data/should_catch_timeout_errors_with_the_Datacite_REST_API.yml +59 -0
- data/spec/sitemap_spec.rb +8 -6
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3975693a1ddab582888bf8e4628be0e0b1127a4de475afc0f4126fda14cef80e
|
4
|
+
data.tar.gz: f0f57a6a433a36414a853e6e42735577b7e9a40dcd34b9931cd6efb9cd1d664c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a401e5b79ca1f326adb521701a1d355f87f56ef1d7c42122ee0093d3cf2af39597d8b7720eb0b78270f441d7686c0bcf3af250959a212a91340d09f8a032db6d
|
7
|
+
data.tar.gz: 256c0579d4deeb794bae24f6560a216c2953f6ce19f4ab8c94b97a975c19f2949ef696ce8379997fafa068b9888176b0ddeeb1b673b8d5be015c982196422080
|
data/Dockerfile
CHANGED
@@ -11,6 +11,6 @@ RUN apt-get update && apt-get upgrade -y -o Dpkg::Options::="--force-confold" &&
|
|
11
11
|
apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
|
12
12
|
|
13
13
|
# Install maltese gem
|
14
|
-
RUN /sbin/setuser app gem install maltese -v 0.8.13
|
14
|
+
RUN /sbin/setuser app gem install maltese -v 0.8.14
|
15
15
|
|
16
16
|
CMD maltese sitemap --sitemap_bucket $SITEMAP_BUCKET
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
maltese (0.8.13)
|
4
|
+
maltese (0.8.14)
|
5
5
|
activesupport (>= 4.2.5, < 6)
|
6
6
|
aws-sdk-s3 (~> 1.19)
|
7
7
|
dotenv (~> 2.1, >= 2.1.1)
|
@@ -21,13 +21,13 @@ GEM
|
|
21
21
|
addressable (2.7.0)
|
22
22
|
public_suffix (>= 2.0.2, < 5.0)
|
23
23
|
aws-eventstream (1.0.3)
|
24
|
-
aws-partitions (1.
|
25
|
-
aws-sdk-core (3.
|
24
|
+
aws-partitions (1.252.0)
|
25
|
+
aws-sdk-core (3.85.0)
|
26
26
|
aws-eventstream (~> 1.0, >= 1.0.2)
|
27
27
|
aws-partitions (~> 1, >= 1.239.0)
|
28
28
|
aws-sigv4 (~> 1.1)
|
29
29
|
jmespath (~> 1.0)
|
30
|
-
aws-sdk-kms (1.
|
30
|
+
aws-sdk-kms (1.27.0)
|
31
31
|
aws-sdk-core (~> 3, >= 3.71.0)
|
32
32
|
aws-sigv4 (~> 1.1)
|
33
33
|
aws-sdk-s3 (1.59.0)
|
data/lib/maltese/sitemap.rb
CHANGED
@@ -90,17 +90,30 @@ module Maltese
|
|
90
90
|
|
91
91
|
def process_data(options = {})
|
92
92
|
options[:start_time] = Time.now
|
93
|
+
link_count = 0
|
94
|
+
error_count = 0
|
93
95
|
|
94
96
|
# walk through paginated results
|
95
97
|
while options[:url] do
|
96
98
|
response = get_data(options[:url])
|
97
|
-
|
98
|
-
|
99
|
+
|
100
|
+
if response.status == 200
|
101
|
+
link_count += parse_data(response)
|
102
|
+
puts "#{link_count} DOIs parsed."
|
103
|
+
options[:url] = response.body.dig("links", "next")
|
104
|
+
else
|
105
|
+
puts "An error occured for URL #{options[:url]}:."
|
106
|
+
puts "Error message: #{response.body.fetch("errors").inspect}" if response.body.fetch("errors", nil).present?
|
107
|
+
error_count += 1
|
108
|
+
options[:url] = nil
|
109
|
+
end
|
99
110
|
|
100
111
|
# don't loop when testing
|
101
112
|
break if ENV['RACK'] == "test"
|
102
113
|
end
|
103
114
|
|
115
|
+
return link_count if error_count > 0
|
116
|
+
|
104
117
|
push_data(options)
|
105
118
|
end
|
106
119
|
|
@@ -109,16 +122,10 @@ module Maltese
|
|
109
122
|
end
|
110
123
|
|
111
124
|
def parse_data(result)
|
112
|
-
if result.body.fetch("errors", nil).present?
|
113
|
-
puts "An error occured: #{result.body.fetch("errors").inspect}"
|
114
|
-
return result.body.fetch("errors")
|
115
|
-
end
|
116
|
-
|
117
125
|
result.body.fetch("data", []).each do |item|
|
118
126
|
loc = "/works/" + item.dig("attributes", "doi")
|
119
127
|
sitemap.add loc, changefreq: "monthly", lastmod: item.dig("attributes", "updated")
|
120
128
|
end
|
121
|
-
puts "#{result.body.fetch("data", []).size} DOIs parsed."
|
122
129
|
sitemap.sitemap.link_count
|
123
130
|
end
|
124
131
|
|
data/lib/maltese/version.rb
CHANGED
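data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/process_data/should_catch_timeout_errors_with_the_Datacite_REST_API.yml
ADDED
@@ -0,0 +1,59 @@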
|
|
1
|
+
---
|
2
|
+
http_interactions:
|
3
|
+
- request:
|
4
|
+
method: put
|
5
|
+
uri: https://s3.eu-west-1.amazonaws.com/search.test.datacite.org/sitemaps/sitemap.xml.gz
|
6
|
+
body:
|
7
|
+
encoding: ASCII-8BIT
|
8
|
+
string: !binary |-
|
9
|
+
H4sIAD9W710AA5WSPW+DMBCG/wryWuEP0iEgQ7ZO6dRU6uoaFywZm3JuIP++jiEVrTqQDe6e5+5FHD9MnUnOagDtbIkYpihRVrpa26ZEr6endI8OFf8aDCifBNZCMYEuUet9XxAyjiMed9gNDckoZeTt+fgiW9WJVFvwwkqFksAXEItHJ4WPi1Y6aB9aPcQhMwdkKRKK8+QO9vaMJ6jRHPeuXYtT6E406pfZONcYhaXr/npphAnD7Gafda3cVjvCa9uqEbbKV3adu3Pv2mwOPtMk/vTZ78OXhM7WAQu+njC1vjP/nQfL85zELor3VHHjZHXlIICgxCBb7BV4XAsvZNgQNU6uGDcCfOfqKqMsT1mWMnqi+yJ7LHbsgdKC0sAtCJetsI36GNRnJcwoLsDJqsT7QbtB+0sVUnPy88ZJDEXmU6++AT8Dg9QZAwAA
|
10
|
+
headers:
|
11
|
+
Content-Type:
|
12
|
+
- application/x-gzip
|
13
|
+
Accept-Encoding:
|
14
|
+
- ''
|
15
|
+
User-Agent:
|
16
|
+
- aws-sdk-ruby3/3.85.0 ruby/2.6.3 universal.x86_64-darwin19 aws-sdk-s3/1.59.0
|
17
|
+
X-Amz-Acl:
|
18
|
+
- public-read
|
19
|
+
Cache-Control:
|
20
|
+
- private, max-age=0, no-cache
|
21
|
+
Expect:
|
22
|
+
- 100-continue
|
23
|
+
Content-Md5:
|
24
|
+
- qw5jsYX5Zml0Xk8/J5o/pw==
|
25
|
+
X-Amz-Date:
|
26
|
+
- 20191210T082431Z
|
27
|
+
X-Amz-Content-Sha256:
|
28
|
+
- c1b9ed2122fe6722ea084cc215ee6b1f866e0f1b1f9cbb70b8d9673bf0b17d43
|
29
|
+
Authorization:
|
30
|
+
- AWS4-HMAC-SHA256 Credential=AKIAJAMMCXAR3IXMNCGQ/20191210/eu-west-1/s3/aws4_request,
|
31
|
+
SignedHeaders=cache-control;content-md5;content-type;expect;host;user-agent;x-amz-acl;x-amz-content-sha256;x-amz-date,
|
32
|
+
Signature=7d5f33f12175ee8b8af02cd03e5d61c36ce3898d1a86660ad1d56ced965508ea
|
33
|
+
Content-Length:
|
34
|
+
- '333'
|
35
|
+
Accept:
|
36
|
+
- "*/*"
|
37
|
+
response:
|
38
|
+
status:
|
39
|
+
code: 200
|
40
|
+
message: OK
|
41
|
+
headers:
|
42
|
+
X-Amz-Id-2:
|
43
|
+
- 8ToOQoDXCK0nj5F+9qE7D6Pn4Clf3tfjbW8CpFD/GJlZd13pEktxxKVozf/EQXvigc703t04J0w=
|
44
|
+
X-Amz-Request-Id:
|
45
|
+
- 7041D9503577FFF6
|
46
|
+
Date:
|
47
|
+
- Tue, 10 Dec 2019 08:24:33 GMT
|
48
|
+
Etag:
|
49
|
+
- '"ab0e63b185f96669745e4f3f279a3fa7"'
|
50
|
+
Content-Length:
|
51
|
+
- '0'
|
52
|
+
Server:
|
53
|
+
- AmazonS3
|
54
|
+
body:
|
55
|
+
encoding: UTF-8
|
56
|
+
string: ''
|
57
|
+
http_version:
|
58
|
+
recorded_at: Tue, 10 Dec 2019 08:24:32 GMT
|
59
|
+
recorded_with: VCR 3.0.3
|
data/spec/sitemap_spec.rb
CHANGED
@@ -32,6 +32,14 @@ describe Maltese::Sitemap, vcr: true do
|
|
32
32
|
end
|
33
33
|
end
|
34
34
|
|
35
|
+
context "process_data" do
|
36
|
+
it "should catch timeout errors with the Datacite REST API" do
|
37
|
+
stub = stub_request(:get, subject.get_query_url).to_return(:status => [408])
|
38
|
+
response = subject.process_data(total: 10, url: subject.get_query_url)
|
39
|
+
expect(response).to eq(0)
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
35
43
|
context "get_data" do
|
36
44
|
it "should report if there are works returned by the Datacite REST API" do
|
37
45
|
response = subject.get_data(subject.get_query_url)
|
@@ -62,12 +70,6 @@ describe Maltese::Sitemap, vcr: true do
|
|
62
70
|
response = subject.parse_data(result)
|
63
71
|
expect(response).to eq(1001)
|
64
72
|
end
|
65
|
-
|
66
|
-
it "should catch timeout errors with the Datacite REST API" do
|
67
|
-
result = OpenStruct.new(body: { "errors" => [{ "title" => "the server responded with status 408 for https://REST.test.datacite.org", "status" => 408 }] })
|
68
|
-
response = subject.parse_data(result)
|
69
|
-
expect(response).to eq(result.body["errors"])
|
70
|
-
end
|
71
73
|
end
|
72
74
|
|
73
75
|
context "push_data" do
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: maltese
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.8.13
|
4
|
+
version: 0.8.14
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Martin Fenner
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-12-
|
11
|
+
date: 2019-12-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: maremma
|
@@ -281,6 +281,7 @@ files:
|
|
281
281
|
- spec/fixtures/vcr_cassettes/Maltese_CLI/sitemap/should_succeed.yml
|
282
282
|
- spec/fixtures/vcr_cassettes/Maltese_Sitemap/get_data/should_report_if_there_are_works_returned_by_the_Datacite_REST_API.yml
|
283
283
|
- spec/fixtures/vcr_cassettes/Maltese_Sitemap/get_total/with_works.yml
|
284
|
+
- spec/fixtures/vcr_cassettes/Maltese_Sitemap/process_data/should_catch_timeout_errors_with_the_Datacite_REST_API.yml
|
284
285
|
- spec/fixtures/vcr_cassettes/Maltese_Sitemap/push_data/should_report_if_there_are_no_works_returned_by_the_Datacite_REST_API.yml
|
285
286
|
- spec/fixtures/vcr_cassettes/Maltese_Sitemap/push_data/should_report_if_there_are_works_returned_by_the_Datacite_REST_API.yml
|
286
287
|
- spec/fixtures/vcr_cassettes/Maltese_Sitemap/queue_jobs/should_report_if_there_are_works_returned_by_the_Datacite_REST_API.yml
|