maltese 0.8.13 → 0.8.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Dockerfile +1 -1
- data/Gemfile.lock +4 -4
- data/lib/maltese/sitemap.rb +15 -8
- data/lib/maltese/version.rb +1 -1
- data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/process_data/should_catch_timeout_errors_with_the_Datacite_REST_API.yml +59 -0
- data/spec/sitemap_spec.rb +8 -6
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3975693a1ddab582888bf8e4628be0e0b1127a4de475afc0f4126fda14cef80e
|
4
|
+
data.tar.gz: f0f57a6a433a36414a853e6e42735577b7e9a40dcd34b9931cd6efb9cd1d664c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a401e5b79ca1f326adb521701a1d355f87f56ef1d7c42122ee0093d3cf2af39597d8b7720eb0b78270f441d7686c0bcf3af250959a212a91340d09f8a032db6d
|
7
|
+
data.tar.gz: 256c0579d4deeb794bae24f6560a216c2953f6ce19f4ab8c94b97a975c19f2949ef696ce8379997fafa068b9888176b0ddeeb1b673b8d5be015c982196422080
|
data/Dockerfile
CHANGED
@@ -11,6 +11,6 @@ RUN apt-get update && apt-get upgrade -y -o Dpkg::Options::="--force-confold" &&
|
|
11
11
|
apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
|
12
12
|
|
13
13
|
# Install maltese gem
|
14
|
-
RUN /sbin/setuser app gem install maltese -v 0.8.13
|
14
|
+
RUN /sbin/setuser app gem install maltese -v 0.8.14
|
15
15
|
|
16
16
|
CMD maltese sitemap --sitemap_bucket $SITEMAP_BUCKET
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
maltese (0.8.13)
|
4
|
+
maltese (0.8.14)
|
5
5
|
activesupport (>= 4.2.5, < 6)
|
6
6
|
aws-sdk-s3 (~> 1.19)
|
7
7
|
dotenv (~> 2.1, >= 2.1.1)
|
@@ -21,13 +21,13 @@ GEM
|
|
21
21
|
addressable (2.7.0)
|
22
22
|
public_suffix (>= 2.0.2, < 5.0)
|
23
23
|
aws-eventstream (1.0.3)
|
24
|
-
aws-partitions (1.
|
25
|
-
aws-sdk-core (3.
|
24
|
+
aws-partitions (1.252.0)
|
25
|
+
aws-sdk-core (3.85.0)
|
26
26
|
aws-eventstream (~> 1.0, >= 1.0.2)
|
27
27
|
aws-partitions (~> 1, >= 1.239.0)
|
28
28
|
aws-sigv4 (~> 1.1)
|
29
29
|
jmespath (~> 1.0)
|
30
|
-
aws-sdk-kms (1.
|
30
|
+
aws-sdk-kms (1.27.0)
|
31
31
|
aws-sdk-core (~> 3, >= 3.71.0)
|
32
32
|
aws-sigv4 (~> 1.1)
|
33
33
|
aws-sdk-s3 (1.59.0)
|
data/lib/maltese/sitemap.rb
CHANGED
@@ -90,17 +90,30 @@ module Maltese
|
|
90
90
|
|
91
91
|
def process_data(options = {})
|
92
92
|
options[:start_time] = Time.now
|
93
|
+
link_count = 0
|
94
|
+
error_count = 0
|
93
95
|
|
94
96
|
# walk through paginated results
|
95
97
|
while options[:url] do
|
96
98
|
response = get_data(options[:url])
|
97
|
-
|
98
|
-
|
99
|
+
|
100
|
+
if response.status == 200
|
101
|
+
link_count += parse_data(response)
|
102
|
+
puts "#{link_count} DOIs parsed."
|
103
|
+
options[:url] = response.body.dig("links", "next")
|
104
|
+
else
|
105
|
+
puts "An error occured for URL #{options[:url]}:."
|
106
|
+
puts "Error message: #{response.body.fetch("errors").inspect}" if response.body.fetch("errors", nil).present?
|
107
|
+
error_count += 1
|
108
|
+
options[:url] = nil
|
109
|
+
end
|
99
110
|
|
100
111
|
# don't loop when testing
|
101
112
|
break if ENV['RACK'] == "test"
|
102
113
|
end
|
103
114
|
|
115
|
+
return link_count if error_count > 0
|
116
|
+
|
104
117
|
push_data(options)
|
105
118
|
end
|
106
119
|
|
@@ -109,16 +122,10 @@ module Maltese
|
|
109
122
|
end
|
110
123
|
|
111
124
|
def parse_data(result)
|
112
|
-
if result.body.fetch("errors", nil).present?
|
113
|
-
puts "An error occured: #{result.body.fetch("errors").inspect}"
|
114
|
-
return result.body.fetch("errors")
|
115
|
-
end
|
116
|
-
|
117
125
|
result.body.fetch("data", []).each do |item|
|
118
126
|
loc = "/works/" + item.dig("attributes", "doi")
|
119
127
|
sitemap.add loc, changefreq: "monthly", lastmod: item.dig("attributes", "updated")
|
120
128
|
end
|
121
|
-
puts "#{result.body.fetch("data", []).size} DOIs parsed."
|
122
129
|
sitemap.sitemap.link_count
|
123
130
|
end
|
124
131
|
|
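data/lib/maltese/version.rb
CHANGED
data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/process_data/should_catch_timeout_errors_with_the_Datacite_REST_API.yml
ADDED
@@ -0,0 +1,59 @@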
|
|
1
|
+
---
|
2
|
+
http_interactions:
|
3
|
+
- request:
|
4
|
+
method: put
|
5
|
+
uri: https://s3.eu-west-1.amazonaws.com/search.test.datacite.org/sitemaps/sitemap.xml.gz
|
6
|
+
body:
|
7
|
+
encoding: ASCII-8BIT
|
8
|
+
string: !binary |-
|
9
|
+
H4sIAD9W710AA5WSPW+DMBCG/wryWuEP0iEgQ7ZO6dRU6uoaFywZm3JuIP++jiEVrTqQDe6e5+5FHD9MnUnOagDtbIkYpihRVrpa26ZEr6endI8OFf8aDCifBNZCMYEuUet9XxAyjiMed9gNDckoZeTt+fgiW9WJVFvwwkqFksAXEItHJ4WPi1Y6aB9aPcQhMwdkKRKK8+QO9vaMJ6jRHPeuXYtT6E406pfZONcYhaXr/npphAnD7Gafda3cVjvCa9uqEbbKV3adu3Pv2mwOPtMk/vTZ78OXhM7WAQu+njC1vjP/nQfL85zELor3VHHjZHXlIICgxCBb7BV4XAsvZNgQNU6uGDcCfOfqKqMsT1mWMnqi+yJ7LHbsgdKC0sAtCJetsI36GNRnJcwoLsDJqsT7QbtB+0sVUnPy88ZJDEXmU6++AT8Dg9QZAwAA
|
10
|
+
headers:
|
11
|
+
Content-Type:
|
12
|
+
- application/x-gzip
|
13
|
+
Accept-Encoding:
|
14
|
+
- ''
|
15
|
+
User-Agent:
|
16
|
+
- aws-sdk-ruby3/3.85.0 ruby/2.6.3 universal.x86_64-darwin19 aws-sdk-s3/1.59.0
|
17
|
+
X-Amz-Acl:
|
18
|
+
- public-read
|
19
|
+
Cache-Control:
|
20
|
+
- private, max-age=0, no-cache
|
21
|
+
Expect:
|
22
|
+
- 100-continue
|
23
|
+
Content-Md5:
|
24
|
+
- qw5jsYX5Zml0Xk8/J5o/pw==
|
25
|
+
X-Amz-Date:
|
26
|
+
- 20191210T082431Z
|
27
|
+
X-Amz-Content-Sha256:
|
28
|
+
- c1b9ed2122fe6722ea084cc215ee6b1f866e0f1b1f9cbb70b8d9673bf0b17d43
|
29
|
+
Authorization:
|
30
|
+
- AWS4-HMAC-SHA256 Credential=AKIAJAMMCXAR3IXMNCGQ/20191210/eu-west-1/s3/aws4_request,
|
31
|
+
SignedHeaders=cache-control;content-md5;content-type;expect;host;user-agent;x-amz-acl;x-amz-content-sha256;x-amz-date,
|
32
|
+
Signature=7d5f33f12175ee8b8af02cd03e5d61c36ce3898d1a86660ad1d56ced965508ea
|
33
|
+
Content-Length:
|
34
|
+
- '333'
|
35
|
+
Accept:
|
36
|
+
- "*/*"
|
37
|
+
response:
|
38
|
+
status:
|
39
|
+
code: 200
|
40
|
+
message: OK
|
41
|
+
headers:
|
42
|
+
X-Amz-Id-2:
|
43
|
+
- 8ToOQoDXCK0nj5F+9qE7D6Pn4Clf3tfjbW8CpFD/GJlZd13pEktxxKVozf/EQXvigc703t04J0w=
|
44
|
+
X-Amz-Request-Id:
|
45
|
+
- 7041D9503577FFF6
|
46
|
+
Date:
|
47
|
+
- Tue, 10 Dec 2019 08:24:33 GMT
|
48
|
+
Etag:
|
49
|
+
- '"ab0e63b185f96669745e4f3f279a3fa7"'
|
50
|
+
Content-Length:
|
51
|
+
- '0'
|
52
|
+
Server:
|
53
|
+
- AmazonS3
|
54
|
+
body:
|
55
|
+
encoding: UTF-8
|
56
|
+
string: ''
|
57
|
+
http_version:
|
58
|
+
recorded_at: Tue, 10 Dec 2019 08:24:32 GMT
|
59
|
+
recorded_with: VCR 3.0.3
|
data/spec/sitemap_spec.rb
CHANGED
@@ -32,6 +32,14 @@ describe Maltese::Sitemap, vcr: true do
|
|
32
32
|
end
|
33
33
|
end
|
34
34
|
|
35
|
+
context "process_data" do
|
36
|
+
it "should catch timeout errors with the Datacite REST API" do
|
37
|
+
stub = stub_request(:get, subject.get_query_url).to_return(:status => [408])
|
38
|
+
response = subject.process_data(total: 10, url: subject.get_query_url)
|
39
|
+
expect(response).to eq(0)
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
35
43
|
context "get_data" do
|
36
44
|
it "should report if there are works returned by the Datacite REST API" do
|
37
45
|
response = subject.get_data(subject.get_query_url)
|
@@ -62,12 +70,6 @@ describe Maltese::Sitemap, vcr: true do
|
|
62
70
|
response = subject.parse_data(result)
|
63
71
|
expect(response).to eq(1001)
|
64
72
|
end
|
65
|
-
|
66
|
-
it "should catch timeout errors with the Datacite REST API" do
|
67
|
-
result = OpenStruct.new(body: { "errors" => [{ "title" => "the server responded with status 408 for https://REST.test.datacite.org", "status" => 408 }] })
|
68
|
-
response = subject.parse_data(result)
|
69
|
-
expect(response).to eq(result.body["errors"])
|
70
|
-
end
|
71
73
|
end
|
72
74
|
|
73
75
|
context "push_data" do
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: maltese
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.8.13
|
4
|
+
version: 0.8.14
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Martin Fenner
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-12-
|
11
|
+
date: 2019-12-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: maremma
|
@@ -281,6 +281,7 @@ files:
|
|
281
281
|
- spec/fixtures/vcr_cassettes/Maltese_CLI/sitemap/should_succeed.yml
|
282
282
|
- spec/fixtures/vcr_cassettes/Maltese_Sitemap/get_data/should_report_if_there_are_works_returned_by_the_Datacite_REST_API.yml
|
283
283
|
- spec/fixtures/vcr_cassettes/Maltese_Sitemap/get_total/with_works.yml
|
284
|
+
- spec/fixtures/vcr_cassettes/Maltese_Sitemap/process_data/should_catch_timeout_errors_with_the_Datacite_REST_API.yml
|
284
285
|
- spec/fixtures/vcr_cassettes/Maltese_Sitemap/push_data/should_report_if_there_are_no_works_returned_by_the_Datacite_REST_API.yml
|
285
286
|
- spec/fixtures/vcr_cassettes/Maltese_Sitemap/push_data/should_report_if_there_are_works_returned_by_the_Datacite_REST_API.yml
|
286
287
|
- spec/fixtures/vcr_cassettes/Maltese_Sitemap/queue_jobs/should_report_if_there_are_works_returned_by_the_Datacite_REST_API.yml
|