maltese 0.8.14 → 0.8.15
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Dockerfile +2 -2
- data/Gemfile.lock +3 -2
- data/README.md +1 -1
- data/lib/maltese/cli.rb +4 -0
- data/lib/maltese/sitemap.rb +18 -12
- data/lib/maltese/version.rb +1 -1
- data/maltese.gemspec +1 -0
- data/spec/cli_spec.rb +2 -2
- data/spec/fixtures/vcr_cassettes/Maltese_CLI/sitemap/should_succeed.yml +27 -27
- data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/get_data/should_report_if_there_are_works_returned_by_the_Datacite_REST_API.yml +6 -6
- data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/get_total/with_works.yml +6 -6
- data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/push_data/should_report_if_there_are_no_works_returned_by_the_Datacite_REST_API.yml +13 -13
- data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/push_data/should_report_if_there_are_works_returned_by_the_Datacite_REST_API.yml +13 -13
- data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/queue_jobs/should_report_if_there_are_works_returned_by_the_Datacite_REST_API.yml +27 -27
- data/spec/sitemap_spec.rb +5 -5
- metadata +15 -3
- data/.codeclimate.yml +0 -19
- data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/process_data/should_catch_timeout_errors_with_the_Datacite_REST_API.yml +0 -59
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 47f0ba0813eeed3c56a5636d34acbb9bc3349cef65513b3a834c05dcda19bc08
|
4
|
+
data.tar.gz: 6ecb5ff42d4759e07c7632420aa78a81284a57ebba7aeca9d2747218e0d78297
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 05aedc2d77881876ac08bf9f11a2796220619e06e128209f83600758d5c5c5448a47db3b7a25f67870b1ceaba63ea5c840958c2f5fa1ad85666280b285362306
|
7
|
+
data.tar.gz: c15bcefd230f7839556345f455879c702ed25a06368410d9e6e1bc72a86885b4cb06fdb8ddb03f5c3a21be605339834da56c21ef2c91fc39f8588fa45e84e483
|
data/Dockerfile
CHANGED
@@ -11,6 +11,6 @@ RUN apt-get update && apt-get upgrade -y -o Dpkg::Options::="--force-confold" &&
|
|
11
11
|
apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
|
12
12
|
|
13
13
|
# Install maltese gem
|
14
|
-
RUN /sbin/setuser app gem install maltese -v 0.8.
|
14
|
+
RUN /sbin/setuser app gem install maltese -v 0.8.15
|
15
15
|
|
16
|
-
CMD maltese sitemap --sitemap_bucket $SITEMAP_BUCKET
|
16
|
+
CMD maltese sitemap --sitemap_bucket $SITEMAP_BUCKET --rack_env $RACK_ENV --access_key $AWS_ACCESS_KEY_ID --secret_key $AWS_SECRET_ACCESS_KEY --region $AWS_REGION
|
data/Gemfile.lock
CHANGED
@@ -1,10 +1,11 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
maltese (0.8.
|
4
|
+
maltese (0.8.15)
|
5
5
|
activesupport (>= 4.2.5, < 6)
|
6
6
|
aws-sdk-s3 (~> 1.19)
|
7
7
|
dotenv (~> 2.1, >= 2.1.1)
|
8
|
+
faraday (= 0.17.0)
|
8
9
|
maremma (~> 4.1)
|
9
10
|
mime-types (~> 3.1)
|
10
11
|
sitemap_generator (~> 6.0)
|
@@ -46,7 +47,7 @@ GEM
|
|
46
47
|
docile (1.1.5)
|
47
48
|
dotenv (2.7.5)
|
48
49
|
excon (0.67.0)
|
49
|
-
faraday (0.17.
|
50
|
+
faraday (0.17.0)
|
50
51
|
multipart-post (>= 1.2, < 3)
|
51
52
|
faraday-encoding (0.0.5)
|
52
53
|
faraday
|
data/README.md
CHANGED
@@ -4,7 +4,7 @@
|
|
4
4
|
|
5
5
|
# Maltese
|
6
6
|
|
7
|
-
Ruby gem and command-line tool for generating sitemap files from the DataCite REST API. Uses the [SitemapGenerator](https://github.com/kjvarga/sitemap_generator) gem and can be run as Docker container, e.g. using ECS
|
7
|
+
Ruby gem and command-line tool for generating sitemap files from the DataCite REST API. Uses the [SitemapGenerator](https://github.com/kjvarga/sitemap_generator) gem and can be run as Docker container, e.g. using as a scheduled task in AWS ECS triggered by AWS Cloudwatch Events.
|
8
8
|
|
9
9
|
Run as a command-line tool:
|
10
10
|
|
data/lib/maltese/cli.rb
CHANGED
@@ -19,6 +19,10 @@ module Maltese
|
|
19
19
|
|
20
20
|
desc "sitemap", "generate sitemap for DataCite Search"
|
21
21
|
method_option :sitemap_bucket, type: :string
|
22
|
+
method_option :rack_env, type: :string
|
23
|
+
method_option :access_key, type: :string
|
24
|
+
method_option :secret_key, type: :string
|
25
|
+
method_option :region, type: :string
|
22
26
|
def sitemap
|
23
27
|
sitemap = Maltese::Sitemap.new(options)
|
24
28
|
sitemap.queue_jobs
|
data/lib/maltese/sitemap.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
module Maltese
|
2
2
|
class Sitemap
|
3
|
-
attr_reader :sitemap_bucket
|
3
|
+
attr_reader :sitemap_bucket, :rack_env, :access_key, :secret_key, :region
|
4
4
|
|
5
5
|
# load ENV variables from .env file if it exists
|
6
6
|
env_file = File.expand_path("../../../.env", __FILE__)
|
@@ -19,10 +19,14 @@ module Maltese
|
|
19
19
|
|
20
20
|
def initialize(attributes={})
|
21
21
|
@sitemap_bucket = attributes[:sitemap_bucket].presence || "search.test.datacite.org"
|
22
|
+
@rack_env = attributes[:rack_env].presence || ENV['RACK_ENV'] || "stage"
|
23
|
+
@access_key = attributes[:access_key].presence || ENV['AWS_ACCESS_KEY_ID']
|
24
|
+
@secret_key = attributes[:secret_key].presence || ENV['AWS_SECRET_ACCESS_KEY']
|
25
|
+
@region = attributes[:region].presence || ENV['AWS_REGION']
|
22
26
|
end
|
23
27
|
|
24
28
|
def sitemap_url
|
25
|
-
|
29
|
+
rack_env == "production" ? "https://search.datacite.org/" : "https://search.test.datacite.org/"
|
26
30
|
end
|
27
31
|
|
28
32
|
def sitemaps_path
|
@@ -30,7 +34,7 @@ module Maltese
|
|
30
34
|
end
|
31
35
|
|
32
36
|
def search_path
|
33
|
-
|
37
|
+
rack_env == "production" ? "https://api.datacite.org/dois?" : "https://api.test.datacite.org/dois?"
|
34
38
|
end
|
35
39
|
|
36
40
|
def timeout
|
@@ -52,22 +56,24 @@ module Maltese
|
|
52
56
|
|
53
57
|
def s3_adapter
|
54
58
|
SitemapGenerator::AwsSdkAdapter.new(sitemap_bucket,
|
55
|
-
aws_access_key_id:
|
56
|
-
aws_secret_access_key:
|
57
|
-
aws_region:
|
59
|
+
aws_access_key_id: access_key,
|
60
|
+
aws_secret_access_key: secret_key,
|
61
|
+
aws_region: region)
|
58
62
|
end
|
59
63
|
|
60
64
|
def queue_jobs(options={})
|
61
65
|
total = get_total(options)
|
62
66
|
|
63
|
-
if total
|
64
|
-
puts
|
67
|
+
if total.nil?
|
68
|
+
puts "An error occured."
|
69
|
+
elsif total > 0
|
70
|
+
process_data(options.merge(total: total, url: get_query_url))
|
65
71
|
else
|
66
72
|
puts "No works found."
|
67
73
|
end
|
68
74
|
|
69
75
|
# return number of works queued
|
70
|
-
total
|
76
|
+
total.to_i
|
71
77
|
end
|
72
78
|
|
73
79
|
def get_total(options={})
|
@@ -102,14 +108,14 @@ module Maltese
|
|
102
108
|
puts "#{link_count} DOIs parsed."
|
103
109
|
options[:url] = response.body.dig("links", "next")
|
104
110
|
else
|
105
|
-
puts "An error occured for URL #{options[:url]}
|
111
|
+
puts "An error occured for URL #{options[:url]}."
|
106
112
|
puts "Error message: #{response.body.fetch("errors").inspect}" if response.body.fetch("errors", nil).present?
|
107
113
|
error_count += 1
|
108
114
|
options[:url] = nil
|
109
115
|
end
|
110
116
|
|
111
117
|
# don't loop when testing
|
112
|
-
break if
|
118
|
+
break if rack_env == "test"
|
113
119
|
end
|
114
120
|
|
115
121
|
return link_count if error_count > 0
|
@@ -122,7 +128,7 @@ module Maltese
|
|
122
128
|
end
|
123
129
|
|
124
130
|
def parse_data(result)
|
125
|
-
result.body.fetch("data",
|
131
|
+
Array.wrap(result.body.fetch("data", nil)).each do |item|
|
126
132
|
loc = "/works/" + item.dig("attributes", "doi")
|
127
133
|
sitemap.add loc, changefreq: "monthly", lastmod: item.dig("attributes", "updated")
|
128
134
|
end
|
data/lib/maltese/version.rb
CHANGED
data/maltese.gemspec
CHANGED
@@ -16,6 +16,7 @@ Gem::Specification.new do |s|
|
|
16
16
|
|
17
17
|
# Declary dependencies here, rather than in the Gemfile
|
18
18
|
s.add_dependency 'maremma', '~> 4.1'
|
19
|
+
s.add_dependency 'faraday', '0.17.0'
|
19
20
|
s.add_dependency 'activesupport', '>= 4.2.5', '< 6'
|
20
21
|
s.add_dependency 'dotenv', '~> 2.1', '>= 2.1.1'
|
21
22
|
s.add_dependency 'thor', '~> 0.19'
|
data/spec/cli_spec.rb
CHANGED
@@ -7,7 +7,7 @@ describe Maltese::CLI do
|
|
7
7
|
end
|
8
8
|
|
9
9
|
let(:sitemap_bucket) { "search.test.datacite.org" }
|
10
|
-
let(:cli_options) { { sitemap_bucket: sitemap_bucket } }
|
10
|
+
let(:cli_options) { { sitemap_bucket: sitemap_bucket, rack_env: "test" } }
|
11
11
|
|
12
12
|
describe "sitemap", vcr: true, :order => :defined do
|
13
13
|
it 'should succeed' do
|
@@ -16,7 +16,7 @@ describe Maltese::CLI do
|
|
16
16
|
sitemap = Zlib::GzipReader.open("public/sitemaps/sitemap.xml.gz") { |gz| gz.read }
|
17
17
|
doc = Nokogiri::XML(sitemap)
|
18
18
|
expect(doc.xpath("//xmlns:url").size).to eq(1001)
|
19
|
-
expect(doc.xpath("//xmlns:loc").last.text).to eq("https://search.test.datacite.org/works/10.
|
19
|
+
expect(doc.xpath("//xmlns:loc").last.text).to eq("https://search.test.datacite.org/works/10.1007/s10620-013-2631-z")
|
20
20
|
end
|
21
21
|
end
|
22
22
|
end
|