maltese 0.9.2 → 0.9.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Dockerfile +2 -2
- data/Gemfile.lock +4 -2
- data/lib/maltese/cli.rb +1 -0
- data/lib/maltese/sitemap.rb +46 -3
- data/lib/maltese/version.rb +1 -1
- data/maltese.gemspec +1 -0
- data/spec/cli_spec.rb +1 -1
- data/spec/fixtures/vcr_cassettes/Maltese_CLI/sitemap/should_succeed.yml +79 -24
- data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/process_data/should_handle_bad_request_errors_with_the_Datacite_REST_API.yml +65 -10
- data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/process_data/should_handle_timeout_errors_with_the_Datacite_REST_API.yml +65 -10
- data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/process_data/should_retry_2_times_for_bad_request_errors_with_the_Datacite_REST_API.yml +58 -0
- data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/push_data/should_report_if_there_are_no_works_returned_by_the_Datacite_REST_API.yml +66 -11
- data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/push_data/should_report_if_there_are_works_returned_by_the_Datacite_REST_API.yml +67 -12
- data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/queue_jobs/should_report_if_there_are_works_returned_by_the_Datacite_REST_API.yml +79 -24
- data/spec/sitemap_spec.rb +1 -1
- metadata +16 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: fe93930c90b5ddc8d0b3b89de7e18b7644356b64f82c76451b16cf9395e94269
|
4
|
+
data.tar.gz: 208578435824ca3f9a9cf2582e1dd73f6df8e018319494469c2a1f2408d9a3ed
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6dbeb4a4e96a050c987746be6a414dfe53428ed7b30ad211dee2a11df7052ce995f1f07841ce0cc32c1cd9d8904b4197c049b9ba3da7614659fc75adfa55f313
|
7
|
+
data.tar.gz: 8dea3a1fb90e4d9cd94cfa85bb2b4e1263b0635ad3010c937d311ab4519da49c8759a8679edf1313305f67843428be780b9db991ae06ac97e8c4878d4ae5f74b
|
data/Dockerfile
CHANGED
@@ -11,6 +11,6 @@ RUN apt-get update && apt-get upgrade -y -o Dpkg::Options::="--force-confold" &&
|
|
11
11
|
apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
|
12
12
|
|
13
13
|
# Install maltese gem
|
14
|
-
RUN /sbin/setuser app gem install maltese -v 0.9.2
|
14
|
+
RUN /sbin/setuser app gem install maltese -v 0.9.3
|
15
15
|
|
16
|
-
CMD maltese sitemap --sitemap_bucket $SITEMAP_BUCKET --rack_env $RACK_ENV --access_key $AWS_ACCESS_KEY_ID --secret_key $AWS_SECRET_ACCESS_KEY --region $AWS_REGION
|
16
|
+
CMD maltese sitemap --sitemap_bucket $SITEMAP_BUCKET --rack_env $RACK_ENV --access_key $AWS_ACCESS_KEY_ID --secret_key $AWS_SECRET_ACCESS_KEY --region $AWS_REGION --slack_webhook_url $SLACK_WEBHOOK_URL
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
maltese (0.9.2)
|
4
|
+
maltese (0.9.3)
|
5
5
|
activesupport (>= 4.2.5, < 6)
|
6
6
|
aws-sdk-s3 (~> 1.19)
|
7
7
|
dotenv (~> 2.1, >= 2.1.1)
|
@@ -11,6 +11,7 @@ PATH
|
|
11
11
|
mime-types (~> 3.1)
|
12
12
|
retriable (~> 3.1)
|
13
13
|
sitemap_generator (~> 6.0)
|
14
|
+
slack-notifier (~> 2.1)
|
14
15
|
thor (~> 0.19)
|
15
16
|
|
16
17
|
GEM
|
@@ -59,7 +60,7 @@ GEM
|
|
59
60
|
i18n (1.7.0)
|
60
61
|
concurrent-ruby (~> 1.0)
|
61
62
|
jmespath (1.4.0)
|
62
|
-
json (2.
|
63
|
+
json (2.3.0)
|
63
64
|
logstash-event (1.2.02)
|
64
65
|
logstash-logger (0.26.1)
|
65
66
|
logstash-event (~> 1.2)
|
@@ -111,6 +112,7 @@ GEM
|
|
111
112
|
simplecov-html (0.10.2)
|
112
113
|
sitemap_generator (6.0.2)
|
113
114
|
builder (~> 3.0)
|
115
|
+
slack-notifier (2.3.2)
|
114
116
|
thor (0.20.3)
|
115
117
|
thread_safe (0.3.6)
|
116
118
|
tzinfo (1.2.5)
|
data/lib/maltese/cli.rb
CHANGED
@@ -23,6 +23,7 @@ module Maltese
|
|
23
23
|
method_option :access_key, type: :string
|
24
24
|
method_option :secret_key, type: :string
|
25
25
|
method_option :region, type: :string
|
26
|
+
method_option :slack_webhook_url, type: :string
|
26
27
|
def sitemap
|
27
28
|
sitemap = Maltese::Sitemap.new(options)
|
28
29
|
sitemap.queue_jobs
|
data/lib/maltese/sitemap.rb
CHANGED
@@ -1,11 +1,12 @@
|
|
1
1
|
require 'logstash-logger'
|
2
2
|
require 'retriable'
|
3
|
+
require 'slack-notifier'
|
3
4
|
|
4
5
|
module Maltese
|
5
6
|
class ::BadGatewayError < StandardError; end
|
6
7
|
|
7
8
|
class Sitemap
|
8
|
-
attr_reader :sitemap_bucket, :rack_env, :access_key, :secret_key, :region, :logger
|
9
|
+
attr_reader :sitemap_bucket, :rack_env, :access_key, :secret_key, :region, :slack_webhook_url, :logger
|
9
10
|
|
10
11
|
# load ENV variables from .env file if it exists
|
11
12
|
env_file = File.expand_path("../../../.env", __FILE__)
|
@@ -22,12 +23,16 @@ module Maltese
|
|
22
23
|
env_vars.each { |k, v| ENV[k] = v }
|
23
24
|
end
|
24
25
|
|
26
|
+
# icon for Slack messages
|
27
|
+
SLACK_ICON_URL = "https://github.com/datacite/segugio/blob/master/source/images/fabrica.png"
|
28
|
+
|
25
29
|
def initialize(attributes={})
|
26
30
|
@sitemap_bucket = attributes[:sitemap_bucket].presence || "search.test.datacite.org"
|
27
31
|
@rack_env = attributes[:rack_env].presence || ENV['RACK_ENV'] || "stage"
|
28
32
|
@access_key = attributes[:access_key].presence || ENV['AWS_ACCESS_KEY_ID']
|
29
33
|
@secret_key = attributes[:secret_key].presence || ENV['AWS_SECRET_ACCESS_KEY']
|
30
34
|
@region = attributes[:region].presence || ENV['AWS_REGION']
|
35
|
+
@slack_webhook_url = attributes[:slack_webhook_url].presence || ENV['SLACK_WEBHOOK_URL']
|
31
36
|
|
32
37
|
@logger = LogStashLogger.new(type: :stdout)
|
33
38
|
end
|
@@ -36,6 +41,10 @@ module Maltese
|
|
36
41
|
rack_env == "production" ? "https://search.datacite.org/" : "https://search.test.datacite.org/"
|
37
42
|
end
|
38
43
|
|
44
|
+
def slack_title
|
45
|
+
rack_env == "production" ? "DataCite Fabrica" : "DataCite Fabrica Test"
|
46
|
+
end
|
47
|
+
|
39
48
|
def sitemaps_path
|
40
49
|
"sitemaps/"
|
41
50
|
end
|
@@ -130,8 +139,13 @@ module Maltese
|
|
130
139
|
options[:url] = nil
|
131
140
|
end
|
132
141
|
rescue => exception
|
133
|
-
logger.error "Error: #{exception.message}
|
142
|
+
logger.error "Error: #{exception.message}"
|
134
143
|
error_count += 1
|
144
|
+
fields = [
|
145
|
+
{ title: "Error", value: exception.message },
|
146
|
+
{ title: "Time Taken", value: "#{((Time.now - options[:start_time])/ 60.0).ceil} min", short: true }
|
147
|
+
]
|
148
|
+
send_notification_to_slack(nil, title: slack_title + ": Sitemaps Not Updated", level: "danger", fields: fields) unless rack_env == "test"
|
135
149
|
options[:url] = nil
|
136
150
|
ensure
|
137
151
|
# don't loop when testing
|
@@ -151,7 +165,7 @@ module Maltese
|
|
151
165
|
def parse_data(result)
|
152
166
|
Array.wrap(result.body.fetch("data", nil)).each do |item|
|
153
167
|
loc = "/works/" + item.dig("attributes", "doi")
|
154
|
-
sitemap.add loc, changefreq: "
|
168
|
+
sitemap.add loc, changefreq: "weekly", lastmod: item.dig("attributes", "updated")
|
155
169
|
end
|
156
170
|
sitemap.sitemap.link_count
|
157
171
|
end
|
@@ -160,7 +174,36 @@ module Maltese
|
|
160
174
|
sitemap.finalize!
|
161
175
|
options[:start_time] ||= Time.now
|
162
176
|
sitemap.sitemap_index.stats_summary(:time_taken => Time.now - options[:start_time])
|
177
|
+
|
178
|
+
fields = [
|
179
|
+
{ title: "URL", value: "#{sitemap_url}sitemaps/sitemap.xml.gz" },
|
180
|
+
{ title: "Number of DOIs", value: format_number(sitemap.sitemap.link_count), short: true },
|
181
|
+
{ title: "Time Taken", value: "#{((Time.now - options[:start_time])/ 60.0).ceil} min", short: true }
|
182
|
+
]
|
183
|
+
send_notification_to_slack(nil, title: slack_title + ": Sitemaps Updated", level: "good", fields: fields) unless rack_env == "test"
|
163
184
|
sitemap.sitemap.link_count
|
164
185
|
end
|
186
|
+
|
187
|
+
def send_notification_to_slack(text, options={})
|
188
|
+
return nil unless slack_webhook_url.present?
|
189
|
+
|
190
|
+
attachment = {
|
191
|
+
title: options[:title] || "Fabrica Message",
|
192
|
+
text: text,
|
193
|
+
color: options[:level] || "good",
|
194
|
+
fields: options[:fields]
|
195
|
+
}.compact
|
196
|
+
|
197
|
+
notifier = Slack::Notifier.new slack_webhook_url,
|
198
|
+
username: "Fabrica",
|
199
|
+
icon_url: SLACK_ICON_URL
|
200
|
+
response = notifier.ping attachments: [attachment]
|
201
|
+
response.first.body
|
202
|
+
end
|
203
|
+
|
204
|
+
# from https://codereview.stackexchange.com/questions/28054/separate-numbers-with-commas
|
205
|
+
def format_number(number)
|
206
|
+
number.to_s.reverse.gsub(/(\d{3})(?=\d)/, '\\1,').reverse
|
207
|
+
end
|
165
208
|
end
|
166
209
|
end
|
data/lib/maltese/version.rb
CHANGED
data/maltese.gemspec
CHANGED
@@ -20,6 +20,7 @@ Gem::Specification.new do |s|
|
|
20
20
|
s.add_dependency 'logstash-logger', '~> 0.26.1'
|
21
21
|
s.add_dependency 'activesupport', '>= 4.2.5', '< 6'
|
22
22
|
s.add_dependency 'dotenv', '~> 2.1', '>= 2.1.1'
|
23
|
+
s.add_dependency 'slack-notifier', '~> 2.1'
|
23
24
|
s.add_dependency 'thor', '~> 0.19'
|
24
25
|
s.add_dependency 'retriable', '~> 3.1'
|
25
26
|
s.add_dependency 'sitemap_generator', '~> 6.0'
|
data/spec/cli_spec.rb
CHANGED
@@ -16,7 +16,7 @@ describe Maltese::CLI do
|
|
16
16
|
sitemap = Zlib::GzipReader.open("public/sitemaps/sitemap.xml.gz") { |gz| gz.read }
|
17
17
|
doc = Nokogiri::XML(sitemap)
|
18
18
|
expect(doc.xpath("//xmlns:url").size).to eq(1001)
|
19
|
-
expect(doc.xpath("//xmlns:loc").last.text).to eq("https://search.test.datacite.org/works/10.
|
19
|
+
expect(doc.xpath("//xmlns:loc").last.text).to eq("https://search.test.datacite.org/works/10.1080/19393210.2019.1576774")
|
20
20
|
end
|
21
21
|
end
|
22
22
|
end
|