maltese 0.9.2 → 0.9.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
 - data/Dockerfile +2 -2
 - data/Gemfile.lock +4 -2
 - data/lib/maltese/cli.rb +1 -0
 - data/lib/maltese/sitemap.rb +46 -3
 - data/lib/maltese/version.rb +1 -1
 - data/maltese.gemspec +1 -0
 - data/spec/cli_spec.rb +1 -1
 - data/spec/fixtures/vcr_cassettes/Maltese_CLI/sitemap/should_succeed.yml +79 -24
 - data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/process_data/should_handle_bad_request_errors_with_the_Datacite_REST_API.yml +65 -10
 - data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/process_data/should_handle_timeout_errors_with_the_Datacite_REST_API.yml +65 -10
 - data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/process_data/should_retry_2_times_for_bad_request_errors_with_the_Datacite_REST_API.yml +58 -0
 - data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/push_data/should_report_if_there_are_no_works_returned_by_the_Datacite_REST_API.yml +66 -11
 - data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/push_data/should_report_if_there_are_works_returned_by_the_Datacite_REST_API.yml +67 -12
 - data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/queue_jobs/should_report_if_there_are_works_returned_by_the_Datacite_REST_API.yml +79 -24
 - data/spec/sitemap_spec.rb +1 -1
 - metadata +16 -1
 
    
        checksums.yaml
    CHANGED
    
    | 
         @@ -1,7 +1,7 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            ---
         
     | 
| 
       2 
2 
     | 
    
         
             
            SHA256:
         
     | 
| 
       3 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       4 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 3 
     | 
    
         
            +
              metadata.gz: fe93930c90b5ddc8d0b3b89de7e18b7644356b64f82c76451b16cf9395e94269
         
     | 
| 
      
 4 
     | 
    
         
            +
              data.tar.gz: 208578435824ca3f9a9cf2582e1dd73f6df8e018319494469c2a1f2408d9a3ed
         
     | 
| 
       5 
5 
     | 
    
         
             
            SHA512:
         
     | 
| 
       6 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       7 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 6 
     | 
    
         
            +
              metadata.gz: 6dbeb4a4e96a050c987746be6a414dfe53428ed7b30ad211dee2a11df7052ce995f1f07841ce0cc32c1cd9d8904b4197c049b9ba3da7614659fc75adfa55f313
         
     | 
| 
      
 7 
     | 
    
         
            +
              data.tar.gz: 8dea3a1fb90e4d9cd94cfa85bb2b4e1263b0635ad3010c937d311ab4519da49c8759a8679edf1313305f67843428be780b9db991ae06ac97e8c4878d4ae5f74b
         
     | 
    
        data/Dockerfile
    CHANGED
    
    | 
         @@ -11,6 +11,6 @@ RUN apt-get update && apt-get upgrade -y -o Dpkg::Options::="--force-confold" && 
     | 
|
| 
       11 
11 
     | 
    
         
             
                apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
         
     | 
| 
       12 
12 
     | 
    
         | 
| 
       13 
13 
     | 
    
         
             
            # Install maltese gem
         
     | 
| 
       14 
     | 
    
         
            -
            RUN /sbin/setuser app gem install maltese -v 0.9.2
     | 
| 
      
 14 
     | 
    
         
            +
            RUN /sbin/setuser app gem install maltese -v 0.9.3
         
     | 
| 
       15 
15 
     | 
    
         | 
| 
       16 
     | 
    
         
            -
            CMD maltese sitemap --sitemap_bucket $SITEMAP_BUCKET --rack_env $RACK_ENV --access_key $AWS_ACCESS_KEY_ID --secret_key $AWS_SECRET_ACCESS_KEY --region $AWS_REGION
         
     | 
| 
      
 16 
     | 
    
         
            +
            CMD maltese sitemap --sitemap_bucket $SITEMAP_BUCKET --rack_env $RACK_ENV --access_key $AWS_ACCESS_KEY_ID --secret_key $AWS_SECRET_ACCESS_KEY --region $AWS_REGION --slack_webhook_url $SLACK_WEBHOOK_URL
         
     | 
    
        data/Gemfile.lock
    CHANGED
    
    | 
         @@ -1,7 +1,7 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            PATH
         
     | 
| 
       2 
2 
     | 
    
         
             
              remote: .
         
     | 
| 
       3 
3 
     | 
    
         
             
              specs:
         
     | 
| 
       4 
     | 
    
         
            -
                maltese (0.9.2)
     | 
| 
      
 4 
     | 
    
         
            +
                maltese (0.9.3)
         
     | 
| 
       5 
5 
     | 
    
         
             
                  activesupport (>= 4.2.5, < 6)
         
     | 
| 
       6 
6 
     | 
    
         
             
                  aws-sdk-s3 (~> 1.19)
         
     | 
| 
       7 
7 
     | 
    
         
             
                  dotenv (~> 2.1, >= 2.1.1)
         
     | 
| 
         @@ -11,6 +11,7 @@ PATH 
     | 
|
| 
       11 
11 
     | 
    
         
             
                  mime-types (~> 3.1)
         
     | 
| 
       12 
12 
     | 
    
         
             
                  retriable (~> 3.1)
         
     | 
| 
       13 
13 
     | 
    
         
             
                  sitemap_generator (~> 6.0)
         
     | 
| 
      
 14 
     | 
    
         
            +
                  slack-notifier (~> 2.1)
         
     | 
| 
       14 
15 
     | 
    
         
             
                  thor (~> 0.19)
         
     | 
| 
       15 
16 
     | 
    
         | 
| 
       16 
17 
     | 
    
         
             
            GEM
         
     | 
| 
         @@ -59,7 +60,7 @@ GEM 
     | 
|
| 
       59 
60 
     | 
    
         
             
                i18n (1.7.0)
         
     | 
| 
       60 
61 
     | 
    
         
             
                  concurrent-ruby (~> 1.0)
         
     | 
| 
       61 
62 
     | 
    
         
             
                jmespath (1.4.0)
         
     | 
| 
       62 
     | 
    
         
            -
                json (2. 
     | 
| 
      
 63 
     | 
    
         
            +
                json (2.3.0)
         
     | 
| 
       63 
64 
     | 
    
         
             
                logstash-event (1.2.02)
         
     | 
| 
       64 
65 
     | 
    
         
             
                logstash-logger (0.26.1)
         
     | 
| 
       65 
66 
     | 
    
         
             
                  logstash-event (~> 1.2)
         
     | 
| 
         @@ -111,6 +112,7 @@ GEM 
     | 
|
| 
       111 
112 
     | 
    
         
             
                simplecov-html (0.10.2)
         
     | 
| 
       112 
113 
     | 
    
         
             
                sitemap_generator (6.0.2)
         
     | 
| 
       113 
114 
     | 
    
         
             
                  builder (~> 3.0)
         
     | 
| 
      
 115 
     | 
    
         
            +
                slack-notifier (2.3.2)
         
     | 
| 
       114 
116 
     | 
    
         
             
                thor (0.20.3)
         
     | 
| 
       115 
117 
     | 
    
         
             
                thread_safe (0.3.6)
         
     | 
| 
       116 
118 
     | 
    
         
             
                tzinfo (1.2.5)
         
     | 
    
        data/lib/maltese/cli.rb
    CHANGED
    
    | 
         @@ -23,6 +23,7 @@ module Maltese 
     | 
|
| 
       23 
23 
     | 
    
         
             
                method_option :access_key, type: :string
         
     | 
| 
       24 
24 
     | 
    
         
             
                method_option :secret_key, type: :string
         
     | 
| 
       25 
25 
     | 
    
         
             
                method_option :region, type: :string
         
     | 
| 
      
 26 
     | 
    
         
            +
                method_option :slack_webhook_url, type: :string
         
     | 
| 
       26 
27 
     | 
    
         
             
                def sitemap
         
     | 
| 
       27 
28 
     | 
    
         
             
                  sitemap = Maltese::Sitemap.new(options)
         
     | 
| 
       28 
29 
     | 
    
         
             
                  sitemap.queue_jobs
         
     | 
    
        data/lib/maltese/sitemap.rb
    CHANGED
    
    | 
         @@ -1,11 +1,12 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            require 'logstash-logger'
         
     | 
| 
       2 
2 
     | 
    
         
             
            require 'retriable'
         
     | 
| 
      
 3 
     | 
    
         
            +
            require 'slack-notifier'
         
     | 
| 
       3 
4 
     | 
    
         | 
| 
       4 
5 
     | 
    
         
             
            module Maltese
         
     | 
| 
       5 
6 
     | 
    
         
             
              class ::BadGatewayError < StandardError; end
         
     | 
| 
       6 
7 
     | 
    
         | 
| 
       7 
8 
     | 
    
         
             
              class Sitemap
         
     | 
| 
       8 
     | 
    
         
            -
                attr_reader :sitemap_bucket, :rack_env, :access_key, :secret_key, :region, :logger
         
     | 
| 
      
 9 
     | 
    
         
            +
                attr_reader :sitemap_bucket, :rack_env, :access_key, :secret_key, :region, :slack_webhook_url, :logger
         
     | 
| 
       9 
10 
     | 
    
         | 
| 
       10 
11 
     | 
    
         
             
                # load ENV variables from .env file if it exists
         
     | 
| 
       11 
12 
     | 
    
         
             
                env_file = File.expand_path("../../../.env", __FILE__)
         
     | 
| 
         @@ -22,12 +23,16 @@ module Maltese 
     | 
|
| 
       22 
23 
     | 
    
         
             
                  env_vars.each { |k, v| ENV[k] = v }
         
     | 
| 
       23 
24 
     | 
    
         
             
                end
         
     | 
| 
       24 
25 
     | 
    
         | 
| 
      
 26 
     | 
    
         
            +
                # icon for Slack messages
         
     | 
| 
      
 27 
     | 
    
         
            +
                SLACK_ICON_URL = "https://github.com/datacite/segugio/blob/master/source/images/fabrica.png"
         
     | 
| 
      
 28 
     | 
    
         
            +
             
     | 
| 
       25 
29 
     | 
    
         
             
                def initialize(attributes={})
         
     | 
| 
       26 
30 
     | 
    
         
             
                  @sitemap_bucket = attributes[:sitemap_bucket].presence || "search.test.datacite.org"
         
     | 
| 
       27 
31 
     | 
    
         
             
                  @rack_env = attributes[:rack_env].presence || ENV['RACK_ENV'] || "stage"
         
     | 
| 
       28 
32 
     | 
    
         
             
                  @access_key = attributes[:access_key].presence || ENV['AWS_ACCESS_KEY_ID']
         
     | 
| 
       29 
33 
     | 
    
         
             
                  @secret_key = attributes[:secret_key].presence || ENV['AWS_SECRET_ACCESS_KEY']
         
     | 
| 
       30 
34 
     | 
    
         
             
                  @region = attributes[:region].presence || ENV['AWS_REGION']
         
     | 
| 
      
 35 
     | 
    
         
            +
                  @slack_webhook_url = attributes[:slack_webhook_url].presence || ENV['SLACK_WEBHOOK_URL']
         
     | 
| 
       31 
36 
     | 
    
         | 
| 
       32 
37 
     | 
    
         
             
                  @logger = LogStashLogger.new(type: :stdout)
         
     | 
| 
       33 
38 
     | 
    
         
             
                end
         
     | 
| 
         @@ -36,6 +41,10 @@ module Maltese 
     | 
|
| 
       36 
41 
     | 
    
         
             
                  rack_env == "production" ? "https://search.datacite.org/" : "https://search.test.datacite.org/"
         
     | 
| 
       37 
42 
     | 
    
         
             
                end
         
     | 
| 
       38 
43 
     | 
    
         | 
| 
      
 44 
     | 
    
         
            +
                def slack_title
         
     | 
| 
      
 45 
     | 
    
         
            +
                  rack_env == "production" ? "DataCite Fabrica" : "DataCite Fabrica Test"
         
     | 
| 
      
 46 
     | 
    
         
            +
                end
         
     | 
| 
      
 47 
     | 
    
         
            +
             
     | 
| 
       39 
48 
     | 
    
         
             
                def sitemaps_path
         
     | 
| 
       40 
49 
     | 
    
         
             
                  "sitemaps/"
         
     | 
| 
       41 
50 
     | 
    
         
             
                end
         
     | 
| 
         @@ -130,8 +139,13 @@ module Maltese 
     | 
|
| 
       130 
139 
     | 
    
         
             
                        options[:url] = nil
         
     | 
| 
       131 
140 
     | 
    
         
             
                      end
         
     | 
| 
       132 
141 
     | 
    
         
             
                    rescue => exception
         
     | 
| 
       133 
     | 
    
         
            -
                      logger.error "Error: #{exception.message} 
     | 
| 
      
 142 
     | 
    
         
            +
                      logger.error "Error: #{exception.message}"
         
     | 
| 
       134 
143 
     | 
    
         
             
                      error_count += 1
         
     | 
| 
      
 144 
     | 
    
         
            +
                      fields = [
         
     | 
| 
      
 145 
     | 
    
         
            +
                        { title: "Error", value: exception.message },
         
     | 
| 
      
 146 
     | 
    
         
            +
                        { title: "Time Taken", value: "#{((Time.now - options[:start_time])/ 60.0).ceil} min", short: true }
         
     | 
| 
      
 147 
     | 
    
         
            +
                      ]
         
     | 
| 
      
 148 
     | 
    
         
            +
                      send_notification_to_slack(nil, title: slack_title + ": Sitemaps Not Updated", level: "danger", fields: fields) unless rack_env == "test"
         
     | 
| 
       135 
149 
     | 
    
         
             
                      options[:url] = nil
         
     | 
| 
       136 
150 
     | 
    
         
             
                    ensure
         
     | 
| 
       137 
151 
     | 
    
         
             
                      # don't loop when testing
         
     | 
| 
         @@ -151,7 +165,7 @@ module Maltese 
     | 
|
| 
       151 
165 
     | 
    
         
             
                def parse_data(result)
         
     | 
| 
       152 
166 
     | 
    
         
             
                  Array.wrap(result.body.fetch("data", nil)).each do |item|
         
     | 
| 
       153 
167 
     | 
    
         
             
                    loc = "/works/" + item.dig("attributes", "doi")
         
     | 
| 
       154 
     | 
    
         
            -
                    sitemap.add loc, changefreq: " 
     | 
| 
      
 168 
     | 
    
         
            +
                    sitemap.add loc, changefreq: "weekly", lastmod: item.dig("attributes", "updated")
         
     | 
| 
       155 
169 
     | 
    
         
             
                  end
         
     | 
| 
       156 
170 
     | 
    
         
             
                  sitemap.sitemap.link_count
         
     | 
| 
       157 
171 
     | 
    
         
             
                end
         
     | 
| 
         @@ -160,7 +174,36 @@ module Maltese 
     | 
|
| 
       160 
174 
     | 
    
         
             
                  sitemap.finalize!
         
     | 
| 
       161 
175 
     | 
    
         
             
                  options[:start_time] ||= Time.now
         
     | 
| 
       162 
176 
     | 
    
         
             
                  sitemap.sitemap_index.stats_summary(:time_taken => Time.now - options[:start_time])
         
     | 
| 
      
 177 
     | 
    
         
            +
                  
         
     | 
| 
      
 178 
     | 
    
         
            +
                  fields = [
         
     | 
| 
      
 179 
     | 
    
         
            +
                    { title: "URL", value: "#{sitemap_url}sitemaps/sitemap.xml.gz" },
         
     | 
| 
      
 180 
     | 
    
         
            +
                    { title: "Number of DOIs", value: format_number(sitemap.sitemap.link_count), short: true },
         
     | 
| 
      
 181 
     | 
    
         
            +
                    { title: "Time Taken", value: "#{((Time.now - options[:start_time])/ 60.0).ceil} min", short: true }
         
     | 
| 
      
 182 
     | 
    
         
            +
                  ]
         
     | 
| 
      
 183 
     | 
    
         
            +
                  send_notification_to_slack(nil, title: slack_title + ": Sitemaps Updated", level: "good", fields: fields) unless rack_env == "test"
         
     | 
| 
       163 
184 
     | 
    
         
             
                  sitemap.sitemap.link_count
         
     | 
| 
       164 
185 
     | 
    
         
             
                end
         
     | 
| 
      
 186 
     | 
    
         
            +
             
     | 
| 
      
 187 
     | 
    
         
            +
                def send_notification_to_slack(text, options={})
         
     | 
| 
      
 188 
     | 
    
         
            +
                  return nil unless slack_webhook_url.present?
         
     | 
| 
      
 189 
     | 
    
         
            +
             
     | 
| 
      
 190 
     | 
    
         
            +
                  attachment = {
         
     | 
| 
      
 191 
     | 
    
         
            +
                    title: options[:title] || "Fabrica Message",
         
     | 
| 
      
 192 
     | 
    
         
            +
                    text: text,
         
     | 
| 
      
 193 
     | 
    
         
            +
                    color: options[:level] || "good",
         
     | 
| 
      
 194 
     | 
    
         
            +
                    fields: options[:fields]
         
     | 
| 
      
 195 
     | 
    
         
            +
                  }.compact
         
     | 
| 
      
 196 
     | 
    
         
            +
             
     | 
| 
      
 197 
     | 
    
         
            +
                  notifier = Slack::Notifier.new slack_webhook_url,
         
     | 
| 
      
 198 
     | 
    
         
            +
                                                 username: "Fabrica",
         
     | 
| 
      
 199 
     | 
    
         
            +
                                                 icon_url: SLACK_ICON_URL
         
     | 
| 
      
 200 
     | 
    
         
            +
                  response = notifier.ping attachments: [attachment]
         
     | 
| 
      
 201 
     | 
    
         
            +
                  response.first.body
         
     | 
| 
      
 202 
     | 
    
         
            +
                end
         
     | 
| 
      
 203 
     | 
    
         
            +
             
     | 
| 
      
 204 
     | 
    
         
            +
                # from https://codereview.stackexchange.com/questions/28054/separate-numbers-with-commas
         
     | 
| 
      
 205 
     | 
    
         
            +
                def format_number(number)
         
     | 
| 
      
 206 
     | 
    
         
            +
                  number.to_s.reverse.gsub(/(\d{3})(?=\d)/, '\\1,').reverse
         
     | 
| 
      
 207 
     | 
    
         
            +
                end
         
     | 
| 
       165 
208 
     | 
    
         
             
              end
         
     | 
| 
       166 
209 
     | 
    
         
             
            end
         
     | 
    
        data/lib/maltese/version.rb
    CHANGED
    
    
    
        data/maltese.gemspec
    CHANGED
    
    | 
         @@ -20,6 +20,7 @@ Gem::Specification.new do |s| 
     | 
|
| 
       20 
20 
     | 
    
         
             
              s.add_dependency 'logstash-logger', '~> 0.26.1'
         
     | 
| 
       21 
21 
     | 
    
         
             
              s.add_dependency 'activesupport', '>= 4.2.5', '< 6'
         
     | 
| 
       22 
22 
     | 
    
         
             
              s.add_dependency 'dotenv', '~> 2.1', '>= 2.1.1'
         
     | 
| 
      
 23 
     | 
    
         
            +
              s.add_dependency 'slack-notifier', '~> 2.1'
         
     | 
| 
       23 
24 
     | 
    
         
             
              s.add_dependency 'thor', '~> 0.19'
         
     | 
| 
       24 
25 
     | 
    
         
             
              s.add_dependency 'retriable', '~> 3.1'
         
     | 
| 
       25 
26 
     | 
    
         
             
              s.add_dependency 'sitemap_generator', '~> 6.0'
         
     | 
    
        data/spec/cli_spec.rb
    CHANGED
    
    | 
         @@ -16,7 +16,7 @@ describe Maltese::CLI do 
     | 
|
| 
       16 
16 
     | 
    
         
             
                  sitemap = Zlib::GzipReader.open("public/sitemaps/sitemap.xml.gz") { |gz| gz.read }
         
     | 
| 
       17 
17 
     | 
    
         
             
                  doc = Nokogiri::XML(sitemap)
         
     | 
| 
       18 
18 
     | 
    
         
             
                  expect(doc.xpath("//xmlns:url").size).to eq(1001)
         
     | 
| 
       19 
     | 
    
         
            -
                  expect(doc.xpath("//xmlns:loc").last.text).to eq("https://search.test.datacite.org/works/10. 
     | 
| 
      
 19 
     | 
    
         
            +
                  expect(doc.xpath("//xmlns:loc").last.text).to eq("https://search.test.datacite.org/works/10.1080/19393210.2019.1576774")
         
     | 
| 
       20 
20 
     | 
    
         
             
                end
         
     | 
| 
       21 
21 
     | 
    
         
             
              end
         
     | 
| 
       22 
22 
     | 
    
         
             
            end
         
     |