maltese 0.8.14 → 0.8.15

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3975693a1ddab582888bf8e4628be0e0b1127a4de475afc0f4126fda14cef80e
4
- data.tar.gz: f0f57a6a433a36414a853e6e42735577b7e9a40dcd34b9931cd6efb9cd1d664c
3
+ metadata.gz: 47f0ba0813eeed3c56a5636d34acbb9bc3349cef65513b3a834c05dcda19bc08
4
+ data.tar.gz: 6ecb5ff42d4759e07c7632420aa78a81284a57ebba7aeca9d2747218e0d78297
5
5
  SHA512:
6
- metadata.gz: a401e5b79ca1f326adb521701a1d355f87f56ef1d7c42122ee0093d3cf2af39597d8b7720eb0b78270f441d7686c0bcf3af250959a212a91340d09f8a032db6d
7
- data.tar.gz: 256c0579d4deeb794bae24f6560a216c2953f6ce19f4ab8c94b97a975c19f2949ef696ce8379997fafa068b9888176b0ddeeb1b673b8d5be015c982196422080
6
+ metadata.gz: 05aedc2d77881876ac08bf9f11a2796220619e06e128209f83600758d5c5c5448a47db3b7a25f67870b1ceaba63ea5c840958c2f5fa1ad85666280b285362306
7
+ data.tar.gz: c15bcefd230f7839556345f455879c702ed25a06368410d9e6e1bc72a86885b4cb06fdb8ddb03f5c3a21be605339834da56c21ef2c91fc39f8588fa45e84e483
data/Dockerfile CHANGED
@@ -11,6 +11,6 @@ RUN apt-get update && apt-get upgrade -y -o Dpkg::Options::="--force-confold" &&
11
11
  apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
12
12
 
13
13
  # Install maltese gem
14
- RUN /sbin/setuser app gem install maltese -v 0.8.14
14
+ RUN /sbin/setuser app gem install maltese -v 0.8.15
15
15
 
16
- CMD maltese sitemap --sitemap_bucket $SITEMAP_BUCKET
16
+ CMD maltese sitemap --sitemap_bucket $SITEMAP_BUCKET --rack_env $RACK_ENV --access_key $AWS_ACCESS_KEY_ID --secret_key $AWS_SECRET_ACCESS_KEY --region $AWS_REGION
data/Gemfile.lock CHANGED
@@ -1,10 +1,11 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- maltese (0.8.14)
4
+ maltese (0.8.15)
5
5
  activesupport (>= 4.2.5, < 6)
6
6
  aws-sdk-s3 (~> 1.19)
7
7
  dotenv (~> 2.1, >= 2.1.1)
8
+ faraday (= 0.17.0)
8
9
  maremma (~> 4.1)
9
10
  mime-types (~> 3.1)
10
11
  sitemap_generator (~> 6.0)
@@ -46,7 +47,7 @@ GEM
46
47
  docile (1.1.5)
47
48
  dotenv (2.7.5)
48
49
  excon (0.67.0)
49
- faraday (0.17.1)
50
+ faraday (0.17.0)
50
51
  multipart-post (>= 1.2, < 3)
51
52
  faraday-encoding (0.0.5)
52
53
  faraday
data/README.md CHANGED
@@ -4,7 +4,7 @@
4
4
 
5
5
  # Maltese
6
6
 
7
- Ruby gem and command-line tool for generating sitemap files from the DataCite REST API. Uses the [SitemapGenerator](https://github.com/kjvarga/sitemap_generator) gem and can be run as Docker container, e.g. using ECS and triggered by AWS Lambda, as described [here](https://medium.com/@pahud/ecs-task-runner-with-lambda-4594b72ccb#.5xpmf2inz).
7
+ Ruby gem and command-line tool for generating sitemap files from the DataCite REST API. Uses the [SitemapGenerator](https://github.com/kjvarga/sitemap_generator) gem and can be run as a Docker container, e.g. as a scheduled task in AWS ECS triggered by AWS CloudWatch Events.
8
8
 
9
9
  Run as a command-line tool:
10
10
 
data/lib/maltese/cli.rb CHANGED
@@ -19,6 +19,10 @@ module Maltese
19
19
 
20
20
  desc "sitemap", "generate sitemap for DataCite Search"
21
21
  method_option :sitemap_bucket, type: :string
22
+ method_option :rack_env, type: :string
23
+ method_option :access_key, type: :string
24
+ method_option :secret_key, type: :string
25
+ method_option :region, type: :string
22
26
  def sitemap
23
27
  sitemap = Maltese::Sitemap.new(options)
24
28
  sitemap.queue_jobs
@@ -1,6 +1,6 @@
1
1
  module Maltese
2
2
  class Sitemap
3
- attr_reader :sitemap_bucket
3
+ attr_reader :sitemap_bucket, :rack_env, :access_key, :secret_key, :region
4
4
 
5
5
  # load ENV variables from .env file if it exists
6
6
  env_file = File.expand_path("../../../.env", __FILE__)
@@ -19,10 +19,14 @@ module Maltese
19
19
 
20
20
  def initialize(attributes={})
21
21
  @sitemap_bucket = attributes[:sitemap_bucket].presence || "search.test.datacite.org"
22
+ @rack_env = attributes[:rack_env].presence || ENV['RACK_ENV'] || "stage"
23
+ @access_key = attributes[:access_key].presence || ENV['AWS_ACCESS_KEY_ID']
24
+ @secret_key = attributes[:secret_key].presence || ENV['AWS_SECRET_ACCESS_KEY']
25
+ @region = attributes[:region].presence || ENV['AWS_REGION']
22
26
  end
23
27
 
24
28
  def sitemap_url
25
- ENV['RACK_ENV'] == "production" ? "https://search.datacite.org/" : "https://search.test.datacite.org/"
29
+ rack_env == "production" ? "https://search.datacite.org/" : "https://search.test.datacite.org/"
26
30
  end
27
31
 
28
32
  def sitemaps_path
@@ -30,7 +34,7 @@ module Maltese
30
34
  end
31
35
 
32
36
  def search_path
33
- ENV['RACK_ENV'] == "production" ? "https://api.datacite.org/dois?" : "https://api.test.datacite.org/dois?"
37
+ rack_env == "production" ? "https://api.datacite.org/dois?" : "https://api.test.datacite.org/dois?"
34
38
  end
35
39
 
36
40
  def timeout
@@ -52,22 +56,24 @@ module Maltese
52
56
 
53
57
  def s3_adapter
54
58
  SitemapGenerator::AwsSdkAdapter.new(sitemap_bucket,
55
- aws_access_key_id: ENV['AWS_ACCESS_KEY_ID'],
56
- aws_secret_access_key: ENV['AWS_SECRET_ACCESS_KEY'],
57
- aws_region: ENV['AWS_REGION'])
59
+ aws_access_key_id: access_key,
60
+ aws_secret_access_key: secret_key,
61
+ aws_region: region)
58
62
  end
59
63
 
60
64
  def queue_jobs(options={})
61
65
  total = get_total(options)
62
66
 
63
- if total > 0
64
- puts process_data(options.merge(total: total, url: get_query_url))
67
+ if total.nil?
68
+ puts "An error occured."
69
+ elsif total > 0
70
+ process_data(options.merge(total: total, url: get_query_url))
65
71
  else
66
72
  puts "No works found."
67
73
  end
68
74
 
69
75
  # return number of works queued
70
- total
76
+ total.to_i
71
77
  end
72
78
 
73
79
  def get_total(options={})
@@ -102,14 +108,14 @@ module Maltese
102
108
  puts "#{link_count} DOIs parsed."
103
109
  options[:url] = response.body.dig("links", "next")
104
110
  else
105
- puts "An error occured for URL #{options[:url]}:."
111
+ puts "An error occured for URL #{options[:url]}."
106
112
  puts "Error message: #{response.body.fetch("errors").inspect}" if response.body.fetch("errors", nil).present?
107
113
  error_count += 1
108
114
  options[:url] = nil
109
115
  end
110
116
 
111
117
  # don't loop when testing
112
- break if ENV['RACK'] == "test"
118
+ break if rack_env == "test"
113
119
  end
114
120
 
115
121
  return link_count if error_count > 0
@@ -122,7 +128,7 @@ module Maltese
122
128
  end
123
129
 
124
130
  def parse_data(result)
125
- result.body.fetch("data", []).each do |item|
131
+ Array.wrap(result.body.fetch("data", nil)).each do |item|
126
132
  loc = "/works/" + item.dig("attributes", "doi")
127
133
  sitemap.add loc, changefreq: "monthly", lastmod: item.dig("attributes", "updated")
128
134
  end
@@ -1,3 +1,3 @@
1
1
  module Maltese
2
- VERSION = "0.8.14"
2
+ VERSION = "0.8.15"
3
3
  end
data/maltese.gemspec CHANGED
@@ -16,6 +16,7 @@ Gem::Specification.new do |s|
16
16
 
17
17
  # Declare dependencies here, rather than in the Gemfile
18
18
  s.add_dependency 'maremma', '~> 4.1'
19
+ s.add_dependency 'faraday', '0.17.0'
19
20
  s.add_dependency 'activesupport', '>= 4.2.5', '< 6'
20
21
  s.add_dependency 'dotenv', '~> 2.1', '>= 2.1.1'
21
22
  s.add_dependency 'thor', '~> 0.19'
data/spec/cli_spec.rb CHANGED
@@ -7,7 +7,7 @@ describe Maltese::CLI do
7
7
  end
8
8
 
9
9
  let(:sitemap_bucket) { "search.test.datacite.org" }
10
- let(:cli_options) { { sitemap_bucket: sitemap_bucket } }
10
+ let(:cli_options) { { sitemap_bucket: sitemap_bucket, rack_env: "test" } }
11
11
 
12
12
  describe "sitemap", vcr: true, :order => :defined do
13
13
  it 'should succeed' do
@@ -16,7 +16,7 @@ describe Maltese::CLI do
16
16
  sitemap = Zlib::GzipReader.open("public/sitemaps/sitemap.xml.gz") { |gz| gz.read }
17
17
  doc = Nokogiri::XML(sitemap)
18
18
  expect(doc.xpath("//xmlns:url").size).to eq(1001)
19
- expect(doc.xpath("//xmlns:loc").last.text).to eq("https://search.test.datacite.org/works/10.17557/tjfc.424379")
19
+ expect(doc.xpath("//xmlns:loc").last.text).to eq("https://search.test.datacite.org/works/10.1007/s10620-013-2631-z")
20
20
  end
21
21
  end
22
22
  end