maltese 0.2.4 → 0.8

Sign up to get free protection for your applications and to get access to all the features.
Files changed (27)
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +137 -1035
  3. data/.travis.yml +1 -1
  4. data/Gemfile.lock +39 -38
  5. data/README.md +1 -1
  6. data/lib/maltese/sitemap.rb +17 -33
  7. data/lib/maltese/version.rb +1 -1
  8. data/spec/cli_spec.rb +4 -17
  9. data/spec/fixtures/sitemap.json +69666 -7571
  10. data/spec/fixtures/sitemap_nil.json +8 -8
  11. data/spec/fixtures/vcr_cassettes/Maltese_CLI/sitemap/should_succeed.yml +37 -73
  12. data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/get_data/should_report_if_there_are_works_returned_by_the_Datacite_REST_API.yml +49 -0
  13. data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/get_total/with_works.yml +24 -19
  14. data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/push_data/should_report_if_there_are_no_works_returned_by_the_Datacite_REST_API.yml +59 -0
  15. data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/push_data/should_report_if_there_are_works_returned_by_the_Datacite_REST_API.yml +59 -0
  16. data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/queue_jobs/should_report_if_there_are_works_returned_by_the_Datacite_REST_API.yml +105 -0
  17. data/spec/sitemap_spec.rb +28 -52
  18. data/spec/spec_helper.rb +0 -1
  19. metadata +6 -10
  20. data/spec/fixtures/vcr_cassettes/Maltese_CLI/sitemap/should_succeed_with_no_works.yml +0 -44
  21. data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/get_data/should_report_if_there_are_no_works_returned_by_the_Datacite_Solr_API.yml +0 -44
  22. data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/get_data/should_report_if_there_are_works_returned_by_the_Datacite_Solr_API.yml +0 -44
  23. data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/get_total/with_no_works.yml +0 -44
  24. data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/push_data/should_report_if_there_are_no_works_returned_by_the_Datacite_Solr_API.yml +0 -59
  25. data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/push_data/should_report_if_there_are_works_returned_by_the_Datacite_Solr_API.yml +0 -59
  26. data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/queue_jobs/should_report_if_there_are_no_works_returned_by_the_Datacite_Solr_API.yml +0 -44
  27. data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/queue_jobs/should_report_if_there_are_works_returned_by_the_Datacite_Solr_API.yml +0 -141
data/.travis.yml CHANGED
@@ -1,6 +1,6 @@
1
1
  language: ruby
2
2
  rvm:
3
- - 2.3.3
3
+ - 2.4.4
4
4
 
5
5
  addons:
6
6
  code_climate:
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- maltese (0.2.4)
4
+ maltese (0.8)
5
5
  activesupport (>= 4.2.5, < 6)
6
6
  aws-sdk-s3 (~> 1.19)
7
7
  dotenv (~> 2.1, >= 2.1.1)
@@ -13,50 +13,51 @@ PATH
13
13
  GEM
14
14
  remote: https://rubygems.org/
15
15
  specs:
16
- activesupport (5.2.1)
16
+ activesupport (5.2.3)
17
17
  concurrent-ruby (~> 1.0, >= 1.0.2)
18
18
  i18n (>= 0.7, < 2)
19
19
  minitest (~> 5.1)
20
20
  tzinfo (~> 1.1)
21
- addressable (2.5.2)
21
+ addressable (2.6.0)
22
22
  public_suffix (>= 2.0.2, < 4.0)
23
- aws-eventstream (1.0.1)
24
- aws-partitions (1.103.0)
25
- aws-sdk-core (3.27.0)
26
- aws-eventstream (~> 1.0)
23
+ aws-eventstream (1.0.3)
24
+ aws-partitions (1.170.0)
25
+ aws-sdk-core (3.54.1)
26
+ aws-eventstream (~> 1.0, >= 1.0.2)
27
27
  aws-partitions (~> 1.0)
28
- aws-sigv4 (~> 1.0)
28
+ aws-sigv4 (~> 1.1)
29
29
  jmespath (~> 1.0)
30
- aws-sdk-kms (1.9.0)
31
- aws-sdk-core (~> 3, >= 3.26.0)
32
- aws-sigv4 (~> 1.0)
33
- aws-sdk-s3 (1.19.0)
34
- aws-sdk-core (~> 3, >= 3.26.0)
30
+ aws-sdk-kms (1.21.0)
31
+ aws-sdk-core (~> 3, >= 3.53.0)
32
+ aws-sigv4 (~> 1.1)
33
+ aws-sdk-s3 (1.41.0)
34
+ aws-sdk-core (~> 3, >= 3.53.0)
35
35
  aws-sdk-kms (~> 1)
36
- aws-sigv4 (~> 1.0)
37
- aws-sigv4 (1.0.3)
36
+ aws-sigv4 (~> 1.1)
37
+ aws-sigv4 (1.1.0)
38
+ aws-eventstream (~> 1.0, >= 1.0.2)
38
39
  builder (3.2.3)
39
- codeclimate-test-reporter (1.0.8)
40
+ codeclimate-test-reporter (1.0.9)
40
41
  simplecov (<= 0.13)
41
- concurrent-ruby (1.0.5)
42
+ concurrent-ruby (1.1.5)
42
43
  crack (0.4.3)
43
44
  safe_yaml (~> 1.0.0)
44
45
  diff-lcs (1.3)
45
46
  docile (1.1.5)
46
- dotenv (2.5.0)
47
- excon (0.62.0)
48
- faraday (0.15.2)
47
+ dotenv (2.7.2)
48
+ excon (0.64.0)
49
+ faraday (0.15.4)
49
50
  multipart-post (>= 1.2, < 3)
50
- faraday-encoding (0.0.4)
51
+ faraday-encoding (0.0.5)
51
52
  faraday
52
53
  faraday_middleware (0.12.2)
53
54
  faraday (>= 0.7.4, < 1.0)
54
- hashdiff (0.3.7)
55
- i18n (1.1.0)
55
+ hashdiff (0.4.0)
56
+ i18n (1.6.0)
56
57
  concurrent-ruby (~> 1.0)
57
58
  jmespath (1.4.0)
58
- json (2.1.0)
59
- maremma (4.1.1)
59
+ json (2.2.0)
60
+ maremma (4.2.1)
60
61
  activesupport (>= 4.2.5, < 6)
61
62
  addressable (>= 2.3.6)
62
63
  builder (~> 3.2, >= 3.2.2)
@@ -69,46 +70,46 @@ GEM
69
70
  oj (>= 2.8.3)
70
71
  mime-types (3.2.2)
71
72
  mime-types-data (~> 3.2015)
72
- mime-types-data (3.2018.0812)
73
+ mime-types-data (3.2019.0331)
73
74
  mini_portile2 (2.3.0)
74
75
  minitest (5.11.3)
75
76
  multi_json (1.13.1)
76
- multipart-post (2.0.0)
77
- nokogiri (1.8.4)
77
+ multipart-post (2.1.1)
78
+ nokogiri (1.8.5)
78
79
  mini_portile2 (~> 2.3.0)
79
- oj (3.6.8)
80
- public_suffix (3.0.3)
81
- rack (2.0.5)
80
+ oj (3.7.12)
81
+ public_suffix (3.1.0)
82
+ rack (2.0.7)
82
83
  rack-test (0.8.3)
83
84
  rack (>= 1.0, < 3)
84
- rake (12.3.1)
85
+ rake (12.3.2)
85
86
  rspec (3.8.0)
86
87
  rspec-core (~> 3.8.0)
87
88
  rspec-expectations (~> 3.8.0)
88
89
  rspec-mocks (~> 3.8.0)
89
90
  rspec-core (3.8.0)
90
91
  rspec-support (~> 3.8.0)
91
- rspec-expectations (3.8.1)
92
+ rspec-expectations (3.8.3)
92
93
  diff-lcs (>= 1.2.0, < 2.0)
93
94
  rspec-support (~> 3.8.0)
94
95
  rspec-mocks (3.8.0)
95
96
  diff-lcs (>= 1.2.0, < 2.0)
96
97
  rspec-support (~> 3.8.0)
97
98
  rspec-support (3.8.0)
98
- safe_yaml (1.0.4)
99
+ safe_yaml (1.0.5)
99
100
  simplecov (0.13.0)
100
101
  docile (~> 1.1.0)
101
102
  json (>= 1.8, < 3)
102
103
  simplecov-html (~> 0.10.0)
103
104
  simplecov-html (0.10.2)
104
- sitemap_generator (6.0.1)
105
+ sitemap_generator (6.0.2)
105
106
  builder (~> 3.0)
106
- thor (0.20.0)
107
+ thor (0.20.3)
107
108
  thread_safe (0.3.6)
108
109
  tzinfo (1.2.5)
109
110
  thread_safe (~> 0.1)
110
111
  vcr (3.0.3)
111
- webmock (3.4.2)
112
+ webmock (3.5.1)
112
113
  addressable (>= 2.3.6)
113
114
  crack (>= 0.3.2)
114
115
  hashdiff
@@ -128,4 +129,4 @@ DEPENDENCIES
128
129
  webmock (~> 3.0, >= 3.0.1)
129
130
 
130
131
  BUNDLED WITH
131
- 1.16.1
132
+ 1.17.3
data/README.md CHANGED
@@ -9,7 +9,7 @@ Ruby gem and command-line tool for generating sitemap files from the DataCite RE
9
9
  Run as a command-line tool:
10
10
 
11
11
  ```
12
- maltese sitemap --from_date 2017-02-15
12
+ maltese sitemap
13
13
  ```
14
14
 
15
15
  ## Installation
@@ -21,8 +21,6 @@ module Maltese
21
21
  @sitemap_bucket = attributes[:sitemap_bucket].presence || "search.test.datacite.org"
22
22
  @from_date = attributes[:from_date].presence || (Time.now.to_date - 1.day).iso8601
23
23
  @until_date = attributes[:until_date].presence || Time.now.to_date.iso8601
24
- @solr_username = ENV['SOLR_USERNAME']
25
- @solr_password = ENV['SOLR_PASSWORD']
26
24
  end
27
25
 
28
26
  def sitemap_url
@@ -34,7 +32,7 @@ module Maltese
34
32
  end
35
33
 
36
34
  def search_path
37
- ENV['RACK_ENV'] == "production" ? "https://solr.datacite.org/api?" : "https://solr.test.datacite.org/api?"
35
+ ENV['RACK_ENV'] == "production" ? "https://api.datacite.org/dois?" : "https://api.test.datacite.org/dois?"
38
36
  end
39
37
 
40
38
  def timeout
@@ -42,7 +40,7 @@ module Maltese
42
40
  end
43
41
 
44
42
  def job_batch_size
45
- 50000
43
+ 1000
46
44
  end
47
45
 
48
46
  def sitemap
@@ -67,7 +65,7 @@ module Maltese
67
65
  if total > 0
68
66
  puts process_data(options.merge(total: total))
69
67
  else
70
- puts "No works found for date range #{from_date} - #{until_date}."
68
+ puts "No works found."
71
69
  end
72
70
 
73
71
  # return number of works queued
@@ -75,28 +73,20 @@ module Maltese
75
73
  end
76
74
 
77
75
  def get_total(options={})
78
- query_url = get_query_url(options.merge(rows: 0))
79
- # Add basic auth options in
80
- options = options.merge(username: @solr_username, password: @solr_password)
76
+ query_url = get_query_url(options.merge(size: 0))
81
77
 
82
78
  result = Maremma.get(query_url, options)
83
- result.body.fetch("data", {}).fetch("response", {}).fetch("numFound", 0)
79
+ result.body.dig("meta", "total")
84
80
  end
85
81
 
86
82
  def get_query_url(options={})
87
- options[:offset] = options[:offset].to_i || 0
88
- options[:rows] = options[:rows].presence || job_batch_size
89
-
90
- updated = "updated:[#{from_date}T00:00:00Z TO #{until_date}T23:59:59Z]"
91
- fq = "#{updated} AND has_metadata:true AND is_active:true"
92
-
93
- params = { q: "*:*",
94
- fq: fq,
95
- start: options[:offset],
96
- rows: options[:rows],
97
- fl: "doi,updated",
98
- sort: "updated asc",
99
- wt: "json"}
83
+ options[:cursor] = options[:cursor] || 1
84
+ options[:size] = options[:size] || job_batch_size
85
+
86
+ params = {
87
+ "page[cursor]": options[:cursor],
88
+ "page[size]": options[:size],
89
+ }
100
90
  search_path + URI.encode_www_form(params)
101
91
  end
102
92
 
@@ -104,11 +94,9 @@ module Maltese
104
94
  options[:start_time] = Time.now
105
95
 
106
96
  # walk through paginated results
107
- total_pages = (options[:total].to_f / job_batch_size).ceil
108
-
109
- (0...total_pages).each do |page|
110
- options[:offset] = page * job_batch_size
97
+ while options[:cursor] do
111
98
  data = get_data(options.merge(timeout: timeout))
99
+ options[:cursor] = data.dig("links", "next")
112
100
  parse_data(data)
113
101
  end
114
102
 
@@ -118,19 +106,15 @@ module Maltese
118
106
  def get_data(options={})
119
107
  query_url = get_query_url(options)
120
108
 
121
- # Add basic auth options in
122
- options = options.merge(username: @solr_username, password: @solr_password)
123
-
124
109
  Maremma.get(query_url, options)
125
110
  end
126
111
 
127
112
  def parse_data(result)
128
113
  return result.body.fetch("errors") if result.body.fetch("errors", nil).present?
129
114
 
130
- items = result.body.fetch("data", {}).fetch('response', {}).fetch('docs', nil)
131
- Array(items).each do |item|
132
- loc = "/works/" + item.fetch("doi")
133
- sitemap.add loc, changefreq: "monthly", lastmod: item.fetch("updated")
115
+ result.body.fetch("data", []).each do |item|
116
+ loc = "/works/" + item.dig("attributes", "doi")
117
+ sitemap.add loc, changefreq: "monthly", lastmod: item.dig("attributes", "updated")
134
118
  end
135
119
  sitemap.sitemap.link_count
136
120
  end
@@ -1,3 +1,3 @@
1
1
  module Maltese
2
- VERSION = "0.2.4"
2
+ VERSION = "0.8"
3
3
  end
data/spec/cli_spec.rb CHANGED
@@ -6,30 +6,17 @@ describe Maltese::CLI do
6
6
  described_class.new
7
7
  end
8
8
 
9
- let(:from_date) { "2018-03-15" }
10
- let(:until_date) { "2018-04-08" }
11
9
  let(:sitemap_bucket) { "search.test.datacite.org" }
12
- let(:cli_options) { { sitemap_bucket: sitemap_bucket,
13
- from_date: from_date,
14
- until_date: until_date } }
10
+ let(:cli_options) { { sitemap_bucket: sitemap_bucket } }
15
11
 
16
12
  describe "sitemap", vcr: true, :order => :defined do
17
13
  it 'should succeed' do
18
14
  subject.options = cli_options
19
- expect { subject.sitemap }.to output(/266 links/).to_stdout
15
+ expect { subject.sitemap }.to output(/1 links/).to_stdout
20
16
  sitemap = Zlib::GzipReader.open("public/sitemaps/sitemap.xml.gz") { |gz| gz.read }
21
17
  doc = Nokogiri::XML(sitemap)
22
- expect(doc.xpath("//xmlns:url").size).to eq(266)
23
- expect(doc.xpath("//xmlns:loc").last.text).to eq("https://search.test.datacite.org/works/10.0133/37522")
24
- end
25
-
26
- it 'should succeed with no works' do
27
- from_date = "2005-04-07"
28
- until_date = "2005-04-08"
29
- subject.options = { sitemap_bucket: sitemap_bucket,
30
- from_date: from_date,
31
- until_date: until_date }
32
- expect { subject.sitemap }.to output("No works found for date range 2005-04-07 - 2005-04-08.\n").to_stdout
18
+ expect(doc.xpath("//xmlns:url").size).to eq(1)
19
+ expect(doc.xpath("//xmlns:loc").last.text).to eq("https://search.test.datacite.org/")
33
20
  end
34
21
  end
35
22
  end