maltese 0.2.4 → 0.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +137 -1035
- data/.travis.yml +1 -1
- data/Gemfile.lock +39 -38
- data/README.md +1 -1
- data/lib/maltese/sitemap.rb +17 -33
- data/lib/maltese/version.rb +1 -1
- data/spec/cli_spec.rb +4 -17
- data/spec/fixtures/sitemap.json +69666 -7571
- data/spec/fixtures/sitemap_nil.json +8 -8
- data/spec/fixtures/vcr_cassettes/Maltese_CLI/sitemap/should_succeed.yml +37 -73
- data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/get_data/should_report_if_there_are_works_returned_by_the_Datacite_REST_API.yml +49 -0
- data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/get_total/with_works.yml +24 -19
- data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/push_data/should_report_if_there_are_no_works_returned_by_the_Datacite_REST_API.yml +59 -0
- data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/push_data/should_report_if_there_are_works_returned_by_the_Datacite_REST_API.yml +59 -0
- data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/queue_jobs/should_report_if_there_are_works_returned_by_the_Datacite_REST_API.yml +105 -0
- data/spec/sitemap_spec.rb +28 -52
- data/spec/spec_helper.rb +0 -1
- metadata +6 -10
- data/spec/fixtures/vcr_cassettes/Maltese_CLI/sitemap/should_succeed_with_no_works.yml +0 -44
- data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/get_data/should_report_if_there_are_no_works_returned_by_the_Datacite_Solr_API.yml +0 -44
- data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/get_data/should_report_if_there_are_works_returned_by_the_Datacite_Solr_API.yml +0 -44
- data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/get_total/with_no_works.yml +0 -44
- data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/push_data/should_report_if_there_are_no_works_returned_by_the_Datacite_Solr_API.yml +0 -59
- data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/push_data/should_report_if_there_are_works_returned_by_the_Datacite_Solr_API.yml +0 -59
- data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/queue_jobs/should_report_if_there_are_no_works_returned_by_the_Datacite_Solr_API.yml +0 -44
- data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/queue_jobs/should_report_if_there_are_works_returned_by_the_Datacite_Solr_API.yml +0 -141
data/.travis.yml
CHANGED
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
maltese (0.
|
4
|
+
maltese (0.8)
|
5
5
|
activesupport (>= 4.2.5, < 6)
|
6
6
|
aws-sdk-s3 (~> 1.19)
|
7
7
|
dotenv (~> 2.1, >= 2.1.1)
|
@@ -13,50 +13,51 @@ PATH
|
|
13
13
|
GEM
|
14
14
|
remote: https://rubygems.org/
|
15
15
|
specs:
|
16
|
-
activesupport (5.2.
|
16
|
+
activesupport (5.2.3)
|
17
17
|
concurrent-ruby (~> 1.0, >= 1.0.2)
|
18
18
|
i18n (>= 0.7, < 2)
|
19
19
|
minitest (~> 5.1)
|
20
20
|
tzinfo (~> 1.1)
|
21
|
-
addressable (2.
|
21
|
+
addressable (2.6.0)
|
22
22
|
public_suffix (>= 2.0.2, < 4.0)
|
23
|
-
aws-eventstream (1.0.
|
24
|
-
aws-partitions (1.
|
25
|
-
aws-sdk-core (3.
|
26
|
-
aws-eventstream (~> 1.0)
|
23
|
+
aws-eventstream (1.0.3)
|
24
|
+
aws-partitions (1.170.0)
|
25
|
+
aws-sdk-core (3.54.1)
|
26
|
+
aws-eventstream (~> 1.0, >= 1.0.2)
|
27
27
|
aws-partitions (~> 1.0)
|
28
|
-
aws-sigv4 (~> 1.
|
28
|
+
aws-sigv4 (~> 1.1)
|
29
29
|
jmespath (~> 1.0)
|
30
|
-
aws-sdk-kms (1.
|
31
|
-
aws-sdk-core (~> 3, >= 3.
|
32
|
-
aws-sigv4 (~> 1.
|
33
|
-
aws-sdk-s3 (1.
|
34
|
-
aws-sdk-core (~> 3, >= 3.
|
30
|
+
aws-sdk-kms (1.21.0)
|
31
|
+
aws-sdk-core (~> 3, >= 3.53.0)
|
32
|
+
aws-sigv4 (~> 1.1)
|
33
|
+
aws-sdk-s3 (1.41.0)
|
34
|
+
aws-sdk-core (~> 3, >= 3.53.0)
|
35
35
|
aws-sdk-kms (~> 1)
|
36
|
-
aws-sigv4 (~> 1.
|
37
|
-
aws-sigv4 (1.0
|
36
|
+
aws-sigv4 (~> 1.1)
|
37
|
+
aws-sigv4 (1.1.0)
|
38
|
+
aws-eventstream (~> 1.0, >= 1.0.2)
|
38
39
|
builder (3.2.3)
|
39
|
-
codeclimate-test-reporter (1.0.
|
40
|
+
codeclimate-test-reporter (1.0.9)
|
40
41
|
simplecov (<= 0.13)
|
41
|
-
concurrent-ruby (1.
|
42
|
+
concurrent-ruby (1.1.5)
|
42
43
|
crack (0.4.3)
|
43
44
|
safe_yaml (~> 1.0.0)
|
44
45
|
diff-lcs (1.3)
|
45
46
|
docile (1.1.5)
|
46
|
-
dotenv (2.
|
47
|
-
excon (0.
|
48
|
-
faraday (0.15.
|
47
|
+
dotenv (2.7.2)
|
48
|
+
excon (0.64.0)
|
49
|
+
faraday (0.15.4)
|
49
50
|
multipart-post (>= 1.2, < 3)
|
50
|
-
faraday-encoding (0.0.
|
51
|
+
faraday-encoding (0.0.5)
|
51
52
|
faraday
|
52
53
|
faraday_middleware (0.12.2)
|
53
54
|
faraday (>= 0.7.4, < 1.0)
|
54
|
-
hashdiff (0.
|
55
|
-
i18n (1.
|
55
|
+
hashdiff (0.4.0)
|
56
|
+
i18n (1.6.0)
|
56
57
|
concurrent-ruby (~> 1.0)
|
57
58
|
jmespath (1.4.0)
|
58
|
-
json (2.
|
59
|
-
maremma (4.
|
59
|
+
json (2.2.0)
|
60
|
+
maremma (4.2.1)
|
60
61
|
activesupport (>= 4.2.5, < 6)
|
61
62
|
addressable (>= 2.3.6)
|
62
63
|
builder (~> 3.2, >= 3.2.2)
|
@@ -69,46 +70,46 @@ GEM
|
|
69
70
|
oj (>= 2.8.3)
|
70
71
|
mime-types (3.2.2)
|
71
72
|
mime-types-data (~> 3.2015)
|
72
|
-
mime-types-data (3.
|
73
|
+
mime-types-data (3.2019.0331)
|
73
74
|
mini_portile2 (2.3.0)
|
74
75
|
minitest (5.11.3)
|
75
76
|
multi_json (1.13.1)
|
76
|
-
multipart-post (2.
|
77
|
-
nokogiri (1.8.
|
77
|
+
multipart-post (2.1.1)
|
78
|
+
nokogiri (1.8.5)
|
78
79
|
mini_portile2 (~> 2.3.0)
|
79
|
-
oj (3.
|
80
|
-
public_suffix (3.0
|
81
|
-
rack (2.0.
|
80
|
+
oj (3.7.12)
|
81
|
+
public_suffix (3.1.0)
|
82
|
+
rack (2.0.7)
|
82
83
|
rack-test (0.8.3)
|
83
84
|
rack (>= 1.0, < 3)
|
84
|
-
rake (12.3.
|
85
|
+
rake (12.3.2)
|
85
86
|
rspec (3.8.0)
|
86
87
|
rspec-core (~> 3.8.0)
|
87
88
|
rspec-expectations (~> 3.8.0)
|
88
89
|
rspec-mocks (~> 3.8.0)
|
89
90
|
rspec-core (3.8.0)
|
90
91
|
rspec-support (~> 3.8.0)
|
91
|
-
rspec-expectations (3.8.
|
92
|
+
rspec-expectations (3.8.3)
|
92
93
|
diff-lcs (>= 1.2.0, < 2.0)
|
93
94
|
rspec-support (~> 3.8.0)
|
94
95
|
rspec-mocks (3.8.0)
|
95
96
|
diff-lcs (>= 1.2.0, < 2.0)
|
96
97
|
rspec-support (~> 3.8.0)
|
97
98
|
rspec-support (3.8.0)
|
98
|
-
safe_yaml (1.0.
|
99
|
+
safe_yaml (1.0.5)
|
99
100
|
simplecov (0.13.0)
|
100
101
|
docile (~> 1.1.0)
|
101
102
|
json (>= 1.8, < 3)
|
102
103
|
simplecov-html (~> 0.10.0)
|
103
104
|
simplecov-html (0.10.2)
|
104
|
-
sitemap_generator (6.0.
|
105
|
+
sitemap_generator (6.0.2)
|
105
106
|
builder (~> 3.0)
|
106
|
-
thor (0.20.
|
107
|
+
thor (0.20.3)
|
107
108
|
thread_safe (0.3.6)
|
108
109
|
tzinfo (1.2.5)
|
109
110
|
thread_safe (~> 0.1)
|
110
111
|
vcr (3.0.3)
|
111
|
-
webmock (3.
|
112
|
+
webmock (3.5.1)
|
112
113
|
addressable (>= 2.3.6)
|
113
114
|
crack (>= 0.3.2)
|
114
115
|
hashdiff
|
@@ -128,4 +129,4 @@ DEPENDENCIES
|
|
128
129
|
webmock (~> 3.0, >= 3.0.1)
|
129
130
|
|
130
131
|
BUNDLED WITH
|
131
|
-
1.
|
132
|
+
1.17.3
|
data/README.md
CHANGED
data/lib/maltese/sitemap.rb
CHANGED
@@ -21,8 +21,6 @@ module Maltese
|
|
21
21
|
@sitemap_bucket = attributes[:sitemap_bucket].presence || "search.test.datacite.org"
|
22
22
|
@from_date = attributes[:from_date].presence || (Time.now.to_date - 1.day).iso8601
|
23
23
|
@until_date = attributes[:until_date].presence || Time.now.to_date.iso8601
|
24
|
-
@solr_username = ENV['SOLR_USERNAME']
|
25
|
-
@solr_password = ENV['SOLR_PASSWORD']
|
26
24
|
end
|
27
25
|
|
28
26
|
def sitemap_url
|
@@ -34,7 +32,7 @@ module Maltese
|
|
34
32
|
end
|
35
33
|
|
36
34
|
def search_path
|
37
|
-
ENV['RACK_ENV'] == "production" ? "https://
|
35
|
+
ENV['RACK_ENV'] == "production" ? "https://api.datacite.org/dois?" : "https://api.test.datacite.org/dois?"
|
38
36
|
end
|
39
37
|
|
40
38
|
def timeout
|
@@ -42,7 +40,7 @@ module Maltese
|
|
42
40
|
end
|
43
41
|
|
44
42
|
def job_batch_size
|
45
|
-
|
43
|
+
1000
|
46
44
|
end
|
47
45
|
|
48
46
|
def sitemap
|
@@ -67,7 +65,7 @@ module Maltese
|
|
67
65
|
if total > 0
|
68
66
|
puts process_data(options.merge(total: total))
|
69
67
|
else
|
70
|
-
puts "No works found
|
68
|
+
puts "No works found."
|
71
69
|
end
|
72
70
|
|
73
71
|
# return number of works queued
|
@@ -75,28 +73,20 @@ module Maltese
|
|
75
73
|
end
|
76
74
|
|
77
75
|
def get_total(options={})
|
78
|
-
query_url = get_query_url(options.merge(
|
79
|
-
# Add basic auth options in
|
80
|
-
options = options.merge(username: @solr_username, password: @solr_password)
|
76
|
+
query_url = get_query_url(options.merge(size: 0))
|
81
77
|
|
82
78
|
result = Maremma.get(query_url, options)
|
83
|
-
result.body.
|
79
|
+
result.body.dig("meta", "total")
|
84
80
|
end
|
85
81
|
|
86
82
|
def get_query_url(options={})
|
87
|
-
options[:
|
88
|
-
options[:
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
fq: fq,
|
95
|
-
start: options[:offset],
|
96
|
-
rows: options[:rows],
|
97
|
-
fl: "doi,updated",
|
98
|
-
sort: "updated asc",
|
99
|
-
wt: "json"}
|
83
|
+
options[:cursor] = options[:cursor] || 1
|
84
|
+
options[:size] = options[:size] || job_batch_size
|
85
|
+
|
86
|
+
params = {
|
87
|
+
"page[cursor]": options[:cursor],
|
88
|
+
"page[size]": options[:size],
|
89
|
+
}
|
100
90
|
search_path + URI.encode_www_form(params)
|
101
91
|
end
|
102
92
|
|
@@ -104,11 +94,9 @@ module Maltese
|
|
104
94
|
options[:start_time] = Time.now
|
105
95
|
|
106
96
|
# walk through paginated results
|
107
|
-
|
108
|
-
|
109
|
-
(0...total_pages).each do |page|
|
110
|
-
options[:offset] = page * job_batch_size
|
97
|
+
while options[:cursor] do
|
111
98
|
data = get_data(options.merge(timeout: timeout))
|
99
|
+
options[:cursor] = data.dig("links", "next")
|
112
100
|
parse_data(data)
|
113
101
|
end
|
114
102
|
|
@@ -118,19 +106,15 @@ module Maltese
|
|
118
106
|
def get_data(options={})
|
119
107
|
query_url = get_query_url(options)
|
120
108
|
|
121
|
-
# Add basic auth options in
|
122
|
-
options = options.merge(username: @solr_username, password: @solr_password)
|
123
|
-
|
124
109
|
Maremma.get(query_url, options)
|
125
110
|
end
|
126
111
|
|
127
112
|
def parse_data(result)
|
128
113
|
return result.body.fetch("errors") if result.body.fetch("errors", nil).present?
|
129
114
|
|
130
|
-
|
131
|
-
|
132
|
-
loc
|
133
|
-
sitemap.add loc, changefreq: "monthly", lastmod: item.fetch("updated")
|
115
|
+
result.body.fetch("data", []).each do |item|
|
116
|
+
loc = "/works/" + item.dig("attributes", "doi")
|
117
|
+
sitemap.add loc, changefreq: "monthly", lastmod: item.dig("attrributes", "updated")
|
134
118
|
end
|
135
119
|
sitemap.sitemap.link_count
|
136
120
|
end
|
data/lib/maltese/version.rb
CHANGED
data/spec/cli_spec.rb
CHANGED
@@ -6,30 +6,17 @@ describe Maltese::CLI do
|
|
6
6
|
described_class.new
|
7
7
|
end
|
8
8
|
|
9
|
-
let(:from_date) { "2018-03-15" }
|
10
|
-
let(:until_date) { "2018-04-08" }
|
11
9
|
let(:sitemap_bucket) { "search.test.datacite.org" }
|
12
|
-
let(:cli_options) { { sitemap_bucket: sitemap_bucket
|
13
|
-
from_date: from_date,
|
14
|
-
until_date: until_date } }
|
10
|
+
let(:cli_options) { { sitemap_bucket: sitemap_bucket } }
|
15
11
|
|
16
12
|
describe "sitemap", vcr: true, :order => :defined do
|
17
13
|
it 'should succeed' do
|
18
14
|
subject.options = cli_options
|
19
|
-
expect { subject.sitemap }.to output(/
|
15
|
+
expect { subject.sitemap }.to output(/1 links/).to_stdout
|
20
16
|
sitemap = Zlib::GzipReader.open("public/sitemaps/sitemap.xml.gz") { |gz| gz.read }
|
21
17
|
doc = Nokogiri::XML(sitemap)
|
22
|
-
expect(doc.xpath("//xmlns:url").size).to eq(
|
23
|
-
expect(doc.xpath("//xmlns:loc").last.text).to eq("https://search.test.datacite.org/
|
24
|
-
end
|
25
|
-
|
26
|
-
it 'should succeed with no works' do
|
27
|
-
from_date = "2005-04-07"
|
28
|
-
until_date = "2005-04-08"
|
29
|
-
subject.options = { sitemap_bucket: sitemap_bucket,
|
30
|
-
from_date: from_date,
|
31
|
-
until_date: until_date }
|
32
|
-
expect { subject.sitemap }.to output("No works found for date range 2005-04-07 - 2005-04-08.\n").to_stdout
|
18
|
+
expect(doc.xpath("//xmlns:url").size).to eq(1)
|
19
|
+
expect(doc.xpath("//xmlns:loc").last.text).to eq("https://search.test.datacite.org/")
|
33
20
|
end
|
34
21
|
end
|
35
22
|
end
|