maltese 0.2.4 → 0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +137 -1035
- data/.travis.yml +1 -1
- data/Gemfile.lock +39 -38
- data/README.md +1 -1
- data/lib/maltese/sitemap.rb +17 -33
- data/lib/maltese/version.rb +1 -1
- data/spec/cli_spec.rb +4 -17
- data/spec/fixtures/sitemap.json +69666 -7571
- data/spec/fixtures/sitemap_nil.json +8 -8
- data/spec/fixtures/vcr_cassettes/Maltese_CLI/sitemap/should_succeed.yml +37 -73
- data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/get_data/should_report_if_there_are_works_returned_by_the_Datacite_REST_API.yml +49 -0
- data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/get_total/with_works.yml +24 -19
- data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/push_data/should_report_if_there_are_no_works_returned_by_the_Datacite_REST_API.yml +59 -0
- data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/push_data/should_report_if_there_are_works_returned_by_the_Datacite_REST_API.yml +59 -0
- data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/queue_jobs/should_report_if_there_are_works_returned_by_the_Datacite_REST_API.yml +105 -0
- data/spec/sitemap_spec.rb +28 -52
- data/spec/spec_helper.rb +0 -1
- metadata +6 -10
- data/spec/fixtures/vcr_cassettes/Maltese_CLI/sitemap/should_succeed_with_no_works.yml +0 -44
- data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/get_data/should_report_if_there_are_no_works_returned_by_the_Datacite_Solr_API.yml +0 -44
- data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/get_data/should_report_if_there_are_works_returned_by_the_Datacite_Solr_API.yml +0 -44
- data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/get_total/with_no_works.yml +0 -44
- data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/push_data/should_report_if_there_are_no_works_returned_by_the_Datacite_Solr_API.yml +0 -59
- data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/push_data/should_report_if_there_are_works_returned_by_the_Datacite_Solr_API.yml +0 -59
- data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/queue_jobs/should_report_if_there_are_no_works_returned_by_the_Datacite_Solr_API.yml +0 -44
- data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/queue_jobs/should_report_if_there_are_works_returned_by_the_Datacite_Solr_API.yml +0 -141
data/.travis.yml
CHANGED
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
maltese (0.2.4)
|
4
|
+
maltese (0.8)
|
5
5
|
activesupport (>= 4.2.5, < 6)
|
6
6
|
aws-sdk-s3 (~> 1.19)
|
7
7
|
dotenv (~> 2.1, >= 2.1.1)
|
@@ -13,50 +13,51 @@ PATH
|
|
13
13
|
GEM
|
14
14
|
remote: https://rubygems.org/
|
15
15
|
specs:
|
16
|
-
activesupport (5.2.
|
16
|
+
activesupport (5.2.3)
|
17
17
|
concurrent-ruby (~> 1.0, >= 1.0.2)
|
18
18
|
i18n (>= 0.7, < 2)
|
19
19
|
minitest (~> 5.1)
|
20
20
|
tzinfo (~> 1.1)
|
21
|
-
addressable (2.
|
21
|
+
addressable (2.6.0)
|
22
22
|
public_suffix (>= 2.0.2, < 4.0)
|
23
|
-
aws-eventstream (1.0.
|
24
|
-
aws-partitions (1.
|
25
|
-
aws-sdk-core (3.
|
26
|
-
aws-eventstream (~> 1.0)
|
23
|
+
aws-eventstream (1.0.3)
|
24
|
+
aws-partitions (1.170.0)
|
25
|
+
aws-sdk-core (3.54.1)
|
26
|
+
aws-eventstream (~> 1.0, >= 1.0.2)
|
27
27
|
aws-partitions (~> 1.0)
|
28
|
-
aws-sigv4 (~> 1.
|
28
|
+
aws-sigv4 (~> 1.1)
|
29
29
|
jmespath (~> 1.0)
|
30
|
-
aws-sdk-kms (1.
|
31
|
-
aws-sdk-core (~> 3, >= 3.
|
32
|
-
aws-sigv4 (~> 1.
|
33
|
-
aws-sdk-s3 (1.
|
34
|
-
aws-sdk-core (~> 3, >= 3.
|
30
|
+
aws-sdk-kms (1.21.0)
|
31
|
+
aws-sdk-core (~> 3, >= 3.53.0)
|
32
|
+
aws-sigv4 (~> 1.1)
|
33
|
+
aws-sdk-s3 (1.41.0)
|
34
|
+
aws-sdk-core (~> 3, >= 3.53.0)
|
35
35
|
aws-sdk-kms (~> 1)
|
36
|
-
aws-sigv4 (~> 1.
|
37
|
-
aws-sigv4 (1.0
|
36
|
+
aws-sigv4 (~> 1.1)
|
37
|
+
aws-sigv4 (1.1.0)
|
38
|
+
aws-eventstream (~> 1.0, >= 1.0.2)
|
38
39
|
builder (3.2.3)
|
39
|
-
codeclimate-test-reporter (1.0.
|
40
|
+
codeclimate-test-reporter (1.0.9)
|
40
41
|
simplecov (<= 0.13)
|
41
|
-
concurrent-ruby (1.
|
42
|
+
concurrent-ruby (1.1.5)
|
42
43
|
crack (0.4.3)
|
43
44
|
safe_yaml (~> 1.0.0)
|
44
45
|
diff-lcs (1.3)
|
45
46
|
docile (1.1.5)
|
46
|
-
dotenv (2.
|
47
|
-
excon (0.
|
48
|
-
faraday (0.15.
|
47
|
+
dotenv (2.7.2)
|
48
|
+
excon (0.64.0)
|
49
|
+
faraday (0.15.4)
|
49
50
|
multipart-post (>= 1.2, < 3)
|
50
|
-
faraday-encoding (0.0.
|
51
|
+
faraday-encoding (0.0.5)
|
51
52
|
faraday
|
52
53
|
faraday_middleware (0.12.2)
|
53
54
|
faraday (>= 0.7.4, < 1.0)
|
54
|
-
hashdiff (0.
|
55
|
-
i18n (1.
|
55
|
+
hashdiff (0.4.0)
|
56
|
+
i18n (1.6.0)
|
56
57
|
concurrent-ruby (~> 1.0)
|
57
58
|
jmespath (1.4.0)
|
58
|
-
json (2.
|
59
|
-
maremma (4.
|
59
|
+
json (2.2.0)
|
60
|
+
maremma (4.2.1)
|
60
61
|
activesupport (>= 4.2.5, < 6)
|
61
62
|
addressable (>= 2.3.6)
|
62
63
|
builder (~> 3.2, >= 3.2.2)
|
@@ -69,46 +70,46 @@ GEM
|
|
69
70
|
oj (>= 2.8.3)
|
70
71
|
mime-types (3.2.2)
|
71
72
|
mime-types-data (~> 3.2015)
|
72
|
-
mime-types-data (3.
|
73
|
+
mime-types-data (3.2019.0331)
|
73
74
|
mini_portile2 (2.3.0)
|
74
75
|
minitest (5.11.3)
|
75
76
|
multi_json (1.13.1)
|
76
|
-
multipart-post (2.
|
77
|
-
nokogiri (1.8.
|
77
|
+
multipart-post (2.1.1)
|
78
|
+
nokogiri (1.8.5)
|
78
79
|
mini_portile2 (~> 2.3.0)
|
79
|
-
oj (3.
|
80
|
-
public_suffix (3.0
|
81
|
-
rack (2.0.
|
80
|
+
oj (3.7.12)
|
81
|
+
public_suffix (3.1.0)
|
82
|
+
rack (2.0.7)
|
82
83
|
rack-test (0.8.3)
|
83
84
|
rack (>= 1.0, < 3)
|
84
|
-
rake (12.3.
|
85
|
+
rake (12.3.2)
|
85
86
|
rspec (3.8.0)
|
86
87
|
rspec-core (~> 3.8.0)
|
87
88
|
rspec-expectations (~> 3.8.0)
|
88
89
|
rspec-mocks (~> 3.8.0)
|
89
90
|
rspec-core (3.8.0)
|
90
91
|
rspec-support (~> 3.8.0)
|
91
|
-
rspec-expectations (3.8.
|
92
|
+
rspec-expectations (3.8.3)
|
92
93
|
diff-lcs (>= 1.2.0, < 2.0)
|
93
94
|
rspec-support (~> 3.8.0)
|
94
95
|
rspec-mocks (3.8.0)
|
95
96
|
diff-lcs (>= 1.2.0, < 2.0)
|
96
97
|
rspec-support (~> 3.8.0)
|
97
98
|
rspec-support (3.8.0)
|
98
|
-
safe_yaml (1.0.
|
99
|
+
safe_yaml (1.0.5)
|
99
100
|
simplecov (0.13.0)
|
100
101
|
docile (~> 1.1.0)
|
101
102
|
json (>= 1.8, < 3)
|
102
103
|
simplecov-html (~> 0.10.0)
|
103
104
|
simplecov-html (0.10.2)
|
104
|
-
sitemap_generator (6.0.
|
105
|
+
sitemap_generator (6.0.2)
|
105
106
|
builder (~> 3.0)
|
106
|
-
thor (0.20.
|
107
|
+
thor (0.20.3)
|
107
108
|
thread_safe (0.3.6)
|
108
109
|
tzinfo (1.2.5)
|
109
110
|
thread_safe (~> 0.1)
|
110
111
|
vcr (3.0.3)
|
111
|
-
webmock (3.
|
112
|
+
webmock (3.5.1)
|
112
113
|
addressable (>= 2.3.6)
|
113
114
|
crack (>= 0.3.2)
|
114
115
|
hashdiff
|
@@ -128,4 +129,4 @@ DEPENDENCIES
|
|
128
129
|
webmock (~> 3.0, >= 3.0.1)
|
129
130
|
|
130
131
|
BUNDLED WITH
|
131
|
-
1.
|
132
|
+
1.17.3
|
data/README.md
CHANGED
data/lib/maltese/sitemap.rb
CHANGED
@@ -21,8 +21,6 @@ module Maltese
|
|
21
21
|
@sitemap_bucket = attributes[:sitemap_bucket].presence || "search.test.datacite.org"
|
22
22
|
@from_date = attributes[:from_date].presence || (Time.now.to_date - 1.day).iso8601
|
23
23
|
@until_date = attributes[:until_date].presence || Time.now.to_date.iso8601
|
24
|
-
@solr_username = ENV['SOLR_USERNAME']
|
25
|
-
@solr_password = ENV['SOLR_PASSWORD']
|
26
24
|
end
|
27
25
|
|
28
26
|
def sitemap_url
|
@@ -34,7 +32,7 @@ module Maltese
|
|
34
32
|
end
|
35
33
|
|
36
34
|
def search_path
|
37
|
-
ENV['RACK_ENV'] == "production" ? "https://
|
35
|
+
ENV['RACK_ENV'] == "production" ? "https://api.datacite.org/dois?" : "https://api.test.datacite.org/dois?"
|
38
36
|
end
|
39
37
|
|
40
38
|
def timeout
|
@@ -42,7 +40,7 @@ module Maltese
|
|
42
40
|
end
|
43
41
|
|
44
42
|
def job_batch_size
|
45
|
-
|
43
|
+
1000
|
46
44
|
end
|
47
45
|
|
48
46
|
def sitemap
|
@@ -67,7 +65,7 @@ module Maltese
|
|
67
65
|
if total > 0
|
68
66
|
puts process_data(options.merge(total: total))
|
69
67
|
else
|
70
|
-
puts "No works found
|
68
|
+
puts "No works found."
|
71
69
|
end
|
72
70
|
|
73
71
|
# return number of works queued
|
@@ -75,28 +73,20 @@ module Maltese
|
|
75
73
|
end
|
76
74
|
|
77
75
|
def get_total(options={})
|
78
|
-
query_url = get_query_url(options.merge(
|
79
|
-
# Add basic auth options in
|
80
|
-
options = options.merge(username: @solr_username, password: @solr_password)
|
76
|
+
query_url = get_query_url(options.merge(size: 0))
|
81
77
|
|
82
78
|
result = Maremma.get(query_url, options)
|
83
|
-
result.body.
|
79
|
+
result.body.dig("meta", "total")
|
84
80
|
end
|
85
81
|
|
86
82
|
def get_query_url(options={})
|
87
|
-
options[:
|
88
|
-
options[:
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
fq: fq,
|
95
|
-
start: options[:offset],
|
96
|
-
rows: options[:rows],
|
97
|
-
fl: "doi,updated",
|
98
|
-
sort: "updated asc",
|
99
|
-
wt: "json"}
|
83
|
+
options[:cursor] = options[:cursor] || 1
|
84
|
+
options[:size] = options[:size] || job_batch_size
|
85
|
+
|
86
|
+
params = {
|
87
|
+
"page[cursor]": options[:cursor],
|
88
|
+
"page[size]": options[:size],
|
89
|
+
}
|
100
90
|
search_path + URI.encode_www_form(params)
|
101
91
|
end
|
102
92
|
|
@@ -104,11 +94,9 @@ module Maltese
|
|
104
94
|
options[:start_time] = Time.now
|
105
95
|
|
106
96
|
# walk through paginated results
|
107
|
-
|
108
|
-
|
109
|
-
(0...total_pages).each do |page|
|
110
|
-
options[:offset] = page * job_batch_size
|
97
|
+
while options[:cursor] do
|
111
98
|
data = get_data(options.merge(timeout: timeout))
|
99
|
+
options[:cursor] = data.dig("links", "next")
|
112
100
|
parse_data(data)
|
113
101
|
end
|
114
102
|
|
@@ -118,19 +106,15 @@ module Maltese
|
|
118
106
|
def get_data(options={})
|
119
107
|
query_url = get_query_url(options)
|
120
108
|
|
121
|
-
# Add basic auth options in
|
122
|
-
options = options.merge(username: @solr_username, password: @solr_password)
|
123
|
-
|
124
109
|
Maremma.get(query_url, options)
|
125
110
|
end
|
126
111
|
|
127
112
|
def parse_data(result)
|
128
113
|
return result.body.fetch("errors") if result.body.fetch("errors", nil).present?
|
129
114
|
|
130
|
-
|
131
|
-
|
132
|
-
loc
|
133
|
-
sitemap.add loc, changefreq: "monthly", lastmod: item.fetch("updated")
|
115
|
+
result.body.fetch("data", []).each do |item|
|
116
|
+
loc = "/works/" + item.dig("attributes", "doi")
|
117
|
+
sitemap.add loc, changefreq: "monthly", lastmod: item.dig("attrributes", "updated")
|
134
118
|
end
|
135
119
|
sitemap.sitemap.link_count
|
136
120
|
end
|
data/lib/maltese/version.rb
CHANGED
data/spec/cli_spec.rb
CHANGED
@@ -6,30 +6,17 @@ describe Maltese::CLI do
|
|
6
6
|
described_class.new
|
7
7
|
end
|
8
8
|
|
9
|
-
let(:from_date) { "2018-03-15" }
|
10
|
-
let(:until_date) { "2018-04-08" }
|
11
9
|
let(:sitemap_bucket) { "search.test.datacite.org" }
|
12
|
-
let(:cli_options) { { sitemap_bucket: sitemap_bucket
|
13
|
-
from_date: from_date,
|
14
|
-
until_date: until_date } }
|
10
|
+
let(:cli_options) { { sitemap_bucket: sitemap_bucket } }
|
15
11
|
|
16
12
|
describe "sitemap", vcr: true, :order => :defined do
|
17
13
|
it 'should succeed' do
|
18
14
|
subject.options = cli_options
|
19
|
-
expect { subject.sitemap }.to output(/
|
15
|
+
expect { subject.sitemap }.to output(/1 links/).to_stdout
|
20
16
|
sitemap = Zlib::GzipReader.open("public/sitemaps/sitemap.xml.gz") { |gz| gz.read }
|
21
17
|
doc = Nokogiri::XML(sitemap)
|
22
|
-
expect(doc.xpath("//xmlns:url").size).to eq(
|
23
|
-
expect(doc.xpath("//xmlns:loc").last.text).to eq("https://search.test.datacite.org/
|
24
|
-
end
|
25
|
-
|
26
|
-
it 'should succeed with no works' do
|
27
|
-
from_date = "2005-04-07"
|
28
|
-
until_date = "2005-04-08"
|
29
|
-
subject.options = { sitemap_bucket: sitemap_bucket,
|
30
|
-
from_date: from_date,
|
31
|
-
until_date: until_date }
|
32
|
-
expect { subject.sitemap }.to output("No works found for date range 2005-04-07 - 2005-04-08.\n").to_stdout
|
18
|
+
expect(doc.xpath("//xmlns:url").size).to eq(1)
|
19
|
+
expect(doc.xpath("//xmlns:loc").last.text).to eq("https://search.test.datacite.org/")
|
33
20
|
end
|
34
21
|
end
|
35
22
|
end
|