govuk_seed_crawler 3.1.0 → 3.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ci.yml +2 -2
- data/CHANGELOG.md +7 -2
- data/README.md +6 -0
- data/govuk_seed_crawler.gemspec +3 -3
- data/lib/govuk_seed_crawler/cli_parser.rb +36 -35
- data/lib/govuk_seed_crawler/version.rb +1 -1
- data/spec/govuk_seed_crawler/cli_parser_spec.rb +1 -3
- data/spec/govuk_seed_crawler/cli_runner_spec.rb +1 -1
- metadata +17 -11
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b21f7a2314d49cbd45cc088701f0daffb8a3e825b290fcf5c454584d87c4f0ef
|
4
|
+
data.tar.gz: 9196ec323378850139a89bcfa8e6200d9b3cc0ac22b60b07143b4ea836932530
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4d184e26fc0f0307ad3eb4117bd8f9591c1d4f6418d59a48a0272d5af81046760a2ba64e2e9756825d3f37c9d66d36ab67aab8ee07df9f631a68438d7c8b74f3
|
7
|
+
data.tar.gz: 8fe2a28c7e108612d5d4f32fa4c0fcf83ab849baefb974d8eb58cc140a788bdc487c521a45c3c8140a126cd60e5d4c1c63c1ace4025bbf6136535e9d8db80b63
|
data/.github/workflows/ci.yml
CHANGED
@@ -7,7 +7,7 @@ jobs:
|
|
7
7
|
fail-fast: false
|
8
8
|
matrix:
|
9
9
|
# Due to https://github.com/actions/runner/issues/849, we have to use quotes for '3.0'
|
10
|
-
ruby: [
|
10
|
+
ruby: [2.7, '3.0', 3.1, 3.2]
|
11
11
|
runs-on: ubuntu-latest
|
12
12
|
services:
|
13
13
|
rabbitmq:
|
@@ -42,6 +42,6 @@ jobs:
|
|
42
42
|
if: ${{ github.ref == 'refs/heads/main' }}
|
43
43
|
permissions:
|
44
44
|
contents: write
|
45
|
-
uses: alphagov/govuk-infrastructure/.github/workflows/publish-rubygem.
|
45
|
+
uses: alphagov/govuk-infrastructure/.github/workflows/publish-rubygem.yml@main
|
46
46
|
secrets:
|
47
47
|
GEM_HOST_API_KEY: ${{ secrets.ALPHAGOV_RUBYGEMS_API_KEY }}
|
data/CHANGELOG.md
CHANGED
@@ -6,6 +6,11 @@
|
|
6
6
|
useful summary for people upgrading their application, not a replication
|
7
7
|
of the commit log.
|
8
8
|
|
9
|
-
##
|
9
|
+
## 3.2.1
|
10
10
|
|
11
|
-
-
|
11
|
+
- Update minimum Slop requirement from 3.6 to 4.0 ([#38](https://github.com/alphagov/govuk_seed_crawler/pull/38))
|
12
|
+
|
13
|
+
## 3.2.0
|
14
|
+
|
15
|
+
- Drop support for Ruby < 2.7 ([#23](https://github.com/alphagov/govuk_seed_crawler/pull/23))
|
16
|
+
- Update major version of 'Slop' dependency ([#17](https://github.com/alphagov/govuk_seed_crawler/pull/17))
|
data/README.md
CHANGED
@@ -31,6 +31,12 @@ Run with `--help` to see a list of options:
|
|
31
31
|
bundle exec seed-crawler --help
|
32
32
|
```
|
33
33
|
|
34
|
+
## Deployment
|
35
|
+
|
36
|
+
The gem is automatically deployed to RubyGems when the gem [version](https://github.com/alphagov/govuk_seed_crawler/blob/main/lib/govuk_seed_crawler/version.rb) is updated on `main`. (Don't forget to add to the [CHANGELOG](https://github.com/alphagov/govuk_seed_crawler/blob/main/CHANGELOG.md)!)
|
37
|
+
|
38
|
+
For the new gem version to be used on GOV.UK, you'll need to update the [reference in govuk-puppet](https://github.com/alphagov/govuk-puppet/blob/c5112961e9c3063f077d2de2ffa887b00466c623/modules/govuk_crawler/manifests/init.pp#L142-L150).
|
39
|
+
|
34
40
|
## Contributing
|
35
41
|
|
36
42
|
1. Fork it ( http://github.com/{my-github-username}/govuk_seed_crawler/fork )
|
data/govuk_seed_crawler.gemspec
CHANGED
@@ -20,17 +20,17 @@ Gem::Specification.new do |spec|
|
|
20
20
|
|
21
21
|
spec.add_runtime_dependency "bunny", ">= 1.3", "< 3.0"
|
22
22
|
spec.add_runtime_dependency "crack", "0.4.5"
|
23
|
-
spec.add_runtime_dependency "nokogiri", ">= 1.6", "< 1.
|
23
|
+
spec.add_runtime_dependency "nokogiri", ">= 1.6", "< 1.15"
|
24
24
|
# Something, somewhere, sometimes requires public_suffix.
|
25
25
|
# public_suffix > 1.5 requires ruby > 2.
|
26
26
|
spec.add_runtime_dependency "public_suffix", ">= 1.4.6", "< 5.1.0"
|
27
27
|
spec.add_runtime_dependency "sitemap-parser", ">= 0.3", "< 0.6"
|
28
|
-
spec.add_runtime_dependency "slop", "
|
28
|
+
spec.add_runtime_dependency "slop", ">= 4.0", "< 4.11"
|
29
29
|
|
30
30
|
spec.add_development_dependency "pry"
|
31
31
|
spec.add_development_dependency "rake"
|
32
32
|
spec.add_development_dependency "rspec", "~> 3.0"
|
33
33
|
spec.add_development_dependency "rspec-mocks", "~> 3.0"
|
34
|
-
spec.add_development_dependency "rubocop-govuk", "4.
|
34
|
+
spec.add_development_dependency "rubocop-govuk", "4.10.0"
|
35
35
|
spec.add_development_dependency "webmock", "~> 3.18"
|
36
36
|
end
|
@@ -13,7 +13,6 @@ module GovukSeedCrawler
|
|
13
13
|
class CLIParser
|
14
14
|
DEFAULTS = {
|
15
15
|
exchange: "govuk_crawler_exchange",
|
16
|
-
help: nil,
|
17
16
|
host: "localhost",
|
18
17
|
password: "guest",
|
19
18
|
port: "5672",
|
@@ -21,7 +20,6 @@ module GovukSeedCrawler
|
|
21
20
|
topic: "#",
|
22
21
|
username: "guest",
|
23
22
|
verbose: false,
|
24
|
-
version: nil,
|
25
23
|
vhost: "/",
|
26
24
|
}.freeze
|
27
25
|
|
@@ -32,45 +30,48 @@ module GovukSeedCrawler
|
|
32
30
|
end
|
33
31
|
|
34
32
|
def options
|
35
|
-
Slop.
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
on :vhost=, "AMQP vhost", default: DEFAULTS[:vhost]
|
61
|
-
|
62
|
-
on :quiet, "Quiet output", default: DEFAULTS[:quiet]
|
63
|
-
on :verbose, "Verbose output", default: DEFAULTS[:verbose]
|
33
|
+
opts = Slop::Options.new
|
34
|
+
opts.banner = <<~HELP
|
35
|
+
Usage: #{$PROGRAM_NAME} site_root [options]
|
36
|
+
|
37
|
+
Seeds an AMQP topic exchange with messages, each containing a URL, for the GOV.UK Crawler Worker
|
38
|
+
to consume:
|
39
|
+
|
40
|
+
https://github.com/alphagov/govuk_crawler_worker
|
41
|
+
|
42
|
+
The AMQP password can also be set as an environment variable and will be read from
|
43
|
+
`#{ENV_AMQP_PASS_KEY}`. If both the environment variable and command-line option for password
|
44
|
+
are set, the environment variable will take higher precedent.
|
45
|
+
HELP
|
46
|
+
opts.string "--host", "AMQP host to publish to", default: DEFAULTS[:host]
|
47
|
+
opts.string "--port", "AMQP port", default: DEFAULTS[:port]
|
48
|
+
opts.string "--username", "AMQP username", default: DEFAULTS[:username]
|
49
|
+
opts.string "--password", "AMQP password", default: DEFAULTS[:password]
|
50
|
+
opts.string "--exchange", "AMQP exchange", default: DEFAULTS[:exchange]
|
51
|
+
opts.string "--topic", "AMQP topic", default: DEFAULTS[:topic]
|
52
|
+
opts.string "--vhost", "AMQP vhost", default: DEFAULTS[:vhost]
|
53
|
+
opts.bool "-q", "--quiet", "Quiet output", default: DEFAULTS[:quiet]
|
54
|
+
opts.bool "-v", "--verbose", "Verbose output", default: DEFAULTS[:verbose]
|
55
|
+
opts.on "--version", "Display version and exit" do
|
56
|
+
puts "Version: #{GovukSeedCrawler::VERSION}"
|
57
|
+
exit 0
|
64
58
|
end
|
59
|
+
opts.on "-h", "--help" do
|
60
|
+
puts opts
|
61
|
+
exit
|
62
|
+
end
|
63
|
+
parser = Slop::Parser.new(opts)
|
64
|
+
parser.parse(@argv_array)
|
65
65
|
end
|
66
66
|
|
67
67
|
def parse
|
68
68
|
opts = options
|
69
69
|
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
70
|
+
# opts.arguments shows all arguments NOT processed by the parser,
|
71
|
+
# which should just be the first arg (site root).
|
72
|
+
# See https://github.com/leejarvis/slop#arguments
|
73
|
+
raise CLIException.new("too many arguments provided", opts.to_s) if opts.arguments.count > 1
|
74
|
+
raise CLIException.new("site_root must be provided", opts.to_s) if opts.arguments.count.zero?
|
74
75
|
|
75
76
|
options_hash = opts.to_hash
|
76
77
|
options_hash[:password] = ENV[ENV_AMQP_PASS_KEY] unless ENV[ENV_AMQP_PASS_KEY].nil?
|
@@ -32,7 +32,7 @@ describe GovukSeedCrawler::CLIParser do
|
|
32
32
|
options = described_class.new(["http://www.foo.com/"]).options
|
33
33
|
|
34
34
|
expect { described_class.new(["-h"]).parse }
|
35
|
-
.to output(
|
35
|
+
.to output(options.to_s).to_stdout
|
36
36
|
.and raise_exception(SystemExit) { |e| expect(e.status).to eq(0) }
|
37
37
|
end
|
38
38
|
|
@@ -66,10 +66,8 @@ describe GovukSeedCrawler::CLIParser do
|
|
66
66
|
password: "bar",
|
67
67
|
exchange: "some_custom_exchange",
|
68
68
|
topic: "some_custom_topic",
|
69
|
-
help: nil,
|
70
69
|
quiet: false,
|
71
70
|
verbose: true,
|
72
|
-
version: nil,
|
73
71
|
vhost: "a_vhost",
|
74
72
|
}
|
75
73
|
|
@@ -33,7 +33,7 @@ describe GovukSeedCrawler::CLIRunner do
|
|
33
33
|
expect(GovukSeedCrawler.logger.level).to eq(Logger::INFO)
|
34
34
|
end
|
35
35
|
|
36
|
-
it "sets to ERROR for
|
36
|
+
it "sets to ERROR for quiet" do
|
37
37
|
described_class.new(["http://www.example.com", "--quiet"])
|
38
38
|
expect(GovukSeedCrawler.logger.level).to eq(Logger::ERROR)
|
39
39
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: govuk_seed_crawler
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.1
|
4
|
+
version: 3.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- GOV.UK developers
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2023-03-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bunny
|
@@ -53,7 +53,7 @@ dependencies:
|
|
53
53
|
version: '1.6'
|
54
54
|
- - "<"
|
55
55
|
- !ruby/object:Gem::Version
|
56
|
-
version: '1.
|
56
|
+
version: '1.15'
|
57
57
|
type: :runtime
|
58
58
|
prerelease: false
|
59
59
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -63,7 +63,7 @@ dependencies:
|
|
63
63
|
version: '1.6'
|
64
64
|
- - "<"
|
65
65
|
- !ruby/object:Gem::Version
|
66
|
-
version: '1.
|
66
|
+
version: '1.15'
|
67
67
|
- !ruby/object:Gem::Dependency
|
68
68
|
name: public_suffix
|
69
69
|
requirement: !ruby/object:Gem::Requirement
|
@@ -108,16 +108,22 @@ dependencies:
|
|
108
108
|
name: slop
|
109
109
|
requirement: !ruby/object:Gem::Requirement
|
110
110
|
requirements:
|
111
|
-
- - "
|
111
|
+
- - ">="
|
112
|
+
- !ruby/object:Gem::Version
|
113
|
+
version: '4.0'
|
114
|
+
- - "<"
|
112
115
|
- !ruby/object:Gem::Version
|
113
|
-
version:
|
116
|
+
version: '4.11'
|
114
117
|
type: :runtime
|
115
118
|
prerelease: false
|
116
119
|
version_requirements: !ruby/object:Gem::Requirement
|
117
120
|
requirements:
|
118
|
-
- - "
|
121
|
+
- - ">="
|
122
|
+
- !ruby/object:Gem::Version
|
123
|
+
version: '4.0'
|
124
|
+
- - "<"
|
119
125
|
- !ruby/object:Gem::Version
|
120
|
-
version:
|
126
|
+
version: '4.11'
|
121
127
|
- !ruby/object:Gem::Dependency
|
122
128
|
name: pry
|
123
129
|
requirement: !ruby/object:Gem::Requirement
|
@@ -180,14 +186,14 @@ dependencies:
|
|
180
186
|
requirements:
|
181
187
|
- - '='
|
182
188
|
- !ruby/object:Gem::Version
|
183
|
-
version: 4.
|
189
|
+
version: 4.10.0
|
184
190
|
type: :development
|
185
191
|
prerelease: false
|
186
192
|
version_requirements: !ruby/object:Gem::Requirement
|
187
193
|
requirements:
|
188
194
|
- - '='
|
189
195
|
- !ruby/object:Gem::Version
|
190
|
-
version: 4.
|
196
|
+
version: 4.10.0
|
191
197
|
- !ruby/object:Gem::Dependency
|
192
198
|
name: webmock
|
193
199
|
requirement: !ruby/object:Gem::Requirement
|
@@ -256,7 +262,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
256
262
|
- !ruby/object:Gem::Version
|
257
263
|
version: '0'
|
258
264
|
requirements: []
|
259
|
-
rubygems_version: 3.
|
265
|
+
rubygems_version: 3.4.9
|
260
266
|
signing_key:
|
261
267
|
specification_version: 4
|
262
268
|
summary: Retrieves a list of URLs to seed the crawler by publishing them to a RabbitMQ
|