govuk_seed_crawler 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +1 -0
- data/.rspec +2 -0
- data/.ruby-version +1 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +39 -0
- data/Rakefile +18 -0
- data/bin/seed-crawler +5 -0
- data/govuk_seed_crawler.gemspec +30 -0
- data/jenkins-branches.sh +18 -0
- data/jenkins-tests.sh +6 -0
- data/jenkins.sh +5 -0
- data/lib/govuk_seed_crawler.rb +17 -0
- data/lib/govuk_seed_crawler/amqp_client.rb +28 -0
- data/lib/govuk_seed_crawler/cli_parser.rb +81 -0
- data/lib/govuk_seed_crawler/cli_runner.rb +29 -0
- data/lib/govuk_seed_crawler/indexer.rb +18 -0
- data/lib/govuk_seed_crawler/seeder.rb +16 -0
- data/lib/govuk_seed_crawler/version.rb +3 -0
- data/spec/govuk_seed_crawler/amqp_client_spec.rb +55 -0
- data/spec/govuk_seed_crawler/cli_parser_spec.rb +122 -0
- data/spec/govuk_seed_crawler/cli_runner_spec.rb +74 -0
- data/spec/govuk_seed_crawler/indexer_spec.rb +21 -0
- data/spec/govuk_seed_crawler/seeder_spec.rb +51 -0
- data/spec/integration/govuk_seed_crawler_spec.rb +66 -0
- data/spec/spec_helper.rb +39 -0
- metadata +205 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: a37f428070681bc4ca2466497df0b69f45fed94a
|
4
|
+
data.tar.gz: 55bdafe5ade9251f6f630eeb490e481a9796fc4f
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 631f38d96a7d1ea301b38e761d5c55debbb9ee0c99a8e2a88ef1bb965b12637eceaaba70dfbd13941e4b96c088781fc6da3f724e283bfe8afc6d3dd8f0732321
|
7
|
+
data.tar.gz: 2f8b41afecdaba199b32925b13804c6dce588535b611a01383753dac2a22a655e728a9b77cf529845e0b6df1738bdca716c0eac5a5a4032e70e61d7d94c9cd82
|
data/.gitignore
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
Gemfile.lock
|
data/.rspec
ADDED
data/.ruby-version
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
2.1.2
|
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
(c) 2014 Crown copyright
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
# GOV.UK: Seed the Crawler
|
2
|
+
|
3
|
+
Retrieves a list of URLs to seed the [crawler](https://github.com/alphagov/govuk_crawler_worker) by publishing them to a RabbitMQ exchange.
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
Add this line to your application's Gemfile:
|
8
|
+
|
9
|
+
gem 'govuk_seed_crawler'
|
10
|
+
|
11
|
+
And then execute:
|
12
|
+
|
13
|
+
$ bundle
|
14
|
+
|
15
|
+
Or install it yourself as:
|
16
|
+
|
17
|
+
$ gem install govuk_seed_crawler
|
18
|
+
|
19
|
+
## Usage
|
20
|
+
|
21
|
+
To run with the RabbitMQ connection defaults:
|
22
|
+
|
23
|
+
```bash
|
24
|
+
bundle exec seed-crawler https://www.gov.uk/
|
25
|
+
```
|
26
|
+
|
27
|
+
Run with `--help` to see a list of options:
|
28
|
+
|
29
|
+
```bash
|
30
|
+
bundle exec seed-crawler --help
|
31
|
+
```
|
32
|
+
|
33
|
+
## Contributing
|
34
|
+
|
35
|
+
1. Fork it ( http://github.com/{my-github-username}/govuk_seed_crawler/fork )
|
36
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
37
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
38
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
39
|
+
5. Create a new Pull Request
|
data/Rakefile
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
require 'gem_publisher'
|
2
|
+
require 'rspec/core/rake_task'
|
3
|
+
|
4
|
+
RSpec::Core::RakeTask.new(:spec) do |task|
|
5
|
+
task.pattern = FileList['spec/govuk_seed_crawler/**/*_spec.rb']
|
6
|
+
end
|
7
|
+
|
8
|
+
RSpec::Core::RakeTask.new(:integration) do |task|
|
9
|
+
task.pattern = FileList['spec/integration/**/*_spec.rb']
|
10
|
+
end
|
11
|
+
|
12
|
+
task :default => :spec
|
13
|
+
|
14
|
+
desc "Publish gem to RubyGems"
|
15
|
+
task :publish_gem do |t|
|
16
|
+
gem = GemPublisher.publish_if_updated("govuk_seed_crawler.gemspec")
|
17
|
+
puts "Published #{gem}" if gem
|
18
|
+
end
|
data/bin/seed-crawler
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'govuk_seed_crawler/version'
|
5
|
+
|
6
|
+
Gem::Specification.new do |spec|
|
7
|
+
spec.name = "govuk_seed_crawler"
|
8
|
+
spec.version = GovukSeedCrawler::VERSION
|
9
|
+
spec.authors = ["Matt Bostock"]
|
10
|
+
spec.email = ["matt.bostock@digital.cabinet-office.gov.uk"]
|
11
|
+
spec.summary = %q{Retrieves a list of URLs to seed the crawler by publishing them to a RabbitMQ exchange.}
|
12
|
+
spec.homepage = "https://github.gds/gds/govuk_seed_crawler"
|
13
|
+
spec.license = "MIT"
|
14
|
+
|
15
|
+
spec.files = `git ls-files -z`.split("\x0")
|
16
|
+
spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
17
|
+
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
18
|
+
spec.require_paths = ["lib"]
|
19
|
+
|
20
|
+
spec.add_runtime_dependency "bunny", "~> 1.3"
|
21
|
+
spec.add_runtime_dependency "govuk_mirrorer", "~> 1.3.1"
|
22
|
+
spec.add_runtime_dependency "slop", "~> 3.6.0"
|
23
|
+
|
24
|
+
spec.add_development_dependency "gem_publisher", "~> 1.3"
|
25
|
+
spec.add_development_dependency "pry"
|
26
|
+
spec.add_development_dependency "rake"
|
27
|
+
spec.add_development_dependency "rspec", "~> 3.0"
|
28
|
+
spec.add_development_dependency "rspec-mocks", "~> 3.0"
|
29
|
+
spec.add_development_dependency "webmock", "~> 1.18.0"
|
30
|
+
end
|
data/jenkins-branches.sh
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
#!/usr/bin/env bash
|
2
|
+
set -e
|
3
|
+
|
4
|
+
[ -x .venv/bin/pip ] || virtualenv .venv
|
5
|
+
. .venv/bin/activate
|
6
|
+
|
7
|
+
pip install -q ghtools
|
8
|
+
|
9
|
+
REPO="gds:gds/govuk_seed_crawler"
|
10
|
+
gh-status "$REPO" "$GIT_COMMIT" pending -d "\"Build #${BUILD_NUMBER} is running on Jenkins\"" -u "$BUILD_URL" >/dev/null
|
11
|
+
|
12
|
+
if ./jenkins-tests.sh; then
|
13
|
+
gh-status "$REPO" "$GIT_COMMIT" success -d "\"Build #${BUILD_NUMBER} succeeded on Jenkins\"" -u "$BUILD_URL" >/dev/null
|
14
|
+
exit 0
|
15
|
+
else
|
16
|
+
gh-status "$REPO" "$GIT_COMMIT" failure -d "\"Build #${BUILD_NUMBER} failed on Jenkins\"" -u "$BUILD_URL" >/dev/null
|
17
|
+
exit 1
|
18
|
+
fi
|
data/jenkins-tests.sh
ADDED
data/jenkins.sh
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
require 'govuk_seed_crawler/amqp_client'
|
2
|
+
require 'govuk_seed_crawler/cli_parser'
|
3
|
+
require 'govuk_seed_crawler/cli_runner'
|
4
|
+
require 'govuk_seed_crawler/indexer'
|
5
|
+
require 'govuk_seed_crawler/seeder'
|
6
|
+
require 'govuk_seed_crawler/version'
|
7
|
+
|
8
|
+
module GovukSeedCrawler
|
9
|
+
def self.logger
|
10
|
+
unless @logger
|
11
|
+
@logger = Logger.new(STDOUT)
|
12
|
+
@logger.level = Logger::INFO
|
13
|
+
end
|
14
|
+
|
15
|
+
@logger
|
16
|
+
end
|
17
|
+
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
require 'bunny'
|
2
|
+
|
3
|
+
module GovukSeedCrawler
|
4
|
+
class AmqpClient
|
5
|
+
attr_reader :channel
|
6
|
+
|
7
|
+
def initialize(connection_options = {})
|
8
|
+
@conn = Bunny.new(connection_options)
|
9
|
+
@conn.start
|
10
|
+
@channel = @conn.create_channel
|
11
|
+
end
|
12
|
+
|
13
|
+
def close
|
14
|
+
@conn.close
|
15
|
+
end
|
16
|
+
|
17
|
+
def publish(exchange, topic, body)
|
18
|
+
raise "Exchange cannot be nil" if exchange.nil?
|
19
|
+
raise "Topic cannot be nil" if topic.nil?
|
20
|
+
raise "Message body cannot be nil" if body.nil?
|
21
|
+
|
22
|
+
GovukSeedCrawler.logger.debug("Publishing '#{body}' to topic '#{topic}'")
|
23
|
+
|
24
|
+
@channel.topic(exchange, :durable => true)
|
25
|
+
.publish(body, :routing_key => topic)
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
@@ -0,0 +1,81 @@
|
|
1
|
+
require 'slop'
|
2
|
+
|
3
|
+
module GovukSeedCrawler
|
4
|
+
class CLIException < StandardError
|
5
|
+
attr_reader :help
|
6
|
+
|
7
|
+
def initialize(message, help)
|
8
|
+
super(message)
|
9
|
+
@help = help
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
13
|
+
class CLIParser
|
14
|
+
DEFAULTS = {
|
15
|
+
:exchange => "govuk_crawler_exchange",
|
16
|
+
:help => nil,
|
17
|
+
:host => "localhost",
|
18
|
+
:password => "guest",
|
19
|
+
:port => "5672",
|
20
|
+
:quiet => false,
|
21
|
+
:topic => "#",
|
22
|
+
:username => "guest",
|
23
|
+
:verbose => false,
|
24
|
+
:version => nil,
|
25
|
+
:vhost => "/"
|
26
|
+
}.freeze
|
27
|
+
|
28
|
+
ENV_AMQP_PASS_KEY = "GOVUK_CRAWLER_AMQP_PASS".freeze
|
29
|
+
|
30
|
+
def initialize(argv_array)
|
31
|
+
@argv_array = argv_array
|
32
|
+
end
|
33
|
+
|
34
|
+
def options
|
35
|
+
Slop.parse!(@argv_array, :help => true) do
|
36
|
+
banner <<-EOS
|
37
|
+
Usage: #{$PROGRAM_NAME} site_root [options]
|
38
|
+
|
39
|
+
Seeds an AMQP topic exchange with messages, each containing a URL, for the GOV.UK Crawler Worker
|
40
|
+
to consume:
|
41
|
+
|
42
|
+
https://github.com/alphagov/govuk_crawler_worker
|
43
|
+
|
44
|
+
The AMQP password can also be set as an environment variable and will be read from
|
45
|
+
`#{ENV_AMQP_PASS_KEY}`. If both the environment variable and command-line option for password
|
46
|
+
are set, the environment variable will take higher precedent.
|
47
|
+
EOS
|
48
|
+
|
49
|
+
on :version, "Display version and exit" do
|
50
|
+
puts "Version: #{GovukSeedCrawler::VERSION}"
|
51
|
+
exit 0
|
52
|
+
end
|
53
|
+
|
54
|
+
on :host=, "AMQP host to publish to", default: DEFAULTS[:host]
|
55
|
+
on :port=, "AMQP port", default: DEFAULTS[:port]
|
56
|
+
on :username=, "AMQP username", default: DEFAULTS[:username]
|
57
|
+
on :password=, "AMQP password", default: DEFAULTS[:password]
|
58
|
+
on :exchange=, "AMQP exchange", default: DEFAULTS[:exchange]
|
59
|
+
on :topic=, "AMQP topic", default: DEFAULTS[:topic]
|
60
|
+
on :vhost=, "AMQP vhost", default: DEFAULTS[:vhost]
|
61
|
+
|
62
|
+
on :quiet, "Quiet output", default: DEFAULTS[:quiet]
|
63
|
+
on :verbose, "Verbose output", default: DEFAULTS[:verbose]
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
def parse
|
68
|
+
opts = options
|
69
|
+
|
70
|
+
if opts[:version].nil?
|
71
|
+
raise CLIException.new("too many arguments provided", opts.help) if @argv_array.size > 1
|
72
|
+
raise CLIException.new("site_root must be provided", opts.help) if @argv_array.size != 1
|
73
|
+
end
|
74
|
+
|
75
|
+
options_hash = opts.to_hash
|
76
|
+
options_hash[:password] = ENV[ENV_AMQP_PASS_KEY] unless ENV[ENV_AMQP_PASS_KEY].nil?
|
77
|
+
|
78
|
+
return options_hash, @argv_array.first
|
79
|
+
end
|
80
|
+
end
|
81
|
+
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
module GovukSeedCrawler
|
2
|
+
class CLIRunner
|
3
|
+
def initialize(argv_array)
|
4
|
+
begin
|
5
|
+
@options, @site_root = CLIParser.new(argv_array).parse
|
6
|
+
rescue CLIException => e
|
7
|
+
puts e.message
|
8
|
+
puts e.help
|
9
|
+
exit 2
|
10
|
+
end
|
11
|
+
|
12
|
+
set_logging_level(@options)
|
13
|
+
end
|
14
|
+
|
15
|
+
def run
|
16
|
+
Seeder::seed(@site_root, @options)
|
17
|
+
end
|
18
|
+
|
19
|
+
private
|
20
|
+
|
21
|
+
def set_logging_level(cli_options)
|
22
|
+
if cli_options[:verbose]
|
23
|
+
GovukSeedCrawler.logger.level = Logger::DEBUG
|
24
|
+
elsif cli_options[:quiet]
|
25
|
+
GovukSeedCrawler.logger.level = Logger::ERROR
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
require 'govuk_mirrorer/indexer'
|
2
|
+
require 'govuk_mirrorer/statsd'
|
3
|
+
|
4
|
+
module GovukSeedCrawler
|
5
|
+
class Indexer
|
6
|
+
attr_reader :urls
|
7
|
+
|
8
|
+
def initialize(site_root)
|
9
|
+
raise "No site_root defined" unless site_root
|
10
|
+
|
11
|
+
GovukSeedCrawler.logger.info("Retrieving list of URLs for #{site_root}")
|
12
|
+
indexer = GovukMirrorer::Indexer.new(site_root)
|
13
|
+
@urls = indexer.all_start_urls
|
14
|
+
|
15
|
+
GovukSeedCrawler.logger.info("Found #{@urls.count} URLs")
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
module GovukSeedCrawler
|
2
|
+
class Seeder
|
3
|
+
def self.seed(site_root, options = {})
|
4
|
+
amqp_client = AmqpClient.new(options)
|
5
|
+
urls = Indexer.new(site_root).urls
|
6
|
+
|
7
|
+
urls.each do |url|
|
8
|
+
amqp_client.publish(options[:exchange], options[:topic], url)
|
9
|
+
end
|
10
|
+
|
11
|
+
GovukSeedCrawler.logger.info("Published #{urls.count} URLs to topic '#{options[:topic]}'")
|
12
|
+
|
13
|
+
amqp_client.close
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe GovukSeedCrawler::AmqpClient do
|
4
|
+
let(:exchange) { "govuk_seed_crawler_spec_exchange" }
|
5
|
+
let(:options) {{
|
6
|
+
:host => ENV.fetch("AMQP_HOST", "localhost"),
|
7
|
+
:user => ENV.fetch("AMQP_USER", "govuk_seed_crawler"),
|
8
|
+
:pass => ENV.fetch("AMQP_PASS", "govuk_seed_crawler"),
|
9
|
+
}}
|
10
|
+
subject { GovukSeedCrawler::AmqpClient.new(options) }
|
11
|
+
|
12
|
+
it "responds to #channel" do
|
13
|
+
expect(subject).to respond_to(:channel)
|
14
|
+
end
|
15
|
+
|
16
|
+
it "responds to #close" do
|
17
|
+
expect(subject).to respond_to(:close)
|
18
|
+
end
|
19
|
+
|
20
|
+
it "closes the connection to the AMQP server" do
|
21
|
+
mock_bunny = double(:mock_bunny,
|
22
|
+
:start => true, :create_channel => true, :close => true)
|
23
|
+
allow(Bunny).to receive(:new).and_return(mock_bunny)
|
24
|
+
expect(mock_bunny).to receive(:close).once
|
25
|
+
|
26
|
+
subject.close
|
27
|
+
end
|
28
|
+
|
29
|
+
context "#publish" do
|
30
|
+
context "error handling" do
|
31
|
+
it "raises an exception if exchange is nil" do
|
32
|
+
expect {
|
33
|
+
subject.publish(nil, "#", "some body")
|
34
|
+
}.to raise_exception(RuntimeError, "Exchange cannot be nil")
|
35
|
+
end
|
36
|
+
|
37
|
+
it "raises an exception if topic is nil" do
|
38
|
+
expect {
|
39
|
+
subject.publish(exchange, nil, "some body")
|
40
|
+
}.to raise_exception(RuntimeError, "Topic cannot be nil")
|
41
|
+
end
|
42
|
+
|
43
|
+
it "raises an exception if body is nil" do
|
44
|
+
expect {
|
45
|
+
subject.publish(exchange, "#", nil)
|
46
|
+
}.to raise_exception(RuntimeError, "Message body cannot be nil")
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
it "allows publishing against an exchange" do
|
51
|
+
expect(subject.publish(exchange, "#", "some body"))
|
52
|
+
.to_not be_nil
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
@@ -0,0 +1,122 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe GovukSeedCrawler::CLIParser do
|
4
|
+
it "requires the site_root to be provided" do
|
5
|
+
expect {
|
6
|
+
GovukSeedCrawler::CLIParser.new([]).parse
|
7
|
+
}.to raise_exception(GovukSeedCrawler::CLIException, "site_root must be provided")
|
8
|
+
end
|
9
|
+
|
10
|
+
it "provides the defaults when just given the site_root" do
|
11
|
+
options, site_root = GovukSeedCrawler::CLIParser.new(["https://www.example.com"]).parse
|
12
|
+
|
13
|
+
expect(options).to eq(GovukSeedCrawler::CLIParser::DEFAULTS)
|
14
|
+
expect(site_root).to eq("https://www.example.com")
|
15
|
+
end
|
16
|
+
|
17
|
+
it "should tell us when we've given too many arguments" do
|
18
|
+
expect {
|
19
|
+
GovukSeedCrawler::CLIParser.new(["a", "b"]).parse
|
20
|
+
}.to raise_exception(GovukSeedCrawler::CLIException, "too many arguments provided")
|
21
|
+
end
|
22
|
+
|
23
|
+
it "should nest the help message in with any CLIExceptions we raise" do
|
24
|
+
expect {
|
25
|
+
GovukSeedCrawler::CLIParser.new(["a", "b"]).parse
|
26
|
+
}.to raise_exception(GovukSeedCrawler::CLIException) { |e|
|
27
|
+
expect(e.help).to include("Usage: ")
|
28
|
+
}
|
29
|
+
end
|
30
|
+
|
31
|
+
describe "catching STDOUT" do
|
32
|
+
it "shows the help banner when provided -h" do
|
33
|
+
# Build a valid options object first, because parsing `-h` exits early with SystemExit.
|
34
|
+
options = GovukSeedCrawler::CLIParser.new(["http://www.foo.com/"]).options
|
35
|
+
|
36
|
+
temp_stdout do |caught_stdout|
|
37
|
+
expect {
|
38
|
+
_, _ = GovukSeedCrawler::CLIParser.new(["-h"]).parse
|
39
|
+
}.to raise_exception(SystemExit) { |e|
|
40
|
+
expect(e.status).to eq(0)
|
41
|
+
}
|
42
|
+
|
43
|
+
expect(caught_stdout.strip).to eq(options.help)
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
it "should show the version number and exit" do
|
48
|
+
temp_stdout do |caught_stdout|
|
49
|
+
expect {
|
50
|
+
_, _ = GovukSeedCrawler::CLIParser.new(["--version"]).parse
|
51
|
+
}.to raise_exception(SystemExit) { |e|
|
52
|
+
expect(e.status).to eq(0)
|
53
|
+
}
|
54
|
+
|
55
|
+
expect(caught_stdout.strip).to eq("Version: #{GovukSeedCrawler::VERSION}")
|
56
|
+
end
|
57
|
+
end
|
58
|
+
end
|
59
|
+
|
60
|
+
describe "passing in valid arguments" do
|
61
|
+
let(:arguments) {
|
62
|
+
[
|
63
|
+
"https://www.override.com/",
|
64
|
+
"--host rabbitmq.some.custom.vhost",
|
65
|
+
"--port 4567",
|
66
|
+
"--username foo",
|
67
|
+
"--password bar",
|
68
|
+
"--exchange some_custom_exchange",
|
69
|
+
"--topic some_custom_topic",
|
70
|
+
"--vhost a_vhost",
|
71
|
+
"--verbose"
|
72
|
+
].join(" ").split(" ")
|
73
|
+
}
|
74
|
+
|
75
|
+
it "should override all of the default arguments that we're providing" do
|
76
|
+
overriden = {
|
77
|
+
host: "rabbitmq.some.custom.vhost",
|
78
|
+
port: "4567",
|
79
|
+
username: "foo",
|
80
|
+
password: "bar",
|
81
|
+
exchange: "some_custom_exchange",
|
82
|
+
topic: "some_custom_topic",
|
83
|
+
help: nil,
|
84
|
+
quiet: false,
|
85
|
+
verbose: true,
|
86
|
+
version: nil,
|
87
|
+
vhost: "a_vhost"
|
88
|
+
}
|
89
|
+
|
90
|
+
expect(GovukSeedCrawler::CLIParser.new(arguments).parse.first).to eq(overriden)
|
91
|
+
end
|
92
|
+
|
93
|
+
it "should set the --quiet value" do
|
94
|
+
options, _ = GovukSeedCrawler::CLIParser.new(["foo.com", "--quiet"]).parse
|
95
|
+
expect(options).to eq(GovukSeedCrawler::CLIParser::DEFAULTS.merge(quiet: true))
|
96
|
+
end
|
97
|
+
|
98
|
+
describe "reading the AMQP password from an environment variable" do
|
99
|
+
def set_amqp_pass(password)
|
100
|
+
ENV[GovukSeedCrawler::CLIParser::ENV_AMQP_PASS_KEY] = password
|
101
|
+
end
|
102
|
+
|
103
|
+
after do
|
104
|
+
ENV[GovukSeedCrawler::CLIParser::ENV_AMQP_PASS_KEY] = nil
|
105
|
+
end
|
106
|
+
|
107
|
+
it "sets the password if set using an environment variable" do
|
108
|
+
set_amqp_pass("foobar")
|
109
|
+
|
110
|
+
expect(GovukSeedCrawler::CLIParser.new(["http://www.example.com"]).parse.first)
|
111
|
+
.to include(password: "foobar")
|
112
|
+
end
|
113
|
+
|
114
|
+
it "picks the environment variable over the parameter if both are set" do
|
115
|
+
set_amqp_pass("bar")
|
116
|
+
|
117
|
+
expect(GovukSeedCrawler::CLIParser.new(["http://www.example.com", "--password", "foo"]).parse.first)
|
118
|
+
.to include(password: "bar")
|
119
|
+
end
|
120
|
+
end
|
121
|
+
end
|
122
|
+
end
|
@@ -0,0 +1,74 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe GovukSeedCrawler::CLIRunner do
|
4
|
+
describe "printing the version" do
|
5
|
+
it "should not try to connect to an AMQP server" do
|
6
|
+
expect(Bunny).not_to receive(:new)
|
7
|
+
|
8
|
+
temp_stdout do |caught_stdout|
|
9
|
+
expect {
|
10
|
+
GovukSeedCrawler::CLIRunner.new(["--version"]).run
|
11
|
+
}.to raise_exception(SystemExit) { |exit|
|
12
|
+
expect(exit.status).to eq(0)
|
13
|
+
}
|
14
|
+
|
15
|
+
expect(caught_stdout.strip).to eq("Version: #{GovukSeedCrawler::VERSION}")
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
describe "catching any CLIException objects and exiting with a status 1" do
|
21
|
+
it "prints to STDOUT for too many arguments" do
|
22
|
+
temp_stdout do |caught_stdout|
|
23
|
+
expect {
|
24
|
+
GovukSeedCrawler::CLIRunner.new(["a", "b"])
|
25
|
+
}.to raise_exception(SystemExit) { |exit|
|
26
|
+
expect(exit.status).to eq(2)
|
27
|
+
}
|
28
|
+
|
29
|
+
expect(caught_stdout.strip).to include("too many arguments provided")
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
it "prints to STDOUT when site_root not set" do
|
34
|
+
temp_stdout do |caught_stdout|
|
35
|
+
expect {
|
36
|
+
GovukSeedCrawler::CLIRunner.new(["--verbose"])
|
37
|
+
}.to raise_exception(SystemExit) { |exit|
|
38
|
+
expect(exit.status).to eq(2)
|
39
|
+
}
|
40
|
+
|
41
|
+
expect(caught_stdout.strip).to include("site_root must be provided")
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
describe "setting the logging level" do
|
47
|
+
before do
|
48
|
+
GovukSeedCrawler.logger.level = Logger::INFO
|
49
|
+
end
|
50
|
+
|
51
|
+
it "defaults to INFO" do
|
52
|
+
GovukSeedCrawler::CLIRunner.new(["http://www.example.com"])
|
53
|
+
expect(GovukSeedCrawler.logger.level).to eq(Logger::INFO)
|
54
|
+
end
|
55
|
+
|
56
|
+
it "sets to ERROR for quite" do
|
57
|
+
GovukSeedCrawler::CLIRunner.new(["http://www.example.com", "--quiet"])
|
58
|
+
expect(GovukSeedCrawler.logger.level).to eq(Logger::ERROR)
|
59
|
+
end
|
60
|
+
|
61
|
+
it "sets to DEBUG for verbose" do
|
62
|
+
GovukSeedCrawler::CLIRunner.new(["http://www.example.com", "--verbose"])
|
63
|
+
expect(GovukSeedCrawler.logger.level).to eq(Logger::DEBUG)
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
describe "#run" do
|
68
|
+
it "passes all options through to seed" do
|
69
|
+
expect(GovukSeedCrawler::Seeder).to receive(:seed).
|
70
|
+
with("http://www.example.com", GovukSeedCrawler::CLIParser::DEFAULTS).once
|
71
|
+
GovukSeedCrawler::CLIRunner.new(["http://www.example.com"]).run
|
72
|
+
end
|
73
|
+
end
|
74
|
+
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe GovukSeedCrawler::Indexer do
|
4
|
+
subject { GovukSeedCrawler::Indexer.new('https://example.com/') }
|
5
|
+
|
6
|
+
context "under normal usage" do
|
7
|
+
let(:mock_indexer) do
|
8
|
+
double(:mock_indexer, :all_start_urls => [])
|
9
|
+
end
|
10
|
+
|
11
|
+
it "responds to Indexer#urls" do
|
12
|
+
allow(GovukMirrorer::Indexer).to receive(:new).and_return(mock_indexer)
|
13
|
+
expect(subject).to respond_to(:urls)
|
14
|
+
end
|
15
|
+
|
16
|
+
it "calls GovukMirrorer::Indexer with the site root" do
|
17
|
+
expect(GovukMirrorer::Indexer).to receive(:new).with('https://example.com/').and_return(mock_indexer)
|
18
|
+
subject
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe GovukSeedCrawler::Seeder do
|
4
|
+
let(:exchange) { "seeder_test_exchange" }
|
5
|
+
let(:topic) { "#" }
|
6
|
+
let(:root_url) { "https://www.example.com" }
|
7
|
+
|
8
|
+
let(:options) {{
|
9
|
+
:exchange => exchange,
|
10
|
+
:topic => topic,
|
11
|
+
}}
|
12
|
+
|
13
|
+
let(:mock_get_urls) { double(:mock_get_urls, :urls => true) }
|
14
|
+
let(:mock_amqp_client) { double(:mock_amqp_client, :close => true) }
|
15
|
+
|
16
|
+
let(:urls) do
|
17
|
+
[
|
18
|
+
"https://example.com/foo",
|
19
|
+
"https://example.com/bar",
|
20
|
+
"https://example.com/baz",
|
21
|
+
]
|
22
|
+
end
|
23
|
+
|
24
|
+
subject { GovukSeedCrawler::Seeder::seed(root_url, options) }
|
25
|
+
|
26
|
+
before(:each) do
|
27
|
+
allow(GovukSeedCrawler::Indexer).to receive(:new)
|
28
|
+
.with(root_url)
|
29
|
+
.and_return(mock_get_urls)
|
30
|
+
allow(mock_get_urls).to receive(:urls).and_return(urls)
|
31
|
+
allow(GovukSeedCrawler::AmqpClient).to receive(:new)
|
32
|
+
.with(options).and_return(mock_amqp_client)
|
33
|
+
end
|
34
|
+
|
35
|
+
context "under normal usage" do
|
36
|
+
it "publishes urls to the queue" do
|
37
|
+
urls.each do |url|
|
38
|
+
expect(mock_amqp_client).to receive(:publish)
|
39
|
+
.with(exchange, topic, url)
|
40
|
+
end
|
41
|
+
|
42
|
+
subject
|
43
|
+
end
|
44
|
+
|
45
|
+
it "closes the connection when done" do
|
46
|
+
allow(mock_amqp_client).to receive(:publish)
|
47
|
+
expect(mock_amqp_client).to receive(:close)
|
48
|
+
subject
|
49
|
+
end
|
50
|
+
end
|
51
|
+
end
|
@@ -0,0 +1,66 @@
|
|
1
|
+
require 'json'
|
2
|
+
require 'spec_helper'
|
3
|
+
|
4
|
+
describe GovukSeedCrawler do
|
5
|
+
def stub_api_artefacts(count)
|
6
|
+
item = {
|
7
|
+
"id" => "https://www.gov.uk/api/government%2Fnews%2Ffaster-review-of-support-for-renewable-electricity-to-provide-investor-certainty.json",
|
8
|
+
"web_url" => "https://www.gov.uk/government/news/faster-review-of-support-for-renewable-electricity-to-provide-investor-certainty",
|
9
|
+
"title" => "Faster review of support for Renewable electricity to provide investor certainty",
|
10
|
+
"format" => "announcement"
|
11
|
+
}
|
12
|
+
results = count.times.collect { item }
|
13
|
+
response = {
|
14
|
+
"_response_info" => {
|
15
|
+
"status" => "ok",
|
16
|
+
"links" => []
|
17
|
+
},
|
18
|
+
"total" => results.size,
|
19
|
+
"start_index" => 1,
|
20
|
+
"page_size" => 100,
|
21
|
+
"current_page" => 1,
|
22
|
+
"pages" => 1,
|
23
|
+
"results" => results
|
24
|
+
}
|
25
|
+
|
26
|
+
stub_request(:get, "https://www.gov.uk//api/artefacts.json").
|
27
|
+
to_return(:status => 200, :body => response.to_json, :headers => {})
|
28
|
+
end
|
29
|
+
|
30
|
+
let(:vhost) { "/" }
|
31
|
+
let(:exchange_name) { "govuk_seed_crawler_integration_exchange" }
|
32
|
+
let(:queue_name) { "govuk_seed_crawler_integration_queue" }
|
33
|
+
let(:topic) { "#" }
|
34
|
+
let(:site_root) { "https://www.gov.uk/" }
|
35
|
+
let(:options) {{
|
36
|
+
:host => ENV.fetch("AMQP_HOST", "localhost"),
|
37
|
+
:user => ENV.fetch("AMQP_USER", "govuk_seed_crawler"),
|
38
|
+
:pass => ENV.fetch("AMQP_PASS", "govuk_seed_crawler"),
|
39
|
+
:exchange => exchange_name,
|
40
|
+
:topic => topic
|
41
|
+
}}
|
42
|
+
let(:rabbitmq_client) { GovukSeedCrawler::AmqpClient.new(options) }
|
43
|
+
|
44
|
+
subject { GovukSeedCrawler::Seeder::seed(site_root, options) }
|
45
|
+
|
46
|
+
before(:each) do
|
47
|
+
@exchange = rabbitmq_client.channel.topic(exchange_name, :durable => true)
|
48
|
+
@queue = rabbitmq_client.channel.queue(queue_name)
|
49
|
+
@queue.bind(@exchange, :routing_key => topic)
|
50
|
+
end
|
51
|
+
|
52
|
+
after(:each) do
|
53
|
+
@queue.unbind(@exchange)
|
54
|
+
@queue.delete
|
55
|
+
@exchange.delete
|
56
|
+
rabbitmq_client.close
|
57
|
+
end
|
58
|
+
|
59
|
+
it "publishes URLs it finds to an AMQP topic exchange" do
|
60
|
+
stub_api_artefacts(10)
|
61
|
+
subject
|
62
|
+
|
63
|
+
# There are an extra 5 URLs that are hard-coded in the Indexer class.
|
64
|
+
expect(@queue.message_count).to be(15)
|
65
|
+
end
|
66
|
+
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
require 'govuk_seed_crawler'
|
2
|
+
require 'webmock/rspec'
|
3
|
+
|
4
|
+
RSpec.configure do |config|
|
5
|
+
config.order = :random
|
6
|
+
|
7
|
+
# Seed global randomization in this process using the `--seed` CLI option.
|
8
|
+
# Setting this allows you to use `--seed` to deterministically reproduce
|
9
|
+
# test failures related to randomization by passing the same `--seed` value
|
10
|
+
# as the one that triggered the failure.
|
11
|
+
Kernel.srand config.seed
|
12
|
+
|
13
|
+
config.expect_with :rspec do |expectations|
|
14
|
+
# Enable only the newer, non-monkey-patching expect syntax.
|
15
|
+
# For more details, see:
|
16
|
+
# - http://myronmars.to/n/dev-blog/2012/06/rspecs-new-expectation-syntax
|
17
|
+
expectations.syntax = :expect
|
18
|
+
end
|
19
|
+
|
20
|
+
config.mock_with :rspec do |mocks|
|
21
|
+
# Enable only the newer, non-monkey-patching expect syntax.
|
22
|
+
# For more details, see:
|
23
|
+
# - http://teaisaweso.me/blog/2013/05/27/rspecs-new-message-expectation-syntax/
|
24
|
+
mocks.syntax = :expect
|
25
|
+
|
26
|
+
# Prevents you from mocking or stubbing a method that does not exist on
|
27
|
+
# a real object. This is generally recommended.
|
28
|
+
mocks.verify_partial_doubles = true
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
WebMock.disable_net_connect!
|
33
|
+
|
34
|
+
def temp_stdout
|
35
|
+
$stdout = StringIO.new
|
36
|
+
yield $stdout.string
|
37
|
+
ensure
|
38
|
+
$stdout = STDOUT
|
39
|
+
end
|
metadata
ADDED
@@ -0,0 +1,205 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: govuk_seed_crawler
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Matt Bostock
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2015-08-28 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bunny
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.3'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.3'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: govuk_mirrorer
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 1.3.1
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 1.3.1
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: slop
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: 3.6.0
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: 3.6.0
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: gem_publisher
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '1.3'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '1.3'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: pry
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: rake
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ">="
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ">="
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: rspec
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - "~>"
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '3.0'
|
104
|
+
type: :development
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - "~>"
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '3.0'
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: rspec-mocks
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - "~>"
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '3.0'
|
118
|
+
type: :development
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - "~>"
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: '3.0'
|
125
|
+
- !ruby/object:Gem::Dependency
|
126
|
+
name: webmock
|
127
|
+
requirement: !ruby/object:Gem::Requirement
|
128
|
+
requirements:
|
129
|
+
- - "~>"
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: 1.18.0
|
132
|
+
type: :development
|
133
|
+
prerelease: false
|
134
|
+
version_requirements: !ruby/object:Gem::Requirement
|
135
|
+
requirements:
|
136
|
+
- - "~>"
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
version: 1.18.0
|
139
|
+
description:
|
140
|
+
email:
|
141
|
+
- matt.bostock@digital.cabinet-office.gov.uk
|
142
|
+
executables:
|
143
|
+
- seed-crawler
|
144
|
+
extensions: []
|
145
|
+
extra_rdoc_files: []
|
146
|
+
files:
|
147
|
+
- ".gitignore"
|
148
|
+
- ".rspec"
|
149
|
+
- ".ruby-version"
|
150
|
+
- Gemfile
|
151
|
+
- LICENSE.txt
|
152
|
+
- README.md
|
153
|
+
- Rakefile
|
154
|
+
- bin/seed-crawler
|
155
|
+
- govuk_seed_crawler.gemspec
|
156
|
+
- jenkins-branches.sh
|
157
|
+
- jenkins-tests.sh
|
158
|
+
- jenkins.sh
|
159
|
+
- lib/govuk_seed_crawler.rb
|
160
|
+
- lib/govuk_seed_crawler/amqp_client.rb
|
161
|
+
- lib/govuk_seed_crawler/cli_parser.rb
|
162
|
+
- lib/govuk_seed_crawler/cli_runner.rb
|
163
|
+
- lib/govuk_seed_crawler/indexer.rb
|
164
|
+
- lib/govuk_seed_crawler/seeder.rb
|
165
|
+
- lib/govuk_seed_crawler/version.rb
|
166
|
+
- spec/govuk_seed_crawler/amqp_client_spec.rb
|
167
|
+
- spec/govuk_seed_crawler/cli_parser_spec.rb
|
168
|
+
- spec/govuk_seed_crawler/cli_runner_spec.rb
|
169
|
+
- spec/govuk_seed_crawler/indexer_spec.rb
|
170
|
+
- spec/govuk_seed_crawler/seeder_spec.rb
|
171
|
+
- spec/integration/govuk_seed_crawler_spec.rb
|
172
|
+
- spec/spec_helper.rb
|
173
|
+
homepage: https://github.gds/gds/govuk_seed_crawler
|
174
|
+
licenses:
|
175
|
+
- MIT
|
176
|
+
metadata: {}
|
177
|
+
post_install_message:
|
178
|
+
rdoc_options: []
|
179
|
+
require_paths:
|
180
|
+
- lib
|
181
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
182
|
+
requirements:
|
183
|
+
- - ">="
|
184
|
+
- !ruby/object:Gem::Version
|
185
|
+
version: '0'
|
186
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
187
|
+
requirements:
|
188
|
+
- - ">="
|
189
|
+
- !ruby/object:Gem::Version
|
190
|
+
version: '0'
|
191
|
+
requirements: []
|
192
|
+
rubyforge_project:
|
193
|
+
rubygems_version: 2.2.2
|
194
|
+
signing_key:
|
195
|
+
specification_version: 4
|
196
|
+
summary: Retrieves a list of URLs to seed the crawler by publishing them to a RabbitMQ
|
197
|
+
exchange.
|
198
|
+
test_files:
|
199
|
+
- spec/govuk_seed_crawler/amqp_client_spec.rb
|
200
|
+
- spec/govuk_seed_crawler/cli_parser_spec.rb
|
201
|
+
- spec/govuk_seed_crawler/cli_runner_spec.rb
|
202
|
+
- spec/govuk_seed_crawler/indexer_spec.rb
|
203
|
+
- spec/govuk_seed_crawler/seeder_spec.rb
|
204
|
+
- spec/integration/govuk_seed_crawler_spec.rb
|
205
|
+
- spec/spec_helper.rb
|