govuk_seed_crawler 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +1 -0
- data/.rspec +2 -0
- data/.ruby-version +1 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +39 -0
- data/Rakefile +18 -0
- data/bin/seed-crawler +5 -0
- data/govuk_seed_crawler.gemspec +30 -0
- data/jenkins-branches.sh +18 -0
- data/jenkins-tests.sh +6 -0
- data/jenkins.sh +5 -0
- data/lib/govuk_seed_crawler.rb +17 -0
- data/lib/govuk_seed_crawler/amqp_client.rb +28 -0
- data/lib/govuk_seed_crawler/cli_parser.rb +81 -0
- data/lib/govuk_seed_crawler/cli_runner.rb +29 -0
- data/lib/govuk_seed_crawler/indexer.rb +18 -0
- data/lib/govuk_seed_crawler/seeder.rb +16 -0
- data/lib/govuk_seed_crawler/version.rb +3 -0
- data/spec/govuk_seed_crawler/amqp_client_spec.rb +55 -0
- data/spec/govuk_seed_crawler/cli_parser_spec.rb +122 -0
- data/spec/govuk_seed_crawler/cli_runner_spec.rb +74 -0
- data/spec/govuk_seed_crawler/indexer_spec.rb +21 -0
- data/spec/govuk_seed_crawler/seeder_spec.rb +51 -0
- data/spec/integration/govuk_seed_crawler_spec.rb +66 -0
- data/spec/spec_helper.rb +39 -0
- metadata +205 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: a37f428070681bc4ca2466497df0b69f45fed94a
|
4
|
+
data.tar.gz: 55bdafe5ade9251f6f630eeb490e481a9796fc4f
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 631f38d96a7d1ea301b38e761d5c55debbb9ee0c99a8e2a88ef1bb965b12637eceaaba70dfbd13941e4b96c088781fc6da3f724e283bfe8afc6d3dd8f0732321
|
7
|
+
data.tar.gz: 2f8b41afecdaba199b32925b13804c6dce588535b611a01383753dac2a22a655e728a9b77cf529845e0b6df1738bdca716c0eac5a5a4032e70e61d7d94c9cd82
|
data/.gitignore
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
Gemfile.lock
|
data/.rspec
ADDED
data/.ruby-version
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
2.1.2
|
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
(c) 2014 Crown copyright
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
# GOV.UK: Seed the Crawler
|
2
|
+
|
3
|
+
Retrieves a list of URLs to seed the [crawler](https://github.com/alphagov/govuk_crawler_worker) by publishing them to a RabbitMQ exchange.
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
Add this line to your application's Gemfile:
|
8
|
+
|
9
|
+
gem 'govuk_seed_crawler'
|
10
|
+
|
11
|
+
And then execute:
|
12
|
+
|
13
|
+
$ bundle
|
14
|
+
|
15
|
+
Or install it yourself as:
|
16
|
+
|
17
|
+
$ gem install govuk_seed_crawler
|
18
|
+
|
19
|
+
## Usage
|
20
|
+
|
21
|
+
To run with the RabbitMQ connection defaults:
|
22
|
+
|
23
|
+
```bash
|
24
|
+
bundle exec seed-crawler https://www.gov.uk/
|
25
|
+
```
|
26
|
+
|
27
|
+
Run with `--help` to see a list of options:
|
28
|
+
|
29
|
+
```bash
|
30
|
+
bundle exec seed-crawler --help
|
31
|
+
```
|
32
|
+
|
33
|
+
## Contributing
|
34
|
+
|
35
|
+
1. Fork it ( http://github.com/{my-github-username}/govuk_seed_crawler/fork )
|
36
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
37
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
38
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
39
|
+
5. Create a new Pull Request
|
data/Rakefile
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
require 'gem_publisher'
|
2
|
+
require 'rspec/core/rake_task'
|
3
|
+
|
4
|
+
# One RSpec rake task per suite: task name => glob of spec files it runs.
{
  spec: 'spec/govuk_seed_crawler/**/*_spec.rb',
  integration: 'spec/integration/**/*_spec.rb',
}.each do |task_name, glob|
  RSpec::Core::RakeTask.new(task_name) { |rspec| rspec.pattern = FileList[glob] }
end

# Running bare `rake` executes the :spec task only.
task default: :spec

desc "Publish gem to RubyGems"
task :publish_gem do
  # The result is truthy only when something was published; print it in that case.
  published = GemPublisher.publish_if_updated("govuk_seed_crawler.gemspec")
  puts "Published #{published}" if published
end
|
data/bin/seed-crawler
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# Put lib/ on the load path so the version constant can be required below.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'govuk_seed_crawler/version'

Gem::Specification.new do |gem|
  gem.name     = "govuk_seed_crawler"
  gem.version  = GovukSeedCrawler::VERSION
  gem.authors  = ["Matt Bostock"]
  gem.email    = ["matt.bostock@digital.cabinet-office.gov.uk"]
  gem.summary  = "Retrieves a list of URLs to seed the crawler by publishing them to a RabbitMQ exchange."
  gem.homepage = "https://github.gds/gds/govuk_seed_crawler"
  gem.license  = "MIT"

  # Package every file tracked by git; executables and test files are
  # derived from that list by path prefix.
  gem.files         = `git ls-files -z`.split("\x0")
  gem.executables   = gem.files.grep(%r{^bin/}).map { |path| File.basename(path) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  # Each entry is [name] or [name, version requirement].
  [
    ["bunny", "~> 1.3"],
    ["govuk_mirrorer", "~> 1.3.1"],
    ["slop", "~> 3.6.0"],
  ].each { |dependency| gem.add_runtime_dependency(*dependency) }

  [
    ["gem_publisher", "~> 1.3"],
    ["pry"],
    ["rake"],
    ["rspec", "~> 3.0"],
    ["rspec-mocks", "~> 3.0"],
    ["webmock", "~> 1.18.0"],
  ].each { |dependency| gem.add_development_dependency(*dependency) }
end
|
data/jenkins-branches.sh
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
#!/usr/bin/env bash
# Runs ./jenkins-tests.sh and reports the build state for $GIT_COMMIT through
# the gh-status command (presumably provided by the ghtools package installed
# below - confirm). Exits 0 when the tests pass and 1 when they fail.
set -e

# Create the virtualenv only if it does not already provide pip, then enter it.
if [ ! -x .venv/bin/pip ]; then
  virtualenv .venv
fi
. .venv/bin/activate

pip install -q ghtools

REPO="gds:gds/govuk_seed_crawler"

# report_status STATE DESCRIPTION
# Sends one status update; the description is passed wrapped in literal
# double quotes and gh-status output is discarded.
report_status() {
  gh-status "$REPO" "$GIT_COMMIT" "$1" -d "\"$2\"" -u "$BUILD_URL" >/dev/null
}

report_status pending "Build #${BUILD_NUMBER} is running on Jenkins"

if ./jenkins-tests.sh; then
  report_status success "Build #${BUILD_NUMBER} succeeded on Jenkins"
  exit 0
else
  report_status failure "Build #${BUILD_NUMBER} failed on Jenkins"
  exit 1
fi
|
data/jenkins-tests.sh
ADDED
data/jenkins.sh
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
require 'govuk_seed_crawler/amqp_client'
|
2
|
+
require 'govuk_seed_crawler/cli_parser'
|
3
|
+
require 'govuk_seed_crawler/cli_runner'
|
4
|
+
require 'govuk_seed_crawler/indexer'
|
5
|
+
require 'govuk_seed_crawler/seeder'
|
6
|
+
require 'govuk_seed_crawler/version'
|
7
|
+
|
8
|
+
module GovukSeedCrawler
  # Returns the logger shared by the whole gem, building it on first use.
  # The logger writes to STDOUT and starts at INFO level; callers may change
  # the level on the returned object and the change persists.
  def self.logger
    @logger ||= Logger.new(STDOUT).tap { |log| log.level = Logger::INFO }
  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
require 'bunny'
|
2
|
+
|
3
|
+
module GovukSeedCrawler
  # Small wrapper around a Bunny connection that publishes messages to
  # durable topic exchanges.
  class AmqpClient
    # The channel created on the connection in #initialize.
    attr_reader :channel

    # Builds a Bunny connection from +connection_options+ (passed to
    # Bunny.new unchanged), starts it and creates a channel on it.
    def initialize(connection_options = {})
      connection = Bunny.new(connection_options)
      connection.start

      @conn = connection
      @channel = connection.create_channel
    end

    # Closes the underlying connection.
    def close
      @conn.close
    end

    # Publishes +body+ to the durable topic exchange named +exchange+, using
    # +topic+ as the routing key.
    #
    # Raises RuntimeError when any of the three arguments is nil.
    # Returns whatever the exchange's publish call returns.
    def publish(exchange, topic, body)
      [
        [exchange, "Exchange cannot be nil"],
        [topic, "Topic cannot be nil"],
        [body, "Message body cannot be nil"],
      ].each { |value, message| raise message if value.nil? }

      GovukSeedCrawler.logger.debug("Publishing '#{body}' to topic '#{topic}'")

      topic_exchange = @channel.topic(exchange, durable: true)
      topic_exchange.publish(body, routing_key: topic)
    end
  end
end
|
@@ -0,0 +1,81 @@
|
|
1
|
+
require 'slop'
|
2
|
+
|
3
|
+
module GovukSeedCrawler
  # Raised for command-line usage errors. Carries the help text next to the
  # message so that the caller can print both.
  class CLIException < StandardError
    attr_reader :help

    def initialize(message, help)
      @help = help
      super(message)
    end
  end

  # Parses the command line: one positional site_root plus AMQP and logging
  # options.
  class CLIParser
    # Value each option has when it is not given on the command line.
    DEFAULTS = {
      exchange: "govuk_crawler_exchange",
      help: nil,
      host: "localhost",
      password: "guest",
      port: "5672",
      quiet: false,
      topic: "#",
      username: "guest",
      verbose: false,
      version: nil,
      vhost: "/"
    }.freeze

    # Environment variable that, when set, overrides the AMQP password.
    ENV_AMQP_PASS_KEY = "GOVUK_CRAWLER_AMQP_PASS".freeze

    def initialize(argv_array)
      @argv_array = argv_array
    end

    # Declares the options and runs Slop over the stored argv, returning the
    # Slop result. Slop.parse! works on the array in place; #parse relies on
    # only the positional arguments being left in it afterwards.
    def options
      Slop.parse!(@argv_array, help: true) do
        banner <<-EOS
Usage: #{$PROGRAM_NAME} site_root [options]

Seeds an AMQP topic exchange with messages, each containing a URL, for the GOV.UK Crawler Worker
to consume:

https://github.com/alphagov/govuk_crawler_worker

The AMQP password can also be set as an environment variable and will be read from
`#{ENV_AMQP_PASS_KEY}`. If both the environment variable and command-line option for password
are set, the environment variable will take higher precedent.
        EOS

        on :version, "Display version and exit" do
          puts "Version: #{GovukSeedCrawler::VERSION}"
          exit 0
        end

        # Options that take a value; declared in the order they appear in help.
        value_options = {
          host: "AMQP host to publish to",
          port: "AMQP port",
          username: "AMQP username",
          password: "AMQP password",
          exchange: "AMQP exchange",
          topic: "AMQP topic",
          vhost: "AMQP vhost"
        }
        value_options.each do |name, description|
          on(:"#{name}=", description, default: DEFAULTS[name])
        end

        # Boolean switches.
        { quiet: "Quiet output", verbose: "Verbose output" }.each do |name, description|
          on(name, description, default: DEFAULTS[name])
        end
      end
    end

    # Parses the command line and returns [options_hash, site_root].
    #
    # Raises CLIException unless exactly one positional argument remains
    # after option parsing. The password from ENV_AMQP_PASS_KEY, when set,
    # replaces the one from the command line.
    def parse
      parsed = options

      if parsed[:version].nil?
        positional_count = @argv_array.size
        raise CLIException.new("too many arguments provided", parsed.help) if positional_count > 1
        raise CLIException.new("site_root must be provided", parsed.help) if positional_count != 1
      end

      result = parsed.to_hash
      env_password = ENV[ENV_AMQP_PASS_KEY]
      result[:password] = env_password unless env_password.nil?

      [result, @argv_array.first]
    end
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
module GovukSeedCrawler
  # Entry point for the command-line tool: parses argv, adjusts the log
  # level, then hands over to the Seeder.
  class CLIRunner
    # Parses +argv_array+. On a usage error the message and help text are
    # printed and the process exits with status 2.
    def initialize(argv_array)
      @options, @site_root = parse_or_exit(argv_array)
      set_logging_level(@options)
    end

    # Seeds the exchange using the parsed site root and options.
    def run
      Seeder.seed(@site_root, @options)
    end

    private

    # Returns the parser's [options, site_root] pair, or prints the usage
    # error and exits with status 2.
    def parse_or_exit(argv_array)
      CLIParser.new(argv_array).parse
    rescue CLIException => error
      puts error.message
      puts error.help
      exit 2
    end

    # --verbose takes priority over --quiet; with neither, the logger level
    # is left untouched.
    def set_logging_level(cli_options)
      level =
        if cli_options[:verbose]
          Logger::DEBUG
        elsif cli_options[:quiet]
          Logger::ERROR
        end

      GovukSeedCrawler.logger.level = level if level
    end
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
require 'govuk_mirrorer/indexer'
|
2
|
+
require 'govuk_mirrorer/statsd'
|
3
|
+
|
4
|
+
module GovukSeedCrawler
  # Collects the start URLs for a site by delegating to
  # GovukMirrorer::Indexer. All work happens in the constructor.
  class Indexer
    # The list returned by GovukMirrorer::Indexer#all_start_urls.
    attr_reader :urls

    # Fetches the URLs for +site_root+ and logs how many were found.
    #
    # Raises RuntimeError when +site_root+ is nil or false.
    def initialize(site_root)
      raise "No site_root defined" unless site_root

      log = GovukSeedCrawler.logger
      log.info("Retrieving list of URLs for #{site_root}")

      @urls = GovukMirrorer::Indexer.new(site_root).all_start_urls

      log.info("Found #{@urls.count} URLs")
    end
  end
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
module GovukSeedCrawler
  # Publishes every URL found for a site to an AMQP topic exchange.
  class Seeder
    # Connects to AMQP using +options+, retrieves the URLs for +site_root+
    # and publishes each one to options[:exchange] with options[:topic] as
    # the routing key.
    #
    # The connection is closed even when indexing or publishing raises, so a
    # failed run does not leave it open; the error still propagates to the
    # caller. The return value is not meaningful.
    def self.seed(site_root, options = {})
      amqp_client = AmqpClient.new(options)

      begin
        urls = Indexer.new(site_root).urls

        urls.each do |url|
          amqp_client.publish(options[:exchange], options[:topic], url)
        end

        GovukSeedCrawler.logger.info("Published #{urls.count} URLs to topic '#{options[:topic]}'")
      ensure
        amqp_client.close
      end
    end
  end
end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe GovukSeedCrawler::AmqpClient do
  let(:exchange) { "govuk_seed_crawler_spec_exchange" }

  # Connection settings come from AMQP_* environment variables, with
  # fallbacks for a local broker.
  let(:options) do
    {
      host: ENV.fetch("AMQP_HOST", "localhost"),
      user: ENV.fetch("AMQP_USER", "govuk_seed_crawler"),
      pass: ENV.fetch("AMQP_PASS", "govuk_seed_crawler"),
    }
  end

  subject { described_class.new(options) }

  it "responds to #channel" do
    expect(subject).to respond_to(:channel)
  end

  it "responds to #close" do
    expect(subject).to respond_to(:close)
  end

  it "closes the connection to the AMQP server" do
    fake_connection = double(:mock_bunny, start: true, create_channel: true, close: true)
    allow(Bunny).to receive(:new).and_return(fake_connection)
    expect(fake_connection).to receive(:close).once

    subject.close
  end

  context "#publish" do
    context "error handling" do
      it "raises an exception if exchange is nil" do
        expect { subject.publish(nil, "#", "some body") }
          .to raise_exception(RuntimeError, "Exchange cannot be nil")
      end

      it "raises an exception if topic is nil" do
        expect { subject.publish(exchange, nil, "some body") }
          .to raise_exception(RuntimeError, "Topic cannot be nil")
      end

      it "raises an exception if body is nil" do
        expect { subject.publish(exchange, "#", nil) }
          .to raise_exception(RuntimeError, "Message body cannot be nil")
      end
    end

    it "allows publishing against an exchange" do
      published = subject.publish(exchange, "#", "some body")

      expect(published).to_not be_nil
    end
  end
end
|
@@ -0,0 +1,122 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe GovukSeedCrawler::CLIParser do
  let(:defaults) { described_class::DEFAULTS }

  it "requires the site_root to be provided" do
    expect { described_class.new([]).parse }
      .to raise_exception(GovukSeedCrawler::CLIException, "site_root must be provided")
  end

  it "provides the defaults when just given the site_root" do
    options, site_root = described_class.new(["https://www.example.com"]).parse

    expect(options).to eq(defaults)
    expect(site_root).to eq("https://www.example.com")
  end

  it "should tell us when we've given too many arguments" do
    expect { described_class.new(%w[a b]).parse }
      .to raise_exception(GovukSeedCrawler::CLIException, "too many arguments provided")
  end

  it "should nest the help message in with any CLIExceptions we raise" do
    expect { described_class.new(%w[a b]).parse }
      .to raise_exception(GovukSeedCrawler::CLIException) { |error|
        expect(error.help).to include("Usage: ")
      }
  end

  describe "catching STDOUT" do
    it "shows the help banner when provided -h" do
      # -h exits early with SystemExit, so take the expected help text from a
      # parser that was given valid arguments.
      reference_options = described_class.new(["http://www.foo.com/"]).options

      temp_stdout do |caught_stdout|
        expect { described_class.new(["-h"]).parse }
          .to raise_exception(SystemExit) { |system_exit|
            expect(system_exit.status).to eq(0)
          }

        expect(caught_stdout.strip).to eq(reference_options.help)
      end
    end

    it "should show the version number and exit" do
      temp_stdout do |caught_stdout|
        expect { described_class.new(["--version"]).parse }
          .to raise_exception(SystemExit) { |system_exit|
            expect(system_exit.status).to eq(0)
          }

        expect(caught_stdout.strip).to eq("Version: #{GovukSeedCrawler::VERSION}")
      end
    end
  end

  describe "passing in valid arguments" do
    # The site root followed by every overridable option, as separate argv
    # tokens.
    let(:arguments) do
      %w[
        https://www.override.com/
        --host rabbitmq.some.custom.vhost
        --port 4567
        --username foo
        --password bar
        --exchange some_custom_exchange
        --topic some_custom_topic
        --vhost a_vhost
        --verbose
      ]
    end

    it "should override all of the default arguments that we're providing" do
      expected_options = {
        host: "rabbitmq.some.custom.vhost",
        port: "4567",
        username: "foo",
        password: "bar",
        exchange: "some_custom_exchange",
        topic: "some_custom_topic",
        help: nil,
        quiet: false,
        verbose: true,
        version: nil,
        vhost: "a_vhost"
      }

      parsed_options, _site_root = described_class.new(arguments).parse

      expect(parsed_options).to eq(expected_options)
    end

    it "should set the --quiet value" do
      options, _site_root = described_class.new(["foo.com", "--quiet"]).parse

      expect(options).to eq(defaults.merge(quiet: true))
    end

    describe "reading the AMQP password from an environment variable" do
      let(:env_key) { described_class::ENV_AMQP_PASS_KEY }

      # Clear the variable so it cannot leak into other examples.
      after { ENV[env_key] = nil }

      it "sets the password if set using an environment variable" do
        ENV[env_key] = "foobar"

        parsed_options = described_class.new(["http://www.example.com"]).parse.first

        expect(parsed_options).to include(password: "foobar")
      end

      it "picks the environment variable over the parameter if both are set" do
        ENV[env_key] = "bar"

        parsed_options = described_class.new(["http://www.example.com", "--password", "foo"]).parse.first

        expect(parsed_options).to include(password: "bar")
      end
    end
  end
end
|
@@ -0,0 +1,74 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe GovukSeedCrawler::CLIRunner do
  describe "printing the version" do
    it "should not try to connect to an AMQP server" do
      expect(Bunny).not_to receive(:new)

      temp_stdout do |caught_stdout|
        expect {
          GovukSeedCrawler::CLIRunner.new(["--version"]).run
        }.to raise_exception(SystemExit) { |system_exit|
          expect(system_exit.status).to eq(0)
        }

        expect(caught_stdout.strip).to eq("Version: #{GovukSeedCrawler::VERSION}")
      end
    end
  end

  # Usage errors are reported on STDOUT and end the process with status 2.
  describe "catching any CLIException objects and exiting with a status 2" do
    it "prints to STDOUT for too many arguments" do
      temp_stdout do |caught_stdout|
        expect {
          GovukSeedCrawler::CLIRunner.new(["a", "b"])
        }.to raise_exception(SystemExit) { |system_exit|
          expect(system_exit.status).to eq(2)
        }

        expect(caught_stdout.strip).to include("too many arguments provided")
      end
    end

    it "prints to STDOUT when site_root not set" do
      temp_stdout do |caught_stdout|
        expect {
          GovukSeedCrawler::CLIRunner.new(["--verbose"])
        }.to raise_exception(SystemExit) { |system_exit|
          expect(system_exit.status).to eq(2)
        }

        expect(caught_stdout.strip).to include("site_root must be provided")
      end
    end
  end

  describe "setting the logging level" do
    # The logger is shared, so reset it before each example.
    before do
      GovukSeedCrawler.logger.level = Logger::INFO
    end

    it "defaults to INFO" do
      GovukSeedCrawler::CLIRunner.new(["http://www.example.com"])
      expect(GovukSeedCrawler.logger.level).to eq(Logger::INFO)
    end

    it "sets to ERROR for quiet" do
      GovukSeedCrawler::CLIRunner.new(["http://www.example.com", "--quiet"])
      expect(GovukSeedCrawler.logger.level).to eq(Logger::ERROR)
    end

    it "sets to DEBUG for verbose" do
      GovukSeedCrawler::CLIRunner.new(["http://www.example.com", "--verbose"])
      expect(GovukSeedCrawler.logger.level).to eq(Logger::DEBUG)
    end
  end

  describe "#run" do
    it "passes all options through to seed" do
      expect(GovukSeedCrawler::Seeder).to receive(:seed).
        with("http://www.example.com", GovukSeedCrawler::CLIParser::DEFAULTS).once
      GovukSeedCrawler::CLIRunner.new(["http://www.example.com"]).run
    end
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe GovukSeedCrawler::Indexer do
  let(:site_root) { 'https://example.com/' }

  subject { described_class.new(site_root) }

  context "under normal usage" do
    # Stand-in for GovukMirrorer::Indexer that reports no URLs.
    let(:mock_indexer) { double(:mock_indexer, all_start_urls: []) }

    it "responds to Indexer#urls" do
      allow(GovukMirrorer::Indexer).to receive(:new).and_return(mock_indexer)

      expect(subject).to respond_to(:urls)
    end

    it "calls GovukMirrorer::Indexer with the site root" do
      expect(GovukMirrorer::Indexer).to receive(:new)
        .with('https://example.com/')
        .and_return(mock_indexer)

      subject
    end
  end
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe GovukSeedCrawler::Seeder do
  let(:exchange) { "seeder_test_exchange" }
  let(:topic) { "#" }
  let(:root_url) { "https://www.example.com" }
  let(:options) { { exchange: exchange, topic: topic } }

  let(:urls) do
    %w[
      https://example.com/foo
      https://example.com/bar
      https://example.com/baz
    ]
  end

  let(:mock_get_urls) { double(:mock_get_urls, urls: true) }
  let(:mock_amqp_client) { double(:mock_amqp_client, close: true) }

  subject { described_class.seed(root_url, options) }

  # Replace both collaborators so no HTTP request or AMQP connection is made.
  before(:each) do
    allow(GovukSeedCrawler::Indexer).to receive(:new).with(root_url).and_return(mock_get_urls)
    allow(mock_get_urls).to receive(:urls).and_return(urls)
    allow(GovukSeedCrawler::AmqpClient).to receive(:new).with(options).and_return(mock_amqp_client)
  end

  context "under normal usage" do
    it "publishes urls to the queue" do
      urls.each do |url|
        expect(mock_amqp_client).to receive(:publish).with(exchange, topic, url)
      end

      subject
    end

    it "closes the connection when done" do
      allow(mock_amqp_client).to receive(:publish)
      expect(mock_amqp_client).to receive(:close)

      subject
    end
  end
end
|
@@ -0,0 +1,66 @@
|
|
1
|
+
require 'json'
|
2
|
+
require 'spec_helper'
|
3
|
+
|
4
|
+
describe GovukSeedCrawler do
  # Stubs the artefacts JSON endpoint with a single-page response whose
  # results are +count+ copies of the same artefact.
  def stub_api_artefacts(count)
    artefact = {
      "id" => "https://www.gov.uk/api/government%2Fnews%2Ffaster-review-of-support-for-renewable-electricity-to-provide-investor-certainty.json",
      "web_url" => "https://www.gov.uk/government/news/faster-review-of-support-for-renewable-electricity-to-provide-investor-certainty",
      "title" => "Faster review of support for Renewable electricity to provide investor certainty",
      "format" => "announcement"
    }
    results = Array.new(count) { artefact }

    payload = {
      "_response_info" => { "status" => "ok", "links" => [] },
      "total" => results.size,
      "start_index" => 1,
      "page_size" => 100,
      "current_page" => 1,
      "pages" => 1,
      "results" => results
    }

    stub_request(:get, "https://www.gov.uk//api/artefacts.json")
      .to_return(status: 200, body: payload.to_json, headers: {})
  end

  let(:vhost) { "/" }
  let(:exchange_name) { "govuk_seed_crawler_integration_exchange" }
  let(:queue_name) { "govuk_seed_crawler_integration_queue" }
  let(:topic) { "#" }
  let(:site_root) { "https://www.gov.uk/" }

  let(:options) do
    {
      host: ENV.fetch("AMQP_HOST", "localhost"),
      user: ENV.fetch("AMQP_USER", "govuk_seed_crawler"),
      pass: ENV.fetch("AMQP_PASS", "govuk_seed_crawler"),
      exchange: exchange_name,
      topic: topic
    }
  end

  let(:rabbitmq_client) { GovukSeedCrawler::AmqpClient.new(options) }

  subject { GovukSeedCrawler::Seeder.seed(site_root, options) }

  # Bind a queue to the exchange so that published messages can be counted.
  before(:each) do
    channel = rabbitmq_client.channel
    @exchange = channel.topic(exchange_name, durable: true)
    @queue = channel.queue(queue_name)
    @queue.bind(@exchange, routing_key: topic)
  end

  # Remove everything the example declared, then drop the connection.
  after(:each) do
    @queue.unbind(@exchange)
    @queue.delete
    @exchange.delete
    rabbitmq_client.close
  end

  it "publishes URLs it finds to an AMQP topic exchange" do
    stub_api_artefacts(10)
    subject

    # There's an extra 5 URLs from the Indexer class that are hard-coded.
    expect(@queue.message_count).to be(15)
  end
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
require 'govuk_seed_crawler'
|
2
|
+
require 'webmock/rspec'
|
3
|
+
|
4
|
+
RSpec.configure do |rspec|
  # Run examples in random order.
  rspec.order = :random

  # Seed Ruby's global random number generator with the value of the
  # `--seed` CLI option, so an order-dependent failure can be reproduced by
  # re-running with the seed that triggered it.
  Kernel.srand(rspec.seed)

  # Allow only the `expect` expectation syntax (no monkey-patched `should`).
  # See http://myronmars.to/n/dev-blog/2012/06/rspecs-new-expectation-syntax
  rspec.expect_with(:rspec) { |expectations| expectations.syntax = :expect }

  rspec.mock_with(:rspec) do |mocks|
    # Allow only the `expect`/`allow` message-expectation syntax.
    # See http://teaisaweso.me/blog/2013/05/27/rspecs-new-message-expectation-syntax/
    mocks.syntax = :expect

    # Refuse to mock or stub a method that does not exist on a real object.
    mocks.verify_partial_doubles = true
  end
end

WebMock.disable_net_connect!
|
33
|
+
|
34
|
+
# Redirects $stdout to an in-memory buffer for the duration of the block.
#
# Yields the buffer's backing String, so the block sees text written before
# and after the point at which it receives it. Whatever stream $stdout
# pointed at before the call is put back afterwards, even if the block
# raises, so calls can be nested or used under an existing redirection.
def temp_stdout
  previous_stdout = $stdout
  buffer = StringIO.new
  $stdout = buffer
  yield buffer.string
ensure
  $stdout = previous_stdout
end
|
metadata
ADDED
@@ -0,0 +1,205 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: govuk_seed_crawler
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Matt Bostock
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2015-08-28 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bunny
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.3'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.3'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: govuk_mirrorer
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 1.3.1
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 1.3.1
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: slop
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: 3.6.0
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: 3.6.0
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: gem_publisher
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '1.3'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '1.3'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: pry
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: rake
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ">="
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ">="
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: rspec
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - "~>"
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '3.0'
|
104
|
+
type: :development
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - "~>"
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '3.0'
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: rspec-mocks
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - "~>"
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '3.0'
|
118
|
+
type: :development
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - "~>"
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: '3.0'
|
125
|
+
- !ruby/object:Gem::Dependency
|
126
|
+
name: webmock
|
127
|
+
requirement: !ruby/object:Gem::Requirement
|
128
|
+
requirements:
|
129
|
+
- - "~>"
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: 1.18.0
|
132
|
+
type: :development
|
133
|
+
prerelease: false
|
134
|
+
version_requirements: !ruby/object:Gem::Requirement
|
135
|
+
requirements:
|
136
|
+
- - "~>"
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
version: 1.18.0
|
139
|
+
description:
|
140
|
+
email:
|
141
|
+
- matt.bostock@digital.cabinet-office.gov.uk
|
142
|
+
executables:
|
143
|
+
- seed-crawler
|
144
|
+
extensions: []
|
145
|
+
extra_rdoc_files: []
|
146
|
+
files:
|
147
|
+
- ".gitignore"
|
148
|
+
- ".rspec"
|
149
|
+
- ".ruby-version"
|
150
|
+
- Gemfile
|
151
|
+
- LICENSE.txt
|
152
|
+
- README.md
|
153
|
+
- Rakefile
|
154
|
+
- bin/seed-crawler
|
155
|
+
- govuk_seed_crawler.gemspec
|
156
|
+
- jenkins-branches.sh
|
157
|
+
- jenkins-tests.sh
|
158
|
+
- jenkins.sh
|
159
|
+
- lib/govuk_seed_crawler.rb
|
160
|
+
- lib/govuk_seed_crawler/amqp_client.rb
|
161
|
+
- lib/govuk_seed_crawler/cli_parser.rb
|
162
|
+
- lib/govuk_seed_crawler/cli_runner.rb
|
163
|
+
- lib/govuk_seed_crawler/indexer.rb
|
164
|
+
- lib/govuk_seed_crawler/seeder.rb
|
165
|
+
- lib/govuk_seed_crawler/version.rb
|
166
|
+
- spec/govuk_seed_crawler/amqp_client_spec.rb
|
167
|
+
- spec/govuk_seed_crawler/cli_parser_spec.rb
|
168
|
+
- spec/govuk_seed_crawler/cli_runner_spec.rb
|
169
|
+
- spec/govuk_seed_crawler/indexer_spec.rb
|
170
|
+
- spec/govuk_seed_crawler/seeder_spec.rb
|
171
|
+
- spec/integration/govuk_seed_crawler_spec.rb
|
172
|
+
- spec/spec_helper.rb
|
173
|
+
homepage: https://github.gds/gds/govuk_seed_crawler
|
174
|
+
licenses:
|
175
|
+
- MIT
|
176
|
+
metadata: {}
|
177
|
+
post_install_message:
|
178
|
+
rdoc_options: []
|
179
|
+
require_paths:
|
180
|
+
- lib
|
181
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
182
|
+
requirements:
|
183
|
+
- - ">="
|
184
|
+
- !ruby/object:Gem::Version
|
185
|
+
version: '0'
|
186
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
187
|
+
requirements:
|
188
|
+
- - ">="
|
189
|
+
- !ruby/object:Gem::Version
|
190
|
+
version: '0'
|
191
|
+
requirements: []
|
192
|
+
rubyforge_project:
|
193
|
+
rubygems_version: 2.2.2
|
194
|
+
signing_key:
|
195
|
+
specification_version: 4
|
196
|
+
summary: Retrieves a list of URLs to seed the crawler by publishing them to a RabbitMQ
|
197
|
+
exchange.
|
198
|
+
test_files:
|
199
|
+
- spec/govuk_seed_crawler/amqp_client_spec.rb
|
200
|
+
- spec/govuk_seed_crawler/cli_parser_spec.rb
|
201
|
+
- spec/govuk_seed_crawler/cli_runner_spec.rb
|
202
|
+
- spec/govuk_seed_crawler/indexer_spec.rb
|
203
|
+
- spec/govuk_seed_crawler/seeder_spec.rb
|
204
|
+
- spec/integration/govuk_seed_crawler_spec.rb
|
205
|
+
- spec/spec_helper.rb
|