govuk_seed_crawler 2.1.0 → 3.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.github/dependabot.yml +10 -0
- data/.github/workflows/ci.yml +47 -0
- data/.rubocop.yml +19 -0
- data/.ruby-version +1 -1
- data/CHANGELOG.md +11 -0
- data/Gemfile +1 -1
- data/{LICENSE.txt → LICENCE} +2 -2
- data/README.md +4 -0
- data/Rakefile +5 -9
- data/govuk_seed_crawler.gemspec +15 -14
- data/lib/govuk_seed_crawler/amqp_client.rb +3 -3
- data/lib/govuk_seed_crawler/cli_parser.rb +23 -23
- data/lib/govuk_seed_crawler/cli_runner.rb +2 -2
- data/lib/govuk_seed_crawler/indexer.rb +2 -2
- data/lib/govuk_seed_crawler/version.rb +1 -1
- data/lib/govuk_seed_crawler.rb +16 -12
- data/spec/govuk_seed_crawler/amqp_client_spec.rb +33 -34
- data/spec/govuk_seed_crawler/cli_parser_spec.rb +32 -46
- data/spec/govuk_seed_crawler/cli_runner_spec.rb +16 -36
- data/spec/govuk_seed_crawler/indexer_spec.rb +13 -16
- data/spec/govuk_seed_crawler/seeder_spec.rb +25 -27
- data/spec/integration/govuk_seed_crawler_spec.rb +28 -28
- data/spec/spec_helper.rb +8 -10
- metadata +84 -84
- data/Jenkinsfile +0 -49
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: b0d80cc9a1bf29784700e23b1f8c883fc6012a2e7a47776cc3af04ca87f253e7
|
4
|
+
data.tar.gz: ca2d04361ff8e9d9b3cf34f0432eb786b026ffcfd035b1a5faeed6c32801880a
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 895569f56489bbfce199620c211742b226b56d0cc8a03d17451b8a382f2743ba1d7c7821ff9fe75925d411d9b99a595981cf72fb5334907f59f33bf0a24ec000
|
7
|
+
data.tar.gz: 8ea165b2ed3d62a63a5a331b03e14e2fe0dd68713bf7732d28bd2b5cfc4e20a6b5a4932c503fc0514d1a0ea1c6d58343afe864612d8190a2521638d62eea0e83
|
@@ -0,0 +1,47 @@
|
|
1
|
+
on: [push, pull_request]
|
2
|
+
|
3
|
+
jobs:
|
4
|
+
# This matrix job runs the test suite against multiple Ruby versions
|
5
|
+
test_matrix:
|
6
|
+
strategy:
|
7
|
+
fail-fast: false
|
8
|
+
matrix:
|
9
|
+
# Due to https://github.com/actions/runner/issues/849, we have to use quotes for '3.0'
|
10
|
+
ruby: [ 2.7, '3.0', 3.1 ]
|
11
|
+
runs-on: ubuntu-latest
|
12
|
+
services:
|
13
|
+
rabbitmq:
|
14
|
+
image: rabbitmq
|
15
|
+
env:
|
16
|
+
RABBITMQ_DEFAULT_USER: rabbitmq
|
17
|
+
RABBITMQ_DEFAULT_PASS: rabbitmq
|
18
|
+
ports:
|
19
|
+
- 5672:5672
|
20
|
+
steps:
|
21
|
+
- uses: actions/checkout@v3
|
22
|
+
- uses: ruby/setup-ruby@v1
|
23
|
+
with:
|
24
|
+
ruby-version: ${{ matrix.ruby }}
|
25
|
+
bundler-cache: true
|
26
|
+
- run: bundle exec rake
|
27
|
+
env:
|
28
|
+
AMQP_USER: rabbitmq
|
29
|
+
AMQP_PASS: rabbitmq
|
30
|
+
|
31
|
+
# Branch protection rules cannot directly depend on status checks from matrix jobs.
|
32
|
+
# So instead we define `test` as a dummy job which only runs after the preceding `test_matrix` checks have passed.
|
33
|
+
# Solution inspired by: https://github.community/t/status-check-for-a-matrix-jobs/127354/3
|
34
|
+
test:
|
35
|
+
needs: test_matrix
|
36
|
+
runs-on: ubuntu-latest
|
37
|
+
steps:
|
38
|
+
- run: echo "All matrix tests have passed 🚀"
|
39
|
+
|
40
|
+
publish:
|
41
|
+
needs: test
|
42
|
+
if: ${{ github.ref == 'refs/heads/main' }}
|
43
|
+
permissions:
|
44
|
+
contents: write
|
45
|
+
uses: alphagov/govuk-infrastructure/.github/workflows/publish-rubygem.yaml@main
|
46
|
+
secrets:
|
47
|
+
GEM_HOST_API_KEY: ${{ secrets.ALPHAGOV_RUBYGEMS_API_KEY }}
|
data/.rubocop.yml
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
inherit_gem:
|
2
|
+
rubocop-govuk:
|
3
|
+
- config/default.yml
|
4
|
+
- config/rake.yml
|
5
|
+
- config/rspec.yml
|
6
|
+
|
7
|
+
inherit_mode:
|
8
|
+
merge:
|
9
|
+
- Exclude
|
10
|
+
|
11
|
+
# **************************************************************
|
12
|
+
# TRY NOT TO ADD OVERRIDES IN THIS FILE
|
13
|
+
#
|
14
|
+
# This repo is configured to follow the RuboCop GOV.UK styleguide.
|
15
|
+
# Any rules you override here will cause this repo to diverge from
|
16
|
+
# the way we write code in all other GOV.UK repos.
|
17
|
+
#
|
18
|
+
# See https://github.com/alphagov/rubocop-govuk/blob/main/CONTRIBUTING.md
|
19
|
+
# **************************************************************
|
data/.ruby-version
CHANGED
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
2.7.6
|
data/CHANGELOG.md
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
# Changelog
|
2
|
+
|
3
|
+
- We use the [GOV.UK versioning guidelines](https://docs.publishing.service.gov.uk/manual/publishing-a-ruby-gem.html#versioning).
|
4
|
+
- Mark breaking changes with `BREAKING:`. Be sure to include instructions on how applications should be upgraded.
|
5
|
+
- Don't include changes that are purely internal. The CHANGELOG should be a
|
6
|
+
useful summary for people upgrading their application, not a replication
|
7
|
+
of the commit log.
|
8
|
+
|
9
|
+
## Unreleased
|
10
|
+
|
11
|
+
- Drop support for Ruby < 2.7
|
data/Gemfile
CHANGED
data/{LICENSE.txt → LICENCE}
RENAMED
@@ -1,6 +1,6 @@
|
|
1
|
-
|
1
|
+
The MIT License (MIT)
|
2
2
|
|
3
|
-
|
3
|
+
Copyright (C) 2014 Crown Copyright (Government Digital Service)
|
4
4
|
|
5
5
|
Permission is hereby granted, free of charge, to any person obtaining
|
6
6
|
a copy of this software and associated documentation files (the
|
data/README.md
CHANGED
data/Rakefile
CHANGED
@@ -1,11 +1,7 @@
|
|
1
|
-
require
|
1
|
+
require "rspec/core/rake_task"
|
2
|
+
require "rubocop/rake_task"
|
2
3
|
|
3
|
-
RSpec::Core::RakeTask.new(:spec)
|
4
|
-
|
5
|
-
end
|
4
|
+
RSpec::Core::RakeTask.new(:spec)
|
5
|
+
RuboCop::RakeTask.new
|
6
6
|
|
7
|
-
|
8
|
-
task.pattern = FileList['spec/integration/**/*_spec.rb']
|
9
|
-
end
|
10
|
-
|
11
|
-
task :default => :spec
|
7
|
+
task default: %i[rubocop spec]
|
data/govuk_seed_crawler.gemspec
CHANGED
@@ -1,35 +1,36 @@
|
|
1
|
-
|
2
|
-
lib = File.expand_path('../lib', __FILE__)
|
1
|
+
lib = File.expand_path("lib", __dir__)
|
3
2
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
-
require
|
3
|
+
require "govuk_seed_crawler/version"
|
5
4
|
|
6
5
|
Gem::Specification.new do |spec|
|
7
6
|
spec.name = "govuk_seed_crawler"
|
8
7
|
spec.version = GovukSeedCrawler::VERSION
|
9
|
-
spec.authors = [
|
8
|
+
spec.authors = ["GOV.UK developers"]
|
10
9
|
spec.email = ["govuk-dev@digital.cabinet-office.gov.uk"]
|
11
|
-
spec.summary =
|
10
|
+
spec.summary = "Retrieves a list of URLs to seed the crawler by publishing them to a RabbitMQ exchange."
|
12
11
|
spec.homepage = "https://github.com/alphagov/govuk_seed_crawler"
|
13
12
|
spec.license = "MIT"
|
14
13
|
|
14
|
+
spec.required_ruby_version = ">= 2.7"
|
15
|
+
|
15
16
|
spec.files = `git ls-files -z`.split("\x0")
|
16
17
|
spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
17
18
|
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
18
|
-
spec.require_paths = [
|
19
|
+
spec.require_paths = %w[lib]
|
19
20
|
|
20
|
-
spec.add_runtime_dependency "bunny", "
|
21
|
-
spec.add_runtime_dependency "crack", "0.4.
|
22
|
-
spec.add_runtime_dependency "
|
23
|
-
spec.add_runtime_dependency "nokogiri", "~> 1.6.0"
|
21
|
+
spec.add_runtime_dependency "bunny", ">= 1.3", "< 3.0"
|
22
|
+
spec.add_runtime_dependency "crack", "0.4.5"
|
23
|
+
spec.add_runtime_dependency "nokogiri", ">= 1.6", "< 1.14"
|
24
24
|
# Something, somewhere, sometimes requires public_suffix.
|
25
25
|
# public_suffix > 1.5 requires ruby > 2.
|
26
|
-
spec.add_runtime_dependency "public_suffix", "
|
27
|
-
spec.add_runtime_dependency "sitemap-parser", "
|
26
|
+
spec.add_runtime_dependency "public_suffix", ">= 1.4.6", "< 5.1.0"
|
27
|
+
spec.add_runtime_dependency "sitemap-parser", ">= 0.3", "< 0.6"
|
28
28
|
spec.add_runtime_dependency "slop", "~> 3.6.0"
|
29
29
|
|
30
30
|
spec.add_development_dependency "pry"
|
31
|
-
spec.add_development_dependency "rake"
|
31
|
+
spec.add_development_dependency "rake"
|
32
32
|
spec.add_development_dependency "rspec", "~> 3.0"
|
33
33
|
spec.add_development_dependency "rspec-mocks", "~> 3.0"
|
34
|
-
spec.add_development_dependency "
|
34
|
+
spec.add_development_dependency "rubocop-govuk", "4.8.0"
|
35
|
+
spec.add_development_dependency "webmock", "~> 3.18"
|
35
36
|
end
|
@@ -1,4 +1,4 @@
|
|
1
|
-
require
|
1
|
+
require "bunny"
|
2
2
|
|
3
3
|
module GovukSeedCrawler
|
4
4
|
class AmqpClient
|
@@ -21,8 +21,8 @@ module GovukSeedCrawler
|
|
21
21
|
|
22
22
|
GovukSeedCrawler.logger.debug("Publishing '#{body}' to topic '#{topic}'")
|
23
23
|
|
24
|
-
@channel.topic(exchange, :
|
25
|
-
.publish(body, :
|
24
|
+
@channel.topic(exchange, durable: true)
|
25
|
+
.publish(body, routing_key: topic)
|
26
26
|
end
|
27
27
|
end
|
28
28
|
end
|
@@ -1,4 +1,4 @@
|
|
1
|
-
require
|
1
|
+
require "slop"
|
2
2
|
|
3
3
|
module GovukSeedCrawler
|
4
4
|
class CLIException < StandardError
|
@@ -12,17 +12,17 @@ module GovukSeedCrawler
|
|
12
12
|
|
13
13
|
class CLIParser
|
14
14
|
DEFAULTS = {
|
15
|
-
:
|
16
|
-
:
|
17
|
-
:
|
18
|
-
:
|
19
|
-
:
|
20
|
-
:
|
21
|
-
:
|
22
|
-
:
|
23
|
-
:
|
24
|
-
:
|
25
|
-
:
|
15
|
+
exchange: "govuk_crawler_exchange",
|
16
|
+
help: nil,
|
17
|
+
host: "localhost",
|
18
|
+
password: "guest",
|
19
|
+
port: "5672",
|
20
|
+
quiet: false,
|
21
|
+
topic: "#",
|
22
|
+
username: "guest",
|
23
|
+
verbose: false,
|
24
|
+
version: nil,
|
25
|
+
vhost: "/",
|
26
26
|
}.freeze
|
27
27
|
|
28
28
|
ENV_AMQP_PASS_KEY = "GOVUK_CRAWLER_AMQP_PASS".freeze
|
@@ -32,19 +32,19 @@ module GovukSeedCrawler
|
|
32
32
|
end
|
33
33
|
|
34
34
|
def options
|
35
|
-
Slop.parse!(@argv_array, :
|
36
|
-
banner
|
37
|
-
Usage: #{$PROGRAM_NAME} site_root [options]
|
35
|
+
Slop.parse!(@argv_array, help: true) do
|
36
|
+
banner <<~HELP
|
37
|
+
Usage: #{$PROGRAM_NAME} site_root [options]
|
38
38
|
|
39
|
-
Seeds an AMQP topic exchange with messages, each containing a URL, for the GOV.UK Crawler Worker
|
40
|
-
to consume:
|
39
|
+
Seeds an AMQP topic exchange with messages, each containing a URL, for the GOV.UK Crawler Worker
|
40
|
+
to consume:
|
41
41
|
|
42
|
-
https://github.com/alphagov/govuk_crawler_worker
|
42
|
+
https://github.com/alphagov/govuk_crawler_worker
|
43
43
|
|
44
|
-
The AMQP password can also be set as an environment variable and will be read from
|
45
|
-
`#{ENV_AMQP_PASS_KEY}`. If both the environment variable and command-line option for password
|
46
|
-
are set, the environment variable will take higher precedent.
|
47
|
-
|
44
|
+
The AMQP password can also be set as an environment variable and will be read from
|
45
|
+
`#{ENV_AMQP_PASS_KEY}`. If both the environment variable and command-line option for password
|
46
|
+
are set, the environment variable will take higher precedent.
|
47
|
+
HELP
|
48
48
|
|
49
49
|
on :version, "Display version and exit" do
|
50
50
|
puts "Version: #{GovukSeedCrawler::VERSION}"
|
@@ -75,7 +75,7 @@ are set, the environment variable will take higher precedent.
|
|
75
75
|
options_hash = opts.to_hash
|
76
76
|
options_hash[:password] = ENV[ENV_AMQP_PASS_KEY] unless ENV[ENV_AMQP_PASS_KEY].nil?
|
77
77
|
|
78
|
-
|
78
|
+
[options_hash, @argv_array.first]
|
79
79
|
end
|
80
80
|
end
|
81
81
|
end
|
@@ -1,4 +1,4 @@
|
|
1
|
-
require
|
1
|
+
require "sitemap-parser"
|
2
2
|
|
3
3
|
module GovukSeedCrawler
|
4
4
|
class Indexer
|
@@ -9,7 +9,7 @@ module GovukSeedCrawler
|
|
9
9
|
|
10
10
|
GovukSeedCrawler.logger.info("Retrieving list of URLs for #{site_root}")
|
11
11
|
|
12
|
-
sitemap = SitemapParser.new("#{site_root}/sitemap.xml", {recurse: true})
|
12
|
+
sitemap = SitemapParser.new("#{site_root}/sitemap.xml", { recurse: true })
|
13
13
|
@urls = sitemap.to_a
|
14
14
|
|
15
15
|
GovukSeedCrawler.logger.info("Found #{@urls.count} URLs")
|
data/lib/govuk_seed_crawler.rb
CHANGED
@@ -1,17 +1,21 @@
|
|
1
|
-
require
|
2
|
-
require
|
3
|
-
require
|
4
|
-
require
|
5
|
-
require
|
6
|
-
require
|
1
|
+
require "govuk_seed_crawler/amqp_client"
|
2
|
+
require "govuk_seed_crawler/cli_parser"
|
3
|
+
require "govuk_seed_crawler/cli_runner"
|
4
|
+
require "govuk_seed_crawler/indexer"
|
5
|
+
require "govuk_seed_crawler/seeder"
|
6
|
+
require "govuk_seed_crawler/version"
|
7
7
|
|
8
8
|
module GovukSeedCrawler
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
9
|
+
class << self
|
10
|
+
attr_writer :logger
|
11
|
+
|
12
|
+
def logger
|
13
|
+
unless @logger
|
14
|
+
@logger = Logger.new($stdout)
|
15
|
+
@logger.level = Logger::INFO
|
16
|
+
end
|
14
17
|
|
15
|
-
|
18
|
+
@logger
|
19
|
+
end
|
16
20
|
end
|
17
21
|
end
|
@@ -1,55 +1,54 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
1
|
describe GovukSeedCrawler::AmqpClient do
|
4
2
|
let(:exchange) { "govuk_seed_crawler_spec_exchange" }
|
5
|
-
let(:options)
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
3
|
+
let(:options) do
|
4
|
+
{
|
5
|
+
host: ENV.fetch("AMQP_HOST", "localhost"),
|
6
|
+
user: ENV.fetch("AMQP_USER", "govuk_seed_crawler"),
|
7
|
+
pass: ENV.fetch("AMQP_PASS", "govuk_seed_crawler"),
|
8
|
+
}
|
9
|
+
end
|
11
10
|
|
12
11
|
it "responds to #channel" do
|
13
|
-
expect(
|
12
|
+
expect(described_class.new(options)).to respond_to(:channel)
|
14
13
|
end
|
15
14
|
|
16
15
|
it "responds to #close" do
|
17
|
-
expect(
|
16
|
+
expect(described_class.new(options)).to respond_to(:close)
|
18
17
|
end
|
19
18
|
|
20
19
|
it "closes the connection to the AMQP server" do
|
21
|
-
mock_bunny =
|
22
|
-
|
20
|
+
mock_bunny = instance_double(Bunny::Session,
|
21
|
+
start: true,
|
22
|
+
create_channel: true,
|
23
|
+
close: true)
|
23
24
|
allow(Bunny).to receive(:new).and_return(mock_bunny)
|
24
25
|
expect(mock_bunny).to receive(:close).once
|
25
26
|
|
26
|
-
|
27
|
+
described_class.new(options).close
|
27
28
|
end
|
28
29
|
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
}.to raise_exception(RuntimeError, "Message body cannot be nil")
|
47
|
-
end
|
30
|
+
describe "#publish" do
|
31
|
+
it "raises an exception if exchange is nil" do
|
32
|
+
expect {
|
33
|
+
described_class.new(options).publish(nil, "#", "some body")
|
34
|
+
}.to raise_exception(RuntimeError, "Exchange cannot be nil")
|
35
|
+
end
|
36
|
+
|
37
|
+
it "raises an exception if topic is nil" do
|
38
|
+
expect {
|
39
|
+
described_class.new(options).publish(exchange, nil, "some body")
|
40
|
+
}.to raise_exception(RuntimeError, "Topic cannot be nil")
|
41
|
+
end
|
42
|
+
|
43
|
+
it "raises an exception if body is nil" do
|
44
|
+
expect {
|
45
|
+
described_class.new(options).publish(exchange, "#", nil)
|
46
|
+
}.to raise_exception(RuntimeError, "Message body cannot be nil")
|
48
47
|
end
|
49
48
|
|
50
49
|
it "allows publishing against an exchange" do
|
51
|
-
expect(
|
52
|
-
.
|
50
|
+
expect(described_class.new(options).publish(exchange, "#", "some body"))
|
51
|
+
.not_to be_nil
|
53
52
|
end
|
54
53
|
end
|
55
54
|
end
|
@@ -1,28 +1,26 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
1
|
describe GovukSeedCrawler::CLIParser do
|
4
2
|
it "requires the site_root to be provided" do
|
5
3
|
expect {
|
6
|
-
|
4
|
+
described_class.new([]).parse
|
7
5
|
}.to raise_exception(GovukSeedCrawler::CLIException, "site_root must be provided")
|
8
6
|
end
|
9
7
|
|
10
8
|
it "provides the defaults when just given the site_root" do
|
11
|
-
options, site_root =
|
9
|
+
options, site_root = described_class.new(["https://www.example.com"]).parse
|
12
10
|
|
13
11
|
expect(options).to eq(GovukSeedCrawler::CLIParser::DEFAULTS)
|
14
12
|
expect(site_root).to eq("https://www.example.com")
|
15
13
|
end
|
16
14
|
|
17
|
-
it "
|
15
|
+
it "tells us when we've given too many arguments" do
|
18
16
|
expect {
|
19
|
-
|
17
|
+
described_class.new(%w[a b]).parse
|
20
18
|
}.to raise_exception(GovukSeedCrawler::CLIException, "too many arguments provided")
|
21
19
|
end
|
22
20
|
|
23
|
-
it "
|
21
|
+
it "nests the help message in with any CLIExceptions we raise" do
|
24
22
|
expect {
|
25
|
-
|
23
|
+
described_class.new(%w[a b]).parse
|
26
24
|
}.to raise_exception(GovukSeedCrawler::CLIException) { |e|
|
27
25
|
expect(e.help).to include("Usage: ")
|
28
26
|
}
|
@@ -31,48 +29,36 @@ describe GovukSeedCrawler::CLIParser do
|
|
31
29
|
describe "catching STDOUT" do
|
32
30
|
it "shows the help banner when provided -h" do
|
33
31
|
# Get a valid options response as help closes early with SystemExit.
|
34
|
-
options =
|
35
|
-
|
36
|
-
temp_stdout do |caught_stdout|
|
37
|
-
expect {
|
38
|
-
_, _ = GovukSeedCrawler::CLIParser.new(["-h"]).parse
|
39
|
-
}.to raise_exception(SystemExit) { |e|
|
40
|
-
expect(e.status).to eq(0)
|
41
|
-
}
|
32
|
+
options = described_class.new(["http://www.foo.com/"]).options
|
42
33
|
|
43
|
-
|
44
|
-
|
34
|
+
expect { described_class.new(["-h"]).parse }
|
35
|
+
.to output("#{options.help}\n").to_stdout
|
36
|
+
.and raise_exception(SystemExit) { |e| expect(e.status).to eq(0) }
|
45
37
|
end
|
46
38
|
|
47
|
-
it "
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
}.to raise_exception(SystemExit) { |e|
|
52
|
-
expect(e.status).to eq(0)
|
53
|
-
}
|
54
|
-
|
55
|
-
expect(caught_stdout.strip).to eq("Version: #{GovukSeedCrawler::VERSION}")
|
56
|
-
end
|
39
|
+
it "shows the version number and exit" do
|
40
|
+
expect { described_class.new(["--version"]).parse }
|
41
|
+
.to output("Version: #{GovukSeedCrawler::VERSION}\n").to_stdout
|
42
|
+
.and raise_exception(SystemExit) { |e| expect(e.status).to eq(0) }
|
57
43
|
end
|
58
44
|
end
|
59
45
|
|
60
46
|
describe "passing in valid arguments" do
|
61
|
-
let(:arguments)
|
47
|
+
let(:arguments) do
|
62
48
|
[
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
49
|
+
"https://www.override.com/",
|
50
|
+
"--host rabbitmq.some.custom.vhost",
|
51
|
+
"--port 4567",
|
52
|
+
"--username foo",
|
53
|
+
"--password bar",
|
54
|
+
"--exchange some_custom_exchange",
|
55
|
+
"--topic some_custom_topic",
|
56
|
+
"--vhost a_vhost",
|
57
|
+
"--verbose",
|
72
58
|
].join(" ").split(" ")
|
73
|
-
|
59
|
+
end
|
74
60
|
|
75
|
-
it "
|
61
|
+
it "overrides all of the default arguments that we're providing" do
|
76
62
|
overriden = {
|
77
63
|
host: "rabbitmq.some.custom.vhost",
|
78
64
|
port: "4567",
|
@@ -84,14 +70,14 @@ describe GovukSeedCrawler::CLIParser do
|
|
84
70
|
quiet: false,
|
85
71
|
verbose: true,
|
86
72
|
version: nil,
|
87
|
-
vhost: "a_vhost"
|
73
|
+
vhost: "a_vhost",
|
88
74
|
}
|
89
75
|
|
90
|
-
expect(
|
76
|
+
expect(described_class.new(arguments).parse.first).to eq(overriden)
|
91
77
|
end
|
92
78
|
|
93
|
-
it "
|
94
|
-
options,
|
79
|
+
it "sets the --quiet value" do
|
80
|
+
options, = described_class.new(["foo.com", "--quiet"]).parse
|
95
81
|
expect(options).to eq(GovukSeedCrawler::CLIParser::DEFAULTS.merge(quiet: true))
|
96
82
|
end
|
97
83
|
|
@@ -107,14 +93,14 @@ describe GovukSeedCrawler::CLIParser do
|
|
107
93
|
it "sets the password if set using an environment variable" do
|
108
94
|
set_amqp_pass("foobar")
|
109
95
|
|
110
|
-
expect(
|
96
|
+
expect(described_class.new(["http://www.example.com"]).parse.first)
|
111
97
|
.to include(password: "foobar")
|
112
98
|
end
|
113
99
|
|
114
100
|
it "picks the environment variable over the parameter if both are set" do
|
115
101
|
set_amqp_pass("bar")
|
116
102
|
|
117
|
-
expect(
|
103
|
+
expect(described_class.new(["http://www.example.com", "--password", "foo"]).parse.first)
|
118
104
|
.to include(password: "bar")
|
119
105
|
end
|
120
106
|
end
|
@@ -1,45 +1,25 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
1
|
describe GovukSeedCrawler::CLIRunner do
|
4
2
|
describe "printing the version" do
|
5
|
-
it "
|
3
|
+
it "does not try to connect to an AMQP server" do
|
6
4
|
expect(Bunny).not_to receive(:new)
|
7
5
|
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
}.to raise_exception(SystemExit) { |exit|
|
12
|
-
expect(exit.status).to eq(0)
|
13
|
-
}
|
14
|
-
|
15
|
-
expect(caught_stdout.strip).to eq("Version: #{GovukSeedCrawler::VERSION}")
|
16
|
-
end
|
6
|
+
expect { described_class.new(["--version"]).run }
|
7
|
+
.to output("Version: #{GovukSeedCrawler::VERSION}\n").to_stdout
|
8
|
+
.and raise_exception(SystemExit) { |e| expect(e.status).to eq(0) }
|
17
9
|
end
|
18
10
|
end
|
19
11
|
|
20
12
|
describe "catching any CLIException objects and exiting with a status 1" do
|
21
13
|
it "prints to STDOUT for too many arguments" do
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
}.to raise_exception(SystemExit) { |exit|
|
26
|
-
expect(exit.status).to eq(2)
|
27
|
-
}
|
28
|
-
|
29
|
-
expect(caught_stdout.strip).to include("too many arguments provided")
|
30
|
-
end
|
14
|
+
expect { described_class.new(%w[a b]).run }
|
15
|
+
.to output(/\Atoo many arguments provided/).to_stdout
|
16
|
+
.and raise_exception(SystemExit) { |e| expect(e.status).to eq(2) }
|
31
17
|
end
|
32
18
|
|
33
19
|
it "prints to STDOUT when site_root not set" do
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
}.to raise_exception(SystemExit) { |exit|
|
38
|
-
expect(exit.status).to eq(2)
|
39
|
-
}
|
40
|
-
|
41
|
-
expect(caught_stdout.strip).to include("site_root must be provided")
|
42
|
-
end
|
20
|
+
expect { described_class.new(["--verbose"]).run }
|
21
|
+
.to output(/\Asite_root must be provided/).to_stdout
|
22
|
+
.and raise_exception(SystemExit) { |e| expect(e.status).to eq(2) }
|
43
23
|
end
|
44
24
|
end
|
45
25
|
|
@@ -49,26 +29,26 @@ describe GovukSeedCrawler::CLIRunner do
|
|
49
29
|
end
|
50
30
|
|
51
31
|
it "defaults to INFO" do
|
52
|
-
|
32
|
+
described_class.new(["http://www.example.com"])
|
53
33
|
expect(GovukSeedCrawler.logger.level).to eq(Logger::INFO)
|
54
34
|
end
|
55
35
|
|
56
36
|
it "sets to ERROR for quite" do
|
57
|
-
|
37
|
+
described_class.new(["http://www.example.com", "--quiet"])
|
58
38
|
expect(GovukSeedCrawler.logger.level).to eq(Logger::ERROR)
|
59
39
|
end
|
60
40
|
|
61
41
|
it "sets to DEBUG for verbose" do
|
62
|
-
|
42
|
+
described_class.new(["http://www.example.com", "--verbose"])
|
63
43
|
expect(GovukSeedCrawler.logger.level).to eq(Logger::DEBUG)
|
64
44
|
end
|
65
45
|
end
|
66
46
|
|
67
47
|
describe "#run" do
|
68
48
|
it "passes all options through to seed" do
|
69
|
-
expect(GovukSeedCrawler::Seeder).to receive(:seed)
|
70
|
-
with("http://www.example.com", GovukSeedCrawler::CLIParser::DEFAULTS).once
|
71
|
-
|
49
|
+
expect(GovukSeedCrawler::Seeder).to receive(:seed)
|
50
|
+
.with("http://www.example.com", GovukSeedCrawler::CLIParser::DEFAULTS).once
|
51
|
+
described_class.new(["http://www.example.com"]).run
|
72
52
|
end
|
73
53
|
end
|
74
54
|
end
|
@@ -1,21 +1,18 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
1
|
describe GovukSeedCrawler::Indexer do
|
4
|
-
|
5
|
-
|
6
|
-
context "under normal usage" do
|
7
|
-
let(:mock_parser) do
|
8
|
-
double(:mock_parser, :to_a => [])
|
9
|
-
end
|
2
|
+
let(:mock_parser) { instance_double(SitemapParser, to_a: []) }
|
10
3
|
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
4
|
+
it "responds to Indexer#urls" do
|
5
|
+
allow(SitemapParser).to receive(:new).and_return(mock_parser)
|
6
|
+
instance = nil
|
7
|
+
expect { instance = described_class.new("https://example.com") }
|
8
|
+
.to output.to_stdout
|
9
|
+
expect(instance).to respond_to(:urls)
|
10
|
+
end
|
15
11
|
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
12
|
+
it "calls SitemapParser with the sitemap file" do
|
13
|
+
allow(SitemapParser).to receive(:new).with("https://example.com/sitemap.xml", { recurse: true }).and_return(mock_parser)
|
14
|
+
expect { described_class.new("https://example.com") }
|
15
|
+
.to output.to_stdout
|
16
|
+
expect(SitemapParser).to have_received(:new)
|
20
17
|
end
|
21
18
|
end
|
@@ -1,29 +1,27 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
1
|
describe GovukSeedCrawler::Seeder do
|
4
2
|
let(:exchange) { "seeder_test_exchange" }
|
5
3
|
let(:topic) { "#" }
|
6
4
|
let(:root_url) { "https://www.example.com" }
|
7
5
|
|
8
|
-
let(:options)
|
9
|
-
|
10
|
-
|
11
|
-
|
6
|
+
let(:options) do
|
7
|
+
{
|
8
|
+
exchange: exchange,
|
9
|
+
topic: topic,
|
10
|
+
}
|
11
|
+
end
|
12
12
|
|
13
|
-
let(:mock_get_urls) {
|
14
|
-
let(:mock_amqp_client) {
|
13
|
+
let(:mock_get_urls) { instance_double(GovukSeedCrawler::Indexer, urls: true) }
|
14
|
+
let(:mock_amqp_client) { instance_double(GovukSeedCrawler::AmqpClient, close: true) }
|
15
15
|
|
16
16
|
let(:urls) do
|
17
17
|
[
|
18
|
-
|
19
|
-
|
20
|
-
|
18
|
+
"https://example.com/foo",
|
19
|
+
"https://example.com/bar",
|
20
|
+
"https://example.com/baz",
|
21
21
|
]
|
22
22
|
end
|
23
23
|
|
24
|
-
|
25
|
-
|
26
|
-
before(:each) do
|
24
|
+
before do
|
27
25
|
allow(GovukSeedCrawler::Indexer).to receive(:new)
|
28
26
|
.with(root_url)
|
29
27
|
.and_return(mock_get_urls)
|
@@ -32,20 +30,20 @@ describe GovukSeedCrawler::Seeder do
|
|
32
30
|
.with(options).and_return(mock_amqp_client)
|
33
31
|
end
|
34
32
|
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
.with(exchange, topic, url)
|
40
|
-
end
|
41
|
-
|
42
|
-
subject
|
33
|
+
it "publishes urls to the queue" do
|
34
|
+
urls.each do |url|
|
35
|
+
expect(mock_amqp_client).to receive(:publish)
|
36
|
+
.with(exchange, topic, url)
|
43
37
|
end
|
44
38
|
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
39
|
+
expect { described_class.seed(root_url, options) }
|
40
|
+
.to output.to_stdout
|
41
|
+
end
|
42
|
+
|
43
|
+
it "closes the connection when done" do
|
44
|
+
allow(mock_amqp_client).to receive(:publish)
|
45
|
+
expect(mock_amqp_client).to receive(:close)
|
46
|
+
expect { described_class.seed(root_url, options) }
|
47
|
+
.to output.to_stdout
|
50
48
|
end
|
51
49
|
end
|
@@ -1,9 +1,8 @@
|
|
1
|
-
require
|
2
|
-
require 'spec_helper'
|
1
|
+
require "json"
|
3
2
|
|
4
3
|
describe GovukSeedCrawler do
|
5
4
|
def stub_sitemap
|
6
|
-
sitemap = %
|
5
|
+
sitemap = %(<?xml version="1.0" encoding="UTF-8"?>
|
7
6
|
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
|
8
7
|
<url>
|
9
8
|
<loc>https://www.gov.uk/</loc>
|
@@ -15,45 +14,46 @@ describe GovukSeedCrawler do
|
|
15
14
|
<loc>https://www.gov.uk/help</loc>
|
16
15
|
</url>
|
17
16
|
</urlset>
|
18
|
-
|
17
|
+
)
|
19
18
|
|
20
|
-
stub_request(:get, "https://www.gov.uk/sitemap.xml")
|
21
|
-
to_return(:
|
19
|
+
stub_request(:get, "https://www.gov.uk/sitemap.xml")
|
20
|
+
.to_return(status: 200, body: sitemap, headers: {})
|
22
21
|
end
|
23
22
|
|
23
|
+
subject { GovukSeedCrawler::Seeder.seed(site_root, options) }
|
24
|
+
|
24
25
|
let(:vhost) { "/" }
|
25
26
|
let(:exchange_name) { "govuk_seed_crawler_integration_exchange" }
|
26
27
|
let(:queue_name) { "govuk_seed_crawler_integration_queue" }
|
27
28
|
let(:topic) { "#" }
|
28
29
|
let(:site_root) { "https://www.gov.uk" }
|
29
|
-
let(:options)
|
30
|
-
|
31
|
-
:
|
32
|
-
:
|
33
|
-
:
|
34
|
-
:
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
subject { GovukSeedCrawler::Seeder::seed(site_root, options) }
|
39
|
-
|
40
|
-
before(:each) do
|
41
|
-
@exchange = rabbitmq_client.channel.topic(exchange_name, :durable => true)
|
42
|
-
@queue = rabbitmq_client.channel.queue(queue_name)
|
43
|
-
@queue.bind(@exchange, :routing_key => topic)
|
30
|
+
let(:options) do
|
31
|
+
{
|
32
|
+
host: ENV.fetch("AMQP_HOST", "localhost"),
|
33
|
+
user: ENV.fetch("AMQP_USER", "govuk_seed_crawler"),
|
34
|
+
pass: ENV.fetch("AMQP_PASS", "govuk_seed_crawler"),
|
35
|
+
exchange: exchange_name,
|
36
|
+
topic: topic,
|
37
|
+
}
|
44
38
|
end
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
39
|
+
let(:rabbitmq_client) { GovukSeedCrawler::AmqpClient.new(options) }
|
40
|
+
let(:exchange) { rabbitmq_client.channel.topic(exchange_name, durable: true) }
|
41
|
+
let(:queue) { rabbitmq_client.channel.queue(queue_name) }
|
42
|
+
|
43
|
+
around do |example|
|
44
|
+
queue.bind(exchange, routing_key: topic)
|
45
|
+
example.run
|
46
|
+
queue.unbind(exchange)
|
47
|
+
queue.delete
|
48
|
+
exchange.delete
|
50
49
|
rabbitmq_client.close
|
51
50
|
end
|
52
51
|
|
53
52
|
it "publishes URLs it finds to an AMQP topic exchange" do
|
54
53
|
stub_sitemap
|
55
|
-
|
54
|
+
expect { GovukSeedCrawler::Seeder.seed(site_root, options) }
|
55
|
+
.to output.to_stdout
|
56
56
|
|
57
|
-
expect(
|
57
|
+
expect(queue.message_count).to be(3)
|
58
58
|
end
|
59
59
|
end
|
data/spec/spec_helper.rb
CHANGED
@@ -1,5 +1,7 @@
|
|
1
|
-
require
|
2
|
-
require
|
1
|
+
require "govuk_seed_crawler"
|
2
|
+
require "webmock/rspec"
|
3
|
+
|
4
|
+
WebMock.disable_net_connect!
|
3
5
|
|
4
6
|
RSpec.configure do |config|
|
5
7
|
config.order = :random
|
@@ -27,13 +29,9 @@ RSpec.configure do |config|
|
|
27
29
|
# a real object. This is generally recommended.
|
28
30
|
mocks.verify_partial_doubles = true
|
29
31
|
end
|
30
|
-
end
|
31
32
|
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
yield $stdout.string
|
37
|
-
ensure
|
38
|
-
$stdout = STDOUT
|
33
|
+
config.before do
|
34
|
+
# reset logger before each invocation so we can catch stdout
|
35
|
+
GovukSeedCrawler.logger = nil
|
36
|
+
end
|
39
37
|
end
|
metadata
CHANGED
@@ -1,208 +1,207 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: govuk_seed_crawler
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
5
|
-
prerelease:
|
4
|
+
version: 3.1.0
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- GOV.UK developers
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date:
|
11
|
+
date: 2022-10-31 00:00:00.000000000 Z
|
13
12
|
dependencies:
|
14
13
|
- !ruby/object:Gem::Dependency
|
15
14
|
name: bunny
|
16
15
|
requirement: !ruby/object:Gem::Requirement
|
17
|
-
none: false
|
18
16
|
requirements:
|
19
|
-
- -
|
17
|
+
- - ">="
|
20
18
|
- !ruby/object:Gem::Version
|
21
19
|
version: '1.3'
|
20
|
+
- - "<"
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: '3.0'
|
22
23
|
type: :runtime
|
23
24
|
prerelease: false
|
24
25
|
version_requirements: !ruby/object:Gem::Requirement
|
25
|
-
none: false
|
26
26
|
requirements:
|
27
|
-
- -
|
27
|
+
- - ">="
|
28
28
|
- !ruby/object:Gem::Version
|
29
29
|
version: '1.3'
|
30
|
-
-
|
31
|
-
name: crack
|
32
|
-
requirement: !ruby/object:Gem::Requirement
|
33
|
-
none: false
|
34
|
-
requirements:
|
35
|
-
- - '='
|
36
|
-
- !ruby/object:Gem::Version
|
37
|
-
version: 0.4.4
|
38
|
-
type: :runtime
|
39
|
-
prerelease: false
|
40
|
-
version_requirements: !ruby/object:Gem::Requirement
|
41
|
-
none: false
|
42
|
-
requirements:
|
43
|
-
- - '='
|
30
|
+
- - "<"
|
44
31
|
- !ruby/object:Gem::Version
|
45
|
-
version: 0
|
32
|
+
version: '3.0'
|
46
33
|
- !ruby/object:Gem::Dependency
|
47
|
-
name:
|
34
|
+
name: crack
|
48
35
|
requirement: !ruby/object:Gem::Requirement
|
49
|
-
none: false
|
50
36
|
requirements:
|
51
37
|
- - '='
|
52
38
|
- !ruby/object:Gem::Version
|
53
|
-
version:
|
39
|
+
version: 0.4.5
|
54
40
|
type: :runtime
|
55
41
|
prerelease: false
|
56
42
|
version_requirements: !ruby/object:Gem::Requirement
|
57
|
-
none: false
|
58
43
|
requirements:
|
59
44
|
- - '='
|
60
45
|
- !ruby/object:Gem::Version
|
61
|
-
version:
|
46
|
+
version: 0.4.5
|
62
47
|
- !ruby/object:Gem::Dependency
|
63
48
|
name: nokogiri
|
64
49
|
requirement: !ruby/object:Gem::Requirement
|
65
|
-
none: false
|
66
50
|
requirements:
|
67
|
-
- -
|
51
|
+
- - ">="
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '1.6'
|
54
|
+
- - "<"
|
68
55
|
- !ruby/object:Gem::Version
|
69
|
-
version: 1.
|
56
|
+
version: '1.14'
|
70
57
|
type: :runtime
|
71
58
|
prerelease: false
|
72
59
|
version_requirements: !ruby/object:Gem::Requirement
|
73
|
-
none: false
|
74
60
|
requirements:
|
75
|
-
- -
|
61
|
+
- - ">="
|
76
62
|
- !ruby/object:Gem::Version
|
77
|
-
version: 1.6
|
63
|
+
version: '1.6'
|
64
|
+
- - "<"
|
65
|
+
- !ruby/object:Gem::Version
|
66
|
+
version: '1.14'
|
78
67
|
- !ruby/object:Gem::Dependency
|
79
68
|
name: public_suffix
|
80
69
|
requirement: !ruby/object:Gem::Requirement
|
81
|
-
none: false
|
82
70
|
requirements:
|
83
|
-
- -
|
71
|
+
- - ">="
|
84
72
|
- !ruby/object:Gem::Version
|
85
73
|
version: 1.4.6
|
74
|
+
- - "<"
|
75
|
+
- !ruby/object:Gem::Version
|
76
|
+
version: 5.1.0
|
86
77
|
type: :runtime
|
87
78
|
prerelease: false
|
88
79
|
version_requirements: !ruby/object:Gem::Requirement
|
89
|
-
none: false
|
90
80
|
requirements:
|
91
|
-
- -
|
81
|
+
- - ">="
|
92
82
|
- !ruby/object:Gem::Version
|
93
83
|
version: 1.4.6
|
84
|
+
- - "<"
|
85
|
+
- !ruby/object:Gem::Version
|
86
|
+
version: 5.1.0
|
94
87
|
- !ruby/object:Gem::Dependency
|
95
88
|
name: sitemap-parser
|
96
89
|
requirement: !ruby/object:Gem::Requirement
|
97
|
-
none: false
|
98
90
|
requirements:
|
99
|
-
- -
|
91
|
+
- - ">="
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: '0.3'
|
94
|
+
- - "<"
|
100
95
|
- !ruby/object:Gem::Version
|
101
|
-
version: 0.
|
96
|
+
version: '0.6'
|
102
97
|
type: :runtime
|
103
98
|
prerelease: false
|
104
99
|
version_requirements: !ruby/object:Gem::Requirement
|
105
|
-
none: false
|
106
100
|
requirements:
|
107
|
-
- -
|
101
|
+
- - ">="
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0.3'
|
104
|
+
- - "<"
|
108
105
|
- !ruby/object:Gem::Version
|
109
|
-
version: 0.
|
106
|
+
version: '0.6'
|
110
107
|
- !ruby/object:Gem::Dependency
|
111
108
|
name: slop
|
112
109
|
requirement: !ruby/object:Gem::Requirement
|
113
|
-
none: false
|
114
110
|
requirements:
|
115
|
-
- - ~>
|
111
|
+
- - "~>"
|
116
112
|
- !ruby/object:Gem::Version
|
117
113
|
version: 3.6.0
|
118
114
|
type: :runtime
|
119
115
|
prerelease: false
|
120
116
|
version_requirements: !ruby/object:Gem::Requirement
|
121
|
-
none: false
|
122
117
|
requirements:
|
123
|
-
- - ~>
|
118
|
+
- - "~>"
|
124
119
|
- !ruby/object:Gem::Version
|
125
120
|
version: 3.6.0
|
126
121
|
- !ruby/object:Gem::Dependency
|
127
122
|
name: pry
|
128
123
|
requirement: !ruby/object:Gem::Requirement
|
129
|
-
none: false
|
130
124
|
requirements:
|
131
|
-
- -
|
125
|
+
- - ">="
|
132
126
|
- !ruby/object:Gem::Version
|
133
127
|
version: '0'
|
134
128
|
type: :development
|
135
129
|
prerelease: false
|
136
130
|
version_requirements: !ruby/object:Gem::Requirement
|
137
|
-
none: false
|
138
131
|
requirements:
|
139
|
-
- -
|
132
|
+
- - ">="
|
140
133
|
- !ruby/object:Gem::Version
|
141
134
|
version: '0'
|
142
135
|
- !ruby/object:Gem::Dependency
|
143
136
|
name: rake
|
144
137
|
requirement: !ruby/object:Gem::Requirement
|
145
|
-
none: false
|
146
138
|
requirements:
|
147
|
-
- -
|
139
|
+
- - ">="
|
148
140
|
- !ruby/object:Gem::Version
|
149
|
-
version: '0
|
141
|
+
version: '0'
|
150
142
|
type: :development
|
151
143
|
prerelease: false
|
152
144
|
version_requirements: !ruby/object:Gem::Requirement
|
153
|
-
none: false
|
154
145
|
requirements:
|
155
|
-
- -
|
146
|
+
- - ">="
|
156
147
|
- !ruby/object:Gem::Version
|
157
|
-
version: '0
|
148
|
+
version: '0'
|
158
149
|
- !ruby/object:Gem::Dependency
|
159
150
|
name: rspec
|
160
151
|
requirement: !ruby/object:Gem::Requirement
|
161
|
-
none: false
|
162
152
|
requirements:
|
163
|
-
- - ~>
|
153
|
+
- - "~>"
|
164
154
|
- !ruby/object:Gem::Version
|
165
155
|
version: '3.0'
|
166
156
|
type: :development
|
167
157
|
prerelease: false
|
168
158
|
version_requirements: !ruby/object:Gem::Requirement
|
169
|
-
none: false
|
170
159
|
requirements:
|
171
|
-
- - ~>
|
160
|
+
- - "~>"
|
172
161
|
- !ruby/object:Gem::Version
|
173
162
|
version: '3.0'
|
174
163
|
- !ruby/object:Gem::Dependency
|
175
164
|
name: rspec-mocks
|
176
165
|
requirement: !ruby/object:Gem::Requirement
|
177
|
-
none: false
|
178
166
|
requirements:
|
179
|
-
- - ~>
|
167
|
+
- - "~>"
|
180
168
|
- !ruby/object:Gem::Version
|
181
169
|
version: '3.0'
|
182
170
|
type: :development
|
183
171
|
prerelease: false
|
184
172
|
version_requirements: !ruby/object:Gem::Requirement
|
185
|
-
none: false
|
186
173
|
requirements:
|
187
|
-
- - ~>
|
174
|
+
- - "~>"
|
188
175
|
- !ruby/object:Gem::Version
|
189
176
|
version: '3.0'
|
177
|
+
- !ruby/object:Gem::Dependency
|
178
|
+
name: rubocop-govuk
|
179
|
+
requirement: !ruby/object:Gem::Requirement
|
180
|
+
requirements:
|
181
|
+
- - '='
|
182
|
+
- !ruby/object:Gem::Version
|
183
|
+
version: 4.8.0
|
184
|
+
type: :development
|
185
|
+
prerelease: false
|
186
|
+
version_requirements: !ruby/object:Gem::Requirement
|
187
|
+
requirements:
|
188
|
+
- - '='
|
189
|
+
- !ruby/object:Gem::Version
|
190
|
+
version: 4.8.0
|
190
191
|
- !ruby/object:Gem::Dependency
|
191
192
|
name: webmock
|
192
193
|
requirement: !ruby/object:Gem::Requirement
|
193
|
-
none: false
|
194
194
|
requirements:
|
195
|
-
- - ~>
|
195
|
+
- - "~>"
|
196
196
|
- !ruby/object:Gem::Version
|
197
|
-
version:
|
197
|
+
version: '3.18'
|
198
198
|
type: :development
|
199
199
|
prerelease: false
|
200
200
|
version_requirements: !ruby/object:Gem::Requirement
|
201
|
-
none: false
|
202
201
|
requirements:
|
203
|
-
- - ~>
|
202
|
+
- - "~>"
|
204
203
|
- !ruby/object:Gem::Version
|
205
|
-
version:
|
204
|
+
version: '3.18'
|
206
205
|
description:
|
207
206
|
email:
|
208
207
|
- govuk-dev@digital.cabinet-office.gov.uk
|
@@ -211,12 +210,15 @@ executables:
|
|
211
210
|
extensions: []
|
212
211
|
extra_rdoc_files: []
|
213
212
|
files:
|
214
|
-
- .
|
215
|
-
- .
|
216
|
-
- .
|
213
|
+
- ".github/dependabot.yml"
|
214
|
+
- ".github/workflows/ci.yml"
|
215
|
+
- ".gitignore"
|
216
|
+
- ".rspec"
|
217
|
+
- ".rubocop.yml"
|
218
|
+
- ".ruby-version"
|
219
|
+
- CHANGELOG.md
|
217
220
|
- Gemfile
|
218
|
-
-
|
219
|
-
- LICENSE.txt
|
221
|
+
- LICENCE
|
220
222
|
- README.md
|
221
223
|
- Rakefile
|
222
224
|
- bin/seed-crawler
|
@@ -238,27 +240,25 @@ files:
|
|
238
240
|
homepage: https://github.com/alphagov/govuk_seed_crawler
|
239
241
|
licenses:
|
240
242
|
- MIT
|
243
|
+
metadata: {}
|
241
244
|
post_install_message:
|
242
245
|
rdoc_options: []
|
243
246
|
require_paths:
|
244
247
|
- lib
|
245
248
|
required_ruby_version: !ruby/object:Gem::Requirement
|
246
|
-
none: false
|
247
249
|
requirements:
|
248
|
-
- -
|
250
|
+
- - ">="
|
249
251
|
- !ruby/object:Gem::Version
|
250
|
-
version: '
|
252
|
+
version: '2.7'
|
251
253
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
252
|
-
none: false
|
253
254
|
requirements:
|
254
|
-
- -
|
255
|
+
- - ">="
|
255
256
|
- !ruby/object:Gem::Version
|
256
257
|
version: '0'
|
257
258
|
requirements: []
|
258
|
-
|
259
|
-
rubygems_version: 1.8.23.2
|
259
|
+
rubygems_version: 3.3.24
|
260
260
|
signing_key:
|
261
|
-
specification_version:
|
261
|
+
specification_version: 4
|
262
262
|
summary: Retrieves a list of URLs to seed the crawler by publishing them to a RabbitMQ
|
263
263
|
exchange.
|
264
264
|
test_files:
|
data/Jenkinsfile
DELETED
@@ -1,49 +0,0 @@
|
|
1
|
-
#!/usr/bin/env groovy
|
2
|
-
|
3
|
-
library("govuk")
|
4
|
-
|
5
|
-
node {
|
6
|
-
try {
|
7
|
-
// This doesn't use the buildProject as this project doesn't conform to
|
8
|
-
// required norms (e.g. running in Ruby 1.9, non-standard tests).
|
9
|
-
|
10
|
-
repoName = JOB_NAME.split('/')[0]
|
11
|
-
|
12
|
-
stage("Checkout") {
|
13
|
-
govuk.checkoutFromGitHubWithSSH(repoName)
|
14
|
-
}
|
15
|
-
|
16
|
-
stage("Clean up workspace") {
|
17
|
-
govuk.cleanupGit()
|
18
|
-
}
|
19
|
-
|
20
|
-
stage('Configure environment') {
|
21
|
-
govuk.setEnvar('RBENV_VERSION', '1.9.3-p550')
|
22
|
-
}
|
23
|
-
|
24
|
-
stage('Bundle install') {
|
25
|
-
govuk.bundleGem()
|
26
|
-
}
|
27
|
-
|
28
|
-
stage('Spec tests') {
|
29
|
-
govuk.runRakeTask('spec')
|
30
|
-
}
|
31
|
-
|
32
|
-
stage('Integration tests') {
|
33
|
-
govuk.runRakeTask('integration')
|
34
|
-
}
|
35
|
-
|
36
|
-
if (env.BRANCH_NAME == 'master') {
|
37
|
-
stage('Publish Gem to Rubygems') {
|
38
|
-
govuk.publishGem(repoName, repoName, 'master')
|
39
|
-
}
|
40
|
-
}
|
41
|
-
} catch (e) {
|
42
|
-
currentBuild.result = "FAILED"
|
43
|
-
step([$class: 'Mailer',
|
44
|
-
notifyEveryUnstableBuild: true,
|
45
|
-
recipients: 'govuk-ci-notifications@digital.cabinet-office.gov.uk',
|
46
|
-
sendToIndividuals: true])
|
47
|
-
throw e
|
48
|
-
}
|
49
|
-
}
|