govuk_seed_crawler 2.1.0 → 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.github/dependabot.yml +10 -0
- data/.github/workflows/ci.yml +47 -0
- data/.rubocop.yml +19 -0
- data/.ruby-version +1 -1
- data/CHANGELOG.md +11 -0
- data/Gemfile +1 -1
- data/{LICENSE.txt → LICENCE} +2 -2
- data/README.md +4 -0
- data/Rakefile +5 -9
- data/govuk_seed_crawler.gemspec +15 -14
- data/lib/govuk_seed_crawler/amqp_client.rb +3 -3
- data/lib/govuk_seed_crawler/cli_parser.rb +23 -23
- data/lib/govuk_seed_crawler/cli_runner.rb +2 -2
- data/lib/govuk_seed_crawler/indexer.rb +2 -2
- data/lib/govuk_seed_crawler/version.rb +1 -1
- data/lib/govuk_seed_crawler.rb +16 -12
- data/spec/govuk_seed_crawler/amqp_client_spec.rb +33 -34
- data/spec/govuk_seed_crawler/cli_parser_spec.rb +32 -46
- data/spec/govuk_seed_crawler/cli_runner_spec.rb +16 -36
- data/spec/govuk_seed_crawler/indexer_spec.rb +13 -16
- data/spec/govuk_seed_crawler/seeder_spec.rb +25 -27
- data/spec/integration/govuk_seed_crawler_spec.rb +28 -28
- data/spec/spec_helper.rb +8 -10
- metadata +84 -84
- data/Jenkinsfile +0 -49
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: b0d80cc9a1bf29784700e23b1f8c883fc6012a2e7a47776cc3af04ca87f253e7
|
4
|
+
data.tar.gz: ca2d04361ff8e9d9b3cf34f0432eb786b026ffcfd035b1a5faeed6c32801880a
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 895569f56489bbfce199620c211742b226b56d0cc8a03d17451b8a382f2743ba1d7c7821ff9fe75925d411d9b99a595981cf72fb5334907f59f33bf0a24ec000
|
7
|
+
data.tar.gz: 8ea165b2ed3d62a63a5a331b03e14e2fe0dd68713bf7732d28bd2b5cfc4e20a6b5a4932c503fc0514d1a0ea1c6d58343afe864612d8190a2521638d62eea0e83
|
@@ -0,0 +1,47 @@
|
|
1
|
+
on: [push, pull_request]
|
2
|
+
|
3
|
+
jobs:
|
4
|
+
# This matrix job runs the test suite against multiple Ruby versions
|
5
|
+
test_matrix:
|
6
|
+
strategy:
|
7
|
+
fail-fast: false
|
8
|
+
matrix:
|
9
|
+
# Due to https://github.com/actions/runner/issues/849, we have to use quotes for '3.0'
|
10
|
+
ruby: [ 2.7, '3.0', 3.1 ]
|
11
|
+
runs-on: ubuntu-latest
|
12
|
+
services:
|
13
|
+
rabbitmq:
|
14
|
+
image: rabbitmq
|
15
|
+
env:
|
16
|
+
RABBITMQ_DEFAULT_USER: rabbitmq
|
17
|
+
RABBITMQ_DEFAULT_PASS: rabbitmq
|
18
|
+
ports:
|
19
|
+
- 5672:5672
|
20
|
+
steps:
|
21
|
+
- uses: actions/checkout@v3
|
22
|
+
- uses: ruby/setup-ruby@v1
|
23
|
+
with:
|
24
|
+
ruby-version: ${{ matrix.ruby }}
|
25
|
+
bundler-cache: true
|
26
|
+
- run: bundle exec rake
|
27
|
+
env:
|
28
|
+
AMQP_USER: rabbitmq
|
29
|
+
AMQP_PASS: rabbitmq
|
30
|
+
|
31
|
+
# Branch protection rules cannot directly depend on status checks from matrix jobs.
|
32
|
+
# So instead we define `test` as a dummy job which only runs after the preceding `test_matrix` checks have passed.
|
33
|
+
# Solution inspired by: https://github.community/t/status-check-for-a-matrix-jobs/127354/3
|
34
|
+
test:
|
35
|
+
needs: test_matrix
|
36
|
+
runs-on: ubuntu-latest
|
37
|
+
steps:
|
38
|
+
- run: echo "All matrix tests have passed 🚀"
|
39
|
+
|
40
|
+
publish:
|
41
|
+
needs: test
|
42
|
+
if: ${{ github.ref == 'refs/heads/main' }}
|
43
|
+
permissions:
|
44
|
+
contents: write
|
45
|
+
uses: alphagov/govuk-infrastructure/.github/workflows/publish-rubygem.yaml@main
|
46
|
+
secrets:
|
47
|
+
GEM_HOST_API_KEY: ${{ secrets.ALPHAGOV_RUBYGEMS_API_KEY }}
|
data/.rubocop.yml
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
inherit_gem:
|
2
|
+
rubocop-govuk:
|
3
|
+
- config/default.yml
|
4
|
+
- config/rake.yml
|
5
|
+
- config/rspec.yml
|
6
|
+
|
7
|
+
inherit_mode:
|
8
|
+
merge:
|
9
|
+
- Exclude
|
10
|
+
|
11
|
+
# **************************************************************
|
12
|
+
# TRY NOT TO ADD OVERRIDES IN THIS FILE
|
13
|
+
#
|
14
|
+
# This repo is configured to follow the RuboCop GOV.UK styleguide.
|
15
|
+
# Any rules you override here will cause this repo to diverge from
|
16
|
+
# the way we write code in all other GOV.UK repos.
|
17
|
+
#
|
18
|
+
# See https://github.com/alphagov/rubocop-govuk/blob/main/CONTRIBUTING.md
|
19
|
+
# **************************************************************
|
data/.ruby-version
CHANGED
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
2.7.6
|
data/CHANGELOG.md
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
# Changelog
|
2
|
+
|
3
|
+
- We use the [GOV.UK versioning guidelines](https://docs.publishing.service.gov.uk/manual/publishing-a-ruby-gem.html#versioning).
|
4
|
+
- Mark breaking changes with `BREAKING:`. Be sure to include instructions on how applications should be upgraded.
|
5
|
+
- Don't include changes that are purely internal. The CHANGELOG should be a
|
6
|
+
useful summary for people upgrading their application, not a replication
|
7
|
+
of the commit log.
|
8
|
+
|
9
|
+
## Unreleased
|
10
|
+
|
11
|
+
- Drop support for Ruby < 2.7
|
data/Gemfile
CHANGED
data/{LICENSE.txt → LICENCE}
RENAMED
@@ -1,6 +1,6 @@
|
|
1
|
-
|
1
|
+
The MIT License (MIT)
|
2
2
|
|
3
|
-
|
3
|
+
Copyright (C) 2014 Crown Copyright (Government Digital Service)
|
4
4
|
|
5
5
|
Permission is hereby granted, free of charge, to any person obtaining
|
6
6
|
a copy of this software and associated documentation files (the
|
data/README.md
CHANGED
data/Rakefile
CHANGED
@@ -1,11 +1,7 @@
|
|
1
|
-
require
|
1
|
+
require "rspec/core/rake_task"
|
2
|
+
require "rubocop/rake_task"
|
2
3
|
|
3
|
-
RSpec::Core::RakeTask.new(:spec)
|
4
|
-
|
5
|
-
end
|
4
|
+
RSpec::Core::RakeTask.new(:spec)
|
5
|
+
RuboCop::RakeTask.new
|
6
6
|
|
7
|
-
|
8
|
-
task.pattern = FileList['spec/integration/**/*_spec.rb']
|
9
|
-
end
|
10
|
-
|
11
|
-
task :default => :spec
|
7
|
+
task default: %i[rubocop spec]
|
data/govuk_seed_crawler.gemspec
CHANGED
@@ -1,35 +1,36 @@
|
|
1
|
-
|
2
|
-
lib = File.expand_path('../lib', __FILE__)
|
1
|
+
lib = File.expand_path("lib", __dir__)
|
3
2
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
-
require
|
3
|
+
require "govuk_seed_crawler/version"
|
5
4
|
|
6
5
|
Gem::Specification.new do |spec|
|
7
6
|
spec.name = "govuk_seed_crawler"
|
8
7
|
spec.version = GovukSeedCrawler::VERSION
|
9
|
-
spec.authors = [
|
8
|
+
spec.authors = ["GOV.UK developers"]
|
10
9
|
spec.email = ["govuk-dev@digital.cabinet-office.gov.uk"]
|
11
|
-
spec.summary =
|
10
|
+
spec.summary = "Retrieves a list of URLs to seed the crawler by publishing them to a RabbitMQ exchange."
|
12
11
|
spec.homepage = "https://github.com/alphagov/govuk_seed_crawler"
|
13
12
|
spec.license = "MIT"
|
14
13
|
|
14
|
+
spec.required_ruby_version = ">= 2.7"
|
15
|
+
|
15
16
|
spec.files = `git ls-files -z`.split("\x0")
|
16
17
|
spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
17
18
|
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
18
|
-
spec.require_paths = [
|
19
|
+
spec.require_paths = %w[lib]
|
19
20
|
|
20
|
-
spec.add_runtime_dependency "bunny", "
|
21
|
-
spec.add_runtime_dependency "crack", "0.4.
|
22
|
-
spec.add_runtime_dependency "
|
23
|
-
spec.add_runtime_dependency "nokogiri", "~> 1.6.0"
|
21
|
+
spec.add_runtime_dependency "bunny", ">= 1.3", "< 3.0"
|
22
|
+
spec.add_runtime_dependency "crack", "0.4.5"
|
23
|
+
spec.add_runtime_dependency "nokogiri", ">= 1.6", "< 1.14"
|
24
24
|
# Something, somewhere, sometimes requires public_suffix.
|
25
25
|
# public_suffix > 1.5 requires ruby > 2.
|
26
|
-
spec.add_runtime_dependency "public_suffix", "
|
27
|
-
spec.add_runtime_dependency "sitemap-parser", "
|
26
|
+
spec.add_runtime_dependency "public_suffix", ">= 1.4.6", "< 5.1.0"
|
27
|
+
spec.add_runtime_dependency "sitemap-parser", ">= 0.3", "< 0.6"
|
28
28
|
spec.add_runtime_dependency "slop", "~> 3.6.0"
|
29
29
|
|
30
30
|
spec.add_development_dependency "pry"
|
31
|
-
spec.add_development_dependency "rake"
|
31
|
+
spec.add_development_dependency "rake"
|
32
32
|
spec.add_development_dependency "rspec", "~> 3.0"
|
33
33
|
spec.add_development_dependency "rspec-mocks", "~> 3.0"
|
34
|
-
spec.add_development_dependency "
|
34
|
+
spec.add_development_dependency "rubocop-govuk", "4.8.0"
|
35
|
+
spec.add_development_dependency "webmock", "~> 3.18"
|
35
36
|
end
|
@@ -1,4 +1,4 @@
|
|
1
|
-
require
|
1
|
+
require "bunny"
|
2
2
|
|
3
3
|
module GovukSeedCrawler
|
4
4
|
class AmqpClient
|
@@ -21,8 +21,8 @@ module GovukSeedCrawler
|
|
21
21
|
|
22
22
|
GovukSeedCrawler.logger.debug("Publishing '#{body}' to topic '#{topic}'")
|
23
23
|
|
24
|
-
@channel.topic(exchange, :
|
25
|
-
.publish(body, :
|
24
|
+
@channel.topic(exchange, durable: true)
|
25
|
+
.publish(body, routing_key: topic)
|
26
26
|
end
|
27
27
|
end
|
28
28
|
end
|
@@ -1,4 +1,4 @@
|
|
1
|
-
require
|
1
|
+
require "slop"
|
2
2
|
|
3
3
|
module GovukSeedCrawler
|
4
4
|
class CLIException < StandardError
|
@@ -12,17 +12,17 @@ module GovukSeedCrawler
|
|
12
12
|
|
13
13
|
class CLIParser
|
14
14
|
DEFAULTS = {
|
15
|
-
:
|
16
|
-
:
|
17
|
-
:
|
18
|
-
:
|
19
|
-
:
|
20
|
-
:
|
21
|
-
:
|
22
|
-
:
|
23
|
-
:
|
24
|
-
:
|
25
|
-
:
|
15
|
+
exchange: "govuk_crawler_exchange",
|
16
|
+
help: nil,
|
17
|
+
host: "localhost",
|
18
|
+
password: "guest",
|
19
|
+
port: "5672",
|
20
|
+
quiet: false,
|
21
|
+
topic: "#",
|
22
|
+
username: "guest",
|
23
|
+
verbose: false,
|
24
|
+
version: nil,
|
25
|
+
vhost: "/",
|
26
26
|
}.freeze
|
27
27
|
|
28
28
|
ENV_AMQP_PASS_KEY = "GOVUK_CRAWLER_AMQP_PASS".freeze
|
@@ -32,19 +32,19 @@ module GovukSeedCrawler
|
|
32
32
|
end
|
33
33
|
|
34
34
|
def options
|
35
|
-
Slop.parse!(@argv_array, :
|
36
|
-
banner
|
37
|
-
Usage: #{$PROGRAM_NAME} site_root [options]
|
35
|
+
Slop.parse!(@argv_array, help: true) do
|
36
|
+
banner <<~HELP
|
37
|
+
Usage: #{$PROGRAM_NAME} site_root [options]
|
38
38
|
|
39
|
-
Seeds an AMQP topic exchange with messages, each containing a URL, for the GOV.UK Crawler Worker
|
40
|
-
to consume:
|
39
|
+
Seeds an AMQP topic exchange with messages, each containing a URL, for the GOV.UK Crawler Worker
|
40
|
+
to consume:
|
41
41
|
|
42
|
-
https://github.com/alphagov/govuk_crawler_worker
|
42
|
+
https://github.com/alphagov/govuk_crawler_worker
|
43
43
|
|
44
|
-
The AMQP password can also be set as an environment variable and will be read from
|
45
|
-
`#{ENV_AMQP_PASS_KEY}`. If both the environment variable and command-line option for password
|
46
|
-
are set, the environment variable will take higher precedent.
|
47
|
-
|
44
|
+
The AMQP password can also be set as an environment variable and will be read from
|
45
|
+
`#{ENV_AMQP_PASS_KEY}`. If both the environment variable and command-line option for password
|
46
|
+
are set, the environment variable will take higher precedent.
|
47
|
+
HELP
|
48
48
|
|
49
49
|
on :version, "Display version and exit" do
|
50
50
|
puts "Version: #{GovukSeedCrawler::VERSION}"
|
@@ -75,7 +75,7 @@ are set, the environment variable will take higher precedent.
|
|
75
75
|
options_hash = opts.to_hash
|
76
76
|
options_hash[:password] = ENV[ENV_AMQP_PASS_KEY] unless ENV[ENV_AMQP_PASS_KEY].nil?
|
77
77
|
|
78
|
-
|
78
|
+
[options_hash, @argv_array.first]
|
79
79
|
end
|
80
80
|
end
|
81
81
|
end
|
@@ -1,4 +1,4 @@
|
|
1
|
-
require
|
1
|
+
require "sitemap-parser"
|
2
2
|
|
3
3
|
module GovukSeedCrawler
|
4
4
|
class Indexer
|
@@ -9,7 +9,7 @@ module GovukSeedCrawler
|
|
9
9
|
|
10
10
|
GovukSeedCrawler.logger.info("Retrieving list of URLs for #{site_root}")
|
11
11
|
|
12
|
-
sitemap = SitemapParser.new("#{site_root}/sitemap.xml", {recurse: true})
|
12
|
+
sitemap = SitemapParser.new("#{site_root}/sitemap.xml", { recurse: true })
|
13
13
|
@urls = sitemap.to_a
|
14
14
|
|
15
15
|
GovukSeedCrawler.logger.info("Found #{@urls.count} URLs")
|
data/lib/govuk_seed_crawler.rb
CHANGED
@@ -1,17 +1,21 @@
|
|
1
|
-
require
|
2
|
-
require
|
3
|
-
require
|
4
|
-
require
|
5
|
-
require
|
6
|
-
require
|
1
|
+
require "govuk_seed_crawler/amqp_client"
|
2
|
+
require "govuk_seed_crawler/cli_parser"
|
3
|
+
require "govuk_seed_crawler/cli_runner"
|
4
|
+
require "govuk_seed_crawler/indexer"
|
5
|
+
require "govuk_seed_crawler/seeder"
|
6
|
+
require "govuk_seed_crawler/version"
|
7
7
|
|
8
8
|
module GovukSeedCrawler
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
9
|
+
class << self
|
10
|
+
attr_writer :logger
|
11
|
+
|
12
|
+
def logger
|
13
|
+
unless @logger
|
14
|
+
@logger = Logger.new($stdout)
|
15
|
+
@logger.level = Logger::INFO
|
16
|
+
end
|
14
17
|
|
15
|
-
|
18
|
+
@logger
|
19
|
+
end
|
16
20
|
end
|
17
21
|
end
|
@@ -1,55 +1,54 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
1
|
describe GovukSeedCrawler::AmqpClient do
|
4
2
|
let(:exchange) { "govuk_seed_crawler_spec_exchange" }
|
5
|
-
let(:options)
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
3
|
+
let(:options) do
|
4
|
+
{
|
5
|
+
host: ENV.fetch("AMQP_HOST", "localhost"),
|
6
|
+
user: ENV.fetch("AMQP_USER", "govuk_seed_crawler"),
|
7
|
+
pass: ENV.fetch("AMQP_PASS", "govuk_seed_crawler"),
|
8
|
+
}
|
9
|
+
end
|
11
10
|
|
12
11
|
it "responds to #channel" do
|
13
|
-
expect(
|
12
|
+
expect(described_class.new(options)).to respond_to(:channel)
|
14
13
|
end
|
15
14
|
|
16
15
|
it "responds to #close" do
|
17
|
-
expect(
|
16
|
+
expect(described_class.new(options)).to respond_to(:close)
|
18
17
|
end
|
19
18
|
|
20
19
|
it "closes the connection to the AMQP server" do
|
21
|
-
mock_bunny =
|
22
|
-
|
20
|
+
mock_bunny = instance_double(Bunny::Session,
|
21
|
+
start: true,
|
22
|
+
create_channel: true,
|
23
|
+
close: true)
|
23
24
|
allow(Bunny).to receive(:new).and_return(mock_bunny)
|
24
25
|
expect(mock_bunny).to receive(:close).once
|
25
26
|
|
26
|
-
|
27
|
+
described_class.new(options).close
|
27
28
|
end
|
28
29
|
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
}.to raise_exception(RuntimeError, "Message body cannot be nil")
|
47
|
-
end
|
30
|
+
describe "#publish" do
|
31
|
+
it "raises an exception if exchange is nil" do
|
32
|
+
expect {
|
33
|
+
described_class.new(options).publish(nil, "#", "some body")
|
34
|
+
}.to raise_exception(RuntimeError, "Exchange cannot be nil")
|
35
|
+
end
|
36
|
+
|
37
|
+
it "raises an exception if topic is nil" do
|
38
|
+
expect {
|
39
|
+
described_class.new(options).publish(exchange, nil, "some body")
|
40
|
+
}.to raise_exception(RuntimeError, "Topic cannot be nil")
|
41
|
+
end
|
42
|
+
|
43
|
+
it "raises an exception if body is nil" do
|
44
|
+
expect {
|
45
|
+
described_class.new(options).publish(exchange, "#", nil)
|
46
|
+
}.to raise_exception(RuntimeError, "Message body cannot be nil")
|
48
47
|
end
|
49
48
|
|
50
49
|
it "allows publishing against an exchange" do
|
51
|
-
expect(
|
52
|
-
.
|
50
|
+
expect(described_class.new(options).publish(exchange, "#", "some body"))
|
51
|
+
.not_to be_nil
|
53
52
|
end
|
54
53
|
end
|
55
54
|
end
|
@@ -1,28 +1,26 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
1
|
describe GovukSeedCrawler::CLIParser do
|
4
2
|
it "requires the site_root to be provided" do
|
5
3
|
expect {
|
6
|
-
|
4
|
+
described_class.new([]).parse
|
7
5
|
}.to raise_exception(GovukSeedCrawler::CLIException, "site_root must be provided")
|
8
6
|
end
|
9
7
|
|
10
8
|
it "provides the defaults when just given the site_root" do
|
11
|
-
options, site_root =
|
9
|
+
options, site_root = described_class.new(["https://www.example.com"]).parse
|
12
10
|
|
13
11
|
expect(options).to eq(GovukSeedCrawler::CLIParser::DEFAULTS)
|
14
12
|
expect(site_root).to eq("https://www.example.com")
|
15
13
|
end
|
16
14
|
|
17
|
-
it "
|
15
|
+
it "tells us when we've given too many arguments" do
|
18
16
|
expect {
|
19
|
-
|
17
|
+
described_class.new(%w[a b]).parse
|
20
18
|
}.to raise_exception(GovukSeedCrawler::CLIException, "too many arguments provided")
|
21
19
|
end
|
22
20
|
|
23
|
-
it "
|
21
|
+
it "nests the help message in with any CLIExceptions we raise" do
|
24
22
|
expect {
|
25
|
-
|
23
|
+
described_class.new(%w[a b]).parse
|
26
24
|
}.to raise_exception(GovukSeedCrawler::CLIException) { |e|
|
27
25
|
expect(e.help).to include("Usage: ")
|
28
26
|
}
|
@@ -31,48 +29,36 @@ describe GovukSeedCrawler::CLIParser do
|
|
31
29
|
describe "catching STDOUT" do
|
32
30
|
it "shows the help banner when provided -h" do
|
33
31
|
# Get a valid options response as help closes early with SystemExit.
|
34
|
-
options =
|
35
|
-
|
36
|
-
temp_stdout do |caught_stdout|
|
37
|
-
expect {
|
38
|
-
_, _ = GovukSeedCrawler::CLIParser.new(["-h"]).parse
|
39
|
-
}.to raise_exception(SystemExit) { |e|
|
40
|
-
expect(e.status).to eq(0)
|
41
|
-
}
|
32
|
+
options = described_class.new(["http://www.foo.com/"]).options
|
42
33
|
|
43
|
-
|
44
|
-
|
34
|
+
expect { described_class.new(["-h"]).parse }
|
35
|
+
.to output("#{options.help}\n").to_stdout
|
36
|
+
.and raise_exception(SystemExit) { |e| expect(e.status).to eq(0) }
|
45
37
|
end
|
46
38
|
|
47
|
-
it "
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
}.to raise_exception(SystemExit) { |e|
|
52
|
-
expect(e.status).to eq(0)
|
53
|
-
}
|
54
|
-
|
55
|
-
expect(caught_stdout.strip).to eq("Version: #{GovukSeedCrawler::VERSION}")
|
56
|
-
end
|
39
|
+
it "shows the version number and exit" do
|
40
|
+
expect { described_class.new(["--version"]).parse }
|
41
|
+
.to output("Version: #{GovukSeedCrawler::VERSION}\n").to_stdout
|
42
|
+
.and raise_exception(SystemExit) { |e| expect(e.status).to eq(0) }
|
57
43
|
end
|
58
44
|
end
|
59
45
|
|
60
46
|
describe "passing in valid arguments" do
|
61
|
-
let(:arguments)
|
47
|
+
let(:arguments) do
|
62
48
|
[
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
49
|
+
"https://www.override.com/",
|
50
|
+
"--host rabbitmq.some.custom.vhost",
|
51
|
+
"--port 4567",
|
52
|
+
"--username foo",
|
53
|
+
"--password bar",
|
54
|
+
"--exchange some_custom_exchange",
|
55
|
+
"--topic some_custom_topic",
|
56
|
+
"--vhost a_vhost",
|
57
|
+
"--verbose",
|
72
58
|
].join(" ").split(" ")
|
73
|
-
|
59
|
+
end
|
74
60
|
|
75
|
-
it "
|
61
|
+
it "overrides all of the default arguments that we're providing" do
|
76
62
|
overriden = {
|
77
63
|
host: "rabbitmq.some.custom.vhost",
|
78
64
|
port: "4567",
|
@@ -84,14 +70,14 @@ describe GovukSeedCrawler::CLIParser do
|
|
84
70
|
quiet: false,
|
85
71
|
verbose: true,
|
86
72
|
version: nil,
|
87
|
-
vhost: "a_vhost"
|
73
|
+
vhost: "a_vhost",
|
88
74
|
}
|
89
75
|
|
90
|
-
expect(
|
76
|
+
expect(described_class.new(arguments).parse.first).to eq(overriden)
|
91
77
|
end
|
92
78
|
|
93
|
-
it "
|
94
|
-
options,
|
79
|
+
it "sets the --quiet value" do
|
80
|
+
options, = described_class.new(["foo.com", "--quiet"]).parse
|
95
81
|
expect(options).to eq(GovukSeedCrawler::CLIParser::DEFAULTS.merge(quiet: true))
|
96
82
|
end
|
97
83
|
|
@@ -107,14 +93,14 @@ describe GovukSeedCrawler::CLIParser do
|
|
107
93
|
it "sets the password if set using an environment variable" do
|
108
94
|
set_amqp_pass("foobar")
|
109
95
|
|
110
|
-
expect(
|
96
|
+
expect(described_class.new(["http://www.example.com"]).parse.first)
|
111
97
|
.to include(password: "foobar")
|
112
98
|
end
|
113
99
|
|
114
100
|
it "picks the environment variable over the parameter if both are set" do
|
115
101
|
set_amqp_pass("bar")
|
116
102
|
|
117
|
-
expect(
|
103
|
+
expect(described_class.new(["http://www.example.com", "--password", "foo"]).parse.first)
|
118
104
|
.to include(password: "bar")
|
119
105
|
end
|
120
106
|
end
|
@@ -1,45 +1,25 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
1
|
describe GovukSeedCrawler::CLIRunner do
|
4
2
|
describe "printing the version" do
|
5
|
-
it "
|
3
|
+
it "does not try to connect to an AMQP server" do
|
6
4
|
expect(Bunny).not_to receive(:new)
|
7
5
|
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
}.to raise_exception(SystemExit) { |exit|
|
12
|
-
expect(exit.status).to eq(0)
|
13
|
-
}
|
14
|
-
|
15
|
-
expect(caught_stdout.strip).to eq("Version: #{GovukSeedCrawler::VERSION}")
|
16
|
-
end
|
6
|
+
expect { described_class.new(["--version"]).run }
|
7
|
+
.to output("Version: #{GovukSeedCrawler::VERSION}\n").to_stdout
|
8
|
+
.and raise_exception(SystemExit) { |e| expect(e.status).to eq(0) }
|
17
9
|
end
|
18
10
|
end
|
19
11
|
|
20
12
|
describe "catching any CLIException objects and exiting with a status 1" do
|
21
13
|
it "prints to STDOUT for too many arguments" do
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
}.to raise_exception(SystemExit) { |exit|
|
26
|
-
expect(exit.status).to eq(2)
|
27
|
-
}
|
28
|
-
|
29
|
-
expect(caught_stdout.strip).to include("too many arguments provided")
|
30
|
-
end
|
14
|
+
expect { described_class.new(%w[a b]).run }
|
15
|
+
.to output(/\Atoo many arguments provided/).to_stdout
|
16
|
+
.and raise_exception(SystemExit) { |e| expect(e.status).to eq(2) }
|
31
17
|
end
|
32
18
|
|
33
19
|
it "prints to STDOUT when site_root not set" do
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
}.to raise_exception(SystemExit) { |exit|
|
38
|
-
expect(exit.status).to eq(2)
|
39
|
-
}
|
40
|
-
|
41
|
-
expect(caught_stdout.strip).to include("site_root must be provided")
|
42
|
-
end
|
20
|
+
expect { described_class.new(["--verbose"]).run }
|
21
|
+
.to output(/\Asite_root must be provided/).to_stdout
|
22
|
+
.and raise_exception(SystemExit) { |e| expect(e.status).to eq(2) }
|
43
23
|
end
|
44
24
|
end
|
45
25
|
|
@@ -49,26 +29,26 @@ describe GovukSeedCrawler::CLIRunner do
|
|
49
29
|
end
|
50
30
|
|
51
31
|
it "defaults to INFO" do
|
52
|
-
|
32
|
+
described_class.new(["http://www.example.com"])
|
53
33
|
expect(GovukSeedCrawler.logger.level).to eq(Logger::INFO)
|
54
34
|
end
|
55
35
|
|
56
36
|
it "sets to ERROR for quite" do
|
57
|
-
|
37
|
+
described_class.new(["http://www.example.com", "--quiet"])
|
58
38
|
expect(GovukSeedCrawler.logger.level).to eq(Logger::ERROR)
|
59
39
|
end
|
60
40
|
|
61
41
|
it "sets to DEBUG for verbose" do
|
62
|
-
|
42
|
+
described_class.new(["http://www.example.com", "--verbose"])
|
63
43
|
expect(GovukSeedCrawler.logger.level).to eq(Logger::DEBUG)
|
64
44
|
end
|
65
45
|
end
|
66
46
|
|
67
47
|
describe "#run" do
|
68
48
|
it "passes all options through to seed" do
|
69
|
-
expect(GovukSeedCrawler::Seeder).to receive(:seed)
|
70
|
-
with("http://www.example.com", GovukSeedCrawler::CLIParser::DEFAULTS).once
|
71
|
-
|
49
|
+
expect(GovukSeedCrawler::Seeder).to receive(:seed)
|
50
|
+
.with("http://www.example.com", GovukSeedCrawler::CLIParser::DEFAULTS).once
|
51
|
+
described_class.new(["http://www.example.com"]).run
|
72
52
|
end
|
73
53
|
end
|
74
54
|
end
|
@@ -1,21 +1,18 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
1
|
describe GovukSeedCrawler::Indexer do
|
4
|
-
|
5
|
-
|
6
|
-
context "under normal usage" do
|
7
|
-
let(:mock_parser) do
|
8
|
-
double(:mock_parser, :to_a => [])
|
9
|
-
end
|
2
|
+
let(:mock_parser) { instance_double(SitemapParser, to_a: []) }
|
10
3
|
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
4
|
+
it "responds to Indexer#urls" do
|
5
|
+
allow(SitemapParser).to receive(:new).and_return(mock_parser)
|
6
|
+
instance = nil
|
7
|
+
expect { instance = described_class.new("https://example.com") }
|
8
|
+
.to output.to_stdout
|
9
|
+
expect(instance).to respond_to(:urls)
|
10
|
+
end
|
15
11
|
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
12
|
+
it "calls SitemapParser with the sitemap file" do
|
13
|
+
allow(SitemapParser).to receive(:new).with("https://example.com/sitemap.xml", { recurse: true }).and_return(mock_parser)
|
14
|
+
expect { described_class.new("https://example.com") }
|
15
|
+
.to output.to_stdout
|
16
|
+
expect(SitemapParser).to have_received(:new)
|
20
17
|
end
|
21
18
|
end
|
@@ -1,29 +1,27 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
1
|
describe GovukSeedCrawler::Seeder do
|
4
2
|
let(:exchange) { "seeder_test_exchange" }
|
5
3
|
let(:topic) { "#" }
|
6
4
|
let(:root_url) { "https://www.example.com" }
|
7
5
|
|
8
|
-
let(:options)
|
9
|
-
|
10
|
-
|
11
|
-
|
6
|
+
let(:options) do
|
7
|
+
{
|
8
|
+
exchange: exchange,
|
9
|
+
topic: topic,
|
10
|
+
}
|
11
|
+
end
|
12
12
|
|
13
|
-
let(:mock_get_urls) {
|
14
|
-
let(:mock_amqp_client) {
|
13
|
+
let(:mock_get_urls) { instance_double(GovukSeedCrawler::Indexer, urls: true) }
|
14
|
+
let(:mock_amqp_client) { instance_double(GovukSeedCrawler::AmqpClient, close: true) }
|
15
15
|
|
16
16
|
let(:urls) do
|
17
17
|
[
|
18
|
-
|
19
|
-
|
20
|
-
|
18
|
+
"https://example.com/foo",
|
19
|
+
"https://example.com/bar",
|
20
|
+
"https://example.com/baz",
|
21
21
|
]
|
22
22
|
end
|
23
23
|
|
24
|
-
|
25
|
-
|
26
|
-
before(:each) do
|
24
|
+
before do
|
27
25
|
allow(GovukSeedCrawler::Indexer).to receive(:new)
|
28
26
|
.with(root_url)
|
29
27
|
.and_return(mock_get_urls)
|
@@ -32,20 +30,20 @@ describe GovukSeedCrawler::Seeder do
|
|
32
30
|
.with(options).and_return(mock_amqp_client)
|
33
31
|
end
|
34
32
|
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
.with(exchange, topic, url)
|
40
|
-
end
|
41
|
-
|
42
|
-
subject
|
33
|
+
it "publishes urls to the queue" do
|
34
|
+
urls.each do |url|
|
35
|
+
expect(mock_amqp_client).to receive(:publish)
|
36
|
+
.with(exchange, topic, url)
|
43
37
|
end
|
44
38
|
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
39
|
+
expect { described_class.seed(root_url, options) }
|
40
|
+
.to output.to_stdout
|
41
|
+
end
|
42
|
+
|
43
|
+
it "closes the connection when done" do
|
44
|
+
allow(mock_amqp_client).to receive(:publish)
|
45
|
+
expect(mock_amqp_client).to receive(:close)
|
46
|
+
expect { described_class.seed(root_url, options) }
|
47
|
+
.to output.to_stdout
|
50
48
|
end
|
51
49
|
end
|
@@ -1,9 +1,8 @@
|
|
1
|
-
require
|
2
|
-
require 'spec_helper'
|
1
|
+
require "json"
|
3
2
|
|
4
3
|
describe GovukSeedCrawler do
|
5
4
|
def stub_sitemap
|
6
|
-
sitemap = %
|
5
|
+
sitemap = %(<?xml version="1.0" encoding="UTF-8"?>
|
7
6
|
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
|
8
7
|
<url>
|
9
8
|
<loc>https://www.gov.uk/</loc>
|
@@ -15,45 +14,46 @@ describe GovukSeedCrawler do
|
|
15
14
|
<loc>https://www.gov.uk/help</loc>
|
16
15
|
</url>
|
17
16
|
</urlset>
|
18
|
-
|
17
|
+
)
|
19
18
|
|
20
|
-
stub_request(:get, "https://www.gov.uk/sitemap.xml")
|
21
|
-
to_return(:
|
19
|
+
stub_request(:get, "https://www.gov.uk/sitemap.xml")
|
20
|
+
.to_return(status: 200, body: sitemap, headers: {})
|
22
21
|
end
|
23
22
|
|
23
|
+
subject { GovukSeedCrawler::Seeder.seed(site_root, options) }
|
24
|
+
|
24
25
|
let(:vhost) { "/" }
|
25
26
|
let(:exchange_name) { "govuk_seed_crawler_integration_exchange" }
|
26
27
|
let(:queue_name) { "govuk_seed_crawler_integration_queue" }
|
27
28
|
let(:topic) { "#" }
|
28
29
|
let(:site_root) { "https://www.gov.uk" }
|
29
|
-
let(:options)
|
30
|
-
|
31
|
-
:
|
32
|
-
:
|
33
|
-
:
|
34
|
-
:
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
subject { GovukSeedCrawler::Seeder::seed(site_root, options) }
|
39
|
-
|
40
|
-
before(:each) do
|
41
|
-
@exchange = rabbitmq_client.channel.topic(exchange_name, :durable => true)
|
42
|
-
@queue = rabbitmq_client.channel.queue(queue_name)
|
43
|
-
@queue.bind(@exchange, :routing_key => topic)
|
30
|
+
let(:options) do
|
31
|
+
{
|
32
|
+
host: ENV.fetch("AMQP_HOST", "localhost"),
|
33
|
+
user: ENV.fetch("AMQP_USER", "govuk_seed_crawler"),
|
34
|
+
pass: ENV.fetch("AMQP_PASS", "govuk_seed_crawler"),
|
35
|
+
exchange: exchange_name,
|
36
|
+
topic: topic,
|
37
|
+
}
|
44
38
|
end
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
39
|
+
let(:rabbitmq_client) { GovukSeedCrawler::AmqpClient.new(options) }
|
40
|
+
let(:exchange) { rabbitmq_client.channel.topic(exchange_name, durable: true) }
|
41
|
+
let(:queue) { rabbitmq_client.channel.queue(queue_name) }
|
42
|
+
|
43
|
+
around do |example|
|
44
|
+
queue.bind(exchange, routing_key: topic)
|
45
|
+
example.run
|
46
|
+
queue.unbind(exchange)
|
47
|
+
queue.delete
|
48
|
+
exchange.delete
|
50
49
|
rabbitmq_client.close
|
51
50
|
end
|
52
51
|
|
53
52
|
it "publishes URLs it finds to an AMQP topic exchange" do
|
54
53
|
stub_sitemap
|
55
|
-
|
54
|
+
expect { GovukSeedCrawler::Seeder.seed(site_root, options) }
|
55
|
+
.to output.to_stdout
|
56
56
|
|
57
|
-
expect(
|
57
|
+
expect(queue.message_count).to be(3)
|
58
58
|
end
|
59
59
|
end
|
data/spec/spec_helper.rb
CHANGED
@@ -1,5 +1,7 @@
|
|
1
|
-
require
|
2
|
-
require
|
1
|
+
require "govuk_seed_crawler"
|
2
|
+
require "webmock/rspec"
|
3
|
+
|
4
|
+
WebMock.disable_net_connect!
|
3
5
|
|
4
6
|
RSpec.configure do |config|
|
5
7
|
config.order = :random
|
@@ -27,13 +29,9 @@ RSpec.configure do |config|
|
|
27
29
|
# a real object. This is generally recommended.
|
28
30
|
mocks.verify_partial_doubles = true
|
29
31
|
end
|
30
|
-
end
|
31
32
|
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
yield $stdout.string
|
37
|
-
ensure
|
38
|
-
$stdout = STDOUT
|
33
|
+
config.before do
|
34
|
+
# reset logger before each invocation so we can catch stdout
|
35
|
+
GovukSeedCrawler.logger = nil
|
36
|
+
end
|
39
37
|
end
|
metadata
CHANGED
@@ -1,208 +1,207 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: govuk_seed_crawler
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
5
|
-
prerelease:
|
4
|
+
version: 3.1.0
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- GOV.UK developers
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date:
|
11
|
+
date: 2022-10-31 00:00:00.000000000 Z
|
13
12
|
dependencies:
|
14
13
|
- !ruby/object:Gem::Dependency
|
15
14
|
name: bunny
|
16
15
|
requirement: !ruby/object:Gem::Requirement
|
17
|
-
none: false
|
18
16
|
requirements:
|
19
|
-
- -
|
17
|
+
- - ">="
|
20
18
|
- !ruby/object:Gem::Version
|
21
19
|
version: '1.3'
|
20
|
+
- - "<"
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: '3.0'
|
22
23
|
type: :runtime
|
23
24
|
prerelease: false
|
24
25
|
version_requirements: !ruby/object:Gem::Requirement
|
25
|
-
none: false
|
26
26
|
requirements:
|
27
|
-
- -
|
27
|
+
- - ">="
|
28
28
|
- !ruby/object:Gem::Version
|
29
29
|
version: '1.3'
|
30
|
-
-
|
31
|
-
name: crack
|
32
|
-
requirement: !ruby/object:Gem::Requirement
|
33
|
-
none: false
|
34
|
-
requirements:
|
35
|
-
- - '='
|
36
|
-
- !ruby/object:Gem::Version
|
37
|
-
version: 0.4.4
|
38
|
-
type: :runtime
|
39
|
-
prerelease: false
|
40
|
-
version_requirements: !ruby/object:Gem::Requirement
|
41
|
-
none: false
|
42
|
-
requirements:
|
43
|
-
- - '='
|
30
|
+
- - "<"
|
44
31
|
- !ruby/object:Gem::Version
|
45
|
-
version: 0
|
32
|
+
version: '3.0'
|
46
33
|
- !ruby/object:Gem::Dependency
|
47
|
-
name:
|
34
|
+
name: crack
|
48
35
|
requirement: !ruby/object:Gem::Requirement
|
49
|
-
none: false
|
50
36
|
requirements:
|
51
37
|
- - '='
|
52
38
|
- !ruby/object:Gem::Version
|
53
|
-
version:
|
39
|
+
version: 0.4.5
|
54
40
|
type: :runtime
|
55
41
|
prerelease: false
|
56
42
|
version_requirements: !ruby/object:Gem::Requirement
|
57
|
-
none: false
|
58
43
|
requirements:
|
59
44
|
- - '='
|
60
45
|
- !ruby/object:Gem::Version
|
61
|
-
version:
|
46
|
+
version: 0.4.5
|
62
47
|
- !ruby/object:Gem::Dependency
|
63
48
|
name: nokogiri
|
64
49
|
requirement: !ruby/object:Gem::Requirement
|
65
|
-
none: false
|
66
50
|
requirements:
|
67
|
-
- -
|
51
|
+
- - ">="
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '1.6'
|
54
|
+
- - "<"
|
68
55
|
- !ruby/object:Gem::Version
|
69
|
-
version: 1.
|
56
|
+
version: '1.14'
|
70
57
|
type: :runtime
|
71
58
|
prerelease: false
|
72
59
|
version_requirements: !ruby/object:Gem::Requirement
|
73
|
-
none: false
|
74
60
|
requirements:
|
75
|
-
- -
|
61
|
+
- - ">="
|
76
62
|
- !ruby/object:Gem::Version
|
77
|
-
version: 1.6
|
63
|
+
version: '1.6'
|
64
|
+
- - "<"
|
65
|
+
- !ruby/object:Gem::Version
|
66
|
+
version: '1.14'
|
78
67
|
- !ruby/object:Gem::Dependency
|
79
68
|
name: public_suffix
|
80
69
|
requirement: !ruby/object:Gem::Requirement
|
81
|
-
none: false
|
82
70
|
requirements:
|
83
|
-
- -
|
71
|
+
- - ">="
|
84
72
|
- !ruby/object:Gem::Version
|
85
73
|
version: 1.4.6
|
74
|
+
- - "<"
|
75
|
+
- !ruby/object:Gem::Version
|
76
|
+
version: 5.1.0
|
86
77
|
type: :runtime
|
87
78
|
prerelease: false
|
88
79
|
version_requirements: !ruby/object:Gem::Requirement
|
89
|
-
none: false
|
90
80
|
requirements:
|
91
|
-
- -
|
81
|
+
- - ">="
|
92
82
|
- !ruby/object:Gem::Version
|
93
83
|
version: 1.4.6
|
84
|
+
- - "<"
|
85
|
+
- !ruby/object:Gem::Version
|
86
|
+
version: 5.1.0
|
94
87
|
- !ruby/object:Gem::Dependency
|
95
88
|
name: sitemap-parser
|
96
89
|
requirement: !ruby/object:Gem::Requirement
|
97
|
-
none: false
|
98
90
|
requirements:
|
99
|
-
- -
|
91
|
+
- - ">="
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: '0.3'
|
94
|
+
- - "<"
|
100
95
|
- !ruby/object:Gem::Version
|
101
|
-
version: 0.
|
96
|
+
version: '0.6'
|
102
97
|
type: :runtime
|
103
98
|
prerelease: false
|
104
99
|
version_requirements: !ruby/object:Gem::Requirement
|
105
|
-
none: false
|
106
100
|
requirements:
|
107
|
-
- -
|
101
|
+
- - ">="
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0.3'
|
104
|
+
- - "<"
|
108
105
|
- !ruby/object:Gem::Version
|
109
|
-
version: 0.
|
106
|
+
version: '0.6'
|
110
107
|
- !ruby/object:Gem::Dependency
|
111
108
|
name: slop
|
112
109
|
requirement: !ruby/object:Gem::Requirement
|
113
|
-
none: false
|
114
110
|
requirements:
|
115
|
-
- - ~>
|
111
|
+
- - "~>"
|
116
112
|
- !ruby/object:Gem::Version
|
117
113
|
version: 3.6.0
|
118
114
|
type: :runtime
|
119
115
|
prerelease: false
|
120
116
|
version_requirements: !ruby/object:Gem::Requirement
|
121
|
-
none: false
|
122
117
|
requirements:
|
123
|
-
- - ~>
|
118
|
+
- - "~>"
|
124
119
|
- !ruby/object:Gem::Version
|
125
120
|
version: 3.6.0
|
126
121
|
- !ruby/object:Gem::Dependency
|
127
122
|
name: pry
|
128
123
|
requirement: !ruby/object:Gem::Requirement
|
129
|
-
none: false
|
130
124
|
requirements:
|
131
|
-
- -
|
125
|
+
- - ">="
|
132
126
|
- !ruby/object:Gem::Version
|
133
127
|
version: '0'
|
134
128
|
type: :development
|
135
129
|
prerelease: false
|
136
130
|
version_requirements: !ruby/object:Gem::Requirement
|
137
|
-
none: false
|
138
131
|
requirements:
|
139
|
-
- -
|
132
|
+
- - ">="
|
140
133
|
- !ruby/object:Gem::Version
|
141
134
|
version: '0'
|
142
135
|
- !ruby/object:Gem::Dependency
|
143
136
|
name: rake
|
144
137
|
requirement: !ruby/object:Gem::Requirement
|
145
|
-
none: false
|
146
138
|
requirements:
|
147
|
-
- -
|
139
|
+
- - ">="
|
148
140
|
- !ruby/object:Gem::Version
|
149
|
-
version: '0
|
141
|
+
version: '0'
|
150
142
|
type: :development
|
151
143
|
prerelease: false
|
152
144
|
version_requirements: !ruby/object:Gem::Requirement
|
153
|
-
none: false
|
154
145
|
requirements:
|
155
|
-
- -
|
146
|
+
- - ">="
|
156
147
|
- !ruby/object:Gem::Version
|
157
|
-
version: '0
|
148
|
+
version: '0'
|
158
149
|
- !ruby/object:Gem::Dependency
|
159
150
|
name: rspec
|
160
151
|
requirement: !ruby/object:Gem::Requirement
|
161
|
-
none: false
|
162
152
|
requirements:
|
163
|
-
- - ~>
|
153
|
+
- - "~>"
|
164
154
|
- !ruby/object:Gem::Version
|
165
155
|
version: '3.0'
|
166
156
|
type: :development
|
167
157
|
prerelease: false
|
168
158
|
version_requirements: !ruby/object:Gem::Requirement
|
169
|
-
none: false
|
170
159
|
requirements:
|
171
|
-
- - ~>
|
160
|
+
- - "~>"
|
172
161
|
- !ruby/object:Gem::Version
|
173
162
|
version: '3.0'
|
174
163
|
- !ruby/object:Gem::Dependency
|
175
164
|
name: rspec-mocks
|
176
165
|
requirement: !ruby/object:Gem::Requirement
|
177
|
-
none: false
|
178
166
|
requirements:
|
179
|
-
- - ~>
|
167
|
+
- - "~>"
|
180
168
|
- !ruby/object:Gem::Version
|
181
169
|
version: '3.0'
|
182
170
|
type: :development
|
183
171
|
prerelease: false
|
184
172
|
version_requirements: !ruby/object:Gem::Requirement
|
185
|
-
none: false
|
186
173
|
requirements:
|
187
|
-
- - ~>
|
174
|
+
- - "~>"
|
188
175
|
- !ruby/object:Gem::Version
|
189
176
|
version: '3.0'
|
177
|
+
- !ruby/object:Gem::Dependency
|
178
|
+
name: rubocop-govuk
|
179
|
+
requirement: !ruby/object:Gem::Requirement
|
180
|
+
requirements:
|
181
|
+
- - '='
|
182
|
+
- !ruby/object:Gem::Version
|
183
|
+
version: 4.8.0
|
184
|
+
type: :development
|
185
|
+
prerelease: false
|
186
|
+
version_requirements: !ruby/object:Gem::Requirement
|
187
|
+
requirements:
|
188
|
+
- - '='
|
189
|
+
- !ruby/object:Gem::Version
|
190
|
+
version: 4.8.0
|
190
191
|
- !ruby/object:Gem::Dependency
|
191
192
|
name: webmock
|
192
193
|
requirement: !ruby/object:Gem::Requirement
|
193
|
-
none: false
|
194
194
|
requirements:
|
195
|
-
- - ~>
|
195
|
+
- - "~>"
|
196
196
|
- !ruby/object:Gem::Version
|
197
|
-
version:
|
197
|
+
version: '3.18'
|
198
198
|
type: :development
|
199
199
|
prerelease: false
|
200
200
|
version_requirements: !ruby/object:Gem::Requirement
|
201
|
-
none: false
|
202
201
|
requirements:
|
203
|
-
- - ~>
|
202
|
+
- - "~>"
|
204
203
|
- !ruby/object:Gem::Version
|
205
|
-
version:
|
204
|
+
version: '3.18'
|
206
205
|
description:
|
207
206
|
email:
|
208
207
|
- govuk-dev@digital.cabinet-office.gov.uk
|
@@ -211,12 +210,15 @@ executables:
|
|
211
210
|
extensions: []
|
212
211
|
extra_rdoc_files: []
|
213
212
|
files:
|
214
|
-
- .
|
215
|
-
- .
|
216
|
-
- .
|
213
|
+
- ".github/dependabot.yml"
|
214
|
+
- ".github/workflows/ci.yml"
|
215
|
+
- ".gitignore"
|
216
|
+
- ".rspec"
|
217
|
+
- ".rubocop.yml"
|
218
|
+
- ".ruby-version"
|
219
|
+
- CHANGELOG.md
|
217
220
|
- Gemfile
|
218
|
-
-
|
219
|
-
- LICENSE.txt
|
221
|
+
- LICENCE
|
220
222
|
- README.md
|
221
223
|
- Rakefile
|
222
224
|
- bin/seed-crawler
|
@@ -238,27 +240,25 @@ files:
|
|
238
240
|
homepage: https://github.com/alphagov/govuk_seed_crawler
|
239
241
|
licenses:
|
240
242
|
- MIT
|
243
|
+
metadata: {}
|
241
244
|
post_install_message:
|
242
245
|
rdoc_options: []
|
243
246
|
require_paths:
|
244
247
|
- lib
|
245
248
|
required_ruby_version: !ruby/object:Gem::Requirement
|
246
|
-
none: false
|
247
249
|
requirements:
|
248
|
-
- -
|
250
|
+
- - ">="
|
249
251
|
- !ruby/object:Gem::Version
|
250
|
-
version: '
|
252
|
+
version: '2.7'
|
251
253
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
252
|
-
none: false
|
253
254
|
requirements:
|
254
|
-
- -
|
255
|
+
- - ">="
|
255
256
|
- !ruby/object:Gem::Version
|
256
257
|
version: '0'
|
257
258
|
requirements: []
|
258
|
-
|
259
|
-
rubygems_version: 1.8.23.2
|
259
|
+
rubygems_version: 3.3.24
|
260
260
|
signing_key:
|
261
|
-
specification_version:
|
261
|
+
specification_version: 4
|
262
262
|
summary: Retrieves a list of URLs to seed the crawler by publishing them to a RabbitMQ
|
263
263
|
exchange.
|
264
264
|
test_files:
|
data/Jenkinsfile
DELETED
@@ -1,49 +0,0 @@
|
|
1
|
-
#!/usr/bin/env groovy
|
2
|
-
|
3
|
-
library("govuk")
|
4
|
-
|
5
|
-
node {
|
6
|
-
try {
|
7
|
-
// This doesn't use the buildProject as this project doesn't conform to
|
8
|
-
// required norms (e.g. running in Ruby 1.9, non-standard tests).
|
9
|
-
|
10
|
-
repoName = JOB_NAME.split('/')[0]
|
11
|
-
|
12
|
-
stage("Checkout") {
|
13
|
-
govuk.checkoutFromGitHubWithSSH(repoName)
|
14
|
-
}
|
15
|
-
|
16
|
-
stage("Clean up workspace") {
|
17
|
-
govuk.cleanupGit()
|
18
|
-
}
|
19
|
-
|
20
|
-
stage('Configure environment') {
|
21
|
-
govuk.setEnvar('RBENV_VERSION', '1.9.3-p550')
|
22
|
-
}
|
23
|
-
|
24
|
-
stage('Bundle install') {
|
25
|
-
govuk.bundleGem()
|
26
|
-
}
|
27
|
-
|
28
|
-
stage('Spec tests') {
|
29
|
-
govuk.runRakeTask('spec')
|
30
|
-
}
|
31
|
-
|
32
|
-
stage('Integration tests') {
|
33
|
-
govuk.runRakeTask('integration')
|
34
|
-
}
|
35
|
-
|
36
|
-
if (env.BRANCH_NAME == 'master') {
|
37
|
-
stage('Publish Gem to Rubygems') {
|
38
|
-
govuk.publishGem(repoName, repoName, 'master')
|
39
|
-
}
|
40
|
-
}
|
41
|
-
} catch (e) {
|
42
|
-
currentBuild.result = "FAILED"
|
43
|
-
step([$class: 'Mailer',
|
44
|
-
notifyEveryUnstableBuild: true,
|
45
|
-
recipients: 'govuk-ci-notifications@digital.cabinet-office.gov.uk',
|
46
|
-
sendToIndividuals: true])
|
47
|
-
throw e
|
48
|
-
}
|
49
|
-
}
|