govuk_seed_crawler 3.0.0 → 3.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2ee27868f4df61a72b764044b8d04ad6f6b252a63cd1f365d1056a7d331a85d1
4
- data.tar.gz: 514ee036b88544be5a7935dcc2103e07aa7fe82c2acc0841109d15e572a9ebf9
3
+ metadata.gz: b0d80cc9a1bf29784700e23b1f8c883fc6012a2e7a47776cc3af04ca87f253e7
4
+ data.tar.gz: ca2d04361ff8e9d9b3cf34f0432eb786b026ffcfd035b1a5faeed6c32801880a
5
5
  SHA512:
6
- metadata.gz: 3d005f87f519187b619e1dbbeabcc441d0ccb0daf652db2d6b515f7abf2de49e9090f457e208afc5128d68db65bf618dcbf5bc74e54bb31c16bf6dbf405495c1
7
- data.tar.gz: ec43d4205fd5714be7ab39669a9397371792802b3f50fa97d580c7327db018afd8409988b54c5876aba603456cd3019c454ce41736dad054e5f58d7cc79a80ed
6
+ metadata.gz: 895569f56489bbfce199620c211742b226b56d0cc8a03d17451b8a382f2743ba1d7c7821ff9fe75925d411d9b99a595981cf72fb5334907f59f33bf0a24ec000
7
+ data.tar.gz: 8ea165b2ed3d62a63a5a331b03e14e2fe0dd68713bf7732d28bd2b5cfc4e20a6b5a4932c503fc0514d1a0ea1c6d58343afe864612d8190a2521638d62eea0e83
@@ -0,0 +1,10 @@
1
+ version: 2
2
+ updates:
3
+ - package-ecosystem: bundler
4
+ directory: /
5
+ schedule:
6
+ interval: daily
7
+ - package-ecosystem: "github-actions"
8
+ directory: /
9
+ schedule:
10
+ interval: daily
@@ -0,0 +1,47 @@
1
+ on: [push, pull_request]
2
+
3
+ jobs:
4
+ # This matrix job runs the test suite against multiple Ruby versions
5
+ test_matrix:
6
+ strategy:
7
+ fail-fast: false
8
+ matrix:
9
+ # Due to https://github.com/actions/runner/issues/849, we have to use quotes for '3.0'
10
+ ruby: [ 2.7, '3.0', 3.1 ]
11
+ runs-on: ubuntu-latest
12
+ services:
13
+ rabbitmq:
14
+ image: rabbitmq
15
+ env:
16
+ RABBITMQ_DEFAULT_USER: rabbitmq
17
+ RABBITMQ_DEFAULT_PASS: rabbitmq
18
+ ports:
19
+ - 5672:5672
20
+ steps:
21
+ - uses: actions/checkout@v3
22
+ - uses: ruby/setup-ruby@v1
23
+ with:
24
+ ruby-version: ${{ matrix.ruby }}
25
+ bundler-cache: true
26
+ - run: bundle exec rake
27
+ env:
28
+ AMQP_USER: rabbitmq
29
+ AMQP_PASS: rabbitmq
30
+
31
+ # Branch protection rules cannot directly depend on status checks from matrix jobs.
32
+ # So instead we define `test` as a dummy job which only runs after the preceding `test_matrix` checks have passed.
33
+ # Solution inspired by: https://github.community/t/status-check-for-a-matrix-jobs/127354/3
34
+ test:
35
+ needs: test_matrix
36
+ runs-on: ubuntu-latest
37
+ steps:
38
+ - run: echo "All matrix tests have passed 🚀"
39
+
40
+ publish:
41
+ needs: test
42
+ if: ${{ github.ref == 'refs/heads/main' }}
43
+ permissions:
44
+ contents: write
45
+ uses: alphagov/govuk-infrastructure/.github/workflows/publish-rubygem.yaml@main
46
+ secrets:
47
+ GEM_HOST_API_KEY: ${{ secrets.ALPHAGOV_RUBYGEMS_API_KEY }}
data/.rubocop.yml ADDED
@@ -0,0 +1,19 @@
1
+ inherit_gem:
2
+ rubocop-govuk:
3
+ - config/default.yml
4
+ - config/rake.yml
5
+ - config/rspec.yml
6
+
7
+ inherit_mode:
8
+ merge:
9
+ - Exclude
10
+
11
+ # **************************************************************
12
+ # TRY NOT TO ADD OVERRIDES IN THIS FILE
13
+ #
14
+ # This repo is configured to follow the RuboCop GOV.UK styleguide.
15
+ # Any rules you override here will cause this repo to diverge from
16
+ # the way we write code in all other GOV.UK repos.
17
+ #
18
+ # See https://github.com/alphagov/rubocop-govuk/blob/main/CONTRIBUTING.md
19
+ # **************************************************************
data/.ruby-version CHANGED
@@ -1 +1 @@
1
- 2.6.3
1
+ 2.7.6
data/CHANGELOG.md ADDED
@@ -0,0 +1,11 @@
1
+ # Changelog
2
+
3
+ - We use the [GOV.UK versioning guidelines](https://docs.publishing.service.gov.uk/manual/publishing-a-ruby-gem.html#versioning).
4
+ - Mark breaking changes with `BREAKING:`. Be sure to include instructions on how applications should be upgraded.
5
+ - Don't include changes that are purely internal. The CHANGELOG should be a
6
+ useful summary for people upgrading their application, not a replication
7
+ of the commit log.
8
+
9
+ ## Unreleased
10
+
11
+ - Drop support for Ruby < 2.7
data/Gemfile CHANGED
@@ -1,4 +1,4 @@
1
- source 'https://rubygems.org'
1
+ source "https://rubygems.org"
2
2
 
3
3
  # Specify your gem's dependencies in govuk_seed_crawler.gemspec
4
4
  gemspec
@@ -1,6 +1,6 @@
1
- (c) 2014 Crown copyright
1
+ The MIT License (MIT)
2
2
 
3
- MIT License
3
+ Copyright (C) 2014 Crown Copyright (Government Digital Service)
4
4
 
5
5
  Permission is hereby granted, free of charge, to any person obtaining
6
6
  a copy of this software and associated documentation files (the
data/README.md CHANGED
@@ -38,3 +38,7 @@ bundle exec seed-crawler --help
38
38
  3. Commit your changes (`git commit -am 'Add some feature'`)
39
39
  4. Push to the branch (`git push origin my-new-feature`)
40
40
  5. Create new Pull Request
41
+
42
+ ## Licence
43
+
44
+ [MIT License](LICENCE)
data/Rakefile CHANGED
@@ -1,11 +1,7 @@
1
- require 'rspec/core/rake_task'
1
+ require "rspec/core/rake_task"
2
+ require "rubocop/rake_task"
2
3
 
3
- RSpec::Core::RakeTask.new(:spec) do |task|
4
- task.pattern = FileList['spec/govuk_seed_crawler/**/*_spec.rb']
5
- end
4
+ RSpec::Core::RakeTask.new(:spec)
5
+ RuboCop::RakeTask.new
6
6
 
7
- RSpec::Core::RakeTask.new(:integration) do |task|
8
- task.pattern = FileList['spec/integration/**/*_spec.rb']
9
- end
10
-
11
- task :default => :spec
7
+ task default: %i[rubocop spec]
@@ -1,36 +1,36 @@
1
- # coding: utf-8
2
- lib = File.expand_path('../lib', __FILE__)
1
+ lib = File.expand_path("lib", __dir__)
3
2
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
- require 'govuk_seed_crawler/version'
3
+ require "govuk_seed_crawler/version"
5
4
 
6
5
  Gem::Specification.new do |spec|
7
6
  spec.name = "govuk_seed_crawler"
8
7
  spec.version = GovukSeedCrawler::VERSION
9
- spec.authors = ['GOV.UK developers']
8
+ spec.authors = ["GOV.UK developers"]
10
9
  spec.email = ["govuk-dev@digital.cabinet-office.gov.uk"]
11
- spec.summary = %q{Retrieves a list of URLs to seed the crawler by publishing them to a RabbitMQ exchange.}
10
+ spec.summary = "Retrieves a list of URLs to seed the crawler by publishing them to a RabbitMQ exchange."
12
11
  spec.homepage = "https://github.com/alphagov/govuk_seed_crawler"
13
12
  spec.license = "MIT"
14
13
 
15
- spec.required_ruby_version = "~> 2.6"
14
+ spec.required_ruby_version = ">= 2.7"
16
15
 
17
16
  spec.files = `git ls-files -z`.split("\x0")
18
17
  spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
19
18
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
20
- spec.require_paths = ["lib"]
19
+ spec.require_paths = %w[lib]
21
20
 
22
- spec.add_runtime_dependency "bunny", "~> 1.3"
23
- spec.add_runtime_dependency "crack", "0.4.4"
24
- spec.add_runtime_dependency "nokogiri", "~> 1.6.0"
21
+ spec.add_runtime_dependency "bunny", ">= 1.3", "< 3.0"
22
+ spec.add_runtime_dependency "crack", "0.4.5"
23
+ spec.add_runtime_dependency "nokogiri", ">= 1.6", "< 1.14"
25
24
  # Something, somewhere, sometimes requires public_suffix.
26
25
  # public_suffix > 1.5 requires ruby > 2.
27
- spec.add_runtime_dependency "public_suffix", "~> 1.4.6"
28
- spec.add_runtime_dependency "sitemap-parser", "~> 0.3.0"
26
+ spec.add_runtime_dependency "public_suffix", ">= 1.4.6", "< 5.1.0"
27
+ spec.add_runtime_dependency "sitemap-parser", ">= 0.3", "< 0.6"
29
28
  spec.add_runtime_dependency "slop", "~> 3.6.0"
30
29
 
31
30
  spec.add_development_dependency "pry"
32
- spec.add_development_dependency "rake", "~> 0.9"
31
+ spec.add_development_dependency "rake"
33
32
  spec.add_development_dependency "rspec", "~> 3.0"
34
33
  spec.add_development_dependency "rspec-mocks", "~> 3.0"
35
- spec.add_development_dependency "webmock", "~> 1.18.0"
34
+ spec.add_development_dependency "rubocop-govuk", "4.8.0"
35
+ spec.add_development_dependency "webmock", "~> 3.18"
36
36
  end
@@ -1,4 +1,4 @@
1
- require 'bunny'
1
+ require "bunny"
2
2
 
3
3
  module GovukSeedCrawler
4
4
  class AmqpClient
@@ -21,8 +21,8 @@ module GovukSeedCrawler
21
21
 
22
22
  GovukSeedCrawler.logger.debug("Publishing '#{body}' to topic '#{topic}'")
23
23
 
24
- @channel.topic(exchange, :durable => true)
25
- .publish(body, :routing_key => topic)
24
+ @channel.topic(exchange, durable: true)
25
+ .publish(body, routing_key: topic)
26
26
  end
27
27
  end
28
28
  end
@@ -1,4 +1,4 @@
1
- require 'slop'
1
+ require "slop"
2
2
 
3
3
  module GovukSeedCrawler
4
4
  class CLIException < StandardError
@@ -12,17 +12,17 @@ module GovukSeedCrawler
12
12
 
13
13
  class CLIParser
14
14
  DEFAULTS = {
15
- :exchange => "govuk_crawler_exchange",
16
- :help => nil,
17
- :host => "localhost",
18
- :password => "guest",
19
- :port => "5672",
20
- :quiet => false,
21
- :topic => "#",
22
- :username => "guest",
23
- :verbose => false,
24
- :version => nil,
25
- :vhost => "/"
15
+ exchange: "govuk_crawler_exchange",
16
+ help: nil,
17
+ host: "localhost",
18
+ password: "guest",
19
+ port: "5672",
20
+ quiet: false,
21
+ topic: "#",
22
+ username: "guest",
23
+ verbose: false,
24
+ version: nil,
25
+ vhost: "/",
26
26
  }.freeze
27
27
 
28
28
  ENV_AMQP_PASS_KEY = "GOVUK_CRAWLER_AMQP_PASS".freeze
@@ -32,19 +32,19 @@ module GovukSeedCrawler
32
32
  end
33
33
 
34
34
  def options
35
- Slop.parse!(@argv_array, :help => true) do
36
- banner <<-EOS
37
- Usage: #{$PROGRAM_NAME} site_root [options]
35
+ Slop.parse!(@argv_array, help: true) do
36
+ banner <<~HELP
37
+ Usage: #{$PROGRAM_NAME} site_root [options]
38
38
 
39
- Seeds an AMQP topic exchange with messages, each containing a URL, for the GOV.UK Crawler Worker
40
- to consume:
39
+ Seeds an AMQP topic exchange with messages, each containing a URL, for the GOV.UK Crawler Worker
40
+ to consume:
41
41
 
42
- https://github.com/alphagov/govuk_crawler_worker
42
+ https://github.com/alphagov/govuk_crawler_worker
43
43
 
44
- The AMQP password can also be set as an environment variable and will be read from
45
- `#{ENV_AMQP_PASS_KEY}`. If both the environment variable and command-line option for password
46
- are set, the environment variable will take higher precedent.
47
- EOS
44
+ The AMQP password can also be set as an environment variable and will be read from
45
+ `#{ENV_AMQP_PASS_KEY}`. If both the environment variable and command-line option for password
46
+ are set, the environment variable will take higher precedent.
47
+ HELP
48
48
 
49
49
  on :version, "Display version and exit" do
50
50
  puts "Version: #{GovukSeedCrawler::VERSION}"
@@ -75,7 +75,7 @@ are set, the environment variable will take higher precedent.
75
75
  options_hash = opts.to_hash
76
76
  options_hash[:password] = ENV[ENV_AMQP_PASS_KEY] unless ENV[ENV_AMQP_PASS_KEY].nil?
77
77
 
78
- return options_hash, @argv_array.first
78
+ [options_hash, @argv_array.first]
79
79
  end
80
80
  end
81
81
  end
@@ -13,10 +13,10 @@ module GovukSeedCrawler
13
13
  end
14
14
 
15
15
  def run
16
- Seeder::seed(@site_root, @options)
16
+ Seeder.seed(@site_root, @options)
17
17
  end
18
18
 
19
- private
19
+ private
20
20
 
21
21
  def set_logging_level(cli_options)
22
22
  if cli_options[:verbose]
@@ -1,4 +1,4 @@
1
- require 'sitemap-parser'
1
+ require "sitemap-parser"
2
2
 
3
3
  module GovukSeedCrawler
4
4
  class Indexer
@@ -9,7 +9,7 @@ module GovukSeedCrawler
9
9
 
10
10
  GovukSeedCrawler.logger.info("Retrieving list of URLs for #{site_root}")
11
11
 
12
- sitemap = SitemapParser.new("#{site_root}/sitemap.xml", {recurse: true})
12
+ sitemap = SitemapParser.new("#{site_root}/sitemap.xml", { recurse: true })
13
13
  @urls = sitemap.to_a
14
14
 
15
15
  GovukSeedCrawler.logger.info("Found #{@urls.count} URLs")
@@ -1,3 +1,3 @@
1
1
  module GovukSeedCrawler
2
- VERSION = "3.0.0"
2
+ VERSION = "3.1.0".freeze
3
3
  end
@@ -1,17 +1,21 @@
1
- require 'govuk_seed_crawler/amqp_client'
2
- require 'govuk_seed_crawler/cli_parser'
3
- require 'govuk_seed_crawler/cli_runner'
4
- require 'govuk_seed_crawler/indexer'
5
- require 'govuk_seed_crawler/seeder'
6
- require 'govuk_seed_crawler/version'
1
+ require "govuk_seed_crawler/amqp_client"
2
+ require "govuk_seed_crawler/cli_parser"
3
+ require "govuk_seed_crawler/cli_runner"
4
+ require "govuk_seed_crawler/indexer"
5
+ require "govuk_seed_crawler/seeder"
6
+ require "govuk_seed_crawler/version"
7
7
 
8
8
  module GovukSeedCrawler
9
- def self.logger
10
- unless @logger
11
- @logger = Logger.new(STDOUT)
12
- @logger.level = Logger::INFO
13
- end
9
+ class << self
10
+ attr_writer :logger
11
+
12
+ def logger
13
+ unless @logger
14
+ @logger = Logger.new($stdout)
15
+ @logger.level = Logger::INFO
16
+ end
14
17
 
15
- @logger
18
+ @logger
19
+ end
16
20
  end
17
21
  end
@@ -1,55 +1,54 @@
1
- require 'spec_helper'
2
-
3
1
  describe GovukSeedCrawler::AmqpClient do
4
2
  let(:exchange) { "govuk_seed_crawler_spec_exchange" }
5
- let(:options) {{
6
- :host => ENV.fetch("AMQP_HOST", "localhost"),
7
- :user => ENV.fetch("AMQP_USER", "govuk_seed_crawler"),
8
- :pass => ENV.fetch("AMQP_PASS", "govuk_seed_crawler"),
9
- }}
10
- subject { GovukSeedCrawler::AmqpClient.new(options) }
3
+ let(:options) do
4
+ {
5
+ host: ENV.fetch("AMQP_HOST", "localhost"),
6
+ user: ENV.fetch("AMQP_USER", "govuk_seed_crawler"),
7
+ pass: ENV.fetch("AMQP_PASS", "govuk_seed_crawler"),
8
+ }
9
+ end
11
10
 
12
11
  it "responds to #channel" do
13
- expect(subject).to respond_to(:channel)
12
+ expect(described_class.new(options)).to respond_to(:channel)
14
13
  end
15
14
 
16
15
  it "responds to #close" do
17
- expect(subject).to respond_to(:close)
16
+ expect(described_class.new(options)).to respond_to(:close)
18
17
  end
19
18
 
20
19
  it "closes the connection to the AMQP server" do
21
- mock_bunny = double(:mock_bunny,
22
- :start => true, :create_channel => true, :close => true)
20
+ mock_bunny = instance_double(Bunny::Session,
21
+ start: true,
22
+ create_channel: true,
23
+ close: true)
23
24
  allow(Bunny).to receive(:new).and_return(mock_bunny)
24
25
  expect(mock_bunny).to receive(:close).once
25
26
 
26
- subject.close
27
+ described_class.new(options).close
27
28
  end
28
29
 
29
- context "#publish" do
30
- context "error handling" do
31
- it "raises an exception if exchange is nil" do
32
- expect {
33
- subject.publish(nil, "#", "some body")
34
- }.to raise_exception(RuntimeError, "Exchange cannot be nil")
35
- end
36
-
37
- it "raises an exception if topic is nil" do
38
- expect {
39
- subject.publish(exchange, nil, "some body")
40
- }.to raise_exception(RuntimeError, "Topic cannot be nil")
41
- end
42
-
43
- it "raises an exception if body is nil" do
44
- expect {
45
- subject.publish(exchange, "#", nil)
46
- }.to raise_exception(RuntimeError, "Message body cannot be nil")
47
- end
30
+ describe "#publish" do
31
+ it "raises an exception if exchange is nil" do
32
+ expect {
33
+ described_class.new(options).publish(nil, "#", "some body")
34
+ }.to raise_exception(RuntimeError, "Exchange cannot be nil")
35
+ end
36
+
37
+ it "raises an exception if topic is nil" do
38
+ expect {
39
+ described_class.new(options).publish(exchange, nil, "some body")
40
+ }.to raise_exception(RuntimeError, "Topic cannot be nil")
41
+ end
42
+
43
+ it "raises an exception if body is nil" do
44
+ expect {
45
+ described_class.new(options).publish(exchange, "#", nil)
46
+ }.to raise_exception(RuntimeError, "Message body cannot be nil")
48
47
  end
49
48
 
50
49
  it "allows publishing against an exchange" do
51
- expect(subject.publish(exchange, "#", "some body"))
52
- .to_not be_nil
50
+ expect(described_class.new(options).publish(exchange, "#", "some body"))
51
+ .not_to be_nil
53
52
  end
54
53
  end
55
54
  end
@@ -1,28 +1,26 @@
1
- require 'spec_helper'
2
-
3
1
  describe GovukSeedCrawler::CLIParser do
4
2
  it "requires the site_root to be provided" do
5
3
  expect {
6
- GovukSeedCrawler::CLIParser.new([]).parse
4
+ described_class.new([]).parse
7
5
  }.to raise_exception(GovukSeedCrawler::CLIException, "site_root must be provided")
8
6
  end
9
7
 
10
8
  it "provides the defaults when just given the site_root" do
11
- options, site_root = GovukSeedCrawler::CLIParser.new(["https://www.example.com"]).parse
9
+ options, site_root = described_class.new(["https://www.example.com"]).parse
12
10
 
13
11
  expect(options).to eq(GovukSeedCrawler::CLIParser::DEFAULTS)
14
12
  expect(site_root).to eq("https://www.example.com")
15
13
  end
16
14
 
17
- it "should tell us when we've given too many arguments" do
15
+ it "tells us when we've given too many arguments" do
18
16
  expect {
19
- GovukSeedCrawler::CLIParser.new(["a", "b"]).parse
17
+ described_class.new(%w[a b]).parse
20
18
  }.to raise_exception(GovukSeedCrawler::CLIException, "too many arguments provided")
21
19
  end
22
20
 
23
- it "should nest the help message in with any CLIExceptions we raise" do
21
+ it "nests the help message in with any CLIExceptions we raise" do
24
22
  expect {
25
- GovukSeedCrawler::CLIParser.new(["a", "b"]).parse
23
+ described_class.new(%w[a b]).parse
26
24
  }.to raise_exception(GovukSeedCrawler::CLIException) { |e|
27
25
  expect(e.help).to include("Usage: ")
28
26
  }
@@ -31,48 +29,36 @@ describe GovukSeedCrawler::CLIParser do
31
29
  describe "catching STDOUT" do
32
30
  it "shows the help banner when provided -h" do
33
31
  # Get a valid options response as help closes early with SystemExit.
34
- options = GovukSeedCrawler::CLIParser.new(["http://www.foo.com/"]).options
35
-
36
- temp_stdout do |caught_stdout|
37
- expect {
38
- _, _ = GovukSeedCrawler::CLIParser.new(["-h"]).parse
39
- }.to raise_exception(SystemExit) { |e|
40
- expect(e.status).to eq(0)
41
- }
32
+ options = described_class.new(["http://www.foo.com/"]).options
42
33
 
43
- expect(caught_stdout.strip).to eq(options.help)
44
- end
34
+ expect { described_class.new(["-h"]).parse }
35
+ .to output("#{options.help}\n").to_stdout
36
+ .and raise_exception(SystemExit) { |e| expect(e.status).to eq(0) }
45
37
  end
46
38
 
47
- it "should show the version number and exit" do
48
- temp_stdout do |caught_stdout|
49
- expect {
50
- _, _ = GovukSeedCrawler::CLIParser.new(["--version"]).parse
51
- }.to raise_exception(SystemExit) { |e|
52
- expect(e.status).to eq(0)
53
- }
54
-
55
- expect(caught_stdout.strip).to eq("Version: #{GovukSeedCrawler::VERSION}")
56
- end
39
+ it "shows the version number and exit" do
40
+ expect { described_class.new(["--version"]).parse }
41
+ .to output("Version: #{GovukSeedCrawler::VERSION}\n").to_stdout
42
+ .and raise_exception(SystemExit) { |e| expect(e.status).to eq(0) }
57
43
  end
58
44
  end
59
45
 
60
46
  describe "passing in valid arguments" do
61
- let(:arguments) {
47
+ let(:arguments) do
62
48
  [
63
- "https://www.override.com/",
64
- "--host rabbitmq.some.custom.vhost",
65
- "--port 4567",
66
- "--username foo",
67
- "--password bar",
68
- "--exchange some_custom_exchange",
69
- "--topic some_custom_topic",
70
- "--vhost a_vhost",
71
- "--verbose"
49
+ "https://www.override.com/",
50
+ "--host rabbitmq.some.custom.vhost",
51
+ "--port 4567",
52
+ "--username foo",
53
+ "--password bar",
54
+ "--exchange some_custom_exchange",
55
+ "--topic some_custom_topic",
56
+ "--vhost a_vhost",
57
+ "--verbose",
72
58
  ].join(" ").split(" ")
73
- }
59
+ end
74
60
 
75
- it "should override all of the default arguments that we're providing" do
61
+ it "overrides all of the default arguments that we're providing" do
76
62
  overriden = {
77
63
  host: "rabbitmq.some.custom.vhost",
78
64
  port: "4567",
@@ -84,14 +70,14 @@ describe GovukSeedCrawler::CLIParser do
84
70
  quiet: false,
85
71
  verbose: true,
86
72
  version: nil,
87
- vhost: "a_vhost"
73
+ vhost: "a_vhost",
88
74
  }
89
75
 
90
- expect(GovukSeedCrawler::CLIParser.new(arguments).parse.first).to eq(overriden)
76
+ expect(described_class.new(arguments).parse.first).to eq(overriden)
91
77
  end
92
78
 
93
- it "should set the --quiet value" do
94
- options, _ = GovukSeedCrawler::CLIParser.new(["foo.com", "--quiet"]).parse
79
+ it "sets the --quiet value" do
80
+ options, = described_class.new(["foo.com", "--quiet"]).parse
95
81
  expect(options).to eq(GovukSeedCrawler::CLIParser::DEFAULTS.merge(quiet: true))
96
82
  end
97
83
 
@@ -107,14 +93,14 @@ describe GovukSeedCrawler::CLIParser do
107
93
  it "sets the password if set using an environment variable" do
108
94
  set_amqp_pass("foobar")
109
95
 
110
- expect(GovukSeedCrawler::CLIParser.new(["http://www.example.com"]).parse.first)
96
+ expect(described_class.new(["http://www.example.com"]).parse.first)
111
97
  .to include(password: "foobar")
112
98
  end
113
99
 
114
100
  it "picks the environment variable over the parameter if both are set" do
115
101
  set_amqp_pass("bar")
116
102
 
117
- expect(GovukSeedCrawler::CLIParser.new(["http://www.example.com", "--password", "foo"]).parse.first)
103
+ expect(described_class.new(["http://www.example.com", "--password", "foo"]).parse.first)
118
104
  .to include(password: "bar")
119
105
  end
120
106
  end
@@ -1,45 +1,25 @@
1
- require 'spec_helper'
2
-
3
1
  describe GovukSeedCrawler::CLIRunner do
4
2
  describe "printing the version" do
5
- it "should not try to connect to an AMQP server" do
3
+ it "does not try to connect to an AMQP server" do
6
4
  expect(Bunny).not_to receive(:new)
7
5
 
8
- temp_stdout do |caught_stdout|
9
- expect {
10
- GovukSeedCrawler::CLIRunner.new(["--version"]).run
11
- }.to raise_exception(SystemExit) { |exit|
12
- expect(exit.status).to eq(0)
13
- }
14
-
15
- expect(caught_stdout.strip).to eq("Version: #{GovukSeedCrawler::VERSION}")
16
- end
6
+ expect { described_class.new(["--version"]).run }
7
+ .to output("Version: #{GovukSeedCrawler::VERSION}\n").to_stdout
8
+ .and raise_exception(SystemExit) { |e| expect(e.status).to eq(0) }
17
9
  end
18
10
  end
19
11
 
20
12
  describe "catching any CLIException objects and exiting with a status 1" do
21
13
  it "prints to STDOUT for too many arguments" do
22
- temp_stdout do |caught_stdout|
23
- expect {
24
- GovukSeedCrawler::CLIRunner.new(["a", "b"])
25
- }.to raise_exception(SystemExit) { |exit|
26
- expect(exit.status).to eq(2)
27
- }
28
-
29
- expect(caught_stdout.strip).to include("too many arguments provided")
30
- end
14
+ expect { described_class.new(%w[a b]).run }
15
+ .to output(/\Atoo many arguments provided/).to_stdout
16
+ .and raise_exception(SystemExit) { |e| expect(e.status).to eq(2) }
31
17
  end
32
18
 
33
19
  it "prints to STDOUT when site_root not set" do
34
- temp_stdout do |caught_stdout|
35
- expect {
36
- GovukSeedCrawler::CLIRunner.new(["--verbose"])
37
- }.to raise_exception(SystemExit) { |exit|
38
- expect(exit.status).to eq(2)
39
- }
40
-
41
- expect(caught_stdout.strip).to include("site_root must be provided")
42
- end
20
+ expect { described_class.new(["--verbose"]).run }
21
+ .to output(/\Asite_root must be provided/).to_stdout
22
+ .and raise_exception(SystemExit) { |e| expect(e.status).to eq(2) }
43
23
  end
44
24
  end
45
25
 
@@ -49,26 +29,26 @@ describe GovukSeedCrawler::CLIRunner do
49
29
  end
50
30
 
51
31
  it "defaults to INFO" do
52
- GovukSeedCrawler::CLIRunner.new(["http://www.example.com"])
32
+ described_class.new(["http://www.example.com"])
53
33
  expect(GovukSeedCrawler.logger.level).to eq(Logger::INFO)
54
34
  end
55
35
 
56
36
  it "sets to ERROR for quite" do
57
- GovukSeedCrawler::CLIRunner.new(["http://www.example.com", "--quiet"])
37
+ described_class.new(["http://www.example.com", "--quiet"])
58
38
  expect(GovukSeedCrawler.logger.level).to eq(Logger::ERROR)
59
39
  end
60
40
 
61
41
  it "sets to DEBUG for verbose" do
62
- GovukSeedCrawler::CLIRunner.new(["http://www.example.com", "--verbose"])
42
+ described_class.new(["http://www.example.com", "--verbose"])
63
43
  expect(GovukSeedCrawler.logger.level).to eq(Logger::DEBUG)
64
44
  end
65
45
  end
66
46
 
67
47
  describe "#run" do
68
48
  it "passes all options through to seed" do
69
- expect(GovukSeedCrawler::Seeder).to receive(:seed).
70
- with("http://www.example.com", GovukSeedCrawler::CLIParser::DEFAULTS).once
71
- GovukSeedCrawler::CLIRunner.new(["http://www.example.com"]).run
49
+ expect(GovukSeedCrawler::Seeder).to receive(:seed)
50
+ .with("http://www.example.com", GovukSeedCrawler::CLIParser::DEFAULTS).once
51
+ described_class.new(["http://www.example.com"]).run
72
52
  end
73
53
  end
74
54
  end
@@ -1,21 +1,18 @@
1
- require 'spec_helper'
2
-
3
1
  describe GovukSeedCrawler::Indexer do
4
- subject { GovukSeedCrawler::Indexer.new('https://example.com') }
5
-
6
- context "under normal usage" do
7
- let(:mock_parser) do
8
- double(:mock_parser, :to_a => [])
9
- end
2
+ let(:mock_parser) { instance_double(SitemapParser, to_a: []) }
10
3
 
11
- it "responds to Indexer#urls" do
12
- allow(SitemapParser).to receive(:new).and_return(mock_parser)
13
- expect(subject).to respond_to(:urls)
14
- end
4
+ it "responds to Indexer#urls" do
5
+ allow(SitemapParser).to receive(:new).and_return(mock_parser)
6
+ instance = nil
7
+ expect { instance = described_class.new("https://example.com") }
8
+ .to output.to_stdout
9
+ expect(instance).to respond_to(:urls)
10
+ end
15
11
 
16
- it "calls SitemapParser with the sitemap file" do
17
- expect(SitemapParser).to receive(:new).with('https://example.com/sitemap.xml', {:recurse => true}).and_return(mock_parser)
18
- subject
19
- end
12
+ it "calls SitemapParser with the sitemap file" do
13
+ allow(SitemapParser).to receive(:new).with("https://example.com/sitemap.xml", { recurse: true }).and_return(mock_parser)
14
+ expect { described_class.new("https://example.com") }
15
+ .to output.to_stdout
16
+ expect(SitemapParser).to have_received(:new)
20
17
  end
21
18
  end
@@ -1,29 +1,27 @@
1
- require 'spec_helper'
2
-
3
1
  describe GovukSeedCrawler::Seeder do
4
2
  let(:exchange) { "seeder_test_exchange" }
5
3
  let(:topic) { "#" }
6
4
  let(:root_url) { "https://www.example.com" }
7
5
 
8
- let(:options) {{
9
- :exchange => exchange,
10
- :topic => topic,
11
- }}
6
+ let(:options) do
7
+ {
8
+ exchange: exchange,
9
+ topic: topic,
10
+ }
11
+ end
12
12
 
13
- let(:mock_get_urls) { double(:mock_get_urls, :urls => true) }
14
- let(:mock_amqp_client) { double(:mock_amqp_client, :close => true) }
13
+ let(:mock_get_urls) { instance_double(GovukSeedCrawler::Indexer, urls: true) }
14
+ let(:mock_amqp_client) { instance_double(GovukSeedCrawler::AmqpClient, close: true) }
15
15
 
16
16
  let(:urls) do
17
17
  [
18
- "https://example.com/foo",
19
- "https://example.com/bar",
20
- "https://example.com/baz",
18
+ "https://example.com/foo",
19
+ "https://example.com/bar",
20
+ "https://example.com/baz",
21
21
  ]
22
22
  end
23
23
 
24
- subject { GovukSeedCrawler::Seeder::seed(root_url, options) }
25
-
26
- before(:each) do
24
+ before do
27
25
  allow(GovukSeedCrawler::Indexer).to receive(:new)
28
26
  .with(root_url)
29
27
  .and_return(mock_get_urls)
@@ -32,20 +30,20 @@ describe GovukSeedCrawler::Seeder do
32
30
  .with(options).and_return(mock_amqp_client)
33
31
  end
34
32
 
35
- context "under normal usage" do
36
- it "publishes urls to the queue" do
37
- urls.each do |url|
38
- expect(mock_amqp_client).to receive(:publish)
39
- .with(exchange, topic, url)
40
- end
41
-
42
- subject
33
+ it "publishes urls to the queue" do
34
+ urls.each do |url|
35
+ expect(mock_amqp_client).to receive(:publish)
36
+ .with(exchange, topic, url)
43
37
  end
44
38
 
45
- it "closes the connection when done" do
46
- allow(mock_amqp_client).to receive(:publish)
47
- expect(mock_amqp_client).to receive(:close)
48
- subject
49
- end
39
+ expect { described_class.seed(root_url, options) }
40
+ .to output.to_stdout
41
+ end
42
+
43
+ it "closes the connection when done" do
44
+ allow(mock_amqp_client).to receive(:publish)
45
+ expect(mock_amqp_client).to receive(:close)
46
+ expect { described_class.seed(root_url, options) }
47
+ .to output.to_stdout
50
48
  end
51
49
  end
@@ -1,9 +1,8 @@
1
- require 'json'
2
- require 'spec_helper'
1
+ require "json"
3
2
 
4
3
  describe GovukSeedCrawler do
5
4
  def stub_sitemap
6
- sitemap = %{<?xml version="1.0" encoding="UTF-8"?>
5
+ sitemap = %(<?xml version="1.0" encoding="UTF-8"?>
7
6
  <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
8
7
  <url>
9
8
  <loc>https://www.gov.uk/</loc>
@@ -15,45 +14,46 @@ describe GovukSeedCrawler do
15
14
  <loc>https://www.gov.uk/help</loc>
16
15
  </url>
17
16
  </urlset>
18
- }
17
+ )
19
18
 
20
- stub_request(:get, "https://www.gov.uk/sitemap.xml").
21
- to_return(:status => 200, :body => sitemap, :headers => {})
19
+ stub_request(:get, "https://www.gov.uk/sitemap.xml")
20
+ .to_return(status: 200, body: sitemap, headers: {})
22
21
  end
23
22
 
23
+ subject { GovukSeedCrawler::Seeder.seed(site_root, options) }
24
+
24
25
  let(:vhost) { "/" }
25
26
  let(:exchange_name) { "govuk_seed_crawler_integration_exchange" }
26
27
  let(:queue_name) { "govuk_seed_crawler_integration_queue" }
27
28
  let(:topic) { "#" }
28
29
  let(:site_root) { "https://www.gov.uk" }
29
- let(:options) {{
30
- :host => ENV.fetch("AMQP_HOST", "localhost"),
31
- :user => ENV.fetch("AMQP_USER", "govuk_seed_crawler"),
32
- :pass => ENV.fetch("AMQP_PASS", "govuk_seed_crawler"),
33
- :exchange => exchange_name,
34
- :topic => topic
35
- }}
36
- let(:rabbitmq_client) { GovukSeedCrawler::AmqpClient.new(options) }
37
-
38
- subject { GovukSeedCrawler::Seeder::seed(site_root, options) }
39
-
40
- before(:each) do
41
- @exchange = rabbitmq_client.channel.topic(exchange_name, :durable => true)
42
- @queue = rabbitmq_client.channel.queue(queue_name)
43
- @queue.bind(@exchange, :routing_key => topic)
30
+ let(:options) do
31
+ {
32
+ host: ENV.fetch("AMQP_HOST", "localhost"),
33
+ user: ENV.fetch("AMQP_USER", "govuk_seed_crawler"),
34
+ pass: ENV.fetch("AMQP_PASS", "govuk_seed_crawler"),
35
+ exchange: exchange_name,
36
+ topic: topic,
37
+ }
44
38
  end
45
-
46
- after(:each) do
47
- @queue.unbind(@exchange)
48
- @queue.delete
49
- @exchange.delete
39
+ let(:rabbitmq_client) { GovukSeedCrawler::AmqpClient.new(options) }
40
+ let(:exchange) { rabbitmq_client.channel.topic(exchange_name, durable: true) }
41
+ let(:queue) { rabbitmq_client.channel.queue(queue_name) }
42
+
43
+ around do |example|
44
+ queue.bind(exchange, routing_key: topic)
45
+ example.run
46
+ queue.unbind(exchange)
47
+ queue.delete
48
+ exchange.delete
50
49
  rabbitmq_client.close
51
50
  end
52
51
 
53
52
  it "publishes URLs it finds to an AMQP topic exchange" do
54
53
  stub_sitemap
55
- subject
54
+ expect { GovukSeedCrawler::Seeder.seed(site_root, options) }
55
+ .to output.to_stdout
56
56
 
57
- expect(@queue.message_count).to be(3)
57
+ expect(queue.message_count).to be(3)
58
58
  end
59
59
  end
data/spec/spec_helper.rb CHANGED
@@ -1,5 +1,7 @@
1
- require 'govuk_seed_crawler'
2
- require 'webmock/rspec'
1
+ require "govuk_seed_crawler"
2
+ require "webmock/rspec"
3
+
4
+ WebMock.disable_net_connect!
3
5
 
4
6
  RSpec.configure do |config|
5
7
  config.order = :random
@@ -27,13 +29,9 @@ RSpec.configure do |config|
27
29
  # a real object. This is generally recommended.
28
30
  mocks.verify_partial_doubles = true
29
31
  end
30
- end
31
32
 
32
- WebMock.disable_net_connect!
33
-
34
- def temp_stdout
35
- $stdout = StringIO.new
36
- yield $stdout.string
37
- ensure
38
- $stdout = STDOUT
33
+ config.before do
34
+ # reset logger before each invocation so we can catch stdout
35
+ GovukSeedCrawler.logger = nil
36
+ end
39
37
  end
metadata CHANGED
@@ -1,85 +1,109 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: govuk_seed_crawler
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.0
4
+ version: 3.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - GOV.UK developers
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-07-22 00:00:00.000000000 Z
11
+ date: 2022-10-31 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bunny
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - "~>"
17
+ - - ">="
18
18
  - !ruby/object:Gem::Version
19
19
  version: '1.3'
20
+ - - "<"
21
+ - !ruby/object:Gem::Version
22
+ version: '3.0'
20
23
  type: :runtime
21
24
  prerelease: false
22
25
  version_requirements: !ruby/object:Gem::Requirement
23
26
  requirements:
24
- - - "~>"
27
+ - - ">="
25
28
  - !ruby/object:Gem::Version
26
29
  version: '1.3'
30
+ - - "<"
31
+ - !ruby/object:Gem::Version
32
+ version: '3.0'
27
33
  - !ruby/object:Gem::Dependency
28
34
  name: crack
29
35
  requirement: !ruby/object:Gem::Requirement
30
36
  requirements:
31
37
  - - '='
32
38
  - !ruby/object:Gem::Version
33
- version: 0.4.4
39
+ version: 0.4.5
34
40
  type: :runtime
35
41
  prerelease: false
36
42
  version_requirements: !ruby/object:Gem::Requirement
37
43
  requirements:
38
44
  - - '='
39
45
  - !ruby/object:Gem::Version
40
- version: 0.4.4
46
+ version: 0.4.5
41
47
  - !ruby/object:Gem::Dependency
42
48
  name: nokogiri
43
49
  requirement: !ruby/object:Gem::Requirement
44
50
  requirements:
45
- - - "~>"
51
+ - - ">="
52
+ - !ruby/object:Gem::Version
53
+ version: '1.6'
54
+ - - "<"
46
55
  - !ruby/object:Gem::Version
47
- version: 1.6.0
56
+ version: '1.14'
48
57
  type: :runtime
49
58
  prerelease: false
50
59
  version_requirements: !ruby/object:Gem::Requirement
51
60
  requirements:
52
- - - "~>"
61
+ - - ">="
62
+ - !ruby/object:Gem::Version
63
+ version: '1.6'
64
+ - - "<"
53
65
  - !ruby/object:Gem::Version
54
- version: 1.6.0
66
+ version: '1.14'
55
67
  - !ruby/object:Gem::Dependency
56
68
  name: public_suffix
57
69
  requirement: !ruby/object:Gem::Requirement
58
70
  requirements:
59
- - - "~>"
71
+ - - ">="
60
72
  - !ruby/object:Gem::Version
61
73
  version: 1.4.6
74
+ - - "<"
75
+ - !ruby/object:Gem::Version
76
+ version: 5.1.0
62
77
  type: :runtime
63
78
  prerelease: false
64
79
  version_requirements: !ruby/object:Gem::Requirement
65
80
  requirements:
66
- - - "~>"
81
+ - - ">="
67
82
  - !ruby/object:Gem::Version
68
83
  version: 1.4.6
84
+ - - "<"
85
+ - !ruby/object:Gem::Version
86
+ version: 5.1.0
69
87
  - !ruby/object:Gem::Dependency
70
88
  name: sitemap-parser
71
89
  requirement: !ruby/object:Gem::Requirement
72
90
  requirements:
73
- - - "~>"
91
+ - - ">="
74
92
  - !ruby/object:Gem::Version
75
- version: 0.3.0
93
+ version: '0.3'
94
+ - - "<"
95
+ - !ruby/object:Gem::Version
96
+ version: '0.6'
76
97
  type: :runtime
77
98
  prerelease: false
78
99
  version_requirements: !ruby/object:Gem::Requirement
79
100
  requirements:
80
- - - "~>"
101
+ - - ">="
81
102
  - !ruby/object:Gem::Version
82
- version: 0.3.0
103
+ version: '0.3'
104
+ - - "<"
105
+ - !ruby/object:Gem::Version
106
+ version: '0.6'
83
107
  - !ruby/object:Gem::Dependency
84
108
  name: slop
85
109
  requirement: !ruby/object:Gem::Requirement
@@ -112,16 +136,16 @@ dependencies:
112
136
  name: rake
113
137
  requirement: !ruby/object:Gem::Requirement
114
138
  requirements:
115
- - - "~>"
139
+ - - ">="
116
140
  - !ruby/object:Gem::Version
117
- version: '0.9'
141
+ version: '0'
118
142
  type: :development
119
143
  prerelease: false
120
144
  version_requirements: !ruby/object:Gem::Requirement
121
145
  requirements:
122
- - - "~>"
146
+ - - ">="
123
147
  - !ruby/object:Gem::Version
124
- version: '0.9'
148
+ version: '0'
125
149
  - !ruby/object:Gem::Dependency
126
150
  name: rspec
127
151
  requirement: !ruby/object:Gem::Requirement
@@ -150,20 +174,34 @@ dependencies:
150
174
  - - "~>"
151
175
  - !ruby/object:Gem::Version
152
176
  version: '3.0'
177
+ - !ruby/object:Gem::Dependency
178
+ name: rubocop-govuk
179
+ requirement: !ruby/object:Gem::Requirement
180
+ requirements:
181
+ - - '='
182
+ - !ruby/object:Gem::Version
183
+ version: 4.8.0
184
+ type: :development
185
+ prerelease: false
186
+ version_requirements: !ruby/object:Gem::Requirement
187
+ requirements:
188
+ - - '='
189
+ - !ruby/object:Gem::Version
190
+ version: 4.8.0
153
191
  - !ruby/object:Gem::Dependency
154
192
  name: webmock
155
193
  requirement: !ruby/object:Gem::Requirement
156
194
  requirements:
157
195
  - - "~>"
158
196
  - !ruby/object:Gem::Version
159
- version: 1.18.0
197
+ version: '3.18'
160
198
  type: :development
161
199
  prerelease: false
162
200
  version_requirements: !ruby/object:Gem::Requirement
163
201
  requirements:
164
202
  - - "~>"
165
203
  - !ruby/object:Gem::Version
166
- version: 1.18.0
204
+ version: '3.18'
167
205
  description:
168
206
  email:
169
207
  - govuk-dev@digital.cabinet-office.gov.uk
@@ -172,12 +210,15 @@ executables:
172
210
  extensions: []
173
211
  extra_rdoc_files: []
174
212
  files:
213
+ - ".github/dependabot.yml"
214
+ - ".github/workflows/ci.yml"
175
215
  - ".gitignore"
176
216
  - ".rspec"
217
+ - ".rubocop.yml"
177
218
  - ".ruby-version"
219
+ - CHANGELOG.md
178
220
  - Gemfile
179
- - Jenkinsfile
180
- - LICENSE.txt
221
+ - LICENCE
181
222
  - README.md
182
223
  - Rakefile
183
224
  - bin/seed-crawler
@@ -206,16 +247,16 @@ require_paths:
206
247
  - lib
207
248
  required_ruby_version: !ruby/object:Gem::Requirement
208
249
  requirements:
209
- - - "~>"
250
+ - - ">="
210
251
  - !ruby/object:Gem::Version
211
- version: '2.6'
252
+ version: '2.7'
212
253
  required_rubygems_version: !ruby/object:Gem::Requirement
213
254
  requirements:
214
255
  - - ">="
215
256
  - !ruby/object:Gem::Version
216
257
  version: '0'
217
258
  requirements: []
218
- rubygems_version: 3.0.3
259
+ rubygems_version: 3.3.24
219
260
  signing_key:
220
261
  specification_version: 4
221
262
  summary: Retrieves a list of URLs to seed the crawler by publishing them to a RabbitMQ
data/Jenkinsfile DELETED
@@ -1,49 +0,0 @@
1
- #!/usr/bin/env groovy
2
-
3
- library("govuk")
4
-
5
- node {
6
- try {
7
- // This doesn't use the buildProject as this project doesn't conform to
8
- // required norms (e.g. running in Ruby 1.9, non-standard tests).
9
-
10
- repoName = JOB_NAME.split('/')[0]
11
-
12
- stage("Checkout") {
13
- govuk.checkoutFromGitHubWithSSH(repoName)
14
- }
15
-
16
- stage("Clean up workspace") {
17
- govuk.cleanupGit()
18
- }
19
-
20
- stage('Configure environment') {
21
- govuk.setEnvar('RBENV_VERSION', '2.6.3')
22
- }
23
-
24
- stage('Bundle install') {
25
- govuk.bundleGem()
26
- }
27
-
28
- stage('Spec tests') {
29
- govuk.runRakeTask('spec')
30
- }
31
-
32
- stage('Integration tests') {
33
- govuk.runRakeTask('integration')
34
- }
35
-
36
- if (env.BRANCH_NAME == 'master') {
37
- stage('Publish Gem to Rubygems') {
38
- govuk.publishGem(repoName, repoName, 'master')
39
- }
40
- }
41
- } catch (e) {
42
- currentBuild.result = "FAILED"
43
- step([$class: 'Mailer',
44
- notifyEveryUnstableBuild: true,
45
- recipients: 'govuk-ci-notifications@digital.cabinet-office.gov.uk',
46
- sendToIndividuals: true])
47
- throw e
48
- }
49
- }