govuk_seed_crawler 3.0.0 → 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2ee27868f4df61a72b764044b8d04ad6f6b252a63cd1f365d1056a7d331a85d1
4
- data.tar.gz: 514ee036b88544be5a7935dcc2103e07aa7fe82c2acc0841109d15e572a9ebf9
3
+ metadata.gz: b0d80cc9a1bf29784700e23b1f8c883fc6012a2e7a47776cc3af04ca87f253e7
4
+ data.tar.gz: ca2d04361ff8e9d9b3cf34f0432eb786b026ffcfd035b1a5faeed6c32801880a
5
5
  SHA512:
6
- metadata.gz: 3d005f87f519187b619e1dbbeabcc441d0ccb0daf652db2d6b515f7abf2de49e9090f457e208afc5128d68db65bf618dcbf5bc74e54bb31c16bf6dbf405495c1
7
- data.tar.gz: ec43d4205fd5714be7ab39669a9397371792802b3f50fa97d580c7327db018afd8409988b54c5876aba603456cd3019c454ce41736dad054e5f58d7cc79a80ed
6
+ metadata.gz: 895569f56489bbfce199620c211742b226b56d0cc8a03d17451b8a382f2743ba1d7c7821ff9fe75925d411d9b99a595981cf72fb5334907f59f33bf0a24ec000
7
+ data.tar.gz: 8ea165b2ed3d62a63a5a331b03e14e2fe0dd68713bf7732d28bd2b5cfc4e20a6b5a4932c503fc0514d1a0ea1c6d58343afe864612d8190a2521638d62eea0e83
@@ -0,0 +1,10 @@
1
+ version: 2
2
+ updates:
3
+ - package-ecosystem: bundler
4
+ directory: /
5
+ schedule:
6
+ interval: daily
7
+ - package-ecosystem: "github-actions"
8
+ directory: /
9
+ schedule:
10
+ interval: daily
@@ -0,0 +1,47 @@
1
+ on: [push, pull_request]
2
+
3
+ jobs:
4
+ # This matrix job runs the test suite against multiple Ruby versions
5
+ test_matrix:
6
+ strategy:
7
+ fail-fast: false
8
+ matrix:
9
+ # Due to https://github.com/actions/runner/issues/849, we have to use quotes for '3.0'
10
+ ruby: [ 2.7, '3.0', 3.1 ]
11
+ runs-on: ubuntu-latest
12
+ services:
13
+ rabbitmq:
14
+ image: rabbitmq
15
+ env:
16
+ RABBITMQ_DEFAULT_USER: rabbitmq
17
+ RABBITMQ_DEFAULT_PASS: rabbitmq
18
+ ports:
19
+ - 5672:5672
20
+ steps:
21
+ - uses: actions/checkout@v3
22
+ - uses: ruby/setup-ruby@v1
23
+ with:
24
+ ruby-version: ${{ matrix.ruby }}
25
+ bundler-cache: true
26
+ - run: bundle exec rake
27
+ env:
28
+ AMQP_USER: rabbitmq
29
+ AMQP_PASS: rabbitmq
30
+
31
+ # Branch protection rules cannot directly depend on status checks from matrix jobs.
32
+ # So instead we define `test` as a dummy job which only runs after the preceding `test_matrix` checks have passed.
33
+ # Solution inspired by: https://github.community/t/status-check-for-a-matrix-jobs/127354/3
34
+ test:
35
+ needs: test_matrix
36
+ runs-on: ubuntu-latest
37
+ steps:
38
+ - run: echo "All matrix tests have passed 🚀"
39
+
40
+ publish:
41
+ needs: test
42
+ if: ${{ github.ref == 'refs/heads/main' }}
43
+ permissions:
44
+ contents: write
45
+ uses: alphagov/govuk-infrastructure/.github/workflows/publish-rubygem.yaml@main
46
+ secrets:
47
+ GEM_HOST_API_KEY: ${{ secrets.ALPHAGOV_RUBYGEMS_API_KEY }}
data/.rubocop.yml ADDED
@@ -0,0 +1,19 @@
1
+ inherit_gem:
2
+ rubocop-govuk:
3
+ - config/default.yml
4
+ - config/rake.yml
5
+ - config/rspec.yml
6
+
7
+ inherit_mode:
8
+ merge:
9
+ - Exclude
10
+
11
+ # **************************************************************
12
+ # TRY NOT TO ADD OVERRIDES IN THIS FILE
13
+ #
14
+ # This repo is configured to follow the RuboCop GOV.UK styleguide.
15
+ # Any rules you override here will cause this repo to diverge from
16
+ # the way we write code in all other GOV.UK repos.
17
+ #
18
+ # See https://github.com/alphagov/rubocop-govuk/blob/main/CONTRIBUTING.md
19
+ # **************************************************************
data/.ruby-version CHANGED
@@ -1 +1 @@
1
- 2.6.3
1
+ 2.7.6
data/CHANGELOG.md ADDED
@@ -0,0 +1,11 @@
1
+ # Changelog
2
+
3
+ - We use the [GOV.UK versioning guidelines](https://docs.publishing.service.gov.uk/manual/publishing-a-ruby-gem.html#versioning).
4
+ - Mark breaking changes with `BREAKING:`. Be sure to include instructions on how applications should be upgraded.
5
+ - Don't include changes that are purely internal. The CHANGELOG should be a
6
+ useful summary for people upgrading their application, not a replication
7
+ of the commit log.
8
+
9
+ ## Unreleased
10
+
11
+ - Drop support for Ruby < 2.7
data/Gemfile CHANGED
@@ -1,4 +1,4 @@
1
- source 'https://rubygems.org'
1
+ source "https://rubygems.org"
2
2
 
3
3
  # Specify your gem's dependencies in govuk_seed_crawler.gemspec
4
4
  gemspec
@@ -1,6 +1,6 @@
1
- (c) 2014 Crown copyright
1
+ The MIT License (MIT)
2
2
 
3
- MIT License
3
+ Copyright (C) 2014 Crown Copyright (Government Digital Service)
4
4
 
5
5
  Permission is hereby granted, free of charge, to any person obtaining
6
6
  a copy of this software and associated documentation files (the
data/README.md CHANGED
@@ -38,3 +38,7 @@ bundle exec seed-crawler --help
38
38
  3. Commit your changes (`git commit -am 'Add some feature'`)
39
39
  4. Push to the branch (`git push origin my-new-feature`)
40
40
  5. Create new Pull Request
41
+
42
+ ## Licence
43
+
44
+ [MIT License](LICENCE)
data/Rakefile CHANGED
@@ -1,11 +1,7 @@
1
- require 'rspec/core/rake_task'
1
+ require "rspec/core/rake_task"
2
+ require "rubocop/rake_task"
2
3
 
3
- RSpec::Core::RakeTask.new(:spec) do |task|
4
- task.pattern = FileList['spec/govuk_seed_crawler/**/*_spec.rb']
5
- end
4
+ RSpec::Core::RakeTask.new(:spec)
5
+ RuboCop::RakeTask.new
6
6
 
7
- RSpec::Core::RakeTask.new(:integration) do |task|
8
- task.pattern = FileList['spec/integration/**/*_spec.rb']
9
- end
10
-
11
- task :default => :spec
7
+ task default: %i[rubocop spec]
@@ -1,36 +1,36 @@
1
- # coding: utf-8
2
- lib = File.expand_path('../lib', __FILE__)
1
+ lib = File.expand_path("lib", __dir__)
3
2
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
- require 'govuk_seed_crawler/version'
3
+ require "govuk_seed_crawler/version"
5
4
 
6
5
  Gem::Specification.new do |spec|
7
6
  spec.name = "govuk_seed_crawler"
8
7
  spec.version = GovukSeedCrawler::VERSION
9
- spec.authors = ['GOV.UK developers']
8
+ spec.authors = ["GOV.UK developers"]
10
9
  spec.email = ["govuk-dev@digital.cabinet-office.gov.uk"]
11
- spec.summary = %q{Retrieves a list of URLs to seed the crawler by publishing them to a RabbitMQ exchange.}
10
+ spec.summary = "Retrieves a list of URLs to seed the crawler by publishing them to a RabbitMQ exchange."
12
11
  spec.homepage = "https://github.com/alphagov/govuk_seed_crawler"
13
12
  spec.license = "MIT"
14
13
 
15
- spec.required_ruby_version = "~> 2.6"
14
+ spec.required_ruby_version = ">= 2.7"
16
15
 
17
16
  spec.files = `git ls-files -z`.split("\x0")
18
17
  spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
19
18
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
20
- spec.require_paths = ["lib"]
19
+ spec.require_paths = %w[lib]
21
20
 
22
- spec.add_runtime_dependency "bunny", "~> 1.3"
23
- spec.add_runtime_dependency "crack", "0.4.4"
24
- spec.add_runtime_dependency "nokogiri", "~> 1.6.0"
21
+ spec.add_runtime_dependency "bunny", ">= 1.3", "< 3.0"
22
+ spec.add_runtime_dependency "crack", "0.4.5"
23
+ spec.add_runtime_dependency "nokogiri", ">= 1.6", "< 1.14"
25
24
  # Something, somewhere, sometimes requires public_suffix.
26
25
  # public_suffix > 1.5 requires ruby > 2.
27
- spec.add_runtime_dependency "public_suffix", "~> 1.4.6"
28
- spec.add_runtime_dependency "sitemap-parser", "~> 0.3.0"
26
+ spec.add_runtime_dependency "public_suffix", ">= 1.4.6", "< 5.1.0"
27
+ spec.add_runtime_dependency "sitemap-parser", ">= 0.3", "< 0.6"
29
28
  spec.add_runtime_dependency "slop", "~> 3.6.0"
30
29
 
31
30
  spec.add_development_dependency "pry"
32
- spec.add_development_dependency "rake", "~> 0.9"
31
+ spec.add_development_dependency "rake"
33
32
  spec.add_development_dependency "rspec", "~> 3.0"
34
33
  spec.add_development_dependency "rspec-mocks", "~> 3.0"
35
- spec.add_development_dependency "webmock", "~> 1.18.0"
34
+ spec.add_development_dependency "rubocop-govuk", "4.8.0"
35
+ spec.add_development_dependency "webmock", "~> 3.18"
36
36
  end
@@ -1,4 +1,4 @@
1
- require 'bunny'
1
+ require "bunny"
2
2
 
3
3
  module GovukSeedCrawler
4
4
  class AmqpClient
@@ -21,8 +21,8 @@ module GovukSeedCrawler
21
21
 
22
22
  GovukSeedCrawler.logger.debug("Publishing '#{body}' to topic '#{topic}'")
23
23
 
24
- @channel.topic(exchange, :durable => true)
25
- .publish(body, :routing_key => topic)
24
+ @channel.topic(exchange, durable: true)
25
+ .publish(body, routing_key: topic)
26
26
  end
27
27
  end
28
28
  end
@@ -1,4 +1,4 @@
1
- require 'slop'
1
+ require "slop"
2
2
 
3
3
  module GovukSeedCrawler
4
4
  class CLIException < StandardError
@@ -12,17 +12,17 @@ module GovukSeedCrawler
12
12
 
13
13
  class CLIParser
14
14
  DEFAULTS = {
15
- :exchange => "govuk_crawler_exchange",
16
- :help => nil,
17
- :host => "localhost",
18
- :password => "guest",
19
- :port => "5672",
20
- :quiet => false,
21
- :topic => "#",
22
- :username => "guest",
23
- :verbose => false,
24
- :version => nil,
25
- :vhost => "/"
15
+ exchange: "govuk_crawler_exchange",
16
+ help: nil,
17
+ host: "localhost",
18
+ password: "guest",
19
+ port: "5672",
20
+ quiet: false,
21
+ topic: "#",
22
+ username: "guest",
23
+ verbose: false,
24
+ version: nil,
25
+ vhost: "/",
26
26
  }.freeze
27
27
 
28
28
  ENV_AMQP_PASS_KEY = "GOVUK_CRAWLER_AMQP_PASS".freeze
@@ -32,19 +32,19 @@ module GovukSeedCrawler
32
32
  end
33
33
 
34
34
  def options
35
- Slop.parse!(@argv_array, :help => true) do
36
- banner <<-EOS
37
- Usage: #{$PROGRAM_NAME} site_root [options]
35
+ Slop.parse!(@argv_array, help: true) do
36
+ banner <<~HELP
37
+ Usage: #{$PROGRAM_NAME} site_root [options]
38
38
 
39
- Seeds an AMQP topic exchange with messages, each containing a URL, for the GOV.UK Crawler Worker
40
- to consume:
39
+ Seeds an AMQP topic exchange with messages, each containing a URL, for the GOV.UK Crawler Worker
40
+ to consume:
41
41
 
42
- https://github.com/alphagov/govuk_crawler_worker
42
+ https://github.com/alphagov/govuk_crawler_worker
43
43
 
44
- The AMQP password can also be set as an environment variable and will be read from
45
- `#{ENV_AMQP_PASS_KEY}`. If both the environment variable and command-line option for password
46
- are set, the environment variable will take higher precedent.
47
- EOS
44
+ The AMQP password can also be set as an environment variable and will be read from
45
+ `#{ENV_AMQP_PASS_KEY}`. If both the environment variable and command-line option for password
46
+ are set, the environment variable will take higher precedent.
47
+ HELP
48
48
 
49
49
  on :version, "Display version and exit" do
50
50
  puts "Version: #{GovukSeedCrawler::VERSION}"
@@ -75,7 +75,7 @@ are set, the environment variable will take higher precedent.
75
75
  options_hash = opts.to_hash
76
76
  options_hash[:password] = ENV[ENV_AMQP_PASS_KEY] unless ENV[ENV_AMQP_PASS_KEY].nil?
77
77
 
78
- return options_hash, @argv_array.first
78
+ [options_hash, @argv_array.first]
79
79
  end
80
80
  end
81
81
  end
@@ -13,10 +13,10 @@ module GovukSeedCrawler
13
13
  end
14
14
 
15
15
  def run
16
- Seeder::seed(@site_root, @options)
16
+ Seeder.seed(@site_root, @options)
17
17
  end
18
18
 
19
- private
19
+ private
20
20
 
21
21
  def set_logging_level(cli_options)
22
22
  if cli_options[:verbose]
@@ -1,4 +1,4 @@
1
- require 'sitemap-parser'
1
+ require "sitemap-parser"
2
2
 
3
3
  module GovukSeedCrawler
4
4
  class Indexer
@@ -9,7 +9,7 @@ module GovukSeedCrawler
9
9
 
10
10
  GovukSeedCrawler.logger.info("Retrieving list of URLs for #{site_root}")
11
11
 
12
- sitemap = SitemapParser.new("#{site_root}/sitemap.xml", {recurse: true})
12
+ sitemap = SitemapParser.new("#{site_root}/sitemap.xml", { recurse: true })
13
13
  @urls = sitemap.to_a
14
14
 
15
15
  GovukSeedCrawler.logger.info("Found #{@urls.count} URLs")
@@ -1,3 +1,3 @@
1
1
  module GovukSeedCrawler
2
- VERSION = "3.0.0"
2
+ VERSION = "3.1.0".freeze
3
3
  end
@@ -1,17 +1,21 @@
1
- require 'govuk_seed_crawler/amqp_client'
2
- require 'govuk_seed_crawler/cli_parser'
3
- require 'govuk_seed_crawler/cli_runner'
4
- require 'govuk_seed_crawler/indexer'
5
- require 'govuk_seed_crawler/seeder'
6
- require 'govuk_seed_crawler/version'
1
+ require "govuk_seed_crawler/amqp_client"
2
+ require "govuk_seed_crawler/cli_parser"
3
+ require "govuk_seed_crawler/cli_runner"
4
+ require "govuk_seed_crawler/indexer"
5
+ require "govuk_seed_crawler/seeder"
6
+ require "govuk_seed_crawler/version"
7
7
 
8
8
  module GovukSeedCrawler
9
- def self.logger
10
- unless @logger
11
- @logger = Logger.new(STDOUT)
12
- @logger.level = Logger::INFO
13
- end
9
+ class << self
10
+ attr_writer :logger
11
+
12
+ def logger
13
+ unless @logger
14
+ @logger = Logger.new($stdout)
15
+ @logger.level = Logger::INFO
16
+ end
14
17
 
15
- @logger
18
+ @logger
19
+ end
16
20
  end
17
21
  end
@@ -1,55 +1,54 @@
1
- require 'spec_helper'
2
-
3
1
  describe GovukSeedCrawler::AmqpClient do
4
2
  let(:exchange) { "govuk_seed_crawler_spec_exchange" }
5
- let(:options) {{
6
- :host => ENV.fetch("AMQP_HOST", "localhost"),
7
- :user => ENV.fetch("AMQP_USER", "govuk_seed_crawler"),
8
- :pass => ENV.fetch("AMQP_PASS", "govuk_seed_crawler"),
9
- }}
10
- subject { GovukSeedCrawler::AmqpClient.new(options) }
3
+ let(:options) do
4
+ {
5
+ host: ENV.fetch("AMQP_HOST", "localhost"),
6
+ user: ENV.fetch("AMQP_USER", "govuk_seed_crawler"),
7
+ pass: ENV.fetch("AMQP_PASS", "govuk_seed_crawler"),
8
+ }
9
+ end
11
10
 
12
11
  it "responds to #channel" do
13
- expect(subject).to respond_to(:channel)
12
+ expect(described_class.new(options)).to respond_to(:channel)
14
13
  end
15
14
 
16
15
  it "responds to #close" do
17
- expect(subject).to respond_to(:close)
16
+ expect(described_class.new(options)).to respond_to(:close)
18
17
  end
19
18
 
20
19
  it "closes the connection to the AMQP server" do
21
- mock_bunny = double(:mock_bunny,
22
- :start => true, :create_channel => true, :close => true)
20
+ mock_bunny = instance_double(Bunny::Session,
21
+ start: true,
22
+ create_channel: true,
23
+ close: true)
23
24
  allow(Bunny).to receive(:new).and_return(mock_bunny)
24
25
  expect(mock_bunny).to receive(:close).once
25
26
 
26
- subject.close
27
+ described_class.new(options).close
27
28
  end
28
29
 
29
- context "#publish" do
30
- context "error handling" do
31
- it "raises an exception if exchange is nil" do
32
- expect {
33
- subject.publish(nil, "#", "some body")
34
- }.to raise_exception(RuntimeError, "Exchange cannot be nil")
35
- end
36
-
37
- it "raises an exception if topic is nil" do
38
- expect {
39
- subject.publish(exchange, nil, "some body")
40
- }.to raise_exception(RuntimeError, "Topic cannot be nil")
41
- end
42
-
43
- it "raises an exception if body is nil" do
44
- expect {
45
- subject.publish(exchange, "#", nil)
46
- }.to raise_exception(RuntimeError, "Message body cannot be nil")
47
- end
30
+ describe "#publish" do
31
+ it "raises an exception if exchange is nil" do
32
+ expect {
33
+ described_class.new(options).publish(nil, "#", "some body")
34
+ }.to raise_exception(RuntimeError, "Exchange cannot be nil")
35
+ end
36
+
37
+ it "raises an exception if topic is nil" do
38
+ expect {
39
+ described_class.new(options).publish(exchange, nil, "some body")
40
+ }.to raise_exception(RuntimeError, "Topic cannot be nil")
41
+ end
42
+
43
+ it "raises an exception if body is nil" do
44
+ expect {
45
+ described_class.new(options).publish(exchange, "#", nil)
46
+ }.to raise_exception(RuntimeError, "Message body cannot be nil")
48
47
  end
49
48
 
50
49
  it "allows publishing against an exchange" do
51
- expect(subject.publish(exchange, "#", "some body"))
52
- .to_not be_nil
50
+ expect(described_class.new(options).publish(exchange, "#", "some body"))
51
+ .not_to be_nil
53
52
  end
54
53
  end
55
54
  end
@@ -1,28 +1,26 @@
1
- require 'spec_helper'
2
-
3
1
  describe GovukSeedCrawler::CLIParser do
4
2
  it "requires the site_root to be provided" do
5
3
  expect {
6
- GovukSeedCrawler::CLIParser.new([]).parse
4
+ described_class.new([]).parse
7
5
  }.to raise_exception(GovukSeedCrawler::CLIException, "site_root must be provided")
8
6
  end
9
7
 
10
8
  it "provides the defaults when just given the site_root" do
11
- options, site_root = GovukSeedCrawler::CLIParser.new(["https://www.example.com"]).parse
9
+ options, site_root = described_class.new(["https://www.example.com"]).parse
12
10
 
13
11
  expect(options).to eq(GovukSeedCrawler::CLIParser::DEFAULTS)
14
12
  expect(site_root).to eq("https://www.example.com")
15
13
  end
16
14
 
17
- it "should tell us when we've given too many arguments" do
15
+ it "tells us when we've given too many arguments" do
18
16
  expect {
19
- GovukSeedCrawler::CLIParser.new(["a", "b"]).parse
17
+ described_class.new(%w[a b]).parse
20
18
  }.to raise_exception(GovukSeedCrawler::CLIException, "too many arguments provided")
21
19
  end
22
20
 
23
- it "should nest the help message in with any CLIExceptions we raise" do
21
+ it "nests the help message in with any CLIExceptions we raise" do
24
22
  expect {
25
- GovukSeedCrawler::CLIParser.new(["a", "b"]).parse
23
+ described_class.new(%w[a b]).parse
26
24
  }.to raise_exception(GovukSeedCrawler::CLIException) { |e|
27
25
  expect(e.help).to include("Usage: ")
28
26
  }
@@ -31,48 +29,36 @@ describe GovukSeedCrawler::CLIParser do
31
29
  describe "catching STDOUT" do
32
30
  it "shows the help banner when provided -h" do
33
31
  # Get a valid options response as help closes early with SystemExit.
34
- options = GovukSeedCrawler::CLIParser.new(["http://www.foo.com/"]).options
35
-
36
- temp_stdout do |caught_stdout|
37
- expect {
38
- _, _ = GovukSeedCrawler::CLIParser.new(["-h"]).parse
39
- }.to raise_exception(SystemExit) { |e|
40
- expect(e.status).to eq(0)
41
- }
32
+ options = described_class.new(["http://www.foo.com/"]).options
42
33
 
43
- expect(caught_stdout.strip).to eq(options.help)
44
- end
34
+ expect { described_class.new(["-h"]).parse }
35
+ .to output("#{options.help}\n").to_stdout
36
+ .and raise_exception(SystemExit) { |e| expect(e.status).to eq(0) }
45
37
  end
46
38
 
47
- it "should show the version number and exit" do
48
- temp_stdout do |caught_stdout|
49
- expect {
50
- _, _ = GovukSeedCrawler::CLIParser.new(["--version"]).parse
51
- }.to raise_exception(SystemExit) { |e|
52
- expect(e.status).to eq(0)
53
- }
54
-
55
- expect(caught_stdout.strip).to eq("Version: #{GovukSeedCrawler::VERSION}")
56
- end
39
+ it "shows the version number and exit" do
40
+ expect { described_class.new(["--version"]).parse }
41
+ .to output("Version: #{GovukSeedCrawler::VERSION}\n").to_stdout
42
+ .and raise_exception(SystemExit) { |e| expect(e.status).to eq(0) }
57
43
  end
58
44
  end
59
45
 
60
46
  describe "passing in valid arguments" do
61
- let(:arguments) {
47
+ let(:arguments) do
62
48
  [
63
- "https://www.override.com/",
64
- "--host rabbitmq.some.custom.vhost",
65
- "--port 4567",
66
- "--username foo",
67
- "--password bar",
68
- "--exchange some_custom_exchange",
69
- "--topic some_custom_topic",
70
- "--vhost a_vhost",
71
- "--verbose"
49
+ "https://www.override.com/",
50
+ "--host rabbitmq.some.custom.vhost",
51
+ "--port 4567",
52
+ "--username foo",
53
+ "--password bar",
54
+ "--exchange some_custom_exchange",
55
+ "--topic some_custom_topic",
56
+ "--vhost a_vhost",
57
+ "--verbose",
72
58
  ].join(" ").split(" ")
73
- }
59
+ end
74
60
 
75
- it "should override all of the default arguments that we're providing" do
61
+ it "overrides all of the default arguments that we're providing" do
76
62
  overriden = {
77
63
  host: "rabbitmq.some.custom.vhost",
78
64
  port: "4567",
@@ -84,14 +70,14 @@ describe GovukSeedCrawler::CLIParser do
84
70
  quiet: false,
85
71
  verbose: true,
86
72
  version: nil,
87
- vhost: "a_vhost"
73
+ vhost: "a_vhost",
88
74
  }
89
75
 
90
- expect(GovukSeedCrawler::CLIParser.new(arguments).parse.first).to eq(overriden)
76
+ expect(described_class.new(arguments).parse.first).to eq(overriden)
91
77
  end
92
78
 
93
- it "should set the --quiet value" do
94
- options, _ = GovukSeedCrawler::CLIParser.new(["foo.com", "--quiet"]).parse
79
+ it "sets the --quiet value" do
80
+ options, = described_class.new(["foo.com", "--quiet"]).parse
95
81
  expect(options).to eq(GovukSeedCrawler::CLIParser::DEFAULTS.merge(quiet: true))
96
82
  end
97
83
 
@@ -107,14 +93,14 @@ describe GovukSeedCrawler::CLIParser do
107
93
  it "sets the password if set using an environment variable" do
108
94
  set_amqp_pass("foobar")
109
95
 
110
- expect(GovukSeedCrawler::CLIParser.new(["http://www.example.com"]).parse.first)
96
+ expect(described_class.new(["http://www.example.com"]).parse.first)
111
97
  .to include(password: "foobar")
112
98
  end
113
99
 
114
100
  it "picks the environment variable over the parameter if both are set" do
115
101
  set_amqp_pass("bar")
116
102
 
117
- expect(GovukSeedCrawler::CLIParser.new(["http://www.example.com", "--password", "foo"]).parse.first)
103
+ expect(described_class.new(["http://www.example.com", "--password", "foo"]).parse.first)
118
104
  .to include(password: "bar")
119
105
  end
120
106
  end
@@ -1,45 +1,25 @@
1
- require 'spec_helper'
2
-
3
1
  describe GovukSeedCrawler::CLIRunner do
4
2
  describe "printing the version" do
5
- it "should not try to connect to an AMQP server" do
3
+ it "does not try to connect to an AMQP server" do
6
4
  expect(Bunny).not_to receive(:new)
7
5
 
8
- temp_stdout do |caught_stdout|
9
- expect {
10
- GovukSeedCrawler::CLIRunner.new(["--version"]).run
11
- }.to raise_exception(SystemExit) { |exit|
12
- expect(exit.status).to eq(0)
13
- }
14
-
15
- expect(caught_stdout.strip).to eq("Version: #{GovukSeedCrawler::VERSION}")
16
- end
6
+ expect { described_class.new(["--version"]).run }
7
+ .to output("Version: #{GovukSeedCrawler::VERSION}\n").to_stdout
8
+ .and raise_exception(SystemExit) { |e| expect(e.status).to eq(0) }
17
9
  end
18
10
  end
19
11
 
20
12
  describe "catching any CLIException objects and exiting with a status 1" do
21
13
  it "prints to STDOUT for too many arguments" do
22
- temp_stdout do |caught_stdout|
23
- expect {
24
- GovukSeedCrawler::CLIRunner.new(["a", "b"])
25
- }.to raise_exception(SystemExit) { |exit|
26
- expect(exit.status).to eq(2)
27
- }
28
-
29
- expect(caught_stdout.strip).to include("too many arguments provided")
30
- end
14
+ expect { described_class.new(%w[a b]).run }
15
+ .to output(/\Atoo many arguments provided/).to_stdout
16
+ .and raise_exception(SystemExit) { |e| expect(e.status).to eq(2) }
31
17
  end
32
18
 
33
19
  it "prints to STDOUT when site_root not set" do
34
- temp_stdout do |caught_stdout|
35
- expect {
36
- GovukSeedCrawler::CLIRunner.new(["--verbose"])
37
- }.to raise_exception(SystemExit) { |exit|
38
- expect(exit.status).to eq(2)
39
- }
40
-
41
- expect(caught_stdout.strip).to include("site_root must be provided")
42
- end
20
+ expect { described_class.new(["--verbose"]).run }
21
+ .to output(/\Asite_root must be provided/).to_stdout
22
+ .and raise_exception(SystemExit) { |e| expect(e.status).to eq(2) }
43
23
  end
44
24
  end
45
25
 
@@ -49,26 +29,26 @@ describe GovukSeedCrawler::CLIRunner do
49
29
  end
50
30
 
51
31
  it "defaults to INFO" do
52
- GovukSeedCrawler::CLIRunner.new(["http://www.example.com"])
32
+ described_class.new(["http://www.example.com"])
53
33
  expect(GovukSeedCrawler.logger.level).to eq(Logger::INFO)
54
34
  end
55
35
 
56
36
  it "sets to ERROR for quite" do
57
- GovukSeedCrawler::CLIRunner.new(["http://www.example.com", "--quiet"])
37
+ described_class.new(["http://www.example.com", "--quiet"])
58
38
  expect(GovukSeedCrawler.logger.level).to eq(Logger::ERROR)
59
39
  end
60
40
 
61
41
  it "sets to DEBUG for verbose" do
62
- GovukSeedCrawler::CLIRunner.new(["http://www.example.com", "--verbose"])
42
+ described_class.new(["http://www.example.com", "--verbose"])
63
43
  expect(GovukSeedCrawler.logger.level).to eq(Logger::DEBUG)
64
44
  end
65
45
  end
66
46
 
67
47
  describe "#run" do
68
48
  it "passes all options through to seed" do
69
- expect(GovukSeedCrawler::Seeder).to receive(:seed).
70
- with("http://www.example.com", GovukSeedCrawler::CLIParser::DEFAULTS).once
71
- GovukSeedCrawler::CLIRunner.new(["http://www.example.com"]).run
49
+ expect(GovukSeedCrawler::Seeder).to receive(:seed)
50
+ .with("http://www.example.com", GovukSeedCrawler::CLIParser::DEFAULTS).once
51
+ described_class.new(["http://www.example.com"]).run
72
52
  end
73
53
  end
74
54
  end
@@ -1,21 +1,18 @@
1
- require 'spec_helper'
2
-
3
1
  describe GovukSeedCrawler::Indexer do
4
- subject { GovukSeedCrawler::Indexer.new('https://example.com') }
5
-
6
- context "under normal usage" do
7
- let(:mock_parser) do
8
- double(:mock_parser, :to_a => [])
9
- end
2
+ let(:mock_parser) { instance_double(SitemapParser, to_a: []) }
10
3
 
11
- it "responds to Indexer#urls" do
12
- allow(SitemapParser).to receive(:new).and_return(mock_parser)
13
- expect(subject).to respond_to(:urls)
14
- end
4
+ it "responds to Indexer#urls" do
5
+ allow(SitemapParser).to receive(:new).and_return(mock_parser)
6
+ instance = nil
7
+ expect { instance = described_class.new("https://example.com") }
8
+ .to output.to_stdout
9
+ expect(instance).to respond_to(:urls)
10
+ end
15
11
 
16
- it "calls SitemapParser with the sitemap file" do
17
- expect(SitemapParser).to receive(:new).with('https://example.com/sitemap.xml', {:recurse => true}).and_return(mock_parser)
18
- subject
19
- end
12
+ it "calls SitemapParser with the sitemap file" do
13
+ allow(SitemapParser).to receive(:new).with("https://example.com/sitemap.xml", { recurse: true }).and_return(mock_parser)
14
+ expect { described_class.new("https://example.com") }
15
+ .to output.to_stdout
16
+ expect(SitemapParser).to have_received(:new)
20
17
  end
21
18
  end
@@ -1,29 +1,27 @@
1
- require 'spec_helper'
2
-
3
1
  describe GovukSeedCrawler::Seeder do
4
2
  let(:exchange) { "seeder_test_exchange" }
5
3
  let(:topic) { "#" }
6
4
  let(:root_url) { "https://www.example.com" }
7
5
 
8
- let(:options) {{
9
- :exchange => exchange,
10
- :topic => topic,
11
- }}
6
+ let(:options) do
7
+ {
8
+ exchange: exchange,
9
+ topic: topic,
10
+ }
11
+ end
12
12
 
13
- let(:mock_get_urls) { double(:mock_get_urls, :urls => true) }
14
- let(:mock_amqp_client) { double(:mock_amqp_client, :close => true) }
13
+ let(:mock_get_urls) { instance_double(GovukSeedCrawler::Indexer, urls: true) }
14
+ let(:mock_amqp_client) { instance_double(GovukSeedCrawler::AmqpClient, close: true) }
15
15
 
16
16
  let(:urls) do
17
17
  [
18
- "https://example.com/foo",
19
- "https://example.com/bar",
20
- "https://example.com/baz",
18
+ "https://example.com/foo",
19
+ "https://example.com/bar",
20
+ "https://example.com/baz",
21
21
  ]
22
22
  end
23
23
 
24
- subject { GovukSeedCrawler::Seeder::seed(root_url, options) }
25
-
26
- before(:each) do
24
+ before do
27
25
  allow(GovukSeedCrawler::Indexer).to receive(:new)
28
26
  .with(root_url)
29
27
  .and_return(mock_get_urls)
@@ -32,20 +30,20 @@ describe GovukSeedCrawler::Seeder do
32
30
  .with(options).and_return(mock_amqp_client)
33
31
  end
34
32
 
35
- context "under normal usage" do
36
- it "publishes urls to the queue" do
37
- urls.each do |url|
38
- expect(mock_amqp_client).to receive(:publish)
39
- .with(exchange, topic, url)
40
- end
41
-
42
- subject
33
+ it "publishes urls to the queue" do
34
+ urls.each do |url|
35
+ expect(mock_amqp_client).to receive(:publish)
36
+ .with(exchange, topic, url)
43
37
  end
44
38
 
45
- it "closes the connection when done" do
46
- allow(mock_amqp_client).to receive(:publish)
47
- expect(mock_amqp_client).to receive(:close)
48
- subject
49
- end
39
+ expect { described_class.seed(root_url, options) }
40
+ .to output.to_stdout
41
+ end
42
+
43
+ it "closes the connection when done" do
44
+ allow(mock_amqp_client).to receive(:publish)
45
+ expect(mock_amqp_client).to receive(:close)
46
+ expect { described_class.seed(root_url, options) }
47
+ .to output.to_stdout
50
48
  end
51
49
  end
@@ -1,9 +1,8 @@
1
- require 'json'
2
- require 'spec_helper'
1
+ require "json"
3
2
 
4
3
  describe GovukSeedCrawler do
5
4
  def stub_sitemap
6
- sitemap = %{<?xml version="1.0" encoding="UTF-8"?>
5
+ sitemap = %(<?xml version="1.0" encoding="UTF-8"?>
7
6
  <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
8
7
  <url>
9
8
  <loc>https://www.gov.uk/</loc>
@@ -15,45 +14,46 @@ describe GovukSeedCrawler do
15
14
  <loc>https://www.gov.uk/help</loc>
16
15
  </url>
17
16
  </urlset>
18
- }
17
+ )
19
18
 
20
- stub_request(:get, "https://www.gov.uk/sitemap.xml").
21
- to_return(:status => 200, :body => sitemap, :headers => {})
19
+ stub_request(:get, "https://www.gov.uk/sitemap.xml")
20
+ .to_return(status: 200, body: sitemap, headers: {})
22
21
  end
23
22
 
23
+ subject { GovukSeedCrawler::Seeder.seed(site_root, options) }
24
+
24
25
  let(:vhost) { "/" }
25
26
  let(:exchange_name) { "govuk_seed_crawler_integration_exchange" }
26
27
  let(:queue_name) { "govuk_seed_crawler_integration_queue" }
27
28
  let(:topic) { "#" }
28
29
  let(:site_root) { "https://www.gov.uk" }
29
- let(:options) {{
30
- :host => ENV.fetch("AMQP_HOST", "localhost"),
31
- :user => ENV.fetch("AMQP_USER", "govuk_seed_crawler"),
32
- :pass => ENV.fetch("AMQP_PASS", "govuk_seed_crawler"),
33
- :exchange => exchange_name,
34
- :topic => topic
35
- }}
36
- let(:rabbitmq_client) { GovukSeedCrawler::AmqpClient.new(options) }
37
-
38
- subject { GovukSeedCrawler::Seeder::seed(site_root, options) }
39
-
40
- before(:each) do
41
- @exchange = rabbitmq_client.channel.topic(exchange_name, :durable => true)
42
- @queue = rabbitmq_client.channel.queue(queue_name)
43
- @queue.bind(@exchange, :routing_key => topic)
30
+ let(:options) do
31
+ {
32
+ host: ENV.fetch("AMQP_HOST", "localhost"),
33
+ user: ENV.fetch("AMQP_USER", "govuk_seed_crawler"),
34
+ pass: ENV.fetch("AMQP_PASS", "govuk_seed_crawler"),
35
+ exchange: exchange_name,
36
+ topic: topic,
37
+ }
44
38
  end
45
-
46
- after(:each) do
47
- @queue.unbind(@exchange)
48
- @queue.delete
49
- @exchange.delete
39
+ let(:rabbitmq_client) { GovukSeedCrawler::AmqpClient.new(options) }
40
+ let(:exchange) { rabbitmq_client.channel.topic(exchange_name, durable: true) }
41
+ let(:queue) { rabbitmq_client.channel.queue(queue_name) }
42
+
43
+ around do |example|
44
+ queue.bind(exchange, routing_key: topic)
45
+ example.run
46
+ queue.unbind(exchange)
47
+ queue.delete
48
+ exchange.delete
50
49
  rabbitmq_client.close
51
50
  end
52
51
 
53
52
  it "publishes URLs it finds to an AMQP topic exchange" do
54
53
  stub_sitemap
55
- subject
54
+ expect { GovukSeedCrawler::Seeder.seed(site_root, options) }
55
+ .to output.to_stdout
56
56
 
57
- expect(@queue.message_count).to be(3)
57
+ expect(queue.message_count).to be(3)
58
58
  end
59
59
  end
data/spec/spec_helper.rb CHANGED
@@ -1,5 +1,7 @@
1
- require 'govuk_seed_crawler'
2
- require 'webmock/rspec'
1
+ require "govuk_seed_crawler"
2
+ require "webmock/rspec"
3
+
4
+ WebMock.disable_net_connect!
3
5
 
4
6
  RSpec.configure do |config|
5
7
  config.order = :random
@@ -27,13 +29,9 @@ RSpec.configure do |config|
27
29
  # a real object. This is generally recommended.
28
30
  mocks.verify_partial_doubles = true
29
31
  end
30
- end
31
32
 
32
- WebMock.disable_net_connect!
33
-
34
- def temp_stdout
35
- $stdout = StringIO.new
36
- yield $stdout.string
37
- ensure
38
- $stdout = STDOUT
33
+ config.before do
34
+ # reset logger before each invocation so we can catch stdout
35
+ GovukSeedCrawler.logger = nil
36
+ end
39
37
  end
metadata CHANGED
@@ -1,85 +1,109 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: govuk_seed_crawler
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.0
4
+ version: 3.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - GOV.UK developers
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-07-22 00:00:00.000000000 Z
11
+ date: 2022-10-31 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bunny
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - "~>"
17
+ - - ">="
18
18
  - !ruby/object:Gem::Version
19
19
  version: '1.3'
20
+ - - "<"
21
+ - !ruby/object:Gem::Version
22
+ version: '3.0'
20
23
  type: :runtime
21
24
  prerelease: false
22
25
  version_requirements: !ruby/object:Gem::Requirement
23
26
  requirements:
24
- - - "~>"
27
+ - - ">="
25
28
  - !ruby/object:Gem::Version
26
29
  version: '1.3'
30
+ - - "<"
31
+ - !ruby/object:Gem::Version
32
+ version: '3.0'
27
33
  - !ruby/object:Gem::Dependency
28
34
  name: crack
29
35
  requirement: !ruby/object:Gem::Requirement
30
36
  requirements:
31
37
  - - '='
32
38
  - !ruby/object:Gem::Version
33
- version: 0.4.4
39
+ version: 0.4.5
34
40
  type: :runtime
35
41
  prerelease: false
36
42
  version_requirements: !ruby/object:Gem::Requirement
37
43
  requirements:
38
44
  - - '='
39
45
  - !ruby/object:Gem::Version
40
- version: 0.4.4
46
+ version: 0.4.5
41
47
  - !ruby/object:Gem::Dependency
42
48
  name: nokogiri
43
49
  requirement: !ruby/object:Gem::Requirement
44
50
  requirements:
45
- - - "~>"
51
+ - - ">="
52
+ - !ruby/object:Gem::Version
53
+ version: '1.6'
54
+ - - "<"
46
55
  - !ruby/object:Gem::Version
47
- version: 1.6.0
56
+ version: '1.14'
48
57
  type: :runtime
49
58
  prerelease: false
50
59
  version_requirements: !ruby/object:Gem::Requirement
51
60
  requirements:
52
- - - "~>"
61
+ - - ">="
62
+ - !ruby/object:Gem::Version
63
+ version: '1.6'
64
+ - - "<"
53
65
  - !ruby/object:Gem::Version
54
- version: 1.6.0
66
+ version: '1.14'
55
67
  - !ruby/object:Gem::Dependency
56
68
  name: public_suffix
57
69
  requirement: !ruby/object:Gem::Requirement
58
70
  requirements:
59
- - - "~>"
71
+ - - ">="
60
72
  - !ruby/object:Gem::Version
61
73
  version: 1.4.6
74
+ - - "<"
75
+ - !ruby/object:Gem::Version
76
+ version: 5.1.0
62
77
  type: :runtime
63
78
  prerelease: false
64
79
  version_requirements: !ruby/object:Gem::Requirement
65
80
  requirements:
66
- - - "~>"
81
+ - - ">="
67
82
  - !ruby/object:Gem::Version
68
83
  version: 1.4.6
84
+ - - "<"
85
+ - !ruby/object:Gem::Version
86
+ version: 5.1.0
69
87
  - !ruby/object:Gem::Dependency
70
88
  name: sitemap-parser
71
89
  requirement: !ruby/object:Gem::Requirement
72
90
  requirements:
73
- - - "~>"
91
+ - - ">="
74
92
  - !ruby/object:Gem::Version
75
- version: 0.3.0
93
+ version: '0.3'
94
+ - - "<"
95
+ - !ruby/object:Gem::Version
96
+ version: '0.6'
76
97
  type: :runtime
77
98
  prerelease: false
78
99
  version_requirements: !ruby/object:Gem::Requirement
79
100
  requirements:
80
- - - "~>"
101
+ - - ">="
81
102
  - !ruby/object:Gem::Version
82
- version: 0.3.0
103
+ version: '0.3'
104
+ - - "<"
105
+ - !ruby/object:Gem::Version
106
+ version: '0.6'
83
107
  - !ruby/object:Gem::Dependency
84
108
  name: slop
85
109
  requirement: !ruby/object:Gem::Requirement
@@ -112,16 +136,16 @@ dependencies:
112
136
  name: rake
113
137
  requirement: !ruby/object:Gem::Requirement
114
138
  requirements:
115
- - - "~>"
139
+ - - ">="
116
140
  - !ruby/object:Gem::Version
117
- version: '0.9'
141
+ version: '0'
118
142
  type: :development
119
143
  prerelease: false
120
144
  version_requirements: !ruby/object:Gem::Requirement
121
145
  requirements:
122
- - - "~>"
146
+ - - ">="
123
147
  - !ruby/object:Gem::Version
124
- version: '0.9'
148
+ version: '0'
125
149
  - !ruby/object:Gem::Dependency
126
150
  name: rspec
127
151
  requirement: !ruby/object:Gem::Requirement
@@ -150,20 +174,34 @@ dependencies:
150
174
  - - "~>"
151
175
  - !ruby/object:Gem::Version
152
176
  version: '3.0'
177
+ - !ruby/object:Gem::Dependency
178
+ name: rubocop-govuk
179
+ requirement: !ruby/object:Gem::Requirement
180
+ requirements:
181
+ - - '='
182
+ - !ruby/object:Gem::Version
183
+ version: 4.8.0
184
+ type: :development
185
+ prerelease: false
186
+ version_requirements: !ruby/object:Gem::Requirement
187
+ requirements:
188
+ - - '='
189
+ - !ruby/object:Gem::Version
190
+ version: 4.8.0
153
191
  - !ruby/object:Gem::Dependency
154
192
  name: webmock
155
193
  requirement: !ruby/object:Gem::Requirement
156
194
  requirements:
157
195
  - - "~>"
158
196
  - !ruby/object:Gem::Version
159
- version: 1.18.0
197
+ version: '3.18'
160
198
  type: :development
161
199
  prerelease: false
162
200
  version_requirements: !ruby/object:Gem::Requirement
163
201
  requirements:
164
202
  - - "~>"
165
203
  - !ruby/object:Gem::Version
166
- version: 1.18.0
204
+ version: '3.18'
167
205
  description:
168
206
  email:
169
207
  - govuk-dev@digital.cabinet-office.gov.uk
@@ -172,12 +210,15 @@ executables:
172
210
  extensions: []
173
211
  extra_rdoc_files: []
174
212
  files:
213
+ - ".github/dependabot.yml"
214
+ - ".github/workflows/ci.yml"
175
215
  - ".gitignore"
176
216
  - ".rspec"
217
+ - ".rubocop.yml"
177
218
  - ".ruby-version"
219
+ - CHANGELOG.md
178
220
  - Gemfile
179
- - Jenkinsfile
180
- - LICENSE.txt
221
+ - LICENCE
181
222
  - README.md
182
223
  - Rakefile
183
224
  - bin/seed-crawler
@@ -206,16 +247,16 @@ require_paths:
206
247
  - lib
207
248
  required_ruby_version: !ruby/object:Gem::Requirement
208
249
  requirements:
209
- - - "~>"
250
+ - - ">="
210
251
  - !ruby/object:Gem::Version
211
- version: '2.6'
252
+ version: '2.7'
212
253
  required_rubygems_version: !ruby/object:Gem::Requirement
213
254
  requirements:
214
255
  - - ">="
215
256
  - !ruby/object:Gem::Version
216
257
  version: '0'
217
258
  requirements: []
218
- rubygems_version: 3.0.3
259
+ rubygems_version: 3.3.24
219
260
  signing_key:
220
261
  specification_version: 4
221
262
  summary: Retrieves a list of URLs to seed the crawler by publishing them to a RabbitMQ
data/Jenkinsfile DELETED
@@ -1,49 +0,0 @@
1
- #!/usr/bin/env groovy
2
-
3
- library("govuk")
4
-
5
- node {
6
- try {
7
- // This doesn't use the buildProject as this project doesn't conform to
8
- // required norms (e.g. running in Ruby 1.9, non-standard tests).
9
-
10
- repoName = JOB_NAME.split('/')[0]
11
-
12
- stage("Checkout") {
13
- govuk.checkoutFromGitHubWithSSH(repoName)
14
- }
15
-
16
- stage("Clean up workspace") {
17
- govuk.cleanupGit()
18
- }
19
-
20
- stage('Configure environment') {
21
- govuk.setEnvar('RBENV_VERSION', '2.6.3')
22
- }
23
-
24
- stage('Bundle install') {
25
- govuk.bundleGem()
26
- }
27
-
28
- stage('Spec tests') {
29
- govuk.runRakeTask('spec')
30
- }
31
-
32
- stage('Integration tests') {
33
- govuk.runRakeTask('integration')
34
- }
35
-
36
- if (env.BRANCH_NAME == 'master') {
37
- stage('Publish Gem to Rubygems') {
38
- govuk.publishGem(repoName, repoName, 'master')
39
- }
40
- }
41
- } catch (e) {
42
- currentBuild.result = "FAILED"
43
- step([$class: 'Mailer',
44
- notifyEveryUnstableBuild: true,
45
- recipients: 'govuk-ci-notifications@digital.cabinet-office.gov.uk',
46
- sendToIndividuals: true])
47
- throw e
48
- }
49
- }