wayfarer 0.4.6 → 0.4.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.env +17 -0
- data/.github/workflows/lint.yaml +27 -0
- data/.github/workflows/release.yaml +30 -0
- data/.github/workflows/tests.yaml +21 -0
- data/.gitignore +5 -1
- data/.rubocop.yml +36 -0
- data/.vale.ini +8 -0
- data/.yardopts +1 -3
- data/Dockerfile +6 -4
- data/Gemfile +24 -0
- data/Gemfile.lock +274 -164
- data/Rakefile +7 -51
- data/bin/wayfarer +1 -1
- data/docker-compose.yml +23 -13
- data/docs/cookbook/consent_screen.md +2 -2
- data/docs/cookbook/executing_javascript.md +3 -3
- data/docs/cookbook/navigation.md +12 -12
- data/docs/cookbook/querying_html.md +3 -3
- data/docs/cookbook/screenshots.md +2 -2
- data/docs/guides/callbacks.md +25 -125
- data/docs/guides/cli.md +71 -0
- data/docs/guides/configuration.md +10 -35
- data/docs/guides/development.md +67 -0
- data/docs/guides/handlers.md +60 -0
- data/docs/guides/index.md +1 -0
- data/docs/guides/jobs.md +142 -31
- data/docs/guides/navigation.md +1 -1
- data/docs/guides/networking/capybara.md +13 -22
- data/docs/guides/networking/custom_adapters.md +103 -41
- data/docs/guides/networking/ferrum.md +4 -4
- data/docs/guides/networking/http.md +9 -13
- data/docs/guides/networking/selenium.md +10 -11
- data/docs/guides/pages.md +78 -10
- data/docs/guides/redis.md +10 -0
- data/docs/guides/routing.md +156 -0
- data/docs/guides/tasks.md +53 -9
- data/docs/guides/tutorial.md +66 -0
- data/docs/guides/user_agents.md +115 -0
- data/docs/index.md +17 -40
- data/lib/wayfarer/base.rb +125 -46
- data/lib/wayfarer/batch_completion.rb +60 -0
- data/lib/wayfarer/callbacks.rb +22 -48
- data/lib/wayfarer/cli/route_printer.rb +85 -89
- data/lib/wayfarer/cli.rb +103 -0
- data/lib/wayfarer/gc.rb +18 -6
- data/lib/wayfarer/handler.rb +15 -7
- data/lib/wayfarer/kv.rb +28 -0
- data/lib/wayfarer/logging.rb +38 -0
- data/lib/wayfarer/middleware/base.rb +2 -0
- data/lib/wayfarer/middleware/batch_completion.rb +19 -0
- data/lib/wayfarer/middleware/chain.rb +7 -1
- data/lib/wayfarer/middleware/content_type.rb +59 -0
- data/lib/wayfarer/middleware/controller.rb +19 -15
- data/lib/wayfarer/middleware/dedup.rb +22 -13
- data/lib/wayfarer/middleware/dispatch.rb +17 -4
- data/lib/wayfarer/middleware/normalize.rb +7 -14
- data/lib/wayfarer/middleware/redis.rb +15 -0
- data/lib/wayfarer/middleware/router.rb +33 -35
- data/lib/wayfarer/middleware/stage.rb +5 -5
- data/lib/wayfarer/middleware/uri_parser.rb +31 -0
- data/lib/wayfarer/middleware/user_agent.rb +49 -0
- data/lib/wayfarer/networking/capybara.rb +1 -1
- data/lib/wayfarer/networking/context.rb +14 -3
- data/lib/wayfarer/networking/ferrum.rb +1 -4
- data/lib/wayfarer/networking/follow.rb +14 -7
- data/lib/wayfarer/networking/http.rb +1 -1
- data/lib/wayfarer/networking/pool.rb +23 -13
- data/lib/wayfarer/networking/selenium.rb +15 -7
- data/lib/wayfarer/networking/strategy.rb +2 -2
- data/lib/wayfarer/page.rb +34 -14
- data/lib/wayfarer/parsing/xml.rb +6 -6
- data/lib/wayfarer/parsing.rb +21 -0
- data/lib/wayfarer/redis/barrier.rb +26 -21
- data/lib/wayfarer/redis/counter.rb +18 -9
- data/lib/wayfarer/redis/pool.rb +1 -1
- data/lib/wayfarer/redis/resettable.rb +19 -0
- data/lib/wayfarer/routing/dsl.rb +166 -30
- data/lib/wayfarer/routing/hash_stack.rb +33 -0
- data/lib/wayfarer/routing/matchers/custom.rb +8 -5
- data/lib/wayfarer/routing/matchers/{suffix.rb → empty_params.rb} +2 -6
- data/lib/wayfarer/routing/matchers/host.rb +15 -9
- data/lib/wayfarer/routing/matchers/path.rb +11 -31
- data/lib/wayfarer/routing/matchers/query.rb +41 -17
- data/lib/wayfarer/routing/matchers/result.rb +12 -0
- data/lib/wayfarer/routing/matchers/scheme.rb +13 -5
- data/lib/wayfarer/routing/matchers/url.rb +13 -5
- data/lib/wayfarer/routing/path_consumer.rb +130 -0
- data/lib/wayfarer/routing/path_finder.rb +151 -23
- data/lib/wayfarer/routing/result.rb +1 -1
- data/lib/wayfarer/routing/root_route.rb +17 -1
- data/lib/wayfarer/routing/route.rb +66 -19
- data/lib/wayfarer/routing/serializable.rb +28 -0
- data/lib/wayfarer/routing/sub_route.rb +53 -0
- data/lib/wayfarer/routing/target_route.rb +17 -1
- data/lib/wayfarer/stringify.rb +21 -30
- data/lib/wayfarer/task.rb +9 -17
- data/lib/wayfarer/uri/normalization.rb +120 -0
- data/lib/wayfarer.rb +72 -5
- data/mise.toml +2 -0
- data/mkdocs.yml +44 -8
- data/rake/docs.rake +26 -0
- data/rake/lint.rake +9 -0
- data/rake/release.rake +23 -0
- data/rake/tests.rake +32 -0
- data/requirements.txt +1 -1
- data/spec/factories/job.rb +8 -0
- data/spec/factories/middleware.rb +2 -2
- data/spec/factories/path_finder.rb +11 -0
- data/spec/factories/redis.rb +19 -0
- data/spec/factories/task.rb +46 -2
- data/spec/spec_helpers.rb +55 -51
- data/spec/support/active_job_helpers.rb +8 -0
- data/spec/support/integration_helpers.rb +21 -0
- data/spec/support/redis_helpers.rb +9 -0
- data/spec/support/test_app.rb +66 -37
- data/spec/wayfarer/base_spec.rb +200 -0
- data/spec/wayfarer/batch_completion_spec.rb +142 -0
- data/spec/wayfarer/cli/job_spec.rb +88 -0
- data/spec/wayfarer/cli/routing_spec.rb +322 -0
- data/spec/{cli → wayfarer/cli}/version_spec.rb +1 -1
- data/spec/wayfarer/gc_spec.rb +29 -0
- data/spec/wayfarer/handler_spec.rb +9 -0
- data/spec/wayfarer/integration/callbacks_spec.rb +200 -0
- data/spec/wayfarer/integration/content_type_spec.rb +37 -0
- data/spec/wayfarer/integration/custom_routing_spec.rb +51 -0
- data/spec/wayfarer/integration/gc_spec.rb +40 -0
- data/spec/wayfarer/integration/handler_spec.rb +65 -0
- data/spec/wayfarer/integration/page_spec.rb +79 -0
- data/spec/wayfarer/integration/params_spec.rb +64 -0
- data/spec/wayfarer/integration/parsing_spec.rb +99 -0
- data/spec/wayfarer/integration/retry_spec.rb +112 -0
- data/spec/wayfarer/integration/stage_spec.rb +58 -0
- data/spec/wayfarer/middleware/batch_completion_spec.rb +33 -0
- data/spec/{middleware → wayfarer/middleware}/chain_spec.rb +24 -19
- data/spec/wayfarer/middleware/content_type_spec.rb +83 -0
- data/spec/{middleware → wayfarer/middleware}/controller_spec.rb +24 -22
- data/spec/wayfarer/middleware/dedup_spec.rb +66 -0
- data/spec/wayfarer/middleware/normalize_spec.rb +32 -0
- data/spec/wayfarer/middleware/router_spec.rb +102 -0
- data/spec/wayfarer/middleware/stage_spec.rb +63 -0
- data/spec/wayfarer/middleware/uri_parser_spec.rb +63 -0
- data/spec/wayfarer/middleware/user_agent_spec.rb +158 -0
- data/spec/wayfarer/networking/capybara_spec.rb +13 -0
- data/spec/{networking → wayfarer/networking}/context_spec.rb +46 -38
- data/spec/wayfarer/networking/ferrum_spec.rb +13 -0
- data/spec/{networking → wayfarer/networking}/follow_spec.rb +11 -6
- data/spec/wayfarer/networking/http_spec.rb +12 -0
- data/spec/{networking → wayfarer/networking}/pool_spec.rb +16 -14
- data/spec/wayfarer/networking/selenium_spec.rb +12 -0
- data/spec/{networking → wayfarer/networking}/strategy.rb +33 -54
- data/spec/wayfarer/page_spec.rb +69 -0
- data/spec/{parsing → wayfarer/parsing}/json_spec.rb +1 -1
- data/spec/wayfarer/parsing/xml_parse_spec.rb +25 -0
- data/spec/wayfarer/redis/barrier_spec.rb +39 -0
- data/spec/wayfarer/redis/counter_spec.rb +34 -0
- data/spec/{redis → wayfarer/redis}/pool_spec.rb +4 -3
- data/spec/{routing → wayfarer/routing}/dsl_spec.rb +12 -22
- data/spec/wayfarer/routing/hash_stack_spec.rb +63 -0
- data/spec/wayfarer/routing/integration_spec.rb +101 -0
- data/spec/wayfarer/routing/matchers/custom_spec.rb +39 -0
- data/spec/wayfarer/routing/matchers/host_spec.rb +56 -0
- data/spec/wayfarer/routing/matchers/matcher.rb +17 -0
- data/spec/wayfarer/routing/matchers/path_spec.rb +43 -0
- data/spec/wayfarer/routing/matchers/query_spec.rb +123 -0
- data/spec/wayfarer/routing/matchers/scheme_spec.rb +45 -0
- data/spec/wayfarer/routing/matchers/url_spec.rb +33 -0
- data/spec/wayfarer/routing/path_consumer_spec.rb +123 -0
- data/spec/wayfarer/routing/path_finder_spec.rb +409 -0
- data/spec/wayfarer/routing/root_route_spec.rb +51 -0
- data/spec/wayfarer/routing/route_spec.rb +74 -0
- data/spec/wayfarer/routing/sub_route_spec.rb +103 -0
- data/spec/wayfarer/task_spec.rb +13 -0
- data/spec/wayfarer/uri/normalization_spec.rb +98 -0
- data/spec/wayfarer_spec.rb +2 -2
- data/wayfarer.gemspec +18 -28
- metadata +797 -265
- data/.github/workflows/ci.yaml +0 -32
- data/.rbenv-gemsets +0 -1
- data/.ruby-version +0 -1
- data/RELEASING.md +0 -17
- data/docs/cookbook/user_agent.md +0 -7
- data/docs/guides/error_handling.md +0 -53
- data/docs/guides/networking.md +0 -94
- data/docs/guides/performance.md +0 -130
- data/docs/guides/reliability.md +0 -41
- data/docs/guides/routing/steering.md +0 -30
- data/docs/reference/api/base.md +0 -48
- data/docs/reference/cli.md +0 -61
- data/docs/reference/configuration_keys.md +0 -43
- data/docs/reference/environment_variables.md +0 -83
- data/lib/wayfarer/cli/base.rb +0 -45
- data/lib/wayfarer/cli/generate.rb +0 -17
- data/lib/wayfarer/cli/job.rb +0 -56
- data/lib/wayfarer/cli/route.rb +0 -29
- data/lib/wayfarer/cli/runner.rb +0 -34
- data/lib/wayfarer/cli/templates/Gemfile.tt +0 -5
- data/lib/wayfarer/cli/templates/job.rb.tt +0 -10
- data/lib/wayfarer/config/capybara.rb +0 -10
- data/lib/wayfarer/config/ferrum.rb +0 -11
- data/lib/wayfarer/config/networking.rb +0 -29
- data/lib/wayfarer/config/redis.rb +0 -14
- data/lib/wayfarer/config/root.rb +0 -11
- data/lib/wayfarer/config/selenium.rb +0 -21
- data/lib/wayfarer/config/strconv.rb +0 -45
- data/lib/wayfarer/config/struct.rb +0 -72
- data/lib/wayfarer/middleware/fetch.rb +0 -56
- data/lib/wayfarer/redis/connection.rb +0 -13
- data/lib/wayfarer/redis/version.rb +0 -19
- data/lib/wayfarer/routing/router.rb +0 -28
- data/spec/base_spec.rb +0 -224
- data/spec/callbacks_spec.rb +0 -102
- data/spec/cli/generate_spec.rb +0 -39
- data/spec/cli/job_spec.rb +0 -78
- data/spec/config/capybara_spec.rb +0 -18
- data/spec/config/ferrum_spec.rb +0 -24
- data/spec/config/networking_spec.rb +0 -73
- data/spec/config/redis_spec.rb +0 -32
- data/spec/config/root_spec.rb +0 -31
- data/spec/config/selenium_spec.rb +0 -56
- data/spec/config/strconv_spec.rb +0 -58
- data/spec/config/struct_spec.rb +0 -66
- data/spec/fixtures/dummy_job.rb +0 -7
- data/spec/gc_spec.rb +0 -59
- data/spec/handler_spec.rb +0 -11
- data/spec/integration/callbacks_spec.rb +0 -85
- data/spec/integration/page_spec.rb +0 -62
- data/spec/integration/params_spec.rb +0 -56
- data/spec/integration/stage_spec.rb +0 -51
- data/spec/integration/steering_spec.rb +0 -57
- data/spec/middleware/dedup_spec.rb +0 -88
- data/spec/middleware/dispatch_spec.rb +0 -43
- data/spec/middleware/fetch_spec.rb +0 -155
- data/spec/middleware/normalize_spec.rb +0 -29
- data/spec/middleware/router_spec.rb +0 -105
- data/spec/middleware/stage_spec.rb +0 -62
- data/spec/networking/capybara_spec.rb +0 -12
- data/spec/networking/ferrum_spec.rb +0 -12
- data/spec/networking/http_spec.rb +0 -12
- data/spec/networking/selenium_spec.rb +0 -12
- data/spec/page_spec.rb +0 -47
- data/spec/parsing/xml_spec.rb +0 -25
- data/spec/redis/barrier_spec.rb +0 -78
- data/spec/redis/counter_spec.rb +0 -32
- data/spec/redis/version_spec.rb +0 -13
- data/spec/routing/integration_spec.rb +0 -110
- data/spec/routing/matchers/custom_spec.rb +0 -31
- data/spec/routing/matchers/host_spec.rb +0 -49
- data/spec/routing/matchers/path_spec.rb +0 -43
- data/spec/routing/matchers/query_spec.rb +0 -137
- data/spec/routing/matchers/scheme_spec.rb +0 -25
- data/spec/routing/matchers/suffix_spec.rb +0 -41
- data/spec/routing/matchers/uri_spec.rb +0 -27
- data/spec/routing/path_finder_spec.rb +0 -33
- data/spec/routing/root_route_spec.rb +0 -29
- data/spec/routing/route_spec.rb +0 -43
- data/spec/routing/router_spec.rb +0 -24
- data/spec/task_spec.rb +0 -34
- data/spec/{stringify_spec.rb → wayfarer/stringify_spec.rb} +2 -2
data/spec/config/strconv_spec.rb
DELETED
@@ -1,58 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require "spec_helpers"
|
4
|
-
|
5
|
-
describe Wayfarer::Config::Strconv do
|
6
|
-
subject(:strconv) { Wayfarer::Config::Strconv }
|
7
|
-
|
8
|
-
describe "::parse" do
|
9
|
-
describe Hash do
|
10
|
-
it "parses" do
|
11
|
-
input = "alpha:1,beta:two,gamma:true"
|
12
|
-
output = strconv.parse(input, Hash)
|
13
|
-
expect(output).to eq(alpha: 1, beta: "two", gamma: true)
|
14
|
-
end
|
15
|
-
end
|
16
|
-
|
17
|
-
describe Array do
|
18
|
-
it "parses" do
|
19
|
-
input = "alpha, beta , gamma"
|
20
|
-
output = strconv.parse(input, Array)
|
21
|
-
expect(output).to eq(%w[alpha beta gamma])
|
22
|
-
end
|
23
|
-
end
|
24
|
-
|
25
|
-
describe Symbol do
|
26
|
-
it "parses" do
|
27
|
-
expect(strconv.parse("foobar", Symbol)).to be(:foobar)
|
28
|
-
end
|
29
|
-
end
|
30
|
-
|
31
|
-
describe Integer do
|
32
|
-
it "parses" do
|
33
|
-
expect(strconv.parse("42", Integer)).to be(42)
|
34
|
-
end
|
35
|
-
end
|
36
|
-
|
37
|
-
describe "Primitives" do
|
38
|
-
context "Booleans" do
|
39
|
-
it "parses" do
|
40
|
-
expect(strconv.parse("true")).to be(true)
|
41
|
-
expect(strconv.parse("false")).to be(false)
|
42
|
-
end
|
43
|
-
end
|
44
|
-
|
45
|
-
context "Numbers" do
|
46
|
-
it "parses" do
|
47
|
-
expect(strconv.parse("42")).to be(42)
|
48
|
-
end
|
49
|
-
end
|
50
|
-
|
51
|
-
context "Strings" do
|
52
|
-
it "parses" do
|
53
|
-
expect(strconv.parse("foobar")).to be("foobar")
|
54
|
-
end
|
55
|
-
end
|
56
|
-
end
|
57
|
-
end
|
58
|
-
end
|
data/spec/config/struct_spec.rb
DELETED
@@ -1,66 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require "spec_helpers"
|
4
|
-
|
5
|
-
describe Wayfarer::Config::Struct do
|
6
|
-
let(:struct) do
|
7
|
-
Wayfarer::Config::Struct.new(members)
|
8
|
-
end
|
9
|
-
|
10
|
-
let(:members) { { foo: options } }
|
11
|
-
let(:options) { {} }
|
12
|
-
let(:env) { {} }
|
13
|
-
subject { struct.new(env) }
|
14
|
-
|
15
|
-
describe "Reader" do
|
16
|
-
context "without environment values and default" do
|
17
|
-
let(:options) { {} }
|
18
|
-
|
19
|
-
it "returns nil" do
|
20
|
-
expect(subject.foo).to be(nil)
|
21
|
-
end
|
22
|
-
end
|
23
|
-
|
24
|
-
context "with default only" do
|
25
|
-
let(:options) { { default: 42 } }
|
26
|
-
|
27
|
-
it "returns the default" do
|
28
|
-
expect(subject.foo).to be(42)
|
29
|
-
end
|
30
|
-
end
|
31
|
-
|
32
|
-
context "with environment key specified" do
|
33
|
-
let(:options) { { env_key: "FOO" } }
|
34
|
-
|
35
|
-
context "with environment value" do
|
36
|
-
let(:env) { { "FOO" => "hello" } }
|
37
|
-
|
38
|
-
it "returns the value" do
|
39
|
-
expect(subject.foo).to eq("hello")
|
40
|
-
end
|
41
|
-
|
42
|
-
context "with type specified" do
|
43
|
-
let(:options) { { env_key: "FOO", type: Symbol } }
|
44
|
-
|
45
|
-
it "parses the value" do
|
46
|
-
expect(subject.foo).to be(:hello)
|
47
|
-
end
|
48
|
-
end
|
49
|
-
end
|
50
|
-
|
51
|
-
context "without environment value" do
|
52
|
-
it "returns nil" do
|
53
|
-
expect(subject.foo).to be(nil)
|
54
|
-
end
|
55
|
-
end
|
56
|
-
end
|
57
|
-
end
|
58
|
-
|
59
|
-
describe "Writer" do
|
60
|
-
it "allows overriding environment values and defaults" do
|
61
|
-
expect {
|
62
|
-
subject.foo = 3
|
63
|
-
}.to change { subject.foo }.from(nil).to(3)
|
64
|
-
end
|
65
|
-
end
|
66
|
-
end
|
data/spec/fixtures/dummy_job.rb
DELETED
data/spec/gc_spec.rb
DELETED
@@ -1,59 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require "spec_helpers"
|
4
|
-
|
5
|
-
describe Wayfarer::GC, redis: true do
|
6
|
-
include Wayfarer::Redis::Connection
|
7
|
-
|
8
|
-
let(:task) { build(:task) }
|
9
|
-
subject(:gc) { described_class.new(task) }
|
10
|
-
|
11
|
-
before do
|
12
|
-
task.metadata.job = spy
|
13
|
-
task.barrier.seen?(task.url)
|
14
|
-
end
|
15
|
-
|
16
|
-
describe "#run" do
|
17
|
-
context "when counter reaches 0" do
|
18
|
-
before { task.counter.increment }
|
19
|
-
|
20
|
-
it "resets the barrier" do
|
21
|
-
expect {
|
22
|
-
gc.run
|
23
|
-
}.to change { redis { |conn| conn.exists?(task.barrier.redis_key) } }.to(false)
|
24
|
-
end
|
25
|
-
|
26
|
-
it "resets the counter" do
|
27
|
-
expect {
|
28
|
-
gc.run
|
29
|
-
}.to change { redis { |conn| conn.exists?(task.counter.redis_key) } }.to(false)
|
30
|
-
end
|
31
|
-
|
32
|
-
it "runs after batch callbacks" do
|
33
|
-
expect(task.metadata.job).to receive(:run_callbacks).with(:batch).exactly(:once)
|
34
|
-
gc.run
|
35
|
-
end
|
36
|
-
end
|
37
|
-
|
38
|
-
context "when counter does not reach 0" do
|
39
|
-
before { 2.times { task.counter.increment } }
|
40
|
-
|
41
|
-
it "does not reset the barrier" do
|
42
|
-
expect {
|
43
|
-
gc.run
|
44
|
-
}.not_to(change { redis { |conn| conn.exists?(task.barrier.redis_key) } })
|
45
|
-
end
|
46
|
-
|
47
|
-
it "does not reset the counter" do
|
48
|
-
expect {
|
49
|
-
gc.run
|
50
|
-
}.not_to(change { redis { |conn| conn.exists?(task.counter.redis_key) } })
|
51
|
-
end
|
52
|
-
|
53
|
-
it "does not run after batch callbacks" do
|
54
|
-
expect(task.metadata.job).not_to receive(:run_callbacks).with(:batch)
|
55
|
-
gc.run
|
56
|
-
end
|
57
|
-
end
|
58
|
-
end
|
59
|
-
end
|
data/spec/handler_spec.rb
DELETED
data/spec/integration/callbacks_spec.rb
DELETED
@@ -1,85 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require "spec_helpers"
|
4
|
-
|
5
|
-
describe "Callbacks" do
|
6
|
-
let(:url) { test_app_path("git-scm.com/book/en/v2.html") }
|
7
|
-
|
8
|
-
describe Wayfarer::Base do
|
9
|
-
specify do
|
10
|
-
class self.class::DummyJob < Wayfarer::Base
|
11
|
-
extend SpecHelpers
|
12
|
-
include RSpec::Matchers
|
13
|
-
|
14
|
-
route { host test_app_host, to: :index }
|
15
|
-
|
16
|
-
attr_accessor :callbacks_fired
|
17
|
-
|
18
|
-
before_fetch do
|
19
|
-
self.callbacks_fired = %i[before_fetch]
|
20
|
-
end
|
21
|
-
|
22
|
-
before_action do
|
23
|
-
callbacks_fired.push(:before_action)
|
24
|
-
end
|
25
|
-
|
26
|
-
after_batch do
|
27
|
-
expect(callbacks_fired).to eq(%i[before_fetch before_action])
|
28
|
-
end
|
29
|
-
|
30
|
-
def index; end
|
31
|
-
end
|
32
|
-
|
33
|
-
self.class::DummyJob.crawl(url)
|
34
|
-
perform_enqueued_jobs
|
35
|
-
end
|
36
|
-
end
|
37
|
-
|
38
|
-
describe Wayfarer::Handler do
|
39
|
-
specify do
|
40
|
-
class self.class::DummyJob < Wayfarer::Base
|
41
|
-
extend SpecHelpers
|
42
|
-
include RSpec::Matchers
|
43
|
-
|
44
|
-
route { host test_app_host, to: DummyHandler }
|
45
|
-
|
46
|
-
attr_accessor :callbacks_fired
|
47
|
-
|
48
|
-
before_fetch do
|
49
|
-
self.callbacks_fired = %i[before_fetch_job]
|
50
|
-
end
|
51
|
-
|
52
|
-
before_action do
|
53
|
-
callbacks_fired.push(:before_action_job)
|
54
|
-
end
|
55
|
-
|
56
|
-
after_batch do
|
57
|
-
expect(callbacks_fired).to eq(%i[before_fetch_job before_action_job])
|
58
|
-
end
|
59
|
-
|
60
|
-
class DummyHandler < Wayfarer::Handler
|
61
|
-
include RSpec::Matchers
|
62
|
-
|
63
|
-
route { to :index }
|
64
|
-
|
65
|
-
attr_accessor :callbacks_fired
|
66
|
-
|
67
|
-
before_fetch do
|
68
|
-
raise "before_fetch ran in handler"
|
69
|
-
end
|
70
|
-
|
71
|
-
before_action do
|
72
|
-
self.callbacks_fired = %i[before_action_handler]
|
73
|
-
end
|
74
|
-
|
75
|
-
def index
|
76
|
-
expect(callbacks_fired).to eq(%i[before_action_handler])
|
77
|
-
end
|
78
|
-
end
|
79
|
-
end
|
80
|
-
|
81
|
-
self.class::DummyJob.crawl(url)
|
82
|
-
perform_enqueued_jobs
|
83
|
-
end
|
84
|
-
end
|
85
|
-
end
|
data/spec/integration/page_spec.rb
DELETED
@@ -1,62 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require "spec_helpers"
|
4
|
-
|
5
|
-
describe "Pages" do
|
6
|
-
let(:url) { test_app_path("git-scm.com/book/en/v2.html") }
|
7
|
-
|
8
|
-
describe Wayfarer::Base do
|
9
|
-
specify do
|
10
|
-
class self.class::DummyJob < Wayfarer::Base
|
11
|
-
extend SpecHelpers
|
12
|
-
include RSpec::Matchers
|
13
|
-
|
14
|
-
route { host test_app_host, to: :index }
|
15
|
-
|
16
|
-
def index
|
17
|
-
expect(page.url).to eq("http://test:9876/git-scm.com/book/en/v2.html")
|
18
|
-
expect(page.status_code).to be(200)
|
19
|
-
expect(page.body).not_to be_empty
|
20
|
-
expect(page.headers.count).to be(9)
|
21
|
-
|
22
|
-
expect(page.meta.links.all.count).to be(157)
|
23
|
-
expect(page.meta.links.internal.count).to be(102)
|
24
|
-
expect(page.meta.links.external.count).to be(55)
|
25
|
-
end
|
26
|
-
end
|
27
|
-
|
28
|
-
self.class::DummyJob.crawl(url)
|
29
|
-
perform_enqueued_jobs
|
30
|
-
end
|
31
|
-
end
|
32
|
-
|
33
|
-
describe Wayfarer::Handler do
|
34
|
-
specify do
|
35
|
-
class self.class::DummyJob < Wayfarer::Base
|
36
|
-
extend SpecHelpers
|
37
|
-
|
38
|
-
route { host test_app_host, to: DummyHandler }
|
39
|
-
|
40
|
-
class DummyHandler < Wayfarer::Handler
|
41
|
-
include RSpec::Matchers
|
42
|
-
|
43
|
-
route { to :index }
|
44
|
-
|
45
|
-
def index
|
46
|
-
expect(page.url).to eq("http://test:9876/git-scm.com/book/en/v2.html")
|
47
|
-
expect(page.status_code).to be(200)
|
48
|
-
expect(page.body).not_to be_empty
|
49
|
-
expect(page.headers.count).to be(9)
|
50
|
-
|
51
|
-
expect(page.meta.links.all.count).to be(157)
|
52
|
-
expect(page.meta.links.internal.count).to be(102)
|
53
|
-
expect(page.meta.links.external.count).to be(55)
|
54
|
-
end
|
55
|
-
end
|
56
|
-
end
|
57
|
-
|
58
|
-
self.class::DummyJob.crawl(url)
|
59
|
-
perform_enqueued_jobs
|
60
|
-
end
|
61
|
-
end
|
62
|
-
end
|
data/spec/integration/params_spec.rb
DELETED
@@ -1,56 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require "spec_helpers"
|
4
|
-
|
5
|
-
describe "URL parameters" do
|
6
|
-
let(:url) { test_app_path("git-scm.com/book/en/v2.html") }
|
7
|
-
|
8
|
-
describe Wayfarer::Base do
|
9
|
-
specify do
|
10
|
-
class self.class::DummyJob < Wayfarer::Base
|
11
|
-
extend SpecHelpers
|
12
|
-
include RSpec::Matchers
|
13
|
-
|
14
|
-
route do
|
15
|
-
to :index, host: test_app_host do
|
16
|
-
path "git-scm.com/book/:lang/:file"
|
17
|
-
end
|
18
|
-
end
|
19
|
-
|
20
|
-
def index
|
21
|
-
expect(params).to eq("lang" => "en", "file" => "v2.html")
|
22
|
-
end
|
23
|
-
end
|
24
|
-
|
25
|
-
self.class::DummyJob.crawl(url)
|
26
|
-
perform_enqueued_jobs
|
27
|
-
end
|
28
|
-
end
|
29
|
-
|
30
|
-
describe Wayfarer::Handler do
|
31
|
-
specify do
|
32
|
-
class self.class::DummyJob < Wayfarer::Base
|
33
|
-
extend SpecHelpers
|
34
|
-
|
35
|
-
route do
|
36
|
-
to DummyHandler, host: test_app_host do
|
37
|
-
path "git-scm.com/book/:lang/:file"
|
38
|
-
end
|
39
|
-
end
|
40
|
-
|
41
|
-
class DummyHandler < Wayfarer::Handler
|
42
|
-
include RSpec::Matchers
|
43
|
-
|
44
|
-
route { to :index }
|
45
|
-
|
46
|
-
def index
|
47
|
-
expect(params).to eq("lang" => "en", "file" => "v2.html")
|
48
|
-
end
|
49
|
-
end
|
50
|
-
end
|
51
|
-
|
52
|
-
self.class::DummyJob.crawl(url)
|
53
|
-
perform_enqueued_jobs
|
54
|
-
end
|
55
|
-
end
|
56
|
-
end
|
data/spec/integration/stage_spec.rb
DELETED
@@ -1,51 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require "spec_helpers"
|
4
|
-
|
5
|
-
describe "Staging" do
|
6
|
-
let(:url) { test_app_path("git-scm.com/book/en/v2.html") }
|
7
|
-
|
8
|
-
describe Wayfarer::Base do
|
9
|
-
specify do
|
10
|
-
class self.class::DummyJob < Wayfarer::Base
|
11
|
-
extend SpecHelpers
|
12
|
-
|
13
|
-
route { host test_app_host, to: :index }
|
14
|
-
|
15
|
-
def index
|
16
|
-
stage page.meta.links.all
|
17
|
-
end
|
18
|
-
end
|
19
|
-
|
20
|
-
expect {
|
21
|
-
self.class::DummyJob.crawl(url)
|
22
|
-
perform_enqueued_jobs
|
23
|
-
}.to change { enqueued_jobs.size }.by(156)
|
24
|
-
end
|
25
|
-
end
|
26
|
-
|
27
|
-
describe Wayfarer::Handler do
|
28
|
-
specify do
|
29
|
-
class self.class::DummyJob < Wayfarer::Base
|
30
|
-
extend SpecHelpers
|
31
|
-
|
32
|
-
route do
|
33
|
-
host test_app_host, to: DummyHandler
|
34
|
-
end
|
35
|
-
|
36
|
-
class DummyHandler < Wayfarer::Handler
|
37
|
-
route { to :index }
|
38
|
-
|
39
|
-
def index
|
40
|
-
stage page.meta.links.all
|
41
|
-
end
|
42
|
-
end
|
43
|
-
end
|
44
|
-
|
45
|
-
expect {
|
46
|
-
self.class::DummyJob.crawl(url)
|
47
|
-
perform_enqueued_jobs
|
48
|
-
}.to change { enqueued_jobs.size }.by(156)
|
49
|
-
end
|
50
|
-
end
|
51
|
-
end
|
data/spec/integration/steering_spec.rb
DELETED
@@ -1,57 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require "spec_helpers"
|
4
|
-
|
5
|
-
describe "Steering" do
|
6
|
-
let(:url) { test_app_path("git-scm.com/book/en/v2.html") }
|
7
|
-
|
8
|
-
describe Wayfarer::Base do
|
9
|
-
specify do
|
10
|
-
class self.class::DummyJob < Wayfarer::Base
|
11
|
-
extend RSpec::Matchers
|
12
|
-
|
13
|
-
route do |a, b|
|
14
|
-
expect(a).to eq("foobar")
|
15
|
-
expect(b).to eq("barqux")
|
16
|
-
end
|
17
|
-
|
18
|
-
steer do |task|
|
19
|
-
[task.batch, "barqux"]
|
20
|
-
end
|
21
|
-
end
|
22
|
-
|
23
|
-
self.class::DummyJob.crawl(url, batch: "foobar")
|
24
|
-
perform_enqueued_jobs
|
25
|
-
end
|
26
|
-
end
|
27
|
-
|
28
|
-
describe Wayfarer::Handler do
|
29
|
-
specify do
|
30
|
-
class self.class::DummyJob < Wayfarer::Base
|
31
|
-
route do |_a, _b|
|
32
|
-
to DummyHandler
|
33
|
-
end
|
34
|
-
|
35
|
-
steer do |_task|
|
36
|
-
[123, "barqux"]
|
37
|
-
end
|
38
|
-
|
39
|
-
class DummyHandler < Wayfarer::Handler
|
40
|
-
extend RSpec::Matchers
|
41
|
-
|
42
|
-
route do |a, b|
|
43
|
-
expect(a).to eq("foobar")
|
44
|
-
expect(b).to eq("fooz")
|
45
|
-
end
|
46
|
-
|
47
|
-
steer do |task|
|
48
|
-
[task.batch, "fooz"]
|
49
|
-
end
|
50
|
-
end
|
51
|
-
end
|
52
|
-
|
53
|
-
self.class::DummyJob.crawl(url, batch: "foobar")
|
54
|
-
perform_enqueued_jobs
|
55
|
-
end
|
56
|
-
end
|
57
|
-
end
|
data/spec/middleware/dedup_spec.rb
DELETED
@@ -1,88 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require "spec_helpers"
|
4
|
-
|
5
|
-
describe Wayfarer::Middleware::Dedup, redis: true do
|
6
|
-
let(:task) { build(:task) }
|
7
|
-
subject { described_class.new }
|
8
|
-
|
9
|
-
before { task.metadata.staged_urls = SortedSet.new }
|
10
|
-
|
11
|
-
describe "#call" do
|
12
|
-
context "if already routed" do
|
13
|
-
before { task.metadata.action = :action }
|
14
|
-
|
15
|
-
it "does not call the barrier" do
|
16
|
-
expect(task.barrier).not_to receive(:seen?)
|
17
|
-
end
|
18
|
-
|
19
|
-
it "yields" do
|
20
|
-
expect { |spy| subject.call(task, &spy) }.to yield_control
|
21
|
-
end
|
22
|
-
end
|
23
|
-
|
24
|
-
context "without staged URLs" do
|
25
|
-
it "does not raise" do
|
26
|
-
expect { subject.call(task) }.not_to raise_error
|
27
|
-
end
|
28
|
-
end
|
29
|
-
|
30
|
-
context "with unseen URL" do
|
31
|
-
it "marks the URL as seen" do
|
32
|
-
expect {
|
33
|
-
subject.call(task)
|
34
|
-
}.to change { task.barrier.seen?(task.url) }.to(true)
|
35
|
-
end
|
36
|
-
|
37
|
-
it "yields" do
|
38
|
-
expect { |spy| subject.call(task, &spy) }.to yield_control
|
39
|
-
end
|
40
|
-
end
|
41
|
-
|
42
|
-
context "with seen URL" do
|
43
|
-
before { task.barrier.seen?(task.url) }
|
44
|
-
|
45
|
-
it "does not yield" do
|
46
|
-
expect { |spy| subject.call(task, &spy) }.not_to yield_control
|
47
|
-
end
|
48
|
-
end
|
49
|
-
|
50
|
-
context "with exception raised" do
|
51
|
-
it "does not mark the URL as seen" do
|
52
|
-
begin
|
53
|
-
subject.call(task) { raise }
|
54
|
-
rescue StandardError
|
55
|
-
nil
|
56
|
-
end
|
57
|
-
expect(task.barrier.seen?(task.url)).to be(false)
|
58
|
-
end
|
59
|
-
|
60
|
-
it "re-raises the exception" do
|
61
|
-
expect {
|
62
|
-
subject.call(task) { raise }
|
63
|
-
}.to raise_error(RuntimeError)
|
64
|
-
end
|
65
|
-
end
|
66
|
-
|
67
|
-
describe "staged URL filtering" do
|
68
|
-
let(:seen_urls) { %w[https://yahoo.com https://google.com] }
|
69
|
-
let(:unseen_urls) { %w[https://w3c.org https://nasa.gov] }
|
70
|
-
|
71
|
-
before do
|
72
|
-
seen_urls.each do |url|
|
73
|
-
task.barrier.seen?(url)
|
74
|
-
end
|
75
|
-
|
76
|
-
[*seen_urls, *unseen_urls].each do |url|
|
77
|
-
task.metadata.staged_urls.add(url)
|
78
|
-
end
|
79
|
-
end
|
80
|
-
|
81
|
-
it "filters seen staged URLs" do
|
82
|
-
expect {
|
83
|
-
subject.call(task)
|
84
|
-
}.to change { task.metadata.staged_urls }.to(SortedSet.new(unseen_urls))
|
85
|
-
end
|
86
|
-
end
|
87
|
-
end
|
88
|
-
end
|
data/spec/middleware/dispatch_spec.rb
DELETED
@@ -1,43 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require "spec_helpers"
|
4
|
-
|
5
|
-
describe Wayfarer::Middleware::Dispatch do
|
6
|
-
let(:task) { build(:task) }
|
7
|
-
let(:action) { :action }
|
8
|
-
subject(:chain) { described_class.new }
|
9
|
-
|
10
|
-
before do
|
11
|
-
task.metadata.controller = spy
|
12
|
-
task.metadata.action = action
|
13
|
-
|
14
|
-
allow(task.metadata.controller).to receive(:run_callbacks).and_yield
|
15
|
-
end
|
16
|
-
|
17
|
-
describe "#call" do
|
18
|
-
it "runs callbacks" do
|
19
|
-
expect(task.metadata.controller).to receive(:run_callbacks).with(action)
|
20
|
-
subject.call(task)
|
21
|
-
end
|
22
|
-
|
23
|
-
context "when action is a Symbol" do
|
24
|
-
it "calls the method" do
|
25
|
-
expect(task.metadata.controller).to receive(action)
|
26
|
-
subject.call(task)
|
27
|
-
end
|
28
|
-
end
|
29
|
-
|
30
|
-
context "with other action" do
|
31
|
-
let(:action) { Class.new }
|
32
|
-
|
33
|
-
it "instantiates and calls" do
|
34
|
-
expect_any_instance_of(action).to receive(:call).with(task)
|
35
|
-
subject.call(task)
|
36
|
-
end
|
37
|
-
end
|
38
|
-
|
39
|
-
it "yields" do
|
40
|
-
expect { |spy| subject.call(task, &spy) }.to yield_control
|
41
|
-
end
|
42
|
-
end
|
43
|
-
end
|