wayfarer 0.4.7 → 0.4.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.env +17 -0
- data/.github/workflows/lint.yaml +8 -6
- data/.github/workflows/release.yaml +4 -3
- data/.github/workflows/tests.yaml +5 -14
- data/.gitignore +2 -2
- data/.rubocop.yml +31 -0
- data/.vale.ini +6 -3
- data/Dockerfile +3 -2
- data/Gemfile +21 -0
- data/Gemfile.lock +233 -128
- data/Rakefile +7 -0
- data/docker-compose.yml +13 -14
- data/docs/guides/callbacks.md +3 -1
- data/docs/guides/configuration.md +10 -35
- data/docs/guides/development.md +67 -0
- data/docs/guides/handlers.md +7 -7
- data/docs/guides/jobs.md +54 -11
- data/docs/guides/networking/custom_adapters.md +31 -10
- data/docs/guides/pages.md +24 -22
- data/docs/guides/routing.md +116 -34
- data/docs/guides/tasks.md +30 -10
- data/docs/guides/tutorial.md +23 -17
- data/docs/guides/user_agents.md +11 -9
- data/lib/wayfarer/base.rb +9 -8
- data/lib/wayfarer/batch_completion.rb +18 -14
- data/lib/wayfarer/callbacks.rb +14 -14
- data/lib/wayfarer/cli/route_printer.rb +78 -96
- data/lib/wayfarer/cli.rb +12 -30
- data/lib/wayfarer/gc.rb +6 -1
- data/lib/wayfarer/kv.rb +28 -0
- data/lib/wayfarer/middleware/chain.rb +7 -1
- data/lib/wayfarer/middleware/content_type.rb +20 -15
- data/lib/wayfarer/middleware/dedup.rb +9 -3
- data/lib/wayfarer/middleware/dispatch.rb +7 -2
- data/lib/wayfarer/middleware/normalize.rb +4 -12
- data/lib/wayfarer/middleware/router.rb +1 -1
- data/lib/wayfarer/middleware/uri_parser.rb +4 -3
- data/lib/wayfarer/networking/context.rb +12 -1
- data/lib/wayfarer/networking/ferrum.rb +1 -4
- data/lib/wayfarer/networking/follow.rb +2 -1
- data/lib/wayfarer/networking/pool.rb +12 -7
- data/lib/wayfarer/networking/selenium.rb +15 -7
- data/lib/wayfarer/page.rb +0 -2
- data/lib/wayfarer/parsing/xml.rb +1 -1
- data/lib/wayfarer/parsing.rb +2 -5
- data/lib/wayfarer/redis/barrier.rb +15 -2
- data/lib/wayfarer/redis/counter.rb +1 -2
- data/lib/wayfarer/routing/dsl.rb +166 -31
- data/lib/wayfarer/routing/hash_stack.rb +33 -0
- data/lib/wayfarer/routing/matchers/custom.rb +8 -5
- data/lib/wayfarer/routing/matchers/{suffix.rb → empty_params.rb} +2 -6
- data/lib/wayfarer/routing/matchers/host.rb +15 -9
- data/lib/wayfarer/routing/matchers/path.rb +11 -33
- data/lib/wayfarer/routing/matchers/query.rb +41 -17
- data/lib/wayfarer/routing/matchers/result.rb +12 -0
- data/lib/wayfarer/routing/matchers/scheme.rb +13 -5
- data/lib/wayfarer/routing/matchers/url.rb +13 -5
- data/lib/wayfarer/routing/path_consumer.rb +130 -0
- data/lib/wayfarer/routing/path_finder.rb +151 -23
- data/lib/wayfarer/routing/result.rb +1 -1
- data/lib/wayfarer/routing/root_route.rb +14 -2
- data/lib/wayfarer/routing/route.rb +71 -14
- data/lib/wayfarer/routing/serializable.rb +28 -0
- data/lib/wayfarer/routing/sub_route.rb +53 -0
- data/lib/wayfarer/routing/target_route.rb +17 -1
- data/lib/wayfarer/stringify.rb +1 -2
- data/lib/wayfarer/task.rb +3 -5
- data/lib/wayfarer/uri/normalization.rb +120 -0
- data/lib/wayfarer.rb +50 -10
- data/mise.toml +2 -0
- data/mkdocs.yml +8 -17
- data/rake/lint.rake +0 -96
- data/rake/release.rake +5 -11
- data/rake/tests.rake +8 -4
- data/requirements.txt +1 -1
- data/spec/factories/job.rb +8 -0
- data/spec/factories/middleware.rb +2 -2
- data/spec/factories/path_finder.rb +11 -0
- data/spec/factories/redis.rb +19 -0
- data/spec/factories/task.rb +39 -1
- data/spec/spec_helpers.rb +50 -57
- data/spec/support/active_job_helpers.rb +8 -0
- data/spec/support/integration_helpers.rb +21 -0
- data/spec/support/redis_helpers.rb +9 -0
- data/spec/support/test_app.rb +64 -43
- data/spec/{base_spec.rb → wayfarer/base_spec.rb} +32 -36
- data/spec/wayfarer/batch_completion_spec.rb +142 -0
- data/spec/wayfarer/cli/job_spec.rb +88 -0
- data/spec/wayfarer/cli/routing_spec.rb +322 -0
- data/spec/{cli → wayfarer/cli}/version_spec.rb +1 -1
- data/spec/wayfarer/gc_spec.rb +29 -0
- data/spec/{handler_spec.rb → wayfarer/handler_spec.rb} +1 -3
- data/spec/{integration → wayfarer/integration}/callbacks_spec.rb +9 -6
- data/spec/wayfarer/integration/content_type_spec.rb +37 -0
- data/spec/wayfarer/integration/custom_routing_spec.rb +51 -0
- data/spec/{integration → wayfarer/integration}/gc_spec.rb +9 -13
- data/spec/{integration → wayfarer/integration}/handler_spec.rb +9 -10
- data/spec/{integration → wayfarer/integration}/page_spec.rb +8 -6
- data/spec/{integration → wayfarer/integration}/params_spec.rb +4 -4
- data/spec/{integration → wayfarer/integration}/parsing_spec.rb +7 -33
- data/spec/wayfarer/integration/retry_spec.rb +112 -0
- data/spec/{integration → wayfarer/integration}/stage_spec.rb +5 -5
- data/spec/{middleware → wayfarer/middleware}/batch_completion_spec.rb +4 -5
- data/spec/{middleware → wayfarer/middleware}/chain_spec.rb +20 -15
- data/spec/{middleware → wayfarer/middleware}/content_type_spec.rb +18 -21
- data/spec/{middleware → wayfarer/middleware}/controller_spec.rb +22 -20
- data/spec/wayfarer/middleware/dedup_spec.rb +66 -0
- data/spec/wayfarer/middleware/normalize_spec.rb +32 -0
- data/spec/{middleware → wayfarer/middleware}/router_spec.rb +18 -20
- data/spec/{middleware → wayfarer/middleware}/stage_spec.rb +11 -10
- data/spec/wayfarer/middleware/uri_parser_spec.rb +63 -0
- data/spec/{middleware → wayfarer/middleware}/user_agent_spec.rb +34 -32
- data/spec/wayfarer/networking/capybara_spec.rb +13 -0
- data/spec/{networking → wayfarer/networking}/context_spec.rb +46 -38
- data/spec/wayfarer/networking/ferrum_spec.rb +13 -0
- data/spec/{networking → wayfarer/networking}/follow_spec.rb +9 -4
- data/spec/wayfarer/networking/http_spec.rb +12 -0
- data/spec/{networking → wayfarer/networking}/pool_spec.rb +11 -9
- data/spec/wayfarer/networking/selenium_spec.rb +12 -0
- data/spec/{networking → wayfarer/networking}/strategy.rb +33 -54
- data/spec/{page_spec.rb → wayfarer/page_spec.rb} +3 -3
- data/spec/{parsing → wayfarer/parsing}/json_spec.rb +1 -1
- data/spec/{parsing/xml_spec.rb → wayfarer/parsing/xml_parse_spec.rb} +4 -3
- data/spec/{redis → wayfarer/redis}/barrier_spec.rb +5 -4
- data/spec/wayfarer/redis/counter_spec.rb +34 -0
- data/spec/{redis → wayfarer/redis}/pool_spec.rb +3 -2
- data/spec/{routing → wayfarer/routing}/dsl_spec.rb +12 -22
- data/spec/wayfarer/routing/hash_stack_spec.rb +63 -0
- data/spec/wayfarer/routing/integration_spec.rb +101 -0
- data/spec/wayfarer/routing/matchers/custom_spec.rb +39 -0
- data/spec/wayfarer/routing/matchers/host_spec.rb +56 -0
- data/spec/wayfarer/routing/matchers/matcher.rb +17 -0
- data/spec/wayfarer/routing/matchers/path_spec.rb +43 -0
- data/spec/wayfarer/routing/matchers/query_spec.rb +123 -0
- data/spec/wayfarer/routing/matchers/scheme_spec.rb +45 -0
- data/spec/wayfarer/routing/matchers/url_spec.rb +33 -0
- data/spec/wayfarer/routing/path_consumer_spec.rb +123 -0
- data/spec/wayfarer/routing/path_finder_spec.rb +409 -0
- data/spec/wayfarer/routing/root_route_spec.rb +51 -0
- data/spec/wayfarer/routing/route_spec.rb +74 -0
- data/spec/wayfarer/routing/sub_route_spec.rb +103 -0
- data/spec/wayfarer/uri/normalization_spec.rb +98 -0
- data/spec/wayfarer_spec.rb +2 -2
- data/wayfarer.gemspec +17 -28
- metadata +768 -246
- data/.rbenv-gemsets +0 -1
- data/.ruby-version +0 -1
- data/RELEASING.md +0 -17
- data/docs/cookbook/user_agent.md +0 -7
- data/docs/design.md +0 -36
- data/docs/guides/jobs/error_handling.md +0 -40
- data/docs/reference/configuration.md +0 -36
- data/spec/batch_completion_spec.rb +0 -104
- data/spec/cli/job_spec.rb +0 -74
- data/spec/cli/routing_spec.rb +0 -101
- data/spec/fixtures/dummy_job.rb +0 -9
- data/spec/gc_spec.rb +0 -17
- data/spec/integration/content_type_spec.rb +0 -145
- data/spec/integration/routing_spec.rb +0 -18
- data/spec/middleware/dedup_spec.rb +0 -71
- data/spec/middleware/dispatch_spec.rb +0 -59
- data/spec/middleware/normalize_spec.rb +0 -60
- data/spec/middleware/uri_parser_spec.rb +0 -53
- data/spec/networking/capybara_spec.rb +0 -12
- data/spec/networking/ferrum_spec.rb +0 -12
- data/spec/networking/http_spec.rb +0 -12
- data/spec/networking/selenium_spec.rb +0 -12
- data/spec/redis/counter_spec.rb +0 -44
- data/spec/routing/integration_spec.rb +0 -110
- data/spec/routing/matchers/custom_spec.rb +0 -31
- data/spec/routing/matchers/host_spec.rb +0 -49
- data/spec/routing/matchers/path_spec.rb +0 -43
- data/spec/routing/matchers/query_spec.rb +0 -137
- data/spec/routing/matchers/scheme_spec.rb +0 -25
- data/spec/routing/matchers/suffix_spec.rb +0 -41
- data/spec/routing/matchers/uri_spec.rb +0 -27
- data/spec/routing/path_finder_spec.rb +0 -33
- data/spec/routing/root_route_spec.rb +0 -29
- data/spec/routing/route_spec.rb +0 -43
- data/docs/{reference → guides}/cli.md +0 -0
- data/spec/{stringify_spec.rb → wayfarer/stringify_spec.rb} +2 -2
- data/spec/{task_spec.rb → wayfarer/task_spec.rb} +0 -0
data/.rbenv-gemsets
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
wayfarer
|
data/.ruby-version
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
2.7.4
|
data/RELEASING.md
DELETED
@@ -1,17 +0,0 @@
|
|
1
|
-
# Release Procedure
|
2
|
-
|
3
|
-
1. Ensure `Wayfarer::VERSION` was bumped appropriately.
|
4
|
-
2. Ensure the version in wayfarer.gemspec matches.
|
5
|
-
3. Open a release Pull Request develop -> master branch
|
6
|
-
4. Merge the Pull Request
|
7
|
-
5. Publish RubyGem and git tag as follows:
|
8
|
-
|
9
|
-
```
|
10
|
-
git checkout master
|
11
|
-
git pull origin master --rebase
|
12
|
-
bundle exec rake build
|
13
|
-
gem push build/wayfarer-*.gem
|
14
|
-
bundle exec rake clean
|
15
|
-
git tag <VERSION>
|
16
|
-
git push origin <VERSION>
|
17
|
-
```
|
data/docs/cookbook/user_agent.md
DELETED
data/docs/design.md
DELETED
@@ -1,36 +0,0 @@
|
|
1
|
-
# Design decisions
|
2
|
-
|
3
|
-
## Navigate the web along URL patterns
|
4
|
-
|
5
|
-
URLs are less prone to change than served markup.
|
6
|
-
One reason for this is that changes to a URL's path can have a negative effect
|
7
|
-
on its page ranking in search engines. Many websites also implement common
|
8
|
-
architectural URL patterns, for example REST and its variations, that
|
9
|
-
lend themselves to pattern matching.
|
10
|
-
|
11
|
-
## Follow URLs verbatim
|
12
|
-
|
13
|
-
Normalized URLs are useful for deduplication, but URLs should be followed
|
14
|
-
as they appear in responses. Navigating to normalized versions of URLs makes
|
15
|
-
crawlers stick out from other user agents, for example.
|
16
|
-
|
17
|
-
## Tasks are version-less and don't persist metadata
|
18
|
-
|
19
|
-
Tasks serialize to their URL and batch. No other data gets written to
|
20
|
-
the message queue. Wayfarer aims to minimise job payloads.
|
21
|
-
There is also no need for versioning persisted tasks, since there is only one
|
22
|
-
version of a task: URL and batch.
|
23
|
-
|
24
|
-
## Why depend on Redis
|
25
|
-
|
26
|
-
There are two core features that depend on Redis. First, per-batch acylicity is
|
27
|
-
achieved by maintaining the set of processed URLs per batch in Redis.
|
28
|
-
There's no option to follow links in a cyclic manner. Second, batch completion
|
29
|
-
requires updating an integer value in Redis, and batch completion is a very
|
30
|
-
useful feature, since most crawls should end eventually, and often you want to
|
31
|
-
know when.
|
32
|
-
|
33
|
-
## Persistence and document mapping not included
|
34
|
-
|
35
|
-
Like Active Job, Wayfarer is not concerned with persistence.
|
36
|
-
Model <-> DOM mapping abstractions are also out of scope.
|
data/docs/guides/jobs/error_handling.md
DELETED
@@ -1,40 +0,0 @@
|
|
1
|
-
# Error handling
|
2
|
-
|
3
|
-
!!! danger "Only ActiveJob error handling is supported"
|
4
|
-
|
5
|
-
Wayfarer exclusively supports ActiveJob's error handling. You cannot use
|
6
|
-
message queue-specific error handling, for example error handling with
|
7
|
-
`sidekiq_options` is unsupported. Otherwise batches get garbage-collected
|
8
|
-
too early as Wayfarer instruments ActiveJob.
|
9
|
-
|
10
|
-
Wayfarer relies on ActiveJob's [error handling methods](https://guides.rubyonrails.org/active_job_basics.html#exceptions):
|
11
|
-
|
12
|
-
* `retry_on` to retry jobs a number of times on certain errors:
|
13
|
-
|
14
|
-
```ruby
|
15
|
-
class DummyJob < Wayfarer::Base
|
16
|
-
retry_on MyError, attempts: 3 do |job, error|
|
17
|
-
# This block runs once all 3 attempts have failed
|
18
|
-
# (1 initial attempt + 2 retries)
|
19
|
-
end
|
20
|
-
end
|
21
|
-
```
|
22
|
-
|
23
|
-
* `discard_on` to throw away jobs on certain errors:
|
24
|
-
|
25
|
-
```ruby
|
26
|
-
class DummyJob < Wayfarer::Base
|
27
|
-
discard_on MyError do |job, error|
|
28
|
-
# This block runs once and buries the job
|
29
|
-
end
|
30
|
-
end
|
31
|
-
```
|
32
|
-
|
33
|
-
## Recreating user agents on certain errors
|
34
|
-
|
35
|
-
You can configure a list of exception classes upon which user agents
|
36
|
-
get recreated (see [User agent API]()):
|
37
|
-
|
38
|
-
```ruby
|
39
|
-
Wayfarer.config[:network][:renew_on] = [MyIrrecoverableError]
|
40
|
-
```
|
data/docs/reference/configuration.md
DELETED
@@ -1,36 +0,0 @@
|
|
1
|
-
---
|
2
|
-
hide:
|
3
|
-
- toc
|
4
|
-
---
|
5
|
-
|
6
|
-
# Configuration
|
7
|
-
|
8
|
-
You can configure Wayfarer by assigning to the `Wayfarer.config` Hash
|
9
|
-
which has the following defaults:
|
10
|
-
|
11
|
-
```ruby
|
12
|
-
{
|
13
|
-
redis: {
|
14
|
-
url: "redis://localhost:6379/0",
|
15
|
-
factory: ->(redis) { ::Redis.new(url: redis[:url]) }
|
16
|
-
},
|
17
|
-
network: {
|
18
|
-
agent: :http,
|
19
|
-
pool_size: 1,
|
20
|
-
pool_timeout: 10,
|
21
|
-
http_headers: {},
|
22
|
-
renew_on: []
|
23
|
-
},
|
24
|
-
capybara: {
|
25
|
-
driver: nil
|
26
|
-
},
|
27
|
-
ferrum: {
|
28
|
-
options: {}
|
29
|
-
},
|
30
|
-
selenium: {
|
31
|
-
driver: :chrome,
|
32
|
-
options: {},
|
33
|
-
client_timeout: 60
|
34
|
-
}
|
35
|
-
}
|
36
|
-
```
|
data/spec/batch_completion_spec.rb
DELETED
@@ -1,104 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require "spec_helpers"
|
4
|
-
|
5
|
-
describe Wayfarer::BatchCompletion, redis: true do
|
6
|
-
let(:task) { build(:task, :redis_pool) }
|
7
|
-
let(:job) { double(arguments: [task]) }
|
8
|
-
|
9
|
-
describe "::call" do
|
10
|
-
let(:name) { "foo" }
|
11
|
-
|
12
|
-
subject { described_class.call(name, nil, nil, nil, { job: job }) }
|
13
|
-
|
14
|
-
context "with Wayfarer job" do
|
15
|
-
before { job.extend(Wayfarer::Base) }
|
16
|
-
|
17
|
-
specify do
|
18
|
-
expect(Wayfarer::BatchCompletion)
|
19
|
-
.to receive(:handle).with(name, job, task, instance_of(Wayfarer::Redis::Counter))
|
20
|
-
|
21
|
-
subject
|
22
|
-
end
|
23
|
-
|
24
|
-
it "does not reassign Redis pool" do
|
25
|
-
expect { subject }.not_to change { task[:redis_pool] }.from(Wayfarer::Redis::Pool.instance)
|
26
|
-
end
|
27
|
-
|
28
|
-
context "without Redis pool" do
|
29
|
-
before do
|
30
|
-
task[:redis_pool] = nil
|
31
|
-
end
|
32
|
-
|
33
|
-
it "assigns Redis pool" do
|
34
|
-
expect { subject }.to change { task[:redis_pool] }.from(nil).to(Wayfarer::Redis::Pool.instance)
|
35
|
-
end
|
36
|
-
end
|
37
|
-
end
|
38
|
-
|
39
|
-
context "with other job" do
|
40
|
-
specify do
|
41
|
-
expect(Wayfarer::BatchCompletion).not_to receive(:handle)
|
42
|
-
|
43
|
-
subject
|
44
|
-
end
|
45
|
-
end
|
46
|
-
end
|
47
|
-
|
48
|
-
describe "::handle" do
|
49
|
-
let(:job) do
|
50
|
-
double(arguments: [task],
|
51
|
-
executions: executions,
|
52
|
-
exception_executions: exception_executions)
|
53
|
-
end
|
54
|
-
|
55
|
-
let(:initial_exception_executions) { {} }
|
56
|
-
let(:exception_executions) { initial_exception_executions }
|
57
|
-
let(:executions) { 0 }
|
58
|
-
let(:counter) { Wayfarer::Redis::Counter.new(task) }
|
59
|
-
|
60
|
-
subject(:handle) { described_class.handle(event, job, task, counter) }
|
61
|
-
|
62
|
-
before { task[:initial_exception_executions] = initial_exception_executions }
|
63
|
-
|
64
|
-
context "enqueue.active_job" do
|
65
|
-
let(:event) { "enqueue.active_job" }
|
66
|
-
|
67
|
-
specify do
|
68
|
-
expect { handle }.to change { counter.value }.by(1)
|
69
|
-
end
|
70
|
-
|
71
|
-
context "with retry" do
|
72
|
-
let(:executions) { 1 }
|
73
|
-
|
74
|
-
specify do
|
75
|
-
expect { handle }.not_to(change { counter.value })
|
76
|
-
end
|
77
|
-
end
|
78
|
-
end
|
79
|
-
|
80
|
-
context "perform.active_job" do
|
81
|
-
let(:event) { "perform.active_job" }
|
82
|
-
|
83
|
-
specify do
|
84
|
-
expect { handle }.to change { counter.value }.by(-1)
|
85
|
-
end
|
86
|
-
|
87
|
-
context "with exception occurred" do
|
88
|
-
let(:exception_executions) { { "[RuntimeError]" => 1 } }
|
89
|
-
|
90
|
-
specify do
|
91
|
-
expect { handle }.not_to(change { counter.value })
|
92
|
-
end
|
93
|
-
end
|
94
|
-
end
|
95
|
-
|
96
|
-
context "retry_stopped.active_job" do
|
97
|
-
let(:event) { "retry_stopped.active_job" }
|
98
|
-
|
99
|
-
specify do
|
100
|
-
expect { handle }.to change { counter.value }.by(-1)
|
101
|
-
end
|
102
|
-
end
|
103
|
-
end
|
104
|
-
end
|
data/spec/cli/job_spec.rb
DELETED
@@ -1,74 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require "spec_helpers"
|
4
|
-
|
5
|
-
describe Wayfarer::CLI, cli: true, redis: true do
|
6
|
-
include Wayfarer::Redis
|
7
|
-
|
8
|
-
let(:url) { test_app_path("/hello_world") }
|
9
|
-
let(:batch) { "my-batch" }
|
10
|
-
subject(:cli) { Wayfarer::CLI }
|
11
|
-
|
12
|
-
before do
|
13
|
-
write_file "dummy_job.rb", <<~FILE
|
14
|
-
class DummyJob < ActiveJob::Base
|
15
|
-
include Wayfarer::Base
|
16
|
-
end
|
17
|
-
FILE
|
18
|
-
end
|
19
|
-
|
20
|
-
before { Wayfarer::CLI.new.send(:load_environment, "dummy_job.rb") }
|
21
|
-
|
22
|
-
describe "perform" do
|
23
|
-
it "performs the worker" do
|
24
|
-
expect_any_instance_of(DummyJob).to receive(:perform).with(kind_of(Wayfarer::Task)) do |job|
|
25
|
-
task = job.arguments.first
|
26
|
-
task[:job] = job
|
27
|
-
end
|
28
|
-
|
29
|
-
cli.start(["perform", "-r", "dummy_job.rb", "DummyJob", url])
|
30
|
-
end
|
31
|
-
|
32
|
-
context "using MockRedis" do
|
33
|
-
it "performs the worker using MockRedis" do
|
34
|
-
cli.start(["perform", "--mock-redis", "DummyJob", url])
|
35
|
-
expect(Wayfarer.config[:redis][:factory].call(nil)).to be_a(MockRedis)
|
36
|
-
end
|
37
|
-
end
|
38
|
-
end
|
39
|
-
|
40
|
-
describe "enqueue" do
|
41
|
-
it "enqueues the job" do
|
42
|
-
expect(DummyJob).to receive(:crawl).with(Addressable::URI.parse(url), batch: kind_of(String))
|
43
|
-
cli.start(["enqueue", "DummyJob", url])
|
44
|
-
end
|
45
|
-
|
46
|
-
context "with batch provided" do
|
47
|
-
it "enqueues the job" do
|
48
|
-
expect(DummyJob).to receive(:crawl).with(Addressable::URI.parse(url), batch: batch)
|
49
|
-
cli.start(["enqueue", "--batch", batch, "DummyJob", url])
|
50
|
-
end
|
51
|
-
end
|
52
|
-
end
|
53
|
-
|
54
|
-
describe "execute" do
|
55
|
-
it "executes the job" do
|
56
|
-
expect(DummyJob).to receive(:crawl).with(Addressable::URI.parse(url), batch: kind_of(String))
|
57
|
-
cli.start(["execute", "DummyJob", url])
|
58
|
-
end
|
59
|
-
|
60
|
-
context "with batch provided" do
|
61
|
-
it "enqueues the job" do
|
62
|
-
expect(DummyJob).to receive(:crawl).with(Addressable::URI.parse(url), batch: batch)
|
63
|
-
cli.start(["execute", "--batch", batch, "DummyJob", url])
|
64
|
-
end
|
65
|
-
end
|
66
|
-
|
67
|
-
context "using MockRedis" do
|
68
|
-
it "performs the worker using MockRedis" do
|
69
|
-
cli.start(["execute", "--mock-redis", "DummyJob", url])
|
70
|
-
expect(Wayfarer.config[:redis][:factory].call(nil)).to be_a(MockRedis)
|
71
|
-
end
|
72
|
-
end
|
73
|
-
end
|
74
|
-
end
|
data/spec/cli/routing_spec.rb
DELETED
@@ -1,101 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require "spec_helpers"
|
4
|
-
|
5
|
-
describe Wayfarer::CLI, cli: true do
|
6
|
-
before { write_file("dummy_job.rb", contents) }
|
7
|
-
|
8
|
-
let(:contents) do
|
9
|
-
<<~RUBY
|
10
|
-
class DummyJob < ActiveJob::Base
|
11
|
-
include Wayfarer::Base
|
12
|
-
|
13
|
-
#{routes}
|
14
|
-
end
|
15
|
-
RUBY
|
16
|
-
end
|
17
|
-
|
18
|
-
describe "route" do
|
19
|
-
let(:routes) do
|
20
|
-
<<~RUBY
|
21
|
-
route.host #{test_app_host.inspect}, to: :index
|
22
|
-
RUBY
|
23
|
-
end
|
24
|
-
|
25
|
-
subject(:route) { described_class.start(["route", "-r", "dummy_job.rb", "DummyJob", url]) }
|
26
|
-
|
27
|
-
context "with matching URL" do
|
28
|
-
let(:url) { "http://#{test_app_host}" }
|
29
|
-
|
30
|
-
specify do
|
31
|
-
expect { route }.to output("Match => :index\n").to_stdout
|
32
|
-
end
|
33
|
-
end
|
34
|
-
|
35
|
-
context "with mismatching URL" do
|
36
|
-
let(:url) { "http://example.com" }
|
37
|
-
|
38
|
-
specify do
|
39
|
-
expect { route }.to output("Mismatch\n").to_stdout
|
40
|
-
end
|
41
|
-
end
|
42
|
-
end
|
43
|
-
|
44
|
-
describe "tree" do
|
45
|
-
subject(:tree) { described_class.start(["tree", "-r", "dummy_job.rb", "DummyJob", url]) }
|
46
|
-
|
47
|
-
context "without child routes" do
|
48
|
-
let(:url) { "http://#{test_app_host}" }
|
49
|
-
let(:routes) { "" }
|
50
|
-
|
51
|
-
specify do
|
52
|
-
expect { tree }.to output("Mismatch\n").to_stdout
|
53
|
-
end
|
54
|
-
end
|
55
|
-
|
56
|
-
context "symbol target" do
|
57
|
-
let(:url) { "http://#{test_app_host}" }
|
58
|
-
let(:routes) do
|
59
|
-
<<~RUBY
|
60
|
-
route.host #{test_app_host.inspect}, to: :index
|
61
|
-
RUBY
|
62
|
-
end
|
63
|
-
|
64
|
-
let(:expected_output) do
|
65
|
-
<<~OUTPUT
|
66
|
-
Match(:index)
|
67
|
-
└──Host("#{test_app_host}", match: true)
|
68
|
-
└──Target(match: true)
|
69
|
-
OUTPUT
|
70
|
-
end
|
71
|
-
|
72
|
-
specify do
|
73
|
-
expect { tree }.to output(expected_output).to_stdout
|
74
|
-
end
|
75
|
-
end
|
76
|
-
|
77
|
-
context "symbol target with params" do
|
78
|
-
let(:url) { "http://#{test_app_host}/barqux" }
|
79
|
-
let(:routes) do
|
80
|
-
<<~RUBY
|
81
|
-
route.host #{test_app_host.inspect}, to: :index do
|
82
|
-
path ":foobar"
|
83
|
-
end
|
84
|
-
RUBY
|
85
|
-
end
|
86
|
-
|
87
|
-
let(:expected_output) do
|
88
|
-
<<~OUTPUT
|
89
|
-
Match(:index, params: {:foobar=>"barqux"})
|
90
|
-
└──Host("#{test_app_host}", match: true)
|
91
|
-
└──Target(match: true)
|
92
|
-
└──Path("/:foobar", match: true, params: {:foobar=>"barqux"})
|
93
|
-
OUTPUT
|
94
|
-
end
|
95
|
-
|
96
|
-
specify do
|
97
|
-
expect { tree }.to output(expected_output).to_stdout
|
98
|
-
end
|
99
|
-
end
|
100
|
-
end
|
101
|
-
end
|
data/spec/fixtures/dummy_job.rb
DELETED
data/spec/gc_spec.rb
DELETED
@@ -1,17 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require "spec_helpers"
|
4
|
-
|
5
|
-
describe Wayfarer::GC, "::run", redis: true do
|
6
|
-
let(:task) { build(:task, :redis_pool) }
|
7
|
-
let(:barrier) { instance_double(Wayfarer::Redis::Barrier) }
|
8
|
-
|
9
|
-
subject(:run) { described_class.run(task) }
|
10
|
-
|
11
|
-
it "resets barrier and counter" do
|
12
|
-
expect(barrier).to receive(:reset!)
|
13
|
-
expect(Wayfarer::Redis::Barrier).to receive(:new).with(task).and_return(barrier)
|
14
|
-
|
15
|
-
run
|
16
|
-
end
|
17
|
-
end
|
data/spec/integration/content_type_spec.rb
DELETED
@@ -1,145 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require "spec_helpers"
|
4
|
-
|
5
|
-
describe "Content-Type allow listing", redis: true do
|
6
|
-
def perform(content_type)
|
7
|
-
DummyJob.new.perform(
|
8
|
-
build(:task, url: test_app_path("response_header/Content-Type/#{content_type}"))
|
9
|
-
)
|
10
|
-
end
|
11
|
-
|
12
|
-
before do
|
13
|
-
stub_const("DummyJob", Class.new(ActiveJob::Base).include(Wayfarer::Base))
|
14
|
-
stub_const("DummyHandler", Class.new.include(Wayfarer::Handler))
|
15
|
-
end
|
16
|
-
|
17
|
-
context "with registered Content-Types" do
|
18
|
-
before do
|
19
|
-
DummyJob.class_eval do
|
20
|
-
route.to :index
|
21
|
-
|
22
|
-
content_type "foo", "bar"
|
23
|
-
|
24
|
-
def index
|
25
|
-
:ok
|
26
|
-
end
|
27
|
-
end
|
28
|
-
end
|
29
|
-
|
30
|
-
specify do
|
31
|
-
expect(perform("foo")).to be(:ok)
|
32
|
-
expect(perform("bar")).to be(:ok)
|
33
|
-
expect(perform("qux")).to be(nil)
|
34
|
-
end
|
35
|
-
end
|
36
|
-
|
37
|
-
context "with registered Content-Type media type" do
|
38
|
-
before do
|
39
|
-
DummyJob.class_eval do
|
40
|
-
route.to :index
|
41
|
-
|
42
|
-
content_type(/text/)
|
43
|
-
content_type "application/vnd.scenario.custom+json"
|
44
|
-
|
45
|
-
def index
|
46
|
-
:ok
|
47
|
-
end
|
48
|
-
end
|
49
|
-
end
|
50
|
-
|
51
|
-
specify do
|
52
|
-
expect(perform("text/xml")).to be(:ok)
|
53
|
-
expect(perform("application/xml")).to be(nil)
|
54
|
-
expect(perform("application/rss+xml")).to be(nil)
|
55
|
-
expect(perform("image/svg+xml")).to be(nil)
|
56
|
-
expect(perform("application/vnd.scenario.custom+json")).to be(:ok)
|
57
|
-
end
|
58
|
-
end
|
59
|
-
|
60
|
-
context "without registered Content-Types" do
|
61
|
-
before do
|
62
|
-
DummyJob.class_eval do
|
63
|
-
route.to :index
|
64
|
-
|
65
|
-
def index
|
66
|
-
:ok
|
67
|
-
end
|
68
|
-
end
|
69
|
-
end
|
70
|
-
|
71
|
-
specify do
|
72
|
-
expect(perform("foo")).to be(:ok)
|
73
|
-
end
|
74
|
-
end
|
75
|
-
|
76
|
-
context "with regular expression Content-Type" do
|
77
|
-
before do
|
78
|
-
DummyJob.class_eval do
|
79
|
-
route.to :index
|
80
|
-
|
81
|
-
content_type(/xml/)
|
82
|
-
|
83
|
-
def index
|
84
|
-
:ok
|
85
|
-
end
|
86
|
-
end
|
87
|
-
end
|
88
|
-
|
89
|
-
specify do
|
90
|
-
expect(perform("text/xml")).to be(:ok)
|
91
|
-
expect(perform("application/xml")).to be(:ok)
|
92
|
-
expect(perform("application/rss+xml")).to be(:ok)
|
93
|
-
expect(perform("image/svg+xml")).to be(:ok)
|
94
|
-
expect(perform("application/vnd.scenario.custom+json")).to be(nil)
|
95
|
-
end
|
96
|
-
end
|
97
|
-
|
98
|
-
context "with registered Content-Types" do
|
99
|
-
before do
|
100
|
-
DummyJob.class_eval do
|
101
|
-
route.to :index
|
102
|
-
|
103
|
-
content_type(/text/, "foo")
|
104
|
-
|
105
|
-
def index
|
106
|
-
:ok
|
107
|
-
end
|
108
|
-
end
|
109
|
-
end
|
110
|
-
|
111
|
-
specify do
|
112
|
-
expect(perform("text/application")).to be(:ok)
|
113
|
-
expect(perform("foo")).to be(:ok)
|
114
|
-
expect(perform("bar")).to be(nil)
|
115
|
-
end
|
116
|
-
end
|
117
|
-
|
118
|
-
context "with handler" do
|
119
|
-
before do
|
120
|
-
DummyJob.class_eval do
|
121
|
-
route.to DummyHandler
|
122
|
-
|
123
|
-
content_type(/text/, "foo")
|
124
|
-
end
|
125
|
-
end
|
126
|
-
|
127
|
-
before do
|
128
|
-
DummyHandler.class_eval do
|
129
|
-
route.to :index
|
130
|
-
|
131
|
-
content_type "foo"
|
132
|
-
|
133
|
-
def index
|
134
|
-
:handler
|
135
|
-
end
|
136
|
-
end
|
137
|
-
end
|
138
|
-
|
139
|
-
specify do
|
140
|
-
expect(perform("text/application")).to be(nil)
|
141
|
-
expect(perform("foo")).to be(:handler)
|
142
|
-
expect(perform("bar")).to be(nil)
|
143
|
-
end
|
144
|
-
end
|
145
|
-
end
|
data/spec/integration/routing_spec.rb
DELETED
@@ -1,18 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require "spec_helpers"
|
4
|
-
|
5
|
-
describe "Routing" do
|
6
|
-
let(:task) { build(:task) }
|
7
|
-
|
8
|
-
before do
|
9
|
-
stub_const("DummyJob", Class.new(ActiveJob::Base).include(Wayfarer::Base))
|
10
|
-
end
|
11
|
-
|
12
|
-
describe "custom routing" do
|
13
|
-
before do
|
14
|
-
DummyJob.class_eval do
|
15
|
-
end
|
16
|
-
end
|
17
|
-
end
|
18
|
-
end
|
data/spec/middleware/dedup_spec.rb
DELETED
@@ -1,71 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require "spec_helpers"
|
4
|
-
|
5
|
-
describe Wayfarer::Middleware::Dedup, redis: true do
|
6
|
-
let(:task) { build(:task, :redis_pool) }
|
7
|
-
let(:uri) { Addressable::URI.parse(task.url) }
|
8
|
-
let(:executions) { 1 }
|
9
|
-
let(:job) { double(executions: executions) }
|
10
|
-
subject { described_class.new }
|
11
|
-
|
12
|
-
before do
|
13
|
-
task[:job] = job
|
14
|
-
task[:controller] = job
|
15
|
-
task[:normalized_url] = task.url
|
16
|
-
end
|
17
|
-
|
18
|
-
describe "#call" do
|
19
|
-
it "assigns barrier" do
|
20
|
-
expect { subject.call(task) }.to change { task[:barrier] }.from(nil).to(instance_of(Wayfarer::Redis::Barrier))
|
21
|
-
end
|
22
|
-
|
23
|
-
it "assigns barrier for batch" do
|
24
|
-
subject.call(task)
|
25
|
-
|
26
|
-
expect(task[:barrier].task).to be(task)
|
27
|
-
end
|
28
|
-
|
29
|
-
context "with retry" do
|
30
|
-
let(:executions) { 2 }
|
31
|
-
|
32
|
-
specify do
|
33
|
-
expect { |spy| subject.call(task, &spy) }.to yield_control
|
34
|
-
end
|
35
|
-
|
36
|
-
specify do
|
37
|
-
expect(Wayfarer::Logging.logger).to receive(:add).with(Logger::DEBUG, "Not deduplicating retry")
|
38
|
-
|
39
|
-
subject.call(task)
|
40
|
-
end
|
41
|
-
end
|
42
|
-
|
43
|
-
context "when rerouted" do
|
44
|
-
before { task[:controller] = Object.new }
|
45
|
-
|
46
|
-
specify do
|
47
|
-
expect { |spy| subject.call(task, &spy) }.to yield_control
|
48
|
-
end
|
49
|
-
|
50
|
-
specify do
|
51
|
-
expect(Wayfarer::Logging.logger).to receive(:add).with(Logger::DEBUG, "Not deduplicating rerouted task")
|
52
|
-
|
53
|
-
subject.call(task)
|
54
|
-
end
|
55
|
-
end
|
56
|
-
|
57
|
-
context "with unchecked URL" do
|
58
|
-
specify do
|
59
|
-
expect { |spy| subject.call(task, &spy) }.to yield_control
|
60
|
-
end
|
61
|
-
end
|
62
|
-
|
63
|
-
context "with checked URL" do
|
64
|
-
before { Wayfarer::Redis::Barrier.new(task).check!(task[:normalized_url]) }
|
65
|
-
|
66
|
-
specify do
|
67
|
-
expect { |spy| subject.call(task, &spy) }.not_to yield_control
|
68
|
-
end
|
69
|
-
end
|
70
|
-
end
|
71
|
-
end
|