wayfarer 0.4.6 → 0.4.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.env +17 -0
- data/.github/workflows/lint.yaml +27 -0
- data/.github/workflows/release.yaml +30 -0
- data/.github/workflows/tests.yaml +21 -0
- data/.gitignore +5 -1
- data/.rubocop.yml +36 -0
- data/.vale.ini +8 -0
- data/.yardopts +1 -3
- data/Dockerfile +6 -4
- data/Gemfile +24 -0
- data/Gemfile.lock +274 -164
- data/Rakefile +7 -51
- data/bin/wayfarer +1 -1
- data/docker-compose.yml +23 -13
- data/docs/cookbook/consent_screen.md +2 -2
- data/docs/cookbook/executing_javascript.md +3 -3
- data/docs/cookbook/navigation.md +12 -12
- data/docs/cookbook/querying_html.md +3 -3
- data/docs/cookbook/screenshots.md +2 -2
- data/docs/guides/callbacks.md +25 -125
- data/docs/guides/cli.md +71 -0
- data/docs/guides/configuration.md +10 -35
- data/docs/guides/development.md +67 -0
- data/docs/guides/handlers.md +60 -0
- data/docs/guides/index.md +1 -0
- data/docs/guides/jobs.md +142 -31
- data/docs/guides/navigation.md +1 -1
- data/docs/guides/networking/capybara.md +13 -22
- data/docs/guides/networking/custom_adapters.md +103 -41
- data/docs/guides/networking/ferrum.md +4 -4
- data/docs/guides/networking/http.md +9 -13
- data/docs/guides/networking/selenium.md +10 -11
- data/docs/guides/pages.md +78 -10
- data/docs/guides/redis.md +10 -0
- data/docs/guides/routing.md +156 -0
- data/docs/guides/tasks.md +53 -9
- data/docs/guides/tutorial.md +66 -0
- data/docs/guides/user_agents.md +115 -0
- data/docs/index.md +17 -40
- data/lib/wayfarer/base.rb +125 -46
- data/lib/wayfarer/batch_completion.rb +60 -0
- data/lib/wayfarer/callbacks.rb +22 -48
- data/lib/wayfarer/cli/route_printer.rb +85 -89
- data/lib/wayfarer/cli.rb +103 -0
- data/lib/wayfarer/gc.rb +18 -6
- data/lib/wayfarer/handler.rb +15 -7
- data/lib/wayfarer/kv.rb +28 -0
- data/lib/wayfarer/logging.rb +38 -0
- data/lib/wayfarer/middleware/base.rb +2 -0
- data/lib/wayfarer/middleware/batch_completion.rb +19 -0
- data/lib/wayfarer/middleware/chain.rb +7 -1
- data/lib/wayfarer/middleware/content_type.rb +59 -0
- data/lib/wayfarer/middleware/controller.rb +19 -15
- data/lib/wayfarer/middleware/dedup.rb +22 -13
- data/lib/wayfarer/middleware/dispatch.rb +17 -4
- data/lib/wayfarer/middleware/normalize.rb +7 -14
- data/lib/wayfarer/middleware/redis.rb +15 -0
- data/lib/wayfarer/middleware/router.rb +33 -35
- data/lib/wayfarer/middleware/stage.rb +5 -5
- data/lib/wayfarer/middleware/uri_parser.rb +31 -0
- data/lib/wayfarer/middleware/user_agent.rb +49 -0
- data/lib/wayfarer/networking/capybara.rb +1 -1
- data/lib/wayfarer/networking/context.rb +14 -3
- data/lib/wayfarer/networking/ferrum.rb +1 -4
- data/lib/wayfarer/networking/follow.rb +14 -7
- data/lib/wayfarer/networking/http.rb +1 -1
- data/lib/wayfarer/networking/pool.rb +23 -13
- data/lib/wayfarer/networking/selenium.rb +15 -7
- data/lib/wayfarer/networking/strategy.rb +2 -2
- data/lib/wayfarer/page.rb +34 -14
- data/lib/wayfarer/parsing/xml.rb +6 -6
- data/lib/wayfarer/parsing.rb +21 -0
- data/lib/wayfarer/redis/barrier.rb +26 -21
- data/lib/wayfarer/redis/counter.rb +18 -9
- data/lib/wayfarer/redis/pool.rb +1 -1
- data/lib/wayfarer/redis/resettable.rb +19 -0
- data/lib/wayfarer/routing/dsl.rb +166 -30
- data/lib/wayfarer/routing/hash_stack.rb +33 -0
- data/lib/wayfarer/routing/matchers/custom.rb +8 -5
- data/lib/wayfarer/routing/matchers/{suffix.rb → empty_params.rb} +2 -6
- data/lib/wayfarer/routing/matchers/host.rb +15 -9
- data/lib/wayfarer/routing/matchers/path.rb +11 -31
- data/lib/wayfarer/routing/matchers/query.rb +41 -17
- data/lib/wayfarer/routing/matchers/result.rb +12 -0
- data/lib/wayfarer/routing/matchers/scheme.rb +13 -5
- data/lib/wayfarer/routing/matchers/url.rb +13 -5
- data/lib/wayfarer/routing/path_consumer.rb +130 -0
- data/lib/wayfarer/routing/path_finder.rb +151 -23
- data/lib/wayfarer/routing/result.rb +1 -1
- data/lib/wayfarer/routing/root_route.rb +17 -1
- data/lib/wayfarer/routing/route.rb +66 -19
- data/lib/wayfarer/routing/serializable.rb +28 -0
- data/lib/wayfarer/routing/sub_route.rb +53 -0
- data/lib/wayfarer/routing/target_route.rb +17 -1
- data/lib/wayfarer/stringify.rb +21 -30
- data/lib/wayfarer/task.rb +9 -17
- data/lib/wayfarer/uri/normalization.rb +120 -0
- data/lib/wayfarer.rb +72 -5
- data/mise.toml +2 -0
- data/mkdocs.yml +44 -8
- data/rake/docs.rake +26 -0
- data/rake/lint.rake +9 -0
- data/rake/release.rake +23 -0
- data/rake/tests.rake +32 -0
- data/requirements.txt +1 -1
- data/spec/factories/job.rb +8 -0
- data/spec/factories/middleware.rb +2 -2
- data/spec/factories/path_finder.rb +11 -0
- data/spec/factories/redis.rb +19 -0
- data/spec/factories/task.rb +46 -2
- data/spec/spec_helpers.rb +55 -51
- data/spec/support/active_job_helpers.rb +8 -0
- data/spec/support/integration_helpers.rb +21 -0
- data/spec/support/redis_helpers.rb +9 -0
- data/spec/support/test_app.rb +66 -37
- data/spec/wayfarer/base_spec.rb +200 -0
- data/spec/wayfarer/batch_completion_spec.rb +142 -0
- data/spec/wayfarer/cli/job_spec.rb +88 -0
- data/spec/wayfarer/cli/routing_spec.rb +322 -0
- data/spec/{cli → wayfarer/cli}/version_spec.rb +1 -1
- data/spec/wayfarer/gc_spec.rb +29 -0
- data/spec/wayfarer/handler_spec.rb +9 -0
- data/spec/wayfarer/integration/callbacks_spec.rb +200 -0
- data/spec/wayfarer/integration/content_type_spec.rb +37 -0
- data/spec/wayfarer/integration/custom_routing_spec.rb +51 -0
- data/spec/wayfarer/integration/gc_spec.rb +40 -0
- data/spec/wayfarer/integration/handler_spec.rb +65 -0
- data/spec/wayfarer/integration/page_spec.rb +79 -0
- data/spec/wayfarer/integration/params_spec.rb +64 -0
- data/spec/wayfarer/integration/parsing_spec.rb +99 -0
- data/spec/wayfarer/integration/retry_spec.rb +112 -0
- data/spec/wayfarer/integration/stage_spec.rb +58 -0
- data/spec/wayfarer/middleware/batch_completion_spec.rb +33 -0
- data/spec/{middleware → wayfarer/middleware}/chain_spec.rb +24 -19
- data/spec/wayfarer/middleware/content_type_spec.rb +83 -0
- data/spec/{middleware → wayfarer/middleware}/controller_spec.rb +24 -22
- data/spec/wayfarer/middleware/dedup_spec.rb +66 -0
- data/spec/wayfarer/middleware/normalize_spec.rb +32 -0
- data/spec/wayfarer/middleware/router_spec.rb +102 -0
- data/spec/wayfarer/middleware/stage_spec.rb +63 -0
- data/spec/wayfarer/middleware/uri_parser_spec.rb +63 -0
- data/spec/wayfarer/middleware/user_agent_spec.rb +158 -0
- data/spec/wayfarer/networking/capybara_spec.rb +13 -0
- data/spec/{networking → wayfarer/networking}/context_spec.rb +46 -38
- data/spec/wayfarer/networking/ferrum_spec.rb +13 -0
- data/spec/{networking → wayfarer/networking}/follow_spec.rb +11 -6
- data/spec/wayfarer/networking/http_spec.rb +12 -0
- data/spec/{networking → wayfarer/networking}/pool_spec.rb +16 -14
- data/spec/wayfarer/networking/selenium_spec.rb +12 -0
- data/spec/{networking → wayfarer/networking}/strategy.rb +33 -54
- data/spec/wayfarer/page_spec.rb +69 -0
- data/spec/{parsing → wayfarer/parsing}/json_spec.rb +1 -1
- data/spec/wayfarer/parsing/xml_parse_spec.rb +25 -0
- data/spec/wayfarer/redis/barrier_spec.rb +39 -0
- data/spec/wayfarer/redis/counter_spec.rb +34 -0
- data/spec/{redis → wayfarer/redis}/pool_spec.rb +4 -3
- data/spec/{routing → wayfarer/routing}/dsl_spec.rb +12 -22
- data/spec/wayfarer/routing/hash_stack_spec.rb +63 -0
- data/spec/wayfarer/routing/integration_spec.rb +101 -0
- data/spec/wayfarer/routing/matchers/custom_spec.rb +39 -0
- data/spec/wayfarer/routing/matchers/host_spec.rb +56 -0
- data/spec/wayfarer/routing/matchers/matcher.rb +17 -0
- data/spec/wayfarer/routing/matchers/path_spec.rb +43 -0
- data/spec/wayfarer/routing/matchers/query_spec.rb +123 -0
- data/spec/wayfarer/routing/matchers/scheme_spec.rb +45 -0
- data/spec/wayfarer/routing/matchers/url_spec.rb +33 -0
- data/spec/wayfarer/routing/path_consumer_spec.rb +123 -0
- data/spec/wayfarer/routing/path_finder_spec.rb +409 -0
- data/spec/wayfarer/routing/root_route_spec.rb +51 -0
- data/spec/wayfarer/routing/route_spec.rb +74 -0
- data/spec/wayfarer/routing/sub_route_spec.rb +103 -0
- data/spec/wayfarer/task_spec.rb +13 -0
- data/spec/wayfarer/uri/normalization_spec.rb +98 -0
- data/spec/wayfarer_spec.rb +2 -2
- data/wayfarer.gemspec +18 -28
- metadata +797 -265
- data/.github/workflows/ci.yaml +0 -32
- data/.rbenv-gemsets +0 -1
- data/.ruby-version +0 -1
- data/RELEASING.md +0 -17
- data/docs/cookbook/user_agent.md +0 -7
- data/docs/guides/error_handling.md +0 -53
- data/docs/guides/networking.md +0 -94
- data/docs/guides/performance.md +0 -130
- data/docs/guides/reliability.md +0 -41
- data/docs/guides/routing/steering.md +0 -30
- data/docs/reference/api/base.md +0 -48
- data/docs/reference/cli.md +0 -61
- data/docs/reference/configuration_keys.md +0 -43
- data/docs/reference/environment_variables.md +0 -83
- data/lib/wayfarer/cli/base.rb +0 -45
- data/lib/wayfarer/cli/generate.rb +0 -17
- data/lib/wayfarer/cli/job.rb +0 -56
- data/lib/wayfarer/cli/route.rb +0 -29
- data/lib/wayfarer/cli/runner.rb +0 -34
- data/lib/wayfarer/cli/templates/Gemfile.tt +0 -5
- data/lib/wayfarer/cli/templates/job.rb.tt +0 -10
- data/lib/wayfarer/config/capybara.rb +0 -10
- data/lib/wayfarer/config/ferrum.rb +0 -11
- data/lib/wayfarer/config/networking.rb +0 -29
- data/lib/wayfarer/config/redis.rb +0 -14
- data/lib/wayfarer/config/root.rb +0 -11
- data/lib/wayfarer/config/selenium.rb +0 -21
- data/lib/wayfarer/config/strconv.rb +0 -45
- data/lib/wayfarer/config/struct.rb +0 -72
- data/lib/wayfarer/middleware/fetch.rb +0 -56
- data/lib/wayfarer/redis/connection.rb +0 -13
- data/lib/wayfarer/redis/version.rb +0 -19
- data/lib/wayfarer/routing/router.rb +0 -28
- data/spec/base_spec.rb +0 -224
- data/spec/callbacks_spec.rb +0 -102
- data/spec/cli/generate_spec.rb +0 -39
- data/spec/cli/job_spec.rb +0 -78
- data/spec/config/capybara_spec.rb +0 -18
- data/spec/config/ferrum_spec.rb +0 -24
- data/spec/config/networking_spec.rb +0 -73
- data/spec/config/redis_spec.rb +0 -32
- data/spec/config/root_spec.rb +0 -31
- data/spec/config/selenium_spec.rb +0 -56
- data/spec/config/strconv_spec.rb +0 -58
- data/spec/config/struct_spec.rb +0 -66
- data/spec/fixtures/dummy_job.rb +0 -7
- data/spec/gc_spec.rb +0 -59
- data/spec/handler_spec.rb +0 -11
- data/spec/integration/callbacks_spec.rb +0 -85
- data/spec/integration/page_spec.rb +0 -62
- data/spec/integration/params_spec.rb +0 -56
- data/spec/integration/stage_spec.rb +0 -51
- data/spec/integration/steering_spec.rb +0 -57
- data/spec/middleware/dedup_spec.rb +0 -88
- data/spec/middleware/dispatch_spec.rb +0 -43
- data/spec/middleware/fetch_spec.rb +0 -155
- data/spec/middleware/normalize_spec.rb +0 -29
- data/spec/middleware/router_spec.rb +0 -105
- data/spec/middleware/stage_spec.rb +0 -62
- data/spec/networking/capybara_spec.rb +0 -12
- data/spec/networking/ferrum_spec.rb +0 -12
- data/spec/networking/http_spec.rb +0 -12
- data/spec/networking/selenium_spec.rb +0 -12
- data/spec/page_spec.rb +0 -47
- data/spec/parsing/xml_spec.rb +0 -25
- data/spec/redis/barrier_spec.rb +0 -78
- data/spec/redis/counter_spec.rb +0 -32
- data/spec/redis/version_spec.rb +0 -13
- data/spec/routing/integration_spec.rb +0 -110
- data/spec/routing/matchers/custom_spec.rb +0 -31
- data/spec/routing/matchers/host_spec.rb +0 -49
- data/spec/routing/matchers/path_spec.rb +0 -43
- data/spec/routing/matchers/query_spec.rb +0 -137
- data/spec/routing/matchers/scheme_spec.rb +0 -25
- data/spec/routing/matchers/suffix_spec.rb +0 -41
- data/spec/routing/matchers/uri_spec.rb +0 -27
- data/spec/routing/path_finder_spec.rb +0 -33
- data/spec/routing/root_route_spec.rb +0 -29
- data/spec/routing/route_spec.rb +0 -43
- data/spec/routing/router_spec.rb +0 -24
- data/spec/task_spec.rb +0 -34
- data/spec/{stringify_spec.rb → wayfarer/stringify_spec.rb} +2 -2
data/spec/base_spec.rb
DELETED
@@ -1,224 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require "spec_helpers"
|
4
|
-
|
5
|
-
describe Wayfarer::Base, redis: true do
|
6
|
-
include Wayfarer::Redis::Connection
|
7
|
-
|
8
|
-
let(:url) { "https://example.com" }
|
9
|
-
let(:batch) { "batch" }
|
10
|
-
let(:task) { build(:task, batch: batch, url: url) }
|
11
|
-
let(:klass) { Class.new(Wayfarer::Base) }
|
12
|
-
|
13
|
-
before { stub_const("DummyJob", klass) }
|
14
|
-
|
15
|
-
describe "::crawl" do
|
16
|
-
it "enqueues a task" do
|
17
|
-
expect(DummyJob).to receive(:perform_later).with(task)
|
18
|
-
DummyJob.crawl(url, batch: batch)
|
19
|
-
end
|
20
|
-
|
21
|
-
it "returns a task" do
|
22
|
-
expect(DummyJob.crawl(url)).to be_a(Wayfarer::Task)
|
23
|
-
end
|
24
|
-
end
|
25
|
-
|
26
|
-
describe "Callbacks" do
|
27
|
-
let(:counter) { task.counter }
|
28
|
-
|
29
|
-
describe "after enqueue" do
|
30
|
-
it "increments the counter" do
|
31
|
-
expect {
|
32
|
-
DummyJob.crawl(url, batch: batch)
|
33
|
-
}.to change { counter.value }.by(1)
|
34
|
-
end
|
35
|
-
end
|
36
|
-
|
37
|
-
describe "after perform" do
|
38
|
-
it "decrements the counter" do
|
39
|
-
DummyJob.crawl(url, batch: batch)
|
40
|
-
task.counter.increment
|
41
|
-
expect { perform_enqueued_jobs }.to change { task.counter.value }.by(-1)
|
42
|
-
end
|
43
|
-
|
44
|
-
context "when counter reaches 0" do
|
45
|
-
it "resets the barrier" do
|
46
|
-
DummyJob.crawl(url, batch: batch)
|
47
|
-
perform_enqueued_jobs
|
48
|
-
redis do |conn|
|
49
|
-
expect(conn.exists?(task.barrier.redis_key)).to be(false)
|
50
|
-
end
|
51
|
-
end
|
52
|
-
|
53
|
-
it "resets the counter" do
|
54
|
-
DummyJob.crawl(url, batch: batch)
|
55
|
-
perform_enqueued_jobs
|
56
|
-
redis do |conn|
|
57
|
-
expect(conn.exists?(task.counter.redis_key)).to be(false)
|
58
|
-
end
|
59
|
-
end
|
60
|
-
|
61
|
-
it "runs after batch callbacks" do
|
62
|
-
expect { |spy|
|
63
|
-
klass.after_batch(&spy)
|
64
|
-
DummyJob.crawl(url, batch: batch)
|
65
|
-
perform_enqueued_jobs
|
66
|
-
}.to yield_control
|
67
|
-
end
|
68
|
-
end
|
69
|
-
end
|
70
|
-
end
|
71
|
-
|
72
|
-
describe "Unhandled exceptions" do
|
73
|
-
let(:klass) { Class.new(Wayfarer::Base) }
|
74
|
-
|
75
|
-
before do
|
76
|
-
allow_any_instance_of(DummyJob).to receive(:perform).and_raise(RuntimeError.new)
|
77
|
-
end
|
78
|
-
|
79
|
-
it "does not retry the job" do
|
80
|
-
DummyJob.crawl(url, batch: batch)
|
81
|
-
|
82
|
-
expect {
|
83
|
-
begin
|
84
|
-
perform_enqueued_jobs
|
85
|
-
rescue StandardError
|
86
|
-
nil
|
87
|
-
end
|
88
|
-
}.to change { enqueued_jobs.size }.by(-1)
|
89
|
-
end
|
90
|
-
|
91
|
-
it "decrements the counter" do
|
92
|
-
3.times { task.counter.increment }
|
93
|
-
|
94
|
-
DummyJob.crawl(url, batch: batch)
|
95
|
-
begin
|
96
|
-
perform_enqueued_jobs
|
97
|
-
rescue StandardError
|
98
|
-
nil
|
99
|
-
end
|
100
|
-
|
101
|
-
expect(task.counter.value).to be(3)
|
102
|
-
end
|
103
|
-
end
|
104
|
-
|
105
|
-
describe "Retries" do
|
106
|
-
let(:klass) do
|
107
|
-
Class.new(Wayfarer::Base) do
|
108
|
-
retry_on RuntimeError, attempts: 3 do |job, error|
|
109
|
-
Spy.call(job, error)
|
110
|
-
end
|
111
|
-
end
|
112
|
-
end
|
113
|
-
|
114
|
-
before do
|
115
|
-
allow_any_instance_of(DummyJob).to receive(:perform) do |job|
|
116
|
-
task = job.arguments.first
|
117
|
-
task.metadata.job = job
|
118
|
-
|
119
|
-
raise RuntimeError
|
120
|
-
end
|
121
|
-
|
122
|
-
stub_const("Spy", spy)
|
123
|
-
end
|
124
|
-
|
125
|
-
it "retries the job" do
|
126
|
-
expect(Spy).to receive(:call).exactly(:once)
|
127
|
-
.with(kind_of(DummyJob),
|
128
|
-
kind_of(RuntimeError))
|
129
|
-
|
130
|
-
DummyJob.crawl(url, batch: batch)
|
131
|
-
|
132
|
-
expect {
|
133
|
-
perform_enqueued_jobs
|
134
|
-
}.to change { enqueued_jobs.last["executions"] }.by(1)
|
135
|
-
|
136
|
-
expect {
|
137
|
-
perform_enqueued_jobs
|
138
|
-
}.to change { enqueued_jobs.last["executions"] }.by(1)
|
139
|
-
|
140
|
-
expect {
|
141
|
-
perform_enqueued_jobs
|
142
|
-
}.to change { enqueued_jobs.size }.by(-1)
|
143
|
-
end
|
144
|
-
|
145
|
-
it "marks the URL seen" do
|
146
|
-
task.counter.increment # otherwise barrier gets reset
|
147
|
-
DummyJob.crawl(url, batch: batch)
|
148
|
-
3.times { perform_enqueued_jobs }
|
149
|
-
expect(task.barrier.seen?(task.url)).to be(true)
|
150
|
-
end
|
151
|
-
|
152
|
-
it "decrements the counter" do
|
153
|
-
3.times { task.counter.increment }
|
154
|
-
|
155
|
-
DummyJob.crawl(url, batch: batch)
|
156
|
-
3.times { perform_enqueued_jobs }
|
157
|
-
|
158
|
-
expect(task.counter.value).to be(3)
|
159
|
-
end
|
160
|
-
|
161
|
-
it "runs after batch callbacks" do
|
162
|
-
expect { |spy|
|
163
|
-
klass.after_batch(&spy)
|
164
|
-
DummyJob.crawl(url, batch: batch)
|
165
|
-
3.times { perform_enqueued_jobs }
|
166
|
-
}.to yield_control
|
167
|
-
end
|
168
|
-
end
|
169
|
-
|
170
|
-
describe "Discarding" do
|
171
|
-
let(:klass) do
|
172
|
-
Class.new(Wayfarer::Base) do
|
173
|
-
discard_on RuntimeError do |job, error|
|
174
|
-
Spy.call(job, error)
|
175
|
-
end
|
176
|
-
end
|
177
|
-
end
|
178
|
-
|
179
|
-
before do
|
180
|
-
allow_any_instance_of(DummyJob).to receive(:perform) do |job|
|
181
|
-
task = job.arguments.first
|
182
|
-
task.metadata.job = job
|
183
|
-
|
184
|
-
raise RuntimeError
|
185
|
-
end
|
186
|
-
|
187
|
-
stub_const("Spy", spy)
|
188
|
-
end
|
189
|
-
|
190
|
-
it "discards the job" do
|
191
|
-
expect(Spy).to receive(:call).exactly(:once)
|
192
|
-
.with(kind_of(DummyJob),
|
193
|
-
kind_of(RuntimeError))
|
194
|
-
|
195
|
-
DummyJob.crawl(url, batch: batch)
|
196
|
-
|
197
|
-
expect {
|
198
|
-
perform_enqueued_jobs
|
199
|
-
}.to change { enqueued_jobs.size }.by(-1)
|
200
|
-
end
|
201
|
-
|
202
|
-
it "marks the URL seen" do
|
203
|
-
task.counter.increment # otherwise barrier gets reset
|
204
|
-
DummyJob.crawl(url, batch: batch)
|
205
|
-
perform_enqueued_jobs
|
206
|
-
expect(task.barrier.seen?(task.url)).to be(true)
|
207
|
-
end
|
208
|
-
|
209
|
-
it "decrements the counter" do
|
210
|
-
3.times { task.counter.increment }
|
211
|
-
DummyJob.crawl(url, batch: batch)
|
212
|
-
perform_enqueued_jobs
|
213
|
-
expect(task.counter.value).to be(3)
|
214
|
-
end
|
215
|
-
|
216
|
-
it "runs after batch callbacks" do
|
217
|
-
expect { |spy|
|
218
|
-
klass.after_batch(&spy)
|
219
|
-
DummyJob.crawl(url, batch: batch)
|
220
|
-
perform_enqueued_jobs
|
221
|
-
}.to yield_control
|
222
|
-
end
|
223
|
-
end
|
224
|
-
end
|
data/spec/callbacks_spec.rb
DELETED
@@ -1,102 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require "spec_helpers"
|
4
|
-
|
5
|
-
describe Wayfarer::Callbacks do
|
6
|
-
let(:url) { "https://alpha.com" }
|
7
|
-
let(:task) { build(:task, batch: "batch", url: url) }
|
8
|
-
|
9
|
-
let(:klass) do
|
10
|
-
Class.new(Wayfarer::Base) do
|
11
|
-
route { host "alpha.com", to: :alpha }
|
12
|
-
route { host "beta.com", to: :beta }
|
13
|
-
|
14
|
-
before_fetch do |job|
|
15
|
-
Spy.before_fetch(self, job)
|
16
|
-
end
|
17
|
-
|
18
|
-
before_fetch(except: %i[alpha beta], only: :alpha) do |job|
|
19
|
-
Spy.before_fetch_except_alpha_beta_only_alpha(self, job)
|
20
|
-
end
|
21
|
-
|
22
|
-
before_action do |job|
|
23
|
-
Spy.before_action(self, job)
|
24
|
-
end
|
25
|
-
|
26
|
-
before_action(only: :alpha) do |job|
|
27
|
-
Spy.before_action_only_alpha(self, job)
|
28
|
-
end
|
29
|
-
|
30
|
-
before_action(only: :beta) do |job|
|
31
|
-
Spy.before_action_only_beta(self, job)
|
32
|
-
end
|
33
|
-
|
34
|
-
before_action(only: %i[alpha gamma]) do |job|
|
35
|
-
Spy.before_action_only_alpha_gamma(self, job)
|
36
|
-
end
|
37
|
-
|
38
|
-
before_action(except: :alpha) do |job|
|
39
|
-
Spy.before_action_except_alpha(self, job)
|
40
|
-
end
|
41
|
-
|
42
|
-
before_action(except: %i[beta gamma]) do |job|
|
43
|
-
Spy.before_action_except_beta_gamma(self, job)
|
44
|
-
end
|
45
|
-
|
46
|
-
before_action :callback_a, only: :alpha
|
47
|
-
|
48
|
-
before_action :callback_b
|
49
|
-
|
50
|
-
before_action :callback_c, except: %i[beta]
|
51
|
-
|
52
|
-
def alpha; end
|
53
|
-
|
54
|
-
def beta; end
|
55
|
-
|
56
|
-
private
|
57
|
-
|
58
|
-
def callback_a
|
59
|
-
Spy.before_action_callback_a_only_alpha(self)
|
60
|
-
end
|
61
|
-
|
62
|
-
def callback_b
|
63
|
-
false
|
64
|
-
end
|
65
|
-
|
66
|
-
def callback_c
|
67
|
-
Spy.before_action_callback_c(self)
|
68
|
-
end
|
69
|
-
end
|
70
|
-
end
|
71
|
-
|
72
|
-
let(:job) { klass.new }
|
73
|
-
|
74
|
-
before do
|
75
|
-
stub_const("DummyJob", klass)
|
76
|
-
stub_const("Spy", spy)
|
77
|
-
|
78
|
-
allow(job).to receive(:arguments).and_return([task])
|
79
|
-
end
|
80
|
-
|
81
|
-
describe "before_fetch" do
|
82
|
-
it "fires" do
|
83
|
-
expect(Spy).to receive(:before_fetch).exactly(:once).with(job, job)
|
84
|
-
expect(Spy).not_to receive(:before_fetch_except_alpha_beta_only_alpha)
|
85
|
-
job.perform(task)
|
86
|
-
end
|
87
|
-
end
|
88
|
-
|
89
|
-
describe "before_action" do
|
90
|
-
it "fires" do
|
91
|
-
expect(Spy).to receive(:before_action).exactly(:once).with(job, job)
|
92
|
-
expect(Spy).to receive(:before_action_only_alpha).exactly(:once).with(job, job)
|
93
|
-
expect(Spy).not_to receive(:before_action_only_beta).with(job, job)
|
94
|
-
expect(Spy).to receive(:before_action_only_alpha_gamma).exactly(:once).with(job, job)
|
95
|
-
expect(Spy).not_to receive(:before_action_except_alpha).with(job, job)
|
96
|
-
expect(Spy).to receive(:before_action_except_beta_gamma).exactly(:once).with(job, job)
|
97
|
-
expect(Spy).to receive(:before_action_callback_a_only_alpha).exactly(:once).with(job)
|
98
|
-
expect(Spy).not_to receive(:before_action_callback_c).with(job)
|
99
|
-
job.perform(task)
|
100
|
-
end
|
101
|
-
end
|
102
|
-
end
|
data/spec/cli/generate_spec.rb
DELETED
@@ -1,39 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require "spec_helpers"
|
4
|
-
|
5
|
-
describe Wayfarer::CLI::Generate, cli: true do
|
6
|
-
subject(:cli) { Wayfarer::CLI::Runner }
|
7
|
-
|
8
|
-
describe "generate project" do
|
9
|
-
it "outputs" do
|
10
|
-
expected_output = <<-OUT
|
11
|
-
create foobar
|
12
|
-
create foobar/Gemfile
|
13
|
-
create foobar/app/jobs/foobar.rb
|
14
|
-
OUT
|
15
|
-
|
16
|
-
expect {
|
17
|
-
cli.start(%w[generate project foobar])
|
18
|
-
}.to output(expected_output).to_stdout
|
19
|
-
end
|
20
|
-
|
21
|
-
it "creates the project directory" do
|
22
|
-
expect {
|
23
|
-
cli.start(%w[generate project foobar])
|
24
|
-
}.to change { File.directory?("foobar") }.to(true)
|
25
|
-
end
|
26
|
-
|
27
|
-
it "creates the Gemfile" do
|
28
|
-
expect {
|
29
|
-
cli.start(%w[generate project foobar])
|
30
|
-
}.to change { File.file?("foobar/Gemfile") }.to(true)
|
31
|
-
end
|
32
|
-
|
33
|
-
it "creates the job" do
|
34
|
-
expect {
|
35
|
-
cli.start(%w[generate project foobar])
|
36
|
-
}.to change { File.file?("foobar/app/jobs/foobar.rb") }.to(true)
|
37
|
-
end
|
38
|
-
end
|
39
|
-
end
|
data/spec/cli/job_spec.rb
DELETED
@@ -1,78 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require "spec_helpers"
|
4
|
-
|
5
|
-
describe Wayfarer::CLI::Job, cli: true, redis: true do
|
6
|
-
include Wayfarer::Redis
|
7
|
-
|
8
|
-
let(:url) { test_app_path("/hello_world") }
|
9
|
-
let(:batch) { "my-batch" }
|
10
|
-
subject(:cli) { Wayfarer::CLI::Runner }
|
11
|
-
|
12
|
-
before do
|
13
|
-
write_file "app/jobs/dummy_job.rb", <<~FILE
|
14
|
-
class DummyJob < Wayfarer::Base
|
15
|
-
end
|
16
|
-
FILE
|
17
|
-
end
|
18
|
-
|
19
|
-
before { Wayfarer::CLI::Base.new.send(:load_environment) }
|
20
|
-
|
21
|
-
describe "job perform" do
|
22
|
-
it "performs the worker" do
|
23
|
-
expect_any_instance_of(DummyJob).to receive(:perform).with(kind_of(Wayfarer::Task)) do |job|
|
24
|
-
task = job.arguments.first
|
25
|
-
task.metadata.job = job
|
26
|
-
end
|
27
|
-
|
28
|
-
cli.start(["job", "perform", "DummyJob", url])
|
29
|
-
end
|
30
|
-
|
31
|
-
it "collects garbage" do
|
32
|
-
expect_any_instance_of(Wayfarer::GC).to receive(:run).exactly(:once)
|
33
|
-
cli.start(["job", "perform", "DummyJob", url])
|
34
|
-
end
|
35
|
-
|
36
|
-
context "using MockRedis" do
|
37
|
-
it "performs the worker using MockRedis" do
|
38
|
-
cli.start(["job", "perform", "--mock-redis", "DummyJob", url])
|
39
|
-
expect(Wayfarer.config.redis.factory.call(nil)).to be_a(MockRedis)
|
40
|
-
end
|
41
|
-
end
|
42
|
-
end
|
43
|
-
|
44
|
-
describe "job enqueue" do
|
45
|
-
it "enqueues the job" do
|
46
|
-
expect(DummyJob).to receive(:crawl).with(Addressable::URI.parse(url), batch: kind_of(String))
|
47
|
-
cli.start(["job", "enqueue", "DummyJob", url])
|
48
|
-
end
|
49
|
-
|
50
|
-
context "with batch provided" do
|
51
|
-
it "enqueues the job" do
|
52
|
-
expect(DummyJob).to receive(:crawl).with(Addressable::URI.parse(url), batch: batch)
|
53
|
-
cli.start(["job", "enqueue", "--batch", batch, "DummyJob", url])
|
54
|
-
end
|
55
|
-
end
|
56
|
-
end
|
57
|
-
|
58
|
-
describe "job execute" do
|
59
|
-
it "executes the job" do
|
60
|
-
expect(DummyJob).to receive(:crawl).with(Addressable::URI.parse(url), batch: kind_of(String))
|
61
|
-
cli.start(["job", "execute", "DummyJob", url])
|
62
|
-
end
|
63
|
-
|
64
|
-
context "with batch provided" do
|
65
|
-
it "enqueues the job" do
|
66
|
-
expect(DummyJob).to receive(:crawl).with(Addressable::URI.parse(url), batch: batch)
|
67
|
-
cli.start(["job", "execute", "--batch", batch, "DummyJob", url])
|
68
|
-
end
|
69
|
-
end
|
70
|
-
|
71
|
-
context "using MockRedis" do
|
72
|
-
it "performs the worker using MockRedis" do
|
73
|
-
cli.start(["job", "execute", "--mock-redis", "DummyJob", url])
|
74
|
-
expect(Wayfarer.config.redis.factory.call(nil)).to be_a(MockRedis)
|
75
|
-
end
|
76
|
-
end
|
77
|
-
end
|
78
|
-
end
|
data/spec/config/capybara_spec.rb
DELETED
@@ -1,18 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require "spec_helpers"
|
4
|
-
|
5
|
-
describe Wayfarer::Config::Capybara do
|
6
|
-
let(:env) { {} }
|
7
|
-
subject(:capybara) { Wayfarer::Config::Capybara.new(env) }
|
8
|
-
|
9
|
-
describe "#driver" do
|
10
|
-
context "with env var set" do
|
11
|
-
before { env["WAYFARER_CAPYBARA_DRIVER"] = "cuprite" }
|
12
|
-
|
13
|
-
it "parses the env var" do
|
14
|
-
expect(capybara.driver).to be(:cuprite)
|
15
|
-
end
|
16
|
-
end
|
17
|
-
end
|
18
|
-
end
|
data/spec/config/ferrum_spec.rb
DELETED
@@ -1,24 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require "spec_helpers"
|
4
|
-
|
5
|
-
describe Wayfarer::Config::Ferrum do
|
6
|
-
let(:env) { {} }
|
7
|
-
subject(:ferrum) { Wayfarer::Config::Ferrum.new(env) }
|
8
|
-
|
9
|
-
describe "#options" do
|
10
|
-
context "by default" do
|
11
|
-
it "is {}" do
|
12
|
-
expect(ferrum.options).to eq({})
|
13
|
-
end
|
14
|
-
end
|
15
|
-
|
16
|
-
context "with env var set" do
|
17
|
-
before { env["WAYFARER_FERRUM_OPTIONS"] = "url:http://chrome:3000,headless:false" }
|
18
|
-
|
19
|
-
it "parses the env var" do
|
20
|
-
expect(ferrum.options).to eq(url: "http://chrome:3000", headless: false)
|
21
|
-
end
|
22
|
-
end
|
23
|
-
end
|
24
|
-
end
|
data/spec/config/networking_spec.rb
DELETED
@@ -1,73 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require "spec_helpers"
|
4
|
-
|
5
|
-
describe Wayfarer::Config::Networking do
|
6
|
-
let(:env) { {} }
|
7
|
-
subject(:network) { Wayfarer::Config::Networking.new(env) }
|
8
|
-
|
9
|
-
describe "#agent" do
|
10
|
-
context "by default" do
|
11
|
-
it "is :http" do
|
12
|
-
expect(network.agent).to be(:http)
|
13
|
-
end
|
14
|
-
end
|
15
|
-
|
16
|
-
context "with env var set" do
|
17
|
-
before { env["WAYFARER_NETWORK_AGENT"] = "ferrum" }
|
18
|
-
|
19
|
-
it "parses the env var" do
|
20
|
-
expect(network.agent).to be(:ferrum)
|
21
|
-
end
|
22
|
-
end
|
23
|
-
end
|
24
|
-
|
25
|
-
describe "#pool_size" do
|
26
|
-
context "by default" do
|
27
|
-
it "is 1" do
|
28
|
-
expect(network.pool_size).to be(1)
|
29
|
-
end
|
30
|
-
end
|
31
|
-
|
32
|
-
context "with env var set" do
|
33
|
-
before { env["WAYFARER_NETWORK_POOL_SIZE"] = "42" }
|
34
|
-
|
35
|
-
it "parses the env var" do
|
36
|
-
expect(network.pool_size).to be(42)
|
37
|
-
end
|
38
|
-
end
|
39
|
-
end
|
40
|
-
|
41
|
-
describe "#pool_timeout" do
|
42
|
-
context "by default" do
|
43
|
-
it "is 10" do
|
44
|
-
expect(network.pool_timeout).to be(10)
|
45
|
-
end
|
46
|
-
end
|
47
|
-
|
48
|
-
context "with env var set" do
|
49
|
-
before { env["WAYFARER_NETWORK_POOL_SIZE"] = "1337" }
|
50
|
-
|
51
|
-
it "parses the env var" do
|
52
|
-
expect(network.pool_size).to be(1337)
|
53
|
-
end
|
54
|
-
end
|
55
|
-
end
|
56
|
-
|
57
|
-
describe "#http_headers" do
|
58
|
-
context "by default" do
|
59
|
-
it "is {}" do
|
60
|
-
expect(network.http_headers).to eq({})
|
61
|
-
end
|
62
|
-
end
|
63
|
-
|
64
|
-
context "with env var set" do
|
65
|
-
before { env["WAYFARER_NETWORK_HTTP_HEADERS"] = "user-agent:foo,authorization:bar" }
|
66
|
-
|
67
|
-
it "parses the env var" do
|
68
|
-
expect(network.http_headers).to eq("user-agent": "foo",
|
69
|
-
authorization: "bar")
|
70
|
-
end
|
71
|
-
end
|
72
|
-
end
|
73
|
-
end
|
data/spec/config/redis_spec.rb
DELETED
@@ -1,32 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require "spec_helpers"
|
4
|
-
|
5
|
-
describe Wayfarer::Config::Redis do
|
6
|
-
let(:env) { {} }
|
7
|
-
subject(:redis) { Wayfarer::Config::Redis.new(env) }
|
8
|
-
|
9
|
-
describe "#url" do
|
10
|
-
context "by default" do
|
11
|
-
it "is redis://localhost:6379" do
|
12
|
-
expect(redis.url).to eq("redis://localhost:6379")
|
13
|
-
end
|
14
|
-
end
|
15
|
-
|
16
|
-
context "with env var set" do
|
17
|
-
before { env["WAYFARER_REDIS_URL"] = "redis://redis:6379" }
|
18
|
-
|
19
|
-
it "parses the env var" do
|
20
|
-
expect(redis.url).to eq("redis://redis:6379")
|
21
|
-
end
|
22
|
-
end
|
23
|
-
end
|
24
|
-
|
25
|
-
describe "#factory" do
|
26
|
-
context "by default" do
|
27
|
-
it "instantiates Redis" do
|
28
|
-
expect(redis.factory.call(redis)).to be_a(::Redis)
|
29
|
-
end
|
30
|
-
end
|
31
|
-
end
|
32
|
-
end
|
data/spec/config/root_spec.rb
DELETED
@@ -1,31 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require "spec_helpers"
|
4
|
-
|
5
|
-
describe Wayfarer::Config::Root do
|
6
|
-
subject(:config) { Wayfarer::Config::Root.new }
|
7
|
-
|
8
|
-
describe "#ferrum" do
|
9
|
-
it "returns a Ferrum config" do
|
10
|
-
expect(config.ferrum).to be_a(Wayfarer::Config::Ferrum)
|
11
|
-
end
|
12
|
-
end
|
13
|
-
|
14
|
-
describe "#network" do
|
15
|
-
it "returns a network config" do
|
16
|
-
expect(config.network).to be_a(Wayfarer::Config::Networking)
|
17
|
-
end
|
18
|
-
end
|
19
|
-
|
20
|
-
describe "#redis" do
|
21
|
-
it "returns a Redis config" do
|
22
|
-
expect(config.redis).to be_a(Wayfarer::Config::Redis)
|
23
|
-
end
|
24
|
-
end
|
25
|
-
|
26
|
-
describe "#selenium" do
|
27
|
-
it "returns a Selenium config" do
|
28
|
-
expect(config.selenium).to be_a(Wayfarer::Config::Selenium)
|
29
|
-
end
|
30
|
-
end
|
31
|
-
end
|
data/spec/config/selenium_spec.rb
DELETED
@@ -1,56 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require "spec_helpers"
|
4
|
-
|
5
|
-
describe Wayfarer::Config::Selenium do
|
6
|
-
let(:env) { {} }
|
7
|
-
subject(:selenium) { Wayfarer::Config::Selenium.new(env) }
|
8
|
-
|
9
|
-
describe "#driver" do
|
10
|
-
context "by default" do
|
11
|
-
it "is :chrome" do
|
12
|
-
expect(selenium.driver).to be(:chrome)
|
13
|
-
end
|
14
|
-
end
|
15
|
-
|
16
|
-
context "with env var set" do
|
17
|
-
before { env["WAYFARER_SELENIUM_DRIVER"] = "firefox" }
|
18
|
-
|
19
|
-
it "parses the env var" do
|
20
|
-
expect(selenium.driver).to be(:firefox)
|
21
|
-
end
|
22
|
-
end
|
23
|
-
end
|
24
|
-
|
25
|
-
describe "#options" do
|
26
|
-
context "by default" do
|
27
|
-
it "is {}" do
|
28
|
-
expect(selenium.options).to eq({})
|
29
|
-
end
|
30
|
-
end
|
31
|
-
|
32
|
-
context "with env var set" do
|
33
|
-
before { env["WAYFARER_SELENIUM_OPTIONS"] = "url:http://firefox" }
|
34
|
-
|
35
|
-
it "parses the env var" do
|
36
|
-
expect(selenium.options).to eq(url: "http://firefox")
|
37
|
-
end
|
38
|
-
end
|
39
|
-
end
|
40
|
-
|
41
|
-
describe "#client_timeout" do
|
42
|
-
context "by default" do
|
43
|
-
it "is 60" do
|
44
|
-
expect(selenium.client_timeout).to be(60)
|
45
|
-
end
|
46
|
-
end
|
47
|
-
|
48
|
-
context "with env var set" do
|
49
|
-
before { env["WAYFARER_SELENIUM_CLIENT_TIMEOUT"] = "10" }
|
50
|
-
|
51
|
-
it "parses the env var" do
|
52
|
-
expect(selenium.client_timeout).to be(10)
|
53
|
-
end
|
54
|
-
end
|
55
|
-
end
|
56
|
-
end
|