wayfarer 0.4.6 → 0.4.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (259)
  1. checksums.yaml +4 -4
  2. data/.env +17 -0
  3. data/.github/workflows/lint.yaml +27 -0
  4. data/.github/workflows/release.yaml +30 -0
  5. data/.github/workflows/tests.yaml +21 -0
  6. data/.gitignore +5 -1
  7. data/.rubocop.yml +36 -0
  8. data/.vale.ini +8 -0
  9. data/.yardopts +1 -3
  10. data/Dockerfile +6 -4
  11. data/Gemfile +24 -0
  12. data/Gemfile.lock +274 -164
  13. data/Rakefile +7 -51
  14. data/bin/wayfarer +1 -1
  15. data/docker-compose.yml +23 -13
  16. data/docs/cookbook/consent_screen.md +2 -2
  17. data/docs/cookbook/executing_javascript.md +3 -3
  18. data/docs/cookbook/navigation.md +12 -12
  19. data/docs/cookbook/querying_html.md +3 -3
  20. data/docs/cookbook/screenshots.md +2 -2
  21. data/docs/guides/callbacks.md +25 -125
  22. data/docs/guides/cli.md +71 -0
  23. data/docs/guides/configuration.md +10 -35
  24. data/docs/guides/development.md +67 -0
  25. data/docs/guides/handlers.md +60 -0
  26. data/docs/guides/index.md +1 -0
  27. data/docs/guides/jobs.md +142 -31
  28. data/docs/guides/navigation.md +1 -1
  29. data/docs/guides/networking/capybara.md +13 -22
  30. data/docs/guides/networking/custom_adapters.md +103 -41
  31. data/docs/guides/networking/ferrum.md +4 -4
  32. data/docs/guides/networking/http.md +9 -13
  33. data/docs/guides/networking/selenium.md +10 -11
  34. data/docs/guides/pages.md +78 -10
  35. data/docs/guides/redis.md +10 -0
  36. data/docs/guides/routing.md +156 -0
  37. data/docs/guides/tasks.md +53 -9
  38. data/docs/guides/tutorial.md +66 -0
  39. data/docs/guides/user_agents.md +115 -0
  40. data/docs/index.md +17 -40
  41. data/lib/wayfarer/base.rb +125 -46
  42. data/lib/wayfarer/batch_completion.rb +60 -0
  43. data/lib/wayfarer/callbacks.rb +22 -48
  44. data/lib/wayfarer/cli/route_printer.rb +85 -89
  45. data/lib/wayfarer/cli.rb +103 -0
  46. data/lib/wayfarer/gc.rb +18 -6
  47. data/lib/wayfarer/handler.rb +15 -7
  48. data/lib/wayfarer/kv.rb +28 -0
  49. data/lib/wayfarer/logging.rb +38 -0
  50. data/lib/wayfarer/middleware/base.rb +2 -0
  51. data/lib/wayfarer/middleware/batch_completion.rb +19 -0
  52. data/lib/wayfarer/middleware/chain.rb +7 -1
  53. data/lib/wayfarer/middleware/content_type.rb +59 -0
  54. data/lib/wayfarer/middleware/controller.rb +19 -15
  55. data/lib/wayfarer/middleware/dedup.rb +22 -13
  56. data/lib/wayfarer/middleware/dispatch.rb +17 -4
  57. data/lib/wayfarer/middleware/normalize.rb +7 -14
  58. data/lib/wayfarer/middleware/redis.rb +15 -0
  59. data/lib/wayfarer/middleware/router.rb +33 -35
  60. data/lib/wayfarer/middleware/stage.rb +5 -5
  61. data/lib/wayfarer/middleware/uri_parser.rb +31 -0
  62. data/lib/wayfarer/middleware/user_agent.rb +49 -0
  63. data/lib/wayfarer/networking/capybara.rb +1 -1
  64. data/lib/wayfarer/networking/context.rb +14 -3
  65. data/lib/wayfarer/networking/ferrum.rb +1 -4
  66. data/lib/wayfarer/networking/follow.rb +14 -7
  67. data/lib/wayfarer/networking/http.rb +1 -1
  68. data/lib/wayfarer/networking/pool.rb +23 -13
  69. data/lib/wayfarer/networking/selenium.rb +15 -7
  70. data/lib/wayfarer/networking/strategy.rb +2 -2
  71. data/lib/wayfarer/page.rb +34 -14
  72. data/lib/wayfarer/parsing/xml.rb +6 -6
  73. data/lib/wayfarer/parsing.rb +21 -0
  74. data/lib/wayfarer/redis/barrier.rb +26 -21
  75. data/lib/wayfarer/redis/counter.rb +18 -9
  76. data/lib/wayfarer/redis/pool.rb +1 -1
  77. data/lib/wayfarer/redis/resettable.rb +19 -0
  78. data/lib/wayfarer/routing/dsl.rb +166 -30
  79. data/lib/wayfarer/routing/hash_stack.rb +33 -0
  80. data/lib/wayfarer/routing/matchers/custom.rb +8 -5
  81. data/lib/wayfarer/routing/matchers/{suffix.rb → empty_params.rb} +2 -6
  82. data/lib/wayfarer/routing/matchers/host.rb +15 -9
  83. data/lib/wayfarer/routing/matchers/path.rb +11 -31
  84. data/lib/wayfarer/routing/matchers/query.rb +41 -17
  85. data/lib/wayfarer/routing/matchers/result.rb +12 -0
  86. data/lib/wayfarer/routing/matchers/scheme.rb +13 -5
  87. data/lib/wayfarer/routing/matchers/url.rb +13 -5
  88. data/lib/wayfarer/routing/path_consumer.rb +130 -0
  89. data/lib/wayfarer/routing/path_finder.rb +151 -23
  90. data/lib/wayfarer/routing/result.rb +1 -1
  91. data/lib/wayfarer/routing/root_route.rb +17 -1
  92. data/lib/wayfarer/routing/route.rb +66 -19
  93. data/lib/wayfarer/routing/serializable.rb +28 -0
  94. data/lib/wayfarer/routing/sub_route.rb +53 -0
  95. data/lib/wayfarer/routing/target_route.rb +17 -1
  96. data/lib/wayfarer/stringify.rb +21 -30
  97. data/lib/wayfarer/task.rb +9 -17
  98. data/lib/wayfarer/uri/normalization.rb +120 -0
  99. data/lib/wayfarer.rb +72 -5
  100. data/mise.toml +2 -0
  101. data/mkdocs.yml +44 -8
  102. data/rake/docs.rake +26 -0
  103. data/rake/lint.rake +9 -0
  104. data/rake/release.rake +23 -0
  105. data/rake/tests.rake +32 -0
  106. data/requirements.txt +1 -1
  107. data/spec/factories/job.rb +8 -0
  108. data/spec/factories/middleware.rb +2 -2
  109. data/spec/factories/path_finder.rb +11 -0
  110. data/spec/factories/redis.rb +19 -0
  111. data/spec/factories/task.rb +46 -2
  112. data/spec/spec_helpers.rb +55 -51
  113. data/spec/support/active_job_helpers.rb +8 -0
  114. data/spec/support/integration_helpers.rb +21 -0
  115. data/spec/support/redis_helpers.rb +9 -0
  116. data/spec/support/test_app.rb +66 -37
  117. data/spec/wayfarer/base_spec.rb +200 -0
  118. data/spec/wayfarer/batch_completion_spec.rb +142 -0
  119. data/spec/wayfarer/cli/job_spec.rb +88 -0
  120. data/spec/wayfarer/cli/routing_spec.rb +322 -0
  121. data/spec/{cli → wayfarer/cli}/version_spec.rb +1 -1
  122. data/spec/wayfarer/gc_spec.rb +29 -0
  123. data/spec/wayfarer/handler_spec.rb +9 -0
  124. data/spec/wayfarer/integration/callbacks_spec.rb +200 -0
  125. data/spec/wayfarer/integration/content_type_spec.rb +37 -0
  126. data/spec/wayfarer/integration/custom_routing_spec.rb +51 -0
  127. data/spec/wayfarer/integration/gc_spec.rb +40 -0
  128. data/spec/wayfarer/integration/handler_spec.rb +65 -0
  129. data/spec/wayfarer/integration/page_spec.rb +79 -0
  130. data/spec/wayfarer/integration/params_spec.rb +64 -0
  131. data/spec/wayfarer/integration/parsing_spec.rb +99 -0
  132. data/spec/wayfarer/integration/retry_spec.rb +112 -0
  133. data/spec/wayfarer/integration/stage_spec.rb +58 -0
  134. data/spec/wayfarer/middleware/batch_completion_spec.rb +33 -0
  135. data/spec/{middleware → wayfarer/middleware}/chain_spec.rb +24 -19
  136. data/spec/wayfarer/middleware/content_type_spec.rb +83 -0
  137. data/spec/{middleware → wayfarer/middleware}/controller_spec.rb +24 -22
  138. data/spec/wayfarer/middleware/dedup_spec.rb +66 -0
  139. data/spec/wayfarer/middleware/normalize_spec.rb +32 -0
  140. data/spec/wayfarer/middleware/router_spec.rb +102 -0
  141. data/spec/wayfarer/middleware/stage_spec.rb +63 -0
  142. data/spec/wayfarer/middleware/uri_parser_spec.rb +63 -0
  143. data/spec/wayfarer/middleware/user_agent_spec.rb +158 -0
  144. data/spec/wayfarer/networking/capybara_spec.rb +13 -0
  145. data/spec/{networking → wayfarer/networking}/context_spec.rb +46 -38
  146. data/spec/wayfarer/networking/ferrum_spec.rb +13 -0
  147. data/spec/{networking → wayfarer/networking}/follow_spec.rb +11 -6
  148. data/spec/wayfarer/networking/http_spec.rb +12 -0
  149. data/spec/{networking → wayfarer/networking}/pool_spec.rb +16 -14
  150. data/spec/wayfarer/networking/selenium_spec.rb +12 -0
  151. data/spec/{networking → wayfarer/networking}/strategy.rb +33 -54
  152. data/spec/wayfarer/page_spec.rb +69 -0
  153. data/spec/{parsing → wayfarer/parsing}/json_spec.rb +1 -1
  154. data/spec/wayfarer/parsing/xml_parse_spec.rb +25 -0
  155. data/spec/wayfarer/redis/barrier_spec.rb +39 -0
  156. data/spec/wayfarer/redis/counter_spec.rb +34 -0
  157. data/spec/{redis → wayfarer/redis}/pool_spec.rb +4 -3
  158. data/spec/{routing → wayfarer/routing}/dsl_spec.rb +12 -22
  159. data/spec/wayfarer/routing/hash_stack_spec.rb +63 -0
  160. data/spec/wayfarer/routing/integration_spec.rb +101 -0
  161. data/spec/wayfarer/routing/matchers/custom_spec.rb +39 -0
  162. data/spec/wayfarer/routing/matchers/host_spec.rb +56 -0
  163. data/spec/wayfarer/routing/matchers/matcher.rb +17 -0
  164. data/spec/wayfarer/routing/matchers/path_spec.rb +43 -0
  165. data/spec/wayfarer/routing/matchers/query_spec.rb +123 -0
  166. data/spec/wayfarer/routing/matchers/scheme_spec.rb +45 -0
  167. data/spec/wayfarer/routing/matchers/url_spec.rb +33 -0
  168. data/spec/wayfarer/routing/path_consumer_spec.rb +123 -0
  169. data/spec/wayfarer/routing/path_finder_spec.rb +409 -0
  170. data/spec/wayfarer/routing/root_route_spec.rb +51 -0
  171. data/spec/wayfarer/routing/route_spec.rb +74 -0
  172. data/spec/wayfarer/routing/sub_route_spec.rb +103 -0
  173. data/spec/wayfarer/task_spec.rb +13 -0
  174. data/spec/wayfarer/uri/normalization_spec.rb +98 -0
  175. data/spec/wayfarer_spec.rb +2 -2
  176. data/wayfarer.gemspec +18 -28
  177. metadata +797 -265
  178. data/.github/workflows/ci.yaml +0 -32
  179. data/.rbenv-gemsets +0 -1
  180. data/.ruby-version +0 -1
  181. data/RELEASING.md +0 -17
  182. data/docs/cookbook/user_agent.md +0 -7
  183. data/docs/guides/error_handling.md +0 -53
  184. data/docs/guides/networking.md +0 -94
  185. data/docs/guides/performance.md +0 -130
  186. data/docs/guides/reliability.md +0 -41
  187. data/docs/guides/routing/steering.md +0 -30
  188. data/docs/reference/api/base.md +0 -48
  189. data/docs/reference/cli.md +0 -61
  190. data/docs/reference/configuration_keys.md +0 -43
  191. data/docs/reference/environment_variables.md +0 -83
  192. data/lib/wayfarer/cli/base.rb +0 -45
  193. data/lib/wayfarer/cli/generate.rb +0 -17
  194. data/lib/wayfarer/cli/job.rb +0 -56
  195. data/lib/wayfarer/cli/route.rb +0 -29
  196. data/lib/wayfarer/cli/runner.rb +0 -34
  197. data/lib/wayfarer/cli/templates/Gemfile.tt +0 -5
  198. data/lib/wayfarer/cli/templates/job.rb.tt +0 -10
  199. data/lib/wayfarer/config/capybara.rb +0 -10
  200. data/lib/wayfarer/config/ferrum.rb +0 -11
  201. data/lib/wayfarer/config/networking.rb +0 -29
  202. data/lib/wayfarer/config/redis.rb +0 -14
  203. data/lib/wayfarer/config/root.rb +0 -11
  204. data/lib/wayfarer/config/selenium.rb +0 -21
  205. data/lib/wayfarer/config/strconv.rb +0 -45
  206. data/lib/wayfarer/config/struct.rb +0 -72
  207. data/lib/wayfarer/middleware/fetch.rb +0 -56
  208. data/lib/wayfarer/redis/connection.rb +0 -13
  209. data/lib/wayfarer/redis/version.rb +0 -19
  210. data/lib/wayfarer/routing/router.rb +0 -28
  211. data/spec/base_spec.rb +0 -224
  212. data/spec/callbacks_spec.rb +0 -102
  213. data/spec/cli/generate_spec.rb +0 -39
  214. data/spec/cli/job_spec.rb +0 -78
  215. data/spec/config/capybara_spec.rb +0 -18
  216. data/spec/config/ferrum_spec.rb +0 -24
  217. data/spec/config/networking_spec.rb +0 -73
  218. data/spec/config/redis_spec.rb +0 -32
  219. data/spec/config/root_spec.rb +0 -31
  220. data/spec/config/selenium_spec.rb +0 -56
  221. data/spec/config/strconv_spec.rb +0 -58
  222. data/spec/config/struct_spec.rb +0 -66
  223. data/spec/fixtures/dummy_job.rb +0 -7
  224. data/spec/gc_spec.rb +0 -59
  225. data/spec/handler_spec.rb +0 -11
  226. data/spec/integration/callbacks_spec.rb +0 -85
  227. data/spec/integration/page_spec.rb +0 -62
  228. data/spec/integration/params_spec.rb +0 -56
  229. data/spec/integration/stage_spec.rb +0 -51
  230. data/spec/integration/steering_spec.rb +0 -57
  231. data/spec/middleware/dedup_spec.rb +0 -88
  232. data/spec/middleware/dispatch_spec.rb +0 -43
  233. data/spec/middleware/fetch_spec.rb +0 -155
  234. data/spec/middleware/normalize_spec.rb +0 -29
  235. data/spec/middleware/router_spec.rb +0 -105
  236. data/spec/middleware/stage_spec.rb +0 -62
  237. data/spec/networking/capybara_spec.rb +0 -12
  238. data/spec/networking/ferrum_spec.rb +0 -12
  239. data/spec/networking/http_spec.rb +0 -12
  240. data/spec/networking/selenium_spec.rb +0 -12
  241. data/spec/page_spec.rb +0 -47
  242. data/spec/parsing/xml_spec.rb +0 -25
  243. data/spec/redis/barrier_spec.rb +0 -78
  244. data/spec/redis/counter_spec.rb +0 -32
  245. data/spec/redis/version_spec.rb +0 -13
  246. data/spec/routing/integration_spec.rb +0 -110
  247. data/spec/routing/matchers/custom_spec.rb +0 -31
  248. data/spec/routing/matchers/host_spec.rb +0 -49
  249. data/spec/routing/matchers/path_spec.rb +0 -43
  250. data/spec/routing/matchers/query_spec.rb +0 -137
  251. data/spec/routing/matchers/scheme_spec.rb +0 -25
  252. data/spec/routing/matchers/suffix_spec.rb +0 -41
  253. data/spec/routing/matchers/uri_spec.rb +0 -27
  254. data/spec/routing/path_finder_spec.rb +0 -33
  255. data/spec/routing/root_route_spec.rb +0 -29
  256. data/spec/routing/route_spec.rb +0 -43
  257. data/spec/routing/router_spec.rb +0 -24
  258. data/spec/task_spec.rb +0 -34
  259. data/spec/{stringify_spec.rb → wayfarer/stringify_spec.rb} +2 -2
data/spec/base_spec.rb DELETED
@@ -1,224 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "spec_helpers"
4
-
5
- describe Wayfarer::Base, redis: true do
6
- include Wayfarer::Redis::Connection
7
-
8
- let(:url) { "https://example.com" }
9
- let(:batch) { "batch" }
10
- let(:task) { build(:task, batch: batch, url: url) }
11
- let(:klass) { Class.new(Wayfarer::Base) }
12
-
13
- before { stub_const("DummyJob", klass) }
14
-
15
- describe "::crawl" do
16
- it "enqueues a task" do
17
- expect(DummyJob).to receive(:perform_later).with(task)
18
- DummyJob.crawl(url, batch: batch)
19
- end
20
-
21
- it "returns a task" do
22
- expect(DummyJob.crawl(url)).to be_a(Wayfarer::Task)
23
- end
24
- end
25
-
26
- describe "Callbacks" do
27
- let(:counter) { task.counter }
28
-
29
- describe "after enqueue" do
30
- it "increments the counter" do
31
- expect {
32
- DummyJob.crawl(url, batch: batch)
33
- }.to change { counter.value }.by(1)
34
- end
35
- end
36
-
37
- describe "after perform" do
38
- it "decrements the counter" do
39
- DummyJob.crawl(url, batch: batch)
40
- task.counter.increment
41
- expect { perform_enqueued_jobs }.to change { task.counter.value }.by(-1)
42
- end
43
-
44
- context "when counter reaches 0" do
45
- it "resets the barrier" do
46
- DummyJob.crawl(url, batch: batch)
47
- perform_enqueued_jobs
48
- redis do |conn|
49
- expect(conn.exists?(task.barrier.redis_key)).to be(false)
50
- end
51
- end
52
-
53
- it "resets the counter" do
54
- DummyJob.crawl(url, batch: batch)
55
- perform_enqueued_jobs
56
- redis do |conn|
57
- expect(conn.exists?(task.counter.redis_key)).to be(false)
58
- end
59
- end
60
-
61
- it "runs after batch callbacks" do
62
- expect { |spy|
63
- klass.after_batch(&spy)
64
- DummyJob.crawl(url, batch: batch)
65
- perform_enqueued_jobs
66
- }.to yield_control
67
- end
68
- end
69
- end
70
- end
71
-
72
- describe "Unhandled exceptions" do
73
- let(:klass) { Class.new(Wayfarer::Base) }
74
-
75
- before do
76
- allow_any_instance_of(DummyJob).to receive(:perform).and_raise(RuntimeError.new)
77
- end
78
-
79
- it "does not retry the job" do
80
- DummyJob.crawl(url, batch: batch)
81
-
82
- expect {
83
- begin
84
- perform_enqueued_jobs
85
- rescue StandardError
86
- nil
87
- end
88
- }.to change { enqueued_jobs.size }.by(-1)
89
- end
90
-
91
- it "decrements the counter" do
92
- 3.times { task.counter.increment }
93
-
94
- DummyJob.crawl(url, batch: batch)
95
- begin
96
- perform_enqueued_jobs
97
- rescue StandardError
98
- nil
99
- end
100
-
101
- expect(task.counter.value).to be(3)
102
- end
103
- end
104
-
105
- describe "Retries" do
106
- let(:klass) do
107
- Class.new(Wayfarer::Base) do
108
- retry_on RuntimeError, attempts: 3 do |job, error|
109
- Spy.call(job, error)
110
- end
111
- end
112
- end
113
-
114
- before do
115
- allow_any_instance_of(DummyJob).to receive(:perform) do |job|
116
- task = job.arguments.first
117
- task.metadata.job = job
118
-
119
- raise RuntimeError
120
- end
121
-
122
- stub_const("Spy", spy)
123
- end
124
-
125
- it "retries the job" do
126
- expect(Spy).to receive(:call).exactly(:once)
127
- .with(kind_of(DummyJob),
128
- kind_of(RuntimeError))
129
-
130
- DummyJob.crawl(url, batch: batch)
131
-
132
- expect {
133
- perform_enqueued_jobs
134
- }.to change { enqueued_jobs.last["executions"] }.by(1)
135
-
136
- expect {
137
- perform_enqueued_jobs
138
- }.to change { enqueued_jobs.last["executions"] }.by(1)
139
-
140
- expect {
141
- perform_enqueued_jobs
142
- }.to change { enqueued_jobs.size }.by(-1)
143
- end
144
-
145
- it "marks the URL seen" do
146
- task.counter.increment # otherwise barrier gets reset
147
- DummyJob.crawl(url, batch: batch)
148
- 3.times { perform_enqueued_jobs }
149
- expect(task.barrier.seen?(task.url)).to be(true)
150
- end
151
-
152
- it "decrements the counter" do
153
- 3.times { task.counter.increment }
154
-
155
- DummyJob.crawl(url, batch: batch)
156
- 3.times { perform_enqueued_jobs }
157
-
158
- expect(task.counter.value).to be(3)
159
- end
160
-
161
- it "runs after batch callbacks" do
162
- expect { |spy|
163
- klass.after_batch(&spy)
164
- DummyJob.crawl(url, batch: batch)
165
- 3.times { perform_enqueued_jobs }
166
- }.to yield_control
167
- end
168
- end
169
-
170
- describe "Discarding" do
171
- let(:klass) do
172
- Class.new(Wayfarer::Base) do
173
- discard_on RuntimeError do |job, error|
174
- Spy.call(job, error)
175
- end
176
- end
177
- end
178
-
179
- before do
180
- allow_any_instance_of(DummyJob).to receive(:perform) do |job|
181
- task = job.arguments.first
182
- task.metadata.job = job
183
-
184
- raise RuntimeError
185
- end
186
-
187
- stub_const("Spy", spy)
188
- end
189
-
190
- it "discards the job" do
191
- expect(Spy).to receive(:call).exactly(:once)
192
- .with(kind_of(DummyJob),
193
- kind_of(RuntimeError))
194
-
195
- DummyJob.crawl(url, batch: batch)
196
-
197
- expect {
198
- perform_enqueued_jobs
199
- }.to change { enqueued_jobs.size }.by(-1)
200
- end
201
-
202
- it "marks the URL seen" do
203
- task.counter.increment # otherwise barrier gets reset
204
- DummyJob.crawl(url, batch: batch)
205
- perform_enqueued_jobs
206
- expect(task.barrier.seen?(task.url)).to be(true)
207
- end
208
-
209
- it "decrements the counter" do
210
- 3.times { task.counter.increment }
211
- DummyJob.crawl(url, batch: batch)
212
- perform_enqueued_jobs
213
- expect(task.counter.value).to be(3)
214
- end
215
-
216
- it "runs after batch callbacks" do
217
- expect { |spy|
218
- klass.after_batch(&spy)
219
- DummyJob.crawl(url, batch: batch)
220
- perform_enqueued_jobs
221
- }.to yield_control
222
- end
223
- end
224
- end
data/spec/callbacks_spec.rb DELETED
@@ -1,102 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "spec_helpers"
4
-
5
- describe Wayfarer::Callbacks do
6
- let(:url) { "https://alpha.com" }
7
- let(:task) { build(:task, batch: "batch", url: url) }
8
-
9
- let(:klass) do
10
- Class.new(Wayfarer::Base) do
11
- route { host "alpha.com", to: :alpha }
12
- route { host "beta.com", to: :beta }
13
-
14
- before_fetch do |job|
15
- Spy.before_fetch(self, job)
16
- end
17
-
18
- before_fetch(except: %i[alpha beta], only: :alpha) do |job|
19
- Spy.before_fetch_except_alpha_beta_only_alpha(self, job)
20
- end
21
-
22
- before_action do |job|
23
- Spy.before_action(self, job)
24
- end
25
-
26
- before_action(only: :alpha) do |job|
27
- Spy.before_action_only_alpha(self, job)
28
- end
29
-
30
- before_action(only: :beta) do |job|
31
- Spy.before_action_only_beta(self, job)
32
- end
33
-
34
- before_action(only: %i[alpha gamma]) do |job|
35
- Spy.before_action_only_alpha_gamma(self, job)
36
- end
37
-
38
- before_action(except: :alpha) do |job|
39
- Spy.before_action_except_alpha(self, job)
40
- end
41
-
42
- before_action(except: %i[beta gamma]) do |job|
43
- Spy.before_action_except_beta_gamma(self, job)
44
- end
45
-
46
- before_action :callback_a, only: :alpha
47
-
48
- before_action :callback_b
49
-
50
- before_action :callback_c, except: %i[beta]
51
-
52
- def alpha; end
53
-
54
- def beta; end
55
-
56
- private
57
-
58
- def callback_a
59
- Spy.before_action_callback_a_only_alpha(self)
60
- end
61
-
62
- def callback_b
63
- false
64
- end
65
-
66
- def callback_c
67
- Spy.before_action_callback_c(self)
68
- end
69
- end
70
- end
71
-
72
- let(:job) { klass.new }
73
-
74
- before do
75
- stub_const("DummyJob", klass)
76
- stub_const("Spy", spy)
77
-
78
- allow(job).to receive(:arguments).and_return([task])
79
- end
80
-
81
- describe "before_fetch" do
82
- it "fires" do
83
- expect(Spy).to receive(:before_fetch).exactly(:once).with(job, job)
84
- expect(Spy).not_to receive(:before_fetch_except_alpha_beta_only_alpha)
85
- job.perform(task)
86
- end
87
- end
88
-
89
- describe "before_action" do
90
- it "fires" do
91
- expect(Spy).to receive(:before_action).exactly(:once).with(job, job)
92
- expect(Spy).to receive(:before_action_only_alpha).exactly(:once).with(job, job)
93
- expect(Spy).not_to receive(:before_action_only_beta).with(job, job)
94
- expect(Spy).to receive(:before_action_only_alpha_gamma).exactly(:once).with(job, job)
95
- expect(Spy).not_to receive(:before_action_except_alpha).with(job, job)
96
- expect(Spy).to receive(:before_action_except_beta_gamma).exactly(:once).with(job, job)
97
- expect(Spy).to receive(:before_action_callback_a_only_alpha).exactly(:once).with(job)
98
- expect(Spy).not_to receive(:before_action_callback_c).with(job)
99
- job.perform(task)
100
- end
101
- end
102
- end
data/spec/cli/generate_spec.rb DELETED
@@ -1,39 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "spec_helpers"
4
-
5
- describe Wayfarer::CLI::Generate, cli: true do
6
- subject(:cli) { Wayfarer::CLI::Runner }
7
-
8
- describe "generate project" do
9
- it "outputs" do
10
- expected_output = <<-OUT
11
- create foobar
12
- create foobar/Gemfile
13
- create foobar/app/jobs/foobar.rb
14
- OUT
15
-
16
- expect {
17
- cli.start(%w[generate project foobar])
18
- }.to output(expected_output).to_stdout
19
- end
20
-
21
- it "creates the project directory" do
22
- expect {
23
- cli.start(%w[generate project foobar])
24
- }.to change { File.directory?("foobar") }.to(true)
25
- end
26
-
27
- it "creates the Gemfile" do
28
- expect {
29
- cli.start(%w[generate project foobar])
30
- }.to change { File.file?("foobar/Gemfile") }.to(true)
31
- end
32
-
33
- it "creates the job" do
34
- expect {
35
- cli.start(%w[generate project foobar])
36
- }.to change { File.file?("foobar/app/jobs/foobar.rb") }.to(true)
37
- end
38
- end
39
- end
data/spec/cli/job_spec.rb DELETED
@@ -1,78 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "spec_helpers"
4
-
5
- describe Wayfarer::CLI::Job, cli: true, redis: true do
6
- include Wayfarer::Redis
7
-
8
- let(:url) { test_app_path("/hello_world") }
9
- let(:batch) { "my-batch" }
10
- subject(:cli) { Wayfarer::CLI::Runner }
11
-
12
- before do
13
- write_file "app/jobs/dummy_job.rb", <<~FILE
14
- class DummyJob < Wayfarer::Base
15
- end
16
- FILE
17
- end
18
-
19
- before { Wayfarer::CLI::Base.new.send(:load_environment) }
20
-
21
- describe "job perform" do
22
- it "performs the worker" do
23
- expect_any_instance_of(DummyJob).to receive(:perform).with(kind_of(Wayfarer::Task)) do |job|
24
- task = job.arguments.first
25
- task.metadata.job = job
26
- end
27
-
28
- cli.start(["job", "perform", "DummyJob", url])
29
- end
30
-
31
- it "collects garbage" do
32
- expect_any_instance_of(Wayfarer::GC).to receive(:run).exactly(:once)
33
- cli.start(["job", "perform", "DummyJob", url])
34
- end
35
-
36
- context "using MockRedis" do
37
- it "performs the worker using MockRedis" do
38
- cli.start(["job", "perform", "--mock-redis", "DummyJob", url])
39
- expect(Wayfarer.config.redis.factory.call(nil)).to be_a(MockRedis)
40
- end
41
- end
42
- end
43
-
44
- describe "job enqueue" do
45
- it "enqueues the job" do
46
- expect(DummyJob).to receive(:crawl).with(Addressable::URI.parse(url), batch: kind_of(String))
47
- cli.start(["job", "enqueue", "DummyJob", url])
48
- end
49
-
50
- context "with batch provided" do
51
- it "enqueues the job" do
52
- expect(DummyJob).to receive(:crawl).with(Addressable::URI.parse(url), batch: batch)
53
- cli.start(["job", "enqueue", "--batch", batch, "DummyJob", url])
54
- end
55
- end
56
- end
57
-
58
- describe "job execute" do
59
- it "executes the job" do
60
- expect(DummyJob).to receive(:crawl).with(Addressable::URI.parse(url), batch: kind_of(String))
61
- cli.start(["job", "execute", "DummyJob", url])
62
- end
63
-
64
- context "with batch provided" do
65
- it "enqueues the job" do
66
- expect(DummyJob).to receive(:crawl).with(Addressable::URI.parse(url), batch: batch)
67
- cli.start(["job", "execute", "--batch", batch, "DummyJob", url])
68
- end
69
- end
70
-
71
- context "using MockRedis" do
72
- it "performs the worker using MockRedis" do
73
- cli.start(["job", "execute", "--mock-redis", "DummyJob", url])
74
- expect(Wayfarer.config.redis.factory.call(nil)).to be_a(MockRedis)
75
- end
76
- end
77
- end
78
- end
data/spec/config/capybara_spec.rb DELETED
@@ -1,18 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "spec_helpers"
4
-
5
- describe Wayfarer::Config::Capybara do
6
- let(:env) { {} }
7
- subject(:capybara) { Wayfarer::Config::Capybara.new(env) }
8
-
9
- describe "#driver" do
10
- context "with env var set" do
11
- before { env["WAYFARER_CAPYBARA_DRIVER"] = "cuprite" }
12
-
13
- it "parses the env var" do
14
- expect(capybara.driver).to be(:cuprite)
15
- end
16
- end
17
- end
18
- end
data/spec/config/ferrum_spec.rb DELETED
@@ -1,24 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "spec_helpers"
4
-
5
- describe Wayfarer::Config::Ferrum do
6
- let(:env) { {} }
7
- subject(:ferrum) { Wayfarer::Config::Ferrum.new(env) }
8
-
9
- describe "#options" do
10
- context "by default" do
11
- it "is {}" do
12
- expect(ferrum.options).to eq({})
13
- end
14
- end
15
-
16
- context "with env var set" do
17
- before { env["WAYFARER_FERRUM_OPTIONS"] = "url:http://chrome:3000,headless:false" }
18
-
19
- it "parses the env var" do
20
- expect(ferrum.options).to eq(url: "http://chrome:3000", headless: false)
21
- end
22
- end
23
- end
24
- end
data/spec/config/networking_spec.rb DELETED
@@ -1,73 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "spec_helpers"
4
-
5
- describe Wayfarer::Config::Networking do
6
- let(:env) { {} }
7
- subject(:network) { Wayfarer::Config::Networking.new(env) }
8
-
9
- describe "#agent" do
10
- context "by default" do
11
- it "is :http" do
12
- expect(network.agent).to be(:http)
13
- end
14
- end
15
-
16
- context "with env var set" do
17
- before { env["WAYFARER_NETWORK_AGENT"] = "ferrum" }
18
-
19
- it "parses the env var" do
20
- expect(network.agent).to be(:ferrum)
21
- end
22
- end
23
- end
24
-
25
- describe "#pool_size" do
26
- context "by default" do
27
- it "is 1" do
28
- expect(network.pool_size).to be(1)
29
- end
30
- end
31
-
32
- context "with env var set" do
33
- before { env["WAYFARER_NETWORK_POOL_SIZE"] = "42" }
34
-
35
- it "parses the env var" do
36
- expect(network.pool_size).to be(42)
37
- end
38
- end
39
- end
40
-
41
- describe "#pool_timeout" do
42
- context "by default" do
43
- it "is 10" do
44
- expect(network.pool_timeout).to be(10)
45
- end
46
- end
47
-
48
- context "with env var set" do
49
- before { env["WAYFARER_NETWORK_POOL_SIZE"] = "1337" }
50
-
51
- it "parses the env var" do
52
- expect(network.pool_size).to be(1337)
53
- end
54
- end
55
- end
56
-
57
- describe "#http_headers" do
58
- context "by default" do
59
- it "is {}" do
60
- expect(network.http_headers).to eq({})
61
- end
62
- end
63
-
64
- context "with env var set" do
65
- before { env["WAYFARER_NETWORK_HTTP_HEADERS"] = "user-agent:foo,authorization:bar" }
66
-
67
- it "parses the env var" do
68
- expect(network.http_headers).to eq("user-agent": "foo",
69
- authorization: "bar")
70
- end
71
- end
72
- end
73
- end
data/spec/config/redis_spec.rb DELETED
@@ -1,32 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "spec_helpers"
4
-
5
- describe Wayfarer::Config::Redis do
6
- let(:env) { {} }
7
- subject(:redis) { Wayfarer::Config::Redis.new(env) }
8
-
9
- describe "#url" do
10
- context "by default" do
11
- it "is redis://localhost:6379" do
12
- expect(redis.url).to eq("redis://localhost:6379")
13
- end
14
- end
15
-
16
- context "with env var set" do
17
- before { env["WAYFARER_REDIS_URL"] = "redis://redis:6379" }
18
-
19
- it "parses the env var" do
20
- expect(redis.url).to eq("redis://redis:6379")
21
- end
22
- end
23
- end
24
-
25
- describe "#factory" do
26
- context "by default" do
27
- it "instantiates Redis" do
28
- expect(redis.factory.call(redis)).to be_a(::Redis)
29
- end
30
- end
31
- end
32
- end
data/spec/config/root_spec.rb DELETED
@@ -1,31 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "spec_helpers"
4
-
5
- describe Wayfarer::Config::Root do
6
- subject(:config) { Wayfarer::Config::Root.new }
7
-
8
- describe "#ferrum" do
9
- it "returns a Ferrum config" do
10
- expect(config.ferrum).to be_a(Wayfarer::Config::Ferrum)
11
- end
12
- end
13
-
14
- describe "#network" do
15
- it "returns a network config" do
16
- expect(config.network).to be_a(Wayfarer::Config::Networking)
17
- end
18
- end
19
-
20
- describe "#redis" do
21
- it "returns a Redis config" do
22
- expect(config.redis).to be_a(Wayfarer::Config::Redis)
23
- end
24
- end
25
-
26
- describe "#selenium" do
27
- it "returns a Selenium config" do
28
- expect(config.selenium).to be_a(Wayfarer::Config::Selenium)
29
- end
30
- end
31
- end
data/spec/config/selenium_spec.rb DELETED
@@ -1,56 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "spec_helpers"
4
-
5
- describe Wayfarer::Config::Selenium do
6
- let(:env) { {} }
7
- subject(:selenium) { Wayfarer::Config::Selenium.new(env) }
8
-
9
- describe "#driver" do
10
- context "by default" do
11
- it "is :chrome" do
12
- expect(selenium.driver).to be(:chrome)
13
- end
14
- end
15
-
16
- context "with env var set" do
17
- before { env["WAYFARER_SELENIUM_DRIVER"] = "firefox" }
18
-
19
- it "parses the env var" do
20
- expect(selenium.driver).to be(:firefox)
21
- end
22
- end
23
- end
24
-
25
- describe "#options" do
26
- context "by default" do
27
- it "is {}" do
28
- expect(selenium.options).to eq({})
29
- end
30
- end
31
-
32
- context "with env var set" do
33
- before { env["WAYFARER_SELENIUM_OPTIONS"] = "url:http://firefox" }
34
-
35
- it "parses the env var" do
36
- expect(selenium.options).to eq(url: "http://firefox")
37
- end
38
- end
39
- end
40
-
41
- describe "#client_timeout" do
42
- context "by default" do
43
- it "is 60" do
44
- expect(selenium.client_timeout).to be(60)
45
- end
46
- end
47
-
48
- context "with env var set" do
49
- before { env["WAYFARER_SELENIUM_CLIENT_TIMEOUT"] = "10" }
50
-
51
- it "parses the env var" do
52
- expect(selenium.client_timeout).to be(10)
53
- end
54
- end
55
- end
56
- end