wayfarer 0.4.6 → 0.4.8

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (259)
  1. checksums.yaml +4 -4
  2. data/.env +17 -0
  3. data/.github/workflows/lint.yaml +27 -0
  4. data/.github/workflows/release.yaml +30 -0
  5. data/.github/workflows/tests.yaml +21 -0
  6. data/.gitignore +5 -1
  7. data/.rubocop.yml +36 -0
  8. data/.vale.ini +8 -0
  9. data/.yardopts +1 -3
  10. data/Dockerfile +6 -4
  11. data/Gemfile +24 -0
  12. data/Gemfile.lock +274 -164
  13. data/Rakefile +7 -51
  14. data/bin/wayfarer +1 -1
  15. data/docker-compose.yml +23 -13
  16. data/docs/cookbook/consent_screen.md +2 -2
  17. data/docs/cookbook/executing_javascript.md +3 -3
  18. data/docs/cookbook/navigation.md +12 -12
  19. data/docs/cookbook/querying_html.md +3 -3
  20. data/docs/cookbook/screenshots.md +2 -2
  21. data/docs/guides/callbacks.md +25 -125
  22. data/docs/guides/cli.md +71 -0
  23. data/docs/guides/configuration.md +10 -35
  24. data/docs/guides/development.md +67 -0
  25. data/docs/guides/handlers.md +60 -0
  26. data/docs/guides/index.md +1 -0
  27. data/docs/guides/jobs.md +142 -31
  28. data/docs/guides/navigation.md +1 -1
  29. data/docs/guides/networking/capybara.md +13 -22
  30. data/docs/guides/networking/custom_adapters.md +103 -41
  31. data/docs/guides/networking/ferrum.md +4 -4
  32. data/docs/guides/networking/http.md +9 -13
  33. data/docs/guides/networking/selenium.md +10 -11
  34. data/docs/guides/pages.md +78 -10
  35. data/docs/guides/redis.md +10 -0
  36. data/docs/guides/routing.md +156 -0
  37. data/docs/guides/tasks.md +53 -9
  38. data/docs/guides/tutorial.md +66 -0
  39. data/docs/guides/user_agents.md +115 -0
  40. data/docs/index.md +17 -40
  41. data/lib/wayfarer/base.rb +125 -46
  42. data/lib/wayfarer/batch_completion.rb +60 -0
  43. data/lib/wayfarer/callbacks.rb +22 -48
  44. data/lib/wayfarer/cli/route_printer.rb +85 -89
  45. data/lib/wayfarer/cli.rb +103 -0
  46. data/lib/wayfarer/gc.rb +18 -6
  47. data/lib/wayfarer/handler.rb +15 -7
  48. data/lib/wayfarer/kv.rb +28 -0
  49. data/lib/wayfarer/logging.rb +38 -0
  50. data/lib/wayfarer/middleware/base.rb +2 -0
  51. data/lib/wayfarer/middleware/batch_completion.rb +19 -0
  52. data/lib/wayfarer/middleware/chain.rb +7 -1
  53. data/lib/wayfarer/middleware/content_type.rb +59 -0
  54. data/lib/wayfarer/middleware/controller.rb +19 -15
  55. data/lib/wayfarer/middleware/dedup.rb +22 -13
  56. data/lib/wayfarer/middleware/dispatch.rb +17 -4
  57. data/lib/wayfarer/middleware/normalize.rb +7 -14
  58. data/lib/wayfarer/middleware/redis.rb +15 -0
  59. data/lib/wayfarer/middleware/router.rb +33 -35
  60. data/lib/wayfarer/middleware/stage.rb +5 -5
  61. data/lib/wayfarer/middleware/uri_parser.rb +31 -0
  62. data/lib/wayfarer/middleware/user_agent.rb +49 -0
  63. data/lib/wayfarer/networking/capybara.rb +1 -1
  64. data/lib/wayfarer/networking/context.rb +14 -3
  65. data/lib/wayfarer/networking/ferrum.rb +1 -4
  66. data/lib/wayfarer/networking/follow.rb +14 -7
  67. data/lib/wayfarer/networking/http.rb +1 -1
  68. data/lib/wayfarer/networking/pool.rb +23 -13
  69. data/lib/wayfarer/networking/selenium.rb +15 -7
  70. data/lib/wayfarer/networking/strategy.rb +2 -2
  71. data/lib/wayfarer/page.rb +34 -14
  72. data/lib/wayfarer/parsing/xml.rb +6 -6
  73. data/lib/wayfarer/parsing.rb +21 -0
  74. data/lib/wayfarer/redis/barrier.rb +26 -21
  75. data/lib/wayfarer/redis/counter.rb +18 -9
  76. data/lib/wayfarer/redis/pool.rb +1 -1
  77. data/lib/wayfarer/redis/resettable.rb +19 -0
  78. data/lib/wayfarer/routing/dsl.rb +166 -30
  79. data/lib/wayfarer/routing/hash_stack.rb +33 -0
  80. data/lib/wayfarer/routing/matchers/custom.rb +8 -5
  81. data/lib/wayfarer/routing/matchers/{suffix.rb → empty_params.rb} +2 -6
  82. data/lib/wayfarer/routing/matchers/host.rb +15 -9
  83. data/lib/wayfarer/routing/matchers/path.rb +11 -31
  84. data/lib/wayfarer/routing/matchers/query.rb +41 -17
  85. data/lib/wayfarer/routing/matchers/result.rb +12 -0
  86. data/lib/wayfarer/routing/matchers/scheme.rb +13 -5
  87. data/lib/wayfarer/routing/matchers/url.rb +13 -5
  88. data/lib/wayfarer/routing/path_consumer.rb +130 -0
  89. data/lib/wayfarer/routing/path_finder.rb +151 -23
  90. data/lib/wayfarer/routing/result.rb +1 -1
  91. data/lib/wayfarer/routing/root_route.rb +17 -1
  92. data/lib/wayfarer/routing/route.rb +66 -19
  93. data/lib/wayfarer/routing/serializable.rb +28 -0
  94. data/lib/wayfarer/routing/sub_route.rb +53 -0
  95. data/lib/wayfarer/routing/target_route.rb +17 -1
  96. data/lib/wayfarer/stringify.rb +21 -30
  97. data/lib/wayfarer/task.rb +9 -17
  98. data/lib/wayfarer/uri/normalization.rb +120 -0
  99. data/lib/wayfarer.rb +72 -5
  100. data/mise.toml +2 -0
  101. data/mkdocs.yml +44 -8
  102. data/rake/docs.rake +26 -0
  103. data/rake/lint.rake +9 -0
  104. data/rake/release.rake +23 -0
  105. data/rake/tests.rake +32 -0
  106. data/requirements.txt +1 -1
  107. data/spec/factories/job.rb +8 -0
  108. data/spec/factories/middleware.rb +2 -2
  109. data/spec/factories/path_finder.rb +11 -0
  110. data/spec/factories/redis.rb +19 -0
  111. data/spec/factories/task.rb +46 -2
  112. data/spec/spec_helpers.rb +55 -51
  113. data/spec/support/active_job_helpers.rb +8 -0
  114. data/spec/support/integration_helpers.rb +21 -0
  115. data/spec/support/redis_helpers.rb +9 -0
  116. data/spec/support/test_app.rb +66 -37
  117. data/spec/wayfarer/base_spec.rb +200 -0
  118. data/spec/wayfarer/batch_completion_spec.rb +142 -0
  119. data/spec/wayfarer/cli/job_spec.rb +88 -0
  120. data/spec/wayfarer/cli/routing_spec.rb +322 -0
  121. data/spec/{cli → wayfarer/cli}/version_spec.rb +1 -1
  122. data/spec/wayfarer/gc_spec.rb +29 -0
  123. data/spec/wayfarer/handler_spec.rb +9 -0
  124. data/spec/wayfarer/integration/callbacks_spec.rb +200 -0
  125. data/spec/wayfarer/integration/content_type_spec.rb +37 -0
  126. data/spec/wayfarer/integration/custom_routing_spec.rb +51 -0
  127. data/spec/wayfarer/integration/gc_spec.rb +40 -0
  128. data/spec/wayfarer/integration/handler_spec.rb +65 -0
  129. data/spec/wayfarer/integration/page_spec.rb +79 -0
  130. data/spec/wayfarer/integration/params_spec.rb +64 -0
  131. data/spec/wayfarer/integration/parsing_spec.rb +99 -0
  132. data/spec/wayfarer/integration/retry_spec.rb +112 -0
  133. data/spec/wayfarer/integration/stage_spec.rb +58 -0
  134. data/spec/wayfarer/middleware/batch_completion_spec.rb +33 -0
  135. data/spec/{middleware → wayfarer/middleware}/chain_spec.rb +24 -19
  136. data/spec/wayfarer/middleware/content_type_spec.rb +83 -0
  137. data/spec/{middleware → wayfarer/middleware}/controller_spec.rb +24 -22
  138. data/spec/wayfarer/middleware/dedup_spec.rb +66 -0
  139. data/spec/wayfarer/middleware/normalize_spec.rb +32 -0
  140. data/spec/wayfarer/middleware/router_spec.rb +102 -0
  141. data/spec/wayfarer/middleware/stage_spec.rb +63 -0
  142. data/spec/wayfarer/middleware/uri_parser_spec.rb +63 -0
  143. data/spec/wayfarer/middleware/user_agent_spec.rb +158 -0
  144. data/spec/wayfarer/networking/capybara_spec.rb +13 -0
  145. data/spec/{networking → wayfarer/networking}/context_spec.rb +46 -38
  146. data/spec/wayfarer/networking/ferrum_spec.rb +13 -0
  147. data/spec/{networking → wayfarer/networking}/follow_spec.rb +11 -6
  148. data/spec/wayfarer/networking/http_spec.rb +12 -0
  149. data/spec/{networking → wayfarer/networking}/pool_spec.rb +16 -14
  150. data/spec/wayfarer/networking/selenium_spec.rb +12 -0
  151. data/spec/{networking → wayfarer/networking}/strategy.rb +33 -54
  152. data/spec/wayfarer/page_spec.rb +69 -0
  153. data/spec/{parsing → wayfarer/parsing}/json_spec.rb +1 -1
  154. data/spec/wayfarer/parsing/xml_parse_spec.rb +25 -0
  155. data/spec/wayfarer/redis/barrier_spec.rb +39 -0
  156. data/spec/wayfarer/redis/counter_spec.rb +34 -0
  157. data/spec/{redis → wayfarer/redis}/pool_spec.rb +4 -3
  158. data/spec/{routing → wayfarer/routing}/dsl_spec.rb +12 -22
  159. data/spec/wayfarer/routing/hash_stack_spec.rb +63 -0
  160. data/spec/wayfarer/routing/integration_spec.rb +101 -0
  161. data/spec/wayfarer/routing/matchers/custom_spec.rb +39 -0
  162. data/spec/wayfarer/routing/matchers/host_spec.rb +56 -0
  163. data/spec/wayfarer/routing/matchers/matcher.rb +17 -0
  164. data/spec/wayfarer/routing/matchers/path_spec.rb +43 -0
  165. data/spec/wayfarer/routing/matchers/query_spec.rb +123 -0
  166. data/spec/wayfarer/routing/matchers/scheme_spec.rb +45 -0
  167. data/spec/wayfarer/routing/matchers/url_spec.rb +33 -0
  168. data/spec/wayfarer/routing/path_consumer_spec.rb +123 -0
  169. data/spec/wayfarer/routing/path_finder_spec.rb +409 -0
  170. data/spec/wayfarer/routing/root_route_spec.rb +51 -0
  171. data/spec/wayfarer/routing/route_spec.rb +74 -0
  172. data/spec/wayfarer/routing/sub_route_spec.rb +103 -0
  173. data/spec/wayfarer/task_spec.rb +13 -0
  174. data/spec/wayfarer/uri/normalization_spec.rb +98 -0
  175. data/spec/wayfarer_spec.rb +2 -2
  176. data/wayfarer.gemspec +18 -28
  177. metadata +797 -265
  178. data/.github/workflows/ci.yaml +0 -32
  179. data/.rbenv-gemsets +0 -1
  180. data/.ruby-version +0 -1
  181. data/RELEASING.md +0 -17
  182. data/docs/cookbook/user_agent.md +0 -7
  183. data/docs/guides/error_handling.md +0 -53
  184. data/docs/guides/networking.md +0 -94
  185. data/docs/guides/performance.md +0 -130
  186. data/docs/guides/reliability.md +0 -41
  187. data/docs/guides/routing/steering.md +0 -30
  188. data/docs/reference/api/base.md +0 -48
  189. data/docs/reference/cli.md +0 -61
  190. data/docs/reference/configuration_keys.md +0 -43
  191. data/docs/reference/environment_variables.md +0 -83
  192. data/lib/wayfarer/cli/base.rb +0 -45
  193. data/lib/wayfarer/cli/generate.rb +0 -17
  194. data/lib/wayfarer/cli/job.rb +0 -56
  195. data/lib/wayfarer/cli/route.rb +0 -29
  196. data/lib/wayfarer/cli/runner.rb +0 -34
  197. data/lib/wayfarer/cli/templates/Gemfile.tt +0 -5
  198. data/lib/wayfarer/cli/templates/job.rb.tt +0 -10
  199. data/lib/wayfarer/config/capybara.rb +0 -10
  200. data/lib/wayfarer/config/ferrum.rb +0 -11
  201. data/lib/wayfarer/config/networking.rb +0 -29
  202. data/lib/wayfarer/config/redis.rb +0 -14
  203. data/lib/wayfarer/config/root.rb +0 -11
  204. data/lib/wayfarer/config/selenium.rb +0 -21
  205. data/lib/wayfarer/config/strconv.rb +0 -45
  206. data/lib/wayfarer/config/struct.rb +0 -72
  207. data/lib/wayfarer/middleware/fetch.rb +0 -56
  208. data/lib/wayfarer/redis/connection.rb +0 -13
  209. data/lib/wayfarer/redis/version.rb +0 -19
  210. data/lib/wayfarer/routing/router.rb +0 -28
  211. data/spec/base_spec.rb +0 -224
  212. data/spec/callbacks_spec.rb +0 -102
  213. data/spec/cli/generate_spec.rb +0 -39
  214. data/spec/cli/job_spec.rb +0 -78
  215. data/spec/config/capybara_spec.rb +0 -18
  216. data/spec/config/ferrum_spec.rb +0 -24
  217. data/spec/config/networking_spec.rb +0 -73
  218. data/spec/config/redis_spec.rb +0 -32
  219. data/spec/config/root_spec.rb +0 -31
  220. data/spec/config/selenium_spec.rb +0 -56
  221. data/spec/config/strconv_spec.rb +0 -58
  222. data/spec/config/struct_spec.rb +0 -66
  223. data/spec/fixtures/dummy_job.rb +0 -7
  224. data/spec/gc_spec.rb +0 -59
  225. data/spec/handler_spec.rb +0 -11
  226. data/spec/integration/callbacks_spec.rb +0 -85
  227. data/spec/integration/page_spec.rb +0 -62
  228. data/spec/integration/params_spec.rb +0 -56
  229. data/spec/integration/stage_spec.rb +0 -51
  230. data/spec/integration/steering_spec.rb +0 -57
  231. data/spec/middleware/dedup_spec.rb +0 -88
  232. data/spec/middleware/dispatch_spec.rb +0 -43
  233. data/spec/middleware/fetch_spec.rb +0 -155
  234. data/spec/middleware/normalize_spec.rb +0 -29
  235. data/spec/middleware/router_spec.rb +0 -105
  236. data/spec/middleware/stage_spec.rb +0 -62
  237. data/spec/networking/capybara_spec.rb +0 -12
  238. data/spec/networking/ferrum_spec.rb +0 -12
  239. data/spec/networking/http_spec.rb +0 -12
  240. data/spec/networking/selenium_spec.rb +0 -12
  241. data/spec/page_spec.rb +0 -47
  242. data/spec/parsing/xml_spec.rb +0 -25
  243. data/spec/redis/barrier_spec.rb +0 -78
  244. data/spec/redis/counter_spec.rb +0 -32
  245. data/spec/redis/version_spec.rb +0 -13
  246. data/spec/routing/integration_spec.rb +0 -110
  247. data/spec/routing/matchers/custom_spec.rb +0 -31
  248. data/spec/routing/matchers/host_spec.rb +0 -49
  249. data/spec/routing/matchers/path_spec.rb +0 -43
  250. data/spec/routing/matchers/query_spec.rb +0 -137
  251. data/spec/routing/matchers/scheme_spec.rb +0 -25
  252. data/spec/routing/matchers/suffix_spec.rb +0 -41
  253. data/spec/routing/matchers/uri_spec.rb +0 -27
  254. data/spec/routing/path_finder_spec.rb +0 -33
  255. data/spec/routing/root_route_spec.rb +0 -29
  256. data/spec/routing/route_spec.rb +0 -43
  257. data/spec/routing/router_spec.rb +0 -24
  258. data/spec/task_spec.rb +0 -34
  259. data/spec/{stringify_spec.rb → wayfarer/stringify_spec.rb} +2 -2
@@ -0,0 +1,65 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "spec_helpers"
4
+
5
+ describe "Handlers", :redis do
6
+ before do
7
+ mock_job! :dummy_job
8
+ mock_job! :other_job
9
+
10
+ mock_handler! :dummy_handler
11
+ mock_handler! :other_handler
12
+ end
13
+
14
+ describe "bypassing the router" do
15
+ before do
16
+ DummyJob.class_eval do
17
+ route.to DummyHandler
18
+ end
19
+
20
+ DummyHandler.class_eval do
21
+ class_attribute :called
22
+
23
+ before_action do
24
+ self.class.called = true
25
+ end
26
+
27
+ route.to [OtherHandler, :foobar]
28
+ end
29
+
30
+ OtherHandler.class_eval do
31
+ class_attribute :called
32
+
33
+ def foobar
34
+ self.class.called = true
35
+ end
36
+ end
37
+ end
38
+
39
+ specify do
40
+ DummyJob.crawl(test_app_path("/"))
41
+ perform_enqueued_jobs
42
+ assert_performed_jobs 1
43
+ expect(enqueued_jobs).to be_empty
44
+ expect(DummyHandler.called).to be(true)
45
+ expect(OtherHandler.called).to be(true)
46
+ end
47
+ end
48
+
49
+ describe "dispatching to another Wayfarer::Base" do
50
+ before do
51
+ DummyJob.class_eval do
52
+ extend SpecHelpers
53
+
54
+ route.to OtherJob
55
+ end
56
+ end
57
+
58
+ let(:error) { Wayfarer::Middleware::Dispatch::InvalidTargetError }
59
+
60
+ specify do
61
+ DummyJob.crawl(test_app_path("/"))
62
+ expect { perform_enqueued_jobs }.to raise_error(error, "routed to invalid action: #{OtherJob}")
63
+ end
64
+ end
65
+ end
@@ -0,0 +1,79 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "spec_helpers"
4
+
5
+ describe "Pages", :redis do
6
+ before do
7
+ mock_job! :dummy_job
8
+ mock_handler! :dummy_handler
9
+ end
10
+
11
+ let(:url) { test_app_path("git-scm.com/book/en/v2.html") }
12
+
13
+ shared_examples "executes" do
14
+ specify do
15
+ DummyJob.crawl(url)
16
+ perform_enqueued_jobs
17
+ assert_performed_jobs 1
18
+ expect(enqueued_jobs).to be_empty
19
+ end
20
+ end
21
+
22
+ describe "page content" do
23
+ before do
24
+ DummyJob.class_eval do
25
+ include RSpec::Matchers
26
+ extend SpecHelpers
27
+ include SpecHelpers
28
+
29
+ route.host test_app_host, to: :index
30
+
31
+ def index
32
+ expect(page.url).to eq(test_app_path("git-scm.com/book/en/v2.html"))
33
+ expect(page.status_code).to be(200)
34
+ expect(page.body).not_to be_empty
35
+ expect(page.headers.count).to be(9)
36
+
37
+ expect(page.meta.links.all.count).to be(157)
38
+ expect(page.meta.links.internal.count).to be(102)
39
+ expect(page.meta.links.external.count).to be(55)
40
+ end
41
+ end
42
+ end
43
+
44
+ it_behaves_like "executes"
45
+ end
46
+
47
+ describe "page content with handler" do
48
+ before do
49
+ handler = DummyHandler
50
+
51
+ DummyJob.class_eval do
52
+ include RSpec::Matchers
53
+ include SpecHelpers
54
+
55
+ route.to :index
56
+
57
+ def index
58
+ expect(page.url).to eq(test_app_path("git-scm.com/book/en/v2.html"))
59
+ expect(page.status_code).to be(200)
60
+ expect(page.body).not_to be_empty
61
+ expect(page.headers.count).to be(9)
62
+
63
+ expect(page.meta.links.all.count).to be(157)
64
+ expect(page.meta.links.internal.count).to be(102)
65
+ expect(page.meta.links.external.count).to be(55)
66
+ end
67
+ end
68
+
69
+ DummyHandler.class_eval do
70
+ extend SpecHelpers
71
+ include SpecHelpers
72
+
73
+ route.host test_app_host, to: handler
74
+ end
75
+ end
76
+
77
+ it_behaves_like "executes"
78
+ end
79
+ end
@@ -0,0 +1,64 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "spec_helpers"
4
+
5
+ describe "Path parameters", :redis do
6
+ let(:url) { test_app_path("git-scm.com/book/en/v2.html") }
7
+
8
+ before do
9
+ stub_const("DummyJob", Class.new(ActiveJob::Base).include(Wayfarer::Base))
10
+ stub_const("DummyHandler", Class.new.include(Wayfarer::Handler))
11
+ end
12
+
13
+ shared_examples "executes" do
14
+ specify do
15
+ DummyJob.crawl(url)
16
+ perform_enqueued_jobs
17
+ assert_performed_jobs 1
18
+ expect(enqueued_jobs).to be_empty
19
+ end
20
+ end
21
+
22
+ describe "path pattern matching" do
23
+ before do
24
+ DummyJob.class_eval do
25
+ extend SpecHelpers
26
+ include RSpec::Matchers
27
+
28
+ route.to :index, host: test_app_host, scheme: :https do
29
+ path "git-scm.com/book/:lang/:file"
30
+ end
31
+
32
+ def index
33
+ expect(params).to eq("lang" => "en", "file" => "v2.html")
34
+ end
35
+ end
36
+ end
37
+
38
+ it_behaves_like "executes"
39
+ end
40
+
41
+ describe "path pattern matching with handler" do
42
+ before do
43
+ DummyJob.class_eval do
44
+ extend SpecHelpers
45
+
46
+ route.to DummyHandler, host: test_app_host do
47
+ path "git-scm.com/book/:lang/:file"
48
+ end
49
+ end
50
+
51
+ DummyHandler.class_eval do
52
+ include RSpec::Matchers
53
+
54
+ route.to :index
55
+
56
+ def index
57
+ expect(params).to eq("lang" => "en", "file" => "v2.html")
58
+ end
59
+ end
60
+ end
61
+
62
+ it_behaves_like "executes"
63
+ end
64
+ end
@@ -0,0 +1,99 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "spec_helpers"
4
+
5
+ describe "Response body parsing", :redis do
6
+ before do
7
+ mock_job! :dummy_job
8
+ mock_handler! :dummy_handler
9
+ end
10
+
11
+ shared_examples "executes" do
12
+ specify do
13
+ DummyJob.crawl(url)
14
+ perform_enqueued_jobs
15
+ assert_performed_jobs 1
16
+ expect(enqueued_jobs).to be_empty
17
+ end
18
+ end
19
+
20
+ context "with HTML" do
21
+ let(:url) { test_app_path("finders.html") }
22
+
23
+ before do
24
+ DummyJob.class_eval do
25
+ include RSpec::Matchers
26
+
27
+ route.to :index
28
+
29
+ def index
30
+ expect(page.doc).to be_a(Nokogiri::HTML::Document)
31
+ end
32
+ end
33
+ end
34
+
35
+ it_behaves_like "executes"
36
+ end
37
+
38
+ context "with JSON" do
39
+ let(:url) { test_app_path("json/dummy.json") }
40
+
41
+ before do
42
+ DummyJob.class_eval do
43
+ include RSpec::Matchers
44
+
45
+ route.to :index
46
+
47
+ def index
48
+ expect(page.doc).to be_a(Hash)
49
+ end
50
+ end
51
+ end
52
+
53
+ it_behaves_like "executes"
54
+ end
55
+
56
+ describe "custom Content-Type parsers" do
57
+ context "with registered Content-Types" do
58
+ let(:parser) do
59
+ Class.new do
60
+ def self.parse(_body)
61
+ :ok
62
+ end
63
+ end
64
+ end
65
+
66
+ let(:parser_with_options) do
67
+ Class.new do
68
+ def self.parse(_body, options)
69
+ options
70
+ end
71
+ end
72
+ end
73
+
74
+ before do
75
+ Wayfarer.config[:parsing][:registry]["foo/bar"] = parser
76
+ Wayfarer.config[:parsing][:registry]["bar/qux"] = [parser_with_options, :ok]
77
+ DummyJob.class_eval do
78
+ route.to :index
79
+
80
+ def index
81
+ page.doc
82
+ end
83
+ end
84
+ end
85
+
86
+ def perform(content_type)
87
+ DummyJob.new.perform(
88
+ build(:task, url: test_app_path("response_header/Content-Type/#{content_type}"))
89
+ )
90
+ end
91
+
92
+ specify :aggregate_failures do
93
+ expect(perform("foo/bar")).to be(:ok)
94
+ expect(perform("bar/qux")).to be(:ok)
95
+ expect(perform("image/jpeg")).to be_nil
96
+ end
97
+ end
98
+ end
99
+ end
@@ -0,0 +1,112 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "spec_helpers"
4
+
5
+ describe "Retries", :redis do
6
+ # rubocop:disable RSpec/MultipleExpectations
7
+ let(:task) { build(:task, :normalized, :redis_pool) }
8
+ let(:counter) { build(:counter, task: task) }
9
+ let(:barrier) { build(:barrier, task: task) }
10
+
11
+ before do
12
+ stub_const("DummyJob", Class.new(ActiveJob::Base).include(Wayfarer::Base))
13
+
14
+ Wayfarer::Redis::Counter.new(task).increment # avoid garbage collection
15
+ end
16
+
17
+ context "when job fails" do
18
+ before do
19
+ DummyJob.class_eval do
20
+ class_attribute :attempts, default: 0
21
+
22
+ retry_on RuntimeError, wait: 0, attempts: 3
23
+
24
+ route.to :index
25
+
26
+ def index
27
+ self.class.attempts += 1
28
+
29
+ raise
30
+ end
31
+ end
32
+ end
33
+
34
+ specify do
35
+ DummyJob.perform_later(task)
36
+
37
+ expect { perform_enqueued_jobs }.to change(DummyJob, :attempts).from(0).to(1)
38
+
39
+ assert_performed_jobs 1
40
+ expect(enqueued_jobs.pluck("executions")).to contain_exactly(1)
41
+
42
+ expect { perform_enqueued_jobs }.to change(DummyJob, :attempts).from(1).to(2)
43
+ assert_performed_jobs 2
44
+ expect(enqueued_jobs.pluck("executions")).to contain_exactly(2)
45
+
46
+ expect { perform_enqueued_jobs }.to change(DummyJob, :attempts).from(2).to(3).and raise_error(RuntimeError)
47
+ assert_performed_jobs 3
48
+ expect(enqueued_jobs).to be_empty
49
+
50
+ expect(redis.hget(barrier.redis_key, task[:uri].to_s)).to eq(Wayfarer::Redis::Barrier::VALUE)
51
+ end
52
+ end
53
+
54
+ context "when job fails and eventually succeeds" do
55
+ before do
56
+ DummyJob.class_eval do
57
+ class_attribute :attempts, default: 0
58
+
59
+ retry_on RuntimeError, wait: 0, attempts: 3
60
+
61
+ route.to :index
62
+
63
+ def index
64
+ raise if [1, 2].include?(self.class.attempts += 1)
65
+ end
66
+ end
67
+ end
68
+
69
+ specify do
70
+ DummyJob.perform_later(task)
71
+
72
+ expect { perform_enqueued_jobs }.to change(DummyJob, :attempts).from(0).to(1)
73
+ assert_performed_jobs 1
74
+ expect(enqueued_jobs.pluck("executions")).to contain_exactly(1)
75
+
76
+ expect { perform_enqueued_jobs }.to change(DummyJob, :attempts).from(1).to(2)
77
+ assert_performed_jobs 2
78
+ expect(enqueued_jobs.pluck("executions")).to contain_exactly(2)
79
+
80
+ expect { perform_enqueued_jobs }.to change(DummyJob, :attempts).from(2).to(3)
81
+ assert_performed_jobs 3
82
+ expect(enqueued_jobs).to be_empty
83
+
84
+ expect(redis.hget(barrier.redis_key, task[:uri].to_s)).to eq(Wayfarer::Redis::Barrier::VALUE)
85
+ end
86
+ end
87
+
88
+ context "when job succeeds" do
89
+ before do
90
+ DummyJob.class_eval do
91
+ class_attribute :attempts, default: 0
92
+
93
+ route.to :index
94
+
95
+ def index
96
+ self.class.attempts += 1
97
+ end
98
+ end
99
+ end
100
+
101
+ specify do
102
+ DummyJob.perform_later(task)
103
+
104
+ expect { perform_enqueued_jobs }.to change(DummyJob, :attempts).from(0).to(1)
105
+ assert_performed_jobs 1
106
+ expect(enqueued_jobs).to be_empty
107
+
108
+ expect(redis.hget(barrier.redis_key, task[:uri].to_s)).to eq(Wayfarer::Redis::Barrier::VALUE)
109
+ end
110
+ end
111
+ # rubocop:enable RSpec/MultipleExpectations
112
+ end
@@ -0,0 +1,58 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "spec_helpers"
4
+
5
+ describe "Staging", :redis do
6
+ let(:url) { test_app_path("git-scm.com/book/en/v2.html") }
7
+
8
+ before do
9
+ stub_const("DummyJob", Class.new(ActiveJob::Base).include(Wayfarer::Base))
10
+ stub_const("DummyHandler", Class.new.include(Wayfarer::Handler))
11
+ end
12
+
13
+ describe "staging URLs" do
14
+ before do
15
+ DummyJob.class_eval do
16
+ extend SpecHelpers
17
+
18
+ route.host test_app_hostname, to: :index
19
+
20
+ def index
21
+ stage page.meta.links.all
22
+ end
23
+ end
24
+ end
25
+
26
+ specify do
27
+ expect {
28
+ DummyJob.crawl(url)
29
+ perform_enqueued_jobs
30
+ }.to change(enqueued_jobs, :size).by(157)
31
+ end
32
+ end
33
+
34
+ context "with handler" do
35
+ before do
36
+ DummyJob.class_eval do
37
+ extend SpecHelpers
38
+
39
+ route.host test_app_hostname, to: DummyHandler
40
+ end
41
+
42
+ DummyHandler.class_eval do
43
+ route.to :index
44
+
45
+ def index
46
+ stage page.meta.links.all
47
+ end
48
+ end
49
+ end
50
+
51
+ specify do
52
+ expect {
53
+ DummyJob.crawl(url)
54
+ perform_enqueued_jobs
55
+ }.to change(enqueued_jobs, :size).by(157)
56
+ end
57
+ end
58
+ end
@@ -0,0 +1,33 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "spec_helpers"
4
+
5
+ describe Wayfarer::Middleware::BatchCompletion do
6
+ subject(:batch_completion) { described_class.new }
7
+
8
+ let(:task) { build(:task, job: job) }
9
+ let(:job) { build(:job, exception_executions: exception_executions) }
10
+ let(:exception_executions) { {} }
11
+
12
+ it "assigns cloned exception_executions" do
13
+ expect {
14
+ batch_completion.call(task)
15
+ }.to change {
16
+ task[:initial_exception_executions]
17
+ }.from(nil).to(exception_executions)
18
+
19
+ expect(task[:initial_exception_executions]).not_to be(exception_executions)
20
+ end
21
+
22
+ context "when already assigned" do
23
+ before { task[:initial_exception_executions] = Object.new }
24
+
25
+ specify do
26
+ expect { |spy| batch_completion.call(task, &spy) }.to yield_control
27
+ end
28
+
29
+ it "doesn't assign exception_executions" do
30
+ expect { batch_completion.call(task) }.not_to(change { task[:initial_exception_executions] })
31
+ end
32
+ end
33
+ end
@@ -3,9 +3,10 @@
3
3
  require "spec_helpers"
4
4
 
5
5
  describe Wayfarer::Middleware::Chain do
6
+ subject(:chain) { described_class.new(middlewares) }
7
+
6
8
  let(:task) { build(:task) }
7
9
  let(:middlewares) { [] }
8
- subject(:chain) { described_class.new(middlewares) }
9
10
 
10
11
  describe "::empty" do
11
12
  it "returns an empty chain" do
@@ -13,7 +14,7 @@ describe Wayfarer::Middleware::Chain do
13
14
  end
14
15
  end
15
16
 
16
- describe "#call" do
17
+ describe "#push" do
17
18
  it "adds middleware" do
18
19
  expect {
19
20
  chain.push(Class.new)
@@ -35,20 +36,23 @@ describe Wayfarer::Middleware::Chain do
35
36
  context "when middleware yields" do
36
37
  let(:middlewares) { [spy, spy] }
37
38
 
38
- it "yields" do
39
+ it "yields", :aggregate_failures do
39
40
  allow(middlewares.first).to receive(:call).and_yield
40
- expect(middlewares.last).to receive(:call).with(task).and_yield
41
+ allow(middlewares.last).to receive(:call).with(task).and_yield
42
+
41
43
  expect { |spy| chain.call(task, &spy) }.to yield_control
44
+ expect(middlewares.last).to have_received(:call).with(task)
42
45
  end
43
46
  end
44
47
 
45
48
  context "when middleware does not yield" do
46
49
  let(:middlewares) { [spy, spy] }
47
50
 
48
- it "does not yield" do
51
+ it "does not yield", :aggregate_failures do
49
52
  allow(middlewares.first).to receive(:call)
50
- expect(middlewares.last).not_to receive(:call)
53
+
51
54
  expect { |spy| chain.call(task, &spy) }.not_to yield_control
55
+ expect(middlewares.last).not_to have_received(:call)
52
56
  end
53
57
  end
54
58
 
@@ -58,18 +62,19 @@ describe Wayfarer::Middleware::Chain do
58
62
  it "does not alter its middleware" do
59
63
  allow(middlewares.first).to receive(:call).and_yield
60
64
  allow(middlewares.second).to receive(:call).and_yield
61
- expect { chain.call(task) }.not_to(change { chain.middlewares })
65
+
66
+ expect { chain.call(task) }.not_to(change(chain, :middlewares))
62
67
  end
63
68
  end
64
69
 
65
70
  describe "Return value" do
66
71
  let(:middlewares) do
67
72
  [build(:middleware, receiver: lambda do |task, &block|
68
- task.metadata.foobar = 42
73
+ task[:foobar] = 42
69
74
  block.call
70
75
  end),
71
76
  build(:middleware, receiver: lambda do |task|
72
- task.metadata.foobar *= 1337
77
+ task[:foobar] *= 1337
73
78
  end)]
74
79
  end
75
80
 
@@ -81,28 +86,28 @@ describe Wayfarer::Middleware::Chain do
81
86
  describe "Metadata" do
82
87
  let(:first) do
83
88
  build(:middleware, receiver: lambda do |task, &block|
84
- task.metadata.foobar = 42
85
- block.call
86
- task.metadata.barqux = 1337
87
- end)
89
+ task[:foobar] = 42
90
+ block.call
91
+ task[:barqux] = 1337
92
+ end)
88
93
  end
89
94
 
90
95
  let(:last) do
91
96
  build(:middleware, receiver: lambda do |task|
92
- raise unless task.metadata.foobar == 42
93
- raise if task.metadata.barqux
94
- end)
97
+ raise unless task[:foobar] == 42
98
+ raise if task[:barqux]
99
+ end)
95
100
  end
96
101
 
97
102
  let(:middlewares) { [first, last] }
98
103
 
99
- it "is accessible across middleware" do
104
+ it "is accessible across middleware", :aggregate_failures do
100
105
  expect {
101
106
  chain.call(task)
102
107
  }.not_to raise_error
103
108
 
104
- expect(task.metadata.foobar).to be(42)
105
- expect(task.metadata.barqux).to be(1337)
109
+ expect(task[:foobar]).to be(42)
110
+ expect(task[:barqux]).to be(1337)
106
111
  end
107
112
  end
108
113
  end
@@ -0,0 +1,83 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "spec_helpers"
4
+
5
+ describe Wayfarer::Middleware::ContentType do
6
+ let(:content_type) { "text/html" }
7
+ let(:task) { build(:task, :normalized) }
8
+ let(:page) { build(:page, headers: { "Content-Type" => content_type }) }
9
+
10
+ describe "#call" do
11
+ subject(:middleware) { Class.new(described_class).include(described_class::API).new }
12
+
13
+ before do
14
+ task[:page] = page
15
+ task[:controller] = middleware
16
+ middleware.class.content_type "text/html"
17
+ end
18
+
19
+ context "with permitted Content-Type" do
20
+ specify do
21
+ expect { |spy| middleware.call(task, &spy) }.to yield_control
22
+ end
23
+ end
24
+
25
+ context "when permitted Content-Type has parameters" do
26
+ let(:page) { build(:page, headers: { "Content-Type" => "#{content_type}; charset=UTF-8" }) }
27
+
28
+ specify do
29
+ expect { |spy| middleware.call(task, &spy) }.to yield_control
30
+ end
31
+ end
32
+
33
+ context "with forbidden Content-Type" do
34
+ let(:content_type) { "application/json" }
35
+
36
+ specify do
37
+ expect { |spy| middleware.call(task, &spy) }.not_to yield_control
38
+ end
39
+ end
40
+
41
+ context "with permitted Regexp Content-Type" do
42
+ before do
43
+ middleware.class.content_type(/text/)
44
+ end
45
+
46
+ specify do
47
+ expect { |spy| middleware.call(task, &spy) }.to yield_control
48
+ end
49
+ end
50
+ end
51
+
52
+ describe described_class::API do
53
+ subject(:controller) { Class.new.include(described_class).new }
54
+
55
+ describe "::allowed_content_types" do
56
+ describe "index" do
57
+ subject(:index) { controller.class.allowed_content_types[:index] }
58
+
59
+ it { is_expected.to be_empty }
60
+ end
61
+
62
+ describe "patterns" do
63
+ subject(:patterns) { controller.class.allowed_content_types[:patterns] }
64
+
65
+ it { is_expected.to be_empty }
66
+ end
67
+ end
68
+
69
+ describe "#content_type" do
70
+ it "allows Content-Types" do
71
+ controller.class.content_type(content_type)
72
+
73
+ expect(controller.class.allowed_content_types.dig(:index, content_type)).to be(true)
74
+ end
75
+
76
+ it "allows Content-Type patterns" do
77
+ controller.class.content_type(/text/)
78
+
79
+ expect(controller.class.allowed_content_types[:patterns]).to contain_exactly(/text/)
80
+ end
81
+ end
82
+ end
83
+ end