wayfarer 0.4.6 → 0.4.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (259)
  1. checksums.yaml +4 -4
  2. data/.env +17 -0
  3. data/.github/workflows/lint.yaml +27 -0
  4. data/.github/workflows/release.yaml +30 -0
  5. data/.github/workflows/tests.yaml +21 -0
  6. data/.gitignore +5 -1
  7. data/.rubocop.yml +36 -0
  8. data/.vale.ini +8 -0
  9. data/.yardopts +1 -3
  10. data/Dockerfile +6 -4
  11. data/Gemfile +24 -0
  12. data/Gemfile.lock +274 -164
  13. data/Rakefile +7 -51
  14. data/bin/wayfarer +1 -1
  15. data/docker-compose.yml +23 -13
  16. data/docs/cookbook/consent_screen.md +2 -2
  17. data/docs/cookbook/executing_javascript.md +3 -3
  18. data/docs/cookbook/navigation.md +12 -12
  19. data/docs/cookbook/querying_html.md +3 -3
  20. data/docs/cookbook/screenshots.md +2 -2
  21. data/docs/guides/callbacks.md +25 -125
  22. data/docs/guides/cli.md +71 -0
  23. data/docs/guides/configuration.md +10 -35
  24. data/docs/guides/development.md +67 -0
  25. data/docs/guides/handlers.md +60 -0
  26. data/docs/guides/index.md +1 -0
  27. data/docs/guides/jobs.md +142 -31
  28. data/docs/guides/navigation.md +1 -1
  29. data/docs/guides/networking/capybara.md +13 -22
  30. data/docs/guides/networking/custom_adapters.md +103 -41
  31. data/docs/guides/networking/ferrum.md +4 -4
  32. data/docs/guides/networking/http.md +9 -13
  33. data/docs/guides/networking/selenium.md +10 -11
  34. data/docs/guides/pages.md +78 -10
  35. data/docs/guides/redis.md +10 -0
  36. data/docs/guides/routing.md +156 -0
  37. data/docs/guides/tasks.md +53 -9
  38. data/docs/guides/tutorial.md +66 -0
  39. data/docs/guides/user_agents.md +115 -0
  40. data/docs/index.md +17 -40
  41. data/lib/wayfarer/base.rb +125 -46
  42. data/lib/wayfarer/batch_completion.rb +60 -0
  43. data/lib/wayfarer/callbacks.rb +22 -48
  44. data/lib/wayfarer/cli/route_printer.rb +85 -89
  45. data/lib/wayfarer/cli.rb +103 -0
  46. data/lib/wayfarer/gc.rb +18 -6
  47. data/lib/wayfarer/handler.rb +15 -7
  48. data/lib/wayfarer/kv.rb +28 -0
  49. data/lib/wayfarer/logging.rb +38 -0
  50. data/lib/wayfarer/middleware/base.rb +2 -0
  51. data/lib/wayfarer/middleware/batch_completion.rb +19 -0
  52. data/lib/wayfarer/middleware/chain.rb +7 -1
  53. data/lib/wayfarer/middleware/content_type.rb +59 -0
  54. data/lib/wayfarer/middleware/controller.rb +19 -15
  55. data/lib/wayfarer/middleware/dedup.rb +22 -13
  56. data/lib/wayfarer/middleware/dispatch.rb +17 -4
  57. data/lib/wayfarer/middleware/normalize.rb +7 -14
  58. data/lib/wayfarer/middleware/redis.rb +15 -0
  59. data/lib/wayfarer/middleware/router.rb +33 -35
  60. data/lib/wayfarer/middleware/stage.rb +5 -5
  61. data/lib/wayfarer/middleware/uri_parser.rb +31 -0
  62. data/lib/wayfarer/middleware/user_agent.rb +49 -0
  63. data/lib/wayfarer/networking/capybara.rb +1 -1
  64. data/lib/wayfarer/networking/context.rb +14 -3
  65. data/lib/wayfarer/networking/ferrum.rb +1 -4
  66. data/lib/wayfarer/networking/follow.rb +14 -7
  67. data/lib/wayfarer/networking/http.rb +1 -1
  68. data/lib/wayfarer/networking/pool.rb +23 -13
  69. data/lib/wayfarer/networking/selenium.rb +15 -7
  70. data/lib/wayfarer/networking/strategy.rb +2 -2
  71. data/lib/wayfarer/page.rb +34 -14
  72. data/lib/wayfarer/parsing/xml.rb +6 -6
  73. data/lib/wayfarer/parsing.rb +21 -0
  74. data/lib/wayfarer/redis/barrier.rb +26 -21
  75. data/lib/wayfarer/redis/counter.rb +18 -9
  76. data/lib/wayfarer/redis/pool.rb +1 -1
  77. data/lib/wayfarer/redis/resettable.rb +19 -0
  78. data/lib/wayfarer/routing/dsl.rb +166 -30
  79. data/lib/wayfarer/routing/hash_stack.rb +33 -0
  80. data/lib/wayfarer/routing/matchers/custom.rb +8 -5
  81. data/lib/wayfarer/routing/matchers/{suffix.rb → empty_params.rb} +2 -6
  82. data/lib/wayfarer/routing/matchers/host.rb +15 -9
  83. data/lib/wayfarer/routing/matchers/path.rb +11 -31
  84. data/lib/wayfarer/routing/matchers/query.rb +41 -17
  85. data/lib/wayfarer/routing/matchers/result.rb +12 -0
  86. data/lib/wayfarer/routing/matchers/scheme.rb +13 -5
  87. data/lib/wayfarer/routing/matchers/url.rb +13 -5
  88. data/lib/wayfarer/routing/path_consumer.rb +130 -0
  89. data/lib/wayfarer/routing/path_finder.rb +151 -23
  90. data/lib/wayfarer/routing/result.rb +1 -1
  91. data/lib/wayfarer/routing/root_route.rb +17 -1
  92. data/lib/wayfarer/routing/route.rb +66 -19
  93. data/lib/wayfarer/routing/serializable.rb +28 -0
  94. data/lib/wayfarer/routing/sub_route.rb +53 -0
  95. data/lib/wayfarer/routing/target_route.rb +17 -1
  96. data/lib/wayfarer/stringify.rb +21 -30
  97. data/lib/wayfarer/task.rb +9 -17
  98. data/lib/wayfarer/uri/normalization.rb +120 -0
  99. data/lib/wayfarer.rb +72 -5
  100. data/mise.toml +2 -0
  101. data/mkdocs.yml +44 -8
  102. data/rake/docs.rake +26 -0
  103. data/rake/lint.rake +9 -0
  104. data/rake/release.rake +23 -0
  105. data/rake/tests.rake +32 -0
  106. data/requirements.txt +1 -1
  107. data/spec/factories/job.rb +8 -0
  108. data/spec/factories/middleware.rb +2 -2
  109. data/spec/factories/path_finder.rb +11 -0
  110. data/spec/factories/redis.rb +19 -0
  111. data/spec/factories/task.rb +46 -2
  112. data/spec/spec_helpers.rb +55 -51
  113. data/spec/support/active_job_helpers.rb +8 -0
  114. data/spec/support/integration_helpers.rb +21 -0
  115. data/spec/support/redis_helpers.rb +9 -0
  116. data/spec/support/test_app.rb +66 -37
  117. data/spec/wayfarer/base_spec.rb +200 -0
  118. data/spec/wayfarer/batch_completion_spec.rb +142 -0
  119. data/spec/wayfarer/cli/job_spec.rb +88 -0
  120. data/spec/wayfarer/cli/routing_spec.rb +322 -0
  121. data/spec/{cli → wayfarer/cli}/version_spec.rb +1 -1
  122. data/spec/wayfarer/gc_spec.rb +29 -0
  123. data/spec/wayfarer/handler_spec.rb +9 -0
  124. data/spec/wayfarer/integration/callbacks_spec.rb +200 -0
  125. data/spec/wayfarer/integration/content_type_spec.rb +37 -0
  126. data/spec/wayfarer/integration/custom_routing_spec.rb +51 -0
  127. data/spec/wayfarer/integration/gc_spec.rb +40 -0
  128. data/spec/wayfarer/integration/handler_spec.rb +65 -0
  129. data/spec/wayfarer/integration/page_spec.rb +79 -0
  130. data/spec/wayfarer/integration/params_spec.rb +64 -0
  131. data/spec/wayfarer/integration/parsing_spec.rb +99 -0
  132. data/spec/wayfarer/integration/retry_spec.rb +112 -0
  133. data/spec/wayfarer/integration/stage_spec.rb +58 -0
  134. data/spec/wayfarer/middleware/batch_completion_spec.rb +33 -0
  135. data/spec/{middleware → wayfarer/middleware}/chain_spec.rb +24 -19
  136. data/spec/wayfarer/middleware/content_type_spec.rb +83 -0
  137. data/spec/{middleware → wayfarer/middleware}/controller_spec.rb +24 -22
  138. data/spec/wayfarer/middleware/dedup_spec.rb +66 -0
  139. data/spec/wayfarer/middleware/normalize_spec.rb +32 -0
  140. data/spec/wayfarer/middleware/router_spec.rb +102 -0
  141. data/spec/wayfarer/middleware/stage_spec.rb +63 -0
  142. data/spec/wayfarer/middleware/uri_parser_spec.rb +63 -0
  143. data/spec/wayfarer/middleware/user_agent_spec.rb +158 -0
  144. data/spec/wayfarer/networking/capybara_spec.rb +13 -0
  145. data/spec/{networking → wayfarer/networking}/context_spec.rb +46 -38
  146. data/spec/wayfarer/networking/ferrum_spec.rb +13 -0
  147. data/spec/{networking → wayfarer/networking}/follow_spec.rb +11 -6
  148. data/spec/wayfarer/networking/http_spec.rb +12 -0
  149. data/spec/{networking → wayfarer/networking}/pool_spec.rb +16 -14
  150. data/spec/wayfarer/networking/selenium_spec.rb +12 -0
  151. data/spec/{networking → wayfarer/networking}/strategy.rb +33 -54
  152. data/spec/wayfarer/page_spec.rb +69 -0
  153. data/spec/{parsing → wayfarer/parsing}/json_spec.rb +1 -1
  154. data/spec/wayfarer/parsing/xml_parse_spec.rb +25 -0
  155. data/spec/wayfarer/redis/barrier_spec.rb +39 -0
  156. data/spec/wayfarer/redis/counter_spec.rb +34 -0
  157. data/spec/{redis → wayfarer/redis}/pool_spec.rb +4 -3
  158. data/spec/{routing → wayfarer/routing}/dsl_spec.rb +12 -22
  159. data/spec/wayfarer/routing/hash_stack_spec.rb +63 -0
  160. data/spec/wayfarer/routing/integration_spec.rb +101 -0
  161. data/spec/wayfarer/routing/matchers/custom_spec.rb +39 -0
  162. data/spec/wayfarer/routing/matchers/host_spec.rb +56 -0
  163. data/spec/wayfarer/routing/matchers/matcher.rb +17 -0
  164. data/spec/wayfarer/routing/matchers/path_spec.rb +43 -0
  165. data/spec/wayfarer/routing/matchers/query_spec.rb +123 -0
  166. data/spec/wayfarer/routing/matchers/scheme_spec.rb +45 -0
  167. data/spec/wayfarer/routing/matchers/url_spec.rb +33 -0
  168. data/spec/wayfarer/routing/path_consumer_spec.rb +123 -0
  169. data/spec/wayfarer/routing/path_finder_spec.rb +409 -0
  170. data/spec/wayfarer/routing/root_route_spec.rb +51 -0
  171. data/spec/wayfarer/routing/route_spec.rb +74 -0
  172. data/spec/wayfarer/routing/sub_route_spec.rb +103 -0
  173. data/spec/wayfarer/task_spec.rb +13 -0
  174. data/spec/wayfarer/uri/normalization_spec.rb +98 -0
  175. data/spec/wayfarer_spec.rb +2 -2
  176. data/wayfarer.gemspec +18 -28
  177. metadata +797 -265
  178. data/.github/workflows/ci.yaml +0 -32
  179. data/.rbenv-gemsets +0 -1
  180. data/.ruby-version +0 -1
  181. data/RELEASING.md +0 -17
  182. data/docs/cookbook/user_agent.md +0 -7
  183. data/docs/guides/error_handling.md +0 -53
  184. data/docs/guides/networking.md +0 -94
  185. data/docs/guides/performance.md +0 -130
  186. data/docs/guides/reliability.md +0 -41
  187. data/docs/guides/routing/steering.md +0 -30
  188. data/docs/reference/api/base.md +0 -48
  189. data/docs/reference/cli.md +0 -61
  190. data/docs/reference/configuration_keys.md +0 -43
  191. data/docs/reference/environment_variables.md +0 -83
  192. data/lib/wayfarer/cli/base.rb +0 -45
  193. data/lib/wayfarer/cli/generate.rb +0 -17
  194. data/lib/wayfarer/cli/job.rb +0 -56
  195. data/lib/wayfarer/cli/route.rb +0 -29
  196. data/lib/wayfarer/cli/runner.rb +0 -34
  197. data/lib/wayfarer/cli/templates/Gemfile.tt +0 -5
  198. data/lib/wayfarer/cli/templates/job.rb.tt +0 -10
  199. data/lib/wayfarer/config/capybara.rb +0 -10
  200. data/lib/wayfarer/config/ferrum.rb +0 -11
  201. data/lib/wayfarer/config/networking.rb +0 -29
  202. data/lib/wayfarer/config/redis.rb +0 -14
  203. data/lib/wayfarer/config/root.rb +0 -11
  204. data/lib/wayfarer/config/selenium.rb +0 -21
  205. data/lib/wayfarer/config/strconv.rb +0 -45
  206. data/lib/wayfarer/config/struct.rb +0 -72
  207. data/lib/wayfarer/middleware/fetch.rb +0 -56
  208. data/lib/wayfarer/redis/connection.rb +0 -13
  209. data/lib/wayfarer/redis/version.rb +0 -19
  210. data/lib/wayfarer/routing/router.rb +0 -28
  211. data/spec/base_spec.rb +0 -224
  212. data/spec/callbacks_spec.rb +0 -102
  213. data/spec/cli/generate_spec.rb +0 -39
  214. data/spec/cli/job_spec.rb +0 -78
  215. data/spec/config/capybara_spec.rb +0 -18
  216. data/spec/config/ferrum_spec.rb +0 -24
  217. data/spec/config/networking_spec.rb +0 -73
  218. data/spec/config/redis_spec.rb +0 -32
  219. data/spec/config/root_spec.rb +0 -31
  220. data/spec/config/selenium_spec.rb +0 -56
  221. data/spec/config/strconv_spec.rb +0 -58
  222. data/spec/config/struct_spec.rb +0 -66
  223. data/spec/fixtures/dummy_job.rb +0 -7
  224. data/spec/gc_spec.rb +0 -59
  225. data/spec/handler_spec.rb +0 -11
  226. data/spec/integration/callbacks_spec.rb +0 -85
  227. data/spec/integration/page_spec.rb +0 -62
  228. data/spec/integration/params_spec.rb +0 -56
  229. data/spec/integration/stage_spec.rb +0 -51
  230. data/spec/integration/steering_spec.rb +0 -57
  231. data/spec/middleware/dedup_spec.rb +0 -88
  232. data/spec/middleware/dispatch_spec.rb +0 -43
  233. data/spec/middleware/fetch_spec.rb +0 -155
  234. data/spec/middleware/normalize_spec.rb +0 -29
  235. data/spec/middleware/router_spec.rb +0 -105
  236. data/spec/middleware/stage_spec.rb +0 -62
  237. data/spec/networking/capybara_spec.rb +0 -12
  238. data/spec/networking/ferrum_spec.rb +0 -12
  239. data/spec/networking/http_spec.rb +0 -12
  240. data/spec/networking/selenium_spec.rb +0 -12
  241. data/spec/page_spec.rb +0 -47
  242. data/spec/parsing/xml_spec.rb +0 -25
  243. data/spec/redis/barrier_spec.rb +0 -78
  244. data/spec/redis/counter_spec.rb +0 -32
  245. data/spec/redis/version_spec.rb +0 -13
  246. data/spec/routing/integration_spec.rb +0 -110
  247. data/spec/routing/matchers/custom_spec.rb +0 -31
  248. data/spec/routing/matchers/host_spec.rb +0 -49
  249. data/spec/routing/matchers/path_spec.rb +0 -43
  250. data/spec/routing/matchers/query_spec.rb +0 -137
  251. data/spec/routing/matchers/scheme_spec.rb +0 -25
  252. data/spec/routing/matchers/suffix_spec.rb +0 -41
  253. data/spec/routing/matchers/uri_spec.rb +0 -27
  254. data/spec/routing/path_finder_spec.rb +0 -33
  255. data/spec/routing/root_route_spec.rb +0 -29
  256. data/spec/routing/route_spec.rb +0 -43
  257. data/spec/routing/router_spec.rb +0 -24
  258. data/spec/task_spec.rb +0 -34
  259. data/spec/{stringify_spec.rb → wayfarer/stringify_spec.rb} +2 -2
@@ -3,20 +3,18 @@
3
3
  require "spec_helpers"
4
4
 
5
5
  describe Wayfarer::Middleware::Controller do
6
+ subject(:controller) { Class.new.include(described_class) }
7
+
6
8
  let(:task) { build(:task) }
7
9
 
8
10
  let(:middleware) do
9
- Class.new do
10
- extend Wayfarer::Middleware::Base
11
-
12
- module API; end
13
-
14
- def call(task); end
11
+ Class.new.tap do |klass|
12
+ klass.extend(Wayfarer::Middleware::Base)
13
+ klass.define_method(:call, &->(_) {})
14
+ klass.const_set("API", Module.new)
15
15
  end
16
16
  end
17
17
 
18
- subject(:controller) { Class.new.include(described_class) }
19
-
20
18
  describe "::use" do
21
19
  it "adds middleware" do
22
20
  expect {
@@ -28,7 +26,7 @@ describe Wayfarer::Middleware::Controller do
28
26
  controller.use(middleware)
29
27
  expect(controller.chain.middlewares.last).to be_a(Wayfarer::Middleware::Lazy)
30
28
  expect(controller.chain.middlewares.last.klass).to be(middleware)
31
- expect_any_instance_of(middleware).to receive(:call).with(task)
29
+ expect_any_instance_of(middleware).to receive(:call).with(task) # rubocop:disable RSpec/AnyInstance
32
30
  controller.new.call(task)
33
31
  end
34
32
  end
@@ -42,45 +40,49 @@ describe Wayfarer::Middleware::Controller do
42
40
  end
43
41
 
44
42
  describe "#call" do
45
- subject { controller.new }
43
+ let(:instance) { controller.new }
46
44
 
47
45
  it "assigns the task" do
48
46
  expect {
49
- subject.call(task)
50
- }.to change { subject.task }.to(task)
47
+ instance.call(task)
48
+ }.to change(instance, :task).to(task)
51
49
  end
52
50
 
53
51
  context "without job assigned" do
54
52
  it "assigns itself" do
55
53
  expect {
56
- subject.call(task)
57
- }.to change { task.metadata.job }.to(subject)
54
+ instance.call(task)
55
+ }.to change { task[:job] }.to(instance)
58
56
  end
59
57
  end
60
58
 
61
59
  context "with job assigned" do
62
- before { task.metadata.job = Object.new }
60
+ before { task[:job] = Object.new }
63
61
 
64
62
  it "does not override the job" do
65
63
  expect {
66
- subject.call(task)
67
- }.not_to(change { task.metadata.job })
64
+ instance.call(task)
65
+ }.not_to(change { task[:job] })
68
66
  end
69
67
  end
70
68
 
71
69
  it "assigns itself as controller" do
72
70
  expect {
73
- subject.call(task)
74
- }.to change { task.metadata.controller }.to(subject)
71
+ instance.call(task)
72
+ }.to change { task[:controller] }.to(instance)
75
73
  end
76
74
 
77
75
  it "calls the chain" do
78
- expect(subject.chain).to receive(:call)
79
- subject.call(task)
76
+ chain = instance.class.chain
77
+ allow(chain).to receive(:call)
78
+
79
+ instance.call(task)
80
+
81
+ expect(chain).to have_received(:call)
80
82
  end
81
83
 
82
84
  it "yields" do
83
- expect { |spy| subject.call(task, &spy) }.to yield_control
85
+ expect { |spy| instance.call(task, &spy) }.to yield_control
84
86
  end
85
87
  end
86
88
  end
@@ -0,0 +1,66 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../../spec_helpers"
4
+
5
+ RSpec.describe Wayfarer::Middleware::Dedup, :redis do
6
+ subject(:middleware) { described_class.new }
7
+
8
+ let(:job) { build(:job, executions: 1) }
9
+ let(:controller) { job }
10
+ let(:task) { build(:task, :normalized, :barrier, job: job, controller: controller, url: "http://example.com/") }
11
+ let(:barrier) { task[:barrier] }
12
+
13
+ context "with unseen URL" do
14
+ specify do
15
+ expect { |block| middleware.call(task, &block) }.to yield_control
16
+ end
17
+ end
18
+
19
+ context "with seen URL" do
20
+ before { barrier.check!(task[:uri].to_s) }
21
+
22
+ specify do
23
+ expect { |block| middleware.call(task, &block) }.not_to yield_control
24
+ end
25
+
26
+ context "when task is rerouted" do
27
+ let(:controller) { Class.new.include(Wayfarer::Handler) }
28
+
29
+ specify do
30
+ expect { |block| middleware.call(task, &block) }.to yield_control
31
+ end
32
+ end
33
+
34
+ context "with retry" do
35
+ let(:job) { build(:job, executions: 3) }
36
+
37
+ specify do
38
+ expect { |block| middleware.call(task, &block) }.to yield_control
39
+ end
40
+ end
41
+ end
42
+
43
+ describe "cache keys" do
44
+ before do
45
+ task[:barrier] = spy
46
+ end
47
+
48
+ specify do
49
+ middleware.call(task)
50
+
51
+ expect(task[:barrier]).to have_received(:check!).with(task[:uri].to_s)
52
+ end
53
+
54
+ context "with custom key" do
55
+ before do
56
+ Wayfarer.config[:deduplication][:key] = ->(task) { Digest::MD5.base64digest(task[:uri]) }
57
+ end
58
+
59
+ specify do
60
+ middleware.call(task)
61
+
62
+ expect(task[:barrier]).to have_received(:check!).with("qbnwQzbOAYGgjndOARE7MQ==")
63
+ end
64
+ end
65
+ end
66
+ end
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "spec_helpers"
4
+
5
+ describe Wayfarer::Middleware::Normalize do
6
+ subject(:middleware) { described_class.new }
7
+
8
+ let(:task) { build(:task, :uri, url: "http://example.com/") }
9
+
10
+ context "with assigned URI" do
11
+ specify do
12
+ expect { |spy| middleware.call(task, &spy) }.to yield_control
13
+ end
14
+
15
+ specify do
16
+ expect { middleware.call(task) }.to(change { task[:uri] })
17
+ end
18
+ end
19
+
20
+ context "with invalid URL" do
21
+ before do
22
+ allow(Wayfarer::URI::Normalization)
23
+ .to receive(:canonical!)
24
+ .with(task[:uri])
25
+ .and_raise(Wayfarer::URI::Normalization::InvalidURIError)
26
+ end
27
+
28
+ specify do
29
+ expect { |spy| middleware.call(task, &spy) }.not_to yield_control
30
+ end
31
+ end
32
+ end
@@ -0,0 +1,102 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "spec_helpers"
4
+
5
+ describe Wayfarer::Middleware::Router do
6
+ subject(:middleware) { described_class.new }
7
+
8
+ let(:task) { build(:task, :uri) }
9
+
10
+ describe "#call" do
11
+ let(:controller) do
12
+ Struct.new(:task).include(described_class::API).new(task)
13
+ end
14
+
15
+ before do
16
+ allow(controller.class.route).to receive(:invoke)
17
+ .with(task)
18
+ .and_return(result)
19
+ task[:controller] = controller
20
+ end
21
+
22
+ context "with matching route" do
23
+ let(:action) { :action }
24
+ let(:params) { { "foo" => "bar" } }
25
+ let(:result) do
26
+ Wayfarer::Routing::Result::Match.new(action, params)
27
+ end
28
+
29
+ it "assigns the action" do
30
+ expect {
31
+ middleware.call(task)
32
+ }.to change { task[:action] }.to(action)
33
+ end
34
+
35
+ it "merges params" do
36
+ task[:params] = ActiveSupport::HashWithIndifferentAccess.new("bar" => "qux")
37
+
38
+ expect {
39
+ middleware.call(task)
40
+ }.to change { task[:params] }.to("foo" => "bar", "bar" => "qux")
41
+ end
42
+
43
+ specify do
44
+ expect { |spy| middleware.call(task, &spy) }.to yield_control
45
+ end
46
+
47
+ specify do
48
+ # rubocop:disable RSpec/MessageSpies
49
+ expect(Wayfarer::Logging.logger).to receive(:add).with(Logger::INFO, kind_of(String))
50
+ # rubocop:enable RSpec/MessageSpies
51
+
52
+ middleware.call(task)
53
+ end
54
+ end
55
+
56
+ context "without matching route" do
57
+ let(:result) { Wayfarer::Routing::Result::Mismatch.instance }
58
+
59
+ it "does not assign the action" do
60
+ expect {
61
+ middleware.call(task)
62
+ }.not_to(change { task[:action] })
63
+ end
64
+
65
+ it "does not alter params" do
66
+ task[:params] = ActiveSupport::HashWithIndifferentAccess.new("bar" => "qux")
67
+
68
+ expect {
69
+ middleware.call(task)
70
+ }.not_to(change { task[:params] })
71
+ end
72
+
73
+ specify do
74
+ expect { |spy| middleware.call(task, &spy) }.not_to yield_control
75
+ end
76
+
77
+ specify do
78
+ # rubocop:disable RSpec/MessageSpies
79
+ expect(Wayfarer::Logging.logger).to receive(:add).with(Logger::INFO, kind_of(String))
80
+ # rubocop:enable RSpec/MessageSpies
81
+
82
+ middleware.call(task)
83
+ end
84
+ end
85
+ end
86
+
87
+ describe described_class::API do
88
+ subject(:controller) { Struct.new(:task).include(described_class).new(task) }
89
+
90
+ describe "::route" do
91
+ it "returns a root route" do
92
+ expect(controller.class.route).to be_a(Wayfarer::Routing::RootRoute)
93
+ end
94
+
95
+ it "adds a routing block" do
96
+ expect {
97
+ controller.class.route.to(:index)
98
+ }.to change { controller.class.route.children.count }.by(1)
99
+ end
100
+ end
101
+ end
102
+ end
@@ -0,0 +1,63 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "spec_helpers"
4
+
5
+ describe Wayfarer::Middleware::Stage do
6
+ subject(:middleware) { described_class.new }
7
+
8
+ let(:task) { build(:task) }
9
+
10
+ describe "#call" do
11
+ it "assigns an empty set" do
12
+ middleware.call(task)
13
+ expect(task[:staged_urls]).to eq(SortedSet.new)
14
+ end
15
+
16
+ it "yields" do
17
+ expect { |spy| middleware.call(task, &spy) }.to yield_control
18
+ end
19
+
20
+ it "enqueues tasks" do
21
+ job = double(crawl: nil) # rubocop:disable RSpec/VerifiedDoubles
22
+ urls = [test_app_path("/alpha"), test_app_path("/beta")]
23
+
24
+ task[:job] = instance_spy(Object, class: job)
25
+
26
+ middleware.call(task) do
27
+ task[:staged_urls] = SortedSet.new(urls)
28
+ end
29
+
30
+ expect(job).to have_received(:crawl).with(urls.first, batch: task.batch).ordered
31
+ expect(job).to have_received(:crawl).with(urls.second, batch: task.batch).ordered
32
+ end
33
+
34
+ it "resets staged URLs" do
35
+ task[:staged_urls] = SortedSet.new([test_app_path("/foo")])
36
+
37
+ expect {
38
+ middleware.call(task)
39
+ }.to change { task[:staged_urls].count }.to(0)
40
+ end
41
+ end
42
+
43
+ describe described_class::API do
44
+ subject(:controller) do
45
+ Struct.new(:task).include(described_class).new(task)
46
+ end
47
+
48
+ describe "#stage" do
49
+ before { task[:staged_urls] = SortedSet.new }
50
+
51
+ it "stages URLs" do
52
+ expect {
53
+ controller.stage(test_app_path("/foo"))
54
+ }.to change { task[:staged_urls].count }.by(1)
55
+ end
56
+
57
+ it "converts to strings" do
58
+ controller.stage(Addressable::URI.parse(test_app_path("/foo")))
59
+ expect(task[:staged_urls].to_a.first).to be_a(String)
60
+ end
61
+ end
62
+ end
63
+ end
@@ -0,0 +1,63 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "spec_helpers"
4
+
5
+ describe Wayfarer::Middleware::UriParser do
6
+ subject(:middleware) { described_class.new }
7
+
8
+ let(:task) { build(:task) }
9
+
10
+ it "parses URLs" do
11
+ expect { middleware.call(task) }.to change { task[:uri] }.to(Addressable::URI.parse(task.url))
12
+ end
13
+
14
+ specify do
15
+ expect { |spy| middleware.call(task, &spy) }.to yield_control
16
+ end
17
+
18
+ context "with already parsed URI" do
19
+ before do
20
+ task[:uri] = Addressable::URI.parse("http://example.com")
21
+ end
22
+
23
+ specify do
24
+ expect { |spy| middleware.call(task, &spy) }.to yield_control
25
+ end
26
+
27
+ specify do
28
+ expect { middleware.call(task) }.not_to(change { task[:uri] })
29
+ end
30
+ end
31
+
32
+ context "with invalid URL" do
33
+ let(:task) { build(:task, url: "ht%0atp://localhost/") }
34
+
35
+ specify do
36
+ expect { |spy| middleware.call(task, &spy) }.not_to yield_control
37
+ end
38
+
39
+ specify do
40
+ expect(Wayfarer::Logging.logger)
41
+ .to receive(:add) # rubocop:disable RSpec/MessageSpies
42
+ .with(Logger::INFO, "Not processing invalid URL (Invalid scheme format: 'ht%0atp')")
43
+
44
+ middleware.call(task)
45
+ end
46
+ end
47
+
48
+ describe described_class::API do
49
+ subject(:controller) do
50
+ Struct.new(:task).include(described_class).new(task)
51
+ end
52
+
53
+ describe "#uri" do
54
+ let(:uri) { Addressable::URI.parse(task.url) }
55
+
56
+ before { task[:uri] = uri }
57
+
58
+ specify do
59
+ expect(controller.uri).to be(uri)
60
+ end
61
+ end
62
+ end
63
+ end
@@ -0,0 +1,158 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "spec_helpers"
4
+
5
+ describe Wayfarer::Middleware::UserAgent do
6
+ subject(:middleware) { described_class.new }
7
+
8
+ let(:task) { build(:task, :staged_urls) }
9
+ let(:page) { instance_double(Wayfarer::Page) }
10
+ let(:agent) { Object.new }
11
+ let(:result) { Wayfarer::Networking::Result::Success.new(page) }
12
+ let(:context) do
13
+ instance_spy(Wayfarer::Networking::Context, instance: agent).tap do |context|
14
+ allow(context).to receive(:fetch).with(task.url).and_return(result)
15
+ end
16
+ end
17
+
18
+ before { task[:controller] = controller }
19
+
20
+ describe "#call" do
21
+ let(:controller) do
22
+ Struct.new(:task).include(Wayfarer::Middleware::Stage::API).new(task)
23
+ end
24
+
25
+ before do
26
+ allow(controller).to receive(:run_callbacks).with(:fetch).and_yield
27
+
28
+ spy.tap do |pool|
29
+ allow(pool).to receive(:with).and_yield(context)
30
+ allow(middleware).to receive(:pool).and_return(pool) # rubocop:disable RSpec/SubjectStub
31
+ end
32
+ end
33
+
34
+ context "with page assigned" do
35
+ before { task[:page] = page }
36
+
37
+ specify do
38
+ expect {
39
+ middleware.call(task)
40
+ }.not_to(change { task[:page] })
41
+ end
42
+
43
+ specify do
44
+ expect { |spy| middleware.call(task, &spy) }.to yield_control
45
+ end
46
+ end
47
+
48
+ specify do
49
+ middleware.call(task)
50
+
51
+ expect(controller).to have_received(:run_callbacks).with(:fetch)
52
+ end
53
+
54
+ specify do
55
+ middleware.call(task)
56
+
57
+ expect(context).to have_received(:fetch).with(task.url)
58
+ end
59
+
60
+ context "with Redirect" do
61
+ let(:redirect_url) { test_app_path("/foobar") }
62
+ let(:result) { Wayfarer::Networking::Result::Redirect.new(page) }
63
+
64
+ specify do
65
+ expect {
66
+ middleware.call(task)
67
+ }.to change { task[:staged_urls].count }.by(1)
68
+ end
69
+
70
+ specify do
71
+ expect { |spy| middleware.call(task, &spy) }.not_to yield_control
72
+ end
73
+ end
74
+
75
+ context "with Success" do
76
+ specify do
77
+ expect {
78
+ middleware.call(task)
79
+ }.to change { task[:context] }.to(context)
80
+ end
81
+
82
+ specify do
83
+ expect {
84
+ middleware.call(task)
85
+ }.to change { task[:page] }.to(result.page)
86
+ end
87
+
88
+ specify do
89
+ expect { |spy| middleware.call(task, &spy) }.to yield_control
90
+ end
91
+ end
92
+ end
93
+
94
+ describe described_class::API do
95
+ subject(:controller) do
96
+ Struct.new(:task).include(described_class).new(task)
97
+ end
98
+
99
+ describe "#user_agent" do
100
+ before { task[:context] = context }
101
+
102
+ specify do
103
+ expect(controller.user_agent).to be(context.instance)
104
+ end
105
+ end
106
+
107
+ describe "#page" do
108
+ before { task[:page] = page }
109
+
110
+ specify do
111
+ expect(controller.page).to be(task[:page])
112
+ end
113
+
114
+ context "with live keyword" do
115
+ before { task[:context] = context }
116
+
117
+ context "with stateful agent" do
118
+ before do
119
+ result.page = Object.new
120
+ allow(context).to receive(:live).and_return(result)
121
+ end
122
+
123
+ it "replaces the page" do
124
+ expect {
125
+ controller.page(live: true)
126
+ }.to change { task[:page] }.to(result.page)
127
+ end
128
+ end
129
+
130
+ context "with stateless agent" do
131
+ before { allow(context).to receive(:live).and_return(nil) }
132
+
133
+ it "does not alter the page" do
134
+ expect {
135
+ controller.page(live: true)
136
+ }.not_to(change { task[:page] })
137
+ end
138
+ end
139
+ end
140
+ end
141
+
142
+ describe "#fetch" do
143
+ subject(:page) { controller.fetch(url) }
144
+
145
+ let(:url) { test_app_path("/redirect?times=3") }
146
+
147
+ it { is_expected.to be_a(Wayfarer::Page) }
148
+
149
+ context "with reries exhausted" do
150
+ let(:url) { test_app_path("/redirect?times=4") }
151
+
152
+ specify do
153
+ expect { page }.to raise_error(Wayfarer::Networking::Follow::RedirectsExhaustedError)
154
+ end
155
+ end
156
+ end
157
+ end
158
+ end
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "spec_helpers"
4
+ require_relative "strategy"
5
+
6
+ describe Wayfarer::Networking::Capybara, :ferrum do
7
+ it_behaves_like "Network strategy", strategy: described_class,
8
+ browser: true,
9
+ request_headers: false,
10
+ response_headers: true,
11
+ status_code: true,
12
+ raises_on_error_response: true # uses Ferrum
13
+ end