wayfarer 0.4.6 → 0.4.7

Sign up to get free protection for your applications and to get access to all the features.
Files changed (175) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/lint.yaml +25 -0
  3. data/.github/workflows/release.yaml +29 -0
  4. data/.github/workflows/tests.yaml +30 -0
  5. data/.gitignore +4 -0
  6. data/.rubocop.yml +5 -0
  7. data/.vale.ini +5 -0
  8. data/.yardopts +1 -3
  9. data/Dockerfile +5 -4
  10. data/Gemfile +3 -0
  11. data/Gemfile.lock +107 -102
  12. data/Rakefile +5 -56
  13. data/bin/wayfarer +1 -1
  14. data/docker-compose.yml +20 -9
  15. data/docs/cookbook/consent_screen.md +2 -2
  16. data/docs/cookbook/executing_javascript.md +3 -3
  17. data/docs/cookbook/navigation.md +12 -12
  18. data/docs/cookbook/querying_html.md +3 -3
  19. data/docs/cookbook/screenshots.md +2 -2
  20. data/docs/cookbook/user_agent.md +1 -1
  21. data/docs/design.md +36 -0
  22. data/docs/guides/callbacks.md +24 -126
  23. data/docs/guides/configuration.md +8 -8
  24. data/docs/guides/handlers.md +60 -0
  25. data/docs/guides/index.md +1 -0
  26. data/docs/guides/jobs/error_handling.md +40 -0
  27. data/docs/guides/jobs.md +99 -31
  28. data/docs/guides/navigation.md +1 -1
  29. data/docs/guides/networking/capybara.md +13 -22
  30. data/docs/guides/networking/custom_adapters.md +82 -41
  31. data/docs/guides/networking/ferrum.md +4 -4
  32. data/docs/guides/networking/http.md +9 -13
  33. data/docs/guides/networking/selenium.md +10 -11
  34. data/docs/guides/pages.md +76 -10
  35. data/docs/guides/redis.md +10 -0
  36. data/docs/guides/routing.md +74 -0
  37. data/docs/guides/tasks.md +33 -9
  38. data/docs/guides/tutorial.md +60 -0
  39. data/docs/guides/user_agents.md +113 -0
  40. data/docs/index.md +17 -40
  41. data/docs/reference/cli.md +35 -25
  42. data/docs/reference/configuration.md +36 -0
  43. data/lib/wayfarer/base.rb +124 -46
  44. data/lib/wayfarer/batch_completion.rb +56 -0
  45. data/lib/wayfarer/callbacks.rb +22 -48
  46. data/lib/wayfarer/cli/route_printer.rb +71 -57
  47. data/lib/wayfarer/cli.rb +121 -0
  48. data/lib/wayfarer/gc.rb +13 -6
  49. data/lib/wayfarer/handler.rb +15 -7
  50. data/lib/wayfarer/logging.rb +38 -0
  51. data/lib/wayfarer/middleware/base.rb +2 -0
  52. data/lib/wayfarer/middleware/batch_completion.rb +19 -0
  53. data/lib/wayfarer/middleware/content_type.rb +54 -0
  54. data/lib/wayfarer/middleware/controller.rb +19 -15
  55. data/lib/wayfarer/middleware/dedup.rb +16 -13
  56. data/lib/wayfarer/middleware/dispatch.rb +12 -4
  57. data/lib/wayfarer/middleware/normalize.rb +12 -11
  58. data/lib/wayfarer/middleware/redis.rb +15 -0
  59. data/lib/wayfarer/middleware/router.rb +33 -35
  60. data/lib/wayfarer/middleware/stage.rb +5 -5
  61. data/lib/wayfarer/middleware/uri_parser.rb +30 -0
  62. data/lib/wayfarer/middleware/user_agent.rb +49 -0
  63. data/lib/wayfarer/networking/capybara.rb +1 -1
  64. data/lib/wayfarer/networking/context.rb +2 -2
  65. data/lib/wayfarer/networking/ferrum.rb +2 -2
  66. data/lib/wayfarer/networking/follow.rb +12 -6
  67. data/lib/wayfarer/networking/http.rb +1 -1
  68. data/lib/wayfarer/networking/pool.rb +17 -12
  69. data/lib/wayfarer/networking/selenium.rb +3 -3
  70. data/lib/wayfarer/networking/strategy.rb +2 -2
  71. data/lib/wayfarer/page.rb +36 -14
  72. data/lib/wayfarer/parsing/xml.rb +6 -6
  73. data/lib/wayfarer/parsing.rb +24 -0
  74. data/lib/wayfarer/redis/barrier.rb +13 -21
  75. data/lib/wayfarer/redis/counter.rb +19 -9
  76. data/lib/wayfarer/redis/pool.rb +1 -1
  77. data/lib/wayfarer/redis/resettable.rb +19 -0
  78. data/lib/wayfarer/routing/dsl.rb +1 -0
  79. data/lib/wayfarer/routing/matchers/path.rb +4 -2
  80. data/lib/wayfarer/routing/root_route.rb +5 -1
  81. data/lib/wayfarer/routing/route.rb +4 -14
  82. data/lib/wayfarer/stringify.rb +22 -30
  83. data/lib/wayfarer/task.rb +12 -18
  84. data/lib/wayfarer.rb +28 -1
  85. data/mkdocs.yml +52 -7
  86. data/rake/docs.rake +26 -0
  87. data/rake/lint.rake +105 -0
  88. data/rake/release.rake +29 -0
  89. data/rake/tests.rake +28 -0
  90. data/requirements.txt +1 -1
  91. data/spec/base_spec.rb +140 -160
  92. data/spec/batch_completion_spec.rb +104 -0
  93. data/spec/cli/job_spec.rb +19 -23
  94. data/spec/cli/routing_spec.rb +101 -0
  95. data/spec/cli/version_spec.rb +1 -1
  96. data/spec/factories/task.rb +7 -1
  97. data/spec/fixtures/dummy_job.rb +5 -3
  98. data/spec/gc_spec.rb +8 -50
  99. data/spec/handler_spec.rb +1 -1
  100. data/spec/integration/callbacks_spec.rb +157 -45
  101. data/spec/integration/content_type_spec.rb +145 -0
  102. data/spec/integration/gc_spec.rb +44 -0
  103. data/spec/integration/handler_spec.rb +66 -0
  104. data/spec/integration/page_spec.rb +44 -29
  105. data/spec/integration/params_spec.rb +33 -25
  106. data/spec/integration/parsing_spec.rb +125 -0
  107. data/spec/integration/routing_spec.rb +18 -0
  108. data/spec/integration/stage_spec.rb +27 -20
  109. data/spec/middleware/batch_completion_spec.rb +34 -0
  110. data/spec/middleware/chain_spec.rb +8 -8
  111. data/spec/middleware/content_type_spec.rb +86 -0
  112. data/spec/middleware/controller_spec.rb +5 -5
  113. data/spec/middleware/dedup_spec.rb +38 -55
  114. data/spec/middleware/dispatch_spec.rb +23 -7
  115. data/spec/middleware/normalize_spec.rb +44 -13
  116. data/spec/middleware/router_spec.rb +29 -30
  117. data/spec/middleware/stage_spec.rb +8 -8
  118. data/spec/middleware/uri_parser_spec.rb +53 -0
  119. data/spec/middleware/{fetch_spec.rb → user_agent_spec.rb} +28 -27
  120. data/spec/networking/context_spec.rb +1 -1
  121. data/spec/networking/follow_spec.rb +2 -2
  122. data/spec/networking/pool_spec.rb +5 -5
  123. data/spec/networking/strategy.rb +2 -2
  124. data/spec/page_spec.rb +42 -20
  125. data/spec/parsing/xml_spec.rb +11 -12
  126. data/spec/redis/barrier_spec.rb +8 -48
  127. data/spec/redis/counter_spec.rb +13 -1
  128. data/spec/redis/pool_spec.rb +1 -1
  129. data/spec/spec_helpers.rb +27 -16
  130. data/spec/support/test_app.rb +8 -0
  131. data/spec/task_spec.rb +3 -24
  132. data/spec/wayfarer_spec.rb +1 -1
  133. data/wayfarer.gemspec +4 -3
  134. metadata +61 -51
  135. data/.github/workflows/ci.yaml +0 -32
  136. data/docs/guides/error_handling.md +0 -53
  137. data/docs/guides/networking.md +0 -94
  138. data/docs/guides/performance.md +0 -130
  139. data/docs/guides/reliability.md +0 -41
  140. data/docs/guides/routing/steering.md +0 -30
  141. data/docs/reference/api/base.md +0 -48
  142. data/docs/reference/configuration_keys.md +0 -43
  143. data/docs/reference/environment_variables.md +0 -83
  144. data/lib/wayfarer/cli/base.rb +0 -45
  145. data/lib/wayfarer/cli/generate.rb +0 -17
  146. data/lib/wayfarer/cli/job.rb +0 -56
  147. data/lib/wayfarer/cli/route.rb +0 -29
  148. data/lib/wayfarer/cli/runner.rb +0 -34
  149. data/lib/wayfarer/cli/templates/Gemfile.tt +0 -5
  150. data/lib/wayfarer/cli/templates/job.rb.tt +0 -10
  151. data/lib/wayfarer/config/capybara.rb +0 -10
  152. data/lib/wayfarer/config/ferrum.rb +0 -11
  153. data/lib/wayfarer/config/networking.rb +0 -29
  154. data/lib/wayfarer/config/redis.rb +0 -14
  155. data/lib/wayfarer/config/root.rb +0 -11
  156. data/lib/wayfarer/config/selenium.rb +0 -21
  157. data/lib/wayfarer/config/strconv.rb +0 -45
  158. data/lib/wayfarer/config/struct.rb +0 -72
  159. data/lib/wayfarer/middleware/fetch.rb +0 -56
  160. data/lib/wayfarer/redis/connection.rb +0 -13
  161. data/lib/wayfarer/redis/version.rb +0 -19
  162. data/lib/wayfarer/routing/router.rb +0 -28
  163. data/spec/callbacks_spec.rb +0 -102
  164. data/spec/cli/generate_spec.rb +0 -39
  165. data/spec/config/capybara_spec.rb +0 -18
  166. data/spec/config/ferrum_spec.rb +0 -24
  167. data/spec/config/networking_spec.rb +0 -73
  168. data/spec/config/redis_spec.rb +0 -32
  169. data/spec/config/root_spec.rb +0 -31
  170. data/spec/config/selenium_spec.rb +0 -56
  171. data/spec/config/strconv_spec.rb +0 -58
  172. data/spec/config/struct_spec.rb +0 -66
  173. data/spec/integration/steering_spec.rb +0 -57
  174. data/spec/redis/version_spec.rb +0 -13
  175. data/spec/routing/router_spec.rb +0 -24
@@ -3,85 +3,68 @@
3
3
  require "spec_helpers"
4
4
 
5
5
  describe Wayfarer::Middleware::Dedup, redis: true do
6
- let(:task) { build(:task) }
6
+ let(:task) { build(:task, :redis_pool) }
7
+ let(:uri) { Addressable::URI.parse(task.url) }
8
+ let(:executions) { 1 }
9
+ let(:job) { double(executions: executions) }
7
10
  subject { described_class.new }
8
11
 
9
- before { task.metadata.staged_urls = SortedSet.new }
12
+ before do
13
+ task[:job] = job
14
+ task[:controller] = job
15
+ task[:normalized_url] = task.url
16
+ end
10
17
 
11
18
  describe "#call" do
12
- context "if already routed" do
13
- before { task.metadata.action = :action }
19
+ it "assigns barrier" do
20
+ expect { subject.call(task) }.to change { task[:barrier] }.from(nil).to(instance_of(Wayfarer::Redis::Barrier))
21
+ end
14
22
 
15
- it "does not call the barrier" do
16
- expect(task.barrier).not_to receive(:seen?)
17
- end
23
+ it "assigns barrier for batch" do
24
+ subject.call(task)
25
+
26
+ expect(task[:barrier].task).to be(task)
27
+ end
18
28
 
19
- it "yields" do
29
+ context "with retry" do
30
+ let(:executions) { 2 }
31
+
32
+ specify do
20
33
  expect { |spy| subject.call(task, &spy) }.to yield_control
21
34
  end
22
- end
23
35
 
24
- context "without staged URLs" do
25
- it "does not raise" do
26
- expect { subject.call(task) }.not_to raise_error
36
+ specify do
37
+ expect(Wayfarer::Logging.logger).to receive(:add).with(Logger::DEBUG, "Not deduplicating retry")
38
+
39
+ subject.call(task)
27
40
  end
28
41
  end
29
42
 
30
- context "with unseen URL" do
31
- it "marks the URL as seen" do
32
- expect {
33
- subject.call(task)
34
- }.to change { task.barrier.seen?(task.url) }.to(true)
35
- end
43
+ context "when rerouted" do
44
+ before { task[:controller] = Object.new }
36
45
 
37
- it "yields" do
46
+ specify do
38
47
  expect { |spy| subject.call(task, &spy) }.to yield_control
39
48
  end
40
- end
41
49
 
42
- context "with seen URL" do
43
- before { task.barrier.seen?(task.url) }
50
+ specify do
51
+ expect(Wayfarer::Logging.logger).to receive(:add).with(Logger::DEBUG, "Not deduplicating rerouted task")
44
52
 
45
- it "does not yield" do
46
- expect { |spy| subject.call(task, &spy) }.not_to yield_control
53
+ subject.call(task)
47
54
  end
48
55
  end
49
56
 
50
- context "with exception raised" do
51
- it "does not mark the URL as seen" do
52
- begin
53
- subject.call(task) { raise }
54
- rescue StandardError
55
- nil
56
- end
57
- expect(task.barrier.seen?(task.url)).to be(false)
58
- end
59
-
60
- it "re-raises the exception" do
61
- expect {
62
- subject.call(task) { raise }
63
- }.to raise_error(RuntimeError)
57
+ context "with unchecked URL" do
58
+ specify do
59
+ expect { |spy| subject.call(task, &spy) }.to yield_control
64
60
  end
65
61
  end
66
62
 
67
- describe "staged URL filtering" do
68
- let(:seen_urls) { %w[https://yahoo.com https://google.com] }
69
- let(:unseen_urls) { %w[https://w3c.org https://nasa.gov] }
63
+ context "with checked URL" do
64
+ before { Wayfarer::Redis::Barrier.new(task).check!(task[:normalized_url]) }
70
65
 
71
- before do
72
- seen_urls.each do |url|
73
- task.barrier.seen?(url)
74
- end
75
-
76
- [*seen_urls, *unseen_urls].each do |url|
77
- task.metadata.staged_urls.add(url)
78
- end
79
- end
80
-
81
- it "filters seen staged URLs" do
82
- expect {
83
- subject.call(task)
84
- }.to change { task.metadata.staged_urls }.to(SortedSet.new(unseen_urls))
66
+ specify do
67
+ expect { |spy| subject.call(task, &spy) }.not_to yield_control
85
68
  end
86
69
  end
87
70
  end
@@ -8,27 +8,27 @@ describe Wayfarer::Middleware::Dispatch do
8
8
  subject(:chain) { described_class.new }
9
9
 
10
10
  before do
11
- task.metadata.controller = spy
12
- task.metadata.action = action
11
+ task[:controller] = spy
12
+ task[:action] = action
13
13
 
14
- allow(task.metadata.controller).to receive(:run_callbacks).and_yield
14
+ allow(task[:controller]).to receive(:run_callbacks).and_yield
15
15
  end
16
16
 
17
17
  describe "#call" do
18
18
  it "runs callbacks" do
19
- expect(task.metadata.controller).to receive(:run_callbacks).with(action)
19
+ expect(task[:controller]).to receive(:run_callbacks).with(action)
20
20
  subject.call(task)
21
21
  end
22
22
 
23
23
  context "when action is a Symbol" do
24
24
  it "calls the method" do
25
- expect(task.metadata.controller).to receive(action)
25
+ expect(task[:controller]).to receive(action)
26
26
  subject.call(task)
27
27
  end
28
28
  end
29
29
 
30
- context "with other action" do
31
- let(:action) { Class.new }
30
+ context "with handler" do
31
+ let(:action) { Class.new.include(Wayfarer::Handler) }
32
32
 
33
33
  it "instantiates and calls" do
34
34
  expect_any_instance_of(action).to receive(:call).with(task)
@@ -36,6 +36,22 @@ describe Wayfarer::Middleware::Dispatch do
36
36
  end
37
37
  end
38
38
 
39
+ context "without action" do
40
+ let(:action) { nil }
41
+
42
+ it "raises ArgumentError" do
43
+ expect { subject.call(task) }.to raise_error(ArgumentError)
44
+ end
45
+ end
46
+
47
+ context "with non-handler action" do
48
+ let(:action) { Class.new }
49
+
50
+ it "raises ArgumentError" do
51
+ expect { subject.call(task) }.to raise_error(ArgumentError)
52
+ end
53
+ end
54
+
39
55
  it "yields" do
40
56
  expect { |spy| subject.call(task, &spy) }.to yield_control
41
57
  end
@@ -4,26 +4,57 @@ require "spec_helpers"
4
4
 
5
5
  describe Wayfarer::Middleware::Normalize do
6
6
  let(:task) { build(:task) }
7
+ let(:uri) { Addressable::URI.parse(url) }
7
8
  subject { described_class.new }
8
9
 
9
- describe "#call" do
10
- let(:urls) do
11
- ["http://example.com/products?product_id=123",
12
- "HTTP://EXAMPLE.COM/products/?product_id=123",
13
- "http://example.com/products/?product_id=123",
14
- "http://example.com/foo/../products?product_id=123",
15
- "invalid@url-net"]
10
+ describe "::normalize" do
11
+ subject { described_class.normalize(uri).to_s }
12
+
13
+ context "without HTTP(S) URL" do
14
+ let(:url) { "localhost:3000" }
15
+
16
+ it { is_expected.to eq(url) }
16
17
  end
17
18
 
18
- before { task.metadata.staged_urls = SortedSet.new(urls) }
19
+ context "with HTTP URL" do
20
+ let(:url) { "http://example.com" }
19
21
 
20
- it "yields" do
21
- expect { |spy| subject.call(task, &spy) }.to yield_control
22
+ it { is_expected.to eq("http://example.com/") }
22
23
  end
24
+ end
25
+
26
+ describe "#call" do
27
+ let(:url) { task.url }
28
+ before { task[:uri] = uri }
29
+
30
+ context "when already assigned" do
31
+ before { task[:normalized_url] = Object.new }
32
+
33
+ specify do
34
+ expect { |spy| subject.call(task, &spy) }.to yield_control
35
+ end
36
+
37
+ it "doesn't normalize URL" do
38
+ expect { subject.call(task) }.not_to(change { task[:normalized_url] })
39
+ end
40
+ end
41
+
42
+ context "with invalid URL" do
43
+ before do
44
+ # I can't come up with a URL that normalize_url raises on but
45
+ # Addressable::URI doesn't, hence the stub
46
+ allow(described_class).to receive(:normalize).with(uri).and_return(nil)
47
+ end
48
+
49
+ specify do
50
+ expect { |spy| subject.call(task, &spy) }.not_to yield_control
51
+ end
52
+
53
+ specify do
54
+ expect(Wayfarer::Logging.logger).to receive(:add).with(Logger::INFO, "Failed to normalize HTTP(S) URL")
23
55
 
24
- it "normalizes and compacts URLs" do
25
- subject.call(task)
26
- expect(task.metadata.staged_urls).to eq(SortedSet[urls.first])
56
+ subject.call(task)
57
+ end
27
58
  end
28
59
  end
29
60
  end
@@ -12,10 +12,11 @@ describe Wayfarer::Middleware::Router do
12
12
  end
13
13
 
14
14
  before do
15
- allow(controller.class.router).to receive(:invoke)
16
- .with(Addressable::URI.parse(task.url), controller.steer)
15
+ allow(controller.class.route).to receive(:invoke)
16
+ .with(Addressable::URI.parse(task.url))
17
17
  .and_return(result)
18
- task.metadata.controller = controller
18
+ task[:controller] = controller
19
+ task[:uri] = Addressable::URI.parse(task.url)
19
20
  end
20
21
 
21
22
  context "with matching route" do
@@ -28,20 +29,26 @@ describe Wayfarer::Middleware::Router do
28
29
  it "assigns the action" do
29
30
  expect {
30
31
  subject.call(task)
31
- }.to change { task.metadata.action }.to(action)
32
+ }.to change { task[:action] }.to(action)
32
33
  end
33
34
 
34
35
  it "merges params" do
35
- task.metadata.params = ActiveSupport::HashWithIndifferentAccess.new("bar" => "qux")
36
+ task[:params] = ActiveSupport::HashWithIndifferentAccess.new("bar" => "qux")
36
37
 
37
38
  expect {
38
39
  subject.call(task)
39
- }.to change { task.metadata.params }.to("foo" => "bar", "bar" => "qux")
40
+ }.to change { task[:params] }.to("foo" => "bar", "bar" => "qux")
40
41
  end
41
42
 
42
- it "yields" do
43
+ specify do
43
44
  expect { |spy| subject.call(task, &spy) }.to yield_control
44
45
  end
46
+
47
+ specify do
48
+ expect(Wayfarer::Logging.logger).to receive(:add).with(Logger::INFO, kind_of(String))
49
+
50
+ subject.call(task)
51
+ end
45
52
  end
46
53
 
47
54
  context "without matching route" do
@@ -52,20 +59,26 @@ describe Wayfarer::Middleware::Router do
52
59
  it "does not assign the action" do
53
60
  expect {
54
61
  subject.call(task)
55
- }.not_to(change { task.metadata.action })
62
+ }.not_to(change { task[:action] })
56
63
  end
57
64
 
58
65
  it "does not alter params" do
59
- task.metadata.params = ActiveSupport::HashWithIndifferentAccess.new("bar" => "qux")
66
+ task[:params] = ActiveSupport::HashWithIndifferentAccess.new("bar" => "qux")
60
67
 
61
68
  expect {
62
69
  subject.call(task)
63
- }.not_to(change { task.metadata.params })
70
+ }.not_to(change { task[:params] })
64
71
  end
65
72
 
66
- it "does not yield" do
73
+ specify do
67
74
  expect { |spy| subject.call(task, &spy) }.not_to yield_control
68
75
  end
76
+
77
+ specify do
78
+ expect(Wayfarer::Logging.logger).to receive(:add).with(Logger::INFO, kind_of(String))
79
+
80
+ subject.call(task)
81
+ end
69
82
  end
70
83
  end
71
84
 
@@ -74,31 +87,17 @@ describe Wayfarer::Middleware::Router do
74
87
  Struct.new(:task).include(described_class).new(task)
75
88
  end
76
89
 
77
- describe "::router" do
78
- it "returns a router" do
79
- expect(controller.class.router).to be_a(Wayfarer::Routing::Router)
90
+ describe "::route" do
91
+ it "returns a root route" do
92
+ expect(controller.class.route).to be_a(Wayfarer::Routing::RootRoute)
80
93
  end
81
94
  end
82
95
 
83
96
  describe "::route" do
84
97
  it "adds a routing block" do
85
98
  expect {
86
- controller.class.route { to :index }
87
- }.to change { controller.class.router.blocks.count }.by(1)
88
- end
89
- end
90
-
91
- describe "::steer" do
92
- it "overrides #steer" do
93
- expect {
94
- controller.class.steer { :foobar }
95
- }.to change { controller.steer }.from([]).to(:foobar)
96
- end
97
- end
98
-
99
- describe "#steer" do
100
- it "returns [] by default" do
101
- expect(controller.steer).to eq([])
99
+ controller.class.route.to(:index)
100
+ }.to change { controller.class.route.children.count }.by(1)
102
101
  end
103
102
  end
104
103
  end
@@ -9,7 +9,7 @@ describe Wayfarer::Middleware::Stage do
9
9
  describe "#call" do
10
10
  it "assigns an empty set" do
11
11
  subject.call(task)
12
- expect(task.metadata.staged_urls).to eq(SortedSet.new)
12
+ expect(task[:staged_urls]).to eq(SortedSet.new)
13
13
  end
14
14
 
15
15
  it "yields" do
@@ -22,20 +22,20 @@ describe Wayfarer::Middleware::Stage do
22
22
  spy.tap do |job|
23
23
  expect(job).to receive(:crawl).with(urls.first, batch: task.batch).ordered
24
24
  expect(job).to receive(:crawl).with(urls.second, batch: task.batch).ordered
25
- task.metadata.job = double(class: job)
25
+ task[:job] = double(class: job)
26
26
  end
27
27
 
28
28
  subject.call(task) do
29
- task.metadata.staged_urls = SortedSet.new(urls)
29
+ task[:staged_urls] = SortedSet.new(urls)
30
30
  end
31
31
  end
32
32
 
33
33
  it "resets staged URLs" do
34
- task.metadata.staged_urls = SortedSet.new([test_app_path("/foo")])
34
+ task[:staged_urls] = SortedSet.new([test_app_path("/foo")])
35
35
 
36
36
  expect {
37
37
  subject.call(task)
38
- }.to change { task.metadata.staged_urls.count }.to(0)
38
+ }.to change { task[:staged_urls].count }.to(0)
39
39
  end
40
40
  end
41
41
 
@@ -45,17 +45,17 @@ describe Wayfarer::Middleware::Stage do
45
45
  end
46
46
 
47
47
  describe "#stage" do
48
- before { task.metadata.staged_urls = SortedSet.new }
48
+ before { task[:staged_urls] = SortedSet.new }
49
49
 
50
50
  it "stages URLs" do
51
51
  expect {
52
52
  controller.stage(test_app_path("/foo"))
53
- }.to change { task.metadata.staged_urls.count }.by(1)
53
+ }.to change { task[:staged_urls].count }.by(1)
54
54
  end
55
55
 
56
56
  it "converts to strings" do
57
57
  controller.stage(Addressable::URI.parse(test_app_path("/foo")))
58
- expect(task.metadata.staged_urls.to_a.first).to be_a(String)
58
+ expect(task[:staged_urls].to_a.first).to be_a(String)
59
59
  end
60
60
  end
61
61
  end
@@ -0,0 +1,53 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "spec_helpers"
4
+
5
+ describe Wayfarer::Middleware::UriParser, "#call" do
6
+ let(:task) { build(:task) }
7
+ subject { described_class.new }
8
+
9
+ it "parses URLs" do
10
+ expect { subject.call(task) }.to change { task[:uri] }.to(Addressable::URI.parse(task.url))
11
+ end
12
+
13
+ it "normalizes URLs" do
14
+ task = build(:task, url: "http://example.com")
15
+
16
+ expect { subject.call(task) }.to change { task[:uri].to_s }.to("http://example.com/")
17
+ end
18
+
19
+ specify do
20
+ expect { |spy| subject.call(task, &spy) }.to yield_control
21
+ end
22
+
23
+ context "with invalid URL" do
24
+ let(:task) { build(:task, url: "ht%0atp://localhost/") }
25
+
26
+ specify do
27
+ expect { |spy| subject.call(task, &spy) }.not_to yield_control
28
+ end
29
+
30
+ specify do
31
+ expect(Wayfarer::Logging.logger)
32
+ .to receive(:add).with(Logger::INFO, "Not processing invalid URL (Invalid scheme format: 'ht%0atp')")
33
+
34
+ subject.call(task)
35
+ end
36
+ end
37
+
38
+ describe described_class::API do
39
+ subject(:controller) do
40
+ Struct.new(:task).include(described_class).new(task)
41
+ end
42
+
43
+ describe "#uri" do
44
+ let(:uri) { Addressable::URI.parse(task.url) }
45
+
46
+ before { task[:uri] = uri }
47
+
48
+ it "returns the URI" do
49
+ expect(controller.uri).to be(uri)
50
+ end
51
+ end
52
+ end
53
+ end
@@ -2,7 +2,7 @@
2
2
 
3
3
  require "spec_helpers"
4
4
 
5
- describe Wayfarer::Middleware::Fetch do
5
+ describe Wayfarer::Middleware::UserAgent do
6
6
  let(:task) { build(:task) }
7
7
  let(:page) { Object.new }
8
8
  let(:agent) { Object.new }
@@ -28,17 +28,17 @@ describe Wayfarer::Middleware::Fetch do
28
28
  allow(subject).to receive(:pool).and_return(pool)
29
29
  end
30
30
 
31
- task.metadata.staged_urls = SortedSet.new
32
- task.metadata.controller = controller
31
+ task[:staged_urls] = SortedSet.new
32
+ task[:controller] = controller
33
33
  end
34
34
 
35
35
  context "with page assigned" do
36
- before { task.metadata.page = page }
36
+ before { task[:page] = page }
37
37
 
38
38
  it "does not alter the page" do
39
39
  expect {
40
40
  subject.call(task)
41
- }.not_to(change { task.metadata.page })
41
+ }.not_to(change { task[:page] })
42
42
  end
43
43
 
44
44
  it "yields" do
@@ -63,7 +63,7 @@ describe Wayfarer::Middleware::Fetch do
63
63
  it "stages the redirect URL" do
64
64
  expect {
65
65
  subject.call(task)
66
- }.to change { task.metadata.staged_urls.count }.by(1)
66
+ }.to change { task[:staged_urls].count }.by(1)
67
67
  end
68
68
 
69
69
  it "does not yield" do
@@ -75,13 +75,13 @@ describe Wayfarer::Middleware::Fetch do
75
75
  it "assigns the context" do
76
76
  expect {
77
77
  subject.call(task)
78
- }.to change { task.metadata.context }.to(context)
78
+ }.to change { task[:context] }.to(context)
79
79
  end
80
80
 
81
81
  it "assigns the page" do
82
82
  expect {
83
83
  subject.call(task)
84
- }.to change { task.metadata.page }.to(result.page)
84
+ }.to change { task[:page] }.to(result.page)
85
85
  end
86
86
 
87
87
  it "yields" do
@@ -95,31 +95,23 @@ describe Wayfarer::Middleware::Fetch do
95
95
  Struct.new(:task).include(described_class).new(task)
96
96
  end
97
97
 
98
- describe "#agent" do
99
- before { task.metadata.context = context }
98
+ describe "#user_agent" do
99
+ before { task[:context] = context }
100
100
 
101
101
  it "returns the agent" do
102
- expect(controller.agent).to be(context.instance)
103
- end
104
- end
105
-
106
- describe "#context" do
107
- before { task.metadata.context = context }
108
-
109
- it "returns the context" do
110
- expect(controller.context).to be(task.metadata.context)
102
+ expect(controller.user_agent).to be(context.instance)
111
103
  end
112
104
  end
113
105
 
114
106
  describe "#page" do
115
- before { task.metadata.page = page }
107
+ before { task[:page] = page }
116
108
 
117
109
  it "returns the page" do
118
- expect(controller.page).to be(task.metadata.page)
110
+ expect(controller.page).to be(task[:page])
119
111
  end
120
112
 
121
113
  context "with live keyword" do
122
- before { task.metadata.context = context }
114
+ before { task[:context] = context }
123
115
 
124
116
  context "with stateful agent" do
125
117
  before do
@@ -130,7 +122,7 @@ describe Wayfarer::Middleware::Fetch do
130
122
  it "replaces the page" do
131
123
  expect {
132
124
  controller.page(live: true)
133
- }.to change { task.metadata.page }.to(result.page)
125
+ }.to change { task[:page] }.to(result.page)
134
126
  end
135
127
  end
136
128
 
@@ -140,15 +132,24 @@ describe Wayfarer::Middleware::Fetch do
140
132
  it "does not alter the page" do
141
133
  expect {
142
134
  controller.page(live: true)
143
- }.not_to(change { task.metadata.page })
135
+ }.not_to(change { task[:page] })
144
136
  end
145
137
  end
146
138
  end
147
139
  end
148
140
 
149
- describe "#http" do
150
- it "returns a redirect-following HTTP agent" do
151
- expect(controller.http).to be_a(Wayfarer::Networking::Follow)
141
+ describe "#fetch" do
142
+ let(:url) { test_app_path("/redirect?times=3") }
143
+ subject { controller.fetch(url) }
144
+
145
+ it { is_expected.to be_a(Wayfarer::Page) }
146
+
147
+ context "with redirects exhausted" do
148
+ let(:url) { test_app_path("/redirect?times=4") }
149
+
150
+ specify do
151
+ expect { subject }.to raise_error(Wayfarer::Networking::Follow::RedirectsExhaustedError)
152
+ end
152
153
  end
153
154
  end
154
155
  end
@@ -49,7 +49,7 @@ describe Wayfarer::Networking::Context do
49
49
  let(:other_error) { Class.new(StandardError) }
50
50
 
51
51
  before do
52
- Wayfarer.config.network.renew_on = [other_error]
52
+ Wayfarer.config[:network][:renew_on] = [other_error]
53
53
  allow(strategy).to receive(:fetch).and_raise(other_error)
54
54
  end
55
55
 
@@ -16,7 +16,7 @@ describe Wayfarer::Networking::Follow do
16
16
  before { allow(inner).to receive(:fetch).and_return(success) }
17
17
 
18
18
  it "returns the page" do
19
- expect(outer.fetch(url)).to be(page)
19
+ expect(outer.fetch(url, follow: 3)).to be(page)
20
20
  end
21
21
  end
22
22
 
@@ -34,7 +34,7 @@ describe Wayfarer::Networking::Follow do
34
34
 
35
35
  expect {
36
36
  outer.fetch(url, follow: 0)
37
- }.to raise_error(Wayfarer::Networking::RedirectsExhaustedError)
37
+ }.to raise_error(described_class::RedirectsExhaustedError)
38
38
  end
39
39
  end
40
40
  end
@@ -17,7 +17,7 @@ describe Wayfarer::Networking::Pool do
17
17
  end
18
18
 
19
19
  context "when using Ferrum", ferrum: true do
20
- before { Wayfarer.config.network.agent = :ferrum }
20
+ before { Wayfarer.config[:network][:agent] = :ferrum }
21
21
 
22
22
  it "yields Ferrum" do
23
23
  pool.with do |context|
@@ -27,7 +27,7 @@ describe Wayfarer::Networking::Pool do
27
27
  end
28
28
 
29
29
  context "when using Selenium", selenium: true do
30
- before { Wayfarer.config.network.agent = :selenium }
30
+ before { Wayfarer.config[:network][:agent] = :selenium }
31
31
 
32
32
  it "yields Selenium" do
33
33
  pool.with do |context|
@@ -38,8 +38,8 @@ describe Wayfarer::Networking::Pool do
38
38
 
39
39
  context "when using Capybara", ferrum: true do
40
40
  before do
41
- Wayfarer.config.network.agent = :capybara
42
- Wayfarer.config.capybara.driver = :cuprite
41
+ Wayfarer.config[:network][:agent] = :capybara
42
+ Wayfarer.config[:capybara][:driver] = :cuprite
43
43
  end
44
44
 
45
45
  it "yields Capybara" do
@@ -55,7 +55,7 @@ describe Wayfarer::Networking::Pool do
55
55
 
56
56
  before do
57
57
  pool.class.registry[:foobar] = double(new: strategy)
58
- Wayfarer.config.network.agent = :foobar
58
+ Wayfarer.config[:network][:agent] = :foobar
59
59
  end
60
60
 
61
61
  it "destroys the strategy" do