wayfarer 0.4.5 → 0.4.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (175) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/lint.yaml +25 -0
  3. data/.github/workflows/release.yaml +29 -0
  4. data/.github/workflows/tests.yaml +30 -0
  5. data/.gitignore +4 -0
  6. data/.rubocop.yml +5 -0
  7. data/.vale.ini +5 -0
  8. data/.yardopts +1 -3
  9. data/Dockerfile +5 -4
  10. data/Gemfile +3 -0
  11. data/Gemfile.lock +107 -102
  12. data/Rakefile +5 -56
  13. data/bin/wayfarer +1 -1
  14. data/docker-compose.yml +20 -9
  15. data/docs/cookbook/consent_screen.md +2 -2
  16. data/docs/cookbook/executing_javascript.md +3 -3
  17. data/docs/cookbook/navigation.md +12 -12
  18. data/docs/cookbook/querying_html.md +3 -3
  19. data/docs/cookbook/screenshots.md +2 -2
  20. data/docs/cookbook/user_agent.md +1 -1
  21. data/docs/design.md +36 -0
  22. data/docs/guides/callbacks.md +24 -126
  23. data/docs/guides/configuration.md +8 -8
  24. data/docs/guides/handlers.md +60 -0
  25. data/docs/guides/index.md +1 -0
  26. data/docs/guides/jobs/error_handling.md +40 -0
  27. data/docs/guides/jobs.md +99 -31
  28. data/docs/guides/navigation.md +1 -1
  29. data/docs/guides/networking/capybara.md +13 -22
  30. data/docs/guides/networking/custom_adapters.md +82 -41
  31. data/docs/guides/networking/ferrum.md +4 -4
  32. data/docs/guides/networking/http.md +9 -13
  33. data/docs/guides/networking/selenium.md +10 -11
  34. data/docs/guides/pages.md +76 -10
  35. data/docs/guides/redis.md +10 -0
  36. data/docs/guides/routing.md +74 -0
  37. data/docs/guides/tasks.md +33 -9
  38. data/docs/guides/tutorial.md +60 -0
  39. data/docs/guides/user_agents.md +113 -0
  40. data/docs/index.md +17 -40
  41. data/docs/reference/cli.md +35 -25
  42. data/docs/reference/configuration.md +36 -0
  43. data/lib/wayfarer/base.rb +124 -46
  44. data/lib/wayfarer/batch_completion.rb +56 -0
  45. data/lib/wayfarer/callbacks.rb +22 -48
  46. data/lib/wayfarer/cli/route_printer.rb +71 -57
  47. data/lib/wayfarer/cli.rb +121 -0
  48. data/lib/wayfarer/gc.rb +13 -6
  49. data/lib/wayfarer/handler.rb +15 -7
  50. data/lib/wayfarer/logging.rb +38 -0
  51. data/lib/wayfarer/middleware/base.rb +2 -0
  52. data/lib/wayfarer/middleware/batch_completion.rb +19 -0
  53. data/lib/wayfarer/middleware/content_type.rb +54 -0
  54. data/lib/wayfarer/middleware/controller.rb +19 -15
  55. data/lib/wayfarer/middleware/dedup.rb +16 -13
  56. data/lib/wayfarer/middleware/dispatch.rb +12 -4
  57. data/lib/wayfarer/middleware/normalize.rb +12 -11
  58. data/lib/wayfarer/middleware/redis.rb +15 -0
  59. data/lib/wayfarer/middleware/router.rb +33 -35
  60. data/lib/wayfarer/middleware/stage.rb +5 -5
  61. data/lib/wayfarer/middleware/uri_parser.rb +30 -0
  62. data/lib/wayfarer/middleware/user_agent.rb +49 -0
  63. data/lib/wayfarer/networking/capybara.rb +1 -1
  64. data/lib/wayfarer/networking/context.rb +2 -2
  65. data/lib/wayfarer/networking/ferrum.rb +2 -2
  66. data/lib/wayfarer/networking/follow.rb +12 -6
  67. data/lib/wayfarer/networking/http.rb +1 -1
  68. data/lib/wayfarer/networking/pool.rb +17 -12
  69. data/lib/wayfarer/networking/selenium.rb +3 -3
  70. data/lib/wayfarer/networking/strategy.rb +2 -2
  71. data/lib/wayfarer/page.rb +36 -14
  72. data/lib/wayfarer/parsing/xml.rb +6 -6
  73. data/lib/wayfarer/parsing.rb +24 -0
  74. data/lib/wayfarer/redis/barrier.rb +13 -21
  75. data/lib/wayfarer/redis/counter.rb +19 -9
  76. data/lib/wayfarer/redis/pool.rb +1 -1
  77. data/lib/wayfarer/redis/resettable.rb +19 -0
  78. data/lib/wayfarer/routing/dsl.rb +1 -0
  79. data/lib/wayfarer/routing/matchers/path.rb +4 -2
  80. data/lib/wayfarer/routing/root_route.rb +5 -1
  81. data/lib/wayfarer/routing/route.rb +4 -14
  82. data/lib/wayfarer/stringify.rb +22 -30
  83. data/lib/wayfarer/task.rb +12 -18
  84. data/lib/wayfarer.rb +29 -2
  85. data/mkdocs.yml +52 -7
  86. data/rake/docs.rake +26 -0
  87. data/rake/lint.rake +105 -0
  88. data/rake/release.rake +29 -0
  89. data/rake/tests.rake +28 -0
  90. data/requirements.txt +1 -1
  91. data/spec/base_spec.rb +140 -160
  92. data/spec/batch_completion_spec.rb +104 -0
  93. data/spec/cli/job_spec.rb +19 -23
  94. data/spec/cli/routing_spec.rb +101 -0
  95. data/spec/cli/version_spec.rb +1 -1
  96. data/spec/factories/task.rb +7 -1
  97. data/spec/fixtures/dummy_job.rb +5 -3
  98. data/spec/gc_spec.rb +8 -50
  99. data/spec/handler_spec.rb +1 -1
  100. data/spec/integration/callbacks_spec.rb +157 -45
  101. data/spec/integration/content_type_spec.rb +145 -0
  102. data/spec/integration/gc_spec.rb +44 -0
  103. data/spec/integration/handler_spec.rb +66 -0
  104. data/spec/integration/page_spec.rb +44 -29
  105. data/spec/integration/params_spec.rb +33 -25
  106. data/spec/integration/parsing_spec.rb +125 -0
  107. data/spec/integration/routing_spec.rb +18 -0
  108. data/spec/integration/stage_spec.rb +27 -20
  109. data/spec/middleware/batch_completion_spec.rb +34 -0
  110. data/spec/middleware/chain_spec.rb +8 -8
  111. data/spec/middleware/content_type_spec.rb +86 -0
  112. data/spec/middleware/controller_spec.rb +5 -5
  113. data/spec/middleware/dedup_spec.rb +38 -55
  114. data/spec/middleware/dispatch_spec.rb +23 -7
  115. data/spec/middleware/normalize_spec.rb +44 -13
  116. data/spec/middleware/router_spec.rb +29 -30
  117. data/spec/middleware/stage_spec.rb +8 -8
  118. data/spec/middleware/uri_parser_spec.rb +53 -0
  119. data/spec/middleware/{fetch_spec.rb → user_agent_spec.rb} +28 -27
  120. data/spec/networking/context_spec.rb +17 -0
  121. data/spec/networking/follow_spec.rb +2 -2
  122. data/spec/networking/pool_spec.rb +5 -5
  123. data/spec/networking/strategy.rb +2 -2
  124. data/spec/page_spec.rb +42 -20
  125. data/spec/parsing/xml_spec.rb +11 -12
  126. data/spec/redis/barrier_spec.rb +8 -48
  127. data/spec/redis/counter_spec.rb +13 -1
  128. data/spec/redis/pool_spec.rb +1 -1
  129. data/spec/spec_helpers.rb +27 -16
  130. data/spec/support/test_app.rb +8 -0
  131. data/spec/task_spec.rb +3 -24
  132. data/spec/wayfarer_spec.rb +1 -1
  133. data/wayfarer.gemspec +4 -3
  134. metadata +61 -51
  135. data/.github/workflows/ci.yaml +0 -32
  136. data/docs/guides/error_handling.md +0 -31
  137. data/docs/guides/networking.md +0 -94
  138. data/docs/guides/performance.md +0 -130
  139. data/docs/guides/reliability.md +0 -41
  140. data/docs/guides/routing/steering.md +0 -30
  141. data/docs/reference/api/base.md +0 -48
  142. data/docs/reference/configuration_keys.md +0 -42
  143. data/docs/reference/environment_variables.md +0 -83
  144. data/lib/wayfarer/cli/base.rb +0 -45
  145. data/lib/wayfarer/cli/generate.rb +0 -17
  146. data/lib/wayfarer/cli/job.rb +0 -56
  147. data/lib/wayfarer/cli/route.rb +0 -29
  148. data/lib/wayfarer/cli/runner.rb +0 -34
  149. data/lib/wayfarer/cli/templates/Gemfile.tt +0 -5
  150. data/lib/wayfarer/cli/templates/job.rb.tt +0 -10
  151. data/lib/wayfarer/config/capybara.rb +0 -10
  152. data/lib/wayfarer/config/ferrum.rb +0 -11
  153. data/lib/wayfarer/config/networking.rb +0 -26
  154. data/lib/wayfarer/config/redis.rb +0 -14
  155. data/lib/wayfarer/config/root.rb +0 -11
  156. data/lib/wayfarer/config/selenium.rb +0 -21
  157. data/lib/wayfarer/config/strconv.rb +0 -45
  158. data/lib/wayfarer/config/struct.rb +0 -72
  159. data/lib/wayfarer/middleware/fetch.rb +0 -56
  160. data/lib/wayfarer/redis/connection.rb +0 -13
  161. data/lib/wayfarer/redis/version.rb +0 -19
  162. data/lib/wayfarer/routing/router.rb +0 -28
  163. data/spec/callbacks_spec.rb +0 -102
  164. data/spec/cli/generate_spec.rb +0 -39
  165. data/spec/config/capybara_spec.rb +0 -18
  166. data/spec/config/ferrum_spec.rb +0 -24
  167. data/spec/config/networking_spec.rb +0 -73
  168. data/spec/config/redis_spec.rb +0 -32
  169. data/spec/config/root_spec.rb +0 -31
  170. data/spec/config/selenium_spec.rb +0 -56
  171. data/spec/config/strconv_spec.rb +0 -58
  172. data/spec/config/struct_spec.rb +0 -66
  173. data/spec/integration/steering_spec.rb +0 -57
  174. data/spec/redis/version_spec.rb +0 -13
  175. data/spec/routing/router_spec.rb +0 -24
@@ -3,85 +3,68 @@
3
3
  require "spec_helpers"
4
4
 
5
5
  describe Wayfarer::Middleware::Dedup, redis: true do
6
- let(:task) { build(:task) }
6
+ let(:task) { build(:task, :redis_pool) }
7
+ let(:uri) { Addressable::URI.parse(task.url) }
8
+ let(:executions) { 1 }
9
+ let(:job) { double(executions: executions) }
7
10
  subject { described_class.new }
8
11
 
9
- before { task.metadata.staged_urls = SortedSet.new }
12
+ before do
13
+ task[:job] = job
14
+ task[:controller] = job
15
+ task[:normalized_url] = task.url
16
+ end
10
17
 
11
18
  describe "#call" do
12
- context "if already routed" do
13
- before { task.metadata.action = :action }
19
+ it "assigns barrier" do
20
+ expect { subject.call(task) }.to change { task[:barrier] }.from(nil).to(instance_of(Wayfarer::Redis::Barrier))
21
+ end
14
22
 
15
- it "does not call the barrier" do
16
- expect(task.barrier).not_to receive(:seen?)
17
- end
23
+ it "assigns barrier for batch" do
24
+ subject.call(task)
25
+
26
+ expect(task[:barrier].task).to be(task)
27
+ end
18
28
 
19
- it "yields" do
29
+ context "with retry" do
30
+ let(:executions) { 2 }
31
+
32
+ specify do
20
33
  expect { |spy| subject.call(task, &spy) }.to yield_control
21
34
  end
22
- end
23
35
 
24
- context "without staged URLs" do
25
- it "does not raise" do
26
- expect { subject.call(task) }.not_to raise_error
36
+ specify do
37
+ expect(Wayfarer::Logging.logger).to receive(:add).with(Logger::DEBUG, "Not deduplicating retry")
38
+
39
+ subject.call(task)
27
40
  end
28
41
  end
29
42
 
30
- context "with unseen URL" do
31
- it "marks the URL as seen" do
32
- expect {
33
- subject.call(task)
34
- }.to change { task.barrier.seen?(task.url) }.to(true)
35
- end
43
+ context "when rerouted" do
44
+ before { task[:controller] = Object.new }
36
45
 
37
- it "yields" do
46
+ specify do
38
47
  expect { |spy| subject.call(task, &spy) }.to yield_control
39
48
  end
40
- end
41
49
 
42
- context "with seen URL" do
43
- before { task.barrier.seen?(task.url) }
50
+ specify do
51
+ expect(Wayfarer::Logging.logger).to receive(:add).with(Logger::DEBUG, "Not deduplicating rerouted task")
44
52
 
45
- it "does not yield" do
46
- expect { |spy| subject.call(task, &spy) }.not_to yield_control
53
+ subject.call(task)
47
54
  end
48
55
  end
49
56
 
50
- context "with exception raised" do
51
- it "does not mark the URL as seen" do
52
- begin
53
- subject.call(task) { raise }
54
- rescue StandardError
55
- nil
56
- end
57
- expect(task.barrier.seen?(task.url)).to be(false)
58
- end
59
-
60
- it "re-raises the exception" do
61
- expect {
62
- subject.call(task) { raise }
63
- }.to raise_error(RuntimeError)
57
+ context "with unchecked URL" do
58
+ specify do
59
+ expect { |spy| subject.call(task, &spy) }.to yield_control
64
60
  end
65
61
  end
66
62
 
67
- describe "staged URL filtering" do
68
- let(:seen_urls) { %w[https://yahoo.com https://google.com] }
69
- let(:unseen_urls) { %w[https://w3c.org https://nasa.gov] }
63
+ context "with checked URL" do
64
+ before { Wayfarer::Redis::Barrier.new(task).check!(task[:normalized_url]) }
70
65
 
71
- before do
72
- seen_urls.each do |url|
73
- task.barrier.seen?(url)
74
- end
75
-
76
- [*seen_urls, *unseen_urls].each do |url|
77
- task.metadata.staged_urls.add(url)
78
- end
79
- end
80
-
81
- it "filters seen staged URLs" do
82
- expect {
83
- subject.call(task)
84
- }.to change { task.metadata.staged_urls }.to(SortedSet.new(unseen_urls))
66
+ specify do
67
+ expect { |spy| subject.call(task, &spy) }.not_to yield_control
85
68
  end
86
69
  end
87
70
  end
@@ -8,27 +8,27 @@ describe Wayfarer::Middleware::Dispatch do
8
8
  subject(:chain) { described_class.new }
9
9
 
10
10
  before do
11
- task.metadata.controller = spy
12
- task.metadata.action = action
11
+ task[:controller] = spy
12
+ task[:action] = action
13
13
 
14
- allow(task.metadata.controller).to receive(:run_callbacks).and_yield
14
+ allow(task[:controller]).to receive(:run_callbacks).and_yield
15
15
  end
16
16
 
17
17
  describe "#call" do
18
18
  it "runs callbacks" do
19
- expect(task.metadata.controller).to receive(:run_callbacks).with(action)
19
+ expect(task[:controller]).to receive(:run_callbacks).with(action)
20
20
  subject.call(task)
21
21
  end
22
22
 
23
23
  context "when action is a Symbol" do
24
24
  it "calls the method" do
25
- expect(task.metadata.controller).to receive(action)
25
+ expect(task[:controller]).to receive(action)
26
26
  subject.call(task)
27
27
  end
28
28
  end
29
29
 
30
- context "with other action" do
31
- let(:action) { Class.new }
30
+ context "with handler" do
31
+ let(:action) { Class.new.include(Wayfarer::Handler) }
32
32
 
33
33
  it "instantiates and calls" do
34
34
  expect_any_instance_of(action).to receive(:call).with(task)
@@ -36,6 +36,22 @@ describe Wayfarer::Middleware::Dispatch do
36
36
  end
37
37
  end
38
38
 
39
+ context "without action" do
40
+ let(:action) { nil }
41
+
42
+ it "instantiates and calls" do
43
+ expect { subject.call(task) }.to raise_error(ArgumentError)
44
+ end
45
+ end
46
+
47
+ context "without other action" do
48
+ let(:action) { Class.new }
49
+
50
+ it "instantiates and calls" do
51
+ expect { subject.call(task) }.to raise_error(ArgumentError)
52
+ end
53
+ end
54
+
39
55
  it "yields" do
40
56
  expect { |spy| subject.call(task, &spy) }.to yield_control
41
57
  end
@@ -4,26 +4,57 @@ require "spec_helpers"
4
4
 
5
5
  describe Wayfarer::Middleware::Normalize do
6
6
  let(:task) { build(:task) }
7
+ let(:uri) { Addressable::URI.parse(url) }
7
8
  subject { described_class.new }
8
9
 
9
- describe "#call" do
10
- let(:urls) do
11
- ["http://example.com/products?product_id=123",
12
- "HTTP://EXAMPLE.COM/products/?product_id=123",
13
- "http://example.com/products/?product_id=123",
14
- "http://example.com/foo/../products?product_id=123",
15
- "invalid@url-net"]
10
+ describe "::normalize" do
11
+ subject { described_class.normalize(uri).to_s }
12
+
13
+ context "without HTTP(S) URL" do
14
+ let(:url) { "localhost:3000" }
15
+
16
+ it { is_expected.to eq(url) }
16
17
  end
17
18
 
18
- before { task.metadata.staged_urls = SortedSet.new(urls) }
19
+ context "with HTTP URL" do
20
+ let(:url) { "http://example.com" }
19
21
 
20
- it "yields" do
21
- expect { |spy| subject.call(task, &spy) }.to yield_control
22
+ it { is_expected.to eq("http://example.com/") }
22
23
  end
24
+ end
25
+
26
+ describe "#call" do
27
+ let(:url) { task.url }
28
+ before { task[:uri] = uri }
29
+
30
+ context "when already assigned" do
31
+ before { task[:normalized_url] = Object.new }
32
+
33
+ specify do
34
+ expect { |spy| subject.call(task, &spy) }.to yield_control
35
+ end
36
+
37
+ it "doesn't normalize URL" do
38
+ expect { subject.call(task) }.not_to(change { task[:normalized_url] })
39
+ end
40
+ end
41
+
42
+ context "with invalid URL" do
43
+ before do
44
+ # I can't come up with a URL that normalize_url raises on but
45
+ # Addressable::URI doesn't, hence the stub
46
+ allow(described_class).to receive(:normalize).with(uri).and_return(nil)
47
+ end
48
+
49
+ specify do
50
+ expect { |spy| subject.call(task, &spy) }.not_to yield_control
51
+ end
52
+
53
+ specify do
54
+ expect(Wayfarer::Logging.logger).to receive(:add).with(Logger::INFO, "Failed to normalize HTTP(S) URL")
23
55
 
24
- it "normalizes and compacts URLs" do
25
- subject.call(task)
26
- expect(task.metadata.staged_urls).to eq(SortedSet[urls.first])
56
+ subject.call(task)
57
+ end
27
58
  end
28
59
  end
29
60
  end
@@ -12,10 +12,11 @@ describe Wayfarer::Middleware::Router do
12
12
  end
13
13
 
14
14
  before do
15
- allow(controller.class.router).to receive(:invoke)
16
- .with(Addressable::URI.parse(task.url), controller.steer)
15
+ allow(controller.class.route).to receive(:invoke)
16
+ .with(Addressable::URI.parse(task.url))
17
17
  .and_return(result)
18
- task.metadata.controller = controller
18
+ task[:controller] = controller
19
+ task[:uri] = Addressable::URI.parse(task.url)
19
20
  end
20
21
 
21
22
  context "with matching route" do
@@ -28,20 +29,26 @@ describe Wayfarer::Middleware::Router do
28
29
  it "assigns the action" do
29
30
  expect {
30
31
  subject.call(task)
31
- }.to change { task.metadata.action }.to(action)
32
+ }.to change { task[:action] }.to(action)
32
33
  end
33
34
 
34
35
  it "merges params" do
35
- task.metadata.params = ActiveSupport::HashWithIndifferentAccess.new("bar" => "qux")
36
+ task[:params] = ActiveSupport::HashWithIndifferentAccess.new("bar" => "qux")
36
37
 
37
38
  expect {
38
39
  subject.call(task)
39
- }.to change { task.metadata.params }.to("foo" => "bar", "bar" => "qux")
40
+ }.to change { task[:params] }.to("foo" => "bar", "bar" => "qux")
40
41
  end
41
42
 
42
- it "yields" do
43
+ specify do
43
44
  expect { |spy| subject.call(task, &spy) }.to yield_control
44
45
  end
46
+
47
+ specify do
48
+ expect(Wayfarer::Logging.logger).to receive(:add).with(Logger::INFO, kind_of(String))
49
+
50
+ subject.call(task)
51
+ end
45
52
  end
46
53
 
47
54
  context "without matching route" do
@@ -52,20 +59,26 @@ describe Wayfarer::Middleware::Router do
52
59
  it "does not assign the action" do
53
60
  expect {
54
61
  subject.call(task)
55
- }.not_to(change { task.metadata.action })
62
+ }.not_to(change { task[:action] })
56
63
  end
57
64
 
58
65
  it "does not alter params" do
59
- task.metadata.params = ActiveSupport::HashWithIndifferentAccess.new("bar" => "qux")
66
+ task[:params] = ActiveSupport::HashWithIndifferentAccess.new("bar" => "qux")
60
67
 
61
68
  expect {
62
69
  subject.call(task)
63
- }.not_to(change { task.metadata.params })
70
+ }.not_to(change { task[:params] })
64
71
  end
65
72
 
66
- it "does not yield" do
73
+ specify do
67
74
  expect { |spy| subject.call(task, &spy) }.not_to yield_control
68
75
  end
76
+
77
+ specify do
78
+ expect(Wayfarer::Logging.logger).to receive(:add).with(Logger::INFO, kind_of(String))
79
+
80
+ subject.call(task)
81
+ end
69
82
  end
70
83
  end
71
84
 
@@ -74,31 +87,17 @@ describe Wayfarer::Middleware::Router do
74
87
  Struct.new(:task).include(described_class).new(task)
75
88
  end
76
89
 
77
- describe "::router" do
78
- it "returns a router" do
79
- expect(controller.class.router).to be_a(Wayfarer::Routing::Router)
90
+ describe "::route" do
91
+ it "returns a root route" do
92
+ expect(controller.class.route).to be_a(Wayfarer::Routing::RootRoute)
80
93
  end
81
94
  end
82
95
 
83
96
  describe "::route" do
84
97
  it "adds a routing block" do
85
98
  expect {
86
- controller.class.route { to :index }
87
- }.to change { controller.class.router.blocks.count }.by(1)
88
- end
89
- end
90
-
91
- describe "::steer" do
92
- it "overrides #steer" do
93
- expect {
94
- controller.class.steer { :foobar }
95
- }.to change { controller.steer }.from([]).to(:foobar)
96
- end
97
- end
98
-
99
- describe "#steer" do
100
- it "returns [] by default" do
101
- expect(controller.steer).to eq([])
99
+ controller.class.route.to(:index)
100
+ }.to change { controller.class.route.children.count }.by(1)
102
101
  end
103
102
  end
104
103
  end
@@ -9,7 +9,7 @@ describe Wayfarer::Middleware::Stage do
9
9
  describe "#call" do
10
10
  it "assigns an empty set" do
11
11
  subject.call(task)
12
- expect(task.metadata.staged_urls).to eq(SortedSet.new)
12
+ expect(task[:staged_urls]).to eq(SortedSet.new)
13
13
  end
14
14
 
15
15
  it "yields" do
@@ -22,20 +22,20 @@ describe Wayfarer::Middleware::Stage do
22
22
  spy.tap do |job|
23
23
  expect(job).to receive(:crawl).with(urls.first, batch: task.batch).ordered
24
24
  expect(job).to receive(:crawl).with(urls.second, batch: task.batch).ordered
25
- task.metadata.job = double(class: job)
25
+ task[:job] = double(class: job)
26
26
  end
27
27
 
28
28
  subject.call(task) do
29
- task.metadata.staged_urls = SortedSet.new(urls)
29
+ task[:staged_urls] = SortedSet.new(urls)
30
30
  end
31
31
  end
32
32
 
33
33
  it "resets staged URLs" do
34
- task.metadata.staged_urls = SortedSet.new([test_app_path("/foo")])
34
+ task[:staged_urls] = SortedSet.new([test_app_path("/foo")])
35
35
 
36
36
  expect {
37
37
  subject.call(task)
38
- }.to change { task.metadata.staged_urls.count }.to(0)
38
+ }.to change { task[:staged_urls].count }.to(0)
39
39
  end
40
40
  end
41
41
 
@@ -45,17 +45,17 @@ describe Wayfarer::Middleware::Stage do
45
45
  end
46
46
 
47
47
  describe "#stage" do
48
- before { task.metadata.staged_urls = SortedSet.new }
48
+ before { task[:staged_urls] = SortedSet.new }
49
49
 
50
50
  it "stages URLs" do
51
51
  expect {
52
52
  controller.stage(test_app_path("/foo"))
53
- }.to change { task.metadata.staged_urls.count }.by(1)
53
+ }.to change { task[:staged_urls].count }.by(1)
54
54
  end
55
55
 
56
56
  it "converts to strings" do
57
57
  controller.stage(Addressable::URI.parse(test_app_path("/foo")))
58
- expect(task.metadata.staged_urls.to_a.first).to be_a(String)
58
+ expect(task[:staged_urls].to_a.first).to be_a(String)
59
59
  end
60
60
  end
61
61
  end
@@ -0,0 +1,53 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "spec_helpers"
4
+
5
+ describe Wayfarer::Middleware::UriParser, "#call" do
6
+ let(:task) { build(:task) }
7
+ subject { described_class.new }
8
+
9
+ it "parses URLs" do
10
+ expect { subject.call(task) }.to change { task[:uri] }.to(Addressable::URI.parse(task.url))
11
+ end
12
+
13
+ it "normalizes URLs" do
14
+ task = build(:task, url: "http://example.com")
15
+
16
+ expect { subject.call(task) }.to change { task[:uri].to_s }.to("http://example.com/")
17
+ end
18
+
19
+ specify do
20
+ expect { |spy| subject.call(task, &spy) }.to yield_control
21
+ end
22
+
23
+ context "with invalid URL" do
24
+ let(:task) { build(:task, url: "ht%0atp://localhost/") }
25
+
26
+ specify do
27
+ expect { |spy| subject.call(task, &spy) }.not_to yield_control
28
+ end
29
+
30
+ specify do
31
+ expect(Wayfarer::Logging.logger)
32
+ .to receive(:add).with(Logger::INFO, "Not processing invalid URL (Invalid scheme format: 'ht%0atp')")
33
+
34
+ subject.call(task)
35
+ end
36
+ end
37
+
38
+ describe described_class::API do
39
+ subject(:controller) do
40
+ Struct.new(:task).include(described_class).new(task)
41
+ end
42
+
43
+ describe "#uri" do
44
+ let(:uri) { Addressable::URI.parse(task.url) }
45
+
46
+ before { task[:uri] = uri }
47
+
48
+ it "returns the agent" do
49
+ expect(controller.uri).to be(uri)
50
+ end
51
+ end
52
+ end
53
+ end
@@ -2,7 +2,7 @@
2
2
 
3
3
  require "spec_helpers"
4
4
 
5
- describe Wayfarer::Middleware::Fetch do
5
+ describe Wayfarer::Middleware::UserAgent do
6
6
  let(:task) { build(:task) }
7
7
  let(:page) { Object.new }
8
8
  let(:agent) { Object.new }
@@ -28,17 +28,17 @@ describe Wayfarer::Middleware::Fetch do
28
28
  allow(subject).to receive(:pool).and_return(pool)
29
29
  end
30
30
 
31
- task.metadata.staged_urls = SortedSet.new
32
- task.metadata.controller = controller
31
+ task[:staged_urls] = SortedSet.new
32
+ task[:controller] = controller
33
33
  end
34
34
 
35
35
  context "with page assigned" do
36
- before { task.metadata.page = page }
36
+ before { task[:page] = page }
37
37
 
38
38
  it "does not alter the page" do
39
39
  expect {
40
40
  subject.call(task)
41
- }.not_to(change { task.metadata.page })
41
+ }.not_to(change { task[:page] })
42
42
  end
43
43
 
44
44
  it "yields" do
@@ -63,7 +63,7 @@ describe Wayfarer::Middleware::Fetch do
63
63
  it "stages the redirect URL" do
64
64
  expect {
65
65
  subject.call(task)
66
- }.to change { task.metadata.staged_urls.count }.by(1)
66
+ }.to change { task[:staged_urls].count }.by(1)
67
67
  end
68
68
 
69
69
  it "does not yield" do
@@ -75,13 +75,13 @@ describe Wayfarer::Middleware::Fetch do
75
75
  it "assigns the context" do
76
76
  expect {
77
77
  subject.call(task)
78
- }.to change { task.metadata.context }.to(context)
78
+ }.to change { task[:context] }.to(context)
79
79
  end
80
80
 
81
81
  it "assigns the page" do
82
82
  expect {
83
83
  subject.call(task)
84
- }.to change { task.metadata.page }.to(result.page)
84
+ }.to change { task[:page] }.to(result.page)
85
85
  end
86
86
 
87
87
  it "yields" do
@@ -95,31 +95,23 @@ describe Wayfarer::Middleware::Fetch do
95
95
  Struct.new(:task).include(described_class).new(task)
96
96
  end
97
97
 
98
- describe "#agent" do
99
- before { task.metadata.context = context }
98
+ describe "#user_agent" do
99
+ before { task[:context] = context }
100
100
 
101
101
  it "returns the agent" do
102
- expect(controller.agent).to be(context.instance)
103
- end
104
- end
105
-
106
- describe "#context" do
107
- before { task.metadata.context = context }
108
-
109
- it "returns the context" do
110
- expect(controller.context).to be(task.metadata.context)
102
+ expect(controller.user_agent).to be(context.instance)
111
103
  end
112
104
  end
113
105
 
114
106
  describe "#page" do
115
- before { task.metadata.page = page }
107
+ before { task[:page] = page }
116
108
 
117
109
  it "returns the page" do
118
- expect(controller.page).to be(task.metadata.page)
110
+ expect(controller.page).to be(task[:page])
119
111
  end
120
112
 
121
113
  context "with live keyword" do
122
- before { task.metadata.context = context }
114
+ before { task[:context] = context }
123
115
 
124
116
  context "with stateful agent" do
125
117
  before do
@@ -130,7 +122,7 @@ describe Wayfarer::Middleware::Fetch do
130
122
  it "replaces the page" do
131
123
  expect {
132
124
  controller.page(live: true)
133
- }.to change { task.metadata.page }.to(result.page)
125
+ }.to change { task[:page] }.to(result.page)
134
126
  end
135
127
  end
136
128
 
@@ -140,15 +132,24 @@ describe Wayfarer::Middleware::Fetch do
140
132
  it "does not alter the page" do
141
133
  expect {
142
134
  controller.page(live: true)
143
- }.not_to(change { task.metadata.page })
135
+ }.not_to(change { task[:page] })
144
136
  end
145
137
  end
146
138
  end
147
139
  end
148
140
 
149
- describe "#http" do
150
- it "returns a redirect-following HTTP agent" do
151
- expect(controller.http).to be_a(Wayfarer::Networking::Follow)
141
+ describe "#fetch" do
142
+ let(:url) { test_app_path("/redirect?times=3") }
143
+ subject { controller.fetch(url) }
144
+
145
+ it { is_expected.to be_a(Wayfarer::Page) }
146
+
147
+ context "with reries exhausted" do
148
+ let(:url) { test_app_path("/redirect?times=4") }
149
+
150
+ specify do
151
+ expect { subject }.to raise_error(Wayfarer::Networking::Follow::RedirectsExhaustedError)
152
+ end
152
153
  end
153
154
  end
154
155
  end
@@ -45,6 +45,23 @@ describe Wayfarer::Networking::Context do
45
45
  end
46
46
  end
47
47
 
48
+ context "with configured renewing exception raised" do
49
+ let(:other_error) { Class.new(StandardError) }
50
+
51
+ before do
52
+ Wayfarer.config[:network][:renew_on] = [other_error]
53
+ allow(strategy).to receive(:fetch).and_raise(other_error)
54
+ end
55
+
56
+ it "renews and reraises" do
57
+ expect(context).to receive(:renew)
58
+
59
+ expect {
60
+ context.fetch(url)
61
+ }.to raise_error(other_error)
62
+ end
63
+ end
64
+
48
65
  context "with non-renewing exception raised" do
49
66
  before do
50
67
  allow(strategy).to receive(:fetch).and_raise(StandardError)
@@ -16,7 +16,7 @@ describe Wayfarer::Networking::Follow do
16
16
  before { allow(inner).to receive(:fetch).and_return(success) }
17
17
 
18
18
  it "returns the page" do
19
- expect(outer.fetch(url)).to be(page)
19
+ expect(outer.fetch(url, follow: 3)).to be(page)
20
20
  end
21
21
  end
22
22
 
@@ -34,7 +34,7 @@ describe Wayfarer::Networking::Follow do
34
34
 
35
35
  expect {
36
36
  outer.fetch(url, follow: 0)
37
- }.to raise_error(Wayfarer::Networking::RedirectsExhaustedError)
37
+ }.to raise_error(described_class::RedirectsExhaustedError)
38
38
  end
39
39
  end
40
40
  end