wayfarer 0.4.7 → 0.4.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (183)
  1. checksums.yaml +4 -4
  2. data/.env +17 -0
  3. data/.github/workflows/lint.yaml +8 -6
  4. data/.github/workflows/release.yaml +4 -3
  5. data/.github/workflows/tests.yaml +5 -14
  6. data/.gitignore +2 -2
  7. data/.rubocop.yml +31 -0
  8. data/.vale.ini +6 -3
  9. data/Dockerfile +3 -2
  10. data/Gemfile +21 -0
  11. data/Gemfile.lock +233 -128
  12. data/Rakefile +7 -0
  13. data/docker-compose.yml +13 -14
  14. data/docs/guides/callbacks.md +3 -1
  15. data/docs/guides/configuration.md +10 -35
  16. data/docs/guides/development.md +67 -0
  17. data/docs/guides/handlers.md +7 -7
  18. data/docs/guides/jobs.md +54 -11
  19. data/docs/guides/networking/custom_adapters.md +31 -10
  20. data/docs/guides/pages.md +24 -22
  21. data/docs/guides/routing.md +116 -34
  22. data/docs/guides/tasks.md +30 -10
  23. data/docs/guides/tutorial.md +23 -17
  24. data/docs/guides/user_agents.md +11 -9
  25. data/lib/wayfarer/base.rb +9 -8
  26. data/lib/wayfarer/batch_completion.rb +18 -14
  27. data/lib/wayfarer/callbacks.rb +14 -14
  28. data/lib/wayfarer/cli/route_printer.rb +78 -96
  29. data/lib/wayfarer/cli.rb +12 -30
  30. data/lib/wayfarer/gc.rb +6 -1
  31. data/lib/wayfarer/kv.rb +28 -0
  32. data/lib/wayfarer/middleware/chain.rb +7 -1
  33. data/lib/wayfarer/middleware/content_type.rb +20 -15
  34. data/lib/wayfarer/middleware/dedup.rb +9 -3
  35. data/lib/wayfarer/middleware/dispatch.rb +7 -2
  36. data/lib/wayfarer/middleware/normalize.rb +4 -12
  37. data/lib/wayfarer/middleware/router.rb +1 -1
  38. data/lib/wayfarer/middleware/uri_parser.rb +4 -3
  39. data/lib/wayfarer/networking/context.rb +12 -1
  40. data/lib/wayfarer/networking/ferrum.rb +1 -4
  41. data/lib/wayfarer/networking/follow.rb +2 -1
  42. data/lib/wayfarer/networking/pool.rb +12 -7
  43. data/lib/wayfarer/networking/selenium.rb +15 -7
  44. data/lib/wayfarer/page.rb +0 -2
  45. data/lib/wayfarer/parsing/xml.rb +1 -1
  46. data/lib/wayfarer/parsing.rb +2 -5
  47. data/lib/wayfarer/redis/barrier.rb +15 -2
  48. data/lib/wayfarer/redis/counter.rb +1 -2
  49. data/lib/wayfarer/routing/dsl.rb +166 -31
  50. data/lib/wayfarer/routing/hash_stack.rb +33 -0
  51. data/lib/wayfarer/routing/matchers/custom.rb +8 -5
  52. data/lib/wayfarer/routing/matchers/{suffix.rb → empty_params.rb} +2 -6
  53. data/lib/wayfarer/routing/matchers/host.rb +15 -9
  54. data/lib/wayfarer/routing/matchers/path.rb +11 -33
  55. data/lib/wayfarer/routing/matchers/query.rb +41 -17
  56. data/lib/wayfarer/routing/matchers/result.rb +12 -0
  57. data/lib/wayfarer/routing/matchers/scheme.rb +13 -5
  58. data/lib/wayfarer/routing/matchers/url.rb +13 -5
  59. data/lib/wayfarer/routing/path_consumer.rb +130 -0
  60. data/lib/wayfarer/routing/path_finder.rb +151 -23
  61. data/lib/wayfarer/routing/result.rb +1 -1
  62. data/lib/wayfarer/routing/root_route.rb +14 -2
  63. data/lib/wayfarer/routing/route.rb +71 -14
  64. data/lib/wayfarer/routing/serializable.rb +28 -0
  65. data/lib/wayfarer/routing/sub_route.rb +53 -0
  66. data/lib/wayfarer/routing/target_route.rb +17 -1
  67. data/lib/wayfarer/stringify.rb +1 -2
  68. data/lib/wayfarer/task.rb +3 -5
  69. data/lib/wayfarer/uri/normalization.rb +120 -0
  70. data/lib/wayfarer.rb +50 -10
  71. data/mise.toml +2 -0
  72. data/mkdocs.yml +8 -17
  73. data/rake/lint.rake +0 -96
  74. data/rake/release.rake +5 -11
  75. data/rake/tests.rake +8 -4
  76. data/requirements.txt +1 -1
  77. data/spec/factories/job.rb +8 -0
  78. data/spec/factories/middleware.rb +2 -2
  79. data/spec/factories/path_finder.rb +11 -0
  80. data/spec/factories/redis.rb +19 -0
  81. data/spec/factories/task.rb +39 -1
  82. data/spec/spec_helpers.rb +50 -57
  83. data/spec/support/active_job_helpers.rb +8 -0
  84. data/spec/support/integration_helpers.rb +21 -0
  85. data/spec/support/redis_helpers.rb +9 -0
  86. data/spec/support/test_app.rb +64 -43
  87. data/spec/{base_spec.rb → wayfarer/base_spec.rb} +32 -36
  88. data/spec/wayfarer/batch_completion_spec.rb +142 -0
  89. data/spec/wayfarer/cli/job_spec.rb +88 -0
  90. data/spec/wayfarer/cli/routing_spec.rb +322 -0
  91. data/spec/{cli → wayfarer/cli}/version_spec.rb +1 -1
  92. data/spec/wayfarer/gc_spec.rb +29 -0
  93. data/spec/{handler_spec.rb → wayfarer/handler_spec.rb} +1 -3
  94. data/spec/{integration → wayfarer/integration}/callbacks_spec.rb +9 -6
  95. data/spec/wayfarer/integration/content_type_spec.rb +37 -0
  96. data/spec/wayfarer/integration/custom_routing_spec.rb +51 -0
  97. data/spec/{integration → wayfarer/integration}/gc_spec.rb +9 -13
  98. data/spec/{integration → wayfarer/integration}/handler_spec.rb +9 -10
  99. data/spec/{integration → wayfarer/integration}/page_spec.rb +8 -6
  100. data/spec/{integration → wayfarer/integration}/params_spec.rb +4 -4
  101. data/spec/{integration → wayfarer/integration}/parsing_spec.rb +7 -33
  102. data/spec/wayfarer/integration/retry_spec.rb +112 -0
  103. data/spec/{integration → wayfarer/integration}/stage_spec.rb +5 -5
  104. data/spec/{middleware → wayfarer/middleware}/batch_completion_spec.rb +4 -5
  105. data/spec/{middleware → wayfarer/middleware}/chain_spec.rb +20 -15
  106. data/spec/{middleware → wayfarer/middleware}/content_type_spec.rb +18 -21
  107. data/spec/{middleware → wayfarer/middleware}/controller_spec.rb +22 -20
  108. data/spec/wayfarer/middleware/dedup_spec.rb +66 -0
  109. data/spec/wayfarer/middleware/normalize_spec.rb +32 -0
  110. data/spec/{middleware → wayfarer/middleware}/router_spec.rb +18 -20
  111. data/spec/{middleware → wayfarer/middleware}/stage_spec.rb +11 -10
  112. data/spec/wayfarer/middleware/uri_parser_spec.rb +63 -0
  113. data/spec/{middleware → wayfarer/middleware}/user_agent_spec.rb +34 -32
  114. data/spec/wayfarer/networking/capybara_spec.rb +13 -0
  115. data/spec/{networking → wayfarer/networking}/context_spec.rb +46 -38
  116. data/spec/wayfarer/networking/ferrum_spec.rb +13 -0
  117. data/spec/{networking → wayfarer/networking}/follow_spec.rb +9 -4
  118. data/spec/wayfarer/networking/http_spec.rb +12 -0
  119. data/spec/{networking → wayfarer/networking}/pool_spec.rb +11 -9
  120. data/spec/wayfarer/networking/selenium_spec.rb +12 -0
  121. data/spec/{networking → wayfarer/networking}/strategy.rb +33 -54
  122. data/spec/{page_spec.rb → wayfarer/page_spec.rb} +3 -3
  123. data/spec/{parsing → wayfarer/parsing}/json_spec.rb +1 -1
  124. data/spec/{parsing/xml_spec.rb → wayfarer/parsing/xml_parse_spec.rb} +4 -3
  125. data/spec/{redis → wayfarer/redis}/barrier_spec.rb +5 -4
  126. data/spec/wayfarer/redis/counter_spec.rb +34 -0
  127. data/spec/{redis → wayfarer/redis}/pool_spec.rb +3 -2
  128. data/spec/{routing → wayfarer/routing}/dsl_spec.rb +12 -22
  129. data/spec/wayfarer/routing/hash_stack_spec.rb +63 -0
  130. data/spec/wayfarer/routing/integration_spec.rb +101 -0
  131. data/spec/wayfarer/routing/matchers/custom_spec.rb +39 -0
  132. data/spec/wayfarer/routing/matchers/host_spec.rb +56 -0
  133. data/spec/wayfarer/routing/matchers/matcher.rb +17 -0
  134. data/spec/wayfarer/routing/matchers/path_spec.rb +43 -0
  135. data/spec/wayfarer/routing/matchers/query_spec.rb +123 -0
  136. data/spec/wayfarer/routing/matchers/scheme_spec.rb +45 -0
  137. data/spec/wayfarer/routing/matchers/url_spec.rb +33 -0
  138. data/spec/wayfarer/routing/path_consumer_spec.rb +123 -0
  139. data/spec/wayfarer/routing/path_finder_spec.rb +409 -0
  140. data/spec/wayfarer/routing/root_route_spec.rb +51 -0
  141. data/spec/wayfarer/routing/route_spec.rb +74 -0
  142. data/spec/wayfarer/routing/sub_route_spec.rb +103 -0
  143. data/spec/wayfarer/uri/normalization_spec.rb +98 -0
  144. data/spec/wayfarer_spec.rb +2 -2
  145. data/wayfarer.gemspec +17 -28
  146. metadata +768 -246
  147. data/.rbenv-gemsets +0 -1
  148. data/.ruby-version +0 -1
  149. data/RELEASING.md +0 -17
  150. data/docs/cookbook/user_agent.md +0 -7
  151. data/docs/design.md +0 -36
  152. data/docs/guides/jobs/error_handling.md +0 -40
  153. data/docs/reference/configuration.md +0 -36
  154. data/spec/batch_completion_spec.rb +0 -104
  155. data/spec/cli/job_spec.rb +0 -74
  156. data/spec/cli/routing_spec.rb +0 -101
  157. data/spec/fixtures/dummy_job.rb +0 -9
  158. data/spec/gc_spec.rb +0 -17
  159. data/spec/integration/content_type_spec.rb +0 -145
  160. data/spec/integration/routing_spec.rb +0 -18
  161. data/spec/middleware/dedup_spec.rb +0 -71
  162. data/spec/middleware/dispatch_spec.rb +0 -59
  163. data/spec/middleware/normalize_spec.rb +0 -60
  164. data/spec/middleware/uri_parser_spec.rb +0 -53
  165. data/spec/networking/capybara_spec.rb +0 -12
  166. data/spec/networking/ferrum_spec.rb +0 -12
  167. data/spec/networking/http_spec.rb +0 -12
  168. data/spec/networking/selenium_spec.rb +0 -12
  169. data/spec/redis/counter_spec.rb +0 -44
  170. data/spec/routing/integration_spec.rb +0 -110
  171. data/spec/routing/matchers/custom_spec.rb +0 -31
  172. data/spec/routing/matchers/host_spec.rb +0 -49
  173. data/spec/routing/matchers/path_spec.rb +0 -43
  174. data/spec/routing/matchers/query_spec.rb +0 -137
  175. data/spec/routing/matchers/scheme_spec.rb +0 -25
  176. data/spec/routing/matchers/suffix_spec.rb +0 -41
  177. data/spec/routing/matchers/uri_spec.rb +0 -27
  178. data/spec/routing/path_finder_spec.rb +0 -33
  179. data/spec/routing/root_route_spec.rb +0 -29
  180. data/spec/routing/route_spec.rb +0 -43
  181. data/docs/{reference → guides}/cli.md +0 -0
  182. data/spec/{stringify_spec.rb → wayfarer/stringify_spec.rb} +2 -2
  183. data/spec/{task_spec.rb → wayfarer/task_spec.rb} +0 -0
@@ -3,20 +3,18 @@
3
3
  require "spec_helpers"
4
4
 
5
5
  describe Wayfarer::Middleware::Controller do
6
+ subject(:controller) { Class.new.include(described_class) }
7
+
6
8
  let(:task) { build(:task) }
7
9
 
8
10
  let(:middleware) do
9
- Class.new do
10
- extend Wayfarer::Middleware::Base
11
-
12
- module API; end
13
-
14
- def call(task); end
11
+ Class.new.tap do |klass|
12
+ klass.extend(Wayfarer::Middleware::Base)
13
+ klass.define_method(:call, &->(_) {})
14
+ klass.const_set("API", Module.new)
15
15
  end
16
16
  end
17
17
 
18
- subject(:controller) { Class.new.include(described_class) }
19
-
20
18
  describe "::use" do
21
19
  it "adds middleware" do
22
20
  expect {
@@ -28,7 +26,7 @@ describe Wayfarer::Middleware::Controller do
28
26
  controller.use(middleware)
29
27
  expect(controller.chain.middlewares.last).to be_a(Wayfarer::Middleware::Lazy)
30
28
  expect(controller.chain.middlewares.last.klass).to be(middleware)
31
- expect_any_instance_of(middleware).to receive(:call).with(task)
29
+ expect_any_instance_of(middleware).to receive(:call).with(task) # rubocop:disable RSpec/AnyInstance
32
30
  controller.new.call(task)
33
31
  end
34
32
  end
@@ -42,19 +40,19 @@ describe Wayfarer::Middleware::Controller do
42
40
  end
43
41
 
44
42
  describe "#call" do
45
- subject { controller.new }
43
+ let(:instance) { controller.new }
46
44
 
47
45
  it "assigns the task" do
48
46
  expect {
49
- subject.call(task)
50
- }.to change { subject.task }.to(task)
47
+ instance.call(task)
48
+ }.to change(instance, :task).to(task)
51
49
  end
52
50
 
53
51
  context "without job assigned" do
54
52
  it "assigns itself" do
55
53
  expect {
56
- subject.call(task)
57
- }.to change { task[:job] }.to(subject)
54
+ instance.call(task)
55
+ }.to change { task[:job] }.to(instance)
58
56
  end
59
57
  end
60
58
 
@@ -63,24 +61,28 @@ describe Wayfarer::Middleware::Controller do
63
61
 
64
62
  it "does not override the job" do
65
63
  expect {
66
- subject.call(task)
64
+ instance.call(task)
67
65
  }.not_to(change { task[:job] })
68
66
  end
69
67
  end
70
68
 
71
69
  it "assigns itself as controller" do
72
70
  expect {
73
- subject.call(task)
74
- }.to change { task[:controller] }.to(subject)
71
+ instance.call(task)
72
+ }.to change { task[:controller] }.to(instance)
75
73
  end
76
74
 
77
75
  it "calls the chain" do
78
- expect(subject.class.chain).to receive(:call)
79
- subject.call(task)
76
+ chain = instance.class.chain
77
+ allow(chain).to receive(:call)
78
+
79
+ instance.call(task)
80
+
81
+ expect(chain).to have_received(:call)
80
82
  end
81
83
 
82
84
  it "yields" do
83
- expect { |spy| subject.call(task, &spy) }.to yield_control
85
+ expect { |spy| instance.call(task, &spy) }.to yield_control
84
86
  end
85
87
  end
86
88
  end
@@ -0,0 +1,66 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../../spec_helpers"
4
+
5
+ RSpec.describe Wayfarer::Middleware::Dedup, :redis do
6
+ subject(:middleware) { described_class.new }
7
+
8
+ let(:job) { build(:job, executions: 1) }
9
+ let(:controller) { job }
10
+ let(:task) { build(:task, :normalized, :barrier, job: job, controller: controller, url: "http://example.com/") }
11
+ let(:barrier) { task[:barrier] }
12
+
13
+ context "with unseen URL" do
14
+ specify do
15
+ expect { |block| middleware.call(task, &block) }.to yield_control
16
+ end
17
+ end
18
+
19
+ context "with seen URL" do
20
+ before { barrier.check!(task[:uri].to_s) }
21
+
22
+ specify do
23
+ expect { |block| middleware.call(task, &block) }.not_to yield_control
24
+ end
25
+
26
+ context "when task is rerouted" do
27
+ let(:controller) { Class.new.include(Wayfarer::Handler) }
28
+
29
+ specify do
30
+ expect { |block| middleware.call(task, &block) }.to yield_control
31
+ end
32
+ end
33
+
34
+ context "with retry" do
35
+ let(:job) { build(:job, executions: 3) }
36
+
37
+ specify do
38
+ expect { |block| middleware.call(task, &block) }.to yield_control
39
+ end
40
+ end
41
+ end
42
+
43
+ describe "cache keys" do
44
+ before do
45
+ task[:barrier] = spy
46
+ end
47
+
48
+ specify do
49
+ middleware.call(task)
50
+
51
+ expect(task[:barrier]).to have_received(:check!).with(task[:uri].to_s)
52
+ end
53
+
54
+ context "with custom key" do
55
+ before do
56
+ Wayfarer.config[:deduplication][:key] = ->(task) { Digest::MD5.base64digest(task[:uri]) }
57
+ end
58
+
59
+ specify do
60
+ middleware.call(task)
61
+
62
+ expect(task[:barrier]).to have_received(:check!).with("qbnwQzbOAYGgjndOARE7MQ==")
63
+ end
64
+ end
65
+ end
66
+ end
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "spec_helpers"
4
+
5
+ describe Wayfarer::Middleware::Normalize do
6
+ subject(:middleware) { described_class.new }
7
+
8
+ let(:task) { build(:task, :uri, url: "http://example.com/") }
9
+
10
+ context "with assigned URI" do
11
+ specify do
12
+ expect { |spy| middleware.call(task, &spy) }.to yield_control
13
+ end
14
+
15
+ specify do
16
+ expect { middleware.call(task) }.to(change { task[:uri] })
17
+ end
18
+ end
19
+
20
+ context "with invalid URL" do
21
+ before do
22
+ allow(Wayfarer::URI::Normalization)
23
+ .to receive(:canonical!)
24
+ .with(task[:uri])
25
+ .and_raise(Wayfarer::URI::Normalization::InvalidURIError)
26
+ end
27
+
28
+ specify do
29
+ expect { |spy| middleware.call(task, &spy) }.not_to yield_control
30
+ end
31
+ end
32
+ end
@@ -3,8 +3,9 @@
3
3
  require "spec_helpers"
4
4
 
5
5
  describe Wayfarer::Middleware::Router do
6
- let(:task) { build(:task) }
7
- subject { described_class.new }
6
+ subject(:middleware) { described_class.new }
7
+
8
+ let(:task) { build(:task, :uri) }
8
9
 
9
10
  describe "#call" do
10
11
  let(:controller) do
@@ -13,10 +14,9 @@ describe Wayfarer::Middleware::Router do
13
14
 
14
15
  before do
15
16
  allow(controller.class.route).to receive(:invoke)
16
- .with(Addressable::URI.parse(task.url))
17
+ .with(task)
17
18
  .and_return(result)
18
19
  task[:controller] = controller
19
- task[:uri] = Addressable::URI.parse(task.url)
20
20
  end
21
21
 
22
22
  context "with matching route" do
@@ -28,7 +28,7 @@ describe Wayfarer::Middleware::Router do
28
28
 
29
29
  it "assigns the action" do
30
30
  expect {
31
- subject.call(task)
31
+ middleware.call(task)
32
32
  }.to change { task[:action] }.to(action)
33
33
  end
34
34
 
@@ -36,29 +36,29 @@ describe Wayfarer::Middleware::Router do
36
36
  task[:params] = ActiveSupport::HashWithIndifferentAccess.new("bar" => "qux")
37
37
 
38
38
  expect {
39
- subject.call(task)
39
+ middleware.call(task)
40
40
  }.to change { task[:params] }.to("foo" => "bar", "bar" => "qux")
41
41
  end
42
42
 
43
43
  specify do
44
- expect { |spy| subject.call(task, &spy) }.to yield_control
44
+ expect { |spy| middleware.call(task, &spy) }.to yield_control
45
45
  end
46
46
 
47
47
  specify do
48
+ # rubocop:disable RSpec/MessageSpies
48
49
  expect(Wayfarer::Logging.logger).to receive(:add).with(Logger::INFO, kind_of(String))
50
+ # rubocop:enable RSpec/MessageSpies
49
51
 
50
- subject.call(task)
52
+ middleware.call(task)
51
53
  end
52
54
  end
53
55
 
54
56
  context "without matching route" do
55
- let(:result) do
56
- Wayfarer::Routing::Result::Mismatch.new
57
- end
57
+ let(:result) { Wayfarer::Routing::Result::Mismatch.instance }
58
58
 
59
59
  it "does not assign the action" do
60
60
  expect {
61
- subject.call(task)
61
+ middleware.call(task)
62
62
  }.not_to(change { task[:action] })
63
63
  end
64
64
 
@@ -66,34 +66,32 @@ describe Wayfarer::Middleware::Router do
66
66
  task[:params] = ActiveSupport::HashWithIndifferentAccess.new("bar" => "qux")
67
67
 
68
68
  expect {
69
- subject.call(task)
69
+ middleware.call(task)
70
70
  }.not_to(change { task[:params] })
71
71
  end
72
72
 
73
73
  specify do
74
- expect { |spy| subject.call(task, &spy) }.not_to yield_control
74
+ expect { |spy| middleware.call(task, &spy) }.not_to yield_control
75
75
  end
76
76
 
77
77
  specify do
78
+ # rubocop:disable RSpec/MessageSpies
78
79
  expect(Wayfarer::Logging.logger).to receive(:add).with(Logger::INFO, kind_of(String))
80
+ # rubocop:enable RSpec/MessageSpies
79
81
 
80
- subject.call(task)
82
+ middleware.call(task)
81
83
  end
82
84
  end
83
85
  end
84
86
 
85
87
  describe described_class::API do
86
- subject(:controller) do
87
- Struct.new(:task).include(described_class).new(task)
88
- end
88
+ subject(:controller) { Struct.new(:task).include(described_class).new(task) }
89
89
 
90
90
  describe "::route" do
91
91
  it "returns a root route" do
92
92
  expect(controller.class.route).to be_a(Wayfarer::Routing::RootRoute)
93
93
  end
94
- end
95
94
 
96
- describe "::route" do
97
95
  it "adds a routing block" do
98
96
  expect {
99
97
  controller.class.route.to(:index)
@@ -3,38 +3,39 @@
3
3
  require "spec_helpers"
4
4
 
5
5
  describe Wayfarer::Middleware::Stage do
6
+ subject(:middleware) { described_class.new }
7
+
6
8
  let(:task) { build(:task) }
7
- subject { described_class.new }
8
9
 
9
10
  describe "#call" do
10
11
  it "assigns an empty set" do
11
- subject.call(task)
12
+ middleware.call(task)
12
13
  expect(task[:staged_urls]).to eq(SortedSet.new)
13
14
  end
14
15
 
15
16
  it "yields" do
16
- expect { |spy| subject.call(task, &spy) }.to yield_control
17
+ expect { |spy| middleware.call(task, &spy) }.to yield_control
17
18
  end
18
19
 
19
20
  it "enqueues tasks" do
21
+ job = double(crawl: nil) # rubocop:disable RSpec/VerifiedDoubles
20
22
  urls = [test_app_path("/alpha"), test_app_path("/beta")]
21
23
 
22
- spy.tap do |job|
23
- expect(job).to receive(:crawl).with(urls.first, batch: task.batch).ordered
24
- expect(job).to receive(:crawl).with(urls.second, batch: task.batch).ordered
25
- task[:job] = double(class: job)
26
- end
24
+ task[:job] = instance_spy(Object, class: job)
27
25
 
28
- subject.call(task) do
26
+ middleware.call(task) do
29
27
  task[:staged_urls] = SortedSet.new(urls)
30
28
  end
29
+
30
+ expect(job).to have_received(:crawl).with(urls.first, batch: task.batch).ordered
31
+ expect(job).to have_received(:crawl).with(urls.second, batch: task.batch).ordered
31
32
  end
32
33
 
33
34
  it "resets staged URLs" do
34
35
  task[:staged_urls] = SortedSet.new([test_app_path("/foo")])
35
36
 
36
37
  expect {
37
- subject.call(task)
38
+ middleware.call(task)
38
39
  }.to change { task[:staged_urls].count }.to(0)
39
40
  end
40
41
  end
@@ -0,0 +1,63 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "spec_helpers"
4
+
5
+ describe Wayfarer::Middleware::UriParser do
6
+ subject(:middleware) { described_class.new }
7
+
8
+ let(:task) { build(:task) }
9
+
10
+ it "parses URLs" do
11
+ expect { middleware.call(task) }.to change { task[:uri] }.to(Addressable::URI.parse(task.url))
12
+ end
13
+
14
+ specify do
15
+ expect { |spy| middleware.call(task, &spy) }.to yield_control
16
+ end
17
+
18
+ context "with already parsed URI" do
19
+ before do
20
+ task[:uri] = Addressable::URI.parse("http://example.com")
21
+ end
22
+
23
+ specify do
24
+ expect { |spy| middleware.call(task, &spy) }.to yield_control
25
+ end
26
+
27
+ specify do
28
+ expect { middleware.call(task) }.not_to(change { task[:uri] })
29
+ end
30
+ end
31
+
32
+ context "with invalid URL" do
33
+ let(:task) { build(:task, url: "ht%0atp://localhost/") }
34
+
35
+ specify do
36
+ expect { |spy| middleware.call(task, &spy) }.not_to yield_control
37
+ end
38
+
39
+ specify do
40
+ expect(Wayfarer::Logging.logger)
41
+ .to receive(:add) # rubocop:disable RSpec/MessageSpies
42
+ .with(Logger::INFO, "Not processing invalid URL (Invalid scheme format: 'ht%0atp')")
43
+
44
+ middleware.call(task)
45
+ end
46
+ end
47
+
48
+ describe described_class::API do
49
+ subject(:controller) do
50
+ Struct.new(:task).include(described_class).new(task)
51
+ end
52
+
53
+ describe "#uri" do
54
+ let(:uri) { Addressable::URI.parse(task.url) }
55
+
56
+ before { task[:uri] = uri }
57
+
58
+ specify do
59
+ expect(controller.uri).to be(uri)
60
+ end
61
+ end
62
+ end
63
+ end
@@ -3,17 +3,19 @@
3
3
  require "spec_helpers"
4
4
 
5
5
  describe Wayfarer::Middleware::UserAgent do
6
- let(:task) { build(:task) }
7
- let(:page) { Object.new }
6
+ subject(:middleware) { described_class.new }
7
+
8
+ let(:task) { build(:task, :staged_urls) }
9
+ let(:page) { instance_double(Wayfarer::Page) }
8
10
  let(:agent) { Object.new }
9
11
  let(:result) { Wayfarer::Networking::Result::Success.new(page) }
10
12
  let(:context) do
11
- double(instance: agent).tap do |context|
13
+ instance_spy(Wayfarer::Networking::Context, instance: agent).tap do |context|
12
14
  allow(context).to receive(:fetch).with(task.url).and_return(result)
13
15
  end
14
16
  end
15
17
 
16
- subject { described_class.new }
18
+ before { task[:controller] = controller }
17
19
 
18
20
  describe "#call" do
19
21
  let(:controller) do
@@ -25,67 +27,66 @@ describe Wayfarer::Middleware::UserAgent do
25
27
 
26
28
  spy.tap do |pool|
27
29
  allow(pool).to receive(:with).and_yield(context)
28
- allow(subject).to receive(:pool).and_return(pool)
30
+ allow(middleware).to receive(:pool).and_return(pool) # rubocop:disable RSpec/SubjectStub
29
31
  end
30
-
31
- task[:staged_urls] = SortedSet.new
32
- task[:controller] = controller
33
32
  end
34
33
 
35
34
  context "with page assigned" do
36
35
  before { task[:page] = page }
37
36
 
38
- it "does not alter the page" do
37
+ specify do
39
38
  expect {
40
- subject.call(task)
39
+ middleware.call(task)
41
40
  }.not_to(change { task[:page] })
42
41
  end
43
42
 
44
- it "yields" do
45
- expect { |spy| subject.call(task, &spy) }.to yield_control
43
+ specify do
44
+ expect { |spy| middleware.call(task, &spy) }.to yield_control
46
45
  end
47
46
  end
48
47
 
49
- it "runs callbacks" do
50
- expect(controller).to receive(:run_callbacks).with(:fetch)
51
- subject.call(task)
48
+ specify do
49
+ middleware.call(task)
50
+
51
+ expect(controller).to have_received(:run_callbacks).with(:fetch)
52
52
  end
53
53
 
54
- it "fetches the URL" do
55
- expect(context).to receive(:fetch).with(task.url)
56
- subject.call(task)
54
+ specify do
55
+ middleware.call(task)
56
+
57
+ expect(context).to have_received(:fetch).with(task.url)
57
58
  end
58
59
 
59
60
  context "with Redirect" do
60
61
  let(:redirect_url) { test_app_path("/foobar") }
61
62
  let(:result) { Wayfarer::Networking::Result::Redirect.new(page) }
62
63
 
63
- it "stages the redirect URL" do
64
+ specify do
64
65
  expect {
65
- subject.call(task)
66
+ middleware.call(task)
66
67
  }.to change { task[:staged_urls].count }.by(1)
67
68
  end
68
69
 
69
- it "does not yield" do
70
- expect { |spy| subject.call(task, &spy) }.not_to yield_control
70
+ specify do
71
+ expect { |spy| middleware.call(task, &spy) }.not_to yield_control
71
72
  end
72
73
  end
73
74
 
74
75
  context "with Success" do
75
- it "assigns the context" do
76
+ specify do
76
77
  expect {
77
- subject.call(task)
78
+ middleware.call(task)
78
79
  }.to change { task[:context] }.to(context)
79
80
  end
80
81
 
81
- it "assigns the page" do
82
+ specify do
82
83
  expect {
83
- subject.call(task)
84
+ middleware.call(task)
84
85
  }.to change { task[:page] }.to(result.page)
85
86
  end
86
87
 
87
- it "yields" do
88
- expect { |spy| subject.call(task, &spy) }.to yield_control
88
+ specify do
89
+ expect { |spy| middleware.call(task, &spy) }.to yield_control
89
90
  end
90
91
  end
91
92
  end
@@ -98,7 +99,7 @@ describe Wayfarer::Middleware::UserAgent do
98
99
  describe "#user_agent" do
99
100
  before { task[:context] = context }
100
101
 
101
- it "returns the agent" do
102
+ specify do
102
103
  expect(controller.user_agent).to be(context.instance)
103
104
  end
104
105
  end
@@ -106,7 +107,7 @@ describe Wayfarer::Middleware::UserAgent do
106
107
  describe "#page" do
107
108
  before { task[:page] = page }
108
109
 
109
- it "returns the page" do
110
+ specify do
110
111
  expect(controller.page).to be(task[:page])
111
112
  end
112
113
 
@@ -139,8 +140,9 @@ describe Wayfarer::Middleware::UserAgent do
139
140
  end
140
141
 
141
142
  describe "#fetch" do
143
+ subject(:page) { controller.fetch(url) }
144
+
142
145
  let(:url) { test_app_path("/redirect?times=3") }
143
- subject { controller.fetch(url) }
144
146
 
145
147
  it { is_expected.to be_a(Wayfarer::Page) }
146
148
 
@@ -148,7 +150,7 @@ describe Wayfarer::Middleware::UserAgent do
148
150
  let(:url) { test_app_path("/redirect?times=4") }
149
151
 
150
152
  specify do
151
- expect { subject }.to raise_error(Wayfarer::Networking::Follow::RedirectsExhaustedError)
153
+ expect { page }.to raise_error(Wayfarer::Networking::Follow::RedirectsExhaustedError)
152
154
  end
153
155
  end
154
156
  end
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "spec_helpers"
4
+ require_relative "strategy"
5
+
6
+ describe Wayfarer::Networking::Capybara, :ferrum do
7
+ it_behaves_like "Network strategy", strategy: described_class,
8
+ browser: true,
9
+ request_headers: false,
10
+ response_headers: true,
11
+ status_code: true,
12
+ raises_on_error_response: true # uses Ferrum
13
+ end