wayfarer 0.4.6 → 0.4.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (259) hide show
  1. checksums.yaml +4 -4
  2. data/.env +17 -0
  3. data/.github/workflows/lint.yaml +27 -0
  4. data/.github/workflows/release.yaml +30 -0
  5. data/.github/workflows/tests.yaml +21 -0
  6. data/.gitignore +5 -1
  7. data/.rubocop.yml +36 -0
  8. data/.vale.ini +8 -0
  9. data/.yardopts +1 -3
  10. data/Dockerfile +6 -4
  11. data/Gemfile +24 -0
  12. data/Gemfile.lock +274 -164
  13. data/Rakefile +7 -51
  14. data/bin/wayfarer +1 -1
  15. data/docker-compose.yml +23 -13
  16. data/docs/cookbook/consent_screen.md +2 -2
  17. data/docs/cookbook/executing_javascript.md +3 -3
  18. data/docs/cookbook/navigation.md +12 -12
  19. data/docs/cookbook/querying_html.md +3 -3
  20. data/docs/cookbook/screenshots.md +2 -2
  21. data/docs/guides/callbacks.md +25 -125
  22. data/docs/guides/cli.md +71 -0
  23. data/docs/guides/configuration.md +10 -35
  24. data/docs/guides/development.md +67 -0
  25. data/docs/guides/handlers.md +60 -0
  26. data/docs/guides/index.md +1 -0
  27. data/docs/guides/jobs.md +142 -31
  28. data/docs/guides/navigation.md +1 -1
  29. data/docs/guides/networking/capybara.md +13 -22
  30. data/docs/guides/networking/custom_adapters.md +103 -41
  31. data/docs/guides/networking/ferrum.md +4 -4
  32. data/docs/guides/networking/http.md +9 -13
  33. data/docs/guides/networking/selenium.md +10 -11
  34. data/docs/guides/pages.md +78 -10
  35. data/docs/guides/redis.md +10 -0
  36. data/docs/guides/routing.md +156 -0
  37. data/docs/guides/tasks.md +53 -9
  38. data/docs/guides/tutorial.md +66 -0
  39. data/docs/guides/user_agents.md +115 -0
  40. data/docs/index.md +17 -40
  41. data/lib/wayfarer/base.rb +125 -46
  42. data/lib/wayfarer/batch_completion.rb +60 -0
  43. data/lib/wayfarer/callbacks.rb +22 -48
  44. data/lib/wayfarer/cli/route_printer.rb +85 -89
  45. data/lib/wayfarer/cli.rb +103 -0
  46. data/lib/wayfarer/gc.rb +18 -6
  47. data/lib/wayfarer/handler.rb +15 -7
  48. data/lib/wayfarer/kv.rb +28 -0
  49. data/lib/wayfarer/logging.rb +38 -0
  50. data/lib/wayfarer/middleware/base.rb +2 -0
  51. data/lib/wayfarer/middleware/batch_completion.rb +19 -0
  52. data/lib/wayfarer/middleware/chain.rb +7 -1
  53. data/lib/wayfarer/middleware/content_type.rb +59 -0
  54. data/lib/wayfarer/middleware/controller.rb +19 -15
  55. data/lib/wayfarer/middleware/dedup.rb +22 -13
  56. data/lib/wayfarer/middleware/dispatch.rb +17 -4
  57. data/lib/wayfarer/middleware/normalize.rb +7 -14
  58. data/lib/wayfarer/middleware/redis.rb +15 -0
  59. data/lib/wayfarer/middleware/router.rb +33 -35
  60. data/lib/wayfarer/middleware/stage.rb +5 -5
  61. data/lib/wayfarer/middleware/uri_parser.rb +31 -0
  62. data/lib/wayfarer/middleware/user_agent.rb +49 -0
  63. data/lib/wayfarer/networking/capybara.rb +1 -1
  64. data/lib/wayfarer/networking/context.rb +14 -3
  65. data/lib/wayfarer/networking/ferrum.rb +1 -4
  66. data/lib/wayfarer/networking/follow.rb +14 -7
  67. data/lib/wayfarer/networking/http.rb +1 -1
  68. data/lib/wayfarer/networking/pool.rb +23 -13
  69. data/lib/wayfarer/networking/selenium.rb +15 -7
  70. data/lib/wayfarer/networking/strategy.rb +2 -2
  71. data/lib/wayfarer/page.rb +34 -14
  72. data/lib/wayfarer/parsing/xml.rb +6 -6
  73. data/lib/wayfarer/parsing.rb +21 -0
  74. data/lib/wayfarer/redis/barrier.rb +26 -21
  75. data/lib/wayfarer/redis/counter.rb +18 -9
  76. data/lib/wayfarer/redis/pool.rb +1 -1
  77. data/lib/wayfarer/redis/resettable.rb +19 -0
  78. data/lib/wayfarer/routing/dsl.rb +166 -30
  79. data/lib/wayfarer/routing/hash_stack.rb +33 -0
  80. data/lib/wayfarer/routing/matchers/custom.rb +8 -5
  81. data/lib/wayfarer/routing/matchers/{suffix.rb → empty_params.rb} +2 -6
  82. data/lib/wayfarer/routing/matchers/host.rb +15 -9
  83. data/lib/wayfarer/routing/matchers/path.rb +11 -31
  84. data/lib/wayfarer/routing/matchers/query.rb +41 -17
  85. data/lib/wayfarer/routing/matchers/result.rb +12 -0
  86. data/lib/wayfarer/routing/matchers/scheme.rb +13 -5
  87. data/lib/wayfarer/routing/matchers/url.rb +13 -5
  88. data/lib/wayfarer/routing/path_consumer.rb +130 -0
  89. data/lib/wayfarer/routing/path_finder.rb +151 -23
  90. data/lib/wayfarer/routing/result.rb +1 -1
  91. data/lib/wayfarer/routing/root_route.rb +17 -1
  92. data/lib/wayfarer/routing/route.rb +66 -19
  93. data/lib/wayfarer/routing/serializable.rb +28 -0
  94. data/lib/wayfarer/routing/sub_route.rb +53 -0
  95. data/lib/wayfarer/routing/target_route.rb +17 -1
  96. data/lib/wayfarer/stringify.rb +21 -30
  97. data/lib/wayfarer/task.rb +9 -17
  98. data/lib/wayfarer/uri/normalization.rb +120 -0
  99. data/lib/wayfarer.rb +72 -5
  100. data/mise.toml +2 -0
  101. data/mkdocs.yml +44 -8
  102. data/rake/docs.rake +26 -0
  103. data/rake/lint.rake +9 -0
  104. data/rake/release.rake +23 -0
  105. data/rake/tests.rake +32 -0
  106. data/requirements.txt +1 -1
  107. data/spec/factories/job.rb +8 -0
  108. data/spec/factories/middleware.rb +2 -2
  109. data/spec/factories/path_finder.rb +11 -0
  110. data/spec/factories/redis.rb +19 -0
  111. data/spec/factories/task.rb +46 -2
  112. data/spec/spec_helpers.rb +55 -51
  113. data/spec/support/active_job_helpers.rb +8 -0
  114. data/spec/support/integration_helpers.rb +21 -0
  115. data/spec/support/redis_helpers.rb +9 -0
  116. data/spec/support/test_app.rb +66 -37
  117. data/spec/wayfarer/base_spec.rb +200 -0
  118. data/spec/wayfarer/batch_completion_spec.rb +142 -0
  119. data/spec/wayfarer/cli/job_spec.rb +88 -0
  120. data/spec/wayfarer/cli/routing_spec.rb +322 -0
  121. data/spec/{cli → wayfarer/cli}/version_spec.rb +1 -1
  122. data/spec/wayfarer/gc_spec.rb +29 -0
  123. data/spec/wayfarer/handler_spec.rb +9 -0
  124. data/spec/wayfarer/integration/callbacks_spec.rb +200 -0
  125. data/spec/wayfarer/integration/content_type_spec.rb +37 -0
  126. data/spec/wayfarer/integration/custom_routing_spec.rb +51 -0
  127. data/spec/wayfarer/integration/gc_spec.rb +40 -0
  128. data/spec/wayfarer/integration/handler_spec.rb +65 -0
  129. data/spec/wayfarer/integration/page_spec.rb +79 -0
  130. data/spec/wayfarer/integration/params_spec.rb +64 -0
  131. data/spec/wayfarer/integration/parsing_spec.rb +99 -0
  132. data/spec/wayfarer/integration/retry_spec.rb +112 -0
  133. data/spec/wayfarer/integration/stage_spec.rb +58 -0
  134. data/spec/wayfarer/middleware/batch_completion_spec.rb +33 -0
  135. data/spec/{middleware → wayfarer/middleware}/chain_spec.rb +24 -19
  136. data/spec/wayfarer/middleware/content_type_spec.rb +83 -0
  137. data/spec/{middleware → wayfarer/middleware}/controller_spec.rb +24 -22
  138. data/spec/wayfarer/middleware/dedup_spec.rb +66 -0
  139. data/spec/wayfarer/middleware/normalize_spec.rb +32 -0
  140. data/spec/wayfarer/middleware/router_spec.rb +102 -0
  141. data/spec/wayfarer/middleware/stage_spec.rb +63 -0
  142. data/spec/wayfarer/middleware/uri_parser_spec.rb +63 -0
  143. data/spec/wayfarer/middleware/user_agent_spec.rb +158 -0
  144. data/spec/wayfarer/networking/capybara_spec.rb +13 -0
  145. data/spec/{networking → wayfarer/networking}/context_spec.rb +46 -38
  146. data/spec/wayfarer/networking/ferrum_spec.rb +13 -0
  147. data/spec/{networking → wayfarer/networking}/follow_spec.rb +11 -6
  148. data/spec/wayfarer/networking/http_spec.rb +12 -0
  149. data/spec/{networking → wayfarer/networking}/pool_spec.rb +16 -14
  150. data/spec/wayfarer/networking/selenium_spec.rb +12 -0
  151. data/spec/{networking → wayfarer/networking}/strategy.rb +33 -54
  152. data/spec/wayfarer/page_spec.rb +69 -0
  153. data/spec/{parsing → wayfarer/parsing}/json_spec.rb +1 -1
  154. data/spec/wayfarer/parsing/xml_parse_spec.rb +25 -0
  155. data/spec/wayfarer/redis/barrier_spec.rb +39 -0
  156. data/spec/wayfarer/redis/counter_spec.rb +34 -0
  157. data/spec/{redis → wayfarer/redis}/pool_spec.rb +4 -3
  158. data/spec/{routing → wayfarer/routing}/dsl_spec.rb +12 -22
  159. data/spec/wayfarer/routing/hash_stack_spec.rb +63 -0
  160. data/spec/wayfarer/routing/integration_spec.rb +101 -0
  161. data/spec/wayfarer/routing/matchers/custom_spec.rb +39 -0
  162. data/spec/wayfarer/routing/matchers/host_spec.rb +56 -0
  163. data/spec/wayfarer/routing/matchers/matcher.rb +17 -0
  164. data/spec/wayfarer/routing/matchers/path_spec.rb +43 -0
  165. data/spec/wayfarer/routing/matchers/query_spec.rb +123 -0
  166. data/spec/wayfarer/routing/matchers/scheme_spec.rb +45 -0
  167. data/spec/wayfarer/routing/matchers/url_spec.rb +33 -0
  168. data/spec/wayfarer/routing/path_consumer_spec.rb +123 -0
  169. data/spec/wayfarer/routing/path_finder_spec.rb +409 -0
  170. data/spec/wayfarer/routing/root_route_spec.rb +51 -0
  171. data/spec/wayfarer/routing/route_spec.rb +74 -0
  172. data/spec/wayfarer/routing/sub_route_spec.rb +103 -0
  173. data/spec/wayfarer/task_spec.rb +13 -0
  174. data/spec/wayfarer/uri/normalization_spec.rb +98 -0
  175. data/spec/wayfarer_spec.rb +2 -2
  176. data/wayfarer.gemspec +18 -28
  177. metadata +797 -265
  178. data/.github/workflows/ci.yaml +0 -32
  179. data/.rbenv-gemsets +0 -1
  180. data/.ruby-version +0 -1
  181. data/RELEASING.md +0 -17
  182. data/docs/cookbook/user_agent.md +0 -7
  183. data/docs/guides/error_handling.md +0 -53
  184. data/docs/guides/networking.md +0 -94
  185. data/docs/guides/performance.md +0 -130
  186. data/docs/guides/reliability.md +0 -41
  187. data/docs/guides/routing/steering.md +0 -30
  188. data/docs/reference/api/base.md +0 -48
  189. data/docs/reference/cli.md +0 -61
  190. data/docs/reference/configuration_keys.md +0 -43
  191. data/docs/reference/environment_variables.md +0 -83
  192. data/lib/wayfarer/cli/base.rb +0 -45
  193. data/lib/wayfarer/cli/generate.rb +0 -17
  194. data/lib/wayfarer/cli/job.rb +0 -56
  195. data/lib/wayfarer/cli/route.rb +0 -29
  196. data/lib/wayfarer/cli/runner.rb +0 -34
  197. data/lib/wayfarer/cli/templates/Gemfile.tt +0 -5
  198. data/lib/wayfarer/cli/templates/job.rb.tt +0 -10
  199. data/lib/wayfarer/config/capybara.rb +0 -10
  200. data/lib/wayfarer/config/ferrum.rb +0 -11
  201. data/lib/wayfarer/config/networking.rb +0 -29
  202. data/lib/wayfarer/config/redis.rb +0 -14
  203. data/lib/wayfarer/config/root.rb +0 -11
  204. data/lib/wayfarer/config/selenium.rb +0 -21
  205. data/lib/wayfarer/config/strconv.rb +0 -45
  206. data/lib/wayfarer/config/struct.rb +0 -72
  207. data/lib/wayfarer/middleware/fetch.rb +0 -56
  208. data/lib/wayfarer/redis/connection.rb +0 -13
  209. data/lib/wayfarer/redis/version.rb +0 -19
  210. data/lib/wayfarer/routing/router.rb +0 -28
  211. data/spec/base_spec.rb +0 -224
  212. data/spec/callbacks_spec.rb +0 -102
  213. data/spec/cli/generate_spec.rb +0 -39
  214. data/spec/cli/job_spec.rb +0 -78
  215. data/spec/config/capybara_spec.rb +0 -18
  216. data/spec/config/ferrum_spec.rb +0 -24
  217. data/spec/config/networking_spec.rb +0 -73
  218. data/spec/config/redis_spec.rb +0 -32
  219. data/spec/config/root_spec.rb +0 -31
  220. data/spec/config/selenium_spec.rb +0 -56
  221. data/spec/config/strconv_spec.rb +0 -58
  222. data/spec/config/struct_spec.rb +0 -66
  223. data/spec/fixtures/dummy_job.rb +0 -7
  224. data/spec/gc_spec.rb +0 -59
  225. data/spec/handler_spec.rb +0 -11
  226. data/spec/integration/callbacks_spec.rb +0 -85
  227. data/spec/integration/page_spec.rb +0 -62
  228. data/spec/integration/params_spec.rb +0 -56
  229. data/spec/integration/stage_spec.rb +0 -51
  230. data/spec/integration/steering_spec.rb +0 -57
  231. data/spec/middleware/dedup_spec.rb +0 -88
  232. data/spec/middleware/dispatch_spec.rb +0 -43
  233. data/spec/middleware/fetch_spec.rb +0 -155
  234. data/spec/middleware/normalize_spec.rb +0 -29
  235. data/spec/middleware/router_spec.rb +0 -105
  236. data/spec/middleware/stage_spec.rb +0 -62
  237. data/spec/networking/capybara_spec.rb +0 -12
  238. data/spec/networking/ferrum_spec.rb +0 -12
  239. data/spec/networking/http_spec.rb +0 -12
  240. data/spec/networking/selenium_spec.rb +0 -12
  241. data/spec/page_spec.rb +0 -47
  242. data/spec/parsing/xml_spec.rb +0 -25
  243. data/spec/redis/barrier_spec.rb +0 -78
  244. data/spec/redis/counter_spec.rb +0 -32
  245. data/spec/redis/version_spec.rb +0 -13
  246. data/spec/routing/integration_spec.rb +0 -110
  247. data/spec/routing/matchers/custom_spec.rb +0 -31
  248. data/spec/routing/matchers/host_spec.rb +0 -49
  249. data/spec/routing/matchers/path_spec.rb +0 -43
  250. data/spec/routing/matchers/query_spec.rb +0 -137
  251. data/spec/routing/matchers/scheme_spec.rb +0 -25
  252. data/spec/routing/matchers/suffix_spec.rb +0 -41
  253. data/spec/routing/matchers/uri_spec.rb +0 -27
  254. data/spec/routing/path_finder_spec.rb +0 -33
  255. data/spec/routing/root_route_spec.rb +0 -29
  256. data/spec/routing/route_spec.rb +0 -43
  257. data/spec/routing/router_spec.rb +0 -24
  258. data/spec/task_spec.rb +0 -34
  259. data/spec/{stringify_spec.rb → wayfarer/stringify_spec.rb} +2 -2
@@ -1,155 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "spec_helpers"
4
-
5
- describe Wayfarer::Middleware::Fetch do
6
- let(:task) { build(:task) }
7
- let(:page) { Object.new }
8
- let(:agent) { Object.new }
9
- let(:result) { Wayfarer::Networking::Result::Success.new(page) }
10
- let(:context) do
11
- double(instance: agent).tap do |context|
12
- allow(context).to receive(:fetch).with(task.url).and_return(result)
13
- end
14
- end
15
-
16
- subject { described_class.new }
17
-
18
- describe "#call" do
19
- let(:controller) do
20
- Struct.new(:task).include(Wayfarer::Middleware::Stage::API).new(task)
21
- end
22
-
23
- before do
24
- allow(controller).to receive(:run_callbacks).with(:fetch).and_yield
25
-
26
- spy.tap do |pool|
27
- allow(pool).to receive(:with).and_yield(context)
28
- allow(subject).to receive(:pool).and_return(pool)
29
- end
30
-
31
- task.metadata.staged_urls = SortedSet.new
32
- task.metadata.controller = controller
33
- end
34
-
35
- context "with page assigned" do
36
- before { task.metadata.page = page }
37
-
38
- it "does not alter the page" do
39
- expect {
40
- subject.call(task)
41
- }.not_to(change { task.metadata.page })
42
- end
43
-
44
- it "yields" do
45
- expect { |spy| subject.call(task, &spy) }.to yield_control
46
- end
47
- end
48
-
49
- it "runs callbacks" do
50
- expect(controller).to receive(:run_callbacks).with(:fetch)
51
- subject.call(task)
52
- end
53
-
54
- it "fetches the URL" do
55
- expect(context).to receive(:fetch).with(task.url)
56
- subject.call(task)
57
- end
58
-
59
- context "with Redirect" do
60
- let(:redirect_url) { test_app_path("/foobar") }
61
- let(:result) { Wayfarer::Networking::Result::Redirect.new(page) }
62
-
63
- it "stages the redirect URL" do
64
- expect {
65
- subject.call(task)
66
- }.to change { task.metadata.staged_urls.count }.by(1)
67
- end
68
-
69
- it "does not yield" do
70
- expect { |spy| subject.call(task, &spy) }.not_to yield_control
71
- end
72
- end
73
-
74
- context "with Success" do
75
- it "assigns the context" do
76
- expect {
77
- subject.call(task)
78
- }.to change { task.metadata.context }.to(context)
79
- end
80
-
81
- it "assigns the page" do
82
- expect {
83
- subject.call(task)
84
- }.to change { task.metadata.page }.to(result.page)
85
- end
86
-
87
- it "yields" do
88
- expect { |spy| subject.call(task, &spy) }.to yield_control
89
- end
90
- end
91
- end
92
-
93
- describe described_class::API do
94
- subject(:controller) do
95
- Struct.new(:task).include(described_class).new(task)
96
- end
97
-
98
- describe "#agent" do
99
- before { task.metadata.context = context }
100
-
101
- it "returns the agent" do
102
- expect(controller.agent).to be(context.instance)
103
- end
104
- end
105
-
106
- describe "#context" do
107
- before { task.metadata.context = context }
108
-
109
- it "returns the context" do
110
- expect(controller.context).to be(task.metadata.context)
111
- end
112
- end
113
-
114
- describe "#page" do
115
- before { task.metadata.page = page }
116
-
117
- it "returns the page" do
118
- expect(controller.page).to be(task.metadata.page)
119
- end
120
-
121
- context "with live keyword" do
122
- before { task.metadata.context = context }
123
-
124
- context "with stateful agent" do
125
- before do
126
- result.page = Object.new
127
- allow(context).to receive(:live).and_return(result)
128
- end
129
-
130
- it "replaces the page" do
131
- expect {
132
- controller.page(live: true)
133
- }.to change { task.metadata.page }.to(result.page)
134
- end
135
- end
136
-
137
- context "with stateless agent" do
138
- before { allow(context).to receive(:live).and_return(nil) }
139
-
140
- it "does not alter the page" do
141
- expect {
142
- controller.page(live: true)
143
- }.not_to(change { task.metadata.page })
144
- end
145
- end
146
- end
147
- end
148
-
149
- describe "#http" do
150
- it "returns a redirect-following HTTP agent" do
151
- expect(controller.http).to be_a(Wayfarer::Networking::Follow)
152
- end
153
- end
154
- end
155
- end
@@ -1,29 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "spec_helpers"
4
-
5
- describe Wayfarer::Middleware::Normalize do
6
- let(:task) { build(:task) }
7
- subject { described_class.new }
8
-
9
- describe "#call" do
10
- let(:urls) do
11
- ["http://example.com/products?product_id=123",
12
- "HTTP://EXAMPLE.COM/products/?product_id=123",
13
- "http://example.com/products/?product_id=123",
14
- "http://example.com/foo/../products?product_id=123",
15
- "invalid@url-net"]
16
- end
17
-
18
- before { task.metadata.staged_urls = SortedSet.new(urls) }
19
-
20
- it "yields" do
21
- expect { |spy| subject.call(task, &spy) }.to yield_control
22
- end
23
-
24
- it "normalizes and compacts URLs" do
25
- subject.call(task)
26
- expect(task.metadata.staged_urls).to eq(SortedSet[urls.first])
27
- end
28
- end
29
- end
@@ -1,105 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "spec_helpers"
4
-
5
- describe Wayfarer::Middleware::Router do
6
- let(:task) { build(:task) }
7
- subject { described_class.new }
8
-
9
- describe "#call" do
10
- let(:controller) do
11
- Struct.new(:task).include(described_class::API).new(task)
12
- end
13
-
14
- before do
15
- allow(controller.class.router).to receive(:invoke)
16
- .with(Addressable::URI.parse(task.url), controller.steer)
17
- .and_return(result)
18
- task.metadata.controller = controller
19
- end
20
-
21
- context "with matching route" do
22
- let(:action) { :action }
23
- let(:params) { { "foo" => "bar" } }
24
- let(:result) do
25
- Wayfarer::Routing::Result::Match.new(action, params)
26
- end
27
-
28
- it "assigns the action" do
29
- expect {
30
- subject.call(task)
31
- }.to change { task.metadata.action }.to(action)
32
- end
33
-
34
- it "merges params" do
35
- task.metadata.params = ActiveSupport::HashWithIndifferentAccess.new("bar" => "qux")
36
-
37
- expect {
38
- subject.call(task)
39
- }.to change { task.metadata.params }.to("foo" => "bar", "bar" => "qux")
40
- end
41
-
42
- it "yields" do
43
- expect { |spy| subject.call(task, &spy) }.to yield_control
44
- end
45
- end
46
-
47
- context "without matching route" do
48
- let(:result) do
49
- Wayfarer::Routing::Result::Mismatch.new
50
- end
51
-
52
- it "does not assign the action" do
53
- expect {
54
- subject.call(task)
55
- }.not_to(change { task.metadata.action })
56
- end
57
-
58
- it "does not alter params" do
59
- task.metadata.params = ActiveSupport::HashWithIndifferentAccess.new("bar" => "qux")
60
-
61
- expect {
62
- subject.call(task)
63
- }.not_to(change { task.metadata.params })
64
- end
65
-
66
- it "does not yield" do
67
- expect { |spy| subject.call(task, &spy) }.not_to yield_control
68
- end
69
- end
70
- end
71
-
72
- describe described_class::API do
73
- subject(:controller) do
74
- Struct.new(:task).include(described_class).new(task)
75
- end
76
-
77
- describe "::router" do
78
- it "returns a router" do
79
- expect(controller.class.router).to be_a(Wayfarer::Routing::Router)
80
- end
81
- end
82
-
83
- describe "::route" do
84
- it "adds a routing block" do
85
- expect {
86
- controller.class.route { to :index }
87
- }.to change { controller.class.router.blocks.count }.by(1)
88
- end
89
- end
90
-
91
- describe "::steer" do
92
- it "overrides #steer" do
93
- expect {
94
- controller.class.steer { :foobar }
95
- }.to change { controller.steer }.from([]).to(:foobar)
96
- end
97
- end
98
-
99
- describe "#steer" do
100
- it "returns [] by default" do
101
- expect(controller.steer).to eq([])
102
- end
103
- end
104
- end
105
- end
@@ -1,62 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "spec_helpers"
4
-
5
- describe Wayfarer::Middleware::Stage do
6
- let(:task) { build(:task) }
7
- subject { described_class.new }
8
-
9
- describe "#call" do
10
- it "assigns an empty set" do
11
- subject.call(task)
12
- expect(task.metadata.staged_urls).to eq(SortedSet.new)
13
- end
14
-
15
- it "yields" do
16
- expect { |spy| subject.call(task, &spy) }.to yield_control
17
- end
18
-
19
- it "enqueues tasks" do
20
- urls = [test_app_path("/alpha"), test_app_path("/beta")]
21
-
22
- spy.tap do |job|
23
- expect(job).to receive(:crawl).with(urls.first, batch: task.batch).ordered
24
- expect(job).to receive(:crawl).with(urls.second, batch: task.batch).ordered
25
- task.metadata.job = double(class: job)
26
- end
27
-
28
- subject.call(task) do
29
- task.metadata.staged_urls = SortedSet.new(urls)
30
- end
31
- end
32
-
33
- it "resets staged URLs" do
34
- task.metadata.staged_urls = SortedSet.new([test_app_path("/foo")])
35
-
36
- expect {
37
- subject.call(task)
38
- }.to change { task.metadata.staged_urls.count }.to(0)
39
- end
40
- end
41
-
42
- describe described_class::API do
43
- subject(:controller) do
44
- Struct.new(:task).include(described_class).new(task)
45
- end
46
-
47
- describe "#stage" do
48
- before { task.metadata.staged_urls = SortedSet.new }
49
-
50
- it "stages URLs" do
51
- expect {
52
- controller.stage(test_app_path("/foo"))
53
- }.to change { task.metadata.staged_urls.count }.by(1)
54
- end
55
-
56
- it "converts to strings" do
57
- controller.stage(Addressable::URI.parse(test_app_path("/foo")))
58
- expect(task.metadata.staged_urls.to_a.first).to be_a(String)
59
- end
60
- end
61
- end
62
- end
@@ -1,12 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "spec_helpers"
4
- require_relative "strategy"
5
-
6
- describe Wayfarer::Networking::Capybara, ferrum: true do
7
- include_examples "Network strategy", strategy: described_class,
8
- browser: true,
9
- request_headers: false,
10
- response_headers: true,
11
- status_code: true
12
- end
@@ -1,12 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "spec_helpers"
4
- require_relative "strategy"
5
-
6
- describe Wayfarer::Networking::Ferrum, ferrum: true do
7
- include_examples "Network strategy", strategy: described_class,
8
- browser: true,
9
- request_headers: true,
10
- response_headers: true,
11
- status_code: true
12
- end
@@ -1,12 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "spec_helpers"
4
- require_relative "strategy"
5
-
6
- describe Wayfarer::Networking::HTTP do
7
- include_examples "Network strategy", strategy: described_class,
8
- browser: false,
9
- request_headers: true,
10
- response_headers: true,
11
- status_code: true
12
- end
@@ -1,12 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "spec_helpers"
4
- require_relative "strategy"
5
-
6
- describe Wayfarer::Networking::Selenium, selenium: true do
7
- include_examples "Network strategy", strategy: described_class,
8
- browser: true,
9
- request_headers: false,
10
- response_headers: false,
11
- status_code: false
12
- end
data/spec/page_spec.rb DELETED
@@ -1,47 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "spec_helpers"
4
-
5
- describe Wayfarer::Page do
6
- describe "#doc" do
7
- context "when Content-Type is absent" do
8
- subject(:page) { build(:page, :html, headers: {}) }
9
-
10
- it "returns a Nokogiri::HTML::Document" do
11
- expect(page.doc).to be_a(Nokogiri::HTML::Document)
12
- end
13
- end
14
-
15
- context "when Content-Type is HTML" do
16
- subject(:page) { build(:page, :html) }
17
-
18
- it "returns a Nokogiri::HTML::Document" do
19
- expect(page.doc).to be_a(Nokogiri::HTML::Document)
20
- end
21
- end
22
-
23
- context "when Content-Type is XML" do
24
- subject(:page) { build(:page, :xml) }
25
-
26
- it "returns a Nokogiri::XML::Document" do
27
- expect(page.doc).to be_a(Nokogiri::XML::Document)
28
- end
29
- end
30
-
31
- context "when Content-Type is JSON" do
32
- subject(:page) { build(:page, :json) }
33
-
34
- it "returns a Hash" do
35
- expect(page.doc).to be_a(Hash)
36
- end
37
- end
38
- end
39
-
40
- describe "#meta" do
41
- subject(:page) { build(:page, :html) }
42
-
43
- it "returns a MetaInspector document" do
44
- expect(page.meta).to be_a(MetaInspector::Document)
45
- end
46
- end
47
- end
@@ -1,25 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "spec_helpers"
4
-
5
- describe Wayfarer::Parsing::XML do
6
- subject(:parser) { Wayfarer::Parsing::XML }
7
-
8
- describe ".parse_html" do
9
- it "returns a Nokogiri::HTML::Document" do
10
- html_str = "<span>Foobar</span>"
11
- doc = parser.parse_html(html_str)
12
-
13
- expect(doc).to be_a Nokogiri::HTML::Document
14
- end
15
- end
16
-
17
- describe ".parse_xml" do
18
- it "returns a Nokogiri::XML::Document" do
19
- xml_str = "<barqux>Foobar</barqux>"
20
- doc = parser.parse_xml(xml_str)
21
-
22
- expect(doc).to be_a Nokogiri::XML::Document
23
- end
24
- end
25
- end
@@ -1,78 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "spec_helpers"
4
-
5
- describe Wayfarer::Redis::Barrier, redis: true do
6
- let(:url) { "https://example.com" }
7
- subject(:barrier) { Wayfarer::Redis::Barrier.new("batch") }
8
-
9
- describe "#redis_key" do
10
- it "returns the expected Redis key" do
11
- expect(barrier.redis_key).to eq("wayfarer-barrier-batch")
12
- end
13
- end
14
-
15
- describe "#reset!" do
16
- it "resets seen URLs" do
17
- barrier.seen?(url)
18
- barrier.reset!
19
- expect(barrier.seen?(url)).to be false
20
- end
21
- end
22
-
23
- describe "#seen?" do
24
- context "with seen URL" do
25
- before { barrier.seen?(url) }
26
-
27
- it "returns true" do
28
- expect(barrier.seen?(url)).to be(true)
29
- end
30
- end
31
-
32
- context "with unseen URL" do
33
- it "returns false" do
34
- expect(barrier.seen?(url)).to be(false)
35
- end
36
- end
37
- end
38
-
39
- describe "#peek" do
40
- let(:seen_urls) { %w[https://yahoo.com https://google.com] }
41
- let(:unseen_urls) { %w[https://w3c.org https://nasa.gov] }
42
-
43
- before do
44
- allow(Wayfarer::Redis::Version).to receive(:determine).and_return(redis_version)
45
-
46
- seen_urls.each do |url|
47
- barrier.seen?(url)
48
- end
49
- end
50
-
51
- context "Redis >= 6.2.0" do
52
- let(:redis_version) { [6, 2, 5] }
53
-
54
- it "returns membership" do
55
- urls = [*seen_urls, *unseen_urls]
56
- expect(barrier.peek(urls)).to eq([true, true, false, false])
57
- end
58
- end
59
-
60
- context "Redis < 6.2.0" do
61
- let(:redis_version) { [2, 6, 10] }
62
-
63
- it "returns membership" do
64
- urls = [*seen_urls, *unseen_urls]
65
- expect(barrier.peek(urls)).to eq([true, true, false, false])
66
- end
67
- end
68
- end
69
-
70
- describe "#unsee" do
71
- it "removes a seen URL" do
72
- barrier.seen?(url)
73
- expect {
74
- barrier.unsee(url)
75
- }.to change { barrier.seen?(url) }.to(false)
76
- end
77
- end
78
- end
@@ -1,32 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "spec_helpers"
4
-
5
- describe Wayfarer::Redis::Counter, redis: true do
6
- subject(:counter) { Wayfarer::Redis::Counter.new("batch") }
7
-
8
- describe "#redis_key" do
9
- it "returns the expected Redis key" do
10
- expect(counter.redis_key).to eq("wayfarer-counter-batch")
11
- end
12
- end
13
-
14
- describe "#increment" do
15
- it "increments" do
16
- expect { counter.increment }.to change { counter.value }.by(1)
17
- end
18
- end
19
-
20
- describe "#decrement" do
21
- it "decrements" do
22
- expect { counter.decrement }.to change { counter.value }.by(-1)
23
- end
24
- end
25
-
26
- describe "#reset!" do
27
- it "resets" do
28
- 3.times { counter.increment }
29
- expect { counter.reset! }.to change { counter.value }.to(0)
30
- end
31
- end
32
- end
@@ -1,13 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "spec_helpers"
4
-
5
- describe Wayfarer::Redis::Version, redis: true do
6
- describe "::determine" do
7
- it "returns Redis major, minor, patch version" do
8
- Wayfarer::Redis::Version.instance_variable_set(:@determine, nil)
9
- expect(Wayfarer::Redis::Version).to receive(:server_version).and_return("1.20.300")
10
- expect(Wayfarer::Redis::Version.determine).to eq([1, 20, 300])
11
- end
12
- end
13
- end