wayfarer 0.4.6 → 0.4.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (259) hide show
  1. checksums.yaml +4 -4
  2. data/.env +17 -0
  3. data/.github/workflows/lint.yaml +27 -0
  4. data/.github/workflows/release.yaml +30 -0
  5. data/.github/workflows/tests.yaml +21 -0
  6. data/.gitignore +5 -1
  7. data/.rubocop.yml +36 -0
  8. data/.vale.ini +8 -0
  9. data/.yardopts +1 -3
  10. data/Dockerfile +6 -4
  11. data/Gemfile +24 -0
  12. data/Gemfile.lock +274 -164
  13. data/Rakefile +7 -51
  14. data/bin/wayfarer +1 -1
  15. data/docker-compose.yml +23 -13
  16. data/docs/cookbook/consent_screen.md +2 -2
  17. data/docs/cookbook/executing_javascript.md +3 -3
  18. data/docs/cookbook/navigation.md +12 -12
  19. data/docs/cookbook/querying_html.md +3 -3
  20. data/docs/cookbook/screenshots.md +2 -2
  21. data/docs/guides/callbacks.md +25 -125
  22. data/docs/guides/cli.md +71 -0
  23. data/docs/guides/configuration.md +10 -35
  24. data/docs/guides/development.md +67 -0
  25. data/docs/guides/handlers.md +60 -0
  26. data/docs/guides/index.md +1 -0
  27. data/docs/guides/jobs.md +142 -31
  28. data/docs/guides/navigation.md +1 -1
  29. data/docs/guides/networking/capybara.md +13 -22
  30. data/docs/guides/networking/custom_adapters.md +103 -41
  31. data/docs/guides/networking/ferrum.md +4 -4
  32. data/docs/guides/networking/http.md +9 -13
  33. data/docs/guides/networking/selenium.md +10 -11
  34. data/docs/guides/pages.md +78 -10
  35. data/docs/guides/redis.md +10 -0
  36. data/docs/guides/routing.md +156 -0
  37. data/docs/guides/tasks.md +53 -9
  38. data/docs/guides/tutorial.md +66 -0
  39. data/docs/guides/user_agents.md +115 -0
  40. data/docs/index.md +17 -40
  41. data/lib/wayfarer/base.rb +125 -46
  42. data/lib/wayfarer/batch_completion.rb +60 -0
  43. data/lib/wayfarer/callbacks.rb +22 -48
  44. data/lib/wayfarer/cli/route_printer.rb +85 -89
  45. data/lib/wayfarer/cli.rb +103 -0
  46. data/lib/wayfarer/gc.rb +18 -6
  47. data/lib/wayfarer/handler.rb +15 -7
  48. data/lib/wayfarer/kv.rb +28 -0
  49. data/lib/wayfarer/logging.rb +38 -0
  50. data/lib/wayfarer/middleware/base.rb +2 -0
  51. data/lib/wayfarer/middleware/batch_completion.rb +19 -0
  52. data/lib/wayfarer/middleware/chain.rb +7 -1
  53. data/lib/wayfarer/middleware/content_type.rb +59 -0
  54. data/lib/wayfarer/middleware/controller.rb +19 -15
  55. data/lib/wayfarer/middleware/dedup.rb +22 -13
  56. data/lib/wayfarer/middleware/dispatch.rb +17 -4
  57. data/lib/wayfarer/middleware/normalize.rb +7 -14
  58. data/lib/wayfarer/middleware/redis.rb +15 -0
  59. data/lib/wayfarer/middleware/router.rb +33 -35
  60. data/lib/wayfarer/middleware/stage.rb +5 -5
  61. data/lib/wayfarer/middleware/uri_parser.rb +31 -0
  62. data/lib/wayfarer/middleware/user_agent.rb +49 -0
  63. data/lib/wayfarer/networking/capybara.rb +1 -1
  64. data/lib/wayfarer/networking/context.rb +14 -3
  65. data/lib/wayfarer/networking/ferrum.rb +1 -4
  66. data/lib/wayfarer/networking/follow.rb +14 -7
  67. data/lib/wayfarer/networking/http.rb +1 -1
  68. data/lib/wayfarer/networking/pool.rb +23 -13
  69. data/lib/wayfarer/networking/selenium.rb +15 -7
  70. data/lib/wayfarer/networking/strategy.rb +2 -2
  71. data/lib/wayfarer/page.rb +34 -14
  72. data/lib/wayfarer/parsing/xml.rb +6 -6
  73. data/lib/wayfarer/parsing.rb +21 -0
  74. data/lib/wayfarer/redis/barrier.rb +26 -21
  75. data/lib/wayfarer/redis/counter.rb +18 -9
  76. data/lib/wayfarer/redis/pool.rb +1 -1
  77. data/lib/wayfarer/redis/resettable.rb +19 -0
  78. data/lib/wayfarer/routing/dsl.rb +166 -30
  79. data/lib/wayfarer/routing/hash_stack.rb +33 -0
  80. data/lib/wayfarer/routing/matchers/custom.rb +8 -5
  81. data/lib/wayfarer/routing/matchers/{suffix.rb → empty_params.rb} +2 -6
  82. data/lib/wayfarer/routing/matchers/host.rb +15 -9
  83. data/lib/wayfarer/routing/matchers/path.rb +11 -31
  84. data/lib/wayfarer/routing/matchers/query.rb +41 -17
  85. data/lib/wayfarer/routing/matchers/result.rb +12 -0
  86. data/lib/wayfarer/routing/matchers/scheme.rb +13 -5
  87. data/lib/wayfarer/routing/matchers/url.rb +13 -5
  88. data/lib/wayfarer/routing/path_consumer.rb +130 -0
  89. data/lib/wayfarer/routing/path_finder.rb +151 -23
  90. data/lib/wayfarer/routing/result.rb +1 -1
  91. data/lib/wayfarer/routing/root_route.rb +17 -1
  92. data/lib/wayfarer/routing/route.rb +66 -19
  93. data/lib/wayfarer/routing/serializable.rb +28 -0
  94. data/lib/wayfarer/routing/sub_route.rb +53 -0
  95. data/lib/wayfarer/routing/target_route.rb +17 -1
  96. data/lib/wayfarer/stringify.rb +21 -30
  97. data/lib/wayfarer/task.rb +9 -17
  98. data/lib/wayfarer/uri/normalization.rb +120 -0
  99. data/lib/wayfarer.rb +72 -5
  100. data/mise.toml +2 -0
  101. data/mkdocs.yml +44 -8
  102. data/rake/docs.rake +26 -0
  103. data/rake/lint.rake +9 -0
  104. data/rake/release.rake +23 -0
  105. data/rake/tests.rake +32 -0
  106. data/requirements.txt +1 -1
  107. data/spec/factories/job.rb +8 -0
  108. data/spec/factories/middleware.rb +2 -2
  109. data/spec/factories/path_finder.rb +11 -0
  110. data/spec/factories/redis.rb +19 -0
  111. data/spec/factories/task.rb +46 -2
  112. data/spec/spec_helpers.rb +55 -51
  113. data/spec/support/active_job_helpers.rb +8 -0
  114. data/spec/support/integration_helpers.rb +21 -0
  115. data/spec/support/redis_helpers.rb +9 -0
  116. data/spec/support/test_app.rb +66 -37
  117. data/spec/wayfarer/base_spec.rb +200 -0
  118. data/spec/wayfarer/batch_completion_spec.rb +142 -0
  119. data/spec/wayfarer/cli/job_spec.rb +88 -0
  120. data/spec/wayfarer/cli/routing_spec.rb +322 -0
  121. data/spec/{cli → wayfarer/cli}/version_spec.rb +1 -1
  122. data/spec/wayfarer/gc_spec.rb +29 -0
  123. data/spec/wayfarer/handler_spec.rb +9 -0
  124. data/spec/wayfarer/integration/callbacks_spec.rb +200 -0
  125. data/spec/wayfarer/integration/content_type_spec.rb +37 -0
  126. data/spec/wayfarer/integration/custom_routing_spec.rb +51 -0
  127. data/spec/wayfarer/integration/gc_spec.rb +40 -0
  128. data/spec/wayfarer/integration/handler_spec.rb +65 -0
  129. data/spec/wayfarer/integration/page_spec.rb +79 -0
  130. data/spec/wayfarer/integration/params_spec.rb +64 -0
  131. data/spec/wayfarer/integration/parsing_spec.rb +99 -0
  132. data/spec/wayfarer/integration/retry_spec.rb +112 -0
  133. data/spec/wayfarer/integration/stage_spec.rb +58 -0
  134. data/spec/wayfarer/middleware/batch_completion_spec.rb +33 -0
  135. data/spec/{middleware → wayfarer/middleware}/chain_spec.rb +24 -19
  136. data/spec/wayfarer/middleware/content_type_spec.rb +83 -0
  137. data/spec/{middleware → wayfarer/middleware}/controller_spec.rb +24 -22
  138. data/spec/wayfarer/middleware/dedup_spec.rb +66 -0
  139. data/spec/wayfarer/middleware/normalize_spec.rb +32 -0
  140. data/spec/wayfarer/middleware/router_spec.rb +102 -0
  141. data/spec/wayfarer/middleware/stage_spec.rb +63 -0
  142. data/spec/wayfarer/middleware/uri_parser_spec.rb +63 -0
  143. data/spec/wayfarer/middleware/user_agent_spec.rb +158 -0
  144. data/spec/wayfarer/networking/capybara_spec.rb +13 -0
  145. data/spec/{networking → wayfarer/networking}/context_spec.rb +46 -38
  146. data/spec/wayfarer/networking/ferrum_spec.rb +13 -0
  147. data/spec/{networking → wayfarer/networking}/follow_spec.rb +11 -6
  148. data/spec/wayfarer/networking/http_spec.rb +12 -0
  149. data/spec/{networking → wayfarer/networking}/pool_spec.rb +16 -14
  150. data/spec/wayfarer/networking/selenium_spec.rb +12 -0
  151. data/spec/{networking → wayfarer/networking}/strategy.rb +33 -54
  152. data/spec/wayfarer/page_spec.rb +69 -0
  153. data/spec/{parsing → wayfarer/parsing}/json_spec.rb +1 -1
  154. data/spec/wayfarer/parsing/xml_parse_spec.rb +25 -0
  155. data/spec/wayfarer/redis/barrier_spec.rb +39 -0
  156. data/spec/wayfarer/redis/counter_spec.rb +34 -0
  157. data/spec/{redis → wayfarer/redis}/pool_spec.rb +4 -3
  158. data/spec/{routing → wayfarer/routing}/dsl_spec.rb +12 -22
  159. data/spec/wayfarer/routing/hash_stack_spec.rb +63 -0
  160. data/spec/wayfarer/routing/integration_spec.rb +101 -0
  161. data/spec/wayfarer/routing/matchers/custom_spec.rb +39 -0
  162. data/spec/wayfarer/routing/matchers/host_spec.rb +56 -0
  163. data/spec/wayfarer/routing/matchers/matcher.rb +17 -0
  164. data/spec/wayfarer/routing/matchers/path_spec.rb +43 -0
  165. data/spec/wayfarer/routing/matchers/query_spec.rb +123 -0
  166. data/spec/wayfarer/routing/matchers/scheme_spec.rb +45 -0
  167. data/spec/wayfarer/routing/matchers/url_spec.rb +33 -0
  168. data/spec/wayfarer/routing/path_consumer_spec.rb +123 -0
  169. data/spec/wayfarer/routing/path_finder_spec.rb +409 -0
  170. data/spec/wayfarer/routing/root_route_spec.rb +51 -0
  171. data/spec/wayfarer/routing/route_spec.rb +74 -0
  172. data/spec/wayfarer/routing/sub_route_spec.rb +103 -0
  173. data/spec/wayfarer/task_spec.rb +13 -0
  174. data/spec/wayfarer/uri/normalization_spec.rb +98 -0
  175. data/spec/wayfarer_spec.rb +2 -2
  176. data/wayfarer.gemspec +18 -28
  177. metadata +797 -265
  178. data/.github/workflows/ci.yaml +0 -32
  179. data/.rbenv-gemsets +0 -1
  180. data/.ruby-version +0 -1
  181. data/RELEASING.md +0 -17
  182. data/docs/cookbook/user_agent.md +0 -7
  183. data/docs/guides/error_handling.md +0 -53
  184. data/docs/guides/networking.md +0 -94
  185. data/docs/guides/performance.md +0 -130
  186. data/docs/guides/reliability.md +0 -41
  187. data/docs/guides/routing/steering.md +0 -30
  188. data/docs/reference/api/base.md +0 -48
  189. data/docs/reference/cli.md +0 -61
  190. data/docs/reference/configuration_keys.md +0 -43
  191. data/docs/reference/environment_variables.md +0 -83
  192. data/lib/wayfarer/cli/base.rb +0 -45
  193. data/lib/wayfarer/cli/generate.rb +0 -17
  194. data/lib/wayfarer/cli/job.rb +0 -56
  195. data/lib/wayfarer/cli/route.rb +0 -29
  196. data/lib/wayfarer/cli/runner.rb +0 -34
  197. data/lib/wayfarer/cli/templates/Gemfile.tt +0 -5
  198. data/lib/wayfarer/cli/templates/job.rb.tt +0 -10
  199. data/lib/wayfarer/config/capybara.rb +0 -10
  200. data/lib/wayfarer/config/ferrum.rb +0 -11
  201. data/lib/wayfarer/config/networking.rb +0 -29
  202. data/lib/wayfarer/config/redis.rb +0 -14
  203. data/lib/wayfarer/config/root.rb +0 -11
  204. data/lib/wayfarer/config/selenium.rb +0 -21
  205. data/lib/wayfarer/config/strconv.rb +0 -45
  206. data/lib/wayfarer/config/struct.rb +0 -72
  207. data/lib/wayfarer/middleware/fetch.rb +0 -56
  208. data/lib/wayfarer/redis/connection.rb +0 -13
  209. data/lib/wayfarer/redis/version.rb +0 -19
  210. data/lib/wayfarer/routing/router.rb +0 -28
  211. data/spec/base_spec.rb +0 -224
  212. data/spec/callbacks_spec.rb +0 -102
  213. data/spec/cli/generate_spec.rb +0 -39
  214. data/spec/cli/job_spec.rb +0 -78
  215. data/spec/config/capybara_spec.rb +0 -18
  216. data/spec/config/ferrum_spec.rb +0 -24
  217. data/spec/config/networking_spec.rb +0 -73
  218. data/spec/config/redis_spec.rb +0 -32
  219. data/spec/config/root_spec.rb +0 -31
  220. data/spec/config/selenium_spec.rb +0 -56
  221. data/spec/config/strconv_spec.rb +0 -58
  222. data/spec/config/struct_spec.rb +0 -66
  223. data/spec/fixtures/dummy_job.rb +0 -7
  224. data/spec/gc_spec.rb +0 -59
  225. data/spec/handler_spec.rb +0 -11
  226. data/spec/integration/callbacks_spec.rb +0 -85
  227. data/spec/integration/page_spec.rb +0 -62
  228. data/spec/integration/params_spec.rb +0 -56
  229. data/spec/integration/stage_spec.rb +0 -51
  230. data/spec/integration/steering_spec.rb +0 -57
  231. data/spec/middleware/dedup_spec.rb +0 -88
  232. data/spec/middleware/dispatch_spec.rb +0 -43
  233. data/spec/middleware/fetch_spec.rb +0 -155
  234. data/spec/middleware/normalize_spec.rb +0 -29
  235. data/spec/middleware/router_spec.rb +0 -105
  236. data/spec/middleware/stage_spec.rb +0 -62
  237. data/spec/networking/capybara_spec.rb +0 -12
  238. data/spec/networking/ferrum_spec.rb +0 -12
  239. data/spec/networking/http_spec.rb +0 -12
  240. data/spec/networking/selenium_spec.rb +0 -12
  241. data/spec/page_spec.rb +0 -47
  242. data/spec/parsing/xml_spec.rb +0 -25
  243. data/spec/redis/barrier_spec.rb +0 -78
  244. data/spec/redis/counter_spec.rb +0 -32
  245. data/spec/redis/version_spec.rb +0 -13
  246. data/spec/routing/integration_spec.rb +0 -110
  247. data/spec/routing/matchers/custom_spec.rb +0 -31
  248. data/spec/routing/matchers/host_spec.rb +0 -49
  249. data/spec/routing/matchers/path_spec.rb +0 -43
  250. data/spec/routing/matchers/query_spec.rb +0 -137
  251. data/spec/routing/matchers/scheme_spec.rb +0 -25
  252. data/spec/routing/matchers/suffix_spec.rb +0 -41
  253. data/spec/routing/matchers/uri_spec.rb +0 -27
  254. data/spec/routing/path_finder_spec.rb +0 -33
  255. data/spec/routing/root_route_spec.rb +0 -29
  256. data/spec/routing/route_spec.rb +0 -43
  257. data/spec/routing/router_spec.rb +0 -24
  258. data/spec/task_spec.rb +0 -34
  259. data/spec/{stringify_spec.rb → wayfarer/stringify_spec.rb} +2 -2
@@ -1,58 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "spec_helpers"
4
-
5
- describe Wayfarer::Config::Strconv do
6
- subject(:strconv) { Wayfarer::Config::Strconv }
7
-
8
- describe "::parse" do
9
- describe Hash do
10
- it "parses" do
11
- input = "alpha:1,beta:two,gamma:true"
12
- output = strconv.parse(input, Hash)
13
- expect(output).to eq(alpha: 1, beta: "two", gamma: true)
14
- end
15
- end
16
-
17
- describe Array do
18
- it "parses" do
19
- input = "alpha, beta , gamma"
20
- output = strconv.parse(input, Array)
21
- expect(output).to eq(%w[alpha beta gamma])
22
- end
23
- end
24
-
25
- describe Symbol do
26
- it "parses" do
27
- expect(strconv.parse("foobar", Symbol)).to be(:foobar)
28
- end
29
- end
30
-
31
- describe Integer do
32
- it "parses" do
33
- expect(strconv.parse("42", Integer)).to be(42)
34
- end
35
- end
36
-
37
- describe "Primitives" do
38
- context "Booleans" do
39
- it "parses" do
40
- expect(strconv.parse("true")).to be(true)
41
- expect(strconv.parse("false")).to be(false)
42
- end
43
- end
44
-
45
- context "Numbers" do
46
- it "parses" do
47
- expect(strconv.parse("42")).to be(42)
48
- end
49
- end
50
-
51
- context "Strings" do
52
- it "parses" do
53
- expect(strconv.parse("foobar")).to be("foobar")
54
- end
55
- end
56
- end
57
- end
58
- end
@@ -1,66 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "spec_helpers"
4
-
5
- describe Wayfarer::Config::Struct do
6
- let(:struct) do
7
- Wayfarer::Config::Struct.new(members)
8
- end
9
-
10
- let(:members) { { foo: options } }
11
- let(:options) { {} }
12
- let(:env) { {} }
13
- subject { struct.new(env) }
14
-
15
- describe "Reader" do
16
- context "without environment values and default" do
17
- let(:options) { {} }
18
-
19
- it "returns nil" do
20
- expect(subject.foo).to be(nil)
21
- end
22
- end
23
-
24
- context "with default only" do
25
- let(:options) { { default: 42 } }
26
-
27
- it "returns the default" do
28
- expect(subject.foo).to be(42)
29
- end
30
- end
31
-
32
- context "with environment key specified" do
33
- let(:options) { { env_key: "FOO" } }
34
-
35
- context "with environment value" do
36
- let(:env) { { "FOO" => "hello" } }
37
-
38
- it "returns the value" do
39
- expect(subject.foo).to eq("hello")
40
- end
41
-
42
- context "with type specified" do
43
- let(:options) { { env_key: "FOO", type: Symbol } }
44
-
45
- it "parses the value" do
46
- expect(subject.foo).to be(:hello)
47
- end
48
- end
49
- end
50
-
51
- context "without environment value" do
52
- it "returns nil" do
53
- expect(subject.foo).to be(nil)
54
- end
55
- end
56
- end
57
- end
58
-
59
- describe "Writer" do
60
- it "allows overriding environment values and defaults" do
61
- expect {
62
- subject.foo = 3
63
- }.to change { subject.foo }.from(nil).to(3)
64
- end
65
- end
66
- end
@@ -1,7 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- class DummyJob < Wayfarer::Base
4
- route { to :index }
5
-
6
- def hello; end
7
- end
data/spec/gc_spec.rb DELETED
@@ -1,59 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "spec_helpers"
4
-
5
- describe Wayfarer::GC, redis: true do
6
- include Wayfarer::Redis::Connection
7
-
8
- let(:task) { build(:task) }
9
- subject(:gc) { described_class.new(task) }
10
-
11
- before do
12
- task.metadata.job = spy
13
- task.barrier.seen?(task.url)
14
- end
15
-
16
- describe "#run" do
17
- context "when counter reaches 0" do
18
- before { task.counter.increment }
19
-
20
- it "resets the barrier" do
21
- expect {
22
- gc.run
23
- }.to change { redis { |conn| conn.exists?(task.barrier.redis_key) } }.to(false)
24
- end
25
-
26
- it "resets the counter" do
27
- expect {
28
- gc.run
29
- }.to change { redis { |conn| conn.exists?(task.counter.redis_key) } }.to(false)
30
- end
31
-
32
- it "runs after batch callbacks" do
33
- expect(task.metadata.job).to receive(:run_callbacks).with(:batch).exactly(:once)
34
- gc.run
35
- end
36
- end
37
-
38
- context "when counter does not reach 0" do
39
- before { 2.times { task.counter.increment } }
40
-
41
- it "does not reset the barrier" do
42
- expect {
43
- gc.run
44
- }.not_to(change { redis { |conn| conn.exists?(task.barrier.redis_key) } })
45
- end
46
-
47
- it "does not reset the counter" do
48
- expect {
49
- gc.run
50
- }.not_to(change { redis { |conn| conn.exists?(task.counter.redis_key) } })
51
- end
52
-
53
- it "does not run after batch callbacks" do
54
- expect(task.metadata.job).not_to receive(:run_callbacks).with(:batch)
55
- gc.run
56
- end
57
- end
58
- end
59
- end
data/spec/handler_spec.rb DELETED
@@ -1,11 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "spec_helpers"
4
-
5
- describe Wayfarer::Handler do
6
- subject { Class.new(described_class) }
7
-
8
- it "undefines ::after_batch" do
9
- expect(subject).not_to respond_to(:after_batch)
10
- end
11
- end
@@ -1,85 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "spec_helpers"
4
-
5
- describe "Callbacks" do
6
- let(:url) { test_app_path("git-scm.com/book/en/v2.html") }
7
-
8
- describe Wayfarer::Base do
9
- specify do
10
- class self.class::DummyJob < Wayfarer::Base
11
- extend SpecHelpers
12
- include RSpec::Matchers
13
-
14
- route { host test_app_host, to: :index }
15
-
16
- attr_accessor :callbacks_fired
17
-
18
- before_fetch do
19
- self.callbacks_fired = %i[before_fetch]
20
- end
21
-
22
- before_action do
23
- callbacks_fired.push(:before_action)
24
- end
25
-
26
- after_batch do
27
- expect(callbacks_fired).to eq(%i[before_fetch before_action])
28
- end
29
-
30
- def index; end
31
- end
32
-
33
- self.class::DummyJob.crawl(url)
34
- perform_enqueued_jobs
35
- end
36
- end
37
-
38
- describe Wayfarer::Handler do
39
- specify do
40
- class self.class::DummyJob < Wayfarer::Base
41
- extend SpecHelpers
42
- include RSpec::Matchers
43
-
44
- route { host test_app_host, to: DummyHandler }
45
-
46
- attr_accessor :callbacks_fired
47
-
48
- before_fetch do
49
- self.callbacks_fired = %i[before_fetch_job]
50
- end
51
-
52
- before_action do
53
- callbacks_fired.push(:before_action_job)
54
- end
55
-
56
- after_batch do
57
- expect(callbacks_fired).to eq(%i[before_fetch_job before_action_job])
58
- end
59
-
60
- class DummyHandler < Wayfarer::Handler
61
- include RSpec::Matchers
62
-
63
- route { to :index }
64
-
65
- attr_accessor :callbacks_fired
66
-
67
- before_fetch do
68
- raise "before_fetch ran in handler"
69
- end
70
-
71
- before_action do
72
- self.callbacks_fired = %i[before_action_handler]
73
- end
74
-
75
- def index
76
- expect(callbacks_fired).to eq(%i[before_action_handler])
77
- end
78
- end
79
- end
80
-
81
- self.class::DummyJob.crawl(url)
82
- perform_enqueued_jobs
83
- end
84
- end
85
- end
@@ -1,62 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "spec_helpers"
4
-
5
- describe "Pages" do
6
- let(:url) { test_app_path("git-scm.com/book/en/v2.html") }
7
-
8
- describe Wayfarer::Base do
9
- specify do
10
- class self.class::DummyJob < Wayfarer::Base
11
- extend SpecHelpers
12
- include RSpec::Matchers
13
-
14
- route { host test_app_host, to: :index }
15
-
16
- def index
17
- expect(page.url).to eq("http://test:9876/git-scm.com/book/en/v2.html")
18
- expect(page.status_code).to be(200)
19
- expect(page.body).not_to be_empty
20
- expect(page.headers.count).to be(9)
21
-
22
- expect(page.meta.links.all.count).to be(157)
23
- expect(page.meta.links.internal.count).to be(102)
24
- expect(page.meta.links.external.count).to be(55)
25
- end
26
- end
27
-
28
- self.class::DummyJob.crawl(url)
29
- perform_enqueued_jobs
30
- end
31
- end
32
-
33
- describe Wayfarer::Handler do
34
- specify do
35
- class self.class::DummyJob < Wayfarer::Base
36
- extend SpecHelpers
37
-
38
- route { host test_app_host, to: DummyHandler }
39
-
40
- class DummyHandler < Wayfarer::Handler
41
- include RSpec::Matchers
42
-
43
- route { to :index }
44
-
45
- def index
46
- expect(page.url).to eq("http://test:9876/git-scm.com/book/en/v2.html")
47
- expect(page.status_code).to be(200)
48
- expect(page.body).not_to be_empty
49
- expect(page.headers.count).to be(9)
50
-
51
- expect(page.meta.links.all.count).to be(157)
52
- expect(page.meta.links.internal.count).to be(102)
53
- expect(page.meta.links.external.count).to be(55)
54
- end
55
- end
56
- end
57
-
58
- self.class::DummyJob.crawl(url)
59
- perform_enqueued_jobs
60
- end
61
- end
62
- end
@@ -1,56 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "spec_helpers"
4
-
5
- describe "URL parameters" do
6
- let(:url) { test_app_path("git-scm.com/book/en/v2.html") }
7
-
8
- describe Wayfarer::Base do
9
- specify do
10
- class self.class::DummyJob < Wayfarer::Base
11
- extend SpecHelpers
12
- include RSpec::Matchers
13
-
14
- route do
15
- to :index, host: test_app_host do
16
- path "git-scm.com/book/:lang/:file"
17
- end
18
- end
19
-
20
- def index
21
- expect(params).to eq("lang" => "en", "file" => "v2.html")
22
- end
23
- end
24
-
25
- self.class::DummyJob.crawl(url)
26
- perform_enqueued_jobs
27
- end
28
- end
29
-
30
- describe Wayfarer::Handler do
31
- specify do
32
- class self.class::DummyJob < Wayfarer::Base
33
- extend SpecHelpers
34
-
35
- route do
36
- to DummyHandler, host: test_app_host do
37
- path "git-scm.com/book/:lang/:file"
38
- end
39
- end
40
-
41
- class DummyHandler < Wayfarer::Handler
42
- include RSpec::Matchers
43
-
44
- route { to :index }
45
-
46
- def index
47
- expect(params).to eq("lang" => "en", "file" => "v2.html")
48
- end
49
- end
50
- end
51
-
52
- self.class::DummyJob.crawl(url)
53
- perform_enqueued_jobs
54
- end
55
- end
56
- end
@@ -1,51 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "spec_helpers"
4
-
5
- describe "Staging" do
6
- let(:url) { test_app_path("git-scm.com/book/en/v2.html") }
7
-
8
- describe Wayfarer::Base do
9
- specify do
10
- class self.class::DummyJob < Wayfarer::Base
11
- extend SpecHelpers
12
-
13
- route { host test_app_host, to: :index }
14
-
15
- def index
16
- stage page.meta.links.all
17
- end
18
- end
19
-
20
- expect {
21
- self.class::DummyJob.crawl(url)
22
- perform_enqueued_jobs
23
- }.to change { enqueued_jobs.size }.by(156)
24
- end
25
- end
26
-
27
- describe Wayfarer::Handler do
28
- specify do
29
- class self.class::DummyJob < Wayfarer::Base
30
- extend SpecHelpers
31
-
32
- route do
33
- host test_app_host, to: DummyHandler
34
- end
35
-
36
- class DummyHandler < Wayfarer::Handler
37
- route { to :index }
38
-
39
- def index
40
- stage page.meta.links.all
41
- end
42
- end
43
- end
44
-
45
- expect {
46
- self.class::DummyJob.crawl(url)
47
- perform_enqueued_jobs
48
- }.to change { enqueued_jobs.size }.by(156)
49
- end
50
- end
51
- end
@@ -1,57 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "spec_helpers"
4
-
5
- describe "Steering" do
6
- let(:url) { test_app_path("git-scm.com/book/en/v2.html") }
7
-
8
- describe Wayfarer::Base do
9
- specify do
10
- class self.class::DummyJob < Wayfarer::Base
11
- extend RSpec::Matchers
12
-
13
- route do |a, b|
14
- expect(a).to eq("foobar")
15
- expect(b).to eq("barqux")
16
- end
17
-
18
- steer do |task|
19
- [task.batch, "barqux"]
20
- end
21
- end
22
-
23
- self.class::DummyJob.crawl(url, batch: "foobar")
24
- perform_enqueued_jobs
25
- end
26
- end
27
-
28
- describe Wayfarer::Handler do
29
- specify do
30
- class self.class::DummyJob < Wayfarer::Base
31
- route do |_a, _b|
32
- to DummyHandler
33
- end
34
-
35
- steer do |_task|
36
- [123, "barqux"]
37
- end
38
-
39
- class DummyHandler < Wayfarer::Handler
40
- extend RSpec::Matchers
41
-
42
- route do |a, b|
43
- expect(a).to eq("foobar")
44
- expect(b).to eq("fooz")
45
- end
46
-
47
- steer do |task|
48
- [task.batch, "fooz"]
49
- end
50
- end
51
- end
52
-
53
- self.class::DummyJob.crawl(url, batch: "foobar")
54
- perform_enqueued_jobs
55
- end
56
- end
57
- end
@@ -1,88 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "spec_helpers"
4
-
5
- describe Wayfarer::Middleware::Dedup, redis: true do
6
- let(:task) { build(:task) }
7
- subject { described_class.new }
8
-
9
- before { task.metadata.staged_urls = SortedSet.new }
10
-
11
- describe "#call" do
12
- context "if already routed" do
13
- before { task.metadata.action = :action }
14
-
15
- it "does not call the barrier" do
16
- expect(task.barrier).not_to receive(:seen?)
17
- end
18
-
19
- it "yields" do
20
- expect { |spy| subject.call(task, &spy) }.to yield_control
21
- end
22
- end
23
-
24
- context "without staged URLs" do
25
- it "does not raise" do
26
- expect { subject.call(task) }.not_to raise_error
27
- end
28
- end
29
-
30
- context "with unseen URL" do
31
- it "marks the URL as seen" do
32
- expect {
33
- subject.call(task)
34
- }.to change { task.barrier.seen?(task.url) }.to(true)
35
- end
36
-
37
- it "yields" do
38
- expect { |spy| subject.call(task, &spy) }.to yield_control
39
- end
40
- end
41
-
42
- context "with seen URL" do
43
- before { task.barrier.seen?(task.url) }
44
-
45
- it "does not yield" do
46
- expect { |spy| subject.call(task, &spy) }.not_to yield_control
47
- end
48
- end
49
-
50
- context "with exception raised" do
51
- it "does not mark the URL as seen" do
52
- begin
53
- subject.call(task) { raise }
54
- rescue StandardError
55
- nil
56
- end
57
- expect(task.barrier.seen?(task.url)).to be(false)
58
- end
59
-
60
- it "re-raises the exception" do
61
- expect {
62
- subject.call(task) { raise }
63
- }.to raise_error(RuntimeError)
64
- end
65
- end
66
-
67
- describe "staged URL filtering" do
68
- let(:seen_urls) { %w[https://yahoo.com https://google.com] }
69
- let(:unseen_urls) { %w[https://w3c.org https://nasa.gov] }
70
-
71
- before do
72
- seen_urls.each do |url|
73
- task.barrier.seen?(url)
74
- end
75
-
76
- [*seen_urls, *unseen_urls].each do |url|
77
- task.metadata.staged_urls.add(url)
78
- end
79
- end
80
-
81
- it "filters seen staged URLs" do
82
- expect {
83
- subject.call(task)
84
- }.to change { task.metadata.staged_urls }.to(SortedSet.new(unseen_urls))
85
- end
86
- end
87
- end
88
- end
@@ -1,43 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "spec_helpers"
4
-
5
- describe Wayfarer::Middleware::Dispatch do
6
- let(:task) { build(:task) }
7
- let(:action) { :action }
8
- subject(:chain) { described_class.new }
9
-
10
- before do
11
- task.metadata.controller = spy
12
- task.metadata.action = action
13
-
14
- allow(task.metadata.controller).to receive(:run_callbacks).and_yield
15
- end
16
-
17
- describe "#call" do
18
- it "runs callbacks" do
19
- expect(task.metadata.controller).to receive(:run_callbacks).with(action)
20
- subject.call(task)
21
- end
22
-
23
- context "when action is a Symbol" do
24
- it "calls the method" do
25
- expect(task.metadata.controller).to receive(action)
26
- subject.call(task)
27
- end
28
- end
29
-
30
- context "with other action" do
31
- let(:action) { Class.new }
32
-
33
- it "instantiates and calls" do
34
- expect_any_instance_of(action).to receive(:call).with(task)
35
- subject.call(task)
36
- end
37
- end
38
-
39
- it "yields" do
40
- expect { |spy| subject.call(task, &spy) }.to yield_control
41
- end
42
- end
43
- end