wayfarer 0.4.1 → 0.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile.lock +14 -10
  3. data/docs/cookbook/batch_routing.md +22 -0
  4. data/docs/cookbook/consent_screen.md +36 -0
  5. data/docs/cookbook/executing_javascript.md +41 -0
  6. data/docs/cookbook/querying_html.md +3 -3
  7. data/docs/cookbook/screenshots.md +2 -2
  8. data/docs/guides/browser_automation/capybara.md +6 -3
  9. data/docs/guides/browser_automation/ferrum.md +3 -1
  10. data/docs/guides/browser_automation/selenium.md +4 -2
  11. data/docs/guides/callbacks.md +5 -5
  12. data/docs/guides/debugging.md +17 -0
  13. data/docs/guides/error_handling.md +22 -26
  14. data/docs/guides/jobs.md +44 -18
  15. data/docs/guides/navigation.md +73 -0
  16. data/docs/guides/pages.md +4 -4
  17. data/docs/guides/performance.md +108 -0
  18. data/docs/guides/reliability.md +41 -0
  19. data/docs/guides/routing/steering.md +30 -0
  20. data/docs/guides/tasks.md +9 -33
  21. data/docs/reference/api/base.md +13 -127
  22. data/docs/reference/api/route.md +1 -1
  23. data/docs/reference/cli.md +0 -78
  24. data/docs/reference/configuration_keys.md +1 -1
  25. data/lib/wayfarer/cli/job.rb +1 -3
  26. data/lib/wayfarer/cli/route.rb +4 -2
  27. data/lib/wayfarer/cli/templates/job.rb.tt +3 -1
  28. data/lib/wayfarer/config/networking.rb +1 -1
  29. data/lib/wayfarer/config/struct.rb +1 -1
  30. data/lib/wayfarer/middleware/fetch.rb +15 -4
  31. data/lib/wayfarer/middleware/router.rb +34 -2
  32. data/lib/wayfarer/middleware/worker.rb +4 -24
  33. data/lib/wayfarer/networking/pool.rb +9 -8
  34. data/lib/wayfarer/page.rb +1 -1
  35. data/lib/wayfarer/routing/matchers/custom.rb +2 -0
  36. data/lib/wayfarer/routing/matchers/path.rb +1 -0
  37. data/lib/wayfarer/routing/route.rb +6 -0
  38. data/lib/wayfarer/routing/router.rb +27 -0
  39. data/lib/wayfarer/stringify.rb +13 -7
  40. data/lib/wayfarer.rb +3 -1
  41. data/spec/callbacks_spec.rb +2 -2
  42. data/spec/config/networking_spec.rb +2 -2
  43. data/spec/factories/{queue/middleware.rb → middleware.rb} +3 -3
  44. data/spec/factories/{queue/page.rb → page.rb} +3 -3
  45. data/spec/factories/{queue/task.rb → task.rb} +0 -0
  46. data/spec/fixtures/dummy_job.rb +1 -1
  47. data/spec/middleware/chain_spec.rb +17 -17
  48. data/spec/middleware/fetch_spec.rb +27 -11
  49. data/spec/middleware/router_spec.rb +34 -7
  50. data/spec/middleware/worker_spec.rb +3 -13
  51. data/spec/routing/router_spec.rb +24 -0
  52. data/wayfarer.gemspec +1 -1
  53. metadata +16 -8
  54. data/spec/factories/queue/chain.rb +0 -11
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a2bbcc550d6799e1e3588b832905866116105fdcceb0eeef5ec244622f15bb10
4
- data.tar.gz: b8b324d89d162e578cde829f15a44bb08b9e93ad2a3bb5b6619e96275a7fa6cd
3
+ metadata.gz: 04baaa6967fc9de4970e4d3a14cb8bb2d7458c70bb6529189ef3823d7792aa18
4
+ data.tar.gz: '058de8aa89a46c88fb460a0d39e542c43e4b0a9f23faa9b672367fb6a9b12820'
5
5
  SHA512:
6
- metadata.gz: 998c06776f7a7922aa2d36770dc7e4389c5814ac36a20062b20e7fe6986fb52e4fde9f538287510fccfca1689fb9f7f4e019ec23dcbeff777add1a99e24fba26
7
- data.tar.gz: 239e3db3d5fffb8f81e74c655a648ce03febd346c25fb86b695bca8a8d328e6ff341d63d4becf63a2811defae3af5a1c4bf1c772e327b114df5909a06151a95b
6
+ metadata.gz: ba5feb1b4116f53a53166a999953b791aecc1356dbf4e3db5170f16f42703e708176a33a8a05553698a5cc6e011e4bc94521c163ff67e7d3d2dfd6c29e6a14f3
7
+ data.tar.gz: d0f0dddf9b091820b59476ecae9c048169fe867f5559c077ec306d74abc6540ea01d1723dd722cfeded64d206f67c9948eaef2e6a29b38b729243ee4aa046836
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- wayfarer (0.4.1)
4
+ wayfarer (0.4.2)
5
5
  activejob (~> 6.0)
6
6
  addressable (~> 2.8)
7
7
  capybara (~> 3.0)
@@ -59,16 +59,17 @@ GEM
59
59
  activesupport (>= 5.0.0)
60
60
  faker (1.9.6)
61
61
  i18n (>= 0.7)
62
- faraday (1.8.0)
62
+ faraday (1.9.3)
63
63
  faraday-em_http (~> 1.0)
64
64
  faraday-em_synchrony (~> 1.0)
65
65
  faraday-excon (~> 1.1)
66
- faraday-httpclient (~> 1.0.1)
66
+ faraday-httpclient (~> 1.0)
67
+ faraday-multipart (~> 1.0)
67
68
  faraday-net_http (~> 1.0)
68
- faraday-net_http_persistent (~> 1.1)
69
+ faraday-net_http_persistent (~> 1.0)
69
70
  faraday-patron (~> 1.0)
70
71
  faraday-rack (~> 1.0)
71
- multipart-post (>= 1.2, < 3)
72
+ faraday-retry (~> 1.0)
72
73
  ruby2_keywords (>= 0.0.4)
73
74
  faraday-cookie_jar (0.0.7)
74
75
  faraday (>= 0.8.0)
@@ -81,19 +82,22 @@ GEM
81
82
  faraday-http-cache (2.2.0)
82
83
  faraday (>= 0.8)
83
84
  faraday-httpclient (1.0.1)
85
+ faraday-multipart (1.0.3)
86
+ multipart-post (>= 1.2, < 3)
84
87
  faraday-net_http (1.0.1)
85
88
  faraday-net_http_persistent (1.2.0)
86
89
  faraday-patron (1.0.0)
87
90
  faraday-rack (1.0.0)
91
+ faraday-retry (1.0.3)
88
92
  faraday_middleware (1.2.0)
89
93
  faraday (~> 1.0)
90
- fastimage (2.2.5)
94
+ fastimage (2.2.6)
91
95
  ferrum (0.11)
92
96
  addressable (~> 2.5)
93
97
  cliver (~> 0.3)
94
98
  concurrent-ruby (~> 1.1)
95
99
  websocket-driver (>= 0.6, < 0.8)
96
- globalid (0.5.2)
100
+ globalid (1.0.0)
97
101
  activesupport (>= 5.0)
98
102
  http-cookie (1.0.4)
99
103
  domain_name (~> 0.5)
@@ -111,9 +115,9 @@ GEM
111
115
  nesty (~> 1.0)
112
116
  nokogiri (~> 1.11)
113
117
  method_source (1.0.0)
114
- mime-types (3.3.1)
118
+ mime-types (3.4.1)
115
119
  mime-types-data (~> 3.2015)
116
- mime-types-data (3.2021.0901)
120
+ mime-types-data (3.2022.0105)
117
121
  mini_mime (1.1.2)
118
122
  mini_portile2 (2.6.1)
119
123
  minitest (5.14.4)
@@ -182,7 +186,7 @@ GEM
182
186
  rack (~> 1.5)
183
187
  rack-protection (~> 1.4)
184
188
  tilt (>= 1.3, < 3)
185
- thor (1.1.0)
189
+ thor (1.2.1)
186
190
  tilt (2.0.10)
187
191
  tzinfo (2.0.4)
188
192
  concurrent-ruby (~> 1.0)
@@ -0,0 +1,22 @@
1
+ # Batch routing
2
+
3
+ ```ruby
4
+ # Create a record in an external database and store the hostname
5
+ record = Database::Row.create(hostname: "example.com")
6
+
7
+ class DummyJob < Wayfarer::Base
8
+ route do |hostname|
9
+ host hostname, to: :index
10
+ end
11
+
12
+ steer do |task|
13
+ # Pass the external record's hostname to the router
14
+ [Database::Row.find(task.batch).hostname]
15
+ end
16
+
17
+ # ...
18
+ end
19
+
20
+ # Enqueue the task and use the database record's key as batch
21
+ DummyJob.crawl_later("https://example.com", batch: record.id)
22
+ ```
@@ -0,0 +1,36 @@
1
+ # Consent Screens
2
+
3
+ Some websites have nag-screens that make visitors wait for a button to show up.
4
+ Here is an example with Ferrum where the opt-in button is contained in an
5
+ iframe, clicked, and makes the live page behind the screen accessible to
6
+ `#index`:
7
+
8
+ ```ruby
9
+ Wayfarer.config.network.agent = :ferrum
10
+
11
+ class DummyJob < Wayfarer::Base
12
+ route { to :index, host: "example.com" }
13
+
14
+ before_action if: :consent_required? do
15
+ sleep(5) # If the consent form has a loading animation
16
+ consent_button&.click
17
+ sleep(5) # Wait for browser to get redirected behind nag-screen
18
+ end
19
+
20
+ def index
21
+ # Nag-screen passed
22
+ stage page(live: true).meta.links.internal
23
+ end
24
+
25
+ private
26
+
27
+ def consent_button
28
+ browser.frames.third.css("button#consent")&.first
29
+ end
30
+
31
+ def consent_required?
32
+ browser.css(".consent_screen").any?
33
+ end
34
+ end
35
+
36
+ ```
@@ -0,0 +1,41 @@
1
+ # Executing JavaScript
2
+
3
+ Executing JavaScript requires automating a browser.
4
+
5
+ === "Ferrum"
6
+
7
+ ```ruby
8
+ class DummyJob < Wayfarer::Base
9
+ route { to :index }
10
+
11
+ def index
12
+ browser.evaluate("[window.scrollX, window.scrollY]")
13
+ end
14
+ end
15
+ ```
16
+
17
+ === "Selenium"
18
+
19
+ ```ruby
20
+ class DummyJob < Wayfarer::Base
21
+ route { to :index }
22
+
23
+ def index
24
+ # Mind the explicit return
25
+ browser.execute_script("return [window.scrollX, window.scrollY]")
26
+ end
27
+ end
28
+ ```
29
+
30
+ === "Capybara"
31
+
32
+ ```ruby
33
+ class DummyJob < Wayfarer::Base
34
+ route { to :index }
35
+
36
+ def index
37
+ # Capybara does not return the value of JavaScript execution
38
+ browser.execute_script("console.log('Foobar')") # => nil
39
+ end
40
+ end
41
+ ```
@@ -6,7 +6,7 @@ See: [Nokogiri: Searching an HTML / XML Document](https://nokogiri.org/tutorials
6
6
 
7
7
  ```ruby
8
8
  class DummyJob < Wayfarer::Base
9
- route.to :index
9
+ route { to :index }
10
10
 
11
11
  def index
12
12
  page.doc.css("html")
@@ -19,7 +19,7 @@ See: [Nokogiri: Searching an HTML / XML Document](https://nokogiri.org/tutorials
19
19
 
20
20
  ```ruby
21
21
  class DummyJob < Wayfarer::Base
22
- route.to :index
22
+ route { to :index }
23
23
 
24
24
  def index
25
25
  browser.at_css("html")
@@ -32,7 +32,7 @@ See: [Nokogiri: Searching an HTML / XML Document](https://nokogiri.org/tutorials
32
32
 
33
33
  ```ruby
34
34
  class DummyJob < Wayfarer::Base
35
- route.to :index
35
+ route { to :index }
36
36
 
37
37
  def index
38
38
  browser.find_elements(css: "html")
@@ -6,7 +6,7 @@ Taking screenshots requires automating a browser.
6
6
 
7
7
  ```ruby
8
8
  class DummyJob < Wayfarer::Base
9
- route.to :index
9
+ route { to :index }
10
10
 
11
11
  def index
12
12
  browser.screenshot(path: "screenshot.png")
@@ -18,7 +18,7 @@ Taking screenshots requires automating a browser.
18
18
 
19
19
  ```ruby
20
20
  class DummyJob < Wayfarer::Base
21
- route.to :index
21
+ route { to :index }
22
22
 
23
23
  def index
24
24
  browser.save_screenshot("screenshot.png")
@@ -7,8 +7,11 @@ When Capybara is in use, a remote browser process is available as a Capybara
7
7
  session:
8
8
 
9
9
  ```ruby
10
- class DummyWorker < Wayfarer::Worker
11
- route.to :index
10
+ Wayfarer.config.network.agent = :capybara
11
+ # Wayfarer.config.capybara.driver = ...
12
+
13
+ class DummyJob < Wayfarer::Worker
14
+ route { to :index }
12
15
 
13
16
  def index
14
17
  browser # => #<Capybara::Session ...>
@@ -61,6 +64,6 @@ end
61
64
 
62
65
  Capybara.register_driver(:cuprite) do |app|
63
66
  # Wayfarer's Ferrum or Selenium options must be passed along manually
64
- Capybara::Cuprite::Driver.new(app, Wayfare.config.ferrum.options)
67
+ Capybara::Cuprite::Driver.new(app, Wayfarer.config.ferrum.options)
65
68
  end
66
69
  ```
@@ -11,8 +11,10 @@ When Ferrum is in use, a Google Chrome process is accessible within jobs like
11
11
  so:
12
12
 
13
13
  ```ruby
14
+ Wayfarer.config.network.agent = :ferrum
15
+
14
16
  class DummyWorker < Wayfarer::Worker
15
- route.to :index
17
+ route { to :index }
16
18
 
17
19
  def index
18
20
  browser # => #<Ferrum::Browser ...>
@@ -7,8 +7,10 @@ When Selenium is in use, a remote browser process is accessible within jobs like
7
7
  so:
8
8
 
9
9
  ```ruby
10
+ Wayfarer.config.network.agent = :selenium
11
+
10
12
  class DummyWorker < Wayfarer::Worker
11
- route.to :index
13
+ route { to :index }
12
14
 
13
15
  def index
14
16
  browser # => #<Selenium::WebDriver ...>
@@ -28,7 +30,7 @@ process.
28
30
  Wayfarer.config.network.agent = :selenium
29
31
 
30
32
  class DummyJob < Wayfarer::Base
31
- route.to :index
33
+ route { to :index }
32
34
 
33
35
  def index
34
36
  page.headers # => always {}
@@ -52,16 +52,16 @@ end
52
52
  Internally, a batch counter is in-/decremented on certain events. Once the
53
53
  counter reaches zero, `after_batch` callbacks run in declaration order.
54
54
 
55
- The counter is incremented when:
55
+ Within the batch, the counter is incremented when:
56
56
 
57
- * A job is enqueued within the batch.
57
+ * A job is enqueued.
58
58
 
59
59
  The counter is decremented when:
60
60
 
61
61
  * A job succeeds.
62
- * A job fails due to an unhandled exception.
63
- * A job fails due to a discarded exception.
64
- * A job fails and thereyby exhausts its maximum attempts.
62
+ * A job errors due to an unhandled exception.
63
+ * A job is discarded due to an exception.
64
+ * A job errors and thereby exhausts its maximum attempts.
65
65
 
66
66
  !!! attention "Batch callbacks can fail jobs"
67
67
 
@@ -0,0 +1,17 @@
1
+ # Debugging
2
+
3
+ [Wayfarer's CLI](/reference/cli/) has two sub-commands that come in handy when
4
+ diagnosing problems in the development workflow.
5
+
6
+ ## Routing a URL from the shell
7
+
8
+ ## `wayfarer route`
9
+
10
+ ### `wayfarer route result JOB URL`
11
+
12
+ : Prints the result of invoking `JOB`'s router with `URL`.
13
+
14
+ ### `wayfarer route tree JOB URL`
15
+
16
+ : Visualises the routing tree result of invoking `JOB`'s router with `URL`.
17
+
@@ -1,35 +1,31 @@
1
1
  # Error handling
2
2
 
3
- Wayfarer relies on Active Job's error handling facilities, `retry_on` and
4
- `discard_on`:
3
+ ## Wayfarer never swallows exceptions
5
4
 
6
- * [Active Job Basics: Exceptions](https://guides.rubyonrails.org/active_job_basics.html#exceptions)
7
- * [ActiveJob::Exceptions](https://edgeapi.rubyonrails.org/classes/ActiveJob/Exceptions/ClassMethods.html)
5
+ * Wayfarer never swallows exceptions.
6
+ * Jobs with unhandled exceptions are not retried.
8
7
 
9
- ## Retrying
8
+ ## Retrying and discarding
10
9
 
11
- ```ruby
12
- class DummyJob < Wayfarer::Base
13
- retry_on MyError, attempts: 3 do |job, error|
14
- # All 3 attempts have failed (1 initial attempt + 2 retries)
15
- end
16
- end
17
- ```
10
+ Wayfarer relies on [Active Job's two error handling facilities](https://guides.rubyonrails.org/active_job_basics.html#exceptions).
18
11
 
19
- ## Discarding
12
+ * `retry_on` to retry jobs a number of times on certain errors:
20
13
 
21
- ```ruby
22
- class DummyJob < Wayfarer::Base
23
- discard_on MyError do |job, error|
24
- # The job will not get retried
25
- end
26
- end
27
- ```
14
+ ```ruby
15
+ class DummyJob < Wayfarer::Base
16
+ retry_on MyError, attempts: 3 do |job, error|
17
+ # This block runs once all 3 attempts have failed
18
+ # (1 initial attempt + 2 retries)
19
+ end
20
+ end
21
+ ```
28
22
 
29
- ## Job failures
23
+ * `discard_on` to throw away jobs on certain errors:
30
24
 
31
- Jobs are not retried and their URLs locked within their batch if:
32
-
33
- * A discarded exception is raised.
34
- * An unhandled exception is raised.
35
- * A handled exception is raised, but retry attempts are exhausted.
25
+ ```ruby
26
+ class DummyJob < Wayfarer::Base
27
+ discard_on MyError do |job, error|
28
+ # This block runs once and buries the job
29
+ end
30
+ end
31
+ ```
data/docs/guides/jobs.md CHANGED
@@ -1,16 +1,36 @@
1
1
  # Jobs
2
2
 
3
- Jobs are Ruby classes that look as follows:
3
+ Jobs are Ruby classes that process [tasks](/guides/tasks) and look as follows:
4
4
 
5
5
  ```ruby
6
6
  class DummyJob < Wayfarer::Base
7
- route.to :index
7
+ route { to :index }
8
8
 
9
9
  def index
10
10
  end
11
11
  end
12
12
  ```
13
13
 
14
+ Here is how to enqueue a task for a URL:
15
+
16
+ ```ruby
17
+ DummyJob.crawl_later("https://example.com")
18
+ ```
19
+
20
+ This is the same as calling the Active Job API directly and passing a task
21
+ and a random batch:
22
+
23
+ ```ruby
24
+ task = Wayfarer::Task.new("https://example.com", SecureRandom.uuid)
25
+ DummyJob.perform_later(task)
26
+ ```
27
+
28
+ A batch can be specified with `::crawl_later`, too:
29
+
30
+ ```ruby
31
+ DummyJob.crawl_later("https://example.com", batch: "my-batch")
32
+ ```
33
+
14
34
  ## Current task
15
35
 
16
36
  Jobs consume [tasks](../tasks) from a message queue. The currently processed
@@ -18,58 +38,64 @@ task is accessible like so:
18
38
 
19
39
  ```ruby
20
40
  class DummyJob < Wayfarer::Base
21
- route.to :index
41
+ route { to :index }
22
42
 
23
43
  def index
24
44
  task.url # => "https://example.com"
25
- task.batch # => "55fe80d4-97ce-..."
45
+ task.batch # => "my-batch"
26
46
  end
27
47
  end
28
48
  ```
29
49
 
30
50
  ## Current page
31
51
 
32
- Once control is handed over to jobs, their task's URL has been retrieved into a
33
- [page](../pages) object:
52
+ A task's URL contents get fetched into a [page](../pages) object if the task URL
53
+ matched a route:
34
54
 
35
55
  ```ruby
36
56
  class DummyJob < Wayfarer::Base
37
- route.to :index
57
+ route { to :index, host: "example.com" }
38
58
 
39
59
  def index
40
- page.url # => "https://example.com"
41
- page.body # => "<html>..."
60
+ page.url # => "https://example.com"
61
+ page.body # => "<html>..."
62
+ page.status_code # => 200
63
+ page.headers # => { "Content-Type" => ... }
42
64
  end
43
65
  end
44
66
  ```
45
67
 
46
68
  ## URL parameters
47
69
 
48
- TODO
70
+ Jobs can extract data from URLs with their router:
49
71
 
50
72
  ```ruby
51
73
  class DummyJob < Wayfarer::Base
52
- route.to :index
74
+ route do
75
+ path "/users/:id/profile"
76
+ end
53
77
 
54
78
  def index
55
- page.url # => "https://example.com"
56
- page.body # => "<html>..."
79
+ params[:id] # => "42"
57
80
  end
58
81
  end
82
+
83
+ DummyJob.crawl_later("https://example.com/users/42/profile")
59
84
  ```
60
85
 
61
86
 
62
- ## Automated browser
87
+ ## User agent
63
88
 
64
- When automating browsers, the remote browser process that retrieved the URL is
65
- accessible like so:
89
+ The HTTP client or automated browser that fetched the URL is available:
66
90
 
67
91
  ```ruby
92
+ Wayfarer.config.network.agent = :ferrum # Chrome DevTools Protocol
93
+
68
94
  class DummyJob < Wayfarer::Base
69
- route.to :index
95
+ route { to :index }
70
96
 
71
97
  def index
72
- browser # => #<Ferrum::Browser ...> or #<Selenium::WebDriver ...>
98
+ browser.save_screenshot("capture.png")
73
99
  end
74
100
  end
75
101
  ```
@@ -0,0 +1,73 @@
1
+ # Navigation
2
+
3
+ Wayfarer has two mechanisms for navigating crawls:
4
+
5
+ * Jobs have a router that decides if a task's URL gets fetched and processed.
6
+ * Jobs can add URLs to a processing set with `#stage`.
7
+
8
+ ## Staging URLs
9
+
10
+ Jobs can turn URLs into tasks within their own batch with `#stage`. Staging a
11
+ URL does not enqueue it immediately. Instead, the URL is added to a processing
12
+ set first.
13
+
14
+ ```ruby
15
+ class DummyJob < Wayfarer::Base
16
+ route { to :index }
17
+
18
+ def index
19
+ stage page.meta.links.all
20
+ end
21
+ end
22
+ ```
23
+
24
+ Once the `index` action method returns, all URLs in `page.meta.links.all`
25
+ are (1) normalized to a canonical form and (2) checked for inclusion in
26
+ the batch's processed URL Redis set. All unprocessed URLs are enqueued as
27
+ tasks within the same batch.
28
+
29
+ `#stage` can be called arbitrarily often, with invalid URLs too, as they are
30
+ filtered out behind the scenes:
31
+
32
+ ```ruby
33
+ def index
34
+ stage "_bro:ken@url/" # => ["_bro:ken@url/"]
35
+ end
36
+ ```
37
+
38
+ See also: [Performance: Stage less URLs](/guides/performance)
39
+
40
+ !!! attention "Failing action methods do not enqueue tasks"
41
+
42
+ If an action method fails as in:
43
+
44
+ ```ruby
45
+ def index
46
+ stage page.meta.links.all
47
+ fail "Error occured"
48
+ end
49
+ ```
50
+
51
+ None of the staged URLs are enqueued as tasks. Jobs that raise an exception
52
+ should get retried, or the exception should be handled.
53
+
54
+
55
+ ## Routing URLs
56
+
57
+ In the following example, the task is written to the message queue, but the
58
+ job's routes do not match the URL. When the task gets consumed, the URL does not
59
+ get fetched and the action method is not called.
60
+
61
+ ```ruby
62
+ class DummyJob < Wayfarer::Base
63
+ route do
64
+ host "example.com", path: "/users/:user_id", to: :user
65
+ end
66
+
67
+ # ...
68
+ end
69
+
70
+ DummyJob.crawl_later("https://mismatching.host/users/42")
71
+ ```
72
+
73
+
data/docs/guides/pages.md CHANGED
@@ -1,11 +1,11 @@
1
1
  # Pages
2
2
 
3
- Retrieved pages are represented by `Wayfarer::Page` objects and are available
4
- within jobs like so:
3
+ Retrieved pages take the shape of `Wayfarer::Page` objects and are available
4
+ to jobs:
5
5
 
6
6
  ```ruby
7
7
  class DummyJob < Wayfarer::Worker
8
- route.to :index
8
+ route { to :index }
9
9
 
10
10
  def index
11
11
  page # => #<Wayfarer::Page ...>
@@ -35,7 +35,7 @@ To access a page reflecting the current browser state, pass the `live` keyword:
35
35
 
36
36
  ```ruby
37
37
  class DummyJob < Wayfarer::Worker
38
- route.to :index
38
+ route { to :index }
39
39
 
40
40
  def index
41
41
  page # => #<Wayfarer::Page ...>