wayfarer 0.4.0 → 0.4.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/ci.yaml +1 -1
- data/Gemfile.lock +20 -15
- data/docs/cookbook/user_agent.md +1 -1
- data/docs/guides/browser_automation/capybara.md +64 -1
- data/docs/guides/browser_automation/custom_adapters.md +100 -0
- data/docs/guides/browser_automation/ferrum.md +3 -3
- data/docs/guides/browser_automation/selenium.md +7 -5
- data/docs/guides/callbacks.md +117 -10
- data/docs/guides/configuration.md +16 -10
- data/docs/guides/error_handling.md +9 -5
- data/docs/guides/networking.md +77 -3
- data/docs/index.md +9 -1
- data/docs/reference/api/base.md +4 -4
- data/docs/reference/configuration_keys.md +42 -0
- data/docs/reference/environment_variables.md +25 -27
- data/lib/wayfarer/base.rb +7 -17
- data/lib/wayfarer/callbacks.rb +71 -0
- data/lib/wayfarer/cli/base.rb +5 -1
- data/lib/wayfarer/cli/job.rb +7 -3
- data/lib/wayfarer/cli/route.rb +2 -2
- data/lib/wayfarer/cli/route_printer.rb +7 -7
- data/lib/wayfarer/config/capybara.rb +10 -0
- data/lib/wayfarer/config/ferrum.rb +11 -0
- data/lib/wayfarer/config/networking.rb +26 -0
- data/lib/wayfarer/config/redis.rb +14 -0
- data/lib/wayfarer/config/root.rb +11 -0
- data/lib/wayfarer/config/selenium.rb +21 -0
- data/lib/wayfarer/config/strconv.rb +45 -0
- data/lib/wayfarer/config/struct.rb +72 -0
- data/lib/wayfarer/gc.rb +3 -7
- data/lib/wayfarer/middleware/fetch.rb +7 -3
- data/lib/wayfarer/middleware/router.rb +2 -2
- data/lib/wayfarer/middleware/worker.rb +12 -9
- data/lib/wayfarer/networking/capybara.rb +28 -0
- data/lib/wayfarer/networking/context.rb +36 -0
- data/lib/wayfarer/networking/ferrum.rb +17 -52
- data/lib/wayfarer/networking/http.rb +34 -0
- data/lib/wayfarer/networking/pool.rb +15 -10
- data/lib/wayfarer/networking/result.rb +1 -1
- data/lib/wayfarer/networking/selenium.rb +20 -47
- data/lib/wayfarer/networking/strategy.rb +38 -0
- data/lib/wayfarer/page.rb +2 -3
- data/lib/wayfarer/redis/pool.rb +3 -1
- data/lib/wayfarer/routing/dsl.rb +8 -8
- data/lib/wayfarer/routing/matchers/custom.rb +23 -0
- data/lib/wayfarer/routing/matchers/host.rb +19 -0
- data/lib/wayfarer/routing/matchers/path.rb +48 -0
- data/lib/wayfarer/routing/matchers/query.rb +63 -0
- data/lib/wayfarer/routing/matchers/scheme.rb +17 -0
- data/lib/wayfarer/routing/matchers/suffix.rb +17 -0
- data/lib/wayfarer/routing/matchers/url.rb +17 -0
- data/lib/wayfarer/routing/route.rb +1 -1
- data/lib/wayfarer.rb +9 -9
- data/spec/base_spec.rb +14 -0
- data/spec/callbacks_spec.rb +102 -0
- data/spec/cli/job_spec.rb +6 -6
- data/spec/config/capybara_spec.rb +18 -0
- data/spec/config/ferrum_spec.rb +24 -0
- data/spec/config/networking_spec.rb +73 -0
- data/spec/config/redis_spec.rb +32 -0
- data/spec/config/root_spec.rb +31 -0
- data/spec/config/selenium_spec.rb +56 -0
- data/spec/config/strconv_spec.rb +58 -0
- data/spec/config/struct_spec.rb +66 -0
- data/spec/gc_spec.rb +8 -6
- data/spec/middleware/fetch_spec.rb +20 -8
- data/spec/middleware/router_spec.rb +7 -0
- data/spec/middleware/worker_spec.rb +64 -27
- data/spec/networking/capybara_spec.rb +12 -0
- data/spec/networking/context_spec.rb +127 -0
- data/spec/networking/ferrum_spec.rb +6 -22
- data/spec/networking/http_spec.rb +12 -0
- data/spec/networking/pool_spec.rb +37 -12
- data/spec/networking/selenium_spec.rb +6 -22
- data/spec/networking/strategy.rb +170 -0
- data/spec/redis/pool_spec.rb +1 -1
- data/spec/routing/dsl_spec.rb +10 -10
- data/spec/routing/integration_spec.rb +22 -22
- data/spec/routing/{custom_matcher_spec.rb → matchers/custom_spec.rb} +4 -4
- data/spec/routing/{host_matcher_spec.rb → matchers/host_spec.rb} +6 -6
- data/spec/routing/{path_matcher_spec.rb → matchers/path_spec.rb} +6 -6
- data/spec/routing/{query_matcher_spec.rb → matchers/query_spec.rb} +15 -15
- data/spec/routing/{scheme_matcher_spec.rb → matchers/scheme_spec.rb} +4 -4
- data/spec/routing/{suffix_matcher_spec.rb → matchers/suffix_spec.rb} +4 -4
- data/spec/routing/{uri_matcher_spec.rb → matchers/uri_spec.rb} +4 -4
- data/spec/routing/path_finder_spec.rb +1 -1
- data/spec/routing/root_route_spec.rb +2 -2
- data/spec/routing/route_spec.rb +2 -2
- data/spec/spec_helpers.rb +13 -5
- data/spec/wayfarer_spec.rb +1 -1
- data/wayfarer.gemspec +8 -7
- metadata +74 -33
- data/lib/wayfarer/config.rb +0 -67
- data/lib/wayfarer/networking/healer.rb +0 -21
- data/lib/wayfarer/networking/net_http.rb +0 -52
- data/lib/wayfarer/routing/custom_matcher.rb +0 -21
- data/lib/wayfarer/routing/host_matcher.rb +0 -23
- data/lib/wayfarer/routing/path_matcher.rb +0 -46
- data/lib/wayfarer/routing/query_matcher.rb +0 -67
- data/lib/wayfarer/routing/scheme_matcher.rb +0 -21
- data/lib/wayfarer/routing/suffix_matcher.rb +0 -21
- data/lib/wayfarer/routing/url_matcher.rb +0 -21
- data/spec/config_spec.rb +0 -144
- data/spec/networking/adapter.rb +0 -135
- data/spec/networking/healer_spec.rb +0 -46
- data/spec/networking/net_http_spec.rb +0 -37
data/docs/reference/api/base.md
CHANGED
@@ -102,7 +102,7 @@ Base functionality every job is equipped with:
|
|
102
102
|
|
103
103
|
### `#browser -> Ferrum::Browser | Selenium::WebDriver | nil`
|
104
104
|
: The browser process used to retrieve the current response.
|
105
|
-
If the configured
|
105
|
+
If the configured agent is the default `:http`, `nil` is returned.
|
106
106
|
|
107
107
|
Guides:
|
108
108
|
|
@@ -112,7 +112,7 @@ Base functionality every job is equipped with:
|
|
112
112
|
!!! example "Accessing a Google Chrome process"
|
113
113
|
|
114
114
|
```ruby
|
115
|
-
Wayfarer.config.
|
115
|
+
Wayfarer.config.network.agent = :ferrum
|
116
116
|
|
117
117
|
class DummyJob < Wayfarer::Base
|
118
118
|
route.to :index
|
@@ -126,7 +126,7 @@ Base functionality every job is equipped with:
|
|
126
126
|
!!! example "Accessing a Selenium WebDriver"
|
127
127
|
|
128
128
|
```ruby
|
129
|
-
Wayfarer.config.
|
129
|
+
Wayfarer.config.network.agent = :selenium
|
130
130
|
|
131
131
|
class DummyJob < Wayfarer::Base
|
132
132
|
route.to :index
|
@@ -144,7 +144,7 @@ Base functionality every job is equipped with:
|
|
144
144
|
processing URL.
|
145
145
|
|
146
146
|
With `page(live: true)` passed, the returned `Page` reflects the current
|
147
|
-
browser DOM. No-op when the `net/http`
|
147
|
+
browser DOM. No-op when the `net/http` agent is in use. Calls to
|
148
148
|
`page()` without the keyword return the most recent page.
|
149
149
|
|
150
150
|
---
|
@@ -0,0 +1,42 @@
|
|
1
|
+
---
|
2
|
+
hide:
|
3
|
+
- toc
|
4
|
+
---
|
5
|
+
|
6
|
+
# Configuration Keys
|
7
|
+
|
8
|
+
## `Wayfarer.config.network`
|
9
|
+
|
10
|
+
| Runtime config key | Environment variable | Description | Default | Supported values |
|
11
|
+
| ---------------------- | ------------------------------------ | ------------------------------------------- | -------------------------------- | ----------------------------------- |
|
12
|
+
| `network.agent` | `WAYFARER_NETWORK_AGENT` | The user agent to use. | `:http` | `:http`, `:ferrum`, `:selenium` |
|
13
|
+
| `network.pool_size` | `WAYFARER_NETWORK_POOL_SIZE` | How many user agents to spawn. | 3 | Integers |
|
14
|
+
| `network.pool_timeout` | `WAYFARER_NETWORK_POOL_TIMEOUT` | How long jobs may use an agent in seconds. | 10 | Integers |
|
15
|
+
| `network.http_headers` | `WAYFARER_NETWORK_HTTP_HEADERS` | HTTP headers to append to requests. | `{}` | Hashes |
|
16
|
+
|
17
|
+
## `Wayfarer.config.ferrum`
|
18
|
+
|
19
|
+
| Runtime config key | Environment variable | Description | Default | Supported values |
|
20
|
+
| ---------------------- | ------------------------------------ | ------------------------------------------- | -------------------------------- | ----------------------------------- |
|
21
|
+
| `ferrum.options` | `WAYFARER_FERRUM_OPTIONS` | Ferrum options. | `{}` | Hashes |
|
22
|
+
|
23
|
+
## `Wayfarer.config.selenium`
|
24
|
+
|
25
|
+
| Runtime config key | Environment variable | Description | Default | Supported values |
|
26
|
+
| ---------------------- | ------------------------------------ | ------------------------------------------- | -------------------------------- | ----------------------------------- |
|
27
|
+
| `selenium.driver` | `WAYFARER_SELENIUM_DRIVER` | Selenium driver to use. | `:chrome` | Symbols |
|
28
|
+
| `selenium.options` | `WAYFARER_SELENIUM_OPTIONS` | Selenium options. | `{}` | Hashes |
|
29
|
+
| `selenium.client_timeout` | `WAYFARER_SELENIUM_CLIENT_TIMEOUT` | Selenium client timeout in seconds. | 60 | Integers |
|
30
|
+
|
31
|
+
## `Wayfarer.config.redis`
|
32
|
+
|
33
|
+
| Runtime config key | Environment variable | Description | Default | Supported values |
|
34
|
+
| ---------------------- | ------------------------------------ | ------------------------------------------- | ------------------------------------------ | ----------------------------------- |
|
35
|
+
| `redis.url` | `WAYFARER_REDIS_URL` | Redis URL to connect to. | `redis://localhost:6379` | Strings |
|
36
|
+
| `redis.factory` | n/a | Redis factory lambda. | `->(redis) { ::Redis.new(url: redis.url) }` | Lambdas |
|
37
|
+
|
38
|
+
## `Wayfarer.config.capybara`
|
39
|
+
|
40
|
+
| Runtime config key | Environment variable | Description | Default | Supported values |
|
41
|
+
| ---------------------- | ------------------------------------ | ------------------------------------------- | -------------------------------- | ----------------------------------- |
|
42
|
+
| `capybara.driver` | `WAYFARER_CAPYBARA_DRIVER` | The Capybara driver to use. | n/a | Symbols |
|
@@ -10,26 +10,26 @@ with the following syntaxes:
|
|
10
10
|
|
11
11
|
## Variables
|
12
12
|
|
13
|
-
### `
|
14
|
-
: Either `ferrum`, `selenium` or `
|
13
|
+
### `WAYFARER_NETWORK_AGENT`
|
14
|
+
: Either `ferrum`, `selenium` or `http`.
|
15
15
|
|
16
16
|
* Type: String
|
17
|
-
* Key: `config.
|
18
|
-
* Default value: `:
|
17
|
+
* Key: `config.network.agent`
|
18
|
+
* Default value: `:http`
|
19
19
|
|
20
20
|
### `WAYFARER_NETWORK_POOL_SIZE`
|
21
|
-
: Number of
|
21
|
+
: Number of user agents to maintain.
|
22
22
|
|
23
23
|
* Type: Integer
|
24
24
|
* Key: `config.network.pool_size`
|
25
25
|
* Default value: `3`
|
26
26
|
|
27
27
|
### `WAYFARER_NETWORK_POOL_TIMEOUT`
|
28
|
-
: How long a
|
28
|
+
: How long a user agent may remain checked out until the owning job
|
29
29
|
fails.
|
30
30
|
|
31
31
|
* Type: Integer
|
32
|
-
* Key: `config.
|
32
|
+
* Key: `config.network.pool_timeout`
|
33
33
|
* Default value: `10`
|
34
34
|
|
35
35
|
---
|
@@ -43,32 +43,30 @@ with the following syntaxes:
|
|
43
43
|
|
44
44
|
---
|
45
45
|
|
46
|
-
### `
|
47
|
-
:
|
46
|
+
### `WAYFARER_SELENIUM_DRIVER`
|
47
|
+
: Driver passed to `Selenium::WebDriver.for`.
|
48
48
|
|
49
|
-
* Type:
|
50
|
-
* Key: `config.
|
51
|
-
* Default value: `
|
49
|
+
* Type: Symbol
|
50
|
+
* Key: `config.selenium.driver`
|
51
|
+
* Default value: `:chrome`
|
52
52
|
|
53
|
-
|
54
|
-
|
55
|
-
!!! example "Foobar"
|
56
|
-
|
57
|
-
For example, to run Google Chrome in foreground with Ferrum:
|
58
|
-
|
59
|
-
```
|
60
|
-
Wayfarer.config.adapter = :ferrum
|
61
|
-
Wayfarer.ferrum_options = { headless: false, url: "http://chrome:3000" }
|
62
|
-
```
|
53
|
+
---
|
63
54
|
|
55
|
+
### `WAYFARER_SELENIUM_OPTIONS`
|
56
|
+
: Options passed to `Selenium::WebDriver.for`.
|
64
57
|
|
65
|
-
|
58
|
+
* Type: Hash
|
59
|
+
* Key: `config.selenium.options`
|
60
|
+
* Default value: `{}`
|
61
|
+
|
62
|
+
---
|
66
63
|
|
67
|
-
|
64
|
+
### `WAYFARER_SELENIUM_CLIENT_TIMEOUT`
|
65
|
+
: Selenium HTTP client timeout (seconds).
|
68
66
|
|
69
|
-
|
70
|
-
|
71
|
-
|
67
|
+
* Type: Integer
|
68
|
+
* Key: `config.selenium.client_timeout`
|
69
|
+
* Default value: `60`
|
72
70
|
|
73
71
|
---
|
74
72
|
|
data/lib/wayfarer/base.rb
CHANGED
@@ -5,22 +5,7 @@ module Wayfarer
|
|
5
5
|
include Wayfarer::Middleware::Worker
|
6
6
|
extend Forwardable
|
7
7
|
|
8
|
-
|
9
|
-
def after_batch_callbacks
|
10
|
-
@after_batch_callbacks ||= []
|
11
|
-
end
|
12
|
-
|
13
|
-
def after_batch(&block)
|
14
|
-
after_batch_callbacks.push(block)
|
15
|
-
end
|
16
|
-
|
17
|
-
def run_after_batch_callbacks
|
18
|
-
after_batch_callbacks.each(&:call)
|
19
|
-
end
|
20
|
-
end
|
21
|
-
|
22
|
-
after_enqueue do |job|
|
23
|
-
task = job.arguments.first
|
8
|
+
after_enqueue do |_job|
|
24
9
|
task.counter.increment
|
25
10
|
end
|
26
11
|
|
@@ -30,6 +15,7 @@ module Wayfarer
|
|
30
15
|
|
31
16
|
def self.retry_on(*argv)
|
32
17
|
super(*argv) do |job, error|
|
18
|
+
job.task.barrier.seen?(job.task.url)
|
33
19
|
GC.new(job).run
|
34
20
|
yield job, error if block_given?
|
35
21
|
end
|
@@ -37,6 +23,7 @@ module Wayfarer
|
|
37
23
|
|
38
24
|
def self.discard_on(*argv)
|
39
25
|
super(*argv) do |job, error|
|
26
|
+
job.task.barrier.seen?(job.task.url)
|
40
27
|
GC.new(job).run
|
41
28
|
yield job, error if block_given?
|
42
29
|
end
|
@@ -48,7 +35,6 @@ module Wayfarer
|
|
48
35
|
|
49
36
|
def retry_job(...)
|
50
37
|
super(...) # increments the counter by re-enqueuing the job
|
51
|
-
task = arguments.first
|
52
38
|
task.counter.decrement
|
53
39
|
end
|
54
40
|
|
@@ -56,5 +42,9 @@ module Wayfarer
|
|
56
42
|
task.job = self
|
57
43
|
chain.call(task)
|
58
44
|
end
|
45
|
+
|
46
|
+
def task
|
47
|
+
arguments.first
|
48
|
+
end
|
59
49
|
end
|
60
50
|
end
|
@@ -0,0 +1,71 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Wayfarer
|
4
|
+
module Callbacks
|
5
|
+
TERMINATOR = ->(_target, result) { result.call == false }
|
6
|
+
OPTIONS = { terminator: TERMINATOR, skip_after_callbacks_if_terminated: true }.freeze
|
7
|
+
|
8
|
+
ConditionalCallback = Struct.new(:job, :filters) do
|
9
|
+
def run(method, &block)
|
10
|
+
return if only && !applies?(only)
|
11
|
+
return if except && applies?(except)
|
12
|
+
|
13
|
+
return job.send(method) if method
|
14
|
+
|
15
|
+
job.instance_eval(&block)
|
16
|
+
end
|
17
|
+
|
18
|
+
private
|
19
|
+
|
20
|
+
def applies?(condition)
|
21
|
+
case condition
|
22
|
+
when Symbol then condition == action
|
23
|
+
when Enumerable then condition&.include?(action)
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
def only
|
28
|
+
filters[:only]
|
29
|
+
end
|
30
|
+
|
31
|
+
def except
|
32
|
+
filters[:except]
|
33
|
+
end
|
34
|
+
|
35
|
+
def action
|
36
|
+
task.metadata.action
|
37
|
+
end
|
38
|
+
|
39
|
+
def task
|
40
|
+
job.task
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
def self.included(base)
|
45
|
+
base.include(ActiveSupport::Callbacks)
|
46
|
+
base.extend(ClassMethods)
|
47
|
+
|
48
|
+
base.class_eval do
|
49
|
+
define_callbacks(:fetch, OPTIONS)
|
50
|
+
define_callbacks(:action, OPTIONS)
|
51
|
+
define_callbacks(:batch, OPTIONS)
|
52
|
+
|
53
|
+
define(:fetch, :before)
|
54
|
+
define(:action, :before)
|
55
|
+
define(:batch, :after)
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
module ClassMethods
|
60
|
+
private
|
61
|
+
|
62
|
+
def define(name, stage)
|
63
|
+
define_singleton_method([stage, name].join("_")) do |method = nil, **filters, &block|
|
64
|
+
set_callback(name, stage, **filters) do |job|
|
65
|
+
ConditionalCallback.new(job, filters).run(method, &block)
|
66
|
+
end
|
67
|
+
end
|
68
|
+
end
|
69
|
+
end
|
70
|
+
end
|
71
|
+
end
|
data/lib/wayfarer/cli/base.rb
CHANGED
@@ -12,12 +12,16 @@ module Wayfarer
|
|
12
12
|
private
|
13
13
|
|
14
14
|
def mock_redis
|
15
|
-
Wayfarer.config.
|
15
|
+
Wayfarer.config.redis.factory = ->(_) { MockRedis.new }
|
16
16
|
end
|
17
17
|
|
18
18
|
def load_environment
|
19
19
|
Wayfarer::CLI::Runner.loader.setup
|
20
20
|
end
|
21
|
+
|
22
|
+
def free_agent_pool
|
23
|
+
Wayfarer::Networking::Pool.instance.free
|
24
|
+
end
|
21
25
|
end
|
22
26
|
end
|
23
27
|
end
|
data/lib/wayfarer/cli/job.rb
CHANGED
@@ -11,12 +11,14 @@ module Wayfarer
|
|
11
11
|
load_environment
|
12
12
|
mock_redis if options[:mock_redis]
|
13
13
|
|
14
|
-
url = URI(url)
|
14
|
+
url = Addressable::URI.parse(url)
|
15
15
|
job = job.classify.constantize.new
|
16
16
|
task = Wayfarer::Task.new(url, "tmp")
|
17
17
|
job.arguments.push(task)
|
18
18
|
job.perform(task)
|
19
19
|
GC.new(job).run
|
20
|
+
|
21
|
+
free_agent_pool
|
20
22
|
end
|
21
23
|
|
22
24
|
desc "enqueue JOB URL",
|
@@ -26,7 +28,7 @@ module Wayfarer
|
|
26
28
|
load_environment
|
27
29
|
mock_redis if options[:mock_redis]
|
28
30
|
|
29
|
-
url = URI(url)
|
31
|
+
url = Addressable::URI.parse(url)
|
30
32
|
job = job.classify.constantize
|
31
33
|
job.crawl_later(url, batch: options[:batch])
|
32
34
|
end
|
@@ -41,7 +43,7 @@ module Wayfarer
|
|
41
43
|
load_environment
|
42
44
|
mock_redis if options[:mock_redis]
|
43
45
|
|
44
|
-
url = URI(url)
|
46
|
+
url = Addressable::URI.parse(url)
|
45
47
|
job = job.classify.constantize
|
46
48
|
|
47
49
|
job.queue_adapter = ActiveJob::QueueAdapters::AsyncAdapter.new(min_threads: options[:min_threads],
|
@@ -52,6 +54,8 @@ module Wayfarer
|
|
52
54
|
job.crawl_later(url, batch: options[:batch])
|
53
55
|
|
54
56
|
sleep(1) while executor.scheduled_task_count > executor.completed_task_count
|
57
|
+
|
58
|
+
free_agent_pool
|
55
59
|
end
|
56
60
|
end
|
57
61
|
end
|
data/lib/wayfarer/cli/route.rb
CHANGED
@@ -9,7 +9,7 @@ module Wayfarer
|
|
9
9
|
"Invoke JOB's router with URL"
|
10
10
|
def result(job, url)
|
11
11
|
load_environment
|
12
|
-
url = URI(url)
|
12
|
+
url = Addressable::URI.parse(url)
|
13
13
|
job = job.classify.constantize
|
14
14
|
puts Wayfarer::Routing::PathFinder.result(job.route, url)
|
15
15
|
end
|
@@ -18,7 +18,7 @@ module Wayfarer
|
|
18
18
|
"Visualize JOB's routing tree for URL"
|
19
19
|
def tree(job, url)
|
20
20
|
load_environment
|
21
|
-
url = URI(url)
|
21
|
+
url = Addressable::URI.parse(url)
|
22
22
|
job = job.classify.constantize
|
23
23
|
Wayfarer::CLI::RoutePrinter.print(job.route, url)
|
24
24
|
end
|
@@ -77,19 +77,19 @@ module Wayfarer
|
|
77
77
|
def matcher_label(route)
|
78
78
|
return "Target" if route.is_a?(Wayfarer::Routing::TargetRoute)
|
79
79
|
|
80
|
-
route.matcher.class.name.demodulize
|
80
|
+
route.matcher.class.name.demodulize
|
81
81
|
end
|
82
82
|
|
83
83
|
def options(route)
|
84
84
|
return "" if route.is_a?(Wayfarer::Routing::RootRoute)
|
85
85
|
|
86
86
|
case (matcher = route.matcher)
|
87
|
-
when Wayfarer::Routing::
|
88
|
-
when Wayfarer::Routing::
|
89
|
-
when Wayfarer::Routing::
|
90
|
-
when Wayfarer::Routing::
|
91
|
-
when Wayfarer::Routing::
|
92
|
-
when Wayfarer::Routing::
|
87
|
+
when Wayfarer::Routing::Matchers::Host then matcher.host
|
88
|
+
when Wayfarer::Routing::Matchers::Path then matcher.path
|
89
|
+
when Wayfarer::Routing::Matchers::Query then matcher.fields
|
90
|
+
when Wayfarer::Routing::Matchers::Custom then "##{route.action}"
|
91
|
+
when Wayfarer::Routing::Matchers::Scheme then matcher.scheme
|
92
|
+
when Wayfarer::Routing::Matchers::Suffix then matcher.suffix
|
93
93
|
end
|
94
94
|
end
|
95
95
|
|
@@ -0,0 +1,26 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Wayfarer
|
4
|
+
module Config
|
5
|
+
Networking = Struct.new(agent: {
|
6
|
+
env_key: "WAYFARER_NETWORK_AGENT",
|
7
|
+
type: Symbol,
|
8
|
+
default: :http
|
9
|
+
},
|
10
|
+
pool_size: {
|
11
|
+
env_key: "WAYFARER_NETWORK_POOL_SIZE",
|
12
|
+
type: Integer,
|
13
|
+
default: 3
|
14
|
+
},
|
15
|
+
pool_timeout: {
|
16
|
+
env_key: "WAYFARER_NETWORK_POOL_TIMEOUT",
|
17
|
+
type: Integer,
|
18
|
+
default: 10
|
19
|
+
},
|
20
|
+
http_headers: {
|
21
|
+
env_key: "WAYFARER_NETWORK_HTTP_HEADERS",
|
22
|
+
type: Hash,
|
23
|
+
default: {}
|
24
|
+
})
|
25
|
+
end
|
26
|
+
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Wayfarer
|
4
|
+
module Config
|
5
|
+
Redis = Struct.new(url: {
|
6
|
+
env_key: "WAYFARER_REDIS_URL",
|
7
|
+
type: String,
|
8
|
+
default: "redis://localhost:6379"
|
9
|
+
},
|
10
|
+
factory: {
|
11
|
+
default: ->(redis) { ::Redis.new(url: redis.url) }
|
12
|
+
})
|
13
|
+
end
|
14
|
+
end
|
@@ -0,0 +1,11 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Wayfarer
|
4
|
+
module Config
|
5
|
+
Root = Struct.new(ferrum: { default: Wayfarer::Config::Ferrum.new },
|
6
|
+
network: { default: Wayfarer::Config::Networking.new },
|
7
|
+
redis: { default: Wayfarer::Config::Redis.new },
|
8
|
+
selenium: { default: Wayfarer::Config::Selenium.new },
|
9
|
+
capybara: { default: Wayfarer::Config::Capybara.new })
|
10
|
+
end
|
11
|
+
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Wayfarer
|
4
|
+
module Config
|
5
|
+
Selenium = Struct.new(driver: {
|
6
|
+
env_key: "WAYFARER_SELENIUM_DRIVER",
|
7
|
+
type: Symbol,
|
8
|
+
default: :chrome
|
9
|
+
},
|
10
|
+
options: {
|
11
|
+
env_key: "WAYFARER_SELENIUM_OPTIONS",
|
12
|
+
type: Hash,
|
13
|
+
default: {}
|
14
|
+
},
|
15
|
+
client_timeout: {
|
16
|
+
env_key: "WAYFARER_SELENIUM_CLIENT_TIMEOUT",
|
17
|
+
type: Integer,
|
18
|
+
default: 60 # seconds
|
19
|
+
})
|
20
|
+
end
|
21
|
+
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Wayfarer
|
4
|
+
module Config
|
5
|
+
module Strconv
|
6
|
+
module_function
|
7
|
+
|
8
|
+
def parse(str, type = nil)
|
9
|
+
return primitive(str) unless type
|
10
|
+
|
11
|
+
case type.name
|
12
|
+
when "Hash" then hash(str)
|
13
|
+
when "Array" then array(str)
|
14
|
+
when "Symbol" then str.to_sym
|
15
|
+
when "Integer" then Integer(str)
|
16
|
+
else str
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
def hash(str)
|
21
|
+
array(str).reduce({}) do |acc, pair|
|
22
|
+
k, v = pair.split(":", 2)
|
23
|
+
next acc unless k && v
|
24
|
+
|
25
|
+
acc.merge({ parse(k, Symbol) => primitive(v) })
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
def array(str)
|
30
|
+
str.split(",").map(&:strip)
|
31
|
+
end
|
32
|
+
|
33
|
+
def primitive(str)
|
34
|
+
return true if str == "true"
|
35
|
+
return false if str == "false"
|
36
|
+
|
37
|
+
begin
|
38
|
+
parse(str, Integer)
|
39
|
+
rescue StandardError
|
40
|
+
str
|
41
|
+
end
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end
|
45
|
+
end
|
@@ -0,0 +1,72 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Wayfarer
|
4
|
+
module Config
|
5
|
+
class Struct
|
6
|
+
module ClassMethods
|
7
|
+
attr_accessor :members
|
8
|
+
end
|
9
|
+
|
10
|
+
module InstanceMethods
|
11
|
+
extend Forwardable
|
12
|
+
|
13
|
+
delegate members: "self.class"
|
14
|
+
|
15
|
+
attr_reader :env
|
16
|
+
|
17
|
+
def initialize(env = ENV)
|
18
|
+
@env = env
|
19
|
+
|
20
|
+
define_writers
|
21
|
+
define_readers
|
22
|
+
end
|
23
|
+
|
24
|
+
private
|
25
|
+
|
26
|
+
def define_writers
|
27
|
+
members.each { |key, _| define_writer(key) }
|
28
|
+
end
|
29
|
+
|
30
|
+
def define_writer(key)
|
31
|
+
define_singleton_method(:"#{key}=") do |val|
|
32
|
+
set(key, val)
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
def define_readers
|
37
|
+
members.each { |key, options| define_reader(key, **options) }
|
38
|
+
end
|
39
|
+
|
40
|
+
def define_reader(key, env_key: nil, type: nil, default: nil)
|
41
|
+
define_singleton_method(key.to_sym) do
|
42
|
+
get(key) || set(key, get(key) || env_val(env_key, type) || default)
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
def env_val(env_key, type)
|
47
|
+
return nil unless env_key
|
48
|
+
return nil unless env.key?(env_key)
|
49
|
+
|
50
|
+
Strconv.parse(env[env_key], type)
|
51
|
+
end
|
52
|
+
|
53
|
+
def get(key)
|
54
|
+
instance_variable_get(:"@#{key}")
|
55
|
+
end
|
56
|
+
|
57
|
+
def set(key, val)
|
58
|
+
instance_variable_set(:"@#{key}", val)
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
62
|
+
def self.new(members)
|
63
|
+
Class.new do
|
64
|
+
include InstanceMethods
|
65
|
+
extend ClassMethods
|
66
|
+
|
67
|
+
self.members = members
|
68
|
+
end
|
69
|
+
end
|
70
|
+
end
|
71
|
+
end
|
72
|
+
end
|
data/lib/wayfarer/gc.rb
CHANGED
@@ -3,17 +3,13 @@
|
|
3
3
|
module Wayfarer
|
4
4
|
GC = Struct.new(:job) do
|
5
5
|
def run
|
6
|
+
task = job.task
|
7
|
+
|
6
8
|
return unless task.counter.decrement <= 0
|
7
9
|
|
8
10
|
task.barrier.reset!
|
9
11
|
task.counter.reset!
|
10
|
-
job.
|
11
|
-
end
|
12
|
-
|
13
|
-
private
|
14
|
-
|
15
|
-
def task
|
16
|
-
job.arguments.first
|
12
|
+
job.run_callbacks(:batch)
|
17
13
|
end
|
18
14
|
end
|
19
15
|
end
|
@@ -15,10 +15,14 @@ module Wayfarer
|
|
15
15
|
def call(task)
|
16
16
|
self.task = task
|
17
17
|
|
18
|
-
pool.with do |
|
19
|
-
task.metadata.
|
18
|
+
pool.with do |agent|
|
19
|
+
task.metadata.agent = agent
|
20
20
|
|
21
|
-
|
21
|
+
result = task.job.run_callbacks :fetch do
|
22
|
+
agent.fetch(task.url)
|
23
|
+
end
|
24
|
+
|
25
|
+
case result
|
22
26
|
when Networking::Result::Redirect
|
23
27
|
stage(result.redirect_url)
|
24
28
|
when Networking::Result::Success
|
@@ -6,12 +6,12 @@ module Wayfarer
|
|
6
6
|
def call(task)
|
7
7
|
route = task.job.class.route
|
8
8
|
|
9
|
-
case result = route.invoke(URI(task.url))
|
9
|
+
case result = route.invoke(Addressable::URI.parse(task.url))
|
10
10
|
when Routing::Result::Mismatch
|
11
11
|
return
|
12
12
|
when Routing::Result::Match
|
13
13
|
task.metadata.action = result.action
|
14
|
-
task.metadata.params = result.params
|
14
|
+
task.metadata.params = ActiveSupport::HashWithIndifferentAccess.new(result.params)
|
15
15
|
end
|
16
16
|
|
17
17
|
yield if block_given?
|