wayfarer 0.4.5 → 0.4.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/lint.yaml +25 -0
- data/.github/workflows/release.yaml +29 -0
- data/.github/workflows/tests.yaml +30 -0
- data/.gitignore +4 -0
- data/.rubocop.yml +5 -0
- data/.vale.ini +5 -0
- data/.yardopts +1 -3
- data/Dockerfile +5 -4
- data/Gemfile +3 -0
- data/Gemfile.lock +107 -102
- data/Rakefile +5 -56
- data/bin/wayfarer +1 -1
- data/docker-compose.yml +20 -9
- data/docs/cookbook/consent_screen.md +2 -2
- data/docs/cookbook/executing_javascript.md +3 -3
- data/docs/cookbook/navigation.md +12 -12
- data/docs/cookbook/querying_html.md +3 -3
- data/docs/cookbook/screenshots.md +2 -2
- data/docs/cookbook/user_agent.md +1 -1
- data/docs/design.md +36 -0
- data/docs/guides/callbacks.md +24 -126
- data/docs/guides/configuration.md +8 -8
- data/docs/guides/handlers.md +60 -0
- data/docs/guides/index.md +1 -0
- data/docs/guides/jobs/error_handling.md +40 -0
- data/docs/guides/jobs.md +99 -31
- data/docs/guides/navigation.md +1 -1
- data/docs/guides/networking/capybara.md +13 -22
- data/docs/guides/networking/custom_adapters.md +82 -41
- data/docs/guides/networking/ferrum.md +4 -4
- data/docs/guides/networking/http.md +9 -13
- data/docs/guides/networking/selenium.md +10 -11
- data/docs/guides/pages.md +76 -10
- data/docs/guides/redis.md +10 -0
- data/docs/guides/routing.md +74 -0
- data/docs/guides/tasks.md +33 -9
- data/docs/guides/tutorial.md +60 -0
- data/docs/guides/user_agents.md +113 -0
- data/docs/index.md +17 -40
- data/docs/reference/cli.md +35 -25
- data/docs/reference/configuration.md +36 -0
- data/lib/wayfarer/base.rb +124 -46
- data/lib/wayfarer/batch_completion.rb +56 -0
- data/lib/wayfarer/callbacks.rb +22 -48
- data/lib/wayfarer/cli/route_printer.rb +71 -57
- data/lib/wayfarer/cli.rb +121 -0
- data/lib/wayfarer/gc.rb +13 -6
- data/lib/wayfarer/handler.rb +15 -7
- data/lib/wayfarer/logging.rb +38 -0
- data/lib/wayfarer/middleware/base.rb +2 -0
- data/lib/wayfarer/middleware/batch_completion.rb +19 -0
- data/lib/wayfarer/middleware/content_type.rb +54 -0
- data/lib/wayfarer/middleware/controller.rb +19 -15
- data/lib/wayfarer/middleware/dedup.rb +16 -13
- data/lib/wayfarer/middleware/dispatch.rb +12 -4
- data/lib/wayfarer/middleware/normalize.rb +12 -11
- data/lib/wayfarer/middleware/redis.rb +15 -0
- data/lib/wayfarer/middleware/router.rb +33 -35
- data/lib/wayfarer/middleware/stage.rb +5 -5
- data/lib/wayfarer/middleware/uri_parser.rb +30 -0
- data/lib/wayfarer/middleware/user_agent.rb +49 -0
- data/lib/wayfarer/networking/capybara.rb +1 -1
- data/lib/wayfarer/networking/context.rb +2 -2
- data/lib/wayfarer/networking/ferrum.rb +2 -2
- data/lib/wayfarer/networking/follow.rb +12 -6
- data/lib/wayfarer/networking/http.rb +1 -1
- data/lib/wayfarer/networking/pool.rb +17 -12
- data/lib/wayfarer/networking/selenium.rb +3 -3
- data/lib/wayfarer/networking/strategy.rb +2 -2
- data/lib/wayfarer/page.rb +36 -14
- data/lib/wayfarer/parsing/xml.rb +6 -6
- data/lib/wayfarer/parsing.rb +24 -0
- data/lib/wayfarer/redis/barrier.rb +13 -21
- data/lib/wayfarer/redis/counter.rb +19 -9
- data/lib/wayfarer/redis/pool.rb +1 -1
- data/lib/wayfarer/redis/resettable.rb +19 -0
- data/lib/wayfarer/routing/dsl.rb +1 -0
- data/lib/wayfarer/routing/matchers/path.rb +4 -2
- data/lib/wayfarer/routing/root_route.rb +5 -1
- data/lib/wayfarer/routing/route.rb +4 -14
- data/lib/wayfarer/stringify.rb +22 -30
- data/lib/wayfarer/task.rb +12 -18
- data/lib/wayfarer.rb +29 -2
- data/mkdocs.yml +52 -7
- data/rake/docs.rake +26 -0
- data/rake/lint.rake +105 -0
- data/rake/release.rake +29 -0
- data/rake/tests.rake +28 -0
- data/requirements.txt +1 -1
- data/spec/base_spec.rb +140 -160
- data/spec/batch_completion_spec.rb +104 -0
- data/spec/cli/job_spec.rb +19 -23
- data/spec/cli/routing_spec.rb +101 -0
- data/spec/cli/version_spec.rb +1 -1
- data/spec/factories/task.rb +7 -1
- data/spec/fixtures/dummy_job.rb +5 -3
- data/spec/gc_spec.rb +8 -50
- data/spec/handler_spec.rb +1 -1
- data/spec/integration/callbacks_spec.rb +157 -45
- data/spec/integration/content_type_spec.rb +145 -0
- data/spec/integration/gc_spec.rb +44 -0
- data/spec/integration/handler_spec.rb +66 -0
- data/spec/integration/page_spec.rb +44 -29
- data/spec/integration/params_spec.rb +33 -25
- data/spec/integration/parsing_spec.rb +125 -0
- data/spec/integration/routing_spec.rb +18 -0
- data/spec/integration/stage_spec.rb +27 -20
- data/spec/middleware/batch_completion_spec.rb +34 -0
- data/spec/middleware/chain_spec.rb +8 -8
- data/spec/middleware/content_type_spec.rb +86 -0
- data/spec/middleware/controller_spec.rb +5 -5
- data/spec/middleware/dedup_spec.rb +38 -55
- data/spec/middleware/dispatch_spec.rb +23 -7
- data/spec/middleware/normalize_spec.rb +44 -13
- data/spec/middleware/router_spec.rb +29 -30
- data/spec/middleware/stage_spec.rb +8 -8
- data/spec/middleware/uri_parser_spec.rb +53 -0
- data/spec/middleware/{fetch_spec.rb → user_agent_spec.rb} +28 -27
- data/spec/networking/context_spec.rb +17 -0
- data/spec/networking/follow_spec.rb +2 -2
- data/spec/networking/pool_spec.rb +5 -5
- data/spec/networking/strategy.rb +2 -2
- data/spec/page_spec.rb +42 -20
- data/spec/parsing/xml_spec.rb +11 -12
- data/spec/redis/barrier_spec.rb +8 -48
- data/spec/redis/counter_spec.rb +13 -1
- data/spec/redis/pool_spec.rb +1 -1
- data/spec/spec_helpers.rb +27 -16
- data/spec/support/test_app.rb +8 -0
- data/spec/task_spec.rb +3 -24
- data/spec/wayfarer_spec.rb +1 -1
- data/wayfarer.gemspec +4 -3
- metadata +61 -51
- data/.github/workflows/ci.yaml +0 -32
- data/docs/guides/error_handling.md +0 -31
- data/docs/guides/networking.md +0 -94
- data/docs/guides/performance.md +0 -130
- data/docs/guides/reliability.md +0 -41
- data/docs/guides/routing/steering.md +0 -30
- data/docs/reference/api/base.md +0 -48
- data/docs/reference/configuration_keys.md +0 -42
- data/docs/reference/environment_variables.md +0 -83
- data/lib/wayfarer/cli/base.rb +0 -45
- data/lib/wayfarer/cli/generate.rb +0 -17
- data/lib/wayfarer/cli/job.rb +0 -56
- data/lib/wayfarer/cli/route.rb +0 -29
- data/lib/wayfarer/cli/runner.rb +0 -34
- data/lib/wayfarer/cli/templates/Gemfile.tt +0 -5
- data/lib/wayfarer/cli/templates/job.rb.tt +0 -10
- data/lib/wayfarer/config/capybara.rb +0 -10
- data/lib/wayfarer/config/ferrum.rb +0 -11
- data/lib/wayfarer/config/networking.rb +0 -26
- data/lib/wayfarer/config/redis.rb +0 -14
- data/lib/wayfarer/config/root.rb +0 -11
- data/lib/wayfarer/config/selenium.rb +0 -21
- data/lib/wayfarer/config/strconv.rb +0 -45
- data/lib/wayfarer/config/struct.rb +0 -72
- data/lib/wayfarer/middleware/fetch.rb +0 -56
- data/lib/wayfarer/redis/connection.rb +0 -13
- data/lib/wayfarer/redis/version.rb +0 -19
- data/lib/wayfarer/routing/router.rb +0 -28
- data/spec/callbacks_spec.rb +0 -102
- data/spec/cli/generate_spec.rb +0 -39
- data/spec/config/capybara_spec.rb +0 -18
- data/spec/config/ferrum_spec.rb +0 -24
- data/spec/config/networking_spec.rb +0 -73
- data/spec/config/redis_spec.rb +0 -32
- data/spec/config/root_spec.rb +0 -31
- data/spec/config/selenium_spec.rb +0 -56
- data/spec/config/strconv_spec.rb +0 -58
- data/spec/config/struct_spec.rb +0 -66
- data/spec/integration/steering_spec.rb +0 -57
- data/spec/redis/version_spec.rb +0 -13
- data/spec/routing/router_spec.rb +0 -24
@@ -1,42 +0,0 @@
|
|
1
|
-
---
|
2
|
-
hide:
|
3
|
-
- toc
|
4
|
-
---
|
5
|
-
|
6
|
-
# Configuration Keys
|
7
|
-
|
8
|
-
## `Wayfarer.config.network`
|
9
|
-
|
10
|
-
| Runtime config key | Environment variable | Description | Default | Supported values |
|
11
|
-
| ---------------------- | ------------------------------------ | ------------------------------------------- | -------------------------------- | ----------------------------------- |
|
12
|
-
| `network.agent` | `WAYFARER_NETWORK_AGENT` | The user agent to use. | `:http` | `:http`, `:ferrum`, `:selenium` |
|
13
|
-
| `network.pool_size` | `WAYFARER_NETWORK_POOL_SIZE` | How many user agents to spawn. | 1 | Integers |
|
14
|
-
| `network.pool_timeout` | `WAYFARER_NETWORK_POOL_TIMEOUT` | How long jobs may use an agent in seconds. | 10 | Integers |
|
15
|
-
| `network.http_headers` | `WAYFARER_NETWORK_HTTP_HEADERS` | HTTP headers to append to requests. | `{}` | Hashes |
|
16
|
-
|
17
|
-
## `Wayfarer.config.ferrum`
|
18
|
-
|
19
|
-
| Runtime config key | Environment variable | Description | Default | Supported values |
|
20
|
-
| ---------------------- | ------------------------------------ | ------------------------------------------- | -------------------------------- | ----------------------------------- |
|
21
|
-
| `ferrum.options` | `WAYFARER_FERRUM_OPTIONS` | Ferrum options. | `{}` | Hashes |
|
22
|
-
|
23
|
-
## `Wayfarer.config.selenium`
|
24
|
-
|
25
|
-
| Runtime config key | Environment variable | Description | Default | Supported values |
|
26
|
-
| ---------------------- | ------------------------------------ | ------------------------------------------- | -------------------------------- | ----------------------------------- |
|
27
|
-
| `selenium.driver` | `WAYFARER_SELENIUM_DRIVER` | Selenium driver to use. | `:chrome` | Symbols |
|
28
|
-
| `selenium.options` | `WAYFARER_SELENIUM_OPTIONS` | Selenium options. | `{}` | Hashes |
|
29
|
-
| `selenium.client_timeout` | `WAYFARER_SELENIUM_CLIENT_TIMEOUT` | Selenium client timeout in seconds. | 60 | Integers |
|
30
|
-
|
31
|
-
## `Wayfarer.config.redis`
|
32
|
-
|
33
|
-
| Runtime config key | Environment variable | Description | Default | Supported values |
|
34
|
-
| ---------------------- | ------------------------------------ | ------------------------------------------- | ------------------------------------------ | ----------------------------------- |
|
35
|
-
| `redis.url` | `WAYFARER_REDIS_URL` | Redis URL to connect to. | `redis://localhost:6379` | Strings |
|
36
|
-
| `redis.factory` | n/a | Redis factory lambda. | `->(redis) { ::Redis.new(url: redis.url) }` | Lambdas |
|
37
|
-
|
38
|
-
## `Wayfarer.config.capybara`
|
39
|
-
|
40
|
-
| Runtime config key | Environment variable | Description | Default | Supported values |
|
41
|
-
| ---------------------- | ------------------------------------ | ------------------------------------------- | -------------------------------- | ----------------------------------- |
|
42
|
-
| `capybara.driver` | `WAYFARER_CAPYBARA_DRIVER` | The Capybara driver to use. | n/a | Symbols |
|
@@ -1,83 +0,0 @@
|
|
1
|
-
# Environment Variables
|
2
|
-
|
3
|
-
## String formats
|
4
|
-
|
5
|
-
Environment variable values can be parsed to Hash or Array at runtime
|
6
|
-
with the following syntaxes:
|
7
|
-
|
8
|
-
* Hash: Variable string `a:1,b:2,c:3` parses to `{a:1, b:2, c:3}` at runtime
|
9
|
-
* Array: Variable string `a,b,c` parses to `["a", "b", "c"]` at runtime
|
10
|
-
|
11
|
-
## Variables
|
12
|
-
|
13
|
-
### `WAYFARER_AGENT`
|
14
|
-
: Either `ferrum`, `selenium` or `http`.
|
15
|
-
|
16
|
-
* Type: String
|
17
|
-
* Key: `config.agent`
|
18
|
-
* Default value: `:http`
|
19
|
-
|
20
|
-
### `WAYFARER_POOL_SIZE`
|
21
|
-
: Number of user agents to maintain.
|
22
|
-
|
23
|
-
* Type: Integer
|
24
|
-
* Key: `config.pool_size`
|
25
|
-
* Default value: `1`
|
26
|
-
|
27
|
-
### `WAYFARER_POOL_TIMEOUT`
|
28
|
-
: How long a user agent may remain checked out until the owning job
|
29
|
-
fails.
|
30
|
-
|
31
|
-
* Type: Integer
|
32
|
-
* Key: `config.agent_pool_timeout`
|
33
|
-
* Default value: `10`
|
34
|
-
|
35
|
-
---
|
36
|
-
|
37
|
-
### `WAYFARER_FERRUM_OPTIONS`
|
38
|
-
: Key/value options passed to `Ferrum::Browser.new`.
|
39
|
-
|
40
|
-
* Type: Hash
|
41
|
-
* Key: `config.ferrum_options`
|
42
|
-
* Default value: `{}`
|
43
|
-
|
44
|
-
---
|
45
|
-
|
46
|
-
### `WAYFARER_SELENIUM_DRIVER`
|
47
|
-
: Driver passed to `Selenium::WebDriver.for`.
|
48
|
-
|
49
|
-
* Type: Symbol
|
50
|
-
* Key: `config.selenium_driver`
|
51
|
-
* Default value: `:chrome`
|
52
|
-
|
53
|
-
---
|
54
|
-
|
55
|
-
### `WAYFARER_SELENIUM_OPTIONS`
|
56
|
-
: Options passed to `Selenium::WebDriver.for`.
|
57
|
-
|
58
|
-
* Type: Hash
|
59
|
-
* Key: `config.selenium_options`
|
60
|
-
* Default value: `{}`
|
61
|
-
|
62
|
-
---
|
63
|
-
|
64
|
-
### `WAYFARER_SELENIUM_CLIENT_TIMEOUT`
|
65
|
-
: Selenium HTTP client timeout (seconds).
|
66
|
-
|
67
|
-
* Type: Integer
|
68
|
-
* Key: `config.selenium_client_timeout`
|
69
|
-
* Default value: `60`
|
70
|
-
|
71
|
-
---
|
72
|
-
|
73
|
-
### `WAYFARER_HTTP_HEADERS`
|
74
|
-
: HTTP request headers used when retrieving pages.
|
75
|
-
|
76
|
-
* Type: Hash
|
77
|
-
* Key: `config.http_headers`
|
78
|
-
* Default value: `{}`
|
79
|
-
|
80
|
-
!!! attention "Partial support"
|
81
|
-
|
82
|
-
Selenium does not support configuring HTTP request headers.
|
83
|
-
|
data/lib/wayfarer/cli/base.rb
DELETED
@@ -1,45 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
module Wayfarer
|
4
|
-
module CLI
|
5
|
-
class Base < Thor
|
6
|
-
include Thor::Actions
|
7
|
-
|
8
|
-
def self.source_root
|
9
|
-
File.expand_path("templates", __dir__)
|
10
|
-
end
|
11
|
-
|
12
|
-
private
|
13
|
-
|
14
|
-
def mock_redis
|
15
|
-
Wayfarer.config.redis.factory = ->(_) { MockRedis.new }
|
16
|
-
end
|
17
|
-
|
18
|
-
def load_environment
|
19
|
-
load_rails || load_plain
|
20
|
-
end
|
21
|
-
|
22
|
-
def free_agent_pool
|
23
|
-
Wayfarer::Networking::Pool.instance.free
|
24
|
-
end
|
25
|
-
|
26
|
-
def load_rails
|
27
|
-
begin
|
28
|
-
require "rails/app_loader"
|
29
|
-
rescue LoadError
|
30
|
-
return
|
31
|
-
end
|
32
|
-
|
33
|
-
return unless Rails::AppLoader.find_executable
|
34
|
-
|
35
|
-
require File.expand_path("config/application", Dir.pwd)
|
36
|
-
require File.expand_path("config/boot", Dir.pwd)
|
37
|
-
require File.expand_path("config/environment", Dir.pwd)
|
38
|
-
end
|
39
|
-
|
40
|
-
def load_plain
|
41
|
-
Wayfarer::CLI::Runner.loader.setup
|
42
|
-
end
|
43
|
-
end
|
44
|
-
end
|
45
|
-
end
|
@@ -1,17 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
module Wayfarer
|
4
|
-
module CLI
|
5
|
-
class Generate < Base
|
6
|
-
desc "project NAME",
|
7
|
-
"Generate new project directory NAME"
|
8
|
-
def project(name)
|
9
|
-
@name = name
|
10
|
-
|
11
|
-
empty_directory name
|
12
|
-
template "Gemfile.tt", "#{name}/Gemfile"
|
13
|
-
template "job.rb.tt", "#{name}/app/jobs/#{name}.rb"
|
14
|
-
end
|
15
|
-
end
|
16
|
-
end
|
17
|
-
end
|
data/lib/wayfarer/cli/job.rb
DELETED
@@ -1,56 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
module Wayfarer
|
4
|
-
module CLI
|
5
|
-
class Job < Base
|
6
|
-
desc "perform JOB URL",
|
7
|
-
"Perform JOB with URL"
|
8
|
-
option :mock_redis, type: :boolean
|
9
|
-
option :batch, type: :string, default: SecureRandom.uuid
|
10
|
-
def perform(job, url)
|
11
|
-
load_environment
|
12
|
-
mock_redis if options[:mock_redis]
|
13
|
-
|
14
|
-
job = job.classify.constantize.new
|
15
|
-
task = Wayfarer::Task.new(url, options[:batch])
|
16
|
-
job.arguments.push(task)
|
17
|
-
job.perform(task)
|
18
|
-
task.gc.run
|
19
|
-
end
|
20
|
-
|
21
|
-
desc "enqueue JOB URL",
|
22
|
-
"Enqueue JOB with URL"
|
23
|
-
option :batch, type: :string, default: SecureRandom.uuid
|
24
|
-
def enqueue(job, url)
|
25
|
-
load_environment
|
26
|
-
mock_redis if options[:mock_redis] # TODO: Remove, does not belong here
|
27
|
-
|
28
|
-
job = job.classify.constantize
|
29
|
-
job.crawl(url, batch: options[:batch])
|
30
|
-
end
|
31
|
-
|
32
|
-
desc "execute JOB URL",
|
33
|
-
"Execute JOB with async adapter"
|
34
|
-
option :mock_redis, type: :boolean
|
35
|
-
option :batch, type: :string, default: SecureRandom.uuid
|
36
|
-
option :min_threads, type: :numeric, default: 1
|
37
|
-
option :max_threads, type: :numeric, default: 1
|
38
|
-
def execute(job, url)
|
39
|
-
load_environment
|
40
|
-
mock_redis if options[:mock_redis]
|
41
|
-
|
42
|
-
job = job.classify.constantize
|
43
|
-
job.queue_adapter = ActiveJob::QueueAdapters::AsyncAdapter.new(min_threads: options[:min_threads],
|
44
|
-
max_threads: options[:max_threads])
|
45
|
-
scheduler = job.queue_adapter.instance_variable_get(:@scheduler)
|
46
|
-
executor = scheduler.instance_variable_get(:@async_executor)
|
47
|
-
|
48
|
-
job.crawl(url, batch: options[:batch])
|
49
|
-
|
50
|
-
sleep(1) while executor.scheduled_task_count > executor.completed_task_count
|
51
|
-
|
52
|
-
free_agent_pool
|
53
|
-
end
|
54
|
-
end
|
55
|
-
end
|
56
|
-
end
|
data/lib/wayfarer/cli/route.rb
DELETED
@@ -1,29 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
# # frozen_string_literal: true
|
4
|
-
|
5
|
-
module Wayfarer
|
6
|
-
module CLI
|
7
|
-
class Route < Base
|
8
|
-
desc "result JOB URL",
|
9
|
-
"Invoke JOB's router with URL"
|
10
|
-
def result(job, url)
|
11
|
-
load_environment
|
12
|
-
url = Addressable::URI.parse(url)
|
13
|
-
job = job.classify.constantize
|
14
|
-
job.router.invoke(url, job.new.steer)
|
15
|
-
say Wayfarer::Routing::PathFinder.result(job.router.root, url)
|
16
|
-
end
|
17
|
-
|
18
|
-
desc "tree JOB URL",
|
19
|
-
"Visualize JOB's routing tree for URL"
|
20
|
-
def tree(job, url)
|
21
|
-
load_environment
|
22
|
-
url = Addressable::URI.parse(url)
|
23
|
-
job = job.classify.constantize
|
24
|
-
job.router.invoke(url, job.new.steer)
|
25
|
-
Wayfarer::CLI::RoutePrinter.print(job.router.root, url)
|
26
|
-
end
|
27
|
-
end
|
28
|
-
end
|
29
|
-
end
|
data/lib/wayfarer/cli/runner.rb
DELETED
@@ -1,34 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
module Wayfarer
|
4
|
-
module CLI
|
5
|
-
class Runner < Thor
|
6
|
-
def self.exit_on_failure?
|
7
|
-
true
|
8
|
-
end
|
9
|
-
|
10
|
-
def self.loader
|
11
|
-
# TODO: Raises if $PWD/app/jobs does not exist
|
12
|
-
|
13
|
-
@loader ||= Zeitwerk::Loader.new.tap do |load|
|
14
|
-
load.inflector = Zeitwerk::GemInflector.new(__FILE__)
|
15
|
-
load.push_dir("#{Dir.pwd}/app/jobs")
|
16
|
-
end
|
17
|
-
end
|
18
|
-
|
19
|
-
desc "version", "Print version"
|
20
|
-
def version
|
21
|
-
say Wayfarer::VERSION::STRING
|
22
|
-
end
|
23
|
-
|
24
|
-
desc "route", "Routing"
|
25
|
-
subcommand "route", Wayfarer::CLI::Route
|
26
|
-
|
27
|
-
desc "generate", "Generators"
|
28
|
-
subcommand "generate", Wayfarer::CLI::Generate
|
29
|
-
|
30
|
-
desc "job", "Jobs"
|
31
|
-
subcommand "job", Wayfarer::CLI::Job
|
32
|
-
end
|
33
|
-
end
|
34
|
-
end
|
@@ -1,26 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
module Wayfarer
|
4
|
-
module Config
|
5
|
-
Networking = Struct.new(agent: {
|
6
|
-
env_key: "WAYFARER_NETWORK_AGENT",
|
7
|
-
type: Symbol,
|
8
|
-
default: :http
|
9
|
-
},
|
10
|
-
pool_size: {
|
11
|
-
env_key: "WAYFARER_NETWORK_POOL_SIZE",
|
12
|
-
type: Integer,
|
13
|
-
default: 1
|
14
|
-
},
|
15
|
-
pool_timeout: {
|
16
|
-
env_key: "WAYFARER_NETWORK_POOL_TIMEOUT",
|
17
|
-
type: Integer,
|
18
|
-
default: 10
|
19
|
-
},
|
20
|
-
http_headers: {
|
21
|
-
env_key: "WAYFARER_NETWORK_HTTP_HEADERS",
|
22
|
-
type: Hash,
|
23
|
-
default: {}
|
24
|
-
})
|
25
|
-
end
|
26
|
-
end
|
@@ -1,14 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
module Wayfarer
|
4
|
-
module Config
|
5
|
-
Redis = Struct.new(url: {
|
6
|
-
env_key: "WAYFARER_REDIS_URL",
|
7
|
-
type: String,
|
8
|
-
default: "redis://localhost:6379"
|
9
|
-
},
|
10
|
-
factory: {
|
11
|
-
default: ->(redis) { ::Redis.new(url: redis.url) }
|
12
|
-
})
|
13
|
-
end
|
14
|
-
end
|
data/lib/wayfarer/config/root.rb
DELETED
@@ -1,11 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
module Wayfarer
|
4
|
-
module Config
|
5
|
-
Root = Struct.new(ferrum: { default: Wayfarer::Config::Ferrum.new },
|
6
|
-
network: { default: Wayfarer::Config::Networking.new },
|
7
|
-
redis: { default: Wayfarer::Config::Redis.new },
|
8
|
-
selenium: { default: Wayfarer::Config::Selenium.new },
|
9
|
-
capybara: { default: Wayfarer::Config::Capybara.new })
|
10
|
-
end
|
11
|
-
end
|
@@ -1,21 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
module Wayfarer
|
4
|
-
module Config
|
5
|
-
Selenium = Struct.new(driver: {
|
6
|
-
env_key: "WAYFARER_SELENIUM_DRIVER",
|
7
|
-
type: Symbol,
|
8
|
-
default: :chrome
|
9
|
-
},
|
10
|
-
options: {
|
11
|
-
env_key: "WAYFARER_SELENIUM_OPTIONS",
|
12
|
-
type: Hash,
|
13
|
-
default: {}
|
14
|
-
},
|
15
|
-
client_timeout: {
|
16
|
-
env_key: "WAYFARER_SELENIUM_CLIENT_TIMEOUT",
|
17
|
-
type: Integer,
|
18
|
-
default: 60 # seconds
|
19
|
-
})
|
20
|
-
end
|
21
|
-
end
|
@@ -1,45 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
module Wayfarer
|
4
|
-
module Config
|
5
|
-
module Strconv
|
6
|
-
module_function
|
7
|
-
|
8
|
-
def parse(str, type = nil)
|
9
|
-
return primitive(str) unless type
|
10
|
-
|
11
|
-
case type.name
|
12
|
-
when "Hash" then hash(str)
|
13
|
-
when "Array" then array(str)
|
14
|
-
when "Symbol" then str.to_sym
|
15
|
-
when "Integer" then Integer(str)
|
16
|
-
else str
|
17
|
-
end
|
18
|
-
end
|
19
|
-
|
20
|
-
def hash(str)
|
21
|
-
array(str).reduce({}) do |acc, pair|
|
22
|
-
k, v = pair.split(":", 2)
|
23
|
-
next acc unless k && v
|
24
|
-
|
25
|
-
acc.merge({ parse(k, Symbol) => primitive(v) })
|
26
|
-
end
|
27
|
-
end
|
28
|
-
|
29
|
-
def array(str)
|
30
|
-
str.split(",").map(&:strip)
|
31
|
-
end
|
32
|
-
|
33
|
-
def primitive(str)
|
34
|
-
return true if str == "true"
|
35
|
-
return false if str == "false"
|
36
|
-
|
37
|
-
begin
|
38
|
-
parse(str, Integer)
|
39
|
-
rescue StandardError
|
40
|
-
str
|
41
|
-
end
|
42
|
-
end
|
43
|
-
end
|
44
|
-
end
|
45
|
-
end
|
@@ -1,72 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
module Wayfarer
|
4
|
-
module Config
|
5
|
-
class Struct
|
6
|
-
module ClassMethods
|
7
|
-
attr_accessor :members
|
8
|
-
end
|
9
|
-
|
10
|
-
module InstanceMethods
|
11
|
-
extend Forwardable
|
12
|
-
|
13
|
-
delegate members: "self.class"
|
14
|
-
|
15
|
-
attr_reader :env
|
16
|
-
|
17
|
-
def initialize(env = ENV)
|
18
|
-
@env = env
|
19
|
-
|
20
|
-
define_writers
|
21
|
-
define_readers
|
22
|
-
end
|
23
|
-
|
24
|
-
private
|
25
|
-
|
26
|
-
def define_writers
|
27
|
-
members.each { |key, _| define_writer(key) }
|
28
|
-
end
|
29
|
-
|
30
|
-
def define_writer(key)
|
31
|
-
define_singleton_method(:"#{key}=") do |val|
|
32
|
-
set(key, val)
|
33
|
-
end
|
34
|
-
end
|
35
|
-
|
36
|
-
def define_readers
|
37
|
-
members.each { |key, options| define_reader(key, **options) }
|
38
|
-
end
|
39
|
-
|
40
|
-
def define_reader(key, env_key: nil, type: nil, default: nil)
|
41
|
-
define_singleton_method(key.to_sym) do
|
42
|
-
get(key) || set(key, env_val(env_key, type) || default)
|
43
|
-
end
|
44
|
-
end
|
45
|
-
|
46
|
-
def env_val(env_key, type)
|
47
|
-
return nil unless env_key
|
48
|
-
return nil unless env.key?(env_key)
|
49
|
-
|
50
|
-
Strconv.parse(env[env_key], type)
|
51
|
-
end
|
52
|
-
|
53
|
-
def get(key)
|
54
|
-
instance_variable_get(:"@#{key}")
|
55
|
-
end
|
56
|
-
|
57
|
-
def set(key, val)
|
58
|
-
instance_variable_set(:"@#{key}", val)
|
59
|
-
end
|
60
|
-
end
|
61
|
-
|
62
|
-
def self.new(members)
|
63
|
-
Class.new do
|
64
|
-
include InstanceMethods
|
65
|
-
extend ClassMethods
|
66
|
-
|
67
|
-
self.members = members
|
68
|
-
end
|
69
|
-
end
|
70
|
-
end
|
71
|
-
end
|
72
|
-
end
|
@@ -1,56 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
module Wayfarer
|
4
|
-
module Middleware
|
5
|
-
class Fetch
|
6
|
-
extend Base
|
7
|
-
|
8
|
-
module API
|
9
|
-
def agent
|
10
|
-
context.instance
|
11
|
-
end
|
12
|
-
|
13
|
-
def context
|
14
|
-
task.metadata.context
|
15
|
-
end
|
16
|
-
|
17
|
-
def page(live: false)
|
18
|
-
return task.metadata.page unless live
|
19
|
-
|
20
|
-
task.metadata.page = context.live&.page || task.metadata.page
|
21
|
-
end
|
22
|
-
|
23
|
-
def http
|
24
|
-
@http ||= Wayfarer::Networking::Follow.new(
|
25
|
-
Wayfarer::Networking::Context.new(
|
26
|
-
Wayfarer::Networking::HTTP.new
|
27
|
-
)
|
28
|
-
)
|
29
|
-
end
|
30
|
-
end
|
31
|
-
|
32
|
-
def call(task)
|
33
|
-
pool.with do |context|
|
34
|
-
result = task.metadata.controller.run_callbacks(:fetch) do
|
35
|
-
context.fetch(task.url)
|
36
|
-
end
|
37
|
-
|
38
|
-
case result
|
39
|
-
when Networking::Result::Redirect
|
40
|
-
task.metadata.controller.stage(result.redirect_url)
|
41
|
-
when Networking::Result::Success
|
42
|
-
task.metadata.context = context
|
43
|
-
task.metadata.page = result.page
|
44
|
-
yield if block_given?
|
45
|
-
end
|
46
|
-
end
|
47
|
-
end
|
48
|
-
|
49
|
-
private
|
50
|
-
|
51
|
-
def pool
|
52
|
-
Wayfarer::Networking::Pool.instance
|
53
|
-
end
|
54
|
-
end
|
55
|
-
end
|
56
|
-
end
|
@@ -1,19 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
module Wayfarer
|
4
|
-
module Redis
|
5
|
-
module Version
|
6
|
-
extend Connection
|
7
|
-
|
8
|
-
module_function
|
9
|
-
|
10
|
-
def determine
|
11
|
-
@determine ||= server_version.split(".").first(3).map(&:to_i)
|
12
|
-
end
|
13
|
-
|
14
|
-
def server_version
|
15
|
-
redis { |conn| conn.info["redis_version"] }
|
16
|
-
end
|
17
|
-
end
|
18
|
-
end
|
19
|
-
end
|
@@ -1,28 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
module Wayfarer
|
4
|
-
module Routing
|
5
|
-
class Router
|
6
|
-
ArgumentCountError = Class.new(StandardError) # TODO: Unused, remove
|
7
|
-
|
8
|
-
extend Forwardable
|
9
|
-
|
10
|
-
attr_reader :root,
|
11
|
-
:blocks
|
12
|
-
|
13
|
-
def initialize
|
14
|
-
@blocks = []
|
15
|
-
end
|
16
|
-
|
17
|
-
def draw(&block)
|
18
|
-
@blocks.push(block)
|
19
|
-
end
|
20
|
-
|
21
|
-
def invoke(url, arguments)
|
22
|
-
@root = Wayfarer::Routing::RootRoute.new
|
23
|
-
@blocks.each { |block| Docile.dsl_eval(@root, *arguments, &block) }
|
24
|
-
root.invoke(url)
|
25
|
-
end
|
26
|
-
end
|
27
|
-
end
|
28
|
-
end
|