wayfarer 0.4.6 → 0.4.7

Sign up to get free protection for your applications and to get access to all the features.
Files changed (175) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/lint.yaml +25 -0
  3. data/.github/workflows/release.yaml +29 -0
  4. data/.github/workflows/tests.yaml +30 -0
  5. data/.gitignore +4 -0
  6. data/.rubocop.yml +5 -0
  7. data/.vale.ini +5 -0
  8. data/.yardopts +1 -3
  9. data/Dockerfile +5 -4
  10. data/Gemfile +3 -0
  11. data/Gemfile.lock +107 -102
  12. data/Rakefile +5 -56
  13. data/bin/wayfarer +1 -1
  14. data/docker-compose.yml +20 -9
  15. data/docs/cookbook/consent_screen.md +2 -2
  16. data/docs/cookbook/executing_javascript.md +3 -3
  17. data/docs/cookbook/navigation.md +12 -12
  18. data/docs/cookbook/querying_html.md +3 -3
  19. data/docs/cookbook/screenshots.md +2 -2
  20. data/docs/cookbook/user_agent.md +1 -1
  21. data/docs/design.md +36 -0
  22. data/docs/guides/callbacks.md +24 -126
  23. data/docs/guides/configuration.md +8 -8
  24. data/docs/guides/handlers.md +60 -0
  25. data/docs/guides/index.md +1 -0
  26. data/docs/guides/jobs/error_handling.md +40 -0
  27. data/docs/guides/jobs.md +99 -31
  28. data/docs/guides/navigation.md +1 -1
  29. data/docs/guides/networking/capybara.md +13 -22
  30. data/docs/guides/networking/custom_adapters.md +82 -41
  31. data/docs/guides/networking/ferrum.md +4 -4
  32. data/docs/guides/networking/http.md +9 -13
  33. data/docs/guides/networking/selenium.md +10 -11
  34. data/docs/guides/pages.md +76 -10
  35. data/docs/guides/redis.md +10 -0
  36. data/docs/guides/routing.md +74 -0
  37. data/docs/guides/tasks.md +33 -9
  38. data/docs/guides/tutorial.md +60 -0
  39. data/docs/guides/user_agents.md +113 -0
  40. data/docs/index.md +17 -40
  41. data/docs/reference/cli.md +35 -25
  42. data/docs/reference/configuration.md +36 -0
  43. data/lib/wayfarer/base.rb +124 -46
  44. data/lib/wayfarer/batch_completion.rb +56 -0
  45. data/lib/wayfarer/callbacks.rb +22 -48
  46. data/lib/wayfarer/cli/route_printer.rb +71 -57
  47. data/lib/wayfarer/cli.rb +121 -0
  48. data/lib/wayfarer/gc.rb +13 -6
  49. data/lib/wayfarer/handler.rb +15 -7
  50. data/lib/wayfarer/logging.rb +38 -0
  51. data/lib/wayfarer/middleware/base.rb +2 -0
  52. data/lib/wayfarer/middleware/batch_completion.rb +19 -0
  53. data/lib/wayfarer/middleware/content_type.rb +54 -0
  54. data/lib/wayfarer/middleware/controller.rb +19 -15
  55. data/lib/wayfarer/middleware/dedup.rb +16 -13
  56. data/lib/wayfarer/middleware/dispatch.rb +12 -4
  57. data/lib/wayfarer/middleware/normalize.rb +12 -11
  58. data/lib/wayfarer/middleware/redis.rb +15 -0
  59. data/lib/wayfarer/middleware/router.rb +33 -35
  60. data/lib/wayfarer/middleware/stage.rb +5 -5
  61. data/lib/wayfarer/middleware/uri_parser.rb +30 -0
  62. data/lib/wayfarer/middleware/user_agent.rb +49 -0
  63. data/lib/wayfarer/networking/capybara.rb +1 -1
  64. data/lib/wayfarer/networking/context.rb +2 -2
  65. data/lib/wayfarer/networking/ferrum.rb +2 -2
  66. data/lib/wayfarer/networking/follow.rb +12 -6
  67. data/lib/wayfarer/networking/http.rb +1 -1
  68. data/lib/wayfarer/networking/pool.rb +17 -12
  69. data/lib/wayfarer/networking/selenium.rb +3 -3
  70. data/lib/wayfarer/networking/strategy.rb +2 -2
  71. data/lib/wayfarer/page.rb +36 -14
  72. data/lib/wayfarer/parsing/xml.rb +6 -6
  73. data/lib/wayfarer/parsing.rb +24 -0
  74. data/lib/wayfarer/redis/barrier.rb +13 -21
  75. data/lib/wayfarer/redis/counter.rb +19 -9
  76. data/lib/wayfarer/redis/pool.rb +1 -1
  77. data/lib/wayfarer/redis/resettable.rb +19 -0
  78. data/lib/wayfarer/routing/dsl.rb +1 -0
  79. data/lib/wayfarer/routing/matchers/path.rb +4 -2
  80. data/lib/wayfarer/routing/root_route.rb +5 -1
  81. data/lib/wayfarer/routing/route.rb +4 -14
  82. data/lib/wayfarer/stringify.rb +22 -30
  83. data/lib/wayfarer/task.rb +12 -18
  84. data/lib/wayfarer.rb +28 -1
  85. data/mkdocs.yml +52 -7
  86. data/rake/docs.rake +26 -0
  87. data/rake/lint.rake +105 -0
  88. data/rake/release.rake +29 -0
  89. data/rake/tests.rake +28 -0
  90. data/requirements.txt +1 -1
  91. data/spec/base_spec.rb +140 -160
  92. data/spec/batch_completion_spec.rb +104 -0
  93. data/spec/cli/job_spec.rb +19 -23
  94. data/spec/cli/routing_spec.rb +101 -0
  95. data/spec/cli/version_spec.rb +1 -1
  96. data/spec/factories/task.rb +7 -1
  97. data/spec/fixtures/dummy_job.rb +5 -3
  98. data/spec/gc_spec.rb +8 -50
  99. data/spec/handler_spec.rb +1 -1
  100. data/spec/integration/callbacks_spec.rb +157 -45
  101. data/spec/integration/content_type_spec.rb +145 -0
  102. data/spec/integration/gc_spec.rb +44 -0
  103. data/spec/integration/handler_spec.rb +66 -0
  104. data/spec/integration/page_spec.rb +44 -29
  105. data/spec/integration/params_spec.rb +33 -25
  106. data/spec/integration/parsing_spec.rb +125 -0
  107. data/spec/integration/routing_spec.rb +18 -0
  108. data/spec/integration/stage_spec.rb +27 -20
  109. data/spec/middleware/batch_completion_spec.rb +34 -0
  110. data/spec/middleware/chain_spec.rb +8 -8
  111. data/spec/middleware/content_type_spec.rb +86 -0
  112. data/spec/middleware/controller_spec.rb +5 -5
  113. data/spec/middleware/dedup_spec.rb +38 -55
  114. data/spec/middleware/dispatch_spec.rb +23 -7
  115. data/spec/middleware/normalize_spec.rb +44 -13
  116. data/spec/middleware/router_spec.rb +29 -30
  117. data/spec/middleware/stage_spec.rb +8 -8
  118. data/spec/middleware/uri_parser_spec.rb +53 -0
  119. data/spec/middleware/{fetch_spec.rb → user_agent_spec.rb} +28 -27
  120. data/spec/networking/context_spec.rb +1 -1
  121. data/spec/networking/follow_spec.rb +2 -2
  122. data/spec/networking/pool_spec.rb +5 -5
  123. data/spec/networking/strategy.rb +2 -2
  124. data/spec/page_spec.rb +42 -20
  125. data/spec/parsing/xml_spec.rb +11 -12
  126. data/spec/redis/barrier_spec.rb +8 -48
  127. data/spec/redis/counter_spec.rb +13 -1
  128. data/spec/redis/pool_spec.rb +1 -1
  129. data/spec/spec_helpers.rb +27 -16
  130. data/spec/support/test_app.rb +8 -0
  131. data/spec/task_spec.rb +3 -24
  132. data/spec/wayfarer_spec.rb +1 -1
  133. data/wayfarer.gemspec +4 -3
  134. metadata +61 -51
  135. data/.github/workflows/ci.yaml +0 -32
  136. data/docs/guides/error_handling.md +0 -53
  137. data/docs/guides/networking.md +0 -94
  138. data/docs/guides/performance.md +0 -130
  139. data/docs/guides/reliability.md +0 -41
  140. data/docs/guides/routing/steering.md +0 -30
  141. data/docs/reference/api/base.md +0 -48
  142. data/docs/reference/configuration_keys.md +0 -43
  143. data/docs/reference/environment_variables.md +0 -83
  144. data/lib/wayfarer/cli/base.rb +0 -45
  145. data/lib/wayfarer/cli/generate.rb +0 -17
  146. data/lib/wayfarer/cli/job.rb +0 -56
  147. data/lib/wayfarer/cli/route.rb +0 -29
  148. data/lib/wayfarer/cli/runner.rb +0 -34
  149. data/lib/wayfarer/cli/templates/Gemfile.tt +0 -5
  150. data/lib/wayfarer/cli/templates/job.rb.tt +0 -10
  151. data/lib/wayfarer/config/capybara.rb +0 -10
  152. data/lib/wayfarer/config/ferrum.rb +0 -11
  153. data/lib/wayfarer/config/networking.rb +0 -29
  154. data/lib/wayfarer/config/redis.rb +0 -14
  155. data/lib/wayfarer/config/root.rb +0 -11
  156. data/lib/wayfarer/config/selenium.rb +0 -21
  157. data/lib/wayfarer/config/strconv.rb +0 -45
  158. data/lib/wayfarer/config/struct.rb +0 -72
  159. data/lib/wayfarer/middleware/fetch.rb +0 -56
  160. data/lib/wayfarer/redis/connection.rb +0 -13
  161. data/lib/wayfarer/redis/version.rb +0 -19
  162. data/lib/wayfarer/routing/router.rb +0 -28
  163. data/spec/callbacks_spec.rb +0 -102
  164. data/spec/cli/generate_spec.rb +0 -39
  165. data/spec/config/capybara_spec.rb +0 -18
  166. data/spec/config/ferrum_spec.rb +0 -24
  167. data/spec/config/networking_spec.rb +0 -73
  168. data/spec/config/redis_spec.rb +0 -32
  169. data/spec/config/root_spec.rb +0 -31
  170. data/spec/config/selenium_spec.rb +0 -56
  171. data/spec/config/strconv_spec.rb +0 -58
  172. data/spec/config/struct_spec.rb +0 -66
  173. data/spec/integration/steering_spec.rb +0 -57
  174. data/spec/redis/version_spec.rb +0 -13
  175. data/spec/routing/router_spec.rb +0 -24
@@ -1,43 +0,0 @@
1
- ---
2
- hide:
3
- - toc
4
- ---
5
-
6
- # Configuration Keys
7
-
8
- ## `Wayfarer.config.network`
9
-
10
- | Runtime config key | Environment variable | Description | Default | Supported values |
11
- | ---------------------- | ------------------------------------ | ------------------------------------------- | -------------------------------- | ----------------------------------- |
12
- | `network.agent` | `WAYFARER_NETWORK_AGENT` | The user agent to use. | `:http` | `:http`, `:ferrum`, `:selenium` |
13
- | `network.pool_size` | `WAYFARER_NETWORK_POOL_SIZE` | How many user agents to spawn. | 1 | Integers |
14
- | `network.pool_timeout` | `WAYFARER_NETWORK_POOL_TIMEOUT` | How long jobs may use an agent in seconds. | 10 | Integers |
15
- | `network.http_headers` | `WAYFARER_NETWORK_HTTP_HEADERS` | HTTP headers to append to requests. | `{}` | Hashes |
16
- | `network.renew_on` | | Exception classes to renew agents on. | `[]` | Classes |
17
-
18
- ## `Wayfarer.config.ferrum`
19
-
20
- | Runtime config key | Environment variable | Description | Default | Supported values |
21
- | ---------------------- | ------------------------------------ | ------------------------------------------- | -------------------------------- | ----------------------------------- |
22
- | `ferrum.options` | `WAYFARER_FERRUM_OPTIONS` | Ferrum options. | `{}` | Hashes |
23
-
24
- ## `Wayfarer.config.selenium`
25
-
26
- | Runtime config key | Environment variable | Description | Default | Supported values |
27
- | ---------------------- | ------------------------------------ | ------------------------------------------- | -------------------------------- | ----------------------------------- |
28
- | `selenium.driver` | `WAYFARER_SELENIUM_DRIVER` | Selenium driver to use. | `:chrome` | Symbols |
29
- | `selenium.options` | `WAYFARER_SELENIUM_OPTIONS` | Selenium options. | `{}` | Hashes |
30
- | `selenium.client_timeout` | `WAYFARER_SELENIUM_CLIENT_TIMEOUT` | Selenium client timeout in seconds. | 60 | Integers |
31
-
32
- ## `Wayfarer.config.redis`
33
-
34
- | Runtime config key | Environment variable | Description | Default | Supported values |
35
- | ---------------------- | ------------------------------------ | ------------------------------------------- | ------------------------------------------ | ----------------------------------- |
36
- | `redis.url` | `WAYFARER_REDIS_URL` | Redis URL to connect to. | http://localhost:6379 | Strings |
37
- | `redis.factory` | n/a | Redis factory lambda. | ` ->(redis) { ::Redis.new(url: redis.url)` | Lambdas |
38
-
39
- ## `Wayfarer.config.capybara`
40
-
41
- | Runtime config key | Environment variable | Description | Default | Supported values |
42
- | ---------------------- | ------------------------------------ | ------------------------------------------- | -------------------------------- | ----------------------------------- |
43
- | `capybara.driver` | `WAYFARER_CAPYBARA_DRIVER` | The Capybara driver to use. | n/a | Symbols |
@@ -1,83 +0,0 @@
1
- # Environment Variables
2
-
3
- ## String formats
4
-
5
- Environment variable values can be parsed to Hash or Array at runtime
6
- with the following syntaxes:
7
-
8
- * Hash: Variable string `a:1,b:2,c:3` parses to `{a:1, b:2, c:3}` at runtime
9
- * Array: Variable string `a,b,c` parses to `[:a, :b, :c]` at runtime
10
-
11
- ## Variables
12
-
13
- ### `WAYFARER_AGENT`
14
- : Either `ferrum`, `selenium` or `http`.
15
-
16
- * Type: String
17
- * Key: `config.agent`
18
- * Default value: `:http`
19
-
20
- ### `WAYFARER_POOL_SIZE`
21
- : Number of user agents to maintain.
22
-
23
- * Type: Integer
24
- * Key: `config.pool_size`
25
- * Default value: `1`
26
-
27
- ### `WAYFARER_POOL_TIMEOUT`
28
- : How long a user agent may remain checked out until the owning job
29
- fails.
30
-
31
- * Type: Integer
32
- * Key: `config.agent_pool_timeout`
33
- * Default value: `1`
34
-
35
- ---
36
-
37
- ### `WAYFARER_FERRUM_OPTIONS`
38
- : Key/value options passed to `Ferrum::Browser.new`.
39
-
40
- * Type: Hash
41
- * Key: `config.ferrum_options`
42
- * Default value: `{}`
43
-
44
- ---
45
-
46
- ### `WAYFARER_SELENIUM_DRIVER`
47
- : Driver passed to `Selenium::WebDriver.for`.
48
-
49
- * Type: Symbol
50
- * Key: `config.selenium_driver`
51
- * Default value: `:chrome`
52
-
53
- ---
54
-
55
- ### `WAYFARER_SELENIUM_OPTIONS`
56
- : Options passed to `Selenium::WebDriver.for`.
57
-
58
- * Type: Hash
59
- * Key: `config.selenium_options`
60
- * Default value: `{}`
61
-
62
- ---
63
-
64
- ### `WAYFARER_SELENIUM_CLIENT_TIMEOUT`
65
- : Selenium HTTP client timeout (seconds).
66
-
67
- * Type: Integer
68
- * Key: `config.selenium_client_timeout`
69
- * Default value: `60`
70
-
71
- ---
72
-
73
- ### `WAYFARER_HTTP_HEADERS`
74
- : HTTP request headers used when retrieving pages.
75
-
76
- * Type: Hash
77
- * Key: `config.http_headers`
78
- * Default value: `{}`
79
-
80
- !!! attention "Partial support"
81
-
82
- Selenium does not support configuring HTTP request headers.
83
-
@@ -1,45 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Wayfarer
4
- module CLI
5
- class Base < Thor
6
- include Thor::Actions
7
-
8
- def self.source_root
9
- File.expand_path("templates", __dir__)
10
- end
11
-
12
- private
13
-
14
- def mock_redis
15
- Wayfarer.config.redis.factory = ->(_) { MockRedis.new }
16
- end
17
-
18
- def load_environment
19
- load_rails || load_plain
20
- end
21
-
22
- def free_agent_pool
23
- Wayfarer::Networking::Pool.instance.free
24
- end
25
-
26
- def load_rails
27
- begin
28
- require "rails/app_loader"
29
- rescue LoadError
30
- return
31
- end
32
-
33
- return unless Rails::AppLoader.find_executable
34
-
35
- require File.expand_path("config/application", Dir.pwd)
36
- require File.expand_path("config/boot", Dir.pwd)
37
- require File.expand_path("config/environment", Dir.pwd)
38
- end
39
-
40
- def load_plain
41
- Wayfarer::CLI::Runner.loader.setup
42
- end
43
- end
44
- end
45
- end
@@ -1,17 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Wayfarer
4
- module CLI
5
- class Generate < Base
6
- desc "project NAME",
7
- "Generate new project directory NAME"
8
- def project(name)
9
- @name = name
10
-
11
- empty_directory name
12
- template "Gemfile.tt", "#{name}/Gemfile"
13
- template "job.rb.tt", "#{name}/app/jobs/#{name}.rb"
14
- end
15
- end
16
- end
17
- end
@@ -1,56 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Wayfarer
4
- module CLI
5
- class Job < Base
6
- desc "perform JOB URL",
7
- "Perform JOB with URL"
8
- option :mock_redis, type: :boolean
9
- option :batch, type: :string, default: SecureRandom.uuid
10
- def perform(job, url)
11
- load_environment
12
- mock_redis if options[:mock_redis]
13
-
14
- job = job.classify.constantize.new
15
- task = Wayfarer::Task.new(url, options[:batch])
16
- job.arguments.push(task)
17
- job.perform(task)
18
- task.gc.run
19
- end
20
-
21
- desc "enqueue JOB URL",
22
- "Enqueue JOB with URL"
23
- option :batch, type: :string, default: SecureRandom.uuid
24
- def enqueue(job, url)
25
- load_environment
26
- mock_redis if options[:mock_redis] # TODO: Remove, does not belong here
27
-
28
- job = job.classify.constantize
29
- job.crawl(url, batch: options[:batch])
30
- end
31
-
32
- desc "execute JOB URL",
33
- "Execute JOB with async adapter"
34
- option :mock_redis, type: :boolean
35
- option :batch, type: :string, default: SecureRandom.uuid
36
- option :min_threads, type: :numeric, default: 1
37
- option :max_threads, type: :numeric, default: 1
38
- def execute(job, url)
39
- load_environment
40
- mock_redis if options[:mock_redis]
41
-
42
- job = job.classify.constantize
43
- job.queue_adapter = ActiveJob::QueueAdapters::AsyncAdapter.new(min_threads: options[:min_threads],
44
- max_threads: options[:max_threads])
45
- scheduler = job.queue_adapter.instance_variable_get(:@scheduler)
46
- executor = scheduler.instance_variable_get(:@async_executor)
47
-
48
- job.crawl(url, batch: options[:batch])
49
-
50
- sleep(1) while executor.scheduled_task_count > executor.completed_task_count
51
-
52
- free_agent_pool
53
- end
54
- end
55
- end
56
- end
@@ -1,29 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- # # frozen_string_literal: true
4
-
5
- module Wayfarer
6
- module CLI
7
- class Route < Base
8
- desc "result JOB URL",
9
- "Invoke JOB's router with URL"
10
- def result(job, url)
11
- load_environment
12
- url = Addressable::URI.parse(url)
13
- job = job.classify.constantize
14
- job.router.invoke(url, job.new.steer)
15
- say Wayfarer::Routing::PathFinder.result(job.router.root, url)
16
- end
17
-
18
- desc "tree JOB URL",
19
- "Visualize JOB's routing tree for URL"
20
- def tree(job, url)
21
- load_environment
22
- url = Addressable::URI.parse(url)
23
- job = job.classify.constantize
24
- job.router.invoke(url, job.new.steer)
25
- Wayfarer::CLI::RoutePrinter.print(job.router.root, url)
26
- end
27
- end
28
- end
29
- end
@@ -1,34 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Wayfarer
4
- module CLI
5
- class Runner < Thor
6
- def self.exit_on_failure?
7
- true
8
- end
9
-
10
- def self.loader
11
- # TODO: Raises if $PWD/app/jobs does not exist
12
-
13
- @loader ||= Zeitwerk::Loader.new.tap do |load|
14
- load.inflector = Zeitwerk::GemInflector.new(__FILE__)
15
- load.push_dir("#{Dir.pwd}/app/jobs")
16
- end
17
- end
18
-
19
- desc "version", "Print version"
20
- def version
21
- say Wayfarer::VERSION::STRING
22
- end
23
-
24
- desc "route", "Routing"
25
- subcommand "route", Wayfarer::CLI::Route
26
-
27
- desc "generate", "Generators"
28
- subcommand "generate", Wayfarer::CLI::Generate
29
-
30
- desc "job", "Jobs"
31
- subcommand "job", Wayfarer::CLI::Job
32
- end
33
- end
34
- end
@@ -1,5 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- source "https://rubygems.org"
4
-
5
- gem "wayfarer", <%= Wayfarer::VERSION::STRING.inspect %>
@@ -1,10 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- class <%= @name.camelize %> < Wayfarer::Base
4
- route { to :index }
5
-
6
- retry_on ConnectionPool::TimeoutError, attempts: 3
7
-
8
- def index
9
- end
10
- end
@@ -1,10 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Wayfarer
4
- module Config
5
- Capybara = Struct.new(driver: {
6
- env_key: "WAYFARER_CAPYBARA_DRIVER",
7
- type: Symbol
8
- })
9
- end
10
- end
@@ -1,11 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Wayfarer
4
- module Config
5
- Ferrum = Struct.new(options: {
6
- env_key: "WAYFARER_FERRUM_OPTIONS",
7
- type: Hash,
8
- default: {}
9
- })
10
- end
11
- end
@@ -1,29 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Wayfarer
4
- module Config
5
- Networking = Struct.new(agent: {
6
- env_key: "WAYFARER_NETWORK_AGENT",
7
- type: Symbol,
8
- default: :http
9
- },
10
- pool_size: {
11
- env_key: "WAYFARER_NETWORK_POOL_SIZE",
12
- type: Integer,
13
- default: 1
14
- },
15
- pool_timeout: {
16
- env_key: "WAYFARER_NETWORK_POOL_TIMEOUT",
17
- type: Integer,
18
- default: 10
19
- },
20
- http_headers: {
21
- env_key: "WAYFARER_NETWORK_HTTP_HEADERS",
22
- type: Hash,
23
- default: {}
24
- },
25
- renew_on: {
26
- default: []
27
- })
28
- end
29
- end
@@ -1,14 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Wayfarer
4
- module Config
5
- Redis = Struct.new(url: {
6
- env_key: "WAYFARER_REDIS_URL",
7
- type: String,
8
- default: "redis://localhost:6379"
9
- },
10
- factory: {
11
- default: ->(redis) { ::Redis.new(url: redis.url) }
12
- })
13
- end
14
- end
@@ -1,11 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Wayfarer
4
- module Config
5
- Root = Struct.new(ferrum: { default: Wayfarer::Config::Ferrum.new },
6
- network: { default: Wayfarer::Config::Networking.new },
7
- redis: { default: Wayfarer::Config::Redis.new },
8
- selenium: { default: Wayfarer::Config::Selenium.new },
9
- capybara: { default: Wayfarer::Config::Capybara.new })
10
- end
11
- end
@@ -1,21 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Wayfarer
4
- module Config
5
- Selenium = Struct.new(driver: {
6
- env_key: "WAYFARER_SELENIUM_DRIVER",
7
- type: Symbol,
8
- default: :chrome
9
- },
10
- options: {
11
- env_key: "WAYFARER_SELENIUM_OPTIONS",
12
- type: Hash,
13
- default: {}
14
- },
15
- client_timeout: {
16
- env_key: "WAYFARER_SELENIUM_CLIENT_TIMEOUT",
17
- type: Integer,
18
- default: 60 # seconds
19
- })
20
- end
21
- end
@@ -1,45 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Wayfarer
4
- module Config
5
- module Strconv
6
- module_function
7
-
8
- def parse(str, type = nil)
9
- return primitive(str) unless type
10
-
11
- case type.name
12
- when "Hash" then hash(str)
13
- when "Array" then array(str)
14
- when "Symbol" then str.to_sym
15
- when "Integer" then Integer(str)
16
- else str
17
- end
18
- end
19
-
20
- def hash(str)
21
- array(str).reduce({}) do |acc, pair|
22
- k, v = pair.split(":", 2)
23
- next acc unless k && v
24
-
25
- acc.merge({ parse(k, Symbol) => primitive(v) })
26
- end
27
- end
28
-
29
- def array(str)
30
- str.split(",").map(&:strip)
31
- end
32
-
33
- def primitive(str)
34
- return true if str == "true"
35
- return false if str == "false"
36
-
37
- begin
38
- parse(str, Integer)
39
- rescue StandardError
40
- str
41
- end
42
- end
43
- end
44
- end
45
- end
@@ -1,72 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Wayfarer
4
- module Config
5
- class Struct
6
- module ClassMethods
7
- attr_accessor :members
8
- end
9
-
10
- module InstanceMethods
11
- extend Forwardable
12
-
13
- delegate members: "self.class"
14
-
15
- attr_reader :env
16
-
17
- def initialize(env = ENV)
18
- @env = env
19
-
20
- define_writers
21
- define_readers
22
- end
23
-
24
- private
25
-
26
- def define_writers
27
- members.each { |key, _| define_writer(key) }
28
- end
29
-
30
- def define_writer(key)
31
- define_singleton_method(:"#{key}=") do |val|
32
- set(key, val)
33
- end
34
- end
35
-
36
- def define_readers
37
- members.each { |key, options| define_reader(key, **options) }
38
- end
39
-
40
- def define_reader(key, env_key: nil, type: nil, default: nil)
41
- define_singleton_method(key.to_sym) do
42
- get(key) || set(key, env_val(env_key, type) || default)
43
- end
44
- end
45
-
46
- def env_val(env_key, type)
47
- return nil unless env_key
48
- return nil unless env.key?(env_key)
49
-
50
- Strconv.parse(env[env_key], type)
51
- end
52
-
53
- def get(key)
54
- instance_variable_get(:"@#{key}")
55
- end
56
-
57
- def set(key, val)
58
- instance_variable_set(:"@#{key}", val)
59
- end
60
- end
61
-
62
- def self.new(members)
63
- Class.new do
64
- include InstanceMethods
65
- extend ClassMethods
66
-
67
- self.members = members
68
- end
69
- end
70
- end
71
- end
72
- end
@@ -1,56 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Wayfarer
4
- module Middleware
5
- class Fetch
6
- extend Base
7
-
8
- module API
9
- def agent
10
- context.instance
11
- end
12
-
13
- def context
14
- task.metadata.context
15
- end
16
-
17
- def page(live: false)
18
- return task.metadata.page unless live
19
-
20
- task.metadata.page = context.live&.page || task.metadata.page
21
- end
22
-
23
- def http
24
- @http ||= Wayfarer::Networking::Follow.new(
25
- Wayfarer::Networking::Context.new(
26
- Wayfarer::Networking::HTTP.new
27
- )
28
- )
29
- end
30
- end
31
-
32
- def call(task)
33
- pool.with do |context|
34
- result = task.metadata.controller.run_callbacks(:fetch) do
35
- context.fetch(task.url)
36
- end
37
-
38
- case result
39
- when Networking::Result::Redirect
40
- task.metadata.controller.stage(result.redirect_url)
41
- when Networking::Result::Success
42
- task.metadata.context = context
43
- task.metadata.page = result.page
44
- yield if block_given?
45
- end
46
- end
47
- end
48
-
49
- private
50
-
51
- def pool
52
- Wayfarer::Networking::Pool.instance
53
- end
54
- end
55
- end
56
- end
@@ -1,13 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Wayfarer
4
- module Redis
5
- module Connection
6
- extend Forwardable
7
-
8
- delegate with: "Wayfarer::Redis::Pool.instance"
9
-
10
- alias redis with
11
- end
12
- end
13
- end
@@ -1,19 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Wayfarer
4
- module Redis
5
- module Version
6
- extend Connection
7
-
8
- module_function
9
-
10
- def determine
11
- @determine ||= server_version.split(".").first(3).map(&:to_i)
12
- end
13
-
14
- def server_version
15
- redis { |conn| conn.info["redis_version"] }
16
- end
17
- end
18
- end
19
- end
@@ -1,28 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Wayfarer
4
- module Routing
5
- class Router
6
- ArgumentCountError = Class.new(StandardError) # TODO: Unused, remove
7
-
8
- extend Forwardable
9
-
10
- attr_reader :root,
11
- :blocks
12
-
13
- def initialize
14
- @blocks = []
15
- end
16
-
17
- def draw(&block)
18
- @blocks.push(block)
19
- end
20
-
21
- def invoke(url, arguments)
22
- @root = Wayfarer::Routing::RootRoute.new
23
- @blocks.each { |block| Docile.dsl_eval(@root, *arguments, &block) }
24
- root.invoke(url)
25
- end
26
- end
27
- end
28
- end