wayfarer 0.4.5 → 0.4.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (175) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/lint.yaml +25 -0
  3. data/.github/workflows/release.yaml +29 -0
  4. data/.github/workflows/tests.yaml +30 -0
  5. data/.gitignore +4 -0
  6. data/.rubocop.yml +5 -0
  7. data/.vale.ini +5 -0
  8. data/.yardopts +1 -3
  9. data/Dockerfile +5 -4
  10. data/Gemfile +3 -0
  11. data/Gemfile.lock +107 -102
  12. data/Rakefile +5 -56
  13. data/bin/wayfarer +1 -1
  14. data/docker-compose.yml +20 -9
  15. data/docs/cookbook/consent_screen.md +2 -2
  16. data/docs/cookbook/executing_javascript.md +3 -3
  17. data/docs/cookbook/navigation.md +12 -12
  18. data/docs/cookbook/querying_html.md +3 -3
  19. data/docs/cookbook/screenshots.md +2 -2
  20. data/docs/cookbook/user_agent.md +1 -1
  21. data/docs/design.md +36 -0
  22. data/docs/guides/callbacks.md +24 -126
  23. data/docs/guides/configuration.md +8 -8
  24. data/docs/guides/handlers.md +60 -0
  25. data/docs/guides/index.md +1 -0
  26. data/docs/guides/jobs/error_handling.md +40 -0
  27. data/docs/guides/jobs.md +99 -31
  28. data/docs/guides/navigation.md +1 -1
  29. data/docs/guides/networking/capybara.md +13 -22
  30. data/docs/guides/networking/custom_adapters.md +82 -41
  31. data/docs/guides/networking/ferrum.md +4 -4
  32. data/docs/guides/networking/http.md +9 -13
  33. data/docs/guides/networking/selenium.md +10 -11
  34. data/docs/guides/pages.md +76 -10
  35. data/docs/guides/redis.md +10 -0
  36. data/docs/guides/routing.md +74 -0
  37. data/docs/guides/tasks.md +33 -9
  38. data/docs/guides/tutorial.md +60 -0
  39. data/docs/guides/user_agents.md +113 -0
  40. data/docs/index.md +17 -40
  41. data/docs/reference/cli.md +35 -25
  42. data/docs/reference/configuration.md +36 -0
  43. data/lib/wayfarer/base.rb +124 -46
  44. data/lib/wayfarer/batch_completion.rb +56 -0
  45. data/lib/wayfarer/callbacks.rb +22 -48
  46. data/lib/wayfarer/cli/route_printer.rb +71 -57
  47. data/lib/wayfarer/cli.rb +121 -0
  48. data/lib/wayfarer/gc.rb +13 -6
  49. data/lib/wayfarer/handler.rb +15 -7
  50. data/lib/wayfarer/logging.rb +38 -0
  51. data/lib/wayfarer/middleware/base.rb +2 -0
  52. data/lib/wayfarer/middleware/batch_completion.rb +19 -0
  53. data/lib/wayfarer/middleware/content_type.rb +54 -0
  54. data/lib/wayfarer/middleware/controller.rb +19 -15
  55. data/lib/wayfarer/middleware/dedup.rb +16 -13
  56. data/lib/wayfarer/middleware/dispatch.rb +12 -4
  57. data/lib/wayfarer/middleware/normalize.rb +12 -11
  58. data/lib/wayfarer/middleware/redis.rb +15 -0
  59. data/lib/wayfarer/middleware/router.rb +33 -35
  60. data/lib/wayfarer/middleware/stage.rb +5 -5
  61. data/lib/wayfarer/middleware/uri_parser.rb +30 -0
  62. data/lib/wayfarer/middleware/user_agent.rb +49 -0
  63. data/lib/wayfarer/networking/capybara.rb +1 -1
  64. data/lib/wayfarer/networking/context.rb +2 -2
  65. data/lib/wayfarer/networking/ferrum.rb +2 -2
  66. data/lib/wayfarer/networking/follow.rb +12 -6
  67. data/lib/wayfarer/networking/http.rb +1 -1
  68. data/lib/wayfarer/networking/pool.rb +17 -12
  69. data/lib/wayfarer/networking/selenium.rb +3 -3
  70. data/lib/wayfarer/networking/strategy.rb +2 -2
  71. data/lib/wayfarer/page.rb +36 -14
  72. data/lib/wayfarer/parsing/xml.rb +6 -6
  73. data/lib/wayfarer/parsing.rb +24 -0
  74. data/lib/wayfarer/redis/barrier.rb +13 -21
  75. data/lib/wayfarer/redis/counter.rb +19 -9
  76. data/lib/wayfarer/redis/pool.rb +1 -1
  77. data/lib/wayfarer/redis/resettable.rb +19 -0
  78. data/lib/wayfarer/routing/dsl.rb +1 -0
  79. data/lib/wayfarer/routing/matchers/path.rb +4 -2
  80. data/lib/wayfarer/routing/root_route.rb +5 -1
  81. data/lib/wayfarer/routing/route.rb +4 -14
  82. data/lib/wayfarer/stringify.rb +22 -30
  83. data/lib/wayfarer/task.rb +12 -18
  84. data/lib/wayfarer.rb +29 -2
  85. data/mkdocs.yml +52 -7
  86. data/rake/docs.rake +26 -0
  87. data/rake/lint.rake +105 -0
  88. data/rake/release.rake +29 -0
  89. data/rake/tests.rake +28 -0
  90. data/requirements.txt +1 -1
  91. data/spec/base_spec.rb +140 -160
  92. data/spec/batch_completion_spec.rb +104 -0
  93. data/spec/cli/job_spec.rb +19 -23
  94. data/spec/cli/routing_spec.rb +101 -0
  95. data/spec/cli/version_spec.rb +1 -1
  96. data/spec/factories/task.rb +7 -1
  97. data/spec/fixtures/dummy_job.rb +5 -3
  98. data/spec/gc_spec.rb +8 -50
  99. data/spec/handler_spec.rb +1 -1
  100. data/spec/integration/callbacks_spec.rb +157 -45
  101. data/spec/integration/content_type_spec.rb +145 -0
  102. data/spec/integration/gc_spec.rb +44 -0
  103. data/spec/integration/handler_spec.rb +66 -0
  104. data/spec/integration/page_spec.rb +44 -29
  105. data/spec/integration/params_spec.rb +33 -25
  106. data/spec/integration/parsing_spec.rb +125 -0
  107. data/spec/integration/routing_spec.rb +18 -0
  108. data/spec/integration/stage_spec.rb +27 -20
  109. data/spec/middleware/batch_completion_spec.rb +34 -0
  110. data/spec/middleware/chain_spec.rb +8 -8
  111. data/spec/middleware/content_type_spec.rb +86 -0
  112. data/spec/middleware/controller_spec.rb +5 -5
  113. data/spec/middleware/dedup_spec.rb +38 -55
  114. data/spec/middleware/dispatch_spec.rb +23 -7
  115. data/spec/middleware/normalize_spec.rb +44 -13
  116. data/spec/middleware/router_spec.rb +29 -30
  117. data/spec/middleware/stage_spec.rb +8 -8
  118. data/spec/middleware/uri_parser_spec.rb +53 -0
  119. data/spec/middleware/{fetch_spec.rb → user_agent_spec.rb} +28 -27
  120. data/spec/networking/context_spec.rb +17 -0
  121. data/spec/networking/follow_spec.rb +2 -2
  122. data/spec/networking/pool_spec.rb +5 -5
  123. data/spec/networking/strategy.rb +2 -2
  124. data/spec/page_spec.rb +42 -20
  125. data/spec/parsing/xml_spec.rb +11 -12
  126. data/spec/redis/barrier_spec.rb +8 -48
  127. data/spec/redis/counter_spec.rb +13 -1
  128. data/spec/redis/pool_spec.rb +1 -1
  129. data/spec/spec_helpers.rb +27 -16
  130. data/spec/support/test_app.rb +8 -0
  131. data/spec/task_spec.rb +3 -24
  132. data/spec/wayfarer_spec.rb +1 -1
  133. data/wayfarer.gemspec +4 -3
  134. metadata +61 -51
  135. data/.github/workflows/ci.yaml +0 -32
  136. data/docs/guides/error_handling.md +0 -31
  137. data/docs/guides/networking.md +0 -94
  138. data/docs/guides/performance.md +0 -130
  139. data/docs/guides/reliability.md +0 -41
  140. data/docs/guides/routing/steering.md +0 -30
  141. data/docs/reference/api/base.md +0 -48
  142. data/docs/reference/configuration_keys.md +0 -42
  143. data/docs/reference/environment_variables.md +0 -83
  144. data/lib/wayfarer/cli/base.rb +0 -45
  145. data/lib/wayfarer/cli/generate.rb +0 -17
  146. data/lib/wayfarer/cli/job.rb +0 -56
  147. data/lib/wayfarer/cli/route.rb +0 -29
  148. data/lib/wayfarer/cli/runner.rb +0 -34
  149. data/lib/wayfarer/cli/templates/Gemfile.tt +0 -5
  150. data/lib/wayfarer/cli/templates/job.rb.tt +0 -10
  151. data/lib/wayfarer/config/capybara.rb +0 -10
  152. data/lib/wayfarer/config/ferrum.rb +0 -11
  153. data/lib/wayfarer/config/networking.rb +0 -26
  154. data/lib/wayfarer/config/redis.rb +0 -14
  155. data/lib/wayfarer/config/root.rb +0 -11
  156. data/lib/wayfarer/config/selenium.rb +0 -21
  157. data/lib/wayfarer/config/strconv.rb +0 -45
  158. data/lib/wayfarer/config/struct.rb +0 -72
  159. data/lib/wayfarer/middleware/fetch.rb +0 -56
  160. data/lib/wayfarer/redis/connection.rb +0 -13
  161. data/lib/wayfarer/redis/version.rb +0 -19
  162. data/lib/wayfarer/routing/router.rb +0 -28
  163. data/spec/callbacks_spec.rb +0 -102
  164. data/spec/cli/generate_spec.rb +0 -39
  165. data/spec/config/capybara_spec.rb +0 -18
  166. data/spec/config/ferrum_spec.rb +0 -24
  167. data/spec/config/networking_spec.rb +0 -73
  168. data/spec/config/redis_spec.rb +0 -32
  169. data/spec/config/root_spec.rb +0 -31
  170. data/spec/config/selenium_spec.rb +0 -56
  171. data/spec/config/strconv_spec.rb +0 -58
  172. data/spec/config/struct_spec.rb +0 -66
  173. data/spec/integration/steering_spec.rb +0 -57
  174. data/spec/redis/version_spec.rb +0 -13
  175. data/spec/routing/router_spec.rb +0 -24
@@ -1,42 +0,0 @@
1
- ---
2
- hide:
3
- - toc
4
- ---
5
-
6
- # Configuration Keys
7
-
8
- ## `Wayfarer.config.network`
9
-
10
- | Runtime config key | Environment variable | Description | Default | Supported values |
11
- | ---------------------- | ------------------------------------ | ------------------------------------------- | -------------------------------- | ----------------------------------- |
12
- | `network.agent` | `WAYFARER_NETWORK_AGENT` | The user agent to use. | `:http` | `:http`, `:ferrum`, `:selenium` |
13
- | `network.pool_size` | `WAYFARER_NETWORK_POOL_SIZE` | How many user agents to spawn. | 1 | Integers |
14
- | `network.pool_timeout` | `WAYFARER_NETWORK_POOL_TIMEOUT` | How long jobs may use an agent in seconds. | 10 | Integers |
15
- | `network.http_headers` | `WAYFARER_NETWORK_HTTP_HEADERS` | HTTP headers to append to requests. | `{}` | Hashes |
16
-
17
- ## `Wayfarer.config.ferrum`
18
-
19
- | Runtime config key | Environment variable | Description | Default | Supported values |
20
- | ---------------------- | ------------------------------------ | ------------------------------------------- | -------------------------------- | ----------------------------------- |
21
- | `ferrum.options` | `WAYFARER_FERRUM_OPTIONS` | Ferrum options. | `{}` | Hashes |
22
-
23
- ## `Wayfarer.config.selenium`
24
-
25
- | Runtime config key | Environment variable | Description | Default | Supported values |
26
- | ---------------------- | ------------------------------------ | ------------------------------------------- | -------------------------------- | ----------------------------------- |
27
- | `selenium.driver` | `WAYFARER_SELENIUM_DRIVER` | Selenium driver to use. | `:chrome` | Symbols |
28
- | `selenium.options` | `WAYFARER_SELENIUM_OPTIONS` | Selenium options. | `{}` | Hashes |
29
- | `selenium.client_timeout` | `WAYFARER_SELENIUM_CLIENT_TIMEOUT` | Selenium client timeout in seconds. | 60 | Integers |
30
-
31
- ## `Wayfarer.config.redis`
32
-
33
- | Runtime config key | Environment variable | Description | Default | Supported values |
34
- | ---------------------- | ------------------------------------ | ------------------------------------------- | ------------------------------------------ | ----------------------------------- |
35
- | `redis.url` | `WAYFARER_REDIS_URL` | Redis URL to connect to. | `redis://localhost:6379` | Strings |
36
- | `redis.factory` | n/a | Redis factory lambda. | `->(redis) { ::Redis.new(url: redis.url) }` | Lambdas |
37
-
38
- ## `Wayfarer.config.capybara`
39
-
40
- | Runtime config key | Environment variable | Description | Default | Supported values |
41
- | ---------------------- | ------------------------------------ | ------------------------------------------- | -------------------------------- | ----------------------------------- |
42
- | `capybara.driver` | `WAYFARER_CAPYBARA_DRIVER` | The Capybara driver to use. | n/a | Symbols |
@@ -1,83 +0,0 @@
1
- # Environment Variables
2
-
3
- ## String formats
4
-
5
- Environment variable values can be parsed to Hash or Array at runtime
6
- with the following syntaxes:
7
-
8
- * Hash: Variable string `a:1,b:2,c:3` parses to `{a:1, b:2, c:3}` at runtime
9
- * Array: Variable string `a,b,c` parses to `["a", "b", "c"]` at runtime
10
-
11
- ## Variables
12
-
13
- ### `WAYFARER_AGENT`
14
- : Either `ferrum`, `selenium` or `http`.
15
-
16
- * Type: String
17
- * Key: `config.agent`
18
- * Default value: `:http`
19
-
20
- ### `WAYFARER_POOL_SIZE`
21
- : Number of user agents to maintain.
22
-
23
- * Type: Integer
24
- * Key: `config.pool_size`
25
- * Default value: `1`
26
-
27
- ### `WAYFARER_POOL_TIMEOUT`
28
- : How long a user agent may remain checked out until the owning job
29
- fails.
30
-
31
- * Type: Integer
32
- * Key: `config.agent_pool_timeout`
33
- * Default value: `10`
34
-
35
- ---
36
-
37
- ### `WAYFARER_FERRUM_OPTIONS`
38
- : Key/value options passed to `Ferrum::Browser.new`.
39
-
40
- * Type: Hash
41
- * Key: `config.ferrum_options`
42
- * Default value: `{}`
43
-
44
- ---
45
-
46
- ### `WAYFARER_SELENIUM_DRIVER`
47
- : Driver passed to `Selenium::WebDriver.for`.
48
-
49
- * Type: Symbol
50
- * Key: `config.selenium_driver`
51
- * Default value: `:chrome`
52
-
53
- ---
54
-
55
- ### `WAYFARER_SELENIUM_OPTIONS`
56
- : Options passed to `Selenium::WebDriver.for`.
57
-
58
- * Type: Hash
59
- * Key: `config.selenium_options`
60
- * Default value: `{}`
61
-
62
- ---
63
-
64
- ### `WAYFARER_SELENIUM_CLIENT_TIMEOUT`
65
- : Selenium HTTP client timeout (seconds).
66
-
67
- * Type: Integer
68
- * Key: `config.selenium_client_timeout`
69
- * Default value: `60`
70
-
71
- ---
72
-
73
- ### `WAYFARER_HTTP_HEADERS`
74
- : HTTP request headers used when retrieving pages.
75
-
76
- * Type: Hash
77
- * Key: `config.http_headers`
78
- * Default value: `{}`
79
-
80
- !!! attention "Partial support"
81
-
82
- Selenium does not support configuring HTTP request headers.
83
-
@@ -1,45 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Wayfarer
4
- module CLI
5
- class Base < Thor
6
- include Thor::Actions
7
-
8
- def self.source_root
9
- File.expand_path("templates", __dir__)
10
- end
11
-
12
- private
13
-
14
- def mock_redis
15
- Wayfarer.config.redis.factory = ->(_) { MockRedis.new }
16
- end
17
-
18
- def load_environment
19
- load_rails || load_plain
20
- end
21
-
22
- def free_agent_pool
23
- Wayfarer::Networking::Pool.instance.free
24
- end
25
-
26
- def load_rails
27
- begin
28
- require "rails/app_loader"
29
- rescue LoadError
30
- return
31
- end
32
-
33
- return unless Rails::AppLoader.find_executable
34
-
35
- require File.expand_path("config/application", Dir.pwd)
36
- require File.expand_path("config/boot", Dir.pwd)
37
- require File.expand_path("config/environment", Dir.pwd)
38
- end
39
-
40
- def load_plain
41
- Wayfarer::CLI::Runner.loader.setup
42
- end
43
- end
44
- end
45
- end
@@ -1,17 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Wayfarer
4
- module CLI
5
- class Generate < Base
6
- desc "project NAME",
7
- "Generate new project directory NAME"
8
- def project(name)
9
- @name = name
10
-
11
- empty_directory name
12
- template "Gemfile.tt", "#{name}/Gemfile"
13
- template "job.rb.tt", "#{name}/app/jobs/#{name}.rb"
14
- end
15
- end
16
- end
17
- end
@@ -1,56 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Wayfarer
4
- module CLI
5
- class Job < Base
6
- desc "perform JOB URL",
7
- "Perform JOB with URL"
8
- option :mock_redis, type: :boolean
9
- option :batch, type: :string, default: SecureRandom.uuid
10
- def perform(job, url)
11
- load_environment
12
- mock_redis if options[:mock_redis]
13
-
14
- job = job.classify.constantize.new
15
- task = Wayfarer::Task.new(url, options[:batch])
16
- job.arguments.push(task)
17
- job.perform(task)
18
- task.gc.run
19
- end
20
-
21
- desc "enqueue JOB URL",
22
- "Enqueue JOB with URL"
23
- option :batch, type: :string, default: SecureRandom.uuid
24
- def enqueue(job, url)
25
- load_environment
26
- mock_redis if options[:mock_redis] # TODO: Remove, does not belong here
27
-
28
- job = job.classify.constantize
29
- job.crawl(url, batch: options[:batch])
30
- end
31
-
32
- desc "execute JOB URL",
33
- "Execute JOB with async adapter"
34
- option :mock_redis, type: :boolean
35
- option :batch, type: :string, default: SecureRandom.uuid
36
- option :min_threads, type: :numeric, default: 1
37
- option :max_threads, type: :numeric, default: 1
38
- def execute(job, url)
39
- load_environment
40
- mock_redis if options[:mock_redis]
41
-
42
- job = job.classify.constantize
43
- job.queue_adapter = ActiveJob::QueueAdapters::AsyncAdapter.new(min_threads: options[:min_threads],
44
- max_threads: options[:max_threads])
45
- scheduler = job.queue_adapter.instance_variable_get(:@scheduler)
46
- executor = scheduler.instance_variable_get(:@async_executor)
47
-
48
- job.crawl(url, batch: options[:batch])
49
-
50
- sleep(1) while executor.scheduled_task_count > executor.completed_task_count
51
-
52
- free_agent_pool
53
- end
54
- end
55
- end
56
- end
@@ -1,29 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- # # frozen_string_literal: true
4
-
5
- module Wayfarer
6
- module CLI
7
- class Route < Base
8
- desc "result JOB URL",
9
- "Invoke JOB's router with URL"
10
- def result(job, url)
11
- load_environment
12
- url = Addressable::URI.parse(url)
13
- job = job.classify.constantize
14
- job.router.invoke(url, job.new.steer)
15
- say Wayfarer::Routing::PathFinder.result(job.router.root, url)
16
- end
17
-
18
- desc "tree JOB URL",
19
- "Visualize JOB's routing tree for URL"
20
- def tree(job, url)
21
- load_environment
22
- url = Addressable::URI.parse(url)
23
- job = job.classify.constantize
24
- job.router.invoke(url, job.new.steer)
25
- Wayfarer::CLI::RoutePrinter.print(job.router.root, url)
26
- end
27
- end
28
- end
29
- end
@@ -1,34 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Wayfarer
4
- module CLI
5
- class Runner < Thor
6
- def self.exit_on_failure?
7
- true
8
- end
9
-
10
- def self.loader
11
- # TODO: Raises if $PWD/app/jobs does not exist
12
-
13
- @loader ||= Zeitwerk::Loader.new.tap do |load|
14
- load.inflector = Zeitwerk::GemInflector.new(__FILE__)
15
- load.push_dir("#{Dir.pwd}/app/jobs")
16
- end
17
- end
18
-
19
- desc "version", "Print version"
20
- def version
21
- say Wayfarer::VERSION::STRING
22
- end
23
-
24
- desc "route", "Routing"
25
- subcommand "route", Wayfarer::CLI::Route
26
-
27
- desc "generate", "Generators"
28
- subcommand "generate", Wayfarer::CLI::Generate
29
-
30
- desc "job", "Jobs"
31
- subcommand "job", Wayfarer::CLI::Job
32
- end
33
- end
34
- end
@@ -1,5 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- source "https://rubygems.org"
4
-
5
- gem "wayfarer", <%= Wayfarer::VERSION::STRING.inspect %>
@@ -1,10 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- class <%= @name.camelize %> < Wayfarer::Base
4
- route { to :index }
5
-
6
- retry_on ConnectionPool::TimeoutError, attempts: 3
7
-
8
- def index
9
- end
10
- end
@@ -1,10 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Wayfarer
4
- module Config
5
- Capybara = Struct.new(driver: {
6
- env_key: "WAYFARER_CAPYBARA_DRIVER",
7
- type: Symbol
8
- })
9
- end
10
- end
@@ -1,11 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Wayfarer
4
- module Config
5
- Ferrum = Struct.new(options: {
6
- env_key: "WAYFARER_FERRUM_OPTIONS",
7
- type: Hash,
8
- default: {}
9
- })
10
- end
11
- end
@@ -1,26 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Wayfarer
4
- module Config
5
- Networking = Struct.new(agent: {
6
- env_key: "WAYFARER_NETWORK_AGENT",
7
- type: Symbol,
8
- default: :http
9
- },
10
- pool_size: {
11
- env_key: "WAYFARER_NETWORK_POOL_SIZE",
12
- type: Integer,
13
- default: 1
14
- },
15
- pool_timeout: {
16
- env_key: "WAYFARER_NETWORK_POOL_TIMEOUT",
17
- type: Integer,
18
- default: 10
19
- },
20
- http_headers: {
21
- env_key: "WAYFARER_NETWORK_HTTP_HEADERS",
22
- type: Hash,
23
- default: {}
24
- })
25
- end
26
- end
@@ -1,14 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Wayfarer
4
- module Config
5
- Redis = Struct.new(url: {
6
- env_key: "WAYFARER_REDIS_URL",
7
- type: String,
8
- default: "redis://localhost:6379"
9
- },
10
- factory: {
11
- default: ->(redis) { ::Redis.new(url: redis.url) }
12
- })
13
- end
14
- end
@@ -1,11 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Wayfarer
4
- module Config
5
- Root = Struct.new(ferrum: { default: Wayfarer::Config::Ferrum.new },
6
- network: { default: Wayfarer::Config::Networking.new },
7
- redis: { default: Wayfarer::Config::Redis.new },
8
- selenium: { default: Wayfarer::Config::Selenium.new },
9
- capybara: { default: Wayfarer::Config::Capybara.new })
10
- end
11
- end
@@ -1,21 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Wayfarer
4
- module Config
5
- Selenium = Struct.new(driver: {
6
- env_key: "WAYFARER_SELENIUM_DRIVER",
7
- type: Symbol,
8
- default: :chrome
9
- },
10
- options: {
11
- env_key: "WAYFARER_SELENIUM_OPTIONS",
12
- type: Hash,
13
- default: {}
14
- },
15
- client_timeout: {
16
- env_key: "WAYFARER_SELENIUM_CLIENT_TIMEOUT",
17
- type: Integer,
18
- default: 60 # seconds
19
- })
20
- end
21
- end
@@ -1,45 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Wayfarer
4
- module Config
5
- module Strconv
6
- module_function
7
-
8
- def parse(str, type = nil)
9
- return primitive(str) unless type
10
-
11
- case type.name
12
- when "Hash" then hash(str)
13
- when "Array" then array(str)
14
- when "Symbol" then str.to_sym
15
- when "Integer" then Integer(str)
16
- else str
17
- end
18
- end
19
-
20
- def hash(str)
21
- array(str).reduce({}) do |acc, pair|
22
- k, v = pair.split(":", 2)
23
- next acc unless k && v
24
-
25
- acc.merge({ parse(k, Symbol) => primitive(v) })
26
- end
27
- end
28
-
29
- def array(str)
30
- str.split(",").map(&:strip)
31
- end
32
-
33
- def primitive(str)
34
- return true if str == "true"
35
- return false if str == "false"
36
-
37
- begin
38
- parse(str, Integer)
39
- rescue StandardError
40
- str
41
- end
42
- end
43
- end
44
- end
45
- end
@@ -1,72 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Wayfarer
4
- module Config
5
- class Struct
6
- module ClassMethods
7
- attr_accessor :members
8
- end
9
-
10
- module InstanceMethods
11
- extend Forwardable
12
-
13
- delegate members: "self.class"
14
-
15
- attr_reader :env
16
-
17
- def initialize(env = ENV)
18
- @env = env
19
-
20
- define_writers
21
- define_readers
22
- end
23
-
24
- private
25
-
26
- def define_writers
27
- members.each { |key, _| define_writer(key) }
28
- end
29
-
30
- def define_writer(key)
31
- define_singleton_method(:"#{key}=") do |val|
32
- set(key, val)
33
- end
34
- end
35
-
36
- def define_readers
37
- members.each { |key, options| define_reader(key, **options) }
38
- end
39
-
40
- def define_reader(key, env_key: nil, type: nil, default: nil)
41
- define_singleton_method(key.to_sym) do
42
- get(key) || set(key, env_val(env_key, type) || default)
43
- end
44
- end
45
-
46
- def env_val(env_key, type)
47
- return nil unless env_key
48
- return nil unless env.key?(env_key)
49
-
50
- Strconv.parse(env[env_key], type)
51
- end
52
-
53
- def get(key)
54
- instance_variable_get(:"@#{key}")
55
- end
56
-
57
- def set(key, val)
58
- instance_variable_set(:"@#{key}", val)
59
- end
60
- end
61
-
62
- def self.new(members)
63
- Class.new do
64
- include InstanceMethods
65
- extend ClassMethods
66
-
67
- self.members = members
68
- end
69
- end
70
- end
71
- end
72
- end
@@ -1,56 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Wayfarer
4
- module Middleware
5
- class Fetch
6
- extend Base
7
-
8
- module API
9
- def agent
10
- context.instance
11
- end
12
-
13
- def context
14
- task.metadata.context
15
- end
16
-
17
- def page(live: false)
18
- return task.metadata.page unless live
19
-
20
- task.metadata.page = context.live&.page || task.metadata.page
21
- end
22
-
23
- def http
24
- @http ||= Wayfarer::Networking::Follow.new(
25
- Wayfarer::Networking::Context.new(
26
- Wayfarer::Networking::HTTP.new
27
- )
28
- )
29
- end
30
- end
31
-
32
- def call(task)
33
- pool.with do |context|
34
- result = task.metadata.controller.run_callbacks(:fetch) do
35
- context.fetch(task.url)
36
- end
37
-
38
- case result
39
- when Networking::Result::Redirect
40
- task.metadata.controller.stage(result.redirect_url)
41
- when Networking::Result::Success
42
- task.metadata.context = context
43
- task.metadata.page = result.page
44
- yield if block_given?
45
- end
46
- end
47
- end
48
-
49
- private
50
-
51
- def pool
52
- Wayfarer::Networking::Pool.instance
53
- end
54
- end
55
- end
56
- end
@@ -1,13 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Wayfarer
4
- module Redis
5
- module Connection
6
- extend Forwardable
7
-
8
- delegate with: "Wayfarer::Redis::Pool.instance"
9
-
10
- alias redis with
11
- end
12
- end
13
- end
@@ -1,19 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Wayfarer
4
- module Redis
5
- module Version
6
- extend Connection
7
-
8
- module_function
9
-
10
- def determine
11
- @determine ||= server_version.split(".").first(3).map(&:to_i)
12
- end
13
-
14
- def server_version
15
- redis { |conn| conn.info["redis_version"] }
16
- end
17
- end
18
- end
19
- end
@@ -1,28 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Wayfarer
4
- module Routing
5
- class Router
6
- ArgumentCountError = Class.new(StandardError) # TODO: Unused, remove
7
-
8
- extend Forwardable
9
-
10
- attr_reader :root,
11
- :blocks
12
-
13
- def initialize
14
- @blocks = []
15
- end
16
-
17
- def draw(&block)
18
- @blocks.push(block)
19
- end
20
-
21
- def invoke(url, arguments)
22
- @root = Wayfarer::Routing::RootRoute.new
23
- @blocks.each { |block| Docile.dsl_eval(@root, *arguments, &block) }
24
- root.invoke(url)
25
- end
26
- end
27
- end
28
- end