wayfarer 0.4.0 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ci.yaml +1 -1
  3. data/Gemfile.lock +20 -15
  4. data/docs/cookbook/user_agent.md +1 -1
  5. data/docs/guides/browser_automation/capybara.md +64 -1
  6. data/docs/guides/browser_automation/custom_adapters.md +100 -0
  7. data/docs/guides/browser_automation/ferrum.md +3 -3
  8. data/docs/guides/browser_automation/selenium.md +7 -5
  9. data/docs/guides/callbacks.md +117 -10
  10. data/docs/guides/configuration.md +16 -10
  11. data/docs/guides/error_handling.md +9 -5
  12. data/docs/guides/networking.md +77 -3
  13. data/docs/index.md +9 -1
  14. data/docs/reference/api/base.md +4 -4
  15. data/docs/reference/configuration_keys.md +42 -0
  16. data/docs/reference/environment_variables.md +25 -27
  17. data/lib/wayfarer/base.rb +7 -17
  18. data/lib/wayfarer/callbacks.rb +71 -0
  19. data/lib/wayfarer/cli/base.rb +5 -1
  20. data/lib/wayfarer/cli/job.rb +7 -3
  21. data/lib/wayfarer/cli/route.rb +2 -2
  22. data/lib/wayfarer/cli/route_printer.rb +7 -7
  23. data/lib/wayfarer/config/capybara.rb +10 -0
  24. data/lib/wayfarer/config/ferrum.rb +11 -0
  25. data/lib/wayfarer/config/networking.rb +26 -0
  26. data/lib/wayfarer/config/redis.rb +14 -0
  27. data/lib/wayfarer/config/root.rb +11 -0
  28. data/lib/wayfarer/config/selenium.rb +21 -0
  29. data/lib/wayfarer/config/strconv.rb +45 -0
  30. data/lib/wayfarer/config/struct.rb +72 -0
  31. data/lib/wayfarer/gc.rb +3 -7
  32. data/lib/wayfarer/middleware/fetch.rb +7 -3
  33. data/lib/wayfarer/middleware/router.rb +2 -2
  34. data/lib/wayfarer/middleware/worker.rb +12 -9
  35. data/lib/wayfarer/networking/capybara.rb +28 -0
  36. data/lib/wayfarer/networking/context.rb +36 -0
  37. data/lib/wayfarer/networking/ferrum.rb +17 -52
  38. data/lib/wayfarer/networking/http.rb +34 -0
  39. data/lib/wayfarer/networking/pool.rb +15 -10
  40. data/lib/wayfarer/networking/result.rb +1 -1
  41. data/lib/wayfarer/networking/selenium.rb +20 -47
  42. data/lib/wayfarer/networking/strategy.rb +38 -0
  43. data/lib/wayfarer/page.rb +2 -3
  44. data/lib/wayfarer/redis/pool.rb +3 -1
  45. data/lib/wayfarer/routing/dsl.rb +8 -8
  46. data/lib/wayfarer/routing/matchers/custom.rb +23 -0
  47. data/lib/wayfarer/routing/matchers/host.rb +19 -0
  48. data/lib/wayfarer/routing/matchers/path.rb +48 -0
  49. data/lib/wayfarer/routing/matchers/query.rb +63 -0
  50. data/lib/wayfarer/routing/matchers/scheme.rb +17 -0
  51. data/lib/wayfarer/routing/matchers/suffix.rb +17 -0
  52. data/lib/wayfarer/routing/matchers/url.rb +17 -0
  53. data/lib/wayfarer/routing/route.rb +1 -1
  54. data/lib/wayfarer.rb +9 -9
  55. data/spec/base_spec.rb +14 -0
  56. data/spec/callbacks_spec.rb +102 -0
  57. data/spec/cli/job_spec.rb +6 -6
  58. data/spec/config/capybara_spec.rb +18 -0
  59. data/spec/config/ferrum_spec.rb +24 -0
  60. data/spec/config/networking_spec.rb +73 -0
  61. data/spec/config/redis_spec.rb +32 -0
  62. data/spec/config/root_spec.rb +31 -0
  63. data/spec/config/selenium_spec.rb +56 -0
  64. data/spec/config/strconv_spec.rb +58 -0
  65. data/spec/config/struct_spec.rb +66 -0
  66. data/spec/gc_spec.rb +8 -6
  67. data/spec/middleware/fetch_spec.rb +20 -8
  68. data/spec/middleware/router_spec.rb +7 -0
  69. data/spec/middleware/worker_spec.rb +64 -27
  70. data/spec/networking/capybara_spec.rb +12 -0
  71. data/spec/networking/context_spec.rb +127 -0
  72. data/spec/networking/ferrum_spec.rb +6 -22
  73. data/spec/networking/http_spec.rb +12 -0
  74. data/spec/networking/pool_spec.rb +37 -12
  75. data/spec/networking/selenium_spec.rb +6 -22
  76. data/spec/networking/strategy.rb +170 -0
  77. data/spec/redis/pool_spec.rb +1 -1
  78. data/spec/routing/dsl_spec.rb +10 -10
  79. data/spec/routing/integration_spec.rb +22 -22
  80. data/spec/routing/{custom_matcher_spec.rb → matchers/custom_spec.rb} +4 -4
  81. data/spec/routing/{host_matcher_spec.rb → matchers/host_spec.rb} +6 -6
  82. data/spec/routing/{path_matcher_spec.rb → matchers/path_spec.rb} +6 -6
  83. data/spec/routing/{query_matcher_spec.rb → matchers/query_spec.rb} +15 -15
  84. data/spec/routing/{scheme_matcher_spec.rb → matchers/scheme_spec.rb} +4 -4
  85. data/spec/routing/{suffix_matcher_spec.rb → matchers/suffix_spec.rb} +4 -4
  86. data/spec/routing/{uri_matcher_spec.rb → matchers/uri_spec.rb} +4 -4
  87. data/spec/routing/path_finder_spec.rb +1 -1
  88. data/spec/routing/root_route_spec.rb +2 -2
  89. data/spec/routing/route_spec.rb +2 -2
  90. data/spec/spec_helpers.rb +13 -5
  91. data/spec/wayfarer_spec.rb +1 -1
  92. data/wayfarer.gemspec +8 -7
  93. metadata +74 -33
  94. data/lib/wayfarer/config.rb +0 -67
  95. data/lib/wayfarer/networking/healer.rb +0 -21
  96. data/lib/wayfarer/networking/net_http.rb +0 -52
  97. data/lib/wayfarer/routing/custom_matcher.rb +0 -21
  98. data/lib/wayfarer/routing/host_matcher.rb +0 -23
  99. data/lib/wayfarer/routing/path_matcher.rb +0 -46
  100. data/lib/wayfarer/routing/query_matcher.rb +0 -67
  101. data/lib/wayfarer/routing/scheme_matcher.rb +0 -21
  102. data/lib/wayfarer/routing/suffix_matcher.rb +0 -21
  103. data/lib/wayfarer/routing/url_matcher.rb +0 -21
  104. data/spec/config_spec.rb +0 -144
  105. data/spec/networking/adapter.rb +0 -135
  106. data/spec/networking/healer_spec.rb +0 -46
  107. data/spec/networking/net_http_spec.rb +0 -37
@@ -102,7 +102,7 @@ Base functionality every job is equipped with:
102
102
 
103
103
  ### `#browser -> Ferrum::Browser | Selenium::WebDriver | nil`
104
104
  : The browser process used to retrieve the current response.
105
- If the configured adapter is the default `:net_http`, `nil` is returned.
105
+ If the configured agent is the default `:http`, `nil` is returned.
106
106
 
107
107
  Guides:
108
108
 
@@ -112,7 +112,7 @@ Base functionality every job is equipped with:
112
112
  !!! example "Accessing a Google Chrome process"
113
113
 
114
114
  ```ruby
115
- Wayfarer.config.adapter = :ferrum
115
+ Wayfarer.config.network.agent = :ferrum
116
116
 
117
117
  class DummyJob < Wayfarer::Base
118
118
  route.to :index
@@ -126,7 +126,7 @@ Base functionality every job is equipped with:
126
126
  !!! example "Accessing a Selenium WebDriver"
127
127
 
128
128
  ```ruby
129
- Wayfarer.config.adapter = :selenium
129
+ Wayfarer.config.network.agent = :selenium
130
130
 
131
131
  class DummyJob < Wayfarer::Base
132
132
  route.to :index
@@ -144,7 +144,7 @@ Base functionality every job is equipped with:
144
144
  processing URL.
145
145
 
146
146
  With `page(live: true)` passed, the returned `Page` reflects the current
147
- browser DOM. No-op when the `net/http` adapter is in use. Calls to
147
+ browser DOM. No-op when the `net/http` agent is in use. Calls to
148
148
  `page()` without the keyword return the most recent page.
149
149
 
150
150
  ---
@@ -0,0 +1,42 @@
1
+ ---
2
+ hide:
3
+ - toc
4
+ ---
5
+
6
+ # Configuration Keys
7
+
8
+ ## `Wayfarer.config.network`
9
+
10
+ | Runtime config key | Environment variable | Description | Default | Supported values |
11
+ | ---------------------- | ------------------------------------ | ------------------------------------------- | -------------------------------- | ----------------------------------- |
12
+ | `network.agent` | `WAYFARER_NETWORK_AGENT` | The user agent to use. | `:http` | `:http`, `:ferrum`, `:selenium` |
13
+ | `network.pool_size` | `WAYFARER_NETWORK_POOL_SIZE` | How many user agents to spawn. | 3 | Integers |
14
+ | `network.pool_timeout` | `WAYFARER_NETWORK_POOL_TIMEOUT` | How long, in seconds, a job may use an agent. | 10 | Integers |
15
+ | `network.http_headers` | `WAYFARER_NETWORK_HTTP_HEADERS` | HTTP headers to append to requests. | `{}` | Hashes |
16
+
17
+ ## `Wayfarer.config.ferrum`
18
+
19
+ | Runtime config key | Environment variable | Description | Default | Supported values |
20
+ | ---------------------- | ------------------------------------ | ------------------------------------------- | -------------------------------- | ----------------------------------- |
21
+ | `ferrum.options` | `WAYFARER_FERRUM_OPTIONS` | Ferrum options. | `{}` | Hashes |
22
+
23
+ ## `Wayfarer.config.selenium`
24
+
25
+ | Runtime config key | Environment variable | Description | Default | Supported values |
26
+ | ---------------------- | ------------------------------------ | ------------------------------------------- | -------------------------------- | ----------------------------------- |
27
+ | `selenium.driver` | `WAYFARER_SELENIUM_DRIVER` | Selenium driver to use. | `:chrome` | Symbols |
28
+ | `selenium.options` | `WAYFARER_SELENIUM_OPTIONS` | Selenium options. | `{}` | Hashes |
29
+ | `selenium.client_timeout` | `WAYFARER_SELENIUM_CLIENT_TIMEOUT` | Selenium client timeout in seconds. | 60 | Integers |
30
+
31
+ ## `Wayfarer.config.redis`
32
+
33
+ | Runtime config key | Environment variable | Description | Default | Supported values |
34
+ | ---------------------- | ------------------------------------ | ------------------------------------------- | ------------------------------------------ | ----------------------------------- |
35
+ | `redis.url` | `WAYFARER_REDIS_URL` | Redis URL to connect to. | `redis://localhost:6379` | Strings |
36
+ | `redis.factory` | n/a | Redis factory lambda. | `->(redis) { ::Redis.new(url: redis.url) }` | Lambdas |
37
+
38
+ ## `Wayfarer.config.capybara`
39
+
40
+ | Runtime config key | Environment variable | Description | Default | Supported values |
41
+ | ---------------------- | ------------------------------------ | ------------------------------------------- | -------------------------------- | ----------------------------------- |
42
+ | `capybara.driver` | `WAYFARER_CAPYBARA_DRIVER` | The Capybara driver to use. | n/a | Symbols |
@@ -10,26 +10,26 @@ with the following syntaxes:
10
10
 
11
11
  ## Variables
12
12
 
13
- ### `WAYFARER_ADAPTER`
14
- : Either `ferrum`, `selenium` or `net_http`.
13
+ ### `WAYFARER_NETWORK_AGENT`
14
+ : Either `ferrum`, `selenium` or `http`.
15
15
 
16
16
  * Type: String
17
- * Key: `config.adapter`
18
- * Default value: `:net_http`
17
+ * Key: `config.network.agent`
18
+ * Default value: `:http`
19
19
 
20
20
  ### `WAYFARER_POOL_SIZE`
21
- : Number of network adapters to maintain.
21
+ : Number of user agents to maintain.
22
22
 
23
23
  * Type: Integer
24
24
  * Key: `config.pool_size`
25
25
  * Default value: `1`
26
26
 
27
27
  ### `WAYFARER_POOL_TIMEOUT`
28
- : How long a network adapter may remain checked out until the owning job
28
+ : How long a user agent may remain checked out until the owning job
29
29
  fails.
30
30
 
31
31
  * Type: Integer
32
- * Key: `config.adapter_pool_timeout`
32
+ * Key: `config.network.pool_timeout`
33
33
  * Default value: `1`
34
34
 
35
35
  ---
@@ -43,32 +43,30 @@ with the following syntaxes:
43
43
 
44
44
  ---
45
45
 
46
- ### `WAYFARER_SELENIUM_ARGV`
47
- : Argument list passed to `Selenium::WebDriver.for`.
46
+ ### `WAYFARER_SELENIUM_DRIVER`
47
+ : Driver passed to `Selenium::WebDriver.for`.
48
48
 
49
- * Type: Array
50
- * Key: `config.selenium_argv`
51
- * Default value: `[]`
49
+ * Type: Symbol
50
+ * Key: `config.selenium.driver`
51
+ * Default value: `:chrome`
52
52
 
53
- ## Examples
54
-
55
- !!! example "Foobar"
56
-
57
- For example, to run Google Chrome in foreground with Ferrum:
58
-
59
- ```
60
- Wayfarer.config.adapter = :ferrum
61
- Wayfarer.ferrum_options = { headless: false, url: "http://chrome:3000" }
62
- ```
53
+ ---
63
54
 
55
+ ### `WAYFARER_SELENIUM_OPTIONS`
56
+ : Options passed to `Selenium::WebDriver.for`.
64
57
 
65
- !!! example "Run Google Chrome in foreground with Ferrum"
58
+ * Type: Hash
59
+ * Key: `config.selenium.options`
60
+ * Default value: `{}`
61
+
62
+ ---
66
63
 
67
- To construct the
64
+ ### `WAYFARER_SELENIUM_CLIENT_TIMEOUT`
65
+ : Selenium HTTP client timeout (seconds).
68
66
 
69
- ```
70
- WAYFARER_FERRUM_OPTIONS=headless:false,url:http://chrome:3000
71
- ```
67
+ * Type: Integer
68
+ * Key: `config.selenium.client_timeout`
69
+ * Default value: `60`
72
70
 
73
71
  ---
74
72
 
data/lib/wayfarer/base.rb CHANGED
@@ -5,22 +5,7 @@ module Wayfarer
5
5
  include Wayfarer::Middleware::Worker
6
6
  extend Forwardable
7
7
 
8
- class << self
9
- def after_batch_callbacks
10
- @after_batch_callbacks ||= []
11
- end
12
-
13
- def after_batch(&block)
14
- after_batch_callbacks.push(block)
15
- end
16
-
17
- def run_after_batch_callbacks
18
- after_batch_callbacks.each(&:call)
19
- end
20
- end
21
-
22
- after_enqueue do |job|
23
- task = job.arguments.first
8
+ after_enqueue do |_job|
24
9
  task.counter.increment
25
10
  end
26
11
 
@@ -30,6 +15,7 @@ module Wayfarer
30
15
 
31
16
  def self.retry_on(*argv)
32
17
  super(*argv) do |job, error|
18
+ job.task.barrier.seen?(job.task.url)
33
19
  GC.new(job).run
34
20
  yield job, error if block_given?
35
21
  end
@@ -37,6 +23,7 @@ module Wayfarer
37
23
 
38
24
  def self.discard_on(*argv)
39
25
  super(*argv) do |job, error|
26
+ job.task.barrier.seen?(job.task.url)
40
27
  GC.new(job).run
41
28
  yield job, error if block_given?
42
29
  end
@@ -48,7 +35,6 @@ module Wayfarer
48
35
 
49
36
  def retry_job(...)
50
37
  super(...) # increments the counter by re-enqueuing the job
51
- task = arguments.first
52
38
  task.counter.decrement
53
39
  end
54
40
 
@@ -56,5 +42,9 @@ module Wayfarer
56
42
  task.job = self
57
43
  chain.call(task)
58
44
  end
45
+
46
+ def task
47
+ arguments.first
48
+ end
59
49
  end
60
50
  end
@@ -0,0 +1,71 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Wayfarer
4
+ module Callbacks
5
+ TERMINATOR = ->(_target, result) { result.call == false }
6
+ OPTIONS = { terminator: TERMINATOR, skip_after_callbacks_if_terminated: true }.freeze
7
+
8
+ ConditionalCallback = Struct.new(:job, :filters) do
9
+ def run(method, &block)
10
+ return if only && !applies?(only)
11
+ return if except && applies?(except)
12
+
13
+ return job.send(method) if method
14
+
15
+ job.instance_eval(&block)
16
+ end
17
+
18
+ private
19
+
20
+ def applies?(condition)
21
+ case condition
22
+ when Symbol then condition == action
23
+ when Enumerable then condition&.include?(action)
24
+ end
25
+ end
26
+
27
+ def only
28
+ filters[:only]
29
+ end
30
+
31
+ def except
32
+ filters[:except]
33
+ end
34
+
35
+ def action
36
+ task.metadata.action
37
+ end
38
+
39
+ def task
40
+ job.task
41
+ end
42
+ end
43
+
44
+ def self.included(base)
45
+ base.include(ActiveSupport::Callbacks)
46
+ base.extend(ClassMethods)
47
+
48
+ base.class_eval do
49
+ define_callbacks(:fetch, OPTIONS)
50
+ define_callbacks(:action, OPTIONS)
51
+ define_callbacks(:batch, OPTIONS)
52
+
53
+ define(:fetch, :before)
54
+ define(:action, :before)
55
+ define(:batch, :after)
56
+ end
57
+ end
58
+
59
+ module ClassMethods
60
+ private
61
+
62
+ def define(name, stage)
63
+ define_singleton_method([stage, name].join("_")) do |method = nil, **filters, &block|
64
+ set_callback(name, stage, **filters) do |job|
65
+ ConditionalCallback.new(job, filters).run(method, &block)
66
+ end
67
+ end
68
+ end
69
+ end
70
+ end
71
+ end
@@ -12,12 +12,16 @@ module Wayfarer
12
12
  private
13
13
 
14
14
  def mock_redis
15
- Wayfarer.config.redis_factory = -> { MockRedis.new }
15
+ Wayfarer.config.redis.factory = ->(_) { MockRedis.new }
16
16
  end
17
17
 
18
18
  def load_environment
19
19
  Wayfarer::CLI::Runner.loader.setup
20
20
  end
21
+
22
+ def free_agent_pool
23
+ Wayfarer::Networking::Pool.instance.free
24
+ end
21
25
  end
22
26
  end
23
27
  end
@@ -11,12 +11,14 @@ module Wayfarer
11
11
  load_environment
12
12
  mock_redis if options[:mock_redis]
13
13
 
14
- url = URI(url)
14
+ url = Addressable::URI.parse(url)
15
15
  job = job.classify.constantize.new
16
16
  task = Wayfarer::Task.new(url, "tmp")
17
17
  job.arguments.push(task)
18
18
  job.perform(task)
19
19
  GC.new(job).run
20
+
21
+ free_agent_pool
20
22
  end
21
23
 
22
24
  desc "enqueue JOB URL",
@@ -26,7 +28,7 @@ module Wayfarer
26
28
  load_environment
27
29
  mock_redis if options[:mock_redis]
28
30
 
29
- url = URI(url)
31
+ url = Addressable::URI.parse(url)
30
32
  job = job.classify.constantize
31
33
  job.crawl_later(url, batch: options[:batch])
32
34
  end
@@ -41,7 +43,7 @@ module Wayfarer
41
43
  load_environment
42
44
  mock_redis if options[:mock_redis]
43
45
 
44
- url = URI(url)
46
+ url = Addressable::URI.parse(url)
45
47
  job = job.classify.constantize
46
48
 
47
49
  job.queue_adapter = ActiveJob::QueueAdapters::AsyncAdapter.new(min_threads: options[:min_threads],
@@ -52,6 +54,8 @@ module Wayfarer
52
54
  job.crawl_later(url, batch: options[:batch])
53
55
 
54
56
  sleep(1) while executor.scheduled_task_count > executor.completed_task_count
57
+
58
+ free_agent_pool
55
59
  end
56
60
  end
57
61
  end
@@ -9,7 +9,7 @@ module Wayfarer
9
9
  "Invoke JOB's router with URL"
10
10
  def result(job, url)
11
11
  load_environment
12
- url = URI(url)
12
+ url = Addressable::URI.parse(url)
13
13
  job = job.classify.constantize
14
14
  puts Wayfarer::Routing::PathFinder.result(job.route, url)
15
15
  end
@@ -18,7 +18,7 @@ module Wayfarer
18
18
  "Visualize JOB's routing tree for URL"
19
19
  def tree(job, url)
20
20
  load_environment
21
- url = URI(url)
21
+ url = Addressable::URI.parse(url)
22
22
  job = job.classify.constantize
23
23
  Wayfarer::CLI::RoutePrinter.print(job.route, url)
24
24
  end
@@ -77,19 +77,19 @@ module Wayfarer
77
77
  def matcher_label(route)
78
78
  return "Target" if route.is_a?(Wayfarer::Routing::TargetRoute)
79
79
 
80
- route.matcher.class.name.demodulize.delete_suffix("Matcher")
80
+ route.matcher.class.name.demodulize
81
81
  end
82
82
 
83
83
  def options(route)
84
84
  return "" if route.is_a?(Wayfarer::Routing::RootRoute)
85
85
 
86
86
  case (matcher = route.matcher)
87
- when Wayfarer::Routing::HostMatcher then matcher.host
88
- when Wayfarer::Routing::PathMatcher then matcher.path
89
- when Wayfarer::Routing::QueryMatcher then matcher.fields
90
- when Wayfarer::Routing::CustomMatcher then "##{route.action}"
91
- when Wayfarer::Routing::SchemeMatcher then matcher.scheme
92
- when Wayfarer::Routing::SuffixMatcher then matcher.suffix
87
+ when Wayfarer::Routing::Matchers::Host then matcher.host
88
+ when Wayfarer::Routing::Matchers::Path then matcher.path
89
+ when Wayfarer::Routing::Matchers::Query then matcher.fields
90
+ when Wayfarer::Routing::Matchers::Custom then "##{route.action}"
91
+ when Wayfarer::Routing::Matchers::Scheme then matcher.scheme
92
+ when Wayfarer::Routing::Matchers::Suffix then matcher.suffix
93
93
  end
94
94
  end
95
95
 
@@ -0,0 +1,10 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Wayfarer
4
+ module Config
5
+ Capybara = Struct.new(driver: {
6
+ env_key: "WAYFARER_CAPYBARA_DRIVER",
7
+ type: Symbol
8
+ })
9
+ end
10
+ end
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Wayfarer
4
+ module Config
5
+ Ferrum = Struct.new(options: {
6
+ env_key: "WAYFARER_FERRUM_OPTIONS",
7
+ type: Hash,
8
+ default: {}
9
+ })
10
+ end
11
+ end
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Wayfarer
4
+ module Config
5
+ Networking = Struct.new(agent: {
6
+ env_key: "WAYFARER_NETWORK_AGENT",
7
+ type: Symbol,
8
+ default: :http
9
+ },
10
+ pool_size: {
11
+ env_key: "WAYFARER_NETWORK_POOL_SIZE",
12
+ type: Integer,
13
+ default: 3
14
+ },
15
+ pool_timeout: {
16
+ env_key: "WAYFARER_NETWORK_POOL_TIMEOUT",
17
+ type: Integer,
18
+ default: 10
19
+ },
20
+ http_headers: {
21
+ env_key: "WAYFARER_NETWORK_HTTP_HEADERS",
22
+ type: Hash,
23
+ default: {}
24
+ })
25
+ end
26
+ end
@@ -0,0 +1,14 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Wayfarer
4
+ module Config
5
+ Redis = Struct.new(url: {
6
+ env_key: "WAYFARER_REDIS_URL",
7
+ type: String,
8
+ default: "redis://localhost:6379"
9
+ },
10
+ factory: {
11
+ default: ->(redis) { ::Redis.new(url: redis.url) }
12
+ })
13
+ end
14
+ end
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Wayfarer
4
+ module Config
5
+ Root = Struct.new(ferrum: { default: Wayfarer::Config::Ferrum.new },
6
+ network: { default: Wayfarer::Config::Networking.new },
7
+ redis: { default: Wayfarer::Config::Redis.new },
8
+ selenium: { default: Wayfarer::Config::Selenium.new },
9
+ capybara: { default: Wayfarer::Config::Capybara.new })
10
+ end
11
+ end
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Wayfarer
4
+ module Config
5
+ Selenium = Struct.new(driver: {
6
+ env_key: "WAYFARER_SELENIUM_DRIVER",
7
+ type: Symbol,
8
+ default: :chrome
9
+ },
10
+ options: {
11
+ env_key: "WAYFARER_SELENIUM_OPTIONS",
12
+ type: Hash,
13
+ default: {}
14
+ },
15
+ client_timeout: {
16
+ env_key: "WAYFARER_SELENIUM_CLIENT_TIMEOUT",
17
+ type: Integer,
18
+ default: 60 # seconds
19
+ })
20
+ end
21
+ end
@@ -0,0 +1,45 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Wayfarer
4
+ module Config
5
+ module Strconv
6
+ module_function
7
+
8
+ def parse(str, type = nil)
9
+ return primitive(str) unless type
10
+
11
+ case type.name
12
+ when "Hash" then hash(str)
13
+ when "Array" then array(str)
14
+ when "Symbol" then str.to_sym
15
+ when "Integer" then Integer(str)
16
+ else str
17
+ end
18
+ end
19
+
20
+ def hash(str)
21
+ array(str).reduce({}) do |acc, pair|
22
+ k, v = pair.split(":", 2)
23
+ next acc unless k && v
24
+
25
+ acc.merge({ parse(k, Symbol) => primitive(v) })
26
+ end
27
+ end
28
+
29
+ def array(str)
30
+ str.split(",").map(&:strip)
31
+ end
32
+
33
+ def primitive(str)
34
+ return true if str == "true"
35
+ return false if str == "false"
36
+
37
+ begin
38
+ parse(str, Integer)
39
+ rescue StandardError
40
+ str
41
+ end
42
+ end
43
+ end
44
+ end
45
+ end
@@ -0,0 +1,72 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Wayfarer
4
+ module Config
5
+ class Struct
6
+ module ClassMethods
7
+ attr_accessor :members
8
+ end
9
+
10
+ module InstanceMethods
11
+ extend Forwardable
12
+
13
+ delegate members: "self.class"
14
+
15
+ attr_reader :env
16
+
17
+ def initialize(env = ENV)
18
+ @env = env
19
+
20
+ define_writers
21
+ define_readers
22
+ end
23
+
24
+ private
25
+
26
+ def define_writers
27
+ members.each { |key, _| define_writer(key) }
28
+ end
29
+
30
+ def define_writer(key)
31
+ define_singleton_method(:"#{key}=") do |val|
32
+ set(key, val)
33
+ end
34
+ end
35
+
36
+ def define_readers
37
+ members.each { |key, options| define_reader(key, **options) }
38
+ end
39
+
40
+ def define_reader(key, env_key: nil, type: nil, default: nil)
41
+ define_singleton_method(key.to_sym) do
42
+ get(key) || set(key, get(key) || env_val(env_key, type) || default)
43
+ end
44
+ end
45
+
46
+ def env_val(env_key, type)
47
+ return nil unless env_key
48
+ return nil unless env.key?(env_key)
49
+
50
+ Strconv.parse(env[env_key], type)
51
+ end
52
+
53
+ def get(key)
54
+ instance_variable_get(:"@#{key}")
55
+ end
56
+
57
+ def set(key, val)
58
+ instance_variable_set(:"@#{key}", val)
59
+ end
60
+ end
61
+
62
+ def self.new(members)
63
+ Class.new do
64
+ include InstanceMethods
65
+ extend ClassMethods
66
+
67
+ self.members = members
68
+ end
69
+ end
70
+ end
71
+ end
72
+ end
data/lib/wayfarer/gc.rb CHANGED
@@ -3,17 +3,13 @@
3
3
  module Wayfarer
4
4
  GC = Struct.new(:job) do
5
5
  def run
6
+ task = job.task
7
+
6
8
  return unless task.counter.decrement <= 0
7
9
 
8
10
  task.barrier.reset!
9
11
  task.counter.reset!
10
- job.class.run_after_batch_callbacks
11
- end
12
-
13
- private
14
-
15
- def task
16
- job.arguments.first
12
+ job.run_callbacks(:batch)
17
13
  end
18
14
  end
19
15
  end
@@ -15,10 +15,14 @@ module Wayfarer
15
15
  def call(task)
16
16
  self.task = task
17
17
 
18
- pool.with do |adapter|
19
- task.metadata.adapter = adapter
18
+ pool.with do |agent|
19
+ task.metadata.agent = agent
20
20
 
21
- case result = adapter.fetch(task.url)
21
+ result = task.job.run_callbacks :fetch do
22
+ agent.fetch(task.url)
23
+ end
24
+
25
+ case result
22
26
  when Networking::Result::Redirect
23
27
  stage(result.redirect_url)
24
28
  when Networking::Result::Success
@@ -6,12 +6,12 @@ module Wayfarer
6
6
  def call(task)
7
7
  route = task.job.class.route
8
8
 
9
- case result = route.invoke(URI(task.url))
9
+ case result = route.invoke(Addressable::URI.parse(task.url))
10
10
  when Routing::Result::Mismatch
11
11
  return
12
12
  when Routing::Result::Match
13
13
  task.metadata.action = result.action
14
- task.metadata.params = result.params
14
+ task.metadata.params = ActiveSupport::HashWithIndifferentAccess.new(result.params)
15
15
  end
16
16
 
17
17
  yield if block_given?