wayfarer 0.4.7 → 0.4.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (183)
  1. checksums.yaml +4 -4
  2. data/.env +17 -0
  3. data/.github/workflows/lint.yaml +8 -6
  4. data/.github/workflows/release.yaml +4 -3
  5. data/.github/workflows/tests.yaml +5 -14
  6. data/.gitignore +2 -2
  7. data/.rubocop.yml +31 -0
  8. data/.vale.ini +6 -3
  9. data/Dockerfile +3 -2
  10. data/Gemfile +21 -0
  11. data/Gemfile.lock +233 -128
  12. data/Rakefile +7 -0
  13. data/docker-compose.yml +13 -14
  14. data/docs/guides/callbacks.md +3 -1
  15. data/docs/guides/configuration.md +10 -35
  16. data/docs/guides/development.md +67 -0
  17. data/docs/guides/handlers.md +7 -7
  18. data/docs/guides/jobs.md +54 -11
  19. data/docs/guides/networking/custom_adapters.md +31 -10
  20. data/docs/guides/pages.md +24 -22
  21. data/docs/guides/routing.md +116 -34
  22. data/docs/guides/tasks.md +30 -10
  23. data/docs/guides/tutorial.md +23 -17
  24. data/docs/guides/user_agents.md +11 -9
  25. data/lib/wayfarer/base.rb +9 -8
  26. data/lib/wayfarer/batch_completion.rb +18 -14
  27. data/lib/wayfarer/callbacks.rb +14 -14
  28. data/lib/wayfarer/cli/route_printer.rb +78 -96
  29. data/lib/wayfarer/cli.rb +12 -30
  30. data/lib/wayfarer/gc.rb +6 -1
  31. data/lib/wayfarer/kv.rb +28 -0
  32. data/lib/wayfarer/middleware/chain.rb +7 -1
  33. data/lib/wayfarer/middleware/content_type.rb +20 -15
  34. data/lib/wayfarer/middleware/dedup.rb +9 -3
  35. data/lib/wayfarer/middleware/dispatch.rb +7 -2
  36. data/lib/wayfarer/middleware/normalize.rb +4 -12
  37. data/lib/wayfarer/middleware/router.rb +1 -1
  38. data/lib/wayfarer/middleware/uri_parser.rb +4 -3
  39. data/lib/wayfarer/networking/context.rb +12 -1
  40. data/lib/wayfarer/networking/ferrum.rb +1 -4
  41. data/lib/wayfarer/networking/follow.rb +2 -1
  42. data/lib/wayfarer/networking/pool.rb +12 -7
  43. data/lib/wayfarer/networking/selenium.rb +15 -7
  44. data/lib/wayfarer/page.rb +0 -2
  45. data/lib/wayfarer/parsing/xml.rb +1 -1
  46. data/lib/wayfarer/parsing.rb +2 -5
  47. data/lib/wayfarer/redis/barrier.rb +15 -2
  48. data/lib/wayfarer/redis/counter.rb +1 -2
  49. data/lib/wayfarer/routing/dsl.rb +166 -31
  50. data/lib/wayfarer/routing/hash_stack.rb +33 -0
  51. data/lib/wayfarer/routing/matchers/custom.rb +8 -5
  52. data/lib/wayfarer/routing/matchers/{suffix.rb → empty_params.rb} +2 -6
  53. data/lib/wayfarer/routing/matchers/host.rb +15 -9
  54. data/lib/wayfarer/routing/matchers/path.rb +11 -33
  55. data/lib/wayfarer/routing/matchers/query.rb +41 -17
  56. data/lib/wayfarer/routing/matchers/result.rb +12 -0
  57. data/lib/wayfarer/routing/matchers/scheme.rb +13 -5
  58. data/lib/wayfarer/routing/matchers/url.rb +13 -5
  59. data/lib/wayfarer/routing/path_consumer.rb +130 -0
  60. data/lib/wayfarer/routing/path_finder.rb +151 -23
  61. data/lib/wayfarer/routing/result.rb +1 -1
  62. data/lib/wayfarer/routing/root_route.rb +14 -2
  63. data/lib/wayfarer/routing/route.rb +71 -14
  64. data/lib/wayfarer/routing/serializable.rb +28 -0
  65. data/lib/wayfarer/routing/sub_route.rb +53 -0
  66. data/lib/wayfarer/routing/target_route.rb +17 -1
  67. data/lib/wayfarer/stringify.rb +1 -2
  68. data/lib/wayfarer/task.rb +3 -5
  69. data/lib/wayfarer/uri/normalization.rb +120 -0
  70. data/lib/wayfarer.rb +50 -10
  71. data/mise.toml +2 -0
  72. data/mkdocs.yml +8 -17
  73. data/rake/lint.rake +0 -96
  74. data/rake/release.rake +5 -11
  75. data/rake/tests.rake +8 -4
  76. data/requirements.txt +1 -1
  77. data/spec/factories/job.rb +8 -0
  78. data/spec/factories/middleware.rb +2 -2
  79. data/spec/factories/path_finder.rb +11 -0
  80. data/spec/factories/redis.rb +19 -0
  81. data/spec/factories/task.rb +39 -1
  82. data/spec/spec_helpers.rb +50 -57
  83. data/spec/support/active_job_helpers.rb +8 -0
  84. data/spec/support/integration_helpers.rb +21 -0
  85. data/spec/support/redis_helpers.rb +9 -0
  86. data/spec/support/test_app.rb +64 -43
  87. data/spec/{base_spec.rb → wayfarer/base_spec.rb} +32 -36
  88. data/spec/wayfarer/batch_completion_spec.rb +142 -0
  89. data/spec/wayfarer/cli/job_spec.rb +88 -0
  90. data/spec/wayfarer/cli/routing_spec.rb +322 -0
  91. data/spec/{cli → wayfarer/cli}/version_spec.rb +1 -1
  92. data/spec/wayfarer/gc_spec.rb +29 -0
  93. data/spec/{handler_spec.rb → wayfarer/handler_spec.rb} +1 -3
  94. data/spec/{integration → wayfarer/integration}/callbacks_spec.rb +9 -6
  95. data/spec/wayfarer/integration/content_type_spec.rb +37 -0
  96. data/spec/wayfarer/integration/custom_routing_spec.rb +51 -0
  97. data/spec/{integration → wayfarer/integration}/gc_spec.rb +9 -13
  98. data/spec/{integration → wayfarer/integration}/handler_spec.rb +9 -10
  99. data/spec/{integration → wayfarer/integration}/page_spec.rb +8 -6
  100. data/spec/{integration → wayfarer/integration}/params_spec.rb +4 -4
  101. data/spec/{integration → wayfarer/integration}/parsing_spec.rb +7 -33
  102. data/spec/wayfarer/integration/retry_spec.rb +112 -0
  103. data/spec/{integration → wayfarer/integration}/stage_spec.rb +5 -5
  104. data/spec/{middleware → wayfarer/middleware}/batch_completion_spec.rb +4 -5
  105. data/spec/{middleware → wayfarer/middleware}/chain_spec.rb +20 -15
  106. data/spec/{middleware → wayfarer/middleware}/content_type_spec.rb +18 -21
  107. data/spec/{middleware → wayfarer/middleware}/controller_spec.rb +22 -20
  108. data/spec/wayfarer/middleware/dedup_spec.rb +66 -0
  109. data/spec/wayfarer/middleware/normalize_spec.rb +32 -0
  110. data/spec/{middleware → wayfarer/middleware}/router_spec.rb +18 -20
  111. data/spec/{middleware → wayfarer/middleware}/stage_spec.rb +11 -10
  112. data/spec/wayfarer/middleware/uri_parser_spec.rb +63 -0
  113. data/spec/{middleware → wayfarer/middleware}/user_agent_spec.rb +34 -32
  114. data/spec/wayfarer/networking/capybara_spec.rb +13 -0
  115. data/spec/{networking → wayfarer/networking}/context_spec.rb +46 -38
  116. data/spec/wayfarer/networking/ferrum_spec.rb +13 -0
  117. data/spec/{networking → wayfarer/networking}/follow_spec.rb +9 -4
  118. data/spec/wayfarer/networking/http_spec.rb +12 -0
  119. data/spec/{networking → wayfarer/networking}/pool_spec.rb +11 -9
  120. data/spec/wayfarer/networking/selenium_spec.rb +12 -0
  121. data/spec/{networking → wayfarer/networking}/strategy.rb +33 -54
  122. data/spec/{page_spec.rb → wayfarer/page_spec.rb} +3 -3
  123. data/spec/{parsing → wayfarer/parsing}/json_spec.rb +1 -1
  124. data/spec/{parsing/xml_spec.rb → wayfarer/parsing/xml_parse_spec.rb} +4 -3
  125. data/spec/{redis → wayfarer/redis}/barrier_spec.rb +5 -4
  126. data/spec/wayfarer/redis/counter_spec.rb +34 -0
  127. data/spec/{redis → wayfarer/redis}/pool_spec.rb +3 -2
  128. data/spec/{routing → wayfarer/routing}/dsl_spec.rb +12 -22
  129. data/spec/wayfarer/routing/hash_stack_spec.rb +63 -0
  130. data/spec/wayfarer/routing/integration_spec.rb +101 -0
  131. data/spec/wayfarer/routing/matchers/custom_spec.rb +39 -0
  132. data/spec/wayfarer/routing/matchers/host_spec.rb +56 -0
  133. data/spec/wayfarer/routing/matchers/matcher.rb +17 -0
  134. data/spec/wayfarer/routing/matchers/path_spec.rb +43 -0
  135. data/spec/wayfarer/routing/matchers/query_spec.rb +123 -0
  136. data/spec/wayfarer/routing/matchers/scheme_spec.rb +45 -0
  137. data/spec/wayfarer/routing/matchers/url_spec.rb +33 -0
  138. data/spec/wayfarer/routing/path_consumer_spec.rb +123 -0
  139. data/spec/wayfarer/routing/path_finder_spec.rb +409 -0
  140. data/spec/wayfarer/routing/root_route_spec.rb +51 -0
  141. data/spec/wayfarer/routing/route_spec.rb +74 -0
  142. data/spec/wayfarer/routing/sub_route_spec.rb +103 -0
  143. data/spec/wayfarer/uri/normalization_spec.rb +98 -0
  144. data/spec/wayfarer_spec.rb +2 -2
  145. data/wayfarer.gemspec +17 -28
  146. metadata +768 -246
  147. data/.rbenv-gemsets +0 -1
  148. data/.ruby-version +0 -1
  149. data/RELEASING.md +0 -17
  150. data/docs/cookbook/user_agent.md +0 -7
  151. data/docs/design.md +0 -36
  152. data/docs/guides/jobs/error_handling.md +0 -40
  153. data/docs/reference/configuration.md +0 -36
  154. data/spec/batch_completion_spec.rb +0 -104
  155. data/spec/cli/job_spec.rb +0 -74
  156. data/spec/cli/routing_spec.rb +0 -101
  157. data/spec/fixtures/dummy_job.rb +0 -9
  158. data/spec/gc_spec.rb +0 -17
  159. data/spec/integration/content_type_spec.rb +0 -145
  160. data/spec/integration/routing_spec.rb +0 -18
  161. data/spec/middleware/dedup_spec.rb +0 -71
  162. data/spec/middleware/dispatch_spec.rb +0 -59
  163. data/spec/middleware/normalize_spec.rb +0 -60
  164. data/spec/middleware/uri_parser_spec.rb +0 -53
  165. data/spec/networking/capybara_spec.rb +0 -12
  166. data/spec/networking/ferrum_spec.rb +0 -12
  167. data/spec/networking/http_spec.rb +0 -12
  168. data/spec/networking/selenium_spec.rb +0 -12
  169. data/spec/redis/counter_spec.rb +0 -44
  170. data/spec/routing/integration_spec.rb +0 -110
  171. data/spec/routing/matchers/custom_spec.rb +0 -31
  172. data/spec/routing/matchers/host_spec.rb +0 -49
  173. data/spec/routing/matchers/path_spec.rb +0 -43
  174. data/spec/routing/matchers/query_spec.rb +0 -137
  175. data/spec/routing/matchers/scheme_spec.rb +0 -25
  176. data/spec/routing/matchers/suffix_spec.rb +0 -41
  177. data/spec/routing/matchers/uri_spec.rb +0 -27
  178. data/spec/routing/path_finder_spec.rb +0 -33
  179. data/spec/routing/root_route_spec.rb +0 -29
  180. data/spec/routing/route_spec.rb +0 -43
  181. data/docs/{reference → guides}/cli.md +0 -0
  182. data/spec/{stringify_spec.rb → wayfarer/stringify_spec.rb} +2 -2
  183. data/spec/{task_spec.rb → wayfarer/task_spec.rb} +0 -0
@@ -5,33 +5,90 @@ module Wayfarer
5
5
  class Route
6
6
  include DSL
7
7
  include Stringify
8
- extend Forwardable
8
+ include Serializable
9
9
 
10
- attr_reader :children
10
+ attr_reader :matcher,
11
+ :parent,
12
+ :children
11
13
 
12
- attr_accessor :matcher,
13
- :parent,
14
- :action,
15
- :path_offset
14
+ stringify :matcher
16
15
 
17
- stringify :matcher,
18
- :action,
19
- :path_offset
16
+ EMPTY_PARAMS = {}.freeze
20
17
 
21
- def initialize(matcher = Matchers::Custom.new { children.any? }, path_offset = "/")
18
+ def initialize(
19
+ parent: nil,
20
+ matcher: nil,
21
+ action: nil,
22
+ **options,
23
+ &block
24
+ )
25
+ raise "missing parent" unless parent || is_a?(RootRoute)
26
+
27
+ @parent = parent
22
28
  @matcher = matcher
29
+ @action = action
30
+
23
31
  @children = []
24
- @path_offset = path_offset
32
+
33
+ leaf = options.reduce(self) { |acc, (key, val)| acc.public_send(key, val) }
34
+ leaf.instance_eval(&block) if block
25
35
  end
26
36
 
27
- # Accepts a visitor for in-order traversal.
37
+ # @return [true, false]
38
+ def root?
39
+ parent.nil?
40
+ end
41
+
42
+ # @return [true, false]
43
+ def leaf?
44
+ children.empty?
45
+ end
46
+
47
+ # @return [false]
48
+ def target?
49
+ false
50
+ end
51
+
52
+ # Accepts a visitor for pre-order traversal.
28
53
  def accept(visitor)
29
- return unless visitor.visit(self)
54
+ visitor.enter(self)
55
+
56
+ return visitor.leave unless visitor.visit(self)
30
57
 
31
58
  children.each { |child| child.accept(visitor) }
59
+
60
+ visitor.leave
32
61
  end
33
62
 
34
- delegate match: :matcher
63
+ # @param path_finder [PathFinder]
64
+ # @return [Hash]
65
+ def params(path_finder)
66
+ matcher&.params(path_finder) || EMPTY_PARAMS
67
+ end
68
+
69
+ # @param _path_finder [PathFinder]
70
+ # @return [nil, Symbol, Wayfarer::Handler]
71
+ def action(_path_finder)
72
+ @action
73
+ end
74
+
75
+ # @param path_finder [PathFinder]
76
+ # @return [Result::Match, Result::Mismatch, Object]
77
+ def match(path_finder)
78
+ evaluate(path_finder)
79
+ end
80
+
81
+ # @param path_finder [PathFinder]
82
+ # @return [true, false, Wayfarer::Routing::Route]
83
+ def evaluate(path_finder)
84
+ matcher.evaluate(path_finder)
85
+ end
86
+
87
+ def to_h
88
+ return {} unless matcher
89
+
90
+ { matcher.class.name.demodulize.underscore => matcher.to_h }
91
+ end
35
92
  end
36
93
  end
37
94
  end
@@ -0,0 +1,28 @@
1
+ # lib/wayfarer/routing/serializable.rb
2
+ # frozen_string_literal: true
3
+
4
+ module Wayfarer
5
+ module Routing
6
+ module Serializable
7
+ def to_h
8
+ as_hash(self)
9
+ end
10
+
11
+ private
12
+
13
+ def as_hash(route)
14
+ {
15
+ matcher: matcher_name(route),
16
+ action: route.action(nil),
17
+ children: route.children.map { |child| as_hash(child) }
18
+ }.tap(&:compact!)
19
+ end
20
+
21
+ def matcher_name(route)
22
+ return nil unless route.matcher
23
+
24
+ route.matcher.class.name.split("::").last
25
+ end
26
+ end
27
+ end
28
+ end
@@ -0,0 +1,53 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Wayfarer
4
+ module Routing
5
+ class SubRoute < Route
6
+ def evaluate(path_finder)
7
+ handle(
8
+ path_finder,
9
+ match: ->(_result) { true },
10
+ mismatch: ->(_result) { false }
11
+ )
12
+ end
13
+
14
+ def params(path_finder)
15
+ handle(
16
+ path_finder,
17
+ match: lambda(&:params),
18
+ mismatch: ->(_result) { EMPTY_PARAMS }
19
+ )
20
+ end
21
+
22
+ def action(path_finder)
23
+ handle(
24
+ path_finder,
25
+ match: lambda(&:action),
26
+ mismatch: ->(_result) {}
27
+ )
28
+ end
29
+
30
+ private
31
+
32
+ def handle(path_finder, match:, mismatch:)
33
+ case root = evaluate_matcher(path_finder)
34
+ when Wayfarer::Routing::RootRoute
35
+ case result = sub_result(root, path_finder)
36
+ when Wayfarer::Routing::Result::Match then match.call(result)
37
+ when Wayfarer::Routing::Result::Mismatch then mismatch.call(result)
38
+ else raise "invalid result: #{result.inspect}"
39
+ end
40
+ else raise "#{route.inspect} is not a root route"
41
+ end
42
+ end
43
+
44
+ def evaluate_matcher(path_finder)
45
+ path_finder[matcher] ||= matcher.evaluate(path_finder)
46
+ end
47
+
48
+ def sub_result(route, path_finder)
49
+ path_finder[route] ||= Wayfarer::Routing::PathFinder.sub_result(route, path_finder)
50
+ end
51
+ end
52
+ end
53
+ end
@@ -2,6 +2,22 @@
2
2
 
3
3
  module Wayfarer
4
4
  module Routing
5
- class TargetRoute < Route; end
5
+ class TargetRoute < Route
6
+ def evaluate(_path_finder)
7
+ true
8
+ end
9
+
10
+ def target?
11
+ true
12
+ end
13
+
14
+ def to_h
15
+ { action: case @action
16
+ when Wayfarer::Handler then { class: @action.class.name }
17
+ when Array then { handler: @action.first.class.name, action: @action.second }
18
+ else @action
19
+ end }
20
+ end
21
+ end
6
22
  end
7
23
  end
@@ -25,8 +25,7 @@ module Wayfarer
25
25
  if self.class.stringified_attributes.any?
26
26
  attrs = self.class
27
27
  .stringified_attributes
28
- .map { |attr| [attr, public_send(attr)] }
29
- .to_h
28
+ .to_h { |attr| [attr, public_send(attr)] }
30
29
  .map { |k, v| "#{k}=#{v.inspect}" }
31
30
  .join(", ")
32
31
 
data/lib/wayfarer/task.rb CHANGED
@@ -6,20 +6,18 @@ module Wayfarer
6
6
  # @!attribute [r] batch
7
7
  # @return [String] the batch the task belongs to
8
8
  class Task
9
- extend Forwardable
9
+ include KV
10
10
  include Stringify
11
11
 
12
- attr_reader :url, :batch
12
+ attr_reader :url,
13
+ :batch
13
14
 
14
15
  stringify :url, :batch
15
16
 
16
- delegate %i([] []=) => :@ephemeral
17
-
18
17
  # @!visibility private
19
18
  def initialize(url, batch)
20
19
  @url = url
21
20
  @batch = batch
22
- @ephemeral = {}
23
21
  end
24
22
 
25
23
  # @!visibility private
@@ -0,0 +1,120 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Wayfarer
4
+ module URI
5
+ # HTTP(S) URL normalization.
6
+ module Normalization
7
+ InvalidURIError = Class.new(StandardError)
8
+
9
+ # Raised when URI is relative.
10
+ RelativeURIError = Class.new(InvalidURIError)
11
+
12
+ # Raised when URI scheme is not hypertext.
13
+ NoHypertextError = Class.new(InvalidURIError)
14
+
15
+ # Raised when URI has no host.
16
+ NoHostError = Class.new(InvalidURIError)
17
+
18
+ extend self
19
+
20
+ # Normalizes `uri` in-place.
21
+ # @param uri [Addressable::URI]
22
+ # @raise [InvalidURIError]
23
+ # @return [Addressable::URI]
24
+ def canonical!(uri)
25
+ had_no_path = uri.path.blank?
26
+
27
+ uri.normalize!
28
+ validate_uri!(uri)
29
+
30
+ normalize_host!(uri) if remove_www?
31
+
32
+ if remove_trailing_slash?
33
+ normalize_path!(uri)
34
+ root_path!(uri)
35
+ end
36
+
37
+ remove_fragment!(uri) if remove_fragment?
38
+ normalize_query_params!(uri)
39
+
40
+ root_path!(uri) if had_no_path && uri.query.nil?
41
+
42
+ uri
43
+ end
44
+
45
+ private
46
+
47
+ def validate_uri!(uri)
48
+ raise RelativeURIError, "URL is not absolute" unless uri.absolute?
49
+ raise NoHypertextError, "URL is using unsupported protocol" unless supported_protocols.include?(uri.scheme)
50
+ raise NoHostError, "URL misses hostname" if uri.host.blank?
51
+ end
52
+
53
+ def normalize_query_params!(uri)
54
+ return unless remove_tracking_parameters? || order_query_parameters?
55
+ return unless (params = uri.query_values(Array))
56
+
57
+ remove_tracking_parameters!(params) if remove_tracking_parameters?
58
+ order_query_parameters!(params) if order_query_parameters?
59
+
60
+ uri.query_values = params.empty? ? nil : params
61
+ end
62
+
63
+ def remove_tracking_parameters!(params)
64
+ params.reject! { |key, val| val.to_s.empty? || tracking_params.include?(key) }
65
+ end
66
+
67
+ def order_query_parameters!(params)
68
+ params.sort_by!(&:first)
69
+ end
70
+
71
+ def normalize_host!(uri)
72
+ uri.host &&= uri.host.delete_prefix("www.")
73
+ end
74
+
75
+ def normalize_path!(uri)
76
+ uri.path = uri.path.delete_suffix(File::SEPARATOR) if uri.path && uri.path.length > 1
77
+ end
78
+
79
+ def remove_fragment!(uri)
80
+ uri.fragment = nil
81
+ end
82
+
83
+ def root_path!(uri)
84
+ uri.path = "" if uri.path == File::SEPARATOR
85
+ end
86
+
87
+ def normalization_config
88
+ Wayfarer.config.fetch(:normalization)
89
+ end
90
+
91
+ def supported_protocols
92
+ normalization_config.fetch(:schemes)
93
+ end
94
+
95
+ def tracking_params
96
+ normalization_config.fetch(:tracking_params)
97
+ end
98
+
99
+ def remove_www?
100
+ normalization_config.fetch(:remove_www)
101
+ end
102
+
103
+ def remove_trailing_slash?
104
+ normalization_config.fetch(:remove_trailing_slash)
105
+ end
106
+
107
+ def remove_fragment?
108
+ normalization_config.fetch(:remove_fragment)
109
+ end
110
+
111
+ def remove_tracking_parameters?
112
+ normalization_config.fetch(:remove_tracking_parameters)
113
+ end
114
+
115
+ def order_query_parameters?
116
+ normalization_config.fetch(:order_query_parameters)
117
+ end
118
+ end
119
+ end
120
+ end
data/lib/wayfarer.rb CHANGED
@@ -3,13 +3,16 @@
3
3
  require "cgi"
4
4
  require "forwardable"
5
5
  require "net/http"
6
+ require "pp"
6
7
  require "securerandom"
7
8
  require "uri"
9
+ require "yaml"
8
10
 
9
11
  require "active_job"
12
+ require "active_support/core_ext/array/wrap"
13
+ require "active_support/core_ext/object/deep_dup"
10
14
  require "capybara"
11
15
  require "connection_pool"
12
- require "docile"
13
16
  require "ferrum"
14
17
  require "metainspector"
15
18
  require "mime/types"
@@ -18,7 +21,6 @@ require "mock_redis"
18
21
  require "mustermann"
19
22
  require "net/http/persistent"
20
23
  require "nokogiri"
21
- require "normalize_url"
22
24
  require "selenium-webdriver"
23
25
  require "redis"
24
26
  require "thor"
@@ -28,32 +30,71 @@ loader = Zeitwerk::Loader.for_gem
28
30
  loader.inflector.inflect("cli" => "CLI",
29
31
  "dsl" => "DSL",
30
32
  "http" => "HTTP",
33
+ "uri" => "URI",
31
34
  "url" => "URL",
32
35
  "xml" => "XML",
33
36
  "json" => "JSON",
34
- "gc" => "GC")
37
+ "gc" => "GC",
38
+ "kv" => "KV")
35
39
  loader.setup
36
40
 
37
41
  module Wayfarer
38
42
  module VERSION
39
43
  MAJOR = 0
40
44
  MINOR = 4
41
- TINY = 6
45
+ TINY = 8
42
46
  STRING = [MAJOR, MINOR, TINY].join(".")
43
47
  end
44
48
 
45
49
  DEFAULT_CONFIG = {
46
50
  redis: {
47
51
  url: "redis://localhost:6379/0",
48
- factory: ->(redis) { ::Redis.new(url: redis[:url]) }
52
+ factory: ->(redis_config) { ::Redis.new(url: redis_config.fetch(:url)) }
49
53
  },
50
54
  network: {
51
55
  agent: :http,
52
- pool_size: 1,
53
- pool_timeout: 10,
56
+ pool: {
57
+ size: 1,
58
+ timeout: 10
59
+ },
54
60
  http_headers: {},
55
61
  renew_on: []
56
62
  },
63
+ parsing: {
64
+ registry: {
65
+ "application/json" => Wayfarer::Parsing::JSON,
66
+ "text/html" => [Wayfarer::Parsing::XML, :html],
67
+ "application/xml" => [Wayfarer::Parsing::XML, :xml]
68
+ }
69
+ },
70
+ normalization: {
71
+ remove_www: true,
72
+ remove_trailing_slash: true,
73
+ remove_fragment: true,
74
+ remove_tracking_parameters: true,
75
+ order_query_parameters: true,
76
+ schemes: %w[
77
+ http
78
+ https
79
+ ].to_set,
80
+ tracking_params: %w[
81
+ utm_source
82
+ utm_medium
83
+ utm_term
84
+ utm_content
85
+ utm_campaign
86
+ gclid
87
+ fbclid
88
+ msclkid
89
+ sms_ss
90
+ awesm
91
+ xtor
92
+ PHPSESSID
93
+ ].to_set
94
+ },
95
+ deduplication: {
96
+ key: ->(task) { task[:uri].to_s }
97
+ },
57
98
  capybara: {
58
99
  driver: nil
59
100
  },
@@ -62,12 +103,11 @@ module Wayfarer
62
103
  },
63
104
  selenium: {
64
105
  driver: :chrome,
65
- options: {},
66
- client_timeout: 60
106
+ options: {}
67
107
  }
68
108
  }.freeze
69
109
 
70
- mattr_accessor :config, default: DEFAULT_CONFIG.clone
110
+ mattr_accessor :config, default: DEFAULT_CONFIG.deep_dup
71
111
 
72
112
  UserAgentTimeoutError = Class.new(StandardError) # TODO: Move to Networking namespace
73
113
  end
data/mise.toml ADDED
@@ -0,0 +1,2 @@
1
+ [tools]
2
+ ruby = "3.4.4"
data/mkdocs.yml CHANGED
@@ -7,6 +7,7 @@ markdown_extensions:
7
7
  - attr_list
8
8
  - meta
9
9
  - def_list
10
+ - pymdownx.snippets
10
11
  - pymdownx.details
11
12
  - pymdownx.highlight
12
13
  - pymdownx.inlinehilite
@@ -19,7 +20,8 @@ markdown_extensions:
19
20
  - pymdownx.caret
20
21
  - pymdownx.mark
21
22
  - pymdownx.tilde
22
- - pymdownx.tabbed
23
+ - pymdownx.tabbed:
24
+ alternate_style: true
23
25
  - pymdownx.tasklist:
24
26
  custom_checkbox: true
25
27
 
@@ -56,21 +58,14 @@ nav:
56
58
  - Home: index.md
57
59
  - Guides:
58
60
  - Tutorial: guides/tutorial.md
59
- - Jobs:
60
- - Overview: guides/jobs.md
61
- - Error handling: guides/jobs/error_handling.md
61
+ - Jobs: guides/jobs.md
62
62
  - Tasks: guides/tasks.md
63
63
  - Pages: guides/pages.md
64
- - Routing:
65
- - Overview: guides/routing.md
66
- - Matchers:
67
- - URL: todo
68
- - Host: todo
69
- - Path: todo
70
- - Query: todo
71
- - Custom matchers: todo
64
+ - Routing: guides/routing.md
72
65
  - Callbacks: guides/callbacks.md
73
66
  - Handlers: guides/handlers.md
67
+ - Configuration: guides/configuration.md
68
+ - Command-line interface: guides/cli.md
74
69
  - Networking:
75
70
  - Introduction: guides/user_agents.md
76
71
  - User agent API: guides/networking/custom_adapters.md
@@ -80,13 +75,9 @@ nav:
80
75
  - Selenium: guides/networking/selenium.md
81
76
  - Capybara: guides/networking/capybara.md
82
77
  - Redis: guides/redis.md
83
- - Design decisions: design.md
84
- - Reference:
85
- - Configuration: reference/configuration.md
86
- - Command-line interface: reference/cli.md
78
+ - Development: guides/development.md
87
79
  - Cookbook:
88
80
  - Browser navigation: cookbook/navigation.md
89
81
  - Executing JavaScript: cookbook/executing_javascript.md
90
82
  - Screenshots: cookbook/screenshots.md
91
- - Setting the User-Agent: cookbook/user_agent.md
92
83
  - API documentation: "https://www.rubydoc.info"
data/rake/lint.rake CHANGED
@@ -1,105 +1,9 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "open-uri"
4
- require "fileutils"
5
- require "rubygems/package"
6
- require "zlib"
7
- require "zip"
8
3
  require "rubocop/rake_task"
9
- require "net/http"
10
- require "uri"
11
-
12
- VALE_URL_ROOT = "https://github.com/errata-ai"
13
- VALE_VERSION = "2.15.4"
14
- VALE_STYLE_GUIDE_VERSION = "v0.3.3"
15
- VALE_PREFIX = File.expand_path(File.join(".github", "bin"))
16
- VALE_STYLE_GUIDE_PREFIX = File.expand_path(File.join(".github", "share"))
17
4
 
18
5
  namespace :lint do
19
6
  RuboCop::RakeTask.new do |task|
20
7
  task.formatters = %w[simple]
21
8
  end
22
-
23
- vale_command = File.join(VALE_PREFIX, "vale")
24
- style_guide_directory = File.join(VALE_STYLE_GUIDE_PREFIX, "vale")
25
-
26
- desc "Lint documentation"
27
- task vale: [:"lint:vale:clean", vale_command, style_guide_directory] do
28
- sh "#{vale_command} docs/**/*.md"
29
- end
30
-
31
- namespace :vale do
32
- directory VALE_PREFIX
33
-
34
- desc "Install Vale to #{vale_command}"
35
- file vale_command => VALE_PREFIX do
36
- unless (local_command = `which vale`.strip).empty?
37
- next File.symlink(local_command, vale_command)
38
- end
39
-
40
- filename = "vale_#{VALE_VERSION}_Linux_64-bit.tar.gz"
41
- url = File.join(VALE_URL_ROOT, "/vale/releases/download/v#{VALE_VERSION}/#{filename}")
42
-
43
- extract_tar_gz(url, vale_command)
44
-
45
- FileUtils.chmod("+x", vale_command)
46
- end
47
-
48
- desc "Deletes Vale"
49
- task clean: :"lint:vale:style_guide:clean" do
50
- File.delete(vale_command) if File.exist?(vale_command)
51
- end
52
-
53
- namespace :style_guide do
54
- directory VALE_STYLE_GUIDE_PREFIX
55
-
56
- desc "Retrieve Vale Google style guide #{VALE_STYLE_GUIDE_VERSION}"
57
- directory style_guide_directory => VALE_STYLE_GUIDE_PREFIX do
58
- FileUtils.mkdir_p(style_guide_directory)
59
- url = "https://github.com/errata-ai/Google/releases/download/#{VALE_STYLE_GUIDE_VERSION}/Google.zip"
60
- extract_zip(url, style_guide_directory)
61
- end
62
-
63
- desc "Deletes the Vale Google style guide"
64
- task :clean do
65
- FileUtils.rm_rf([style_guide_directory])
66
- end
67
-
68
- private
69
-
70
- def extract_zip(url, destination)
71
- content = URI.open(url)
72
-
73
- Zip::File.open_buffer(content) do |zip|
74
- zip.each do |entry|
75
- path = File.join(destination, entry.name)
76
-
77
- if entry.directory?
78
- FileUtils.mkdir_p(path)
79
- else
80
- entry.extract(path)
81
- File.chmod(0o755, path)
82
- end
83
- end
84
- end
85
- end
86
- end
87
-
88
- private
89
-
90
- def extract_tar_gz(url, destination)
91
- URI.open(url) do |file|
92
- Zlib::GzipReader.open(file) do |gz|
93
- Gem::Package::TarReader.new(gz) do |tar|
94
- tar.each do |entry|
95
- next unless entry.file?
96
-
97
- binary = entry.read
98
- File.write(destination, binary)
99
- end
100
- end
101
- end
102
- end
103
- end
104
- end
105
9
  end
data/rake/release.rake CHANGED
@@ -13,17 +13,11 @@ namespace :release do
13
13
  raise "Gem version #{gem_version} deviates from library version #{lib_version}" unless gem_version == lib_version
14
14
  end
15
15
 
16
- # TODO: `GEM_HOST_API_KEY` is only supported for RubyGems 3+
17
- task :write_credentials do
18
- raise "RubyGems 3+ supports `GEM_HOST_API_KEY`" unless RUBY_VERSION.split(".").first.to_i == 2
19
-
20
- key = ENV.fetch("GEM_HOST_API_KEY") { raise "`GEM_HOST_API_KEY` is unset" }
21
- contents = YAML.dump(rubygems_api_key: "Basic #{key}")
22
- gem_path = File.join(Dir.home, ".gem")
23
- FileUtils.mkdir_p(gem_path)
24
- File.write(File.join(gem_path, "credentials"), contents)
16
+ task :guard_debug do
17
+ sh "git diff --exit-code"
18
+ sh "git diff-index --quiet --cached HEAD"
25
19
  end
26
20
  end
27
21
 
28
- Rake::Task[:release].enhance(%i[release:guard_versions])
29
- Rake::Task[:"release:rubygem_push"].enhance(%i[release:write_credentials])
22
+ Rake::Task[:"release:rubygem_push"].enhance(%i[release:guard_versions])
23
+ Rake::Task[:"release:guard_clean"].enhance(%i[release:guard_debug])