domain_extractor 0.2.7 → 0.2.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +268 -0
- data/LICENSE +28 -0
- data/README.md +395 -5
- data/lib/domain_extractor/auth.rb +82 -0
- data/lib/domain_extractor/parsed_url.rb +236 -5
- data/lib/domain_extractor/parser.rb +91 -14
- data/lib/domain_extractor/result.rb +40 -9
- data/lib/domain_extractor/uri_helpers.rb +168 -0
- data/lib/domain_extractor/validators.rb +15 -0
- data/lib/domain_extractor/version.rb +1 -1
- data/lib/domain_extractor.rb +30 -0
- data/spec/auth_and_uri_spec.rb +454 -0
- data/spec/domain_extractor_spec.rb +2 -2
- data/spec/domain_validator_spec.rb +1 -1
- data/spec/formatter_spec.rb +2 -2
- metadata +32 -12
- data/LICENSE.txt +0 -21
|
@@ -1,5 +1,9 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require_relative 'auth'
|
|
4
|
+
require_relative 'query_params'
|
|
5
|
+
require_relative 'uri_helpers'
|
|
6
|
+
|
|
3
7
|
module DomainExtractor
|
|
4
8
|
# ParsedURL wraps the parsing result and provides convenient accessor methods
|
|
5
9
|
# with support for bang (!) and question mark (?) variants.
|
|
@@ -15,16 +19,26 @@ module DomainExtractor
|
|
|
15
19
|
# parsed.host # => nil
|
|
16
20
|
# parsed.host? # => false
|
|
17
21
|
# parsed.host! # raises InvalidURLError
|
|
22
|
+
# rubocop:disable Metrics/ClassLength
|
|
18
23
|
class ParsedURL
|
|
24
|
+
EMPTY_STRING = ''
|
|
25
|
+
|
|
19
26
|
# Expose the underlying hash for backward compatibility
|
|
20
27
|
attr_reader :result
|
|
21
28
|
|
|
29
|
+
# Store the original URI object for advanced operations
|
|
30
|
+
attr_reader :uri
|
|
31
|
+
|
|
22
32
|
# List of valid result keys that should have method accessors
|
|
23
|
-
RESULT_KEYS = %i[
|
|
33
|
+
RESULT_KEYS = %i[
|
|
34
|
+
subdomain domain tld root_domain host path query_params
|
|
35
|
+
scheme port fragment user password userinfo decoded_user decoded_password
|
|
36
|
+
].freeze
|
|
24
37
|
|
|
25
|
-
def initialize(result)
|
|
26
|
-
@result = result || {}
|
|
27
|
-
|
|
38
|
+
def initialize(result, uri = nil)
|
|
39
|
+
@result = (result || {}).dup
|
|
40
|
+
@uri = uri
|
|
41
|
+
sync_uri_state!
|
|
28
42
|
end
|
|
29
43
|
|
|
30
44
|
# Hash-style access for backward compatibility
|
|
@@ -88,7 +102,9 @@ module DomainExtractor
|
|
|
88
102
|
end
|
|
89
103
|
|
|
90
104
|
def to_s
|
|
91
|
-
@
|
|
105
|
+
return EMPTY_STRING unless valid? && @uri
|
|
106
|
+
|
|
107
|
+
@uri.to_s
|
|
92
108
|
end
|
|
93
109
|
|
|
94
110
|
# Allow to_h conversion for hash compatibility
|
|
@@ -99,8 +115,222 @@ module DomainExtractor
|
|
|
99
115
|
# Allow to_hash as well for better Ruby compatibility
|
|
100
116
|
alias to_hash to_h
|
|
101
117
|
|
|
118
|
+
# Alias for URI compatibility
|
|
119
|
+
alias to_str to_s
|
|
120
|
+
|
|
121
|
+
def scheme
|
|
122
|
+
@result[:scheme]
|
|
123
|
+
end
|
|
124
|
+
|
|
125
|
+
def host
|
|
126
|
+
@result[:host]
|
|
127
|
+
end
|
|
128
|
+
|
|
129
|
+
def port
|
|
130
|
+
@result[:port]
|
|
131
|
+
end
|
|
132
|
+
|
|
133
|
+
def path
|
|
134
|
+
@result[:path]
|
|
135
|
+
end
|
|
136
|
+
|
|
137
|
+
def fragment
|
|
138
|
+
@result[:fragment]
|
|
139
|
+
end
|
|
140
|
+
|
|
141
|
+
def user
|
|
142
|
+
@result[:user]
|
|
143
|
+
end
|
|
144
|
+
|
|
145
|
+
def password
|
|
146
|
+
@result[:password]
|
|
147
|
+
end
|
|
148
|
+
|
|
149
|
+
def userinfo
|
|
150
|
+
@result[:userinfo]
|
|
151
|
+
end
|
|
152
|
+
|
|
153
|
+
# hostname returns host without IPv6 brackets (URI compatibility)
|
|
154
|
+
# Host name without the square brackets used around IPv6 literals.
#
# Prefers the value reported by the wrapped URI object and falls back to the
# parsed +host+ entry when no URI is attached (or the URI has no host).
#
# @return [String, nil] bracket-free host, or nil when no host is known
def hostname
  from_uri = @uri&.hostname
  return from_uri if from_uri

  fallback = host
  # Without this guard a missing host would be reported as "" instead of nil.
  return nil if fallback.nil?

  # \A / \z anchor the whole string; ^ / $ would also match around newlines.
  fallback.to_s.gsub(/\A\[|\]\z/, '')
end
|
|
159
|
+
|
|
160
|
+
# query returns the query string (not parsed params)
|
|
161
|
+
def query
|
|
162
|
+
return nil unless @uri
|
|
163
|
+
|
|
164
|
+
@uri.query
|
|
165
|
+
end
|
|
166
|
+
|
|
167
|
+
# Setter methods for URI compatibility
|
|
168
|
+
def scheme=(value)
|
|
169
|
+
mutate_uri! { @uri.scheme = normalize_scheme(value) }
|
|
170
|
+
end
|
|
171
|
+
|
|
172
|
+
def host=(value)
|
|
173
|
+
mutate_uri! { replace_host(value) }
|
|
174
|
+
end
|
|
175
|
+
|
|
176
|
+
def hostname=(value)
|
|
177
|
+
self.host = value
|
|
178
|
+
end
|
|
179
|
+
|
|
180
|
+
def port=(value)
|
|
181
|
+
mutate_uri! { @uri.port = value }
|
|
182
|
+
end
|
|
183
|
+
|
|
184
|
+
def path=(value)
|
|
185
|
+
mutate_uri! { @uri.path = value.to_s }
|
|
186
|
+
end
|
|
187
|
+
|
|
188
|
+
def query=(value)
|
|
189
|
+
mutate_uri! { @uri.query = value }
|
|
190
|
+
end
|
|
191
|
+
|
|
192
|
+
def fragment=(value)
|
|
193
|
+
mutate_uri! { @uri.fragment = value }
|
|
194
|
+
end
|
|
195
|
+
|
|
196
|
+
def user=(value)
|
|
197
|
+
mutate_uri! { @uri.user = value }
|
|
198
|
+
end
|
|
199
|
+
|
|
200
|
+
def password=(value)
|
|
201
|
+
mutate_uri! { @uri.password = value }
|
|
202
|
+
end
|
|
203
|
+
|
|
204
|
+
def userinfo=(value)
|
|
205
|
+
mutate_uri! { @uri.userinfo = value }
|
|
206
|
+
end
|
|
207
|
+
|
|
208
|
+
# Advanced URI methods
|
|
209
|
+
|
|
210
|
+
# Generate Basic Authentication header from current credentials
|
|
211
|
+
# @return [String, nil] Authorization header value or nil if no credentials
|
|
212
|
+
def basic_auth_header
|
|
213
|
+
return nil if user.nil? || password.nil?
|
|
214
|
+
|
|
215
|
+
URIHelpers.basic_auth_header(decoded_user || user, decoded_password || password)
|
|
216
|
+
end
|
|
217
|
+
|
|
218
|
+
# Generate Bearer token header
|
|
219
|
+
# @param token [String] The bearer token
|
|
220
|
+
# @return [String] Authorization header value
|
|
221
|
+
def bearer_auth_header(token)
|
|
222
|
+
URIHelpers.bearer_auth_header(token)
|
|
223
|
+
end
|
|
224
|
+
|
|
225
|
+
# Find proxy for this URL
|
|
226
|
+
# @return [URI::Generic, nil] Proxy URI or nil
|
|
227
|
+
def find_proxy
|
|
228
|
+
return nil unless @uri
|
|
229
|
+
|
|
230
|
+
URIHelpers.find_proxy(@uri)
|
|
231
|
+
end
|
|
232
|
+
|
|
233
|
+
# Merge with a relative URI
|
|
234
|
+
# @param relative [String, URI::Generic] The relative URI
|
|
235
|
+
# @return [ParsedURL] New ParsedURL with merged URI
|
|
236
|
+
def merge(relative)
|
|
237
|
+
return self unless @uri
|
|
238
|
+
|
|
239
|
+
merged_uri = URIHelpers.merge_uri(@uri, relative)
|
|
240
|
+
DomainExtractor.parse(merged_uri.to_s)
|
|
241
|
+
end
|
|
242
|
+
|
|
243
|
+
# Normalize the URI (lowercase scheme/host, remove default ports)
|
|
244
|
+
# @return [ParsedURL] New ParsedURL with normalized URI
|
|
245
|
+
def normalize
|
|
246
|
+
return self unless @uri
|
|
247
|
+
|
|
248
|
+
normalized_uri = URIHelpers.normalize_uri(@uri)
|
|
249
|
+
DomainExtractor.parse(normalized_uri.to_s)
|
|
250
|
+
end
|
|
251
|
+
|
|
252
|
+
# Check if this is an absolute URI
|
|
253
|
+
# @return [Boolean] True if absolute
|
|
254
|
+
def absolute?
|
|
255
|
+
!@result[:scheme].nil?
|
|
256
|
+
end
|
|
257
|
+
|
|
258
|
+
# Check if this is a relative URI
|
|
259
|
+
# @return [Boolean] True if relative
|
|
260
|
+
def relative?
|
|
261
|
+
@result[:scheme].nil?
|
|
262
|
+
end
|
|
263
|
+
|
|
264
|
+
# Get the default port for the scheme
|
|
265
|
+
# @return [Integer, nil] Default port or nil
|
|
266
|
+
def default_port
|
|
267
|
+
URIHelpers.default_port_for(@uri || scheme)
|
|
268
|
+
end
|
|
269
|
+
|
|
270
|
+
# Build a complete URL string from components
|
|
271
|
+
# @return [String] The complete URL
|
|
272
|
+
def build_url
|
|
273
|
+
to_s
|
|
274
|
+
end
|
|
275
|
+
|
|
102
276
|
private
|
|
103
277
|
|
|
278
|
+
def mutate_uri!
|
|
279
|
+
return unless @uri
|
|
280
|
+
|
|
281
|
+
yield
|
|
282
|
+
sync_from_uri!
|
|
283
|
+
end
|
|
284
|
+
|
|
285
|
+
def sync_uri_state!
|
|
286
|
+
return unless @uri && valid?
|
|
287
|
+
|
|
288
|
+
current_userinfo = @uri.userinfo
|
|
289
|
+
@uri.scheme = normalize_scheme(@result[:scheme]) if @result[:scheme]
|
|
290
|
+
if @result[:host]
|
|
291
|
+
@uri.host = normalize_host(@result[:host])
|
|
292
|
+
@uri.userinfo = current_userinfo if current_userinfo
|
|
293
|
+
end
|
|
294
|
+
sync_from_uri!
|
|
295
|
+
end
|
|
296
|
+
|
|
297
|
+
def sync_from_uri!
|
|
298
|
+
attributes = DomainExtractor::Parser.host_attributes(@uri.host)
|
|
299
|
+
|
|
300
|
+
unless attributes
|
|
301
|
+
@result.clear
|
|
302
|
+
return
|
|
303
|
+
end
|
|
304
|
+
|
|
305
|
+
@result.replace(
|
|
306
|
+
attributes.merge(
|
|
307
|
+
path: @uri.path || EMPTY_STRING,
|
|
308
|
+
query_params: QueryParams.call(@uri.query),
|
|
309
|
+
scheme: normalize_scheme(@uri.scheme),
|
|
310
|
+
port: @uri.port,
|
|
311
|
+
fragment: @uri.fragment
|
|
312
|
+
).merge(Auth.extract(@uri))
|
|
313
|
+
)
|
|
314
|
+
end
|
|
315
|
+
|
|
316
|
+
def normalize_host(value)
|
|
317
|
+
return nil if value.nil?
|
|
318
|
+
|
|
319
|
+
value.to_s.downcase
|
|
320
|
+
end
|
|
321
|
+
|
|
322
|
+
def normalize_scheme(value)
|
|
323
|
+
return nil if value.nil?
|
|
324
|
+
|
|
325
|
+
value.to_s.downcase
|
|
326
|
+
end
|
|
327
|
+
|
|
328
|
+
def replace_host(value)
|
|
329
|
+
current_userinfo = @uri.userinfo
|
|
330
|
+
@uri.host = normalize_host(value)
|
|
331
|
+
@uri.userinfo = current_userinfo if current_userinfo
|
|
332
|
+
end
|
|
333
|
+
|
|
104
334
|
# Handle bang methods that raise errors for missing values
|
|
105
335
|
def handle_bang_method(method_str)
|
|
106
336
|
key = method_str[0...-1].to_sym
|
|
@@ -128,4 +358,5 @@ module DomainExtractor
|
|
|
128
358
|
true
|
|
129
359
|
end
|
|
130
360
|
end
|
|
361
|
+
# rubocop:enable Metrics/ClassLength
|
|
131
362
|
end
|
|
@@ -7,18 +7,29 @@ require_relative 'normalizer'
|
|
|
7
7
|
require_relative 'result'
|
|
8
8
|
require_relative 'validators'
|
|
9
9
|
require_relative 'parsed_url'
|
|
10
|
+
require_relative 'auth'
|
|
11
|
+
|
|
12
|
+
# Register custom URI schemes for database and other protocols
|
|
13
|
+
# This allows URI.parse to handle redis://, mysql://, postgresql://, etc.
|
|
14
|
+
%w[redis rediss mysql postgresql mongodb sftp ftps].each do |scheme|
  name = scheme.upcase
  # Leave any scheme that already has a class mapped to it untouched.
  next if URI.scheme_list.key?(name)

  # On Ruby >= 3.1 URI.scheme_list returns a newly built Hash, so assigning
  # into it registers nothing; URI.register_scheme is the supported API there.
  # Older Rubies lack register_scheme and expose the live registry instead.
  if URI.respond_to?(:register_scheme)
    URI.register_scheme(name, URI::Generic)
  else
    URI.scheme_list[name] = URI::Generic
  end
rescue StandardError
  # Ignore if can't register
end
|
|
10
19
|
|
|
11
20
|
module DomainExtractor
|
|
12
21
|
# Parser orchestrates the pipeline for url normalization, validation, and domain extraction.
|
|
13
22
|
module Parser
|
|
23
|
+
SCHEME_PATTERN = %r{\A([a-z][a-z0-9+.-]*)://}i
|
|
24
|
+
RETRYABLE_URI_MESSAGES = ['bad URI', 'is not URI'].freeze
|
|
25
|
+
|
|
14
26
|
module_function
|
|
15
27
|
|
|
16
28
|
def call(raw_url)
|
|
17
|
-
|
|
18
|
-
return ParsedURL.new(nil) unless
|
|
29
|
+
uri, host_attributes = extract_components(raw_url)
|
|
30
|
+
return ParsedURL.new(nil) unless uri && host_attributes
|
|
19
31
|
|
|
20
|
-
|
|
21
|
-
build_result(domain: domain, host: host, uri: uri)
|
|
32
|
+
build_result(host_attributes: host_attributes, uri: uri)
|
|
22
33
|
rescue ::URI::InvalidURIError, ::PublicSuffix::Error
|
|
23
34
|
ParsedURL.new(nil)
|
|
24
35
|
end
|
|
@@ -29,16 +40,29 @@ module DomainExtractor
|
|
|
29
40
|
false
|
|
30
41
|
end
|
|
31
42
|
|
|
32
|
-
def
|
|
43
|
+
def host_attributes(host)
|
|
44
|
+
return if invalid_host?(host)
|
|
45
|
+
|
|
46
|
+
normalized_host = host.downcase
|
|
47
|
+
domain = parse_domain(normalized_host)
|
|
48
|
+
|
|
49
|
+
return domain_attributes(domain, normalized_host) if domain
|
|
50
|
+
|
|
51
|
+
hostname_attributes(normalized_host) if Validators.valid_hostname?(normalized_host)
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
def build_uri(raw_url, retry_count = 0)
|
|
33
55
|
normalized = Normalizer.call(raw_url)
|
|
34
56
|
return unless normalized
|
|
35
57
|
|
|
36
58
|
::URI.parse(normalized)
|
|
59
|
+
rescue ::URI::InvalidURIError => e
|
|
60
|
+
retry_parse_with_registered_scheme(e, normalized, raw_url, retry_count)
|
|
37
61
|
end
|
|
38
62
|
private_class_method :build_uri
|
|
39
63
|
|
|
40
64
|
def invalid_host?(host)
|
|
41
|
-
host.nil? || Validators.ip_address?(host)
|
|
65
|
+
host.nil? || Validators.ip_address?(host)
|
|
42
66
|
end
|
|
43
67
|
private_class_method :invalid_host?
|
|
44
68
|
|
|
@@ -46,23 +70,76 @@ module DomainExtractor
|
|
|
46
70
|
uri = build_uri(raw_url)
|
|
47
71
|
return unless uri
|
|
48
72
|
|
|
49
|
-
|
|
50
|
-
return
|
|
73
|
+
attributes = host_attributes(uri.host)
|
|
74
|
+
return unless attributes
|
|
51
75
|
|
|
52
|
-
|
|
53
|
-
[uri, domain, host]
|
|
76
|
+
[uri, attributes]
|
|
54
77
|
end
|
|
55
78
|
private_class_method :extract_components
|
|
56
79
|
|
|
57
|
-
def
|
|
58
|
-
|
|
80
|
+
def parse_domain(host)
|
|
81
|
+
::PublicSuffix.parse(host)
|
|
82
|
+
rescue ::PublicSuffix::Error
|
|
83
|
+
nil
|
|
84
|
+
end
|
|
85
|
+
private_class_method :parse_domain
|
|
86
|
+
|
|
87
|
+
def domain_attributes(domain, host)
|
|
88
|
+
{
|
|
59
89
|
subdomain: domain.trd,
|
|
60
90
|
root_domain: domain.domain,
|
|
61
91
|
domain: domain.sld,
|
|
62
92
|
tld: domain.tld,
|
|
63
|
-
host: host
|
|
93
|
+
host: host
|
|
94
|
+
}
|
|
95
|
+
end
|
|
96
|
+
private_class_method :domain_attributes
|
|
97
|
+
|
|
98
|
+
def hostname_attributes(host)
|
|
99
|
+
{
|
|
100
|
+
subdomain: nil,
|
|
101
|
+
root_domain: host,
|
|
102
|
+
domain: host,
|
|
103
|
+
tld: nil,
|
|
104
|
+
host: host
|
|
105
|
+
}
|
|
106
|
+
end
|
|
107
|
+
private_class_method :hostname_attributes
|
|
108
|
+
|
|
109
|
+
def retry_parse_with_registered_scheme(error, normalized, raw_url, retry_count)
|
|
110
|
+
return nil unless retryable_scheme_registration?(error.message, normalized, retry_count)
|
|
111
|
+
|
|
112
|
+
register_scheme(normalized[SCHEME_PATTERN, 1])
|
|
113
|
+
build_uri(raw_url, 1)
|
|
114
|
+
rescue StandardError
|
|
115
|
+
nil
|
|
116
|
+
end
|
|
117
|
+
private_class_method :retry_parse_with_registered_scheme
|
|
118
|
+
|
|
119
|
+
def retryable_scheme_registration?(message, normalized, retry_count)
|
|
120
|
+
retry_count.zero? &&
|
|
121
|
+
RETRYABLE_URI_MESSAGES.any? { |fragment| message.include?(fragment) } &&
|
|
122
|
+
normalized.match?(SCHEME_PATTERN)
|
|
123
|
+
end
|
|
124
|
+
private_class_method :retryable_scheme_registration?
|
|
125
|
+
|
|
126
|
+
# Maps +scheme+ to URI::Generic in Ruby's URI scheme registry.
#
# A scheme that already has a class registered (for example HTTP mapped to
# URI::HTTP) is left alone so a retry can never downgrade a built-in scheme.
#
# @param scheme [String] scheme name in any letter case
# @return [void]
def register_scheme(scheme)
  name = scheme.upcase
  return if URI.scheme_list.key?(name)

  # On Ruby >= 3.1 URI.scheme_list returns a newly built Hash, so assigning
  # into it has no effect; URI.register_scheme is the supported API there.
  # Older Rubies lack register_scheme and expose the live registry instead.
  if URI.respond_to?(:register_scheme)
    URI.register_scheme(name, URI::Generic)
  else
    URI.scheme_list[name] = URI::Generic
  end
end
|
|
129
|
+
private_class_method :register_scheme
|
|
130
|
+
|
|
131
|
+
def build_result(host_attributes:, uri:)
|
|
132
|
+
auth_components = Auth.extract(uri)
|
|
133
|
+
|
|
134
|
+
Result.build(
|
|
135
|
+
**host_attributes,
|
|
64
136
|
path: uri.path,
|
|
65
|
-
query: uri.query
|
|
137
|
+
query: uri.query,
|
|
138
|
+
scheme: uri.scheme,
|
|
139
|
+
port: uri.port,
|
|
140
|
+
fragment: uri.fragment,
|
|
141
|
+
**auth_components,
|
|
142
|
+
uri: uri
|
|
66
143
|
)
|
|
67
144
|
end
|
|
68
145
|
private_class_method :build_result
|
|
@@ -12,22 +12,53 @@ module DomainExtractor
|
|
|
12
12
|
module_function
|
|
13
13
|
|
|
14
14
|
def build(**attributes)
|
|
15
|
-
|
|
15
|
+
ParsedURL.new(result_hash(attributes), attributes[:uri])
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
def normalize_subdomain(value)
|
|
19
|
+
value.nil? || value.empty? ? nil : value
|
|
20
|
+
end
|
|
21
|
+
private_class_method :normalize_subdomain
|
|
22
|
+
|
|
23
|
+
def result_hash(attributes)
|
|
24
|
+
domain_attributes(attributes)
|
|
25
|
+
.merge(uri_attributes(attributes))
|
|
26
|
+
.merge(auth_attributes(attributes))
|
|
27
|
+
.freeze
|
|
28
|
+
end
|
|
29
|
+
private_class_method :result_hash
|
|
30
|
+
|
|
31
|
+
def domain_attributes(attributes)
|
|
32
|
+
{
|
|
16
33
|
subdomain: normalize_subdomain(attributes[:subdomain]),
|
|
17
34
|
root_domain: attributes[:root_domain],
|
|
18
35
|
domain: attributes[:domain],
|
|
19
36
|
tld: attributes[:tld],
|
|
20
|
-
host: attributes[:host]
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
37
|
+
host: attributes[:host]
|
|
38
|
+
}
|
|
39
|
+
end
|
|
40
|
+
private_class_method :domain_attributes
|
|
24
41
|
|
|
25
|
-
|
|
42
|
+
def uri_attributes(attributes)
|
|
43
|
+
{
|
|
44
|
+
path: attributes[:path] || EMPTY_PATH,
|
|
45
|
+
query_params: QueryParams.call(attributes[:query]),
|
|
46
|
+
scheme: attributes[:scheme],
|
|
47
|
+
port: attributes[:port],
|
|
48
|
+
fragment: attributes[:fragment]
|
|
49
|
+
}
|
|
26
50
|
end
|
|
51
|
+
private_class_method :uri_attributes
|
|
27
52
|
|
|
28
|
-
def
|
|
29
|
-
|
|
53
|
+
def auth_attributes(attributes)
|
|
54
|
+
{
|
|
55
|
+
user: attributes[:user],
|
|
56
|
+
password: attributes[:password],
|
|
57
|
+
userinfo: attributes[:userinfo],
|
|
58
|
+
decoded_user: attributes[:decoded_user],
|
|
59
|
+
decoded_password: attributes[:decoded_password]
|
|
60
|
+
}
|
|
30
61
|
end
|
|
31
|
-
private_class_method :
|
|
62
|
+
private_class_method :auth_attributes
|
|
32
63
|
end
|
|
33
64
|
end
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'base64'
|
|
4
|
+
require 'uri'
|
|
5
|
+
|
|
6
|
+
module DomainExtractor
|
|
7
|
+
# URIHelpers provides advanced URI manipulation methods
|
|
8
|
+
# Including merge, normalize, authentication helpers, and proxy detection
|
|
9
|
+
# rubocop:disable Metrics/ModuleLength
|
|
10
|
+
module URIHelpers
|
|
11
|
+
CREDENTIAL_ESCAPE_PATTERN = /[^A-Za-z0-9\-._~]/
|
|
12
|
+
DEFAULT_PORTS = {
|
|
13
|
+
'ftp' => 21,
|
|
14
|
+
'ftps' => 990,
|
|
15
|
+
'http' => 80,
|
|
16
|
+
'https' => 443,
|
|
17
|
+
'mongodb' => 27_017,
|
|
18
|
+
'mysql' => 3306,
|
|
19
|
+
'postgresql' => 5432,
|
|
20
|
+
'redis' => 6379,
|
|
21
|
+
'rediss' => 6380,
|
|
22
|
+
'sftp' => 22,
|
|
23
|
+
'ssh' => 22
|
|
24
|
+
}.freeze
|
|
25
|
+
HTTP_PROXY_KEYS = %w[http_proxy HTTP_PROXY].freeze
|
|
26
|
+
ALL_PROXY_KEYS = %w[all_proxy ALL_PROXY].freeze
|
|
27
|
+
|
|
28
|
+
module_function
|
|
29
|
+
|
|
30
|
+
# Generate Basic Authentication header
|
|
31
|
+
# @param username [String] The username
|
|
32
|
+
# @param password [String] The password
|
|
33
|
+
# @return [String] The Authorization header value
|
|
34
|
+
def basic_auth_header(username, password)
|
|
35
|
+
credentials = "#{username}:#{password}"
|
|
36
|
+
encoded = Base64.strict_encode64(credentials)
|
|
37
|
+
"Basic #{encoded}"
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
# Generate Bearer token header
|
|
41
|
+
# @param token [String] The bearer token
|
|
42
|
+
# @return [String] The Authorization header value
|
|
43
|
+
def bearer_auth_header(token)
|
|
44
|
+
"Bearer #{token}"
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
# Encode credentials for URL (percent-encoding)
|
|
48
|
+
# @param value [String] The value to encode
|
|
49
|
+
# @return [String] Percent-encoded value
|
|
50
|
+
def encode_credential(value)
|
|
51
|
+
URI::DEFAULT_PARSER.escape(value.to_s, CREDENTIAL_ESCAPE_PATTERN)
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
# Decode percent-encoded credential
|
|
55
|
+
# @param value [String] The encoded value
|
|
56
|
+
# @return [String] Decoded value
|
|
57
|
+
def decode_credential(value)
|
|
58
|
+
URI::DEFAULT_PARSER.unescape(value.to_s)
|
|
59
|
+
rescue StandardError
|
|
60
|
+
value
|
|
61
|
+
end
|
|
62
|
+
|
|
63
|
+
# Find proxy from environment variables
|
|
64
|
+
# Checks http_proxy, HTTP_PROXY, and no_proxy
|
|
65
|
+
# @param uri [URI::Generic, String] The URI to check
|
|
66
|
+
# @return [URI::Generic, nil] The proxy URI or nil
|
|
67
|
+
def find_proxy(uri)
|
|
68
|
+
uri_obj = coerce_uri(uri)
|
|
69
|
+
return nil unless uri_obj
|
|
70
|
+
return nil if should_bypass_proxy?(uri_obj)
|
|
71
|
+
|
|
72
|
+
proxy_url = proxy_url_for(uri_obj.scheme)
|
|
73
|
+
return nil unless proxy_url
|
|
74
|
+
|
|
75
|
+
URI.parse(proxy_url)
|
|
76
|
+
rescue URI::InvalidURIError
|
|
77
|
+
nil
|
|
78
|
+
end
|
|
79
|
+
|
|
80
|
+
# Check if URI should bypass proxy based on no_proxy
|
|
81
|
+
# @param uri [URI::Generic] The URI to check
|
|
82
|
+
# @return [Boolean] True if should bypass proxy
|
|
83
|
+
def should_bypass_proxy?(uri)
|
|
84
|
+
no_proxy = ENV['no_proxy'] || ENV.fetch('NO_PROXY', nil)
|
|
85
|
+
return false unless no_proxy
|
|
86
|
+
|
|
87
|
+
host = proxy_host(uri)
|
|
88
|
+
return false unless host
|
|
89
|
+
|
|
90
|
+
no_proxy
|
|
91
|
+
.split(',')
|
|
92
|
+
.map(&:strip)
|
|
93
|
+
.reject(&:empty?)
|
|
94
|
+
.any? { |pattern| proxy_pattern_match?(host, pattern) }
|
|
95
|
+
end
|
|
96
|
+
private_class_method :should_bypass_proxy?
|
|
97
|
+
|
|
98
|
+
def coerce_uri(uri)
|
|
99
|
+
uri.is_a?(String) ? URI.parse(uri) : uri
|
|
100
|
+
end
|
|
101
|
+
private_class_method :coerce_uri
|
|
102
|
+
|
|
103
|
+
def proxy_url_for(scheme)
|
|
104
|
+
proxy_env_keys(scheme).each do |key|
|
|
105
|
+
value = env_value(key)
|
|
106
|
+
return value if value
|
|
107
|
+
end
|
|
108
|
+
|
|
109
|
+
nil
|
|
110
|
+
end
|
|
111
|
+
private_class_method :proxy_url_for
|
|
112
|
+
|
|
113
|
+
def env_value(key)
|
|
114
|
+
value = ENV.fetch(key, nil)
|
|
115
|
+
value unless value.nil? || value.empty?
|
|
116
|
+
end
|
|
117
|
+
private_class_method :env_value
|
|
118
|
+
|
|
119
|
+
def proxy_host(uri)
|
|
120
|
+
uri.hostname || uri.host
|
|
121
|
+
end
|
|
122
|
+
private_class_method :proxy_host
|
|
123
|
+
|
|
124
|
+
# Reports whether +host+ is covered by a single no_proxy entry.
#
# @param host [String] host name being checked
# @param pattern [String] one no_proxy entry: "*", a host name, or a domain
#   suffix with an optional leading dot
# @return [Boolean] true when the entry is "*", equals the host, or names a
#   parent domain of the host
def proxy_pattern_match?(host, pattern)
  return true if pattern == '*'

  # Host names are case-insensitive, so compare lowercased forms.
  candidate = host.downcase
  suffix = pattern.delete_prefix('.').downcase
  # A lone "." leaves nothing to compare; without this guard it would match
  # every host written with a trailing dot.
  return false if suffix.empty?

  candidate == suffix || candidate.end_with?(".#{suffix}")
end
|
|
130
|
+
private_class_method :proxy_pattern_match?
|
|
131
|
+
|
|
132
|
+
def proxy_env_keys(scheme)
|
|
133
|
+
scheme_keys = if scheme && !scheme.empty?
|
|
134
|
+
["#{scheme.downcase}_proxy", "#{scheme.upcase}_PROXY"]
|
|
135
|
+
else
|
|
136
|
+
[]
|
|
137
|
+
end
|
|
138
|
+
|
|
139
|
+
(scheme_keys + HTTP_PROXY_KEYS + ALL_PROXY_KEYS).uniq
|
|
140
|
+
end
|
|
141
|
+
private_class_method :proxy_env_keys
|
|
142
|
+
|
|
143
|
+
# Normalize a URI (lowercase scheme and host, remove default ports)
|
|
144
|
+
# @param uri [URI::Generic] The URI to normalize
|
|
145
|
+
# @return [URI::Generic] Normalized URI
|
|
146
|
+
def normalize_uri(uri)
|
|
147
|
+
uri.normalize
|
|
148
|
+
end
|
|
149
|
+
|
|
150
|
+
# Merge a relative URI with a base URI
|
|
151
|
+
# @param base [URI::Generic] The base URI
|
|
152
|
+
# @param relative [String, URI::Generic] The relative URI
|
|
153
|
+
# @return [URI::Generic] The merged URI
|
|
154
|
+
def merge_uri(base, relative)
|
|
155
|
+
base.merge(relative)
|
|
156
|
+
end
|
|
157
|
+
|
|
158
|
+
def default_port_for(uri_or_scheme)
|
|
159
|
+
case uri_or_scheme
|
|
160
|
+
when URI::Generic
|
|
161
|
+
uri_or_scheme.default_port || DEFAULT_PORTS[uri_or_scheme.scheme]
|
|
162
|
+
else
|
|
163
|
+
DEFAULT_PORTS[uri_or_scheme.to_s]
|
|
164
|
+
end
|
|
165
|
+
end
|
|
166
|
+
end
|
|
167
|
+
# rubocop:enable Metrics/ModuleLength
|
|
168
|
+
end
|
|
@@ -8,6 +8,11 @@ module DomainExtractor
|
|
|
8
8
|
IPV4_REGEX = /\A#{IPV4_SEGMENT}(?:\.#{IPV4_SEGMENT}){3}\z/
|
|
9
9
|
IPV6_REGEX = /\A\[?[0-9a-fA-F:]+\]?\z/
|
|
10
10
|
|
|
11
|
+
# Valid hostname pattern (RFC 1123)
|
|
12
|
+
# Allows: letters, numbers, hyphens, dots
|
|
13
|
+
# Must start with alphanumeric, can contain hyphens, must end with alphanumeric
|
|
14
|
+
HOSTNAME_REGEX = /\A[a-z0-9]([a-z0-9-]{0,61}[a-z0-9])?(\.[a-z0-9]([a-z0-9-]{0,61}[a-z0-9])?)*\z/i
|
|
15
|
+
|
|
11
16
|
# Frozen string constants
|
|
12
17
|
DOT = '.'
|
|
13
18
|
COLON = ':'
|
|
@@ -27,5 +32,15 @@ module DomainExtractor
|
|
|
27
32
|
false
|
|
28
33
|
end
|
|
29
34
|
end
|
|
35
|
+
|
|
36
|
+
# Check if a string is a valid hostname
|
|
37
|
+
# @param host [String] The hostname to validate
|
|
38
|
+
# @return [Boolean] True if valid hostname
|
|
39
|
+
def valid_hostname?(host)
|
|
40
|
+
return false if host.nil? || host.empty?
|
|
41
|
+
return false if host.length > 253 # Max hostname length
|
|
42
|
+
|
|
43
|
+
HOSTNAME_REGEX.match?(host)
|
|
44
|
+
end
|
|
30
45
|
end
|
|
31
46
|
end
|