domain_extractor 0.2.7 → 0.2.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +268 -0
- data/LICENSE +28 -0
- data/README.md +395 -5
- data/lib/domain_extractor/auth.rb +82 -0
- data/lib/domain_extractor/parsed_url.rb +236 -5
- data/lib/domain_extractor/parser.rb +91 -14
- data/lib/domain_extractor/result.rb +40 -9
- data/lib/domain_extractor/uri_helpers.rb +168 -0
- data/lib/domain_extractor/validators.rb +15 -0
- data/lib/domain_extractor/version.rb +1 -1
- data/lib/domain_extractor.rb +30 -0
- data/spec/auth_and_uri_spec.rb +454 -0
- data/spec/domain_extractor_spec.rb +2 -2
- data/spec/domain_validator_spec.rb +1 -1
- data/spec/formatter_spec.rb +2 -2
- metadata +32 -12
- data/LICENSE.txt +0 -21
data/lib/domain_extractor.rb
CHANGED
|
@@ -9,6 +9,7 @@ require_relative 'domain_extractor/parsed_url'
|
|
|
9
9
|
require_relative 'domain_extractor/parser'
|
|
10
10
|
require_relative 'domain_extractor/query_params'
|
|
11
11
|
require_relative 'domain_extractor/formatter'
|
|
12
|
+
require_relative 'domain_extractor/uri_helpers'
|
|
12
13
|
|
|
13
14
|
# Conditionally load Rails validator if ActiveModel is available
|
|
14
15
|
begin
|
|
@@ -98,5 +99,34 @@ module DomainExtractor
|
|
|
98
99
|
end
|
|
99
100
|
|
|
100
101
|
alias parse_query parse_query_params
|
|
102
|
+
|
|
103
|
+
# Generate Basic Authentication header
|
|
104
|
+
# @param username [String] The username
|
|
105
|
+
# @param password [String] The password
|
|
106
|
+
# @return [String] The Authorization header value
|
|
107
|
+
def basic_auth_header(username, password)
|
|
108
|
+
URIHelpers.basic_auth_header(username, password)
|
|
109
|
+
end
|
|
110
|
+
|
|
111
|
+
# Generate Bearer token header
|
|
112
|
+
# @param token [String] The bearer token
|
|
113
|
+
# @return [String] The Authorization header value
|
|
114
|
+
def bearer_auth_header(token)
|
|
115
|
+
URIHelpers.bearer_auth_header(token)
|
|
116
|
+
end
|
|
117
|
+
|
|
118
|
+
# Encode a credential for use in URLs (percent-encoding)
|
|
119
|
+
# @param value [String] The value to encode
|
|
120
|
+
# @return [String] Percent-encoded value
|
|
121
|
+
def encode_credential(value)
|
|
122
|
+
URIHelpers.encode_credential(value)
|
|
123
|
+
end
|
|
124
|
+
|
|
125
|
+
# Decode a percent-encoded credential
|
|
126
|
+
# @param value [String] The encoded value
|
|
127
|
+
# @return [String] Decoded value
|
|
128
|
+
def decode_credential(value)
|
|
129
|
+
URIHelpers.decode_credential(value)
|
|
130
|
+
end
|
|
101
131
|
end
|
|
102
132
|
end
|
data/spec/auth_and_uri_spec.rb
ADDED
|
@@ -0,0 +1,454 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'spec_helper'
|
|
4
|
+
|
|
5
|
+
RSpec.describe 'Auth Extraction and URI Features' do
|
|
6
|
+
describe 'Redis URL parsing' do
|
|
7
|
+
it 'parses Redis URL with username and password' do
|
|
8
|
+
url = 'redis://username:password@localhost:6379/0'
|
|
9
|
+
result = DomainExtractor.parse(url)
|
|
10
|
+
|
|
11
|
+
expect(result.valid?).to be(true)
|
|
12
|
+
expect(result.scheme).to eq('redis')
|
|
13
|
+
expect(result.user).to eq('username')
|
|
14
|
+
expect(result.password).to eq('password')
|
|
15
|
+
expect(result.host).to eq('localhost')
|
|
16
|
+
expect(result.port).to eq(6379)
|
|
17
|
+
expect(result.path).to eq('/0')
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
it 'parses Redis URL with password only (no username)' do
|
|
21
|
+
url = 'redis://:my_password@localhost:6379/0'
|
|
22
|
+
result = DomainExtractor.parse(url)
|
|
23
|
+
|
|
24
|
+
expect(result.valid?).to be(true)
|
|
25
|
+
expect(result.user).to be_nil
|
|
26
|
+
expect(result.password).to eq('my_password')
|
|
27
|
+
expect(result.userinfo).to eq(':my_password')
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
it 'parses Rediss (SSL) URL' do
|
|
31
|
+
url = 'rediss://default:my_secret_pw@redissubdomain.dragonflydb.cloud:6385'
|
|
32
|
+
result = DomainExtractor.parse(url)
|
|
33
|
+
|
|
34
|
+
expect(result.valid?).to be(true)
|
|
35
|
+
expect(DomainExtractor.valid?(url)).to be(true)
|
|
36
|
+
expect(result.scheme).to eq('rediss')
|
|
37
|
+
expect(result.user).to eq('default')
|
|
38
|
+
expect(result.password).to eq('my_secret_pw')
|
|
39
|
+
expect(result.host).to eq('redissubdomain.dragonflydb.cloud')
|
|
40
|
+
expect(result.port).to eq(6385)
|
|
41
|
+
end
|
|
42
|
+
|
|
43
|
+
it 'parses Redis URL with special characters in password' do
|
|
44
|
+
url = 'redis://user:P%40ss%3Aword@localhost:6379'
|
|
45
|
+
result = DomainExtractor.parse(url)
|
|
46
|
+
|
|
47
|
+
expect(result.password).to eq('P%40ss%3Aword')
|
|
48
|
+
expect(result.decoded_password).to eq('P@ss:word')
|
|
49
|
+
end
|
|
50
|
+
end
|
|
51
|
+
|
|
52
|
+
describe 'Database URL parsing' do
|
|
53
|
+
it 'parses PostgreSQL URL' do
|
|
54
|
+
url = 'postgresql://janedoe:mypassword@localhost:5432/mydb'
|
|
55
|
+
result = DomainExtractor.parse(url)
|
|
56
|
+
|
|
57
|
+
expect(result.valid?).to be(true)
|
|
58
|
+
expect(result.scheme).to eq('postgresql')
|
|
59
|
+
expect(result.user).to eq('janedoe')
|
|
60
|
+
expect(result.password).to eq('mypassword')
|
|
61
|
+
expect(result.host).to eq('localhost')
|
|
62
|
+
expect(result.port).to eq(5432)
|
|
63
|
+
expect(result.path).to eq('/mydb')
|
|
64
|
+
end
|
|
65
|
+
|
|
66
|
+
it 'parses MySQL URL' do
|
|
67
|
+
# Password with @ must be percent-encoded
|
|
68
|
+
url = 'mysql://webapp:P%40ssw0rd@db-server.example.com:3306/app_database'
|
|
69
|
+
result = DomainExtractor.parse(url)
|
|
70
|
+
|
|
71
|
+
expect(result.valid?).to be(true)
|
|
72
|
+
expect(result.scheme).to eq('mysql')
|
|
73
|
+
expect(result.user).to eq('webapp')
|
|
74
|
+
expect(result.password).to eq('P%40ssw0rd')
|
|
75
|
+
expect(result.decoded_password).to eq('P@ssw0rd')
|
|
76
|
+
expect(result.host).to eq('db-server.example.com')
|
|
77
|
+
expect(result.port).to eq(3306)
|
|
78
|
+
end
|
|
79
|
+
|
|
80
|
+
it 'parses MongoDB URL' do
|
|
81
|
+
url = 'mongodb+srv://root:password123@cluster0.ab1cd.mongodb.net/myDatabase'
|
|
82
|
+
result = DomainExtractor.parse(url)
|
|
83
|
+
|
|
84
|
+
expect(result.valid?).to be(true)
|
|
85
|
+
expect(result.scheme).to eq('mongodb+srv')
|
|
86
|
+
expect(result.user).to eq('root')
|
|
87
|
+
expect(result.password).to eq('password123')
|
|
88
|
+
end
|
|
89
|
+
end
|
|
90
|
+
|
|
91
|
+
describe 'FTP/SFTP URL parsing' do
|
|
92
|
+
it 'parses FTP URL with credentials' do
|
|
93
|
+
url = 'ftp://ftpuser:ftppass@ftp.example.com/path/to/file'
|
|
94
|
+
result = DomainExtractor.parse(url)
|
|
95
|
+
|
|
96
|
+
expect(result.valid?).to be(true)
|
|
97
|
+
expect(result.scheme).to eq('ftp')
|
|
98
|
+
expect(result.user).to eq('ftpuser')
|
|
99
|
+
expect(result.password).to eq('ftppass')
|
|
100
|
+
expect(result.host).to eq('ftp.example.com')
|
|
101
|
+
# FTP paths may not include leading slash depending on URI parsing
|
|
102
|
+
expect(result.path).to match(%r{^/?path/to/file$})
|
|
103
|
+
end
|
|
104
|
+
|
|
105
|
+
it 'parses SFTP URL' do
|
|
106
|
+
url = 'sftp://deploy_user:DeployKey123@deployment.internal:22/var/www/app'
|
|
107
|
+
result = DomainExtractor.parse(url)
|
|
108
|
+
|
|
109
|
+
expect(result.valid?).to be(true)
|
|
110
|
+
expect(result.scheme).to eq('sftp')
|
|
111
|
+
expect(result.user).to eq('deploy_user')
|
|
112
|
+
expect(result.password).to eq('DeployKey123')
|
|
113
|
+
expect(result.port).to eq(22)
|
|
114
|
+
end
|
|
115
|
+
end
|
|
116
|
+
|
|
117
|
+
describe 'Special character handling in credentials' do
|
|
118
|
+
it 'handles @ symbol in username' do
|
|
119
|
+
url = 'https://user%40domain.com:password@example.com'
|
|
120
|
+
result = DomainExtractor.parse(url)
|
|
121
|
+
|
|
122
|
+
expect(result.user).to eq('user%40domain.com')
|
|
123
|
+
expect(result.decoded_user).to eq('user@domain.com')
|
|
124
|
+
end
|
|
125
|
+
|
|
126
|
+
it 'handles colon in password' do
|
|
127
|
+
url = 'https://user:Pass%3Aword@example.com'
|
|
128
|
+
result = DomainExtractor.parse(url)
|
|
129
|
+
|
|
130
|
+
expect(result.password).to eq('Pass%3Aword')
|
|
131
|
+
expect(result.decoded_password).to eq('Pass:word')
|
|
132
|
+
end
|
|
133
|
+
|
|
134
|
+
it 'handles multiple special characters in password' do
|
|
135
|
+
url = 'https://user:P%40%24%24w0rd%21@example.com'
|
|
136
|
+
result = DomainExtractor.parse(url)
|
|
137
|
+
|
|
138
|
+
expect(result.password).to eq('P%40%24%24w0rd%21')
|
|
139
|
+
expect(result.decoded_password).to eq('P@$$w0rd!')
|
|
140
|
+
end
|
|
141
|
+
|
|
142
|
+
it 'handles empty password' do
|
|
143
|
+
url = 'https://user:@example.com'
|
|
144
|
+
result = DomainExtractor.parse(url)
|
|
145
|
+
|
|
146
|
+
expect(result.user).to eq('user')
|
|
147
|
+
expect(result.password).to eq('')
|
|
148
|
+
end
|
|
149
|
+
|
|
150
|
+
it 'handles username only (no password)' do
|
|
151
|
+
url = 'https://user@example.com'
|
|
152
|
+
result = DomainExtractor.parse(url)
|
|
153
|
+
|
|
154
|
+
expect(result.user).to eq('user')
|
|
155
|
+
expect(result.password).to be_nil
|
|
156
|
+
end
|
|
157
|
+
end
|
|
158
|
+
|
|
159
|
+
describe 'Authentication helper methods' do
|
|
160
|
+
it 'generates Basic Auth header from credentials' do
|
|
161
|
+
url = 'https://researcher:secure_pwd123@api.example.com'
|
|
162
|
+
result = DomainExtractor.parse(url)
|
|
163
|
+
|
|
164
|
+
header = result.basic_auth_header
|
|
165
|
+
expect(header).to start_with('Basic ')
|
|
166
|
+
|
|
167
|
+
# Verify it's properly base64 encoded
|
|
168
|
+
encoded_part = header.sub('Basic ', '')
|
|
169
|
+
decoded = Base64.strict_decode64(encoded_part)
|
|
170
|
+
expect(decoded).to eq('researcher:secure_pwd123')
|
|
171
|
+
end
|
|
172
|
+
|
|
173
|
+
it 'generates Basic Auth header using module method' do
|
|
174
|
+
header = DomainExtractor.basic_auth_header('user', 'pass')
|
|
175
|
+
expect(header).to start_with('Basic ')
|
|
176
|
+
|
|
177
|
+
encoded_part = header.sub('Basic ', '')
|
|
178
|
+
decoded = Base64.strict_decode64(encoded_part)
|
|
179
|
+
expect(decoded).to eq('user:pass')
|
|
180
|
+
end
|
|
181
|
+
|
|
182
|
+
it 'generates Bearer token header' do
|
|
183
|
+
token = 'eyJhbGciOiJIUzI1NiIs...'
|
|
184
|
+
header = DomainExtractor.bearer_auth_header(token)
|
|
185
|
+
|
|
186
|
+
expect(header).to eq("Bearer #{token}")
|
|
187
|
+
end
|
|
188
|
+
|
|
189
|
+
it 'encodes credentials for URL use' do
|
|
190
|
+
password = 'P@ss:word\!'
|
|
191
|
+
encoded = DomainExtractor.encode_credential(password)
|
|
192
|
+
|
|
193
|
+
expect(encoded).to eq('P%40ss%3Aword%5C%21')
|
|
194
|
+
end
|
|
195
|
+
|
|
196
|
+
it 'decodes percent-encoded credentials' do
|
|
197
|
+
encoded = 'P%40ss%3Aword%21'
|
|
198
|
+
decoded = DomainExtractor.decode_credential(encoded)
|
|
199
|
+
|
|
200
|
+
expect(decoded).to eq('P@ss:word!')
|
|
201
|
+
end
|
|
202
|
+
|
|
203
|
+
it 'encodes spaces as %20 and preserves literal plus signs' do
|
|
204
|
+
encoded = DomainExtractor.encode_credential('a+b c')
|
|
205
|
+
|
|
206
|
+
expect(encoded).to eq('a%2Bb%20c')
|
|
207
|
+
expect(DomainExtractor.decode_credential(encoded)).to eq('a+b c')
|
|
208
|
+
end
|
|
209
|
+
end
|
|
210
|
+
|
|
211
|
+
describe 'URI component accessors' do
|
|
212
|
+
it 'provides scheme accessor' do
|
|
213
|
+
result = DomainExtractor.parse('https://example.com')
|
|
214
|
+
expect(result.scheme).to eq('https')
|
|
215
|
+
end
|
|
216
|
+
|
|
217
|
+
it 'provides port accessor' do
|
|
218
|
+
result = DomainExtractor.parse('https://example.com:8443')
|
|
219
|
+
expect(result.port).to eq(8443)
|
|
220
|
+
end
|
|
221
|
+
|
|
222
|
+
it 'provides fragment accessor' do
|
|
223
|
+
result = DomainExtractor.parse('https://example.com/page#section')
|
|
224
|
+
expect(result.fragment).to eq('section')
|
|
225
|
+
end
|
|
226
|
+
|
|
227
|
+
it 'provides hostname (without IPv6 brackets)' do
|
|
228
|
+
result = DomainExtractor.parse('https://example.com')
|
|
229
|
+
expect(result.hostname).to eq('example.com')
|
|
230
|
+
end
|
|
231
|
+
|
|
232
|
+
it 'provides query string reconstruction' do
|
|
233
|
+
result = DomainExtractor.parse('https://example.com?foo=bar&baz=qux')
|
|
234
|
+
expect(result.query).to include('foo=bar')
|
|
235
|
+
expect(result.query).to include('baz=qux')
|
|
236
|
+
end
|
|
237
|
+
end
|
|
238
|
+
|
|
239
|
+
describe 'URI manipulation methods' do
|
|
240
|
+
it 'checks if URI is absolute' do
|
|
241
|
+
result = DomainExtractor.parse('https://example.com')
|
|
242
|
+
expect(result.absolute?).to be(true)
|
|
243
|
+
end
|
|
244
|
+
|
|
245
|
+
it 'checks if URI is relative' do
|
|
246
|
+
result = DomainExtractor.parse('example.com')
|
|
247
|
+
expect(result.relative?).to be(false) # We normalize to https://
|
|
248
|
+
end
|
|
249
|
+
|
|
250
|
+
it 'provides default port for scheme' do
|
|
251
|
+
https_result = DomainExtractor.parse('https://example.com')
|
|
252
|
+
expect(https_result.default_port).to eq(443)
|
|
253
|
+
|
|
254
|
+
http_result = DomainExtractor.parse('http://example.com')
|
|
255
|
+
expect(http_result.default_port).to eq(80)
|
|
256
|
+
|
|
257
|
+
redis_result = DomainExtractor.parse('redis://localhost')
|
|
258
|
+
expect(redis_result.default_port).to eq(6379)
|
|
259
|
+
|
|
260
|
+
postgres_result = DomainExtractor.parse('postgresql://db.example.com/app')
|
|
261
|
+
expect(postgres_result.default_port).to eq(5432)
|
|
262
|
+
|
|
263
|
+
mysql_result = DomainExtractor.parse('mysql://db.example.com/app')
|
|
264
|
+
expect(mysql_result.default_port).to eq(3306)
|
|
265
|
+
end
|
|
266
|
+
|
|
267
|
+
it 'preserves the raw query string and duplicate keys' do
|
|
268
|
+
result = DomainExtractor.parse('https://example.com/path?foo=bar&foo=baz&empty=#frag')
|
|
269
|
+
|
|
270
|
+
expect(result.query).to eq('foo=bar&foo=baz&empty=')
|
|
271
|
+
expect(result.to_s).to eq('https://example.com/path?foo=bar&foo=baz&empty=#frag')
|
|
272
|
+
expect(result.to_str).to eq(result.to_s)
|
|
273
|
+
end
|
|
274
|
+
|
|
275
|
+
it 'normalizes scheme and host while keeping URI-compatible port behavior' do
|
|
276
|
+
normalized = DomainExtractor.parse('HTTP://EXAMPLE.COM:80/Path').normalize
|
|
277
|
+
|
|
278
|
+
expect(normalized.to_s).to eq('http://example.com/Path')
|
|
279
|
+
expect(normalized.scheme).to eq('http')
|
|
280
|
+
expect(normalized.host).to eq('example.com')
|
|
281
|
+
expect(normalized.port).to eq(80)
|
|
282
|
+
end
|
|
283
|
+
|
|
284
|
+
it 'merges relative paths using URI semantics' do
|
|
285
|
+
base = DomainExtractor.parse('https://example.com/api/v1/')
|
|
286
|
+
merged = base.merge('users/123')
|
|
287
|
+
|
|
288
|
+
expect(merged.to_s).to eq('https://example.com/api/v1/users/123')
|
|
289
|
+
expect(merged.path).to eq('/api/v1/users/123')
|
|
290
|
+
end
|
|
291
|
+
|
|
292
|
+
it 'builds a URL string from the current URI state' do
|
|
293
|
+
result = DomainExtractor.parse('https://user:pass@example.com:8443/path?foo=bar#frag')
|
|
294
|
+
|
|
295
|
+
expect(result.build_url).to eq('https://user:pass@example.com:8443/path?foo=bar#frag')
|
|
296
|
+
end
|
|
297
|
+
end
|
|
298
|
+
|
|
299
|
+
describe 'URI setters' do
|
|
300
|
+
it 'updates core URI components and keeps query params in sync' do
|
|
301
|
+
result = DomainExtractor.parse('http://example.com')
|
|
302
|
+
|
|
303
|
+
result.scheme = 'https'
|
|
304
|
+
result.host = 'api.secure.example.co.uk'
|
|
305
|
+
result.port = 8443
|
|
306
|
+
result.path = '/v1/users'
|
|
307
|
+
result.query = 'page=2&empty='
|
|
308
|
+
result.fragment = 'results'
|
|
309
|
+
|
|
310
|
+
expect(result.scheme).to eq('https')
|
|
311
|
+
expect(result.host).to eq('api.secure.example.co.uk')
|
|
312
|
+
expect(result.subdomain).to eq('api.secure')
|
|
313
|
+
expect(result.root_domain).to eq('example.co.uk')
|
|
314
|
+
expect(result.port).to eq(8443)
|
|
315
|
+
expect(result.path).to eq('/v1/users')
|
|
316
|
+
expect(result.query).to eq('page=2&empty=')
|
|
317
|
+
expect(result.query_params).to eq({ 'page' => '2', 'empty' => nil })
|
|
318
|
+
expect(result.fragment).to eq('results')
|
|
319
|
+
expect(result.to_s).to eq('https://api.secure.example.co.uk:8443/v1/users?page=2&empty=#results')
|
|
320
|
+
end
|
|
321
|
+
|
|
322
|
+
it 'preserves authentication when the host changes' do
|
|
323
|
+
result = DomainExtractor.parse('https://user:pass@example.com')
|
|
324
|
+
|
|
325
|
+
result.host = 'api.example.com'
|
|
326
|
+
|
|
327
|
+
expect(result.user).to eq('user')
|
|
328
|
+
expect(result.password).to eq('pass')
|
|
329
|
+
expect(result.to_s).to eq('https://user:pass@api.example.com')
|
|
330
|
+
end
|
|
331
|
+
|
|
332
|
+
it 'updates auth-derived fields when userinfo changes' do
|
|
333
|
+
result = DomainExtractor.parse('https://example.com')
|
|
334
|
+
|
|
335
|
+
result.userinfo = 'deploy%40site.com:secret%3Avalue'
|
|
336
|
+
|
|
337
|
+
expect(result.user).to eq('deploy%40site.com')
|
|
338
|
+
expect(result.password).to eq('secret%3Avalue')
|
|
339
|
+
expect(result.decoded_user).to eq('deploy@site.com')
|
|
340
|
+
expect(result.decoded_password).to eq('secret:value')
|
|
341
|
+
expect(result.basic_auth_header).to eq('Basic ZGVwbG95QHNpdGUuY29tOnNlY3JldDp2YWx1ZQ==')
|
|
342
|
+
expect(result.to_s).to eq('https://deploy%40site.com:secret%3Avalue@example.com')
|
|
343
|
+
end
|
|
344
|
+
end
|
|
345
|
+
|
|
346
|
+
describe 'Proxy detection' do
|
|
347
|
+
around do |example|
|
|
348
|
+
proxy_keys = %w[http_proxy HTTP_PROXY https_proxy HTTPS_PROXY all_proxy ALL_PROXY no_proxy NO_PROXY]
|
|
349
|
+
previous_env = proxy_keys.to_h { |key| [key, ENV.fetch(key, nil)] }
|
|
350
|
+
|
|
351
|
+
begin
|
|
352
|
+
proxy_keys.each { |key| ENV.delete(key) }
|
|
353
|
+
example.run
|
|
354
|
+
ensure
|
|
355
|
+
previous_env.each do |key, value|
|
|
356
|
+
value.nil? ? ENV.delete(key) : ENV[key] = value
|
|
357
|
+
end
|
|
358
|
+
end
|
|
359
|
+
end
|
|
360
|
+
|
|
361
|
+
it 'prefers the scheme-specific proxy when available' do
|
|
362
|
+
ENV['https_proxy'] = 'http://secure-proxy.internal:8443'
|
|
363
|
+
|
|
364
|
+
result = DomainExtractor.parse('https://api.example.com')
|
|
365
|
+
|
|
366
|
+
expect(result.find_proxy.to_s).to eq('http://secure-proxy.internal:8443')
|
|
367
|
+
end
|
|
368
|
+
|
|
369
|
+
it 'respects no_proxy exclusions' do
|
|
370
|
+
ENV['https_proxy'] = 'http://secure-proxy.internal:8443'
|
|
371
|
+
ENV['no_proxy'] = 'api.example.com'
|
|
372
|
+
|
|
373
|
+
result = DomainExtractor.parse('https://api.example.com')
|
|
374
|
+
|
|
375
|
+
expect(result.find_proxy).to be_nil
|
|
376
|
+
end
|
|
377
|
+
|
|
378
|
+
it 'falls back to http_proxy for custom schemes' do
|
|
379
|
+
ENV['http_proxy'] = 'http://fallback-proxy.internal:8080'
|
|
380
|
+
|
|
381
|
+
result = DomainExtractor.parse('redis://localhost:6379/0')
|
|
382
|
+
|
|
383
|
+
expect(result.find_proxy.to_s).to eq('http://fallback-proxy.internal:8080')
|
|
384
|
+
end
|
|
385
|
+
end
|
|
386
|
+
|
|
387
|
+
describe 'Backward compatibility' do
|
|
388
|
+
it 'maintains hash-style access' do
|
|
389
|
+
url = 'https://user:pass@example.com:8080/path?query=value#fragment'
|
|
390
|
+
result = DomainExtractor.parse(url)
|
|
391
|
+
|
|
392
|
+
expect(result[:host]).to eq('example.com')
|
|
393
|
+
expect(result[:path]).to eq('/path')
|
|
394
|
+
expect(result[:domain]).to eq('example')
|
|
395
|
+
end
|
|
396
|
+
|
|
397
|
+
it 'maintains method-style access for original fields' do
|
|
398
|
+
url = 'https://www.example.com/path'
|
|
399
|
+
result = DomainExtractor.parse(url)
|
|
400
|
+
|
|
401
|
+
expect(result.subdomain).to eq('www')
|
|
402
|
+
expect(result.domain).to eq('example')
|
|
403
|
+
expect(result.tld).to eq('com')
|
|
404
|
+
expect(result.root_domain).to eq('example.com')
|
|
405
|
+
end
|
|
406
|
+
|
|
407
|
+
it 'maintains to_h conversion' do
|
|
408
|
+
url = 'https://user:pass@example.com'
|
|
409
|
+
result = DomainExtractor.parse(url)
|
|
410
|
+
hash = result.to_h
|
|
411
|
+
|
|
412
|
+
expect(hash).to be_a(Hash)
|
|
413
|
+
expect(hash[:user]).to eq('user')
|
|
414
|
+
expect(hash[:password]).to eq('pass')
|
|
415
|
+
end
|
|
416
|
+
end
|
|
417
|
+
|
|
418
|
+
describe 'Edge cases' do
|
|
419
|
+
it 'keeps internal database hosts valid' do
|
|
420
|
+
result = DomainExtractor.parse('postgresql://appuser:SecurePass@db.prod.internal:5432/production')
|
|
421
|
+
|
|
422
|
+
expect(result.valid?).to be(true)
|
|
423
|
+
expect(result.scheme).to eq('postgresql')
|
|
424
|
+
expect(result.host).to eq('db.prod.internal')
|
|
425
|
+
expect(result.subdomain).to eq('db')
|
|
426
|
+
expect(result.domain).to eq('prod')
|
|
427
|
+
expect(result.root_domain).to eq('prod.internal')
|
|
428
|
+
expect(result.tld).to eq('internal')
|
|
429
|
+
end
|
|
430
|
+
|
|
431
|
+
it 'handles URLs without auth' do
|
|
432
|
+
result = DomainExtractor.parse('https://example.com')
|
|
433
|
+
|
|
434
|
+
expect(result.user).to be_nil
|
|
435
|
+
expect(result.password).to be_nil
|
|
436
|
+
expect(result.userinfo).to be_nil
|
|
437
|
+
end
|
|
438
|
+
|
|
439
|
+
it 'handles invalid URLs gracefully' do
|
|
440
|
+
result = DomainExtractor.parse(':::invalid:::')
|
|
441
|
+
|
|
442
|
+
expect(result.valid?).to be(false)
|
|
443
|
+
expect(result.user).to be_nil
|
|
444
|
+
expect(result.password).to be_nil
|
|
445
|
+
end
|
|
446
|
+
|
|
447
|
+
it 'handles nil input' do
|
|
448
|
+
result = DomainExtractor.parse(nil)
|
|
449
|
+
|
|
450
|
+
expect(result.valid?).to be(false)
|
|
451
|
+
expect(result.user).to be_nil
|
|
452
|
+
end
|
|
453
|
+
end
|
|
454
|
+
end
|
data/spec/domain_extractor_spec.rb
CHANGED
|
@@ -272,7 +272,7 @@ RSpec.describe DomainExtractor do
|
|
|
272
272
|
'dashtrack.com',
|
|
273
273
|
'www.insurancesite.ai',
|
|
274
274
|
'https://hitting.com/index',
|
|
275
|
-
'
|
|
275
|
+
'invalid_url_with_underscore',
|
|
276
276
|
''
|
|
277
277
|
]
|
|
278
278
|
|
|
@@ -292,7 +292,7 @@ RSpec.describe DomainExtractor do
|
|
|
292
292
|
end
|
|
293
293
|
|
|
294
294
|
it 'handles all invalid URLs' do
|
|
295
|
-
results = described_class.parse_batch(['
|
|
295
|
+
results = described_class.parse_batch(['invalid_url', '', nil])
|
|
296
296
|
|
|
297
297
|
expect(results).to all(be_nil)
|
|
298
298
|
end
|
data/spec/domain_validator_spec.rb
CHANGED
|
@@ -67,7 +67,7 @@ RSpec.describe DomainValidator do
|
|
|
67
67
|
end
|
|
68
68
|
|
|
69
69
|
it 'rejects invalid URLs' do
|
|
70
|
-
record.url = '
|
|
70
|
+
record.url = 'not_a_url'
|
|
71
71
|
validator.validate_each(record, :url, record.url)
|
|
72
72
|
expect(record.errors.messages).not_to be_empty
|
|
73
73
|
expect(record.errors.messages.first[:message]).to include('not a valid URL')
|
data/spec/formatter_spec.rb
CHANGED
|
@@ -247,7 +247,7 @@ RSpec.describe DomainExtractor::Formatter do
|
|
|
247
247
|
|
|
248
248
|
context 'with invalid input' do
|
|
249
249
|
it 'returns nil for invalid URLs' do
|
|
250
|
-
result = described_class.call('
|
|
250
|
+
result = described_class.call('not_a_url')
|
|
251
251
|
expect(result).to be_nil
|
|
252
252
|
end
|
|
253
253
|
|
|
@@ -292,7 +292,7 @@ RSpec.describe DomainExtractor do
|
|
|
292
292
|
end
|
|
293
293
|
|
|
294
294
|
it 'returns nil for invalid URLs' do
|
|
295
|
-
result = DomainExtractor.format('
|
|
295
|
+
result = DomainExtractor.format('invalid_url')
|
|
296
296
|
expect(result).to be_nil
|
|
297
297
|
end
|
|
298
298
|
end
|
metadata
CHANGED
|
@@ -1,29 +1,48 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: domain_extractor
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.2.7
|
|
4
|
+
version: 0.2.9
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- OpenSite AI
|
|
8
|
-
autorequire:
|
|
9
8
|
bindir: bin
|
|
10
9
|
cert_chain: []
|
|
11
|
-
date:
|
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
|
12
11
|
dependencies:
|
|
13
12
|
- !ruby/object:Gem::Dependency
|
|
14
|
-
name:
|
|
13
|
+
name: base64
|
|
15
14
|
requirement: !ruby/object:Gem::Requirement
|
|
16
15
|
requirements:
|
|
17
16
|
- - "~>"
|
|
18
17
|
- !ruby/object:Gem::Version
|
|
19
|
-
version:
|
|
18
|
+
version: 0.3.0
|
|
20
19
|
type: :runtime
|
|
21
20
|
prerelease: false
|
|
22
21
|
version_requirements: !ruby/object:Gem::Requirement
|
|
23
22
|
requirements:
|
|
24
23
|
- - "~>"
|
|
24
|
+
- !ruby/object:Gem::Version
|
|
25
|
+
version: 0.3.0
|
|
26
|
+
- !ruby/object:Gem::Dependency
|
|
27
|
+
name: public_suffix
|
|
28
|
+
requirement: !ruby/object:Gem::Requirement
|
|
29
|
+
requirements:
|
|
30
|
+
- - ">="
|
|
25
31
|
- !ruby/object:Gem::Version
|
|
26
32
|
version: '6.0'
|
|
33
|
+
- - "<"
|
|
34
|
+
- !ruby/object:Gem::Version
|
|
35
|
+
version: '8.0'
|
|
36
|
+
type: :runtime
|
|
37
|
+
prerelease: false
|
|
38
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
39
|
+
requirements:
|
|
40
|
+
- - ">="
|
|
41
|
+
- !ruby/object:Gem::Version
|
|
42
|
+
version: '6.0'
|
|
43
|
+
- - "<"
|
|
44
|
+
- !ruby/object:Gem::Version
|
|
45
|
+
version: '8.0'
|
|
27
46
|
description: |-
|
|
28
47
|
DomainExtractor is a high-performance url parser and domain parser for Ruby. It delivers precise
|
|
29
48
|
domain extraction, query parameter parsing, url normalization, and multi-part tld parsing via
|
|
@@ -32,15 +51,16 @@ email: dev@opensite.ai
|
|
|
32
51
|
executables: []
|
|
33
52
|
extensions: []
|
|
34
53
|
extra_rdoc_files:
|
|
35
|
-
- README.md
|
|
36
|
-
- LICENSE.txt
|
|
37
54
|
- CHANGELOG.md
|
|
55
|
+
- LICENSE
|
|
56
|
+
- README.md
|
|
38
57
|
files:
|
|
39
58
|
- ".rubocop.yml"
|
|
40
59
|
- CHANGELOG.md
|
|
41
|
-
- LICENSE.txt
|
|
60
|
+
- LICENSE
|
|
42
61
|
- README.md
|
|
43
62
|
- lib/domain_extractor.rb
|
|
63
|
+
- lib/domain_extractor/auth.rb
|
|
44
64
|
- lib/domain_extractor/domain_validator.rb
|
|
45
65
|
- lib/domain_extractor/errors.rb
|
|
46
66
|
- lib/domain_extractor/formatter.rb
|
|
@@ -49,8 +69,10 @@ files:
|
|
|
49
69
|
- lib/domain_extractor/parser.rb
|
|
50
70
|
- lib/domain_extractor/query_params.rb
|
|
51
71
|
- lib/domain_extractor/result.rb
|
|
72
|
+
- lib/domain_extractor/uri_helpers.rb
|
|
52
73
|
- lib/domain_extractor/validators.rb
|
|
53
74
|
- lib/domain_extractor/version.rb
|
|
75
|
+
- spec/auth_and_uri_spec.rb
|
|
54
76
|
- spec/domain_extractor_spec.rb
|
|
55
77
|
- spec/domain_validator_spec.rb
|
|
56
78
|
- spec/formatter_spec.rb
|
|
@@ -58,7 +80,7 @@ files:
|
|
|
58
80
|
- spec/spec_helper.rb
|
|
59
81
|
homepage: https://github.com/opensite-ai/domain_extractor
|
|
60
82
|
licenses:
|
|
61
|
-
- MIT
|
|
83
|
+
- BSD-3-Clause
|
|
62
84
|
metadata:
|
|
63
85
|
source_code_uri: https://github.com/opensite-ai/domain_extractor
|
|
64
86
|
changelog_uri: https://github.com/opensite-ai/domain_extractor/blob/master/CHANGELOG.md
|
|
@@ -68,7 +90,6 @@ metadata:
|
|
|
68
90
|
wiki_uri: https://docs.devguides.com/domain_extractor
|
|
69
91
|
rubygems_mfa_required: 'true'
|
|
70
92
|
allowed_push_host: https://rubygems.org
|
|
71
|
-
post_install_message:
|
|
72
93
|
rdoc_options:
|
|
73
94
|
- "--main"
|
|
74
95
|
- README.md
|
|
@@ -89,8 +110,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
89
110
|
- !ruby/object:Gem::Version
|
|
90
111
|
version: '0'
|
|
91
112
|
requirements: []
|
|
92
|
-
rubygems_version:
|
|
93
|
-
signing_key:
|
|
113
|
+
rubygems_version: 4.0.7
|
|
94
114
|
specification_version: 4
|
|
95
115
|
summary: High-performance url parser and domain extractor for Ruby
|
|
96
116
|
test_files: []
|
data/LICENSE.txt
DELETED
|
@@ -1,21 +0,0 @@
|
|
|
1
|
-
MIT License
|
|
2
|
-
|
|
3
|
-
Copyright (c) 2025 OpenSite AI
|
|
4
|
-
|
|
5
|
-
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
-
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
-
in the Software without restriction, including without limitation the rights
|
|
8
|
-
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
-
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
-
furnished to do so, subject to the following conditions:
|
|
11
|
-
|
|
12
|
-
The above copyright notice and this permission notice shall be included in all
|
|
13
|
-
copies or substantial portions of the Software.
|
|
14
|
-
|
|
15
|
-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
-
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
-
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
-
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
-
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
-
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
-
SOFTWARE.
|