domain_extractor 0.2.5 → 0.2.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +190 -3
- data/README.md +237 -0
- data/lib/domain_extractor/domain_validator.rb +130 -121
- data/lib/domain_extractor/formatter.rb +105 -0
- data/lib/domain_extractor/version.rb +1 -1
- data/lib/domain_extractor.rb +27 -0
- data/spec/domain_validator_spec.rb +1 -1
- data/spec/formatter_spec.rb +299 -0
- metadata +3 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: bb9ff9b765f3037fb6a2f0af330ecf415c76dde8b59aea7c362e353460d20049
|
|
4
|
+
data.tar.gz: 3349872d55a4a6252a1886b69eacc33743af7b6a8d74be1fb142842884cb41e7
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 54d64c3c9b3cf04ac2405c86f563cea1d22d0e37491caee3de5e0a6ab569686ce57b4a0efa9b23729bb6efe5a2eda25b5421c1a0188dd241348f4dd2f0663540
|
|
7
|
+
data.tar.gz: ee15d47829741ac24ebcc13621a62e83f421f6dd2572f0abafeaca2fbac12da02b72bec9350aeb2f375da71de13518a82856dc5af049fb813952509ed9b5e94f
|
data/CHANGELOG.md
CHANGED
|
@@ -5,24 +5,210 @@ All notable changes to this project will be documented in this file.
|
|
|
5
5
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
|
6
6
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
7
|
|
|
8
|
-
## [0.2.
|
|
8
|
+
## [0.2.7] - 2025-11-09
|
|
9
9
|
|
|
10
|
-
### Added
|
|
10
|
+
### Added - URL Formatting API
|
|
11
11
|
|
|
12
|
-
Added a comprehensive
|
|
12
|
+
Added a comprehensive `format` method for programmatic URL normalization and transformation. The formatter provides precise control over URL structure, protocol, and formatting while maintaining the same validation modes as the Rails validator.
|
|
13
13
|
|
|
14
14
|
#### Features
|
|
15
15
|
|
|
16
|
+
**Core Method:**
|
|
17
|
+
- `DomainExtractor.format(url, **options)` - Format and normalize URLs based on specified options
|
|
18
|
+
- Returns formatted URL string or `nil` for invalid input
|
|
19
|
+
- Strips paths and query parameters from URLs
|
|
20
|
+
- Supports all validation modes from the Rails validator
|
|
21
|
+
|
|
22
|
+
**Validation Modes:**
|
|
23
|
+
- `:standard` (default) - Preserves full host as-is while normalizing protocol/slashes
|
|
24
|
+
- `:root_domain` - Strips all subdomains, returns only root domain
|
|
25
|
+
- `:root_or_custom_subdomain` - Preserves custom subdomains but removes 'www'
|
|
26
|
+
|
|
27
|
+
**Formatting Options:**
|
|
28
|
+
- `use_protocol` (default: `true`) - Include/exclude protocol in output
|
|
29
|
+
- `use_https` (default: `true`) - Use HTTPS vs HTTP (only when `use_protocol` is true)
|
|
30
|
+
- `use_trailing_slash` (default: `false`) - Add/remove trailing slash from output
|
|
31
|
+
|
|
32
|
+
#### Usage Examples
|
|
33
|
+
|
|
34
|
+
**Basic Formatting:**
|
|
35
|
+
```ruby
|
|
36
|
+
# Remove trailing slash (default)
|
|
37
|
+
DomainExtractor.format('https://example.com/')
|
|
38
|
+
# => 'https://example.com'
|
|
39
|
+
|
|
40
|
+
# Strip paths and query parameters
|
|
41
|
+
DomainExtractor.format('https://example.com/path?query=value')
|
|
42
|
+
# => 'https://example.com'
|
|
43
|
+
|
|
44
|
+
# Normalize to HTTPS
|
|
45
|
+
DomainExtractor.format('http://example.com')
|
|
46
|
+
# => 'https://example.com'
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
**Validation Modes:**
|
|
50
|
+
```ruby
|
|
51
|
+
# Root domain only (strips subdomains)
|
|
52
|
+
DomainExtractor.format('https://shop.example.com', validation: :root_domain)
|
|
53
|
+
# => 'https://example.com'
|
|
54
|
+
|
|
55
|
+
# Strip www but keep custom subdomains
|
|
56
|
+
DomainExtractor.format('https://www.example.com', validation: :root_or_custom_subdomain)
|
|
57
|
+
# => 'https://example.com'
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
**Protocol Control:**
|
|
61
|
+
```ruby
|
|
62
|
+
# Without protocol
|
|
63
|
+
DomainExtractor.format('https://example.com', use_protocol: false)
|
|
64
|
+
# => 'example.com'
|
|
65
|
+
|
|
66
|
+
# Force HTTP instead of HTTPS
|
|
67
|
+
DomainExtractor.format('https://example.com', use_https: false)
|
|
68
|
+
# => 'http://example.com'
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
**Trailing Slash Control:**
|
|
72
|
+
```ruby
|
|
73
|
+
# Add trailing slash
|
|
74
|
+
DomainExtractor.format('https://example.com', use_trailing_slash: true)
|
|
75
|
+
# => 'https://example.com/'
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
**Combined Options:**
|
|
79
|
+
```ruby
|
|
80
|
+
# Root domain, no protocol, with trailing slash
|
|
81
|
+
DomainExtractor.format('https://shop.example.com/path',
|
|
82
|
+
validation: :root_domain,
|
|
83
|
+
use_protocol: false,
|
|
84
|
+
use_trailing_slash: true)
|
|
85
|
+
# => 'example.com/'
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
#### Real-World Use Cases
|
|
89
|
+
|
|
90
|
+
**Canonical URL Generation:**
|
|
91
|
+
```ruby
|
|
92
|
+
def canonical_url(url)
|
|
93
|
+
DomainExtractor.format(url,
|
|
94
|
+
validation: :root_or_custom_subdomain,
|
|
95
|
+
use_https: true,
|
|
96
|
+
use_trailing_slash: false)
|
|
97
|
+
end
|
|
98
|
+
|
|
99
|
+
canonical_url('http://www.example.com/') # => 'https://example.com'
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
**Domain Normalization for Allowlists:**
|
|
103
|
+
```ruby
|
|
104
|
+
def normalize_domain(url)
|
|
105
|
+
DomainExtractor.format(url, validation: :root_domain, use_protocol: false)
|
|
106
|
+
end
|
|
107
|
+
|
|
108
|
+
normalize_domain('https://shop.example.com/path') # => 'example.com'
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
**Multi-Tenant URL Standardization:**
|
|
112
|
+
```ruby
|
|
113
|
+
class Tenant < ApplicationRecord
|
|
114
|
+
before_validation :normalize_custom_domain
|
|
115
|
+
|
|
116
|
+
private
|
|
117
|
+
|
|
118
|
+
def normalize_custom_domain
|
|
119
|
+
return if custom_domain.blank?
|
|
120
|
+
|
|
121
|
+
self.custom_domain = DomainExtractor.format(
|
|
122
|
+
custom_domain,
|
|
123
|
+
validation: :root_or_custom_subdomain,
|
|
124
|
+
use_https: true,
|
|
125
|
+
use_trailing_slash: false
|
|
126
|
+
)
|
|
127
|
+
end
|
|
128
|
+
end
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
#### Implementation Details
|
|
132
|
+
|
|
133
|
+
- **Performance**: Leverages the existing DomainExtractor parsing engine with minimal overhead
|
|
134
|
+
- **Nil-safe**: Returns `nil` for invalid URLs instead of raising exceptions
|
|
135
|
+
- **Consistent API**: Uses the same option names and validation modes as the Rails validator
|
|
136
|
+
- **Path/Query Stripping**: Automatically removes paths and query parameters
|
|
137
|
+
- **Multi-part TLD Support**: Correctly handles complex TLDs like `.co.uk`, `.com.au`
|
|
138
|
+
|
|
139
|
+
#### Code Quality
|
|
140
|
+
|
|
141
|
+
- **49 comprehensive test cases** covering all formatting modes and options
|
|
142
|
+
- **RuboCop clean** with zero offenses
|
|
143
|
+
- **100% test coverage** maintained across entire gem (200 total tests)
|
|
144
|
+
- **Well-documented** with extensive README section and real-world examples
|
|
145
|
+
|
|
146
|
+
#### Documentation
|
|
147
|
+
|
|
148
|
+
- Added comprehensive **URL Formatting** section to README.md
|
|
149
|
+
- Includes examples for all validation modes and options
|
|
150
|
+
- Real-world use cases: canonical URLs, domain normalization, multi-tenant standardization
|
|
151
|
+
- Clear API reference with all available options
|
|
152
|
+
|
|
153
|
+
## [0.2.6] - 2025-11-09
|
|
154
|
+
|
|
155
|
+
### Fixed - Rails Validator Registration
|
|
156
|
+
|
|
157
|
+
**CRITICAL FIX**: Moved `DomainValidator` class to the **top-level namespace** (from `DomainExtractor::DomainValidator`) to ensure Rails can properly autoload and find the validator.
|
|
158
|
+
|
|
159
|
+
#### The Problem
|
|
160
|
+
|
|
161
|
+
Version 0.2.5 defined the validator as `DomainExtractor::DomainValidator`, which caused Rails to fail with:
|
|
162
|
+
|
|
163
|
+
```
|
|
164
|
+
ArgumentError: Unknown validator: 'DomainValidator'
|
|
165
|
+
NameError: uninitialized constant Website::DomainValidator
|
|
166
|
+
```
|
|
167
|
+
|
|
168
|
+
This occurred because when using `validates :url, domain: { ... }`, Rails searches for `DomainValidator` in:
|
|
169
|
+
|
|
170
|
+
1. The model's namespace (e.g., `Website::DomainValidator`)
|
|
171
|
+
2. The top-level namespace (`::DomainValidator`)
|
|
172
|
+
3. ActiveModel::Validations namespace
|
|
173
|
+
|
|
174
|
+
It does **not** search module namespaces like `DomainExtractor::`.
|
|
175
|
+
|
|
176
|
+
#### The Solution
|
|
177
|
+
|
|
178
|
+
- Moved `DomainValidator` to top-level namespace where Rails can find it
|
|
179
|
+
- Added `DomainExtractor::DomainValidator` as an alias for backward compatibility
|
|
180
|
+
- All functionality remains identical; only the class location changed
|
|
181
|
+
|
|
182
|
+
#### Verification
|
|
183
|
+
|
|
184
|
+
- All 151 tests pass including 35 validator-specific tests
|
|
185
|
+
- RuboCop clean with zero offenses
|
|
186
|
+
- Verified in production Rails 8 application
|
|
187
|
+
- Confirmed working with `validates :url, domain: { validation: :root_or_custom_subdomain }`
|
|
188
|
+
|
|
189
|
+
## [0.2.5] - 2025-11-09 [YANKED]
|
|
190
|
+
|
|
191
|
+
**This version was yanked due to a validator registration issue. Use 0.2.6 instead.**
|
|
192
|
+
|
|
193
|
+
### Added Rails Integration - Custom ActiveModel Validator (BROKEN)
|
|
194
|
+
|
|
195
|
+
Added a comprehensive custom ActiveModel validator for declarative URL and domain validation in Rails applications. However, the validator was incorrectly namespaced and did not work in Rails applications.
|
|
196
|
+
|
|
197
|
+
#### Features (Broken in 0.2.5)
|
|
198
|
+
|
|
16
199
|
**Validation Modes:**
|
|
200
|
+
|
|
17
201
|
- `:standard` - Validates any parseable URL (default mode)
|
|
18
202
|
- `:root_domain` - Only allows root domains without subdomains (e.g., `example.com` ✅, `shop.example.com` ❌)
|
|
19
203
|
- `:root_or_custom_subdomain` - Allows root or custom subdomains but excludes `www` subdomain (e.g., `example.com` ✅, `shop.example.com` ✅, `www.example.com` ❌)
|
|
20
204
|
|
|
21
205
|
**Protocol Options:**
|
|
206
|
+
|
|
22
207
|
- `use_protocol` (default: `true`) - Controls whether protocol (http/https) is required in the URL
|
|
23
208
|
- `use_https` (default: `true`) - Controls whether HTTPS is required (only relevant when `use_protocol` is true)
|
|
24
209
|
|
|
25
210
|
**Usage Examples:**
|
|
211
|
+
|
|
26
212
|
```ruby
|
|
27
213
|
# Standard validation - any valid URL
|
|
28
214
|
validates :url, domain: { validation: :standard }
|
|
@@ -77,6 +263,7 @@ validates :domain, domain: {
|
|
|
77
263
|
#### Use Cases
|
|
78
264
|
|
|
79
265
|
Perfect for Rails applications requiring:
|
|
266
|
+
|
|
80
267
|
- Multi-tenant custom domain validation
|
|
81
268
|
- Secure URL validation (HTTPS enforcement)
|
|
82
269
|
- Subdomain-based architecture validation
|
data/README.md
CHANGED
|
@@ -13,6 +13,8 @@ Use **DomainExtractor** whenever you need a dependable tld parser for tricky mul
|
|
|
13
13
|
✅ **Accurate Multi-part TLD Parser** - Handles complex multi-part TLDs (co.uk, com.au, gov.br) using the [Public Suffix List](https://publicsuffix.org/)
|
|
14
14
|
✅ **Nested Subdomain Extraction** - Correctly parses multi-level subdomains (api.staging.example.com)
|
|
15
15
|
✅ **Smart URL Normalization** - Automatically handles URLs with or without schemes
|
|
16
|
+
✅ **Powerful URL Formatting** - Transform and standardize URLs with flexible options
|
|
17
|
+
✅ **Rails Integration** - Custom ActiveModel validator for declarative URL validation
|
|
16
18
|
✅ **Query Parameter Parsing** - Parse query strings into structured hashes
|
|
17
19
|
✅ **Batch Processing** - Parse multiple URLs efficiently
|
|
18
20
|
✅ **IP Address Detection** - Identifies and handles IPv4 and IPv6 addresses
|
|
@@ -355,6 +357,241 @@ DomainExtractor.parse_query_params(query_string)
|
|
|
355
357
|
# Returns: Hash of query parameters
|
|
356
358
|
```
|
|
357
359
|
|
|
360
|
+
```ruby
|
|
361
|
+
DomainExtractor.format(url_string, **options)
|
|
362
|
+
|
|
363
|
+
# => Formats a URL according to the specified options.
|
|
364
|
+
|
|
365
|
+
# Returns: Formatted URL string or nil if invalid
|
|
366
|
+
# Options:
|
|
367
|
+
# :validation (:standard, :root_domain, :root_or_custom_subdomain)
|
|
368
|
+
# :use_protocol (true/false)
|
|
369
|
+
# :use_https (true/false)
|
|
370
|
+
# :use_trailing_slash (true/false)
|
|
371
|
+
```
|
|
372
|
+
|
|
373
|
+
## URL Formatting
|
|
374
|
+
|
|
375
|
+
DomainExtractor provides powerful URL formatting capabilities to normalize, transform, and standardize URLs according to your application's requirements.
|
|
376
|
+
|
|
377
|
+
### Basic Formatting
|
|
378
|
+
|
|
379
|
+
```ruby
|
|
380
|
+
# Remove trailing slash (default)
|
|
381
|
+
DomainExtractor.format('https://example.com/')
|
|
382
|
+
# => 'https://example.com'
|
|
383
|
+
|
|
384
|
+
# Strip paths and query parameters
|
|
385
|
+
DomainExtractor.format('https://example.com/path?query=value')
|
|
386
|
+
# => 'https://example.com'
|
|
387
|
+
|
|
388
|
+
# Normalize to HTTPS
|
|
389
|
+
DomainExtractor.format('http://example.com')
|
|
390
|
+
# => 'https://example.com'
|
|
391
|
+
```
|
|
392
|
+
|
|
393
|
+
### Validation Modes
|
|
394
|
+
|
|
395
|
+
#### Standard Mode (Default)
|
|
396
|
+
|
|
397
|
+
Preserves the full host as-is while normalizing protocol and trailing slashes.
|
|
398
|
+
|
|
399
|
+
```ruby
|
|
400
|
+
DomainExtractor.format('https://shop.example.com')
|
|
401
|
+
# => 'https://shop.example.com'
|
|
402
|
+
|
|
403
|
+
DomainExtractor.format('https://www.example.com/')
|
|
404
|
+
# => 'https://www.example.com'
|
|
405
|
+
|
|
406
|
+
DomainExtractor.format('https://api.staging.example.com')
|
|
407
|
+
# => 'https://api.staging.example.com'
|
|
408
|
+
```
|
|
409
|
+
|
|
410
|
+
#### Root Domain Mode
|
|
411
|
+
|
|
412
|
+
Strips all subdomains and returns only the root domain.
|
|
413
|
+
|
|
414
|
+
```ruby
|
|
415
|
+
DomainExtractor.format('https://shop.example.com', validation: :root_domain)
|
|
416
|
+
# => 'https://example.com'
|
|
417
|
+
|
|
418
|
+
DomainExtractor.format('https://www.example.com/', validation: :root_domain)
|
|
419
|
+
# => 'https://example.com'
|
|
420
|
+
|
|
421
|
+
DomainExtractor.format('https://api.staging.example.com', validation: :root_domain)
|
|
422
|
+
# => 'https://example.com'
|
|
423
|
+
|
|
424
|
+
# Works with multi-part TLDs
|
|
425
|
+
DomainExtractor.format('https://shop.example.co.uk', validation: :root_domain)
|
|
426
|
+
# => 'https://example.co.uk'
|
|
427
|
+
```
|
|
428
|
+
|
|
429
|
+
#### Root or Custom Subdomain Mode
|
|
430
|
+
|
|
431
|
+
Preserves custom subdomains but specifically removes the 'www' subdomain.
|
|
432
|
+
|
|
433
|
+
```ruby
|
|
434
|
+
DomainExtractor.format('https://example.com', validation: :root_or_custom_subdomain)
|
|
435
|
+
# => 'https://example.com'
|
|
436
|
+
|
|
437
|
+
DomainExtractor.format('https://shop.example.com', validation: :root_or_custom_subdomain)
|
|
438
|
+
# => 'https://shop.example.com'
|
|
439
|
+
|
|
440
|
+
# Strips www subdomain
|
|
441
|
+
DomainExtractor.format('https://www.example.com', validation: :root_or_custom_subdomain)
|
|
442
|
+
# => 'https://example.com'
|
|
443
|
+
|
|
444
|
+
DomainExtractor.format('https://api.example.com', validation: :root_or_custom_subdomain)
|
|
445
|
+
# => 'https://api.example.com'
|
|
446
|
+
```
|
|
447
|
+
|
|
448
|
+
### Protocol Options
|
|
449
|
+
|
|
450
|
+
#### Without Protocol
|
|
451
|
+
|
|
452
|
+
Remove the protocol entirely from the output.
|
|
453
|
+
|
|
454
|
+
```ruby
|
|
455
|
+
DomainExtractor.format('https://example.com', use_protocol: false)
|
|
456
|
+
# => 'example.com'
|
|
457
|
+
|
|
458
|
+
DomainExtractor.format('https://shop.example.com', use_protocol: false)
|
|
459
|
+
# => 'shop.example.com'
|
|
460
|
+
|
|
461
|
+
# Combine with root_domain
|
|
462
|
+
DomainExtractor.format('https://shop.example.com',
|
|
463
|
+
validation: :root_domain,
|
|
464
|
+
use_protocol: false)
|
|
465
|
+
# => 'example.com'
|
|
466
|
+
```
|
|
467
|
+
|
|
468
|
+
#### HTTP vs HTTPS
|
|
469
|
+
|
|
470
|
+
Control which protocol to use in the output.
|
|
471
|
+
|
|
472
|
+
```ruby
|
|
473
|
+
# Default: use HTTPS
|
|
474
|
+
DomainExtractor.format('http://example.com')
|
|
475
|
+
# => 'https://example.com'
|
|
476
|
+
|
|
477
|
+
# Force HTTP instead of HTTPS
|
|
478
|
+
DomainExtractor.format('https://example.com', use_https: false)
|
|
479
|
+
# => 'http://example.com'
|
|
480
|
+
|
|
481
|
+
DomainExtractor.format('http://example.com', use_https: false)
|
|
482
|
+
# => 'http://example.com'
|
|
483
|
+
```
|
|
484
|
+
|
|
485
|
+
### Trailing Slash Options
|
|
486
|
+
|
|
487
|
+
```ruby
|
|
488
|
+
# Remove trailing slash (default)
|
|
489
|
+
DomainExtractor.format('https://example.com/')
|
|
490
|
+
# => 'https://example.com'
|
|
491
|
+
|
|
492
|
+
# Add trailing slash
|
|
493
|
+
DomainExtractor.format('https://example.com', use_trailing_slash: true)
|
|
494
|
+
# => 'https://example.com/'
|
|
495
|
+
|
|
496
|
+
DomainExtractor.format('https://example.com/', use_trailing_slash: true)
|
|
497
|
+
# => 'https://example.com/'
|
|
498
|
+
|
|
499
|
+
# Works with other options
|
|
500
|
+
DomainExtractor.format('https://shop.example.com',
|
|
501
|
+
validation: :root_domain,
|
|
502
|
+
use_trailing_slash: true)
|
|
503
|
+
# => 'https://example.com/'
|
|
504
|
+
```
|
|
505
|
+
|
|
506
|
+
### Combined Options
|
|
507
|
+
|
|
508
|
+
Mix and match options for precise URL formatting:
|
|
509
|
+
|
|
510
|
+
```ruby
|
|
511
|
+
# Root domain, no protocol, with trailing slash
|
|
512
|
+
DomainExtractor.format('https://shop.example.com/path',
|
|
513
|
+
validation: :root_domain,
|
|
514
|
+
use_protocol: false,
|
|
515
|
+
use_trailing_slash: true)
|
|
516
|
+
# => 'example.com/'
|
|
517
|
+
|
|
518
|
+
# Strip www, use HTTP, with trailing slash
|
|
519
|
+
DomainExtractor.format('https://www.example.com',
|
|
520
|
+
validation: :root_or_custom_subdomain,
|
|
521
|
+
use_https: false,
|
|
522
|
+
use_trailing_slash: true)
|
|
523
|
+
# => 'http://example.com/'
|
|
524
|
+
|
|
525
|
+
# Standard mode, no protocol, with trailing slash
|
|
526
|
+
DomainExtractor.format('https://api.example.com',
|
|
527
|
+
use_protocol: false,
|
|
528
|
+
use_trailing_slash: true)
|
|
529
|
+
# => 'api.example.com/'
|
|
530
|
+
```
|
|
531
|
+
|
|
532
|
+
### Real-World Use Cases
|
|
533
|
+
|
|
534
|
+
#### Canonical URL Generation
|
|
535
|
+
|
|
536
|
+
```ruby
|
|
537
|
+
def canonical_url(url)
|
|
538
|
+
DomainExtractor.format(url,
|
|
539
|
+
validation: :root_or_custom_subdomain,
|
|
540
|
+
use_https: true,
|
|
541
|
+
use_trailing_slash: false)
|
|
542
|
+
end
|
|
543
|
+
|
|
544
|
+
canonical_url('http://www.example.com/') # => 'https://example.com'
|
|
545
|
+
canonical_url('https://shop.example.com/') # => 'https://shop.example.com'
|
|
546
|
+
```
|
|
547
|
+
|
|
548
|
+
#### Domain Normalization for Allowlists
|
|
549
|
+
|
|
550
|
+
```ruby
|
|
551
|
+
def normalize_domain_for_allowlist(url)
|
|
552
|
+
DomainExtractor.format(url,
|
|
553
|
+
validation: :root_domain,
|
|
554
|
+
use_protocol: false)
|
|
555
|
+
end
|
|
556
|
+
|
|
557
|
+
normalize_domain_for_allowlist('https://shop.example.com/path') # => 'example.com'
|
|
558
|
+
normalize_domain_for_allowlist('http://www.example.com') # => 'example.com'
|
|
559
|
+
```
|
|
560
|
+
|
|
561
|
+
#### Multi-Tenant URL Standardization
|
|
562
|
+
|
|
563
|
+
```ruby
|
|
564
|
+
class Tenant < ApplicationRecord
|
|
565
|
+
before_validation :normalize_custom_domain
|
|
566
|
+
|
|
567
|
+
private
|
|
568
|
+
|
|
569
|
+
def normalize_custom_domain
|
|
570
|
+
return if custom_domain.blank?
|
|
571
|
+
|
|
572
|
+
self.custom_domain = DomainExtractor.format(
|
|
573
|
+
custom_domain,
|
|
574
|
+
validation: :root_or_custom_subdomain,
|
|
575
|
+
use_https: true,
|
|
576
|
+
use_trailing_slash: false
|
|
577
|
+
)
|
|
578
|
+
end
|
|
579
|
+
end
|
|
580
|
+
```
|
|
581
|
+
|
|
582
|
+
#### API Endpoint Formatting
|
|
583
|
+
|
|
584
|
+
```ruby
|
|
585
|
+
def format_api_endpoint(url)
|
|
586
|
+
DomainExtractor.format(url,
|
|
587
|
+
validation: :standard,
|
|
588
|
+
use_https: true,
|
|
589
|
+
use_trailing_slash: true)
|
|
590
|
+
end
|
|
591
|
+
|
|
592
|
+
format_api_endpoint('http://api.example.com') # => 'https://api.example.com/'
|
|
593
|
+
```
|
|
594
|
+
|
|
358
595
|
## Rails Integration
|
|
359
596
|
|
|
360
597
|
DomainExtractor provides a custom ActiveModel validator for Rails applications, enabling declarative URL/domain validation with multiple modes and options.
|
data/lib/domain_extractor/domain_validator.rb
CHANGED
|
@@ -16,151 +16,160 @@ rescue LoadError
|
|
|
16
16
|
end
|
|
17
17
|
end
|
|
18
18
|
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
19
|
+
# DomainValidator is a custom ActiveModel validator for URL/domain validation.
|
|
20
|
+
#
|
|
21
|
+
# This validator is defined at the top level so Rails can find it when using:
|
|
22
|
+
# validates :url, domain: { validation: :standard }
|
|
23
|
+
#
|
|
24
|
+
# Validation modes:
|
|
25
|
+
# - :standard - Validates any valid URL using DomainExtractor.valid?
|
|
26
|
+
# - :root_domain - Only allows root domains (no subdomains) like https://mysite.com
|
|
27
|
+
# - :root_or_custom_subdomain - Allows root or custom subdomains but excludes 'www'
|
|
28
|
+
#
|
|
29
|
+
# Optional flags:
|
|
30
|
+
# - use_protocol (default: true) - Whether protocol (http/https) is required
|
|
31
|
+
# - use_https (default: true) - Whether https is required (only if use_protocol is true)
|
|
32
|
+
#
|
|
33
|
+
# @example Standard validation
|
|
34
|
+
# validates :url, domain: { validation: :standard }
|
|
35
|
+
#
|
|
36
|
+
# @example Root domain only, no protocol required
|
|
37
|
+
# validates :url, domain: { validation: :root_domain, use_protocol: false }
|
|
38
|
+
#
|
|
39
|
+
# @example Root or custom subdomain with https required
|
|
40
|
+
# validates :url, domain: { validation: :root_or_custom_subdomain, use_https: true }
|
|
41
|
+
class DomainValidator < ActiveModel::EachValidator
|
|
42
|
+
VALIDATION_MODES = %i[standard root_domain root_or_custom_subdomain].freeze
|
|
43
|
+
WWW_SUBDOMAIN = 'www'
|
|
44
|
+
|
|
45
|
+
def validate_each(record, attribute, value)
|
|
46
|
+
return if blank?(value)
|
|
47
|
+
|
|
48
|
+
validation_mode = extract_validation_mode
|
|
49
|
+
use_protocol = options.fetch(:use_protocol, true)
|
|
50
|
+
use_https = options.fetch(:use_https, true)
|
|
51
|
+
|
|
52
|
+
normalized_url = normalize_url(value, use_protocol, use_https)
|
|
53
|
+
|
|
54
|
+
return unless protocol_valid?(record, attribute, normalized_url, use_protocol, use_https)
|
|
55
|
+
|
|
56
|
+
parsed = parse_and_validate_url(record, attribute, normalized_url)
|
|
57
|
+
return unless parsed
|
|
58
|
+
|
|
59
|
+
apply_validation_mode(record, attribute, parsed, validation_mode)
|
|
60
|
+
end
|
|
56
61
|
|
|
57
|
-
|
|
58
|
-
end
|
|
62
|
+
private
|
|
59
63
|
|
|
60
|
-
|
|
64
|
+
# Extract and validate the validation mode option
|
|
65
|
+
def extract_validation_mode
|
|
66
|
+
validation_mode = options.fetch(:validation, :standard)
|
|
67
|
+
return validation_mode if VALIDATION_MODES.include?(validation_mode)
|
|
61
68
|
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
return validation_mode if VALIDATION_MODES.include?(validation_mode)
|
|
69
|
+
raise ArgumentError, "Invalid validation mode: #{validation_mode}. " \
|
|
70
|
+
"Must be one of: #{VALIDATION_MODES.join(', ')}"
|
|
71
|
+
end
|
|
66
72
|
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
73
|
+
# Check protocol requirements
|
|
74
|
+
def protocol_valid?(record, attribute, url, use_protocol, use_https)
|
|
75
|
+
return true unless use_protocol
|
|
76
|
+
return true if valid_protocol?(url, use_https)
|
|
70
77
|
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
78
|
+
protocol = use_https ? 'https://' : 'http:// or https://'
|
|
79
|
+
record.errors.add(attribute, "must use #{protocol}")
|
|
80
|
+
false
|
|
81
|
+
end
|
|
75
82
|
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
83
|
+
# Parse the URL and confirm that it is valid
|
|
84
|
+
def parse_and_validate_url(record, attribute, url)
|
|
85
|
+
parsed = DomainExtractor.parse(url)
|
|
86
|
+
return parsed if parsed.valid?
|
|
80
87
|
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
return parsed if parsed.valid?
|
|
88
|
+
record.errors.add(attribute, 'is not a valid URL')
|
|
89
|
+
nil
|
|
90
|
+
end
|
|
85
91
|
|
|
86
|
-
|
|
92
|
+
# Apply the validation mode rules
|
|
93
|
+
def apply_validation_mode(record, attribute, parsed, validation_mode)
|
|
94
|
+
case validation_mode
|
|
95
|
+
when :standard
|
|
96
|
+
# Already validated - any valid URL passes
|
|
87
97
|
nil
|
|
98
|
+
when :root_domain
|
|
99
|
+
validate_root_domain(record, attribute, parsed)
|
|
100
|
+
when :root_or_custom_subdomain
|
|
101
|
+
validate_root_or_custom_subdomain(record, attribute, parsed)
|
|
88
102
|
end
|
|
103
|
+
end
|
|
89
104
|
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
nil
|
|
96
|
-
when :root_domain
|
|
97
|
-
validate_root_domain(record, attribute, parsed)
|
|
98
|
-
when :root_or_custom_subdomain
|
|
99
|
-
validate_root_or_custom_subdomain(record, attribute, parsed)
|
|
100
|
-
end
|
|
101
|
-
end
|
|
102
|
-
|
|
103
|
-
# Check if value is blank (nil, empty string, or whitespace-only)
|
|
104
|
-
def blank?(value)
|
|
105
|
-
value.nil? || (value.respond_to?(:empty?) && value.empty?) ||
|
|
106
|
-
(value.is_a?(String) && value.strip.empty?)
|
|
107
|
-
end
|
|
108
|
-
|
|
109
|
-
# Normalize URL for validation based on protocol requirements
|
|
110
|
-
def normalize_url(url, use_protocol, use_https)
|
|
111
|
-
return url if blank?(url)
|
|
105
|
+
# Check if value is blank (nil, empty string, or whitespace-only)
|
|
106
|
+
def blank?(value)
|
|
107
|
+
value.nil? || (value.respond_to?(:empty?) && value.empty?) ||
|
|
108
|
+
(value.is_a?(String) && value.strip.empty?)
|
|
109
|
+
end
|
|
112
110
|
|
|
113
|
-
|
|
111
|
+
# Normalize URL for validation based on protocol requirements
|
|
112
|
+
def normalize_url(url, use_protocol, use_https)
|
|
113
|
+
return url if blank?(url)
|
|
114
114
|
|
|
115
|
-
|
|
116
|
-
url = url.gsub(%r{\A[A-Za-z][A-Za-z0-9+\-.]*://}, '') unless use_protocol
|
|
115
|
+
url = url.strip
|
|
117
116
|
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
scheme = use_https ? 'https://' : 'http://'
|
|
121
|
-
url = scheme + url
|
|
122
|
-
end
|
|
117
|
+
# If protocol is not required, strip any existing protocol
|
|
118
|
+
url = url.gsub(%r{\A[A-Za-z][A-Za-z0-9+\-.]*://}, '') unless use_protocol
|
|
123
119
|
|
|
124
|
-
|
|
120
|
+
# Add protocol if needed for parsing
|
|
121
|
+
unless url.match?(%r{\A[A-Za-z][A-Za-z0-9+\-.]*://})
|
|
122
|
+
scheme = use_https ? 'https://' : 'http://'
|
|
123
|
+
url = scheme + url
|
|
125
124
|
end
|
|
126
125
|
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
126
|
+
url
|
|
127
|
+
end
|
|
128
|
+
|
|
129
|
+
# Check if URL has valid protocol
|
|
130
|
+
def valid_protocol?(url, use_https)
|
|
131
|
+
return true unless url.match?(%r{\A[A-Za-z][A-Za-z0-9+\-.]*://})
|
|
130
132
|
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
end
|
|
133
|
+
if use_https
|
|
134
|
+
url.start_with?('https://')
|
|
135
|
+
else
|
|
136
|
+
url.start_with?('http://', 'https://')
|
|
136
137
|
end
|
|
138
|
+
end
|
|
137
139
|
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
140
|
+
# Validate that URL is a root domain (no subdomain)
|
|
141
|
+
def validate_root_domain(record, attribute, parsed)
|
|
142
|
+
return unless parsed.subdomain?
|
|
141
143
|
|
|
142
|
-
|
|
143
|
-
|
|
144
|
+
record.errors.add(attribute, 'must be a root domain (no subdomains allowed)')
|
|
145
|
+
end
|
|
144
146
|
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
147
|
+
# Validate that URL is either root domain or has custom subdomain (not 'www')
|
|
148
|
+
def validate_root_or_custom_subdomain(record, attribute, parsed)
|
|
149
|
+
return unless parsed.subdomain == WWW_SUBDOMAIN
|
|
148
150
|
|
|
149
|
-
|
|
150
|
-
end
|
|
151
|
+
record.errors.add(attribute, 'cannot use www subdomain')
|
|
151
152
|
end
|
|
152
153
|
end
|
|
153
154
|
|
|
154
|
-
#
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
155
|
+
# Also register in DomainExtractor namespace for backwards compatibility
|
|
156
|
+
module DomainExtractor
|
|
157
|
+
# DomainValidator is now defined at the top level for Rails autoloading.
|
|
158
|
+
# This constant provides a reference for explicit usage.
|
|
159
|
+
#
|
|
160
|
+
# Validation modes:
|
|
161
|
+
# - :standard - Validates any valid URL using DomainExtractor.valid?
|
|
162
|
+
# - :root_domain - Only allows root domains (no subdomains) like https://mysite.com
|
|
163
|
+
# - :root_or_custom_subdomain - Allows root or custom subdomains, but excludes 'www'
|
|
164
|
+
#
|
|
165
|
+
# Optional flags:
|
|
166
|
+
# - use_protocol (default: true) - Whether protocol (http/https) is required
|
|
167
|
+
# - use_https (default: true) - Whether https is required (only if use_protocol is true)
|
|
168
|
+
#
|
|
169
|
+
# @example Standard validation
|
|
170
|
+
# validates :url, domain: { validation: :standard }
|
|
171
|
+
#
|
|
172
|
+
# @example Root domain only, no protocol required
|
|
173
|
+
# validates :url, domain: { validation: :root_domain, use_protocol: false }
|
|
174
|
+
DomainValidator = ::DomainValidator
|
|
166
175
|
end
|
data/lib/domain_extractor/formatter.rb
ADDED
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module DomainExtractor
  # Formatter rebuilds a URL from its host according to a validation mode and
  # protocol / trailing-slash preferences.
  #
  # Only the host of the parsed input is reused: the output scheme is chosen by
  # the options, and nothing that followed the host in the input is carried over.
  #
  # Supported options:
  # - Validation modes: :standard, :root_domain, :root_or_custom_subdomain
  # - Protocol options: use_protocol, use_https
  # - Trailing slash: use_trailing_slash
  #
  # @example Standard formatting
  #   DomainExtractor.format('https://www.example.com/')
  #   # => 'https://www.example.com'
  #
  # @example Root domain only
  #   DomainExtractor.format('https://shop.example.com/path', validation: :root_domain)
  #   # => 'https://example.com'
  #
  # @example Without protocol
  #   DomainExtractor.format('https://example.com', use_protocol: false)
  #   # => 'example.com'
  module Formatter
    VALIDATION_MODES = %i[standard root_domain root_or_custom_subdomain].freeze
    WWW_SUBDOMAIN = 'www'

    module_function

    # Format a URL according to the specified options.
    #
    # Option keys other than the ones listed below are ignored.
    #
    # @param url [String] The URL to format
    # @param options [Hash] Formatting options
    # @option options [Symbol] :validation (:standard) Validation mode
    # @option options [Boolean] :use_protocol (true) Include protocol in output
    # @option options [Boolean] :use_https (true) Use https instead of http
    # @option options [Boolean] :use_trailing_slash (false) Include trailing slash
    # @return [String, nil] Formatted URL, or nil if the URL does not parse as
    #   valid or the host part selected by the mode is missing
    # @raise [ArgumentError] if :validation is not one of VALIDATION_MODES
    def call(url, **options)
      validation = options.fetch(:validation, :standard)

      # Checked before parsing, so a bad mode is reported even for an invalid URL.
      validate_options!(validation)

      parsed = DomainExtractor.parse(url)
      return nil unless parsed.valid?

      host = build_host(parsed, validation)
      # Defensive: without a host there is nothing to format, so honour the
      # documented nil contract instead of failing inside string building.
      return nil if host.nil? || host.empty?

      build_url(
        host,
        options.fetch(:use_protocol, true),
        options.fetch(:use_https, true),
        options.fetch(:use_trailing_slash, false)
      )
    end

    # Ensure the requested validation mode is supported.
    #
    # @param validation [Symbol] Requested validation mode
    # @return [nil]
    # @raise [ArgumentError] if the mode is not listed in VALIDATION_MODES
    def validate_options!(validation)
      return if VALIDATION_MODES.include?(validation)

      raise ArgumentError, "Invalid validation mode: #{validation}. " \
                           "Must be one of: #{VALIDATION_MODES.join(', ')}"
    end
    private_class_method :validate_options!

    # Select the host portion to emit for the given validation mode.
    #
    # @param parsed [#host, #root_domain, #subdomain] Result of DomainExtractor.parse
    # @param validation [Symbol] One of VALIDATION_MODES
    # @return [String, nil] Host to use in the formatted URL
    def build_host(parsed, validation)
      case validation
      when :standard
        # Full host, subdomains included
        parsed.host
      when :root_domain
        # Root domain only (no subdomains)
        parsed.root_domain
      when :root_or_custom_subdomain
        # A subdomain of exactly "www" is dropped; any other host is kept whole
        parsed.subdomain == WWW_SUBDOMAIN ? parsed.root_domain : parsed.host
      end
    end
    private_class_method :build_host

    # Assemble the final URL string from the host and the protocol and
    # trailing-slash preferences.
    #
    # @param host [String] Host selected by build_host
    # @param use_protocol [Boolean] Prefix the host with a scheme
    # @param use_https [Boolean] Use https:// rather than http:// (only when use_protocol)
    # @param use_trailing_slash [Boolean] End the result with exactly one added "/"
    # @return [String]
    def build_url(host, use_protocol, use_https, use_trailing_slash)
      scheme = if use_protocol
                 use_https ? 'https://' : 'http://'
               else
                 ''
               end
      url = "#{scheme}#{host}"

      return url.chomp('/') unless use_trailing_slash

      url.end_with?('/') ? url : "#{url}/"
    end
    private_class_method :build_url
  end
end
|
data/lib/domain_extractor.rb
CHANGED
|
@@ -8,6 +8,7 @@ require_relative 'domain_extractor/errors'
|
|
|
8
8
|
require_relative 'domain_extractor/parsed_url'
|
|
9
9
|
require_relative 'domain_extractor/parser'
|
|
10
10
|
require_relative 'domain_extractor/query_params'
|
|
11
|
+
require_relative 'domain_extractor/formatter'
|
|
11
12
|
|
|
12
13
|
# Conditionally load Rails validator if ActiveModel is available
|
|
13
14
|
begin
|
|
@@ -70,6 +71,32 @@ module DomainExtractor
|
|
|
70
71
|
QueryParams.call(query_string)
|
|
71
72
|
end
|
|
72
73
|
|
|
74
|
+
# Format a URL according to the specified options.
|
|
75
|
+
# Returns a formatted URL string or nil if the input is invalid.
|
|
76
|
+
#
|
|
77
|
+
# @param url [String] The URL to format
|
|
78
|
+
# @param options [Hash] Formatting options
|
|
79
|
+
# @option options [Symbol] :validation (:standard) Validation mode
|
|
80
|
+
# @option options [Boolean] :use_protocol (true) Include protocol in output
|
|
81
|
+
# @option options [Boolean] :use_https (true) Use https instead of http
|
|
82
|
+
# @option options [Boolean] :use_trailing_slash (false) Include trailing slash
|
|
83
|
+
# @return [String, nil]
|
|
84
|
+
#
|
|
85
|
+
# @example Standard formatting
|
|
86
|
+
# DomainExtractor.format('https://www.example.com/')
|
|
87
|
+
# # => 'https://www.example.com'
|
|
88
|
+
#
|
|
89
|
+
# @example Root domain only
|
|
90
|
+
# DomainExtractor.format('https://shop.example.com/path', validation: :root_domain)
|
|
91
|
+
# # => 'https://example.com'
|
|
92
|
+
#
|
|
93
|
+
# @example Without protocol
|
|
94
|
+
# DomainExtractor.format('https://example.com', use_protocol: false)
|
|
95
|
+
# # => 'example.com'
|
|
96
|
+
def format(url, **options)
  # Thin wrapper: every keyword option is handed to Formatter.call unchanged.
  Formatter.call(url, **options)
end
|
|
99
|
+
|
|
73
100
|
alias parse_query parse_query_params
|
|
74
101
|
end
|
|
75
102
|
end
|
|
@@ -0,0 +1,299 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'spec_helper'
|
|
4
|
+
|
|
5
|
+
RSpec.describe DomainExtractor::Formatter do
|
|
6
|
+
describe '.call' do
|
|
7
|
+
context 'with :standard validation mode' do
|
|
8
|
+
it 'formats a simple URL with default options' do
|
|
9
|
+
result = described_class.call('https://example.com')
|
|
10
|
+
expect(result).to eq('https://example.com')
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
it 'removes trailing slash by default' do
|
|
14
|
+
result = described_class.call('https://example.com/')
|
|
15
|
+
expect(result).to eq('https://example.com')
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
it 'preserves subdomains' do
|
|
19
|
+
result = described_class.call('https://shop.example.com')
|
|
20
|
+
expect(result).to eq('https://shop.example.com')
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
it 'preserves www subdomain' do
|
|
24
|
+
result = described_class.call('https://www.example.com')
|
|
25
|
+
expect(result).to eq('https://www.example.com')
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
it 'preserves multi-level subdomains' do
|
|
29
|
+
result = described_class.call('https://api.staging.example.com')
|
|
30
|
+
expect(result).to eq('https://api.staging.example.com')
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
it 'handles URLs without protocol' do
|
|
34
|
+
result = described_class.call('example.com')
|
|
35
|
+
expect(result).to eq('https://example.com')
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
it 'strips path from URL' do
|
|
39
|
+
result = described_class.call('https://example.com/path/to/page')
|
|
40
|
+
expect(result).to eq('https://example.com')
|
|
41
|
+
end
|
|
42
|
+
|
|
43
|
+
it 'strips query parameters from URL' do
|
|
44
|
+
result = described_class.call('https://example.com?foo=bar')
|
|
45
|
+
expect(result).to eq('https://example.com')
|
|
46
|
+
end
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
context 'with :root_domain validation mode' do
|
|
50
|
+
it 'returns root domain for URL with subdomain' do
|
|
51
|
+
result = described_class.call('https://shop.example.com', validation: :root_domain)
|
|
52
|
+
expect(result).to eq('https://example.com')
|
|
53
|
+
end
|
|
54
|
+
|
|
55
|
+
it 'returns root domain for URL with www' do
|
|
56
|
+
result = described_class.call('https://www.example.com', validation: :root_domain)
|
|
57
|
+
expect(result).to eq('https://example.com')
|
|
58
|
+
end
|
|
59
|
+
|
|
60
|
+
it 'returns root domain for URL without subdomain' do
|
|
61
|
+
result = described_class.call('https://example.com', validation: :root_domain)
|
|
62
|
+
expect(result).to eq('https://example.com')
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
it 'returns root domain for multi-level subdomains' do
|
|
66
|
+
result = described_class.call('https://api.staging.example.com', validation: :root_domain)
|
|
67
|
+
expect(result).to eq('https://example.com')
|
|
68
|
+
end
|
|
69
|
+
|
|
70
|
+
it 'handles multi-part TLDs' do
|
|
71
|
+
result = described_class.call('https://shop.example.co.uk', validation: :root_domain)
|
|
72
|
+
expect(result).to eq('https://example.co.uk')
|
|
73
|
+
end
|
|
74
|
+
end
|
|
75
|
+
|
|
76
|
+
context 'with :root_or_custom_subdomain validation mode' do
|
|
77
|
+
it 'preserves root domain' do
|
|
78
|
+
result = described_class.call('https://example.com', validation: :root_or_custom_subdomain)
|
|
79
|
+
expect(result).to eq('https://example.com')
|
|
80
|
+
end
|
|
81
|
+
|
|
82
|
+
it 'preserves custom subdomains' do
|
|
83
|
+
result = described_class.call('https://shop.example.com', validation: :root_or_custom_subdomain)
|
|
84
|
+
expect(result).to eq('https://shop.example.com')
|
|
85
|
+
end
|
|
86
|
+
|
|
87
|
+
it 'strips www subdomain' do
|
|
88
|
+
result = described_class.call('https://www.example.com', validation: :root_or_custom_subdomain)
|
|
89
|
+
expect(result).to eq('https://example.com')
|
|
90
|
+
end
|
|
91
|
+
|
|
92
|
+
it 'preserves api subdomain' do
|
|
93
|
+
result = described_class.call('https://api.example.com', validation: :root_or_custom_subdomain)
|
|
94
|
+
expect(result).to eq('https://api.example.com')
|
|
95
|
+
end
|
|
96
|
+
|
|
97
|
+
it 'preserves multi-level custom subdomains' do
|
|
98
|
+
result = described_class.call('https://api.staging.example.com', validation: :root_or_custom_subdomain)
|
|
99
|
+
expect(result).to eq('https://api.staging.example.com')
|
|
100
|
+
end
|
|
101
|
+
end
|
|
102
|
+
|
|
103
|
+
context 'with use_protocol option' do
|
|
104
|
+
it 'includes protocol by default' do
|
|
105
|
+
result = described_class.call('https://example.com')
|
|
106
|
+
expect(result).to eq('https://example.com')
|
|
107
|
+
end
|
|
108
|
+
|
|
109
|
+
it 'includes protocol when use_protocol is true' do
|
|
110
|
+
result = described_class.call('https://example.com', use_protocol: true)
|
|
111
|
+
expect(result).to eq('https://example.com')
|
|
112
|
+
end
|
|
113
|
+
|
|
114
|
+
it 'excludes protocol when use_protocol is false' do
|
|
115
|
+
result = described_class.call('https://example.com', use_protocol: false)
|
|
116
|
+
expect(result).to eq('example.com')
|
|
117
|
+
end
|
|
118
|
+
|
|
119
|
+
it 'excludes protocol with subdomain' do
|
|
120
|
+
result = described_class.call('https://shop.example.com', use_protocol: false)
|
|
121
|
+
expect(result).to eq('shop.example.com')
|
|
122
|
+
end
|
|
123
|
+
|
|
124
|
+
it 'works with root_domain validation' do
|
|
125
|
+
result = described_class.call('https://shop.example.com',
|
|
126
|
+
validation: :root_domain,
|
|
127
|
+
use_protocol: false)
|
|
128
|
+
expect(result).to eq('example.com')
|
|
129
|
+
end
|
|
130
|
+
end
|
|
131
|
+
|
|
132
|
+
context 'with use_https option' do
|
|
133
|
+
it 'uses https by default' do
|
|
134
|
+
result = described_class.call('http://example.com')
|
|
135
|
+
expect(result).to eq('https://example.com')
|
|
136
|
+
end
|
|
137
|
+
|
|
138
|
+
it 'uses https when use_https is true' do
|
|
139
|
+
result = described_class.call('http://example.com', use_https: true)
|
|
140
|
+
expect(result).to eq('https://example.com')
|
|
141
|
+
end
|
|
142
|
+
|
|
143
|
+
it 'uses http when use_https is false' do
|
|
144
|
+
result = described_class.call('https://example.com', use_https: false)
|
|
145
|
+
expect(result).to eq('http://example.com')
|
|
146
|
+
end
|
|
147
|
+
|
|
148
|
+
it 'preserves http when use_https is false' do
|
|
149
|
+
result = described_class.call('http://example.com', use_https: false)
|
|
150
|
+
expect(result).to eq('http://example.com')
|
|
151
|
+
end
|
|
152
|
+
|
|
153
|
+
it 'ignores use_https when use_protocol is false' do
|
|
154
|
+
result = described_class.call('https://example.com',
|
|
155
|
+
use_protocol: false,
|
|
156
|
+
use_https: false)
|
|
157
|
+
expect(result).to eq('example.com')
|
|
158
|
+
end
|
|
159
|
+
end
|
|
160
|
+
|
|
161
|
+
context 'with use_trailing_slash option' do
|
|
162
|
+
it 'removes trailing slash by default' do
|
|
163
|
+
result = described_class.call('https://example.com/')
|
|
164
|
+
expect(result).to eq('https://example.com')
|
|
165
|
+
end
|
|
166
|
+
|
|
167
|
+
it 'removes trailing slash when use_trailing_slash is false' do
|
|
168
|
+
result = described_class.call('https://example.com/', use_trailing_slash: false)
|
|
169
|
+
expect(result).to eq('https://example.com')
|
|
170
|
+
end
|
|
171
|
+
|
|
172
|
+
it 'adds trailing slash when use_trailing_slash is true' do
|
|
173
|
+
result = described_class.call('https://example.com', use_trailing_slash: true)
|
|
174
|
+
expect(result).to eq('https://example.com/')
|
|
175
|
+
end
|
|
176
|
+
|
|
177
|
+
it 'preserves trailing slash when use_trailing_slash is true' do
|
|
178
|
+
result = described_class.call('https://example.com/', use_trailing_slash: true)
|
|
179
|
+
expect(result).to eq('https://example.com/')
|
|
180
|
+
end
|
|
181
|
+
|
|
182
|
+
it 'works without protocol' do
|
|
183
|
+
result = described_class.call('https://example.com',
|
|
184
|
+
use_protocol: false,
|
|
185
|
+
use_trailing_slash: true)
|
|
186
|
+
expect(result).to eq('example.com/')
|
|
187
|
+
end
|
|
188
|
+
|
|
189
|
+
it 'works with root_domain validation' do
|
|
190
|
+
result = described_class.call('https://shop.example.com',
|
|
191
|
+
validation: :root_domain,
|
|
192
|
+
use_trailing_slash: true)
|
|
193
|
+
expect(result).to eq('https://example.com/')
|
|
194
|
+
end
|
|
195
|
+
end
|
|
196
|
+
|
|
197
|
+
context 'with combined options' do
|
|
198
|
+
it 'formats with all options: root_domain, no protocol, with trailing slash' do
|
|
199
|
+
result = described_class.call('https://shop.example.com/path',
|
|
200
|
+
validation: :root_domain,
|
|
201
|
+
use_protocol: false,
|
|
202
|
+
use_trailing_slash: true)
|
|
203
|
+
expect(result).to eq('example.com/')
|
|
204
|
+
end
|
|
205
|
+
|
|
206
|
+
it 'formats with root_or_custom_subdomain, http protocol, no trailing slash' do
|
|
207
|
+
result = described_class.call('https://www.example.com/',
|
|
208
|
+
validation: :root_or_custom_subdomain,
|
|
209
|
+
use_https: false,
|
|
210
|
+
use_trailing_slash: false)
|
|
211
|
+
expect(result).to eq('http://example.com')
|
|
212
|
+
end
|
|
213
|
+
|
|
214
|
+
it 'formats with standard, no protocol, http, with trailing slash' do
  # use_https: false is passed so the example covers the "http" setting named
  # in its description; with use_protocol: false no scheme is emitted either way.
  result = described_class.call('https://api.example.com',
                                validation: :standard,
                                use_protocol: false,
                                use_https: false,
                                use_trailing_slash: true)
  expect(result).to eq('api.example.com/')
end
|
|
221
|
+
|
|
222
|
+
it 'strips www and adds trailing slash' do
|
|
223
|
+
result = described_class.call('https://www.example.com',
|
|
224
|
+
validation: :root_or_custom_subdomain,
|
|
225
|
+
use_trailing_slash: true)
|
|
226
|
+
expect(result).to eq('https://example.com/')
|
|
227
|
+
end
|
|
228
|
+
end
|
|
229
|
+
|
|
230
|
+
context 'with multi-part TLDs' do
|
|
231
|
+
it 'handles UK domains with standard mode' do
|
|
232
|
+
result = described_class.call('https://shop.example.co.uk')
|
|
233
|
+
expect(result).to eq('https://shop.example.co.uk')
|
|
234
|
+
end
|
|
235
|
+
|
|
236
|
+
it 'handles UK domains with root_domain mode' do
|
|
237
|
+
result = described_class.call('https://shop.example.co.uk', validation: :root_domain)
|
|
238
|
+
expect(result).to eq('https://example.co.uk')
|
|
239
|
+
end
|
|
240
|
+
|
|
241
|
+
it 'handles Australian domains' do
|
|
242
|
+
result = described_class.call('https://www.example.com.au',
|
|
243
|
+
validation: :root_or_custom_subdomain)
|
|
244
|
+
expect(result).to eq('https://example.com.au')
|
|
245
|
+
end
|
|
246
|
+
end
|
|
247
|
+
|
|
248
|
+
context 'with invalid input' do
|
|
249
|
+
it 'returns nil for invalid URLs' do
|
|
250
|
+
result = described_class.call('not-a-url')
|
|
251
|
+
expect(result).to be_nil
|
|
252
|
+
end
|
|
253
|
+
|
|
254
|
+
it 'returns nil for nil input' do
|
|
255
|
+
result = described_class.call(nil)
|
|
256
|
+
expect(result).to be_nil
|
|
257
|
+
end
|
|
258
|
+
|
|
259
|
+
it 'returns nil for empty string' do
|
|
260
|
+
result = described_class.call('')
|
|
261
|
+
expect(result).to be_nil
|
|
262
|
+
end
|
|
263
|
+
|
|
264
|
+
it 'returns nil for IP addresses' do
|
|
265
|
+
result = described_class.call('https://192.168.1.1')
|
|
266
|
+
expect(result).to be_nil
|
|
267
|
+
end
|
|
268
|
+
end
|
|
269
|
+
|
|
270
|
+
context 'with error handling' do
|
|
271
|
+
it 'raises error for invalid validation mode' do
|
|
272
|
+
expect do
|
|
273
|
+
described_class.call('https://example.com', validation: :invalid_mode)
|
|
274
|
+
end.to raise_error(ArgumentError, /Invalid validation mode/)
|
|
275
|
+
end
|
|
276
|
+
end
|
|
277
|
+
end
|
|
278
|
+
end
|
|
279
|
+
|
|
280
|
+
RSpec.describe DomainExtractor do
|
|
281
|
+
describe '.format' do
|
|
282
|
+
it 'delegates to Formatter.call' do
|
|
283
|
+
result = DomainExtractor.format('https://www.example.com/')
|
|
284
|
+
expect(result).to eq('https://www.example.com')
|
|
285
|
+
end
|
|
286
|
+
|
|
287
|
+
it 'passes options correctly' do
|
|
288
|
+
result = DomainExtractor.format('https://shop.example.com',
|
|
289
|
+
validation: :root_domain,
|
|
290
|
+
use_protocol: false)
|
|
291
|
+
expect(result).to eq('example.com')
|
|
292
|
+
end
|
|
293
|
+
|
|
294
|
+
it 'returns nil for invalid URLs' do
|
|
295
|
+
result = DomainExtractor.format('invalid-url')
|
|
296
|
+
expect(result).to be_nil
|
|
297
|
+
end
|
|
298
|
+
end
|
|
299
|
+
end
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: domain_extractor
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.2.5
|
|
4
|
+
version: 0.2.7
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- OpenSite AI
|
|
@@ -43,6 +43,7 @@ files:
|
|
|
43
43
|
- lib/domain_extractor.rb
|
|
44
44
|
- lib/domain_extractor/domain_validator.rb
|
|
45
45
|
- lib/domain_extractor/errors.rb
|
|
46
|
+
- lib/domain_extractor/formatter.rb
|
|
46
47
|
- lib/domain_extractor/normalizer.rb
|
|
47
48
|
- lib/domain_extractor/parsed_url.rb
|
|
48
49
|
- lib/domain_extractor/parser.rb
|
|
@@ -52,6 +53,7 @@ files:
|
|
|
52
53
|
- lib/domain_extractor/version.rb
|
|
53
54
|
- spec/domain_extractor_spec.rb
|
|
54
55
|
- spec/domain_validator_spec.rb
|
|
56
|
+
- spec/formatter_spec.rb
|
|
55
57
|
- spec/parsed_url_spec.rb
|
|
56
58
|
- spec/spec_helper.rb
|
|
57
59
|
homepage: https://github.com/opensite-ai/domain_extractor
|