domain_extractor 0.1.6 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +33 -0
- data/README.md +205 -3
- data/lib/domain_extractor/errors.rb +11 -0
- data/lib/domain_extractor/parsed_url.rb +131 -0
- data/lib/domain_extractor/parser.rb +23 -7
- data/lib/domain_extractor/result.rb +5 -1
- data/lib/domain_extractor/version.rb +1 -1
- data/lib/domain_extractor.rb +22 -4
- data/spec/domain_extractor_spec.rb +51 -13
- data/spec/parsed_url_spec.rb +422 -0
- metadata +4 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 4bc4d6ad831692d1251048f8b21820bb0efb10ed5b3cce641441b31afb5308b4
|
|
4
|
+
data.tar.gz: 67a96b33dc3544847af271c8bd837dbc592031bff5dac126022a147c2281460c
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 02bca764446a3391461695cfeeaef9c6e7920308bc78768b062ae676005d3610b09733133cb10c34cd5e29dc35169f770b4789f418fd554cba762a6d5a19022a
|
|
7
|
+
data.tar.gz: eeaaa8356b306feba33e08e54c8da2926f7e052ebac5d6920f0a6f26c0dacd3bfbb0d4f863fa694377d87441564a2c1eecff764d756ce4efde0569fabf573ee2
|
data/CHANGELOG.md
CHANGED
|
@@ -7,6 +7,39 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
7
7
|
|
|
8
8
|
## [Unreleased]
|
|
9
9
|
|
|
10
|
+
## [0.1.8] - 2025-10-31
|
|
11
|
+
|
|
12
|
+
### Implemented Declarative Method-style Accessors
|
|
13
|
+
|
|
14
|
+
#### Added
|
|
15
|
+
|
|
16
|
+
- **ParsedURL API**: Introduced intuitive method-style accessors with three variants:
|
|
17
|
+
- Default methods (e.g., `result.subdomain`) - Returns value or nil
|
|
18
|
+
- Bang methods (e.g., `result.subdomain!`) - Returns value or raises `InvalidURLError`
|
|
19
|
+
- Question methods (e.g., `result.subdomain?`) - Returns boolean true/false
|
|
20
|
+
- Added `www_subdomain?` helper method to check if subdomain is specifically 'www'
|
|
21
|
+
- Added `valid?` method to check if parsed result contains valid data
|
|
22
|
+
- Added `to_h` and `to_hash` methods for hash conversion
|
|
23
|
+
- Comprehensive documentation in `docs/PARSED_URL_API.md`
|
|
24
|
+
|
|
25
|
+
#### Changed
|
|
26
|
+
|
|
27
|
+
- `DomainExtractor.parse` now returns `ParsedURL` object instead of plain Hash (backward compatible via `[]` accessor)
|
|
28
|
+
- `DomainExtractor.parse_batch` now returns array of `ParsedURL` objects (or nil for invalid URLs)
|
|
29
|
+
|
|
30
|
+
#### Maintained
|
|
31
|
+
|
|
32
|
+
- Full backward compatibility with hash-style access using `[]`
|
|
33
|
+
- All existing tests continue to pass
|
|
34
|
+
- No breaking changes to existing API
|
|
35
|
+
|
|
36
|
+
## [0.1.7] - 2025-10-31
|
|
37
|
+
|
|
38
|
+
### Added valid? method and enhanced error handling
|
|
39
|
+
|
|
40
|
+
- Added `DomainExtractor.valid?` helper to allow safe URL pre-checks without raising.
|
|
41
|
+
- `DomainExtractor.parse` now raises `DomainExtractor::InvalidURLError` with a clear `"Invalid URL Value"` message when the input cannot be parsed.
|
|
42
|
+
|
|
10
43
|
## [0.1.6] - 2025-10-31
|
|
11
44
|
|
|
12
45
|
### Integrate Rakefile for Release and Task Workflow Refactors
|
data/README.md
CHANGED
|
@@ -52,8 +52,191 @@ result[:domain] # => 'example'
|
|
|
52
52
|
result[:tld] # => 'co.uk'
|
|
53
53
|
result[:root_domain] # => 'example.co.uk'
|
|
54
54
|
result[:host] # => 'www.example.co.uk'
|
|
55
|
+
|
|
56
|
+
# Guard a parse with the validity helper
|
|
57
|
+
url = 'https://www.example.co.uk/path?query=value'
|
|
58
|
+
if DomainExtractor.valid?(url)
|
|
59
|
+
DomainExtractor.parse(url)
|
|
60
|
+
else
|
|
61
|
+
# handle invalid input
|
|
62
|
+
end
|
|
63
|
+
|
|
64
|
+
# New intuitive method-style access
|
|
65
|
+
result.subdomain # => 'www'
|
|
66
|
+
result.domain # => 'example'
|
|
67
|
+
result.host # => 'www.example.co.uk'
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
## ParsedURL API - Intuitive Method Access
|
|
71
|
+
|
|
72
|
+
DomainExtractor now returns a `ParsedURL` object that supports three accessor styles, making your intent clear and your code more robust:
|
|
73
|
+
|
|
74
|
+
### Method Accessor Styles
|
|
75
|
+
|
|
76
|
+
#### 1. Default Methods (Silent Nil)
|
|
77
|
+
Returns the value or `nil` - perfect for exploratory code or when handling invalid data gracefully.
|
|
78
|
+
|
|
79
|
+
```ruby
|
|
80
|
+
result = DomainExtractor.parse('https://api.example.com')
|
|
81
|
+
result.subdomain # => 'api'
|
|
82
|
+
result.domain # => 'example'
|
|
83
|
+
result.host # => 'api.example.com'
|
|
84
|
+
|
|
85
|
+
# Without subdomain
|
|
86
|
+
result = DomainExtractor.parse('https://example.com')
|
|
87
|
+
result.subdomain # => nil (no error)
|
|
88
|
+
result.domain # => 'example'
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
#### 2. Bang Methods (!) - Explicit Errors
|
|
92
|
+
Returns the value or raises `InvalidURLError` - ideal for production code where missing data should fail fast.
|
|
93
|
+
|
|
94
|
+
```ruby
|
|
95
|
+
result = DomainExtractor.parse('https://example.com')
|
|
96
|
+
result.domain! # => 'example'
|
|
97
|
+
result.subdomain! # raises InvalidURLError: "subdomain not found or invalid"
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
#### 3. Question Methods (?) - Boolean Checks
|
|
101
|
+
Always returns `true` or `false` - perfect for conditional logic without exceptions.
|
|
102
|
+
|
|
103
|
+
```ruby
|
|
104
|
+
DomainExtractor.parse('https://dashtrack.com').subdomain? # => false
|
|
105
|
+
DomainExtractor.parse('https://api.dashtrack.com').subdomain? # => true
|
|
106
|
+
DomainExtractor.parse('https://www.dashtrack.com').www_subdomain? # => true
|
|
55
107
|
```
|
|
56
108
|
|
|
109
|
+
### Quick Examples
|
|
110
|
+
|
|
111
|
+
```ruby
|
|
112
|
+
url = 'https://api.staging.example.com/path'
|
|
113
|
+
parsed = DomainExtractor.parse(url)
|
|
114
|
+
|
|
115
|
+
# Method-style access
|
|
116
|
+
parsed.host # => 'api.staging.example.com'
|
|
117
|
+
parsed.subdomain # => 'api.staging'
|
|
118
|
+
parsed.domain # => 'example'
|
|
119
|
+
parsed.root_domain # => 'example.com'
|
|
120
|
+
parsed.tld # => 'com'
|
|
121
|
+
parsed.path # => '/path'
|
|
122
|
+
|
|
123
|
+
# Question methods for conditionals
|
|
124
|
+
if parsed.subdomain?
|
|
125
|
+
puts "Has subdomain: #{parsed.subdomain}"
|
|
126
|
+
end
|
|
127
|
+
|
|
128
|
+
# Bang methods when values are required
|
|
129
|
+
begin
|
|
130
|
+
subdomain = parsed.subdomain! # Safe - has subdomain
|
|
131
|
+
domain = parsed.domain! # Safe - has domain
|
|
132
|
+
rescue DomainExtractor::InvalidURLError => e
|
|
133
|
+
puts "Missing required component: #{e.message}"
|
|
134
|
+
end
|
|
135
|
+
|
|
136
|
+
# Hash-style access still works (backward compatible)
|
|
137
|
+
parsed[:subdomain] # => 'api.staging'
|
|
138
|
+
parsed[:host] # => 'api.staging.example.com'
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
### Additional Examples
|
|
142
|
+
|
|
143
|
+
#### Boolean Checks with Question Methods
|
|
144
|
+
|
|
145
|
+
```ruby
|
|
146
|
+
# Check for subdomain presence
|
|
147
|
+
DomainExtractor.parse('https://dashtrack.com').subdomain? # => false
|
|
148
|
+
DomainExtractor.parse('https://api.dashtrack.com').subdomain? # => true
|
|
149
|
+
|
|
150
|
+
# Check for www subdomain specifically
|
|
151
|
+
DomainExtractor.parse('https://www.dashtrack.com').www_subdomain? # => true
|
|
152
|
+
DomainExtractor.parse('https://api.dashtrack.com').www_subdomain? # => false
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
#### Safe Batch Processing
|
|
156
|
+
|
|
157
|
+
```ruby
|
|
158
|
+
urls = [
|
|
159
|
+
'https://api.example.com',
|
|
160
|
+
'https://example.com',
|
|
161
|
+
'https://www.example.com'
|
|
162
|
+
]
|
|
163
|
+
|
|
164
|
+
urls.each do |url|
|
|
165
|
+
result = DomainExtractor.parse(url)
|
|
166
|
+
|
|
167
|
+
info = {
|
|
168
|
+
url: url,
|
|
169
|
+
has_subdomain: result.subdomain?,
|
|
170
|
+
is_www: result.www_subdomain?,
|
|
171
|
+
host: result.host
|
|
172
|
+
}
|
|
173
|
+
|
|
174
|
+
puts "#{info[:url]} - subdomain: #{info[:has_subdomain]}, www: #{info[:is_www]}"
|
|
175
|
+
end
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
#### Production URL Validation
|
|
179
|
+
|
|
180
|
+
```ruby
|
|
181
|
+
def validate_api_url(url)
|
|
182
|
+
result = DomainExtractor.parse(url)
|
|
183
|
+
|
|
184
|
+
# Ensure all required components exist
|
|
185
|
+
result.subdomain! # Must have subdomain
|
|
186
|
+
result.domain! # Must have domain
|
|
187
|
+
|
|
188
|
+
# Additional validation
|
|
189
|
+
return false unless result.subdomain.start_with?('api')
|
|
190
|
+
|
|
191
|
+
true
|
|
192
|
+
rescue DomainExtractor::InvalidURLError => e
|
|
193
|
+
puts "Validation failed: #{e.message}"
|
|
194
|
+
false
|
|
195
|
+
end
|
|
196
|
+
|
|
197
|
+
validate_api_url('https://api.example.com/endpoint') # => true
|
|
198
|
+
validate_api_url('https://example.com/endpoint') # => false (no subdomain)
|
|
199
|
+
validate_api_url('https://www.example.com/endpoint') # => false (not api subdomain)
|
|
200
|
+
```
|
|
201
|
+
|
|
202
|
+
#### Guard Clauses with Question Methods
|
|
203
|
+
|
|
204
|
+
```ruby
|
|
205
|
+
def process_url(url)
|
|
206
|
+
return 'Invalid URL' unless DomainExtractor.valid?(url)
|
|
207
|
+
|
|
208
|
+
result = DomainExtractor.parse(url)
|
|
209
|
+
return 'No subdomain present' unless result.subdomain?
|
|
210
|
+
return 'WWW redirect needed' if result.www_subdomain?
|
|
211
|
+
|
|
212
|
+
"Processing subdomain: #{result.subdomain}"
|
|
213
|
+
end
|
|
214
|
+
|
|
215
|
+
process_url('https://api.example.com') # => "Processing subdomain: api"
|
|
216
|
+
process_url('https://www.example.com') # => "WWW redirect needed"
|
|
217
|
+
process_url('https://example.com') # => "No subdomain present"
|
|
218
|
+
```
|
|
219
|
+
|
|
220
|
+
#### Converting to Hash
|
|
221
|
+
|
|
222
|
+
```ruby
|
|
223
|
+
url = 'https://api.example.com/path'
|
|
224
|
+
result = DomainExtractor.parse(url)
|
|
225
|
+
|
|
226
|
+
hash = result.to_h
|
|
227
|
+
# => {
|
|
228
|
+
# subdomain: "api",
|
|
229
|
+
# domain: "example",
|
|
230
|
+
# tld: "com",
|
|
231
|
+
# root_domain: "example.com",
|
|
232
|
+
# host: "api.example.com",
|
|
233
|
+
# path: "/path",
|
|
234
|
+
# query_params: {}
|
|
235
|
+
# }
|
|
236
|
+
```
|
|
237
|
+
|
|
238
|
+
**See [docs/PARSED_URL_API.md](docs/PARSED_URL_API.md) for comprehensive documentation and real-world examples.**
|
|
239
|
+
|
|
57
240
|
## Usage Examples
|
|
58
241
|
|
|
59
242
|
### Basic Domain Parsing
|
|
@@ -105,13 +288,25 @@ urls = ['https://example.com', 'https://blog.example.org']
|
|
|
105
288
|
results = DomainExtractor.parse_batch(urls)
|
|
106
289
|
```
|
|
107
290
|
|
|
291
|
+
### Validation and Error Handling
|
|
292
|
+
|
|
293
|
+
```ruby
|
|
294
|
+
DomainExtractor.valid?('https://www.example.com') # => true
|
|
295
|
+
|
|
296
|
+
# DomainExtractor.parse raises DomainExtractor::InvalidURLError on invalid input
|
|
297
|
+
DomainExtractor.parse('not-a-url')
|
|
298
|
+
# => raises DomainExtractor::InvalidURLError (message: "Invalid URL Value")
|
|
299
|
+
```
|
|
300
|
+
|
|
108
301
|
## API Reference
|
|
109
302
|
|
|
110
303
|
### `DomainExtractor.parse(url_string)`
|
|
111
304
|
|
|
112
305
|
Parses a URL string and extracts domain components.
|
|
113
306
|
|
|
114
|
-
**Returns:** Hash with keys `:subdomain`, `:domain`, `:tld`, `:root_domain`, `:host`, `:path`
|
|
307
|
+
**Returns:** `ParsedURL` object (also supports hash-style `[]` access) exposing `:subdomain`, `:domain`, `:tld`, `:root_domain`, `:host`, `:path`, `:query_params`
|
|
308
|
+
|
|
309
|
+
**Raises:** `DomainExtractor::InvalidURLError` when the URL fails validation
|
|
115
310
|
|
|
116
311
|
### `DomainExtractor.parse_batch(urls)`
|
|
117
312
|
|
|
@@ -119,6 +314,12 @@ Parses multiple URLs efficiently.
|
|
|
119
314
|
|
|
120
315
|
**Returns:** Array of `ParsedURL` objects, with `nil` in place of any URL that could not be parsed
|
|
121
316
|
|
|
317
|
+
### `DomainExtractor.valid?(url_string)`
|
|
318
|
+
|
|
319
|
+
Checks if a URL can be parsed successfully without raising.
|
|
320
|
+
|
|
321
|
+
**Returns:** `true` or `false`
|
|
322
|
+
|
|
122
323
|
### `DomainExtractor.parse_query_params(query_string)`
|
|
123
324
|
|
|
124
325
|
Parses a query string into a hash of parameters.
|
|
@@ -146,8 +347,9 @@ track_event('page_view', source_domain: parsed[:root_domain]) if parsed
|
|
|
146
347
|
|
|
147
348
|
```ruby
|
|
148
349
|
def internal_link?(url, base_domain)
|
|
149
|
-
|
|
150
|
-
|
|
350
|
+
return false unless DomainExtractor.valid?(url)
|
|
351
|
+
|
|
352
|
+
DomainExtractor.parse(url)[:root_domain] == base_domain
|
|
151
353
|
end
|
|
152
354
|
```
|
|
153
355
|
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module DomainExtractor
  # ParsedURL wraps the hash produced by the parser and layers method-style
  # accessors on top of it. Every key listed in RESULT_KEYS is reachable in
  # three forms:
  #
  #   parsed.host    # the value, or nil when it is absent
  #   parsed.host!   # the value, or raises InvalidURLError when absent/empty
  #   parsed.host?   # true when the value is present and non-empty, else false
  #
  # Examples:
  #   parsed = DomainExtractor.parse('https://api.example.com')
  #   parsed.host           # => 'api.example.com'
  #   parsed.subdomain      # => 'api'
  #   parsed.subdomain?     # => true
  #   parsed.www_subdomain? # => false
  #
  #   # DomainExtractor.parse raises InvalidURLError for invalid input, so an
  #   # empty ParsedURL only appears when nil (or an empty hash) is wrapped
  #   # directly:
  #   parsed = DomainExtractor::ParsedURL.new(nil)
  #   parsed.valid?         # => false
  #   parsed.host           # => nil
  #   parsed.host?          # => false
  #   parsed.host!          # raises InvalidURLError
  class ParsedURL
    # Expose the underlying hash for backward compatibility
    attr_reader :result

    # Keys of the result hash that get dynamic accessors (plain, `!` and `?`).
    RESULT_KEYS = %i[subdomain domain tld root_domain host path query_params].freeze

    # @param result [Hash, nil] parsed attributes; nil is stored as an empty hash
    def initialize(result)
      @result = result || {}
      freeze
    end

    # Hash-style access for backward compatibility
    # (parsed[:subdomain], parsed[:host], etc.).
    #
    # @param key [Symbol]
    # @return [Object, nil] the stored value, or nil when the key is absent
    def [](key)
      @result[key]
    end

    # @return [Boolean] true when the wrapped hash holds any data
    def valid?
      !@result.empty?
    end

    # @return [Boolean] true only when the subdomain is exactly 'www'
    def www_subdomain?
      @result[:subdomain] == 'www'
    end

    # Dispatches the dynamic accessors for every key in RESULT_KEYS.
    #
    # Anything whose base name is not a result key -- including unknown
    # `name!` and `name?` calls -- falls through to `super`, so it raises
    # NoMethodError instead of silently returning nil. This keeps the
    # behaviour in line with what #respond_to? reports.
    #
    # @raise [InvalidURLError] for `key!` when the value is nil or empty
    # @raise [NoMethodError] when the base name is not in RESULT_KEYS
    def method_missing(method_name, *args, &block)
      key, suffix = split_accessor(method_name)
      return super unless RESULT_KEYS.include?(key)

      case suffix
      when '!' then require_value(key)
      when '?' then value_present?(@result[key])
      else @result[key]
      end
    end

    # Mirrors #method_missing: true for `key`, `key!` and `key?` where key is
    # one of RESULT_KEYS; everything else is delegated to `super`.
    def respond_to_missing?(method_name, include_private = false)
      key, = split_accessor(method_name)
      RESULT_KEYS.include?(key) || super
    end

    # Provide hash-like inspection
    def inspect
      "#<DomainExtractor::ParsedURL #{@result.inspect}>"
    end

    # @return [String] string form of the wrapped hash
    def to_s
      @result.to_s
    end

    # Returns a copy of the wrapped hash, so callers cannot reach the
    # internal one through the return value.
    #
    # @return [Hash]
    def to_h
      @result.dup
    end

    # Allow to_hash as well for better Ruby compatibility
    alias to_hash to_h

    private

    # Splits a method name into its base key and optional `!` / `?` suffix.
    #
    # @param method_name [Symbol, String]
    # @return [Array(Symbol, String), Array(Symbol, nil)] [key, suffix]
    def split_accessor(method_name)
      name = method_name.to_s
      suffix = name[-1]
      return [name[0...-1].to_sym, suffix] if ['!', '?'].include?(suffix)

      [name.to_sym, nil]
    end

    # Returns the value stored under +key+, raising when it is missing or empty.
    #
    # @raise [InvalidURLError] when the value is nil or empty
    def require_value(key)
      value = @result[key]
      return value if value_present?(value)

      raise InvalidURLError, "#{key} not found or invalid"
    end

    # A value counts as present when it is non-nil and, for anything that
    # responds to #empty? (strings, hashes, ...), not empty.
    def value_present?(value)
      return false if value.nil?
      return !value.empty? if value.respond_to?(:empty?)

      true
    end
  end
end
|
|
@@ -6,6 +6,7 @@ require 'public_suffix'
|
|
|
6
6
|
require_relative 'normalizer'
|
|
7
7
|
require_relative 'result'
|
|
8
8
|
require_relative 'validators'
|
|
9
|
+
require_relative 'parsed_url'
|
|
9
10
|
|
|
10
11
|
module DomainExtractor
|
|
11
12
|
# Parser orchestrates the pipeline for url normalization, validation, and domain extraction.
|
|
@@ -13,16 +14,19 @@ module DomainExtractor
|
|
|
13
14
|
module_function
|
|
14
15
|
|
|
15
16
|
def call(raw_url)
|
|
16
|
-
|
|
17
|
-
return unless
|
|
18
|
-
|
|
19
|
-
host = uri.host&.downcase
|
|
20
|
-
return if invalid_host?(host)
|
|
17
|
+
components = extract_components(raw_url)
|
|
18
|
+
return ParsedURL.new(nil) unless components
|
|
21
19
|
|
|
22
|
-
domain =
|
|
20
|
+
uri, domain, host = components
|
|
23
21
|
build_result(domain: domain, host: host, uri: uri)
|
|
24
22
|
rescue ::URI::InvalidURIError, ::PublicSuffix::Error
|
|
25
|
-
nil
|
|
23
|
+
ParsedURL.new(nil)
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
def valid?(raw_url)
|
|
27
|
+
!!extract_components(raw_url)
|
|
28
|
+
rescue ::URI::InvalidURIError, ::PublicSuffix::Error
|
|
29
|
+
false
|
|
26
30
|
end
|
|
27
31
|
|
|
28
32
|
def build_uri(raw_url)
|
|
@@ -38,6 +42,18 @@ module DomainExtractor
|
|
|
38
42
|
end
|
|
39
43
|
private_class_method :invalid_host?
|
|
40
44
|
|
|
45
|
+
def extract_components(raw_url)
|
|
46
|
+
uri = build_uri(raw_url)
|
|
47
|
+
return unless uri
|
|
48
|
+
|
|
49
|
+
host = uri.host&.downcase
|
|
50
|
+
return if invalid_host?(host)
|
|
51
|
+
|
|
52
|
+
domain = ::PublicSuffix.parse(host)
|
|
53
|
+
[uri, domain, host]
|
|
54
|
+
end
|
|
55
|
+
private_class_method :extract_components
|
|
56
|
+
|
|
41
57
|
def build_result(domain:, host:, uri:)
|
|
42
58
|
Result.build(
|
|
43
59
|
subdomain: domain.trd,
|
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require_relative 'parsed_url'
|
|
4
|
+
|
|
3
5
|
module DomainExtractor
|
|
4
6
|
# Result encapsulates the final parsed attributes and exposes a hash interface.
|
|
5
7
|
module Result
|
|
@@ -10,7 +12,7 @@ module DomainExtractor
|
|
|
10
12
|
module_function
|
|
11
13
|
|
|
12
14
|
def build(**attributes)
|
|
13
|
-
{
|
|
15
|
+
hash = {
|
|
14
16
|
subdomain: normalize_subdomain(attributes[:subdomain]),
|
|
15
17
|
root_domain: attributes[:root_domain],
|
|
16
18
|
domain: attributes[:domain],
|
|
@@ -19,6 +21,8 @@ module DomainExtractor
|
|
|
19
21
|
path: attributes[:path] || EMPTY_PATH,
|
|
20
22
|
query_params: QueryParams.call(attributes[:query])
|
|
21
23
|
}.freeze
|
|
24
|
+
|
|
25
|
+
ParsedURL.new(hash)
|
|
22
26
|
end
|
|
23
27
|
|
|
24
28
|
def normalize_subdomain(value)
|
data/lib/domain_extractor.rb
CHANGED
|
@@ -4,6 +4,8 @@ require 'uri'
|
|
|
4
4
|
require 'public_suffix'
|
|
5
5
|
|
|
6
6
|
require_relative 'domain_extractor/version'
|
|
7
|
+
require_relative 'domain_extractor/errors'
|
|
8
|
+
require_relative 'domain_extractor/parsed_url'
|
|
7
9
|
require_relative 'domain_extractor/parser'
|
|
8
10
|
require_relative 'domain_extractor/query_params'
|
|
9
11
|
|
|
@@ -12,19 +14,35 @@ require_relative 'domain_extractor/query_params'
|
|
|
12
14
|
module DomainExtractor
|
|
13
15
|
class << self
|
|
14
16
|
# Parse an individual URL and extract domain attributes.
|
|
17
|
+
# Returns a ParsedURL object that supports hash-style access and method calls.
|
|
18
|
+
# Raises DomainExtractor::InvalidURLError when the URL fails validation.
|
|
15
19
|
# @param url [String, #to_s]
|
|
16
|
-
# @return [
|
|
20
|
+
# @return [ParsedURL]
|
|
17
21
|
def parse(url)
|
|
18
|
-
Parser.call(url)
|
|
22
|
+
result = Parser.call(url)
|
|
23
|
+
raise InvalidURLError unless result.valid?
|
|
24
|
+
|
|
25
|
+
result
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
# Determine if a URL is considered valid by the parser.
|
|
29
|
+
# @param url [String, #to_s]
|
|
30
|
+
# @return [Boolean]
|
|
31
|
+
def valid?(url)
|
|
32
|
+
Parser.valid?(url)
|
|
19
33
|
end
|
|
20
34
|
|
|
21
35
|
# Parse many URLs and return their individual parse results.
|
|
36
|
+
# Returns nil for invalid URLs to maintain backward compatibility.
|
|
22
37
|
# @param urls [Enumerable<String>]
|
|
23
|
-
# @return [Array<
|
|
38
|
+
# @return [Array<ParsedURL, nil>]
|
|
24
39
|
def parse_batch(urls)
|
|
25
40
|
return [] unless urls.respond_to?(:map)
|
|
26
41
|
|
|
27
|
-
urls.map
|
|
42
|
+
urls.map do |url|
|
|
43
|
+
result = Parser.call(url)
|
|
44
|
+
result.valid? ? result : nil
|
|
45
|
+
end
|
|
28
46
|
end
|
|
29
47
|
|
|
30
48
|
# Convert a query string into a Hash representation.
|
|
@@ -142,32 +142,70 @@ RSpec.describe DomainExtractor do
|
|
|
142
142
|
end
|
|
143
143
|
|
|
144
144
|
context 'with invalid URLs' do
|
|
145
|
-
it '
|
|
146
|
-
expect
|
|
145
|
+
it 'raises InvalidURLError for malformed URLs' do
|
|
146
|
+
expect { described_class.parse('http://') }.to raise_error(
|
|
147
|
+
DomainExtractor::InvalidURLError,
|
|
148
|
+
'Invalid URL Value'
|
|
149
|
+
)
|
|
147
150
|
end
|
|
148
151
|
|
|
149
|
-
it '
|
|
150
|
-
expect
|
|
152
|
+
it 'raises InvalidURLError for invalid domains' do
|
|
153
|
+
expect { described_class.parse('not_a_url') }.to raise_error(
|
|
154
|
+
DomainExtractor::InvalidURLError,
|
|
155
|
+
'Invalid URL Value'
|
|
156
|
+
)
|
|
151
157
|
end
|
|
152
158
|
|
|
153
|
-
it '
|
|
154
|
-
expect
|
|
159
|
+
it 'raises InvalidURLError for IP addresses' do
|
|
160
|
+
expect { described_class.parse('192.168.1.1') }.to raise_error(
|
|
161
|
+
DomainExtractor::InvalidURLError,
|
|
162
|
+
'Invalid URL Value'
|
|
163
|
+
)
|
|
155
164
|
end
|
|
156
165
|
|
|
157
|
-
it '
|
|
158
|
-
expect
|
|
166
|
+
it 'raises InvalidURLError for IPv6 addresses' do
|
|
167
|
+
expect { described_class.parse('[2001:db8::1]') }.to raise_error(
|
|
168
|
+
DomainExtractor::InvalidURLError,
|
|
169
|
+
'Invalid URL Value'
|
|
170
|
+
)
|
|
159
171
|
end
|
|
160
172
|
|
|
161
|
-
it '
|
|
162
|
-
expect
|
|
173
|
+
it 'raises InvalidURLError for empty string' do
|
|
174
|
+
expect { described_class.parse('') }.to raise_error(DomainExtractor::InvalidURLError, 'Invalid URL Value')
|
|
163
175
|
end
|
|
164
176
|
|
|
165
|
-
it '
|
|
166
|
-
expect
|
|
177
|
+
it 'raises InvalidURLError for nil' do
|
|
178
|
+
expect { described_class.parse(nil) }.to raise_error(DomainExtractor::InvalidURLError, 'Invalid URL Value')
|
|
167
179
|
end
|
|
168
180
|
end
|
|
169
181
|
end
|
|
170
182
|
|
|
183
|
+
describe '.valid?' do
|
|
184
|
+
it 'returns true for a normalized domain' do
|
|
185
|
+
expect(described_class.valid?('dashtrack.com')).to be(true)
|
|
186
|
+
end
|
|
187
|
+
|
|
188
|
+
it 'returns true for a full URL with subdomain and query' do
|
|
189
|
+
expect(described_class.valid?('https://www.example.co.uk/path?query=value')).to be(true)
|
|
190
|
+
end
|
|
191
|
+
|
|
192
|
+
it 'returns false for malformed URLs' do
|
|
193
|
+
expect(described_class.valid?('http://')).to be(false)
|
|
194
|
+
end
|
|
195
|
+
|
|
196
|
+
it 'returns false for invalid domains' do
|
|
197
|
+
expect(described_class.valid?('not_a_url')).to be(false)
|
|
198
|
+
end
|
|
199
|
+
|
|
200
|
+
it 'returns false for IP addresses' do
|
|
201
|
+
expect(described_class.valid?('192.168.1.1')).to be(false)
|
|
202
|
+
end
|
|
203
|
+
|
|
204
|
+
it 'returns false for nil values' do
|
|
205
|
+
expect(described_class.valid?(nil)).to be(false)
|
|
206
|
+
end
|
|
207
|
+
end
|
|
208
|
+
|
|
171
209
|
describe '.parse_query_params' do
|
|
172
210
|
it 'converts simple query string to hash' do
|
|
173
211
|
result = described_class.parse_query_params('foo=bar')
|
|
@@ -262,7 +300,7 @@ RSpec.describe DomainExtractor do
|
|
|
262
300
|
|
|
263
301
|
results = described_class.parse_batch(urls)
|
|
264
302
|
|
|
265
|
-
expect(results).to all(be_a(
|
|
303
|
+
expect(results).to all(be_a(DomainExtractor::ParsedURL))
|
|
266
304
|
expect(results.map { |result| result[:root_domain] }).to all(eq('example.com'))
|
|
267
305
|
end
|
|
268
306
|
|
|
@@ -0,0 +1,422 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'spec_helper'
|
|
4
|
+
|
|
5
|
+
RSpec.describe DomainExtractor::ParsedURL do
|
|
6
|
+
describe 'method accessor styles' do
|
|
7
|
+
context 'with a valid URL with subdomain' do
|
|
8
|
+
let(:parsed) { DomainExtractor.parse('https://api.dashtrack.com/path?query=value') }
|
|
9
|
+
|
|
10
|
+
describe 'default accessor methods' do
|
|
11
|
+
it 'returns subdomain' do
|
|
12
|
+
expect(parsed.subdomain).to eq('api')
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
it 'returns domain' do
|
|
16
|
+
expect(parsed.domain).to eq('dashtrack')
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
it 'returns tld' do
|
|
20
|
+
expect(parsed.tld).to eq('com')
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
it 'returns root_domain' do
|
|
24
|
+
expect(parsed.root_domain).to eq('dashtrack.com')
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
it 'returns host' do
|
|
28
|
+
expect(parsed.host).to eq('api.dashtrack.com')
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
it 'returns path' do
|
|
32
|
+
expect(parsed.path).to eq('/path')
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
it 'returns query_params' do
|
|
36
|
+
expect(parsed.query_params).to eq({ 'query' => 'value' })
|
|
37
|
+
end
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
describe 'bang (!) accessor methods' do
|
|
41
|
+
it 'returns subdomain!' do
|
|
42
|
+
expect(parsed.subdomain!).to eq('api')
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
it 'returns domain!' do
|
|
46
|
+
expect(parsed.domain!).to eq('dashtrack')
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
it 'returns tld!' do
|
|
50
|
+
expect(parsed.tld!).to eq('com')
|
|
51
|
+
end
|
|
52
|
+
|
|
53
|
+
it 'returns root_domain!' do
|
|
54
|
+
expect(parsed.root_domain!).to eq('dashtrack.com')
|
|
55
|
+
end
|
|
56
|
+
|
|
57
|
+
it 'returns host!' do
|
|
58
|
+
expect(parsed.host!).to eq('api.dashtrack.com')
|
|
59
|
+
end
|
|
60
|
+
end
|
|
61
|
+
|
|
62
|
+
describe 'question mark (?) accessor methods' do
|
|
63
|
+
it 'returns true for subdomain?' do
|
|
64
|
+
expect(parsed.subdomain?).to be true
|
|
65
|
+
end
|
|
66
|
+
|
|
67
|
+
it 'returns true for domain?' do
|
|
68
|
+
expect(parsed.domain?).to be true
|
|
69
|
+
end
|
|
70
|
+
|
|
71
|
+
it 'returns true for tld?' do
|
|
72
|
+
expect(parsed.tld?).to be true
|
|
73
|
+
end
|
|
74
|
+
|
|
75
|
+
it 'returns true for root_domain?' do
|
|
76
|
+
expect(parsed.root_domain?).to be true
|
|
77
|
+
end
|
|
78
|
+
|
|
79
|
+
it 'returns true for host?' do
|
|
80
|
+
expect(parsed.host?).to be true
|
|
81
|
+
end
|
|
82
|
+
end
|
|
83
|
+
end
|
|
84
|
+
|
|
85
|
+
context 'with a valid URL without subdomain' do
|
|
86
|
+
let(:parsed) { DomainExtractor.parse('https://dashtrack.com') }
|
|
87
|
+
|
|
88
|
+
describe 'default accessor methods for nil subdomain' do
|
|
89
|
+
it 'returns nil for subdomain' do
|
|
90
|
+
expect(parsed.subdomain).to be_nil
|
|
91
|
+
end
|
|
92
|
+
|
|
93
|
+
it 'returns domain' do
|
|
94
|
+
expect(parsed.domain).to eq('dashtrack')
|
|
95
|
+
end
|
|
96
|
+
|
|
97
|
+
it 'returns host' do
|
|
98
|
+
expect(parsed.host).to eq('dashtrack.com')
|
|
99
|
+
end
|
|
100
|
+
end
|
|
101
|
+
|
|
102
|
+
describe 'bang (!) accessor methods with nil subdomain' do
|
|
103
|
+
it 'raises InvalidURLError for subdomain!' do
|
|
104
|
+
expect { parsed.subdomain! }.to raise_error(
|
|
105
|
+
DomainExtractor::InvalidURLError,
|
|
106
|
+
'subdomain not found or invalid'
|
|
107
|
+
)
|
|
108
|
+
end
|
|
109
|
+
|
|
110
|
+
it 'returns domain!' do
|
|
111
|
+
expect(parsed.domain!).to eq('dashtrack')
|
|
112
|
+
end
|
|
113
|
+
end
|
|
114
|
+
|
|
115
|
+
describe 'question mark (?) accessor methods with nil subdomain' do
|
|
116
|
+
it 'returns false for subdomain?' do
|
|
117
|
+
expect(parsed.subdomain?).to be false
|
|
118
|
+
end
|
|
119
|
+
|
|
120
|
+
it 'returns true for domain?' do
|
|
121
|
+
expect(parsed.domain?).to be true
|
|
122
|
+
end
|
|
123
|
+
|
|
124
|
+
it 'returns true for host?' do
|
|
125
|
+
expect(parsed.host?).to be true
|
|
126
|
+
end
|
|
127
|
+
end
|
|
128
|
+
end
|
|
129
|
+
|
|
130
|
+
context 'with invalid URL' do
|
|
131
|
+
let(:parsed) { DomainExtractor::ParsedURL.new(nil) }
|
|
132
|
+
|
|
133
|
+
describe 'default accessor methods' do
|
|
134
|
+
it 'returns nil for subdomain' do
|
|
135
|
+
expect(parsed.subdomain).to be_nil
|
|
136
|
+
end
|
|
137
|
+
|
|
138
|
+
it 'returns nil for domain' do
|
|
139
|
+
expect(parsed.domain).to be_nil
|
|
140
|
+
end
|
|
141
|
+
|
|
142
|
+
it 'returns nil for host' do
|
|
143
|
+
expect(parsed.host).to be_nil
|
|
144
|
+
end
|
|
145
|
+
|
|
146
|
+
it 'returns nil for root_domain' do
|
|
147
|
+
expect(parsed.root_domain).to be_nil
|
|
148
|
+
end
|
|
149
|
+
end
|
|
150
|
+
|
|
151
|
+
describe 'bang (!) accessor methods' do
|
|
152
|
+
it 'raises InvalidURLError for host!' do
|
|
153
|
+
expect { parsed.host! }.to raise_error(
|
|
154
|
+
DomainExtractor::InvalidURLError,
|
|
155
|
+
'host not found or invalid'
|
|
156
|
+
)
|
|
157
|
+
end
|
|
158
|
+
|
|
159
|
+
it 'raises InvalidURLError for domain!' do
|
|
160
|
+
expect { parsed.domain! }.to raise_error(
|
|
161
|
+
DomainExtractor::InvalidURLError,
|
|
162
|
+
'domain not found or invalid'
|
|
163
|
+
)
|
|
164
|
+
end
|
|
165
|
+
|
|
166
|
+
it 'raises InvalidURLError for subdomain!' do
|
|
167
|
+
expect { parsed.subdomain! }.to raise_error(
|
|
168
|
+
DomainExtractor::InvalidURLError,
|
|
169
|
+
'subdomain not found or invalid'
|
|
170
|
+
)
|
|
171
|
+
end
|
|
172
|
+
end
|
|
173
|
+
|
|
174
|
+
describe 'question mark (?) accessor methods' do
|
|
175
|
+
it 'returns false for subdomain?' do
|
|
176
|
+
expect(parsed.subdomain?).to be false
|
|
177
|
+
end
|
|
178
|
+
|
|
179
|
+
it 'returns false for domain?' do
|
|
180
|
+
expect(parsed.domain?).to be false
|
|
181
|
+
end
|
|
182
|
+
|
|
183
|
+
it 'returns false for host?' do
|
|
184
|
+
expect(parsed.host?).to be false
|
|
185
|
+
end
|
|
186
|
+
|
|
187
|
+
it 'returns false for root_domain?' do
|
|
188
|
+
expect(parsed.root_domain?).to be false
|
|
189
|
+
end
|
|
190
|
+
end
|
|
191
|
+
end
|
|
192
|
+
end
|
|
193
|
+
|
|
194
|
+
describe '#www_subdomain?' do
|
|
195
|
+
it 'returns true when subdomain is www' do
|
|
196
|
+
parsed = DomainExtractor.parse('https://www.dashtrack.com')
|
|
197
|
+
expect(parsed.www_subdomain?).to be true
|
|
198
|
+
end
|
|
199
|
+
|
|
200
|
+
it 'returns false when subdomain is not www' do
|
|
201
|
+
parsed = DomainExtractor.parse('https://api.dashtrack.com')
|
|
202
|
+
expect(parsed.www_subdomain?).to be false
|
|
203
|
+
end
|
|
204
|
+
|
|
205
|
+
it 'returns false when there is no subdomain' do
|
|
206
|
+
parsed = DomainExtractor.parse('https://dashtrack.com')
|
|
207
|
+
expect(parsed.www_subdomain?).to be false
|
|
208
|
+
end
|
|
209
|
+
|
|
210
|
+
it 'returns false for invalid URL' do
|
|
211
|
+
parsed = DomainExtractor::ParsedURL.new(nil)
|
|
212
|
+
expect(parsed.www_subdomain?).to be false
|
|
213
|
+
end
|
|
214
|
+
end
|
|
215
|
+
|
|
216
|
+
describe '#valid?' do
|
|
217
|
+
it 'returns true for valid URL' do
|
|
218
|
+
parsed = DomainExtractor.parse('https://dashtrack.com')
|
|
219
|
+
expect(parsed.valid?).to be true
|
|
220
|
+
end
|
|
221
|
+
|
|
222
|
+
it 'returns false for invalid URL' do
|
|
223
|
+
parsed = DomainExtractor::ParsedURL.new(nil)
|
|
224
|
+
expect(parsed.valid?).to be false
|
|
225
|
+
end
|
|
226
|
+
|
|
227
|
+
it 'returns false for empty result' do
|
|
228
|
+
parsed = DomainExtractor::ParsedURL.new({})
|
|
229
|
+
expect(parsed.valid?).to be false
|
|
230
|
+
end
|
|
231
|
+
end
|
|
232
|
+
|
|
233
|
+
describe 'hash-style access for backward compatibility' do
|
|
234
|
+
let(:parsed) { DomainExtractor.parse('https://www.example.co.uk/path?query=value') }
|
|
235
|
+
|
|
236
|
+
it 'supports hash-style access with []' do
|
|
237
|
+
expect(parsed[:subdomain]).to eq('www')
|
|
238
|
+
expect(parsed[:domain]).to eq('example')
|
|
239
|
+
expect(parsed[:tld]).to eq('co.uk')
|
|
240
|
+
expect(parsed[:root_domain]).to eq('example.co.uk')
|
|
241
|
+
expect(parsed[:host]).to eq('www.example.co.uk')
|
|
242
|
+
expect(parsed[:path]).to eq('/path')
|
|
243
|
+
end
|
|
244
|
+
end
|
|
245
|
+
|
|
246
|
+
describe '#to_h and #to_hash' do
|
|
247
|
+
let(:parsed) { DomainExtractor.parse('https://api.example.com') }
|
|
248
|
+
|
|
249
|
+
it 'converts to hash with to_h' do
|
|
250
|
+
hash = parsed.to_h
|
|
251
|
+
expect(hash).to be_a(Hash)
|
|
252
|
+
expect(hash[:subdomain]).to eq('api')
|
|
253
|
+
expect(hash[:domain]).to eq('example')
|
|
254
|
+
end
|
|
255
|
+
|
|
256
|
+
it 'converts to hash with to_hash' do
|
|
257
|
+
hash = parsed.to_hash
|
|
258
|
+
expect(hash).to be_a(Hash)
|
|
259
|
+
expect(hash[:subdomain]).to eq('api')
|
|
260
|
+
expect(hash[:domain]).to eq('example')
|
|
261
|
+
end
|
|
262
|
+
end
|
|
263
|
+
|
|
264
|
+
describe 'integration examples from requirements' do
|
|
265
|
+
it 'handles example: DomainExtractor.parse(url).host' do
|
|
266
|
+
url = 'https://www.example.co.uk/path?query=value'
|
|
267
|
+
expect(DomainExtractor.parse(url).host).to eq('www.example.co.uk')
|
|
268
|
+
end
|
|
269
|
+
|
|
270
|
+
it 'handles example: DomainExtractor.parse(url).domain' do
|
|
271
|
+
url = 'https://www.example.co.uk/path?query=value'
|
|
272
|
+
expect(DomainExtractor.parse(url).domain).to eq('example')
|
|
273
|
+
end
|
|
274
|
+
|
|
275
|
+
it 'handles example: DomainExtractor.parse(url).subdomain' do
|
|
276
|
+
url = 'https://www.example.co.uk/path?query=value'
|
|
277
|
+
expect(DomainExtractor.parse(url).subdomain).to eq('www')
|
|
278
|
+
end
|
|
279
|
+
|
|
280
|
+
it 'handles example: no subdomain returns false' do
|
|
281
|
+
expect(DomainExtractor.parse('https://dashtrack.com').subdomain?).to be false
|
|
282
|
+
end
|
|
283
|
+
|
|
284
|
+
it 'handles example: with subdomain returns true' do
|
|
285
|
+
expect(DomainExtractor.parse('https://api.dashtrack.com').subdomain?).to be true
|
|
286
|
+
end
|
|
287
|
+
|
|
288
|
+
it 'handles example: www_subdomain? returns true for www' do
|
|
289
|
+
expect(DomainExtractor.parse('https://www.dashtrack.com').www_subdomain?).to be true
|
|
290
|
+
end
|
|
291
|
+
|
|
292
|
+
it 'handles example: www_subdomain? returns false for non-www' do
|
|
293
|
+
expect(DomainExtractor.parse('https://dashtrack.com').www_subdomain?).to be false
|
|
294
|
+
end
|
|
295
|
+
|
|
296
|
+
it 'handles example: host returns value for valid URL' do
|
|
297
|
+
expect(DomainExtractor.parse('https://api.dashtrack.com').host).to eq('api.dashtrack.com')
|
|
298
|
+
end
|
|
299
|
+
|
|
300
|
+
it 'handles example: domain returns nil for invalid URL' do
|
|
301
|
+
# Parser returns ParsedURL with empty result for invalid URLs
|
|
302
|
+
# But parse() raises error, so we need to construct directly
|
|
303
|
+
parsed = DomainExtractor::ParsedURL.new(nil)
|
|
304
|
+
expect(parsed.domain).to be_nil
|
|
305
|
+
end
|
|
306
|
+
end
|
|
307
|
+
|
|
308
|
+
describe 'edge cases' do
|
|
309
|
+
context 'with multi-part TLD' do
|
|
310
|
+
let(:parsed) { DomainExtractor.parse('shop.example.com.au') }
|
|
311
|
+
|
|
312
|
+
it 'correctly identifies subdomain' do
|
|
313
|
+
expect(parsed.subdomain).to eq('shop')
|
|
314
|
+
end
|
|
315
|
+
|
|
316
|
+
it 'correctly identifies tld' do
|
|
317
|
+
expect(parsed.tld).to eq('com.au')
|
|
318
|
+
end
|
|
319
|
+
|
|
320
|
+
it 'subdomain? returns true' do
|
|
321
|
+
expect(parsed.subdomain?).to be true
|
|
322
|
+
end
|
|
323
|
+
end
|
|
324
|
+
|
|
325
|
+
context 'with nested subdomains' do
|
|
326
|
+
let(:parsed) { DomainExtractor.parse('api.staging.example.com') }
|
|
327
|
+
|
|
328
|
+
it 'returns nested subdomain' do
|
|
329
|
+
expect(parsed.subdomain).to eq('api.staging')
|
|
330
|
+
end
|
|
331
|
+
|
|
332
|
+
it 'subdomain? returns true' do
|
|
333
|
+
expect(parsed.subdomain?).to be true
|
|
334
|
+
end
|
|
335
|
+
|
|
336
|
+
it 'subdomain! returns the value' do
|
|
337
|
+
expect(parsed.subdomain!).to eq('api.staging')
|
|
338
|
+
end
|
|
339
|
+
end
|
|
340
|
+
|
|
341
|
+
context 'with empty path' do
|
|
342
|
+
let(:parsed) { DomainExtractor.parse('https://example.com') }
|
|
343
|
+
|
|
344
|
+
it 'returns empty string for path' do
|
|
345
|
+
expect(parsed.path).to eq('')
|
|
346
|
+
end
|
|
347
|
+
|
|
348
|
+
it 'path? returns false for empty path' do
|
|
349
|
+
expect(parsed.path?).to be false
|
|
350
|
+
end
|
|
351
|
+
end
|
|
352
|
+
|
|
353
|
+
context 'with query params' do
|
|
354
|
+
let(:parsed) { DomainExtractor.parse('https://example.com?foo=bar&baz=qux') }
|
|
355
|
+
|
|
356
|
+
it 'returns query_params hash' do
|
|
357
|
+
expect(parsed.query_params).to eq({ 'foo' => 'bar', 'baz' => 'qux' })
|
|
358
|
+
end
|
|
359
|
+
|
|
360
|
+
it 'query_params? returns true' do
|
|
361
|
+
expect(parsed.query_params?).to be true
|
|
362
|
+
end
|
|
363
|
+
|
|
364
|
+
it 'query_params! returns the hash' do
|
|
365
|
+
expect(parsed.query_params!).to eq({ 'foo' => 'bar', 'baz' => 'qux' })
|
|
366
|
+
end
|
|
367
|
+
end
|
|
368
|
+
|
|
369
|
+
context 'with empty query params' do
|
|
370
|
+
let(:parsed) { DomainExtractor.parse('https://example.com') }
|
|
371
|
+
|
|
372
|
+
it 'returns empty hash for query_params' do
|
|
373
|
+
expect(parsed.query_params).to eq({})
|
|
374
|
+
end
|
|
375
|
+
|
|
376
|
+
it 'query_params? returns false for empty hash' do
|
|
377
|
+
expect(parsed.query_params?).to be false
|
|
378
|
+
end
|
|
379
|
+
end
|
|
380
|
+
end
|
|
381
|
+
|
|
382
|
+
describe '#respond_to_missing?' do
|
|
383
|
+
let(:parsed) { DomainExtractor.parse('https://api.example.com') }
|
|
384
|
+
|
|
385
|
+
it 'responds to valid accessor methods' do
|
|
386
|
+
expect(parsed).to respond_to(:host)
|
|
387
|
+
expect(parsed).to respond_to(:domain)
|
|
388
|
+
expect(parsed).to respond_to(:subdomain)
|
|
389
|
+
end
|
|
390
|
+
|
|
391
|
+
it 'responds to bang methods' do
|
|
392
|
+
expect(parsed).to respond_to(:host!)
|
|
393
|
+
expect(parsed).to respond_to(:domain!)
|
|
394
|
+
expect(parsed).to respond_to(:subdomain!)
|
|
395
|
+
end
|
|
396
|
+
|
|
397
|
+
it 'responds to question mark methods' do
|
|
398
|
+
expect(parsed).to respond_to(:host?)
|
|
399
|
+
expect(parsed).to respond_to(:domain?)
|
|
400
|
+
expect(parsed).to respond_to(:subdomain?)
|
|
401
|
+
end
|
|
402
|
+
|
|
403
|
+
it 'responds to www_subdomain?' do
|
|
404
|
+
expect(parsed).to respond_to(:www_subdomain?)
|
|
405
|
+
end
|
|
406
|
+
|
|
407
|
+
it 'does not respond to invalid methods' do
|
|
408
|
+
expect(parsed).not_to respond_to(:invalid_method)
|
|
409
|
+
expect(parsed).not_to respond_to(:not_a_real_method!)
|
|
410
|
+
end
|
|
411
|
+
end
|
|
412
|
+
|
|
413
|
+
describe '#inspect' do
|
|
414
|
+
it 'provides meaningful inspection output' do
|
|
415
|
+
parsed = DomainExtractor.parse('https://api.example.com')
|
|
416
|
+
output = parsed.inspect
|
|
417
|
+
expect(output).to include('DomainExtractor::ParsedURL')
|
|
418
|
+
expect(output).to include('subdomain')
|
|
419
|
+
expect(output).to include('api')
|
|
420
|
+
end
|
|
421
|
+
end
|
|
422
|
+
end
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: domain_extractor
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.6
|
|
4
|
+
version: 0.1.8
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- OpenSite AI
|
|
@@ -41,13 +41,16 @@ files:
|
|
|
41
41
|
- LICENSE.txt
|
|
42
42
|
- README.md
|
|
43
43
|
- lib/domain_extractor.rb
|
|
44
|
+
- lib/domain_extractor/errors.rb
|
|
44
45
|
- lib/domain_extractor/normalizer.rb
|
|
46
|
+
- lib/domain_extractor/parsed_url.rb
|
|
45
47
|
- lib/domain_extractor/parser.rb
|
|
46
48
|
- lib/domain_extractor/query_params.rb
|
|
47
49
|
- lib/domain_extractor/result.rb
|
|
48
50
|
- lib/domain_extractor/validators.rb
|
|
49
51
|
- lib/domain_extractor/version.rb
|
|
50
52
|
- spec/domain_extractor_spec.rb
|
|
53
|
+
- spec/parsed_url_spec.rb
|
|
51
54
|
- spec/spec_helper.rb
|
|
52
55
|
homepage: https://github.com/opensite-ai/domain_extractor
|
|
53
56
|
licenses:
|