domain_extractor 0.1.7 → 0.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +26 -0
- data/README.md +201 -0
- data/lib/domain_extractor/parsed_url.rb +131 -0
- data/lib/domain_extractor/parser.rb +3 -2
- data/lib/domain_extractor/result.rb +5 -1
- data/lib/domain_extractor/version.rb +1 -1
- data/lib/domain_extractor.rb +25 -5
- data/spec/domain_extractor_spec.rb +27 -25
- data/spec/parsed_url_spec.rb +465 -0
- metadata +3 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 2b919b52fe3fda7edf9738141e66e2acbcb2458d276800cb62b48c4c34d3b914
|
|
4
|
+
data.tar.gz: be02cf195da3a6a9da51136140f1487e96c026b22f1a300825d15733eb493acf
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 981d228483ba55b85834c38df3146a47018be6b7456053add7356a74f15d33cd27038d50682e5488f14ff9dfd09ef463bee1bbd6f7c486d9e9698dd305543738
|
|
7
|
+
data.tar.gz: e320e6d216196664de8f73b18be95746863b60fba99e496dc166ebd2f051afdf475d56f7d7a30dc6910e66d380e5b03b7d472730698905ed5b83e87aba3eb4e5
|
data/CHANGELOG.md
CHANGED
|
@@ -7,6 +7,32 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
7
7
|
|
|
8
8
|
## [Unreleased]
|
|
9
9
|
|
|
10
|
+
## [0.1.8] - 2025-10-31
|
|
11
|
+
|
|
12
|
+
### Implemented Declarative Method-style Accessors
|
|
13
|
+
|
|
14
|
+
#### Added
|
|
15
|
+
|
|
16
|
+
- **ParsedURL API**: Introduced intuitive method-style accessors with three variants:
|
|
17
|
+
- Default methods (e.g., `result.subdomain`) - Returns value or nil
|
|
18
|
+
- Bang methods (e.g., `result.subdomain!`) - Returns value or raises `InvalidURLError`
|
|
19
|
+
- Question methods (e.g., `result.subdomain?`) - Returns boolean true/false
|
|
20
|
+
- Added `www_subdomain?` helper method to check if subdomain is specifically 'www'
|
|
21
|
+
- Added `valid?` method to check if parsed result contains valid data
|
|
22
|
+
- Added `to_h` and `to_hash` methods for hash conversion
|
|
23
|
+
- Comprehensive documentation in `docs/PARSED_URL_API.md`
|
|
24
|
+
|
|
25
|
+
#### Changed
|
|
26
|
+
|
|
27
|
+
- `DomainExtractor.parse` now returns a `ParsedURL` object instead of a plain Hash (backward compatible via the `[]` accessor)
|
|
28
|
+
- `DomainExtractor.parse_batch` now returns an array of `ParsedURL` objects (or nil for invalid URLs)
|
|
29
|
+
|
|
30
|
+
#### Maintained
|
|
31
|
+
|
|
32
|
+
- Full backward compatibility with hash-style access using `[]`
|
|
33
|
+
- All existing tests continue to pass
|
|
34
|
+
- No breaking changes to existing API
|
|
35
|
+
|
|
10
36
|
## [0.1.7] - 2025-10-31
|
|
11
37
|
|
|
12
38
|
### Added valid? method and enhanced error handling
|
data/README.md
CHANGED
|
@@ -60,8 +60,209 @@ if DomainExtractor.valid?(url)
|
|
|
60
60
|
else
|
|
61
61
|
# handle invalid input
|
|
62
62
|
end
|
|
63
|
+
|
|
64
|
+
# New intuitive method-style access
|
|
65
|
+
result.subdomain # => 'www'
|
|
66
|
+
result.domain # => 'example'
|
|
67
|
+
result.host # => 'www.example.co.uk'
|
|
68
|
+
|
|
69
|
+
# Opt into strict parsing when needed
|
|
70
|
+
DomainExtractor.parse!('notaurl')
|
|
71
|
+
# => raises DomainExtractor::InvalidURLError: Invalid URL Value
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
## ParsedURL API - Intuitive Method Access
|
|
75
|
+
|
|
76
|
+
DomainExtractor now returns a `ParsedURL` object that supports three accessor styles, making your intent clear and your code more robust:
|
|
77
|
+
|
|
78
|
+
### Method Accessor Styles
|
|
79
|
+
|
|
80
|
+
#### 1. Default Methods (Silent Nil)
|
|
81
|
+
Returns the value or `nil` - perfect for exploratory code or when handling invalid data gracefully.
|
|
82
|
+
|
|
83
|
+
```ruby
|
|
84
|
+
result = DomainExtractor.parse('https://api.example.com')
|
|
85
|
+
result.subdomain # => 'api'
|
|
86
|
+
result.domain # => 'example'
|
|
87
|
+
result.host # => 'api.example.com'
|
|
88
|
+
|
|
89
|
+
# Without subdomain
|
|
90
|
+
result = DomainExtractor.parse('https://example.com')
|
|
91
|
+
result.subdomain # => nil (no error)
|
|
92
|
+
result.domain # => 'example'
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
#### 2. Bang Methods (!) - Explicit Errors
|
|
96
|
+
Returns the value or raises `InvalidURLError` - ideal for production code where missing data should fail fast.
|
|
97
|
+
|
|
98
|
+
```ruby
|
|
99
|
+
result = DomainExtractor.parse('https://example.com')
|
|
100
|
+
result.domain! # => 'example'
|
|
101
|
+
result.subdomain! # raises InvalidURLError: "subdomain not found or invalid"
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
#### 3. Question Methods (?) - Boolean Checks
|
|
105
|
+
Always returns `true` or `false` - perfect for conditional logic without exceptions.
|
|
106
|
+
|
|
107
|
+
```ruby
|
|
108
|
+
DomainExtractor.parse('https://dashtrack.com').subdomain? # => false
|
|
109
|
+
DomainExtractor.parse('https://api.dashtrack.com').subdomain? # => true
|
|
110
|
+
DomainExtractor.parse('https://www.dashtrack.com').www_subdomain? # => true
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
### Quick Examples
|
|
114
|
+
|
|
115
|
+
```ruby
|
|
116
|
+
url = 'https://api.staging.example.com/path'
|
|
117
|
+
parsed = DomainExtractor.parse(url)
|
|
118
|
+
|
|
119
|
+
# Method-style access
|
|
120
|
+
parsed.host # => 'api.staging.example.com'
|
|
121
|
+
parsed.subdomain # => 'api.staging'
|
|
122
|
+
parsed.domain # => 'example'
|
|
123
|
+
parsed.root_domain # => 'example.com'
|
|
124
|
+
parsed.tld # => 'com'
|
|
125
|
+
parsed.path # => '/path'
|
|
126
|
+
|
|
127
|
+
# Question methods for conditionals
|
|
128
|
+
if parsed.subdomain?
|
|
129
|
+
puts "Has subdomain: #{parsed.subdomain}"
|
|
130
|
+
end
|
|
131
|
+
|
|
132
|
+
# Bang methods when values are required
|
|
133
|
+
begin
|
|
134
|
+
subdomain = parsed.subdomain! # Safe - has subdomain
|
|
135
|
+
domain = parsed.domain! # Safe - has domain
|
|
136
|
+
rescue DomainExtractor::InvalidURLError => e
|
|
137
|
+
puts "Missing required component: #{e.message}"
|
|
138
|
+
end
|
|
139
|
+
|
|
140
|
+
# Hash-style access still works (backward compatible)
|
|
141
|
+
parsed[:subdomain] # => 'api.staging'
|
|
142
|
+
parsed[:host] # => 'api.staging.example.com'
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
### Additional Examples
|
|
146
|
+
|
|
147
|
+
#### Boolean Checks with Question Methods
|
|
148
|
+
|
|
149
|
+
```ruby
|
|
150
|
+
# Check for subdomain presence
|
|
151
|
+
DomainExtractor.parse('https://dashtrack.com').subdomain? # => false
|
|
152
|
+
DomainExtractor.parse('https://api.dashtrack.com').subdomain? # => true
|
|
153
|
+
|
|
154
|
+
# Check for www subdomain specifically
|
|
155
|
+
DomainExtractor.parse('https://www.dashtrack.com').www_subdomain? # => true
|
|
156
|
+
DomainExtractor.parse('https://api.dashtrack.com').www_subdomain? # => false
|
|
157
|
+
|
|
63
158
|
```
|
|
64
159
|
|
|
160
|
+
#### Handling Unknown or Invalid Data
|
|
161
|
+
|
|
162
|
+
```ruby
|
|
163
|
+
# Default accessors fail silently with nil
|
|
164
|
+
DomainExtractor.parse(nil).domain # => nil
|
|
165
|
+
DomainExtractor.parse('').host # => nil
|
|
166
|
+
DomainExtractor.parse('asdfasdfds').domain # => nil
|
|
167
|
+
|
|
168
|
+
# Boolean checks never raise
|
|
169
|
+
DomainExtractor.parse(nil).subdomain? # => false
|
|
170
|
+
DomainExtractor.parse('').domain? # => false
|
|
171
|
+
DomainExtractor.parse('https://dashtrack.com').subdomain? # => false
|
|
172
|
+
|
|
173
|
+
# Bang methods raise when a component is missing
|
|
174
|
+
DomainExtractor.parse('').host! # => raises DomainExtractor::InvalidURLError
|
|
175
|
+
DomainExtractor.parse('asdfasdfds').domain! # => raises DomainExtractor::InvalidURLError
|
|
176
|
+
|
|
177
|
+
# Strict parsing helper mirrors legacy behaviour
|
|
178
|
+
DomainExtractor.parse!('asdfasdfds') # => raises DomainExtractor::InvalidURLError
|
|
179
|
+
```
|
|
180
|
+
|
|
181
|
+
#### Safe Batch Processing
|
|
182
|
+
|
|
183
|
+
```ruby
|
|
184
|
+
urls = [
|
|
185
|
+
'https://api.example.com',
|
|
186
|
+
'https://example.com',
|
|
187
|
+
'https://www.example.com'
|
|
188
|
+
]
|
|
189
|
+
|
|
190
|
+
urls.each do |url|
|
|
191
|
+
result = DomainExtractor.parse(url)
|
|
192
|
+
|
|
193
|
+
info = {
|
|
194
|
+
url: url,
|
|
195
|
+
has_subdomain: result.subdomain?,
|
|
196
|
+
is_www: result.www_subdomain?,
|
|
197
|
+
host: result.host
|
|
198
|
+
}
|
|
199
|
+
|
|
200
|
+
puts "#{info[:url]} - subdomain: #{info[:has_subdomain]}, www: #{info[:is_www]}"
|
|
201
|
+
end
|
|
202
|
+
```
|
|
203
|
+
|
|
204
|
+
#### Production URL Validation
|
|
205
|
+
|
|
206
|
+
```ruby
|
|
207
|
+
def validate_api_url(url)
|
|
208
|
+
result = DomainExtractor.parse(url)
|
|
209
|
+
|
|
210
|
+
# Ensure all required components exist
|
|
211
|
+
result.subdomain! # Must have subdomain
|
|
212
|
+
result.domain! # Must have domain
|
|
213
|
+
|
|
214
|
+
# Additional validation
|
|
215
|
+
return false unless result.subdomain.start_with?('api')
|
|
216
|
+
|
|
217
|
+
true
|
|
218
|
+
rescue DomainExtractor::InvalidURLError => e
|
|
219
|
+
puts "Validation failed: #{e.message}"
|
|
220
|
+
false
|
|
221
|
+
end
|
|
222
|
+
|
|
223
|
+
validate_api_url('https://api.example.com/endpoint') # => true
|
|
224
|
+
validate_api_url('https://example.com/endpoint') # => false (no subdomain)
|
|
225
|
+
validate_api_url('https://www.example.com/endpoint') # => false (not api subdomain)
|
|
226
|
+
```
|
|
227
|
+
|
|
228
|
+
#### Guard Clauses with Question Methods
|
|
229
|
+
|
|
230
|
+
```ruby
|
|
231
|
+
def process_url(url)
|
|
232
|
+
result = DomainExtractor.parse(url)
|
|
233
|
+
|
|
234
|
+
return 'Invalid URL' unless result.valid?
|
|
235
|
+
return 'No subdomain present' unless result.subdomain?
|
|
236
|
+
return 'WWW redirect needed' if result.www_subdomain?
|
|
237
|
+
|
|
238
|
+
"Processing subdomain: #{result.subdomain}"
|
|
239
|
+
end
|
|
240
|
+
|
|
241
|
+
process_url('https://api.example.com') # => "Processing subdomain: api"
|
|
242
|
+
process_url('https://www.example.com') # => "WWW redirect needed"
|
|
243
|
+
process_url('https://example.com') # => "No subdomain present"
|
|
244
|
+
```
|
|
245
|
+
|
|
246
|
+
#### Converting to Hash
|
|
247
|
+
|
|
248
|
+
```ruby
|
|
249
|
+
url = 'https://api.example.com/path'
|
|
250
|
+
result = DomainExtractor.parse(url)
|
|
251
|
+
|
|
252
|
+
hash = result.to_h
|
|
253
|
+
# => {
|
|
254
|
+
# subdomain: "api",
|
|
255
|
+
# domain: "example",
|
|
256
|
+
# tld: "com",
|
|
257
|
+
# root_domain: "example.com",
|
|
258
|
+
# host: "api.example.com",
|
|
259
|
+
# path: "/path",
|
|
260
|
+
# query_params: {}
|
|
261
|
+
# }
|
|
262
|
+
```
|
|
263
|
+
|
|
264
|
+
**See [docs/PARSED_URL_API.md](docs/PARSED_URL_API.md) for comprehensive documentation and real-world examples.**
|
|
265
|
+
|
|
65
266
|
## Usage Examples
|
|
66
267
|
|
|
67
268
|
### Basic Domain Parsing
|
data/lib/domain_extractor/parsed_url.rb
ADDED
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module DomainExtractor
|
|
4
|
+
# ParsedURL wraps the parsing result and provides convenient accessor methods
|
|
5
|
+
# with support for bang (!) and question mark (?) variants.
|
|
6
|
+
#
|
|
7
|
+
# Examples:
|
|
8
|
+
# parsed = DomainExtractor.parse('https://api.example.com')
|
|
9
|
+
# parsed.host # => 'api.example.com'
|
|
10
|
+
# parsed.subdomain # => 'api'
|
|
11
|
+
# parsed.subdomain? # => true
|
|
12
|
+
# parsed.www_subdomain? # => false
|
|
13
|
+
#
|
|
14
|
+
# parsed = DomainExtractor.parse('invalid')
|
|
15
|
+
# parsed.host # => nil
|
|
16
|
+
# parsed.host? # => false
|
|
17
|
+
# parsed.host! # raises InvalidURLError
|
|
18
|
+
class ParsedURL
|
|
19
|
+
# Expose the underlying hash for backward compatibility
|
|
20
|
+
attr_reader :result
|
|
21
|
+
|
|
22
|
+
# List of valid result keys that should have method accessors
|
|
23
|
+
RESULT_KEYS = %i[subdomain domain tld root_domain host path query_params].freeze
|
|
24
|
+
|
|
25
|
+
def initialize(result)
|
|
26
|
+
@result = result || {}
|
|
27
|
+
freeze
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
# Hash-style access for backward compatibility
|
|
31
|
+
# result[:subdomain], result[:host], etc.
|
|
32
|
+
def [](key)
|
|
33
|
+
@result[key]
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
# Check if the parsed result is valid (not nil/empty)
|
|
37
|
+
def valid?
|
|
38
|
+
!@result.empty?
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
# Special helper: check if subdomain is specifically 'www'
|
|
42
|
+
def www_subdomain?
|
|
43
|
+
@result[:subdomain] == 'www'
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
# Dynamically handle method calls for all result keys
|
|
47
|
+
# Supports three variants:
|
|
48
|
+
# - method_name: returns value or nil
|
|
49
|
+
# - method_name!: returns value or raises InvalidURLError
|
|
50
|
+
# - method_name?: returns a boolean (true if the value exists and is not nil/empty)
|
|
51
|
+
def method_missing(method_name, *args, &)
|
|
52
|
+
method_str = method_name.to_s
|
|
53
|
+
|
|
54
|
+
# Handle bang methods (method_name!)
|
|
55
|
+
return handle_bang_method(method_str) if method_str.end_with?('!')
|
|
56
|
+
|
|
57
|
+
# Handle question mark methods (method_name?)
|
|
58
|
+
return handle_question_method(method_str) if method_str.end_with?('?')
|
|
59
|
+
|
|
60
|
+
# Handle regular methods (method_name)
|
|
61
|
+
key = method_name.to_sym
|
|
62
|
+
return @result[key] if RESULT_KEYS.include?(key)
|
|
63
|
+
|
|
64
|
+
super
|
|
65
|
+
end
|
|
66
|
+
|
|
67
|
+
def respond_to_missing?(method_name, include_private = false)
|
|
68
|
+
method_str = method_name.to_s
|
|
69
|
+
|
|
70
|
+
# Check for www_subdomain? special case
|
|
71
|
+
return true if method_name == :www_subdomain?
|
|
72
|
+
|
|
73
|
+
# Check if it's a bang or question mark variant
|
|
74
|
+
if method_str.end_with?('!') || method_str.end_with?('?')
|
|
75
|
+
key = method_str[0...-1].to_sym
|
|
76
|
+
return true if RESULT_KEYS.include?(key)
|
|
77
|
+
end
|
|
78
|
+
|
|
79
|
+
# Check if it's a regular method
|
|
80
|
+
return true if RESULT_KEYS.include?(method_name.to_sym)
|
|
81
|
+
|
|
82
|
+
super
|
|
83
|
+
end
|
|
84
|
+
|
|
85
|
+
# Provide hash-like inspection
|
|
86
|
+
def inspect
|
|
87
|
+
"#<DomainExtractor::ParsedURL #{@result.inspect}>"
|
|
88
|
+
end
|
|
89
|
+
|
|
90
|
+
def to_s
|
|
91
|
+
@result.to_s
|
|
92
|
+
end
|
|
93
|
+
|
|
94
|
+
# Allow to_h conversion for hash compatibility
|
|
95
|
+
def to_h
|
|
96
|
+
@result.dup
|
|
97
|
+
end
|
|
98
|
+
|
|
99
|
+
# Allow to_hash as well for better Ruby compatibility
|
|
100
|
+
alias to_hash to_h
|
|
101
|
+
|
|
102
|
+
private
|
|
103
|
+
|
|
104
|
+
# Handle bang methods that raise errors for missing values
|
|
105
|
+
def handle_bang_method(method_str)
|
|
106
|
+
key = method_str[0...-1].to_sym
|
|
107
|
+
return unless RESULT_KEYS.include?(key)
|
|
108
|
+
|
|
109
|
+
value = @result[key]
|
|
110
|
+
return value if value_present?(value)
|
|
111
|
+
|
|
112
|
+
raise InvalidURLError, "#{key} not found or invalid"
|
|
113
|
+
end
|
|
114
|
+
|
|
115
|
+
# Handle question mark methods that return booleans
|
|
116
|
+
def handle_question_method(method_str)
|
|
117
|
+
key = method_str[0...-1].to_sym
|
|
118
|
+
return unless RESULT_KEYS.include?(key)
|
|
119
|
+
|
|
120
|
+
value_present?(@result[key])
|
|
121
|
+
end
|
|
122
|
+
|
|
123
|
+
# Check if a value is present (not nil and not empty for strings/hashes)
|
|
124
|
+
def value_present?(value)
|
|
125
|
+
return false if value.nil?
|
|
126
|
+
return !value.empty? if value.respond_to?(:empty?)
|
|
127
|
+
|
|
128
|
+
true
|
|
129
|
+
end
|
|
130
|
+
end
|
|
131
|
+
end
|
data/lib/domain_extractor/parser.rb
CHANGED
|
@@ -6,6 +6,7 @@ require 'public_suffix'
|
|
|
6
6
|
require_relative 'normalizer'
|
|
7
7
|
require_relative 'result'
|
|
8
8
|
require_relative 'validators'
|
|
9
|
+
require_relative 'parsed_url'
|
|
9
10
|
|
|
10
11
|
module DomainExtractor
|
|
11
12
|
# Parser orchestrates the pipeline for url normalization, validation, and domain extraction.
|
|
@@ -14,12 +15,12 @@ module DomainExtractor
|
|
|
14
15
|
|
|
15
16
|
def call(raw_url)
|
|
16
17
|
components = extract_components(raw_url)
|
|
17
|
-
return unless components
|
|
18
|
+
return ParsedURL.new(nil) unless components
|
|
18
19
|
|
|
19
20
|
uri, domain, host = components
|
|
20
21
|
build_result(domain: domain, host: host, uri: uri)
|
|
21
22
|
rescue ::URI::InvalidURIError, ::PublicSuffix::Error
|
|
22
|
-
nil
|
|
23
|
+
ParsedURL.new(nil)
|
|
23
24
|
end
|
|
24
25
|
|
|
25
26
|
def valid?(raw_url)
|
data/lib/domain_extractor/result.rb
CHANGED
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require_relative 'parsed_url'
|
|
4
|
+
|
|
3
5
|
module DomainExtractor
|
|
4
6
|
# Result encapsulates the final parsed attributes and exposes a hash interface.
|
|
5
7
|
module Result
|
|
@@ -10,7 +12,7 @@ module DomainExtractor
|
|
|
10
12
|
module_function
|
|
11
13
|
|
|
12
14
|
def build(**attributes)
|
|
13
|
-
{
|
|
15
|
+
hash = {
|
|
14
16
|
subdomain: normalize_subdomain(attributes[:subdomain]),
|
|
15
17
|
root_domain: attributes[:root_domain],
|
|
16
18
|
domain: attributes[:domain],
|
|
@@ -19,6 +21,8 @@ module DomainExtractor
|
|
|
19
21
|
path: attributes[:path] || EMPTY_PATH,
|
|
20
22
|
query_params: QueryParams.call(attributes[:query])
|
|
21
23
|
}.freeze
|
|
24
|
+
|
|
25
|
+
ParsedURL.new(hash)
|
|
22
26
|
end
|
|
23
27
|
|
|
24
28
|
def normalize_subdomain(value)
|
data/lib/domain_extractor.rb
CHANGED
|
@@ -5,6 +5,7 @@ require 'public_suffix'
|
|
|
5
5
|
|
|
6
6
|
require_relative 'domain_extractor/version'
|
|
7
7
|
require_relative 'domain_extractor/errors'
|
|
8
|
+
require_relative 'domain_extractor/parsed_url'
|
|
8
9
|
require_relative 'domain_extractor/parser'
|
|
9
10
|
require_relative 'domain_extractor/query_params'
|
|
10
11
|
|
|
@@ -13,11 +14,26 @@ require_relative 'domain_extractor/query_params'
|
|
|
13
14
|
module DomainExtractor
|
|
14
15
|
class << self
|
|
15
16
|
# Parse an individual URL and extract domain attributes.
|
|
16
|
-
#
|
|
17
|
+
# Returns a ParsedURL object that supports hash-style access and method calls.
|
|
18
|
+
# For invalid inputs the returned ParsedURL will be marked invalid and all
|
|
19
|
+
# accessors (without bang) will evaluate to nil/false.
|
|
17
20
|
# @param url [String, #to_s]
|
|
18
|
-
# @return [
|
|
21
|
+
# @return [ParsedURL]
|
|
19
22
|
def parse(url)
|
|
20
|
-
Parser.call(url)
|
|
23
|
+
Parser.call(url)
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
# Parse an individual URL and raise when extraction fails.
|
|
27
|
+
# This mirrors the legacy behaviour of .parse while giving callers an
|
|
28
|
+
# explicit opt-in to strict validation.
|
|
29
|
+
# @param url [String, #to_s]
|
|
30
|
+
# @return [ParsedURL]
|
|
31
|
+
# @raise [InvalidURLError]
|
|
32
|
+
def parse!(url)
|
|
33
|
+
result = Parser.call(url)
|
|
34
|
+
raise InvalidURLError unless result.valid?
|
|
35
|
+
|
|
36
|
+
result
|
|
21
37
|
end
|
|
22
38
|
|
|
23
39
|
# Determine if a URL is considered valid by the parser.
|
|
@@ -28,12 +44,16 @@ module DomainExtractor
|
|
|
28
44
|
end
|
|
29
45
|
|
|
30
46
|
# Parse many URLs and return their individual parse results.
|
|
47
|
+
# Returns nil for invalid URLs to maintain backward compatibility.
|
|
31
48
|
# @param urls [Enumerable<String>]
|
|
32
|
-
# @return [Array<
|
|
49
|
+
# @return [Array<ParsedURL, nil>]
|
|
33
50
|
def parse_batch(urls)
|
|
34
51
|
return [] unless urls.respond_to?(:map)
|
|
35
52
|
|
|
36
|
-
urls.map
|
|
53
|
+
urls.map do |url|
|
|
54
|
+
result = Parser.call(url)
|
|
55
|
+
result.valid? ? result : nil
|
|
56
|
+
end
|
|
37
57
|
end
|
|
38
58
|
|
|
39
59
|
# Convert a query string into a Hash representation.
|
data/spec/domain_extractor_spec.rb
CHANGED
|
@@ -142,40 +142,42 @@ RSpec.describe DomainExtractor do
|
|
|
142
142
|
end
|
|
143
143
|
|
|
144
144
|
context 'with invalid URLs' do
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
145
|
+
let(:invalid_inputs) { ['http://', 'not_a_url', '192.168.1.1', '[2001:db8::1]', '', nil] }
|
|
146
|
+
|
|
147
|
+
it 'returns an invalid ParsedURL that safely yields nil values' do
|
|
148
|
+
invalid_inputs.each do |input|
|
|
149
|
+
result = described_class.parse(input)
|
|
150
|
+
|
|
151
|
+
expect(result).to be_a(DomainExtractor::ParsedURL)
|
|
152
|
+
expect(result.valid?).to be(false)
|
|
153
|
+
expect(result.domain).to be_nil
|
|
154
|
+
expect(result.domain?).to be(false)
|
|
155
|
+
expect(result.host).to be_nil
|
|
156
|
+
expect(result.host?).to be(false)
|
|
157
|
+
end
|
|
150
158
|
end
|
|
151
159
|
|
|
152
|
-
it '
|
|
153
|
-
|
|
154
|
-
DomainExtractor::InvalidURLError,
|
|
155
|
-
'Invalid URL Value'
|
|
156
|
-
)
|
|
157
|
-
end
|
|
160
|
+
it 'allows bang accessors to raise explicit errors' do
|
|
161
|
+
result = described_class.parse('not_a_url')
|
|
158
162
|
|
|
159
|
-
|
|
160
|
-
expect { described_class.parse('192.168.1.1') }.to raise_error(
|
|
163
|
+
expect { result.domain! }.to raise_error(
|
|
161
164
|
DomainExtractor::InvalidURLError,
|
|
162
|
-
'
|
|
165
|
+
'domain not found or invalid'
|
|
163
166
|
)
|
|
164
|
-
end
|
|
165
167
|
|
|
166
|
-
|
|
167
|
-
expect { described_class.parse('[2001:db8::1]') }.to raise_error(
|
|
168
|
+
expect { result.host! }.to raise_error(
|
|
168
169
|
DomainExtractor::InvalidURLError,
|
|
169
|
-
'
|
|
170
|
+
'host not found or invalid'
|
|
170
171
|
)
|
|
171
172
|
end
|
|
172
173
|
|
|
173
|
-
it '
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
174
|
+
it 'provides strict parsing via parse!' do
|
|
175
|
+
invalid_inputs.each do |input|
|
|
176
|
+
expect { described_class.parse!(input) }.to raise_error(
|
|
177
|
+
DomainExtractor::InvalidURLError,
|
|
178
|
+
'Invalid URL Value'
|
|
179
|
+
)
|
|
180
|
+
end
|
|
179
181
|
end
|
|
180
182
|
end
|
|
181
183
|
end
|
|
@@ -300,7 +302,7 @@ RSpec.describe DomainExtractor do
|
|
|
300
302
|
|
|
301
303
|
results = described_class.parse_batch(urls)
|
|
302
304
|
|
|
303
|
-
expect(results).to all(be_a(
|
|
305
|
+
expect(results).to all(be_a(DomainExtractor::ParsedURL))
|
|
304
306
|
expect(results.map { |result| result[:root_domain] }).to all(eq('example.com'))
|
|
305
307
|
end
|
|
306
308
|
|
data/spec/parsed_url_spec.rb
ADDED
|
@@ -0,0 +1,465 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'spec_helper'
|
|
4
|
+
|
|
5
|
+
RSpec.describe DomainExtractor::ParsedURL do
|
|
6
|
+
describe 'method accessor styles' do
|
|
7
|
+
context 'with a valid URL with subdomain' do
|
|
8
|
+
let(:parsed) { DomainExtractor.parse('https://api.dashtrack.com/path?query=value') }
|
|
9
|
+
|
|
10
|
+
describe 'default accessor methods' do
|
|
11
|
+
it 'returns subdomain' do
|
|
12
|
+
expect(parsed.subdomain).to eq('api')
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
it 'returns domain' do
|
|
16
|
+
expect(parsed.domain).to eq('dashtrack')
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
it 'returns tld' do
|
|
20
|
+
expect(parsed.tld).to eq('com')
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
it 'returns root_domain' do
|
|
24
|
+
expect(parsed.root_domain).to eq('dashtrack.com')
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
it 'returns host' do
|
|
28
|
+
expect(parsed.host).to eq('api.dashtrack.com')
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
it 'returns path' do
|
|
32
|
+
expect(parsed.path).to eq('/path')
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
it 'returns query_params' do
|
|
36
|
+
expect(parsed.query_params).to eq({ 'query' => 'value' })
|
|
37
|
+
end
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
describe 'bang (!) accessor methods' do
|
|
41
|
+
it 'returns subdomain!' do
|
|
42
|
+
expect(parsed.subdomain!).to eq('api')
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
it 'returns domain!' do
|
|
46
|
+
expect(parsed.domain!).to eq('dashtrack')
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
it 'returns tld!' do
|
|
50
|
+
expect(parsed.tld!).to eq('com')
|
|
51
|
+
end
|
|
52
|
+
|
|
53
|
+
it 'returns root_domain!' do
|
|
54
|
+
expect(parsed.root_domain!).to eq('dashtrack.com')
|
|
55
|
+
end
|
|
56
|
+
|
|
57
|
+
it 'returns host!' do
|
|
58
|
+
expect(parsed.host!).to eq('api.dashtrack.com')
|
|
59
|
+
end
|
|
60
|
+
end
|
|
61
|
+
|
|
62
|
+
describe 'question mark (?) accessor methods' do
|
|
63
|
+
it 'returns true for subdomain?' do
|
|
64
|
+
expect(parsed.subdomain?).to be true
|
|
65
|
+
end
|
|
66
|
+
|
|
67
|
+
it 'returns true for domain?' do
|
|
68
|
+
expect(parsed.domain?).to be true
|
|
69
|
+
end
|
|
70
|
+
|
|
71
|
+
it 'returns true for tld?' do
|
|
72
|
+
expect(parsed.tld?).to be true
|
|
73
|
+
end
|
|
74
|
+
|
|
75
|
+
it 'returns true for root_domain?' do
|
|
76
|
+
expect(parsed.root_domain?).to be true
|
|
77
|
+
end
|
|
78
|
+
|
|
79
|
+
it 'returns true for host?' do
|
|
80
|
+
expect(parsed.host?).to be true
|
|
81
|
+
end
|
|
82
|
+
end
|
|
83
|
+
end
|
|
84
|
+
|
|
85
|
+
context 'with a valid URL without subdomain' do
|
|
86
|
+
let(:parsed) { DomainExtractor.parse('https://dashtrack.com') }
|
|
87
|
+
|
|
88
|
+
describe 'default accessor methods for nil subdomain' do
|
|
89
|
+
it 'returns nil for subdomain' do
|
|
90
|
+
expect(parsed.subdomain).to be_nil
|
|
91
|
+
end
|
|
92
|
+
|
|
93
|
+
it 'returns domain' do
|
|
94
|
+
expect(parsed.domain).to eq('dashtrack')
|
|
95
|
+
end
|
|
96
|
+
|
|
97
|
+
it 'returns host' do
|
|
98
|
+
expect(parsed.host).to eq('dashtrack.com')
|
|
99
|
+
end
|
|
100
|
+
end
|
|
101
|
+
|
|
102
|
+
describe 'bang (!) accessor methods with nil subdomain' do
|
|
103
|
+
it 'raises InvalidURLError for subdomain!' do
|
|
104
|
+
expect { parsed.subdomain! }.to raise_error(
|
|
105
|
+
DomainExtractor::InvalidURLError,
|
|
106
|
+
'subdomain not found or invalid'
|
|
107
|
+
)
|
|
108
|
+
end
|
|
109
|
+
|
|
110
|
+
it 'returns domain!' do
|
|
111
|
+
expect(parsed.domain!).to eq('dashtrack')
|
|
112
|
+
end
|
|
113
|
+
end
|
|
114
|
+
|
|
115
|
+
describe 'question mark (?) accessor methods with nil subdomain' do
|
|
116
|
+
it 'returns false for subdomain?' do
|
|
117
|
+
expect(parsed.subdomain?).to be false
|
|
118
|
+
end
|
|
119
|
+
|
|
120
|
+
it 'returns true for domain?' do
|
|
121
|
+
expect(parsed.domain?).to be true
|
|
122
|
+
end
|
|
123
|
+
|
|
124
|
+
it 'returns true for host?' do
|
|
125
|
+
expect(parsed.host?).to be true
|
|
126
|
+
end
|
|
127
|
+
end
|
|
128
|
+
end
|
|
129
|
+
|
|
130
|
+
context 'with invalid URL input' do
|
|
131
|
+
let(:parsed) { DomainExtractor.parse('invalid_url_value') }
|
|
132
|
+
|
|
133
|
+
describe 'default accessor methods' do
|
|
134
|
+
it 'returns nil for subdomain' do
|
|
135
|
+
expect(parsed.subdomain).to be_nil
|
|
136
|
+
end
|
|
137
|
+
|
|
138
|
+
it 'returns nil for domain' do
|
|
139
|
+
expect(parsed.domain).to be_nil
|
|
140
|
+
end
|
|
141
|
+
|
|
142
|
+
it 'returns nil for host' do
|
|
143
|
+
expect(parsed.host).to be_nil
|
|
144
|
+
end
|
|
145
|
+
|
|
146
|
+
it 'returns nil for root_domain' do
|
|
147
|
+
expect(parsed.root_domain).to be_nil
|
|
148
|
+
end
|
|
149
|
+
end
|
|
150
|
+
|
|
151
|
+
describe 'bang (!) accessor methods' do
|
|
152
|
+
it 'raises InvalidURLError for host!' do
|
|
153
|
+
expect { parsed.host! }.to raise_error(
|
|
154
|
+
DomainExtractor::InvalidURLError,
|
|
155
|
+
'host not found or invalid'
|
|
156
|
+
)
|
|
157
|
+
end
|
|
158
|
+
|
|
159
|
+
it 'raises InvalidURLError for domain!' do
|
|
160
|
+
expect { parsed.domain! }.to raise_error(
|
|
161
|
+
DomainExtractor::InvalidURLError,
|
|
162
|
+
'domain not found or invalid'
|
|
163
|
+
)
|
|
164
|
+
end
|
|
165
|
+
|
|
166
|
+
it 'raises InvalidURLError for subdomain!' do
|
|
167
|
+
expect { parsed.subdomain! }.to raise_error(
|
|
168
|
+
DomainExtractor::InvalidURLError,
|
|
169
|
+
'subdomain not found or invalid'
|
|
170
|
+
)
|
|
171
|
+
end
|
|
172
|
+
end
|
|
173
|
+
|
|
174
|
+
describe 'question mark (?) accessor methods' do
|
|
175
|
+
it 'returns false for subdomain?' do
|
|
176
|
+
expect(parsed.subdomain?).to be false
|
|
177
|
+
end
|
|
178
|
+
|
|
179
|
+
it 'returns false for domain?' do
|
|
180
|
+
expect(parsed.domain?).to be false
|
|
181
|
+
end
|
|
182
|
+
|
|
183
|
+
it 'returns false for host?' do
|
|
184
|
+
expect(parsed.host?).to be false
|
|
185
|
+
end
|
|
186
|
+
|
|
187
|
+
it 'returns false for root_domain?' do
|
|
188
|
+
expect(parsed.root_domain?).to be false
|
|
189
|
+
end
|
|
190
|
+
end
|
|
191
|
+
end
|
|
192
|
+
|
|
193
|
+
context 'with nil input' do
|
|
194
|
+
let(:parsed) { DomainExtractor.parse(nil) }
|
|
195
|
+
|
|
196
|
+
it 'returns nil for default accessors' do
|
|
197
|
+
expect(parsed.domain).to be_nil
|
|
198
|
+
expect(parsed.host).to be_nil
|
|
199
|
+
expect(parsed.subdomain).to be_nil
|
|
200
|
+
end
|
|
201
|
+
|
|
202
|
+
it 'returns false for question accessors' do
|
|
203
|
+
expect(parsed.domain?).to be false
|
|
204
|
+
expect(parsed.host?).to be false
|
|
205
|
+
expect(parsed.subdomain?).to be false
|
|
206
|
+
end
|
|
207
|
+
|
|
208
|
+
it 'raises for bang accessors' do
|
|
209
|
+
expect { parsed.domain! }.to raise_error(
|
|
210
|
+
DomainExtractor::InvalidURLError,
|
|
211
|
+
'domain not found or invalid'
|
|
212
|
+
)
|
|
213
|
+
end
|
|
214
|
+
end
|
|
215
|
+
|
|
216
|
+
context 'with empty string input' do
|
|
217
|
+
let(:parsed) { DomainExtractor.parse('') }
|
|
218
|
+
|
|
219
|
+
it 'returns nil for default accessors' do
|
|
220
|
+
expect(parsed.domain).to be_nil
|
|
221
|
+
expect(parsed.host).to be_nil
|
|
222
|
+
end
|
|
223
|
+
|
|
224
|
+
it 'returns false for question accessors' do
|
|
225
|
+
expect(parsed.domain?).to be false
|
|
226
|
+
expect(parsed.host?).to be false
|
|
227
|
+
end
|
|
228
|
+
|
|
229
|
+
it 'raises for bang accessors' do
|
|
230
|
+
expect { parsed.host! }.to raise_error(
|
|
231
|
+
DomainExtractor::InvalidURLError,
|
|
232
|
+
'host not found or invalid'
|
|
233
|
+
)
|
|
234
|
+
end
|
|
235
|
+
end
|
|
236
|
+
end
|
|
237
|
+
|
|
238
|
+
describe '#www_subdomain?' do
|
|
239
|
+
it 'returns true when subdomain is www' do
|
|
240
|
+
parsed = DomainExtractor.parse('https://www.dashtrack.com')
|
|
241
|
+
expect(parsed.www_subdomain?).to be true
|
|
242
|
+
end
|
|
243
|
+
|
|
244
|
+
it 'returns false when subdomain is not www' do
|
|
245
|
+
parsed = DomainExtractor.parse('https://api.dashtrack.com')
|
|
246
|
+
expect(parsed.www_subdomain?).to be false
|
|
247
|
+
end
|
|
248
|
+
|
|
249
|
+
it 'returns false when there is no subdomain' do
|
|
250
|
+
parsed = DomainExtractor.parse('https://dashtrack.com')
|
|
251
|
+
expect(parsed.www_subdomain?).to be false
|
|
252
|
+
end
|
|
253
|
+
|
|
254
|
+
it 'returns false for invalid URL' do
|
|
255
|
+
parsed = DomainExtractor.parse('invalid_url_value')
|
|
256
|
+
expect(parsed.www_subdomain?).to be false
|
|
257
|
+
end
|
|
258
|
+
end
|
|
259
|
+
|
|
260
|
+
describe '#valid?' do
|
|
261
|
+
it 'returns true for valid URL' do
|
|
262
|
+
parsed = DomainExtractor.parse('https://dashtrack.com')
|
|
263
|
+
expect(parsed.valid?).to be true
|
|
264
|
+
end
|
|
265
|
+
|
|
266
|
+
it 'returns false for invalid URL' do
|
|
267
|
+
parsed = DomainExtractor.parse('invalid_url_value')
|
|
268
|
+
expect(parsed.valid?).to be false
|
|
269
|
+
end
|
|
270
|
+
|
|
271
|
+
it 'returns false for empty result' do
|
|
272
|
+
parsed = DomainExtractor::ParsedURL.new({})
|
|
273
|
+
expect(parsed.valid?).to be false
|
|
274
|
+
end
|
|
275
|
+
end
|
|
276
|
+
|
|
277
|
+
describe 'hash-style access for backward compatibility' do
|
|
278
|
+
let(:parsed) { DomainExtractor.parse('https://www.example.co.uk/path?query=value') }
|
|
279
|
+
|
|
280
|
+
it 'supports hash-style access with []' do
|
|
281
|
+
expect(parsed[:subdomain]).to eq('www')
|
|
282
|
+
expect(parsed[:domain]).to eq('example')
|
|
283
|
+
expect(parsed[:tld]).to eq('co.uk')
|
|
284
|
+
expect(parsed[:root_domain]).to eq('example.co.uk')
|
|
285
|
+
expect(parsed[:host]).to eq('www.example.co.uk')
|
|
286
|
+
expect(parsed[:path]).to eq('/path')
|
|
287
|
+
end
|
|
288
|
+
end
|
|
289
|
+
|
|
290
|
+
describe '#to_h and #to_hash' do
|
|
291
|
+
let(:parsed) { DomainExtractor.parse('https://api.example.com') }
|
|
292
|
+
|
|
293
|
+
it 'converts to hash with to_h' do
|
|
294
|
+
hash = parsed.to_h
|
|
295
|
+
expect(hash).to be_a(Hash)
|
|
296
|
+
expect(hash[:subdomain]).to eq('api')
|
|
297
|
+
expect(hash[:domain]).to eq('example')
|
|
298
|
+
end
|
|
299
|
+
|
|
300
|
+
it 'converts to hash with to_hash' do
|
|
301
|
+
hash = parsed.to_hash
|
|
302
|
+
expect(hash).to be_a(Hash)
|
|
303
|
+
expect(hash[:subdomain]).to eq('api')
|
|
304
|
+
expect(hash[:domain]).to eq('example')
|
|
305
|
+
end
|
|
306
|
+
end
|
|
307
|
+
|
|
308
|
+
describe 'integration examples from requirements' do
|
|
309
|
+
it 'handles example: DomainExtractor.parse(url).host' do
|
|
310
|
+
url = 'https://www.example.co.uk/path?query=value'
|
|
311
|
+
expect(DomainExtractor.parse(url).host).to eq('www.example.co.uk')
|
|
312
|
+
end
|
|
313
|
+
|
|
314
|
+
it 'handles example: DomainExtractor.parse(url).domain' do
|
|
315
|
+
url = 'https://www.example.co.uk/path?query=value'
|
|
316
|
+
expect(DomainExtractor.parse(url).domain).to eq('example')
|
|
317
|
+
end
|
|
318
|
+
|
|
319
|
+
it 'handles example: DomainExtractor.parse(url).subdomain' do
|
|
320
|
+
url = 'https://www.example.co.uk/path?query=value'
|
|
321
|
+
expect(DomainExtractor.parse(url).subdomain).to eq('www')
|
|
322
|
+
end
|
|
323
|
+
|
|
324
|
+
it 'handles example: no subdomain returns false' do
|
|
325
|
+
expect(DomainExtractor.parse('https://dashtrack.com').subdomain?).to be false
|
|
326
|
+
end
|
|
327
|
+
|
|
328
|
+
it 'handles example: with subdomain returns true' do
|
|
329
|
+
expect(DomainExtractor.parse('https://api.dashtrack.com').subdomain?).to be true
|
|
330
|
+
end
|
|
331
|
+
|
|
332
|
+
it 'handles example: www_subdomain? returns true for www' do
|
|
333
|
+
expect(DomainExtractor.parse('https://www.dashtrack.com').www_subdomain?).to be true
|
|
334
|
+
end
|
|
335
|
+
|
|
336
|
+
it 'handles example: www_subdomain? returns false for non-www' do
|
|
337
|
+
expect(DomainExtractor.parse('https://dashtrack.com').www_subdomain?).to be false
|
|
338
|
+
end
|
|
339
|
+
|
|
340
|
+
it 'handles example: host returns value for valid URL' do
|
|
341
|
+
expect(DomainExtractor.parse('https://api.dashtrack.com').host).to eq('api.dashtrack.com')
|
|
342
|
+
end
|
|
343
|
+
|
|
344
|
+
it 'handles example: domain returns nil for invalid URL' do
|
|
345
|
+
# Parser returns ParsedURL with empty result for invalid URLs
|
|
346
|
+
parsed = DomainExtractor.parse('invalid_url_value')
|
|
347
|
+
expect(parsed.domain).to be_nil
|
|
348
|
+
end
|
|
349
|
+
end
|
|
350
|
+
|
|
351
|
+
describe 'edge cases' do
|
|
352
|
+
context 'with multi-part TLD' do
|
|
353
|
+
let(:parsed) { DomainExtractor.parse('shop.example.com.au') }
|
|
354
|
+
|
|
355
|
+
it 'correctly identifies subdomain' do
|
|
356
|
+
expect(parsed.subdomain).to eq('shop')
|
|
357
|
+
end
|
|
358
|
+
|
|
359
|
+
it 'correctly identifies tld' do
|
|
360
|
+
expect(parsed.tld).to eq('com.au')
|
|
361
|
+
end
|
|
362
|
+
|
|
363
|
+
it 'subdomain? returns true' do
|
|
364
|
+
expect(parsed.subdomain?).to be true
|
|
365
|
+
end
|
|
366
|
+
end
|
|
367
|
+
|
|
368
|
+
context 'with nested subdomains' do
|
|
369
|
+
let(:parsed) { DomainExtractor.parse('api.staging.example.com') }
|
|
370
|
+
|
|
371
|
+
it 'returns nested subdomain' do
|
|
372
|
+
expect(parsed.subdomain).to eq('api.staging')
|
|
373
|
+
end
|
|
374
|
+
|
|
375
|
+
it 'subdomain? returns true' do
|
|
376
|
+
expect(parsed.subdomain?).to be true
|
|
377
|
+
end
|
|
378
|
+
|
|
379
|
+
it 'subdomain! returns the value' do
|
|
380
|
+
expect(parsed.subdomain!).to eq('api.staging')
|
|
381
|
+
end
|
|
382
|
+
end
|
|
383
|
+
|
|
384
|
+
context 'with empty path' do
|
|
385
|
+
let(:parsed) { DomainExtractor.parse('https://example.com') }
|
|
386
|
+
|
|
387
|
+
it 'returns empty string for path' do
|
|
388
|
+
expect(parsed.path).to eq('')
|
|
389
|
+
end
|
|
390
|
+
|
|
391
|
+
it 'path? returns false for empty path' do
|
|
392
|
+
expect(parsed.path?).to be false
|
|
393
|
+
end
|
|
394
|
+
end
|
|
395
|
+
|
|
396
|
+
context 'with query params' do
|
|
397
|
+
let(:parsed) { DomainExtractor.parse('https://example.com?foo=bar&baz=qux') }
|
|
398
|
+
|
|
399
|
+
it 'returns query_params hash' do
|
|
400
|
+
expect(parsed.query_params).to eq({ 'foo' => 'bar', 'baz' => 'qux' })
|
|
401
|
+
end
|
|
402
|
+
|
|
403
|
+
it 'query_params? returns true' do
|
|
404
|
+
expect(parsed.query_params?).to be true
|
|
405
|
+
end
|
|
406
|
+
|
|
407
|
+
it 'query_params! returns the hash' do
|
|
408
|
+
expect(parsed.query_params!).to eq({ 'foo' => 'bar', 'baz' => 'qux' })
|
|
409
|
+
end
|
|
410
|
+
end
|
|
411
|
+
|
|
412
|
+
context 'with empty query params' do
|
|
413
|
+
let(:parsed) { DomainExtractor.parse('https://example.com') }
|
|
414
|
+
|
|
415
|
+
it 'returns empty hash for query_params' do
|
|
416
|
+
expect(parsed.query_params).to eq({})
|
|
417
|
+
end
|
|
418
|
+
|
|
419
|
+
it 'query_params? returns false for empty hash' do
|
|
420
|
+
expect(parsed.query_params?).to be false
|
|
421
|
+
end
|
|
422
|
+
end
|
|
423
|
+
end
|
|
424
|
+
|
|
425
|
+
describe '#respond_to_missing?' do
|
|
426
|
+
let(:parsed) { DomainExtractor.parse('https://api.example.com') }
|
|
427
|
+
|
|
428
|
+
it 'responds to valid accessor methods' do
|
|
429
|
+
expect(parsed).to respond_to(:host)
|
|
430
|
+
expect(parsed).to respond_to(:domain)
|
|
431
|
+
expect(parsed).to respond_to(:subdomain)
|
|
432
|
+
end
|
|
433
|
+
|
|
434
|
+
it 'responds to bang methods' do
|
|
435
|
+
expect(parsed).to respond_to(:host!)
|
|
436
|
+
expect(parsed).to respond_to(:domain!)
|
|
437
|
+
expect(parsed).to respond_to(:subdomain!)
|
|
438
|
+
end
|
|
439
|
+
|
|
440
|
+
it 'responds to question mark methods' do
|
|
441
|
+
expect(parsed).to respond_to(:host?)
|
|
442
|
+
expect(parsed).to respond_to(:domain?)
|
|
443
|
+
expect(parsed).to respond_to(:subdomain?)
|
|
444
|
+
end
|
|
445
|
+
|
|
446
|
+
it 'responds to www_subdomain?' do
|
|
447
|
+
expect(parsed).to respond_to(:www_subdomain?)
|
|
448
|
+
end
|
|
449
|
+
|
|
450
|
+
it 'does not respond to invalid methods' do
|
|
451
|
+
expect(parsed).not_to respond_to(:invalid_method)
|
|
452
|
+
expect(parsed).not_to respond_to(:not_a_real_method!)
|
|
453
|
+
end
|
|
454
|
+
end
|
|
455
|
+
|
|
456
|
+
describe '#inspect' do
|
|
457
|
+
it 'provides meaningful inspection output' do
|
|
458
|
+
parsed = DomainExtractor.parse('https://api.example.com')
|
|
459
|
+
output = parsed.inspect
|
|
460
|
+
expect(output).to include('DomainExtractor::ParsedURL')
|
|
461
|
+
expect(output).to include('subdomain')
|
|
462
|
+
expect(output).to include('api')
|
|
463
|
+
end
|
|
464
|
+
end
|
|
465
|
+
end
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: domain_extractor
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.7
|
|
4
|
+
version: 0.1.9
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- OpenSite AI
|
|
@@ -43,12 +43,14 @@ files:
|
|
|
43
43
|
- lib/domain_extractor.rb
|
|
44
44
|
- lib/domain_extractor/errors.rb
|
|
45
45
|
- lib/domain_extractor/normalizer.rb
|
|
46
|
+
- lib/domain_extractor/parsed_url.rb
|
|
46
47
|
- lib/domain_extractor/parser.rb
|
|
47
48
|
- lib/domain_extractor/query_params.rb
|
|
48
49
|
- lib/domain_extractor/result.rb
|
|
49
50
|
- lib/domain_extractor/validators.rb
|
|
50
51
|
- lib/domain_extractor/version.rb
|
|
51
52
|
- spec/domain_extractor_spec.rb
|
|
53
|
+
- spec/parsed_url_spec.rb
|
|
52
54
|
- spec/spec_helper.rb
|
|
53
55
|
homepage: https://github.com/opensite-ai/domain_extractor
|
|
54
56
|
licenses:
|