domain_extractor 0.1.7 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +26 -0
- data/README.md +175 -0
- data/lib/domain_extractor/parsed_url.rb +131 -0
- data/lib/domain_extractor/parser.rb +3 -2
- data/lib/domain_extractor/result.rb +5 -1
- data/lib/domain_extractor/version.rb +1 -1
- data/lib/domain_extractor.rb +13 -4
- data/spec/domain_extractor_spec.rb +1 -1
- data/spec/parsed_url_spec.rb +422 -0
- metadata +3 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 4bc4d6ad831692d1251048f8b21820bb0efb10ed5b3cce641441b31afb5308b4
|
|
4
|
+
data.tar.gz: 67a96b33dc3544847af271c8bd837dbc592031bff5dac126022a147c2281460c
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 02bca764446a3391461695cfeeaef9c6e7920308bc78768b062ae676005d3610b09733133cb10c34cd5e29dc35169f770b4789f418fd554cba762a6d5a19022a
|
|
7
|
+
data.tar.gz: eeaaa8356b306feba33e08e54c8da2926f7e052ebac5d6920f0a6f26c0dacd3bfbb0d4f863fa694377d87441564a2c1eecff764d756ce4efde0569fabf573ee2
|
data/CHANGELOG.md
CHANGED
|
@@ -7,6 +7,32 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
7
7
|
|
|
8
8
|
## [Unreleased]
|
|
9
9
|
|
|
10
|
+
## [0.1.8] - 2025-10-31
|
|
11
|
+
|
|
12
|
+
### Implemented Declarative Method-style Accessors
|
|
13
|
+
|
|
14
|
+
#### Added
|
|
15
|
+
|
|
16
|
+
- **ParsedURL API**: Introduced intuitive method-style accessors with three variants:
|
|
17
|
+
- Default methods (e.g., `result.subdomain`) - Returns value or nil
|
|
18
|
+
- Bang methods (e.g., `result.subdomain!`) - Returns value or raises `InvalidURLError`
|
|
19
|
+
- Question methods (e.g., `result.subdomain?`) - Returns boolean true/false
|
|
20
|
+
- Added `www_subdomain?` helper method to check if subdomain is specifically 'www'
|
|
21
|
+
- Added `valid?` method to check if parsed result contains valid data
|
|
22
|
+
- Added `to_h` and `to_hash` methods for hash conversion
|
|
23
|
+
- Comprehensive documentation in `docs/PARSED_URL_API.md`
|
|
24
|
+
|
|
25
|
+
#### Changed
|
|
26
|
+
|
|
27
|
+
- `DomainExtractor.parse` now returns `ParsedURL` object instead of plain Hash (backward compatible via `[]` accessor)
|
|
28
|
+
- `DomainExtractor.parse_batch` now returns array of `ParsedURL` objects (or nil for invalid URLs)
|
|
29
|
+
|
|
30
|
+
#### Maintained
|
|
31
|
+
|
|
32
|
+
- Full backward compatibility with hash-style access using `[]`
|
|
33
|
+
- All existing tests continue to pass
|
|
34
|
+
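- No breaking changes for code that reads results with `[]`; code that relies on other `Hash` methods (e.g. `fetch`, `keys`, `is_a?(Hash)`) should call `to_h` on the result first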
|
|
35
|
+
|
|
10
36
|
## [0.1.7] - 2025-10-31
|
|
11
37
|
|
|
12
38
|
### Added valid? method and enhanced error handling
|
data/README.md
CHANGED
|
@@ -60,8 +60,183 @@ if DomainExtractor.valid?(url)
|
|
|
60
60
|
else
|
|
61
61
|
# handle invalid input
|
|
62
62
|
end
|
|
63
|
+
|
|
64
|
+
# New intuitive method-style access
|
|
65
|
+
result.subdomain # => 'www'
|
|
66
|
+
result.domain # => 'example'
|
|
67
|
+
result.host # => 'www.example.co.uk'
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
## ParsedURL API - Intuitive Method Access
|
|
71
|
+
|
|
72
|
+
DomainExtractor now returns a `ParsedURL` object that supports three accessor styles, making your intent clear and your code more robust:
|
|
73
|
+
|
|
74
|
+
### Method Accessor Styles
|
|
75
|
+
|
|
76
|
+
#### 1. Default Methods (Silent Nil)
|
|
77
|
+
Returns the value or `nil` - perfect for exploratory code or when handling invalid data gracefully.
|
|
78
|
+
|
|
79
|
+
```ruby
|
|
80
|
+
result = DomainExtractor.parse('https://api.example.com')
|
|
81
|
+
result.subdomain # => 'api'
|
|
82
|
+
result.domain # => 'example'
|
|
83
|
+
result.host # => 'api.example.com'
|
|
84
|
+
|
|
85
|
+
# Without subdomain
|
|
86
|
+
result = DomainExtractor.parse('https://example.com')
|
|
87
|
+
result.subdomain # => nil (no error)
|
|
88
|
+
result.domain # => 'example'
|
|
63
89
|
```
|
|
64
90
|
|
|
91
|
+
#### 2. Bang Methods (!) - Explicit Errors
|
|
92
|
+
Returns the value or raises `InvalidURLError` - ideal for production code where missing data should fail fast.
|
|
93
|
+
|
|
94
|
+
```ruby
|
|
95
|
+
result = DomainExtractor.parse('https://example.com')
|
|
96
|
+
result.domain! # => 'example'
|
|
97
|
+
result.subdomain! # raises InvalidURLError: "subdomain not found or invalid"
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
#### 3. Question Methods (?) - Boolean Checks
|
|
101
|
+
Always returns `true` or `false` - perfect for conditional logic without exceptions.
|
|
102
|
+
|
|
103
|
+
```ruby
|
|
104
|
+
DomainExtractor.parse('https://dashtrack.com').subdomain? # => false
|
|
105
|
+
DomainExtractor.parse('https://api.dashtrack.com').subdomain? # => true
|
|
106
|
+
DomainExtractor.parse('https://www.dashtrack.com').www_subdomain? # => true
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
### Quick Examples
|
|
110
|
+
|
|
111
|
+
```ruby
|
|
112
|
+
url = 'https://api.staging.example.com/path'
|
|
113
|
+
parsed = DomainExtractor.parse(url)
|
|
114
|
+
|
|
115
|
+
# Method-style access
|
|
116
|
+
parsed.host # => 'api.staging.example.com'
|
|
117
|
+
parsed.subdomain # => 'api.staging'
|
|
118
|
+
parsed.domain # => 'example'
|
|
119
|
+
parsed.root_domain # => 'example.com'
|
|
120
|
+
parsed.tld # => 'com'
|
|
121
|
+
parsed.path # => '/path'
|
|
122
|
+
|
|
123
|
+
# Question methods for conditionals
|
|
124
|
+
if parsed.subdomain?
|
|
125
|
+
puts "Has subdomain: #{parsed.subdomain}"
|
|
126
|
+
end
|
|
127
|
+
|
|
128
|
+
# Bang methods when values are required
|
|
129
|
+
begin
|
|
130
|
+
subdomain = parsed.subdomain! # Safe - has subdomain
|
|
131
|
+
domain = parsed.domain! # Safe - has domain
|
|
132
|
+
rescue DomainExtractor::InvalidURLError => e
|
|
133
|
+
puts "Missing required component: #{e.message}"
|
|
134
|
+
end
|
|
135
|
+
|
|
136
|
+
# Hash-style access still works (backward compatible)
|
|
137
|
+
parsed[:subdomain] # => 'api.staging'
|
|
138
|
+
parsed[:host] # => 'api.staging.example.com'
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
### Additional Examples
|
|
142
|
+
|
|
143
|
+
#### Boolean Checks with Question Methods
|
|
144
|
+
|
|
145
|
+
```ruby
|
|
146
|
+
# Check for subdomain presence
|
|
147
|
+
DomainExtractor.parse('https://dashtrack.com').subdomain? # => false
|
|
148
|
+
DomainExtractor.parse('https://api.dashtrack.com').subdomain? # => true
|
|
149
|
+
|
|
150
|
+
# Check for www subdomain specifically
|
|
151
|
+
DomainExtractor.parse('https://www.dashtrack.com').www_subdomain? # => true
|
|
152
|
+
DomainExtractor.parse('https://api.dashtrack.com').www_subdomain? # => false
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
#### Safe Batch Processing
|
|
156
|
+
|
|
157
|
+
```ruby
|
|
158
|
+
urls = [
|
|
159
|
+
'https://api.example.com',
|
|
160
|
+
'https://example.com',
|
|
161
|
+
'https://www.example.com'
|
|
162
|
+
]
|
|
163
|
+
|
|
164
|
+
urls.each do |url|
|
|
165
|
+
result = DomainExtractor.parse(url)
|
|
166
|
+
|
|
167
|
+
info = {
|
|
168
|
+
url: url,
|
|
169
|
+
has_subdomain: result.subdomain?,
|
|
170
|
+
is_www: result.www_subdomain?,
|
|
171
|
+
host: result.host
|
|
172
|
+
}
|
|
173
|
+
|
|
174
|
+
puts "#{info[:url]} - subdomain: #{info[:has_subdomain]}, www: #{info[:is_www]}"
|
|
175
|
+
end
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
#### Production URL Validation
|
|
179
|
+
|
|
180
|
+
```ruby
|
|
181
|
+
def validate_api_url(url)
|
|
182
|
+
result = DomainExtractor.parse(url)
|
|
183
|
+
|
|
184
|
+
# Ensure all required components exist
|
|
185
|
+
result.subdomain! # Must have subdomain
|
|
186
|
+
result.domain! # Must have domain
|
|
187
|
+
|
|
188
|
+
# Additional validation
|
|
189
|
+
return false unless result.subdomain.start_with?('api')
|
|
190
|
+
|
|
191
|
+
true
|
|
192
|
+
rescue DomainExtractor::InvalidURLError => e
|
|
193
|
+
puts "Validation failed: #{e.message}"
|
|
194
|
+
false
|
|
195
|
+
end
|
|
196
|
+
|
|
197
|
+
validate_api_url('https://api.example.com/endpoint') # => true
|
|
198
|
+
validate_api_url('https://example.com/endpoint') # => false (no subdomain)
|
|
199
|
+
validate_api_url('https://www.example.com/endpoint') # => false (not api subdomain)
|
|
200
|
+
```
|
|
201
|
+
|
|
202
|
+
#### Guard Clauses with Question Methods
|
|
203
|
+
|
|
204
|
+
```ruby
|
|
205
|
+
def process_url(url)
|
|
206
|
+
result = DomainExtractor.parse(url)
|
|
207
|
+
|
|
208
|
+
return 'Invalid URL' unless result.valid?
|
|
209
|
+
return 'No subdomain present' unless result.subdomain?
|
|
210
|
+
return 'WWW redirect needed' if result.www_subdomain?
|
|
211
|
+
|
|
212
|
+
"Processing subdomain: #{result.subdomain}"
|
|
213
|
+
end
|
|
214
|
+
|
|
215
|
+
process_url('https://api.example.com') # => "Processing subdomain: api"
|
|
216
|
+
process_url('https://www.example.com') # => "WWW redirect needed"
|
|
217
|
+
process_url('https://example.com') # => "No subdomain present"
|
|
218
|
+
```
|
|
219
|
+
|
|
220
|
+
#### Converting to Hash
|
|
221
|
+
|
|
222
|
+
```ruby
|
|
223
|
+
url = 'https://api.example.com/path'
|
|
224
|
+
result = DomainExtractor.parse(url)
|
|
225
|
+
|
|
226
|
+
hash = result.to_h
|
|
227
|
+
# => {
|
|
228
|
+
# subdomain: "api",
|
|
229
|
+
# domain: "example",
|
|
230
|
+
# tld: "com",
|
|
231
|
+
# root_domain: "example.com",
|
|
232
|
+
# host: "api.example.com",
|
|
233
|
+
# path: "/path",
|
|
234
|
+
# query_params: {}
|
|
235
|
+
# }
|
|
236
|
+
```
|
|
237
|
+
|
|
238
|
+
**See [docs/PARSED_URL_API.md](docs/PARSED_URL_API.md) for comprehensive documentation and real-world examples.**
|
|
239
|
+
|
|
65
240
|
## Usage Examples
|
|
66
241
|
|
|
67
242
|
### Basic Domain Parsing
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module DomainExtractor
  # ParsedURL wraps the hash produced by the parser and exposes every entry of
  # RESULT_KEYS through three accessor styles:
  #
  #   parsed = DomainExtractor.parse('https://api.example.com')
  #   parsed.host           # => 'api.example.com'   (value or nil)
  #   parsed.subdomain      # => 'api'
  #   parsed.subdomain?     # => true                (presence check)
  #   parsed.subdomain!     # => 'api'               (value or InvalidURLError)
  #   parsed.www_subdomain? # => false
  #
  # An instance built from nil carries no data. DomainExtractor.parse raises
  # InvalidURLError instead of returning such an instance, so build it directly
  # to observe the empty behaviour:
  #
  #   parsed = DomainExtractor::ParsedURL.new(nil)
  #   parsed.valid? # => false
  #   parsed.host   # => nil
  #   parsed.host?  # => false
  #   parsed.host!  # raises InvalidURLError
  #
  # Any other method name, including unknown names ending in ! or ?, raises
  # NoMethodError as on any other object.
  class ParsedURL
    # Result keys that get the plain / bang / question accessors.
    RESULT_KEYS = %i[subdomain domain tld root_domain host path query_params].freeze

    # Frozen stand-in used when no result hash is supplied, so an "empty"
    # ParsedURL cannot be populated afterwards through #result.
    EMPTY_RESULT = {}.freeze
    private_constant :EMPTY_RESULT

    # Expose the underlying hash for backward compatibility.
    attr_reader :result

    # @param result [Hash, nil] parsed attributes keyed by RESULT_KEYS; nil
    #   yields an empty (invalid) instance. The given hash is stored as-is and
    #   is not frozen here, so the caller's object is never mutated.
    def initialize(result)
      @result = result || EMPTY_RESULT
      freeze
    end

    # Hash-style access for backward compatibility, e.g. parsed[:host].
    #
    # @param key [Symbol]
    # @return [Object, nil] the stored value, or nil when the key is absent
    def [](key)
      @result[key]
    end

    # @return [Boolean] true when the wrapped result hash is not empty
    def valid?
      !@result.empty?
    end

    # @return [Boolean] true only when the subdomain is exactly 'www'
    def www_subdomain?
      @result[:subdomain] == 'www'
    end

    # Dispatches the dynamic accessors for RESULT_KEYS:
    #   name   - returns the value or nil
    #   name!  - returns the value or raises InvalidURLError when it is nil/empty
    #   name?  - returns true when the value is neither nil nor empty
    #
    # Names that do not map to a result key are passed to super, so they raise
    # NoMethodError instead of silently returning nil. Extra arguments to a
    # known accessor are ignored. A bare `super` forwards the block as well, so
    # no explicit block parameter is needed.
    #
    # @raise [InvalidURLError] from a bang accessor whose value is missing
    # @raise [NoMethodError] for names that are not accessors
    def method_missing(method_name, *args)
      key, suffix = accessor_parts(method_name)
      return super unless key

      case suffix
      when '!' then fetch_required(key)
      when '?' then value_present?(@result[key])
      else @result[key]
      end
    end

    # Mirrors #method_missing so respond_to? and method_missing always agree.
    def respond_to_missing?(method_name, include_private = false)
      # www_subdomain? is a real method; the explicit answer is kept so direct
      # callers of this hook see the same result as before.
      return true if method_name == :www_subdomain?
      return true if accessor_parts(method_name)

      super
    end

    # Provide hash-like inspection.
    def inspect
      "#<DomainExtractor::ParsedURL #{@result.inspect}>"
    end

    def to_s
      @result.to_s
    end

    # @return [Hash] a shallow copy of the wrapped result
    def to_h
      @result.dup
    end

    # Allow to_hash as well so Ruby can convert the object implicitly.
    alias to_hash to_h

    private

    # Splits an accessor name into its result key and optional suffix.
    #
    # @param method_name [Symbol, String]
    # @return [Array(Symbol, String), Array(Symbol, nil), nil] the key and its
    #   '!' / '?' suffix (nil for the plain accessor), or nil when the name
    #   does not refer to one of RESULT_KEYS
    def accessor_parts(method_name)
      name = method_name.to_s
      suffix = name[-1] if name.end_with?('!', '?')
      key = (suffix ? name[0...-1] : name).to_sym

      [key, suffix] if RESULT_KEYS.include?(key)
    end

    # Returns the value for key or raises when it is nil/empty.
    def fetch_required(key)
      value = @result[key]
      return value if value_present?(value)

      raise InvalidURLError, "#{key} not found or invalid"
    end

    # A value is present when it is not nil and, if it responds to empty?
    # (strings, hashes), not empty.
    def value_present?(value)
      return false if value.nil?
      return !value.empty? if value.respond_to?(:empty?)

      true
    end
  end
end
|
|
@@ -6,6 +6,7 @@ require 'public_suffix'
|
|
|
6
6
|
require_relative 'normalizer'
|
|
7
7
|
require_relative 'result'
|
|
8
8
|
require_relative 'validators'
|
|
9
|
+
require_relative 'parsed_url'
|
|
9
10
|
|
|
10
11
|
module DomainExtractor
|
|
11
12
|
# Parser orchestrates the pipeline for url normalization, validation, and domain extraction.
|
|
@@ -14,12 +15,12 @@ module DomainExtractor
|
|
|
14
15
|
|
|
15
16
|
def call(raw_url)
|
|
16
17
|
components = extract_components(raw_url)
|
|
17
|
-
return unless components
|
|
18
|
+
return ParsedURL.new(nil) unless components
|
|
18
19
|
|
|
19
20
|
uri, domain, host = components
|
|
20
21
|
build_result(domain: domain, host: host, uri: uri)
|
|
21
22
|
rescue ::URI::InvalidURIError, ::PublicSuffix::Error
|
|
22
|
-
nil
|
|
23
|
+
ParsedURL.new(nil)
|
|
23
24
|
end
|
|
24
25
|
|
|
25
26
|
def valid?(raw_url)
|
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require_relative 'parsed_url'
|
|
4
|
+
|
|
3
5
|
module DomainExtractor
|
|
4
6
|
# Result encapsulates the final parsed attributes and exposes a hash interface.
|
|
5
7
|
module Result
|
|
@@ -10,7 +12,7 @@ module DomainExtractor
|
|
|
10
12
|
module_function
|
|
11
13
|
|
|
12
14
|
def build(**attributes)
|
|
13
|
-
{
|
|
15
|
+
hash = {
|
|
14
16
|
subdomain: normalize_subdomain(attributes[:subdomain]),
|
|
15
17
|
root_domain: attributes[:root_domain],
|
|
16
18
|
domain: attributes[:domain],
|
|
@@ -19,6 +21,8 @@ module DomainExtractor
|
|
|
19
21
|
path: attributes[:path] || EMPTY_PATH,
|
|
20
22
|
query_params: QueryParams.call(attributes[:query])
|
|
21
23
|
}.freeze
|
|
24
|
+
|
|
25
|
+
ParsedURL.new(hash)
|
|
22
26
|
end
|
|
23
27
|
|
|
24
28
|
def normalize_subdomain(value)
|
data/lib/domain_extractor.rb
CHANGED
|
@@ -5,6 +5,7 @@ require 'public_suffix'
|
|
|
5
5
|
|
|
6
6
|
require_relative 'domain_extractor/version'
|
|
7
7
|
require_relative 'domain_extractor/errors'
|
|
8
|
+
require_relative 'domain_extractor/parsed_url'
|
|
8
9
|
require_relative 'domain_extractor/parser'
|
|
9
10
|
require_relative 'domain_extractor/query_params'
|
|
10
11
|
|
|
@@ -13,11 +14,15 @@ require_relative 'domain_extractor/query_params'
|
|
|
13
14
|
module DomainExtractor
|
|
14
15
|
class << self
|
|
15
16
|
# Parse an individual URL and extract domain attributes.
|
|
17
|
+
# Returns a ParsedURL object that supports hash-style access and method calls.
|
|
16
18
|
# Raises DomainExtractor::InvalidURLError when the URL fails validation.
|
|
17
19
|
# @param url [String, #to_s]
|
|
18
|
-
# @return [
|
|
20
|
+
# @return [ParsedURL]
|
|
19
21
|
def parse(url)
|
|
20
|
-
Parser.call(url)
|
|
22
|
+
result = Parser.call(url)
|
|
23
|
+
raise InvalidURLError unless result.valid?
|
|
24
|
+
|
|
25
|
+
result
|
|
21
26
|
end
|
|
22
27
|
|
|
23
28
|
# Determine if a URL is considered valid by the parser.
|
|
@@ -28,12 +33,16 @@ module DomainExtractor
|
|
|
28
33
|
end
|
|
29
34
|
|
|
30
35
|
# Parse many URLs and return their individual parse results.
|
|
36
|
+
# Returns nil for invalid URLs to maintain backward compatibility.
|
|
31
37
|
# @param urls [Enumerable<String>]
|
|
32
|
-
# @return [Array<
|
|
38
|
+
# @return [Array<ParsedURL, nil>]
|
|
33
39
|
def parse_batch(urls)
|
|
34
40
|
return [] unless urls.respond_to?(:map)
|
|
35
41
|
|
|
36
|
-
urls.map
|
|
42
|
+
urls.map do |url|
|
|
43
|
+
result = Parser.call(url)
|
|
44
|
+
result.valid? ? result : nil
|
|
45
|
+
end
|
|
37
46
|
end
|
|
38
47
|
|
|
39
48
|
# Convert a query string into a Hash representation.
|
|
@@ -300,7 +300,7 @@ RSpec.describe DomainExtractor do
|
|
|
300
300
|
|
|
301
301
|
results = described_class.parse_batch(urls)
|
|
302
302
|
|
|
303
|
-
expect(results).to all(be_a(
|
|
303
|
+
expect(results).to all(be_a(DomainExtractor::ParsedURL))
|
|
304
304
|
expect(results.map { |result| result[:root_domain] }).to all(eq('example.com'))
|
|
305
305
|
end
|
|
306
306
|
|
|
@@ -0,0 +1,422 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'spec_helper'
|
|
4
|
+
|
|
5
|
+
RSpec.describe DomainExtractor::ParsedURL do
|
|
6
|
+
describe 'method accessor styles' do
|
|
7
|
+
context 'with a valid URL with subdomain' do
|
|
8
|
+
let(:parsed) { DomainExtractor.parse('https://api.dashtrack.com/path?query=value') }
|
|
9
|
+
|
|
10
|
+
describe 'default accessor methods' do
|
|
11
|
+
it 'returns subdomain' do
|
|
12
|
+
expect(parsed.subdomain).to eq('api')
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
it 'returns domain' do
|
|
16
|
+
expect(parsed.domain).to eq('dashtrack')
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
it 'returns tld' do
|
|
20
|
+
expect(parsed.tld).to eq('com')
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
it 'returns root_domain' do
|
|
24
|
+
expect(parsed.root_domain).to eq('dashtrack.com')
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
it 'returns host' do
|
|
28
|
+
expect(parsed.host).to eq('api.dashtrack.com')
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
it 'returns path' do
|
|
32
|
+
expect(parsed.path).to eq('/path')
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
it 'returns query_params' do
|
|
36
|
+
expect(parsed.query_params).to eq({ 'query' => 'value' })
|
|
37
|
+
end
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
describe 'bang (!) accessor methods' do
|
|
41
|
+
it 'returns subdomain!' do
|
|
42
|
+
expect(parsed.subdomain!).to eq('api')
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
it 'returns domain!' do
|
|
46
|
+
expect(parsed.domain!).to eq('dashtrack')
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
it 'returns tld!' do
|
|
50
|
+
expect(parsed.tld!).to eq('com')
|
|
51
|
+
end
|
|
52
|
+
|
|
53
|
+
it 'returns root_domain!' do
|
|
54
|
+
expect(parsed.root_domain!).to eq('dashtrack.com')
|
|
55
|
+
end
|
|
56
|
+
|
|
57
|
+
it 'returns host!' do
|
|
58
|
+
expect(parsed.host!).to eq('api.dashtrack.com')
|
|
59
|
+
end
|
|
60
|
+
end
|
|
61
|
+
|
|
62
|
+
describe 'question mark (?) accessor methods' do
|
|
63
|
+
it 'returns true for subdomain?' do
|
|
64
|
+
expect(parsed.subdomain?).to be true
|
|
65
|
+
end
|
|
66
|
+
|
|
67
|
+
it 'returns true for domain?' do
|
|
68
|
+
expect(parsed.domain?).to be true
|
|
69
|
+
end
|
|
70
|
+
|
|
71
|
+
it 'returns true for tld?' do
|
|
72
|
+
expect(parsed.tld?).to be true
|
|
73
|
+
end
|
|
74
|
+
|
|
75
|
+
it 'returns true for root_domain?' do
|
|
76
|
+
expect(parsed.root_domain?).to be true
|
|
77
|
+
end
|
|
78
|
+
|
|
79
|
+
it 'returns true for host?' do
|
|
80
|
+
expect(parsed.host?).to be true
|
|
81
|
+
end
|
|
82
|
+
end
|
|
83
|
+
end
|
|
84
|
+
|
|
85
|
+
context 'with a valid URL without subdomain' do
|
|
86
|
+
let(:parsed) { DomainExtractor.parse('https://dashtrack.com') }
|
|
87
|
+
|
|
88
|
+
describe 'default accessor methods for nil subdomain' do
|
|
89
|
+
it 'returns nil for subdomain' do
|
|
90
|
+
expect(parsed.subdomain).to be_nil
|
|
91
|
+
end
|
|
92
|
+
|
|
93
|
+
it 'returns domain' do
|
|
94
|
+
expect(parsed.domain).to eq('dashtrack')
|
|
95
|
+
end
|
|
96
|
+
|
|
97
|
+
it 'returns host' do
|
|
98
|
+
expect(parsed.host).to eq('dashtrack.com')
|
|
99
|
+
end
|
|
100
|
+
end
|
|
101
|
+
|
|
102
|
+
describe 'bang (!) accessor methods with nil subdomain' do
|
|
103
|
+
it 'raises InvalidURLError for subdomain!' do
|
|
104
|
+
expect { parsed.subdomain! }.to raise_error(
|
|
105
|
+
DomainExtractor::InvalidURLError,
|
|
106
|
+
'subdomain not found or invalid'
|
|
107
|
+
)
|
|
108
|
+
end
|
|
109
|
+
|
|
110
|
+
it 'returns domain!' do
|
|
111
|
+
expect(parsed.domain!).to eq('dashtrack')
|
|
112
|
+
end
|
|
113
|
+
end
|
|
114
|
+
|
|
115
|
+
describe 'question mark (?) accessor methods with nil subdomain' do
|
|
116
|
+
it 'returns false for subdomain?' do
|
|
117
|
+
expect(parsed.subdomain?).to be false
|
|
118
|
+
end
|
|
119
|
+
|
|
120
|
+
it 'returns true for domain?' do
|
|
121
|
+
expect(parsed.domain?).to be true
|
|
122
|
+
end
|
|
123
|
+
|
|
124
|
+
it 'returns true for host?' do
|
|
125
|
+
expect(parsed.host?).to be true
|
|
126
|
+
end
|
|
127
|
+
end
|
|
128
|
+
end
|
|
129
|
+
|
|
130
|
+
context 'with invalid URL' do
|
|
131
|
+
let(:parsed) { DomainExtractor::ParsedURL.new(nil) }
|
|
132
|
+
|
|
133
|
+
describe 'default accessor methods' do
|
|
134
|
+
it 'returns nil for subdomain' do
|
|
135
|
+
expect(parsed.subdomain).to be_nil
|
|
136
|
+
end
|
|
137
|
+
|
|
138
|
+
it 'returns nil for domain' do
|
|
139
|
+
expect(parsed.domain).to be_nil
|
|
140
|
+
end
|
|
141
|
+
|
|
142
|
+
it 'returns nil for host' do
|
|
143
|
+
expect(parsed.host).to be_nil
|
|
144
|
+
end
|
|
145
|
+
|
|
146
|
+
it 'returns nil for root_domain' do
|
|
147
|
+
expect(parsed.root_domain).to be_nil
|
|
148
|
+
end
|
|
149
|
+
end
|
|
150
|
+
|
|
151
|
+
describe 'bang (!) accessor methods' do
|
|
152
|
+
it 'raises InvalidURLError for host!' do
|
|
153
|
+
expect { parsed.host! }.to raise_error(
|
|
154
|
+
DomainExtractor::InvalidURLError,
|
|
155
|
+
'host not found or invalid'
|
|
156
|
+
)
|
|
157
|
+
end
|
|
158
|
+
|
|
159
|
+
it 'raises InvalidURLError for domain!' do
|
|
160
|
+
expect { parsed.domain! }.to raise_error(
|
|
161
|
+
DomainExtractor::InvalidURLError,
|
|
162
|
+
'domain not found or invalid'
|
|
163
|
+
)
|
|
164
|
+
end
|
|
165
|
+
|
|
166
|
+
it 'raises InvalidURLError for subdomain!' do
|
|
167
|
+
expect { parsed.subdomain! }.to raise_error(
|
|
168
|
+
DomainExtractor::InvalidURLError,
|
|
169
|
+
'subdomain not found or invalid'
|
|
170
|
+
)
|
|
171
|
+
end
|
|
172
|
+
end
|
|
173
|
+
|
|
174
|
+
describe 'question mark (?) accessor methods' do
|
|
175
|
+
it 'returns false for subdomain?' do
|
|
176
|
+
expect(parsed.subdomain?).to be false
|
|
177
|
+
end
|
|
178
|
+
|
|
179
|
+
it 'returns false for domain?' do
|
|
180
|
+
expect(parsed.domain?).to be false
|
|
181
|
+
end
|
|
182
|
+
|
|
183
|
+
it 'returns false for host?' do
|
|
184
|
+
expect(parsed.host?).to be false
|
|
185
|
+
end
|
|
186
|
+
|
|
187
|
+
it 'returns false for root_domain?' do
|
|
188
|
+
expect(parsed.root_domain?).to be false
|
|
189
|
+
end
|
|
190
|
+
end
|
|
191
|
+
end
|
|
192
|
+
end
|
|
193
|
+
|
|
194
|
+
describe '#www_subdomain?' do
|
|
195
|
+
it 'returns true when subdomain is www' do
|
|
196
|
+
parsed = DomainExtractor.parse('https://www.dashtrack.com')
|
|
197
|
+
expect(parsed.www_subdomain?).to be true
|
|
198
|
+
end
|
|
199
|
+
|
|
200
|
+
it 'returns false when subdomain is not www' do
|
|
201
|
+
parsed = DomainExtractor.parse('https://api.dashtrack.com')
|
|
202
|
+
expect(parsed.www_subdomain?).to be false
|
|
203
|
+
end
|
|
204
|
+
|
|
205
|
+
it 'returns false when there is no subdomain' do
|
|
206
|
+
parsed = DomainExtractor.parse('https://dashtrack.com')
|
|
207
|
+
expect(parsed.www_subdomain?).to be false
|
|
208
|
+
end
|
|
209
|
+
|
|
210
|
+
it 'returns false for invalid URL' do
|
|
211
|
+
parsed = DomainExtractor::ParsedURL.new(nil)
|
|
212
|
+
expect(parsed.www_subdomain?).to be false
|
|
213
|
+
end
|
|
214
|
+
end
|
|
215
|
+
|
|
216
|
+
describe '#valid?' do
|
|
217
|
+
it 'returns true for valid URL' do
|
|
218
|
+
parsed = DomainExtractor.parse('https://dashtrack.com')
|
|
219
|
+
expect(parsed.valid?).to be true
|
|
220
|
+
end
|
|
221
|
+
|
|
222
|
+
it 'returns false for invalid URL' do
|
|
223
|
+
parsed = DomainExtractor::ParsedURL.new(nil)
|
|
224
|
+
expect(parsed.valid?).to be false
|
|
225
|
+
end
|
|
226
|
+
|
|
227
|
+
it 'returns false for empty result' do
|
|
228
|
+
parsed = DomainExtractor::ParsedURL.new({})
|
|
229
|
+
expect(parsed.valid?).to be false
|
|
230
|
+
end
|
|
231
|
+
end
|
|
232
|
+
|
|
233
|
+
describe 'hash-style access for backward compatibility' do
|
|
234
|
+
let(:parsed) { DomainExtractor.parse('https://www.example.co.uk/path?query=value') }
|
|
235
|
+
|
|
236
|
+
it 'supports hash-style access with []' do
|
|
237
|
+
expect(parsed[:subdomain]).to eq('www')
|
|
238
|
+
expect(parsed[:domain]).to eq('example')
|
|
239
|
+
expect(parsed[:tld]).to eq('co.uk')
|
|
240
|
+
expect(parsed[:root_domain]).to eq('example.co.uk')
|
|
241
|
+
expect(parsed[:host]).to eq('www.example.co.uk')
|
|
242
|
+
expect(parsed[:path]).to eq('/path')
|
|
243
|
+
end
|
|
244
|
+
end
|
|
245
|
+
|
|
246
|
+
describe '#to_h and #to_hash' do
|
|
247
|
+
let(:parsed) { DomainExtractor.parse('https://api.example.com') }
|
|
248
|
+
|
|
249
|
+
it 'converts to hash with to_h' do
|
|
250
|
+
hash = parsed.to_h
|
|
251
|
+
expect(hash).to be_a(Hash)
|
|
252
|
+
expect(hash[:subdomain]).to eq('api')
|
|
253
|
+
expect(hash[:domain]).to eq('example')
|
|
254
|
+
end
|
|
255
|
+
|
|
256
|
+
it 'converts to hash with to_hash' do
|
|
257
|
+
hash = parsed.to_hash
|
|
258
|
+
expect(hash).to be_a(Hash)
|
|
259
|
+
expect(hash[:subdomain]).to eq('api')
|
|
260
|
+
expect(hash[:domain]).to eq('example')
|
|
261
|
+
end
|
|
262
|
+
end
|
|
263
|
+
|
|
264
|
+
describe 'integration examples from requirements' do
|
|
265
|
+
it 'handles example: DomainExtractor.parse(url).host' do
|
|
266
|
+
url = 'https://www.example.co.uk/path?query=value'
|
|
267
|
+
expect(DomainExtractor.parse(url).host).to eq('www.example.co.uk')
|
|
268
|
+
end
|
|
269
|
+
|
|
270
|
+
it 'handles example: DomainExtractor.parse(url).domain' do
|
|
271
|
+
url = 'https://www.example.co.uk/path?query=value'
|
|
272
|
+
expect(DomainExtractor.parse(url).domain).to eq('example')
|
|
273
|
+
end
|
|
274
|
+
|
|
275
|
+
it 'handles example: DomainExtractor.parse(url).subdomain' do
|
|
276
|
+
url = 'https://www.example.co.uk/path?query=value'
|
|
277
|
+
expect(DomainExtractor.parse(url).subdomain).to eq('www')
|
|
278
|
+
end
|
|
279
|
+
|
|
280
|
+
it 'handles example: no subdomain returns false' do
|
|
281
|
+
expect(DomainExtractor.parse('https://dashtrack.com').subdomain?).to be false
|
|
282
|
+
end
|
|
283
|
+
|
|
284
|
+
it 'handles example: with subdomain returns true' do
|
|
285
|
+
expect(DomainExtractor.parse('https://api.dashtrack.com').subdomain?).to be true
|
|
286
|
+
end
|
|
287
|
+
|
|
288
|
+
it 'handles example: www_subdomain? returns true for www' do
|
|
289
|
+
expect(DomainExtractor.parse('https://www.dashtrack.com').www_subdomain?).to be true
|
|
290
|
+
end
|
|
291
|
+
|
|
292
|
+
it 'handles example: www_subdomain? returns false for non-www' do
|
|
293
|
+
expect(DomainExtractor.parse('https://dashtrack.com').www_subdomain?).to be false
|
|
294
|
+
end
|
|
295
|
+
|
|
296
|
+
it 'handles example: host returns value for valid URL' do
|
|
297
|
+
expect(DomainExtractor.parse('https://api.dashtrack.com').host).to eq('api.dashtrack.com')
|
|
298
|
+
end
|
|
299
|
+
|
|
300
|
+
it 'handles example: domain returns nil for invalid URL' do
|
|
301
|
+
# Parser returns ParsedURL with empty result for invalid URLs
|
|
302
|
+
# But parse() raises error, so we need to construct directly
|
|
303
|
+
parsed = DomainExtractor::ParsedURL.new(nil)
|
|
304
|
+
expect(parsed.domain).to be_nil
|
|
305
|
+
end
|
|
306
|
+
end
|
|
307
|
+
|
|
308
|
+
describe 'edge cases' do
|
|
309
|
+
context 'with multi-part TLD' do
|
|
310
|
+
let(:parsed) { DomainExtractor.parse('shop.example.com.au') }
|
|
311
|
+
|
|
312
|
+
it 'correctly identifies subdomain' do
|
|
313
|
+
expect(parsed.subdomain).to eq('shop')
|
|
314
|
+
end
|
|
315
|
+
|
|
316
|
+
it 'correctly identifies tld' do
|
|
317
|
+
expect(parsed.tld).to eq('com.au')
|
|
318
|
+
end
|
|
319
|
+
|
|
320
|
+
it 'subdomain? returns true' do
|
|
321
|
+
expect(parsed.subdomain?).to be true
|
|
322
|
+
end
|
|
323
|
+
end
|
|
324
|
+
|
|
325
|
+
context 'with nested subdomains' do
|
|
326
|
+
let(:parsed) { DomainExtractor.parse('api.staging.example.com') }
|
|
327
|
+
|
|
328
|
+
it 'returns nested subdomain' do
|
|
329
|
+
expect(parsed.subdomain).to eq('api.staging')
|
|
330
|
+
end
|
|
331
|
+
|
|
332
|
+
it 'subdomain? returns true' do
|
|
333
|
+
expect(parsed.subdomain?).to be true
|
|
334
|
+
end
|
|
335
|
+
|
|
336
|
+
it 'subdomain! returns the value' do
|
|
337
|
+
expect(parsed.subdomain!).to eq('api.staging')
|
|
338
|
+
end
|
|
339
|
+
end
|
|
340
|
+
|
|
341
|
+
context 'with empty path' do
|
|
342
|
+
let(:parsed) { DomainExtractor.parse('https://example.com') }
|
|
343
|
+
|
|
344
|
+
it 'returns empty string for path' do
|
|
345
|
+
expect(parsed.path).to eq('')
|
|
346
|
+
end
|
|
347
|
+
|
|
348
|
+
it 'path? returns false for empty path' do
|
|
349
|
+
expect(parsed.path?).to be false
|
|
350
|
+
end
|
|
351
|
+
end
|
|
352
|
+
|
|
353
|
+
context 'with query params' do
|
|
354
|
+
let(:parsed) { DomainExtractor.parse('https://example.com?foo=bar&baz=qux') }
|
|
355
|
+
|
|
356
|
+
it 'returns query_params hash' do
|
|
357
|
+
expect(parsed.query_params).to eq({ 'foo' => 'bar', 'baz' => 'qux' })
|
|
358
|
+
end
|
|
359
|
+
|
|
360
|
+
it 'query_params? returns true' do
|
|
361
|
+
expect(parsed.query_params?).to be true
|
|
362
|
+
end
|
|
363
|
+
|
|
364
|
+
it 'query_params! returns the hash' do
|
|
365
|
+
expect(parsed.query_params!).to eq({ 'foo' => 'bar', 'baz' => 'qux' })
|
|
366
|
+
end
|
|
367
|
+
end
|
|
368
|
+
|
|
369
|
+
context 'with empty query params' do
|
|
370
|
+
let(:parsed) { DomainExtractor.parse('https://example.com') }
|
|
371
|
+
|
|
372
|
+
it 'returns empty hash for query_params' do
|
|
373
|
+
expect(parsed.query_params).to eq({})
|
|
374
|
+
end
|
|
375
|
+
|
|
376
|
+
it 'query_params? returns false for empty hash' do
|
|
377
|
+
expect(parsed.query_params?).to be false
|
|
378
|
+
end
|
|
379
|
+
end
|
|
380
|
+
end
|
|
381
|
+
|
|
382
|
+
describe '#respond_to_missing?' do
|
|
383
|
+
let(:parsed) { DomainExtractor.parse('https://api.example.com') }
|
|
384
|
+
|
|
385
|
+
it 'responds to valid accessor methods' do
|
|
386
|
+
expect(parsed).to respond_to(:host)
|
|
387
|
+
expect(parsed).to respond_to(:domain)
|
|
388
|
+
expect(parsed).to respond_to(:subdomain)
|
|
389
|
+
end
|
|
390
|
+
|
|
391
|
+
it 'responds to bang methods' do
|
|
392
|
+
expect(parsed).to respond_to(:host!)
|
|
393
|
+
expect(parsed).to respond_to(:domain!)
|
|
394
|
+
expect(parsed).to respond_to(:subdomain!)
|
|
395
|
+
end
|
|
396
|
+
|
|
397
|
+
it 'responds to question mark methods' do
|
|
398
|
+
expect(parsed).to respond_to(:host?)
|
|
399
|
+
expect(parsed).to respond_to(:domain?)
|
|
400
|
+
expect(parsed).to respond_to(:subdomain?)
|
|
401
|
+
end
|
|
402
|
+
|
|
403
|
+
it 'responds to www_subdomain?' do
|
|
404
|
+
expect(parsed).to respond_to(:www_subdomain?)
|
|
405
|
+
end
|
|
406
|
+
|
|
407
|
+
it 'does not respond to invalid methods' do
|
|
408
|
+
expect(parsed).not_to respond_to(:invalid_method)
|
|
409
|
+
expect(parsed).not_to respond_to(:not_a_real_method!)
|
|
410
|
+
end
|
|
411
|
+
end
|
|
412
|
+
|
|
413
|
+
describe '#inspect' do
|
|
414
|
+
it 'provides meaningful inspection output' do
|
|
415
|
+
parsed = DomainExtractor.parse('https://api.example.com')
|
|
416
|
+
output = parsed.inspect
|
|
417
|
+
expect(output).to include('DomainExtractor::ParsedURL')
|
|
418
|
+
expect(output).to include('subdomain')
|
|
419
|
+
expect(output).to include('api')
|
|
420
|
+
end
|
|
421
|
+
end
|
|
422
|
+
end
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: domain_extractor
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.
|
|
4
|
+
version: 0.1.8
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- OpenSite AI
|
|
@@ -43,12 +43,14 @@ files:
|
|
|
43
43
|
- lib/domain_extractor.rb
|
|
44
44
|
- lib/domain_extractor/errors.rb
|
|
45
45
|
- lib/domain_extractor/normalizer.rb
|
|
46
|
+
- lib/domain_extractor/parsed_url.rb
|
|
46
47
|
- lib/domain_extractor/parser.rb
|
|
47
48
|
- lib/domain_extractor/query_params.rb
|
|
48
49
|
- lib/domain_extractor/result.rb
|
|
49
50
|
- lib/domain_extractor/validators.rb
|
|
50
51
|
- lib/domain_extractor/version.rb
|
|
51
52
|
- spec/domain_extractor_spec.rb
|
|
53
|
+
- spec/parsed_url_spec.rb
|
|
52
54
|
- spec/spec_helper.rb
|
|
53
55
|
homepage: https://github.com/opensite-ai/domain_extractor
|
|
54
56
|
licenses:
|