domain_extractor 0.1.6 → 0.1.7

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 917b77910cd8c96304a71f1bfe4609ab9ec2a75e15eada0481cf4a1019a4d90f
4
- data.tar.gz: 8a46bc97ff626af7fc835ab07c4c1efb29714e3e50a16ab7f928e33bd9ef1f32
3
+ metadata.gz: ada4e5f18e79e144f7fc82e53c4f9cffc5b750f32ac579e5533ad50a3c4ee07b
4
+ data.tar.gz: 7213bcce37c9956ff164411433e929ad7ae8f0953e3101a2bd93d7f761ab37dc
5
5
  SHA512:
6
- metadata.gz: fdd1aca915f4a991c0dd6d1ad3cb8e1f0d2f831fa54f0c7424180cbd23da9b6cc2aa6302ca396b630f0ed70231c59a588f52bc458f4b4917abb9daf8dd8b921d
7
- data.tar.gz: 7ea38ed35b6eadc2d81e8b827ef1c6d938090f177983e49f5a1347cdfc0700daa58458ecccf0c292f924b77f354a571aa4436923b307233ca2d0d0fec09454e7
6
+ metadata.gz: 9eb86f40c167428966581ee0446db8ef03994680904c7712f22b75e4117304bce59928ef16ba29143da6ad2046011f0cc5f428a54405922cfeab21e06ea9a06f
7
+ data.tar.gz: cae58aa85a024e3e10a236041476775e887896d1bb96af35a6e6a14f5cb97f2f2caefff7a2804c02ec9885fbcb4e92e48cdef2bbc2e9bfcfecd7a6b58e47e6ad
data/CHANGELOG.md CHANGED
@@ -7,6 +7,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
7
7
 
8
8
  ## [Unreleased]
9
9
 
10
+ ## [0.1.7] - 2025-10-31
11
+
12
+ ### Added valid? method and enhanced error handling
13
+
14
+ - Added `DomainExtractor.valid?` helper to allow safe URL pre-checks without raising.
15
+ - `DomainExtractor.parse` now raises `DomainExtractor::InvalidURLError` with a clear `"Invalid URL Value"` message when the input cannot be parsed.
16
+
10
17
  ## [0.1.6] - 2025-10-31
11
18
 
12
19
  ### Integrate Rakefile for Release and Task Workflow Refactors
data/README.md CHANGED
@@ -52,6 +52,14 @@ result[:domain] # => 'example'
52
52
  result[:tld] # => 'co.uk'
53
53
  result[:root_domain] # => 'example.co.uk'
54
54
  result[:host] # => 'www.example.co.uk'
55
+
56
+ # Guard a parse with the validity helper
57
+ url = 'https://www.example.co.uk/path?query=value'
58
+ if DomainExtractor.valid?(url)
59
+ DomainExtractor.parse(url)
60
+ else
61
+ # handle invalid input
62
+ end
55
63
  ```
56
64
 
57
65
  ## Usage Examples
@@ -105,13 +113,25 @@ urls = ['https://example.com', 'https://blog.example.org']
105
113
  results = DomainExtractor.parse_batch(urls)
106
114
  ```
107
115
 
116
+ ### Validation and Error Handling
117
+
118
+ ```ruby
119
+ DomainExtractor.valid?('https://www.example.com') # => true
120
+
121
+ # DomainExtractor.parse raises DomainExtractor::InvalidURLError on invalid input
122
+ DomainExtractor.parse('not-a-url')
123
+ # => raises DomainExtractor::InvalidURLError (message: "Invalid URL Value")
124
+ ```
125
+
108
126
  ## API Reference
109
127
 
110
128
  ### `DomainExtractor.parse(url_string)`
111
129
 
112
130
  Parses a URL string and extracts domain components.
113
131
 
114
- **Returns:** Hash with keys `:subdomain`, `:domain`, `:tld`, `:root_domain`, `:host`, `:path` or `nil`
132
+ **Returns:** Hash with keys `:subdomain`, `:domain`, `:tld`, `:root_domain`, `:host`, `:path`
133
+
134
+ **Raises:** `DomainExtractor::InvalidURLError` when the URL fails validation
115
135
 
116
136
  ### `DomainExtractor.parse_batch(urls)`
117
137
 
@@ -119,6 +139,12 @@ Parses multiple URLs efficiently.
119
139
 
120
140
  **Returns:** Array of parsed results
121
141
 
142
+ ### `DomainExtractor.valid?(url_string)`
143
+
144
+ Checks if a URL can be parsed successfully without raising.
145
+
146
+ **Returns:** `true` or `false`
147
+
122
148
  ### `DomainExtractor.parse_query_params(query_string)`
123
149
 
124
150
  Parses a query string into a hash of parameters.
@@ -146,8 +172,9 @@ track_event('page_view', source_domain: parsed[:root_domain]) if parsed
146
172
 
147
173
  ```ruby
148
174
  def internal_link?(url, base_domain)
149
- parsed = DomainExtractor.parse(url)
150
- parsed && parsed[:root_domain] == base_domain
175
+ return false unless DomainExtractor.valid?(url)
176
+
177
+ DomainExtractor.parse(url)[:root_domain] == base_domain
151
178
  end
152
179
  ```
153
180
 
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+
3
+ module DomainExtractor
4
+ class InvalidURLError < StandardError
5
+ DEFAULT_MESSAGE = 'Invalid URL Value'
6
+
7
+ def initialize(message = DEFAULT_MESSAGE)
8
+ super
9
+ end
10
+ end
11
+ end
@@ -13,18 +13,21 @@ module DomainExtractor
13
13
  module_function
14
14
 
15
15
  def call(raw_url)
16
- uri = build_uri(raw_url)
17
- return unless uri
18
-
19
- host = uri.host&.downcase
20
- return if invalid_host?(host)
16
+ components = extract_components(raw_url)
17
+ return unless components
21
18
 
22
- domain = ::PublicSuffix.parse(host)
19
+ uri, domain, host = components
23
20
  build_result(domain: domain, host: host, uri: uri)
24
21
  rescue ::URI::InvalidURIError, ::PublicSuffix::Error
25
22
  nil
26
23
  end
27
24
 
25
+ def valid?(raw_url)
26
+ !!extract_components(raw_url)
27
+ rescue ::URI::InvalidURIError, ::PublicSuffix::Error
28
+ false
29
+ end
30
+
28
31
  def build_uri(raw_url)
29
32
  normalized = Normalizer.call(raw_url)
30
33
  return unless normalized
@@ -38,6 +41,18 @@ module DomainExtractor
38
41
  end
39
42
  private_class_method :invalid_host?
40
43
 
44
+ def extract_components(raw_url)
45
+ uri = build_uri(raw_url)
46
+ return unless uri
47
+
48
+ host = uri.host&.downcase
49
+ return if invalid_host?(host)
50
+
51
+ domain = ::PublicSuffix.parse(host)
52
+ [uri, domain, host]
53
+ end
54
+ private_class_method :extract_components
55
+
41
56
  def build_result(domain:, host:, uri:)
42
57
  Result.build(
43
58
  subdomain: domain.trd,
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module DomainExtractor
4
- VERSION = '0.1.6'
4
+ VERSION = '0.1.7'
5
5
  end
@@ -4,6 +4,7 @@ require 'uri'
4
4
  require 'public_suffix'
5
5
 
6
6
  require_relative 'domain_extractor/version'
7
+ require_relative 'domain_extractor/errors'
7
8
  require_relative 'domain_extractor/parser'
8
9
  require_relative 'domain_extractor/query_params'
9
10
 
@@ -12,10 +13,18 @@ require_relative 'domain_extractor/query_params'
12
13
  module DomainExtractor
13
14
  class << self
14
15
  # Parse an individual URL and extract domain attributes.
16
+ # Raises DomainExtractor::InvalidURLError when the URL fails validation.
15
17
  # @param url [String, #to_s]
16
- # @return [Hash, nil]
18
+ # @return [Hash]
17
19
  def parse(url)
18
- Parser.call(url)
20
+ Parser.call(url) || raise(InvalidURLError)
21
+ end
22
+
23
+ # Determine if a URL is considered valid by the parser.
24
+ # @param url [String, #to_s]
25
+ # @return [Boolean]
26
+ def valid?(url)
27
+ Parser.valid?(url)
19
28
  end
20
29
 
21
30
  # Parse many URLs and return their individual parse results.
@@ -24,7 +33,7 @@ module DomainExtractor
24
33
  def parse_batch(urls)
25
34
  return [] unless urls.respond_to?(:map)
26
35
 
27
- urls.map { |url| parse(url) }
36
+ urls.map { |url| Parser.call(url) }
28
37
  end
29
38
 
30
39
  # Convert a query string into a Hash representation.
@@ -142,32 +142,70 @@ RSpec.describe DomainExtractor do
142
142
  end
143
143
 
144
144
  context 'with invalid URLs' do
145
- it 'returns nil for malformed URLs' do
146
- expect(described_class.parse('http://')).to be_nil
145
+ it 'raises InvalidURLError for malformed URLs' do
146
+ expect { described_class.parse('http://') }.to raise_error(
147
+ DomainExtractor::InvalidURLError,
148
+ 'Invalid URL Value'
149
+ )
147
150
  end
148
151
 
149
- it 'returns nil for invalid domains' do
150
- expect(described_class.parse('not_a_url')).to be_nil
152
+ it 'raises InvalidURLError for invalid domains' do
153
+ expect { described_class.parse('not_a_url') }.to raise_error(
154
+ DomainExtractor::InvalidURLError,
155
+ 'Invalid URL Value'
156
+ )
151
157
  end
152
158
 
153
- it 'returns nil for IP addresses' do
154
- expect(described_class.parse('192.168.1.1')).to be_nil
159
+ it 'raises InvalidURLError for IP addresses' do
160
+ expect { described_class.parse('192.168.1.1') }.to raise_error(
161
+ DomainExtractor::InvalidURLError,
162
+ 'Invalid URL Value'
163
+ )
155
164
  end
156
165
 
157
- it 'returns nil for IPv6 addresses' do
158
- expect(described_class.parse('[2001:db8::1]')).to be_nil
166
+ it 'raises InvalidURLError for IPv6 addresses' do
167
+ expect { described_class.parse('[2001:db8::1]') }.to raise_error(
168
+ DomainExtractor::InvalidURLError,
169
+ 'Invalid URL Value'
170
+ )
159
171
  end
160
172
 
161
- it 'returns nil for empty string' do
162
- expect(described_class.parse('')).to be_nil
173
+ it 'raises InvalidURLError for empty string' do
174
+ expect { described_class.parse('') }.to raise_error(DomainExtractor::InvalidURLError, 'Invalid URL Value')
163
175
  end
164
176
 
165
- it 'returns nil for nil' do
166
- expect(described_class.parse(nil)).to be_nil
177
+ it 'raises InvalidURLError for nil' do
178
+ expect { described_class.parse(nil) }.to raise_error(DomainExtractor::InvalidURLError, 'Invalid URL Value')
167
179
  end
168
180
  end
169
181
  end
170
182
 
183
+ describe '.valid?' do
184
+ it 'returns true for a normalized domain' do
185
+ expect(described_class.valid?('dashtrack.com')).to be(true)
186
+ end
187
+
188
+ it 'returns true for a full URL with subdomain and query' do
189
+ expect(described_class.valid?('https://www.example.co.uk/path?query=value')).to be(true)
190
+ end
191
+
192
+ it 'returns false for malformed URLs' do
193
+ expect(described_class.valid?('http://')).to be(false)
194
+ end
195
+
196
+ it 'returns false for invalid domains' do
197
+ expect(described_class.valid?('not_a_url')).to be(false)
198
+ end
199
+
200
+ it 'returns false for IP addresses' do
201
+ expect(described_class.valid?('192.168.1.1')).to be(false)
202
+ end
203
+
204
+ it 'returns false for nil values' do
205
+ expect(described_class.valid?(nil)).to be(false)
206
+ end
207
+ end
208
+
171
209
  describe '.parse_query_params' do
172
210
  it 'converts simple query string to hash' do
173
211
  result = described_class.parse_query_params('foo=bar')
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: domain_extractor
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.6
4
+ version: 0.1.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - OpenSite AI
@@ -41,6 +41,7 @@ files:
41
41
  - LICENSE.txt
42
42
  - README.md
43
43
  - lib/domain_extractor.rb
44
+ - lib/domain_extractor/errors.rb
44
45
  - lib/domain_extractor/normalizer.rb
45
46
  - lib/domain_extractor/parser.rb
46
47
  - lib/domain_extractor/query_params.rb