RubyGems - domain_extractor - Versions diffs - 0.1.6 → 0.1.7 - Mend

domain_extractor 0.1.6 → 0.1.7

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (9)

checksums.yaml +4 -4
data/CHANGELOG.md +7 -0
data/README.md +30 -3
data/lib/domain_extractor/errors.rb +11 -0
data/lib/domain_extractor/parser.rb +21 -6
data/lib/domain_extractor/version.rb +1 -1
data/lib/domain_extractor.rb +12 -3
data/spec/domain_extractor_spec.rb +50 -12
metadata +2 -1

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 917b77910cd8c96304a71f1bfe4609ab9ec2a75e15eada0481cf4a1019a4d90f
-  data.tar.gz: 8a46bc97ff626af7fc835ab07c4c1efb29714e3e50a16ab7f928e33bd9ef1f32
+  metadata.gz: ada4e5f18e79e144f7fc82e53c4f9cffc5b750f32ac579e5533ad50a3c4ee07b
+  data.tar.gz: 7213bcce37c9956ff164411433e929ad7ae8f0953e3101a2bd93d7f761ab37dc
 SHA512:
-  metadata.gz: fdd1aca915f4a991c0dd6d1ad3cb8e1f0d2f831fa54f0c7424180cbd23da9b6cc2aa6302ca396b630f0ed70231c59a588f52bc458f4b4917abb9daf8dd8b921d
-  data.tar.gz: 7ea38ed35b6eadc2d81e8b827ef1c6d938090f177983e49f5a1347cdfc0700daa58458ecccf0c292f924b77f354a571aa4436923b307233ca2d0d0fec09454e7
+  metadata.gz: 9eb86f40c167428966581ee0446db8ef03994680904c7712f22b75e4117304bce59928ef16ba29143da6ad2046011f0cc5f428a54405922cfeab21e06ea9a06f
+  data.tar.gz: cae58aa85a024e3e10a236041476775e887896d1bb96af35a6e6a14f5cb97f2f2caefff7a2804c02ec9885fbcb4e92e48cdef2bbc2e9bfcfecd7a6b58e47e6ad

data/CHANGELOG.md CHANGED Viewed

@@ -7,6 +7,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ## [Unreleased]
+## [0.1.7] - 2025-10-31
+### Added valid? method and enhanced error handling
+- Added `DomainExtractor.valid?` helper to allow safe URL pre-checks without raising.
+- `DomainExtractor.parse` now raises `DomainExtractor::InvalidURLError` with a clear `"Invalid URL Value"` message when the input cannot be parsed.
 ## [0.1.6] - 2025-10-31
 ### Integrate Rakefile for Release and Task Workflow Refactors

data/README.md CHANGED Viewed

@@ -52,6 +52,14 @@ result[:domain]       # => 'example'
 result[:tld]          # => 'co.uk'
 result[:root_domain]  # => 'example.co.uk'
 result[:host]         # => 'www.example.co.uk'
+# Guard a parse with the validity helper
+url = 'https://www.example.co.uk/path?query=value'
+if DomainExtractor.valid?(url)
+  DomainExtractor.parse(url)
+else
+  # handle invalid input
+end
 ```
 ## Usage Examples
@@ -105,13 +113,25 @@ urls = ['https://example.com', 'https://blog.example.org']
 results = DomainExtractor.parse_batch(urls)
 ```
+### Validation and Error Handling
+```ruby
+DomainExtractor.valid?('https://www.example.com') # => true
+# DomainExtractor.parse raises DomainExtractor::InvalidURLError on invalid input
+DomainExtractor.parse('not-a-url')
+# => raises DomainExtractor::InvalidURLError (message: "Invalid URL Value")
+```
 ## API Reference
 ### `DomainExtractor.parse(url_string)`
 Parses a URL string and extracts domain components.
-**Returns:** Hash with keys `:subdomain`, `:domain`, `:tld`, `:root_domain`, `:host`, `:path` or `nil`
+**Returns:** Hash with keys `:subdomain`, `:domain`, `:tld`, `:root_domain`, `:host`, `:path`
+**Raises:** `DomainExtractor::InvalidURLError` when the URL fails validation
 ### `DomainExtractor.parse_batch(urls)`
@@ -119,6 +139,12 @@ Parses multiple URLs efficiently.
 **Returns:** Array of parsed results
+### `DomainExtractor.valid?(url_string)`
+Checks if a URL can be parsed successfully without raising.
+**Returns:** `true` or `false`
 ### `DomainExtractor.parse_query_params(query_string)`
 Parses a query string into a hash of parameters.
@@ -146,8 +172,9 @@ track_event('page_view', source_domain: parsed[:root_domain]) if parsed
 ```ruby
 def internal_link?(url, base_domain)
-  parsed = DomainExtractor.parse(url)
-  parsed && parsed[:root_domain] == base_domain
+  return false unless DomainExtractor.valid?(url)
+  DomainExtractor.parse(url)[:root_domain] == base_domain
 end
 ```

data/lib/domain_extractor/errors.rb ADDED Viewed

@@ -0,0 +1,11 @@
+# frozen_string_literal: true
+module DomainExtractor
+  class InvalidURLError < StandardError
+    DEFAULT_MESSAGE = 'Invalid URL Value'
+    def initialize(message = DEFAULT_MESSAGE)
+      super
+    end
+  end
+end

data/lib/domain_extractor/parser.rb CHANGED Viewed

@@ -13,18 +13,21 @@ module DomainExtractor
     module_function
     def call(raw_url)
-      uri = build_uri(raw_url)
-      return unless uri
-      host = uri.host&.downcase
-      return if invalid_host?(host)
+      components = extract_components(raw_url)
+      return unless components
-      domain = ::PublicSuffix.parse(host)
+      uri, domain, host = components
       build_result(domain: domain, host: host, uri: uri)
     rescue ::URI::InvalidURIError, ::PublicSuffix::Error
       nil
     end
+    def valid?(raw_url)
+      !!extract_components(raw_url)
+    rescue ::URI::InvalidURIError, ::PublicSuffix::Error
+      false
+    end
     def build_uri(raw_url)
       normalized = Normalizer.call(raw_url)
       return unless normalized
@@ -38,6 +41,18 @@ module DomainExtractor
     end
     private_class_method :invalid_host?
+    def extract_components(raw_url)
+      uri = build_uri(raw_url)
+      return unless uri
+      host = uri.host&.downcase
+      return if invalid_host?(host)
+      domain = ::PublicSuffix.parse(host)
+      [uri, domain, host]
+    end
+    private_class_method :extract_components
     def build_result(domain:, host:, uri:)
       Result.build(
         subdomain: domain.trd,

data/lib/domain_extractor/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module DomainExtractor
-  VERSION = '0.1.6'
+  VERSION = '0.1.7'
 end

data/lib/domain_extractor.rb CHANGED Viewed

@@ -4,6 +4,7 @@ require 'uri'
 require 'public_suffix'
 require_relative 'domain_extractor/version'
+require_relative 'domain_extractor/errors'
 require_relative 'domain_extractor/parser'
 require_relative 'domain_extractor/query_params'
@@ -12,10 +13,18 @@ require_relative 'domain_extractor/query_params'
 module DomainExtractor
   class << self
     # Parse an individual URL and extract domain attributes.
+    # Raises DomainExtractor::InvalidURLError when the URL fails validation.
     # @param url [String, #to_s]
-    # @return [Hash, nil]
+    # @return [Hash]
     def parse(url)
-      Parser.call(url)
+      Parser.call(url) || raise(InvalidURLError)
+    end
+    # Determine if a URL is considered valid by the parser.
+    # @param url [String, #to_s]
+    # @return [Boolean]
+    def valid?(url)
+      Parser.valid?(url)
     end
     # Parse many URLs and return their individual parse results.
@@ -24,7 +33,7 @@ module DomainExtractor
     def parse_batch(urls)
       return [] unless urls.respond_to?(:map)
-      urls.map { |url| parse(url) }
+      urls.map { |url| Parser.call(url) }
     end
     # Convert a query string into a Hash representation.

data/spec/domain_extractor_spec.rb CHANGED Viewed

@@ -142,32 +142,70 @@ RSpec.describe DomainExtractor do
     end
     context 'with invalid URLs' do
-      it 'returns nil for malformed URLs' do
-        expect(described_class.parse('http://')).to be_nil
+      it 'raises InvalidURLError for malformed URLs' do
+        expect { described_class.parse('http://') }.to raise_error(
+          DomainExtractor::InvalidURLError,
+          'Invalid URL Value'
+        )
       end
-      it 'returns nil for invalid domains' do
-        expect(described_class.parse('not_a_url')).to be_nil
+      it 'raises InvalidURLError for invalid domains' do
+        expect { described_class.parse('not_a_url') }.to raise_error(
+          DomainExtractor::InvalidURLError,
+          'Invalid URL Value'
+        )
       end
-      it 'returns nil for IP addresses' do
-        expect(described_class.parse('192.168.1.1')).to be_nil
+      it 'raises InvalidURLError for IP addresses' do
+        expect { described_class.parse('192.168.1.1') }.to raise_error(
+          DomainExtractor::InvalidURLError,
+          'Invalid URL Value'
+        )
       end
-      it 'returns nil for IPv6 addresses' do
-        expect(described_class.parse('[2001:db8::1]')).to be_nil
+      it 'raises InvalidURLError for IPv6 addresses' do
+        expect { described_class.parse('[2001:db8::1]') }.to raise_error(
+          DomainExtractor::InvalidURLError,
+          'Invalid URL Value'
+        )
       end
-      it 'returns nil for empty string' do
-        expect(described_class.parse('')).to be_nil
+      it 'raises InvalidURLError for empty string' do
+        expect { described_class.parse('') }.to raise_error(DomainExtractor::InvalidURLError, 'Invalid URL Value')
       end
-      it 'returns nil for nil' do
-        expect(described_class.parse(nil)).to be_nil
+      it 'raises InvalidURLError for nil' do
+        expect { described_class.parse(nil) }.to raise_error(DomainExtractor::InvalidURLError, 'Invalid URL Value')
       end
     end
   end
+  describe '.valid?' do
+    it 'returns true for a normalized domain' do
+      expect(described_class.valid?('dashtrack.com')).to be(true)
+    end
+    it 'returns true for a full URL with subdomain and query' do
+      expect(described_class.valid?('https://www.example.co.uk/path?query=value')).to be(true)
+    end
+    it 'returns false for malformed URLs' do
+      expect(described_class.valid?('http://')).to be(false)
+    end
+    it 'returns false for invalid domains' do
+      expect(described_class.valid?('not_a_url')).to be(false)
+    end
+    it 'returns false for IP addresses' do
+      expect(described_class.valid?('192.168.1.1')).to be(false)
+    end
+    it 'returns false for nil values' do
+      expect(described_class.valid?(nil)).to be(false)
+    end
+  end
   describe '.parse_query_params' do
     it 'converts simple query string to hash' do
       result = described_class.parse_query_params('foo=bar')

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: domain_extractor
 version: !ruby/object:Gem::Version
-  version: 0.1.6
+  version: 0.1.7
 platform: ruby
 authors:
 - OpenSite AI
@@ -41,6 +41,7 @@ files:
 - LICENSE.txt
 - README.md
 - lib/domain_extractor.rb
+- lib/domain_extractor/errors.rb
 - lib/domain_extractor/normalizer.rb
 - lib/domain_extractor/parser.rb
 - lib/domain_extractor/query_params.rb