domain_extractor 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +28 -8
- data/README.md +2 -2
- data/lib/domain_extractor/normalizer.rb +1 -1
- data/lib/domain_extractor/validators.rb +2 -2
- data/lib/domain_extractor/version.rb +2 -2
- data/spec/domain_extractor_spec.rb +8 -8
- metadata +15 -11
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 4c2ae245bf951dcae02e2064f57ae9c67467429e53fadc3abcdd57b5c3bacd84
|
|
4
|
+
data.tar.gz: d90c4fe217b3565421cb01272ab3223910446dc504c1faf8b555ea7c19a8e2bc
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 7e27485891529d74739c4afe6b9f86545a93005bfe5b079de33b9cfd0c5cc11583b1233e9066e941ede3d1992b483c62a09fb1e143ffbd178b98f476f677d3e4
|
|
7
|
+
data.tar.gz: 60297cd93aded5f11751d15c3440823a50b7474b8ca11b15492ce08f1468dfda1e017b86cccb2dba5dc92689d9cdf3a6d87e040014659520b481e50763520bb8
|
data/.rubocop.yml
CHANGED
|
@@ -1,20 +1,40 @@
|
|
|
1
1
|
AllCops:
|
|
2
|
+
# Should match your gemspec's required_ruby_version minimum
|
|
3
|
+
TargetRubyVersion: 3.2
|
|
2
4
|
NewCops: enable
|
|
3
|
-
|
|
5
|
+
SuggestExtensions: false
|
|
4
6
|
Exclude:
|
|
5
|
-
-
|
|
6
|
-
-
|
|
7
|
+
- "vendor/**/*"
|
|
8
|
+
- "spec/fixtures/**/*"
|
|
9
|
+
- "tmp/**/*"
|
|
10
|
+
- "bin/**/*"
|
|
7
11
|
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
12
|
+
# Customize your style preferences here
|
|
13
|
+
Style/StringLiterals:
|
|
14
|
+
Enabled: true
|
|
15
|
+
EnforcedStyle: single_quotes
|
|
16
|
+
|
|
17
|
+
Style/FrozenStringLiteralComment:
|
|
18
|
+
Enabled: true
|
|
19
|
+
EnforcedStyle: always
|
|
20
|
+
|
|
21
|
+
Layout/LineLength:
|
|
22
|
+
Max: 120
|
|
23
|
+
AllowedPatterns: ['\A#'] # Allow long comment lines
|
|
11
24
|
|
|
12
25
|
Metrics/BlockLength:
|
|
13
26
|
Exclude:
|
|
14
|
-
-
|
|
27
|
+
- "spec/**/*"
|
|
28
|
+
- "**/*.gemspec"
|
|
15
29
|
|
|
16
30
|
Metrics/MethodLength:
|
|
17
|
-
Max:
|
|
31
|
+
Max: 15
|
|
32
|
+
Exclude:
|
|
33
|
+
- "spec/**/*"
|
|
18
34
|
|
|
35
|
+
# Disable some overly strict cops for gems
|
|
19
36
|
Style/Documentation:
|
|
20
37
|
Enabled: false
|
|
38
|
+
|
|
39
|
+
Style/AsciiComments:
|
|
40
|
+
Enabled: false
|
data/README.md
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
# DomainExtractor
|
|
2
2
|
|
|
3
3
|
[Gem Version](https://badge.fury.io/rb/domain_extractor)
|
|
4
|
-
[CI](https://github.com/opensite-ai/domain_extractor/actions/workflows/ci.yml)
|
|
5
5
|
[Code Climate](https://codeclimate.com/github/opensite-ai/domain_extractor)
|
|
6
6
|
|
|
7
7
|
A lightweight, robust Ruby library for url parsing and domain parsing with **accurate multi-part TLD support**. DomainExtractor delivers a high-throughput url parser and domain parser that excels at domain extraction tasks while staying friendly to analytics pipelines. Perfect for web scraping, analytics, url manipulation, query parameter parsing, and multi-environment domain analysis.
|
|
@@ -170,7 +170,7 @@ end
|
|
|
170
170
|
|
|
171
171
|
## Requirements
|
|
172
172
|
|
|
173
|
-
- Ruby
|
|
173
|
+
- Ruby 3.0.0 or higher
|
|
174
174
|
- public_suffix gem (~> 6.0)
|
|
175
175
|
|
|
176
176
|
## Contributing
|
|
data/lib/domain_extractor/normalizer.rb
CHANGED
|
@@ -4,7 +4,7 @@ module DomainExtractor
|
|
|
4
4
|
# Normalizer ensures URLs include a scheme and removes extraneous whitespace
|
|
5
5
|
# before passing them into the URI parser.
|
|
6
6
|
module Normalizer
|
|
7
|
-
SCHEME_PATTERN = %r{\A[A-Za-z][A-Za-z0-9+\-.]*://}
|
|
7
|
+
SCHEME_PATTERN = %r{\A[A-Za-z][A-Za-z0-9+\-.]*://}
|
|
8
8
|
|
|
9
9
|
module_function
|
|
10
10
|
|
|
data/lib/domain_extractor/validators.rb
CHANGED
|
@@ -4,8 +4,8 @@ module DomainExtractor
|
|
|
4
4
|
# Validators hosts fast checks for excluding unsupported hostnames (e.g. IP addresses).
|
|
5
5
|
module Validators
|
|
6
6
|
IPV4_SEGMENT = '(?:25[0-5]|2[0-4]\d|1\d{2}|[1-9]?\d)'
|
|
7
|
-
IPV4_REGEX = /\A#{IPV4_SEGMENT}(?:\.#{IPV4_SEGMENT}){3}\z
|
|
8
|
-
IPV6_REGEX = /\A\[?[0-9a-fA-F:]+\]?\z
|
|
7
|
+
IPV4_REGEX = /\A#{IPV4_SEGMENT}(?:\.#{IPV4_SEGMENT}){3}\z/
|
|
8
|
+
IPV6_REGEX = /\A\[?[0-9a-fA-F:]+\]?\z/
|
|
9
9
|
|
|
10
10
|
module_function
|
|
11
11
|
|
|
data/spec/domain_extractor_spec.rb
CHANGED
|
@@ -87,11 +87,11 @@ RSpec.describe DomainExtractor do
|
|
|
87
87
|
it 'extracts multiple query parameters' do
|
|
88
88
|
result = described_class.parse('https://example.com/page?foo=bar&baz=qux&id=123')
|
|
89
89
|
|
|
90
|
-
expect(result[:query_params]).to eq(
|
|
90
|
+
expect(result[:query_params]).to eq(
|
|
91
91
|
'foo' => 'bar',
|
|
92
92
|
'baz' => 'qux',
|
|
93
93
|
'id' => '123'
|
|
94
|
-
|
|
94
|
+
)
|
|
95
95
|
end
|
|
96
96
|
|
|
97
97
|
it 'handles URLs with path and multiple query parameters' do
|
|
@@ -100,10 +100,10 @@ RSpec.describe DomainExtractor do
|
|
|
100
100
|
expect(result[:subdomain]).to eq('api')
|
|
101
101
|
expect(result[:root_domain]).to eq('example.com')
|
|
102
102
|
expect(result[:path]).to eq('/v1/users')
|
|
103
|
-
expect(result[:query_params]).to eq(
|
|
103
|
+
expect(result[:query_params]).to eq(
|
|
104
104
|
'page' => '2',
|
|
105
105
|
'limit' => '10'
|
|
106
|
-
|
|
106
|
+
)
|
|
107
107
|
end
|
|
108
108
|
|
|
109
109
|
it 'handles URLs with empty query string' do
|
|
@@ -178,11 +178,11 @@ RSpec.describe DomainExtractor do
|
|
|
178
178
|
it 'converts multiple parameters to hash' do
|
|
179
179
|
result = described_class.parse_query_params('foo=bar&baz=qux&id=123')
|
|
180
180
|
|
|
181
|
-
expect(result).to eq(
|
|
181
|
+
expect(result).to eq(
|
|
182
182
|
'foo' => 'bar',
|
|
183
183
|
'baz' => 'qux',
|
|
184
184
|
'id' => '123'
|
|
185
|
-
|
|
185
|
+
)
|
|
186
186
|
end
|
|
187
187
|
|
|
188
188
|
it 'returns empty hash for nil query' do
|
|
@@ -212,11 +212,11 @@ RSpec.describe DomainExtractor do
|
|
|
212
212
|
it 'handles mixed parameters with and without values' do
|
|
213
213
|
result = described_class.parse_query_params('foo=bar&flag&baz=qux')
|
|
214
214
|
|
|
215
|
-
expect(result).to eq(
|
|
215
|
+
expect(result).to eq(
|
|
216
216
|
'foo' => 'bar',
|
|
217
217
|
'flag' => nil,
|
|
218
218
|
'baz' => 'qux'
|
|
219
|
-
|
|
219
|
+
)
|
|
220
220
|
end
|
|
221
221
|
|
|
222
222
|
it 'ignores blank keys' do
|
metadata
CHANGED
|
@@ -1,13 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: domain_extractor
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.0
|
|
4
|
+
version: 0.1.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- OpenSite AI
|
|
8
|
+
autorequire:
|
|
8
9
|
bindir: bin
|
|
9
10
|
cert_chain: []
|
|
10
|
-
date:
|
|
11
|
+
date: 2025-10-31 00:00:00.000000000 Z
|
|
11
12
|
dependencies:
|
|
12
13
|
- !ruby/object:Gem::Dependency
|
|
13
14
|
name: public_suffix
|
|
@@ -23,16 +24,17 @@ dependencies:
|
|
|
23
24
|
- - "~>"
|
|
24
25
|
- !ruby/object:Gem::Version
|
|
25
26
|
version: '6.0'
|
|
26
|
-
description:
|
|
27
|
-
|
|
28
|
-
and multi-part tld parsing via
|
|
27
|
+
description: |-
|
|
28
|
+
DomainExtractor is a high-performance url parser and domain parser for Ruby. It delivers precise
|
|
29
|
+
domain extraction, query parameter parsing, url normalization, and multi-part tld parsing via
|
|
30
|
+
public_suffix for web scraping and analytics workflows.
|
|
29
31
|
email: dev@opensite.ai
|
|
30
32
|
executables: []
|
|
31
33
|
extensions: []
|
|
32
34
|
extra_rdoc_files:
|
|
33
|
-
- CHANGELOG.md
|
|
34
|
-
- LICENSE.txt
|
|
35
35
|
- README.md
|
|
36
|
+
- LICENSE.txt
|
|
37
|
+
- CHANGELOG.md
|
|
36
38
|
files:
|
|
37
39
|
- ".rubocop.yml"
|
|
38
40
|
- CHANGELOG.md
|
|
@@ -52,13 +54,14 @@ licenses:
|
|
|
52
54
|
- MIT
|
|
53
55
|
metadata:
|
|
54
56
|
source_code_uri: https://github.com/opensite-ai/domain_extractor
|
|
55
|
-
changelog_uri: https://github.com/opensite-ai/domain_extractor/blob/
|
|
57
|
+
changelog_uri: https://github.com/opensite-ai/domain_extractor/blob/master/CHANGELOG.md
|
|
56
58
|
documentation_uri: https://rubydoc.info/gems/domain_extractor
|
|
57
59
|
bug_tracker_uri: https://github.com/opensite-ai/domain_extractor/issues
|
|
58
|
-
homepage_uri: https://opensite
|
|
60
|
+
homepage_uri: https://github.com/opensite-ai/domain_extractor
|
|
59
61
|
wiki_uri: https://docs.devguides.com/domain_extractor
|
|
60
62
|
rubygems_mfa_required: 'true'
|
|
61
63
|
allowed_push_host: https://rubygems.org
|
|
64
|
+
post_install_message:
|
|
62
65
|
rdoc_options:
|
|
63
66
|
- "--main"
|
|
64
67
|
- README.md
|
|
@@ -72,14 +75,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
|
72
75
|
requirements:
|
|
73
76
|
- - ">="
|
|
74
77
|
- !ruby/object:Gem::Version
|
|
75
|
-
version: 2.
|
|
78
|
+
version: 3.2.0
|
|
76
79
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
77
80
|
requirements:
|
|
78
81
|
- - ">="
|
|
79
82
|
- !ruby/object:Gem::Version
|
|
80
83
|
version: '0'
|
|
81
84
|
requirements: []
|
|
82
|
-
rubygems_version: 3.
|
|
85
|
+
rubygems_version: 3.5.22
|
|
86
|
+
signing_key:
|
|
83
87
|
specification_version: 4
|
|
84
88
|
summary: High-performance url parser and domain extractor for Ruby
|
|
85
89
|
test_files: []
|