twingly-url 6.0.4 → 7.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +0 -1
- data/lib/twingly/url/hasher.rb +0 -4
- data/lib/twingly/url.rb +24 -3
- data/lib/twingly/version.rb +1 -1
- metadata +6 -6
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 97ddbac02316ba39d3a55c162ceb1ce0065a7be690a98ab4bea4870eddfd1316
|
|
4
|
+
data.tar.gz: 618a3e2544a478439dda7acc3fb103a0fcf7f818e4c09095ca39117871e6bc0f
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: a9fce1402fe896c2ee5e343fb33f2b5c2113c5499c79575958172824c50035132674135a212fcce96337361177b582340dfe3f780ef4cf5ec261b29ab87479d3
|
|
7
|
+
data.tar.gz: 9617ed3bcb459921188f3da2e7ce04c26764c350aed55751fd319e6f0aa0daefab9bfbefcccccbfb5235f8f9ce656e56eca3bddb69741fca14df71125b8e29b4
|
data/README.md
CHANGED
|
@@ -8,7 +8,6 @@ Twingly URL tools.
|
|
|
8
8
|
* `Twingly::URL.parse` - Returns one or more `Twingly::URL` instance
|
|
9
9
|
* `twingly/url/hasher` - Generate URL hashes suitable for primary keys
|
|
10
10
|
* `Twingly::URL::Hasher.taskdb_hash(url)` - MD5 hexdigest
|
|
11
|
-
* `Twingly::URL::Hasher.blogstream_hash(url)` - MD5 hexdigest
|
|
12
11
|
* `Twingly::URL::Hasher.documentdb_hash(url)` - SHA256 unsigned long, native endian digest
|
|
13
12
|
* `Twingly::URL::Hasher.autopingdb_hash(url)` - SHA256 64-bit signed, native endian digest
|
|
14
13
|
* `twingly/url/utilities` - Utilities to work with URLs
|
data/lib/twingly/url/hasher.rb
CHANGED
data/lib/twingly/url.rb
CHANGED
|
@@ -22,6 +22,8 @@ module Twingly
|
|
|
22
22
|
Addressable::URI::InvalidURIError,
|
|
23
23
|
PublicSuffix::DomainInvalid,
|
|
24
24
|
].freeze
|
|
25
|
+
DOT = "."
|
|
26
|
+
HYPHEN = "-"
|
|
25
27
|
CARRIAGE_RETURN = "\u000D"
|
|
26
28
|
LINE_FEED = "\u000A"
|
|
27
29
|
NBSP = "\u00A0"
|
|
@@ -34,16 +36,20 @@ module Twingly
|
|
|
34
36
|
].join.freeze
|
|
35
37
|
LEADING_AND_TRAILING_WHITESPACE =
|
|
36
38
|
/\A[#{WHITESPACE_CHARS}]+|[#{WHITESPACE_CHARS}]+\z/.freeze
|
|
39
|
+
LETTERS_DIGITS_HYPHEN = /\A[a-zA-Z0-9-]+\z/.freeze
|
|
37
40
|
|
|
38
41
|
private_constant :ACCEPTED_SCHEMES
|
|
39
42
|
private_constant :CUSTOM_PSL
|
|
40
43
|
private_constant :STARTS_WITH_WWW
|
|
41
44
|
private_constant :ENDS_WITH_SLASH
|
|
42
45
|
private_constant :ERRORS_TO_EXTEND
|
|
46
|
+
private_constant :DOT
|
|
47
|
+
private_constant :HYPHEN
|
|
43
48
|
private_constant :NBSP
|
|
44
49
|
private_constant :SPACE
|
|
45
50
|
private_constant :WHITESPACE_CHARS
|
|
46
51
|
private_constant :LEADING_AND_TRAILING_WHITESPACE
|
|
52
|
+
private_constant :LETTERS_DIGITS_HYPHEN
|
|
47
53
|
|
|
48
54
|
class << self
|
|
49
55
|
def parse(potential_url)
|
|
@@ -91,10 +97,9 @@ module Twingly
|
|
|
91
97
|
input.gsub(LEADING_AND_TRAILING_WHITESPACE, "")
|
|
92
98
|
end
|
|
93
99
|
|
|
94
|
-
# Workaround for the following bug in addressable:
|
|
95
|
-
# https://github.com/sporkmonger/addressable/issues/224
|
|
96
100
|
def try_addressable_normalize(addressable_uri)
|
|
97
|
-
addressable_uri.normalize
|
|
101
|
+
ascii_host = addressable_uri.normalize.host
|
|
102
|
+
raise Twingly::URL::Error::ParseError unless valid_hostname?(ascii_host)
|
|
98
103
|
rescue ArgumentError => error
|
|
99
104
|
if error.message.include?("invalid byte sequence in UTF-8")
|
|
100
105
|
raise Twingly::URL::Error::ParseError
|
|
@@ -103,11 +108,27 @@ module Twingly
|
|
|
103
108
|
raise
|
|
104
109
|
end
|
|
105
110
|
|
|
111
|
+
def valid_hostname?(hostname)
|
|
112
|
+
# No need to check the TLD, the public suffix list does that
|
|
113
|
+
labels = hostname.split(DOT)[0...-1].map(&:to_s)
|
|
114
|
+
|
|
115
|
+
labels.all? { |label| valid_label?(label) }
|
|
116
|
+
end
|
|
117
|
+
|
|
118
|
+
def valid_label?(label)
|
|
119
|
+
return false if label.start_with?(HYPHEN)
|
|
120
|
+
return false if label.end_with?(HYPHEN)
|
|
121
|
+
|
|
122
|
+
label.match?(LETTERS_DIGITS_HYPHEN)
|
|
123
|
+
end
|
|
124
|
+
|
|
106
125
|
private :new
|
|
107
126
|
private :internal_parse
|
|
108
127
|
private :clean_input
|
|
109
128
|
private :strip_whitespace
|
|
110
129
|
private :try_addressable_normalize
|
|
130
|
+
private :valid_hostname?
|
|
131
|
+
private :valid_label?
|
|
111
132
|
end
|
|
112
133
|
|
|
113
134
|
def initialize(addressable_uri, public_suffix_domain)
|
data/lib/twingly/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: twingly-url
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 6.0.4
|
|
4
|
+
version: 7.0.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Twingly AB
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date:
|
|
11
|
+
date: 2022-10-14 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: addressable
|
|
@@ -33,7 +33,7 @@ dependencies:
|
|
|
33
33
|
version: 3.0.1
|
|
34
34
|
- - "<"
|
|
35
35
|
- !ruby/object:Gem::Version
|
|
36
|
-
version: '
|
|
36
|
+
version: '6.0'
|
|
37
37
|
type: :runtime
|
|
38
38
|
prerelease: false
|
|
39
39
|
version_requirements: !ruby/object:Gem::Requirement
|
|
@@ -43,7 +43,7 @@ dependencies:
|
|
|
43
43
|
version: 3.0.1
|
|
44
44
|
- - "<"
|
|
45
45
|
- !ruby/object:Gem::Version
|
|
46
|
-
version: '
|
|
46
|
+
version: '6.0'
|
|
47
47
|
- !ruby/object:Gem::Dependency
|
|
48
48
|
name: rake
|
|
49
49
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -113,14 +113,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
|
113
113
|
requirements:
|
|
114
114
|
- - ">="
|
|
115
115
|
- !ruby/object:Gem::Version
|
|
116
|
-
version: '2.
|
|
116
|
+
version: '2.6'
|
|
117
117
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
118
118
|
requirements:
|
|
119
119
|
- - ">="
|
|
120
120
|
- !ruby/object:Gem::Version
|
|
121
121
|
version: '0'
|
|
122
122
|
requirements: []
|
|
123
|
-
rubygems_version: 3.
|
|
123
|
+
rubygems_version: 3.3.7
|
|
124
124
|
signing_key:
|
|
125
125
|
specification_version: 4
|
|
126
126
|
summary: Ruby library for URL handling
|