twingly-url 6.0.4 → 7.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +0 -1
- data/lib/twingly/url/hasher.rb +0 -4
- data/lib/twingly/url.rb +24 -3
- data/lib/twingly/version.rb +1 -1
- metadata +6 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 97ddbac02316ba39d3a55c162ceb1ce0065a7be690a98ab4bea4870eddfd1316
|
4
|
+
data.tar.gz: 618a3e2544a478439dda7acc3fb103a0fcf7f818e4c09095ca39117871e6bc0f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a9fce1402fe896c2ee5e343fb33f2b5c2113c5499c79575958172824c50035132674135a212fcce96337361177b582340dfe3f780ef4cf5ec261b29ab87479d3
|
7
|
+
data.tar.gz: 9617ed3bcb459921188f3da2e7ce04c26764c350aed55751fd319e6f0aa0daefab9bfbefcccccbfb5235f8f9ce656e56eca3bddb69741fca14df71125b8e29b4
|
data/README.md
CHANGED
@@ -8,7 +8,6 @@ Twingly URL tools.
|
|
8
8
|
* `Twingly::URL.parse` - Returns one or more `Twingly::URL` instances
|
9
9
|
* `twingly/url/hasher` - Generate URL hashes suitable for primary keys
|
10
10
|
* `Twingly::URL::Hasher.taskdb_hash(url)` - MD5 hexdigest
|
11
|
-
* `Twingly::URL::Hasher.blogstream_hash(url)` - MD5 hexdigest
|
12
11
|
* `Twingly::URL::Hasher.documentdb_hash(url)` - SHA256 unsigned long, native endian digest
|
13
12
|
* `Twingly::URL::Hasher.autopingdb_hash(url)` - SHA256 64-bit signed, native endian digest
|
14
13
|
* `twingly/url/utilities` - Utilities to work with URLs
|
data/lib/twingly/url/hasher.rb
CHANGED
data/lib/twingly/url.rb
CHANGED
@@ -22,6 +22,8 @@ module Twingly
|
|
22
22
|
Addressable::URI::InvalidURIError,
|
23
23
|
PublicSuffix::DomainInvalid,
|
24
24
|
].freeze
|
25
|
+
DOT = "."
|
26
|
+
HYPHEN = "-"
|
25
27
|
CARRIAGE_RETURN = "\u000D"
|
26
28
|
LINE_FEED = "\u000A"
|
27
29
|
NBSP = "\u00A0"
|
@@ -34,16 +36,20 @@ module Twingly
|
|
34
36
|
].join.freeze
|
35
37
|
LEADING_AND_TRAILING_WHITESPACE =
|
36
38
|
/\A[#{WHITESPACE_CHARS}]+|[#{WHITESPACE_CHARS}]+\z/.freeze
|
39
|
+
LETTERS_DIGITS_HYPHEN = /\A[a-zA-Z0-9-]+\z/.freeze
|
37
40
|
|
38
41
|
private_constant :ACCEPTED_SCHEMES
|
39
42
|
private_constant :CUSTOM_PSL
|
40
43
|
private_constant :STARTS_WITH_WWW
|
41
44
|
private_constant :ENDS_WITH_SLASH
|
42
45
|
private_constant :ERRORS_TO_EXTEND
|
46
|
+
private_constant :DOT
|
47
|
+
private_constant :HYPHEN
|
43
48
|
private_constant :NBSP
|
44
49
|
private_constant :SPACE
|
45
50
|
private_constant :WHITESPACE_CHARS
|
46
51
|
private_constant :LEADING_AND_TRAILING_WHITESPACE
|
52
|
+
private_constant :LETTERS_DIGITS_HYPHEN
|
47
53
|
|
48
54
|
class << self
|
49
55
|
def parse(potential_url)
|
@@ -91,10 +97,9 @@ module Twingly
|
|
91
97
|
input.gsub(LEADING_AND_TRAILING_WHITESPACE, "")
|
92
98
|
end
|
93
99
|
|
94
|
-
# Workaround for the following bug in addressable:
|
95
|
-
# https://github.com/sporkmonger/addressable/issues/224
|
96
100
|
def try_addressable_normalize(addressable_uri)
|
97
|
-
addressable_uri.normalize
|
101
|
+
ascii_host = addressable_uri.normalize.host
|
102
|
+
raise Twingly::URL::Error::ParseError unless valid_hostname?(ascii_host)
|
98
103
|
rescue ArgumentError => error
|
99
104
|
if error.message.include?("invalid byte sequence in UTF-8")
|
100
105
|
raise Twingly::URL::Error::ParseError
|
@@ -103,11 +108,27 @@ module Twingly
|
|
103
108
|
raise
|
104
109
|
end
|
105
110
|
|
111
|
+
def valid_hostname?(hostname)
|
112
|
+
# No need to check the TLD, the public suffix list does that
|
113
|
+
labels = hostname.split(DOT)[0...-1].map(&:to_s)
|
114
|
+
|
115
|
+
labels.all? { |label| valid_label?(label) }
|
116
|
+
end
|
117
|
+
|
118
|
+
def valid_label?(label)
|
119
|
+
return false if label.start_with?(HYPHEN)
|
120
|
+
return false if label.end_with?(HYPHEN)
|
121
|
+
|
122
|
+
label.match?(LETTERS_DIGITS_HYPHEN)
|
123
|
+
end
|
124
|
+
|
106
125
|
private :new
|
107
126
|
private :internal_parse
|
108
127
|
private :clean_input
|
109
128
|
private :strip_whitespace
|
110
129
|
private :try_addressable_normalize
|
130
|
+
private :valid_hostname?
|
131
|
+
private :valid_label?
|
111
132
|
end
|
112
133
|
|
113
134
|
def initialize(addressable_uri, public_suffix_domain)
|
data/lib/twingly/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: twingly-url
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 6.0.4
|
4
|
+
version: 7.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Twingly AB
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-10-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: addressable
|
@@ -33,7 +33,7 @@ dependencies:
|
|
33
33
|
version: 3.0.1
|
34
34
|
- - "<"
|
35
35
|
- !ruby/object:Gem::Version
|
36
|
-
version: '
|
36
|
+
version: '6.0'
|
37
37
|
type: :runtime
|
38
38
|
prerelease: false
|
39
39
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -43,7 +43,7 @@ dependencies:
|
|
43
43
|
version: 3.0.1
|
44
44
|
- - "<"
|
45
45
|
- !ruby/object:Gem::Version
|
46
|
-
version: '
|
46
|
+
version: '6.0'
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: rake
|
49
49
|
requirement: !ruby/object:Gem::Requirement
|
@@ -113,14 +113,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
113
113
|
requirements:
|
114
114
|
- - ">="
|
115
115
|
- !ruby/object:Gem::Version
|
116
|
-
version: '2.
|
116
|
+
version: '2.6'
|
117
117
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
118
118
|
requirements:
|
119
119
|
- - ">="
|
120
120
|
- !ruby/object:Gem::Version
|
121
121
|
version: '0'
|
122
122
|
requirements: []
|
123
|
-
rubygems_version: 3.
|
123
|
+
rubygems_version: 3.3.7
|
124
124
|
signing_key:
|
125
125
|
specification_version: 4
|
126
126
|
summary: Ruby library for URL handling
|