twingly-url 6.0.4 → 7.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +0 -1
- data/lib/twingly/url/hasher.rb +0 -4
- data/lib/twingly/url.rb +26 -3
- data/lib/twingly/version.rb +1 -1
- metadata +6 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4d077940af63f0646300c3438d45f3ca66370825c5ca81df32c6d1597e156071
|
4
|
+
data.tar.gz: 308a66eb3c281139a9f747a3538c3e53291f491eafa3bbdd3dbe30d1decb42d3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0b50ee3c1e3dd3beb12b85c813e36952945fca3de296257d7e0428a1de78d866932d334f13c405f784659324a69de0b2de293c07520f94f0e9d3c9a0665a3955
|
7
|
+
data.tar.gz: 278af99f73d82edb9f05eed9f795fe0e0395bf45626f9fde2e09b605ce467afc90c13780020fc94b6e54f2c64daf995f061a20eddbc1fcc84d6ac1cf0a1fcd48
|
data/README.md
CHANGED
@@ -8,7 +8,6 @@ Twingly URL tools.
|
|
8
8
|
* `Twingly::URL.parse` - Returns one or more `Twingly::URL` instance
|
9
9
|
* `twingly/url/hasher` - Generate URL hashes suitable for primary keys
|
10
10
|
* `Twingly::URL::Hasher.taskdb_hash(url)` - MD5 hexdigest
|
11
|
-
* `Twingly::URL::Hasher.blogstream_hash(url)` - MD5 hexdigest
|
12
11
|
* `Twingly::URL::Hasher.documentdb_hash(url)` - SHA256 unsigned long, native endian digest
|
13
12
|
* `Twingly::URL::Hasher.autopingdb_hash(url)` - SHA256 64-bit signed, native endian digest
|
14
13
|
* `twingly/url/utilities` - Utilities to work with URLs
|
data/lib/twingly/url/hasher.rb
CHANGED
data/lib/twingly/url.rb
CHANGED
@@ -22,6 +22,8 @@ module Twingly
|
|
22
22
|
Addressable::URI::InvalidURIError,
|
23
23
|
PublicSuffix::DomainInvalid,
|
24
24
|
].freeze
|
25
|
+
DOT = "."
|
26
|
+
HYPHEN = "-"
|
25
27
|
CARRIAGE_RETURN = "\u000D"
|
26
28
|
LINE_FEED = "\u000A"
|
27
29
|
NBSP = "\u00A0"
|
@@ -34,16 +36,20 @@ module Twingly
|
|
34
36
|
].join.freeze
|
35
37
|
LEADING_AND_TRAILING_WHITESPACE =
|
36
38
|
/\A[#{WHITESPACE_CHARS}]+|[#{WHITESPACE_CHARS}]+\z/.freeze
|
39
|
+
LETTERS_DIGITS_HYPHEN = /\A[a-zA-Z0-9-]+\z/.freeze
|
37
40
|
|
38
41
|
private_constant :ACCEPTED_SCHEMES
|
39
42
|
private_constant :CUSTOM_PSL
|
40
43
|
private_constant :STARTS_WITH_WWW
|
41
44
|
private_constant :ENDS_WITH_SLASH
|
42
45
|
private_constant :ERRORS_TO_EXTEND
|
46
|
+
private_constant :DOT
|
47
|
+
private_constant :HYPHEN
|
43
48
|
private_constant :NBSP
|
44
49
|
private_constant :SPACE
|
45
50
|
private_constant :WHITESPACE_CHARS
|
46
51
|
private_constant :LEADING_AND_TRAILING_WHITESPACE
|
52
|
+
private_constant :LETTERS_DIGITS_HYPHEN
|
47
53
|
|
48
54
|
class << self
|
49
55
|
def parse(potential_url)
|
@@ -91,10 +97,9 @@ module Twingly
|
|
91
97
|
input.gsub(LEADING_AND_TRAILING_WHITESPACE, "")
|
92
98
|
end
|
93
99
|
|
94
|
-
# Workaround for the following bug in addressable:
|
95
|
-
# https://github.com/sporkmonger/addressable/issues/224
|
96
100
|
def try_addressable_normalize(addressable_uri)
|
97
|
-
addressable_uri.normalize
|
101
|
+
ascii_host = addressable_uri.normalize.host
|
102
|
+
raise Twingly::URL::Error::ParseError unless valid_hostname?(ascii_host)
|
98
103
|
rescue ArgumentError => error
|
99
104
|
if error.message.include?("invalid byte sequence in UTF-8")
|
100
105
|
raise Twingly::URL::Error::ParseError
|
@@ -103,11 +108,29 @@ module Twingly
|
|
103
108
|
raise
|
104
109
|
end
|
105
110
|
|
111
|
+
def valid_hostname?(hostname)
|
112
|
+
return false if hostname.nil?
|
113
|
+
|
114
|
+
# No need to check the TLD, the public suffix list does that
|
115
|
+
labels = hostname.split(DOT)[0...-1].map(&:to_s)
|
116
|
+
|
117
|
+
labels.all? { |label| valid_label?(label) }
|
118
|
+
end
|
119
|
+
|
120
|
+
def valid_label?(label)
|
121
|
+
return false if label.start_with?(HYPHEN)
|
122
|
+
return false if label.end_with?(HYPHEN)
|
123
|
+
|
124
|
+
label.match?(LETTERS_DIGITS_HYPHEN)
|
125
|
+
end
|
126
|
+
|
106
127
|
private :new
|
107
128
|
private :internal_parse
|
108
129
|
private :clean_input
|
109
130
|
private :strip_whitespace
|
110
131
|
private :try_addressable_normalize
|
132
|
+
private :valid_hostname?
|
133
|
+
private :valid_label?
|
111
134
|
end
|
112
135
|
|
113
136
|
def initialize(addressable_uri, public_suffix_domain)
|
data/lib/twingly/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: twingly-url
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 6.0.4
|
4
|
+
version: 7.0.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Twingly AB
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-11-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: addressable
|
@@ -33,7 +33,7 @@ dependencies:
|
|
33
33
|
version: 3.0.1
|
34
34
|
- - "<"
|
35
35
|
- !ruby/object:Gem::Version
|
36
|
-
version: '
|
36
|
+
version: '6.0'
|
37
37
|
type: :runtime
|
38
38
|
prerelease: false
|
39
39
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -43,7 +43,7 @@ dependencies:
|
|
43
43
|
version: 3.0.1
|
44
44
|
- - "<"
|
45
45
|
- !ruby/object:Gem::Version
|
46
|
-
version: '
|
46
|
+
version: '6.0'
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: rake
|
49
49
|
requirement: !ruby/object:Gem::Requirement
|
@@ -113,14 +113,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
113
113
|
requirements:
|
114
114
|
- - ">="
|
115
115
|
- !ruby/object:Gem::Version
|
116
|
-
version: '2.
|
116
|
+
version: '2.6'
|
117
117
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
118
118
|
requirements:
|
119
119
|
- - ">="
|
120
120
|
- !ruby/object:Gem::Version
|
121
121
|
version: '0'
|
122
122
|
requirements: []
|
123
|
-
rubygems_version: 3.
|
123
|
+
rubygems_version: 3.3.7
|
124
124
|
signing_key:
|
125
125
|
specification_version: 4
|
126
126
|
summary: Ruby library for URL handling
|