twingly-url 6.0.4 → 7.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +0 -1
- data/lib/twingly/url/hasher.rb +0 -4
- data/lib/twingly/url.rb +26 -3
- data/lib/twingly/version.rb +1 -1
- metadata +6 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4d077940af63f0646300c3438d45f3ca66370825c5ca81df32c6d1597e156071
|
4
|
+
data.tar.gz: 308a66eb3c281139a9f747a3538c3e53291f491eafa3bbdd3dbe30d1decb42d3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0b50ee3c1e3dd3beb12b85c813e36952945fca3de296257d7e0428a1de78d866932d334f13c405f784659324a69de0b2de293c07520f94f0e9d3c9a0665a3955
|
7
|
+
data.tar.gz: 278af99f73d82edb9f05eed9f795fe0e0395bf45626f9fde2e09b605ce467afc90c13780020fc94b6e54f2c64daf995f061a20eddbc1fcc84d6ac1cf0a1fcd48
|
data/README.md
CHANGED
@@ -8,7 +8,6 @@ Twingly URL tools.
|
|
8
8
|
* `Twingly::URL.parse` - Returns one or more `Twingly::URL` instance
|
9
9
|
* `twingly/url/hasher` - Generate URL hashes suitable for primary keys
|
10
10
|
* `Twingly::URL::Hasher.taskdb_hash(url)` - MD5 hexdigest
|
11
|
-
* `Twingly::URL::Hasher.blogstream_hash(url)` - MD5 hexdigest
|
12
11
|
* `Twingly::URL::Hasher.documentdb_hash(url)` - SHA256 unsigned long, native endian digest
|
13
12
|
* `Twingly::URL::Hasher.autopingdb_hash(url)` - SHA256 64-bit signed, native endian digest
|
14
13
|
* `twingly/url/utilities` - Utilities to work with URLs
|
data/lib/twingly/url/hasher.rb
CHANGED
data/lib/twingly/url.rb
CHANGED
@@ -22,6 +22,8 @@ module Twingly
|
|
22
22
|
Addressable::URI::InvalidURIError,
|
23
23
|
PublicSuffix::DomainInvalid,
|
24
24
|
].freeze
|
25
|
+
DOT = "."
|
26
|
+
HYPHEN = "-"
|
25
27
|
CARRIAGE_RETURN = "\u000D"
|
26
28
|
LINE_FEED = "\u000A"
|
27
29
|
NBSP = "\u00A0"
|
@@ -34,16 +36,20 @@ module Twingly
|
|
34
36
|
].join.freeze
|
35
37
|
LEADING_AND_TRAILING_WHITESPACE =
|
36
38
|
/\A[#{WHITESPACE_CHARS}]+|[#{WHITESPACE_CHARS}]+\z/.freeze
|
39
|
+
LETTERS_DIGITS_HYPHEN = /\A[a-zA-Z0-9-]+\z/.freeze
|
37
40
|
|
38
41
|
private_constant :ACCEPTED_SCHEMES
|
39
42
|
private_constant :CUSTOM_PSL
|
40
43
|
private_constant :STARTS_WITH_WWW
|
41
44
|
private_constant :ENDS_WITH_SLASH
|
42
45
|
private_constant :ERRORS_TO_EXTEND
|
46
|
+
private_constant :DOT
|
47
|
+
private_constant :HYPHEN
|
43
48
|
private_constant :NBSP
|
44
49
|
private_constant :SPACE
|
45
50
|
private_constant :WHITESPACE_CHARS
|
46
51
|
private_constant :LEADING_AND_TRAILING_WHITESPACE
|
52
|
+
private_constant :LETTERS_DIGITS_HYPHEN
|
47
53
|
|
48
54
|
class << self
|
49
55
|
def parse(potential_url)
|
@@ -91,10 +97,9 @@ module Twingly
|
|
91
97
|
input.gsub(LEADING_AND_TRAILING_WHITESPACE, "")
|
92
98
|
end
|
93
99
|
|
94
|
-
# Workaround for the following bug in addressable:
|
95
|
-
# https://github.com/sporkmonger/addressable/issues/224
|
96
100
|
def try_addressable_normalize(addressable_uri)
|
97
|
-
addressable_uri.normalize
|
101
|
+
ascii_host = addressable_uri.normalize.host
|
102
|
+
raise Twingly::URL::Error::ParseError unless valid_hostname?(ascii_host)
|
98
103
|
rescue ArgumentError => error
|
99
104
|
if error.message.include?("invalid byte sequence in UTF-8")
|
100
105
|
raise Twingly::URL::Error::ParseError
|
@@ -103,11 +108,29 @@ module Twingly
|
|
103
108
|
raise
|
104
109
|
end
|
105
110
|
|
111
|
+
def valid_hostname?(hostname)
|
112
|
+
return false if hostname.nil?
|
113
|
+
|
114
|
+
# No need to check the TLD, the public suffix list does that
|
115
|
+
labels = hostname.split(DOT)[0...-1].map(&:to_s)
|
116
|
+
|
117
|
+
labels.all? { |label| valid_label?(label) }
|
118
|
+
end
|
119
|
+
|
120
|
+
def valid_label?(label)
|
121
|
+
return false if label.start_with?(HYPHEN)
|
122
|
+
return false if label.end_with?(HYPHEN)
|
123
|
+
|
124
|
+
label.match?(LETTERS_DIGITS_HYPHEN)
|
125
|
+
end
|
126
|
+
|
106
127
|
private :new
|
107
128
|
private :internal_parse
|
108
129
|
private :clean_input
|
109
130
|
private :strip_whitespace
|
110
131
|
private :try_addressable_normalize
|
132
|
+
private :valid_hostname?
|
133
|
+
private :valid_label?
|
111
134
|
end
|
112
135
|
|
113
136
|
def initialize(addressable_uri, public_suffix_domain)
|
data/lib/twingly/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: twingly-url
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 6.0.4
|
4
|
+
version: 7.0.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Twingly AB
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-11-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: addressable
|
@@ -33,7 +33,7 @@ dependencies:
|
|
33
33
|
version: 3.0.1
|
34
34
|
- - "<"
|
35
35
|
- !ruby/object:Gem::Version
|
36
|
-
version: '
|
36
|
+
version: '6.0'
|
37
37
|
type: :runtime
|
38
38
|
prerelease: false
|
39
39
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -43,7 +43,7 @@ dependencies:
|
|
43
43
|
version: 3.0.1
|
44
44
|
- - "<"
|
45
45
|
- !ruby/object:Gem::Version
|
46
|
-
version: '
|
46
|
+
version: '6.0'
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: rake
|
49
49
|
requirement: !ruby/object:Gem::Requirement
|
@@ -113,14 +113,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
113
113
|
requirements:
|
114
114
|
- - ">="
|
115
115
|
- !ruby/object:Gem::Version
|
116
|
-
version: '2.
|
116
|
+
version: '2.6'
|
117
117
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
118
118
|
requirements:
|
119
119
|
- - ">="
|
120
120
|
- !ruby/object:Gem::Version
|
121
121
|
version: '0'
|
122
122
|
requirements: []
|
123
|
-
rubygems_version: 3.
|
123
|
+
rubygems_version: 3.3.7
|
124
124
|
signing_key:
|
125
125
|
specification_version: 4
|
126
126
|
summary: Ruby library for URL handling
|