twingly-url 5.1.1 → 6.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +0 -1
- data/lib/twingly/public_suffix_list.rb +2 -0
- data/lib/twingly/url.rb +45 -16
- data/lib/twingly/url/error.rb +2 -0
- data/lib/twingly/url/hasher.rb +2 -4
- data/lib/twingly/url/null_url.rb +11 -1
- data/lib/twingly/url/utilities.rb +2 -0
- data/lib/twingly/version.rb +3 -1
- metadata +4 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 56c971217509cb49a880e44835cd28fb7cbd0993ed3c770e916dbc5692aa3a4a
|
4
|
+
data.tar.gz: d09caeb8ea2a01b7523565fa8088852922afbcd042ce650bb1de41a13010bed4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4b960358767ae47df3d234adcb16242bce1de669b27af753691c206b5d8c0899ed2cf1272a1468fe74faa61e008647b015b028277d63878e1054301e01b43585
|
7
|
+
data.tar.gz: cb82dbf7140f0cae7ddc8068a6fe892ddb1bab5afde458d21f0c34e5fd6e36bfe189c916bcdc27f5f4f3db88130bcf288f75fe5955413c2102dde0d3d9d7b299
|
data/README.md
CHANGED
@@ -11,7 +11,6 @@ Twingly URL tools.
|
|
11
11
|
* `Twingly::URL::Hasher.blogstream_hash(url)` - MD5 hexdigest
|
12
12
|
* `Twingly::URL::Hasher.documentdb_hash(url)` - SHA256 unsigned long, native endian digest
|
13
13
|
* `Twingly::URL::Hasher.autopingdb_hash(url)` - SHA256 64-bit signed, native endian digest
|
14
|
-
* `Twingly::URL::Hasher.pingloggerdb_hash(url)` - SHA256 64-bit unsigned, native endian digest
|
15
14
|
* `twingly/url/utilities` - Utilities to work with URLs
|
16
15
|
* `Twingly::URL::Utilities.extract_valid_urls` - Returns Array of valid `Twingly::URL`
|
17
16
|
|
data/lib/twingly/url.rb
CHANGED
@@ -1,4 +1,6 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "addressable/idna/pure"
|
2
4
|
require "addressable/uri"
|
3
5
|
require "public_suffix"
|
4
6
|
|
@@ -11,19 +13,33 @@ module Twingly
|
|
11
13
|
class URL
|
12
14
|
include Comparable
|
13
15
|
|
14
|
-
ACCEPTED_SCHEMES = /\Ahttps?\z/i
|
16
|
+
ACCEPTED_SCHEMES = /\Ahttps?\z/i.freeze
|
15
17
|
CUSTOM_PSL = PublicSuffixList.with_punycoded_names
|
16
|
-
ENDS_WITH_SLASH =
|
18
|
+
ENDS_WITH_SLASH = /\/+$/.freeze
|
19
|
+
STARTS_WITH_WWW = /\Awww\./i.freeze
|
17
20
|
ERRORS_TO_EXTEND = [
|
18
21
|
Addressable::IDNA::PunycodeBigOutput,
|
19
22
|
Addressable::URI::InvalidURIError,
|
20
23
|
PublicSuffix::DomainInvalid,
|
21
|
-
]
|
24
|
+
].freeze
|
25
|
+
NBSP = "\u00A0"
|
26
|
+
SPACE = "\u0020"
|
27
|
+
WHITESPACE_CHARS = [
|
28
|
+
NBSP,
|
29
|
+
SPACE,
|
30
|
+
].join.freeze
|
31
|
+
LEADING_AND_TRAILING_WHITESPACE =
|
32
|
+
/\A[#{WHITESPACE_CHARS}]+|[#{WHITESPACE_CHARS}]+\z/.freeze
|
22
33
|
|
23
34
|
private_constant :ACCEPTED_SCHEMES
|
24
35
|
private_constant :CUSTOM_PSL
|
36
|
+
private_constant :STARTS_WITH_WWW
|
25
37
|
private_constant :ENDS_WITH_SLASH
|
26
38
|
private_constant :ERRORS_TO_EXTEND
|
39
|
+
private_constant :NBSP
|
40
|
+
private_constant :SPACE
|
41
|
+
private_constant :WHITESPACE_CHARS
|
42
|
+
private_constant :LEADING_AND_TRAILING_WHITESPACE
|
27
43
|
|
28
44
|
class << self
|
29
45
|
def parse(potential_url)
|
@@ -35,8 +51,9 @@ module Twingly
|
|
35
51
|
raise
|
36
52
|
end
|
37
53
|
|
38
|
-
def internal_parse(
|
39
|
-
|
54
|
+
def internal_parse(input)
|
55
|
+
potential_url = clean_input(input)
|
56
|
+
addressable_uri = Addressable::URI.heuristic_parse(potential_url)
|
40
57
|
raise Twingly::URL::Error::ParseError if addressable_uri.nil?
|
41
58
|
|
42
59
|
scheme = addressable_uri.scheme
|
@@ -58,15 +75,16 @@ module Twingly
|
|
58
75
|
raise
|
59
76
|
end
|
60
77
|
|
61
|
-
def
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
potential_url = potential_url.scrub
|
78
|
+
def clean_input(input)
|
79
|
+
input = String(input)
|
80
|
+
input = input.scrub
|
81
|
+
input = strip_whitespace(input)
|
82
|
+
end
|
67
83
|
|
68
|
-
|
69
|
-
|
84
|
+
def strip_whitespace(input)
|
85
|
+
return input unless input.encoding == Encoding::UTF_8
|
86
|
+
|
87
|
+
input.gsub(LEADING_AND_TRAILING_WHITESPACE, "")
|
70
88
|
end
|
71
89
|
|
72
90
|
# Workaround for the following bug in addressable:
|
@@ -83,7 +101,8 @@ module Twingly
|
|
83
101
|
|
84
102
|
private :new
|
85
103
|
private :internal_parse
|
86
|
-
private :
|
104
|
+
private :clean_input
|
105
|
+
private :strip_whitespace
|
87
106
|
private :try_addressable_normalize
|
88
107
|
end
|
89
108
|
|
@@ -189,6 +208,16 @@ module Twingly
|
|
189
208
|
self.to_s <=> other.to_s
|
190
209
|
end
|
191
210
|
|
211
|
+
def eql?(other)
|
212
|
+
return false unless other.is_a?(self.class)
|
213
|
+
|
214
|
+
self.hash == other.hash
|
215
|
+
end
|
216
|
+
|
217
|
+
def hash
|
218
|
+
self.to_s.hash
|
219
|
+
end
|
220
|
+
|
192
221
|
def to_s
|
193
222
|
addressable_uri.to_s
|
194
223
|
end
|
@@ -203,7 +232,7 @@ module Twingly
|
|
203
232
|
|
204
233
|
def normalize_blogspot(host, domain)
|
205
234
|
if domain.sld.downcase == "blogspot"
|
206
|
-
host.sub(
|
235
|
+
host.sub(STARTS_WITH_WWW, "").sub(/#{domain.tld}\z/i, "com")
|
207
236
|
else
|
208
237
|
host
|
209
238
|
end
|
data/lib/twingly/url/error.rb
CHANGED
data/lib/twingly/url/hasher.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'digest'
|
2
4
|
|
3
5
|
require "twingly/url"
|
@@ -28,10 +30,6 @@ module Twingly
|
|
28
30
|
def autopingdb_hash(url)
|
29
31
|
SHA256_DIGEST.digest(url).unpack("q")[0]
|
30
32
|
end
|
31
|
-
|
32
|
-
def pingloggerdb_hash(url)
|
33
|
-
SHA256_DIGEST.digest(url).unpack("Q")[0]
|
34
|
-
end
|
35
33
|
end
|
36
34
|
end
|
37
35
|
end
|
data/lib/twingly/url/null_url.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module Twingly
|
4
4
|
class URL
|
@@ -24,6 +24,16 @@ module Twingly
|
|
24
24
|
self.to_s <=> other.to_s
|
25
25
|
end
|
26
26
|
|
27
|
+
def eql?(other)
|
28
|
+
return false unless other.is_a?(self.class)
|
29
|
+
|
30
|
+
self.hash == other.hash
|
31
|
+
end
|
32
|
+
|
33
|
+
def hash
|
34
|
+
self.to_s.hash
|
35
|
+
end
|
36
|
+
|
27
37
|
def to_s
|
28
38
|
""
|
29
39
|
end
|
data/lib/twingly/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: twingly-url
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 6.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Twingly AB
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2019-02-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: addressable
|
@@ -107,15 +107,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
107
107
|
requirements:
|
108
108
|
- - "~>"
|
109
109
|
- !ruby/object:Gem::Version
|
110
|
-
version: '2.
|
110
|
+
version: '2.4'
|
111
111
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
112
112
|
requirements:
|
113
113
|
- - ">="
|
114
114
|
- !ruby/object:Gem::Version
|
115
115
|
version: '0'
|
116
116
|
requirements: []
|
117
|
-
|
118
|
-
rubygems_version: 2.7.4
|
117
|
+
rubygems_version: 3.0.1
|
119
118
|
signing_key:
|
120
119
|
specification_version: 4
|
121
120
|
summary: Ruby library for URL handling
|