twingly-url 5.1.1 → 6.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +0 -1
- data/lib/twingly/public_suffix_list.rb +2 -0
- data/lib/twingly/url.rb +45 -16
- data/lib/twingly/url/error.rb +2 -0
- data/lib/twingly/url/hasher.rb +2 -4
- data/lib/twingly/url/null_url.rb +11 -1
- data/lib/twingly/url/utilities.rb +2 -0
- data/lib/twingly/version.rb +3 -1
- metadata +4 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 56c971217509cb49a880e44835cd28fb7cbd0993ed3c770e916dbc5692aa3a4a
|
4
|
+
data.tar.gz: d09caeb8ea2a01b7523565fa8088852922afbcd042ce650bb1de41a13010bed4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4b960358767ae47df3d234adcb16242bce1de669b27af753691c206b5d8c0899ed2cf1272a1468fe74faa61e008647b015b028277d63878e1054301e01b43585
|
7
|
+
data.tar.gz: cb82dbf7140f0cae7ddc8068a6fe892ddb1bab5afde458d21f0c34e5fd6e36bfe189c916bcdc27f5f4f3db88130bcf288f75fe5955413c2102dde0d3d9d7b299
|
data/README.md
CHANGED
@@ -11,7 +11,6 @@ Twingly URL tools.
|
|
11
11
|
* `Twingly::URL::Hasher.blogstream_hash(url)` - MD5 hexdigest
|
12
12
|
* `Twingly::URL::Hasher.documentdb_hash(url)` - SHA256 unsigned long, native endian digest
|
13
13
|
* `Twingly::URL::Hasher.autopingdb_hash(url)` - SHA256 64-bit signed, native endian digest
|
14
|
-
* `Twingly::URL::Hasher.pingloggerdb_hash(url)` - SHA256 64-bit unsigned, native endian digest
|
15
14
|
* `twingly/url/utilities` - Utilities to work with URLs
|
16
15
|
* `Twingly::URL::Utilities.extract_valid_urls` - Returns Array of valid `Twingly::URL`
|
17
16
|
|
data/lib/twingly/url.rb
CHANGED
@@ -1,4 +1,6 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "addressable/idna/pure"
|
2
4
|
require "addressable/uri"
|
3
5
|
require "public_suffix"
|
4
6
|
|
@@ -11,19 +13,33 @@ module Twingly
|
|
11
13
|
class URL
|
12
14
|
include Comparable
|
13
15
|
|
14
|
-
ACCEPTED_SCHEMES = /\Ahttps?\z/i
|
16
|
+
ACCEPTED_SCHEMES = /\Ahttps?\z/i.freeze
|
15
17
|
CUSTOM_PSL = PublicSuffixList.with_punycoded_names
|
16
|
-
ENDS_WITH_SLASH =
|
18
|
+
ENDS_WITH_SLASH = /\/+$/.freeze
|
19
|
+
STARTS_WITH_WWW = /\Awww\./i.freeze
|
17
20
|
ERRORS_TO_EXTEND = [
|
18
21
|
Addressable::IDNA::PunycodeBigOutput,
|
19
22
|
Addressable::URI::InvalidURIError,
|
20
23
|
PublicSuffix::DomainInvalid,
|
21
|
-
]
|
24
|
+
].freeze
|
25
|
+
NBSP = "\u00A0"
|
26
|
+
SPACE = "\u0020"
|
27
|
+
WHITESPACE_CHARS = [
|
28
|
+
NBSP,
|
29
|
+
SPACE,
|
30
|
+
].join.freeze
|
31
|
+
LEADING_AND_TRAILING_WHITESPACE =
|
32
|
+
/\A[#{WHITESPACE_CHARS}]+|[#{WHITESPACE_CHARS}]+\z/.freeze
|
22
33
|
|
23
34
|
private_constant :ACCEPTED_SCHEMES
|
24
35
|
private_constant :CUSTOM_PSL
|
36
|
+
private_constant :STARTS_WITH_WWW
|
25
37
|
private_constant :ENDS_WITH_SLASH
|
26
38
|
private_constant :ERRORS_TO_EXTEND
|
39
|
+
private_constant :NBSP
|
40
|
+
private_constant :SPACE
|
41
|
+
private_constant :WHITESPACE_CHARS
|
42
|
+
private_constant :LEADING_AND_TRAILING_WHITESPACE
|
27
43
|
|
28
44
|
class << self
|
29
45
|
def parse(potential_url)
|
@@ -35,8 +51,9 @@ module Twingly
|
|
35
51
|
raise
|
36
52
|
end
|
37
53
|
|
38
|
-
def internal_parse(
|
39
|
-
|
54
|
+
def internal_parse(input)
|
55
|
+
potential_url = clean_input(input)
|
56
|
+
addressable_uri = Addressable::URI.heuristic_parse(potential_url)
|
40
57
|
raise Twingly::URL::Error::ParseError if addressable_uri.nil?
|
41
58
|
|
42
59
|
scheme = addressable_uri.scheme
|
@@ -58,15 +75,16 @@ module Twingly
|
|
58
75
|
raise
|
59
76
|
end
|
60
77
|
|
61
|
-
def
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
potential_url = potential_url.scrub
|
78
|
+
def clean_input(input)
|
79
|
+
input = String(input)
|
80
|
+
input = input.scrub
|
81
|
+
input = strip_whitespace(input)
|
82
|
+
end
|
67
83
|
|
68
|
-
|
69
|
-
|
84
|
+
def strip_whitespace(input)
|
85
|
+
return input unless input.encoding == Encoding::UTF_8
|
86
|
+
|
87
|
+
input.gsub(LEADING_AND_TRAILING_WHITESPACE, "")
|
70
88
|
end
|
71
89
|
|
72
90
|
# Workaround for the following bug in addressable:
|
@@ -83,7 +101,8 @@ module Twingly
|
|
83
101
|
|
84
102
|
private :new
|
85
103
|
private :internal_parse
|
86
|
-
private :
|
104
|
+
private :clean_input
|
105
|
+
private :strip_whitespace
|
87
106
|
private :try_addressable_normalize
|
88
107
|
end
|
89
108
|
|
@@ -189,6 +208,16 @@ module Twingly
|
|
189
208
|
self.to_s <=> other.to_s
|
190
209
|
end
|
191
210
|
|
211
|
+
def eql?(other)
|
212
|
+
return false unless other.is_a?(self.class)
|
213
|
+
|
214
|
+
self.hash == other.hash
|
215
|
+
end
|
216
|
+
|
217
|
+
def hash
|
218
|
+
self.to_s.hash
|
219
|
+
end
|
220
|
+
|
192
221
|
def to_s
|
193
222
|
addressable_uri.to_s
|
194
223
|
end
|
@@ -203,7 +232,7 @@ module Twingly
|
|
203
232
|
|
204
233
|
def normalize_blogspot(host, domain)
|
205
234
|
if domain.sld.downcase == "blogspot"
|
206
|
-
host.sub(
|
235
|
+
host.sub(STARTS_WITH_WWW, "").sub(/#{domain.tld}\z/i, "com")
|
207
236
|
else
|
208
237
|
host
|
209
238
|
end
|
data/lib/twingly/url/error.rb
CHANGED
data/lib/twingly/url/hasher.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'digest'
|
2
4
|
|
3
5
|
require "twingly/url"
|
@@ -28,10 +30,6 @@ module Twingly
|
|
28
30
|
def autopingdb_hash(url)
|
29
31
|
SHA256_DIGEST.digest(url).unpack("q")[0]
|
30
32
|
end
|
31
|
-
|
32
|
-
def pingloggerdb_hash(url)
|
33
|
-
SHA256_DIGEST.digest(url).unpack("Q")[0]
|
34
|
-
end
|
35
33
|
end
|
36
34
|
end
|
37
35
|
end
|
data/lib/twingly/url/null_url.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module Twingly
|
4
4
|
class URL
|
@@ -24,6 +24,16 @@ module Twingly
|
|
24
24
|
self.to_s <=> other.to_s
|
25
25
|
end
|
26
26
|
|
27
|
+
def eql?(other)
|
28
|
+
return false unless other.is_a?(self.class)
|
29
|
+
|
30
|
+
self.hash == other.hash
|
31
|
+
end
|
32
|
+
|
33
|
+
def hash
|
34
|
+
self.to_s.hash
|
35
|
+
end
|
36
|
+
|
27
37
|
def to_s
|
28
38
|
""
|
29
39
|
end
|
data/lib/twingly/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: twingly-url
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 5.1.1
|
4
|
+
version: 6.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Twingly AB
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2019-02-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: addressable
|
@@ -107,15 +107,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
107
107
|
requirements:
|
108
108
|
- - "~>"
|
109
109
|
- !ruby/object:Gem::Version
|
110
|
-
version: '2.
|
110
|
+
version: '2.4'
|
111
111
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
112
112
|
requirements:
|
113
113
|
- - ">="
|
114
114
|
- !ruby/object:Gem::Version
|
115
115
|
version: '0'
|
116
116
|
requirements: []
|
117
|
-
|
118
|
-
rubygems_version: 2.7.4
|
117
|
+
rubygems_version: 3.0.1
|
119
118
|
signing_key:
|
120
119
|
specification_version: 4
|
121
120
|
summary: Ruby library for URL handling
|