twingly-url 5.1.1 → 6.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5a3c908215d3999313c0230890446e979995175f5ae34f05f778a8ff51ecf7de
4
- data.tar.gz: f3546864fab23385d42f25f41506a74c6309716ce34b2cf00c5ce52bbf497d77
3
+ metadata.gz: 56c971217509cb49a880e44835cd28fb7cbd0993ed3c770e916dbc5692aa3a4a
4
+ data.tar.gz: d09caeb8ea2a01b7523565fa8088852922afbcd042ce650bb1de41a13010bed4
5
5
  SHA512:
6
- metadata.gz: 2c8232dda0e4d15d8a22c99692ca76ae47a27ad5986222481aa2c6f2d6aec5f762ac26ac26a77a3653436c8a97647f8f00a2a4dd89086e16b7f13b1a06112147
7
- data.tar.gz: 89e956970ea3b14ce5fccd527d4eedbc7c910749a0f6f60fc8b280316dab5de6e7c9a5a26ad724d41d487d9814547eba4184123c03df0d43a944238ad42558ed
6
+ metadata.gz: 4b960358767ae47df3d234adcb16242bce1de669b27af753691c206b5d8c0899ed2cf1272a1468fe74faa61e008647b015b028277d63878e1054301e01b43585
7
+ data.tar.gz: cb82dbf7140f0cae7ddc8068a6fe892ddb1bab5afde458d21f0c34e5fd6e36bfe189c916bcdc27f5f4f3db88130bcf288f75fe5955413c2102dde0d3d9d7b299
data/README.md CHANGED
@@ -11,7 +11,6 @@ Twingly URL tools.
11
11
  * `Twingly::URL::Hasher.blogstream_hash(url)` - MD5 hexdigest
12
12
  * `Twingly::URL::Hasher.documentdb_hash(url)` - SHA256 unsigned long, native endian digest
13
13
  * `Twingly::URL::Hasher.autopingdb_hash(url)` - SHA256 64-bit signed, native endian digest
14
- * `Twingly::URL::Hasher.pingloggerdb_hash(url)` - SHA256 64-bit unsigned, native endian digest
15
14
  * `twingly/url/utilities` - Utilities to work with URLs
16
15
  * `Twingly::URL::Utilities.extract_valid_urls` - Returns Array of valid `Twingly::URL`
17
16
 
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "addressable/idna"
2
4
  require "public_suffix"
3
5
 
data/lib/twingly/url.rb CHANGED
@@ -1,4 +1,6 @@
1
- require "addressable/idna"
1
+ # frozen_string_literal: true
2
+
3
+ require "addressable/idna/pure"
2
4
  require "addressable/uri"
3
5
  require "public_suffix"
4
6
 
@@ -11,19 +13,33 @@ module Twingly
11
13
  class URL
12
14
  include Comparable
13
15
 
14
- ACCEPTED_SCHEMES = /\Ahttps?\z/i
16
+ ACCEPTED_SCHEMES = /\Ahttps?\z/i.freeze
15
17
  CUSTOM_PSL = PublicSuffixList.with_punycoded_names
16
- ENDS_WITH_SLASH = /\/+$/
18
+ ENDS_WITH_SLASH = /\/+$/.freeze
19
+ STARTS_WITH_WWW = /\Awww\./i.freeze
17
20
  ERRORS_TO_EXTEND = [
18
21
  Addressable::IDNA::PunycodeBigOutput,
19
22
  Addressable::URI::InvalidURIError,
20
23
  PublicSuffix::DomainInvalid,
21
- ]
24
+ ].freeze
25
+ NBSP = "\u00A0"
26
+ SPACE = "\u0020"
27
+ WHITESPACE_CHARS = [
28
+ NBSP,
29
+ SPACE,
30
+ ].join.freeze
31
+ LEADING_AND_TRAILING_WHITESPACE =
32
+ /\A[#{WHITESPACE_CHARS}]+|[#{WHITESPACE_CHARS}]+\z/.freeze
22
33
 
23
34
  private_constant :ACCEPTED_SCHEMES
24
35
  private_constant :CUSTOM_PSL
36
+ private_constant :STARTS_WITH_WWW
25
37
  private_constant :ENDS_WITH_SLASH
26
38
  private_constant :ERRORS_TO_EXTEND
39
+ private_constant :NBSP
40
+ private_constant :SPACE
41
+ private_constant :WHITESPACE_CHARS
42
+ private_constant :LEADING_AND_TRAILING_WHITESPACE
27
43
 
28
44
  class << self
29
45
  def parse(potential_url)
@@ -35,8 +51,9 @@ module Twingly
35
51
  raise
36
52
  end
37
53
 
38
- def internal_parse(potential_url)
39
- addressable_uri = to_addressable_uri(potential_url)
54
+ def internal_parse(input)
55
+ potential_url = clean_input(input)
56
+ addressable_uri = Addressable::URI.heuristic_parse(potential_url)
40
57
  raise Twingly::URL::Error::ParseError if addressable_uri.nil?
41
58
 
42
59
  scheme = addressable_uri.scheme
@@ -58,15 +75,16 @@ module Twingly
58
75
  raise
59
76
  end
60
77
 
61
- def to_addressable_uri(potential_url)
62
- if potential_url.is_a?(Addressable::URI)
63
- potential_url
64
- else
65
- potential_url = String(potential_url)
66
- potential_url = potential_url.scrub
78
+ def clean_input(input)
79
+ input = String(input)
80
+ input = input.scrub
81
+ input = strip_whitespace(input)
82
+ end
67
83
 
68
- Addressable::URI.heuristic_parse(potential_url)
69
- end
84
+ def strip_whitespace(input)
85
+ return input unless input.encoding == Encoding::UTF_8
86
+
87
+ input.gsub(LEADING_AND_TRAILING_WHITESPACE, "")
70
88
  end
71
89
 
72
90
  # Workaround for the following bug in addressable:
@@ -83,7 +101,8 @@ module Twingly
83
101
 
84
102
  private :new
85
103
  private :internal_parse
86
- private :to_addressable_uri
104
+ private :clean_input
105
+ private :strip_whitespace
87
106
  private :try_addressable_normalize
88
107
  end
89
108
 
@@ -189,6 +208,16 @@ module Twingly
189
208
  self.to_s <=> other.to_s
190
209
  end
191
210
 
211
+ def eql?(other)
212
+ return false unless other.is_a?(self.class)
213
+
214
+ self.hash == other.hash
215
+ end
216
+
217
+ def hash
218
+ self.to_s.hash
219
+ end
220
+
192
221
  def to_s
193
222
  addressable_uri.to_s
194
223
  end
@@ -203,7 +232,7 @@ module Twingly
203
232
 
204
233
  def normalize_blogspot(host, domain)
205
234
  if domain.sld.downcase == "blogspot"
206
- host.sub(/\Awww\./i, "").sub(/#{domain.tld}\z/i, "com")
235
+ host.sub(STARTS_WITH_WWW, "").sub(/#{domain.tld}\z/i, "com")
207
236
  else
208
237
  host
209
238
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Twingly
2
4
  class URL
3
5
  module Error
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'digest'
2
4
 
3
5
  require "twingly/url"
@@ -28,10 +30,6 @@ module Twingly
28
30
  def autopingdb_hash(url)
29
31
  SHA256_DIGEST.digest(url).unpack("q")[0]
30
32
  end
31
-
32
- def pingloggerdb_hash(url)
33
- SHA256_DIGEST.digest(url).unpack("Q")[0]
34
- end
35
33
  end
36
34
  end
37
35
  end
@@ -1,4 +1,4 @@
1
- require "twingly/url"
1
+ # frozen_string_literal: true
2
2
 
3
3
  module Twingly
4
4
  class URL
@@ -24,6 +24,16 @@ module Twingly
24
24
  self.to_s <=> other.to_s
25
25
  end
26
26
 
27
+ def eql?(other)
28
+ return false unless other.is_a?(self.class)
29
+
30
+ self.hash == other.hash
31
+ end
32
+
33
+ def hash
34
+ self.to_s.hash
35
+ end
36
+
27
37
  def to_s
28
38
  ""
29
39
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "twingly/url"
2
4
 
3
5
  module Twingly
@@ -1,5 +1,7 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Twingly
2
4
  class URL
3
- VERSION = "5.1.1"
5
+ VERSION = "6.0.0"
4
6
  end
5
7
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: twingly-url
3
3
  version: !ruby/object:Gem::Version
4
- version: 5.1.1
4
+ version: 6.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Twingly AB
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-02-14 00:00:00.000000000 Z
11
+ date: 2019-02-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: addressable
@@ -107,15 +107,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
107
107
  requirements:
108
108
  - - "~>"
109
109
  - !ruby/object:Gem::Version
110
- version: '2.2'
110
+ version: '2.4'
111
111
  required_rubygems_version: !ruby/object:Gem::Requirement
112
112
  requirements:
113
113
  - - ">="
114
114
  - !ruby/object:Gem::Version
115
115
  version: '0'
116
116
  requirements: []
117
- rubyforge_project:
118
- rubygems_version: 2.7.4
117
+ rubygems_version: 3.0.1
119
118
  signing_key:
120
119
  specification_version: 4
121
120
  summary: Ruby library for URL handling