twingly-url 5.1.1 → 6.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5a3c908215d3999313c0230890446e979995175f5ae34f05f778a8ff51ecf7de
4
- data.tar.gz: f3546864fab23385d42f25f41506a74c6309716ce34b2cf00c5ce52bbf497d77
3
+ metadata.gz: 56c971217509cb49a880e44835cd28fb7cbd0993ed3c770e916dbc5692aa3a4a
4
+ data.tar.gz: d09caeb8ea2a01b7523565fa8088852922afbcd042ce650bb1de41a13010bed4
5
5
  SHA512:
6
- metadata.gz: 2c8232dda0e4d15d8a22c99692ca76ae47a27ad5986222481aa2c6f2d6aec5f762ac26ac26a77a3653436c8a97647f8f00a2a4dd89086e16b7f13b1a06112147
7
- data.tar.gz: 89e956970ea3b14ce5fccd527d4eedbc7c910749a0f6f60fc8b280316dab5de6e7c9a5a26ad724d41d487d9814547eba4184123c03df0d43a944238ad42558ed
6
+ metadata.gz: 4b960358767ae47df3d234adcb16242bce1de669b27af753691c206b5d8c0899ed2cf1272a1468fe74faa61e008647b015b028277d63878e1054301e01b43585
7
+ data.tar.gz: cb82dbf7140f0cae7ddc8068a6fe892ddb1bab5afde458d21f0c34e5fd6e36bfe189c916bcdc27f5f4f3db88130bcf288f75fe5955413c2102dde0d3d9d7b299
data/README.md CHANGED
@@ -11,7 +11,6 @@ Twingly URL tools.
11
11
  * `Twingly::URL::Hasher.blogstream_hash(url)` - MD5 hexdigest
12
12
  * `Twingly::URL::Hasher.documentdb_hash(url)` - SHA256 unsigned long, native endian digest
13
13
  * `Twingly::URL::Hasher.autopingdb_hash(url)` - SHA256 64-bit signed, native endian digest
14
- * `Twingly::URL::Hasher.pingloggerdb_hash(url)` - SHA256 64-bit unsigned, native endian digest
15
14
  * `twingly/url/utilities` - Utilities to work with URLs
16
15
  * `Twingly::URL::Utilities.extract_valid_urls` - Returns Array of valid `Twingly::URL`
17
16
 
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "addressable/idna"
2
4
  require "public_suffix"
3
5
 
data/lib/twingly/url.rb CHANGED
@@ -1,4 +1,6 @@
1
- require "addressable/idna"
1
+ # frozen_string_literal: true
2
+
3
+ require "addressable/idna/pure"
2
4
  require "addressable/uri"
3
5
  require "public_suffix"
4
6
 
@@ -11,19 +13,33 @@ module Twingly
11
13
  class URL
12
14
  include Comparable
13
15
 
14
- ACCEPTED_SCHEMES = /\Ahttps?\z/i
16
+ ACCEPTED_SCHEMES = /\Ahttps?\z/i.freeze
15
17
  CUSTOM_PSL = PublicSuffixList.with_punycoded_names
16
- ENDS_WITH_SLASH = /\/+$/
18
+ ENDS_WITH_SLASH = /\/+$/.freeze
19
+ STARTS_WITH_WWW = /\Awww\./i.freeze
17
20
  ERRORS_TO_EXTEND = [
18
21
  Addressable::IDNA::PunycodeBigOutput,
19
22
  Addressable::URI::InvalidURIError,
20
23
  PublicSuffix::DomainInvalid,
21
- ]
24
+ ].freeze
25
+ NBSP = "\u00A0"
26
+ SPACE = "\u0020"
27
+ WHITESPACE_CHARS = [
28
+ NBSP,
29
+ SPACE,
30
+ ].join.freeze
31
+ LEADING_AND_TRAILING_WHITESPACE =
32
+ /\A[#{WHITESPACE_CHARS}]+|[#{WHITESPACE_CHARS}]+\z/.freeze
22
33
 
23
34
  private_constant :ACCEPTED_SCHEMES
24
35
  private_constant :CUSTOM_PSL
36
+ private_constant :STARTS_WITH_WWW
25
37
  private_constant :ENDS_WITH_SLASH
26
38
  private_constant :ERRORS_TO_EXTEND
39
+ private_constant :NBSP
40
+ private_constant :SPACE
41
+ private_constant :WHITESPACE_CHARS
42
+ private_constant :LEADING_AND_TRAILING_WHITESPACE
27
43
 
28
44
  class << self
29
45
  def parse(potential_url)
@@ -35,8 +51,9 @@ module Twingly
35
51
  raise
36
52
  end
37
53
 
38
- def internal_parse(potential_url)
39
- addressable_uri = to_addressable_uri(potential_url)
54
+ def internal_parse(input)
55
+ potential_url = clean_input(input)
56
+ addressable_uri = Addressable::URI.heuristic_parse(potential_url)
40
57
  raise Twingly::URL::Error::ParseError if addressable_uri.nil?
41
58
 
42
59
  scheme = addressable_uri.scheme
@@ -58,15 +75,16 @@ module Twingly
58
75
  raise
59
76
  end
60
77
 
61
- def to_addressable_uri(potential_url)
62
- if potential_url.is_a?(Addressable::URI)
63
- potential_url
64
- else
65
- potential_url = String(potential_url)
66
- potential_url = potential_url.scrub
78
+ def clean_input(input)
79
+ input = String(input)
80
+ input = input.scrub
81
+ input = strip_whitespace(input)
82
+ end
67
83
 
68
- Addressable::URI.heuristic_parse(potential_url)
69
- end
84
+ def strip_whitespace(input)
85
+ return input unless input.encoding == Encoding::UTF_8
86
+
87
+ input.gsub(LEADING_AND_TRAILING_WHITESPACE, "")
70
88
  end
71
89
 
72
90
  # Workaround for the following bug in addressable:
@@ -83,7 +101,8 @@ module Twingly
83
101
 
84
102
  private :new
85
103
  private :internal_parse
86
- private :to_addressable_uri
104
+ private :clean_input
105
+ private :strip_whitespace
87
106
  private :try_addressable_normalize
88
107
  end
89
108
 
@@ -189,6 +208,16 @@ module Twingly
189
208
  self.to_s <=> other.to_s
190
209
  end
191
210
 
211
+ def eql?(other)
212
+ return false unless other.is_a?(self.class)
213
+
214
+ self.hash == other.hash
215
+ end
216
+
217
+ def hash
218
+ self.to_s.hash
219
+ end
220
+
192
221
  def to_s
193
222
  addressable_uri.to_s
194
223
  end
@@ -203,7 +232,7 @@ module Twingly
203
232
 
204
233
  def normalize_blogspot(host, domain)
205
234
  if domain.sld.downcase == "blogspot"
206
- host.sub(/\Awww\./i, "").sub(/#{domain.tld}\z/i, "com")
235
+ host.sub(STARTS_WITH_WWW, "").sub(/#{domain.tld}\z/i, "com")
207
236
  else
208
237
  host
209
238
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Twingly
2
4
  class URL
3
5
  module Error
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'digest'
2
4
 
3
5
  require "twingly/url"
@@ -28,10 +30,6 @@ module Twingly
28
30
  def autopingdb_hash(url)
29
31
  SHA256_DIGEST.digest(url).unpack("q")[0]
30
32
  end
31
-
32
- def pingloggerdb_hash(url)
33
- SHA256_DIGEST.digest(url).unpack("Q")[0]
34
- end
35
33
  end
36
34
  end
37
35
  end
@@ -1,4 +1,4 @@
1
- require "twingly/url"
1
+ # frozen_string_literal: true
2
2
 
3
3
  module Twingly
4
4
  class URL
@@ -24,6 +24,16 @@ module Twingly
24
24
  self.to_s <=> other.to_s
25
25
  end
26
26
 
27
+ def eql?(other)
28
+ return false unless other.is_a?(self.class)
29
+
30
+ self.hash == other.hash
31
+ end
32
+
33
+ def hash
34
+ self.to_s.hash
35
+ end
36
+
27
37
  def to_s
28
38
  ""
29
39
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "twingly/url"
2
4
 
3
5
  module Twingly
@@ -1,5 +1,7 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Twingly
2
4
  class URL
3
- VERSION = "5.1.1"
5
+ VERSION = "6.0.0"
4
6
  end
5
7
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: twingly-url
3
3
  version: !ruby/object:Gem::Version
4
- version: 5.1.1
4
+ version: 6.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Twingly AB
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-02-14 00:00:00.000000000 Z
11
+ date: 2019-02-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: addressable
@@ -107,15 +107,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
107
107
  requirements:
108
108
  - - "~>"
109
109
  - !ruby/object:Gem::Version
110
- version: '2.2'
110
+ version: '2.4'
111
111
  required_rubygems_version: !ruby/object:Gem::Requirement
112
112
  requirements:
113
113
  - - ">="
114
114
  - !ruby/object:Gem::Version
115
115
  version: '0'
116
116
  requirements: []
117
- rubyforge_project:
118
- rubygems_version: 2.7.4
117
+ rubygems_version: 3.0.1
119
118
  signing_key:
120
119
  specification_version: 4
121
120
  summary: Ruby library for URL handling