twingly-url 6.0.4 → 7.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7308c5eb70d91f77fb0f81761948a6652a79dd610c481bb4237cfa435f3495b5
4
- data.tar.gz: d54e531c0a7e350f01cf495d73adc69b16d9dd25af7859388c2d543d2bdfa299
3
+ metadata.gz: 97ddbac02316ba39d3a55c162ceb1ce0065a7be690a98ab4bea4870eddfd1316
4
+ data.tar.gz: 618a3e2544a478439dda7acc3fb103a0fcf7f818e4c09095ca39117871e6bc0f
5
5
  SHA512:
6
- metadata.gz: 7a871c957a15ba4695de8dbed2122619a5bf89922f5901ed7c3cecbde4ee33a9cf3017350344972d2a28ab9e6cb3e17380b1e42ce1532679332a1c25cea5d6d8
7
- data.tar.gz: ab7c227ca8300c094637de6ae2cbbab4f65fab2585863f81feacdee85e1f5cf73711c5b3ec15dc8a05ac343a84b106047f6bb12f81ed769eb8b3c893004c8987
6
+ metadata.gz: a9fce1402fe896c2ee5e343fb33f2b5c2113c5499c79575958172824c50035132674135a212fcce96337361177b582340dfe3f780ef4cf5ec261b29ab87479d3
7
+ data.tar.gz: 9617ed3bcb459921188f3da2e7ce04c26764c350aed55751fd319e6f0aa0daefab9bfbefcccccbfb5235f8f9ce656e56eca3bddb69741fca14df71125b8e29b4
data/README.md CHANGED
@@ -8,7 +8,6 @@ Twingly URL tools.
8
8
  * `Twingly::URL.parse` - Returns one or more `Twingly::URL` instances
9
9
  * `twingly/url/hasher` - Generate URL hashes suitable for primary keys
10
10
  * `Twingly::URL::Hasher.taskdb_hash(url)` - MD5 hexdigest
11
- * `Twingly::URL::Hasher.blogstream_hash(url)` - MD5 hexdigest
12
11
  * `Twingly::URL::Hasher.documentdb_hash(url)` - SHA256 unsigned long, native endian digest
13
12
  * `Twingly::URL::Hasher.autopingdb_hash(url)` - SHA256 64-bit signed, native endian digest
14
13
  * `twingly/url/utilities` - Utilities to work with URLs
@@ -19,10 +19,6 @@ module Twingly
19
19
  MD5_DIGEST.hexdigest(url)[0..29].upcase
20
20
  end
21
21
 
22
- def blogstream_hash(url)
23
- MD5_DIGEST.hexdigest(url)[0..29].upcase
24
- end
25
-
26
22
  def documentdb_hash(url)
27
23
  SHA256_DIGEST.digest(url).unpack("L!")[0]
28
24
  end
data/lib/twingly/url.rb CHANGED
@@ -22,6 +22,8 @@ module Twingly
22
22
  Addressable::URI::InvalidURIError,
23
23
  PublicSuffix::DomainInvalid,
24
24
  ].freeze
25
+ DOT = "."
26
+ HYPHEN = "-"
25
27
  CARRIAGE_RETURN = "\u000D"
26
28
  LINE_FEED = "\u000A"
27
29
  NBSP = "\u00A0"
@@ -34,16 +36,20 @@ module Twingly
34
36
  ].join.freeze
35
37
  LEADING_AND_TRAILING_WHITESPACE =
36
38
  /\A[#{WHITESPACE_CHARS}]+|[#{WHITESPACE_CHARS}]+\z/.freeze
39
+ LETTERS_DIGITS_HYPHEN = /\A[a-zA-Z0-9-]+\z/.freeze
37
40
 
38
41
  private_constant :ACCEPTED_SCHEMES
39
42
  private_constant :CUSTOM_PSL
40
43
  private_constant :STARTS_WITH_WWW
41
44
  private_constant :ENDS_WITH_SLASH
42
45
  private_constant :ERRORS_TO_EXTEND
46
+ private_constant :DOT
47
+ private_constant :HYPHEN
43
48
  private_constant :NBSP
44
49
  private_constant :SPACE
45
50
  private_constant :WHITESPACE_CHARS
46
51
  private_constant :LEADING_AND_TRAILING_WHITESPACE
52
+ private_constant :LETTERS_DIGITS_HYPHEN
47
53
 
48
54
  class << self
49
55
  def parse(potential_url)
@@ -91,10 +97,9 @@ module Twingly
91
97
  input.gsub(LEADING_AND_TRAILING_WHITESPACE, "")
92
98
  end
93
99
 
94
- # Workaround for the following bug in addressable:
95
- # https://github.com/sporkmonger/addressable/issues/224
96
100
  def try_addressable_normalize(addressable_uri)
97
- addressable_uri.normalize
101
+ ascii_host = addressable_uri.normalize.host
102
+ raise Twingly::URL::Error::ParseError unless valid_hostname?(ascii_host)
98
103
  rescue ArgumentError => error
99
104
  if error.message.include?("invalid byte sequence in UTF-8")
100
105
  raise Twingly::URL::Error::ParseError
@@ -103,11 +108,27 @@ module Twingly
103
108
  raise
104
109
  end
105
110
 
111
+ def valid_hostname?(hostname)
112
+ # No need to check the TLD, the public suffix list does that
113
+ labels = hostname.split(DOT)[0...-1].map(&:to_s)
114
+
115
+ labels.all? { |label| valid_label?(label) }
116
+ end
117
+
118
+ def valid_label?(label)
119
+ return false if label.start_with?(HYPHEN)
120
+ return false if label.end_with?(HYPHEN)
121
+
122
+ label.match?(LETTERS_DIGITS_HYPHEN)
123
+ end
124
+
106
125
  private :new
107
126
  private :internal_parse
108
127
  private :clean_input
109
128
  private :strip_whitespace
110
129
  private :try_addressable_normalize
130
+ private :valid_hostname?
131
+ private :valid_label?
111
132
  end
112
133
 
113
134
  def initialize(addressable_uri, public_suffix_domain)
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Twingly
4
4
  class URL
5
- VERSION = "6.0.4"
5
+ VERSION = "7.0.0"
6
6
  end
7
7
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: twingly-url
3
3
  version: !ruby/object:Gem::Version
4
- version: 6.0.4
4
+ version: 7.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Twingly AB
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-04-14 00:00:00.000000000 Z
11
+ date: 2022-10-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: addressable
@@ -33,7 +33,7 @@ dependencies:
33
33
  version: 3.0.1
34
34
  - - "<"
35
35
  - !ruby/object:Gem::Version
36
- version: '5.0'
36
+ version: '6.0'
37
37
  type: :runtime
38
38
  prerelease: false
39
39
  version_requirements: !ruby/object:Gem::Requirement
@@ -43,7 +43,7 @@ dependencies:
43
43
  version: 3.0.1
44
44
  - - "<"
45
45
  - !ruby/object:Gem::Version
46
- version: '5.0'
46
+ version: '6.0'
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: rake
49
49
  requirement: !ruby/object:Gem::Requirement
@@ -113,14 +113,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
113
113
  requirements:
114
114
  - - ">="
115
115
  - !ruby/object:Gem::Version
116
- version: '2.5'
116
+ version: '2.6'
117
117
  required_rubygems_version: !ruby/object:Gem::Requirement
118
118
  requirements:
119
119
  - - ">="
120
120
  - !ruby/object:Gem::Version
121
121
  version: '0'
122
122
  requirements: []
123
- rubygems_version: 3.1.2
123
+ rubygems_version: 3.3.7
124
124
  signing_key:
125
125
  specification_version: 4
126
126
  summary: Ruby library for URL handling