twingly-url 6.0.4 → 7.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7308c5eb70d91f77fb0f81761948a6652a79dd610c481bb4237cfa435f3495b5
4
- data.tar.gz: d54e531c0a7e350f01cf495d73adc69b16d9dd25af7859388c2d543d2bdfa299
3
+ metadata.gz: 4d077940af63f0646300c3438d45f3ca66370825c5ca81df32c6d1597e156071
4
+ data.tar.gz: 308a66eb3c281139a9f747a3538c3e53291f491eafa3bbdd3dbe30d1decb42d3
5
5
  SHA512:
6
- metadata.gz: 7a871c957a15ba4695de8dbed2122619a5bf89922f5901ed7c3cecbde4ee33a9cf3017350344972d2a28ab9e6cb3e17380b1e42ce1532679332a1c25cea5d6d8
7
- data.tar.gz: ab7c227ca8300c094637de6ae2cbbab4f65fab2585863f81feacdee85e1f5cf73711c5b3ec15dc8a05ac343a84b106047f6bb12f81ed769eb8b3c893004c8987
6
+ metadata.gz: 0b50ee3c1e3dd3beb12b85c813e36952945fca3de296257d7e0428a1de78d866932d334f13c405f784659324a69de0b2de293c07520f94f0e9d3c9a0665a3955
7
+ data.tar.gz: 278af99f73d82edb9f05eed9f795fe0e0395bf45626f9fde2e09b605ce467afc90c13780020fc94b6e54f2c64daf995f061a20eddbc1fcc84d6ac1cf0a1fcd48
data/README.md CHANGED
@@ -8,7 +8,6 @@ Twingly URL tools.
8
8
  * `Twingly::URL.parse` - Returns one or more `Twingly::URL` instance
9
9
  * `twingly/url/hasher` - Generate URL hashes suitable for primary keys
10
10
  * `Twingly::URL::Hasher.taskdb_hash(url)` - MD5 hexdigest
11
- * `Twingly::URL::Hasher.blogstream_hash(url)` - MD5 hexdigest
12
11
  * `Twingly::URL::Hasher.documentdb_hash(url)` - SHA256 unsigned long, native endian digest
13
12
  * `Twingly::URL::Hasher.autopingdb_hash(url)` - SHA256 64-bit signed, native endian digest
14
13
  * `twingly/url/utilities` - Utilities to work with URLs
@@ -19,10 +19,6 @@ module Twingly
19
19
  MD5_DIGEST.hexdigest(url)[0..29].upcase
20
20
  end
21
21
 
22
- def blogstream_hash(url)
23
- MD5_DIGEST.hexdigest(url)[0..29].upcase
24
- end
25
-
26
22
  def documentdb_hash(url)
27
23
  SHA256_DIGEST.digest(url).unpack("L!")[0]
28
24
  end
data/lib/twingly/url.rb CHANGED
@@ -22,6 +22,8 @@ module Twingly
22
22
  Addressable::URI::InvalidURIError,
23
23
  PublicSuffix::DomainInvalid,
24
24
  ].freeze
25
+ DOT = "."
26
+ HYPHEN = "-"
25
27
  CARRIAGE_RETURN = "\u000D"
26
28
  LINE_FEED = "\u000A"
27
29
  NBSP = "\u00A0"
@@ -34,16 +36,20 @@ module Twingly
34
36
  ].join.freeze
35
37
  LEADING_AND_TRAILING_WHITESPACE =
36
38
  /\A[#{WHITESPACE_CHARS}]+|[#{WHITESPACE_CHARS}]+\z/.freeze
39
+ LETTERS_DIGITS_HYPHEN = /\A[a-zA-Z0-9-]+\z/.freeze
37
40
 
38
41
  private_constant :ACCEPTED_SCHEMES
39
42
  private_constant :CUSTOM_PSL
40
43
  private_constant :STARTS_WITH_WWW
41
44
  private_constant :ENDS_WITH_SLASH
42
45
  private_constant :ERRORS_TO_EXTEND
46
+ private_constant :DOT
47
+ private_constant :HYPHEN
43
48
  private_constant :NBSP
44
49
  private_constant :SPACE
45
50
  private_constant :WHITESPACE_CHARS
46
51
  private_constant :LEADING_AND_TRAILING_WHITESPACE
52
+ private_constant :LETTERS_DIGITS_HYPHEN
47
53
 
48
54
  class << self
49
55
  def parse(potential_url)
@@ -91,10 +97,9 @@ module Twingly
91
97
  input.gsub(LEADING_AND_TRAILING_WHITESPACE, "")
92
98
  end
93
99
 
94
- # Workaround for the following bug in addressable:
95
- # https://github.com/sporkmonger/addressable/issues/224
96
100
  def try_addressable_normalize(addressable_uri)
97
- addressable_uri.normalize
101
+ ascii_host = addressable_uri.normalize.host
102
+ raise Twingly::URL::Error::ParseError unless valid_hostname?(ascii_host)
98
103
  rescue ArgumentError => error
99
104
  if error.message.include?("invalid byte sequence in UTF-8")
100
105
  raise Twingly::URL::Error::ParseError
@@ -103,11 +108,29 @@ module Twingly
103
108
  raise
104
109
  end
105
110
 
111
+ def valid_hostname?(hostname)
112
+ return false if hostname.nil?
113
+
114
+ # No need to check the TLD, the public suffix list does that
115
+ labels = hostname.split(DOT)[0...-1].map(&:to_s)
116
+
117
+ labels.all? { |label| valid_label?(label) }
118
+ end
119
+
120
+ def valid_label?(label)
121
+ return false if label.start_with?(HYPHEN)
122
+ return false if label.end_with?(HYPHEN)
123
+
124
+ label.match?(LETTERS_DIGITS_HYPHEN)
125
+ end
126
+
106
127
  private :new
107
128
  private :internal_parse
108
129
  private :clean_input
109
130
  private :strip_whitespace
110
131
  private :try_addressable_normalize
132
+ private :valid_hostname?
133
+ private :valid_label?
111
134
  end
112
135
 
113
136
  def initialize(addressable_uri, public_suffix_domain)
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Twingly
4
4
  class URL
5
- VERSION = "6.0.4"
5
+ VERSION = "7.0.1"
6
6
  end
7
7
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: twingly-url
3
3
  version: !ruby/object:Gem::Version
4
- version: 6.0.4
4
+ version: 7.0.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Twingly AB
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-04-14 00:00:00.000000000 Z
11
+ date: 2022-11-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: addressable
@@ -33,7 +33,7 @@ dependencies:
33
33
  version: 3.0.1
34
34
  - - "<"
35
35
  - !ruby/object:Gem::Version
36
- version: '5.0'
36
+ version: '6.0'
37
37
  type: :runtime
38
38
  prerelease: false
39
39
  version_requirements: !ruby/object:Gem::Requirement
@@ -43,7 +43,7 @@ dependencies:
43
43
  version: 3.0.1
44
44
  - - "<"
45
45
  - !ruby/object:Gem::Version
46
- version: '5.0'
46
+ version: '6.0'
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: rake
49
49
  requirement: !ruby/object:Gem::Requirement
@@ -113,14 +113,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
113
113
  requirements:
114
114
  - - ">="
115
115
  - !ruby/object:Gem::Version
116
- version: '2.5'
116
+ version: '2.6'
117
117
  required_rubygems_version: !ruby/object:Gem::Requirement
118
118
  requirements:
119
119
  - - ">="
120
120
  - !ruby/object:Gem::Version
121
121
  version: '0'
122
122
  requirements: []
123
- rubygems_version: 3.1.2
123
+ rubygems_version: 3.3.7
124
124
  signing_key:
125
125
  specification_version: 4
126
126
  summary: Ruby library for URL handling