twingly-url 5.1.1 → 6.0.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +9 -9
- data/lib/twingly/public_suffix_list.rb +2 -0
- data/lib/twingly/url.rb +49 -16
- data/lib/twingly/url/error.rb +2 -0
- data/lib/twingly/url/hasher.rb +2 -4
- data/lib/twingly/url/null_url.rb +11 -1
- data/lib/twingly/url/utilities.rb +2 -0
- data/lib/twingly/version.rb +3 -1
- metadata +22 -17
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7308c5eb70d91f77fb0f81761948a6652a79dd610c481bb4237cfa435f3495b5
|
4
|
+
data.tar.gz: d54e531c0a7e350f01cf495d73adc69b16d9dd25af7859388c2d543d2bdfa299
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7a871c957a15ba4695de8dbed2122619a5bf89922f5901ed7c3cecbde4ee33a9cf3017350344972d2a28ab9e6cb3e17380b1e42ce1532679332a1c25cea5d6d8
|
7
|
+
data.tar.gz: ab7c227ca8300c094637de6ae2cbbab4f65fab2585863f81feacdee85e1f5cf73711c5b3ec15dc8a05ac343a84b106047f6bb12f81ed769eb8b3c893004c8987
|
data/README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# Twingly::URL
|
2
2
|
|
3
|
-
[![Build Status](https://
|
3
|
+
[![GitHub Build Status](https://github.com/twingly/twingly-url/workflows/CI/badge.svg?branch=master)](https://github.com/twingly/twingly-url/actions)
|
4
4
|
|
5
5
|
Twingly URL tools.
|
6
6
|
|
@@ -11,7 +11,6 @@ Twingly URL tools.
|
|
11
11
|
* `Twingly::URL::Hasher.blogstream_hash(url)` - MD5 hexdigest
|
12
12
|
* `Twingly::URL::Hasher.documentdb_hash(url)` - SHA256 unsigned long, native endian digest
|
13
13
|
* `Twingly::URL::Hasher.autopingdb_hash(url)` - SHA256 64-bit signed, native endian digest
|
14
|
-
* `Twingly::URL::Hasher.pingloggerdb_hash(url)` - SHA256 64-bit unsigned, native endian digest
|
15
14
|
* `twingly/url/utilities` - Utilities to work with URLs
|
16
15
|
* `Twingly::URL::Utilities.extract_valid_urls` - Returns Array of valid `Twingly::URL`
|
17
16
|
|
@@ -175,13 +174,13 @@ Run tests with
|
|
175
174
|
|
176
175
|
### Profiling
|
177
176
|
|
178
|
-
|
177
|
+
There are some profiling tasks available through Rake
|
179
178
|
|
180
179
|
cd profile/
|
181
|
-
bundle
|
182
|
-
bundle exec rake
|
180
|
+
bundle # Install dependencies
|
181
|
+
bundle exec rake -T # Show available tasks
|
183
182
|
|
184
|
-
Note that this isn't a benchmark, we're using [ruby-prof] which will slow things down.
|
183
|
+
Note that this isn't a benchmark, we're using [ruby-prof] and [memory_profiler] which will slow things down.
|
185
184
|
|
186
185
|
## Release workflow
|
187
186
|
|
@@ -191,16 +190,17 @@ Note that this isn't a benchmark, we're using [ruby-prof] which will slow things
|
|
191
190
|
|
192
191
|
* Bump the version in `lib/twingly/version.rb` in a commit, no need to push (the release task does that).
|
193
192
|
|
193
|
+
* Ensure you are signed in to RubyGems.org as [twingly][twingly-rubygems] with `gem signin`.
|
194
|
+
|
194
195
|
* Build and [publish](http://guides.rubygems.org/publishing/) the gem. This will create the proper tag in git, push the commit and tag and upload to RubyGems.
|
195
196
|
|
196
197
|
bundle exec rake release
|
197
198
|
|
198
|
-
|
199
|
-
|
200
|
-
* Update the changelog with [GitHub Changelog Generator](https://github.com/skywinder/github-changelog-generator/) (`gem install github_changelog_generator` if you don't have it, set `CHANGELOG_GITHUB_TOKEN` to a personal access token to avoid rate limiting by GitHub). This command will update `CHANGELOG.md`, commit and push manually.
|
199
|
+
* Update the changelog with [GitHub Changelog Generator](https://github.com/github-changelog-generator/github-changelog-generator) (`gem install github_changelog_generator` if you don't have it, set `CHANGELOG_GITHUB_TOKEN` to a personal access token to avoid rate limiting by GitHub). This command will update `CHANGELOG.md`. You need to commit and push manually.
|
201
200
|
|
202
201
|
github_changelog_generator
|
203
202
|
|
204
203
|
[twingly-rubygems]: https://rubygems.org/profiles/twingly
|
205
204
|
[ruby-prof]: http://ruby-prof.rubyforge.org/
|
205
|
+
[memory_profiler]: https://github.com/SamSaffron/memory_profiler
|
206
206
|
[examples]: examples/url.rb
|
data/lib/twingly/url.rb
CHANGED
@@ -1,4 +1,6 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "addressable/idna/pure"
|
2
4
|
require "addressable/uri"
|
3
5
|
require "public_suffix"
|
4
6
|
|
@@ -11,19 +13,37 @@ module Twingly
|
|
11
13
|
class URL
|
12
14
|
include Comparable
|
13
15
|
|
14
|
-
ACCEPTED_SCHEMES = /\Ahttps?\z/i
|
16
|
+
ACCEPTED_SCHEMES = /\Ahttps?\z/i.freeze
|
15
17
|
CUSTOM_PSL = PublicSuffixList.with_punycoded_names
|
16
|
-
ENDS_WITH_SLASH =
|
18
|
+
ENDS_WITH_SLASH = /\/+$/.freeze
|
19
|
+
STARTS_WITH_WWW = /\Awww\./i.freeze
|
17
20
|
ERRORS_TO_EXTEND = [
|
18
21
|
Addressable::IDNA::PunycodeBigOutput,
|
19
22
|
Addressable::URI::InvalidURIError,
|
20
23
|
PublicSuffix::DomainInvalid,
|
21
|
-
]
|
24
|
+
].freeze
|
25
|
+
CARRIAGE_RETURN = "\u000D"
|
26
|
+
LINE_FEED = "\u000A"
|
27
|
+
NBSP = "\u00A0"
|
28
|
+
SPACE = "\u0020"
|
29
|
+
WHITESPACE_CHARS = [
|
30
|
+
CARRIAGE_RETURN,
|
31
|
+
LINE_FEED,
|
32
|
+
NBSP,
|
33
|
+
SPACE,
|
34
|
+
].join.freeze
|
35
|
+
LEADING_AND_TRAILING_WHITESPACE =
|
36
|
+
/\A[#{WHITESPACE_CHARS}]+|[#{WHITESPACE_CHARS}]+\z/.freeze
|
22
37
|
|
23
38
|
private_constant :ACCEPTED_SCHEMES
|
24
39
|
private_constant :CUSTOM_PSL
|
40
|
+
private_constant :STARTS_WITH_WWW
|
25
41
|
private_constant :ENDS_WITH_SLASH
|
26
42
|
private_constant :ERRORS_TO_EXTEND
|
43
|
+
private_constant :NBSP
|
44
|
+
private_constant :SPACE
|
45
|
+
private_constant :WHITESPACE_CHARS
|
46
|
+
private_constant :LEADING_AND_TRAILING_WHITESPACE
|
27
47
|
|
28
48
|
class << self
|
29
49
|
def parse(potential_url)
|
@@ -35,8 +55,9 @@ module Twingly
|
|
35
55
|
raise
|
36
56
|
end
|
37
57
|
|
38
|
-
def internal_parse(
|
39
|
-
|
58
|
+
def internal_parse(input)
|
59
|
+
potential_url = clean_input(input)
|
60
|
+
addressable_uri = Addressable::URI.heuristic_parse(potential_url)
|
40
61
|
raise Twingly::URL::Error::ParseError if addressable_uri.nil?
|
41
62
|
|
42
63
|
scheme = addressable_uri.scheme
|
@@ -58,15 +79,16 @@ module Twingly
|
|
58
79
|
raise
|
59
80
|
end
|
60
81
|
|
61
|
-
def
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
potential_url = potential_url.scrub
|
82
|
+
def clean_input(input)
|
83
|
+
input = String(input)
|
84
|
+
input = input.scrub
|
85
|
+
input = strip_whitespace(input)
|
86
|
+
end
|
67
87
|
|
68
|
-
|
69
|
-
|
88
|
+
def strip_whitespace(input)
|
89
|
+
return input unless input.encoding == Encoding::UTF_8
|
90
|
+
|
91
|
+
input.gsub(LEADING_AND_TRAILING_WHITESPACE, "")
|
70
92
|
end
|
71
93
|
|
72
94
|
# Workaround for the following bug in addressable:
|
@@ -83,7 +105,8 @@ module Twingly
|
|
83
105
|
|
84
106
|
private :new
|
85
107
|
private :internal_parse
|
86
|
-
private :
|
108
|
+
private :clean_input
|
109
|
+
private :strip_whitespace
|
87
110
|
private :try_addressable_normalize
|
88
111
|
end
|
89
112
|
|
@@ -189,6 +212,16 @@ module Twingly
|
|
189
212
|
self.to_s <=> other.to_s
|
190
213
|
end
|
191
214
|
|
215
|
+
def eql?(other)
|
216
|
+
return false unless other.is_a?(self.class)
|
217
|
+
|
218
|
+
self.hash == other.hash
|
219
|
+
end
|
220
|
+
|
221
|
+
def hash
|
222
|
+
self.to_s.hash
|
223
|
+
end
|
224
|
+
|
192
225
|
def to_s
|
193
226
|
addressable_uri.to_s
|
194
227
|
end
|
@@ -203,7 +236,7 @@ module Twingly
|
|
203
236
|
|
204
237
|
def normalize_blogspot(host, domain)
|
205
238
|
if domain.sld.downcase == "blogspot"
|
206
|
-
host.sub(
|
239
|
+
host.sub(STARTS_WITH_WWW, "").sub(/#{domain.tld}\z/i, "com")
|
207
240
|
else
|
208
241
|
host
|
209
242
|
end
|
data/lib/twingly/url/error.rb
CHANGED
data/lib/twingly/url/hasher.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'digest'
|
2
4
|
|
3
5
|
require "twingly/url"
|
@@ -28,10 +30,6 @@ module Twingly
|
|
28
30
|
def autopingdb_hash(url)
|
29
31
|
SHA256_DIGEST.digest(url).unpack("q")[0]
|
30
32
|
end
|
31
|
-
|
32
|
-
def pingloggerdb_hash(url)
|
33
|
-
SHA256_DIGEST.digest(url).unpack("Q")[0]
|
34
|
-
end
|
35
33
|
end
|
36
34
|
end
|
37
35
|
end
|
data/lib/twingly/url/null_url.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module Twingly
|
4
4
|
class URL
|
@@ -24,6 +24,16 @@ module Twingly
|
|
24
24
|
self.to_s <=> other.to_s
|
25
25
|
end
|
26
26
|
|
27
|
+
def eql?(other)
|
28
|
+
return false unless other.is_a?(self.class)
|
29
|
+
|
30
|
+
self.hash == other.hash
|
31
|
+
end
|
32
|
+
|
33
|
+
def hash
|
34
|
+
self.to_s.hash
|
35
|
+
end
|
36
|
+
|
27
37
|
def to_s
|
28
38
|
""
|
29
39
|
end
|
data/lib/twingly/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: twingly-url
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 5.1.1
|
4
|
+
version: 6.0.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Twingly AB
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-04-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: addressable
|
@@ -16,42 +16,48 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: 2.
|
19
|
+
version: '2.6'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: 2.
|
26
|
+
version: '2.6'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: public_suffix
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- - "
|
31
|
+
- - ">="
|
32
32
|
- !ruby/object:Gem::Version
|
33
33
|
version: 3.0.1
|
34
|
+
- - "<"
|
35
|
+
- !ruby/object:Gem::Version
|
36
|
+
version: '5.0'
|
34
37
|
type: :runtime
|
35
38
|
prerelease: false
|
36
39
|
version_requirements: !ruby/object:Gem::Requirement
|
37
40
|
requirements:
|
38
|
-
- - "
|
41
|
+
- - ">="
|
39
42
|
- !ruby/object:Gem::Version
|
40
43
|
version: 3.0.1
|
44
|
+
- - "<"
|
45
|
+
- !ruby/object:Gem::Version
|
46
|
+
version: '5.0'
|
41
47
|
- !ruby/object:Gem::Dependency
|
42
48
|
name: rake
|
43
49
|
requirement: !ruby/object:Gem::Requirement
|
44
50
|
requirements:
|
45
51
|
- - "~>"
|
46
52
|
- !ruby/object:Gem::Version
|
47
|
-
version: '
|
53
|
+
version: '12'
|
48
54
|
type: :development
|
49
55
|
prerelease: false
|
50
56
|
version_requirements: !ruby/object:Gem::Requirement
|
51
57
|
requirements:
|
52
58
|
- - "~>"
|
53
59
|
- !ruby/object:Gem::Version
|
54
|
-
version: '
|
60
|
+
version: '12'
|
55
61
|
- !ruby/object:Gem::Dependency
|
56
62
|
name: rspec
|
57
63
|
requirement: !ruby/object:Gem::Requirement
|
@@ -70,14 +76,14 @@ dependencies:
|
|
70
76
|
name: pry
|
71
77
|
requirement: !ruby/object:Gem::Requirement
|
72
78
|
requirements:
|
73
|
-
- - "
|
79
|
+
- - "~>"
|
74
80
|
- !ruby/object:Gem::Version
|
75
81
|
version: '0'
|
76
82
|
type: :development
|
77
83
|
prerelease: false
|
78
84
|
version_requirements: !ruby/object:Gem::Requirement
|
79
85
|
requirements:
|
80
|
-
- - "
|
86
|
+
- - "~>"
|
81
87
|
- !ruby/object:Gem::Version
|
82
88
|
version: '0'
|
83
89
|
description: Twingly URL tools
|
@@ -99,24 +105,23 @@ homepage: http://github.com/twingly/twingly-url
|
|
99
105
|
licenses:
|
100
106
|
- MIT
|
101
107
|
metadata: {}
|
102
|
-
post_install_message:
|
108
|
+
post_install_message:
|
103
109
|
rdoc_options: []
|
104
110
|
require_paths:
|
105
111
|
- lib
|
106
112
|
required_ruby_version: !ruby/object:Gem::Requirement
|
107
113
|
requirements:
|
108
|
-
- - "
|
114
|
+
- - ">="
|
109
115
|
- !ruby/object:Gem::Version
|
110
|
-
version: '2.
|
116
|
+
version: '2.5'
|
111
117
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
112
118
|
requirements:
|
113
119
|
- - ">="
|
114
120
|
- !ruby/object:Gem::Version
|
115
121
|
version: '0'
|
116
122
|
requirements: []
|
117
|
-
|
118
|
-
|
119
|
-
signing_key:
|
123
|
+
rubygems_version: 3.1.2
|
124
|
+
signing_key:
|
120
125
|
specification_version: 4
|
121
126
|
summary: Ruby library for URL handling
|
122
127
|
test_files: []
|