twingly-url 5.1.0 → 6.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/README.md +8 -8
- data/lib/twingly/public_suffix_list.rb +4 -4
- data/lib/twingly/url.rb +57 -19
- data/lib/twingly/url/error.rb +2 -0
- data/lib/twingly/url/hasher.rb +3 -5
- data/lib/twingly/url/null_url.rb +11 -1
- data/lib/twingly/url/utilities.rb +3 -1
- data/lib/twingly/version.rb +3 -1
- metadata +23 -18
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: e39c685acc5000907b03195ce1c08a1a3628263aa64860257f88cde5c44be042
|
4
|
+
data.tar.gz: 6f4573e38a72d7943e872fb4a7714619e08f624eedaa73f6a51e432d3ddecfbe
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ab988985a16a53990d91be9c81c440a8baf878181ff2149cecc72eb1d52b753efa1edef701d1578d87b1bc26a0ea5700bd56e4735a5393f446d37fb4c393436e
|
7
|
+
data.tar.gz: 64947df33fd987b6e446037fdf841a885ac9d8292cea92ca9dfb71b3f518d107d7dcead1344b49fd53a81dcec62abc2355ea76c6ed28a625a97ab2408267528f
|
data/README.md
CHANGED
@@ -11,7 +11,6 @@ Twingly URL tools.
|
|
11
11
|
* `Twingly::URL::Hasher.blogstream_hash(url)` - MD5 hexdigest
|
12
12
|
* `Twingly::URL::Hasher.documentdb_hash(url)` - SHA256 unsigned long, native endian digest
|
13
13
|
* `Twingly::URL::Hasher.autopingdb_hash(url)` - SHA256 64-bit signed, native endian digest
|
14
|
-
* `Twingly::URL::Hasher.pingloggerdb_hash(url)` - SHA256 64-bit unsigned, native endian digest
|
15
14
|
* `twingly/url/utilities` - Utilities to work with URLs
|
16
15
|
* `Twingly::URL::Utilities.extract_valid_urls` - Returns Array of valid `Twingly::URL`
|
17
16
|
|
@@ -175,13 +174,13 @@ Run tests with
|
|
175
174
|
|
176
175
|
### Profiling
|
177
176
|
|
178
|
-
|
177
|
+
There are some profiling tasks available through Rake
|
179
178
|
|
180
179
|
cd profile/
|
181
|
-
bundle
|
182
|
-
bundle exec rake
|
180
|
+
bundle # Install dependencies
|
181
|
+
bundle exec rake -T # Show available tasks
|
183
182
|
|
184
|
-
Note that this isn't a benchmark, we're using [ruby-prof] which will slow things down.
|
183
|
+
Note that this isn't a benchmark; we're using [ruby-prof] and [memory_profiler], which will slow things down.
|
185
184
|
|
186
185
|
## Release workflow
|
187
186
|
|
@@ -191,16 +190,17 @@ Note that this isn't a benchmark, we're using [ruby-prof] which will slow things
|
|
191
190
|
|
192
191
|
* Bump the version in `lib/twingly/version.rb` in a commit, no need to push (the release task does that).
|
193
192
|
|
193
|
+
* Ensure you are signed in to RubyGems.org as [twingly][twingly-rubygems] with `gem signin`.
|
194
|
+
|
194
195
|
* Build and [publish](http://guides.rubygems.org/publishing/) the gem. This will create the proper tag in git, push the commit and tag and upload to RubyGems.
|
195
196
|
|
196
197
|
bundle exec rake release
|
197
198
|
|
198
|
-
|
199
|
-
|
200
|
-
* Update the changelog with [GitHub Changelog Generator](https://github.com/skywinder/github-changelog-generator/) (`gem install github_changelog_generator` if you don't have it, set `CHANGELOG_GITHUB_TOKEN` to a personal access token to avoid rate limiting by GitHub). This command will update `CHANGELOG.md`, commit and push manually.
|
199
|
+
* Update the changelog with [GitHub Changelog Generator](https://github.com/skywinder/github-changelog-generator/) (`gem install github_changelog_generator` if you don't have it, set `CHANGELOG_GITHUB_TOKEN` to a personal access token to avoid rate limiting by GitHub). This command will update `CHANGELOG.md`. You need to commit and push manually.
|
201
200
|
|
202
201
|
github_changelog_generator
|
203
202
|
|
204
203
|
[twingly-rubygems]: https://rubygems.org/profiles/twingly
|
205
204
|
[ruby-prof]: http://ruby-prof.rubyforge.org/
|
205
|
+
[memory_profiler]: https://github.com/SamSaffron/memory_profiler
|
206
206
|
[examples]: examples/url.rb
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "addressable/idna"
|
2
4
|
require "public_suffix"
|
3
5
|
|
@@ -15,17 +17,15 @@ module Twingly
|
|
15
17
|
|
16
18
|
punycoded_names(list).each do |punycoded_name|
|
17
19
|
new_rule = PublicSuffix::Rule.factory(punycoded_name)
|
18
|
-
list.add(new_rule
|
20
|
+
list.add(new_rule)
|
19
21
|
end
|
20
22
|
|
21
|
-
list.reindex!
|
22
|
-
|
23
23
|
list
|
24
24
|
end
|
25
25
|
|
26
26
|
private_class_method \
|
27
27
|
def self.punycoded_names(list)
|
28
|
-
names = list.map { |rule| Addressable::IDNA.to_ascii(rule.value) }
|
28
|
+
names = list.each.map { |rule| Addressable::IDNA.to_ascii(rule.value) }
|
29
29
|
names.select { |name| punycoded_name?(name) }
|
30
30
|
end
|
31
31
|
|
data/lib/twingly/url.rb
CHANGED
@@ -1,37 +1,63 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "addressable/idna/pure"
|
1
4
|
require "addressable/uri"
|
2
5
|
require "public_suffix"
|
3
6
|
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
7
|
+
require "twingly/public_suffix_list"
|
8
|
+
require "twingly/url/null_url"
|
9
|
+
require "twingly/url/error"
|
10
|
+
require "twingly/version"
|
8
11
|
|
9
12
|
module Twingly
|
10
13
|
class URL
|
11
14
|
include Comparable
|
12
15
|
|
13
|
-
ACCEPTED_SCHEMES = /\Ahttps?\z/i
|
16
|
+
ACCEPTED_SCHEMES = /\Ahttps?\z/i.freeze
|
14
17
|
CUSTOM_PSL = PublicSuffixList.with_punycoded_names
|
15
|
-
ENDS_WITH_SLASH =
|
18
|
+
ENDS_WITH_SLASH = /\/+$/.freeze
|
19
|
+
STARTS_WITH_WWW = /\Awww\./i.freeze
|
16
20
|
ERRORS_TO_EXTEND = [
|
21
|
+
Addressable::IDNA::PunycodeBigOutput,
|
17
22
|
Addressable::URI::InvalidURIError,
|
18
23
|
PublicSuffix::DomainInvalid,
|
19
|
-
]
|
24
|
+
].freeze
|
25
|
+
CARRIAGE_RETURN = "\u000D"
|
26
|
+
LINE_FEED = "\u000A"
|
27
|
+
NBSP = "\u00A0"
|
28
|
+
SPACE = "\u0020"
|
29
|
+
WHITESPACE_CHARS = [
|
30
|
+
CARRIAGE_RETURN,
|
31
|
+
LINE_FEED,
|
32
|
+
NBSP,
|
33
|
+
SPACE,
|
34
|
+
].join.freeze
|
35
|
+
LEADING_AND_TRAILING_WHITESPACE =
|
36
|
+
/\A[#{WHITESPACE_CHARS}]+|[#{WHITESPACE_CHARS}]+\z/.freeze
|
20
37
|
|
21
38
|
private_constant :ACCEPTED_SCHEMES
|
22
39
|
private_constant :CUSTOM_PSL
|
40
|
+
private_constant :STARTS_WITH_WWW
|
23
41
|
private_constant :ENDS_WITH_SLASH
|
24
42
|
private_constant :ERRORS_TO_EXTEND
|
43
|
+
private_constant :NBSP
|
44
|
+
private_constant :SPACE
|
45
|
+
private_constant :WHITESPACE_CHARS
|
46
|
+
private_constant :LEADING_AND_TRAILING_WHITESPACE
|
25
47
|
|
26
48
|
class << self
|
27
49
|
def parse(potential_url)
|
28
50
|
internal_parse(potential_url)
|
29
51
|
rescue Twingly::URL::Error, Twingly::URL::Error::ParseError => error
|
30
52
|
NullURL.new
|
53
|
+
rescue Exception => error
|
54
|
+
error.extend(Twingly::URL::Error)
|
55
|
+
raise
|
31
56
|
end
|
32
57
|
|
33
|
-
def internal_parse(
|
34
|
-
|
58
|
+
def internal_parse(input)
|
59
|
+
potential_url = clean_input(input)
|
60
|
+
addressable_uri = Addressable::URI.heuristic_parse(potential_url)
|
35
61
|
raise Twingly::URL::Error::ParseError if addressable_uri.nil?
|
36
62
|
|
37
63
|
scheme = addressable_uri.scheme
|
@@ -53,15 +79,16 @@ module Twingly
|
|
53
79
|
raise
|
54
80
|
end
|
55
81
|
|
56
|
-
def
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
potential_url = potential_url.scrub
|
82
|
+
def clean_input(input)
|
83
|
+
input = String(input)
|
84
|
+
input = input.scrub
|
85
|
+
input = strip_whitespace(input)
|
86
|
+
end
|
62
87
|
|
63
|
-
|
64
|
-
|
88
|
+
def strip_whitespace(input)
|
89
|
+
return input unless input.encoding == Encoding::UTF_8
|
90
|
+
|
91
|
+
input.gsub(LEADING_AND_TRAILING_WHITESPACE, "")
|
65
92
|
end
|
66
93
|
|
67
94
|
# Workaround for the following bug in addressable:
|
@@ -78,7 +105,8 @@ module Twingly
|
|
78
105
|
|
79
106
|
private :new
|
80
107
|
private :internal_parse
|
81
|
-
private :
|
108
|
+
private :clean_input
|
109
|
+
private :strip_whitespace
|
82
110
|
private :try_addressable_normalize
|
83
111
|
end
|
84
112
|
|
@@ -184,6 +212,16 @@ module Twingly
|
|
184
212
|
self.to_s <=> other.to_s
|
185
213
|
end
|
186
214
|
|
215
|
+
def eql?(other)
|
216
|
+
return false unless other.is_a?(self.class)
|
217
|
+
|
218
|
+
self.hash == other.hash
|
219
|
+
end
|
220
|
+
|
221
|
+
def hash
|
222
|
+
self.to_s.hash
|
223
|
+
end
|
224
|
+
|
187
225
|
def to_s
|
188
226
|
addressable_uri.to_s
|
189
227
|
end
|
@@ -198,7 +236,7 @@ module Twingly
|
|
198
236
|
|
199
237
|
def normalize_blogspot(host, domain)
|
200
238
|
if domain.sld.downcase == "blogspot"
|
201
|
-
host.sub(
|
239
|
+
host.sub(STARTS_WITH_WWW, "").sub(/#{domain.tld}\z/i, "com")
|
202
240
|
else
|
203
241
|
host
|
204
242
|
end
|
data/lib/twingly/url/error.rb
CHANGED
data/lib/twingly/url/hasher.rb
CHANGED
@@ -1,6 +1,8 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'digest'
|
2
4
|
|
3
|
-
|
5
|
+
require "twingly/url"
|
4
6
|
|
5
7
|
module Twingly
|
6
8
|
class URL
|
@@ -28,10 +30,6 @@ module Twingly
|
|
28
30
|
def autopingdb_hash(url)
|
29
31
|
SHA256_DIGEST.digest(url).unpack("q")[0]
|
30
32
|
end
|
31
|
-
|
32
|
-
def pingloggerdb_hash(url)
|
33
|
-
SHA256_DIGEST.digest(url).unpack("Q")[0]
|
34
|
-
end
|
35
33
|
end
|
36
34
|
end
|
37
35
|
end
|
data/lib/twingly/url/null_url.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module Twingly
|
4
4
|
class URL
|
@@ -24,6 +24,16 @@ module Twingly
|
|
24
24
|
self.to_s <=> other.to_s
|
25
25
|
end
|
26
26
|
|
27
|
+
def eql?(other)
|
28
|
+
return false unless other.is_a?(self.class)
|
29
|
+
|
30
|
+
self.hash == other.hash
|
31
|
+
end
|
32
|
+
|
33
|
+
def hash
|
34
|
+
self.to_s.hash
|
35
|
+
end
|
36
|
+
|
27
37
|
def to_s
|
28
38
|
""
|
29
39
|
end
|
data/lib/twingly/version.rb
CHANGED
metadata
CHANGED
@@ -1,57 +1,63 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: twingly-url
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 6.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Twingly AB
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2020-09-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: addressable
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- -
|
17
|
+
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: 2.
|
19
|
+
version: '2.6'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- -
|
24
|
+
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: 2.
|
26
|
+
version: '2.6'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: public_suffix
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- -
|
31
|
+
- - ">="
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version:
|
33
|
+
version: 3.0.1
|
34
|
+
- - "<"
|
35
|
+
- !ruby/object:Gem::Version
|
36
|
+
version: '5.0'
|
34
37
|
type: :runtime
|
35
38
|
prerelease: false
|
36
39
|
version_requirements: !ruby/object:Gem::Requirement
|
37
40
|
requirements:
|
38
|
-
- -
|
41
|
+
- - ">="
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: 3.0.1
|
44
|
+
- - "<"
|
39
45
|
- !ruby/object:Gem::Version
|
40
|
-
version:
|
46
|
+
version: '5.0'
|
41
47
|
- !ruby/object:Gem::Dependency
|
42
48
|
name: rake
|
43
49
|
requirement: !ruby/object:Gem::Requirement
|
44
50
|
requirements:
|
45
51
|
- - "~>"
|
46
52
|
- !ruby/object:Gem::Version
|
47
|
-
version: '
|
53
|
+
version: '12'
|
48
54
|
type: :development
|
49
55
|
prerelease: false
|
50
56
|
version_requirements: !ruby/object:Gem::Requirement
|
51
57
|
requirements:
|
52
58
|
- - "~>"
|
53
59
|
- !ruby/object:Gem::Version
|
54
|
-
version: '
|
60
|
+
version: '12'
|
55
61
|
- !ruby/object:Gem::Dependency
|
56
62
|
name: rspec
|
57
63
|
requirement: !ruby/object:Gem::Requirement
|
@@ -99,7 +105,7 @@ homepage: http://github.com/twingly/twingly-url
|
|
99
105
|
licenses:
|
100
106
|
- MIT
|
101
107
|
metadata: {}
|
102
|
-
post_install_message:
|
108
|
+
post_install_message:
|
103
109
|
rdoc_options: []
|
104
110
|
require_paths:
|
105
111
|
- lib
|
@@ -107,16 +113,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
107
113
|
requirements:
|
108
114
|
- - "~>"
|
109
115
|
- !ruby/object:Gem::Version
|
110
|
-
version: '2.
|
116
|
+
version: '2.5'
|
111
117
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
112
118
|
requirements:
|
113
119
|
- - ">="
|
114
120
|
- !ruby/object:Gem::Version
|
115
121
|
version: '0'
|
116
122
|
requirements: []
|
117
|
-
|
118
|
-
|
119
|
-
signing_key:
|
123
|
+
rubygems_version: 3.1.2
|
124
|
+
signing_key:
|
120
125
|
specification_version: 4
|
121
126
|
summary: Ruby library for URL handling
|
122
127
|
test_files: []
|