twingly-url 5.1.0 → 6.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/README.md +8 -8
- data/lib/twingly/public_suffix_list.rb +4 -4
- data/lib/twingly/url.rb +57 -19
- data/lib/twingly/url/error.rb +2 -0
- data/lib/twingly/url/hasher.rb +3 -5
- data/lib/twingly/url/null_url.rb +11 -1
- data/lib/twingly/url/utilities.rb +3 -1
- data/lib/twingly/version.rb +3 -1
- metadata +23 -18
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: e39c685acc5000907b03195ce1c08a1a3628263aa64860257f88cde5c44be042
|
4
|
+
data.tar.gz: 6f4573e38a72d7943e872fb4a7714619e08f624eedaa73f6a51e432d3ddecfbe
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ab988985a16a53990d91be9c81c440a8baf878181ff2149cecc72eb1d52b753efa1edef701d1578d87b1bc26a0ea5700bd56e4735a5393f446d37fb4c393436e
|
7
|
+
data.tar.gz: 64947df33fd987b6e446037fdf841a885ac9d8292cea92ca9dfb71b3f518d107d7dcead1344b49fd53a81dcec62abc2355ea76c6ed28a625a97ab2408267528f
|
data/README.md
CHANGED
@@ -11,7 +11,6 @@ Twingly URL tools.
|
|
11
11
|
* `Twingly::URL::Hasher.blogstream_hash(url)` - MD5 hexdigest
|
12
12
|
* `Twingly::URL::Hasher.documentdb_hash(url)` - SHA256 unsigned long, native endian digest
|
13
13
|
* `Twingly::URL::Hasher.autopingdb_hash(url)` - SHA256 64-bit signed, native endian digest
|
14
|
-
* `Twingly::URL::Hasher.pingloggerdb_hash(url)` - SHA256 64-bit unsigned, native endian digest
|
15
14
|
* `twingly/url/utilities` - Utilities to work with URLs
|
16
15
|
* `Twingly::URL::Utilities.extract_valid_urls` - Returns Array of valid `Twingly::URL`
|
17
16
|
|
@@ -175,13 +174,13 @@ Run tests with
|
|
175
174
|
|
176
175
|
### Profiling
|
177
176
|
|
178
|
-
|
177
|
+
There are some profiling tasks available through Rake
|
179
178
|
|
180
179
|
cd profile/
|
181
|
-
bundle
|
182
|
-
bundle exec rake
|
180
|
+
bundle # Install dependencies
|
181
|
+
bundle exec rake -T # Show available tasks
|
183
182
|
|
184
|
-
Note that this isn't a benchmark, we're using [ruby-prof] which will slow things down.
|
183
|
+
Note that this isn't a benchmark; we're using [ruby-prof] and [memory_profiler], which will slow things down.
|
185
184
|
|
186
185
|
## Release workflow
|
187
186
|
|
@@ -191,16 +190,17 @@ Note that this isn't a benchmark, we're using [ruby-prof] which will slow things
|
|
191
190
|
|
192
191
|
* Bump the version in `lib/twingly/version.rb` in a commit, no need to push (the release task does that).
|
193
192
|
|
193
|
+
* Ensure you are signed in to RubyGems.org as [twingly][twingly-rubygems] with `gem signin`.
|
194
|
+
|
194
195
|
* Build and [publish](http://guides.rubygems.org/publishing/) the gem. This will create the proper tag in git, push the commit and tag and upload to RubyGems.
|
195
196
|
|
196
197
|
bundle exec rake release
|
197
198
|
|
198
|
-
|
199
|
-
|
200
|
-
* Update the changelog with [GitHub Changelog Generator](https://github.com/skywinder/github-changelog-generator/) (`gem install github_changelog_generator` if you don't have it, set `CHANGELOG_GITHUB_TOKEN` to a personal access token to avoid rate limiting by GitHub). This command will update `CHANGELOG.md`, commit and push manually.
|
199
|
+
* Update the changelog with [GitHub Changelog Generator](https://github.com/skywinder/github-changelog-generator/) (`gem install github_changelog_generator` if you don't have it, set `CHANGELOG_GITHUB_TOKEN` to a personal access token to avoid rate limiting by GitHub). This command will update `CHANGELOG.md`. You need to commit and push manually.
|
201
200
|
|
202
201
|
github_changelog_generator
|
203
202
|
|
204
203
|
[twingly-rubygems]: https://rubygems.org/profiles/twingly
|
205
204
|
[ruby-prof]: http://ruby-prof.rubyforge.org/
|
205
|
+
[memory_profiler]: https://github.com/SamSaffron/memory_profiler
|
206
206
|
[examples]: examples/url.rb
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "addressable/idna"
|
2
4
|
require "public_suffix"
|
3
5
|
|
@@ -15,17 +17,15 @@ module Twingly
|
|
15
17
|
|
16
18
|
punycoded_names(list).each do |punycoded_name|
|
17
19
|
new_rule = PublicSuffix::Rule.factory(punycoded_name)
|
18
|
-
list.add(new_rule
|
20
|
+
list.add(new_rule)
|
19
21
|
end
|
20
22
|
|
21
|
-
list.reindex!
|
22
|
-
|
23
23
|
list
|
24
24
|
end
|
25
25
|
|
26
26
|
private_class_method \
|
27
27
|
def self.punycoded_names(list)
|
28
|
-
names = list.map { |rule| Addressable::IDNA.to_ascii(rule.value) }
|
28
|
+
names = list.each.map { |rule| Addressable::IDNA.to_ascii(rule.value) }
|
29
29
|
names.select { |name| punycoded_name?(name) }
|
30
30
|
end
|
31
31
|
|
data/lib/twingly/url.rb
CHANGED
@@ -1,37 +1,63 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "addressable/idna/pure"
|
1
4
|
require "addressable/uri"
|
2
5
|
require "public_suffix"
|
3
6
|
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
7
|
+
require "twingly/public_suffix_list"
|
8
|
+
require "twingly/url/null_url"
|
9
|
+
require "twingly/url/error"
|
10
|
+
require "twingly/version"
|
8
11
|
|
9
12
|
module Twingly
|
10
13
|
class URL
|
11
14
|
include Comparable
|
12
15
|
|
13
|
-
ACCEPTED_SCHEMES = /\Ahttps?\z/i
|
16
|
+
ACCEPTED_SCHEMES = /\Ahttps?\z/i.freeze
|
14
17
|
CUSTOM_PSL = PublicSuffixList.with_punycoded_names
|
15
|
-
ENDS_WITH_SLASH =
|
18
|
+
ENDS_WITH_SLASH = /\/+$/.freeze
|
19
|
+
STARTS_WITH_WWW = /\Awww\./i.freeze
|
16
20
|
ERRORS_TO_EXTEND = [
|
21
|
+
Addressable::IDNA::PunycodeBigOutput,
|
17
22
|
Addressable::URI::InvalidURIError,
|
18
23
|
PublicSuffix::DomainInvalid,
|
19
|
-
]
|
24
|
+
].freeze
|
25
|
+
CARRIAGE_RETURN = "\u000D"
|
26
|
+
LINE_FEED = "\u000A"
|
27
|
+
NBSP = "\u00A0"
|
28
|
+
SPACE = "\u0020"
|
29
|
+
WHITESPACE_CHARS = [
|
30
|
+
CARRIAGE_RETURN,
|
31
|
+
LINE_FEED,
|
32
|
+
NBSP,
|
33
|
+
SPACE,
|
34
|
+
].join.freeze
|
35
|
+
LEADING_AND_TRAILING_WHITESPACE =
|
36
|
+
/\A[#{WHITESPACE_CHARS}]+|[#{WHITESPACE_CHARS}]+\z/.freeze
|
20
37
|
|
21
38
|
private_constant :ACCEPTED_SCHEMES
|
22
39
|
private_constant :CUSTOM_PSL
|
40
|
+
private_constant :STARTS_WITH_WWW
|
23
41
|
private_constant :ENDS_WITH_SLASH
|
24
42
|
private_constant :ERRORS_TO_EXTEND
|
43
|
+
private_constant :NBSP
|
44
|
+
private_constant :SPACE
|
45
|
+
private_constant :WHITESPACE_CHARS
|
46
|
+
private_constant :LEADING_AND_TRAILING_WHITESPACE
|
25
47
|
|
26
48
|
class << self
|
27
49
|
def parse(potential_url)
|
28
50
|
internal_parse(potential_url)
|
29
51
|
rescue Twingly::URL::Error, Twingly::URL::Error::ParseError => error
|
30
52
|
NullURL.new
|
53
|
+
rescue Exception => error
|
54
|
+
error.extend(Twingly::URL::Error)
|
55
|
+
raise
|
31
56
|
end
|
32
57
|
|
33
|
-
def internal_parse(
|
34
|
-
|
58
|
+
def internal_parse(input)
|
59
|
+
potential_url = clean_input(input)
|
60
|
+
addressable_uri = Addressable::URI.heuristic_parse(potential_url)
|
35
61
|
raise Twingly::URL::Error::ParseError if addressable_uri.nil?
|
36
62
|
|
37
63
|
scheme = addressable_uri.scheme
|
@@ -53,15 +79,16 @@ module Twingly
|
|
53
79
|
raise
|
54
80
|
end
|
55
81
|
|
56
|
-
def
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
potential_url = potential_url.scrub
|
82
|
+
def clean_input(input)
|
83
|
+
input = String(input)
|
84
|
+
input = input.scrub
|
85
|
+
input = strip_whitespace(input)
|
86
|
+
end
|
62
87
|
|
63
|
-
|
64
|
-
|
88
|
+
def strip_whitespace(input)
|
89
|
+
return input unless input.encoding == Encoding::UTF_8
|
90
|
+
|
91
|
+
input.gsub(LEADING_AND_TRAILING_WHITESPACE, "")
|
65
92
|
end
|
66
93
|
|
67
94
|
# Workaround for the following bug in addressable:
|
@@ -78,7 +105,8 @@ module Twingly
|
|
78
105
|
|
79
106
|
private :new
|
80
107
|
private :internal_parse
|
81
|
-
private :
|
108
|
+
private :clean_input
|
109
|
+
private :strip_whitespace
|
82
110
|
private :try_addressable_normalize
|
83
111
|
end
|
84
112
|
|
@@ -184,6 +212,16 @@ module Twingly
|
|
184
212
|
self.to_s <=> other.to_s
|
185
213
|
end
|
186
214
|
|
215
|
+
def eql?(other)
|
216
|
+
return false unless other.is_a?(self.class)
|
217
|
+
|
218
|
+
self.hash == other.hash
|
219
|
+
end
|
220
|
+
|
221
|
+
def hash
|
222
|
+
self.to_s.hash
|
223
|
+
end
|
224
|
+
|
187
225
|
def to_s
|
188
226
|
addressable_uri.to_s
|
189
227
|
end
|
@@ -198,7 +236,7 @@ module Twingly
|
|
198
236
|
|
199
237
|
def normalize_blogspot(host, domain)
|
200
238
|
if domain.sld.downcase == "blogspot"
|
201
|
-
host.sub(
|
239
|
+
host.sub(STARTS_WITH_WWW, "").sub(/#{domain.tld}\z/i, "com")
|
202
240
|
else
|
203
241
|
host
|
204
242
|
end
|
data/lib/twingly/url/error.rb
CHANGED
data/lib/twingly/url/hasher.rb
CHANGED
@@ -1,6 +1,8 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'digest'
|
2
4
|
|
3
|
-
|
5
|
+
require "twingly/url"
|
4
6
|
|
5
7
|
module Twingly
|
6
8
|
class URL
|
@@ -28,10 +30,6 @@ module Twingly
|
|
28
30
|
def autopingdb_hash(url)
|
29
31
|
SHA256_DIGEST.digest(url).unpack("q")[0]
|
30
32
|
end
|
31
|
-
|
32
|
-
def pingloggerdb_hash(url)
|
33
|
-
SHA256_DIGEST.digest(url).unpack("Q")[0]
|
34
|
-
end
|
35
33
|
end
|
36
34
|
end
|
37
35
|
end
|
data/lib/twingly/url/null_url.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module Twingly
|
4
4
|
class URL
|
@@ -24,6 +24,16 @@ module Twingly
|
|
24
24
|
self.to_s <=> other.to_s
|
25
25
|
end
|
26
26
|
|
27
|
+
def eql?(other)
|
28
|
+
return false unless other.is_a?(self.class)
|
29
|
+
|
30
|
+
self.hash == other.hash
|
31
|
+
end
|
32
|
+
|
33
|
+
def hash
|
34
|
+
self.to_s.hash
|
35
|
+
end
|
36
|
+
|
27
37
|
def to_s
|
28
38
|
""
|
29
39
|
end
|
data/lib/twingly/version.rb
CHANGED
metadata
CHANGED
@@ -1,57 +1,63 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: twingly-url
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 6.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Twingly AB
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2020-09-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: addressable
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- -
|
17
|
+
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: 2.
|
19
|
+
version: '2.6'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- -
|
24
|
+
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: 2.
|
26
|
+
version: '2.6'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: public_suffix
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- -
|
31
|
+
- - ">="
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version:
|
33
|
+
version: 3.0.1
|
34
|
+
- - "<"
|
35
|
+
- !ruby/object:Gem::Version
|
36
|
+
version: '5.0'
|
34
37
|
type: :runtime
|
35
38
|
prerelease: false
|
36
39
|
version_requirements: !ruby/object:Gem::Requirement
|
37
40
|
requirements:
|
38
|
-
- -
|
41
|
+
- - ">="
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: 3.0.1
|
44
|
+
- - "<"
|
39
45
|
- !ruby/object:Gem::Version
|
40
|
-
version:
|
46
|
+
version: '5.0'
|
41
47
|
- !ruby/object:Gem::Dependency
|
42
48
|
name: rake
|
43
49
|
requirement: !ruby/object:Gem::Requirement
|
44
50
|
requirements:
|
45
51
|
- - "~>"
|
46
52
|
- !ruby/object:Gem::Version
|
47
|
-
version: '
|
53
|
+
version: '12'
|
48
54
|
type: :development
|
49
55
|
prerelease: false
|
50
56
|
version_requirements: !ruby/object:Gem::Requirement
|
51
57
|
requirements:
|
52
58
|
- - "~>"
|
53
59
|
- !ruby/object:Gem::Version
|
54
|
-
version: '
|
60
|
+
version: '12'
|
55
61
|
- !ruby/object:Gem::Dependency
|
56
62
|
name: rspec
|
57
63
|
requirement: !ruby/object:Gem::Requirement
|
@@ -99,7 +105,7 @@ homepage: http://github.com/twingly/twingly-url
|
|
99
105
|
licenses:
|
100
106
|
- MIT
|
101
107
|
metadata: {}
|
102
|
-
post_install_message:
|
108
|
+
post_install_message:
|
103
109
|
rdoc_options: []
|
104
110
|
require_paths:
|
105
111
|
- lib
|
@@ -107,16 +113,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
107
113
|
requirements:
|
108
114
|
- - "~>"
|
109
115
|
- !ruby/object:Gem::Version
|
110
|
-
version: '2.
|
116
|
+
version: '2.5'
|
111
117
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
112
118
|
requirements:
|
113
119
|
- - ">="
|
114
120
|
- !ruby/object:Gem::Version
|
115
121
|
version: '0'
|
116
122
|
requirements: []
|
117
|
-
|
118
|
-
|
119
|
-
signing_key:
|
123
|
+
rubygems_version: 3.1.2
|
124
|
+
signing_key:
|
120
125
|
specification_version: 4
|
121
126
|
summary: Ruby library for URL handling
|
122
127
|
test_files: []
|