url_grey 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/.travis.yml +4 -0
- data/Gemfile +4 -0
- data/README.md +39 -0
- data/Rakefile +7 -0
- data/bin/console +14 -0
- data/bin/setup +7 -0
- data/lib/url_grey.rb +256 -0
- data/lib/url_grey/version.rb +3 -0
- data/url_grey.gemspec +27 -0
- metadata +124 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: e95f9c5eda3bb27c6c30f0cdd08aa46969030b22
|
4
|
+
data.tar.gz: cf5c2c804e16c7463274f117f1ef5d2c2222bbc7
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: c658e0642443497b7c33cb90f9c359deedf4b30dabf6d63fd252577ca77ea08b0eede2fa454fa5898cfb752c93de7cece4f12d93e06db2346837e44771faa7bf
|
7
|
+
data.tar.gz: d0e08cb022a5d7b23cfa54fde82bd99cdd6a74cb361d1d140166f211f5601ac41dcf35b8e795be61c3b0a1c8d98baac53eb903ef67b2e791a67d22c503841512
|
data/.gitignore
ADDED
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/README.md
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
# URLGrey
|
2
|
+
|
3
|
+
This attempts to copy chromium's algorithm for making sense of things
|
4
|
+
typed into the url bar. You can download the [chromium source] to play
|
5
|
+
along, but note that it is currently 2.1 GB.
|
6
|
+
|
7
|
+
The ported code is very similar to how it is written in the original
|
8
|
+
C++. It is a great example of the imperative style of programming by
|
9
|
+
state mutation. I'm not gonna lie, it's pretty gross. But hey, it passes
|
10
|
+
the tests.
|
11
|
+
|
12
|
+
## Usage
|
13
|
+
|
14
|
+
Some examples from the tests:
|
15
|
+
|
16
|
+
```ruby
|
17
|
+
URLGrey.new("google.com").fixed
|
18
|
+
#=> "http://google.com/"
|
19
|
+
|
20
|
+
URLGrey.new("www.google.com#foo").fixed
|
21
|
+
#=> "http://www.google.com/#foo"
|
22
|
+
|
23
|
+
URLGrey.new("\u6C34.com").fixed
|
24
|
+
#=> "http://xn--1rw.com/"
|
25
|
+
|
26
|
+
URLGrey.new("http://foo.com/s?q=\uC5C5").fixed
|
27
|
+
#=> "http://foo.com/s?q=%EC%97%85"
|
28
|
+
|
29
|
+
URLGrey.new("http;/www.google.com/").fixed
|
30
|
+
#=> "http://www.google.com/"
|
31
|
+
|
32
|
+
URLGrey.new(" foo.com/asdf bar").fixed
|
33
|
+
#=> "http://foo.com/asdf%20%20bar"
|
34
|
+
|
35
|
+
URLGrey.new("[::]:80/path").fixed
|
36
|
+
#=> "http://[::]/path"
|
37
|
+
```
|
38
|
+
|
39
|
+
[chromium source]: https://chromium.googlesource.com/chromium/chromium/
|
data/Rakefile
ADDED
data/bin/console
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "bundler/setup"
|
4
|
+
require "url_grey"
|
5
|
+
|
6
|
+
# You can add fixtures and/or initialization code here to make experimenting
|
7
|
+
# with your gem easier. You can also use a different console, if you like.
|
8
|
+
|
9
|
+
# (If you use this, don't forget to add pry to your Gemfile!)
|
10
|
+
# require "pry"
|
11
|
+
# Pry.start
|
12
|
+
|
13
|
+
require "irb"
|
14
|
+
IRB.start
|
data/bin/setup
ADDED
data/lib/url_grey.rb
ADDED
@@ -0,0 +1,256 @@
|
|
1
|
+
require "simpleidn"
|
2
|
+
|
3
|
+
require "url_grey/version"
|
4
|
+
|
5
|
+
# Parses text typed into a URL bar and rebuilds it as a normalized URL.
#
# The rules are ported from Chromium's URL fixer (see the source references on
# the individual methods). The input is parsed once, in the constructor; the
# +fixed_*+ methods then render each normalized component and #fixed joins
# them together.
#
#   URLGrey.new("google.com").fixed            #=> "http://google.com/"
#   URLGrey.new("http;/www.google.com/").fixed #=> "http://www.google.com/"
class URLGrey
  AUTHORITY_TERMINATORS = "/\\?#".freeze
  ABOUT_BLANK_URL       = "about:blank".freeze
  # Path characters that are emitted unchanged.
  PATH_PASS_CHARS       = "!$&'()*+,/:;=@[]".freeze
  PATH_UNESCAPE_CHARS   = "-0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz~".freeze
  HOST_ESCAPE_CHARS     = " !\"\#$&'()*,<=>@`{|}".freeze
  # Host characters that are emitted unchanged; every other ASCII character is
  # percent-escaped.
  HOST_NORMAL_CHARS     = "+-.0123456789:ABCDEFGHIJKLMNOPQRSTUVWXYZ[]_abcdefghijklmnopqrstuvwxyz".freeze
  # Host substituted when an about:/chrome: URL has no host of its own.
  HOST_CHROME_DEFAULT   = "version".freeze
  # Query bytes that are emitted unchanged.
  QUERY_NORMAL_CHARS    = "!$%&()*+,-./0123456789:;=?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~".freeze
  # Ports that are omitted from the output when they match the scheme.
  DEFAULT_PORTS = {
    ftp:    21,
    gopher: 70,
    http:   80,
    https:  443,
    ws:     80,
    wss:    443,
  }.freeze
  STANDARD_SCHEMES = ['http', 'https', 'file', 'ftp', 'gopher', 'ws', 'wss', 'filesystem'].freeze

  attr_accessor :original, :coerced
  attr_accessor :scheme, :username, :password, :host, :port, :path, :query, :ref
  attr_accessor :slashes

  # @param _original [String] raw user input; leading whitespace is discarded
  #   before parsing.
  def initialize(_original)
    self.original = _original.sub(/\A\s*/, '')

    parse!
  end

  # @return [Hash{Symbol => String, nil}] the raw (not yet normalized) parsed
  #   components. +:query+ and +:ref+ are nil when absent; the other values
  #   are strings (possibly empty).
  def parts
    {
      scheme:   scheme,
      username: username,
      password: password,
      host:     host,
      port:     port,
      path:     path,
      query:    query,
      ref:      ref
    }
  end

  # @return [String] the complete normalized URL. "about:blank" is returned
  #   untouched.
  def fixed
    return ABOUT_BLANK_URL if original == ABOUT_BLANK_URL

    "#{fixed_scheme}#{fixed_credentials}#{fixed_host}#{fixed_port}#{fixed_path}#{fixed_query}#{fixed_ref}"
  end

  # @return [String] "user@", "user:password@", or "" when no credentials
  #   were given.
  def fixed_credentials
    return "" if username.empty? && password.empty?
    return "#{username}@" if password.empty?

    "#{username}:#{password}@"
  end

  # from components/url_formatter/url_fixer.cc FixupHost
  #
  # Strips whitespace, lower-cases, trims leading dots and collapses trailing
  # dots to a single one. ASCII hosts get disallowed characters
  # percent-escaped; non-ASCII hosts are converted with SimpleIDN.
  #
  # @return [String] the normalized host
  def fixed_host
    result = host.gsub(/\s/, '').downcase
    # A host made only of dots is left alone.
    unless result =~ /\A\.*\z/
      result = result.sub(/\A\.*/, '')
      result = result.sub(/(?<=\.)\.*\z/, '')
    end
    result = HOST_CHROME_DEFAULT if result.empty? && ["about", "chrome"].include?(scheme)

    return SimpleIDN.to_ascii(result) unless result.ascii_only?

    result.chars.map do |char|
      HOST_NORMAL_CHARS.include?(char) ? char : percent_encode(char)
    end.join
  end

  # from url/url_canon_path.cc CanonicalizePath
  #
  # Ensures a leading slash for standard schemes and percent-escapes every
  # character that is not explicitly allowed. Escaping is done per UTF-8 byte
  # with two hex digits each, so "\u00E9" becomes "%C3%A9" and "\t" becomes
  # "%09".
  #
  # @return [String] the normalized path
  def fixed_path
    result = path
    result = '/' + result if result[0] != '/' && slashed_scheme?(scheme)

    result.chars.map do |char|
      # TODO: if a dot is preceded by a slash, do directory stuff:
      # google.com/abc/.././def -> google.com/def
      if PATH_PASS_CHARS.include?(char) || PATH_UNESCAPE_CHARS.include?(char) || char == "."
        char
      else
        percent_encode(char)
      end
    end.join
  end

  # @return [String] ":port", or "" when no port was given or it is the
  #   default port for the scheme.
  def fixed_port
    return "" if port.empty? || port.to_i == DEFAULT_PORTS[scheme.to_sym]

    ":#{port}"
  end

  # @return [String] "?query" with disallowed bytes percent-escaped (always
  #   two hex digits per byte), or "" when the URL has no query.
  def fixed_query
    return "" if query.nil?

    # Computed once per call rather than once per byte.
    normal_bytes = QUERY_NORMAL_CHARS.bytes
    escaped = query.bytes.map do |byte|
      normal_bytes.include?(byte) ? [byte].pack("U") : format("%%%02X", byte)
    end.join
    "?#{escaped}"
  end

  # @return [String] "#ref", or "" when the URL has no fragment.
  def fixed_ref
    return "" if ref.nil?

    "\##{ref}"
  end

  # @return [String] the scheme plus separator. "about" is rewritten to
  #   "chrome"; standard schemes always get "://", others keep whatever
  #   slashes the input had.
  def fixed_scheme
    name = scheme == "about" ? "chrome" : scheme

    if slashed_scheme?(name)
      "#{name}://"
    else
      "#{name}:#{slashes}"
    end
  end

  private

  # True for schemes that are rendered with "://" and a leading-slash path.
  def slashed_scheme?(name)
    (STANDARD_SCHEMES + ["about", "chrome"]).include?(name)
  end

  # Percent-escapes every UTF-8 byte of +char+ as "%XX" (upper-case hex,
  # zero-padded to two digits).
  def percent_encode(char)
    char.bytes.map { |byte| format("%%%02X", byte) }.join
  end

  # Splits the coerced input into scheme, slashes, credentials, host, port,
  # path, query and ref, storing each on the instance.
  def parse!
    parse_scheme!
    after_scheme = coerced.match(%r{:(.*)})[1]
    self.slashes, after_slashes = after_scheme.match(%r{^([\\\/]*)(.*)$})[1..2]

    # authority terminators: '/', '\', '?', '#'
    split = after_slashes.match(%r{^(.*?)([\\\/?#].*)$})
    authority, full_path = split ? split[1..2] : [after_slashes, ""]

    # Everything before the last "@" is user info.
    user_info, server_info =
      if authority.include?("@")
        authority.match(%r{^(.*)@(.*)$})[1..2]
      else
        ["", authority]
      end

    parse_user_info!(user_info)
    parse_server_info!(server_info)
    parse_full_path!(full_path)
  end

  # Splits "user:password" at the first colon; missing parts become "".
  def parse_user_info!(user_info)
    self.username, _separator, self.password = user_info.partition(":")
  end

  # Splits "host:port" at the last colon, unless that colon belongs to a
  # bracketed (IPv6-style) host.
  def parse_server_info!(server_info)
    colon   = server_info.rindex(":")
    bracket = server_info.rindex("]")

    has_port_part =
      if colon.nil?
        false
      elsif bracket
        colon > bracket # only a colon after the closing bracket starts a port
      else
        !server_info.start_with?("[") # unterminated "[" means it is all host
      end

    if has_port_part
      self.host = server_info[0...colon]
      self.port = server_info[(colon + 1)..-1]
    else
      self.host = server_info
      self.port = ""
    end
  end

  # Splits "path?query#ref"; query and ref are nil when their delimiter is
  # absent.
  def parse_full_path!(full_path)
    before_ref, hash_mark, fragment = full_path.partition("#")
    self.ref = hash_mark.empty? ? nil : fragment

    path_part, question_mark, query_part = before_ref.partition("?")
    self.path  = path_part
    self.query = question_mark.empty? ? nil : query_part
  end

  # Determines the scheme, coercing the input first when needed: a mistyped
  # ";" is treated as ":" if that yields a scheme, and scheme-less input gets
  # "ftp://" (when it starts with "ftp.") or "http://" prepended.
  def parse_scheme!
    self.coerced = original

    if !find_scheme(original) && original[0] != ";"
      with_colon = original.sub(";", ":")
      self.coerced = with_colon if find_scheme(with_colon)
    end

    unless find_scheme(coerced)
      prefix = coerced =~ /^ftp\./i ? "ftp://" : "http://"
      self.coerced = prefix + coerced
    end

    self.scheme = find_scheme(coerced) || ""
  end

  # @return [String, false] the lower-cased scheme in front of the first
  #   colon, "" when the text before the colon is empty, or false when there
  #   is no usable scheme.
  def find_scheme(text)
    # extract scheme
    match = text.match(%r{^(.*?):})
    return false unless match

    component = match[1].downcase
    return "" if component.empty?

    # first character must be a letter
    return false unless component =~ /^[a-z]/

    # reject anything with invalid characters
    return false unless component =~ /^[+\-0-9a-z]*$/

    # fix up segmentation for "www:123/"
    return false if has_port(text)

    component
  end

  # Truthy when the text after the first colon (up to the next authority
  # terminator or end of line) consists only of digits.
  def has_port(text)
    return false unless text.include?(":")

    match = text.match(%r{:(.*?)[\\/\?#]}) || text.match(%r{:(.*)$})
    match[1].match(%r{^\d+$})
  end
end
|
data/url_grey.gemspec
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'url_grey/version'
|
5
|
+
|
6
|
+
# Gem specification for url_grey: identity, packaged files and dependencies.
Gem::Specification.new do |gem|
  # Identity
  gem.name     = "url_grey"
  gem.version  = URLGrey::VERSION
  gem.authors  = ["Stacey Touset"]
  gem.email    = ["capicue@gmail.com"]

  gem.summary  = "Coerce and normalize user inputted URLs"
  gem.homepage = "https://github.com/capicue/url_grey"
  gem.licenses = ["MIT"]

  # Package every git-tracked file except tests/specs/features.
  tracked_files = `git ls-files -z`.split("\x0")
  gem.files = tracked_files.reject do |file|
    file.match(%r{^(test|spec|features)/})
  end
  gem.bindir        = "exe"
  gem.executables   = gem.files.grep(%r{^exe/}) { |file| File.basename(file) }
  gem.require_paths = ["lib"]

  # Runtime dependency
  gem.add_dependency "simpleidn", "~> 0.0.7"

  # Development dependencies
  gem.add_development_dependency "bundler", "~> 1.10"
  gem.add_development_dependency "rake", "~> 10.0"
  gem.add_development_dependency "minitest", "~> 5.8"
  gem.add_development_dependency "pry", "~> 0.10"
end
|
metadata
ADDED
@@ -0,0 +1,124 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: url_grey
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Stacey Touset
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2017-02-17 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: simpleidn
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 0.0.7
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 0.0.7
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: bundler
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '1.10'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '1.10'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rake
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '10.0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '10.0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: minitest
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '5.8'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '5.8'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: pry
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - "~>"
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0.10'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0.10'
|
83
|
+
description:
|
84
|
+
email:
|
85
|
+
- capicue@gmail.com
|
86
|
+
executables: []
|
87
|
+
extensions: []
|
88
|
+
extra_rdoc_files: []
|
89
|
+
files:
|
90
|
+
- ".gitignore"
|
91
|
+
- ".travis.yml"
|
92
|
+
- Gemfile
|
93
|
+
- README.md
|
94
|
+
- Rakefile
|
95
|
+
- bin/console
|
96
|
+
- bin/setup
|
97
|
+
- lib/url_grey.rb
|
98
|
+
- lib/url_grey/version.rb
|
99
|
+
- url_grey.gemspec
|
100
|
+
homepage: https://github.com/capicue/url_grey
|
101
|
+
licenses:
|
102
|
+
- MIT
|
103
|
+
metadata: {}
|
104
|
+
post_install_message:
|
105
|
+
rdoc_options: []
|
106
|
+
require_paths:
|
107
|
+
- lib
|
108
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
109
|
+
requirements:
|
110
|
+
- - ">="
|
111
|
+
- !ruby/object:Gem::Version
|
112
|
+
version: '0'
|
113
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - ">="
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '0'
|
118
|
+
requirements: []
|
119
|
+
rubyforge_project:
|
120
|
+
rubygems_version: 2.5.1
|
121
|
+
signing_key:
|
122
|
+
specification_version: 4
|
123
|
+
summary: Coerce and normalize user inputted URLs
|
124
|
+
test_files: []
|