url_grey 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: e95f9c5eda3bb27c6c30f0cdd08aa46969030b22
4
+ data.tar.gz: cf5c2c804e16c7463274f117f1ef5d2c2222bbc7
5
+ SHA512:
6
+ metadata.gz: c658e0642443497b7c33cb90f9c359deedf4b30dabf6d63fd252577ca77ea08b0eede2fa454fa5898cfb752c93de7cece4f12d93e06db2346837e44771faa7bf
7
+ data.tar.gz: d0e08cb022a5d7b23cfa54fde82bd99cdd6a74cb361d1d140166f211f5601ac41dcf35b8e795be61c3b0a1c8d98baac53eb903ef67b2e791a67d22c503841512
@@ -0,0 +1,9 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
@@ -0,0 +1,4 @@
1
+ language: ruby
2
+ rvm:
3
+ - 2.2.2
4
+ before_install: gem install bundler -v 1.10.6
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in url_grey.gemspec
4
+ gemspec
@@ -0,0 +1,39 @@
1
+ # URLGrey
2
+
3
+ This attempts to copy Chromium's algorithm for making sense of things
4
+ typed into the url bar. You can download the [chromium source] to play
5
+ along, but note that it is currently 2.1 GB.
6
+
7
+ The ported code is very similar to how it is written in the original
8
+ C++. It is a great example of the imperative style of programming by
9
+ state mutation. I'm not gonna lie, it's pretty gross. But hey, it passes
10
+ the tests.
11
+
12
+ ## Usage
13
+
14
+ Some examples from the tests:
15
+
16
+ ```ruby
17
+ URLGrey.new("google.com").fixed
18
+ #=> "http://google.com/"
19
+
20
+ URLGrey.new("www.google.com#foo").fixed
21
+ #=> "http://www.google.com/#foo"
22
+
23
+ URLGrey.new("\u6C34.com").fixed
24
+ #=> "http://xn--1rw.com/"
25
+
26
+ URLGrey.new("http://foo.com/s?q=\uC5C5").fixed
27
+ #=> "http://foo.com/s?q=%EC%97%85"
28
+
29
+ URLGrey.new("http;/www.google.com/").fixed
30
+ #=> "http://www.google.com/"
31
+
32
+ URLGrey.new(" foo.com/asdf  bar").fixed
33
+ #=> "http://foo.com/asdf%20%20bar"
34
+
35
+ URLGrey.new("[::]:80/path").fixed
36
+ #=> "http://[::]/path"
37
+ ```
38
+
39
+ [chromium source]: https://chromium.googlesource.com/chromium/chromium/
@@ -0,0 +1,7 @@
1
+ require 'bundler/gem_tasks'
2
+ require 'rake/testtask'
3
+
4
# Registers the gem's test task. No task name is passed, so Rake::TestTask's
# own default name applies. The `test` directory is put on the load path and
# every file matching test/*_test.rb is run.
Rake::TestTask.new { |test_task|
  test_task.libs.push('test')
  test_task.pattern = "test/*_test.rb"
}
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "url_grey"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require "irb"
14
+ IRB.start
@@ -0,0 +1,7 @@
1
+ #!/bin/bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+
5
+ bundle install
6
+
7
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,256 @@
1
+ require "simpleidn"
2
+
3
+ require "url_grey/version"
4
+
5
# Coerces text typed into a URL bar into a normalized URL string, following
# the approach of Chromium's URL fixer.
#
# The input is split into components during construction (see #parts); the
# +fixed_*+ methods then render each component in its normalized form and
# #fixed concatenates them.
#
#   URLGrey.new("google.com").fixed  #=> "http://google.com/"
class URLGrey
  AUTHORITY_TERMINATORS = "/\\?#".freeze
  ABOUT_BLANK_URL = "about:blank".freeze
  # Path characters that are emitted unchanged.
  PATH_PASS_CHARS = "!$&'()*+,/:;=@[]".freeze
  PATH_UNESCAPE_CHARS = "-0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz~".freeze
  HOST_ESCAPE_CHARS = " !\"\#$&'()*,<=>@`{|}".freeze
  # Host characters that are emitted unchanged; any other ASCII is escaped.
  HOST_NORMAL_CHARS = "+-.0123456789:ABCDEFGHIJKLMNOPQRSTUVWXYZ[]_abcdefghijklmnopqrstuvwxyz".freeze
  # Host substituted when an about:/chrome: URL has an empty host.
  HOST_CHROME_DEFAULT = "version".freeze
  # Query characters that are emitted unchanged.
  QUERY_NORMAL_CHARS = "!$%&()*+,-./0123456789:;=?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~".freeze
  # Ports omitted from the output when they match the scheme.
  DEFAULT_PORTS = {
    ftp: 21,
    gopher: 70,
    http: 80,
    https: 443,
    ws: 80,
    wss: 443,
  }.freeze
  STANDARD_SCHEMES = ['http', 'https', 'file', 'ftp', 'gopher', 'ws', 'wss', 'filesystem'].freeze

  # Schemes rendered as "scheme://" and given a leading "/" on their path.
  SLASHED_SCHEMES = (STANDARD_SCHEMES + ["about", "chrome"]).freeze
  # Byte values of QUERY_NORMAL_CHARS, computed once rather than per byte.
  QUERY_NORMAL_BYTES = QUERY_NORMAL_CHARS.bytes.freeze
  private_constant :SLASHED_SCHEMES, :QUERY_NORMAL_BYTES

  attr_accessor :original, :coerced
  attr_accessor :scheme, :username, :password, :host, :port, :path, :query, :ref
  attr_accessor :slashes

  # Parses +_original+ immediately; leading whitespace is discarded first.
  #
  # @param _original [String] the raw text to interpret as a URL
  def initialize(_original)
    self.original = _original.sub(%r{^\s*}, '')

    parse!
  end

  # @return [Hash] the raw (un-normalized) components found while parsing.
  #   +query+ and +ref+ are nil when absent; the others are strings.
  def parts
    {
      scheme: scheme,
      username: username,
      password: password,
      host: host,
      port: port,
      path: path,
      query: query,
      ref: ref
    }
  end

  # @return [String] the normalized URL. "about:blank" is passed through
  #   untouched.
  def fixed
    # Hand back a copy so callers can mutate the result without touching
    # the (frozen) constant.
    return ABOUT_BLANK_URL.dup if original == ABOUT_BLANK_URL

    "#{fixed_scheme}#{fixed_credentials}#{fixed_host}#{fixed_port}#{fixed_path}#{fixed_query}#{fixed_ref}"
  end

  # @return [String] "user@", "user:password@", or "" when both are empty.
  def fixed_credentials
    return "" if username.empty? && password.empty?
    return "#{username}@" if password.empty?
    "#{username}:#{password}@"
  end

  # from components/url_formatter/url_fixer.cc FixupHost
  #
  # Strips whitespace, lowercases, trims leading dots and collapses trailing
  # dots to one (unless the host is nothing but dots), then escapes ASCII
  # hosts or hands non-ASCII hosts to SimpleIDN.
  #
  # @return [String] the normalized host
  def fixed_host
    fixed = host.gsub(%r{\s}, '').downcase
    unless fixed.match(%r{^\.*$})
      fixed = fixed.sub(%r{^\.*}, '')
      fixed = fixed.sub(%r{(?<=\.)\.*$}, '')
    end
    if fixed.empty? && ["about", "chrome"].include?(scheme)
      fixed = HOST_CHROME_DEFAULT
    end

    if fixed.match(%r{^[[:ascii:]]*$})
      fixed.chars.map do |char|
        HOST_NORMAL_CHARS.include?(char) ? char : percent_encode(char)
      end.join("")
    else
      SimpleIDN.to_ascii(fixed)
    end
  end

  # from url/url_canon_path.cc CanonicalizePath
  #
  # Ensures a leading "/" for slashed schemes and percent-encodes every
  # character that is not explicitly allowed through.
  #
  # @return [String] the normalized path
  def fixed_path
    fixed = path
    if (fixed[0] != '/') && SLASHED_SCHEMES.include?(scheme)
      fixed = '/' + fixed
    end

    fixed.chars.map do |char|
      if PATH_PASS_CHARS.include?(char) || PATH_UNESCAPE_CHARS.include?(char)
        char
      elsif char == "."
        # TODO: if the dot is preceded by a slash, do directory stuff:
        # google.com/abc/.././def -> google.com/def
        char
      else
        # Encoded per UTF-8 byte so multi-byte characters yield valid escapes.
        percent_encode(char)
      end
    end.join("")
  end

  # @return [String] ":port", or "" when the port is empty or is the default
  #   for the scheme.
  def fixed_port
    return "" if port.empty? || port.to_i == DEFAULT_PORTS[scheme.to_sym]
    ":#{port}"
  end

  # @return [String] "?" followed by the query with every byte outside
  #   QUERY_NORMAL_CHARS percent-encoded, or "" when there is no query.
  def fixed_query
    return "" if query.nil?

    encoded = query.bytes.map do |byte|
      if QUERY_NORMAL_BYTES.include?(byte)
        [byte].pack("U")
      else
        # Always two hex digits, so e.g. a tab becomes %09 rather than %9.
        format("%%%02X", byte)
      end
    end.join('')
    "?#{encoded}"
  end

  # @return [String] "#" followed by the fragment, or "" when there is none.
  def fixed_ref
    return "" if ref.nil?
    "\##{ref}"
  end

  # @return [String] the scheme with its separator. "about" is rewritten to
  #   "chrome"; slashed schemes always get "://", others keep the slashes
  #   that were typed.
  def fixed_scheme
    fixed = scheme == "about" ? "chrome" : scheme

    if SLASHED_SCHEMES.include?(fixed)
      "#{fixed}://"
    else
      "#{fixed}:#{slashes}"
    end
  end

  private

  # Percent-encodes each UTF-8 byte of +text+ as "%XX" (uppercase, always two
  # hex digits).
  def percent_encode(text)
    text.bytes.map { |byte| format("%%%02X", byte) }.join
  end

  # Splits the coerced input into scheme, slashes, authority and path parts
  # and stores each component on the instance.
  def parse!
    parse_scheme!
    after_scheme = coerced.match(%r{:(.*)})[1]
    self.slashes, after_slashes = after_scheme.match(%r{^([\\\/]*)(.*)$})[1..2]

    authority, full_path = split_authority(after_slashes)

    if authority.include?("@")
      user_info, server_info = authority.match(%r{^(.*)@(.*)$})[1..2]
    else
      user_info = ""
      server_info = authority
    end

    parse_user_info!(user_info)
    parse_server_info!(server_info)
    parse_full_path!(full_path)
  end

  # Splits +text+ at the first authority terminator ('/', '\', '?', '#').
  # Returns [authority, rest]; rest is "" when no terminator is present.
  def split_authority(text)
    if (text.chars & ['/', '\\', '?', '#']).any?
      text.match(%r{^(.*?)([\\\/?#].*)$})[1..2]
    else
      [text, ""]
    end
  end

  # Stores username/password from "user" or "user:password" text.
  def parse_user_info!(user_info)
    if user_info.include?(":")
      self.username, self.password = user_info.match(%r{^(.*?):(.*)$})[1..2]
    else
      self.username = user_info
      self.password = ""
    end
  end

  # Stores host/port from "host", "host:port" or a bracketed IPv6 literal.
  def parse_server_info!(server_info)
    has_colon = server_info.include?(":")
    port_follows_host =
      if !has_colon
        false
      elsif server_info.include?("]")
        # Only a colon after the closing bracket separates a port.
        server_info.rindex(":") > server_info.rindex("]")
      else
        # An unterminated "[..." is treated as all host.
        server_info.chars.first != "["
      end

    if port_follows_host
      self.host, self.port = server_info.match(%r{^(.*):(.*)$})[1..2]
    else
      self.host = server_info
      self.port = ""
    end
  end

  # Stores path, query and ref from everything after the authority.
  def parse_full_path!(full_path)
    if full_path.include?("#")
      before_ref, self.ref = full_path.match(%r{^(.*?)#(.*)$})[1..2]
    else
      before_ref = full_path
      self.ref = nil
    end

    if before_ref.include?("?")
      self.path, self.query = before_ref.match(%r{^(.*?)\?(.*)$})[1..2]
    else
      self.path = before_ref
      self.query = nil
    end
  end

  # Determines the scheme, repairing a mistyped "scheme;" and prepending
  # "ftp://" or "http://" when the input has no usable scheme.
  def parse_scheme!
    self.coerced = original

    if !find_scheme(original) && (original[0] != ";")
      candidate = original.sub(";", ":")
      self.coerced = candidate if find_scheme(candidate)
    end

    unless find_scheme(coerced)
      prefix = coerced.match(%r{^ftp\.}i) ? "ftp://" : "http://"
      self.coerced = prefix + coerced
    end

    self.scheme = find_scheme(coerced) || ""
  end

  # Returns the lowercased scheme of +text+, "" when the text starts with a
  # colon, or false when no valid scheme is present.
  def find_scheme(text)
    # extract scheme
    match = text.match(%r{^(.*?):})
    return false unless match

    component = match[1].downcase

    return "" if component.empty?

    # first character must be a letter
    return false unless component.match(%r{^[a-z]})

    # reject anything with invalid characters
    return false unless component.match(%r{^[+\-0-9a-z]*$})

    # fix up segmentation for "www:123/"
    return false if has_port(text)

    component
  end

  # Truthy when the text after the first colon (up to the next terminator or
  # end of line) is all digits, i.e. looks like "host:port".
  def has_port(text)
    return false unless text.include?(":")
    match = text.match(%r{:(.*?)[\\/\?#]}) || text.match(%r{:(.*)$})
    match[1].match(%r{^\d+$})
  end
end
@@ -0,0 +1,3 @@
1
class URLGrey
  # Gem version string; frozen so it cannot be mutated at runtime.
  VERSION = "1.0.0".freeze
end
@@ -0,0 +1,27 @@
1
+ # coding: utf-8
2
# Put the gem's own lib/ directory on the load path so the version file can
# be required before the gem is installed.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'url_grey/version'

Gem::Specification.new do |gem|
  gem.name          = "url_grey"
  gem.version       = URLGrey::VERSION
  gem.authors       = ["Stacey Touset"]
  gem.email         = ["capicue@gmail.com"]

  gem.summary       = "Coerce and normalize user inputted URLs"
  gem.homepage      = "https://github.com/capicue/url_grey"
  gem.licenses      = ["MIT"]

  # Package every git-tracked file except those under test/, spec/ or features/.
  tracked_files = `git ls-files -z`.split("\x0")
  gem.files         = tracked_files.reject { |path| path.match(%r{^(test|spec|features)/}) }
  gem.bindir        = "exe"
  # Executables are the base names of packaged files under exe/.
  gem.executables   = gem.files.grep(%r{^exe/}) { |path| File.basename(path) }
  gem.require_paths = ["lib"]

  gem.add_dependency "simpleidn", "~> 0.0.7"

  gem.add_development_dependency "bundler", "~> 1.10"
  gem.add_development_dependency "rake", "~> 10.0"
  gem.add_development_dependency "minitest", "~> 5.8"
  gem.add_development_dependency "pry", "~> 0.10"
end
metadata ADDED
@@ -0,0 +1,124 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: url_grey
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Stacey Touset
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2017-02-17 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: simpleidn
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: 0.0.7
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: 0.0.7
27
+ - !ruby/object:Gem::Dependency
28
+ name: bundler
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '1.10'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '1.10'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '10.0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '10.0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: minitest
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '5.8'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '5.8'
69
+ - !ruby/object:Gem::Dependency
70
+ name: pry
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '0.10'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '0.10'
83
+ description:
84
+ email:
85
+ - capicue@gmail.com
86
+ executables: []
87
+ extensions: []
88
+ extra_rdoc_files: []
89
+ files:
90
+ - ".gitignore"
91
+ - ".travis.yml"
92
+ - Gemfile
93
+ - README.md
94
+ - Rakefile
95
+ - bin/console
96
+ - bin/setup
97
+ - lib/url_grey.rb
98
+ - lib/url_grey/version.rb
99
+ - url_grey.gemspec
100
+ homepage: https://github.com/capicue/url_grey
101
+ licenses:
102
+ - MIT
103
+ metadata: {}
104
+ post_install_message:
105
+ rdoc_options: []
106
+ require_paths:
107
+ - lib
108
+ required_ruby_version: !ruby/object:Gem::Requirement
109
+ requirements:
110
+ - - ">="
111
+ - !ruby/object:Gem::Version
112
+ version: '0'
113
+ required_rubygems_version: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ">="
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ requirements: []
119
+ rubyforge_project:
120
+ rubygems_version: 2.5.1
121
+ signing_key:
122
+ specification_version: 4
123
+ summary: Coerce and normalize user inputted URLs
124
+ test_files: []