url_grey 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: e95f9c5eda3bb27c6c30f0cdd08aa46969030b22
4
+ data.tar.gz: cf5c2c804e16c7463274f117f1ef5d2c2222bbc7
5
+ SHA512:
6
+ metadata.gz: c658e0642443497b7c33cb90f9c359deedf4b30dabf6d63fd252577ca77ea08b0eede2fa454fa5898cfb752c93de7cece4f12d93e06db2346837e44771faa7bf
7
+ data.tar.gz: d0e08cb022a5d7b23cfa54fde82bd99cdd6a74cb361d1d140166f211f5601ac41dcf35b8e795be61c3b0a1c8d98baac53eb903ef67b2e791a67d22c503841512
@@ -0,0 +1,9 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
@@ -0,0 +1,4 @@
1
+ language: ruby
2
+ rvm:
3
+ - 2.2.2
4
+ before_install: gem install bundler -v 1.10.6
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in url_grey.gemspec
4
+ gemspec
@@ -0,0 +1,39 @@
1
+ # URLGrey
2
+
3
+ This attempts to copy chromium's algorithm for making sense of things
4
+ typed into the url bar. You can download the [chromium source] to play
5
+ along, but note that it is currently 2.1 GB.
6
+
7
+ The ported code is very similar to how it is written in the original
8
+ C++. It is a great example of the imperative style of programming by
9
+ state mutation. I'm not gonna lie, it's pretty gross. But hey, it passes
10
+ the tests.
11
+
12
+ ## Usage
13
+
14
+ Some examples from the tests:
15
+
16
+ ```ruby
17
+ URLGrey.new("google.com").fixed
18
+ #=> "http://google.com/"
19
+
20
+ URLGrey.new("www.google.com#foo").fixed
21
+ #=> "http://www.google.com/#foo"
22
+
23
+ URLGrey.new("\u6C34.com").fixed
24
+ #=> "http://xn--1rw.com/"
25
+
26
+ URLGrey.new("http://foo.com/s?q=\uC5C5").fixed
27
+ #=> "http://foo.com/s?q=%EC%97%85"
28
+
29
+ URLGrey.new("http;/www.google.com/").fixed
30
+ #=> "http://www.google.com/"
31
+
32
+ URLGrey.new(" foo.com/asdf  bar").fixed
33
+ #=> "http://foo.com/asdf%20%20bar"
34
+
35
+ URLGrey.new("[::]:80/path").fixed
36
+ #=> "http://[::]/path"
37
+ ```
38
+
39
+ [chromium source]: https://chromium.googlesource.com/chromium/chromium/
@@ -0,0 +1,7 @@
1
+ require 'bundler/gem_tasks'
2
+ require 'rake/testtask'
3
+
4
# Defines a Rake test task: puts the `test` directory on the load path and
# selects every file matching test/*_test.rb.
Rake::TestTask.new do |task|
  task.libs.push('test')
  task.pattern = "test/*_test.rb"
end
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "url_grey"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require "irb"
14
+ IRB.start
@@ -0,0 +1,7 @@
1
+ #!/bin/bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+
5
+ bundle install
6
+
7
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,256 @@
1
+ require "simpleidn"
2
+
3
+ require "url_grey/version"
4
+
5
# Parses text typed by a user (as into a browser URL bar) into URL components
# and rebuilds a normalized URL from them via #fixed.
#
# Parsing happens once, in the constructor; the `fixed_*` methods each
# normalize one component and #fixed concatenates them.
class URLGrey
  AUTHORITY_TERMINATORS = "/\\?#".freeze
  ABOUT_BLANK_URL = "about:blank".freeze
  # Path characters that are emitted unchanged.
  PATH_PASS_CHARS = "!$&'()*+,/:;=@[]".freeze
  PATH_UNESCAPE_CHARS = "-0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz~".freeze
  HOST_ESCAPE_CHARS = " !\"\#$&'()*,<=>@`{|}".freeze
  # Host characters that are emitted unchanged; any other ASCII character is percent-encoded.
  HOST_NORMAL_CHARS = "+-.0123456789:ABCDEFGHIJKLMNOPQRSTUVWXYZ[]_abcdefghijklmnopqrstuvwxyz".freeze
  # Host substituted when an about:/chrome: URL has an empty host.
  HOST_CHROME_DEFAULT = "version".freeze
  # Query bytes that are emitted unchanged; any other byte is percent-encoded.
  QUERY_NORMAL_CHARS = "!$%&()*+,-./0123456789:;=?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~".freeze
  # Ports that are dropped from the output when they match the scheme.
  DEFAULT_PORTS = {
    ftp: 21,
    gopher: 70,
    http: 80,
    https: 443,
    ws: 80,
    wss: 443,
  }.freeze
  STANDARD_SCHEMES = ['http', 'https', 'file', 'ftp', 'gopher', 'ws', 'wss', 'filesystem'].freeze

  attr_accessor :original, :coerced
  attr_accessor :scheme, :username, :password, :host, :port, :path, :query, :ref
  attr_accessor :slashes

  # @param _original [String] raw user input; leading whitespace is discarded
  #   before parsing.
  def initialize(_original)
    self.original = _original.sub(/\A\s*/, '')

    parse!
  end

  # @return [Hash{Symbol => String, nil}] the parsed (not yet normalized)
  #   components. `query` and `ref` are nil when absent; the others are
  #   strings, possibly empty.
  def parts
    {
      scheme: scheme,
      username: username,
      password: password,
      host: host,
      port: port,
      path: path,
      query: query,
      ref: ref
    }
  end

  # @return [String] the normalized URL. "about:blank" is returned untouched.
  def fixed
    return ABOUT_BLANK_URL if original == ABOUT_BLANK_URL

    "#{fixed_scheme}#{fixed_credentials}#{fixed_host}#{fixed_port}#{fixed_path}#{fixed_query}#{fixed_ref}"
  end

  # @return [String] "user@", "user:password@", or "" when neither is present.
  def fixed_credentials
    return "" if username.empty? && password.empty?
    return "#{username}@" if password.empty?

    "#{username}:#{password}@"
  end

  # from components/url_formatter/url_fixer.cc FixupHost
  #
  # Strips whitespace, lowercases, removes leading dots and collapses trailing
  # dots to one (unless the host consists only of dots). ASCII hosts get
  # characters outside HOST_NORMAL_CHARS percent-encoded; non-ASCII hosts are
  # converted with SimpleIDN.to_ascii.
  #
  # @return [String]
  def fixed_host
    fixed = host.gsub(/\s/, '').downcase
    # Whitespace (including newlines) is gone, so \A/\z and ^/$ coincide here.
    unless fixed =~ /\A\.*\z/
      fixed = fixed.sub(/\A\.*/, '')
      fixed = fixed.sub(/(?<=\.)\.*\z/, '')
    end
    fixed = HOST_CHROME_DEFAULT if fixed.empty? && ["about", "chrome"].include?(scheme)

    return SimpleIDN.to_ascii(fixed) unless fixed.ascii_only?

    fixed.chars.map do |char|
      HOST_NORMAL_CHARS.include?(char) ? char : percent_encode(char)
    end.join("")
  end

  # from url/url_canon_path.cc CanonicalizePath
  #
  # Ensures a leading "/" for standard/about/chrome schemes and percent-encodes
  # every character outside PATH_PASS_CHARS, PATH_UNESCAPE_CHARS and ".".
  # Non-ASCII characters are encoded as their individual bytes.
  #
  # @return [String]
  def fixed_path
    fixed = path
    fixed = '/' + fixed if fixed[0] != '/' && slashed_scheme?(scheme)

    fixed.chars.map do |char|
      # TODO: if a dot is preceded by a slash, do directory stuff:
      # google.com/abc/.././def -> google.com/def
      if char == "." || PATH_PASS_CHARS.include?(char) || PATH_UNESCAPE_CHARS.include?(char)
        char
      else
        percent_encode(char)
      end
    end.join("")
  end

  # @return [String] ":port", or "" when the port is empty or is the default
  #   port for the scheme.
  def fixed_port
    return "" if port.empty? || port.to_i == DEFAULT_PORTS[scheme.to_sym]

    ":#{port}"
  end

  # @return [String] "?query" with every byte outside QUERY_NORMAL_CHARS
  #   percent-encoded, or "" when there is no query.
  def fixed_query
    return "" if query.nil?

    # Computed once per call instead of once per byte.
    normal_bytes = QUERY_NORMAL_CHARS.unpack("U*")
    escaped = query.bytes.map do |byte|
      normal_bytes.include?(byte) ? [byte].pack("U") : format("%%%02X", byte)
    end.join('')
    "?#{escaped}"
  end

  # @return [String] "#ref", or "" when there is no ref.
  def fixed_ref
    return "" if ref.nil?

    "\##{ref}"
  end

  # @return [String] the scheme followed by "://" for standard/chrome schemes
  #   ("about" is rewritten to "chrome"); other schemes keep ":" plus whatever
  #   slashes the input had.
  def fixed_scheme
    fixed = scheme == "about" ? "chrome" : scheme

    if slashed_scheme?(fixed)
      "#{fixed}://"
    else
      "#{fixed}:#{slashes}"
    end
  end

  private

  # Percent-encodes every byte of +text+ as two uppercase hex digits, so
  # low bytes are zero-padded ("\t" -> "%09") and multi-byte characters are
  # encoded byte by byte rather than by code point.
  def percent_encode(text)
    text.bytes.map { |byte| format("%%%02X", byte) }.join("")
  end

  # True for schemes that are always written with "://" and a rooted path.
  def slashed_scheme?(name)
    STANDARD_SCHEMES.include?(name) || ["about", "chrome"].include?(name)
  end

  # Splits the coerced input into components and stores them on the instance.
  def parse!
    parse_scheme!
    after_scheme = coerced.match(/:(.*)/)[1]
    self.slashes, after_slashes = after_scheme.match(%r{^([\\/]*)(.*)$})[1..2]

    authority, full_path = split_authority(after_slashes)
    user_info, server_info = split_user_info(authority)

    parse_user_info!(user_info)
    parse_server_info!(server_info)
    parse_full_path!(full_path)
  end

  # Splits at the first authority terminator ('/', '\', '?', '#').
  # Returns [authority, rest]; rest is "" when no terminator is present.
  def split_authority(text)
    match = text.match(%r{^(.*?)([\\/?#].*)$})
    match ? match[1..2] : [text, ""]
  end

  # Splits at the last "@". Returns [user_info, server_info]; user_info is ""
  # when there is no "@".
  def split_user_info(authority)
    return ["", authority] unless authority.include?("@")

    user_info, _at, server_info = authority.rpartition("@")
    [user_info, server_info]
  end

  # Username is everything before the first ":", password everything after.
  # Both are "" for empty input; password is "" when there is no ":".
  def parse_user_info!(user_info)
    self.username, _colon, self.password = user_info.partition(":")
  end

  # Splits host and port at the last ":" unless that colon sits inside a
  # bracketed literal (before the last "]", or after an unclosed leading "[").
  def parse_server_info!(server_info)
    last_colon = server_info.rindex(":")
    last_bracket = server_info.rindex("]")

    has_port_part =
      if last_colon.nil?
        false
      elsif last_bracket
        last_colon > last_bracket
      else
        !server_info.start_with?("[")
      end

    if has_port_part
      self.host = server_info[0...last_colon]
      self.port = server_info[(last_colon + 1)..-1]
    else
      self.host = server_info
      self.port = ""
    end
  end

  # Splits "path?query#ref" at the first "#" and then the first "?".
  # query/ref are nil (not "") when their delimiter is absent.
  def parse_full_path!(full_path)
    before_ref, hash_mark, fragment = full_path.partition("#")
    self.ref = hash_mark.empty? ? nil : fragment

    self.path, question_mark, query_text = before_ref.partition("?")
    self.query = question_mark.empty? ? nil : query_text
  end

  # Determines the scheme, coercing the input first: a ";" typo is retried as
  # ":", and input with no scheme gets "ftp://" (when it starts with "ftp.")
  # or "http://" prepended.
  def parse_scheme!
    self.coerced = original

    if !find_scheme(original) && original[0] != ";"
      candidate = original.sub(";", ":")
      self.coerced = candidate if find_scheme(candidate)
    end

    unless find_scheme(coerced)
      prefix = coerced.match(/^ftp\./i) ? "ftp://" : "http://"
      self.coerced = prefix + coerced
    end

    self.scheme = find_scheme(coerced) || ""
  end

  # Returns the lowercased scheme, "" when the text before the colon is empty
  # (note: "" is truthy), or false when no valid scheme is found.
  def find_scheme(text)
    match = text.match(/^(.*?):/)
    return false unless match

    component = match[1].downcase
    return "" if component.empty?

    # must start with a letter and contain only scheme characters
    return false unless component =~ /\A[a-z][+\-0-9a-z]*\z/

    # fix up segmentation for "www:123/"
    return false if has_port(text)

    component
  end

  # True when the text after the first colon (up to the next '/', '\', '?',
  # '#' or end of line) is all digits, i.e. looks like "host:port".
  def has_port(text)
    return false unless text.include?(":")

    match = text.match(%r{:(.*?)[\\/\?#]}) || text.match(/:(.*)$/)
    !match[1].match(/\A\d+\z/).nil?
  end
end
@@ -0,0 +1,3 @@
1
class URLGrey
  # Release version of the gem; frozen so it cannot be mutated in place.
  VERSION = "1.0.0".freeze
end
@@ -0,0 +1,27 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'url_grey/version'
5
+
6
# Gem specification for url_grey. Packaged files are taken from
# `git ls-files`, excluding anything under test/, spec/ or features/.
Gem::Specification.new do |gem|
  gem.name = "url_grey"
  gem.version = URLGrey::VERSION
  gem.authors = ["Stacey Touset"]
  gem.email = ["capicue@gmail.com"]

  gem.summary = "Coerce and normalize user inputted URLs"
  gem.homepage = "https://github.com/capicue/url_grey"
  gem.licenses = ["MIT"]

  tracked_files = `git ls-files -z`.split("\x0")
  gem.files = tracked_files.reject { |path| path.match(%r{^(test|spec|features)/}) }
  gem.bindir = "exe"
  # Executables are the basenames of packaged files under exe/.
  gem.executables = gem.files.grep(%r{^exe/}) { |path| File.basename(path) }
  gem.require_paths = ["lib"]

  # Runtime dependency
  gem.add_dependency "simpleidn", "~> 0.0.7"

  # Development-only dependencies
  gem.add_development_dependency "bundler", "~> 1.10"
  gem.add_development_dependency "rake", "~> 10.0"
  gem.add_development_dependency "minitest", "~> 5.8"
  gem.add_development_dependency "pry", "~> 0.10"
end
metadata ADDED
@@ -0,0 +1,124 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: url_grey
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Stacey Touset
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2017-02-17 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: simpleidn
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: 0.0.7
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: 0.0.7
27
+ - !ruby/object:Gem::Dependency
28
+ name: bundler
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '1.10'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '1.10'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '10.0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '10.0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: minitest
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '5.8'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '5.8'
69
+ - !ruby/object:Gem::Dependency
70
+ name: pry
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '0.10'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '0.10'
83
+ description:
84
+ email:
85
+ - capicue@gmail.com
86
+ executables: []
87
+ extensions: []
88
+ extra_rdoc_files: []
89
+ files:
90
+ - ".gitignore"
91
+ - ".travis.yml"
92
+ - Gemfile
93
+ - README.md
94
+ - Rakefile
95
+ - bin/console
96
+ - bin/setup
97
+ - lib/url_grey.rb
98
+ - lib/url_grey/version.rb
99
+ - url_grey.gemspec
100
+ homepage: https://github.com/capicue/url_grey
101
+ licenses:
102
+ - MIT
103
+ metadata: {}
104
+ post_install_message:
105
+ rdoc_options: []
106
+ require_paths:
107
+ - lib
108
+ required_ruby_version: !ruby/object:Gem::Requirement
109
+ requirements:
110
+ - - ">="
111
+ - !ruby/object:Gem::Version
112
+ version: '0'
113
+ required_rubygems_version: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ">="
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ requirements: []
119
+ rubyforge_project:
120
+ rubygems_version: 2.5.1
121
+ signing_key:
122
+ specification_version: 4
123
+ summary: Coerce and normalize user inputted URLs
124
+ test_files: []