url_escape 2009.06.24

Sign up to get free protection for your applications and to get access to all the features.
data/AUTHORS ADDED
@@ -0,0 +1,9 @@
1
+ Following persons have contributed to url_escape.
2
+ (Sorted by number of submitted patches, then alphabetically)
3
+
4
+ 32 TJ Vanderpoel <tj@rubyists.com>
5
+ 12 Trey Dempsey <trey.dempsey@gmail.com>
6
+ 5 Jayson Vaughn <jayson@onedrop.(none)>
7
+ 5 Michael Fellinger <mf@rubyists.com>
8
+ 2 Evan Phoenix <ephoenix@engineyard.com>
9
+ 1 Kevin Berry <kb@rubyists.com>
@@ -0,0 +1,222 @@
1
+ [41e2892 | 2009-06-24 18:16:44 UTC] TJ Vanderpoel <TJ Vanderpoel tj@rubyists.com>
2
+
3
+ * added more RELEASE doc info
4
+
5
+ [8107148 | 2009-06-24 18:09:12 UTC] TJ Vanderpoel <TJ Vanderpoel tj@rubyists.com>
6
+
7
+ * Version 2009.06.24 - Initial Public Release
8
+
9
+ [471517d | 2009-06-24 18:07:53 UTC] TJ Vanderpoel <TJ Vanderpoel tj@rubyists.com>
10
+
11
+ * added copyright to the .rb files, and a LICENSE
12
+
13
+ [fcfd320 | 2009-06-24 18:03:07 UTC] TJ Vanderpoel <TJ Vanderpoel tj@rubyists.com>
14
+
15
+ * fixed copyright task
16
+
17
+ [cdd6fa3 | 2009-06-24 18:02:04 UTC] TJ Vanderpoel <TJ Vanderpoel tj@rubyists.com>
18
+
19
+ * added new copyright
20
+
21
+ [e2906c0 | 2009-06-24 17:36:10 UTC] Jayson Vaughn <jayson@onedrop.(none)>
22
+
23
+ * Added myself as an author. kinda an ego commit
24
+
25
+ [2c45d0a | 2009-06-24 17:31:28 UTC] Kevin Berry <KevinBerry@nrs.us>
26
+
27
+ * modified the RBConfig search to take into account suffixed ruby binaries
28
+
29
+ [411df7a | 2009-06-24 17:13:50 UTC] TJ Vanderpoel <TJ Vanderpoel tj@rubyists.com>
30
+
31
+ * removed gemspec from git, added partial release file
32
+
33
+ [977c411 | 2009-06-24 17:01:20 UTC] Jayson Vaughn <jayson@onedrop.(none)>
34
+
35
+ * Corrected typo to set GEMSPEC.version properly
36
+
37
+ [4f5fff3 | 2009-06-24 16:56:58 UTC] Jayson Vaughn <jayson@onedrop.(none)>
38
+
39
+ * Updated Rakefile to use the jruby URLEscape library if using jruby
40
+
41
+ [4b294eb | 2009-06-24 16:02:28 UTC] TJ Vanderpoel <TJ Vanderpoel tj@rubyists.com>
42
+
43
+ * removed strings which throw errors on 1.9
44
+
45
+ [b238261 | 2009-06-24 15:53:22 UTC] Jayson Vaughn <jayson@onedrop.(none)>
46
+
47
+ * changed hex_table to be upper case, changed specs to test against hex represented with upper case chars
48
+
49
+ [3fe98dd | 2009-06-24 15:43:05 UTC] TJ Vanderpoel <TJ Vanderpoel tj@rubyists.com>
50
+
51
+ * um, rack does not work as expected, added tests to check rack::utils against our strings, unexpected behavior in 1.9.1
52
+
53
+ [994c06a | 2009-06-24 15:35:29 UTC] Jayson Vaughn <jayson@onedrop.(none)>
54
+
55
+ * added java version and spec changes to support it
56
+
57
+ [0a51701 | 2009-06-24 02:05:48 UTC] TJ Vanderpoel <TJ Vanderpoel tj@rubyists.com>
58
+
59
+ * fixed big_bench
60
+
61
+ [39d6b2c | 2009-06-23 23:24:32 UTC] TJ Vanderpoel <TJ Vanderpoel tj@rubyists.com>
62
+
63
+ * added big_bench.rb for a real-life test of the condition. on 1.9 (linux X86) the C does not make as much a difference. TODO: test on other platforms
64
+
65
+ [97949bb | 2009-06-23 22:54:41 UTC] Trey Dempsey <trey.dempsey@gmail.com>
66
+
67
+ * Updated build task for ruby 1.9.1 on windows
68
+
69
+ [19ea12f | 2009-06-23 20:10:47 UTC] TJ Vanderpoel <TJ Vanderpoel tj@rubyists.com>
70
+
71
+ * fixed assertions on benchmarks
72
+
73
+ [b08d38f | 2009-06-23 19:58:26 UTC] Trey Dempsey <trey.dempsey@gmail.com>
74
+
75
+ * Updated build task to check for common issues when building on windows
76
+
77
+ [9e594c4 | 2009-06-23 19:04:51 UTC] Trey Dempsey <trey.dempsey@gmail.com>
78
+
79
+ * Fixed the bacon.rake job so it works correctly in unix.
80
+
81
+ [3899455 | 2009-06-23 18:50:02 UTC] Trey Dempsey <trey.dempsey@gmail.com>
82
+
83
+ * * Modified build task to allow for windows
84
+
85
+ * Modified bacon to allow for windows
86
+
87
+ [fc5a533 | 2009-06-24 06:56:46 UTC] Michael Fellinger <m.fellinger@gmail.com>
88
+
89
+ * Remove some debugging output
90
+
91
+ [43332a6 | 2009-06-24 06:53:44 UTC] Michael Fellinger <m.fellinger@gmail.com>
92
+
93
+ * Make rake build
94
+
95
+ [37c8c6c | 2009-06-23 16:42:00 UTC] Trey Dempsey <trey.dempsey@gmail.com>
96
+
97
+ * Added plus and space to be static as well.
98
+
99
+ [06481df | 2009-06-23 16:40:10 UTC] Trey Dempsey <trey.dempsey@gmail.com>
100
+
101
+ * Fixed issue with rubygems stepping on the extension in ruby >= 1.8.7
102
+
103
+ [169462c | 2009-06-23 16:24:11 UTC] TJ Vanderpoel <bougy.man@gmail.com>
104
+
105
+ * changed load order to fix weirdness in 1.8.7
106
+
107
+ [472af85 | 2009-06-24 06:18:18 UTC] Michael Fellinger <m.fellinger@gmail.com>
108
+
109
+ * Use block chdir
110
+
111
+ [7acd33d | 2009-06-23 16:12:45 UTC] TJ Vanderpoel <bougy.man@gmail.com>
112
+
113
+ * changed require for weird breakage on 1.8.7
114
+
115
+ [cbb73f7 | 2009-06-23 14:03:46 UTC] TJ Vanderpoel <TJ Vanderpoel tj@rubyists.com>
116
+
117
+ * cleaned up bench spec
118
+
119
+ [a2db063 | 2009-06-23 00:53:22 UTC] bougyman <bougyman@gotdebt.nationwide-recovery.com>
120
+
121
+ * added rack dependency
122
+
123
+ [ca11d2b | 2009-06-22 22:55:27 UTC] Trey Dempsey <trey.dempsey@gmail.com>
124
+
125
+ * Started on Rakefile support for Windows
126
+
127
+ [c8dc880 | 2009-06-22 21:44:05 UTC] Trey Dempsey <trey.dempsey@gmail.com>
128
+
129
+ * Reordered variable declaration so that MSVC will compile the code
130
+
131
+ [0e712a1 | 2009-06-22 18:38:45 UTC] TJ Vanderpoel <Jayson Vaughn jv@rubyists.com>
132
+
133
+ * added authors, cleaned up list for TD
134
+
135
+ [1dc7ca8 | 2009-06-22 18:36:47 UTC] TJ Vanderpoel <Jayson Vaughn jv@rubyists.com>
136
+
137
+ * added bench.rb to spec files
138
+
139
+ [b0b4c74 | 2009-06-22 18:35:26 UTC] TJ Vanderpoel <Jayson Vaughn jv@rubyists.com>
140
+
141
+ * added benchmark spec to make sure the speed increase is realized, and a clean rake task
142
+
143
+ [675acda | 2009-06-22 18:21:36 UTC] TJ Vanderpoel <Jayson Vaughn jv@rubyists.com>
144
+
145
+ * Cleaned up variable names
146
+
147
+
148
+
149
+ [fa88077 | 2009-06-22 15:48:14 UTC] Evan Phoenix <ephoenix@engineyard.com>
150
+
151
+ * Clean up new escape code
152
+
153
+ [a0509c6 | 2009-06-22 17:08:32 UTC] TJ Vanderpoel <Jayson Vaughn jv@rubyists.com>
154
+
155
+ * added conditional for :force_encoding (for 1.8.6) in spec
156
+
157
+ [11dda51 | 2009-06-22 16:58:35 UTC] Trey Dempsey <trey.dempsey@gmail.com>
158
+
159
+ * Fixed specs to work with ruby 1.8
160
+
161
+ [cc043f9 | 2009-06-22 16:46:50 UTC] TJ Vanderpoel <Jayson Vaughn jv@rubyists.com>
162
+
163
+ * allocate 3 bytes earlier for speed increase
164
+
165
+ [fbdde47 | 2009-06-22 16:38:36 UTC] TJ Vanderpoel <Jayson Vaughn jv@rubyists.com>
166
+
167
+ * removed gemspec from MANIFEST
168
+
169
+ [0a4b914 | 2009-06-22 16:37:14 UTC] TJ Vanderpoel <Jayson Vaughn jv@rubyists.com>
170
+
171
+ * removed gemspec from manifest
172
+
173
+ [8051954 | 2009-06-22 16:26:26 UTC] Trey Dempsey <trey.dempsey@gmail.com>
174
+
175
+ * Completed handling of high-bit ascii and unicode
176
+
177
+ * Added case of three byte characters
178
+
179
+ [15f2a7e | 2009-06-22 14:10:10 UTC] TJ Vanderpoel <Jayson Vaughn jv@rubyists.com>
180
+
181
+ * Ignore dotfiles in git
182
+
183
+ [cfa1884 | 2009-06-22 14:08:30 UTC] TJ Vanderpoel <Jayson Vaughn jv@rubyists.com>
184
+
185
+ * added plus-escaping, setup task for bacon
186
+
187
+ [858ee57 | 2009-06-22 13:34:30 UTC] TJ Vanderpoel <Jayson Vaughn jv@rubyists.com>
188
+
189
+ * added basic functionality specs
190
+
191
+ [5151cbe | 2009-06-22 12:37:32 UTC] TJ Vanderpoel <Jayson Vaughn jv@rubyists.com>
192
+
193
+ * beginning specs
194
+
195
+ [cf8cba7 | 2009-06-22 07:07:48 UTC] Admin <admin@Treys-Mac.home>
196
+
197
+ * rewrote escape() to support utf-8 sequences
198
+
199
+ [f93f211 | 2009-06-22 02:50:48 UTC] bougyman <bougyman@gotdebt.nationwide-recovery.com>
200
+
201
+ * added description
202
+
203
+ [7d52898 | 2009-06-22 02:50:19 UTC] bougyman <bougyman@gotdebt.nationwide-recovery.com>
204
+
205
+ * added description
206
+
207
+ [29fa399 | 2009-06-22 04:22:42 UTC] Michael Fellinger <m.fellinger@gmail.com>
208
+
209
+ * Fix gemspec
210
+
211
+ [e767029 | 2009-06-22 03:03:50 UTC] Michael Fellinger <m.fellinger@gmail.com>
212
+
213
+ * Make url_escape ready for release
214
+
215
+ [1657c23 | 2009-06-20 18:28:50 UTC] Evan Phoenix <ephoenix@engineyard.com>
216
+
217
+ * Pushing version 2 of (un)escaper
218
+
219
+ [d11fd8f | 2009-06-20 20:24:25 UTC] bougyman <bougyman@gotdebt.nationwide-recovery.com>
220
+
221
+ * first commit
222
+
@@ -0,0 +1,24 @@
1
+ RELEASE
2
+ AUTHORS
3
+ CHANGELOG
4
+ MANIFEST
5
+ README
6
+ Rakefile
7
+ ext/escape.c
8
+ ext/extconf.rb
9
+ spec/bench.rb
10
+ spec/big_bench.rb
11
+ spec/helper.rb
12
+ spec/url_escape.rb
13
+ tasks/authors.rake
14
+ tasks/bacon.rake
15
+ tasks/build.rake
16
+ tasks/changelog.rake
17
+ tasks/clean.rake
18
+ tasks/copyright.rake
19
+ tasks/gem.rake
20
+ tasks/gem_installer.rake
21
+ tasks/manifest.rake
22
+ tasks/release.rake
23
+ tasks/reversion.rake
24
+ tasks/setup.rake
data/README ADDED
@@ -0,0 +1,8 @@
1
+ == Synopsis
2
+ A C extension to ruby enabling fast escape/unescape of
3
+ URL Encoded Strings
4
+
5
+ == Usage
6
+
7
+ URLEscape.unescape("a%20string")
8
+ URLEscape.escape("a string")
data/RELEASE ADDED
@@ -0,0 +1,62 @@
1
+ Rack::Utils and CGI escape and unescape performance boost
2
+ =========================================================
3
+
4
+ As performance boosts are about speed, we'll start with the
5
+ benchmarks. Here is a run of spec/bench.rb, from the url_escape
6
+ source tree.
7
+
8
+ Unescape
9
+ --------
10
+ user system total real
11
+ URLEscape::unescape 0.090000 0.000000 0.090000 ( 0.089190)
12
+ CGI::unescape 2.820000 0.000000 2.820000 ( 2.816234)
13
+ Rack::Utils::unescape 3.140000 0.000000 3.140000 ( 3.137291)
14
+
15
+ Escape
16
+ ------
17
+ user system total real
18
+ URLEscape::escape 0.200000 0.000000 0.200000 ( 0.196100)
19
+ CGI::escape 3.830000 0.010000 3.840000 ( 3.828438)
20
+ Rack::Utils::escape 3.880000 0.010000 3.890000 ( 3.880745)
21
+
22
+
23
+ URLEscape provides these two methods as a C extension, suitable for
24
+ use on ruby 1.8.6-8 and 1.9.1+; tested on linux, XP, and Vista.
25
+
26
+ The jruby version uses the java stdlib's java.net.URLEncoder and
27
+ URLDecoder.
28
+
29
+ ### Why?
30
+
31
+ We came across a bottleneck when regression testing FXC (a web app
32
+ which serves configuration information to the FreeSWITCH softswitch)
33
+ where requests were being delayed in our rack middleware, which parses
34
+ the POST data sent by FreeSWITCH and routes requests to the ramaze
35
+ application for processing. The delay was noticeable under loads of
36
+ only 50 req/second, where rack became the bottleneck, not ramaze, the
37
+ db, or any other factor. Adding the above library (on linux, with
38
+ ruby 1.9.1) removed the delay in rack, pushing the work back to
39
+ the web app where it's free to be as slow as it must.
40
+
41
+ ### What else?
42
+
43
+ The ability of large posts to slow down a web application cannot be
44
+ removed by just speeding up the POST parser. In order to alleviate the
45
+ risk of such large POSTs being used to deny a service, firewall or
46
+ web server throttling or limiting is a more reliable protection to
47
+ enable. Here are a few examples:
48
+
49
+ * Lighttpd: http://lighttpd.net
50
+ * Offers mod_evasive which limits connections per ip, as well as
51
+ the ability to limit the data rate per connection.
52
+
53
+ * Nginx: http://nginx.org
54
+ * Flexible limiting system, per vhost, per user, per connection.
55
+
56
+ * Netfilter/QoS (linux): http://l7-filter.sourceforge.net/
57
+ * Allow classification of HTTP packets so iptables/tc or whatever
58
+ utility you'd like can have the info it needs about the HTTP
59
+ protocol to make limiting/dropping/queueing decisions
60
+
61
+ * Others: Apache, Squid, Litespeed, many others will have various methods
62
+ of limiting size and frequency of requests.
@@ -0,0 +1,100 @@
1
+ # * Encoding: UTF-8
2
+ # Copyright © 2009 Evan Phoenix and The Rubyists, LLC
3
+ # Distributed under the terms of the MIT license.
4
+ # See the LICENSE file which accompanies this software for the full text
5
+ begin; require 'rubygems'; rescue LoadError; end
6
+
7
+ require 'rake'
8
+ require 'rake/clean'
9
+ require 'rake/gempackagetask'
10
+ require 'time'
11
+ require 'date'
12
+
13
+ PROJECT_SPECS = FileList[
14
+ 'spec/url_escape.rb',
15
+ 'spec/bench.rb'
16
+ ]
17
+
18
+ PROJECT_MODULE = 'URLEscape'
19
+ PROJECT_README = 'README'
20
+
21
+ PROJECT_COPYRIGHT_SUMMARY = [
22
+ "# Copyright © 2009 Evan Phoenix and The Rubyists, LLC",
23
+ "# Distributed under the terms of the MIT license.",
24
+ "# See the LICENSE file which accompanies this software for the full text",
25
+ ]
26
+ PROJECT_COPYRIGHT = PROJECT_COPYRIGHT_SUMMARY + [
27
+ "#",
28
+ "# Permission is hereby granted, free of charge, to any person obtaining a copy",
29
+ '# of this software and associated documentation files (the "Software"), to deal',
30
+ "# in the Software without restriction, including without limitation the rights",
31
+ "# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell",
32
+ "# copies of the Software, and to permit persons to whom the Software is",
33
+ "# furnished to do so, subject to the following conditions:",
34
+ "#",
35
+ "# The above copyright notice and this permission notice shall be included in",
36
+ "# all copies or substantial portions of the Software.",
37
+ "#",
38
+ '# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR',
39
+ "# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,",
40
+ "# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE",
41
+ "# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER",
42
+ "# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,",
43
+ "# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN",
44
+ "# THE SOFTWARE."
45
+ ]
46
+
47
+ PROJECT_VERSION =
48
+ if version = ENV['PROJECT_VERSION'] || ENV['VERSION']
49
+ version
50
+ else
51
+ URLEscape::VERSION rescue Date.today.strftime("%Y.%m.%d")
52
+ end
53
+
54
+ # To release the monthly version do:
55
+ # $ PROJECT_VERSION=2009.03 rake release
56
+
57
+ GEMSPEC = Gem::Specification.new{|s|
58
+ s.name = "url_escape"
59
+ s.description = "Fast replacement for CGI.escape and Rack::Utils.escape"
60
+ s.authors = ["Evan Phoenix", "TJ Vanderpoel", "Michael Fellinger", "Trey Dempsey", "Jayson Vaughn"]
61
+ s.summary = "Fast url_escape library written in C"
62
+ s.email = "manveru@rubyists.com"
63
+ s.homepage = "http://github.com/bougyman/seedling"
64
+ s.rubyforge_project = "url-escape"
65
+ s.platform = Gem::Platform::RUBY
66
+ s.version = PROJECT_VERSION
67
+ s.files = %w(RELEASE AUTHORS CHANGELOG MANIFEST README Rakefile) +
68
+ Dir['ext/*.{c,h,rb}'] +
69
+ Dir['spec/*.rb'] +
70
+ Dir['tasks/*.rake']
71
+ s.has_rdoc = false
72
+ s.extensions = ['ext/extconf.rb']
73
+ s.require_path = "ext"
74
+ }
75
+
76
+ if RUBY_PLATFORM =~ /mswin/
77
+ # Win specific stuff
78
+ elsif RUBY_PLATFORM =~ /java/
79
+ GEMSPEC.platform = 'java'
80
+ GEMSPEC.files += %w[ lib/url_escape.rb ]
81
+ GEMSPEC.require_path = "lib"
82
+ GEMSPEC.extensions = nil
83
+ end
84
+
85
+ Dir.glob('tasks/*.rake'){|f| import(f) }
86
+
87
+ task :default => [:bacon]
88
+
89
+ CLEAN.include %w[
90
+ **/.*.sw?
91
+ *.gem
92
+ .config
93
+ **/*~
94
+ **/{data.db,cache.yaml}
95
+ *.yaml
96
+ pkg
97
+ rdoc
98
+ ydoc
99
+ *coverage*
100
+ ]
@@ -0,0 +1,187 @@
1
+ /**
2
+ Copyright © 2009 Evan Phoenix and The Rubyists, LLC
3
+ Distributed under the terms of the MIT license.
4
+ See the LICENSE file which accompanies this software for the full text
5
+ */
6
+
7
+ #include "ruby.h"
8
+
9
+ #ifndef RSTRING_PTR
10
+ #define RSTRING_PTR(obj) RSTRING(obj)->ptr
11
+ #endif
12
+
13
+ #ifndef RSTRING_LEN
14
+ #define RSTRING_LEN(obj) RSTRING(obj)->len
15
+ #endif
16
+
17
/* Single-byte replacements appended by unescape() and escape(). */
static const char space[1] = { ' ' };
static const char plus[1]  = { '+' };

/*
 * Percent-encoding of every possible byte value, indexed by the byte:
 * hex_table[0x41] is "%41". Each entry is three characters plus a NUL.
 * One row per high nibble.
 */
static const char hex_table[256][4] = {
  "%00", "%01", "%02", "%03", "%04", "%05", "%06", "%07", "%08", "%09", "%0A", "%0B", "%0C", "%0D", "%0E", "%0F",
  "%10", "%11", "%12", "%13", "%14", "%15", "%16", "%17", "%18", "%19", "%1A", "%1B", "%1C", "%1D", "%1E", "%1F",
  "%20", "%21", "%22", "%23", "%24", "%25", "%26", "%27", "%28", "%29", "%2A", "%2B", "%2C", "%2D", "%2E", "%2F",
  "%30", "%31", "%32", "%33", "%34", "%35", "%36", "%37", "%38", "%39", "%3A", "%3B", "%3C", "%3D", "%3E", "%3F",
  "%40", "%41", "%42", "%43", "%44", "%45", "%46", "%47", "%48", "%49", "%4A", "%4B", "%4C", "%4D", "%4E", "%4F",
  "%50", "%51", "%52", "%53", "%54", "%55", "%56", "%57", "%58", "%59", "%5A", "%5B", "%5C", "%5D", "%5E", "%5F",
  "%60", "%61", "%62", "%63", "%64", "%65", "%66", "%67", "%68", "%69", "%6A", "%6B", "%6C", "%6D", "%6E", "%6F",
  "%70", "%71", "%72", "%73", "%74", "%75", "%76", "%77", "%78", "%79", "%7A", "%7B", "%7C", "%7D", "%7E", "%7F",
  "%80", "%81", "%82", "%83", "%84", "%85", "%86", "%87", "%88", "%89", "%8A", "%8B", "%8C", "%8D", "%8E", "%8F",
  "%90", "%91", "%92", "%93", "%94", "%95", "%96", "%97", "%98", "%99", "%9A", "%9B", "%9C", "%9D", "%9E", "%9F",
  "%A0", "%A1", "%A2", "%A3", "%A4", "%A5", "%A6", "%A7", "%A8", "%A9", "%AA", "%AB", "%AC", "%AD", "%AE", "%AF",
  "%B0", "%B1", "%B2", "%B3", "%B4", "%B5", "%B6", "%B7", "%B8", "%B9", "%BA", "%BB", "%BC", "%BD", "%BE", "%BF",
  "%C0", "%C1", "%C2", "%C3", "%C4", "%C5", "%C6", "%C7", "%C8", "%C9", "%CA", "%CB", "%CC", "%CD", "%CE", "%CF",
  "%D0", "%D1", "%D2", "%D3", "%D4", "%D5", "%D6", "%D7", "%D8", "%D9", "%DA", "%DB", "%DC", "%DD", "%DE", "%DF",
  "%E0", "%E1", "%E2", "%E3", "%E4", "%E5", "%E6", "%E7", "%E8", "%E9", "%EA", "%EB", "%EC", "%ED", "%EE", "%EF",
  "%F0", "%F1", "%F2", "%F3", "%F4", "%F5", "%F6", "%F7", "%F8", "%F9", "%FA", "%FB", "%FC", "%FD", "%FE", "%FF"
};
53
+
54
/**
 * Convert one ASCII hexadecimal digit to its numeric value.
 *
 * Returns 0..15 for '0'-'9', 'a'-'f' and 'A'-'F', and -1 for any other
 * character.
 *
 * The result type is int, not char: plain char is unsigned on some ABIs,
 * where a -1 sentinel would be returned as 255. Callers that test the
 * result for "< 0" should hold it in an int as well.
 */
static inline int hex(char digit) {
  if(digit >= '0' && digit <= '9') {
    return digit - '0';
  }
  if(digit >= 'a' && digit <= 'f') {
    return 10 + (digit - 'a');
  }
  if(digit >= 'A' && digit <= 'F') {
    return 10 + (digit - 'A');
  }

  return -1;
}
65
+
66
+ static VALUE unescape(VALUE self, VALUE str) {
67
+ const char* buf;
68
+ const char* bufend;
69
+ VALUE outstr;
70
+
71
+ StringValue(str);
72
+ buf = RSTRING_PTR(str);
73
+ bufend = buf + RSTRING_LEN(str);
74
+ outstr = rb_str_buf_new(RSTRING_LEN(str));
75
+
76
+ while(buf < bufend) {
77
+ if(buf[0] == '%' && buf + 2 <= bufend) {
78
+ char high = hex(buf[1]);
79
+ char low = hex(buf[2]);
80
+
81
+ if(high >= 0 && low >= 0) {
82
+ const char byte = low + (high << 4);
83
+ rb_str_buf_cat(outstr, &byte, 1);
84
+ buf += 3;
85
+ continue;
86
+ }
87
+ }
88
+
89
+ if(buf[0] == '+') {
90
+ rb_str_buf_cat(outstr, space, 1);
91
+ } else {
92
+ rb_str_buf_cat(outstr, buf, 1);
93
+ }
94
+ buf++;
95
+ }
96
+
97
+ return outstr;
98
+ }
99
+
100
+
101
/**
 * Tell whether a byte may appear unescaped in the output of escape().
 *
 * Returns 1 for ASCII letters, ASCII digits, '-', '_' and '.', and 0 for
 * every other value.
 */
static inline int valid_literal(const char byte) {
  if(byte >= 'a' && byte <= 'z') {
    return 1;
  }
  if(byte >= 'A' && byte <= 'Z') {
    return 1;
  }
  if(byte >= '0' && byte <= '9') {
    return 1;
  }

  switch(byte) {
    case '-':
    case '_':
    case '.':
      return 1;
    default:
      return 0;
  }
}
110
+
111
+ static VALUE escape(VALUE self, VALUE str)
112
+ {
113
+ char* buf;
114
+ int len;
115
+ VALUE outstr;
116
+ int i;
117
+ unsigned char byte_two, byte_three;
118
+
119
+ StringValue(str);
120
+ buf = RSTRING_PTR(str);
121
+ len = RSTRING_LEN(str);
122
+ outstr = rb_str_buf_new(len);
123
+
124
+ for(i = 0; i < len;) {
125
+ const unsigned char byte_one = buf[i];
126
+
127
+ /* (UTF-8 escape, 0x0000-0x007F) */
128
+ if(byte_one < 0x80) {
129
+ if(valid_literal(byte_one)) {
130
+ rb_str_buf_cat(outstr, buf+i, 1);
131
+ } else if(byte_one == ' ') {
132
+ // a + or %20 replacement (depending on const plus assignment)
133
+ rb_str_buf_cat(outstr, plus, 1);
134
+ } else { // It's ascii but needs encoding
135
+ rb_str_buf_cat(outstr, hex_table[byte_one], 3);
136
+ }
137
+ i++;
138
+ continue;
139
+ }
140
+
141
+ // Ok, there are UTF-8 prefix bytes, so we need to interpret
142
+ // them.
143
+ //
144
+ // If we have at least one extra byte to consume
145
+ if(i + 1 < len) {
146
+ byte_two = buf[i + 1];
147
+
148
+ /* (UTF-8 escape, 0x0080-0x07FF) */
149
+ if(0xc0 <= byte_one && byte_one <= 0xdf && 0x80 <= byte_two && byte_two <= 0xbf) {
150
+ rb_str_buf_cat(outstr, hex_table[byte_one], 3);
151
+ rb_str_buf_cat(outstr, hex_table[byte_two], 3);
152
+ i += 2;
153
+ continue;
154
+
155
+ // If we have at least two extra bytes to consume
156
+ } else if(i + 2 < len) {
157
+ byte_three = buf[i + 2];
158
+
159
+ /* (UTF-8 escape, 0x0800-0xFFFF) */
160
+ if(0xe0 == byte_one && 0x80 <= byte_two && byte_two <= 0xbf) {
161
+ rb_str_buf_cat(outstr, hex_table[byte_one], 3);
162
+ rb_str_buf_cat(outstr, hex_table[byte_two], 3);
163
+ rb_str_buf_cat(outstr, hex_table[byte_three], 3);
164
+ i += 3;
165
+ continue;
166
+ }
167
+ }
168
+ }
169
+
170
+ /* (ISO Latin-1/2/? escape, 0x0080 - x00FF) */
171
+ if(0x80 <= byte_one) {
172
+ rb_str_buf_cat(outstr, hex_table[byte_one], 3);
173
+ } else {
174
+ // Well crap. Just throw it in I guess...
175
+ rb_str_buf_cat(outstr, hex_table[byte_one], 3);
176
+ }
177
+ i++;
178
+ }
179
+
180
+ return outstr;
181
+ }
182
+
183
+ void Init_url_escape() {
184
+ VALUE mod = rb_define_module("URLEscape");
185
+ rb_define_singleton_method(mod, "unescape", unescape, 1);
186
+ rb_define_singleton_method(mod, "escape", escape, 1);
187
+ }