escape_utils 0.2.3 → 0.2.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (45) hide show
  1. data/.gitignore +2 -1
  2. data/CHANGELOG.md +4 -0
  3. data/README.md +206 -0
  4. data/benchmark/html_escape.rb +1 -0
  5. data/benchmark/html_unescape.rb +1 -0
  6. data/benchmark/javascript_escape.rb +1 -0
  7. data/benchmark/javascript_unescape.rb +1 -0
  8. data/benchmark/url_escape.rb +1 -0
  9. data/benchmark/url_unescape.rb +1 -0
  10. data/escape_utils.gemspec +0 -3
  11. data/ext/escape_utils/buffer.c +228 -0
  12. data/ext/escape_utils/buffer.h +91 -0
  13. data/ext/escape_utils/escape_utils.c +111 -531
  14. data/ext/escape_utils/houdini.h +15 -0
  15. data/ext/escape_utils/houdini_html.c +214 -0
  16. data/ext/escape_utils/houdini_js.c +148 -0
  17. data/ext/escape_utils/houdini_uri.c +130 -0
  18. data/ext/escape_utils/html_unescape.h +754 -0
  19. data/ext/escape_utils/uri_escape.h +35 -0
  20. data/lib/escape_utils.rb +2 -2
  21. data/lib/escape_utils/html/cgi.rb +0 -2
  22. data/lib/escape_utils/html/erb.rb +0 -2
  23. data/lib/escape_utils/html/haml.rb +0 -2
  24. data/lib/escape_utils/html/rack.rb +0 -2
  25. data/lib/escape_utils/html_safety.rb +0 -2
  26. data/lib/escape_utils/javascript/action_view.rb +0 -2
  27. data/lib/escape_utils/url/cgi.rb +0 -2
  28. data/lib/escape_utils/url/erb.rb +0 -2
  29. data/lib/escape_utils/url/rack.rb +0 -2
  30. data/lib/escape_utils/url/uri.rb +0 -2
  31. data/lib/escape_utils/version.rb +1 -1
  32. data/spec/html/escape_spec.rb +0 -1
  33. data/spec/html/unescape_spec.rb +0 -1
  34. data/spec/html_safety_spec.rb +0 -1
  35. data/spec/javascript/escape_spec.rb +0 -1
  36. data/spec/javascript/unescape_spec.rb +0 -1
  37. data/spec/query/escape_spec.rb +0 -1
  38. data/spec/query/unescape_spec.rb +1 -0
  39. data/spec/spec_helper.rb +0 -1
  40. data/spec/uri/escape_spec.rb +0 -1
  41. data/spec/uri/unescape_spec.rb +1 -0
  42. data/spec/url/escape_spec.rb +0 -1
  43. data/spec/url/unescape_spec.rb +1 -0
  44. metadata +16 -8
  45. data/README.rdoc +0 -146
data/.gitignore CHANGED
@@ -5,4 +5,5 @@ pkg/*
5
5
  doc/*
6
6
  *.rbc
7
7
  tmp/
8
- Gemfile.lock
8
+ Gemfile.lock
9
+ vendor/*
@@ -1,5 +1,9 @@
1
1
  # Changelog
2
2
 
3
+ ## 0.2.4 (September 7th, 2011)
4
+ * swap out custom escaping routines for houdini - https://github.com/tanoku/houdini
5
+ * add RSTRING_NOT_MODIFIED define for a Rubinius speedup
6
+
3
7
  ## 0.2.3 (March 9th, 2011)
4
8
  * change encoding strategy to simply return strings in the encoding the input string was in, not taking into account Encoding.default_internal
5
9
 
@@ -0,0 +1,206 @@
1
+ # escape_utils
2
+
3
+ Being as though we're all html escaping everything these days, why not make it faster?
4
+
5
+ For character encoding in 1.9, the output string's encoding is copied from the input string.
6
+
7
+ It has monkey-patches for Rack::Utils, CGI, URI, ERB::Util and Haml and ActionView so you can drop this in and have your app start escaping fast as balls in no time
8
+
9
+ It supports HTML, URL, URI and Javascript escaping/unescaping.
10
+
11
+ ## Installing
12
+
13
+ ``` sh
14
+ gem install escape_utils
15
+ ```
16
+
17
+ ## Warning: UTF-8 only
18
+
19
+ escape_utils assumes all input is encoded as valid UTF-8. If you are dealing with other encodings do your best to transcode the string into a UTF-8 byte stream before handing it to escape_utils.
20
+
21
+ On Ruby 1.9 this is as easy as:
22
+
23
+ ``` ruby
24
+ utf8_string = non_utf8_string.encode('UTF-8')
25
+ ```
26
+
27
+ If you're on Ruby 1.8 you can use [charlock_holmes](https://github.com/brianmario/charlock_holmes) to transcode like so:
28
+
29
+ ``` ruby
30
+ # NOTE: we're assuming you know the encoding of `non_utf8_string` here.
31
+ # if you don't, you can use the detection API of charlock_holmes
32
+ utf8_string = CharlockHolmes::Converter.convert(non_utf8_string, other_encoding, 'UTF-8')
33
+ ```
34
+
35
+ ## Usage
36
+
37
+ ### HTML
38
+
39
+ #### Escaping
40
+
41
+ ``` ruby
42
+ html = `curl -s http://maps.google.com`
43
+ escaped_html = EscapeUtils.escape_html(html)
44
+ ```
45
+
46
+ By default escape_utils will escape `/` characters with `&#x2F;`, but you can disable that by setting `EscapeUtils.html_secure = false`
47
+ or per-call by passing `false` as the second parameter to `escape_html` like `EscapeUtils.escape_html(html, false)`
48
+
49
+ For more information check out: http://www.owasp.org/index.php/XSS_(Cross_Site_Scripting)_Prevention_Cheat_Sheet#RULE_.231_-_HTML_Escape_Before_Inserting_Untrusted_Data_into_HTML_Element_Content
50
+
51
+ #### Unescaping
52
+
53
+ ``` ruby
54
+ html = `curl -s http://maps.google.com`
55
+ escaped_html = EscapeUtils.escape_html(html)
56
+ html = EscapeUtils.unescape_html(escaped_html)
57
+ ```
58
+
59
+ #### Monkey Patches
60
+
61
+ ``` ruby
62
+ require 'escape_utils/html/rack' # to patch Rack::Utils
63
+ require 'escape_utils/html/erb' # to patch ERB::Util
64
+ require 'escape_utils/html/cgi' # to patch CGI
65
+ require 'escape_utils/html/haml' # to patch Haml::Helpers
66
+ ```
67
+
68
+ ### URL
69
+
70
+ Use (un)escape_uri to get RFC-compliant escaping (like PHP rawurlencode).
71
+
72
+ Use (un)escape_url to get CGI escaping (where space is +).
73
+
74
+ #### Escaping
75
+
76
+ ``` ruby
77
+ url = "https://www.yourmom.com/cgi-bin/session.cgi?sess_args=mcEA~!!#*YH*>@!U"
78
+ escaped_url = EscapeUtils.escape_url(url)
79
+ ```
80
+
81
+ #### Unescaping
82
+
83
+ ``` ruby
84
+ url = "https://www.yourmom.com/cgi-bin/session.cgi?sess_args=mcEA~!!#*YH*>@!U"
85
+ escaped_url = EscapeUtils.escape_url(url)
86
+ EscapeUtils.unescape_url(escaped_url) == url # => true
87
+ ```
88
+
89
+ #### Monkey Patches
90
+
91
+ ``` ruby
92
+ require 'escape_utils/url/cgi' # to patch CGI
93
+ require 'escape_utils/url/erb' # to patch ERB::Util
94
+ require 'escape_utils/url/rack' # to patch Rack::Utils
95
+ require 'escape_utils/url/uri' # to patch URI
96
+ ```
97
+
98
+ ### Javascript
99
+
100
+ #### Escaping
101
+
102
+ ``` ruby
103
+ javascript = `curl -s http://code.jquery.com/jquery-1.4.2.js`
104
+ escaped_javascript = EscapeUtils.escape_javascript(javascript)
105
+ ```
106
+
107
+ #### Unescaping
108
+
109
+ ``` ruby
110
+ javascript = `curl -s http://code.jquery.com/jquery-1.4.2.js`
111
+ escaped_javascript = EscapeUtils.escape_javascript(javascript)
112
+ EscapeUtils.unescape_javascript(escaped_javascript) == javascript # => true
113
+ ```
114
+
115
+ #### Monkey Patches
116
+
117
+ ``` ruby
118
+ require 'escape_utils/javascript/action_view' # to patch ActionView::Helpers::JavaScriptHelper
119
+ ```
120
+
121
+ ## Benchmarks
122
+
123
+ In my testing, escaping html is around 10-30x faster than the pure ruby implementations in wide use today.
124
+ Unescaping html is around 40-100x faster than CGI.unescapeHTML, which is also pure ruby.
125
+ Escaping Javascript is around 16-30x faster.
126
+
127
+ This output is from my laptop using the benchmark scripts in the benchmark folder.
128
+
129
+ ### HTML
130
+
131
+ #### Escaping
132
+
133
+ ```
134
+ Rack::Utils.escape_html
135
+ 9.650000 0.090000 9.740000 ( 9.750756)
136
+ Haml::Helpers.html_escape
137
+ 9.310000 0.110000 9.420000 ( 9.417317)
138
+ ERB::Util.html_escape
139
+ 5.330000 0.390000 5.720000 ( 5.748394)
140
+ CGI.escapeHTML
141
+ 5.370000 0.380000 5.750000 ( 5.791344)
142
+ FasterHTMLEscape.html_escape
143
+ 0.520000 0.010000 0.530000 ( 0.539485)
144
+ fast_xs_extra#fast_xs_html
145
+ 0.310000 0.030000 0.340000 ( 0.336734)
146
+ EscapeUtils.escape_html
147
+ 0.200000 0.050000 0.250000 ( 0.258839)
148
+ ```
149
+
150
+ #### Unescaping
151
+
152
+ ```
153
+ CGI.unescapeHTML
154
+ 16.520000 0.080000 16.600000 ( 16.853888)
155
+ EscapeUtils.unescape_html
156
+ 0.120000 0.040000 0.160000 ( 0.162696)
157
+ ```
158
+
159
+ ### Javascript
160
+
161
+ #### Escaping
162
+
163
+ ```
164
+ ActionView::Helpers::JavaScriptHelper#escape_javascript
165
+ 3.810000 0.100000 3.910000 ( 3.925557)
166
+ EscapeUtils.escape_javascript
167
+ 0.200000 0.040000 0.240000 ( 0.236692)
168
+ ```
169
+
170
+ #### Unescaping
171
+
172
+ I didn't look that hard, but I'm not aware of another ruby library that does Javascript unescaping to benchmark against. Anyone know of any?
173
+
174
+ ### URL
175
+
176
+ #### Escaping
177
+
178
+ ```
179
+ ERB::Util.url_encode
180
+ 0.520000 0.010000 0.530000 ( 0.529277)
181
+ Rack::Utils.escape
182
+ 0.460000 0.010000 0.470000 ( 0.466962)
183
+ CGI.escape
184
+ 0.440000 0.000000 0.440000 ( 0.443017)
185
+ URLEscape#escape
186
+ 0.040000 0.000000 0.040000 ( 0.045661)
187
+ fast_xs_extra#fast_xs_url
188
+ 0.010000 0.000000 0.010000 ( 0.015429)
189
+ EscapeUtils.escape_url
190
+ 0.010000 0.000000 0.010000 ( 0.010843)
191
+ ```
192
+
193
+ #### Unescaping
194
+
195
+ ```
196
+ Rack::Utils.unescape
197
+ 0.250000 0.010000 0.260000 ( 0.257558)
198
+ CGI.unescape
199
+ 0.250000 0.000000 0.250000 ( 0.257837)
200
+ URLEscape#unescape
201
+ 0.040000 0.000000 0.040000 ( 0.031548)
202
+ fast_xs_extra#fast_uxs_cgi
203
+ 0.010000 0.000000 0.010000 ( 0.006062)
204
+ EscapeUtils.unescape_url
205
+ 0.000000 0.000000 0.000000 ( 0.005679)
206
+ ```
@@ -19,6 +19,7 @@ end
19
19
  times = 100
20
20
  url = "http://en.wikipedia.org/wiki/Line_of_succession_to_the_British_throne"
21
21
  html = `curl -s #{url}`
22
+ html = html.force_encoding('binary') if html.respond_to?(:force_encoding)
22
23
  puts "Escaping #{html.bytesize} bytes of html #{times} times, from #{url}"
23
24
 
24
25
  Benchmark.bmbm do |x|
@@ -16,6 +16,7 @@ end
16
16
  times = 100
17
17
  url = "http://en.wikipedia.org/wiki/Line_of_succession_to_the_British_throne"
18
18
  html = `curl -s #{url}`
19
+ html = html.force_encoding('binary') if html.respond_to?(:force_encoding)
19
20
  escaped_html = EscapeUtils.escape_html(html)
20
21
  puts "Unescaping #{escaped_html.bytesize} bytes of escaped html #{times} times, from #{url}"
21
22
 
@@ -15,6 +15,7 @@ end
15
15
  times = 100
16
16
  url = "http://ajax.googleapis.com/ajax/libs/dojo/1.4.3/dojo/dojo.xd.js.uncompressed.js"
17
17
  javascript = `curl -s #{url}`
18
+ javascript = javascript.force_encoding('utf-8') if javascript.respond_to?(:force_encoding)
18
19
  puts "Escaping #{javascript.bytesize} bytes of javascript #{times} times, from #{url}"
19
20
 
20
21
  Benchmark.bmbm do |x|
@@ -10,6 +10,7 @@ require 'escape_utils'
10
10
  times = 100
11
11
  url = "http://ajax.googleapis.com/ajax/libs/dojo/1.4.3/dojo/dojo.xd.js.uncompressed.js"
12
12
  javascript = `curl -s #{url}`
13
+ javascript = javascript.force_encoding('utf-8') if javascript.respond_to?(:force_encoding)
13
14
  escaped_javascript = EscapeUtils.escape_javascript(javascript)
14
15
  puts "Escaping #{escaped_javascript.bytesize} bytes of javascript #{times} times, from #{url}"
15
16
 
@@ -14,6 +14,7 @@ require 'escape_utils'
14
14
 
15
15
  times = 10_000
16
16
  url = "https://www.yourmom.com/cgi-bin/session.cgi?sess_args=mYHcEA dh435dqUs0moGHeeAJTSLLbdbcbd9ef----,574b95600e9ab7d27eb0bf524ac68c27----"
17
+ url = url.force_encoding('us-ascii') if url.respond_to?(:force_encoding)
17
18
  puts "Escaping a #{url.bytesize} byte URL #{times} times"
18
19
 
19
20
  Benchmark.bmbm do |x|
@@ -13,6 +13,7 @@ require 'escape_utils'
13
13
 
14
14
  times = 10_000
15
15
  url = "https://www.yourmom.com/cgi-bin/session.cgi?sess_args=mYHcEA dh435dqUs0moGHeeAJTSLLbdbcbd9ef----,574b95600e9ab7d27eb0bf524ac68c27----"
16
+ url = url.force_encoding('us-ascii') if url.respond_to?(:force_encoding)
16
17
  escaped_url = EscapeUtils.escape_url(url)
17
18
  puts "Escaping a #{url.bytesize} byte URL #{times} times"
18
19
 
@@ -7,9 +7,6 @@ Gem::Specification.new do |s|
7
7
  s.date = Time.now.utc.strftime("%Y-%m-%d")
8
8
  s.email = %q{seniorlopez@gmail.com}
9
9
  s.extensions = ["ext/escape_utils/extconf.rb"]
10
- s.extra_rdoc_files = [
11
- "README.rdoc"
12
- ]
13
10
  s.files = `git ls-files`.split("\n")
14
11
  s.homepage = %q{http://github.com/brianmario/escape_utils}
15
12
  s.rdoc_options = ["--charset=UTF-8"]
@@ -0,0 +1,228 @@
1
+ /*
2
+ * Copyright (c) 2008, Natacha Porté
3
+ * Copyright (c) 2011, Vicent Martí
4
+ *
5
+ * Permission to use, copy, modify, and distribute this software for any
6
+ * purpose with or without fee is hereby granted, provided that the above
7
+ * copyright notice and this permission notice appear in all copies.
8
+ *
9
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16
+ */
17
+
18
+ #define BUFFER_MAX_ALLOC_SIZE (1024 * 1024 * 16) //16mb
19
+
20
+ #include "buffer.h"
21
+
22
+ #include <stdio.h>
23
+ #include <stdlib.h>
24
+ #include <string.h>
25
+
26
+ /* MSVC compat */
27
+ #if defined(_MSC_VER)
28
+ # define _buf_vsnprintf _vsnprintf
29
+ #else
30
+ # define _buf_vsnprintf vsnprintf
31
+ #endif
32
+
33
+ int
34
+ bufprefix(const struct buf *buf, const char *prefix)
35
+ {
36
+ size_t i;
37
+
38
+ for (i = 0; i < buf->size; ++i) {
39
+ if (prefix[i] == 0)
40
+ return 0;
41
+
42
+ if (buf->data[i] != prefix[i])
43
+ return buf->data[i] - prefix[i];
44
+ }
45
+
46
+ return 0;
47
+ }
48
+
49
+ /* bufgrow: increasing the allocated size to the given value */
50
+ int
51
+ bufgrow(struct buf *buf, size_t neosz)
52
+ {
53
+ size_t neoasz;
54
+ void *neodata;
55
+ if (!buf || !buf->unit || neosz > BUFFER_MAX_ALLOC_SIZE)
56
+ return BUF_ENOMEM;
57
+
58
+ if (buf->asize >= neosz)
59
+ return BUF_OK;
60
+
61
+ neoasz = buf->asize + buf->unit;
62
+ while (neoasz < neosz)
63
+ neoasz += buf->unit;
64
+
65
+ neodata = realloc(buf->data, neoasz);
66
+ if (!neodata)
67
+ return BUF_ENOMEM;
68
+
69
+ buf->data = neodata;
70
+ buf->asize = neoasz;
71
+ return BUF_OK;
72
+ }
73
+
74
+
75
+ /* bufnew: allocation of a new buffer */
76
+ struct buf *
77
+ bufnew(size_t unit)
78
+ {
79
+ struct buf *ret;
80
+ ret = malloc(sizeof (struct buf));
81
+
82
+ if (ret) {
83
+ ret->data = 0;
84
+ ret->size = ret->asize = 0;
85
+ ret->unit = unit;
86
+ }
87
+ return ret;
88
+ }
89
+
90
+ /* bufcstr: NUL-termination of the string array */
91
+ const char *
92
+ bufcstr(struct buf *buf)
93
+ {
94
+ if (!buf || !buf->unit)
95
+ return NULL;
96
+
97
+ if (buf->size < buf->asize && buf->data[buf->size] == 0)
98
+ return (char *)buf->data;
99
+
100
+ if (buf->size + 1 <= buf->asize || bufgrow(buf, buf->size + 1) == 0) {
101
+ buf->data[buf->size] = 0;
102
+ return (char *)buf->data;
103
+ }
104
+
105
+ return NULL;
106
+ }
107
+
108
+ /* bufprintf: formatted printing to a buffer */
109
+ void
110
+ bufprintf(struct buf *buf, const char *fmt, ...)
111
+ {
112
+ va_list ap;
113
+ if (!buf || !buf->unit)
114
+ return;
115
+
116
+ va_start(ap, fmt);
117
+ vbufprintf(buf, fmt, ap);
118
+ va_end(ap);
119
+ }
120
+
121
+ /* bufput: appends raw data to a buffer */
122
+ void
123
+ bufput(struct buf *buf, const void *data, size_t len)
124
+ {
125
+ if (!buf)
126
+ return;
127
+
128
+ if (buf->size + len > buf->asize && bufgrow(buf, buf->size + len) < 0)
129
+ return;
130
+
131
+ memcpy(buf->data + buf->size, data, len);
132
+ buf->size += len;
133
+ }
134
+
135
+ /* bufputs: appends a NUL-terminated string to a buffer */
136
+ void
137
+ bufputs(struct buf *buf, const char *str)
138
+ {
139
+ bufput(buf, str, strlen(str));
140
+ }
141
+
142
+
143
+ /* bufputc: appends a single uint8_t to a buffer */
144
+ void
145
+ bufputc(struct buf *buf, int c)
146
+ {
147
+ if (!buf)
148
+ return;
149
+
150
+ if (buf->size + 1 > buf->asize && bufgrow(buf, buf->size + 1) < 0)
151
+ return;
152
+
153
+ buf->data[buf->size] = c;
154
+ buf->size += 1;
155
+ }
156
+
157
+ /* bufrelease: frees the buffer's internal data and the buffer itself */
158
+ void
159
+ bufrelease(struct buf *buf)
160
+ {
161
+ if (!buf)
162
+ return;
163
+
164
+ free(buf->data);
165
+ free(buf);
166
+ }
167
+
168
+
169
+ /* bufreset: frees internal data of the buffer */
170
+ void
171
+ bufreset(struct buf *buf)
172
+ {
173
+ if (!buf)
174
+ return;
175
+
176
+ free(buf->data);
177
+ buf->data = NULL;
178
+ buf->size = buf->asize = 0;
179
+ }
180
+
181
+ /* bufslurp: removes a given number of bytes from the head of the array */
182
+ void
183
+ bufslurp(struct buf *buf, size_t len)
184
+ {
185
+ if (!buf || !buf->unit || len <= 0)
186
+ return;
187
+
188
+ if (len >= buf->size) {
189
+ buf->size = 0;
190
+ return;
191
+ }
192
+
193
+ buf->size -= len;
194
+ memmove(buf->data, buf->data + len, buf->size);
195
+ }
196
+
197
+ /* vbufprintf: stdarg variant of formatted printing into a buffer */
198
+ void
199
+ vbufprintf(struct buf *buf, const char *fmt, va_list ap)
200
+ {
201
+ int n;
202
+
203
+ if (buf == 0 || (buf->size >= buf->asize && bufgrow(buf, buf->size + 1)) < 0)
204
+ return;
205
+
206
+ n = _buf_vsnprintf((char *)buf->data + buf->size, buf->asize - buf->size, fmt, ap);
207
+
208
+ if (n < 0) {
209
+ #ifdef _MSC_VER
210
+ n = _vscprintf(fmt, ap);
211
+ #else
212
+ return;
213
+ #endif
214
+ }
215
+
216
+ if ((size_t)n >= buf->asize - buf->size) {
217
+ if (bufgrow(buf, buf->size + n + 1) < 0)
218
+ return;
219
+
220
+ n = _buf_vsnprintf((char *)buf->data + buf->size, buf->asize - buf->size, fmt, ap);
221
+ }
222
+
223
+ if (n < 0)
224
+ return;
225
+
226
+ buf->size += n;
227
+ }
228
+