escape_utils 0.1.2 → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG.md +4 -0
- data/README.rdoc +6 -3
- data/VERSION +1 -1
- data/escape_utils.gemspec +2 -2
- data/ext/escape_utils.c +72 -41
- data/lib/escape_utils.rb +1 -1
- data/spec/html/escape_spec.rb +14 -0
- data/spec/html/unescape_spec.rb +14 -0
- data/spec/javascript/escape_spec.rb +15 -1
- metadata +3 -3
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,9 @@
|
|
1
1
|
# Changelog
|
2
2
|
|
3
|
+
## 0.1.3 (June 9th, 2010)
|
4
|
+
* cleaned some code up, removing duplication
|
5
|
+
* moved to a more flexible character encoding scheme using Encoding.default_internal for 1.9 users
|
6
|
+
|
3
7
|
## 0.1.2 (June 8th, 2010)
|
4
8
|
* forgot to add the ActionView monkey patch for JS escaping ;)
|
5
9
|
|
data/README.rdoc
CHANGED
@@ -2,7 +2,9 @@
|
|
2
2
|
|
3
3
|
Being as though we're all html escaping everything these days, why not make it faster?
|
4
4
|
|
5
|
-
At the moment escape_utils supports escaping and unescaping of HTML, and Javascript but I wanna add URL encoding soon
|
5
|
+
At the moment escape_utils supports escaping and unescaping of HTML, and Javascript but I wanna add URL encoding soon.
|
6
|
+
|
7
|
+
For character encoding in 1.9, we'll return strings in whatever Encoding.default_internal is set to or utf-8 otherwise.
|
6
8
|
|
7
9
|
It has monkey-patches for Rack::Utils, CGI, ERB::Util and Haml and ActionView so you can drop this in and have your app start escaping fast as balls in no time
|
8
10
|
|
@@ -45,8 +47,9 @@ It has monkey-patches for Rack::Utils, CGI, ERB::Util and Haml and ActionView so
|
|
45
47
|
|
46
48
|
== Benchmarks
|
47
49
|
|
48
|
-
In my testing, escaping is around 10-20x faster than the pure ruby implementations in wide use today.
|
49
|
-
While unescaping is around 20-40x faster than CGI.unescapeHTML - also pure ruby.
|
50
|
+
In my testing, escaping html is around 10-20x faster than the pure ruby implementations in wide use today.
|
51
|
+
While unescaping html is around 20-40x faster than CGI.unescapeHTML - also pure ruby.
|
52
|
+
Escaping Javascript is around 16-30x faster.
|
50
53
|
|
51
54
|
This output is from my laptop using the benchmark scripts in the benchmarks folder.
|
52
55
|
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.1.2
|
1
|
+
0.1.3
|
data/escape_utils.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{escape_utils}
|
8
|
-
s.version = "0.1.2"
|
8
|
+
s.version = "0.1.3"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Brian Lopez"]
|
12
|
-
s.date = %q{2010-06-08}
|
12
|
+
s.date = %q{2010-06-09}
|
13
13
|
s.email = %q{seniorlopez@gmail.com}
|
14
14
|
s.extensions = ["ext/extconf.rb"]
|
15
15
|
s.extra_rdoc_files = [
|
data/ext/escape_utils.c
CHANGED
@@ -1,4 +1,8 @@
|
|
1
1
|
#include <ruby.h>
|
2
|
+
#ifdef HAVE_RUBY_ENCODING_H
|
3
|
+
#include <ruby/encoding.h>
|
4
|
+
static rb_encoding *utf8Encoding;
|
5
|
+
#endif
|
2
6
|
|
3
7
|
#define APPEND_BUFFER(escape, len, scoot_by) \
|
4
8
|
memcpy(&out[total], &in[offset], i-offset); \
|
@@ -6,18 +10,17 @@
|
|
6
10
|
offset = i+scoot_by; \
|
7
11
|
memcpy(&out[total], escape, len); \
|
8
12
|
total += len; \
|
9
|
-
break; \
|
10
13
|
|
11
14
|
static size_t escape_html(unsigned char *out, const unsigned char *in, size_t in_len) {
|
12
15
|
size_t i = 0, offset = 0, total = 0;
|
13
16
|
|
14
17
|
for(;i<in_len;i++) {
|
15
18
|
switch(in[i]) {
|
16
|
-
case '&': APPEND_BUFFER("&amp;", 5, 1);
|
17
|
-
case '<': APPEND_BUFFER("&lt;", 4, 1);
|
18
|
-
case '>': APPEND_BUFFER("&gt;", 4, 1);
|
19
|
-
case '\'': APPEND_BUFFER("&#39;", 5, 1);
|
20
|
-
case '\"': APPEND_BUFFER("&quot;", 6, 1);
|
19
|
+
case '&': APPEND_BUFFER("&amp;", 5, 1); break;
|
20
|
+
case '<': APPEND_BUFFER("&lt;", 4, 1); break;
|
21
|
+
case '>': APPEND_BUFFER("&gt;", 4, 1); break;
|
22
|
+
case '\'': APPEND_BUFFER("&#39;", 5, 1); break;
|
23
|
+
case '\"': APPEND_BUFFER("&quot;", 6, 1); break;
|
21
24
|
}
|
22
25
|
}
|
23
26
|
|
@@ -31,38 +34,26 @@ static size_t unescape_html(unsigned char *out, const unsigned char *in, size_t
|
|
31
34
|
size_t i = 0, offset = 0, total = 0;
|
32
35
|
|
33
36
|
for(;i<in_len;i++) {
|
34
|
-
|
35
|
-
|
36
|
-
if (i
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
}
|
48
|
-
} else if (i+4 <= in_len) {
|
49
|
-
if (memcmp(&in[i], "&amp;", 5) == 0) {
|
50
|
-
APPEND_BUFFER("&", 1, 5);
|
51
|
-
} else if (memcmp(&in[i], "&lt;", 4) == 0) {
|
52
|
-
APPEND_BUFFER("<", 1, 4);
|
53
|
-
} else if (memcmp(&in[i], "&gt;", 4) == 0) {
|
54
|
-
APPEND_BUFFER(">", 1, 4);
|
55
|
-
} else if (memcmp(&in[i], "&#39;", 5) == 0) {
|
56
|
-
APPEND_BUFFER("\'", 1, 5);
|
57
|
-
}
|
58
|
-
} else if (i+3 <= in_len) {
|
59
|
-
if (memcmp(&in[i], "&lt;", 4) == 0) {
|
60
|
-
APPEND_BUFFER("<", 1, 4);
|
61
|
-
} else if (memcmp(&in[i], "&gt;", 4) == 0) {
|
62
|
-
APPEND_BUFFER(">", 1, 4);
|
63
|
-
}
|
37
|
+
if(in[i] == '&') {
|
38
|
+
if (i+3 <= in_len) {
|
39
|
+
if (memcmp(&in[i], "&lt;", 4) == 0) {
|
40
|
+
APPEND_BUFFER("<", 1, 4);
|
41
|
+
} else if (memcmp(&in[i], "&gt;", 4) == 0) {
|
42
|
+
APPEND_BUFFER(">", 1, 4);
|
43
|
+
}
|
44
|
+
}
|
45
|
+
if (i+4 <= in_len) {
|
46
|
+
if (memcmp(&in[i], "&amp;", 5) == 0) {
|
47
|
+
APPEND_BUFFER("&", 1, 5);
|
48
|
+
} else if (memcmp(&in[i], "&#39;", 5) == 0) {
|
49
|
+
APPEND_BUFFER("\'", 1, 5);
|
64
50
|
}
|
65
|
-
|
51
|
+
}
|
52
|
+
if (i+5 <= in_len) {
|
53
|
+
if (memcmp(&in[i], "&quot;", 6) == 0) {
|
54
|
+
APPEND_BUFFER("\"", 1, 6);
|
55
|
+
}
|
56
|
+
}
|
66
57
|
}
|
67
58
|
}
|
68
59
|
|
@@ -77,7 +68,7 @@ static size_t escape_javascript(unsigned char *out, const unsigned char *in, siz
|
|
77
68
|
|
78
69
|
for(;i<in_len;i++) {
|
79
70
|
switch(in[i]) {
|
80
|
-
case '\\': APPEND_BUFFER("\\\\", 2, 1);
|
71
|
+
case '\\': APPEND_BUFFER("\\\\", 2, 1); break;
|
81
72
|
case '<':
|
82
73
|
if (i+1 <= in_len && in[i+1] == '/') {
|
83
74
|
APPEND_BUFFER("<\\/", 3, 2);
|
@@ -90,9 +81,9 @@ static size_t escape_javascript(unsigned char *out, const unsigned char *in, siz
|
|
90
81
|
APPEND_BUFFER("\\n", 2, 1);
|
91
82
|
}
|
92
83
|
break;
|
93
|
-
case '\n': APPEND_BUFFER("\\n", 2, 1);
|
94
|
-
case '\"': APPEND_BUFFER("\\\"", 2, 1);
|
95
|
-
case '\'': APPEND_BUFFER("\\'", 2, 1);
|
84
|
+
case '\n': APPEND_BUFFER("\\n", 2, 1); break;
|
85
|
+
case '\"': APPEND_BUFFER("\\\"", 2, 1); break;
|
86
|
+
case '\'': APPEND_BUFFER("\\'", 2, 1); break;
|
96
87
|
}
|
97
88
|
}
|
98
89
|
|
@@ -106,6 +97,10 @@ static VALUE rb_escape_html(VALUE self, VALUE str) {
|
|
106
97
|
Check_Type(str, T_STRING);
|
107
98
|
|
108
99
|
VALUE rb_output_buf;
|
100
|
+
#ifdef HAVE_RUBY_ENCODING_H
|
101
|
+
rb_encoding *default_internal_enc = rb_default_internal_encoding();
|
102
|
+
rb_encoding *original_encoding = rb_enc_get(str);
|
103
|
+
#endif
|
109
104
|
unsigned char *inBuf = (unsigned char*)RSTRING_PTR(str);
|
110
105
|
size_t len = RSTRING_LEN(str), new_len = 0;
|
111
106
|
|
@@ -122,6 +117,14 @@ static VALUE rb_escape_html(VALUE self, VALUE str) {
|
|
122
117
|
// free the temporary C string
|
123
118
|
free(outBuf);
|
124
119
|
|
120
|
+
#ifdef HAVE_RUBY_ENCODING_H
|
121
|
+
rb_enc_associate(rb_output_buf, original_encoding);
|
122
|
+
if (default_internal_enc) {
|
123
|
+
rb_output_buf = rb_str_export_to_enc(rb_output_buf, default_internal_enc);
|
124
|
+
} else {
|
125
|
+
rb_output_buf = rb_str_export_to_enc(rb_output_buf, utf8Encoding);
|
126
|
+
}
|
127
|
+
#endif
|
125
128
|
return rb_output_buf;
|
126
129
|
}
|
127
130
|
|
@@ -129,6 +132,10 @@ static VALUE rb_unescape_html(VALUE self, VALUE str) {
|
|
129
132
|
Check_Type(str, T_STRING);
|
130
133
|
|
131
134
|
VALUE rb_output_buf;
|
135
|
+
#ifdef HAVE_RUBY_ENCODING_H
|
136
|
+
rb_encoding *default_internal_enc = rb_default_internal_encoding();
|
137
|
+
rb_encoding *original_encoding = rb_enc_get(str);
|
138
|
+
#endif
|
132
139
|
unsigned char *inBuf = (unsigned char*)RSTRING_PTR(str);
|
133
140
|
size_t len = RSTRING_LEN(str), new_len = 0;
|
134
141
|
|
@@ -145,6 +152,14 @@ static VALUE rb_unescape_html(VALUE self, VALUE str) {
|
|
145
152
|
// free the temporary C string
|
146
153
|
free(outBuf);
|
147
154
|
|
155
|
+
#ifdef HAVE_RUBY_ENCODING_H
|
156
|
+
rb_enc_associate(rb_output_buf, original_encoding);
|
157
|
+
if (default_internal_enc) {
|
158
|
+
rb_output_buf = rb_str_export_to_enc(rb_output_buf, default_internal_enc);
|
159
|
+
} else {
|
160
|
+
rb_output_buf = rb_str_export_to_enc(rb_output_buf, utf8Encoding);
|
161
|
+
}
|
162
|
+
#endif
|
148
163
|
return rb_output_buf;
|
149
164
|
}
|
150
165
|
|
@@ -156,6 +171,10 @@ static VALUE rb_escape_javascript(VALUE self, VALUE str) {
|
|
156
171
|
Check_Type(str, T_STRING);
|
157
172
|
|
158
173
|
VALUE rb_output_buf;
|
174
|
+
#ifdef HAVE_RUBY_ENCODING_H
|
175
|
+
rb_encoding *default_internal_enc = rb_default_internal_encoding();
|
176
|
+
rb_encoding *original_encoding = rb_enc_get(str);
|
177
|
+
#endif
|
159
178
|
unsigned char *inBuf = (unsigned char*)RSTRING_PTR(str);
|
160
179
|
size_t len = RSTRING_LEN(str), new_len = 0;
|
161
180
|
|
@@ -172,6 +191,14 @@ static VALUE rb_escape_javascript(VALUE self, VALUE str) {
|
|
172
191
|
// free the temporary C string
|
173
192
|
free(outBuf);
|
174
193
|
|
194
|
+
#ifdef HAVE_RUBY_ENCODING_H
|
195
|
+
rb_enc_associate(rb_output_buf, original_encoding);
|
196
|
+
if (default_internal_enc) {
|
197
|
+
rb_output_buf = rb_str_export_to_enc(rb_output_buf, default_internal_enc);
|
198
|
+
} else {
|
199
|
+
rb_output_buf = rb_str_export_to_enc(rb_output_buf, utf8Encoding);
|
200
|
+
}
|
201
|
+
#endif
|
175
202
|
return rb_output_buf;
|
176
203
|
}
|
177
204
|
|
@@ -184,4 +211,8 @@ void Init_escape_utils_ext() {
|
|
184
211
|
rb_define_module_function(mEscape, "unescape_html", rb_unescape_html, 1);
|
185
212
|
rb_define_method(mEscape, "escape_javascript", rb_escape_javascript, 1);
|
186
213
|
rb_define_module_function(mEscape, "escape_javascript", rb_escape_javascript, 1);
|
214
|
+
|
215
|
+
#ifdef HAVE_RUBY_ENCODING_H
|
216
|
+
utf8Encoding = rb_utf8_encoding();
|
217
|
+
#endif
|
187
218
|
}
|
data/lib/escape_utils.rb
CHANGED
data/spec/html/escape_spec.rb
CHANGED
@@ -21,4 +21,18 @@ describe EscapeUtils, "escape_html" do
|
|
21
21
|
it "should escape the & character" do
|
22
22
|
EscapeUtils.escape_html("<b>Bourbon & Branch</b>").should eql("&lt;b&gt;Bourbon &amp; Branch&lt;/b&gt;")
|
23
23
|
end
|
24
|
+
|
25
|
+
if RUBY_VERSION =~ /^1.9/
|
26
|
+
it "should default to utf-8 if Encoding.default_internal is nil" do
|
27
|
+
Encoding.default_internal = nil
|
28
|
+
EscapeUtils.escape_html("<b>Bourbon & Branch</b>").encoding.should eql(Encoding.find('utf-8'))
|
29
|
+
end
|
30
|
+
|
31
|
+
it "should use Encoding.default_internal" do
|
32
|
+
Encoding.default_internal = Encoding.find('utf-8')
|
33
|
+
EscapeUtils.escape_html("<b>Bourbon & Branch</b>").encoding.should eql(Encoding.default_internal)
|
34
|
+
Encoding.default_internal = Encoding.find('us-ascii')
|
35
|
+
EscapeUtils.escape_html("<b>Bourbon & Branch</b>").encoding.should eql(Encoding.default_internal)
|
36
|
+
end
|
37
|
+
end
|
24
38
|
end
|
data/spec/html/unescape_spec.rb
CHANGED
@@ -21,4 +21,18 @@ describe EscapeUtils, "unescape_html" do
|
|
21
21
|
it "should unescape the & character" do
|
22
22
|
EscapeUtils.unescape_html("&lt;b&gt;Bourbon &amp; Branch&lt;/b&gt;").should eql("<b>Bourbon & Branch</b>")
|
23
23
|
end
|
24
|
+
|
25
|
+
if RUBY_VERSION =~ /^1.9/
|
26
|
+
it "should default to utf-8 if Encoding.default_internal is nil" do
|
27
|
+
Encoding.default_internal = nil
|
28
|
+
EscapeUtils.unescape_html("<b>Bourbon & Branch</b>").encoding.should eql(Encoding.find('utf-8'))
|
29
|
+
end
|
30
|
+
|
31
|
+
it "should use Encoding.default_internal" do
|
32
|
+
Encoding.default_internal = Encoding.find('utf-8')
|
33
|
+
EscapeUtils.unescape_html("<b>Bourbon & Branch</b>").encoding.should eql(Encoding.default_internal)
|
34
|
+
Encoding.default_internal = Encoding.find('us-ascii')
|
35
|
+
EscapeUtils.unescape_html("<b>Bourbon & Branch</b>").encoding.should eql(Encoding.default_internal)
|
36
|
+
end
|
37
|
+
end
|
24
38
|
end
|
@@ -10,7 +10,7 @@ describe EscapeUtils, "escape_javascript" do
|
|
10
10
|
it "should return an empty string if passed nil" do
|
11
11
|
EscapeUtils.escape_javascript(nil).should eql("")
|
12
12
|
end
|
13
|
-
|
13
|
+
|
14
14
|
it "should escape quotes and newlines" do
|
15
15
|
EscapeUtils.escape_javascript(%(This "thing" is really\n netos')).should eql(%(This \\"thing\\" is really\\n netos\\'))
|
16
16
|
end
|
@@ -22,4 +22,18 @@ describe EscapeUtils, "escape_javascript" do
|
|
22
22
|
it "should escape closed html tags" do
|
23
23
|
EscapeUtils.escape_javascript(%(dont </close> tags)).should eql(%(dont <\\/close> tags))
|
24
24
|
end
|
25
|
+
|
26
|
+
if RUBY_VERSION =~ /^1.9/
|
27
|
+
it "should default to utf-8 if Encoding.default_internal is nil" do
|
28
|
+
Encoding.default_internal = nil
|
29
|
+
EscapeUtils.escape_javascript(%(dont </close> tags)).encoding.should eql(Encoding.find('utf-8'))
|
30
|
+
end
|
31
|
+
|
32
|
+
it "should use Encoding.default_internal" do
|
33
|
+
Encoding.default_internal = Encoding.find('utf-8')
|
34
|
+
EscapeUtils.escape_javascript(%(dont </close> tags)).encoding.should eql(Encoding.default_internal)
|
35
|
+
Encoding.default_internal = Encoding.find('us-ascii')
|
36
|
+
EscapeUtils.escape_javascript(%(dont </close> tags)).encoding.should eql(Encoding.default_internal)
|
37
|
+
end
|
38
|
+
end
|
25
39
|
end
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 1
|
8
|
-
- 2
|
9
|
-
version: 0.1.2
|
8
|
+
- 3
|
9
|
+
version: 0.1.3
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Brian Lopez
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2010-06-
|
17
|
+
date: 2010-06-09 00:00:00 -07:00
|
18
18
|
default_executable:
|
19
19
|
dependencies: []
|
20
20
|
|