escape_utils 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG.md +4 -0
- data/README.rdoc +6 -3
- data/VERSION +1 -1
- data/escape_utils.gemspec +2 -2
- data/ext/escape_utils.c +72 -41
- data/lib/escape_utils.rb +1 -1
- data/spec/html/escape_spec.rb +14 -0
- data/spec/html/unescape_spec.rb +14 -0
- data/spec/javascript/escape_spec.rb +15 -1
- metadata +3 -3
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,9 @@
|
|
1
1
|
# Changelog
|
2
2
|
|
3
|
+
## 0.1.3 (June 9th, 2010)
|
4
|
+
* cleaned some code up, removing duplication
|
5
|
+
* moved to a more flexible character encoding scheme using Encoding.default_internal for 1.9 users
|
6
|
+
|
3
7
|
## 0.1.2 (June 8th, 2010)
|
4
8
|
* forgot to add the ActionView monkey patch for JS escaping ;)
|
5
9
|
|
data/README.rdoc
CHANGED
@@ -2,7 +2,9 @@
|
|
2
2
|
|
3
3
|
Being as though we're all html escaping everything these days, why not make it faster?
|
4
4
|
|
5
|
-
At the moment escape_utils supports escaping and unescaping of HTML, and Javascript but I wanna add URL encoding soon
|
5
|
+
At the moment escape_utils supports escaping and unescaping of HTML, and Javascript but I wanna add URL encoding soon.
|
6
|
+
|
7
|
+
For character encoding in 1.9, we'll return strings in whatever Encoding.default_internal is set to or utf-8 otherwise.
|
6
8
|
|
7
9
|
It has monkey-patches for Rack::Utils, CGI, ERB::Util and Haml and ActionView so you can drop this in and have your app start escaping fast as balls in no time
|
8
10
|
|
@@ -45,8 +47,9 @@ It has monkey-patches for Rack::Utils, CGI, ERB::Util and Haml and ActionView so
|
|
45
47
|
|
46
48
|
== Benchmarks
|
47
49
|
|
48
|
-
In my testing, escaping is around 10-20x faster than the pure ruby implementations in wide use today.
|
49
|
-
While unescaping is around 20-40x faster than CGI.unescapeHTML - also pure ruby.
|
50
|
+
In my testing, escaping html is around 10-20x faster than the pure ruby implementations in wide use today.
|
51
|
+
While unescaping html is around 20-40x faster than CGI.unescapeHTML - also pure ruby.
|
52
|
+
Escaping Javascript is around 16-30x faster.
|
50
53
|
|
51
54
|
This output is from my laptop using the benchmark scripts in the benchmarks folder.
|
52
55
|
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.1.2
|
1
|
+
0.1.3
|
data/escape_utils.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{escape_utils}
|
8
|
-
s.version = "0.1.2"
|
8
|
+
s.version = "0.1.3"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Brian Lopez"]
|
12
|
-
s.date = %q{2010-06-08}
|
12
|
+
s.date = %q{2010-06-09}
|
13
13
|
s.email = %q{seniorlopez@gmail.com}
|
14
14
|
s.extensions = ["ext/extconf.rb"]
|
15
15
|
s.extra_rdoc_files = [
|
data/ext/escape_utils.c
CHANGED
@@ -1,4 +1,8 @@
|
|
1
1
|
#include <ruby.h>
|
2
|
+
#ifdef HAVE_RUBY_ENCODING_H
|
3
|
+
#include <ruby/encoding.h>
|
4
|
+
static rb_encoding *utf8Encoding;
|
5
|
+
#endif
|
2
6
|
|
3
7
|
#define APPEND_BUFFER(escape, len, scoot_by) \
|
4
8
|
memcpy(&out[total], &in[offset], i-offset); \
|
@@ -6,18 +10,17 @@
|
|
6
10
|
offset = i+scoot_by; \
|
7
11
|
memcpy(&out[total], escape, len); \
|
8
12
|
total += len; \
|
9
|
-
break; \
|
10
13
|
|
11
14
|
static size_t escape_html(unsigned char *out, const unsigned char *in, size_t in_len) {
|
12
15
|
size_t i = 0, offset = 0, total = 0;
|
13
16
|
|
14
17
|
for(;i<in_len;i++) {
|
15
18
|
switch(in[i]) {
|
16
|
-
case '&': APPEND_BUFFER("&amp;", 5, 1);
|
17
|
-
case '<': APPEND_BUFFER("&lt;", 4, 1);
|
18
|
-
case '>': APPEND_BUFFER("&gt;", 4, 1);
|
19
|
-
case '\'': APPEND_BUFFER("&#39;", 5, 1);
|
20
|
-
case '\"': APPEND_BUFFER("&quot;", 6, 1);
|
19
|
+
case '&': APPEND_BUFFER("&amp;", 5, 1); break;
|
20
|
+
case '<': APPEND_BUFFER("&lt;", 4, 1); break;
|
21
|
+
case '>': APPEND_BUFFER("&gt;", 4, 1); break;
|
22
|
+
case '\'': APPEND_BUFFER("&#39;", 5, 1); break;
|
23
|
+
case '\"': APPEND_BUFFER("&quot;", 6, 1); break;
|
21
24
|
}
|
22
25
|
}
|
23
26
|
|
@@ -31,38 +34,26 @@ static size_t unescape_html(unsigned char *out, const unsigned char *in, size_t
|
|
31
34
|
size_t i = 0, offset = 0, total = 0;
|
32
35
|
|
33
36
|
for(;i<in_len;i++) {
|
34
|
-
|
35
|
-
|
36
|
-
if (i
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
}
|
48
|
-
} else if (i+4 <= in_len) {
|
49
|
-
if (memcmp(&in[i], "&amp;", 5) == 0) {
|
50
|
-
APPEND_BUFFER("&", 1, 5);
|
51
|
-
} else if (memcmp(&in[i], "&lt;", 4) == 0) {
|
52
|
-
APPEND_BUFFER("<", 1, 4);
|
53
|
-
} else if (memcmp(&in[i], "&gt;", 4) == 0) {
|
54
|
-
APPEND_BUFFER(">", 1, 4);
|
55
|
-
} else if (memcmp(&in[i], "&#39;", 5) == 0) {
|
56
|
-
APPEND_BUFFER("\'", 1, 5);
|
57
|
-
}
|
58
|
-
} else if (i+3 <= in_len) {
|
59
|
-
if (memcmp(&in[i], "&lt;", 4) == 0) {
|
60
|
-
APPEND_BUFFER("<", 1, 4);
|
61
|
-
} else if (memcmp(&in[i], "&gt;", 4) == 0) {
|
62
|
-
APPEND_BUFFER(">", 1, 4);
|
63
|
-
}
|
37
|
+
if(in[i] == '&') {
|
38
|
+
if (i+3 <= in_len) {
|
39
|
+
if (memcmp(&in[i], "&lt;", 4) == 0) {
|
40
|
+
APPEND_BUFFER("<", 1, 4);
|
41
|
+
} else if (memcmp(&in[i], "&gt;", 4) == 0) {
|
42
|
+
APPEND_BUFFER(">", 1, 4);
|
43
|
+
}
|
44
|
+
}
|
45
|
+
if (i+4 <= in_len) {
|
46
|
+
if (memcmp(&in[i], "&amp;", 5) == 0) {
|
47
|
+
APPEND_BUFFER("&", 1, 5);
|
48
|
+
} else if (memcmp(&in[i], "&#39;", 5) == 0) {
|
49
|
+
APPEND_BUFFER("\'", 1, 5);
|
64
50
|
}
|
65
|
-
|
51
|
+
}
|
52
|
+
if (i+5 <= in_len) {
|
53
|
+
if (memcmp(&in[i], "&quot;", 6) == 0) {
|
54
|
+
APPEND_BUFFER("\"", 1, 6);
|
55
|
+
}
|
56
|
+
}
|
66
57
|
}
|
67
58
|
}
|
68
59
|
|
@@ -77,7 +68,7 @@ static size_t escape_javascript(unsigned char *out, const unsigned char *in, siz
|
|
77
68
|
|
78
69
|
for(;i<in_len;i++) {
|
79
70
|
switch(in[i]) {
|
80
|
-
case '\\': APPEND_BUFFER("\\\\", 2, 1);
|
71
|
+
case '\\': APPEND_BUFFER("\\\\", 2, 1); break;
|
81
72
|
case '<':
|
82
73
|
if (i+1 <= in_len && in[i+1] == '/') {
|
83
74
|
APPEND_BUFFER("<\\/", 3, 2);
|
@@ -90,9 +81,9 @@ static size_t escape_javascript(unsigned char *out, const unsigned char *in, siz
|
|
90
81
|
APPEND_BUFFER("\\n", 2, 1);
|
91
82
|
}
|
92
83
|
break;
|
93
|
-
case '\n': APPEND_BUFFER("\\n", 2, 1);
|
94
|
-
case '\"': APPEND_BUFFER("\\\"", 2, 1);
|
95
|
-
case '\'': APPEND_BUFFER("\\'", 2, 1);
|
84
|
+
case '\n': APPEND_BUFFER("\\n", 2, 1); break;
|
85
|
+
case '\"': APPEND_BUFFER("\\\"", 2, 1); break;
|
86
|
+
case '\'': APPEND_BUFFER("\\'", 2, 1); break;
|
96
87
|
}
|
97
88
|
}
|
98
89
|
|
@@ -106,6 +97,10 @@ static VALUE rb_escape_html(VALUE self, VALUE str) {
|
|
106
97
|
Check_Type(str, T_STRING);
|
107
98
|
|
108
99
|
VALUE rb_output_buf;
|
100
|
+
#ifdef HAVE_RUBY_ENCODING_H
|
101
|
+
rb_encoding *default_internal_enc = rb_default_internal_encoding();
|
102
|
+
rb_encoding *original_encoding = rb_enc_get(str);
|
103
|
+
#endif
|
109
104
|
unsigned char *inBuf = (unsigned char*)RSTRING_PTR(str);
|
110
105
|
size_t len = RSTRING_LEN(str), new_len = 0;
|
111
106
|
|
@@ -122,6 +117,14 @@ static VALUE rb_escape_html(VALUE self, VALUE str) {
|
|
122
117
|
// free the temporary C string
|
123
118
|
free(outBuf);
|
124
119
|
|
120
|
+
#ifdef HAVE_RUBY_ENCODING_H
|
121
|
+
rb_enc_associate(rb_output_buf, original_encoding);
|
122
|
+
if (default_internal_enc) {
|
123
|
+
rb_output_buf = rb_str_export_to_enc(rb_output_buf, default_internal_enc);
|
124
|
+
} else {
|
125
|
+
rb_output_buf = rb_str_export_to_enc(rb_output_buf, utf8Encoding);
|
126
|
+
}
|
127
|
+
#endif
|
125
128
|
return rb_output_buf;
|
126
129
|
}
|
127
130
|
|
@@ -129,6 +132,10 @@ static VALUE rb_unescape_html(VALUE self, VALUE str) {
|
|
129
132
|
Check_Type(str, T_STRING);
|
130
133
|
|
131
134
|
VALUE rb_output_buf;
|
135
|
+
#ifdef HAVE_RUBY_ENCODING_H
|
136
|
+
rb_encoding *default_internal_enc = rb_default_internal_encoding();
|
137
|
+
rb_encoding *original_encoding = rb_enc_get(str);
|
138
|
+
#endif
|
132
139
|
unsigned char *inBuf = (unsigned char*)RSTRING_PTR(str);
|
133
140
|
size_t len = RSTRING_LEN(str), new_len = 0;
|
134
141
|
|
@@ -145,6 +152,14 @@ static VALUE rb_unescape_html(VALUE self, VALUE str) {
|
|
145
152
|
// free the temporary C string
|
146
153
|
free(outBuf);
|
147
154
|
|
155
|
+
#ifdef HAVE_RUBY_ENCODING_H
|
156
|
+
rb_enc_associate(rb_output_buf, original_encoding);
|
157
|
+
if (default_internal_enc) {
|
158
|
+
rb_output_buf = rb_str_export_to_enc(rb_output_buf, default_internal_enc);
|
159
|
+
} else {
|
160
|
+
rb_output_buf = rb_str_export_to_enc(rb_output_buf, utf8Encoding);
|
161
|
+
}
|
162
|
+
#endif
|
148
163
|
return rb_output_buf;
|
149
164
|
}
|
150
165
|
|
@@ -156,6 +171,10 @@ static VALUE rb_escape_javascript(VALUE self, VALUE str) {
|
|
156
171
|
Check_Type(str, T_STRING);
|
157
172
|
|
158
173
|
VALUE rb_output_buf;
|
174
|
+
#ifdef HAVE_RUBY_ENCODING_H
|
175
|
+
rb_encoding *default_internal_enc = rb_default_internal_encoding();
|
176
|
+
rb_encoding *original_encoding = rb_enc_get(str);
|
177
|
+
#endif
|
159
178
|
unsigned char *inBuf = (unsigned char*)RSTRING_PTR(str);
|
160
179
|
size_t len = RSTRING_LEN(str), new_len = 0;
|
161
180
|
|
@@ -172,6 +191,14 @@ static VALUE rb_escape_javascript(VALUE self, VALUE str) {
|
|
172
191
|
// free the temporary C string
|
173
192
|
free(outBuf);
|
174
193
|
|
194
|
+
#ifdef HAVE_RUBY_ENCODING_H
|
195
|
+
rb_enc_associate(rb_output_buf, original_encoding);
|
196
|
+
if (default_internal_enc) {
|
197
|
+
rb_output_buf = rb_str_export_to_enc(rb_output_buf, default_internal_enc);
|
198
|
+
} else {
|
199
|
+
rb_output_buf = rb_str_export_to_enc(rb_output_buf, utf8Encoding);
|
200
|
+
}
|
201
|
+
#endif
|
175
202
|
return rb_output_buf;
|
176
203
|
}
|
177
204
|
|
@@ -184,4 +211,8 @@ void Init_escape_utils_ext() {
|
|
184
211
|
rb_define_module_function(mEscape, "unescape_html", rb_unescape_html, 1);
|
185
212
|
rb_define_method(mEscape, "escape_javascript", rb_escape_javascript, 1);
|
186
213
|
rb_define_module_function(mEscape, "escape_javascript", rb_escape_javascript, 1);
|
214
|
+
|
215
|
+
#ifdef HAVE_RUBY_ENCODING_H
|
216
|
+
utf8Encoding = rb_utf8_encoding();
|
217
|
+
#endif
|
187
218
|
}
|
data/lib/escape_utils.rb
CHANGED
data/spec/html/escape_spec.rb
CHANGED
@@ -21,4 +21,18 @@ describe EscapeUtils, "escape_html" do
|
|
21
21
|
it "should escape the & character" do
|
22
22
|
EscapeUtils.escape_html("<b>Bourbon & Branch</b>").should eql("&lt;b&gt;Bourbon &amp; Branch&lt;/b&gt;")
|
23
23
|
end
|
24
|
+
|
25
|
+
if RUBY_VERSION =~ /^1.9/
|
26
|
+
it "should default to utf-8 if Encoding.default_internal is nil" do
|
27
|
+
Encoding.default_internal = nil
|
28
|
+
EscapeUtils.escape_html("<b>Bourbon & Branch</b>").encoding.should eql(Encoding.find('utf-8'))
|
29
|
+
end
|
30
|
+
|
31
|
+
it "should use Encoding.default_internal" do
|
32
|
+
Encoding.default_internal = Encoding.find('utf-8')
|
33
|
+
EscapeUtils.escape_html("<b>Bourbon & Branch</b>").encoding.should eql(Encoding.default_internal)
|
34
|
+
Encoding.default_internal = Encoding.find('us-ascii')
|
35
|
+
EscapeUtils.escape_html("<b>Bourbon & Branch</b>").encoding.should eql(Encoding.default_internal)
|
36
|
+
end
|
37
|
+
end
|
24
38
|
end
|
data/spec/html/unescape_spec.rb
CHANGED
@@ -21,4 +21,18 @@ describe EscapeUtils, "unescape_html" do
|
|
21
21
|
it "should unescape the & character" do
|
22
22
|
EscapeUtils.unescape_html("&lt;b&gt;Bourbon &amp; Branch&lt;/b&gt;").should eql("<b>Bourbon & Branch</b>")
|
23
23
|
end
|
24
|
+
|
25
|
+
if RUBY_VERSION =~ /^1.9/
|
26
|
+
it "should default to utf-8 if Encoding.default_internal is nil" do
|
27
|
+
Encoding.default_internal = nil
|
28
|
+
EscapeUtils.unescape_html("<b>Bourbon & Branch</b>").encoding.should eql(Encoding.find('utf-8'))
|
29
|
+
end
|
30
|
+
|
31
|
+
it "should use Encoding.default_internal" do
|
32
|
+
Encoding.default_internal = Encoding.find('utf-8')
|
33
|
+
EscapeUtils.unescape_html("<b>Bourbon & Branch</b>").encoding.should eql(Encoding.default_internal)
|
34
|
+
Encoding.default_internal = Encoding.find('us-ascii')
|
35
|
+
EscapeUtils.unescape_html("<b>Bourbon & Branch</b>").encoding.should eql(Encoding.default_internal)
|
36
|
+
end
|
37
|
+
end
|
24
38
|
end
|
@@ -10,7 +10,7 @@ describe EscapeUtils, "escape_javascript" do
|
|
10
10
|
it "should return an empty string if passed nil" do
|
11
11
|
EscapeUtils.escape_javascript(nil).should eql("")
|
12
12
|
end
|
13
|
-
|
13
|
+
|
14
14
|
it "should escape quotes and newlines" do
|
15
15
|
EscapeUtils.escape_javascript(%(This "thing" is really\n netos')).should eql(%(This \\"thing\\" is really\\n netos\\'))
|
16
16
|
end
|
@@ -22,4 +22,18 @@ describe EscapeUtils, "escape_javascript" do
|
|
22
22
|
it "should escape closed html tags" do
|
23
23
|
EscapeUtils.escape_javascript(%(dont </close> tags)).should eql(%(dont <\\/close> tags))
|
24
24
|
end
|
25
|
+
|
26
|
+
if RUBY_VERSION =~ /^1.9/
|
27
|
+
it "should default to utf-8 if Encoding.default_internal is nil" do
|
28
|
+
Encoding.default_internal = nil
|
29
|
+
EscapeUtils.escape_javascript(%(dont </close> tags)).encoding.should eql(Encoding.find('utf-8'))
|
30
|
+
end
|
31
|
+
|
32
|
+
it "should use Encoding.default_internal" do
|
33
|
+
Encoding.default_internal = Encoding.find('utf-8')
|
34
|
+
EscapeUtils.escape_javascript(%(dont </close> tags)).encoding.should eql(Encoding.default_internal)
|
35
|
+
Encoding.default_internal = Encoding.find('us-ascii')
|
36
|
+
EscapeUtils.escape_javascript(%(dont </close> tags)).encoding.should eql(Encoding.default_internal)
|
37
|
+
end
|
38
|
+
end
|
25
39
|
end
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 1
|
8
|
-
- 2
|
9
|
-
version: 0.1.2
|
8
|
+
- 3
|
9
|
+
version: 0.1.3
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Brian Lopez
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2010-06-08 00:00:00 -07:00
|
17
|
+
date: 2010-06-09 00:00:00 -07:00
|
18
18
|
default_executable:
|
19
19
|
dependencies: []
|
20
20
|
|