escape_utils 0.1.2 → 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,5 +1,9 @@
1
1
  # Changelog
2
2
 
3
+ ## 0.1.3 (June 9th, 2010)
4
+ * cleaned some code up, removing duplication
5
+ * moved to a more flexible character encoding scheme using Encoding.default_internal for 1.9 users
6
+
3
7
  ## 0.1.2 (June 8th, 2010)
4
8
  * forgot to add the ActionView monkey patch for JS escaping ;)
5
9
 
@@ -2,7 +2,9 @@
2
2
 
3
3
  Being as though we're all html escaping everything these days, why not make it faster?
4
4
 
5
- At the moment escape_utils supports escaping and unescaping of HTML, and Javascript but I wanna add URL encoding soon
5
+ At the moment escape_utils supports escaping and unescaping of HTML and Javascript, but I wanna add URL encoding soon.
6
+
7
+ For character encoding in 1.9, we'll return strings in whatever Encoding.default_internal is set to or utf-8 otherwise.
6
8
 
7
9
  It has monkey-patches for Rack::Utils, CGI, ERB::Util and Haml and ActionView so you can drop this in and have your app start escaping fast as balls in no time
8
10
 
@@ -45,8 +47,9 @@ It has monkey-patches for Rack::Utils, CGI, ERB::Util and Haml and ActionView so
45
47
 
46
48
  == Benchmarks
47
49
 
48
- In my testing, escaping is around 10-20x faster than the pure ruby implementations in wide use today.
49
- While unescaping is around 20-40x faster than CGI.unescapeHTML - also pure ruby.
50
+ In my testing, escaping html is around 10-20x faster than the pure ruby implementations in wide use today.
51
+ While unescaping html is around 20-40x faster than CGI.unescapeHTML - also pure ruby.
52
+ Escaping Javascript is around 16-30x faster.
50
53
 
51
54
  This output is from my laptop using the benchmark scripts in the benchmarks folder.
52
55
 
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.2
1
+ 0.1.3
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{escape_utils}
8
- s.version = "0.1.2"
8
+ s.version = "0.1.3"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Brian Lopez"]
12
- s.date = %q{2010-06-08}
12
+ s.date = %q{2010-06-09}
13
13
  s.email = %q{seniorlopez@gmail.com}
14
14
  s.extensions = ["ext/extconf.rb"]
15
15
  s.extra_rdoc_files = [
@@ -1,4 +1,8 @@
1
1
  #include <ruby.h>
2
+ #ifdef HAVE_RUBY_ENCODING_H
3
+ #include <ruby/encoding.h>
4
+ static rb_encoding *utf8Encoding;
5
+ #endif
2
6
 
3
7
  #define APPEND_BUFFER(escape, len, scoot_by) \
4
8
  memcpy(&out[total], &in[offset], i-offset); \
@@ -6,18 +10,17 @@
6
10
  offset = i+scoot_by; \
7
11
  memcpy(&out[total], escape, len); \
8
12
  total += len; \
9
- break; \
10
13
 
11
14
  static size_t escape_html(unsigned char *out, const unsigned char *in, size_t in_len) {
12
15
  size_t i = 0, offset = 0, total = 0;
13
16
 
14
17
  for(;i<in_len;i++) {
15
18
  switch(in[i]) {
16
- case '&': APPEND_BUFFER("&amp;", 5, 1);
17
- case '<': APPEND_BUFFER("&lt;", 4, 1);
18
- case '>': APPEND_BUFFER("&gt;", 4, 1);
19
- case '\'': APPEND_BUFFER("&#39;", 5, 1);
20
- case '\"': APPEND_BUFFER("&quot;", 6, 1);
19
+ case '&': APPEND_BUFFER("&amp;", 5, 1); break;
20
+ case '<': APPEND_BUFFER("&lt;", 4, 1); break;
21
+ case '>': APPEND_BUFFER("&gt;", 4, 1); break;
22
+ case '\'': APPEND_BUFFER("&#39;", 5, 1); break;
23
+ case '\"': APPEND_BUFFER("&quot;", 6, 1); break;
21
24
  }
22
25
  }
23
26
 
@@ -31,38 +34,26 @@ static size_t unescape_html(unsigned char *out, const unsigned char *in, size_t
31
34
  size_t i = 0, offset = 0, total = 0;
32
35
 
33
36
  for(;i<in_len;i++) {
34
- switch(in[i]) {
35
- case '&':
36
- if (i+5 <= in_len) {
37
- if (memcmp(&in[i], "&amp;", 5) == 0) {
38
- APPEND_BUFFER("&", 1, 5);
39
- } else if (memcmp(&in[i], "&lt;", 4) == 0) {
40
- APPEND_BUFFER("<", 1, 4);
41
- } else if (memcmp(&in[i], "&gt;", 4) == 0) {
42
- APPEND_BUFFER(">", 1, 4);
43
- } else if (memcmp(&in[i], "&#39;", 5) == 0) {
44
- APPEND_BUFFER("\'", 1, 5);
45
- } else if (memcmp(&in[i], "&quot;", 6) == 0) {
46
- APPEND_BUFFER("\"", 1, 6);
47
- }
48
- } else if (i+4 <= in_len) {
49
- if (memcmp(&in[i], "&amp;", 5) == 0) {
50
- APPEND_BUFFER("&", 1, 5);
51
- } else if (memcmp(&in[i], "&lt;", 4) == 0) {
52
- APPEND_BUFFER("<", 1, 4);
53
- } else if (memcmp(&in[i], "&gt;", 4) == 0) {
54
- APPEND_BUFFER(">", 1, 4);
55
- } else if (memcmp(&in[i], "&#39;", 5) == 0) {
56
- APPEND_BUFFER("\'", 1, 5);
57
- }
58
- } else if (i+3 <= in_len) {
59
- if (memcmp(&in[i], "&lt;", 4) == 0) {
60
- APPEND_BUFFER("<", 1, 4);
61
- } else if (memcmp(&in[i], "&gt;", 4) == 0) {
62
- APPEND_BUFFER(">", 1, 4);
63
- }
37
+ if(in[i] == '&') {
38
+ if (i+3 <= in_len) {
39
+ if (memcmp(&in[i], "&lt;", 4) == 0) {
40
+ APPEND_BUFFER("<", 1, 4);
41
+ } else if (memcmp(&in[i], "&gt;", 4) == 0) {
42
+ APPEND_BUFFER(">", 1, 4);
43
+ }
44
+ }
45
+ if (i+4 <= in_len) {
46
+ if (memcmp(&in[i], "&amp;", 5) == 0) {
47
+ APPEND_BUFFER("&", 1, 5);
48
+ } else if (memcmp(&in[i], "&#39;", 5) == 0) {
49
+ APPEND_BUFFER("\'", 1, 5);
64
50
  }
65
- break;
51
+ }
52
+ if (i+5 <= in_len) {
53
+ if (memcmp(&in[i], "&quot;", 6) == 0) {
54
+ APPEND_BUFFER("\"", 1, 6);
55
+ }
56
+ }
66
57
  }
67
58
  }
68
59
 
@@ -77,7 +68,7 @@ static size_t escape_javascript(unsigned char *out, const unsigned char *in, siz
77
68
 
78
69
  for(;i<in_len;i++) {
79
70
  switch(in[i]) {
80
- case '\\': APPEND_BUFFER("\\\\", 2, 1);
71
+ case '\\': APPEND_BUFFER("\\\\", 2, 1); break;
81
72
  case '<':
82
73
  if (i+1 <= in_len && in[i+1] == '/') {
83
74
  APPEND_BUFFER("<\\/", 3, 2);
@@ -90,9 +81,9 @@ static size_t escape_javascript(unsigned char *out, const unsigned char *in, siz
90
81
  APPEND_BUFFER("\\n", 2, 1);
91
82
  }
92
83
  break;
93
- case '\n': APPEND_BUFFER("\\n", 2, 1);
94
- case '\"': APPEND_BUFFER("\\\"", 2, 1);
95
- case '\'': APPEND_BUFFER("\\'", 2, 1);
84
+ case '\n': APPEND_BUFFER("\\n", 2, 1); break;
85
+ case '\"': APPEND_BUFFER("\\\"", 2, 1); break;
86
+ case '\'': APPEND_BUFFER("\\'", 2, 1); break;
96
87
  }
97
88
  }
98
89
 
@@ -106,6 +97,10 @@ static VALUE rb_escape_html(VALUE self, VALUE str) {
106
97
  Check_Type(str, T_STRING);
107
98
 
108
99
  VALUE rb_output_buf;
100
+ #ifdef HAVE_RUBY_ENCODING_H
101
+ rb_encoding *default_internal_enc = rb_default_internal_encoding();
102
+ rb_encoding *original_encoding = rb_enc_get(str);
103
+ #endif
109
104
  unsigned char *inBuf = (unsigned char*)RSTRING_PTR(str);
110
105
  size_t len = RSTRING_LEN(str), new_len = 0;
111
106
 
@@ -122,6 +117,14 @@ static VALUE rb_escape_html(VALUE self, VALUE str) {
122
117
  // free the temporary C string
123
118
  free(outBuf);
124
119
 
120
+ #ifdef HAVE_RUBY_ENCODING_H
121
+ rb_enc_associate(rb_output_buf, original_encoding);
122
+ if (default_internal_enc) {
123
+ rb_output_buf = rb_str_export_to_enc(rb_output_buf, default_internal_enc);
124
+ } else {
125
+ rb_output_buf = rb_str_export_to_enc(rb_output_buf, utf8Encoding);
126
+ }
127
+ #endif
125
128
  return rb_output_buf;
126
129
  }
127
130
 
@@ -129,6 +132,10 @@ static VALUE rb_unescape_html(VALUE self, VALUE str) {
129
132
  Check_Type(str, T_STRING);
130
133
 
131
134
  VALUE rb_output_buf;
135
+ #ifdef HAVE_RUBY_ENCODING_H
136
+ rb_encoding *default_internal_enc = rb_default_internal_encoding();
137
+ rb_encoding *original_encoding = rb_enc_get(str);
138
+ #endif
132
139
  unsigned char *inBuf = (unsigned char*)RSTRING_PTR(str);
133
140
  size_t len = RSTRING_LEN(str), new_len = 0;
134
141
 
@@ -145,6 +152,14 @@ static VALUE rb_unescape_html(VALUE self, VALUE str) {
145
152
  // free the temporary C string
146
153
  free(outBuf);
147
154
 
155
+ #ifdef HAVE_RUBY_ENCODING_H
156
+ rb_enc_associate(rb_output_buf, original_encoding);
157
+ if (default_internal_enc) {
158
+ rb_output_buf = rb_str_export_to_enc(rb_output_buf, default_internal_enc);
159
+ } else {
160
+ rb_output_buf = rb_str_export_to_enc(rb_output_buf, utf8Encoding);
161
+ }
162
+ #endif
148
163
  return rb_output_buf;
149
164
  }
150
165
 
@@ -156,6 +171,10 @@ static VALUE rb_escape_javascript(VALUE self, VALUE str) {
156
171
  Check_Type(str, T_STRING);
157
172
 
158
173
  VALUE rb_output_buf;
174
+ #ifdef HAVE_RUBY_ENCODING_H
175
+ rb_encoding *default_internal_enc = rb_default_internal_encoding();
176
+ rb_encoding *original_encoding = rb_enc_get(str);
177
+ #endif
159
178
  unsigned char *inBuf = (unsigned char*)RSTRING_PTR(str);
160
179
  size_t len = RSTRING_LEN(str), new_len = 0;
161
180
 
@@ -172,6 +191,14 @@ static VALUE rb_escape_javascript(VALUE self, VALUE str) {
172
191
  // free the temporary C string
173
192
  free(outBuf);
174
193
 
194
+ #ifdef HAVE_RUBY_ENCODING_H
195
+ rb_enc_associate(rb_output_buf, original_encoding);
196
+ if (default_internal_enc) {
197
+ rb_output_buf = rb_str_export_to_enc(rb_output_buf, default_internal_enc);
198
+ } else {
199
+ rb_output_buf = rb_str_export_to_enc(rb_output_buf, utf8Encoding);
200
+ }
201
+ #endif
175
202
  return rb_output_buf;
176
203
  }
177
204
 
@@ -184,4 +211,8 @@ void Init_escape_utils_ext() {
184
211
  rb_define_module_function(mEscape, "unescape_html", rb_unescape_html, 1);
185
212
  rb_define_method(mEscape, "escape_javascript", rb_escape_javascript, 1);
186
213
  rb_define_module_function(mEscape, "escape_javascript", rb_escape_javascript, 1);
214
+
215
+ #ifdef HAVE_RUBY_ENCODING_H
216
+ utf8Encoding = rb_utf8_encoding();
217
+ #endif
187
218
  }
@@ -4,5 +4,5 @@ require 'escape_utils_ext'
4
4
 
5
5
  EscapeUtils.send(:extend, EscapeUtils)
6
6
  module EscapeUtils
7
- VERSION = "0.1.2"
7
+ VERSION = "0.1.3"
8
8
  end
@@ -21,4 +21,18 @@ describe EscapeUtils, "escape_html" do
21
21
  it "should escape the & character" do
22
22
  EscapeUtils.escape_html("<b>Bourbon & Branch</b>").should eql("&lt;b&gt;Bourbon &amp; Branch&lt;/b&gt;")
23
23
  end
24
+
25
+ if RUBY_VERSION =~ /^1.9/
26
+ it "should default to utf-8 if Encoding.default_internal is nil" do
27
+ Encoding.default_internal = nil
28
+ EscapeUtils.escape_html("<b>Bourbon & Branch</b>").encoding.should eql(Encoding.find('utf-8'))
29
+ end
30
+
31
+ it "should use Encoding.default_internal" do
32
+ Encoding.default_internal = Encoding.find('utf-8')
33
+ EscapeUtils.escape_html("<b>Bourbon & Branch</b>").encoding.should eql(Encoding.default_internal)
34
+ Encoding.default_internal = Encoding.find('us-ascii')
35
+ EscapeUtils.escape_html("<b>Bourbon & Branch</b>").encoding.should eql(Encoding.default_internal)
36
+ end
37
+ end
24
38
  end
@@ -21,4 +21,18 @@ describe EscapeUtils, "unescape_html" do
21
21
  it "should unescape the & character" do
22
22
  EscapeUtils.unescape_html("&lt;b&gt;Bourbon &amp; Branch&lt;/b&gt;").should eql("<b>Bourbon & Branch</b>")
23
23
  end
24
+
25
+ if RUBY_VERSION =~ /^1.9/
26
+ it "should default to utf-8 if Encoding.default_internal is nil" do
27
+ Encoding.default_internal = nil
28
+ EscapeUtils.unescape_html("&lt;b&gt;Bourbon &amp; Branch&lt;/b&gt;").encoding.should eql(Encoding.find('utf-8'))
29
+ end
30
+
31
+ it "should use Encoding.default_internal" do
32
+ Encoding.default_internal = Encoding.find('utf-8')
33
+ EscapeUtils.unescape_html("&lt;b&gt;Bourbon &amp; Branch&lt;/b&gt;").encoding.should eql(Encoding.default_internal)
34
+ Encoding.default_internal = Encoding.find('us-ascii')
35
+ EscapeUtils.unescape_html("&lt;b&gt;Bourbon &amp; Branch&lt;/b&gt;").encoding.should eql(Encoding.default_internal)
36
+ end
37
+ end
24
38
  end
@@ -10,7 +10,7 @@ describe EscapeUtils, "escape_javascript" do
10
10
  it "should return an empty string if passed nil" do
11
11
  EscapeUtils.escape_javascript(nil).should eql("")
12
12
  end
13
-
13
+
14
14
  it "should escape quotes and newlines" do
15
15
  EscapeUtils.escape_javascript(%(This "thing" is really\n netos')).should eql(%(This \\"thing\\" is really\\n netos\\'))
16
16
  end
@@ -22,4 +22,18 @@ describe EscapeUtils, "escape_javascript" do
22
22
  it "should escape closed html tags" do
23
23
  EscapeUtils.escape_javascript(%(dont </close> tags)).should eql(%(dont <\\/close> tags))
24
24
  end
25
+
26
+ if RUBY_VERSION =~ /^1.9/
27
+ it "should default to utf-8 if Encoding.default_internal is nil" do
28
+ Encoding.default_internal = nil
29
+ EscapeUtils.escape_javascript(%(dont </close> tags)).encoding.should eql(Encoding.find('utf-8'))
30
+ end
31
+
32
+ it "should use Encoding.default_internal" do
33
+ Encoding.default_internal = Encoding.find('utf-8')
34
+ EscapeUtils.escape_javascript(%(dont </close> tags)).encoding.should eql(Encoding.default_internal)
35
+ Encoding.default_internal = Encoding.find('us-ascii')
36
+ EscapeUtils.escape_javascript(%(dont </close> tags)).encoding.should eql(Encoding.default_internal)
37
+ end
38
+ end
25
39
  end
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 1
8
- - 2
9
- version: 0.1.2
8
+ - 3
9
+ version: 0.1.3
10
10
  platform: ruby
11
11
  authors:
12
12
  - Brian Lopez
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2010-06-08 00:00:00 -07:00
17
+ date: 2010-06-09 00:00:00 -07:00
18
18
  default_executable:
19
19
  dependencies: []
20
20