escape_utils 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,9 @@
1
1
  # Changelog
2
2
 
3
+ ## 0.1.3 (June 9th, 2010)
4
+ * cleaned some code up, removing duplication
5
+ * moved to a more flexible character encoding scheme using Encoding.default_internal for 1.9 users
6
+
3
7
  ## 0.1.2 (June 8th, 2010)
4
8
  * forgot to add the ActionView monkey patch for JS escaping ;)
5
9
 
@@ -2,7 +2,9 @@
2
2
 
3
3
  Being as though we're all html escaping everything these days, why not make it faster?
4
4
 
5
- At the moment escape_utils supports escaping and unescaping of HTML, and Javascript but I wanna add URL encoding soon
5
+ At the moment escape_utils supports escaping and unescaping of HTML and Javascript, but I wanna add URL encoding soon.
6
+
7
+ For character encoding in 1.9, we'll return strings in whatever encoding Encoding.default_internal is set to, or in utf-8 if it isn't set.
6
8
 
7
9
  It has monkey-patches for Rack::Utils, CGI, ERB::Util and Haml and ActionView so you can drop this in and have your app start escaping fast as balls in no time
8
10
 
@@ -45,8 +47,9 @@ It has monkey-patches for Rack::Utils, CGI, ERB::Util and Haml and ActionView so
45
47
 
46
48
  == Benchmarks
47
49
 
48
- In my testing, escaping is around 10-20x faster than the pure ruby implementations in wide use today.
49
- While unescaping is around 20-40x faster than CGI.unescapeHTML - also pure ruby.
50
+ In my testing, escaping html is around 10-20x faster than the pure ruby implementations in wide use today.
51
+ While unescaping html is around 20-40x faster than CGI.unescapeHTML - also pure ruby.
52
+ Escaping Javascript is around 16-30x faster.
50
53
 
51
54
  This output is from my laptop using the benchmark scripts in the benchmarks folder.
52
55
 
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.2
1
+ 0.1.3
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{escape_utils}
8
- s.version = "0.1.2"
8
+ s.version = "0.1.3"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Brian Lopez"]
12
- s.date = %q{2010-06-08}
12
+ s.date = %q{2010-06-09}
13
13
  s.email = %q{seniorlopez@gmail.com}
14
14
  s.extensions = ["ext/extconf.rb"]
15
15
  s.extra_rdoc_files = [
@@ -1,4 +1,8 @@
1
1
  #include <ruby.h>
2
+ #ifdef HAVE_RUBY_ENCODING_H
3
+ #include <ruby/encoding.h>
4
+ static rb_encoding *utf8Encoding;
5
+ #endif
2
6
 
3
7
  #define APPEND_BUFFER(escape, len, scoot_by) \
4
8
  memcpy(&out[total], &in[offset], i-offset); \
@@ -6,18 +10,17 @@
6
10
  offset = i+scoot_by; \
7
11
  memcpy(&out[total], escape, len); \
8
12
  total += len; \
9
- break; \
10
13
 
11
14
  static size_t escape_html(unsigned char *out, const unsigned char *in, size_t in_len) {
12
15
  size_t i = 0, offset = 0, total = 0;
13
16
 
14
17
  for(;i<in_len;i++) {
15
18
  switch(in[i]) {
16
- case '&': APPEND_BUFFER("&amp;", 5, 1);
17
- case '<': APPEND_BUFFER("&lt;", 4, 1);
18
- case '>': APPEND_BUFFER("&gt;", 4, 1);
19
- case '\'': APPEND_BUFFER("&#39;", 5, 1);
20
- case '\"': APPEND_BUFFER("&quot;", 6, 1);
19
+ case '&': APPEND_BUFFER("&amp;", 5, 1); break;
20
+ case '<': APPEND_BUFFER("&lt;", 4, 1); break;
21
+ case '>': APPEND_BUFFER("&gt;", 4, 1); break;
22
+ case '\'': APPEND_BUFFER("&#39;", 5, 1); break;
23
+ case '\"': APPEND_BUFFER("&quot;", 6, 1); break;
21
24
  }
22
25
  }
23
26
 
@@ -31,38 +34,26 @@ static size_t unescape_html(unsigned char *out, const unsigned char *in, size_t
31
34
  size_t i = 0, offset = 0, total = 0;
32
35
 
33
36
  for(;i<in_len;i++) {
34
- switch(in[i]) {
35
- case '&':
36
- if (i+5 <= in_len) {
37
- if (memcmp(&in[i], "&amp;", 5) == 0) {
38
- APPEND_BUFFER("&", 1, 5);
39
- } else if (memcmp(&in[i], "&lt;", 4) == 0) {
40
- APPEND_BUFFER("<", 1, 4);
41
- } else if (memcmp(&in[i], "&gt;", 4) == 0) {
42
- APPEND_BUFFER(">", 1, 4);
43
- } else if (memcmp(&in[i], "&#39;", 5) == 0) {
44
- APPEND_BUFFER("\'", 1, 5);
45
- } else if (memcmp(&in[i], "&quot;", 6) == 0) {
46
- APPEND_BUFFER("\"", 1, 6);
47
- }
48
- } else if (i+4 <= in_len) {
49
- if (memcmp(&in[i], "&amp;", 5) == 0) {
50
- APPEND_BUFFER("&", 1, 5);
51
- } else if (memcmp(&in[i], "&lt;", 4) == 0) {
52
- APPEND_BUFFER("<", 1, 4);
53
- } else if (memcmp(&in[i], "&gt;", 4) == 0) {
54
- APPEND_BUFFER(">", 1, 4);
55
- } else if (memcmp(&in[i], "&#39;", 5) == 0) {
56
- APPEND_BUFFER("\'", 1, 5);
57
- }
58
- } else if (i+3 <= in_len) {
59
- if (memcmp(&in[i], "&lt;", 4) == 0) {
60
- APPEND_BUFFER("<", 1, 4);
61
- } else if (memcmp(&in[i], "&gt;", 4) == 0) {
62
- APPEND_BUFFER(">", 1, 4);
63
- }
37
+ if(in[i] == '&') {
38
+ if (i+3 <= in_len) {
39
+ if (memcmp(&in[i], "&lt;", 4) == 0) {
40
+ APPEND_BUFFER("<", 1, 4);
41
+ } else if (memcmp(&in[i], "&gt;", 4) == 0) {
42
+ APPEND_BUFFER(">", 1, 4);
43
+ }
44
+ }
45
+ if (i+4 <= in_len) {
46
+ if (memcmp(&in[i], "&amp;", 5) == 0) {
47
+ APPEND_BUFFER("&", 1, 5);
48
+ } else if (memcmp(&in[i], "&#39;", 5) == 0) {
49
+ APPEND_BUFFER("\'", 1, 5);
64
50
  }
65
- break;
51
+ }
52
+ if (i+5 <= in_len) {
53
+ if (memcmp(&in[i], "&quot;", 6) == 0) {
54
+ APPEND_BUFFER("\"", 1, 6);
55
+ }
56
+ }
66
57
  }
67
58
  }
68
59
 
@@ -77,7 +68,7 @@ static size_t escape_javascript(unsigned char *out, const unsigned char *in, siz
77
68
 
78
69
  for(;i<in_len;i++) {
79
70
  switch(in[i]) {
80
- case '\\': APPEND_BUFFER("\\\\", 2, 1);
71
+ case '\\': APPEND_BUFFER("\\\\", 2, 1); break;
81
72
  case '<':
82
73
  if (i+1 <= in_len && in[i+1] == '/') {
83
74
  APPEND_BUFFER("<\\/", 3, 2);
@@ -90,9 +81,9 @@ static size_t escape_javascript(unsigned char *out, const unsigned char *in, siz
90
81
  APPEND_BUFFER("\\n", 2, 1);
91
82
  }
92
83
  break;
93
- case '\n': APPEND_BUFFER("\\n", 2, 1);
94
- case '\"': APPEND_BUFFER("\\\"", 2, 1);
95
- case '\'': APPEND_BUFFER("\\'", 2, 1);
84
+ case '\n': APPEND_BUFFER("\\n", 2, 1); break;
85
+ case '\"': APPEND_BUFFER("\\\"", 2, 1); break;
86
+ case '\'': APPEND_BUFFER("\\'", 2, 1); break;
96
87
  }
97
88
  }
98
89
 
@@ -106,6 +97,10 @@ static VALUE rb_escape_html(VALUE self, VALUE str) {
106
97
  Check_Type(str, T_STRING);
107
98
 
108
99
  VALUE rb_output_buf;
100
+ #ifdef HAVE_RUBY_ENCODING_H
101
+ rb_encoding *default_internal_enc = rb_default_internal_encoding();
102
+ rb_encoding *original_encoding = rb_enc_get(str);
103
+ #endif
109
104
  unsigned char *inBuf = (unsigned char*)RSTRING_PTR(str);
110
105
  size_t len = RSTRING_LEN(str), new_len = 0;
111
106
 
@@ -122,6 +117,14 @@ static VALUE rb_escape_html(VALUE self, VALUE str) {
122
117
  // free the temporary C string
123
118
  free(outBuf);
124
119
 
120
+ #ifdef HAVE_RUBY_ENCODING_H
121
+ rb_enc_associate(rb_output_buf, original_encoding);
122
+ if (default_internal_enc) {
123
+ rb_output_buf = rb_str_export_to_enc(rb_output_buf, default_internal_enc);
124
+ } else {
125
+ rb_output_buf = rb_str_export_to_enc(rb_output_buf, utf8Encoding);
126
+ }
127
+ #endif
125
128
  return rb_output_buf;
126
129
  }
127
130
 
@@ -129,6 +132,10 @@ static VALUE rb_unescape_html(VALUE self, VALUE str) {
129
132
  Check_Type(str, T_STRING);
130
133
 
131
134
  VALUE rb_output_buf;
135
+ #ifdef HAVE_RUBY_ENCODING_H
136
+ rb_encoding *default_internal_enc = rb_default_internal_encoding();
137
+ rb_encoding *original_encoding = rb_enc_get(str);
138
+ #endif
132
139
  unsigned char *inBuf = (unsigned char*)RSTRING_PTR(str);
133
140
  size_t len = RSTRING_LEN(str), new_len = 0;
134
141
 
@@ -145,6 +152,14 @@ static VALUE rb_unescape_html(VALUE self, VALUE str) {
145
152
  // free the temporary C string
146
153
  free(outBuf);
147
154
 
155
+ #ifdef HAVE_RUBY_ENCODING_H
156
+ rb_enc_associate(rb_output_buf, original_encoding);
157
+ if (default_internal_enc) {
158
+ rb_output_buf = rb_str_export_to_enc(rb_output_buf, default_internal_enc);
159
+ } else {
160
+ rb_output_buf = rb_str_export_to_enc(rb_output_buf, utf8Encoding);
161
+ }
162
+ #endif
148
163
  return rb_output_buf;
149
164
  }
150
165
 
@@ -156,6 +171,10 @@ static VALUE rb_escape_javascript(VALUE self, VALUE str) {
156
171
  Check_Type(str, T_STRING);
157
172
 
158
173
  VALUE rb_output_buf;
174
+ #ifdef HAVE_RUBY_ENCODING_H
175
+ rb_encoding *default_internal_enc = rb_default_internal_encoding();
176
+ rb_encoding *original_encoding = rb_enc_get(str);
177
+ #endif
159
178
  unsigned char *inBuf = (unsigned char*)RSTRING_PTR(str);
160
179
  size_t len = RSTRING_LEN(str), new_len = 0;
161
180
 
@@ -172,6 +191,14 @@ static VALUE rb_escape_javascript(VALUE self, VALUE str) {
172
191
  // free the temporary C string
173
192
  free(outBuf);
174
193
 
194
+ #ifdef HAVE_RUBY_ENCODING_H
195
+ rb_enc_associate(rb_output_buf, original_encoding);
196
+ if (default_internal_enc) {
197
+ rb_output_buf = rb_str_export_to_enc(rb_output_buf, default_internal_enc);
198
+ } else {
199
+ rb_output_buf = rb_str_export_to_enc(rb_output_buf, utf8Encoding);
200
+ }
201
+ #endif
175
202
  return rb_output_buf;
176
203
  }
177
204
 
@@ -184,4 +211,8 @@ void Init_escape_utils_ext() {
184
211
  rb_define_module_function(mEscape, "unescape_html", rb_unescape_html, 1);
185
212
  rb_define_method(mEscape, "escape_javascript", rb_escape_javascript, 1);
186
213
  rb_define_module_function(mEscape, "escape_javascript", rb_escape_javascript, 1);
214
+
215
+ #ifdef HAVE_RUBY_ENCODING_H
216
+ utf8Encoding = rb_utf8_encoding();
217
+ #endif
187
218
  }
@@ -4,5 +4,5 @@ require 'escape_utils_ext'
4
4
 
5
5
  EscapeUtils.send(:extend, EscapeUtils)
6
6
  module EscapeUtils
7
- VERSION = "0.1.2"
7
+ VERSION = "0.1.3"
8
8
  end
@@ -21,4 +21,18 @@ describe EscapeUtils, "escape_html" do
21
21
  it "should escape the & character" do
22
22
  EscapeUtils.escape_html("<b>Bourbon & Branch</b>").should eql("&lt;b&gt;Bourbon &amp; Branch&lt;/b&gt;")
23
23
  end
24
+
25
+ if RUBY_VERSION =~ /^1.9/
26
+ it "should default to utf-8 if Encoding.default_internal is nil" do
27
+ Encoding.default_internal = nil
28
+ EscapeUtils.escape_html("<b>Bourbon & Branch</b>").encoding.should eql(Encoding.find('utf-8'))
29
+ end
30
+
31
+ it "should use Encoding.default_internal" do
32
+ Encoding.default_internal = Encoding.find('utf-8')
33
+ EscapeUtils.escape_html("<b>Bourbon & Branch</b>").encoding.should eql(Encoding.default_internal)
34
+ Encoding.default_internal = Encoding.find('us-ascii')
35
+ EscapeUtils.escape_html("<b>Bourbon & Branch</b>").encoding.should eql(Encoding.default_internal)
36
+ end
37
+ end
24
38
  end
@@ -21,4 +21,18 @@ describe EscapeUtils, "unescape_html" do
21
21
  it "should unescape the & character" do
22
22
  EscapeUtils.unescape_html("&lt;b&gt;Bourbon &amp; Branch&lt;/b&gt;").should eql("<b>Bourbon & Branch</b>")
23
23
  end
24
+
25
+ if RUBY_VERSION =~ /^1.9/
26
+ it "should default to utf-8 if Encoding.default_internal is nil" do
27
+ Encoding.default_internal = nil
28
+ EscapeUtils.unescape_html("&lt;b&gt;Bourbon &amp; Branch&lt;/b&gt;").encoding.should eql(Encoding.find('utf-8'))
29
+ end
30
+
31
+ it "should use Encoding.default_internal" do
32
+ Encoding.default_internal = Encoding.find('utf-8')
33
+ EscapeUtils.unescape_html("&lt;b&gt;Bourbon &amp; Branch&lt;/b&gt;").encoding.should eql(Encoding.default_internal)
34
+ Encoding.default_internal = Encoding.find('us-ascii')
35
+ EscapeUtils.unescape_html("&lt;b&gt;Bourbon &amp; Branch&lt;/b&gt;").encoding.should eql(Encoding.default_internal)
36
+ end
37
+ end
24
38
  end
@@ -10,7 +10,7 @@ describe EscapeUtils, "escape_javascript" do
10
10
  it "should return an empty string if passed nil" do
11
11
  EscapeUtils.escape_javascript(nil).should eql("")
12
12
  end
13
-
13
+
14
14
  it "should escape quotes and newlines" do
15
15
  EscapeUtils.escape_javascript(%(This "thing" is really\n netos')).should eql(%(This \\"thing\\" is really\\n netos\\'))
16
16
  end
@@ -22,4 +22,18 @@ describe EscapeUtils, "escape_javascript" do
22
22
  it "should escape closed html tags" do
23
23
  EscapeUtils.escape_javascript(%(dont </close> tags)).should eql(%(dont <\\/close> tags))
24
24
  end
25
+
26
+ if RUBY_VERSION =~ /^1.9/
27
+ it "should default to utf-8 if Encoding.default_internal is nil" do
28
+ Encoding.default_internal = nil
29
+ EscapeUtils.escape_javascript(%(dont </close> tags)).encoding.should eql(Encoding.find('utf-8'))
30
+ end
31
+
32
+ it "should use Encoding.default_internal" do
33
+ Encoding.default_internal = Encoding.find('utf-8')
34
+ EscapeUtils.escape_javascript(%(dont </close> tags)).encoding.should eql(Encoding.default_internal)
35
+ Encoding.default_internal = Encoding.find('us-ascii')
36
+ EscapeUtils.escape_javascript(%(dont </close> tags)).encoding.should eql(Encoding.default_internal)
37
+ end
38
+ end
25
39
  end
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 1
8
- - 2
9
- version: 0.1.2
8
+ - 3
9
+ version: 0.1.3
10
10
  platform: ruby
11
11
  authors:
12
12
  - Brian Lopez
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2010-06-08 00:00:00 -07:00
17
+ date: 2010-06-09 00:00:00 -07:00
18
18
  default_executable:
19
19
  dependencies: []
20
20