escape_utils 0.1.9 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore CHANGED
@@ -3,4 +3,6 @@ Makefile
3
3
  *.bundle
4
4
  pkg/*
5
5
  doc/*
6
- *.rbc
6
+ *.rbc
7
+ tmp/
8
+ Gemfile.lock
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --format documentation
2
+ --colour
data/CHANGELOG.md CHANGED
@@ -1,5 +1,16 @@
1
1
  # Changelog
2
2
 
3
+ ## 0.2.0 (February 8th, 2011)
4
+ * fixed a couple of compilation warnings on 1.9.3
5
+ * moved to rspec2
6
+ * remove hard-conversion to utf-8 to preserve the string's original encoding
7
+ * moved to rake-compiler, Bundler
8
+ * pass through incompletely escaped data on unescaping
9
+ * added tilde to escape_{uri,url} specs (it's a difference between CGI.escape and URI.escape)
10
+ * escape_uri and escape_url now match their Ruby counterparts
11
+ ** escape_uri is used where URI.escape is, and escape_url is used where CGI.escape is used.
12
+ * performance and memory usage optimizations
13
+
3
14
  ## 0.1.9 (October 15th, 2010)
4
15
  * add a flag as an optional 2nd parameter to EscapeUtils.escape_html to disable/enable the escaping of the '/' character. Defaults to the new flag EscapeUtils.html_secure
5
16
 
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
+ source :rubygems
2
+
3
+ gemspec
data/README.rdoc CHANGED
@@ -2,11 +2,9 @@
2
2
 
3
3
  Being as though we're all html escaping everything these days, why not make it faster?
4
4
 
5
- At the moment escape_utils supports escaping and unescaping of HTML, and Javascript but I wanna add URL encoding soon.
6
-
7
5
  For character encoding in 1.9, we'll return strings in whatever Encoding.default_internal is set to, or in the input string's original encoding otherwise.
8
6
 
9
- It has monkey-patches for Rack::Utils, CGI, ERB::Util and Haml and ActionView so you can drop this in and have your app start escaping fast as balls in no time
7
+ It has monkey-patches for Rack::Utils, CGI, URI, ERB::Util, Haml and ActionView, so you can drop this in and have your app start escaping fast as balls in no time.
10
8
 
11
9
  It supports HTML, URL, URI and Javascript escaping/unescaping.
12
10
 
@@ -38,16 +36,20 @@ It supports HTML, URL, URI and Javascript escaping/unescaping.
38
36
 
39
37
  === URL
40
38
 
39
+ Use (un)escape_uri to get RFC-compliant escaping (like PHP rawurlencode).
40
+
41
+ Use (un)escape_url to get CGI escaping (where space is +).
42
+
41
43
  ==== Escaping
42
44
 
43
45
  url = "https://www.yourmom.com/cgi-bin/session.cgi?sess_args=mcEA~!!#*YH*>@!U"
44
- escaped_url = EscapeUtils.url_escape(url)
46
+ escaped_url = EscapeUtils.escape_url(url)
45
47
 
46
48
  ==== Unescaping
47
49
 
48
50
  url = "https://www.yourmom.com/cgi-bin/session.cgi?sess_args=mcEA~!!#*YH*>@!U"
49
- escaped_url = EscapeUtils.url_escape(url)
50
- EscapeUtils.url_unescape(escaped_url) == url # => true
51
+ escaped_url = EscapeUtils.escape_url(url)
52
+ EscapeUtils.unescape_url(escaped_url) == url # => true
51
53
 
52
54
  === Javascript
53
55
 
@@ -141,4 +143,4 @@ I didn't look that hard, but I'm not aware of another ruby library that does Jav
141
143
  fast_xs_extra#fast_uxs_cgi
142
144
  0.010000 0.000000 0.010000 ( 0.006062)
143
145
  EscapeUtils.unescape_url
144
- 0.000000 0.000000 0.000000 ( 0.005679)
146
+ 0.000000 0.000000 0.000000 ( 0.005679)
data/Rakefile CHANGED
@@ -1,35 +1,32 @@
1
- # encoding: UTF-8
1
+ # rspec
2
2
  begin
3
- require 'jeweler'
4
- Jeweler::Tasks.new do |gem|
5
- gem.name = "escape_utils"
6
- gem.summary = "Faster string escaping routines for your web apps"
7
- gem.email = "seniorlopez@gmail.com"
8
- gem.homepage = "http://github.com/brianmario/escape_utils"
9
- gem.authors = ["Brian Lopez"]
10
- gem.require_paths = ["lib", "ext"]
11
- gem.extra_rdoc_files = `git ls-files *.rdoc`.split("\n")
12
- gem.files = `git ls-files`.split("\n")
13
- gem.extensions = ["ext/extconf.rb"]
14
- gem.files.include %w(lib/jeweler/templates/.document lib/jeweler/templates/.gitignore)
15
- # gem.rubyforge_project = "mysql2"
3
+ require 'rspec'
4
+ require 'rspec/core/rake_task'
5
+
6
+ desc "Run all examples with RCov"
7
+ RSpec::Core::RakeTask.new('spec:rcov') do |t|
8
+ t.rcov = true
9
+ end
10
+ RSpec::Core::RakeTask.new('spec') do |t|
11
+ t.verbose = true
16
12
  end
13
+
14
+ task :default => :spec
17
15
  rescue LoadError
18
- puts "Jeweler, or one of its dependencies, is not available. Install it with: sudo gem install jeweler -s http://gems.github.com"
16
+ puts "rspec, or one of its dependencies, is not available. Install it with: sudo gem install rspec"
19
17
  end
20
18
 
21
- require 'rake'
22
- require 'spec/rake/spectask'
19
+ # rake-compiler
20
+ require 'rake' unless defined? Rake
23
21
 
24
- desc "Run all examples with RCov"
25
- Spec::Rake::SpecTask.new('spec:rcov') do |t|
26
- t.spec_files = FileList['spec/']
27
- t.rcov = true
28
- t.rcov_opts = lambda do
29
- IO.readlines("spec/rcov.opts").map {|l| l.chomp.split " "}.flatten
30
- end
22
+ gem 'rake-compiler', '>= 0.7.5'
23
+ require "rake/extensiontask"
24
+
25
+ Rake::ExtensionTask.new('escape_utils') do |ext|
26
+ ext.cross_compile = true
27
+ ext.cross_platform = ['x86-mingw32', 'x86-mswin32-60']
28
+
29
+ ext.lib_dir = File.join 'lib', 'escape_utils'
31
30
  end
32
- Spec::Rake::SpecTask.new('spec') do |t|
33
- t.spec_files = FileList['spec/']
34
- t.spec_opts << '--options' << 'spec/spec.opts'
35
- end
31
+
32
+ Rake::Task[:spec].prerequisites << :compile
@@ -10,7 +10,6 @@ require 'erb'
10
10
  require 'cgi'
11
11
  require 'haml'
12
12
  require 'fast_xs_extra'
13
- require 'faster_html_escape'
14
13
  require 'escape_utils'
15
14
 
16
15
  module HamlBench
@@ -51,13 +50,6 @@ Benchmark.bmbm do |x|
51
50
  end
52
51
  end
53
52
 
54
- x.report do
55
- puts "FasterHTMLEscape.html_escape"
56
- times.times do
57
- FasterHTMLEscape.html_escape(html)
58
- end
59
- end
60
-
61
53
  x.report do
62
54
  puts "fast_xs_extra#fast_xs_html"
63
55
  times.times do
data/escape_utils.gemspec CHANGED
@@ -1,90 +1,31 @@
1
- # Generated by jeweler
2
- # DO NOT EDIT THIS FILE DIRECTLY
3
- # Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
4
- # -*- encoding: utf-8 -*-
1
+ require './lib/escape_utils/version' unless defined? EscapeUtils::VERSION
5
2
 
6
3
  Gem::Specification.new do |s|
7
4
  s.name = %q{escape_utils}
8
- s.version = "0.1.9"
9
-
10
- s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
5
+ s.version = EscapeUtils::VERSION
11
6
  s.authors = ["Brian Lopez"]
12
- s.date = %q{2010-10-15}
7
+ s.date = Time.now.utc.strftime("%Y-%m-%d")
13
8
  s.email = %q{seniorlopez@gmail.com}
14
- s.extensions = ["ext/extconf.rb"]
9
+ s.extensions = ["ext/escape_utils/extconf.rb"]
15
10
  s.extra_rdoc_files = [
16
11
  "README.rdoc"
17
12
  ]
18
- s.files = [
19
- ".gitignore",
20
- "CHANGELOG.md",
21
- "MIT-LICENSE",
22
- "README.rdoc",
23
- "Rakefile",
24
- "VERSION",
25
- "benchmark/html_escape.rb",
26
- "benchmark/html_unescape.rb",
27
- "benchmark/javascript_escape.rb",
28
- "benchmark/javascript_unescape.rb",
29
- "benchmark/url_escape.rb",
30
- "benchmark/url_unescape.rb",
31
- "escape_utils.gemspec",
32
- "ext/escape_utils.c",
33
- "ext/extconf.rb",
34
- "lib/escape_utils.rb",
35
- "lib/escape_utils/html/cgi.rb",
36
- "lib/escape_utils/html/erb.rb",
37
- "lib/escape_utils/html/haml.rb",
38
- "lib/escape_utils/html/rack.rb",
39
- "lib/escape_utils/html_safety.rb",
40
- "lib/escape_utils/javascript/action_view.rb",
41
- "lib/escape_utils/url/cgi.rb",
42
- "lib/escape_utils/url/erb.rb",
43
- "lib/escape_utils/url/rack.rb",
44
- "lib/escape_utils/url/uri.rb",
45
- "spec/html/escape_spec.rb",
46
- "spec/html/unescape_spec.rb",
47
- "spec/html_safety_spec.rb",
48
- "spec/javascript/escape_spec.rb",
49
- "spec/javascript/unescape_spec.rb",
50
- "spec/query/escape_spec.rb",
51
- "spec/query/unescape_spec.rb",
52
- "spec/rcov.opts",
53
- "spec/spec.opts",
54
- "spec/spec_helper.rb",
55
- "spec/uri/escape_spec.rb",
56
- "spec/uri/unescape_spec.rb",
57
- "spec/url/escape_spec.rb",
58
- "spec/url/unescape_spec.rb"
59
- ]
13
+ s.files = `git ls-files`.split("\n")
60
14
  s.homepage = %q{http://github.com/brianmario/escape_utils}
61
15
  s.rdoc_options = ["--charset=UTF-8"]
62
16
  s.require_paths = ["lib", "ext"]
63
- s.rubygems_version = %q{1.3.7}
17
+ s.rubygems_version = %q{1.4.2}
64
18
  s.summary = %q{Faster string escaping routines for your web apps}
65
- s.test_files = [
66
- "spec/html/escape_spec.rb",
67
- "spec/html/unescape_spec.rb",
68
- "spec/html_safety_spec.rb",
69
- "spec/javascript/escape_spec.rb",
70
- "spec/javascript/unescape_spec.rb",
71
- "spec/query/escape_spec.rb",
72
- "spec/query/unescape_spec.rb",
73
- "spec/spec_helper.rb",
74
- "spec/uri/escape_spec.rb",
75
- "spec/uri/unescape_spec.rb",
76
- "spec/url/escape_spec.rb",
77
- "spec/url/unescape_spec.rb"
78
- ]
79
-
80
- if s.respond_to? :specification_version then
81
- current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
82
- s.specification_version = 3
19
+ s.test_files = `git ls-files spec`.split("\n")
83
20
 
84
- if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
85
- else
86
- end
87
- else
88
- end
21
+ # tests
22
+ s.add_development_dependency 'rake-compiler', ">= 0.7.5"
23
+ s.add_development_dependency 'rspec', ">= 2.0.0"
24
+ # benchmarks
25
+ s.add_development_dependency 'rack'
26
+ s.add_development_dependency 'haml'
27
+ s.add_development_dependency 'fast_xs'
28
+ s.add_development_dependency 'actionpack'
29
+ s.add_development_dependency 'url_escape'
89
30
  end
90
31
 
@@ -1,19 +1,27 @@
1
+ #if RB_CVAR_SET_ARITY == 4
2
+ # define rb_cvar_set(a,b,c) rb_cvar_set(a,b,c,0)
3
+ #endif
1
4
  #include <ruby.h>
2
5
  #ifdef HAVE_RUBY_ENCODING_H
3
6
  #include <ruby/encoding.h>
4
- static rb_encoding *utf8Encoding;
5
7
  #endif
6
8
 
7
9
  static VALUE mEscapeUtils;
8
10
  static ID rb_html_secure;
11
+ static int html_secure = 1;
9
12
 
10
- #define IS_HEX(c) (c >= 48 || c <= 57) && (c >= 65 || c <= 70) && (c >= 97 || c <= 102)
11
- #define NOT_HEX(c) (c < 48 || c > 57) && (c < 65 || c > 90) && (c < 97 || c > 122)
13
+ #define IS_HEX(c) ((c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f'))
12
14
  #define UNHEX(c) (c >= '0' && c <= '9' ? c - '0' : c >= 'A' && c <= 'F' ? c - 'A' + 10 : c - 'a' + 10)
13
- #define URI_SAFE(c) (c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c== 45 || c == 46 || c == 95 || c == 126
14
- // ALPHA / DIGIT / "-" / "." / "_" / "~"
15
15
 
16
- static size_t escape_html(unsigned char *out, const unsigned char *in, size_t in_len, unsigned short int secure) {
16
+ #define ALPHANUM(c) ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9'))
17
+ #define URL_SAFE(c) (ALPHANUM(c) || c == '-' || c == '_' || c == '.')
18
+
19
+ /* from uri/common.rb */
20
+ #define UNRESERVED(c) (ALPHANUM(c) || c == '-' || c == '_' || c == '.' || c == '!' || c == '~' || c == '*' || c == '\'' || c == '(' || c == ')')
21
+ #define RESERVED(c) (c == ';' || c == '/' || c == '?' || c == ':' || c == '@' || c== '&' || c == '=' || c == '+' || c == '$' || c == ',' || c == '[' || c == ']')
22
+ #define URI_SAFE(c) (URL_SAFE(c) || UNRESERVED(c) || RESERVED(c))
23
+
24
+ static size_t escape_html(unsigned char *out, const unsigned char *in, size_t in_len, int secure) {
17
25
  size_t total = 0;
18
26
  unsigned char curChar;
19
27
 
@@ -57,36 +65,39 @@ static size_t unescape_html(unsigned char *out, const unsigned char *in, size_t
57
65
  while (len) {
58
66
  curChar = *in++;
59
67
  if (curChar == '&') {
60
- if ((in-start)+2 <= in_len && *in == 'l' && *(in+1) == 't' && *(in+2) == ';') {
68
+ if (*in == 'l' && *(in+1) == 't' && *(in+2) == ';') {
61
69
  *out++ = '<';
62
70
  total-=3;
63
71
  in+=3;
64
72
  len-=3;
65
- } else if ((in-start)+2 <= in_len && *in == 'g' && *(in+1) == 't' && *(in+2) == ';') {
73
+ } else if (*in == 'g' && *(in+1) == 't' && *(in+2) == ';') {
66
74
  *out++ = '>';
67
75
  total-=3;
68
76
  in+=3;
69
77
  len-=3;
70
- } else if ((in-start)+3 <= in_len && *in == 'a' && *(in+1) == 'm' && *(in+2) == 'p' && *(in+3) == ';') {
78
+ } else if (*in == 'a' && *(in+1) == 'm' && *(in+2) == 'p' && *(in+3) == ';') {
71
79
  *out++ = '&';
72
80
  total-=4;
73
81
  in+=4;
74
82
  len-=4;
75
- } else if ((in-start)+3 <= in_len && *in == '#' && *(in+1) == '3' && *(in+2) == '9' && *(in+3) == ';') {
83
+ } else if (*in == '#' && *(in+1) == '3' && *(in+2) == '9' && *(in+3) == ';') {
76
84
  *out++ = '\'';
77
85
  total-=4;
78
86
  in+=4;
79
87
  len-=4;
80
- } else if ((in-start)+3 <= in_len && *in == '#' && *(in+1) == '4' && *(in+2) == '7' && *(in+3) == ';') {
88
+ } else if (*in == '#' && *(in+1) == '4' && *(in+2) == '7' && *(in+3) == ';') {
81
89
  *out++ = '/';
82
90
  total-=4;
83
91
  in+=4;
84
92
  len-=4;
85
- } else if ((in-start)+4 <= in_len && *in == 'q' && *(in+1) == 'u' && *(in+2) == 'o' && *(in+3) == 't' && *(in+4) == ';') {
93
+ } else if (*in == 'q' && *(in+1) == 'u' && *(in+2) == 'o' && *(in+3) == 't' && *(in+4) == ';') {
86
94
  *out++ = '\"';
87
95
  total-=5;
88
96
  in+=5;
89
97
  len-=5;
98
+ } else {
99
+ /* incomplete tag, pass it through */
100
+ *out++ = curChar;
90
101
  }
91
102
  } else {
92
103
  *out++ = curChar;
@@ -172,7 +183,9 @@ static size_t unescape_javascript(unsigned char *out, const unsigned char *in, s
172
183
  *out++ = '/';
173
184
  total--;
174
185
  } else {
186
+ /* incomplete escape, pass it through */
175
187
  *out++ = curChar;
188
+ continue;
176
189
  }
177
190
  in++; in_len--;
178
191
  } else {
@@ -194,13 +207,13 @@ static size_t escape_url(unsigned char *out, const unsigned char *in, size_t in_
194
207
  curChar = *in++;
195
208
  if (curChar == ' ') {
196
209
  *out++ = '+';
197
- } else if ((curChar != '_' && curChar != '.' && curChar != '-') && NOT_HEX(curChar)) {
210
+ } else if (URL_SAFE(curChar)) {
211
+ *out++ = curChar;
212
+ } else {
198
213
  hex[1] = hexChars[curChar & 0x0f];
199
214
  hex[0] = hexChars[(curChar >> 4) & 0x0f];
200
215
  *out++ = '%'; *out++ = hex[0]; *out++ = hex[1];
201
216
  total += 2;
202
- } else {
203
- *out++ = curChar;
204
217
  }
205
218
  in_len--;
206
219
  }
@@ -217,10 +230,13 @@ static size_t unescape_url(unsigned char *out, const unsigned char *in, size_t i
217
230
  while (len) {
218
231
  curChar = *in++;
219
232
  if (curChar == '%') {
220
- if ((in-start)+2 <= in_len && IS_HEX(*in) && IS_HEX(*(in+1))) {
233
+ if (IS_HEX(*in) && IS_HEX(*(in+1))) {
221
234
  *out++ = (UNHEX(*in) << 4) + UNHEX(*(in+1));
222
235
  in+=2;
223
236
  total-=2;
237
+ } else {
238
+ /* incomplete escape, pass it through */
239
+ *out++ = curChar;
224
240
  }
225
241
  } else if (curChar == '+') {
226
242
  *out++ = ' ';
@@ -264,10 +280,13 @@ static size_t unescape_uri(unsigned char *out, const unsigned char *in, size_t i
264
280
  while (len) {
265
281
  curChar = *in++;
266
282
  if (curChar == '%') {
267
- if ((in-start)+2 <= in_len && IS_HEX(*in) && IS_HEX(*(in+1))) {
283
+ if (IS_HEX(*in) && IS_HEX(*(in+1))) {
268
284
  *out++ = (UNHEX(*in) << 4) + UNHEX(*(in+1));
269
285
  in+=2;
270
286
  total-=2;
287
+ } else {
288
+ /* incomplete escape, pass it through */
289
+ *out++ = curChar;
271
290
  }
272
291
  } else {
273
292
  *out++ = curChar;
@@ -279,11 +298,15 @@ static size_t unescape_uri(unsigned char *out, const unsigned char *in, size_t i
279
298
  }
280
299
 
281
300
  static VALUE rb_escape_html(int argc, VALUE * argv, VALUE self) {
282
- VALUE str, rb_secure = rb_funcall(mEscapeUtils, rb_html_secure, 0);
283
- unsigned short secure = 1;
284
- if (rb_secure == Qfalse) {
285
- secure = 0;
286
- }
301
+ VALUE str, rb_secure;
302
+ int secure = html_secure;
303
+ VALUE rb_output_buf;
304
+ #ifdef HAVE_RUBY_ENCODING_H
305
+ rb_encoding *default_internal_enc;
306
+ rb_encoding *original_encoding;
307
+ #endif
308
+ unsigned char *inBuf, *outBuf;
309
+ size_t len, new_len;
287
310
 
288
311
  if (rb_scan_args(argc, argv, "11", &str, &rb_secure) == 2) {
289
312
  if (rb_secure == Qfalse) {
@@ -293,181 +316,202 @@ static VALUE rb_escape_html(int argc, VALUE * argv, VALUE self) {
293
316
 
294
317
  Check_Type(str, T_STRING);
295
318
 
296
- VALUE rb_output_buf;
297
319
  #ifdef HAVE_RUBY_ENCODING_H
298
- rb_encoding *default_internal_enc = rb_default_internal_encoding();
299
- rb_encoding *original_encoding = rb_enc_get(str);
320
+ default_internal_enc = rb_default_internal_encoding();
321
+ original_encoding = rb_enc_get(str);
300
322
  #endif
301
- unsigned char *inBuf = (unsigned char*)RSTRING_PTR(str);
302
- size_t len = RSTRING_LEN(str), new_len = 0;
323
+ inBuf = (unsigned char*)RSTRING_PTR(str);
324
+ len = RSTRING_LEN(str);
303
325
 
304
326
  // this is the max size the string could be
305
327
  // TODO: we should try to be more intelligent about this
306
- unsigned char *outBuf = (unsigned char *)malloc(sizeof(unsigned char *)*(len*5));
328
+ new_len = sizeof(unsigned char)*(len*5);
329
+
330
+ // create our new ruby string
331
+ rb_output_buf = rb_str_new(NULL, new_len);
332
+ outBuf = (unsigned char *)RSTRING_PTR(rb_output_buf);
307
333
 
308
334
  // perform our escape, returning the new string's length
309
335
  new_len = escape_html(outBuf, inBuf, len, secure);
310
336
 
311
- // create our new ruby string
312
- rb_output_buf = rb_str_new((char *)outBuf, new_len);
313
-
314
- // free the temporary C string
315
- free(outBuf);
337
+ // shrink our new ruby string
338
+ rb_str_resize(rb_output_buf, new_len);
316
339
 
317
340
  #ifdef HAVE_RUBY_ENCODING_H
318
341
  rb_enc_associate(rb_output_buf, original_encoding);
319
342
  if (default_internal_enc) {
320
343
  rb_output_buf = rb_str_export_to_enc(rb_output_buf, default_internal_enc);
321
- } else {
322
- rb_output_buf = rb_str_export_to_enc(rb_output_buf, utf8Encoding);
323
344
  }
324
345
  #endif
325
346
  return rb_output_buf;
326
347
  }
327
348
 
328
349
  static VALUE rb_unescape_html(VALUE self, VALUE str) {
329
- Check_Type(str, T_STRING);
330
-
331
350
  VALUE rb_output_buf;
332
351
  #ifdef HAVE_RUBY_ENCODING_H
333
- rb_encoding *default_internal_enc = rb_default_internal_encoding();
334
- rb_encoding *original_encoding = rb_enc_get(str);
352
+ rb_encoding *default_internal_enc;
353
+ rb_encoding *original_encoding;
335
354
  #endif
336
- unsigned char *inBuf = (unsigned char*)RSTRING_PTR(str);
337
- size_t len = RSTRING_LEN(str), new_len = 0;
355
+ unsigned char *inBuf, *outBuf;
356
+ size_t len, new_len;
357
+
358
+ Check_Type(str, T_STRING);
359
+ #ifdef HAVE_RUBY_ENCODING_H
360
+ default_internal_enc = rb_default_internal_encoding();
361
+ original_encoding = rb_enc_get(str);
362
+ #endif
363
+ inBuf = (unsigned char*)RSTRING_PTR(str);
364
+ len = RSTRING_LEN(str);
338
365
 
339
366
  // this is the max size the string could be
340
- // TODO: we should try to be more intelligent about this
341
- unsigned char *outBuf = (unsigned char *)malloc(sizeof(unsigned char *)*len);
367
+ // TODO: we could be more intelligent about this, but probably not
368
+ new_len = sizeof(unsigned char) * len;
369
+
370
+ // create our new ruby string
371
+ rb_output_buf = rb_str_new(NULL, new_len);
372
+ outBuf = (unsigned char *)RSTRING_PTR(rb_output_buf);
342
373
 
343
374
  // perform our escape, returning the new string's length
344
375
  new_len = unescape_html(outBuf, inBuf, len);
345
376
 
346
- // create our new ruby string
347
- rb_output_buf = rb_str_new((char *)outBuf, new_len);
348
-
349
- // free the temporary C string
350
- free(outBuf);
377
+ // shrink our new ruby string
378
+ rb_str_resize(rb_output_buf, new_len);
351
379
 
352
380
  #ifdef HAVE_RUBY_ENCODING_H
353
381
  rb_enc_associate(rb_output_buf, original_encoding);
354
382
  if (default_internal_enc) {
355
383
  rb_output_buf = rb_str_export_to_enc(rb_output_buf, default_internal_enc);
356
- } else {
357
- rb_output_buf = rb_str_export_to_enc(rb_output_buf, utf8Encoding);
358
384
  }
359
385
  #endif
360
386
  return rb_output_buf;
361
387
  }
362
388
 
363
389
  static VALUE rb_escape_javascript(VALUE self, VALUE str) {
390
+ VALUE rb_output_buf;
391
+ #ifdef HAVE_RUBY_ENCODING_H
392
+ rb_encoding *default_internal_enc;
393
+ rb_encoding *original_encoding;
394
+ #endif
395
+ unsigned char *inBuf, *outBuf;
396
+ size_t len, new_len;
397
+
364
398
  if (str == Qnil) {
365
399
  return rb_str_new2("");
366
400
  }
367
401
 
368
402
  Check_Type(str, T_STRING);
369
403
 
370
- VALUE rb_output_buf;
371
404
  #ifdef HAVE_RUBY_ENCODING_H
372
- rb_encoding *default_internal_enc = rb_default_internal_encoding();
373
- rb_encoding *original_encoding = rb_enc_get(str);
405
+ default_internal_enc = rb_default_internal_encoding();
406
+ original_encoding = rb_enc_get(str);
374
407
  #endif
375
- unsigned char *inBuf = (unsigned char*)RSTRING_PTR(str);
376
- size_t len = RSTRING_LEN(str), new_len = 0;
408
+ inBuf = (unsigned char*)RSTRING_PTR(str);
409
+ len = RSTRING_LEN(str);
377
410
 
378
411
  // this is the max size the string could be
379
412
  // TODO: we should try to be more intelligent about this
380
- unsigned char *outBuf = (unsigned char *)malloc(sizeof(unsigned char *)*(len*2));
413
+ new_len = sizeof(unsigned char)*(len*2);
414
+
415
+ // create our new ruby string
416
+ rb_output_buf = rb_str_new(NULL, new_len);
417
+ outBuf = (unsigned char *)RSTRING_PTR(rb_output_buf);
381
418
 
382
419
  // perform our escape, returning the new string's length
383
420
  new_len = escape_javascript(outBuf, inBuf, len);
384
421
 
385
- // create our new ruby string
386
- rb_output_buf = rb_str_new((char *)outBuf, new_len);
387
-
388
- // free the temporary C string
389
- free(outBuf);
422
+ // shrink our new ruby string
423
+ rb_str_resize(rb_output_buf, new_len);
390
424
 
391
425
  #ifdef HAVE_RUBY_ENCODING_H
392
426
  rb_enc_associate(rb_output_buf, original_encoding);
393
427
  if (default_internal_enc) {
394
428
  rb_output_buf = rb_str_export_to_enc(rb_output_buf, default_internal_enc);
395
- } else {
396
- rb_output_buf = rb_str_export_to_enc(rb_output_buf, utf8Encoding);
397
429
  }
398
430
  #endif
399
431
  return rb_output_buf;
400
432
  }
401
433
 
402
434
  static VALUE rb_unescape_javascript(VALUE self, VALUE str) {
435
+ VALUE rb_output_buf;
436
+ #ifdef HAVE_RUBY_ENCODING_H
437
+ rb_encoding *default_internal_enc;
438
+ rb_encoding *original_encoding;
439
+ #endif
440
+ unsigned char *inBuf, *outBuf;
441
+ size_t len, new_len;
442
+
403
443
  if (str == Qnil) {
404
444
  return rb_str_new2("");
405
445
  }
406
446
 
407
447
  Check_Type(str, T_STRING);
408
448
 
409
- VALUE rb_output_buf;
410
449
  #ifdef HAVE_RUBY_ENCODING_H
411
- rb_encoding *default_internal_enc = rb_default_internal_encoding();
412
- rb_encoding *original_encoding = rb_enc_get(str);
450
+ default_internal_enc = rb_default_internal_encoding();
451
+ original_encoding = rb_enc_get(str);
413
452
  #endif
414
- unsigned char *inBuf = (unsigned char*)RSTRING_PTR(str);
415
- size_t len = RSTRING_LEN(str), new_len = 0;
453
+ inBuf = (unsigned char*)RSTRING_PTR(str);
454
+ len = RSTRING_LEN(str);
416
455
 
417
456
  // this is the max size the string could be
418
- // TODO: we should try to be more intelligent about this
419
- unsigned char *outBuf = (unsigned char *)malloc(sizeof(unsigned char *)*len);
457
+ // TODO: we could be more intelligent about this, but probably not
458
+ new_len = sizeof(unsigned char) * len;
459
+
460
+ // create our new ruby string
461
+ rb_output_buf = rb_str_new(NULL, new_len);
462
+ outBuf = (unsigned char *)RSTRING_PTR(rb_output_buf);
420
463
 
421
464
  // perform our escape, returning the new string's length
422
465
  new_len = unescape_javascript(outBuf, inBuf, len);
423
466
 
424
- // create our new ruby string
425
- rb_output_buf = rb_str_new((char *)outBuf, new_len);
426
-
427
- // free the temporary C string
428
- free(outBuf);
467
+ // shrink our new ruby string
468
+ rb_str_resize(rb_output_buf, new_len);
429
469
 
430
470
  #ifdef HAVE_RUBY_ENCODING_H
431
471
  rb_enc_associate(rb_output_buf, original_encoding);
432
472
  if (default_internal_enc) {
433
473
  rb_output_buf = rb_str_export_to_enc(rb_output_buf, default_internal_enc);
434
- } else {
435
- rb_output_buf = rb_str_export_to_enc(rb_output_buf, utf8Encoding);
436
474
  }
437
475
  #endif
438
476
  return rb_output_buf;
439
477
  }
440
478
 
441
479
  static VALUE rb_escape_url(VALUE self, VALUE str) {
480
+ VALUE rb_output_buf;
481
+ #ifdef HAVE_RUBY_ENCODING_H
482
+ rb_encoding *default_internal_enc;
483
+ rb_encoding *original_encoding;
484
+ #endif
485
+ unsigned char *inBuf, *outBuf;
486
+ size_t len, new_len;
487
+
442
488
  Check_Type(str, T_STRING);
443
489
 
444
- VALUE rb_output_buf;
445
490
  #ifdef HAVE_RUBY_ENCODING_H
446
- rb_encoding *default_internal_enc = rb_default_internal_encoding();
447
- rb_encoding *original_encoding = rb_enc_get(str);
491
+ default_internal_enc = rb_default_internal_encoding();
492
+ original_encoding = rb_enc_get(str);
448
493
  #endif
449
- unsigned char *inBuf = (unsigned char*)RSTRING_PTR(str);
450
- size_t len = RSTRING_LEN(str), new_len = 0;
494
+ inBuf = (unsigned char*)RSTRING_PTR(str);
495
+ len = RSTRING_LEN(str);
451
496
 
452
497
  // this is the max size the string could be
453
498
  // TODO: we should try to be more intelligent about this
454
- unsigned char *outBuf = (unsigned char *)malloc(sizeof(unsigned char *)*(len*3));
499
+ new_len = sizeof(unsigned char)*(len*3);
500
+
501
+ // create our new ruby string
502
+ rb_output_buf = rb_str_new(NULL, new_len);
503
+ outBuf = (unsigned char *)RSTRING_PTR(rb_output_buf);
455
504
 
456
505
  // perform our escape, returning the new string's length
457
506
  new_len = escape_url(outBuf, inBuf, len);
458
507
 
459
- // create our new ruby string
460
- rb_output_buf = rb_str_new((char *)outBuf, new_len);
461
-
462
- // free the temporary C string
463
- free(outBuf);
508
+ // shrink our new ruby string
509
+ rb_str_resize(rb_output_buf, new_len);
464
510
 
465
511
  #ifdef HAVE_RUBY_ENCODING_H
466
512
  rb_enc_associate(rb_output_buf, original_encoding);
467
513
  if (default_internal_enc) {
468
514
  rb_output_buf = rb_str_export_to_enc(rb_output_buf, default_internal_enc);
469
- } else {
470
- rb_output_buf = rb_str_export_to_enc(rb_output_buf, utf8Encoding);
471
515
  }
472
516
  #endif
473
517
  return rb_output_buf;
@@ -482,27 +526,27 @@ static VALUE rb_unescape_url(VALUE self, VALUE str) {
482
526
  rb_encoding *original_encoding = rb_enc_get(str);
483
527
  #endif
484
528
  unsigned char *inBuf = (unsigned char*)RSTRING_PTR(str);
485
- size_t len = RSTRING_LEN(str), new_len = 0;
529
+ size_t len = RSTRING_LEN(str);
486
530
 
487
531
  // this is the max size the string could be
488
- // TODO: we should try to be more intelligent about this
489
- unsigned char *outBuf = (unsigned char *)malloc(sizeof(unsigned char *)*len);
532
+ // TODO: we could be more intelligent about this, but probably not
533
+ size_t new_len = sizeof(unsigned char) * len;
534
+ unsigned char *outBuf;
535
+
536
+ // create our new ruby string
537
+ rb_output_buf = rb_str_new(NULL, new_len);
490
538
 
491
539
  // perform our escape, returning the new string's length
540
+ outBuf = (unsigned char *)RSTRING_PTR(rb_output_buf);
492
541
  new_len = unescape_url(outBuf, inBuf, len);
493
542
 
494
- // create our new ruby string
495
- rb_output_buf = rb_str_new((char *)outBuf, new_len);
496
-
497
- // free the temporary C string
498
- free(outBuf);
543
+ // shrink our new ruby string
544
+ rb_str_resize(rb_output_buf, new_len);
499
545
 
500
546
  #ifdef HAVE_RUBY_ENCODING_H
501
547
  rb_enc_associate(rb_output_buf, original_encoding);
502
548
  if (default_internal_enc) {
503
549
  rb_output_buf = rb_str_export_to_enc(rb_output_buf, default_internal_enc);
504
- } else {
505
- rb_output_buf = rb_str_export_to_enc(rb_output_buf, utf8Encoding);
506
550
  }
507
551
  #endif
508
552
  return rb_output_buf;
@@ -517,27 +561,27 @@ static VALUE rb_escape_uri(VALUE self, VALUE str) {
517
561
  rb_encoding *original_encoding = rb_enc_get(str);
518
562
  #endif
519
563
  unsigned char *inBuf = (unsigned char*)RSTRING_PTR(str);
520
- size_t len = RSTRING_LEN(str), new_len = 0;
564
+ size_t len = RSTRING_LEN(str);
565
+ unsigned char *outBuf;
521
566
 
522
567
  // this is the max size the string could be
523
568
  // TODO: we should try to be more intelligent about this
524
- unsigned char *outBuf = (unsigned char *)malloc(sizeof(unsigned char *)*(len*3));
569
+ size_t new_len = sizeof(unsigned char)*(len*3);
570
+
571
+ // create our new ruby string
572
+ rb_output_buf = rb_str_new(NULL, new_len);
573
+ outBuf = (unsigned char *)RSTRING_PTR(rb_output_buf);
525
574
 
526
575
  // perform our escape, returning the new string's length
527
576
  new_len = escape_uri(outBuf, inBuf, len);
528
577
 
529
- // create our new ruby string
530
- rb_output_buf = rb_str_new((char *)outBuf, new_len);
531
-
532
- // free the temporary C string
533
- free(outBuf);
578
+ // shrink our new ruby string
579
+ rb_str_resize(rb_output_buf, new_len);
534
580
 
535
581
  #ifdef HAVE_RUBY_ENCODING_H
536
582
  rb_enc_associate(rb_output_buf, original_encoding);
537
583
  if (default_internal_enc) {
538
584
  rb_output_buf = rb_str_export_to_enc(rb_output_buf, default_internal_enc);
539
- } else {
540
- rb_output_buf = rb_str_export_to_enc(rb_output_buf, utf8Encoding);
541
585
  }
542
586
  #endif
543
587
  return rb_output_buf;
@@ -552,34 +596,47 @@ static VALUE rb_unescape_uri(VALUE self, VALUE str) {
552
596
  rb_encoding *original_encoding = rb_enc_get(str);
553
597
  #endif
554
598
  unsigned char *inBuf = (unsigned char*)RSTRING_PTR(str);
555
- size_t len = RSTRING_LEN(str), new_len = 0;
599
+ size_t len = RSTRING_LEN(str);
600
+ unsigned char *outBuf;
556
601
 
557
602
  // this is the max size the string could be
558
603
  // TODO: we should try to be more intelligent about this
559
- unsigned char *outBuf = (unsigned char *)malloc(sizeof(unsigned char *)*len);
604
+ size_t new_len = sizeof(unsigned char)*len;
605
+
606
+ // create our new ruby string
607
+ rb_output_buf = rb_str_new(NULL, new_len);
608
+ outBuf = (unsigned char *)RSTRING_PTR(rb_output_buf);
560
609
 
561
610
  // perform our escape, returning the new string's length
562
611
  new_len = unescape_uri(outBuf, inBuf, len);
563
612
 
564
- // create our new ruby string
565
- rb_output_buf = rb_str_new((char *)outBuf, new_len);
566
-
567
- // free the temporary C string
568
- free(outBuf);
613
+ // shrink our new ruby string
614
+ rb_str_resize(rb_output_buf, new_len);
569
615
 
570
616
  #ifdef HAVE_RUBY_ENCODING_H
571
617
  rb_enc_associate(rb_output_buf, original_encoding);
572
618
  if (default_internal_enc) {
573
619
  rb_output_buf = rb_str_export_to_enc(rb_output_buf, default_internal_enc);
574
- } else {
575
- rb_output_buf = rb_str_export_to_enc(rb_output_buf, utf8Encoding);
576
620
  }
577
621
  #endif
578
622
  return rb_output_buf;
579
623
  }
580
624
 
625
+ static VALUE rb_s_get_html_secure(VALUE self)
626
+ {
627
+ return rb_cvar_get(self, rb_html_secure);
628
+ }
629
+
630
+ static VALUE rb_s_set_html_secure(VALUE self, VALUE val)
631
+ {
632
+ html_secure = RTEST(val);
633
+ rb_cvar_set(self, rb_html_secure, val);
634
+
635
+ return val;
636
+ }
637
+
581
638
  /* Ruby Extension initializer */
582
- void Init_escape_utils_ext() {
639
+ void Init_escape_utils() {
583
640
  mEscapeUtils = rb_define_module("EscapeUtils");
584
641
  rb_define_method(mEscapeUtils, "escape_html", rb_escape_html, -1);
585
642
  rb_define_module_function(mEscapeUtils, "escape_html", rb_escape_html, -1);
@@ -598,10 +655,9 @@ void Init_escape_utils_ext() {
598
655
  rb_define_method(mEscapeUtils, "unescape_uri", rb_unescape_uri, 1);
599
656
  rb_define_module_function(mEscapeUtils, "unescape_uri", rb_unescape_uri, 1);
600
657
 
601
- #ifdef HAVE_RUBY_ENCODING_H
602
- utf8Encoding = rb_utf8_encoding();
603
- #endif
658
+ rb_define_singleton_method(mEscapeUtils, "html_secure", rb_s_get_html_secure, 0);
659
+ rb_define_singleton_method(mEscapeUtils, "html_secure=", rb_s_set_html_secure, 1);
604
660
 
605
- rb_html_secure = rb_intern("html_secure");
661
+ rb_html_secure = rb_intern("@@html_secure");
606
662
  }
607
663