escape_utils 0.1.9 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore CHANGED
@@ -3,4 +3,6 @@ Makefile
3
3
  *.bundle
4
4
  pkg/*
5
5
  doc/*
6
- *.rbc
6
+ *.rbc
7
+ tmp/
8
+ Gemfile.lock
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --format documentation
2
+ --colour
data/CHANGELOG.md CHANGED
@@ -1,5 +1,16 @@
1
1
  # Changelog
2
2
 
3
+ ## 0.2.0 (February 8th, 2011)
4
+ * fixed a couple of compilation warnings on 1.9.3
5
+ * moved to rspec2
6
+ * remove hard-conversion to utf-8 to preserve the string's original encoding
7
+ * moved to rake-compiler, Bundler
8
+ * pass through incompletely escaped data on unescaping
9
+ * added tilde to escape_{uri,url} specs (it's a difference between CGI.escape and URI.escape)
10
+ * escape_uri and escape_url now match their Ruby counterparts
11
+ ** escape_uri is used where URI.escape is, and escape_url is used where CGI.escape is used.
12
+ * performance and memory usage optimizations
13
+
3
14
  ## 0.1.9 (October 15th, 2010)
4
15
  * add a flag as an optional 2nd parameter to EscapeUtils.escape_html to disable/enable the escaping of the '/' character. Defaults to the new flag EscapeUtils.html_secure
5
16
 
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
+ source :rubygems
2
+
3
+ gemspec
data/README.rdoc CHANGED
@@ -2,11 +2,9 @@
2
2
 
3
3
  Being as though we're all html escaping everything these days, why not make it faster?
4
4
 
5
- At the moment escape_utils supports escaping and unescaping of HTML, and Javascript but I wanna add URL encoding soon.
6
-
7
5
  For character encoding in 1.9, we'll return strings in whatever Encoding.default_internal is set to, or in the string's original encoding otherwise.
8
6
 
9
- It has monkey-patches for Rack::Utils, CGI, ERB::Util and Haml and ActionView so you can drop this in and have your app start escaping fast as balls in no time
7
+ It has monkey-patches for Rack::Utils, CGI, URI, ERB::Util, Haml and ActionView so you can drop this in and have your app start escaping fast as balls in no time
10
8
 
11
9
  It supports HTML, URL, URI and Javascript escaping/unescaping.
12
10
 
@@ -38,16 +36,20 @@ It supports HTML, URL, URI and Javascript escaping/unescaping.
38
36
 
39
37
  === URL
40
38
 
39
+ Use (un)escape_uri to get RFC-compliant escaping (like PHP rawurlencode).
40
+
41
+ Use (un)escape_url to get CGI escaping (where space is +).
42
+
41
43
  ==== Escaping
42
44
 
43
45
  url = "https://www.yourmom.com/cgi-bin/session.cgi?sess_args=mcEA~!!#*YH*>@!U"
44
- escaped_url = EscapeUtils.url_escape(url)
46
+ escaped_url = EscapeUtils.escape_url(url)
45
47
 
46
48
  ==== Unescaping
47
49
 
48
50
  url = "https://www.yourmom.com/cgi-bin/session.cgi?sess_args=mcEA~!!#*YH*>@!U"
49
- escaped_url = EscapeUtils.url_escape(url)
50
- EscapeUtils.url_unescape(escaped_url) == url # => true
51
+ escaped_url = EscapeUtils.escape_url(url)
52
+ EscapeUtils.unescape_url(escaped_url) == url # => true
51
53
 
52
54
  === Javascript
53
55
 
@@ -141,4 +143,4 @@ I didn't look that hard, but I'm not aware of another ruby library that does Jav
141
143
  fast_xs_extra#fast_uxs_cgi
142
144
  0.010000 0.000000 0.010000 ( 0.006062)
143
145
  EscapeUtils.unescape_url
144
- 0.000000 0.000000 0.000000 ( 0.005679)
146
+ 0.000000 0.000000 0.000000 ( 0.005679)
data/Rakefile CHANGED
@@ -1,35 +1,32 @@
1
- # encoding: UTF-8
1
+ # rspec
2
2
  begin
3
- require 'jeweler'
4
- Jeweler::Tasks.new do |gem|
5
- gem.name = "escape_utils"
6
- gem.summary = "Faster string escaping routines for your web apps"
7
- gem.email = "seniorlopez@gmail.com"
8
- gem.homepage = "http://github.com/brianmario/escape_utils"
9
- gem.authors = ["Brian Lopez"]
10
- gem.require_paths = ["lib", "ext"]
11
- gem.extra_rdoc_files = `git ls-files *.rdoc`.split("\n")
12
- gem.files = `git ls-files`.split("\n")
13
- gem.extensions = ["ext/extconf.rb"]
14
- gem.files.include %w(lib/jeweler/templates/.document lib/jeweler/templates/.gitignore)
15
- # gem.rubyforge_project = "mysql2"
3
+ require 'rspec'
4
+ require 'rspec/core/rake_task'
5
+
6
+ desc "Run all examples with RCov"
7
+ RSpec::Core::RakeTask.new('spec:rcov') do |t|
8
+ t.rcov = true
9
+ end
10
+ RSpec::Core::RakeTask.new('spec') do |t|
11
+ t.verbose = true
16
12
  end
13
+
14
+ task :default => :spec
17
15
  rescue LoadError
18
- puts "Jeweler, or one of its dependencies, is not available. Install it with: sudo gem install jeweler -s http://gems.github.com"
16
+ puts "rspec, or one of its dependencies, is not available. Install it with: sudo gem install rspec"
19
17
  end
20
18
 
21
- require 'rake'
22
- require 'spec/rake/spectask'
19
+ # rake-compiler
20
+ require 'rake' unless defined? Rake
23
21
 
24
- desc "Run all examples with RCov"
25
- Spec::Rake::SpecTask.new('spec:rcov') do |t|
26
- t.spec_files = FileList['spec/']
27
- t.rcov = true
28
- t.rcov_opts = lambda do
29
- IO.readlines("spec/rcov.opts").map {|l| l.chomp.split " "}.flatten
30
- end
22
+ gem 'rake-compiler', '>= 0.7.5'
23
+ require "rake/extensiontask"
24
+
25
+ Rake::ExtensionTask.new('escape_utils') do |ext|
26
+ ext.cross_compile = true
27
+ ext.cross_platform = ['x86-mingw32', 'x86-mswin32-60']
28
+
29
+ ext.lib_dir = File.join 'lib', 'escape_utils'
31
30
  end
32
- Spec::Rake::SpecTask.new('spec') do |t|
33
- t.spec_files = FileList['spec/']
34
- t.spec_opts << '--options' << 'spec/spec.opts'
35
- end
31
+
32
+ Rake::Task[:spec].prerequisites << :compile
@@ -10,7 +10,6 @@ require 'erb'
10
10
  require 'cgi'
11
11
  require 'haml'
12
12
  require 'fast_xs_extra'
13
- require 'faster_html_escape'
14
13
  require 'escape_utils'
15
14
 
16
15
  module HamlBench
@@ -51,13 +50,6 @@ Benchmark.bmbm do |x|
51
50
  end
52
51
  end
53
52
 
54
- x.report do
55
- puts "FasterHTMLEscape.html_escape"
56
- times.times do
57
- FasterHTMLEscape.html_escape(html)
58
- end
59
- end
60
-
61
53
  x.report do
62
54
  puts "fast_xs_extra#fast_xs_html"
63
55
  times.times do
data/escape_utils.gemspec CHANGED
@@ -1,90 +1,31 @@
1
- # Generated by jeweler
2
- # DO NOT EDIT THIS FILE DIRECTLY
3
- # Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
4
- # -*- encoding: utf-8 -*-
1
+ require './lib/escape_utils/version' unless defined? EscapeUtils::VERSION
5
2
 
6
3
  Gem::Specification.new do |s|
7
4
  s.name = %q{escape_utils}
8
- s.version = "0.1.9"
9
-
10
- s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
5
+ s.version = EscapeUtils::VERSION
11
6
  s.authors = ["Brian Lopez"]
12
- s.date = %q{2010-10-15}
7
+ s.date = Time.now.utc.strftime("%Y-%m-%d")
13
8
  s.email = %q{seniorlopez@gmail.com}
14
- s.extensions = ["ext/extconf.rb"]
9
+ s.extensions = ["ext/escape_utils/extconf.rb"]
15
10
  s.extra_rdoc_files = [
16
11
  "README.rdoc"
17
12
  ]
18
- s.files = [
19
- ".gitignore",
20
- "CHANGELOG.md",
21
- "MIT-LICENSE",
22
- "README.rdoc",
23
- "Rakefile",
24
- "VERSION",
25
- "benchmark/html_escape.rb",
26
- "benchmark/html_unescape.rb",
27
- "benchmark/javascript_escape.rb",
28
- "benchmark/javascript_unescape.rb",
29
- "benchmark/url_escape.rb",
30
- "benchmark/url_unescape.rb",
31
- "escape_utils.gemspec",
32
- "ext/escape_utils.c",
33
- "ext/extconf.rb",
34
- "lib/escape_utils.rb",
35
- "lib/escape_utils/html/cgi.rb",
36
- "lib/escape_utils/html/erb.rb",
37
- "lib/escape_utils/html/haml.rb",
38
- "lib/escape_utils/html/rack.rb",
39
- "lib/escape_utils/html_safety.rb",
40
- "lib/escape_utils/javascript/action_view.rb",
41
- "lib/escape_utils/url/cgi.rb",
42
- "lib/escape_utils/url/erb.rb",
43
- "lib/escape_utils/url/rack.rb",
44
- "lib/escape_utils/url/uri.rb",
45
- "spec/html/escape_spec.rb",
46
- "spec/html/unescape_spec.rb",
47
- "spec/html_safety_spec.rb",
48
- "spec/javascript/escape_spec.rb",
49
- "spec/javascript/unescape_spec.rb",
50
- "spec/query/escape_spec.rb",
51
- "spec/query/unescape_spec.rb",
52
- "spec/rcov.opts",
53
- "spec/spec.opts",
54
- "spec/spec_helper.rb",
55
- "spec/uri/escape_spec.rb",
56
- "spec/uri/unescape_spec.rb",
57
- "spec/url/escape_spec.rb",
58
- "spec/url/unescape_spec.rb"
59
- ]
13
+ s.files = `git ls-files`.split("\n")
60
14
  s.homepage = %q{http://github.com/brianmario/escape_utils}
61
15
  s.rdoc_options = ["--charset=UTF-8"]
62
16
  s.require_paths = ["lib", "ext"]
63
- s.rubygems_version = %q{1.3.7}
17
+ s.rubygems_version = %q{1.4.2}
64
18
  s.summary = %q{Faster string escaping routines for your web apps}
65
- s.test_files = [
66
- "spec/html/escape_spec.rb",
67
- "spec/html/unescape_spec.rb",
68
- "spec/html_safety_spec.rb",
69
- "spec/javascript/escape_spec.rb",
70
- "spec/javascript/unescape_spec.rb",
71
- "spec/query/escape_spec.rb",
72
- "spec/query/unescape_spec.rb",
73
- "spec/spec_helper.rb",
74
- "spec/uri/escape_spec.rb",
75
- "spec/uri/unescape_spec.rb",
76
- "spec/url/escape_spec.rb",
77
- "spec/url/unescape_spec.rb"
78
- ]
79
-
80
- if s.respond_to? :specification_version then
81
- current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
82
- s.specification_version = 3
19
+ s.test_files = `git ls-files spec`.split("\n")
83
20
 
84
- if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
85
- else
86
- end
87
- else
88
- end
21
+ # tests
22
+ s.add_development_dependency 'rake-compiler', ">= 0.7.5"
23
+ s.add_development_dependency 'rspec', ">= 2.0.0"
24
+ # benchmarks
25
+ s.add_development_dependency 'rack'
26
+ s.add_development_dependency 'haml'
27
+ s.add_development_dependency 'fast_xs'
28
+ s.add_development_dependency 'actionpack'
29
+ s.add_development_dependency 'url_escape'
89
30
  end
90
31
 
@@ -1,19 +1,27 @@
1
+ #if RB_CVAR_SET_ARITY == 4
2
+ # define rb_cvar_set(a,b,c) rb_cvar_set(a,b,c,0)
3
+ #endif
1
4
  #include <ruby.h>
2
5
  #ifdef HAVE_RUBY_ENCODING_H
3
6
  #include <ruby/encoding.h>
4
- static rb_encoding *utf8Encoding;
5
7
  #endif
6
8
 
7
9
  static VALUE mEscapeUtils;
8
10
  static ID rb_html_secure;
11
+ static int html_secure = 1;
9
12
 
10
- #define IS_HEX(c) (c >= 48 || c <= 57) && (c >= 65 || c <= 70) && (c >= 97 || c <= 102)
11
- #define NOT_HEX(c) (c < 48 || c > 57) && (c < 65 || c > 90) && (c < 97 || c > 122)
13
+ #define IS_HEX(c) ((c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f'))
12
14
  #define UNHEX(c) (c >= '0' && c <= '9' ? c - '0' : c >= 'A' && c <= 'F' ? c - 'A' + 10 : c - 'a' + 10)
13
- #define URI_SAFE(c) (c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c== 45 || c == 46 || c == 95 || c == 126
14
- // ALPHA / DIGIT / "-" / "." / "_" / "~"
15
15
 
16
- static size_t escape_html(unsigned char *out, const unsigned char *in, size_t in_len, unsigned short int secure) {
16
+ #define ALPHANUM(c) ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9'))
17
+ #define URL_SAFE(c) (ALPHANUM(c) || c == '-' || c == '_' || c == '.')
18
+
19
+ /* from uri/common.rb */
20
+ #define UNRESERVED(c) (ALPHANUM(c) || c == '-' || c == '_' || c == '.' || c == '!' || c == '~' || c == '*' || c == '\'' || c == '(' || c == ')')
21
+ #define RESERVED(c) (c == ';' || c == '/' || c == '?' || c == ':' || c == '@' || c== '&' || c == '=' || c == '+' || c == '$' || c == ',' || c == '[' || c == ']')
22
+ #define URI_SAFE(c) (URL_SAFE(c) || UNRESERVED(c) || RESERVED(c))
23
+
24
+ static size_t escape_html(unsigned char *out, const unsigned char *in, size_t in_len, int secure) {
17
25
  size_t total = 0;
18
26
  unsigned char curChar;
19
27
 
@@ -57,36 +65,39 @@ static size_t unescape_html(unsigned char *out, const unsigned char *in, size_t
57
65
  while (len) {
58
66
  curChar = *in++;
59
67
  if (curChar == '&') {
60
- if ((in-start)+2 <= in_len && *in == 'l' && *(in+1) == 't' && *(in+2) == ';') {
68
+ if (*in == 'l' && *(in+1) == 't' && *(in+2) == ';') {
61
69
  *out++ = '<';
62
70
  total-=3;
63
71
  in+=3;
64
72
  len-=3;
65
- } else if ((in-start)+2 <= in_len && *in == 'g' && *(in+1) == 't' && *(in+2) == ';') {
73
+ } else if (*in == 'g' && *(in+1) == 't' && *(in+2) == ';') {
66
74
  *out++ = '>';
67
75
  total-=3;
68
76
  in+=3;
69
77
  len-=3;
70
- } else if ((in-start)+3 <= in_len && *in == 'a' && *(in+1) == 'm' && *(in+2) == 'p' && *(in+3) == ';') {
78
+ } else if (*in == 'a' && *(in+1) == 'm' && *(in+2) == 'p' && *(in+3) == ';') {
71
79
  *out++ = '&';
72
80
  total-=4;
73
81
  in+=4;
74
82
  len-=4;
75
- } else if ((in-start)+3 <= in_len && *in == '#' && *(in+1) == '3' && *(in+2) == '9' && *(in+3) == ';') {
83
+ } else if (*in == '#' && *(in+1) == '3' && *(in+2) == '9' && *(in+3) == ';') {
76
84
  *out++ = '\'';
77
85
  total-=4;
78
86
  in+=4;
79
87
  len-=4;
80
- } else if ((in-start)+3 <= in_len && *in == '#' && *(in+1) == '4' && *(in+2) == '7' && *(in+3) == ';') {
88
+ } else if (*in == '#' && *(in+1) == '4' && *(in+2) == '7' && *(in+3) == ';') {
81
89
  *out++ = '/';
82
90
  total-=4;
83
91
  in+=4;
84
92
  len-=4;
85
- } else if ((in-start)+4 <= in_len && *in == 'q' && *(in+1) == 'u' && *(in+2) == 'o' && *(in+3) == 't' && *(in+4) == ';') {
93
+ } else if (*in == 'q' && *(in+1) == 'u' && *(in+2) == 'o' && *(in+3) == 't' && *(in+4) == ';') {
86
94
  *out++ = '\"';
87
95
  total-=5;
88
96
  in+=5;
89
97
  len-=5;
98
+ } else {
99
/* not one of the recognized entities, pass the '&' through unchanged */
100
+ *out++ = curChar;
90
101
  }
91
102
  } else {
92
103
  *out++ = curChar;
@@ -172,7 +183,9 @@ static size_t unescape_javascript(unsigned char *out, const unsigned char *in, s
172
183
  *out++ = '/';
173
184
  total--;
174
185
  } else {
186
+ /* incomplete escape, pass it through */
175
187
  *out++ = curChar;
188
+ continue;
176
189
  }
177
190
  in++; in_len--;
178
191
  } else {
@@ -194,13 +207,13 @@ static size_t escape_url(unsigned char *out, const unsigned char *in, size_t in_
194
207
  curChar = *in++;
195
208
  if (curChar == ' ') {
196
209
  *out++ = '+';
197
- } else if ((curChar != '_' && curChar != '.' && curChar != '-') && NOT_HEX(curChar)) {
210
+ } else if (URL_SAFE(curChar)) {
211
+ *out++ = curChar;
212
+ } else {
198
213
  hex[1] = hexChars[curChar & 0x0f];
199
214
  hex[0] = hexChars[(curChar >> 4) & 0x0f];
200
215
  *out++ = '%'; *out++ = hex[0]; *out++ = hex[1];
201
216
  total += 2;
202
- } else {
203
- *out++ = curChar;
204
217
  }
205
218
  in_len--;
206
219
  }
@@ -217,10 +230,13 @@ static size_t unescape_url(unsigned char *out, const unsigned char *in, size_t i
217
230
  while (len) {
218
231
  curChar = *in++;
219
232
  if (curChar == '%') {
220
- if ((in-start)+2 <= in_len && IS_HEX(*in) && IS_HEX(*(in+1))) {
233
+ if (IS_HEX(*in) && IS_HEX(*(in+1))) {
221
234
  *out++ = (UNHEX(*in) << 4) + UNHEX(*(in+1));
222
235
  in+=2;
223
236
  total-=2;
237
+ } else {
238
+ /* incomplete escape, pass it through */
239
+ *out++ = curChar;
224
240
  }
225
241
  } else if (curChar == '+') {
226
242
  *out++ = ' ';
@@ -264,10 +280,13 @@ static size_t unescape_uri(unsigned char *out, const unsigned char *in, size_t i
264
280
  while (len) {
265
281
  curChar = *in++;
266
282
  if (curChar == '%') {
267
- if ((in-start)+2 <= in_len && IS_HEX(*in) && IS_HEX(*(in+1))) {
283
+ if (IS_HEX(*in) && IS_HEX(*(in+1))) {
268
284
  *out++ = (UNHEX(*in) << 4) + UNHEX(*(in+1));
269
285
  in+=2;
270
286
  total-=2;
287
+ } else {
288
+ /* incomplete escape, pass it through */
289
+ *out++ = curChar;
271
290
  }
272
291
  } else {
273
292
  *out++ = curChar;
@@ -279,11 +298,15 @@ static size_t unescape_uri(unsigned char *out, const unsigned char *in, size_t i
279
298
  }
280
299
 
281
300
  static VALUE rb_escape_html(int argc, VALUE * argv, VALUE self) {
282
- VALUE str, rb_secure = rb_funcall(mEscapeUtils, rb_html_secure, 0);
283
- unsigned short secure = 1;
284
- if (rb_secure == Qfalse) {
285
- secure = 0;
286
- }
301
+ VALUE str, rb_secure;
302
+ int secure = html_secure;
303
+ VALUE rb_output_buf;
304
+ #ifdef HAVE_RUBY_ENCODING_H
305
+ rb_encoding *default_internal_enc;
306
+ rb_encoding *original_encoding;
307
+ #endif
308
+ unsigned char *inBuf, *outBuf;
309
+ size_t len, new_len;
287
310
 
288
311
  if (rb_scan_args(argc, argv, "11", &str, &rb_secure) == 2) {
289
312
  if (rb_secure == Qfalse) {
@@ -293,181 +316,202 @@ static VALUE rb_escape_html(int argc, VALUE * argv, VALUE self) {
293
316
 
294
317
  Check_Type(str, T_STRING);
295
318
 
296
- VALUE rb_output_buf;
297
319
  #ifdef HAVE_RUBY_ENCODING_H
298
- rb_encoding *default_internal_enc = rb_default_internal_encoding();
299
- rb_encoding *original_encoding = rb_enc_get(str);
320
+ default_internal_enc = rb_default_internal_encoding();
321
+ original_encoding = rb_enc_get(str);
300
322
  #endif
301
- unsigned char *inBuf = (unsigned char*)RSTRING_PTR(str);
302
- size_t len = RSTRING_LEN(str), new_len = 0;
323
+ inBuf = (unsigned char*)RSTRING_PTR(str);
324
+ len = RSTRING_LEN(str);
303
325
 
304
326
  // this is the max size the string could be
305
327
  // TODO: we should try to be more intelligent about this
306
- unsigned char *outBuf = (unsigned char *)malloc(sizeof(unsigned char *)*(len*5));
328
+ new_len = sizeof(unsigned char)*(len*5);
329
+
330
+ // create our new ruby string
331
+ rb_output_buf = rb_str_new(NULL, new_len);
332
+ outBuf = (unsigned char *)RSTRING_PTR(rb_output_buf);
307
333
 
308
334
  // perform our escape, returning the new string's length
309
335
  new_len = escape_html(outBuf, inBuf, len, secure);
310
336
 
311
- // create our new ruby string
312
- rb_output_buf = rb_str_new((char *)outBuf, new_len);
313
-
314
- // free the temporary C string
315
- free(outBuf);
337
+ // shrink our new ruby string
338
+ rb_str_resize(rb_output_buf, new_len);
316
339
 
317
340
  #ifdef HAVE_RUBY_ENCODING_H
318
341
  rb_enc_associate(rb_output_buf, original_encoding);
319
342
  if (default_internal_enc) {
320
343
  rb_output_buf = rb_str_export_to_enc(rb_output_buf, default_internal_enc);
321
- } else {
322
- rb_output_buf = rb_str_export_to_enc(rb_output_buf, utf8Encoding);
323
344
  }
324
345
  #endif
325
346
  return rb_output_buf;
326
347
  }
327
348
 
328
349
  static VALUE rb_unescape_html(VALUE self, VALUE str) {
329
- Check_Type(str, T_STRING);
330
-
331
350
  VALUE rb_output_buf;
332
351
  #ifdef HAVE_RUBY_ENCODING_H
333
- rb_encoding *default_internal_enc = rb_default_internal_encoding();
334
- rb_encoding *original_encoding = rb_enc_get(str);
352
+ rb_encoding *default_internal_enc;
353
+ rb_encoding *original_encoding;
335
354
  #endif
336
- unsigned char *inBuf = (unsigned char*)RSTRING_PTR(str);
337
- size_t len = RSTRING_LEN(str), new_len = 0;
355
+ unsigned char *inBuf, *outBuf;
356
+ size_t len, new_len;
357
+
358
+ Check_Type(str, T_STRING);
359
+ #ifdef HAVE_RUBY_ENCODING_H
360
+ default_internal_enc = rb_default_internal_encoding();
361
+ original_encoding = rb_enc_get(str);
362
+ #endif
363
+ inBuf = (unsigned char*)RSTRING_PTR(str);
364
+ len = RSTRING_LEN(str);
338
365
 
339
366
  // this is the max size the string could be
340
- // TODO: we should try to be more intelligent about this
341
- unsigned char *outBuf = (unsigned char *)malloc(sizeof(unsigned char *)*len);
367
+ // TODO: we could be more intelligent about this, but probably not
368
+ new_len = sizeof(unsigned char) * len;
369
+
370
+ // create our new ruby string
371
+ rb_output_buf = rb_str_new(NULL, new_len);
372
+ outBuf = (unsigned char *)RSTRING_PTR(rb_output_buf);
342
373
 
343
374
  // perform our escape, returning the new string's length
344
375
  new_len = unescape_html(outBuf, inBuf, len);
345
376
 
346
- // create our new ruby string
347
- rb_output_buf = rb_str_new((char *)outBuf, new_len);
348
-
349
- // free the temporary C string
350
- free(outBuf);
377
+ // shrink our new ruby string
378
+ rb_str_resize(rb_output_buf, new_len);
351
379
 
352
380
  #ifdef HAVE_RUBY_ENCODING_H
353
381
  rb_enc_associate(rb_output_buf, original_encoding);
354
382
  if (default_internal_enc) {
355
383
  rb_output_buf = rb_str_export_to_enc(rb_output_buf, default_internal_enc);
356
- } else {
357
- rb_output_buf = rb_str_export_to_enc(rb_output_buf, utf8Encoding);
358
384
  }
359
385
  #endif
360
386
  return rb_output_buf;
361
387
  }
362
388
 
363
389
  static VALUE rb_escape_javascript(VALUE self, VALUE str) {
390
+ VALUE rb_output_buf;
391
+ #ifdef HAVE_RUBY_ENCODING_H
392
+ rb_encoding *default_internal_enc;
393
+ rb_encoding *original_encoding;
394
+ #endif
395
+ unsigned char *inBuf, *outBuf;
396
+ size_t len, new_len;
397
+
364
398
  if (str == Qnil) {
365
399
  return rb_str_new2("");
366
400
  }
367
401
 
368
402
  Check_Type(str, T_STRING);
369
403
 
370
- VALUE rb_output_buf;
371
404
  #ifdef HAVE_RUBY_ENCODING_H
372
- rb_encoding *default_internal_enc = rb_default_internal_encoding();
373
- rb_encoding *original_encoding = rb_enc_get(str);
405
+ default_internal_enc = rb_default_internal_encoding();
406
+ original_encoding = rb_enc_get(str);
374
407
  #endif
375
- unsigned char *inBuf = (unsigned char*)RSTRING_PTR(str);
376
- size_t len = RSTRING_LEN(str), new_len = 0;
408
+ inBuf = (unsigned char*)RSTRING_PTR(str);
409
+ len = RSTRING_LEN(str);
377
410
 
378
411
  // this is the max size the string could be
379
412
  // TODO: we should try to be more intelligent about this
380
- unsigned char *outBuf = (unsigned char *)malloc(sizeof(unsigned char *)*(len*2));
413
+ new_len = sizeof(unsigned char)*(len*2);
414
+
415
+ // create our new ruby string
416
+ rb_output_buf = rb_str_new(NULL, new_len);
417
+ outBuf = (unsigned char *)RSTRING_PTR(rb_output_buf);
381
418
 
382
419
  // perform our escape, returning the new string's length
383
420
  new_len = escape_javascript(outBuf, inBuf, len);
384
421
 
385
- // create our new ruby string
386
- rb_output_buf = rb_str_new((char *)outBuf, new_len);
387
-
388
- // free the temporary C string
389
- free(outBuf);
422
+ // shrink our new ruby string
423
+ rb_str_resize(rb_output_buf, new_len);
390
424
 
391
425
  #ifdef HAVE_RUBY_ENCODING_H
392
426
  rb_enc_associate(rb_output_buf, original_encoding);
393
427
  if (default_internal_enc) {
394
428
  rb_output_buf = rb_str_export_to_enc(rb_output_buf, default_internal_enc);
395
- } else {
396
- rb_output_buf = rb_str_export_to_enc(rb_output_buf, utf8Encoding);
397
429
  }
398
430
  #endif
399
431
  return rb_output_buf;
400
432
  }
401
433
 
402
434
  static VALUE rb_unescape_javascript(VALUE self, VALUE str) {
435
+ VALUE rb_output_buf;
436
+ #ifdef HAVE_RUBY_ENCODING_H
437
+ rb_encoding *default_internal_enc;
438
+ rb_encoding *original_encoding;
439
+ #endif
440
+ unsigned char *inBuf, *outBuf;
441
+ size_t len, new_len;
442
+
403
443
  if (str == Qnil) {
404
444
  return rb_str_new2("");
405
445
  }
406
446
 
407
447
  Check_Type(str, T_STRING);
408
448
 
409
- VALUE rb_output_buf;
410
449
  #ifdef HAVE_RUBY_ENCODING_H
411
- rb_encoding *default_internal_enc = rb_default_internal_encoding();
412
- rb_encoding *original_encoding = rb_enc_get(str);
450
+ default_internal_enc = rb_default_internal_encoding();
451
+ original_encoding = rb_enc_get(str);
413
452
  #endif
414
- unsigned char *inBuf = (unsigned char*)RSTRING_PTR(str);
415
- size_t len = RSTRING_LEN(str), new_len = 0;
453
+ inBuf = (unsigned char*)RSTRING_PTR(str);
454
+ len = RSTRING_LEN(str);
416
455
 
417
456
  // this is the max size the string could be
418
- // TODO: we should try to be more intelligent about this
419
- unsigned char *outBuf = (unsigned char *)malloc(sizeof(unsigned char *)*len);
457
+ // TODO: we could be more intelligent about this, but probably not
458
+ new_len = sizeof(unsigned char) * len;
459
+
460
+ // create our new ruby string
461
+ rb_output_buf = rb_str_new(NULL, new_len);
462
+ outBuf = (unsigned char *)RSTRING_PTR(rb_output_buf);
420
463
 
421
464
  // perform our escape, returning the new string's length
422
465
  new_len = unescape_javascript(outBuf, inBuf, len);
423
466
 
424
- // create our new ruby string
425
- rb_output_buf = rb_str_new((char *)outBuf, new_len);
426
-
427
- // free the temporary C string
428
- free(outBuf);
467
+ // shrink our new ruby string
468
+ rb_str_resize(rb_output_buf, new_len);
429
469
 
430
470
  #ifdef HAVE_RUBY_ENCODING_H
431
471
  rb_enc_associate(rb_output_buf, original_encoding);
432
472
  if (default_internal_enc) {
433
473
  rb_output_buf = rb_str_export_to_enc(rb_output_buf, default_internal_enc);
434
- } else {
435
- rb_output_buf = rb_str_export_to_enc(rb_output_buf, utf8Encoding);
436
474
  }
437
475
  #endif
438
476
  return rb_output_buf;
439
477
  }
440
478
 
441
479
  static VALUE rb_escape_url(VALUE self, VALUE str) {
480
+ VALUE rb_output_buf;
481
+ #ifdef HAVE_RUBY_ENCODING_H
482
+ rb_encoding *default_internal_enc;
483
+ rb_encoding *original_encoding;
484
+ #endif
485
+ unsigned char *inBuf, *outBuf;
486
+ size_t len, new_len;
487
+
442
488
  Check_Type(str, T_STRING);
443
489
 
444
- VALUE rb_output_buf;
445
490
  #ifdef HAVE_RUBY_ENCODING_H
446
- rb_encoding *default_internal_enc = rb_default_internal_encoding();
447
- rb_encoding *original_encoding = rb_enc_get(str);
491
+ default_internal_enc = rb_default_internal_encoding();
492
+ original_encoding = rb_enc_get(str);
448
493
  #endif
449
- unsigned char *inBuf = (unsigned char*)RSTRING_PTR(str);
450
- size_t len = RSTRING_LEN(str), new_len = 0;
494
+ inBuf = (unsigned char*)RSTRING_PTR(str);
495
+ len = RSTRING_LEN(str);
451
496
 
452
497
  // this is the max size the string could be
453
498
  // TODO: we should try to be more intelligent about this
454
- unsigned char *outBuf = (unsigned char *)malloc(sizeof(unsigned char *)*(len*3));
499
+ new_len = sizeof(unsigned char)*(len*3);
500
+
501
+ // create our new ruby string
502
+ rb_output_buf = rb_str_new(NULL, new_len);
503
+ outBuf = (unsigned char *)RSTRING_PTR(rb_output_buf);
455
504
 
456
505
  // perform our escape, returning the new string's length
457
506
  new_len = escape_url(outBuf, inBuf, len);
458
507
 
459
- // create our new ruby string
460
- rb_output_buf = rb_str_new((char *)outBuf, new_len);
461
-
462
- // free the temporary C string
463
- free(outBuf);
508
+ // shrink our new ruby string
509
+ rb_str_resize(rb_output_buf, new_len);
464
510
 
465
511
  #ifdef HAVE_RUBY_ENCODING_H
466
512
  rb_enc_associate(rb_output_buf, original_encoding);
467
513
  if (default_internal_enc) {
468
514
  rb_output_buf = rb_str_export_to_enc(rb_output_buf, default_internal_enc);
469
- } else {
470
- rb_output_buf = rb_str_export_to_enc(rb_output_buf, utf8Encoding);
471
515
  }
472
516
  #endif
473
517
  return rb_output_buf;
@@ -482,27 +526,27 @@ static VALUE rb_unescape_url(VALUE self, VALUE str) {
482
526
  rb_encoding *original_encoding = rb_enc_get(str);
483
527
  #endif
484
528
  unsigned char *inBuf = (unsigned char*)RSTRING_PTR(str);
485
- size_t len = RSTRING_LEN(str), new_len = 0;
529
+ size_t len = RSTRING_LEN(str);
486
530
 
487
531
  // this is the max size the string could be
488
- // TODO: we should try to be more intelligent about this
489
- unsigned char *outBuf = (unsigned char *)malloc(sizeof(unsigned char *)*len);
532
+ // TODO: we could be more intelligent about this, but probably not
533
+ size_t new_len = sizeof(unsigned char) * len;
534
+ unsigned char *outBuf;
535
+
536
+ // create our new ruby string
537
+ rb_output_buf = rb_str_new(NULL, new_len);
490
538
 
491
539
  // perform our escape, returning the new string's length
540
+ outBuf = (unsigned char *)RSTRING_PTR(rb_output_buf);
492
541
  new_len = unescape_url(outBuf, inBuf, len);
493
542
 
494
- // create our new ruby string
495
- rb_output_buf = rb_str_new((char *)outBuf, new_len);
496
-
497
- // free the temporary C string
498
- free(outBuf);
543
+ // shrink our new ruby string
544
+ rb_str_resize(rb_output_buf, new_len);
499
545
 
500
546
  #ifdef HAVE_RUBY_ENCODING_H
501
547
  rb_enc_associate(rb_output_buf, original_encoding);
502
548
  if (default_internal_enc) {
503
549
  rb_output_buf = rb_str_export_to_enc(rb_output_buf, default_internal_enc);
504
- } else {
505
- rb_output_buf = rb_str_export_to_enc(rb_output_buf, utf8Encoding);
506
550
  }
507
551
  #endif
508
552
  return rb_output_buf;
@@ -517,27 +561,27 @@ static VALUE rb_escape_uri(VALUE self, VALUE str) {
517
561
  rb_encoding *original_encoding = rb_enc_get(str);
518
562
  #endif
519
563
  unsigned char *inBuf = (unsigned char*)RSTRING_PTR(str);
520
- size_t len = RSTRING_LEN(str), new_len = 0;
564
+ size_t len = RSTRING_LEN(str);
565
+ unsigned char *outBuf;
521
566
 
522
567
  // this is the max size the string could be
523
568
  // TODO: we should try to be more intelligent about this
524
- unsigned char *outBuf = (unsigned char *)malloc(sizeof(unsigned char *)*(len*3));
569
+ size_t new_len = sizeof(unsigned char)*(len*3);
570
+
571
+ // create our new ruby string
572
+ rb_output_buf = rb_str_new(NULL, new_len);
573
+ outBuf = (unsigned char *)RSTRING_PTR(rb_output_buf);
525
574
 
526
575
  // perform our escape, returning the new string's length
527
576
  new_len = escape_uri(outBuf, inBuf, len);
528
577
 
529
- // create our new ruby string
530
- rb_output_buf = rb_str_new((char *)outBuf, new_len);
531
-
532
- // free the temporary C string
533
- free(outBuf);
578
+ // shrink our new ruby string
579
+ rb_str_resize(rb_output_buf, new_len);
534
580
 
535
581
  #ifdef HAVE_RUBY_ENCODING_H
536
582
  rb_enc_associate(rb_output_buf, original_encoding);
537
583
  if (default_internal_enc) {
538
584
  rb_output_buf = rb_str_export_to_enc(rb_output_buf, default_internal_enc);
539
- } else {
540
- rb_output_buf = rb_str_export_to_enc(rb_output_buf, utf8Encoding);
541
585
  }
542
586
  #endif
543
587
  return rb_output_buf;
@@ -552,34 +596,47 @@ static VALUE rb_unescape_uri(VALUE self, VALUE str) {
552
596
  rb_encoding *original_encoding = rb_enc_get(str);
553
597
  #endif
554
598
  unsigned char *inBuf = (unsigned char*)RSTRING_PTR(str);
555
- size_t len = RSTRING_LEN(str), new_len = 0;
599
+ size_t len = RSTRING_LEN(str);
600
+ unsigned char *outBuf;
556
601
 
557
602
  // this is the max size the string could be
558
603
  // TODO: we should try to be more intelligent about this
559
- unsigned char *outBuf = (unsigned char *)malloc(sizeof(unsigned char *)*len);
604
+ size_t new_len = sizeof(unsigned char)*len;
605
+
606
+ // create our new ruby string
607
+ rb_output_buf = rb_str_new(NULL, new_len);
608
+ outBuf = (unsigned char *)RSTRING_PTR(rb_output_buf);
560
609
 
561
610
  // perform our escape, returning the new string's length
562
611
  new_len = unescape_uri(outBuf, inBuf, len);
563
612
 
564
- // create our new ruby string
565
- rb_output_buf = rb_str_new((char *)outBuf, new_len);
566
-
567
- // free the temporary C string
568
- free(outBuf);
613
+ // shrink our new ruby string
614
+ rb_str_resize(rb_output_buf, new_len);
569
615
 
570
616
  #ifdef HAVE_RUBY_ENCODING_H
571
617
  rb_enc_associate(rb_output_buf, original_encoding);
572
618
  if (default_internal_enc) {
573
619
  rb_output_buf = rb_str_export_to_enc(rb_output_buf, default_internal_enc);
574
- } else {
575
- rb_output_buf = rb_str_export_to_enc(rb_output_buf, utf8Encoding);
576
620
  }
577
621
  #endif
578
622
  return rb_output_buf;
579
623
  }
580
624
 
625
+ static VALUE rb_s_get_html_secure(VALUE self)
626
+ {
627
+ return rb_cvar_get(self, rb_html_secure);
628
+ }
629
+
630
+ static VALUE rb_s_set_html_secure(VALUE self, VALUE val)
631
+ {
632
+ html_secure = RTEST(val);
633
+ rb_cvar_set(self, rb_html_secure, val);
634
+
635
+ return val;
636
+ }
637
+
581
638
  /* Ruby Extension initializer */
582
- void Init_escape_utils_ext() {
639
+ void Init_escape_utils() {
583
640
  mEscapeUtils = rb_define_module("EscapeUtils");
584
641
  rb_define_method(mEscapeUtils, "escape_html", rb_escape_html, -1);
585
642
  rb_define_module_function(mEscapeUtils, "escape_html", rb_escape_html, -1);
@@ -598,10 +655,9 @@ void Init_escape_utils_ext() {
598
655
  rb_define_method(mEscapeUtils, "unescape_uri", rb_unescape_uri, 1);
599
656
  rb_define_module_function(mEscapeUtils, "unescape_uri", rb_unescape_uri, 1);
600
657
 
601
- #ifdef HAVE_RUBY_ENCODING_H
602
- utf8Encoding = rb_utf8_encoding();
603
- #endif
658
+ rb_define_singleton_method(mEscapeUtils, "html_secure", rb_s_get_html_secure, 0);
659
+ rb_define_singleton_method(mEscapeUtils, "html_secure=", rb_s_set_html_secure, 1);
604
660
 
605
- rb_html_secure = rb_intern("html_secure");
661
+ rb_html_secure = rb_intern("@@html_secure");
606
662
  }
607
663