escape_utils 0.1.5 → 0.1.6

Sign up to get free protection for your applications and to get access to all the features.
data/CHANGELOG.md CHANGED
@@ -1,5 +1,9 @@
1
1
  # Changelog
2
2
 
3
+ ## 0.1.6 (September 6th, 2010)
4
+ * support for URI escaping added (thanks to @joshbuddy)
5
+ * bugfix to ensure we don't drop opening tags during escape_javascript (thanks to @nagybence)
6
+
3
7
  ## 0.1.5 (July 13th, 2010)
4
8
  * add URL escaping and unescaping
5
9
  * major refactor of HTML and Javascript escaping and unescaping logic for a decent speed up
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.5
1
+ 0.1.6
data/escape_utils.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{escape_utils}
8
- s.version = "0.1.5"
8
+ s.version = "0.1.6"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Brian Lopez"]
12
- s.date = %q{2010-07-13}
12
+ s.date = %q{2010-09-06}
13
13
  s.email = %q{seniorlopez@gmail.com}
14
14
  s.extensions = ["ext/extconf.rb"]
15
15
  s.extra_rdoc_files = [
@@ -41,16 +41,19 @@ Gem::Specification.new do |s|
41
41
  "lib/escape_utils/url/cgi.rb",
42
42
  "lib/escape_utils/url/erb.rb",
43
43
  "lib/escape_utils/url/rack.rb",
44
+ "lib/escape_utils/url/uri.rb",
44
45
  "spec/html/escape_spec.rb",
45
46
  "spec/html/unescape_spec.rb",
46
47
  "spec/html_safety_spec.rb",
47
48
  "spec/javascript/escape_spec.rb",
48
49
  "spec/javascript/unescape_spec.rb",
50
+ "spec/query/escape_spec.rb",
51
+ "spec/query/unescape_spec.rb",
49
52
  "spec/rcov.opts",
50
53
  "spec/spec.opts",
51
54
  "spec/spec_helper.rb",
52
- "spec/url/escape_spec.rb",
53
- "spec/url/unescape_spec.rb"
55
+ "spec/uri/escape_spec.rb",
56
+ "spec/uri/unescape_spec.rb"
54
57
  ]
55
58
  s.homepage = %q{http://github.com/brianmario/escape_utils}
56
59
  s.rdoc_options = ["--charset=UTF-8"]
@@ -63,9 +66,11 @@ Gem::Specification.new do |s|
63
66
  "spec/html_safety_spec.rb",
64
67
  "spec/javascript/escape_spec.rb",
65
68
  "spec/javascript/unescape_spec.rb",
69
+ "spec/query/escape_spec.rb",
70
+ "spec/query/unescape_spec.rb",
66
71
  "spec/spec_helper.rb",
67
- "spec/url/escape_spec.rb",
68
- "spec/url/unescape_spec.rb"
72
+ "spec/uri/escape_spec.rb",
73
+ "spec/uri/unescape_spec.rb"
69
74
  ]
70
75
 
71
76
  if s.respond_to? :specification_version then
data/ext/escape_utils.c CHANGED
@@ -7,6 +7,8 @@ static rb_encoding *utf8Encoding;
7
7
  #define IS_HEX(c) (c >= 48 || c <= 57) && (c >= 65 || c <= 70) && (c >= 97 || c <= 102)
8
8
  #define NOT_HEX(c) (c < 48 || c > 57) && (c < 65 || c > 90) && (c < 97 || c > 122)
9
9
  #define UNHEX(c) (c >= '0' && c <= '9' ? c - '0' : c >= 'A' && c <= 'F' ? c - 'A' + 10 : c - 'a' + 10)
10
+ #define URI_SAFE(c) (c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= 38 && c <= 47) || c == 33 || c == 36 || c == 58 || c == 59 || c == 61 || c == 63 || c == 64 || c == 91 || c == 93 || c == 95 || c == 126
11
+ // URI_SAFE leaves alphanumerics and these characters unescaped: . : / ? [ ] - _ ~ ! $ & ' ( ) * + , ; = @
10
12
 
11
13
  static size_t escape_html(unsigned char *out, const unsigned char *in, size_t in_len) {
12
14
  size_t total = 0;
@@ -112,8 +114,9 @@ static size_t escape_javascript(unsigned char *out, const unsigned char *in, siz
112
114
  total++;
113
115
  break;
114
116
  case '<':
117
+ *out++ = '<';
115
118
  if (*in == '/') {
116
- *out++ = '<'; *out++ = '\\'; *out++ = '/';
119
+ *out++ = '\\'; *out++ = '/';
117
120
  in++; in_len--;
118
121
  total++;
119
122
  }
@@ -234,6 +237,51 @@ static size_t unescape_url(unsigned char *out, const unsigned char *in, size_t i
234
237
  return total;
235
238
  }
236
239
 
240
+ static size_t escape_uri(unsigned char *out, const unsigned char *in, size_t in_len) {
241
+ size_t total = 0;
242
+ unsigned char curChar, hex[2];
243
+ const unsigned char hexChars[16] = "0123456789ABCDEF";
244
+
245
+ total = in_len;
246
+ while (in_len) {
247
+ curChar = *in++;
248
+ if (URI_SAFE(curChar)) {
249
+ *out++ = curChar;
250
+ } else {
251
+ hex[1] = hexChars[curChar & 0x0f];
252
+ hex[0] = hexChars[(curChar >> 4) & 0x0f];
253
+ *out++ = '%'; *out++ = hex[0]; *out++ = hex[1];
254
+ total += 2;
255
+ }
256
+ in_len--;
257
+ }
258
+
259
+ return total;
260
+ }
261
+
262
+ static size_t unescape_uri(unsigned char *out, const unsigned char *in, size_t in_len) {
263
+ size_t total = 0, len = in_len;
264
+ unsigned char curChar, *start;
265
+
266
+ start = (unsigned char *)&in[0];
267
+ total = in_len;
268
+ while (len) {
269
+ curChar = *in++;
270
+ if (curChar == '%') {
271
+ if ((in-start)+2 <= in_len && IS_HEX(*in) && IS_HEX(*(in+1))) {
272
+ *out++ = (UNHEX(*in) << 4) + UNHEX(*(in+1));
273
+ in+=2;
274
+ total-=2;
275
+ }
276
+ } else {
277
+ *out++ = curChar;
278
+ }
279
+ len--;
280
+ }
281
+
282
+ return total;
283
+ }
284
+
237
285
  static VALUE rb_escape_html(VALUE self, VALUE str) {
238
286
  Check_Type(str, T_STRING);
239
287
 
@@ -452,6 +500,76 @@ static VALUE rb_unescape_url(VALUE self, VALUE str) {
452
500
  return rb_output_buf;
453
501
  }
454
502
 
503
+ static VALUE rb_escape_uri(VALUE self, VALUE str) {
504
+ Check_Type(str, T_STRING);
505
+
506
+ VALUE rb_output_buf;
507
+ #ifdef HAVE_RUBY_ENCODING_H
508
+ rb_encoding *default_internal_enc = rb_default_internal_encoding();
509
+ rb_encoding *original_encoding = rb_enc_get(str);
510
+ #endif
511
+ unsigned char *inBuf = (unsigned char*)RSTRING_PTR(str);
512
+ size_t len = RSTRING_LEN(str), new_len = 0;
513
+
514
+ // this is the max size the string could be
515
+ // TODO: we should try to be more intelligent about this
516
+ unsigned char *outBuf = (unsigned char *)malloc(sizeof(unsigned char *)*(len*3));
517
+
518
+ // perform our escape, returning the new string's length
519
+ new_len = escape_uri(outBuf, inBuf, len);
520
+
521
+ // create our new ruby string
522
+ rb_output_buf = rb_str_new((char *)outBuf, new_len);
523
+
524
+ // free the temporary C string
525
+ free(outBuf);
526
+
527
+ #ifdef HAVE_RUBY_ENCODING_H
528
+ rb_enc_associate(rb_output_buf, original_encoding);
529
+ if (default_internal_enc) {
530
+ rb_output_buf = rb_str_export_to_enc(rb_output_buf, default_internal_enc);
531
+ } else {
532
+ rb_output_buf = rb_str_export_to_enc(rb_output_buf, utf8Encoding);
533
+ }
534
+ #endif
535
+ return rb_output_buf;
536
+ }
537
+
538
+ static VALUE rb_unescape_uri(VALUE self, VALUE str) {
539
+ Check_Type(str, T_STRING);
540
+
541
+ VALUE rb_output_buf;
542
+ #ifdef HAVE_RUBY_ENCODING_H
543
+ rb_encoding *default_internal_enc = rb_default_internal_encoding();
544
+ rb_encoding *original_encoding = rb_enc_get(str);
545
+ #endif
546
+ unsigned char *inBuf = (unsigned char*)RSTRING_PTR(str);
547
+ size_t len = RSTRING_LEN(str), new_len = 0;
548
+
549
+ // this is the max size the string could be
550
+ // TODO: we should try to be more intelligent about this
551
+ unsigned char *outBuf = (unsigned char *)malloc(sizeof(unsigned char *)*len);
552
+
553
+ // perform our unescape, returning the new string's length
554
+ new_len = unescape_uri(outBuf, inBuf, len);
555
+
556
+ // create our new ruby string
557
+ rb_output_buf = rb_str_new((char *)outBuf, new_len);
558
+
559
+ // free the temporary C string
560
+ free(outBuf);
561
+
562
+ #ifdef HAVE_RUBY_ENCODING_H
563
+ rb_enc_associate(rb_output_buf, original_encoding);
564
+ if (default_internal_enc) {
565
+ rb_output_buf = rb_str_export_to_enc(rb_output_buf, default_internal_enc);
566
+ } else {
567
+ rb_output_buf = rb_str_export_to_enc(rb_output_buf, utf8Encoding);
568
+ }
569
+ #endif
570
+ return rb_output_buf;
571
+ }
572
+
455
573
  /* Ruby Extension initializer */
456
574
  void Init_escape_utils_ext() {
457
575
  VALUE mEscape = rb_define_module("EscapeUtils");
@@ -467,8 +585,13 @@ void Init_escape_utils_ext() {
467
585
  rb_define_module_function(mEscape, "escape_url", rb_escape_url, 1);
468
586
  rb_define_method(mEscape, "unescape_url", rb_unescape_url, 1);
469
587
  rb_define_module_function(mEscape, "unescape_url", rb_unescape_url, 1);
588
+ rb_define_method(mEscape, "escape_uri", rb_escape_uri, 1);
589
+ rb_define_module_function(mEscape, "escape_uri", rb_escape_uri, 1);
590
+ rb_define_method(mEscape, "unescape_uri", rb_unescape_uri, 1);
591
+ rb_define_module_function(mEscape, "unescape_uri", rb_unescape_uri, 1);
470
592
 
471
593
  #ifdef HAVE_RUBY_ENCODING_H
472
594
  utf8Encoding = rb_utf8_encoding();
473
595
  #endif
474
- }
596
+ }
597
+
data/lib/escape_utils.rb CHANGED
@@ -4,7 +4,7 @@ require 'escape_utils_ext'
4
4
 
5
5
  EscapeUtils.send(:extend, EscapeUtils)
6
6
  module EscapeUtils
7
- VERSION = "0.1.5"
7
+ VERSION = "0.1.6"
8
8
 
9
9
  autoload :HtmlSafety, 'escape_utils/html_safety'
10
10
  end
@@ -7,7 +7,7 @@ class CGI
7
7
  alias escapeHTML _escape_html
8
8
 
9
9
  def unescapeHTML(s)
10
- EscapeUtils.unescape_html(s)
10
+ EscapeUtils.unescape_html(s.to_s)
11
11
  end
12
12
  end
13
13
  end
@@ -2,9 +2,9 @@
2
2
 
3
3
  class CGI
4
4
  def self.escape(s)
5
- EscapeUtils.escape_url(s)
5
+ EscapeUtils.escape_url(s.to_s)
6
6
  end
7
7
  def self.unescape(s)
8
- EscapeUtils.unescape_url(s)
8
+ EscapeUtils.unescape_url(s.to_s)
9
9
  end
10
10
  end
@@ -3,7 +3,7 @@
3
3
  class ERB
4
4
  module Util
5
5
  def url_encode(s)
6
- EscapeUtils.escape_url(s)
6
+ EscapeUtils.escape_url(s.to_s)
7
7
  end
8
8
  alias u url_encode
9
9
  module_function :u
@@ -3,10 +3,10 @@
3
3
  module Rack
4
4
  module Utils
5
5
  def escape(url)
6
- EscapeUtils.escape_url(url)
6
+ EscapeUtils.escape_url(url.to_s)
7
7
  end
8
8
  def unescape(url)
9
- EscapeUtils.unescape_url(url)
9
+ EscapeUtils.unescape_url(url.to_s)
10
10
  end
11
11
  module_function :escape
12
12
  module_function :unescape
@@ -0,0 +1,10 @@
1
+ # encoding: utf-8
2
+
3
+ module URI
4
+ def self.escape(s, unsafe=nil)
5
+ EscapeUtils.escape_uri(s.to_s)
6
+ end
7
+ def self.unescape(s)
8
+ EscapeUtils.unescape_uri(s.to_s)
9
+ end
10
+ end
@@ -20,7 +20,7 @@ describe EscapeUtils, "escape_javascript" do
20
20
  end
21
21
 
22
22
  it "should escape closed html tags" do
23
- EscapeUtils.escape_javascript(%(dont </close> tags)).should eql(%(dont <\\/close> tags))
23
+ EscapeUtils.escape_javascript(%(keep <open>, but dont </close> tags)).should eql(%(keep <open>, but dont <\\/close> tags))
24
24
  end
25
25
 
26
26
  if RUBY_VERSION =~ /^1.9/
@@ -36,4 +36,5 @@ describe EscapeUtils, "escape_javascript" do
36
36
  EscapeUtils.escape_javascript(%(dont </close> tags)).encoding.should eql(Encoding.default_internal)
37
37
  end
38
38
  end
39
- end
39
+ end
40
+
File without changes
File without changes
@@ -0,0 +1,52 @@
1
+ # encoding: UTF-8
2
+ require File.expand_path(File.dirname(__FILE__) + '/../spec_helper.rb')
3
+
4
+ describe EscapeUtils, "escape_uri" do
5
+ it "should respond to escape_uri" do
6
+ EscapeUtils.should respond_to(:escape_uri)
7
+ end
8
+
9
+ it "should escape a basic url" do
10
+ EscapeUtils.escape_uri("http://www.homerun.com/").should eql("http://www.homerun.com/")
11
+ end
12
+
13
+ # NOTE: from Rack's test suite
14
+ it "should escape a url containing tags" do
15
+ EscapeUtils.escape_uri("fo<o>bar").should eql("fo%3Co%3Ebar")
16
+ end
17
+
18
+ # NOTE: from Rack's test suite
19
+ it "should escape a url with spaces" do
20
+ EscapeUtils.escape_uri("a space").should eql("a%20space")
21
+ EscapeUtils.escape_uri("a   sp ace ").should eql("a%20%20%20sp%20ace%20")
22
+ end
23
+
24
+ # NOTE: from Rack's test suite
25
+ it "should escape a string of mixed characters" do
26
+ EscapeUtils.escape_uri("q1!2\"'w$5&7/z8)?\\").should eql("q1!2%22'w$5&7/z8)?%5C")
27
+ end
28
+
29
+ # NOTE: from Rack's test suite
30
+ it "should escape correctly for multibyte characters" do
31
+ matz_name = "\xE3\x81\xBE\xE3\x81\xA4\xE3\x82\x82\xE3\x81\xA8".unpack("a*")[0] # Matsumoto
32
+ matz_name.force_encoding("UTF-8") if matz_name.respond_to? :force_encoding
33
+ EscapeUtils.escape_uri(matz_name).should eql('%E3%81%BE%E3%81%A4%E3%82%82%E3%81%A8')
34
+ matz_name_sep = "\xE3\x81\xBE\xE3\x81\xA4 \xE3\x82\x82\xE3\x81\xA8".unpack("a*")[0] # Matsu moto
35
+ matz_name_sep.force_encoding("UTF-8") if matz_name_sep.respond_to? :force_encoding
36
+ EscapeUtils.escape_uri(matz_name_sep).should eql('%E3%81%BE%E3%81%A4%20%E3%82%82%E3%81%A8')
37
+ end
38
+
39
+ if RUBY_VERSION =~ /^1.9/
40
+ it "should default to utf-8 if Encoding.default_internal is nil" do
41
+ Encoding.default_internal = nil
42
+ EscapeUtils.escape_uri("http://www.homerun.com/").encoding.should eql(Encoding.find('utf-8'))
43
+ end
44
+
45
+ it "should use Encoding.default_internal" do
46
+ Encoding.default_internal = Encoding.find('utf-8')
47
+ EscapeUtils.escape_uri("http://www.homerun.com/").encoding.should eql(Encoding.default_internal)
48
+ Encoding.default_internal = Encoding.find('us-ascii')
49
+ EscapeUtils.escape_uri("http://www.homerun.com/").encoding.should eql(Encoding.default_internal)
50
+ end
51
+ end
52
+ end
@@ -0,0 +1,55 @@
1
+ # encoding: UTF-8
2
+ require File.expand_path(File.dirname(__FILE__) + '/../spec_helper.rb')
3
+
4
+ describe EscapeUtils, "unescape_uri" do
5
+ it "should respond to unescape_uri" do
6
+ EscapeUtils.should respond_to(:unescape_uri)
7
+ end
8
+
9
+ it "should unescape a basic url" do
10
+ EscapeUtils.unescape_uri("http%3A%2F%2Fwww.homerun.com%2F").should eql("http://www.homerun.com/")
11
+ EscapeUtils.unescape_uri("http://www.homerun.com/").should eql("http://www.homerun.com/")
12
+ end
13
+
14
+ # NOTE: from Rack's test suite
15
+ it "should unescape a url containing tags" do
16
+ EscapeUtils.unescape_uri("fo%3Co%3Ebar").should eql("fo<o>bar")
17
+ end
18
+
19
+ # NOTE: from Rack's test suite
20
+ it "should unescape a url with spaces" do
21
+ EscapeUtils.unescape_uri("a%20space").should eql("a space")
22
+ EscapeUtils.unescape_uri("a%20%20%20sp%20ace%20").should eql("a   sp ace ")
23
+ EscapeUtils.unescape_uri("a+space").should eql("a+space")
24
+ end
25
+
26
+ # NOTE: from Rack's test suite
27
+ it "should unescape a string of mixed characters" do
28
+ EscapeUtils.unescape_uri("q1%212%22%27w%245%267%2Fz8%29%3F%5C").should eql("q1!2\"'w$5&7/z8)?\\")
29
+ EscapeUtils.unescape_uri("q1!2%22'w$5&7/z8)?%5C").should eql("q1!2\"'w$5&7/z8)?\\")
30
+ end
31
+
32
+ # NOTE: from Rack's test suite
33
+ it "should unescape correctly for multibyte characters" do
34
+ matz_name = "\xE3\x81\xBE\xE3\x81\xA4\xE3\x82\x82\xE3\x81\xA8".unpack("a*")[0] # Matsumoto
35
+ matz_name.force_encoding("UTF-8") if matz_name.respond_to? :force_encoding
36
+ EscapeUtils.unescape_uri('%E3%81%BE%E3%81%A4%E3%82%82%E3%81%A8').should eql(matz_name)
37
+ matz_name_sep = "\xE3\x81\xBE\xE3\x81\xA4 \xE3\x82\x82\xE3\x81\xA8".unpack("a*")[0] # Matsu moto
38
+ matz_name_sep.force_encoding("UTF-8") if matz_name_sep.respond_to? :force_encoding
39
+ EscapeUtils.unescape_uri('%E3%81%BE%E3%81%A4%20%E3%82%82%E3%81%A8').should eql(matz_name_sep)
40
+ end
41
+
42
+ if RUBY_VERSION =~ /^1.9/
43
+ it "should default to utf-8 if Encoding.default_internal is nil" do
44
+ Encoding.default_internal = nil
45
+ EscapeUtils.unescape_uri("http%3A%2F%2Fwww.homerun.com%2F").encoding.should eql(Encoding.find('utf-8'))
46
+ end
47
+
48
+ it "should use Encoding.default_internal" do
49
+ Encoding.default_internal = Encoding.find('utf-8')
50
+ EscapeUtils.unescape_uri("http%3A%2F%2Fwww.homerun.com%2F").encoding.should eql(Encoding.default_internal)
51
+ Encoding.default_internal = Encoding.find('us-ascii')
52
+ EscapeUtils.unescape_uri("http%3A%2F%2Fwww.homerun.com%2F").encoding.should eql(Encoding.default_internal)
53
+ end
54
+ end
55
+ end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: escape_utils
3
3
  version: !ruby/object:Gem::Version
4
- hash: 17
4
+ hash: 23
5
5
  prerelease: false
6
6
  segments:
7
7
  - 0
8
8
  - 1
9
- - 5
10
- version: 0.1.5
9
+ - 6
10
+ version: 0.1.6
11
11
  platform: ruby
12
12
  authors:
13
13
  - Brian Lopez
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2010-07-13 00:00:00 -07:00
18
+ date: 2010-09-06 00:00:00 -07:00
19
19
  default_executable:
20
20
  dependencies: []
21
21
 
@@ -53,16 +53,19 @@ files:
53
53
  - lib/escape_utils/url/cgi.rb
54
54
  - lib/escape_utils/url/erb.rb
55
55
  - lib/escape_utils/url/rack.rb
56
+ - lib/escape_utils/url/uri.rb
56
57
  - spec/html/escape_spec.rb
57
58
  - spec/html/unescape_spec.rb
58
59
  - spec/html_safety_spec.rb
59
60
  - spec/javascript/escape_spec.rb
60
61
  - spec/javascript/unescape_spec.rb
62
+ - spec/query/escape_spec.rb
63
+ - spec/query/unescape_spec.rb
61
64
  - spec/rcov.opts
62
65
  - spec/spec.opts
63
66
  - spec/spec_helper.rb
64
- - spec/url/escape_spec.rb
65
- - spec/url/unescape_spec.rb
67
+ - spec/uri/escape_spec.rb
68
+ - spec/uri/unescape_spec.rb
66
69
  has_rdoc: true
67
70
  homepage: http://github.com/brianmario/escape_utils
68
71
  licenses: []
@@ -104,6 +107,8 @@ test_files:
104
107
  - spec/html_safety_spec.rb
105
108
  - spec/javascript/escape_spec.rb
106
109
  - spec/javascript/unescape_spec.rb
110
+ - spec/query/escape_spec.rb
111
+ - spec/query/unescape_spec.rb
107
112
  - spec/spec_helper.rb
108
- - spec/url/escape_spec.rb
109
- - spec/url/unescape_spec.rb
113
+ - spec/uri/escape_spec.rb
114
+ - spec/uri/unescape_spec.rb