escape_utils 0.1.5 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/CHANGELOG.md CHANGED
@@ -1,5 +1,9 @@
1
1
  # Changelog
2
2
 
3
+ ## 0.1.6 (September 6th, 2010)
4
+ * support for URI escaping added (thanks to @joshbuddy)
5
+ * bugfix to ensure we don't drop opening tags during escape_javascript (thanks to @nagybence)
6
+
3
7
  ## 0.1.5 (July 13th, 2010)
4
8
  * add URL escaping and unescaping
5
9
  * major refactor of HTML and Javascript escaping and unescaping logic for a decent speed up
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.5
1
+ 0.1.6
data/escape_utils.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{escape_utils}
8
- s.version = "0.1.5"
8
+ s.version = "0.1.6"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Brian Lopez"]
12
- s.date = %q{2010-07-13}
12
+ s.date = %q{2010-09-06}
13
13
  s.email = %q{seniorlopez@gmail.com}
14
14
  s.extensions = ["ext/extconf.rb"]
15
15
  s.extra_rdoc_files = [
@@ -41,16 +41,19 @@ Gem::Specification.new do |s|
41
41
  "lib/escape_utils/url/cgi.rb",
42
42
  "lib/escape_utils/url/erb.rb",
43
43
  "lib/escape_utils/url/rack.rb",
44
+ "lib/escape_utils/url/uri.rb",
44
45
  "spec/html/escape_spec.rb",
45
46
  "spec/html/unescape_spec.rb",
46
47
  "spec/html_safety_spec.rb",
47
48
  "spec/javascript/escape_spec.rb",
48
49
  "spec/javascript/unescape_spec.rb",
50
+ "spec/query/escape_spec.rb",
51
+ "spec/query/unescape_spec.rb",
49
52
  "spec/rcov.opts",
50
53
  "spec/spec.opts",
51
54
  "spec/spec_helper.rb",
52
- "spec/url/escape_spec.rb",
53
- "spec/url/unescape_spec.rb"
55
+ "spec/uri/escape_spec.rb",
56
+ "spec/uri/unescape_spec.rb"
54
57
  ]
55
58
  s.homepage = %q{http://github.com/brianmario/escape_utils}
56
59
  s.rdoc_options = ["--charset=UTF-8"]
@@ -63,9 +66,11 @@ Gem::Specification.new do |s|
63
66
  "spec/html_safety_spec.rb",
64
67
  "spec/javascript/escape_spec.rb",
65
68
  "spec/javascript/unescape_spec.rb",
69
+ "spec/query/escape_spec.rb",
70
+ "spec/query/unescape_spec.rb",
66
71
  "spec/spec_helper.rb",
67
- "spec/url/escape_spec.rb",
68
- "spec/url/unescape_spec.rb"
72
+ "spec/uri/escape_spec.rb",
73
+ "spec/uri/unescape_spec.rb"
69
74
  ]
70
75
 
71
76
  if s.respond_to? :specification_version then
data/ext/escape_utils.c CHANGED
@@ -7,6 +7,8 @@ static rb_encoding *utf8Encoding;
7
7
  #define IS_HEX(c) (c >= 48 || c <= 57) && (c >= 65 || c <= 70) && (c >= 97 || c <= 102)
8
8
  #define NOT_HEX(c) (c < 48 || c > 57) && (c < 65 || c > 90) && (c < 97 || c > 122)
9
9
  #define UNHEX(c) (c >= '0' && c <= '9' ? c - '0' : c >= 'A' && c <= 'F' ? c - 'A' + 10 : c - 'a' + 10)
10
+ #define URI_SAFE(c) (c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= 38 && c <= 47) || c == 33 || c == 36 || c == 58 || c == 59 || c == 61 || c == 63 || c == 64 || c == 91 || c == 93 || c == 95 || c == 126
11
+ // \.:\/?\[\]\-_~\!\$&'\(\)\*\+,;=@
10
12
 
11
13
  static size_t escape_html(unsigned char *out, const unsigned char *in, size_t in_len) {
12
14
  size_t total = 0;
@@ -112,8 +114,9 @@ static size_t escape_javascript(unsigned char *out, const unsigned char *in, siz
112
114
  total++;
113
115
  break;
114
116
  case '<':
117
+ *out++ = '<';
115
118
  if (*in == '/') {
116
- *out++ = '<'; *out++ = '\\'; *out++ = '/';
119
+ *out++ = '\\'; *out++ = '/';
117
120
  in++; in_len--;
118
121
  total++;
119
122
  }
@@ -234,6 +237,51 @@ static size_t unescape_url(unsigned char *out, const unsigned char *in, size_t i
234
237
  return total;
235
238
  }
236
239
 
240
+ static size_t escape_uri(unsigned char *out, const unsigned char *in, size_t in_len) {
241
+ size_t total = 0;
242
+ unsigned char curChar, hex[2];
243
+ const unsigned char hexChars[16] = "0123456789ABCDEF";
244
+
245
+ total = in_len;
246
+ while (in_len) {
247
+ curChar = *in++;
248
+ if (URI_SAFE(curChar)) {
249
+ *out++ = curChar;
250
+ } else {
251
+ hex[1] = hexChars[curChar & 0x0f];
252
+ hex[0] = hexChars[(curChar >> 4) & 0x0f];
253
+ *out++ = '%'; *out++ = hex[0]; *out++ = hex[1];
254
+ total += 2;
255
+ }
256
+ in_len--;
257
+ }
258
+
259
+ return total;
260
+ }
261
+
262
+ static size_t unescape_uri(unsigned char *out, const unsigned char *in, size_t in_len) {
263
+ size_t total = 0, len = in_len;
264
+ unsigned char curChar, *start;
265
+
266
+ start = (unsigned char *)&in[0];
267
+ total = in_len;
268
+ while (len) {
269
+ curChar = *in++;
270
+ if (curChar == '%') {
271
+ if ((in-start)+2 <= in_len && IS_HEX(*in) && IS_HEX(*(in+1))) {
272
+ *out++ = (UNHEX(*in) << 4) + UNHEX(*(in+1));
273
+ in+=2;
274
+ total-=2;
275
+ }
276
+ } else {
277
+ *out++ = curChar;
278
+ }
279
+ len--;
280
+ }
281
+
282
+ return total;
283
+ }
284
+
237
285
  static VALUE rb_escape_html(VALUE self, VALUE str) {
238
286
  Check_Type(str, T_STRING);
239
287
 
@@ -452,6 +500,76 @@ static VALUE rb_unescape_url(VALUE self, VALUE str) {
452
500
  return rb_output_buf;
453
501
  }
454
502
 
503
+ static VALUE rb_escape_uri(VALUE self, VALUE str) {
504
+ Check_Type(str, T_STRING);
505
+
506
+ VALUE rb_output_buf;
507
+ #ifdef HAVE_RUBY_ENCODING_H
508
+ rb_encoding *default_internal_enc = rb_default_internal_encoding();
509
+ rb_encoding *original_encoding = rb_enc_get(str);
510
+ #endif
511
+ unsigned char *inBuf = (unsigned char*)RSTRING_PTR(str);
512
+ size_t len = RSTRING_LEN(str), new_len = 0;
513
+
514
+ // this is the max size the string could be
515
+ // TODO: we should try to be more intelligent about this
516
+ unsigned char *outBuf = (unsigned char *)malloc(sizeof(unsigned char *)*(len*3));
517
+
518
+ // perform our escape, returning the new string's length
519
+ new_len = escape_uri(outBuf, inBuf, len);
520
+
521
+ // create our new ruby string
522
+ rb_output_buf = rb_str_new((char *)outBuf, new_len);
523
+
524
+ // free the temporary C string
525
+ free(outBuf);
526
+
527
+ #ifdef HAVE_RUBY_ENCODING_H
528
+ rb_enc_associate(rb_output_buf, original_encoding);
529
+ if (default_internal_enc) {
530
+ rb_output_buf = rb_str_export_to_enc(rb_output_buf, default_internal_enc);
531
+ } else {
532
+ rb_output_buf = rb_str_export_to_enc(rb_output_buf, utf8Encoding);
533
+ }
534
+ #endif
535
+ return rb_output_buf;
536
+ }
537
+
538
+ static VALUE rb_unescape_uri(VALUE self, VALUE str) {
539
+ Check_Type(str, T_STRING);
540
+
541
+ VALUE rb_output_buf;
542
+ #ifdef HAVE_RUBY_ENCODING_H
543
+ rb_encoding *default_internal_enc = rb_default_internal_encoding();
544
+ rb_encoding *original_encoding = rb_enc_get(str);
545
+ #endif
546
+ unsigned char *inBuf = (unsigned char*)RSTRING_PTR(str);
547
+ size_t len = RSTRING_LEN(str), new_len = 0;
548
+
549
+ // this is the max size the string could be
550
+ // TODO: we should try to be more intelligent about this
551
+ unsigned char *outBuf = (unsigned char *)malloc(sizeof(unsigned char *)*len);
552
+
553
+ // perform our unescape, returning the new string's length
554
+ new_len = unescape_uri(outBuf, inBuf, len);
555
+
556
+ // create our new ruby string
557
+ rb_output_buf = rb_str_new((char *)outBuf, new_len);
558
+
559
+ // free the temporary C string
560
+ free(outBuf);
561
+
562
+ #ifdef HAVE_RUBY_ENCODING_H
563
+ rb_enc_associate(rb_output_buf, original_encoding);
564
+ if (default_internal_enc) {
565
+ rb_output_buf = rb_str_export_to_enc(rb_output_buf, default_internal_enc);
566
+ } else {
567
+ rb_output_buf = rb_str_export_to_enc(rb_output_buf, utf8Encoding);
568
+ }
569
+ #endif
570
+ return rb_output_buf;
571
+ }
572
+
455
573
  /* Ruby Extension initializer */
456
574
  void Init_escape_utils_ext() {
457
575
  VALUE mEscape = rb_define_module("EscapeUtils");
@@ -467,8 +585,13 @@ void Init_escape_utils_ext() {
467
585
  rb_define_module_function(mEscape, "escape_url", rb_escape_url, 1);
468
586
  rb_define_method(mEscape, "unescape_url", rb_unescape_url, 1);
469
587
  rb_define_module_function(mEscape, "unescape_url", rb_unescape_url, 1);
588
+ rb_define_method(mEscape, "escape_uri", rb_escape_uri, 1);
589
+ rb_define_module_function(mEscape, "escape_uri", rb_escape_uri, 1);
590
+ rb_define_method(mEscape, "unescape_uri", rb_unescape_uri, 1);
591
+ rb_define_module_function(mEscape, "unescape_uri", rb_unescape_uri, 1);
470
592
 
471
593
  #ifdef HAVE_RUBY_ENCODING_H
472
594
  utf8Encoding = rb_utf8_encoding();
473
595
  #endif
474
- }
596
+ }
597
+
data/lib/escape_utils.rb CHANGED
@@ -4,7 +4,7 @@ require 'escape_utils_ext'
4
4
 
5
5
  EscapeUtils.send(:extend, EscapeUtils)
6
6
  module EscapeUtils
7
- VERSION = "0.1.5"
7
+ VERSION = "0.1.6"
8
8
 
9
9
  autoload :HtmlSafety, 'escape_utils/html_safety'
10
10
  end
@@ -7,7 +7,7 @@ class CGI
7
7
  alias escapeHTML _escape_html
8
8
 
9
9
  def unescapeHTML(s)
10
- EscapeUtils.unescape_html(s)
10
+ EscapeUtils.unescape_html(s.to_s)
11
11
  end
12
12
  end
13
13
  end
@@ -2,9 +2,9 @@
2
2
 
3
3
  class CGI
4
4
  def self.escape(s)
5
- EscapeUtils.escape_url(s)
5
+ EscapeUtils.escape_url(s.to_s)
6
6
  end
7
7
  def self.unescape(s)
8
- EscapeUtils.unescape_url(s)
8
+ EscapeUtils.unescape_url(s.to_s)
9
9
  end
10
10
  end
@@ -3,7 +3,7 @@
3
3
  class ERB
4
4
  module Util
5
5
  def url_encode(s)
6
- EscapeUtils.escape_url(s)
6
+ EscapeUtils.escape_url(s.to_s)
7
7
  end
8
8
  alias u url_encode
9
9
  module_function :u
@@ -3,10 +3,10 @@
3
3
  module Rack
4
4
  module Utils
5
5
  def escape(url)
6
- EscapeUtils.escape_url(url)
6
+ EscapeUtils.escape_url(url.to_s)
7
7
  end
8
8
  def unescape(url)
9
- EscapeUtils.unescape_url(url)
9
+ EscapeUtils.unescape_url(url.to_s)
10
10
  end
11
11
  module_function :escape
12
12
  module_function :unescape
@@ -0,0 +1,10 @@
1
+ # encoding: utf-8
2
+
3
+ module URI
4
+ def self.escape(s, unsafe=nil)
5
+ EscapeUtils.escape_uri(s.to_s)
6
+ end
7
+ def self.unescape(s)
8
+ EscapeUtils.unescape_uri(s.to_s)
9
+ end
10
+ end
@@ -20,7 +20,7 @@ describe EscapeUtils, "escape_javascript" do
20
20
  end
21
21
 
22
22
  it "should escape closed html tags" do
23
- EscapeUtils.escape_javascript(%(dont </close> tags)).should eql(%(dont <\\/close> tags))
23
+ EscapeUtils.escape_javascript(%(keep <open>, but dont </close> tags)).should eql(%(keep <open>, but dont <\\/close> tags))
24
24
  end
25
25
 
26
26
  if RUBY_VERSION =~ /^1.9/
@@ -36,4 +36,5 @@ describe EscapeUtils, "escape_javascript" do
36
36
  EscapeUtils.escape_javascript(%(dont </close> tags)).encoding.should eql(Encoding.default_internal)
37
37
  end
38
38
  end
39
- end
39
+ end
40
+
File without changes
File without changes
@@ -0,0 +1,52 @@
1
+ # encoding: UTF-8
2
+ require File.expand_path(File.dirname(__FILE__) + '/../spec_helper.rb')
3
+
4
+ describe EscapeUtils, "escape_uri" do
5
+ it "should respond to escape_uri" do
6
+ EscapeUtils.should respond_to(:escape_uri)
7
+ end
8
+
9
+ it "should escape a basic url" do
10
+ EscapeUtils.escape_uri("http://www.homerun.com/").should eql("http://www.homerun.com/")
11
+ end
12
+
13
+ # NOTE: from Rack's test suite
14
+ it "should escape a url containing tags" do
15
+ EscapeUtils.escape_uri("fo<o>bar").should eql("fo%3Co%3Ebar")
16
+ end
17
+
18
+ # NOTE: from Rack's test suite
19
+ it "should escape a url with spaces" do
20
+ EscapeUtils.escape_uri("a space").should eql("a%20space")
21
+ EscapeUtils.escape_uri("a   sp ace ").should eql("a%20%20%20sp%20ace%20")
22
+ end
23
+
24
+ # NOTE: from Rack's test suite
25
+ it "should escape a string of mixed characters" do
26
+ EscapeUtils.escape_uri("q1!2\"'w$5&7/z8)?\\").should eql("q1!2%22'w$5&7/z8)?%5C")
27
+ end
28
+
29
+ # NOTE: from Rack's test suite
30
+ it "should escape correctly for multibyte characters" do
31
+ matz_name = "\xE3\x81\xBE\xE3\x81\xA4\xE3\x82\x82\xE3\x81\xA8".unpack("a*")[0] # Matsumoto
32
+ matz_name.force_encoding("UTF-8") if matz_name.respond_to? :force_encoding
33
+ EscapeUtils.escape_uri(matz_name).should eql('%E3%81%BE%E3%81%A4%E3%82%82%E3%81%A8')
34
+ matz_name_sep = "\xE3\x81\xBE\xE3\x81\xA4 \xE3\x82\x82\xE3\x81\xA8".unpack("a*")[0] # Matsu moto
35
+ matz_name_sep.force_encoding("UTF-8") if matz_name_sep.respond_to? :force_encoding
36
+ EscapeUtils.escape_uri(matz_name_sep).should eql('%E3%81%BE%E3%81%A4%20%E3%82%82%E3%81%A8')
37
+ end
38
+
39
+ if RUBY_VERSION =~ /^1.9/
40
+ it "should default to utf-8 if Encoding.default_internal is nil" do
41
+ Encoding.default_internal = nil
42
+ EscapeUtils.escape_uri("http://www.homerun.com/").encoding.should eql(Encoding.find('utf-8'))
43
+ end
44
+
45
+ it "should use Encoding.default_internal" do
46
+ Encoding.default_internal = Encoding.find('utf-8')
47
+ EscapeUtils.escape_uri("http://www.homerun.com/").encoding.should eql(Encoding.default_internal)
48
+ Encoding.default_internal = Encoding.find('us-ascii')
49
+ EscapeUtils.escape_uri("http://www.homerun.com/").encoding.should eql(Encoding.default_internal)
50
+ end
51
+ end
52
+ end
@@ -0,0 +1,55 @@
1
+ # encoding: UTF-8
2
+ require File.expand_path(File.dirname(__FILE__) + '/../spec_helper.rb')
3
+
4
+ describe EscapeUtils, "unescape_uri" do
5
+ it "should respond to unescape_uri" do
6
+ EscapeUtils.should respond_to(:unescape_uri)
7
+ end
8
+
9
+ it "should unescape a basic url" do
10
+ EscapeUtils.unescape_uri("http%3A%2F%2Fwww.homerun.com%2F").should eql("http://www.homerun.com/")
11
+ EscapeUtils.unescape_uri("http://www.homerun.com/").should eql("http://www.homerun.com/")
12
+ end
13
+
14
+ # NOTE: from Rack's test suite
15
+ it "should unescape a url containing tags" do
16
+ EscapeUtils.unescape_uri("fo%3Co%3Ebar").should eql("fo<o>bar")
17
+ end
18
+
19
+ # NOTE: from Rack's test suite
20
+ it "should unescape a url with spaces" do
21
+ EscapeUtils.unescape_uri("a%20space").should eql("a space")
22
+ EscapeUtils.unescape_uri("a%20%20%20sp%20ace%20").should eql("a   sp ace ")
23
+ EscapeUtils.unescape_uri("a+space").should eql("a+space")
24
+ end
25
+
26
+ # NOTE: from Rack's test suite
27
+ it "should unescape a string of mixed characters" do
28
+ EscapeUtils.unescape_uri("q1%212%22%27w%245%267%2Fz8%29%3F%5C").should eql("q1!2\"'w$5&7/z8)?\\")
29
+ EscapeUtils.unescape_uri("q1!2%22'w$5&7/z8)?%5C").should eql("q1!2\"'w$5&7/z8)?\\")
30
+ end
31
+
32
+ # NOTE: from Rack's test suite
33
+ it "should unescape correctly for multibyte characters" do
34
+ matz_name = "\xE3\x81\xBE\xE3\x81\xA4\xE3\x82\x82\xE3\x81\xA8".unpack("a*")[0] # Matsumoto
35
+ matz_name.force_encoding("UTF-8") if matz_name.respond_to? :force_encoding
36
+ EscapeUtils.unescape_uri('%E3%81%BE%E3%81%A4%E3%82%82%E3%81%A8').should eql(matz_name)
37
+ matz_name_sep = "\xE3\x81\xBE\xE3\x81\xA4 \xE3\x82\x82\xE3\x81\xA8".unpack("a*")[0] # Matsu moto
38
+ matz_name_sep.force_encoding("UTF-8") if matz_name_sep.respond_to? :force_encoding
39
+ EscapeUtils.unescape_uri('%E3%81%BE%E3%81%A4%20%E3%82%82%E3%81%A8').should eql(matz_name_sep)
40
+ end
41
+
42
+ if RUBY_VERSION =~ /^1.9/
43
+ it "should default to utf-8 if Encoding.default_internal is nil" do
44
+ Encoding.default_internal = nil
45
+ EscapeUtils.unescape_uri("http%3A%2F%2Fwww.homerun.com%2F").encoding.should eql(Encoding.find('utf-8'))
46
+ end
47
+
48
+ it "should use Encoding.default_internal" do
49
+ Encoding.default_internal = Encoding.find('utf-8')
50
+ EscapeUtils.unescape_uri("http%3A%2F%2Fwww.homerun.com%2F").encoding.should eql(Encoding.default_internal)
51
+ Encoding.default_internal = Encoding.find('us-ascii')
52
+ EscapeUtils.unescape_uri("http%3A%2F%2Fwww.homerun.com%2F").encoding.should eql(Encoding.default_internal)
53
+ end
54
+ end
55
+ end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: escape_utils
3
3
  version: !ruby/object:Gem::Version
4
- hash: 17
4
+ hash: 23
5
5
  prerelease: false
6
6
  segments:
7
7
  - 0
8
8
  - 1
9
- - 5
10
- version: 0.1.5
9
+ - 6
10
+ version: 0.1.6
11
11
  platform: ruby
12
12
  authors:
13
13
  - Brian Lopez
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2010-07-13 00:00:00 -07:00
18
+ date: 2010-09-06 00:00:00 -07:00
19
19
  default_executable:
20
20
  dependencies: []
21
21
 
@@ -53,16 +53,19 @@ files:
53
53
  - lib/escape_utils/url/cgi.rb
54
54
  - lib/escape_utils/url/erb.rb
55
55
  - lib/escape_utils/url/rack.rb
56
+ - lib/escape_utils/url/uri.rb
56
57
  - spec/html/escape_spec.rb
57
58
  - spec/html/unescape_spec.rb
58
59
  - spec/html_safety_spec.rb
59
60
  - spec/javascript/escape_spec.rb
60
61
  - spec/javascript/unescape_spec.rb
62
+ - spec/query/escape_spec.rb
63
+ - spec/query/unescape_spec.rb
61
64
  - spec/rcov.opts
62
65
  - spec/spec.opts
63
66
  - spec/spec_helper.rb
64
- - spec/url/escape_spec.rb
65
- - spec/url/unescape_spec.rb
67
+ - spec/uri/escape_spec.rb
68
+ - spec/uri/unescape_spec.rb
66
69
  has_rdoc: true
67
70
  homepage: http://github.com/brianmario/escape_utils
68
71
  licenses: []
@@ -104,6 +107,8 @@ test_files:
104
107
  - spec/html_safety_spec.rb
105
108
  - spec/javascript/escape_spec.rb
106
109
  - spec/javascript/unescape_spec.rb
110
+ - spec/query/escape_spec.rb
111
+ - spec/query/unescape_spec.rb
107
112
  - spec/spec_helper.rb
108
- - spec/url/escape_spec.rb
109
- - spec/url/unescape_spec.rb
113
+ - spec/uri/escape_spec.rb
114
+ - spec/uri/unescape_spec.rb