escape_utils 0.2.4 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (53) hide show
  1. data/.gitignore +2 -1
  2. data/.travis.yml +13 -0
  3. data/CHANGELOG.md +7 -0
  4. data/MIT-LICENSE +1 -1
  5. data/Rakefile +5 -18
  6. data/benchmark/html_escape.rb +9 -2
  7. data/benchmark/xml_escape.rb +29 -0
  8. data/escape_utils.gemspec +2 -3
  9. data/ext/escape_utils/buffer.c +181 -160
  10. data/ext/escape_utils/buffer.h +90 -68
  11. data/ext/escape_utils/escape_utils.c +77 -39
  12. data/ext/escape_utils/extconf.rb +1 -1
  13. data/ext/escape_utils/houdini.h +37 -8
  14. data/ext/escape_utils/houdini_href_e.c +115 -0
  15. data/ext/escape_utils/houdini_html_e.c +90 -0
  16. data/ext/escape_utils/houdini_html_u.c +122 -0
  17. data/ext/escape_utils/{houdini_js.c → houdini_js_e.c} +17 -75
  18. data/ext/escape_utils/houdini_js_u.c +60 -0
  19. data/ext/escape_utils/{uri_escape.h → houdini_uri_e.c} +68 -2
  20. data/ext/escape_utils/houdini_uri_u.c +65 -0
  21. data/ext/escape_utils/houdini_xml_e.c +136 -0
  22. data/lib/escape_utils/version.rb +1 -1
  23. data/lib/escape_utils/xml/builder.rb +8 -0
  24. data/test/helper.rb +14 -0
  25. data/test/html/escape_test.rb +61 -0
  26. data/test/html/unescape_test.rb +48 -0
  27. data/test/html_safety_test.rb +46 -0
  28. data/test/javascript/escape_test.rb +42 -0
  29. data/test/javascript/unescape_test.rb +46 -0
  30. data/test/query/escape_test.rb +50 -0
  31. data/test/query/unescape_test.rb +52 -0
  32. data/test/uri/escape_test.rb +50 -0
  33. data/test/uri/unescape_test.rb +55 -0
  34. data/test/url/escape_test.rb +58 -0
  35. data/test/url/unescape_test.rb +60 -0
  36. data/test/xml/escape_test.rb +67 -0
  37. metadata +136 -152
  38. data/.rspec +0 -2
  39. data/ext/escape_utils/houdini_html.c +0 -214
  40. data/ext/escape_utils/houdini_uri.c +0 -130
  41. data/spec/html/escape_spec.rb +0 -42
  42. data/spec/html/unescape_spec.rb +0 -37
  43. data/spec/html_safety_spec.rb +0 -48
  44. data/spec/javascript/escape_spec.rb +0 -34
  45. data/spec/javascript/unescape_spec.rb +0 -37
  46. data/spec/query/escape_spec.rb +0 -44
  47. data/spec/query/unescape_spec.rb +0 -46
  48. data/spec/rcov.opts +0 -3
  49. data/spec/spec_helper.rb +0 -5
  50. data/spec/uri/escape_spec.rb +0 -43
  51. data/spec/uri/unescape_spec.rb +0 -57
  52. data/spec/url/escape_spec.rb +0 -52
  53. data/spec/url/unescape_spec.rb +0 -57
@@ -1,130 +0,0 @@
1
- #include <assert.h>
2
- #include <stdio.h>
3
- #include <string.h>
4
-
5
- #include "houdini.h"
6
- #include "uri_escape.h"
7
-
8
- #define ESCAPE_GROW_FACTOR(x) (((x) * 12) / 10)
9
- #define UNESCAPE_GROW_FACTOR(x) (x)
10
-
11
- extern int _isxdigit(int c);
12
-
13
- static void
14
- escape(struct buf *ob, const uint8_t *src, size_t size, int is_url)
15
- {
16
- static const char hex_chars[] = "0123456789ABCDEF";
17
- const char *safe_table = is_url ? URL_SAFE : URI_SAFE;
18
-
19
- size_t i = 0, org;
20
- char hex_str[3];
21
-
22
- bufgrow(ob, ESCAPE_GROW_FACTOR(size));
23
- hex_str[0] = '%';
24
-
25
- while (i < size) {
26
- org = i;
27
- while (i < size && safe_table[src[i]] != 0)
28
- i++;
29
-
30
- if (i > org)
31
- bufput(ob, src + org, i - org);
32
-
33
- /* escaping */
34
- if (i >= size)
35
- break;
36
-
37
- if (src[i] == ' ' && is_url) {
38
- bufputc(ob, '+');
39
- } else {
40
- hex_str[1] = hex_chars[(src[i] >> 4) & 0xF];
41
- hex_str[2] = hex_chars[src[i] & 0xF];
42
- bufput(ob, hex_str, 3);
43
- }
44
-
45
- i++;
46
- }
47
- }
48
-
49
- #define hex2c(c) ((c | 32) % 39 - 9)
50
-
51
- static void
52
- unescape(struct buf *ob, const uint8_t *src, size_t size, int is_url)
53
- {
54
- size_t i = 0, org;
55
-
56
- bufgrow(ob, UNESCAPE_GROW_FACTOR(size));
57
-
58
- while (i < size) {
59
- org = i;
60
- while (i < size && src[i] != '%')
61
- i++;
62
-
63
- if (i > org)
64
- bufput(ob, src + org, i - org);
65
-
66
- /* escaping */
67
- if (i >= size)
68
- break;
69
-
70
- i++;
71
-
72
- if (i + 1 < size && _isxdigit(src[i]) && _isxdigit(src[i + 1])) {
73
- unsigned char new_char = (hex2c(src[i]) << 4) + hex2c(src[i + 1]);
74
- bufputc(ob, new_char);
75
- i += 2;
76
- } else {
77
- bufputc(ob, '%');
78
- }
79
- }
80
-
81
- if (is_url) {
82
- char *find = (char *)bufcstr(ob);
83
- while ((find = strchr(find, '+')) != NULL)
84
- *find = ' ';
85
- }
86
- }
87
-
88
-
89
- void
90
- houdini_escape_uri(struct buf *ob, const uint8_t *src, size_t size)
91
- {
92
- return escape(ob, src, size, 0);
93
- }
94
-
95
- void
96
- houdini_escape_url(struct buf *ob, const uint8_t *src, size_t size)
97
- {
98
- return escape(ob, src, size, 1);
99
- }
100
-
101
- void
102
- houdini_unescape_uri(struct buf *ob, const uint8_t *src, size_t size)
103
- {
104
- return unescape(ob, src, size, 0);
105
- }
106
-
107
- void
108
- houdini_unescape_url(struct buf *ob, const uint8_t *src, size_t size)
109
- {
110
- return unescape(ob, src, size, 1);
111
- }
112
-
113
-
114
-
115
- //#define TEST
116
- #ifdef TEST
117
-
118
- int main()
119
- {
120
- const char TEST_STRING[] = "http% this \200 is a test";
121
- struct buf *buffer;
122
-
123
- buffer = bufnew(128);
124
- houdini_escape_uri(buffer, TEST_STRING, strlen(TEST_STRING));
125
- printf("Result: %.*s\n", (int)buffer->size, buffer->data);
126
- bufrelease(buffer);
127
- return 0;
128
- }
129
- #endif
130
-
@@ -1,42 +0,0 @@
1
- require File.expand_path(File.dirname(__FILE__) + '/../spec_helper.rb')
2
-
3
- describe EscapeUtils, "escape_html" do
4
- it "should respond to escape_html" do
5
- EscapeUtils.should respond_to(:escape_html)
6
- end
7
-
8
- it "should escape a basic html tag, also escaping the '/' character if the secure parameter is true" do
9
- EscapeUtils.escape_html("<some_tag/>").should eql("&lt;some_tag&#47;&gt;")
10
- end
11
-
12
- it "should escape a basic html tag, not escaping the '/' character if the secure parameter is false" do
13
- EscapeUtils.escape_html("<some_tag/>", false).should eql("&lt;some_tag/&gt;")
14
- end
15
-
16
- it "should escape a basic html tag, not escaping the '/' character if EscapeUtils.html_secure is false" do
17
- EscapeUtils.html_secure = false
18
- EscapeUtils.escape_html("<some_tag/>").should eql("&lt;some_tag/&gt;")
19
- EscapeUtils.html_secure = true
20
- end
21
-
22
- it "should escape double-quotes" do
23
- EscapeUtils.escape_html("<some_tag some_attr=\"some value\"/>").should eql("&lt;some_tag some_attr=&quot;some value&quot;&#47;&gt;")
24
- end
25
-
26
- it "should escape single-quotes" do
27
- EscapeUtils.escape_html("<some_tag some_attr='some value'/>").should eql("&lt;some_tag some_attr=&#39;some value&#39;&#47;&gt;")
28
- end
29
-
30
- it "should escape the & character" do
31
- EscapeUtils.escape_html("<b>Bourbon & Branch</b>").should eql("&lt;b&gt;Bourbon &amp; Branch&lt;&#47;b&gt;")
32
- end
33
-
34
- if RUBY_VERSION =~ /^1.9/
35
- it "return value should be in original string's encoding" do
36
- str = "<b>Bourbon & Branch</b>".encode('us-ascii')
37
- EscapeUtils.escape_html(str).encoding.should eql(Encoding.find('us-ascii'))
38
- str = "<b>Bourbon & Branch</b>".encode('utf-8')
39
- EscapeUtils.escape_html(str).encoding.should eql(Encoding.find('utf-8'))
40
- end
41
- end
42
- end
@@ -1,37 +0,0 @@
1
- require File.expand_path(File.dirname(__FILE__) + '/../spec_helper.rb')
2
-
3
- describe EscapeUtils, "unescape_html" do
4
- it "should respond to unescape_html" do
5
- EscapeUtils.should respond_to(:unescape_html)
6
- end
7
-
8
- it "should unescape a basic html tag" do
9
- EscapeUtils.unescape_html("&lt;some_tag&#47;&gt;").should eql("<some_tag/>")
10
- end
11
-
12
- it "should unescape double-quotes" do
13
- EscapeUtils.unescape_html("&lt;some_tag some_attr=&quot;some value&quot;&#47;&gt;").should eql("<some_tag some_attr=\"some value\"/>")
14
- end
15
-
16
- it "should unescape single-quotes" do
17
- EscapeUtils.unescape_html("&lt;some_tag some_attr=&#39;some value&#39;&#47;&gt;").should eql("<some_tag some_attr='some value'/>")
18
- end
19
-
20
- it "should unescape the & character" do
21
- EscapeUtils.unescape_html("&lt;b&gt;Bourbon &amp; Branch&lt;&#47;b&gt;").should eql("<b>Bourbon & Branch</b>")
22
- end
23
-
24
- it "should pass through incompletely escaped tags" do
25
- EscapeUtils.unescape_html("&").should eql("&")
26
- EscapeUtils.unescape_html("&lt").should eql("&lt")
27
- end
28
-
29
- if RUBY_VERSION =~ /^1.9/
30
- it "return value should be in original string's encoding" do
31
- str = "&lt;b&gt;Bourbon &amp; Branch&lt;/b&gt;".encode('us-ascii')
32
- EscapeUtils.unescape_html(str).encoding.should eql(Encoding.find('us-ascii'))
33
- str = "&lt;b&gt;Bourbon &amp; Branch&lt;/b&gt;".encode('utf-8')
34
- EscapeUtils.unescape_html(str).encoding.should eql(Encoding.find('utf-8'))
35
- end
36
- end
37
- end
@@ -1,48 +0,0 @@
1
- require File.expand_path(File.dirname(__FILE__) + '/spec_helper.rb')
2
-
3
- class Object
4
- def html_safe?
5
- false
6
- end
7
- end
8
-
9
- class TestSafeBuffer < String
10
- def html_safe?
11
- true
12
- end
13
-
14
- def html_safe
15
- self
16
- end
17
-
18
- def to_s
19
- self
20
- end
21
- end
22
-
23
- class String
24
- def html_safe
25
- TestSafeBuffer.new(self)
26
- end
27
- end
28
-
29
- include EscapeUtils::HtmlSafety
30
-
31
- describe EscapeUtils::HtmlSafety do
32
-
33
- it "should escape unsafe strings and make them safe" do
34
- escaped = _escape_html("<strong>unsafe</strong>")
35
- escaped.should eql("&lt;strong&gt;unsafe&lt;&#47;strong&gt;")
36
- escaped.should be_html_safe
37
- end
38
-
39
- it "shouldn't escape safe strings" do
40
- _escape_html("<p>safe string</p>".html_safe).should eql("<p>safe string</p>")
41
- end
42
-
43
- it "should work with non strings" do
44
- _escape_html(5).should eql("5")
45
- _escape_html(:hello).should eql("hello")
46
- end
47
-
48
- end
@@ -1,34 +0,0 @@
1
- require File.expand_path(File.dirname(__FILE__) + '/../spec_helper.rb')
2
-
3
- describe EscapeUtils, "escape_javascript" do
4
- it "should respond to escape_javascript" do
5
- EscapeUtils.should respond_to(:escape_javascript)
6
- end
7
-
8
- # these are from the ActionView tests
9
- it "should return an empty string if passed nil" do
10
- EscapeUtils.escape_javascript(nil).should eql("")
11
- end
12
-
13
- it "should escape quotes and newlines" do
14
- EscapeUtils.escape_javascript(%(This "thing" is really\n netos\r\n\n')).should eql(%(This \\"thing\\" is really\\n netos\\n\\n\\'))
15
- end
16
-
17
- it "should escape backslashes" do
18
- EscapeUtils.escape_javascript(%(backslash\\test)).should eql(%(backslash\\\\test))
19
- end
20
-
21
- it "should escape closed html tags" do
22
- EscapeUtils.escape_javascript(%(keep <open>, but dont </close> tags)).should eql(%(keep <open>, but dont <\\/close> tags))
23
- end
24
-
25
- if RUBY_VERSION =~ /^1.9/
26
- it "return value should be in original string's encoding" do
27
- str = "dont </close> tags".encode('us-ascii')
28
- EscapeUtils.escape_javascript(str).encoding.should eql(Encoding.find('us-ascii'))
29
- str = "dont </close> tags".encode('utf-8')
30
- EscapeUtils.escape_javascript(str).encoding.should eql(Encoding.find('utf-8'))
31
- end
32
- end
33
- end
34
-
@@ -1,37 +0,0 @@
1
- require File.expand_path(File.dirname(__FILE__) + '/../spec_helper.rb')
2
-
3
- describe EscapeUtils, "unescape_javascript" do
4
- it "should respond to unescape_javascript" do
5
- EscapeUtils.should respond_to(:unescape_javascript)
6
- end
7
-
8
- # these are from the ActionView tests
9
- it "should return an empty string if passed nil" do
10
- EscapeUtils.unescape_javascript(nil).should eql("")
11
- end
12
-
13
- it "should unescape quotes and newlines" do
14
- EscapeUtils.unescape_javascript(%(This \\"thing\\" is really\\n netos\\n\\n\\')).should eql(%(This "thing" is really\n netos\n\n'))
15
- end
16
-
17
- it "should unescape backslashes" do
18
- EscapeUtils.unescape_javascript(%(backslash\\\\test)).should eql(%(backslash\\test))
19
- end
20
-
21
- it "should unescape closed html tags" do
22
- EscapeUtils.unescape_javascript(%(dont <\\/close> tags)).should eql(%(dont </close> tags))
23
- end
24
-
25
- it "should pass through standalone '\'" do
26
- EscapeUtils.unescape_javascript("\\").should eql("\\")
27
- end
28
-
29
- if RUBY_VERSION =~ /^1.9/
30
- it "return value should be in original string's encoding" do
31
- str = "dont <\\/close> tags".encode('us-ascii')
32
- EscapeUtils.unescape_javascript(str).encoding.should eql(Encoding.find('us-ascii'))
33
- str = "dont <\\/close> tags".encode('utf-8')
34
- EscapeUtils.unescape_javascript(str).encoding.should eql(Encoding.find('utf-8'))
35
- end
36
- end
37
- end
@@ -1,44 +0,0 @@
1
- require File.expand_path(File.dirname(__FILE__) + '/../spec_helper.rb')
2
-
3
- describe EscapeUtils, "escape_url" do
4
- it "should respond to escape_url" do
5
- EscapeUtils.should respond_to(:escape_url)
6
- end
7
-
8
- it "should escape a basic url" do
9
- EscapeUtils.escape_url("http://www.homerun.com/").should eql("http%3A%2F%2Fwww.homerun.com%2F")
10
- end
11
-
12
- # NOTE: from Rack's test suite
13
- it "should escape a url containing tags" do
14
- EscapeUtils.escape_url("fo<o>bar").should eql("fo%3Co%3Ebar")
15
- end
16
-
17
- # NOTE: from Rack's test suite
18
- it "should escape a url with spaces" do
19
- EscapeUtils.escape_url("a space").should eql("a+space")
20
- EscapeUtils.escape_url("a sp ace ").should eql("a+++sp+ace+")
21
- end
22
-
23
- # NOTE: from Rack's test suite
24
- it "should escape a string of mixed characters" do
25
- EscapeUtils.escape_url("q1!2\"'w$5&7/z8)?\\").should eql("q1%212%22%27w%245%267%2Fz8%29%3F%5C")
26
- end
27
-
28
- # NOTE: from Rack's test suite
29
- it "should escape correctly for multibyte characters" do
30
- matz_name = "\xE3\x81\xBE\xE3\x81\xA4\xE3\x82\x82\xE3\x81\xA8" # Matsumoto
31
- EscapeUtils.escape_url(matz_name).should eql('%E3%81%BE%E3%81%A4%E3%82%82%E3%81%A8')
32
- matz_name_sep = "\xE3\x81\xBE\xE3\x81\xA4 \xE3\x82\x82\xE3\x81\xA8" # Matsu moto
33
- EscapeUtils.escape_url(matz_name_sep).should eql('%E3%81%BE%E3%81%A4+%E3%82%82%E3%81%A8')
34
- end
35
-
36
- if RUBY_VERSION =~ /^1.9/
37
- it "return value should be in original string's encoding" do
38
- str = "http://www.homerun.com/".encode('us-ascii')
39
- EscapeUtils.escape_url(str).encoding.should eql(Encoding.find('us-ascii'))
40
- str = "http://www.homerun.com/".encode('utf-8')
41
- EscapeUtils.escape_url(str).encoding.should eql(Encoding.find('utf-8'))
42
- end
43
- end
44
- end
@@ -1,46 +0,0 @@
1
- # encoding: UTF-8
2
-
3
- require File.expand_path(File.dirname(__FILE__) + '/../spec_helper.rb')
4
-
5
- describe EscapeUtils, "unescape_url" do
6
- it "should respond to unescape_url" do
7
- EscapeUtils.should respond_to(:unescape_url)
8
- end
9
-
10
- it "should unescape a basic url" do
11
- EscapeUtils.unescape_url("http%3A%2F%2Fwww.homerun.com%2F").should eql("http://www.homerun.com/")
12
- end
13
-
14
- # NOTE: from Rack's test suite
15
- it "should unescape a url containing tags" do
16
- EscapeUtils.unescape_url("fo%3Co%3Ebar").should eql("fo<o>bar")
17
- end
18
-
19
- # NOTE: from Rack's test suite
20
- it "should unescape a url with spaces" do
21
- EscapeUtils.unescape_url("a+space").should eql("a space")
22
- EscapeUtils.unescape_url("a+++sp+ace+").should eql("a sp ace ")
23
- end
24
-
25
- # NOTE: from Rack's test suite
26
- it "should unescape a string of mixed characters" do
27
- EscapeUtils.unescape_url("q1%212%22%27w%245%267%2Fz8%29%3F%5C").should eql("q1!2\"'w$5&7/z8)?\\")
28
- end
29
-
30
- # NOTE: from Rack's test suite
31
- it "should unescape correctly for multibyte characters" do
32
- matz_name = "\xE3\x81\xBE\xE3\x81\xA4\xE3\x82\x82\xE3\x81\xA8" # Matsumoto
33
- EscapeUtils.unescape_url('%E3%81%BE%E3%81%A4%E3%82%82%E3%81%A8').should eql(matz_name)
34
- matz_name_sep = "\xE3\x81\xBE\xE3\x81\xA4 \xE3\x82\x82\xE3\x81\xA8" # Matsu moto
35
- EscapeUtils.unescape_url('%E3%81%BE%E3%81%A4+%E3%82%82%E3%81%A8').should eql(matz_name_sep)
36
- end
37
-
38
- if RUBY_VERSION =~ /^1.9/
39
- it "return value should be in original string's encoding" do
40
- str = "http%3A%2F%2Fwww.homerun.com%2F".encode('us-ascii')
41
- EscapeUtils.unescape_url(str).encoding.should eql(Encoding.find('us-ascii'))
42
- str = "http%3A%2F%2Fwww.homerun.com%2F".encode('utf-8')
43
- EscapeUtils.unescape_url(str).encoding.should eql(Encoding.find('utf-8'))
44
- end
45
- end
46
- end
data/spec/rcov.opts DELETED
@@ -1,3 +0,0 @@
1
- --exclude spec,gem
2
- --text-summary
3
- --sort coverage --sort-reverse
data/spec/spec_helper.rb DELETED
@@ -1,5 +0,0 @@
1
- $LOAD_PATH.unshift File.expand_path(File.dirname(__FILE__) + '/..')
2
- $LOAD_PATH.unshift File.expand_path(File.dirname(__FILE__) + '/../lib')
3
-
4
- require 'rspec'
5
- require 'escape_utils'
@@ -1,43 +0,0 @@
1
- require File.expand_path(File.dirname(__FILE__) + '/../spec_helper.rb')
2
- require 'uri'
3
-
4
- describe EscapeUtils, "escape_uri" do
5
- it "should respond to escape_uri" do
6
- EscapeUtils.should respond_to(:escape_uri)
7
- end
8
-
9
- it "should escape each byte exactly like URI.escape" do
10
- (0..255).each do |i|
11
- c = i.chr
12
- EscapeUtils.escape_uri(c).should eql(URI.escape(c))
13
- end
14
- end
15
-
16
- # NOTE: from Rack's test suite
17
- it "should escape a url containing tags" do
18
- EscapeUtils.escape_uri("fo<o>bar").should eql("fo%3Co%3Ebar")
19
- end
20
-
21
- # NOTE: from Rack's test suite
22
- it "should escape a url with spaces" do
23
- EscapeUtils.escape_uri("a space").should eql("a%20space")
24
- EscapeUtils.escape_uri("a sp ace ").should eql("a%20%20%20sp%20ace%20")
25
- end
26
-
27
- # NOTE: from Rack's test suite
28
- it "should escape correctly for multibyte characters" do
29
- matz_name = "\xE3\x81\xBE\xE3\x81\xA4\xE3\x82\x82\xE3\x81\xA8" # Matsumoto
30
- EscapeUtils.escape_uri(matz_name).should eql('%E3%81%BE%E3%81%A4%E3%82%82%E3%81%A8')
31
- matz_name_sep = "\xE3\x81\xBE\xE3\x81\xA4 \xE3\x82\x82\xE3\x81\xA8" # Matsu moto
32
- EscapeUtils.escape_uri(matz_name_sep).should eql('%E3%81%BE%E3%81%A4%20%E3%82%82%E3%81%A8')
33
- end
34
-
35
- if RUBY_VERSION =~ /^1.9/
36
- it "return value should be in original string's encoding" do
37
- str = "http://www.homerun.com/".encode('us-ascii')
38
- EscapeUtils.escape_uri(str).encoding.should eql(Encoding.find('us-ascii'))
39
- str = "http://www.homerun.com/".encode('utf-8')
40
- EscapeUtils.escape_uri(str).encoding.should eql(Encoding.find('utf-8'))
41
- end
42
- end
43
- end