escape_utils 0.2.4 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. data/.gitignore +2 -1
  2. data/.travis.yml +13 -0
  3. data/CHANGELOG.md +7 -0
  4. data/MIT-LICENSE +1 -1
  5. data/Rakefile +5 -18
  6. data/benchmark/html_escape.rb +9 -2
  7. data/benchmark/xml_escape.rb +29 -0
  8. data/escape_utils.gemspec +2 -3
  9. data/ext/escape_utils/buffer.c +181 -160
  10. data/ext/escape_utils/buffer.h +90 -68
  11. data/ext/escape_utils/escape_utils.c +77 -39
  12. data/ext/escape_utils/extconf.rb +1 -1
  13. data/ext/escape_utils/houdini.h +37 -8
  14. data/ext/escape_utils/houdini_href_e.c +115 -0
  15. data/ext/escape_utils/houdini_html_e.c +90 -0
  16. data/ext/escape_utils/houdini_html_u.c +122 -0
  17. data/ext/escape_utils/{houdini_js.c → houdini_js_e.c} +17 -75
  18. data/ext/escape_utils/houdini_js_u.c +60 -0
  19. data/ext/escape_utils/{uri_escape.h → houdini_uri_e.c} +68 -2
  20. data/ext/escape_utils/houdini_uri_u.c +65 -0
  21. data/ext/escape_utils/houdini_xml_e.c +136 -0
  22. data/lib/escape_utils/version.rb +1 -1
  23. data/lib/escape_utils/xml/builder.rb +8 -0
  24. data/test/helper.rb +14 -0
  25. data/test/html/escape_test.rb +61 -0
  26. data/test/html/unescape_test.rb +48 -0
  27. data/test/html_safety_test.rb +46 -0
  28. data/test/javascript/escape_test.rb +42 -0
  29. data/test/javascript/unescape_test.rb +46 -0
  30. data/test/query/escape_test.rb +50 -0
  31. data/test/query/unescape_test.rb +52 -0
  32. data/test/uri/escape_test.rb +50 -0
  33. data/test/uri/unescape_test.rb +55 -0
  34. data/test/url/escape_test.rb +58 -0
  35. data/test/url/unescape_test.rb +60 -0
  36. data/test/xml/escape_test.rb +67 -0
  37. metadata +136 -152
  38. data/.rspec +0 -2
  39. data/ext/escape_utils/houdini_html.c +0 -214
  40. data/ext/escape_utils/houdini_uri.c +0 -130
  41. data/spec/html/escape_spec.rb +0 -42
  42. data/spec/html/unescape_spec.rb +0 -37
  43. data/spec/html_safety_spec.rb +0 -48
  44. data/spec/javascript/escape_spec.rb +0 -34
  45. data/spec/javascript/unescape_spec.rb +0 -37
  46. data/spec/query/escape_spec.rb +0 -44
  47. data/spec/query/unescape_spec.rb +0 -46
  48. data/spec/rcov.opts +0 -3
  49. data/spec/spec_helper.rb +0 -5
  50. data/spec/uri/escape_spec.rb +0 -43
  51. data/spec/uri/unescape_spec.rb +0 -57
  52. data/spec/url/escape_spec.rb +0 -52
  53. data/spec/url/unescape_spec.rb +0 -57
data/ext/escape_utils/houdini_uri.c DELETED
@@ -1,130 +0,0 @@
1
- #include <assert.h>
2
- #include <stdio.h>
3
- #include <string.h>
4
-
5
- #include "houdini.h"
6
- #include "uri_escape.h"
7
-
8
- #define ESCAPE_GROW_FACTOR(x) (((x) * 12) / 10)
9
- #define UNESCAPE_GROW_FACTOR(x) (x)
10
-
11
- extern int _isxdigit(int c);
12
-
13
- static void
14
- escape(struct buf *ob, const uint8_t *src, size_t size, int is_url)
15
- {
16
- static const char hex_chars[] = "0123456789ABCDEF";
17
- const char *safe_table = is_url ? URL_SAFE : URI_SAFE;
18
-
19
- size_t i = 0, org;
20
- char hex_str[3];
21
-
22
- bufgrow(ob, ESCAPE_GROW_FACTOR(size));
23
- hex_str[0] = '%';
24
-
25
- while (i < size) {
26
- org = i;
27
- while (i < size && safe_table[src[i]] != 0)
28
- i++;
29
-
30
- if (i > org)
31
- bufput(ob, src + org, i - org);
32
-
33
- /* escaping */
34
- if (i >= size)
35
- break;
36
-
37
- if (src[i] == ' ' && is_url) {
38
- bufputc(ob, '+');
39
- } else {
40
- hex_str[1] = hex_chars[(src[i] >> 4) & 0xF];
41
- hex_str[2] = hex_chars[src[i] & 0xF];
42
- bufput(ob, hex_str, 3);
43
- }
44
-
45
- i++;
46
- }
47
- }
48
-
49
- #define hex2c(c) ((c | 32) % 39 - 9)
50
-
51
- static void
52
- unescape(struct buf *ob, const uint8_t *src, size_t size, int is_url)
53
- {
54
- size_t i = 0, org;
55
-
56
- bufgrow(ob, UNESCAPE_GROW_FACTOR(size));
57
-
58
- while (i < size) {
59
- org = i;
60
- while (i < size && src[i] != '%')
61
- i++;
62
-
63
- if (i > org)
64
- bufput(ob, src + org, i - org);
65
-
66
- /* escaping */
67
- if (i >= size)
68
- break;
69
-
70
- i++;
71
-
72
- if (i + 1 < size && _isxdigit(src[i]) && _isxdigit(src[i + 1])) {
73
- unsigned char new_char = (hex2c(src[i]) << 4) + hex2c(src[i + 1]);
74
- bufputc(ob, new_char);
75
- i += 2;
76
- } else {
77
- bufputc(ob, '%');
78
- }
79
- }
80
-
81
- if (is_url) {
82
- char *find = (char *)bufcstr(ob);
83
- while ((find = strchr(find, '+')) != NULL)
84
- *find = ' ';
85
- }
86
- }
87
-
88
-
89
- void
90
- houdini_escape_uri(struct buf *ob, const uint8_t *src, size_t size)
91
- {
92
- return escape(ob, src, size, 0);
93
- }
94
-
95
- void
96
- houdini_escape_url(struct buf *ob, const uint8_t *src, size_t size)
97
- {
98
- return escape(ob, src, size, 1);
99
- }
100
-
101
- void
102
- houdini_unescape_uri(struct buf *ob, const uint8_t *src, size_t size)
103
- {
104
- return unescape(ob, src, size, 0);
105
- }
106
-
107
- void
108
- houdini_unescape_url(struct buf *ob, const uint8_t *src, size_t size)
109
- {
110
- return unescape(ob, src, size, 1);
111
- }
112
-
113
-
114
-
115
- //#define TEST
116
- #ifdef TEST
117
-
118
- int main()
119
- {
120
- const char TEST_STRING[] = "http% this \200 is a test";
121
- struct buf *buffer;
122
-
123
- buffer = bufnew(128);
124
- houdini_escape_uri(buffer, TEST_STRING, strlen(TEST_STRING));
125
- printf("Result: %.*s\n", (int)buffer->size, buffer->data);
126
- bufrelease(buffer);
127
- return 0;
128
- }
129
- #endif
130
-
data/spec/html/escape_spec.rb DELETED
@@ -1,42 +0,0 @@
1
- require File.expand_path(File.dirname(__FILE__) + '/../spec_helper.rb')
2
-
3
- describe EscapeUtils, "escape_html" do
4
- it "should respond to escape_html" do
5
- EscapeUtils.should respond_to(:escape_html)
6
- end
7
-
8
- it "should escape a basic html tag, also escaping the '/' character if the secure parameter is true" do
9
- EscapeUtils.escape_html("<some_tag/>").should eql("&lt;some_tag&#47;&gt;")
10
- end
11
-
12
- it "should escape a basic html tag, not escaping the '/' character if the secure parameter is false" do
13
- EscapeUtils.escape_html("<some_tag/>", false).should eql("&lt;some_tag/&gt;")
14
- end
15
-
16
- it "should escape a basic html tag, not escaping the '/' character if EscapeUtils.html_secure is false" do
17
- EscapeUtils.html_secure = false
18
- EscapeUtils.escape_html("<some_tag/>").should eql("&lt;some_tag/&gt;")
19
- EscapeUtils.html_secure = true
20
- end
21
-
22
- it "should escape double-quotes" do
23
- EscapeUtils.escape_html("<some_tag some_attr=\"some value\"/>").should eql("&lt;some_tag some_attr=&quot;some value&quot;&#47;&gt;")
24
- end
25
-
26
- it "should escape single-quotes" do
27
- EscapeUtils.escape_html("<some_tag some_attr='some value'/>").should eql("&lt;some_tag some_attr=&#39;some value&#39;&#47;&gt;")
28
- end
29
-
30
- it "should escape the & character" do
31
- EscapeUtils.escape_html("<b>Bourbon & Branch</b>").should eql("&lt;b&gt;Bourbon &amp; Branch&lt;&#47;b&gt;")
32
- end
33
-
34
- if RUBY_VERSION =~ /^1.9/
35
- it "return value should be in original string's encoding" do
36
- str = "<b>Bourbon & Branch</b>".encode('us-ascii')
37
- EscapeUtils.escape_html(str).encoding.should eql(Encoding.find('us-ascii'))
38
- str = "<b>Bourbon & Branch</b>".encode('utf-8')
39
- EscapeUtils.escape_html(str).encoding.should eql(Encoding.find('utf-8'))
40
- end
41
- end
42
- end
data/spec/html/unescape_spec.rb DELETED
@@ -1,37 +0,0 @@
1
- require File.expand_path(File.dirname(__FILE__) + '/../spec_helper.rb')
2
-
3
- describe EscapeUtils, "unescape_html" do
4
- it "should respond to unescape_html" do
5
- EscapeUtils.should respond_to(:unescape_html)
6
- end
7
-
8
- it "should unescape a basic html tag" do
9
- EscapeUtils.unescape_html("&lt;some_tag&#47;&gt;").should eql("<some_tag/>")
10
- end
11
-
12
- it "should unescape double-quotes" do
13
- EscapeUtils.unescape_html("&lt;some_tag some_attr=&quot;some value&quot;&#47;&gt;").should eql("<some_tag some_attr=\"some value\"/>")
14
- end
15
-
16
- it "should unescape single-quotes" do
17
- EscapeUtils.unescape_html("&lt;some_tag some_attr=&#39;some value&#39;&#47;&gt;").should eql("<some_tag some_attr='some value'/>")
18
- end
19
-
20
- it "should unescape the & character" do
21
- EscapeUtils.unescape_html("&lt;b&gt;Bourbon &amp; Branch&lt;&#47;b&gt;").should eql("<b>Bourbon & Branch</b>")
22
- end
23
-
24
- it "should pass through incompletely escaped tags" do
25
- EscapeUtils.unescape_html("&").should eql("&")
26
- EscapeUtils.unescape_html("&lt").should eql("&lt")
27
- end
28
-
29
- if RUBY_VERSION =~ /^1.9/
30
- it "return value should be in original string's encoding" do
31
- str = "&lt;b&gt;Bourbon &amp; Branch&lt;/b&gt;".encode('us-ascii')
32
- EscapeUtils.unescape_html(str).encoding.should eql(Encoding.find('us-ascii'))
33
- str = "&lt;b&gt;Bourbon &amp; Branch&lt;/b&gt;".encode('utf-8')
34
- EscapeUtils.unescape_html(str).encoding.should eql(Encoding.find('utf-8'))
35
- end
36
- end
37
- end
data/spec/html_safety_spec.rb DELETED
@@ -1,48 +0,0 @@
1
- require File.expand_path(File.dirname(__FILE__) + '/spec_helper.rb')
2
-
3
- class Object
4
- def html_safe?
5
- false
6
- end
7
- end
8
-
9
- class TestSafeBuffer < String
10
- def html_safe?
11
- true
12
- end
13
-
14
- def html_safe
15
- self
16
- end
17
-
18
- def to_s
19
- self
20
- end
21
- end
22
-
23
- class String
24
- def html_safe
25
- TestSafeBuffer.new(self)
26
- end
27
- end
28
-
29
- include EscapeUtils::HtmlSafety
30
-
31
- describe EscapeUtils::HtmlSafety do
32
-
33
- it "should escape unsafe strings and make them safe" do
34
- escaped = _escape_html("<strong>unsafe</strong>")
35
- escaped.should eql("&lt;strong&gt;unsafe&lt;&#47;strong&gt;")
36
- escaped.should be_html_safe
37
- end
38
-
39
- it "shouldn't escape safe strings" do
40
- _escape_html("<p>safe string</p>".html_safe).should eql("<p>safe string</p>")
41
- end
42
-
43
- it "should work with non strings" do
44
- _escape_html(5).should eql("5")
45
- _escape_html(:hello).should eql("hello")
46
- end
47
-
48
- end
data/spec/javascript/escape_spec.rb DELETED
@@ -1,34 +0,0 @@
1
- require File.expand_path(File.dirname(__FILE__) + '/../spec_helper.rb')
2
-
3
- describe EscapeUtils, "escape_javascript" do
4
- it "should respond to escape_javascript" do
5
- EscapeUtils.should respond_to(:escape_javascript)
6
- end
7
-
8
- # these are from the ActionView tests
9
- it "should return an empty string if passed nil" do
10
- EscapeUtils.escape_javascript(nil).should eql("")
11
- end
12
-
13
- it "should escape quotes and newlines" do
14
- EscapeUtils.escape_javascript(%(This "thing" is really\n netos\r\n\n')).should eql(%(This \\"thing\\" is really\\n netos\\n\\n\\'))
15
- end
16
-
17
- it "should escape backslashes" do
18
- EscapeUtils.escape_javascript(%(backslash\\test)).should eql(%(backslash\\\\test))
19
- end
20
-
21
- it "should escape closed html tags" do
22
- EscapeUtils.escape_javascript(%(keep <open>, but dont </close> tags)).should eql(%(keep <open>, but dont <\\/close> tags))
23
- end
24
-
25
- if RUBY_VERSION =~ /^1.9/
26
- it "return value should be in original string's encoding" do
27
- str = "dont </close> tags".encode('us-ascii')
28
- EscapeUtils.escape_javascript(str).encoding.should eql(Encoding.find('us-ascii'))
29
- str = "dont </close> tags".encode('utf-8')
30
- EscapeUtils.escape_javascript(str).encoding.should eql(Encoding.find('utf-8'))
31
- end
32
- end
33
- end
34
-
data/spec/javascript/unescape_spec.rb DELETED
@@ -1,37 +0,0 @@
1
- require File.expand_path(File.dirname(__FILE__) + '/../spec_helper.rb')
2
-
3
- describe EscapeUtils, "unescape_javascript" do
4
- it "should respond to unescape_javascript" do
5
- EscapeUtils.should respond_to(:unescape_javascript)
6
- end
7
-
8
- # these are from the ActionView tests
9
- it "should return an empty string if passed nil" do
10
- EscapeUtils.unescape_javascript(nil).should eql("")
11
- end
12
-
13
- it "should unescape quotes and newlines" do
14
- EscapeUtils.unescape_javascript(%(This \\"thing\\" is really\\n netos\\n\\n\\')).should eql(%(This "thing" is really\n netos\n\n'))
15
- end
16
-
17
- it "should unescape backslashes" do
18
- EscapeUtils.unescape_javascript(%(backslash\\\\test)).should eql(%(backslash\\test))
19
- end
20
-
21
- it "should unescape closed html tags" do
22
- EscapeUtils.unescape_javascript(%(dont <\\/close> tags)).should eql(%(dont </close> tags))
23
- end
24
-
25
- it "should pass through standalone '\'" do
26
- EscapeUtils.unescape_javascript("\\").should eql("\\")
27
- end
28
-
29
- if RUBY_VERSION =~ /^1.9/
30
- it "return value should be in original string's encoding" do
31
- str = "dont <\\/close> tags".encode('us-ascii')
32
- EscapeUtils.unescape_javascript(str).encoding.should eql(Encoding.find('us-ascii'))
33
- str = "dont <\\/close> tags".encode('utf-8')
34
- EscapeUtils.unescape_javascript(str).encoding.should eql(Encoding.find('utf-8'))
35
- end
36
- end
37
- end
data/spec/query/escape_spec.rb DELETED
@@ -1,44 +0,0 @@
1
- require File.expand_path(File.dirname(__FILE__) + '/../spec_helper.rb')
2
-
3
- describe EscapeUtils, "escape_url" do
4
- it "should respond to escape_url" do
5
- EscapeUtils.should respond_to(:escape_url)
6
- end
7
-
8
- it "should escape a basic url" do
9
- EscapeUtils.escape_url("http://www.homerun.com/").should eql("http%3A%2F%2Fwww.homerun.com%2F")
10
- end
11
-
12
- # NOTE: from Rack's test suite
13
- it "should escape a url containing tags" do
14
- EscapeUtils.escape_url("fo<o>bar").should eql("fo%3Co%3Ebar")
15
- end
16
-
17
- # NOTE: from Rack's test suite
18
- it "should escape a url with spaces" do
19
- EscapeUtils.escape_url("a space").should eql("a+space")
20
- EscapeUtils.escape_url("a sp ace ").should eql("a+++sp+ace+")
21
- end
22
-
23
- # NOTE: from Rack's test suite
24
- it "should escape a string of mixed characters" do
25
- EscapeUtils.escape_url("q1!2\"'w$5&7/z8)?\\").should eql("q1%212%22%27w%245%267%2Fz8%29%3F%5C")
26
- end
27
-
28
- # NOTE: from Rack's test suite
29
- it "should escape correctly for multibyte characters" do
30
- matz_name = "\xE3\x81\xBE\xE3\x81\xA4\xE3\x82\x82\xE3\x81\xA8" # Matsumoto
31
- EscapeUtils.escape_url(matz_name).should eql('%E3%81%BE%E3%81%A4%E3%82%82%E3%81%A8')
32
- matz_name_sep = "\xE3\x81\xBE\xE3\x81\xA4 \xE3\x82\x82\xE3\x81\xA8" # Matsu moto
33
- EscapeUtils.escape_url(matz_name_sep).should eql('%E3%81%BE%E3%81%A4+%E3%82%82%E3%81%A8')
34
- end
35
-
36
- if RUBY_VERSION =~ /^1.9/
37
- it "return value should be in original string's encoding" do
38
- str = "http://www.homerun.com/".encode('us-ascii')
39
- EscapeUtils.escape_url(str).encoding.should eql(Encoding.find('us-ascii'))
40
- str = "http://www.homerun.com/".encode('utf-8')
41
- EscapeUtils.escape_url(str).encoding.should eql(Encoding.find('utf-8'))
42
- end
43
- end
44
- end
data/spec/query/unescape_spec.rb DELETED
@@ -1,46 +0,0 @@
1
- # encoding: UTF-8
2
-
3
- require File.expand_path(File.dirname(__FILE__) + '/../spec_helper.rb')
4
-
5
- describe EscapeUtils, "unescape_url" do
6
- it "should respond to unescape_url" do
7
- EscapeUtils.should respond_to(:unescape_url)
8
- end
9
-
10
- it "should unescape a basic url" do
11
- EscapeUtils.unescape_url("http%3A%2F%2Fwww.homerun.com%2F").should eql("http://www.homerun.com/")
12
- end
13
-
14
- # NOTE: from Rack's test suite
15
- it "should unescape a url containing tags" do
16
- EscapeUtils.unescape_url("fo%3Co%3Ebar").should eql("fo<o>bar")
17
- end
18
-
19
- # NOTE: from Rack's test suite
20
- it "should unescape a url with spaces" do
21
- EscapeUtils.unescape_url("a+space").should eql("a space")
22
- EscapeUtils.unescape_url("a+++sp+ace+").should eql("a sp ace ")
23
- end
24
-
25
- # NOTE: from Rack's test suite
26
- it "should unescape a string of mixed characters" do
27
- EscapeUtils.unescape_url("q1%212%22%27w%245%267%2Fz8%29%3F%5C").should eql("q1!2\"'w$5&7/z8)?\\")
28
- end
29
-
30
- # NOTE: from Rack's test suite
31
- it "should unescape correctly for multibyte characters" do
32
- matz_name = "\xE3\x81\xBE\xE3\x81\xA4\xE3\x82\x82\xE3\x81\xA8" # Matsumoto
33
- EscapeUtils.unescape_url('%E3%81%BE%E3%81%A4%E3%82%82%E3%81%A8').should eql(matz_name)
34
- matz_name_sep = "\xE3\x81\xBE\xE3\x81\xA4 \xE3\x82\x82\xE3\x81\xA8" # Matsu moto
35
- EscapeUtils.unescape_url('%E3%81%BE%E3%81%A4+%E3%82%82%E3%81%A8').should eql(matz_name_sep)
36
- end
37
-
38
- if RUBY_VERSION =~ /^1.9/
39
- it "return value should be in original string's encoding" do
40
- str = "http%3A%2F%2Fwww.homerun.com%2F".encode('us-ascii')
41
- EscapeUtils.unescape_url(str).encoding.should eql(Encoding.find('us-ascii'))
42
- str = "http%3A%2F%2Fwww.homerun.com%2F".encode('utf-8')
43
- EscapeUtils.unescape_url(str).encoding.should eql(Encoding.find('utf-8'))
44
- end
45
- end
46
- end
data/spec/rcov.opts DELETED
@@ -1,3 +0,0 @@
1
- --exclude spec,gem
2
- --text-summary
3
- --sort coverage --sort-reverse
data/spec/spec_helper.rb DELETED
@@ -1,5 +0,0 @@
1
- $LOAD_PATH.unshift File.expand_path(File.dirname(__FILE__) + '/..')
2
- $LOAD_PATH.unshift File.expand_path(File.dirname(__FILE__) + '/../lib')
3
-
4
- require 'rspec'
5
- require 'escape_utils'
data/spec/uri/escape_spec.rb DELETED
@@ -1,43 +0,0 @@
1
- require File.expand_path(File.dirname(__FILE__) + '/../spec_helper.rb')
2
- require 'uri'
3
-
4
- describe EscapeUtils, "escape_uri" do
5
- it "should respond to escape_uri" do
6
- EscapeUtils.should respond_to(:escape_uri)
7
- end
8
-
9
- it "should escape each byte exactly like URI.escape" do
10
- (0..255).each do |i|
11
- c = i.chr
12
- EscapeUtils.escape_uri(c).should eql(URI.escape(c))
13
- end
14
- end
15
-
16
- # NOTE: from Rack's test suite
17
- it "should escape a url containing tags" do
18
- EscapeUtils.escape_uri("fo<o>bar").should eql("fo%3Co%3Ebar")
19
- end
20
-
21
- # NOTE: from Rack's test suite
22
- it "should escape a url with spaces" do
23
- EscapeUtils.escape_uri("a space").should eql("a%20space")
24
- EscapeUtils.escape_uri("a sp ace ").should eql("a%20%20%20sp%20ace%20")
25
- end
26
-
27
- # NOTE: from Rack's test suite
28
- it "should escape correctly for multibyte characters" do
29
- matz_name = "\xE3\x81\xBE\xE3\x81\xA4\xE3\x82\x82\xE3\x81\xA8" # Matsumoto
30
- EscapeUtils.escape_uri(matz_name).should eql('%E3%81%BE%E3%81%A4%E3%82%82%E3%81%A8')
31
- matz_name_sep = "\xE3\x81\xBE\xE3\x81\xA4 \xE3\x82\x82\xE3\x81\xA8" # Matsu moto
32
- EscapeUtils.escape_uri(matz_name_sep).should eql('%E3%81%BE%E3%81%A4%20%E3%82%82%E3%81%A8')
33
- end
34
-
35
- if RUBY_VERSION =~ /^1.9/
36
- it "return value should be in original string's encoding" do
37
- str = "http://www.homerun.com/".encode('us-ascii')
38
- EscapeUtils.escape_uri(str).encoding.should eql(Encoding.find('us-ascii'))
39
- str = "http://www.homerun.com/".encode('utf-8')
40
- EscapeUtils.escape_uri(str).encoding.should eql(Encoding.find('utf-8'))
41
- end
42
- end
43
- end