escape_utils 1.2.2 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +10 -0
  3. data/README.md +47 -88
  4. data/benchmark/html_escape_once.rb +25 -0
  5. data/benchmark/javascript_escape.rb +1 -1
  6. data/benchmark/javascript_unescape.rb +1 -1
  7. data/benchmark/url_decode.rb +28 -0
  8. data/benchmark/url_encode.rb +37 -0
  9. data/benchmark/xml_escape.rb +7 -11
  10. data/ext/escape_utils/escape_utils.c +7 -115
  11. data/ext/escape_utils/houdini.h +3 -5
  12. data/ext/escape_utils/houdini_html_e.c +52 -24
  13. data/ext/escape_utils/houdini_uri_e.c +6 -17
  14. data/ext/escape_utils/houdini_uri_u.c +5 -15
  15. data/ext/escape_utils/houdini_xml_e.c +15 -1
  16. data/lib/escape_utils/html/cgi.rb +10 -8
  17. data/lib/escape_utils/html/erb.rb +1 -10
  18. data/lib/escape_utils/html/haml.rb +1 -7
  19. data/lib/escape_utils/html/rack.rb +3 -3
  20. data/lib/escape_utils/html_safety.rb +13 -0
  21. data/lib/escape_utils/url/cgi.rb +0 -8
  22. data/lib/escape_utils/url/erb.rb +1 -1
  23. data/lib/escape_utils/url/uri.rb +11 -7
  24. data/lib/escape_utils/version.rb +1 -1
  25. data/lib/escape_utils.rb +61 -9
  26. data/test/helper.rb +16 -3
  27. data/test/html/escape_test.rb +41 -39
  28. data/test/html/unescape_test.rb +0 -16
  29. data/test/html_safety_test.rb +1 -27
  30. data/test/javascript/escape_test.rb +0 -5
  31. data/test/query/escape_test.rb +0 -16
  32. data/test/query/unescape_test.rb +2 -18
  33. data/test/uri/unescape_test.rb +2 -2
  34. data/test/uri_component/unescape_test.rb +2 -2
  35. data/test/url/escape_test.rb +0 -16
  36. data/test/url/unescape_test.rb +2 -18
  37. metadata +6 -8
  38. data/benchmark/html_escape.rb +0 -68
  39. data/benchmark/html_unescape.rb +0 -35
  40. data/benchmark/url_escape.rb +0 -56
  41. data/benchmark/url_unescape.rb +0 -50
  42. data/ext/escape_utils/houdini_html_u.c +0 -122
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6f26c8a24ba01ecb130e04ef9af6802ab2deb56455527ccd551ae93839736604
4
- data.tar.gz: 4fc6f51b66cd0c773cf3aea923023ccb66e31cd865e9b89da81a9119540cae05
3
+ metadata.gz: 7b4b256a1ceb8ed2f7e673d5e2081daafb36761d93d3ebe38c6a1b51017f7ed5
4
+ data.tar.gz: 23de0f72e4df0b9ddf1d6ca0063fcac74696dc7da429f4b8eb5200fb90ba6435
5
5
  SHA512:
6
- metadata.gz: f0f6556bd83c1068189d5a3183b2ad1df88bd4a79f46f119ae7fcfdff4339544d6e1654379fe0b64cd6f07865c67eafa83b9caa236e0ac6b23de1814fd808b2e
7
- data.tar.gz: dd5e1b3baf9c594d48f0580f6c71c766e7e341fce332dba8fbd7f0a36d8611bb70e043ef3808c95e9d6a774a71d088e1136ff8025c2ae43e1588f0500d8d04ad
6
+ metadata.gz: 0d009060659e31a0d82073d8d6f870b174fc7647c34686d1168ac65bb6abf066043fb568c161850fb90a00ca0f0e55151874e0509089d62e92fb7c97d2187534
7
+ data.tar.gz: 6395d5b453930debba5b6eee4f5cc2440f2fc72f01d27c45987ecdcb461ff0edfd6ae15935f8ec9f50b010f5f4afcd4e419ee10a2bcdb995652b8be5688a316a
data/CHANGELOG.md CHANGED
@@ -1,5 +1,15 @@
1
1
  # Unreleased
2
2
 
3
+ # 1.3.0
4
+
5
+ - Deprecate `EscapeUtils.escape_url` and `EscapeUtils.unescape_url` given that Ruby 2.5 provides an optimized `CGI.escape` and `CGI.unescape` with mostly similar performance.
6
+ - Don't patch `URI.escape` and `URI.unescape` if they don't already exist.
7
+ - Add `EscapeUtils.escape_html_once` and `EscapeUtils.escape_html_once_as_html_safe` as faster implementations of the Rails `escape_once` helper.
8
+ - Deprecate `escape_html` and `escape_html_as_html_safe` given that Ruby 2.5 optimized `CGI.escapeHTML` to be twice as fast as the `EscapeUtils` implementation.
9
+ - Deprecate `unescape_html` given that Ruby 2.5 optimized `CGI.unescapeHTML` to be only 40% slower than the `EscapeUtils` implementation.
10
+ - Deprecate `escape_html_as_html_safe` as well.
11
+ - Deprecate `EscapeUtils.html_secure`, there's no reason to escape slashes `/` in 2022.
12
+
3
13
  # 1.2.2
4
14
 
5
15
  - Update EscapeUtils.escape_javascript to match Rails `escape_javascript`
data/README.md CHANGED
@@ -1,12 +1,12 @@
1
1
  # escape_utils
2
2
 
3
- Being as though we're all html escaping everything these days, why not make it faster?
3
+ `EscapeUtils` used to provide optimized escaping functions to replace the slow methods
4
+ provided by Ruby. Since Ruby 2.5, the various `CGI` escape methods have been heavily optimized
5
+ and most `EscapeUtils` methods became irrelevant and were deprecated.
4
6
 
5
- For character encoding, the output string's encoding is copied from the input string.
7
+ It does, however, still provide fast escaping and unescaping methods for URL (RFC 3986), Javascript, XML, as well as an "escape HTML once" method.
6
8
 
7
- It has monkey-patches for Rack::Utils, CGI, URI, ERB::Util and Haml and ActionView so you can drop this in and have your app start escaping fast as balls in no time
8
-
9
- It supports HTML, URL, URI and Javascript escaping/unescaping.
9
+ It has monkey-patches for Rack::Utils, URI and ERB::Util so you can drop this in and have your app start escaping fast as balls in no time
10
10
 
11
11
  ## Installing
12
12
 
@@ -22,70 +22,62 @@ escape_utils assumes all input is encoded as valid UTF-8. If you are dealing wit
22
22
 
23
23
 
24
24
  ``` ruby
25
- utf8_string = non_utf8_string.encode('UTF-8')
25
+ utf8_string = non_utf8_string.encode(Encoding::UTF_8)
26
26
  ```
27
27
 
28
28
  ## Usage
29
29
 
30
30
  ### HTML
31
31
 
32
- #### Escaping
33
-
34
- ``` ruby
35
- html = `curl -s http://maps.google.com`
36
- escaped_html = EscapeUtils.escape_html(html)
37
- ```
32
+ As of `escape_utils 1.3.0`, regular HTML escaping methods are deprecated. Ruby 2.5 introduced C implementations for `CGI.escapeHTML` and `CGI.unescapeHTML` which are respectively faster and almost as fast as `EscapeUtils`. Use those instead.
38
33
 
39
- By default escape_utils will escape `/` characters with `/`, but you can disable that by setting `EscapeUtils.html_secure = false`
40
- or per-call by passing `false` as the second parameter to `escape_html` like `EscapeUtils.escape_html(html, false)`
41
-
42
- For more information check out: http://www.owasp.org/index.php/XSS_(Cross_Site_Scripting)_Prevention_Cheat_Sheet#RULE_.231_-_HTML_Escape_Before_Inserting_Untrusted_Data_into_HTML_Element_Content
43
-
44
- #### Unescaping
45
-
46
- ``` ruby
47
- html = `curl -s http://maps.google.com`
48
- escaped_html = EscapeUtils.escape_html(html)
49
- html = EscapeUtils.unescape_html(escaped_html)
50
- ```
34
+ To avoid double-escaping HTML entities, use `EscapeUtils.escape_html_once`.
51
35
 
52
36
  #### Monkey Patches
53
37
 
38
+ Because the `HTML` monkey patches have historically changed the return value for `ActiveSupport::SafeBuffer` instances, they are kept for that purpose only, but they should be considered deprecated as well.
39
+
54
40
  ``` ruby
55
- require 'escape_utils/html/erb' # to patch ERB::Util
56
41
  require 'escape_utils/html/cgi' # to patch CGI
57
- require 'escape_utils/html/haml' # to patch Haml::Helpers
58
42
  ```
59
43
 
60
44
  ### URL
61
45
 
62
- Use (un)escape_uri to get RFC-compliant escaping (like PHP rawurlencode).
46
+ Use `escape_uri` and `unescape_uri` to get RFC 3986 compliant escaping (like PHP `rawurlencode` or `ERB::Util.url_encode`).
63
47
 
64
- Use (un)escape_url to get CGI escaping (where space is +).
48
+ The difference with `CGI.escape` is that spaces (` `) are encoded as `%20` instead of `+`.
65
49
 
66
50
  #### Escaping
67
51
 
68
52
  ``` ruby
69
53
  url = "https://www.yourmom.com/cgi-bin/session.cgi?sess_args=mcEA~!!#*YH*>@!U"
70
- escaped_url = EscapeUtils.escape_url(url)
54
+ escaped_url = EscapeUtils.escape_uri(url)
71
55
  ```
72
56
 
73
57
  #### Unescaping
74
58
 
75
59
  ``` ruby
76
60
  url = "https://www.yourmom.com/cgi-bin/session.cgi?sess_args=mcEA~!!#*YH*>@!U"
77
- escaped_url = EscapeUtils.escape_url(url)
78
- EscapeUtils.unescape_url(escaped_url) == url # => true
61
+ escaped_url = EscapeUtils.escape_uri(url)
62
+ EscapeUtils.unescape_uri(escaped_url) == url # => true
79
63
  ```
80
64
 
81
65
  #### Monkey Patches
82
66
 
83
67
  ``` ruby
84
- require 'escape_utils/url/cgi' # to patch CGI
85
68
  require 'escape_utils/url/erb' # to patch ERB::Util
86
69
  require 'escape_utils/url/uri' # to patch URI
87
70
  ```
88
71
 
72
+ Note that `URI.escape` and `URI.unescape` were removed in Ruby 3.0. `'escape_utils/url/uri'` is a no-op on Ruby 3+.
73
+
74
+ ### XML
75
+
76
+ ```ruby
77
+ xml = `curl -s 'https://raw.githubusercontent.com/darcyliu/google-styleguide/master/cppguide.xml'`
78
+ escaped_xml = EscapeUtils.escape_xml(xml)
79
+ ```
80
+
89
81
  ### Javascript
90
82
 
91
83
  #### Escaping
@@ -111,87 +103,54 @@ require 'escape_utils/javascript/action_view' # to patch ActionView::Helpers::Ja
111
103
 
112
104
  ## Benchmarks
113
105
 
114
- In my testing, escaping html is around 10-30x faster than the pure ruby implementations in wide use today.
115
- While unescaping html is around 40-100x faster than CGI.unescapeHTML which is also pure ruby.
116
- Escaping Javascript is around 16-30x faster.
106
+ Escaping URLs following RFC 3986 is 13-32x faster than the methods provided by Ruby.
107
+
108
+ Escaping Javascript is around 13x faster than Rails `escape_javascript`.
109
+
110
+ `EscapeUtils.escape_html_once` is about 17x faster than Rails `escape_once`.
117
111
 
118
112
  This output is from my laptop using the benchmark scripts in the `benchmark` folder.
119
113
 
120
- ### HTML
114
+ ### Javascript
121
115
 
122
116
  #### Escaping
123
117
 
124
118
  ```
125
- Rack::Utils.escape_html
126
- 9.650000 0.090000 9.740000 ( 9.750756)
127
- Haml::Helpers.html_escape
128
- 9.310000 0.110000 9.420000 ( 9.417317)
129
- ERB::Util.html_escape
130
- 5.330000 0.390000 5.720000 ( 5.748394)
131
- CGI.escapeHTML
132
- 5.370000 0.380000 5.750000 ( 5.791344)
133
- FasterHTMLEscape.html_escape
134
- 0.520000 0.010000 0.530000 ( 0.539485)
135
- fast_xs_extra#fast_xs_html
136
- 0.310000 0.030000 0.340000 ( 0.336734)
137
- EscapeUtils.escape_html
138
- 0.200000 0.050000 0.250000 ( 0.258839)
119
+ EscapeUtils.escape_javascript: 1567.5 i/s
120
+ ActionView::Helpers::JavaScriptHelper#escape_javascript: 116.8 i/s - 13.42x (± 0.00) slower
139
121
  ```
140
122
 
141
123
  #### Unescaping
142
124
 
143
125
  ```
144
- CGI.unescapeHTML
145
- 16.520000 0.080000 16.600000 ( 16.853888)
146
- EscapeUtils.unescape_html
147
- 0.120000 0.040000 0.160000 ( 0.162696)
126
+ EscapeUtils.escape_javascript: 2.089k (± 3.0%) i/s - 10.530k in 5.044615s
148
127
  ```
149
128
 
150
- ### Javascript
129
+ I didn't look that hard, but I'm not aware of another ruby library that does Javascript unescaping to benchmark against. Anyone know of any?
130
+
131
+ ### URL
151
132
 
152
133
  #### Escaping
153
134
 
154
135
  ```
155
- ActionView::Helpers::JavaScriptHelper#escape_javascript
156
- 3.810000 0.100000 3.910000 ( 3.925557)
157
- EscapeUtils.escape_javascript
158
- 0.200000 0.040000 0.240000 ( 0.236692)
136
+ EscapeUtils.escape_uri: 4019359.2 i/s
137
+ fast_xs_extra#fast_xs_url: 2435949.2 i/s - 1.65x (± 0.00) slower
138
+ URI::DEFAULT_PARSER.escape: 288800.8 i/s - 13.92x (± 0.00) slower
139
+ ERB::Util.url_encode: 122373.5 i/s - 32.85x (± 0.00) slower
159
140
  ```
160
141
 
161
142
  #### Unescaping
162
143
 
163
- I didn't look that hard, but I'm not aware of another ruby library that does Javascript unescaping to benchmark against. Anyone know of any?
164
-
165
- ### URL
166
-
167
- #### Escaping
168
-
169
144
  ```
170
- ERB::Util.url_encode
171
- 0.520000 0.010000 0.530000 ( 0.529277)
172
- Rack::Utils.escape
173
- 0.460000 0.010000 0.470000 ( 0.466962)
174
- CGI.escape
175
- 0.440000 0.000000 0.440000 ( 0.443017)
176
- URLEscape#escape
177
- 0.040000 0.000000 0.040000 ( 0.045661)
178
- fast_xs_extra#fast_xs_url
179
- 0.010000 0.000000 0.010000 ( 0.015429)
180
- EscapeUtils.escape_url
181
- 0.010000 0.000000 0.010000 ( 0.010843)
145
+ EscapeUtils.unescape_uri: 3866774.5 i/s
146
+ fast_xs_extra#fast_uxs_url: 2438900.7 i/s - 1.59x (± 0.00) slower
182
147
  ```
183
148
 
184
- #### Unescaping
149
+ ### HTML
150
+
151
+ #### Escape once
185
152
 
186
153
  ```
187
- Rack::Utils.unescape
188
- 0.250000 0.010000 0.260000 ( 0.257558)
189
- CGI.unescape
190
- 0.250000 0.000000 0.250000 ( 0.257837)
191
- URLEscape#unescape
192
- 0.040000 0.000000 0.040000 ( 0.031548)
193
- fast_xs_extra#fast_uxs_cgi
194
- 0.010000 0.000000 0.010000 ( 0.006062)
195
- EscapeUtils.unescape_url
196
- 0.000000 0.000000 0.000000 ( 0.005679)
154
+ EscapeUtils.escape_html_once: 2831.5 i/s
155
+ ActionView::Helpers::TagHelper#escape_once: 161.4 i/s - 17.55x (± 0.00) slower
197
156
  ```
@@ -0,0 +1,25 @@
1
+ # encoding: utf-8
2
+
3
+ require 'rubygems'
4
+ require 'bundler/setup'
5
+ require 'benchmark/ips'
6
+
7
+ require 'escape_utils'
8
+ require 'active_support/core_ext/string/output_safety'
9
+
10
+ url = "https://en.wikipedia.org/wiki/Succession_to_the_British_throne"
11
+ html = `curl -s #{url}`
12
+ html = html.force_encoding('utf-8')
13
+ puts "Escaping #{html.bytesize} bytes of html from #{url}"
14
+
15
+ Benchmark.ips do |x|
16
+ x.report "EscapeUtils.escape_html_once" do
17
+ EscapeUtils.escape_html_once(html)
18
+ end
19
+
20
+ x.report "ActionView::Helpers::TagHelper#escape_once" do # Rails expose it as ERB::Util.html_escape_once
21
+ ERB::Util.html_escape_once(html)
22
+ end
23
+
24
+ x.compare!(order: :baseline)
25
+ end
@@ -13,7 +13,7 @@ end
13
13
 
14
14
  url = "http://ajax.googleapis.com/ajax/libs/dojo/1.4.3/dojo/dojo.xd.js.uncompressed.js"
15
15
  javascript = `curl -s #{url}`
16
- javascript = javascript.force_encoding('utf-8') if javascript.respond_to?(:force_encoding)
16
+ javascript = javascript.force_encoding('utf-8')
17
17
  puts "Escaping #{javascript.bytesize} bytes of javascript, from #{url}"
18
18
 
19
19
  Benchmark.ips do |x|
@@ -8,7 +8,7 @@ require 'escape_utils'
8
8
 
9
9
  url = "http://ajax.googleapis.com/ajax/libs/dojo/1.4.3/dojo/dojo.xd.js.uncompressed.js"
10
10
  javascript = `curl -s #{url}`
11
- javascript = javascript.force_encoding('utf-8') if javascript.respond_to?(:force_encoding)
11
+ javascript = javascript.force_encoding('utf-8')
12
12
  escaped_javascript = EscapeUtils.escape_javascript(javascript)
13
13
  puts "Escaping #{escaped_javascript.bytesize} bytes of javascript, from #{url}"
14
14
 
@@ -0,0 +1,28 @@
1
+ # encoding: utf-8
2
+
3
+ require 'rubygems'
4
+ require 'bundler/setup'
5
+ require 'benchmark/ips'
6
+
7
+ require 'rack'
8
+ require 'cgi'
9
+ require 'url_escape'
10
+ require 'fast_xs_extra'
11
+ require 'escape_utils'
12
+
13
+ url = "https://www.yourmom.com/cgi-bin/session.cgi?sess_args=mYHcEA dh435dqUs0moGHeeAJTSLLbdbcbd9ef----,574b95600e9ab7d27eb0bf524ac68c27----"
14
+ url = url.force_encoding('us-ascii')
15
+ escaped_url = EscapeUtils.escape_uri(url)
16
+ puts "Escaping a #{url.bytesize} byte URL"
17
+
18
+ Benchmark.ips do |x|
19
+ x.report "EscapeUtils.unescape_uri" do
20
+ EscapeUtils.unescape_uri(escaped_url)
21
+ end
22
+
23
+ x.report "fast_xs_extra#fast_uxs_url" do
24
+ url.fast_xs_url
25
+ end
26
+
27
+ x.compare!(order: :baseline)
28
+ end
@@ -0,0 +1,37 @@
1
+ # encoding: utf-8
2
+
3
+ require 'rubygems'
4
+ require 'bundler/setup'
5
+ require 'benchmark/ips'
6
+
7
+ require 'rack'
8
+ require 'erb'
9
+ require 'cgi'
10
+ require 'url_escape'
11
+ require 'fast_xs_extra'
12
+ require 'escape_utils'
13
+
14
+ url = "https://www.yourmom.com/cgi-bin/session.cgi?sess_args=mYHcEA dh435dqUs0moGHeeAJTSLLbdbcbd9ef----,574b95600e9ab7d27eb0bf524ac68c27----"
15
+ puts "Escaping a #{url.bytesize} byte URL times"
16
+
17
+ Benchmark.ips do |x|
18
+ x.report "EscapeUtils.escape_uri" do
19
+ EscapeUtils.escape_uri(url)
20
+ end
21
+
22
+ x.report " URI::DEFAULT_PARSER.escape" do
23
+ URI::DEFAULT_PARSER.escape(url)
24
+ end
25
+
26
+ x.report "ERB::Util.url_encode" do |times|
27
+ times.times do
28
+ ERB::Util.url_encode(url)
29
+ end
30
+ end
31
+
32
+ x.report "fast_xs_extra#fast_xs_url" do
33
+ url.fast_xs_url
34
+ end
35
+
36
+ x.compare!(order: :baseline)
37
+ end
@@ -7,23 +7,19 @@ require 'benchmark/ips'
7
7
  require 'fast_xs'
8
8
  require 'escape_utils'
9
9
 
10
- url = "http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml"
10
+ url = "https://raw.githubusercontent.com/darcyliu/google-styleguide/master/cppguide.xml"
11
11
  xml = `curl -s #{url}`
12
- xml = xml.force_encoding('binary') if xml.respond_to?(:force_encoding)
12
+ xml = xml.force_encoding('binary')
13
13
  puts "Escaping #{xml.bytesize} bytes of xml, from #{url}"
14
14
 
15
15
  Benchmark.ips do |x|
16
- x.report "fast_xs" do |times|
17
- times.times do
18
- xml.fast_xs
19
- end
16
+ x.report "EscapeUtils.escape_xml" do
17
+ EscapeUtils.escape_xml(xml)
20
18
  end
21
19
 
22
- x.report "EscapeUtils.escape_xml" do |times|
23
- times.times do
24
- EscapeUtils.escape_xml(xml)
25
- end
20
+ x.report "fast_xs" do
21
+ xml.fast_xs
26
22
  end
27
23
 
28
- x.compare!
24
+ x.compare!(order: :baseline)
29
25
  end
@@ -9,11 +9,6 @@
9
9
 
10
10
  static VALUE rb_eEncodingCompatibilityError;
11
11
 
12
- static VALUE eu_new_str(const char *str, size_t len)
13
- {
14
- return rb_enc_str_new(str, len, rb_utf8_encoding());
15
- }
16
-
17
12
  static void check_utf8_encoding(VALUE str)
18
13
  {
19
14
  static rb_encoding *_cached[3] = {NULL, NULL, NULL};
@@ -34,41 +29,6 @@ static void check_utf8_encoding(VALUE str)
34
29
 
35
30
  typedef int (*houdini_cb)(gh_buf *, const uint8_t *, size_t);
36
31
 
37
- static VALUE rb_mEscapeUtils;
38
- static ID ID_at_html_safe, ID_new;
39
-
40
- /**
41
- * html_secure instance variable
42
- */
43
- static int g_html_secure = 1;
44
-
45
- static VALUE rb_eu_set_html_secure(VALUE self, VALUE val)
46
- {
47
- g_html_secure = RTEST(val);
48
- rb_ivar_set(self, rb_intern("@html_secure"), val);
49
- return val;
50
- }
51
-
52
- /**
53
- * html_safe_string_class instance variable
54
- */
55
- static VALUE rb_html_safe_string_class;
56
- static VALUE rb_html_safe_string_template_object;
57
-
58
- static VALUE rb_eu_set_html_safe_string_class(VALUE self, VALUE val)
59
- {
60
- Check_Type(val, T_CLASS);
61
-
62
- if (rb_funcall(val, rb_intern("<="), 1, rb_cString) == Qnil)
63
- rb_raise(rb_eArgError, "%s must be a descendent of String", rb_class2name(val));
64
-
65
- rb_html_safe_string_class = val;
66
- rb_html_safe_string_template_object = rb_class_new_instance(0, NULL, rb_html_safe_string_class);
67
- OBJ_FREEZE(rb_html_safe_string_template_object);
68
- rb_ivar_set(self, rb_intern("@html_safe_string_class"), val);
69
- return val;
70
- }
71
-
72
32
  /**
73
33
  * Generic template
74
34
  */
@@ -78,13 +38,13 @@ rb_eu__generic(VALUE str, houdini_cb do_escape)
78
38
  gh_buf buf = GH_BUF_INIT;
79
39
 
80
40
  if (NIL_P(str))
81
- return eu_new_str("", 0);
41
+ return rb_utf8_str_new("", 0);
82
42
 
83
43
  Check_Type(str, T_STRING);
84
44
  check_utf8_encoding(str);
85
45
 
86
46
  if (do_escape(&buf, (const uint8_t *)RSTRING_PTR(str), RSTRING_LEN(str))) {
87
- VALUE result = eu_new_str(buf.ptr, buf.size);
47
+ VALUE result = rb_utf8_str_new(buf.ptr, buf.size);
88
48
  gh_buf_free(&buf);
89
49
  return result;
90
50
  }
@@ -96,50 +56,15 @@ rb_eu__generic(VALUE str, houdini_cb do_escape)
96
56
  /**
97
57
  * HTML methods
98
58
  */
99
- static VALUE new_html_safe_string(const char *ptr, size_t len)
100
- {
101
- return rb_str_new_with_class(rb_html_safe_string_template_object, ptr, len);
102
- }
103
-
104
- static VALUE rb_eu_escape_html_as_html_safe(VALUE self, VALUE str)
105
- {
106
- VALUE result;
107
- int secure = g_html_secure;
108
- gh_buf buf = GH_BUF_INIT;
109
-
110
- Check_Type(str, T_STRING);
111
- check_utf8_encoding(str);
112
59
 
113
- if (houdini_escape_html0(&buf, (const uint8_t *)RSTRING_PTR(str), RSTRING_LEN(str), secure)) {
114
- result = new_html_safe_string(buf.ptr, buf.size);
115
- gh_buf_free(&buf);
116
- } else {
117
- result = new_html_safe_string(RSTRING_PTR(str), RSTRING_LEN(str));
118
- }
119
-
120
- rb_ivar_set(result, ID_at_html_safe, Qtrue);
121
- rb_enc_associate(result, rb_enc_get(str));
122
-
123
- return result;
124
- }
125
-
126
- static VALUE rb_eu_escape_html(int argc, VALUE *argv, VALUE self)
60
+ static VALUE rb_eu_escape_html_once(VALUE self, VALUE str)
127
61
  {
128
- VALUE str, rb_secure;
129
62
  gh_buf buf = GH_BUF_INIT;
130
- int secure = g_html_secure;
131
-
132
- if (rb_scan_args(argc, argv, "11", &str, &rb_secure) == 2) {
133
- if (rb_secure == Qfalse) {
134
- secure = 0;
135
- }
136
- }
137
-
138
63
  Check_Type(str, T_STRING);
139
64
  check_utf8_encoding(str);
140
65
 
141
- if (houdini_escape_html0(&buf, (const uint8_t *)RSTRING_PTR(str), RSTRING_LEN(str), secure)) {
142
- VALUE result = eu_new_str(buf.ptr, buf.size);
66
+ if (houdini_escape_html_once(&buf, (const uint8_t *)RSTRING_PTR(str), RSTRING_LEN(str))) {
67
+ VALUE result = rb_utf8_str_new(buf.ptr, buf.size);
143
68
  gh_buf_free(&buf);
144
69
  return result;
145
70
  }
@@ -147,11 +72,6 @@ static VALUE rb_eu_escape_html(int argc, VALUE *argv, VALUE self)
147
72
  return str;
148
73
  }
149
74
 
150
- static VALUE rb_eu_unescape_html(VALUE self, VALUE str)
151
- {
152
- return rb_eu__generic(str, &houdini_unescape_html);
153
- }
154
-
155
75
 
156
76
  /**
157
77
  * XML methods
@@ -175,21 +95,6 @@ static VALUE rb_eu_unescape_js(VALUE self, VALUE str)
175
95
  return rb_eu__generic(str, &houdini_unescape_js);
176
96
  }
177
97
 
178
-
179
- /**
180
- * URL methods
181
- */
182
- static VALUE rb_eu_escape_url(VALUE self, VALUE str)
183
- {
184
- return rb_eu__generic(str, &houdini_escape_url);
185
- }
186
-
187
- static VALUE rb_eu_unescape_url(VALUE self, VALUE str)
188
- {
189
- return rb_eu__generic(str, &houdini_unescape_url);
190
- }
191
-
192
-
193
98
  /**
194
99
  * URI methods
195
100
  */
@@ -216,7 +121,6 @@ static VALUE rb_eu_unescape_uri_component(VALUE self, VALUE str)
216
121
  return rb_eu__generic(str, &houdini_unescape_uri_component);
217
122
  }
218
123
 
219
-
220
124
  /**
221
125
  * Ruby Extension initializer
222
126
  */
@@ -225,26 +129,14 @@ void Init_escape_utils()
225
129
  {
226
130
  rb_eEncodingCompatibilityError = rb_const_get(rb_cEncoding, rb_intern("CompatibilityError"));
227
131
 
228
- ID_new = rb_intern("new");
229
- ID_at_html_safe = rb_intern("@html_safe");
230
- rb_global_variable(&rb_html_safe_string_class);
231
- rb_global_variable(&rb_html_safe_string_template_object);
232
-
233
- rb_mEscapeUtils = rb_define_module("EscapeUtils");
234
- rb_define_method(rb_mEscapeUtils, "escape_html_as_html_safe", rb_eu_escape_html_as_html_safe, 1);
235
- rb_define_method(rb_mEscapeUtils, "escape_html", rb_eu_escape_html, -1);
236
- rb_define_method(rb_mEscapeUtils, "unescape_html", rb_eu_unescape_html, 1);
132
+ VALUE rb_mEscapeUtils = rb_define_module("EscapeUtils");
133
+ rb_define_method(rb_mEscapeUtils, "escape_html_once", rb_eu_escape_html_once, 1);
237
134
  rb_define_method(rb_mEscapeUtils, "escape_xml", rb_eu_escape_xml, 1);
238
135
  rb_define_method(rb_mEscapeUtils, "escape_javascript", rb_eu_escape_js, 1);
239
136
  rb_define_method(rb_mEscapeUtils, "unescape_javascript", rb_eu_unescape_js, 1);
240
- rb_define_method(rb_mEscapeUtils, "escape_url", rb_eu_escape_url, 1);
241
- rb_define_method(rb_mEscapeUtils, "unescape_url", rb_eu_unescape_url, 1);
242
137
  rb_define_method(rb_mEscapeUtils, "escape_uri", rb_eu_escape_uri, 1);
243
138
  rb_define_method(rb_mEscapeUtils, "unescape_uri", rb_eu_unescape_uri, 1);
244
139
  rb_define_method(rb_mEscapeUtils, "escape_uri_component", rb_eu_escape_uri_component, 1);
245
140
  rb_define_method(rb_mEscapeUtils, "unescape_uri_component", rb_eu_unescape_uri_component, 1);
246
-
247
- rb_define_singleton_method(rb_mEscapeUtils, "html_secure=", rb_eu_set_html_secure, 1);
248
- rb_define_singleton_method(rb_mEscapeUtils, "html_safe_string_class=", rb_eu_set_html_safe_string_class, 1);
249
141
  }
250
142
 
@@ -22,20 +22,18 @@ extern "C" {
22
22
  # define _isdigit(c) ((c) >= '0' && (c) <= '9')
23
23
  #endif
24
24
 
25
+ #define _isasciialpha(c) (((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z'))
26
+
25
27
  #define HOUDINI_ESCAPED_SIZE(x) (((x) * 12) / 10)
26
28
  #define HOUDINI_UNESCAPED_SIZE(x) (x)
27
29
 
28
- extern int houdini_escape_html(gh_buf *ob, const uint8_t *src, size_t size);
29
- extern int houdini_escape_html0(gh_buf *ob, const uint8_t *src, size_t size, int secure);
30
- extern int houdini_unescape_html(gh_buf *ob, const uint8_t *src, size_t size);
30
+ extern int houdini_escape_html_once(gh_buf *ob, const uint8_t *src, size_t size);
31
31
  extern int houdini_escape_xml(gh_buf *ob, const uint8_t *src, size_t size);
32
32
  extern int houdini_escape_uri(gh_buf *ob, const uint8_t *src, size_t size);
33
33
  extern int houdini_escape_uri_component(gh_buf *ob, const uint8_t *src, size_t size);
34
- extern int houdini_escape_url(gh_buf *ob, const uint8_t *src, size_t size);
35
34
  extern int houdini_escape_href(gh_buf *ob, const uint8_t *src, size_t size);
36
35
  extern int houdini_unescape_uri(gh_buf *ob, const uint8_t *src, size_t size);
37
36
  extern int houdini_unescape_uri_component(gh_buf *ob, const uint8_t *src, size_t size);
38
- extern int houdini_unescape_url(gh_buf *ob, const uint8_t *src, size_t size);
39
37
  extern int houdini_escape_js(gh_buf *ob, const uint8_t *src, size_t size);
40
38
  extern int houdini_unescape_js(gh_buf *ob, const uint8_t *src, size_t size);
41
39