escape_utils 1.2.2 → 1.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (42) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +10 -0
  3. data/README.md +47 -88
  4. data/benchmark/html_escape_once.rb +25 -0
  5. data/benchmark/javascript_escape.rb +1 -1
  6. data/benchmark/javascript_unescape.rb +1 -1
  7. data/benchmark/url_decode.rb +28 -0
  8. data/benchmark/url_encode.rb +37 -0
  9. data/benchmark/xml_escape.rb +7 -11
  10. data/ext/escape_utils/escape_utils.c +7 -115
  11. data/ext/escape_utils/houdini.h +3 -5
  12. data/ext/escape_utils/houdini_html_e.c +52 -24
  13. data/ext/escape_utils/houdini_uri_e.c +6 -17
  14. data/ext/escape_utils/houdini_uri_u.c +5 -15
  15. data/ext/escape_utils/houdini_xml_e.c +15 -1
  16. data/lib/escape_utils/html/cgi.rb +10 -8
  17. data/lib/escape_utils/html/erb.rb +1 -10
  18. data/lib/escape_utils/html/haml.rb +1 -7
  19. data/lib/escape_utils/html/rack.rb +3 -3
  20. data/lib/escape_utils/html_safety.rb +13 -0
  21. data/lib/escape_utils/url/cgi.rb +0 -8
  22. data/lib/escape_utils/url/erb.rb +1 -1
  23. data/lib/escape_utils/url/uri.rb +11 -7
  24. data/lib/escape_utils/version.rb +1 -1
  25. data/lib/escape_utils.rb +61 -9
  26. data/test/helper.rb +16 -3
  27. data/test/html/escape_test.rb +41 -39
  28. data/test/html/unescape_test.rb +0 -16
  29. data/test/html_safety_test.rb +1 -27
  30. data/test/javascript/escape_test.rb +0 -5
  31. data/test/query/escape_test.rb +0 -16
  32. data/test/query/unescape_test.rb +2 -18
  33. data/test/uri/unescape_test.rb +2 -2
  34. data/test/uri_component/unescape_test.rb +2 -2
  35. data/test/url/escape_test.rb +0 -16
  36. data/test/url/unescape_test.rb +2 -18
  37. metadata +6 -8
  38. data/benchmark/html_escape.rb +0 -68
  39. data/benchmark/html_unescape.rb +0 -35
  40. data/benchmark/url_escape.rb +0 -56
  41. data/benchmark/url_unescape.rb +0 -50
  42. data/ext/escape_utils/houdini_html_u.c +0 -122
@@ -1,10 +1,5 @@
1
1
  require File.expand_path("../../helper", __FILE__)
2
2
 
3
- require 'active_support'
4
- require 'active_support/json'
5
- require 'action_view'
6
- require 'action_view/helpers'
7
-
8
3
  class JavascriptEscapeTest < Minitest::Test
9
4
  ActiveSupport.escape_html_entities_in_json = true
10
5
 
@@ -25,22 +25,6 @@ class QueryEscapeTest < Minitest::Test
25
25
  assert_equal '%E3%81%BE%E3%81%A4+%E3%82%82%E3%81%A8', EscapeUtils.escape_url(matz_name_sep)
26
26
  end
27
27
 
28
- def test_input_must_be_utf8_or_ascii
29
- str = "a space"
30
-
31
- str.force_encoding 'ISO-8859-1'
32
- assert_raises Encoding::CompatibilityError do
33
- EscapeUtils.escape_url(str)
34
- end
35
-
36
- str.force_encoding 'UTF-8'
37
- begin
38
- EscapeUtils.escape_url(str)
39
- rescue Encoding::CompatibilityError => e
40
- assert_nil e, "#{e.class.name} raised, expected not to"
41
- end
42
- end
43
-
44
28
  def test_return_value_is_tagged_as_utf8
45
29
  str = "a+space"
46
30
  assert_equal Encoding.find('UTF-8'), EscapeUtils.escape_url(str).encoding
@@ -20,29 +20,13 @@ class QueryUnescapeTest < Minitest::Test
20
20
 
21
21
  def test_url_containing_multibyte_characters
22
22
  matz_name = "\xE3\x81\xBE\xE3\x81\xA4\xE3\x82\x82\xE3\x81\xA8" # Matsumoto
23
- matz_name.force_encoding('UTF-8') if matz_name.respond_to?(:force_encoding)
23
+ matz_name.force_encoding('UTF-8')
24
24
  assert_equal matz_name, EscapeUtils.unescape_url('%E3%81%BE%E3%81%A4%E3%82%82%E3%81%A8')
25
25
  matz_name_sep = "\xE3\x81\xBE\xE3\x81\xA4 \xE3\x82\x82\xE3\x81\xA8" # Matsu moto
26
- matz_name_sep.force_encoding('UTF-8') if matz_name_sep.respond_to?(:force_encoding)
26
+ matz_name_sep.force_encoding('UTF-8')
27
27
  assert_equal matz_name_sep, EscapeUtils.unescape_url('%E3%81%BE%E3%81%A4+%E3%82%82%E3%81%A8')
28
28
  end
29
29
 
30
- def test_input_must_be_valid_utf8_or_ascii
31
- escaped = EscapeUtils.unescape_url("a+space")
32
-
33
- escaped.force_encoding 'ISO-8859-1'
34
- assert_raises Encoding::CompatibilityError do
35
- EscapeUtils.unescape_url(escaped)
36
- end
37
-
38
- escaped.force_encoding 'UTF-8'
39
- begin
40
- EscapeUtils.unescape_url(escaped)
41
- rescue Encoding::CompatibilityError => e
42
- assert_nil e, "#{e.class.name} raised, expected not to"
43
- end
44
- end
45
-
46
30
  def test_return_value_is_tagged_as_utf8
47
31
  escaped = EscapeUtils.escape_url("a space")
48
32
  assert_equal Encoding.find('UTF-8'), EscapeUtils.unescape_url(escaped).encoding
@@ -23,10 +23,10 @@ class UriUnescapeTest < Minitest::Test
23
23
 
24
24
  def test_uri_containing_multibyte_charactes
25
25
  matz_name = "\xE3\x81\xBE\xE3\x81\xA4\xE3\x82\x82\xE3\x81\xA8" # Matsumoto
26
- matz_name.force_encoding('UTF-8') if matz_name.respond_to?(:force_encoding)
26
+ matz_name.force_encoding('UTF-8')
27
27
  assert_equal matz_name, EscapeUtils.unescape_uri('%E3%81%BE%E3%81%A4%E3%82%82%E3%81%A8')
28
28
  matz_name_sep = "\xE3\x81\xBE\xE3\x81\xA4 \xE3\x82\x82\xE3\x81\xA8" # Matsu moto
29
- matz_name_sep.force_encoding('UTF-8') if matz_name_sep.respond_to?(:force_encoding)
29
+ matz_name_sep.force_encoding('UTF-8')
30
30
  assert_equal matz_name_sep, EscapeUtils.unescape_uri('%E3%81%BE%E3%81%A4%20%E3%82%82%E3%81%A8')
31
31
  end
32
32
 
@@ -39,10 +39,10 @@ class UriComponentUnescapeTest < Minitest::Test
39
39
 
40
40
  def test_multibyte_characters
41
41
  matz_name = "\xE3\x81\xBE\xE3\x81\xA4\xE3\x82\x82\xE3\x81\xA8" # Matsumoto
42
- matz_name.force_encoding('UTF-8') if matz_name.respond_to?(:force_encoding)
42
+ matz_name.force_encoding('UTF-8')
43
43
  assert_equal matz_name, EscapeUtils.unescape_uri_component('%E3%81%BE%E3%81%A4%E3%82%82%E3%81%A8')
44
44
  matz_name_sep = "\xE3\x81\xBE\xE3\x81\xA4 \xE3\x82\x82\xE3\x81\xA8" # Matsu moto
45
- matz_name_sep.force_encoding('UTF-8') if matz_name_sep.respond_to?(:force_encoding)
45
+ matz_name_sep.force_encoding('UTF-8')
46
46
  assert_equal matz_name_sep, EscapeUtils.unescape_uri_component('%E3%81%BE%E3%81%A4%20%E3%82%82%E3%81%A8')
47
47
  end
48
48
 
@@ -41,22 +41,6 @@ class UrlEscapeTest < Minitest::Test
41
41
  assert_equal "a%2Fslash", EscapeUtils.escape_url("a/slash")
42
42
  end
43
43
 
44
- def test_input_must_be_utf8_or_ascii
45
- str = "fo<o>bar"
46
-
47
- str.force_encoding 'ISO-8859-1'
48
- assert_raises Encoding::CompatibilityError do
49
- EscapeUtils.escape_url(str)
50
- end
51
-
52
- str.force_encoding 'UTF-8'
53
- begin
54
- EscapeUtils.escape_url(str)
55
- rescue Encoding::CompatibilityError => e
56
- assert_nil e, "#{e.class.name} raised, expected not to"
57
- end
58
- end
59
-
60
44
  def test_return_value_is_tagged_as_utf8
61
45
  str = "fo<o>bar"
62
46
  assert_equal Encoding.find('UTF-8'), EscapeUtils.escape_url(str).encoding
@@ -28,10 +28,10 @@ class UrlUnescapeTest < Minitest::Test
28
28
 
29
29
  def test_multibyte_characters
30
30
  matz_name = "\xE3\x81\xBE\xE3\x81\xA4\xE3\x82\x82\xE3\x81\xA8" # Matsumoto
31
- matz_name.force_encoding('UTF-8') if matz_name.respond_to?(:force_encoding)
31
+ matz_name.force_encoding('UTF-8')
32
32
  assert_equal matz_name, EscapeUtils.unescape_url('%E3%81%BE%E3%81%A4%E3%82%82%E3%81%A8')
33
33
  matz_name_sep = "\xE3\x81\xBE\xE3\x81\xA4 \xE3\x82\x82\xE3\x81\xA8" # Matsu moto
34
- matz_name_sep.force_encoding('UTF-8') if matz_name_sep.respond_to?(:force_encoding)
34
+ matz_name_sep.force_encoding('UTF-8')
35
35
  assert_equal matz_name_sep, EscapeUtils.unescape_url('%E3%81%BE%E3%81%A4%20%E3%82%82%E3%81%A8')
36
36
  end
37
37
 
@@ -46,22 +46,6 @@ class UrlUnescapeTest < Minitest::Test
46
46
  end
47
47
  end
48
48
 
49
- def test_input_must_be_valid_utf8_or_ascii
50
- escaped = EscapeUtils.escape_url("fo<o>bar")
51
-
52
- escaped.force_encoding 'ISO-8859-1'
53
- assert_raises Encoding::CompatibilityError do
54
- EscapeUtils.unescape_url(escaped)
55
- end
56
-
57
- escaped.force_encoding 'UTF-8'
58
- begin
59
- EscapeUtils.unescape_url(escaped)
60
- rescue Encoding::CompatibilityError => e
61
- assert_nil e, "#{e.class.name} raised, expected not to"
62
- end
63
- end
64
-
65
49
  def test_return_value_is_tagged_as_utf8
66
50
  escaped = EscapeUtils.escape_url("fo<o>bar")
67
51
  assert_equal Encoding.find('UTF-8'), EscapeUtils.unescape_url(escaped).encoding
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: escape_utils
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.2
4
+ version: 1.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Brian Lopez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-06-07 00:00:00.000000000 Z
11
+ date: 2022-06-16 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Quickly perform HTML, URL, URI and Javascript escaping/unescaping
14
14
  email: seniorlopez@gmail.com
@@ -24,12 +24,11 @@ files:
24
24
  - LICENSE
25
25
  - README.md
26
26
  - Rakefile
27
- - benchmark/html_escape.rb
28
- - benchmark/html_unescape.rb
27
+ - benchmark/html_escape_once.rb
29
28
  - benchmark/javascript_escape.rb
30
29
  - benchmark/javascript_unescape.rb
31
- - benchmark/url_escape.rb
32
- - benchmark/url_unescape.rb
30
+ - benchmark/url_decode.rb
31
+ - benchmark/url_encode.rb
33
32
  - benchmark/xml_escape.rb
34
33
  - bin/console
35
34
  - escape_utils.gemspec
@@ -40,7 +39,6 @@ files:
40
39
  - ext/escape_utils/houdini.h
41
40
  - ext/escape_utils/houdini_href_e.c
42
41
  - ext/escape_utils/houdini_html_e.c
43
- - ext/escape_utils/houdini_html_u.c
44
42
  - ext/escape_utils/houdini_js_e.c
45
43
  - ext/escape_utils/houdini_js_u.c
46
44
  - ext/escape_utils/houdini_uri_e.c
@@ -97,7 +95,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
97
95
  - !ruby/object:Gem::Version
98
96
  version: '0'
99
97
  requirements: []
100
- rubygems_version: 3.3.7
98
+ rubygems_version: 3.1.2
101
99
  signing_key:
102
100
  specification_version: 4
103
101
  summary: Faster string escaping routines for your web apps
@@ -1,68 +0,0 @@
1
- # encoding: utf-8
2
-
3
- require 'rubygems'
4
- require 'bundler/setup'
5
- require 'benchmark/ips'
6
-
7
- require 'rack'
8
- require 'erb'
9
- require 'cgi'
10
- require 'haml'
11
- require 'fast_xs_extra'
12
- require 'escape_utils'
13
-
14
- module HamlBench
15
- extend Haml::Helpers
16
- end
17
-
18
- url = "https://en.wikipedia.org/wiki/Succession_to_the_British_throne"
19
- html = `curl -s #{url}`
20
- html = html.force_encoding('utf-8') if html.respond_to?(:force_encoding)
21
- puts "Escaping #{html.bytesize} bytes of html from #{url}"
22
-
23
- Benchmark.ips do |x|
24
- x.report "Rack::Utils.escape_html" do |times|
25
- times.times do
26
- Rack::Utils.escape_html(html)
27
- end
28
- end
29
-
30
- x.report "Haml::Helpers.html_escape" do |times|
31
- times.times do
32
- HamlBench.html_escape(html)
33
- end
34
- end
35
-
36
- x.report "ERB::Util.html_escape" do |times|
37
- times.times do
38
- ERB::Util.html_escape(html)
39
- end
40
- end
41
-
42
- x.report "CGI.escapeHTML" do |times|
43
- times.times do
44
- CGI.escapeHTML(html)
45
- end
46
- end
47
-
48
- x.report "String#gsub" do |times|
49
- html_escape = { '&' => '&amp;', '>' => '&gt;', '<' => '&lt;', '"' => '&quot;', "'" => '&#39;' }
50
- times.times do
51
- html.gsub(/[&"'><]/, html_escape)
52
- end
53
- end
54
-
55
- x.report "fast_xs_extra#fast_xs_html" do |times|
56
- times.times do
57
- html.fast_xs_html
58
- end
59
- end
60
-
61
- x.report "EscapeUtils.escape_html" do |times|
62
- times.times do
63
- EscapeUtils.escape_html(html)
64
- end
65
- end
66
-
67
- x.compare!
68
- end
@@ -1,35 +0,0 @@
1
- # encoding: utf-8
2
-
3
- require 'rubygems'
4
- require 'bundler/setup'
5
- require 'benchmark/ips'
6
-
7
- require 'cgi'
8
- require 'haml'
9
- require 'escape_utils'
10
-
11
- module HamlBench
12
- extend Haml::Helpers
13
- end
14
-
15
- url = "https://en.wikipedia.org/wiki/Succession_to_the_British_throne"
16
- html = `curl -s #{url}`
17
- html = html.force_encoding('binary') if html.respond_to?(:force_encoding)
18
- escaped_html = EscapeUtils.escape_html(html)
19
- puts "Unescaping #{escaped_html.bytesize} bytes of escaped html, from #{url}"
20
-
21
- Benchmark.ips do |x|
22
- x.report "CGI.unescapeHTML" do |times|
23
- times.times do
24
- CGI.unescapeHTML(escaped_html)
25
- end
26
- end
27
-
28
- x.report "EscapeUtils.unescape_html" do |times|
29
- times.times do
30
- EscapeUtils.unescape_html(escaped_html)
31
- end
32
- end
33
-
34
- x.compare!
35
- end
@@ -1,56 +0,0 @@
1
- # encoding: utf-8
2
-
3
- require 'rubygems'
4
- require 'bundler/setup'
5
- require 'benchmark/ips'
6
-
7
- require 'rack'
8
- require 'erb'
9
- require 'cgi'
10
- require 'url_escape'
11
- require 'fast_xs_extra'
12
- require 'escape_utils'
13
-
14
- url = "https://www.yourmom.com/cgi-bin/session.cgi?sess_args=mYHcEA dh435dqUs0moGHeeAJTSLLbdbcbd9ef----,574b95600e9ab7d27eb0bf524ac68c27----"
15
- url = url.force_encoding('us-ascii') if url.respond_to?(:force_encoding)
16
- puts "Escaping a #{url.bytesize} byte URL times"
17
-
18
- Benchmark.ips do |x|
19
- x.report "ERB::Util.url_encode" do |times|
20
- times.times do
21
- ERB::Util.url_encode(url)
22
- end
23
- end
24
-
25
- x.report "Rack::Utils.escape" do |times|
26
- times.times do
27
- Rack::Utils.escape(url)
28
- end
29
- end
30
-
31
- x.report "CGI.escape" do |times|
32
- times.times do
33
- CGI.escape(url)
34
- end
35
- end
36
-
37
- x.report "URLEscape#escape" do |times|
38
- times.times do
39
- URLEscape.escape(url)
40
- end
41
- end
42
-
43
- x.report "fast_xs_extra#fast_xs_url" do |times|
44
- times.times do
45
- url.fast_xs_url
46
- end
47
- end
48
-
49
- x.report "EscapeUtils.escape_url" do |times|
50
- times.times do
51
- EscapeUtils.escape_url(url)
52
- end
53
- end
54
-
55
- x.compare!
56
- end
@@ -1,50 +0,0 @@
1
- # encoding: utf-8
2
-
3
- require 'rubygems'
4
- require 'bundler/setup'
5
- require 'benchmark/ips'
6
-
7
- require 'rack'
8
- require 'cgi'
9
- require 'url_escape'
10
- require 'fast_xs_extra'
11
- require 'escape_utils'
12
-
13
- url = "https://www.yourmom.com/cgi-bin/session.cgi?sess_args=mYHcEA dh435dqUs0moGHeeAJTSLLbdbcbd9ef----,574b95600e9ab7d27eb0bf524ac68c27----"
14
- url = url.force_encoding('us-ascii') if url.respond_to?(:force_encoding)
15
- escaped_url = EscapeUtils.escape_url(url)
16
- puts "Escaping a #{url.bytesize} byte URL"
17
-
18
- Benchmark.ips do |x|
19
- x.report "Rack::Utils.unescape" do |times|
20
- times.times do
21
- Rack::Utils.unescape(escaped_url)
22
- end
23
- end
24
-
25
- x.report "CGI.unescape" do |times|
26
- times.times do
27
- CGI.unescape(escaped_url)
28
- end
29
- end
30
-
31
- x.report "URLEscape#unescape" do |times|
32
- times.times do
33
- URLEscape.unescape(escaped_url)
34
- end
35
- end
36
-
37
- x.report "fast_xs_extra#fast_uxs_cgi" do |times|
38
- times.times do
39
- url.fast_uxs_cgi
40
- end
41
- end
42
-
43
- x.report "EscapeUtils.unescape_url" do |times|
44
- times.times do
45
- EscapeUtils.unescape_url(escaped_url)
46
- end
47
- end
48
-
49
- x.compare!
50
- end
@@ -1,122 +0,0 @@
1
- #include <assert.h>
2
- #include <stdio.h>
3
- #include <string.h>
4
-
5
- #include "houdini.h"
6
- #include "html_unescape.h"
7
-
8
- static inline void
9
- gh_buf_put_utf8(gh_buf *ob, int c)
10
- {
11
- unsigned char unichar[4];
12
-
13
- if (c < 0x80) {
14
- gh_buf_putc(ob, c);
15
- }
16
- else if (c < 0x800) {
17
- unichar[0] = 192 + (c / 64);
18
- unichar[1] = 128 + (c % 64);
19
- gh_buf_put(ob, unichar, 2);
20
- }
21
- else if (c - 0xd800u < 0x800) {
22
- gh_buf_putc(ob, '?');
23
- }
24
- else if (c < 0x10000) {
25
- unichar[0] = 224 + (c / 4096);
26
- unichar[1] = 128 + (c / 64) % 64;
27
- unichar[2] = 128 + (c % 64);
28
- gh_buf_put(ob, unichar, 3);
29
- }
30
- else if (c < 0x110000) {
31
- unichar[0] = 240 + (c / 262144);
32
- unichar[1] = 128 + (c / 4096) % 64;
33
- unichar[2] = 128 + (c / 64) % 64;
34
- unichar[3] = 128 + (c % 64);
35
- gh_buf_put(ob, unichar, 4);
36
- }
37
- else {
38
- gh_buf_putc(ob, '?');
39
- }
40
- }
41
-
42
- static size_t
43
- unescape_ent(gh_buf *ob, const uint8_t *src, size_t size)
44
- {
45
- size_t i = 0;
46
-
47
- if (size > 3 && src[0] == '#') {
48
- int codepoint = 0;
49
-
50
- if (_isdigit(src[1])) {
51
- for (i = 1; i < size && _isdigit(src[i]); ++i)
52
- codepoint = (codepoint * 10) + (src[i] - '0');
53
- }
54
-
55
- else if (src[1] == 'x' || src[1] == 'X') {
56
- for (i = 2; i < size && _isxdigit(src[i]); ++i)
57
- codepoint = (codepoint * 16) + ((src[i] | 32) % 39 - 9);
58
- }
59
-
60
- if (i < size && src[i] == ';' && codepoint) {
61
- gh_buf_put_utf8(ob, codepoint);
62
- return i + 1;
63
- }
64
- }
65
-
66
- else {
67
- if (size > MAX_WORD_LENGTH)
68
- size = MAX_WORD_LENGTH;
69
-
70
- for (i = MIN_WORD_LENGTH; i < size; ++i) {
71
- if (src[i] == ' ')
72
- break;
73
-
74
- if (src[i] == ';') {
75
- const struct html_ent *entity = find_entity((char *)src, i);
76
-
77
- if (entity != NULL) {
78
- gh_buf_put(ob, entity->utf8, entity->utf8_len);
79
- return i + 1;
80
- }
81
-
82
- break;
83
- }
84
- }
85
- }
86
-
87
- gh_buf_putc(ob, '&');
88
- return 0;
89
- }
90
-
91
- int
92
- houdini_unescape_html(gh_buf *ob, const uint8_t *src, size_t size)
93
- {
94
- size_t i = 0, org;
95
-
96
- while (i < size) {
97
- org = i;
98
- while (i < size && src[i] != '&')
99
- i++;
100
-
101
- if (likely(i > org)) {
102
- if (unlikely(org == 0)) {
103
- if (i >= size)
104
- return 0;
105
-
106
- gh_buf_grow(ob, HOUDINI_UNESCAPED_SIZE(size));
107
- }
108
-
109
- gh_buf_put(ob, src + org, i - org);
110
- }
111
-
112
- /* escaping */
113
- if (i >= size)
114
- break;
115
-
116
- i++;
117
- i += unescape_ent(ob, src + i, size - i);
118
- }
119
-
120
- return 1;
121
- }
122
-