escape_utils 1.2.2 → 1.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/README.md +47 -88
- data/benchmark/html_escape_once.rb +25 -0
- data/benchmark/javascript_escape.rb +1 -1
- data/benchmark/javascript_unescape.rb +1 -1
- data/benchmark/url_decode.rb +28 -0
- data/benchmark/url_encode.rb +37 -0
- data/benchmark/xml_escape.rb +7 -11
- data/ext/escape_utils/escape_utils.c +7 -115
- data/ext/escape_utils/houdini.h +3 -5
- data/ext/escape_utils/houdini_html_e.c +52 -24
- data/ext/escape_utils/houdini_uri_e.c +6 -17
- data/ext/escape_utils/houdini_uri_u.c +5 -15
- data/ext/escape_utils/houdini_xml_e.c +15 -1
- data/lib/escape_utils/html/cgi.rb +10 -8
- data/lib/escape_utils/html/erb.rb +1 -10
- data/lib/escape_utils/html/haml.rb +1 -7
- data/lib/escape_utils/html/rack.rb +3 -3
- data/lib/escape_utils/html_safety.rb +13 -0
- data/lib/escape_utils/url/cgi.rb +0 -8
- data/lib/escape_utils/url/erb.rb +1 -1
- data/lib/escape_utils/url/uri.rb +11 -7
- data/lib/escape_utils/version.rb +1 -1
- data/lib/escape_utils.rb +61 -9
- data/test/helper.rb +16 -3
- data/test/html/escape_test.rb +41 -39
- data/test/html/unescape_test.rb +0 -16
- data/test/html_safety_test.rb +1 -27
- data/test/javascript/escape_test.rb +0 -5
- data/test/query/escape_test.rb +0 -16
- data/test/query/unescape_test.rb +2 -18
- data/test/uri/unescape_test.rb +2 -2
- data/test/uri_component/unescape_test.rb +2 -2
- data/test/url/escape_test.rb +0 -16
- data/test/url/unescape_test.rb +2 -18
- metadata +6 -8
- data/benchmark/html_escape.rb +0 -68
- data/benchmark/html_unescape.rb +0 -35
- data/benchmark/url_escape.rb +0 -56
- data/benchmark/url_unescape.rb +0 -50
- data/ext/escape_utils/houdini_html_u.c +0 -122
data/test/javascript/escape_test.rb
CHANGED
@@ -1,10 +1,5 @@
|
|
1
1
|
require File.expand_path("../../helper", __FILE__)
|
2
2
|
|
3
|
-
require 'active_support'
|
4
|
-
require 'active_support/json'
|
5
|
-
require 'action_view'
|
6
|
-
require 'action_view/helpers'
|
7
|
-
|
8
3
|
class JavascriptEscapeTest < Minitest::Test
|
9
4
|
ActiveSupport.escape_html_entities_in_json = true
|
10
5
|
|
data/test/query/escape_test.rb
CHANGED
@@ -25,22 +25,6 @@ class QueryEscapeTest < Minitest::Test
|
|
25
25
|
assert_equal '%E3%81%BE%E3%81%A4+%E3%82%82%E3%81%A8', EscapeUtils.escape_url(matz_name_sep)
|
26
26
|
end
|
27
27
|
|
28
|
-
def test_input_must_be_utf8_or_ascii
|
29
|
-
str = "a space"
|
30
|
-
|
31
|
-
str.force_encoding 'ISO-8859-1'
|
32
|
-
assert_raises Encoding::CompatibilityError do
|
33
|
-
EscapeUtils.escape_url(str)
|
34
|
-
end
|
35
|
-
|
36
|
-
str.force_encoding 'UTF-8'
|
37
|
-
begin
|
38
|
-
EscapeUtils.escape_url(str)
|
39
|
-
rescue Encoding::CompatibilityError => e
|
40
|
-
assert_nil e, "#{e.class.name} raised, expected not to"
|
41
|
-
end
|
42
|
-
end
|
43
|
-
|
44
28
|
def test_return_value_is_tagged_as_utf8
|
45
29
|
str = "a+space"
|
46
30
|
assert_equal Encoding.find('UTF-8'), EscapeUtils.escape_url(str).encoding
|
data/test/query/unescape_test.rb
CHANGED
@@ -20,29 +20,13 @@ class QueryUnescapeTest < Minitest::Test
|
|
20
20
|
|
21
21
|
def test_url_containing_multibyte_characters
|
22
22
|
matz_name = "\xE3\x81\xBE\xE3\x81\xA4\xE3\x82\x82\xE3\x81\xA8" # Matsumoto
|
23
|
-
matz_name.force_encoding('UTF-8')
|
23
|
+
matz_name.force_encoding('UTF-8')
|
24
24
|
assert_equal matz_name, EscapeUtils.unescape_url('%E3%81%BE%E3%81%A4%E3%82%82%E3%81%A8')
|
25
25
|
matz_name_sep = "\xE3\x81\xBE\xE3\x81\xA4 \xE3\x82\x82\xE3\x81\xA8" # Matsu moto
|
26
|
-
matz_name_sep.force_encoding('UTF-8')
|
26
|
+
matz_name_sep.force_encoding('UTF-8')
|
27
27
|
assert_equal matz_name_sep, EscapeUtils.unescape_url('%E3%81%BE%E3%81%A4+%E3%82%82%E3%81%A8')
|
28
28
|
end
|
29
29
|
|
30
|
-
def test_input_must_be_valid_utf8_or_ascii
|
31
|
-
escaped = EscapeUtils.unescape_url("a+space")
|
32
|
-
|
33
|
-
escaped.force_encoding 'ISO-8859-1'
|
34
|
-
assert_raises Encoding::CompatibilityError do
|
35
|
-
EscapeUtils.unescape_url(escaped)
|
36
|
-
end
|
37
|
-
|
38
|
-
escaped.force_encoding 'UTF-8'
|
39
|
-
begin
|
40
|
-
EscapeUtils.unescape_url(escaped)
|
41
|
-
rescue Encoding::CompatibilityError => e
|
42
|
-
assert_nil e, "#{e.class.name} raised, expected not to"
|
43
|
-
end
|
44
|
-
end
|
45
|
-
|
46
30
|
def test_return_value_is_tagged_as_utf8
|
47
31
|
escaped = EscapeUtils.escape_url("a space")
|
48
32
|
assert_equal Encoding.find('UTF-8'), EscapeUtils.unescape_url(escaped).encoding
|
data/test/uri/unescape_test.rb
CHANGED
@@ -23,10 +23,10 @@ class UriUnescapeTest < Minitest::Test
|
|
23
23
|
|
24
24
|
def test_uri_containing_multibyte_charactes
|
25
25
|
matz_name = "\xE3\x81\xBE\xE3\x81\xA4\xE3\x82\x82\xE3\x81\xA8" # Matsumoto
|
26
|
-
matz_name.force_encoding('UTF-8')
|
26
|
+
matz_name.force_encoding('UTF-8')
|
27
27
|
assert_equal matz_name, EscapeUtils.unescape_uri('%E3%81%BE%E3%81%A4%E3%82%82%E3%81%A8')
|
28
28
|
matz_name_sep = "\xE3\x81\xBE\xE3\x81\xA4 \xE3\x82\x82\xE3\x81\xA8" # Matsu moto
|
29
|
-
matz_name_sep.force_encoding('UTF-8')
|
29
|
+
matz_name_sep.force_encoding('UTF-8')
|
30
30
|
assert_equal matz_name_sep, EscapeUtils.unescape_uri('%E3%81%BE%E3%81%A4%20%E3%82%82%E3%81%A8')
|
31
31
|
end
|
32
32
|
|
data/test/uri_component/unescape_test.rb
CHANGED
@@ -39,10 +39,10 @@ class UriComponentUnescapeTest < Minitest::Test
|
|
39
39
|
|
40
40
|
def test_multibyte_characters
|
41
41
|
matz_name = "\xE3\x81\xBE\xE3\x81\xA4\xE3\x82\x82\xE3\x81\xA8" # Matsumoto
|
42
|
-
matz_name.force_encoding('UTF-8')
|
42
|
+
matz_name.force_encoding('UTF-8')
|
43
43
|
assert_equal matz_name, EscapeUtils.unescape_uri_component('%E3%81%BE%E3%81%A4%E3%82%82%E3%81%A8')
|
44
44
|
matz_name_sep = "\xE3\x81\xBE\xE3\x81\xA4 \xE3\x82\x82\xE3\x81\xA8" # Matsu moto
|
45
|
-
matz_name_sep.force_encoding('UTF-8')
|
45
|
+
matz_name_sep.force_encoding('UTF-8')
|
46
46
|
assert_equal matz_name_sep, EscapeUtils.unescape_uri_component('%E3%81%BE%E3%81%A4%20%E3%82%82%E3%81%A8')
|
47
47
|
end
|
48
48
|
|
data/test/url/escape_test.rb
CHANGED
@@ -41,22 +41,6 @@ class UrlEscapeTest < Minitest::Test
|
|
41
41
|
assert_equal "a%2Fslash", EscapeUtils.escape_url("a/slash")
|
42
42
|
end
|
43
43
|
|
44
|
-
def test_input_must_be_utf8_or_ascii
|
45
|
-
str = "fo<o>bar"
|
46
|
-
|
47
|
-
str.force_encoding 'ISO-8859-1'
|
48
|
-
assert_raises Encoding::CompatibilityError do
|
49
|
-
EscapeUtils.escape_url(str)
|
50
|
-
end
|
51
|
-
|
52
|
-
str.force_encoding 'UTF-8'
|
53
|
-
begin
|
54
|
-
EscapeUtils.escape_url(str)
|
55
|
-
rescue Encoding::CompatibilityError => e
|
56
|
-
assert_nil e, "#{e.class.name} raised, expected not to"
|
57
|
-
end
|
58
|
-
end
|
59
|
-
|
60
44
|
def test_return_value_is_tagged_as_utf8
|
61
45
|
str = "fo<o>bar"
|
62
46
|
assert_equal Encoding.find('UTF-8'), EscapeUtils.escape_url(str).encoding
|
data/test/url/unescape_test.rb
CHANGED
@@ -28,10 +28,10 @@ class UrlUnescapeTest < Minitest::Test
|
|
28
28
|
|
29
29
|
def test_multibyte_characters
|
30
30
|
matz_name = "\xE3\x81\xBE\xE3\x81\xA4\xE3\x82\x82\xE3\x81\xA8" # Matsumoto
|
31
|
-
matz_name.force_encoding('UTF-8')
|
31
|
+
matz_name.force_encoding('UTF-8')
|
32
32
|
assert_equal matz_name, EscapeUtils.unescape_url('%E3%81%BE%E3%81%A4%E3%82%82%E3%81%A8')
|
33
33
|
matz_name_sep = "\xE3\x81\xBE\xE3\x81\xA4 \xE3\x82\x82\xE3\x81\xA8" # Matsu moto
|
34
|
-
matz_name_sep.force_encoding('UTF-8')
|
34
|
+
matz_name_sep.force_encoding('UTF-8')
|
35
35
|
assert_equal matz_name_sep, EscapeUtils.unescape_url('%E3%81%BE%E3%81%A4%20%E3%82%82%E3%81%A8')
|
36
36
|
end
|
37
37
|
|
@@ -46,22 +46,6 @@ class UrlUnescapeTest < Minitest::Test
|
|
46
46
|
end
|
47
47
|
end
|
48
48
|
|
49
|
-
def test_input_must_be_valid_utf8_or_ascii
|
50
|
-
escaped = EscapeUtils.escape_url("fo<o>bar")
|
51
|
-
|
52
|
-
escaped.force_encoding 'ISO-8859-1'
|
53
|
-
assert_raises Encoding::CompatibilityError do
|
54
|
-
EscapeUtils.unescape_url(escaped)
|
55
|
-
end
|
56
|
-
|
57
|
-
escaped.force_encoding 'UTF-8'
|
58
|
-
begin
|
59
|
-
EscapeUtils.unescape_url(escaped)
|
60
|
-
rescue Encoding::CompatibilityError => e
|
61
|
-
assert_nil e, "#{e.class.name} raised, expected not to"
|
62
|
-
end
|
63
|
-
end
|
64
|
-
|
65
49
|
def test_return_value_is_tagged_as_utf8
|
66
50
|
escaped = EscapeUtils.escape_url("fo<o>bar")
|
67
51
|
assert_equal Encoding.find('UTF-8'), EscapeUtils.unescape_url(escaped).encoding
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: escape_utils
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.2.2
|
4
|
+
version: 1.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Brian Lopez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-06-
|
11
|
+
date: 2022-06-16 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Quickly perform HTML, URL, URI and Javascript escaping/unescaping
|
14
14
|
email: seniorlopez@gmail.com
|
@@ -24,12 +24,11 @@ files:
|
|
24
24
|
- LICENSE
|
25
25
|
- README.md
|
26
26
|
- Rakefile
|
27
|
-
- benchmark/html_escape.rb
|
28
|
-
- benchmark/html_unescape.rb
|
27
|
+
- benchmark/html_escape_once.rb
|
29
28
|
- benchmark/javascript_escape.rb
|
30
29
|
- benchmark/javascript_unescape.rb
|
31
|
-
- benchmark/url_escape.rb
|
32
|
-
- benchmark/url_unescape.rb
|
30
|
+
- benchmark/url_decode.rb
|
31
|
+
- benchmark/url_encode.rb
|
33
32
|
- benchmark/xml_escape.rb
|
34
33
|
- bin/console
|
35
34
|
- escape_utils.gemspec
|
@@ -40,7 +39,6 @@ files:
|
|
40
39
|
- ext/escape_utils/houdini.h
|
41
40
|
- ext/escape_utils/houdini_href_e.c
|
42
41
|
- ext/escape_utils/houdini_html_e.c
|
43
|
-
- ext/escape_utils/houdini_html_u.c
|
44
42
|
- ext/escape_utils/houdini_js_e.c
|
45
43
|
- ext/escape_utils/houdini_js_u.c
|
46
44
|
- ext/escape_utils/houdini_uri_e.c
|
@@ -97,7 +95,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
97
95
|
- !ruby/object:Gem::Version
|
98
96
|
version: '0'
|
99
97
|
requirements: []
|
100
|
-
rubygems_version: 3.
|
98
|
+
rubygems_version: 3.1.2
|
101
99
|
signing_key:
|
102
100
|
specification_version: 4
|
103
101
|
summary: Faster string escaping routines for your web apps
|
data/benchmark/html_escape.rb
DELETED
@@ -1,68 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
require 'rubygems'
|
4
|
-
require 'bundler/setup'
|
5
|
-
require 'benchmark/ips'
|
6
|
-
|
7
|
-
require 'rack'
|
8
|
-
require 'erb'
|
9
|
-
require 'cgi'
|
10
|
-
require 'haml'
|
11
|
-
require 'fast_xs_extra'
|
12
|
-
require 'escape_utils'
|
13
|
-
|
14
|
-
module HamlBench
|
15
|
-
extend Haml::Helpers
|
16
|
-
end
|
17
|
-
|
18
|
-
url = "https://en.wikipedia.org/wiki/Succession_to_the_British_throne"
|
19
|
-
html = `curl -s #{url}`
|
20
|
-
html = html.force_encoding('utf-8') if html.respond_to?(:force_encoding)
|
21
|
-
puts "Escaping #{html.bytesize} bytes of html from #{url}"
|
22
|
-
|
23
|
-
Benchmark.ips do |x|
|
24
|
-
x.report "Rack::Utils.escape_html" do |times|
|
25
|
-
times.times do
|
26
|
-
Rack::Utils.escape_html(html)
|
27
|
-
end
|
28
|
-
end
|
29
|
-
|
30
|
-
x.report "Haml::Helpers.html_escape" do |times|
|
31
|
-
times.times do
|
32
|
-
HamlBench.html_escape(html)
|
33
|
-
end
|
34
|
-
end
|
35
|
-
|
36
|
-
x.report "ERB::Util.html_escape" do |times|
|
37
|
-
times.times do
|
38
|
-
ERB::Util.html_escape(html)
|
39
|
-
end
|
40
|
-
end
|
41
|
-
|
42
|
-
x.report "CGI.escapeHTML" do |times|
|
43
|
-
times.times do
|
44
|
-
CGI.escapeHTML(html)
|
45
|
-
end
|
46
|
-
end
|
47
|
-
|
48
|
-
x.report "String#gsub" do |times|
|
49
|
-
html_escape = { '&' => '&amp;', '>' => '&gt;', '<' => '&lt;', '"' => '&quot;', "'" => '&#39;' }
|
50
|
-
times.times do
|
51
|
-
html.gsub(/[&"'><]/, html_escape)
|
52
|
-
end
|
53
|
-
end
|
54
|
-
|
55
|
-
x.report "fast_xs_extra#fast_xs_html" do |times|
|
56
|
-
times.times do
|
57
|
-
html.fast_xs_html
|
58
|
-
end
|
59
|
-
end
|
60
|
-
|
61
|
-
x.report "EscapeUtils.escape_html" do |times|
|
62
|
-
times.times do
|
63
|
-
EscapeUtils.escape_html(html)
|
64
|
-
end
|
65
|
-
end
|
66
|
-
|
67
|
-
x.compare!
|
68
|
-
end
|
data/benchmark/html_unescape.rb
DELETED
@@ -1,35 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
require 'rubygems'
|
4
|
-
require 'bundler/setup'
|
5
|
-
require 'benchmark/ips'
|
6
|
-
|
7
|
-
require 'cgi'
|
8
|
-
require 'haml'
|
9
|
-
require 'escape_utils'
|
10
|
-
|
11
|
-
module HamlBench
|
12
|
-
extend Haml::Helpers
|
13
|
-
end
|
14
|
-
|
15
|
-
url = "https://en.wikipedia.org/wiki/Succession_to_the_British_throne"
|
16
|
-
html = `curl -s #{url}`
|
17
|
-
html = html.force_encoding('binary') if html.respond_to?(:force_encoding)
|
18
|
-
escaped_html = EscapeUtils.escape_html(html)
|
19
|
-
puts "Unescaping #{escaped_html.bytesize} bytes of escaped html, from #{url}"
|
20
|
-
|
21
|
-
Benchmark.ips do |x|
|
22
|
-
x.report "CGI.unescapeHTML" do |times|
|
23
|
-
times.times do
|
24
|
-
CGI.unescapeHTML(escaped_html)
|
25
|
-
end
|
26
|
-
end
|
27
|
-
|
28
|
-
x.report "EscapeUtils.unescape_html" do |times|
|
29
|
-
times.times do
|
30
|
-
EscapeUtils.unescape_html(escaped_html)
|
31
|
-
end
|
32
|
-
end
|
33
|
-
|
34
|
-
x.compare!
|
35
|
-
end
|
data/benchmark/url_escape.rb
DELETED
@@ -1,56 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
require 'rubygems'
|
4
|
-
require 'bundler/setup'
|
5
|
-
require 'benchmark/ips'
|
6
|
-
|
7
|
-
require 'rack'
|
8
|
-
require 'erb'
|
9
|
-
require 'cgi'
|
10
|
-
require 'url_escape'
|
11
|
-
require 'fast_xs_extra'
|
12
|
-
require 'escape_utils'
|
13
|
-
|
14
|
-
url = "https://www.yourmom.com/cgi-bin/session.cgi?sess_args=mYHcEA dh435dqUs0moGHeeAJTSLLbdbcbd9ef----,574b95600e9ab7d27eb0bf524ac68c27----"
|
15
|
-
url = url.force_encoding('us-ascii') if url.respond_to?(:force_encoding)
|
16
|
-
puts "Escaping a #{url.bytesize} byte URL times"
|
17
|
-
|
18
|
-
Benchmark.ips do |x|
|
19
|
-
x.report "ERB::Util.url_encode" do |times|
|
20
|
-
times.times do
|
21
|
-
ERB::Util.url_encode(url)
|
22
|
-
end
|
23
|
-
end
|
24
|
-
|
25
|
-
x.report "Rack::Utils.escape" do |times|
|
26
|
-
times.times do
|
27
|
-
Rack::Utils.escape(url)
|
28
|
-
end
|
29
|
-
end
|
30
|
-
|
31
|
-
x.report "CGI.escape" do |times|
|
32
|
-
times.times do
|
33
|
-
CGI.escape(url)
|
34
|
-
end
|
35
|
-
end
|
36
|
-
|
37
|
-
x.report "URLEscape#escape" do |times|
|
38
|
-
times.times do
|
39
|
-
URLEscape.escape(url)
|
40
|
-
end
|
41
|
-
end
|
42
|
-
|
43
|
-
x.report "fast_xs_extra#fast_xs_url" do |times|
|
44
|
-
times.times do
|
45
|
-
url.fast_xs_url
|
46
|
-
end
|
47
|
-
end
|
48
|
-
|
49
|
-
x.report "EscapeUtils.escape_url" do |times|
|
50
|
-
times.times do
|
51
|
-
EscapeUtils.escape_url(url)
|
52
|
-
end
|
53
|
-
end
|
54
|
-
|
55
|
-
x.compare!
|
56
|
-
end
|
data/benchmark/url_unescape.rb
DELETED
@@ -1,50 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
require 'rubygems'
|
4
|
-
require 'bundler/setup'
|
5
|
-
require 'benchmark/ips'
|
6
|
-
|
7
|
-
require 'rack'
|
8
|
-
require 'cgi'
|
9
|
-
require 'url_escape'
|
10
|
-
require 'fast_xs_extra'
|
11
|
-
require 'escape_utils'
|
12
|
-
|
13
|
-
url = "https://www.yourmom.com/cgi-bin/session.cgi?sess_args=mYHcEA dh435dqUs0moGHeeAJTSLLbdbcbd9ef----,574b95600e9ab7d27eb0bf524ac68c27----"
|
14
|
-
url = url.force_encoding('us-ascii') if url.respond_to?(:force_encoding)
|
15
|
-
escaped_url = EscapeUtils.escape_url(url)
|
16
|
-
puts "Escaping a #{url.bytesize} byte URL"
|
17
|
-
|
18
|
-
Benchmark.ips do |x|
|
19
|
-
x.report "Rack::Utils.unescape" do |times|
|
20
|
-
times.times do
|
21
|
-
Rack::Utils.unescape(escaped_url)
|
22
|
-
end
|
23
|
-
end
|
24
|
-
|
25
|
-
x.report "CGI.unescape" do |times|
|
26
|
-
times.times do
|
27
|
-
CGI.unescape(escaped_url)
|
28
|
-
end
|
29
|
-
end
|
30
|
-
|
31
|
-
x.report "URLEscape#unescape" do |times|
|
32
|
-
times.times do
|
33
|
-
URLEscape.unescape(escaped_url)
|
34
|
-
end
|
35
|
-
end
|
36
|
-
|
37
|
-
x.report "fast_xs_extra#fast_uxs_cgi" do |times|
|
38
|
-
times.times do
|
39
|
-
url.fast_uxs_cgi
|
40
|
-
end
|
41
|
-
end
|
42
|
-
|
43
|
-
x.report "EscapeUtils.unescape_url" do |times|
|
44
|
-
times.times do
|
45
|
-
EscapeUtils.unescape_url(escaped_url)
|
46
|
-
end
|
47
|
-
end
|
48
|
-
|
49
|
-
x.compare!
|
50
|
-
end
|
data/ext/escape_utils/houdini_html_u.c
DELETED
@@ -1,122 +0,0 @@
|
|
1
|
-
#include <assert.h>
|
2
|
-
#include <stdio.h>
|
3
|
-
#include <string.h>
|
4
|
-
|
5
|
-
#include "houdini.h"
|
6
|
-
#include "html_unescape.h"
|
7
|
-
|
8
|
-
static inline void
|
9
|
-
gh_buf_put_utf8(gh_buf *ob, int c)
|
10
|
-
{
|
11
|
-
unsigned char unichar[4];
|
12
|
-
|
13
|
-
if (c < 0x80) {
|
14
|
-
gh_buf_putc(ob, c);
|
15
|
-
}
|
16
|
-
else if (c < 0x800) {
|
17
|
-
unichar[0] = 192 + (c / 64);
|
18
|
-
unichar[1] = 128 + (c % 64);
|
19
|
-
gh_buf_put(ob, unichar, 2);
|
20
|
-
}
|
21
|
-
else if (c - 0xd800u < 0x800) {
|
22
|
-
gh_buf_putc(ob, '?');
|
23
|
-
}
|
24
|
-
else if (c < 0x10000) {
|
25
|
-
unichar[0] = 224 + (c / 4096);
|
26
|
-
unichar[1] = 128 + (c / 64) % 64;
|
27
|
-
unichar[2] = 128 + (c % 64);
|
28
|
-
gh_buf_put(ob, unichar, 3);
|
29
|
-
}
|
30
|
-
else if (c < 0x110000) {
|
31
|
-
unichar[0] = 240 + (c / 262144);
|
32
|
-
unichar[1] = 128 + (c / 4096) % 64;
|
33
|
-
unichar[2] = 128 + (c / 64) % 64;
|
34
|
-
unichar[3] = 128 + (c % 64);
|
35
|
-
gh_buf_put(ob, unichar, 4);
|
36
|
-
}
|
37
|
-
else {
|
38
|
-
gh_buf_putc(ob, '?');
|
39
|
-
}
|
40
|
-
}
|
41
|
-
|
42
|
-
static size_t
|
43
|
-
unescape_ent(gh_buf *ob, const uint8_t *src, size_t size)
|
44
|
-
{
|
45
|
-
size_t i = 0;
|
46
|
-
|
47
|
-
if (size > 3 && src[0] == '#') {
|
48
|
-
int codepoint = 0;
|
49
|
-
|
50
|
-
if (_isdigit(src[1])) {
|
51
|
-
for (i = 1; i < size && _isdigit(src[i]); ++i)
|
52
|
-
codepoint = (codepoint * 10) + (src[i] - '0');
|
53
|
-
}
|
54
|
-
|
55
|
-
else if (src[1] == 'x' || src[1] == 'X') {
|
56
|
-
for (i = 2; i < size && _isxdigit(src[i]); ++i)
|
57
|
-
codepoint = (codepoint * 16) + ((src[i] | 32) % 39 - 9);
|
58
|
-
}
|
59
|
-
|
60
|
-
if (i < size && src[i] == ';' && codepoint) {
|
61
|
-
gh_buf_put_utf8(ob, codepoint);
|
62
|
-
return i + 1;
|
63
|
-
}
|
64
|
-
}
|
65
|
-
|
66
|
-
else {
|
67
|
-
if (size > MAX_WORD_LENGTH)
|
68
|
-
size = MAX_WORD_LENGTH;
|
69
|
-
|
70
|
-
for (i = MIN_WORD_LENGTH; i < size; ++i) {
|
71
|
-
if (src[i] == ' ')
|
72
|
-
break;
|
73
|
-
|
74
|
-
if (src[i] == ';') {
|
75
|
-
const struct html_ent *entity = find_entity((char *)src, i);
|
76
|
-
|
77
|
-
if (entity != NULL) {
|
78
|
-
gh_buf_put(ob, entity->utf8, entity->utf8_len);
|
79
|
-
return i + 1;
|
80
|
-
}
|
81
|
-
|
82
|
-
break;
|
83
|
-
}
|
84
|
-
}
|
85
|
-
}
|
86
|
-
|
87
|
-
gh_buf_putc(ob, '&');
|
88
|
-
return 0;
|
89
|
-
}
|
90
|
-
|
91
|
-
int
|
92
|
-
houdini_unescape_html(gh_buf *ob, const uint8_t *src, size_t size)
|
93
|
-
{
|
94
|
-
size_t i = 0, org;
|
95
|
-
|
96
|
-
while (i < size) {
|
97
|
-
org = i;
|
98
|
-
while (i < size && src[i] != '&')
|
99
|
-
i++;
|
100
|
-
|
101
|
-
if (likely(i > org)) {
|
102
|
-
if (unlikely(org == 0)) {
|
103
|
-
if (i >= size)
|
104
|
-
return 0;
|
105
|
-
|
106
|
-
gh_buf_grow(ob, HOUDINI_UNESCAPED_SIZE(size));
|
107
|
-
}
|
108
|
-
|
109
|
-
gh_buf_put(ob, src + org, i - org);
|
110
|
-
}
|
111
|
-
|
112
|
-
/* escaping */
|
113
|
-
if (i >= size)
|
114
|
-
break;
|
115
|
-
|
116
|
-
i++;
|
117
|
-
i += unescape_ent(ob, src + i, size - i);
|
118
|
-
}
|
119
|
-
|
120
|
-
return 1;
|
121
|
-
}
|
122
|
-
|