escape_utils 1.2.2 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/README.md +47 -88
- data/benchmark/html_escape_once.rb +25 -0
- data/benchmark/javascript_escape.rb +1 -1
- data/benchmark/javascript_unescape.rb +1 -1
- data/benchmark/url_decode.rb +28 -0
- data/benchmark/url_encode.rb +37 -0
- data/benchmark/xml_escape.rb +7 -11
- data/ext/escape_utils/escape_utils.c +7 -115
- data/ext/escape_utils/houdini.h +3 -5
- data/ext/escape_utils/houdini_html_e.c +52 -24
- data/ext/escape_utils/houdini_uri_e.c +6 -17
- data/ext/escape_utils/houdini_uri_u.c +5 -15
- data/ext/escape_utils/houdini_xml_e.c +15 -1
- data/lib/escape_utils/html/cgi.rb +10 -8
- data/lib/escape_utils/html/erb.rb +1 -10
- data/lib/escape_utils/html/haml.rb +1 -7
- data/lib/escape_utils/html/rack.rb +3 -3
- data/lib/escape_utils/html_safety.rb +13 -0
- data/lib/escape_utils/url/cgi.rb +0 -8
- data/lib/escape_utils/url/erb.rb +1 -1
- data/lib/escape_utils/url/uri.rb +11 -7
- data/lib/escape_utils/version.rb +1 -1
- data/lib/escape_utils.rb +61 -9
- data/test/helper.rb +16 -3
- data/test/html/escape_test.rb +41 -39
- data/test/html/unescape_test.rb +0 -16
- data/test/html_safety_test.rb +1 -27
- data/test/javascript/escape_test.rb +0 -5
- data/test/query/escape_test.rb +0 -16
- data/test/query/unescape_test.rb +2 -18
- data/test/uri/unescape_test.rb +2 -2
- data/test/uri_component/unescape_test.rb +2 -2
- data/test/url/escape_test.rb +0 -16
- data/test/url/unescape_test.rb +2 -18
- metadata +6 -8
- data/benchmark/html_escape.rb +0 -68
- data/benchmark/html_unescape.rb +0 -35
- data/benchmark/url_escape.rb +0 -56
- data/benchmark/url_unescape.rb +0 -50
- data/ext/escape_utils/houdini_html_u.c +0 -122
@@ -1,10 +1,5 @@
|
|
1
1
|
require File.expand_path("../../helper", __FILE__)
|
2
2
|
|
3
|
-
require 'active_support'
|
4
|
-
require 'active_support/json'
|
5
|
-
require 'action_view'
|
6
|
-
require 'action_view/helpers'
|
7
|
-
|
8
3
|
class JavascriptEscapeTest < Minitest::Test
|
9
4
|
ActiveSupport.escape_html_entities_in_json = true
|
10
5
|
|
data/test/query/escape_test.rb
CHANGED
@@ -25,22 +25,6 @@ class QueryEscapeTest < Minitest::Test
|
|
25
25
|
assert_equal '%E3%81%BE%E3%81%A4+%E3%82%82%E3%81%A8', EscapeUtils.escape_url(matz_name_sep)
|
26
26
|
end
|
27
27
|
|
28
|
-
def test_input_must_be_utf8_or_ascii
|
29
|
-
str = "a space"
|
30
|
-
|
31
|
-
str.force_encoding 'ISO-8859-1'
|
32
|
-
assert_raises Encoding::CompatibilityError do
|
33
|
-
EscapeUtils.escape_url(str)
|
34
|
-
end
|
35
|
-
|
36
|
-
str.force_encoding 'UTF-8'
|
37
|
-
begin
|
38
|
-
EscapeUtils.escape_url(str)
|
39
|
-
rescue Encoding::CompatibilityError => e
|
40
|
-
assert_nil e, "#{e.class.name} raised, expected not to"
|
41
|
-
end
|
42
|
-
end
|
43
|
-
|
44
28
|
def test_return_value_is_tagged_as_utf8
|
45
29
|
str = "a+space"
|
46
30
|
assert_equal Encoding.find('UTF-8'), EscapeUtils.escape_url(str).encoding
|
data/test/query/unescape_test.rb
CHANGED
@@ -20,29 +20,13 @@ class QueryUnescapeTest < Minitest::Test
|
|
20
20
|
|
21
21
|
def test_url_containing_multibyte_characters
|
22
22
|
matz_name = "\xE3\x81\xBE\xE3\x81\xA4\xE3\x82\x82\xE3\x81\xA8" # Matsumoto
|
23
|
-
matz_name.force_encoding('UTF-8')
|
23
|
+
matz_name.force_encoding('UTF-8')
|
24
24
|
assert_equal matz_name, EscapeUtils.unescape_url('%E3%81%BE%E3%81%A4%E3%82%82%E3%81%A8')
|
25
25
|
matz_name_sep = "\xE3\x81\xBE\xE3\x81\xA4 \xE3\x82\x82\xE3\x81\xA8" # Matsu moto
|
26
|
-
matz_name_sep.force_encoding('UTF-8')
|
26
|
+
matz_name_sep.force_encoding('UTF-8')
|
27
27
|
assert_equal matz_name_sep, EscapeUtils.unescape_url('%E3%81%BE%E3%81%A4+%E3%82%82%E3%81%A8')
|
28
28
|
end
|
29
29
|
|
30
|
-
def test_input_must_be_valid_utf8_or_ascii
|
31
|
-
escaped = EscapeUtils.unescape_url("a+space")
|
32
|
-
|
33
|
-
escaped.force_encoding 'ISO-8859-1'
|
34
|
-
assert_raises Encoding::CompatibilityError do
|
35
|
-
EscapeUtils.unescape_url(escaped)
|
36
|
-
end
|
37
|
-
|
38
|
-
escaped.force_encoding 'UTF-8'
|
39
|
-
begin
|
40
|
-
EscapeUtils.unescape_url(escaped)
|
41
|
-
rescue Encoding::CompatibilityError => e
|
42
|
-
assert_nil e, "#{e.class.name} raised, expected not to"
|
43
|
-
end
|
44
|
-
end
|
45
|
-
|
46
30
|
def test_return_value_is_tagged_as_utf8
|
47
31
|
escaped = EscapeUtils.escape_url("a space")
|
48
32
|
assert_equal Encoding.find('UTF-8'), EscapeUtils.unescape_url(escaped).encoding
|
data/test/uri/unescape_test.rb
CHANGED
@@ -23,10 +23,10 @@ class UriUnescapeTest < Minitest::Test
|
|
23
23
|
|
24
24
|
def test_uri_containing_multibyte_charactes
|
25
25
|
matz_name = "\xE3\x81\xBE\xE3\x81\xA4\xE3\x82\x82\xE3\x81\xA8" # Matsumoto
|
26
|
-
matz_name.force_encoding('UTF-8')
|
26
|
+
matz_name.force_encoding('UTF-8')
|
27
27
|
assert_equal matz_name, EscapeUtils.unescape_uri('%E3%81%BE%E3%81%A4%E3%82%82%E3%81%A8')
|
28
28
|
matz_name_sep = "\xE3\x81\xBE\xE3\x81\xA4 \xE3\x82\x82\xE3\x81\xA8" # Matsu moto
|
29
|
-
matz_name_sep.force_encoding('UTF-8')
|
29
|
+
matz_name_sep.force_encoding('UTF-8')
|
30
30
|
assert_equal matz_name_sep, EscapeUtils.unescape_uri('%E3%81%BE%E3%81%A4%20%E3%82%82%E3%81%A8')
|
31
31
|
end
|
32
32
|
|
@@ -39,10 +39,10 @@ class UriComponentUnescapeTest < Minitest::Test
|
|
39
39
|
|
40
40
|
def test_multibyte_characters
|
41
41
|
matz_name = "\xE3\x81\xBE\xE3\x81\xA4\xE3\x82\x82\xE3\x81\xA8" # Matsumoto
|
42
|
-
matz_name.force_encoding('UTF-8')
|
42
|
+
matz_name.force_encoding('UTF-8')
|
43
43
|
assert_equal matz_name, EscapeUtils.unescape_uri_component('%E3%81%BE%E3%81%A4%E3%82%82%E3%81%A8')
|
44
44
|
matz_name_sep = "\xE3\x81\xBE\xE3\x81\xA4 \xE3\x82\x82\xE3\x81\xA8" # Matsu moto
|
45
|
-
matz_name_sep.force_encoding('UTF-8')
|
45
|
+
matz_name_sep.force_encoding('UTF-8')
|
46
46
|
assert_equal matz_name_sep, EscapeUtils.unescape_uri_component('%E3%81%BE%E3%81%A4%20%E3%82%82%E3%81%A8')
|
47
47
|
end
|
48
48
|
|
data/test/url/escape_test.rb
CHANGED
@@ -41,22 +41,6 @@ class UrlEscapeTest < Minitest::Test
|
|
41
41
|
assert_equal "a%2Fslash", EscapeUtils.escape_url("a/slash")
|
42
42
|
end
|
43
43
|
|
44
|
-
def test_input_must_be_utf8_or_ascii
|
45
|
-
str = "fo<o>bar"
|
46
|
-
|
47
|
-
str.force_encoding 'ISO-8859-1'
|
48
|
-
assert_raises Encoding::CompatibilityError do
|
49
|
-
EscapeUtils.escape_url(str)
|
50
|
-
end
|
51
|
-
|
52
|
-
str.force_encoding 'UTF-8'
|
53
|
-
begin
|
54
|
-
EscapeUtils.escape_url(str)
|
55
|
-
rescue Encoding::CompatibilityError => e
|
56
|
-
assert_nil e, "#{e.class.name} raised, expected not to"
|
57
|
-
end
|
58
|
-
end
|
59
|
-
|
60
44
|
def test_return_value_is_tagged_as_utf8
|
61
45
|
str = "fo<o>bar"
|
62
46
|
assert_equal Encoding.find('UTF-8'), EscapeUtils.escape_url(str).encoding
|
data/test/url/unescape_test.rb
CHANGED
@@ -28,10 +28,10 @@ class UrlUnescapeTest < Minitest::Test
|
|
28
28
|
|
29
29
|
def test_multibyte_characters
|
30
30
|
matz_name = "\xE3\x81\xBE\xE3\x81\xA4\xE3\x82\x82\xE3\x81\xA8" # Matsumoto
|
31
|
-
matz_name.force_encoding('UTF-8')
|
31
|
+
matz_name.force_encoding('UTF-8')
|
32
32
|
assert_equal matz_name, EscapeUtils.unescape_url('%E3%81%BE%E3%81%A4%E3%82%82%E3%81%A8')
|
33
33
|
matz_name_sep = "\xE3\x81\xBE\xE3\x81\xA4 \xE3\x82\x82\xE3\x81\xA8" # Matsu moto
|
34
|
-
matz_name_sep.force_encoding('UTF-8')
|
34
|
+
matz_name_sep.force_encoding('UTF-8')
|
35
35
|
assert_equal matz_name_sep, EscapeUtils.unescape_url('%E3%81%BE%E3%81%A4%20%E3%82%82%E3%81%A8')
|
36
36
|
end
|
37
37
|
|
@@ -46,22 +46,6 @@ class UrlUnescapeTest < Minitest::Test
|
|
46
46
|
end
|
47
47
|
end
|
48
48
|
|
49
|
-
def test_input_must_be_valid_utf8_or_ascii
|
50
|
-
escaped = EscapeUtils.escape_url("fo<o>bar")
|
51
|
-
|
52
|
-
escaped.force_encoding 'ISO-8859-1'
|
53
|
-
assert_raises Encoding::CompatibilityError do
|
54
|
-
EscapeUtils.unescape_url(escaped)
|
55
|
-
end
|
56
|
-
|
57
|
-
escaped.force_encoding 'UTF-8'
|
58
|
-
begin
|
59
|
-
EscapeUtils.unescape_url(escaped)
|
60
|
-
rescue Encoding::CompatibilityError => e
|
61
|
-
assert_nil e, "#{e.class.name} raised, expected not to"
|
62
|
-
end
|
63
|
-
end
|
64
|
-
|
65
49
|
def test_return_value_is_tagged_as_utf8
|
66
50
|
escaped = EscapeUtils.escape_url("fo<o>bar")
|
67
51
|
assert_equal Encoding.find('UTF-8'), EscapeUtils.unescape_url(escaped).encoding
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: escape_utils
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.2.2
|
4
|
+
version: 1.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Brian Lopez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-06-
|
11
|
+
date: 2022-06-16 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Quickly perform HTML, URL, URI and Javascript escaping/unescaping
|
14
14
|
email: seniorlopez@gmail.com
|
@@ -24,12 +24,11 @@ files:
|
|
24
24
|
- LICENSE
|
25
25
|
- README.md
|
26
26
|
- Rakefile
|
27
|
-
- benchmark/html_escape.rb
|
28
|
-
- benchmark/html_unescape.rb
|
27
|
+
- benchmark/html_escape_once.rb
|
29
28
|
- benchmark/javascript_escape.rb
|
30
29
|
- benchmark/javascript_unescape.rb
|
31
|
-
- benchmark/url_escape.rb
|
32
|
-
- benchmark/url_unescape.rb
|
30
|
+
- benchmark/url_decode.rb
|
31
|
+
- benchmark/url_encode.rb
|
33
32
|
- benchmark/xml_escape.rb
|
34
33
|
- bin/console
|
35
34
|
- escape_utils.gemspec
|
@@ -40,7 +39,6 @@ files:
|
|
40
39
|
- ext/escape_utils/houdini.h
|
41
40
|
- ext/escape_utils/houdini_href_e.c
|
42
41
|
- ext/escape_utils/houdini_html_e.c
|
43
|
-
- ext/escape_utils/houdini_html_u.c
|
44
42
|
- ext/escape_utils/houdini_js_e.c
|
45
43
|
- ext/escape_utils/houdini_js_u.c
|
46
44
|
- ext/escape_utils/houdini_uri_e.c
|
@@ -97,7 +95,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
97
95
|
- !ruby/object:Gem::Version
|
98
96
|
version: '0'
|
99
97
|
requirements: []
|
100
|
-
rubygems_version: 3.
|
98
|
+
rubygems_version: 3.1.2
|
101
99
|
signing_key:
|
102
100
|
specification_version: 4
|
103
101
|
summary: Faster string escaping routines for your web apps
|
data/benchmark/html_escape.rb
DELETED
@@ -1,68 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
require 'rubygems'
|
4
|
-
require 'bundler/setup'
|
5
|
-
require 'benchmark/ips'
|
6
|
-
|
7
|
-
require 'rack'
|
8
|
-
require 'erb'
|
9
|
-
require 'cgi'
|
10
|
-
require 'haml'
|
11
|
-
require 'fast_xs_extra'
|
12
|
-
require 'escape_utils'
|
13
|
-
|
14
|
-
module HamlBench
|
15
|
-
extend Haml::Helpers
|
16
|
-
end
|
17
|
-
|
18
|
-
url = "https://en.wikipedia.org/wiki/Succession_to_the_British_throne"
|
19
|
-
html = `curl -s #{url}`
|
20
|
-
html = html.force_encoding('utf-8') if html.respond_to?(:force_encoding)
|
21
|
-
puts "Escaping #{html.bytesize} bytes of html from #{url}"
|
22
|
-
|
23
|
-
Benchmark.ips do |x|
|
24
|
-
x.report "Rack::Utils.escape_html" do |times|
|
25
|
-
times.times do
|
26
|
-
Rack::Utils.escape_html(html)
|
27
|
-
end
|
28
|
-
end
|
29
|
-
|
30
|
-
x.report "Haml::Helpers.html_escape" do |times|
|
31
|
-
times.times do
|
32
|
-
HamlBench.html_escape(html)
|
33
|
-
end
|
34
|
-
end
|
35
|
-
|
36
|
-
x.report "ERB::Util.html_escape" do |times|
|
37
|
-
times.times do
|
38
|
-
ERB::Util.html_escape(html)
|
39
|
-
end
|
40
|
-
end
|
41
|
-
|
42
|
-
x.report "CGI.escapeHTML" do |times|
|
43
|
-
times.times do
|
44
|
-
CGI.escapeHTML(html)
|
45
|
-
end
|
46
|
-
end
|
47
|
-
|
48
|
-
x.report "String#gsub" do |times|
|
49
|
-
html_escape = { '&' => '&amp;', '>' => '&gt;', '<' => '&lt;', '"' => '&quot;', "'" => '&#39;' }
|
50
|
-
times.times do
|
51
|
-
html.gsub(/[&"'><]/, html_escape)
|
52
|
-
end
|
53
|
-
end
|
54
|
-
|
55
|
-
x.report "fast_xs_extra#fast_xs_html" do |times|
|
56
|
-
times.times do
|
57
|
-
html.fast_xs_html
|
58
|
-
end
|
59
|
-
end
|
60
|
-
|
61
|
-
x.report "EscapeUtils.escape_html" do |times|
|
62
|
-
times.times do
|
63
|
-
EscapeUtils.escape_html(html)
|
64
|
-
end
|
65
|
-
end
|
66
|
-
|
67
|
-
x.compare!
|
68
|
-
end
|
data/benchmark/html_unescape.rb
DELETED
@@ -1,35 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
require 'rubygems'
|
4
|
-
require 'bundler/setup'
|
5
|
-
require 'benchmark/ips'
|
6
|
-
|
7
|
-
require 'cgi'
|
8
|
-
require 'haml'
|
9
|
-
require 'escape_utils'
|
10
|
-
|
11
|
-
module HamlBench
|
12
|
-
extend Haml::Helpers
|
13
|
-
end
|
14
|
-
|
15
|
-
url = "https://en.wikipedia.org/wiki/Succession_to_the_British_throne"
|
16
|
-
html = `curl -s #{url}`
|
17
|
-
html = html.force_encoding('binary') if html.respond_to?(:force_encoding)
|
18
|
-
escaped_html = EscapeUtils.escape_html(html)
|
19
|
-
puts "Unescaping #{escaped_html.bytesize} bytes of escaped html, from #{url}"
|
20
|
-
|
21
|
-
Benchmark.ips do |x|
|
22
|
-
x.report "CGI.unescapeHTML" do |times|
|
23
|
-
times.times do
|
24
|
-
CGI.unescapeHTML(escaped_html)
|
25
|
-
end
|
26
|
-
end
|
27
|
-
|
28
|
-
x.report "EscapeUtils.unescape_html" do |times|
|
29
|
-
times.times do
|
30
|
-
EscapeUtils.unescape_html(escaped_html)
|
31
|
-
end
|
32
|
-
end
|
33
|
-
|
34
|
-
x.compare!
|
35
|
-
end
|
data/benchmark/url_escape.rb
DELETED
@@ -1,56 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
require 'rubygems'
|
4
|
-
require 'bundler/setup'
|
5
|
-
require 'benchmark/ips'
|
6
|
-
|
7
|
-
require 'rack'
|
8
|
-
require 'erb'
|
9
|
-
require 'cgi'
|
10
|
-
require 'url_escape'
|
11
|
-
require 'fast_xs_extra'
|
12
|
-
require 'escape_utils'
|
13
|
-
|
14
|
-
url = "https://www.yourmom.com/cgi-bin/session.cgi?sess_args=mYHcEA dh435dqUs0moGHeeAJTSLLbdbcbd9ef----,574b95600e9ab7d27eb0bf524ac68c27----"
|
15
|
-
url = url.force_encoding('us-ascii') if url.respond_to?(:force_encoding)
|
16
|
-
puts "Escaping a #{url.bytesize} byte URL times"
|
17
|
-
|
18
|
-
Benchmark.ips do |x|
|
19
|
-
x.report "ERB::Util.url_encode" do |times|
|
20
|
-
times.times do
|
21
|
-
ERB::Util.url_encode(url)
|
22
|
-
end
|
23
|
-
end
|
24
|
-
|
25
|
-
x.report "Rack::Utils.escape" do |times|
|
26
|
-
times.times do
|
27
|
-
Rack::Utils.escape(url)
|
28
|
-
end
|
29
|
-
end
|
30
|
-
|
31
|
-
x.report "CGI.escape" do |times|
|
32
|
-
times.times do
|
33
|
-
CGI.escape(url)
|
34
|
-
end
|
35
|
-
end
|
36
|
-
|
37
|
-
x.report "URLEscape#escape" do |times|
|
38
|
-
times.times do
|
39
|
-
URLEscape.escape(url)
|
40
|
-
end
|
41
|
-
end
|
42
|
-
|
43
|
-
x.report "fast_xs_extra#fast_xs_url" do |times|
|
44
|
-
times.times do
|
45
|
-
url.fast_xs_url
|
46
|
-
end
|
47
|
-
end
|
48
|
-
|
49
|
-
x.report "EscapeUtils.escape_url" do |times|
|
50
|
-
times.times do
|
51
|
-
EscapeUtils.escape_url(url)
|
52
|
-
end
|
53
|
-
end
|
54
|
-
|
55
|
-
x.compare!
|
56
|
-
end
|
data/benchmark/url_unescape.rb
DELETED
@@ -1,50 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
require 'rubygems'
|
4
|
-
require 'bundler/setup'
|
5
|
-
require 'benchmark/ips'
|
6
|
-
|
7
|
-
require 'rack'
|
8
|
-
require 'cgi'
|
9
|
-
require 'url_escape'
|
10
|
-
require 'fast_xs_extra'
|
11
|
-
require 'escape_utils'
|
12
|
-
|
13
|
-
url = "https://www.yourmom.com/cgi-bin/session.cgi?sess_args=mYHcEA dh435dqUs0moGHeeAJTSLLbdbcbd9ef----,574b95600e9ab7d27eb0bf524ac68c27----"
|
14
|
-
url = url.force_encoding('us-ascii') if url.respond_to?(:force_encoding)
|
15
|
-
escaped_url = EscapeUtils.escape_url(url)
|
16
|
-
puts "Escaping a #{url.bytesize} byte URL"
|
17
|
-
|
18
|
-
Benchmark.ips do |x|
|
19
|
-
x.report "Rack::Utils.unescape" do |times|
|
20
|
-
times.times do
|
21
|
-
Rack::Utils.unescape(escaped_url)
|
22
|
-
end
|
23
|
-
end
|
24
|
-
|
25
|
-
x.report "CGI.unescape" do |times|
|
26
|
-
times.times do
|
27
|
-
CGI.unescape(escaped_url)
|
28
|
-
end
|
29
|
-
end
|
30
|
-
|
31
|
-
x.report "URLEscape#unescape" do |times|
|
32
|
-
times.times do
|
33
|
-
URLEscape.unescape(escaped_url)
|
34
|
-
end
|
35
|
-
end
|
36
|
-
|
37
|
-
x.report "fast_xs_extra#fast_uxs_cgi" do |times|
|
38
|
-
times.times do
|
39
|
-
url.fast_uxs_cgi
|
40
|
-
end
|
41
|
-
end
|
42
|
-
|
43
|
-
x.report "EscapeUtils.unescape_url" do |times|
|
44
|
-
times.times do
|
45
|
-
EscapeUtils.unescape_url(escaped_url)
|
46
|
-
end
|
47
|
-
end
|
48
|
-
|
49
|
-
x.compare!
|
50
|
-
end
|
@@ -1,122 +0,0 @@
|
|
1
|
-
#include <assert.h>
|
2
|
-
#include <stdio.h>
|
3
|
-
#include <string.h>
|
4
|
-
|
5
|
-
#include "houdini.h"
|
6
|
-
#include "html_unescape.h"
|
7
|
-
|
8
|
-
static inline void
|
9
|
-
gh_buf_put_utf8(gh_buf *ob, int c)
|
10
|
-
{
|
11
|
-
unsigned char unichar[4];
|
12
|
-
|
13
|
-
if (c < 0x80) {
|
14
|
-
gh_buf_putc(ob, c);
|
15
|
-
}
|
16
|
-
else if (c < 0x800) {
|
17
|
-
unichar[0] = 192 + (c / 64);
|
18
|
-
unichar[1] = 128 + (c % 64);
|
19
|
-
gh_buf_put(ob, unichar, 2);
|
20
|
-
}
|
21
|
-
else if (c - 0xd800u < 0x800) {
|
22
|
-
gh_buf_putc(ob, '?');
|
23
|
-
}
|
24
|
-
else if (c < 0x10000) {
|
25
|
-
unichar[0] = 224 + (c / 4096);
|
26
|
-
unichar[1] = 128 + (c / 64) % 64;
|
27
|
-
unichar[2] = 128 + (c % 64);
|
28
|
-
gh_buf_put(ob, unichar, 3);
|
29
|
-
}
|
30
|
-
else if (c < 0x110000) {
|
31
|
-
unichar[0] = 240 + (c / 262144);
|
32
|
-
unichar[1] = 128 + (c / 4096) % 64;
|
33
|
-
unichar[2] = 128 + (c / 64) % 64;
|
34
|
-
unichar[3] = 128 + (c % 64);
|
35
|
-
gh_buf_put(ob, unichar, 4);
|
36
|
-
}
|
37
|
-
else {
|
38
|
-
gh_buf_putc(ob, '?');
|
39
|
-
}
|
40
|
-
}
|
41
|
-
|
42
|
-
static size_t
|
43
|
-
unescape_ent(gh_buf *ob, const uint8_t *src, size_t size)
|
44
|
-
{
|
45
|
-
size_t i = 0;
|
46
|
-
|
47
|
-
if (size > 3 && src[0] == '#') {
|
48
|
-
int codepoint = 0;
|
49
|
-
|
50
|
-
if (_isdigit(src[1])) {
|
51
|
-
for (i = 1; i < size && _isdigit(src[i]); ++i)
|
52
|
-
codepoint = (codepoint * 10) + (src[i] - '0');
|
53
|
-
}
|
54
|
-
|
55
|
-
else if (src[1] == 'x' || src[1] == 'X') {
|
56
|
-
for (i = 2; i < size && _isxdigit(src[i]); ++i)
|
57
|
-
codepoint = (codepoint * 16) + ((src[i] | 32) % 39 - 9);
|
58
|
-
}
|
59
|
-
|
60
|
-
if (i < size && src[i] == ';' && codepoint) {
|
61
|
-
gh_buf_put_utf8(ob, codepoint);
|
62
|
-
return i + 1;
|
63
|
-
}
|
64
|
-
}
|
65
|
-
|
66
|
-
else {
|
67
|
-
if (size > MAX_WORD_LENGTH)
|
68
|
-
size = MAX_WORD_LENGTH;
|
69
|
-
|
70
|
-
for (i = MIN_WORD_LENGTH; i < size; ++i) {
|
71
|
-
if (src[i] == ' ')
|
72
|
-
break;
|
73
|
-
|
74
|
-
if (src[i] == ';') {
|
75
|
-
const struct html_ent *entity = find_entity((char *)src, i);
|
76
|
-
|
77
|
-
if (entity != NULL) {
|
78
|
-
gh_buf_put(ob, entity->utf8, entity->utf8_len);
|
79
|
-
return i + 1;
|
80
|
-
}
|
81
|
-
|
82
|
-
break;
|
83
|
-
}
|
84
|
-
}
|
85
|
-
}
|
86
|
-
|
87
|
-
gh_buf_putc(ob, '&');
|
88
|
-
return 0;
|
89
|
-
}
|
90
|
-
|
91
|
-
int
|
92
|
-
houdini_unescape_html(gh_buf *ob, const uint8_t *src, size_t size)
|
93
|
-
{
|
94
|
-
size_t i = 0, org;
|
95
|
-
|
96
|
-
while (i < size) {
|
97
|
-
org = i;
|
98
|
-
while (i < size && src[i] != '&')
|
99
|
-
i++;
|
100
|
-
|
101
|
-
if (likely(i > org)) {
|
102
|
-
if (unlikely(org == 0)) {
|
103
|
-
if (i >= size)
|
104
|
-
return 0;
|
105
|
-
|
106
|
-
gh_buf_grow(ob, HOUDINI_UNESCAPED_SIZE(size));
|
107
|
-
}
|
108
|
-
|
109
|
-
gh_buf_put(ob, src + org, i - org);
|
110
|
-
}
|
111
|
-
|
112
|
-
/* escaping */
|
113
|
-
if (i >= size)
|
114
|
-
break;
|
115
|
-
|
116
|
-
i++;
|
117
|
-
i += unescape_ent(ob, src + i, size - i);
|
118
|
-
}
|
119
|
-
|
120
|
-
return 1;
|
121
|
-
}
|
122
|
-
|