escape_utils 0.2.4 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (53) hide show
  1. data/.gitignore +2 -1
  2. data/.travis.yml +13 -0
  3. data/CHANGELOG.md +7 -0
  4. data/MIT-LICENSE +1 -1
  5. data/Rakefile +5 -18
  6. data/benchmark/html_escape.rb +9 -2
  7. data/benchmark/xml_escape.rb +29 -0
  8. data/escape_utils.gemspec +2 -3
  9. data/ext/escape_utils/buffer.c +181 -160
  10. data/ext/escape_utils/buffer.h +90 -68
  11. data/ext/escape_utils/escape_utils.c +77 -39
  12. data/ext/escape_utils/extconf.rb +1 -1
  13. data/ext/escape_utils/houdini.h +37 -8
  14. data/ext/escape_utils/houdini_href_e.c +115 -0
  15. data/ext/escape_utils/houdini_html_e.c +90 -0
  16. data/ext/escape_utils/houdini_html_u.c +122 -0
  17. data/ext/escape_utils/{houdini_js.c → houdini_js_e.c} +17 -75
  18. data/ext/escape_utils/houdini_js_u.c +60 -0
  19. data/ext/escape_utils/{uri_escape.h → houdini_uri_e.c} +68 -2
  20. data/ext/escape_utils/houdini_uri_u.c +65 -0
  21. data/ext/escape_utils/houdini_xml_e.c +136 -0
  22. data/lib/escape_utils/version.rb +1 -1
  23. data/lib/escape_utils/xml/builder.rb +8 -0
  24. data/test/helper.rb +14 -0
  25. data/test/html/escape_test.rb +61 -0
  26. data/test/html/unescape_test.rb +48 -0
  27. data/test/html_safety_test.rb +46 -0
  28. data/test/javascript/escape_test.rb +42 -0
  29. data/test/javascript/unescape_test.rb +46 -0
  30. data/test/query/escape_test.rb +50 -0
  31. data/test/query/unescape_test.rb +52 -0
  32. data/test/uri/escape_test.rb +50 -0
  33. data/test/uri/unescape_test.rb +55 -0
  34. data/test/url/escape_test.rb +58 -0
  35. data/test/url/unescape_test.rb +60 -0
  36. data/test/xml/escape_test.rb +67 -0
  37. metadata +136 -152
  38. data/.rspec +0 -2
  39. data/ext/escape_utils/houdini_html.c +0 -214
  40. data/ext/escape_utils/houdini_uri.c +0 -130
  41. data/spec/html/escape_spec.rb +0 -42
  42. data/spec/html/unescape_spec.rb +0 -37
  43. data/spec/html_safety_spec.rb +0 -48
  44. data/spec/javascript/escape_spec.rb +0 -34
  45. data/spec/javascript/unescape_spec.rb +0 -37
  46. data/spec/query/escape_spec.rb +0 -44
  47. data/spec/query/unescape_spec.rb +0 -46
  48. data/spec/rcov.opts +0 -3
  49. data/spec/spec_helper.rb +0 -5
  50. data/spec/uri/escape_spec.rb +0 -43
  51. data/spec/uri/unescape_spec.rb +0 -57
  52. data/spec/url/escape_spec.rb +0 -52
  53. data/spec/url/unescape_spec.rb +0 -57
@@ -0,0 +1,67 @@
1
+ require File.expand_path("../../helper", __FILE__)
2
+
3
+ class XmlEscapeTest < MiniTest::Unit::TestCase
4
+ def test_basic_xml
5
+ assert_equal "&lt;some_tag/&gt;", EscapeUtils.escape_xml("<some_tag/>")
6
+ end
7
+
8
+ def test_double_quotes
9
+ assert_equal "&lt;some_tag some_attr=&quot;some value&quot;/&gt;", EscapeUtils.escape_xml("<some_tag some_attr=\"some value\"/>")
10
+ end
11
+
12
+ def test_single_quotes
13
+ assert_equal "&lt;some_tag some_attr=&apos;some value&apos;/&gt;", EscapeUtils.escape_xml("<some_tag some_attr='some value'/>")
14
+ end
15
+
16
+ def test_ampersand
17
+ assert_equal "&lt;b&gt;Bourbon &amp; Branch&lt;/b&gt;", EscapeUtils.escape_xml("<b>Bourbon & Branch</b>")
18
+ end
19
+
20
+ # See http://www.w3.org/TR/REC-xml/#charsets for details.
21
+ VALID = [
22
+ (0x9..0xA), 0xD,
23
+ (0x20..0xD7FF),
24
+ (0xE000..0xFFFD),
25
+ (0x10000..0x10FFFF)
26
+ ]
27
+
28
+ REPLACEMENT_CHAR = "?".unpack('U*').first
29
+
30
+ def test_invalid_characters
31
+ VALID.each do |range|
32
+ if range.kind_of? Range
33
+ start = range.begin
34
+ last = range.end
35
+ last -= 1 if range.exclude_end?
36
+ else
37
+ start = last = range
38
+ end
39
+ input = [start.pred, start, last, last.next].pack('U*')
40
+ expect = [REPLACEMENT_CHAR, start, last, REPLACEMENT_CHAR].pack('U*')
41
+ assert_equal expect, EscapeUtils.escape_xml(input)
42
+ end
43
+ end
44
+
45
+ if RUBY_VERSION =~ /^1.9/
46
+ def test_input_must_be_utf8_or_ascii
47
+ str = "<some_tag/>"
48
+
49
+ str.force_encoding 'ISO-8859-1'
50
+ assert_raises Encoding::CompatibilityError do
51
+ EscapeUtils.escape_xml(str)
52
+ end
53
+
54
+ str.force_encoding 'UTF-8'
55
+ begin
56
+ EscapeUtils.escape_xml(str)
57
+ rescue Encoding::CompatibilityError => e
58
+ assert_nil e, "#{e.class.name} raised, expected not to"
59
+ end
60
+ end
61
+
62
+ def test_return_value_is_tagged_as_utf8
63
+ str = "<some_tag/>"
64
+ assert_equal Encoding.find('UTF-8'), EscapeUtils.escape_url(str).encoding
65
+ end
66
+ end
67
+ end
metadata CHANGED
@@ -1,136 +1,137 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: escape_utils
3
- version: !ruby/object:Gem::Version
4
- hash: 31
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.3.0
5
5
  prerelease:
6
- segments:
7
- - 0
8
- - 2
9
- - 4
10
- version: 0.2.4
11
6
  platform: ruby
12
- authors:
7
+ authors:
13
8
  - Brian Lopez
14
9
  autorequire:
15
10
  bindir: bin
16
11
  cert_chain: []
17
-
18
- date: 2011-09-07 00:00:00 -07:00
19
- default_executable:
20
- dependencies:
21
- - !ruby/object:Gem::Dependency
12
+ date: 2013-02-26 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
22
15
  name: rake-compiler
23
- prerelease: false
24
- requirement: &id001 !ruby/object:Gem::Requirement
16
+ requirement: !ruby/object:Gem::Requirement
25
17
  none: false
26
- requirements:
27
- - - ">="
28
- - !ruby/object:Gem::Version
29
- hash: 9
30
- segments:
31
- - 0
32
- - 7
33
- - 5
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
34
21
  version: 0.7.5
35
22
  type: :development
36
- version_requirements: *id001
37
- - !ruby/object:Gem::Dependency
38
- name: rspec
39
23
  prerelease: false
40
- requirement: &id002 !ruby/object:Gem::Requirement
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: 0.7.5
30
+ - !ruby/object:Gem::Dependency
31
+ name: minitest
32
+ requirement: !ruby/object:Gem::Requirement
41
33
  none: false
42
- requirements:
43
- - - ">="
44
- - !ruby/object:Gem::Version
45
- hash: 15
46
- segments:
47
- - 2
48
- - 0
49
- - 0
50
- version: 2.0.0
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
51
38
  type: :development
52
- version_requirements: *id002
53
- - !ruby/object:Gem::Dependency
54
- name: rack
55
39
  prerelease: false
56
- requirement: &id003 !ruby/object:Gem::Requirement
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ - !ruby/object:Gem::Dependency
47
+ name: rack
48
+ requirement: !ruby/object:Gem::Requirement
57
49
  none: false
58
- requirements:
59
- - - ">="
60
- - !ruby/object:Gem::Version
61
- hash: 3
62
- segments:
63
- - 0
64
- version: "0"
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
65
54
  type: :development
66
- version_requirements: *id003
67
- - !ruby/object:Gem::Dependency
68
- name: haml
69
55
  prerelease: false
70
- requirement: &id004 !ruby/object:Gem::Requirement
56
+ version_requirements: !ruby/object:Gem::Requirement
71
57
  none: false
72
- requirements:
73
- - - ">="
74
- - !ruby/object:Gem::Version
75
- hash: 3
76
- segments:
77
- - 0
78
- version: "0"
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ - !ruby/object:Gem::Dependency
63
+ name: haml
64
+ requirement: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ! '>='
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
79
70
  type: :development
80
- version_requirements: *id004
81
- - !ruby/object:Gem::Dependency
82
- name: fast_xs
83
71
  prerelease: false
84
- requirement: &id005 !ruby/object:Gem::Requirement
72
+ version_requirements: !ruby/object:Gem::Requirement
85
73
  none: false
86
- requirements:
87
- - - ">="
88
- - !ruby/object:Gem::Version
89
- hash: 3
90
- segments:
91
- - 0
92
- version: "0"
74
+ requirements:
75
+ - - ! '>='
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
78
+ - !ruby/object:Gem::Dependency
79
+ name: fast_xs
80
+ requirement: !ruby/object:Gem::Requirement
81
+ none: false
82
+ requirements:
83
+ - - ! '>='
84
+ - !ruby/object:Gem::Version
85
+ version: '0'
93
86
  type: :development
94
- version_requirements: *id005
95
- - !ruby/object:Gem::Dependency
96
- name: actionpack
97
87
  prerelease: false
98
- requirement: &id006 !ruby/object:Gem::Requirement
88
+ version_requirements: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ! '>='
92
+ - !ruby/object:Gem::Version
93
+ version: '0'
94
+ - !ruby/object:Gem::Dependency
95
+ name: actionpack
96
+ requirement: !ruby/object:Gem::Requirement
99
97
  none: false
100
- requirements:
101
- - - ">="
102
- - !ruby/object:Gem::Version
103
- hash: 3
104
- segments:
105
- - 0
106
- version: "0"
98
+ requirements:
99
+ - - ! '>='
100
+ - !ruby/object:Gem::Version
101
+ version: '0'
107
102
  type: :development
108
- version_requirements: *id006
109
- - !ruby/object:Gem::Dependency
110
- name: url_escape
111
103
  prerelease: false
112
- requirement: &id007 !ruby/object:Gem::Requirement
104
+ version_requirements: !ruby/object:Gem::Requirement
105
+ none: false
106
+ requirements:
107
+ - - ! '>='
108
+ - !ruby/object:Gem::Version
109
+ version: '0'
110
+ - !ruby/object:Gem::Dependency
111
+ name: url_escape
112
+ requirement: !ruby/object:Gem::Requirement
113
113
  none: false
114
- requirements:
115
- - - ">="
116
- - !ruby/object:Gem::Version
117
- hash: 3
118
- segments:
119
- - 0
120
- version: "0"
114
+ requirements:
115
+ - - ! '>='
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
121
118
  type: :development
122
- version_requirements: *id007
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ none: false
122
+ requirements:
123
+ - - ! '>='
124
+ - !ruby/object:Gem::Version
125
+ version: '0'
123
126
  description:
124
127
  email: seniorlopez@gmail.com
125
128
  executables: []
126
-
127
- extensions:
129
+ extensions:
128
130
  - ext/escape_utils/extconf.rb
129
131
  extra_rdoc_files: []
130
-
131
- files:
132
+ files:
132
133
  - .gitignore
133
- - .rspec
134
+ - .travis.yml
134
135
  - CHANGELOG.md
135
136
  - Gemfile
136
137
  - MIT-LICENSE
@@ -142,17 +143,22 @@ files:
142
143
  - benchmark/javascript_unescape.rb
143
144
  - benchmark/url_escape.rb
144
145
  - benchmark/url_unescape.rb
146
+ - benchmark/xml_escape.rb
145
147
  - escape_utils.gemspec
146
148
  - ext/escape_utils/buffer.c
147
149
  - ext/escape_utils/buffer.h
148
150
  - ext/escape_utils/escape_utils.c
149
151
  - ext/escape_utils/extconf.rb
150
152
  - ext/escape_utils/houdini.h
151
- - ext/escape_utils/houdini_html.c
152
- - ext/escape_utils/houdini_js.c
153
- - ext/escape_utils/houdini_uri.c
153
+ - ext/escape_utils/houdini_href_e.c
154
+ - ext/escape_utils/houdini_html_e.c
155
+ - ext/escape_utils/houdini_html_u.c
156
+ - ext/escape_utils/houdini_js_e.c
157
+ - ext/escape_utils/houdini_js_u.c
158
+ - ext/escape_utils/houdini_uri_e.c
159
+ - ext/escape_utils/houdini_uri_u.c
160
+ - ext/escape_utils/houdini_xml_e.c
154
161
  - ext/escape_utils/html_unescape.h
155
- - ext/escape_utils/uri_escape.h
156
162
  - lib/escape_utils.rb
157
163
  - lib/escape_utils/html/cgi.rb
158
164
  - lib/escape_utils/html/erb.rb
@@ -165,65 +171,43 @@ files:
165
171
  - lib/escape_utils/url/rack.rb
166
172
  - lib/escape_utils/url/uri.rb
167
173
  - lib/escape_utils/version.rb
168
- - spec/html/escape_spec.rb
169
- - spec/html/unescape_spec.rb
170
- - spec/html_safety_spec.rb
171
- - spec/javascript/escape_spec.rb
172
- - spec/javascript/unescape_spec.rb
173
- - spec/query/escape_spec.rb
174
- - spec/query/unescape_spec.rb
175
- - spec/rcov.opts
176
- - spec/spec_helper.rb
177
- - spec/uri/escape_spec.rb
178
- - spec/uri/unescape_spec.rb
179
- - spec/url/escape_spec.rb
180
- - spec/url/unescape_spec.rb
181
- has_rdoc: true
174
+ - lib/escape_utils/xml/builder.rb
175
+ - test/helper.rb
176
+ - test/html/escape_test.rb
177
+ - test/html/unescape_test.rb
178
+ - test/html_safety_test.rb
179
+ - test/javascript/escape_test.rb
180
+ - test/javascript/unescape_test.rb
181
+ - test/query/escape_test.rb
182
+ - test/query/unescape_test.rb
183
+ - test/uri/escape_test.rb
184
+ - test/uri/unescape_test.rb
185
+ - test/url/escape_test.rb
186
+ - test/url/unescape_test.rb
187
+ - test/xml/escape_test.rb
182
188
  homepage: http://github.com/brianmario/escape_utils
183
189
  licenses: []
184
-
185
190
  post_install_message:
186
- rdoc_options:
191
+ rdoc_options:
187
192
  - --charset=UTF-8
188
- require_paths:
193
+ require_paths:
189
194
  - lib
190
- - ext
191
- required_ruby_version: !ruby/object:Gem::Requirement
195
+ required_ruby_version: !ruby/object:Gem::Requirement
192
196
  none: false
193
- requirements:
194
- - - ">="
195
- - !ruby/object:Gem::Version
196
- hash: 3
197
- segments:
198
- - 0
199
- version: "0"
200
- required_rubygems_version: !ruby/object:Gem::Requirement
197
+ requirements:
198
+ - - ! '>='
199
+ - !ruby/object:Gem::Version
200
+ version: '0'
201
+ required_rubygems_version: !ruby/object:Gem::Requirement
201
202
  none: false
202
- requirements:
203
- - - ">="
204
- - !ruby/object:Gem::Version
205
- hash: 3
206
- segments:
207
- - 0
208
- version: "0"
203
+ requirements:
204
+ - - ! '>='
205
+ - !ruby/object:Gem::Version
206
+ version: '0'
209
207
  requirements: []
210
-
211
208
  rubyforge_project:
212
- rubygems_version: 1.6.2
209
+ rubygems_version: 1.8.23
213
210
  signing_key:
214
211
  specification_version: 3
215
212
  summary: Faster string escaping routines for your web apps
216
- test_files:
217
- - spec/html/escape_spec.rb
218
- - spec/html/unescape_spec.rb
219
- - spec/html_safety_spec.rb
220
- - spec/javascript/escape_spec.rb
221
- - spec/javascript/unescape_spec.rb
222
- - spec/query/escape_spec.rb
223
- - spec/query/unescape_spec.rb
224
- - spec/rcov.opts
225
- - spec/spec_helper.rb
226
- - spec/uri/escape_spec.rb
227
- - spec/uri/unescape_spec.rb
228
- - spec/url/escape_spec.rb
229
- - spec/url/unescape_spec.rb
213
+ test_files: []
data/.rspec DELETED
@@ -1,2 +0,0 @@
1
- --format documentation
2
- --colour
@@ -1,214 +0,0 @@
1
- #include <assert.h>
2
- #include <stdio.h>
3
- #include <string.h>
4
-
5
- #include "houdini.h"
6
- #include "html_unescape.h"
7
-
8
- #define ESCAPE_GROW_FACTOR(x) (((x) * 12) / 10) /* this is very scientific, yes */
9
- #define UNESCAPE_GROW_FACTOR(x) (x) /* unescaping shouldn't grow our buffer */
10
-
11
- /* Helper _isdigit methods -- do not trust the current locale */
12
- int _isxdigit(int c)
13
- {
14
- return strchr("0123456789ABCDEFabcdef", c) != NULL;
15
- }
16
-
17
- int _isdigit(int c)
18
- {
19
- return (c >= '0' && c <= '9');
20
- }
21
-
22
-
23
- /**
24
- * According to the OWASP rules:
25
- *
26
- * & --> &amp;
27
- * < --> &lt;
28
- * > --> &gt;
29
- * " --> &quot;
30
- * ' --> &#x27; &apos; is not recommended
31
- * / --> &#x2F; forward slash is included as it helps end an HTML entity
32
- *
33
- */
34
- static const char HTML_ESCAPE_TABLE[] = {
35
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
36
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
37
- 0, 0, 1, 0, 0, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 4,
38
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 6, 0,
39
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
40
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
41
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
42
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
43
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
44
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
45
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
46
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
47
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
48
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
49
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
50
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
51
- };
52
-
53
- static const char *HTML_ESCAPES[] = {
54
- "",
55
- "&quot;",
56
- "&amp;",
57
- "&#39;",
58
- "&#47;",
59
- "&lt;",
60
- "&gt;"
61
- };
62
-
63
- void
64
- houdini_escape_html(struct buf *ob, const uint8_t *src, size_t size, int secure)
65
- {
66
- size_t i = 0, org, esc;
67
-
68
- bufgrow(ob, ESCAPE_GROW_FACTOR(size));
69
-
70
- while (i < size) {
71
- org = i;
72
- while (i < size && (esc = HTML_ESCAPE_TABLE[src[i]]) == 0)
73
- i++;
74
-
75
- if (i > org)
76
- bufput(ob, src + org, i - org);
77
-
78
- /* escaping */
79
- if (i >= size)
80
- break;
81
-
82
- /* The forward slash is only escaped in secure mode */
83
- if (src[i] == '/' && !secure) {
84
- bufputc(ob, '/');
85
- } else {
86
- bufputs(ob, HTML_ESCAPES[esc]);
87
- }
88
-
89
- i++;
90
- }
91
- }
92
-
93
- static inline void
94
- bufput_utf8(struct buf *ob, int c)
95
- {
96
- unsigned char unichar[4];
97
-
98
- if (c < 0x80) {
99
- bufputc(ob, c);
100
- }
101
- else if (c < 0x800) {
102
- unichar[0] = 192 + (c / 64);
103
- unichar[1] = 128 + (c % 64);
104
- bufput(ob, unichar, 2);
105
- }
106
- else if (c - 0xd800u < 0x800) {
107
- bufputc(ob, '?');
108
- }
109
- else if (c < 0x10000) {
110
- unichar[0] = 224 + (c / 4096);
111
- unichar[1] = 128 + (c / 64) % 64;
112
- unichar[2] = 128 + (c % 64);
113
- bufput(ob, unichar, 3);
114
- }
115
- else if (c < 0x110000) {
116
- unichar[0] = 240 + (c / 262144);
117
- unichar[1] = 128 + (c / 4096) % 64;
118
- unichar[2] = 128 + (c / 64) % 64;
119
- unichar[3] = 128 + (c % 64);
120
- bufput(ob, unichar, 4);
121
- }
122
- else {
123
- bufputc(ob, '?');
124
- }
125
- }
126
-
127
- static size_t
128
- unescape_ent(struct buf *ob, const uint8_t *src, size_t size)
129
- {
130
- size_t i = 0;
131
-
132
- if (size > 3 && src[0] == '#') {
133
- int codepoint = 0;
134
-
135
- if (_isdigit(src[1])) {
136
- for (i = 1; i < size && _isdigit(src[i]); ++i)
137
- codepoint = (codepoint * 10) + (src[i] - '0');
138
- }
139
-
140
- else if (src[1] == 'x' || src[1] == 'X') {
141
- for (i = 2; i < size && _isxdigit(src[i]); ++i)
142
- codepoint = (codepoint * 16) + ((src[i] | 32) % 39 - 9);
143
- }
144
-
145
- if (i < size && src[i] == ';') {
146
- bufput_utf8(ob, codepoint);
147
- return i + 1;
148
- }
149
- }
150
-
151
- else {
152
- if (size > MAX_WORD_LENGTH)
153
- size = MAX_WORD_LENGTH;
154
-
155
- for (i = MIN_WORD_LENGTH; i < size; ++i) {
156
- if (src[i] == ' ')
157
- break;
158
-
159
- if (src[i] == ';') {
160
- const struct html_ent *entity = find_entity((char *)src, i);
161
-
162
- if (entity != NULL) {
163
- bufput(ob, entity->utf8, entity->utf8_len);
164
- return i + 1;
165
- }
166
-
167
- break;
168
- }
169
- }
170
- }
171
-
172
- bufputc(ob, '&');
173
- return 0;
174
- }
175
-
176
- void
177
- houdini_unescape_html(struct buf *ob, const uint8_t *src, size_t size)
178
- {
179
- size_t i = 0, org;
180
-
181
- bufgrow(ob, UNESCAPE_GROW_FACTOR(size));
182
-
183
- while (i < size) {
184
- org = i;
185
- while (i < size && src[i] != '&')
186
- i++;
187
-
188
- if (i > org)
189
- bufput(ob, src + org, i - org);
190
-
191
- /* escaping */
192
- if (i >= size)
193
- break;
194
-
195
- i++;
196
- i += unescape_ent(ob, src + i, size - i);
197
- }
198
- }
199
-
200
- #ifdef TEST
201
-
202
- int main()
203
- {
204
- const char TEST_STRING[] = "This &#x2663; is & just &quot;an example&diams;&quot;";
205
- struct buf *buffer;
206
-
207
- buffer = bufnew(128);
208
- houdini_unescape_html(buffer, TEST_STRING, strlen(TEST_STRING));
209
- printf("Result: %.*s\n", (int)buffer->size, buffer->data);
210
- bufrelease(buffer);
211
- return 0;
212
- }
213
- #endif
214
-