escape_utils 0.2.4 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. data/.gitignore +2 -1
  2. data/.travis.yml +13 -0
  3. data/CHANGELOG.md +7 -0
  4. data/MIT-LICENSE +1 -1
  5. data/Rakefile +5 -18
  6. data/benchmark/html_escape.rb +9 -2
  7. data/benchmark/xml_escape.rb +29 -0
  8. data/escape_utils.gemspec +2 -3
  9. data/ext/escape_utils/buffer.c +181 -160
  10. data/ext/escape_utils/buffer.h +90 -68
  11. data/ext/escape_utils/escape_utils.c +77 -39
  12. data/ext/escape_utils/extconf.rb +1 -1
  13. data/ext/escape_utils/houdini.h +37 -8
  14. data/ext/escape_utils/houdini_href_e.c +115 -0
  15. data/ext/escape_utils/houdini_html_e.c +90 -0
  16. data/ext/escape_utils/houdini_html_u.c +122 -0
  17. data/ext/escape_utils/{houdini_js.c → houdini_js_e.c} +17 -75
  18. data/ext/escape_utils/houdini_js_u.c +60 -0
  19. data/ext/escape_utils/{uri_escape.h → houdini_uri_e.c} +68 -2
  20. data/ext/escape_utils/houdini_uri_u.c +65 -0
  21. data/ext/escape_utils/houdini_xml_e.c +136 -0
  22. data/lib/escape_utils/version.rb +1 -1
  23. data/lib/escape_utils/xml/builder.rb +8 -0
  24. data/test/helper.rb +14 -0
  25. data/test/html/escape_test.rb +61 -0
  26. data/test/html/unescape_test.rb +48 -0
  27. data/test/html_safety_test.rb +46 -0
  28. data/test/javascript/escape_test.rb +42 -0
  29. data/test/javascript/unescape_test.rb +46 -0
  30. data/test/query/escape_test.rb +50 -0
  31. data/test/query/unescape_test.rb +52 -0
  32. data/test/uri/escape_test.rb +50 -0
  33. data/test/uri/unescape_test.rb +55 -0
  34. data/test/url/escape_test.rb +58 -0
  35. data/test/url/unescape_test.rb +60 -0
  36. data/test/xml/escape_test.rb +67 -0
  37. metadata +136 -152
  38. data/.rspec +0 -2
  39. data/ext/escape_utils/houdini_html.c +0 -214
  40. data/ext/escape_utils/houdini_uri.c +0 -130
  41. data/spec/html/escape_spec.rb +0 -42
  42. data/spec/html/unescape_spec.rb +0 -37
  43. data/spec/html_safety_spec.rb +0 -48
  44. data/spec/javascript/escape_spec.rb +0 -34
  45. data/spec/javascript/unescape_spec.rb +0 -37
  46. data/spec/query/escape_spec.rb +0 -44
  47. data/spec/query/unescape_spec.rb +0 -46
  48. data/spec/rcov.opts +0 -3
  49. data/spec/spec_helper.rb +0 -5
  50. data/spec/uri/escape_spec.rb +0 -43
  51. data/spec/uri/unescape_spec.rb +0 -57
  52. data/spec/url/escape_spec.rb +0 -52
  53. data/spec/url/unescape_spec.rb +0 -57
@@ -0,0 +1,67 @@
1
+ require File.expand_path("../../helper", __FILE__)
2
+
3
+ class XmlEscapeTest < MiniTest::Unit::TestCase
4
+ def test_basic_xml
5
+ assert_equal "&lt;some_tag/&gt;", EscapeUtils.escape_xml("<some_tag/>")
6
+ end
7
+
8
+ def test_double_quotes
9
+ assert_equal "&lt;some_tag some_attr=&quot;some value&quot;/&gt;", EscapeUtils.escape_xml("<some_tag some_attr=\"some value\"/>")
10
+ end
11
+
12
+ def test_single_quotes
13
+ assert_equal "&lt;some_tag some_attr=&apos;some value&apos;/&gt;", EscapeUtils.escape_xml("<some_tag some_attr='some value'/>")
14
+ end
15
+
16
+ def test_ampersand
17
+ assert_equal "&lt;b&gt;Bourbon &amp; Branch&lt;/b&gt;", EscapeUtils.escape_xml("<b>Bourbon & Branch</b>")
18
+ end
19
+
20
+ # See http://www.w3.org/TR/REC-xml/#charsets for details.
21
+ VALID = [
22
+ (0x9..0xA), 0xD,
23
+ (0x20..0xD7FF),
24
+ (0xE000..0xFFFD),
25
+ (0x10000..0x10FFFF)
26
+ ]
27
+
28
+ REPLACEMENT_CHAR = "?".unpack('U*').first
29
+
30
+ def test_invalid_characters
31
+ VALID.each do |range|
32
+ if range.kind_of? Range
33
+ start = range.begin
34
+ last = range.end
35
+ last -= 1 if range.exclude_end?
36
+ else
37
+ start = last = range
38
+ end
39
+ input = [start.pred, start, last, last.next].pack('U*')
40
+ expect = [REPLACEMENT_CHAR, start, last, REPLACEMENT_CHAR].pack('U*')
41
+ assert_equal expect, EscapeUtils.escape_xml(input)
42
+ end
43
+ end
44
+
45
+ if RUBY_VERSION =~ /^1.9/
46
+ def test_input_must_be_utf8_or_ascii
47
+ str = "<some_tag/>"
48
+
49
+ str.force_encoding 'ISO-8859-1'
50
+ assert_raises Encoding::CompatibilityError do
51
+ EscapeUtils.escape_xml(str)
52
+ end
53
+
54
+ str.force_encoding 'UTF-8'
55
+ begin
56
+ EscapeUtils.escape_xml(str)
57
+ rescue Encoding::CompatibilityError => e
58
+ assert_nil e, "#{e.class.name} raised, expected not to"
59
+ end
60
+ end
61
+
62
+ def test_return_value_is_tagged_as_utf8
63
+ str = "<some_tag/>"
64
+ assert_equal Encoding.find('UTF-8'), EscapeUtils.escape_url(str).encoding
65
+ end
66
+ end
67
+ end
metadata CHANGED
@@ -1,136 +1,137 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: escape_utils
3
- version: !ruby/object:Gem::Version
4
- hash: 31
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.3.0
5
5
  prerelease:
6
- segments:
7
- - 0
8
- - 2
9
- - 4
10
- version: 0.2.4
11
6
  platform: ruby
12
- authors:
7
+ authors:
13
8
  - Brian Lopez
14
9
  autorequire:
15
10
  bindir: bin
16
11
  cert_chain: []
17
-
18
- date: 2011-09-07 00:00:00 -07:00
19
- default_executable:
20
- dependencies:
21
- - !ruby/object:Gem::Dependency
12
+ date: 2013-02-26 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
22
15
  name: rake-compiler
23
- prerelease: false
24
- requirement: &id001 !ruby/object:Gem::Requirement
16
+ requirement: !ruby/object:Gem::Requirement
25
17
  none: false
26
- requirements:
27
- - - ">="
28
- - !ruby/object:Gem::Version
29
- hash: 9
30
- segments:
31
- - 0
32
- - 7
33
- - 5
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
34
21
  version: 0.7.5
35
22
  type: :development
36
- version_requirements: *id001
37
- - !ruby/object:Gem::Dependency
38
- name: rspec
39
23
  prerelease: false
40
- requirement: &id002 !ruby/object:Gem::Requirement
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: 0.7.5
30
+ - !ruby/object:Gem::Dependency
31
+ name: minitest
32
+ requirement: !ruby/object:Gem::Requirement
41
33
  none: false
42
- requirements:
43
- - - ">="
44
- - !ruby/object:Gem::Version
45
- hash: 15
46
- segments:
47
- - 2
48
- - 0
49
- - 0
50
- version: 2.0.0
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
51
38
  type: :development
52
- version_requirements: *id002
53
- - !ruby/object:Gem::Dependency
54
- name: rack
55
39
  prerelease: false
56
- requirement: &id003 !ruby/object:Gem::Requirement
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ - !ruby/object:Gem::Dependency
47
+ name: rack
48
+ requirement: !ruby/object:Gem::Requirement
57
49
  none: false
58
- requirements:
59
- - - ">="
60
- - !ruby/object:Gem::Version
61
- hash: 3
62
- segments:
63
- - 0
64
- version: "0"
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
65
54
  type: :development
66
- version_requirements: *id003
67
- - !ruby/object:Gem::Dependency
68
- name: haml
69
55
  prerelease: false
70
- requirement: &id004 !ruby/object:Gem::Requirement
56
+ version_requirements: !ruby/object:Gem::Requirement
71
57
  none: false
72
- requirements:
73
- - - ">="
74
- - !ruby/object:Gem::Version
75
- hash: 3
76
- segments:
77
- - 0
78
- version: "0"
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ - !ruby/object:Gem::Dependency
63
+ name: haml
64
+ requirement: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ! '>='
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
79
70
  type: :development
80
- version_requirements: *id004
81
- - !ruby/object:Gem::Dependency
82
- name: fast_xs
83
71
  prerelease: false
84
- requirement: &id005 !ruby/object:Gem::Requirement
72
+ version_requirements: !ruby/object:Gem::Requirement
85
73
  none: false
86
- requirements:
87
- - - ">="
88
- - !ruby/object:Gem::Version
89
- hash: 3
90
- segments:
91
- - 0
92
- version: "0"
74
+ requirements:
75
+ - - ! '>='
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
78
+ - !ruby/object:Gem::Dependency
79
+ name: fast_xs
80
+ requirement: !ruby/object:Gem::Requirement
81
+ none: false
82
+ requirements:
83
+ - - ! '>='
84
+ - !ruby/object:Gem::Version
85
+ version: '0'
93
86
  type: :development
94
- version_requirements: *id005
95
- - !ruby/object:Gem::Dependency
96
- name: actionpack
97
87
  prerelease: false
98
- requirement: &id006 !ruby/object:Gem::Requirement
88
+ version_requirements: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ! '>='
92
+ - !ruby/object:Gem::Version
93
+ version: '0'
94
+ - !ruby/object:Gem::Dependency
95
+ name: actionpack
96
+ requirement: !ruby/object:Gem::Requirement
99
97
  none: false
100
- requirements:
101
- - - ">="
102
- - !ruby/object:Gem::Version
103
- hash: 3
104
- segments:
105
- - 0
106
- version: "0"
98
+ requirements:
99
+ - - ! '>='
100
+ - !ruby/object:Gem::Version
101
+ version: '0'
107
102
  type: :development
108
- version_requirements: *id006
109
- - !ruby/object:Gem::Dependency
110
- name: url_escape
111
103
  prerelease: false
112
- requirement: &id007 !ruby/object:Gem::Requirement
104
+ version_requirements: !ruby/object:Gem::Requirement
105
+ none: false
106
+ requirements:
107
+ - - ! '>='
108
+ - !ruby/object:Gem::Version
109
+ version: '0'
110
+ - !ruby/object:Gem::Dependency
111
+ name: url_escape
112
+ requirement: !ruby/object:Gem::Requirement
113
113
  none: false
114
- requirements:
115
- - - ">="
116
- - !ruby/object:Gem::Version
117
- hash: 3
118
- segments:
119
- - 0
120
- version: "0"
114
+ requirements:
115
+ - - ! '>='
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
121
118
  type: :development
122
- version_requirements: *id007
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ none: false
122
+ requirements:
123
+ - - ! '>='
124
+ - !ruby/object:Gem::Version
125
+ version: '0'
123
126
  description:
124
127
  email: seniorlopez@gmail.com
125
128
  executables: []
126
-
127
- extensions:
129
+ extensions:
128
130
  - ext/escape_utils/extconf.rb
129
131
  extra_rdoc_files: []
130
-
131
- files:
132
+ files:
132
133
  - .gitignore
133
- - .rspec
134
+ - .travis.yml
134
135
  - CHANGELOG.md
135
136
  - Gemfile
136
137
  - MIT-LICENSE
@@ -142,17 +143,22 @@ files:
142
143
  - benchmark/javascript_unescape.rb
143
144
  - benchmark/url_escape.rb
144
145
  - benchmark/url_unescape.rb
146
+ - benchmark/xml_escape.rb
145
147
  - escape_utils.gemspec
146
148
  - ext/escape_utils/buffer.c
147
149
  - ext/escape_utils/buffer.h
148
150
  - ext/escape_utils/escape_utils.c
149
151
  - ext/escape_utils/extconf.rb
150
152
  - ext/escape_utils/houdini.h
151
- - ext/escape_utils/houdini_html.c
152
- - ext/escape_utils/houdini_js.c
153
- - ext/escape_utils/houdini_uri.c
153
+ - ext/escape_utils/houdini_href_e.c
154
+ - ext/escape_utils/houdini_html_e.c
155
+ - ext/escape_utils/houdini_html_u.c
156
+ - ext/escape_utils/houdini_js_e.c
157
+ - ext/escape_utils/houdini_js_u.c
158
+ - ext/escape_utils/houdini_uri_e.c
159
+ - ext/escape_utils/houdini_uri_u.c
160
+ - ext/escape_utils/houdini_xml_e.c
154
161
  - ext/escape_utils/html_unescape.h
155
- - ext/escape_utils/uri_escape.h
156
162
  - lib/escape_utils.rb
157
163
  - lib/escape_utils/html/cgi.rb
158
164
  - lib/escape_utils/html/erb.rb
@@ -165,65 +171,43 @@ files:
165
171
  - lib/escape_utils/url/rack.rb
166
172
  - lib/escape_utils/url/uri.rb
167
173
  - lib/escape_utils/version.rb
168
- - spec/html/escape_spec.rb
169
- - spec/html/unescape_spec.rb
170
- - spec/html_safety_spec.rb
171
- - spec/javascript/escape_spec.rb
172
- - spec/javascript/unescape_spec.rb
173
- - spec/query/escape_spec.rb
174
- - spec/query/unescape_spec.rb
175
- - spec/rcov.opts
176
- - spec/spec_helper.rb
177
- - spec/uri/escape_spec.rb
178
- - spec/uri/unescape_spec.rb
179
- - spec/url/escape_spec.rb
180
- - spec/url/unescape_spec.rb
181
- has_rdoc: true
174
+ - lib/escape_utils/xml/builder.rb
175
+ - test/helper.rb
176
+ - test/html/escape_test.rb
177
+ - test/html/unescape_test.rb
178
+ - test/html_safety_test.rb
179
+ - test/javascript/escape_test.rb
180
+ - test/javascript/unescape_test.rb
181
+ - test/query/escape_test.rb
182
+ - test/query/unescape_test.rb
183
+ - test/uri/escape_test.rb
184
+ - test/uri/unescape_test.rb
185
+ - test/url/escape_test.rb
186
+ - test/url/unescape_test.rb
187
+ - test/xml/escape_test.rb
182
188
  homepage: http://github.com/brianmario/escape_utils
183
189
  licenses: []
184
-
185
190
  post_install_message:
186
- rdoc_options:
191
+ rdoc_options:
187
192
  - --charset=UTF-8
188
- require_paths:
193
+ require_paths:
189
194
  - lib
190
- - ext
191
- required_ruby_version: !ruby/object:Gem::Requirement
195
+ required_ruby_version: !ruby/object:Gem::Requirement
192
196
  none: false
193
- requirements:
194
- - - ">="
195
- - !ruby/object:Gem::Version
196
- hash: 3
197
- segments:
198
- - 0
199
- version: "0"
200
- required_rubygems_version: !ruby/object:Gem::Requirement
197
+ requirements:
198
+ - - ! '>='
199
+ - !ruby/object:Gem::Version
200
+ version: '0'
201
+ required_rubygems_version: !ruby/object:Gem::Requirement
201
202
  none: false
202
- requirements:
203
- - - ">="
204
- - !ruby/object:Gem::Version
205
- hash: 3
206
- segments:
207
- - 0
208
- version: "0"
203
+ requirements:
204
+ - - ! '>='
205
+ - !ruby/object:Gem::Version
206
+ version: '0'
209
207
  requirements: []
210
-
211
208
  rubyforge_project:
212
- rubygems_version: 1.6.2
209
+ rubygems_version: 1.8.23
213
210
  signing_key:
214
211
  specification_version: 3
215
212
  summary: Faster string escaping routines for your web apps
216
- test_files:
217
- - spec/html/escape_spec.rb
218
- - spec/html/unescape_spec.rb
219
- - spec/html_safety_spec.rb
220
- - spec/javascript/escape_spec.rb
221
- - spec/javascript/unescape_spec.rb
222
- - spec/query/escape_spec.rb
223
- - spec/query/unescape_spec.rb
224
- - spec/rcov.opts
225
- - spec/spec_helper.rb
226
- - spec/uri/escape_spec.rb
227
- - spec/uri/unescape_spec.rb
228
- - spec/url/escape_spec.rb
229
- - spec/url/unescape_spec.rb
213
+ test_files: []
data/.rspec DELETED
@@ -1,2 +0,0 @@
1
- --format documentation
2
- --colour
@@ -1,214 +0,0 @@
1
- #include <assert.h>
2
- #include <stdio.h>
3
- #include <string.h>
4
-
5
- #include "houdini.h"
6
- #include "html_unescape.h"
7
-
8
- #define ESCAPE_GROW_FACTOR(x) (((x) * 12) / 10) /* this is very scientific, yes */
9
- #define UNESCAPE_GROW_FACTOR(x) (x) /* unescaping shouldn't grow our buffer */
10
-
11
- /* Helper _isdigit methods -- do not trust the current locale */
12
- int _isxdigit(int c)
13
- {
14
- return strchr("0123456789ABCDEFabcdef", c) != NULL;
15
- }
16
-
17
- int _isdigit(int c)
18
- {
19
- return (c >= '0' && c <= '9');
20
- }
21
-
22
-
23
- /**
24
- * According to the OWASP rules:
25
- *
26
- * & --> &amp;
27
- * < --> &lt;
28
- * > --> &gt;
29
- * " --> &quot;
30
- * ' --> &#x27; &apos; is not recommended
31
- * / --> &#x2F; forward slash is included as it helps end an HTML entity
32
- *
33
- */
34
- static const char HTML_ESCAPE_TABLE[] = {
35
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
36
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
37
- 0, 0, 1, 0, 0, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 4,
38
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 6, 0,
39
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
40
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
41
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
42
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
43
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
44
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
45
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
46
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
47
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
48
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
49
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
50
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
51
- };
52
-
53
- static const char *HTML_ESCAPES[] = {
54
- "",
55
- "&quot;",
56
- "&amp;",
57
- "&#39;",
58
- "&#47;",
59
- "&lt;",
60
- "&gt;"
61
- };
62
-
63
- void
64
- houdini_escape_html(struct buf *ob, const uint8_t *src, size_t size, int secure)
65
- {
66
- size_t i = 0, org, esc;
67
-
68
- bufgrow(ob, ESCAPE_GROW_FACTOR(size));
69
-
70
- while (i < size) {
71
- org = i;
72
- while (i < size && (esc = HTML_ESCAPE_TABLE[src[i]]) == 0)
73
- i++;
74
-
75
- if (i > org)
76
- bufput(ob, src + org, i - org);
77
-
78
- /* escaping */
79
- if (i >= size)
80
- break;
81
-
82
- /* The forward slash is only escaped in secure mode */
83
- if (src[i] == '/' && !secure) {
84
- bufputc(ob, '/');
85
- } else {
86
- bufputs(ob, HTML_ESCAPES[esc]);
87
- }
88
-
89
- i++;
90
- }
91
- }
92
-
93
- static inline void
94
- bufput_utf8(struct buf *ob, int c)
95
- {
96
- unsigned char unichar[4];
97
-
98
- if (c < 0x80) {
99
- bufputc(ob, c);
100
- }
101
- else if (c < 0x800) {
102
- unichar[0] = 192 + (c / 64);
103
- unichar[1] = 128 + (c % 64);
104
- bufput(ob, unichar, 2);
105
- }
106
- else if (c - 0xd800u < 0x800) {
107
- bufputc(ob, '?');
108
- }
109
- else if (c < 0x10000) {
110
- unichar[0] = 224 + (c / 4096);
111
- unichar[1] = 128 + (c / 64) % 64;
112
- unichar[2] = 128 + (c % 64);
113
- bufput(ob, unichar, 3);
114
- }
115
- else if (c < 0x110000) {
116
- unichar[0] = 240 + (c / 262144);
117
- unichar[1] = 128 + (c / 4096) % 64;
118
- unichar[2] = 128 + (c / 64) % 64;
119
- unichar[3] = 128 + (c % 64);
120
- bufput(ob, unichar, 4);
121
- }
122
- else {
123
- bufputc(ob, '?');
124
- }
125
- }
126
-
127
- static size_t
128
- unescape_ent(struct buf *ob, const uint8_t *src, size_t size)
129
- {
130
- size_t i = 0;
131
-
132
- if (size > 3 && src[0] == '#') {
133
- int codepoint = 0;
134
-
135
- if (_isdigit(src[1])) {
136
- for (i = 1; i < size && _isdigit(src[i]); ++i)
137
- codepoint = (codepoint * 10) + (src[i] - '0');
138
- }
139
-
140
- else if (src[1] == 'x' || src[1] == 'X') {
141
- for (i = 2; i < size && _isxdigit(src[i]); ++i)
142
- codepoint = (codepoint * 16) + ((src[i] | 32) % 39 - 9);
143
- }
144
-
145
- if (i < size && src[i] == ';') {
146
- bufput_utf8(ob, codepoint);
147
- return i + 1;
148
- }
149
- }
150
-
151
- else {
152
- if (size > MAX_WORD_LENGTH)
153
- size = MAX_WORD_LENGTH;
154
-
155
- for (i = MIN_WORD_LENGTH; i < size; ++i) {
156
- if (src[i] == ' ')
157
- break;
158
-
159
- if (src[i] == ';') {
160
- const struct html_ent *entity = find_entity((char *)src, i);
161
-
162
- if (entity != NULL) {
163
- bufput(ob, entity->utf8, entity->utf8_len);
164
- return i + 1;
165
- }
166
-
167
- break;
168
- }
169
- }
170
- }
171
-
172
- bufputc(ob, '&');
173
- return 0;
174
- }
175
-
176
- void
177
- houdini_unescape_html(struct buf *ob, const uint8_t *src, size_t size)
178
- {
179
- size_t i = 0, org;
180
-
181
- bufgrow(ob, UNESCAPE_GROW_FACTOR(size));
182
-
183
- while (i < size) {
184
- org = i;
185
- while (i < size && src[i] != '&')
186
- i++;
187
-
188
- if (i > org)
189
- bufput(ob, src + org, i - org);
190
-
191
- /* escaping */
192
- if (i >= size)
193
- break;
194
-
195
- i++;
196
- i += unescape_ent(ob, src + i, size - i);
197
- }
198
- }
199
-
200
- #ifdef TEST
201
-
202
- int main()
203
- {
204
- const char TEST_STRING[] = "This &#x2663; is & just &quot;an example&diams;&quot;";
205
- struct buf *buffer;
206
-
207
- buffer = bufnew(128);
208
- houdini_unescape_html(buffer, TEST_STRING, strlen(TEST_STRING));
209
- printf("Result: %.*s\n", (int)buffer->size, buffer->data);
210
- bufrelease(buffer);
211
- return 0;
212
- }
213
- #endif
214
-