faml 0.3.1 → 0.3.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitmodules +3 -0
- data/.travis.yml +0 -3
- data/Appraisals +5 -4
- data/CHANGELOG.md +11 -0
- data/Gemfile +1 -1
- data/ext/attribute_builder/attribute_builder.c +63 -31
- data/ext/attribute_builder/extconf.rb +12 -0
- data/faml.gemspec +1 -1
- data/gemfiles/rails_4.0.gemfile +1 -1
- data/gemfiles/rails_4.1.gemfile +1 -1
- data/gemfiles/rails_4.2.gemfile +1 -1
- data/gemfiles/rails_edge.gemfile +2 -1
- data/incompatibilities/README.md +1 -1
- data/incompatibilities/spec/render/attribute_spec.md +0 -0
- data/lib/faml.rb +4 -4
- data/lib/faml/cli.rb +1 -1
- data/lib/faml/compiler.rb +7 -7
- data/lib/faml/engine.rb +4 -4
- data/lib/faml/filter_compilers.rb +12 -12
- data/lib/faml/filter_compilers/base.rb +1 -1
- data/lib/faml/filter_compilers/cdata.rb +1 -1
- data/lib/faml/filter_compilers/coffee.rb +1 -1
- data/lib/faml/filter_compilers/css.rb +1 -1
- data/lib/faml/filter_compilers/escaped.rb +1 -1
- data/lib/faml/filter_compilers/javascript.rb +1 -1
- data/lib/faml/filter_compilers/markdown.rb +1 -1
- data/lib/faml/filter_compilers/plain.rb +1 -1
- data/lib/faml/filter_compilers/preserve.rb +1 -1
- data/lib/faml/filter_compilers/ruby.rb +1 -1
- data/lib/faml/filter_compilers/sass.rb +1 -1
- data/lib/faml/filter_compilers/scss.rb +1 -1
- data/lib/faml/filter_compilers/tilt_base.rb +1 -2
- data/lib/faml/rails_helpers.rb +1 -1
- data/lib/faml/railtie.rb +1 -1
- data/lib/faml/text_compiler.rb +2 -2
- data/lib/faml/tilt.rb +1 -1
- data/lib/faml/version.rb +1 -1
- data/spec/render/attribute_spec.rb +13 -0
- data/vendor/houdini/.gitignore +3 -0
- data/vendor/houdini/COPYING +7 -0
- data/vendor/houdini/Makefile +79 -0
- data/vendor/houdini/README.md +59 -0
- data/vendor/houdini/buffer.c +249 -0
- data/vendor/houdini/buffer.h +113 -0
- data/vendor/houdini/houdini.h +46 -0
- data/vendor/houdini/houdini_href_e.c +115 -0
- data/vendor/houdini/houdini_html_e.c +90 -0
- data/vendor/houdini/houdini_html_u.c +122 -0
- data/vendor/houdini/houdini_js_e.c +90 -0
- data/vendor/houdini/houdini_js_u.c +60 -0
- data/vendor/houdini/houdini_uri_e.c +107 -0
- data/vendor/houdini/houdini_uri_u.c +68 -0
- data/vendor/houdini/houdini_xml_e.c +136 -0
- data/vendor/houdini/html_unescape.gperf +258 -0
- data/vendor/houdini/html_unescape.h +754 -0
- data/vendor/houdini/tools/build_table.py +13 -0
- data/vendor/houdini/tools/build_tables.c +51 -0
- data/vendor/houdini/tools/wikipedia_table.txt +2025 -0
- metadata +23 -3
@@ -0,0 +1,90 @@
|
|
1
|
+
#include <assert.h>
|
2
|
+
#include <stdio.h>
|
3
|
+
#include <string.h>
|
4
|
+
|
5
|
+
#include "houdini.h"
|
6
|
+
|
7
|
+
/*
 * Per-byte flag table consulted by houdini_escape_js: a non-zero entry marks
 * a byte that must be escaped. Only LF, CR, '"', '\'', '/' and '\\' are
 * flagged; every other entry is implicitly zero.
 */
static const char JS_ESCAPE[256] = {
	['\n'] = 1,
	['\r'] = 1,
	['"']  = 1,
	['\''] = 1,
	['/']  = 1,
	['\\'] = 1,
};
|
25
|
+
|
26
|
+
int
|
27
|
+
houdini_escape_js(gh_buf *ob, const uint8_t *src, size_t size)
|
28
|
+
{
|
29
|
+
size_t i = 0, org, ch;
|
30
|
+
|
31
|
+
while (i < size) {
|
32
|
+
org = i;
|
33
|
+
while (i < size && JS_ESCAPE[src[i]] == 0)
|
34
|
+
i++;
|
35
|
+
|
36
|
+
if (likely(i > org)) {
|
37
|
+
if (unlikely(org == 0)) {
|
38
|
+
if (i >= size)
|
39
|
+
return 0;
|
40
|
+
|
41
|
+
gh_buf_grow(ob, HOUDINI_ESCAPED_SIZE(size));
|
42
|
+
}
|
43
|
+
|
44
|
+
gh_buf_put(ob, src + org, i - org);
|
45
|
+
}
|
46
|
+
|
47
|
+
/* escaping */
|
48
|
+
if (i >= size)
|
49
|
+
break;
|
50
|
+
|
51
|
+
ch = src[i];
|
52
|
+
|
53
|
+
switch (ch) {
|
54
|
+
case '/':
|
55
|
+
/*
|
56
|
+
* Escape only if preceded by a lt
|
57
|
+
*/
|
58
|
+
if (i && src[i - 1] == '<')
|
59
|
+
gh_buf_putc(ob, '\\');
|
60
|
+
|
61
|
+
gh_buf_putc(ob, ch);
|
62
|
+
break;
|
63
|
+
|
64
|
+
case '\r':
|
65
|
+
/*
|
66
|
+
* Escape as \n, and skip the next \n if it's there
|
67
|
+
*/
|
68
|
+
if (i + 1 < size && src[i + 1] == '\n') i++;
|
69
|
+
|
70
|
+
case '\n':
|
71
|
+
/*
|
72
|
+
* Escape actually as '\','n', not as '\', '\n'
|
73
|
+
*/
|
74
|
+
ch = 'n';
|
75
|
+
|
76
|
+
default:
|
77
|
+
/*
|
78
|
+
* Normal escaping
|
79
|
+
*/
|
80
|
+
gh_buf_putc(ob, '\\');
|
81
|
+
gh_buf_putc(ob, ch);
|
82
|
+
break;
|
83
|
+
}
|
84
|
+
|
85
|
+
i++;
|
86
|
+
}
|
87
|
+
|
88
|
+
return 1;
|
89
|
+
}
|
90
|
+
|
@@ -0,0 +1,60 @@
|
|
1
|
+
#include <assert.h>
|
2
|
+
#include <stdio.h>
|
3
|
+
#include <string.h>
|
4
|
+
|
5
|
+
#include "houdini.h"
|
6
|
+
|
7
|
+
int
|
8
|
+
houdini_unescape_js(gh_buf *ob, const uint8_t *src, size_t size)
|
9
|
+
{
|
10
|
+
size_t i = 0, org, ch;
|
11
|
+
|
12
|
+
while (i < size) {
|
13
|
+
org = i;
|
14
|
+
while (i < size && src[i] != '\\')
|
15
|
+
i++;
|
16
|
+
|
17
|
+
if (likely(i > org)) {
|
18
|
+
if (unlikely(org == 0)) {
|
19
|
+
if (i >= size)
|
20
|
+
return 0;
|
21
|
+
|
22
|
+
gh_buf_grow(ob, HOUDINI_UNESCAPED_SIZE(size));
|
23
|
+
}
|
24
|
+
|
25
|
+
gh_buf_put(ob, src + org, i - org);
|
26
|
+
}
|
27
|
+
|
28
|
+
/* escaping */
|
29
|
+
if (i == size)
|
30
|
+
break;
|
31
|
+
|
32
|
+
if (++i == size) {
|
33
|
+
gh_buf_putc(ob, '\\');
|
34
|
+
break;
|
35
|
+
}
|
36
|
+
|
37
|
+
ch = src[i];
|
38
|
+
|
39
|
+
switch (ch) {
|
40
|
+
case 'n':
|
41
|
+
ch = '\n';
|
42
|
+
/* pass through */
|
43
|
+
|
44
|
+
case '\\':
|
45
|
+
case '\'':
|
46
|
+
case '\"':
|
47
|
+
case '/':
|
48
|
+
gh_buf_putc(ob, ch);
|
49
|
+
i++;
|
50
|
+
break;
|
51
|
+
|
52
|
+
default:
|
53
|
+
gh_buf_putc(ob, '\\');
|
54
|
+
break;
|
55
|
+
}
|
56
|
+
}
|
57
|
+
|
58
|
+
return 1;
|
59
|
+
}
|
60
|
+
|
@@ -0,0 +1,107 @@
|
|
1
|
+
#include <assert.h>
|
2
|
+
#include <stdio.h>
|
3
|
+
#include <string.h>
|
4
|
+
|
5
|
+
#include "houdini.h"
|
6
|
+
|
7
|
+
/*
 * Per-byte flag table: a non-zero entry marks a byte that is copied through
 * unescaped when this table is passed to escape(). Flagged bytes are the
 * ASCII letters and digits plus '-', '.' and '_'.
 * Entries 0x80..0xFF are not listed and are therefore zero.
 */
static const char URL_SAFE[256] = {
	/* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x20 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0,
	/* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
	/* 0x40 */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
	/* 0x60 */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
};
|
25
|
+
|
26
|
+
/*
 * Per-byte flag table: a non-zero entry marks a byte that is copied through
 * unescaped when this table is passed to escape(). Besides letters and
 * digits it flags most ASCII punctuation; the printable ASCII bytes left
 * unflagged are space, '"', '#', '%', '<', '>', '\\', '^', '`', '{', '|'
 * and '}'.
 * Entries 0x80..0xFF are not listed and are therefore zero.
 */
static const char URI_SAFE[256] = {
	/* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x20 */ 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,
	/* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,
	/* 0x60 */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0,
};
|
44
|
+
|
45
|
+
static int
|
46
|
+
escape(gh_buf *ob, const uint8_t *src, size_t size,
|
47
|
+
const char *safe_table, bool escape_plus)
|
48
|
+
{
|
49
|
+
static const uint8_t hex_chars[] = "0123456789ABCDEF";
|
50
|
+
|
51
|
+
size_t i = 0, org;
|
52
|
+
uint8_t hex_str[3];
|
53
|
+
|
54
|
+
hex_str[0] = '%';
|
55
|
+
|
56
|
+
while (i < size) {
|
57
|
+
org = i;
|
58
|
+
while (i < size && safe_table[src[i]] != 0)
|
59
|
+
i++;
|
60
|
+
|
61
|
+
if (likely(i > org)) {
|
62
|
+
if (unlikely(org == 0)) {
|
63
|
+
if (i >= size)
|
64
|
+
return 0;
|
65
|
+
|
66
|
+
gh_buf_grow(ob, HOUDINI_ESCAPED_SIZE(size));
|
67
|
+
}
|
68
|
+
|
69
|
+
gh_buf_put(ob, src + org, i - org);
|
70
|
+
}
|
71
|
+
|
72
|
+
/* escaping */
|
73
|
+
if (i >= size)
|
74
|
+
break;
|
75
|
+
|
76
|
+
if (src[i] == ' ' && escape_plus) {
|
77
|
+
gh_buf_putc(ob, '+');
|
78
|
+
} else {
|
79
|
+
hex_str[1] = hex_chars[(src[i] >> 4) & 0xF];
|
80
|
+
hex_str[2] = hex_chars[src[i] & 0xF];
|
81
|
+
gh_buf_put(ob, hex_str, 3);
|
82
|
+
}
|
83
|
+
|
84
|
+
i++;
|
85
|
+
}
|
86
|
+
|
87
|
+
return 1;
|
88
|
+
}
|
89
|
+
|
90
|
+
int
|
91
|
+
houdini_escape_uri(gh_buf *ob, const uint8_t *src, size_t size)
|
92
|
+
{
|
93
|
+
return escape(ob, src, size, URI_SAFE, false);
|
94
|
+
}
|
95
|
+
|
96
|
+
int
|
97
|
+
houdini_escape_uri_component(gh_buf *ob, const uint8_t *src, size_t size)
|
98
|
+
{
|
99
|
+
return escape(ob, src, size, URL_SAFE, false);
|
100
|
+
}
|
101
|
+
|
102
|
+
int
|
103
|
+
houdini_escape_url(gh_buf *ob, const uint8_t *src, size_t size)
|
104
|
+
{
|
105
|
+
return escape(ob, src, size, URL_SAFE, true);
|
106
|
+
}
|
107
|
+
|
@@ -0,0 +1,68 @@
|
|
1
|
+
#include <assert.h>
|
2
|
+
#include <stdio.h>
|
3
|
+
#include <string.h>
|
4
|
+
|
5
|
+
#include "houdini.h"
|
6
|
+
|
7
|
+
/*
 * Convert one ASCII hex digit ('0'-'9', 'a'-'f', 'A'-'F') to its value 0..15.
 * OR-ing with 32 folds upper-case letters onto lower-case and leaves digits
 * unchanged; the modulo/subtract then maps both ranges onto 0..15. The result
 * is meaningless for non-hex input, so callers must validate first.
 * The argument is parenthesized so that expressions containing operators of
 * lower precedence than '|' are evaluated as a unit.
 */
#define hex2c(c) ((((c) | 32) % 39) - 9)
|
8
|
+
|
9
|
+
static int
|
10
|
+
unescape(gh_buf *ob, const uint8_t *src, size_t size, bool unescape_plus)
|
11
|
+
{
|
12
|
+
size_t i = 0, org;
|
13
|
+
|
14
|
+
while (i < size) {
|
15
|
+
org = i;
|
16
|
+
while (i < size && src[i] != '%' && src[i] != '+')
|
17
|
+
i++;
|
18
|
+
|
19
|
+
if (likely(i > org)) {
|
20
|
+
if (unlikely(org == 0)) {
|
21
|
+
if (i >= size)
|
22
|
+
return 0;
|
23
|
+
|
24
|
+
gh_buf_grow(ob, HOUDINI_UNESCAPED_SIZE(size));
|
25
|
+
}
|
26
|
+
|
27
|
+
gh_buf_put(ob, src + org, i - org);
|
28
|
+
}
|
29
|
+
|
30
|
+
/* escaping */
|
31
|
+
if (i >= size)
|
32
|
+
break;
|
33
|
+
|
34
|
+
if (src[i++] == '+') {
|
35
|
+
gh_buf_putc(ob, unescape_plus ? ' ' : '+');
|
36
|
+
continue;
|
37
|
+
}
|
38
|
+
|
39
|
+
if (i + 1 < size && _isxdigit(src[i]) && _isxdigit(src[i + 1])) {
|
40
|
+
unsigned char new_char = (hex2c(src[i]) << 4) + hex2c(src[i + 1]);
|
41
|
+
gh_buf_putc(ob, new_char);
|
42
|
+
i += 2;
|
43
|
+
} else {
|
44
|
+
gh_buf_putc(ob, '%');
|
45
|
+
}
|
46
|
+
}
|
47
|
+
|
48
|
+
return 1;
|
49
|
+
}
|
50
|
+
|
51
|
+
int
|
52
|
+
houdini_unescape_uri(gh_buf *ob, const uint8_t *src, size_t size)
|
53
|
+
{
|
54
|
+
return unescape(ob, src, size, false);
|
55
|
+
}
|
56
|
+
|
57
|
+
int
|
58
|
+
houdini_unescape_uri_component(gh_buf *ob, const uint8_t *src, size_t size)
|
59
|
+
{
|
60
|
+
return unescape(ob, src, size, false);
|
61
|
+
}
|
62
|
+
|
63
|
+
int
|
64
|
+
houdini_unescape_url(gh_buf *ob, const uint8_t *src, size_t size)
|
65
|
+
{
|
66
|
+
return unescape(ob, src, size, true);
|
67
|
+
}
|
68
|
+
|
@@ -0,0 +1,136 @@
|
|
1
|
+
#include <assert.h>
|
2
|
+
#include <stdio.h>
|
3
|
+
#include <string.h>
|
4
|
+
|
5
|
+
#include "houdini.h"
|
6
|
+
|
7
|
+
/**
 * Replacement strings emitted by houdini_escape_xml, indexed by the code
 * that XML_LOOKUP_TABLE assigns to a byte:
 *
 * & --> &amp;
 * < --> &lt;
 * > --> &gt;
 * " --> &quot;
 * ' --> &apos;
 *
 * Codes 0-4 never reach the output through this table (they mean "copy the
 * byte" or "start of an N-byte UTF-8 sequence"); code 5 substitutes '?' for
 * a byte sequence rejected as invalid.
 */
static const char *LOOKUP_CODES[] = {
	"", /* reserved: use literal single character */
	"", /* unused */
	"", /* reserved: 2 character UTF-8 */
	"", /* reserved: 3 character UTF-8 */
	"", /* reserved: 4 character UTF-8 */
	"?", /* invalid UTF-8 character */
	"&quot;",
	"&amp;",
	"&apos;",
	"&lt;",
	"&gt;"
};
|
27
|
+
|
28
|
+
/*
 * Lookup code for an invalid byte or sequence. Codes at or above this value
 * stop the literal run in houdini_escape_xml and select a replacement string
 * from LOOKUP_CODES.
 */
enum { CODE_INVALID = 5 };
|
29
|
+
|
30
|
+
/*
 * Classification of every possible input byte for houdini_escape_xml:
 *   0      copy the byte through unchanged
 *   2,3,4  lead byte of a UTF-8 sequence of that many bytes
 *   5      invalid here (replaced via LOOKUP_CODES[5])
 *   6..10  index of the entity string in LOOKUP_CODES
 */
static const char XML_LOOKUP_TABLE[256] = {
	/* ASCII: 0xxxxxxx */
	/* 0x00 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, 0, 5, 5, 0, 5, 5,
	/* 0x10 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
	/* 0x20 */ 0, 0, 6, 0, 0, 0, 7, 8, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x30 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0,10, 0,
	/* 0x40 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x50 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x60 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x70 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

	/* Invalid UTF-8 char start: 10xxxxxx */
	/* 0x80 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
	/* 0x90 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
	/* 0xa0 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
	/* 0xb0 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,

	/* Multibyte UTF-8 */

	/* 2 bytes: 110xxxxx */
	/* 0xc0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0xd0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,

	/* 3 bytes: 1110xxxx */
	/* 0xe0 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,

	/* 4 bytes: 11110xxx */
	/* 0xf0 */ 4, 4, 4, 4, 4, 4, 4, 4,

	/* Invalid UTF-8: 11111xxx */
	/* 0xf8 */ 5, 5, 5, 5, 5, 5, 5, 5,
};
|
62
|
+
|
63
|
+
int
|
64
|
+
houdini_escape_xml(gh_buf *ob, const uint8_t *src, size_t size)
|
65
|
+
{
|
66
|
+
size_t i = 0;
|
67
|
+
unsigned char code = 0;
|
68
|
+
|
69
|
+
gh_buf_grow(ob, HOUDINI_ESCAPED_SIZE(size));
|
70
|
+
|
71
|
+
while (i < size) {
|
72
|
+
size_t start, end;
|
73
|
+
|
74
|
+
start = end = i;
|
75
|
+
|
76
|
+
while (i < size) {
|
77
|
+
unsigned int byte;
|
78
|
+
|
79
|
+
byte = src[i++];
|
80
|
+
code = XML_LOOKUP_TABLE[byte];
|
81
|
+
|
82
|
+
if (!code) {
|
83
|
+
/* single character used literally */
|
84
|
+
} else if (code >= CODE_INVALID) {
|
85
|
+
break; /* insert lookup code string */
|
86
|
+
} else if (code > size - end) {
|
87
|
+
code = CODE_INVALID; /* truncated UTF-8 character */
|
88
|
+
break;
|
89
|
+
} else {
|
90
|
+
unsigned int chr = byte & (0xff >> code);
|
91
|
+
|
92
|
+
while (--code) {
|
93
|
+
byte = src[i++];
|
94
|
+
if ((byte & 0xc0) != 0x80) {
|
95
|
+
code = CODE_INVALID;
|
96
|
+
break;
|
97
|
+
}
|
98
|
+
chr = (chr << 6) + (byte & 0x3f);
|
99
|
+
}
|
100
|
+
|
101
|
+
switch (i - end) {
|
102
|
+
case 2:
|
103
|
+
if (chr < 0x80)
|
104
|
+
code = CODE_INVALID;
|
105
|
+
break;
|
106
|
+
case 3:
|
107
|
+
if (chr < 0x800 ||
|
108
|
+
(chr > 0xd7ff && chr < 0xe000) ||
|
109
|
+
chr > 0xfffd)
|
110
|
+
code = CODE_INVALID;
|
111
|
+
break;
|
112
|
+
case 4:
|
113
|
+
if (chr < 0x10000 || chr > 0x10ffff)
|
114
|
+
code = CODE_INVALID;
|
115
|
+
break;
|
116
|
+
default:
|
117
|
+
break;
|
118
|
+
}
|
119
|
+
if (code == CODE_INVALID)
|
120
|
+
break;
|
121
|
+
}
|
122
|
+
end = i;
|
123
|
+
}
|
124
|
+
|
125
|
+
if (end > start)
|
126
|
+
gh_buf_put(ob, src + start, end - start);
|
127
|
+
|
128
|
+
/* escaping */
|
129
|
+
if (end >= size)
|
130
|
+
break;
|
131
|
+
|
132
|
+
gh_buf_puts(ob, LOOKUP_CODES[code]);
|
133
|
+
}
|
134
|
+
|
135
|
+
return 1;
|
136
|
+
}
|