faml 0.3.1 → 0.3.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitmodules +3 -0
- data/.travis.yml +0 -3
- data/Appraisals +5 -4
- data/CHANGELOG.md +11 -0
- data/Gemfile +1 -1
- data/ext/attribute_builder/attribute_builder.c +63 -31
- data/ext/attribute_builder/extconf.rb +12 -0
- data/faml.gemspec +1 -1
- data/gemfiles/rails_4.0.gemfile +1 -1
- data/gemfiles/rails_4.1.gemfile +1 -1
- data/gemfiles/rails_4.2.gemfile +1 -1
- data/gemfiles/rails_edge.gemfile +2 -1
- data/incompatibilities/README.md +1 -1
- data/incompatibilities/spec/render/attribute_spec.md +0 -0
- data/lib/faml.rb +4 -4
- data/lib/faml/cli.rb +1 -1
- data/lib/faml/compiler.rb +7 -7
- data/lib/faml/engine.rb +4 -4
- data/lib/faml/filter_compilers.rb +12 -12
- data/lib/faml/filter_compilers/base.rb +1 -1
- data/lib/faml/filter_compilers/cdata.rb +1 -1
- data/lib/faml/filter_compilers/coffee.rb +1 -1
- data/lib/faml/filter_compilers/css.rb +1 -1
- data/lib/faml/filter_compilers/escaped.rb +1 -1
- data/lib/faml/filter_compilers/javascript.rb +1 -1
- data/lib/faml/filter_compilers/markdown.rb +1 -1
- data/lib/faml/filter_compilers/plain.rb +1 -1
- data/lib/faml/filter_compilers/preserve.rb +1 -1
- data/lib/faml/filter_compilers/ruby.rb +1 -1
- data/lib/faml/filter_compilers/sass.rb +1 -1
- data/lib/faml/filter_compilers/scss.rb +1 -1
- data/lib/faml/filter_compilers/tilt_base.rb +1 -2
- data/lib/faml/rails_helpers.rb +1 -1
- data/lib/faml/railtie.rb +1 -1
- data/lib/faml/text_compiler.rb +2 -2
- data/lib/faml/tilt.rb +1 -1
- data/lib/faml/version.rb +1 -1
- data/spec/render/attribute_spec.rb +13 -0
- data/vendor/houdini/.gitignore +3 -0
- data/vendor/houdini/COPYING +7 -0
- data/vendor/houdini/Makefile +79 -0
- data/vendor/houdini/README.md +59 -0
- data/vendor/houdini/buffer.c +249 -0
- data/vendor/houdini/buffer.h +113 -0
- data/vendor/houdini/houdini.h +46 -0
- data/vendor/houdini/houdini_href_e.c +115 -0
- data/vendor/houdini/houdini_html_e.c +90 -0
- data/vendor/houdini/houdini_html_u.c +122 -0
- data/vendor/houdini/houdini_js_e.c +90 -0
- data/vendor/houdini/houdini_js_u.c +60 -0
- data/vendor/houdini/houdini_uri_e.c +107 -0
- data/vendor/houdini/houdini_uri_u.c +68 -0
- data/vendor/houdini/houdini_xml_e.c +136 -0
- data/vendor/houdini/html_unescape.gperf +258 -0
- data/vendor/houdini/html_unescape.h +754 -0
- data/vendor/houdini/tools/build_table.py +13 -0
- data/vendor/houdini/tools/build_tables.c +51 -0
- data/vendor/houdini/tools/wikipedia_table.txt +2025 -0
- metadata +23 -3
@@ -0,0 +1,113 @@
|
|
1
|
+
/*
|
2
|
+
* Copyright (C) the libgit2 contributors. All rights reserved.
|
3
|
+
*
|
4
|
+
* This file is part of libgit2, distributed under the GNU GPL v2 with
|
5
|
+
* a Linking Exception. For full terms see the included COPYING file.
|
6
|
+
*/
|
7
|
+
#ifndef INCLUDE_buffer_h__
#define INCLUDE_buffer_h__

#include <stdbool.h>
#include <stddef.h>
#include <stdarg.h>
#include <sys/types.h>
#include <stdint.h>

/*
 * The printf format-checking attribute is a GNU extension. Expand it to
 * nothing elsewhere so that this header still parses on other compilers.
 */
#if defined(__GNUC__) || defined(__clang__)
# define GH_BUF_FORMAT_PRINTF(fmt_idx, arg_idx) \
	__attribute__((format (printf, fmt_idx, arg_idx)))
#else
# define GH_BUF_FORMAT_PRINTF(fmt_idx, arg_idx)
#endif

/**
 * Byte buffer handle.
 *
 * `ptr` is what gh_buf_cstr() returns and `size` is what gh_buf_len()
 * returns. `asize` is presumably the allocated capacity of `ptr`
 * (NOTE(review): confirm against buffer.c).
 */
typedef struct {
	char *ptr;
	size_t asize, size;
} gh_buf;

/* Sentinel storage: GH_BUF_INIT points `ptr` at gh_buf__initbuf, and
 * gh_buf_oom() reports an error when `ptr` equals gh_buf__oom. */
extern char gh_buf__initbuf[];
extern char gh_buf__oom[];

/* Static initializer: ptr = gh_buf__initbuf, asize = 0, size = 0. */
#define GH_BUF_INIT { gh_buf__initbuf, 0, 0 }

/**
 * Initialize a gh_buf structure.
 *
 * For the cases where GH_BUF_INIT cannot be used to do static
 * initialization.
 */
extern void gh_buf_init(gh_buf *buf, size_t initial_size);

/**
 * Attempt to grow the buffer to hold at least `target_size` bytes.
 *
 * If the allocation fails, this will return an error. If mark_oom is true,
 * this will mark the buffer as invalid for future operations; if false,
 * existing buffer content will be preserved, but calling code must handle
 * the fact that the buffer was not expanded.
 */
extern int gh_buf_try_grow(gh_buf *buf, size_t target_size, bool mark_oom);

/**
 * Grow the buffer to hold at least `target_size` bytes.
 *
 * If the allocation fails, this will return an error and the buffer will be
 * marked as invalid for future operations, invalidating its contents.
 *
 * @return 0 on success or -1 on failure
 */
static inline int gh_buf_grow(gh_buf *buf, size_t target_size)
{
	return gh_buf_try_grow(buf, target_size, true);
}

extern void gh_buf_free(gh_buf *buf);
extern void gh_buf_swap(gh_buf *buf_a, gh_buf *buf_b);

/**
 * Test if there have been any reallocation failures with this gh_buf.
 *
 * Any function that writes to a gh_buf can fail due to memory allocation
 * issues. If one fails, the gh_buf will be marked with an OOM error and
 * further calls to modify the buffer will fail. Check gh_buf_oom() at the
 * end of your sequence and it will be true if you ran out of memory at any
 * point with that buffer.
 *
 * @return false if no error, true if allocation error
 */
static inline bool gh_buf_oom(const gh_buf *buf)
{
	return (buf->ptr == gh_buf__oom);
}

/** @return the `size` field of the buffer. */
static inline size_t gh_buf_len(const gh_buf *buf)
{
	return buf->size;
}

extern int gh_buf_cmp(const gh_buf *a, const gh_buf *b);

extern void gh_buf_attach(gh_buf *buf, char *ptr, size_t asize);
extern char *gh_buf_detach(gh_buf *buf);
extern void gh_buf_copy_cstr(char *data, size_t datasize, const gh_buf *buf);

/** @return the `ptr` field of the buffer, without copying. */
static inline const char *gh_buf_cstr(const gh_buf *buf)
{
	return buf->ptr;
}

/*
 * Functions below that return int value error codes will return 0 on
 * success or -1 on failure (which generally means an allocation failed).
 * Using a gh_buf where the allocation has failed will result in -1 from
 * all further calls using that buffer. As a result, you can ignore the
 * return code of these functions and call them in a series then just call
 * gh_buf_oom at the end.
 */
extern int gh_buf_set(gh_buf *buf, const char *data, size_t len);
extern int gh_buf_sets(gh_buf *buf, const char *string);
extern int gh_buf_putc(gh_buf *buf, char c);
extern int gh_buf_put(gh_buf *buf, const void *data, size_t len);
extern int gh_buf_puts(gh_buf *buf, const char *string);
extern int gh_buf_printf(gh_buf *buf, const char *format, ...)
	GH_BUF_FORMAT_PRINTF(2, 3);
extern int gh_buf_vprintf(gh_buf *buf, const char *format, va_list ap);
extern void gh_buf_clear(gh_buf *buf);

/*
 * Append a string literal without calling strlen(). `str` must be an array
 * (typically a literal): sizeof(str) - 1 is used as the length, so passing
 * a pointer would append the wrong number of bytes.
 */
#define gh_buf_PUTS(buf, str) gh_buf_put((buf), (str), sizeof(str) - 1)

#endif
|
@@ -0,0 +1,46 @@
|
|
1
|
+
#ifndef __HOUDINI_H__
#define __HOUDINI_H__

#ifdef __cplusplus
extern "C" {
#endif

#include <stdint.h>
#include "buffer.h"

/* Branch-prediction hints; plain pass-through where the GNU builtin is
 * unavailable. */
#if defined(__GNUC__) || defined(__clang__)
# define likely(x) __builtin_expect((x),1)
# define unlikely(x) __builtin_expect((x),0)
#else
# define likely(x) (x)
# define unlikely(x) (x)
#endif

#ifdef HOUDINI_USE_LOCALE
# include <ctype.h>
# define _isxdigit(c) isxdigit(c)
# define _isdigit(c) isdigit(c)
#else
/*
 * Helper _isdigit methods -- do not trust the current locale.
 *
 * The hex test uses explicit range checks rather than strchr() on a digit
 * string: strchr() also matches the string's terminating NUL, which would
 * make a '\0' byte count as a hexadecimal digit.
 */
static inline int houdini_ascii_isxdigit(int c)
{
	return (c >= '0' && c <= '9') ||
	       (c >= 'A' && c <= 'F') ||
	       (c >= 'a' && c <= 'f');
}
# define _isxdigit(c) houdini_ascii_isxdigit(c)
# define _isdigit(c) ((c) >= '0' && (c) <= '9')
#endif

/*
 * Initial output size hint for escaping: 120% of the input, rounded down.
 * Written as x + x/5 (equal to x*12/10 for non-negative integers) so the
 * intermediate value cannot overflow for large x. Evaluates `x` twice; pass
 * a side-effect-free expression.
 */
#define HOUDINI_ESCAPED_SIZE(x) ((x) + (x) / 5)
#define HOUDINI_UNESCAPED_SIZE(x) (x)

extern int houdini_escape_html(gh_buf *ob, const uint8_t *src, size_t size);
extern int houdini_escape_html0(gh_buf *ob, const uint8_t *src, size_t size, int secure);
extern int houdini_unescape_html(gh_buf *ob, const uint8_t *src, size_t size);
extern int houdini_escape_xml(gh_buf *ob, const uint8_t *src, size_t size);
extern int houdini_escape_uri(gh_buf *ob, const uint8_t *src, size_t size);
extern int houdini_escape_uri_component(gh_buf *ob, const uint8_t *src, size_t size);
extern int houdini_escape_url(gh_buf *ob, const uint8_t *src, size_t size);
extern int houdini_escape_href(gh_buf *ob, const uint8_t *src, size_t size);
extern int houdini_unescape_uri(gh_buf *ob, const uint8_t *src, size_t size);
extern int houdini_unescape_uri_component(gh_buf *ob, const uint8_t *src, size_t size);
extern int houdini_unescape_url(gh_buf *ob, const uint8_t *src, size_t size);
extern int houdini_escape_js(gh_buf *ob, const uint8_t *src, size_t size);
extern int houdini_unescape_js(gh_buf *ob, const uint8_t *src, size_t size);

#ifdef __cplusplus
}
#endif

#endif
|
@@ -0,0 +1,115 @@
|
|
1
|
+
#include <assert.h>
|
2
|
+
#include <stdio.h>
|
3
|
+
#include <string.h>
|
4
|
+
|
5
|
+
#include "houdini.h"
|
6
|
+
|
7
|
+
/*
 * Lookup table indexed by byte value: 1 means the byte is copied to the
 * output unchanged, 0 means it needs some form of escaping.
 *
 * The following characters will not be escaped:
 *
 *		! # $ % ( ) * + , - . / : ; = ? @ _ alphanum
 *
 * Note that this character set is the addition of:
 *
 *	- The characters which are safe to be in a URL
 *	- The characters which are *not* safe to be in
 *	a URL because they are RESERVED characters.
 *
 * We assume (lazily) that any RESERVED char that
 * appears inside a URL is actually meant to
 * have its native function (i.e. as a URL
 * component/separator) and hence needs no escaping.
 *
 * There are two exceptions: the characters & (amp)
 * and ' (single quote) do not appear in the table.
 * They are meant to appear in the URL as components,
 * yet they require special HTML-entity escaping
 * to generate valid HTML markup.
 *
 * All other characters will be escaped to %XX.
 *
 */
static const char HREF_SAFE[256] = {
	/* 0x00 - 0x0F */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x10 - 0x1F */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x20 - 0x2F:  SP ! " # $ % & ' ( ) * + , - . / */
	0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x30 - 0x3F:  0-9 : ; < = > ? */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,
	/* 0x40 - 0x4F:  @ A-O */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x50 - 0x5F:  P-Z [ \ ] ^ _ */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
	/* 0x60 - 0x6F:  ` a-o */
	0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x70 - 0x7F:  p-z { | } ~ DEL */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
	/* 0x80 - 0xFF: never safe */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
|
50
|
+
|
51
|
+
int
|
52
|
+
houdini_escape_href(gh_buf *ob, const uint8_t *src, size_t size)
|
53
|
+
{
|
54
|
+
static const uint8_t hex_chars[] = "0123456789ABCDEF";
|
55
|
+
size_t i = 0, org;
|
56
|
+
uint8_t hex_str[3];
|
57
|
+
|
58
|
+
hex_str[0] = '%';
|
59
|
+
|
60
|
+
while (i < size) {
|
61
|
+
org = i;
|
62
|
+
while (i < size && HREF_SAFE[src[i]] != 0)
|
63
|
+
i++;
|
64
|
+
|
65
|
+
if (likely(i > org)) {
|
66
|
+
if (unlikely(org == 0)) {
|
67
|
+
if (i >= size)
|
68
|
+
return 0;
|
69
|
+
|
70
|
+
gh_buf_grow(ob, HOUDINI_ESCAPED_SIZE(size));
|
71
|
+
}
|
72
|
+
|
73
|
+
gh_buf_put(ob, src + org, i - org);
|
74
|
+
}
|
75
|
+
|
76
|
+
/* escaping */
|
77
|
+
if (i >= size)
|
78
|
+
break;
|
79
|
+
|
80
|
+
switch (src[i]) {
|
81
|
+
/* amp appears all the time in URLs, but needs
|
82
|
+
* HTML-entity escaping to be inside an href */
|
83
|
+
case '&':
|
84
|
+
gh_buf_PUTS(ob, "&");
|
85
|
+
break;
|
86
|
+
|
87
|
+
/* the single quote is a valid URL character
|
88
|
+
* according to the standard; it needs HTML
|
89
|
+
* entity escaping too */
|
90
|
+
case '\'':
|
91
|
+
gh_buf_PUTS(ob, "'");
|
92
|
+
break;
|
93
|
+
|
94
|
+
/* the space can be escaped to %20 or a plus
|
95
|
+
* sign. we're going with the generic escape
|
96
|
+
* for now. the plus thing is more commonly seen
|
97
|
+
* when building GET strings */
|
98
|
+
#if 0
|
99
|
+
case ' ':
|
100
|
+
gh_buf_putc(ob, '+');
|
101
|
+
break;
|
102
|
+
#endif
|
103
|
+
|
104
|
+
/* every other character goes with a %XX escaping */
|
105
|
+
default:
|
106
|
+
hex_str[1] = hex_chars[(src[i] >> 4) & 0xF];
|
107
|
+
hex_str[2] = hex_chars[src[i] & 0xF];
|
108
|
+
gh_buf_put(ob, hex_str, 3);
|
109
|
+
}
|
110
|
+
|
111
|
+
i++;
|
112
|
+
}
|
113
|
+
|
114
|
+
return 1;
|
115
|
+
}
|
@@ -0,0 +1,90 @@
|
|
1
|
+
#include <assert.h>
|
2
|
+
#include <stdio.h>
|
3
|
+
#include <string.h>
|
4
|
+
|
5
|
+
#include "houdini.h"
|
6
|
+
|
7
|
+
/**
 * According to the OWASP rules:
 *
 * & --> &amp;
 * < --> &lt;
 * > --> &gt;
 * " --> &quot;
 * ' --> &#x27;     &apos; is not recommended
 * / --> &#x2F;     forward slash is included as it helps end an HTML entity
 *
 * The table is indexed by byte value. 0 means the byte is copied
 * unchanged; a non-zero entry is the index of the replacement string in
 * HTML_ESCAPES.
 */
static const char HTML_ESCAPE_TABLE[256] = {
	/* 0x00 - 0x0F */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x10 - 0x1F */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x20 - 0x2F:  " = 1, & = 2, ' = 3, / = 4 */
	0, 0, 1, 0, 0, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 4,
	/* 0x30 - 0x3F:  < = 5, > = 6 */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 6, 0,
	/* 0x40 - 0xFF: never escaped */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
|
36
|
+
|
37
|
+
/*
 * Replacement strings, indexed by the non-zero values stored in
 * HTML_ESCAPE_TABLE. Slot 0 is an unused placeholder so that the table
 * values can be used as indices directly.
 */
static const char *HTML_ESCAPES[] = {
	"",
	"&quot;",	/* 1: double quote */
	"&amp;",	/* 2: ampersand */
	"&#39;",	/* 3: single quote */
	"&#47;",	/* 4: forward slash */
	"&lt;",		/* 5: less-than */
	"&gt;"		/* 6: greater-than */
};
|
46
|
+
|
47
|
+
int
|
48
|
+
houdini_escape_html0(gh_buf *ob, const uint8_t *src, size_t size, int secure)
|
49
|
+
{
|
50
|
+
size_t i = 0, org, esc = 0;
|
51
|
+
|
52
|
+
while (i < size) {
|
53
|
+
org = i;
|
54
|
+
while (i < size && (esc = HTML_ESCAPE_TABLE[src[i]]) == 0)
|
55
|
+
i++;
|
56
|
+
|
57
|
+
if (i > org) {
|
58
|
+
if (unlikely(org == 0)) {
|
59
|
+
if (i >= size)
|
60
|
+
return 0;
|
61
|
+
|
62
|
+
gh_buf_grow(ob, HOUDINI_ESCAPED_SIZE(size));
|
63
|
+
}
|
64
|
+
|
65
|
+
gh_buf_put(ob, src + org, i - org);
|
66
|
+
}
|
67
|
+
|
68
|
+
/* escaping */
|
69
|
+
if (unlikely(i >= size))
|
70
|
+
break;
|
71
|
+
|
72
|
+
/* The forward slash is only escaped in secure mode */
|
73
|
+
if (src[i] == '/' && !secure) {
|
74
|
+
gh_buf_putc(ob, '/');
|
75
|
+
} else {
|
76
|
+
gh_buf_puts(ob, HTML_ESCAPES[esc]);
|
77
|
+
}
|
78
|
+
|
79
|
+
i++;
|
80
|
+
}
|
81
|
+
|
82
|
+
return 1;
|
83
|
+
}
|
84
|
+
|
85
|
+
int
|
86
|
+
houdini_escape_html(gh_buf *ob, const uint8_t *src, size_t size)
|
87
|
+
{
|
88
|
+
return houdini_escape_html0(ob, src, size, 1);
|
89
|
+
}
|
90
|
+
|
@@ -0,0 +1,122 @@
|
|
1
|
+
#include <assert.h>
|
2
|
+
#include <stdio.h>
|
3
|
+
#include <string.h>
|
4
|
+
|
5
|
+
#include "houdini.h"
|
6
|
+
#include "html_unescape.h"
|
7
|
+
|
8
|
+
static inline void
|
9
|
+
gh_buf_put_utf8(gh_buf *ob, int c)
|
10
|
+
{
|
11
|
+
unsigned char unichar[4];
|
12
|
+
|
13
|
+
if (c < 0x80) {
|
14
|
+
gh_buf_putc(ob, c);
|
15
|
+
}
|
16
|
+
else if (c < 0x800) {
|
17
|
+
unichar[0] = 192 + (c / 64);
|
18
|
+
unichar[1] = 128 + (c % 64);
|
19
|
+
gh_buf_put(ob, unichar, 2);
|
20
|
+
}
|
21
|
+
else if (c - 0xd800u < 0x800) {
|
22
|
+
gh_buf_putc(ob, '?');
|
23
|
+
}
|
24
|
+
else if (c < 0x10000) {
|
25
|
+
unichar[0] = 224 + (c / 4096);
|
26
|
+
unichar[1] = 128 + (c / 64) % 64;
|
27
|
+
unichar[2] = 128 + (c % 64);
|
28
|
+
gh_buf_put(ob, unichar, 3);
|
29
|
+
}
|
30
|
+
else if (c < 0x110000) {
|
31
|
+
unichar[0] = 240 + (c / 262144);
|
32
|
+
unichar[1] = 128 + (c / 4096) % 64;
|
33
|
+
unichar[2] = 128 + (c / 64) % 64;
|
34
|
+
unichar[3] = 128 + (c % 64);
|
35
|
+
gh_buf_put(ob, unichar, 4);
|
36
|
+
}
|
37
|
+
else {
|
38
|
+
gh_buf_putc(ob, '?');
|
39
|
+
}
|
40
|
+
}
|
41
|
+
|
42
|
+
static size_t
|
43
|
+
unescape_ent(gh_buf *ob, const uint8_t *src, size_t size)
|
44
|
+
{
|
45
|
+
size_t i = 0;
|
46
|
+
|
47
|
+
if (size > 3 && src[0] == '#') {
|
48
|
+
int codepoint = 0;
|
49
|
+
|
50
|
+
if (_isdigit(src[1])) {
|
51
|
+
for (i = 1; i < size && _isdigit(src[i]); ++i)
|
52
|
+
codepoint = (codepoint * 10) + (src[i] - '0');
|
53
|
+
}
|
54
|
+
|
55
|
+
else if (src[1] == 'x' || src[1] == 'X') {
|
56
|
+
for (i = 2; i < size && _isxdigit(src[i]); ++i)
|
57
|
+
codepoint = (codepoint * 16) + ((src[i] | 32) % 39 - 9);
|
58
|
+
}
|
59
|
+
|
60
|
+
if (i < size && src[i] == ';' && codepoint) {
|
61
|
+
gh_buf_put_utf8(ob, codepoint);
|
62
|
+
return i + 1;
|
63
|
+
}
|
64
|
+
}
|
65
|
+
|
66
|
+
else {
|
67
|
+
if (size > MAX_WORD_LENGTH)
|
68
|
+
size = MAX_WORD_LENGTH;
|
69
|
+
|
70
|
+
for (i = MIN_WORD_LENGTH; i < size; ++i) {
|
71
|
+
if (src[i] == ' ')
|
72
|
+
break;
|
73
|
+
|
74
|
+
if (src[i] == ';') {
|
75
|
+
const struct html_ent *entity = find_entity((char *)src, i);
|
76
|
+
|
77
|
+
if (entity != NULL) {
|
78
|
+
gh_buf_put(ob, entity->utf8, entity->utf8_len);
|
79
|
+
return i + 1;
|
80
|
+
}
|
81
|
+
|
82
|
+
break;
|
83
|
+
}
|
84
|
+
}
|
85
|
+
}
|
86
|
+
|
87
|
+
gh_buf_putc(ob, '&');
|
88
|
+
return 0;
|
89
|
+
}
|
90
|
+
|
91
|
+
int
|
92
|
+
houdini_unescape_html(gh_buf *ob, const uint8_t *src, size_t size)
|
93
|
+
{
|
94
|
+
size_t i = 0, org;
|
95
|
+
|
96
|
+
while (i < size) {
|
97
|
+
org = i;
|
98
|
+
while (i < size && src[i] != '&')
|
99
|
+
i++;
|
100
|
+
|
101
|
+
if (likely(i > org)) {
|
102
|
+
if (unlikely(org == 0)) {
|
103
|
+
if (i >= size)
|
104
|
+
return 0;
|
105
|
+
|
106
|
+
gh_buf_grow(ob, HOUDINI_UNESCAPED_SIZE(size));
|
107
|
+
}
|
108
|
+
|
109
|
+
gh_buf_put(ob, src + org, i - org);
|
110
|
+
}
|
111
|
+
|
112
|
+
/* escaping */
|
113
|
+
if (i >= size)
|
114
|
+
break;
|
115
|
+
|
116
|
+
i++;
|
117
|
+
i += unescape_ent(ob, src + i, size - i);
|
118
|
+
}
|
119
|
+
|
120
|
+
return 1;
|
121
|
+
}
|
122
|
+
|