commonmarker 0.3.0 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of commonmarker might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/ext/commonmarker/cmark/CMakeLists.txt +10 -4
- data/ext/commonmarker/cmark/Makefile +5 -5
- data/ext/commonmarker/cmark/api_test/CMakeLists.txt +1 -1
- data/ext/commonmarker/cmark/api_test/main.c +16 -0
- data/ext/commonmarker/cmark/build/CMakeCache.txt +3 -4
- data/ext/commonmarker/cmark/build/CMakeFiles/2.8.10.1/CMakeSystem.cmake +4 -4
- data/ext/commonmarker/cmark/build/CMakeFiles/CMakeError.log +12 -12
- data/ext/commonmarker/cmark/build/CMakeFiles/CMakeOutput.log +97 -142
- data/ext/commonmarker/cmark/build/CMakeFiles/Makefile.cmake +0 -1
- data/ext/commonmarker/cmark/build/api_test/CMakeFiles/api_test.dir/build.make +1 -1
- data/ext/commonmarker/cmark/build/api_test/CMakeFiles/api_test.dir/link.txt +1 -1
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark.dir/DependInfo.cmake +1 -1
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark.dir/build.make +23 -23
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark.dir/cmake_clean.cmake +2 -2
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark.dir/link.txt +1 -1
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/blocks.c.o +0 -0
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/buffer.c.o +0 -0
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/cmark.c.o +0 -0
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/commonmark.c.o +0 -0
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/houdini_html_u.c.o +0 -0
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/html.c.o +0 -0
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/inlines.c.o +0 -0
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/node.c.o +0 -0
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/references.c.o +0 -0
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/render.c.o +0 -0
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/scanners.c.o +0 -0
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/utf8.c.o +0 -0
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/xml.c.o +0 -0
- data/ext/commonmarker/cmark/build/src/cmake_install.cmake +3 -3
- data/ext/commonmarker/cmark/build/src/cmark_version.h +2 -2
- data/ext/commonmarker/cmark/build/src/config.h +6 -6
- data/ext/commonmarker/cmark/build/src/libcmark.a +0 -0
- data/ext/commonmarker/cmark/build/src/libcmark.pc +1 -1
- data/ext/commonmarker/cmark/build/testdir/CTestTestfile.cmake +4 -4
- data/ext/commonmarker/cmark/changelog.txt +46 -0
- data/ext/commonmarker/cmark/man/man3/cmark.3 +21 -20
- data/ext/commonmarker/cmark/src/CMakeLists.txt +4 -6
- data/ext/commonmarker/cmark/src/bench.h +8 -8
- data/ext/commonmarker/cmark/src/blocks.c +917 -947
- data/ext/commonmarker/cmark/src/buffer.c +213 -288
- data/ext/commonmarker/cmark/src/buffer.h +19 -21
- data/ext/commonmarker/cmark/src/chunk.h +78 -82
- data/ext/commonmarker/cmark/src/cmark.c +9 -17
- data/ext/commonmarker/cmark/src/cmark.h +113 -157
- data/ext/commonmarker/cmark/src/cmark_ctype.c +24 -35
- data/ext/commonmarker/cmark/src/commonmark.c +390 -425
- data/ext/commonmarker/cmark/src/config.h.in +6 -6
- data/ext/commonmarker/cmark/src/houdini.h +21 -15
- data/ext/commonmarker/cmark/src/houdini_href_e.c +50 -57
- data/ext/commonmarker/cmark/src/houdini_html_e.c +36 -51
- data/ext/commonmarker/cmark/src/houdini_html_u.c +119 -124
- data/ext/commonmarker/cmark/src/html.c +289 -307
- data/ext/commonmarker/cmark/src/inlines.c +976 -1030
- data/ext/commonmarker/cmark/src/inlines.h +4 -2
- data/ext/commonmarker/cmark/src/iterator.c +96 -126
- data/ext/commonmarker/cmark/src/iterator.h +5 -5
- data/ext/commonmarker/cmark/src/latex.c +379 -401
- data/ext/commonmarker/cmark/src/main.c +168 -175
- data/ext/commonmarker/cmark/src/man.c +212 -226
- data/ext/commonmarker/cmark/src/node.c +746 -839
- data/ext/commonmarker/cmark/src/node.h +47 -48
- data/ext/commonmarker/cmark/src/parser.h +14 -14
- data/ext/commonmarker/cmark/src/references.c +101 -111
- data/ext/commonmarker/cmark/src/references.h +10 -8
- data/ext/commonmarker/cmark/src/render.c +144 -167
- data/ext/commonmarker/cmark/src/render.h +22 -41
- data/ext/commonmarker/cmark/src/scanners.c +27695 -20903
- data/ext/commonmarker/cmark/src/scanners.h +2 -1
- data/ext/commonmarker/cmark/src/scanners.re +1 -1
- data/ext/commonmarker/cmark/src/utf8.c +276 -419
- data/ext/commonmarker/cmark/src/utf8.h +6 -6
- data/ext/commonmarker/cmark/src/xml.c +129 -144
- data/ext/commonmarker/cmark/test/CMakeLists.txt +4 -4
- data/ext/commonmarker/cmark/test/smart_punct.txt +8 -0
- data/ext/commonmarker/cmark/test/spec.txt +109 -47
- data/lib/commonmarker/version.rb +1 -1
- metadata +2 -2
@@ -16,10 +16,10 @@
|
|
16
16
|
#define CMARK_ATTRIBUTE(list)
|
17
17
|
#endif
|
18
18
|
|
19
|
-
#
|
20
|
-
|
21
|
-
#
|
22
|
-
#
|
19
|
+
#ifndef CMARK_INLINE
|
20
|
+
#if defined(_MSC_VER) && !defined(__cplusplus)
|
21
|
+
#define CMARK_INLINE __inline
|
22
|
+
#else
|
23
|
+
#define CMARK_INLINE inline
|
24
|
+
#endif
|
23
25
|
#endif
|
24
|
-
|
25
|
-
#cmakedefine HAVE_C99_SNPRINTF
|
@@ -10,33 +10,39 @@ extern "C" {
|
|
10
10
|
#include "buffer.h"
|
11
11
|
|
12
12
|
#ifdef HAVE___BUILTIN_EXPECT
|
13
|
-
#
|
14
|
-
#
|
13
|
+
#define likely(x) __builtin_expect((x), 1)
|
14
|
+
#define unlikely(x) __builtin_expect((x), 0)
|
15
15
|
#else
|
16
|
-
#
|
17
|
-
#
|
16
|
+
#define likely(x) (x)
|
17
|
+
#define unlikely(x) (x)
|
18
18
|
#endif
|
19
19
|
|
20
20
|
#ifdef HOUDINI_USE_LOCALE
|
21
|
-
#
|
22
|
-
#
|
21
|
+
#define _isxdigit(c) isxdigit(c)
|
22
|
+
#define _isdigit(c) isdigit(c)
|
23
23
|
#else
|
24
24
|
/*
|
25
25
|
* Helper _isdigit methods -- do not trust the current locale
|
26
26
|
* */
|
27
|
-
#
|
28
|
-
#
|
27
|
+
#define _isxdigit(c) (strchr("0123456789ABCDEFabcdef", (c)) != NULL)
|
28
|
+
#define _isdigit(c) ((c) >= '0' && (c) <= '9')
|
29
29
|
#endif
|
30
30
|
|
31
|
-
#define HOUDINI_ESCAPED_SIZE(x) (((x)
|
31
|
+
#define HOUDINI_ESCAPED_SIZE(x) (((x)*12) / 10)
|
32
32
|
#define HOUDINI_UNESCAPED_SIZE(x) (x)
|
33
33
|
|
34
|
-
extern bufsize_t houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src,
|
35
|
-
|
36
|
-
extern int
|
37
|
-
|
38
|
-
extern
|
39
|
-
|
34
|
+
extern bufsize_t houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src,
|
35
|
+
bufsize_t size);
|
36
|
+
extern int houdini_escape_html(cmark_strbuf *ob, const uint8_t *src,
|
37
|
+
bufsize_t size);
|
38
|
+
extern int houdini_escape_html0(cmark_strbuf *ob, const uint8_t *src,
|
39
|
+
bufsize_t size, int secure);
|
40
|
+
extern int houdini_unescape_html(cmark_strbuf *ob, const uint8_t *src,
|
41
|
+
bufsize_t size);
|
42
|
+
extern void houdini_unescape_html_f(cmark_strbuf *ob, const uint8_t *src,
|
43
|
+
bufsize_t size);
|
44
|
+
extern int houdini_escape_href(cmark_strbuf *ob, const uint8_t *src,
|
45
|
+
bufsize_t size);
|
40
46
|
|
41
47
|
#ifdef __cplusplus
|
42
48
|
}
|
@@ -30,78 +30,71 @@
|
|
30
30
|
*
|
31
31
|
*/
|
32
32
|
static const char HREF_SAFE[] = {
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
45
|
-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
46
|
-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
47
|
-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
48
|
-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
33
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
34
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
|
35
|
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
36
|
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
|
37
|
+
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
38
|
+
1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
39
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
40
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
41
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
42
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
43
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
49
44
|
};
|
50
45
|
|
51
|
-
int
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
bufsize_t i = 0, org;
|
56
|
-
uint8_t hex_str[3];
|
46
|
+
int houdini_escape_href(cmark_strbuf *ob, const uint8_t *src, bufsize_t size) {
|
47
|
+
static const uint8_t hex_chars[] = "0123456789ABCDEF";
|
48
|
+
bufsize_t i = 0, org;
|
49
|
+
uint8_t hex_str[3];
|
57
50
|
|
58
|
-
|
51
|
+
hex_str[0] = '%';
|
59
52
|
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
53
|
+
while (i < size) {
|
54
|
+
org = i;
|
55
|
+
while (i < size && HREF_SAFE[src[i]] != 0)
|
56
|
+
i++;
|
64
57
|
|
65
|
-
|
66
|
-
|
58
|
+
if (likely(i > org))
|
59
|
+
cmark_strbuf_put(ob, src + org, i - org);
|
67
60
|
|
68
|
-
|
69
|
-
|
70
|
-
|
61
|
+
/* escaping */
|
62
|
+
if (i >= size)
|
63
|
+
break;
|
71
64
|
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
65
|
+
switch (src[i]) {
|
66
|
+
/* amp appears all the time in URLs, but needs
|
67
|
+
* HTML-entity escaping to be inside an href */
|
68
|
+
case '&':
|
69
|
+
cmark_strbuf_puts(ob, "&amp;");
|
70
|
+
break;
|
78
71
|
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
72
|
+
/* the single quote is a valid URL character
|
73
|
+
* according to the standard; it needs HTML
|
74
|
+
* entity escaping too */
|
75
|
+
case '\'':
|
76
|
+
cmark_strbuf_puts(ob, "&#x27;");
|
77
|
+
break;
|
85
78
|
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
79
|
+
/* the space can be escaped to %20 or a plus
|
80
|
+
* sign. we're going with the generic escape
|
81
|
+
* for now. the plus thing is more commonly seen
|
82
|
+
* when building GET strings */
|
90
83
|
#if 0
|
91
84
|
case ' ':
|
92
85
|
cmark_strbuf_putc(ob, '+');
|
93
86
|
break;
|
94
87
|
#endif
|
95
88
|
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
89
|
+
/* every other character goes with a %XX escaping */
|
90
|
+
default:
|
91
|
+
hex_str[1] = hex_chars[(src[i] >> 4) & 0xF];
|
92
|
+
hex_str[2] = hex_chars[src[i] & 0xF];
|
93
|
+
cmark_strbuf_put(ob, hex_str, 3);
|
94
|
+
}
|
102
95
|
|
103
|
-
|
104
|
-
|
96
|
+
i++;
|
97
|
+
}
|
105
98
|
|
106
|
-
|
99
|
+
return 1;
|
107
100
|
}
|
@@ -16,66 +16,51 @@
|
|
16
16
|
*
|
17
17
|
*/
|
18
18
|
static const char HTML_ESCAPE_TABLE[] = {
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
31
|
-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
32
|
-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
33
|
-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
34
|
-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
19
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
20
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 4,
|
21
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
22
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
23
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
24
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
25
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
26
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
27
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
28
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
29
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
35
30
|
};
|
36
31
|
|
37
|
-
static const char *HTML_ESCAPES[] = {
|
38
|
-
|
39
|
-
"&quot;",
|
40
|
-
"&amp;",
|
41
|
-
"&#39;",
|
42
|
-
"&#47;",
|
43
|
-
"&lt;",
|
44
|
-
"&gt;"
|
45
|
-
};
|
32
|
+
static const char *HTML_ESCAPES[] = {"", "&quot;", "&amp;", "&#39;",
|
33
|
+
"&#47;", "&lt;", "&gt;"};
|
46
34
|
|
47
|
-
int
|
48
|
-
|
49
|
-
|
50
|
-
bufsize_t i = 0, org, esc = 0;
|
35
|
+
int houdini_escape_html0(cmark_strbuf *ob, const uint8_t *src, bufsize_t size,
|
36
|
+
int secure) {
|
37
|
+
bufsize_t i = 0, org, esc = 0;
|
51
38
|
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
39
|
+
while (i < size) {
|
40
|
+
org = i;
|
41
|
+
while (i < size && (esc = HTML_ESCAPE_TABLE[src[i]]) == 0)
|
42
|
+
i++;
|
56
43
|
|
57
|
-
|
58
|
-
|
44
|
+
if (i > org)
|
45
|
+
cmark_strbuf_put(ob, src + org, i - org);
|
59
46
|
|
60
|
-
|
61
|
-
|
62
|
-
|
47
|
+
/* escaping */
|
48
|
+
if (unlikely(i >= size))
|
49
|
+
break;
|
63
50
|
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
51
|
+
/* The forward slash is only escaped in secure mode */
|
52
|
+
if ((src[i] == '/' || src[i] == '\'') && !secure) {
|
53
|
+
cmark_strbuf_putc(ob, src[i]);
|
54
|
+
} else {
|
55
|
+
cmark_strbuf_puts(ob, HTML_ESCAPES[esc]);
|
56
|
+
}
|
70
57
|
|
71
|
-
|
72
|
-
|
58
|
+
i++;
|
59
|
+
}
|
73
60
|
|
74
|
-
|
61
|
+
return 1;
|
75
62
|
}
|
76
63
|
|
77
|
-
int
|
78
|
-
|
79
|
-
{
|
80
|
-
return houdini_escape_html0(ob, src, size, 1);
|
64
|
+
int houdini_escape_html(cmark_strbuf *ob, const uint8_t *src, bufsize_t size) {
|
65
|
+
return houdini_escape_html0(ob, src, size, 1);
|
81
66
|
}
|
@@ -9,145 +9,140 @@
|
|
9
9
|
|
10
10
|
/* Binary tree lookup code for entities added by JGM */
|
11
11
|
|
12
|
-
static unsigned char *
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
12
|
+
static unsigned char *S_lookup(int i, int low, int hi, const unsigned char *s,
|
13
|
+
int len) {
|
14
|
+
int j;
|
15
|
+
int cmp = strncmp((char *)s, (char *)cmark_entities[i].entity, len);
|
16
|
+
if (cmp == 0 && cmark_entities[i].entity[len] == 0) {
|
17
|
+
return (unsigned char *)cmark_entities[i].bytes;
|
18
|
+
} else if (cmp < 0 && i > low) {
|
19
|
+
j = i - ((i - low) / 2);
|
20
|
+
if (j == i)
|
21
|
+
j -= 1;
|
22
|
+
return S_lookup(j, low, i - 1, s, len);
|
23
|
+
} else if (cmp > 0 && i < hi) {
|
24
|
+
j = i + ((hi - i) / 2);
|
25
|
+
if (j == i)
|
26
|
+
j += 1;
|
27
|
+
return S_lookup(j, i + 1, hi, s, len);
|
28
|
+
} else {
|
29
|
+
return NULL;
|
30
|
+
}
|
30
31
|
}
|
31
32
|
|
32
|
-
static unsigned char *
|
33
|
-
|
34
|
-
{
|
35
|
-
return S_lookup(CMARK_NUM_ENTITIES / 2, 0, CMARK_NUM_ENTITIES - 1, s, len);
|
33
|
+
static unsigned char *S_lookup_entity(const unsigned char *s, int len) {
|
34
|
+
return S_lookup(CMARK_NUM_ENTITIES / 2, 0, CMARK_NUM_ENTITIES - 1, s, len);
|
36
35
|
}
|
37
36
|
|
38
|
-
bufsize_t
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
}
|
107
|
-
|
108
|
-
return 0;
|
37
|
+
bufsize_t houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src,
|
38
|
+
bufsize_t size) {
|
39
|
+
bufsize_t i = 0;
|
40
|
+
|
41
|
+
if (size >= 3 && src[0] == '#') {
|
42
|
+
int codepoint = 0;
|
43
|
+
int num_digits = 0;
|
44
|
+
|
45
|
+
if (_isdigit(src[1])) {
|
46
|
+
for (i = 1; i < size && _isdigit(src[i]); ++i) {
|
47
|
+
codepoint = (codepoint * 10) + (src[i] - '0');
|
48
|
+
|
49
|
+
if (codepoint >= 0x110000) {
|
50
|
+
// Keep counting digits but
|
51
|
+
// avoid integer overflow.
|
52
|
+
codepoint = 0x110000;
|
53
|
+
}
|
54
|
+
}
|
55
|
+
|
56
|
+
num_digits = i - 1;
|
57
|
+
}
|
58
|
+
|
59
|
+
else if (src[1] == 'x' || src[1] == 'X') {
|
60
|
+
for (i = 2; i < size && _isxdigit(src[i]); ++i) {
|
61
|
+
codepoint = (codepoint * 16) + ((src[i] | 32) % 39 - 9);
|
62
|
+
|
63
|
+
if (codepoint >= 0x110000) {
|
64
|
+
// Keep counting digits but
|
65
|
+
// avoid integer overflow.
|
66
|
+
codepoint = 0x110000;
|
67
|
+
}
|
68
|
+
}
|
69
|
+
|
70
|
+
num_digits = i - 2;
|
71
|
+
}
|
72
|
+
|
73
|
+
if (num_digits >= 1 && num_digits <= 8 && i < size && src[i] == ';') {
|
74
|
+
if (codepoint == 0 || (codepoint >= 0xD800 && codepoint < 0xE000) ||
|
75
|
+
codepoint >= 0x110000) {
|
76
|
+
codepoint = 0xFFFD;
|
77
|
+
}
|
78
|
+
cmark_utf8proc_encode_char(codepoint, ob);
|
79
|
+
return i + 1;
|
80
|
+
}
|
81
|
+
}
|
82
|
+
|
83
|
+
else {
|
84
|
+
if (size > CMARK_ENTITY_MAX_LENGTH)
|
85
|
+
size = CMARK_ENTITY_MAX_LENGTH;
|
86
|
+
|
87
|
+
for (i = CMARK_ENTITY_MIN_LENGTH; i < size; ++i) {
|
88
|
+
if (src[i] == ' ')
|
89
|
+
break;
|
90
|
+
|
91
|
+
if (src[i] == ';') {
|
92
|
+
const unsigned char *entity = S_lookup_entity(src, i);
|
93
|
+
|
94
|
+
if (entity != NULL) {
|
95
|
+
cmark_strbuf_puts(ob, (const char *)entity);
|
96
|
+
return i + 1;
|
97
|
+
}
|
98
|
+
|
99
|
+
break;
|
100
|
+
}
|
101
|
+
}
|
102
|
+
}
|
103
|
+
|
104
|
+
return 0;
|
109
105
|
}
|
110
106
|
|
111
|
-
int
|
112
|
-
|
113
|
-
|
114
|
-
bufsize_t i = 0, org, ent;
|
107
|
+
int houdini_unescape_html(cmark_strbuf *ob, const uint8_t *src,
|
108
|
+
bufsize_t size) {
|
109
|
+
bufsize_t i = 0, org, ent;
|
115
110
|
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
111
|
+
while (i < size) {
|
112
|
+
org = i;
|
113
|
+
while (i < size && src[i] != '&')
|
114
|
+
i++;
|
120
115
|
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
116
|
+
if (likely(i > org)) {
|
117
|
+
if (unlikely(org == 0)) {
|
118
|
+
if (i >= size)
|
119
|
+
return 0;
|
125
120
|
|
126
|
-
|
127
|
-
|
121
|
+
cmark_strbuf_grow(ob, HOUDINI_UNESCAPED_SIZE(size));
|
122
|
+
}
|
128
123
|
|
129
|
-
|
130
|
-
|
124
|
+
cmark_strbuf_put(ob, src + org, i - org);
|
125
|
+
}
|
131
126
|
|
132
|
-
|
133
|
-
|
134
|
-
|
127
|
+
/* escaping */
|
128
|
+
if (i >= size)
|
129
|
+
break;
|
135
130
|
|
136
|
-
|
131
|
+
i++;
|
137
132
|
|
138
|
-
|
139
|
-
|
133
|
+
ent = houdini_unescape_ent(ob, src + i, size - i);
|
134
|
+
i += ent;
|
140
135
|
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
136
|
+
/* not really an entity */
|
137
|
+
if (ent == 0)
|
138
|
+
cmark_strbuf_putc(ob, '&');
|
139
|
+
}
|
145
140
|
|
146
|
-
|
141
|
+
return 1;
|
147
142
|
}
|
148
143
|
|
149
|
-
void houdini_unescape_html_f(cmark_strbuf *ob, const uint8_t *src,
|
150
|
-
{
|
151
|
-
|
152
|
-
|
144
|
+
void houdini_unescape_html_f(cmark_strbuf *ob, const uint8_t *src,
|
145
|
+
bufsize_t size) {
|
146
|
+
if (!houdini_unescape_html(ob, src, size))
|
147
|
+
cmark_strbuf_put(ob, src, size);
|
153
148
|
}
|