markly 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/bin/markly +94 -0
- data/ext/markly/arena.c +103 -0
- data/ext/markly/autolink.c +425 -0
- data/ext/markly/autolink.h +8 -0
- data/ext/markly/blocks.c +1585 -0
- data/ext/markly/buffer.c +278 -0
- data/ext/markly/buffer.h +116 -0
- data/ext/markly/case_fold_switch.inc +4327 -0
- data/ext/markly/chunk.h +135 -0
- data/ext/markly/cmark-gfm-core-extensions.h +54 -0
- data/ext/markly/cmark-gfm-extension_api.h +736 -0
- data/ext/markly/cmark-gfm-extensions_export.h +42 -0
- data/ext/markly/cmark-gfm.h +817 -0
- data/ext/markly/cmark-gfm_export.h +42 -0
- data/ext/markly/cmark-gfm_version.h +7 -0
- data/ext/markly/cmark.c +55 -0
- data/ext/markly/cmark_ctype.c +44 -0
- data/ext/markly/cmark_ctype.h +33 -0
- data/ext/markly/commonmark.c +519 -0
- data/ext/markly/config.h +76 -0
- data/ext/markly/core-extensions.c +27 -0
- data/ext/markly/entities.inc +2138 -0
- data/ext/markly/ext_scanners.c +1159 -0
- data/ext/markly/ext_scanners.h +24 -0
- data/ext/markly/extconf.rb +7 -0
- data/ext/markly/footnotes.c +40 -0
- data/ext/markly/footnotes.h +25 -0
- data/ext/markly/houdini.h +57 -0
- data/ext/markly/houdini_href_e.c +100 -0
- data/ext/markly/houdini_html_e.c +66 -0
- data/ext/markly/houdini_html_u.c +149 -0
- data/ext/markly/html.c +465 -0
- data/ext/markly/html.h +27 -0
- data/ext/markly/inlines.c +1633 -0
- data/ext/markly/inlines.h +29 -0
- data/ext/markly/iterator.c +159 -0
- data/ext/markly/iterator.h +26 -0
- data/ext/markly/latex.c +466 -0
- data/ext/markly/linked_list.c +37 -0
- data/ext/markly/man.c +278 -0
- data/ext/markly/map.c +122 -0
- data/ext/markly/map.h +41 -0
- data/ext/markly/markly.c +1226 -0
- data/ext/markly/markly.h +16 -0
- data/ext/markly/node.c +979 -0
- data/ext/markly/node.h +118 -0
- data/ext/markly/parser.h +58 -0
- data/ext/markly/plaintext.c +235 -0
- data/ext/markly/plugin.c +36 -0
- data/ext/markly/plugin.h +34 -0
- data/ext/markly/references.c +42 -0
- data/ext/markly/references.h +26 -0
- data/ext/markly/registry.c +63 -0
- data/ext/markly/registry.h +24 -0
- data/ext/markly/render.c +205 -0
- data/ext/markly/render.h +62 -0
- data/ext/markly/scanners.c +20382 -0
- data/ext/markly/scanners.h +62 -0
- data/ext/markly/scanners.re +326 -0
- data/ext/markly/strikethrough.c +167 -0
- data/ext/markly/strikethrough.h +9 -0
- data/ext/markly/syntax_extension.c +149 -0
- data/ext/markly/syntax_extension.h +34 -0
- data/ext/markly/table.c +803 -0
- data/ext/markly/table.h +12 -0
- data/ext/markly/tagfilter.c +60 -0
- data/ext/markly/tagfilter.h +8 -0
- data/ext/markly/tasklist.c +156 -0
- data/ext/markly/tasklist.h +8 -0
- data/ext/markly/utf8.c +317 -0
- data/ext/markly/utf8.h +35 -0
- data/ext/markly/xml.c +181 -0
- data/lib/markly.rb +43 -0
- data/lib/markly/flags.rb +37 -0
- data/lib/markly/markly.so +0 -0
- data/lib/markly/node.rb +70 -0
- data/lib/markly/node/inspect.rb +59 -0
- data/lib/markly/renderer.rb +133 -0
- data/lib/markly/renderer/html_renderer.rb +252 -0
- data/lib/markly/version.rb +5 -0
- metadata +211 -0
@@ -0,0 +1,24 @@
|
|
1
|
+
#include "chunk.h"
|
2
|
+
#include "cmark-gfm.h"
|
3
|
+
|
4
|
+
#ifdef __cplusplus
|
5
|
+
extern "C" {
|
6
|
+
#endif
|
7
|
+
|
8
|
+
bufsize_t _ext_scan_at(bufsize_t (*scanner)(const unsigned char *),
|
9
|
+
unsigned char *ptr, int len, bufsize_t offset);
|
10
|
+
bufsize_t _scan_table_start(const unsigned char *p);
|
11
|
+
bufsize_t _scan_table_cell(const unsigned char *p);
|
12
|
+
bufsize_t _scan_table_cell_end(const unsigned char *p);
|
13
|
+
bufsize_t _scan_table_row_end(const unsigned char *p);
|
14
|
+
bufsize_t _scan_tasklist(const unsigned char *p);
|
15
|
+
|
16
|
+
/*
 * Convenience wrappers around _ext_scan_at(): each one passes the matching
 * _scan_* function as the scanner and forwards its three arguments
 * (pointer, length, offset) in order.
 */
#define scan_table_start(ptr, len, offset)                                     \
  _ext_scan_at(_scan_table_start, (ptr), (len), (offset))
#define scan_table_cell(ptr, len, offset)                                      \
  _ext_scan_at(_scan_table_cell, (ptr), (len), (offset))
#define scan_table_cell_end(ptr, len, offset)                                  \
  _ext_scan_at(_scan_table_cell_end, (ptr), (len), (offset))
#define scan_table_row_end(ptr, len, offset)                                   \
  _ext_scan_at(_scan_table_row_end, (ptr), (len), (offset))
#define scan_tasklist(ptr, len, offset)                                        \
  _ext_scan_at(_scan_tasklist, (ptr), (len), (offset))
|
21
|
+
|
22
|
+
#ifdef __cplusplus
|
23
|
+
}
|
24
|
+
#endif
|
@@ -0,0 +1,40 @@
|
|
1
|
+
#include "cmark-gfm.h"
|
2
|
+
#include "parser.h"
|
3
|
+
#include "footnotes.h"
|
4
|
+
#include "inlines.h"
|
5
|
+
#include "chunk.h"
|
6
|
+
|
7
|
+
/*
 * Per-entry free callback handed to cmark_map_new() for footnote maps.
 *
 * Releases the entry's label through the map's allocator, frees the attached
 * node when one is set, and finally frees the entry itself. A NULL entry is
 * ignored.
 */
static void footnote_free(cmark_map *map, cmark_map_entry *_ref) {
  cmark_footnote *footnote = (cmark_footnote *)_ref;
  cmark_mem *allocator = map->mem;

  if (footnote == NULL)
    return;

  allocator->free(footnote->entry.label);

  if (footnote->node != NULL)
    cmark_node_free(footnote->node);

  allocator->free(footnote);
}
|
17
|
+
|
18
|
+
/*
 * Adds `node` to `map` as a footnote entry.
 *
 * The entry's label comes from normalize_map_label() applied to
 * node->as.literal; when that returns NULL nothing is added. Otherwise a
 * zero-initialised cmark_footnote is allocated with the map's allocator,
 * its age is set to the current map size, and it is linked in at the head
 * of map->refs before the size is incremented.
 */
void cmark_footnote_create(cmark_map *map, cmark_node *node) {
  unsigned char *label = normalize_map_label(map->mem, &node->as.literal);
  cmark_footnote *footnote;

  /* empty footnote name, or composed from only whitespace */
  if (label == NULL)
    return;

  /* entries are only added while map->sorted has not been set */
  assert(map->sorted == NULL);

  footnote = (cmark_footnote *)map->mem->calloc(1, sizeof(*footnote));
  footnote->node = node;
  footnote->entry.label = label;
  footnote->entry.age = map->size;
  footnote->entry.next = map->refs;

  map->refs = (cmark_map_entry *)footnote;
  map->size += 1;
}
|
37
|
+
|
38
|
+
/*
 * Returns the map produced by cmark_map_new(mem, footnote_free), i.e. a map
 * whose entries are released with footnote_free().
 */
cmark_map *cmark_footnote_map_new(cmark_mem *mem) {
  cmark_map *footnotes = cmark_map_new(mem, footnote_free);
  return footnotes;
}
|
@@ -0,0 +1,25 @@
|
|
1
|
+
#ifndef CMARK_FOOTNOTES_H
|
2
|
+
#define CMARK_FOOTNOTES_H
|
3
|
+
|
4
|
+
#include "map.h"
|
5
|
+
|
6
|
+
#ifdef __cplusplus
|
7
|
+
extern "C" {
|
8
|
+
#endif
|
9
|
+
|
10
|
+
struct cmark_footnote {
|
11
|
+
cmark_map_entry entry;
|
12
|
+
cmark_node *node;
|
13
|
+
unsigned int ix;
|
14
|
+
};
|
15
|
+
|
16
|
+
typedef struct cmark_footnote cmark_footnote;
|
17
|
+
|
18
|
+
void cmark_footnote_create(cmark_map *map, cmark_node *node);
|
19
|
+
cmark_map *cmark_footnote_map_new(cmark_mem *mem);
|
20
|
+
|
21
|
+
#ifdef __cplusplus
|
22
|
+
}
|
23
|
+
#endif
|
24
|
+
|
25
|
+
#endif
|
@@ -0,0 +1,57 @@
|
|
1
|
+
#ifndef CMARK_HOUDINI_H
|
2
|
+
#define CMARK_HOUDINI_H
|
3
|
+
|
4
|
+
#ifdef __cplusplus
|
5
|
+
extern "C" {
|
6
|
+
#endif
|
7
|
+
|
8
|
+
#include <stdint.h>
|
9
|
+
#include "config.h"
|
10
|
+
#include "buffer.h"
|
11
|
+
|
12
|
+
/*
 * Branch-prediction hints. When HAVE___BUILTIN_EXPECT is defined the
 * condition is routed through __builtin_expect with the expected outcome
 * (1 for likely, 0 for unlikely); otherwise both macros expand to the
 * parenthesised condition itself.
 */
#if defined(HAVE___BUILTIN_EXPECT)
#define likely(cond) __builtin_expect((cond), 1)
#define unlikely(cond) __builtin_expect((cond), 0)
#else
#define likely(cond) (cond)
#define unlikely(cond) (cond)
#endif
|
19
|
+
|
20
|
+
/*
 * Digit classifiers used by the entity unescaper.
 *
 * With HOUDINI_USE_LOCALE defined they defer to isxdigit()/isdigit().
 * Otherwise they are locale-independent helpers -- do not trust the current
 * locale.
 *
 * _isxdigit searches exactly the 22 hex-digit characters with memchr().
 * The earlier strchr() form also matched the string's terminating NUL, so a
 * 0 byte was reported as a hex digit. memchr() comes from <string.h>, the
 * same header strchr() required.
 *
 * _isdigit evaluates its argument twice; pass side-effect-free expressions.
 */
#ifdef HOUDINI_USE_LOCALE
#define _isxdigit(c) isxdigit(c)
#define _isdigit(c) isdigit(c)
#else
#define _isxdigit(c) (memchr("0123456789ABCDEFabcdef", (c), 22) != NULL)
#define _isdigit(c) ((c) >= '0' && (c) <= '9')
#endif
|
30
|
+
|
31
|
+
/*
 * Size helpers: HOUDINI_ESCAPED_SIZE(len) evaluates to len * 12 / 10 and
 * HOUDINI_UNESCAPED_SIZE(len) evaluates to len unchanged.
 */
#define HOUDINI_ESCAPED_SIZE(len) ((12 * (len)) / 10)
#define HOUDINI_UNESCAPED_SIZE(len) (len)
|
33
|
+
|
34
|
+
CMARK_GFM_EXPORT
|
35
|
+
bufsize_t houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src,
|
36
|
+
bufsize_t size);
|
37
|
+
CMARK_GFM_EXPORT
|
38
|
+
int houdini_escape_html(cmark_strbuf *ob, const uint8_t *src,
|
39
|
+
bufsize_t size);
|
40
|
+
CMARK_GFM_EXPORT
|
41
|
+
int houdini_escape_html0(cmark_strbuf *ob, const uint8_t *src,
|
42
|
+
bufsize_t size, int secure);
|
43
|
+
CMARK_GFM_EXPORT
|
44
|
+
int houdini_unescape_html(cmark_strbuf *ob, const uint8_t *src,
|
45
|
+
bufsize_t size);
|
46
|
+
CMARK_GFM_EXPORT
|
47
|
+
void houdini_unescape_html_f(cmark_strbuf *ob, const uint8_t *src,
|
48
|
+
bufsize_t size);
|
49
|
+
CMARK_GFM_EXPORT
|
50
|
+
int houdini_escape_href(cmark_strbuf *ob, const uint8_t *src,
|
51
|
+
bufsize_t size);
|
52
|
+
|
53
|
+
#ifdef __cplusplus
|
54
|
+
}
|
55
|
+
#endif
|
56
|
+
|
57
|
+
#endif
|
@@ -0,0 +1,100 @@
|
|
1
|
+
#include <assert.h>
|
2
|
+
#include <stdio.h>
|
3
|
+
#include <string.h>
|
4
|
+
|
5
|
+
#include "houdini.h"
|
6
|
+
|
7
|
+
/*
|
8
|
+
* The following characters will not be escaped:
|
9
|
+
*
|
10
|
+
* -_.+!*'(),%#@?=;:/,+&$~ alphanum
|
11
|
+
*
|
12
|
+
* Note that this character set is the addition of:
|
13
|
+
*
|
14
|
+
* - The characters which are safe to be in an URL
|
15
|
+
* - The characters which are *not* safe to be in
|
16
|
+
* an URL because they are RESERVED characters.
|
17
|
+
*
|
18
|
+
* We assume (lazily) that any RESERVED char that
|
19
|
+
* appears inside an URL is actually meant to
|
20
|
+
* have its native function (i.e. as an URL
|
21
|
+
* component/separator) and hence needs no escaping.
|
22
|
+
*
|
23
|
+
* There are two exceptions: the characters & (amp)
|
24
|
+
* and ' (single quote) do not appear in the table.
|
25
|
+
* They are meant to appear in the URL as components,
|
26
|
+
* yet they require special HTML-entity escaping
|
27
|
+
* to generate valid HTML markup.
|
28
|
+
*
|
29
|
+
* All other characters will be escaped to %XX.
|
30
|
+
*
|
31
|
+
*/
|
32
|
+
/* 256-entry table indexed by byte value. A non-zero entry marks a byte that
 * houdini_escape_href() copies through unchanged; a zero entry marks a byte
 * that it escapes. */
static const char HREF_SAFE[] = {
|
33
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
34
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
|
35
|
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
36
|
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
|
37
|
+
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
38
|
+
1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
39
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
40
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
41
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
42
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
43
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
44
|
+
};
|
45
|
+
|
46
|
+
/*
 * Escapes `size` bytes of `src` for use inside an HTML href attribute and
 * appends the result to `ob`.
 *
 * Bytes flagged in HREF_SAFE are copied through in runs. Of the remaining
 * bytes, '&' is written as "&amp;" and '\'' as "&#x27;" (both belong in URLs
 * but need HTML-entity escaping inside an attribute); every other byte,
 * the space included, is written as %XX using uppercase hex digits.
 *
 * Always returns 1.
 */
int houdini_escape_href(cmark_strbuf *ob, const uint8_t *src, bufsize_t size) {
  static const uint8_t hex_chars[] = "0123456789ABCDEF";
  bufsize_t i = 0, org;
  uint8_t hex_str[3];

  hex_str[0] = '%';

  while (i < size) {
    /* copy the longest run of safe bytes with a single call */
    org = i;
    while (i < size && HREF_SAFE[src[i]] != 0)
      i++;

    if (likely(i > org))
      cmark_strbuf_put(ob, src + org, i - org);

    /* escaping */
    if (i >= size)
      break;

    switch (src[i]) {
    /* amp appears all the time in URLs, but needs
     * HTML-entity escaping to be inside an href */
    case '&':
      cmark_strbuf_puts(ob, "&amp;");
      break;

    /* the single quote is a valid URL character
     * according to the standard; it needs HTML
     * entity escaping too */
    case '\'':
      cmark_strbuf_puts(ob, "&#x27;");
      break;

/* the space can be escaped to %20 or a plus
 * sign. we're going with the generic escape
 * for now. the plus thing is more commonly seen
 * when building GET strings */
#if 0
    case ' ':
      cmark_strbuf_putc(ob, '+');
      break;
#endif

    /* every other character goes with a %XX escaping */
    default:
      hex_str[1] = hex_chars[(src[i] >> 4) & 0xF];
      hex_str[2] = hex_chars[src[i] & 0xF];
      cmark_strbuf_put(ob, hex_str, 3);
      break;
    }

    i++;
  }

  return 1;
}
|
@@ -0,0 +1,66 @@
|
|
1
|
+
#include <assert.h>
|
2
|
+
#include <stdio.h>
|
3
|
+
#include <string.h>
|
4
|
+
|
5
|
+
#include "houdini.h"
|
6
|
+
|
7
|
+
/**
|
8
|
+
* According to the OWASP rules:
|
9
|
+
*
|
10
|
+
* & --> &amp;
|
11
|
+
* < --> &lt;
|
12
|
+
* > --> &gt;
|
13
|
+
* " --> &quot;
|
14
|
+
* ' --> &#x27; &apos; is not recommended
|
15
|
+
* / --> &#x2F; forward slash is included as it helps end an HTML entity
|
16
|
+
*
|
17
|
+
*/
|
18
|
+
/* 256-entry table indexed by byte value. 0 means the byte is copied through
 * by houdini_escape_html0(); a non-zero value is the index of the byte's
 * replacement string in HTML_ESCAPES. The non-zero entries are
 * '"' = 1, '&' = 2, '\'' = 3, '/' = 4, '<' = 5, '>' = 6. */
static const char HTML_ESCAPE_TABLE[] = {
|
19
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
20
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 4,
|
21
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
22
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
23
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
24
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
25
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
26
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
27
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
28
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
29
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
30
|
+
};
|
31
|
+
|
32
|
+
/*
 * Replacement strings selected by the class numbers in HTML_ESCAPE_TABLE.
 * Slot 0 is the unused "no escaping" class; slots 1..6 are, in order, the
 * escapes for '"', '&', '\'', '/', '<' and '>'.
 */
static const char *HTML_ESCAPES[] = {"",      "&quot;", "&amp;", "&#39;",
                                     "&#47;", "&lt;",   "&gt;"};
|
34
|
+
|
35
|
+
/*
 * Appends an HTML-escaped copy of src[0..size) to `ob`.
 *
 * Bytes whose HTML_ESCAPE_TABLE class is 0 are copied verbatim in runs; any
 * other byte is replaced by its HTML_ESCAPES string. When `secure` is zero,
 * '/' and '\'' are emitted as-is instead of being escaped.
 *
 * Always returns 1.
 */
int houdini_escape_html0(cmark_strbuf *ob, const uint8_t *src, bufsize_t size,
                         int secure) {
  bufsize_t pos = 0;
  bufsize_t escape_class = 0;

  while (pos < size) {
    const bufsize_t run_start = pos;

    /* advance over the bytes that need no escaping */
    for (; pos < size; pos++) {
      escape_class = HTML_ESCAPE_TABLE[src[pos]];
      if (escape_class != 0)
        break;
    }

    if (pos > run_start)
      cmark_strbuf_put(ob, src + run_start, pos - run_start);

    /* escaping */
    if (unlikely(pos >= size))
      break;

    /* The forward slash and single quote are only escaped in secure mode */
    if (!secure && (src[pos] == '/' || src[pos] == '\''))
      cmark_strbuf_putc(ob, src[pos]);
    else
      cmark_strbuf_puts(ob, HTML_ESCAPES[escape_class]);

    pos++;
  }

  return 1;
}
|
63
|
+
|
64
|
+
/*
 * Secure-mode shorthand: forwards to houdini_escape_html0() with `secure`
 * set to 1 and returns its result.
 */
int houdini_escape_html(cmark_strbuf *ob, const uint8_t *src, bufsize_t size) {
  const int secure = 1;

  return houdini_escape_html0(ob, src, size, secure);
}
|
@@ -0,0 +1,149 @@
|
|
1
|
+
#include <assert.h>
|
2
|
+
#include <stdio.h>
|
3
|
+
#include <string.h>
|
4
|
+
|
5
|
+
#include "buffer.h"
|
6
|
+
#include "houdini.h"
|
7
|
+
#include "utf8.h"
|
8
|
+
#include "entities.inc"
|
9
|
+
|
10
|
+
/* Binary tree lookup code for entities added by JGM */
|
11
|
+
|
12
|
+
static const unsigned char *S_lookup(int i, int low, int hi,
|
13
|
+
const unsigned char *s, int len) {
|
14
|
+
int j;
|
15
|
+
int cmp =
|
16
|
+
strncmp((const char *)s, (const char *)cmark_entities[i].entity, len);
|
17
|
+
if (cmp == 0 && cmark_entities[i].entity[len] == 0) {
|
18
|
+
return (const unsigned char *)cmark_entities[i].bytes;
|
19
|
+
} else if (cmp <= 0 && i > low) {
|
20
|
+
j = i - ((i - low) / 2);
|
21
|
+
if (j == i)
|
22
|
+
j -= 1;
|
23
|
+
return S_lookup(j, low, i - 1, s, len);
|
24
|
+
} else if (cmp > 0 && i < hi) {
|
25
|
+
j = i + ((hi - i) / 2);
|
26
|
+
if (j == i)
|
27
|
+
j += 1;
|
28
|
+
return S_lookup(j, i + 1, hi, s, len);
|
29
|
+
} else {
|
30
|
+
return NULL;
|
31
|
+
}
|
32
|
+
}
|
33
|
+
|
34
|
+
static const unsigned char *S_lookup_entity(const unsigned char *s, int len) {
|
35
|
+
return S_lookup(CMARK_NUM_ENTITIES / 2, 0, CMARK_NUM_ENTITIES - 1, s, len);
|
36
|
+
}
|
37
|
+
|
38
|
+
bufsize_t houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src,
|
39
|
+
bufsize_t size) {
|
40
|
+
bufsize_t i = 0;
|
41
|
+
|
42
|
+
if (size >= 3 && src[0] == '#') {
|
43
|
+
int codepoint = 0;
|
44
|
+
int num_digits = 0;
|
45
|
+
|
46
|
+
if (_isdigit(src[1])) {
|
47
|
+
for (i = 1; i < size && _isdigit(src[i]); ++i) {
|
48
|
+
codepoint = (codepoint * 10) + (src[i] - '0');
|
49
|
+
|
50
|
+
if (codepoint >= 0x110000) {
|
51
|
+
// Keep counting digits but
|
52
|
+
// avoid integer overflow.
|
53
|
+
codepoint = 0x110000;
|
54
|
+
}
|
55
|
+
}
|
56
|
+
|
57
|
+
num_digits = i - 1;
|
58
|
+
}
|
59
|
+
|
60
|
+
else if (src[1] == 'x' || src[1] == 'X') {
|
61
|
+
for (i = 2; i < size && _isxdigit(src[i]); ++i) {
|
62
|
+
codepoint = (codepoint * 16) + ((src[i] | 32) % 39 - 9);
|
63
|
+
|
64
|
+
if (codepoint >= 0x110000) {
|
65
|
+
// Keep counting digits but
|
66
|
+
// avoid integer overflow.
|
67
|
+
codepoint = 0x110000;
|
68
|
+
}
|
69
|
+
}
|
70
|
+
|
71
|
+
num_digits = i - 2;
|
72
|
+
}
|
73
|
+
|
74
|
+
if (num_digits >= 1 && num_digits <= 8 && i < size && src[i] == ';') {
|
75
|
+
if (codepoint == 0 || (codepoint >= 0xD800 && codepoint < 0xE000) ||
|
76
|
+
codepoint >= 0x110000) {
|
77
|
+
codepoint = 0xFFFD;
|
78
|
+
}
|
79
|
+
cmark_utf8proc_encode_char(codepoint, ob);
|
80
|
+
return i + 1;
|
81
|
+
}
|
82
|
+
}
|
83
|
+
|
84
|
+
else {
|
85
|
+
if (size > CMARK_ENTITY_MAX_LENGTH)
|
86
|
+
size = CMARK_ENTITY_MAX_LENGTH;
|
87
|
+
|
88
|
+
for (i = CMARK_ENTITY_MIN_LENGTH; i < size; ++i) {
|
89
|
+
if (src[i] == ' ')
|
90
|
+
break;
|
91
|
+
|
92
|
+
if (src[i] == ';') {
|
93
|
+
const unsigned char *entity = S_lookup_entity(src, i);
|
94
|
+
|
95
|
+
if (entity != NULL) {
|
96
|
+
cmark_strbuf_puts(ob, (const char *)entity);
|
97
|
+
return i + 1;
|
98
|
+
}
|
99
|
+
|
100
|
+
break;
|
101
|
+
}
|
102
|
+
}
|
103
|
+
}
|
104
|
+
|
105
|
+
return 0;
|
106
|
+
}
|
107
|
+
|
108
|
+
int houdini_unescape_html(cmark_strbuf *ob, const uint8_t *src,
|
109
|
+
bufsize_t size) {
|
110
|
+
bufsize_t i = 0, org, ent;
|
111
|
+
|
112
|
+
while (i < size) {
|
113
|
+
org = i;
|
114
|
+
while (i < size && src[i] != '&')
|
115
|
+
i++;
|
116
|
+
|
117
|
+
if (likely(i > org)) {
|
118
|
+
if (unlikely(org == 0)) {
|
119
|
+
if (i >= size)
|
120
|
+
return 0;
|
121
|
+
|
122
|
+
cmark_strbuf_grow(ob, HOUDINI_UNESCAPED_SIZE(size));
|
123
|
+
}
|
124
|
+
|
125
|
+
cmark_strbuf_put(ob, src + org, i - org);
|
126
|
+
}
|
127
|
+
|
128
|
+
/* escaping */
|
129
|
+
if (i >= size)
|
130
|
+
break;
|
131
|
+
|
132
|
+
i++;
|
133
|
+
|
134
|
+
ent = houdini_unescape_ent(ob, src + i, size - i);
|
135
|
+
i += ent;
|
136
|
+
|
137
|
+
/* not really an entity */
|
138
|
+
if (ent == 0)
|
139
|
+
cmark_strbuf_putc(ob, '&');
|
140
|
+
}
|
141
|
+
|
142
|
+
return 1;
|
143
|
+
}
|
144
|
+
|
145
|
+
void houdini_unescape_html_f(cmark_strbuf *ob, const uint8_t *src,
|
146
|
+
bufsize_t size) {
|
147
|
+
if (!houdini_unescape_html(ob, src, size))
|
148
|
+
cmark_strbuf_put(ob, src, size);
|
149
|
+
}
|