commonmarker 0.23.10 → 1.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Cargo.lock +1182 -0
- data/Cargo.toml +7 -0
- data/README.md +217 -170
- data/ext/commonmarker/Cargo.toml +20 -0
- data/ext/commonmarker/extconf.rb +3 -6
- data/ext/commonmarker/src/lib.rs +103 -0
- data/ext/commonmarker/src/node.rs +1151 -0
- data/ext/commonmarker/src/options.rs +175 -0
- data/ext/commonmarker/src/plugins/syntax_highlighting.rs +166 -0
- data/ext/commonmarker/src/plugins.rs +6 -0
- data/ext/commonmarker/src/utils.rs +8 -0
- data/lib/commonmarker/config.rb +92 -40
- data/lib/commonmarker/constants.rb +7 -0
- data/lib/commonmarker/extension.rb +14 -0
- data/lib/commonmarker/node/ast.rb +8 -0
- data/lib/commonmarker/node/inspect.rb +14 -4
- data/lib/commonmarker/node.rb +29 -47
- data/lib/commonmarker/renderer.rb +1 -127
- data/lib/commonmarker/utils.rb +22 -0
- data/lib/commonmarker/version.rb +2 -2
- data/lib/commonmarker.rb +27 -25
- metadata +38 -186
- data/Rakefile +0 -109
- data/bin/commonmarker +0 -118
- data/commonmarker.gemspec +0 -38
- data/ext/commonmarker/arena.c +0 -104
- data/ext/commonmarker/autolink.c +0 -508
- data/ext/commonmarker/autolink.h +0 -8
- data/ext/commonmarker/blocks.c +0 -1622
- data/ext/commonmarker/buffer.c +0 -278
- data/ext/commonmarker/buffer.h +0 -116
- data/ext/commonmarker/case_fold_switch.inc +0 -4327
- data/ext/commonmarker/chunk.h +0 -135
- data/ext/commonmarker/cmark-gfm-core-extensions.h +0 -54
- data/ext/commonmarker/cmark-gfm-extension_api.h +0 -737
- data/ext/commonmarker/cmark-gfm-extensions_export.h +0 -42
- data/ext/commonmarker/cmark-gfm.h +0 -833
- data/ext/commonmarker/cmark-gfm_export.h +0 -42
- data/ext/commonmarker/cmark-gfm_version.h +0 -7
- data/ext/commonmarker/cmark.c +0 -55
- data/ext/commonmarker/cmark_ctype.c +0 -44
- data/ext/commonmarker/cmark_ctype.h +0 -33
- data/ext/commonmarker/commonmark.c +0 -514
- data/ext/commonmarker/commonmarker.c +0 -1308
- data/ext/commonmarker/commonmarker.h +0 -16
- data/ext/commonmarker/config.h +0 -76
- data/ext/commonmarker/core-extensions.c +0 -27
- data/ext/commonmarker/entities.inc +0 -2138
- data/ext/commonmarker/ext_scanners.c +0 -879
- data/ext/commonmarker/ext_scanners.h +0 -24
- data/ext/commonmarker/footnotes.c +0 -63
- data/ext/commonmarker/footnotes.h +0 -27
- data/ext/commonmarker/houdini.h +0 -57
- data/ext/commonmarker/houdini_href_e.c +0 -100
- data/ext/commonmarker/houdini_html_e.c +0 -66
- data/ext/commonmarker/houdini_html_u.c +0 -149
- data/ext/commonmarker/html.c +0 -502
- data/ext/commonmarker/html.h +0 -27
- data/ext/commonmarker/inlines.c +0 -1788
- data/ext/commonmarker/inlines.h +0 -29
- data/ext/commonmarker/iterator.c +0 -159
- data/ext/commonmarker/iterator.h +0 -26
- data/ext/commonmarker/latex.c +0 -468
- data/ext/commonmarker/linked_list.c +0 -37
- data/ext/commonmarker/man.c +0 -274
- data/ext/commonmarker/map.c +0 -129
- data/ext/commonmarker/map.h +0 -44
- data/ext/commonmarker/node.c +0 -1045
- data/ext/commonmarker/node.h +0 -167
- data/ext/commonmarker/parser.h +0 -59
- data/ext/commonmarker/plaintext.c +0 -218
- data/ext/commonmarker/plugin.c +0 -36
- data/ext/commonmarker/plugin.h +0 -34
- data/ext/commonmarker/references.c +0 -43
- data/ext/commonmarker/references.h +0 -26
- data/ext/commonmarker/registry.c +0 -63
- data/ext/commonmarker/registry.h +0 -24
- data/ext/commonmarker/render.c +0 -213
- data/ext/commonmarker/render.h +0 -62
- data/ext/commonmarker/scanners.c +0 -14056
- data/ext/commonmarker/scanners.h +0 -70
- data/ext/commonmarker/scanners.re +0 -341
- data/ext/commonmarker/strikethrough.c +0 -167
- data/ext/commonmarker/strikethrough.h +0 -9
- data/ext/commonmarker/syntax_extension.c +0 -149
- data/ext/commonmarker/syntax_extension.h +0 -34
- data/ext/commonmarker/table.c +0 -917
- data/ext/commonmarker/table.h +0 -12
- data/ext/commonmarker/tagfilter.c +0 -60
- data/ext/commonmarker/tagfilter.h +0 -8
- data/ext/commonmarker/tasklist.c +0 -156
- data/ext/commonmarker/tasklist.h +0 -8
- data/ext/commonmarker/utf8.c +0 -317
- data/ext/commonmarker/utf8.h +0 -35
- data/ext/commonmarker/xml.c +0 -182
- data/lib/commonmarker/renderer/html_renderer.rb +0 -256
@@ -1,24 +0,0 @@
|
|
1
|
-
#include "chunk.h"
|
2
|
-
#include "cmark-gfm.h"
|
3
|
-
|
4
|
-
#ifdef __cplusplus
|
5
|
-
extern "C" {
|
6
|
-
#endif
|
7
|
-
|
8
|
-
bufsize_t _ext_scan_at(bufsize_t (*scanner)(const unsigned char *),
|
9
|
-
unsigned char *ptr, int len, bufsize_t offset);
|
10
|
-
bufsize_t _scan_table_start(const unsigned char *p);
|
11
|
-
bufsize_t _scan_table_cell(const unsigned char *p);
|
12
|
-
bufsize_t _scan_table_cell_end(const unsigned char *p);
|
13
|
-
bufsize_t _scan_table_row_end(const unsigned char *p);
|
14
|
-
bufsize_t _scan_tasklist(const unsigned char *p);
|
15
|
-
|
16
|
-
#define scan_table_start(c, l, n) _ext_scan_at(&_scan_table_start, c, l, n)
|
17
|
-
#define scan_table_cell(c, l, n) _ext_scan_at(&_scan_table_cell, c, l, n)
|
18
|
-
#define scan_table_cell_end(c, l, n) _ext_scan_at(&_scan_table_cell_end, c, l, n)
|
19
|
-
#define scan_table_row_end(c, l, n) _ext_scan_at(&_scan_table_row_end, c, l, n)
|
20
|
-
#define scan_tasklist(c, l, n) _ext_scan_at(&_scan_tasklist, c, l, n)
|
21
|
-
|
22
|
-
#ifdef __cplusplus
|
23
|
-
}
|
24
|
-
#endif
|
@@ -1,63 +0,0 @@
|
|
1
|
-
#include "cmark-gfm.h"
|
2
|
-
#include "parser.h"
|
3
|
-
#include "footnotes.h"
|
4
|
-
#include "inlines.h"
|
5
|
-
#include "chunk.h"
|
6
|
-
|
7
|
-
/*
 * Release callback for a single footnote map entry.
 *
 * `_ref` is treated as a `cmark_footnote`. Its label, its attached node (when
 * one is set) and finally the entry itself are released; the label and the
 * entry go through the map's allocator. A NULL entry is ignored.
 */
static void footnote_free(cmark_map *map, cmark_map_entry *_ref) {
  cmark_footnote *footnote = (cmark_footnote *)_ref;
  cmark_mem *allocator = map->mem;

  if (footnote == NULL)
    return;

  allocator->free(footnote->entry.label);
  if (footnote->node != NULL)
    cmark_node_free(footnote->node);
  allocator->free(footnote);
}
|
17
|
-
|
18
|
-
/*
 * Register `node` as a footnote in `map`.
 *
 * The entry is keyed by the normalized form of the node's literal. When
 * normalization yields no label (empty name, or whitespace only) nothing is
 * added. The new entry records the current map size as its age and is pushed
 * onto the front of `map->refs`. The map must not have been sorted yet.
 */
void cmark_footnote_create(cmark_map *map, cmark_node *node) {
  unsigned char *label = normalize_map_label(map->mem, &node->as.literal);
  cmark_footnote *footnote;

  /* empty footnote name, or composed from only whitespace */
  if (label == NULL)
    return;

  assert(map->sorted == NULL);

  footnote = (cmark_footnote *)map->mem->calloc(1, sizeof(*footnote));
  footnote->node = node;
  footnote->entry.label = label;
  footnote->entry.age = map->size;
  footnote->entry.next = map->refs;

  /* Prepend to the singly linked list of entries. */
  map->refs = (cmark_map_entry *)footnote;
  map->size += 1;
}
|
37
|
-
|
38
|
-
/*
 * Create a map for footnotes, backed by allocator `mem`, whose entries are
 * released with footnote_free().
 */
cmark_map *cmark_footnote_map_new(cmark_mem *mem) {
  cmark_map *footnotes = cmark_map_new(mem, footnote_free);
  return footnotes;
}
|
41
|
-
|
42
|
-
// Before calling `cmark_map_free` on a map with `cmark_footnotes`, first
|
43
|
-
// unlink all of the footnote nodes before freeing their memory.
|
44
|
-
//
|
45
|
-
// Sometimes, two (unused) footnote nodes can end up referencing each other,
|
46
|
-
// which as they get freed up by calling `cmark_map_free` -> `footnote_free` ->
|
47
|
-
// etc, can lead to a use-after-free error.
|
48
|
-
//
|
49
|
-
// Better to `unlink` every footnote node first, setting their next, prev, and
|
50
|
-
// parent pointers to NULL, and only then walk thru & free them up.
|
51
|
-
void cmark_unlink_footnotes_map(cmark_map *map) {
|
52
|
-
cmark_map_entry *ref;
|
53
|
-
cmark_map_entry *next;
|
54
|
-
|
55
|
-
ref = map->refs;
|
56
|
-
while(ref) {
|
57
|
-
next = ref->next;
|
58
|
-
if (((cmark_footnote *)ref)->node) {
|
59
|
-
cmark_node_unlink(((cmark_footnote *)ref)->node);
|
60
|
-
}
|
61
|
-
ref = next;
|
62
|
-
}
|
63
|
-
}
|
@@ -1,27 +0,0 @@
|
|
1
|
-
#ifndef CMARK_FOOTNOTES_H
|
2
|
-
#define CMARK_FOOTNOTES_H
|
3
|
-
|
4
|
-
#include "map.h"
|
5
|
-
|
6
|
-
#ifdef __cplusplus
|
7
|
-
extern "C" {
|
8
|
-
#endif
|
9
|
-
|
10
|
-
struct cmark_footnote {
|
11
|
-
cmark_map_entry entry;
|
12
|
-
cmark_node *node;
|
13
|
-
unsigned int ix;
|
14
|
-
};
|
15
|
-
|
16
|
-
typedef struct cmark_footnote cmark_footnote;
|
17
|
-
|
18
|
-
void cmark_footnote_create(cmark_map *map, cmark_node *node);
|
19
|
-
cmark_map *cmark_footnote_map_new(cmark_mem *mem);
|
20
|
-
|
21
|
-
void cmark_unlink_footnotes_map(cmark_map *map);
|
22
|
-
|
23
|
-
#ifdef __cplusplus
|
24
|
-
}
|
25
|
-
#endif
|
26
|
-
|
27
|
-
#endif
|
data/ext/commonmarker/houdini.h
DELETED
@@ -1,57 +0,0 @@
|
|
1
|
-
#ifndef CMARK_HOUDINI_H
|
2
|
-
#define CMARK_HOUDINI_H
|
3
|
-
|
4
|
-
#ifdef __cplusplus
|
5
|
-
extern "C" {
|
6
|
-
#endif
|
7
|
-
|
8
|
-
#include <stdint.h>
|
9
|
-
#include "config.h"
|
10
|
-
#include "buffer.h"
|
11
|
-
|
12
|
-
#ifdef HAVE___BUILTIN_EXPECT
|
13
|
-
#define likely(x) __builtin_expect((x), 1)
|
14
|
-
#define unlikely(x) __builtin_expect((x), 0)
|
15
|
-
#else
|
16
|
-
#define likely(x) (x)
|
17
|
-
#define unlikely(x) (x)
|
18
|
-
#endif
|
19
|
-
|
20
|
-
#ifdef HOUDINI_USE_LOCALE
#define _isxdigit(c) isxdigit(c)
#define _isdigit(c) isdigit(c)
#else
/*
 * Locale-independent digit tests -- do not trust the current locale.
 *
 * Both macros evaluate their argument more than once, so only pass
 * expressions without side effects.
 *
 * _isxdigit is written as explicit range tests instead of
 * strchr("0123456789ABCDEFabcdef", c): strchr() also matches the string's
 * terminating NUL, so the strchr form wrongly reported c == 0 as a hex digit
 * (and silently required <string.h>).
 */
#define _isdigit(c) ((c) >= '0' && (c) <= '9')
#define _isxdigit(c)                                                           \
  (((c) >= '0' && (c) <= '9') || ((c) >= 'A' && (c) <= 'F') ||                 \
   ((c) >= 'a' && (c) <= 'f'))
#endif
|
30
|
-
|
31
|
-
#define HOUDINI_ESCAPED_SIZE(x) (((x)*12) / 10)
|
32
|
-
#define HOUDINI_UNESCAPED_SIZE(x) (x)
|
33
|
-
|
34
|
-
CMARK_GFM_EXPORT
|
35
|
-
bufsize_t houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src,
|
36
|
-
bufsize_t size);
|
37
|
-
CMARK_GFM_EXPORT
|
38
|
-
int houdini_escape_html(cmark_strbuf *ob, const uint8_t *src,
|
39
|
-
bufsize_t size);
|
40
|
-
CMARK_GFM_EXPORT
|
41
|
-
int houdini_escape_html0(cmark_strbuf *ob, const uint8_t *src,
|
42
|
-
bufsize_t size, int secure);
|
43
|
-
CMARK_GFM_EXPORT
|
44
|
-
int houdini_unescape_html(cmark_strbuf *ob, const uint8_t *src,
|
45
|
-
bufsize_t size);
|
46
|
-
CMARK_GFM_EXPORT
|
47
|
-
void houdini_unescape_html_f(cmark_strbuf *ob, const uint8_t *src,
|
48
|
-
bufsize_t size);
|
49
|
-
CMARK_GFM_EXPORT
|
50
|
-
int houdini_escape_href(cmark_strbuf *ob, const uint8_t *src,
|
51
|
-
bufsize_t size);
|
52
|
-
|
53
|
-
#ifdef __cplusplus
|
54
|
-
}
|
55
|
-
#endif
|
56
|
-
|
57
|
-
#endif
|
@@ -1,100 +0,0 @@
|
|
1
|
-
#include <assert.h>
|
2
|
-
#include <stdio.h>
|
3
|
-
#include <string.h>
|
4
|
-
|
5
|
-
#include "houdini.h"
|
6
|
-
|
7
|
-
/*
 * HREF_SAFE[b] is 1 when byte `b` is copied into an href unchanged and 0
 * when it needs escaping.
 *
 * The following characters will not be escaped:
 *
 *        -_.+!*'(),%#@?=;:/,+&$~ alphanum
 *
 * This set is the union of:
 *
 *  - the characters which are safe to be in a URL, and
 *  - the characters which are *not* safe to be in a URL because they are
 *    RESERVED characters.
 *
 * We assume (lazily) that any RESERVED character that appears inside a URL
 * is meant to have its native function (i.e. as a URL component/separator)
 * and hence needs no escaping.
 *
 * There are two exceptions: the characters & (amp) and ' (single quote) do
 * not appear in the table. They are meant to appear in the URL as
 * components, yet they require special HTML-entity escaping to generate
 * valid HTML markup.
 *
 * All other characters will be escaped to %XX.
 */
static const char HREF_SAFE[] = {
    /* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x20 */ 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,
    /* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
    /* 0x60 */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0,
    /* 0x80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xA0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xB0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xC0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xD0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xE0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xF0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
|
45
|
-
|
46
|
-
/*
 * Append `size` bytes of `src` to `ob`, escaped for use inside an HTML
 * href attribute.
 *
 *  - Bytes flagged in HREF_SAFE are copied through in runs.
 *  - '&' becomes "&amp;" and '\'' becomes "&#x27;": both are legitimate URL
 *    characters but need HTML-entity escaping inside an attribute.
 *  - Every other byte (including the space) becomes %XX with upper-case
 *    hex digits.
 *
 * Always returns 1.
 */
int houdini_escape_href(cmark_strbuf *ob, const uint8_t *src, bufsize_t size) {
  static const uint8_t hex_chars[] = "0123456789ABCDEF";
  bufsize_t i = 0, org;
  uint8_t hex_str[3];

  hex_str[0] = '%';

  while (i < size) {
    /* Copy the longest run of bytes that need no escaping. */
    org = i;
    while (i < size && HREF_SAFE[src[i]] != 0)
      i++;

    if (likely(i > org))
      cmark_strbuf_put(ob, src + org, i - org);

    /* escaping */
    if (i >= size)
      break;

    switch (src[i]) {
    /* amp appears all the time in URLs, but needs
     * HTML-entity escaping to be inside an href */
    case '&':
      cmark_strbuf_puts(ob, "&amp;");
      break;

    /* the single quote is a valid URL character
     * according to the standard; it needs HTML
     * entity escaping too */
    case '\'':
      cmark_strbuf_puts(ob, "&#x27;");
      break;

    /* every other character goes with a %XX escaping. This includes the
     * space: it could be escaped to '+', but that form is more commonly
     * seen when building GET strings, so the generic %20 is used. */
    default:
      hex_str[1] = hex_chars[(src[i] >> 4) & 0xF];
      hex_str[2] = hex_chars[src[i] & 0xF];
      cmark_strbuf_put(ob, hex_str, 3);
      break;
    }

    i++;
  }

  return 1;
}
|
@@ -1,66 +0,0 @@
|
|
1
|
-
#include <assert.h>
|
2
|
-
#include <stdio.h>
|
3
|
-
#include <string.h>
|
4
|
-
|
5
|
-
#include "houdini.h"
|
6
|
-
|
7
|
-
/**
 * According to the OWASP rules:
 *
 * & --> &amp;
 * < --> &lt;
 * > --> &gt;
 * " --> &quot;
 * ' --> &#x27;     &apos; is not recommended
 * / --> &#x2F;     forward slash is included as it helps end an HTML entity
 *
 * HTML_ESCAPE_TABLE[b] is 0 for a byte that needs no escaping; otherwise it
 * is a small non-zero code (1..6) identifying the character to replace.
 */
static const char HTML_ESCAPE_TABLE[256] = {
    ['"'] = 1,
    ['&'] = 2,
    ['\''] = 3,
    ['/'] = 4,
    ['<'] = 5,
    ['>'] = 6,
};
|
31
|
-
|
32
|
-
/*
 * Replacement text for each non-zero code found in HTML_ESCAPE_TABLE:
 * 1 = '"', 2 = '&', 3 = '\'', 4 = '/', 5 = '<', 6 = '>'.
 * Slot 0 is an unused placeholder (code 0 means "no escaping").
 */
static const char *HTML_ESCAPES[] = {"",      "&quot;", "&amp;", "&#39;",
                                     "&#47;", "&lt;",   "&gt;"};
|
34
|
-
|
35
|
-
/*
 * Append `size` bytes of `src` to `ob` with HTML-special characters
 * replaced by the matching HTML_ESCAPES entry.
 *
 * When `secure` is zero, '/' and '\'' are written through unescaped; every
 * other character flagged in HTML_ESCAPE_TABLE is always replaced.
 *
 * Always returns 1.
 */
int houdini_escape_html0(cmark_strbuf *ob, const uint8_t *src, bufsize_t size,
                         int secure) {
  bufsize_t pos = 0;
  bufsize_t code = 0;

  while (pos < size) {
    const bufsize_t run_start = pos;

    /* Advance over bytes that need no escaping, remembering the escape
     * code of the byte that stopped the scan. */
    for (; pos < size; pos++) {
      code = HTML_ESCAPE_TABLE[src[pos]];
      if (code != 0)
        break;
    }

    if (pos > run_start)
      cmark_strbuf_put(ob, src + run_start, pos - run_start);

    /* escaping */
    if (unlikely(pos >= size))
      break;

    /* The forward slash and single quote are only escaped in secure mode */
    if (!secure && (src[pos] == '/' || src[pos] == '\'')) {
      cmark_strbuf_putc(ob, src[pos]);
    } else {
      cmark_strbuf_puts(ob, HTML_ESCAPES[code]);
    }

    pos++;
  }

  return 1;
}
|
63
|
-
|
64
|
-
/*
 * HTML-escape `size` bytes of `src` into `ob` in secure mode, i.e. with
 * '/' and '\'' escaped as well. Returns whatever houdini_escape_html0()
 * returns.
 */
int houdini_escape_html(cmark_strbuf *ob, const uint8_t *src, bufsize_t size) {
  const int secure = 1;

  return houdini_escape_html0(ob, src, size, secure);
}
|
@@ -1,149 +0,0 @@
|
|
1
|
-
#include <assert.h>
|
2
|
-
#include <stdio.h>
|
3
|
-
#include <string.h>
|
4
|
-
|
5
|
-
#include "buffer.h"
|
6
|
-
#include "houdini.h"
|
7
|
-
#include "utf8.h"
|
8
|
-
#include "entities.inc"
|
9
|
-
|
10
|
-
/* Binary tree lookup code for entities added by JGM */
|
11
|
-
|
12
|
-
static const unsigned char *S_lookup(int i, int low, int hi,
|
13
|
-
const unsigned char *s, int len) {
|
14
|
-
int j;
|
15
|
-
int cmp =
|
16
|
-
strncmp((const char *)s, (const char *)cmark_entities[i].entity, len);
|
17
|
-
if (cmp == 0 && cmark_entities[i].entity[len] == 0) {
|
18
|
-
return (const unsigned char *)cmark_entities[i].bytes;
|
19
|
-
} else if (cmp <= 0 && i > low) {
|
20
|
-
j = i - ((i - low) / 2);
|
21
|
-
if (j == i)
|
22
|
-
j -= 1;
|
23
|
-
return S_lookup(j, low, i - 1, s, len);
|
24
|
-
} else if (cmp > 0 && i < hi) {
|
25
|
-
j = i + ((hi - i) / 2);
|
26
|
-
if (j == i)
|
27
|
-
j += 1;
|
28
|
-
return S_lookup(j, i + 1, hi, s, len);
|
29
|
-
} else {
|
30
|
-
return NULL;
|
31
|
-
}
|
32
|
-
}
|
33
|
-
|
34
|
-
static const unsigned char *S_lookup_entity(const unsigned char *s, int len) {
|
35
|
-
return S_lookup(CMARK_NUM_ENTITIES / 2, 0, CMARK_NUM_ENTITIES - 1, s, len);
|
36
|
-
}
|
37
|
-
|
38
|
-
/*
 * Try to decode one HTML entity whose text starts at `src`, i.e. right
 * after the '&'.
 *
 * Numeric form (`#123;`, `#x1F;`, `#X1F;`): 1 to 8 digits followed by ';'.
 * The code point is appended to `ob` as UTF-8; zero, surrogates
 * (0xD800..0xDFFF) and values of 0x110000 or more become U+FFFD.
 *
 * Named form (`amp;`): at most CMARK_ENTITY_MAX_LENGTH bytes are examined;
 * the name before ';' is looked up and its replacement bytes appended.
 *
 * Returns the number of bytes consumed from `src` (including the ';'), or
 * 0 when `src` does not start a recognised entity; nothing is written to
 * `ob` in that case.
 */
bufsize_t houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src,
                               bufsize_t size) {
  bufsize_t pos = 0;
  int codepoint = 0;
  int num_digits = 0;

  if (size < 3 || src[0] != '#') {
    /* Named entity. */
    if (size > CMARK_ENTITY_MAX_LENGTH)
      size = CMARK_ENTITY_MAX_LENGTH;

    for (pos = CMARK_ENTITY_MIN_LENGTH; pos < size; ++pos) {
      if (src[pos] == ' ')
        return 0;

      if (src[pos] == ';') {
        const unsigned char *replacement = S_lookup_entity(src, pos);

        if (replacement == NULL)
          return 0;

        cmark_strbuf_puts(ob, (const char *)replacement);
        return pos + 1;
      }
    }

    return 0;
  }

  /* Numeric entity. */
  if (_isdigit(src[1])) {
    pos = 1;
    while (pos < size && _isdigit(src[pos])) {
      codepoint = (codepoint * 10) + (src[pos] - '0');

      /* Keep counting digits but avoid integer overflow. */
      if (codepoint >= 0x110000)
        codepoint = 0x110000;

      ++pos;
    }

    num_digits = pos - 1;
  } else if (src[1] == 'x' || src[1] == 'X') {
    pos = 2;
    while (pos < size && _isxdigit(src[pos])) {
      /* (c | 32) folds to lower case; % 39 - 9 maps '0'-'9' to 0-9 and
       * 'a'-'f' to 10-15. */
      codepoint = (codepoint * 16) + ((src[pos] | 32) % 39 - 9);

      /* Keep counting digits but avoid integer overflow. */
      if (codepoint >= 0x110000)
        codepoint = 0x110000;

      ++pos;
    }

    num_digits = pos - 2;
  }

  if (num_digits < 1 || num_digits > 8 || pos >= size || src[pos] != ';')
    return 0;

  if (codepoint == 0 || (codepoint >= 0xD800 && codepoint < 0xE000) ||
      codepoint >= 0x110000) {
    codepoint = 0xFFFD;
  }

  cmark_utf8proc_encode_char(codepoint, ob);
  return pos + 1;
}
|
107
|
-
|
108
|
-
int houdini_unescape_html(cmark_strbuf *ob, const uint8_t *src,
|
109
|
-
bufsize_t size) {
|
110
|
-
bufsize_t i = 0, org, ent;
|
111
|
-
|
112
|
-
while (i < size) {
|
113
|
-
org = i;
|
114
|
-
while (i < size && src[i] != '&')
|
115
|
-
i++;
|
116
|
-
|
117
|
-
if (likely(i > org)) {
|
118
|
-
if (unlikely(org == 0)) {
|
119
|
-
if (i >= size)
|
120
|
-
return 0;
|
121
|
-
|
122
|
-
cmark_strbuf_grow(ob, HOUDINI_UNESCAPED_SIZE(size));
|
123
|
-
}
|
124
|
-
|
125
|
-
cmark_strbuf_put(ob, src + org, i - org);
|
126
|
-
}
|
127
|
-
|
128
|
-
/* escaping */
|
129
|
-
if (i >= size)
|
130
|
-
break;
|
131
|
-
|
132
|
-
i++;
|
133
|
-
|
134
|
-
ent = houdini_unescape_ent(ob, src + i, size - i);
|
135
|
-
i += ent;
|
136
|
-
|
137
|
-
/* not really an entity */
|
138
|
-
if (ent == 0)
|
139
|
-
cmark_strbuf_putc(ob, '&');
|
140
|
-
}
|
141
|
-
|
142
|
-
return 1;
|
143
|
-
}
|
144
|
-
|
145
|
-
void houdini_unescape_html_f(cmark_strbuf *ob, const uint8_t *src,
|
146
|
-
bufsize_t size) {
|
147
|
-
if (!houdini_unescape_html(ob, src, size))
|
148
|
-
cmark_strbuf_put(ob, src, size);
|
149
|
-
}
|