commonmarker 0.23.6 → 1.0.0.pre
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +70 -212
- data/commonmarker.gemspec +34 -31
- data/ext/commonmarker/Cargo.toml +12 -0
- data/ext/commonmarker/_util.rb +102 -0
- data/ext/commonmarker/extconf.rb +4 -5
- data/ext/commonmarker/src/comrak_options.rs +107 -0
- data/ext/commonmarker/src/lib.rs +27 -0
- data/lib/commonmarker/config.rb +58 -37
- data/lib/commonmarker/extension.rb +14 -0
- data/lib/commonmarker/renderer.rb +1 -127
- data/lib/commonmarker/version.rb +2 -2
- data/lib/commonmarker.rb +19 -32
- metadata +33 -177
- data/Rakefile +0 -109
- data/bin/commonmarker +0 -118
- data/ext/commonmarker/arena.c +0 -103
- data/ext/commonmarker/autolink.c +0 -456
- data/ext/commonmarker/autolink.h +0 -8
- data/ext/commonmarker/blocks.c +0 -1596
- data/ext/commonmarker/buffer.c +0 -278
- data/ext/commonmarker/buffer.h +0 -116
- data/ext/commonmarker/case_fold_switch.inc +0 -4327
- data/ext/commonmarker/chunk.h +0 -135
- data/ext/commonmarker/cmark-gfm-core-extensions.h +0 -54
- data/ext/commonmarker/cmark-gfm-extension_api.h +0 -736
- data/ext/commonmarker/cmark-gfm-extensions_export.h +0 -42
- data/ext/commonmarker/cmark-gfm.h +0 -817
- data/ext/commonmarker/cmark-gfm_export.h +0 -42
- data/ext/commonmarker/cmark-gfm_version.h +0 -7
- data/ext/commonmarker/cmark.c +0 -55
- data/ext/commonmarker/cmark_ctype.c +0 -44
- data/ext/commonmarker/cmark_ctype.h +0 -33
- data/ext/commonmarker/commonmark.c +0 -529
- data/ext/commonmarker/commonmarker.c +0 -1307
- data/ext/commonmarker/commonmarker.h +0 -16
- data/ext/commonmarker/config.h +0 -76
- data/ext/commonmarker/core-extensions.c +0 -27
- data/ext/commonmarker/entities.inc +0 -2138
- data/ext/commonmarker/ext_scanners.c +0 -879
- data/ext/commonmarker/ext_scanners.h +0 -24
- data/ext/commonmarker/footnotes.c +0 -63
- data/ext/commonmarker/footnotes.h +0 -27
- data/ext/commonmarker/houdini.h +0 -57
- data/ext/commonmarker/houdini_href_e.c +0 -100
- data/ext/commonmarker/houdini_html_e.c +0 -66
- data/ext/commonmarker/houdini_html_u.c +0 -149
- data/ext/commonmarker/html.c +0 -486
- data/ext/commonmarker/html.h +0 -27
- data/ext/commonmarker/inlines.c +0 -1716
- data/ext/commonmarker/inlines.h +0 -29
- data/ext/commonmarker/iterator.c +0 -159
- data/ext/commonmarker/iterator.h +0 -26
- data/ext/commonmarker/latex.c +0 -466
- data/ext/commonmarker/linked_list.c +0 -37
- data/ext/commonmarker/man.c +0 -278
- data/ext/commonmarker/map.c +0 -122
- data/ext/commonmarker/map.h +0 -41
- data/ext/commonmarker/node.c +0 -979
- data/ext/commonmarker/node.h +0 -125
- data/ext/commonmarker/parser.h +0 -58
- data/ext/commonmarker/plaintext.c +0 -235
- data/ext/commonmarker/plugin.c +0 -36
- data/ext/commonmarker/plugin.h +0 -34
- data/ext/commonmarker/references.c +0 -42
- data/ext/commonmarker/references.h +0 -26
- data/ext/commonmarker/registry.c +0 -63
- data/ext/commonmarker/registry.h +0 -24
- data/ext/commonmarker/render.c +0 -205
- data/ext/commonmarker/render.h +0 -62
- data/ext/commonmarker/scanners.c +0 -10508
- data/ext/commonmarker/scanners.h +0 -62
- data/ext/commonmarker/scanners.re +0 -341
- data/ext/commonmarker/strikethrough.c +0 -167
- data/ext/commonmarker/strikethrough.h +0 -9
- data/ext/commonmarker/syntax_extension.c +0 -149
- data/ext/commonmarker/syntax_extension.h +0 -34
- data/ext/commonmarker/table.c +0 -848
- data/ext/commonmarker/table.h +0 -12
- data/ext/commonmarker/tagfilter.c +0 -60
- data/ext/commonmarker/tagfilter.h +0 -8
- data/ext/commonmarker/tasklist.c +0 -156
- data/ext/commonmarker/tasklist.h +0 -8
- data/ext/commonmarker/utf8.c +0 -317
- data/ext/commonmarker/utf8.h +0 -35
- data/ext/commonmarker/xml.c +0 -181
- data/lib/commonmarker/node/inspect.rb +0 -47
- data/lib/commonmarker/node.rb +0 -83
- data/lib/commonmarker/renderer/html_renderer.rb +0 -252
@@ -1,24 +0,0 @@
|
|
1
|
-
#include "chunk.h"
|
2
|
-
#include "cmark-gfm.h"
|
3
|
-
|
4
|
-
#ifdef __cplusplus
|
5
|
-
extern "C" {
|
6
|
-
#endif
|
7
|
-
|
8
|
-
bufsize_t _ext_scan_at(bufsize_t (*scanner)(const unsigned char *),
|
9
|
-
unsigned char *ptr, int len, bufsize_t offset);
|
10
|
-
bufsize_t _scan_table_start(const unsigned char *p);
|
11
|
-
bufsize_t _scan_table_cell(const unsigned char *p);
|
12
|
-
bufsize_t _scan_table_cell_end(const unsigned char *p);
|
13
|
-
bufsize_t _scan_table_row_end(const unsigned char *p);
|
14
|
-
bufsize_t _scan_tasklist(const unsigned char *p);
|
15
|
-
|
16
|
-
#define scan_table_start(c, l, n) _ext_scan_at(&_scan_table_start, c, l, n)
|
17
|
-
#define scan_table_cell(c, l, n) _ext_scan_at(&_scan_table_cell, c, l, n)
|
18
|
-
#define scan_table_cell_end(c, l, n) _ext_scan_at(&_scan_table_cell_end, c, l, n)
|
19
|
-
#define scan_table_row_end(c, l, n) _ext_scan_at(&_scan_table_row_end, c, l, n)
|
20
|
-
#define scan_tasklist(c, l, n) _ext_scan_at(&_scan_tasklist, c, l, n)
|
21
|
-
|
22
|
-
#ifdef __cplusplus
|
23
|
-
}
|
24
|
-
#endif
|
@@ -1,63 +0,0 @@
|
|
1
|
-
#include "cmark-gfm.h"
|
2
|
-
#include "parser.h"
|
3
|
-
#include "footnotes.h"
|
4
|
-
#include "inlines.h"
|
5
|
-
#include "chunk.h"
|
6
|
-
|
7
|
-
/*
 * Entry destructor handed to the footnote map.
 *
 * Releases the entry's label and the entry itself with the map's allocator,
 * and calls cmark_node_free() on the attached node when there is one.
 * A NULL entry is ignored.
 */
static void footnote_free(cmark_map *map, cmark_map_entry *_ref) {
  cmark_mem *allocator = map->mem;
  cmark_footnote *footnote = (cmark_footnote *)_ref;

  if (footnote == NULL)
    return;

  allocator->free(footnote->entry.label);
  if (footnote->node != NULL)
    cmark_node_free(footnote->node);
  allocator->free(footnote);
}
|
17
|
-
|
18
|
-
/*
 * Records `node` as a footnote in `map`.
 *
 * The label is produced by normalize_map_label() from the node's literal;
 * when that returns NULL nothing is added. Otherwise a zero-initialised
 * cmark_footnote is allocated with the map's allocator, given the current
 * map size as its age, and pushed onto the head of map->refs.
 */
void cmark_footnote_create(cmark_map *map, cmark_node *node) {
  unsigned char *label = normalize_map_label(map->mem, &node->as.literal);
  cmark_footnote *footnote;

  /* empty footnote name, or composed from only whitespace */
  if (label == NULL)
    return;

  /* map->sorted must still be unset when an entry is added */
  assert(map->sorted == NULL);

  footnote = (cmark_footnote *)map->mem->calloc(1, sizeof(*footnote));
  footnote->node = node;
  footnote->entry.label = label;
  footnote->entry.age = map->size;
  footnote->entry.next = map->refs;

  /* link the new entry in as the new list head */
  map->refs = (cmark_map_entry *)footnote;
  map->size++;
}
|
37
|
-
|
38
|
-
/*
 * Creates a map via cmark_map_new(), passing `mem` as the allocator and
 * footnote_free() as the per-entry callback.
 */
cmark_map *cmark_footnote_map_new(cmark_mem *mem) {
  cmark_map *footnotes = cmark_map_new(mem, footnote_free);

  return footnotes;
}
|
41
|
-
|
42
|
-
// Before calling `cmark_map_free` on a map with `cmark_footnotes`, first
|
43
|
-
// unlink all of the footnote nodes before freeing their memory.
|
44
|
-
//
|
45
|
-
// Sometimes, two (unused) footnote nodes can end up referencing each other,
|
46
|
-
// which as they get freed up by calling `cmark_map_free` -> `footnote_free` ->
|
47
|
-
// etc, can lead to a use-after-free error.
|
48
|
-
//
|
49
|
-
// Better to `unlink` every footnote node first, setting their next, prev, and
|
50
|
-
// parent pointers to NULL, and only then walk thru & free them up.
|
51
|
-
void cmark_unlink_footnotes_map(cmark_map *map) {
|
52
|
-
cmark_map_entry *ref;
|
53
|
-
cmark_map_entry *next;
|
54
|
-
|
55
|
-
ref = map->refs;
|
56
|
-
while(ref) {
|
57
|
-
next = ref->next;
|
58
|
-
if (((cmark_footnote *)ref)->node) {
|
59
|
-
cmark_node_unlink(((cmark_footnote *)ref)->node);
|
60
|
-
}
|
61
|
-
ref = next;
|
62
|
-
}
|
63
|
-
}
|
@@ -1,27 +0,0 @@
|
|
1
|
-
#ifndef CMARK_FOOTNOTES_H
|
2
|
-
#define CMARK_FOOTNOTES_H
|
3
|
-
|
4
|
-
#include "map.h"
|
5
|
-
|
6
|
-
#ifdef __cplusplus
|
7
|
-
extern "C" {
|
8
|
-
#endif
|
9
|
-
|
10
|
-
struct cmark_footnote {
|
11
|
-
cmark_map_entry entry;
|
12
|
-
cmark_node *node;
|
13
|
-
unsigned int ix;
|
14
|
-
};
|
15
|
-
|
16
|
-
typedef struct cmark_footnote cmark_footnote;
|
17
|
-
|
18
|
-
void cmark_footnote_create(cmark_map *map, cmark_node *node);
|
19
|
-
cmark_map *cmark_footnote_map_new(cmark_mem *mem);
|
20
|
-
|
21
|
-
void cmark_unlink_footnotes_map(cmark_map *map);
|
22
|
-
|
23
|
-
#ifdef __cplusplus
|
24
|
-
}
|
25
|
-
#endif
|
26
|
-
|
27
|
-
#endif
|
data/ext/commonmarker/houdini.h
DELETED
@@ -1,57 +0,0 @@
|
|
1
|
-
#ifndef CMARK_HOUDINI_H
|
2
|
-
#define CMARK_HOUDINI_H
|
3
|
-
|
4
|
-
#ifdef __cplusplus
|
5
|
-
extern "C" {
|
6
|
-
#endif
|
7
|
-
|
8
|
-
#include <stdint.h>
|
9
|
-
#include "config.h"
|
10
|
-
#include "buffer.h"
|
11
|
-
|
12
|
-
#ifdef HAVE___BUILTIN_EXPECT
|
13
|
-
#define likely(x) __builtin_expect((x), 1)
|
14
|
-
#define unlikely(x) __builtin_expect((x), 0)
|
15
|
-
#else
|
16
|
-
#define likely(x) (x)
|
17
|
-
#define unlikely(x) (x)
|
18
|
-
#endif
|
19
|
-
|
20
|
-
#ifdef HOUDINI_USE_LOCALE
|
21
|
-
#define _isxdigit(c) isxdigit(c)
|
22
|
-
#define _isdigit(c) isdigit(c)
|
23
|
-
#else
|
24
|
-
/*
|
25
|
-
* Helper _isdigit methods -- do not trust the current locale
|
26
|
-
* */
|
27
|
-
/* strchr() treats the terminating NUL as part of the string, so a '\0'
 * argument would match; reject it explicitly. Note: evaluates `c` twice. */
#define _isxdigit(c)                                                           \
  ((c) != '\0' && strchr("0123456789ABCDEFabcdef", (c)) != NULL)
|
28
|
-
#define _isdigit(c) ((c) >= '0' && (c) <= '9')
|
29
|
-
#endif
|
30
|
-
|
31
|
-
#define HOUDINI_ESCAPED_SIZE(x) (((x)*12) / 10)
|
32
|
-
#define HOUDINI_UNESCAPED_SIZE(x) (x)
|
33
|
-
|
34
|
-
CMARK_GFM_EXPORT
|
35
|
-
bufsize_t houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src,
|
36
|
-
bufsize_t size);
|
37
|
-
CMARK_GFM_EXPORT
|
38
|
-
int houdini_escape_html(cmark_strbuf *ob, const uint8_t *src,
|
39
|
-
bufsize_t size);
|
40
|
-
CMARK_GFM_EXPORT
|
41
|
-
int houdini_escape_html0(cmark_strbuf *ob, const uint8_t *src,
|
42
|
-
bufsize_t size, int secure);
|
43
|
-
CMARK_GFM_EXPORT
|
44
|
-
int houdini_unescape_html(cmark_strbuf *ob, const uint8_t *src,
|
45
|
-
bufsize_t size);
|
46
|
-
CMARK_GFM_EXPORT
|
47
|
-
void houdini_unescape_html_f(cmark_strbuf *ob, const uint8_t *src,
|
48
|
-
bufsize_t size);
|
49
|
-
CMARK_GFM_EXPORT
|
50
|
-
int houdini_escape_href(cmark_strbuf *ob, const uint8_t *src,
|
51
|
-
bufsize_t size);
|
52
|
-
|
53
|
-
#ifdef __cplusplus
|
54
|
-
}
|
55
|
-
#endif
|
56
|
-
|
57
|
-
#endif
|
@@ -1,100 +0,0 @@
|
|
1
|
-
#include <assert.h>
|
2
|
-
#include <stdio.h>
|
3
|
-
#include <string.h>
|
4
|
-
|
5
|
-
#include "houdini.h"
|
6
|
-
|
7
|
-
/*
|
8
|
-
* The following characters will not be escaped:
|
9
|
-
*
|
10
|
-
* -_.+!*'(),%#@?=;:/,+&$~ alphanum
|
11
|
-
*
|
12
|
-
* Note that this character set is the addition of:
|
13
|
-
*
|
14
|
-
* - The characters which are safe to be in an URL
|
15
|
-
* - The characters which are *not* safe to be in
|
16
|
-
* an URL because they are RESERVED characters.
|
17
|
-
*
|
18
|
-
* We assume (lazily) that any RESERVED char that
|
19
|
-
* appears inside an URL is actually meant to
|
20
|
-
* have its native function (i.e. as an URL
|
21
|
-
* component/separator) and hence needs no escaping.
|
22
|
-
*
|
23
|
-
* There are two exceptions: the characters & (amp)
|
24
|
-
* and ' (single quote) do not appear in the table.
|
25
|
-
* They are meant to appear in the URL as components,
|
26
|
-
* yet they require special HTML-entity escaping
|
27
|
-
* to generate valid HTML markup.
|
28
|
-
*
|
29
|
-
* All other characters will be escaped to %XX.
|
30
|
-
*
|
31
|
-
*/
|
32
|
-
/*
 * Per-byte flag table: 1 means the byte is copied into an href unchanged,
 * 0 means it needs escaping. One row per 16 byte values.
 */
static const char HREF_SAFE[] = {
    /* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x20 */ 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,
    /* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
    /* 0x60 */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0,
    /* 0x80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xA0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xB0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xC0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xD0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xE0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xF0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
|
45
|
-
|
46
|
-
/*
 * Appends the `size` bytes at `src` to `ob`, escaped for use as an HTML
 * href value.
 *
 * Runs of bytes flagged in HREF_SAFE are copied through unchanged. `&` and
 * `'` are valid URL characters but must be written as the HTML entities
 * `&amp;` and `&#x27;` to keep the markup valid. Every other byte, including
 * the space, becomes an uppercase `%XX` escape.
 *
 * Always returns 1.
 */
int houdini_escape_href(cmark_strbuf *ob, const uint8_t *src, bufsize_t size) {
  static const uint8_t hex_chars[] = "0123456789ABCDEF";
  bufsize_t i = 0, org;
  uint8_t hex_str[3];

  hex_str[0] = '%';

  while (i < size) {
    /* copy the longest run of bytes that need no escaping */
    org = i;
    while (i < size && HREF_SAFE[src[i]] != 0)
      i++;

    if (likely(i > org))
      cmark_strbuf_put(ob, src + org, i - org);

    /* escaping */
    if (i >= size)
      break;

    switch (src[i]) {
    /* amp appears all the time in URLs, but needs
     * HTML-entity escaping to be inside an href */
    case '&':
      cmark_strbuf_puts(ob, "&amp;");
      break;

    /* the single quote is a valid URL character
     * according to the standard; it needs HTML
     * entity escaping too */
    case '\'':
      cmark_strbuf_puts(ob, "&#x27;");
      break;

    /* every other character, the space included, gets a %XX escape
     * (the space is deliberately not turned into '+') */
    default:
      hex_str[1] = hex_chars[(src[i] >> 4) & 0xF];
      hex_str[2] = hex_chars[src[i] & 0xF];
      cmark_strbuf_put(ob, hex_str, 3);
      break;
    }

    i++;
  }

  return 1;
}
|
@@ -1,66 +0,0 @@
|
|
1
|
-
#include <assert.h>
|
2
|
-
#include <stdio.h>
|
3
|
-
#include <string.h>
|
4
|
-
|
5
|
-
#include "houdini.h"
|
6
|
-
|
7
|
-
/**
|
8
|
-
* According to the OWASP rules:
|
9
|
-
*
|
10
|
-
* & --> &amp;
|
11
|
-
* < --> &lt;
|
12
|
-
* > --> &gt;
|
13
|
-
* " --> &quot;
|
14
|
-
* ' --> &#x27;     &apos; is not recommended
|
15
|
-
* / --> &#x2F;     forward slash is included as it helps end an HTML entity
|
16
|
-
*
|
17
|
-
*/
|
18
|
-
/*
 * Maps each byte value to an index into HTML_ESCAPES. Bytes not listed here
 * are zero, meaning "copy through unchanged".
 */
static const char HTML_ESCAPE_TABLE[256] = {
    ['"'] = 1,
    ['&'] = 2,
    ['\''] = 3,
    ['/'] = 4,
    ['<'] = 5,
    ['>'] = 6,
};
|
31
|
-
|
32
|
-
/*
 * Replacement text for each code stored in HTML_ESCAPE_TABLE:
 * 1 = '"', 2 = '&', 3 = '\'', 4 = '/', 5 = '<', 6 = '>'.
 * Slot 0 is the "no escaping needed" code and holds an empty string.
 */
static const char *HTML_ESCAPES[] = {"",      "&quot;", "&amp;", "&#39;",
                                     "&#47;", "&lt;",   "&gt;"};
|
34
|
-
|
35
|
-
/*
 * Appends the `size` bytes at `src` to `ob` with HTML-special bytes replaced
 * by the matching HTML_ESCAPES string.
 *
 * When `secure` is zero, '/' and '\'' are written through unchanged; the
 * other special bytes are always escaped. Always returns 1.
 */
int houdini_escape_html0(cmark_strbuf *ob, const uint8_t *src, bufsize_t size,
                         int secure) {
  bufsize_t pos = 0;

  while (pos < size) {
    const bufsize_t run_start = pos;
    bufsize_t code = 0;

    /* advance over bytes whose table entry is 0 (no escaping needed) */
    for (; pos < size; pos++) {
      code = HTML_ESCAPE_TABLE[src[pos]];
      if (code != 0)
        break;
    }

    if (pos > run_start)
      cmark_strbuf_put(ob, src + run_start, pos - run_start);

    /* input exhausted: nothing left to escape */
    if (unlikely(pos >= size))
      break;

    /* The forward slash and single quote are only escaped in secure mode */
    if (!secure && (src[pos] == '/' || src[pos] == '\''))
      cmark_strbuf_putc(ob, src[pos]);
    else
      cmark_strbuf_puts(ob, HTML_ESCAPES[code]);

    pos++;
  }

  return 1;
}
|
63
|
-
|
64
|
-
/*
 * Convenience wrapper: houdini_escape_html0() with secure mode switched on,
 * so '/' and '\'' are escaped as well.
 */
int houdini_escape_html(cmark_strbuf *ob, const uint8_t *src, bufsize_t size) {
  const int secure = 1;

  return houdini_escape_html0(ob, src, size, secure);
}
|
@@ -1,149 +0,0 @@
|
|
1
|
-
#include <assert.h>
|
2
|
-
#include <stdio.h>
|
3
|
-
#include <string.h>
|
4
|
-
|
5
|
-
#include "buffer.h"
|
6
|
-
#include "houdini.h"
|
7
|
-
#include "utf8.h"
|
8
|
-
#include "entities.inc"
|
9
|
-
|
10
|
-
/* Binary tree lookup code for entities added by JGM */
|
11
|
-
|
12
|
-
static const unsigned char *S_lookup(int i, int low, int hi,
|
13
|
-
const unsigned char *s, int len) {
|
14
|
-
int j;
|
15
|
-
int cmp =
|
16
|
-
strncmp((const char *)s, (const char *)cmark_entities[i].entity, len);
|
17
|
-
if (cmp == 0 && cmark_entities[i].entity[len] == 0) {
|
18
|
-
return (const unsigned char *)cmark_entities[i].bytes;
|
19
|
-
} else if (cmp <= 0 && i > low) {
|
20
|
-
j = i - ((i - low) / 2);
|
21
|
-
if (j == i)
|
22
|
-
j -= 1;
|
23
|
-
return S_lookup(j, low, i - 1, s, len);
|
24
|
-
} else if (cmp > 0 && i < hi) {
|
25
|
-
j = i + ((hi - i) / 2);
|
26
|
-
if (j == i)
|
27
|
-
j += 1;
|
28
|
-
return S_lookup(j, i + 1, hi, s, len);
|
29
|
-
} else {
|
30
|
-
return NULL;
|
31
|
-
}
|
32
|
-
}
|
33
|
-
|
34
|
-
static const unsigned char *S_lookup_entity(const unsigned char *s, int len) {
|
35
|
-
return S_lookup(CMARK_NUM_ENTITIES / 2, 0, CMARK_NUM_ENTITIES - 1, s, len);
|
36
|
-
}
|
37
|
-
|
38
|
-
/*
 * Tries to decode one character reference. `src` is expected to point at the
 * text immediately after the '&'.
 *
 * Numeric form (`#123;`, `#x1F;`): 1 to 8 digits followed by ';'. Zero,
 * surrogates and values of 0x110000 or more are replaced by U+FFFD; the
 * result is appended to `ob` as UTF-8.
 * Named form (`amp;`): the name is looked up with S_lookup_entity() and its
 * replacement bytes are appended to `ob`.
 *
 * Returns the number of bytes of `src` consumed, including the ';', or 0
 * when no reference was recognised (nothing is appended in that case).
 */
bufsize_t houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src,
                               bufsize_t size) {
  bufsize_t pos = 0;
  int value = 0;
  int digits = 0;

  if (size < 3 || src[0] != '#') {
    /* named reference: scan at most CMARK_ENTITY_MAX_LENGTH bytes */
    bufsize_t limit = size;

    if (limit > CMARK_ENTITY_MAX_LENGTH)
      limit = CMARK_ENTITY_MAX_LENGTH;

    for (pos = CMARK_ENTITY_MIN_LENGTH; pos < limit; ++pos) {
      if (src[pos] == ' ')
        return 0;

      if (src[pos] == ';') {
        const unsigned char *replacement = S_lookup_entity(src, pos);

        if (replacement == NULL)
          return 0;

        cmark_strbuf_puts(ob, (const char *)replacement);
        return pos + 1;
      }
    }

    return 0;
  }

  /* numeric reference */
  if (_isdigit(src[1])) {
    pos = 1;
    while (pos < size && _isdigit(src[pos])) {
      value = (value * 10) + (src[pos] - '0');

      /* Keep counting digits but avoid integer overflow. */
      if (value >= 0x110000)
        value = 0x110000;

      ++pos;
    }
    digits = pos - 1;
  } else if (src[1] == 'x' || src[1] == 'X') {
    pos = 2;
    while (pos < size && _isxdigit(src[pos])) {
      /* (c | 32) % 39 - 9 maps '0'-'9' to 0-9 and 'a'-'f'/'A'-'F' to 10-15 */
      value = (value * 16) + ((src[pos] | 32) % 39 - 9);

      /* Keep counting digits but avoid integer overflow. */
      if (value >= 0x110000)
        value = 0x110000;

      ++pos;
    }
    digits = pos - 2;
  }

  if (digits < 1 || digits > 8 || pos >= size || src[pos] != ';')
    return 0;

  if (value == 0 || (value >= 0xD800 && value < 0xE000) || value >= 0x110000)
    value = 0xFFFD;

  cmark_utf8proc_encode_char(value, ob);
  return pos + 1;
}
|
107
|
-
|
108
|
-
int houdini_unescape_html(cmark_strbuf *ob, const uint8_t *src,
|
109
|
-
bufsize_t size) {
|
110
|
-
bufsize_t i = 0, org, ent;
|
111
|
-
|
112
|
-
while (i < size) {
|
113
|
-
org = i;
|
114
|
-
while (i < size && src[i] != '&')
|
115
|
-
i++;
|
116
|
-
|
117
|
-
if (likely(i > org)) {
|
118
|
-
if (unlikely(org == 0)) {
|
119
|
-
if (i >= size)
|
120
|
-
return 0;
|
121
|
-
|
122
|
-
cmark_strbuf_grow(ob, HOUDINI_UNESCAPED_SIZE(size));
|
123
|
-
}
|
124
|
-
|
125
|
-
cmark_strbuf_put(ob, src + org, i - org);
|
126
|
-
}
|
127
|
-
|
128
|
-
/* escaping */
|
129
|
-
if (i >= size)
|
130
|
-
break;
|
131
|
-
|
132
|
-
i++;
|
133
|
-
|
134
|
-
ent = houdini_unescape_ent(ob, src + i, size - i);
|
135
|
-
i += ent;
|
136
|
-
|
137
|
-
/* not really an entity */
|
138
|
-
if (ent == 0)
|
139
|
-
cmark_strbuf_putc(ob, '&');
|
140
|
-
}
|
141
|
-
|
142
|
-
return 1;
|
143
|
-
}
|
144
|
-
|
145
|
-
void houdini_unescape_html_f(cmark_strbuf *ob, const uint8_t *src,
|
146
|
-
bufsize_t size) {
|
147
|
-
if (!houdini_unescape_html(ob, src, size))
|
148
|
-
cmark_strbuf_put(ob, src, size);
|
149
|
-
}
|