commonmarker 0.23.10 → 1.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Cargo.lock +1220 -0
- data/Cargo.toml +7 -0
- data/README.md +217 -170
- data/ext/commonmarker/Cargo.toml +20 -0
- data/ext/commonmarker/extconf.rb +3 -6
- data/ext/commonmarker/src/lib.rs +183 -0
- data/ext/commonmarker/src/node.rs +1115 -0
- data/ext/commonmarker/src/options.rs +165 -0
- data/ext/commonmarker/src/plugins/syntax_highlighting.rs +74 -0
- data/ext/commonmarker/src/plugins.rs +3 -0
- data/ext/commonmarker/src/utils.rs +8 -0
- data/lib/commonmarker/config.rb +90 -40
- data/lib/commonmarker/constants.rb +7 -0
- data/lib/commonmarker/extension.rb +14 -0
- data/lib/commonmarker/node/ast.rb +8 -0
- data/lib/commonmarker/node/inspect.rb +14 -4
- data/lib/commonmarker/node.rb +29 -47
- data/lib/commonmarker/renderer.rb +1 -127
- data/lib/commonmarker/utils.rb +22 -0
- data/lib/commonmarker/version.rb +2 -2
- data/lib/commonmarker.rb +27 -25
- metadata +38 -186
- data/Rakefile +0 -109
- data/bin/commonmarker +0 -118
- data/commonmarker.gemspec +0 -38
- data/ext/commonmarker/arena.c +0 -104
- data/ext/commonmarker/autolink.c +0 -508
- data/ext/commonmarker/autolink.h +0 -8
- data/ext/commonmarker/blocks.c +0 -1622
- data/ext/commonmarker/buffer.c +0 -278
- data/ext/commonmarker/buffer.h +0 -116
- data/ext/commonmarker/case_fold_switch.inc +0 -4327
- data/ext/commonmarker/chunk.h +0 -135
- data/ext/commonmarker/cmark-gfm-core-extensions.h +0 -54
- data/ext/commonmarker/cmark-gfm-extension_api.h +0 -737
- data/ext/commonmarker/cmark-gfm-extensions_export.h +0 -42
- data/ext/commonmarker/cmark-gfm.h +0 -833
- data/ext/commonmarker/cmark-gfm_export.h +0 -42
- data/ext/commonmarker/cmark-gfm_version.h +0 -7
- data/ext/commonmarker/cmark.c +0 -55
- data/ext/commonmarker/cmark_ctype.c +0 -44
- data/ext/commonmarker/cmark_ctype.h +0 -33
- data/ext/commonmarker/commonmark.c +0 -514
- data/ext/commonmarker/commonmarker.c +0 -1308
- data/ext/commonmarker/commonmarker.h +0 -16
- data/ext/commonmarker/config.h +0 -76
- data/ext/commonmarker/core-extensions.c +0 -27
- data/ext/commonmarker/entities.inc +0 -2138
- data/ext/commonmarker/ext_scanners.c +0 -879
- data/ext/commonmarker/ext_scanners.h +0 -24
- data/ext/commonmarker/footnotes.c +0 -63
- data/ext/commonmarker/footnotes.h +0 -27
- data/ext/commonmarker/houdini.h +0 -57
- data/ext/commonmarker/houdini_href_e.c +0 -100
- data/ext/commonmarker/houdini_html_e.c +0 -66
- data/ext/commonmarker/houdini_html_u.c +0 -149
- data/ext/commonmarker/html.c +0 -502
- data/ext/commonmarker/html.h +0 -27
- data/ext/commonmarker/inlines.c +0 -1788
- data/ext/commonmarker/inlines.h +0 -29
- data/ext/commonmarker/iterator.c +0 -159
- data/ext/commonmarker/iterator.h +0 -26
- data/ext/commonmarker/latex.c +0 -468
- data/ext/commonmarker/linked_list.c +0 -37
- data/ext/commonmarker/man.c +0 -274
- data/ext/commonmarker/map.c +0 -129
- data/ext/commonmarker/map.h +0 -44
- data/ext/commonmarker/node.c +0 -1045
- data/ext/commonmarker/node.h +0 -167
- data/ext/commonmarker/parser.h +0 -59
- data/ext/commonmarker/plaintext.c +0 -218
- data/ext/commonmarker/plugin.c +0 -36
- data/ext/commonmarker/plugin.h +0 -34
- data/ext/commonmarker/references.c +0 -43
- data/ext/commonmarker/references.h +0 -26
- data/ext/commonmarker/registry.c +0 -63
- data/ext/commonmarker/registry.h +0 -24
- data/ext/commonmarker/render.c +0 -213
- data/ext/commonmarker/render.h +0 -62
- data/ext/commonmarker/scanners.c +0 -14056
- data/ext/commonmarker/scanners.h +0 -70
- data/ext/commonmarker/scanners.re +0 -341
- data/ext/commonmarker/strikethrough.c +0 -167
- data/ext/commonmarker/strikethrough.h +0 -9
- data/ext/commonmarker/syntax_extension.c +0 -149
- data/ext/commonmarker/syntax_extension.h +0 -34
- data/ext/commonmarker/table.c +0 -917
- data/ext/commonmarker/table.h +0 -12
- data/ext/commonmarker/tagfilter.c +0 -60
- data/ext/commonmarker/tagfilter.h +0 -8
- data/ext/commonmarker/tasklist.c +0 -156
- data/ext/commonmarker/tasklist.h +0 -8
- data/ext/commonmarker/utf8.c +0 -317
- data/ext/commonmarker/utf8.h +0 -35
- data/ext/commonmarker/xml.c +0 -182
- data/lib/commonmarker/renderer/html_renderer.rb +0 -256
data/ext/commonmarker/table.h
DELETED
@@ -1,12 +0,0 @@
|
|
1
|
-
#ifndef CMARK_GFM_TABLE_H
|
2
|
-
#define CMARK_GFM_TABLE_H
|
3
|
-
|
4
|
-
#include "cmark-gfm-core-extensions.h"
|
5
|
-
|
6
|
-
|
7
|
-
extern cmark_node_type CMARK_NODE_TABLE, CMARK_NODE_TABLE_ROW,
|
8
|
-
CMARK_NODE_TABLE_CELL;
|
9
|
-
|
10
|
-
cmark_syntax_extension *create_table_extension(void);
|
11
|
-
|
12
|
-
#endif
|
@@ -1,60 +0,0 @@
|
|
1
|
-
#include "tagfilter.h"
|
2
|
-
#include <parser.h>
|
3
|
-
#include <ctype.h>
|
4
|
-
|
5
|
-
/* Tag names that make filter() report a raw HTML tag as disallowed.
 * The list is NULL-terminated so it can be walked without a length. */
static const char *blacklist[] = {
    "title",
    "textarea",
    "style",
    "xmp",
    "iframe",
    "noembed",
    "noframes",
    "script",
    "plaintext",
    NULL,
};
|
9
|
-
|
10
|
-
/*
 * Report whether the raw HTML tag held in tag_data[0..tag_size) is an
 * opening ("<name") or closing ("</name") tag called `tagname`.
 *
 * tag_data  bytes of the tag, starting at '<'; need not be NUL-terminated
 * tag_size  number of valid bytes in tag_data
 * tagname   NUL-terminated, lowercase ASCII name to compare against
 *
 * Returns 1 when the name matches case-insensitively and is followed by
 * whitespace, '>' or "/>"; returns 0 otherwise (including when the data
 * ends right after the name).
 *
 * Case folding is done for ASCII 'A'..'Z' only. tolower() consults the
 * current LC_CTYPE locale, which would make the outcome of this filter
 * depend on the environment of the host process.
 */
static int is_tag(const unsigned char *tag_data, size_t tag_size,
                  const char *tagname) {
  size_t i = 1;

  if (tag_size < 3 || tag_data[0] != '<')
    return 0;

  /* Closing tags are matched the same way as opening tags. */
  if (tag_data[i] == '/')
    i++;

  for (; i < tag_size && *tagname != 0; ++i, ++tagname) {
    unsigned char c = tag_data[i];

    if (c >= 'A' && c <= 'Z')
      c = (unsigned char)(c - 'A' + 'a');

    if (c != (unsigned char)*tagname)
      return 0;
  }

  /* Ran out of data before seeing what follows the name. */
  if (i == tag_size)
    return 0;

  if (cmark_isspace(tag_data[i]) || tag_data[i] == '>')
    return 1;

  if (tag_data[i] == '/' && tag_size >= i + 2 && tag_data[i + 1] == '>')
    return 1;

  return 0;
}
|
42
|
-
|
43
|
-
/*
 * HTML filter callback for the tagfilter extension.
 * Returns 0 when `tag` names any entry of `blacklist`, 1 otherwise.
 * `ext` is not used.
 */
static int filter(cmark_syntax_extension *ext, const unsigned char *tag,
                  size_t tag_len) {
  const char **entry = blacklist;

  while (*entry) {
    if (is_tag(tag, tag_len, *entry))
      return 0;
    entry++;
  }

  return 1;
}
|
55
|
-
|
56
|
-
cmark_syntax_extension *create_tagfilter_extension(void) {
|
57
|
-
cmark_syntax_extension *ext = cmark_syntax_extension_new("tagfilter");
|
58
|
-
cmark_syntax_extension_set_html_filter_func(ext, filter);
|
59
|
-
return ext;
|
60
|
-
}
|
data/ext/commonmarker/tasklist.c
DELETED
@@ -1,156 +0,0 @@
|
|
1
|
-
#include "tasklist.h"
|
2
|
-
#include <parser.h>
|
3
|
-
#include <render.h>
|
4
|
-
#include <html.h>
|
5
|
-
#include "ext_scanners.h"
|
6
|
-
|
7
|
-
// Checkbox state of a task list item.
typedef enum {
  CMARK_TASKLIST_NOCHECKED = 0,
  CMARK_TASKLIST_CHECKED = 1,
} cmark_tasklist_type;
|
11
|
-
|
12
|
-
// Local constants
|
13
|
-
static const char *TYPE_STRING = "tasklist";
|
14
|
-
|
15
|
-
static const char *get_type_string(cmark_syntax_extension *extension, cmark_node *node) {
|
16
|
-
return TYPE_STRING;
|
17
|
-
}
|
18
|
-
|
19
|
-
|
20
|
-
// Return 1 if state was set, 0 otherwise
|
21
|
-
int cmark_gfm_extensions_set_tasklist_item_checked(cmark_node *node, bool is_checked) {
|
22
|
-
// The node has to exist, and be an extension, and actually be the right type in order to get the value.
|
23
|
-
if (!node || !node->extension || strcmp(cmark_node_get_type_string(node), TYPE_STRING))
|
24
|
-
return 0;
|
25
|
-
|
26
|
-
node->as.list.checked = is_checked;
|
27
|
-
return 1;
|
28
|
-
}
|
29
|
-
|
30
|
-
// Read the checked state of a task list item.
// Returns false when `node` is NULL, has no extension attached, or its type
// string is not TYPE_STRING; otherwise returns the stored state.
bool cmark_gfm_extensions_get_tasklist_item_checked(cmark_node *node) {
  if (!node || !node->extension)
    return false;

  if (strcmp(cmark_node_get_type_string(node), TYPE_STRING) != 0)
    return false;

  return node->as.list.checked ? true : false;
}
|
41
|
-
|
42
|
-
static bool parse_node_item_prefix(cmark_parser *parser, const char *input,
|
43
|
-
cmark_node *container) {
|
44
|
-
bool res = false;
|
45
|
-
|
46
|
-
if (parser->indent >=
|
47
|
-
container->as.list.marker_offset + container->as.list.padding) {
|
48
|
-
cmark_parser_advance_offset(parser, input, container->as.list.marker_offset +
|
49
|
-
container->as.list.padding,
|
50
|
-
true);
|
51
|
-
res = true;
|
52
|
-
} else if (parser->blank && container->first_child != NULL) {
|
53
|
-
// if container->first_child is NULL, then the opening line
|
54
|
-
// of the list item was blank after the list marker; in this
|
55
|
-
// case, we are done with the list item.
|
56
|
-
cmark_parser_advance_offset(parser, input, parser->first_nonspace - parser->offset,
|
57
|
-
false);
|
58
|
-
res = true;
|
59
|
-
}
|
60
|
-
return res;
|
61
|
-
}
|
62
|
-
|
63
|
-
static int matches(cmark_syntax_extension *self, cmark_parser *parser,
|
64
|
-
unsigned char *input, int len,
|
65
|
-
cmark_node *parent_container) {
|
66
|
-
return parse_node_item_prefix(parser, (const char*)input, parent_container);
|
67
|
-
}
|
68
|
-
|
69
|
-
static int can_contain(cmark_syntax_extension *extension, cmark_node *node,
|
70
|
-
cmark_node_type child_type) {
|
71
|
-
return (node->type == CMARK_NODE_ITEM) ? 1 : 0;
|
72
|
-
}
|
73
|
-
|
74
|
-
// Open-block callback: turn the enclosing list item into a task list item
// when the line starts with a checkbox marker.
//
// self              this extension; attached to the item on a match
// indented          not used
// parser            parser whose offset is advanced past the marker
// parent_container  candidate node; must be a CMARK_NODE_ITEM
// input, len        the line text handed to scan_tasklist()
//
// Always returns NULL: no new node is created, the existing item is
// re-tagged in place and its `checked` flag is set.
static cmark_node *open_tasklist_item(cmark_syntax_extension *self,
                                      int indented, cmark_parser *parser,
                                      cmark_node *parent_container,
                                      unsigned char *input, int len) {
  if (cmark_node_get_type(parent_container) != CMARK_NODE_ITEM) {
    return NULL;
  }

  bufsize_t matched = scan_tasklist(input, len, 0);
  if (!matched) {
    return NULL;
  }

  cmark_node_set_syntax_extension(parent_container, self);
  cmark_parser_advance_offset(parser, (char *)input, 3, false);

  // Either an upper or lower case X means the task is completed.
  // Only the prefix matched by scan_tasklist() is inspected: searching the
  // whole line would mark "[ ] see [x]" as completed because of the later
  // "[x]" in the item text.
  // NOTE(review): assumes the scanner's match covers the checkbox itself.
  bool checked = false;
  for (bufsize_t i = 0; i + 2 < matched; i++) {
    if (input[i] == '[' && (input[i + 1] == 'x' || input[i + 1] == 'X') &&
        input[i + 2] == ']') {
      checked = true;
      break;
    }
  }
  parent_container->as.list.checked = checked;

  return NULL;
}
|
96
|
-
|
97
|
-
static void commonmark_render(cmark_syntax_extension *extension,
|
98
|
-
cmark_renderer *renderer, cmark_node *node,
|
99
|
-
cmark_event_type ev_type, int options) {
|
100
|
-
bool entering = (ev_type == CMARK_EVENT_ENTER);
|
101
|
-
if (entering) {
|
102
|
-
renderer->cr(renderer);
|
103
|
-
if (node->as.list.checked) {
|
104
|
-
renderer->out(renderer, node, "- [x] ", false, LITERAL);
|
105
|
-
} else {
|
106
|
-
renderer->out(renderer, node, "- [ ] ", false, LITERAL);
|
107
|
-
}
|
108
|
-
cmark_strbuf_puts(renderer->prefix, " ");
|
109
|
-
} else {
|
110
|
-
cmark_strbuf_truncate(renderer->prefix, renderer->prefix->size - 2);
|
111
|
-
renderer->cr(renderer);
|
112
|
-
}
|
113
|
-
}
|
114
|
-
|
115
|
-
static void html_render(cmark_syntax_extension *extension,
|
116
|
-
cmark_html_renderer *renderer, cmark_node *node,
|
117
|
-
cmark_event_type ev_type, int options) {
|
118
|
-
bool entering = (ev_type == CMARK_EVENT_ENTER);
|
119
|
-
if (entering) {
|
120
|
-
cmark_html_render_cr(renderer->html);
|
121
|
-
cmark_strbuf_puts(renderer->html, "<li");
|
122
|
-
cmark_html_render_sourcepos(node, renderer->html, options);
|
123
|
-
cmark_strbuf_putc(renderer->html, '>');
|
124
|
-
if (node->as.list.checked) {
|
125
|
-
cmark_strbuf_puts(renderer->html, "<input type=\"checkbox\" checked=\"\" disabled=\"\" /> ");
|
126
|
-
} else {
|
127
|
-
cmark_strbuf_puts(renderer->html, "<input type=\"checkbox\" disabled=\"\" /> ");
|
128
|
-
}
|
129
|
-
} else {
|
130
|
-
cmark_strbuf_puts(renderer->html, "</li>\n");
|
131
|
-
}
|
132
|
-
}
|
133
|
-
|
134
|
-
static const char *xml_attr(cmark_syntax_extension *extension,
|
135
|
-
cmark_node *node) {
|
136
|
-
if (node->as.list.checked) {
|
137
|
-
return " completed=\"true\"";
|
138
|
-
} else {
|
139
|
-
return " completed=\"false\"";
|
140
|
-
}
|
141
|
-
}
|
142
|
-
|
143
|
-
cmark_syntax_extension *create_tasklist_extension(void) {
|
144
|
-
cmark_syntax_extension *ext = cmark_syntax_extension_new("tasklist");
|
145
|
-
|
146
|
-
cmark_syntax_extension_set_match_block_func(ext, matches);
|
147
|
-
cmark_syntax_extension_set_get_type_string_func(ext, get_type_string);
|
148
|
-
cmark_syntax_extension_set_open_block_func(ext, open_tasklist_item);
|
149
|
-
cmark_syntax_extension_set_can_contain_func(ext, can_contain);
|
150
|
-
cmark_syntax_extension_set_commonmark_render_func(ext, commonmark_render);
|
151
|
-
cmark_syntax_extension_set_plaintext_render_func(ext, commonmark_render);
|
152
|
-
cmark_syntax_extension_set_html_render_func(ext, html_render);
|
153
|
-
cmark_syntax_extension_set_xml_attr_func(ext, xml_attr);
|
154
|
-
|
155
|
-
return ext;
|
156
|
-
}
|
data/ext/commonmarker/tasklist.h
DELETED
data/ext/commonmarker/utf8.c
DELETED
@@ -1,317 +0,0 @@
|
|
1
|
-
#include <stdlib.h>
|
2
|
-
#include <stdint.h>
|
3
|
-
#include <assert.h>
|
4
|
-
|
5
|
-
#include "cmark_ctype.h"
|
6
|
-
#include "utf8.h"
|
7
|
-
|
8
|
-
// Sequence length implied by a leading byte, indexed by that byte.
// 0 marks bytes that utf8proc_charlen()/utf8proc_valid() reject as the
// first byte of a sequence.
static const int8_t utf8proc_utf8class[256] = {
    // 0x00 - 0x7F: single-byte sequences
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    // 0x80 - 0xBF: not valid as a first byte
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 0xC0 - 0xDF: two-byte sequences
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    // 0xE0 - 0xEF: three-byte sequences
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    // 0xF0 - 0xF7: four-byte sequences; 0xF8 - 0xFF: not valid
    4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0};
|
20
|
-
|
21
|
-
// Append the three bytes EF BF BD (the UTF-8 encoding of U+FFFD) to `buf`.
static void encode_unknown(cmark_strbuf *buf) {
  static const uint8_t repl[] = {0xEF, 0xBF, 0xBD};

  cmark_strbuf_put(buf, repl, 3);
}
|
25
|
-
|
26
|
-
// Length in bytes of the UTF-8 sequence starting at `str`.
//
// Returns 0 when str_len is 0, the sequence length on success, and a
// negative value on failure: -1 for a byte that cannot start a sequence,
// -str_len when the sequence is longer than the remaining input, or -i when
// byte i is not a continuation byte. The length check is skipped for a
// negative str_len.
static int utf8proc_charlen(const uint8_t *str, bufsize_t str_len) {
  int length;
  int i = 1;

  if (str_len == 0)
    return 0;

  length = utf8proc_utf8class[str[0]];
  if (length == 0)
    return -1;

  if (str_len >= 0 && (bufsize_t)length > str_len)
    return -str_len;

  while (i < length) {
    if ((str[i] & 0xC0) != 0x80)
      return -i;
    i++;
  }

  return length;
}
|
47
|
-
|
48
|
-
// Validate a single UTF-8 character according to RFC 3629.
|
49
|
-
static int utf8proc_valid(const uint8_t *str, bufsize_t str_len) {
|
50
|
-
int length = utf8proc_utf8class[str[0]];
|
51
|
-
|
52
|
-
if (!length)
|
53
|
-
return -1;
|
54
|
-
|
55
|
-
if ((bufsize_t)length > str_len)
|
56
|
-
return -str_len;
|
57
|
-
|
58
|
-
switch (length) {
|
59
|
-
case 2:
|
60
|
-
if ((str[1] & 0xC0) != 0x80)
|
61
|
-
return -1;
|
62
|
-
if (str[0] < 0xC2) {
|
63
|
-
// Overlong
|
64
|
-
return -length;
|
65
|
-
}
|
66
|
-
break;
|
67
|
-
|
68
|
-
case 3:
|
69
|
-
if ((str[1] & 0xC0) != 0x80)
|
70
|
-
return -1;
|
71
|
-
if ((str[2] & 0xC0) != 0x80)
|
72
|
-
return -2;
|
73
|
-
if (str[0] == 0xE0) {
|
74
|
-
if (str[1] < 0xA0) {
|
75
|
-
// Overlong
|
76
|
-
return -length;
|
77
|
-
}
|
78
|
-
} else if (str[0] == 0xED) {
|
79
|
-
if (str[1] >= 0xA0) {
|
80
|
-
// Surrogate
|
81
|
-
return -length;
|
82
|
-
}
|
83
|
-
}
|
84
|
-
break;
|
85
|
-
|
86
|
-
case 4:
|
87
|
-
if ((str[1] & 0xC0) != 0x80)
|
88
|
-
return -1;
|
89
|
-
if ((str[2] & 0xC0) != 0x80)
|
90
|
-
return -2;
|
91
|
-
if ((str[3] & 0xC0) != 0x80)
|
92
|
-
return -3;
|
93
|
-
if (str[0] == 0xF0) {
|
94
|
-
if (str[1] < 0x90) {
|
95
|
-
// Overlong
|
96
|
-
return -length;
|
97
|
-
}
|
98
|
-
} else if (str[0] >= 0xF4) {
|
99
|
-
if (str[0] > 0xF4 || str[1] >= 0x90) {
|
100
|
-
// Above 0x10FFFF
|
101
|
-
return -length;
|
102
|
-
}
|
103
|
-
}
|
104
|
-
break;
|
105
|
-
}
|
106
|
-
|
107
|
-
return length;
|
108
|
-
}
|
109
|
-
|
110
|
-
void cmark_utf8proc_check(cmark_strbuf *ob, const uint8_t *line,
|
111
|
-
bufsize_t size) {
|
112
|
-
bufsize_t i = 0;
|
113
|
-
|
114
|
-
while (i < size) {
|
115
|
-
bufsize_t org = i;
|
116
|
-
int charlen = 0;
|
117
|
-
|
118
|
-
while (i < size) {
|
119
|
-
if (line[i] < 0x80 && line[i] != 0) {
|
120
|
-
i++;
|
121
|
-
} else if (line[i] >= 0x80) {
|
122
|
-
charlen = utf8proc_valid(line + i, size - i);
|
123
|
-
if (charlen < 0) {
|
124
|
-
charlen = -charlen;
|
125
|
-
break;
|
126
|
-
}
|
127
|
-
i += charlen;
|
128
|
-
} else if (line[i] == 0) {
|
129
|
-
// ASCII NUL is technically valid but rejected
|
130
|
-
// for security reasons.
|
131
|
-
charlen = 1;
|
132
|
-
break;
|
133
|
-
}
|
134
|
-
}
|
135
|
-
|
136
|
-
if (i > org) {
|
137
|
-
cmark_strbuf_put(ob, line + org, i - org);
|
138
|
-
}
|
139
|
-
|
140
|
-
if (i >= size) {
|
141
|
-
break;
|
142
|
-
} else {
|
143
|
-
// Invalid UTF-8
|
144
|
-
encode_unknown(ob);
|
145
|
-
i += charlen;
|
146
|
-
}
|
147
|
-
}
|
148
|
-
}
|
149
|
-
|
150
|
-
// Decode the code point at the start of `str`.
//
// On success stores the code point in *dst and returns the number of bytes
// consumed. On failure stores -1 in *dst and returns -1; this covers
// malformed sequences, overlong forms, surrogates, values at or above
// 0x110000, and an empty input.
int cmark_utf8proc_iterate(const uint8_t *str, bufsize_t str_len,
                           int32_t *dst) {
  int32_t uc = -1;
  int length;

  *dst = -1;

  length = utf8proc_charlen(str, str_len);
  if (length < 0)
    return -1;

  if (length == 1) {
    uc = str[0];
  } else if (length == 2) {
    uc = ((str[0] & 0x1F) << 6) | (str[1] & 0x3F);
    if (uc < 0x80)
      uc = -1;
  } else if (length == 3) {
    uc = ((str[0] & 0x0F) << 12) | ((str[1] & 0x3F) << 6) | (str[2] & 0x3F);
    if (uc < 0x800 || (uc >= 0xD800 && uc < 0xE000))
      uc = -1;
  } else if (length == 4) {
    uc = ((str[0] & 0x07) << 18) | ((str[1] & 0x3F) << 12) |
         ((str[2] & 0x3F) << 6) | (str[3] & 0x3F);
    if (uc < 0x10000 || uc >= 0x110000)
      uc = -1;
  }

  if (uc < 0)
    return -1;

  *dst = uc;
  return length;
}
|
188
|
-
|
189
|
-
// Append the encoding of code point `uc` to `buf`.
//
// Values below 0x110000 are written as 1-4 byte UTF-8, except 0xFFFF and
// 0xFFFE, which are written as the single bytes 0xFF and 0xFE. Larger values
// are written via encode_unknown(). `uc` must not be negative (asserted).
void cmark_utf8proc_encode_char(int32_t uc, cmark_strbuf *buf) {
  uint8_t bytes[4];
  bufsize_t count = 0;

  assert(uc >= 0);

  if (uc >= 0x110000) {
    encode_unknown(buf);
    return;
  }

  if (uc < 0x80) {
    bytes[0] = (uint8_t)(uc);
    count = 1;
  } else if (uc < 0x800) {
    bytes[0] = (uint8_t)(0xC0 + (uc >> 6));
    bytes[1] = (uint8_t)(0x80 + (uc & 0x3F));
    count = 2;
  } else if (uc == 0xFFFF || uc == 0xFFFE) {
    // These two values are emitted as one raw byte: 0xFF or 0xFE.
    bytes[0] = (uint8_t)(uc & 0xFF);
    count = 1;
  } else if (uc < 0x10000) {
    bytes[0] = (uint8_t)(0xE0 + (uc >> 12));
    bytes[1] = (uint8_t)(0x80 + ((uc >> 6) & 0x3F));
    bytes[2] = (uint8_t)(0x80 + (uc & 0x3F));
    count = 3;
  } else {
    bytes[0] = (uint8_t)(0xF0 + (uc >> 18));
    bytes[1] = (uint8_t)(0x80 + ((uc >> 12) & 0x3F));
    bytes[2] = (uint8_t)(0x80 + ((uc >> 6) & 0x3F));
    bytes[3] = (uint8_t)(0x80 + (uc & 0x3F));
    count = 4;
  }

  cmark_strbuf_put(buf, bytes, count);
}
|
226
|
-
|
227
|
-
void cmark_utf8proc_case_fold(cmark_strbuf *dest, const uint8_t *str,
|
228
|
-
bufsize_t len) {
|
229
|
-
int32_t c;
|
230
|
-
|
231
|
-
#define bufpush(x) cmark_utf8proc_encode_char(x, dest)
|
232
|
-
|
233
|
-
while (len > 0) {
|
234
|
-
bufsize_t char_len = cmark_utf8proc_iterate(str, len, &c);
|
235
|
-
|
236
|
-
if (char_len >= 0) {
|
237
|
-
#include "case_fold_switch.inc"
|
238
|
-
} else {
|
239
|
-
encode_unknown(dest);
|
240
|
-
char_len = -char_len;
|
241
|
-
}
|
242
|
-
|
243
|
-
str += char_len;
|
244
|
-
len -= char_len;
|
245
|
-
}
|
246
|
-
}
|
247
|
-
|
248
|
-
// Returns 1 for anything in the Zs class, plus LF, CR, TAB and FF;
// returns 0 for every other code point.
int cmark_utf8proc_is_space(int32_t uc) {
  if (uc >= 8192 && uc <= 8202)
    return 1;

  switch (uc) {
  case 9:
  case 10:
  case 12:
  case 13:
  case 32:
  case 160:
  case 5760:
  case 8239:
  case 8287:
  case 12288:
    return 1;
  default:
    return 0;
  }
}
|
254
|
-
|
255
|
-
// Returns 1 for anything in the P[cdefios] classes, 0 otherwise.
// Code points below 128 are decided by cmark_ispunct(); everything else is
// checked against the explicit list below.
int cmark_utf8proc_is_punctuation(int32_t uc) {
  if (uc < 128)
    return cmark_ispunct((char)uc) ? 1 : 0;

  return (
      uc == 161 || uc == 167 ||
      uc == 171 || uc == 182 || uc == 183 || uc == 187 || uc == 191 ||
      uc == 894 || uc == 903 || (uc >= 1370 && uc <= 1375) || uc == 1417 ||
      uc == 1418 || uc == 1470 || uc == 1472 || uc == 1475 || uc == 1478 ||
      uc == 1523 || uc == 1524 || uc == 1545 || uc == 1546 || uc == 1548 ||
      uc == 1549 || uc == 1563 || uc == 1566 || uc == 1567 ||
      (uc >= 1642 && uc <= 1645) || uc == 1748 || (uc >= 1792 && uc <= 1805) ||
      (uc >= 2039 && uc <= 2041) || (uc >= 2096 && uc <= 2110) || uc == 2142 ||
      uc == 2404 || uc == 2405 || uc == 2416 || uc == 2800 || uc == 3572 ||
      uc == 3663 || uc == 3674 || uc == 3675 || (uc >= 3844 && uc <= 3858) ||
      uc == 3860 || (uc >= 3898 && uc <= 3901) || uc == 3973 ||
      (uc >= 4048 && uc <= 4052) || uc == 4057 || uc == 4058 ||
      (uc >= 4170 && uc <= 4175) || uc == 4347 || (uc >= 4960 && uc <= 4968) ||
      uc == 5120 || uc == 5741 || uc == 5742 || uc == 5787 || uc == 5788 ||
      (uc >= 5867 && uc <= 5869) || uc == 5941 || uc == 5942 ||
      (uc >= 6100 && uc <= 6102) || (uc >= 6104 && uc <= 6106) ||
      (uc >= 6144 && uc <= 6154) || uc == 6468 || uc == 6469 || uc == 6686 ||
      uc == 6687 || (uc >= 6816 && uc <= 6822) || (uc >= 6824 && uc <= 6829) ||
      (uc >= 7002 && uc <= 7008) || (uc >= 7164 && uc <= 7167) ||
      (uc >= 7227 && uc <= 7231) || uc == 7294 || uc == 7295 ||
      (uc >= 7360 && uc <= 7367) || uc == 7379 || (uc >= 8208 && uc <= 8231) ||
      (uc >= 8240 && uc <= 8259) || (uc >= 8261 && uc <= 8273) ||
      (uc >= 8275 && uc <= 8286) || uc == 8317 || uc == 8318 || uc == 8333 ||
      uc == 8334 || (uc >= 8968 && uc <= 8971) || uc == 9001 || uc == 9002 ||
      (uc >= 10088 && uc <= 10101) || uc == 10181 || uc == 10182 ||
      (uc >= 10214 && uc <= 10223) || (uc >= 10627 && uc <= 10648) ||
      (uc >= 10712 && uc <= 10715) || uc == 10748 || uc == 10749 ||
      (uc >= 11513 && uc <= 11516) || uc == 11518 || uc == 11519 ||
      uc == 11632 || (uc >= 11776 && uc <= 11822) ||
      (uc >= 11824 && uc <= 11842) || (uc >= 12289 && uc <= 12291) ||
      (uc >= 12296 && uc <= 12305) || (uc >= 12308 && uc <= 12319) ||
      uc == 12336 || uc == 12349 || uc == 12448 || uc == 12539 || uc == 42238 ||
      uc == 42239 || (uc >= 42509 && uc <= 42511) || uc == 42611 ||
      uc == 42622 || (uc >= 42738 && uc <= 42743) ||
      (uc >= 43124 && uc <= 43127) || uc == 43214 || uc == 43215 ||
      (uc >= 43256 && uc <= 43258) || uc == 43310 || uc == 43311 ||
      uc == 43359 || (uc >= 43457 && uc <= 43469) || uc == 43486 ||
      uc == 43487 || (uc >= 43612 && uc <= 43615) || uc == 43742 ||
      uc == 43743 || uc == 43760 || uc == 43761 || uc == 44011 || uc == 64830 ||
      uc == 64831 || (uc >= 65040 && uc <= 65049) ||
      (uc >= 65072 && uc <= 65106) || (uc >= 65108 && uc <= 65121) ||
      uc == 65123 || uc == 65128 || uc == 65130 || uc == 65131 ||
      (uc >= 65281 && uc <= 65283) || (uc >= 65285 && uc <= 65290) ||
      (uc >= 65292 && uc <= 65295) || uc == 65306 || uc == 65307 ||
      uc == 65311 || uc == 65312 || (uc >= 65339 && uc <= 65341) ||
      uc == 65343 || uc == 65371 || uc == 65373 ||
      (uc >= 65375 && uc <= 65381) || (uc >= 65792 && uc <= 65794) ||
      uc == 66463 || uc == 66512 || uc == 66927 || uc == 67671 || uc == 67871 ||
      uc == 67903 || (uc >= 68176 && uc <= 68184) || uc == 68223 ||
      (uc >= 68336 && uc <= 68342) || (uc >= 68409 && uc <= 68415) ||
      (uc >= 68505 && uc <= 68508) || (uc >= 69703 && uc <= 69709) ||
      uc == 69819 || uc == 69820 || (uc >= 69822 && uc <= 69825) ||
      (uc >= 69952 && uc <= 69955) || uc == 70004 || uc == 70005 ||
      (uc >= 70085 && uc <= 70088) || uc == 70093 ||
      (uc >= 70200 && uc <= 70205) || uc == 70854 ||
      (uc >= 71105 && uc <= 71113) || (uc >= 71233 && uc <= 71235) ||
      (uc >= 74864 && uc <= 74868) || uc == 92782 || uc == 92783 ||
      uc == 92917 || (uc >= 92983 && uc <= 92987) || uc == 92996 ||
      uc == 113823);
}
|
data/ext/commonmarker/utf8.h
DELETED
@@ -1,35 +0,0 @@
|
|
1
|
-
#ifndef CMARK_UTF8_H
|
2
|
-
#define CMARK_UTF8_H
|
3
|
-
|
4
|
-
#include <stdint.h>
|
5
|
-
#include "buffer.h"
|
6
|
-
|
7
|
-
#ifdef __cplusplus
|
8
|
-
extern "C" {
|
9
|
-
#endif
|
10
|
-
|
11
|
-
CMARK_GFM_EXPORT
|
12
|
-
void cmark_utf8proc_case_fold(cmark_strbuf *dest, const uint8_t *str,
|
13
|
-
bufsize_t len);
|
14
|
-
|
15
|
-
CMARK_GFM_EXPORT
|
16
|
-
void cmark_utf8proc_encode_char(int32_t uc, cmark_strbuf *buf);
|
17
|
-
|
18
|
-
CMARK_GFM_EXPORT
|
19
|
-
int cmark_utf8proc_iterate(const uint8_t *str, bufsize_t str_len, int32_t *dst);
|
20
|
-
|
21
|
-
CMARK_GFM_EXPORT
|
22
|
-
void cmark_utf8proc_check(cmark_strbuf *dest, const uint8_t *line,
|
23
|
-
bufsize_t size);
|
24
|
-
|
25
|
-
CMARK_GFM_EXPORT
|
26
|
-
int cmark_utf8proc_is_space(int32_t uc);
|
27
|
-
|
28
|
-
CMARK_GFM_EXPORT
|
29
|
-
int cmark_utf8proc_is_punctuation(int32_t uc);
|
30
|
-
|
31
|
-
#ifdef __cplusplus
|
32
|
-
}
|
33
|
-
#endif
|
34
|
-
|
35
|
-
#endif
|