markly 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/bin/markly +94 -0
- data/ext/markly/arena.c +103 -0
- data/ext/markly/autolink.c +425 -0
- data/ext/markly/autolink.h +8 -0
- data/ext/markly/blocks.c +1585 -0
- data/ext/markly/buffer.c +278 -0
- data/ext/markly/buffer.h +116 -0
- data/ext/markly/case_fold_switch.inc +4327 -0
- data/ext/markly/chunk.h +135 -0
- data/ext/markly/cmark-gfm-core-extensions.h +54 -0
- data/ext/markly/cmark-gfm-extension_api.h +736 -0
- data/ext/markly/cmark-gfm-extensions_export.h +42 -0
- data/ext/markly/cmark-gfm.h +817 -0
- data/ext/markly/cmark-gfm_export.h +42 -0
- data/ext/markly/cmark-gfm_version.h +7 -0
- data/ext/markly/cmark.c +55 -0
- data/ext/markly/cmark_ctype.c +44 -0
- data/ext/markly/cmark_ctype.h +33 -0
- data/ext/markly/commonmark.c +519 -0
- data/ext/markly/config.h +76 -0
- data/ext/markly/core-extensions.c +27 -0
- data/ext/markly/entities.inc +2138 -0
- data/ext/markly/ext_scanners.c +1159 -0
- data/ext/markly/ext_scanners.h +24 -0
- data/ext/markly/extconf.rb +7 -0
- data/ext/markly/footnotes.c +40 -0
- data/ext/markly/footnotes.h +25 -0
- data/ext/markly/houdini.h +57 -0
- data/ext/markly/houdini_href_e.c +100 -0
- data/ext/markly/houdini_html_e.c +66 -0
- data/ext/markly/houdini_html_u.c +149 -0
- data/ext/markly/html.c +465 -0
- data/ext/markly/html.h +27 -0
- data/ext/markly/inlines.c +1633 -0
- data/ext/markly/inlines.h +29 -0
- data/ext/markly/iterator.c +159 -0
- data/ext/markly/iterator.h +26 -0
- data/ext/markly/latex.c +466 -0
- data/ext/markly/linked_list.c +37 -0
- data/ext/markly/man.c +278 -0
- data/ext/markly/map.c +122 -0
- data/ext/markly/map.h +41 -0
- data/ext/markly/markly.c +1226 -0
- data/ext/markly/markly.h +16 -0
- data/ext/markly/node.c +979 -0
- data/ext/markly/node.h +118 -0
- data/ext/markly/parser.h +58 -0
- data/ext/markly/plaintext.c +235 -0
- data/ext/markly/plugin.c +36 -0
- data/ext/markly/plugin.h +34 -0
- data/ext/markly/references.c +42 -0
- data/ext/markly/references.h +26 -0
- data/ext/markly/registry.c +63 -0
- data/ext/markly/registry.h +24 -0
- data/ext/markly/render.c +205 -0
- data/ext/markly/render.h +62 -0
- data/ext/markly/scanners.c +20382 -0
- data/ext/markly/scanners.h +62 -0
- data/ext/markly/scanners.re +326 -0
- data/ext/markly/strikethrough.c +167 -0
- data/ext/markly/strikethrough.h +9 -0
- data/ext/markly/syntax_extension.c +149 -0
- data/ext/markly/syntax_extension.h +34 -0
- data/ext/markly/table.c +803 -0
- data/ext/markly/table.h +12 -0
- data/ext/markly/tagfilter.c +60 -0
- data/ext/markly/tagfilter.h +8 -0
- data/ext/markly/tasklist.c +156 -0
- data/ext/markly/tasklist.h +8 -0
- data/ext/markly/utf8.c +317 -0
- data/ext/markly/utf8.h +35 -0
- data/ext/markly/xml.c +181 -0
- data/lib/markly.rb +43 -0
- data/lib/markly/flags.rb +37 -0
- data/lib/markly/markly.so +0 -0
- data/lib/markly/node.rb +70 -0
- data/lib/markly/node/inspect.rb +59 -0
- data/lib/markly/renderer.rb +133 -0
- data/lib/markly/renderer/html_renderer.rb +252 -0
- data/lib/markly/version.rb +5 -0
- metadata +211 -0
data/ext/markly/table.h
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
#ifndef CMARK_GFM_TABLE_H
#define CMARK_GFM_TABLE_H

#include "cmark-gfm-core-extensions.h"

/* Node type identifiers for the table extension. They are only declared
 * here; the definitions live in another translation unit. */
extern cmark_node_type CMARK_NODE_TABLE;
extern cmark_node_type CMARK_NODE_TABLE_ROW;
extern cmark_node_type CMARK_NODE_TABLE_CELL;

/* Returns the syntax extension object for the table extension. */
cmark_syntax_extension *create_table_extension(void);

#endif /* CMARK_GFM_TABLE_H */
|
data/ext/markly/tagfilter.c
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
#include "tagfilter.h"
|
2
|
+
#include <parser.h>
|
3
|
+
#include <ctype.h>
|
4
|
+
|
5
|
+
// Lower-case names of the HTML tags this filter rejects. The list is
// NULL-terminated so it can be walked without a separate length.
static const char *blacklist[] = {
    "title",
    "textarea",
    "style",
    "xmp",
    "iframe",
    "noembed",
    "noframes",
    "script",
    "plaintext",
    NULL,
};
|
9
|
+
|
10
|
+
// Tests whether the raw tag text in tag_data[0..tag_size) is an opening or
// closing tag named `tagname`.
//
// tag_data must start with '<', optionally followed by '/'. The name is
// compared after lower-casing each input byte, so `tagname` is expected to
// be lower case already. The name must be followed by whitespace, '>' or
// "/>" to count as a match.
//
// Returns 1 on a match and 0 otherwise.
static int is_tag(const unsigned char *tag_data, size_t tag_size,
                  const char *tagname) {
  size_t pos = 1;

  // Too short to hold "<x>" or not a tag at all.
  if (tag_size < 3 || tag_data[0] != '<')
    return 0;

  // Skip the slash of a closing tag.
  if (tag_data[pos] == '/')
    pos++;

  // Consume the tag name; stop when either the input or the name runs out.
  while (pos < tag_size && *tagname != 0) {
    if (tolower(tag_data[pos]) != *tagname)
      return 0;
    pos++;
    tagname++;
  }

  // Input ended before a terminator could follow the name.
  if (pos == tag_size)
    return 0;

  if (cmark_isspace(tag_data[pos]) || tag_data[pos] == '>')
    return 1;

  // Self-closing form: "/>" immediately after the name.
  if (tag_data[pos] == '/' && tag_size >= pos + 2 && tag_data[pos + 1] == '>')
    return 1;

  return 0;
}
|
42
|
+
|
43
|
+
// HTML filter callback registered by create_tagfilter_extension().
//
// Returns 0 when `tag` (tag_len bytes) matches any entry of `blacklist`,
// and 1 when it matches none. `ext` is not used.
static int filter(cmark_syntax_extension *ext, const unsigned char *tag,
                  size_t tag_len) {
  size_t idx;

  for (idx = 0; blacklist[idx] != NULL; idx++) {
    if (is_tag(tag, tag_len, blacklist[idx]))
      return 0;
  }

  return 1;
}
|
55
|
+
|
56
|
+
cmark_syntax_extension *create_tagfilter_extension(void) {
|
57
|
+
cmark_syntax_extension *ext = cmark_syntax_extension_new("tagfilter");
|
58
|
+
cmark_syntax_extension_set_html_filter_func(ext, filter);
|
59
|
+
return ext;
|
60
|
+
}
|
data/ext/markly/tasklist.c
ADDED
@@ -0,0 +1,156 @@
|
|
1
|
+
#include "tasklist.h"
|
2
|
+
#include <parser.h>
|
3
|
+
#include <render.h>
|
4
|
+
#include <html.h>
|
5
|
+
#include "ext_scanners.h"
|
6
|
+
|
7
|
+
// Checked / unchecked state of a task list item.
typedef enum {
  CMARK_TASKLIST_NOCHECKED = 0,
  CMARK_TASKLIST_CHECKED = 1,
} cmark_tasklist_type;
|
11
|
+
|
12
|
+
// Local constants
|
13
|
+
static const char *TYPE_STRING = "tasklist";
|
14
|
+
|
15
|
+
static const char *get_type_string(cmark_syntax_extension *extension, cmark_node *node) {
|
16
|
+
return TYPE_STRING;
|
17
|
+
}
|
18
|
+
|
19
|
+
|
20
|
+
// Return 1 if state was set, 0 otherwise
|
21
|
+
int cmark_gfm_extensions_set_tasklist_item_checked(cmark_node *node, bool is_checked) {
|
22
|
+
// The node has to exist, and be an extension, and actually be the right type in order to get the value.
|
23
|
+
if (!node || !node->extension || strcmp(cmark_node_get_type_string(node), TYPE_STRING))
|
24
|
+
return 0;
|
25
|
+
|
26
|
+
node->as.list.checked = is_checked;
|
27
|
+
return 1;
|
28
|
+
}
|
29
|
+
|
30
|
+
// Reports whether a task list item is checked.
//
// Returns false when the node is NULL, has no extension attached, or its
// type string differs from TYPE_STRING; otherwise returns the stored
// checked flag as a bool.
bool cmark_gfm_extensions_get_tasklist_item_checked(cmark_node *node) {
  if (node == NULL || node->extension == NULL)
    return false;

  if (strcmp(cmark_node_get_type_string(node), TYPE_STRING) != 0)
    return false;

  return node->as.list.checked ? true : false;
}
|
41
|
+
|
42
|
+
static bool parse_node_item_prefix(cmark_parser *parser, const char *input,
|
43
|
+
cmark_node *container) {
|
44
|
+
bool res = false;
|
45
|
+
|
46
|
+
if (parser->indent >=
|
47
|
+
container->as.list.marker_offset + container->as.list.padding) {
|
48
|
+
cmark_parser_advance_offset(parser, input, container->as.list.marker_offset +
|
49
|
+
container->as.list.padding,
|
50
|
+
true);
|
51
|
+
res = true;
|
52
|
+
} else if (parser->blank && container->first_child != NULL) {
|
53
|
+
// if container->first_child is NULL, then the opening line
|
54
|
+
// of the list item was blank after the list marker; in this
|
55
|
+
// case, we are done with the list item.
|
56
|
+
cmark_parser_advance_offset(parser, input, parser->first_nonspace - parser->offset,
|
57
|
+
false);
|
58
|
+
res = true;
|
59
|
+
}
|
60
|
+
return res;
|
61
|
+
}
|
62
|
+
|
63
|
+
static int matches(cmark_syntax_extension *self, cmark_parser *parser,
|
64
|
+
unsigned char *input, int len,
|
65
|
+
cmark_node *parent_container) {
|
66
|
+
return parse_node_item_prefix(parser, (const char*)input, parent_container);
|
67
|
+
}
|
68
|
+
|
69
|
+
static int can_contain(cmark_syntax_extension *extension, cmark_node *node,
|
70
|
+
cmark_node_type child_type) {
|
71
|
+
return (node->type == CMARK_NODE_ITEM) ? 1 : 0;
|
72
|
+
}
|
73
|
+
|
74
|
+
// Block-open callback for the extension.
//
// When `parent_container` is a list item and `input` matches scan_tasklist,
// the item is claimed by this extension, the parser is advanced by three
// columns, and the item's checked flag is set from the checkbox marker.
//
// No new node is ever created: the function always returns NULL and only
// modifies `parent_container`.
static cmark_node *open_tasklist_item(cmark_syntax_extension *self,
                                      int indented, cmark_parser *parser,
                                      cmark_node *parent_container,
                                      unsigned char *input, int len) {
  bufsize_t matched;
  bufsize_t i;
  bool checked = false;

  if (cmark_node_get_type(parent_container) != CMARK_NODE_ITEM) {
    return NULL;
  }

  matched = scan_tasklist(input, len, 0);
  if (!matched) {
    return NULL;
  }

  cmark_node_set_syntax_extension(parent_container, self);
  cmark_parser_advance_offset(parser, (char *)input, 3, false);

  // Either an upper or lower case X means the task is completed.
  //
  // Only the prefix recognised by the scanner is inspected. Searching the
  // whole remaining input would mark an unchecked item as checked whenever
  // its text happened to contain "[x]", and would depend on the input being
  // NUL-terminated.
  // NOTE(review): assumes scan_tasklist returns the byte length of the
  // matched prefix starting at input[0] -- confirm against ext_scanners.
  for (i = 0; i + 2 < matched; ++i) {
    if (input[i] == '[' && (input[i + 1] == 'x' || input[i + 1] == 'X') &&
        input[i + 2] == ']') {
      checked = true;
      break;
    }
  }
  parent_container->as.list.checked = checked;

  return NULL;
}
|
96
|
+
|
97
|
+
static void commonmark_render(cmark_syntax_extension *extension,
|
98
|
+
cmark_renderer *renderer, cmark_node *node,
|
99
|
+
cmark_event_type ev_type, int options) {
|
100
|
+
bool entering = (ev_type == CMARK_EVENT_ENTER);
|
101
|
+
if (entering) {
|
102
|
+
renderer->cr(renderer);
|
103
|
+
if (node->as.list.checked) {
|
104
|
+
renderer->out(renderer, node, "- [x] ", false, LITERAL);
|
105
|
+
} else {
|
106
|
+
renderer->out(renderer, node, "- [ ] ", false, LITERAL);
|
107
|
+
}
|
108
|
+
cmark_strbuf_puts(renderer->prefix, " ");
|
109
|
+
} else {
|
110
|
+
cmark_strbuf_truncate(renderer->prefix, renderer->prefix->size - 2);
|
111
|
+
renderer->cr(renderer);
|
112
|
+
}
|
113
|
+
}
|
114
|
+
|
115
|
+
static void html_render(cmark_syntax_extension *extension,
|
116
|
+
cmark_html_renderer *renderer, cmark_node *node,
|
117
|
+
cmark_event_type ev_type, int options) {
|
118
|
+
bool entering = (ev_type == CMARK_EVENT_ENTER);
|
119
|
+
if (entering) {
|
120
|
+
cmark_html_render_cr(renderer->html);
|
121
|
+
cmark_strbuf_puts(renderer->html, "<li");
|
122
|
+
cmark_html_render_sourcepos(node, renderer->html, options);
|
123
|
+
cmark_strbuf_putc(renderer->html, '>');
|
124
|
+
if (node->as.list.checked) {
|
125
|
+
cmark_strbuf_puts(renderer->html, "<input type=\"checkbox\" checked=\"\" disabled=\"\" /> ");
|
126
|
+
} else {
|
127
|
+
cmark_strbuf_puts(renderer->html, "<input type=\"checkbox\" disabled=\"\" /> ");
|
128
|
+
}
|
129
|
+
} else {
|
130
|
+
cmark_strbuf_puts(renderer->html, "</li>\n");
|
131
|
+
}
|
132
|
+
}
|
133
|
+
|
134
|
+
static const char *xml_attr(cmark_syntax_extension *extension,
|
135
|
+
cmark_node *node) {
|
136
|
+
if (node->as.list.checked) {
|
137
|
+
return " completed=\"true\"";
|
138
|
+
} else {
|
139
|
+
return " completed=\"false\"";
|
140
|
+
}
|
141
|
+
}
|
142
|
+
|
143
|
+
cmark_syntax_extension *create_tasklist_extension(void) {
|
144
|
+
cmark_syntax_extension *ext = cmark_syntax_extension_new("tasklist");
|
145
|
+
|
146
|
+
cmark_syntax_extension_set_match_block_func(ext, matches);
|
147
|
+
cmark_syntax_extension_set_get_type_string_func(ext, get_type_string);
|
148
|
+
cmark_syntax_extension_set_open_block_func(ext, open_tasklist_item);
|
149
|
+
cmark_syntax_extension_set_can_contain_func(ext, can_contain);
|
150
|
+
cmark_syntax_extension_set_commonmark_render_func(ext, commonmark_render);
|
151
|
+
cmark_syntax_extension_set_plaintext_render_func(ext, commonmark_render);
|
152
|
+
cmark_syntax_extension_set_html_render_func(ext, html_render);
|
153
|
+
cmark_syntax_extension_set_xml_attr_func(ext, xml_attr);
|
154
|
+
|
155
|
+
return ext;
|
156
|
+
}
|
data/ext/markly/utf8.c
ADDED
@@ -0,0 +1,317 @@
|
|
1
|
+
#include <stdlib.h>
|
2
|
+
#include <stdint.h>
|
3
|
+
#include <assert.h>
|
4
|
+
|
5
|
+
#include "cmark_ctype.h"
|
6
|
+
#include "utf8.h"
|
7
|
+
|
8
|
+
// Maps a lead byte to the total length in bytes of the sequence it starts.
// An entry of 0 marks a byte that cannot start a sequence; the functions in
// this file report an error for such a byte.
static const int8_t utf8proc_utf8class[256] = {
    // 0x00-0x7F: single-byte sequences
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    // 0x80-0xBF: not valid as a lead byte
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 0xC0-0xDF: two-byte sequences
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    // 0xE0-0xEF: three-byte sequences
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    // 0xF0-0xF7: four-byte sequences; 0xF8-0xFF: not valid as a lead byte
    4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0};
|
20
|
+
|
21
|
+
// Appends the three bytes EF BF BD (the UTF-8 encoding of U+FFFD, the
// replacement character) to `buf`.
static void encode_unknown(cmark_strbuf *buf) {
  static const uint8_t replacement[] = {0xEF, 0xBF, 0xBD};

  cmark_strbuf_put(buf, replacement, (bufsize_t)sizeof(replacement));
}
|
25
|
+
|
26
|
+
// Determines the length of the UTF-8 sequence starting at `str`, checking
// only that the trailing bytes are continuation bytes (10xxxxxx).
//
// Returns:
//   0          if str_len is 0;
//   length     (1-4) when the lead byte and all continuation bytes fit;
//   -1         if the lead byte cannot start a sequence;
//   -str_len   if str_len is non-negative and shorter than the sequence;
//   -k         if the byte at index k is not a continuation byte.
//
// A negative str_len skips the truncation check.
static int utf8proc_charlen(const uint8_t *str, bufsize_t str_len) {
  int expected;
  int k;

  if (str_len == 0)
    return 0;

  expected = utf8proc_utf8class[str[0]];
  if (expected == 0)
    return -1;

  if (str_len >= 0 && (bufsize_t)expected > str_len)
    return -str_len;

  k = 1;
  while (k < expected) {
    if ((str[k] & 0xC0) != 0x80)
      return -k;
    k++;
  }

  return expected;
}
|
47
|
+
|
48
|
+
// Validate a single UTF-8 character according to RFC 3629.
|
49
|
+
static int utf8proc_valid(const uint8_t *str, bufsize_t str_len) {
|
50
|
+
int length = utf8proc_utf8class[str[0]];
|
51
|
+
|
52
|
+
if (!length)
|
53
|
+
return -1;
|
54
|
+
|
55
|
+
if ((bufsize_t)length > str_len)
|
56
|
+
return -str_len;
|
57
|
+
|
58
|
+
switch (length) {
|
59
|
+
case 2:
|
60
|
+
if ((str[1] & 0xC0) != 0x80)
|
61
|
+
return -1;
|
62
|
+
if (str[0] < 0xC2) {
|
63
|
+
// Overlong
|
64
|
+
return -length;
|
65
|
+
}
|
66
|
+
break;
|
67
|
+
|
68
|
+
case 3:
|
69
|
+
if ((str[1] & 0xC0) != 0x80)
|
70
|
+
return -1;
|
71
|
+
if ((str[2] & 0xC0) != 0x80)
|
72
|
+
return -2;
|
73
|
+
if (str[0] == 0xE0) {
|
74
|
+
if (str[1] < 0xA0) {
|
75
|
+
// Overlong
|
76
|
+
return -length;
|
77
|
+
}
|
78
|
+
} else if (str[0] == 0xED) {
|
79
|
+
if (str[1] >= 0xA0) {
|
80
|
+
// Surrogate
|
81
|
+
return -length;
|
82
|
+
}
|
83
|
+
}
|
84
|
+
break;
|
85
|
+
|
86
|
+
case 4:
|
87
|
+
if ((str[1] & 0xC0) != 0x80)
|
88
|
+
return -1;
|
89
|
+
if ((str[2] & 0xC0) != 0x80)
|
90
|
+
return -2;
|
91
|
+
if ((str[3] & 0xC0) != 0x80)
|
92
|
+
return -3;
|
93
|
+
if (str[0] == 0xF0) {
|
94
|
+
if (str[1] < 0x90) {
|
95
|
+
// Overlong
|
96
|
+
return -length;
|
97
|
+
}
|
98
|
+
} else if (str[0] >= 0xF4) {
|
99
|
+
if (str[0] > 0xF4 || str[1] >= 0x90) {
|
100
|
+
// Above 0x10FFFF
|
101
|
+
return -length;
|
102
|
+
}
|
103
|
+
}
|
104
|
+
break;
|
105
|
+
}
|
106
|
+
|
107
|
+
return length;
|
108
|
+
}
|
109
|
+
|
110
|
+
void cmark_utf8proc_check(cmark_strbuf *ob, const uint8_t *line,
|
111
|
+
bufsize_t size) {
|
112
|
+
bufsize_t i = 0;
|
113
|
+
|
114
|
+
while (i < size) {
|
115
|
+
bufsize_t org = i;
|
116
|
+
int charlen = 0;
|
117
|
+
|
118
|
+
while (i < size) {
|
119
|
+
if (line[i] < 0x80 && line[i] != 0) {
|
120
|
+
i++;
|
121
|
+
} else if (line[i] >= 0x80) {
|
122
|
+
charlen = utf8proc_valid(line + i, size - i);
|
123
|
+
if (charlen < 0) {
|
124
|
+
charlen = -charlen;
|
125
|
+
break;
|
126
|
+
}
|
127
|
+
i += charlen;
|
128
|
+
} else if (line[i] == 0) {
|
129
|
+
// ASCII NUL is technically valid but rejected
|
130
|
+
// for security reasons.
|
131
|
+
charlen = 1;
|
132
|
+
break;
|
133
|
+
}
|
134
|
+
}
|
135
|
+
|
136
|
+
if (i > org) {
|
137
|
+
cmark_strbuf_put(ob, line + org, i - org);
|
138
|
+
}
|
139
|
+
|
140
|
+
if (i >= size) {
|
141
|
+
break;
|
142
|
+
} else {
|
143
|
+
// Invalid UTF-8
|
144
|
+
encode_unknown(ob);
|
145
|
+
i += charlen;
|
146
|
+
}
|
147
|
+
}
|
148
|
+
}
|
149
|
+
|
150
|
+
// Decodes the UTF-8 sequence at the start of `str`.
//
// On success stores the code point in *dst and returns the number of bytes
// consumed (1-4). On failure stores -1 in *dst and returns -1. Failure
// covers: an empty input, a malformed or truncated sequence (as reported by
// utf8proc_charlen), an overlong encoding, a surrogate (0xD800-0xDFFF), and
// a value of 0x110000 or above.
int cmark_utf8proc_iterate(const uint8_t *str, bufsize_t str_len,
                           int32_t *dst) {
  int32_t cp;
  int length;

  *dst = -1;

  length = utf8proc_charlen(str, str_len);
  if (length < 0)
    return -1;

  switch (length) {
  case 1:
    cp = str[0];
    break;

  case 2:
    cp = ((str[0] & 0x1F) << 6) | (str[1] & 0x3F);
    if (cp < 0x80)
      return -1;
    break;

  case 3:
    cp = ((str[0] & 0x0F) << 12) | ((str[1] & 0x3F) << 6) | (str[2] & 0x3F);
    if (cp < 0x800)
      return -1;
    if (cp >= 0xD800 && cp < 0xE000)
      return -1;
    break;

  case 4:
    cp = ((str[0] & 0x07) << 18) | ((str[1] & 0x3F) << 12) |
         ((str[2] & 0x3F) << 6) | (str[3] & 0x3F);
    if (cp < 0x10000 || cp >= 0x110000)
      return -1;
    break;

  default:
    // A length of 0 (empty input) decodes to nothing.
    return -1;
  }

  *dst = cp;
  return length;
}
|
188
|
+
|
189
|
+
// Appends the encoding of code point `uc` to `buf`.
//
// Values below 0x110000 are written as one to four bytes using the usual
// UTF-8 bit layout, with two exceptions: 0xFFFF and 0xFFFE are written as
// the single bytes 0xFF and 0xFE. Values of 0x110000 and above are replaced
// by the bytes written by encode_unknown(). `uc` must not be negative
// (asserted).
void cmark_utf8proc_encode_char(int32_t uc, cmark_strbuf *buf) {
  uint8_t out[4];
  bufsize_t n = 0;

  assert(uc >= 0);

  if (uc >= 0x110000) {
    encode_unknown(buf);
    return;
  }

  if (uc < 0x80) {
    out[0] = (uint8_t)(uc);
    n = 1;
  } else if (uc < 0x800) {
    out[0] = (uint8_t)(0xC0 | (uc >> 6));
    out[1] = (uint8_t)(0x80 | (uc & 0x3F));
    n = 2;
  } else if (uc == 0xFFFF) {
    out[0] = 0xFF;
    n = 1;
  } else if (uc == 0xFFFE) {
    out[0] = 0xFE;
    n = 1;
  } else if (uc < 0x10000) {
    out[0] = (uint8_t)(0xE0 | (uc >> 12));
    out[1] = (uint8_t)(0x80 | ((uc >> 6) & 0x3F));
    out[2] = (uint8_t)(0x80 | (uc & 0x3F));
    n = 3;
  } else {
    out[0] = (uint8_t)(0xF0 | (uc >> 18));
    out[1] = (uint8_t)(0x80 | ((uc >> 12) & 0x3F));
    out[2] = (uint8_t)(0x80 | ((uc >> 6) & 0x3F));
    out[3] = (uint8_t)(0x80 | (uc & 0x3F));
    n = 4;
  }

  cmark_strbuf_put(buf, out, n);
}
|
226
|
+
|
227
|
+
void cmark_utf8proc_case_fold(cmark_strbuf *dest, const uint8_t *str,
|
228
|
+
bufsize_t len) {
|
229
|
+
int32_t c;
|
230
|
+
|
231
|
+
#define bufpush(x) cmark_utf8proc_encode_char(x, dest)
|
232
|
+
|
233
|
+
while (len > 0) {
|
234
|
+
bufsize_t char_len = cmark_utf8proc_iterate(str, len, &c);
|
235
|
+
|
236
|
+
if (char_len >= 0) {
|
237
|
+
#include "case_fold_switch.inc"
|
238
|
+
} else {
|
239
|
+
encode_unknown(dest);
|
240
|
+
char_len = -char_len;
|
241
|
+
}
|
242
|
+
|
243
|
+
str += char_len;
|
244
|
+
len -= char_len;
|
245
|
+
}
|
246
|
+
}
|
247
|
+
|
248
|
+
// Matches anything in the Zs class, plus LF, CR, TAB, FF.
// Returns 1 for a matching code point and 0 for anything else.
int cmark_utf8proc_is_space(int32_t uc) {
  switch (uc) {
  case 9:     // TAB
  case 10:    // LF
  case 12:    // FF
  case 13:    // CR
  case 32:    // U+0020
  case 160:   // U+00A0
  case 5760:  // U+1680
  case 8239:  // U+202F
  case 8287:  // U+205F
  case 12288: // U+3000
    return 1;
  default:
    // U+2000 through U+200A
    return (uc >= 8192 && uc <= 8202);
  }
}
|
254
|
+
|
255
|
+
// Matches anything in the P[cdefios] classes.
// Returns 1 for a matching code point and 0 for anything else. Values below
// 128 are classified by cmark_ispunct; everything else is looked up in the
// hard-coded list of code points and ranges below.
int cmark_utf8proc_is_punctuation(int32_t uc) {
  // ASCII range: defer to the byte classifier.
  if (uc < 128 && cmark_ispunct((char)uc))
    return 1;

  return (
      uc == 161 || uc == 167 ||
      uc == 171 || uc == 182 || uc == 183 || uc == 187 || uc == 191 ||
      uc == 894 || uc == 903 || (uc >= 1370 && uc <= 1375) || uc == 1417 ||
      uc == 1418 || uc == 1470 || uc == 1472 || uc == 1475 || uc == 1478 ||
      uc == 1523 || uc == 1524 || uc == 1545 || uc == 1546 || uc == 1548 ||
      uc == 1549 || uc == 1563 || uc == 1566 || uc == 1567 ||
      (uc >= 1642 && uc <= 1645) || uc == 1748 || (uc >= 1792 && uc <= 1805) ||
      (uc >= 2039 && uc <= 2041) || (uc >= 2096 && uc <= 2110) || uc == 2142 ||
      uc == 2404 || uc == 2405 || uc == 2416 || uc == 2800 || uc == 3572 ||
      uc == 3663 || uc == 3674 || uc == 3675 || (uc >= 3844 && uc <= 3858) ||
      uc == 3860 || (uc >= 3898 && uc <= 3901) || uc == 3973 ||
      (uc >= 4048 && uc <= 4052) || uc == 4057 || uc == 4058 ||
      (uc >= 4170 && uc <= 4175) || uc == 4347 || (uc >= 4960 && uc <= 4968) ||
      uc == 5120 || uc == 5741 || uc == 5742 || uc == 5787 || uc == 5788 ||
      (uc >= 5867 && uc <= 5869) || uc == 5941 || uc == 5942 ||
      (uc >= 6100 && uc <= 6102) || (uc >= 6104 && uc <= 6106) ||
      (uc >= 6144 && uc <= 6154) || uc == 6468 || uc == 6469 || uc == 6686 ||
      uc == 6687 || (uc >= 6816 && uc <= 6822) || (uc >= 6824 && uc <= 6829) ||
      (uc >= 7002 && uc <= 7008) || (uc >= 7164 && uc <= 7167) ||
      (uc >= 7227 && uc <= 7231) || uc == 7294 || uc == 7295 ||
      (uc >= 7360 && uc <= 7367) || uc == 7379 || (uc >= 8208 && uc <= 8231) ||
      (uc >= 8240 && uc <= 8259) || (uc >= 8261 && uc <= 8273) ||
      (uc >= 8275 && uc <= 8286) || uc == 8317 || uc == 8318 || uc == 8333 ||
      uc == 8334 || (uc >= 8968 && uc <= 8971) || uc == 9001 || uc == 9002 ||
      (uc >= 10088 && uc <= 10101) || uc == 10181 || uc == 10182 ||
      (uc >= 10214 && uc <= 10223) || (uc >= 10627 && uc <= 10648) ||
      (uc >= 10712 && uc <= 10715) || uc == 10748 || uc == 10749 ||
      (uc >= 11513 && uc <= 11516) || uc == 11518 || uc == 11519 ||
      uc == 11632 || (uc >= 11776 && uc <= 11822) ||
      (uc >= 11824 && uc <= 11842) || (uc >= 12289 && uc <= 12291) ||
      (uc >= 12296 && uc <= 12305) || (uc >= 12308 && uc <= 12319) ||
      uc == 12336 || uc == 12349 || uc == 12448 || uc == 12539 || uc == 42238 ||
      uc == 42239 || (uc >= 42509 && uc <= 42511) || uc == 42611 ||
      uc == 42622 || (uc >= 42738 && uc <= 42743) ||
      (uc >= 43124 && uc <= 43127) || uc == 43214 || uc == 43215 ||
      (uc >= 43256 && uc <= 43258) || uc == 43310 || uc == 43311 ||
      uc == 43359 || (uc >= 43457 && uc <= 43469) || uc == 43486 ||
      uc == 43487 || (uc >= 43612 && uc <= 43615) || uc == 43742 ||
      uc == 43743 || uc == 43760 || uc == 43761 || uc == 44011 || uc == 64830 ||
      uc == 64831 || (uc >= 65040 && uc <= 65049) ||
      (uc >= 65072 && uc <= 65106) || (uc >= 65108 && uc <= 65121) ||
      uc == 65123 || uc == 65128 || uc == 65130 || uc == 65131 ||
      (uc >= 65281 && uc <= 65283) || (uc >= 65285 && uc <= 65290) ||
      (uc >= 65292 && uc <= 65295) || uc == 65306 || uc == 65307 ||
      uc == 65311 || uc == 65312 || (uc >= 65339 && uc <= 65341) ||
      uc == 65343 || uc == 65371 || uc == 65373 ||
      (uc >= 65375 && uc <= 65381) || (uc >= 65792 && uc <= 65794) ||
      uc == 66463 || uc == 66512 || uc == 66927 || uc == 67671 || uc == 67871 ||
      uc == 67903 || (uc >= 68176 && uc <= 68184) || uc == 68223 ||
      (uc >= 68336 && uc <= 68342) || (uc >= 68409 && uc <= 68415) ||
      (uc >= 68505 && uc <= 68508) || (uc >= 69703 && uc <= 69709) ||
      uc == 69819 || uc == 69820 || (uc >= 69822 && uc <= 69825) ||
      (uc >= 69952 && uc <= 69955) || uc == 70004 || uc == 70005 ||
      (uc >= 70085 && uc <= 70088) || uc == 70093 ||
      (uc >= 70200 && uc <= 70205) || uc == 70854 ||
      (uc >= 71105 && uc <= 71113) || (uc >= 71233 && uc <= 71235) ||
      (uc >= 74864 && uc <= 74868) || uc == 92782 || uc == 92783 ||
      uc == 92917 || (uc >= 92983 && uc <= 92987) || uc == 92996 ||
      uc == 113823);
}
|