commonmarker 0.23.6 → 1.0.0.pre
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +70 -212
- data/commonmarker.gemspec +34 -31
- data/ext/commonmarker/Cargo.toml +12 -0
- data/ext/commonmarker/_util.rb +102 -0
- data/ext/commonmarker/extconf.rb +4 -5
- data/ext/commonmarker/src/comrak_options.rs +107 -0
- data/ext/commonmarker/src/lib.rs +27 -0
- data/lib/commonmarker/config.rb +58 -37
- data/lib/commonmarker/extension.rb +14 -0
- data/lib/commonmarker/renderer.rb +1 -127
- data/lib/commonmarker/version.rb +2 -2
- data/lib/commonmarker.rb +19 -32
- metadata +33 -177
- data/Rakefile +0 -109
- data/bin/commonmarker +0 -118
- data/ext/commonmarker/arena.c +0 -103
- data/ext/commonmarker/autolink.c +0 -456
- data/ext/commonmarker/autolink.h +0 -8
- data/ext/commonmarker/blocks.c +0 -1596
- data/ext/commonmarker/buffer.c +0 -278
- data/ext/commonmarker/buffer.h +0 -116
- data/ext/commonmarker/case_fold_switch.inc +0 -4327
- data/ext/commonmarker/chunk.h +0 -135
- data/ext/commonmarker/cmark-gfm-core-extensions.h +0 -54
- data/ext/commonmarker/cmark-gfm-extension_api.h +0 -736
- data/ext/commonmarker/cmark-gfm-extensions_export.h +0 -42
- data/ext/commonmarker/cmark-gfm.h +0 -817
- data/ext/commonmarker/cmark-gfm_export.h +0 -42
- data/ext/commonmarker/cmark-gfm_version.h +0 -7
- data/ext/commonmarker/cmark.c +0 -55
- data/ext/commonmarker/cmark_ctype.c +0 -44
- data/ext/commonmarker/cmark_ctype.h +0 -33
- data/ext/commonmarker/commonmark.c +0 -529
- data/ext/commonmarker/commonmarker.c +0 -1307
- data/ext/commonmarker/commonmarker.h +0 -16
- data/ext/commonmarker/config.h +0 -76
- data/ext/commonmarker/core-extensions.c +0 -27
- data/ext/commonmarker/entities.inc +0 -2138
- data/ext/commonmarker/ext_scanners.c +0 -879
- data/ext/commonmarker/ext_scanners.h +0 -24
- data/ext/commonmarker/footnotes.c +0 -63
- data/ext/commonmarker/footnotes.h +0 -27
- data/ext/commonmarker/houdini.h +0 -57
- data/ext/commonmarker/houdini_href_e.c +0 -100
- data/ext/commonmarker/houdini_html_e.c +0 -66
- data/ext/commonmarker/houdini_html_u.c +0 -149
- data/ext/commonmarker/html.c +0 -486
- data/ext/commonmarker/html.h +0 -27
- data/ext/commonmarker/inlines.c +0 -1716
- data/ext/commonmarker/inlines.h +0 -29
- data/ext/commonmarker/iterator.c +0 -159
- data/ext/commonmarker/iterator.h +0 -26
- data/ext/commonmarker/latex.c +0 -466
- data/ext/commonmarker/linked_list.c +0 -37
- data/ext/commonmarker/man.c +0 -278
- data/ext/commonmarker/map.c +0 -122
- data/ext/commonmarker/map.h +0 -41
- data/ext/commonmarker/node.c +0 -979
- data/ext/commonmarker/node.h +0 -125
- data/ext/commonmarker/parser.h +0 -58
- data/ext/commonmarker/plaintext.c +0 -235
- data/ext/commonmarker/plugin.c +0 -36
- data/ext/commonmarker/plugin.h +0 -34
- data/ext/commonmarker/references.c +0 -42
- data/ext/commonmarker/references.h +0 -26
- data/ext/commonmarker/registry.c +0 -63
- data/ext/commonmarker/registry.h +0 -24
- data/ext/commonmarker/render.c +0 -205
- data/ext/commonmarker/render.h +0 -62
- data/ext/commonmarker/scanners.c +0 -10508
- data/ext/commonmarker/scanners.h +0 -62
- data/ext/commonmarker/scanners.re +0 -341
- data/ext/commonmarker/strikethrough.c +0 -167
- data/ext/commonmarker/strikethrough.h +0 -9
- data/ext/commonmarker/syntax_extension.c +0 -149
- data/ext/commonmarker/syntax_extension.h +0 -34
- data/ext/commonmarker/table.c +0 -848
- data/ext/commonmarker/table.h +0 -12
- data/ext/commonmarker/tagfilter.c +0 -60
- data/ext/commonmarker/tagfilter.h +0 -8
- data/ext/commonmarker/tasklist.c +0 -156
- data/ext/commonmarker/tasklist.h +0 -8
- data/ext/commonmarker/utf8.c +0 -317
- data/ext/commonmarker/utf8.h +0 -35
- data/ext/commonmarker/xml.c +0 -181
- data/lib/commonmarker/node/inspect.rb +0 -47
- data/lib/commonmarker/node.rb +0 -83
- data/lib/commonmarker/renderer/html_renderer.rb +0 -252
data/ext/commonmarker/table.h
DELETED
@@ -1,12 +0,0 @@
|
|
1
|
-
#ifndef CMARK_GFM_TABLE_H
#define CMARK_GFM_TABLE_H

#include "cmark-gfm-core-extensions.h"

// Node type identifiers for tables, table rows and table cells. They are only
// declared here; the definitions live in another translation unit.
extern cmark_node_type CMARK_NODE_TABLE;
extern cmark_node_type CMARK_NODE_TABLE_ROW;
extern cmark_node_type CMARK_NODE_TABLE_CELL;

// Returns the syntax extension object for tables (implemented elsewhere).
cmark_syntax_extension *create_table_extension(void);

#endif
|
data/ext/commonmarker/tagfilter.c
DELETED
@@ -1,60 +0,0 @@
|
|
1
|
-
#include "tagfilter.h"
|
2
|
-
#include <parser.h>
|
3
|
-
#include <ctype.h>
|
4
|
-
|
5
|
-
// Lower-case names of the HTML elements the tag filter rejects.
// The trailing NULL lets callers walk the list without knowing its length.
static const char *blacklist[] = {
    "title",
    "textarea",
    "style",
    "xmp",
    "iframe",
    "noembed",
    "noframes",
    "script",
    "plaintext",
    NULL,
};
|
9
|
-
|
10
|
-
// Reports whether the raw tag text `tag_data` (`tag_size` bytes) is a tag
// named `tagname`.
//
// The text must start with '<', optionally followed by '/', then the name
// compared case-insensitively against `tagname` (which is expected to be
// lower case). The name must be immediately followed by whitespace, '>' or
// "/>", so a longer name that merely starts with `tagname` does not match.
//
// Returns 1 on a match and 0 otherwise.
static int is_tag(const unsigned char *tag_data, size_t tag_size,
                  const char *tagname) {
  const char *expected = tagname;
  size_t pos;

  if (tag_size < 3 || tag_data[0] != '<')
    return 0;

  // Step over '<' and, for closing tags, the '/' that follows it.
  pos = (tag_data[1] == '/') ? 2 : 1;

  while (pos < tag_size && *expected != 0) {
    if (tolower(tag_data[pos]) != *expected)
      return 0;
    ++pos;
    ++expected;
  }

  // The input ended before a terminator could follow the name.
  if (pos == tag_size)
    return 0;

  if (cmark_isspace(tag_data[pos]) || tag_data[pos] == '>')
    return 1;

  // Self-closing form: "/>" directly after the name.
  return tag_data[pos] == '/' && tag_size >= pos + 2 &&
         tag_data[pos + 1] == '>';
}
|
42
|
-
|
43
|
-
// HTML filter callback for the extension: returns 0 when `tag` is one of the
// blacklisted elements and 1 when it is not. `ext` is not used.
static int filter(cmark_syntax_extension *ext, const unsigned char *tag,
                  size_t tag_len) {
  size_t idx;

  for (idx = 0; blacklist[idx] != NULL; idx++) {
    if (is_tag(tag, tag_len, blacklist[idx]))
      return 0;
  }

  return 1;
}
|
55
|
-
|
56
|
-
cmark_syntax_extension *create_tagfilter_extension(void) {
|
57
|
-
cmark_syntax_extension *ext = cmark_syntax_extension_new("tagfilter");
|
58
|
-
cmark_syntax_extension_set_html_filter_func(ext, filter);
|
59
|
-
return ext;
|
60
|
-
}
|
data/ext/commonmarker/tasklist.c
DELETED
@@ -1,156 +0,0 @@
|
|
1
|
-
#include "tasklist.h"
|
2
|
-
#include <parser.h>
|
3
|
-
#include <render.h>
|
4
|
-
#include <html.h>
|
5
|
-
#include "ext_scanners.h"
|
6
|
-
|
7
|
-
// Checkbox state of a task list item.
typedef enum {
  CMARK_TASKLIST_NOCHECKED = 0,
  CMARK_TASKLIST_CHECKED = 1,
} cmark_tasklist_type;
|
11
|
-
|
12
|
-
// Local constants
|
13
|
-
static const char *TYPE_STRING = "tasklist";
|
14
|
-
|
15
|
-
static const char *get_type_string(cmark_syntax_extension *extension, cmark_node *node) {
|
16
|
-
return TYPE_STRING;
|
17
|
-
}
|
18
|
-
|
19
|
-
|
20
|
-
// Return 1 if state was set, 0 otherwise
|
21
|
-
int cmark_gfm_extensions_set_tasklist_item_checked(cmark_node *node, bool is_checked) {
|
22
|
-
// The node has to exist, and be an extension, and actually be the right type in order to get the value.
|
23
|
-
if (!node || !node->extension || strcmp(cmark_node_get_type_string(node), TYPE_STRING))
|
24
|
-
return 0;
|
25
|
-
|
26
|
-
node->as.list.checked = is_checked;
|
27
|
-
return 1;
|
28
|
-
}
|
29
|
-
|
30
|
-
// Returns the checked state of a task list item.
//
// Answers false when `node` is NULL, has no syntax extension, or does not
// report TYPE_STRING as its type string.
bool cmark_gfm_extensions_get_tasklist_item_checked(cmark_node *node) {
  if (node == NULL)
    return false;

  if (!node->extension)
    return false;

  if (strcmp(cmark_node_get_type_string(node), TYPE_STRING) != 0)
    return false;

  return node->as.list.checked ? true : false;
}
|
41
|
-
|
42
|
-
static bool parse_node_item_prefix(cmark_parser *parser, const char *input,
|
43
|
-
cmark_node *container) {
|
44
|
-
bool res = false;
|
45
|
-
|
46
|
-
if (parser->indent >=
|
47
|
-
container->as.list.marker_offset + container->as.list.padding) {
|
48
|
-
cmark_parser_advance_offset(parser, input, container->as.list.marker_offset +
|
49
|
-
container->as.list.padding,
|
50
|
-
true);
|
51
|
-
res = true;
|
52
|
-
} else if (parser->blank && container->first_child != NULL) {
|
53
|
-
// if container->first_child is NULL, then the opening line
|
54
|
-
// of the list item was blank after the list marker; in this
|
55
|
-
// case, we are done with the list item.
|
56
|
-
cmark_parser_advance_offset(parser, input, parser->first_nonspace - parser->offset,
|
57
|
-
false);
|
58
|
-
res = true;
|
59
|
-
}
|
60
|
-
return res;
|
61
|
-
}
|
62
|
-
|
63
|
-
static int matches(cmark_syntax_extension *self, cmark_parser *parser,
|
64
|
-
unsigned char *input, int len,
|
65
|
-
cmark_node *parent_container) {
|
66
|
-
return parse_node_item_prefix(parser, (const char*)input, parent_container);
|
67
|
-
}
|
68
|
-
|
69
|
-
static int can_contain(cmark_syntax_extension *extension, cmark_node *node,
|
70
|
-
cmark_node_type child_type) {
|
71
|
-
return (node->type == CMARK_NODE_ITEM) ? 1 : 0;
|
72
|
-
}
|
73
|
-
|
74
|
-
// open_block callback: turns the enclosing list item into a task list item
// when the line starts with a task list marker.
//
// Nothing happens unless `parent_container` is a list item and scan_tasklist
// matches at the start of `input`. On a match the item is tagged with this
// extension, the parser is advanced by 3 bytes (the width of the checkbox),
// and the item's checked flag is recorded.
//
// Always returns NULL: no new node is created, the existing item is modified
// in place.
static cmark_node *open_tasklist_item(cmark_syntax_extension *self,
                                      int indented, cmark_parser *parser,
                                      cmark_node *parent_container,
                                      unsigned char *input, int len) {
  bufsize_t matched;
  bufsize_t i;
  bool checked = false;

  if (cmark_node_get_type(parent_container) != CMARK_NODE_ITEM) {
    return NULL;
  }

  matched = scan_tasklist(input, len, 0);
  if (!matched) {
    return NULL;
  }

  cmark_node_set_syntax_extension(parent_container, self);
  cmark_parser_advance_offset(parser, (char *)input, 3, false);

  // Either an upper or lower case X means the task is completed.
  // Only the matched marker prefix is inspected: searching the whole line
  // would wrongly mark "- [ ] text [x]" as checked because of the later
  // "[x]". NOTE(review): this relies on scan_tasklist returning the match
  // length measured from input[0] -- confirm against ext_scanners.
  for (i = 0; i + 2 < matched; i++) {
    if (input[i] == '[' && (input[i + 1] == 'x' || input[i + 1] == 'X') &&
        input[i + 2] == ']') {
      checked = true;
      break;
    }
  }
  parent_container->as.list.checked = checked;

  return NULL;
}
|
96
|
-
|
97
|
-
static void commonmark_render(cmark_syntax_extension *extension,
|
98
|
-
cmark_renderer *renderer, cmark_node *node,
|
99
|
-
cmark_event_type ev_type, int options) {
|
100
|
-
bool entering = (ev_type == CMARK_EVENT_ENTER);
|
101
|
-
if (entering) {
|
102
|
-
renderer->cr(renderer);
|
103
|
-
if (node->as.list.checked) {
|
104
|
-
renderer->out(renderer, node, "- [x] ", false, LITERAL);
|
105
|
-
} else {
|
106
|
-
renderer->out(renderer, node, "- [ ] ", false, LITERAL);
|
107
|
-
}
|
108
|
-
cmark_strbuf_puts(renderer->prefix, " ");
|
109
|
-
} else {
|
110
|
-
cmark_strbuf_truncate(renderer->prefix, renderer->prefix->size - 2);
|
111
|
-
renderer->cr(renderer);
|
112
|
-
}
|
113
|
-
}
|
114
|
-
|
115
|
-
static void html_render(cmark_syntax_extension *extension,
|
116
|
-
cmark_html_renderer *renderer, cmark_node *node,
|
117
|
-
cmark_event_type ev_type, int options) {
|
118
|
-
bool entering = (ev_type == CMARK_EVENT_ENTER);
|
119
|
-
if (entering) {
|
120
|
-
cmark_html_render_cr(renderer->html);
|
121
|
-
cmark_strbuf_puts(renderer->html, "<li");
|
122
|
-
cmark_html_render_sourcepos(node, renderer->html, options);
|
123
|
-
cmark_strbuf_putc(renderer->html, '>');
|
124
|
-
if (node->as.list.checked) {
|
125
|
-
cmark_strbuf_puts(renderer->html, "<input type=\"checkbox\" checked=\"\" disabled=\"\" /> ");
|
126
|
-
} else {
|
127
|
-
cmark_strbuf_puts(renderer->html, "<input type=\"checkbox\" disabled=\"\" /> ");
|
128
|
-
}
|
129
|
-
} else {
|
130
|
-
cmark_strbuf_puts(renderer->html, "</li>\n");
|
131
|
-
}
|
132
|
-
}
|
133
|
-
|
134
|
-
static const char *xml_attr(cmark_syntax_extension *extension,
|
135
|
-
cmark_node *node) {
|
136
|
-
if (node->as.list.checked) {
|
137
|
-
return " completed=\"true\"";
|
138
|
-
} else {
|
139
|
-
return " completed=\"false\"";
|
140
|
-
}
|
141
|
-
}
|
142
|
-
|
143
|
-
cmark_syntax_extension *create_tasklist_extension(void) {
|
144
|
-
cmark_syntax_extension *ext = cmark_syntax_extension_new("tasklist");
|
145
|
-
|
146
|
-
cmark_syntax_extension_set_match_block_func(ext, matches);
|
147
|
-
cmark_syntax_extension_set_get_type_string_func(ext, get_type_string);
|
148
|
-
cmark_syntax_extension_set_open_block_func(ext, open_tasklist_item);
|
149
|
-
cmark_syntax_extension_set_can_contain_func(ext, can_contain);
|
150
|
-
cmark_syntax_extension_set_commonmark_render_func(ext, commonmark_render);
|
151
|
-
cmark_syntax_extension_set_plaintext_render_func(ext, commonmark_render);
|
152
|
-
cmark_syntax_extension_set_html_render_func(ext, html_render);
|
153
|
-
cmark_syntax_extension_set_xml_attr_func(ext, xml_attr);
|
154
|
-
|
155
|
-
return ext;
|
156
|
-
}
|
data/ext/commonmarker/tasklist.h
DELETED
data/ext/commonmarker/utf8.c
DELETED
@@ -1,317 +0,0 @@
|
|
1
|
-
#include <stdlib.h>
|
2
|
-
#include <stdint.h>
|
3
|
-
#include <assert.h>
|
4
|
-
|
5
|
-
#include "cmark_ctype.h"
|
6
|
-
#include "utf8.h"
|
7
|
-
|
8
|
-
// Maps a lead byte to the length in bytes of the UTF-8 sequence it starts.
// 0 marks a byte that cannot start a sequence.
static const int8_t utf8proc_utf8class[256] = {
    // 0x00-0x7F: single-byte (ASCII)
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    // 0x80-0xBF: continuation bytes, never a lead byte
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 0xC0-0xDF: two-byte sequences
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    // 0xE0-0xEF: three-byte sequences
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    // 0xF0-0xF7: four-byte sequences; 0xF8-0xFF: invalid
    4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0};
|
20
|
-
|
21
|
-
// Appends U+FFFD REPLACEMENT CHARACTER to `buf`, used wherever the input
// cannot be decoded or the code point cannot be encoded.
static void encode_unknown(cmark_strbuf *buf) {
  // UTF-8 encoding of U+FFFD.
  static const uint8_t replacement[] = {0xEF, 0xBF, 0xBD};

  cmark_strbuf_put(buf, replacement, 3);
}
|
25
|
-
|
26
|
-
// Measures the UTF-8 sequence starting at `str`.
//
// Only the lead byte class and the continuation-byte markers are checked;
// overlong forms and out-of-range code points are not rejected here.
//
// Returns:
//   0          if `str_len` is 0
//   length     (1..4) for a structurally well-formed sequence
//   -1         if the first byte cannot start a sequence
//   -str_len   if the sequence would run past a non-negative `str_len`
//   -k         if the byte at index k is not a continuation byte
static int utf8proc_charlen(const uint8_t *str, bufsize_t str_len) {
  int expected;
  int k;

  if (str_len == 0)
    return 0;

  expected = utf8proc_utf8class[str[0]];
  if (expected == 0)
    return -1;

  // A negative str_len skips the truncation check.
  if (str_len >= 0 && (bufsize_t)expected > str_len)
    return -str_len;

  k = 1;
  while (k < expected) {
    if ((str[k] & 0xC0) != 0x80)
      return -k;
    k++;
  }

  return expected;
}
|
47
|
-
|
48
|
-
// Validate a single UTF-8 character according to RFC 3629.
|
49
|
-
static int utf8proc_valid(const uint8_t *str, bufsize_t str_len) {
|
50
|
-
int length = utf8proc_utf8class[str[0]];
|
51
|
-
|
52
|
-
if (!length)
|
53
|
-
return -1;
|
54
|
-
|
55
|
-
if ((bufsize_t)length > str_len)
|
56
|
-
return -str_len;
|
57
|
-
|
58
|
-
switch (length) {
|
59
|
-
case 2:
|
60
|
-
if ((str[1] & 0xC0) != 0x80)
|
61
|
-
return -1;
|
62
|
-
if (str[0] < 0xC2) {
|
63
|
-
// Overlong
|
64
|
-
return -length;
|
65
|
-
}
|
66
|
-
break;
|
67
|
-
|
68
|
-
case 3:
|
69
|
-
if ((str[1] & 0xC0) != 0x80)
|
70
|
-
return -1;
|
71
|
-
if ((str[2] & 0xC0) != 0x80)
|
72
|
-
return -2;
|
73
|
-
if (str[0] == 0xE0) {
|
74
|
-
if (str[1] < 0xA0) {
|
75
|
-
// Overlong
|
76
|
-
return -length;
|
77
|
-
}
|
78
|
-
} else if (str[0] == 0xED) {
|
79
|
-
if (str[1] >= 0xA0) {
|
80
|
-
// Surrogate
|
81
|
-
return -length;
|
82
|
-
}
|
83
|
-
}
|
84
|
-
break;
|
85
|
-
|
86
|
-
case 4:
|
87
|
-
if ((str[1] & 0xC0) != 0x80)
|
88
|
-
return -1;
|
89
|
-
if ((str[2] & 0xC0) != 0x80)
|
90
|
-
return -2;
|
91
|
-
if ((str[3] & 0xC0) != 0x80)
|
92
|
-
return -3;
|
93
|
-
if (str[0] == 0xF0) {
|
94
|
-
if (str[1] < 0x90) {
|
95
|
-
// Overlong
|
96
|
-
return -length;
|
97
|
-
}
|
98
|
-
} else if (str[0] >= 0xF4) {
|
99
|
-
if (str[0] > 0xF4 || str[1] >= 0x90) {
|
100
|
-
// Above 0x10FFFF
|
101
|
-
return -length;
|
102
|
-
}
|
103
|
-
}
|
104
|
-
break;
|
105
|
-
}
|
106
|
-
|
107
|
-
return length;
|
108
|
-
}
|
109
|
-
|
110
|
-
void cmark_utf8proc_check(cmark_strbuf *ob, const uint8_t *line,
|
111
|
-
bufsize_t size) {
|
112
|
-
bufsize_t i = 0;
|
113
|
-
|
114
|
-
while (i < size) {
|
115
|
-
bufsize_t org = i;
|
116
|
-
int charlen = 0;
|
117
|
-
|
118
|
-
while (i < size) {
|
119
|
-
if (line[i] < 0x80 && line[i] != 0) {
|
120
|
-
i++;
|
121
|
-
} else if (line[i] >= 0x80) {
|
122
|
-
charlen = utf8proc_valid(line + i, size - i);
|
123
|
-
if (charlen < 0) {
|
124
|
-
charlen = -charlen;
|
125
|
-
break;
|
126
|
-
}
|
127
|
-
i += charlen;
|
128
|
-
} else if (line[i] == 0) {
|
129
|
-
// ASCII NUL is technically valid but rejected
|
130
|
-
// for security reasons.
|
131
|
-
charlen = 1;
|
132
|
-
break;
|
133
|
-
}
|
134
|
-
}
|
135
|
-
|
136
|
-
if (i > org) {
|
137
|
-
cmark_strbuf_put(ob, line + org, i - org);
|
138
|
-
}
|
139
|
-
|
140
|
-
if (i >= size) {
|
141
|
-
break;
|
142
|
-
} else {
|
143
|
-
// Invalid UTF-8
|
144
|
-
encode_unknown(ob);
|
145
|
-
i += charlen;
|
146
|
-
}
|
147
|
-
}
|
148
|
-
}
|
149
|
-
|
150
|
-
// Decodes the code point at the start of `str`.
//
// On success stores the code point in `*dst` and returns the number of bytes
// consumed. On failure (malformed sequence, empty input, overlong encoding,
// surrogate, or value of 0x110000 and above) stores -1 in `*dst` and
// returns -1.
int cmark_utf8proc_iterate(const uint8_t *str, bufsize_t str_len,
                           int32_t *dst) {
  int nbytes;
  int32_t cp = -1;

  *dst = -1;

  nbytes = utf8proc_charlen(str, str_len);
  if (nbytes < 0)
    return -1;

  if (nbytes == 1) {
    cp = str[0];
  } else if (nbytes == 2) {
    cp = ((str[0] & 0x1F) << 6) + (str[1] & 0x3F);
    if (cp < 0x80)
      cp = -1; // overlong
  } else if (nbytes == 3) {
    cp = ((str[0] & 0x0F) << 12) + ((str[1] & 0x3F) << 6) + (str[2] & 0x3F);
    if (cp < 0x800 || (cp >= 0xD800 && cp < 0xE000))
      cp = -1; // overlong or surrogate
  } else if (nbytes == 4) {
    cp = ((str[0] & 0x07) << 18) + ((str[1] & 0x3F) << 12) +
         ((str[2] & 0x3F) << 6) + (str[3] & 0x3F);
    if (cp < 0x10000 || cp >= 0x110000)
      cp = -1; // overlong or beyond the Unicode range
  }
  // nbytes == 0 (empty input) leaves cp at -1 and fails below.

  if (cp < 0)
    return -1;

  *dst = cp;
  return nbytes;
}
|
188
|
-
|
189
|
-
// Appends the UTF-8 encoding of code point `uc` to `buf`.
//
// `uc` must be non-negative (asserted). Values of 0x110000 and above are
// written as U+FFFD. 0xFFFF and 0xFFFE are special-cased and written as the
// single bytes 0xFF and 0xFE respectively.
void cmark_utf8proc_encode_char(int32_t uc, cmark_strbuf *buf) {
  uint8_t out[4];
  bufsize_t n = 0;

  assert(uc >= 0);

  if (uc >= 0x110000) {
    encode_unknown(buf);
    return;
  }

  if (uc < 0x80) {
    out[0] = (uint8_t)(uc);
    n = 1;
  } else if (uc < 0x800) {
    out[0] = (uint8_t)(0xC0 | (uc >> 6));
    out[1] = (uint8_t)(0x80 | (uc & 0x3F));
    n = 2;
  } else if (uc == 0xFFFF) {
    out[0] = 0xFF;
    n = 1;
  } else if (uc == 0xFFFE) {
    out[0] = 0xFE;
    n = 1;
  } else if (uc < 0x10000) {
    out[0] = (uint8_t)(0xE0 | (uc >> 12));
    out[1] = (uint8_t)(0x80 | ((uc >> 6) & 0x3F));
    out[2] = (uint8_t)(0x80 | (uc & 0x3F));
    n = 3;
  } else {
    out[0] = (uint8_t)(0xF0 | (uc >> 18));
    out[1] = (uint8_t)(0x80 | ((uc >> 12) & 0x3F));
    out[2] = (uint8_t)(0x80 | ((uc >> 6) & 0x3F));
    out[3] = (uint8_t)(0x80 | (uc & 0x3F));
    n = 4;
  }

  cmark_strbuf_put(buf, out, n);
}
|
226
|
-
|
227
|
-
void cmark_utf8proc_case_fold(cmark_strbuf *dest, const uint8_t *str,
|
228
|
-
bufsize_t len) {
|
229
|
-
int32_t c;
|
230
|
-
|
231
|
-
#define bufpush(x) cmark_utf8proc_encode_char(x, dest)
|
232
|
-
|
233
|
-
while (len > 0) {
|
234
|
-
bufsize_t char_len = cmark_utf8proc_iterate(str, len, &c);
|
235
|
-
|
236
|
-
if (char_len >= 0) {
|
237
|
-
#include "case_fold_switch.inc"
|
238
|
-
} else {
|
239
|
-
encode_unknown(dest);
|
240
|
-
char_len = -char_len;
|
241
|
-
}
|
242
|
-
|
243
|
-
str += char_len;
|
244
|
-
len -= char_len;
|
245
|
-
}
|
246
|
-
}
|
247
|
-
|
248
|
-
// matches anything in the Zs class, plus LF, CR, TAB, FF.
// Returns 1 for a match and 0 otherwise.
int cmark_utf8proc_is_space(int32_t uc) {
  // U+2000..U+200A
  if (uc >= 8192 && uc <= 8202)
    return 1;

  switch (uc) {
  case 9:     // TAB
  case 10:    // LF
  case 12:    // FF
  case 13:    // CR
  case 32:    // SPACE
  case 160:   // U+00A0
  case 5760:  // U+1680
  case 8239:  // U+202F
  case 8287:  // U+205F
  case 12288: // U+3000
    return 1;
  default:
    return 0;
  }
}
|
254
|
-
|
255
|
-
// matches anything in the P[cdefios] classes.
// Returns 1 for a match and 0 otherwise. Values below 128 are delegated to
// cmark_ispunct; everything else is looked up in the hard-coded table below.
int cmark_utf8proc_is_punctuation(int32_t uc) {
  if (uc < 128 && cmark_ispunct((char)uc))
    return 1;

  return (
      uc == 161 || uc == 167 ||
      uc == 171 || uc == 182 || uc == 183 || uc == 187 || uc == 191 ||
      uc == 894 || uc == 903 || (uc >= 1370 && uc <= 1375) || uc == 1417 ||
      uc == 1418 || uc == 1470 || uc == 1472 || uc == 1475 || uc == 1478 ||
      uc == 1523 || uc == 1524 || uc == 1545 || uc == 1546 || uc == 1548 ||
      uc == 1549 || uc == 1563 || uc == 1566 || uc == 1567 ||
      (uc >= 1642 && uc <= 1645) || uc == 1748 || (uc >= 1792 && uc <= 1805) ||
      (uc >= 2039 && uc <= 2041) || (uc >= 2096 && uc <= 2110) || uc == 2142 ||
      uc == 2404 || uc == 2405 || uc == 2416 || uc == 2800 || uc == 3572 ||
      uc == 3663 || uc == 3674 || uc == 3675 || (uc >= 3844 && uc <= 3858) ||
      uc == 3860 || (uc >= 3898 && uc <= 3901) || uc == 3973 ||
      (uc >= 4048 && uc <= 4052) || uc == 4057 || uc == 4058 ||
      (uc >= 4170 && uc <= 4175) || uc == 4347 || (uc >= 4960 && uc <= 4968) ||
      uc == 5120 || uc == 5741 || uc == 5742 || uc == 5787 || uc == 5788 ||
      (uc >= 5867 && uc <= 5869) || uc == 5941 || uc == 5942 ||
      (uc >= 6100 && uc <= 6102) || (uc >= 6104 && uc <= 6106) ||
      (uc >= 6144 && uc <= 6154) || uc == 6468 || uc == 6469 || uc == 6686 ||
      uc == 6687 || (uc >= 6816 && uc <= 6822) || (uc >= 6824 && uc <= 6829) ||
      (uc >= 7002 && uc <= 7008) || (uc >= 7164 && uc <= 7167) ||
      (uc >= 7227 && uc <= 7231) || uc == 7294 || uc == 7295 ||
      (uc >= 7360 && uc <= 7367) || uc == 7379 || (uc >= 8208 && uc <= 8231) ||
      (uc >= 8240 && uc <= 8259) || (uc >= 8261 && uc <= 8273) ||
      (uc >= 8275 && uc <= 8286) || uc == 8317 || uc == 8318 || uc == 8333 ||
      uc == 8334 || (uc >= 8968 && uc <= 8971) || uc == 9001 || uc == 9002 ||
      (uc >= 10088 && uc <= 10101) || uc == 10181 || uc == 10182 ||
      (uc >= 10214 && uc <= 10223) || (uc >= 10627 && uc <= 10648) ||
      (uc >= 10712 && uc <= 10715) || uc == 10748 || uc == 10749 ||
      (uc >= 11513 && uc <= 11516) || uc == 11518 || uc == 11519 ||
      uc == 11632 || (uc >= 11776 && uc <= 11822) ||
      (uc >= 11824 && uc <= 11842) || (uc >= 12289 && uc <= 12291) ||
      (uc >= 12296 && uc <= 12305) || (uc >= 12308 && uc <= 12319) ||
      uc == 12336 || uc == 12349 || uc == 12448 || uc == 12539 || uc == 42238 ||
      uc == 42239 || (uc >= 42509 && uc <= 42511) || uc == 42611 ||
      uc == 42622 || (uc >= 42738 && uc <= 42743) ||
      (uc >= 43124 && uc <= 43127) || uc == 43214 || uc == 43215 ||
      (uc >= 43256 && uc <= 43258) || uc == 43310 || uc == 43311 ||
      uc == 43359 || (uc >= 43457 && uc <= 43469) || uc == 43486 ||
      uc == 43487 || (uc >= 43612 && uc <= 43615) || uc == 43742 ||
      uc == 43743 || uc == 43760 || uc == 43761 || uc == 44011 || uc == 64830 ||
      uc == 64831 || (uc >= 65040 && uc <= 65049) ||
      (uc >= 65072 && uc <= 65106) || (uc >= 65108 && uc <= 65121) ||
      uc == 65123 || uc == 65128 || uc == 65130 || uc == 65131 ||
      (uc >= 65281 && uc <= 65283) || (uc >= 65285 && uc <= 65290) ||
      (uc >= 65292 && uc <= 65295) || uc == 65306 || uc == 65307 ||
      uc == 65311 || uc == 65312 || (uc >= 65339 && uc <= 65341) ||
      uc == 65343 || uc == 65371 || uc == 65373 ||
      (uc >= 65375 && uc <= 65381) || (uc >= 65792 && uc <= 65794) ||
      uc == 66463 || uc == 66512 || uc == 66927 || uc == 67671 || uc == 67871 ||
      uc == 67903 || (uc >= 68176 && uc <= 68184) || uc == 68223 ||
      (uc >= 68336 && uc <= 68342) || (uc >= 68409 && uc <= 68415) ||
      (uc >= 68505 && uc <= 68508) || (uc >= 69703 && uc <= 69709) ||
      uc == 69819 || uc == 69820 || (uc >= 69822 && uc <= 69825) ||
      (uc >= 69952 && uc <= 69955) || uc == 70004 || uc == 70005 ||
      (uc >= 70085 && uc <= 70088) || uc == 70093 ||
      (uc >= 70200 && uc <= 70205) || uc == 70854 ||
      (uc >= 71105 && uc <= 71113) || (uc >= 71233 && uc <= 71235) ||
      (uc >= 74864 && uc <= 74868) || uc == 92782 || uc == 92783 ||
      uc == 92917 || (uc >= 92983 && uc <= 92987) || uc == 92996 ||
      uc == 113823);
}
|
data/ext/commonmarker/utf8.h
DELETED
@@ -1,35 +0,0 @@
|
|
1
|
-
#ifndef CMARK_UTF8_H
#define CMARK_UTF8_H

#include <stdint.h>
#include "buffer.h"

#ifdef __cplusplus
extern "C" {
#endif

// Appends a case-folded copy of the `len`-byte UTF-8 string `str` to `dest`.
// Input that cannot be decoded is replaced by U+FFFD.
CMARK_GFM_EXPORT
void cmark_utf8proc_case_fold(cmark_strbuf *dest, const uint8_t *str,
                              bufsize_t len);

// Appends the UTF-8 encoding of code point `uc` to `buf`. Values of 0x110000
// and above are written as U+FFFD.
CMARK_GFM_EXPORT
void cmark_utf8proc_encode_char(int32_t uc, cmark_strbuf *buf);

// Decodes the code point at the start of `str` into `*dst` and returns the
// number of bytes it occupies. On failure returns -1 and sets `*dst` to -1.
CMARK_GFM_EXPORT
int cmark_utf8proc_iterate(const uint8_t *str, bufsize_t str_len, int32_t *dst);

// Copies `size` bytes of `line` into `dest`, replacing invalid UTF-8
// sequences and NUL bytes with U+FFFD.
CMARK_GFM_EXPORT
void cmark_utf8proc_check(cmark_strbuf *dest, const uint8_t *line,
                          bufsize_t size);

// Non-zero if `uc` is in the Zs class or is one of LF, CR, TAB, FF.
CMARK_GFM_EXPORT
int cmark_utf8proc_is_space(int32_t uc);

// Non-zero if `uc` is in one of the P[cdefios] classes.
CMARK_GFM_EXPORT
int cmark_utf8proc_is_punctuation(int32_t uc);

#ifdef __cplusplus
}
#endif

#endif
|