commonmarker 0.23.10 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97) hide show
  1. checksums.yaml +4 -4
  2. data/Cargo.lock +1156 -0
  3. data/Cargo.toml +7 -0
  4. data/README.md +237 -172
  5. data/ext/commonmarker/Cargo.toml +20 -0
  6. data/ext/commonmarker/extconf.rb +3 -6
  7. data/ext/commonmarker/src/lib.rs +103 -0
  8. data/ext/commonmarker/src/node.rs +1221 -0
  9. data/ext/commonmarker/src/options.rs +220 -0
  10. data/ext/commonmarker/src/plugins/syntax_highlighting.rs +166 -0
  11. data/ext/commonmarker/src/plugins.rs +6 -0
  12. data/ext/commonmarker/src/utils.rs +8 -0
  13. data/lib/commonmarker/config.rb +92 -40
  14. data/lib/commonmarker/constants.rb +7 -0
  15. data/lib/commonmarker/extension.rb +14 -0
  16. data/lib/commonmarker/node/ast.rb +8 -0
  17. data/lib/commonmarker/node/inspect.rb +14 -4
  18. data/lib/commonmarker/node.rb +29 -47
  19. data/lib/commonmarker/renderer.rb +1 -127
  20. data/lib/commonmarker/utils.rb +22 -0
  21. data/lib/commonmarker/version.rb +2 -2
  22. data/lib/commonmarker.rb +27 -25
  23. metadata +38 -191
  24. data/Rakefile +0 -109
  25. data/bin/commonmarker +0 -118
  26. data/commonmarker.gemspec +0 -38
  27. data/ext/commonmarker/arena.c +0 -104
  28. data/ext/commonmarker/autolink.c +0 -508
  29. data/ext/commonmarker/autolink.h +0 -8
  30. data/ext/commonmarker/blocks.c +0 -1622
  31. data/ext/commonmarker/buffer.c +0 -278
  32. data/ext/commonmarker/buffer.h +0 -116
  33. data/ext/commonmarker/case_fold_switch.inc +0 -4327
  34. data/ext/commonmarker/chunk.h +0 -135
  35. data/ext/commonmarker/cmark-gfm-core-extensions.h +0 -54
  36. data/ext/commonmarker/cmark-gfm-extension_api.h +0 -737
  37. data/ext/commonmarker/cmark-gfm-extensions_export.h +0 -42
  38. data/ext/commonmarker/cmark-gfm.h +0 -833
  39. data/ext/commonmarker/cmark-gfm_export.h +0 -42
  40. data/ext/commonmarker/cmark-gfm_version.h +0 -7
  41. data/ext/commonmarker/cmark.c +0 -55
  42. data/ext/commonmarker/cmark_ctype.c +0 -44
  43. data/ext/commonmarker/cmark_ctype.h +0 -33
  44. data/ext/commonmarker/commonmark.c +0 -514
  45. data/ext/commonmarker/commonmarker.c +0 -1308
  46. data/ext/commonmarker/commonmarker.h +0 -16
  47. data/ext/commonmarker/config.h +0 -76
  48. data/ext/commonmarker/core-extensions.c +0 -27
  49. data/ext/commonmarker/entities.inc +0 -2138
  50. data/ext/commonmarker/ext_scanners.c +0 -879
  51. data/ext/commonmarker/ext_scanners.h +0 -24
  52. data/ext/commonmarker/footnotes.c +0 -63
  53. data/ext/commonmarker/footnotes.h +0 -27
  54. data/ext/commonmarker/houdini.h +0 -57
  55. data/ext/commonmarker/houdini_href_e.c +0 -100
  56. data/ext/commonmarker/houdini_html_e.c +0 -66
  57. data/ext/commonmarker/houdini_html_u.c +0 -149
  58. data/ext/commonmarker/html.c +0 -502
  59. data/ext/commonmarker/html.h +0 -27
  60. data/ext/commonmarker/inlines.c +0 -1788
  61. data/ext/commonmarker/inlines.h +0 -29
  62. data/ext/commonmarker/iterator.c +0 -159
  63. data/ext/commonmarker/iterator.h +0 -26
  64. data/ext/commonmarker/latex.c +0 -468
  65. data/ext/commonmarker/linked_list.c +0 -37
  66. data/ext/commonmarker/man.c +0 -274
  67. data/ext/commonmarker/map.c +0 -129
  68. data/ext/commonmarker/map.h +0 -44
  69. data/ext/commonmarker/node.c +0 -1045
  70. data/ext/commonmarker/node.h +0 -167
  71. data/ext/commonmarker/parser.h +0 -59
  72. data/ext/commonmarker/plaintext.c +0 -218
  73. data/ext/commonmarker/plugin.c +0 -36
  74. data/ext/commonmarker/plugin.h +0 -34
  75. data/ext/commonmarker/references.c +0 -43
  76. data/ext/commonmarker/references.h +0 -26
  77. data/ext/commonmarker/registry.c +0 -63
  78. data/ext/commonmarker/registry.h +0 -24
  79. data/ext/commonmarker/render.c +0 -213
  80. data/ext/commonmarker/render.h +0 -62
  81. data/ext/commonmarker/scanners.c +0 -14056
  82. data/ext/commonmarker/scanners.h +0 -70
  83. data/ext/commonmarker/scanners.re +0 -341
  84. data/ext/commonmarker/strikethrough.c +0 -167
  85. data/ext/commonmarker/strikethrough.h +0 -9
  86. data/ext/commonmarker/syntax_extension.c +0 -149
  87. data/ext/commonmarker/syntax_extension.h +0 -34
  88. data/ext/commonmarker/table.c +0 -917
  89. data/ext/commonmarker/table.h +0 -12
  90. data/ext/commonmarker/tagfilter.c +0 -60
  91. data/ext/commonmarker/tagfilter.h +0 -8
  92. data/ext/commonmarker/tasklist.c +0 -156
  93. data/ext/commonmarker/tasklist.h +0 -8
  94. data/ext/commonmarker/utf8.c +0 -317
  95. data/ext/commonmarker/utf8.h +0 -35
  96. data/ext/commonmarker/xml.c +0 -182
  97. data/lib/commonmarker/renderer/html_renderer.rb +0 -256
@@ -1,12 +0,0 @@
1
- #ifndef CMARK_GFM_TABLE_H
2
- #define CMARK_GFM_TABLE_H
3
-
4
- #include "cmark-gfm-core-extensions.h"
5
-
6
-
7
- extern cmark_node_type CMARK_NODE_TABLE, CMARK_NODE_TABLE_ROW,
8
- CMARK_NODE_TABLE_CELL;
9
-
10
- cmark_syntax_extension *create_table_extension(void);
11
-
12
- #endif
@@ -1,60 +0,0 @@
1
- #include "tagfilter.h"
2
- #include <parser.h>
3
- #include <ctype.h>
4
-
5
// Lowercase tag names that filter() rejects. The list is terminated by a
// NULL sentinel so it can be walked without a separate length.
static const char *blacklist[] = {
    "title",
    "textarea",
    "style",
    "xmp",
    "iframe",
    "noembed",
    "noframes",
    "script",
    "plaintext",
    NULL,
};
9
-
10
// Returns 1 when the raw HTML tag in tag_data[0..tag_size) is an opening or
// closing tag named `tagname`, and 0 otherwise.
//
// tag_data must start with '<' (optionally followed by '/'); the name is
// compared after lowercasing each input byte, so `tagname` is expected to be
// lowercase. After the name, the tag must continue with whitespace, '>' or
// the two bytes "/>".
static int is_tag(const unsigned char *tag_data, size_t tag_size,
                  const char *tagname) {
  if (tag_size < 3 || tag_data[0] != '<')
    return 0;

  // Skip the '/' of a closing tag, if any.
  size_t pos = (tag_data[1] == '/') ? 2 : 1;
  const char *expected = tagname;

  while (pos < tag_size && *expected != 0) {
    if (tolower(tag_data[pos]) != *expected)
      return 0;
    pos++;
    expected++;
  }

  // The input ended before anything could follow the name.
  if (pos == tag_size)
    return 0;

  const unsigned char next = tag_data[pos];

  if (cmark_isspace(next) || next == '>')
    return 1;

  if (next == '/' && tag_size >= pos + 2 && tag_data[pos + 1] == '>')
    return 1;

  return 0;
}
42
-
43
- static int filter(cmark_syntax_extension *ext, const unsigned char *tag,
44
- size_t tag_len) {
45
- const char **it;
46
-
47
- for (it = blacklist; *it; ++it) {
48
- if (is_tag(tag, tag_len, *it)) {
49
- return 0;
50
- }
51
- }
52
-
53
- return 1;
54
- }
55
-
56
- cmark_syntax_extension *create_tagfilter_extension(void) {
57
- cmark_syntax_extension *ext = cmark_syntax_extension_new("tagfilter");
58
- cmark_syntax_extension_set_html_filter_func(ext, filter);
59
- return ext;
60
- }
@@ -1,8 +0,0 @@
1
- #ifndef CMARK_GFM_TAGFILTER_H
2
- #define CMARK_GFM_TAGFILTER_H
3
-
4
- #include "cmark-gfm-core-extensions.h"
5
-
6
- cmark_syntax_extension *create_tagfilter_extension(void);
7
-
8
- #endif
@@ -1,156 +0,0 @@
1
- #include "tasklist.h"
2
- #include <parser.h>
3
- #include <render.h>
4
- #include <html.h>
5
- #include "ext_scanners.h"
6
-
7
// Two-state enumeration for a task list checkbox.
typedef enum {
  CMARK_TASKLIST_NOCHECKED = 0,
  CMARK_TASKLIST_CHECKED = 1,
} cmark_tasklist_type;
11
-
12
- // Local constants
13
- static const char *TYPE_STRING = "tasklist";
14
-
15
- static const char *get_type_string(cmark_syntax_extension *extension, cmark_node *node) {
16
- return TYPE_STRING;
17
- }
18
-
19
-
20
- // Return 1 if state was set, 0 otherwise
21
- int cmark_gfm_extensions_set_tasklist_item_checked(cmark_node *node, bool is_checked) {
22
- // The node has to exist, and be an extension, and actually be the right type in order to get the value.
23
- if (!node || !node->extension || strcmp(cmark_node_get_type_string(node), TYPE_STRING))
24
- return 0;
25
-
26
- node->as.list.checked = is_checked;
27
- return 1;
28
- }
29
-
30
- bool cmark_gfm_extensions_get_tasklist_item_checked(cmark_node *node) {
31
- if (!node || !node->extension || strcmp(cmark_node_get_type_string(node), TYPE_STRING))
32
- return false;
33
-
34
- if (node->as.list.checked) {
35
- return true;
36
- }
37
- else {
38
- return false;
39
- }
40
- }
41
-
42
- static bool parse_node_item_prefix(cmark_parser *parser, const char *input,
43
- cmark_node *container) {
44
- bool res = false;
45
-
46
- if (parser->indent >=
47
- container->as.list.marker_offset + container->as.list.padding) {
48
- cmark_parser_advance_offset(parser, input, container->as.list.marker_offset +
49
- container->as.list.padding,
50
- true);
51
- res = true;
52
- } else if (parser->blank && container->first_child != NULL) {
53
- // if container->first_child is NULL, then the opening line
54
- // of the list item was blank after the list marker; in this
55
- // case, we are done with the list item.
56
- cmark_parser_advance_offset(parser, input, parser->first_nonspace - parser->offset,
57
- false);
58
- res = true;
59
- }
60
- return res;
61
- }
62
-
63
- static int matches(cmark_syntax_extension *self, cmark_parser *parser,
64
- unsigned char *input, int len,
65
- cmark_node *parent_container) {
66
- return parse_node_item_prefix(parser, (const char*)input, parent_container);
67
- }
68
-
69
- static int can_contain(cmark_syntax_extension *extension, cmark_node *node,
70
- cmark_node_type child_type) {
71
- return (node->type == CMARK_NODE_ITEM) ? 1 : 0;
72
- }
73
-
74
- static cmark_node *open_tasklist_item(cmark_syntax_extension *self,
75
- int indented, cmark_parser *parser,
76
- cmark_node *parent_container,
77
- unsigned char *input, int len) {
78
- cmark_node_type node_type = cmark_node_get_type(parent_container);
79
- if (node_type != CMARK_NODE_ITEM) {
80
- return NULL;
81
- }
82
-
83
- bufsize_t matched = scan_tasklist(input, len, 0);
84
- if (!matched) {
85
- return NULL;
86
- }
87
-
88
- cmark_node_set_syntax_extension(parent_container, self);
89
- cmark_parser_advance_offset(parser, (char *)input, 3, false);
90
-
91
- // Either an upper or lower case X means the task is completed.
92
- parent_container->as.list.checked = (strstr((char*)input, "[x]") || strstr((char*)input, "[X]"));
93
-
94
- return NULL;
95
- }
96
-
97
- static void commonmark_render(cmark_syntax_extension *extension,
98
- cmark_renderer *renderer, cmark_node *node,
99
- cmark_event_type ev_type, int options) {
100
- bool entering = (ev_type == CMARK_EVENT_ENTER);
101
- if (entering) {
102
- renderer->cr(renderer);
103
- if (node->as.list.checked) {
104
- renderer->out(renderer, node, "- [x] ", false, LITERAL);
105
- } else {
106
- renderer->out(renderer, node, "- [ ] ", false, LITERAL);
107
- }
108
- cmark_strbuf_puts(renderer->prefix, " ");
109
- } else {
110
- cmark_strbuf_truncate(renderer->prefix, renderer->prefix->size - 2);
111
- renderer->cr(renderer);
112
- }
113
- }
114
-
115
- static void html_render(cmark_syntax_extension *extension,
116
- cmark_html_renderer *renderer, cmark_node *node,
117
- cmark_event_type ev_type, int options) {
118
- bool entering = (ev_type == CMARK_EVENT_ENTER);
119
- if (entering) {
120
- cmark_html_render_cr(renderer->html);
121
- cmark_strbuf_puts(renderer->html, "<li");
122
- cmark_html_render_sourcepos(node, renderer->html, options);
123
- cmark_strbuf_putc(renderer->html, '>');
124
- if (node->as.list.checked) {
125
- cmark_strbuf_puts(renderer->html, "<input type=\"checkbox\" checked=\"\" disabled=\"\" /> ");
126
- } else {
127
- cmark_strbuf_puts(renderer->html, "<input type=\"checkbox\" disabled=\"\" /> ");
128
- }
129
- } else {
130
- cmark_strbuf_puts(renderer->html, "</li>\n");
131
- }
132
- }
133
-
134
- static const char *xml_attr(cmark_syntax_extension *extension,
135
- cmark_node *node) {
136
- if (node->as.list.checked) {
137
- return " completed=\"true\"";
138
- } else {
139
- return " completed=\"false\"";
140
- }
141
- }
142
-
143
- cmark_syntax_extension *create_tasklist_extension(void) {
144
- cmark_syntax_extension *ext = cmark_syntax_extension_new("tasklist");
145
-
146
- cmark_syntax_extension_set_match_block_func(ext, matches);
147
- cmark_syntax_extension_set_get_type_string_func(ext, get_type_string);
148
- cmark_syntax_extension_set_open_block_func(ext, open_tasklist_item);
149
- cmark_syntax_extension_set_can_contain_func(ext, can_contain);
150
- cmark_syntax_extension_set_commonmark_render_func(ext, commonmark_render);
151
- cmark_syntax_extension_set_plaintext_render_func(ext, commonmark_render);
152
- cmark_syntax_extension_set_html_render_func(ext, html_render);
153
- cmark_syntax_extension_set_xml_attr_func(ext, xml_attr);
154
-
155
- return ext;
156
- }
@@ -1,8 +0,0 @@
1
- #ifndef TASKLIST_H
2
- #define TASKLIST_H
3
-
4
- #include "cmark-gfm-core-extensions.h"
5
-
6
- cmark_syntax_extension *create_tasklist_extension(void);
7
-
8
- #endif
@@ -1,317 +0,0 @@
1
- #include <stdlib.h>
2
- #include <stdint.h>
3
- #include <assert.h>
4
-
5
- #include "cmark_ctype.h"
6
- #include "utf8.h"
7
-
8
// Sequence length implied by each possible first byte of a UTF-8 sequence:
// 1-4 for a valid lead byte, 0 for a byte that cannot start a sequence
// (continuation bytes 0x80-0xBF and 0xF8-0xFF). One row per high nibble.
static const int8_t utf8proc_utf8class[256] = {
    /* 0x00 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x10 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x20 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x60 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xA0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xB0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xC0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* 0xD0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* 0xE0 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    /* 0xF0 */ 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0};
20
-
21
- static void encode_unknown(cmark_strbuf *buf) {
22
- static const uint8_t repl[] = {239, 191, 189};
23
- cmark_strbuf_put(buf, repl, 3);
24
- }
25
-
26
- static int utf8proc_charlen(const uint8_t *str, bufsize_t str_len) {
27
- int length, i;
28
-
29
- if (!str_len)
30
- return 0;
31
-
32
- length = utf8proc_utf8class[str[0]];
33
-
34
- if (!length)
35
- return -1;
36
-
37
- if (str_len >= 0 && (bufsize_t)length > str_len)
38
- return -str_len;
39
-
40
- for (i = 1; i < length; i++) {
41
- if ((str[i] & 0xC0) != 0x80)
42
- return -i;
43
- }
44
-
45
- return length;
46
- }
47
-
48
- // Validate a single UTF-8 character according to RFC 3629.
49
- static int utf8proc_valid(const uint8_t *str, bufsize_t str_len) {
50
- int length = utf8proc_utf8class[str[0]];
51
-
52
- if (!length)
53
- return -1;
54
-
55
- if ((bufsize_t)length > str_len)
56
- return -str_len;
57
-
58
- switch (length) {
59
- case 2:
60
- if ((str[1] & 0xC0) != 0x80)
61
- return -1;
62
- if (str[0] < 0xC2) {
63
- // Overlong
64
- return -length;
65
- }
66
- break;
67
-
68
- case 3:
69
- if ((str[1] & 0xC0) != 0x80)
70
- return -1;
71
- if ((str[2] & 0xC0) != 0x80)
72
- return -2;
73
- if (str[0] == 0xE0) {
74
- if (str[1] < 0xA0) {
75
- // Overlong
76
- return -length;
77
- }
78
- } else if (str[0] == 0xED) {
79
- if (str[1] >= 0xA0) {
80
- // Surrogate
81
- return -length;
82
- }
83
- }
84
- break;
85
-
86
- case 4:
87
- if ((str[1] & 0xC0) != 0x80)
88
- return -1;
89
- if ((str[2] & 0xC0) != 0x80)
90
- return -2;
91
- if ((str[3] & 0xC0) != 0x80)
92
- return -3;
93
- if (str[0] == 0xF0) {
94
- if (str[1] < 0x90) {
95
- // Overlong
96
- return -length;
97
- }
98
- } else if (str[0] >= 0xF4) {
99
- if (str[0] > 0xF4 || str[1] >= 0x90) {
100
- // Above 0x10FFFF
101
- return -length;
102
- }
103
- }
104
- break;
105
- }
106
-
107
- return length;
108
- }
109
-
110
- void cmark_utf8proc_check(cmark_strbuf *ob, const uint8_t *line,
111
- bufsize_t size) {
112
- bufsize_t i = 0;
113
-
114
- while (i < size) {
115
- bufsize_t org = i;
116
- int charlen = 0;
117
-
118
- while (i < size) {
119
- if (line[i] < 0x80 && line[i] != 0) {
120
- i++;
121
- } else if (line[i] >= 0x80) {
122
- charlen = utf8proc_valid(line + i, size - i);
123
- if (charlen < 0) {
124
- charlen = -charlen;
125
- break;
126
- }
127
- i += charlen;
128
- } else if (line[i] == 0) {
129
- // ASCII NUL is technically valid but rejected
130
- // for security reasons.
131
- charlen = 1;
132
- break;
133
- }
134
- }
135
-
136
- if (i > org) {
137
- cmark_strbuf_put(ob, line + org, i - org);
138
- }
139
-
140
- if (i >= size) {
141
- break;
142
- } else {
143
- // Invalid UTF-8
144
- encode_unknown(ob);
145
- i += charlen;
146
- }
147
- }
148
- }
149
-
150
- int cmark_utf8proc_iterate(const uint8_t *str, bufsize_t str_len,
151
- int32_t *dst) {
152
- int length;
153
- int32_t uc = -1;
154
-
155
- *dst = -1;
156
- length = utf8proc_charlen(str, str_len);
157
- if (length < 0)
158
- return -1;
159
-
160
- switch (length) {
161
- case 1:
162
- uc = str[0];
163
- break;
164
- case 2:
165
- uc = ((str[0] & 0x1F) << 6) + (str[1] & 0x3F);
166
- if (uc < 0x80)
167
- uc = -1;
168
- break;
169
- case 3:
170
- uc = ((str[0] & 0x0F) << 12) + ((str[1] & 0x3F) << 6) + (str[2] & 0x3F);
171
- if (uc < 0x800 || (uc >= 0xD800 && uc < 0xE000))
172
- uc = -1;
173
- break;
174
- case 4:
175
- uc = ((str[0] & 0x07) << 18) + ((str[1] & 0x3F) << 12) +
176
- ((str[2] & 0x3F) << 6) + (str[3] & 0x3F);
177
- if (uc < 0x10000 || uc >= 0x110000)
178
- uc = -1;
179
- break;
180
- }
181
-
182
- if (uc < 0)
183
- return -1;
184
-
185
- *dst = uc;
186
- return length;
187
- }
188
-
189
- void cmark_utf8proc_encode_char(int32_t uc, cmark_strbuf *buf) {
190
- uint8_t dst[4];
191
- bufsize_t len = 0;
192
-
193
- assert(uc >= 0);
194
-
195
- if (uc < 0x80) {
196
- dst[0] = (uint8_t)(uc);
197
- len = 1;
198
- } else if (uc < 0x800) {
199
- dst[0] = (uint8_t)(0xC0 + (uc >> 6));
200
- dst[1] = 0x80 + (uc & 0x3F);
201
- len = 2;
202
- } else if (uc == 0xFFFF) {
203
- dst[0] = 0xFF;
204
- len = 1;
205
- } else if (uc == 0xFFFE) {
206
- dst[0] = 0xFE;
207
- len = 1;
208
- } else if (uc < 0x10000) {
209
- dst[0] = (uint8_t)(0xE0 + (uc >> 12));
210
- dst[1] = 0x80 + ((uc >> 6) & 0x3F);
211
- dst[2] = 0x80 + (uc & 0x3F);
212
- len = 3;
213
- } else if (uc < 0x110000) {
214
- dst[0] = (uint8_t)(0xF0 + (uc >> 18));
215
- dst[1] = 0x80 + ((uc >> 12) & 0x3F);
216
- dst[2] = 0x80 + ((uc >> 6) & 0x3F);
217
- dst[3] = 0x80 + (uc & 0x3F);
218
- len = 4;
219
- } else {
220
- encode_unknown(buf);
221
- return;
222
- }
223
-
224
- cmark_strbuf_put(buf, dst, len);
225
- }
226
-
227
- void cmark_utf8proc_case_fold(cmark_strbuf *dest, const uint8_t *str,
228
- bufsize_t len) {
229
- int32_t c;
230
-
231
- #define bufpush(x) cmark_utf8proc_encode_char(x, dest)
232
-
233
- while (len > 0) {
234
- bufsize_t char_len = cmark_utf8proc_iterate(str, len, &c);
235
-
236
- if (char_len >= 0) {
237
- #include "case_fold_switch.inc"
238
- } else {
239
- encode_unknown(dest);
240
- char_len = -char_len;
241
- }
242
-
243
- str += char_len;
244
- len -= char_len;
245
- }
246
- }
247
-
248
// matches anything in the Zs class, plus LF, CR, TAB, FF.
// Returns 1 for a match and 0 otherwise.
int cmark_utf8proc_is_space(int32_t uc) {
  switch (uc) {
  case 9:     // TAB
  case 10:    // LF
  case 12:    // FF
  case 13:    // CR
  case 32:    // U+0020
  case 160:   // U+00A0
  case 5760:  // U+1680
  case 8239:  // U+202F
  case 8287:  // U+205F
  case 12288: // U+3000
    return 1;
  default:
    // U+2000..U+200A
    return uc >= 8192 && uc <= 8202;
  }
}
254
-
255
// matches anything in the P[cdefios] classes.
//
// Values below 128 are classified by cmark_ispunct(); everything else is
// looked up in a table of inclusive [lo, hi] code point ranges (single code
// points appear as lo == hi). Returns 1 for a match and 0 otherwise.
int cmark_utf8proc_is_punctuation(int32_t uc) {
  static const struct {
    int32_t lo;
    int32_t hi;
  } punct_ranges[] = {
      {161, 161},     {167, 167},
      {171, 171},     {182, 182},     {183, 183},     {187, 187},     {191, 191},
      {894, 894},     {903, 903},     {1370, 1375},   {1417, 1417},
      {1418, 1418},   {1470, 1470},   {1472, 1472},   {1475, 1475},   {1478, 1478},
      {1523, 1523},   {1524, 1524},   {1545, 1545},   {1546, 1546},   {1548, 1548},
      {1549, 1549},   {1563, 1563},   {1566, 1566},   {1567, 1567},
      {1642, 1645},   {1748, 1748},   {1792, 1805},
      {2039, 2041},   {2096, 2110},   {2142, 2142},
      {2404, 2404},   {2405, 2405},   {2416, 2416},   {2800, 2800},   {3572, 3572},
      {3663, 3663},   {3674, 3674},   {3675, 3675},   {3844, 3858},
      {3860, 3860},   {3898, 3901},   {3973, 3973},
      {4048, 4052},   {4057, 4057},   {4058, 4058},
      {4170, 4175},   {4347, 4347},   {4960, 4968},
      {5120, 5120},   {5741, 5741},   {5742, 5742},   {5787, 5787},   {5788, 5788},
      {5867, 5869},   {5941, 5941},   {5942, 5942},
      {6100, 6102},   {6104, 6106},
      {6144, 6154},   {6468, 6468},   {6469, 6469},   {6686, 6686},
      {6687, 6687},   {6816, 6822},   {6824, 6829},
      {7002, 7008},   {7164, 7167},
      {7227, 7231},   {7294, 7294},   {7295, 7295},
      {7360, 7367},   {7379, 7379},   {8208, 8231},
      {8240, 8259},   {8261, 8273},
      {8275, 8286},   {8317, 8317},   {8318, 8318},   {8333, 8333},
      {8334, 8334},   {8968, 8971},   {9001, 9001},   {9002, 9002},
      {10088, 10101}, {10181, 10181}, {10182, 10182},
      {10214, 10223}, {10627, 10648},
      {10712, 10715}, {10748, 10748}, {10749, 10749},
      {11513, 11516}, {11518, 11518}, {11519, 11519},
      {11632, 11632}, {11776, 11822},
      {11824, 11842}, {12289, 12291},
      {12296, 12305}, {12308, 12319},
      {12336, 12336}, {12349, 12349}, {12448, 12448}, {12539, 12539}, {42238, 42238},
      {42239, 42239}, {42509, 42511}, {42611, 42611},
      {42622, 42622}, {42738, 42743},
      {43124, 43127}, {43214, 43214}, {43215, 43215},
      {43256, 43258}, {43310, 43310}, {43311, 43311},
      {43359, 43359}, {43457, 43469}, {43486, 43486},
      {43487, 43487}, {43612, 43615}, {43742, 43742},
      {43743, 43743}, {43760, 43760}, {43761, 43761}, {44011, 44011}, {64830, 64830},
      {64831, 64831}, {65040, 65049},
      {65072, 65106}, {65108, 65121},
      {65123, 65123}, {65128, 65128}, {65130, 65130}, {65131, 65131},
      {65281, 65283}, {65285, 65290},
      {65292, 65295}, {65306, 65306}, {65307, 65307},
      {65311, 65311}, {65312, 65312}, {65339, 65341},
      {65343, 65343}, {65371, 65371}, {65373, 65373},
      {65375, 65381}, {65792, 65794},
      {66463, 66463}, {66512, 66512}, {66927, 66927}, {67671, 67671}, {67871, 67871},
      {67903, 67903}, {68176, 68184}, {68223, 68223},
      {68336, 68342}, {68409, 68415},
      {68505, 68508}, {69703, 69709},
      {69819, 69819}, {69820, 69820}, {69822, 69825},
      {69952, 69955}, {70004, 70004}, {70005, 70005},
      {70085, 70088}, {70093, 70093},
      {70200, 70205}, {70854, 70854},
      {71105, 71113}, {71233, 71235},
      {74864, 74868}, {92782, 92782}, {92783, 92783},
      {92917, 92917}, {92983, 92987}, {92996, 92996},
      {113823, 113823},
  };

  // The ASCII classifier is consulted first.
  if (uc < 128 && cmark_ispunct((char)uc))
    return 1;

  for (size_t k = 0; k < sizeof(punct_ranges) / sizeof(punct_ranges[0]); k++) {
    if (uc >= punct_ranges[k].lo && uc <= punct_ranges[k].hi)
      return 1;
  }

  return 0;
}
@@ -1,35 +0,0 @@
1
- #ifndef CMARK_UTF8_H
2
- #define CMARK_UTF8_H
3
-
4
- #include <stdint.h>
5
- #include "buffer.h"
6
-
7
- #ifdef __cplusplus
8
- extern "C" {
9
- #endif
10
-
11
- CMARK_GFM_EXPORT
12
- void cmark_utf8proc_case_fold(cmark_strbuf *dest, const uint8_t *str,
13
- bufsize_t len);
14
-
15
- CMARK_GFM_EXPORT
16
- void cmark_utf8proc_encode_char(int32_t uc, cmark_strbuf *buf);
17
-
18
- CMARK_GFM_EXPORT
19
- int cmark_utf8proc_iterate(const uint8_t *str, bufsize_t str_len, int32_t *dst);
20
-
21
- CMARK_GFM_EXPORT
22
- void cmark_utf8proc_check(cmark_strbuf *dest, const uint8_t *line,
23
- bufsize_t size);
24
-
25
- CMARK_GFM_EXPORT
26
- int cmark_utf8proc_is_space(int32_t uc);
27
-
28
- CMARK_GFM_EXPORT
29
- int cmark_utf8proc_is_punctuation(int32_t uc);
30
-
31
- #ifdef __cplusplus
32
- }
33
- #endif
34
-
35
- #endif