commonmarker 0.23.10 → 2.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98) hide show
  1. checksums.yaml +4 -4
  2. data/Cargo.lock +1164 -0
  3. data/Cargo.toml +12 -0
  4. data/README.md +252 -176
  5. data/ext/commonmarker/Cargo.toml +21 -0
  6. data/ext/commonmarker/build.rs +9 -0
  7. data/ext/commonmarker/extconf.rb +3 -6
  8. data/ext/commonmarker/src/lib.rs +168 -0
  9. data/ext/commonmarker/src/node.rs +1263 -0
  10. data/ext/commonmarker/src/options.rs +241 -0
  11. data/ext/commonmarker/src/plugins/syntax_highlighting.rs +171 -0
  12. data/ext/commonmarker/src/plugins.rs +6 -0
  13. data/ext/commonmarker/src/utils.rs +5 -0
  14. data/lib/commonmarker/config.rb +103 -40
  15. data/lib/commonmarker/constants.rb +7 -0
  16. data/lib/commonmarker/extension.rb +14 -0
  17. data/lib/commonmarker/node/ast.rb +8 -0
  18. data/lib/commonmarker/node/inspect.rb +14 -4
  19. data/lib/commonmarker/node.rb +29 -47
  20. data/lib/commonmarker/renderer.rb +1 -127
  21. data/lib/commonmarker/utils.rb +22 -0
  22. data/lib/commonmarker/version.rb +2 -2
  23. data/lib/commonmarker.rb +27 -25
  24. metadata +38 -187
  25. data/Rakefile +0 -109
  26. data/bin/commonmarker +0 -118
  27. data/commonmarker.gemspec +0 -38
  28. data/ext/commonmarker/arena.c +0 -104
  29. data/ext/commonmarker/autolink.c +0 -508
  30. data/ext/commonmarker/autolink.h +0 -8
  31. data/ext/commonmarker/blocks.c +0 -1622
  32. data/ext/commonmarker/buffer.c +0 -278
  33. data/ext/commonmarker/buffer.h +0 -116
  34. data/ext/commonmarker/case_fold_switch.inc +0 -4327
  35. data/ext/commonmarker/chunk.h +0 -135
  36. data/ext/commonmarker/cmark-gfm-core-extensions.h +0 -54
  37. data/ext/commonmarker/cmark-gfm-extension_api.h +0 -737
  38. data/ext/commonmarker/cmark-gfm-extensions_export.h +0 -42
  39. data/ext/commonmarker/cmark-gfm.h +0 -833
  40. data/ext/commonmarker/cmark-gfm_export.h +0 -42
  41. data/ext/commonmarker/cmark-gfm_version.h +0 -7
  42. data/ext/commonmarker/cmark.c +0 -55
  43. data/ext/commonmarker/cmark_ctype.c +0 -44
  44. data/ext/commonmarker/cmark_ctype.h +0 -33
  45. data/ext/commonmarker/commonmark.c +0 -514
  46. data/ext/commonmarker/commonmarker.c +0 -1308
  47. data/ext/commonmarker/commonmarker.h +0 -16
  48. data/ext/commonmarker/config.h +0 -76
  49. data/ext/commonmarker/core-extensions.c +0 -27
  50. data/ext/commonmarker/entities.inc +0 -2138
  51. data/ext/commonmarker/ext_scanners.c +0 -879
  52. data/ext/commonmarker/ext_scanners.h +0 -24
  53. data/ext/commonmarker/footnotes.c +0 -63
  54. data/ext/commonmarker/footnotes.h +0 -27
  55. data/ext/commonmarker/houdini.h +0 -57
  56. data/ext/commonmarker/houdini_href_e.c +0 -100
  57. data/ext/commonmarker/houdini_html_e.c +0 -66
  58. data/ext/commonmarker/houdini_html_u.c +0 -149
  59. data/ext/commonmarker/html.c +0 -502
  60. data/ext/commonmarker/html.h +0 -27
  61. data/ext/commonmarker/inlines.c +0 -1788
  62. data/ext/commonmarker/inlines.h +0 -29
  63. data/ext/commonmarker/iterator.c +0 -159
  64. data/ext/commonmarker/iterator.h +0 -26
  65. data/ext/commonmarker/latex.c +0 -468
  66. data/ext/commonmarker/linked_list.c +0 -37
  67. data/ext/commonmarker/man.c +0 -274
  68. data/ext/commonmarker/map.c +0 -129
  69. data/ext/commonmarker/map.h +0 -44
  70. data/ext/commonmarker/node.c +0 -1045
  71. data/ext/commonmarker/node.h +0 -167
  72. data/ext/commonmarker/parser.h +0 -59
  73. data/ext/commonmarker/plaintext.c +0 -218
  74. data/ext/commonmarker/plugin.c +0 -36
  75. data/ext/commonmarker/plugin.h +0 -34
  76. data/ext/commonmarker/references.c +0 -43
  77. data/ext/commonmarker/references.h +0 -26
  78. data/ext/commonmarker/registry.c +0 -63
  79. data/ext/commonmarker/registry.h +0 -24
  80. data/ext/commonmarker/render.c +0 -213
  81. data/ext/commonmarker/render.h +0 -62
  82. data/ext/commonmarker/scanners.c +0 -14056
  83. data/ext/commonmarker/scanners.h +0 -70
  84. data/ext/commonmarker/scanners.re +0 -341
  85. data/ext/commonmarker/strikethrough.c +0 -167
  86. data/ext/commonmarker/strikethrough.h +0 -9
  87. data/ext/commonmarker/syntax_extension.c +0 -149
  88. data/ext/commonmarker/syntax_extension.h +0 -34
  89. data/ext/commonmarker/table.c +0 -917
  90. data/ext/commonmarker/table.h +0 -12
  91. data/ext/commonmarker/tagfilter.c +0 -60
  92. data/ext/commonmarker/tagfilter.h +0 -8
  93. data/ext/commonmarker/tasklist.c +0 -156
  94. data/ext/commonmarker/tasklist.h +0 -8
  95. data/ext/commonmarker/utf8.c +0 -317
  96. data/ext/commonmarker/utf8.h +0 -35
  97. data/ext/commonmarker/xml.c +0 -182
  98. data/lib/commonmarker/renderer/html_renderer.rb +0 -256
@@ -1,12 +0,0 @@
1
- #ifndef CMARK_GFM_TABLE_H
2
- #define CMARK_GFM_TABLE_H
3
-
4
- #include "cmark-gfm-core-extensions.h"
5
-
6
-
7
- extern cmark_node_type CMARK_NODE_TABLE, CMARK_NODE_TABLE_ROW,
8
- CMARK_NODE_TABLE_CELL;
9
-
10
- cmark_syntax_extension *create_table_extension(void);
11
-
12
- #endif
@@ -1,60 +0,0 @@
1
- #include "tagfilter.h"
2
- #include <parser.h>
3
- #include <ctype.h>
4
-
5
- static const char *blacklist[] = {
6
- "title", "textarea", "style", "xmp", "iframe",
7
- "noembed", "noframes", "script", "plaintext", NULL,
8
- };
9
-
10
- static int is_tag(const unsigned char *tag_data, size_t tag_size,
11
- const char *tagname) {
12
- size_t i;
13
-
14
- if (tag_size < 3 || tag_data[0] != '<')
15
- return 0;
16
-
17
- i = 1;
18
-
19
- if (tag_data[i] == '/') {
20
- i++;
21
- }
22
-
23
- for (; i < tag_size; ++i, ++tagname) {
24
- if (*tagname == 0)
25
- break;
26
-
27
- if (tolower(tag_data[i]) != *tagname)
28
- return 0;
29
- }
30
-
31
- if (i == tag_size)
32
- return 0;
33
-
34
- if (cmark_isspace(tag_data[i]) || tag_data[i] == '>')
35
- return 1;
36
-
37
- if (tag_data[i] == '/' && tag_size >= i + 2 && tag_data[i + 1] == '>')
38
- return 1;
39
-
40
- return 0;
41
- }
42
-
43
- static int filter(cmark_syntax_extension *ext, const unsigned char *tag,
44
- size_t tag_len) {
45
- const char **it;
46
-
47
- for (it = blacklist; *it; ++it) {
48
- if (is_tag(tag, tag_len, *it)) {
49
- return 0;
50
- }
51
- }
52
-
53
- return 1;
54
- }
55
-
56
- cmark_syntax_extension *create_tagfilter_extension(void) {
57
- cmark_syntax_extension *ext = cmark_syntax_extension_new("tagfilter");
58
- cmark_syntax_extension_set_html_filter_func(ext, filter);
59
- return ext;
60
- }
@@ -1,8 +0,0 @@
1
- #ifndef CMARK_GFM_TAGFILTER_H
2
- #define CMARK_GFM_TAGFILTER_H
3
-
4
- #include "cmark-gfm-core-extensions.h"
5
-
6
- cmark_syntax_extension *create_tagfilter_extension(void);
7
-
8
- #endif
@@ -1,156 +0,0 @@
1
- #include "tasklist.h"
2
- #include <parser.h>
3
- #include <render.h>
4
- #include <html.h>
5
- #include "ext_scanners.h"
6
-
7
- typedef enum {
8
- CMARK_TASKLIST_NOCHECKED,
9
- CMARK_TASKLIST_CHECKED,
10
- } cmark_tasklist_type;
11
-
12
- // Local constants
13
- static const char *TYPE_STRING = "tasklist";
14
-
15
- static const char *get_type_string(cmark_syntax_extension *extension, cmark_node *node) {
16
- return TYPE_STRING;
17
- }
18
-
19
-
20
- // Return 1 if state was set, 0 otherwise
21
- int cmark_gfm_extensions_set_tasklist_item_checked(cmark_node *node, bool is_checked) {
22
- // The node has to exist, and be an extension, and actually be the right type in order to get the value.
23
- if (!node || !node->extension || strcmp(cmark_node_get_type_string(node), TYPE_STRING))
24
- return 0;
25
-
26
- node->as.list.checked = is_checked;
27
- return 1;
28
- }
29
-
30
- bool cmark_gfm_extensions_get_tasklist_item_checked(cmark_node *node) {
31
- if (!node || !node->extension || strcmp(cmark_node_get_type_string(node), TYPE_STRING))
32
- return false;
33
-
34
- if (node->as.list.checked) {
35
- return true;
36
- }
37
- else {
38
- return false;
39
- }
40
- }
41
-
42
- static bool parse_node_item_prefix(cmark_parser *parser, const char *input,
43
- cmark_node *container) {
44
- bool res = false;
45
-
46
- if (parser->indent >=
47
- container->as.list.marker_offset + container->as.list.padding) {
48
- cmark_parser_advance_offset(parser, input, container->as.list.marker_offset +
49
- container->as.list.padding,
50
- true);
51
- res = true;
52
- } else if (parser->blank && container->first_child != NULL) {
53
- // if container->first_child is NULL, then the opening line
54
- // of the list item was blank after the list marker; in this
55
- // case, we are done with the list item.
56
- cmark_parser_advance_offset(parser, input, parser->first_nonspace - parser->offset,
57
- false);
58
- res = true;
59
- }
60
- return res;
61
- }
62
-
63
- static int matches(cmark_syntax_extension *self, cmark_parser *parser,
64
- unsigned char *input, int len,
65
- cmark_node *parent_container) {
66
- return parse_node_item_prefix(parser, (const char*)input, parent_container);
67
- }
68
-
69
- static int can_contain(cmark_syntax_extension *extension, cmark_node *node,
70
- cmark_node_type child_type) {
71
- return (node->type == CMARK_NODE_ITEM) ? 1 : 0;
72
- }
73
-
74
- static cmark_node *open_tasklist_item(cmark_syntax_extension *self,
75
- int indented, cmark_parser *parser,
76
- cmark_node *parent_container,
77
- unsigned char *input, int len) {
78
- cmark_node_type node_type = cmark_node_get_type(parent_container);
79
- if (node_type != CMARK_NODE_ITEM) {
80
- return NULL;
81
- }
82
-
83
- bufsize_t matched = scan_tasklist(input, len, 0);
84
- if (!matched) {
85
- return NULL;
86
- }
87
-
88
- cmark_node_set_syntax_extension(parent_container, self);
89
- cmark_parser_advance_offset(parser, (char *)input, 3, false);
90
-
91
- // Either an upper or lower case X means the task is completed.
92
- parent_container->as.list.checked = (strstr((char*)input, "[x]") || strstr((char*)input, "[X]"));
93
-
94
- return NULL;
95
- }
96
-
97
- static void commonmark_render(cmark_syntax_extension *extension,
98
- cmark_renderer *renderer, cmark_node *node,
99
- cmark_event_type ev_type, int options) {
100
- bool entering = (ev_type == CMARK_EVENT_ENTER);
101
- if (entering) {
102
- renderer->cr(renderer);
103
- if (node->as.list.checked) {
104
- renderer->out(renderer, node, "- [x] ", false, LITERAL);
105
- } else {
106
- renderer->out(renderer, node, "- [ ] ", false, LITERAL);
107
- }
108
- cmark_strbuf_puts(renderer->prefix, " ");
109
- } else {
110
- cmark_strbuf_truncate(renderer->prefix, renderer->prefix->size - 2);
111
- renderer->cr(renderer);
112
- }
113
- }
114
-
115
- static void html_render(cmark_syntax_extension *extension,
116
- cmark_html_renderer *renderer, cmark_node *node,
117
- cmark_event_type ev_type, int options) {
118
- bool entering = (ev_type == CMARK_EVENT_ENTER);
119
- if (entering) {
120
- cmark_html_render_cr(renderer->html);
121
- cmark_strbuf_puts(renderer->html, "<li");
122
- cmark_html_render_sourcepos(node, renderer->html, options);
123
- cmark_strbuf_putc(renderer->html, '>');
124
- if (node->as.list.checked) {
125
- cmark_strbuf_puts(renderer->html, "<input type=\"checkbox\" checked=\"\" disabled=\"\" /> ");
126
- } else {
127
- cmark_strbuf_puts(renderer->html, "<input type=\"checkbox\" disabled=\"\" /> ");
128
- }
129
- } else {
130
- cmark_strbuf_puts(renderer->html, "</li>\n");
131
- }
132
- }
133
-
134
- static const char *xml_attr(cmark_syntax_extension *extension,
135
- cmark_node *node) {
136
- if (node->as.list.checked) {
137
- return " completed=\"true\"";
138
- } else {
139
- return " completed=\"false\"";
140
- }
141
- }
142
-
143
- cmark_syntax_extension *create_tasklist_extension(void) {
144
- cmark_syntax_extension *ext = cmark_syntax_extension_new("tasklist");
145
-
146
- cmark_syntax_extension_set_match_block_func(ext, matches);
147
- cmark_syntax_extension_set_get_type_string_func(ext, get_type_string);
148
- cmark_syntax_extension_set_open_block_func(ext, open_tasklist_item);
149
- cmark_syntax_extension_set_can_contain_func(ext, can_contain);
150
- cmark_syntax_extension_set_commonmark_render_func(ext, commonmark_render);
151
- cmark_syntax_extension_set_plaintext_render_func(ext, commonmark_render);
152
- cmark_syntax_extension_set_html_render_func(ext, html_render);
153
- cmark_syntax_extension_set_xml_attr_func(ext, xml_attr);
154
-
155
- return ext;
156
- }
@@ -1,8 +0,0 @@
1
- #ifndef TASKLIST_H
2
- #define TASKLIST_H
3
-
4
- #include "cmark-gfm-core-extensions.h"
5
-
6
- cmark_syntax_extension *create_tasklist_extension(void);
7
-
8
- #endif
@@ -1,317 +0,0 @@
1
- #include <stdlib.h>
2
- #include <stdint.h>
3
- #include <assert.h>
4
-
5
- #include "cmark_ctype.h"
6
- #include "utf8.h"
7
-
8
- static const int8_t utf8proc_utf8class[256] = {
9
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
10
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
11
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
12
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
13
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
14
- 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
15
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
16
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
17
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
18
- 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
19
- 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0};
20
-
21
- static void encode_unknown(cmark_strbuf *buf) {
22
- static const uint8_t repl[] = {239, 191, 189};
23
- cmark_strbuf_put(buf, repl, 3);
24
- }
25
-
26
- static int utf8proc_charlen(const uint8_t *str, bufsize_t str_len) {
27
- int length, i;
28
-
29
- if (!str_len)
30
- return 0;
31
-
32
- length = utf8proc_utf8class[str[0]];
33
-
34
- if (!length)
35
- return -1;
36
-
37
- if (str_len >= 0 && (bufsize_t)length > str_len)
38
- return -str_len;
39
-
40
- for (i = 1; i < length; i++) {
41
- if ((str[i] & 0xC0) != 0x80)
42
- return -i;
43
- }
44
-
45
- return length;
46
- }
47
-
48
- // Validate a single UTF-8 character according to RFC 3629.
49
- static int utf8proc_valid(const uint8_t *str, bufsize_t str_len) {
50
- int length = utf8proc_utf8class[str[0]];
51
-
52
- if (!length)
53
- return -1;
54
-
55
- if ((bufsize_t)length > str_len)
56
- return -str_len;
57
-
58
- switch (length) {
59
- case 2:
60
- if ((str[1] & 0xC0) != 0x80)
61
- return -1;
62
- if (str[0] < 0xC2) {
63
- // Overlong
64
- return -length;
65
- }
66
- break;
67
-
68
- case 3:
69
- if ((str[1] & 0xC0) != 0x80)
70
- return -1;
71
- if ((str[2] & 0xC0) != 0x80)
72
- return -2;
73
- if (str[0] == 0xE0) {
74
- if (str[1] < 0xA0) {
75
- // Overlong
76
- return -length;
77
- }
78
- } else if (str[0] == 0xED) {
79
- if (str[1] >= 0xA0) {
80
- // Surrogate
81
- return -length;
82
- }
83
- }
84
- break;
85
-
86
- case 4:
87
- if ((str[1] & 0xC0) != 0x80)
88
- return -1;
89
- if ((str[2] & 0xC0) != 0x80)
90
- return -2;
91
- if ((str[3] & 0xC0) != 0x80)
92
- return -3;
93
- if (str[0] == 0xF0) {
94
- if (str[1] < 0x90) {
95
- // Overlong
96
- return -length;
97
- }
98
- } else if (str[0] >= 0xF4) {
99
- if (str[0] > 0xF4 || str[1] >= 0x90) {
100
- // Above 0x10FFFF
101
- return -length;
102
- }
103
- }
104
- break;
105
- }
106
-
107
- return length;
108
- }
109
-
110
- void cmark_utf8proc_check(cmark_strbuf *ob, const uint8_t *line,
111
- bufsize_t size) {
112
- bufsize_t i = 0;
113
-
114
- while (i < size) {
115
- bufsize_t org = i;
116
- int charlen = 0;
117
-
118
- while (i < size) {
119
- if (line[i] < 0x80 && line[i] != 0) {
120
- i++;
121
- } else if (line[i] >= 0x80) {
122
- charlen = utf8proc_valid(line + i, size - i);
123
- if (charlen < 0) {
124
- charlen = -charlen;
125
- break;
126
- }
127
- i += charlen;
128
- } else if (line[i] == 0) {
129
- // ASCII NUL is technically valid but rejected
130
- // for security reasons.
131
- charlen = 1;
132
- break;
133
- }
134
- }
135
-
136
- if (i > org) {
137
- cmark_strbuf_put(ob, line + org, i - org);
138
- }
139
-
140
- if (i >= size) {
141
- break;
142
- } else {
143
- // Invalid UTF-8
144
- encode_unknown(ob);
145
- i += charlen;
146
- }
147
- }
148
- }
149
-
150
- int cmark_utf8proc_iterate(const uint8_t *str, bufsize_t str_len,
151
- int32_t *dst) {
152
- int length;
153
- int32_t uc = -1;
154
-
155
- *dst = -1;
156
- length = utf8proc_charlen(str, str_len);
157
- if (length < 0)
158
- return -1;
159
-
160
- switch (length) {
161
- case 1:
162
- uc = str[0];
163
- break;
164
- case 2:
165
- uc = ((str[0] & 0x1F) << 6) + (str[1] & 0x3F);
166
- if (uc < 0x80)
167
- uc = -1;
168
- break;
169
- case 3:
170
- uc = ((str[0] & 0x0F) << 12) + ((str[1] & 0x3F) << 6) + (str[2] & 0x3F);
171
- if (uc < 0x800 || (uc >= 0xD800 && uc < 0xE000))
172
- uc = -1;
173
- break;
174
- case 4:
175
- uc = ((str[0] & 0x07) << 18) + ((str[1] & 0x3F) << 12) +
176
- ((str[2] & 0x3F) << 6) + (str[3] & 0x3F);
177
- if (uc < 0x10000 || uc >= 0x110000)
178
- uc = -1;
179
- break;
180
- }
181
-
182
- if (uc < 0)
183
- return -1;
184
-
185
- *dst = uc;
186
- return length;
187
- }
188
-
189
- void cmark_utf8proc_encode_char(int32_t uc, cmark_strbuf *buf) {
190
- uint8_t dst[4];
191
- bufsize_t len = 0;
192
-
193
- assert(uc >= 0);
194
-
195
- if (uc < 0x80) {
196
- dst[0] = (uint8_t)(uc);
197
- len = 1;
198
- } else if (uc < 0x800) {
199
- dst[0] = (uint8_t)(0xC0 + (uc >> 6));
200
- dst[1] = 0x80 + (uc & 0x3F);
201
- len = 2;
202
- } else if (uc == 0xFFFF) {
203
- dst[0] = 0xFF;
204
- len = 1;
205
- } else if (uc == 0xFFFE) {
206
- dst[0] = 0xFE;
207
- len = 1;
208
- } else if (uc < 0x10000) {
209
- dst[0] = (uint8_t)(0xE0 + (uc >> 12));
210
- dst[1] = 0x80 + ((uc >> 6) & 0x3F);
211
- dst[2] = 0x80 + (uc & 0x3F);
212
- len = 3;
213
- } else if (uc < 0x110000) {
214
- dst[0] = (uint8_t)(0xF0 + (uc >> 18));
215
- dst[1] = 0x80 + ((uc >> 12) & 0x3F);
216
- dst[2] = 0x80 + ((uc >> 6) & 0x3F);
217
- dst[3] = 0x80 + (uc & 0x3F);
218
- len = 4;
219
- } else {
220
- encode_unknown(buf);
221
- return;
222
- }
223
-
224
- cmark_strbuf_put(buf, dst, len);
225
- }
226
-
227
- void cmark_utf8proc_case_fold(cmark_strbuf *dest, const uint8_t *str,
228
- bufsize_t len) {
229
- int32_t c;
230
-
231
- #define bufpush(x) cmark_utf8proc_encode_char(x, dest)
232
-
233
- while (len > 0) {
234
- bufsize_t char_len = cmark_utf8proc_iterate(str, len, &c);
235
-
236
- if (char_len >= 0) {
237
- #include "case_fold_switch.inc"
238
- } else {
239
- encode_unknown(dest);
240
- char_len = -char_len;
241
- }
242
-
243
- str += char_len;
244
- len -= char_len;
245
- }
246
- }
247
-
248
- // matches anything in the Zs class, plus LF, CR, TAB, FF.
249
- int cmark_utf8proc_is_space(int32_t uc) {
250
- return (uc == 9 || uc == 10 || uc == 12 || uc == 13 || uc == 32 ||
251
- uc == 160 || uc == 5760 || (uc >= 8192 && uc <= 8202) || uc == 8239 ||
252
- uc == 8287 || uc == 12288);
253
- }
254
-
255
- // matches anything in the P[cdefios] classes.
256
- int cmark_utf8proc_is_punctuation(int32_t uc) {
257
- return (
258
- (uc < 128 && cmark_ispunct((char)uc)) || uc == 161 || uc == 167 ||
259
- uc == 171 || uc == 182 || uc == 183 || uc == 187 || uc == 191 ||
260
- uc == 894 || uc == 903 || (uc >= 1370 && uc <= 1375) || uc == 1417 ||
261
- uc == 1418 || uc == 1470 || uc == 1472 || uc == 1475 || uc == 1478 ||
262
- uc == 1523 || uc == 1524 || uc == 1545 || uc == 1546 || uc == 1548 ||
263
- uc == 1549 || uc == 1563 || uc == 1566 || uc == 1567 ||
264
- (uc >= 1642 && uc <= 1645) || uc == 1748 || (uc >= 1792 && uc <= 1805) ||
265
- (uc >= 2039 && uc <= 2041) || (uc >= 2096 && uc <= 2110) || uc == 2142 ||
266
- uc == 2404 || uc == 2405 || uc == 2416 || uc == 2800 || uc == 3572 ||
267
- uc == 3663 || uc == 3674 || uc == 3675 || (uc >= 3844 && uc <= 3858) ||
268
- uc == 3860 || (uc >= 3898 && uc <= 3901) || uc == 3973 ||
269
- (uc >= 4048 && uc <= 4052) || uc == 4057 || uc == 4058 ||
270
- (uc >= 4170 && uc <= 4175) || uc == 4347 || (uc >= 4960 && uc <= 4968) ||
271
- uc == 5120 || uc == 5741 || uc == 5742 || uc == 5787 || uc == 5788 ||
272
- (uc >= 5867 && uc <= 5869) || uc == 5941 || uc == 5942 ||
273
- (uc >= 6100 && uc <= 6102) || (uc >= 6104 && uc <= 6106) ||
274
- (uc >= 6144 && uc <= 6154) || uc == 6468 || uc == 6469 || uc == 6686 ||
275
- uc == 6687 || (uc >= 6816 && uc <= 6822) || (uc >= 6824 && uc <= 6829) ||
276
- (uc >= 7002 && uc <= 7008) || (uc >= 7164 && uc <= 7167) ||
277
- (uc >= 7227 && uc <= 7231) || uc == 7294 || uc == 7295 ||
278
- (uc >= 7360 && uc <= 7367) || uc == 7379 || (uc >= 8208 && uc <= 8231) ||
279
- (uc >= 8240 && uc <= 8259) || (uc >= 8261 && uc <= 8273) ||
280
- (uc >= 8275 && uc <= 8286) || uc == 8317 || uc == 8318 || uc == 8333 ||
281
- uc == 8334 || (uc >= 8968 && uc <= 8971) || uc == 9001 || uc == 9002 ||
282
- (uc >= 10088 && uc <= 10101) || uc == 10181 || uc == 10182 ||
283
- (uc >= 10214 && uc <= 10223) || (uc >= 10627 && uc <= 10648) ||
284
- (uc >= 10712 && uc <= 10715) || uc == 10748 || uc == 10749 ||
285
- (uc >= 11513 && uc <= 11516) || uc == 11518 || uc == 11519 ||
286
- uc == 11632 || (uc >= 11776 && uc <= 11822) ||
287
- (uc >= 11824 && uc <= 11842) || (uc >= 12289 && uc <= 12291) ||
288
- (uc >= 12296 && uc <= 12305) || (uc >= 12308 && uc <= 12319) ||
289
- uc == 12336 || uc == 12349 || uc == 12448 || uc == 12539 || uc == 42238 ||
290
- uc == 42239 || (uc >= 42509 && uc <= 42511) || uc == 42611 ||
291
- uc == 42622 || (uc >= 42738 && uc <= 42743) ||
292
- (uc >= 43124 && uc <= 43127) || uc == 43214 || uc == 43215 ||
293
- (uc >= 43256 && uc <= 43258) || uc == 43310 || uc == 43311 ||
294
- uc == 43359 || (uc >= 43457 && uc <= 43469) || uc == 43486 ||
295
- uc == 43487 || (uc >= 43612 && uc <= 43615) || uc == 43742 ||
296
- uc == 43743 || uc == 43760 || uc == 43761 || uc == 44011 || uc == 64830 ||
297
- uc == 64831 || (uc >= 65040 && uc <= 65049) ||
298
- (uc >= 65072 && uc <= 65106) || (uc >= 65108 && uc <= 65121) ||
299
- uc == 65123 || uc == 65128 || uc == 65130 || uc == 65131 ||
300
- (uc >= 65281 && uc <= 65283) || (uc >= 65285 && uc <= 65290) ||
301
- (uc >= 65292 && uc <= 65295) || uc == 65306 || uc == 65307 ||
302
- uc == 65311 || uc == 65312 || (uc >= 65339 && uc <= 65341) ||
303
- uc == 65343 || uc == 65371 || uc == 65373 ||
304
- (uc >= 65375 && uc <= 65381) || (uc >= 65792 && uc <= 65794) ||
305
- uc == 66463 || uc == 66512 || uc == 66927 || uc == 67671 || uc == 67871 ||
306
- uc == 67903 || (uc >= 68176 && uc <= 68184) || uc == 68223 ||
307
- (uc >= 68336 && uc <= 68342) || (uc >= 68409 && uc <= 68415) ||
308
- (uc >= 68505 && uc <= 68508) || (uc >= 69703 && uc <= 69709) ||
309
- uc == 69819 || uc == 69820 || (uc >= 69822 && uc <= 69825) ||
310
- (uc >= 69952 && uc <= 69955) || uc == 70004 || uc == 70005 ||
311
- (uc >= 70085 && uc <= 70088) || uc == 70093 ||
312
- (uc >= 70200 && uc <= 70205) || uc == 70854 ||
313
- (uc >= 71105 && uc <= 71113) || (uc >= 71233 && uc <= 71235) ||
314
- (uc >= 74864 && uc <= 74868) || uc == 92782 || uc == 92783 ||
315
- uc == 92917 || (uc >= 92983 && uc <= 92987) || uc == 92996 ||
316
- uc == 113823);
317
- }
@@ -1,35 +0,0 @@
1
- #ifndef CMARK_UTF8_H
2
- #define CMARK_UTF8_H
3
-
4
- #include <stdint.h>
5
- #include "buffer.h"
6
-
7
- #ifdef __cplusplus
8
- extern "C" {
9
- #endif
10
-
11
- CMARK_GFM_EXPORT
12
- void cmark_utf8proc_case_fold(cmark_strbuf *dest, const uint8_t *str,
13
- bufsize_t len);
14
-
15
- CMARK_GFM_EXPORT
16
- void cmark_utf8proc_encode_char(int32_t uc, cmark_strbuf *buf);
17
-
18
- CMARK_GFM_EXPORT
19
- int cmark_utf8proc_iterate(const uint8_t *str, bufsize_t str_len, int32_t *dst);
20
-
21
- CMARK_GFM_EXPORT
22
- void cmark_utf8proc_check(cmark_strbuf *dest, const uint8_t *line,
23
- bufsize_t size);
24
-
25
- CMARK_GFM_EXPORT
26
- int cmark_utf8proc_is_space(int32_t uc);
27
-
28
- CMARK_GFM_EXPORT
29
- int cmark_utf8proc_is_punctuation(int32_t uc);
30
-
31
- #ifdef __cplusplus
32
- }
33
- #endif
34
-
35
- #endif