commonmarker 0.23.6 → 1.0.0.pre

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +70 -212
  3. data/commonmarker.gemspec +34 -31
  4. data/ext/commonmarker/Cargo.toml +12 -0
  5. data/ext/commonmarker/_util.rb +102 -0
  6. data/ext/commonmarker/extconf.rb +4 -5
  7. data/ext/commonmarker/src/comrak_options.rs +107 -0
  8. data/ext/commonmarker/src/lib.rs +27 -0
  9. data/lib/commonmarker/config.rb +58 -37
  10. data/lib/commonmarker/extension.rb +14 -0
  11. data/lib/commonmarker/renderer.rb +1 -127
  12. data/lib/commonmarker/version.rb +2 -2
  13. data/lib/commonmarker.rb +19 -32
  14. metadata +33 -177
  15. data/Rakefile +0 -109
  16. data/bin/commonmarker +0 -118
  17. data/ext/commonmarker/arena.c +0 -103
  18. data/ext/commonmarker/autolink.c +0 -456
  19. data/ext/commonmarker/autolink.h +0 -8
  20. data/ext/commonmarker/blocks.c +0 -1596
  21. data/ext/commonmarker/buffer.c +0 -278
  22. data/ext/commonmarker/buffer.h +0 -116
  23. data/ext/commonmarker/case_fold_switch.inc +0 -4327
  24. data/ext/commonmarker/chunk.h +0 -135
  25. data/ext/commonmarker/cmark-gfm-core-extensions.h +0 -54
  26. data/ext/commonmarker/cmark-gfm-extension_api.h +0 -736
  27. data/ext/commonmarker/cmark-gfm-extensions_export.h +0 -42
  28. data/ext/commonmarker/cmark-gfm.h +0 -817
  29. data/ext/commonmarker/cmark-gfm_export.h +0 -42
  30. data/ext/commonmarker/cmark-gfm_version.h +0 -7
  31. data/ext/commonmarker/cmark.c +0 -55
  32. data/ext/commonmarker/cmark_ctype.c +0 -44
  33. data/ext/commonmarker/cmark_ctype.h +0 -33
  34. data/ext/commonmarker/commonmark.c +0 -529
  35. data/ext/commonmarker/commonmarker.c +0 -1307
  36. data/ext/commonmarker/commonmarker.h +0 -16
  37. data/ext/commonmarker/config.h +0 -76
  38. data/ext/commonmarker/core-extensions.c +0 -27
  39. data/ext/commonmarker/entities.inc +0 -2138
  40. data/ext/commonmarker/ext_scanners.c +0 -879
  41. data/ext/commonmarker/ext_scanners.h +0 -24
  42. data/ext/commonmarker/footnotes.c +0 -63
  43. data/ext/commonmarker/footnotes.h +0 -27
  44. data/ext/commonmarker/houdini.h +0 -57
  45. data/ext/commonmarker/houdini_href_e.c +0 -100
  46. data/ext/commonmarker/houdini_html_e.c +0 -66
  47. data/ext/commonmarker/houdini_html_u.c +0 -149
  48. data/ext/commonmarker/html.c +0 -486
  49. data/ext/commonmarker/html.h +0 -27
  50. data/ext/commonmarker/inlines.c +0 -1716
  51. data/ext/commonmarker/inlines.h +0 -29
  52. data/ext/commonmarker/iterator.c +0 -159
  53. data/ext/commonmarker/iterator.h +0 -26
  54. data/ext/commonmarker/latex.c +0 -466
  55. data/ext/commonmarker/linked_list.c +0 -37
  56. data/ext/commonmarker/man.c +0 -278
  57. data/ext/commonmarker/map.c +0 -122
  58. data/ext/commonmarker/map.h +0 -41
  59. data/ext/commonmarker/node.c +0 -979
  60. data/ext/commonmarker/node.h +0 -125
  61. data/ext/commonmarker/parser.h +0 -58
  62. data/ext/commonmarker/plaintext.c +0 -235
  63. data/ext/commonmarker/plugin.c +0 -36
  64. data/ext/commonmarker/plugin.h +0 -34
  65. data/ext/commonmarker/references.c +0 -42
  66. data/ext/commonmarker/references.h +0 -26
  67. data/ext/commonmarker/registry.c +0 -63
  68. data/ext/commonmarker/registry.h +0 -24
  69. data/ext/commonmarker/render.c +0 -205
  70. data/ext/commonmarker/render.h +0 -62
  71. data/ext/commonmarker/scanners.c +0 -10508
  72. data/ext/commonmarker/scanners.h +0 -62
  73. data/ext/commonmarker/scanners.re +0 -341
  74. data/ext/commonmarker/strikethrough.c +0 -167
  75. data/ext/commonmarker/strikethrough.h +0 -9
  76. data/ext/commonmarker/syntax_extension.c +0 -149
  77. data/ext/commonmarker/syntax_extension.h +0 -34
  78. data/ext/commonmarker/table.c +0 -848
  79. data/ext/commonmarker/table.h +0 -12
  80. data/ext/commonmarker/tagfilter.c +0 -60
  81. data/ext/commonmarker/tagfilter.h +0 -8
  82. data/ext/commonmarker/tasklist.c +0 -156
  83. data/ext/commonmarker/tasklist.h +0 -8
  84. data/ext/commonmarker/utf8.c +0 -317
  85. data/ext/commonmarker/utf8.h +0 -35
  86. data/ext/commonmarker/xml.c +0 -181
  87. data/lib/commonmarker/node/inspect.rb +0 -47
  88. data/lib/commonmarker/node.rb +0 -83
  89. data/lib/commonmarker/renderer/html_renderer.rb +0 -252
@@ -1,12 +0,0 @@
1
- #ifndef CMARK_GFM_TABLE_H
2
- #define CMARK_GFM_TABLE_H
3
-
4
- #include "cmark-gfm-core-extensions.h"
5
-
6
-
7
- extern cmark_node_type CMARK_NODE_TABLE, CMARK_NODE_TABLE_ROW,
8
- CMARK_NODE_TABLE_CELL;
9
-
10
- cmark_syntax_extension *create_table_extension(void);
11
-
12
- #endif
@@ -1,60 +0,0 @@
1
- #include "tagfilter.h"
2
- #include <parser.h>
3
- #include <ctype.h>
4
-
5
/* Lower-case names of the raw-HTML tags rejected by the tagfilter
 * extension. The array ends with a NULL sentinel so it can be walked
 * without carrying a separate length. */
static const char *blacklist[] = {
    "title",
    "textarea",
    "style",
    "xmp",
    "iframe",
    "noembed",
    "noframes",
    "script",
    "plaintext",
    NULL,
};
9
-
10
/* Report whether the raw HTML in tag_data (tag_size bytes) is an opening or
 * closing tag named `tagname`.
 *
 * `tagname` is compared as-is against the lower-cased input, so it must
 * already be lower-case. A match requires the name to be followed by
 * whitespace, '>' or "/>"; a name that runs to the very end of the buffer
 * is not a match.
 *
 * Returns 1 on match, 0 otherwise. */
static int is_tag(const unsigned char *tag_data, size_t tag_size,
                  const char *tagname) {
  size_t pos = 1;

  if (tag_size < 3 || tag_data[0] != '<')
    return 0;

  /* Closing tags ("</name") are treated like opening ones. */
  if (tag_data[pos] == '/')
    pos++;

  /* Consume the tag name, comparing case-insensitively. */
  while (pos < tag_size && *tagname != 0) {
    if (tolower(tag_data[pos]) != *tagname)
      return 0;
    pos++;
    tagname++;
  }

  /* Nothing left after the name: there is no terminator to inspect. */
  if (pos == tag_size)
    return 0;

  if (cmark_isspace(tag_data[pos]) || tag_data[pos] == '>')
    return 1;

  /* Self-closing form: "/>" directly after the name. */
  return (tag_data[pos] == '/' && tag_size >= pos + 2 &&
          tag_data[pos + 1] == '>')
             ? 1
             : 0;
}
42
-
43
- static int filter(cmark_syntax_extension *ext, const unsigned char *tag,
44
- size_t tag_len) {
45
- const char **it;
46
-
47
- for (it = blacklist; *it; ++it) {
48
- if (is_tag(tag, tag_len, *it)) {
49
- return 0;
50
- }
51
- }
52
-
53
- return 1;
54
- }
55
-
56
- cmark_syntax_extension *create_tagfilter_extension(void) {
57
- cmark_syntax_extension *ext = cmark_syntax_extension_new("tagfilter");
58
- cmark_syntax_extension_set_html_filter_func(ext, filter);
59
- return ext;
60
- }
@@ -1,8 +0,0 @@
1
- #ifndef CMARK_GFM_TAGFILTER_H
2
- #define CMARK_GFM_TAGFILTER_H
3
-
4
- #include "cmark-gfm-core-extensions.h"
5
-
6
- cmark_syntax_extension *create_tagfilter_extension(void);
7
-
8
- #endif
@@ -1,156 +0,0 @@
1
- #include "tasklist.h"
2
- #include <parser.h>
3
- #include <render.h>
4
- #include <html.h>
5
- #include "ext_scanners.h"
6
-
7
/* Checked state of a task list item. */
typedef enum {
  CMARK_TASKLIST_NOCHECKED = 0,
  CMARK_TASKLIST_CHECKED = 1,
} cmark_tasklist_type;
11
-
12
- // Local constants
13
- static const char *TYPE_STRING = "tasklist";
14
-
15
- static const char *get_type_string(cmark_syntax_extension *extension, cmark_node *node) {
16
- return TYPE_STRING;
17
- }
18
-
19
-
20
- // Return 1 if state was set, 0 otherwise
21
- int cmark_gfm_extensions_set_tasklist_item_checked(cmark_node *node, bool is_checked) {
22
- // The node has to exist, and be an extension, and actually be the right type in order to get the value.
23
- if (!node || !node->extension || strcmp(cmark_node_get_type_string(node), TYPE_STRING))
24
- return 0;
25
-
26
- node->as.list.checked = is_checked;
27
- return 1;
28
- }
29
-
30
- bool cmark_gfm_extensions_get_tasklist_item_checked(cmark_node *node) {
31
- if (!node || !node->extension || strcmp(cmark_node_get_type_string(node), TYPE_STRING))
32
- return false;
33
-
34
- if (node->as.list.checked) {
35
- return true;
36
- }
37
- else {
38
- return false;
39
- }
40
- }
41
-
42
- static bool parse_node_item_prefix(cmark_parser *parser, const char *input,
43
- cmark_node *container) {
44
- bool res = false;
45
-
46
- if (parser->indent >=
47
- container->as.list.marker_offset + container->as.list.padding) {
48
- cmark_parser_advance_offset(parser, input, container->as.list.marker_offset +
49
- container->as.list.padding,
50
- true);
51
- res = true;
52
- } else if (parser->blank && container->first_child != NULL) {
53
- // if container->first_child is NULL, then the opening line
54
- // of the list item was blank after the list marker; in this
55
- // case, we are done with the list item.
56
- cmark_parser_advance_offset(parser, input, parser->first_nonspace - parser->offset,
57
- false);
58
- res = true;
59
- }
60
- return res;
61
- }
62
-
63
- static int matches(cmark_syntax_extension *self, cmark_parser *parser,
64
- unsigned char *input, int len,
65
- cmark_node *parent_container) {
66
- return parse_node_item_prefix(parser, (const char*)input, parent_container);
67
- }
68
-
69
- static int can_contain(cmark_syntax_extension *extension, cmark_node *node,
70
- cmark_node_type child_type) {
71
- return (node->type == CMARK_NODE_ITEM) ? 1 : 0;
72
- }
73
-
74
- static cmark_node *open_tasklist_item(cmark_syntax_extension *self,
75
- int indented, cmark_parser *parser,
76
- cmark_node *parent_container,
77
- unsigned char *input, int len) {
78
- cmark_node_type node_type = cmark_node_get_type(parent_container);
79
- if (node_type != CMARK_NODE_ITEM) {
80
- return NULL;
81
- }
82
-
83
- bufsize_t matched = scan_tasklist(input, len, 0);
84
- if (!matched) {
85
- return NULL;
86
- }
87
-
88
- cmark_node_set_syntax_extension(parent_container, self);
89
- cmark_parser_advance_offset(parser, (char *)input, 3, false);
90
-
91
- // Either an upper or lower case X means the task is completed.
92
- parent_container->as.list.checked = (strstr((char*)input, "[x]") || strstr((char*)input, "[X]"));
93
-
94
- return NULL;
95
- }
96
-
97
- static void commonmark_render(cmark_syntax_extension *extension,
98
- cmark_renderer *renderer, cmark_node *node,
99
- cmark_event_type ev_type, int options) {
100
- bool entering = (ev_type == CMARK_EVENT_ENTER);
101
- if (entering) {
102
- renderer->cr(renderer);
103
- if (node->as.list.checked) {
104
- renderer->out(renderer, node, "- [x] ", false, LITERAL);
105
- } else {
106
- renderer->out(renderer, node, "- [ ] ", false, LITERAL);
107
- }
108
- cmark_strbuf_puts(renderer->prefix, " ");
109
- } else {
110
- cmark_strbuf_truncate(renderer->prefix, renderer->prefix->size - 2);
111
- renderer->cr(renderer);
112
- }
113
- }
114
-
115
- static void html_render(cmark_syntax_extension *extension,
116
- cmark_html_renderer *renderer, cmark_node *node,
117
- cmark_event_type ev_type, int options) {
118
- bool entering = (ev_type == CMARK_EVENT_ENTER);
119
- if (entering) {
120
- cmark_html_render_cr(renderer->html);
121
- cmark_strbuf_puts(renderer->html, "<li");
122
- cmark_html_render_sourcepos(node, renderer->html, options);
123
- cmark_strbuf_putc(renderer->html, '>');
124
- if (node->as.list.checked) {
125
- cmark_strbuf_puts(renderer->html, "<input type=\"checkbox\" checked=\"\" disabled=\"\" /> ");
126
- } else {
127
- cmark_strbuf_puts(renderer->html, "<input type=\"checkbox\" disabled=\"\" /> ");
128
- }
129
- } else {
130
- cmark_strbuf_puts(renderer->html, "</li>\n");
131
- }
132
- }
133
-
134
- static const char *xml_attr(cmark_syntax_extension *extension,
135
- cmark_node *node) {
136
- if (node->as.list.checked) {
137
- return " completed=\"true\"";
138
- } else {
139
- return " completed=\"false\"";
140
- }
141
- }
142
-
143
- cmark_syntax_extension *create_tasklist_extension(void) {
144
- cmark_syntax_extension *ext = cmark_syntax_extension_new("tasklist");
145
-
146
- cmark_syntax_extension_set_match_block_func(ext, matches);
147
- cmark_syntax_extension_set_get_type_string_func(ext, get_type_string);
148
- cmark_syntax_extension_set_open_block_func(ext, open_tasklist_item);
149
- cmark_syntax_extension_set_can_contain_func(ext, can_contain);
150
- cmark_syntax_extension_set_commonmark_render_func(ext, commonmark_render);
151
- cmark_syntax_extension_set_plaintext_render_func(ext, commonmark_render);
152
- cmark_syntax_extension_set_html_render_func(ext, html_render);
153
- cmark_syntax_extension_set_xml_attr_func(ext, xml_attr);
154
-
155
- return ext;
156
- }
@@ -1,8 +0,0 @@
1
- #ifndef TASKLIST_H
2
- #define TASKLIST_H
3
-
4
- #include "cmark-gfm-core-extensions.h"
5
-
6
- cmark_syntax_extension *create_tasklist_extension(void);
7
-
8
- #endif
@@ -1,317 +0,0 @@
1
- #include <stdlib.h>
2
- #include <stdint.h>
3
- #include <assert.h>
4
-
5
- #include "cmark_ctype.h"
6
- #include "utf8.h"
7
-
8
/* Sequence length implied by each possible first byte of a UTF-8
 * sequence. 0 marks bytes that cannot start a sequence (continuation
 * bytes 0x80-0xBF and 0xF8-0xFF). */
static const int8_t utf8proc_utf8class[256] = {
    /* 0x00-0x7F: single-byte (ASCII) */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x80-0xBF: not a valid first byte */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xC0-0xDF: two-byte lead */
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* 0xE0-0xEF: three-byte lead */
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    /* 0xF0-0xF7: four-byte lead; 0xF8-0xFF: not a valid first byte */
    4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0};
20
-
21
- static void encode_unknown(cmark_strbuf *buf) {
22
- static const uint8_t repl[] = {239, 191, 189};
23
- cmark_strbuf_put(buf, repl, 3);
24
- }
25
-
26
- static int utf8proc_charlen(const uint8_t *str, bufsize_t str_len) {
27
- int length, i;
28
-
29
- if (!str_len)
30
- return 0;
31
-
32
- length = utf8proc_utf8class[str[0]];
33
-
34
- if (!length)
35
- return -1;
36
-
37
- if (str_len >= 0 && (bufsize_t)length > str_len)
38
- return -str_len;
39
-
40
- for (i = 1; i < length; i++) {
41
- if ((str[i] & 0xC0) != 0x80)
42
- return -i;
43
- }
44
-
45
- return length;
46
- }
47
-
48
- // Validate a single UTF-8 character according to RFC 3629.
49
- static int utf8proc_valid(const uint8_t *str, bufsize_t str_len) {
50
- int length = utf8proc_utf8class[str[0]];
51
-
52
- if (!length)
53
- return -1;
54
-
55
- if ((bufsize_t)length > str_len)
56
- return -str_len;
57
-
58
- switch (length) {
59
- case 2:
60
- if ((str[1] & 0xC0) != 0x80)
61
- return -1;
62
- if (str[0] < 0xC2) {
63
- // Overlong
64
- return -length;
65
- }
66
- break;
67
-
68
- case 3:
69
- if ((str[1] & 0xC0) != 0x80)
70
- return -1;
71
- if ((str[2] & 0xC0) != 0x80)
72
- return -2;
73
- if (str[0] == 0xE0) {
74
- if (str[1] < 0xA0) {
75
- // Overlong
76
- return -length;
77
- }
78
- } else if (str[0] == 0xED) {
79
- if (str[1] >= 0xA0) {
80
- // Surrogate
81
- return -length;
82
- }
83
- }
84
- break;
85
-
86
- case 4:
87
- if ((str[1] & 0xC0) != 0x80)
88
- return -1;
89
- if ((str[2] & 0xC0) != 0x80)
90
- return -2;
91
- if ((str[3] & 0xC0) != 0x80)
92
- return -3;
93
- if (str[0] == 0xF0) {
94
- if (str[1] < 0x90) {
95
- // Overlong
96
- return -length;
97
- }
98
- } else if (str[0] >= 0xF4) {
99
- if (str[0] > 0xF4 || str[1] >= 0x90) {
100
- // Above 0x10FFFF
101
- return -length;
102
- }
103
- }
104
- break;
105
- }
106
-
107
- return length;
108
- }
109
-
110
- void cmark_utf8proc_check(cmark_strbuf *ob, const uint8_t *line,
111
- bufsize_t size) {
112
- bufsize_t i = 0;
113
-
114
- while (i < size) {
115
- bufsize_t org = i;
116
- int charlen = 0;
117
-
118
- while (i < size) {
119
- if (line[i] < 0x80 && line[i] != 0) {
120
- i++;
121
- } else if (line[i] >= 0x80) {
122
- charlen = utf8proc_valid(line + i, size - i);
123
- if (charlen < 0) {
124
- charlen = -charlen;
125
- break;
126
- }
127
- i += charlen;
128
- } else if (line[i] == 0) {
129
- // ASCII NUL is technically valid but rejected
130
- // for security reasons.
131
- charlen = 1;
132
- break;
133
- }
134
- }
135
-
136
- if (i > org) {
137
- cmark_strbuf_put(ob, line + org, i - org);
138
- }
139
-
140
- if (i >= size) {
141
- break;
142
- } else {
143
- // Invalid UTF-8
144
- encode_unknown(ob);
145
- i += charlen;
146
- }
147
- }
148
- }
149
-
150
- int cmark_utf8proc_iterate(const uint8_t *str, bufsize_t str_len,
151
- int32_t *dst) {
152
- int length;
153
- int32_t uc = -1;
154
-
155
- *dst = -1;
156
- length = utf8proc_charlen(str, str_len);
157
- if (length < 0)
158
- return -1;
159
-
160
- switch (length) {
161
- case 1:
162
- uc = str[0];
163
- break;
164
- case 2:
165
- uc = ((str[0] & 0x1F) << 6) + (str[1] & 0x3F);
166
- if (uc < 0x80)
167
- uc = -1;
168
- break;
169
- case 3:
170
- uc = ((str[0] & 0x0F) << 12) + ((str[1] & 0x3F) << 6) + (str[2] & 0x3F);
171
- if (uc < 0x800 || (uc >= 0xD800 && uc < 0xE000))
172
- uc = -1;
173
- break;
174
- case 4:
175
- uc = ((str[0] & 0x07) << 18) + ((str[1] & 0x3F) << 12) +
176
- ((str[2] & 0x3F) << 6) + (str[3] & 0x3F);
177
- if (uc < 0x10000 || uc >= 0x110000)
178
- uc = -1;
179
- break;
180
- }
181
-
182
- if (uc < 0)
183
- return -1;
184
-
185
- *dst = uc;
186
- return length;
187
- }
188
-
189
- void cmark_utf8proc_encode_char(int32_t uc, cmark_strbuf *buf) {
190
- uint8_t dst[4];
191
- bufsize_t len = 0;
192
-
193
- assert(uc >= 0);
194
-
195
- if (uc < 0x80) {
196
- dst[0] = (uint8_t)(uc);
197
- len = 1;
198
- } else if (uc < 0x800) {
199
- dst[0] = (uint8_t)(0xC0 + (uc >> 6));
200
- dst[1] = 0x80 + (uc & 0x3F);
201
- len = 2;
202
- } else if (uc == 0xFFFF) {
203
- dst[0] = 0xFF;
204
- len = 1;
205
- } else if (uc == 0xFFFE) {
206
- dst[0] = 0xFE;
207
- len = 1;
208
- } else if (uc < 0x10000) {
209
- dst[0] = (uint8_t)(0xE0 + (uc >> 12));
210
- dst[1] = 0x80 + ((uc >> 6) & 0x3F);
211
- dst[2] = 0x80 + (uc & 0x3F);
212
- len = 3;
213
- } else if (uc < 0x110000) {
214
- dst[0] = (uint8_t)(0xF0 + (uc >> 18));
215
- dst[1] = 0x80 + ((uc >> 12) & 0x3F);
216
- dst[2] = 0x80 + ((uc >> 6) & 0x3F);
217
- dst[3] = 0x80 + (uc & 0x3F);
218
- len = 4;
219
- } else {
220
- encode_unknown(buf);
221
- return;
222
- }
223
-
224
- cmark_strbuf_put(buf, dst, len);
225
- }
226
-
227
- void cmark_utf8proc_case_fold(cmark_strbuf *dest, const uint8_t *str,
228
- bufsize_t len) {
229
- int32_t c;
230
-
231
- #define bufpush(x) cmark_utf8proc_encode_char(x, dest)
232
-
233
- while (len > 0) {
234
- bufsize_t char_len = cmark_utf8proc_iterate(str, len, &c);
235
-
236
- if (char_len >= 0) {
237
- #include "case_fold_switch.inc"
238
- } else {
239
- encode_unknown(dest);
240
- char_len = -char_len;
241
- }
242
-
243
- str += char_len;
244
- len -= char_len;
245
- }
246
- }
247
-
248
// matches anything in the Zs class, plus LF, CR, TAB, FF.
// Returns 1 for a matching code point, 0 otherwise.
int cmark_utf8proc_is_space(int32_t uc) {
  // Contiguous block U+2000..U+200A.
  if (uc >= 8192 && uc <= 8202)
    return 1;

  switch (uc) {
  case 9:     // TAB
  case 10:    // LF
  case 12:    // FF
  case 13:    // CR
  case 32:    // U+0020
  case 160:   // U+00A0
  case 5760:  // U+1680
  case 8239:  // U+202F
  case 8287:  // U+205F
  case 12288: // U+3000
    return 1;
  default:
    return 0;
  }
}
254
-
255
// matches anything in the P[cdefios] classes.
// Returns 1 for a matching code point, 0 otherwise.
//
// Code points below 128 are classified by cmark_ispunct(); everything else
// is looked up in the inclusive [lo, hi] table below.
int cmark_utf8proc_is_punctuation(int32_t uc) {
  static const struct {
    int32_t lo;
    int32_t hi;
  } spans[] = {
      {161, 161},       {167, 167},       {171, 171},       {182, 182},
      {183, 183},       {187, 187},       {191, 191},       {894, 894},
      {903, 903},       {1370, 1375},     {1417, 1417},     {1418, 1418},
      {1470, 1470},     {1472, 1472},     {1475, 1475},     {1478, 1478},
      {1523, 1523},     {1524, 1524},     {1545, 1545},     {1546, 1546},
      {1548, 1548},     {1549, 1549},     {1563, 1563},     {1566, 1566},
      {1567, 1567},     {1642, 1645},     {1748, 1748},     {1792, 1805},
      {2039, 2041},     {2096, 2110},     {2142, 2142},     {2404, 2404},
      {2405, 2405},     {2416, 2416},     {2800, 2800},     {3572, 3572},
      {3663, 3663},     {3674, 3674},     {3675, 3675},     {3844, 3858},
      {3860, 3860},     {3898, 3901},     {3973, 3973},     {4048, 4052},
      {4057, 4057},     {4058, 4058},     {4170, 4175},     {4347, 4347},
      {4960, 4968},     {5120, 5120},     {5741, 5741},     {5742, 5742},
      {5787, 5787},     {5788, 5788},     {5867, 5869},     {5941, 5941},
      {5942, 5942},     {6100, 6102},     {6104, 6106},     {6144, 6154},
      {6468, 6468},     {6469, 6469},     {6686, 6686},     {6687, 6687},
      {6816, 6822},     {6824, 6829},     {7002, 7008},     {7164, 7167},
      {7227, 7231},     {7294, 7294},     {7295, 7295},     {7360, 7367},
      {7379, 7379},     {8208, 8231},     {8240, 8259},     {8261, 8273},
      {8275, 8286},     {8317, 8317},     {8318, 8318},     {8333, 8333},
      {8334, 8334},     {8968, 8971},     {9001, 9001},     {9002, 9002},
      {10088, 10101},   {10181, 10181},   {10182, 10182},   {10214, 10223},
      {10627, 10648},   {10712, 10715},   {10748, 10748},   {10749, 10749},
      {11513, 11516},   {11518, 11518},   {11519, 11519},   {11632, 11632},
      {11776, 11822},   {11824, 11842},   {12289, 12291},   {12296, 12305},
      {12308, 12319},   {12336, 12336},   {12349, 12349},   {12448, 12448},
      {12539, 12539},   {42238, 42238},   {42239, 42239},   {42509, 42511},
      {42611, 42611},   {42622, 42622},   {42738, 42743},   {43124, 43127},
      {43214, 43214},   {43215, 43215},   {43256, 43258},   {43310, 43310},
      {43311, 43311},   {43359, 43359},   {43457, 43469},   {43486, 43486},
      {43487, 43487},   {43612, 43615},   {43742, 43742},   {43743, 43743},
      {43760, 43760},   {43761, 43761},   {44011, 44011},   {64830, 64830},
      {64831, 64831},   {65040, 65049},   {65072, 65106},   {65108, 65121},
      {65123, 65123},   {65128, 65128},   {65130, 65130},   {65131, 65131},
      {65281, 65283},   {65285, 65290},   {65292, 65295},   {65306, 65306},
      {65307, 65307},   {65311, 65311},   {65312, 65312},   {65339, 65341},
      {65343, 65343},   {65371, 65371},   {65373, 65373},   {65375, 65381},
      {65792, 65794},   {66463, 66463},   {66512, 66512},   {66927, 66927},
      {67671, 67671},   {67871, 67871},   {67903, 67903},   {68176, 68184},
      {68223, 68223},   {68336, 68342},   {68409, 68415},   {68505, 68508},
      {69703, 69709},   {69819, 69819},   {69820, 69820},   {69822, 69825},
      {69952, 69955},   {70004, 70004},   {70005, 70005},   {70085, 70088},
      {70093, 70093},   {70200, 70205},   {70854, 70854},   {71105, 71113},
      {71233, 71235},   {74864, 74868},   {92782, 92782},   {92783, 92783},
      {92917, 92917},   {92983, 92987},   {92996, 92996},   {113823, 113823},
  };

  if (uc < 128 && cmark_ispunct((char)uc))
    return 1;

  for (size_t k = 0; k < sizeof(spans) / sizeof(spans[0]); k++) {
    if (uc >= spans[k].lo && uc <= spans[k].hi)
      return 1;
  }

  return 0;
}
@@ -1,35 +0,0 @@
1
- #ifndef CMARK_UTF8_H
2
- #define CMARK_UTF8_H
3
-
4
- #include <stdint.h>
5
- #include "buffer.h"
6
-
7
- #ifdef __cplusplus
8
- extern "C" {
9
- #endif
10
-
11
- CMARK_GFM_EXPORT
12
- void cmark_utf8proc_case_fold(cmark_strbuf *dest, const uint8_t *str,
13
- bufsize_t len);
14
-
15
- CMARK_GFM_EXPORT
16
- void cmark_utf8proc_encode_char(int32_t uc, cmark_strbuf *buf);
17
-
18
- CMARK_GFM_EXPORT
19
- int cmark_utf8proc_iterate(const uint8_t *str, bufsize_t str_len, int32_t *dst);
20
-
21
- CMARK_GFM_EXPORT
22
- void cmark_utf8proc_check(cmark_strbuf *dest, const uint8_t *line,
23
- bufsize_t size);
24
-
25
- CMARK_GFM_EXPORT
26
- int cmark_utf8proc_is_space(int32_t uc);
27
-
28
- CMARK_GFM_EXPORT
29
- int cmark_utf8proc_is_punctuation(int32_t uc);
30
-
31
- #ifdef __cplusplus
32
- }
33
- #endif
34
-
35
- #endif