commonmarker 0.3.0 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of commonmarker might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/ext/commonmarker/cmark/CMakeLists.txt +10 -4
- data/ext/commonmarker/cmark/Makefile +5 -5
- data/ext/commonmarker/cmark/api_test/CMakeLists.txt +1 -1
- data/ext/commonmarker/cmark/api_test/main.c +16 -0
- data/ext/commonmarker/cmark/build/CMakeCache.txt +3 -4
- data/ext/commonmarker/cmark/build/CMakeFiles/2.8.10.1/CMakeSystem.cmake +4 -4
- data/ext/commonmarker/cmark/build/CMakeFiles/CMakeError.log +12 -12
- data/ext/commonmarker/cmark/build/CMakeFiles/CMakeOutput.log +97 -142
- data/ext/commonmarker/cmark/build/CMakeFiles/Makefile.cmake +0 -1
- data/ext/commonmarker/cmark/build/api_test/CMakeFiles/api_test.dir/build.make +1 -1
- data/ext/commonmarker/cmark/build/api_test/CMakeFiles/api_test.dir/link.txt +1 -1
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark.dir/DependInfo.cmake +1 -1
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark.dir/build.make +23 -23
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark.dir/cmake_clean.cmake +2 -2
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark.dir/link.txt +1 -1
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/blocks.c.o +0 -0
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/buffer.c.o +0 -0
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/cmark.c.o +0 -0
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/commonmark.c.o +0 -0
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/houdini_html_u.c.o +0 -0
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/html.c.o +0 -0
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/inlines.c.o +0 -0
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/node.c.o +0 -0
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/references.c.o +0 -0
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/render.c.o +0 -0
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/scanners.c.o +0 -0
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/utf8.c.o +0 -0
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/xml.c.o +0 -0
- data/ext/commonmarker/cmark/build/src/cmake_install.cmake +3 -3
- data/ext/commonmarker/cmark/build/src/cmark_version.h +2 -2
- data/ext/commonmarker/cmark/build/src/config.h +6 -6
- data/ext/commonmarker/cmark/build/src/libcmark.a +0 -0
- data/ext/commonmarker/cmark/build/src/libcmark.pc +1 -1
- data/ext/commonmarker/cmark/build/testdir/CTestTestfile.cmake +4 -4
- data/ext/commonmarker/cmark/changelog.txt +46 -0
- data/ext/commonmarker/cmark/man/man3/cmark.3 +21 -20
- data/ext/commonmarker/cmark/src/CMakeLists.txt +4 -6
- data/ext/commonmarker/cmark/src/bench.h +8 -8
- data/ext/commonmarker/cmark/src/blocks.c +917 -947
- data/ext/commonmarker/cmark/src/buffer.c +213 -288
- data/ext/commonmarker/cmark/src/buffer.h +19 -21
- data/ext/commonmarker/cmark/src/chunk.h +78 -82
- data/ext/commonmarker/cmark/src/cmark.c +9 -17
- data/ext/commonmarker/cmark/src/cmark.h +113 -157
- data/ext/commonmarker/cmark/src/cmark_ctype.c +24 -35
- data/ext/commonmarker/cmark/src/commonmark.c +390 -425
- data/ext/commonmarker/cmark/src/config.h.in +6 -6
- data/ext/commonmarker/cmark/src/houdini.h +21 -15
- data/ext/commonmarker/cmark/src/houdini_href_e.c +50 -57
- data/ext/commonmarker/cmark/src/houdini_html_e.c +36 -51
- data/ext/commonmarker/cmark/src/houdini_html_u.c +119 -124
- data/ext/commonmarker/cmark/src/html.c +289 -307
- data/ext/commonmarker/cmark/src/inlines.c +976 -1030
- data/ext/commonmarker/cmark/src/inlines.h +4 -2
- data/ext/commonmarker/cmark/src/iterator.c +96 -126
- data/ext/commonmarker/cmark/src/iterator.h +5 -5
- data/ext/commonmarker/cmark/src/latex.c +379 -401
- data/ext/commonmarker/cmark/src/main.c +168 -175
- data/ext/commonmarker/cmark/src/man.c +212 -226
- data/ext/commonmarker/cmark/src/node.c +746 -839
- data/ext/commonmarker/cmark/src/node.h +47 -48
- data/ext/commonmarker/cmark/src/parser.h +14 -14
- data/ext/commonmarker/cmark/src/references.c +101 -111
- data/ext/commonmarker/cmark/src/references.h +10 -8
- data/ext/commonmarker/cmark/src/render.c +144 -167
- data/ext/commonmarker/cmark/src/render.h +22 -41
- data/ext/commonmarker/cmark/src/scanners.c +27695 -20903
- data/ext/commonmarker/cmark/src/scanners.h +2 -1
- data/ext/commonmarker/cmark/src/scanners.re +1 -1
- data/ext/commonmarker/cmark/src/utf8.c +276 -419
- data/ext/commonmarker/cmark/src/utf8.h +6 -6
- data/ext/commonmarker/cmark/src/xml.c +129 -144
- data/ext/commonmarker/cmark/test/CMakeLists.txt +4 -4
- data/ext/commonmarker/cmark/test/smart_punct.txt +8 -0
- data/ext/commonmarker/cmark/test/spec.txt +109 -47
- data/lib/commonmarker/version.rb +1 -1
- metadata +2 -2
@@ -6,452 +6,309 @@
|
|
6
6
|
#include "utf8.h"
|
7
7
|
|
8
8
|
// Sequence length implied by each possible lead byte: 1-4 for a byte that
// can start a sequence, 0 for a byte that cannot (continuation bytes and
// 0xF8-0xFF). Overlong leads 0xC0/0xC1 and out-of-range leads 0xF5-0xF7 are
// still classed by length here.
static const int8_t utf8proc_utf8class[256] = {
    // 0x00-0x7F: single-byte (ASCII)
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    // 0x80-0xBF: continuation bytes, never a lead byte
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 0xC0-0xDF: two-byte lead
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    // 0xE0-0xEF: three-byte lead
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    // 0xF0-0xF7: four-byte lead; 0xF8-0xFF: invalid
    4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0};
|
20
|
+
|
21
|
+
// Append the three bytes EF BF BD (the UTF-8 form of U+FFFD, the
// replacement character) to buf.
static void encode_unknown(cmark_strbuf *buf) {
  static const uint8_t replacement[] = {0xEF, 0xBF, 0xBD};

  cmark_strbuf_put(buf, replacement, 3);
}
|
32
25
|
|
33
|
-
// Structural length check for the sequence starting at str[0].
//
// Returns:
//   0         if str_len is zero,
//   1..4      the sequence length when the lead byte and all continuation
//             bytes are well-formed,
//   -1        if str[0] cannot start a sequence,
//   -str_len  if the sequence is longer than the str_len bytes available,
//   -k        if byte k (1-based offset) is not a continuation byte.
//
// A negative str_len disables the length check.
static int utf8proc_charlen(const uint8_t *str, bufsize_t str_len) {
  int expected;
  int offset = 1;

  if (str_len == 0)
    return 0;

  expected = utf8proc_utf8class[str[0]];
  if (expected == 0)
    return -1;

  if (str_len >= 0 && (bufsize_t)expected > str_len)
    return -str_len;

  // Every byte after the lead must look like 10xxxxxx.
  while (offset < expected) {
    if ((str[offset] & 0xC0) != 0x80)
      return -offset;
    offset++;
  }

  return expected;
}
|
55
47
|
|
56
48
|
// Validate a single UTF-8 character according to RFC 3629.
|
57
|
-
static int utf8proc_valid(const uint8_t *str, bufsize_t str_len)
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
return length;
|
49
|
+
static int utf8proc_valid(const uint8_t *str, bufsize_t str_len) {
|
50
|
+
int length = utf8proc_utf8class[str[0]];
|
51
|
+
|
52
|
+
if (!length)
|
53
|
+
return -1;
|
54
|
+
|
55
|
+
if ((bufsize_t)length > str_len)
|
56
|
+
return -str_len;
|
57
|
+
|
58
|
+
switch (length) {
|
59
|
+
case 2:
|
60
|
+
if ((str[1] & 0xC0) != 0x80)
|
61
|
+
return -1;
|
62
|
+
if (str[0] < 0xC2) {
|
63
|
+
// Overlong
|
64
|
+
return -length;
|
65
|
+
}
|
66
|
+
break;
|
67
|
+
|
68
|
+
case 3:
|
69
|
+
if ((str[1] & 0xC0) != 0x80)
|
70
|
+
return -1;
|
71
|
+
if ((str[2] & 0xC0) != 0x80)
|
72
|
+
return -2;
|
73
|
+
if (str[0] == 0xE0) {
|
74
|
+
if (str[1] < 0xA0) {
|
75
|
+
// Overlong
|
76
|
+
return -length;
|
77
|
+
}
|
78
|
+
} else if (str[0] == 0xED) {
|
79
|
+
if (str[1] >= 0xA0) {
|
80
|
+
// Surrogate
|
81
|
+
return -length;
|
82
|
+
}
|
83
|
+
}
|
84
|
+
break;
|
85
|
+
|
86
|
+
case 4:
|
87
|
+
if ((str[1] & 0xC0) != 0x80)
|
88
|
+
return -1;
|
89
|
+
if ((str[2] & 0xC0) != 0x80)
|
90
|
+
return -2;
|
91
|
+
if ((str[3] & 0xC0) != 0x80)
|
92
|
+
return -3;
|
93
|
+
if (str[0] == 0xF0) {
|
94
|
+
if (str[1] < 0x90) {
|
95
|
+
// Overlong
|
96
|
+
return -length;
|
97
|
+
}
|
98
|
+
} else if (str[0] >= 0xF4) {
|
99
|
+
if (str[0] > 0xF4 || str[1] >= 0x90) {
|
100
|
+
// Above 0x10FFFF
|
101
|
+
return -length;
|
102
|
+
}
|
103
|
+
}
|
104
|
+
break;
|
105
|
+
}
|
106
|
+
|
107
|
+
return length;
|
117
108
|
}
|
118
109
|
|
119
|
-
void
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
}
|
110
|
+
void cmark_utf8proc_check(cmark_strbuf *ob, const uint8_t *line, bufsize_t size) {
|
111
|
+
bufsize_t i = 0;
|
112
|
+
|
113
|
+
while (i < size) {
|
114
|
+
bufsize_t org = i;
|
115
|
+
int charlen = 0;
|
116
|
+
|
117
|
+
while (i < size) {
|
118
|
+
if (line[i] < 0x80 && line[i] != 0) {
|
119
|
+
i++;
|
120
|
+
} else if (line[i] >= 0x80) {
|
121
|
+
charlen = utf8proc_valid(line + i, size - i);
|
122
|
+
if (charlen < 0) {
|
123
|
+
charlen = -charlen;
|
124
|
+
break;
|
125
|
+
}
|
126
|
+
i += charlen;
|
127
|
+
} else if (line[i] == 0) {
|
128
|
+
// ASCII NUL is technically valid but rejected
|
129
|
+
// for security reasons.
|
130
|
+
charlen = 1;
|
131
|
+
break;
|
132
|
+
}
|
133
|
+
}
|
134
|
+
|
135
|
+
if (i > org) {
|
136
|
+
cmark_strbuf_put(ob, line + org, i - org);
|
137
|
+
}
|
138
|
+
|
139
|
+
if (i >= size) {
|
140
|
+
break;
|
141
|
+
} else {
|
142
|
+
// Invalid UTF-8
|
143
|
+
encode_unknown(ob);
|
144
|
+
i += charlen;
|
145
|
+
}
|
146
|
+
}
|
157
147
|
}
|
158
148
|
|
159
|
-
int
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
149
|
+
int cmark_utf8proc_iterate(const uint8_t *str, bufsize_t str_len, int32_t *dst) {
|
150
|
+
int length;
|
151
|
+
int32_t uc = -1;
|
152
|
+
|
153
|
+
*dst = -1;
|
154
|
+
length = utf8proc_charlen(str, str_len);
|
155
|
+
if (length < 0)
|
156
|
+
return -1;
|
157
|
+
|
158
|
+
switch (length) {
|
159
|
+
case 1:
|
160
|
+
uc = str[0];
|
161
|
+
break;
|
162
|
+
case 2:
|
163
|
+
uc = ((str[0] & 0x1F) << 6) + (str[1] & 0x3F);
|
164
|
+
if (uc < 0x80)
|
165
|
+
uc = -1;
|
166
|
+
break;
|
167
|
+
case 3:
|
168
|
+
uc = ((str[0] & 0x0F) << 12) + ((str[1] & 0x3F) << 6) + (str[2] & 0x3F);
|
169
|
+
if (uc < 0x800 || (uc >= 0xD800 && uc < 0xE000))
|
170
|
+
uc = -1;
|
171
|
+
break;
|
172
|
+
case 4:
|
173
|
+
uc = ((str[0] & 0x07) << 18) + ((str[1] & 0x3F) << 12) +
|
174
|
+
((str[2] & 0x3F) << 6) + (str[3] & 0x3F);
|
175
|
+
if (uc < 0x10000 || uc >= 0x110000)
|
176
|
+
uc = -1;
|
177
|
+
break;
|
178
|
+
}
|
179
|
+
|
180
|
+
if (uc < 0)
|
181
|
+
return -1;
|
182
|
+
|
183
|
+
*dst = uc;
|
184
|
+
return length;
|
194
185
|
}
|
195
186
|
|
196
|
-
void
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
cmark_strbuf_put(buf, dst, len);
|
187
|
+
void cmark_utf8proc_encode_char(int32_t uc, cmark_strbuf *buf) {
|
188
|
+
uint8_t dst[4];
|
189
|
+
bufsize_t len = 0;
|
190
|
+
|
191
|
+
assert(uc >= 0);
|
192
|
+
|
193
|
+
if (uc < 0x80) {
|
194
|
+
dst[0] = (uint8_t)(uc);
|
195
|
+
len = 1;
|
196
|
+
} else if (uc < 0x800) {
|
197
|
+
dst[0] = (uint8_t)(0xC0 + (uc >> 6));
|
198
|
+
dst[1] = 0x80 + (uc & 0x3F);
|
199
|
+
len = 2;
|
200
|
+
} else if (uc == 0xFFFF) {
|
201
|
+
dst[0] = 0xFF;
|
202
|
+
len = 1;
|
203
|
+
} else if (uc == 0xFFFE) {
|
204
|
+
dst[0] = 0xFE;
|
205
|
+
len = 1;
|
206
|
+
} else if (uc < 0x10000) {
|
207
|
+
dst[0] = (uint8_t)(0xE0 + (uc >> 12));
|
208
|
+
dst[1] = 0x80 + ((uc >> 6) & 0x3F);
|
209
|
+
dst[2] = 0x80 + (uc & 0x3F);
|
210
|
+
len = 3;
|
211
|
+
} else if (uc < 0x110000) {
|
212
|
+
dst[0] = (uint8_t)(0xF0 + (uc >> 18));
|
213
|
+
dst[1] = 0x80 + ((uc >> 12) & 0x3F);
|
214
|
+
dst[2] = 0x80 + ((uc >> 6) & 0x3F);
|
215
|
+
dst[3] = 0x80 + (uc & 0x3F);
|
216
|
+
len = 4;
|
217
|
+
} else {
|
218
|
+
encode_unknown(buf);
|
219
|
+
return;
|
220
|
+
}
|
221
|
+
|
222
|
+
cmark_strbuf_put(buf, dst, len);
|
233
223
|
}
|
234
224
|
|
235
|
-
void
|
236
|
-
|
237
|
-
int32_t c;
|
225
|
+
void cmark_utf8proc_case_fold(cmark_strbuf *dest, const uint8_t *str, bufsize_t len) {
|
226
|
+
int32_t c;
|
238
227
|
|
239
|
-
#define bufpush(x)
|
240
|
-
utf8proc_encode_char(x, dest)
|
228
|
+
#define bufpush(x) cmark_utf8proc_encode_char(x, dest)
|
241
229
|
|
242
|
-
|
243
|
-
|
230
|
+
while (len > 0) {
|
231
|
+
bufsize_t char_len = cmark_utf8proc_iterate(str, len, &c);
|
244
232
|
|
245
|
-
|
233
|
+
if (char_len >= 0) {
|
246
234
|
#include "case_fold_switch.inc"
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
235
|
+
} else {
|
236
|
+
encode_unknown(dest);
|
237
|
+
char_len = -char_len;
|
238
|
+
}
|
239
|
+
|
240
|
+
str += char_len;
|
241
|
+
len -= char_len;
|
242
|
+
}
|
255
243
|
}
|
256
244
|
|
257
245
|
// Returns 1 for anything in the Zs class, plus TAB, LF, FF and CR;
// returns 0 for every other value.
int cmark_utf8proc_is_space(int32_t uc) {
  switch (uc) {
  case 0x09:   // TAB
  case 0x0A:   // LF
  case 0x0C:   // FF
  case 0x0D:   // CR
  case 0x20:   // 32
  case 0xA0:   // 160
  case 0x1680: // 5760
  case 0x202F: // 8239
  case 0x205F: // 8287
  case 0x3000: // 12288
    return 1;
  default:
    // 8192..8202
    return uc >= 0x2000 && uc <= 0x200A;
  }
}
|
272
251
|
|
273
252
|
// Returns 1 for anything in the P[cdefios] classes, 0 otherwise.
//
// Values below 128 are decided by cmark_ispunct(); every other value is
// looked up in the inclusive range table below.
int cmark_utf8proc_is_punctuation(int32_t uc) {
  static const struct {
    int32_t lo;
    int32_t hi;
  } ranges[] = {
      {161, 161},     {167, 167},     {171, 171},     {182, 182},
      {183, 183},     {187, 187},     {191, 191},     {894, 894},
      {903, 903},     {1370, 1375},   {1417, 1417},   {1418, 1418},
      {1470, 1470},   {1472, 1472},   {1475, 1475},   {1478, 1478},
      {1523, 1523},   {1524, 1524},   {1545, 1545},   {1546, 1546},
      {1548, 1548},   {1549, 1549},   {1563, 1563},   {1566, 1566},
      {1567, 1567},   {1642, 1645},   {1748, 1748},   {1792, 1805},
      {2039, 2041},   {2096, 2110},   {2142, 2142},   {2404, 2404},
      {2405, 2405},   {2416, 2416},   {2800, 2800},   {3572, 3572},
      {3663, 3663},   {3674, 3674},   {3675, 3675},   {3844, 3858},
      {3860, 3860},   {3898, 3901},   {3973, 3973},   {4048, 4052},
      {4057, 4057},   {4058, 4058},   {4170, 4175},   {4347, 4347},
      {4960, 4968},   {5120, 5120},   {5741, 5741},   {5742, 5742},
      {5787, 5787},   {5788, 5788},   {5867, 5869},   {5941, 5941},
      {5942, 5942},   {6100, 6102},   {6104, 6106},   {6144, 6154},
      {6468, 6468},   {6469, 6469},   {6686, 6686},   {6687, 6687},
      {6816, 6822},   {6824, 6829},   {7002, 7008},   {7164, 7167},
      {7227, 7231},   {7294, 7294},   {7295, 7295},   {7360, 7367},
      {7379, 7379},   {8208, 8231},   {8240, 8259},   {8261, 8273},
      {8275, 8286},   {8317, 8317},   {8318, 8318},   {8333, 8333},
      {8334, 8334},   {8968, 8971},   {9001, 9001},   {9002, 9002},
      {10088, 10101}, {10181, 10181}, {10182, 10182}, {10214, 10223},
      {10627, 10648}, {10712, 10715}, {10748, 10748}, {10749, 10749},
      {11513, 11516}, {11518, 11518}, {11519, 11519}, {11632, 11632},
      {11776, 11822}, {11824, 11842}, {12289, 12291}, {12296, 12305},
      {12308, 12319}, {12336, 12336}, {12349, 12349}, {12448, 12448},
      {12539, 12539}, {42238, 42238}, {42239, 42239}, {42509, 42511},
      {42611, 42611}, {42622, 42622}, {42738, 42743}, {43124, 43127},
      {43214, 43214}, {43215, 43215}, {43256, 43258}, {43310, 43310},
      {43311, 43311}, {43359, 43359}, {43457, 43469}, {43486, 43486},
      {43487, 43487}, {43612, 43615}, {43742, 43742}, {43743, 43743},
      {43760, 43760}, {43761, 43761}, {44011, 44011}, {64830, 64830},
      {64831, 64831}, {65040, 65049}, {65072, 65106}, {65108, 65121},
      {65123, 65123}, {65128, 65128}, {65130, 65130}, {65131, 65131},
      {65281, 65283}, {65285, 65290}, {65292, 65295}, {65306, 65306},
      {65307, 65307}, {65311, 65311}, {65312, 65312}, {65339, 65341},
      {65343, 65343}, {65371, 65371}, {65373, 65373}, {65375, 65381},
      {65792, 65794}, {66463, 66463}, {66512, 66512}, {66927, 66927},
      {67671, 67671}, {67871, 67871}, {67903, 67903}, {68176, 68184},
      {68223, 68223}, {68336, 68342}, {68409, 68415}, {68505, 68508},
      {69703, 69709}, {69819, 69819}, {69820, 69820}, {69822, 69825},
      {69952, 69955}, {70004, 70004}, {70005, 70005}, {70085, 70088},
      {70093, 70093}, {70200, 70205}, {70854, 70854}, {71105, 71113},
      {71233, 71235}, {74864, 74868}, {92782, 92782}, {92783, 92783},
      {92917, 92917}, {92983, 92987}, {92996, 92996}, {113823, 113823}};
  const unsigned count = (unsigned)(sizeof(ranges) / sizeof(ranges[0]));
  unsigned idx;

  if (uc < 128 && cmark_ispunct((char)uc))
    return 1;

  for (idx = 0; idx < count; idx++) {
    if (uc >= ranges[idx].lo && uc <= ranges[idx].hi)
      return 1;
  }

  return 0;
}
|