commonmarker 0.3.0 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of commonmarker might be problematic. Click here for more details.

Files changed (78) hide show
  1. checksums.yaml +4 -4
  2. data/ext/commonmarker/cmark/CMakeLists.txt +10 -4
  3. data/ext/commonmarker/cmark/Makefile +5 -5
  4. data/ext/commonmarker/cmark/api_test/CMakeLists.txt +1 -1
  5. data/ext/commonmarker/cmark/api_test/main.c +16 -0
  6. data/ext/commonmarker/cmark/build/CMakeCache.txt +3 -4
  7. data/ext/commonmarker/cmark/build/CMakeFiles/2.8.10.1/CMakeSystem.cmake +4 -4
  8. data/ext/commonmarker/cmark/build/CMakeFiles/CMakeError.log +12 -12
  9. data/ext/commonmarker/cmark/build/CMakeFiles/CMakeOutput.log +97 -142
  10. data/ext/commonmarker/cmark/build/CMakeFiles/Makefile.cmake +0 -1
  11. data/ext/commonmarker/cmark/build/api_test/CMakeFiles/api_test.dir/build.make +1 -1
  12. data/ext/commonmarker/cmark/build/api_test/CMakeFiles/api_test.dir/link.txt +1 -1
  13. data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark.dir/DependInfo.cmake +1 -1
  14. data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark.dir/build.make +23 -23
  15. data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark.dir/cmake_clean.cmake +2 -2
  16. data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark.dir/link.txt +1 -1
  17. data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/blocks.c.o +0 -0
  18. data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/buffer.c.o +0 -0
  19. data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/cmark.c.o +0 -0
  20. data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/commonmark.c.o +0 -0
  21. data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/houdini_html_u.c.o +0 -0
  22. data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/html.c.o +0 -0
  23. data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/inlines.c.o +0 -0
  24. data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/node.c.o +0 -0
  25. data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/references.c.o +0 -0
  26. data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/render.c.o +0 -0
  27. data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/scanners.c.o +0 -0
  28. data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/utf8.c.o +0 -0
  29. data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/xml.c.o +0 -0
  30. data/ext/commonmarker/cmark/build/src/cmake_install.cmake +3 -3
  31. data/ext/commonmarker/cmark/build/src/cmark_version.h +2 -2
  32. data/ext/commonmarker/cmark/build/src/config.h +6 -6
  33. data/ext/commonmarker/cmark/build/src/libcmark.a +0 -0
  34. data/ext/commonmarker/cmark/build/src/libcmark.pc +1 -1
  35. data/ext/commonmarker/cmark/build/testdir/CTestTestfile.cmake +4 -4
  36. data/ext/commonmarker/cmark/changelog.txt +46 -0
  37. data/ext/commonmarker/cmark/man/man3/cmark.3 +21 -20
  38. data/ext/commonmarker/cmark/src/CMakeLists.txt +4 -6
  39. data/ext/commonmarker/cmark/src/bench.h +8 -8
  40. data/ext/commonmarker/cmark/src/blocks.c +917 -947
  41. data/ext/commonmarker/cmark/src/buffer.c +213 -288
  42. data/ext/commonmarker/cmark/src/buffer.h +19 -21
  43. data/ext/commonmarker/cmark/src/chunk.h +78 -82
  44. data/ext/commonmarker/cmark/src/cmark.c +9 -17
  45. data/ext/commonmarker/cmark/src/cmark.h +113 -157
  46. data/ext/commonmarker/cmark/src/cmark_ctype.c +24 -35
  47. data/ext/commonmarker/cmark/src/commonmark.c +390 -425
  48. data/ext/commonmarker/cmark/src/config.h.in +6 -6
  49. data/ext/commonmarker/cmark/src/houdini.h +21 -15
  50. data/ext/commonmarker/cmark/src/houdini_href_e.c +50 -57
  51. data/ext/commonmarker/cmark/src/houdini_html_e.c +36 -51
  52. data/ext/commonmarker/cmark/src/houdini_html_u.c +119 -124
  53. data/ext/commonmarker/cmark/src/html.c +289 -307
  54. data/ext/commonmarker/cmark/src/inlines.c +976 -1030
  55. data/ext/commonmarker/cmark/src/inlines.h +4 -2
  56. data/ext/commonmarker/cmark/src/iterator.c +96 -126
  57. data/ext/commonmarker/cmark/src/iterator.h +5 -5
  58. data/ext/commonmarker/cmark/src/latex.c +379 -401
  59. data/ext/commonmarker/cmark/src/main.c +168 -175
  60. data/ext/commonmarker/cmark/src/man.c +212 -226
  61. data/ext/commonmarker/cmark/src/node.c +746 -839
  62. data/ext/commonmarker/cmark/src/node.h +47 -48
  63. data/ext/commonmarker/cmark/src/parser.h +14 -14
  64. data/ext/commonmarker/cmark/src/references.c +101 -111
  65. data/ext/commonmarker/cmark/src/references.h +10 -8
  66. data/ext/commonmarker/cmark/src/render.c +144 -167
  67. data/ext/commonmarker/cmark/src/render.h +22 -41
  68. data/ext/commonmarker/cmark/src/scanners.c +27695 -20903
  69. data/ext/commonmarker/cmark/src/scanners.h +2 -1
  70. data/ext/commonmarker/cmark/src/scanners.re +1 -1
  71. data/ext/commonmarker/cmark/src/utf8.c +276 -419
  72. data/ext/commonmarker/cmark/src/utf8.h +6 -6
  73. data/ext/commonmarker/cmark/src/xml.c +129 -144
  74. data/ext/commonmarker/cmark/test/CMakeLists.txt +4 -4
  75. data/ext/commonmarker/cmark/test/smart_punct.txt +8 -0
  76. data/ext/commonmarker/cmark/test/spec.txt +109 -47
  77. data/lib/commonmarker/version.rb +1 -1
  78. metadata +2 -2
@@ -6,452 +6,309 @@
6
6
  #include "utf8.h"
7
7
 
8
8
  static const int8_t utf8proc_utf8class[256] = {
9
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
10
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
11
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
12
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
13
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
14
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
15
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
16
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
17
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
18
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
19
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
20
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
21
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
22
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
23
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
24
- 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0
25
- };
26
-
27
- static void encode_unknown(cmark_strbuf *buf)
28
- {
29
- static const uint8_t repl[] = {239, 191, 189};
30
- cmark_strbuf_put(buf, repl, 3);
9
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
10
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
11
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
12
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
13
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
14
+ 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
15
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
16
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
17
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
18
+ 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
19
+ 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0};
20
+
21
+ static void encode_unknown(cmark_strbuf *buf) {
22
+ static const uint8_t repl[] = {239, 191, 189};
23
+ cmark_strbuf_put(buf, repl, 3);
31
24
  }
32
25
 
33
- static int utf8proc_charlen(const uint8_t *str, bufsize_t str_len)
34
- {
35
- int length, i;
26
+ static int utf8proc_charlen(const uint8_t *str, bufsize_t str_len) {
27
+ int length, i;
36
28
 
37
- if (!str_len)
38
- return 0;
29
+ if (!str_len)
30
+ return 0;
39
31
 
40
- length = utf8proc_utf8class[str[0]];
32
+ length = utf8proc_utf8class[str[0]];
41
33
 
42
- if (!length)
43
- return -1;
34
+ if (!length)
35
+ return -1;
44
36
 
45
- if (str_len >= 0 && (bufsize_t)length > str_len)
46
- return -str_len;
37
+ if (str_len >= 0 && (bufsize_t)length > str_len)
38
+ return -str_len;
47
39
 
48
- for (i = 1; i < length; i++) {
49
- if ((str[i] & 0xC0) != 0x80)
50
- return -i;
51
- }
40
+ for (i = 1; i < length; i++) {
41
+ if ((str[i] & 0xC0) != 0x80)
42
+ return -i;
43
+ }
52
44
 
53
- return length;
45
+ return length;
54
46
  }
55
47
 
56
48
  // Validate a single UTF-8 character according to RFC 3629.
57
- static int utf8proc_valid(const uint8_t *str, bufsize_t str_len)
58
- {
59
- int length = utf8proc_utf8class[str[0]];
60
-
61
- if (!length)
62
- return -1;
63
-
64
- if ((bufsize_t)length > str_len)
65
- return -str_len;
66
-
67
- switch (length) {
68
- case 2:
69
- if ((str[1] & 0xC0) != 0x80)
70
- return -1;
71
- if (str[0] < 0xC2) {
72
- // Overlong
73
- return -length;
74
- }
75
- break;
76
-
77
- case 3:
78
- if ((str[1] & 0xC0) != 0x80)
79
- return -1;
80
- if ((str[2] & 0xC0) != 0x80)
81
- return -2;
82
- if (str[0] == 0xE0) {
83
- if (str[1] < 0xA0) {
84
- // Overlong
85
- return -length;
86
- }
87
- } else if (str[0] == 0xED) {
88
- if (str[1] >= 0xA0) {
89
- // Surrogate
90
- return -length;
91
- }
92
- }
93
- break;
94
-
95
- case 4:
96
- if ((str[1] & 0xC0) != 0x80)
97
- return -1;
98
- if ((str[2] & 0xC0) != 0x80)
99
- return -2;
100
- if ((str[3] & 0xC0) != 0x80)
101
- return -3;
102
- if (str[0] == 0xF0) {
103
- if (str[1] < 0x90) {
104
- // Overlong
105
- return -length;
106
- }
107
- } else if (str[0] >= 0xF4) {
108
- if (str[0] > 0xF4 || str[1] >= 0x90) {
109
- // Above 0x10FFFF
110
- return -length;
111
- }
112
- }
113
- break;
114
- }
115
-
116
- return length;
49
+ static int utf8proc_valid(const uint8_t *str, bufsize_t str_len) {
50
+ int length = utf8proc_utf8class[str[0]];
51
+
52
+ if (!length)
53
+ return -1;
54
+
55
+ if ((bufsize_t)length > str_len)
56
+ return -str_len;
57
+
58
+ switch (length) {
59
+ case 2:
60
+ if ((str[1] & 0xC0) != 0x80)
61
+ return -1;
62
+ if (str[0] < 0xC2) {
63
+ // Overlong
64
+ return -length;
65
+ }
66
+ break;
67
+
68
+ case 3:
69
+ if ((str[1] & 0xC0) != 0x80)
70
+ return -1;
71
+ if ((str[2] & 0xC0) != 0x80)
72
+ return -2;
73
+ if (str[0] == 0xE0) {
74
+ if (str[1] < 0xA0) {
75
+ // Overlong
76
+ return -length;
77
+ }
78
+ } else if (str[0] == 0xED) {
79
+ if (str[1] >= 0xA0) {
80
+ // Surrogate
81
+ return -length;
82
+ }
83
+ }
84
+ break;
85
+
86
+ case 4:
87
+ if ((str[1] & 0xC0) != 0x80)
88
+ return -1;
89
+ if ((str[2] & 0xC0) != 0x80)
90
+ return -2;
91
+ if ((str[3] & 0xC0) != 0x80)
92
+ return -3;
93
+ if (str[0] == 0xF0) {
94
+ if (str[1] < 0x90) {
95
+ // Overlong
96
+ return -length;
97
+ }
98
+ } else if (str[0] >= 0xF4) {
99
+ if (str[0] > 0xF4 || str[1] >= 0x90) {
100
+ // Above 0x10FFFF
101
+ return -length;
102
+ }
103
+ }
104
+ break;
105
+ }
106
+
107
+ return length;
117
108
  }
118
109
 
119
- void utf8proc_check(cmark_strbuf *ob, const uint8_t *line, bufsize_t size)
120
- {
121
- bufsize_t i = 0;
122
-
123
- while (i < size) {
124
- bufsize_t org = i;
125
- int charlen = 0;
126
-
127
- while (i < size) {
128
- if (line[i] < 0x80 && line[i] != 0) {
129
- i++;
130
- } else if (line[i] >= 0x80) {
131
- charlen = utf8proc_valid(line + i, size - i);
132
- if (charlen < 0) {
133
- charlen = -charlen;
134
- break;
135
- }
136
- i += charlen;
137
- } else if (line[i] == 0) {
138
- // ASCII NUL is technically valid but rejected
139
- // for security reasons.
140
- charlen = 1;
141
- break;
142
- }
143
- }
144
-
145
- if (i > org) {
146
- cmark_strbuf_put(ob, line + org, i - org);
147
- }
148
-
149
- if (i >= size) {
150
- break;
151
- } else {
152
- // Invalid UTF-8
153
- encode_unknown(ob);
154
- i += charlen;
155
- }
156
- }
110
+ void cmark_utf8proc_check(cmark_strbuf *ob, const uint8_t *line, bufsize_t size) {
111
+ bufsize_t i = 0;
112
+
113
+ while (i < size) {
114
+ bufsize_t org = i;
115
+ int charlen = 0;
116
+
117
+ while (i < size) {
118
+ if (line[i] < 0x80 && line[i] != 0) {
119
+ i++;
120
+ } else if (line[i] >= 0x80) {
121
+ charlen = utf8proc_valid(line + i, size - i);
122
+ if (charlen < 0) {
123
+ charlen = -charlen;
124
+ break;
125
+ }
126
+ i += charlen;
127
+ } else if (line[i] == 0) {
128
+ // ASCII NUL is technically valid but rejected
129
+ // for security reasons.
130
+ charlen = 1;
131
+ break;
132
+ }
133
+ }
134
+
135
+ if (i > org) {
136
+ cmark_strbuf_put(ob, line + org, i - org);
137
+ }
138
+
139
+ if (i >= size) {
140
+ break;
141
+ } else {
142
+ // Invalid UTF-8
143
+ encode_unknown(ob);
144
+ i += charlen;
145
+ }
146
+ }
157
147
  }
158
148
 
159
- int utf8proc_iterate(const uint8_t *str, bufsize_t str_len, int32_t *dst)
160
- {
161
- int length;
162
- int32_t uc = -1;
163
-
164
- *dst = -1;
165
- length = utf8proc_charlen(str, str_len);
166
- if (length < 0)
167
- return -1;
168
-
169
- switch (length) {
170
- case 1:
171
- uc = str[0];
172
- break;
173
- case 2:
174
- uc = ((str[0] & 0x1F) << 6) + (str[1] & 0x3F);
175
- if (uc < 0x80) uc = -1;
176
- break;
177
- case 3:
178
- uc = ((str[0] & 0x0F) << 12) + ((str[1] & 0x3F) << 6)
179
- + (str[2] & 0x3F);
180
- if (uc < 0x800 || (uc >= 0xD800 && uc < 0xE000)) uc = -1;
181
- break;
182
- case 4:
183
- uc = ((str[0] & 0x07) << 18) + ((str[1] & 0x3F) << 12)
184
- + ((str[2] & 0x3F) << 6) + (str[3] & 0x3F);
185
- if (uc < 0x10000 || uc >= 0x110000) uc = -1;
186
- break;
187
- }
188
-
189
- if (uc < 0)
190
- return -1;
191
-
192
- *dst = uc;
193
- return length;
149
+ int cmark_utf8proc_iterate(const uint8_t *str, bufsize_t str_len, int32_t *dst) {
150
+ int length;
151
+ int32_t uc = -1;
152
+
153
+ *dst = -1;
154
+ length = utf8proc_charlen(str, str_len);
155
+ if (length < 0)
156
+ return -1;
157
+
158
+ switch (length) {
159
+ case 1:
160
+ uc = str[0];
161
+ break;
162
+ case 2:
163
+ uc = ((str[0] & 0x1F) << 6) + (str[1] & 0x3F);
164
+ if (uc < 0x80)
165
+ uc = -1;
166
+ break;
167
+ case 3:
168
+ uc = ((str[0] & 0x0F) << 12) + ((str[1] & 0x3F) << 6) + (str[2] & 0x3F);
169
+ if (uc < 0x800 || (uc >= 0xD800 && uc < 0xE000))
170
+ uc = -1;
171
+ break;
172
+ case 4:
173
+ uc = ((str[0] & 0x07) << 18) + ((str[1] & 0x3F) << 12) +
174
+ ((str[2] & 0x3F) << 6) + (str[3] & 0x3F);
175
+ if (uc < 0x10000 || uc >= 0x110000)
176
+ uc = -1;
177
+ break;
178
+ }
179
+
180
+ if (uc < 0)
181
+ return -1;
182
+
183
+ *dst = uc;
184
+ return length;
194
185
  }
195
186
 
196
- void utf8proc_encode_char(int32_t uc, cmark_strbuf *buf)
197
- {
198
- uint8_t dst[4];
199
- bufsize_t len = 0;
200
-
201
- assert(uc >= 0);
202
-
203
- if (uc < 0x80) {
204
- dst[0] = uc;
205
- len = 1;
206
- } else if (uc < 0x800) {
207
- dst[0] = 0xC0 + (uc >> 6);
208
- dst[1] = 0x80 + (uc & 0x3F);
209
- len = 2;
210
- } else if (uc == 0xFFFF) {
211
- dst[0] = 0xFF;
212
- len = 1;
213
- } else if (uc == 0xFFFE) {
214
- dst[0] = 0xFE;
215
- len = 1;
216
- } else if (uc < 0x10000) {
217
- dst[0] = 0xE0 + (uc >> 12);
218
- dst[1] = 0x80 + ((uc >> 6) & 0x3F);
219
- dst[2] = 0x80 + (uc & 0x3F);
220
- len = 3;
221
- } else if (uc < 0x110000) {
222
- dst[0] = 0xF0 + (uc >> 18);
223
- dst[1] = 0x80 + ((uc >> 12) & 0x3F);
224
- dst[2] = 0x80 + ((uc >> 6) & 0x3F);
225
- dst[3] = 0x80 + (uc & 0x3F);
226
- len = 4;
227
- } else {
228
- encode_unknown(buf);
229
- return;
230
- }
231
-
232
- cmark_strbuf_put(buf, dst, len);
187
+ void cmark_utf8proc_encode_char(int32_t uc, cmark_strbuf *buf) {
188
+ uint8_t dst[4];
189
+ bufsize_t len = 0;
190
+
191
+ assert(uc >= 0);
192
+
193
+ if (uc < 0x80) {
194
+ dst[0] = (uint8_t)(uc);
195
+ len = 1;
196
+ } else if (uc < 0x800) {
197
+ dst[0] = (uint8_t)(0xC0 + (uc >> 6));
198
+ dst[1] = 0x80 + (uc & 0x3F);
199
+ len = 2;
200
+ } else if (uc == 0xFFFF) {
201
+ dst[0] = 0xFF;
202
+ len = 1;
203
+ } else if (uc == 0xFFFE) {
204
+ dst[0] = 0xFE;
205
+ len = 1;
206
+ } else if (uc < 0x10000) {
207
+ dst[0] = (uint8_t)(0xE0 + (uc >> 12));
208
+ dst[1] = 0x80 + ((uc >> 6) & 0x3F);
209
+ dst[2] = 0x80 + (uc & 0x3F);
210
+ len = 3;
211
+ } else if (uc < 0x110000) {
212
+ dst[0] = (uint8_t)(0xF0 + (uc >> 18));
213
+ dst[1] = 0x80 + ((uc >> 12) & 0x3F);
214
+ dst[2] = 0x80 + ((uc >> 6) & 0x3F);
215
+ dst[3] = 0x80 + (uc & 0x3F);
216
+ len = 4;
217
+ } else {
218
+ encode_unknown(buf);
219
+ return;
220
+ }
221
+
222
+ cmark_strbuf_put(buf, dst, len);
233
223
  }
234
224
 
235
- void utf8proc_case_fold(cmark_strbuf *dest, const uint8_t *str, bufsize_t len)
236
- {
237
- int32_t c;
225
+ void cmark_utf8proc_case_fold(cmark_strbuf *dest, const uint8_t *str, bufsize_t len) {
226
+ int32_t c;
238
227
 
239
- #define bufpush(x) \
240
- utf8proc_encode_char(x, dest)
228
+ #define bufpush(x) cmark_utf8proc_encode_char(x, dest)
241
229
 
242
- while (len > 0) {
243
- bufsize_t char_len = utf8proc_iterate(str, len, &c);
230
+ while (len > 0) {
231
+ bufsize_t char_len = cmark_utf8proc_iterate(str, len, &c);
244
232
 
245
- if (char_len >= 0) {
233
+ if (char_len >= 0) {
246
234
  #include "case_fold_switch.inc"
247
- } else {
248
- encode_unknown(dest);
249
- char_len = -char_len;
250
- }
251
-
252
- str += char_len;
253
- len -= char_len;
254
- }
235
+ } else {
236
+ encode_unknown(dest);
237
+ char_len = -char_len;
238
+ }
239
+
240
+ str += char_len;
241
+ len -= char_len;
242
+ }
255
243
  }
256
244
 
257
245
  // matches anything in the Zs class, plus LF, CR, TAB, FF.
258
- int utf8proc_is_space(int32_t uc)
259
- {
260
- return (uc == 9 ||
261
- uc == 10 ||
262
- uc == 12 ||
263
- uc == 13 ||
264
- uc == 32 ||
265
- uc == 160 ||
266
- uc == 5760 ||
267
- (uc >= 8192 && uc <= 8202) ||
268
- uc == 8239 ||
269
- uc == 8287 ||
270
- uc == 12288);
246
+ int cmark_utf8proc_is_space(int32_t uc) {
247
+ return (uc == 9 || uc == 10 || uc == 12 || uc == 13 || uc == 32 ||
248
+ uc == 160 || uc == 5760 || (uc >= 8192 && uc <= 8202) || uc == 8239 ||
249
+ uc == 8287 || uc == 12288);
271
250
  }
272
251
 
273
252
  // matches anything in the P[cdefios] classes.
274
- int utf8proc_is_punctuation(int32_t uc)
275
- {
276
- return ((uc < 128 && cmark_ispunct((char)uc)) ||
277
- uc == 161 ||
278
- uc == 167 ||
279
- uc == 171 ||
280
- uc == 182 ||
281
- uc == 183 ||
282
- uc == 187 ||
283
- uc == 191 ||
284
- uc == 894 ||
285
- uc == 903 ||
286
- (uc >= 1370 && uc <= 1375) ||
287
- uc == 1417 ||
288
- uc == 1418 ||
289
- uc == 1470 ||
290
- uc == 1472 ||
291
- uc == 1475 ||
292
- uc == 1478 ||
293
- uc == 1523 ||
294
- uc == 1524 ||
295
- uc == 1545 ||
296
- uc == 1546 ||
297
- uc == 1548 ||
298
- uc == 1549 ||
299
- uc == 1563 ||
300
- uc == 1566 ||
301
- uc == 1567 ||
302
- (uc >= 1642 && uc <= 1645) ||
303
- uc == 1748 ||
304
- (uc >= 1792 && uc <= 1805) ||
305
- (uc >= 2039 && uc <= 2041) ||
306
- (uc >= 2096 && uc <= 2110) ||
307
- uc == 2142 ||
308
- uc == 2404 ||
309
- uc == 2405 ||
310
- uc == 2416 ||
311
- uc == 2800 ||
312
- uc == 3572 ||
313
- uc == 3663 ||
314
- uc == 3674 ||
315
- uc == 3675 ||
316
- (uc >= 3844 && uc <= 3858) ||
317
- uc == 3860 ||
318
- (uc >= 3898 && uc <= 3901) ||
319
- uc == 3973 ||
320
- (uc >= 4048 && uc <= 4052) ||
321
- uc == 4057 ||
322
- uc == 4058 ||
323
- (uc >= 4170 && uc <= 4175) ||
324
- uc == 4347 ||
325
- (uc >= 4960 && uc <= 4968) ||
326
- uc == 5120 ||
327
- uc == 5741 ||
328
- uc == 5742 ||
329
- uc == 5787 ||
330
- uc == 5788 ||
331
- (uc >= 5867 && uc <= 5869) ||
332
- uc == 5941 ||
333
- uc == 5942 ||
334
- (uc >= 6100 && uc <= 6102) ||
335
- (uc >= 6104 && uc <= 6106) ||
336
- (uc >= 6144 && uc <= 6154) ||
337
- uc == 6468 ||
338
- uc == 6469 ||
339
- uc == 6686 ||
340
- uc == 6687 ||
341
- (uc >= 6816 && uc <= 6822) ||
342
- (uc >= 6824 && uc <= 6829) ||
343
- (uc >= 7002 && uc <= 7008) ||
344
- (uc >= 7164 && uc <= 7167) ||
345
- (uc >= 7227 && uc <= 7231) ||
346
- uc == 7294 ||
347
- uc == 7295 ||
348
- (uc >= 7360 && uc <= 7367) ||
349
- uc == 7379 ||
350
- (uc >= 8208 && uc <= 8231) ||
351
- (uc >= 8240 && uc <= 8259) ||
352
- (uc >= 8261 && uc <= 8273) ||
353
- (uc >= 8275 && uc <= 8286) ||
354
- uc == 8317 ||
355
- uc == 8318 ||
356
- uc == 8333 ||
357
- uc == 8334 ||
358
- (uc >= 8968 && uc <= 8971) ||
359
- uc == 9001 ||
360
- uc == 9002 ||
361
- (uc >= 10088 && uc <= 10101) ||
362
- uc == 10181 ||
363
- uc == 10182 ||
364
- (uc >= 10214 && uc <= 10223) ||
365
- (uc >= 10627 && uc <= 10648) ||
366
- (uc >= 10712 && uc <= 10715) ||
367
- uc == 10748 ||
368
- uc == 10749 ||
369
- (uc >= 11513 && uc <= 11516) ||
370
- uc == 11518 ||
371
- uc == 11519 ||
372
- uc == 11632 ||
373
- (uc >= 11776 && uc <= 11822) ||
374
- (uc >= 11824 && uc <= 11842) ||
375
- (uc >= 12289 && uc <= 12291) ||
376
- (uc >= 12296 && uc <= 12305) ||
377
- (uc >= 12308 && uc <= 12319) ||
378
- uc == 12336 ||
379
- uc == 12349 ||
380
- uc == 12448 ||
381
- uc == 12539 ||
382
- uc == 42238 ||
383
- uc == 42239 ||
384
- (uc >= 42509 && uc <= 42511) ||
385
- uc == 42611 ||
386
- uc == 42622 ||
387
- (uc >= 42738 && uc <= 42743) ||
388
- (uc >= 43124 && uc <= 43127) ||
389
- uc == 43214 ||
390
- uc == 43215 ||
391
- (uc >= 43256 && uc <= 43258) ||
392
- uc == 43310 ||
393
- uc == 43311 ||
394
- uc == 43359 ||
395
- (uc >= 43457 && uc <= 43469) ||
396
- uc == 43486 ||
397
- uc == 43487 ||
398
- (uc >= 43612 && uc <= 43615) ||
399
- uc == 43742 ||
400
- uc == 43743 ||
401
- uc == 43760 ||
402
- uc == 43761 ||
403
- uc == 44011 ||
404
- uc == 64830 ||
405
- uc == 64831 ||
406
- (uc >= 65040 && uc <= 65049) ||
407
- (uc >= 65072 && uc <= 65106) ||
408
- (uc >= 65108 && uc <= 65121) ||
409
- uc == 65123 ||
410
- uc == 65128 ||
411
- uc == 65130 ||
412
- uc == 65131 ||
413
- (uc >= 65281 && uc <= 65283) ||
414
- (uc >= 65285 && uc <= 65290) ||
415
- (uc >= 65292 && uc <= 65295) ||
416
- uc == 65306 ||
417
- uc == 65307 ||
418
- uc == 65311 ||
419
- uc == 65312 ||
420
- (uc >= 65339 && uc <= 65341) ||
421
- uc == 65343 ||
422
- uc == 65371 ||
423
- uc == 65373 ||
424
- (uc >= 65375 && uc <= 65381) ||
425
- (uc >= 65792 && uc <= 65794) ||
426
- uc == 66463 ||
427
- uc == 66512 ||
428
- uc == 66927 ||
429
- uc == 67671 ||
430
- uc == 67871 ||
431
- uc == 67903 ||
432
- (uc >= 68176 && uc <= 68184) ||
433
- uc == 68223 ||
434
- (uc >= 68336 && uc <= 68342) ||
435
- (uc >= 68409 && uc <= 68415) ||
436
- (uc >= 68505 && uc <= 68508) ||
437
- (uc >= 69703 && uc <= 69709) ||
438
- uc == 69819 ||
439
- uc == 69820 ||
440
- (uc >= 69822 && uc <= 69825) ||
441
- (uc >= 69952 && uc <= 69955) ||
442
- uc == 70004 ||
443
- uc == 70005 ||
444
- (uc >= 70085 && uc <= 70088) ||
445
- uc == 70093 ||
446
- (uc >= 70200 && uc <= 70205) ||
447
- uc == 70854 ||
448
- (uc >= 71105 && uc <= 71113) ||
449
- (uc >= 71233 && uc <= 71235) ||
450
- (uc >= 74864 && uc <= 74868) ||
451
- uc == 92782 ||
452
- uc == 92783 ||
453
- uc == 92917 ||
454
- (uc >= 92983 && uc <= 92987) ||
455
- uc == 92996 ||
456
- uc == 113823);
253
+ int cmark_utf8proc_is_punctuation(int32_t uc) {
254
+ return (
255
+ (uc < 128 && cmark_ispunct((char)uc)) || uc == 161 || uc == 167 ||
256
+ uc == 171 || uc == 182 || uc == 183 || uc == 187 || uc == 191 ||
257
+ uc == 894 || uc == 903 || (uc >= 1370 && uc <= 1375) || uc == 1417 ||
258
+ uc == 1418 || uc == 1470 || uc == 1472 || uc == 1475 || uc == 1478 ||
259
+ uc == 1523 || uc == 1524 || uc == 1545 || uc == 1546 || uc == 1548 ||
260
+ uc == 1549 || uc == 1563 || uc == 1566 || uc == 1567 ||
261
+ (uc >= 1642 && uc <= 1645) || uc == 1748 || (uc >= 1792 && uc <= 1805) ||
262
+ (uc >= 2039 && uc <= 2041) || (uc >= 2096 && uc <= 2110) || uc == 2142 ||
263
+ uc == 2404 || uc == 2405 || uc == 2416 || uc == 2800 || uc == 3572 ||
264
+ uc == 3663 || uc == 3674 || uc == 3675 || (uc >= 3844 && uc <= 3858) ||
265
+ uc == 3860 || (uc >= 3898 && uc <= 3901) || uc == 3973 ||
266
+ (uc >= 4048 && uc <= 4052) || uc == 4057 || uc == 4058 ||
267
+ (uc >= 4170 && uc <= 4175) || uc == 4347 || (uc >= 4960 && uc <= 4968) ||
268
+ uc == 5120 || uc == 5741 || uc == 5742 || uc == 5787 || uc == 5788 ||
269
+ (uc >= 5867 && uc <= 5869) || uc == 5941 || uc == 5942 ||
270
+ (uc >= 6100 && uc <= 6102) || (uc >= 6104 && uc <= 6106) ||
271
+ (uc >= 6144 && uc <= 6154) || uc == 6468 || uc == 6469 || uc == 6686 ||
272
+ uc == 6687 || (uc >= 6816 && uc <= 6822) || (uc >= 6824 && uc <= 6829) ||
273
+ (uc >= 7002 && uc <= 7008) || (uc >= 7164 && uc <= 7167) ||
274
+ (uc >= 7227 && uc <= 7231) || uc == 7294 || uc == 7295 ||
275
+ (uc >= 7360 && uc <= 7367) || uc == 7379 || (uc >= 8208 && uc <= 8231) ||
276
+ (uc >= 8240 && uc <= 8259) || (uc >= 8261 && uc <= 8273) ||
277
+ (uc >= 8275 && uc <= 8286) || uc == 8317 || uc == 8318 || uc == 8333 ||
278
+ uc == 8334 || (uc >= 8968 && uc <= 8971) || uc == 9001 || uc == 9002 ||
279
+ (uc >= 10088 && uc <= 10101) || uc == 10181 || uc == 10182 ||
280
+ (uc >= 10214 && uc <= 10223) || (uc >= 10627 && uc <= 10648) ||
281
+ (uc >= 10712 && uc <= 10715) || uc == 10748 || uc == 10749 ||
282
+ (uc >= 11513 && uc <= 11516) || uc == 11518 || uc == 11519 ||
283
+ uc == 11632 || (uc >= 11776 && uc <= 11822) ||
284
+ (uc >= 11824 && uc <= 11842) || (uc >= 12289 && uc <= 12291) ||
285
+ (uc >= 12296 && uc <= 12305) || (uc >= 12308 && uc <= 12319) ||
286
+ uc == 12336 || uc == 12349 || uc == 12448 || uc == 12539 || uc == 42238 ||
287
+ uc == 42239 || (uc >= 42509 && uc <= 42511) || uc == 42611 ||
288
+ uc == 42622 || (uc >= 42738 && uc <= 42743) ||
289
+ (uc >= 43124 && uc <= 43127) || uc == 43214 || uc == 43215 ||
290
+ (uc >= 43256 && uc <= 43258) || uc == 43310 || uc == 43311 ||
291
+ uc == 43359 || (uc >= 43457 && uc <= 43469) || uc == 43486 ||
292
+ uc == 43487 || (uc >= 43612 && uc <= 43615) || uc == 43742 ||
293
+ uc == 43743 || uc == 43760 || uc == 43761 || uc == 44011 || uc == 64830 ||
294
+ uc == 64831 || (uc >= 65040 && uc <= 65049) ||
295
+ (uc >= 65072 && uc <= 65106) || (uc >= 65108 && uc <= 65121) ||
296
+ uc == 65123 || uc == 65128 || uc == 65130 || uc == 65131 ||
297
+ (uc >= 65281 && uc <= 65283) || (uc >= 65285 && uc <= 65290) ||
298
+ (uc >= 65292 && uc <= 65295) || uc == 65306 || uc == 65307 ||
299
+ uc == 65311 || uc == 65312 || (uc >= 65339 && uc <= 65341) ||
300
+ uc == 65343 || uc == 65371 || uc == 65373 ||
301
+ (uc >= 65375 && uc <= 65381) || (uc >= 65792 && uc <= 65794) ||
302
+ uc == 66463 || uc == 66512 || uc == 66927 || uc == 67671 || uc == 67871 ||
303
+ uc == 67903 || (uc >= 68176 && uc <= 68184) || uc == 68223 ||
304
+ (uc >= 68336 && uc <= 68342) || (uc >= 68409 && uc <= 68415) ||
305
+ (uc >= 68505 && uc <= 68508) || (uc >= 69703 && uc <= 69709) ||
306
+ uc == 69819 || uc == 69820 || (uc >= 69822 && uc <= 69825) ||
307
+ (uc >= 69952 && uc <= 69955) || uc == 70004 || uc == 70005 ||
308
+ (uc >= 70085 && uc <= 70088) || uc == 70093 ||
309
+ (uc >= 70200 && uc <= 70205) || uc == 70854 ||
310
+ (uc >= 71105 && uc <= 71113) || (uc >= 71233 && uc <= 71235) ||
311
+ (uc >= 74864 && uc <= 74868) || uc == 92782 || uc == 92783 ||
312
+ uc == 92917 || (uc >= 92983 && uc <= 92987) || uc == 92996 ||
313
+ uc == 113823);
457
314
  }