commonmarker 0.3.0 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of commonmarker might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/ext/commonmarker/cmark/CMakeLists.txt +10 -4
- data/ext/commonmarker/cmark/Makefile +5 -5
- data/ext/commonmarker/cmark/api_test/CMakeLists.txt +1 -1
- data/ext/commonmarker/cmark/api_test/main.c +16 -0
- data/ext/commonmarker/cmark/build/CMakeCache.txt +3 -4
- data/ext/commonmarker/cmark/build/CMakeFiles/2.8.10.1/CMakeSystem.cmake +4 -4
- data/ext/commonmarker/cmark/build/CMakeFiles/CMakeError.log +12 -12
- data/ext/commonmarker/cmark/build/CMakeFiles/CMakeOutput.log +97 -142
- data/ext/commonmarker/cmark/build/CMakeFiles/Makefile.cmake +0 -1
- data/ext/commonmarker/cmark/build/api_test/CMakeFiles/api_test.dir/build.make +1 -1
- data/ext/commonmarker/cmark/build/api_test/CMakeFiles/api_test.dir/link.txt +1 -1
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark.dir/DependInfo.cmake +1 -1
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark.dir/build.make +23 -23
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark.dir/cmake_clean.cmake +2 -2
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark.dir/link.txt +1 -1
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/blocks.c.o +0 -0
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/buffer.c.o +0 -0
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/cmark.c.o +0 -0
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/commonmark.c.o +0 -0
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/houdini_html_u.c.o +0 -0
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/html.c.o +0 -0
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/inlines.c.o +0 -0
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/node.c.o +0 -0
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/references.c.o +0 -0
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/render.c.o +0 -0
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/scanners.c.o +0 -0
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/utf8.c.o +0 -0
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/xml.c.o +0 -0
- data/ext/commonmarker/cmark/build/src/cmake_install.cmake +3 -3
- data/ext/commonmarker/cmark/build/src/cmark_version.h +2 -2
- data/ext/commonmarker/cmark/build/src/config.h +6 -6
- data/ext/commonmarker/cmark/build/src/libcmark.a +0 -0
- data/ext/commonmarker/cmark/build/src/libcmark.pc +1 -1
- data/ext/commonmarker/cmark/build/testdir/CTestTestfile.cmake +4 -4
- data/ext/commonmarker/cmark/changelog.txt +46 -0
- data/ext/commonmarker/cmark/man/man3/cmark.3 +21 -20
- data/ext/commonmarker/cmark/src/CMakeLists.txt +4 -6
- data/ext/commonmarker/cmark/src/bench.h +8 -8
- data/ext/commonmarker/cmark/src/blocks.c +917 -947
- data/ext/commonmarker/cmark/src/buffer.c +213 -288
- data/ext/commonmarker/cmark/src/buffer.h +19 -21
- data/ext/commonmarker/cmark/src/chunk.h +78 -82
- data/ext/commonmarker/cmark/src/cmark.c +9 -17
- data/ext/commonmarker/cmark/src/cmark.h +113 -157
- data/ext/commonmarker/cmark/src/cmark_ctype.c +24 -35
- data/ext/commonmarker/cmark/src/commonmark.c +390 -425
- data/ext/commonmarker/cmark/src/config.h.in +6 -6
- data/ext/commonmarker/cmark/src/houdini.h +21 -15
- data/ext/commonmarker/cmark/src/houdini_href_e.c +50 -57
- data/ext/commonmarker/cmark/src/houdini_html_e.c +36 -51
- data/ext/commonmarker/cmark/src/houdini_html_u.c +119 -124
- data/ext/commonmarker/cmark/src/html.c +289 -307
- data/ext/commonmarker/cmark/src/inlines.c +976 -1030
- data/ext/commonmarker/cmark/src/inlines.h +4 -2
- data/ext/commonmarker/cmark/src/iterator.c +96 -126
- data/ext/commonmarker/cmark/src/iterator.h +5 -5
- data/ext/commonmarker/cmark/src/latex.c +379 -401
- data/ext/commonmarker/cmark/src/main.c +168 -175
- data/ext/commonmarker/cmark/src/man.c +212 -226
- data/ext/commonmarker/cmark/src/node.c +746 -839
- data/ext/commonmarker/cmark/src/node.h +47 -48
- data/ext/commonmarker/cmark/src/parser.h +14 -14
- data/ext/commonmarker/cmark/src/references.c +101 -111
- data/ext/commonmarker/cmark/src/references.h +10 -8
- data/ext/commonmarker/cmark/src/render.c +144 -167
- data/ext/commonmarker/cmark/src/render.h +22 -41
- data/ext/commonmarker/cmark/src/scanners.c +27695 -20903
- data/ext/commonmarker/cmark/src/scanners.h +2 -1
- data/ext/commonmarker/cmark/src/scanners.re +1 -1
- data/ext/commonmarker/cmark/src/utf8.c +276 -419
- data/ext/commonmarker/cmark/src/utf8.h +6 -6
- data/ext/commonmarker/cmark/src/xml.c +129 -144
- data/ext/commonmarker/cmark/test/CMakeLists.txt +4 -4
- data/ext/commonmarker/cmark/test/smart_punct.txt +8 -0
- data/ext/commonmarker/cmark/test/spec.txt +109 -47
- data/lib/commonmarker/version.rb +1 -1
- metadata +2 -2
@@ -13,7 +13,6 @@
|
|
13
13
|
#include "scanners.h"
|
14
14
|
#include "inlines.h"
|
15
15
|
|
16
|
-
|
17
16
|
static const char *EMDASH = "\xE2\x80\x94";
|
18
17
|
static const char *ENDASH = "\xE2\x80\x93";
|
19
18
|
static const char *ELLIPSES = "\xE2\x80\xA6";
|
@@ -22,7 +21,6 @@ static const char *RIGHTDOUBLEQUOTE = "\xE2\x80\x9D";
|
|
22
21
|
static const char *LEFTSINGLEQUOTE = "\xE2\x80\x98";
|
23
22
|
static const char *RIGHTSINGLEQUOTE = "\xE2\x80\x99";
|
24
23
|
|
25
|
-
|
26
24
|
// Macros for creating various kinds of simple inlines.
|
27
25
|
#define make_str(s) make_literal(CMARK_NODE_TEXT, s)
|
28
26
|
#define make_code(s) make_literal(CMARK_NODE_CODE, s)
|
@@ -33,209 +31,189 @@ static const char *RIGHTSINGLEQUOTE = "\xE2\x80\x99";
|
|
33
31
|
#define make_strong() make_simple(CMARK_NODE_STRONG)
|
34
32
|
|
35
33
|
typedef struct delimiter {
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
34
|
+
struct delimiter *previous;
|
35
|
+
struct delimiter *next;
|
36
|
+
cmark_node *inl_text;
|
37
|
+
bufsize_t position;
|
38
|
+
unsigned char delim_char;
|
39
|
+
bool can_open;
|
40
|
+
bool can_close;
|
41
|
+
bool active;
|
44
42
|
} delimiter;
|
45
43
|
|
46
44
|
typedef struct {
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
45
|
+
cmark_chunk input;
|
46
|
+
bufsize_t pos;
|
47
|
+
cmark_reference_map *refmap;
|
48
|
+
delimiter *last_delim;
|
51
49
|
} subject;
|
52
50
|
|
53
|
-
static inline bool
|
54
|
-
|
55
|
-
{
|
56
|
-
return (c == '\n' || c == '\r');
|
51
|
+
static inline bool S_is_line_end_char(char c) {
|
52
|
+
return (c == '\n' || c == '\r');
|
57
53
|
}
|
58
54
|
|
59
|
-
static delimiter*
|
60
|
-
|
55
|
+
static delimiter *S_insert_emph(subject *subj, delimiter *opener,
|
56
|
+
delimiter *closer);
|
61
57
|
|
62
|
-
static int parse_inline(subject*
|
58
|
+
static int parse_inline(subject *subj, cmark_node *parent, int options);
|
63
59
|
|
64
60
|
static void subject_from_buf(subject *e, cmark_strbuf *buffer,
|
65
61
|
cmark_reference_map *refmap);
|
66
62
|
static bufsize_t subject_find_special_char(subject *subj, int options);
|
67
63
|
|
68
64
|
// Create an inline with a literal string value.
|
69
|
-
static inline cmark_node*
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
return e;
|
65
|
+
static inline cmark_node *make_literal(cmark_node_type t, cmark_chunk s) {
|
66
|
+
cmark_node *e = (cmark_node *)calloc(1, sizeof(*e));
|
67
|
+
if (e != NULL) {
|
68
|
+
e->type = t;
|
69
|
+
e->as.literal = s;
|
70
|
+
e->next = NULL;
|
71
|
+
e->prev = NULL;
|
72
|
+
e->parent = NULL;
|
73
|
+
e->first_child = NULL;
|
74
|
+
e->last_child = NULL;
|
75
|
+
// These fields aren't used for inlines:
|
76
|
+
e->start_line = 0;
|
77
|
+
e->start_column = 0;
|
78
|
+
e->end_line = 0;
|
79
|
+
}
|
80
|
+
return e;
|
86
81
|
}
|
87
82
|
|
88
83
|
// Create an inline with no value.
|
89
|
-
static inline cmark_node*
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
return e;
|
84
|
+
static inline cmark_node *make_simple(cmark_node_type t) {
|
85
|
+
cmark_node *e = (cmark_node *)calloc(1, sizeof(*e));
|
86
|
+
if (e != NULL) {
|
87
|
+
e->type = t;
|
88
|
+
e->next = NULL;
|
89
|
+
e->prev = NULL;
|
90
|
+
e->parent = NULL;
|
91
|
+
e->first_child = NULL;
|
92
|
+
e->last_child = NULL;
|
93
|
+
// These fields aren't used for inlines:
|
94
|
+
e->start_line = 0;
|
95
|
+
e->start_column = 0;
|
96
|
+
e->end_line = 0;
|
97
|
+
}
|
98
|
+
return e;
|
105
99
|
}
|
106
100
|
|
107
101
|
// Like make_str, but parses entities.
|
108
|
-
static cmark_node *make_str_with_entities(cmark_chunk *content)
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
}
|
102
|
+
static cmark_node *make_str_with_entities(cmark_chunk *content) {
|
103
|
+
cmark_strbuf unescaped = GH_BUF_INIT;
|
104
|
+
|
105
|
+
if (houdini_unescape_html(&unescaped, content->data, content->len)) {
|
106
|
+
return make_str(cmark_chunk_buf_detach(&unescaped));
|
107
|
+
} else {
|
108
|
+
return make_str(*content);
|
109
|
+
}
|
117
110
|
}
|
118
111
|
|
119
112
|
// Duplicate a chunk by creating a copy of the buffer not by reusing the
|
120
113
|
// buffer like cmark_chunk_dup does.
|
121
|
-
static cmark_chunk chunk_clone(cmark_chunk *src)
|
122
|
-
|
123
|
-
|
124
|
-
bufsize_t len = src->len;
|
114
|
+
static cmark_chunk chunk_clone(cmark_chunk *src) {
|
115
|
+
cmark_chunk c;
|
116
|
+
bufsize_t len = src->len;
|
125
117
|
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
118
|
+
c.len = len;
|
119
|
+
c.data = (unsigned char *)malloc(len + 1);
|
120
|
+
c.alloc = 1;
|
121
|
+
memcpy(c.data, src->data, len);
|
122
|
+
c.data[len] = '\0';
|
131
123
|
|
132
|
-
|
124
|
+
return c;
|
133
125
|
}
|
134
126
|
|
135
|
-
static cmark_chunk cmark_clean_autolink(cmark_chunk *url, int is_email)
|
136
|
-
|
137
|
-
cmark_strbuf buf = GH_BUF_INIT;
|
127
|
+
static cmark_chunk cmark_clean_autolink(cmark_chunk *url, int is_email) {
|
128
|
+
cmark_strbuf buf = GH_BUF_INIT;
|
138
129
|
|
139
|
-
|
130
|
+
cmark_chunk_trim(url);
|
140
131
|
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
132
|
+
if (url->len == 0) {
|
133
|
+
cmark_chunk result = CMARK_CHUNK_EMPTY;
|
134
|
+
return result;
|
135
|
+
}
|
145
136
|
|
146
|
-
|
147
|
-
|
137
|
+
if (is_email)
|
138
|
+
cmark_strbuf_puts(&buf, "mailto:");
|
148
139
|
|
149
|
-
|
150
|
-
|
140
|
+
houdini_unescape_html_f(&buf, url->data, url->len);
|
141
|
+
return cmark_chunk_buf_detach(&buf);
|
151
142
|
}
|
152
143
|
|
153
|
-
static inline cmark_node*
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
return link;
|
144
|
+
static inline cmark_node *make_autolink(cmark_chunk url, int is_email) {
|
145
|
+
cmark_node *link = make_simple(CMARK_NODE_LINK);
|
146
|
+
link->as.link.url = cmark_clean_autolink(&url, is_email);
|
147
|
+
link->as.link.title = cmark_chunk_literal("");
|
148
|
+
cmark_node_append_child(link, make_str_with_entities(&url));
|
149
|
+
return link;
|
160
150
|
}
|
161
151
|
|
162
152
|
static void subject_from_buf(subject *e, cmark_strbuf *buffer,
|
163
|
-
cmark_reference_map *refmap)
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
e->last_delim = NULL;
|
153
|
+
cmark_reference_map *refmap) {
|
154
|
+
e->input.data = buffer->ptr;
|
155
|
+
e->input.len = buffer->size;
|
156
|
+
e->input.alloc = 0;
|
157
|
+
e->pos = 0;
|
158
|
+
e->refmap = refmap;
|
159
|
+
e->last_delim = NULL;
|
171
160
|
}
|
172
161
|
|
173
|
-
static inline int isbacktick(int c)
|
174
|
-
{
|
175
|
-
return (c == '`');
|
176
|
-
}
|
162
|
+
static inline int isbacktick(int c) { return (c == '`'); }
|
177
163
|
|
178
|
-
static inline unsigned char peek_char(subject *subj)
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
return (subj->pos < subj->input.len) ? subj->input.data[subj->pos] : 0;
|
164
|
+
static inline unsigned char peek_char(subject *subj) {
|
165
|
+
// NULL bytes should have been stripped out by now. If they're
|
166
|
+
// present, it's a programming error:
|
167
|
+
assert(!(subj->pos < subj->input.len && subj->input.data[subj->pos] == 0));
|
168
|
+
return (subj->pos < subj->input.len) ? subj->input.data[subj->pos] : 0;
|
184
169
|
}
|
185
170
|
|
186
|
-
static inline unsigned char peek_at(subject *subj, bufsize_t pos)
|
187
|
-
|
188
|
-
return subj->input.data[pos];
|
171
|
+
static inline unsigned char peek_at(subject *subj, bufsize_t pos) {
|
172
|
+
return subj->input.data[pos];
|
189
173
|
}
|
190
174
|
|
191
175
|
// Return true if there are no more characters in the subject.
|
192
|
-
static inline int is_eof(subject*
|
193
|
-
|
194
|
-
return (subj->pos >= subj->input.len);
|
176
|
+
static inline int is_eof(subject *subj) {
|
177
|
+
return (subj->pos >= subj->input.len);
|
195
178
|
}
|
196
179
|
|
197
180
|
// Advance the subject. Doesn't check for eof.
|
198
181
|
#define advance(subj) (subj)->pos += 1
|
199
182
|
|
200
|
-
static inline bool
|
201
|
-
|
202
|
-
{
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
}
|
208
|
-
return skipped;
|
183
|
+
static inline bool skip_spaces(subject *subj) {
|
184
|
+
bool skipped = false;
|
185
|
+
while (peek_char(subj) == ' ' || peek_char(subj) == '\t') {
|
186
|
+
advance(subj);
|
187
|
+
skipped = true;
|
188
|
+
}
|
189
|
+
return skipped;
|
209
190
|
}
|
210
191
|
|
211
|
-
static inline bool
|
212
|
-
|
213
|
-
{
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
}
|
223
|
-
return seen_line_end_char || is_eof(subj);
|
192
|
+
static inline bool skip_line_end(subject *subj) {
|
193
|
+
bool seen_line_end_char = false;
|
194
|
+
if (peek_char(subj) == '\r') {
|
195
|
+
advance(subj);
|
196
|
+
seen_line_end_char = true;
|
197
|
+
}
|
198
|
+
if (peek_char(subj) == '\n') {
|
199
|
+
advance(subj);
|
200
|
+
seen_line_end_char = true;
|
201
|
+
}
|
202
|
+
return seen_line_end_char || is_eof(subj);
|
224
203
|
}
|
225
204
|
|
226
205
|
// Take characters while a predicate holds, and return a string.
|
227
|
-
static inline cmark_chunk take_while(subject*
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
bufsize_t len = 0;
|
206
|
+
static inline cmark_chunk take_while(subject *subj, int (*f)(int)) {
|
207
|
+
unsigned char c;
|
208
|
+
bufsize_t startpos = subj->pos;
|
209
|
+
bufsize_t len = 0;
|
232
210
|
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
211
|
+
while ((c = peek_char(subj)) && (*f)(c)) {
|
212
|
+
advance(subj);
|
213
|
+
len++;
|
214
|
+
}
|
237
215
|
|
238
|
-
|
216
|
+
return cmark_chunk_dup(&subj->input, startpos, len);
|
239
217
|
}
|
240
218
|
|
241
219
|
// Try to process a backtick code span that began with a
|
@@ -243,981 +221,949 @@ static inline cmark_chunk take_while(subject* subj, int (*f)(int))
|
|
243
221
|
// parsed). Return 0 if you don't find matching closing
|
244
222
|
// backticks, otherwise return the position in the subject
|
245
223
|
// after the closing backticks.
|
246
|
-
static bufsize_t scan_to_closing_backticks(subject*
|
247
|
-
{
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
224
|
+
static bufsize_t scan_to_closing_backticks(subject *subj,
|
225
|
+
bufsize_t openticklength) {
|
226
|
+
// read non backticks
|
227
|
+
unsigned char c;
|
228
|
+
while ((c = peek_char(subj)) && c != '`') {
|
229
|
+
advance(subj);
|
230
|
+
}
|
231
|
+
if (is_eof(subj)) {
|
232
|
+
return 0; // did not find closing ticks, return 0
|
233
|
+
}
|
234
|
+
bufsize_t numticks = 0;
|
235
|
+
while (peek_char(subj) == '`') {
|
236
|
+
advance(subj);
|
237
|
+
numticks++;
|
238
|
+
}
|
239
|
+
if (numticks != openticklength) {
|
240
|
+
return (scan_to_closing_backticks(subj, openticklength));
|
241
|
+
}
|
242
|
+
return (subj->pos);
|
265
243
|
}
|
266
244
|
|
267
245
|
// Parse backtick code section or raw backticks, return an inline.
|
268
246
|
// Assumes that the subject has a backtick at the current position.
|
269
|
-
static cmark_node*
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
247
|
+
static cmark_node *handle_backticks(subject *subj) {
|
248
|
+
cmark_chunk openticks = take_while(subj, isbacktick);
|
249
|
+
bufsize_t startpos = subj->pos;
|
250
|
+
bufsize_t endpos = scan_to_closing_backticks(subj, openticks.len);
|
251
|
+
|
252
|
+
if (endpos == 0) { // not found
|
253
|
+
subj->pos = startpos; // rewind
|
254
|
+
return make_str(openticks);
|
255
|
+
} else {
|
256
|
+
cmark_strbuf buf = GH_BUF_INIT;
|
257
|
+
|
258
|
+
cmark_strbuf_set(&buf, subj->input.data + startpos,
|
259
|
+
endpos - startpos - openticks.len);
|
260
|
+
cmark_strbuf_trim(&buf);
|
261
|
+
cmark_strbuf_normalize_whitespace(&buf);
|
262
|
+
|
263
|
+
return make_code(cmark_chunk_buf_detach(&buf));
|
264
|
+
}
|
287
265
|
}
|
288
266
|
|
289
267
|
// Scan delimiter characters c (e.g. ***, **, or *; quotes are limited to one) and return number scanned, or 0.
|
290
268
|
// Advances position.
|
291
|
-
static int
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
|
342
|
-
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
}
|
352
|
-
return numdelims;
|
269
|
+
static int scan_delims(subject *subj, unsigned char c, bool *can_open,
|
270
|
+
bool *can_close) {
|
271
|
+
int numdelims = 0;
|
272
|
+
bufsize_t before_char_pos;
|
273
|
+
int32_t after_char = 0;
|
274
|
+
int32_t before_char = 0;
|
275
|
+
int len;
|
276
|
+
bool left_flanking, right_flanking;
|
277
|
+
|
278
|
+
if (subj->pos == 0) {
|
279
|
+
before_char = 10;
|
280
|
+
} else {
|
281
|
+
before_char_pos = subj->pos - 1;
|
282
|
+
// walk back to the beginning of the UTF-8 sequence:
|
283
|
+
while (peek_at(subj, before_char_pos) >> 6 == 2 && before_char_pos > 0) {
|
284
|
+
before_char_pos -= 1;
|
285
|
+
}
|
286
|
+
len = cmark_utf8proc_iterate(subj->input.data + before_char_pos,
|
287
|
+
subj->pos - before_char_pos, &before_char);
|
288
|
+
if (len == -1) {
|
289
|
+
before_char = 10;
|
290
|
+
}
|
291
|
+
}
|
292
|
+
|
293
|
+
if (c == '\'' || c == '"') {
|
294
|
+
numdelims++;
|
295
|
+
advance(subj); // limit to 1 delim for quotes
|
296
|
+
} else {
|
297
|
+
while (peek_char(subj) == c) {
|
298
|
+
numdelims++;
|
299
|
+
advance(subj);
|
300
|
+
}
|
301
|
+
}
|
302
|
+
|
303
|
+
len = cmark_utf8proc_iterate(subj->input.data + subj->pos,
|
304
|
+
subj->input.len - subj->pos, &after_char);
|
305
|
+
if (len == -1) {
|
306
|
+
after_char = 10;
|
307
|
+
}
|
308
|
+
left_flanking = numdelims > 0 && !cmark_utf8proc_is_space(after_char) &&
|
309
|
+
!(cmark_utf8proc_is_punctuation(after_char) &&
|
310
|
+
!cmark_utf8proc_is_space(before_char) &&
|
311
|
+
!cmark_utf8proc_is_punctuation(before_char));
|
312
|
+
right_flanking =
|
313
|
+
numdelims > 0 && !cmark_utf8proc_is_space(before_char) &&
|
314
|
+
!(cmark_utf8proc_is_punctuation(before_char) &&
|
315
|
+
!cmark_utf8proc_is_space(after_char) && !cmark_utf8proc_is_punctuation(after_char));
|
316
|
+
if (c == '_') {
|
317
|
+
*can_open = left_flanking &&
|
318
|
+
(!right_flanking || cmark_utf8proc_is_punctuation(before_char));
|
319
|
+
*can_close = right_flanking &&
|
320
|
+
(!left_flanking || cmark_utf8proc_is_punctuation(after_char));
|
321
|
+
} else if (c == '\'' || c == '"') {
|
322
|
+
*can_open = left_flanking && !right_flanking;
|
323
|
+
*can_close = right_flanking;
|
324
|
+
} else {
|
325
|
+
*can_open = left_flanking;
|
326
|
+
*can_close = right_flanking;
|
327
|
+
}
|
328
|
+
return numdelims;
|
353
329
|
}
|
354
330
|
|
355
331
|
/*
|
356
332
|
static void print_delimiters(subject *subj)
|
357
333
|
{
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
|
334
|
+
delimiter *delim;
|
335
|
+
delim = subj->last_delim;
|
336
|
+
while (delim != NULL) {
|
337
|
+
printf("Item at stack pos %p, text pos %d: %d %d %d next(%p)
|
338
|
+
prev(%p)\n",
|
339
|
+
(void*)delim, delim->position, delim->delim_char,
|
340
|
+
delim->can_open, delim->can_close,
|
341
|
+
(void*)delim->next, (void*)delim->previous);
|
342
|
+
delim = delim->previous;
|
343
|
+
}
|
367
344
|
}
|
368
345
|
*/
|
369
346
|
|
370
|
-
static void remove_delimiter(subject *subj, delimiter *delim)
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
|
375
|
-
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
|
380
|
-
|
381
|
-
|
382
|
-
|
383
|
-
|
347
|
+
static void remove_delimiter(subject *subj, delimiter *delim) {
|
348
|
+
if (delim == NULL)
|
349
|
+
return;
|
350
|
+
if (delim->next == NULL) {
|
351
|
+
// end of list:
|
352
|
+
assert(delim == subj->last_delim);
|
353
|
+
subj->last_delim = delim->previous;
|
354
|
+
} else {
|
355
|
+
delim->next->previous = delim->previous;
|
356
|
+
}
|
357
|
+
if (delim->previous != NULL) {
|
358
|
+
delim->previous->next = delim->next;
|
359
|
+
}
|
360
|
+
free(delim);
|
384
361
|
}
|
385
362
|
|
386
363
|
static void push_delimiter(subject *subj, unsigned char c, bool can_open,
|
387
|
-
bool can_close, cmark_node *inl_text)
|
388
|
-
|
389
|
-
|
390
|
-
|
391
|
-
|
392
|
-
|
393
|
-
|
394
|
-
|
395
|
-
|
396
|
-
|
397
|
-
|
398
|
-
|
399
|
-
|
400
|
-
|
401
|
-
|
402
|
-
|
403
|
-
|
404
|
-
delim->active = true;
|
405
|
-
subj->last_delim = delim;
|
364
|
+
bool can_close, cmark_node *inl_text) {
|
365
|
+
delimiter *delim = (delimiter *)malloc(sizeof(delimiter));
|
366
|
+
if (delim == NULL) {
|
367
|
+
return;
|
368
|
+
}
|
369
|
+
delim->delim_char = c;
|
370
|
+
delim->can_open = can_open;
|
371
|
+
delim->can_close = can_close;
|
372
|
+
delim->inl_text = inl_text;
|
373
|
+
delim->previous = subj->last_delim;
|
374
|
+
delim->next = NULL;
|
375
|
+
if (delim->previous != NULL) {
|
376
|
+
delim->previous->next = delim;
|
377
|
+
}
|
378
|
+
delim->position = subj->pos;
|
379
|
+
delim->active = true;
|
380
|
+
subj->last_delim = delim;
|
406
381
|
}
|
407
382
|
|
408
383
|
// Assumes the subject has a c at the current position.
|
409
|
-
static cmark_node*
|
410
|
-
|
411
|
-
|
412
|
-
|
413
|
-
|
414
|
-
|
415
|
-
|
416
|
-
|
417
|
-
|
418
|
-
|
419
|
-
|
420
|
-
|
421
|
-
|
422
|
-
|
423
|
-
|
424
|
-
|
425
|
-
|
426
|
-
|
427
|
-
|
428
|
-
|
429
|
-
|
430
|
-
|
431
|
-
|
432
|
-
|
433
|
-
return inl_text;
|
384
|
+
static cmark_node *handle_delim(subject *subj, unsigned char c, bool smart) {
|
385
|
+
bufsize_t numdelims;
|
386
|
+
cmark_node *inl_text;
|
387
|
+
bool can_open, can_close;
|
388
|
+
cmark_chunk contents;
|
389
|
+
|
390
|
+
numdelims = scan_delims(subj, c, &can_open, &can_close);
|
391
|
+
|
392
|
+
if (c == '\'' && smart) {
|
393
|
+
contents = cmark_chunk_literal(RIGHTSINGLEQUOTE);
|
394
|
+
} else if (c == '"' && smart) {
|
395
|
+
contents =
|
396
|
+
cmark_chunk_literal(can_close ? RIGHTDOUBLEQUOTE : LEFTDOUBLEQUOTE);
|
397
|
+
} else {
|
398
|
+
contents = cmark_chunk_dup(&subj->input, subj->pos - numdelims, numdelims);
|
399
|
+
}
|
400
|
+
|
401
|
+
inl_text = make_str(contents);
|
402
|
+
|
403
|
+
if ((can_open || can_close) && (!(c == '\'' || c == '"') || smart)) {
|
404
|
+
push_delimiter(subj, c, can_open, can_close, inl_text);
|
405
|
+
}
|
406
|
+
|
407
|
+
return inl_text;
|
434
408
|
}
|
435
409
|
|
436
410
|
// Assumes we have a hyphen at the current position.
|
437
|
-
static cmark_node*
|
438
|
-
|
439
|
-
|
440
|
-
|
441
|
-
|
442
|
-
|
443
|
-
|
444
|
-
|
445
|
-
|
446
|
-
|
447
|
-
|
448
|
-
|
449
|
-
|
450
|
-
|
451
|
-
|
452
|
-
|
453
|
-
|
454
|
-
|
455
|
-
|
456
|
-
|
457
|
-
|
458
|
-
|
459
|
-
|
460
|
-
|
461
|
-
|
462
|
-
|
463
|
-
|
464
|
-
|
465
|
-
|
466
|
-
|
467
|
-
|
468
|
-
|
469
|
-
|
470
|
-
|
471
|
-
|
472
|
-
|
473
|
-
|
474
|
-
|
475
|
-
|
476
|
-
|
477
|
-
return make_str(cmark_chunk_buf_detach(&buf));
|
411
|
+
static cmark_node *handle_hyphen(subject *subj, bool smart) {
|
412
|
+
int startpos = subj->pos;
|
413
|
+
|
414
|
+
advance(subj);
|
415
|
+
|
416
|
+
if (!smart || peek_char(subj) != '-') {
|
417
|
+
return make_str(cmark_chunk_literal("-"));
|
418
|
+
}
|
419
|
+
|
420
|
+
while (smart && peek_char(subj) == '-') {
|
421
|
+
advance(subj);
|
422
|
+
}
|
423
|
+
|
424
|
+
int numhyphens = subj->pos - startpos;
|
425
|
+
int en_count = 0;
|
426
|
+
int em_count = 0;
|
427
|
+
int i;
|
428
|
+
cmark_strbuf buf = GH_BUF_INIT;
|
429
|
+
|
430
|
+
if (numhyphens % 3 == 0) { // if divisible by 3, use all em dashes
|
431
|
+
em_count = numhyphens / 3;
|
432
|
+
} else if (numhyphens % 2 == 0) { // if divisible by 2, use all en dashes
|
433
|
+
en_count = numhyphens / 2;
|
434
|
+
} else if (numhyphens % 3 == 2) { // use one en dash at end
|
435
|
+
en_count = 1;
|
436
|
+
em_count = (numhyphens - 2) / 3;
|
437
|
+
} else { // use two en dashes at the end
|
438
|
+
en_count = 2;
|
439
|
+
em_count = (numhyphens - 4) / 3;
|
440
|
+
}
|
441
|
+
|
442
|
+
for (i = em_count; i > 0; i--) {
|
443
|
+
cmark_strbuf_puts(&buf, EMDASH);
|
444
|
+
}
|
445
|
+
|
446
|
+
for (i = en_count; i > 0; i--) {
|
447
|
+
cmark_strbuf_puts(&buf, ENDASH);
|
448
|
+
}
|
449
|
+
|
450
|
+
return make_str(cmark_chunk_buf_detach(&buf));
|
478
451
|
}
|
479
452
|
|
480
453
|
// Assumes we have a period at the current position.
|
481
|
-
static cmark_node*
|
482
|
-
|
483
|
-
|
484
|
-
|
485
|
-
|
486
|
-
|
487
|
-
|
488
|
-
|
489
|
-
|
490
|
-
|
491
|
-
|
492
|
-
|
493
|
-
|
494
|
-
}
|
454
|
+
static cmark_node *handle_period(subject *subj, bool smart) {
|
455
|
+
advance(subj);
|
456
|
+
if (smart && peek_char(subj) == '.') {
|
457
|
+
advance(subj);
|
458
|
+
if (peek_char(subj) == '.') {
|
459
|
+
advance(subj);
|
460
|
+
return make_str(cmark_chunk_literal(ELLIPSES));
|
461
|
+
} else {
|
462
|
+
return make_str(cmark_chunk_literal(".."));
|
463
|
+
}
|
464
|
+
} else {
|
465
|
+
return make_str(cmark_chunk_literal("."));
|
466
|
+
}
|
495
467
|
}
|
496
468
|
|
497
|
-
static void process_emphasis(subject *subj, delimiter *stack_bottom)
|
498
|
-
|
499
|
-
|
500
|
-
|
501
|
-
|
502
|
-
|
503
|
-
|
504
|
-
|
505
|
-
|
506
|
-
|
507
|
-
|
508
|
-
|
509
|
-
|
510
|
-
|
511
|
-
|
512
|
-
|
513
|
-
|
514
|
-
|
515
|
-
|
516
|
-
|
517
|
-
|
518
|
-
|
519
|
-
|
520
|
-
|
521
|
-
|
522
|
-
|
523
|
-
|
524
|
-
|
525
|
-
|
526
|
-
|
527
|
-
|
528
|
-
|
529
|
-
|
530
|
-
|
531
|
-
|
532
|
-
|
533
|
-
|
534
|
-
|
535
|
-
|
536
|
-
|
537
|
-
|
538
|
-
|
539
|
-
|
540
|
-
|
541
|
-
|
542
|
-
|
543
|
-
|
544
|
-
|
545
|
-
|
546
|
-
|
547
|
-
|
548
|
-
|
549
|
-
|
550
|
-
|
551
|
-
|
552
|
-
|
553
|
-
|
554
|
-
|
555
|
-
|
556
|
-
|
557
|
-
|
558
|
-
|
559
|
-
|
560
|
-
|
561
|
-
|
562
|
-
|
563
|
-
|
564
|
-
|
565
|
-
|
566
|
-
|
567
|
-
|
568
|
-
|
569
|
-
|
570
|
-
|
571
|
-
|
572
|
-
|
573
|
-
}
|
574
|
-
}
|
575
|
-
// free all delimiters in list until stack_bottom:
|
576
|
-
while (subj->last_delim != stack_bottom) {
|
577
|
-
remove_delimiter(subj, subj->last_delim);
|
578
|
-
}
|
469
|
+
static void process_emphasis(subject *subj, delimiter *stack_bottom) {
|
470
|
+
delimiter *closer = subj->last_delim;
|
471
|
+
delimiter *opener;
|
472
|
+
delimiter *old_closer;
|
473
|
+
bool opener_found;
|
474
|
+
delimiter *openers_bottom[128];
|
475
|
+
|
476
|
+
// initialize openers_bottom:
|
477
|
+
openers_bottom['*'] = stack_bottom;
|
478
|
+
openers_bottom['_'] = stack_bottom;
|
479
|
+
openers_bottom['\''] = stack_bottom;
|
480
|
+
openers_bottom['"'] = stack_bottom;
|
481
|
+
|
482
|
+
// move back to first relevant delim.
|
483
|
+
while (closer != NULL && closer->previous != stack_bottom) {
|
484
|
+
closer = closer->previous;
|
485
|
+
}
|
486
|
+
|
487
|
+
// now move forward, looking for closers, and handling each
|
488
|
+
while (closer != NULL) {
|
489
|
+
if (closer->can_close &&
|
490
|
+
(closer->delim_char == '*' || closer->delim_char == '_' ||
|
491
|
+
closer->delim_char == '"' || closer->delim_char == '\'')) {
|
492
|
+
// Now look backwards for first matching opener:
|
493
|
+
opener = closer->previous;
|
494
|
+
opener_found = false;
|
495
|
+
while (opener != NULL && opener != stack_bottom &&
|
496
|
+
opener != openers_bottom[closer->delim_char]) {
|
497
|
+
if (opener->delim_char == closer->delim_char && opener->can_open) {
|
498
|
+
opener_found = true;
|
499
|
+
break;
|
500
|
+
}
|
501
|
+
opener = opener->previous;
|
502
|
+
}
|
503
|
+
old_closer = closer;
|
504
|
+
if (closer->delim_char == '*' || closer->delim_char == '_') {
|
505
|
+
if (opener_found) {
|
506
|
+
closer = S_insert_emph(subj, opener, closer);
|
507
|
+
} else {
|
508
|
+
closer = closer->next;
|
509
|
+
}
|
510
|
+
} else if (closer->delim_char == '\'') {
|
511
|
+
cmark_chunk_free(&closer->inl_text->as.literal);
|
512
|
+
closer->inl_text->as.literal = cmark_chunk_literal(RIGHTSINGLEQUOTE);
|
513
|
+
if (opener_found) {
|
514
|
+
cmark_chunk_free(&opener->inl_text->as.literal);
|
515
|
+
opener->inl_text->as.literal = cmark_chunk_literal(LEFTSINGLEQUOTE);
|
516
|
+
}
|
517
|
+
closer = closer->next;
|
518
|
+
} else if (closer->delim_char == '"') {
|
519
|
+
cmark_chunk_free(&closer->inl_text->as.literal);
|
520
|
+
closer->inl_text->as.literal = cmark_chunk_literal(RIGHTDOUBLEQUOTE);
|
521
|
+
if (opener_found) {
|
522
|
+
cmark_chunk_free(&opener->inl_text->as.literal);
|
523
|
+
opener->inl_text->as.literal = cmark_chunk_literal(LEFTDOUBLEQUOTE);
|
524
|
+
}
|
525
|
+
closer = closer->next;
|
526
|
+
}
|
527
|
+
if (!opener_found) {
|
528
|
+
// set lower bound for future searches for openers:
|
529
|
+
openers_bottom[old_closer->delim_char] = old_closer->previous;
|
530
|
+
if (!old_closer->can_open) {
|
531
|
+
// we can remove a closer that can't be an
|
532
|
+
// opener, once we've seen there's no
|
533
|
+
// matching opener:
|
534
|
+
remove_delimiter(subj, old_closer);
|
535
|
+
}
|
536
|
+
}
|
537
|
+
} else {
|
538
|
+
closer = closer->next;
|
539
|
+
}
|
540
|
+
}
|
541
|
+
// free all delimiters in list until stack_bottom:
|
542
|
+
while (subj->last_delim != stack_bottom) {
|
543
|
+
remove_delimiter(subj, subj->last_delim);
|
544
|
+
}
|
579
545
|
}
|
580
546
|
|
581
|
-
static delimiter*
|
582
|
-
|
583
|
-
|
584
|
-
|
585
|
-
|
586
|
-
|
587
|
-
|
588
|
-
|
589
|
-
|
590
|
-
|
591
|
-
|
592
|
-
|
593
|
-
|
594
|
-
|
595
|
-
|
596
|
-
|
597
|
-
|
598
|
-
|
599
|
-
|
600
|
-
|
601
|
-
|
602
|
-
|
603
|
-
|
604
|
-
|
605
|
-
|
606
|
-
|
607
|
-
|
608
|
-
|
609
|
-
|
610
|
-
|
611
|
-
|
612
|
-
|
613
|
-
|
614
|
-
|
615
|
-
|
616
|
-
|
617
|
-
|
618
|
-
|
619
|
-
|
620
|
-
|
621
|
-
|
622
|
-
|
623
|
-
|
624
|
-
|
625
|
-
|
626
|
-
|
627
|
-
|
628
|
-
|
629
|
-
|
630
|
-
|
631
|
-
|
632
|
-
|
633
|
-
|
634
|
-
|
635
|
-
|
636
|
-
|
637
|
-
|
638
|
-
|
639
|
-
|
640
|
-
|
641
|
-
|
642
|
-
|
643
|
-
|
644
|
-
|
645
|
-
|
646
|
-
|
647
|
-
|
648
|
-
|
649
|
-
|
650
|
-
|
651
|
-
|
652
|
-
|
653
|
-
|
654
|
-
|
655
|
-
|
656
|
-
|
657
|
-
|
658
|
-
return closer;
|
547
|
+
// Wrap the inlines lying between `opener` and `closer` in an emph or strong
// node, consuming the delimiter characters that were used from both runs.
// Returns the delimiter from which the caller should continue: `closer`
// itself if it still has characters left, otherwise the delimiter after it.
static delimiter *S_insert_emph(subject *subj, delimiter *opener,
                                delimiter *closer) {
  cmark_node *open_node = opener->inl_text;
  cmark_node *close_node = closer->inl_text;
  bufsize_t open_left = open_node->as.literal.len;
  bufsize_t close_left = close_node->as.literal.len;
  bufsize_t used;
  delimiter *cur, *before;
  cmark_node *emph, *head, *tail, *child;

  // Decide how many delimiter characters this pairing consumes.
  if (close_left >= 3 && open_left >= 3) {
    // both runs are long: take 2 for an even closer, 1 for an odd one
    used = (close_left % 2 == 0) ? 2 : 1;
  } else if (open_left < close_left) {
    used = open_left;
  } else {
    used = close_left;
  }

  // Shorten the literal text of both delimiter runs accordingly.
  open_left -= used;
  close_left -= used;
  open_node->as.literal.len = open_left;
  close_node->as.literal.len = close_left;

  // Drop every delimiter strictly between the opener and the closer.
  for (cur = closer->previous; cur != NULL && cur != opener; cur = before) {
    before = cur->previous;
    remove_delimiter(subj, cur);
  }

  // Remember the span of inlines that will become children of the new node.
  head = open_node->next;
  tail = close_node->prev;

  if (open_left != 0) {
    // The opener keeps some characters: splice a brand new node in after it.
    emph = used == 1 ? make_emph() : make_strong();
    emph->parent = open_node->parent;
    emph->prev = open_node;
    open_node->next = emph;
  } else {
    // The opener is used up: recycle its inline node as the emph/strong node
    // and take the opener off the delimiter list.
    cmark_chunk_free(&(open_node->as.literal));
    emph = open_node;
    emph->type = used == 1 ? CMARK_NODE_EMPH : CMARK_NODE_STRONG;
    remove_delimiter(subj, opener);
  }

  // Hook the new node in front of the closer and hang the children below it.
  emph->next = close_node;
  close_node->prev = emph;
  emph->first_child = head;
  emph->last_child = tail;

  // Detach the child run from its former siblings and re-parent it.
  head->prev = NULL;
  tail->next = NULL;
  for (child = head; child != NULL; child = child->next) {
    child->parent = emph;
  }

  // A fully consumed closer is removed together with its inline node.
  if (close_left == 0) {
    delimiter *following;

    cmark_node_free(close_node);
    following = closer->next;
    remove_delimiter(subj, closer);
    closer = following;
  }

  return closer;
}
|
660
624
|
|
661
625
|
// Parse backslash-escape or just a backslash, returning an inline.
|
662
|
-
static cmark_node*
|
663
|
-
|
664
|
-
|
665
|
-
|
666
|
-
|
667
|
-
|
668
|
-
|
669
|
-
|
670
|
-
|
671
|
-
|
672
|
-
|
673
|
-
|
626
|
+
// Parse a backslash sequence at the current position and return an inline:
// the escaped punctuation character, a hard line break when the backslash
// is followed by a line ending, or a literal backslash otherwise.
static cmark_node *handle_backslash(subject *subj) {
  unsigned char escaped;

  advance(subj); // step over the backslash itself
  escaped = peek_char(subj);

  // only ascii symbols and newline can be escaped
  if (cmark_ispunct(escaped)) {
    advance(subj);
    return make_str(cmark_chunk_dup(&subj->input, subj->pos - 1, 1));
  }

  if (!is_eof(subj) && skip_line_end(subj)) {
    return make_linebreak();
  }

  return make_str(cmark_chunk_literal("\\"));
}
|
675
639
|
|
676
640
|
// Parse an entity or a regular "&" string.
|
677
641
|
// Assumes the subject has an '&' character at the current position.
|
678
|
-
static cmark_node*
|
679
|
-
|
680
|
-
|
681
|
-
bufsize_t len;
|
642
|
+
// Parse what follows an '&': if houdini_unescape_ent recognises an entity,
// consume it and return its decoded text; otherwise return a literal "&".
static cmark_node *handle_entity(subject *subj) {
  cmark_strbuf decoded = GH_BUF_INIT;
  bufsize_t consumed;

  advance(subj); // skip the '&'

  consumed = houdini_unescape_ent(&decoded, subj->input.data + subj->pos,
                                  subj->input.len - subj->pos);

  if (consumed != 0) {
    subj->pos += consumed;
    return make_str(cmark_chunk_buf_detach(&decoded));
  }

  // nothing was recognised: the ampersand stands for itself
  return make_str(cmark_chunk_literal("&"));
}
|
696
657
|
|
697
658
|
// Clean a URL: remove surrounding whitespace and surrounding <>,
|
698
659
|
// and remove \ that escape punctuation.
|
699
|
-
cmark_chunk cmark_clean_url(cmark_chunk *url)
|
700
|
-
|
701
|
-
cmark_strbuf buf = GH_BUF_INIT;
|
660
|
+
// Produce a cleaned copy of `url`: trims the chunk in place, strips one pair
// of enclosing <...> if present, HTML-unescapes the remainder and resolves
// backslash escapes. An empty (after trimming) url yields an empty chunk.
cmark_chunk cmark_clean_url(cmark_chunk *url) {
  cmark_strbuf cleaned = GH_BUF_INIT;
  bufsize_t skip = 0;
  bufsize_t len;

  cmark_chunk_trim(url);

  if (url->len == 0) {
    cmark_chunk result = CMARK_CHUNK_EMPTY;
    return result;
  }

  len = url->len;
  if (url->data[0] == '<' && url->data[url->len - 1] == '>') {
    // drop the surrounding pointy brackets
    skip = 1;
    len -= 2;
  }

  houdini_unescape_html_f(&cleaned, url->data + skip, len);
  cmark_strbuf_unescape(&cleaned);
  return cmark_chunk_buf_detach(&cleaned);
}
|
719
679
|
|
720
|
-
cmark_chunk cmark_clean_title(cmark_chunk *title)
|
721
|
-
|
722
|
-
|
723
|
-
|
724
|
-
|
725
|
-
|
726
|
-
|
727
|
-
|
728
|
-
|
729
|
-
|
730
|
-
|
731
|
-
|
732
|
-
|
733
|
-
|
734
|
-
|
735
|
-
|
736
|
-
|
737
|
-
|
738
|
-
|
739
|
-
|
740
|
-
|
741
|
-
|
742
|
-
cmark_strbuf_unescape(&buf);
|
743
|
-
return cmark_chunk_buf_detach(&buf);
|
680
|
+
// Produce a cleaned copy of a link title: strips one pair of surrounding
// delimiters ('...', "..." or (...)) if present, HTML-unescapes the rest
// and resolves backslash escapes. An empty title yields an empty chunk.
//
// The delimiters are only stripped when the title has at least two
// characters. A single-character title such as "'" has first == last, and
// stripping it would hand a length of -1 to the unescaper.
cmark_chunk cmark_clean_title(cmark_chunk *title) {
  cmark_strbuf buf = GH_BUF_INIT;
  unsigned char first, last;

  if (title->len == 0) {
    cmark_chunk result = CMARK_CHUNK_EMPTY;
    return result;
  }

  first = title->data[0];
  last = title->data[title->len - 1];

  // remove surrounding quotes if any:
  if (title->len >= 2 &&
      ((first == '\'' && last == '\'') || (first == '(' && last == ')') ||
       (first == '"' && last == '"'))) {
    houdini_unescape_html_f(&buf, title->data + 1, title->len - 2);
  } else {
    houdini_unescape_html_f(&buf, title->data, title->len);
  }

  cmark_strbuf_unescape(&buf);
  return cmark_chunk_buf_detach(&buf);
}
|
745
703
|
|
746
704
|
// Parse an autolink or HTML tag.
|
747
705
|
// Assumes the subject has a '<' character at the current position.
|
748
|
-
static cmark_node*
|
749
|
-
|
750
|
-
|
751
|
-
|
752
|
-
|
753
|
-
|
754
|
-
|
755
|
-
|
756
|
-
|
757
|
-
|
758
|
-
|
759
|
-
|
760
|
-
|
761
|
-
|
762
|
-
|
763
|
-
|
764
|
-
|
765
|
-
|
766
|
-
|
767
|
-
|
768
|
-
|
769
|
-
|
770
|
-
|
771
|
-
|
772
|
-
|
773
|
-
|
774
|
-
|
775
|
-
|
776
|
-
|
777
|
-
|
778
|
-
|
779
|
-
|
780
|
-
|
781
|
-
|
782
|
-
return make_str(cmark_chunk_literal("<"));
|
706
|
+
// Parse what follows a '<': tries, in order, a URI autolink, an email
// autolink and a raw HTML tag. If none of them matches, only the '<' is
// consumed and returned as literal text.
static cmark_node *handle_pointy_brace(subject *subj) {
  bufsize_t len;
  cmark_chunk span;

  advance(subj); // advance past first <

  // 1) URL autolink; the chunk length is one less than the scanned length
  len = scan_autolink_uri(&subj->input, subj->pos);
  if (len > 0) {
    span = cmark_chunk_dup(&subj->input, subj->pos, len - 1);
    subj->pos += len;
    return make_autolink(span, 0);
  }

  // 2) email autolink, handled the same way but flagged with 1
  len = scan_autolink_email(&subj->input, subj->pos);
  if (len > 0) {
    span = cmark_chunk_dup(&subj->input, subj->pos, len - 1);
    subj->pos += len;
    return make_autolink(span, 1);
  }

  // 3) raw HTML tag; the chunk starts one position back to include the '<'
  len = scan_html_tag(&subj->input, subj->pos);
  if (len > 0) {
    span = cmark_chunk_dup(&subj->input, subj->pos - 1, len + 1);
    subj->pos += len;
    return make_raw_html(span);
  }

  // nothing matched: the opening < is plain text
  return make_str(cmark_chunk_literal("<"));
}
|
784
741
|
|
785
742
|
// Parse a link label. Returns 1 if successful.
|
786
743
|
// Note: unescaped brackets are not allowed in labels.
|
787
744
|
// The label begins with `[` and ends with the first `]` character
|
788
745
|
// encountered. Backticks in labels do not start code spans.
|
789
|
-
static int link_label(subject*
|
790
|
-
|
791
|
-
|
792
|
-
|
793
|
-
|
794
|
-
|
795
|
-
|
796
|
-
|
797
|
-
|
798
|
-
|
799
|
-
|
800
|
-
|
801
|
-
|
802
|
-
|
803
|
-
|
804
|
-
|
805
|
-
|
806
|
-
|
807
|
-
|
808
|
-
|
809
|
-
|
810
|
-
|
811
|
-
|
812
|
-
|
813
|
-
|
814
|
-
|
815
|
-
|
816
|
-
|
817
|
-
|
818
|
-
|
819
|
-
|
820
|
-
|
821
|
-
|
822
|
-
|
823
|
-
|
824
|
-
|
746
|
+
// Try to parse a link label `[...]` at the current position.
// On success stores the trimmed label text (without brackets) in
// *raw_label, leaves the subject just past the closing ']' and returns 1.
// On failure returns 0; the subject position is restored to where it was.
static int link_label(subject *subj, cmark_chunk *raw_label) {
  const bufsize_t startpos = subj->pos;
  int length = 0;
  unsigned char c;

  // a label must open with [
  if (peek_char(subj) != '[') {
    return 0;
  }
  advance(subj);

  for (;;) {
    c = peek_char(subj);
    if (c == 0 || c == '[' || c == ']') {
      break;
    }

    // every character counts towards the length limit ...
    advance(subj);
    length++;

    // ... and a backslash also swallows the punctuation it escapes
    if (c == '\\' && cmark_ispunct(peek_char(subj))) {
      advance(subj);
      length++;
    }

    if (length > MAX_LINK_LABEL_LENGTH) {
      subj->pos = startpos; // rewind
      return 0;
    }
  }

  if (c != ']') {
    // hit end of input or a nested '[': not a label
    subj->pos = startpos; // rewind
    return 0;
  }

  *raw_label =
      cmark_chunk_dup(&subj->input, startpos + 1, subj->pos - (startpos + 1));
  cmark_chunk_trim(raw_label);
  advance(subj); // advance past ]
  return 1;
}
|
831
787
|
|
832
788
|
// Return a link, an image, or a literal close bracket.
|
833
|
-
static cmark_node*
|
834
|
-
|
835
|
-
|
836
|
-
|
837
|
-
|
838
|
-
|
839
|
-
|
840
|
-
|
841
|
-
|
842
|
-
|
843
|
-
|
844
|
-
|
845
|
-
|
846
|
-
|
847
|
-
|
848
|
-
|
849
|
-
|
850
|
-
|
851
|
-
|
852
|
-
|
853
|
-
|
854
|
-
|
855
|
-
|
856
|
-
|
857
|
-
|
858
|
-
|
859
|
-
|
860
|
-
|
861
|
-
|
862
|
-
|
863
|
-
|
864
|
-
|
865
|
-
|
866
|
-
|
867
|
-
|
868
|
-
|
869
|
-
|
870
|
-
|
871
|
-
|
872
|
-
|
873
|
-
|
874
|
-
|
875
|
-
|
876
|
-
|
877
|
-
|
878
|
-
|
879
|
-
|
880
|
-
|
881
|
-
|
882
|
-
|
883
|
-
|
884
|
-
|
885
|
-
|
886
|
-
|
887
|
-
|
888
|
-
|
889
|
-
|
890
|
-
|
891
|
-
|
892
|
-
|
893
|
-
|
894
|
-
|
895
|
-
|
896
|
-
|
897
|
-
|
898
|
-
|
899
|
-
|
900
|
-
|
901
|
-
|
902
|
-
|
903
|
-
|
904
|
-
|
905
|
-
|
906
|
-
|
907
|
-
|
908
|
-
|
909
|
-
|
910
|
-
|
911
|
-
|
912
|
-
|
913
|
-
|
914
|
-
|
915
|
-
|
916
|
-
|
917
|
-
|
918
|
-
|
919
|
-
|
920
|
-
|
921
|
-
|
922
|
-
|
923
|
-
|
924
|
-
|
925
|
-
|
926
|
-
|
927
|
-
|
928
|
-
|
929
|
-
|
930
|
-
|
931
|
-
|
932
|
-
|
933
|
-
|
934
|
-
|
935
|
-
|
789
|
+
// Handle a ']' at the current position by trying to close the nearest
// pending '[' or '![' delimiter, first as an inline link/image and then as
// a reference link/image.
// Returns a literal "]" text node when no link or image can be formed.
// Returns NULL when one was formed: in that case the opener's own inline
// node has been converted into the link/image node.
static cmark_node *handle_close_bracket(subject *subj, cmark_node *parent) {
  bufsize_t after_bracket;
  bufsize_t url_start, url_end, title_start, title_end, close_paren;
  bufsize_t url_len;
  bufsize_t spaces;
  cmark_reference *ref;
  bool is_image = false;
  cmark_chunk raw_url, raw_title;
  cmark_chunk url, title;
  delimiter *opener;
  delimiter *earlier;
  cmark_node *link_text;
  cmark_node *inl;
  cmark_chunk raw_label;
  int found_label;

  advance(subj); // advance past ]
  after_bracket = subj->pos;

  // walk back through the delimiter list looking for a [ or !
  for (opener = subj->last_delim; opener != NULL; opener = opener->previous) {
    if (opener->delim_char == '[' || opener->delim_char == '!') {
      break;
    }
  }

  if (opener == NULL) {
    return make_str(cmark_chunk_literal("]"));
  }

  if (!opener->active) {
    // an inactive opener cannot start a link: take it off the stack
    remove_delimiter(subj, opener);
    return make_str(cmark_chunk_literal("]"));
  }

  // From here on we have potential link/image text.
  is_image = opener->delim_char == '!';
  link_text = opener->inl_text->next;

  // Attempt 1: inline link, i.e. "(" url [title] ")".
  if (peek_char(subj) == '(' &&
      ((spaces = scan_spacechars(&subj->input, subj->pos + 1)) > -1) &&
      ((url_len = scan_link_url(&subj->input, subj->pos + 1 + spaces)) > -1)) {

    url_start = subj->pos + 1 + spaces; // after (
    url_end = url_start + url_len;
    title_start = url_end + scan_spacechars(&subj->input, url_end);

    // a title is only looked for when spaces separate it from the url
    if (title_start == url_end) {
      title_end = title_start;
    } else {
      title_end = title_start + scan_link_title(&subj->input, title_start);
    }

    close_paren = title_end + scan_spacechars(&subj->input, title_end);

    if (peek_at(subj, close_paren) != ')') {
      goto noMatch;
    }

    subj->pos = close_paren + 1;

    raw_url = cmark_chunk_dup(&subj->input, url_start, url_end - url_start);
    raw_title =
        cmark_chunk_dup(&subj->input, title_start, title_end - title_start);
    url = cmark_clean_url(&raw_url);
    title = cmark_clean_title(&raw_title);
    cmark_chunk_free(&raw_url);
    cmark_chunk_free(&raw_title);
    goto match;
  }

  // Attempt 2: reference link. Skip spaces, then look for a [link label].
  subj->pos += scan_spacechars(&subj->input, subj->pos);
  raw_label = cmark_chunk_literal("");
  found_label = link_label(subj, &raw_label);
  if (!found_label || raw_label.len == 0) {
    // no (or an empty) second label: the bracketed text itself is the label
    cmark_chunk_free(&raw_label);
    raw_label = cmark_chunk_dup(&subj->input, opener->position,
                                after_bracket - opener->position - 1);
  }

  if (!found_label) {
    // shortcut reference link: back up to before the spaces we skipped
    subj->pos = after_bracket;
  }

  ref = cmark_reference_lookup(subj->refmap, &raw_label);
  cmark_chunk_free(&raw_label);

  if (ref == NULL) {
    goto noMatch;
  }

  url = chunk_clone(&ref->url);
  title = chunk_clone(&ref->title);
  goto match;

noMatch:
  // Not a link after all: drop this opener, rewind, emit the bracket as text.
  remove_delimiter(subj, opener);
  subj->pos = after_bracket;
  return make_str(cmark_chunk_literal("]"));

match:
  // Convert the opener's text node into the link/image node.
  inl = opener->inl_text;
  inl->type = is_image ? CMARK_NODE_IMAGE : CMARK_NODE_LINK;
  cmark_chunk_free(&inl->as.literal);
  inl->first_child = link_text;
  process_emphasis(subj, opener);
  inl->as.link.url = url;
  inl->as.link.title = title;
  inl->next = NULL;

  if (link_text) {
    // re-parent the whole run of former siblings; the last one becomes
    // the link's last child
    cmark_node *child = link_text;

    link_text->prev = NULL;
    for (;;) {
      child->parent = inl;
      if (child->next == NULL) {
        break;
      }
      child = child->next;
    }
    inl->last_child = child;
  }
  parent->last_child = inl;

  // A link (not an image) deactivates earlier '[' openers, up to the first
  // one that is already inactive. (This code can be removed if we decide to
  // allow links inside links.)
  remove_delimiter(subj, opener);
  if (!is_image) {
    for (earlier = subj->last_delim; earlier != NULL;
         earlier = earlier->previous) {
      if (earlier->delim_char != '[') {
        continue;
      }
      if (!earlier->active) {
        break;
      }
      earlier->active = false;
    }
  }

  return NULL;
}
|
983
940
|
|
984
941
|
// Parse a hard or soft linebreak, returning an inline.
|
985
|
-
// Assumes the subject has a newline at the current position.
|
986
|
-
static cmark_node*
|
987
|
-
|
988
|
-
|
989
|
-
|
990
|
-
|
991
|
-
|
992
|
-
|
993
|
-
|
994
|
-
|
995
|
-
|
996
|
-
|
997
|
-
|
998
|
-
|
999
|
-
|
942
|
+
// Assumes the subject has a cr or newline at the current position.
|
943
|
+
// Consume a line ending (cr, lf or crlf) plus the spaces that start the next
// line, and return a hard line break if the two characters just before the
// line ending were both spaces, or a soft break otherwise.
static cmark_node *handle_newline(subject *subj) {
  const bufsize_t break_at = subj->pos;
  bool hard;

  // step over an optional cr followed by an optional lf
  if (peek_at(subj, subj->pos) == '\r') {
    advance(subj);
  }
  if (peek_at(subj, subj->pos) == '\n') {
    advance(subj);
  }

  // leading spaces of the following line are not content
  skip_spaces(subj);

  hard = break_at > 1 && peek_at(subj, break_at - 1) == ' ' &&
         peek_at(subj, break_at - 2) == ' ';

  if (hard) {
    return make_linebreak();
  }
  return make_softbreak();
}
|
1001
961
|
|
1002
|
-
static bufsize_t subject_find_special_char(subject *subj, int options)
|
1003
|
-
|
1004
|
-
|
1005
|
-
|
1006
|
-
|
1007
|
-
|
1008
|
-
|
1009
|
-
|
1010
|
-
|
1011
|
-
|
1012
|
-
|
1013
|
-
|
1014
|
-
|
1015
|
-
|
1016
|
-
|
1017
|
-
|
1018
|
-
|
1019
|
-
|
1020
|
-
|
1021
|
-
|
1022
|
-
|
1023
|
-
|
1024
|
-
|
1025
|
-
|
1026
|
-
|
1027
|
-
|
1028
|
-
|
1029
|
-
|
1030
|
-
|
1031
|
-
|
1032
|
-
|
1033
|
-
|
1034
|
-
|
1035
|
-
|
1036
|
-
|
1037
|
-
|
1038
|
-
|
1039
|
-
|
1040
|
-
|
1041
|
-
|
1042
|
-
|
1043
|
-
|
1044
|
-
bufsize_t n = subj->pos + 1;
|
1045
|
-
|
1046
|
-
while (n < subj->input.len) {
|
1047
|
-
if (SPECIAL_CHARS[subj->input.data[n]])
|
1048
|
-
return n;
|
1049
|
-
if (options & CMARK_OPT_SMART &&
|
1050
|
-
SMART_PUNCT_CHARS[subj->input.data[n]])
|
1051
|
-
return n;
|
1052
|
-
n++;
|
1053
|
-
}
|
1054
|
-
|
1055
|
-
return subj->input.len;
|
962
|
+
// Return the position of the first byte after the current one that needs
// special inline handling, or the input length if there is none. The search
// starts at pos + 1, so the byte at the current position is never tested.
// When CMARK_OPT_SMART is set in `options`, the smart punctuation
// characters count as special too.
static bufsize_t subject_find_special_char(subject *subj, int options) {
  // Lookup tables indexed by byte value; entries not listed are zero.
  // Both are sized 256 explicitly so that any byte value is a valid index.
  static const int8_t SPECIAL_CHARS[256] = {
      ['\r'] = 1, ['\n'] = 1, ['\\'] = 1, ['`'] = 1, ['&'] = 1, ['_'] = 1,
      ['*'] = 1,  ['['] = 1,  [']'] = 1,  ['<'] = 1, ['!'] = 1,
  };

  static const char SMART_PUNCT_CHARS[256] = {
      ['"'] = 1,
      ['\''] = 1,
      ['.'] = 1,
      ['-'] = 1,
  };

  bufsize_t n;

  for (n = subj->pos + 1; n < subj->input.len; n++) {
    if (SPECIAL_CHARS[subj->input.data[n]] ||
        ((options & CMARK_OPT_SMART) &&
         SMART_PUNCT_CHARS[subj->input.data[n]])) {
      return n;
    }
  }

  return subj->input.len;
}
|
1057
1004
|
|
1058
1005
|
// Parse an inline, advancing subject, and add it as a child of parent.
|
1059
1006
|
// Return 0 if no inline can be parsed, 1 otherwise.
|
1060
|
-
static int parse_inline(subject*
|
1061
|
-
|
1062
|
-
|
1063
|
-
|
1064
|
-
|
1065
|
-
|
1066
|
-
|
1067
|
-
|
1068
|
-
|
1069
|
-
|
1070
|
-
|
1071
|
-
|
1072
|
-
|
1073
|
-
|
1074
|
-
|
1075
|
-
|
1076
|
-
|
1077
|
-
|
1078
|
-
|
1079
|
-
|
1080
|
-
|
1081
|
-
|
1082
|
-
|
1083
|
-
|
1084
|
-
|
1085
|
-
|
1086
|
-
|
1087
|
-
|
1088
|
-
|
1089
|
-
|
1090
|
-
|
1091
|
-
|
1092
|
-
|
1093
|
-
|
1094
|
-
|
1095
|
-
|
1096
|
-
|
1097
|
-
|
1098
|
-
|
1099
|
-
|
1100
|
-
|
1101
|
-
|
1102
|
-
|
1103
|
-
|
1104
|
-
|
1105
|
-
|
1106
|
-
|
1107
|
-
|
1108
|
-
|
1109
|
-
|
1110
|
-
|
1111
|
-
|
1112
|
-
|
1113
|
-
|
1114
|
-
|
1115
|
-
|
1116
|
-
|
1117
|
-
|
1118
|
-
|
1119
|
-
|
1120
|
-
|
1121
|
-
|
1122
|
-
|
1123
|
-
|
1124
|
-
|
1125
|
-
|
1126
|
-
|
1127
|
-
|
1128
|
-
|
1129
|
-
|
1130
|
-
|
1131
|
-
|
1132
|
-
|
1133
|
-
return 1;
|
1007
|
+
// Parse one inline element at the current position, advancing the subject,
// and append the resulting node (if the handler produced one) to `parent`.
// Returns 0 when the next character is 0 (nothing left to parse), otherwise 1.
static int parse_inline(subject *subj, cmark_node *parent, int options) {
  const int smart = (options & CMARK_OPT_SMART) != 0;
  cmark_node *node = NULL;
  unsigned char c = peek_char(subj);

  if (c == 0) {
    return 0;
  }

  switch (c) {
  case '\r':
  case '\n':
    node = handle_newline(subj);
    break;

  case '`':
    node = handle_backticks(subj);
    break;

  case '\\':
    node = handle_backslash(subj);
    break;

  case '&':
    node = handle_entity(subj);
    break;

  case '<':
    node = handle_pointy_brace(subj);
    break;

  case '*':
  case '_':
  case '\'':
  case '"':
    node = handle_delim(subj, c, smart);
    break;

  case '-':
    node = handle_hyphen(subj, smart);
    break;

  case '.':
    node = handle_period(subj, smart);
    break;

  case '[':
    // emitted as text for now and remembered as a potential link opener
    advance(subj);
    node = make_str(cmark_chunk_literal("["));
    push_delimiter(subj, '[', true, false, node);
    break;

  case ']':
    node = handle_close_bracket(subj, parent);
    break;

  case '!':
    advance(subj);
    if (peek_char(subj) != '[') {
      node = make_str(cmark_chunk_literal("!"));
    } else {
      // "![" is remembered as a potential image opener
      advance(subj);
      node = make_str(cmark_chunk_literal("!["));
      push_delimiter(subj, '!', false, true, node);
    }
    break;

  default: {
    // plain text: take everything up to the next special character
    bufsize_t stop = subject_find_special_char(subj, options);
    cmark_chunk text =
        cmark_chunk_dup(&subj->input, subj->pos, stop - subj->pos);

    subj->pos = stop;

    // if we're at a newline, strip trailing spaces.
    if (S_is_line_end_char(peek_char(subj))) {
      cmark_chunk_rtrim(&text);
    }

    node = make_str(text);
    break;
  }
  }

  if (node != NULL) {
    cmark_node_append_child(parent, node);
  }

  return 1;
}
|
1135
1081
|
|
1136
1082
|
// Parse inlines from parent's string_content, adding as children of parent.
|
1137
|
-
extern void cmark_parse_inlines(cmark_node*
|
1138
|
-
{
|
1139
|
-
|
1140
|
-
|
1141
|
-
|
1083
|
+
// Parse the inline content held in parent->string_content and add the
// resulting nodes as children of `parent`. `refmap` is handed to the subject
// for reference lookups; `options` is passed through to parse_inline.
extern void cmark_parse_inlines(cmark_node *parent, cmark_reference_map *refmap,
                                int options) {
  subject subj;

  subject_from_buf(&subj, &parent->string_content, refmap);
  cmark_chunk_rtrim(&subj.input);

  // keep parsing until the input is exhausted or nothing more can be parsed
  for (;;) {
    if (is_eof(&subj)) {
      break;
    }
    if (!parse_inline(&subj, parent, options)) {
      break;
    }
  }

  // resolve whatever delimiters are still pending
  process_emphasis(&subj, NULL);
}
|
1147
1094
|
|
1148
1095
|
// Parse zero or more space characters, including at most one newline.
|
1149
|
-
static void spnl(subject*
|
1150
|
-
|
1151
|
-
|
1152
|
-
|
1153
|
-
|
1154
|
-
}
|
1096
|
+
// Skip spaces, then at most one line ending, then the spaces after it.
static void spnl(subject *subj) {
  skip_spaces(subj);

  if (!skip_line_end(subj)) {
    return; // no line ending here, so nothing more to skip
  }

  skip_spaces(subj);
}
|
1156
1102
|
|
1157
1103
|
// Parse reference. Assumes string begins with '[' character.
|
1158
1104
|
// Modify refmap if a reference is encountered.
|
1159
1105
|
// Return 0 if no reference found, otherwise position of subject
|
1160
1106
|
// after reference is parsed.
|
1161
|
-
bufsize_t cmark_parse_reference_inline(cmark_strbuf *input,
|
1162
|
-
{
|
1163
|
-
|
1164
|
-
|
1165
|
-
|
1166
|
-
|
1167
|
-
|
1168
|
-
|
1169
|
-
|
1170
|
-
|
1171
|
-
|
1172
|
-
|
1173
|
-
|
1174
|
-
|
1175
|
-
|
1176
|
-
|
1177
|
-
|
1178
|
-
|
1179
|
-
|
1180
|
-
|
1181
|
-
|
1182
|
-
|
1183
|
-
|
1184
|
-
|
1185
|
-
|
1186
|
-
|
1187
|
-
|
1188
|
-
|
1189
|
-
|
1190
|
-
|
1191
|
-
|
1192
|
-
|
1193
|
-
|
1194
|
-
|
1195
|
-
|
1196
|
-
|
1197
|
-
|
1198
|
-
|
1199
|
-
|
1200
|
-
|
1201
|
-
|
1202
|
-
|
1203
|
-
|
1204
|
-
|
1205
|
-
|
1206
|
-
|
1207
|
-
|
1208
|
-
|
1209
|
-
|
1210
|
-
|
1211
|
-
|
1212
|
-
|
1213
|
-
|
1214
|
-
|
1215
|
-
|
1216
|
-
|
1217
|
-
|
1218
|
-
|
1219
|
-
|
1220
|
-
|
1221
|
-
|
1222
|
-
|
1107
|
+
bufsize_t cmark_parse_reference_inline(cmark_strbuf *input,
|
1108
|
+
cmark_reference_map *refmap) {
|
1109
|
+
subject subj;
|
1110
|
+
|
1111
|
+
cmark_chunk lab;
|
1112
|
+
cmark_chunk url;
|
1113
|
+
cmark_chunk title;
|
1114
|
+
|
1115
|
+
bufsize_t matchlen = 0;
|
1116
|
+
bufsize_t beforetitle;
|
1117
|
+
|
1118
|
+
subject_from_buf(&subj, input, NULL);
|
1119
|
+
|
1120
|
+
// parse label:
|
1121
|
+
if (!link_label(&subj, &lab) || lab.len == 0)
|
1122
|
+
return 0;
|
1123
|
+
|
1124
|
+
// colon:
|
1125
|
+
if (peek_char(&subj) == ':') {
|
1126
|
+
advance(&subj);
|
1127
|
+
} else {
|
1128
|
+
return 0;
|
1129
|
+
}
|
1130
|
+
|
1131
|
+
// parse link url:
|
1132
|
+
spnl(&subj);
|
1133
|
+
matchlen = scan_link_url(&subj.input, subj.pos);
|
1134
|
+
if (matchlen) {
|
1135
|
+
url = cmark_chunk_dup(&subj.input, subj.pos, matchlen);
|
1136
|
+
subj.pos += matchlen;
|
1137
|
+
} else {
|
1138
|
+
return 0;
|
1139
|
+
}
|
1140
|
+
|
1141
|
+
// parse optional link_title
|
1142
|
+
beforetitle = subj.pos;
|
1143
|
+
spnl(&subj);
|
1144
|
+
matchlen = scan_link_title(&subj.input, subj.pos);
|
1145
|
+
if (matchlen) {
|
1146
|
+
title = cmark_chunk_dup(&subj.input, subj.pos, matchlen);
|
1147
|
+
subj.pos += matchlen;
|
1148
|
+
} else {
|
1149
|
+
subj.pos = beforetitle;
|
1150
|
+
title = cmark_chunk_literal("");
|
1151
|
+
}
|
1152
|
+
|
1153
|
+
// parse final spaces and newline:
|
1154
|
+
skip_spaces(&subj);
|
1155
|
+
if (!skip_line_end(&subj)) {
|
1156
|
+
if (matchlen) { // try rewinding before title
|
1157
|
+
subj.pos = beforetitle;
|
1158
|
+
skip_spaces(&subj);
|
1159
|
+
if (!skip_line_end(&subj)) {
|
1160
|
+
return 0;
|
1161
|
+
}
|
1162
|
+
} else {
|
1163
|
+
return 0;
|
1164
|
+
}
|
1165
|
+
}
|
1166
|
+
// insert reference into refmap
|
1167
|
+
cmark_reference_create(refmap, &lab, &url, &title);
|
1168
|
+
return subj.pos;
|
1223
1169
|
}
|