redcarpet 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of redcarpet might be problematic. Click here for more details.

data/ext/buffer.h ADDED
@@ -0,0 +1,147 @@
1
+ /* buffer.h - automatic buffer structure */
2
+
3
+ /*
4
+ * Copyright (c) 2008, Natacha Porté
5
+ *
6
+ * Permission to use, copy, modify, and distribute this software for any
7
+ * purpose with or without fee is hereby granted, provided that the above
8
+ * copyright notice and this permission notice appear in all copies.
9
+ *
10
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17
+ */
18
+
19
+ #ifndef LITHIUM_BUFFER_H
20
+ #define LITHIUM_BUFFER_H
21
+
22
+ #include <stddef.h>
23
+
24
+
25
+ /********************
26
+ * TYPE DEFINITIONS *
27
+ ********************/
28
+
29
/* struct buf • character array buffer */
struct buf {
	char	*data;	/* actual character data */
	size_t	 size;	/* size of the string */
	size_t	 asize;	/* allocated size (0 = volatile buffer) */
	size_t	 unit;	/* reallocation unit size (0 = read-only buffer) */
	int	 ref;	/* reference count */
};
36
+
37
+
38
+
39
+ /**********
40
+ * MACROS *
41
+ **********/
42
+
43
/* CONST_BUF • global buffer from a string literal */
/*   size excludes the terminating NUL, asize includes it */
#define CONST_BUF(name, string) \
	static struct buf name = { string, sizeof string -1, sizeof string }


/* VOLATILE_BUF • macro for creating a volatile buffer on the stack */
/*   expands to a strlen() call, so <string.h> must be visible at the use site */
#define VOLATILE_BUF(name, strname) \
	struct buf name = { strname, strlen(strname) }


/* BUFPUTSL • optimized bufputs of a string literal */
/*   the length is taken with sizeof, so the argument must be a literal */
#define BUFPUTSL(output, literal) \
	bufput(output, literal, sizeof literal - 1)
56
+
57
+
58
+
59
+ /********************
60
+ * BUFFER FUNCTIONS *
61
+ ********************/
62
+
63
+ /* bufcasecmp • case-insensitive buffer comparison */
64
+ int
65
+ bufcasecmp(const struct buf *, const struct buf *);
66
+
67
+ /* bufcmp • case-sensitive buffer comparison */
68
+ int
69
+ bufcmp(const struct buf *, const struct buf *);
70
+
71
+ /* bufcmps • case-sensitive comparison of a string to a buffer */
72
+ int
73
+ bufcmps(const struct buf *, const char *);
74
+
75
+ /* bufprefix • compares the beginning of a buffer with a string */
76
+ int
77
+ bufprefix(const struct buf *buf, const char *prefix);
78
+
79
+ /* bufdup • buffer duplication */
80
+ struct buf *
81
+ bufdup(const struct buf *, size_t)
82
+ __attribute__ ((malloc));
83
+
84
+ /* bufgrow • increasing the allocated size to the given value */
85
+ int
86
+ bufgrow(struct buf *, size_t);
87
+
88
+ /* bufnew • allocation of a new buffer */
89
+ struct buf *
90
+ bufnew(size_t)
91
+ __attribute__ ((malloc));
92
+
93
+ /* bufnullterm • NUL-termination of the string array (making a C-string) */
94
+ void
95
+ bufnullterm(struct buf *);
96
+
97
+ /* bufprintf • formatted printing to a buffer */
98
+ void
99
+ bufprintf(struct buf *, const char *, ...)
100
+ __attribute__ ((format (printf, 2, 3)));
101
+
102
+ /* bufput • appends raw data to a buffer */
103
+ void
104
+ bufput(struct buf *, const void*, size_t);
105
+
106
+ /* bufputs • appends a NUL-terminated string to a buffer */
107
+ void
108
+ bufputs(struct buf *, const char*);
109
+
110
+ /* bufputc • appends a single char to a buffer */
111
+ void
112
+ bufputc(struct buf *, char);
113
+
114
+ /* bufrelease • decrease the reference count and free the buffer if needed */
115
+ void
116
+ bufrelease(struct buf *);
117
+
118
+ /* bufreset • frees internal data of the buffer */
119
+ void
120
+ bufreset(struct buf *);
121
+
122
+ /* bufset • safely assigns a buffer to another */
123
+ void
124
+ bufset(struct buf **, struct buf *);
125
+
126
+ /* bufslurp • removes a given number of bytes from the head of the array */
127
+ void
128
+ bufslurp(struct buf *, size_t);
129
+
130
+ /* buftoi • converts the numbers at the beginning of the buf into an int */
131
+ int
132
+ buftoi(struct buf *, size_t, size_t *);
133
+
134
+
135
+
136
+ #ifdef BUFFER_STDARG
137
+ #include <stdarg.h>
138
+
139
+ /* vbufprintf • stdarg variant of formatted printing into a buffer */
140
+ void
141
+ vbufprintf(struct buf *, const char*, va_list);
142
+
143
+ #endif /* def BUFFER_STDARG */
144
+
145
+ #endif /* ndef LITHIUM_BUFFER_H */
146
+
147
+ /* vim: set filetype=c: */
data/ext/extconf.rb ADDED
@@ -0,0 +1,4 @@
1
# Generates the Makefile used to build the native extension.
require 'mkmf'

extension_name = 'redcarpet'

# Same name is used for the build-directory options and for the Makefile target.
dir_config(extension_name)
create_makefile(extension_name)
data/ext/markdown.c ADDED
@@ -0,0 +1,1590 @@
1
+ /* markdown.c - generic markdown parser */
2
+
3
+ /*
4
+ * Copyright (c) 2009, Natacha Porté
5
+ *
6
+ * Permission to use, copy, modify, and distribute this software for any
7
+ * purpose with or without fee is hereby granted, provided that the above
8
+ * copyright notice and this permission notice appear in all copies.
9
+ *
10
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17
+ */
18
+
19
+ #include "markdown.h"
20
+
21
+ #include "array.h"
22
+
23
+ #include <assert.h>
24
+ #include <string.h>
25
+ #include <strings.h> /* for strncasecmp */
26
+
27
+ #define TEXT_UNIT 64 /* unit for the copy of the input buffer */
28
+ #define WORK_UNIT 64 /* block-level working buffer */
29
+
30
+ #define MKD_LI_END 8 /* internal list flag */
31
+
32
+ /***************
33
+ * LOCAL TYPES *
34
+ ***************/
35
+
36
/* link_ref • reference to a link */
struct link_ref {
	struct buf *id;		/* reference id, compared with bufcasecmp() */
	struct buf *link;	/* link handed to the link/image callbacks */
	struct buf *title;	/* title handed to the link/image callbacks */
};
41
+
42
+
43
/* char_trigger • function pointer to render active chars */
/*   returns the number of chars taken care of (0 = nothing rendered) */
/*   data is the pointer of the beginning of the span */
/*   offset is the number of valid chars before data */
/*   size is the number of chars available from data onwards */
struct render;
typedef size_t (*char_trigger)(struct buf *ob, struct render *rndr,
			       char *data, size_t offset, size_t size);
51
+
52
+
53
+ /* render • structure containing one particular render */
54
+ struct render {
55
+ struct mkd_renderer make;
56
+ struct array refs;
57
+ char_trigger active_char[256];
58
+ struct parray work;
59
+ };
60
+
61
+
62
/* html_tag • structure for quick HTML tag search (inspired from discount) */
struct html_tag {
	const char	*text;	/* tag name, without angle brackets */
	size_t		 size;	/* length of the tag name */
};
67
+
68
+
69
+
70
+ /********************
71
+ * GLOBAL VARIABLES *
72
+ ********************/
73
+
74
+ /* block_tags • recognised block tags, sorted by cmp_html_tag */
75
+ static struct html_tag block_tags[] = {
76
+ /*0*/ { "p", 1 },
77
+ { "dl", 2 },
78
+ { "h1", 2 },
79
+ { "h2", 2 },
80
+ { "h3", 2 },
81
+ { "h4", 2 },
82
+ { "h5", 2 },
83
+ { "h6", 2 },
84
+ { "ol", 2 },
85
+ { "ul", 2 },
86
+ /*10*/ { "del", 3 },
87
+ { "div", 3 },
88
+ /*12*/ { "ins", 3 },
89
+ { "pre", 3 },
90
+ { "form", 4 },
91
+ { "math", 4 },
92
+ { "table", 5 },
93
+ { "iframe", 6 },
94
+ { "script", 6 },
95
+ { "fieldset", 8 },
96
+ { "noscript", 8 },
97
+ { "blockquote", 10 } };
98
+
99
+ #define INS_TAG (block_tags + 12)
100
+ #define DEL_TAG (block_tags + 10)
101
+
102
+
103
+
104
+ /***************************
105
+ * STATIC HELPER FUNCTIONS *
106
+ ***************************/
107
+
108
+ /* cmp_link_ref • comparison function for link_ref sorted arrays */
109
+ static int
110
+ cmp_link_ref(void *key, void *array_entry) {
111
+ struct link_ref *lr = array_entry;
112
+ return bufcasecmp(key, lr->id); }
113
+
114
+
115
+ /* cmp_link_ref_sort • comparison function for link_ref qsort */
116
+ static int
117
+ cmp_link_ref_sort(const void *a, const void *b) {
118
+ const struct link_ref *lra = a;
119
+ const struct link_ref *lrb = b;
120
+ return bufcasecmp(lra->id, lrb->id); }
121
+
122
+
123
+ /* cmp_html_tag • comparison function for bsearch() (stolen from discount) */
124
+ static int
125
+ cmp_html_tag(const void *a, const void *b) {
126
+ const struct html_tag *hta = a;
127
+ const struct html_tag *htb = b;
128
+ if (hta->size != htb->size) return (int)((ssize_t)hta->size - (ssize_t)htb->size);
129
+ return strncasecmp(hta->text, htb->text, hta->size); }
130
+
131
+
132
+ /* find_block_tag • returns the current block tag */
133
+ static struct html_tag *
134
+ find_block_tag(char *data, size_t size) {
135
+ size_t i = 0;
136
+ struct html_tag key;
137
+
138
+ /* looking for the word end */
139
+ while (i < size && ((data[i] >= '0' && data[i] <= '9')
140
+ || (data[i] >= 'A' && data[i] <= 'Z')
141
+ || (data[i] >= 'a' && data[i] <= 'z')))
142
+ i += 1;
143
+ if (i >= size) return 0;
144
+
145
+ /* binary search of the tag */
146
+ key.text = data;
147
+ key.size = i;
148
+ return bsearch(&key, block_tags,
149
+ sizeof block_tags / sizeof block_tags[0],
150
+ sizeof block_tags[0], cmp_html_tag); }
151
+
152
+
153
+
154
+ /****************************
155
+ * INLINE PARSING FUNCTIONS *
156
+ ****************************/
157
+
158
/* is_mail_autolink • looks for the address part of a mail autolink and '>' */
/*   this is less strict than the original markdown e-mail address matching */
/*   returns the length up to and including '>', or 0 when there is no match */
static size_t
is_mail_autolink(char *data, size_t size) {
	size_t pos;
	size_t at_signs = 0;

	/* address is assumed to be: [-@._a-zA-Z0-9]+ with exactly one '@' */
	for (pos = 0; pos < size; pos += 1) {
		char ch = data[pos];
		int allowed = ch == '-' || ch == '.' || ch == '_' || ch == '@'
			|| (ch >= 'a' && ch <= 'z')
			|| (ch >= 'A' && ch <= 'Z')
			|| (ch >= '0' && ch <= '9');

		if (!allowed) break;
		if (ch == '@') at_signs += 1;
	}

	/* the address must be closed by '>' inside the given data */
	if (pos >= size) return 0;
	if (data[pos] != '>') return 0;
	if (at_signs != 1) return 0;
	return pos + 1;
}
173
+
174
+
175
+ /* tag_length • returns the length of the given tag, or 0 is it's not valid */
176
+ static size_t
177
+ tag_length(char *data, size_t size, enum mkd_autolink *autolink) {
178
+ size_t i, j;
179
+
180
+ /* a valid tag can't be shorter than 3 chars */
181
+ if (size < 3) return 0;
182
+
183
+ /* begins with a '<' optionally followed by '/', followed by letter */
184
+ if (data[0] != '<') return 0;
185
+ i = (data[1] == '/') ? 2 : 1;
186
+ if ((data[i] < 'a' || data[i] > 'z')
187
+ && (data[i] < 'A' || data[i] > 'Z')) return 0;
188
+
189
+ /* scheme test */
190
+ *autolink = MKDA_NOT_AUTOLINK;
191
+
192
+ /* try to find the beggining of an URI */
193
+ while (i < size && (isalpha(data[i]) || data[i] == '.' || data[i] == '+' || data[i] == '-'))
194
+ i++;
195
+
196
+ if (i > 2 && data[i] == ':') {
197
+ *autolink = MKDA_NORMAL;
198
+ i++;
199
+ }
200
+
201
+ /*
202
+ * FIXME: check for double slashes after the URI id?
203
+ * There are some protocols that don't have them, e.g.
204
+ * news:resource
205
+ */
206
+
207
+ /* completing autolink test: no whitespace or ' or " */
208
+ if (i >= size || i == '>')
209
+ *autolink = MKDA_NOT_AUTOLINK;
210
+ else if (*autolink) {
211
+ j = i;
212
+ while (i < size && data[i] != '>' && data[i] != '\''
213
+ && data[i] != '"' && data[i] != ' ' && data[i] != '\t'
214
+ && data[i] != '\t')
215
+ i += 1;
216
+ if (i >= size) return 0;
217
+ if (i > j && data[i] == '>') return i + 1;
218
+ /* one of the forbidden chars has been found */
219
+ *autolink = MKDA_NOT_AUTOLINK; }
220
+ else if ((j = is_mail_autolink(data + i, size - i)) != 0) {
221
+ *autolink = (i == 8)
222
+ ? MKDA_EXPLICIT_EMAIL : MKDA_IMPLICIT_EMAIL;
223
+ return i + j; }
224
+
225
+ /* looking for sometinhg looking like a tag end */
226
+ while (i < size && data[i] != '>') i += 1;
227
+ if (i >= size) return 0;
228
+ return i + 1; }
229
+
230
+
231
+ /* parse_inline • parses inline markdown elements */
232
+ static void
233
+ parse_inline(struct buf *ob, struct render *rndr, char *data, size_t size) {
234
+ size_t i = 0, end = 0;
235
+ char_trigger action = 0;
236
+ struct buf work = { 0, 0, 0, 0, 0 };
237
+
238
+ while (i < size) {
239
+ /* copying inactive chars into the output */
240
+ while (end < size
241
+ && (action = rndr->active_char[(unsigned char)data[end]]) == 0)
242
+ end += 1;
243
+ if (rndr->make.normal_text) {
244
+ work.data = data + i;
245
+ work.size = end - i;
246
+ rndr->make.normal_text(ob, &work, &rndr->make.render_options);
247
+ }
248
+ else
249
+ bufput(ob, data + i, end - i);
250
+ if (end >= size) break;
251
+ i = end;
252
+
253
+ /* calling the trigger */
254
+ end = action(ob, rndr, data + i, i, size - i);
255
+ if (!end) /* no action from the callback */
256
+ end = i + 1;
257
+ else {
258
+ i += end;
259
+ end = i; } } }
260
+
261
+
262
/* find_emph_char • looks for the next emph char, skipping other constructs */
/*   the search starts at data[1]; code spans and links are skipped */
/*   returns the offset of the emph char c, or 0 when none is found */
/*   when a code span or link is left unterminated, the first c seen inside */
/*   it is returned instead (0 if there was none) */
static size_t
find_emph_char(char *data, size_t size, char c) {
	size_t i = 1;

	while (i < size) {
		while (i < size && data[i] != c
		&& data[i] != '`' && data[i] != '[')
			i += 1;

		/* nothing left: do not look at data[size] */
		if (i >= size) return 0;
		if (data[i] == c) return i;

		/* not counting escaped chars */
		if (i && data[i - 1] == '\\') { i += 1; continue; }

		/* skipping a code span */
		if (data[i] == '`') {
			size_t tmp_i = 0;
			i += 1;
			while (i < size && data[i] != '`') {
				if (!tmp_i && data[i] == c) tmp_i = i;
				i += 1; }
			if (i >= size) return tmp_i;
			i += 1; }

		/* skipping a link */
		else if (data[i] == '[') {
			size_t tmp_i = 0;
			char cc;
			i += 1;
			while (i < size && data[i] != ']') {
				if (!tmp_i && data[i] == c) tmp_i = i;
				i += 1; }
			i += 1;
			while (i < size && (data[i] == ' '
			|| data[i] == '\t' || data[i] == '\n'))
				i += 1;
			if (i >= size) return tmp_i;
			if (data[i] != '[' && data[i] != '(') { /* not a link */
				if (tmp_i) return tmp_i;
				else continue; }

			/* the second part ends at the matching closing char, */
			/* not at another opening one */
			cc = (data[i] == '[') ? ']' : ')';
			i += 1;
			while (i < size && data[i] != cc) {
				if (!tmp_i && data[i] == c) tmp_i = i;
				i += 1; }
			if (i >= size) return tmp_i;
			i += 1; } }
	return 0; }
310
+
311
+
312
+ /* parse_emph1 • parsing single emphase */
313
+ /* closed by a symbol not preceded by whitespace and not followed by symbol */
314
+ static size_t
315
+ parse_emph1(struct buf *ob, struct render *rndr,
316
+ char *data, size_t size, char c) {
317
+ size_t i = 0, len;
318
+ struct buf *work = 0;
319
+ int r;
320
+
321
+ if (!rndr->make.emphasis) return 0;
322
+
323
+ /* skipping one symbol if coming from emph3 */
324
+ if (size > 1 && data[0] == c && data[1] == c) i = 1;
325
+
326
+ while (i < size) {
327
+ len = find_emph_char(data + i, size - i, c);
328
+ if (!len) return 0;
329
+ i += len;
330
+ if (i >= size) return 0;
331
+
332
+ if (i + 1 < size && data[i + 1] == c) {
333
+ i += 1;
334
+ continue;
335
+ }
336
+
337
+ if (data[i] == c && !isspace(data[i - 1])) {
338
+
339
+ if ((rndr->make.parser_options.flags & PARSER_STRICT) == 0) {
340
+ if (!(i + 1 == size || isspace(data[i + 1]) || ispunct(data[i + 1])))
341
+ continue;
342
+ }
343
+
344
+ if (rndr->work.size < rndr->work.asize) {
345
+ work = rndr->work.item[rndr->work.size ++];
346
+ work->size = 0;
347
+ } else {
348
+ work = bufnew(WORK_UNIT);
349
+ parr_push(&rndr->work, work);
350
+ }
351
+
352
+ parse_inline(work, rndr, data, i);
353
+ r = rndr->make.emphasis(ob, work, c, &rndr->make.render_options);
354
+ rndr->work.size -= 1;
355
+ return r ? i + 1 : 0;
356
+ }
357
+ }
358
+
359
+ return 0;
360
+ }
361
+
362
+
363
+ /* parse_emph2 • parsing single emphase */
364
+ static size_t
365
+ parse_emph2(struct buf *ob, struct render *rndr,
366
+ char *data, size_t size, char c) {
367
+ size_t i = 0, len;
368
+ struct buf *work = 0;
369
+ int r;
370
+
371
+ if (!rndr->make.double_emphasis) return 0;
372
+
373
+ while (i < size) {
374
+ len = find_emph_char(data + i, size - i, c);
375
+ if (!len) return 0;
376
+ i += len;
377
+ if (i + 1 < size && data[i] == c && data[i + 1] == c
378
+ && i && data[i - 1] != ' '
379
+ && data[i - 1] != '\t' && data[i - 1] != '\n') {
380
+ if (rndr->work.size < rndr->work.asize) {
381
+ work = rndr->work.item[rndr->work.size ++];
382
+ work->size = 0; }
383
+ else {
384
+ work = bufnew(WORK_UNIT);
385
+ parr_push(&rndr->work, work); }
386
+ parse_inline(work, rndr, data, i);
387
+ r = rndr->make.double_emphasis(ob, work, c,
388
+ &rndr->make.render_options);
389
+ rndr->work.size -= 1;
390
+ return r ? i + 2 : 0; }
391
+ i += 1; }
392
+ return 0; }
393
+
394
+
395
+ /* parse_emph3 • parsing single emphase */
396
+ /* finds the first closing tag, and delegates to the other emph */
397
+ static size_t
398
+ parse_emph3(struct buf *ob, struct render *rndr,
399
+ char *data, size_t size, char c) {
400
+ size_t i = 0, len;
401
+ int r;
402
+
403
+ while (i < size) {
404
+ len = find_emph_char(data + i, size - i, c);
405
+ if (!len) return 0;
406
+ i += len;
407
+
408
+ /* skip whitespace preceded symbols */
409
+ if (data[i] != c || data[i - 1] == ' '
410
+ || data[i - 1] == '\t' || data[i - 1] == '\n')
411
+ continue;
412
+
413
+ if (i + 2 < size && data[i + 1] == c && data[i + 2] == c
414
+ && rndr->make.triple_emphasis) {
415
+ /* triple symbol found */
416
+ struct buf *work = 0;
417
+ if (rndr->work.size < rndr->work.asize) {
418
+ work = rndr->work.item[rndr->work.size ++];
419
+ work->size = 0; }
420
+ else {
421
+ work = bufnew(WORK_UNIT);
422
+ parr_push(&rndr->work, work); }
423
+ parse_inline(work, rndr, data, i);
424
+ r = rndr->make.triple_emphasis(ob, work, c,
425
+ &rndr->make.render_options);
426
+ rndr->work.size -= 1;
427
+ return r ? i + 3 : 0; }
428
+ else if (i + 1 < size && data[i + 1] == c) {
429
+ /* double symbol found, handing over to emph1 */
430
+ len = parse_emph1(ob, rndr, data - 2, size + 2, c);
431
+ if (!len) return 0;
432
+ else return len - 2; }
433
+ else {
434
+ /* single symbol found, handing over to emph2 */
435
+ len = parse_emph2(ob, rndr, data - 1, size + 1, c);
436
+ if (!len) return 0;
437
+ else return len - 1; } }
438
+ return 0; }
439
+
440
+
441
/* char_emphasis • single, double and triple emphasis parsing */
/*   data[0] is the emphasis symbol; dispatches on the number of opening */
/*   symbols and returns the total number of chars consumed, or 0 */
static size_t
char_emphasis(struct buf *ob, struct render *rndr,
				char *data, size_t offset, size_t size) {
	char c = data[0];
	size_t used;

	/* one opening symbol */
	if (size > 2 && data[1] != c) {
		/* whitespace cannot follow an opening emphasis */
		if (data[1] == ' ' || data[1] == '\t' || data[1] == '\n')
			return 0;
		used = parse_emph1(ob, rndr, data + 1, size - 1, c);
		return used ? used + 1 : 0;
	}

	/* two opening symbols */
	if (size > 3 && data[1] == c && data[2] != c) {
		if (data[2] == ' ' || data[2] == '\t' || data[2] == '\n')
			return 0;
		used = parse_emph2(ob, rndr, data + 2, size - 2, c);
		return used ? used + 2 : 0;
	}

	/* three opening symbols */
	if (size > 4 && data[1] == c && data[2] == c && data[3] != c) {
		if (data[3] == ' ' || data[3] == '\t' || data[3] == '\n')
			return 0;
		used = parse_emph3(ob, rndr, data + 3, size - 3, c);
		return used ? used + 3 : 0;
	}

	return 0;
}
464
+
465
+
466
+ /* char_linebreak • '\n' preceded by two spaces (assuming linebreak != 0) */
467
+ static size_t
468
+ char_linebreak(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size) {
469
+
470
+ if (offset < 2 || data[-1] != ' ' || data[-2] != ' ')
471
+ return 0;
472
+
473
+ /* removing the last space from ob and rendering */
474
+ while (ob->size && ob->data[ob->size - 1] == ' ')
475
+ ob->size--;
476
+
477
+ return rndr->make.linebreak(ob, &rndr->make.render_options) ? 1 : 0;
478
+ }
479
+
480
+
481
+ /* char_codespan • '`' parsing a code span (assuming codespan != 0) */
482
+ static size_t
483
+ char_codespan(struct buf *ob, struct render *rndr,
484
+ char *data, size_t offset, size_t size) {
485
+ size_t end, nb = 0, i, f_begin, f_end;
486
+
487
+ /* counting the number of backticks in the delimiter */
488
+ while (nb < size && data[nb] == '`') nb += 1;
489
+
490
+ /* finding the next delimiter */
491
+ i = 0;
492
+ for (end = nb; end < size && i < nb; end += 1)
493
+ if (data[end] == '`') i += 1;
494
+ else i = 0;
495
+ if (i < nb && end >= size) return 0; /* no matching delimiter */
496
+
497
+ /* trimming outside whitespaces */
498
+ f_begin = nb;
499
+ while (f_begin < end && (data[f_begin] == ' ' || data[f_begin] == '\t'))
500
+ f_begin += 1;
501
+ f_end = end - nb;
502
+ while (f_end > nb && (data[f_end-1] == ' ' || data[f_end-1] == '\t'))
503
+ f_end -= 1;
504
+
505
+ /* real code span */
506
+ if (f_begin < f_end) {
507
+ struct buf work = { data + f_begin, f_end - f_begin, 0, 0, 0 };
508
+ if (!rndr->make.codespan(ob, &work, &rndr->make.render_options))
509
+ end = 0; }
510
+ else {
511
+ if (!rndr->make.codespan(ob, 0, &rndr->make.render_options))
512
+ end = 0; }
513
+ return end; }
514
+
515
+
516
+ /* char_escape • '\\' backslash escape */
517
+ static size_t
518
+ char_escape(struct buf *ob, struct render *rndr,
519
+ char *data, size_t offset, size_t size) {
520
+ struct buf work = { 0, 0, 0, 0, 0 };
521
+ if (size > 1) {
522
+ if (rndr->make.normal_text) {
523
+ work.data = data + 1;
524
+ work.size = 1;
525
+ rndr->make.normal_text(ob, &work, &rndr->make.render_options); }
526
+ else bufputc(ob, data[1]); }
527
+ return 2; }
528
+
529
+
530
+ /* char_entity • '&' escaped when it doesn't belong to an entity */
531
+ /* valid entities are assumed to be anything mathing &#?[A-Za-z0-9]+; */
532
+ static size_t
533
+ char_entity(struct buf *ob, struct render *rndr,
534
+ char *data, size_t offset, size_t size) {
535
+ size_t end = 1;
536
+ struct buf work;
537
+ if (end < size && data[end] == '#') end += 1;
538
+ while (end < size
539
+ && ((data[end] >= '0' && data[end] <= '9')
540
+ || (data[end] >= 'a' && data[end] <= 'z')
541
+ || (data[end] >= 'A' && data[end] <= 'Z')))
542
+ end += 1;
543
+ if (end < size && data[end] == ';') {
544
+ /* real entity */
545
+ end += 1; }
546
+ else {
547
+ /* lone '&' */
548
+ return 0; }
549
+ if (rndr->make.entity) {
550
+ work.data = data;
551
+ work.size = end;
552
+ rndr->make.entity(ob, &work, &rndr->make.render_options); }
553
+ else bufput(ob, data, end);
554
+ return end; }
555
+
556
+
557
+ /* char_langle_tag • '<' when tags or autolinks are allowed */
558
+ static size_t
559
+ char_langle_tag(struct buf *ob, struct render *rndr,
560
+ char *data, size_t offset, size_t size) {
561
+ enum mkd_autolink altype = MKDA_NOT_AUTOLINK;
562
+ size_t end = tag_length(data, size, &altype);
563
+ struct buf work = { data, end, 0, 0, 0 };
564
+ int ret = 0;
565
+ if (end) {
566
+ if (rndr->make.autolink && altype != MKDA_NOT_AUTOLINK) {
567
+ work.data = data + 1;
568
+ work.size = end - 2;
569
+ ret = rndr->make.autolink(ob, &work, altype,
570
+ &rndr->make.render_options); }
571
+ else if (rndr->make.raw_html_tag)
572
+ ret = rndr->make.raw_html_tag(ob, &work,
573
+ &rndr->make.render_options); }
574
+ if (!ret) return 0;
575
+ else return end; }
576
+
577
+
578
+ /* char_link • '[': parsing a link or an image */
579
+ static size_t
580
+ char_link(struct buf *ob, struct render *rndr,
581
+ char *data, size_t offset, size_t size) {
582
+ int is_img = (offset && data[-1] == '!'), level;
583
+ size_t i = 1, txt_e, link_b = 0, link_e = 0, title_b = 0, title_e = 0;
584
+ struct buf *content = 0;
585
+ struct buf *link = 0;
586
+ struct buf *title = 0;
587
+ size_t org_work_size = rndr->work.size;
588
+ int text_has_nl = 0, ret;
589
+
590
+ /* checking whether the correct renderer exists */
591
+ if ((is_img && !rndr->make.image) || (!is_img && !rndr->make.link))
592
+ return 0;
593
+
594
+ /* looking for the matching closing bracket */
595
+ for (level = 1; i < size; i += 1)
596
+ if (data[i] == '\n') text_has_nl = 1;
597
+ else if (data[i - 1] == '\\') continue;
598
+ else if (data[i] == '[') level += 1;
599
+ else if (data[i] == ']') {
600
+ level -= 1;
601
+ if (level <= 0) break; }
602
+ if (i >= size) return 0;
603
+ txt_e = i;
604
+ i += 1;
605
+
606
+ /* skip any amount of whitespace or newline */
607
+ /* (this is much more laxist than original markdown syntax) */
608
+ while (i < size
609
+ && (data[i] == ' ' || data[i] == '\t' || data[i] == '\n'))
610
+ i += 1;
611
+
612
+ /* inline style link */
613
+ if (i < size && data[i] == '(') {
614
+ /* skipping initial whitespace */
615
+ i += 1;
616
+ while (i < size && (data[i] == ' ' || data[i] == '\t')) i += 1;
617
+ link_b = i;
618
+
619
+ /* looking for link end: ' " ) */
620
+ while (i < size
621
+ && data[i] != '\'' && data[i] != '"' && data[i] != ')')
622
+ i += 1;
623
+ if (i >= size) return 0;
624
+ link_e = i;
625
+
626
+ /* looking for title end if present */
627
+ if (data[i] == '\'' || data[i] == '"') {
628
+ i += 1;
629
+ title_b = i;
630
+ while (i < size && data[i] != ')')
631
+ i += 1;
632
+ if (i >= size) return 0;
633
+
634
+ /* skipping whitespaces after title */
635
+ title_e = i - 1;
636
+ while (title_e > title_b && (data[title_e] == ' '
637
+ || data[title_e] == '\t' || data[title_e] == '\n'))
638
+ title_e -= 1;
639
+
640
+ /* checking for closing quote presence */
641
+ if (data[title_e] != '\'' && data[title_e] != '"') {
642
+ title_b = title_e = 0;
643
+ link_e = i; } }
644
+
645
+ /* remove whitespace at the end of the link */
646
+ while (link_e > link_b
647
+ && (data[link_e - 1] == ' ' || data[link_e - 1] == '\t'))
648
+ link_e -= 1;
649
+
650
+ /* remove optional angle brackets around the link */
651
+ if (data[link_b] == '<') link_b += 1;
652
+ if (data[link_e - 1] == '>') link_e -= 1;
653
+
654
+ /* building escaped link and title */
655
+ if (link_e > link_b) {
656
+ if (rndr->work.size < rndr->work.asize) {
657
+ link = rndr->work.item[rndr->work.size ++];
658
+ link->size = 0; }
659
+ else {
660
+ link = bufnew(WORK_UNIT);
661
+ parr_push(&rndr->work, link); }
662
+ bufput(link, data + link_b, link_e - link_b); }
663
+ if (title_e > title_b) {
664
+ if (rndr->work.size < rndr->work.asize) {
665
+ title = rndr->work.item[rndr->work.size ++];
666
+ title->size = 0; }
667
+ else {
668
+ title = bufnew(WORK_UNIT);
669
+ parr_push(&rndr->work, title); }
670
+ bufput(title, data + title_b, title_e - title_b);}
671
+
672
+ i += 1; }
673
+
674
+ /* reference style link */
675
+ else if (i < size && data[i] == '[') {
676
+ struct buf id = { 0, 0, 0, 0, 0 };
677
+ struct link_ref *lr;
678
+
679
+ /* looking for the id */
680
+ i += 1;
681
+ link_b = i;
682
+ while (i < size && data[i] != ']') i += 1;
683
+ if (i >= size) return 0;
684
+ link_e = i;
685
+
686
+ /* finding the link_ref */
687
+ if (link_b == link_e) {
688
+ if (text_has_nl) {
689
+ struct buf *b = 0;
690
+ size_t j;
691
+ if (rndr->work.size < rndr->work.asize) {
692
+ b = rndr->work.item[rndr->work.size ++];
693
+ b->size = 0; }
694
+ else {
695
+ b = bufnew(WORK_UNIT);
696
+ parr_push(&rndr->work, b); }
697
+ for (j = 1; j < txt_e; j += 1)
698
+ if (data[j] != '\n')
699
+ bufputc(b, data[j]);
700
+ else if (data[j - 1] != ' ')
701
+ bufputc(b, ' ');
702
+ id.data = b->data;
703
+ id.size = b->size; }
704
+ else {
705
+ id.data = data + 1;
706
+ id.size = txt_e - 1; } }
707
+ else {
708
+ id.data = data + link_b;
709
+ id.size = link_e - link_b; }
710
+ lr = arr_sorted_find(&rndr->refs, &id, cmp_link_ref);
711
+ if (!lr) return 0;
712
+
713
+ /* keeping link and title from link_ref */
714
+ link = lr->link;
715
+ title = lr->title;
716
+ i += 1; }
717
+
718
+ /* shortcut reference style link */
719
+ else {
720
+ struct buf id = { 0, 0, 0, 0, 0 };
721
+ struct link_ref *lr;
722
+
723
+ /* crafting the id */
724
+ if (text_has_nl) {
725
+ struct buf *b = 0;
726
+ size_t j;
727
+ if (rndr->work.size < rndr->work.asize) {
728
+ b = rndr->work.item[rndr->work.size ++];
729
+ b->size = 0; }
730
+ else {
731
+ b = bufnew(WORK_UNIT);
732
+ parr_push(&rndr->work, b); }
733
+ for (j = 1; j < txt_e; j += 1)
734
+ if (data[j] != '\n')
735
+ bufputc(b, data[j]);
736
+ else if (data[j - 1] != ' ')
737
+ bufputc(b, ' ');
738
+ id.data = b->data;
739
+ id.size = b->size; }
740
+ else {
741
+ id.data = data + 1;
742
+ id.size = txt_e - 1; }
743
+
744
+ /* finding the link_ref */
745
+ lr = arr_sorted_find(&rndr->refs, &id, cmp_link_ref);
746
+ if (!lr) return 0;
747
+
748
+ /* keeping link and title from link_ref */
749
+ link = lr->link;
750
+ title = lr->title;
751
+
752
+ /* rewinding the whitespace */
753
+ i = txt_e + 1; }
754
+
755
+ /* building content: img alt is escaped, link content is parsed */
756
+ if (txt_e > 1) {
757
+ if (rndr->work.size < rndr->work.asize) {
758
+ content = rndr->work.item[rndr->work.size ++];
759
+ content->size = 0; }
760
+ else {
761
+ content = bufnew(WORK_UNIT);
762
+ parr_push(&rndr->work, content); }
763
+ if (is_img) bufput(content, data + 1, txt_e - 1);
764
+ else parse_inline(content, rndr, data + 1, txt_e - 1); }
765
+
766
+ /* calling the relevant rendering function */
767
+ ret = 0;
768
+ if (is_img) {
769
+ if (ob->size && ob->data[ob->size - 1] == '!') ob->size -= 1;
770
+ ret = rndr->make.image(ob, link, title, content,
771
+ &rndr->make.render_options); }
772
+ else ret = rndr->make.link(ob, link, title, content, &rndr->make.render_options);
773
+
774
+ /* cleanup */
775
+ rndr->work.size = (int)org_work_size;
776
+ return ret ? i : 0;
777
+ }
778
+
779
+
780
+
781
+ /*********************************
782
+ * BLOCK-LEVEL PARSING FUNCTIONS *
783
+ *********************************/
784
+
785
/* is_empty • returns the line length when it is empty, 0 otherwise */
/*   an empty line only holds spaces and tabs; the returned length counts */
/*   one char for the line end, even when data stops without a newline */
static size_t
is_empty(char *data, size_t size) {
	size_t pos = 0;

	while (pos < size && data[pos] != '\n') {
		if (data[pos] != ' ' && data[pos] != '\t')
			return 0;
		pos += 1;
	}
	return pos + 1;
}
792
+
793
+
794
/* is_hrule • returns whether a line is a horizontal rule */
/*   up to three leading spaces, then at least three '*', '-' or '_' (all */
/*   the same char), with only spaces and tabs in between */
static int
is_hrule(char *data, size_t size) {
	size_t pos = 0, count = 0;
	char mark;

	/* skipping initial spaces (size >= 3 keeps these reads in bounds) */
	if (size < 3) return 0;
	while (pos < 3 && data[pos] == ' ')
		pos += 1;

	/* looking at the hrule char */
	if (pos + 2 >= size) return 0;
	mark = data[pos];
	if (mark != '*' && mark != '-' && mark != '_')
		return 0;

	/* the whole line must be the char or whitespace */
	for (; pos < size && data[pos] != '\n'; pos += 1) {
		if (data[pos] == mark)
			count += 1;
		else if (data[pos] != ' ' && data[pos] != '\t')
			return 0;
	}

	return count >= 3;
}
820
+
821
+
822
/* is_headerline • returns whether the line is a setext-style hdr underline */
/*   returns 1 for a line of '=', 2 for a line of '-', 0 otherwise; */
/*   trailing spaces and tabs are allowed before the line end */
static int
is_headerline(char *data, size_t size) {
	size_t i = 0;

	/* nothing to look at: data[0] is not part of the line */
	if (size == 0) return 0;

	/* test of level 1 header */
	if (data[i] == '=') {
		for (i = 1; i < size && data[i] == '='; i += 1);
		while (i < size && (data[i] == ' ' || data[i] == '\t')) i += 1;
		return (i >= size || data[i] == '\n') ? 1 : 0; }

	/* test of level 2 header */
	if (data[i] == '-') {
		for (i = 1; i < size && data[i] == '-'; i += 1);
		while (i < size && (data[i] == ' ' || data[i] == '\t')) i += 1;
		return (i >= size || data[i] == '\n') ? 2 : 0; }

	return 0; }
840
+
841
+
842
/* prefix_quote • returns blockquote prefix length
 *
 * The prefix is up to three spaces, a '>' and one optional space or tab.
 * Returns the number of bytes making up that prefix, or 0 when the line
 * does not start a blockquote. */
static size_t
prefix_quote(char *data, size_t size) {
	size_t pos = 0;

	/* optional indentation, at most three spaces */
	while (pos < 3 && pos < size && data[pos] == ' ')
		pos++;

	if (pos >= size || data[pos] != '>')
		return 0;
	pos++;

	/* a single blank right after the marker belongs to the prefix */
	if (pos < size && (data[pos] == ' ' || data[pos] == '\t'))
		pos++;

	return pos;
}
854
+
855
+
856
/* prefix_code • returns prefix length for block code
 *
 * A code line starts with either one tab (prefix length 1) or four
 * spaces (prefix length 4). Returns 0 for any other line. */
static size_t
prefix_code(char *data, size_t size) {
	size_t k;

	if (size == 0)
		return 0;
	if (data[0] == '\t')
		return 1;

	/* four-space form needs at least four bytes */
	if (size <= 3)
		return 0;
	for (k = 0; k < 4; k++)
		if (data[k] != ' ')
			return 0;

	return 4;
}
863
+
864
/* prefix_oli • returns ordered list item prefix length
 *
 * The prefix is up to three spaces, one or more decimal digits, a '.'
 * and one space or tab. Returns the length of that prefix, or 0 when the
 * line does not start an ordered list item. */
static size_t
prefix_oli(char *data, size_t size) {
	size_t pos = 0;

	/* optional indentation, at most three spaces */
	while (pos < 3 && pos < size && data[pos] == ' ')
		pos++;

	/* at least one digit is required */
	if (pos >= size || data[pos] < '0' || data[pos] > '9')
		return 0;
	do {
		pos++;
	} while (pos < size && data[pos] >= '0' && data[pos] <= '9');

	/* then a dot followed by a blank */
	if (pos + 1 >= size)
		return 0;
	if (data[pos] != '.')
		return 0;
	if (data[pos + 1] != ' ' && data[pos + 1] != '\t')
		return 0;

	return pos + 2;
}
876
+
877
+
878
/* prefix_uli • returns unordered list item prefix length
 *
 * The prefix is up to three spaces, one bullet character ('*', '+' or
 * '-') and one space or tab. Returns the length of that prefix, or 0 when
 * the line does not start an unordered list item. */
static size_t
prefix_uli(char *data, size_t size) {
	size_t pos = 0;

	/* optional indentation, at most three spaces */
	while (pos < 3 && pos < size && data[pos] == ' ')
		pos++;

	/* bullet and the blank after it must both be present */
	if (pos + 1 >= size)
		return 0;

	switch (data[pos]) {
	case '*':
	case '+':
	case '-':
		break;
	default:
		return 0;
	}

	if (data[pos + 1] != ' ' && data[pos + 1] != '\t')
		return 0;

	return pos + 2;
}
890
+
891
+
892
+ /* parse_block • forward declaration: parses every block found in data and renders it into ob */
893
+ static void parse_block(struct buf *ob, struct render *rndr,
894
+ char *data, size_t size, int depth);
895
+
896
+
897
+ /* parse_blockquote • hanldes parsing of a blockquote fragment */
898
+ static size_t
899
+ parse_blockquote(struct buf *ob, struct render *rndr, char *data, size_t size, int depth) {
900
+ size_t beg, end = 0, pre, work_size = 0;
901
+ char *work_data = 0;
902
+ struct buf *out = 0;
903
+
904
+ if (rndr->work.size < rndr->work.asize) {
905
+ out = rndr->work.item[rndr->work.size ++];
906
+ out->size = 0; }
907
+ else {
908
+ out = bufnew(WORK_UNIT);
909
+ parr_push(&rndr->work, out); }
910
+
911
+ beg = 0;
912
+ while (beg < size) {
913
+ for (end = beg + 1; end < size && data[end - 1] != '\n';
914
+ end += 1);
915
+ pre = prefix_quote(data + beg, end - beg);
916
+ if (pre) beg += pre; /* skipping prefix */
917
+ else if (is_empty(data + beg, end - beg)
918
+ && (end >= size || (prefix_quote(data + end, size - end) == 0
919
+ && !is_empty(data + end, size - end))))
920
+ /* empty line followed by non-quote line */
921
+ break;
922
+ if (beg < end) { /* copy into the in-place working buffer */
923
+ /* bufput(work, data + beg, end - beg); */
924
+ if (!work_data)
925
+ work_data = data + beg;
926
+ else if (data + beg != work_data + work_size)
927
+ memmove(work_data + work_size, data + beg,
928
+ end - beg);
929
+ work_size += end - beg; }
930
+ beg = end; }
931
+
932
+ parse_block(out, rndr, work_data, work_size, depth + 1);
933
+ if (rndr->make.blockquote)
934
+ rndr->make.blockquote(ob, out, &rndr->make.render_options);
935
+ rndr->work.size -= 1;
936
+ return end;
937
+ }
938
+
939
+
940
+ /* parse_blockquote • hanldes parsing of a regular paragraph */
941
+ static size_t
942
+ parse_paragraph(struct buf *ob, struct render *rndr,
943
+ char *data, size_t size) {
944
+ size_t i = 0, end = 0;
945
+ int level = 0;
946
+ struct buf work = { data, 0, 0, 0, 0 }; /* volatile working buffer */
947
+
948
+ while (i < size) {
949
+ for (end = i + 1; end < size && data[end - 1] != '\n';
950
+ end += 1);
951
+ if (is_empty(data + i, size - i)
952
+ || (level = is_headerline(data + i, size - i)) != 0)
953
+ break;
954
+ if (data[i] == '#'
955
+ || is_hrule(data + i, size - i)) {
956
+ end = i;
957
+ break; }
958
+ i = end; }
959
+
960
+ work.size = i;
961
+ while (work.size && data[work.size - 1] == '\n')
962
+ work.size -= 1;
963
+ if (!level) {
964
+ struct buf *tmp = 0;
965
+ if (rndr->work.size < rndr->work.asize) {
966
+ tmp = rndr->work.item[rndr->work.size ++];
967
+ tmp->size = 0; }
968
+ else {
969
+ tmp = bufnew(WORK_UNIT);
970
+ parr_push(&rndr->work, tmp); }
971
+ parse_inline(tmp, rndr, work.data, work.size);
972
+ if (rndr->make.paragraph)
973
+ rndr->make.paragraph(ob, tmp, &rndr->make.render_options);
974
+ rndr->work.size -= 1; }
975
+ else {
976
+ if (work.size) {
977
+ size_t beg;
978
+ i = work.size;
979
+ work.size -= 1;
980
+ while (work.size && data[work.size] != '\n')
981
+ work.size -= 1;
982
+ beg = work.size + 1;
983
+ while (work.size && data[work.size - 1] == '\n')
984
+ work.size -= 1;
985
+ if (work.size) {
986
+ struct buf *tmp = 0;
987
+ if (rndr->work.size < rndr->work.asize) {
988
+ tmp=rndr->work.item[rndr->work.size++];
989
+ tmp->size = 0; }
990
+ else {
991
+ tmp = bufnew(WORK_UNIT);
992
+ parr_push(&rndr->work, tmp); }
993
+ parse_inline(tmp, rndr, work.data, work.size);
994
+ if (rndr->make.paragraph)
995
+ rndr->make.paragraph(ob, tmp,
996
+ &rndr->make.render_options);
997
+ rndr->work.size -= 1;
998
+ work.data += beg;
999
+ work.size = i - beg; }
1000
+ else work.size = i; }
1001
+ if (rndr->make.header)
1002
+ rndr->make.header(ob, &work, level, &rndr->make.render_options);}
1003
+ return end; }
1004
+
1005
+
1006
+ /* parse_blockquote • hanldes parsing of a block-level code fragment */
1007
+ static size_t
1008
+ parse_blockcode(struct buf *ob, struct render *rndr,
1009
+ char *data, size_t size) {
1010
+ size_t beg, end, pre;
1011
+ struct buf *work = 0;
1012
+
1013
+ if (rndr->work.size < rndr->work.asize) {
1014
+ work = rndr->work.item[rndr->work.size ++];
1015
+ work->size = 0; }
1016
+ else {
1017
+ work = bufnew(WORK_UNIT);
1018
+ parr_push(&rndr->work, work); }
1019
+
1020
+ beg = 0;
1021
+ while (beg < size) {
1022
+ for (end = beg + 1; end < size && data[end - 1] != '\n';
1023
+ end += 1);
1024
+ pre = prefix_code(data + beg, end - beg);
1025
+ if (pre) beg += pre; /* skipping prefix */
1026
+ else if (!is_empty(data + beg, end - beg))
1027
+ /* non-empty non-prefixed line breaks the pre */
1028
+ break;
1029
+ if (beg < end) {
1030
+ /* verbatim copy to the working buffer,
1031
+ escaping entities */
1032
+ if (is_empty(data + beg, end - beg))
1033
+ bufputc(work, '\n');
1034
+ else bufput(work, data + beg, end - beg); }
1035
+ beg = end; }
1036
+
1037
+ while (work->size && work->data[work->size - 1] == '\n')
1038
+ work->size -= 1;
1039
+ bufputc(work, '\n');
1040
+ if (rndr->make.blockcode)
1041
+ rndr->make.blockcode(ob, work, &rndr->make.render_options);
1042
+ rndr->work.size -= 1;
1043
+ return beg; }
1044
+
1045
+
1046
+ /* parse_listitem • parsing of a single list item */
1047
+ /* assuming initial prefix is already removed */
1048
+ static size_t
1049
+ parse_listitem(struct buf *ob, struct render *rndr, char *data, size_t size, int *flags, int depth) {
1050
+ struct buf *work = 0, *inter = 0;
1051
+ size_t beg = 0, end, pre, sublist = 0, orgpre = 0, i;
1052
+ int in_empty = 0, has_inside_empty = 0;
1053
+
1054
+ /* keeping book of the first indentation prefix */
1055
+ if (size > 1 && data[0] == ' ') { orgpre = 1;
1056
+ if (size > 2 && data[1] == ' ') { orgpre = 2;
1057
+ if (size > 3 && data[2] == ' ') { orgpre = 3; } } }
1058
+ beg = prefix_uli(data, size);
1059
+ if (!beg) beg = prefix_oli(data, size);
1060
+ if (!beg) return 0;
1061
+ /* skipping to the beginning of the following line */
1062
+ end = beg;
1063
+ while (end < size && data[end - 1] != '\n') end += 1;
1064
+
1065
+ /* getting working buffers */
1066
+ if (rndr->work.size < rndr->work.asize) {
1067
+ work = rndr->work.item[rndr->work.size ++];
1068
+ work->size = 0; }
1069
+ else {
1070
+ work = bufnew(WORK_UNIT);
1071
+ parr_push(&rndr->work, work); }
1072
+ if (rndr->work.size < rndr->work.asize) {
1073
+ inter = rndr->work.item[rndr->work.size ++];
1074
+ inter->size = 0; }
1075
+ else {
1076
+ inter = bufnew(WORK_UNIT);
1077
+ parr_push(&rndr->work, inter); }
1078
+
1079
+ /* putting the first line into the working buffer */
1080
+ bufput(work, data + beg, end - beg);
1081
+ beg = end;
1082
+
1083
+ /* process the following lines */
1084
+ while (beg < size) {
1085
+ end += 1;
1086
+ while (end < size && data[end - 1] != '\n') end += 1;
1087
+
1088
+ /* process an empty line */
1089
+ if (is_empty(data + beg, end - beg)) {
1090
+ in_empty = 1;
1091
+ beg = end;
1092
+ continue; }
1093
+
1094
+ /* calculating the indentation */
1095
+ i = 0;
1096
+ if (end - beg > 1 && data[beg] == ' ') { i = 1;
1097
+ if (end - beg > 2 && data[beg + 1] == ' ') { i = 2;
1098
+ if (end - beg > 3 && data[beg + 2] == ' ') { i = 3;
1099
+ if (end - beg > 3 && data[beg + 3] == ' ') { i = 4; } } } }
1100
+ pre = i;
1101
+ if (data[beg] == '\t') { i = 1; pre = 8; }
1102
+
1103
+ /* checking for a new item */
1104
+ if ((prefix_uli(data + beg + i, end - beg - i)
1105
+ && !is_hrule(data + beg + i, end - beg - i))
1106
+ || prefix_oli(data + beg + i, end - beg - i)) {
1107
+ if (in_empty) has_inside_empty = 1;
1108
+ if (pre == orgpre) /* the following item must have */
1109
+ break; /* the same indentation */
1110
+ if (!sublist) sublist = work->size; }
1111
+
1112
+ /* joining only indented stuff after empty lines */
1113
+ else if (in_empty && i < 4 && data[beg] != '\t') {
1114
+ *flags |= MKD_LI_END;
1115
+ break; }
1116
+ else if (in_empty) {
1117
+ bufputc(work, '\n');
1118
+ has_inside_empty = 1; }
1119
+ in_empty = 0;
1120
+
1121
+ /* adding the line without prefix into the working buffer */
1122
+ bufput(work, data + beg + i, end - beg - i);
1123
+ beg = end; }
1124
+
1125
+ /* render of li contents */
1126
+ if (has_inside_empty) *flags |= MKD_LI_BLOCK;
1127
+ if (*flags & MKD_LI_BLOCK) {
1128
+ /* intermediate render of block li */
1129
+ if (sublist && sublist < work->size) {
1130
+ parse_block(inter, rndr, work->data, sublist, depth + 1);
1131
+ parse_block(inter, rndr, work->data + sublist, work->size - sublist, depth + 1);
1132
+ }
1133
+ else
1134
+ parse_block(inter, rndr, work->data, work->size, depth + 1);
1135
+ } else {
1136
+ /* intermediate render of inline li */
1137
+ if (sublist && sublist < work->size) {
1138
+ parse_inline(inter, rndr, work->data, sublist);
1139
+ parse_block(inter, rndr, work->data + sublist, work->size - sublist, depth + 1);
1140
+ }
1141
+ else
1142
+ parse_inline(inter, rndr, work->data, work->size);
1143
+ }
1144
+
1145
+ /* render of li itself */
1146
+ if (rndr->make.listitem)
1147
+ rndr->make.listitem(ob, inter, *flags, &rndr->make.render_options);
1148
+ rndr->work.size -= 2;
1149
+ return beg;
1150
+ }
1151
+
1152
+
1153
+ /* parse_list • parsing ordered or unordered list block */
1154
+ static size_t
1155
+ parse_list(struct buf *ob, struct render *rndr, char *data, size_t size, int flags, int depth) {
1156
+ struct buf *work = 0;
1157
+ size_t i = 0, j;
1158
+
1159
+ if (rndr->work.size < rndr->work.asize) {
1160
+ work = rndr->work.item[rndr->work.size ++];
1161
+ work->size = 0; }
1162
+ else {
1163
+ work = bufnew(WORK_UNIT);
1164
+ parr_push(&rndr->work, work); }
1165
+
1166
+ while (i < size) {
1167
+ j = parse_listitem(work, rndr, data + i, size - i, &flags, depth + 1);
1168
+ i += j;
1169
+
1170
+ if (!j || (flags & MKD_LI_END))
1171
+ break;
1172
+ }
1173
+
1174
+ if (rndr->make.list)
1175
+ rndr->make.list(ob, work, flags, &rndr->make.render_options);
1176
+ rndr->work.size -= 1;
1177
+ return i;
1178
+ }
1179
+
1180
+
1181
+ /* parse_atxheader • parsing of atx-style headers */
1182
+ static size_t
1183
+ parse_atxheader(struct buf *ob, struct render *rndr, char *data, size_t size) {
1184
+ size_t level = 0;
1185
+ size_t i, end, skip;
1186
+ struct buf work = { data, 0, 0, 0, 0 };
1187
+
1188
+ if (!size || data[0] != '#') return 0;
1189
+ while (level < size && level < 6 && data[level] == '#') level += 1;
1190
+ for (i = level; i < size && (data[i] == ' ' || data[i] == '\t');
1191
+ i += 1);
1192
+ work.data = data + i;
1193
+ for (end = i; end < size && data[end] != '\n'; end += 1);
1194
+ skip = end;
1195
+ while (end && data[end - 1] == '#') end -= 1;
1196
+ while (end && (data[end - 1] == ' ' || data[end - 1] == '\t')) end -= 1;
1197
+ work.size = end - i;
1198
+ if (rndr->make.header)
1199
+ rndr->make.header(ob, &work, (int)level, &rndr->make.render_options);
1200
+ return skip;
1201
+ }
1202
+
1203
+
1204
+ /* htmlblock_end • checking end of HTML block : </tag>[ \t]*\n[ \t*]\n */
1205
+ /* returns the length on match, 0 otherwise */
1206
+ static size_t
1207
+ htmlblock_end(struct html_tag *tag, char *data, size_t size) {
1208
+ size_t i, w;
1209
+
1210
+ /* assuming data[0] == '<' && data[1] == '/' already tested */
1211
+
1212
+ /* checking tag is a match */
1213
+ if (tag->size + 3 >= size
1214
+ || strncasecmp(data + 2, tag->text, tag->size)
1215
+ || data[tag->size + 2] != '>')
1216
+ return 0;
1217
+
1218
+ /* checking white lines */
1219
+ i = tag->size + 3;
1220
+ w = 0;
1221
+ if (i < size && (w = is_empty(data + i, size - i)) == 0)
1222
+ return 0; /* non-blank after tag */
1223
+ i += w;
1224
+ w = 0;
1225
+
1226
+
1227
+ #ifdef UPSKIRT_NEWLINE_AFTER_TAGS
1228
+ if (i < size && (w = is_empty(data + i, size - i)) == 0)
1229
+ return 0; /* non-blank line after tag line */
1230
+ #else
1231
+ if (i < size)
1232
+ w = is_empty(data + i, size - i);
1233
+ #endif
1234
+
1235
+ return i + w;
1236
+ }
1237
+
1238
+
1239
+ /* parse_htmlblock • parsing of inline HTML block */
1240
+ static size_t
1241
+ parse_htmlblock(struct buf *ob, struct render *rndr, char *data, size_t size) {
1242
+ size_t i, j = 0;
1243
+ struct html_tag *curtag;
1244
+ int found;
1245
+ struct buf work = { data, 0, 0, 0, 0 };
1246
+
1247
+ /* identification of the opening tag */
1248
+ if (size < 2 || data[0] != '<') return 0;
1249
+ curtag = find_block_tag(data + 1, size - 1);
1250
+
1251
+ /* handling of special cases */
1252
+ if (!curtag) {
1253
+ /* HTML comment, laxist form */
1254
+ if (size > 5 && data[1] == '!'
1255
+ && data[2] == '-' && data[3] == '-') {
1256
+ i = 5;
1257
+ while (i < size
1258
+ && !(data[i - 2] == '-' && data[i - 1] == '-'
1259
+ && data[i] == '>'))
1260
+ i += 1;
1261
+ i += 1;
1262
+ if (i < size)
1263
+ j = is_empty(data + i, size - i);
1264
+ if (j) {
1265
+ work.size = i + j;
1266
+ if (rndr->make.blockhtml)
1267
+ rndr->make.blockhtml(ob, &work,
1268
+ &rndr->make.render_options);
1269
+ return work.size; } }
1270
+
1271
+ /* HR, which is the only self-closing block tag considered */
1272
+ if (size > 4
1273
+ && (data[1] == 'h' || data[1] == 'H')
1274
+ && (data[2] == 'r' || data[2] == 'R')) {
1275
+ i = 3;
1276
+ while (i < size && data[i] != '>')
1277
+ i += 1;
1278
+ if (i + 1 < size) {
1279
+ i += 1;
1280
+ j = is_empty(data + i, size - i);
1281
+ if (j) {
1282
+ work.size = i + j;
1283
+ if (rndr->make.blockhtml)
1284
+ rndr->make.blockhtml(ob, &work,
1285
+ &rndr->make.render_options);
1286
+ return work.size; } } }
1287
+
1288
+ /* no special case recognised */
1289
+ return 0; }
1290
+
1291
+ /* looking for an unindented matching closing tag */
1292
+ /* followed by a blank line */
1293
+ i = 1;
1294
+ found = 0;
1295
+ #if 0
1296
+ while (i < size) {
1297
+ i += 1;
1298
+ while (i < size && !(data[i - 2] == '\n'
1299
+ && data[i - 1] == '<' && data[i] == '/'))
1300
+ i += 1;
1301
+ if (i + 2 + curtag->size >= size) break;
1302
+ j = htmlblock_end(curtag, data + i - 1, size - i + 1);
1303
+ if (j) {
1304
+ i += j - 1;
1305
+ found = 1;
1306
+ break; } }
1307
+ #endif
1308
+
1309
+ /* if not found, trying a second pass looking for indented match */
1310
+ /* but not if tag is "ins" or "del" (following original Markdown.pl) */
1311
+ if (!found && curtag != INS_TAG && curtag != DEL_TAG) {
1312
+ i = 1;
1313
+ while (i < size) {
1314
+ i += 1;
1315
+ while (i < size
1316
+ && !(data[i - 1] == '<' && data[i] == '/'))
1317
+ i += 1;
1318
+ if (i + 2 + curtag->size >= size) break;
1319
+ j = htmlblock_end(curtag, data + i - 1, size - i + 1);
1320
+ if (j) {
1321
+ i += j - 1;
1322
+ found = 1;
1323
+ break; } } }
1324
+
1325
+ if (!found) return 0;
1326
+
1327
+ /* the end of the block has been found */
1328
+ work.size = i;
1329
+ if (rndr->make.blockhtml)
1330
+ rndr->make.blockhtml(ob, &work, &rndr->make.render_options);
1331
+ return i; }
1332
+
1333
+
1334
+ /* parse_block • parsing of one block, returning next char to parse */
1335
+ static void
1336
+ parse_block(struct buf *ob, struct render *rndr, char *data, size_t size, int depth) {
1337
+ size_t beg, end, i;
1338
+ char *txt_data;
1339
+ beg = 0;
1340
+
1341
+ if (depth >= rndr->make.parser_options.recursion_depth)
1342
+ return;
1343
+
1344
+ while (beg < size) {
1345
+ txt_data = data + beg;
1346
+ end = size - beg;
1347
+ if (data[beg] == '#')
1348
+ beg += parse_atxheader(ob, rndr, txt_data, end);
1349
+ else if (data[beg] == '<' && rndr->make.blockhtml
1350
+ && (i = parse_htmlblock(ob, rndr, txt_data, end)) != 0)
1351
+ beg += i;
1352
+ else if ((i = is_empty(txt_data, end)) != 0)
1353
+ beg += i;
1354
+ else if (is_hrule(txt_data, end)) {
1355
+ if (rndr->make.hrule)
1356
+ rndr->make.hrule(ob, &rndr->make.render_options);
1357
+ while (beg < size && data[beg] != '\n') beg += 1;
1358
+ beg += 1; }
1359
+ else if (prefix_quote(txt_data, end))
1360
+ beg += parse_blockquote(ob, rndr, txt_data, end, depth + 1);
1361
+ else if (prefix_code(txt_data, end))
1362
+ beg += parse_blockcode(ob, rndr, txt_data, end);
1363
+ else if (prefix_uli(txt_data, end))
1364
+ beg += parse_list(ob, rndr, txt_data, end, 0, depth + 1);
1365
+ else if (prefix_oli(txt_data, end))
1366
+ beg += parse_list(ob, rndr, txt_data, end, MKD_LIST_ORDERED, depth + 1);
1367
+ else
1368
+ beg += parse_paragraph(ob, rndr, txt_data, end);
1369
+ }
1370
+ }
1371
+
1372
+
1373
+
1374
+ /*********************
1375
+ * REFERENCE PARSING *
1376
+ *********************/
1377
+
1378
+ /* is_ref • returns whether a line is a reference or not */
1379
+ static int
1380
+ is_ref(char *data, size_t beg, size_t end, size_t *last, struct array *refs) {
1381
+ /* int n; */
1382
+ size_t i = 0;
1383
+ size_t id_offset, id_end;
1384
+ size_t link_offset, link_end;
1385
+ size_t title_offset, title_end;
1386
+ size_t line_end;
1387
+ struct link_ref *lr;
1388
+ /* struct buf id = { 0, 0, 0, 0, 0 }; / * volatile buf for id search */
1389
+
1390
+ /* up to 3 optional leading spaces */
1391
+ if (beg + 3 >= end) return 0;
1392
+ if (data[beg] == ' ') { i = 1;
1393
+ if (data[beg + 1] == ' ') { i = 2;
1394
+ if (data[beg + 2] == ' ') { i = 3;
1395
+ if (data[beg + 3] == ' ') return 0; } } }
1396
+ i += beg;
1397
+
1398
+ /* id part: anything but a newline between brackets */
1399
+ if (data[i] != '[') return 0;
1400
+ i += 1;
1401
+ id_offset = i;
1402
+ while (i < end && data[i] != '\n' && data[i] != '\r' && data[i] != ']')
1403
+ i += 1;
1404
+ if (i >= end || data[i] != ']') return 0;
1405
+ id_end = i;
1406
+
1407
+ /* spacer: colon (space | tab)* newline? (space | tab)* */
1408
+ i += 1;
1409
+ if (i >= end || data[i] != ':') return 0;
1410
+ i += 1;
1411
+ while (i < end && (data[i] == ' ' || data[i] == '\t')) i += 1;
1412
+ if (i < end && (data[i] == '\n' || data[i] == '\r')) {
1413
+ i += 1;
1414
+ if (i < end && data[i] == '\r' && data[i - 1] == '\n') i += 1; }
1415
+ while (i < end && (data[i] == ' ' || data[i] == '\t')) i += 1;
1416
+ if (i >= end) return 0;
1417
+
1418
+ /* link: whitespace-free sequence, optionally between angle brackets */
1419
+ if (data[i] == '<') i += 1;
1420
+ link_offset = i;
1421
+ while (i < end && data[i] != ' ' && data[i] != '\t'
1422
+ && data[i] != '\n' && data[i] != '\r') i += 1;
1423
+ if (data[i - 1] == '>') link_end = i - 1;
1424
+ else link_end = i;
1425
+
1426
+ /* optional spacer: (space | tab)* (newline | '\'' | '"' | '(' ) */
1427
+ while (i < end && (data[i] == ' ' || data[i] == '\t')) i += 1;
1428
+ if (i < end && data[i] != '\n' && data[i] != '\r'
1429
+ && data[i] != '\'' && data[i] != '"' && data[i] != '(')
1430
+ return 0;
1431
+ line_end = 0;
1432
+ /* computing end-of-line */
1433
+ if (i >= end || data[i] == '\r' || data[i] == '\n') line_end = i;
1434
+ if (i + 1 < end && data[i] == '\n' && data[i + 1] == '\r')
1435
+ line_end = i + 1;
1436
+
1437
+ /* optional (space|tab)* spacer after a newline */
1438
+ if (line_end) {
1439
+ i = line_end + 1;
1440
+ while (i < end && (data[i] == ' ' || data[i] == '\t')) i += 1; }
1441
+
1442
+ /* optional title: any non-newline sequence enclosed in '"()
1443
+ alone on its line */
1444
+ title_offset = title_end = 0;
1445
+ if (i + 1 < end
1446
+ && (data[i] == '\'' || data[i] == '"' || data[i] == '(')) {
1447
+ i += 1;
1448
+ title_offset = i;
1449
+ /* looking for EOL */
1450
+ while (i < end && data[i] != '\n' && data[i] != '\r') i += 1;
1451
+ if (i + 1 < end && data[i] == '\n' && data[i + 1] == '\r')
1452
+ title_end = i + 1;
1453
+ else title_end = i;
1454
+ /* stepping back */
1455
+ i -= 1;
1456
+ while (i > title_offset && (data[i] == ' ' || data[i] == '\t'))
1457
+ i -= 1;
1458
+ if (i > title_offset
1459
+ && (data[i] == '\'' || data[i] == '"' || data[i] == ')')) {
1460
+ line_end = title_end;
1461
+ title_end = i; } }
1462
+ if (!line_end) return 0; /* garbage after the link */
1463
+
1464
+ /* a valid ref has been found, filling-in return structures */
1465
+ if (last) *last = line_end;
1466
+ if (!refs) return 1;
1467
+ lr = arr_item(refs, arr_newitem(refs));
1468
+ lr->id = bufnew(id_end - id_offset);
1469
+ bufput(lr->id, data + id_offset, id_end - id_offset);
1470
+ lr->link = bufnew(link_end - link_offset);
1471
+ bufput(lr->link, data + link_offset, link_end - link_offset);
1472
+ if (title_end > title_offset) {
1473
+ lr->title = bufnew(title_end - title_offset);
1474
+ bufput(lr->title, data + title_offset,
1475
+ title_end - title_offset); }
1476
+ else lr->title = 0;
1477
+ return 1; }
1478
+
1479
+
1480
+
1481
+ /**********************
1482
+ * EXPORTED FUNCTIONS *
1483
+ **********************/
1484
/* expand_tabs • appends a line to ob with tabs replaced by spaces
 *
 * Every tab in line[0..size) becomes one to four spaces, so that the
 * text following it starts at the next column that is a multiple of 4
 * (columns are counted from the start of line). All other bytes are
 * copied unchanged. */
static void expand_tabs(struct buf *ob, const char *line, size_t size)
{
	size_t pos = 0, column = 0;

	while (pos < size) {
		size_t run_end = pos;

		/* copy the run of non-tab bytes in one go */
		while (run_end < size && line[run_end] != '\t')
			run_end++;
		if (run_end > pos) {
			bufput(ob, line + pos, run_end - pos);
			column += run_end - pos;
			pos = run_end;
		}

		if (pos >= size)
			break;

		/* at least one space, then pad up to the next tab stop */
		do {
			bufputc(ob, ' ');
			column++;
		} while ((column % 4) != 0);

		pos++;
	}
}
1510
+
1511
+ /* markdown • parses the input buffer and renders it into the output buffer */
1512
+ void
1513
+ markdown(struct buf *ob, struct buf *ib, const struct mkd_renderer *rndrer) {
1514
+ struct link_ref *lr;
1515
+ struct buf *text = bufnew(TEXT_UNIT);
1516
+ size_t i, beg, end;
1517
+ struct render rndr;
1518
+
1519
+ /* filling the render structure */
1520
+ if (!rndrer) return;
1521
+ rndr.make = *rndrer;
1522
+ arr_init(&rndr.refs, sizeof (struct link_ref));
1523
+ parr_init(&rndr.work);
1524
+ for (i = 0; i < 256; i += 1) rndr.active_char[i] = 0;
1525
+ if ((rndr.make.emphasis || rndr.make.double_emphasis
1526
+ || rndr.make.triple_emphasis)
1527
+ && rndr.make.emph_chars)
1528
+ for (i = 0; rndr.make.emph_chars[i]; i += 1)
1529
+ rndr.active_char[(unsigned char)rndr.make.emph_chars[i]]
1530
+ = char_emphasis;
1531
+ if (rndr.make.codespan) rndr.active_char['`'] = char_codespan;
1532
+ if (rndr.make.linebreak) rndr.active_char['\n'] = char_linebreak;
1533
+ if (rndr.make.image || rndr.make.link)
1534
+ rndr.active_char['['] = char_link;
1535
+ rndr.active_char['<'] = char_langle_tag;
1536
+ rndr.active_char['\\'] = char_escape;
1537
+ rndr.active_char['&'] = char_entity;
1538
+
1539
+ /* first pass: looking for references, copying everything else */
1540
+ beg = 0;
1541
+ while (beg < ib->size) /* iterating over lines */
1542
+ if (is_ref(ib->data, beg, ib->size, &end, &rndr.refs))
1543
+ beg = end;
1544
+ else { /* skipping to the next line */
1545
+ end = beg;
1546
+ while (end < ib->size && ib->data[end] != '\n' && ib->data[end] != '\r')
1547
+ end += 1;
1548
+
1549
+ /* adding the line body if present */
1550
+ if (end > beg)
1551
+ expand_tabs(text, ib->data + beg, end - beg);
1552
+
1553
+ while (end < ib->size && (ib->data[end] == '\n' || ib->data[end] == '\r')) {
1554
+ /* add one \n per newline */
1555
+ if (ib->data[end] == '\n' || (end + 1 < ib->size && ib->data[end + 1] != '\n'))
1556
+ bufputc(text, '\n');
1557
+ end += 1;
1558
+ }
1559
+
1560
+ beg = end;
1561
+ }
1562
+
1563
+ /* sorting the reference array */
1564
+ if (rndr.refs.size)
1565
+ qsort(rndr.refs.base, rndr.refs.size, rndr.refs.unit,
1566
+ cmp_link_ref_sort);
1567
+
1568
+ /* adding a final newline if not already present */
1569
+ if (!text->size) return;
1570
+ if (text->data[text->size - 1] != '\n'
1571
+ && text->data[text->size - 1] != '\r')
1572
+ bufputc(text, '\n');
1573
+
1574
+ /* second pass: actual rendering */
1575
+ parse_block(ob, &rndr, text->data, text->size, 0 /* initial depth */);
1576
+
1577
+ /* clean-up */
1578
+ bufrelease(text);
1579
+ lr = rndr.refs.base;
1580
+ for (i = 0; i < (size_t)rndr.refs.size; i += 1) {
1581
+ bufrelease(lr[i].id);
1582
+ bufrelease(lr[i].link);
1583
+ bufrelease(lr[i].title); }
1584
+ arr_free(&rndr.refs);
1585
+ assert(rndr.work.size == 0);
1586
+ for (i = 0; i < (size_t)rndr.work.asize; i += 1)
1587
+ bufrelease(rndr.work.item[i]);
1588
+ parr_free(&rndr.work); }
1589
+
1590
+ /* vim: set filetype=c: */