vkhater-redcarpet 2.2.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,2556 @@
1
+ /* markdown.c - generic markdown parser */
2
+
3
+ /*
4
+ * Copyright (c) 2009, Natacha Porté
5
+ * Copyright (c) 2011, Vicent Marti
6
+ *
7
+ * Permission to use, copy, modify, and distribute this software for any
8
+ * purpose with or without fee is hereby granted, provided that the above
9
+ * copyright notice and this permission notice appear in all copies.
10
+ *
11
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18
+ */
19
+
20
+ #include "markdown.h"
21
+ #include "stack.h"
22
+
23
+ #include <assert.h>
24
+ #include <string.h>
25
+ #include <ctype.h>
26
+ #include <stdio.h>
27
+
28
+ #if defined(_WIN32)
29
+ #define strncasecmp _strnicmp
30
+ #endif
31
+
32
+ #define REF_TABLE_SIZE 8
33
+
34
+ #define BUFFER_BLOCK 0
35
+ #define BUFFER_SPAN 1
36
+
37
+ #define MKD_LI_END 8 /* internal list flag */
38
+
39
+ #define gperf_case_strncmp(s1, s2, n) strncasecmp(s1, s2, n)
40
+ #define GPERF_DOWNCASE 1
41
+ #define GPERF_CASE_STRNCMP 1
42
+ #include "html_blocks.h"
43
+
44
+ /***************
45
+ * LOCAL TYPES *
46
+ ***************/
47
+
48
/* link_ref: one stored link reference; entries sharing a bucket are chained */
struct link_ref {
	unsigned int id;       /* hash of the reference name */

	struct buf *link;      /* link target */
	struct buf *title;     /* link title */

	struct link_ref *next; /* next reference in the same bucket */
};
57
+
58
/*
 * char_trigger: pointer to a function rendering one active char.
 *   - returns the number of chars taken care of
 *   - data points to the beginning of the span
 *   - offset is the number of valid chars before data
 *   - size is the number of chars available from data onwards
 */
struct sd_markdown;
typedef size_t (*char_trigger)(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);

/* forward declarations of the active-char handlers */
static size_t char_emphasis(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
static size_t char_linebreak(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
static size_t char_codespan(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
static size_t char_escape(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
static size_t char_entity(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
static size_t char_langle_tag(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
static size_t char_autolink_url(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
static size_t char_autolink_email(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
static size_t char_autolink_www(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
static size_t char_link(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
static size_t char_superscript(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
77
+
78
+ enum markdown_char_t {
79
+ MD_CHAR_NONE = 0,
80
+ MD_CHAR_EMPHASIS,
81
+ MD_CHAR_CODESPAN,
82
+ MD_CHAR_LINEBREAK,
83
+ MD_CHAR_LINK,
84
+ MD_CHAR_LANGLE,
85
+ MD_CHAR_ESCAPE,
86
+ MD_CHAR_ENTITITY,
87
+ MD_CHAR_AUTOLINK_URL,
88
+ MD_CHAR_AUTOLINK_EMAIL,
89
+ MD_CHAR_AUTOLINK_WWW,
90
+ MD_CHAR_SUPERSCRIPT,
91
+ };
92
+
93
+ static char_trigger markdown_char_ptrs[] = {
94
+ NULL,
95
+ &char_emphasis,
96
+ &char_codespan,
97
+ &char_linebreak,
98
+ &char_link,
99
+ &char_langle_tag,
100
+ &char_escape,
101
+ &char_entity,
102
+ &char_autolink_url,
103
+ &char_autolink_email,
104
+ &char_autolink_www,
105
+ &char_superscript,
106
+ };
107
+
108
+ /* render • structure containing one particular render */
109
+ struct sd_markdown {
110
+ struct sd_callbacks cb;
111
+ void *opaque;
112
+
113
+ struct link_ref *refs[REF_TABLE_SIZE];
114
+ uint8_t active_char[256];
115
+ struct stack work_bufs[2];
116
+ unsigned int ext_flags;
117
+ size_t max_nesting;
118
+ int in_link_body;
119
+ };
120
+
121
+ /***************************
122
+ * HELPER FUNCTIONS *
123
+ ***************************/
124
+
125
+ static inline struct buf *
126
+ rndr_newbuf(struct sd_markdown *rndr, int type)
127
+ {
128
+ static const size_t buf_size[2] = {256, 64};
129
+ struct buf *work = NULL;
130
+ struct stack *pool = &rndr->work_bufs[type];
131
+
132
+ if (pool->size < pool->asize &&
133
+ pool->item[pool->size] != NULL) {
134
+ work = pool->item[pool->size++];
135
+ work->size = 0;
136
+ } else {
137
+ work = bufnew(buf_size[type]);
138
+ stack_push(pool, work);
139
+ }
140
+
141
+ return work;
142
+ }
143
+
144
+ static inline void
145
+ rndr_popbuf(struct sd_markdown *rndr, int type)
146
+ {
147
+ rndr->work_bufs[type].size--;
148
+ }
149
+
150
+ static void
151
+ unscape_text(struct buf *ob, struct buf *src)
152
+ {
153
+ size_t i = 0, org;
154
+ while (i < src->size) {
155
+ org = i;
156
+ while (i < src->size && src->data[i] != '\\')
157
+ i++;
158
+
159
+ if (i > org)
160
+ bufput(ob, src->data + org, i - org);
161
+
162
+ if (i + 1 >= src->size)
163
+ break;
164
+
165
+ bufputc(ob, src->data[i + 1]);
166
+ i += 2;
167
+ }
168
+ }
169
+
170
/* hash_link_ref • case-insensitive hash of a reference name */
/* every byte is lowercased with tolower() before being mixed in */
static unsigned int
hash_link_ref(const uint8_t *link_ref, size_t length)
{
	const uint8_t *cur = link_ref;
	const uint8_t *const stop = link_ref + length;
	unsigned int acc = 0;

	while (cur != stop) {
		const unsigned int lowered = (unsigned int)tolower(*cur);

		acc = lowered + (acc << 6) + (acc << 16) - acc;
		cur++;
	}

	return acc;
}
181
+
182
+ static struct link_ref *
183
+ add_link_ref(
184
+ struct link_ref **references,
185
+ const uint8_t *name, size_t name_size)
186
+ {
187
+ struct link_ref *ref = calloc(1, sizeof(struct link_ref));
188
+
189
+ if (!ref)
190
+ return NULL;
191
+
192
+ ref->id = hash_link_ref(name, name_size);
193
+ ref->next = references[ref->id % REF_TABLE_SIZE];
194
+
195
+ references[ref->id % REF_TABLE_SIZE] = ref;
196
+ return ref;
197
+ }
198
+
199
+ static struct link_ref *
200
+ find_link_ref(struct link_ref **references, uint8_t *name, size_t length)
201
+ {
202
+ unsigned int hash = hash_link_ref(name, length);
203
+ struct link_ref *ref = NULL;
204
+
205
+ ref = references[hash % REF_TABLE_SIZE];
206
+
207
+ while (ref != NULL) {
208
+ if (ref->id == hash)
209
+ return ref;
210
+
211
+ ref = ref->next;
212
+ }
213
+
214
+ return NULL;
215
+ }
216
+
217
+ static void
218
+ free_link_refs(struct link_ref **references)
219
+ {
220
+ size_t i;
221
+
222
+ for (i = 0; i < REF_TABLE_SIZE; ++i) {
223
+ struct link_ref *r = references[i];
224
+ struct link_ref *next;
225
+
226
+ while (r) {
227
+ next = r->next;
228
+ bufrelease(r->link);
229
+ bufrelease(r->title);
230
+ free(r);
231
+ r = next;
232
+ }
233
+ }
234
+ }
235
+
236
/*
 * Tell whether a char counts as whitespace for Markdown purposes.
 *
 * Only the plain space and the newline qualify: tabs and carriage
 * returns are filtered out during the preprocessing phase.
 *
 * A really UTF-8 compliant version would decode a Unicode codepoint
 * and test its space property instead.
 */
static inline int
_isspace(int c)
{
	switch (c) {
	case ' ':
	case '\n':
		return 1;

	default:
		return 0;
	}
}
252
+
253
+ /****************************
254
+ * INLINE PARSING FUNCTIONS *
255
+ ****************************/
256
+
257
/* is_mail_autolink • looks for the address part of a mail autolink and '>' */
/* this is less strict than the original markdown e-mail address matching */
/* returns the number of chars up to and including '>', or 0 when no match */
static size_t
is_mail_autolink(uint8_t *data, size_t size)
{
	size_t at_count = 0, pos;

	/* address is assumed to be: [-@._a-zA-Z0-9]+ with exactly one '@' */
	for (pos = 0; pos < size; pos++) {
		const uint8_t ch = data[pos];

		if (ch == '>')
			return (at_count == 1) ? pos + 1 : 0;

		if (ch == '@') {
			at_count++;
			continue;
		}

		if (isalnum(ch) || ch == '-' || ch == '.' || ch == '_')
			continue;

		/* any other char cannot be part of an address */
		return 0;
	}

	/* ran out of data before the closing '>' */
	return 0;
}
288
+
289
+ /* tag_length • returns the length of the given tag, or 0 is it's not valid */
290
+ static size_t
291
+ tag_length(uint8_t *data, size_t size, enum mkd_autolink *autolink)
292
+ {
293
+ size_t i, j;
294
+
295
+ /* a valid tag can't be shorter than 3 chars */
296
+ if (size < 3) return 0;
297
+
298
+ /* begins with a '<' optionally followed by '/', followed by letter or number */
299
+ if (data[0] != '<') return 0;
300
+ i = (data[1] == '/') ? 2 : 1;
301
+
302
+ if (!isalnum(data[i]))
303
+ return 0;
304
+
305
+ /* scheme test */
306
+ *autolink = MKDA_NOT_AUTOLINK;
307
+
308
+ /* try to find the beginning of an URI */
309
+ while (i < size && (isalnum(data[i]) || data[i] == '.' || data[i] == '+' || data[i] == '-'))
310
+ i++;
311
+
312
+ if (i > 1 && data[i] == '@') {
313
+ if ((j = is_mail_autolink(data + i, size - i)) != 0) {
314
+ *autolink = MKDA_EMAIL;
315
+ return i + j;
316
+ }
317
+ }
318
+
319
+ if (i > 2 && data[i] == ':') {
320
+ *autolink = MKDA_NORMAL;
321
+ i++;
322
+ }
323
+
324
+ /* completing autolink test: no whitespace or ' or " */
325
+ if (i >= size)
326
+ *autolink = MKDA_NOT_AUTOLINK;
327
+
328
+ else if (*autolink) {
329
+ j = i;
330
+
331
+ while (i < size) {
332
+ if (data[i] == '\\') i += 2;
333
+ else if (data[i] == '>' || data[i] == '\'' ||
334
+ data[i] == '"' || data[i] == ' ' || data[i] == '\n')
335
+ break;
336
+ else i++;
337
+ }
338
+
339
+ if (i >= size) return 0;
340
+ if (i > j && data[i] == '>') return i + 1;
341
+ /* one of the forbidden chars has been found */
342
+ *autolink = MKDA_NOT_AUTOLINK;
343
+ }
344
+
345
+ /* looking for sometinhg looking like a tag end */
346
+ while (i < size && data[i] != '>') i++;
347
+ if (i >= size) return 0;
348
+ return i + 1;
349
+ }
350
+
351
+ /* parse_inline • parses inline markdown elements */
352
+ static void
353
+ parse_inline(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
354
+ {
355
+ size_t i = 0, end = 0;
356
+ uint8_t action = 0;
357
+ struct buf work = { 0, 0, 0, 0 };
358
+
359
+ if (rndr->work_bufs[BUFFER_SPAN].size +
360
+ rndr->work_bufs[BUFFER_BLOCK].size > rndr->max_nesting)
361
+ return;
362
+
363
+ while (i < size) {
364
+ /* copying inactive chars into the output */
365
+ while (end < size && (action = rndr->active_char[data[end]]) == 0) {
366
+ end++;
367
+ }
368
+
369
+ if (rndr->cb.normal_text) {
370
+ work.data = data + i;
371
+ work.size = end - i;
372
+ rndr->cb.normal_text(ob, &work, rndr->opaque);
373
+ }
374
+ else
375
+ bufput(ob, data + i, end - i);
376
+
377
+ if (end >= size) break;
378
+ i = end;
379
+
380
+ end = markdown_char_ptrs[(int)action](ob, rndr, data + i, i, size - i);
381
+ if (!end) /* no action from the callback */
382
+ end = i + 1;
383
+ else {
384
+ i += end;
385
+ end = i;
386
+ }
387
+ }
388
+ }
389
+
390
/* find_emph_char • looks for the next emph char, skipping other constructs */
/* the search starts at index 1; code spans and links are stepped over.
 * returns the index of the emph char, or 0 when none is found. when the data
 * ends inside a code span or link, the first emph char seen inside it (if any)
 * is returned instead */
static size_t
find_emph_char(uint8_t *data, size_t size, uint8_t c)
{
	size_t pos = 1;

	while (pos < size) {
		/* first emph char met inside the construct being skipped */
		size_t inner_hit = 0;

		/* moving on to the next char of interest */
		for (; pos < size; pos++) {
			if (data[pos] == c || data[pos] == '`' || data[pos] == '[')
				break;
		}

		if (pos == size)
			return 0;

		if (data[pos] == c)
			return pos;

		/* not counting escaped chars (pos is always >= 1 here) */
		if (data[pos - 1] == '\\') {
			pos++;
			continue;
		}

		if (data[pos] == '`') {
			size_t fence = 0, run = 0;

			/* counting the number of opening backticks */
			for (; pos < size && data[pos] == '`'; pos++)
				fence++;

			if (pos >= size)
				return 0;

			/* finding the matching closing sequence */
			for (; pos < size && run < fence; pos++) {
				if (!inner_hit && data[pos] == c)
					inner_hit = pos;

				run = (data[pos] == '`') ? run + 1 : 0;
			}

			if (pos >= size)
				return inner_hit;
		}
		/* skipping a link */
		else if (data[pos] == '[') {
			uint8_t closer;

			/* link text */
			pos++;
			for (; pos < size && data[pos] != ']'; pos++) {
				if (!inner_hit && data[pos] == c)
					inner_hit = pos;
			}

			pos++;
			while (pos < size && (data[pos] == ' ' || data[pos] == '\n'))
				pos++;

			if (pos >= size)
				return inner_hit;

			if (data[pos] == '[')
				closer = ']';
			else if (data[pos] == '(')
				closer = ')';
			else if (inner_hit)
				return inner_hit;
			else
				continue;

			/* link target or reference id */
			pos++;
			for (; pos < size && data[pos] != closer; pos++) {
				if (!inner_hit && data[pos] == c)
					inner_hit = pos;
			}

			if (pos >= size)
				return inner_hit;

			pos++;
		}
	}

	return 0;
}
480
+
481
+ /* parse_emph1 • parsing single emphase */
482
+ /* closed by a symbol not preceded by whitespace and not followed by symbol */
483
+ static size_t
484
+ parse_emph1(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, uint8_t c)
485
+ {
486
+ size_t i = 0, len;
487
+ struct buf *work = 0;
488
+ int r;
489
+
490
+ if (!rndr->cb.emphasis) return 0;
491
+
492
+ /* skipping one symbol if coming from emph3 */
493
+ if (size > 1 && data[0] == c && data[1] == c) i = 1;
494
+
495
+ while (i < size) {
496
+ len = find_emph_char(data + i, size - i, c);
497
+ if (!len) return 0;
498
+ i += len;
499
+ if (i >= size) return 0;
500
+
501
+ if (data[i] == c && !_isspace(data[i - 1])) {
502
+
503
+ if (rndr->ext_flags & MKDEXT_NO_INTRA_EMPHASIS) {
504
+ if (i + i < size && isalnum(data[i + 1]))
505
+ continue;
506
+ }
507
+
508
+ work = rndr_newbuf(rndr, BUFFER_SPAN);
509
+ parse_inline(work, rndr, data, i);
510
+ r = rndr->cb.emphasis(ob, work, rndr->opaque);
511
+ rndr_popbuf(rndr, BUFFER_SPAN);
512
+ return r ? i + 1 : 0;
513
+ }
514
+ }
515
+
516
+ return 0;
517
+ }
518
+
519
+ /* parse_emph2 • parsing single emphase */
520
+ static size_t
521
+ parse_emph2(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, uint8_t c)
522
+ {
523
+ int (*render_method)(struct buf *ob, const struct buf *text, void *opaque);
524
+ size_t i = 0, len;
525
+ struct buf *work = 0;
526
+ int r;
527
+
528
+ render_method = (c == '~') ? rndr->cb.strikethrough : rndr->cb.double_emphasis;
529
+
530
+ if (!render_method)
531
+ return 0;
532
+
533
+ while (i < size) {
534
+ len = find_emph_char(data + i, size - i, c);
535
+ if (!len) return 0;
536
+ i += len;
537
+
538
+ if (i + 1 < size && data[i] == c && data[i + 1] == c && i && !_isspace(data[i - 1])) {
539
+ work = rndr_newbuf(rndr, BUFFER_SPAN);
540
+ parse_inline(work, rndr, data, i);
541
+ r = render_method(ob, work, rndr->opaque);
542
+ rndr_popbuf(rndr, BUFFER_SPAN);
543
+ return r ? i + 2 : 0;
544
+ }
545
+ i++;
546
+ }
547
+ return 0;
548
+ }
549
+
550
+ /* parse_emph3 • parsing single emphase */
551
+ /* finds the first closing tag, and delegates to the other emph */
552
+ static size_t
553
+ parse_emph3(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, uint8_t c)
554
+ {
555
+ size_t i = 0, len;
556
+ int r;
557
+
558
+ while (i < size) {
559
+ len = find_emph_char(data + i, size - i, c);
560
+ if (!len) return 0;
561
+ i += len;
562
+
563
+ /* skip whitespace preceded symbols */
564
+ if (data[i] != c || _isspace(data[i - 1]))
565
+ continue;
566
+
567
+ if (i + 2 < size && data[i + 1] == c && data[i + 2] == c && rndr->cb.triple_emphasis) {
568
+ /* triple symbol found */
569
+ struct buf *work = rndr_newbuf(rndr, BUFFER_SPAN);
570
+
571
+ parse_inline(work, rndr, data, i);
572
+ r = rndr->cb.triple_emphasis(ob, work, rndr->opaque);
573
+ rndr_popbuf(rndr, BUFFER_SPAN);
574
+ return r ? i + 3 : 0;
575
+
576
+ } else if (i + 1 < size && data[i + 1] == c) {
577
+ /* double symbol found, handing over to emph1 */
578
+ len = parse_emph1(ob, rndr, data - 2, size + 2, c);
579
+ if (!len) return 0;
580
+ else return len - 2;
581
+
582
+ } else {
583
+ /* single symbol found, handing over to emph2 */
584
+ len = parse_emph2(ob, rndr, data - 1, size + 1, c);
585
+ if (!len) return 0;
586
+ else return len - 1;
587
+ }
588
+ }
589
+ return 0;
590
+ }
591
+
592
+ /* char_emphasis • single and double emphasis parsing */
593
+ static size_t
594
+ char_emphasis(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
595
+ {
596
+ uint8_t c = data[0];
597
+ size_t ret;
598
+
599
+ if (rndr->ext_flags & MKDEXT_NO_INTRA_EMPHASIS) {
600
+ if (offset > 0 && !_isspace(data[-1]) && data[-1] != '>')
601
+ return 0;
602
+ }
603
+
604
+ if (size > 2 && data[1] != c) {
605
+ /* whitespace cannot follow an opening emphasis;
606
+ * strikethrough only takes two characters '~~' */
607
+ if (c == '~' || _isspace(data[1]) || (ret = parse_emph1(ob, rndr, data + 1, size - 1, c)) == 0)
608
+ return 0;
609
+
610
+ return ret + 1;
611
+ }
612
+
613
+ if (size > 3 && data[1] == c && data[2] != c) {
614
+ if (_isspace(data[2]) || (ret = parse_emph2(ob, rndr, data + 2, size - 2, c)) == 0)
615
+ return 0;
616
+
617
+ return ret + 2;
618
+ }
619
+
620
+ if (size > 4 && data[1] == c && data[2] == c && data[3] != c) {
621
+ if (c == '~' || _isspace(data[3]) || (ret = parse_emph3(ob, rndr, data + 3, size - 3, c)) == 0)
622
+ return 0;
623
+
624
+ return ret + 3;
625
+ }
626
+
627
+ return 0;
628
+ }
629
+
630
+
631
+ /* char_linebreak • '\n' preceded by two spaces (assuming linebreak != 0) */
632
+ static size_t
633
+ char_linebreak(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
634
+ {
635
+ if (offset < 2 || data[-1] != ' ' || data[-2] != ' ')
636
+ return 0;
637
+
638
+ /* removing the last space from ob and rendering */
639
+ while (ob->size && ob->data[ob->size - 1] == ' ')
640
+ ob->size--;
641
+
642
+ return rndr->cb.linebreak(ob, rndr->opaque) ? 1 : 0;
643
+ }
644
+
645
+
646
+ /* char_codespan • '`' parsing a code span (assuming codespan != 0) */
647
+ static size_t
648
+ char_codespan(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
649
+ {
650
+ size_t end, nb = 0, i, f_begin, f_end;
651
+
652
+ /* counting the number of backticks in the delimiter */
653
+ while (nb < size && data[nb] == '`')
654
+ nb++;
655
+
656
+ /* finding the next delimiter */
657
+ i = 0;
658
+ for (end = nb; end < size && i < nb; end++) {
659
+ if (data[end] == '`') i++;
660
+ else i = 0;
661
+ }
662
+
663
+ if (i < nb && end >= size)
664
+ return 0; /* no matching delimiter */
665
+
666
+ /* trimming outside whitespaces */
667
+ f_begin = nb;
668
+ while (f_begin < end && data[f_begin] == ' ')
669
+ f_begin++;
670
+
671
+ f_end = end - nb;
672
+ while (f_end > nb && data[f_end-1] == ' ')
673
+ f_end--;
674
+
675
+ /* real code span */
676
+ if (f_begin < f_end) {
677
+ struct buf work = { data + f_begin, f_end - f_begin, 0, 0 };
678
+ if (!rndr->cb.codespan(ob, &work, rndr->opaque))
679
+ end = 0;
680
+ } else {
681
+ if (!rndr->cb.codespan(ob, 0, rndr->opaque))
682
+ end = 0;
683
+ }
684
+
685
+ return end;
686
+ }
687
+
688
+
689
+ /* char_escape • '\\' backslash escape */
690
+ static size_t
691
+ char_escape(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
692
+ {
693
+ static const char *escape_chars = "\\`*_{}[]()#+-.!:|&<>^~";
694
+ struct buf work = { 0, 0, 0, 0 };
695
+
696
+ if (size > 1) {
697
+ if (strchr(escape_chars, data[1]) == NULL)
698
+ return 0;
699
+
700
+ if (rndr->cb.normal_text) {
701
+ work.data = data + 1;
702
+ work.size = 1;
703
+ rndr->cb.normal_text(ob, &work, rndr->opaque);
704
+ }
705
+ else bufputc(ob, data[1]);
706
+ } else if (size == 1) {
707
+ bufputc(ob, data[0]);
708
+ }
709
+
710
+ return 2;
711
+ }
712
+
713
+ /* char_entity • '&' escaped when it doesn't belong to an entity */
714
+ /* valid entities are assumed to be anything matching &#?[A-Za-z0-9]+; */
715
+ static size_t
716
+ char_entity(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
717
+ {
718
+ size_t end = 1;
719
+ struct buf work = { 0, 0, 0, 0 };
720
+
721
+ if (end < size && data[end] == '#')
722
+ end++;
723
+
724
+ while (end < size && isalnum(data[end]))
725
+ end++;
726
+
727
+ if (end < size && data[end] == ';')
728
+ end++; /* real entity */
729
+ else
730
+ return 0; /* lone '&' */
731
+
732
+ if (rndr->cb.entity) {
733
+ work.data = data;
734
+ work.size = end;
735
+ rndr->cb.entity(ob, &work, rndr->opaque);
736
+ }
737
+ else bufput(ob, data, end);
738
+
739
+ return end;
740
+ }
741
+
742
+ /* char_langle_tag • '<' when tags or autolinks are allowed */
743
+ static size_t
744
+ char_langle_tag(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
745
+ {
746
+ enum mkd_autolink altype = MKDA_NOT_AUTOLINK;
747
+ size_t end = tag_length(data, size, &altype);
748
+ struct buf work = { data, end, 0, 0 };
749
+ int ret = 0;
750
+
751
+ if (end > 2) {
752
+ if (rndr->cb.autolink && altype != MKDA_NOT_AUTOLINK) {
753
+ struct buf *u_link = rndr_newbuf(rndr, BUFFER_SPAN);
754
+ work.data = data + 1;
755
+ work.size = end - 2;
756
+ unscape_text(u_link, &work);
757
+ ret = rndr->cb.autolink(ob, u_link, altype, rndr->opaque);
758
+ rndr_popbuf(rndr, BUFFER_SPAN);
759
+ }
760
+ else if (rndr->cb.raw_html_tag)
761
+ ret = rndr->cb.raw_html_tag(ob, &work, rndr->opaque);
762
+ }
763
+
764
+ if (!ret) return 0;
765
+ else return end;
766
+ }
767
+
768
+ static size_t
769
+ char_autolink_www(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
770
+ {
771
+ struct buf *link, *link_url, *link_text;
772
+ size_t link_len, rewind;
773
+
774
+ if (!rndr->cb.link || rndr->in_link_body)
775
+ return 0;
776
+
777
+ link = rndr_newbuf(rndr, BUFFER_SPAN);
778
+
779
+ if ((link_len = sd_autolink__www(&rewind, link, data, offset, size, 0)) > 0) {
780
+ link_url = rndr_newbuf(rndr, BUFFER_SPAN);
781
+ BUFPUTSL(link_url, "http://");
782
+ bufput(link_url, link->data, link->size);
783
+
784
+ ob->size -= rewind;
785
+ if (rndr->cb.normal_text) {
786
+ link_text = rndr_newbuf(rndr, BUFFER_SPAN);
787
+ rndr->cb.normal_text(link_text, link, rndr->opaque);
788
+ rndr->cb.link(ob, link_url, NULL, link_text, rndr->opaque);
789
+ rndr_popbuf(rndr, BUFFER_SPAN);
790
+ } else {
791
+ rndr->cb.link(ob, link_url, NULL, link, rndr->opaque);
792
+ }
793
+ rndr_popbuf(rndr, BUFFER_SPAN);
794
+ }
795
+
796
+ rndr_popbuf(rndr, BUFFER_SPAN);
797
+ return link_len;
798
+ }
799
+
800
+ static size_t
801
+ char_autolink_email(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
802
+ {
803
+ struct buf *link;
804
+ size_t link_len, rewind;
805
+
806
+ if (!rndr->cb.autolink || rndr->in_link_body)
807
+ return 0;
808
+
809
+ link = rndr_newbuf(rndr, BUFFER_SPAN);
810
+
811
+ if ((link_len = sd_autolink__email(&rewind, link, data, offset, size, 0)) > 0) {
812
+ ob->size -= rewind;
813
+ rndr->cb.autolink(ob, link, MKDA_EMAIL, rndr->opaque);
814
+ }
815
+
816
+ rndr_popbuf(rndr, BUFFER_SPAN);
817
+ return link_len;
818
+ }
819
+
820
+ static size_t
821
+ char_autolink_url(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
822
+ {
823
+ struct buf *link;
824
+ size_t link_len, rewind;
825
+
826
+ if (!rndr->cb.autolink || rndr->in_link_body)
827
+ return 0;
828
+
829
+ link = rndr_newbuf(rndr, BUFFER_SPAN);
830
+
831
+ if ((link_len = sd_autolink__url(&rewind, link, data, offset, size, 0)) > 0) {
832
+ ob->size -= rewind;
833
+ rndr->cb.autolink(ob, link, MKDA_NORMAL, rndr->opaque);
834
+ }
835
+
836
+ rndr_popbuf(rndr, BUFFER_SPAN);
837
+ return link_len;
838
+ }
839
+
840
+ /* char_link • '[': parsing a link or an image */
841
+ static size_t
842
+ char_link(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
843
+ {
844
+ int is_img = (offset && data[-1] == '!'), level;
845
+ size_t i = 1, txt_e, link_b = 0, link_e = 0, title_b = 0, title_e = 0;
846
+ struct buf *content = 0;
847
+ struct buf *link = 0;
848
+ struct buf *title = 0;
849
+ struct buf *u_link = 0;
850
+ size_t org_work_size = rndr->work_bufs[BUFFER_SPAN].size;
851
+ int text_has_nl = 0, ret = 0;
852
+ int in_title = 0, qtype = 0;
853
+
854
+ /* checking whether the correct renderer exists */
855
+ if ((is_img && !rndr->cb.image) || (!is_img && !rndr->cb.link))
856
+ goto cleanup;
857
+
858
+ /* looking for the matching closing bracket */
859
+ for (level = 1; i < size; i++) {
860
+ if (data[i] == '\n')
861
+ text_has_nl = 1;
862
+
863
+ else if (data[i - 1] == '\\')
864
+ continue;
865
+
866
+ else if (data[i] == '[')
867
+ level++;
868
+
869
+ else if (data[i] == ']') {
870
+ level--;
871
+ if (level <= 0)
872
+ break;
873
+ }
874
+ }
875
+
876
+ if (i >= size)
877
+ goto cleanup;
878
+
879
+ txt_e = i;
880
+ i++;
881
+
882
+ /* skip any amount of whitespace or newline */
883
+ /* (this is much more laxist than original markdown syntax) */
884
+ while (i < size && _isspace(data[i]))
885
+ i++;
886
+
887
+ /* inline style link */
888
+ if (i < size && data[i] == '(') {
889
+ /* skipping initial whitespace */
890
+ i++;
891
+
892
+ while (i < size && _isspace(data[i]))
893
+ i++;
894
+
895
+ link_b = i;
896
+
897
+ /* looking for link end: ' " ) */
898
+ while (i < size) {
899
+ if (data[i] == '\\') i += 2;
900
+ else if (data[i] == ')') break;
901
+ else if (i >= 1 && _isspace(data[i-1]) && (data[i] == '\'' || data[i] == '"')) break;
902
+ else i++;
903
+ }
904
+
905
+ if (i >= size) goto cleanup;
906
+ link_e = i;
907
+
908
+ /* looking for title end if present */
909
+ if (data[i] == '\'' || data[i] == '"') {
910
+ qtype = data[i];
911
+ in_title = 1;
912
+ i++;
913
+ title_b = i;
914
+
915
+ while (i < size) {
916
+ if (data[i] == '\\') i += 2;
917
+ else if (data[i] == qtype) {in_title = 0; i++;}
918
+ else if ((data[i] == ')') && !in_title) break;
919
+ else i++;
920
+ }
921
+
922
+ if (i >= size) goto cleanup;
923
+
924
+ /* skipping whitespaces after title */
925
+ title_e = i - 1;
926
+ while (title_e > title_b && _isspace(data[title_e]))
927
+ title_e--;
928
+
929
+ /* checking for closing quote presence */
930
+ if (data[title_e] != '\'' && data[title_e] != '"') {
931
+ title_b = title_e = 0;
932
+ link_e = i;
933
+ }
934
+ }
935
+
936
+ /* remove whitespace at the end of the link */
937
+ while (link_e > link_b && _isspace(data[link_e - 1]))
938
+ link_e--;
939
+
940
+ /* remove optional angle brackets around the link */
941
+ if (data[link_b] == '<') link_b++;
942
+ if (data[link_e - 1] == '>') link_e--;
943
+
944
+ /* building escaped link and title */
945
+ if (link_e > link_b) {
946
+ link = rndr_newbuf(rndr, BUFFER_SPAN);
947
+ bufput(link, data + link_b, link_e - link_b);
948
+ }
949
+
950
+ if (title_e > title_b) {
951
+ title = rndr_newbuf(rndr, BUFFER_SPAN);
952
+ bufput(title, data + title_b, title_e - title_b);
953
+ }
954
+
955
+ i++;
956
+ }
957
+
958
+ /* reference style link */
959
+ else if (i < size && data[i] == '[') {
960
+ struct buf id = { 0, 0, 0, 0 };
961
+ struct link_ref *lr;
962
+
963
+ /* looking for the id */
964
+ i++;
965
+ link_b = i;
966
+ while (i < size && data[i] != ']') i++;
967
+ if (i >= size) goto cleanup;
968
+ link_e = i;
969
+
970
+ /* finding the link_ref */
971
+ if (link_b == link_e) {
972
+ if (text_has_nl) {
973
+ struct buf *b = rndr_newbuf(rndr, BUFFER_SPAN);
974
+ size_t j;
975
+
976
+ for (j = 1; j < txt_e; j++) {
977
+ if (data[j] != '\n')
978
+ bufputc(b, data[j]);
979
+ else if (data[j - 1] != ' ')
980
+ bufputc(b, ' ');
981
+ }
982
+
983
+ id.data = b->data;
984
+ id.size = b->size;
985
+ } else {
986
+ id.data = data + 1;
987
+ id.size = txt_e - 1;
988
+ }
989
+ } else {
990
+ id.data = data + link_b;
991
+ id.size = link_e - link_b;
992
+ }
993
+
994
+ lr = find_link_ref(rndr->refs, id.data, id.size);
995
+ if (!lr)
996
+ goto cleanup;
997
+
998
+ /* keeping link and title from link_ref */
999
+ link = lr->link;
1000
+ title = lr->title;
1001
+ i++;
1002
+ }
1003
+
1004
+ /* shortcut reference style link */
1005
+ else {
1006
+ struct buf id = { 0, 0, 0, 0 };
1007
+ struct link_ref *lr;
1008
+
1009
+ /* crafting the id */
1010
+ if (text_has_nl) {
1011
+ struct buf *b = rndr_newbuf(rndr, BUFFER_SPAN);
1012
+ size_t j;
1013
+
1014
+ for (j = 1; j < txt_e; j++) {
1015
+ if (data[j] != '\n')
1016
+ bufputc(b, data[j]);
1017
+ else if (data[j - 1] != ' ')
1018
+ bufputc(b, ' ');
1019
+ }
1020
+
1021
+ id.data = b->data;
1022
+ id.size = b->size;
1023
+ } else {
1024
+ id.data = data + 1;
1025
+ id.size = txt_e - 1;
1026
+ }
1027
+
1028
+ /* finding the link_ref */
1029
+ lr = find_link_ref(rndr->refs, id.data, id.size);
1030
+ if (!lr)
1031
+ goto cleanup;
1032
+
1033
+ /* keeping link and title from link_ref */
1034
+ link = lr->link;
1035
+ title = lr->title;
1036
+
1037
+ /* rewinding the whitespace */
1038
+ i = txt_e + 1;
1039
+ }
1040
+
1041
+ /* building content: img alt is escaped, link content is parsed */
1042
+ if (txt_e > 1) {
1043
+ content = rndr_newbuf(rndr, BUFFER_SPAN);
1044
+ if (is_img) {
1045
+ bufput(content, data + 1, txt_e - 1);
1046
+ } else {
1047
+ /* disable autolinking when parsing inline the
1048
+ * content of a link */
1049
+ rndr->in_link_body = 1;
1050
+ parse_inline(content, rndr, data + 1, txt_e - 1);
1051
+ rndr->in_link_body = 0;
1052
+ }
1053
+ }
1054
+
1055
+ if (link) {
1056
+ u_link = rndr_newbuf(rndr, BUFFER_SPAN);
1057
+ unscape_text(u_link, link);
1058
+ }
1059
+
1060
+ /* calling the relevant rendering function */
1061
+ if (is_img) {
1062
+ if (ob->size && ob->data[ob->size - 1] == '!')
1063
+ ob->size -= 1;
1064
+
1065
+ ret = rndr->cb.image(ob, u_link, title, content, rndr->opaque);
1066
+ } else {
1067
+ ret = rndr->cb.link(ob, u_link, title, content, rndr->opaque);
1068
+ }
1069
+
1070
+ /* cleanup */
1071
+ cleanup:
1072
+ rndr->work_bufs[BUFFER_SPAN].size = (int)org_work_size;
1073
+ return ret ? i : 0;
1074
+ }
1075
+
1076
+ static size_t
1077
+ char_superscript(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
1078
+ {
1079
+ size_t sup_start, sup_len;
1080
+ struct buf *sup;
1081
+
1082
+ if (!rndr->cb.superscript)
1083
+ return 0;
1084
+
1085
+ if (size < 2)
1086
+ return 0;
1087
+
1088
+ if (data[1] == '(') {
1089
+ sup_start = sup_len = 2;
1090
+
1091
+ while (sup_len < size && data[sup_len] != ')' && data[sup_len - 1] != '\\')
1092
+ sup_len++;
1093
+
1094
+ if (sup_len == size)
1095
+ return 0;
1096
+ } else {
1097
+ sup_start = sup_len = 1;
1098
+
1099
+ while (sup_len < size && !_isspace(data[sup_len]))
1100
+ sup_len++;
1101
+ }
1102
+
1103
+ if (sup_len - sup_start == 0)
1104
+ return (sup_start == 2) ? 3 : 0;
1105
+
1106
+ sup = rndr_newbuf(rndr, BUFFER_SPAN);
1107
+ parse_inline(sup, rndr, data + sup_start, sup_len - sup_start);
1108
+ rndr->cb.superscript(ob, sup, rndr->opaque);
1109
+ rndr_popbuf(rndr, BUFFER_SPAN);
1110
+
1111
+ return (sup_start == 2) ? sup_len + 1 : sup_len;
1112
+ }
1113
+
1114
+ /*********************************
1115
+ * BLOCK-LEVEL PARSING FUNCTIONS *
1116
+ *********************************/
1117
+
1118
/* is_empty • returns the line length when it is empty, 0 otherwise */
/* a line is empty when it holds nothing but spaces; the returned length
 * counts one char for the line end, even when data stops before a newline */
static size_t
is_empty(uint8_t *data, size_t size)
{
	size_t pos = 0;

	while (pos < size && data[pos] != '\n') {
		if (data[pos] != ' ')
			return 0;

		pos++;
	}

	return pos + 1;
}
1130
+
1131
/* is_hrule • returns 1 when the line is a horizontal rule, 0 otherwise */
/* A rule is: up to three leading spaces, then a line made only of one of */
/* '*', '-' or '_' (the same character throughout) and spaces, with at */
/* least three occurrences of that character. */
static int
is_hrule(uint8_t *data, size_t size)
{
	size_t pos = 0, marks = 0;
	uint8_t rule_char;

	if (size < 3)
		return 0;

	/* at most three spaces of indentation (size >= 3, so in bounds) */
	while (pos < 3 && data[pos] == ' ')
		pos++;

	/* at least three bytes must remain from the rule character on */
	if (pos + 2 >= size)
		return 0;

	rule_char = data[pos];
	if (rule_char != '*' && rule_char != '-' && rule_char != '_')
		return 0;

	/* the rest of the line may only hold the rule character or spaces */
	for (; pos < size && data[pos] != '\n'; pos++) {
		if (data[pos] == rule_char)
			marks++;
		else if (data[pos] != ' ')
			return 0;
	}

	return marks >= 3;
}
1161
+
1162
/* prefix_codefence • checks whether a line begins with a code fence */
/* A fence is up to three leading spaces followed by a run of at least */
/* three identical '~' or '`' characters. Returns the offset just past the */
/* run (indentation included), or 0 when the line does not open a fence. */
static size_t
prefix_codefence(uint8_t *data, size_t size)
{
	size_t pos = 0, run = 0;
	uint8_t fence_char;

	if (size < 3)
		return 0;

	/* at most three spaces of indentation (size >= 3, so in bounds) */
	while (pos < 3 && data[pos] == ' ')
		pos++;

	/* room for three fence characters is required */
	if (pos + 2 >= size)
		return 0;

	fence_char = data[pos];
	if (fence_char != '~' && fence_char != '`')
		return 0;

	/* measure the run of fence characters */
	for (; pos < size && data[pos] == fence_char; pos++)
		run++;

	return (run < 3) ? 0 : pos;
}
1192
+
1193
+ /* check if a line is a code fence; return its size if it is */
1194
+ static size_t
1195
+ is_codefence(uint8_t *data, size_t size, struct buf *syntax)
1196
+ {
1197
+ size_t i = 0, syn_len = 0;
1198
+ uint8_t *syn_start;
1199
+
1200
+ i = prefix_codefence(data, size);
1201
+ if (i == 0)
1202
+ return 0;
1203
+
1204
+ while (i < size && data[i] == ' ')
1205
+ i++;
1206
+
1207
+ syn_start = data + i;
1208
+
1209
+ if (i < size && data[i] == '{') {
1210
+ i++; syn_start++;
1211
+
1212
+ while (i < size && data[i] != '}' && data[i] != '\n') {
1213
+ syn_len++; i++;
1214
+ }
1215
+
1216
+ if (i == size || data[i] != '}')
1217
+ return 0;
1218
+
1219
+ /* strip all whitespace at the beginning and the end
1220
+ * of the {} block */
1221
+ while (syn_len > 0 && _isspace(syn_start[0])) {
1222
+ syn_start++; syn_len--;
1223
+ }
1224
+
1225
+ while (syn_len > 0 && _isspace(syn_start[syn_len - 1]))
1226
+ syn_len--;
1227
+
1228
+ i++;
1229
+ } else {
1230
+ while (i < size && !_isspace(data[i])) {
1231
+ syn_len++; i++;
1232
+ }
1233
+ }
1234
+
1235
+ if (syntax) {
1236
+ syntax->data = syn_start;
1237
+ syntax->size = syn_len;
1238
+ }
1239
+
1240
+ while (i < size && data[i] != '\n') {
1241
+ if (!_isspace(data[i]))
1242
+ return 0;
1243
+
1244
+ i++;
1245
+ }
1246
+
1247
+ return i + 1;
1248
+ }
1249
+
1250
+ /* is_atxheader • returns whether the line is a hash-prefixed header */
1251
+ static int
1252
+ is_atxheader(struct sd_markdown *rndr, uint8_t *data, size_t size)
1253
+ {
1254
+ if (data[0] != '#')
1255
+ return 0;
1256
+
1257
+ if (rndr->ext_flags & MKDEXT_SPACE_HEADERS) {
1258
+ size_t level = 0;
1259
+
1260
+ while (level < size && level < 6 && data[level] == '#')
1261
+ level++;
1262
+
1263
+ if (level < size && data[level] != ' ')
1264
+ return 0;
1265
+ }
1266
+
1267
+ return 1;
1268
+ }
1269
+
1270
/* is_headerline • tells whether the line is a setext-style underline */
/* Returns 1 for a run of '=' and 2 for a run of '-', each optionally */
/* followed by spaces up to the end of the line; 0 for anything else. */
/* data[0] is read unconditionally. */
static int
is_headerline(uint8_t *data, size_t size)
{
	uint8_t mark = data[0];
	size_t pos = 1;

	if (mark != '=' && mark != '-')
		return 0;

	/* the run of underline characters ... */
	while (pos < size && data[pos] == mark)
		pos++;

	/* ... then optional trailing spaces */
	while (pos < size && data[pos] == ' ')
		pos++;

	/* anything else left on the line disqualifies it */
	if (pos < size && data[pos] != '\n')
		return 0;

	return (mark == '=') ? 1 : 2;
}
1290
+
1291
/* is_next_headerline • applies is_headerline() to the line that follows */
/* the current one; returns 0 when there is no following line */
static int
is_next_headerline(uint8_t *data, size_t size)
{
	size_t next;

	/* locate the newline ending the current line */
	for (next = 0; next < size && data[next] != '\n'; next++)
		;

	/* step past it; bail out if nothing follows */
	next++;
	if (next >= size)
		return 0;

	return is_headerline(data + next, size - next);
}
1304
+
1305
/* prefix_quote • returns the blockquote prefix length, 0 if none */
/* The prefix is up to three spaces, a '>' and one optional space. */
static size_t
prefix_quote(uint8_t *data, size_t size)
{
	size_t pos = 0;

	/* up to three spaces of indentation */
	while (pos < 3 && pos < size && data[pos] == ' ')
		pos++;

	if (pos >= size || data[pos] != '>')
		return 0;

	pos++; /* the '>' itself */

	/* a single space after the marker belongs to the prefix */
	if (pos < size && data[pos] == ' ')
		pos++;

	return pos;
}
1323
+
1324
/* prefix_code • returns the indented-code prefix length: 4 when the line */
/* starts with four spaces, 0 otherwise */
static size_t
prefix_code(uint8_t *data, size_t size)
{
	size_t k;

	if (size <= 3)
		return 0;

	for (k = 0; k < 4; k++) {
		if (data[k] != ' ')
			return 0;
	}

	return 4;
}
1333
+
1334
/* prefix_oli • returns the ordered list item prefix length, 0 if none */
/* The prefix is up to three spaces, one or more digits, a '.' and a */
/* space. A line whose following line is a setext underline is treated */
/* as a header, not a list item. */
static size_t
prefix_oli(uint8_t *data, size_t size)
{
	size_t pos = 0;

	/* up to three spaces of indentation */
	while (pos < 3 && pos < size && data[pos] == ' ')
		pos++;

	/* at least one digit is required */
	if (pos >= size || data[pos] < '0' || data[pos] > '9')
		return 0;

	do {
		pos++;
	} while (pos < size && data[pos] >= '0' && data[pos] <= '9');

	/* the number must be followed by ". " */
	if (pos + 1 >= size || data[pos] != '.' || data[pos + 1] != ' ')
		return 0;

	if (is_next_headerline(data + pos, size - pos))
		return 0;

	return pos + 2;
}
1358
+
1359
/* prefix_uli • returns the unordered list item prefix length, 0 if none */
/* The prefix is up to three spaces, one of '*', '+' or '-', and a space. */
/* A line whose following line is a setext underline is treated as a */
/* header, not a list item. */
static size_t
prefix_uli(uint8_t *data, size_t size)
{
	size_t pos = 0;
	uint8_t bullet;

	/* up to three spaces of indentation */
	while (pos < 3 && pos < size && data[pos] == ' ')
		pos++;

	/* bullet and the space after it must both fit */
	if (pos + 1 >= size)
		return 0;

	bullet = data[pos];
	if (bullet != '*' && bullet != '+' && bullet != '-')
		return 0;

	if (data[pos + 1] != ' ')
		return 0;

	if (is_next_headerline(data + pos, size - pos))
		return 0;

	return pos + 2;
}
1379
+
1380
+
1381
+ /* parse_block • parsing of one block, returning next uint8_t to parse */
1382
+ static void parse_block(struct buf *ob, struct sd_markdown *rndr,
1383
+ uint8_t *data, size_t size);
1384
+
1385
+
1386
+ /* parse_blockquote • handles parsing of a blockquote fragment */
1387
+ static size_t
1388
+ parse_blockquote(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
1389
+ {
1390
+ size_t beg, end = 0, pre, work_size = 0;
1391
+ uint8_t *work_data = 0;
1392
+ struct buf *out = 0;
1393
+
1394
+ out = rndr_newbuf(rndr, BUFFER_BLOCK);
1395
+ beg = 0;
1396
+ while (beg < size) {
1397
+ for (end = beg + 1; end < size && data[end - 1] != '\n'; end++);
1398
+
1399
+ pre = prefix_quote(data + beg, end - beg);
1400
+
1401
+ if (pre)
1402
+ beg += pre; /* skipping prefix */
1403
+
1404
+ /* empty line followed by non-quote line */
1405
+ else if (is_empty(data + beg, end - beg) &&
1406
+ (end >= size || (prefix_quote(data + end, size - end) == 0 &&
1407
+ !is_empty(data + end, size - end))))
1408
+ break;
1409
+
1410
+ if (beg < end) { /* copy into the in-place working buffer */
1411
+ /* bufput(work, data + beg, end - beg); */
1412
+ if (!work_data)
1413
+ work_data = data + beg;
1414
+ else if (data + beg != work_data + work_size)
1415
+ memmove(work_data + work_size, data + beg, end - beg);
1416
+ work_size += end - beg;
1417
+ }
1418
+ beg = end;
1419
+ }
1420
+
1421
+ parse_block(out, rndr, work_data, work_size);
1422
+ if (rndr->cb.blockquote)
1423
+ rndr->cb.blockquote(ob, out, rndr->opaque);
1424
+ rndr_popbuf(rndr, BUFFER_BLOCK);
1425
+ return end;
1426
+ }
1427
+
1428
+ static size_t
1429
+ parse_htmlblock(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int do_render);
1430
+
1431
+ /* parse_blockquote • handles parsing of a regular paragraph */
1432
+ static size_t
1433
+ parse_paragraph(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
1434
+ {
1435
+ size_t i = 0, end = 0;
1436
+ int level = 0;
1437
+ struct buf work = { data, 0, 0, 0 };
1438
+
1439
+ while (i < size) {
1440
+ for (end = i + 1; end < size && data[end - 1] != '\n'; end++) /* empty */;
1441
+
1442
+ if (is_empty(data + i, size - i))
1443
+ break;
1444
+
1445
+ if ((level = is_headerline(data + i, size - i)) != 0)
1446
+ break;
1447
+
1448
+ if (is_atxheader(rndr, data + i, size - i) ||
1449
+ is_hrule(data + i, size - i) ||
1450
+ prefix_quote(data + i, size - i)) {
1451
+ end = i;
1452
+ break;
1453
+ }
1454
+
1455
+ /*
1456
+ * Early termination of a paragraph with the same logic
1457
+ * as Markdown 1.0.0. If this logic is applied, the
1458
+ * Markdown 1.0.3 test suite won't pass cleanly
1459
+ *
1460
+ * :: If the first character in a new line is not a letter,
1461
+ * let's check to see if there's some kind of block starting
1462
+ * here
1463
+ */
1464
+ if ((rndr->ext_flags & MKDEXT_LAX_SPACING) && !isalnum(data[i])) {
1465
+ if (prefix_oli(data + i, size - i) ||
1466
+ prefix_uli(data + i, size - i)) {
1467
+ end = i;
1468
+ break;
1469
+ }
1470
+
1471
+ /* see if an html block starts here */
1472
+ if (data[i] == '<' && rndr->cb.blockhtml &&
1473
+ parse_htmlblock(ob, rndr, data + i, size - i, 0)) {
1474
+ end = i;
1475
+ break;
1476
+ }
1477
+
1478
+ /* see if a code fence starts here */
1479
+ if ((rndr->ext_flags & MKDEXT_FENCED_CODE) != 0 &&
1480
+ is_codefence(data + i, size - i, NULL) != 0) {
1481
+ end = i;
1482
+ break;
1483
+ }
1484
+ }
1485
+
1486
+ i = end;
1487
+ }
1488
+
1489
+ work.size = i;
1490
+ while (work.size && data[work.size - 1] == '\n')
1491
+ work.size--;
1492
+
1493
+ if (!level) {
1494
+ struct buf *tmp = rndr_newbuf(rndr, BUFFER_BLOCK);
1495
+ parse_inline(tmp, rndr, work.data, work.size);
1496
+ if (rndr->cb.paragraph)
1497
+ rndr->cb.paragraph(ob, tmp, rndr->opaque);
1498
+ rndr_popbuf(rndr, BUFFER_BLOCK);
1499
+ } else {
1500
+ struct buf *header_work;
1501
+
1502
+ if (work.size) {
1503
+ size_t beg;
1504
+ i = work.size;
1505
+ work.size -= 1;
1506
+
1507
+ while (work.size && data[work.size] != '\n')
1508
+ work.size -= 1;
1509
+
1510
+ beg = work.size + 1;
1511
+ while (work.size && data[work.size - 1] == '\n')
1512
+ work.size -= 1;
1513
+
1514
+ if (work.size > 0) {
1515
+ struct buf *tmp = rndr_newbuf(rndr, BUFFER_BLOCK);
1516
+ parse_inline(tmp, rndr, work.data, work.size);
1517
+
1518
+ if (rndr->cb.paragraph)
1519
+ rndr->cb.paragraph(ob, tmp, rndr->opaque);
1520
+
1521
+ rndr_popbuf(rndr, BUFFER_BLOCK);
1522
+ work.data += beg;
1523
+ work.size = i - beg;
1524
+ }
1525
+ else work.size = i;
1526
+ }
1527
+
1528
+ header_work = rndr_newbuf(rndr, BUFFER_SPAN);
1529
+ parse_inline(header_work, rndr, work.data, work.size);
1530
+
1531
+ if (rndr->cb.header)
1532
+ rndr->cb.header(ob, header_work, (int)level, rndr->opaque);
1533
+
1534
+ rndr_popbuf(rndr, BUFFER_SPAN);
1535
+ }
1536
+
1537
+ return end;
1538
+ }
1539
+
1540
+ /* parse_fencedcode • handles parsing of a block-level code fragment */
1541
+ static size_t
1542
+ parse_fencedcode(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
1543
+ {
1544
+ size_t beg, end;
1545
+ struct buf *work = 0;
1546
+ struct buf lang = { 0, 0, 0, 0 };
1547
+
1548
+ beg = is_codefence(data, size, &lang);
1549
+ if (beg == 0) return 0;
1550
+
1551
+ work = rndr_newbuf(rndr, BUFFER_BLOCK);
1552
+
1553
+ while (beg < size) {
1554
+ size_t fence_end;
1555
+ struct buf fence_trail = { 0, 0, 0, 0 };
1556
+
1557
+ fence_end = is_codefence(data + beg, size - beg, &fence_trail);
1558
+ if (fence_end != 0 && fence_trail.size == 0) {
1559
+ beg += fence_end;
1560
+ break;
1561
+ }
1562
+
1563
+ for (end = beg + 1; end < size && data[end - 1] != '\n'; end++);
1564
+
1565
+ if (beg < end) {
1566
+ /* verbatim copy to the working buffer,
1567
+ escaping entities */
1568
+ if (is_empty(data + beg, end - beg))
1569
+ bufputc(work, '\n');
1570
+ else bufput(work, data + beg, end - beg);
1571
+ }
1572
+ beg = end;
1573
+ }
1574
+
1575
+ if (work->size && work->data[work->size - 1] != '\n')
1576
+ bufputc(work, '\n');
1577
+
1578
+ if (rndr->cb.blockcode)
1579
+ rndr->cb.blockcode(ob, work, lang.size ? &lang : NULL, rndr->opaque);
1580
+
1581
+ rndr_popbuf(rndr, BUFFER_BLOCK);
1582
+ return beg;
1583
+ }
1584
+
1585
+ static size_t
1586
+ parse_blockcode(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
1587
+ {
1588
+ size_t beg, end, pre;
1589
+ struct buf *work = 0;
1590
+
1591
+ work = rndr_newbuf(rndr, BUFFER_BLOCK);
1592
+
1593
+ beg = 0;
1594
+ while (beg < size) {
1595
+ for (end = beg + 1; end < size && data[end - 1] != '\n'; end++) {};
1596
+ pre = prefix_code(data + beg, end - beg);
1597
+
1598
+ if (pre)
1599
+ beg += pre; /* skipping prefix */
1600
+ else if (!is_empty(data + beg, end - beg))
1601
+ /* non-empty non-prefixed line breaks the pre */
1602
+ break;
1603
+
1604
+ if (beg < end) {
1605
+ /* verbatim copy to the working buffer,
1606
+ escaping entities */
1607
+ if (is_empty(data + beg, end - beg))
1608
+ bufputc(work, '\n');
1609
+ else bufput(work, data + beg, end - beg);
1610
+ }
1611
+ beg = end;
1612
+ }
1613
+
1614
+ while (work->size && work->data[work->size - 1] == '\n')
1615
+ work->size -= 1;
1616
+
1617
+ bufputc(work, '\n');
1618
+
1619
+ if (rndr->cb.blockcode)
1620
+ rndr->cb.blockcode(ob, work, NULL, rndr->opaque);
1621
+
1622
+ rndr_popbuf(rndr, BUFFER_BLOCK);
1623
+ return beg;
1624
+ }
1625
+
1626
+ /* parse_listitem • parsing of a single list item */
1627
+ /* assuming initial prefix is already removed */
1628
+ static size_t
1629
+ parse_listitem(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int *flags)
1630
+ {
1631
+ struct buf *work = 0, *inter = 0;
1632
+ size_t beg = 0, end, pre, sublist = 0, orgpre = 0, i;
1633
+ int in_empty = 0, has_inside_empty = 0, in_fence = 0;
1634
+
1635
+ /* keeping track of the first indentation prefix */
1636
+ while (orgpre < 3 && orgpre < size && data[orgpre] == ' ')
1637
+ orgpre++;
1638
+
1639
+ beg = prefix_uli(data, size);
1640
+ if (!beg)
1641
+ beg = prefix_oli(data, size);
1642
+
1643
+ if (!beg)
1644
+ return 0;
1645
+
1646
+ /* skipping to the beginning of the following line */
1647
+ end = beg;
1648
+ while (end < size && data[end - 1] != '\n')
1649
+ end++;
1650
+
1651
+ /* getting working buffers */
1652
+ work = rndr_newbuf(rndr, BUFFER_SPAN);
1653
+ inter = rndr_newbuf(rndr, BUFFER_SPAN);
1654
+
1655
+ /* putting the first line into the working buffer */
1656
+ bufput(work, data + beg, end - beg);
1657
+ beg = end;
1658
+
1659
+ /* process the following lines */
1660
+ while (beg < size) {
1661
+ size_t has_next_uli = 0, has_next_oli = 0;
1662
+
1663
+ end++;
1664
+
1665
+ while (end < size && data[end - 1] != '\n')
1666
+ end++;
1667
+
1668
+ /* process an empty line */
1669
+ if (is_empty(data + beg, end - beg)) {
1670
+ in_empty = 1;
1671
+ beg = end;
1672
+ continue;
1673
+ }
1674
+
1675
+ /* calculating the indentation */
1676
+ i = 0;
1677
+ while (i < 4 && beg + i < end && data[beg + i] == ' ')
1678
+ i++;
1679
+
1680
+ pre = i;
1681
+
1682
+ if (rndr->ext_flags & MKDEXT_FENCED_CODE) {
1683
+ if (is_codefence(data + beg + i, end - beg - i, NULL) != 0)
1684
+ in_fence = !in_fence;
1685
+ }
1686
+
1687
+ /* Only check for new list items if we are **not** inside
1688
+ * a fenced code block */
1689
+ if (!in_fence) {
1690
+ has_next_uli = prefix_uli(data + beg + i, end - beg - i);
1691
+ has_next_oli = prefix_oli(data + beg + i, end - beg - i);
1692
+ }
1693
+
1694
+ /* checking for ul/ol switch */
1695
+ if (in_empty && (
1696
+ ((*flags & MKD_LIST_ORDERED) && has_next_uli) ||
1697
+ (!(*flags & MKD_LIST_ORDERED) && has_next_oli))){
1698
+ *flags |= MKD_LI_END;
1699
+ break; /* the following item must have same list type */
1700
+ }
1701
+
1702
+ /* checking for a new item */
1703
+ if ((has_next_uli && !is_hrule(data + beg + i, end - beg - i)) || has_next_oli) {
1704
+ if (in_empty)
1705
+ has_inside_empty = 1;
1706
+
1707
+ if (pre == orgpre) /* the following item must have */
1708
+ break; /* the same indentation */
1709
+
1710
+ if (!sublist)
1711
+ sublist = work->size;
1712
+ }
1713
+ /* joining only indented stuff after empty lines;
1714
+ * note that now we only require 1 space of indentation
1715
+ * to continue a list */
1716
+ else if (in_empty && pre == 0) {
1717
+ *flags |= MKD_LI_END;
1718
+ break;
1719
+ }
1720
+ else if (in_empty) {
1721
+ bufputc(work, '\n');
1722
+ has_inside_empty = 1;
1723
+ }
1724
+
1725
+ in_empty = 0;
1726
+
1727
+ /* adding the line without prefix into the working buffer */
1728
+ bufput(work, data + beg + i, end - beg - i);
1729
+ beg = end;
1730
+ }
1731
+
1732
+ /* render of li contents */
1733
+ if (has_inside_empty)
1734
+ *flags |= MKD_LI_BLOCK;
1735
+
1736
+ if (*flags & MKD_LI_BLOCK) {
1737
+ /* intermediate render of block li */
1738
+ if (sublist && sublist < work->size) {
1739
+ parse_block(inter, rndr, work->data, sublist);
1740
+ parse_block(inter, rndr, work->data + sublist, work->size - sublist);
1741
+ }
1742
+ else
1743
+ parse_block(inter, rndr, work->data, work->size);
1744
+ } else {
1745
+ /* intermediate render of inline li */
1746
+ if (sublist && sublist < work->size) {
1747
+ parse_inline(inter, rndr, work->data, sublist);
1748
+ parse_block(inter, rndr, work->data + sublist, work->size - sublist);
1749
+ }
1750
+ else
1751
+ parse_inline(inter, rndr, work->data, work->size);
1752
+ }
1753
+
1754
+ /* render of li itself */
1755
+ if (rndr->cb.listitem)
1756
+ rndr->cb.listitem(ob, inter, *flags, rndr->opaque);
1757
+
1758
+ rndr_popbuf(rndr, BUFFER_SPAN);
1759
+ rndr_popbuf(rndr, BUFFER_SPAN);
1760
+ return beg;
1761
+ }
1762
+
1763
+
1764
+ /* parse_list • parsing ordered or unordered list block */
1765
+ static size_t
1766
+ parse_list(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int flags)
1767
+ {
1768
+ struct buf *work = 0;
1769
+ size_t i = 0, j;
1770
+
1771
+ work = rndr_newbuf(rndr, BUFFER_BLOCK);
1772
+
1773
+ while (i < size) {
1774
+ j = parse_listitem(work, rndr, data + i, size - i, &flags);
1775
+ i += j;
1776
+
1777
+ if (!j || (flags & MKD_LI_END))
1778
+ break;
1779
+ }
1780
+
1781
+ if (rndr->cb.list)
1782
+ rndr->cb.list(ob, work, flags, rndr->opaque);
1783
+ rndr_popbuf(rndr, BUFFER_BLOCK);
1784
+ return i;
1785
+ }
1786
+
1787
+ /* parse_atxheader • parsing of atx-style headers */
1788
+ static size_t
1789
+ parse_atxheader(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
1790
+ {
1791
+ size_t level = 0;
1792
+ size_t i, end, skip;
1793
+
1794
+ while (level < size && level < 6 && data[level] == '#')
1795
+ level++;
1796
+
1797
+ for (i = level; i < size && data[i] == ' '; i++);
1798
+
1799
+ for (end = i; end < size && data[end] != '\n'; end++);
1800
+ skip = end;
1801
+
1802
+ while (end && data[end - 1] == '#')
1803
+ end--;
1804
+
1805
+ while (end && data[end - 1] == ' ')
1806
+ end--;
1807
+
1808
+ if (end > i) {
1809
+ struct buf *work = rndr_newbuf(rndr, BUFFER_SPAN);
1810
+
1811
+ parse_inline(work, rndr, data + i, end - i);
1812
+
1813
+ if (rndr->cb.header)
1814
+ rndr->cb.header(ob, work, (int)level, rndr->opaque);
1815
+
1816
+ rndr_popbuf(rndr, BUFFER_SPAN);
1817
+ }
1818
+
1819
+ return skip;
1820
+ }
1821
+
1822
+
1823
/* htmlblock_end_tag • checking end of HTML block : </tag>[ ]*\n[ ]*\n */
/* `data` must point at the '<' of a candidate closing tag; the tag name */
/* is compared case-insensitively. The rest of the tag's line must be */
/* blank; one following blank line, if present, is included as well. */
/* returns the length on match, 0 otherwise */
static size_t
htmlblock_end_tag(
	const char *tag,
	size_t tag_len,
	struct sd_markdown *rndr,
	uint8_t *data,
	size_t size)
{
	size_t pos, blank;

	/* "</" + tag + ">" plus at least one more byte must fit */
	if (tag_len + 3 >= size)
		return 0;

	/* checking if tag is a match */
	if (strncasecmp((char *)data + 2, tag, tag_len) != 0)
		return 0;

	if (data[tag_len + 2] != '>')
		return 0;

	/* the remainder of the closing tag's line has to be blank */
	pos = tag_len + 3;
	blank = 0;

	if (pos < size) {
		blank = is_empty(data + pos, size - pos);
		if (blank == 0)
			return 0; /* non-blank after tag */
	}

	pos += blank;

	/* swallow one more blank line when there is one */
	blank = 0;
	if (pos < size)
		blank = is_empty(data + pos, size - pos);

	return pos + blank;
}
1854
+
1855
/* htmlblock_end • looks for the closing tag of an HTML block */
/* Scans `data` for "</" sequences and tries htmlblock_end_tag() on each. */
/* With `start_of_line` set, a candidate on a later line is only accepted */
/* when the "</" directly follows a newline; candidates on the opening */
/* line are always tried. */
/* returns the block length up to and including the closing tag (and its */
/* trailing blank lines), 0 when no closing tag was found */
static size_t
htmlblock_end(const char *curtag,
	struct sd_markdown *rndr,
	uint8_t *data,
	size_t size,
	int start_of_line)
{
	size_t tag_size = strlen(curtag);
	size_t pos = 1;
	int newlines = 0;	/* newlines crossed since the opening tag */

	while (pos < size) {
		size_t matched;

		/* advance until data[pos - 1], data[pos] spell "</" */
		pos++;
		while (pos < size && (data[pos - 1] != '<' || data[pos] != '/')) {
			if (data[pos] == '\n')
				newlines++;

			pos++;
		}

		/* If we are only looking for unindented tags, skip the tag
		 * if it doesn't follow a newline.
		 *
		 * The only exception to this is if the tag is still on the
		 * initial line; in that case it still counts as a closing
		 * tag
		 */
		if (!start_of_line || newlines == 0 || data[pos - 2] == '\n') {
			/* not enough room left for a closing tag */
			if (pos + 2 + tag_size >= size)
				break;

			matched = htmlblock_end_tag(curtag, tag_size, rndr,
				data + pos - 1, size - pos + 1);
			if (matched)
				return pos + matched - 1;
		}
	}

	return 0;
}
1895
+
1896
+
1897
+ /* parse_htmlblock • parsing of inline HTML block */
1898
+ static size_t
1899
+ parse_htmlblock(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int do_render)
1900
+ {
1901
+ size_t i, j = 0, tag_end;
1902
+ const char *curtag = NULL;
1903
+ struct buf work = { data, 0, 0, 0 };
1904
+
1905
+ /* identification of the opening tag */
1906
+ if (size < 2 || data[0] != '<')
1907
+ return 0;
1908
+
1909
+ i = 1;
1910
+ while (i < size && data[i] != '>' && data[i] != ' ')
1911
+ i++;
1912
+
1913
+ if (i < size)
1914
+ curtag = find_block_tag((char *)data + 1, (int)i - 1);
1915
+
1916
+ /* handling of special cases */
1917
+ if (!curtag) {
1918
+
1919
+ /* HTML comment, laxist form */
1920
+ if (size > 5 && data[1] == '!' && data[2] == '-' && data[3] == '-') {
1921
+ i = 5;
1922
+
1923
+ while (i < size && !(data[i - 2] == '-' && data[i - 1] == '-' && data[i] == '>'))
1924
+ i++;
1925
+
1926
+ i++;
1927
+
1928
+ if (i < size)
1929
+ j = is_empty(data + i, size - i);
1930
+
1931
+ if (j) {
1932
+ work.size = i + j;
1933
+ if (do_render && rndr->cb.blockhtml)
1934
+ rndr->cb.blockhtml(ob, &work, rndr->opaque);
1935
+ return work.size;
1936
+ }
1937
+ }
1938
+
1939
+ /* HR, which is the only self-closing block tag considered */
1940
+ if (size > 4 && (data[1] == 'h' || data[1] == 'H') && (data[2] == 'r' || data[2] == 'R')) {
1941
+ i = 3;
1942
+ while (i < size && data[i] != '>')
1943
+ i++;
1944
+
1945
+ if (i + 1 < size) {
1946
+ i++;
1947
+ j = is_empty(data + i, size - i);
1948
+ if (j) {
1949
+ work.size = i + j;
1950
+ if (do_render && rndr->cb.blockhtml)
1951
+ rndr->cb.blockhtml(ob, &work, rndr->opaque);
1952
+ return work.size;
1953
+ }
1954
+ }
1955
+ }
1956
+
1957
+ /* no special case recognised */
1958
+ return 0;
1959
+ }
1960
+
1961
+ /* looking for an unindented matching closing tag */
1962
+ /* followed by a blank line */
1963
+ tag_end = htmlblock_end(curtag, rndr, data, size, 1);
1964
+
1965
+ /* if not found, trying a second pass looking for indented match */
1966
+ /* but not if tag is "ins" or "del" (following original Markdown.pl) */
1967
+ if (!tag_end && strcmp(curtag, "ins") != 0 && strcmp(curtag, "del") != 0) {
1968
+ tag_end = htmlblock_end(curtag, rndr, data, size, 0);
1969
+ }
1970
+
1971
+ if (!tag_end)
1972
+ return 0;
1973
+
1974
+ /* the end of the block has been found */
1975
+ work.size = tag_end;
1976
+ if (do_render && rndr->cb.blockhtml)
1977
+ rndr->cb.blockhtml(ob, &work, rndr->opaque);
1978
+
1979
+ return tag_end;
1980
+ }
1981
+
1982
+ static void
1983
+ parse_table_row(
1984
+ struct buf *ob,
1985
+ struct sd_markdown *rndr,
1986
+ uint8_t *data,
1987
+ size_t size,
1988
+ size_t columns,
1989
+ int *col_data,
1990
+ int header_flag)
1991
+ {
1992
+ size_t i = 0, col;
1993
+ struct buf *row_work = 0;
1994
+
1995
+ if (!rndr->cb.table_cell || !rndr->cb.table_row)
1996
+ return;
1997
+
1998
+ row_work = rndr_newbuf(rndr, BUFFER_SPAN);
1999
+
2000
+ if (i < size && data[i] == '|')
2001
+ i++;
2002
+
2003
+ for (col = 0; col < columns && i < size; ++col) {
2004
+ size_t cell_start, cell_end;
2005
+ struct buf *cell_work;
2006
+
2007
+ cell_work = rndr_newbuf(rndr, BUFFER_SPAN);
2008
+
2009
+ while (i < size && _isspace(data[i]))
2010
+ i++;
2011
+
2012
+ cell_start = i;
2013
+
2014
+ while (i < size && data[i] != '|')
2015
+ i++;
2016
+
2017
+ cell_end = i - 1;
2018
+
2019
+ while (cell_end > cell_start && _isspace(data[cell_end]))
2020
+ cell_end--;
2021
+
2022
+ parse_inline(cell_work, rndr, data + cell_start, 1 + cell_end - cell_start);
2023
+ rndr->cb.table_cell(row_work, cell_work, col_data[col] | header_flag, rndr->opaque);
2024
+
2025
+ rndr_popbuf(rndr, BUFFER_SPAN);
2026
+ i++;
2027
+ }
2028
+
2029
+ for (; col < columns; ++col) {
2030
+ struct buf empty_cell = { 0, 0, 0, 0 };
2031
+ rndr->cb.table_cell(row_work, &empty_cell, col_data[col] | header_flag, rndr->opaque);
2032
+ }
2033
+
2034
+ rndr->cb.table_row(ob, row_work, rndr->opaque);
2035
+
2036
+ rndr_popbuf(rndr, BUFFER_SPAN);
2037
+ }
2038
+
2039
+ static size_t
2040
+ parse_table_header(
2041
+ struct buf *ob,
2042
+ struct sd_markdown *rndr,
2043
+ uint8_t *data,
2044
+ size_t size,
2045
+ size_t *columns,
2046
+ int **column_data)
2047
+ {
2048
+ int pipes;
2049
+ size_t i = 0, col, header_end, under_end;
2050
+
2051
+ pipes = 0;
2052
+ while (i < size && data[i] != '\n')
2053
+ if (data[i++] == '|')
2054
+ pipes++;
2055
+
2056
+ if (i == size || pipes == 0)
2057
+ return 0;
2058
+
2059
+ header_end = i;
2060
+
2061
+ while (header_end > 0 && _isspace(data[header_end - 1]))
2062
+ header_end--;
2063
+
2064
+ if (data[0] == '|')
2065
+ pipes--;
2066
+
2067
+ if (header_end && data[header_end - 1] == '|')
2068
+ pipes--;
2069
+
2070
+ *columns = pipes + 1;
2071
+ *column_data = calloc(*columns, sizeof(int));
2072
+
2073
+ /* Parse the header underline */
2074
+ i++;
2075
+ if (i < size && data[i] == '|')
2076
+ i++;
2077
+
2078
+ under_end = i;
2079
+ while (under_end < size && data[under_end] != '\n')
2080
+ under_end++;
2081
+
2082
+ for (col = 0; col < *columns && i < under_end; ++col) {
2083
+ size_t dashes = 0;
2084
+
2085
+ while (i < under_end && data[i] == ' ')
2086
+ i++;
2087
+
2088
+ if (data[i] == ':') {
2089
+ i++; (*column_data)[col] |= MKD_TABLE_ALIGN_L;
2090
+ dashes++;
2091
+ }
2092
+
2093
+ while (i < under_end && data[i] == '-') {
2094
+ i++; dashes++;
2095
+ }
2096
+
2097
+ if (i < under_end && data[i] == ':') {
2098
+ i++; (*column_data)[col] |= MKD_TABLE_ALIGN_R;
2099
+ dashes++;
2100
+ }
2101
+
2102
+ while (i < under_end && data[i] == ' ')
2103
+ i++;
2104
+
2105
+ if (i < under_end && data[i] != '|')
2106
+ break;
2107
+
2108
+ if (dashes < 3)
2109
+ break;
2110
+
2111
+ i++;
2112
+ }
2113
+
2114
+ if (col < *columns)
2115
+ return 0;
2116
+
2117
+ parse_table_row(
2118
+ ob, rndr, data,
2119
+ header_end,
2120
+ *columns,
2121
+ *column_data,
2122
+ MKD_TABLE_HEADER
2123
+ );
2124
+
2125
+ return under_end + 1;
2126
+ }
2127
+
2128
+ static size_t
2129
+ parse_table(
2130
+ struct buf *ob,
2131
+ struct sd_markdown *rndr,
2132
+ uint8_t *data,
2133
+ size_t size)
2134
+ {
2135
+ size_t i;
2136
+
2137
+ struct buf *header_work = 0;
2138
+ struct buf *body_work = 0;
2139
+
2140
+ size_t columns;
2141
+ int *col_data = NULL;
2142
+
2143
+ header_work = rndr_newbuf(rndr, BUFFER_SPAN);
2144
+ body_work = rndr_newbuf(rndr, BUFFER_BLOCK);
2145
+
2146
+ i = parse_table_header(header_work, rndr, data, size, &columns, &col_data);
2147
+ if (i > 0) {
2148
+
2149
+ while (i < size) {
2150
+ size_t row_start;
2151
+ int pipes = 0;
2152
+
2153
+ row_start = i;
2154
+
2155
+ while (i < size && data[i] != '\n')
2156
+ if (data[i++] == '|')
2157
+ pipes++;
2158
+
2159
+ if (pipes == 0 || i == size) {
2160
+ i = row_start;
2161
+ break;
2162
+ }
2163
+
2164
+ parse_table_row(
2165
+ body_work,
2166
+ rndr,
2167
+ data + row_start,
2168
+ i - row_start,
2169
+ columns,
2170
+ col_data, 0
2171
+ );
2172
+
2173
+ i++;
2174
+ }
2175
+
2176
+ if (rndr->cb.table)
2177
+ rndr->cb.table(ob, header_work, body_work, rndr->opaque);
2178
+ }
2179
+
2180
+ free(col_data);
2181
+ rndr_popbuf(rndr, BUFFER_SPAN);
2182
+ rndr_popbuf(rndr, BUFFER_BLOCK);
2183
+ return i;
2184
+ }
2185
+
2186
+ /* parse_block • parsing of one block, returning next uint8_t to parse */
2187
+ static void
2188
+ parse_block(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
2189
+ {
2190
+ size_t beg, end, i;
2191
+ uint8_t *txt_data;
2192
+ beg = 0;
2193
+
2194
+ if (rndr->work_bufs[BUFFER_SPAN].size +
2195
+ rndr->work_bufs[BUFFER_BLOCK].size > rndr->max_nesting)
2196
+ return;
2197
+
2198
+ while (beg < size) {
2199
+ txt_data = data + beg;
2200
+ end = size - beg;
2201
+
2202
+ if (is_atxheader(rndr, txt_data, end))
2203
+ beg += parse_atxheader(ob, rndr, txt_data, end);
2204
+
2205
+ else if (data[beg] == '<' && rndr->cb.blockhtml &&
2206
+ (i = parse_htmlblock(ob, rndr, txt_data, end, 1)) != 0)
2207
+ beg += i;
2208
+
2209
+ else if ((i = is_empty(txt_data, end)) != 0)
2210
+ beg += i;
2211
+
2212
+ else if (is_hrule(txt_data, end)) {
2213
+ if (rndr->cb.hrule)
2214
+ rndr->cb.hrule(ob, rndr->opaque);
2215
+
2216
+ while (beg < size && data[beg] != '\n')
2217
+ beg++;
2218
+
2219
+ beg++;
2220
+ }
2221
+
2222
+ else if ((rndr->ext_flags & MKDEXT_FENCED_CODE) != 0 &&
2223
+ (i = parse_fencedcode(ob, rndr, txt_data, end)) != 0)
2224
+ beg += i;
2225
+
2226
+ else if ((rndr->ext_flags & MKDEXT_TABLES) != 0 &&
2227
+ (i = parse_table(ob, rndr, txt_data, end)) != 0)
2228
+ beg += i;
2229
+
2230
+ else if (prefix_quote(txt_data, end))
2231
+ beg += parse_blockquote(ob, rndr, txt_data, end);
2232
+
2233
+ else if (prefix_code(txt_data, end))
2234
+ beg += parse_blockcode(ob, rndr, txt_data, end);
2235
+
2236
+ else if (prefix_uli(txt_data, end))
2237
+ beg += parse_list(ob, rndr, txt_data, end, 0);
2238
+
2239
+ else if (prefix_oli(txt_data, end))
2240
+ beg += parse_list(ob, rndr, txt_data, end, MKD_LIST_ORDERED);
2241
+
2242
+ else
2243
+ beg += parse_paragraph(ob, rndr, txt_data, end);
2244
+ }
2245
+ }
2246
+
2247
+
2248
+
2249
+ /*********************
2250
+ * REFERENCE PARSING *
2251
+ *********************/
2252
+
2253
+ /* is_ref • returns whether a line is a reference or not */
2254
+ static int
2255
+ is_ref(const uint8_t *data, size_t beg, size_t end, size_t *last, struct link_ref **refs)
2256
+ {
2257
+ /* int n; */
2258
+ size_t i = 0;
2259
+ size_t id_offset, id_end;
2260
+ size_t link_offset, link_end;
2261
+ size_t title_offset, title_end;
2262
+ size_t line_end;
2263
+
2264
+ /* up to 3 optional leading spaces */
2265
+ if (beg + 3 >= end) return 0;
2266
+ if (data[beg] == ' ') { i = 1;
2267
+ if (data[beg + 1] == ' ') { i = 2;
2268
+ if (data[beg + 2] == ' ') { i = 3;
2269
+ if (data[beg + 3] == ' ') return 0; } } }
2270
+ i += beg;
2271
+
2272
+ /* id part: anything but a newline between brackets */
2273
+ if (data[i] != '[') return 0;
2274
+ i++;
2275
+ id_offset = i;
2276
+ while (i < end && data[i] != '\n' && data[i] != '\r' && data[i] != ']')
2277
+ i++;
2278
+ if (i >= end || data[i] != ']') return 0;
2279
+ id_end = i;
2280
+
2281
+ /* spacer: colon (space | tab)* newline? (space | tab)* */
2282
+ i++;
2283
+ if (i >= end || data[i] != ':') return 0;
2284
+ i++;
2285
+ while (i < end && data[i] == ' ') i++;
2286
+ if (i < end && (data[i] == '\n' || data[i] == '\r')) {
2287
+ i++;
2288
+ if (i < end && data[i] == '\r' && data[i - 1] == '\n') i++; }
2289
+ while (i < end && data[i] == ' ') i++;
2290
+ if (i >= end) return 0;
2291
+
2292
+ /* link: whitespace-free sequence, optionally between angle brackets */
2293
+ if (data[i] == '<')
2294
+ i++;
2295
+
2296
+ link_offset = i;
2297
+
2298
+ while (i < end && data[i] != ' ' && data[i] != '\n' && data[i] != '\r')
2299
+ i++;
2300
+
2301
+ if (data[i - 1] == '>') link_end = i - 1;
2302
+ else link_end = i;
2303
+
2304
+ /* optional spacer: (space | tab)* (newline | '\'' | '"' | '(' ) */
2305
+ while (i < end && data[i] == ' ') i++;
2306
+ if (i < end && data[i] != '\n' && data[i] != '\r'
2307
+ && data[i] != '\'' && data[i] != '"' && data[i] != '(')
2308
+ return 0;
2309
+ line_end = 0;
2310
+ /* computing end-of-line */
2311
+ if (i >= end || data[i] == '\r' || data[i] == '\n') line_end = i;
2312
+ if (i + 1 < end && data[i] == '\n' && data[i + 1] == '\r')
2313
+ line_end = i + 1;
2314
+
2315
+ /* optional (space|tab)* spacer after a newline */
2316
+ if (line_end) {
2317
+ i = line_end + 1;
2318
+ while (i < end && data[i] == ' ') i++; }
2319
+
2320
+ /* optional title: any non-newline sequence enclosed in '"()
2321
+ alone on its line */
2322
+ title_offset = title_end = 0;
2323
+ if (i + 1 < end
2324
+ && (data[i] == '\'' || data[i] == '"' || data[i] == '(')) {
2325
+ i++;
2326
+ title_offset = i;
2327
+ /* looking for EOL */
2328
+ while (i < end && data[i] != '\n' && data[i] != '\r') i++;
2329
+ if (i + 1 < end && data[i] == '\n' && data[i + 1] == '\r')
2330
+ title_end = i + 1;
2331
+ else title_end = i;
2332
+ /* stepping back */
2333
+ i -= 1;
2334
+ while (i > title_offset && data[i] == ' ')
2335
+ i -= 1;
2336
+ if (i > title_offset
2337
+ && (data[i] == '\'' || data[i] == '"' || data[i] == ')')) {
2338
+ line_end = title_end;
2339
+ title_end = i; } }
2340
+
2341
+ if (!line_end || link_end == link_offset)
2342
+ return 0; /* garbage after the link empty link */
2343
+
2344
+ /* a valid ref has been found, filling-in return structures */
2345
+ if (last)
2346
+ *last = line_end;
2347
+
2348
+ if (refs) {
2349
+ struct link_ref *ref;
2350
+
2351
+ ref = add_link_ref(refs, data + id_offset, id_end - id_offset);
2352
+ if (!ref)
2353
+ return 0;
2354
+
2355
+ ref->link = bufnew(link_end - link_offset);
2356
+ bufput(ref->link, data + link_offset, link_end - link_offset);
2357
+
2358
+ if (title_end > title_offset) {
2359
+ ref->title = bufnew(title_end - title_offset);
2360
+ bufput(ref->title, data + title_offset, title_end - title_offset);
2361
+ }
2362
+ }
2363
+
2364
+ return 1;
2365
+ }
2366
+
2367
/* expand_tabs • appends a line to ob, turning tabs into spaces */
/* every tab becomes 1 to 4 spaces, enough to bring the running column count
 * (one per input byte, plus the spaces emitted so far) to a multiple of 4 */
static void expand_tabs(struct buf *ob, const uint8_t *line, size_t size)
{
	size_t pos = 0, column = 0;

	for (;;) {
		size_t run_start = pos;
		size_t pad;

		/* gather the run of bytes up to the next tab (or end of line) */
		while (pos < size && line[pos] != '\t') {
			pos++;
			column++;
		}

		/* copy that run verbatim */
		if (pos > run_start)
			bufput(ob, line + run_start, pos - run_start);

		if (pos >= size)
			break;

		/* line[pos] is a tab: pad to the next multiple-of-4 column */
		pad = 4 - (column % 4);
		column += pad;
		while (pad-- > 0)
			bufputc(ob, ' ');

		pos++;
	}
}
2391
+
2392
+ /**********************
2393
+ * EXPORTED FUNCTIONS *
2394
+ **********************/
2395
+
2396
+ struct sd_markdown *
2397
+ sd_markdown_new(
2398
+ unsigned int extensions,
2399
+ size_t max_nesting,
2400
+ const struct sd_callbacks *callbacks,
2401
+ void *opaque)
2402
+ {
2403
+ struct sd_markdown *md = NULL;
2404
+
2405
+ assert(max_nesting > 0 && callbacks);
2406
+
2407
+ md = malloc(sizeof(struct sd_markdown));
2408
+ if (!md)
2409
+ return NULL;
2410
+
2411
+ memcpy(&md->cb, callbacks, sizeof(struct sd_callbacks));
2412
+
2413
+ stack_init(&md->work_bufs[BUFFER_BLOCK], 4);
2414
+ stack_init(&md->work_bufs[BUFFER_SPAN], 8);
2415
+
2416
+ memset(md->active_char, 0x0, 256);
2417
+
2418
+ if (md->cb.emphasis || md->cb.double_emphasis || md->cb.triple_emphasis) {
2419
+ md->active_char['*'] = MD_CHAR_EMPHASIS;
2420
+ md->active_char['_'] = MD_CHAR_EMPHASIS;
2421
+ if (extensions & MKDEXT_STRIKETHROUGH)
2422
+ md->active_char['~'] = MD_CHAR_EMPHASIS;
2423
+ }
2424
+
2425
+ if (md->cb.codespan)
2426
+ md->active_char['`'] = MD_CHAR_CODESPAN;
2427
+
2428
+ if (md->cb.linebreak)
2429
+ md->active_char['\n'] = MD_CHAR_LINEBREAK;
2430
+
2431
+ if (md->cb.image || md->cb.link)
2432
+ md->active_char['['] = MD_CHAR_LINK;
2433
+
2434
+ md->active_char['<'] = MD_CHAR_LANGLE;
2435
+ md->active_char['\\'] = MD_CHAR_ESCAPE;
2436
+ md->active_char['&'] = MD_CHAR_ENTITITY;
2437
+
2438
+ if (extensions & MKDEXT_AUTOLINK) {
2439
+ md->active_char[':'] = MD_CHAR_AUTOLINK_URL;
2440
+ md->active_char['@'] = MD_CHAR_AUTOLINK_EMAIL;
2441
+ md->active_char['w'] = MD_CHAR_AUTOLINK_WWW;
2442
+ }
2443
+
2444
+ if (extensions & MKDEXT_SUPERSCRIPT)
2445
+ md->active_char['^'] = MD_CHAR_SUPERSCRIPT;
2446
+
2447
+ /* Extension data */
2448
+ md->ext_flags = extensions;
2449
+ md->opaque = opaque;
2450
+ md->max_nesting = max_nesting;
2451
+ md->in_link_body = 0;
2452
+
2453
+ return md;
2454
+ }
2455
+
2456
+ void
2457
+ sd_markdown_render(struct buf *ob, const uint8_t *document, size_t doc_size, struct sd_markdown *md)
2458
+ {
2459
+ #define MARKDOWN_GROW(x) ((x) + ((x) >> 1))
2460
+ static const char UTF8_BOM[] = {0xEF, 0xBB, 0xBF};
2461
+
2462
+ struct buf *text;
2463
+ size_t beg, end;
2464
+
2465
+ text = bufnew(64);
2466
+ if (!text)
2467
+ return;
2468
+
2469
+ /* Preallocate enough space for our buffer to avoid expanding while copying */
2470
+ bufgrow(text, doc_size);
2471
+
2472
+ /* reset the references table */
2473
+ memset(&md->refs, 0x0, REF_TABLE_SIZE * sizeof(void *));
2474
+
2475
+ /* first pass: looking for references, copying everything else */
2476
+ beg = 0;
2477
+
2478
+ /* Skip a possible UTF-8 BOM, even though the Unicode standard
2479
+ * discourages having these in UTF-8 documents */
2480
+ if (doc_size >= 3 && memcmp(document, UTF8_BOM, 3) == 0)
2481
+ beg += 3;
2482
+
2483
+ while (beg < doc_size) /* iterating over lines */
2484
+ if (is_ref(document, beg, doc_size, &end, md->refs))
2485
+ beg = end;
2486
+ else { /* skipping to the next line */
2487
+ end = beg;
2488
+ while (end < doc_size && document[end] != '\n' && document[end] != '\r')
2489
+ end++;
2490
+
2491
+ /* adding the line body if present */
2492
+ if (end > beg)
2493
+ expand_tabs(text, document + beg, end - beg);
2494
+
2495
+ while (end < doc_size && (document[end] == '\n' || document[end] == '\r')) {
2496
+ /* add one \n per newline */
2497
+ if (document[end] == '\n' || (end + 1 < doc_size && document[end + 1] != '\n'))
2498
+ bufputc(text, '\n');
2499
+ end++;
2500
+ }
2501
+
2502
+ beg = end;
2503
+ }
2504
+
2505
+ /* pre-grow the output buffer to minimize allocations */
2506
+ bufgrow(ob, MARKDOWN_GROW(text->size));
2507
+
2508
+ /* second pass: actual rendering */
2509
+ if (md->cb.doc_header)
2510
+ md->cb.doc_header(ob, md->opaque);
2511
+
2512
+ if (text->size) {
2513
+ /* adding a final newline if not already present */
2514
+ if (text->data[text->size - 1] != '\n' && text->data[text->size - 1] != '\r')
2515
+ bufputc(text, '\n');
2516
+
2517
+ parse_block(ob, md, text->data, text->size);
2518
+ }
2519
+
2520
+ if (md->cb.doc_footer)
2521
+ md->cb.doc_footer(ob, md->opaque);
2522
+
2523
+ /* clean-up */
2524
+ bufrelease(text);
2525
+ free_link_refs(md->refs);
2526
+
2527
+ assert(md->work_bufs[BUFFER_SPAN].size == 0);
2528
+ assert(md->work_bufs[BUFFER_BLOCK].size == 0);
2529
+ }
2530
+
2531
+ void
2532
+ sd_markdown_free(struct sd_markdown *md)
2533
+ {
2534
+ size_t i;
2535
+
2536
+ for (i = 0; i < (size_t)md->work_bufs[BUFFER_SPAN].asize; ++i)
2537
+ bufrelease(md->work_bufs[BUFFER_SPAN].item[i]);
2538
+
2539
+ for (i = 0; i < (size_t)md->work_bufs[BUFFER_BLOCK].asize; ++i)
2540
+ bufrelease(md->work_bufs[BUFFER_BLOCK].item[i]);
2541
+
2542
+ stack_free(&md->work_bufs[BUFFER_SPAN]);
2543
+ stack_free(&md->work_bufs[BUFFER_BLOCK]);
2544
+
2545
+ free(md);
2546
+ }
2547
+
2548
+ void
2549
+ sd_version(int *ver_major, int *ver_minor, int *ver_revision)
2550
+ {
2551
+ *ver_major = SUNDOWN_VER_MAJOR;
2552
+ *ver_minor = SUNDOWN_VER_MINOR;
2553
+ *ver_revision = SUNDOWN_VER_REVISION;
2554
+ }
2555
+
2556
+ /* vim: set filetype=c: */