tight-redcarpet 3.1.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,2907 @@
1
+ /* markdown.c - generic markdown parser */
2
+
3
+ /*
4
+ * Copyright (c) 2009, Natacha Porté
5
+ * Copyright (c) 2011, Vicent Marti
6
+ *
7
+ * Permission to use, copy, modify, and distribute this software for any
8
+ * purpose with or without fee is hereby granted, provided that the above
9
+ * copyright notice and this permission notice appear in all copies.
10
+ *
11
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18
+ */
19
+
20
+ #include "markdown.h"
21
+ #include "stack.h"
22
+
23
+ #include <assert.h>
24
+ #include <string.h>
25
+ #include <ctype.h>
26
+ #include <stdio.h>
27
+
28
/* The Windows CRT names the bounded case-insensitive compare differently. */
#if defined(_WIN32)
#	define strncasecmp _strnicmp
#endif

/* Number of bucket heads in the link-reference table (sd_markdown.refs). */
#define REF_TABLE_SIZE 8

/* Indices into sd_markdown.work_bufs: one pool of scratch buffers each. */
#define BUFFER_BLOCK 0
#define BUFFER_SPAN 1

#define MKD_LI_END 8 /* internal list flag */

/*
 * Switches defined ahead of the html_blocks.h include that follows;
 * presumably consumed by gperf-generated lookup code in that header
 * (confirm against html_blocks.h).
 */
#define gperf_case_strncmp(s1, s2, n) strncasecmp(s1, s2, n)
#define GPERF_DOWNCASE 1
#define GPERF_CASE_STRNCMP 1
42
+ #include "html_blocks.h"
43
+
44
+ /***************
45
+ * LOCAL TYPES *
46
+ ***************/
47
+
48
/*
 * link_ref: one link reference definition, stored in a singly linked
 * bucket chain of the reference table.
 */
struct link_ref {
	/* hash of the reference name, as computed by hash_link_ref() */
	unsigned int id;

	/* link destination */
	struct buf *link;
	/* link title */
	struct buf *title;

	/* next entry sharing the same bucket, or NULL at the end of the chain */
	struct link_ref *next;
};
57
+
58
/* footnote_ref: one footnote definition */
struct footnote_ref {
	/* hash of the footnote name, as computed by hash_link_ref() */
	unsigned int id;

	/* set to 1 the first time the footnote is referenced from the text */
	int is_used;
	/* number given on first use (the used-footnotes count at that time) */
	unsigned int num;

	/* footnote contents */
	struct buf *contents;
};
67
+
68
/* footnote_item: a node of a footnote_list, pointing at one footnote_ref */
struct footnote_item {
	struct footnote_ref *ref;	/* footnote this node refers to */
	struct footnote_item *next;	/* following node, or NULL for the last */
};
73
+
74
/* footnote_list: singly linked list of footnote_item with O(1) append */
struct footnote_list {
	unsigned int count;		/* number of items appended so far */
	struct footnote_item *head;	/* first item, NULL when empty */
	struct footnote_item *tail;	/* last item, where appends happen */
};
80
+
81
/*
 * char_trigger: pointer to a function rendering one active character.
 *
 *   ob      output buffer
 *   rndr    parser state
 *   data    start of the span (points at the active character)
 *   offset  number of valid chars available before data
 *   size    number of chars available starting at data
 *
 * Returns the number of chars taken care of; 0 means the handler
 * took no action.
 */
struct sd_markdown;
typedef size_t (*char_trigger)(struct buf *ob, struct sd_markdown *rndr,
			       uint8_t *data, size_t offset, size_t size);
88
+
89
/*
 * Forward declarations of the active-character handlers.  All of them
 * share the char_trigger signature so they can be stored in the
 * markdown_char_ptrs[] dispatch table.
 */
static size_t
char_emphasis(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
static size_t
char_underline(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
static size_t
char_highlight(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
static size_t
char_quote(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
static size_t
char_linebreak(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
static size_t
char_codespan(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
static size_t
char_escape(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
static size_t
char_entity(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
static size_t
char_langle_tag(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
static size_t
char_autolink_url(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
static size_t
char_autolink_email(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
static size_t
char_autolink_www(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
static size_t
char_link(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
static size_t
char_superscript(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
103
+
104
/*
 * markdown_char_t: action codes for active characters.  parse_inline()
 * uses the code found in sd_markdown.active_char[] as an index into
 * markdown_char_ptrs[], so the values below must stay in step with that
 * table.  MD_CHAR_ENTITITY is misspelled; the name is left untouched
 * because code outside this block may refer to it.
 */
enum markdown_char_t {
	MD_CHAR_NONE           = 0,
	MD_CHAR_EMPHASIS       = 1,
	MD_CHAR_CODESPAN       = 2,
	MD_CHAR_LINEBREAK      = 3,
	MD_CHAR_LINK           = 4,
	MD_CHAR_LANGLE         = 5,
	MD_CHAR_ESCAPE         = 6,
	MD_CHAR_ENTITITY       = 7,
	MD_CHAR_AUTOLINK_URL   = 8,
	MD_CHAR_AUTOLINK_EMAIL = 9,
	MD_CHAR_AUTOLINK_WWW   = 10,
	MD_CHAR_SUPERSCRIPT    = 11,
	MD_CHAR_QUOTE          = 12
};
119
+
120
+ static char_trigger markdown_char_ptrs[] = {
121
+ NULL,
122
+ &char_emphasis,
123
+ &char_codespan,
124
+ &char_linebreak,
125
+ &char_link,
126
+ &char_langle_tag,
127
+ &char_escape,
128
+ &char_entity,
129
+ &char_autolink_url,
130
+ &char_autolink_email,
131
+ &char_autolink_www,
132
+ &char_superscript,
133
+ &char_quote
134
+ };
135
+
136
+ /* render • structure containing one particular render */
137
+ struct sd_markdown {
138
+ struct sd_callbacks cb;
139
+ void *opaque;
140
+
141
+ struct link_ref *refs[REF_TABLE_SIZE];
142
+ struct footnote_list footnotes_found;
143
+ struct footnote_list footnotes_used;
144
+ uint8_t active_char[256];
145
+ struct stack work_bufs[2];
146
+ unsigned int ext_flags;
147
+ size_t max_nesting;
148
+ int in_link_body;
149
+ };
150
+
151
+ /***************************
152
+ * HELPER FUNCTIONS *
153
+ ***************************/
154
+
155
+ static inline struct buf *
156
+ rndr_newbuf(struct sd_markdown *rndr, int type)
157
+ {
158
+ static const size_t buf_size[2] = {256, 64};
159
+ struct buf *work = NULL;
160
+ struct stack *pool = &rndr->work_bufs[type];
161
+
162
+ if (pool->size < pool->asize &&
163
+ pool->item[pool->size] != NULL) {
164
+ work = pool->item[pool->size++];
165
+ work->size = 0;
166
+ } else {
167
+ work = bufnew(buf_size[type]);
168
+ redcarpet_stack_push(pool, work);
169
+ }
170
+
171
+ return work;
172
+ }
173
+
174
+ static inline void
175
+ rndr_popbuf(struct sd_markdown *rndr, int type)
176
+ {
177
+ rndr->work_bufs[type].size--;
178
+ }
179
+
180
+ static void
181
+ unscape_text(struct buf *ob, struct buf *src)
182
+ {
183
+ size_t i = 0, org;
184
+ while (i < src->size) {
185
+ org = i;
186
+ while (i < src->size && src->data[i] != '\\')
187
+ i++;
188
+
189
+ if (i > org)
190
+ bufput(ob, src->data + org, i - org);
191
+
192
+ if (i + 1 >= src->size)
193
+ break;
194
+
195
+ bufputc(ob, src->data[i + 1]);
196
+ i += 2;
197
+ }
198
+ }
199
+
200
/*
 * hash_link_ref: case-insensitive hash of a reference name.
 * Each byte is lower-cased with tolower() and folded in with
 * h = ch + (h << 6) + (h << 16) - h.  An empty name hashes to 0.
 */
static unsigned int
hash_link_ref(const uint8_t *link_ref, size_t length)
{
	unsigned int h = 0;

	for (size_t k = 0; k != length; k++) {
		unsigned int ch = (unsigned int)tolower(link_ref[k]);

		h = ch + (h << 6) + (h << 16) - h;
	}

	return h;
}
211
+
212
+ static struct link_ref *
213
+ add_link_ref(
214
+ struct link_ref **references,
215
+ const uint8_t *name, size_t name_size)
216
+ {
217
+ struct link_ref *ref = calloc(1, sizeof(struct link_ref));
218
+
219
+ if (!ref)
220
+ return NULL;
221
+
222
+ ref->id = hash_link_ref(name, name_size);
223
+ ref->next = references[ref->id % REF_TABLE_SIZE];
224
+
225
+ references[ref->id % REF_TABLE_SIZE] = ref;
226
+ return ref;
227
+ }
228
+
229
+ static struct link_ref *
230
+ find_link_ref(struct link_ref **references, uint8_t *name, size_t length)
231
+ {
232
+ unsigned int hash = hash_link_ref(name, length);
233
+ struct link_ref *ref = NULL;
234
+
235
+ ref = references[hash % REF_TABLE_SIZE];
236
+
237
+ while (ref != NULL) {
238
+ if (ref->id == hash)
239
+ return ref;
240
+
241
+ ref = ref->next;
242
+ }
243
+
244
+ return NULL;
245
+ }
246
+
247
+ static void
248
+ free_link_refs(struct link_ref **references)
249
+ {
250
+ size_t i;
251
+
252
+ for (i = 0; i < REF_TABLE_SIZE; ++i) {
253
+ struct link_ref *r = references[i];
254
+ struct link_ref *next;
255
+
256
+ while (r) {
257
+ next = r->next;
258
+ bufrelease(r->link);
259
+ bufrelease(r->title);
260
+ free(r);
261
+ r = next;
262
+ }
263
+ }
264
+ }
265
+
266
+ static struct footnote_ref *
267
+ create_footnote_ref(struct footnote_list *list, const uint8_t *name, size_t name_size)
268
+ {
269
+ struct footnote_ref *ref = calloc(1, sizeof(struct footnote_ref));
270
+ if (!ref)
271
+ return NULL;
272
+
273
+ ref->id = hash_link_ref(name, name_size);
274
+
275
+ return ref;
276
+ }
277
+
278
+ static int
279
+ add_footnote_ref(struct footnote_list *list, struct footnote_ref *ref)
280
+ {
281
+ struct footnote_item *item = calloc(1, sizeof(struct footnote_item));
282
+ if (!item)
283
+ return 0;
284
+ item->ref = ref;
285
+
286
+ if (list->head == NULL) {
287
+ list->head = list->tail = item;
288
+ } else {
289
+ list->tail->next = item;
290
+ list->tail = item;
291
+ }
292
+ list->count++;
293
+
294
+ return 1;
295
+ }
296
+
297
+ static struct footnote_ref *
298
+ find_footnote_ref(struct footnote_list *list, uint8_t *name, size_t length)
299
+ {
300
+ unsigned int hash = hash_link_ref(name, length);
301
+ struct footnote_item *item = NULL;
302
+
303
+ item = list->head;
304
+
305
+ while (item != NULL) {
306
+ if (item->ref->id == hash)
307
+ return item->ref;
308
+ item = item->next;
309
+ }
310
+
311
+ return NULL;
312
+ }
313
+
314
+ static void
315
+ free_footnote_ref(struct footnote_ref *ref)
316
+ {
317
+ bufrelease(ref->contents);
318
+ free(ref);
319
+ }
320
+
321
+ static void
322
+ free_footnote_list(struct footnote_list *list, int free_refs)
323
+ {
324
+ struct footnote_item *item = list->head;
325
+ struct footnote_item *next;
326
+
327
+ while (item) {
328
+ next = item->next;
329
+ if (free_refs)
330
+ free_footnote_ref(item->ref);
331
+ free(item);
332
+ item = next;
333
+ }
334
+ }
335
+
336
/*
 * Wrap isalnum so that characters outside of the ASCII range don't count.
 *
 * The range is tested before isalnum() is called: passing isalnum() a
 * value that is neither EOF nor representable as unsigned char is
 * undefined behaviour, so negative or out-of-range arguments must never
 * reach it.  Returns 1 for an ASCII letter or digit, 0 otherwise.
 */
static inline int
_isalnum(int c)
{
	return c >= 0 && c < 0x7f && isalnum(c);
}
344
+
345
/*
 * Tell whether c is a Markdown space.
 *
 * Only the plain space and the newline qualify: tabs and carriage
 * returns are filtered out during the preprocessing phase.
 *
 * A truly UTF-8 aware version would decode a Unicode code point here
 * and test its space property instead.
 */
static inline int
_isspace(int c)
{
	switch (c) {
	case ' ':
	case '\n':
		return 1;
	default:
		return 0;
	}
}
361
+
362
+ /****************************
363
+ * INLINE PARSING FUNCTIONS *
364
+ ****************************/
365
+
366
/*
 * is_mail_autolink: look for the address part of a mail autolink
 * followed by '>'.  This is less strict than the original markdown
 * e-mail address matching: the address is taken to be
 * [-@._a-zA-Z0-9]+ with exactly one '@'.
 *
 * Returns the number of chars up to and including the closing '>',
 * or 0 when data does not hold such an address.
 */
static size_t
is_mail_autolink(uint8_t *data, size_t size)
{
	size_t pos;
	size_t at_count = 0;

	for (pos = 0; pos < size; pos++) {
		const uint8_t ch = data[pos];

		if (_isalnum(ch))
			continue;

		if (ch == '@') {
			at_count++;
			continue;
		}

		if (ch == '-' || ch == '.' || ch == '_')
			continue;

		if (ch == '>')
			return (at_count == 1) ? pos + 1 : 0;

		/* any other character cannot be part of an address */
		return 0;
	}

	/* ran out of input before the closing '>' */
	return 0;
}
397
+
398
+ /* tag_length • returns the length of the given tag, or 0 is it's not valid */
399
+ static size_t
400
+ tag_length(uint8_t *data, size_t size, enum mkd_autolink *autolink)
401
+ {
402
+ size_t i, j;
403
+
404
+ /* a valid tag can't be shorter than 3 chars */
405
+ if (size < 3) return 0;
406
+
407
+ /* begins with a '<' optionally followed by '/', followed by letter or number */
408
+ if (data[0] != '<') return 0;
409
+ i = (data[1] == '/') ? 2 : 1;
410
+
411
+ if (!_isalnum(data[i]))
412
+ return 0;
413
+
414
+ /* scheme test */
415
+ *autolink = MKDA_NOT_AUTOLINK;
416
+
417
+ /* try to find the beginning of an URI */
418
+ while (i < size && (_isalnum(data[i]) || data[i] == '.' || data[i] == '+' || data[i] == '-'))
419
+ i++;
420
+
421
+ if (i > 1 && data[i] == '@') {
422
+ if ((j = is_mail_autolink(data + i, size - i)) != 0) {
423
+ *autolink = MKDA_EMAIL;
424
+ return i + j;
425
+ }
426
+ }
427
+
428
+ if (i > 2 && data[i] == ':') {
429
+ *autolink = MKDA_NORMAL;
430
+ i++;
431
+ }
432
+
433
+ /* completing autolink test: no whitespace or ' or " */
434
+ if (i >= size)
435
+ *autolink = MKDA_NOT_AUTOLINK;
436
+
437
+ else if (*autolink) {
438
+ j = i;
439
+
440
+ while (i < size) {
441
+ if (data[i] == '\\') i += 2;
442
+ else if (data[i] == '>' || data[i] == '\'' ||
443
+ data[i] == '"' || data[i] == ' ' || data[i] == '\n')
444
+ break;
445
+ else i++;
446
+ }
447
+
448
+ if (i >= size) return 0;
449
+ if (i > j && data[i] == '>') return i + 1;
450
+ /* one of the forbidden chars has been found */
451
+ *autolink = MKDA_NOT_AUTOLINK;
452
+ }
453
+
454
+ /* looking for sometinhg looking like a tag end */
455
+ while (i < size && data[i] != '>') i++;
456
+ if (i >= size) return 0;
457
+ return i + 1;
458
+ }
459
+
460
+ /* parse_inline • parses inline markdown elements */
461
+ static void
462
+ parse_inline(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
463
+ {
464
+ size_t i = 0, end = 0;
465
+ uint8_t action = 0;
466
+ struct buf work = { 0, 0, 0, 0 };
467
+
468
+ if (rndr->work_bufs[BUFFER_SPAN].size +
469
+ rndr->work_bufs[BUFFER_BLOCK].size > rndr->max_nesting)
470
+ return;
471
+
472
+ while (i < size) {
473
+ /* copying inactive chars into the output */
474
+ while (end < size && (action = rndr->active_char[data[end]]) == 0) {
475
+ end++;
476
+ }
477
+
478
+ if (rndr->cb.normal_text) {
479
+ work.data = data + i;
480
+ work.size = end - i;
481
+ rndr->cb.normal_text(ob, &work, rndr->opaque);
482
+ }
483
+ else
484
+ bufput(ob, data + i, end - i);
485
+
486
+ if (end >= size) break;
487
+ i = end;
488
+
489
+ end = markdown_char_ptrs[(int)action](ob, rndr, data + i, i, size - i);
490
+ if (!end) /* no action from the callback */
491
+ end = i + 1;
492
+ else {
493
+ i += end;
494
+ end = i;
495
+ }
496
+ }
497
+ }
498
+
499
/*
 * find_emph_char: look for the next emphasis char c in data, skipping
 * code spans and links.
 *
 * The search starts at index 1.  An occurrence of c directly preceded
 * by a backslash is ignored.  Occurrences inside a code span or a link
 * are skipped, unless that construct is never closed, in which case the
 * first occurrence seen inside it is returned.
 *
 * The scan has to stop on a backtick as well as on c and '[';
 * otherwise the code-span branch below can never be reached and a
 * closing symbol inside `code` would be matched.
 *
 * Returns the index of the matching char, or 0 when there is none.
 */
static size_t
find_emph_char(uint8_t *data, size_t size, uint8_t c)
{
	size_t i = 1;

	while (i < size) {
		while (i < size && data[i] != c && data[i] != '`' && data[i] != '[')
			i++;

		if (i == size)
			return 0;

		/* not counting escaped chars */
		if (i && data[i - 1] == '\\') {
			i++; continue;
		}

		if (data[i] == c)
			return i;

		/* skipping a code span */
		if (data[i] == '`') {
			size_t span_nb = 0, bt;
			size_t tmp_i = 0;

			/* counting the number of opening backticks */
			while (i < size && data[i] == '`') {
				i++; span_nb++;
			}

			if (i >= size) return 0;

			/* finding the matching closing sequence */
			bt = 0;
			while (i < size && bt < span_nb) {
				if (!tmp_i && data[i] == c) tmp_i = i;
				if (data[i] == '`') bt++;
				else bt = 0;
				i++;
			}

			if (i >= size) return tmp_i;
		}
		/* skipping a link */
		else if (data[i] == '[') {
			size_t tmp_i = 0;
			uint8_t cc;

			i++;
			while (i < size && data[i] != ']') {
				if (!tmp_i && data[i] == c) tmp_i = i;
				i++;
			}

			i++;
			while (i < size && (data[i] == ' ' || data[i] == '\n'))
				i++;

			if (i >= size)
				return tmp_i;

			switch (data[i]) {
			case '[':
				cc = ']'; break;

			case '(':
				cc = ')'; break;

			default:
				if (tmp_i)
					return tmp_i;
				else
					continue;
			}

			i++;
			while (i < size && data[i] != cc) {
				if (!tmp_i && data[i] == c) tmp_i = i;
				i++;
			}

			if (i >= size)
				return tmp_i;

			i++;
		}
	}

	return 0;
}
589
+
590
+ /* parse_emph1 • parsing single emphase */
591
+ /* closed by a symbol not preceded by whitespace and not followed by symbol */
592
+ static size_t
593
+ parse_emph1(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, uint8_t c)
594
+ {
595
+ size_t i = 0, len;
596
+ struct buf *work = 0;
597
+ int r;
598
+
599
+ /* skipping one symbol if coming from emph3 */
600
+ if (size > 1 && data[0] == c && data[1] == c) i = 1;
601
+
602
+ while (i < size) {
603
+ len = find_emph_char(data + i, size - i, c);
604
+ if (!len) return 0;
605
+ i += len;
606
+ if (i >= size) return 0;
607
+
608
+ if (data[i] == c && !_isspace(data[i - 1])) {
609
+
610
+ if (rndr->ext_flags & MKDEXT_NO_INTRA_EMPHASIS) {
611
+ if (i + i < size && _isalnum(data[i + 1]))
612
+ continue;
613
+ }
614
+
615
+ work = rndr_newbuf(rndr, BUFFER_SPAN);
616
+ parse_inline(work, rndr, data, i);
617
+
618
+ if (rndr->ext_flags & MKDEXT_UNDERLINE && c == '_')
619
+ r = rndr->cb.underline(ob, work, rndr->opaque);
620
+ else
621
+ r = rndr->cb.emphasis(ob, work, rndr->opaque);
622
+
623
+ rndr_popbuf(rndr, BUFFER_SPAN);
624
+ return r ? i + 1 : 0;
625
+ }
626
+ }
627
+
628
+ return 0;
629
+ }
630
+
631
+ /* parse_emph2 • parsing single emphase */
632
+ static size_t
633
+ parse_emph2(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, uint8_t c)
634
+ {
635
+ size_t i = 0, len;
636
+ struct buf *work = 0;
637
+ int r;
638
+
639
+ while (i < size) {
640
+ len = find_emph_char(data + i, size - i, c);
641
+ if (!len) return 0;
642
+ i += len;
643
+
644
+ if (i + 1 < size && data[i] == c && data[i + 1] == c && i && !_isspace(data[i - 1])) {
645
+ work = rndr_newbuf(rndr, BUFFER_SPAN);
646
+ parse_inline(work, rndr, data, i);
647
+
648
+ if (c == '~')
649
+ r = rndr->cb.strikethrough(ob, work, rndr->opaque);
650
+ else if (c == '=')
651
+ r = rndr->cb.highlight(ob, work, rndr->opaque);
652
+ else
653
+ r = rndr->cb.double_emphasis(ob, work, rndr->opaque);
654
+
655
+ rndr_popbuf(rndr, BUFFER_SPAN);
656
+ return r ? i + 2 : 0;
657
+ }
658
+ i++;
659
+ }
660
+ return 0;
661
+ }
662
+
663
+ /* parse_emph3 • parsing single emphase */
664
+ /* finds the first closing tag, and delegates to the other emph */
665
+ static size_t
666
+ parse_emph3(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, uint8_t c)
667
+ {
668
+ size_t i = 0, len;
669
+ int r;
670
+
671
+ while (i < size) {
672
+ len = find_emph_char(data + i, size - i, c);
673
+ if (!len) return 0;
674
+ i += len;
675
+
676
+ /* skip whitespace preceded symbols */
677
+ if (data[i] != c || _isspace(data[i - 1]))
678
+ continue;
679
+
680
+ if (i + 2 < size && data[i + 1] == c && data[i + 2] == c && rndr->cb.triple_emphasis) {
681
+ /* triple symbol found */
682
+ struct buf *work = rndr_newbuf(rndr, BUFFER_SPAN);
683
+
684
+ parse_inline(work, rndr, data, i);
685
+ r = rndr->cb.triple_emphasis(ob, work, rndr->opaque);
686
+ rndr_popbuf(rndr, BUFFER_SPAN);
687
+ return r ? i + 3 : 0;
688
+
689
+ } else if (i + 1 < size && data[i + 1] == c) {
690
+ /* double symbol found, handing over to emph1 */
691
+ len = parse_emph1(ob, rndr, data - 2, size + 2, c);
692
+ if (!len) return 0;
693
+ else return len - 2;
694
+
695
+ } else {
696
+ /* single symbol found, handing over to emph2 */
697
+ len = parse_emph2(ob, rndr, data - 1, size + 1, c);
698
+ if (!len) return 0;
699
+ else return len - 1;
700
+ }
701
+ }
702
+ return 0;
703
+ }
704
+
705
+ /* char_emphasis • single and double emphasis parsing */
706
+ static size_t
707
+ char_emphasis(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
708
+ {
709
+ uint8_t c = data[0];
710
+ size_t ret;
711
+
712
+ if (rndr->ext_flags & MKDEXT_NO_INTRA_EMPHASIS) {
713
+ if (offset > 0 && _isalnum(data[-1]))
714
+ return 0;
715
+ }
716
+
717
+ if (size > 2 && data[1] != c) {
718
+ /* whitespace cannot follow an opening emphasis;
719
+ * strikethrough only takes two characters '~~' */
720
+ if (c == '~' || c == '=' || _isspace(data[1]) || (ret = parse_emph1(ob, rndr, data + 1, size - 1, c)) == 0)
721
+ return 0;
722
+
723
+ return ret + 1;
724
+ }
725
+
726
+ if (size > 3 && data[1] == c && data[2] != c) {
727
+ if (_isspace(data[2]) || (ret = parse_emph2(ob, rndr, data + 2, size - 2, c)) == 0)
728
+ return 0;
729
+
730
+ return ret + 2;
731
+ }
732
+
733
+ if (size > 4 && data[1] == c && data[2] == c && data[3] != c) {
734
+ if (c == '~' || c == '=' || _isspace(data[3]) || (ret = parse_emph3(ob, rndr, data + 3, size - 3, c)) == 0)
735
+ return 0;
736
+
737
+ return ret + 3;
738
+ }
739
+
740
+ return 0;
741
+ }
742
+
743
+
744
+ /* char_linebreak • '\n' preceded by two spaces (assuming linebreak != 0) */
745
+ static size_t
746
+ char_linebreak(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
747
+ {
748
+ if (offset < 2 || data[-1] != ' ' || data[-2] != ' ')
749
+ return 0;
750
+
751
+ /* removing the last space from ob and rendering */
752
+ while (ob->size && ob->data[ob->size - 1] == ' ')
753
+ ob->size--;
754
+
755
+ return rndr->cb.linebreak(ob, rndr->opaque) ? 1 : 0;
756
+ }
757
+
758
+
759
+ /* char_codespan • '`' parsing a code span (assuming codespan != 0) */
760
+ static size_t
761
+ char_codespan(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
762
+ {
763
+ size_t end, nb = 0, i, f_begin, f_end;
764
+
765
+ /* counting the number of backticks in the delimiter */
766
+ while (nb < size && data[nb] == '`')
767
+ nb++;
768
+
769
+ /* finding the next delimiter */
770
+ i = 0;
771
+ for (end = nb; end < size && i < nb; end++) {
772
+ if (data[end] == '`') i++;
773
+ else i = 0;
774
+ }
775
+
776
+ if (i < nb && end >= size)
777
+ return 0; /* no matching delimiter */
778
+
779
+ /* trimming outside whitespaces */
780
+ f_begin = nb;
781
+ while (f_begin < end && data[f_begin] == ' ')
782
+ f_begin++;
783
+
784
+ f_end = end - nb;
785
+ while (f_end > nb && data[f_end-1] == ' ')
786
+ f_end--;
787
+
788
+ /* real code span */
789
+ if (f_begin < f_end) {
790
+ struct buf work = { data + f_begin, f_end - f_begin, 0, 0 };
791
+ if (!rndr->cb.codespan(ob, &work, rndr->opaque))
792
+ end = 0;
793
+ } else {
794
+ if (!rndr->cb.codespan(ob, 0, rndr->opaque))
795
+ end = 0;
796
+ }
797
+
798
+ return end;
799
+ }
800
+
801
+ /* char_quote • '"' parsing a quote */
802
+ static size_t
803
+ char_quote(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
804
+ {
805
+ size_t end, nq = 0, i, f_begin, f_end;
806
+
807
+ /* counting the number of quotes in the delimiter */
808
+ while (nq < size && data[nq] == '"')
809
+ nq++;
810
+
811
+ /* finding the next delimiter */
812
+ i = 0;
813
+ for (end = nq; end < size && i < nq; end++) {
814
+ if (data[end] == '"') i++;
815
+ else i = 0;
816
+ }
817
+
818
+ if (i < nq && end >= size)
819
+ return 0; /* no matching delimiter */
820
+
821
+ /* trimming outside whitespaces */
822
+ f_begin = nq;
823
+ while (f_begin < end && data[f_begin] == ' ')
824
+ f_begin++;
825
+
826
+ f_end = end - nq;
827
+ while (f_end > nq && data[f_end-1] == ' ')
828
+ f_end--;
829
+
830
+ /* real quote */
831
+ if (f_begin < f_end) {
832
+ struct buf work = { data + f_begin, f_end - f_begin, 0, 0 };
833
+ if (!rndr->cb.quote(ob, &work, rndr->opaque))
834
+ end = 0;
835
+ } else {
836
+ if (!rndr->cb.quote(ob, 0, rndr->opaque))
837
+ end = 0;
838
+ }
839
+
840
+ return end;
841
+ }
842
+
843
+
844
+ /* char_escape • '\\' backslash escape */
845
+ static size_t
846
+ char_escape(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
847
+ {
848
+ static const char *escape_chars = "\\`*_{}[]()#+-.!:|&<>^~=";
849
+ struct buf work = { 0, 0, 0, 0 };
850
+
851
+ if (size > 1) {
852
+ if (strchr(escape_chars, data[1]) == NULL)
853
+ return 0;
854
+
855
+ if (rndr->cb.normal_text) {
856
+ work.data = data + 1;
857
+ work.size = 1;
858
+ rndr->cb.normal_text(ob, &work, rndr->opaque);
859
+ }
860
+ else bufputc(ob, data[1]);
861
+ } else if (size == 1) {
862
+ bufputc(ob, data[0]);
863
+ }
864
+
865
+ return 2;
866
+ }
867
+
868
+ /* char_entity • '&' escaped when it doesn't belong to an entity */
869
+ /* valid entities are assumed to be anything matching &#?[A-Za-z0-9]+; */
870
+ static size_t
871
+ char_entity(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
872
+ {
873
+ size_t end = 1;
874
+ struct buf work = { 0, 0, 0, 0 };
875
+
876
+ if (end < size && data[end] == '#')
877
+ end++;
878
+
879
+ while (end < size && _isalnum(data[end]))
880
+ end++;
881
+
882
+ if (end < size && data[end] == ';')
883
+ end++; /* real entity */
884
+ else
885
+ return 0; /* lone '&' */
886
+
887
+ if (rndr->cb.entity) {
888
+ work.data = data;
889
+ work.size = end;
890
+ rndr->cb.entity(ob, &work, rndr->opaque);
891
+ }
892
+ else bufput(ob, data, end);
893
+
894
+ return end;
895
+ }
896
+
897
+ /* char_langle_tag • '<' when tags or autolinks are allowed */
898
+ static size_t
899
+ char_langle_tag(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
900
+ {
901
+ enum mkd_autolink altype = MKDA_NOT_AUTOLINK;
902
+ size_t end = tag_length(data, size, &altype);
903
+ struct buf work = { data, end, 0, 0 };
904
+ int ret = 0;
905
+
906
+ if (end > 2) {
907
+ if (rndr->cb.autolink && altype != MKDA_NOT_AUTOLINK) {
908
+ struct buf *u_link = rndr_newbuf(rndr, BUFFER_SPAN);
909
+ work.data = data + 1;
910
+ work.size = end - 2;
911
+ unscape_text(u_link, &work);
912
+ ret = rndr->cb.autolink(ob, u_link, altype, rndr->opaque);
913
+ rndr_popbuf(rndr, BUFFER_SPAN);
914
+ }
915
+ else if (rndr->cb.raw_html_tag)
916
+ ret = rndr->cb.raw_html_tag(ob, &work, rndr->opaque);
917
+ }
918
+
919
+ if (!ret) return 0;
920
+ else return end;
921
+ }
922
+
923
+ static size_t
924
+ char_autolink_www(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
925
+ {
926
+ struct buf *link, *link_url, *link_text;
927
+ size_t link_len, rewind;
928
+
929
+ if (!rndr->cb.link || rndr->in_link_body)
930
+ return 0;
931
+
932
+ link = rndr_newbuf(rndr, BUFFER_SPAN);
933
+
934
+ if ((link_len = sd_autolink__www(&rewind, link, data, offset, size, 0)) > 0) {
935
+ link_url = rndr_newbuf(rndr, BUFFER_SPAN);
936
+ BUFPUTSL(link_url, "http://");
937
+ bufput(link_url, link->data, link->size);
938
+
939
+ ob->size -= rewind;
940
+ if (rndr->cb.normal_text) {
941
+ link_text = rndr_newbuf(rndr, BUFFER_SPAN);
942
+ rndr->cb.normal_text(link_text, link, rndr->opaque);
943
+ rndr->cb.link(ob, link_url, NULL, link_text, rndr->opaque);
944
+ rndr_popbuf(rndr, BUFFER_SPAN);
945
+ } else {
946
+ rndr->cb.link(ob, link_url, NULL, link, rndr->opaque);
947
+ }
948
+ rndr_popbuf(rndr, BUFFER_SPAN);
949
+ }
950
+
951
+ rndr_popbuf(rndr, BUFFER_SPAN);
952
+ return link_len;
953
+ }
954
+
955
+ static size_t
956
+ char_autolink_email(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
957
+ {
958
+ struct buf *link;
959
+ size_t link_len, rewind;
960
+
961
+ if (!rndr->cb.autolink || rndr->in_link_body)
962
+ return 0;
963
+
964
+ link = rndr_newbuf(rndr, BUFFER_SPAN);
965
+
966
+ if ((link_len = sd_autolink__email(&rewind, link, data, offset, size, 0)) > 0) {
967
+ ob->size -= rewind;
968
+ rndr->cb.autolink(ob, link, MKDA_EMAIL, rndr->opaque);
969
+ }
970
+
971
+ rndr_popbuf(rndr, BUFFER_SPAN);
972
+ return link_len;
973
+ }
974
+
975
+ static size_t
976
+ char_autolink_url(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
977
+ {
978
+ struct buf *link;
979
+ size_t link_len, rewind;
980
+
981
+ if (!rndr->cb.autolink || rndr->in_link_body)
982
+ return 0;
983
+
984
+ link = rndr_newbuf(rndr, BUFFER_SPAN);
985
+
986
+ if ((link_len = sd_autolink__url(&rewind, link, data, offset, size, SD_AUTOLINK_SHORT_DOMAINS)) > 0) {
987
+ ob->size -= rewind;
988
+ rndr->cb.autolink(ob, link, MKDA_NORMAL, rndr->opaque);
989
+ }
990
+
991
+ rndr_popbuf(rndr, BUFFER_SPAN);
992
+ return link_len;
993
+ }
994
+
995
+ /* char_link • '[': parsing a link or an image */
996
+ static size_t
997
+ char_link(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
998
+ {
999
+ int is_img = (offset && data[-1] == '!'), level;
1000
+ size_t i = 1, txt_e, link_b = 0, link_e = 0, title_b = 0, title_e = 0;
1001
+ struct buf *content = 0;
1002
+ struct buf *link = 0;
1003
+ struct buf *title = 0;
1004
+ struct buf *u_link = 0;
1005
+ size_t org_work_size = rndr->work_bufs[BUFFER_SPAN].size;
1006
+ int text_has_nl = 0, ret = 0;
1007
+ int in_title = 0, qtype = 0;
1008
+
1009
+ /* checking whether the correct renderer exists */
1010
+ if ((is_img && !rndr->cb.image) || (!is_img && !rndr->cb.link))
1011
+ goto cleanup;
1012
+
1013
+ /* looking for the matching closing bracket */
1014
+ for (level = 1; i < size; i++) {
1015
+ if (data[i] == '\n')
1016
+ text_has_nl = 1;
1017
+
1018
+ else if (data[i - 1] == '\\')
1019
+ continue;
1020
+
1021
+ else if (data[i] == '[')
1022
+ level++;
1023
+
1024
+ else if (data[i] == ']') {
1025
+ level--;
1026
+ if (level <= 0)
1027
+ break;
1028
+ }
1029
+ }
1030
+
1031
+ if (i >= size)
1032
+ goto cleanup;
1033
+
1034
+ txt_e = i;
1035
+ i++;
1036
+
1037
+ /* footnote link */
1038
+ if (rndr->ext_flags & MKDEXT_FOOTNOTES && data[1] == '^') {
1039
+ if (txt_e < 3)
1040
+ goto cleanup;
1041
+
1042
+ struct buf id = { 0, 0, 0, 0 };
1043
+ struct footnote_ref *fr;
1044
+
1045
+ id.data = data + 2;
1046
+ id.size = txt_e - 2;
1047
+
1048
+ fr = find_footnote_ref(&rndr->footnotes_found, id.data, id.size);
1049
+
1050
+ /* mark footnote used */
1051
+ if (fr && !fr->is_used) {
1052
+ if(!add_footnote_ref(&rndr->footnotes_used, fr))
1053
+ goto cleanup;
1054
+ fr->is_used = 1;
1055
+ fr->num = rndr->footnotes_used.count;
1056
+ }
1057
+
1058
+ /* render */
1059
+ if (fr && rndr->cb.footnote_ref)
1060
+ ret = rndr->cb.footnote_ref(ob, fr->num, rndr->opaque);
1061
+
1062
+ goto cleanup;
1063
+ }
1064
+
1065
+ /* skip any amount of whitespace or newline */
1066
+ /* (this is much more laxist than original markdown syntax) */
1067
+ while (i < size && _isspace(data[i]))
1068
+ i++;
1069
+
1070
+ /* inline style link */
1071
+ if (i < size && data[i] == '(') {
1072
+ /* skipping initial whitespace */
1073
+ i++;
1074
+
1075
+ while (i < size && _isspace(data[i]))
1076
+ i++;
1077
+
1078
+ link_b = i;
1079
+
1080
+ /* looking for link end: ' " ) */
1081
+ /* Count the number of open parenthesis */
1082
+ size_t nb_p = 0;
1083
+
1084
+ while (i < size) {
1085
+ if (data[i] == '\\') i += 2;
1086
+ else if (data[i] == '(' && i != 0) {
1087
+ nb_p++; i++;
1088
+ }
1089
+ else if (data[i] == ')') {
1090
+ if (nb_p == 0) break;
1091
+ else nb_p--; i++;
1092
+ } else if (i >= 1 && _isspace(data[i-1]) && (data[i] == '\'' || data[i] == '"')) break;
1093
+ else i++;
1094
+ }
1095
+
1096
+ if (i >= size) goto cleanup;
1097
+ link_e = i;
1098
+
1099
+ /* looking for title end if present */
1100
+ if (data[i] == '\'' || data[i] == '"') {
1101
+ qtype = data[i];
1102
+ in_title = 1;
1103
+ i++;
1104
+ title_b = i;
1105
+
1106
+ while (i < size) {
1107
+ if (data[i] == '\\') i += 2;
1108
+ else if (data[i] == qtype) {in_title = 0; i++;}
1109
+ else if ((data[i] == ')') && !in_title) break;
1110
+ else i++;
1111
+ }
1112
+
1113
+ if (i >= size) goto cleanup;
1114
+
1115
+ /* skipping whitespaces after title */
1116
+ title_e = i - 1;
1117
+ while (title_e > title_b && _isspace(data[title_e]))
1118
+ title_e--;
1119
+
1120
+ /* checking for closing quote presence */
1121
+ if (data[title_e] != '\'' && data[title_e] != '"') {
1122
+ title_b = title_e = 0;
1123
+ link_e = i;
1124
+ }
1125
+ }
1126
+
1127
+ /* remove whitespace at the end of the link */
1128
+ while (link_e > link_b && _isspace(data[link_e - 1]))
1129
+ link_e--;
1130
+
1131
+ /* remove optional angle brackets around the link */
1132
+ if (data[link_b] == '<') link_b++;
1133
+ if (data[link_e - 1] == '>') link_e--;
1134
+
1135
+ /* building escaped link and title */
1136
+ if (link_e > link_b) {
1137
+ link = rndr_newbuf(rndr, BUFFER_SPAN);
1138
+ bufput(link, data + link_b, link_e - link_b);
1139
+ }
1140
+
1141
+ if (title_e > title_b) {
1142
+ title = rndr_newbuf(rndr, BUFFER_SPAN);
1143
+ bufput(title, data + title_b, title_e - title_b);
1144
+ }
1145
+
1146
+ i++;
1147
+ }
1148
+
1149
+ /* reference style link */
1150
+ else if (i < size && data[i] == '[') {
1151
+ struct buf id = { 0, 0, 0, 0 };
1152
+ struct link_ref *lr;
1153
+
1154
+ /* looking for the id */
1155
+ i++;
1156
+ link_b = i;
1157
+ while (i < size && data[i] != ']') i++;
1158
+ if (i >= size) goto cleanup;
1159
+ link_e = i;
1160
+
1161
+ /* finding the link_ref */
1162
+ if (link_b == link_e) {
1163
+ if (text_has_nl) {
1164
+ struct buf *b = rndr_newbuf(rndr, BUFFER_SPAN);
1165
+ size_t j;
1166
+
1167
+ for (j = 1; j < txt_e; j++) {
1168
+ if (data[j] != '\n')
1169
+ bufputc(b, data[j]);
1170
+ else if (data[j - 1] != ' ')
1171
+ bufputc(b, ' ');
1172
+ }
1173
+
1174
+ id.data = b->data;
1175
+ id.size = b->size;
1176
+ } else {
1177
+ id.data = data + 1;
1178
+ id.size = txt_e - 1;
1179
+ }
1180
+ } else {
1181
+ id.data = data + link_b;
1182
+ id.size = link_e - link_b;
1183
+ }
1184
+
1185
+ lr = find_link_ref(rndr->refs, id.data, id.size);
1186
+ if (!lr)
1187
+ goto cleanup;
1188
+
1189
+ /* keeping link and title from link_ref */
1190
+ link = lr->link;
1191
+ title = lr->title;
1192
+ i++;
1193
+ }
1194
+
1195
+ /* shortcut reference style link */
1196
+ else {
1197
+ struct buf id = { 0, 0, 0, 0 };
1198
+ struct link_ref *lr;
1199
+
1200
+ /* crafting the id */
1201
+ if (text_has_nl) {
1202
+ struct buf *b = rndr_newbuf(rndr, BUFFER_SPAN);
1203
+ size_t j;
1204
+
1205
+ for (j = 1; j < txt_e; j++) {
1206
+ if (data[j] != '\n')
1207
+ bufputc(b, data[j]);
1208
+ else if (data[j - 1] != ' ')
1209
+ bufputc(b, ' ');
1210
+ }
1211
+
1212
+ id.data = b->data;
1213
+ id.size = b->size;
1214
+ } else {
1215
+ id.data = data + 1;
1216
+ id.size = txt_e - 1;
1217
+ }
1218
+
1219
+ /* finding the link_ref */
1220
+ lr = find_link_ref(rndr->refs, id.data, id.size);
1221
+ if (!lr)
1222
+ goto cleanup;
1223
+
1224
+ /* keeping link and title from link_ref */
1225
+ link = lr->link;
1226
+ title = lr->title;
1227
+
1228
+ /* rewinding the whitespace */
1229
+ i = txt_e + 1;
1230
+ }
1231
+
1232
+ /* building content: img alt is escaped, link content is parsed */
1233
+ if (txt_e > 1) {
1234
+ content = rndr_newbuf(rndr, BUFFER_SPAN);
1235
+ if (is_img) {
1236
+ bufput(content, data + 1, txt_e - 1);
1237
+ } else {
1238
+ /* disable autolinking when parsing inline the
1239
+ * content of a link */
1240
+ rndr->in_link_body = 1;
1241
+ parse_inline(content, rndr, data + 1, txt_e - 1);
1242
+ rndr->in_link_body = 0;
1243
+ }
1244
+ }
1245
+
1246
+ if (link) {
1247
+ u_link = rndr_newbuf(rndr, BUFFER_SPAN);
1248
+ unscape_text(u_link, link);
1249
+ }
1250
+
1251
+ /* calling the relevant rendering function */
1252
+ if (is_img) {
1253
+ if (ob->size && ob->data[ob->size - 1] == '!')
1254
+ ob->size -= 1;
1255
+
1256
+ ret = rndr->cb.image(ob, u_link, title, content, rndr->opaque);
1257
+ } else {
1258
+ ret = rndr->cb.link(ob, u_link, title, content, rndr->opaque);
1259
+ }
1260
+
1261
+ /* cleanup */
1262
+ cleanup:
1263
+ rndr->work_bufs[BUFFER_SPAN].size = (int)org_work_size;
1264
+ return ret ? i : 0;
1265
+ }
1266
+
1267
+ static size_t
1268
+ char_superscript(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
1269
+ {
1270
+ size_t sup_start, sup_len;
1271
+ struct buf *sup;
1272
+
1273
+ if (!rndr->cb.superscript)
1274
+ return 0;
1275
+
1276
+ if (size < 2)
1277
+ return 0;
1278
+
1279
+ if (data[1] == '(') {
1280
+ sup_start = sup_len = 2;
1281
+
1282
+ while (sup_len < size && data[sup_len] != ')' && data[sup_len - 1] != '\\')
1283
+ sup_len++;
1284
+
1285
+ if (sup_len == size)
1286
+ return 0;
1287
+ } else {
1288
+ sup_start = sup_len = 1;
1289
+
1290
+ while (sup_len < size && !_isspace(data[sup_len]))
1291
+ sup_len++;
1292
+ }
1293
+
1294
+ if (sup_len - sup_start == 0)
1295
+ return (sup_start == 2) ? 3 : 0;
1296
+
1297
+ sup = rndr_newbuf(rndr, BUFFER_SPAN);
1298
+ parse_inline(sup, rndr, data + sup_start, sup_len - sup_start);
1299
+ rndr->cb.superscript(ob, sup, rndr->opaque);
1300
+ rndr_popbuf(rndr, BUFFER_SPAN);
1301
+
1302
+ return (sup_start == 2) ? sup_len + 1 : sup_len;
1303
+ }
1304
+
1305
+ /*********************************
1306
+ * BLOCK-LEVEL PARSING FUNCTIONS *
1307
+ *********************************/
1308
+
1309
/* is_empty • returns the line length when it is empty, 0 otherwise */
/*
 * A line is "empty" when it holds nothing but spaces up to its newline
 * (or up to `size`). The returned length counts the newline, so it is
 * scanned bytes + 1 even when the input ended without one.
 */
static size_t
is_empty(const uint8_t *data, size_t size)
{
	size_t pos = 0;

	while (pos < size && data[pos] != '\n') {
		if (data[pos] != ' ')
			return 0;
		pos++;
	}

	return pos + 1;
}
1321
+
1322
/* is_hrule • returns whether a line is a horizontal rule */
/*
 * After at most three leading spaces the line must consist only of one
 * marker ('*', '-' or '_') and spaces, with at least three markers.
 * Returns 1 for a rule, 0 otherwise.
 */
static int
is_hrule(uint8_t *data, size_t size)
{
	size_t pos = 0, marks = 0;
	uint8_t mark;

	if (size < 3)
		return 0;

	/* skip up to three leading spaces; size >= 3 keeps these reads in range */
	while (pos < 3 && data[pos] == ' ')
		pos++;

	/* at least three bytes must remain, starting with a rule marker */
	if (pos + 2 >= size)
		return 0;

	mark = data[pos];
	if (mark != '*' && mark != '-' && mark != '_')
		return 0;

	/* the rest of the line may only hold the marker or spaces */
	for (; pos < size && data[pos] != '\n'; pos++) {
		if (data[pos] == mark)
			marks++;
		else if (data[pos] != ' ')
			return 0;
	}

	return marks >= 3;
}
1352
+
1353
/* prefix_codefence • checks whether a line begins with a code fence */
/*
 * A fence is a run of three or more '~' or '`' (all the same character)
 * preceded by at most three spaces. Returns the offset just past the
 * run (leading spaces included), or 0 when the line is not a fence.
 */
static size_t
prefix_codefence(uint8_t *data, size_t size)
{
	size_t pos = 0, run = 0;
	uint8_t fence;

	if (size < 3)
		return 0;

	/* skip up to three leading spaces; size >= 3 keeps these reads in range */
	while (pos < 3 && data[pos] == ' ')
		pos++;

	/* at least three bytes must remain, starting with a fence character */
	if (pos + 2 >= size)
		return 0;

	fence = data[pos];
	if (fence != '~' && fence != '`')
		return 0;

	/* measure the run of identical fence characters */
	for (; pos < size && data[pos] == fence; pos++)
		run++;

	return (run < 3) ? 0 : pos;
}
1383
+
1384
+ /* check if a line is a code fence; return its size if it is */
1385
+ static size_t
1386
+ is_codefence(uint8_t *data, size_t size, struct buf *syntax)
1387
+ {
1388
+ size_t i = 0, syn_len = 0;
1389
+ uint8_t *syn_start;
1390
+
1391
+ i = prefix_codefence(data, size);
1392
+ if (i == 0)
1393
+ return 0;
1394
+
1395
+ while (i < size && data[i] == ' ')
1396
+ i++;
1397
+
1398
+ syn_start = data + i;
1399
+
1400
+ if (i < size && data[i] == '{') {
1401
+ i++; syn_start++;
1402
+
1403
+ while (i < size && data[i] != '}' && data[i] != '\n') {
1404
+ syn_len++; i++;
1405
+ }
1406
+
1407
+ if (i == size || data[i] != '}')
1408
+ return 0;
1409
+
1410
+ /* strip all whitespace at the beginning and the end
1411
+ * of the {} block */
1412
+ while (syn_len > 0 && _isspace(syn_start[0])) {
1413
+ syn_start++; syn_len--;
1414
+ }
1415
+
1416
+ while (syn_len > 0 && _isspace(syn_start[syn_len - 1]))
1417
+ syn_len--;
1418
+
1419
+ i++;
1420
+ } else {
1421
+ while (i < size && !_isspace(data[i])) {
1422
+ syn_len++; i++;
1423
+ }
1424
+ }
1425
+
1426
+ if (syntax) {
1427
+ syntax->data = syn_start;
1428
+ syntax->size = syn_len;
1429
+ }
1430
+
1431
+ while (i < size && data[i] != '\n') {
1432
+ if (!_isspace(data[i]))
1433
+ return 0;
1434
+
1435
+ i++;
1436
+ }
1437
+
1438
+ return i + 1;
1439
+ }
1440
+
1441
+ /* is_atxheader • returns whether the line is a hash-prefixed header */
1442
+ static int
1443
+ is_atxheader(struct sd_markdown *rndr, uint8_t *data, size_t size)
1444
+ {
1445
+ if (data[0] != '#')
1446
+ return 0;
1447
+
1448
+ if (rndr->ext_flags & MKDEXT_SPACE_HEADERS) {
1449
+ size_t level = 0;
1450
+
1451
+ while (level < size && level < 6 && data[level] == '#')
1452
+ level++;
1453
+
1454
+ if (level < size && data[level] != ' ')
1455
+ return 0;
1456
+ }
1457
+
1458
+ return 1;
1459
+ }
1460
+
1461
/* is_headerline • returns whether the line is a setext-style hdr underline */
/*
 * An underline is a run of '=' or a run of '-', optionally followed by
 * spaces, and nothing else up to the newline or the end of input.
 * Returns 3 for an '=' underline, 4 for a '-' underline, 0 otherwise.
 * Reads data[0] unconditionally, so callers must pass size > 0.
 */
static int
is_headerline(uint8_t *data, size_t size)
{
	uint8_t mark = data[0];
	size_t pos = 1;

	if (mark != '=' && mark != '-')
		return 0;

	/* the run of underline characters ... */
	while (pos < size && data[pos] == mark)
		pos++;

	/* ... then optional trailing spaces */
	while (pos < size && data[pos] == ' ')
		pos++;

	/* anything else before the line ends disqualifies it */
	if (pos < size && data[pos] != '\n')
		return 0;

	return (mark == '=') ? 3 : 4;
}
1481
+
1482
/* is_next_headerline • returns whether the line after the current one
 * is a setext-style header underline (same result as is_headerline) */
static int
is_next_headerline(uint8_t *data, size_t size)
{
	size_t next;

	/* find the newline that ends the current line */
	for (next = 0; next < size && data[next] != '\n'; next++)
		;

	/* step past it; there must be at least one byte on the next line */
	next++;
	if (next >= size)
		return 0;

	return is_headerline(data + next, size - next);
}
1495
+
1496
/* prefix_quote • returns blockquote prefix length */
/*
 * The prefix is up to three spaces, a '>', and one optional space after
 * it. Returns 0 when the line does not start a blockquote.
 */
static size_t
prefix_quote(uint8_t *data, size_t size)
{
	size_t pos = 0;

	/* up to three spaces of indentation are tolerated */
	while (pos < 3 && pos < size && data[pos] == ' ')
		pos++;

	if (pos >= size || data[pos] != '>')
		return 0;

	/* a single space right after '>' belongs to the prefix */
	if (pos + 1 < size && data[pos + 1] == ' ')
		return pos + 2;

	return pos + 1;
}
1514
+
1515
/* prefix_code • returns prefix length for block code */
/* The prefix is exactly four leading spaces: returns 4 if present, else 0. */
static size_t
prefix_code(uint8_t *data, size_t size)
{
	size_t k;

	if (size <= 3)
		return 0;

	for (k = 0; k < 4; k++) {
		if (data[k] != ' ')
			return 0;
	}

	return 4;
}
1524
+
1525
/* prefix_oli • returns ordered list item prefix */
/*
 * The prefix is up to three spaces, one or more digits, a '.', and a
 * space. Returns the prefix length, or 0 when the line is not an
 * ordered list item. A line whose following line is a setext header
 * underline is treated as header text, not as a list item.
 */
static size_t
prefix_oli(uint8_t *data, size_t size)
{
	size_t pos = 0;

	/* up to three spaces of indentation are tolerated */
	while (pos < 3 && pos < size && data[pos] == ' ')
		pos++;

	/* at least one digit is required */
	if (pos >= size || data[pos] < '0' || data[pos] > '9')
		return 0;

	do {
		pos++;
	} while (pos < size && data[pos] >= '0' && data[pos] <= '9');

	/* the number must be followed by ". " */
	if (pos + 1 >= size || data[pos] != '.' || data[pos + 1] != ' ')
		return 0;

	if (is_next_headerline(data + pos, size - pos))
		return 0;

	return pos + 2;
}
1549
+
1550
/* prefix_uli • returns unordered list item prefix */
/*
 * The prefix is up to three spaces, a bullet ('*', '+' or '-'), and a
 * space. Returns the prefix length, or 0 when the line is not an
 * unordered list item. A line whose following line is a setext header
 * underline is treated as header text, not as a list item.
 */
static size_t
prefix_uli(uint8_t *data, size_t size)
{
	size_t pos = 0;
	uint8_t bullet;

	/* up to three spaces of indentation are tolerated */
	while (pos < 3 && pos < size && data[pos] == ' ')
		pos++;

	/* need room for the bullet and the space after it */
	if (pos + 1 >= size)
		return 0;

	bullet = data[pos];
	if (bullet != '*' && bullet != '+' && bullet != '-')
		return 0;

	if (data[pos + 1] != ' ')
		return 0;

	if (is_next_headerline(data + pos, size - pos))
		return 0;

	return pos + 2;
}
1570
+
1571
+
1572
+ /* parse_block • parsing of block-level content; returns nothing (forward declaration) */
1573
+ static void parse_block(struct buf *ob, struct sd_markdown *rndr,
1574
+ uint8_t *data, size_t size);
1575
+
1576
+
1577
+ /* parse_blockquote • handles parsing of a blockquote fragment */
1578
+ static size_t
1579
+ parse_blockquote(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
1580
+ {
1581
+ size_t beg, end = 0, pre, work_size = 0;
1582
+ uint8_t *work_data = 0;
1583
+ struct buf *out = 0;
1584
+
1585
+ out = rndr_newbuf(rndr, BUFFER_BLOCK);
1586
+ beg = 0;
1587
+ while (beg < size) {
1588
+ for (end = beg + 1; end < size && data[end - 1] != '\n'; end++);
1589
+
1590
+ pre = prefix_quote(data + beg, end - beg);
1591
+
1592
+ if (pre)
1593
+ beg += pre; /* skipping prefix */
1594
+
1595
+ /* empty line followed by non-quote line */
1596
+ else if (is_empty(data + beg, end - beg) &&
1597
+ (end >= size || (prefix_quote(data + end, size - end) == 0 &&
1598
+ !is_empty(data + end, size - end))))
1599
+ break;
1600
+
1601
+ if (beg < end) { /* copy into the in-place working buffer */
1602
+ /* bufput(work, data + beg, end - beg); */
1603
+ if (!work_data)
1604
+ work_data = data + beg;
1605
+ else if (data + beg != work_data + work_size)
1606
+ memmove(work_data + work_size, data + beg, end - beg);
1607
+ work_size += end - beg;
1608
+ }
1609
+ beg = end;
1610
+ }
1611
+
1612
+ parse_block(out, rndr, work_data, work_size);
1613
+ if (rndr->cb.blockquote)
1614
+ rndr->cb.blockquote(ob, out, rndr->opaque);
1615
+ rndr_popbuf(rndr, BUFFER_BLOCK);
1616
+ return end;
1617
+ }
1618
+
1619
+ static size_t
1620
+ parse_htmlblock(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int do_render);
1621
+
1622
+ /* parse_blockquote • handles parsing of a regular paragraph */
1623
+ static size_t
1624
+ parse_paragraph(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
1625
+ {
1626
+ size_t i = 0, end = 0;
1627
+ int level = 0, last_is_empty = 1;
1628
+ struct buf work = { data, 0, 0, 0 };
1629
+
1630
+ while (i < size) {
1631
+ for (end = i + 1; end < size && data[end - 1] != '\n'; end++) /* empty */;
1632
+
1633
+ if (is_empty(data + i, size - i)) {
1634
+ last_is_empty = 1;
1635
+ break;
1636
+ }
1637
+
1638
+ if (!last_is_empty && (level = is_headerline(data + i, size - i)) != 0) {
1639
+ last_is_empty = 0;
1640
+ break;
1641
+ }
1642
+
1643
+ last_is_empty = 0;
1644
+
1645
+ if (is_atxheader(rndr, data + i, size - i) ||
1646
+ is_hrule(data + i, size - i) ||
1647
+ prefix_quote(data + i, size - i)) {
1648
+ end = i;
1649
+ break;
1650
+ }
1651
+
1652
+ /*
1653
+ * Early termination of a paragraph with the same logic
1654
+ * as Markdown 1.0.0. If this logic is applied, the
1655
+ * Markdown 1.0.3 test suite won't pass cleanly
1656
+ *
1657
+ * :: If the first character in a new line is not a letter,
1658
+ * let's check to see if there's some kind of block starting
1659
+ * here
1660
+ */
1661
+ if ((rndr->ext_flags & MKDEXT_LAX_SPACING) && !isalpha(data[i])) {
1662
+ if (prefix_oli(data + i, size - i) ||
1663
+ prefix_uli(data + i, size - i)) {
1664
+ end = i;
1665
+ break;
1666
+ }
1667
+
1668
+ /* see if an html block starts here */
1669
+ if (data[i] == '<' && rndr->cb.blockhtml &&
1670
+ parse_htmlblock(ob, rndr, data + i, size - i, 0)) {
1671
+ end = i;
1672
+ break;
1673
+ }
1674
+
1675
+ /* see if a code fence starts here */
1676
+ if ((rndr->ext_flags & MKDEXT_FENCED_CODE) != 0 &&
1677
+ is_codefence(data + i, size - i, NULL) != 0) {
1678
+ end = i;
1679
+ break;
1680
+ }
1681
+ }
1682
+
1683
+ i = end;
1684
+ }
1685
+
1686
+ work.size = i;
1687
+ while (work.size && data[work.size - 1] == '\n')
1688
+ work.size--;
1689
+
1690
+ if (!level) {
1691
+ struct buf *tmp = rndr_newbuf(rndr, BUFFER_BLOCK);
1692
+ parse_inline(tmp, rndr, work.data, work.size);
1693
+ if (rndr->cb.paragraph)
1694
+ rndr->cb.paragraph(ob, tmp, rndr->opaque);
1695
+ rndr_popbuf(rndr, BUFFER_BLOCK);
1696
+ } else {
1697
+ struct buf *header_work;
1698
+
1699
+ if (work.size) {
1700
+ size_t beg;
1701
+ i = work.size;
1702
+ work.size -= 1;
1703
+
1704
+ while (work.size && data[work.size] != '\n')
1705
+ work.size -= 1;
1706
+
1707
+ beg = work.size + 1;
1708
+ while (work.size && data[work.size - 1] == '\n')
1709
+ work.size -= 1;
1710
+
1711
+ if (work.size > 0) {
1712
+ struct buf *tmp = rndr_newbuf(rndr, BUFFER_BLOCK);
1713
+ parse_inline(tmp, rndr, work.data, work.size);
1714
+
1715
+ if (rndr->cb.paragraph)
1716
+ rndr->cb.paragraph(ob, tmp, rndr->opaque);
1717
+
1718
+ rndr_popbuf(rndr, BUFFER_BLOCK);
1719
+ work.data += beg;
1720
+ work.size = i - beg;
1721
+ }
1722
+ else work.size = i;
1723
+ }
1724
+
1725
+ header_work = rndr_newbuf(rndr, BUFFER_SPAN);
1726
+ parse_inline(header_work, rndr, work.data, work.size);
1727
+
1728
+ if (rndr->cb.header)
1729
+ rndr->cb.header(ob, header_work, (int)level, rndr->opaque);
1730
+
1731
+ rndr_popbuf(rndr, BUFFER_SPAN);
1732
+ }
1733
+
1734
+ return end;
1735
+ }
1736
+
1737
+ /* parse_fencedcode • handles parsing of a block-level code fragment */
1738
+ static size_t
1739
+ parse_fencedcode(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
1740
+ {
1741
+ size_t beg, end;
1742
+ struct buf *work = 0;
1743
+ struct buf lang = { 0, 0, 0, 0 };
1744
+
1745
+ beg = is_codefence(data, size, &lang);
1746
+ if (beg == 0) return 0;
1747
+
1748
+ work = rndr_newbuf(rndr, BUFFER_BLOCK);
1749
+
1750
+ while (beg < size) {
1751
+ size_t fence_end;
1752
+ struct buf fence_trail = { 0, 0, 0, 0 };
1753
+
1754
+ fence_end = is_codefence(data + beg, size - beg, &fence_trail);
1755
+ if (fence_end != 0 && fence_trail.size == 0) {
1756
+ beg += fence_end;
1757
+ break;
1758
+ }
1759
+
1760
+ for (end = beg + 1; end < size && data[end - 1] != '\n'; end++);
1761
+
1762
+ if (beg < end) {
1763
+ /* verbatim copy to the working buffer,
1764
+ escaping entities */
1765
+ if (is_empty(data + beg, end - beg))
1766
+ bufputc(work, '\n');
1767
+ else bufput(work, data + beg, end - beg);
1768
+ }
1769
+ beg = end;
1770
+ }
1771
+
1772
+ if (work->size && work->data[work->size - 1] != '\n')
1773
+ bufputc(work, '\n');
1774
+
1775
+ if (rndr->cb.blockcode)
1776
+ rndr->cb.blockcode(ob, work, lang.size ? &lang : NULL, rndr->opaque);
1777
+
1778
+ rndr_popbuf(rndr, BUFFER_BLOCK);
1779
+ return beg;
1780
+ }
1781
+
1782
+ static size_t
1783
+ parse_blockcode(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
1784
+ {
1785
+ size_t beg, end, pre;
1786
+ struct buf *work = 0;
1787
+
1788
+ work = rndr_newbuf(rndr, BUFFER_BLOCK);
1789
+
1790
+ beg = 0;
1791
+ while (beg < size) {
1792
+ for (end = beg + 1; end < size && data[end - 1] != '\n'; end++) {};
1793
+ pre = prefix_code(data + beg, end - beg);
1794
+
1795
+ if (pre)
1796
+ beg += pre; /* skipping prefix */
1797
+ else if (!is_empty(data + beg, end - beg))
1798
+ /* non-empty non-prefixed line breaks the pre */
1799
+ break;
1800
+
1801
+ if (beg < end) {
1802
+ /* verbatim copy to the working buffer,
1803
+ escaping entities */
1804
+ if (is_empty(data + beg, end - beg))
1805
+ bufputc(work, '\n');
1806
+ else bufput(work, data + beg, end - beg);
1807
+ }
1808
+ beg = end;
1809
+ }
1810
+
1811
+ while (work->size && work->data[work->size - 1] == '\n')
1812
+ work->size -= 1;
1813
+
1814
+ bufputc(work, '\n');
1815
+
1816
+ if (rndr->cb.blockcode)
1817
+ rndr->cb.blockcode(ob, work, NULL, rndr->opaque);
1818
+
1819
+ rndr_popbuf(rndr, BUFFER_BLOCK);
1820
+ return beg;
1821
+ }
1822
+
1823
+ /* parse_listitem • parsing of a single list item */
1824
+ /* assuming initial prefix is already removed */
1825
+ static size_t
1826
+ parse_listitem(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int *flags)
1827
+ {
1828
+ struct buf *work = 0, *inter = 0;
1829
+ size_t beg = 0, end, pre, sublist = 0, orgpre = 0, i;
1830
+ int in_empty = 0, has_inside_empty = 0, in_fence = 0;
1831
+
1832
+ /* keeping track of the first indentation prefix */
1833
+ while (orgpre < 3 && orgpre < size && data[orgpre] == ' ')
1834
+ orgpre++;
1835
+
1836
+ beg = prefix_uli(data, size);
1837
+ if (!beg)
1838
+ beg = prefix_oli(data, size);
1839
+
1840
+ if (!beg)
1841
+ return 0;
1842
+
1843
+ /* skipping to the beginning of the following line */
1844
+ end = beg;
1845
+ while (end < size && data[end - 1] != '\n')
1846
+ end++;
1847
+
1848
+ /* getting working buffers */
1849
+ work = rndr_newbuf(rndr, BUFFER_SPAN);
1850
+ inter = rndr_newbuf(rndr, BUFFER_SPAN);
1851
+
1852
+ /* putting the first line into the working buffer */
1853
+ bufput(work, data + beg, end - beg);
1854
+ beg = end;
1855
+
1856
+ /* process the following lines */
1857
+ while (beg < size) {
1858
+ size_t has_next_uli = 0, has_next_oli = 0;
1859
+
1860
+ end++;
1861
+
1862
+ while (end < size && data[end - 1] != '\n')
1863
+ end++;
1864
+
1865
+ /* process an empty line */
1866
+ if (is_empty(data + beg, end - beg)) {
1867
+ in_empty = 1;
1868
+ beg = end;
1869
+ continue;
1870
+ }
1871
+
1872
+ /* calculating the indentation */
1873
+ i = 0;
1874
+ while (i < 4 && beg + i < end && data[beg + i] == ' ')
1875
+ i++;
1876
+
1877
+ pre = i;
1878
+
1879
+ if (rndr->ext_flags & MKDEXT_FENCED_CODE) {
1880
+ if (is_codefence(data + beg + i, end - beg - i, NULL) != 0)
1881
+ in_fence = !in_fence;
1882
+ }
1883
+
1884
+ /* Only check for new list items if we are **not** inside
1885
+ * a fenced code block */
1886
+ if (!in_fence) {
1887
+ has_next_uli = prefix_uli(data + beg + i, end - beg - i);
1888
+ has_next_oli = prefix_oli(data + beg + i, end - beg - i);
1889
+ }
1890
+
1891
+ /* checking for ul/ol switch */
1892
+ if (in_empty && (
1893
+ ((*flags & MKD_LIST_ORDERED) && has_next_uli) ||
1894
+ (!(*flags & MKD_LIST_ORDERED) && has_next_oli))){
1895
+ *flags |= MKD_LI_END;
1896
+ break; /* the following item must have same list type */
1897
+ }
1898
+
1899
+ /* checking for a new item */
1900
+ if ((has_next_uli && !is_hrule(data + beg + i, end - beg - i)) || has_next_oli) {
1901
+ if (in_empty)
1902
+ has_inside_empty = 1;
1903
+
1904
+ if (pre == orgpre) /* the following item must have */
1905
+ break; /* the same indentation */
1906
+
1907
+ if (!sublist)
1908
+ sublist = work->size;
1909
+ }
1910
+ /* joining only indented stuff after empty lines */
1911
+ else if (in_empty && i < 4 && data[beg] != '\t') {
1912
+ *flags |= MKD_LI_END;
1913
+ break;
1914
+ }
1915
+ else if (in_empty) {
1916
+ bufputc(work, '\n');
1917
+ has_inside_empty = 1;
1918
+ }
1919
+
1920
+ in_empty = 0;
1921
+
1922
+ /* adding the line without prefix into the working buffer */
1923
+ bufput(work, data + beg + i, end - beg - i);
1924
+ beg = end;
1925
+ }
1926
+
1927
+ /* render of li contents */
1928
+ if (has_inside_empty)
1929
+ *flags |= MKD_LI_BLOCK;
1930
+
1931
+ if (*flags & MKD_LI_BLOCK) {
1932
+ /* intermediate render of block li */
1933
+ if (sublist && sublist < work->size) {
1934
+ parse_block(inter, rndr, work->data, sublist);
1935
+ parse_block(inter, rndr, work->data + sublist, work->size - sublist);
1936
+ }
1937
+ else
1938
+ parse_block(inter, rndr, work->data, work->size);
1939
+ } else {
1940
+ /* intermediate render of inline li */
1941
+ if (sublist && sublist < work->size) {
1942
+ parse_inline(inter, rndr, work->data, sublist);
1943
+ parse_block(inter, rndr, work->data + sublist, work->size - sublist);
1944
+ }
1945
+ else
1946
+ parse_inline(inter, rndr, work->data, work->size);
1947
+ }
1948
+
1949
+ /* render of li itself */
1950
+ if (rndr->cb.listitem)
1951
+ rndr->cb.listitem(ob, inter, *flags, rndr->opaque);
1952
+
1953
+ rndr_popbuf(rndr, BUFFER_SPAN);
1954
+ rndr_popbuf(rndr, BUFFER_SPAN);
1955
+ return beg;
1956
+ }
1957
+
1958
+
1959
+ /* parse_list • parsing ordered or unordered list block */
1960
+ static size_t
1961
+ parse_list(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int flags)
1962
+ {
1963
+ struct buf *work = 0;
1964
+ size_t i = 0, j;
1965
+
1966
+ work = rndr_newbuf(rndr, BUFFER_BLOCK);
1967
+
1968
+ while (i < size) {
1969
+ j = parse_listitem(work, rndr, data + i, size - i, &flags);
1970
+ i += j;
1971
+
1972
+ if (!j || (flags & MKD_LI_END))
1973
+ break;
1974
+ }
1975
+
1976
+ if (rndr->cb.list)
1977
+ rndr->cb.list(ob, work, flags, rndr->opaque);
1978
+ rndr_popbuf(rndr, BUFFER_BLOCK);
1979
+ return i;
1980
+ }
1981
+
1982
+ /* parse_atxheader • parsing of atx-style headers */
1983
+ static size_t
1984
+ parse_atxheader(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
1985
+ {
1986
+ size_t level = 0;
1987
+ size_t i, end, skip;
1988
+
1989
+ while (level < size && level < 6 && data[level] == '#')
1990
+ level++;
1991
+
1992
+ for (i = level; i < size && data[i] == ' '; i++);
1993
+
1994
+ for (end = i; end < size && data[end] != '\n'; end++);
1995
+ skip = end;
1996
+
1997
+ while (end && data[end - 1] == '#')
1998
+ end--;
1999
+
2000
+ while (end && data[end - 1] == ' ')
2001
+ end--;
2002
+
2003
+ if (end > i) {
2004
+ struct buf *work = rndr_newbuf(rndr, BUFFER_SPAN);
2005
+
2006
+ parse_inline(work, rndr, data + i, end - i);
2007
+
2008
+ if (rndr->cb.header)
2009
+ rndr->cb.header(ob, work, (int)level, rndr->opaque);
2010
+
2011
+ rndr_popbuf(rndr, BUFFER_SPAN);
2012
+ }
2013
+
2014
+ return skip;
2015
+ }
2016
+
2017
+ /* parse_footnote_def • parse a single footnote definition */
2018
+ static void
2019
+ parse_footnote_def(struct buf *ob, struct sd_markdown *rndr, unsigned int num, uint8_t *data, size_t size)
2020
+ {
2021
+ struct buf *work = 0;
2022
+ work = rndr_newbuf(rndr, BUFFER_SPAN);
2023
+
2024
+ parse_block(work, rndr, data, size);
2025
+
2026
+ if (rndr->cb.footnote_def)
2027
+ rndr->cb.footnote_def(ob, work, num, rndr->opaque);
2028
+ rndr_popbuf(rndr, BUFFER_SPAN);
2029
+ }
2030
+
2031
+ /* parse_footnote_list • render the contents of the footnotes */
2032
+ static void
2033
+ parse_footnote_list(struct buf *ob, struct sd_markdown *rndr, struct footnote_list *footnotes)
2034
+ {
2035
+ struct buf *work = 0;
2036
+ struct footnote_item *item;
2037
+ struct footnote_ref *ref;
2038
+
2039
+ if (footnotes->count == 0)
2040
+ return;
2041
+
2042
+ work = rndr_newbuf(rndr, BUFFER_BLOCK);
2043
+
2044
+ item = footnotes->head;
2045
+ while (item) {
2046
+ ref = item->ref;
2047
+ parse_footnote_def(work, rndr, ref->num, ref->contents->data, ref->contents->size);
2048
+ item = item->next;
2049
+ }
2050
+
2051
+ if (rndr->cb.footnotes)
2052
+ rndr->cb.footnotes(ob, work, rndr->opaque);
2053
+ rndr_popbuf(rndr, BUFFER_BLOCK);
2054
+ }
2055
+
2056
/* htmlblock_end_tag • checking end of HTML block : </tag>[ \t]*\n[ \t*]\n */
/* returns the length on match, 0 otherwise */
/*
 * `data` must point at the '<' of a candidate "</tag>". The tag name is
 * compared case-insensitively. Only spaces may follow the tag on its
 * line; one further empty line, if present, is included in the returned
 * length. `rndr` is not used here.
 */
static size_t
htmlblock_end_tag(
	const char *tag,
	size_t tag_len,
	struct sd_markdown *rndr,
	uint8_t *data,
	size_t size)
{
	size_t pos, blank;

	/* room is needed for "</", the name, '>' and at least one more byte */
	if (tag_len + 3 >= size)
		return 0;

	/* checking if tag is a match */
	if (strncasecmp((char *)data + 2, tag, tag_len) != 0)
		return 0;

	if (data[tag_len + 2] != '>')
		return 0;

	/* checking white lines; the size check above guarantees pos < size */
	pos = tag_len + 3;
	blank = is_empty(data + pos, size - pos);
	if (blank == 0)
		return 0; /* non-blank after tag */

	pos += blank;

	/* an optional second empty line is swallowed as well */
	blank = 0;
	if (pos < size)
		blank = is_empty(data + pos, size - pos);

	return pos + blank;
}
2087
+
2088
/* htmlblock_end • looks for the closing tag that ends an HTML block */
/*
 * Scans `data` for "</" sequences and tests each with htmlblock_end_tag
 * against `curtag`. When `start_of_line` is non-zero, a candidate found
 * after the first line is accepted only if its '<' directly follows a
 * newline. Returns the block length up to the end of the closing
 * construct, or 0 when no closing tag is found.
 */
static size_t
htmlblock_end(const char *curtag,
	struct sd_markdown *rndr,
	uint8_t *data,
	size_t size,
	int start_of_line)
{
	size_t name_len = strlen(curtag);
	size_t pos = 1, tail;
	int newlines = 0;

	while (pos < size) {
		/* advance until data[pos - 1], data[pos] spell "</" */
		pos++;
		while (pos < size && (data[pos - 1] != '<' || data[pos] != '/')) {
			if (data[pos] == '\n')
				newlines++;

			pos++;
		}

		/* If we are only looking for unindented tags, skip the tag
		 * if it doesn't follow a newline.
		 *
		 * The only exception to this is if the tag is still on the
		 * initial line; in that case it still counts as a closing
		 * tag
		 */
		if (start_of_line && newlines > 0 && data[pos - 2] != '\n')
			continue;

		/* not enough input left to hold the closing tag */
		if (pos + 2 + name_len >= size)
			break;

		tail = htmlblock_end_tag(curtag, name_len, rndr, data + pos - 1, size - pos + 1);
		if (tail)
			return pos + tail - 1;
	}

	return 0;
}
2128
+
2129
+
2130
+ /* parse_htmlblock • parsing of inline HTML block */
2131
+ static size_t
2132
+ parse_htmlblock(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int do_render)
2133
+ {
2134
+ size_t i, j = 0, tag_end;
2135
+ const char *curtag = NULL;
2136
+ struct buf work = { data, 0, 0, 0 };
2137
+
2138
+ /* identification of the opening tag */
2139
+ if (size < 2 || data[0] != '<')
2140
+ return 0;
2141
+
2142
+ i = 1;
2143
+ while (i < size && data[i] != '>' && data[i] != ' ')
2144
+ i++;
2145
+
2146
+ if (i < size)
2147
+ curtag = find_block_tag((char *)data + 1, (int)i - 1);
2148
+
2149
+ /* handling of special cases */
2150
+ if (!curtag) {
2151
+
2152
+ /* HTML comment, laxist form */
2153
+ if (size > 5 && data[1] == '!' && data[2] == '-' && data[3] == '-') {
2154
+ i = 5;
2155
+
2156
+ while (i < size && !(data[i - 2] == '-' && data[i - 1] == '-' && data[i] == '>'))
2157
+ i++;
2158
+
2159
+ i++;
2160
+
2161
+ if (i < size)
2162
+ j = is_empty(data + i, size - i);
2163
+
2164
+ if (j) {
2165
+ work.size = i + j;
2166
+ if (do_render && rndr->cb.blockhtml)
2167
+ rndr->cb.blockhtml(ob, &work, rndr->opaque);
2168
+ return work.size;
2169
+ }
2170
+ }
2171
+
2172
+ /* HR, which is the only self-closing block tag considered */
2173
+ if (size > 4 && (data[1] == 'h' || data[1] == 'H') && (data[2] == 'r' || data[2] == 'R')) {
2174
+ i = 3;
2175
+ while (i < size && data[i] != '>')
2176
+ i++;
2177
+
2178
+ if (i + 1 < size) {
2179
+ i++;
2180
+ j = is_empty(data + i, size - i);
2181
+ if (j) {
2182
+ work.size = i + j;
2183
+ if (do_render && rndr->cb.blockhtml)
2184
+ rndr->cb.blockhtml(ob, &work, rndr->opaque);
2185
+ return work.size;
2186
+ }
2187
+ }
2188
+ }
2189
+
2190
+ /* no special case recognised */
2191
+ return 0;
2192
+ }
2193
+
2194
+ /* looking for an unindented matching closing tag */
2195
+ /* followed by a blank line */
2196
+ tag_end = htmlblock_end(curtag, rndr, data, size, 1);
2197
+
2198
+ /* if not found, trying a second pass looking for indented match */
2199
+ /* but not if tag is "ins" or "del" (following original Markdown.pl) */
2200
+ if (!tag_end && strcmp(curtag, "ins") != 0 && strcmp(curtag, "del") != 0) {
2201
+ tag_end = htmlblock_end(curtag, rndr, data, size, 0);
2202
+ }
2203
+
2204
+ if (!tag_end)
2205
+ return 0;
2206
+
2207
+ /* the end of the block has been found */
2208
+ work.size = tag_end;
2209
+ if (do_render && rndr->cb.blockhtml)
2210
+ rndr->cb.blockhtml(ob, &work, rndr->opaque);
2211
+
2212
+ return tag_end;
2213
+ }
2214
+
2215
+ static void
2216
+ parse_table_row(
2217
+ struct buf *ob,
2218
+ struct sd_markdown *rndr,
2219
+ uint8_t *data,
2220
+ size_t size,
2221
+ size_t columns,
2222
+ int *col_data,
2223
+ int header_flag)
2224
+ {
2225
+ size_t i = 0, col;
2226
+ struct buf *row_work = 0;
2227
+
2228
+ if (!rndr->cb.table_cell || !rndr->cb.table_row)
2229
+ return;
2230
+
2231
+ row_work = rndr_newbuf(rndr, BUFFER_SPAN);
2232
+
2233
+ if (i < size && data[i] == '|')
2234
+ i++;
2235
+
2236
+ for (col = 0; col < columns && i < size; ++col) {
2237
+ size_t cell_start, cell_end;
2238
+ struct buf *cell_work;
2239
+
2240
+ cell_work = rndr_newbuf(rndr, BUFFER_SPAN);
2241
+
2242
+ while (i < size && _isspace(data[i]))
2243
+ i++;
2244
+
2245
+ cell_start = i;
2246
+
2247
+ while (i < size && data[i] != '|')
2248
+ i++;
2249
+
2250
+ cell_end = i - 1;
2251
+
2252
+ while (cell_end > cell_start && _isspace(data[cell_end]))
2253
+ cell_end--;
2254
+
2255
+ parse_inline(cell_work, rndr, data + cell_start, 1 + cell_end - cell_start);
2256
+ rndr->cb.table_cell(row_work, cell_work, col_data[col] | header_flag, rndr->opaque);
2257
+
2258
+ rndr_popbuf(rndr, BUFFER_SPAN);
2259
+ i++;
2260
+ }
2261
+
2262
+ for (; col < columns; ++col) {
2263
+ struct buf empty_cell = { 0, 0, 0, 0 };
2264
+ rndr->cb.table_cell(row_work, &empty_cell, col_data[col] | header_flag, rndr->opaque);
2265
+ }
2266
+
2267
+ rndr->cb.table_row(ob, row_work, rndr->opaque);
2268
+
2269
+ rndr_popbuf(rndr, BUFFER_SPAN);
2270
+ }
2271
+
2272
+ static size_t
2273
+ parse_table_header(
2274
+ struct buf *ob,
2275
+ struct sd_markdown *rndr,
2276
+ uint8_t *data,
2277
+ size_t size,
2278
+ size_t *columns,
2279
+ int **column_data)
2280
+ {
2281
+ int pipes;
2282
+ size_t i = 0, col, header_end, under_end;
2283
+
2284
+ pipes = 0;
2285
+ while (i < size && data[i] != '\n')
2286
+ if (data[i++] == '|')
2287
+ pipes++;
2288
+
2289
+ if (i == size || pipes == 0)
2290
+ return 0;
2291
+
2292
+ header_end = i;
2293
+
2294
+ while (header_end > 0 && _isspace(data[header_end - 1]))
2295
+ header_end--;
2296
+
2297
+ if (data[0] == '|')
2298
+ pipes--;
2299
+
2300
+ if (header_end && data[header_end - 1] == '|')
2301
+ pipes--;
2302
+
2303
+ *columns = pipes + 1;
2304
+ *column_data = calloc(*columns, sizeof(int));
2305
+
2306
+ /* Parse the header underline */
2307
+ i++;
2308
+ if (i < size && data[i] == '|')
2309
+ i++;
2310
+
2311
+ under_end = i;
2312
+ while (under_end < size && data[under_end] != '\n')
2313
+ under_end++;
2314
+
2315
+ for (col = 0; col < *columns && i < under_end; ++col) {
2316
+ size_t dashes = 0;
2317
+
2318
+ while (i < under_end && data[i] == ' ')
2319
+ i++;
2320
+
2321
+ if (data[i] == ':') {
2322
+ i++; (*column_data)[col] |= MKD_TABLE_ALIGN_L;
2323
+ dashes++;
2324
+ }
2325
+
2326
+ while (i < under_end && data[i] == '-') {
2327
+ i++; dashes++;
2328
+ }
2329
+
2330
+ if (i < under_end && data[i] == ':') {
2331
+ i++; (*column_data)[col] |= MKD_TABLE_ALIGN_R;
2332
+ dashes++;
2333
+ }
2334
+
2335
+ while (i < under_end && data[i] == ' ')
2336
+ i++;
2337
+
2338
+ if (i < under_end && data[i] != '|' && data[i] != '+')
2339
+ break;
2340
+
2341
+ if (dashes < 3)
2342
+ break;
2343
+
2344
+ i++;
2345
+ }
2346
+
2347
+ if (col < *columns)
2348
+ return 0;
2349
+
2350
+ parse_table_row(
2351
+ ob, rndr, data,
2352
+ header_end,
2353
+ *columns,
2354
+ *column_data,
2355
+ MKD_TABLE_HEADER
2356
+ );
2357
+
2358
+ return under_end + 1;
2359
+ }
2360
+
2361
+ static size_t
2362
+ parse_table(
2363
+ struct buf *ob,
2364
+ struct sd_markdown *rndr,
2365
+ uint8_t *data,
2366
+ size_t size)
2367
+ {
2368
+ size_t i;
2369
+
2370
+ struct buf *header_work = 0;
2371
+ struct buf *body_work = 0;
2372
+
2373
+ size_t columns;
2374
+ int *col_data = NULL;
2375
+
2376
+ header_work = rndr_newbuf(rndr, BUFFER_SPAN);
2377
+ body_work = rndr_newbuf(rndr, BUFFER_BLOCK);
2378
+
2379
+ i = parse_table_header(header_work, rndr, data, size, &columns, &col_data);
2380
+ if (i > 0) {
2381
+
2382
+ while (i < size) {
2383
+ size_t row_start;
2384
+ int pipes = 0;
2385
+
2386
+ row_start = i;
2387
+
2388
+ while (i < size && data[i] != '\n')
2389
+ if (data[i++] == '|')
2390
+ pipes++;
2391
+
2392
+ if (pipes == 0 || i == size) {
2393
+ i = row_start;
2394
+ break;
2395
+ }
2396
+
2397
+ parse_table_row(
2398
+ body_work,
2399
+ rndr,
2400
+ data + row_start,
2401
+ i - row_start,
2402
+ columns,
2403
+ col_data, 0
2404
+ );
2405
+
2406
+ i++;
2407
+ }
2408
+
2409
+ if (rndr->cb.table)
2410
+ rndr->cb.table(ob, header_work, body_work, rndr->opaque);
2411
+ }
2412
+
2413
+ free(col_data);
2414
+ rndr_popbuf(rndr, BUFFER_SPAN);
2415
+ rndr_popbuf(rndr, BUFFER_BLOCK);
2416
+ return i;
2417
+ }
2418
+
2419
+ /* parse_block • parsing of one block, returning next uint8_t to parse */
2420
+ static void
2421
+ parse_block(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
2422
+ {
2423
+ size_t beg, end, i;
2424
+ uint8_t *txt_data;
2425
+ beg = 0;
2426
+
2427
+ if (rndr->work_bufs[BUFFER_SPAN].size +
2428
+ rndr->work_bufs[BUFFER_BLOCK].size > rndr->max_nesting)
2429
+ return;
2430
+
2431
+ while (beg < size) {
2432
+ txt_data = data + beg;
2433
+ end = size - beg;
2434
+
2435
+ if (is_atxheader(rndr, txt_data, end))
2436
+ beg += parse_atxheader(ob, rndr, txt_data, end);
2437
+
2438
+ else if (data[beg] == '<' && rndr->cb.blockhtml &&
2439
+ (i = parse_htmlblock(ob, rndr, txt_data, end, 1)) != 0)
2440
+ beg += i;
2441
+
2442
+ else if ((i = is_empty(txt_data, end)) != 0)
2443
+ beg += i;
2444
+
2445
+ else if (is_hrule(txt_data, end)) {
2446
+ if (rndr->cb.hrule)
2447
+ rndr->cb.hrule(ob, rndr->opaque);
2448
+
2449
+ while (beg < size && data[beg] != '\n')
2450
+ beg++;
2451
+
2452
+ beg++;
2453
+ }
2454
+
2455
+ else if ((rndr->ext_flags & MKDEXT_FENCED_CODE) != 0 &&
2456
+ (i = parse_fencedcode(ob, rndr, txt_data, end)) != 0)
2457
+ beg += i;
2458
+
2459
+ else if ((rndr->ext_flags & MKDEXT_TABLES) != 0 &&
2460
+ (i = parse_table(ob, rndr, txt_data, end)) != 0)
2461
+ beg += i;
2462
+
2463
+ else if (prefix_quote(txt_data, end))
2464
+ beg += parse_blockquote(ob, rndr, txt_data, end);
2465
+
2466
+ else if (!(rndr->ext_flags & MKDEXT_DISABLE_INDENTED_CODE) && prefix_code(txt_data, end))
2467
+ beg += parse_blockcode(ob, rndr, txt_data, end);
2468
+
2469
+ else if (prefix_uli(txt_data, end))
2470
+ beg += parse_list(ob, rndr, txt_data, end, 0);
2471
+
2472
+ else if (prefix_oli(txt_data, end))
2473
+ beg += parse_list(ob, rndr, txt_data, end, MKD_LIST_ORDERED);
2474
+
2475
+ else
2476
+ beg += parse_paragraph(ob, rndr, txt_data, end);
2477
+ }
2478
+ }
2479
+
2480
+
2481
+
2482
+ /*********************
2483
+ * REFERENCE PARSING *
2484
+ *********************/
2485
+
2486
+ /* is_footnote • returns whether a line is a footnote definition or not */
2487
+ static int
2488
+ is_footnote(const uint8_t *data, size_t beg, size_t end, size_t *last, struct footnote_list *list)
2489
+ {
2490
+ size_t i = 0;
2491
+ struct buf *contents = 0;
2492
+ size_t ind = 0;
2493
+ int in_empty = 0;
2494
+ size_t start = 0;
2495
+
2496
+ size_t id_offset, id_end;
2497
+
2498
+ /* up to 3 optional leading spaces */
2499
+ if (beg + 3 >= end) return 0;
2500
+ if (data[beg] == ' ') { i = 1;
2501
+ if (data[beg + 1] == ' ') { i = 2;
2502
+ if (data[beg + 2] == ' ') { i = 3;
2503
+ if (data[beg + 3] == ' ') return 0; } } }
2504
+ i += beg;
2505
+
2506
+ /* id part: caret followed by anything between brackets */
2507
+ if (data[i] != '[') return 0;
2508
+ i++;
2509
+ if (i >= end || data[i] != '^') return 0;
2510
+ i++;
2511
+ id_offset = i;
2512
+ while (i < end && data[i] != '\n' && data[i] != '\r' && data[i] != ']')
2513
+ i++;
2514
+ if (i >= end || data[i] != ']') return 0;
2515
+ id_end = i;
2516
+
2517
+ /* spacer: colon (space | tab)* newline? (space | tab)* */
2518
+ i++;
2519
+ if (i >= end || data[i] != ':') return 0;
2520
+ i++;
2521
+
2522
+ /* getting content buffer */
2523
+ contents = bufnew(64);
2524
+
2525
+ start = i;
2526
+
2527
+ /* process lines similiar to a list item */
2528
+ while (i < end) {
2529
+ while (i < end && data[i] != '\n' && data[i] != '\r') i++;
2530
+
2531
+ /* process an empty line */
2532
+ if (is_empty(data + start, i - start)) {
2533
+ in_empty = 1;
2534
+ if (i < end && (data[i] == '\n' || data[i] == '\r')) {
2535
+ i++;
2536
+ if (i < end && data[i] == '\n' && data[i - 1] == '\r') i++;
2537
+ }
2538
+ start = i;
2539
+ continue;
2540
+ }
2541
+
2542
+ /* calculating the indentation */
2543
+ ind = 0;
2544
+ while (ind < 4 && start + ind < end && data[start + ind] == ' ')
2545
+ ind++;
2546
+
2547
+ /* joining only indented stuff after empty lines;
2548
+ * note that now we only require 1 space of indentation
2549
+ * to continue, just like lists */
2550
+ if (ind == 0) {
2551
+ if (start == id_end + 2 && data[start] == '\t') {}
2552
+ else break;
2553
+ }
2554
+ else if (in_empty) {
2555
+ bufputc(contents, '\n');
2556
+ }
2557
+
2558
+ in_empty = 0;
2559
+
2560
+ /* adding the line into the content buffer */
2561
+ bufput(contents, data + start + ind, i - start - ind);
2562
+ /* add carriage return */
2563
+ if (i < end) {
2564
+ bufput(contents, "\n", 1);
2565
+ if (i < end && (data[i] == '\n' || data[i] == '\r')) {
2566
+ i++;
2567
+ if (i < end && data[i] == '\n' && data[i - 1] == '\r') i++;
2568
+ }
2569
+ }
2570
+ start = i;
2571
+ }
2572
+
2573
+ if (last)
2574
+ *last = start;
2575
+
2576
+ if (list) {
2577
+ struct footnote_ref *ref;
2578
+ ref = create_footnote_ref(list, data + id_offset, id_end - id_offset);
2579
+ if (!ref)
2580
+ return 0;
2581
+ if (!add_footnote_ref(list, ref)) {
2582
+ free_footnote_ref(ref);
2583
+ return 0;
2584
+ }
2585
+ ref->contents = contents;
2586
+ }
2587
+
2588
+ return 1;
2589
+ }
2590
+
2591
+ /* is_ref • returns whether a line is a reference or not */
2592
+ static int
2593
+ is_ref(const uint8_t *data, size_t beg, size_t end, size_t *last, struct link_ref **refs)
2594
+ {
2595
+ /* int n; */
2596
+ size_t i = 0;
2597
+ size_t id_offset, id_end;
2598
+ size_t link_offset, link_end;
2599
+ size_t title_offset, title_end;
2600
+ size_t line_end;
2601
+
2602
+ /* up to 3 optional leading spaces */
2603
+ if (beg + 3 >= end) return 0;
2604
+ if (data[beg] == ' ') { i = 1;
2605
+ if (data[beg + 1] == ' ') { i = 2;
2606
+ if (data[beg + 2] == ' ') { i = 3;
2607
+ if (data[beg + 3] == ' ') return 0; } } }
2608
+ i += beg;
2609
+
2610
+ /* id part: anything but a newline between brackets */
2611
+ if (data[i] != '[') return 0;
2612
+ i++;
2613
+ id_offset = i;
2614
+ while (i < end && data[i] != '\n' && data[i] != '\r' && data[i] != ']')
2615
+ i++;
2616
+ if (i >= end || data[i] != ']') return 0;
2617
+ id_end = i;
2618
+
2619
+ /* spacer: colon (space | tab)* newline? (space | tab)* */
2620
+ i++;
2621
+ if (i >= end || data[i] != ':') return 0;
2622
+ i++;
2623
+ while (i < end && strchr("\t ", data[i])) i++;
2624
+ if (i < end && (data[i] == '\n' || data[i] == '\r')) {
2625
+ i++;
2626
+ if (i < end && data[i] == '\r' && data[i - 1] == '\n') i++; }
2627
+ while (i < end && strchr("\t ", data[i])) i++;
2628
+ if (i >= end) return 0;
2629
+
2630
+ /* link: whitespace-free sequence, optionally between angle brackets */
2631
+ if (data[i] == '<')
2632
+ i++;
2633
+
2634
+ link_offset = i;
2635
+
2636
+ while (i < end && data[i] != ' ' && data[i] != '\n' && data[i] != '\r')
2637
+ i++;
2638
+
2639
+ if (data[i - 1] == '>') link_end = i - 1;
2640
+ else link_end = i;
2641
+
2642
+ /* optional spacer: (space | tab)* (newline | '\'' | '"' | '(' ) */
2643
+ while (i < end && strchr("\t ", data[i])) i++;
2644
+ if (i < end && data[i] != '\n' && data[i] != '\r'
2645
+ && data[i] != '\'' && data[i] != '"' && data[i] != '(')
2646
+ return 0;
2647
+ line_end = 0;
2648
+ /* computing end-of-line */
2649
+ if (i >= end || data[i] == '\r' || data[i] == '\n') line_end = i;
2650
+ if (i + 1 < end && data[i] == '\n' && data[i + 1] == '\r')
2651
+ line_end = i + 1;
2652
+
2653
+ /* optional (space|tab)* spacer after a newline */
2654
+ if (line_end) {
2655
+ i = line_end + 1;
2656
+ while (i < end && strchr("\t ", data[i])) i++; }
2657
+
2658
+ /* optional title: any non-newline sequence enclosed in '"()
2659
+ alone on its line */
2660
+ title_offset = title_end = 0;
2661
+ if (i + 1 < end
2662
+ && (data[i] == '\'' || data[i] == '"' || data[i] == '(')) {
2663
+ i++;
2664
+ title_offset = i;
2665
+ /* looking for EOL */
2666
+ while (i < end && data[i] != '\n' && data[i] != '\r') i++;
2667
+ if (i + 1 < end && data[i] == '\n' && data[i + 1] == '\r')
2668
+ title_end = i + 1;
2669
+ else title_end = i;
2670
+ /* stepping back */
2671
+ i -= 1;
2672
+ while (i > title_offset && data[i] == ' ')
2673
+ i -= 1;
2674
+ if (i > title_offset
2675
+ && (data[i] == '\'' || data[i] == '"' || data[i] == ')')) {
2676
+ line_end = title_end;
2677
+ title_end = i; } }
2678
+
2679
+ if (!line_end || link_end == link_offset)
2680
+ return 0; /* garbage after the link empty link */
2681
+
2682
+ /* a valid ref has been found, filling-in return structures */
2683
+ if (last)
2684
+ *last = line_end;
2685
+
2686
+ if (refs) {
2687
+ struct link_ref *ref;
2688
+
2689
+ ref = add_link_ref(refs, data + id_offset, id_end - id_offset);
2690
+ if (!ref)
2691
+ return 0;
2692
+
2693
+ ref->link = bufnew(link_end - link_offset);
2694
+ bufput(ref->link, data + link_offset, link_end - link_offset);
2695
+
2696
+ if (title_end > title_offset) {
2697
+ ref->title = bufnew(title_end - title_offset);
2698
+ bufput(ref->title, data + title_offset, title_end - title_offset);
2699
+ }
2700
+ }
2701
+
2702
+ return 1;
2703
+ }
2704
+
2705
/* expand_tabs • appends line[0..size) to ob, replacing each tab with one to
 * four spaces so that the text following it starts on a multiple of 4
 * columns (columns are counted in bytes from the start of the line) */
static void expand_tabs(struct buf *ob, const uint8_t *line, size_t size)
{
	size_t pos = 0;		/* read offset into line */
	size_t column = 0;	/* output column reached so far */

	while (pos < size) {
		const uint8_t *tab_at = memchr(line + pos, '\t', size - pos);
		size_t run = tab_at ? (size_t)(tab_at - (line + pos)) : size - pos;

		/* copy the tab-free run verbatim */
		if (run > 0) {
			bufput(ob, line + pos, run);
			pos += run;
			column += run;
		}

		if (!tab_at)
			break;

		/* pad up to the next tab stop; always at least one space */
		do {
			bufputc(ob, ' ');
			column++;
		} while (column % 4 != 0);

		pos++;	/* step over the tab itself */
	}
}
2729
+
2730
+ /**********************
2731
+ * EXPORTED FUNCTIONS *
2732
+ **********************/
2733
+
2734
+ struct sd_markdown *
2735
+ sd_markdown_new(
2736
+ unsigned int extensions,
2737
+ size_t max_nesting,
2738
+ const struct sd_callbacks *callbacks,
2739
+ void *opaque)
2740
+ {
2741
+ struct sd_markdown *md = NULL;
2742
+
2743
+ assert(max_nesting > 0 && callbacks);
2744
+
2745
+ md = malloc(sizeof(struct sd_markdown));
2746
+ if (!md)
2747
+ return NULL;
2748
+
2749
+ memcpy(&md->cb, callbacks, sizeof(struct sd_callbacks));
2750
+
2751
+ redcarpet_stack_init(&md->work_bufs[BUFFER_BLOCK], 4);
2752
+ redcarpet_stack_init(&md->work_bufs[BUFFER_SPAN], 8);
2753
+
2754
+ memset(md->active_char, 0x0, 256);
2755
+
2756
+ if (md->cb.emphasis || md->cb.double_emphasis || md->cb.triple_emphasis) {
2757
+ md->active_char['*'] = MD_CHAR_EMPHASIS;
2758
+ md->active_char['_'] = MD_CHAR_EMPHASIS;
2759
+ if (extensions & MKDEXT_STRIKETHROUGH)
2760
+ md->active_char['~'] = MD_CHAR_EMPHASIS;
2761
+ if (extensions & MKDEXT_HIGHLIGHT)
2762
+ md->active_char['='] = MD_CHAR_EMPHASIS;
2763
+ }
2764
+
2765
+ if (md->cb.codespan)
2766
+ md->active_char['`'] = MD_CHAR_CODESPAN;
2767
+
2768
+ if (md->cb.linebreak)
2769
+ md->active_char['\n'] = MD_CHAR_LINEBREAK;
2770
+
2771
+ if (md->cb.image || md->cb.link)
2772
+ md->active_char['['] = MD_CHAR_LINK;
2773
+
2774
+ md->active_char['<'] = MD_CHAR_LANGLE;
2775
+ md->active_char['\\'] = MD_CHAR_ESCAPE;
2776
+ md->active_char['&'] = MD_CHAR_ENTITITY;
2777
+
2778
+ if (extensions & MKDEXT_AUTOLINK) {
2779
+ md->active_char[':'] = MD_CHAR_AUTOLINK_URL;
2780
+ md->active_char['@'] = MD_CHAR_AUTOLINK_EMAIL;
2781
+ md->active_char['w'] = MD_CHAR_AUTOLINK_WWW;
2782
+ }
2783
+
2784
+ if (extensions & MKDEXT_SUPERSCRIPT)
2785
+ md->active_char['^'] = MD_CHAR_SUPERSCRIPT;
2786
+
2787
+ if (extensions & MKDEXT_QUOTE)
2788
+ md->active_char['"'] = MD_CHAR_QUOTE;
2789
+
2790
+ /* Extension data */
2791
+ md->ext_flags = extensions;
2792
+ md->opaque = opaque;
2793
+ md->max_nesting = max_nesting;
2794
+ md->in_link_body = 0;
2795
+
2796
+ return md;
2797
+ }
2798
+
2799
+ void
2800
+ sd_markdown_render(struct buf *ob, const uint8_t *document, size_t doc_size, struct sd_markdown *md)
2801
+ {
2802
+ #define MARKDOWN_GROW(x) ((x) + ((x) >> 1))
2803
+ static const char UTF8_BOM[] = {0xEF, 0xBB, 0xBF};
2804
+
2805
+ struct buf *text;
2806
+ size_t beg, end;
2807
+
2808
+ text = bufnew(64);
2809
+ if (!text)
2810
+ return;
2811
+
2812
+ /* Preallocate enough space for our buffer to avoid expanding while copying */
2813
+ bufgrow(text, doc_size);
2814
+
2815
+ /* reset the references table */
2816
+ memset(&md->refs, 0x0, REF_TABLE_SIZE * sizeof(void *));
2817
+
2818
+ int footnotes_enabled = md->ext_flags & MKDEXT_FOOTNOTES;
2819
+
2820
+ /* reset the footnotes lists */
2821
+ if (footnotes_enabled) {
2822
+ memset(&md->footnotes_found, 0x0, sizeof(md->footnotes_found));
2823
+ memset(&md->footnotes_used, 0x0, sizeof(md->footnotes_used));
2824
+ }
2825
+
2826
+ /* first pass: looking for references, copying everything else */
2827
+ beg = 0;
2828
+
2829
+ /* Skip a possible UTF-8 BOM, even though the Unicode standard
2830
+ * discourages having these in UTF-8 documents */
2831
+ if (doc_size >= 3 && memcmp(document, UTF8_BOM, 3) == 0)
2832
+ beg += 3;
2833
+
2834
+ while (beg < doc_size) /* iterating over lines */
2835
+ if (footnotes_enabled && is_footnote(document, beg, doc_size, &end, &md->footnotes_found))
2836
+ beg = end;
2837
+ else if (is_ref(document, beg, doc_size, &end, md->refs))
2838
+ beg = end;
2839
+ else { /* skipping to the next line */
2840
+ end = beg;
2841
+ while (end < doc_size && document[end] != '\n' && document[end] != '\r')
2842
+ end++;
2843
+
2844
+ /* adding the line body if present */
2845
+ if (end > beg)
2846
+ expand_tabs(text, document + beg, end - beg);
2847
+
2848
+ while (end < doc_size && (document[end] == '\n' || document[end] == '\r')) {
2849
+ /* add one \n per newline */
2850
+ if (document[end] == '\n' || (end + 1 < doc_size && document[end + 1] != '\n'))
2851
+ bufputc(text, '\n');
2852
+ end++;
2853
+ }
2854
+
2855
+ beg = end;
2856
+ }
2857
+
2858
+ /* pre-grow the output buffer to minimize allocations */
2859
+ bufgrow(ob, MARKDOWN_GROW(text->size));
2860
+
2861
+ /* second pass: actual rendering */
2862
+ if (md->cb.doc_header)
2863
+ md->cb.doc_header(ob, md->opaque);
2864
+
2865
+ if (text->size) {
2866
+ /* adding a final newline if not already present */
2867
+ if (text->data[text->size - 1] != '\n' && text->data[text->size - 1] != '\r')
2868
+ bufputc(text, '\n');
2869
+
2870
+ parse_block(ob, md, text->data, text->size);
2871
+ }
2872
+
2873
+ /* footnotes */
2874
+ if (footnotes_enabled)
2875
+ parse_footnote_list(ob, md, &md->footnotes_used);
2876
+
2877
+ if (md->cb.doc_footer)
2878
+ md->cb.doc_footer(ob, md->opaque);
2879
+
2880
+ /* clean-up */
2881
+ bufrelease(text);
2882
+ free_link_refs(md->refs);
2883
+ if (footnotes_enabled) {
2884
+ free_footnote_list(&md->footnotes_found, 1);
2885
+ free_footnote_list(&md->footnotes_used, 0);
2886
+ }
2887
+
2888
+ assert(md->work_bufs[BUFFER_SPAN].size == 0);
2889
+ assert(md->work_bufs[BUFFER_BLOCK].size == 0);
2890
+ }
2891
+
2892
+ void
2893
+ sd_markdown_free(struct sd_markdown *md)
2894
+ {
2895
+ size_t i;
2896
+
2897
+ for (i = 0; i < (size_t)md->work_bufs[BUFFER_SPAN].asize; ++i)
2898
+ bufrelease(md->work_bufs[BUFFER_SPAN].item[i]);
2899
+
2900
+ for (i = 0; i < (size_t)md->work_bufs[BUFFER_BLOCK].asize; ++i)
2901
+ bufrelease(md->work_bufs[BUFFER_BLOCK].item[i]);
2902
+
2903
+ redcarpet_stack_free(&md->work_bufs[BUFFER_SPAN]);
2904
+ redcarpet_stack_free(&md->work_bufs[BUFFER_BLOCK]);
2905
+
2906
+ free(md);
2907
+ }