rdiscountwl 1.0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/ext/markdown.c ADDED
@@ -0,0 +1,1361 @@
1
+ /* markdown: a C implementation of John Gruber's Markdown markup language.
2
+ *
3
+ * Copyright (C) 2007 David L Parsons.
4
+ * The redistribution terms are provided in the COPYRIGHT file that must
5
+ * be distributed with this source code.
6
+ */
7
+ #include "config.h"
8
+
9
+ #include <stdio.h>
10
+ #include <string.h>
11
+ #include <stdarg.h>
12
+ #include <stdlib.h>
13
+ #include <time.h>
14
+ #include <ctype.h>
15
+
16
+ #include "cstring.h"
17
+ #include "markdown.h"
18
+ #include "amalloc.h"
19
+ #include "tags.h"
20
+
21
+ typedef int (*stfu)(const void*,const void*);
22
+
23
+ typedef ANCHOR(Paragraph) ParagraphRoot;
24
+
25
+ static Paragraph *Pp(ParagraphRoot *, Line *, int);
26
+ static Paragraph *compile(Line *, int, MMIOT *);
27
+
28
+ /* case insensitive string sort for Footnote tags.
29
+ */
30
+ int
31
+ __mkd_footsort(Footnote *a, Footnote *b)
32
+ {
33
+ int i;
34
+ char ac, bc;
35
+
36
+ if ( S(a->tag) != S(b->tag) )
37
+ return S(a->tag) - S(b->tag);
38
+
39
+ for ( i=0; i < S(a->tag); i++) {
40
+ ac = tolower(T(a->tag)[i]);
41
+ bc = tolower(T(b->tag)[i]);
42
+
43
+ if ( isspace(ac) && isspace(bc) )
44
+ continue;
45
+ if ( ac != bc )
46
+ return ac - bc;
47
+ }
48
+ return 0;
49
+ }
50
+
51
+
52
+ /* find the first blank character after position <i>
53
+ */
54
+ static int
55
+ nextblank(Line *t, int i)
56
+ {
57
+ while ( (i < S(t->text)) && !isspace(T(t->text)[i]) )
58
+ ++i;
59
+ return i;
60
+ }
61
+
62
+
63
+ /* find the next nonblank character after position <i>
64
+ */
65
+ static int
66
+ nextnonblank(Line *t, int i)
67
+ {
68
+ while ( (i < S(t->text)) && isspace(T(t->text)[i]) )
69
+ ++i;
70
+ return i;
71
+ }
72
+
73
+
74
+ /* find the first nonblank character on the Line.
75
+ */
76
+ int
77
+ mkd_firstnonblank(Line *p)
78
+ {
79
+ return nextnonblank(p,0);
80
+ }
81
+
82
+
83
+ static inline int
84
+ blankline(Line *p)
85
+ {
86
+ return ! (p && (S(p->text) > p->dle) );
87
+ }
88
+
89
+
90
+ static Line *
91
+ skipempty(Line *p)
92
+ {
93
+ while ( p && (p->dle == S(p->text)) )
94
+ p = p->next;
95
+ return p;
96
+ }
97
+
98
+
99
+ void
100
+ ___mkd_tidy(Cstring *t)
101
+ {
102
+ while ( S(*t) && isspace(T(*t)[S(*t)-1]) )
103
+ --S(*t);
104
+ }
105
+
106
+
107
+ static struct kw comment = { "!--", 3, 0 };
108
+
109
+ static struct kw *
110
+ isopentag(Line *p)
111
+ {
112
+ int i=0, len;
113
+ char *line;
114
+
115
+ if ( !p ) return 0;
116
+
117
+ line = T(p->text);
118
+ len = S(p->text);
119
+
120
+ if ( len < 3 || line[0] != '<' )
121
+ return 0;
122
+
123
+ if ( line[1] == '!' && line[2] == '-' && line[3] == '-' )
124
+ /* comments need special case handling, because
125
+ * the !-- doesn't need to end in a whitespace
126
+ */
127
+ return &comment;
128
+
129
+ /* find how long the tag is so we can check to see if
130
+ * it's a block-level tag
131
+ */
132
+ for ( i=1; i < len && T(p->text)[i] != '>'
133
+ && T(p->text)[i] != '/'
134
+ && !isspace(T(p->text)[i]); ++i )
135
+ ;
136
+
137
+
138
+ return mkd_search_tags(T(p->text)+1, i-1);
139
+ }
140
+
141
+
142
+ typedef struct _flo {
143
+ Line *t;
144
+ int i;
145
+ } FLO;
146
+
147
+ #define floindex(x) (x.i)
148
+
149
+
150
+ static int
151
+ flogetc(FLO *f)
152
+ {
153
+ if ( f && f->t ) {
154
+ if ( f->i < S(f->t->text) )
155
+ return T(f->t->text)[f->i++];
156
+ f->t = f->t->next;
157
+ f->i = 0;
158
+ return flogetc(f);
159
+ }
160
+ return EOF;
161
+ }
162
+
163
+
164
+ static void
165
+ splitline(Line *t, int cutpoint)
166
+ {
167
+ if ( t && (cutpoint < S(t->text)) ) {
168
+ Line *tmp = calloc(1, sizeof *tmp);
169
+
170
+ tmp->next = t->next;
171
+ t->next = tmp;
172
+
173
+ tmp->dle = t->dle;
174
+ SUFFIX(tmp->text, T(t->text)+cutpoint, S(t->text)-cutpoint);
175
+ S(t->text) = cutpoint;
176
+ }
177
+ }
178
+
179
/* clear the CHECKED flag so the line gets re-examined by checkline() */
#define UNCHECK(l) ((l)->flags &= ~CHECKED)

/* Helper for checkline(); relies on its locals `fenced`, `other`, `c`
 * and `l`.  Once a code fence has been seen on the line the character
 * is treated as ordinary content (and, unless it is a space, taken
 * back out of l->count); otherwise statement <t> is executed.
 * Wrapped in do/while(0) so it behaves as a single statement.
 */
#define UNLESS_FENCED(t) do { \
        if (fenced) { \
            other = 1; l->count += (c == ' ' ? 0 : -1); \
        } else { t; } \
    } while (0)
184
+
185
+ /*
186
+ * walk a line, seeing if it's any of half a dozen interesting regular
187
+ * types.
188
+ */
189
+ static void
190
+ checkline(Line *l, DWORD flags)
191
+ {
192
+ int eol, i;
193
+ int dashes = 0, spaces = 0,
194
+ equals = 0, underscores = 0,
195
+ stars = 0, tildes = 0, other = 0,
196
+ backticks = 0, fenced = 0;
197
+
198
+ l->flags |= CHECKED;
199
+ l->kind = chk_text;
200
+ l->count = 0;
201
+
202
+ if (l->dle >= 4) { l->kind=chk_code; return; }
203
+
204
+ for ( eol = S(l->text); eol > l->dle && isspace(T(l->text)[eol-1]); --eol )
205
+ ;
206
+
207
+ for (i=l->dle; i<eol; i++) {
208
+ register int c = T(l->text)[i];
209
+ int is_fence_char = 0;
210
+
211
+ if ( c != ' ' ) l->count++;
212
+
213
+ switch (c) {
214
+ case '-': UNLESS_FENCED(dashes = 1); break;
215
+ case ' ': UNLESS_FENCED(spaces = 1); break;
216
+ case '=': equals = 1; break;
217
+ case '_': UNLESS_FENCED(underscores = 1); break;
218
+ case '*': stars = 1; break;
219
+ default:
220
+ if (flags & MKD_FENCEDCODE) {
221
+ switch (c) {
222
+ case '~': if (other) return; is_fence_char = 1; tildes = 1; break;
223
+ case '`': if (other) return; is_fence_char = 1; backticks = 1; break;
224
+ }
225
+ if (is_fence_char) {
226
+ fenced = 1;
227
+ break;
228
+ }
229
+ }
230
+ other = 1;
231
+ l->count--;
232
+ if (!fenced) return;
233
+ }
234
+ }
235
+
236
+ if ( dashes + equals + underscores + stars + tildes + backticks > 1 )
237
+ return;
238
+
239
+ if ( spaces ) {
240
+ if ( (underscores || stars || dashes) )
241
+ l->kind = chk_hr;
242
+ return;
243
+ }
244
+
245
+ if ( stars || underscores ) { l->kind = chk_hr; }
246
+ else if ( dashes ) { l->kind = chk_dash; }
247
+ else if ( equals ) { l->kind = chk_equal; }
248
+ else if ( tildes ) { l->kind = chk_tilde; }
249
+ else if ( backticks ) { l->kind = chk_backtick; }
250
+ }
251
+
252
+
253
+
254
+ /* markdown only does special handling of comments if the comment end
255
+ * is at the end of a line
256
+ */
257
+ static Line *
258
+ commentblock(Paragraph *p, int *unclosed)
259
+ {
260
+ Line *t, *ret;
261
+ char *end;
262
+
263
+ for ( t = p->text; t ; t = t->next) {
264
+ if ( end = strstr(T(t->text), "-->") ) {
265
+ if ( nextnonblank(t, 3 + (end - T(t->text))) < S(t->text) )
266
+ continue;
267
+ /*splitline(t, 3 + (end - T(t->text)) );*/
268
+ ret = t->next;
269
+ t->next = 0;
270
+ return ret;
271
+ }
272
+ }
273
+
274
+ *unclosed = 1;
275
+ return t;
276
+
277
+ }
278
+
279
+
280
+ static Line *
281
+ htmlblock(Paragraph *p, struct kw *tag, int *unclosed)
282
+ {
283
+ Line *ret;
284
+ FLO f = { p->text, 0 };
285
+ int c;
286
+ int i, closing, depth=0;
287
+
288
+ *unclosed = 0;
289
+
290
+ if ( tag == &comment )
291
+ return commentblock(p, unclosed);
292
+
293
+ if ( tag->selfclose ) {
294
+ ret = f.t->next;
295
+ f.t->next = 0;
296
+ return ret;
297
+ }
298
+
299
+ while ( (c = flogetc(&f)) != EOF ) {
300
+ if ( c == '<' ) {
301
+ /* tag? */
302
+ c = flogetc(&f);
303
+ if ( c == '!' ) { /* comment? */
304
+ if ( flogetc(&f) == '-' && flogetc(&f) == '-' ) {
305
+ /* yes */
306
+ while ( (c = flogetc(&f)) != EOF ) {
307
+ if ( c == '-' && flogetc(&f) == '-'
308
+ && flogetc(&f) == '>')
309
+ /* consumed whole comment */
310
+ break;
311
+ }
312
+ }
313
+ }
314
+ else {
315
+ if ( closing = (c == '/') ) c = flogetc(&f);
316
+
317
+ for ( i=0; i < tag->size; i++, c=flogetc(&f) ) {
318
+ if ( tag->id[i] != toupper(c) )
319
+ break;
320
+ }
321
+
322
+ if ( (i == tag->size) && !isalnum(c) ) {
323
+ depth = depth + (closing ? -1 : 1);
324
+ if ( depth == 0 ) {
325
+ while ( c != EOF && c != '>' ) {
326
+ /* consume trailing gunk in close tag */
327
+ c = flogetc(&f);
328
+ }
329
+ if ( c == EOF )
330
+ break;
331
+ if ( !f.t )
332
+ return 0;
333
+ splitline(f.t, floindex(f));
334
+ ret = f.t->next;
335
+ f.t->next = 0;
336
+ return ret;
337
+ }
338
+ }
339
+ }
340
+ }
341
+ }
342
+ *unclosed = 1;
343
+ return 0;
344
+ }
345
+
346
+
347
+ /* footnotes look like ^<whitespace>{0,3}[stuff]: <content>$
348
+ */
349
+ static int
350
+ isfootnote(Line *t)
351
+ {
352
+ int i;
353
+
354
+ if ( ( (i = t->dle) > 3) || (T(t->text)[i] != '[') )
355
+ return 0;
356
+
357
+ for ( ++i; i < S(t->text) ; ++i ) {
358
+ if ( T(t->text)[i] == '[' )
359
+ return 0;
360
+ else if ( T(t->text)[i] == ']' )
361
+ return ( T(t->text)[i+1] == ':' ) ;
362
+ }
363
+ return 0;
364
+ }
365
+
366
+
367
+ static inline int
368
+ isquote(Line *t)
369
+ {
370
+ return (t->dle < 4 && T(t->text)[t->dle] == '>');
371
+ }
372
+
373
+
374
+ static inline int
375
+ iscode(Line *t)
376
+ {
377
+ return (t->dle >= 4);
378
+ }
379
+
380
+
381
+ static inline int
382
+ ishr(Line *t, DWORD flags)
383
+ {
384
+ if ( ! (t->flags & CHECKED) )
385
+ checkline(t, flags);
386
+
387
+ if ( t->count > 2 )
388
+ return t->kind == chk_hr || t->kind == chk_dash || t->kind == chk_equal;
389
+ return 0;
390
+ }
391
+
392
+
393
+ static int
394
+ issetext(Line *t, int *htyp, DWORD flags)
395
+ {
396
+ Line *n;
397
+
398
+ /* check for setext-style HEADER
399
+ * ======
400
+ */
401
+
402
+ if ( (n = t->next) ) {
403
+ if ( !(n->flags & CHECKED) )
404
+ checkline(n, flags);
405
+
406
+ if ( n->kind == chk_dash || n->kind == chk_equal ) {
407
+ *htyp = SETEXT;
408
+ return 1;
409
+ }
410
+ }
411
+ return 0;
412
+ }
413
+
414
+
415
+ static int
416
+ ishdr(Line *t, int *htyp, DWORD flags)
417
+ {
418
+ /* ANY leading `#`'s make this into an ETX header
419
+ */
420
+ if ( (t->dle == 0) && (S(t->text) > 1) && (T(t->text)[0] == '#') ) {
421
+ *htyp = ETX;
422
+ return 1;
423
+ }
424
+
425
+ /* And if not, maybe it's a SETEXT header instead
426
+ */
427
+ return issetext(t, htyp, flags);
428
+ }
429
+
430
+
431
+ static inline int
432
+ end_of_block(Line *t, DWORD flags)
433
+ {
434
+ int dummy;
435
+
436
+ if ( !t )
437
+ return 0;
438
+
439
+ return ( (S(t->text) <= t->dle) || ishr(t, flags) || ishdr(t, &dummy, flags) );
440
+ }
441
+
442
+
443
+ static Line*
444
+ is_discount_dt(Line *t, int *clip, DWORD flags)
445
+ {
446
+ if ( !(flags & MKD_NODLDISCOUNT)
447
+ && t
448
+ && t->next
449
+ && (S(t->text) > 2)
450
+ && (t->dle == 0)
451
+ && (T(t->text)[0] == '=')
452
+ && (T(t->text)[S(t->text)-1] == '=') ) {
453
+ if ( t->next->dle >= 4 ) {
454
+ *clip = 4;
455
+ return t;
456
+ }
457
+ else
458
+ return is_discount_dt(t->next, clip, flags);
459
+ }
460
+ return 0;
461
+ }
462
+
463
+
464
+ static int
465
+ is_extra_dd(Line *t)
466
+ {
467
+ return (t->dle < 4) && (T(t->text)[t->dle] == ':')
468
+ && isspace(T(t->text)[t->dle+1]);
469
+ }
470
+
471
+
472
+ static Line*
473
+ is_extra_dt(Line *t, int *clip, DWORD flags)
474
+ {
475
+ if ( flags & MKD_DLEXTRA
476
+ && t
477
+ && t->next && S(t->text) && T(t->text)[0] != '='
478
+ && T(t->text)[S(t->text)-1] != '=') {
479
+ Line *x;
480
+
481
+ if ( iscode(t) || end_of_block(t, flags) )
482
+ return 0;
483
+
484
+ if ( (x = skipempty(t->next)) && is_extra_dd(x) ) {
485
+ *clip = x->dle+2;
486
+ return t;
487
+ }
488
+
489
+ if ( x=is_extra_dt(t->next, clip, flags) )
490
+ return x;
491
+ }
492
+ return 0;
493
+ }
494
+
495
+
496
+ static Line*
497
+ isdefinition(Line *t, int *clip, int *kind, DWORD flags)
498
+ {
499
+ Line *ret;
500
+
501
+ *kind = 1;
502
+ if ( ret = is_discount_dt(t,clip,flags) )
503
+ return ret;
504
+
505
+ *kind=2;
506
+ return is_extra_dt(t,clip,flags);
507
+ }
508
+
509
+
510
+ static int
511
+ islist(Line *t, int *clip, DWORD flags, int *list_type)
512
+ {
513
+ int i, j;
514
+ char *q;
515
+
516
+ if ( end_of_block(t, flags) )
517
+ return 0;
518
+
519
+ if ( !(flags & (MKD_NODLIST|MKD_STRICT)) && isdefinition(t,clip,list_type,flags) )
520
+ return DL;
521
+
522
+ if ( strchr("*-+", T(t->text)[t->dle]) && isspace(T(t->text)[t->dle+1]) ) {
523
+ i = nextnonblank(t, t->dle+1);
524
+ *clip = (i > 4) ? 4 : i;
525
+ *list_type = UL;
526
+ return AL;
527
+ }
528
+
529
+ if ( (j = nextblank(t,t->dle)) > t->dle ) {
530
+ if ( T(t->text)[j-1] == '.' ) {
531
+
532
+ if ( !(flags & (MKD_NOALPHALIST|MKD_STRICT))
533
+ && (j == t->dle + 2)
534
+ && isalpha(T(t->text)[t->dle]) ) {
535
+ j = nextnonblank(t,j);
536
+ *clip = (j > 4) ? 4 : j;
537
+ *list_type = AL;
538
+ return AL;
539
+ }
540
+
541
+ strtoul(T(t->text)+t->dle, &q, 10);
542
+ if ( (q > T(t->text)+t->dle) && (q == T(t->text) + (j-1)) ) {
543
+ j = nextnonblank(t,j);
544
+ *clip = j;
545
+ *list_type = OL;
546
+ return AL;
547
+ }
548
+ }
549
+ }
550
+ return 0;
551
+ }
552
+
553
+
554
+ static Line *
555
+ headerblock(Paragraph *pp, int htyp)
556
+ {
557
+ Line *ret = 0;
558
+ Line *p = pp->text;
559
+ int i, j;
560
+
561
+ switch (htyp) {
562
+ case SETEXT:
563
+ /* p->text is header, p->next->text is -'s or ='s
564
+ */
565
+ pp->hnumber = (T(p->next->text)[0] == '=') ? 1 : 2;
566
+
567
+ ret = p->next->next;
568
+ ___mkd_freeLine(p->next);
569
+ p->next = 0;
570
+ break;
571
+
572
+ case ETX:
573
+ /* p->text is ###header###, so we need to trim off
574
+ * the leading and trailing `#`'s
575
+ */
576
+
577
+ for (i=0; (T(p->text)[i] == T(p->text)[0]) && (i < S(p->text)-1)
578
+ && (i < 6); i++)
579
+ ;
580
+
581
+ pp->hnumber = i;
582
+
583
+ while ( (i < S(p->text)) && isspace(T(p->text)[i]) )
584
+ ++i;
585
+
586
+ CLIP(p->text, 0, i);
587
+ UNCHECK(p);
588
+
589
+ for (j=S(p->text); (j > 1) && (T(p->text)[j-1] == '#'); --j)
590
+ ;
591
+
592
+ while ( j && isspace(T(p->text)[j-1]) )
593
+ --j;
594
+
595
+ S(p->text) = j;
596
+
597
+ ret = p->next;
598
+ p->next = 0;
599
+ break;
600
+ }
601
+ return ret;
602
+ }
603
+
604
+
605
+ static Line *
606
+ codeblock(Paragraph *p)
607
+ {
608
+ Line *t = p->text, *r;
609
+
610
+ for ( ; t; t = r ) {
611
+ CLIP(t->text,0,4);
612
+ t->dle = mkd_firstnonblank(t);
613
+
614
+ if ( !( (r = skipempty(t->next)) && iscode(r)) ) {
615
+ ___mkd_freeLineRange(t,r);
616
+ t->next = 0;
617
+ return r;
618
+ }
619
+ }
620
+ return t;
621
+ }
622
+
623
+
624
+ static int
625
+ iscodefence(Line *r, int size, line_type kind, DWORD flags)
626
+ {
627
+ if ( !(flags & MKD_FENCEDCODE) )
628
+ return 0;
629
+
630
+ if ( !(r->flags & CHECKED) )
631
+ checkline(r, flags);
632
+
633
+ if ( kind )
634
+ return (r->kind == kind) && (r->count >= size);
635
+ else
636
+ return (r->kind == chk_tilde || r->kind == chk_backtick) && (r->count >= size);
637
+ }
638
+
639
+ static Paragraph *
640
+ fencedcodeblock(ParagraphRoot *d, Line **ptr, DWORD flags)
641
+ {
642
+ Line *first, *r;
643
+ Paragraph *ret;
644
+
645
+ first = (*ptr);
646
+
647
+ /* don't allow zero-length code fences
648
+ */
649
+ if ( (first->next == 0) || iscodefence(first->next, first->count, 0, flags) )
650
+ return 0;
651
+
652
+ /* find the closing fence, discard the fences,
653
+ * return a Paragraph with the contents
654
+ */
655
+ for ( r = first; r && r->next; r = r->next )
656
+ if ( iscodefence(r->next, first->count, first->kind, flags) ) {
657
+ (*ptr) = r->next->next;
658
+ ret = Pp(d, first->next, CODE);
659
+ if (S(first->text) - first->count > 0) {
660
+ char *lang_attr = T(first->text) + first->count;
661
+ while ( *lang_attr != 0 && *lang_attr == ' ' ) lang_attr++;
662
+ ret->lang = strdup(lang_attr);
663
+ }
664
+ else {
665
+ ret->lang = 0;
666
+ }
667
+ ___mkd_freeLine(first);
668
+ ___mkd_freeLine(r->next);
669
+ r->next = 0;
670
+ return ret;
671
+ }
672
+ return 0;
673
+ }
674
+
675
+
676
+ static int
677
+ centered(Line *first, Line *last)
678
+ {
679
+
680
+ if ( first&&last ) {
681
+ int len = S(last->text);
682
+
683
+ if ( (len > 2) && (strncmp(T(first->text), "->", 2) == 0)
684
+ && (strncmp(T(last->text)+len-2, "<-", 2) == 0) ) {
685
+ CLIP(first->text, 0, 2);
686
+ S(last->text) -= 2;
687
+ return CENTER;
688
+ }
689
+ }
690
+ return 0;
691
+ }
692
+
693
+
694
+ static int
695
+ endoftextblock(Line *t, int toplevelblock, DWORD flags)
696
+ {
697
+ int z;
698
+
699
+ if ( end_of_block(t, flags) || isquote(t) )
700
+ return 1;
701
+
702
+ /* HORRIBLE STANDARDS KLUDGES:
703
+ * 1. non-toplevel paragraphs absorb adjacent code blocks
704
+ * 2. Toplevel paragraphs eat absorb adjacent list items,
705
+ * but sublevel blocks behave properly.
706
+ * (What this means is that we only need to check for code
707
+ * blocks at toplevel, and only check for list items at
708
+ * nested levels.)
709
+ */
710
+ return toplevelblock ? 0 : islist(t,&z,flags,&z);
711
+ }
712
+
713
+
714
+ static Line *
715
+ textblock(Paragraph *p, int toplevel, DWORD flags)
716
+ {
717
+ Line *t, *next;
718
+
719
+ for ( t = p->text; t ; t = next ) {
720
+ if ( ((next = t->next) == 0) || endoftextblock(next, toplevel, flags) ) {
721
+ p->align = centered(p->text, t);
722
+ t->next = 0;
723
+ return next;
724
+ }
725
+ }
726
+ return t;
727
+ }
728
+
729
+
730
/* length of the id: or class: kind in a special div-not-quote block
 *
 * Returns 3 if <p> starts with "id:", 6 if it starts with "class:"
 * (both matched without regard to case), and 0 otherwise.
 */
static int
szmarkerclass(char *p)
{
    static const struct {
        const char *name;
        int len;
    } kinds[] = {
        { "id:",    3 },
        { "class:", 6 },
    };
    size_t k;

    for ( k = 0; k < sizeof kinds / sizeof kinds[0]; k++ )
        if ( strncasecmp(p, kinds[k].name, kinds[k].len) == 0 )
            return kinds[k].len;

    return 0;
}
741
+
742
+
743
+ /*
744
+ * check if the first line of a quoted block is the special div-not-quote
745
+ * marker %[kind:]name%
746
+ */
747
+ #define iscsschar(c) (isalpha(c) || (c == '-') || (c == '_') )
748
+
749
+ static int
750
+ isdivmarker(Line *p, int start, DWORD flags)
751
+ {
752
+ char *s;
753
+ int last, i;
754
+
755
+ if ( flags & (MKD_NODIVQUOTE|MKD_STRICT) )
756
+ return 0;
757
+
758
+ start = nextnonblank(p, start);
759
+ last= S(p->text) - (1 + start);
760
+ s = T(p->text) + start;
761
+
762
+ if ( (last <= 0) || (*s != '%') || (s[last] != '%') )
763
+ return 0;
764
+
765
+ i = szmarkerclass(s+1);
766
+
767
+ if ( !iscsschar(s[i+1]) )
768
+ return 0;
769
+ while ( ++i < last )
770
+ if ( !(isdigit(s[i]) || iscsschar(s[i])) )
771
+ return 0;
772
+
773
+ return 1;
774
+ }
775
+
776
+
777
+ /*
778
+ * accumulate a blockquote.
779
+ *
780
+ * one sick horrible thing about blockquotes is that even though
781
+ * it just takes ^> to start a quote, following lines, if quoted,
782
+ * assume that the prefix is ``> ''. This means that code needs
783
+ * to be indented *5* spaces from the leading '>', but *4* spaces
784
+ * from the start of the line. This does not appear to be
785
+ * documented in the reference implementation, but it's the
786
+ * way the markdown sample web form at Daring Fireball works.
787
+ */
788
+ static Line *
789
+ quoteblock(Paragraph *p, DWORD flags)
790
+ {
791
+ Line *t, *q;
792
+ int qp;
793
+
794
+ for ( t = p->text; t ; t = q ) {
795
+ if ( isquote(t) ) {
796
+ /* clip leading spaces */
797
+ for (qp = 0; T(t->text)[qp] != '>'; qp ++)
798
+ /* assert: the first nonblank character on this line
799
+ * will be a >
800
+ */;
801
+ /* clip '>' */
802
+ qp++;
803
+ /* clip next space, if any */
804
+ if ( T(t->text)[qp] == ' ' )
805
+ qp++;
806
+ CLIP(t->text, 0, qp);
807
+ UNCHECK(t);
808
+ t->dle = mkd_firstnonblank(t);
809
+ }
810
+
811
+ q = skipempty(t->next);
812
+
813
+ if ( (q == 0) || ((q != t->next) && (!isquote(q) || isdivmarker(q,1,flags))) ) {
814
+ ___mkd_freeLineRange(t, q);
815
+ t = q;
816
+ break;
817
+ }
818
+ }
819
+ if ( isdivmarker(p->text,0,flags) ) {
820
+ char *prefix = "class";
821
+ int i;
822
+
823
+ q = p->text;
824
+ p->text = p->text->next;
825
+
826
+ if ( (i = szmarkerclass(1+T(q->text))) == 3 )
827
+ /* and this would be an "%id:" prefix */
828
+ prefix="id";
829
+
830
+ if ( p->ident = malloc(4+strlen(prefix)+S(q->text)) )
831
+ sprintf(p->ident, "%s=\"%.*s\"", prefix, S(q->text)-(i+2),
832
+ T(q->text)+(i+1) );
833
+
834
+ ___mkd_freeLine(q);
835
+ }
836
+ return t;
837
+ }
838
+
839
+
840
+ typedef int (*linefn)(Line *);
841
+
842
+
843
+ /*
844
+ * pull in a list block. A list block starts with a list marker and
845
+ * runs until the next list marker, the next non-indented paragraph,
846
+ * or EOF. You do not have to indent nonblank lines after the list
847
+ * marker, but multiple paragraphs need to start with a 4-space indent.
848
+ */
849
+ static Line *
850
+ listitem(Paragraph *p, int indent, DWORD flags, linefn check)
851
+ {
852
+ Line *t, *q;
853
+ int clip = indent;
854
+ int z;
855
+
856
+ for ( t = p->text; t ; t = q) {
857
+ CLIP(t->text, 0, clip);
858
+ UNCHECK(t);
859
+ t->dle = mkd_firstnonblank(t);
860
+
861
+ /* even though we had to trim a long leader off this item,
862
+ * the indent for trailing paragraphs is still 4...
863
+ */
864
+ if (indent > 4) {
865
+ indent = 4;
866
+ }
867
+ if ( (q = skipempty(t->next)) == 0 ) {
868
+ ___mkd_freeLineRange(t,q);
869
+ return 0;
870
+ }
871
+
872
+ /* after a blank line, the next block needs to start with a line
873
+ * that's indented 4(? -- reference implementation allows a 1
874
+ * character indent, but that has unfortunate side effects here)
875
+ * spaces, but after that the line doesn't need any indentation
876
+ */
877
+ if ( q != t->next ) {
878
+ if (q->dle < indent) {
879
+ q = t->next;
880
+ t->next = 0;
881
+ return q;
882
+ }
883
+ /* indent at least 2, and at most as
884
+ * as far as the initial line was indented. */
885
+ indent = clip ? clip : 2;
886
+ }
887
+
888
+ if ( (q->dle < indent) && (ishr(q,flags) || islist(q,&z,flags,&z)
889
+ || (check && (*check)(q)))
890
+ && !issetext(q,&z,flags) ) {
891
+ q = t->next;
892
+ t->next = 0;
893
+ return q;
894
+ }
895
+
896
+ clip = (q->dle > indent) ? indent : q->dle;
897
+ }
898
+ return t;
899
+ }
900
+
901
+
902
+ static Line *
903
+ definition_block(Paragraph *top, int clip, MMIOT *f, int kind)
904
+ {
905
+ ParagraphRoot d = { 0, 0 };
906
+ Paragraph *p;
907
+ Line *q = top->text, *text = 0, *labels;
908
+ int z, para;
909
+
910
+ while (( labels = q )) {
911
+
912
+ if ( (q = isdefinition(labels, &z, &kind, f->flags)) == 0 )
913
+ break;
914
+
915
+ if ( (text = skipempty(q->next)) == 0 )
916
+ break;
917
+
918
+ if ( para = (text != q->next) )
919
+ ___mkd_freeLineRange(q, text);
920
+
921
+ q->next = 0;
922
+ if ( kind == 1 /* discount dl */ )
923
+ for ( q = labels; q; q = q->next ) {
924
+ CLIP(q->text, 0, 1);
925
+ UNCHECK(q);
926
+ S(q->text)--;
927
+ }
928
+
929
+ dd_block:
930
+ p = Pp(&d, text, LISTITEM);
931
+
932
+ text = listitem(p, clip, f->flags, (kind==2) ? is_extra_dd : 0);
933
+ p->down = compile(p->text, 0, f);
934
+ p->text = labels; labels = 0;
935
+
936
+ if ( para && p->down ) p->down->align = PARA;
937
+
938
+ if ( (q = skipempty(text)) == 0 )
939
+ break;
940
+
941
+ if ( para = (q != text) ) {
942
+ Line anchor;
943
+
944
+ anchor.next = text;
945
+ ___mkd_freeLineRange(&anchor,q);
946
+ text = q;
947
+
948
+ }
949
+
950
+ if ( kind == 2 && is_extra_dd(q) )
951
+ goto dd_block;
952
+ }
953
+ top->text = 0;
954
+ top->down = T(d);
955
+ return text;
956
+ }
957
+
958
+
959
+ static Line *
960
+ enumerated_block(Paragraph *top, int clip, MMIOT *f, int list_class)
961
+ {
962
+ ParagraphRoot d = { 0, 0 };
963
+ Paragraph *p;
964
+ Line *q = top->text, *text;
965
+ int para = 0, z;
966
+
967
+ while (( text = q )) {
968
+
969
+ p = Pp(&d, text, LISTITEM);
970
+ text = listitem(p, clip, f->flags, 0);
971
+
972
+ p->down = compile(p->text, 0, f);
973
+ p->text = 0;
974
+
975
+ if ( para && p->down ) p->down->align = PARA;
976
+
977
+ if ( (q = skipempty(text)) == 0
978
+ || islist(q, &clip, f->flags, &z) != list_class )
979
+ break;
980
+
981
+ if ( para = (q != text) ) {
982
+ Line anchor;
983
+
984
+ anchor.next = text;
985
+ ___mkd_freeLineRange(&anchor, q);
986
+
987
+ if ( p->down ) p->down->align = PARA;
988
+ }
989
+ }
990
+ top->text = 0;
991
+ top->down = T(d);
992
+ return text;
993
+ }
994
+
995
+
996
/* If <c> can open a footnote title, return the character that closes
 * it: a single or double quote closes with itself, '(' closes with
 * ')'.  Any other character returns 0.
 */
static int
tgood(char c)
{
    if ( c == '\'' || c == '"' )
        return c;

    if ( c == '(' )
        return ')';

    return 0;
}
1006
+
1007
+
1008
+ /*
1009
+ * add a new (image or link) footnote to the footnote table
1010
+ */
1011
+ static Line*
1012
+ addfootnote(Line *p, MMIOT* f)
1013
+ {
1014
+ int j, i;
1015
+ int c;
1016
+ Line *np = p->next;
1017
+
1018
+ Footnote *foot = &EXPAND(f->footnotes->note);
1019
+
1020
+ CREATE(foot->tag);
1021
+ CREATE(foot->link);
1022
+ CREATE(foot->title);
1023
+ foot->flags = foot->height = foot->width = 0;
1024
+
1025
+ for (j=i=p->dle+1; T(p->text)[j] != ']'; j++)
1026
+ EXPAND(foot->tag) = T(p->text)[j];
1027
+
1028
+ EXPAND(foot->tag) = 0;
1029
+ S(foot->tag)--;
1030
+ j = nextnonblank(p, j+2);
1031
+
1032
+ if ( (f->flags & MKD_EXTRA_FOOTNOTE) && (T(foot->tag)[0] == '^') ) {
1033
+ /* need to consume all lines until non-indented block? */
1034
+ while ( j < S(p->text) )
1035
+ EXPAND(foot->title) = T(p->text)[j++];
1036
+ goto skip_to_end;
1037
+ }
1038
+
1039
+ while ( (j < S(p->text)) && !isspace(T(p->text)[j]) )
1040
+ EXPAND(foot->link) = T(p->text)[j++];
1041
+ EXPAND(foot->link) = 0;
1042
+ S(foot->link)--;
1043
+ j = nextnonblank(p,j);
1044
+
1045
+ if ( T(p->text)[j] == '=' ) {
1046
+ sscanf(T(p->text)+j, "=%dx%d", &foot->width, &foot->height);
1047
+ while ( (j < S(p->text)) && !isspace(T(p->text)[j]) )
1048
+ ++j;
1049
+ j = nextnonblank(p,j);
1050
+ }
1051
+
1052
+
1053
+ if ( (j >= S(p->text)) && np && np->dle && tgood(T(np->text)[np->dle]) ) {
1054
+ ___mkd_freeLine(p);
1055
+ p = np;
1056
+ np = p->next;
1057
+ j = p->dle;
1058
+ }
1059
+
1060
+ if ( (c = tgood(T(p->text)[j])) ) {
1061
+ /* Try to take the rest of the line as a comment; read to
1062
+ * EOL, then shrink the string back to before the final
1063
+ * quote.
1064
+ */
1065
+ ++j; /* skip leading quote */
1066
+
1067
+ while ( j < S(p->text) )
1068
+ EXPAND(foot->title) = T(p->text)[j++];
1069
+
1070
+ while ( S(foot->title) && T(foot->title)[S(foot->title)-1] != c )
1071
+ --S(foot->title);
1072
+ if ( S(foot->title) ) /* skip trailing quote */
1073
+ --S(foot->title);
1074
+ EXPAND(foot->title) = 0;
1075
+ --S(foot->title);
1076
+ }
1077
+
1078
+ skip_to_end:
1079
+ ___mkd_freeLine(p);
1080
+ return np;
1081
+ }
1082
+
1083
+
1084
+ /*
1085
+ * allocate a paragraph header, link it to the
1086
+ * tail of the current document
1087
+ */
1088
+ static Paragraph *
1089
+ Pp(ParagraphRoot *d, Line *ptr, int typ)
1090
+ {
1091
+ Paragraph *ret = calloc(sizeof *ret, 1);
1092
+
1093
+ ret->text = ptr;
1094
+ ret->typ = typ;
1095
+
1096
+ return ATTACH(*d, ret);
1097
+ }
1098
+
1099
+
1100
+
1101
+ static Line*
1102
+ consume(Line *ptr, int *eaten)
1103
+ {
1104
+ Line *next;
1105
+ int blanks=0;
1106
+
1107
+ for (; ptr && blankline(ptr); ptr = next, blanks++ ) {
1108
+ next = ptr->next;
1109
+ ___mkd_freeLine(ptr);
1110
+ }
1111
+ if ( ptr ) *eaten = blanks;
1112
+ return ptr;
1113
+ }
1114
+
1115
+
1116
+ /*
1117
+ * top-level compilation; break the document into
1118
+ * style, html, and source blocks with footnote links
1119
+ * weeded out.
1120
+ */
1121
+ static Paragraph *
1122
+ compile_document(Line *ptr, MMIOT *f)
1123
+ {
1124
+ ParagraphRoot d = { 0, 0 };
1125
+ ANCHOR(Line) source = { 0, 0 };
1126
+ Paragraph *p = 0;
1127
+ struct kw *tag;
1128
+ int eaten, unclosed;
1129
+
1130
+ while ( ptr ) {
1131
+ if ( !(f->flags & MKD_NOHTML) && (tag = isopentag(ptr)) ) {
1132
+ int blocktype;
1133
+ /* If we encounter a html/style block, compile and save all
1134
+ * of the cached source BEFORE processing the html/style.
1135
+ */
1136
+ if ( T(source) ) {
1137
+ E(source)->next = 0;
1138
+ p = Pp(&d, 0, SOURCE);
1139
+ p->down = compile(T(source), 1, f);
1140
+ T(source) = E(source) = 0;
1141
+ }
1142
+
1143
+ if ( f->flags & MKD_NOSTYLE )
1144
+ blocktype = HTML;
1145
+ else
1146
+ blocktype = strcmp(tag->id, "STYLE") == 0 ? STYLE : HTML;
1147
+ p = Pp(&d, ptr, blocktype);
1148
+ ptr = htmlblock(p, tag, &unclosed);
1149
+ if ( unclosed ) {
1150
+ p->typ = SOURCE;
1151
+ p->down = compile(p->text, 1, f);
1152
+ p->text = 0;
1153
+ }
1154
+ }
1155
+ else if ( isfootnote(ptr) ) {
1156
+ /* footnotes, like cats, sleep anywhere; pull them
1157
+ * out of the input stream and file them away for
1158
+ * later processing
1159
+ */
1160
+ ptr = consume(addfootnote(ptr, f), &eaten);
1161
+ }
1162
+ else {
1163
+ /* source; cache it up to wait for eof or the
1164
+ * next html/style block
1165
+ */
1166
+ ATTACH(source,ptr);
1167
+ ptr = ptr->next;
1168
+ }
1169
+ }
1170
+ if ( T(source) ) {
1171
+ /* if there's any cached source at EOF, compile
1172
+ * it now.
1173
+ */
1174
+ E(source)->next = 0;
1175
+ p = Pp(&d, 0, SOURCE);
1176
+ p->down = compile(T(source), 1, f);
1177
+ }
1178
+ return T(d);
1179
+ }
1180
+
1181
+
1182
+ static int
1183
+ first_nonblank_before(Line *j, int dle)
1184
+ {
1185
+ return (j->dle < dle) ? j->dle : dle;
1186
+ }
1187
+
1188
+
1189
+ static int
1190
+ actually_a_table(MMIOT *f, Line *pp)
1191
+ {
1192
+ Line *r;
1193
+ int j;
1194
+ int c;
1195
+
1196
+ /* tables need to be turned on */
1197
+ if ( f->flags & (MKD_STRICT|MKD_NOTABLES) )
1198
+ return 0;
1199
+
1200
+ /* tables need three lines */
1201
+ if ( !(pp && pp->next && pp->next->next) ) {
1202
+ return 0;
1203
+ }
1204
+
1205
+ /* all lines must contain |'s */
1206
+ for (r = pp; r; r = r->next )
1207
+ if ( !(r->flags & PIPECHAR) ) {
1208
+ return 0;
1209
+ }
1210
+
1211
+ /* if the header has a leading |, all lines must have leading |'s */
1212
+ if ( T(pp->text)[pp->dle] == '|' ) {
1213
+ for ( r = pp; r; r = r->next )
1214
+ if ( T(r->text)[first_nonblank_before(r,pp->dle)] != '|' ) {
1215
+ return 0;
1216
+ }
1217
+ }
1218
+
1219
+ /* second line must be only whitespace, -, |, or : */
1220
+ r = pp->next;
1221
+
1222
+ for ( j=r->dle; j < S(r->text); ++j ) {
1223
+ c = T(r->text)[j];
1224
+
1225
+ if ( !(isspace(c)||(c=='-')||(c==':')||(c=='|')) ) {
1226
+ return 0;
1227
+ }
1228
+ }
1229
+
1230
+ return 1;
1231
+ }
1232
+
1233
+
1234
+ /*
1235
+ * break a collection of markdown input into
1236
+ * blocks of lists, code, html, and text to
1237
+ * be marked up.
1238
+ */
1239
+ static Paragraph *
1240
+ compile(Line *ptr, int toplevel, MMIOT *f)
1241
+ {
1242
+ ParagraphRoot d = { 0, 0 };
1243
+ Paragraph *p = 0;
1244
+ Line *r;
1245
+ int para = toplevel;
1246
+ int blocks = 0;
1247
+ int hdr_type, list_type, list_class, indent;
1248
+
1249
+ ptr = consume(ptr, &para);
1250
+
1251
+ while ( ptr ) {
1252
+ if ( iscode(ptr) ) {
1253
+ p = Pp(&d, ptr, CODE);
1254
+
1255
+ if ( f->flags & MKD_1_COMPAT) {
1256
+ /* HORRIBLE STANDARDS KLUDGE: the first line of every block
1257
+ * has trailing whitespace trimmed off.
1258
+ */
1259
+ ___mkd_tidy(&p->text->text);
1260
+ }
1261
+
1262
+ ptr = codeblock(p);
1263
+ }
1264
+ else if ( iscodefence(ptr,3,0,f->flags) && (p=fencedcodeblock(&d, &ptr, f->flags)) )
1265
+ /* yay, it's already done */ ;
1266
+ else if ( ishr(ptr, f->flags) ) {
1267
+ p = Pp(&d, 0, HR);
1268
+ r = ptr;
1269
+ ptr = ptr->next;
1270
+ ___mkd_freeLine(r);
1271
+ }
1272
+ else if ( list_class = islist(ptr, &indent, f->flags, &list_type) ) {
1273
+ if ( list_class == DL ) {
1274
+ p = Pp(&d, ptr, DL);
1275
+ ptr = definition_block(p, indent, f, list_type);
1276
+ }
1277
+ else {
1278
+ p = Pp(&d, ptr, list_type);
1279
+ ptr = enumerated_block(p, indent, f, list_class);
1280
+ }
1281
+ }
1282
+ else if ( isquote(ptr) ) {
1283
+ p = Pp(&d, ptr, QUOTE);
1284
+ ptr = quoteblock(p, f->flags);
1285
+ p->down = compile(p->text, 1, f);
1286
+ p->text = 0;
1287
+ }
1288
+ else if ( ishdr(ptr, &hdr_type, f->flags) ) {
1289
+ p = Pp(&d, ptr, HDR);
1290
+ ptr = headerblock(p, hdr_type);
1291
+ }
1292
+ else {
1293
+ p = Pp(&d, ptr, MARKUP);
1294
+ ptr = textblock(p, toplevel, f->flags);
1295
+ /* tables are a special kind of paragraph */
1296
+ if ( actually_a_table(f, p->text) )
1297
+ p->typ = TABLE;
1298
+ }
1299
+
1300
+ if ( (para||toplevel) && !p->align )
1301
+ p->align = PARA;
1302
+
1303
+ blocks++;
1304
+ para = toplevel || (blocks > 1);
1305
+ ptr = consume(ptr, &para);
1306
+
1307
+ if ( para && !p->align )
1308
+ p->align = PARA;
1309
+
1310
+ }
1311
+ return T(d);
1312
+ }
1313
+
1314
+
1315
+ /*
1316
+ * the guts of the markdown() function, ripped out so I can do
1317
+ * debugging.
1318
+ */
1319
+
1320
+ /*
1321
+ * prepare and compile `text`, returning a Paragraph tree.
1322
+ */
1323
+ int
1324
+ mkd_compile(Document *doc, DWORD flags)
1325
+ {
1326
+ if ( !doc )
1327
+ return 0;
1328
+
1329
+ flags &= USER_FLAGS;
1330
+
1331
+ if ( doc->compiled ) {
1332
+ if ( doc->ctx->flags == flags )
1333
+ return 1;
1334
+ else {
1335
+ if ( doc->code)
1336
+ ___mkd_freeParagraph(doc->code);
1337
+ if ( doc->ctx->footnotes )
1338
+ ___mkd_freefootnotes(doc->ctx);
1339
+ }
1340
+ }
1341
+
1342
+ doc->compiled = 1;
1343
+ memset(doc->ctx, 0, sizeof(MMIOT) );
1344
+ doc->ctx->ref_prefix= doc->ref_prefix;
1345
+ doc->ctx->cb = &(doc->cb);
1346
+ doc->ctx->flags = flags;
1347
+ CREATE(doc->ctx->in);
1348
+ doc->ctx->footnotes = malloc(sizeof doc->ctx->footnotes[0]);
1349
+ doc->ctx->footnotes->reference = 0;
1350
+ CREATE(doc->ctx->footnotes->note);
1351
+
1352
+ mkd_initialize();
1353
+
1354
+ doc->code = compile_document(T(doc->content), doc->ctx);
1355
+ qsort(T(doc->ctx->footnotes->note), S(doc->ctx->footnotes->note),
1356
+ sizeof T(doc->ctx->footnotes->note)[0],
1357
+ (stfu)__mkd_footsort);
1358
+ memset(&doc->content, 0, sizeof doc->content);
1359
+ return 1;
1360
+ }
1361
+