chaptastic-rdiscount 1.4.1

Sign up to get free protection for your applications and to get access to all the features.
data/ext/markdown.c ADDED
@@ -0,0 +1,969 @@
1
+ /* markdown: a C implementation of John Gruber's Markdown markup language.
2
+ *
3
+ * Copyright (C) 2007 David L Parsons.
4
+ * The redistribution terms are provided in the COPYRIGHT file that must
5
+ * be distributed with this source code.
6
+ */
7
+ #include <stdio.h>
8
+ #include <string.h>
9
+ #include <stdarg.h>
10
+ #include <stdlib.h>
11
+ #include <time.h>
12
+ #include <ctype.h>
13
+
14
+ #include "config.h"
15
+
16
+ #include "cstring.h"
17
+ #include "markdown.h"
18
+ #include "amalloc.h"
19
+
20
/* Table of block-level html tags.  A line that opens with one of these
 * tags is passed through as raw html instead of being marked up.
 */
struct kw {
    char *id;		/* tag name, without the leading '<' */
    int   size;		/* length of id, not counting the NUL */
    int   selfclose;	/* 1 for entries built with SC(), 0 for KW() */
};

/* KW() builds an ordinary entry, SC() one flagged as selfclose.
 */
#define KW(x)	{ x, sizeof(x)-1, 0 }
#define SC(x)	{ x, sizeof(x)-1, 1 }

static struct kw blocktags[] = {
    KW("!--"),       KW("STYLE"),   KW("SCRIPT"),
    KW("ADDRESS"),   KW("BDO"),     KW("BLOCKQUOTE"),
    KW("CENTER"),    KW("DFN"),     KW("DIV"),        KW("H1"),
    KW("H2"),        KW("H3"),      KW("H4"),         KW("H5"),
    KW("H6"),        KW("LISTING"), KW("NOBR"),
    KW("UL"),        KW("P"),       KW("OL"),         KW("DL"),
    KW("PLAINTEXT"), KW("PRE"),     KW("TABLE"),
    KW("WBR"),       KW("XMP"),     SC("HR"),         SC("BR")
};

/* number of entries in blocktags[] */
#define SZTAGS	(sizeof blocktags / sizeof blocktags[0])
#define MAXTAG	11	/* sizeof "BLOCKQUOTE" */
41
+
42
+ typedef int (*stfu)(const void*,const void*);
43
+
44
+ typedef ANCHOR(Paragraph) ParagraphRoot;
45
+
46
+
47
+ /* case insensitive string sort (for qsort() and bsearch() of block tags)
48
+ */
49
+ static int
50
+ casort(struct kw *a, struct kw *b)
51
+ {
52
+ if ( a->size != b->size )
53
+ return a->size - b->size;
54
+ return strncasecmp(a->id, b->id, b->size);
55
+ }
56
+
57
+
58
+ /* case insensitive string sort for Footnote tags.
59
+ */
60
+ int
61
+ __mkd_footsort(Footnote *a, Footnote *b)
62
+ {
63
+ int i;
64
+ char ac, bc;
65
+
66
+ if ( S(a->tag) != S(b->tag) )
67
+ return S(a->tag) - S(b->tag);
68
+
69
+ for ( i=0; i < S(a->tag); i++) {
70
+ ac = tolower(T(a->tag)[i]);
71
+ bc = tolower(T(b->tag)[i]);
72
+
73
+ if ( isspace(ac) && isspace(bc) )
74
+ continue;
75
+ if ( ac != bc )
76
+ return ac - bc;
77
+ }
78
+ return 0;
79
+ }
80
+
81
+
82
+ /* find the first blank character after position <i>
83
+ */
84
+ static int
85
+ nextblank(Line *t, int i)
86
+ {
87
+ while ( (i < S(t->text)) && !isspace(T(t->text)[i]) )
88
+ ++i;
89
+ return i;
90
+ }
91
+
92
+
93
+ /* find the next nonblank character after position <i>
94
+ */
95
+ static int
96
+ nextnonblank(Line *t, int i)
97
+ {
98
+ while ( (i < S(t->text)) && isspace(T(t->text)[i]) )
99
+ ++i;
100
+ return i;
101
+ }
102
+
103
+
104
+ /* find the first nonblank character on the Line.
105
+ */
106
+ int
107
+ mkd_firstnonblank(Line *p)
108
+ {
109
+ return nextnonblank(p,0);
110
+ }
111
+
112
+
113
+ static int
114
+ blankline(Line *p)
115
+ {
116
+ return ! (p && (S(p->text) > p->dle) );
117
+ }
118
+
119
+
120
+ static Line *
121
+ skipempty(Line *p)
122
+ {
123
+ while ( p && (p->dle == S(p->text)) )
124
+ p = p->next;
125
+ return p;
126
+ }
127
+
128
+
129
+ void
130
+ ___mkd_tidy(Cstring *t)
131
+ {
132
+ while ( S(*t) && isspace(T(*t)[S(*t)-1]) )
133
+ --S(*t);
134
+ }
135
+
136
+
137
+ static struct kw *
138
+ isopentag(Line *p)
139
+ {
140
+ int i=0, len;
141
+ struct kw key, *ret;
142
+
143
+ if ( !p ) return 0;
144
+
145
+ len = S(p->text);
146
+
147
+ if ( len < 3 || T(p->text)[0] != '<' )
148
+ return 0;
149
+
150
+ /* find how long the tag is so we can check to see if
151
+ * it's a block-level tag
152
+ */
153
+ for ( i=1; i < len && T(p->text)[i] != '>'
154
+ && T(p->text)[i] != '/'
155
+ && !isspace(T(p->text)[i]); ++i )
156
+ ;
157
+
158
+ key.id = T(p->text)+1;
159
+ key.size = i-1;
160
+
161
+ if ( ret = bsearch(&key, blocktags, SZTAGS, sizeof key, (stfu)casort))
162
+ return ret;
163
+
164
+ return 0;
165
+ }
166
+
167
+
168
+ typedef struct _flo {
169
+ Line *t;
170
+ int i;
171
+ } FLO;
172
+
173
+
174
+ static int
175
+ flogetc(FLO *f)
176
+ {
177
+ if ( f && f->t ) {
178
+ if ( f->i < S(f->t->text) )
179
+ return T(f->t->text)[f->i++];
180
+ f->t = f->t->next;
181
+ f->i = 0;
182
+ return flogetc(f);
183
+ }
184
+ return EOF;
185
+ }
186
+
187
+
188
+ static Line *
189
+ htmlblock(Paragraph *p, struct kw *tag)
190
+ {
191
+ Line *ret;
192
+ FLO f = { p->text, 0 };
193
+ int c;
194
+ int i, closing, depth=0;
195
+
196
+ if ( tag->selfclose || (tag->size >= MAXTAG) ) {
197
+ ret = f.t->next;
198
+ f.t->next = 0;
199
+ return ret;
200
+ }
201
+
202
+ while ( (c = flogetc(&f)) != EOF ) {
203
+ if ( c == '<' ) {
204
+ /* tag? */
205
+ c = flogetc(&f);
206
+ if ( c == '!' ) { /* comment? */
207
+ if ( flogetc(&f) == '-' && flogetc(&f) == '-' ) {
208
+ /* yes */
209
+ while ( (c = flogetc(&f)) != EOF ) {
210
+ if ( c == '-' && flogetc(&f) == '-'
211
+ && flogetc(&f) == '>')
212
+ /* consumed whole comment */
213
+ break;
214
+ }
215
+ }
216
+ }
217
+ else {
218
+ if ( closing = (c == '/') ) c = flogetc(&f);
219
+
220
+ for ( i=0; i < tag->size; c=flogetc(&f) ) {
221
+ if ( tag->id[i++] != toupper(c) )
222
+ break;
223
+ }
224
+
225
+ if ( (i == tag->size) && !isalnum(c) ) {
226
+ depth = depth + (closing ? -1 : 1);
227
+ if ( depth == 0 ) {
228
+ while ( c != EOF && c != '>' ) {
229
+ /* consume trailing gunk in close tag */
230
+ c = flogetc(&f);
231
+ }
232
+ ret = f.t->next;
233
+ f.t->next = 0;
234
+ return ret;
235
+ }
236
+ }
237
+ }
238
+ }
239
+ }
240
+ return 0;
241
+ }
242
+
243
+
244
+ static Line *
245
+ comment(Paragraph *p)
246
+ {
247
+ Line *t, *ret;
248
+
249
+ for ( t = p->text; t ; t = t->next) {
250
+ if ( strstr(T(t->text), "-->") ) {
251
+ ret = t->next;
252
+ t->next = 0;
253
+ return ret;
254
+ }
255
+ }
256
+ return t;
257
+
258
+ }
259
+
260
+
261
+ /* footnotes look like ^<whitespace>{0,3}[stuff]: <content>$
262
+ */
263
+ static int
264
+ isfootnote(Line *t)
265
+ {
266
+ int i;
267
+
268
+ if ( ( (i = t->dle) > 3) || (T(t->text)[i] != '[') )
269
+ return 0;
270
+
271
+ for ( ++i; i < S(t->text) ; ++i ) {
272
+ if ( T(t->text)[i] == '[' )
273
+ return 0;
274
+ else if ( T(t->text)[i] == ']' && T(t->text)[i+1] == ':' )
275
+ return 1;
276
+ }
277
+ return 0;
278
+ }
279
+
280
+
281
+ static int
282
+ isquote(Line *t)
283
+ {
284
+ return ( T(t->text)[0] == '>' );
285
+ }
286
+
287
+
288
/* is <c> one of the characters a horizontal rule can be made of?
 */
static int
dashchar(char c)
{
    switch ( c ) {
    case '*':
    case '-':
    case '_':
        return 1;
    default:
        return 0;
    }
}
293
+
294
+
295
+ static int
296
+ iscode(Line *t)
297
+ {
298
+ return (t->dle >= 4);
299
+ }
300
+
301
+
302
+ static int
303
+ ishr(Line *t)
304
+ {
305
+ int i, count=0;
306
+ char dash = 0;
307
+ char c;
308
+
309
+ if ( iscode(t) ) return 0;
310
+
311
+ for ( i = 0; i < S(t->text); i++) {
312
+ c = T(t->text)[i];
313
+ if ( (dash == 0) && dashchar(c) )
314
+ dash = c;
315
+
316
+ if ( c == dash ) ++count;
317
+ else if ( !isspace(c) )
318
+ return 0;
319
+ }
320
+ return (count >= 3);
321
+ }
322
+
323
+
324
+ static int
325
+ ishdr(Line *t, int *htyp)
326
+ {
327
+ int i;
328
+
329
+
330
+ /* first check for etx-style ###HEADER###
331
+ */
332
+
333
+ /* leading run of `#`'s ?
334
+ */
335
+ for ( i=0; T(t->text)[i] == '#'; ++i)
336
+ ;
337
+
338
+ /* ANY leading `#`'s make this into an ETX header
339
+ */
340
+ if ( i && (i < S(t->text) || i > 1) ) {
341
+ *htyp = ETX;
342
+ return 1;
343
+ }
344
+
345
+ /* then check for setext-style HEADER
346
+ * ======
347
+ */
348
+
349
+ if ( t->next ) {
350
+ char *q = T(t->next->text);
351
+
352
+ if ( (*q == '=') || (*q == '-') ) {
353
+ for (i=1; i < S(t->next->text); i++)
354
+ if ( q[0] != q[i] )
355
+ return 0;
356
+ *htyp = SETEXT;
357
+ return 1;
358
+ }
359
+ }
360
+ return 0;
361
+ }
362
+
363
+
364
+ static int
365
+ isdefinition(Line *t)
366
+ {
367
+ #if DL_TAG_EXTENSION
368
+ return t && t->next
369
+ && (S(t->text) > 2)
370
+ && (t->dle == 0)
371
+ && (T(t->text)[0] == '=')
372
+ && (T(t->text)[S(t->text)-1] == '=')
373
+ && ( (t->next->dle >= 4) || isdefinition(t->next) );
374
+ #else
375
+ return 0;
376
+ #endif
377
+ }
378
+
379
+
380
+ static int
381
+ islist(Line *t, int *trim)
382
+ {
383
+ int i, j;
384
+ char *q;
385
+
386
+ if ( iscode(t) || blankline(t) || ishdr(t,&i) || ishr(t) )
387
+ return 0;
388
+
389
+ if ( isdefinition(t) ) {
390
+ *trim = 4;
391
+ return DL;
392
+ }
393
+
394
+ if ( strchr("*-+", T(t->text)[t->dle]) && isspace(T(t->text)[t->dle+1]) ) {
395
+ i = nextnonblank(t, t->dle+1);
396
+ *trim = (i > 4) ? 4 : i;
397
+ return UL;
398
+ }
399
+
400
+ if ( (j = nextblank(t,t->dle)) > t->dle ) {
401
+ if ( T(t->text)[j-1] == '.' ) {
402
+ #if ALPHA_LIST
403
+ if ( (j == t->dle + 2) && isalpha(T(t->text)[t->dle]) ) {
404
+ j = nextnonblank(t,j);
405
+ *trim = j;
406
+ return AL;
407
+ }
408
+ #endif
409
+ strtoul(T(t->text)+t->dle, &q, 10);
410
+ if ( (q > T(t->text)+t->dle) && (q == T(t->text) + (j-1)) ) {
411
+ j = nextnonblank(t,j);
412
+ *trim = j;
413
+ return OL;
414
+ }
415
+ }
416
+ }
417
+ return 0;
418
+ }
419
+
420
+
421
+ static Line *
422
+ headerblock(Paragraph *pp, int htyp)
423
+ {
424
+ Line *ret = 0;
425
+ Line *p = pp->text;
426
+ int i, j;
427
+
428
+ switch (htyp) {
429
+ case SETEXT:
430
+ /* p->text is header, p->next->text is -'s or ='s
431
+ */
432
+ pp->hnumber = (T(p->next->text)[0] == '=') ? 1 : 2;
433
+
434
+ ret = p->next->next;
435
+ ___mkd_freeLine(p->next);
436
+ p->next = 0;
437
+ break;
438
+
439
+ case ETX:
440
+ /* p->text is ###header###, so we need to trim off
441
+ * the leading and trailing `#`'s
442
+ */
443
+
444
+ for (i=0; (T(p->text)[i] == T(p->text)[0]) && (i < S(p->text)-1); i++)
445
+ ;
446
+
447
+ pp->hnumber = i;
448
+
449
+ while ( (i < S(p->text)) && isspace(T(p->text)[i]) )
450
+ ++i;
451
+
452
+ CLIP(p->text, 0, i);
453
+
454
+ for (j=S(p->text); (j > 1) && (T(p->text)[j-1] == '#'); --j)
455
+ ;
456
+
457
+ while ( j && isspace(T(p->text)[j-1]) )
458
+ --j;
459
+
460
+ S(p->text) = j;
461
+
462
+ ret = p->next;
463
+ p->next = 0;
464
+ break;
465
+ }
466
+ return ret;
467
+ }
468
+
469
+
470
+ static Line *
471
+ codeblock(Paragraph *p)
472
+ {
473
+ Line *t = p->text, *r;
474
+
475
+ for ( ; t; t = r ) {
476
+ CLIP(t->text,0,4);
477
+ t->dle = mkd_firstnonblank(t);
478
+
479
+ if ( !( (r = skipempty(t->next)) && iscode(r)) ) {
480
+ ___mkd_freeLineRange(t,r);
481
+ t->next = 0;
482
+ return r;
483
+ }
484
+ }
485
+ return t;
486
+ }
487
+
488
+
489
+ static int
490
+ centered(Line *first, Line *last)
491
+ {
492
+
493
+ if ( first&&last ) {
494
+ int len = S(last->text);
495
+
496
+ if ( (len > 2) && (strncmp(T(first->text), "->", 2) == 0)
497
+ && (strncmp(T(last->text)+len-2, "<-", 2) == 0) ) {
498
+ CLIP(first->text, 0, 2);
499
+ S(last->text) -= 2;
500
+ return CENTER;
501
+ }
502
+ }
503
+ return 0;
504
+ }
505
+
506
+
507
+ static int
508
+ endoftextblock(Line *t, int toplevelblock)
509
+ {
510
+ int z;
511
+
512
+ if ( blankline(t)||isquote(t)||iscode(t)||ishdr(t,&z)||ishr(t) )
513
+ return 1;
514
+
515
+ /* HORRIBLE STANDARDS KLUDGE: Toplevel paragraphs eat absorb adjacent
516
+ * list items, but sublevel blocks behave properly.
517
+ */
518
+ return toplevelblock ? 0 : islist(t,&z);
519
+ }
520
+
521
+
522
+ static Line *
523
+ textblock(Paragraph *p, int toplevel)
524
+ {
525
+ Line *t, *next;
526
+
527
+ for ( t = p->text; t ; t = next ) {
528
+ if ( ((next = t->next) == 0) || endoftextblock(next, toplevel) ) {
529
+ p->align = centered(p->text, t);
530
+ t->next = 0;
531
+ return next;
532
+ }
533
+ }
534
+ return t;
535
+ }
536
+
537
+
538
/* length of the "id:" or "class:" kind prefix (matched without regard
 * to case) at the start of a div-not-quote marker; 0 if there is none.
 */
static int
szmarkerclass(char *p)
{
    int len = 0;

    if ( strncasecmp(p, "id:", 3) == 0 )
        len = 3;
    else if ( strncasecmp(p, "class:", 6) == 0 )
        len = 6;

    return len;
}
549
+
550
+
551
+ /*
552
+ * check if the first line of a quoted block is the special div-not-quote
553
+ * marker %[kind:]name%
554
+ */
555
+ static int
556
+ isdivmarker(Line *p)
557
+ {
558
+ #if DIV_QUOTE
559
+ char *s = T(p->text);
560
+ int len = S(p->text);
561
+ int i;
562
+
563
+ if ( !(len && s[0] == '%' && s[len-1] == '%') ) return 0;
564
+
565
+ i = szmarkerclass(s+1);
566
+ --len;
567
+
568
+ while ( ++i < len )
569
+ if ( !isalnum(s[i]) )
570
+ return 0;
571
+
572
+ return 1;
573
+ #else
574
+ return 0;
575
+ #endif
576
+ }
577
+
578
+
579
+ /*
580
+ * accumulate a blockquote.
581
+ *
582
+ * one sick horrible thing about blockquotes is that even though
583
+ * it just takes ^> to start a quote, following lines, if quoted,
584
+ * assume that the prefix is ``>''. This means that code needs
585
+ * to be indented *5* spaces from the leading '>', but *4* spaces
586
+ * from the start of the line. This does not appear to be
587
+ * documented in the reference implementation, but it's the
588
+ * way the markdown sample web form at Daring Fireball works.
589
+ */
590
+ static Line *
591
+ quoteblock(Paragraph *p)
592
+ {
593
+ Line *t, *q;
594
+ int qp;
595
+
596
+ for ( t = p->text; t ; t = q ) {
597
+ if ( isquote(t) ) {
598
+ qp = (T(t->text)[1] == ' ') ? 2 : 1;
599
+ CLIP(t->text, 0, qp);
600
+ t->dle = mkd_firstnonblank(t);
601
+ }
602
+
603
+ if ( !(q = skipempty(t->next)) || ((q != t->next) && !isquote(q)) ) {
604
+ ___mkd_freeLineRange(t, q);
605
+ t = q;
606
+ break;
607
+ }
608
+ }
609
+ if ( isdivmarker(p->text) ) {
610
+ char *prefix = "class";
611
+ int i;
612
+
613
+ q = p->text;
614
+ p->text = p->text->next;
615
+
616
+ if ( (i = szmarkerclass(1+T(q->text))) == 3 )
617
+ /* and this would be an "%id:" prefix */
618
+ prefix="id";
619
+
620
+ if ( p->ident = malloc(4+strlen(prefix)+S(q->text)) )
621
+ sprintf(p->ident, "%s=\"%.*s\"", prefix, S(q->text)-(i+2),
622
+ T(q->text)+(i+1) );
623
+
624
+ ___mkd_freeLine(q);
625
+ }
626
+ return t;
627
+ }
628
+
629
+
630
+ static Paragraph *Pp(ParagraphRoot *, Line *, int);
631
+ static Paragraph *compile(Line *, int, MMIOT *);
632
+
633
+
634
+ /*
635
+ * pull in a list block. A list block starts with a list marker and
636
+ * runs until the next list marker, the next non-indented paragraph,
637
+ * or EOF. You do not have to indent nonblank lines after the list
638
+ * marker, but multiple paragraphs need to start with a 4-space indent.
639
+ */
640
+ static Line *
641
+ listitem(Paragraph *p, int indent)
642
+ {
643
+ Line *t, *q;
644
+ int clip = indent;
645
+ int z;
646
+
647
+ for ( t = p->text; t ; t = q) {
648
+ CLIP(t->text, 0, clip);
649
+ t->dle = mkd_firstnonblank(t);
650
+
651
+ if ( (q = skipempty(t->next)) == 0 ) {
652
+ ___mkd_freeLineRange(t,q);
653
+ return 0;
654
+ }
655
+
656
+ /* after a blank line, the next block needs to start with a line
657
+ * that's indented 4 spaces, but after that the line doesn't
658
+ * need any indentation
659
+ */
660
+ if ( q != t->next ) {
661
+ if (q->dle < 4) {
662
+ q = t->next;
663
+ t->next = 0;
664
+ return q;
665
+ }
666
+ indent = 4;
667
+ }
668
+
669
+ if ( (q->dle < indent) && (ishr(q) || islist(q,&z)) && !ishdr(q,&z) ) {
670
+ q = t->next;
671
+ t->next = 0;
672
+ return q;
673
+ }
674
+
675
+ clip = (q->dle > indent) ? indent : q->dle;
676
+ }
677
+ return t;
678
+ }
679
+
680
+
681
+ static Line *
682
+ listblock(Paragraph *top, int trim, MMIOT *f)
683
+ {
684
+ ParagraphRoot d = { 0, 0 };
685
+ Paragraph *p;
686
+ Line *q = top->text, *text;
687
+ Line *label;
688
+ int para = 0;
689
+
690
+ while (( text = q )) {
691
+ if ( top->typ == DL ) {
692
+ Line *lp;
693
+
694
+ for ( lp = label = text; lp ; lp = lp->next ) {
695
+ text = lp->next;
696
+ CLIP(lp->text, 0, 1);
697
+ S(lp->text)--;
698
+ if ( !isdefinition(lp->next) )
699
+ lp->next = 0;
700
+ }
701
+ }
702
+ else label = 0;
703
+
704
+ p = Pp(&d, text, LISTITEM);
705
+ text = listitem(p, trim);
706
+
707
+ p->down = compile(p->text, 0, f);
708
+ p->text = label;
709
+
710
+ if ( para && (top->typ != DL) && p->down ) p->down->align = PARA;
711
+
712
+ if ( !(q = skipempty(text)) || (islist(q, &trim) == 0) )
713
+ break;
714
+
715
+ if ( para = (q != text) ) {
716
+ Line anchor;
717
+
718
+ anchor.next = text;
719
+ ___mkd_freeLineRange(&anchor, q);
720
+ }
721
+
722
+ if ( para && (top->typ != DL) && p->down ) p->down->align = PARA;
723
+ }
724
+ top->text = 0;
725
+ top->down = T(d);
726
+ return text;
727
+ }
728
+
729
+
730
/* if <c> can open a title (', " or '('), return the character that
 * closes it; otherwise return 0.
 */
static int
tgood(char c)
{
    if ( c == '\'' || c == '"' )
        return c;
    if ( c == '(' )
        return ')';
    return 0;
}
740
+
741
+
742
+ /*
743
+ * add a new (image or link) footnote to the footnote table
744
+ */
745
+ static Line*
746
+ addfootnote(Line *p, MMIOT* f)
747
+ {
748
+ int j, i;
749
+ int c;
750
+ Line *np = p->next;
751
+
752
+ Footnote *foot = &EXPAND(*f->footnotes);
753
+
754
+ CREATE(foot->tag);
755
+ CREATE(foot->link);
756
+ CREATE(foot->title);
757
+ foot->height = foot->width = 0;
758
+
759
+ for (j=i=p->dle+1; T(p->text)[j] != ']'; j++)
760
+ EXPAND(foot->tag) = T(p->text)[j];
761
+
762
+ EXPAND(foot->tag) = 0;
763
+ S(foot->tag)--;
764
+ j = nextnonblank(p, j+2);
765
+
766
+ while ( (j < S(p->text)) && !isspace(T(p->text)[j]) )
767
+ EXPAND(foot->link) = T(p->text)[j++];
768
+ EXPAND(foot->link) = 0;
769
+ S(foot->link)--;
770
+ j = nextnonblank(p,j);
771
+
772
+ if ( T(p->text)[j] == '=' ) {
773
+ sscanf(T(p->text)+j, "=%dx%d", &foot->width, &foot->height);
774
+ while ( (j < S(p->text)) && !isspace(T(p->text)[j]) )
775
+ ++j;
776
+ j = nextnonblank(p,j);
777
+ }
778
+
779
+
780
+ if ( (j >= S(p->text)) && np && np->dle && tgood(T(np->text)[np->dle]) ) {
781
+ ___mkd_freeLine(p);
782
+ p = np;
783
+ np = p->next;
784
+ j = p->dle;
785
+ }
786
+
787
+ if ( (c = tgood(T(p->text)[j])) ) {
788
+ /* Try to take the rest of the line as a comment; read to
789
+ * EOL, then shrink the string back to before the final
790
+ * quote.
791
+ */
792
+ ++j; /* skip leading quote */
793
+
794
+ while ( j < S(p->text) )
795
+ EXPAND(foot->title) = T(p->text)[j++];
796
+
797
+ while ( S(foot->title) && T(foot->title)[S(foot->title)-1] != c )
798
+ --S(foot->title);
799
+ if ( S(foot->title) ) /* skip trailing quote */
800
+ --S(foot->title);
801
+ EXPAND(foot->title) = 0;
802
+ --S(foot->title);
803
+ }
804
+
805
+ ___mkd_freeLine(p);
806
+ return np;
807
+ }
808
+
809
+
810
+ /*
811
+ * allocate a paragraph header, link it to the
812
+ * tail of the current document
813
+ */
814
+ static Paragraph *
815
+ Pp(ParagraphRoot *d, Line *ptr, int typ)
816
+ {
817
+ Paragraph *ret = calloc(sizeof *ret, 1);
818
+
819
+ ret->text = ptr;
820
+ ret->typ = typ;
821
+
822
+ return ATTACH(*d, ret);
823
+ }
824
+
825
+
826
+
827
+ static Line*
828
+ consume(Line *ptr, int *eaten)
829
+ {
830
+ Line *next;
831
+ int blanks=0;
832
+
833
+ for (; ptr && blankline(ptr); ptr = next, blanks++ ) {
834
+ next = ptr->next;
835
+ ___mkd_freeLine(ptr);
836
+ }
837
+ if ( ptr ) *eaten = blanks;
838
+ return ptr;
839
+ }
840
+
841
+
842
+ /*
843
+ * break a collection of markdown input into
844
+ * blocks of lists, code, html, and text to
845
+ * be marked up.
846
+ */
847
+ static Paragraph *
848
+ compile(Line *ptr, int toplevel, MMIOT *f)
849
+ {
850
+ ParagraphRoot d = { 0, 0 };
851
+ Paragraph *p = 0;
852
+ struct kw *tag;
853
+ Line *r;
854
+ int para = toplevel;
855
+ int hdr_type, list_type, indent;
856
+
857
+ ptr = consume(ptr, &para);
858
+
859
+ while ( ptr ) {
860
+ if ( toplevel && !(f->flags & DENY_HTML) && (tag = isopentag(ptr)) ) {
861
+ p = Pp(&d, ptr, strcmp(tag->id, "STYLE") == 0 ? STYLE : HTML);
862
+ if ( strcmp(tag->id, "!--") == 0 )
863
+ ptr = comment(p);
864
+ else
865
+ ptr = htmlblock(p, tag);
866
+ }
867
+ else if ( iscode(ptr) ) {
868
+ p = Pp(&d, ptr, CODE);
869
+
870
+ if ( f->flags & MKD_1_COMPAT) {
871
+ /* HORRIBLE STANDARDS KLUDGE: the first line of every block
872
+ * has trailing whitespace trimmed off.
873
+ */
874
+ ___mkd_tidy(&p->text->text);
875
+ }
876
+
877
+ ptr = codeblock(p);
878
+ }
879
+ else if ( ishr(ptr) ) {
880
+ p = Pp(&d, 0, HR);
881
+ r = ptr;
882
+ ptr = ptr->next;
883
+ ___mkd_freeLine(r);
884
+ }
885
+ else if (( list_type = islist(ptr, &indent) )) {
886
+ p = Pp(&d, ptr, list_type);
887
+ ptr = listblock(p, indent, f);
888
+ }
889
+ else if ( isquote(ptr) ) {
890
+ p = Pp(&d, ptr, QUOTE);
891
+ ptr = quoteblock(p);
892
+ p->down = compile(p->text, 1, f);
893
+ p->text = 0;
894
+ }
895
+ else if ( ishdr(ptr, &hdr_type) ) {
896
+ p = Pp(&d, ptr, HDR);
897
+ ptr = headerblock(p, hdr_type);
898
+ }
899
+ else if ( toplevel && (isfootnote(ptr)) ) {
900
+ ptr = consume(addfootnote(ptr, f), &para);
901
+ continue;
902
+ }
903
+ else {
904
+ p = Pp(&d, ptr, MARKUP);
905
+ ptr = textblock(p, toplevel);
906
+ }
907
+
908
+ if ( (para||toplevel) && !p->align )
909
+ p->align = PARA;
910
+
911
+ para = toplevel;
912
+ ptr = consume(ptr, &para);
913
+
914
+ if ( para && !p->align )
915
+ p->align = PARA;
916
+
917
+ }
918
+ return T(d);
919
+ }
920
+
921
+
922
+ static void
923
+ initialize()
924
+ {
925
+ static int first = 1;
926
+
927
+ if ( first-- > 0 ) {
928
+ first = 0;
929
+ INITRNG(time(0));
930
+ qsort(blocktags, SZTAGS, sizeof blocktags[0], (stfu)casort);
931
+ }
932
+ }
933
+
934
+
935
+ /*
936
+ * the guts of the markdown() function, ripped out so I can do
937
+ * debugging.
938
+ */
939
+
940
+ /*
941
+ * prepare and compile `text`, returning a Paragraph tree.
942
+ */
943
+ int
944
+ mkd_compile(Document *doc, int flags)
945
+ {
946
+ if ( !doc )
947
+ return 0;
948
+
949
+ if ( doc->compiled )
950
+ return 1;
951
+
952
+ doc->compiled = 1;
953
+ memset(doc->ctx, 0, sizeof(MMIOT) );
954
+ doc->ctx->flags = flags & USER_FLAGS;
955
+ doc->ctx->base = doc->base;
956
+ CREATE(doc->ctx->in);
957
+ doc->ctx->footnotes = malloc(sizeof doc->ctx->footnotes[0]);
958
+ CREATE(*doc->ctx->footnotes);
959
+
960
+ initialize();
961
+
962
+ doc->code = compile(T(doc->content), 1, doc->ctx);
963
+ qsort(T(*doc->ctx->footnotes), S(*doc->ctx->footnotes),
964
+ sizeof T(*doc->ctx->footnotes)[0],
965
+ (stfu)__mkd_footsort);
966
+ memset(&doc->content, 0, sizeof doc->content);
967
+ return 1;
968
+ }
969
+