rtomayko-rdiscount 1.3.1.1

Sign up to get free protection for your applications and to get access to all the features.
data/ext/markdown.c ADDED
@@ -0,0 +1,928 @@
1
+ /* markdown: a C implementation of John Gruber's Markdown markup language.
2
+ *
3
+ * Copyright (C) 2007 David L Parsons.
4
+ * The redistribution terms are provided in the COPYRIGHT file that must
5
+ * be distributed with this source code.
6
+ */
7
+ #include <stdio.h>
8
+ #include <string.h>
9
+ #include <stdarg.h>
10
+ #include <stdlib.h>
11
+ #include <time.h>
12
+ #include <ctype.h>
13
+
14
+ #include "config.h"
15
+
16
+ #include "cstring.h"
17
+ #include "markdown.h"
18
+ #include "amalloc.h"
19
+
20
/* Table of HTML tag names that are recognised as block-level, so that a
 * block opened by one of them can be handed through as raw html.  Each
 * entry carries the length of its name alongside the name itself.
 */
struct kw {
    char *id;		/* tag name, without the leading '<' */
    int siz;		/* number of characters in id */
} ;

/* build a struct kw initializer from a string literal */
#define KW(x)	{ x, sizeof(x)-1 }

static struct kw blocktags[] = {
    KW("!--"),
    KW("STYLE"),
    KW("SCRIPT"),
    KW("ADDRESS"),
    KW("BDO"),
    KW("BLOCKQUOTE"),
    KW("CENTER"),
    KW("DFN"),
    KW("DIV"),
    KW("H1"),
    KW("H2"),
    KW("H3"),
    KW("H4"),
    KW("H5"),
    KW("H6"),
    KW("LISTING"),
    KW("NOBR"),
    KW("UL"),
    KW("P"),
    KW("OL"),
    KW("DL"),
    KW("PLAINTEXT"),
    KW("PRE"),
    KW("TABLE"),
    KW("WBR"),
    KW("XMP"),
    KW("HR"),
    KW("BR")
};

/* number of entries in blocktags[] */
#define SZTAGS	(sizeof blocktags / sizeof blocktags[0])
/* sizeof "BLOCKQUOTE" (the longest name in the table, plus its NUL) */
#define MAXTAG	11
39
+
40
+ typedef int (*stfu)(const void*,const void*);
41
+
42
+ typedef ANCHOR(Paragraph) ParagraphRoot;
43
+
44
+
45
+ /* case insensitive string sort (for qsort() and bsearch() of block tags)
46
+ */
47
+ static int
48
+ casort(struct kw *a, struct kw *b)
49
+ {
50
+ if ( a->siz != b->siz )
51
+ return a->siz - b->siz;
52
+ return strncasecmp(a->id, b->id, b->siz);
53
+ }
54
+
55
+
56
+ /* case insensitive string sort for Footnote tags.
57
+ */
58
+ int
59
+ __mkd_footsort(Footnote *a, Footnote *b)
60
+ {
61
+ int i;
62
+ char ac, bc;
63
+
64
+ if ( S(a->tag) != S(b->tag) )
65
+ return S(a->tag) - S(b->tag);
66
+
67
+ for ( i=0; i < S(a->tag); i++) {
68
+ ac = tolower(T(a->tag)[i]);
69
+ bc = tolower(T(b->tag)[i]);
70
+
71
+ if ( isspace(ac) && isspace(bc) )
72
+ continue;
73
+ if ( ac != bc )
74
+ return ac - bc;
75
+ }
76
+ return 0;
77
+ }
78
+
79
+
80
+ /* find the first blank character after position <i>
81
+ */
82
+ static int
83
+ nextblank(Line *t, int i)
84
+ {
85
+ while ( (i < S(t->text)) && !isspace(T(t->text)[i]) )
86
+ ++i;
87
+ return i;
88
+ }
89
+
90
+
91
+ /* find the next nonblank character after position <i>
92
+ */
93
+ static int
94
+ nextnonblank(Line *t, int i)
95
+ {
96
+ while ( (i < S(t->text)) && isspace(T(t->text)[i]) )
97
+ ++i;
98
+ return i;
99
+ }
100
+
101
+
102
+ /* find the first nonblank character on the Line.
103
+ */
104
+ int
105
+ mkd_firstnonblank(Line *p)
106
+ {
107
+ return nextnonblank(p,0);
108
+ }
109
+
110
+
111
+ static int
112
+ blankline(Line *p)
113
+ {
114
+ return ! (p && (S(p->text) > p->dle) );
115
+ }
116
+
117
+
118
+ static Line *
119
+ skipempty(Line *p)
120
+ {
121
+ while ( p && (p->dle == S(p->text)) )
122
+ p = p->next;
123
+ return p;
124
+ }
125
+
126
+
127
+ static char *
128
+ isopentag(Line *p)
129
+ {
130
+ int i=0, len;
131
+ struct kw key, *ret;
132
+
133
+ if ( !p ) return 0;
134
+
135
+ len = S(p->text);
136
+
137
+ if ( len < 3 || T(p->text)[0] != '<' )
138
+ return 0;
139
+
140
+ /* find how long the tag is so we can check to see if
141
+ * it's a block-level tag
142
+ */
143
+ for ( i=1; i < len && T(p->text)[i] != '>'
144
+ && T(p->text)[i] != '/'
145
+ && !isspace(T(p->text)[i]); ++i )
146
+ ;
147
+
148
+ key.id = T(p->text)+1;
149
+ key.siz = i-1;
150
+
151
+ if ( ret = bsearch(&key,blocktags,SZTAGS,sizeof key, (stfu)casort))
152
+ return ret->id;
153
+
154
+ return 0;
155
+ }
156
+
157
+
158
+ static int
159
+ selfclose(Line *t, char *tag)
160
+ {
161
+ char *q = T(t->text);
162
+ int siz = strlen(tag);
163
+ int i;
164
+
165
+ if ( strcasecmp(tag, "HR") == 0 || strcasecmp(tag, "BR") == 0 )
166
+ /* <HR> and <BR> are self-closing block-level tags,
167
+ */
168
+ return 1;
169
+
170
+ i = S(t->text) - (siz + 3);
171
+
172
+ /* we specialcase start and end tags on the same line.
173
+ */
174
+ return ( i > 0 ) && (q[i] == '<') && (q[i+1] == '/')
175
+ && (q[i+2+siz] == '>')
176
+ && (strncasecmp(&q[i+2], tag, siz) == 0);
177
+ }
178
+
179
+
180
+ static Line *
181
+ htmlblock(Paragraph *p, char *tag)
182
+ {
183
+ Line *t = p->text, *ret;
184
+ int closesize;
185
+ char close[MAXTAG+4];
186
+
187
+ if ( selfclose(t, tag) || (strlen(tag) >= MAXTAG) ) {
188
+ ret = t->next;
189
+ t->next = 0;
190
+ return ret;
191
+ }
192
+
193
+ closesize = sprintf(close, "</%s>", tag);
194
+
195
+ for ( ; t ; t = t->next) {
196
+ if ( strncasecmp(T(t->text), close, closesize) == 0 ) {
197
+ ret = t->next;
198
+ t->next = 0;
199
+ return ret;
200
+ }
201
+ }
202
+ return 0;
203
+ }
204
+
205
+
206
+ static Line *
207
+ comment(Paragraph *p, char *key)
208
+ {
209
+ Line *t, *ret;
210
+
211
+ for ( t = p->text; t ; t = t->next) {
212
+ if ( strstr(T(t->text), "-->") ) {
213
+ ret = t->next;
214
+ t->next = 0;
215
+ return ret;
216
+ }
217
+ }
218
+ return t;
219
+
220
+ }
221
+
222
+
223
+ /* footnotes look like ^<whitespace>{0,3}[stuff]: <content>$
224
+ */
225
+ static int
226
+ isfootnote(Line *t)
227
+ {
228
+ int i;
229
+
230
+ if ( ( (i = t->dle) > 3) || (T(t->text)[i] != '[') )
231
+ return 0;
232
+
233
+ for ( ++i; i < S(t->text) ; ++i ) {
234
+ if ( T(t->text)[i] == '[' )
235
+ return 0;
236
+ else if ( T(t->text)[i] == ']' && T(t->text)[i+1] == ':' )
237
+ return 1;
238
+ }
239
+ return 0;
240
+ }
241
+
242
+
243
+ static int
244
+ isquote(Line *t)
245
+ {
246
+ return ( T(t->text)[0] == '>' );
247
+ }
248
+
249
+
250
/* is <c> one of the characters ('*', '-', '_') a horizontal rule can be
 * drawn with?
 */
static int
dashchar(char c)
{
    switch (c) {
    case '*':
    case '-':
    case '_':
        return 1;
    default:
        return 0;
    }
}
255
+
256
+
257
+ static int
258
+ iscode(Line *t)
259
+ {
260
+ return (t->dle >= 4);
261
+ }
262
+
263
+
264
+ static int
265
+ ishr(Line *t)
266
+ {
267
+ int i, count=0;
268
+ char dash = 0;
269
+ char c;
270
+
271
+ if ( iscode(t) ) return 0;
272
+
273
+ for ( i = 0; i < S(t->text); i++) {
274
+ c = T(t->text)[i];
275
+ if ( (dash == 0) && dashchar(c) )
276
+ dash = c;
277
+
278
+ if ( c == dash ) ++count;
279
+ else if ( !isspace(c) )
280
+ return 0;
281
+ }
282
+ return (count >= 3);
283
+ }
284
+
285
+
286
+ static int
287
+ ishdr(Line *t, int *htyp)
288
+ {
289
+ int i;
290
+
291
+
292
+ /* first check for etx-style ###HEADER###
293
+ */
294
+
295
+ /* leading run of `#`'s ?
296
+ */
297
+ for ( i=0; T(t->text)[i] == '#'; ++i)
298
+ ;
299
+
300
+ /* ANY leading `#`'s make this into an ETX header
301
+ */
302
+ if ( i ) {
303
+ *htyp = ETX;
304
+ return 1;
305
+ }
306
+
307
+ /* then check for setext-style HEADER
308
+ * ======
309
+ */
310
+
311
+ if ( t->next ) {
312
+ char *q = T(t->next->text);
313
+
314
+ if ( (*q == '=') || (*q == '-') ) {
315
+ for (i=1; i < S(t->next->text); i++)
316
+ if ( q[0] != q[i] )
317
+ return 0;
318
+ *htyp = SETEXT;
319
+ return 1;
320
+ }
321
+ }
322
+ return 0;
323
+ }
324
+
325
+
326
+ static int
327
+ isdefinition(Line *t)
328
+ {
329
+ #if DL_TAG_EXTENSION
330
+ return t && t->next
331
+ && (S(t->text) > 2)
332
+ && (t->dle == 0)
333
+ && (T(t->text)[0] == '=')
334
+ && (T(t->text)[S(t->text)-1] == '=')
335
+ && ( (t->next->dle >= 4) || isdefinition(t->next) );
336
+ #else
337
+ return 0;
338
+ #endif
339
+ }
340
+
341
+
342
+ static int
343
+ islist(Line *t, int *trim)
344
+ {
345
+ int i, j;
346
+ char *q;
347
+
348
+ if ( iscode(t) || blankline(t) || ishdr(t,&i) || ishr(t) )
349
+ return 0;
350
+
351
+ if ( isdefinition(t) ) {
352
+ *trim = 4;
353
+ return DL;
354
+ }
355
+
356
+ if ( strchr("*-+", T(t->text)[t->dle]) && isspace(T(t->text)[t->dle+1]) ) {
357
+ i = nextnonblank(t, t->dle+1);
358
+ *trim = (i > 4) ? 4 : i;
359
+ return UL;
360
+ }
361
+
362
+ if ( (j = nextblank(t,t->dle)) > t->dle ) {
363
+ if ( T(t->text)[j-1] == '.' ) {
364
+ #if ALPHA_LIST
365
+ if ( (j == t->dle + 2) && isalpha(T(t->text)[t->dle]) ) {
366
+ j = nextnonblank(t,j);
367
+ *trim = j;
368
+ return AL;
369
+ }
370
+ #endif
371
+ strtoul(T(t->text)+t->dle, &q, 10);
372
+ if ( (q > T(t->text)+t->dle) && (q == T(t->text) + (j-1)) ) {
373
+ j = nextnonblank(t,j);
374
+ *trim = j;
375
+ return OL;
376
+ }
377
+ }
378
+ }
379
+ return 0;
380
+ }
381
+
382
+
383
+ static Line *
384
+ headerblock(Paragraph *pp, int htyp)
385
+ {
386
+ Line *ret = 0;
387
+ Line *p = pp->text;
388
+ int i, j;
389
+
390
+ switch (htyp) {
391
+ case SETEXT:
392
+ /* p->text is header, p->next->text is -'s or ='s
393
+ */
394
+ pp->hnumber = (T(p->next->text)[0] == '=') ? 1 : 2;
395
+
396
+ ret = p->next->next;
397
+ ___mkd_freeLine(p->next);
398
+ p->next = 0;
399
+ break;
400
+
401
+ case ETX:
402
+ /* p->text is ###header###, so we need to trim off
403
+ * the leading and trailing `#`'s
404
+ */
405
+
406
+ for (i=0; (T(p->text)[i] == T(p->text)[0]) && (i < S(p->text)-1); i++)
407
+ ;
408
+
409
+ pp->hnumber = i;
410
+
411
+ while ( (i < S(p->text)) && isspace(T(p->text)[i]) )
412
+ ++i;
413
+
414
+ CLIP(p->text, 0, i);
415
+
416
+ for (j=S(p->text); (j > 1) && (T(p->text)[j-1] == '#'); --j)
417
+ ;
418
+
419
+ while ( j && isspace(T(p->text)[j-1]) )
420
+ --j;
421
+
422
+ S(p->text) = j;
423
+
424
+ ret = p->next;
425
+ p->next = 0;
426
+ break;
427
+ }
428
+ return ret;
429
+ }
430
+
431
+
432
+ static Line *
433
+ codeblock(Paragraph *p)
434
+ {
435
+ Line *t = p->text, *r;
436
+
437
+ /* HORRIBLE STANDARDS KLUDGE: the first line of every block
438
+ * has trailing whitespace trimmed off.
439
+ */
440
+ while ( S(t->text) && isspace(T(t->text)[S(t->text)-1]) )
441
+ --S(t->text);
442
+
443
+ for ( ; t; t = r ) {
444
+ CLIP(t->text,0,4);
445
+ t->dle = mkd_firstnonblank(t);
446
+
447
+ if ( !( (r = skipempty(t->next)) && iscode(r)) ) {
448
+ ___mkd_freeLineRange(t,r);
449
+ t->next = 0;
450
+ return r;
451
+ }
452
+ }
453
+ return t;
454
+ }
455
+
456
+
457
+ static int
458
+ centered(Line *first, Line *last)
459
+ {
460
+
461
+ if ( first&&last ) {
462
+ int len = S(last->text);
463
+
464
+ if ( (len > 2) && (strncmp(T(first->text), "->", 2) == 0)
465
+ && (strncmp(T(last->text)+len-2, "<-", 2) == 0) ) {
466
+ CLIP(first->text, 0, 2);
467
+ S(last->text) -= 2;
468
+ return CENTER;
469
+ }
470
+ }
471
+ return 0;
472
+ }
473
+
474
+
475
+ static int
476
+ endoftextblock(Line *t, int toplevelblock)
477
+ {
478
+ int z;
479
+
480
+ if ( blankline(t)||isquote(t)||iscode(t)||ishdr(t,&z)||ishr(t) )
481
+ return 1;
482
+
483
+ /* HORRIBLE STANDARDS KLUDGE: Toplevel paragraphs eat absorb adjacent
484
+ * list items, but sublevel blocks behave properly.
485
+ */
486
+ return toplevelblock ? 0 : islist(t,&z);
487
+ }
488
+
489
+
490
+ static Line *
491
+ textblock(Paragraph *p, int toplevel)
492
+ {
493
+ Line *t, *next;
494
+
495
+ for ( t = p->text; t ; t = next )
496
+ if ( ((next = t->next) == 0) || endoftextblock(next, toplevel) ) {
497
+ p->align = centered(p->text, t);
498
+ t->next = 0;
499
+ return next;
500
+ }
501
+ return t;
502
+ }
503
+
504
+
505
/* length of the "id:" or "class:" kind (matched without regard to case)
 * at the start of <p>, or 0 if <p> starts with neither.
 */
static int
szmarkerclass(char *p)
{
    static const struct {
        const char *label;
        int len;
    } kinds[] = {
        { "id:",    3 },
        { "class:", 6 },
    };
    unsigned int k;

    for ( k = 0; k < sizeof kinds / sizeof kinds[0]; k++ )
        if ( strncasecmp(p, kinds[k].label, kinds[k].len) == 0 )
            return kinds[k].len;

    return 0;
}
516
+
517
+
518
+ /*
519
+ * check if the first line of a quoted block is the special div-not-quote
520
+ * marker %[kind:]name%
521
+ */
522
+ static int
523
+ isdivmarker(Line *p)
524
+ {
525
+ #if DIV_QUOTE
526
+ char *s = T(p->text);
527
+ int len = S(p->text);
528
+ int i;
529
+
530
+ if ( !(len && s[0] == '%' && s[len-1] == '%') ) return 0;
531
+
532
+ i = szmarkerclass(s+1);
533
+ --len;
534
+
535
+ while ( ++i < len )
536
+ if ( !isalnum(s[i]) )
537
+ return 0;
538
+
539
+ return 1;
540
+ #else
541
+ return 0;
542
+ #endif
543
+ }
544
+
545
+
546
+ /*
547
+ * accumulate a blockquote.
548
+ *
549
+ * one sick horrible thing about blockquotes is that even though
550
+ * it just takes ^> to start a quote, following lines, if quoted,
551
+ * assume that the prefix is ``>''. This means that code needs
552
+ * to be indented *5* spaces from the leading '>', but *4* spaces
553
+ * from the start of the line. This does not appear to be
554
+ * documented in the reference implementation, but it's the
555
+ * way the markdown sample web form at Daring Fireball works.
556
+ */
557
+ static Line *
558
+ quoteblock(Paragraph *p)
559
+ {
560
+ Line *t, *q;
561
+ int qp;
562
+
563
+ for ( t = p->text; t ; t = q ) {
564
+ if ( isquote(t) ) {
565
+ qp = (T(t->text)[1] == ' ') ? 2 : 1;
566
+ CLIP(t->text, 0, qp);
567
+ t->dle = mkd_firstnonblank(t);
568
+ }
569
+
570
+ if ( !(q = skipempty(t->next)) || ((q != t->next) && !isquote(q)) ) {
571
+ ___mkd_freeLineRange(t, q);
572
+ t = q;
573
+ break;
574
+ }
575
+ }
576
+ if ( isdivmarker(p->text) ) {
577
+ char *prefix = "class";
578
+ int i;
579
+
580
+ q = p->text;
581
+ p->text = p->text->next;
582
+
583
+ if ( (i = szmarkerclass(1+T(q->text))) == 3 )
584
+ /* and this would be an "%id:" prefix */
585
+ prefix="id";
586
+
587
+ if ( p->ident = malloc(4+i+S(q->text)) )
588
+ sprintf(p->ident, "%s=\"%.*s\"", prefix, S(q->text)-(i+2),
589
+ T(q->text)+(i+1) );
590
+
591
+ ___mkd_freeLine(q);
592
+ }
593
+ return t;
594
+ }
595
+
596
+
597
+ static Paragraph *Pp(ParagraphRoot *, Line *, int);
598
+ static Paragraph *compile(Line *, int, MMIOT *);
599
+
600
+
601
+ /*
602
+ * pull in a list block. A list block starts with a list marker and
603
+ * runs until the next list marker, the next non-indented paragraph,
604
+ * or EOF. You do not have to indent nonblank lines after the list
605
+ * marker, but multiple paragraphs need to start with a 4-space indent.
606
+ */
607
+ static Line *
608
+ listitem(Paragraph *p, int indent)
609
+ {
610
+ Line *t, *q;
611
+ int clip = indent;
612
+ int z;
613
+
614
+ for ( t = p->text; t ; t = q) {
615
+ CLIP(t->text, 0, clip);
616
+ t->dle = mkd_firstnonblank(t);
617
+
618
+ if ( (q = skipempty(t->next)) == 0 ) {
619
+ ___mkd_freeLineRange(t,q);
620
+ return 0;
621
+ }
622
+
623
+ /* after a blank line, the next block needs to start with a line
624
+ * that's indented 4 spaces, but after that the line doesn't
625
+ * need any indentation
626
+ */
627
+ if ( q != t->next ) {
628
+ if (q->dle < 4) {
629
+ q = t->next;
630
+ t->next = 0;
631
+ return q;
632
+ }
633
+ indent = 4;
634
+ }
635
+
636
+ if ( (q->dle < indent) && (ishr(q) || islist(q,&z)) && !ishdr(q,&z) ) {
637
+ q = t->next;
638
+ t->next = 0;
639
+ return q;
640
+ }
641
+
642
+ clip = (q->dle > indent) ? indent : q->dle;
643
+ }
644
+ return t;
645
+ }
646
+
647
+
648
+ static Line *
649
+ listblock(Paragraph *top, int trim, MMIOT *f)
650
+ {
651
+ ParagraphRoot d = { 0, 0 };
652
+ Paragraph *p;
653
+ Line *q = top->text, *text;
654
+ Line *label;
655
+ int para = 0;
656
+
657
+ while (( text = q )) {
658
+ if ( top->typ == DL ) {
659
+ Line *lp;
660
+
661
+ for ( lp = label = text; lp ; lp = lp->next ) {
662
+ text = lp->next;
663
+ CLIP(lp->text, 0, 1);
664
+ S(lp->text)--;
665
+ if ( !isdefinition(lp->next) )
666
+ lp->next = 0;
667
+ }
668
+ }
669
+ else label = 0;
670
+
671
+ p = Pp(&d, text, LISTITEM);
672
+ text = listitem(p, trim);
673
+
674
+ p->down = compile(p->text, 0, f);
675
+ p->text = label;
676
+
677
+ if ( para && (top->typ != DL) && p->down ) p->down->align = PARA;
678
+
679
+ if ( !(q = skipempty(text)) || (islist(q, &trim) == 0) )
680
+ break;
681
+
682
+ if ( para = (q != text) ) {
683
+ Line anchor;
684
+
685
+ anchor.next = text;
686
+ ___mkd_freeLineRange(&anchor, q);
687
+ }
688
+
689
+ if ( para && (top->typ != DL) && p->down ) p->down->align = PARA;
690
+ }
691
+ top->text = 0;
692
+ top->down = T(d);
693
+ return text;
694
+ }
695
+
696
+
697
/* if <c> can open a footnote title, return the character that closes
 * it (a quote closes itself, '(' is closed by ')'); otherwise return 0.
 */
static int
tgood(char c)
{
    if ( c == '\'' || c == '"' )
        return c;

    if ( c == '(' )
        return ')';

    return 0;
}
707
+
708
+
709
+ /*
710
+ * add a new (image or link) footnote to the footnote table
711
+ */
712
+ static Line*
713
+ addfootnote(Line *p, MMIOT* f)
714
+ {
715
+ int j, i;
716
+ int c;
717
+ Line *np = p->next;
718
+
719
+ Footnote *foot = &EXPAND(*f->footnotes);
720
+
721
+ CREATE(foot->tag);
722
+ CREATE(foot->link);
723
+ CREATE(foot->title);
724
+ foot->height = foot->width = 0;
725
+
726
+ for (j=i=p->dle+1; T(p->text)[j] != ']'; j++)
727
+ EXPAND(foot->tag) = T(p->text)[j];
728
+
729
+ EXPAND(foot->tag) = 0;
730
+ S(foot->tag)--;
731
+ j = nextnonblank(p, j+2);
732
+
733
+ while ( (j < S(p->text)) && !isspace(T(p->text)[j]) )
734
+ EXPAND(foot->link) = T(p->text)[j++];
735
+ EXPAND(foot->link) = 0;
736
+ S(foot->link)--;
737
+ j = nextnonblank(p,j);
738
+
739
+ if ( T(p->text)[j] == '=' ) {
740
+ sscanf(T(p->text)+j, "=%dx%d", &foot->width, &foot->height);
741
+ while ( (j < S(p->text)) && !isspace(T(p->text)[j]) )
742
+ ++j;
743
+ j = nextnonblank(p,j);
744
+ }
745
+
746
+
747
+ if ( (j >= S(p->text)) && np && np->dle && tgood(T(np->text)[np->dle]) ) {
748
+ ___mkd_freeLine(p);
749
+ p = np;
750
+ np = p->next;
751
+ j = p->dle;
752
+ }
753
+
754
+ if ( (c = tgood(T(p->text)[j])) ) {
755
+ /* Try to take the rest of the line as a comment; read to
756
+ * EOL, then shrink the string back to before the final
757
+ * quote.
758
+ */
759
+ ++j; /* skip leading quote */
760
+
761
+ while ( j < S(p->text) )
762
+ EXPAND(foot->title) = T(p->text)[j++];
763
+
764
+ while ( S(foot->title) && T(foot->title)[S(foot->title)-1] != c )
765
+ --S(foot->title);
766
+ if ( S(foot->title) ) /* skip trailing quote */
767
+ --S(foot->title);
768
+ EXPAND(foot->title) = 0;
769
+ --S(foot->title);
770
+ }
771
+
772
+ ___mkd_freeLine(p);
773
+ return np;
774
+ }
775
+
776
+
777
+ /*
778
+ * allocate a paragraph header, link it to the
779
+ * tail of the current document
780
+ */
781
+ static Paragraph *
782
+ Pp(ParagraphRoot *d, Line *ptr, int typ)
783
+ {
784
+ Paragraph *ret = calloc(sizeof *ret, 1);
785
+
786
+ ret->text = ptr;
787
+ ret->typ = typ;
788
+
789
+ return ATTACH(*d, ret);
790
+ }
791
+
792
+
793
+
794
+ static Line*
795
+ consume(Line *ptr, int *eaten)
796
+ {
797
+ Line *next;
798
+ int blanks=0;
799
+
800
+ for (; ptr && blankline(ptr); ptr = next, blanks++ ) {
801
+ next = ptr->next;
802
+ ___mkd_freeLine(ptr);
803
+ }
804
+ if ( ptr ) *eaten = blanks;
805
+ return ptr;
806
+ }
807
+
808
+
809
+ /*
810
+ * break a collection of markdown input into
811
+ * blocks of lists, code, html, and text to
812
+ * be marked up.
813
+ */
814
+ static Paragraph *
815
+ compile(Line *ptr, int toplevel, MMIOT *f)
816
+ {
817
+ ParagraphRoot d = { 0, 0 };
818
+ Paragraph *p = 0;
819
+ char *key;
820
+ Line *r;
821
+ int para = toplevel;
822
+ int hdr_type, list_type, indent;
823
+
824
+ ptr = consume(ptr, &para);
825
+
826
+ while ( ptr ) {
827
+ if ( toplevel && !(f->flags & DENY_HTML) && (key = isopentag(ptr)) ) {
828
+ p = Pp(&d, ptr, strcmp(key, "STYLE") == 0 ? STYLE : HTML);
829
+ if ( strcmp(key, "!--") == 0 )
830
+ ptr = comment(p, key);
831
+ else
832
+ ptr = htmlblock(p, key);
833
+ }
834
+ else if ( iscode(ptr) ) {
835
+ p = Pp(&d, ptr, CODE);
836
+ ptr = codeblock(p);
837
+ }
838
+ else if ( ishr(ptr) ) {
839
+ p = Pp(&d, 0, HR);
840
+ r = ptr;
841
+ ptr = ptr->next;
842
+ ___mkd_freeLine(r);
843
+ }
844
+ else if (( list_type = islist(ptr, &indent) )) {
845
+ p = Pp(&d, ptr, list_type);
846
+ ptr = listblock(p, indent, f);
847
+ }
848
+ else if ( isquote(ptr) ) {
849
+ p = Pp(&d, ptr, QUOTE);
850
+ ptr = quoteblock(p);
851
+ p->down = compile(p->text, 1, f);
852
+ p->text = 0;
853
+ }
854
+ else if ( ishdr(ptr, &hdr_type) ) {
855
+ p = Pp(&d, ptr, HDR);
856
+ ptr = headerblock(p, hdr_type);
857
+ }
858
+ else if ( toplevel && (isfootnote(ptr)) ) {
859
+ ptr = consume(addfootnote(ptr, f), &para);
860
+ continue;
861
+ }
862
+ else {
863
+ p = Pp(&d, ptr, MARKUP);
864
+ ptr = textblock(p, toplevel);
865
+ }
866
+
867
+ if ( (para||toplevel) && !p->align )
868
+ p->align = PARA;
869
+
870
+ para = toplevel;
871
+ ptr = consume(ptr, &para);
872
+
873
+ if ( para && !p->align )
874
+ p->align = PARA;
875
+
876
+ }
877
+ return T(d);
878
+ }
879
+
880
+
881
+ static void
882
+ initialize()
883
+ {
884
+ static int first = 1;
885
+
886
+ if ( first-- > 0 ) {
887
+ first = 0;
888
+ INITRNG(time(0));
889
+ qsort(blocktags, SZTAGS, sizeof blocktags[0], (stfu)casort);
890
+ }
891
+ }
892
+
893
+
894
+ /*
895
+ * the guts of the markdown() function, ripped out so I can do
896
+ * debugging.
897
+ */
898
+
899
+ /*
900
+ * prepare and compile `text`, returning a Paragraph tree.
901
+ */
902
+ int
903
+ mkd_compile(Document *doc, int flags)
904
+ {
905
+ if ( !doc )
906
+ return 0;
907
+
908
+ if ( doc->compiled )
909
+ return 1;
910
+
911
+ doc->compiled = 1;
912
+ memset(doc->ctx, 0, sizeof(MMIOT) );
913
+ doc->ctx->flags = flags & USER_FLAGS;
914
+ doc->ctx->base = doc->base;
915
+ CREATE(doc->ctx->in);
916
+ doc->ctx->footnotes = malloc(sizeof doc->ctx->footnotes[0]);
917
+ CREATE(*doc->ctx->footnotes);
918
+
919
+ initialize();
920
+
921
+ doc->code = compile(T(doc->content), 1, doc->ctx);
922
+ qsort(T(*doc->ctx->footnotes), S(*doc->ctx->footnotes),
923
+ sizeof T(*doc->ctx->footnotes)[0],
924
+ (stfu)__mkd_footsort);
925
+ memset(&doc->content, 0, sizeof doc->content);
926
+ return 1;
927
+ }
928
+