rdiscount-dsc 1.6.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,24 @@
1
+ /* block-level tags for passing html5 blocks through the blender
2
+ */
3
+ #include "tags.h"
4
+
5
/* Register the html5 block-level tags (ASIDE, FOOTER, HEADER, HGROUP,
 * NAV, SECTION, ARTICLE) with the tag table.
 *
 * A function-local static flag makes every call after the first a no-op.
 * Each tag is defined with a second argument of 0, and the table is
 * re-sorted once after all of them have been added.
 */
void
mkd_with_html5_tags()
{
    /* string literals, so the pointers stay valid for the whole run */
    static char *html5_blocks[] = {
        "ASIDE", "FOOTER", "HEADER", "HGROUP", "NAV", "SECTION", "ARTICLE",
    };
    static int already_done = 0;
    unsigned int idx;

    if ( !already_done ) {
        already_done = 1;

        mkd_prepare_tags();

        for ( idx = 0; idx < sizeof html5_blocks / sizeof html5_blocks[0]; idx++ )
            mkd_define_tag(html5_blocks[idx], 0);

        mkd_sort_tags();
    }
}
@@ -0,0 +1,1215 @@
1
+ /* markdown: a C implementation of John Gruber's Markdown markup language.
2
+ *
3
+ * Copyright (C) 2007 David L Parsons.
4
+ * The redistribution terms are provided in the COPYRIGHT file that must
5
+ * be distributed with this source code.
6
+ */
7
+ #include "config.h"
8
+
9
+ #include <stdio.h>
10
+ #include <string.h>
11
+ #include <stdarg.h>
12
+ #include <stdlib.h>
13
+ #include <time.h>
14
+ #include <ctype.h>
15
+
16
+ #include "cstring.h"
17
+ #include "markdown.h"
18
+ #include "amalloc.h"
19
+ #include "tags.h"
20
+
21
+ typedef int (*stfu)(const void*,const void*);
22
+
23
+ typedef ANCHOR(Paragraph) ParagraphRoot;
24
+
25
+ /* case insensitive string sort for Footnote tags.
26
+ */
27
+ int
28
+ __mkd_footsort(Footnote *a, Footnote *b)
29
+ {
30
+ int i;
31
+ char ac, bc;
32
+
33
+ if ( S(a->tag) != S(b->tag) )
34
+ return S(a->tag) - S(b->tag);
35
+
36
+ for ( i=0; i < S(a->tag); i++) {
37
+ ac = tolower(T(a->tag)[i]);
38
+ bc = tolower(T(b->tag)[i]);
39
+
40
+ if ( isspace(ac) && isspace(bc) )
41
+ continue;
42
+ if ( ac != bc )
43
+ return ac - bc;
44
+ }
45
+ return 0;
46
+ }
47
+
48
+
49
+ /* find the first blank character after position <i>
50
+ */
51
+ static int
52
+ nextblank(Line *t, int i)
53
+ {
54
+ while ( (i < S(t->text)) && !isspace(T(t->text)[i]) )
55
+ ++i;
56
+ return i;
57
+ }
58
+
59
+
60
+ /* find the next nonblank character after position <i>
61
+ */
62
+ static int
63
+ nextnonblank(Line *t, int i)
64
+ {
65
+ while ( (i < S(t->text)) && isspace(T(t->text)[i]) )
66
+ ++i;
67
+ return i;
68
+ }
69
+
70
+
71
+ /* find the first nonblank character on the Line.
72
+ */
73
+ int
74
+ mkd_firstnonblank(Line *p)
75
+ {
76
+ return nextnonblank(p,0);
77
+ }
78
+
79
+
80
+ static int
81
+ blankline(Line *p)
82
+ {
83
+ return ! (p && (S(p->text) > p->dle) );
84
+ }
85
+
86
+
87
+ static Line *
88
+ skipempty(Line *p)
89
+ {
90
+ while ( p && (p->dle == S(p->text)) )
91
+ p = p->next;
92
+ return p;
93
+ }
94
+
95
+
96
+ void
97
+ ___mkd_tidy(Cstring *t)
98
+ {
99
+ while ( S(*t) && isspace(T(*t)[S(*t)-1]) )
100
+ --S(*t);
101
+ }
102
+
103
+
104
+ static struct kw comment = { "!--", 3, 0 };
105
+
106
+ static struct kw *
107
+ isopentag(Line *p)
108
+ {
109
+ int i=0, len;
110
+ char *line;
111
+
112
+ if ( !p ) return 0;
113
+
114
+ line = T(p->text);
115
+ len = S(p->text);
116
+
117
+ if ( len < 3 || line[0] != '<' )
118
+ return 0;
119
+
120
+ if ( line[1] == '!' && line[2] == '-' && line[3] == '-' )
121
+ /* comments need special case handling, because
122
+ * the !-- doesn't need to end in a whitespace
123
+ */
124
+ return &comment;
125
+
126
+ /* find how long the tag is so we can check to see if
127
+ * it's a block-level tag
128
+ */
129
+ for ( i=1; i < len && T(p->text)[i] != '>'
130
+ && T(p->text)[i] != '/'
131
+ && !isspace(T(p->text)[i]); ++i )
132
+ ;
133
+
134
+
135
+ return mkd_search_tags(T(p->text)+1, i-1);
136
+ }
137
+
138
+
139
+ typedef struct _flo {
140
+ Line *t;
141
+ int i;
142
+ } FLO;
143
+
144
+ #define floindex(x) (x.i)
145
+
146
+
147
+ static int
148
+ flogetc(FLO *f)
149
+ {
150
+ if ( f && f->t ) {
151
+ if ( f->i < S(f->t->text) )
152
+ return T(f->t->text)[f->i++];
153
+ f->t = f->t->next;
154
+ f->i = 0;
155
+ return flogetc(f);
156
+ }
157
+ return EOF;
158
+ }
159
+
160
+
161
+ static void
162
+ splitline(Line *t, int cutpoint)
163
+ {
164
+ if ( t && (cutpoint < S(t->text)) ) {
165
+ Line *tmp = calloc(1, sizeof *tmp);
166
+
167
+ tmp->next = t->next;
168
+ t->next = tmp;
169
+
170
+ tmp->dle = t->dle;
171
+ SUFFIX(tmp->text, T(t->text)+cutpoint, S(t->text)-cutpoint);
172
+ S(t->text) = cutpoint;
173
+ }
174
+ }
175
+
176
+
177
+ static Line *
178
+ commentblock(Paragraph *p, int *unclosed)
179
+ {
180
+ Line *t, *ret;
181
+ char *end;
182
+
183
+ for ( t = p->text; t ; t = t->next) {
184
+ if ( end = strstr(T(t->text), "-->") ) {
185
+ splitline(t, 3 + (end - T(t->text)) );
186
+ ret = t->next;
187
+ t->next = 0;
188
+ return ret;
189
+ }
190
+ }
191
+ *unclosed = 1;
192
+ return t;
193
+
194
+ }
195
+
196
+
197
+ static Line *
198
+ htmlblock(Paragraph *p, struct kw *tag, int *unclosed)
199
+ {
200
+ Line *ret;
201
+ FLO f = { p->text, 0 };
202
+ int c;
203
+ int i, closing, depth=0;
204
+
205
+ *unclosed = 0;
206
+
207
+ if ( tag == &comment )
208
+ return commentblock(p, unclosed);
209
+
210
+ if ( tag->selfclose ) {
211
+ ret = f.t->next;
212
+ f.t->next = 0;
213
+ return ret;
214
+ }
215
+
216
+ while ( (c = flogetc(&f)) != EOF ) {
217
+ if ( c == '<' ) {
218
+ /* tag? */
219
+ c = flogetc(&f);
220
+ if ( c == '!' ) { /* comment? */
221
+ if ( flogetc(&f) == '-' && flogetc(&f) == '-' ) {
222
+ /* yes */
223
+ while ( (c = flogetc(&f)) != EOF ) {
224
+ if ( c == '-' && flogetc(&f) == '-'
225
+ && flogetc(&f) == '>')
226
+ /* consumed whole comment */
227
+ break;
228
+ }
229
+ }
230
+ }
231
+ else {
232
+ if ( closing = (c == '/') ) c = flogetc(&f);
233
+
234
+ for ( i=0; i < tag->size; c=flogetc(&f) ) {
235
+ if ( tag->id[i++] != toupper(c) )
236
+ break;
237
+ }
238
+
239
+ if ( (i == tag->size) && !isalnum(c) ) {
240
+ depth = depth + (closing ? -1 : 1);
241
+ if ( depth == 0 ) {
242
+ while ( c != EOF && c != '>' ) {
243
+ /* consume trailing gunk in close tag */
244
+ c = flogetc(&f);
245
+ }
246
+ if ( c == EOF )
247
+ break;
248
+ if ( !f.t )
249
+ return 0;
250
+ splitline(f.t, floindex(f));
251
+ ret = f.t->next;
252
+ f.t->next = 0;
253
+ return ret;
254
+ }
255
+ }
256
+ }
257
+ }
258
+ }
259
+ *unclosed = 1;
260
+ return 0;
261
+ }
262
+
263
+
264
+ /* tables look like
265
+ * header|header{|header}
266
+ * ------|------{|......}
267
+ * {body lines}
268
+ */
269
+ static int
270
+ istable(Line *t)
271
+ {
272
+ char *p;
273
+ Line *dashes = t->next;
274
+ int contains = 0; /* found character bits; 0x01 is |, 0x02 is - */
275
+
276
+ /* two lines, first must contain | */
277
+ if ( !(dashes && memchr(T(t->text), '|', S(t->text))) )
278
+ return 0;
279
+
280
+ /* second line must contain - or | and nothing
281
+ * else except for whitespace or :
282
+ */
283
+ for ( p = T(dashes->text)+S(dashes->text)-1; p >= T(dashes->text); --p)
284
+ if ( *p == '|' )
285
+ contains |= 0x01;
286
+ else if ( *p == '-' )
287
+ contains |= 0x02;
288
+ else if ( ! ((*p == ':') || isspace(*p)) )
289
+ return 0;
290
+
291
+ return (contains & 0x03);
292
+ }
293
+
294
+
295
+ /* footnotes look like ^<whitespace>{0,3}[stuff]: <content>$
296
+ */
297
+ static int
298
+ isfootnote(Line *t)
299
+ {
300
+ int i;
301
+
302
+ if ( ( (i = t->dle) > 3) || (T(t->text)[i] != '[') )
303
+ return 0;
304
+
305
+ for ( ++i; i < S(t->text) ; ++i ) {
306
+ if ( T(t->text)[i] == '[' )
307
+ return 0;
308
+ else if ( T(t->text)[i] == ']' )
309
+ return ( T(t->text)[i+1] == ':' ) ;
310
+ }
311
+ return 0;
312
+ }
313
+
314
+
315
+ static int
316
+ isquote(Line *t)
317
+ {
318
+ int j;
319
+
320
+ for ( j=0; j < 4; j++ )
321
+ if ( T(t->text)[j] == '>' )
322
+ return 1;
323
+ else if ( !isspace(T(t->text)[j]) )
324
+ return 0;
325
+ return 0;
326
+ }
327
+
328
+
329
/* Is <c> one of the characters ('*', '-', '_') that ishr() accepts as
 * the repeated mark of a horizontal rule?  Returns 1 or 0.
 */
static int
dashchar(char c)
{
    switch ( c ) {
    case '*':
    case '-':
    case '_':
        return 1;
    default:
        return 0;
    }
}
334
+
335
+
336
+ static int
337
+ iscode(Line *t)
338
+ {
339
+ return (t->dle >= 4);
340
+ }
341
+
342
+
343
+ static int
344
+ ishr(Line *t)
345
+ {
346
+ int i, count=0;
347
+ char dash = 0;
348
+ char c;
349
+
350
+ if ( iscode(t) ) return 0;
351
+
352
+ for ( i = 0; i < S(t->text); i++) {
353
+ c = T(t->text)[i];
354
+ if ( (dash == 0) && dashchar(c) )
355
+ dash = c;
356
+
357
+ if ( c == dash ) ++count;
358
+ else if ( !isspace(c) )
359
+ return 0;
360
+ }
361
+ return (count >= 3);
362
+ }
363
+
364
+
365
+ static int
366
+ issetext(Line *t, int *htyp)
367
+ {
368
+ int i;
369
+ /* then check for setext-style HEADER
370
+ * ======
371
+ */
372
+
373
+ if ( t->next ) {
374
+ char *q = T(t->next->text);
375
+ int last = S(t->next->text);
376
+
377
+ if ( (*q == '=') || (*q == '-') ) {
378
+ /* ignore trailing whitespace */
379
+ while ( (last > 1) && isspace(q[last-1]) )
380
+ --last;
381
+
382
+ for (i=1; i < last; i++)
383
+ if ( q[0] != q[i] )
384
+ return 0;
385
+ *htyp = SETEXT;
386
+ return 1;
387
+ }
388
+ }
389
+ return 0;
390
+ }
391
+
392
+
393
+ static int
394
+ ishdr(Line *t, int *htyp)
395
+ {
396
+ int i;
397
+
398
+
399
+ /* first check for etx-style ###HEADER###
400
+ */
401
+
402
+ /* leading run of `#`'s ?
403
+ */
404
+ for ( i=0; T(t->text)[i] == '#'; ++i)
405
+ ;
406
+
407
+ /* ANY leading `#`'s make this into an ETX header
408
+ */
409
+ if ( i && (i < S(t->text) || i > 1) ) {
410
+ *htyp = ETX;
411
+ return 1;
412
+ }
413
+
414
+ return issetext(t, htyp);
415
+ }
416
+
417
+
418
+ static Line*
419
+ is_discount_dt(Line *t, int *clip)
420
+ {
421
+ #if USE_DISCOUNT_DL
422
+ if ( t && t->next
423
+ && (S(t->text) > 2)
424
+ && (t->dle == 0)
425
+ && (T(t->text)[0] == '=')
426
+ && (T(t->text)[S(t->text)-1] == '=') ) {
427
+ if ( t->next->dle >= 4 ) {
428
+ *clip = 4;
429
+ return t;
430
+ }
431
+ else
432
+ return is_discount_dt(t->next, clip);
433
+ }
434
+ #endif
435
+ return 0;
436
+ }
437
+
438
+
439
+ static int
440
+ is_extra_dd(Line *t)
441
+ {
442
+ return (t->dle < 4) && (T(t->text)[t->dle] == ':')
443
+ && isspace(T(t->text)[t->dle+1]);
444
+ }
445
+
446
+
447
+ static Line*
448
+ is_extra_dt(Line *t, int *clip)
449
+ {
450
+ #if USE_EXTRA_DL
451
+ int i;
452
+
453
+ if ( t && t->next && T(t->text)[0] != '='
454
+ && T(t->text)[S(t->text)-1] != '=') {
455
+ Line *x;
456
+
457
+ if ( iscode(t) || blankline(t) || ishdr(t,&i) || ishr(t) )
458
+ return 0;
459
+
460
+ if ( (x = skipempty(t->next)) && is_extra_dd(x) ) {
461
+ *clip = x->dle+2;
462
+ return t;
463
+ }
464
+
465
+ if ( x=is_extra_dt(t->next, clip) )
466
+ return x;
467
+ }
468
+ #endif
469
+ return 0;
470
+ }
471
+
472
+
473
+ static Line*
474
+ isdefinition(Line *t, int *clip, int *kind)
475
+ {
476
+ Line *ret;
477
+
478
+ *kind = 1;
479
+ if ( ret = is_discount_dt(t,clip) )
480
+ return ret;
481
+
482
+ *kind=2;
483
+ return is_extra_dt(t,clip);
484
+ }
485
+
486
+
487
+ static int
488
+ islist(Line *t, int *clip, DWORD flags, int *list_type)
489
+ {
490
+ int i, j;
491
+ char *q;
492
+
493
+ if ( /*iscode(t) ||*/ blankline(t) || ishdr(t,&i) || ishr(t) )
494
+ return 0;
495
+
496
+ if ( !(flags & (MKD_NODLIST|MKD_STRICT)) && isdefinition(t,clip,list_type) )
497
+ return DL;
498
+
499
+ if ( strchr("*-+", T(t->text)[t->dle]) && isspace(T(t->text)[t->dle+1]) ) {
500
+ i = nextnonblank(t, t->dle+1);
501
+ *clip = (i > 4) ? 4 : i;
502
+ *list_type = UL;
503
+ return AL;
504
+ }
505
+
506
+ if ( (j = nextblank(t,t->dle)) > t->dle ) {
507
+ if ( T(t->text)[j-1] == '.' ) {
508
+
509
+ if ( !(flags & (MKD_NOALPHALIST|MKD_STRICT))
510
+ && (j == t->dle + 2)
511
+ && isalpha(T(t->text)[t->dle]) ) {
512
+ j = nextnonblank(t,j);
513
+ *clip = (j > 4) ? 4 : j;
514
+ *list_type = AL;
515
+ return AL;
516
+ }
517
+
518
+ strtoul(T(t->text)+t->dle, &q, 10);
519
+ if ( (q > T(t->text)+t->dle) && (q == T(t->text) + (j-1)) ) {
520
+ j = nextnonblank(t,j);
521
+ *clip = (j > 4) ? 4 : j;
522
+ *list_type = OL;
523
+ return AL;
524
+ }
525
+ }
526
+ }
527
+ return 0;
528
+ }
529
+
530
+
531
+ static Line *
532
+ headerblock(Paragraph *pp, int htyp)
533
+ {
534
+ Line *ret = 0;
535
+ Line *p = pp->text;
536
+ int i, j;
537
+
538
+ switch (htyp) {
539
+ case SETEXT:
540
+ /* p->text is header, p->next->text is -'s or ='s
541
+ */
542
+ pp->hnumber = (T(p->next->text)[0] == '=') ? 1 : 2;
543
+
544
+ ret = p->next->next;
545
+ ___mkd_freeLine(p->next);
546
+ p->next = 0;
547
+ break;
548
+
549
+ case ETX:
550
+ /* p->text is ###header###, so we need to trim off
551
+ * the leading and trailing `#`'s
552
+ */
553
+
554
+ for (i=0; (T(p->text)[i] == T(p->text)[0]) && (i < S(p->text)-1)
555
+ && (i < 6); i++)
556
+ ;
557
+
558
+ pp->hnumber = i;
559
+
560
+ while ( (i < S(p->text)) && isspace(T(p->text)[i]) )
561
+ ++i;
562
+
563
+ CLIP(p->text, 0, i);
564
+
565
+ for (j=S(p->text); (j > 1) && (T(p->text)[j-1] == '#'); --j)
566
+ ;
567
+
568
+ while ( j && isspace(T(p->text)[j-1]) )
569
+ --j;
570
+
571
+ S(p->text) = j;
572
+
573
+ ret = p->next;
574
+ p->next = 0;
575
+ break;
576
+ }
577
+ return ret;
578
+ }
579
+
580
+
581
+ static Line *
582
+ codeblock(Paragraph *p)
583
+ {
584
+ Line *t = p->text, *r;
585
+
586
+ for ( ; t; t = r ) {
587
+ CLIP(t->text,0,4);
588
+ t->dle = mkd_firstnonblank(t);
589
+
590
+ if ( !( (r = skipempty(t->next)) && iscode(r)) ) {
591
+ ___mkd_freeLineRange(t,r);
592
+ t->next = 0;
593
+ return r;
594
+ }
595
+ }
596
+ return t;
597
+ }
598
+
599
+
600
+ static int
601
+ centered(Line *first, Line *last)
602
+ {
603
+
604
+ if ( first&&last ) {
605
+ int len = S(last->text);
606
+
607
+ if ( (len > 2) && (strncmp(T(first->text), "->", 2) == 0)
608
+ && (strncmp(T(last->text)+len-2, "<-", 2) == 0) ) {
609
+ CLIP(first->text, 0, 2);
610
+ S(last->text) -= 2;
611
+ return CENTER;
612
+ }
613
+ }
614
+ return 0;
615
+ }
616
+
617
+
618
+ static int
619
+ endoftextblock(Line *t, int toplevelblock, DWORD flags)
620
+ {
621
+ int z;
622
+
623
+ if ( blankline(t)||isquote(t)||ishdr(t,&z)||ishr(t) )
624
+ return 1;
625
+
626
+ /* HORRIBLE STANDARDS KLUDGE: non-toplevel paragraphs absorb adjacent
627
+ * code blocks
628
+ */
629
+ if ( toplevelblock && iscode(t) )
630
+ return 1;
631
+
632
+ /* HORRIBLE STANDARDS KLUDGE: Toplevel paragraphs eat absorb adjacent
633
+ * list items, but sublevel blocks behave properly.
634
+ */
635
+ return toplevelblock ? 0 : islist(t,&z,flags, &z);
636
+ }
637
+
638
+
639
+ static Line *
640
+ textblock(Paragraph *p, int toplevel, DWORD flags)
641
+ {
642
+ Line *t, *next;
643
+
644
+ for ( t = p->text; t ; t = next ) {
645
+ if ( ((next = t->next) == 0) || endoftextblock(next, toplevel, flags) ) {
646
+ p->align = centered(p->text, t);
647
+ t->next = 0;
648
+ return next;
649
+ }
650
+ }
651
+ return t;
652
+ }
653
+
654
+
655
/* Length of the kind prefix in a special div-not-quote block: 3 when
 * <p> starts with "id:", 6 when it starts with "class:" (both matched
 * without regard to case), 0 when it starts with neither.
 */
static int
szmarkerclass(char *p)
{
    static const struct {
        const char *word;
        int len;
    } kinds[] = {
        { "id:",    3 },
        { "class:", 6 },
    };
    unsigned int k;

    for ( k = 0; k < sizeof kinds / sizeof kinds[0]; k++ )
        if ( strncasecmp(p, kinds[k].word, kinds[k].len) == 0 )
            return kinds[k].len;

    return 0;
}
666
+
667
+
668
+ /*
669
+ * check if the first line of a quoted block is the special div-not-quote
670
+ * marker %[kind:]name%
671
+ */
672
+ static int
673
+ isdivmarker(Line *p, int start, DWORD flags)
674
+ {
675
+ char *s;
676
+ int len, i;
677
+
678
+ if ( flags & (MKD_NODIVQUOTE|MKD_STRICT) )
679
+ return 0;
680
+
681
+ len = S(p->text);
682
+ s = T(p->text);
683
+
684
+ if ( !(len && s[start] == '%' && s[len-1] == '%') ) return 0;
685
+
686
+ i = szmarkerclass(s+start+1)+start;
687
+ len -= start+1;
688
+
689
+ while ( ++i < len )
690
+ if ( !isalnum(s[i]) )
691
+ return 0;
692
+
693
+ return 1;
694
+ }
695
+
696
+
697
+ /*
698
+ * accumulate a blockquote.
699
+ *
700
+ * one sick horrible thing about blockquotes is that even though
701
+ * it just takes ^> to start a quote, following lines, if quoted,
702
+ * assume that the prefix is ``>''. This means that code needs
703
+ * to be indented *5* spaces from the leading '>', but *4* spaces
704
+ * from the start of the line. This does not appear to be
705
+ * documented in the reference implementation, but it's the
706
+ * way the markdown sample web form at Daring Fireball works.
707
+ */
708
+ static Line *
709
+ quoteblock(Paragraph *p, DWORD flags)
710
+ {
711
+ Line *t, *q;
712
+ int qp;
713
+
714
+ for ( t = p->text; t ; t = q ) {
715
+ if ( isquote(t) ) {
716
+ /* clip leading spaces */
717
+ for (qp = 0; T(t->text)[qp] != '>'; qp ++)
718
+ /* assert: the first nonblank character on this line
719
+ * will be a >
720
+ */;
721
+ /* clip '>' */
722
+ qp++;
723
+ /* clip next space, if any */
724
+ if ( T(t->text)[qp] == ' ' )
725
+ qp++;
726
+ CLIP(t->text, 0, qp);
727
+ t->dle = mkd_firstnonblank(t);
728
+ }
729
+
730
+ q = skipempty(t->next);
731
+
732
+ if ( (q == 0) || ((q != t->next) && (!isquote(q) || isdivmarker(q,1,flags))) ) {
733
+ ___mkd_freeLineRange(t, q);
734
+ t = q;
735
+ break;
736
+ }
737
+ }
738
+ if ( isdivmarker(p->text,0,flags) ) {
739
+ char *prefix = "class";
740
+ int i;
741
+
742
+ q = p->text;
743
+ p->text = p->text->next;
744
+
745
+ if ( (i = szmarkerclass(1+T(q->text))) == 3 )
746
+ /* and this would be an "%id:" prefix */
747
+ prefix="id";
748
+
749
+ if ( p->ident = malloc(4+strlen(prefix)+S(q->text)) )
750
+ sprintf(p->ident, "%s=\"%.*s\"", prefix, S(q->text)-(i+2),
751
+ T(q->text)+(i+1) );
752
+
753
+ ___mkd_freeLine(q);
754
+ }
755
+ return t;
756
+ }
757
+
758
+
759
+ /*
760
+ * A table block starts with a table header (see istable()), and continues
761
+ * until EOF or a line that /doesn't/ contain a |.
762
+ */
763
+ static Line *
764
+ tableblock(Paragraph *p)
765
+ {
766
+ Line *t, *q;
767
+
768
+ for ( t = p->text; t && (q = t->next); t = t->next ) {
769
+ if ( !memchr(T(q->text), '|', S(q->text)) ) {
770
+ t->next = 0;
771
+ return q;
772
+ }
773
+ }
774
+ return 0;
775
+ }
776
+
777
+
778
+ static Paragraph *Pp(ParagraphRoot *, Line *, int);
779
+ static Paragraph *compile(Line *, int, MMIOT *);
780
+
781
+ typedef int (*linefn)(Line *);
782
+
783
+
784
+ /*
785
+ * pull in a list block. A list block starts with a list marker and
786
+ * runs until the next list marker, the next non-indented paragraph,
787
+ * or EOF. You do not have to indent nonblank lines after the list
788
+ * marker, but multiple paragraphs need to start with a 4-space indent.
789
+ */
790
+ static Line *
791
+ listitem(Paragraph *p, int indent, DWORD flags, linefn check)
792
+ {
793
+ Line *t, *q;
794
+ int clip = indent;
795
+ int z;
796
+
797
+ for ( t = p->text; t ; t = q) {
798
+ CLIP(t->text, 0, clip);
799
+ t->dle = mkd_firstnonblank(t);
800
+
801
+ if ( (q = skipempty(t->next)) == 0 ) {
802
+ ___mkd_freeLineRange(t,q);
803
+ return 0;
804
+ }
805
+
806
+ /* after a blank line, the next block needs to start with a line
807
+ * that's indented 4(? -- reference implementation allows a 1
808
+ * character indent, but that has unfortunate side effects here)
809
+ * spaces, but after that the line doesn't need any indentation
810
+ */
811
+ if ( q != t->next ) {
812
+ if (q->dle < indent) {
813
+ q = t->next;
814
+ t->next = 0;
815
+ return q;
816
+ }
817
+ /* indent at least 2, and at most as
818
+ * as far as the initial line was indented. */
819
+ indent = clip ? clip : 2;
820
+ }
821
+
822
+ if ( (q->dle < indent) && (ishr(q) || islist(q,&z,flags,&z)
823
+ || (check && (*check)(q)))
824
+ && !issetext(q,&z) ) {
825
+ q = t->next;
826
+ t->next = 0;
827
+ return q;
828
+ }
829
+
830
+ clip = (q->dle > indent) ? indent : q->dle;
831
+ }
832
+ return t;
833
+ }
834
+
835
+
836
+ static Line *
837
+ definition_block(Paragraph *top, int clip, MMIOT *f, int kind)
838
+ {
839
+ ParagraphRoot d = { 0, 0 };
840
+ Paragraph *p;
841
+ Line *q = top->text, *text = 0, *labels;
842
+ int z, para;
843
+
844
+ while (( labels = q )) {
845
+
846
+ if ( (q = isdefinition(labels, &z, &kind)) == 0 )
847
+ break;
848
+
849
+ if ( (text = skipempty(q->next)) == 0 )
850
+ break;
851
+
852
+ if (( para = (text != q->next) ))
853
+ ___mkd_freeLineRange(q, text);
854
+
855
+ q->next = 0;
856
+ if ( kind == 1 /* discount dl */ )
857
+ for ( q = labels; q; q = q->next ) {
858
+ CLIP(q->text, 0, 1);
859
+ S(q->text)--;
860
+ }
861
+
862
+ dd_block:
863
+ p = Pp(&d, text, LISTITEM);
864
+
865
+ text = listitem(p, clip, f->flags, (kind==2) ? is_extra_dd : 0);
866
+ p->down = compile(p->text, 0, f);
867
+ p->text = labels; labels = 0;
868
+
869
+ if ( para && p->down ) p->down->align = PARA;
870
+
871
+ if ( (q = skipempty(text)) == 0 )
872
+ break;
873
+
874
+ if (( para = (q != text) )) {
875
+ Line anchor;
876
+
877
+ anchor.next = text;
878
+ ___mkd_freeLineRange(&anchor,q);
879
+ text = q;
880
+
881
+ }
882
+
883
+ if ( kind == 2 && is_extra_dd(q) )
884
+ goto dd_block;
885
+ }
886
+ top->text = 0;
887
+ top->down = T(d);
888
+ return text;
889
+ }
890
+
891
+
892
+ static Line *
893
+ enumerated_block(Paragraph *top, int clip, MMIOT *f, int list_class)
894
+ {
895
+ ParagraphRoot d = { 0, 0 };
896
+ Paragraph *p;
897
+ Line *q = top->text, *text;
898
+ int para = 0, z;
899
+
900
+ while (( text = q )) {
901
+
902
+ p = Pp(&d, text, LISTITEM);
903
+ text = listitem(p, clip, f->flags, 0);
904
+
905
+ p->down = compile(p->text, 0, f);
906
+ p->text = 0;
907
+
908
+ if ( para && p->down ) p->down->align = PARA;
909
+
910
+ if ( (q = skipempty(text)) == 0
911
+ || islist(q, &clip, f->flags, &z) != list_class )
912
+ break;
913
+
914
+ if ( para = (q != text) ) {
915
+ Line anchor;
916
+
917
+ anchor.next = text;
918
+ ___mkd_freeLineRange(&anchor, q);
919
+
920
+ if ( p->down ) p->down->align = PARA;
921
+ }
922
+ }
923
+ top->text = 0;
924
+ top->down = T(d);
925
+ return text;
926
+ }
927
+
928
+
929
/* Title delimiter lookup: given an opening delimiter, return the
 * character that closes the title -- the same character for a single
 * or double quote, ')' for '(' -- or 0 when <c> cannot open a title.
 */
static int
tgood(char c)
{
    if ( c == '\'' || c == '"' )
        return c;
    if ( c == '(' )
        return ')';
    return 0;
}
939
+
940
+
941
+ /*
942
+ * add a new (image or link) footnote to the footnote table
943
+ */
944
+ static Line*
945
+ addfootnote(Line *p, MMIOT* f)
946
+ {
947
+ int j, i;
948
+ int c;
949
+ Line *np = p->next;
950
+
951
+ Footnote *foot = &EXPAND(*f->footnotes);
952
+
953
+ CREATE(foot->tag);
954
+ CREATE(foot->link);
955
+ CREATE(foot->title);
956
+ foot->height = foot->width = 0;
957
+
958
+ for (j=i=p->dle+1; T(p->text)[j] != ']'; j++)
959
+ EXPAND(foot->tag) = T(p->text)[j];
960
+
961
+ EXPAND(foot->tag) = 0;
962
+ S(foot->tag)--;
963
+ j = nextnonblank(p, j+2);
964
+
965
+ while ( (j < S(p->text)) && !isspace(T(p->text)[j]) )
966
+ EXPAND(foot->link) = T(p->text)[j++];
967
+ EXPAND(foot->link) = 0;
968
+ S(foot->link)--;
969
+ j = nextnonblank(p,j);
970
+
971
+ if ( T(p->text)[j] == '=' ) {
972
+ sscanf(T(p->text)+j, "=%dx%d", &foot->width, &foot->height);
973
+ while ( (j < S(p->text)) && !isspace(T(p->text)[j]) )
974
+ ++j;
975
+ j = nextnonblank(p,j);
976
+ }
977
+
978
+
979
+ if ( (j >= S(p->text)) && np && np->dle && tgood(T(np->text)[np->dle]) ) {
980
+ ___mkd_freeLine(p);
981
+ p = np;
982
+ np = p->next;
983
+ j = p->dle;
984
+ }
985
+
986
+ if ( (c = tgood(T(p->text)[j])) ) {
987
+ /* Try to take the rest of the line as a comment; read to
988
+ * EOL, then shrink the string back to before the final
989
+ * quote.
990
+ */
991
+ ++j; /* skip leading quote */
992
+
993
+ while ( j < S(p->text) )
994
+ EXPAND(foot->title) = T(p->text)[j++];
995
+
996
+ while ( S(foot->title) && T(foot->title)[S(foot->title)-1] != c )
997
+ --S(foot->title);
998
+ if ( S(foot->title) ) /* skip trailing quote */
999
+ --S(foot->title);
1000
+ EXPAND(foot->title) = 0;
1001
+ --S(foot->title);
1002
+ }
1003
+
1004
+ ___mkd_freeLine(p);
1005
+ return np;
1006
+ }
1007
+
1008
+
1009
+ /*
1010
+ * allocate a paragraph header, link it to the
1011
+ * tail of the current document
1012
+ */
1013
+ static Paragraph *
1014
+ Pp(ParagraphRoot *d, Line *ptr, int typ)
1015
+ {
1016
+ Paragraph *ret = calloc(sizeof *ret, 1);
1017
+
1018
+ ret->text = ptr;
1019
+ ret->typ = typ;
1020
+
1021
+ return ATTACH(*d, ret);
1022
+ }
1023
+
1024
+
1025
+
1026
+ static Line*
1027
+ consume(Line *ptr, int *eaten)
1028
+ {
1029
+ Line *next;
1030
+ int blanks=0;
1031
+
1032
+ for (; ptr && blankline(ptr); ptr = next, blanks++ ) {
1033
+ next = ptr->next;
1034
+ ___mkd_freeLine(ptr);
1035
+ }
1036
+ if ( ptr ) *eaten = blanks;
1037
+ return ptr;
1038
+ }
1039
+
1040
+
1041
+ /*
1042
+ * top-level compilation; break the document into
1043
+ * style, html, and source blocks with footnote links
1044
+ * weeded out.
1045
+ */
1046
+ static Paragraph *
1047
+ compile_document(Line *ptr, MMIOT *f)
1048
+ {
1049
+ ParagraphRoot d = { 0, 0 };
1050
+ ANCHOR(Line) source = { 0, 0 };
1051
+ Paragraph *p = 0;
1052
+ struct kw *tag;
1053
+ int eaten, unclosed;
1054
+
1055
+ while ( ptr ) {
1056
+ if ( !(f->flags & MKD_NOHTML) && (tag = isopentag(ptr)) ) {
1057
+ /* If we encounter a html/style block, compile and save all
1058
+ * of the cached source BEFORE processing the html/style.
1059
+ */
1060
+ if ( T(source) ) {
1061
+ E(source)->next = 0;
1062
+ p = Pp(&d, 0, SOURCE);
1063
+ p->down = compile(T(source), 1, f);
1064
+ T(source) = E(source) = 0;
1065
+ }
1066
+ p = Pp(&d, ptr, strcmp(tag->id, "STYLE") == 0 ? STYLE : HTML);
1067
+ ptr = htmlblock(p, tag, &unclosed);
1068
+ if ( unclosed ) {
1069
+ p->typ = SOURCE;
1070
+ p->down = compile(p->text, 1, f);
1071
+ p->text = 0;
1072
+ }
1073
+ }
1074
+ else if ( isfootnote(ptr) ) {
1075
+ /* footnotes, like cats, sleep anywhere; pull them
1076
+ * out of the input stream and file them away for
1077
+ * later processing
1078
+ */
1079
+ ptr = consume(addfootnote(ptr, f), &eaten);
1080
+ }
1081
+ else {
1082
+ /* source; cache it up to wait for eof or the
1083
+ * next html/style block
1084
+ */
1085
+ ATTACH(source,ptr);
1086
+ ptr = ptr->next;
1087
+ }
1088
+ }
1089
+ if ( T(source) ) {
1090
+ /* if there's any cached source at EOF, compile
1091
+ * it now.
1092
+ */
1093
+ E(source)->next = 0;
1094
+ p = Pp(&d, 0, SOURCE);
1095
+ p->down = compile(T(source), 1, f);
1096
+ }
1097
+ return T(d);
1098
+ }
1099
+
1100
+
1101
+ /*
1102
+ * break a collection of markdown input into
1103
+ * blocks of lists, code, html, and text to
1104
+ * be marked up.
1105
+ */
1106
+ static Paragraph *
1107
+ compile(Line *ptr, int toplevel, MMIOT *f)
1108
+ {
1109
+ ParagraphRoot d = { 0, 0 };
1110
+ Paragraph *p = 0;
1111
+ Line *r;
1112
+ int para = toplevel;
1113
+ int blocks = 0;
1114
+ int hdr_type, list_type, list_class, indent;
1115
+
1116
+ ptr = consume(ptr, &para);
1117
+
1118
+ while ( ptr ) {
1119
+ if ( iscode(ptr) ) {
1120
+ p = Pp(&d, ptr, CODE);
1121
+
1122
+ if ( f->flags & MKD_1_COMPAT) {
1123
+ /* HORRIBLE STANDARDS KLUDGE: the first line of every block
1124
+ * has trailing whitespace trimmed off.
1125
+ */
1126
+ ___mkd_tidy(&p->text->text);
1127
+ }
1128
+
1129
+ ptr = codeblock(p);
1130
+ }
1131
+ else if ( ishr(ptr) ) {
1132
+ p = Pp(&d, 0, HR);
1133
+ r = ptr;
1134
+ ptr = ptr->next;
1135
+ ___mkd_freeLine(r);
1136
+ }
1137
+ else if (( list_class = islist(ptr, &indent, f->flags, &list_type) )) {
1138
+ if ( list_class == DL ) {
1139
+ p = Pp(&d, ptr, DL);
1140
+ ptr = definition_block(p, indent, f, list_type);
1141
+ }
1142
+ else {
1143
+ p = Pp(&d, ptr, list_type);
1144
+ ptr = enumerated_block(p, indent, f, list_class);
1145
+ }
1146
+ }
1147
+ else if ( isquote(ptr) ) {
1148
+ p = Pp(&d, ptr, QUOTE);
1149
+ ptr = quoteblock(p, f->flags);
1150
+ p->down = compile(p->text, 1, f);
1151
+ p->text = 0;
1152
+ }
1153
+ else if ( ishdr(ptr, &hdr_type) ) {
1154
+ p = Pp(&d, ptr, HDR);
1155
+ ptr = headerblock(p, hdr_type);
1156
+ }
1157
+ else if ( istable(ptr) && !(f->flags & (MKD_STRICT|MKD_NOTABLES)) ) {
1158
+ p = Pp(&d, ptr, TABLE);
1159
+ ptr = tableblock(p);
1160
+ }
1161
+ else {
1162
+ p = Pp(&d, ptr, MARKUP);
1163
+ ptr = textblock(p, toplevel, f->flags);
1164
+ }
1165
+
1166
+ if ( (para||toplevel) && !p->align )
1167
+ p->align = PARA;
1168
+
1169
+ blocks++;
1170
+ para = toplevel || (blocks > 1);
1171
+ ptr = consume(ptr, &para);
1172
+
1173
+ if ( para && !p->align )
1174
+ p->align = PARA;
1175
+
1176
+ }
1177
+ return T(d);
1178
+ }
1179
+
1180
+
1181
+ /*
1182
+ * the guts of the markdown() function, ripped out so I can do
1183
+ * debugging.
1184
+ */
1185
+
1186
+ /*
1187
+ * prepare and compile `text`, returning a Paragraph tree.
1188
+ */
1189
+ int
1190
+ mkd_compile(Document *doc, DWORD flags)
1191
+ {
1192
+ if ( !doc )
1193
+ return 0;
1194
+
1195
+ if ( doc->compiled )
1196
+ return 1;
1197
+
1198
+ doc->compiled = 1;
1199
+ memset(doc->ctx, 0, sizeof(MMIOT) );
1200
+ doc->ctx->cb = &(doc->cb);
1201
+ doc->ctx->flags = flags & USER_FLAGS;
1202
+ CREATE(doc->ctx->in);
1203
+ doc->ctx->footnotes = malloc(sizeof doc->ctx->footnotes[0]);
1204
+ CREATE(*doc->ctx->footnotes);
1205
+
1206
+ mkd_initialize();
1207
+
1208
+ doc->code = compile_document(T(doc->content), doc->ctx);
1209
+ qsort(T(*doc->ctx->footnotes), S(*doc->ctx->footnotes),
1210
+ sizeof T(*doc->ctx->footnotes)[0],
1211
+ (stfu)__mkd_footsort);
1212
+ memset(&doc->content, 0, sizeof doc->content);
1213
+ return 1;
1214
+ }
1215
+