rdiscountwl 1.0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/ext/markdown.c ADDED
@@ -0,0 +1,1361 @@
1
+ /* markdown: a C implementation of John Gruber's Markdown markup language.
2
+ *
3
+ * Copyright (C) 2007 David L Parsons.
4
+ * The redistribution terms are provided in the COPYRIGHT file that must
5
+ * be distributed with this source code.
6
+ */
7
+ #include "config.h"
8
+
9
+ #include <stdio.h>
10
+ #include <string.h>
11
+ #include <stdarg.h>
12
+ #include <stdlib.h>
13
+ #include <time.h>
14
+ #include <ctype.h>
15
+
16
+ #include "cstring.h"
17
+ #include "markdown.h"
18
+ #include "amalloc.h"
19
+ #include "tags.h"
20
+
21
+ typedef int (*stfu)(const void*,const void*);
22
+
23
+ typedef ANCHOR(Paragraph) ParagraphRoot;
24
+
25
+ static Paragraph *Pp(ParagraphRoot *, Line *, int);
26
+ static Paragraph *compile(Line *, int, MMIOT *);
27
+
28
+ /* case insensitive string sort for Footnote tags.
29
+ */
30
+ int
31
+ __mkd_footsort(Footnote *a, Footnote *b)
32
+ {
33
+ int i;
34
+ char ac, bc;
35
+
36
+ if ( S(a->tag) != S(b->tag) )
37
+ return S(a->tag) - S(b->tag);
38
+
39
+ for ( i=0; i < S(a->tag); i++) {
40
+ ac = tolower(T(a->tag)[i]);
41
+ bc = tolower(T(b->tag)[i]);
42
+
43
+ if ( isspace(ac) && isspace(bc) )
44
+ continue;
45
+ if ( ac != bc )
46
+ return ac - bc;
47
+ }
48
+ return 0;
49
+ }
50
+
51
+
52
+ /* find the first blank character after position <i>
53
+ */
54
+ static int
55
+ nextblank(Line *t, int i)
56
+ {
57
+ while ( (i < S(t->text)) && !isspace(T(t->text)[i]) )
58
+ ++i;
59
+ return i;
60
+ }
61
+
62
+
63
+ /* find the next nonblank character after position <i>
64
+ */
65
+ static int
66
+ nextnonblank(Line *t, int i)
67
+ {
68
+ while ( (i < S(t->text)) && isspace(T(t->text)[i]) )
69
+ ++i;
70
+ return i;
71
+ }
72
+
73
+
74
+ /* find the first nonblank character on the Line.
75
+ */
76
+ int
77
+ mkd_firstnonblank(Line *p)
78
+ {
79
+ return nextnonblank(p,0);
80
+ }
81
+
82
+
83
+ static inline int
84
+ blankline(Line *p)
85
+ {
86
+ return ! (p && (S(p->text) > p->dle) );
87
+ }
88
+
89
+
90
+ static Line *
91
+ skipempty(Line *p)
92
+ {
93
+ while ( p && (p->dle == S(p->text)) )
94
+ p = p->next;
95
+ return p;
96
+ }
97
+
98
+
99
+ void
100
+ ___mkd_tidy(Cstring *t)
101
+ {
102
+ while ( S(*t) && isspace(T(*t)[S(*t)-1]) )
103
+ --S(*t);
104
+ }
105
+
106
+
107
+ static struct kw comment = { "!--", 3, 0 };
108
+
109
+ static struct kw *
110
+ isopentag(Line *p)
111
+ {
112
+ int i=0, len;
113
+ char *line;
114
+
115
+ if ( !p ) return 0;
116
+
117
+ line = T(p->text);
118
+ len = S(p->text);
119
+
120
+ if ( len < 3 || line[0] != '<' )
121
+ return 0;
122
+
123
+ if ( line[1] == '!' && line[2] == '-' && line[3] == '-' )
124
+ /* comments need special case handling, because
125
+ * the !-- doesn't need to end in a whitespace
126
+ */
127
+ return &comment;
128
+
129
+ /* find how long the tag is so we can check to see if
130
+ * it's a block-level tag
131
+ */
132
+ for ( i=1; i < len && T(p->text)[i] != '>'
133
+ && T(p->text)[i] != '/'
134
+ && !isspace(T(p->text)[i]); ++i )
135
+ ;
136
+
137
+
138
+ return mkd_search_tags(T(p->text)+1, i-1);
139
+ }
140
+
141
+
142
+ typedef struct _flo {
143
+ Line *t;
144
+ int i;
145
+ } FLO;
146
+
147
+ #define floindex(x) (x.i)
148
+
149
+
150
+ static int
151
+ flogetc(FLO *f)
152
+ {
153
+ if ( f && f->t ) {
154
+ if ( f->i < S(f->t->text) )
155
+ return T(f->t->text)[f->i++];
156
+ f->t = f->t->next;
157
+ f->i = 0;
158
+ return flogetc(f);
159
+ }
160
+ return EOF;
161
+ }
162
+
163
+
164
+ static void
165
+ splitline(Line *t, int cutpoint)
166
+ {
167
+ if ( t && (cutpoint < S(t->text)) ) {
168
+ Line *tmp = calloc(1, sizeof *tmp);
169
+
170
+ tmp->next = t->next;
171
+ t->next = tmp;
172
+
173
+ tmp->dle = t->dle;
174
+ SUFFIX(tmp->text, T(t->text)+cutpoint, S(t->text)-cutpoint);
175
+ S(t->text) = cutpoint;
176
+ }
177
+ }
178
+
179
+ #define UNCHECK(l) ((l)->flags &= ~CHECKED)
180
+
181
+ #define UNLESS_FENCED(t) if (fenced) { \
182
+ other = 1; l->count += (c == ' ' ? 0 : -1); \
183
+ } else { t; }
184
+
185
+ /*
186
+ * walk a line, seeing if it's any of half a dozen interesting regular
187
+ * types.
188
+ */
189
+ static void
190
+ checkline(Line *l, DWORD flags)
191
+ {
192
+ int eol, i;
193
+ int dashes = 0, spaces = 0,
194
+ equals = 0, underscores = 0,
195
+ stars = 0, tildes = 0, other = 0,
196
+ backticks = 0, fenced = 0;
197
+
198
+ l->flags |= CHECKED;
199
+ l->kind = chk_text;
200
+ l->count = 0;
201
+
202
+ if (l->dle >= 4) { l->kind=chk_code; return; }
203
+
204
+ for ( eol = S(l->text); eol > l->dle && isspace(T(l->text)[eol-1]); --eol )
205
+ ;
206
+
207
+ for (i=l->dle; i<eol; i++) {
208
+ register int c = T(l->text)[i];
209
+ int is_fence_char = 0;
210
+
211
+ if ( c != ' ' ) l->count++;
212
+
213
+ switch (c) {
214
+ case '-': UNLESS_FENCED(dashes = 1); break;
215
+ case ' ': UNLESS_FENCED(spaces = 1); break;
216
+ case '=': equals = 1; break;
217
+ case '_': UNLESS_FENCED(underscores = 1); break;
218
+ case '*': stars = 1; break;
219
+ default:
220
+ if (flags & MKD_FENCEDCODE) {
221
+ switch (c) {
222
+ case '~': if (other) return; is_fence_char = 1; tildes = 1; break;
223
+ case '`': if (other) return; is_fence_char = 1; backticks = 1; break;
224
+ }
225
+ if (is_fence_char) {
226
+ fenced = 1;
227
+ break;
228
+ }
229
+ }
230
+ other = 1;
231
+ l->count--;
232
+ if (!fenced) return;
233
+ }
234
+ }
235
+
236
+ if ( dashes + equals + underscores + stars + tildes + backticks > 1 )
237
+ return;
238
+
239
+ if ( spaces ) {
240
+ if ( (underscores || stars || dashes) )
241
+ l->kind = chk_hr;
242
+ return;
243
+ }
244
+
245
+ if ( stars || underscores ) { l->kind = chk_hr; }
246
+ else if ( dashes ) { l->kind = chk_dash; }
247
+ else if ( equals ) { l->kind = chk_equal; }
248
+ else if ( tildes ) { l->kind = chk_tilde; }
249
+ else if ( backticks ) { l->kind = chk_backtick; }
250
+ }
251
+
252
+
253
+
254
+ /* markdown only does special handling of comments if the comment end
255
+ * is at the end of a line
256
+ */
257
+ static Line *
258
+ commentblock(Paragraph *p, int *unclosed)
259
+ {
260
+ Line *t, *ret;
261
+ char *end;
262
+
263
+ for ( t = p->text; t ; t = t->next) {
264
+ if ( end = strstr(T(t->text), "-->") ) {
265
+ if ( nextnonblank(t, 3 + (end - T(t->text))) < S(t->text) )
266
+ continue;
267
+ /*splitline(t, 3 + (end - T(t->text)) );*/
268
+ ret = t->next;
269
+ t->next = 0;
270
+ return ret;
271
+ }
272
+ }
273
+
274
+ *unclosed = 1;
275
+ return t;
276
+
277
+ }
278
+
279
+
280
+ static Line *
281
+ htmlblock(Paragraph *p, struct kw *tag, int *unclosed)
282
+ {
283
+ Line *ret;
284
+ FLO f = { p->text, 0 };
285
+ int c;
286
+ int i, closing, depth=0;
287
+
288
+ *unclosed = 0;
289
+
290
+ if ( tag == &comment )
291
+ return commentblock(p, unclosed);
292
+
293
+ if ( tag->selfclose ) {
294
+ ret = f.t->next;
295
+ f.t->next = 0;
296
+ return ret;
297
+ }
298
+
299
+ while ( (c = flogetc(&f)) != EOF ) {
300
+ if ( c == '<' ) {
301
+ /* tag? */
302
+ c = flogetc(&f);
303
+ if ( c == '!' ) { /* comment? */
304
+ if ( flogetc(&f) == '-' && flogetc(&f) == '-' ) {
305
+ /* yes */
306
+ while ( (c = flogetc(&f)) != EOF ) {
307
+ if ( c == '-' && flogetc(&f) == '-'
308
+ && flogetc(&f) == '>')
309
+ /* consumed whole comment */
310
+ break;
311
+ }
312
+ }
313
+ }
314
+ else {
315
+ if ( closing = (c == '/') ) c = flogetc(&f);
316
+
317
+ for ( i=0; i < tag->size; i++, c=flogetc(&f) ) {
318
+ if ( tag->id[i] != toupper(c) )
319
+ break;
320
+ }
321
+
322
+ if ( (i == tag->size) && !isalnum(c) ) {
323
+ depth = depth + (closing ? -1 : 1);
324
+ if ( depth == 0 ) {
325
+ while ( c != EOF && c != '>' ) {
326
+ /* consume trailing gunk in close tag */
327
+ c = flogetc(&f);
328
+ }
329
+ if ( c == EOF )
330
+ break;
331
+ if ( !f.t )
332
+ return 0;
333
+ splitline(f.t, floindex(f));
334
+ ret = f.t->next;
335
+ f.t->next = 0;
336
+ return ret;
337
+ }
338
+ }
339
+ }
340
+ }
341
+ }
342
+ *unclosed = 1;
343
+ return 0;
344
+ }
345
+
346
+
347
+ /* footnotes look like ^<whitespace>{0,3}[stuff]: <content>$
348
+ */
349
+ static int
350
+ isfootnote(Line *t)
351
+ {
352
+ int i;
353
+
354
+ if ( ( (i = t->dle) > 3) || (T(t->text)[i] != '[') )
355
+ return 0;
356
+
357
+ for ( ++i; i < S(t->text) ; ++i ) {
358
+ if ( T(t->text)[i] == '[' )
359
+ return 0;
360
+ else if ( T(t->text)[i] == ']' )
361
+ return ( T(t->text)[i+1] == ':' ) ;
362
+ }
363
+ return 0;
364
+ }
365
+
366
+
367
+ static inline int
368
+ isquote(Line *t)
369
+ {
370
+ return (t->dle < 4 && T(t->text)[t->dle] == '>');
371
+ }
372
+
373
+
374
+ static inline int
375
+ iscode(Line *t)
376
+ {
377
+ return (t->dle >= 4);
378
+ }
379
+
380
+
381
+ static inline int
382
+ ishr(Line *t, DWORD flags)
383
+ {
384
+ if ( ! (t->flags & CHECKED) )
385
+ checkline(t, flags);
386
+
387
+ if ( t->count > 2 )
388
+ return t->kind == chk_hr || t->kind == chk_dash || t->kind == chk_equal;
389
+ return 0;
390
+ }
391
+
392
+
393
+ static int
394
+ issetext(Line *t, int *htyp, DWORD flags)
395
+ {
396
+ Line *n;
397
+
398
+ /* check for setext-style HEADER
399
+ * ======
400
+ */
401
+
402
+ if ( (n = t->next) ) {
403
+ if ( !(n->flags & CHECKED) )
404
+ checkline(n, flags);
405
+
406
+ if ( n->kind == chk_dash || n->kind == chk_equal ) {
407
+ *htyp = SETEXT;
408
+ return 1;
409
+ }
410
+ }
411
+ return 0;
412
+ }
413
+
414
+
415
+ static int
416
+ ishdr(Line *t, int *htyp, DWORD flags)
417
+ {
418
+ /* ANY leading `#`'s make this into an ETX header
419
+ */
420
+ if ( (t->dle == 0) && (S(t->text) > 1) && (T(t->text)[0] == '#') ) {
421
+ *htyp = ETX;
422
+ return 1;
423
+ }
424
+
425
+ /* And if not, maybe it's a SETEXT header instead
426
+ */
427
+ return issetext(t, htyp, flags);
428
+ }
429
+
430
+
431
+ static inline int
432
+ end_of_block(Line *t, DWORD flags)
433
+ {
434
+ int dummy;
435
+
436
+ if ( !t )
437
+ return 0;
438
+
439
+ return ( (S(t->text) <= t->dle) || ishr(t, flags) || ishdr(t, &dummy, flags) );
440
+ }
441
+
442
+
443
+ static Line*
444
+ is_discount_dt(Line *t, int *clip, DWORD flags)
445
+ {
446
+ if ( !(flags & MKD_NODLDISCOUNT)
447
+ && t
448
+ && t->next
449
+ && (S(t->text) > 2)
450
+ && (t->dle == 0)
451
+ && (T(t->text)[0] == '=')
452
+ && (T(t->text)[S(t->text)-1] == '=') ) {
453
+ if ( t->next->dle >= 4 ) {
454
+ *clip = 4;
455
+ return t;
456
+ }
457
+ else
458
+ return is_discount_dt(t->next, clip, flags);
459
+ }
460
+ return 0;
461
+ }
462
+
463
+
464
+ static int
465
+ is_extra_dd(Line *t)
466
+ {
467
+ return (t->dle < 4) && (T(t->text)[t->dle] == ':')
468
+ && isspace(T(t->text)[t->dle+1]);
469
+ }
470
+
471
+
472
+ static Line*
473
+ is_extra_dt(Line *t, int *clip, DWORD flags)
474
+ {
475
+ if ( flags & MKD_DLEXTRA
476
+ && t
477
+ && t->next && S(t->text) && T(t->text)[0] != '='
478
+ && T(t->text)[S(t->text)-1] != '=') {
479
+ Line *x;
480
+
481
+ if ( iscode(t) || end_of_block(t, flags) )
482
+ return 0;
483
+
484
+ if ( (x = skipempty(t->next)) && is_extra_dd(x) ) {
485
+ *clip = x->dle+2;
486
+ return t;
487
+ }
488
+
489
+ if ( x=is_extra_dt(t->next, clip, flags) )
490
+ return x;
491
+ }
492
+ return 0;
493
+ }
494
+
495
+
496
+ static Line*
497
+ isdefinition(Line *t, int *clip, int *kind, DWORD flags)
498
+ {
499
+ Line *ret;
500
+
501
+ *kind = 1;
502
+ if ( ret = is_discount_dt(t,clip,flags) )
503
+ return ret;
504
+
505
+ *kind=2;
506
+ return is_extra_dt(t,clip,flags);
507
+ }
508
+
509
+
510
+ static int
511
+ islist(Line *t, int *clip, DWORD flags, int *list_type)
512
+ {
513
+ int i, j;
514
+ char *q;
515
+
516
+ if ( end_of_block(t, flags) )
517
+ return 0;
518
+
519
+ if ( !(flags & (MKD_NODLIST|MKD_STRICT)) && isdefinition(t,clip,list_type,flags) )
520
+ return DL;
521
+
522
+ if ( strchr("*-+", T(t->text)[t->dle]) && isspace(T(t->text)[t->dle+1]) ) {
523
+ i = nextnonblank(t, t->dle+1);
524
+ *clip = (i > 4) ? 4 : i;
525
+ *list_type = UL;
526
+ return AL;
527
+ }
528
+
529
+ if ( (j = nextblank(t,t->dle)) > t->dle ) {
530
+ if ( T(t->text)[j-1] == '.' ) {
531
+
532
+ if ( !(flags & (MKD_NOALPHALIST|MKD_STRICT))
533
+ && (j == t->dle + 2)
534
+ && isalpha(T(t->text)[t->dle]) ) {
535
+ j = nextnonblank(t,j);
536
+ *clip = (j > 4) ? 4 : j;
537
+ *list_type = AL;
538
+ return AL;
539
+ }
540
+
541
+ strtoul(T(t->text)+t->dle, &q, 10);
542
+ if ( (q > T(t->text)+t->dle) && (q == T(t->text) + (j-1)) ) {
543
+ j = nextnonblank(t,j);
544
+ *clip = j;
545
+ *list_type = OL;
546
+ return AL;
547
+ }
548
+ }
549
+ }
550
+ return 0;
551
+ }
552
+
553
+
554
+ static Line *
555
+ headerblock(Paragraph *pp, int htyp)
556
+ {
557
+ Line *ret = 0;
558
+ Line *p = pp->text;
559
+ int i, j;
560
+
561
+ switch (htyp) {
562
+ case SETEXT:
563
+ /* p->text is header, p->next->text is -'s or ='s
564
+ */
565
+ pp->hnumber = (T(p->next->text)[0] == '=') ? 1 : 2;
566
+
567
+ ret = p->next->next;
568
+ ___mkd_freeLine(p->next);
569
+ p->next = 0;
570
+ break;
571
+
572
+ case ETX:
573
+ /* p->text is ###header###, so we need to trim off
574
+ * the leading and trailing `#`'s
575
+ */
576
+
577
+ for (i=0; (T(p->text)[i] == T(p->text)[0]) && (i < S(p->text)-1)
578
+ && (i < 6); i++)
579
+ ;
580
+
581
+ pp->hnumber = i;
582
+
583
+ while ( (i < S(p->text)) && isspace(T(p->text)[i]) )
584
+ ++i;
585
+
586
+ CLIP(p->text, 0, i);
587
+ UNCHECK(p);
588
+
589
+ for (j=S(p->text); (j > 1) && (T(p->text)[j-1] == '#'); --j)
590
+ ;
591
+
592
+ while ( j && isspace(T(p->text)[j-1]) )
593
+ --j;
594
+
595
+ S(p->text) = j;
596
+
597
+ ret = p->next;
598
+ p->next = 0;
599
+ break;
600
+ }
601
+ return ret;
602
+ }
603
+
604
+
605
+ static Line *
606
+ codeblock(Paragraph *p)
607
+ {
608
+ Line *t = p->text, *r;
609
+
610
+ for ( ; t; t = r ) {
611
+ CLIP(t->text,0,4);
612
+ t->dle = mkd_firstnonblank(t);
613
+
614
+ if ( !( (r = skipempty(t->next)) && iscode(r)) ) {
615
+ ___mkd_freeLineRange(t,r);
616
+ t->next = 0;
617
+ return r;
618
+ }
619
+ }
620
+ return t;
621
+ }
622
+
623
+
624
+ static int
625
+ iscodefence(Line *r, int size, line_type kind, DWORD flags)
626
+ {
627
+ if ( !(flags & MKD_FENCEDCODE) )
628
+ return 0;
629
+
630
+ if ( !(r->flags & CHECKED) )
631
+ checkline(r, flags);
632
+
633
+ if ( kind )
634
+ return (r->kind == kind) && (r->count >= size);
635
+ else
636
+ return (r->kind == chk_tilde || r->kind == chk_backtick) && (r->count >= size);
637
+ }
638
+
639
+ static Paragraph *
640
+ fencedcodeblock(ParagraphRoot *d, Line **ptr, DWORD flags)
641
+ {
642
+ Line *first, *r;
643
+ Paragraph *ret;
644
+
645
+ first = (*ptr);
646
+
647
+ /* don't allow zero-length code fences
648
+ */
649
+ if ( (first->next == 0) || iscodefence(first->next, first->count, 0, flags) )
650
+ return 0;
651
+
652
+ /* find the closing fence, discard the fences,
653
+ * return a Paragraph with the contents
654
+ */
655
+ for ( r = first; r && r->next; r = r->next )
656
+ if ( iscodefence(r->next, first->count, first->kind, flags) ) {
657
+ (*ptr) = r->next->next;
658
+ ret = Pp(d, first->next, CODE);
659
+ if (S(first->text) - first->count > 0) {
660
+ char *lang_attr = T(first->text) + first->count;
661
+ while ( *lang_attr != 0 && *lang_attr == ' ' ) lang_attr++;
662
+ ret->lang = strdup(lang_attr);
663
+ }
664
+ else {
665
+ ret->lang = 0;
666
+ }
667
+ ___mkd_freeLine(first);
668
+ ___mkd_freeLine(r->next);
669
+ r->next = 0;
670
+ return ret;
671
+ }
672
+ return 0;
673
+ }
674
+
675
+
676
+ static int
677
+ centered(Line *first, Line *last)
678
+ {
679
+
680
+ if ( first&&last ) {
681
+ int len = S(last->text);
682
+
683
+ if ( (len > 2) && (strncmp(T(first->text), "->", 2) == 0)
684
+ && (strncmp(T(last->text)+len-2, "<-", 2) == 0) ) {
685
+ CLIP(first->text, 0, 2);
686
+ S(last->text) -= 2;
687
+ return CENTER;
688
+ }
689
+ }
690
+ return 0;
691
+ }
692
+
693
+
694
+ static int
695
+ endoftextblock(Line *t, int toplevelblock, DWORD flags)
696
+ {
697
+ int z;
698
+
699
+ if ( end_of_block(t, flags) || isquote(t) )
700
+ return 1;
701
+
702
+ /* HORRIBLE STANDARDS KLUDGES:
703
+ * 1. non-toplevel paragraphs absorb adjacent code blocks
704
+ * 2. Toplevel paragraphs eat absorb adjacent list items,
705
+ * but sublevel blocks behave properly.
706
+ * (What this means is that we only need to check for code
707
+ * blocks at toplevel, and only check for list items at
708
+ * nested levels.)
709
+ */
710
+ return toplevelblock ? 0 : islist(t,&z,flags,&z);
711
+ }
712
+
713
+
714
+ static Line *
715
+ textblock(Paragraph *p, int toplevel, DWORD flags)
716
+ {
717
+ Line *t, *next;
718
+
719
+ for ( t = p->text; t ; t = next ) {
720
+ if ( ((next = t->next) == 0) || endoftextblock(next, toplevel, flags) ) {
721
+ p->align = centered(p->text, t);
722
+ t->next = 0;
723
+ return next;
724
+ }
725
+ }
726
+ return t;
727
+ }
728
+
729
+
730
/* Length of the leading "id:" or "class:" keyword (matched without
 * regard to case) at the start of <p>; 0 when neither is present.
 */
static int
szmarkerclass(char *p)
{
    static const struct {
        const char *word;
        int         len;
    } kinds[] = {
        { "id:",    3 },
        { "class:", 6 },
    };
    unsigned k;

    for ( k = 0; k < sizeof kinds / sizeof kinds[0]; k++ )
        if ( strncasecmp(p, kinds[k].word, kinds[k].len) == 0 )
            return kinds[k].len;

    return 0;
}
741
+
742
+
743
+ /*
744
+ * check if the first line of a quoted block is the special div-not-quote
745
+ * marker %[kind:]name%
746
+ */
747
+ #define iscsschar(c) (isalpha(c) || (c == '-') || (c == '_') )
748
+
749
+ static int
750
+ isdivmarker(Line *p, int start, DWORD flags)
751
+ {
752
+ char *s;
753
+ int last, i;
754
+
755
+ if ( flags & (MKD_NODIVQUOTE|MKD_STRICT) )
756
+ return 0;
757
+
758
+ start = nextnonblank(p, start);
759
+ last= S(p->text) - (1 + start);
760
+ s = T(p->text) + start;
761
+
762
+ if ( (last <= 0) || (*s != '%') || (s[last] != '%') )
763
+ return 0;
764
+
765
+ i = szmarkerclass(s+1);
766
+
767
+ if ( !iscsschar(s[i+1]) )
768
+ return 0;
769
+ while ( ++i < last )
770
+ if ( !(isdigit(s[i]) || iscsschar(s[i])) )
771
+ return 0;
772
+
773
+ return 1;
774
+ }
775
+
776
+
777
+ /*
778
+ * accumulate a blockquote.
779
+ *
780
+ * one sick horrible thing about blockquotes is that even though
781
+ * it just takes ^> to start a quote, following lines, if quoted,
782
+ * assume that the prefix is ``> ''. This means that code needs
783
+ * to be indented *5* spaces from the leading '>', but *4* spaces
784
+ * from the start of the line. This does not appear to be
785
+ * documented in the reference implementation, but it's the
786
+ * way the markdown sample web form at Daring Fireball works.
787
+ */
788
+ static Line *
789
+ quoteblock(Paragraph *p, DWORD flags)
790
+ {
791
+ Line *t, *q;
792
+ int qp;
793
+
794
+ for ( t = p->text; t ; t = q ) {
795
+ if ( isquote(t) ) {
796
+ /* clip leading spaces */
797
+ for (qp = 0; T(t->text)[qp] != '>'; qp ++)
798
+ /* assert: the first nonblank character on this line
799
+ * will be a >
800
+ */;
801
+ /* clip '>' */
802
+ qp++;
803
+ /* clip next space, if any */
804
+ if ( T(t->text)[qp] == ' ' )
805
+ qp++;
806
+ CLIP(t->text, 0, qp);
807
+ UNCHECK(t);
808
+ t->dle = mkd_firstnonblank(t);
809
+ }
810
+
811
+ q = skipempty(t->next);
812
+
813
+ if ( (q == 0) || ((q != t->next) && (!isquote(q) || isdivmarker(q,1,flags))) ) {
814
+ ___mkd_freeLineRange(t, q);
815
+ t = q;
816
+ break;
817
+ }
818
+ }
819
+ if ( isdivmarker(p->text,0,flags) ) {
820
+ char *prefix = "class";
821
+ int i;
822
+
823
+ q = p->text;
824
+ p->text = p->text->next;
825
+
826
+ if ( (i = szmarkerclass(1+T(q->text))) == 3 )
827
+ /* and this would be an "%id:" prefix */
828
+ prefix="id";
829
+
830
+ if ( p->ident = malloc(4+strlen(prefix)+S(q->text)) )
831
+ sprintf(p->ident, "%s=\"%.*s\"", prefix, S(q->text)-(i+2),
832
+ T(q->text)+(i+1) );
833
+
834
+ ___mkd_freeLine(q);
835
+ }
836
+ return t;
837
+ }
838
+
839
+
840
+ typedef int (*linefn)(Line *);
841
+
842
+
843
+ /*
844
+ * pull in a list block. A list block starts with a list marker and
845
+ * runs until the next list marker, the next non-indented paragraph,
846
+ * or EOF. You do not have to indent nonblank lines after the list
847
+ * marker, but multiple paragraphs need to start with a 4-space indent.
848
+ */
849
+ static Line *
850
+ listitem(Paragraph *p, int indent, DWORD flags, linefn check)
851
+ {
852
+ Line *t, *q;
853
+ int clip = indent;
854
+ int z;
855
+
856
+ for ( t = p->text; t ; t = q) {
857
+ CLIP(t->text, 0, clip);
858
+ UNCHECK(t);
859
+ t->dle = mkd_firstnonblank(t);
860
+
861
+ /* even though we had to trim a long leader off this item,
862
+ * the indent for trailing paragraphs is still 4...
863
+ */
864
+ if (indent > 4) {
865
+ indent = 4;
866
+ }
867
+ if ( (q = skipempty(t->next)) == 0 ) {
868
+ ___mkd_freeLineRange(t,q);
869
+ return 0;
870
+ }
871
+
872
+ /* after a blank line, the next block needs to start with a line
873
+ * that's indented 4(? -- reference implementation allows a 1
874
+ * character indent, but that has unfortunate side effects here)
875
+ * spaces, but after that the line doesn't need any indentation
876
+ */
877
+ if ( q != t->next ) {
878
+ if (q->dle < indent) {
879
+ q = t->next;
880
+ t->next = 0;
881
+ return q;
882
+ }
883
+ /* indent at least 2, and at most as
884
+ * as far as the initial line was indented. */
885
+ indent = clip ? clip : 2;
886
+ }
887
+
888
+ if ( (q->dle < indent) && (ishr(q,flags) || islist(q,&z,flags,&z)
889
+ || (check && (*check)(q)))
890
+ && !issetext(q,&z,flags) ) {
891
+ q = t->next;
892
+ t->next = 0;
893
+ return q;
894
+ }
895
+
896
+ clip = (q->dle > indent) ? indent : q->dle;
897
+ }
898
+ return t;
899
+ }
900
+
901
+
902
+ static Line *
903
+ definition_block(Paragraph *top, int clip, MMIOT *f, int kind)
904
+ {
905
+ ParagraphRoot d = { 0, 0 };
906
+ Paragraph *p;
907
+ Line *q = top->text, *text = 0, *labels;
908
+ int z, para;
909
+
910
+ while (( labels = q )) {
911
+
912
+ if ( (q = isdefinition(labels, &z, &kind, f->flags)) == 0 )
913
+ break;
914
+
915
+ if ( (text = skipempty(q->next)) == 0 )
916
+ break;
917
+
918
+ if ( para = (text != q->next) )
919
+ ___mkd_freeLineRange(q, text);
920
+
921
+ q->next = 0;
922
+ if ( kind == 1 /* discount dl */ )
923
+ for ( q = labels; q; q = q->next ) {
924
+ CLIP(q->text, 0, 1);
925
+ UNCHECK(q);
926
+ S(q->text)--;
927
+ }
928
+
929
+ dd_block:
930
+ p = Pp(&d, text, LISTITEM);
931
+
932
+ text = listitem(p, clip, f->flags, (kind==2) ? is_extra_dd : 0);
933
+ p->down = compile(p->text, 0, f);
934
+ p->text = labels; labels = 0;
935
+
936
+ if ( para && p->down ) p->down->align = PARA;
937
+
938
+ if ( (q = skipempty(text)) == 0 )
939
+ break;
940
+
941
+ if ( para = (q != text) ) {
942
+ Line anchor;
943
+
944
+ anchor.next = text;
945
+ ___mkd_freeLineRange(&anchor,q);
946
+ text = q;
947
+
948
+ }
949
+
950
+ if ( kind == 2 && is_extra_dd(q) )
951
+ goto dd_block;
952
+ }
953
+ top->text = 0;
954
+ top->down = T(d);
955
+ return text;
956
+ }
957
+
958
+
959
+ static Line *
960
+ enumerated_block(Paragraph *top, int clip, MMIOT *f, int list_class)
961
+ {
962
+ ParagraphRoot d = { 0, 0 };
963
+ Paragraph *p;
964
+ Line *q = top->text, *text;
965
+ int para = 0, z;
966
+
967
+ while (( text = q )) {
968
+
969
+ p = Pp(&d, text, LISTITEM);
970
+ text = listitem(p, clip, f->flags, 0);
971
+
972
+ p->down = compile(p->text, 0, f);
973
+ p->text = 0;
974
+
975
+ if ( para && p->down ) p->down->align = PARA;
976
+
977
+ if ( (q = skipempty(text)) == 0
978
+ || islist(q, &clip, f->flags, &z) != list_class )
979
+ break;
980
+
981
+ if ( para = (q != text) ) {
982
+ Line anchor;
983
+
984
+ anchor.next = text;
985
+ ___mkd_freeLineRange(&anchor, q);
986
+
987
+ if ( p->down ) p->down->align = PARA;
988
+ }
989
+ }
990
+ top->text = 0;
991
+ top->down = T(d);
992
+ return text;
993
+ }
994
+
995
+
996
/* If <c> can open a footnote title, return the character that
 * closes it: a quote closes itself and '(' is closed by ')'.
 * Returns 0 for any other character.
 */
static int
tgood(char c)
{
    if ( c == '\'' || c == '"' )
        return c;

    if ( c == '(' )
        return ')';

    return 0;
}
1006
+
1007
+
1008
+ /*
1009
+ * add a new (image or link) footnote to the footnote table
1010
+ */
1011
+ static Line*
1012
+ addfootnote(Line *p, MMIOT* f)
1013
+ {
1014
+ int j, i;
1015
+ int c;
1016
+ Line *np = p->next;
1017
+
1018
+ Footnote *foot = &EXPAND(f->footnotes->note);
1019
+
1020
+ CREATE(foot->tag);
1021
+ CREATE(foot->link);
1022
+ CREATE(foot->title);
1023
+ foot->flags = foot->height = foot->width = 0;
1024
+
1025
+ for (j=i=p->dle+1; T(p->text)[j] != ']'; j++)
1026
+ EXPAND(foot->tag) = T(p->text)[j];
1027
+
1028
+ EXPAND(foot->tag) = 0;
1029
+ S(foot->tag)--;
1030
+ j = nextnonblank(p, j+2);
1031
+
1032
+ if ( (f->flags & MKD_EXTRA_FOOTNOTE) && (T(foot->tag)[0] == '^') ) {
1033
+ /* need to consume all lines until non-indented block? */
1034
+ while ( j < S(p->text) )
1035
+ EXPAND(foot->title) = T(p->text)[j++];
1036
+ goto skip_to_end;
1037
+ }
1038
+
1039
+ while ( (j < S(p->text)) && !isspace(T(p->text)[j]) )
1040
+ EXPAND(foot->link) = T(p->text)[j++];
1041
+ EXPAND(foot->link) = 0;
1042
+ S(foot->link)--;
1043
+ j = nextnonblank(p,j);
1044
+
1045
+ if ( T(p->text)[j] == '=' ) {
1046
+ sscanf(T(p->text)+j, "=%dx%d", &foot->width, &foot->height);
1047
+ while ( (j < S(p->text)) && !isspace(T(p->text)[j]) )
1048
+ ++j;
1049
+ j = nextnonblank(p,j);
1050
+ }
1051
+
1052
+
1053
+ if ( (j >= S(p->text)) && np && np->dle && tgood(T(np->text)[np->dle]) ) {
1054
+ ___mkd_freeLine(p);
1055
+ p = np;
1056
+ np = p->next;
1057
+ j = p->dle;
1058
+ }
1059
+
1060
+ if ( (c = tgood(T(p->text)[j])) ) {
1061
+ /* Try to take the rest of the line as a comment; read to
1062
+ * EOL, then shrink the string back to before the final
1063
+ * quote.
1064
+ */
1065
+ ++j; /* skip leading quote */
1066
+
1067
+ while ( j < S(p->text) )
1068
+ EXPAND(foot->title) = T(p->text)[j++];
1069
+
1070
+ while ( S(foot->title) && T(foot->title)[S(foot->title)-1] != c )
1071
+ --S(foot->title);
1072
+ if ( S(foot->title) ) /* skip trailing quote */
1073
+ --S(foot->title);
1074
+ EXPAND(foot->title) = 0;
1075
+ --S(foot->title);
1076
+ }
1077
+
1078
+ skip_to_end:
1079
+ ___mkd_freeLine(p);
1080
+ return np;
1081
+ }
1082
+
1083
+
1084
+ /*
1085
+ * allocate a paragraph header, link it to the
1086
+ * tail of the current document
1087
+ */
1088
+ static Paragraph *
1089
+ Pp(ParagraphRoot *d, Line *ptr, int typ)
1090
+ {
1091
+ Paragraph *ret = calloc(sizeof *ret, 1);
1092
+
1093
+ ret->text = ptr;
1094
+ ret->typ = typ;
1095
+
1096
+ return ATTACH(*d, ret);
1097
+ }
1098
+
1099
+
1100
+
1101
+ static Line*
1102
+ consume(Line *ptr, int *eaten)
1103
+ {
1104
+ Line *next;
1105
+ int blanks=0;
1106
+
1107
+ for (; ptr && blankline(ptr); ptr = next, blanks++ ) {
1108
+ next = ptr->next;
1109
+ ___mkd_freeLine(ptr);
1110
+ }
1111
+ if ( ptr ) *eaten = blanks;
1112
+ return ptr;
1113
+ }
1114
+
1115
+
1116
+ /*
1117
+ * top-level compilation; break the document into
1118
+ * style, html, and source blocks with footnote links
1119
+ * weeded out.
1120
+ */
1121
+ static Paragraph *
1122
+ compile_document(Line *ptr, MMIOT *f)
1123
+ {
1124
+ ParagraphRoot d = { 0, 0 };
1125
+ ANCHOR(Line) source = { 0, 0 };
1126
+ Paragraph *p = 0;
1127
+ struct kw *tag;
1128
+ int eaten, unclosed;
1129
+
1130
+ while ( ptr ) {
1131
+ if ( !(f->flags & MKD_NOHTML) && (tag = isopentag(ptr)) ) {
1132
+ int blocktype;
1133
+ /* If we encounter a html/style block, compile and save all
1134
+ * of the cached source BEFORE processing the html/style.
1135
+ */
1136
+ if ( T(source) ) {
1137
+ E(source)->next = 0;
1138
+ p = Pp(&d, 0, SOURCE);
1139
+ p->down = compile(T(source), 1, f);
1140
+ T(source) = E(source) = 0;
1141
+ }
1142
+
1143
+ if ( f->flags & MKD_NOSTYLE )
1144
+ blocktype = HTML;
1145
+ else
1146
+ blocktype = strcmp(tag->id, "STYLE") == 0 ? STYLE : HTML;
1147
+ p = Pp(&d, ptr, blocktype);
1148
+ ptr = htmlblock(p, tag, &unclosed);
1149
+ if ( unclosed ) {
1150
+ p->typ = SOURCE;
1151
+ p->down = compile(p->text, 1, f);
1152
+ p->text = 0;
1153
+ }
1154
+ }
1155
+ else if ( isfootnote(ptr) ) {
1156
+ /* footnotes, like cats, sleep anywhere; pull them
1157
+ * out of the input stream and file them away for
1158
+ * later processing
1159
+ */
1160
+ ptr = consume(addfootnote(ptr, f), &eaten);
1161
+ }
1162
+ else {
1163
+ /* source; cache it up to wait for eof or the
1164
+ * next html/style block
1165
+ */
1166
+ ATTACH(source,ptr);
1167
+ ptr = ptr->next;
1168
+ }
1169
+ }
1170
+ if ( T(source) ) {
1171
+ /* if there's any cached source at EOF, compile
1172
+ * it now.
1173
+ */
1174
+ E(source)->next = 0;
1175
+ p = Pp(&d, 0, SOURCE);
1176
+ p->down = compile(T(source), 1, f);
1177
+ }
1178
+ return T(d);
1179
+ }
1180
+
1181
+
1182
+ static int
1183
+ first_nonblank_before(Line *j, int dle)
1184
+ {
1185
+ return (j->dle < dle) ? j->dle : dle;
1186
+ }
1187
+
1188
+
1189
+ static int
1190
+ actually_a_table(MMIOT *f, Line *pp)
1191
+ {
1192
+ Line *r;
1193
+ int j;
1194
+ int c;
1195
+
1196
+ /* tables need to be turned on */
1197
+ if ( f->flags & (MKD_STRICT|MKD_NOTABLES) )
1198
+ return 0;
1199
+
1200
+ /* tables need three lines */
1201
+ if ( !(pp && pp->next && pp->next->next) ) {
1202
+ return 0;
1203
+ }
1204
+
1205
+ /* all lines must contain |'s */
1206
+ for (r = pp; r; r = r->next )
1207
+ if ( !(r->flags & PIPECHAR) ) {
1208
+ return 0;
1209
+ }
1210
+
1211
+ /* if the header has a leading |, all lines must have leading |'s */
1212
+ if ( T(pp->text)[pp->dle] == '|' ) {
1213
+ for ( r = pp; r; r = r->next )
1214
+ if ( T(r->text)[first_nonblank_before(r,pp->dle)] != '|' ) {
1215
+ return 0;
1216
+ }
1217
+ }
1218
+
1219
+ /* second line must be only whitespace, -, |, or : */
1220
+ r = pp->next;
1221
+
1222
+ for ( j=r->dle; j < S(r->text); ++j ) {
1223
+ c = T(r->text)[j];
1224
+
1225
+ if ( !(isspace(c)||(c=='-')||(c==':')||(c=='|')) ) {
1226
+ return 0;
1227
+ }
1228
+ }
1229
+
1230
+ return 1;
1231
+ }
1232
+
1233
+
1234
+ /*
1235
+ * break a collection of markdown input into
1236
+ * blocks of lists, code, html, and text to
1237
+ * be marked up.
1238
+ */
1239
+ static Paragraph *
1240
+ compile(Line *ptr, int toplevel, MMIOT *f)
1241
+ {
1242
+ ParagraphRoot d = { 0, 0 };
1243
+ Paragraph *p = 0;
1244
+ Line *r;
1245
+ int para = toplevel;
1246
+ int blocks = 0;
1247
+ int hdr_type, list_type, list_class, indent;
1248
+
1249
+ ptr = consume(ptr, &para);
1250
+
1251
+ while ( ptr ) {
1252
+ if ( iscode(ptr) ) {
1253
+ p = Pp(&d, ptr, CODE);
1254
+
1255
+ if ( f->flags & MKD_1_COMPAT) {
1256
+ /* HORRIBLE STANDARDS KLUDGE: the first line of every block
1257
+ * has trailing whitespace trimmed off.
1258
+ */
1259
+ ___mkd_tidy(&p->text->text);
1260
+ }
1261
+
1262
+ ptr = codeblock(p);
1263
+ }
1264
+ else if ( iscodefence(ptr,3,0,f->flags) && (p=fencedcodeblock(&d, &ptr, f->flags)) )
1265
+ /* yay, it's already done */ ;
1266
+ else if ( ishr(ptr, f->flags) ) {
1267
+ p = Pp(&d, 0, HR);
1268
+ r = ptr;
1269
+ ptr = ptr->next;
1270
+ ___mkd_freeLine(r);
1271
+ }
1272
+ else if ( list_class = islist(ptr, &indent, f->flags, &list_type) ) {
1273
+ if ( list_class == DL ) {
1274
+ p = Pp(&d, ptr, DL);
1275
+ ptr = definition_block(p, indent, f, list_type);
1276
+ }
1277
+ else {
1278
+ p = Pp(&d, ptr, list_type);
1279
+ ptr = enumerated_block(p, indent, f, list_class);
1280
+ }
1281
+ }
1282
+ else if ( isquote(ptr) ) {
1283
+ p = Pp(&d, ptr, QUOTE);
1284
+ ptr = quoteblock(p, f->flags);
1285
+ p->down = compile(p->text, 1, f);
1286
+ p->text = 0;
1287
+ }
1288
+ else if ( ishdr(ptr, &hdr_type, f->flags) ) {
1289
+ p = Pp(&d, ptr, HDR);
1290
+ ptr = headerblock(p, hdr_type);
1291
+ }
1292
+ else {
1293
+ p = Pp(&d, ptr, MARKUP);
1294
+ ptr = textblock(p, toplevel, f->flags);
1295
+ /* tables are a special kind of paragraph */
1296
+ if ( actually_a_table(f, p->text) )
1297
+ p->typ = TABLE;
1298
+ }
1299
+
1300
+ if ( (para||toplevel) && !p->align )
1301
+ p->align = PARA;
1302
+
1303
+ blocks++;
1304
+ para = toplevel || (blocks > 1);
1305
+ ptr = consume(ptr, &para);
1306
+
1307
+ if ( para && !p->align )
1308
+ p->align = PARA;
1309
+
1310
+ }
1311
+ return T(d);
1312
+ }
1313
+
1314
+
1315
+ /*
1316
+ * the guts of the markdown() function, ripped out so I can do
1317
+ * debugging.
1318
+ */
1319
+
1320
+ /*
1321
+ * prepare and compile `text`, returning a Paragraph tree.
1322
+ */
1323
+ int
1324
+ mkd_compile(Document *doc, DWORD flags)
1325
+ {
1326
+ if ( !doc )
1327
+ return 0;
1328
+
1329
+ flags &= USER_FLAGS;
1330
+
1331
+ if ( doc->compiled ) {
1332
+ if ( doc->ctx->flags == flags )
1333
+ return 1;
1334
+ else {
1335
+ if ( doc->code)
1336
+ ___mkd_freeParagraph(doc->code);
1337
+ if ( doc->ctx->footnotes )
1338
+ ___mkd_freefootnotes(doc->ctx);
1339
+ }
1340
+ }
1341
+
1342
+ doc->compiled = 1;
1343
+ memset(doc->ctx, 0, sizeof(MMIOT) );
1344
+ doc->ctx->ref_prefix= doc->ref_prefix;
1345
+ doc->ctx->cb = &(doc->cb);
1346
+ doc->ctx->flags = flags;
1347
+ CREATE(doc->ctx->in);
1348
+ doc->ctx->footnotes = malloc(sizeof doc->ctx->footnotes[0]);
1349
+ doc->ctx->footnotes->reference = 0;
1350
+ CREATE(doc->ctx->footnotes->note);
1351
+
1352
+ mkd_initialize();
1353
+
1354
+ doc->code = compile_document(T(doc->content), doc->ctx);
1355
+ qsort(T(doc->ctx->footnotes->note), S(doc->ctx->footnotes->note),
1356
+ sizeof T(doc->ctx->footnotes->note)[0],
1357
+ (stfu)__mkd_footsort);
1358
+ memset(&doc->content, 0, sizeof doc->content);
1359
+ return 1;
1360
+ }
1361
+