rtomayko-rdiscount 1.3.1.1

Sign up to get free protection for your applications and to get access to all the features.
data/ext/generate.c ADDED
@@ -0,0 +1,1379 @@
1
+ /* markdown: a C implementation of John Gruber's Markdown markup language.
2
+ *
3
+ * Copyright (C) 2007 David L Parsons.
4
+ * The redistribution terms are provided in the COPYRIGHT file that must
5
+ * be distributed with this source code.
6
+ */
7
+ #include <stdio.h>
8
+ #include <string.h>
9
+ #include <stdarg.h>
10
+ #include <stdlib.h>
11
+ #include <time.h>
12
+ #include <ctype.h>
13
+
14
+ #include "config.h"
15
+
16
+ #include "cstring.h"
17
+ #include "markdown.h"
18
+ #include "amalloc.h"
19
+
20
+ /* prefixes for <automatic links>
21
+ */
22
+ static char *autoprefix[] = { "http://", "https://", "ftp://", "news://" };
23
+ #define SZAUTOPREFIX (sizeof autoprefix / sizeof autoprefix[0])
24
+
25
+ typedef int (*stfu)(const void*,const void*);
26
+
27
+
28
+ /* forward declarations */
29
+ static int iscodeblock(MMIOT*);
30
+ static void code(int, MMIOT*);
31
+ static void text(MMIOT *f);
32
+ static Paragraph *display(Paragraph*, MMIOT*);
33
+
34
+ /* externals from markdown.c */
35
+ int __mkd_footsort(Footnote *, Footnote *);
36
+
37
+ /*
38
+ * push text into the generator input buffer
39
+ */
40
+ static void
41
+ push(char *bfr, int size, MMIOT *f)
42
+ {
43
+ while ( size-- > 0 )
44
+ EXPAND(f->in) = *bfr++;
45
+ }
46
+
47
+
48
+ /* look <i> characters ahead of the cursor.
49
+ */
50
+ static int
51
+ peek(MMIOT *f, int i)
52
+ {
53
+
54
+ i += (f->isp-1);
55
+
56
+ return (i >= 0) && (i < S(f->in)) ? T(f->in)[i] : EOF;
57
+ }
58
+
59
+
60
+ /* pull a byte from the input buffer
61
+ */
62
+ static int
63
+ pull(MMIOT *f)
64
+ {
65
+ return ( f->isp < S(f->in) ) ? T(f->in)[f->isp++] : EOF;
66
+ }
67
+
68
+
69
+ /* return a pointer to the current position in the input buffer.
70
+ */
71
+ static char*
72
+ cursor(MMIOT *f)
73
+ {
74
+ return T(f->in) + f->isp;
75
+ }
76
+
77
+
78
+ static int
79
+ isthisspace(MMIOT *f, int i)
80
+ {
81
+ int c = peek(f, i);
82
+
83
+ return isspace(c) || (c == EOF);
84
+ }
85
+
86
+
87
+ static int
88
+ isthisalnum(MMIOT *f, int i)
89
+ {
90
+ int c = peek(f, i);
91
+
92
+ return (c != EOF) && isalnum(c);
93
+ }
94
+
95
+
96
+ static int
97
+ isthisnonword(MMIOT *f, int i)
98
+ {
99
+ return isthisspace(f, i) || ispunct(peek(f,i));
100
+ }
101
+
102
+
103
/* set / report the current cursor position.
 * Both arguments are parenthesised in the expansion so that any
 * expression (e.g. `&ctx`) can be passed for <f> or <x>.
 */
#define mmiotseek(f,x)  ((f)->isp = (x))
#define mmiottell(f)    ((f)->isp)
107
+
108
+
109
+ /* move n characters forward ( or -n characters backward) in the input buffer.
110
+ */
111
+ static void
112
+ shift(MMIOT *f, int i)
113
+ {
114
+ if (f->isp + i >= 0 )
115
+ f->isp += i;
116
+ }
117
+
118
+
119
+ /* Qchar()
120
+ */
121
+ static void
122
+ Qchar(char c, MMIOT *f)
123
+ {
124
+ block *cur;
125
+
126
+ if ( S(f->Q) == 0 ) {
127
+ cur = &EXPAND(f->Q);
128
+ memset(cur, 0, sizeof *cur);
129
+ cur->b_type = bTEXT;
130
+ }
131
+ else
132
+ cur = &T(f->Q)[S(f->Q)-1];
133
+
134
+ EXPAND(cur->b_text) = c;
135
+
136
+ }
137
+
138
+
139
+ /* Qstring()
140
+ */
141
+ static void
142
+ Qstring(char *s, MMIOT *f)
143
+ {
144
+ while (*s)
145
+ Qchar(*s++, f);
146
+ }
147
+
148
+
149
+ /* Qwrite()
150
+ */
151
+ static void
152
+ Qwrite(char *s, int size, MMIOT *f)
153
+ {
154
+ while (size-- > 0)
155
+ Qchar(*s++, f);
156
+ }
157
+
158
+
159
+ /* Qprintf()
160
+ */
161
+ static void
162
+ Qprintf(MMIOT *f, char *fmt, ...)
163
+ {
164
+ char bfr[80];
165
+ va_list ptr;
166
+
167
+ va_start(ptr,fmt);
168
+ vsnprintf(bfr, sizeof bfr, fmt, ptr);
169
+ va_end(ptr);
170
+ Qstring(bfr, f);
171
+ }
172
+
173
+
174
+ /* Qem()
175
+ */
176
+ static void
177
+ Qem(MMIOT *f, char c, int count)
178
+ {
179
+ block *p = &EXPAND(f->Q);
180
+
181
+ memset(p, 0, sizeof *p);
182
+ p->b_type = (c == '*') ? bSTAR : bUNDER;
183
+ p->b_char = c;
184
+ p->b_count = count;
185
+
186
+ memset(&EXPAND(f->Q), 0, sizeof(block));
187
+ }
188
+
189
+
190
+ /* empair()
191
+ */
192
+ static int
193
+ empair(MMIOT *f, int go, int level)
194
+ {
195
+
196
+ int i;
197
+ block *begin, *p;
198
+
199
+ begin = &T(f->Q)[go];
200
+ for (i=go+1; i < S(f->Q); i++) {
201
+ p = &T(f->Q)[i];
202
+
203
+ if ( (p->b_type != bTEXT) && (p->b_count <= 0) )
204
+ break;
205
+
206
+ if ( p->b_type == begin->b_type ) {
207
+ if ( p->b_count == level ) /* exact match */
208
+ return i-go;
209
+
210
+ if ( p->b_count > 2 ) /* fuzzy match */
211
+ return i-go;
212
+ }
213
+ }
214
+ return EOF;
215
+ }
216
+
217
+
218
+
219
/* html tags for emphasis level 1 and level 2, indexed by level-1.
 * <size> is the length of the closing tag; the opening tag is one
 * character shorter (see emopen()/emclose()).
 */
static struct emtags {
    char open[10];
    char close[10];
    int size;
} emtags[] = {
    { "<em>",     "</em>",     5 },
    { "<strong>", "</strong>", 9 },
};
224
+
225
+
226
+ static void
227
+ emclose(Cstring *s, int level)
228
+ {
229
+ PREFIX(*s, emtags[level-1].close, emtags[level-1].size);
230
+ }
231
+
232
+
233
+ static void
234
+ emopen(Cstring *s, int level)
235
+ {
236
+ SUFFIX(*s, emtags[level-1].open, emtags[level-1].size-1);
237
+ }
238
+
239
+
240
+ /* emmatch()
241
+ */
242
+ static void
243
+ emmatch(MMIOT *f, int go)
244
+ {
245
+ block *start = &T(f->Q)[go], *end;
246
+ int e, e2, i, match;
247
+
248
+ while ( start->b_count ) {
249
+ switch (start->b_count) {
250
+ case 2: e = empair(f,go,match=2);
251
+ if ( e != EOF ) break;
252
+ case 1: e = empair(f,go,match=1); break;
253
+ default:
254
+ e = empair(f,go,1);
255
+ e2= empair(f,go,2);
256
+
257
+ if ( e == EOF || ((e2 != EOF) && (e2 >= e)) ) {
258
+ e = e2;
259
+ match = 2;
260
+ }
261
+ else
262
+ match = 1;
263
+ }
264
+ if ( e != EOF ) {
265
+ end = &T(f->Q)[go+e];
266
+ emclose(&end->b_post, match);
267
+ emopen(&start->b_text, match);
268
+ end->b_count -= match;
269
+ }
270
+ else {
271
+ for (i=0; i < match; i++)
272
+ EXPAND(start->b_text) = start->b_char;
273
+ }
274
+
275
+ start->b_count -= match;
276
+ }
277
+ }
278
+
279
+
280
+ /* emblock()
281
+ */
282
+ static void
283
+ emblock(MMIOT *f)
284
+ {
285
+ int i;
286
+ block *p;
287
+
288
+ for (i=0; i < S(f->Q); i++) {
289
+ p = &T(f->Q)[i];
290
+
291
+ if ( p->b_type != bTEXT ) emmatch(f, i);
292
+
293
+ if ( S(p->b_post) ) { SUFFIX(f->out, T(p->b_post), S(p->b_post));
294
+ DELETE(p->b_post); }
295
+ if ( S(p->b_text) ) { SUFFIX(f->out, T(p->b_text), S(p->b_text));
296
+ DELETE(p->b_text); }
297
+ }
298
+ S(f->Q) = 0;
299
+ }
300
+
301
+
302
+ /* generate html from a markup fragment
303
+ */
304
+ static void
305
+ reparse(char *bfr, int size, int flags, MMIOT *f)
306
+ {
307
+ MMIOT sub;
308
+
309
+ ___mkd_initmmiot(&sub, f->footnotes);
310
+
311
+ sub.flags = f->flags | flags;
312
+ sub.base = f->base;
313
+
314
+ push(bfr, size, &sub);
315
+ EXPAND(sub.in) = 0;
316
+ S(sub.in)--;
317
+
318
+ text(&sub);
319
+ emblock(&sub);
320
+
321
+ Qwrite(T(sub.out), S(sub.out), f);
322
+
323
+ ___mkd_freemmiot(&sub, f->footnotes);
324
+ }
325
+
326
+
327
+ /*
328
+ * write out a url, escaping problematic characters
329
+ */
330
+ static void
331
+ puturl(char *s, int size, MMIOT *f)
332
+ {
333
+ unsigned char c;
334
+
335
+ while ( size-- > 0 ) {
336
+ c = *s++;
337
+
338
+ if ( c == '&' )
339
+ Qstring("&amp;", f);
340
+ else if ( c == '<' )
341
+ Qstring("&lt;", f);
342
+ else if ( isalnum(c) || ispunct(c) )
343
+ Qchar(c, f);
344
+ else
345
+ Qprintf(f, "%%%02X", c);
346
+ }
347
+ }
348
+
349
+
350
+ /* advance forward until the next character is not whitespace
351
+ */
352
+ static int
353
+ eatspace(MMIOT *f)
354
+ {
355
+ int c;
356
+
357
+ for ( ; ((c=peek(f, 1)) != EOF) && isspace(c); pull(f) )
358
+ ;
359
+ return c;
360
+ }
361
+
362
+
363
+ /* (match (a (nested (parenthetical (string.)))))
364
+ */
365
+ static int
366
+ parenthetical(int in, int out, MMIOT *f)
367
+ {
368
+ int size, indent, c;
369
+
370
+ for ( indent=1,size=0; indent; size++ ) {
371
+ if ( (c = pull(f)) == EOF )
372
+ return EOF;
373
+ else if ( c == in )
374
+ ++indent;
375
+ else if ( c == out )
376
+ --indent;
377
+ }
378
+ return size-1;
379
+ }
380
+
381
+
382
+ /* extract a []-delimited label from the input stream.
383
+ */
384
+ static char *
385
+ linkylabel(MMIOT *f, int *sizep)
386
+ {
387
+ char *ptr = cursor(f);
388
+
389
+ if ( (*sizep = parenthetical('[',']',f)) != EOF )
390
+ return ptr;
391
+ return 0;
392
+ }
393
+
394
+
395
+ /* extract a (-prefixed url from the input stream.
396
+ * the label is either of the format `<link>`, where I
397
+ * extract until I find a >, or it is of the format
398
+ * `text`, where I extract until I reach a ')' or
399
+ * whitespace.
400
+ */
401
+ static char*
402
+ linkyurl(MMIOT *f, int *sizep)
403
+ {
404
+ int size = 0;
405
+ char *ptr;
406
+ int c;
407
+
408
+ if ( (c = eatspace(f)) == EOF )
409
+ return 0;
410
+
411
+ ptr = cursor(f);
412
+
413
+ if ( c == '<' ) {
414
+ pull(f);
415
+ ptr++;
416
+ if ( (size = parenthetical('<', '>', f)) == EOF )
417
+ return 0;
418
+ }
419
+ else {
420
+ for ( ; ((c=pull(f)) != ')') && !isspace(c); size++)
421
+ if ( c == EOF ) return 0;
422
+ if ( c == ')' )
423
+ shift(f, -1);
424
+ }
425
+ *sizep = size;
426
+ return ptr;
427
+ }
428
+
429
+
430
+ /* extract a =HHHxWWW size from the input stream
431
+ */
432
+ static int
433
+ linkysize(MMIOT *f, int *heightp, int *widthp)
434
+ {
435
+ int height=0, width=0;
436
+ int c;
437
+
438
+ *heightp = 0;
439
+ *widthp = 0;
440
+
441
+ if ( (c = eatspace(f)) != '=' )
442
+ return (c != EOF);
443
+ pull(f); /* eat '=' */
444
+
445
+ for ( c = pull(f); isdigit(c); c = pull(f))
446
+ width = (width * 10) + (c - '0');
447
+
448
+ if ( c == 'x' ) {
449
+ for ( c = pull(f); isdigit(c); c = pull(f))
450
+ height = (height*10) + (c - '0');
451
+
452
+ if ( c != EOF ) {
453
+ if ( !isspace(c) ) shift(f, -1);
454
+ *heightp = height;
455
+ *widthp = width;
456
+ return 1;
457
+ }
458
+ }
459
+ return 0;
460
+ }
461
+
462
+
463
+ /* extract a )-terminated title from the input stream.
464
+ */
465
+ static char*
466
+ linkytitle(MMIOT *f, int *sizep)
467
+ {
468
+ int countq=0, qc, c, size;
469
+ char *ret, *lastqc = 0;
470
+
471
+ eatspace(f);
472
+ if ( (qc=pull(f)) != '"' && qc != '\'' && qc != '(' )
473
+ return 0;
474
+
475
+ if ( qc == '(' ) qc = ')';
476
+
477
+ for ( ret = cursor(f); (c = pull(f)) != EOF; ) {
478
+ if ( (c == ')') && countq ) {
479
+ size = (lastqc ? lastqc : cursor(f)) - ret;
480
+ *sizep = size-1;
481
+ return ret;
482
+ }
483
+ else if ( c == qc ) {
484
+ lastqc = cursor(f);
485
+ countq++;
486
+ }
487
+ }
488
+ return 0;
489
+ }
490
+
491
+
492
+ /* look up (or construct) a footnote from the [xxx] link
493
+ * at the head of the stream.
494
+ */
495
+ static int
496
+ linkykey(int image, Footnote *val, MMIOT *f)
497
+ {
498
+ Footnote *ret;
499
+ Cstring mylabel;
500
+
501
+ memset(val, 0, sizeof *val);
502
+
503
+ if ( (T(val->tag) = linkylabel(f, &S(val->tag))) == 0 )
504
+ return 0;
505
+
506
+ eatspace(f);
507
+ switch ( pull(f) ) {
508
+ case '(':
509
+ /* embedded link */
510
+ if ( (T(val->link) = linkyurl(f,&S(val->link))) == 0 )
511
+ return 0;
512
+
513
+ if ( image && !linkysize(f, &val->height, &val->width) )
514
+ return 0;
515
+
516
+ T(val->title) = linkytitle(f, &S(val->title));
517
+
518
+ return peek(f,0) == ')';
519
+
520
+ case '[':
521
+ /* footnote link */
522
+ mylabel = val->tag;
523
+ if ( (T(val->tag) = linkylabel(f, &S(val->tag))) == 0 )
524
+ return 0;
525
+
526
+ if ( !S(val->tag) )
527
+ val->tag = mylabel;
528
+
529
+ ret = bsearch(val, T(*f->footnotes), S(*f->footnotes),
530
+ sizeof *val, (stfu)__mkd_footsort);
531
+
532
+ if ( ret ) {
533
+ val->tag = mylabel;
534
+ val->link = ret->link;
535
+ val->title = ret->title;
536
+ val->height = ret->height;
537
+ val->width = ret->width;
538
+ return 1;
539
+ }
540
+ }
541
+ return 0;
542
+ }
543
+
544
+
545
+ /*
546
+ * all the tag types that linkylinky can produce are
547
+ * defined by this structure.
548
+ */
549
+ typedef struct linkytype {
550
+ char *pat;
551
+ int szpat;
552
+ char *link_pfx; /* tag prefix and link pointer (eg: "<a href="\"" */
553
+ char *link_sfx; /* link suffix (eg: "\"" */
554
+ int WxH; /* this tag allows width x height arguments */
555
+ char *text_pfx; /* text prefix (eg: ">" */
556
+ char *text_sfx; /* text suffix (eg: "</a>" */
557
+ int flags; /* reparse flags */
558
+ } linkytype;
559
+
560
+ static linkytype imaget = { 0, 0, "<img src=\"", "\"",
561
+ 1, " alt=\"", "\" />", DENY_IMG|INSIDE_TAG };
562
+ static linkytype linkt = { 0, 0, "<a href=\"", "\"",
563
+ 0, ">", "</a>", DENY_A };
564
+
565
+ /*
566
+ * pseudo-protocols for [][];
567
+ *
568
+ * id: generates <a id="link">tag</a>
569
+ * class: generates <span class="link">tag</span>
570
+ * raw: just dump the link without any processing
571
+ */
572
+ static linkytype specials[] = {
573
+ { "id:", 3, "<a id=\"", "\"", 0, ">", "</a>", 0 },
574
+ { "class:", 6, "<span class=\"", "\"", 0, ">", "</span>", 0 },
575
+ { "raw:", 4, 0, 0, 0, 0, 0, 0 },
576
+ } ;
577
+
578
+ #define NR(x) (sizeof x / sizeof x[0])
579
+
580
+ /* see if t contains one of our pseudo-protocols.
581
+ */
582
+ static linkytype *
583
+ extratag(Cstring t)
584
+ {
585
+ int i;
586
+ linkytype *r;
587
+
588
+ for ( i=0; i < NR(specials); i++ ) {
589
+ r = &specials[i];
590
+ if ( (S(t) > r->szpat) && (strncasecmp(T(t), r->pat, r->szpat) == 0) )
591
+ return r;
592
+ }
593
+ return 0;
594
+ }
595
+
596
+
597
+ /*
598
+ * process embedded links and images
599
+ */
600
+ static int
601
+ linkylinky(int image, MMIOT *f)
602
+ {
603
+ int start = mmiottell(f);
604
+ Footnote link;
605
+ linkytype *tag;
606
+
607
+ if ( !linkykey(image, &link, f) ) {
608
+ mmiotseek(f, start);
609
+ return 0;
610
+ }
611
+
612
+ if ( image )
613
+ tag = &imaget;
614
+ else if ( (f->flags & NO_PSEUDO_PROTO) || (tag = extratag(link.link)) == 0 )
615
+ tag = &linkt;
616
+
617
+ if ( f->flags & tag-> flags ) {
618
+ mmiotseek(f, start);
619
+ return 0;
620
+ }
621
+
622
+ if ( tag->link_pfx ) {
623
+ Qstring(tag->link_pfx, f);
624
+ if ( f->base && (T(link.link)[tag->szpat] == '/') )
625
+ puturl(f->base, strlen(f->base), f);
626
+ puturl(T(link.link) + tag->szpat, S(link.link) - tag->szpat, f);
627
+ Qstring(tag->link_sfx, f);
628
+
629
+ if ( tag->WxH && link.height && link.width ) {
630
+ Qprintf(f," height=\"%d\"", link.height);
631
+ Qprintf(f, " width=\"%d\"", link.width);
632
+ }
633
+
634
+ if ( S(link.title) ) {
635
+ Qstring(" title=\"", f);
636
+ reparse(T(link.title), S(link.title), INSIDE_TAG, f);
637
+ Qchar('"', f);
638
+ }
639
+
640
+ Qstring(tag->text_pfx, f);
641
+ reparse(T(link.tag), S(link.tag), tag->flags, f);
642
+ Qstring(tag->text_sfx, f);
643
+ }
644
+ else
645
+ Qwrite(T(link.link) + tag->szpat, S(link.link) - tag->szpat, f);
646
+
647
+ return 1;
648
+ }
649
+
650
+
651
+ /* write a character to output, doing text escapes ( & -> &amp;,
652
+ * > -> &gt; < -> &lt; )
653
+ */
654
+ static void
655
+ cputc(int c, MMIOT *f)
656
+ {
657
+ switch (c) {
658
+ case '&': Qstring("&amp;", f); break;
659
+ case '>': Qstring("&gt;", f); break;
660
+ case '<': Qstring("&lt;", f); break;
661
+ default : Qchar(c, f); break;
662
+ }
663
+ }
664
+
665
+
666
+ /*
667
+ * convert an email address to a string of nonsense
668
+ */
669
+ static void
670
+ mangle(char *s, int len, MMIOT *f)
671
+ {
672
+ while ( len-- > 0 ) {
673
+ Qstring("&#", f);
674
+ Qprintf(f, COINTOSS() ? "x%02x;" : "%02d;", *((unsigned char*)(s++)) );
675
+ }
676
+ }
677
+
678
+
679
+ /* before letting a tag through, validate against
680
+ * DENY_A and DENY_IMG
681
+ */
682
+ static int
683
+ forbidden_tag(MMIOT *f)
684
+ {
685
+ int c = toupper(peek(f, 1));
686
+
687
+ if ( f->flags & DENY_HTML )
688
+ return 1;
689
+
690
+ if ( c == 'A' && (f->flags & DENY_A) && !isthisalnum(f,2) )
691
+ return 1;
692
+ if ( c == 'I' && (f->flags & DENY_IMG)
693
+ && strncasecmp(cursor(f)+1, "MG", 2) == 0
694
+ && !isthisalnum(f,4) )
695
+ return 1;
696
+ return 0;
697
+ }
698
+
699
+
700
+
701
+ /* a < may be just a regular character, the start of an embedded html
702
+ * tag, or the start of an <automatic link>. If it's an automatic
703
+ * link, we also need to know if it's an email address because if it
704
+ * is we need to mangle it in our futile attempt to cut down on the
705
+ * spaminess of the rendered page.
706
+ */
707
+ static int
708
+ maybe_tag_or_link(MMIOT *f)
709
+ {
710
+ char *text;
711
+ int c, size, i;
712
+ int maybetag=1, maybeaddress=0;
713
+ int mailto;
714
+
715
+ if ( f->flags & INSIDE_TAG )
716
+ return 0;
717
+
718
+ for ( size=0; ((c = peek(f,size+1)) != '>') && !isspace(c); size++ ) {
719
+ if ( ! (c == '/' || isalnum(c) || c == '~') )
720
+ maybetag=0;
721
+ if ( c == '@' )
722
+ maybeaddress=1;
723
+ else if ( c == EOF )
724
+ return 0;
725
+ }
726
+
727
+ if ( size == 0 )
728
+ return 0;
729
+
730
+ if ( maybetag || (size >= 3 && strncmp(cursor(f), "!--", 3) == 0) ) {
731
+ Qstring(forbidden_tag(f) ? "&lt;" : "<", f);
732
+ while ( ((c = peek(f, 1)) != EOF) && (c != '>') )
733
+ cputc(pull(f), f);
734
+ return 1;
735
+ }
736
+
737
+ if ( f->flags & DENY_A ) return 0;
738
+
739
+ text = cursor(f);
740
+ shift(f, size+1);
741
+
742
+ for ( i=0; i < SZAUTOPREFIX; i++ )
743
+ if ( strncasecmp(text, autoprefix[i], strlen(autoprefix[i])) == 0 ) {
744
+ Qstring("<a href=\"", f);
745
+ puturl(text,size,f);
746
+ Qstring("\">", f);
747
+ puturl(text,size,f);
748
+ Qstring("</a>", f);
749
+ return 1;
750
+ }
751
+ if ( maybeaddress ) {
752
+
753
+ Qstring("<a href=\"", f);
754
+ if ( (size > 7) && strncasecmp(text, "mailto:", 7) == 0 )
755
+ mailto = 7;
756
+ else {
757
+ mailto = 0;
758
+ /* supply a mailto: protocol if one wasn't attached */
759
+ mangle("mailto:", 7, f);
760
+ }
761
+
762
+ mangle(text, size, f);
763
+ Qstring("\">", f);
764
+ mangle(text+mailto, size-mailto, f);
765
+ Qstring("</a>", f);
766
+ return 1;
767
+ }
768
+
769
+ shift(f, -(size+1));
770
+ return 0;
771
+ } /* maybe_tag_or_link */
772
+
773
+
774
+ /* smartyquote code that's common for single and double quotes
775
+ */
776
+ static int
777
+ smartyquote(int *flags, char typeofquote, MMIOT *f)
778
+ {
779
+ int bit = (typeofquote == 's') ? 0x01 : 0x02;
780
+
781
+ if ( bit & (*flags) ) {
782
+ if ( isthisnonword(f,1) ) {
783
+ Qprintf(f, "&r%cquo;", typeofquote);
784
+ (*flags) &= ~bit;
785
+ return 1;
786
+ }
787
+ }
788
+ else if ( isthisnonword(f,-1) && peek(f,1) != EOF ) {
789
+ Qprintf(f, "&l%cquo;", typeofquote);
790
+ (*flags) |= bit;
791
+ return 1;
792
+ }
793
+ return 0;
794
+ }
795
+
796
+
797
+ static int
798
+ islike(MMIOT *f, char *s)
799
+ {
800
+ int len;
801
+ int i;
802
+
803
+ if ( s[0] == '<' ) {
804
+ if ( !isthisnonword(f, -1) )
805
+ return 0;
806
+ ++s;
807
+ }
808
+
809
+ if ( !(len = strlen(s)) )
810
+ return 0;
811
+
812
+ if ( s[len-1] == '>' ) {
813
+ if ( !isthisnonword(f,len-1) )
814
+ return 0;
815
+ len--;
816
+ }
817
+
818
+ for (i=1; i < len; i++)
819
+ if (tolower(peek(f,i)) != s[i])
820
+ return 0;
821
+ return 1;
822
+ }
823
+
824
+
825
/* substitutions made by smartypants().
 *
 *   c0      character that has just been read
 *   pat     pattern handed to islike()
 *   entity  name queued as &name; on a match (0 queues nothing)
 *   shift   how far the cursor is moved after a match
 */
static struct smarties {
    char c0;
    char *pat;
    char *entity;
    int shift;
} smarties[] = {
    { '\'', "'s>",      "rsquo",  0 },
    { '\'', "'t>",      "rsquo",  0 },
    { '\'', "'re>",     "rsquo",  0 },
    { '\'', "'ll>",     "rsquo",  0 },
    { '-',  "--",       "mdash",  1 },
    { '-',  "<->",      "ndash",  0 },
    { '.',  "...",      "hellip", 2 },
    { '.',  ". . .",    "hellip", 4 },
    { '(',  "(c)",      "copy",   2 },
    { '(',  "(r)",      "reg",    2 },
    { '(',  "(tm)",     "trade",  3 },
    { '3',  "<3/4>",    "frac34", 2 },
    { '3',  "<3/4ths>", "frac34", 2 },
    { '1',  "<1/2>",    "frac12", 2 },
    { '1',  "<1/4>",    "frac14", 2 },
    { '1',  "<1/4th>",  "frac14", 2 },
    { '&',  "&#0;",     0,        3 },
};
#define NRSMART ( sizeof smarties / sizeof smarties[0] )
850
+
851
+
852
+ /* Smarty-pants-style chrome for quotes, -, ellipses, and (r)(c)(tm)
853
+ */
854
+ static int
855
+ smartypants(int c, int *flags, MMIOT *f)
856
+ {
857
+ int i;
858
+
859
+ if ( f->flags & DENY_SMARTY )
860
+ return 0;
861
+
862
+ for ( i=0; i < NRSMART; i++)
863
+ if ( (c == smarties[i].c0) && islike(f, smarties[i].pat) ) {
864
+ if ( smarties[i].entity )
865
+ Qprintf(f, "&%s;", smarties[i].entity);
866
+ shift(f, smarties[i].shift);
867
+ return 1;
868
+ }
869
+
870
+ switch (c) {
871
+ case '<' : return 0;
872
+ case '\'': if ( smartyquote(flags, 's', f) ) return 1;
873
+ break;
874
+
875
+ case '"': if ( smartyquote(flags, 'd', f) ) return 1;
876
+ break;
877
+
878
+ case '`': if ( peek(f, 1) == '`' ) {
879
+ int j = 2;
880
+
881
+ while ( (c=peek(f,j)) != EOF ) {
882
+ if ( c == '\\' )
883
+ j += 2;
884
+ else if ( c == '`' )
885
+ break;
886
+ else if ( c == '\'' && peek(f, j+1) == '\'' ) {
887
+ Qstring("&ldquo;", f);
888
+ reparse(cursor(f)+1, j-2, 0, f);
889
+ Qstring("&rdquo;", f);
890
+ shift(f,j+1);
891
+ return 1;
892
+ }
893
+ else ++j;
894
+ }
895
+
896
+ }
897
+ break;
898
+ }
899
+ return 0;
900
+ } /* smartypants */
901
+
902
+
903
+ #define tag_text(f) (f->flags & INSIDE_TAG)
904
+
905
+
906
+ static void
907
+ text(MMIOT *f)
908
+ {
909
+ int c, j;
910
+ int rep;
911
+ int smartyflags = 0;
912
+
913
+ while ( (c = pull(f)) != EOF ) {
914
+ if ( smartypants(c, &smartyflags, f) )
915
+ continue;
916
+ switch (c) {
917
+ case 0: break;
918
+
919
+ case '>': if ( tag_text(f) )
920
+ Qstring("&gt;", f);
921
+ else
922
+ Qchar(c, f);
923
+ break;
924
+
925
+ case '"': if ( tag_text(f) )
926
+ Qstring("&quot;", f);
927
+ else
928
+ Qchar(c, f);
929
+ break;
930
+
931
+ case '!': if ( peek(f,1) == '[' ) {
932
+ pull(f);
933
+ if ( tag_text(f) || !linkylinky(1, f) )
934
+ Qstring("![", f);
935
+ }
936
+ else
937
+ Qchar(c, f);
938
+ break;
939
+ case '[': if ( tag_text(f) || !linkylinky(0, f) )
940
+ Qchar(c, f);
941
+ break;
942
+ #if SUPERSCRIPT
943
+ /* A^B -> A<sup>B</sup> */
944
+ case '^': if ( (f->flags & (STRICT|INSIDE_TAG)) || isthisspace(f,-1) || isthisspace(f,1) )
945
+ Qchar(c,f);
946
+ else {
947
+ char *sup = cursor(f);
948
+ int len = 0;
949
+ Qstring("<sup>",f);
950
+ while ( !isthisspace(f,1+len) ) {
951
+ ++len;
952
+ }
953
+ shift(f,len);
954
+ reparse(sup, len, 0, f);
955
+ Qstring("</sup>", f);
956
+ }
957
+ break;
958
+ #endif
959
+ case '_':
960
+ #if RELAXED_EMPHASIS
961
+ /* Underscores don't count if they're in the middle of a word */
962
+ if ( (!(f->flags & STRICT))
963
+ && ((isthisspace(f,-1) && isthisspace(f,1))
964
+ || (isthisalnum(f,-1) && isthisalnum(f,1))) ){
965
+ Qchar(c, f);
966
+ break;
967
+ }
968
+ /* else fall into the regular old emphasis case */
969
+ #endif
970
+ case '*': if ( tag_text(f) )
971
+ Qchar(c, f);
972
+ else {
973
+ for (rep = 1; peek(f,1) == c; pull(f) )
974
+ ++rep;
975
+ Qem(f,c,rep);
976
+ }
977
+ break;
978
+
979
+ case '`': if ( tag_text(f) || !iscodeblock(f) )
980
+ Qchar(c, f);
981
+ else {
982
+ Qstring("<code>", f);
983
+ if ( peek(f, 1) == '`' ) {
984
+ pull(f);
985
+ code(2, f);
986
+ }
987
+ else
988
+ code(1, f);
989
+ Qstring("</code>", f);
990
+ }
991
+ break;
992
+
993
+ case '\\': switch ( c = pull(f) ) {
994
+ case '&': Qstring("&amp;", f);
995
+ break;
996
+ case '<': Qstring("&lt;", f);
997
+ break;
998
+ case '\\':
999
+ case '>': case '#': case '.': case '-':
1000
+ case '+': case '{': case '}': case ']':
1001
+ case '(': case ')': case '"': case '\'':
1002
+ case '!': case '[': case '*': case '_':
1003
+ case '`': Qchar(c, f);
1004
+ break;
1005
+ default:
1006
+ Qchar('\\', f);
1007
+ if ( c != EOF )
1008
+ shift(f,-1);
1009
+ break;
1010
+ }
1011
+ break;
1012
+
1013
+ case '<': if ( !maybe_tag_or_link(f) )
1014
+ Qstring("&lt;", f);
1015
+ break;
1016
+
1017
+ case '&': j = (peek(f,1) == '#' ) ? 2 : 1;
1018
+ while ( isthisalnum(f,j) )
1019
+ ++j;
1020
+
1021
+ if ( peek(f,j) != ';' )
1022
+ Qstring("&amp;", f);
1023
+ else
1024
+ Qchar(c, f);
1025
+ break;
1026
+
1027
+ default: Qchar(c, f);
1028
+ break;
1029
+ }
1030
+ }
1031
+ /* truncate the input string after we've finished processing it */
1032
+ S(f->in) = f->isp = 0;
1033
+ } /* text */
1034
+
1035
+
1036
+ static int
1037
+ iscodeblock(MMIOT *f)
1038
+ {
1039
+ int i=1, single = 1, c;
1040
+
1041
+ if ( peek(f,i) == '`' ) {
1042
+ single=0;
1043
+ i++;
1044
+ }
1045
+ while ( (c=peek(f,i)) != EOF ) {
1046
+ if ( (c == '`') && (single || peek(f,i+1) == '`') )
1047
+ return 1;
1048
+ else if ( c == '\\' )
1049
+ i++;
1050
+ i++;
1051
+ }
1052
+ return 0;
1053
+
1054
+ }
1055
+
1056
+ static int
1057
+ endofcode(int escape, int offset, MMIOT *f)
1058
+ {
1059
+ switch (escape) {
1060
+ case 2: if ( peek(f, offset+1) == '`' ) {
1061
+ shift(f,1);
1062
+ case 1: shift(f,offset);
1063
+ return 1;
1064
+ }
1065
+ default:return 0;
1066
+ }
1067
+ }
1068
+
1069
+
1070
+ /* the only characters that have special meaning in a code block are
1071
+ * `<' and `&' , which are /always/ expanded to &lt; and &amp;
1072
+ */
1073
+ static void
1074
+ code(int escape, MMIOT *f)
1075
+ {
1076
+ int c;
1077
+
1078
+ if ( escape && (peek(f,1) == ' ') )
1079
+ shift(f,1);
1080
+
1081
+ while ( (c = pull(f)) != EOF ) {
1082
+ switch (c) {
1083
+ case ' ': if ( peek(f,1) == '`' && endofcode(escape, 1, f) )
1084
+ return;
1085
+ Qchar(c, f);
1086
+ break;
1087
+
1088
+ case '`': if ( endofcode(escape, 0, f) )
1089
+ return;
1090
+ Qchar(c, f);
1091
+ break;
1092
+
1093
+ case '\\': cputc(c, f);
1094
+ if ( peek(f,1) == '>' || (c = pull(f)) == EOF )
1095
+ break;
1096
+
1097
+ default: cputc(c, f);
1098
+ break;
1099
+ }
1100
+ }
1101
+ } /* code */
1102
+
1103
+
1104
+ /* print a header block
1105
+ */
1106
+ static void
1107
+ printheader(Paragraph *pp, MMIOT *f)
1108
+ {
1109
+ Qprintf(f, "<h%d", pp->hnumber);
1110
+ if ( f->flags & TOC ) {
1111
+ Qprintf(f, " id=\"", pp->hnumber);
1112
+ mkd_string_to_anchor(T(pp->text->text), S(pp->text->text), Qchar, f);
1113
+ Qchar('"', f);
1114
+ }
1115
+ Qchar('>', f);
1116
+ push(T(pp->text->text), S(pp->text->text), f);
1117
+ text(f);
1118
+ Qprintf(f, "</h%d>", pp->hnumber);
1119
+ }
1120
+
1121
+
1122
+ static int
1123
+ printblock(Paragraph *pp, MMIOT *f)
1124
+ {
1125
+ Line *t = pp->text;
1126
+ static char *Begin[] = { "", "<p>", "<center>" };
1127
+ static char *End[] = { "", "</p>","</center>" };
1128
+
1129
+ while (t) {
1130
+ if ( S(t->text) ) {
1131
+ if ( S(t->text) > 2 && T(t->text)[S(t->text)-2] == ' '
1132
+ && T(t->text)[S(t->text)-1] == ' ') {
1133
+ push(T(t->text), S(t->text)-2, f);
1134
+ push("<br/>\n", 6, f);
1135
+ }
1136
+ else {
1137
+ push(T(t->text), S(t->text), f);
1138
+ if ( t->next )
1139
+ push("\n", 1, f);
1140
+ }
1141
+ }
1142
+ t = t->next;
1143
+ }
1144
+ Qstring(Begin[pp->align], f);
1145
+ text(f);
1146
+ Qstring(End[pp->align], f);
1147
+ return 1;
1148
+ }
1149
+
1150
+
1151
+ static void
1152
+ printcode(Line *t, MMIOT *f)
1153
+ {
1154
+ int blanks;
1155
+
1156
+ for ( blanks = 0; t ; t = t->next )
1157
+ if ( S(t->text) > t->dle ) {
1158
+ while ( blanks ) {
1159
+ push("\n", 1, f);
1160
+ --blanks;
1161
+ }
1162
+ push(T(t->text), S(t->text), f);
1163
+ push("\n", 1, f);
1164
+ }
1165
+ else blanks++;
1166
+
1167
+ Qstring("<pre><code>", f);
1168
+ code(0, f);
1169
+ Qstring("</code></pre>", f);
1170
+ }
1171
+
1172
+
1173
+ static void
1174
+ printhtml(Line *t, MMIOT *f)
1175
+ {
1176
+ int blanks;
1177
+
1178
+ for ( blanks=0; t ; t = t->next )
1179
+ if ( S(t->text) ) {
1180
+ for ( ; blanks; --blanks )
1181
+ Qchar('\n', f);
1182
+
1183
+ Qwrite(T(t->text), S(t->text), f);
1184
+ Qchar('\n', f);
1185
+ }
1186
+ else
1187
+ blanks++;
1188
+ }
1189
+
1190
+
1191
+ static void
1192
+ htmlify(Paragraph *p, char *block, char *arguments, MMIOT *f)
1193
+ {
1194
+ emblock(f);
1195
+ if ( block )
1196
+ Qprintf(f, arguments ? "<%s %s>" : "<%s>", block, arguments);
1197
+ emblock(f);
1198
+
1199
+ while (( p = display(p, f) )) {
1200
+ emblock(f);
1201
+ Qstring("\n\n", f);
1202
+ }
1203
+
1204
+ if ( block )
1205
+ Qprintf(f, "</%s>", block);
1206
+ emblock(f);
1207
+ }
1208
+
1209
+
1210
#if DL_TAG_EXTENSION
/* print a definition list: every text line of an item becomes a <dt>
 * and the item's nested paragraphs become its <dd>.
 */
static void
definitionlist(Paragraph *p, MMIOT *f)
{
    Line *tag;

    if ( !p )
        return;

    Qstring("<dl>\n", f);

    for ( ; p ; p = p->next ) {
        tag = p->text;
        while ( tag ) {
            Qstring("<dt>", f);
            reparse(T(tag->text), S(tag->text), 0, f);
            Qstring("</dt>\n", f);
            tag = tag->next;
        }

        htmlify(p->down, "dd", p->ident, f);
    }

    Qstring("</dl>", f);
}
#endif
1233
+
1234
+
1235
+ static void
1236
+ listdisplay(int typ, Paragraph *p, MMIOT* f)
1237
+ {
1238
+ if ( p ) {
1239
+ Qprintf(f, "<%cl", (typ==UL)?'u':'o');
1240
+ if ( typ == AL )
1241
+ Qprintf(f, " type=a");
1242
+ Qprintf(f, ">\n");
1243
+
1244
+ for ( ; p ; p = p->next ) {
1245
+ htmlify(p->down, "li", p->ident, f);
1246
+ Qchar('\n', f);
1247
+ }
1248
+
1249
+ Qprintf(f, "</%cl>\n", (typ==UL)?'u':'o');
1250
+ }
1251
+ }
1252
+
1253
+
1254
+ /* dump out a Paragraph in the desired manner
1255
+ */
1256
+ static Paragraph*
1257
+ display(Paragraph *p, MMIOT *f)
1258
+ {
1259
+ if ( !p ) return 0;
1260
+
1261
+ switch ( p->typ ) {
1262
+ case STYLE:
1263
+ case WHITESPACE:
1264
+ break;
1265
+
1266
+ case HTML:
1267
+ printhtml(p->text, f);
1268
+ break;
1269
+
1270
+ case CODE:
1271
+ printcode(p->text, f);
1272
+ break;
1273
+
1274
+ case QUOTE:
1275
+ htmlify(p->down, p->ident ? "div" : "blockquote", p->ident, f);
1276
+ break;
1277
+
1278
+ case UL:
1279
+ case OL:
1280
+ case AL:
1281
+ listdisplay(p->typ, p->down, f);
1282
+ break;
1283
+
1284
+ #if DL_TAG_EXTENSION
1285
+ case DL:
1286
+ definitionlist(p->down, f);
1287
+ break;
1288
+ #endif
1289
+
1290
+ case HR:
1291
+ Qstring("<hr />", f);
1292
+ break;
1293
+
1294
+ case HDR:
1295
+ printheader(p, f);
1296
+ break;
1297
+
1298
+ default:
1299
+ printblock(p, f);
1300
+ break;
1301
+ }
1302
+ return p->next;
1303
+ }
1304
+
1305
+
1306
+ /*
1307
+ * dump out stylesheet sections.
1308
+ */
1309
+ static int
1310
+ stylesheets(Paragraph *p, FILE *f)
1311
+ {
1312
+ Line* q;
1313
+
1314
+ for ( ; p ; p = p->next ) {
1315
+ if ( p->typ == STYLE ) {
1316
+ for ( q = p->text; q ; q = q->next )
1317
+ if ( fwrite(T(q->text), S(q->text), 1, f) == 1 )
1318
+ putc('\n', f);
1319
+ else
1320
+ return EOF;
1321
+ }
1322
+ if ( p->down && (stylesheets(p->down, f) == EOF) )
1323
+ return EOF;
1324
+ }
1325
+ return 0;
1326
+ }
1327
+
1328
+
1329
+ /* return a pointer to the compiled markdown
1330
+ * document.
1331
+ */
1332
+ int
1333
+ mkd_document(Document *p, char **res)
1334
+ {
1335
+ if ( p && p->compiled ) {
1336
+ if ( ! p->html ) {
1337
+ htmlify(p->code, 0, 0, p->ctx);
1338
+ p->html = 1;
1339
+ }
1340
+
1341
+ *res = T(p->ctx->out);
1342
+ return S(p->ctx->out);
1343
+ }
1344
+ return EOF;
1345
+ }
1346
+
1347
+
1348
+ /* public interface for reparse()
1349
+ */
1350
+ int
1351
+ mkd_text(char *bfr, int size, FILE *output, int flags)
1352
+ {
1353
+ MMIOT f;
1354
+
1355
+ ___mkd_initmmiot(&f, 0);
1356
+ f.flags = flags & USER_FLAGS;
1357
+
1358
+ reparse(bfr, size, 0, &f);
1359
+ emblock(&f);
1360
+ if ( flags & CDATA_OUTPUT )
1361
+ ___mkd_xml(T(f.out), S(f.out), output);
1362
+ else
1363
+ fwrite(T(f.out), S(f.out), 1, output);
1364
+
1365
+ ___mkd_freemmiot(&f, 0);
1366
+ return 0;
1367
+ }
1368
+
1369
+
1370
+ /* dump any embedded styles
1371
+ */
1372
+ int
1373
+ mkd_style(Document *d, FILE *f)
1374
+ {
1375
+ if ( d && d->compiled )
1376
+ return stylesheets(d->code, f);
1377
+ return EOF;
1378
+ }
1379
+