rtomayko-rdiscount 1.3.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/ext/generate.c ADDED
@@ -0,0 +1,1379 @@
1
+ /* markdown: a C implementation of John Gruber's Markdown markup language.
2
+ *
3
+ * Copyright (C) 2007 David L Parsons.
4
+ * The redistribution terms are provided in the COPYRIGHT file that must
5
+ * be distributed with this source code.
6
+ */
7
+ #include <stdio.h>
8
+ #include <string.h>
9
+ #include <stdarg.h>
10
+ #include <stdlib.h>
11
+ #include <time.h>
12
+ #include <ctype.h>
13
+
14
+ #include "config.h"
15
+
16
+ #include "cstring.h"
17
+ #include "markdown.h"
18
+ #include "amalloc.h"
19
+
20
+ /* prefixes for <automatic links>
21
+ */
22
+ static char *autoprefix[] = { "http://", "https://", "ftp://", "news://" };
23
+ #define SZAUTOPREFIX (sizeof autoprefix / sizeof autoprefix[0])
24
+
25
+ typedef int (*stfu)(const void*,const void*);
26
+
27
+
28
+ /* forward declarations */
29
+ static int iscodeblock(MMIOT*);
30
+ static void code(int, MMIOT*);
31
+ static void text(MMIOT *f);
32
+ static Paragraph *display(Paragraph*, MMIOT*);
33
+
34
+ /* externals from markdown.c */
35
+ int __mkd_footsort(Footnote *, Footnote *);
36
+
37
+ /*
38
+ * push text into the generator input buffer
39
+ */
40
+ static void
41
+ push(char *bfr, int size, MMIOT *f)
42
+ {
43
+ while ( size-- > 0 )
44
+ EXPAND(f->in) = *bfr++;
45
+ }
46
+
47
+
48
+ /* look <i> characters ahead of the cursor.
49
+ */
50
+ static int
51
+ peek(MMIOT *f, int i)
52
+ {
53
+
54
+ i += (f->isp-1);
55
+
56
+ return (i >= 0) && (i < S(f->in)) ? T(f->in)[i] : EOF;
57
+ }
58
+
59
+
60
+ /* pull a byte from the input buffer
61
+ */
62
+ static int
63
+ pull(MMIOT *f)
64
+ {
65
+ return ( f->isp < S(f->in) ) ? T(f->in)[f->isp++] : EOF;
66
+ }
67
+
68
+
69
+ /* return a pointer to the current position in the input buffer.
70
+ */
71
+ static char*
72
+ cursor(MMIOT *f)
73
+ {
74
+ return T(f->in) + f->isp;
75
+ }
76
+
77
+
78
+ static int
79
+ isthisspace(MMIOT *f, int i)
80
+ {
81
+ int c = peek(f, i);
82
+
83
+ return isspace(c) || (c == EOF);
84
+ }
85
+
86
+
87
+ static int
88
+ isthisalnum(MMIOT *f, int i)
89
+ {
90
+ int c = peek(f, i);
91
+
92
+ return (c != EOF) && isalnum(c);
93
+ }
94
+
95
+
96
+ static int
97
+ isthisnonword(MMIOT *f, int i)
98
+ {
99
+ return isthisspace(f, i) || ispunct(peek(f,i));
100
+ }
101
+
102
+
103
/* cursor bookkeeping: mmiottell() reads the current cursor position,
 * mmiotseek() moves the cursor to position <x>.
 */
#define mmiottell(f)	((f)->isp)
#define mmiotseek(f,x)	((f)->isp = (x))
107
+
108
+
109
+ /* move n characters forward ( or -n characters backward) in the input buffer.
110
+ */
111
+ static void
112
+ shift(MMIOT *f, int i)
113
+ {
114
+ if (f->isp + i >= 0 )
115
+ f->isp += i;
116
+ }
117
+
118
+
119
+ /* Qchar()
120
+ */
121
+ static void
122
+ Qchar(char c, MMIOT *f)
123
+ {
124
+ block *cur;
125
+
126
+ if ( S(f->Q) == 0 ) {
127
+ cur = &EXPAND(f->Q);
128
+ memset(cur, 0, sizeof *cur);
129
+ cur->b_type = bTEXT;
130
+ }
131
+ else
132
+ cur = &T(f->Q)[S(f->Q)-1];
133
+
134
+ EXPAND(cur->b_text) = c;
135
+
136
+ }
137
+
138
+
139
+ /* Qstring()
140
+ */
141
+ static void
142
+ Qstring(char *s, MMIOT *f)
143
+ {
144
+ while (*s)
145
+ Qchar(*s++, f);
146
+ }
147
+
148
+
149
+ /* Qwrite()
150
+ */
151
+ static void
152
+ Qwrite(char *s, int size, MMIOT *f)
153
+ {
154
+ while (size-- > 0)
155
+ Qchar(*s++, f);
156
+ }
157
+
158
+
159
+ /* Qprintf()
160
+ */
161
+ static void
162
+ Qprintf(MMIOT *f, char *fmt, ...)
163
+ {
164
+ char bfr[80];
165
+ va_list ptr;
166
+
167
+ va_start(ptr,fmt);
168
+ vsnprintf(bfr, sizeof bfr, fmt, ptr);
169
+ va_end(ptr);
170
+ Qstring(bfr, f);
171
+ }
172
+
173
+
174
+ /* Qem()
175
+ */
176
+ static void
177
+ Qem(MMIOT *f, char c, int count)
178
+ {
179
+ block *p = &EXPAND(f->Q);
180
+
181
+ memset(p, 0, sizeof *p);
182
+ p->b_type = (c == '*') ? bSTAR : bUNDER;
183
+ p->b_char = c;
184
+ p->b_count = count;
185
+
186
+ memset(&EXPAND(f->Q), 0, sizeof(block));
187
+ }
188
+
189
+
190
+ /* empair()
191
+ */
192
+ static int
193
+ empair(MMIOT *f, int go, int level)
194
+ {
195
+
196
+ int i;
197
+ block *begin, *p;
198
+
199
+ begin = &T(f->Q)[go];
200
+ for (i=go+1; i < S(f->Q); i++) {
201
+ p = &T(f->Q)[i];
202
+
203
+ if ( (p->b_type != bTEXT) && (p->b_count <= 0) )
204
+ break;
205
+
206
+ if ( p->b_type == begin->b_type ) {
207
+ if ( p->b_count == level ) /* exact match */
208
+ return i-go;
209
+
210
+ if ( p->b_count > 2 ) /* fuzzy match */
211
+ return i-go;
212
+ }
213
+ }
214
+ return EOF;
215
+ }
216
+
217
+
218
+
219
/* html tags for the two emphasis levels, indexed by (level - 1).
 * <size> is the length of the closing tag; emopen() writes size-1
 * bytes for the opening tag.
 */
static struct emtags {
    char open[10];
    char close[10];
    int size;
} emtags[] = {
    { "<em>",     "</em>",     5 },
    { "<strong>", "</strong>", 9 },
};
224
+
225
+
226
+ static void
227
+ emclose(Cstring *s, int level)
228
+ {
229
+ PREFIX(*s, emtags[level-1].close, emtags[level-1].size);
230
+ }
231
+
232
+
233
+ static void
234
+ emopen(Cstring *s, int level)
235
+ {
236
+ SUFFIX(*s, emtags[level-1].open, emtags[level-1].size-1);
237
+ }
238
+
239
+
240
+ /* emmatch()
241
+ */
242
+ static void
243
+ emmatch(MMIOT *f, int go)
244
+ {
245
+ block *start = &T(f->Q)[go], *end;
246
+ int e, e2, i, match;
247
+
248
+ while ( start->b_count ) {
249
+ switch (start->b_count) {
250
+ case 2: e = empair(f,go,match=2);
251
+ if ( e != EOF ) break;
252
+ case 1: e = empair(f,go,match=1); break;
253
+ default:
254
+ e = empair(f,go,1);
255
+ e2= empair(f,go,2);
256
+
257
+ if ( e == EOF || ((e2 != EOF) && (e2 >= e)) ) {
258
+ e = e2;
259
+ match = 2;
260
+ }
261
+ else
262
+ match = 1;
263
+ }
264
+ if ( e != EOF ) {
265
+ end = &T(f->Q)[go+e];
266
+ emclose(&end->b_post, match);
267
+ emopen(&start->b_text, match);
268
+ end->b_count -= match;
269
+ }
270
+ else {
271
+ for (i=0; i < match; i++)
272
+ EXPAND(start->b_text) = start->b_char;
273
+ }
274
+
275
+ start->b_count -= match;
276
+ }
277
+ }
278
+
279
+
280
+ /* emblock()
281
+ */
282
+ static void
283
+ emblock(MMIOT *f)
284
+ {
285
+ int i;
286
+ block *p;
287
+
288
+ for (i=0; i < S(f->Q); i++) {
289
+ p = &T(f->Q)[i];
290
+
291
+ if ( p->b_type != bTEXT ) emmatch(f, i);
292
+
293
+ if ( S(p->b_post) ) { SUFFIX(f->out, T(p->b_post), S(p->b_post));
294
+ DELETE(p->b_post); }
295
+ if ( S(p->b_text) ) { SUFFIX(f->out, T(p->b_text), S(p->b_text));
296
+ DELETE(p->b_text); }
297
+ }
298
+ S(f->Q) = 0;
299
+ }
300
+
301
+
302
+ /* generate html from a markup fragment
303
+ */
304
+ static void
305
+ reparse(char *bfr, int size, int flags, MMIOT *f)
306
+ {
307
+ MMIOT sub;
308
+
309
+ ___mkd_initmmiot(&sub, f->footnotes);
310
+
311
+ sub.flags = f->flags | flags;
312
+ sub.base = f->base;
313
+
314
+ push(bfr, size, &sub);
315
+ EXPAND(sub.in) = 0;
316
+ S(sub.in)--;
317
+
318
+ text(&sub);
319
+ emblock(&sub);
320
+
321
+ Qwrite(T(sub.out), S(sub.out), f);
322
+
323
+ ___mkd_freemmiot(&sub, f->footnotes);
324
+ }
325
+
326
+
327
+ /*
328
+ * write out a url, escaping problematic characters
329
+ */
330
+ static void
331
+ puturl(char *s, int size, MMIOT *f)
332
+ {
333
+ unsigned char c;
334
+
335
+ while ( size-- > 0 ) {
336
+ c = *s++;
337
+
338
+ if ( c == '&' )
339
+ Qstring("&amp;", f);
340
+ else if ( c == '<' )
341
+ Qstring("&lt;", f);
342
+ else if ( isalnum(c) || ispunct(c) )
343
+ Qchar(c, f);
344
+ else
345
+ Qprintf(f, "%%%02X", c);
346
+ }
347
+ }
348
+
349
+
350
+ /* advance forward until the next character is not whitespace
351
+ */
352
+ static int
353
+ eatspace(MMIOT *f)
354
+ {
355
+ int c;
356
+
357
+ for ( ; ((c=peek(f, 1)) != EOF) && isspace(c); pull(f) )
358
+ ;
359
+ return c;
360
+ }
361
+
362
+
363
+ /* (match (a (nested (parenthetical (string.)))))
364
+ */
365
+ static int
366
+ parenthetical(int in, int out, MMIOT *f)
367
+ {
368
+ int size, indent, c;
369
+
370
+ for ( indent=1,size=0; indent; size++ ) {
371
+ if ( (c = pull(f)) == EOF )
372
+ return EOF;
373
+ else if ( c == in )
374
+ ++indent;
375
+ else if ( c == out )
376
+ --indent;
377
+ }
378
+ return size-1;
379
+ }
380
+
381
+
382
+ /* extract a []-delimited label from the input stream.
383
+ */
384
+ static char *
385
+ linkylabel(MMIOT *f, int *sizep)
386
+ {
387
+ char *ptr = cursor(f);
388
+
389
+ if ( (*sizep = parenthetical('[',']',f)) != EOF )
390
+ return ptr;
391
+ return 0;
392
+ }
393
+
394
+
395
+ /* extract a (-prefixed url from the input stream.
396
+ * the label is either of the format `<link>`, where I
397
+ * extract until I find a >, or it is of the format
398
+ * `text`, where I extract until I reach a ')' or
399
+ * whitespace.
400
+ */
401
+ static char*
402
+ linkyurl(MMIOT *f, int *sizep)
403
+ {
404
+ int size = 0;
405
+ char *ptr;
406
+ int c;
407
+
408
+ if ( (c = eatspace(f)) == EOF )
409
+ return 0;
410
+
411
+ ptr = cursor(f);
412
+
413
+ if ( c == '<' ) {
414
+ pull(f);
415
+ ptr++;
416
+ if ( (size = parenthetical('<', '>', f)) == EOF )
417
+ return 0;
418
+ }
419
+ else {
420
+ for ( ; ((c=pull(f)) != ')') && !isspace(c); size++)
421
+ if ( c == EOF ) return 0;
422
+ if ( c == ')' )
423
+ shift(f, -1);
424
+ }
425
+ *sizep = size;
426
+ return ptr;
427
+ }
428
+
429
+
430
+ /* extract a =HHHxWWW size from the input stream
431
+ */
432
+ static int
433
+ linkysize(MMIOT *f, int *heightp, int *widthp)
434
+ {
435
+ int height=0, width=0;
436
+ int c;
437
+
438
+ *heightp = 0;
439
+ *widthp = 0;
440
+
441
+ if ( (c = eatspace(f)) != '=' )
442
+ return (c != EOF);
443
+ pull(f); /* eat '=' */
444
+
445
+ for ( c = pull(f); isdigit(c); c = pull(f))
446
+ width = (width * 10) + (c - '0');
447
+
448
+ if ( c == 'x' ) {
449
+ for ( c = pull(f); isdigit(c); c = pull(f))
450
+ height = (height*10) + (c - '0');
451
+
452
+ if ( c != EOF ) {
453
+ if ( !isspace(c) ) shift(f, -1);
454
+ *heightp = height;
455
+ *widthp = width;
456
+ return 1;
457
+ }
458
+ }
459
+ return 0;
460
+ }
461
+
462
+
463
+ /* extract a )-terminated title from the input stream.
464
+ */
465
+ static char*
466
+ linkytitle(MMIOT *f, int *sizep)
467
+ {
468
+ int countq=0, qc, c, size;
469
+ char *ret, *lastqc = 0;
470
+
471
+ eatspace(f);
472
+ if ( (qc=pull(f)) != '"' && qc != '\'' && qc != '(' )
473
+ return 0;
474
+
475
+ if ( qc == '(' ) qc = ')';
476
+
477
+ for ( ret = cursor(f); (c = pull(f)) != EOF; ) {
478
+ if ( (c == ')') && countq ) {
479
+ size = (lastqc ? lastqc : cursor(f)) - ret;
480
+ *sizep = size-1;
481
+ return ret;
482
+ }
483
+ else if ( c == qc ) {
484
+ lastqc = cursor(f);
485
+ countq++;
486
+ }
487
+ }
488
+ return 0;
489
+ }
490
+
491
+
492
+ /* look up (or construct) a footnote from the [xxx] link
493
+ * at the head of the stream.
494
+ */
495
+ static int
496
+ linkykey(int image, Footnote *val, MMIOT *f)
497
+ {
498
+ Footnote *ret;
499
+ Cstring mylabel;
500
+
501
+ memset(val, 0, sizeof *val);
502
+
503
+ if ( (T(val->tag) = linkylabel(f, &S(val->tag))) == 0 )
504
+ return 0;
505
+
506
+ eatspace(f);
507
+ switch ( pull(f) ) {
508
+ case '(':
509
+ /* embedded link */
510
+ if ( (T(val->link) = linkyurl(f,&S(val->link))) == 0 )
511
+ return 0;
512
+
513
+ if ( image && !linkysize(f, &val->height, &val->width) )
514
+ return 0;
515
+
516
+ T(val->title) = linkytitle(f, &S(val->title));
517
+
518
+ return peek(f,0) == ')';
519
+
520
+ case '[':
521
+ /* footnote link */
522
+ mylabel = val->tag;
523
+ if ( (T(val->tag) = linkylabel(f, &S(val->tag))) == 0 )
524
+ return 0;
525
+
526
+ if ( !S(val->tag) )
527
+ val->tag = mylabel;
528
+
529
+ ret = bsearch(val, T(*f->footnotes), S(*f->footnotes),
530
+ sizeof *val, (stfu)__mkd_footsort);
531
+
532
+ if ( ret ) {
533
+ val->tag = mylabel;
534
+ val->link = ret->link;
535
+ val->title = ret->title;
536
+ val->height = ret->height;
537
+ val->width = ret->width;
538
+ return 1;
539
+ }
540
+ }
541
+ return 0;
542
+ }
543
+
544
+
545
/*
 * description of one kind of tag that linkylinky can produce.
 */
typedef struct linkytype {
    char *pat;		/* pseudo-protocol prefix to match, or 0 */
    int   szpat;	/* length of pat */
    char *link_pfx;	/* tag start up to the url, eg: <a href=" */
    char *link_sfx;	/* text that follows the url, eg: " */
    int   WxH;		/* this tag allows width x height arguments */
    char *text_pfx;	/* text that precedes the label, eg: > */
    char *text_sfx;	/* text that follows the label, eg: </a> */
    int   flags;	/* reparse flags */
} linkytype;
559
+
560
+ static linkytype imaget = { 0, 0, "<img src=\"", "\"",
561
+ 1, " alt=\"", "\" />", DENY_IMG|INSIDE_TAG };
562
+ static linkytype linkt = { 0, 0, "<a href=\"", "\"",
563
+ 0, ">", "</a>", DENY_A };
564
+
565
+ /*
566
+ * pseudo-protocols for [][];
567
+ *
568
+ * id: generates <a id="link">tag</a>
569
+ * class: generates <span class="link">tag</span>
570
+ * raw: just dump the link without any processing
571
+ */
572
+ static linkytype specials[] = {
573
+ { "id:", 3, "<a id=\"", "\"", 0, ">", "</a>", 0 },
574
+ { "class:", 6, "<span class=\"", "\"", 0, ">", "</span>", 0 },
575
+ { "raw:", 4, 0, 0, 0, 0, 0, 0 },
576
+ } ;
577
+
578
+ #define NR(x) (sizeof x / sizeof x[0])
579
+
580
+ /* see if t contains one of our pseudo-protocols.
581
+ */
582
+ static linkytype *
583
+ extratag(Cstring t)
584
+ {
585
+ int i;
586
+ linkytype *r;
587
+
588
+ for ( i=0; i < NR(specials); i++ ) {
589
+ r = &specials[i];
590
+ if ( (S(t) > r->szpat) && (strncasecmp(T(t), r->pat, r->szpat) == 0) )
591
+ return r;
592
+ }
593
+ return 0;
594
+ }
595
+
596
+
597
+ /*
598
+ * process embedded links and images
599
+ */
600
+ static int
601
+ linkylinky(int image, MMIOT *f)
602
+ {
603
+ int start = mmiottell(f);
604
+ Footnote link;
605
+ linkytype *tag;
606
+
607
+ if ( !linkykey(image, &link, f) ) {
608
+ mmiotseek(f, start);
609
+ return 0;
610
+ }
611
+
612
+ if ( image )
613
+ tag = &imaget;
614
+ else if ( (f->flags & NO_PSEUDO_PROTO) || (tag = extratag(link.link)) == 0 )
615
+ tag = &linkt;
616
+
617
+ if ( f->flags & tag-> flags ) {
618
+ mmiotseek(f, start);
619
+ return 0;
620
+ }
621
+
622
+ if ( tag->link_pfx ) {
623
+ Qstring(tag->link_pfx, f);
624
+ if ( f->base && (T(link.link)[tag->szpat] == '/') )
625
+ puturl(f->base, strlen(f->base), f);
626
+ puturl(T(link.link) + tag->szpat, S(link.link) - tag->szpat, f);
627
+ Qstring(tag->link_sfx, f);
628
+
629
+ if ( tag->WxH && link.height && link.width ) {
630
+ Qprintf(f," height=\"%d\"", link.height);
631
+ Qprintf(f, " width=\"%d\"", link.width);
632
+ }
633
+
634
+ if ( S(link.title) ) {
635
+ Qstring(" title=\"", f);
636
+ reparse(T(link.title), S(link.title), INSIDE_TAG, f);
637
+ Qchar('"', f);
638
+ }
639
+
640
+ Qstring(tag->text_pfx, f);
641
+ reparse(T(link.tag), S(link.tag), tag->flags, f);
642
+ Qstring(tag->text_sfx, f);
643
+ }
644
+ else
645
+ Qwrite(T(link.link) + tag->szpat, S(link.link) - tag->szpat, f);
646
+
647
+ return 1;
648
+ }
649
+
650
+
651
+ /* write a character to output, doing text escapes ( & -> &amp;,
652
+ * > -> &gt; < -> &lt; )
653
+ */
654
+ static void
655
+ cputc(int c, MMIOT *f)
656
+ {
657
+ switch (c) {
658
+ case '&': Qstring("&amp;", f); break;
659
+ case '>': Qstring("&gt;", f); break;
660
+ case '<': Qstring("&lt;", f); break;
661
+ default : Qchar(c, f); break;
662
+ }
663
+ }
664
+
665
+
666
+ /*
667
+ * convert an email address to a string of nonsense
668
+ */
669
+ static void
670
+ mangle(char *s, int len, MMIOT *f)
671
+ {
672
+ while ( len-- > 0 ) {
673
+ Qstring("&#", f);
674
+ Qprintf(f, COINTOSS() ? "x%02x;" : "%02d;", *((unsigned char*)(s++)) );
675
+ }
676
+ }
677
+
678
+
679
+ /* before letting a tag through, validate against
680
+ * DENY_A and DENY_IMG
681
+ */
682
+ static int
683
+ forbidden_tag(MMIOT *f)
684
+ {
685
+ int c = toupper(peek(f, 1));
686
+
687
+ if ( f->flags & DENY_HTML )
688
+ return 1;
689
+
690
+ if ( c == 'A' && (f->flags & DENY_A) && !isthisalnum(f,2) )
691
+ return 1;
692
+ if ( c == 'I' && (f->flags & DENY_IMG)
693
+ && strncasecmp(cursor(f)+1, "MG", 2) == 0
694
+ && !isthisalnum(f,4) )
695
+ return 1;
696
+ return 0;
697
+ }
698
+
699
+
700
+
701
+ /* a < may be just a regular character, the start of an embedded html
702
+ * tag, or the start of an <automatic link>. If it's an automatic
703
+ * link, we also need to know if it's an email address because if it
704
+ * is we need to mangle it in our futile attempt to cut down on the
705
+ * spaminess of the rendered page.
706
+ */
707
+ static int
708
+ maybe_tag_or_link(MMIOT *f)
709
+ {
710
+ char *text;
711
+ int c, size, i;
712
+ int maybetag=1, maybeaddress=0;
713
+ int mailto;
714
+
715
+ if ( f->flags & INSIDE_TAG )
716
+ return 0;
717
+
718
+ for ( size=0; ((c = peek(f,size+1)) != '>') && !isspace(c); size++ ) {
719
+ if ( ! (c == '/' || isalnum(c) || c == '~') )
720
+ maybetag=0;
721
+ if ( c == '@' )
722
+ maybeaddress=1;
723
+ else if ( c == EOF )
724
+ return 0;
725
+ }
726
+
727
+ if ( size == 0 )
728
+ return 0;
729
+
730
+ if ( maybetag || (size >= 3 && strncmp(cursor(f), "!--", 3) == 0) ) {
731
+ Qstring(forbidden_tag(f) ? "&lt;" : "<", f);
732
+ while ( ((c = peek(f, 1)) != EOF) && (c != '>') )
733
+ cputc(pull(f), f);
734
+ return 1;
735
+ }
736
+
737
+ if ( f->flags & DENY_A ) return 0;
738
+
739
+ text = cursor(f);
740
+ shift(f, size+1);
741
+
742
+ for ( i=0; i < SZAUTOPREFIX; i++ )
743
+ if ( strncasecmp(text, autoprefix[i], strlen(autoprefix[i])) == 0 ) {
744
+ Qstring("<a href=\"", f);
745
+ puturl(text,size,f);
746
+ Qstring("\">", f);
747
+ puturl(text,size,f);
748
+ Qstring("</a>", f);
749
+ return 1;
750
+ }
751
+ if ( maybeaddress ) {
752
+
753
+ Qstring("<a href=\"", f);
754
+ if ( (size > 7) && strncasecmp(text, "mailto:", 7) == 0 )
755
+ mailto = 7;
756
+ else {
757
+ mailto = 0;
758
+ /* supply a mailto: protocol if one wasn't attached */
759
+ mangle("mailto:", 7, f);
760
+ }
761
+
762
+ mangle(text, size, f);
763
+ Qstring("\">", f);
764
+ mangle(text+mailto, size-mailto, f);
765
+ Qstring("</a>", f);
766
+ return 1;
767
+ }
768
+
769
+ shift(f, -(size+1));
770
+ return 0;
771
+ } /* maybe_tag_or_link */
772
+
773
+
774
+ /* smartyquote code that's common for single and double quotes
775
+ */
776
+ static int
777
+ smartyquote(int *flags, char typeofquote, MMIOT *f)
778
+ {
779
+ int bit = (typeofquote == 's') ? 0x01 : 0x02;
780
+
781
+ if ( bit & (*flags) ) {
782
+ if ( isthisnonword(f,1) ) {
783
+ Qprintf(f, "&r%cquo;", typeofquote);
784
+ (*flags) &= ~bit;
785
+ return 1;
786
+ }
787
+ }
788
+ else if ( isthisnonword(f,-1) && peek(f,1) != EOF ) {
789
+ Qprintf(f, "&l%cquo;", typeofquote);
790
+ (*flags) |= bit;
791
+ return 1;
792
+ }
793
+ return 0;
794
+ }
795
+
796
+
797
+ static int
798
+ islike(MMIOT *f, char *s)
799
+ {
800
+ int len;
801
+ int i;
802
+
803
+ if ( s[0] == '<' ) {
804
+ if ( !isthisnonword(f, -1) )
805
+ return 0;
806
+ ++s;
807
+ }
808
+
809
+ if ( !(len = strlen(s)) )
810
+ return 0;
811
+
812
+ if ( s[len-1] == '>' ) {
813
+ if ( !isthisnonword(f,len-1) )
814
+ return 0;
815
+ len--;
816
+ }
817
+
818
+ for (i=1; i < len; i++)
819
+ if (tolower(peek(f,i)) != s[i])
820
+ return 0;
821
+ return 1;
822
+ }
823
+
824
+
825
/* smartypants substitutions, tried in table order.
 *   c0     - the character that triggers the check
 *   pat    - pattern for islike(); a leading '<' or trailing '>' asks
 *            for a non-word boundary on that side
 *   entity - name of the html entity to emit, or 0 to emit nothing
 *   shift  - how far to move the cursor after a match
 */
static struct smarties {
    char c0;
    char *pat;
    char *entity;
    int shift;
} smarties[] = {
    { '\'', "'s>",      "rsquo",  0 },
    { '\'', "'t>",      "rsquo",  0 },
    { '\'', "'re>",     "rsquo",  0 },
    { '\'', "'ll>",     "rsquo",  0 },
    { '-',  "--",       "mdash",  1 },
    { '-',  "<->",      "ndash",  0 },
    { '.',  "...",      "hellip", 2 },
    { '.',  ". . .",    "hellip", 4 },
    { '(',  "(c)",      "copy",   2 },
    { '(',  "(r)",      "reg",    2 },
    { '(',  "(tm)",     "trade",  3 },
    { '3',  "<3/4>",    "frac34", 2 },
    { '3',  "<3/4ths>", "frac34", 2 },
    { '1',  "<1/2>",    "frac12", 2 },
    { '1',  "<1/4>",    "frac14", 2 },
    { '1',  "<1/4th>",  "frac14", 2 },
    { '&',  "&#0;",     0,        3 },
} ;
#define NRSMART	( sizeof smarties / sizeof smarties[0] )
850
+
851
+
852
+ /* Smarty-pants-style chrome for quotes, -, ellipses, and (r)(c)(tm)
853
+ */
854
+ static int
855
+ smartypants(int c, int *flags, MMIOT *f)
856
+ {
857
+ int i;
858
+
859
+ if ( f->flags & DENY_SMARTY )
860
+ return 0;
861
+
862
+ for ( i=0; i < NRSMART; i++)
863
+ if ( (c == smarties[i].c0) && islike(f, smarties[i].pat) ) {
864
+ if ( smarties[i].entity )
865
+ Qprintf(f, "&%s;", smarties[i].entity);
866
+ shift(f, smarties[i].shift);
867
+ return 1;
868
+ }
869
+
870
+ switch (c) {
871
+ case '<' : return 0;
872
+ case '\'': if ( smartyquote(flags, 's', f) ) return 1;
873
+ break;
874
+
875
+ case '"': if ( smartyquote(flags, 'd', f) ) return 1;
876
+ break;
877
+
878
+ case '`': if ( peek(f, 1) == '`' ) {
879
+ int j = 2;
880
+
881
+ while ( (c=peek(f,j)) != EOF ) {
882
+ if ( c == '\\' )
883
+ j += 2;
884
+ else if ( c == '`' )
885
+ break;
886
+ else if ( c == '\'' && peek(f, j+1) == '\'' ) {
887
+ Qstring("&ldquo;", f);
888
+ reparse(cursor(f)+1, j-2, 0, f);
889
+ Qstring("&rdquo;", f);
890
+ shift(f,j+1);
891
+ return 1;
892
+ }
893
+ else ++j;
894
+ }
895
+
896
+ }
897
+ break;
898
+ }
899
+ return 0;
900
+ } /* smartypants */
901
+
902
+
903
/* nonzero while text is being generated inside an html tag */
#define tag_text(f)	((f)->flags & INSIDE_TAG)
904
+
905
+ /* text(): translate the bytes waiting in f->in into html on the output
   * queue.  Every byte pulled is first offered to smartypants(); whatever
   * it declines is dispatched on by the switch below.  When the input is
   * used up, the input buffer and cursor are reset to empty.
   */
906
+ static void
907
+ text(MMIOT *f)
908
+ {
909
+ int c, j;
910
+ int rep; /* length of a run of identical emphasis characters */
911
+ int smartyflags = 0; /* open-quote state handed to smartypants() */
912
+
913
+ while ( (c = pull(f)) != EOF ) {
914
+ if ( smartypants(c, &smartyflags, f) )
915
+ continue;
916
+ switch (c) {
917
+ case 0: break; /* NUL bytes produce no output */
918
+
919
+ case '>': if ( tag_text(f) ) /* escaped only inside a tag */
920
+ Qstring("&gt;", f);
921
+ else
922
+ Qchar(c, f);
923
+ break;
924
+
925
+ case '"': if ( tag_text(f) ) /* escaped only inside a tag */
926
+ Qstring("&quot;", f);
927
+ else
928
+ Qchar(c, f);
929
+ break;
930
+
931
+ case '!': if ( peek(f,1) == '[' ) { /* possible ![image] */
932
+ pull(f);
933
+ if ( tag_text(f) || !linkylinky(1, f) )
934
+ Qstring("![", f); /* not an image: emit both bytes as text */
935
+ }
936
+ else
937
+ Qchar(c, f);
938
+ break;
939
+ case '[': if ( tag_text(f) || !linkylinky(0, f) ) /* possible [link] */
940
+ Qchar(c, f);
941
+ break;
942
+ #if SUPERSCRIPT
943
+ /* A^B -> A<sup>B</sup>; B runs up to the next whitespace or end of input */
944
+ case '^': if ( (f->flags & (STRICT|INSIDE_TAG)) || isthisspace(f,-1) || isthisspace(f,1) )
945
+ Qchar(c,f);
946
+ else {
947
+ char *sup = cursor(f);
948
+ int len = 0;
949
+ Qstring("<sup>",f);
950
+ while ( !isthisspace(f,1+len) ) {
951
+ ++len;
952
+ }
953
+ shift(f,len); /* skip the text that reparse() handles below */
954
+ reparse(sup, len, 0, f);
955
+ Qstring("</sup>", f);
956
+ }
957
+ break;
958
+ #endif
959
+ case '_':
960
+ #if RELAXED_EMPHASIS
961
+ /* Underscores don't count if they're in the middle of a word
     * or have whitespace on both sides (unless STRICT is set) */
962
+ if ( (!(f->flags & STRICT))
963
+ && ((isthisspace(f,-1) && isthisspace(f,1))
964
+ || (isthisalnum(f,-1) && isthisalnum(f,1))) ){
965
+ Qchar(c, f);
966
+ break;
967
+ }
968
+ /* else fall into the regular old emphasis case */
969
+ #endif
970
+ case '*': if ( tag_text(f) )
971
+ Qchar(c, f);
972
+ else {
973
+ for (rep = 1; peek(f,1) == c; pull(f) ) /* count and consume the run */
974
+ ++rep;
975
+ Qem(f,c,rep);
976
+ }
977
+ break;
978
+
979
+ case '`': if ( tag_text(f) || !iscodeblock(f) )
980
+ Qchar(c, f);
981
+ else {
982
+ Qstring("<code>", f);
983
+ if ( peek(f, 1) == '`' ) { /* `` double-backtick delimiter */
984
+ pull(f);
985
+ code(2, f);
986
+ }
987
+ else
988
+ code(1, f);
989
+ Qstring("</code>", f);
990
+ }
991
+ break;
992
+
993
+ case '\\': switch ( c = pull(f) ) { /* backslash escape: look at the next byte */
994
+ case '&': Qstring("&amp;", f);
995
+ break;
996
+ case '<': Qstring("&lt;", f);
997
+ break;
998
+ case '\\':
999
+ case '>': case '#': case '.': case '-':
1000
+ case '+': case '{': case '}': case ']':
1001
+ case '(': case ')': case '"': case '\'':
1002
+ case '!': case '[': case '*': case '_':
1003
+ case '`': Qchar(c, f); /* escapable: emit it without the backslash */
1004
+ break;
1005
+ default:
1006
+ Qchar('\\', f); /* not escapable: keep the backslash ... */
1007
+ if ( c != EOF )
1008
+ shift(f,-1); /* ... and put the byte back for the main loop */
1009
+ break;
1010
+ }
1011
+ break;
1012
+
1013
+ case '<': if ( !maybe_tag_or_link(f) )
1014
+ Qstring("&lt;", f);
1015
+ break;
1016
+
1017
+ case '&': j = (peek(f,1) == '#' ) ? 2 : 1; /* skip the '#' of a numeric reference */
1018
+ while ( isthisalnum(f,j) )
1019
+ ++j;
1020
+
1021
+ if ( peek(f,j) != ';' ) /* no ';' after the name: a bare ampersand */
1022
+ Qstring("&amp;", f);
1023
+ else
1024
+ Qchar(c, f);
1025
+ break;
1026
+
1027
+ default: Qchar(c, f);
1028
+ break;
1029
+ }
1030
+ }
1031
+ /* truncate the input string after we've finished processing it */
1032
+ S(f->in) = f->isp = 0;
1033
+ } /* text */
1034
+
1035
+
1036
+ static int
1037
+ iscodeblock(MMIOT *f)
1038
+ {
1039
+ int i=1, single = 1, c;
1040
+
1041
+ if ( peek(f,i) == '`' ) {
1042
+ single=0;
1043
+ i++;
1044
+ }
1045
+ while ( (c=peek(f,i)) != EOF ) {
1046
+ if ( (c == '`') && (single || peek(f,i+1) == '`') )
1047
+ return 1;
1048
+ else if ( c == '\\' )
1049
+ i++;
1050
+ i++;
1051
+ }
1052
+ return 0;
1053
+
1054
+ }
1055
+
1056
+ static int
1057
+ endofcode(int escape, int offset, MMIOT *f)
1058
+ {
1059
+ switch (escape) {
1060
+ case 2: if ( peek(f, offset+1) == '`' ) {
1061
+ shift(f,1);
1062
+ case 1: shift(f,offset);
1063
+ return 1;
1064
+ }
1065
+ default:return 0;
1066
+ }
1067
+ }
1068
+
1069
+
1070
+ /* the only characters that have special meaning in a code block are
1071
+ * `<' and `&' , which are /always/ expanded to &lt; and &amp;
1072
+ */
1073
+ static void
1074
+ code(int escape, MMIOT *f)
1075
+ {
1076
+ int c;
1077
+
1078
+ if ( escape && (peek(f,1) == ' ') )
1079
+ shift(f,1);
1080
+
1081
+ while ( (c = pull(f)) != EOF ) {
1082
+ switch (c) {
1083
+ case ' ': if ( peek(f,1) == '`' && endofcode(escape, 1, f) )
1084
+ return;
1085
+ Qchar(c, f);
1086
+ break;
1087
+
1088
+ case '`': if ( endofcode(escape, 0, f) )
1089
+ return;
1090
+ Qchar(c, f);
1091
+ break;
1092
+
1093
+ case '\\': cputc(c, f);
1094
+ if ( peek(f,1) == '>' || (c = pull(f)) == EOF )
1095
+ break;
1096
+
1097
+ default: cputc(c, f);
1098
+ break;
1099
+ }
1100
+ }
1101
+ } /* code */
1102
+
1103
+
1104
+ /* print a header block
1105
+ */
1106
+ static void
1107
+ printheader(Paragraph *pp, MMIOT *f)
1108
+ {
1109
+ Qprintf(f, "<h%d", pp->hnumber);
1110
+ if ( f->flags & TOC ) {
1111
+ Qprintf(f, " id=\"", pp->hnumber);
1112
+ mkd_string_to_anchor(T(pp->text->text), S(pp->text->text), Qchar, f);
1113
+ Qchar('"', f);
1114
+ }
1115
+ Qchar('>', f);
1116
+ push(T(pp->text->text), S(pp->text->text), f);
1117
+ text(f);
1118
+ Qprintf(f, "</h%d>", pp->hnumber);
1119
+ }
1120
+
1121
+
1122
+ static int
1123
+ printblock(Paragraph *pp, MMIOT *f)
1124
+ {
1125
+ Line *t = pp->text;
1126
+ static char *Begin[] = { "", "<p>", "<center>" };
1127
+ static char *End[] = { "", "</p>","</center>" };
1128
+
1129
+ while (t) {
1130
+ if ( S(t->text) ) {
1131
+ if ( S(t->text) > 2 && T(t->text)[S(t->text)-2] == ' '
1132
+ && T(t->text)[S(t->text)-1] == ' ') {
1133
+ push(T(t->text), S(t->text)-2, f);
1134
+ push("<br/>\n", 6, f);
1135
+ }
1136
+ else {
1137
+ push(T(t->text), S(t->text), f);
1138
+ if ( t->next )
1139
+ push("\n", 1, f);
1140
+ }
1141
+ }
1142
+ t = t->next;
1143
+ }
1144
+ Qstring(Begin[pp->align], f);
1145
+ text(f);
1146
+ Qstring(End[pp->align], f);
1147
+ return 1;
1148
+ }
1149
+
1150
+
1151
+ static void
1152
+ printcode(Line *t, MMIOT *f)
1153
+ {
1154
+ int blanks;
1155
+
1156
+ for ( blanks = 0; t ; t = t->next )
1157
+ if ( S(t->text) > t->dle ) {
1158
+ while ( blanks ) {
1159
+ push("\n", 1, f);
1160
+ --blanks;
1161
+ }
1162
+ push(T(t->text), S(t->text), f);
1163
+ push("\n", 1, f);
1164
+ }
1165
+ else blanks++;
1166
+
1167
+ Qstring("<pre><code>", f);
1168
+ code(0, f);
1169
+ Qstring("</code></pre>", f);
1170
+ }
1171
+
1172
+
1173
+ static void
1174
+ printhtml(Line *t, MMIOT *f)
1175
+ {
1176
+ int blanks;
1177
+
1178
+ for ( blanks=0; t ; t = t->next )
1179
+ if ( S(t->text) ) {
1180
+ for ( ; blanks; --blanks )
1181
+ Qchar('\n', f);
1182
+
1183
+ Qwrite(T(t->text), S(t->text), f);
1184
+ Qchar('\n', f);
1185
+ }
1186
+ else
1187
+ blanks++;
1188
+ }
1189
+
1190
+
1191
+ static void
1192
+ htmlify(Paragraph *p, char *block, char *arguments, MMIOT *f)
1193
+ {
1194
+ emblock(f);
1195
+ if ( block )
1196
+ Qprintf(f, arguments ? "<%s %s>" : "<%s>", block, arguments);
1197
+ emblock(f);
1198
+
1199
+ while (( p = display(p, f) )) {
1200
+ emblock(f);
1201
+ Qstring("\n\n", f);
1202
+ }
1203
+
1204
+ if ( block )
1205
+ Qprintf(f, "</%s>", block);
1206
+ emblock(f);
1207
+ }
1208
+
1209
+
1210
#if DL_TAG_EXTENSION
/* print a definition list: for each entry, every line of its text
 * becomes a <dt> and its child paragraphs become the <dd>.  Nothing
 * is printed for an empty list.
 */
static void
definitionlist(Paragraph *p, MMIOT *f)
{
    Line *term;

    if ( !p )
	return;

    Qstring("<dl>\n", f);

    while ( p ) {
	for ( term = p->text; term; term = term->next ) {
	    Qstring("<dt>", f);
	    reparse(T(term->text), S(term->text), 0, f);
	    Qstring("</dt>\n", f);
	}

	htmlify(p->down, "dd", p->ident, f);
	p = p->next;
    }

    Qstring("</dl>", f);
}
#endif
1233
+
1234
+
1235
+ static void
1236
+ listdisplay(int typ, Paragraph *p, MMIOT* f)
1237
+ {
1238
+ if ( p ) {
1239
+ Qprintf(f, "<%cl", (typ==UL)?'u':'o');
1240
+ if ( typ == AL )
1241
+ Qprintf(f, " type=a");
1242
+ Qprintf(f, ">\n");
1243
+
1244
+ for ( ; p ; p = p->next ) {
1245
+ htmlify(p->down, "li", p->ident, f);
1246
+ Qchar('\n', f);
1247
+ }
1248
+
1249
+ Qprintf(f, "</%cl>\n", (typ==UL)?'u':'o');
1250
+ }
1251
+ }
1252
+
1253
+
1254
+ /* dump out a Paragraph in the desired manner
1255
+ */
1256
+ static Paragraph*
1257
+ display(Paragraph *p, MMIOT *f)
1258
+ {
1259
+ if ( !p ) return 0;
1260
+
1261
+ switch ( p->typ ) {
1262
+ case STYLE:
1263
+ case WHITESPACE:
1264
+ break;
1265
+
1266
+ case HTML:
1267
+ printhtml(p->text, f);
1268
+ break;
1269
+
1270
+ case CODE:
1271
+ printcode(p->text, f);
1272
+ break;
1273
+
1274
+ case QUOTE:
1275
+ htmlify(p->down, p->ident ? "div" : "blockquote", p->ident, f);
1276
+ break;
1277
+
1278
+ case UL:
1279
+ case OL:
1280
+ case AL:
1281
+ listdisplay(p->typ, p->down, f);
1282
+ break;
1283
+
1284
+ #if DL_TAG_EXTENSION
1285
+ case DL:
1286
+ definitionlist(p->down, f);
1287
+ break;
1288
+ #endif
1289
+
1290
+ case HR:
1291
+ Qstring("<hr />", f);
1292
+ break;
1293
+
1294
+ case HDR:
1295
+ printheader(p, f);
1296
+ break;
1297
+
1298
+ default:
1299
+ printblock(p, f);
1300
+ break;
1301
+ }
1302
+ return p->next;
1303
+ }
1304
+
1305
+
1306
+ /*
1307
+ * dump out stylesheet sections.
1308
+ */
1309
+ static int
1310
+ stylesheets(Paragraph *p, FILE *f)
1311
+ {
1312
+ Line* q;
1313
+
1314
+ for ( ; p ; p = p->next ) {
1315
+ if ( p->typ == STYLE ) {
1316
+ for ( q = p->text; q ; q = q->next )
1317
+ if ( fwrite(T(q->text), S(q->text), 1, f) == 1 )
1318
+ putc('\n', f);
1319
+ else
1320
+ return EOF;
1321
+ }
1322
+ if ( p->down && (stylesheets(p->down, f) == EOF) )
1323
+ return EOF;
1324
+ }
1325
+ return 0;
1326
+ }
1327
+
1328
+
1329
+ /* return a pointer to the compiled markdown
1330
+ * document.
1331
+ */
1332
+ int
1333
+ mkd_document(Document *p, char **res)
1334
+ {
1335
+ if ( p && p->compiled ) {
1336
+ if ( ! p->html ) {
1337
+ htmlify(p->code, 0, 0, p->ctx);
1338
+ p->html = 1;
1339
+ }
1340
+
1341
+ *res = T(p->ctx->out);
1342
+ return S(p->ctx->out);
1343
+ }
1344
+ return EOF;
1345
+ }
1346
+
1347
+
1348
+ /* public interface for reparse()
1349
+ */
1350
+ int
1351
+ mkd_text(char *bfr, int size, FILE *output, int flags)
1352
+ {
1353
+ MMIOT f;
1354
+
1355
+ ___mkd_initmmiot(&f, 0);
1356
+ f.flags = flags & USER_FLAGS;
1357
+
1358
+ reparse(bfr, size, 0, &f);
1359
+ emblock(&f);
1360
+ if ( flags & CDATA_OUTPUT )
1361
+ ___mkd_xml(T(f.out), S(f.out), output);
1362
+ else
1363
+ fwrite(T(f.out), S(f.out), 1, output);
1364
+
1365
+ ___mkd_freemmiot(&f, 0);
1366
+ return 0;
1367
+ }
1368
+
1369
+
1370
+ /* dump any embedded styles
1371
+ */
1372
+ int
1373
+ mkd_style(Document *d, FILE *f)
1374
+ {
1375
+ if ( d && d->compiled )
1376
+ return stylesheets(d->code, f);
1377
+ return EOF;
1378
+ }
1379
+