bluecloth 2.0.6.pre120-x86-mswin32

Sign up to get free protection for your applications and to get access to all the features.
Files changed (112) hide show
  1. data/ChangeLog +363 -0
  2. data/LICENSE +27 -0
  3. data/LICENSE.discount +47 -0
  4. data/README +81 -0
  5. data/Rakefile +349 -0
  6. data/Rakefile.local +43 -0
  7. data/bin/bluecloth +84 -0
  8. data/ext/Csio.c +61 -0
  9. data/ext/VERSION +1 -0
  10. data/ext/amalloc.h +29 -0
  11. data/ext/bluecloth.c +391 -0
  12. data/ext/bluecloth.h +19 -0
  13. data/ext/config.h +55 -0
  14. data/ext/css.c +76 -0
  15. data/ext/cstring.h +75 -0
  16. data/ext/docheader.c +43 -0
  17. data/ext/extconf.rb +52 -0
  18. data/ext/generate.c +1602 -0
  19. data/ext/markdown.c +1078 -0
  20. data/ext/markdown.h +146 -0
  21. data/ext/mkdio.c +303 -0
  22. data/ext/mkdio.h +79 -0
  23. data/ext/resource.c +155 -0
  24. data/ext/version.c +28 -0
  25. data/ext/xml.c +82 -0
  26. data/ext/xmlpage.c +48 -0
  27. data/lib/1.8/bluecloth_ext.so +0 -0
  28. data/lib/1.9/bluecloth_ext.so +0 -0
  29. data/lib/bluecloth.rb +164 -0
  30. data/rake/191_compat.rb +26 -0
  31. data/rake/dependencies.rb +76 -0
  32. data/rake/helpers.rb +434 -0
  33. data/rake/hg.rb +273 -0
  34. data/rake/manual.rb +782 -0
  35. data/rake/packaging.rb +126 -0
  36. data/rake/publishing.rb +269 -0
  37. data/rake/rdoc.rb +30 -0
  38. data/rake/style.rb +62 -0
  39. data/rake/svn.rb +668 -0
  40. data/rake/testing.rb +187 -0
  41. data/rake/verifytask.rb +64 -0
  42. data/rake/win32.rb +190 -0
  43. data/spec/bluecloth/101_changes_spec.rb +141 -0
  44. data/spec/bluecloth/autolinks_spec.rb +49 -0
  45. data/spec/bluecloth/blockquotes_spec.rb +145 -0
  46. data/spec/bluecloth/code_spans_spec.rb +164 -0
  47. data/spec/bluecloth/emphasis_spec.rb +164 -0
  48. data/spec/bluecloth/entities_spec.rb +65 -0
  49. data/spec/bluecloth/hrules_spec.rb +90 -0
  50. data/spec/bluecloth/images_spec.rb +92 -0
  51. data/spec/bluecloth/inline_html_spec.rb +238 -0
  52. data/spec/bluecloth/links_spec.rb +171 -0
  53. data/spec/bluecloth/lists_spec.rb +294 -0
  54. data/spec/bluecloth/paragraphs_spec.rb +75 -0
  55. data/spec/bluecloth/titles_spec.rb +305 -0
  56. data/spec/bluecloth_spec.rb +281 -0
  57. data/spec/bugfix_spec.rb +172 -0
  58. data/spec/contributions_spec.rb +85 -0
  59. data/spec/data/antsugar.txt +34 -0
  60. data/spec/data/markdowntest/Amps and angle encoding.html +17 -0
  61. data/spec/data/markdowntest/Amps and angle encoding.text +21 -0
  62. data/spec/data/markdowntest/Auto links.html +18 -0
  63. data/spec/data/markdowntest/Auto links.text +13 -0
  64. data/spec/data/markdowntest/Backslash escapes.html +118 -0
  65. data/spec/data/markdowntest/Backslash escapes.text +120 -0
  66. data/spec/data/markdowntest/Blockquotes with code blocks.html +15 -0
  67. data/spec/data/markdowntest/Blockquotes with code blocks.text +11 -0
  68. data/spec/data/markdowntest/Code Blocks.html +18 -0
  69. data/spec/data/markdowntest/Code Blocks.text +14 -0
  70. data/spec/data/markdowntest/Code Spans.html +5 -0
  71. data/spec/data/markdowntest/Code Spans.text +5 -0
  72. data/spec/data/markdowntest/Hard-wrapped paragraphs with list-like lines.html +8 -0
  73. data/spec/data/markdowntest/Hard-wrapped paragraphs with list-like lines.text +8 -0
  74. data/spec/data/markdowntest/Horizontal rules.html +71 -0
  75. data/spec/data/markdowntest/Horizontal rules.text +67 -0
  76. data/spec/data/markdowntest/Inline HTML (Advanced).html +15 -0
  77. data/spec/data/markdowntest/Inline HTML (Advanced).text +15 -0
  78. data/spec/data/markdowntest/Inline HTML (Simple).html +72 -0
  79. data/spec/data/markdowntest/Inline HTML (Simple).text +69 -0
  80. data/spec/data/markdowntest/Inline HTML comments.html +13 -0
  81. data/spec/data/markdowntest/Inline HTML comments.text +13 -0
  82. data/spec/data/markdowntest/Links, inline style.html +11 -0
  83. data/spec/data/markdowntest/Links, inline style.text +12 -0
  84. data/spec/data/markdowntest/Links, reference style.html +52 -0
  85. data/spec/data/markdowntest/Links, reference style.text +71 -0
  86. data/spec/data/markdowntest/Links, shortcut references.html +9 -0
  87. data/spec/data/markdowntest/Links, shortcut references.text +20 -0
  88. data/spec/data/markdowntest/Literal quotes in titles.html +3 -0
  89. data/spec/data/markdowntest/Literal quotes in titles.text +7 -0
  90. data/spec/data/markdowntest/Markdown Documentation - Basics.html +314 -0
  91. data/spec/data/markdowntest/Markdown Documentation - Basics.text +306 -0
  92. data/spec/data/markdowntest/Markdown Documentation - Syntax.html +942 -0
  93. data/spec/data/markdowntest/Markdown Documentation - Syntax.text +888 -0
  94. data/spec/data/markdowntest/Nested blockquotes.html +9 -0
  95. data/spec/data/markdowntest/Nested blockquotes.text +5 -0
  96. data/spec/data/markdowntest/Ordered and unordered lists.html +148 -0
  97. data/spec/data/markdowntest/Ordered and unordered lists.text +131 -0
  98. data/spec/data/markdowntest/Strong and em together.html +7 -0
  99. data/spec/data/markdowntest/Strong and em together.text +7 -0
  100. data/spec/data/markdowntest/Tabs.html +25 -0
  101. data/spec/data/markdowntest/Tabs.text +21 -0
  102. data/spec/data/markdowntest/Tidyness.html +8 -0
  103. data/spec/data/markdowntest/Tidyness.text +5 -0
  104. data/spec/data/ml-announce.txt +17 -0
  105. data/spec/data/re-overflow.txt +67 -0
  106. data/spec/data/re-overflow2.txt +281 -0
  107. data/spec/discount_spec.rb +184 -0
  108. data/spec/lib/constants.rb +5 -0
  109. data/spec/lib/helpers.rb +137 -0
  110. data/spec/lib/matchers.rb +235 -0
  111. data/spec/markdowntest_spec.rb +79 -0
  112. metadata +205 -0
data/ext/markdown.c ADDED
@@ -0,0 +1,1078 @@
1
+ /* markdown: a C implementation of John Gruber's Markdown markup language.
2
+ *
3
+ * Copyright (C) 2007 David L Parsons.
4
+ * The redistribution terms are provided in the COPYRIGHT file that must
5
+ * be distributed with this source code.
6
+ */
7
+ #include <stdio.h>
8
+ #include <string.h>
9
+ #include <stdarg.h>
10
+ #include <stdlib.h>
11
+ #include <time.h>
12
+ #include <ctype.h>
13
+
14
+ #include "config.h"
15
+
16
+ #include "cstring.h"
17
+ #include "markdown.h"
18
+ #include "amalloc.h"
19
+
20
/* Block-level html tags.  A line that opens with one of these tags starts
 * a block that is handed through as html (or style) instead of being
 * parsed as markdown.
 *
 *   id        -- the tag name, without the leading `<`
 *   size      -- strlen(id)
 *   selfclose -- nonzero when the tag has no separate closing tag, so the
 *                block is just the line the tag appears on
 *
 * The names are string literals, so id is a pointer to const.
 */
struct kw {
    const char *id;
    int         size;
    int         selfclose;
} ;

#define KW(x)	{ x, sizeof(x)-1, 0 }
#define SC(x)	{ x, sizeof(x)-1, 1 }

/* not const: the table is sorted in place the first time it is needed
 */
static struct kw blocktags[] = { KW("!--"), KW("STYLE"), KW("SCRIPT"),
                                 KW("ADDRESS"), KW("BDO"), KW("BLOCKQUOTE"),
                                 KW("CENTER"), KW("DFN"), KW("DIV"), KW("H1"),
                                 KW("H2"), KW("H3"), KW("H4"), KW("H5"),
                                 KW("H6"), KW("LISTING"), KW("NOBR"),
                                 KW("UL"), KW("P"), KW("OL"), KW("DL"),
                                 KW("PLAINTEXT"), KW("PRE"), KW("TABLE"),
                                 KW("WBR"), KW("XMP"), SC("HR"), SC("BR"),
                                 KW("IFRAME"), KW("MAP") };
#define SZTAGS	(sizeof blocktags / sizeof blocktags[0])
#define MAXTAG	11	/* sizeof "BLOCKQUOTE" */
42
+
43
+ typedef int (*stfu)(const void*,const void*);
44
+
45
+ typedef ANCHOR(Paragraph) ParagraphRoot;
46
+
47
+
48
+ /* case insensitive string sort (for qsort() and bsearch() of block tags)
49
+ */
50
+ static int
51
+ casort(struct kw *a, struct kw *b)
52
+ {
53
+ if ( a->size != b->size )
54
+ return a->size - b->size;
55
+ return strncasecmp(a->id, b->id, b->size);
56
+ }
57
+
58
+
59
+ /* case insensitive string sort for Footnote tags.
60
+ */
61
+ int
62
+ __mkd_footsort(Footnote *a, Footnote *b)
63
+ {
64
+ int i;
65
+ char ac, bc;
66
+
67
+ if ( S(a->tag) != S(b->tag) )
68
+ return S(a->tag) - S(b->tag);
69
+
70
+ for ( i=0; i < S(a->tag); i++) {
71
+ ac = tolower(T(a->tag)[i]);
72
+ bc = tolower(T(b->tag)[i]);
73
+
74
+ if ( isspace(ac) && isspace(bc) )
75
+ continue;
76
+ if ( ac != bc )
77
+ return ac - bc;
78
+ }
79
+ return 0;
80
+ }
81
+
82
+
83
+ /* find the first blank character after position <i>
84
+ */
85
+ static int
86
+ nextblank(Line *t, int i)
87
+ {
88
+ while ( (i < S(t->text)) && !isspace(T(t->text)[i]) )
89
+ ++i;
90
+ return i;
91
+ }
92
+
93
+
94
+ /* find the next nonblank character after position <i>
95
+ */
96
+ static int
97
+ nextnonblank(Line *t, int i)
98
+ {
99
+ while ( (i < S(t->text)) && isspace(T(t->text)[i]) )
100
+ ++i;
101
+ return i;
102
+ }
103
+
104
+
105
+ /* find the first nonblank character on the Line.
106
+ */
107
+ int
108
+ mkd_firstnonblank(Line *p)
109
+ {
110
+ return nextnonblank(p,0);
111
+ }
112
+
113
+
114
+ static int
115
+ blankline(Line *p)
116
+ {
117
+ return ! (p && (S(p->text) > p->dle) );
118
+ }
119
+
120
+
121
+ static Line *
122
+ skipempty(Line *p)
123
+ {
124
+ while ( p && (p->dle == S(p->text)) )
125
+ p = p->next;
126
+ return p;
127
+ }
128
+
129
+
130
+ void
131
+ ___mkd_tidy(Cstring *t)
132
+ {
133
+ while ( S(*t) && isspace(T(*t)[S(*t)-1]) )
134
+ --S(*t);
135
+ }
136
+
137
+
138
+ static struct kw *
139
+ isopentag(Line *p)
140
+ {
141
+ int i=0, len;
142
+ struct kw key, *ret;
143
+
144
+ if ( !p ) return 0;
145
+
146
+ len = S(p->text);
147
+
148
+ if ( len < 3 || T(p->text)[0] != '<' )
149
+ return 0;
150
+
151
+ /* find how long the tag is so we can check to see if
152
+ * it's a block-level tag
153
+ */
154
+ for ( i=1; i < len && T(p->text)[i] != '>'
155
+ && T(p->text)[i] != '/'
156
+ && !isspace(T(p->text)[i]); ++i )
157
+ ;
158
+
159
+ key.id = T(p->text)+1;
160
+ key.size = i-1;
161
+
162
+ if ( ret = bsearch(&key, blocktags, SZTAGS, sizeof key, (stfu)casort))
163
+ return ret;
164
+
165
+ return 0;
166
+ }
167
+
168
+
169
+ typedef struct _flo {
170
+ Line *t;
171
+ int i;
172
+ } FLO;
173
+
174
+
175
+ static int
176
+ flogetc(FLO *f)
177
+ {
178
+ if ( f && f->t ) {
179
+ if ( f->i < S(f->t->text) )
180
+ return T(f->t->text)[f->i++];
181
+ f->t = f->t->next;
182
+ f->i = 0;
183
+ return flogetc(f);
184
+ }
185
+ return EOF;
186
+ }
187
+
188
+
189
+ static Line *
190
+ htmlblock(Paragraph *p, struct kw *tag)
191
+ {
192
+ Line *ret;
193
+ FLO f = { p->text, 0 };
194
+ int c;
195
+ int i, closing, depth=0;
196
+
197
+ if ( tag->selfclose || (tag->size >= MAXTAG) ) {
198
+ ret = f.t->next;
199
+ f.t->next = 0;
200
+ return ret;
201
+ }
202
+
203
+ while ( (c = flogetc(&f)) != EOF ) {
204
+ if ( c == '<' ) {
205
+ /* tag? */
206
+ c = flogetc(&f);
207
+ if ( c == '!' ) { /* comment? */
208
+ if ( flogetc(&f) == '-' && flogetc(&f) == '-' ) {
209
+ /* yes */
210
+ while ( (c = flogetc(&f)) != EOF ) {
211
+ if ( c == '-' && flogetc(&f) == '-'
212
+ && flogetc(&f) == '>')
213
+ /* consumed whole comment */
214
+ break;
215
+ }
216
+ }
217
+ }
218
+ else {
219
+ if ( closing = (c == '/') ) c = flogetc(&f);
220
+
221
+ for ( i=0; i < tag->size; c=flogetc(&f) ) {
222
+ if ( tag->id[i++] != toupper(c) )
223
+ break;
224
+ }
225
+
226
+ if ( (i == tag->size) && !isalnum(c) ) {
227
+ depth = depth + (closing ? -1 : 1);
228
+ if ( depth == 0 ) {
229
+ while ( c != EOF && c != '>' ) {
230
+ /* consume trailing gunk in close tag */
231
+ c = flogetc(&f);
232
+ }
233
+ if ( !f.t )
234
+ return 0;
235
+ ret = f.t->next;
236
+ f.t->next = 0;
237
+ return ret;
238
+ }
239
+ }
240
+ }
241
+ }
242
+ }
243
+ return 0;
244
+ }
245
+
246
+
247
+ static Line *
248
+ comment(Paragraph *p)
249
+ {
250
+ Line *t, *ret;
251
+
252
+ for ( t = p->text; t ; t = t->next) {
253
+ if ( strstr(T(t->text), "-->") ) {
254
+ ret = t->next;
255
+ t->next = 0;
256
+ return ret;
257
+ }
258
+ }
259
+ return t;
260
+
261
+ }
262
+
263
+
264
+ /* tables look like
265
+ * header|header{|header}
266
+ * ------|------{|......}
267
+ * {body lines}
268
+ */
269
+ static int
270
+ istable(Line *t)
271
+ {
272
+ char *p;
273
+ Line *dashes = t->next;
274
+ int contains = 0; /* found character bits; 0x01 is |, 0x02 is - */
275
+
276
+ /* two lines, first must contain | */
277
+ if ( !(dashes && memchr(T(t->text), '|', S(t->text))) )
278
+ return 0;
279
+
280
+ /* second line must contain - or | and nothing
281
+ * else except for whitespace or :
282
+ */
283
+ for ( p = T(dashes->text)+S(dashes->text)-1; p >= T(dashes->text); --p)
284
+ if ( *p == '|' )
285
+ contains |= 0x01;
286
+ else if ( *p == '-' )
287
+ contains |= 0x02;
288
+ else if ( ! ((*p == ':') || isspace(*p)) )
289
+ return 0;
290
+
291
+ return (contains & 0x03);
292
+ }
293
+
294
+
295
+ /* footnotes look like ^<whitespace>{0,3}[stuff]: <content>$
296
+ */
297
+ static int
298
+ isfootnote(Line *t)
299
+ {
300
+ int i;
301
+
302
+ if ( ( (i = t->dle) > 3) || (T(t->text)[i] != '[') )
303
+ return 0;
304
+
305
+ for ( ++i; i < S(t->text) ; ++i ) {
306
+ if ( T(t->text)[i] == '[' )
307
+ return 0;
308
+ else if ( T(t->text)[i] == ']' && T(t->text)[i+1] == ':' )
309
+ return 1;
310
+ }
311
+ return 0;
312
+ }
313
+
314
+
315
+ static int
316
+ isquote(Line *t)
317
+ {
318
+ return ( T(t->text)[0] == '>' );
319
+ }
320
+
321
+
322
/* is <c> one of the characters a horizontal rule can be drawn with?
 */
static int
dashchar(char c)
{
    switch ( c ) {
    case '*':
    case '-':
    case '_':
        return 1;
    default:
        return 0;
    }
}
327
+
328
+
329
+ static int
330
+ iscode(Line *t)
331
+ {
332
+ return (t->dle >= 4);
333
+ }
334
+
335
+
336
+ static int
337
+ ishr(Line *t)
338
+ {
339
+ int i, count=0;
340
+ char dash = 0;
341
+ char c;
342
+
343
+ if ( iscode(t) ) return 0;
344
+
345
+ for ( i = 0; i < S(t->text); i++) {
346
+ c = T(t->text)[i];
347
+ if ( (dash == 0) && dashchar(c) )
348
+ dash = c;
349
+
350
+ if ( c == dash ) ++count;
351
+ else if ( !isspace(c) )
352
+ return 0;
353
+ }
354
+ return (count >= 3);
355
+ }
356
+
357
+
358
+ static int
359
+ ishdr(Line *t, int *htyp)
360
+ {
361
+ int i;
362
+
363
+
364
+ /* first check for etx-style ###HEADER###
365
+ */
366
+
367
+ /* leading run of `#`'s ?
368
+ */
369
+ for ( i=0; T(t->text)[i] == '#'; ++i)
370
+ ;
371
+
372
+ /* ANY leading `#`'s make this into an ETX header
373
+ */
374
+ if ( i && (i < S(t->text) || i > 1) ) {
375
+ *htyp = ETX;
376
+ return 1;
377
+ }
378
+
379
+ /* then check for setext-style HEADER
380
+ * ======
381
+ */
382
+
383
+ if ( t->next ) {
384
+ char *q = T(t->next->text);
385
+
386
+ if ( (*q == '=') || (*q == '-') ) {
387
+ for (i=1; i < S(t->next->text); i++)
388
+ if ( q[0] != q[i] )
389
+ return 0;
390
+ *htyp = SETEXT;
391
+ return 1;
392
+ }
393
+ }
394
+ return 0;
395
+ }
396
+
397
+
398
+ static int
399
+ isdefinition(Line *t)
400
+ {
401
+ #if DL_TAG_EXTENSION
402
+ return t && t->next
403
+ && (S(t->text) > 2)
404
+ && (t->dle == 0)
405
+ && (T(t->text)[0] == '=')
406
+ && (T(t->text)[S(t->text)-1] == '=')
407
+ && ( (t->next->dle >= 4) || isdefinition(t->next) );
408
+ #else
409
+ return 0;
410
+ #endif
411
+ }
412
+
413
+
414
+ static int
415
+ islist(Line *t, int *trim)
416
+ {
417
+ int i, j;
418
+ char *q;
419
+
420
+ if ( iscode(t) || blankline(t) || ishdr(t,&i) || ishr(t) )
421
+ return 0;
422
+
423
+ if ( isdefinition(t) ) {
424
+ *trim = 4;
425
+ return DL;
426
+ }
427
+
428
+ if ( strchr("*-+", T(t->text)[t->dle]) && isspace(T(t->text)[t->dle+1]) ) {
429
+ i = nextnonblank(t, t->dle+1);
430
+ *trim = (i > 4) ? 4 : i;
431
+ return UL;
432
+ }
433
+
434
+ if ( (j = nextblank(t,t->dle)) > t->dle ) {
435
+ if ( T(t->text)[j-1] == '.' ) {
436
+ #if ALPHA_LIST
437
+ if ( (j == t->dle + 2) && isalpha(T(t->text)[t->dle]) ) {
438
+ j = nextnonblank(t,j);
439
+ *trim = j;
440
+ return AL;
441
+ }
442
+ #endif
443
+ strtoul(T(t->text)+t->dle, &q, 10);
444
+ if ( (q > T(t->text)+t->dle) && (q == T(t->text) + (j-1)) ) {
445
+ j = nextnonblank(t,j);
446
+ *trim = j;
447
+ return OL;
448
+ }
449
+ }
450
+ }
451
+ return 0;
452
+ }
453
+
454
+
455
+ static Line *
456
+ headerblock(Paragraph *pp, int htyp)
457
+ {
458
+ Line *ret = 0;
459
+ Line *p = pp->text;
460
+ int i, j;
461
+
462
+ switch (htyp) {
463
+ case SETEXT:
464
+ /* p->text is header, p->next->text is -'s or ='s
465
+ */
466
+ pp->hnumber = (T(p->next->text)[0] == '=') ? 1 : 2;
467
+
468
+ ret = p->next->next;
469
+ ___mkd_freeLine(p->next);
470
+ p->next = 0;
471
+ break;
472
+
473
+ case ETX:
474
+ /* p->text is ###header###, so we need to trim off
475
+ * the leading and trailing `#`'s
476
+ */
477
+
478
+ for (i=0; (T(p->text)[i] == T(p->text)[0]) && (i < S(p->text)-1); i++)
479
+ ;
480
+
481
+ pp->hnumber = i;
482
+
483
+ while ( (i < S(p->text)) && isspace(T(p->text)[i]) )
484
+ ++i;
485
+
486
+ CLIP(p->text, 0, i);
487
+
488
+ for (j=S(p->text); (j > 1) && (T(p->text)[j-1] == '#'); --j)
489
+ ;
490
+
491
+ while ( j && isspace(T(p->text)[j-1]) )
492
+ --j;
493
+
494
+ S(p->text) = j;
495
+
496
+ ret = p->next;
497
+ p->next = 0;
498
+ break;
499
+ }
500
+ return ret;
501
+ }
502
+
503
+
504
+ static Line *
505
+ codeblock(Paragraph *p)
506
+ {
507
+ Line *t = p->text, *r;
508
+
509
+ for ( ; t; t = r ) {
510
+ CLIP(t->text,0,4);
511
+ t->dle = mkd_firstnonblank(t);
512
+
513
+ if ( !( (r = skipempty(t->next)) && iscode(r)) ) {
514
+ ___mkd_freeLineRange(t,r);
515
+ t->next = 0;
516
+ return r;
517
+ }
518
+ }
519
+ return t;
520
+ }
521
+
522
+
523
+ static int
524
+ centered(Line *first, Line *last)
525
+ {
526
+
527
+ if ( first&&last ) {
528
+ int len = S(last->text);
529
+
530
+ if ( (len > 2) && (strncmp(T(first->text), "->", 2) == 0)
531
+ && (strncmp(T(last->text)+len-2, "<-", 2) == 0) ) {
532
+ CLIP(first->text, 0, 2);
533
+ S(last->text) -= 2;
534
+ return CENTER;
535
+ }
536
+ }
537
+ return 0;
538
+ }
539
+
540
+
541
+ static int
542
+ endoftextblock(Line *t, int toplevelblock)
543
+ {
544
+ int z;
545
+
546
+ if ( blankline(t)||isquote(t)||iscode(t)||ishdr(t,&z)||ishr(t) )
547
+ return 1;
548
+
549
+ /* HORRIBLE STANDARDS KLUDGE: Toplevel paragraphs eat absorb adjacent
550
+ * list items, but sublevel blocks behave properly.
551
+ */
552
+ return toplevelblock ? 0 : islist(t,&z);
553
+ }
554
+
555
+
556
+ static Line *
557
+ textblock(Paragraph *p, int toplevel)
558
+ {
559
+ Line *t, *next;
560
+
561
+ for ( t = p->text; t ; t = next ) {
562
+ if ( ((next = t->next) == 0) || endoftextblock(next, toplevel) ) {
563
+ p->align = centered(p->text, t);
564
+ t->next = 0;
565
+ return next;
566
+ }
567
+ }
568
+ return t;
569
+ }
570
+
571
+
572
/* length of the `id:` or `class:` kind at the start of the name in a
 * special div-not-quote block; 0 if the name doesn't start with either.
 * The kind is matched without regard to case.
 */
static int
szmarkerclass(char *p)
{
    static const struct {
        const char *label;
        int len;
    } kinds[] = {
        { "id:",    3 },
        { "class:", 6 },
    };
    unsigned int k;

    for ( k = 0; k < sizeof kinds / sizeof kinds[0]; k++ )
        if ( strncasecmp(p, kinds[k].label, kinds[k].len) == 0 )
            return kinds[k].len;

    return 0;
}
583
+
584
+
585
+ /*
586
+ * check if the first line of a quoted block is the special div-not-quote
587
+ * marker %[kind:]name%
588
+ */
589
+ static int
590
+ isdivmarker(Line *p, int start)
591
+ {
592
+ #if DIV_QUOTE
593
+ char *s = T(p->text);
594
+ int len = S(p->text);
595
+ int i;
596
+
597
+ if ( !(len && s[start] == '%' && s[len-1] == '%') ) return 0;
598
+
599
+ i = szmarkerclass(s+start+1)+start;
600
+ len -= start+1;
601
+
602
+ while ( ++i < len )
603
+ if ( !isalnum(s[i]) )
604
+ return 0;
605
+
606
+ return 1;
607
+ #else
608
+ return 0;
609
+ #endif
610
+ }
611
+
612
+
613
+ /*
614
+ * accumulate a blockquote.
615
+ *
616
+ * one sick horrible thing about blockquotes is that even though
617
+ * it just takes ^> to start a quote, following lines, if quoted,
618
+ * assume that the prefix is ``>''. This means that code needs
619
+ * to be indented *5* spaces from the leading '>', but *4* spaces
620
+ * from the start of the line. This does not appear to be
621
+ * documented in the reference implementation, but it's the
622
+ * way the markdown sample web form at Daring Fireball works.
623
+ */
624
+ static Line *
625
+ quoteblock(Paragraph *p)
626
+ {
627
+ Line *t, *q;
628
+ int qp;
629
+
630
+ for ( t = p->text; t ; t = q ) {
631
+ if ( isquote(t) ) {
632
+ qp = (T(t->text)[1] == ' ') ? 2 : 1;
633
+ CLIP(t->text, 0, qp);
634
+ t->dle = mkd_firstnonblank(t);
635
+ }
636
+
637
+ q = skipempty(t->next);
638
+
639
+ if ( (q == 0) || ((q != t->next) && (!isquote(q) || isdivmarker(q,1))) ) {
640
+ ___mkd_freeLineRange(t, q);
641
+ t = q;
642
+ break;
643
+ }
644
+ }
645
+ if ( isdivmarker(p->text,0) ) {
646
+ char *prefix = "class";
647
+ int i;
648
+
649
+ q = p->text;
650
+ p->text = p->text->next;
651
+
652
+ if ( (i = szmarkerclass(1+T(q->text))) == 3 )
653
+ /* and this would be an "%id:" prefix */
654
+ prefix="id";
655
+
656
+ if ( p->ident = malloc(4+strlen(prefix)+S(q->text)) )
657
+ sprintf(p->ident, "%s=\"%.*s\"", prefix, S(q->text)-(i+2),
658
+ T(q->text)+(i+1) );
659
+
660
+ ___mkd_freeLine(q);
661
+ }
662
+ return t;
663
+ }
664
+
665
+
666
+ /*
667
+ * A table block starts with a table header (see istable()), and continues
668
+ * until EOF or a line that /doesn't/ contain a |.
669
+ */
670
+ static Line *
671
+ tableblock(Paragraph *p)
672
+ {
673
+ Line *t, *q;
674
+
675
+ for ( t = p->text; t && (q = t->next); t = t->next ) {
676
+ if ( !memchr(T(q->text), '|', S(q->text)) ) {
677
+ t->next = 0;
678
+ return q;
679
+ }
680
+ }
681
+ return 0;
682
+ }
683
+
684
+
685
+ static Paragraph *Pp(ParagraphRoot *, Line *, int);
686
+ static Paragraph *compile(Line *, int, MMIOT *);
687
+
688
+
689
+ /*
690
+ * pull in a list block. A list block starts with a list marker and
691
+ * runs until the next list marker, the next non-indented paragraph,
692
+ * or EOF. You do not have to indent nonblank lines after the list
693
+ * marker, but multiple paragraphs need to start with a 4-space indent.
694
+ */
695
+ static Line *
696
+ listitem(Paragraph *p, int indent)
697
+ {
698
+ Line *t, *q;
699
+ int clip = indent;
700
+ int z;
701
+
702
+ for ( t = p->text; t ; t = q) {
703
+ CLIP(t->text, 0, clip);
704
+ t->dle = mkd_firstnonblank(t);
705
+
706
+ if ( (q = skipempty(t->next)) == 0 ) {
707
+ ___mkd_freeLineRange(t,q);
708
+ return 0;
709
+ }
710
+
711
+ /* after a blank line, the next block needs to start with a line
712
+ * that's indented 4 spaces, but after that the line doesn't
713
+ * need any indentation
714
+ */
715
+ if ( q != t->next ) {
716
+ if (q->dle < indent) {
717
+ q = t->next;
718
+ t->next = 0;
719
+ return q;
720
+ }
721
+ indent = 4;
722
+ }
723
+
724
+ if ( (q->dle < indent) && (ishr(q) || islist(q,&z)) && !ishdr(q,&z) ) {
725
+ q = t->next;
726
+ t->next = 0;
727
+ return q;
728
+ }
729
+
730
+ clip = (q->dle > indent) ? indent : q->dle;
731
+ }
732
+ return t;
733
+ }
734
+
735
+
736
+ static Line *
737
+ listblock(Paragraph *top, int trim, MMIOT *f)
738
+ {
739
+ ParagraphRoot d = { 0, 0 };
740
+ Paragraph *p;
741
+ Line *q = top->text, *text, *label;
742
+ int isdl = (top->typ == DL),
743
+ para = 0,
744
+ ltype;
745
+
746
+ while (( text = q )) {
747
+ if ( top->typ == DL ) {
748
+ Line *lp;
749
+
750
+ for ( lp = label = text; lp ; lp = lp->next ) {
751
+ text = lp->next;
752
+ CLIP(lp->text, 0, 1);
753
+ S(lp->text)--;
754
+ if ( !isdefinition(lp->next) )
755
+ lp->next = 0;
756
+ }
757
+ }
758
+ else label = 0;
759
+
760
+ p = Pp(&d, text, LISTITEM);
761
+ text = listitem(p, trim);
762
+
763
+ p->down = compile(p->text, 0, f);
764
+ p->text = label;
765
+
766
+ if ( para && (top->typ != DL) && p->down ) p->down->align = PARA;
767
+
768
+ if ( !(q = skipempty(text)) || ((ltype = islist(q, &trim)) == 0)
769
+ || (isdl != (ltype == DL)) )
770
+ break;
771
+
772
+ if ( para = (q != text) ) {
773
+ Line anchor;
774
+
775
+ anchor.next = text;
776
+ ___mkd_freeLineRange(&anchor, q);
777
+ }
778
+
779
+ if ( para && (top->typ != DL) && p->down ) p->down->align = PARA;
780
+ }
781
+ top->text = 0;
782
+ top->down = T(d);
783
+ return text;
784
+ }
785
+
786
+
787
/* if <c> can open a footnote title, return the character that closes
 * the title (a quote closes itself, `(` is closed by `)`); otherwise 0.
 */
static int
tgood(char c)
{
    if ( c == '(' )
        return ')';
    if ( (c == '"') || (c == '\'') )
        return c;
    return 0;
}
797
+
798
+
799
+ /*
800
+ * add a new (image or link) footnote to the footnote table
801
+ */
802
+ static Line*
803
+ addfootnote(Line *p, MMIOT* f)
804
+ {
805
+ int j, i;
806
+ int c;
807
+ Line *np = p->next;
808
+
809
+ Footnote *foot = &EXPAND(*f->footnotes);
810
+
811
+ CREATE(foot->tag);
812
+ CREATE(foot->link);
813
+ CREATE(foot->title);
814
+ foot->height = foot->width = 0;
815
+
816
+ for (j=i=p->dle+1; T(p->text)[j] != ']'; j++)
817
+ EXPAND(foot->tag) = T(p->text)[j];
818
+
819
+ EXPAND(foot->tag) = 0;
820
+ S(foot->tag)--;
821
+ j = nextnonblank(p, j+2);
822
+
823
+ while ( (j < S(p->text)) && !isspace(T(p->text)[j]) )
824
+ EXPAND(foot->link) = T(p->text)[j++];
825
+ EXPAND(foot->link) = 0;
826
+ S(foot->link)--;
827
+ j = nextnonblank(p,j);
828
+
829
+ if ( T(p->text)[j] == '=' ) {
830
+ sscanf(T(p->text)+j, "=%dx%d", &foot->width, &foot->height);
831
+ while ( (j < S(p->text)) && !isspace(T(p->text)[j]) )
832
+ ++j;
833
+ j = nextnonblank(p,j);
834
+ }
835
+
836
+
837
+ if ( (j >= S(p->text)) && np && np->dle && tgood(T(np->text)[np->dle]) ) {
838
+ ___mkd_freeLine(p);
839
+ p = np;
840
+ np = p->next;
841
+ j = p->dle;
842
+ }
843
+
844
+ if ( (c = tgood(T(p->text)[j])) ) {
845
+ /* Try to take the rest of the line as a comment; read to
846
+ * EOL, then shrink the string back to before the final
847
+ * quote.
848
+ */
849
+ ++j; /* skip leading quote */
850
+
851
+ while ( j < S(p->text) )
852
+ EXPAND(foot->title) = T(p->text)[j++];
853
+
854
+ while ( S(foot->title) && T(foot->title)[S(foot->title)-1] != c )
855
+ --S(foot->title);
856
+ if ( S(foot->title) ) /* skip trailing quote */
857
+ --S(foot->title);
858
+ EXPAND(foot->title) = 0;
859
+ --S(foot->title);
860
+ }
861
+
862
+ ___mkd_freeLine(p);
863
+ return np;
864
+ }
865
+
866
+
867
+ /*
868
+ * allocate a paragraph header, link it to the
869
+ * tail of the current document
870
+ */
871
+ static Paragraph *
872
+ Pp(ParagraphRoot *d, Line *ptr, int typ)
873
+ {
874
+ Paragraph *ret = calloc(sizeof *ret, 1);
875
+
876
+ ret->text = ptr;
877
+ ret->typ = typ;
878
+
879
+ return ATTACH(*d, ret);
880
+ }
881
+
882
+
883
+
884
+ static Line*
885
+ consume(Line *ptr, int *eaten)
886
+ {
887
+ Line *next;
888
+ int blanks=0;
889
+
890
+ for (; ptr && blankline(ptr); ptr = next, blanks++ ) {
891
+ next = ptr->next;
892
+ ___mkd_freeLine(ptr);
893
+ }
894
+ if ( ptr ) *eaten = blanks;
895
+ return ptr;
896
+ }
897
+
898
+
899
+ /*
900
+ * top-level compilation; break the document into
901
+ * style, html, and source blocks with footnote links
902
+ * weeded out.
903
+ */
904
+ static Paragraph *
905
+ compile_document(Line *ptr, MMIOT *f)
906
+ {
907
+ ParagraphRoot d = { 0, 0 };
908
+ ANCHOR(Line) source = { 0, 0 };
909
+ Paragraph *p = 0;
910
+ struct kw *tag;
911
+ int eaten;
912
+
913
+ while ( ptr ) {
914
+ if ( !(f->flags & DENY_HTML) && (tag = isopentag(ptr)) ) {
915
+ /* If we encounter a html/style block, compile and save all
916
+ * of the cached source BEFORE processing the html/style.
917
+ */
918
+ if ( T(source) ) {
919
+ E(source)->next = 0;
920
+ p = Pp(&d, 0, SOURCE);
921
+ p->down = compile(T(source), 1, f);
922
+ T(source) = E(source) = 0;
923
+ }
924
+ p = Pp(&d, ptr, strcmp(tag->id, "STYLE") == 0 ? STYLE : HTML);
925
+ if ( strcmp(tag->id, "!--") == 0 )
926
+ ptr = comment(p);
927
+ else
928
+ ptr = htmlblock(p, tag);
929
+ }
930
+ else if ( isfootnote(ptr) ) {
931
+ /* footnotes, like cats, sleep anywhere; pull them
932
+ * out of the input stream and file them away for
933
+ * later processing
934
+ */
935
+ ptr = consume(addfootnote(ptr, f), &eaten);
936
+ }
937
+ else {
938
+ /* source; cache it up to wait for eof or the
939
+ * next html/style block
940
+ */
941
+ ATTACH(source,ptr);
942
+ ptr = ptr->next;
943
+ }
944
+ }
945
+ if ( T(source) ) {
946
+ /* if there's any cached source at EOF, compile
947
+ * it now.
948
+ */
949
+ E(source)->next = 0;
950
+ p = Pp(&d, 0, SOURCE);
951
+ p->down = compile(T(source), 1, f);
952
+ }
953
+ return T(d);
954
+ }
955
+
956
+
957
+ /*
958
+ * break a collection of markdown input into
959
+ * blocks of lists, code, html, and text to
960
+ * be marked up.
961
+ */
962
+ static Paragraph *
963
+ compile(Line *ptr, int toplevel, MMIOT *f)
964
+ {
965
+ ParagraphRoot d = { 0, 0 };
966
+ Paragraph *p = 0;
967
+ Line *r;
968
+ int para = toplevel;
969
+ int blocks = 0;
970
+ int hdr_type, list_type, indent;
971
+
972
+ ptr = consume(ptr, &para);
973
+
974
+ while ( ptr ) {
975
+ if ( iscode(ptr) ) {
976
+ p = Pp(&d, ptr, CODE);
977
+
978
+ if ( f->flags & MKD_1_COMPAT) {
979
+ /* HORRIBLE STANDARDS KLUDGE: the first line of every block
980
+ * has trailing whitespace trimmed off.
981
+ */
982
+ ___mkd_tidy(&p->text->text);
983
+ }
984
+
985
+ ptr = codeblock(p);
986
+ }
987
+ else if ( ishr(ptr) ) {
988
+ p = Pp(&d, 0, HR);
989
+ r = ptr;
990
+ ptr = ptr->next;
991
+ ___mkd_freeLine(r);
992
+ }
993
+ else if (( list_type = islist(ptr, &indent) )) {
994
+ p = Pp(&d, ptr, list_type);
995
+ ptr = listblock(p, indent, f);
996
+ }
997
+ else if ( isquote(ptr) ) {
998
+ p = Pp(&d, ptr, QUOTE);
999
+ ptr = quoteblock(p);
1000
+ p->down = compile(p->text, 1, f);
1001
+ p->text = 0;
1002
+ }
1003
+ else if ( ishdr(ptr, &hdr_type) ) {
1004
+ p = Pp(&d, ptr, HDR);
1005
+ ptr = headerblock(p, hdr_type);
1006
+ }
1007
+ else if ( istable(ptr) && !(f->flags & (STRICT|NOTABLES)) ) {
1008
+ p = Pp(&d, ptr, TABLE);
1009
+ ptr = tableblock(p);
1010
+ }
1011
+ else {
1012
+ p = Pp(&d, ptr, MARKUP);
1013
+ ptr = textblock(p, toplevel);
1014
+ }
1015
+
1016
+ if ( (para||toplevel) && !p->align )
1017
+ p->align = PARA;
1018
+
1019
+ blocks++;
1020
+ para = toplevel || (blocks > 1);
1021
+ ptr = consume(ptr, &para);
1022
+
1023
+ if ( para && !p->align )
1024
+ p->align = PARA;
1025
+
1026
+ }
1027
+ return T(d);
1028
+ }
1029
+
1030
+
1031
+ static void
1032
+ initialize()
1033
+ {
1034
+ static int first = 1;
1035
+
1036
+ if ( first-- > 0 ) {
1037
+ first = 0;
1038
+ INITRNG(time(0));
1039
+ qsort(blocktags, SZTAGS, sizeof blocktags[0], (stfu)casort);
1040
+ }
1041
+ }
1042
+
1043
+
1044
+ /*
1045
+ * the guts of the markdown() function, ripped out so I can do
1046
+ * debugging.
1047
+ */
1048
+
1049
+ /*
1050
+ * prepare and compile `text`, returning a Paragraph tree.
1051
+ */
1052
+ int
1053
+ mkd_compile(Document *doc, int flags)
1054
+ {
1055
+ if ( !doc )
1056
+ return 0;
1057
+
1058
+ if ( doc->compiled )
1059
+ return 1;
1060
+
1061
+ doc->compiled = 1;
1062
+ memset(doc->ctx, 0, sizeof(MMIOT) );
1063
+ doc->ctx->flags = flags & USER_FLAGS;
1064
+ doc->ctx->base = doc->base;
1065
+ CREATE(doc->ctx->in);
1066
+ doc->ctx->footnotes = malloc(sizeof doc->ctx->footnotes[0]);
1067
+ CREATE(*doc->ctx->footnotes);
1068
+
1069
+ initialize();
1070
+
1071
+ doc->code = compile_document(T(doc->content), doc->ctx);
1072
+ qsort(T(*doc->ctx->footnotes), S(*doc->ctx->footnotes),
1073
+ sizeof T(*doc->ctx->footnotes)[0],
1074
+ (stfu)__mkd_footsort);
1075
+ memset(&doc->content, 0, sizeof doc->content);
1076
+ return 1;
1077
+ }
1078
+