bluecloth 2.0.6.pre120-x86-mswin32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (112) hide show
  1. data/ChangeLog +363 -0
  2. data/LICENSE +27 -0
  3. data/LICENSE.discount +47 -0
  4. data/README +81 -0
  5. data/Rakefile +349 -0
  6. data/Rakefile.local +43 -0
  7. data/bin/bluecloth +84 -0
  8. data/ext/Csio.c +61 -0
  9. data/ext/VERSION +1 -0
  10. data/ext/amalloc.h +29 -0
  11. data/ext/bluecloth.c +391 -0
  12. data/ext/bluecloth.h +19 -0
  13. data/ext/config.h +55 -0
  14. data/ext/css.c +76 -0
  15. data/ext/cstring.h +75 -0
  16. data/ext/docheader.c +43 -0
  17. data/ext/extconf.rb +52 -0
  18. data/ext/generate.c +1602 -0
  19. data/ext/markdown.c +1078 -0
  20. data/ext/markdown.h +146 -0
  21. data/ext/mkdio.c +303 -0
  22. data/ext/mkdio.h +79 -0
  23. data/ext/resource.c +155 -0
  24. data/ext/version.c +28 -0
  25. data/ext/xml.c +82 -0
  26. data/ext/xmlpage.c +48 -0
  27. data/lib/1.8/bluecloth_ext.so +0 -0
  28. data/lib/1.9/bluecloth_ext.so +0 -0
  29. data/lib/bluecloth.rb +164 -0
  30. data/rake/191_compat.rb +26 -0
  31. data/rake/dependencies.rb +76 -0
  32. data/rake/helpers.rb +434 -0
  33. data/rake/hg.rb +273 -0
  34. data/rake/manual.rb +782 -0
  35. data/rake/packaging.rb +126 -0
  36. data/rake/publishing.rb +269 -0
  37. data/rake/rdoc.rb +30 -0
  38. data/rake/style.rb +62 -0
  39. data/rake/svn.rb +668 -0
  40. data/rake/testing.rb +187 -0
  41. data/rake/verifytask.rb +64 -0
  42. data/rake/win32.rb +190 -0
  43. data/spec/bluecloth/101_changes_spec.rb +141 -0
  44. data/spec/bluecloth/autolinks_spec.rb +49 -0
  45. data/spec/bluecloth/blockquotes_spec.rb +145 -0
  46. data/spec/bluecloth/code_spans_spec.rb +164 -0
  47. data/spec/bluecloth/emphasis_spec.rb +164 -0
  48. data/spec/bluecloth/entities_spec.rb +65 -0
  49. data/spec/bluecloth/hrules_spec.rb +90 -0
  50. data/spec/bluecloth/images_spec.rb +92 -0
  51. data/spec/bluecloth/inline_html_spec.rb +238 -0
  52. data/spec/bluecloth/links_spec.rb +171 -0
  53. data/spec/bluecloth/lists_spec.rb +294 -0
  54. data/spec/bluecloth/paragraphs_spec.rb +75 -0
  55. data/spec/bluecloth/titles_spec.rb +305 -0
  56. data/spec/bluecloth_spec.rb +281 -0
  57. data/spec/bugfix_spec.rb +172 -0
  58. data/spec/contributions_spec.rb +85 -0
  59. data/spec/data/antsugar.txt +34 -0
  60. data/spec/data/markdowntest/Amps and angle encoding.html +17 -0
  61. data/spec/data/markdowntest/Amps and angle encoding.text +21 -0
  62. data/spec/data/markdowntest/Auto links.html +18 -0
  63. data/spec/data/markdowntest/Auto links.text +13 -0
  64. data/spec/data/markdowntest/Backslash escapes.html +118 -0
  65. data/spec/data/markdowntest/Backslash escapes.text +120 -0
  66. data/spec/data/markdowntest/Blockquotes with code blocks.html +15 -0
  67. data/spec/data/markdowntest/Blockquotes with code blocks.text +11 -0
  68. data/spec/data/markdowntest/Code Blocks.html +18 -0
  69. data/spec/data/markdowntest/Code Blocks.text +14 -0
  70. data/spec/data/markdowntest/Code Spans.html +5 -0
  71. data/spec/data/markdowntest/Code Spans.text +5 -0
  72. data/spec/data/markdowntest/Hard-wrapped paragraphs with list-like lines.html +8 -0
  73. data/spec/data/markdowntest/Hard-wrapped paragraphs with list-like lines.text +8 -0
  74. data/spec/data/markdowntest/Horizontal rules.html +71 -0
  75. data/spec/data/markdowntest/Horizontal rules.text +67 -0
  76. data/spec/data/markdowntest/Inline HTML (Advanced).html +15 -0
  77. data/spec/data/markdowntest/Inline HTML (Advanced).text +15 -0
  78. data/spec/data/markdowntest/Inline HTML (Simple).html +72 -0
  79. data/spec/data/markdowntest/Inline HTML (Simple).text +69 -0
  80. data/spec/data/markdowntest/Inline HTML comments.html +13 -0
  81. data/spec/data/markdowntest/Inline HTML comments.text +13 -0
  82. data/spec/data/markdowntest/Links, inline style.html +11 -0
  83. data/spec/data/markdowntest/Links, inline style.text +12 -0
  84. data/spec/data/markdowntest/Links, reference style.html +52 -0
  85. data/spec/data/markdowntest/Links, reference style.text +71 -0
  86. data/spec/data/markdowntest/Links, shortcut references.html +9 -0
  87. data/spec/data/markdowntest/Links, shortcut references.text +20 -0
  88. data/spec/data/markdowntest/Literal quotes in titles.html +3 -0
  89. data/spec/data/markdowntest/Literal quotes in titles.text +7 -0
  90. data/spec/data/markdowntest/Markdown Documentation - Basics.html +314 -0
  91. data/spec/data/markdowntest/Markdown Documentation - Basics.text +306 -0
  92. data/spec/data/markdowntest/Markdown Documentation - Syntax.html +942 -0
  93. data/spec/data/markdowntest/Markdown Documentation - Syntax.text +888 -0
  94. data/spec/data/markdowntest/Nested blockquotes.html +9 -0
  95. data/spec/data/markdowntest/Nested blockquotes.text +5 -0
  96. data/spec/data/markdowntest/Ordered and unordered lists.html +148 -0
  97. data/spec/data/markdowntest/Ordered and unordered lists.text +131 -0
  98. data/spec/data/markdowntest/Strong and em together.html +7 -0
  99. data/spec/data/markdowntest/Strong and em together.text +7 -0
  100. data/spec/data/markdowntest/Tabs.html +25 -0
  101. data/spec/data/markdowntest/Tabs.text +21 -0
  102. data/spec/data/markdowntest/Tidyness.html +8 -0
  103. data/spec/data/markdowntest/Tidyness.text +5 -0
  104. data/spec/data/ml-announce.txt +17 -0
  105. data/spec/data/re-overflow.txt +67 -0
  106. data/spec/data/re-overflow2.txt +281 -0
  107. data/spec/discount_spec.rb +184 -0
  108. data/spec/lib/constants.rb +5 -0
  109. data/spec/lib/helpers.rb +137 -0
  110. data/spec/lib/matchers.rb +235 -0
  111. data/spec/markdowntest_spec.rb +79 -0
  112. metadata +205 -0
data/ext/markdown.c ADDED
@@ -0,0 +1,1078 @@
1
+ /* markdown: a C implementation of John Gruber's Markdown markup language.
2
+ *
3
+ * Copyright (C) 2007 David L Parsons.
4
+ * The redistribution terms are provided in the COPYRIGHT file that must
5
+ * be distributed with this source code.
6
+ */
7
+ #include <stdio.h>
8
+ #include <string.h>
9
+ #include <stdarg.h>
10
+ #include <stdlib.h>
11
+ #include <time.h>
12
+ #include <ctype.h>
13
+
14
+ #include "config.h"
15
+
16
+ #include "cstring.h"
17
+ #include "markdown.h"
18
+ #include "amalloc.h"
19
+
20
/* Block-level HTML tags whose blocks are passed through as html.
 *
 * initialize() sorts this table with casort() so that isopentag()
 * can bsearch() it.
 */
struct kw {
    char *id;		/* tag name, without the leading '<' */
    int size;		/* length of id, not counting the terminator */
    int selfclose;	/* set by SC(); htmlblock() treats such a tag
			 * as a block that ends on its own line */
} ;

#define KW(x)	{ x, sizeof(x)-1, 0 }
#define SC(x)	{ x, sizeof(x)-1, 1 }

static struct kw blocktags[] = {
    KW("!--"),
    KW("STYLE"),
    KW("SCRIPT"),
    KW("ADDRESS"),
    KW("BDO"),
    KW("BLOCKQUOTE"),
    KW("CENTER"),
    KW("DFN"),
    KW("DIV"),
    KW("H1"),
    KW("H2"),
    KW("H3"),
    KW("H4"),
    KW("H5"),
    KW("H6"),
    KW("LISTING"),
    KW("NOBR"),
    KW("UL"),
    KW("P"),
    KW("OL"),
    KW("DL"),
    KW("PLAINTEXT"),
    KW("PRE"),
    KW("TABLE"),
    KW("WBR"),
    KW("XMP"),
    SC("HR"),
    SC("BR"),
    KW("IFRAME"),
    KW("MAP")
};
#define SZTAGS	(sizeof blocktags / sizeof blocktags[0])
#define MAXTAG	11	/* sizeof "BLOCKQUOTE" */
42
+
43
/* comparator signature taken by qsort() and bsearch(); casort() and
 * __mkd_footsort() are cast to this type when they are passed in.
 */
+ typedef int (*stfu)(const void*,const void*);
44
+
45
/* root type built with ANCHOR(); used in this file as a `{ 0, 0 }`
 * initialised anchor that Pp() ATTACHes new Paragraphs to.
 */
+ typedef ANCHOR(Paragraph) ParagraphRoot;
46
+
47
+
48
+ /* case insensitive string sort (for qsort() and bsearch() of block tags)
49
+ */
50
+ static int
51
+ casort(struct kw *a, struct kw *b)
52
+ {
53
+ if ( a->size != b->size )
54
+ return a->size - b->size;
55
+ return strncasecmp(a->id, b->id, b->size);
56
+ }
57
+
58
+
59
+ /* case insensitive string sort for Footnote tags.
60
+ */
61
+ int
62
+ __mkd_footsort(Footnote *a, Footnote *b)
63
+ {
64
+ int i;
65
+ char ac, bc;
66
+
67
+ if ( S(a->tag) != S(b->tag) )
68
+ return S(a->tag) - S(b->tag);
69
+
70
+ for ( i=0; i < S(a->tag); i++) {
71
+ ac = tolower(T(a->tag)[i]);
72
+ bc = tolower(T(b->tag)[i]);
73
+
74
+ if ( isspace(ac) && isspace(bc) )
75
+ continue;
76
+ if ( ac != bc )
77
+ return ac - bc;
78
+ }
79
+ return 0;
80
+ }
81
+
82
+
83
+ /* find the first blank character after position <i>
84
+ */
85
+ static int
86
+ nextblank(Line *t, int i)
87
+ {
88
+ while ( (i < S(t->text)) && !isspace(T(t->text)[i]) )
89
+ ++i;
90
+ return i;
91
+ }
92
+
93
+
94
+ /* find the next nonblank character after position <i>
95
+ */
96
+ static int
97
+ nextnonblank(Line *t, int i)
98
+ {
99
+ while ( (i < S(t->text)) && isspace(T(t->text)[i]) )
100
+ ++i;
101
+ return i;
102
+ }
103
+
104
+
105
+ /* find the first nonblank character on the Line.
106
+ */
107
+ int
108
+ mkd_firstnonblank(Line *p)
109
+ {
110
+ return nextnonblank(p,0);
111
+ }
112
+
113
+
114
+ static int
115
+ blankline(Line *p)
116
+ {
117
+ return ! (p && (S(p->text) > p->dle) );
118
+ }
119
+
120
+
121
+ static Line *
122
+ skipempty(Line *p)
123
+ {
124
+ while ( p && (p->dle == S(p->text)) )
125
+ p = p->next;
126
+ return p;
127
+ }
128
+
129
+
130
+ void
131
+ ___mkd_tidy(Cstring *t)
132
+ {
133
+ while ( S(*t) && isspace(T(*t)[S(*t)-1]) )
134
+ --S(*t);
135
+ }
136
+
137
+
138
+ static struct kw *
139
+ isopentag(Line *p)
140
+ {
141
+ int i=0, len;
142
+ struct kw key, *ret;
143
+
144
+ if ( !p ) return 0;
145
+
146
+ len = S(p->text);
147
+
148
+ if ( len < 3 || T(p->text)[0] != '<' )
149
+ return 0;
150
+
151
+ /* find how long the tag is so we can check to see if
152
+ * it's a block-level tag
153
+ */
154
+ for ( i=1; i < len && T(p->text)[i] != '>'
155
+ && T(p->text)[i] != '/'
156
+ && !isspace(T(p->text)[i]); ++i )
157
+ ;
158
+
159
+ key.id = T(p->text)+1;
160
+ key.size = i-1;
161
+
162
+ if ( ret = bsearch(&key, blocktags, SZTAGS, sizeof key, (stfu)casort))
163
+ return ret;
164
+
165
+ return 0;
166
+ }
167
+
168
+
169
+ typedef struct _flo {
170
+ Line *t;
171
+ int i;
172
+ } FLO;
173
+
174
+
175
+ static int
176
+ flogetc(FLO *f)
177
+ {
178
+ if ( f && f->t ) {
179
+ if ( f->i < S(f->t->text) )
180
+ return T(f->t->text)[f->i++];
181
+ f->t = f->t->next;
182
+ f->i = 0;
183
+ return flogetc(f);
184
+ }
185
+ return EOF;
186
+ }
187
+
188
+
189
+ static Line *
190
+ htmlblock(Paragraph *p, struct kw *tag)
191
+ {
192
+ Line *ret;
193
+ FLO f = { p->text, 0 };
194
+ int c;
195
+ int i, closing, depth=0;
196
+
197
+ if ( tag->selfclose || (tag->size >= MAXTAG) ) {
198
+ ret = f.t->next;
199
+ f.t->next = 0;
200
+ return ret;
201
+ }
202
+
203
+ while ( (c = flogetc(&f)) != EOF ) {
204
+ if ( c == '<' ) {
205
+ /* tag? */
206
+ c = flogetc(&f);
207
+ if ( c == '!' ) { /* comment? */
208
+ if ( flogetc(&f) == '-' && flogetc(&f) == '-' ) {
209
+ /* yes */
210
+ while ( (c = flogetc(&f)) != EOF ) {
211
+ if ( c == '-' && flogetc(&f) == '-'
212
+ && flogetc(&f) == '>')
213
+ /* consumed whole comment */
214
+ break;
215
+ }
216
+ }
217
+ }
218
+ else {
219
+ if ( closing = (c == '/') ) c = flogetc(&f);
220
+
221
+ for ( i=0; i < tag->size; c=flogetc(&f) ) {
222
+ if ( tag->id[i++] != toupper(c) )
223
+ break;
224
+ }
225
+
226
+ if ( (i == tag->size) && !isalnum(c) ) {
227
+ depth = depth + (closing ? -1 : 1);
228
+ if ( depth == 0 ) {
229
+ while ( c != EOF && c != '>' ) {
230
+ /* consume trailing gunk in close tag */
231
+ c = flogetc(&f);
232
+ }
233
+ if ( !f.t )
234
+ return 0;
235
+ ret = f.t->next;
236
+ f.t->next = 0;
237
+ return ret;
238
+ }
239
+ }
240
+ }
241
+ }
242
+ }
243
+ return 0;
244
+ }
245
+
246
+
247
+ static Line *
248
+ comment(Paragraph *p)
249
+ {
250
+ Line *t, *ret;
251
+
252
+ for ( t = p->text; t ; t = t->next) {
253
+ if ( strstr(T(t->text), "-->") ) {
254
+ ret = t->next;
255
+ t->next = 0;
256
+ return ret;
257
+ }
258
+ }
259
+ return t;
260
+
261
+ }
262
+
263
+
264
+ /* tables look like
265
+ * header|header{|header}
266
+ * ------|------{|......}
267
+ * {body lines}
268
+ */
269
+ static int
270
+ istable(Line *t)
271
+ {
272
+ char *p;
273
+ Line *dashes = t->next;
274
+ int contains = 0; /* found character bits; 0x01 is |, 0x02 is - */
275
+
276
+ /* two lines, first must contain | */
277
+ if ( !(dashes && memchr(T(t->text), '|', S(t->text))) )
278
+ return 0;
279
+
280
+ /* second line must contain - or | and nothing
281
+ * else except for whitespace or :
282
+ */
283
+ for ( p = T(dashes->text)+S(dashes->text)-1; p >= T(dashes->text); --p)
284
+ if ( *p == '|' )
285
+ contains |= 0x01;
286
+ else if ( *p == '-' )
287
+ contains |= 0x02;
288
+ else if ( ! ((*p == ':') || isspace(*p)) )
289
+ return 0;
290
+
291
+ return (contains & 0x03);
292
+ }
293
+
294
+
295
+ /* footnotes look like ^<whitespace>{0,3}[stuff]: <content>$
296
+ */
297
+ static int
298
+ isfootnote(Line *t)
299
+ {
300
+ int i;
301
+
302
+ if ( ( (i = t->dle) > 3) || (T(t->text)[i] != '[') )
303
+ return 0;
304
+
305
+ for ( ++i; i < S(t->text) ; ++i ) {
306
+ if ( T(t->text)[i] == '[' )
307
+ return 0;
308
+ else if ( T(t->text)[i] == ']' && T(t->text)[i+1] == ':' )
309
+ return 1;
310
+ }
311
+ return 0;
312
+ }
313
+
314
+
315
+ static int
316
+ isquote(Line *t)
317
+ {
318
+ return ( T(t->text)[0] == '>' );
319
+ }
320
+
321
+
322
/* is <c> one of the characters a horizontal rule can be drawn with? */
static int
dashchar(char c)
{
    switch (c) {
    case '*':
    case '-':
    case '_':
	return 1;
    default:
	return 0;
    }
}
327
+
328
+
329
+ static int
330
+ iscode(Line *t)
331
+ {
332
+ return (t->dle >= 4);
333
+ }
334
+
335
+
336
+ static int
337
+ ishr(Line *t)
338
+ {
339
+ int i, count=0;
340
+ char dash = 0;
341
+ char c;
342
+
343
+ if ( iscode(t) ) return 0;
344
+
345
+ for ( i = 0; i < S(t->text); i++) {
346
+ c = T(t->text)[i];
347
+ if ( (dash == 0) && dashchar(c) )
348
+ dash = c;
349
+
350
+ if ( c == dash ) ++count;
351
+ else if ( !isspace(c) )
352
+ return 0;
353
+ }
354
+ return (count >= 3);
355
+ }
356
+
357
+
358
+ static int
359
+ ishdr(Line *t, int *htyp)
360
+ {
361
+ int i;
362
+
363
+
364
+ /* first check for etx-style ###HEADER###
365
+ */
366
+
367
+ /* leading run of `#`'s ?
368
+ */
369
+ for ( i=0; T(t->text)[i] == '#'; ++i)
370
+ ;
371
+
372
+ /* ANY leading `#`'s make this into an ETX header
373
+ */
374
+ if ( i && (i < S(t->text) || i > 1) ) {
375
+ *htyp = ETX;
376
+ return 1;
377
+ }
378
+
379
+ /* then check for setext-style HEADER
380
+ * ======
381
+ */
382
+
383
+ if ( t->next ) {
384
+ char *q = T(t->next->text);
385
+
386
+ if ( (*q == '=') || (*q == '-') ) {
387
+ for (i=1; i < S(t->next->text); i++)
388
+ if ( q[0] != q[i] )
389
+ return 0;
390
+ *htyp = SETEXT;
391
+ return 1;
392
+ }
393
+ }
394
+ return 0;
395
+ }
396
+
397
+
398
+ static int
399
+ isdefinition(Line *t)
400
+ {
401
+ #if DL_TAG_EXTENSION
402
+ return t && t->next
403
+ && (S(t->text) > 2)
404
+ && (t->dle == 0)
405
+ && (T(t->text)[0] == '=')
406
+ && (T(t->text)[S(t->text)-1] == '=')
407
+ && ( (t->next->dle >= 4) || isdefinition(t->next) );
408
+ #else
409
+ return 0;
410
+ #endif
411
+ }
412
+
413
+
414
+ static int
415
+ islist(Line *t, int *trim)
416
+ {
417
+ int i, j;
418
+ char *q;
419
+
420
+ if ( iscode(t) || blankline(t) || ishdr(t,&i) || ishr(t) )
421
+ return 0;
422
+
423
+ if ( isdefinition(t) ) {
424
+ *trim = 4;
425
+ return DL;
426
+ }
427
+
428
+ if ( strchr("*-+", T(t->text)[t->dle]) && isspace(T(t->text)[t->dle+1]) ) {
429
+ i = nextnonblank(t, t->dle+1);
430
+ *trim = (i > 4) ? 4 : i;
431
+ return UL;
432
+ }
433
+
434
+ if ( (j = nextblank(t,t->dle)) > t->dle ) {
435
+ if ( T(t->text)[j-1] == '.' ) {
436
+ #if ALPHA_LIST
437
+ if ( (j == t->dle + 2) && isalpha(T(t->text)[t->dle]) ) {
438
+ j = nextnonblank(t,j);
439
+ *trim = j;
440
+ return AL;
441
+ }
442
+ #endif
443
+ strtoul(T(t->text)+t->dle, &q, 10);
444
+ if ( (q > T(t->text)+t->dle) && (q == T(t->text) + (j-1)) ) {
445
+ j = nextnonblank(t,j);
446
+ *trim = j;
447
+ return OL;
448
+ }
449
+ }
450
+ }
451
+ return 0;
452
+ }
453
+
454
+
455
+ static Line *
456
+ headerblock(Paragraph *pp, int htyp)
457
+ {
458
+ Line *ret = 0;
459
+ Line *p = pp->text;
460
+ int i, j;
461
+
462
+ switch (htyp) {
463
+ case SETEXT:
464
+ /* p->text is header, p->next->text is -'s or ='s
465
+ */
466
+ pp->hnumber = (T(p->next->text)[0] == '=') ? 1 : 2;
467
+
468
+ ret = p->next->next;
469
+ ___mkd_freeLine(p->next);
470
+ p->next = 0;
471
+ break;
472
+
473
+ case ETX:
474
+ /* p->text is ###header###, so we need to trim off
475
+ * the leading and trailing `#`'s
476
+ */
477
+
478
+ for (i=0; (T(p->text)[i] == T(p->text)[0]) && (i < S(p->text)-1); i++)
479
+ ;
480
+
481
+ pp->hnumber = i;
482
+
483
+ while ( (i < S(p->text)) && isspace(T(p->text)[i]) )
484
+ ++i;
485
+
486
+ CLIP(p->text, 0, i);
487
+
488
+ for (j=S(p->text); (j > 1) && (T(p->text)[j-1] == '#'); --j)
489
+ ;
490
+
491
+ while ( j && isspace(T(p->text)[j-1]) )
492
+ --j;
493
+
494
+ S(p->text) = j;
495
+
496
+ ret = p->next;
497
+ p->next = 0;
498
+ break;
499
+ }
500
+ return ret;
501
+ }
502
+
503
+
504
+ static Line *
505
+ codeblock(Paragraph *p)
506
+ {
507
+ Line *t = p->text, *r;
508
+
509
+ for ( ; t; t = r ) {
510
+ CLIP(t->text,0,4);
511
+ t->dle = mkd_firstnonblank(t);
512
+
513
+ if ( !( (r = skipempty(t->next)) && iscode(r)) ) {
514
+ ___mkd_freeLineRange(t,r);
515
+ t->next = 0;
516
+ return r;
517
+ }
518
+ }
519
+ return t;
520
+ }
521
+
522
+
523
+ static int
524
+ centered(Line *first, Line *last)
525
+ {
526
+
527
+ if ( first&&last ) {
528
+ int len = S(last->text);
529
+
530
+ if ( (len > 2) && (strncmp(T(first->text), "->", 2) == 0)
531
+ && (strncmp(T(last->text)+len-2, "<-", 2) == 0) ) {
532
+ CLIP(first->text, 0, 2);
533
+ S(last->text) -= 2;
534
+ return CENTER;
535
+ }
536
+ }
537
+ return 0;
538
+ }
539
+
540
+
541
+ static int
542
+ endoftextblock(Line *t, int toplevelblock)
543
+ {
544
+ int z;
545
+
546
+ if ( blankline(t)||isquote(t)||iscode(t)||ishdr(t,&z)||ishr(t) )
547
+ return 1;
548
+
549
+ /* HORRIBLE STANDARDS KLUDGE: Toplevel paragraphs eat absorb adjacent
550
+ * list items, but sublevel blocks behave properly.
551
+ */
552
+ return toplevelblock ? 0 : islist(t,&z);
553
+ }
554
+
555
+
556
+ static Line *
557
+ textblock(Paragraph *p, int toplevel)
558
+ {
559
+ Line *t, *next;
560
+
561
+ for ( t = p->text; t ; t = next ) {
562
+ if ( ((next = t->next) == 0) || endoftextblock(next, toplevel) ) {
563
+ p->align = centered(p->text, t);
564
+ t->next = 0;
565
+ return next;
566
+ }
567
+ }
568
+ return t;
569
+ }
570
+
571
+
572
/* length of the "id:" or "class:" kind prefix (matched ignoring case)
 * at the start of a special div-not-quote marker; 0 if there is none.
 */
static int
szmarkerclass(char *p)
{
    static const struct { const char *word; int len; } kinds[] = {
	{ "id:",    3 },
	{ "class:", 6 }
    };
    int k;

    for ( k = 0; k < (int)(sizeof kinds / sizeof kinds[0]); k++ )
	if ( strncasecmp(p, kinds[k].word, kinds[k].len) == 0 )
	    return kinds[k].len;
    return 0;
}
583
+
584
+
585
+ /*
586
+ * check if the first line of a quoted block is the special div-not-quote
587
+ * marker %[kind:]name%
588
+ */
589
+ static int
590
+ isdivmarker(Line *p, int start)
591
+ {
592
+ #if DIV_QUOTE
593
+ char *s = T(p->text);
594
+ int len = S(p->text);
595
+ int i;
596
+
597
+ if ( !(len && s[start] == '%' && s[len-1] == '%') ) return 0;
598
+
599
+ i = szmarkerclass(s+start+1)+start;
600
+ len -= start+1;
601
+
602
+ while ( ++i < len )
603
+ if ( !isalnum(s[i]) )
604
+ return 0;
605
+
606
+ return 1;
607
+ #else
608
+ return 0;
609
+ #endif
610
+ }
611
+
612
+
613
+ /*
614
+ * accumulate a blockquote.
615
+ *
616
+ * one sick horrible thing about blockquotes is that even though
617
+ * it just takes ^> to start a quote, following lines, if quoted,
618
+ * assume that the prefix is ``>''. This means that code needs
619
+ * to be indented *5* spaces from the leading '>', but *4* spaces
620
+ * from the start of the line. This does not appear to be
621
+ * documented in the reference implementation, but it's the
622
+ * way the markdown sample web form at Daring Fireball works.
623
+ */
624
+ static Line *
625
+ quoteblock(Paragraph *p)
626
+ {
627
+ Line *t, *q;
628
+ int qp;
629
+
630
+ for ( t = p->text; t ; t = q ) {
631
+ if ( isquote(t) ) {
632
+ qp = (T(t->text)[1] == ' ') ? 2 : 1;
633
+ CLIP(t->text, 0, qp);
634
+ t->dle = mkd_firstnonblank(t);
635
+ }
636
+
637
+ q = skipempty(t->next);
638
+
639
+ if ( (q == 0) || ((q != t->next) && (!isquote(q) || isdivmarker(q,1))) ) {
640
+ ___mkd_freeLineRange(t, q);
641
+ t = q;
642
+ break;
643
+ }
644
+ }
645
+ if ( isdivmarker(p->text,0) ) {
646
+ char *prefix = "class";
647
+ int i;
648
+
649
+ q = p->text;
650
+ p->text = p->text->next;
651
+
652
+ if ( (i = szmarkerclass(1+T(q->text))) == 3 )
653
+ /* and this would be an "%id:" prefix */
654
+ prefix="id";
655
+
656
+ if ( p->ident = malloc(4+strlen(prefix)+S(q->text)) )
657
+ sprintf(p->ident, "%s=\"%.*s\"", prefix, S(q->text)-(i+2),
658
+ T(q->text)+(i+1) );
659
+
660
+ ___mkd_freeLine(q);
661
+ }
662
+ return t;
663
+ }
664
+
665
+
666
+ /*
667
+ * A table block starts with a table header (see istable()), and continues
668
+ * until EOF or a line that /doesn't/ contain a |.
669
+ */
670
+ static Line *
671
+ tableblock(Paragraph *p)
672
+ {
673
+ Line *t, *q;
674
+
675
+ for ( t = p->text; t && (q = t->next); t = t->next ) {
676
+ if ( !memchr(T(q->text), '|', S(q->text)) ) {
677
+ t->next = 0;
678
+ return q;
679
+ }
680
+ }
681
+ return 0;
682
+ }
683
+
684
+
685
/* forward declarations: listblock() calls both of these before
 * their definitions appear.
 */
+ static Paragraph *Pp(ParagraphRoot *, Line *, int);
686
+ static Paragraph *compile(Line *, int, MMIOT *);
687
+
688
+
689
+ /*
690
+ * pull in a list block. A list block starts with a list marker and
691
+ * runs until the next list marker, the next non-indented paragraph,
692
+ * or EOF. You do not have to indent nonblank lines after the list
693
+ * marker, but multiple paragraphs need to start with a 4-space indent.
694
+ */
695
+ static Line *
696
+ listitem(Paragraph *p, int indent)
697
+ {
698
+ Line *t, *q;
699
+ int clip = indent;
700
+ int z;
701
+
702
+ for ( t = p->text; t ; t = q) {
703
+ CLIP(t->text, 0, clip);
704
+ t->dle = mkd_firstnonblank(t);
705
+
706
+ if ( (q = skipempty(t->next)) == 0 ) {
707
+ ___mkd_freeLineRange(t,q);
708
+ return 0;
709
+ }
710
+
711
+ /* after a blank line, the next block needs to start with a line
712
+ * that's indented 4 spaces, but after that the line doesn't
713
+ * need any indentation
714
+ */
715
+ if ( q != t->next ) {
716
+ if (q->dle < indent) {
717
+ q = t->next;
718
+ t->next = 0;
719
+ return q;
720
+ }
721
+ indent = 4;
722
+ }
723
+
724
+ if ( (q->dle < indent) && (ishr(q) || islist(q,&z)) && !ishdr(q,&z) ) {
725
+ q = t->next;
726
+ t->next = 0;
727
+ return q;
728
+ }
729
+
730
+ clip = (q->dle > indent) ? indent : q->dle;
731
+ }
732
+ return t;
733
+ }
734
+
735
+
736
+ static Line *
737
+ listblock(Paragraph *top, int trim, MMIOT *f)
738
+ {
739
+ ParagraphRoot d = { 0, 0 };
740
+ Paragraph *p;
741
+ Line *q = top->text, *text, *label;
742
+ int isdl = (top->typ == DL),
743
+ para = 0,
744
+ ltype;
745
+
746
+ while (( text = q )) {
747
+ if ( top->typ == DL ) {
748
+ Line *lp;
749
+
750
+ for ( lp = label = text; lp ; lp = lp->next ) {
751
+ text = lp->next;
752
+ CLIP(lp->text, 0, 1);
753
+ S(lp->text)--;
754
+ if ( !isdefinition(lp->next) )
755
+ lp->next = 0;
756
+ }
757
+ }
758
+ else label = 0;
759
+
760
+ p = Pp(&d, text, LISTITEM);
761
+ text = listitem(p, trim);
762
+
763
+ p->down = compile(p->text, 0, f);
764
+ p->text = label;
765
+
766
+ if ( para && (top->typ != DL) && p->down ) p->down->align = PARA;
767
+
768
+ if ( !(q = skipempty(text)) || ((ltype = islist(q, &trim)) == 0)
769
+ || (isdl != (ltype == DL)) )
770
+ break;
771
+
772
+ if ( para = (q != text) ) {
773
+ Line anchor;
774
+
775
+ anchor.next = text;
776
+ ___mkd_freeLineRange(&anchor, q);
777
+ }
778
+
779
+ if ( para && (top->typ != DL) && p->down ) p->down->align = PARA;
780
+ }
781
+ top->text = 0;
782
+ top->down = T(d);
783
+ return text;
784
+ }
785
+
786
+
787
/* if <c> can open a footnote title, return the character that closes
 * it (a quote closes itself, '(' is closed by ')'); otherwise 0.
 */
static int
tgood(char c)
{
    if ( c == '(' )
	return ')';
    if ( (c == '\'') || (c == '"') )
	return c;
    return 0;
}
797
+
798
+
799
+ /*
800
+ * add a new (image or link) footnote to the footnote table
801
+ */
802
+ static Line*
803
+ addfootnote(Line *p, MMIOT* f)
804
+ {
805
+ int j, i;
806
+ int c;
807
+ Line *np = p->next;
808
+
809
+ Footnote *foot = &EXPAND(*f->footnotes);
810
+
811
+ CREATE(foot->tag);
812
+ CREATE(foot->link);
813
+ CREATE(foot->title);
814
+ foot->height = foot->width = 0;
815
+
816
+ for (j=i=p->dle+1; T(p->text)[j] != ']'; j++)
817
+ EXPAND(foot->tag) = T(p->text)[j];
818
+
819
+ EXPAND(foot->tag) = 0;
820
+ S(foot->tag)--;
821
+ j = nextnonblank(p, j+2);
822
+
823
+ while ( (j < S(p->text)) && !isspace(T(p->text)[j]) )
824
+ EXPAND(foot->link) = T(p->text)[j++];
825
+ EXPAND(foot->link) = 0;
826
+ S(foot->link)--;
827
+ j = nextnonblank(p,j);
828
+
829
+ if ( T(p->text)[j] == '=' ) {
830
+ sscanf(T(p->text)+j, "=%dx%d", &foot->width, &foot->height);
831
+ while ( (j < S(p->text)) && !isspace(T(p->text)[j]) )
832
+ ++j;
833
+ j = nextnonblank(p,j);
834
+ }
835
+
836
+
837
+ if ( (j >= S(p->text)) && np && np->dle && tgood(T(np->text)[np->dle]) ) {
838
+ ___mkd_freeLine(p);
839
+ p = np;
840
+ np = p->next;
841
+ j = p->dle;
842
+ }
843
+
844
+ if ( (c = tgood(T(p->text)[j])) ) {
845
+ /* Try to take the rest of the line as a comment; read to
846
+ * EOL, then shrink the string back to before the final
847
+ * quote.
848
+ */
849
+ ++j; /* skip leading quote */
850
+
851
+ while ( j < S(p->text) )
852
+ EXPAND(foot->title) = T(p->text)[j++];
853
+
854
+ while ( S(foot->title) && T(foot->title)[S(foot->title)-1] != c )
855
+ --S(foot->title);
856
+ if ( S(foot->title) ) /* skip trailing quote */
857
+ --S(foot->title);
858
+ EXPAND(foot->title) = 0;
859
+ --S(foot->title);
860
+ }
861
+
862
+ ___mkd_freeLine(p);
863
+ return np;
864
+ }
865
+
866
+
867
+ /*
868
+ * allocate a paragraph header, link it to the
869
+ * tail of the current document
870
+ */
871
+ static Paragraph *
872
+ Pp(ParagraphRoot *d, Line *ptr, int typ)
873
+ {
874
+ Paragraph *ret = calloc(sizeof *ret, 1);
875
+
876
+ ret->text = ptr;
877
+ ret->typ = typ;
878
+
879
+ return ATTACH(*d, ret);
880
+ }
881
+
882
+
883
+
884
+ static Line*
885
+ consume(Line *ptr, int *eaten)
886
+ {
887
+ Line *next;
888
+ int blanks=0;
889
+
890
+ for (; ptr && blankline(ptr); ptr = next, blanks++ ) {
891
+ next = ptr->next;
892
+ ___mkd_freeLine(ptr);
893
+ }
894
+ if ( ptr ) *eaten = blanks;
895
+ return ptr;
896
+ }
897
+
898
+
899
+ /*
900
+ * top-level compilation; break the document into
901
+ * style, html, and source blocks with footnote links
902
+ * weeded out.
903
+ */
904
+ static Paragraph *
905
+ compile_document(Line *ptr, MMIOT *f)
906
+ {
907
+ ParagraphRoot d = { 0, 0 };
908
+ ANCHOR(Line) source = { 0, 0 };
909
+ Paragraph *p = 0;
910
+ struct kw *tag;
911
+ int eaten;
912
+
913
+ while ( ptr ) {
914
+ if ( !(f->flags & DENY_HTML) && (tag = isopentag(ptr)) ) {
915
+ /* If we encounter a html/style block, compile and save all
916
+ * of the cached source BEFORE processing the html/style.
917
+ */
918
+ if ( T(source) ) {
919
+ E(source)->next = 0;
920
+ p = Pp(&d, 0, SOURCE);
921
+ p->down = compile(T(source), 1, f);
922
+ T(source) = E(source) = 0;
923
+ }
924
+ p = Pp(&d, ptr, strcmp(tag->id, "STYLE") == 0 ? STYLE : HTML);
925
+ if ( strcmp(tag->id, "!--") == 0 )
926
+ ptr = comment(p);
927
+ else
928
+ ptr = htmlblock(p, tag);
929
+ }
930
+ else if ( isfootnote(ptr) ) {
931
+ /* footnotes, like cats, sleep anywhere; pull them
932
+ * out of the input stream and file them away for
933
+ * later processing
934
+ */
935
+ ptr = consume(addfootnote(ptr, f), &eaten);
936
+ }
937
+ else {
938
+ /* source; cache it up to wait for eof or the
939
+ * next html/style block
940
+ */
941
+ ATTACH(source,ptr);
942
+ ptr = ptr->next;
943
+ }
944
+ }
945
+ if ( T(source) ) {
946
+ /* if there's any cached source at EOF, compile
947
+ * it now.
948
+ */
949
+ E(source)->next = 0;
950
+ p = Pp(&d, 0, SOURCE);
951
+ p->down = compile(T(source), 1, f);
952
+ }
953
+ return T(d);
954
+ }
955
+
956
+
957
+ /*
958
+ * break a collection of markdown input into
959
+ * blocks of lists, code, html, and text to
960
+ * be marked up.
961
+ */
962
+ static Paragraph *
963
+ compile(Line *ptr, int toplevel, MMIOT *f)
964
+ {
965
+ ParagraphRoot d = { 0, 0 };
966
+ Paragraph *p = 0;
967
+ Line *r;
968
+ int para = toplevel;
969
+ int blocks = 0;
970
+ int hdr_type, list_type, indent;
971
+
972
+ ptr = consume(ptr, &para);
973
+
974
+ while ( ptr ) {
975
+ if ( iscode(ptr) ) {
976
+ p = Pp(&d, ptr, CODE);
977
+
978
+ if ( f->flags & MKD_1_COMPAT) {
979
+ /* HORRIBLE STANDARDS KLUDGE: the first line of every block
980
+ * has trailing whitespace trimmed off.
981
+ */
982
+ ___mkd_tidy(&p->text->text);
983
+ }
984
+
985
+ ptr = codeblock(p);
986
+ }
987
+ else if ( ishr(ptr) ) {
988
+ p = Pp(&d, 0, HR);
989
+ r = ptr;
990
+ ptr = ptr->next;
991
+ ___mkd_freeLine(r);
992
+ }
993
+ else if (( list_type = islist(ptr, &indent) )) {
994
+ p = Pp(&d, ptr, list_type);
995
+ ptr = listblock(p, indent, f);
996
+ }
997
+ else if ( isquote(ptr) ) {
998
+ p = Pp(&d, ptr, QUOTE);
999
+ ptr = quoteblock(p);
1000
+ p->down = compile(p->text, 1, f);
1001
+ p->text = 0;
1002
+ }
1003
+ else if ( ishdr(ptr, &hdr_type) ) {
1004
+ p = Pp(&d, ptr, HDR);
1005
+ ptr = headerblock(p, hdr_type);
1006
+ }
1007
+ else if ( istable(ptr) && !(f->flags & (STRICT|NOTABLES)) ) {
1008
+ p = Pp(&d, ptr, TABLE);
1009
+ ptr = tableblock(p);
1010
+ }
1011
+ else {
1012
+ p = Pp(&d, ptr, MARKUP);
1013
+ ptr = textblock(p, toplevel);
1014
+ }
1015
+
1016
+ if ( (para||toplevel) && !p->align )
1017
+ p->align = PARA;
1018
+
1019
+ blocks++;
1020
+ para = toplevel || (blocks > 1);
1021
+ ptr = consume(ptr, &para);
1022
+
1023
+ if ( para && !p->align )
1024
+ p->align = PARA;
1025
+
1026
+ }
1027
+ return T(d);
1028
+ }
1029
+
1030
+
1031
+ static void
1032
+ initialize()
1033
+ {
1034
+ static int first = 1;
1035
+
1036
+ if ( first-- > 0 ) {
1037
+ first = 0;
1038
+ INITRNG(time(0));
1039
+ qsort(blocktags, SZTAGS, sizeof blocktags[0], (stfu)casort);
1040
+ }
1041
+ }
1042
+
1043
+
1044
+ /*
1045
+ * the guts of the markdown() function, ripped out so I can do
1046
+ * debugging.
1047
+ */
1048
+
1049
+ /*
1050
+ * prepare and compile `text`, returning a Paragraph tree.
1051
+ */
1052
+ int
1053
+ mkd_compile(Document *doc, int flags)
1054
+ {
1055
+ if ( !doc )
1056
+ return 0;
1057
+
1058
+ if ( doc->compiled )
1059
+ return 1;
1060
+
1061
+ doc->compiled = 1;
1062
+ memset(doc->ctx, 0, sizeof(MMIOT) );
1063
+ doc->ctx->flags = flags & USER_FLAGS;
1064
+ doc->ctx->base = doc->base;
1065
+ CREATE(doc->ctx->in);
1066
+ doc->ctx->footnotes = malloc(sizeof doc->ctx->footnotes[0]);
1067
+ CREATE(*doc->ctx->footnotes);
1068
+
1069
+ initialize();
1070
+
1071
+ doc->code = compile_document(T(doc->content), doc->ctx);
1072
+ qsort(T(*doc->ctx->footnotes), S(*doc->ctx->footnotes),
1073
+ sizeof T(*doc->ctx->footnotes)[0],
1074
+ (stfu)__mkd_footsort);
1075
+ memset(&doc->content, 0, sizeof doc->content);
1076
+ return 1;
1077
+ }
1078
+