bluecloth 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ChangeLog +629 -0
- data/LICENSE +27 -0
- data/LICENSE.discount +47 -0
- data/README +71 -0
- data/Rakefile +319 -0
- data/Rakefile.local +63 -0
- data/bin/bluecloth +84 -0
- data/ext/VERSION +1 -0
- data/ext/amalloc.h +29 -0
- data/ext/bluecloth.c +373 -0
- data/ext/config.h +47 -0
- data/ext/cstring.h +73 -0
- data/ext/docheader.c +43 -0
- data/ext/extconf.rb +45 -0
- data/ext/generate.c +1387 -0
- data/ext/markdown.c +939 -0
- data/ext/markdown.h +135 -0
- data/ext/mkdio.c +241 -0
- data/ext/mkdio.h +66 -0
- data/ext/resource.c +169 -0
- data/ext/version.c +28 -0
- data/lib/bluecloth.rb +148 -0
- data/rake/191_compat.rb +26 -0
- data/rake/dependencies.rb +76 -0
- data/rake/helpers.rb +412 -0
- data/rake/manual.rb +782 -0
- data/rake/packaging.rb +116 -0
- data/rake/publishing.rb +321 -0
- data/rake/rdoc.rb +40 -0
- data/rake/style.rb +62 -0
- data/rake/svn.rb +639 -0
- data/rake/testing.rb +204 -0
- data/rake/verifytask.rb +64 -0
- data/rake/win32.rb +186 -0
- data/spec/bluecloth/101_changes_spec.rb +141 -0
- data/spec/bluecloth/autolinks_spec.rb +49 -0
- data/spec/bluecloth/blockquotes_spec.rb +143 -0
- data/spec/bluecloth/code_spans_spec.rb +164 -0
- data/spec/bluecloth/emphasis_spec.rb +164 -0
- data/spec/bluecloth/entities_spec.rb +65 -0
- data/spec/bluecloth/hrules_spec.rb +90 -0
- data/spec/bluecloth/images_spec.rb +92 -0
- data/spec/bluecloth/inline_html_spec.rb +238 -0
- data/spec/bluecloth/links_spec.rb +171 -0
- data/spec/bluecloth/lists_spec.rb +294 -0
- data/spec/bluecloth/paragraphs_spec.rb +75 -0
- data/spec/bluecloth/titles_spec.rb +305 -0
- data/spec/bluecloth_spec.rb +209 -0
- data/spec/bugfix_spec.rb +123 -0
- data/spec/contributions_spec.rb +85 -0
- data/spec/data/antsugar.txt +34 -0
- data/spec/data/markdowntest/Amps and angle encoding.html +17 -0
- data/spec/data/markdowntest/Amps and angle encoding.text +21 -0
- data/spec/data/markdowntest/Auto links.html +18 -0
- data/spec/data/markdowntest/Auto links.text +13 -0
- data/spec/data/markdowntest/Backslash escapes.html +118 -0
- data/spec/data/markdowntest/Backslash escapes.text +120 -0
- data/spec/data/markdowntest/Blockquotes with code blocks.html +15 -0
- data/spec/data/markdowntest/Blockquotes with code blocks.text +11 -0
- data/spec/data/markdowntest/Code Blocks.html +18 -0
- data/spec/data/markdowntest/Code Blocks.text +14 -0
- data/spec/data/markdowntest/Code Spans.html +5 -0
- data/spec/data/markdowntest/Code Spans.text +5 -0
- data/spec/data/markdowntest/Hard-wrapped paragraphs with list-like lines.html +8 -0
- data/spec/data/markdowntest/Hard-wrapped paragraphs with list-like lines.text +8 -0
- data/spec/data/markdowntest/Horizontal rules.html +71 -0
- data/spec/data/markdowntest/Horizontal rules.text +67 -0
- data/spec/data/markdowntest/Inline HTML (Advanced).html +15 -0
- data/spec/data/markdowntest/Inline HTML (Advanced).text +15 -0
- data/spec/data/markdowntest/Inline HTML (Simple).html +72 -0
- data/spec/data/markdowntest/Inline HTML (Simple).text +69 -0
- data/spec/data/markdowntest/Inline HTML comments.html +13 -0
- data/spec/data/markdowntest/Inline HTML comments.text +13 -0
- data/spec/data/markdowntest/Links, inline style.html +11 -0
- data/spec/data/markdowntest/Links, inline style.text +12 -0
- data/spec/data/markdowntest/Links, reference style.html +52 -0
- data/spec/data/markdowntest/Links, reference style.text +71 -0
- data/spec/data/markdowntest/Links, shortcut references.html +9 -0
- data/spec/data/markdowntest/Links, shortcut references.text +20 -0
- data/spec/data/markdowntest/Literal quotes in titles.html +3 -0
- data/spec/data/markdowntest/Literal quotes in titles.text +7 -0
- data/spec/data/markdowntest/Markdown Documentation - Basics.html +314 -0
- data/spec/data/markdowntest/Markdown Documentation - Basics.text +306 -0
- data/spec/data/markdowntest/Markdown Documentation - Syntax.html +942 -0
- data/spec/data/markdowntest/Markdown Documentation - Syntax.text +888 -0
- data/spec/data/markdowntest/Nested blockquotes.html +9 -0
- data/spec/data/markdowntest/Nested blockquotes.text +5 -0
- data/spec/data/markdowntest/Ordered and unordered lists.html +148 -0
- data/spec/data/markdowntest/Ordered and unordered lists.text +131 -0
- data/spec/data/markdowntest/Strong and em together.html +7 -0
- data/spec/data/markdowntest/Strong and em together.text +7 -0
- data/spec/data/markdowntest/Tabs.html +25 -0
- data/spec/data/markdowntest/Tabs.text +21 -0
- data/spec/data/markdowntest/Tidyness.html +8 -0
- data/spec/data/markdowntest/Tidyness.text +5 -0
- data/spec/data/ml-announce.txt +17 -0
- data/spec/data/re-overflow.txt +67 -0
- data/spec/data/re-overflow2.txt +281 -0
- data/spec/lib/constants.rb +5 -0
- data/spec/lib/helpers.rb +137 -0
- data/spec/lib/matchers.rb +235 -0
- data/spec/markdowntest_spec.rb +76 -0
- metadata +305 -0
data/ext/markdown.c
ADDED
@@ -0,0 +1,939 @@
|
|
1
|
+
/* markdown: a C implementation of John Gruber's Markdown markup language.
|
2
|
+
*
|
3
|
+
* Copyright (C) 2007 David L Parsons.
|
4
|
+
* The redistribution terms are provided in the COPYRIGHT file that must
|
5
|
+
* be distributed with this source code.
|
6
|
+
*/
|
7
|
+
#include <stdio.h>
|
8
|
+
#include <string.h>
|
9
|
+
#include <stdarg.h>
|
10
|
+
#include <stdlib.h>
|
11
|
+
#include <time.h>
|
12
|
+
#include <ctype.h>
|
13
|
+
|
14
|
+
#include "config.h"
|
15
|
+
|
16
|
+
#include "cstring.h"
|
17
|
+
#include "markdown.h"
|
18
|
+
#include "amalloc.h"
|
19
|
+
|
20
|
+
/* Table of block-level html tag names.  isopentag() looks the first
 * word of a line up in this table to decide whether the line starts a
 * raw html block.
 */
struct kw {
    char *id;       /* tag name, without the surrounding <> */
    int   siz;      /* length of id, not counting the NUL */
};

/* build one table entry from a string literal */
#define KW(x)   { x, sizeof(x)-1 }

/* deliberately not const: initialize() sorts this table in place */
static struct kw blocktags[] = {
    KW("!--"),       KW("STYLE"),   KW("SCRIPT"),
    KW("ADDRESS"),   KW("BDO"),     KW("BLOCKQUOTE"),
    KW("CENTER"),    KW("DFN"),     KW("DIV"),     KW("H1"),
    KW("H2"),        KW("H3"),      KW("H4"),      KW("H5"),
    KW("H6"),        KW("IFRAME"),  KW("LISTING"), KW("NOBR"),
    KW("UL"),        KW("P"),       KW("OL"),      KW("DL"),
    KW("PLAINTEXT"), KW("PRE"),     KW("TABLE"),
    KW("WBR"),       KW("XMP"),     KW("HR"),      KW("BR")
};

/* number of entries in blocktags[] */
#define SZTAGS  (sizeof blocktags / sizeof blocktags[0])

/* one more than the longest tag name: sizeof "BLOCKQUOTE" */
#define MAXTAG  11
|
39
|
+
|
40
|
+
typedef int (*stfu)(const void*,const void*);
|
41
|
+
|
42
|
+
typedef ANCHOR(Paragraph) ParagraphRoot;
|
43
|
+
|
44
|
+
|
45
|
+
/* case insensitive string sort (for qsort() and bsearch() of block tags)
|
46
|
+
*/
|
47
|
+
static int
|
48
|
+
casort(struct kw *a, struct kw *b)
|
49
|
+
{
|
50
|
+
if ( a->siz != b->siz )
|
51
|
+
return a->siz - b->siz;
|
52
|
+
return strncasecmp(a->id, b->id, b->siz);
|
53
|
+
}
|
54
|
+
|
55
|
+
|
56
|
+
/* case insensitive string sort for Footnote tags.
|
57
|
+
*/
|
58
|
+
int
|
59
|
+
__mkd_footsort(Footnote *a, Footnote *b)
|
60
|
+
{
|
61
|
+
int i;
|
62
|
+
char ac, bc;
|
63
|
+
|
64
|
+
if ( S(a->tag) != S(b->tag) )
|
65
|
+
return S(a->tag) - S(b->tag);
|
66
|
+
|
67
|
+
for ( i=0; i < S(a->tag); i++) {
|
68
|
+
ac = tolower(T(a->tag)[i]);
|
69
|
+
bc = tolower(T(b->tag)[i]);
|
70
|
+
|
71
|
+
if ( isspace(ac) && isspace(bc) )
|
72
|
+
continue;
|
73
|
+
if ( ac != bc )
|
74
|
+
return ac - bc;
|
75
|
+
}
|
76
|
+
return 0;
|
77
|
+
}
|
78
|
+
|
79
|
+
|
80
|
+
/* find the first blank character after position <i>
|
81
|
+
*/
|
82
|
+
static int
|
83
|
+
nextblank(Line *t, int i)
|
84
|
+
{
|
85
|
+
while ( (i < S(t->text)) && !isspace(T(t->text)[i]) )
|
86
|
+
++i;
|
87
|
+
return i;
|
88
|
+
}
|
89
|
+
|
90
|
+
|
91
|
+
/* find the next nonblank character after position <i>
|
92
|
+
*/
|
93
|
+
static int
|
94
|
+
nextnonblank(Line *t, int i)
|
95
|
+
{
|
96
|
+
while ( (i < S(t->text)) && isspace(T(t->text)[i]) )
|
97
|
+
++i;
|
98
|
+
return i;
|
99
|
+
}
|
100
|
+
|
101
|
+
|
102
|
+
/* find the first nonblank character on the Line.
|
103
|
+
*/
|
104
|
+
int
|
105
|
+
mkd_firstnonblank(Line *p)
|
106
|
+
{
|
107
|
+
return nextnonblank(p,0);
|
108
|
+
}
|
109
|
+
|
110
|
+
|
111
|
+
static int
|
112
|
+
blankline(Line *p)
|
113
|
+
{
|
114
|
+
return ! (p && (S(p->text) > p->dle) );
|
115
|
+
}
|
116
|
+
|
117
|
+
|
118
|
+
/* Step over lines whose leading whitespace count equals their length
 * and return the first line that is not like that (or 0 at the end
 * of the list).
 */
static Line *
skipempty(Line *p)
{
    for ( ; p; p = p->next )
        if ( p->dle != S(p->text) )
            break;

    return p;
}
|
125
|
+
|
126
|
+
|
127
|
+
void
|
128
|
+
___mkd_tidy(Line *t)
|
129
|
+
{
|
130
|
+
while ( S(t->text) && isspace(T(t->text)[S(t->text)-1]) )
|
131
|
+
--S(t->text);
|
132
|
+
}
|
133
|
+
|
134
|
+
|
135
|
+
static char *
|
136
|
+
isopentag(Line *p)
|
137
|
+
{
|
138
|
+
int i=0, len;
|
139
|
+
struct kw key, *ret;
|
140
|
+
|
141
|
+
if ( !p ) return 0;
|
142
|
+
|
143
|
+
len = S(p->text);
|
144
|
+
|
145
|
+
if ( len < 3 || T(p->text)[0] != '<' )
|
146
|
+
return 0;
|
147
|
+
|
148
|
+
/* find how long the tag is so we can check to see if
|
149
|
+
* it's a block-level tag
|
150
|
+
*/
|
151
|
+
for ( i=1; i < len && T(p->text)[i] != '>'
|
152
|
+
&& T(p->text)[i] != '/'
|
153
|
+
&& !isspace(T(p->text)[i]); ++i )
|
154
|
+
;
|
155
|
+
|
156
|
+
key.id = T(p->text)+1;
|
157
|
+
key.siz = i-1;
|
158
|
+
|
159
|
+
if ( ret = bsearch(&key,blocktags,SZTAGS,sizeof key, (stfu)casort))
|
160
|
+
return ret->id;
|
161
|
+
|
162
|
+
return 0;
|
163
|
+
}
|
164
|
+
|
165
|
+
|
166
|
+
static int
|
167
|
+
selfclose(Line *t, char *tag)
|
168
|
+
{
|
169
|
+
char *q = T(t->text);
|
170
|
+
int siz = strlen(tag);
|
171
|
+
int i;
|
172
|
+
|
173
|
+
if ( strcasecmp(tag, "HR") == 0 || strcasecmp(tag, "BR") == 0 )
|
174
|
+
/* <HR> and <BR> are self-closing block-level tags,
|
175
|
+
*/
|
176
|
+
return 1;
|
177
|
+
|
178
|
+
i = S(t->text) - (siz + 3);
|
179
|
+
|
180
|
+
/* we specialcase start and end tags on the same line.
|
181
|
+
*/
|
182
|
+
return ( i > 0 ) && (q[i] == '<') && (q[i+1] == '/')
|
183
|
+
&& (q[i+2+siz] == '>')
|
184
|
+
&& (strncasecmp(&q[i+2], tag, siz) == 0);
|
185
|
+
}
|
186
|
+
|
187
|
+
|
188
|
+
static Line *
|
189
|
+
htmlblock(Paragraph *p, char *tag)
|
190
|
+
{
|
191
|
+
Line *t = p->text, *ret;
|
192
|
+
int closesize;
|
193
|
+
char close[MAXTAG+4];
|
194
|
+
|
195
|
+
if ( selfclose(t, tag) || (strlen(tag) >= MAXTAG) ) {
|
196
|
+
ret = t->next;
|
197
|
+
t->next = 0;
|
198
|
+
return ret;
|
199
|
+
}
|
200
|
+
|
201
|
+
closesize = sprintf(close, "</%s>", tag);
|
202
|
+
|
203
|
+
for ( ; t ; t = t->next) {
|
204
|
+
if ( strncasecmp(T(t->text), close, closesize) == 0 ) {
|
205
|
+
ret = t->next;
|
206
|
+
t->next = 0;
|
207
|
+
return ret;
|
208
|
+
}
|
209
|
+
}
|
210
|
+
return 0;
|
211
|
+
}
|
212
|
+
|
213
|
+
|
214
|
+
static Line *
|
215
|
+
comment(Paragraph *p, char *key)
|
216
|
+
{
|
217
|
+
Line *t, *ret;
|
218
|
+
|
219
|
+
for ( t = p->text; t ; t = t->next) {
|
220
|
+
if ( strstr(T(t->text), "-->") ) {
|
221
|
+
ret = t->next;
|
222
|
+
t->next = 0;
|
223
|
+
return ret;
|
224
|
+
}
|
225
|
+
}
|
226
|
+
return t;
|
227
|
+
|
228
|
+
}
|
229
|
+
|
230
|
+
|
231
|
+
/* footnotes look like ^<whitespace>{0,3}[stuff]: <content>$
|
232
|
+
*/
|
233
|
+
static int
|
234
|
+
isfootnote(Line *t)
|
235
|
+
{
|
236
|
+
int i;
|
237
|
+
|
238
|
+
if ( ( (i = t->dle) > 3) || (T(t->text)[i] != '[') )
|
239
|
+
return 0;
|
240
|
+
|
241
|
+
for ( ++i; i < S(t->text) ; ++i ) {
|
242
|
+
if ( T(t->text)[i] == '[' )
|
243
|
+
return 0;
|
244
|
+
else if ( T(t->text)[i] == ']' && T(t->text)[i+1] == ':' )
|
245
|
+
return 1;
|
246
|
+
}
|
247
|
+
return 0;
|
248
|
+
}
|
249
|
+
|
250
|
+
|
251
|
+
static int
|
252
|
+
isquote(Line *t)
|
253
|
+
{
|
254
|
+
return ( T(t->text)[0] == '>' );
|
255
|
+
}
|
256
|
+
|
257
|
+
|
258
|
+
/* Is <c> one of the characters a horizontal rule can be drawn
 * with ('*', '-' or '_')?  Returns 1 or 0.
 */
static int
dashchar(char c)
{
    switch (c) {
    case '*':
    case '-':
    case '_':
        return 1;
    default:
        return 0;
    }
}
|
263
|
+
|
264
|
+
|
265
|
+
static int
|
266
|
+
iscode(Line *t)
|
267
|
+
{
|
268
|
+
return (t->dle >= 4);
|
269
|
+
}
|
270
|
+
|
271
|
+
|
272
|
+
static int
|
273
|
+
ishr(Line *t)
|
274
|
+
{
|
275
|
+
int i, count=0;
|
276
|
+
char dash = 0;
|
277
|
+
char c;
|
278
|
+
|
279
|
+
if ( iscode(t) ) return 0;
|
280
|
+
|
281
|
+
for ( i = 0; i < S(t->text); i++) {
|
282
|
+
c = T(t->text)[i];
|
283
|
+
if ( (dash == 0) && dashchar(c) )
|
284
|
+
dash = c;
|
285
|
+
|
286
|
+
if ( c == dash ) ++count;
|
287
|
+
else if ( !isspace(c) )
|
288
|
+
return 0;
|
289
|
+
}
|
290
|
+
return (count >= 3);
|
291
|
+
}
|
292
|
+
|
293
|
+
|
294
|
+
static int
|
295
|
+
ishdr(Line *t, int *htyp)
|
296
|
+
{
|
297
|
+
int i;
|
298
|
+
|
299
|
+
|
300
|
+
/* first check for etx-style ###HEADER###
|
301
|
+
*/
|
302
|
+
|
303
|
+
/* leading run of `#`'s ?
|
304
|
+
*/
|
305
|
+
for ( i=0; T(t->text)[i] == '#'; ++i)
|
306
|
+
;
|
307
|
+
|
308
|
+
/* ANY leading `#`'s make this into an ETX header
|
309
|
+
*/
|
310
|
+
if ( i ) {
|
311
|
+
*htyp = ETX;
|
312
|
+
return 1;
|
313
|
+
}
|
314
|
+
|
315
|
+
/* then check for setext-style HEADER
|
316
|
+
* ======
|
317
|
+
*/
|
318
|
+
|
319
|
+
if ( t->next ) {
|
320
|
+
char *q = T(t->next->text);
|
321
|
+
|
322
|
+
if ( (*q == '=') || (*q == '-') ) {
|
323
|
+
for (i=1; i < S(t->next->text); i++)
|
324
|
+
if ( q[0] != q[i] )
|
325
|
+
return 0;
|
326
|
+
*htyp = SETEXT;
|
327
|
+
return 1;
|
328
|
+
}
|
329
|
+
}
|
330
|
+
return 0;
|
331
|
+
}
|
332
|
+
|
333
|
+
|
334
|
+
static int
|
335
|
+
isdefinition(Line *t)
|
336
|
+
{
|
337
|
+
#if DL_TAG_EXTENSION
|
338
|
+
return t && t->next
|
339
|
+
&& (S(t->text) > 2)
|
340
|
+
&& (t->dle == 0)
|
341
|
+
&& (T(t->text)[0] == '=')
|
342
|
+
&& (T(t->text)[S(t->text)-1] == '=')
|
343
|
+
&& ( (t->next->dle >= 4) || isdefinition(t->next) );
|
344
|
+
#else
|
345
|
+
return 0;
|
346
|
+
#endif
|
347
|
+
}
|
348
|
+
|
349
|
+
|
350
|
+
static int
|
351
|
+
islist(Line *t, int *trim)
|
352
|
+
{
|
353
|
+
int i, j;
|
354
|
+
char *q;
|
355
|
+
|
356
|
+
if ( iscode(t) || blankline(t) || ishdr(t,&i) || ishr(t) )
|
357
|
+
return 0;
|
358
|
+
|
359
|
+
if ( isdefinition(t) ) {
|
360
|
+
*trim = 4;
|
361
|
+
return DL;
|
362
|
+
}
|
363
|
+
|
364
|
+
if ( strchr("*-+", T(t->text)[t->dle]) && isspace(T(t->text)[t->dle+1]) ) {
|
365
|
+
i = nextnonblank(t, t->dle+1);
|
366
|
+
*trim = (i > 4) ? 4 : i;
|
367
|
+
return UL;
|
368
|
+
}
|
369
|
+
|
370
|
+
if ( (j = nextblank(t,t->dle)) > t->dle ) {
|
371
|
+
if ( T(t->text)[j-1] == '.' ) {
|
372
|
+
#if ALPHA_LIST
|
373
|
+
if ( (j == t->dle + 2) && isalpha(T(t->text)[t->dle]) ) {
|
374
|
+
j = nextnonblank(t,j);
|
375
|
+
*trim = j;
|
376
|
+
return AL;
|
377
|
+
}
|
378
|
+
#endif
|
379
|
+
strtoul(T(t->text)+t->dle, &q, 10);
|
380
|
+
if ( (q > T(t->text)+t->dle) && (q == T(t->text) + (j-1)) ) {
|
381
|
+
j = nextnonblank(t,j);
|
382
|
+
*trim = j;
|
383
|
+
return OL;
|
384
|
+
}
|
385
|
+
}
|
386
|
+
}
|
387
|
+
return 0;
|
388
|
+
}
|
389
|
+
|
390
|
+
|
391
|
+
static Line *
|
392
|
+
headerblock(Paragraph *pp, int htyp)
|
393
|
+
{
|
394
|
+
Line *ret = 0;
|
395
|
+
Line *p = pp->text;
|
396
|
+
int i, j;
|
397
|
+
|
398
|
+
switch (htyp) {
|
399
|
+
case SETEXT:
|
400
|
+
/* p->text is header, p->next->text is -'s or ='s
|
401
|
+
*/
|
402
|
+
pp->hnumber = (T(p->next->text)[0] == '=') ? 1 : 2;
|
403
|
+
|
404
|
+
ret = p->next->next;
|
405
|
+
___mkd_freeLine(p->next);
|
406
|
+
p->next = 0;
|
407
|
+
break;
|
408
|
+
|
409
|
+
case ETX:
|
410
|
+
/* p->text is ###header###, so we need to trim off
|
411
|
+
* the leading and trailing `#`'s
|
412
|
+
*/
|
413
|
+
|
414
|
+
for (i=0; (T(p->text)[i] == T(p->text)[0]) && (i < S(p->text)-1); i++)
|
415
|
+
;
|
416
|
+
|
417
|
+
pp->hnumber = i;
|
418
|
+
|
419
|
+
while ( (i < S(p->text)) && isspace(T(p->text)[i]) )
|
420
|
+
++i;
|
421
|
+
|
422
|
+
CLIP(p->text, 0, i);
|
423
|
+
|
424
|
+
for (j=S(p->text); (j > 1) && (T(p->text)[j-1] == '#'); --j)
|
425
|
+
;
|
426
|
+
|
427
|
+
while ( j && isspace(T(p->text)[j-1]) )
|
428
|
+
--j;
|
429
|
+
|
430
|
+
S(p->text) = j;
|
431
|
+
|
432
|
+
ret = p->next;
|
433
|
+
p->next = 0;
|
434
|
+
break;
|
435
|
+
}
|
436
|
+
return ret;
|
437
|
+
}
|
438
|
+
|
439
|
+
|
440
|
+
static Line *
|
441
|
+
codeblock(Paragraph *p)
|
442
|
+
{
|
443
|
+
Line *t = p->text, *r;
|
444
|
+
|
445
|
+
for ( ; t; t = r ) {
|
446
|
+
CLIP(t->text,0,4);
|
447
|
+
t->dle = mkd_firstnonblank(t);
|
448
|
+
|
449
|
+
if ( !( (r = skipempty(t->next)) && iscode(r)) ) {
|
450
|
+
___mkd_freeLineRange(t,r);
|
451
|
+
t->next = 0;
|
452
|
+
return r;
|
453
|
+
}
|
454
|
+
}
|
455
|
+
return t;
|
456
|
+
}
|
457
|
+
|
458
|
+
|
459
|
+
static int
|
460
|
+
centered(Line *first, Line *last)
|
461
|
+
{
|
462
|
+
|
463
|
+
if ( first&&last ) {
|
464
|
+
int len = S(last->text);
|
465
|
+
|
466
|
+
if ( (len > 2) && (strncmp(T(first->text), "->", 2) == 0)
|
467
|
+
&& (strncmp(T(last->text)+len-2, "<-", 2) == 0) ) {
|
468
|
+
CLIP(first->text, 0, 2);
|
469
|
+
S(last->text) -= 2;
|
470
|
+
return CENTER;
|
471
|
+
}
|
472
|
+
}
|
473
|
+
return 0;
|
474
|
+
}
|
475
|
+
|
476
|
+
|
477
|
+
static int
|
478
|
+
endoftextblock(Line *t, int toplevelblock)
|
479
|
+
{
|
480
|
+
int z;
|
481
|
+
|
482
|
+
if ( blankline(t)||isquote(t)||iscode(t)||ishdr(t,&z)||ishr(t) )
|
483
|
+
return 1;
|
484
|
+
|
485
|
+
/* HORRIBLE STANDARDS KLUDGE: Toplevel paragraphs eat absorb adjacent
|
486
|
+
* list items, but sublevel blocks behave properly.
|
487
|
+
*/
|
488
|
+
return toplevelblock ? 0 : islist(t,&z);
|
489
|
+
}
|
490
|
+
|
491
|
+
|
492
|
+
static Line *
|
493
|
+
textblock(Paragraph *p, int toplevel)
|
494
|
+
{
|
495
|
+
Line *t, *next;
|
496
|
+
|
497
|
+
for ( t = p->text; t ; t = next ) {
|
498
|
+
if ( ((next = t->next) == 0) || endoftextblock(next, toplevel) ) {
|
499
|
+
p->align = centered(p->text, t);
|
500
|
+
t->next = 0;
|
501
|
+
return next;
|
502
|
+
}
|
503
|
+
}
|
504
|
+
return t;
|
505
|
+
}
|
506
|
+
|
507
|
+
|
508
|
+
/* Length of the "id:" or "class:" kind prefix (matched without
 * regard to case) at the start of <p>, as used in a special
 * div-not-quote block; 0 when <p> starts with neither.
 */
static int
szmarkerclass(char *p)
{
    static const struct {
        const char *name;
        int len;
    } kinds[] = { { "id:", 3 }, { "class:", 6 } };
    int k;

    for ( k = 0; k < 2; k++ )
        if ( strncasecmp(p, kinds[k].name, kinds[k].len) == 0 )
            return kinds[k].len;

    return 0;
}
|
519
|
+
|
520
|
+
|
521
|
+
/*
|
522
|
+
* check if the first line of a quoted block is the special div-not-quote
|
523
|
+
* marker %[kind:]name%
|
524
|
+
*/
|
525
|
+
static int
|
526
|
+
isdivmarker(Line *p)
|
527
|
+
{
|
528
|
+
#if DIV_QUOTE
|
529
|
+
char *s = T(p->text);
|
530
|
+
int len = S(p->text);
|
531
|
+
int i;
|
532
|
+
|
533
|
+
if ( !(len && s[0] == '%' && s[len-1] == '%') ) return 0;
|
534
|
+
|
535
|
+
i = szmarkerclass(s+1);
|
536
|
+
--len;
|
537
|
+
|
538
|
+
while ( ++i < len )
|
539
|
+
if ( !isalnum(s[i]) )
|
540
|
+
return 0;
|
541
|
+
|
542
|
+
return 1;
|
543
|
+
#else
|
544
|
+
return 0;
|
545
|
+
#endif
|
546
|
+
}
|
547
|
+
|
548
|
+
|
549
|
+
/*
|
550
|
+
* accumulate a blockquote.
|
551
|
+
*
|
552
|
+
* one sick horrible thing about blockquotes is that even though
|
553
|
+
* it just takes ^> to start a quote, following lines, if quoted,
|
554
|
+
* assume that the prefix is ``>''. This means that code needs
|
555
|
+
* to be indented *5* spaces from the leading '>', but *4* spaces
|
556
|
+
* from the start of the line. This does not appear to be
|
557
|
+
* documented in the reference implementation, but it's the
|
558
|
+
* way the markdown sample web form at Daring Fireball works.
|
559
|
+
*/
|
560
|
+
static Line *
|
561
|
+
quoteblock(Paragraph *p)
|
562
|
+
{
|
563
|
+
Line *t, *q;
|
564
|
+
int qp;
|
565
|
+
|
566
|
+
for ( t = p->text; t ; t = q ) {
|
567
|
+
if ( isquote(t) ) {
|
568
|
+
qp = (T(t->text)[1] == ' ') ? 2 : 1;
|
569
|
+
CLIP(t->text, 0, qp);
|
570
|
+
t->dle = mkd_firstnonblank(t);
|
571
|
+
}
|
572
|
+
|
573
|
+
if ( !(q = skipempty(t->next)) || ((q != t->next) && !isquote(q)) ) {
|
574
|
+
___mkd_freeLineRange(t, q);
|
575
|
+
t = q;
|
576
|
+
break;
|
577
|
+
}
|
578
|
+
}
|
579
|
+
if ( isdivmarker(p->text) ) {
|
580
|
+
char *prefix = "class";
|
581
|
+
int i;
|
582
|
+
|
583
|
+
q = p->text;
|
584
|
+
p->text = p->text->next;
|
585
|
+
|
586
|
+
if ( (i = szmarkerclass(1+T(q->text))) == 3 )
|
587
|
+
/* and this would be an "%id:" prefix */
|
588
|
+
prefix="id";
|
589
|
+
|
590
|
+
if ( p->ident = malloc(4+i+S(q->text)) )
|
591
|
+
sprintf(p->ident, "%s=\"%.*s\"", prefix, S(q->text)-(i+2),
|
592
|
+
T(q->text)+(i+1) );
|
593
|
+
|
594
|
+
___mkd_freeLine(q);
|
595
|
+
}
|
596
|
+
return t;
|
597
|
+
}
|
598
|
+
|
599
|
+
|
600
|
+
static Paragraph *Pp(ParagraphRoot *, Line *, int);
|
601
|
+
static Paragraph *compile(Line *, int, MMIOT *);
|
602
|
+
|
603
|
+
|
604
|
+
/*
|
605
|
+
* pull in a list block. A list block starts with a list marker and
|
606
|
+
* runs until the next list marker, the next non-indented paragraph,
|
607
|
+
* or EOF. You do not have to indent nonblank lines after the list
|
608
|
+
* marker, but multiple paragraphs need to start with a 4-space indent.
|
609
|
+
*/
|
610
|
+
static Line *
|
611
|
+
listitem(Paragraph *p, int indent)
|
612
|
+
{
|
613
|
+
Line *t, *q;
|
614
|
+
int clip = indent;
|
615
|
+
int z;
|
616
|
+
|
617
|
+
for ( t = p->text; t ; t = q) {
|
618
|
+
CLIP(t->text, 0, clip);
|
619
|
+
t->dle = mkd_firstnonblank(t);
|
620
|
+
|
621
|
+
if ( (q = skipempty(t->next)) == 0 ) {
|
622
|
+
___mkd_freeLineRange(t,q);
|
623
|
+
return 0;
|
624
|
+
}
|
625
|
+
|
626
|
+
/* after a blank line, the next block needs to start with a line
|
627
|
+
* that's indented 4 spaces, but after that the line doesn't
|
628
|
+
* need any indentation
|
629
|
+
*/
|
630
|
+
if ( q != t->next ) {
|
631
|
+
if (q->dle < 4) {
|
632
|
+
q = t->next;
|
633
|
+
t->next = 0;
|
634
|
+
return q;
|
635
|
+
}
|
636
|
+
indent = 4;
|
637
|
+
}
|
638
|
+
|
639
|
+
if ( (q->dle < indent) && (ishr(q) || islist(q,&z)) && !ishdr(q,&z) ) {
|
640
|
+
q = t->next;
|
641
|
+
t->next = 0;
|
642
|
+
return q;
|
643
|
+
}
|
644
|
+
|
645
|
+
clip = (q->dle > indent) ? indent : q->dle;
|
646
|
+
}
|
647
|
+
return t;
|
648
|
+
}
|
649
|
+
|
650
|
+
|
651
|
+
static Line *
|
652
|
+
listblock(Paragraph *top, int trim, MMIOT *f)
|
653
|
+
{
|
654
|
+
ParagraphRoot d = { 0, 0 };
|
655
|
+
Paragraph *p;
|
656
|
+
Line *q = top->text, *text;
|
657
|
+
Line *label;
|
658
|
+
int para = 0;
|
659
|
+
|
660
|
+
while (( text = q )) {
|
661
|
+
if ( top->typ == DL ) {
|
662
|
+
Line *lp;
|
663
|
+
|
664
|
+
for ( lp = label = text; lp ; lp = lp->next ) {
|
665
|
+
text = lp->next;
|
666
|
+
CLIP(lp->text, 0, 1);
|
667
|
+
S(lp->text)--;
|
668
|
+
if ( !isdefinition(lp->next) )
|
669
|
+
lp->next = 0;
|
670
|
+
}
|
671
|
+
}
|
672
|
+
else label = 0;
|
673
|
+
|
674
|
+
p = Pp(&d, text, LISTITEM);
|
675
|
+
text = listitem(p, trim);
|
676
|
+
|
677
|
+
p->down = compile(p->text, 0, f);
|
678
|
+
p->text = label;
|
679
|
+
|
680
|
+
if ( para && (top->typ != DL) && p->down ) p->down->align = PARA;
|
681
|
+
|
682
|
+
if ( !(q = skipempty(text)) || (islist(q, &trim) == 0) )
|
683
|
+
break;
|
684
|
+
|
685
|
+
if ( para = (q != text) ) {
|
686
|
+
Line anchor;
|
687
|
+
|
688
|
+
anchor.next = text;
|
689
|
+
___mkd_freeLineRange(&anchor, q);
|
690
|
+
}
|
691
|
+
|
692
|
+
if ( para && (top->typ != DL) && p->down ) p->down->align = PARA;
|
693
|
+
}
|
694
|
+
top->text = 0;
|
695
|
+
top->down = T(d);
|
696
|
+
return text;
|
697
|
+
}
|
698
|
+
|
699
|
+
|
700
|
+
/* If <c> can open a footnote title, return the character that
 * closes the title: a quote closes with the same quote, and '('
 * closes with ')'.  Returns 0 for anything else.
 */
static int
tgood(char c)
{
    if ( c == '\'' || c == '"' )
        return c;

    if ( c == '(' )
        return ')';

    return 0;
}
|
710
|
+
|
711
|
+
|
712
|
+
/*
|
713
|
+
* add a new (image or link) footnote to the footnote table
|
714
|
+
*/
|
715
|
+
static Line*
|
716
|
+
addfootnote(Line *p, MMIOT* f)
|
717
|
+
{
|
718
|
+
int j, i;
|
719
|
+
int c;
|
720
|
+
Line *np = p->next;
|
721
|
+
|
722
|
+
Footnote *foot = &EXPAND(*f->footnotes);
|
723
|
+
|
724
|
+
CREATE(foot->tag);
|
725
|
+
CREATE(foot->link);
|
726
|
+
CREATE(foot->title);
|
727
|
+
foot->height = foot->width = 0;
|
728
|
+
|
729
|
+
for (j=i=p->dle+1; T(p->text)[j] != ']'; j++)
|
730
|
+
EXPAND(foot->tag) = T(p->text)[j];
|
731
|
+
|
732
|
+
EXPAND(foot->tag) = 0;
|
733
|
+
S(foot->tag)--;
|
734
|
+
j = nextnonblank(p, j+2);
|
735
|
+
|
736
|
+
while ( (j < S(p->text)) && !isspace(T(p->text)[j]) )
|
737
|
+
EXPAND(foot->link) = T(p->text)[j++];
|
738
|
+
EXPAND(foot->link) = 0;
|
739
|
+
S(foot->link)--;
|
740
|
+
j = nextnonblank(p,j);
|
741
|
+
|
742
|
+
if ( T(p->text)[j] == '=' ) {
|
743
|
+
sscanf(T(p->text)+j, "=%dx%d", &foot->width, &foot->height);
|
744
|
+
while ( (j < S(p->text)) && !isspace(T(p->text)[j]) )
|
745
|
+
++j;
|
746
|
+
j = nextnonblank(p,j);
|
747
|
+
}
|
748
|
+
|
749
|
+
|
750
|
+
if ( (j >= S(p->text)) && np && np->dle && tgood(T(np->text)[np->dle]) ) {
|
751
|
+
___mkd_freeLine(p);
|
752
|
+
p = np;
|
753
|
+
np = p->next;
|
754
|
+
j = p->dle;
|
755
|
+
}
|
756
|
+
|
757
|
+
if ( (c = tgood(T(p->text)[j])) ) {
|
758
|
+
/* Try to take the rest of the line as a comment; read to
|
759
|
+
* EOL, then shrink the string back to before the final
|
760
|
+
* quote.
|
761
|
+
*/
|
762
|
+
++j; /* skip leading quote */
|
763
|
+
|
764
|
+
while ( j < S(p->text) )
|
765
|
+
EXPAND(foot->title) = T(p->text)[j++];
|
766
|
+
|
767
|
+
while ( S(foot->title) && T(foot->title)[S(foot->title)-1] != c )
|
768
|
+
--S(foot->title);
|
769
|
+
if ( S(foot->title) ) /* skip trailing quote */
|
770
|
+
--S(foot->title);
|
771
|
+
EXPAND(foot->title) = 0;
|
772
|
+
--S(foot->title);
|
773
|
+
}
|
774
|
+
|
775
|
+
___mkd_freeLine(p);
|
776
|
+
return np;
|
777
|
+
}
|
778
|
+
|
779
|
+
|
780
|
+
/*
|
781
|
+
* allocate a paragraph header, link it to the
|
782
|
+
* tail of the current document
|
783
|
+
*/
|
784
|
+
static Paragraph *
|
785
|
+
Pp(ParagraphRoot *d, Line *ptr, int typ)
|
786
|
+
{
|
787
|
+
Paragraph *ret = calloc(sizeof *ret, 1);
|
788
|
+
|
789
|
+
ret->text = ptr;
|
790
|
+
ret->typ = typ;
|
791
|
+
|
792
|
+
return ATTACH(*d, ret);
|
793
|
+
}
|
794
|
+
|
795
|
+
|
796
|
+
|
797
|
+
/* Free the blank lines at the head of the list and return the first
 * line that is not blank (0 if the list runs out).  When such a
 * line exists, *eaten is set to the number of blank lines removed;
 * otherwise *eaten is left untouched.
 */
static Line*
consume(Line *ptr, int *eaten)
{
    Line *following;
    int removed = 0;

    while ( ptr && blankline(ptr) ) {
        following = ptr->next;
        ___mkd_freeLine(ptr);
        ptr = following;
        removed++;
    }

    if ( ptr )
        *eaten = removed;

    return ptr;
}
|
810
|
+
|
811
|
+
|
812
|
+
/*
|
813
|
+
* break a collection of markdown input into
|
814
|
+
* blocks of lists, code, html, and text to
|
815
|
+
* be marked up.
|
816
|
+
*/
|
817
|
+
static Paragraph *
|
818
|
+
compile(Line *ptr, int toplevel, MMIOT *f)
|
819
|
+
{
|
820
|
+
ParagraphRoot d = { 0, 0 };
|
821
|
+
Paragraph *p = 0;
|
822
|
+
char *key;
|
823
|
+
Line *r;
|
824
|
+
int para = toplevel;
|
825
|
+
int hdr_type, list_type, indent;
|
826
|
+
|
827
|
+
ptr = consume(ptr, ¶);
|
828
|
+
|
829
|
+
while ( ptr ) {
|
830
|
+
if ( toplevel && !(f->flags & DENY_HTML) && (key = isopentag(ptr)) ) {
|
831
|
+
p = Pp(&d, ptr, strcmp(key, "STYLE") == 0 ? STYLE : HTML);
|
832
|
+
if ( strcmp(key, "!--") == 0 )
|
833
|
+
ptr = comment(p, key);
|
834
|
+
else
|
835
|
+
ptr = htmlblock(p, key);
|
836
|
+
}
|
837
|
+
else if ( iscode(ptr) ) {
|
838
|
+
p = Pp(&d, ptr, CODE);
|
839
|
+
|
840
|
+
if ( f->flags & MKD_1_COMPAT) {
|
841
|
+
/* HORRIBLE STANDARDS KLUDGE: the first line of every block
|
842
|
+
* has trailing whitespace trimmed off.
|
843
|
+
*/
|
844
|
+
___mkd_tidy(p->text);
|
845
|
+
}
|
846
|
+
|
847
|
+
ptr = codeblock(p);
|
848
|
+
}
|
849
|
+
else if ( ishr(ptr) ) {
|
850
|
+
p = Pp(&d, 0, HR);
|
851
|
+
r = ptr;
|
852
|
+
ptr = ptr->next;
|
853
|
+
___mkd_freeLine(r);
|
854
|
+
}
|
855
|
+
else if (( list_type = islist(ptr, &indent) )) {
|
856
|
+
p = Pp(&d, ptr, list_type);
|
857
|
+
ptr = listblock(p, indent, f);
|
858
|
+
}
|
859
|
+
else if ( isquote(ptr) ) {
|
860
|
+
p = Pp(&d, ptr, QUOTE);
|
861
|
+
ptr = quoteblock(p);
|
862
|
+
p->down = compile(p->text, 1, f);
|
863
|
+
p->text = 0;
|
864
|
+
}
|
865
|
+
else if ( ishdr(ptr, &hdr_type) ) {
|
866
|
+
p = Pp(&d, ptr, HDR);
|
867
|
+
ptr = headerblock(p, hdr_type);
|
868
|
+
}
|
869
|
+
else if ( toplevel && (isfootnote(ptr)) ) {
|
870
|
+
ptr = consume(addfootnote(ptr, f), ¶);
|
871
|
+
continue;
|
872
|
+
}
|
873
|
+
else {
|
874
|
+
p = Pp(&d, ptr, MARKUP);
|
875
|
+
ptr = textblock(p, toplevel);
|
876
|
+
}
|
877
|
+
|
878
|
+
if ( (para||toplevel) && !p->align )
|
879
|
+
p->align = PARA;
|
880
|
+
|
881
|
+
para = toplevel;
|
882
|
+
ptr = consume(ptr, ¶);
|
883
|
+
|
884
|
+
if ( para && !p->align )
|
885
|
+
p->align = PARA;
|
886
|
+
|
887
|
+
}
|
888
|
+
return T(d);
|
889
|
+
}
|
890
|
+
|
891
|
+
|
892
|
+
static void
|
893
|
+
initialize()
|
894
|
+
{
|
895
|
+
static int first = 1;
|
896
|
+
|
897
|
+
if ( first-- > 0 ) {
|
898
|
+
first = 0;
|
899
|
+
INITRNG(time(0));
|
900
|
+
qsort(blocktags, SZTAGS, sizeof blocktags[0], (stfu)casort);
|
901
|
+
}
|
902
|
+
}
|
903
|
+
|
904
|
+
|
905
|
+
/*
|
906
|
+
* the guts of the markdown() function, ripped out so I can do
|
907
|
+
* debugging.
|
908
|
+
*/
|
909
|
+
|
910
|
+
/*
|
911
|
+
* prepare and compile `text`, returning a Paragraph tree.
|
912
|
+
*/
|
913
|
+
int
|
914
|
+
mkd_compile(Document *doc, int flags)
|
915
|
+
{
|
916
|
+
if ( !doc )
|
917
|
+
return 0;
|
918
|
+
|
919
|
+
if ( doc->compiled )
|
920
|
+
return 1;
|
921
|
+
|
922
|
+
doc->compiled = 1;
|
923
|
+
memset(doc->ctx, 0, sizeof(MMIOT) );
|
924
|
+
doc->ctx->flags = flags & USER_FLAGS;
|
925
|
+
doc->ctx->base = doc->base;
|
926
|
+
CREATE(doc->ctx->in);
|
927
|
+
doc->ctx->footnotes = malloc(sizeof doc->ctx->footnotes[0]);
|
928
|
+
CREATE(*doc->ctx->footnotes);
|
929
|
+
|
930
|
+
initialize();
|
931
|
+
|
932
|
+
doc->code = compile(T(doc->content), 1, doc->ctx);
|
933
|
+
qsort(T(*doc->ctx->footnotes), S(*doc->ctx->footnotes),
|
934
|
+
sizeof T(*doc->ctx->footnotes)[0],
|
935
|
+
(stfu)__mkd_footsort);
|
936
|
+
memset(&doc->content, 0, sizeof doc->content);
|
937
|
+
return 1;
|
938
|
+
}
|
939
|
+
|