rdiscount 1.3.5 → 1.5.5

Sign up to get free protection for your applications and to get access to all the features.
data/ext/markdown.c CHANGED
@@ -21,10 +21,12 @@
21
21
  */
22
22
  struct kw {
23
23
  char *id;
24
- int siz;
24
+ int size;
25
+ int selfclose;
25
26
  } ;
26
27
 
27
- #define KW(x) { x, sizeof(x)-1 }
28
+ #define KW(x) { x, sizeof(x)-1, 0 }
29
+ #define SC(x) { x, sizeof(x)-1, 1 }
28
30
 
29
31
  static struct kw blocktags[] = { KW("!--"), KW("STYLE"), KW("SCRIPT"),
30
32
  KW("ADDRESS"), KW("BDO"), KW("BLOCKQUOTE"),
@@ -33,7 +35,8 @@ static struct kw blocktags[] = { KW("!--"), KW("STYLE"), KW("SCRIPT"),
33
35
  KW("H6"), KW("LISTING"), KW("NOBR"),
34
36
  KW("UL"), KW("P"), KW("OL"), KW("DL"),
35
37
  KW("PLAINTEXT"), KW("PRE"), KW("TABLE"),
36
- KW("WBR"), KW("XMP"), KW("HR"), KW("BR") };
38
+ KW("WBR"), KW("XMP"), SC("HR"), SC("BR"),
39
+ KW("IFRAME"), KW("MAP") };
37
40
  #define SZTAGS (sizeof blocktags / sizeof blocktags[0])
38
41
  #define MAXTAG 11 /* sizeof "BLOCKQUOTE" */
39
42
 
@@ -47,9 +50,9 @@ typedef ANCHOR(Paragraph) ParagraphRoot;
47
50
  static int
48
51
  casort(struct kw *a, struct kw *b)
49
52
  {
50
- if ( a->siz != b->siz )
51
- return a->siz - b->siz;
52
- return strncasecmp(a->id, b->id, b->siz);
53
+ if ( a->size != b->size )
54
+ return a->size - b->size;
55
+ return strncasecmp(a->id, b->id, b->size);
53
56
  }
54
57
 
55
58
 
@@ -125,14 +128,14 @@ skipempty(Line *p)
125
128
 
126
129
 
127
130
  void
128
- ___mkd_tidy(Line *t)
131
+ ___mkd_tidy(Cstring *t)
129
132
  {
130
- while ( S(t->text) && isspace(T(t->text)[S(t->text)-1]) )
131
- --S(t->text);
133
+ while ( S(*t) && isspace(T(*t)[S(*t)-1]) )
134
+ --S(*t);
132
135
  }
133
136
 
134
137
 
135
- static char *
138
+ static struct kw *
136
139
  isopentag(Line *p)
137
140
  {
138
141
  int i=0, len;
@@ -154,79 +157,84 @@ isopentag(Line *p)
154
157
  ;
155
158
 
156
159
  key.id = T(p->text)+1;
157
- key.siz = i-1;
160
+ key.size = i-1;
158
161
 
159
- if ( ret = bsearch(&key,blocktags,SZTAGS,sizeof key, (stfu)casort))
160
- return ret->id;
162
+ if ( ret = bsearch(&key, blocktags, SZTAGS, sizeof key, (stfu)casort))
163
+ return ret;
161
164
 
162
165
  return 0;
163
166
  }
164
167
 
165
168
 
166
- static int
167
- selfclose(Line *t, char *tag)
168
- {
169
- char *q = T(t->text);
170
- int siz = strlen(tag);
169
+ typedef struct _flo {
170
+ Line *t;
171
171
  int i;
172
+ } FLO;
172
173
 
173
- if ( strcasecmp(tag, "HR") == 0 || strcasecmp(tag, "BR") == 0 )
174
- /* <HR> and <BR> are self-closing block-level tags,
175
- */
176
- return 1;
177
174
 
178
- i = S(t->text) - (siz + 3);
179
-
180
- /* we specialcase start and end tags on the same line.
181
- */
182
- return ( i > 0 ) && (q[i] == '<') && (q[i+1] == '/')
183
- && (q[i+2+siz] == '>')
184
- && (strncasecmp(&q[i+2], tag, siz) == 0);
175
+ static int
176
+ flogetc(FLO *f)
177
+ {
178
+ if ( f && f->t ) {
179
+ if ( f->i < S(f->t->text) )
180
+ return T(f->t->text)[f->i++];
181
+ f->t = f->t->next;
182
+ f->i = 0;
183
+ return flogetc(f);
184
+ }
185
+ return EOF;
185
186
  }
186
187
 
187
188
 
188
189
  static Line *
189
- htmlblock(Paragraph *p, char *tag)
190
+ htmlblock(Paragraph *p, struct kw *tag)
190
191
  {
191
- Line *t = p->text, *ret;
192
- int closesize, tagsize;
193
- char close[MAXTAG+4];
194
-
195
- char *ps, *pse;
196
- int depth;
197
-
198
- tagsize = strlen(tag);
192
+ Line *ret;
193
+ FLO f = { p->text, 0 };
194
+ int c;
195
+ int i, closing, depth=0;
199
196
 
200
- if ( selfclose(t, tag) || (tagsize >= MAXTAG) ) {
201
- ret = t->next;
202
- t->next = 0;
197
+ if ( tag->selfclose || (tag->size >= MAXTAG) ) {
198
+ ret = f.t->next;
199
+ f.t->next = 0;
203
200
  return ret;
204
201
  }
205
202
 
206
- closesize = sprintf(close, "</%s>", tag);
207
- depth = 0;
208
-
209
- for ( ; t ; t = t->next) {
210
- ps = T(t->text);
211
- pse = ps + (S(t->text) - (tagsize + 1));
212
- for ( ; ps < pse; ps++ ) {
213
- if ( *ps == '<' ) {
214
- /* check for close tag */
215
- if ( strncasecmp(ps, close, closesize) == 0 ) {
216
- depth--;
217
- if ( depth == 0 ) {
218
- ret = t->next;
219
- t->next = 0;
220
- return ret;
221
- }
222
- continue;
223
- }
224
-
225
- /* check for nested open tag */
226
- if ( (strncasecmp(ps + 1, tag, tagsize) == 0) &&
227
- (ps[tagsize + 1] == '>' || ps[tagsize + 1] == ' ') ) {
228
- depth++;
229
- }
203
+ while ( (c = flogetc(&f)) != EOF ) {
204
+ if ( c == '<' ) {
205
+ /* tag? */
206
+ c = flogetc(&f);
207
+ if ( c == '!' ) { /* comment? */
208
+ if ( flogetc(&f) == '-' && flogetc(&f) == '-' ) {
209
+ /* yes */
210
+ while ( (c = flogetc(&f)) != EOF ) {
211
+ if ( c == '-' && flogetc(&f) == '-'
212
+ && flogetc(&f) == '>')
213
+ /* consumed whole comment */
214
+ break;
215
+ }
216
+ }
217
+ }
218
+ else {
219
+ if ( closing = (c == '/') ) c = flogetc(&f);
220
+
221
+ for ( i=0; i < tag->size; c=flogetc(&f) ) {
222
+ if ( tag->id[i++] != toupper(c) )
223
+ break;
224
+ }
225
+
226
+ if ( (i == tag->size) && !isalnum(c) ) {
227
+ depth = depth + (closing ? -1 : 1);
228
+ if ( depth == 0 ) {
229
+ while ( c != EOF && c != '>' ) {
230
+ /* consume trailing gunk in close tag */
231
+ c = flogetc(&f);
232
+ }
233
+ ret = f.t->next;
234
+ f.t->next = 0;
235
+ return ret;
236
+ }
237
+ }
230
238
  }
231
239
  }
232
240
  }
@@ -235,7 +243,7 @@ htmlblock(Paragraph *p, char *tag)
235
243
 
236
244
 
237
245
  static Line *
238
- comment(Paragraph *p, char *key)
246
+ comment(Paragraph *p)
239
247
  {
240
248
  Line *t, *ret;
241
249
 
@@ -251,6 +259,30 @@ comment(Paragraph *p, char *key)
251
259
  }
252
260
 
253
261
 
262
+ /* tables look like
263
+ * header|header{|header}
264
+ * ------|------{|......}
265
+ * {body lines}
266
+ */
267
+ static int
268
+ istable(Line *t)
269
+ {
270
+ char *p;
271
+ Line *dashes = t->next;
272
+
273
+ /* two lines, first must contain | */
274
+ if ( !(dashes && memchr(T(t->text), '|', S(t->text))) )
275
+ return 0;
276
+
277
+ /* second line must be only whitespace, |, :, or - */
278
+ for ( p = T(dashes->text)+S(dashes->text)-1; p >= T(dashes->text); --p)
279
+ if ( ! ((*p == '|') || (*p == ':') || (*p == '-') || isspace(*p)) )
280
+ return 0;
281
+
282
+ return 1;
283
+ }
284
+
285
+
254
286
  /* footnotes look like ^<whitespace>{0,3}[stuff]: <content>$
255
287
  */
256
288
  static int
@@ -274,7 +306,11 @@ isfootnote(Line *t)
274
306
  static int
275
307
  isquote(Line *t)
276
308
  {
277
- return ( T(t->text)[0] == '>' );
309
+ char *pt = T(t->text);
310
+ return ( pt[0] == '>' ) ||
311
+ ( pt[0] == ' ' && pt[1] == '>' ) ||
312
+ ( pt[0] == ' ' && pt[1] == ' ' && pt[2] == '>') ||
313
+ ( pt[0] == ' ' && pt[1] == ' ' && pt[2] == ' ' && pt[3] == '>');
278
314
  }
279
315
 
280
316
 
@@ -330,7 +366,7 @@ ishdr(Line *t, int *htyp)
330
366
 
331
367
  /* ANY leading `#`'s make this into an ETX header
332
368
  */
333
- if ( i ) {
369
+ if ( i && (i < S(t->text) || i > 1) ) {
334
370
  *htyp = ETX;
335
371
  return 1;
336
372
  }
@@ -546,17 +582,17 @@ szmarkerclass(char *p)
546
582
  * marker %[kind:]name%
547
583
  */
548
584
  static int
549
- isdivmarker(Line *p)
585
+ isdivmarker(Line *p, int start)
550
586
  {
551
587
  #if DIV_QUOTE
552
588
  char *s = T(p->text);
553
589
  int len = S(p->text);
554
590
  int i;
555
591
 
556
- if ( !(len && s[0] == '%' && s[len-1] == '%') ) return 0;
592
+ if ( !(len && s[start] == '%' && s[len-1] == '%') ) return 0;
557
593
 
558
- i = szmarkerclass(s+1);
559
- --len;
594
+ i = szmarkerclass(s+start+1)+start;
595
+ len -= start+1;
560
596
 
561
597
  while ( ++i < len )
562
598
  if ( !isalnum(s[i]) )
@@ -588,18 +624,22 @@ quoteblock(Paragraph *p)
588
624
 
589
625
  for ( t = p->text; t ; t = q ) {
590
626
  if ( isquote(t) ) {
591
- qp = (T(t->text)[1] == ' ') ? 2 : 1;
627
+ char *p = strchr(T(t->text), '>');
628
+ if ( p[1] == ' ' ) p++;
629
+ qp = p - T(t->text) + 1;
592
630
  CLIP(t->text, 0, qp);
593
631
  t->dle = mkd_firstnonblank(t);
594
632
  }
595
633
 
596
- if ( !(q = skipempty(t->next)) || ((q != t->next) && !isquote(q)) ) {
634
+ q = skipempty(t->next);
635
+
636
+ if ( (q == 0) || ((q != t->next) && (!isquote(q) || isdivmarker(q,1))) ) {
597
637
  ___mkd_freeLineRange(t, q);
598
638
  t = q;
599
639
  break;
600
640
  }
601
641
  }
602
- if ( isdivmarker(p->text) ) {
642
+ if ( isdivmarker(p->text,0) ) {
603
643
  char *prefix = "class";
604
644
  int i;
605
645
 
@@ -610,7 +650,7 @@ quoteblock(Paragraph *p)
610
650
  /* and this would be an "%id:" prefix */
611
651
  prefix="id";
612
652
 
613
- if ( p->ident = malloc(4+i+S(q->text)) )
653
+ if ( p->ident = malloc(4+strlen(prefix)+S(q->text)) )
614
654
  sprintf(p->ident, "%s=\"%.*s\"", prefix, S(q->text)-(i+2),
615
655
  T(q->text)+(i+1) );
616
656
 
@@ -620,6 +660,25 @@ quoteblock(Paragraph *p)
620
660
  }
621
661
 
622
662
 
663
+ /*
664
+ * A table block starts with a table header (see istable()), and continues
665
+ * until EOF or a line that /doesn't/ contain a |.
666
+ */
667
+ static Line *
668
+ tableblock(Paragraph *p)
669
+ {
670
+ Line *t, *q;
671
+
672
+ for ( t = p->text; t && (q = t->next); t = t->next ) {
673
+ if ( !memchr(T(q->text), '|', S(q->text)) ) {
674
+ t->next = 0;
675
+ return q;
676
+ }
677
+ }
678
+ return 0;
679
+ }
680
+
681
+
623
682
  static Paragraph *Pp(ParagraphRoot *, Line *, int);
624
683
  static Paragraph *compile(Line *, int, MMIOT *);
625
684
 
@@ -651,7 +710,7 @@ listitem(Paragraph *p, int indent)
651
710
  * need any indentation
652
711
  */
653
712
  if ( q != t->next ) {
654
- if (q->dle < 4) {
713
+ if (q->dle < indent) {
655
714
  q = t->next;
656
715
  t->next = 0;
657
716
  return q;
@@ -676,9 +735,10 @@ listblock(Paragraph *top, int trim, MMIOT *f)
676
735
  {
677
736
  ParagraphRoot d = { 0, 0 };
678
737
  Paragraph *p;
679
- Line *q = top->text, *text;
680
- Line *label;
681
- int para = 0;
738
+ Line *q = top->text, *text, *label;
739
+ int isdl = (top->typ == DL),
740
+ para = 0,
741
+ ltype;
682
742
 
683
743
  while (( text = q )) {
684
744
  if ( top->typ == DL ) {
@@ -702,7 +762,8 @@ listblock(Paragraph *top, int trim, MMIOT *f)
702
762
 
703
763
  if ( para && (top->typ != DL) && p->down ) p->down->align = PARA;
704
764
 
705
- if ( !(q = skipempty(text)) || (islist(q, &trim) == 0) )
765
+ if ( !(q = skipempty(text)) || ((ltype = islist(q, &trim)) == 0)
766
+ || (isdl != (ltype == DL)) )
706
767
  break;
707
768
 
708
769
  if ( para = (q != text) ) {
@@ -832,6 +893,64 @@ consume(Line *ptr, int *eaten)
832
893
  }
833
894
 
834
895
 
896
+ /*
897
+ * top-level compilation; break the document into
898
+ * style, html, and source blocks with footnote links
899
+ * weeded out.
900
+ */
901
+ static Paragraph *
902
+ compile_document(Line *ptr, MMIOT *f)
903
+ {
904
+ ParagraphRoot d = { 0, 0 };
905
+ ANCHOR(Line) source = { 0, 0 };
906
+ Paragraph *p = 0;
907
+ struct kw *tag;
908
+ int eaten;
909
+
910
+ while ( ptr ) {
911
+ if ( !(f->flags & DENY_HTML) && (tag = isopentag(ptr)) ) {
912
+ /* If we encounter a html/style block, compile and save all
913
+ * of the cached source BEFORE processing the html/style.
914
+ */
915
+ if ( T(source) ) {
916
+ E(source)->next = 0;
917
+ p = Pp(&d, 0, SOURCE);
918
+ p->down = compile(T(source), 1, f);
919
+ T(source) = E(source) = 0;
920
+ }
921
+ p = Pp(&d, ptr, strcmp(tag->id, "STYLE") == 0 ? STYLE : HTML);
922
+ if ( strcmp(tag->id, "!--") == 0 )
923
+ ptr = comment(p);
924
+ else
925
+ ptr = htmlblock(p, tag);
926
+ }
927
+ else if ( isfootnote(ptr) ) {
928
+ /* footnotes, like cats, sleep anywhere; pull them
929
+ * out of the input stream and file them away for
930
+ * later processing
931
+ */
932
+ ptr = consume(addfootnote(ptr, f), &eaten);
933
+ }
934
+ else {
935
+ /* source; cache it up to wait for eof or the
936
+ * next html/style block
937
+ */
938
+ ATTACH(source,ptr);
939
+ ptr = ptr->next;
940
+ }
941
+ }
942
+ if ( T(source) ) {
943
+ /* if there's any cached source at EOF, compile
944
+ * it now.
945
+ */
946
+ E(source)->next = 0;
947
+ p = Pp(&d, 0, SOURCE);
948
+ p->down = compile(T(source), 1, f);
949
+ }
950
+ return T(d);
951
+ }
952
+
953
+
835
954
  /*
836
955
  * break a collection of markdown input into
837
956
  * blocks of lists, code, html, and text to
@@ -842,29 +961,22 @@ compile(Line *ptr, int toplevel, MMIOT *f)
842
961
  {
843
962
  ParagraphRoot d = { 0, 0 };
844
963
  Paragraph *p = 0;
845
- char *key;
846
964
  Line *r;
847
965
  int para = toplevel;
966
+ int blocks = 0;
848
967
  int hdr_type, list_type, indent;
849
968
 
850
969
  ptr = consume(ptr, &para);
851
970
 
852
971
  while ( ptr ) {
853
- if ( toplevel && !(f->flags & DENY_HTML) && (key = isopentag(ptr)) ) {
854
- p = Pp(&d, ptr, strcmp(key, "STYLE") == 0 ? STYLE : HTML);
855
- if ( strcmp(key, "!--") == 0 )
856
- ptr = comment(p, key);
857
- else
858
- ptr = htmlblock(p, key);
859
- }
860
- else if ( iscode(ptr) ) {
972
+ if ( iscode(ptr) ) {
861
973
  p = Pp(&d, ptr, CODE);
862
974
 
863
975
  if ( f->flags & MKD_1_COMPAT) {
864
976
  /* HORRIBLE STANDARDS KLUDGE: the first line of every block
865
977
  * has trailing whitespace trimmed off.
866
978
  */
867
- ___mkd_tidy(p->text);
979
+ ___mkd_tidy(&p->text->text);
868
980
  }
869
981
 
870
982
  ptr = codeblock(p);
@@ -889,9 +1001,9 @@ compile(Line *ptr, int toplevel, MMIOT *f)
889
1001
  p = Pp(&d, ptr, HDR);
890
1002
  ptr = headerblock(p, hdr_type);
891
1003
  }
892
- else if ( toplevel && (isfootnote(ptr)) ) {
893
- ptr = consume(addfootnote(ptr, f), &para);
894
- continue;
1004
+ else if ( istable(ptr) && !(f->flags & (STRICT|NOTABLES)) ) {
1005
+ p = Pp(&d, ptr, TABLE);
1006
+ ptr = tableblock(p);
895
1007
  }
896
1008
  else {
897
1009
  p = Pp(&d, ptr, MARKUP);
@@ -901,7 +1013,8 @@ compile(Line *ptr, int toplevel, MMIOT *f)
901
1013
  if ( (para||toplevel) && !p->align )
902
1014
  p->align = PARA;
903
1015
 
904
- para = toplevel;
1016
+ blocks++;
1017
+ para = toplevel || (blocks > 1);
905
1018
  ptr = consume(ptr, &para);
906
1019
 
907
1020
  if ( para && !p->align )
@@ -952,7 +1065,7 @@ mkd_compile(Document *doc, int flags)
952
1065
 
953
1066
  initialize();
954
1067
 
955
- doc->code = compile(T(doc->content), 1, doc->ctx);
1068
+ doc->code = compile_document(T(doc->content), doc->ctx);
956
1069
  qsort(T(*doc->ctx->footnotes), S(*doc->ctx->footnotes),
957
1070
  sizeof T(*doc->ctx->footnotes)[0],
958
1071
  (stfu)__mkd_footsort);