rdiscount 1.3.5 → 1.5.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/ext/markdown.c CHANGED
@@ -21,10 +21,12 @@
21
21
  */
22
22
  struct kw {
23
23
  char *id;
24
- int siz;
24
+ int size;
25
+ int selfclose;
25
26
  } ;
26
27
 
27
- #define KW(x) { x, sizeof(x)-1 }
28
+ #define KW(x) { x, sizeof(x)-1, 0 }
29
+ #define SC(x) { x, sizeof(x)-1, 1 }
28
30
 
29
31
  static struct kw blocktags[] = { KW("!--"), KW("STYLE"), KW("SCRIPT"),
30
32
  KW("ADDRESS"), KW("BDO"), KW("BLOCKQUOTE"),
@@ -33,7 +35,8 @@ static struct kw blocktags[] = { KW("!--"), KW("STYLE"), KW("SCRIPT"),
33
35
  KW("H6"), KW("LISTING"), KW("NOBR"),
34
36
  KW("UL"), KW("P"), KW("OL"), KW("DL"),
35
37
  KW("PLAINTEXT"), KW("PRE"), KW("TABLE"),
36
- KW("WBR"), KW("XMP"), KW("HR"), KW("BR") };
38
+ KW("WBR"), KW("XMP"), SC("HR"), SC("BR"),
39
+ KW("IFRAME"), KW("MAP") };
37
40
  #define SZTAGS (sizeof blocktags / sizeof blocktags[0])
38
41
  #define MAXTAG 11 /* sizeof "BLOCKQUOTE" */
39
42
 
@@ -47,9 +50,9 @@ typedef ANCHOR(Paragraph) ParagraphRoot;
47
50
  static int
48
51
  casort(struct kw *a, struct kw *b)
49
52
  {
50
- if ( a->siz != b->siz )
51
- return a->siz - b->siz;
52
- return strncasecmp(a->id, b->id, b->siz);
53
+ if ( a->size != b->size )
54
+ return a->size - b->size;
55
+ return strncasecmp(a->id, b->id, b->size);
53
56
  }
54
57
 
55
58
 
@@ -125,14 +128,14 @@ skipempty(Line *p)
125
128
 
126
129
 
127
130
  void
128
- ___mkd_tidy(Line *t)
131
+ ___mkd_tidy(Cstring *t)
129
132
  {
130
- while ( S(t->text) && isspace(T(t->text)[S(t->text)-1]) )
131
- --S(t->text);
133
+ while ( S(*t) && isspace(T(*t)[S(*t)-1]) )
134
+ --S(*t);
132
135
  }
133
136
 
134
137
 
135
- static char *
138
+ static struct kw *
136
139
  isopentag(Line *p)
137
140
  {
138
141
  int i=0, len;
@@ -154,79 +157,84 @@ isopentag(Line *p)
154
157
  ;
155
158
 
156
159
  key.id = T(p->text)+1;
157
- key.siz = i-1;
160
+ key.size = i-1;
158
161
 
159
- if ( ret = bsearch(&key,blocktags,SZTAGS,sizeof key, (stfu)casort))
160
- return ret->id;
162
+ if ( ret = bsearch(&key, blocktags, SZTAGS, sizeof key, (stfu)casort))
163
+ return ret;
161
164
 
162
165
  return 0;
163
166
  }
164
167
 
165
168
 
166
- static int
167
- selfclose(Line *t, char *tag)
168
- {
169
- char *q = T(t->text);
170
- int siz = strlen(tag);
169
+ typedef struct _flo {
170
+ Line *t;
171
171
  int i;
172
+ } FLO;
172
173
 
173
- if ( strcasecmp(tag, "HR") == 0 || strcasecmp(tag, "BR") == 0 )
174
- /* <HR> and <BR> are self-closing block-level tags,
175
- */
176
- return 1;
177
174
 
178
- i = S(t->text) - (siz + 3);
179
-
180
- /* we specialcase start and end tags on the same line.
181
- */
182
- return ( i > 0 ) && (q[i] == '<') && (q[i+1] == '/')
183
- && (q[i+2+siz] == '>')
184
- && (strncasecmp(&q[i+2], tag, siz) == 0);
175
+ static int
176
+ flogetc(FLO *f)
177
+ {
178
+ if ( f && f->t ) {
179
+ if ( f->i < S(f->t->text) )
180
+ return T(f->t->text)[f->i++];
181
+ f->t = f->t->next;
182
+ f->i = 0;
183
+ return flogetc(f);
184
+ }
185
+ return EOF;
185
186
  }
186
187
 
187
188
 
188
189
  static Line *
189
- htmlblock(Paragraph *p, char *tag)
190
+ htmlblock(Paragraph *p, struct kw *tag)
190
191
  {
191
- Line *t = p->text, *ret;
192
- int closesize, tagsize;
193
- char close[MAXTAG+4];
194
-
195
- char *ps, *pse;
196
- int depth;
197
-
198
- tagsize = strlen(tag);
192
+ Line *ret;
193
+ FLO f = { p->text, 0 };
194
+ int c;
195
+ int i, closing, depth=0;
199
196
 
200
- if ( selfclose(t, tag) || (tagsize >= MAXTAG) ) {
201
- ret = t->next;
202
- t->next = 0;
197
+ if ( tag->selfclose || (tag->size >= MAXTAG) ) {
198
+ ret = f.t->next;
199
+ f.t->next = 0;
203
200
  return ret;
204
201
  }
205
202
 
206
- closesize = sprintf(close, "</%s>", tag);
207
- depth = 0;
208
-
209
- for ( ; t ; t = t->next) {
210
- ps = T(t->text);
211
- pse = ps + (S(t->text) - (tagsize + 1));
212
- for ( ; ps < pse; ps++ ) {
213
- if ( *ps == '<' ) {
214
- /* check for close tag */
215
- if ( strncasecmp(ps, close, closesize) == 0 ) {
216
- depth--;
217
- if ( depth == 0 ) {
218
- ret = t->next;
219
- t->next = 0;
220
- return ret;
221
- }
222
- continue;
223
- }
224
-
225
- /* check for nested open tag */
226
- if ( (strncasecmp(ps + 1, tag, tagsize) == 0) &&
227
- (ps[tagsize + 1] == '>' || ps[tagsize + 1] == ' ') ) {
228
- depth++;
229
- }
203
+ while ( (c = flogetc(&f)) != EOF ) {
204
+ if ( c == '<' ) {
205
+ /* tag? */
206
+ c = flogetc(&f);
207
+ if ( c == '!' ) { /* comment? */
208
+ if ( flogetc(&f) == '-' && flogetc(&f) == '-' ) {
209
+ /* yes */
210
+ while ( (c = flogetc(&f)) != EOF ) {
211
+ if ( c == '-' && flogetc(&f) == '-'
212
+ && flogetc(&f) == '>')
213
+ /* consumed whole comment */
214
+ break;
215
+ }
216
+ }
217
+ }
218
+ else {
219
+ if ( closing = (c == '/') ) c = flogetc(&f);
220
+
221
+ for ( i=0; i < tag->size; c=flogetc(&f) ) {
222
+ if ( tag->id[i++] != toupper(c) )
223
+ break;
224
+ }
225
+
226
+ if ( (i == tag->size) && !isalnum(c) ) {
227
+ depth = depth + (closing ? -1 : 1);
228
+ if ( depth == 0 ) {
229
+ while ( c != EOF && c != '>' ) {
230
+ /* consume trailing gunk in close tag */
231
+ c = flogetc(&f);
232
+ }
233
+ ret = f.t->next;
234
+ f.t->next = 0;
235
+ return ret;
236
+ }
237
+ }
230
238
  }
231
239
  }
232
240
  }
@@ -235,7 +243,7 @@ htmlblock(Paragraph *p, char *tag)
235
243
 
236
244
 
237
245
  static Line *
238
- comment(Paragraph *p, char *key)
246
+ comment(Paragraph *p)
239
247
  {
240
248
  Line *t, *ret;
241
249
 
@@ -251,6 +259,30 @@ comment(Paragraph *p, char *key)
251
259
  }
252
260
 
253
261
 
262
+ /* tables look like
263
+ * header|header{|header}
264
+ * ------|------{|......}
265
+ * {body lines}
266
+ */
267
+ static int
268
+ istable(Line *t)
269
+ {
270
+ char *p;
271
+ Line *dashes = t->next;
272
+
273
+ /* two lines, first must contain | */
274
+ if ( !(dashes && memchr(T(t->text), '|', S(t->text))) )
275
+ return 0;
276
+
277
+ /* second line must be only whitespace, |, :, or - */
278
+ for ( p = T(dashes->text)+S(dashes->text)-1; p >= T(dashes->text); --p)
279
+ if ( ! ((*p == '|') || (*p == ':') || (*p == '-') || isspace(*p)) )
280
+ return 0;
281
+
282
+ return 1;
283
+ }
284
+
285
+
254
286
  /* footnotes look like ^<whitespace>{0,3}[stuff]: <content>$
255
287
  */
256
288
  static int
@@ -274,7 +306,11 @@ isfootnote(Line *t)
274
306
  static int
275
307
  isquote(Line *t)
276
308
  {
277
- return ( T(t->text)[0] == '>' );
309
+ char *pt = T(t->text);
310
+ return ( pt[0] == '>' ) ||
311
+ ( pt[0] == ' ' && pt[1] == '>' ) ||
312
+ ( pt[0] == ' ' && pt[1] == ' ' && pt[2] == '>') ||
313
+ ( pt[0] == ' ' && pt[1] == ' ' && pt[2] == ' ' && pt[3] == '>');
278
314
  }
279
315
 
280
316
 
@@ -330,7 +366,7 @@ ishdr(Line *t, int *htyp)
330
366
 
331
367
  /* ANY leading `#`'s make this into an ETX header
332
368
  */
333
- if ( i ) {
369
+ if ( i && (i < S(t->text) || i > 1) ) {
334
370
  *htyp = ETX;
335
371
  return 1;
336
372
  }
@@ -546,17 +582,17 @@ szmarkerclass(char *p)
546
582
  * marker %[kind:]name%
547
583
  */
548
584
  static int
549
- isdivmarker(Line *p)
585
+ isdivmarker(Line *p, int start)
550
586
  {
551
587
  #if DIV_QUOTE
552
588
  char *s = T(p->text);
553
589
  int len = S(p->text);
554
590
  int i;
555
591
 
556
- if ( !(len && s[0] == '%' && s[len-1] == '%') ) return 0;
592
+ if ( !(len && s[start] == '%' && s[len-1] == '%') ) return 0;
557
593
 
558
- i = szmarkerclass(s+1);
559
- --len;
594
+ i = szmarkerclass(s+start+1)+start;
595
+ len -= start+1;
560
596
 
561
597
  while ( ++i < len )
562
598
  if ( !isalnum(s[i]) )
@@ -588,18 +624,22 @@ quoteblock(Paragraph *p)
588
624
 
589
625
  for ( t = p->text; t ; t = q ) {
590
626
  if ( isquote(t) ) {
591
- qp = (T(t->text)[1] == ' ') ? 2 : 1;
627
+ char *p = strchr(T(t->text), '>');
628
+ if ( p[1] == ' ' ) p++;
629
+ qp = p - T(t->text) + 1;
592
630
  CLIP(t->text, 0, qp);
593
631
  t->dle = mkd_firstnonblank(t);
594
632
  }
595
633
 
596
- if ( !(q = skipempty(t->next)) || ((q != t->next) && !isquote(q)) ) {
634
+ q = skipempty(t->next);
635
+
636
+ if ( (q == 0) || ((q != t->next) && (!isquote(q) || isdivmarker(q,1))) ) {
597
637
  ___mkd_freeLineRange(t, q);
598
638
  t = q;
599
639
  break;
600
640
  }
601
641
  }
602
- if ( isdivmarker(p->text) ) {
642
+ if ( isdivmarker(p->text,0) ) {
603
643
  char *prefix = "class";
604
644
  int i;
605
645
 
@@ -610,7 +650,7 @@ quoteblock(Paragraph *p)
610
650
  /* and this would be an "%id:" prefix */
611
651
  prefix="id";
612
652
 
613
- if ( p->ident = malloc(4+i+S(q->text)) )
653
+ if ( p->ident = malloc(4+strlen(prefix)+S(q->text)) )
614
654
  sprintf(p->ident, "%s=\"%.*s\"", prefix, S(q->text)-(i+2),
615
655
  T(q->text)+(i+1) );
616
656
 
@@ -620,6 +660,25 @@ quoteblock(Paragraph *p)
620
660
  }
621
661
 
622
662
 
663
+ /*
664
+ * A table block starts with a table header (see istable()), and continues
665
+ * until EOF or a line that /doesn't/ contain a |.
666
+ */
667
+ static Line *
668
+ tableblock(Paragraph *p)
669
+ {
670
+ Line *t, *q;
671
+
672
+ for ( t = p->text; t && (q = t->next); t = t->next ) {
673
+ if ( !memchr(T(q->text), '|', S(q->text)) ) {
674
+ t->next = 0;
675
+ return q;
676
+ }
677
+ }
678
+ return 0;
679
+ }
680
+
681
+
623
682
  static Paragraph *Pp(ParagraphRoot *, Line *, int);
624
683
  static Paragraph *compile(Line *, int, MMIOT *);
625
684
 
@@ -651,7 +710,7 @@ listitem(Paragraph *p, int indent)
651
710
  * need any indentation
652
711
  */
653
712
  if ( q != t->next ) {
654
- if (q->dle < 4) {
713
+ if (q->dle < indent) {
655
714
  q = t->next;
656
715
  t->next = 0;
657
716
  return q;
@@ -676,9 +735,10 @@ listblock(Paragraph *top, int trim, MMIOT *f)
676
735
  {
677
736
  ParagraphRoot d = { 0, 0 };
678
737
  Paragraph *p;
679
- Line *q = top->text, *text;
680
- Line *label;
681
- int para = 0;
738
+ Line *q = top->text, *text, *label;
739
+ int isdl = (top->typ == DL),
740
+ para = 0,
741
+ ltype;
682
742
 
683
743
  while (( text = q )) {
684
744
  if ( top->typ == DL ) {
@@ -702,7 +762,8 @@ listblock(Paragraph *top, int trim, MMIOT *f)
702
762
 
703
763
  if ( para && (top->typ != DL) && p->down ) p->down->align = PARA;
704
764
 
705
- if ( !(q = skipempty(text)) || (islist(q, &trim) == 0) )
765
+ if ( !(q = skipempty(text)) || ((ltype = islist(q, &trim)) == 0)
766
+ || (isdl != (ltype == DL)) )
706
767
  break;
707
768
 
708
769
  if ( para = (q != text) ) {
@@ -832,6 +893,64 @@ consume(Line *ptr, int *eaten)
832
893
  }
833
894
 
834
895
 
896
+ /*
897
+ * top-level compilation; break the document into
898
+ * style, html, and source blocks with footnote links
899
+ * weeded out.
900
+ */
901
+ static Paragraph *
902
+ compile_document(Line *ptr, MMIOT *f)
903
+ {
904
+ ParagraphRoot d = { 0, 0 };
905
+ ANCHOR(Line) source = { 0, 0 };
906
+ Paragraph *p = 0;
907
+ struct kw *tag;
908
+ int eaten;
909
+
910
+ while ( ptr ) {
911
+ if ( !(f->flags & DENY_HTML) && (tag = isopentag(ptr)) ) {
912
+ /* If we encounter a html/style block, compile and save all
913
+ * of the cached source BEFORE processing the html/style.
914
+ */
915
+ if ( T(source) ) {
916
+ E(source)->next = 0;
917
+ p = Pp(&d, 0, SOURCE);
918
+ p->down = compile(T(source), 1, f);
919
+ T(source) = E(source) = 0;
920
+ }
921
+ p = Pp(&d, ptr, strcmp(tag->id, "STYLE") == 0 ? STYLE : HTML);
922
+ if ( strcmp(tag->id, "!--") == 0 )
923
+ ptr = comment(p);
924
+ else
925
+ ptr = htmlblock(p, tag);
926
+ }
927
+ else if ( isfootnote(ptr) ) {
928
+ /* footnotes, like cats, sleep anywhere; pull them
929
+ * out of the input stream and file them away for
930
+ * later processing
931
+ */
932
+ ptr = consume(addfootnote(ptr, f), &eaten);
933
+ }
934
+ else {
935
+ /* source; cache it up to wait for eof or the
936
+ * next html/style block
937
+ */
938
+ ATTACH(source,ptr);
939
+ ptr = ptr->next;
940
+ }
941
+ }
942
+ if ( T(source) ) {
943
+ /* if there's any cached source at EOF, compile
944
+ * it now.
945
+ */
946
+ E(source)->next = 0;
947
+ p = Pp(&d, 0, SOURCE);
948
+ p->down = compile(T(source), 1, f);
949
+ }
950
+ return T(d);
951
+ }
952
+
953
+
835
954
  /*
836
955
  * break a collection of markdown input into
837
956
  * blocks of lists, code, html, and text to
@@ -842,29 +961,22 @@ compile(Line *ptr, int toplevel, MMIOT *f)
842
961
  {
843
962
  ParagraphRoot d = { 0, 0 };
844
963
  Paragraph *p = 0;
845
- char *key;
846
964
  Line *r;
847
965
  int para = toplevel;
966
+ int blocks = 0;
848
967
  int hdr_type, list_type, indent;
849
968
 
850
969
  ptr = consume(ptr, &para);
851
970
 
852
971
  while ( ptr ) {
853
- if ( toplevel && !(f->flags & DENY_HTML) && (key = isopentag(ptr)) ) {
854
- p = Pp(&d, ptr, strcmp(key, "STYLE") == 0 ? STYLE : HTML);
855
- if ( strcmp(key, "!--") == 0 )
856
- ptr = comment(p, key);
857
- else
858
- ptr = htmlblock(p, key);
859
- }
860
- else if ( iscode(ptr) ) {
972
+ if ( iscode(ptr) ) {
861
973
  p = Pp(&d, ptr, CODE);
862
974
 
863
975
  if ( f->flags & MKD_1_COMPAT) {
864
976
  /* HORRIBLE STANDARDS KLUDGE: the first line of every block
865
977
  * has trailing whitespace trimmed off.
866
978
  */
867
- ___mkd_tidy(p->text);
979
+ ___mkd_tidy(&p->text->text);
868
980
  }
869
981
 
870
982
  ptr = codeblock(p);
@@ -889,9 +1001,9 @@ compile(Line *ptr, int toplevel, MMIOT *f)
889
1001
  p = Pp(&d, ptr, HDR);
890
1002
  ptr = headerblock(p, hdr_type);
891
1003
  }
892
- else if ( toplevel && (isfootnote(ptr)) ) {
893
- ptr = consume(addfootnote(ptr, f), &para);
894
- continue;
1004
+ else if ( istable(ptr) && !(f->flags & (STRICT|NOTABLES)) ) {
1005
+ p = Pp(&d, ptr, TABLE);
1006
+ ptr = tableblock(p);
895
1007
  }
896
1008
  else {
897
1009
  p = Pp(&d, ptr, MARKUP);
@@ -901,7 +1013,8 @@ compile(Line *ptr, int toplevel, MMIOT *f)
901
1013
  if ( (para||toplevel) && !p->align )
902
1014
  p->align = PARA;
903
1015
 
904
- para = toplevel;
1016
+ blocks++;
1017
+ para = toplevel || (blocks > 1);
905
1018
  ptr = consume(ptr, &para);
906
1019
 
907
1020
  if ( para && !p->align )
@@ -952,7 +1065,7 @@ mkd_compile(Document *doc, int flags)
952
1065
 
953
1066
  initialize();
954
1067
 
955
- doc->code = compile(T(doc->content), 1, doc->ctx);
1068
+ doc->code = compile_document(T(doc->content), doc->ctx);
956
1069
  qsort(T(*doc->ctx->footnotes), S(*doc->ctx->footnotes),
957
1070
  sizeof T(*doc->ctx->footnotes)[0],
958
1071
  (stfu)__mkd_footsort);