bluecloth 2.0.3 → 2.0.4

Sign up to get free protection for your applications and to get access to all the features.
data/ChangeLog CHANGED
@@ -1,3 +1,26 @@
1
+ -- Tue, 30 Jun 2009 01:33:36 -0000 by deveiant (r124) -----
2
+ Changed: ext/markdown.c
3
+ ext/VERSION
4
+ ext/markdown.h
5
+ ext/generate.c
6
+
7
+ * Updated with Discount 1.4.4.
8
+
9
+
10
+ -- Mon, 15 Jun 2009 13:15:54 -0000 by deveiant (r123) -----
11
+ Added: spec/discount_spec.rb (new)
12
+ Changed: ext/cstring.h
13
+ ext/bluecloth.c
14
+ ext/markdown.c
15
+ spec/bluecloth/links_spec.rb
16
+ spec/lib/matchers.rb
17
+ ext/resource.c (and 10 other/s)
18
+
19
+ * Updated to Discount 1.4.2.
20
+ * Added some more tests for various Discount extensions.
21
+ * Added support for the MKD_AUTOLINK and MKD_SAFELINK.
22
+
23
+
1
24
  -- Tue, 26 May 2009 05:08:15 -0000 by deveiant (r121) -----
2
25
  Added: ext/Csio.c (new)
3
26
  ext/xmlpage.c (new)
data/ext/Csio.c CHANGED
@@ -26,9 +26,9 @@ Csprintf(Cstring *iot, char *fmt, ...)
26
26
  do {
27
27
  RESERVE(*iot, siz);
28
28
  va_start(ptr, fmt);
29
- siz = vsnprintf(T(*iot)+S(*iot), ALL(*iot)-S(*iot), fmt, ptr);
29
+ siz = vsnprintf(T(*iot)+S(*iot), ALLOCATED(*iot)-S(*iot), fmt, ptr);
30
30
  va_end(ptr);
31
- } while ( siz > (ALL(*iot)-S(*iot)) );
31
+ } while ( siz > (ALLOCATED(*iot)-S(*iot)) );
32
32
 
33
33
  S(*iot) += siz;
34
34
  return siz;
data/ext/VERSION CHANGED
@@ -1 +1 @@
1
- 1.4.0
1
+ 1.4.4
data/ext/bluecloth.c CHANGED
@@ -1,6 +1,6 @@
1
1
  /*
2
2
  * BlueCloth -- a Ruby implementation of Markdown
3
- * $Id: bluecloth.c 117 2009-05-14 15:21:03Z deveiant $
3
+ * $Id: bluecloth.c 123 2009-06-15 13:15:54Z deveiant $
4
4
  *
5
5
  * = Authors
6
6
  *
@@ -358,6 +358,12 @@ void Init_bluecloth_ext( void ) {
358
358
  /* MKD_NOLINKS|MKD_NOIMAGE|MKD_TAGTEXT */
359
359
  rb_define_const( bluecloth_cBlueCloth, "MKD_EMBED", INT2FIX(MKD_EMBED) );
360
360
 
361
+ /* Create links for inline URIs */
362
+ rb_define_const( bluecloth_cBlueCloth, "MKD_AUTOLINK", INT2FIX(MKD_AUTOLINK) );
363
+
364
+ /* Be paranoid about link protocols */
365
+ rb_define_const( bluecloth_cBlueCloth, "MKD_SAFELINK", INT2FIX(MKD_SAFELINK) );
366
+
361
367
 
362
368
  /* don't process header blocks */
363
369
  rb_define_const( bluecloth_cBlueCloth, "MKD_NOHEADER", INT2FIX(MKD_NOHEADER) );
data/ext/cstring.h CHANGED
@@ -22,8 +22,8 @@
22
22
  : (T(x) = T(x) ? realloc(T(x), sizeof T(x)[0] * ((x).alloc += 100)) \
23
23
  : malloc(sizeof T(x)[0] * ((x).alloc += 100)) )]
24
24
 
25
- #define DELETE(x) (x).alloc ? (free(T(x)), S(x) = (x).alloc = 0) \
26
- : ( S(x) = 0 )
25
+ #define DELETE(x) ALLOCATED(x) ? (free(T(x)), S(x) = (x).alloc = 0) \
26
+ : ( S(x) = 0 )
27
27
  #define CLIP(t,i,sz) \
28
28
  ( ((i) >= 0) && ((sz) > 0) && (((i)+(sz)) <= S(t)) ) ? \
29
29
  (memmove(&T(t)[i], &T(t)[i+sz], (S(t)-(i+sz)+1)*sizeof(T(t)[0])), \
@@ -50,7 +50,7 @@
50
50
  */
51
51
  #define T(x) (x).text
52
52
  #define S(x) (x).size
53
- #define ALL(x) (x).alloc
53
+ #define ALLOCATED(x) (x).alloc
54
54
 
55
55
  /* abstract anchor type that defines a list base
56
56
  * with a function that attaches an element to
data/ext/generate.c CHANGED
@@ -17,11 +17,6 @@
17
17
  #include "markdown.h"
18
18
  #include "amalloc.h"
19
19
 
20
- /* prefixes for <automatic links>
21
- */
22
- static char *autoprefix[] = { "http://", "https://", "ftp://", "news://" };
23
- #define SZAUTOPREFIX (sizeof autoprefix / sizeof autoprefix[0])
24
-
25
20
  typedef int (*stfu)(const void*,const void*);
26
21
 
27
22
 
@@ -119,7 +114,7 @@ shift(MMIOT *f, int i)
119
114
  /* Qchar()
120
115
  */
121
116
  static void
122
- Qchar(char c, MMIOT *f)
117
+ Qchar(int c, MMIOT *f)
123
118
  {
124
119
  block *cur;
125
120
 
@@ -328,18 +323,27 @@ ___mkd_reparse(char *bfr, int size, int flags, MMIOT *f)
328
323
  * write out a url, escaping problematic characters
329
324
  */
330
325
  static void
331
- puturl(char *s, int size, MMIOT *f)
326
+ puturl(char *s, int size, MMIOT *f, int display)
332
327
  {
333
328
  unsigned char c;
334
329
 
335
330
  while ( size-- > 0 ) {
336
331
  c = *s++;
337
332
 
333
+ if ( c == '\\' && size-- > 0 ) {
334
+ c = *s++;
335
+
336
+ if ( !( ispunct(c) || isspace(c) ) )
337
+ Qchar('\\', f);
338
+ }
339
+
338
340
  if ( c == '&' )
339
341
  Qstring("&amp;", f);
340
342
  else if ( c == '<' )
341
343
  Qstring("&lt;", f);
342
- else if ( isalnum(c) || ispunct(c) )
344
+ else if ( c == '"' )
345
+ Qstring("%22", f);
346
+ else if ( isalnum(c) || ispunct(c) || (display && isspace(c)) )
343
347
  Qchar(c, f);
344
348
  else
345
349
  Qprintf(f, "%%%02X", c);
@@ -372,186 +376,164 @@ parenthetical(int in, int out, MMIOT *f)
372
376
  return EOF;
373
377
  else if ( c == in )
374
378
  ++indent;
379
+ else if ( (c == '\\') && (peek(f,1) == out) ) {
380
+ ++size;
381
+ pull(f);
382
+ }
375
383
  else if ( c == out )
376
384
  --indent;
377
385
  }
378
- return size-1;
386
+ return size ? (size-1) : 0;
379
387
  }
380
388
 
381
389
 
382
390
  /* extract a []-delimited label from the input stream.
383
391
  */
384
- static char *
385
- linkylabel(MMIOT *f, int *sizep)
392
+ static int
393
+ linkylabel(MMIOT *f, Cstring *res)
386
394
  {
387
395
  char *ptr = cursor(f);
396
+ int size;
388
397
 
389
- if ( (*sizep = parenthetical('[',']',f)) != EOF )
390
- return ptr;
398
+ if ( (size = parenthetical('[',']',f)) != EOF ) {
399
+ T(*res) = ptr;
400
+ S(*res) = size;
401
+ return 1;
402
+ }
391
403
  return 0;
392
404
  }
393
405
 
394
406
 
395
- /* extract a (-prefixed url from the input stream.
396
- * the label is either of the format `<link>`, where I
397
- * extract until I find a >, or it is of the format
398
- * `text`, where I extract until I reach a ')' or
399
- * whitespace.
407
+ /* see if the quote-prefixed linky segment is actually a title.
400
408
  */
401
- static char*
402
- linkyurl(MMIOT *f, int *sizep)
409
+ static int
410
+ linkytitle(MMIOT *f, char quote, Footnote *ref)
403
411
  {
404
- int size = 0;
405
- char *ptr;
406
- int c;
407
-
408
- if ( (c = eatspace(f)) == EOF )
409
- return 0;
412
+ int whence = mmiottell(f);
413
+ char *title = cursor(f);
414
+ char *e;
415
+ register int c;
410
416
 
411
- ptr = cursor(f);
412
-
413
- /* if I do (title:blah blah blah) embedded links, I need to subvert
414
- * linkyurl to do a lookahead for the pseudo-protocol, then snarf
415
- * up everything up to the terminating ')'
416
- */
417
-
418
- if ( c == '<' ) {
419
- pull(f);
420
- ptr++;
421
- if ( (size = parenthetical('<', '>', f)) == EOF )
422
- return 0;
423
- }
424
- else {
425
- for ( ; ((c=pull(f)) != ')') && !isspace(c); size++)
426
- if ( c == EOF ) return 0;
427
- if ( c == ')' )
428
- shift(f, -1);
417
+ while ( (c = pull(f)) != EOF ) {
418
+ e = cursor(f);
419
+ if ( c == quote ) {
420
+ if ( (c = eatspace(f)) == ')' ) {
421
+ T(ref->title) = 1+title;
422
+ S(ref->title) = (e-title)-2;
423
+ return 1;
424
+ }
425
+ }
429
426
  }
430
- *sizep = size;
431
- return ptr;
427
+ mmiotseek(f, whence);
428
+ return 0;
432
429
  }
433
430
 
434
431
 
435
432
  /* extract a =HHHxWWW size from the input stream
436
433
  */
437
434
  static int
438
- linkysize(MMIOT *f, int *heightp, int *widthp)
435
+ linkysize(MMIOT *f, Footnote *ref)
439
436
  {
440
437
  int height=0, width=0;
438
+ int whence = mmiottell(f);
441
439
  int c;
442
440
 
443
- *heightp = 0;
444
- *widthp = 0;
445
-
446
- if ( (c = eatspace(f)) != '=' )
447
- return (c != EOF);
448
- pull(f); /* eat '=' */
441
+ if ( isspace(peek(f,0)) ) {
442
+ pull(f); /* eat '=' */
449
443
 
450
- for ( c = pull(f); isdigit(c); c = pull(f))
451
- width = (width * 10) + (c - '0');
452
-
453
- if ( c == 'x' ) {
454
444
  for ( c = pull(f); isdigit(c); c = pull(f))
455
- height = (height*10) + (c - '0');
456
-
457
- if ( c != EOF ) {
458
- if ( !isspace(c) ) shift(f, -1);
459
- *heightp = height;
460
- *widthp = width;
461
- return 1;
462
- }
463
- }
464
- return 0;
465
- }
466
-
445
+ width = (width * 10) + (c - '0');
467
446
 
468
- /* extract a )-terminated title from the input stream.
469
- */
470
- static char*
471
- linkytitle(MMIOT *f, int *sizep)
472
- {
473
- int countq=0, qc, c, size;
474
- char *ret, *lastqc = 0;
475
-
476
- eatspace(f);
477
- if ( (qc=pull(f)) != '"' && qc != '\'' && qc != '(' )
478
- return 0;
447
+ if ( c == 'x' ) {
448
+ for ( c = pull(f); isdigit(c); c = pull(f))
449
+ height = (height*10) + (c - '0');
479
450
 
480
- if ( qc == '(' ) qc = ')';
451
+ if ( isspace(c) )
452
+ c = eatspace(f);
481
453
 
482
- for ( ret = cursor(f); (c = pull(f)) != EOF; ) {
483
- if ( (c == ')') && countq ) {
484
- size = (lastqc ? lastqc : cursor(f)) - ret;
485
- *sizep = size-1;
486
- return ret;
487
- }
488
- else if ( c == qc ) {
489
- lastqc = cursor(f);
490
- countq++;
454
+ if ( (c == ')') || ((c == '\'' || c == '"') && linkytitle(f, c, ref)) ) {
455
+ ref->height = height;
456
+ ref->width = width;
457
+ return 1;
458
+ }
491
459
  }
492
460
  }
461
+ mmiotseek(f, whence);
493
462
  return 0;
494
463
  }
495
464
 
496
465
 
497
- /* look up (or construct) a footnote from the [xxx] link
498
- * at the head of the stream.
466
+ /* extract a (-prefixed url from the input stream.
467
+ * the label is either of the format `<link>`, where I
468
+ * extract until I find a >, or it is of the format
469
+ * `text`, where I extract until I reach a ')', a quote,
470
+ * or (if image) a '='
499
471
  */
500
472
  static int
501
- linkykey(int image, Footnote *val, MMIOT *f)
473
+ linkyurl(MMIOT *f, int image, Footnote *p)
502
474
  {
503
- Footnote *ret;
504
- Cstring mylabel;
505
- int here;
506
-
507
- memset(val, 0, sizeof *val);
475
+ int c;
476
+ int mayneedtotrim=0;
508
477
 
509
- if ( (T(val->tag) = linkylabel(f, &S(val->tag))) == 0 )
478
+ if ( (c = eatspace(f)) == EOF )
510
479
  return 0;
511
480
 
512
- here = mmiottell(f);
513
- eatspace(f);
514
- switch ( pull(f) ) {
515
- case '(':
516
- /* embedded link */
517
- if ( (T(val->link) = linkyurl(f,&S(val->link))) == 0 )
518
- return 0;
481
+ if ( c == '<' ) {
482
+ pull(f);
483
+ mayneedtotrim=1;
484
+ }
519
485
 
520
- if ( image && !linkysize(f, &val->height, &val->width) )
486
+ T(p->link) = cursor(f);
487
+ for ( S(p->link)=0; (c = peek(f,1)) != ')'; ++S(p->link) ) {
488
+ if ( c == EOF )
521
489
  return 0;
490
+ else if ( (c == '"' || c == '\'') && linkytitle(f, c, p) )
491
+ break;
492
+ else if ( image && (c == '=') && linkysize(f, p) )
493
+ break;
494
+ else if ( (c == '\\') && ispunct(peek(f,2)) ) {
495
+ ++S(p->link);
496
+ pull(f);
497
+ }
498
+ pull(f);
499
+ }
500
+ if ( peek(f, 1) == ')' )
501
+ pull(f);
502
+
503
+ ___mkd_tidy(&p->link);
504
+
505
+ if ( mayneedtotrim && (T(p->link)[S(p->link)-1] == '>') )
506
+ --S(p->link);
507
+
508
+ return 1;
509
+ }
522
510
 
523
- T(val->title) = linkytitle(f, &S(val->title));
524
511
 
525
- return peek(f,0) == ')';
526
512
 
527
- case '[': /* footnote links /as defined in the standard/ */
528
- default: /* footnote links -- undocumented extension */
529
- /* footnote link */
530
- mylabel = val->tag;
531
- if ( peek(f,0) == '[' ) {
532
- if ( (T(val->tag) = linkylabel(f, &S(val->tag))) == 0 )
533
- return 0;
513
+ /* prefixes for <automatic links>
514
+ */
515
+ static struct {
516
+ char *name;
517
+ int nlen;
518
+ } protocol[] = {
519
+ #define _aprotocol(x) { x, (sizeof x)-1 }
520
+ _aprotocol( "http://" ),
521
+ _aprotocol( "https://" ),
522
+ _aprotocol( "ftp://" ),
523
+ _aprotocol( "news://" ),
524
+ #undef _aprotocol
525
+ };
526
+ #define NRPROTOCOLS (sizeof protocol / sizeof protocol[0])
534
527
 
535
- if ( !S(val->tag) )
536
- val->tag = mylabel;
537
- }
538
- else if ( f->flags & MKD_1_COMPAT )
539
- break;
540
- else
541
- mmiotseek(f,here);
542
528
 
543
- ret = bsearch(val, T(*f->footnotes), S(*f->footnotes),
544
- sizeof *val, (stfu)__mkd_footsort);
529
+ static int
530
+ isautoprefix(char *text)
531
+ {
532
+ int i;
545
533
 
546
- if ( ret ) {
547
- val->tag = mylabel;
548
- val->link = ret->link;
549
- val->title = ret->title;
550
- val->height = ret->height;
551
- val->width = ret->width;
534
+ for (i=0; i < NRPROTOCOLS; i++)
535
+ if ( strncasecmp(text, protocol[i].name, protocol[i].nlen) == 0 )
552
536
  return 1;
553
- }
554
- }
555
537
  return 0;
556
538
  }
557
539
 
@@ -569,12 +551,14 @@ typedef struct linkytype {
569
551
  char *text_pfx; /* text prefix (eg: ">") */
570
552
  char *text_sfx; /* text suffix (eg: "</a>") */
571
553
  int flags; /* reparse flags */
554
+ int kind; /* tag is url or something else? */
555
+ #define IS_URL 0x01
572
556
  } linkytype;
573
557
 
574
558
  static linkytype imaget = { 0, 0, "<img src=\"", "\"",
575
- 1, " alt=\"", "\" />", DENY_IMG|INSIDE_TAG };
559
+ 1, " alt=\"", "\" />", DENY_IMG|INSIDE_TAG, IS_URL };
576
560
  static linkytype linkt = { 0, 0, "<a href=\"", "\"",
577
- 0, ">", "</a>", DENY_A };
561
+ 0, ">", "</a>", DENY_A, IS_URL };
578
562
 
579
563
  /*
580
564
  * pseudo-protocols for [][];
@@ -584,9 +568,10 @@ static linkytype linkt = { 0, 0, "<a href=\"", "\"",
584
568
  * raw: just dump the link without any processing
585
569
  */
586
570
  static linkytype specials[] = {
587
- { "id:", 3, "<a id=\"", "\"", 0, ">", "</a>", 0 },
588
- { "class:", 6, "<span class=\"", "\"", 0, ">", "</span>", 0 },
589
- { "raw:", 4, 0, 0, 0, 0, 0, 0 },
571
+ { "id:", 3, "<a id=\"", "\"", 0, ">", "</a>", 0, IS_URL },
572
+ { "class:", 6, "<span class=\"", "\"", 0, ">", "</span>", 0, 0 },
573
+ { "raw:", 4, 0, 0, 0, 0, 0, DENY_HTML, 0 },
574
+ { "abbr:", 5, "<abbr title=\"", "\"", 0, ">", "</abbr>", 0, 0 },
590
575
  } ;
591
576
 
592
577
  #define NR(x) (sizeof x / sizeof x[0])
@@ -594,7 +579,7 @@ static linkytype specials[] = {
594
579
  /* see if t contains one of our pseudo-protocols.
595
580
  */
596
581
  static linkytype *
597
- extratag(Cstring t)
582
+ pseudo(Cstring t)
598
583
  {
599
584
  int i;
600
585
  linkytype *r;
@@ -608,57 +593,122 @@ extratag(Cstring t)
608
593
  }
609
594
 
610
595
 
611
- /*
612
- * process embedded links and images
596
+ /* print out a linky (or fail if it's Not Allowed)
613
597
  */
614
598
  static int
615
- linkylinky(int image, MMIOT *f)
599
+ linkyformat(MMIOT *f, Cstring text, int image, Footnote *ref)
616
600
  {
617
- int start = mmiottell(f);
618
- Footnote link;
619
601
  linkytype *tag;
620
602
 
621
- if ( !linkykey(image, &link, f) ) {
622
- mmiotseek(f, start);
623
- return 0;
624
- }
625
-
626
603
  if ( image )
627
604
  tag = &imaget;
628
- else if ( (f->flags & NO_PSEUDO_PROTO) || (tag = extratag(link.link)) == 0 )
605
+ else if ( tag = pseudo(ref->link) ) {
606
+ if ( f->flags & (NO_PSEUDO_PROTO|SAFELINK) )
607
+ return 0;
608
+ }
609
+ else if ( (f->flags & SAFELINK) && T(ref->link)
610
+ && (T(ref->link)[0] != '/')
611
+ && !isautoprefix(T(ref->link)) )
612
+ /* if SAFELINK, only accept links that are local or
613
+ * a well-known protocol
614
+ */
615
+ return 0;
616
+ else
629
617
  tag = &linkt;
630
618
 
631
- if ( f->flags & tag-> flags ) {
632
- mmiotseek(f, start);
619
+ if ( f->flags & tag->flags )
633
620
  return 0;
634
- }
635
621
 
636
622
  if ( tag->link_pfx ) {
637
623
  Qstring(tag->link_pfx, f);
638
- if ( f->base && (T(link.link)[tag->szpat] == '/') )
639
- puturl(f->base, strlen(f->base), f);
640
- puturl(T(link.link) + tag->szpat, S(link.link) - tag->szpat, f);
624
+
625
+ if ( tag->kind & IS_URL ) {
626
+ if ( f->base && T(ref->link) && (T(ref->link)[tag->szpat] == '/') )
627
+ puturl(f->base, strlen(f->base), f, 0);
628
+ puturl(T(ref->link) + tag->szpat, S(ref->link) - tag->szpat, f, 0);
629
+ }
630
+ else
631
+ ___mkd_reparse(T(ref->link) + tag->szpat, S(ref->link) - tag->szpat, INSIDE_TAG, f);
632
+
641
633
  Qstring(tag->link_sfx, f);
642
634
 
643
- if ( tag->WxH && link.height && link.width ) {
644
- Qprintf(f," height=\"%d\"", link.height);
645
- Qprintf(f, " width=\"%d\"", link.width);
635
+ if ( tag->WxH && ref->height && ref->width ) {
636
+ Qprintf(f," height=\"%d\"", ref->height);
637
+ Qprintf(f, " width=\"%d\"", ref->width);
646
638
  }
647
639
 
648
- if ( S(link.title) ) {
640
+ if ( S(ref->title) ) {
649
641
  Qstring(" title=\"", f);
650
- ___mkd_reparse(T(link.title), S(link.title), INSIDE_TAG, f);
642
+ ___mkd_reparse(T(ref->title), S(ref->title), INSIDE_TAG, f);
651
643
  Qchar('"', f);
652
644
  }
653
645
 
654
646
  Qstring(tag->text_pfx, f);
655
- ___mkd_reparse(T(link.tag), S(link.tag), tag->flags, f);
647
+ ___mkd_reparse(T(text), S(text), tag->flags, f);
656
648
  Qstring(tag->text_sfx, f);
657
649
  }
658
650
  else
659
- Qwrite(T(link.link) + tag->szpat, S(link.link) - tag->szpat, f);
651
+ Qwrite(T(ref->link) + tag->szpat, S(ref->link) - tag->szpat, f);
660
652
 
661
653
  return 1;
654
+ } /* linkyformat */
655
+
656
+
657
+ /*
658
+ * process embedded links and images
659
+ */
660
+ static int
661
+ linkylinky(int image, MMIOT *f)
662
+ {
663
+ int start = mmiottell(f);
664
+ int implicit_mark;
665
+ Cstring name;
666
+ Footnote key, *ref;
667
+
668
+ int status = 0;
669
+
670
+ CREATE(name);
671
+ bzero(&key, sizeof key);
672
+
673
+ if ( linkylabel(f, &name) ) {
674
+ implicit_mark = mmiottell(f);
675
+ eatspace(f);
676
+
677
+ switch ( pull(f) ) {
678
+ case '(': /* embedded link */
679
+ if ( linkyurl(f, image, &key) )
680
+ status = linkyformat(f, name, image, &key);
681
+ break;
682
+
683
+ case '[':/* footnote link */
684
+ default: /* (undocumented) implicit link */
685
+ if ( peek(f, 0) != '[' ) {
686
+ mmiotseek(f, implicit_mark);
687
+ if ( f->flags & MKD_1_COMPAT )
688
+ break;
689
+ }
690
+ else if ( !linkylabel(f, &key.tag) )
691
+ break;
692
+
693
+ if ( !S(key.tag) ) {
694
+ DELETE(key.tag);
695
+ T(key.tag) = T(name);
696
+ S(key.tag) = S(name);
697
+ }
698
+
699
+ if ( ref = bsearch(&key, T(*f->footnotes), S(*f->footnotes),
700
+ sizeof key, (stfu)__mkd_footsort) )
701
+ status = linkyformat(f, name, image, ref);
702
+ }
703
+ }
704
+
705
+ DELETE(name);
706
+ ___mkd_freefootnote(&key);
707
+
708
+ if ( status == 0 )
709
+ mmiotseek(f, start);
710
+
711
+ return status;
662
712
  }
663
713
 
664
714
 
@@ -711,6 +761,80 @@ forbidden_tag(MMIOT *f)
711
761
  }
712
762
 
713
763
 
764
+ /* Check a string to see if it looks like a mail address.
765
+ * "looks like a mail address" means alphanumeric + some
766
+ * specials, then a `@`, then alphanumeric + some specials,
767
+ * but with a `.`
768
+ */
769
+ static int
770
+ maybe_address(char *p, int size)
771
+ {
772
+ int ok = 0;
773
+
774
+ for ( ;size && (isalnum(*p) || strchr("._-+*", *p)); ++p, --size)
775
+ ;
776
+
777
+ if ( ! (size && *p == '@') )
778
+ return 0;
779
+
780
+ --size, ++p;
781
+
782
+ if ( size && *p == '.' ) return 0;
783
+
784
+ for ( ;size && (isalnum(*p) || strchr("._-+", *p)); ++p, --size )
785
+ if ( *p == '.' && size > 1 ) ok = 1;
786
+
787
+ return size ? 0 : ok;
788
+ }
789
+
790
+
791
+ /* The size-length token at cursor(f) is either a mailto:, an
792
+ * implicit mailto:, one of the approved url protocols, or just
793
+ * plain old text. If it's a mailto: or an approved protocol,
794
+ * linkify it, otherwise say "no"
795
+ */
796
+ static int
797
+ process_possible_link(MMIOT *f, int size)
798
+ {
799
+ int address= 0;
800
+ int mailto = 0;
801
+ char *text = cursor(f);
802
+
803
+ if ( f->flags & DENY_A ) return 0;
804
+
805
+ if ( (size > 7) && strncasecmp(text, "mailto:", 7) == 0 ) {
806
+ /* if it says it's a mailto, it's a mailto -- who am
807
+ * I to second-guess the user?
808
+ */
809
+ address = 1;
810
+ mailto = 7; /* 7 is the length of "mailto:"; we need this */
811
+ }
812
+ else
813
+ address = maybe_address(text, size);
814
+
815
+ if ( address ) {
816
+ Qstring("<a href=\"", f);
817
+ if ( !mailto ) {
818
+ /* supply a mailto: protocol if one wasn't attached */
819
+ mangle("mailto:", 7, f);
820
+ }
821
+ mangle(text, size, f);
822
+ Qstring("\">", f);
823
+ mangle(text+mailto, size-mailto, f);
824
+ Qstring("</a>", f);
825
+ return 1;
826
+ }
827
+ else if ( isautoprefix(text) ) {
828
+ Qstring("<a href=\"", f);
829
+ puturl(text,size,f, 0);
830
+ Qstring("\">", f);
831
+ puturl(text,size,f, 1);
832
+ Qstring("</a>", f);
833
+ return 1;
834
+ }
835
+ return 0;
836
+ } /* process_possible_link */
837
+
714
838
 
715
839
  /* a < may be just a regular character, the start of an embedded html
716
840
  * tag, or the start of an <automatic link>. If it's an automatic
@@ -721,68 +845,69 @@ forbidden_tag(MMIOT *f)
721
845
  static int
722
846
  maybe_tag_or_link(MMIOT *f)
723
847
  {
724
- char *text;
725
- int c, size, i;
726
- int maybetag=1, maybeaddress=0;
727
- int mailto;
848
+ int c, size;
849
+ int maybetag = 1;
728
850
 
729
851
  if ( f->flags & INSIDE_TAG )
730
852
  return 0;
731
853
 
732
- for ( size=0; ((c = peek(f,size+1)) != '>') && !isspace(c); size++ ) {
733
- if ( ! (c == '/' || isalnum(c) || c == '~') )
734
- maybetag=0;
735
- if ( c == '@' )
736
- maybeaddress=1;
737
- else if ( c == EOF )
854
+ for ( size=0; (c = peek(f, size+1)) != '>'; size++) {
855
+ if ( c == EOF )
738
856
  return 0;
857
+ else if ( c == '\\' ) {
858
+ maybetag=0;
859
+ if ( peek(f, size+2) != EOF )
860
+ size++;
861
+ }
862
+ else if ( isspace(c) )
863
+ break;
864
+ else if ( ! (c == '/' || isalnum(c) ) )
865
+ maybetag=0;
739
866
  }
740
867
 
741
- if ( size == 0 )
742
- return 0;
743
-
744
- if ( maybetag || (size >= 3 && strncmp(cursor(f), "!--", 3) == 0) ) {
745
- Qstring(forbidden_tag(f) ? "&lt;" : "<", f);
746
- while ( ((c = peek(f, 1)) != EOF) && (c != '>') )
747
- cputc(pull(f), f);
748
- return 1;
868
+ if ( size ) {
869
+ if ( maybetag || (size >= 3 && strncmp(cursor(f), "!--", 3) == 0) ) {
870
+ Qstring(forbidden_tag(f) ? "&lt;" : "<", f);
871
+ while ( ((c = peek(f, 1)) != EOF) && (c != '>') )
872
+ cputc(pull(f), f);
873
+ return 1;
874
+ }
875
+ else if ( !isspace(c) && process_possible_link(f, size) ) {
876
+ shift(f, size+1);
877
+ return 1;
878
+ }
749
879
  }
880
+
881
+ return 0;
882
+ }
750
883
 
751
- if ( f->flags & DENY_A ) return 0;
752
-
753
- text = cursor(f);
754
- shift(f, size+1);
755
884
 
756
- for ( i=0; i < SZAUTOPREFIX; i++ )
757
- if ( strncasecmp(text, autoprefix[i], strlen(autoprefix[i])) == 0 ) {
758
- Qstring("<a href=\"", f);
759
- puturl(text,size,f);
760
- Qstring("\">", f);
761
- puturl(text,size,f);
762
- Qstring("</a>", f);
763
- return 1;
764
- }
765
- if ( maybeaddress ) {
885
+ /* autolinking means that all inline html is <a href'ified>. An
886
+ * autolink url is alphanumerics, slashes, periods, underscores,
887
+ * the at sign, colon, and the % character.
888
+ */
889
+ static int
890
+ maybe_autolink(MMIOT *f)
891
+ {
892
+ register int c;
893
+ int size;
766
894
 
767
- Qstring("<a href=\"", f);
768
- if ( (size > 7) && strncasecmp(text, "mailto:", 7) == 0 )
769
- mailto = 7;
770
- else {
771
- mailto = 0;
772
- /* supply a mailto: protocol if one wasn't attached */
773
- mangle("mailto:", 7, f);
895
+ /* greedily scan forward for the end of a legitimate link.
896
+ */
897
+ for ( size=0; (c=peek(f, size+1)) != EOF; size++ )
898
+ if ( c == '\\' ) {
899
+ if ( peek(f, size+2) != EOF )
900
+ ++size;
774
901
  }
902
+ else if ( isspace(c) || strchr("'\"()[]{}<>`", c) )
903
+ break;
775
904
 
776
- mangle(text, size, f);
777
- Qstring("\">", f);
778
- mangle(text+mailto, size-mailto, f);
779
- Qstring("</a>", f);
905
+ if ( (size > 1) && process_possible_link(f, size) ) {
906
+ shift(f, size);
780
907
  return 1;
781
908
  }
782
-
783
- shift(f, -(size+1));
784
909
  return 0;
785
- } /* maybe_tag_or_link */
910
+ }
786
911
 
787
912
 
788
913
  /* smartyquote code that's common for single and double quotes
@@ -868,7 +993,7 @@ smartypants(int c, int *flags, MMIOT *f)
868
993
  {
869
994
  int i;
870
995
 
871
- if ( f->flags & DENY_SMARTY )
996
+ if ( f->flags & (DENY_SMARTY|INSIDE_TAG) )
872
997
  return 0;
873
998
 
874
999
  for ( i=0; i < NRSMART; i++)
@@ -922,12 +1047,23 @@ text(MMIOT *f)
922
1047
  int rep;
923
1048
  int smartyflags = 0;
924
1049
 
925
- while ( (c = pull(f)) != EOF ) {
1050
+ while (1) {
1051
+ if ( (f->flags & AUTOLINK) && isalpha(peek(f,1)) )
1052
+ maybe_autolink(f);
1053
+
1054
+ c = pull(f);
1055
+
1056
+ if (c == EOF)
1057
+ break;
1058
+
926
1059
  if ( smartypants(c, &smartyflags, f) )
927
1060
  continue;
928
1061
  switch (c) {
929
1062
  case 0: break;
930
1063
 
1064
+ case 3: Qstring("<br/>", f);
1065
+ break;
1066
+
931
1067
  case '>': if ( tag_text(f) )
932
1068
  Qstring("&gt;", f);
933
1069
  else
@@ -971,15 +1107,24 @@ text(MMIOT *f)
971
1107
  case '_':
972
1108
  #if RELAXED_EMPHASIS
973
1109
  /* Underscores don't count if they're in the middle of a word */
974
- if ( (!(f->flags & STRICT))
975
- && ((isthisspace(f,-1) && isthisspace(f,1))
976
- || (isthisalnum(f,-1) && isthisalnum(f,1))) ){
1110
+ if ( !(f->flags & STRICT) && isthisalnum(f,-1)
1111
+ && isthisalnum(f,1) ) {
1112
+ Qchar(c, f);
1113
+ break;
1114
+ }
1115
+ #endif
1116
+ case '*':
1117
+ #if RELAXED_EMPHASIS
1118
+ /* Underscores & stars don't count if they're out in the middle
1119
+ * of whitespace */
1120
+ if ( !(f->flags & STRICT) && isthisspace(f,-1)
1121
+ && isthisspace(f,1) ) {
977
1122
  Qchar(c, f);
978
1123
  break;
979
1124
  }
980
1125
  /* else fall into the regular old emphasis case */
981
1126
  #endif
982
- case '*': if ( tag_text(f) )
1127
+ if ( tag_text(f) )
983
1128
  Qchar(c, f);
984
1129
  else {
985
1130
  for (rep = 1; peek(f,1) == c; pull(f) )
@@ -1143,10 +1288,10 @@ printblock(Paragraph *pp, MMIOT *f)
1143
1288
  if ( S(t->text) > 2 && T(t->text)[S(t->text)-2] == ' '
1144
1289
  && T(t->text)[S(t->text)-1] == ' ') {
1145
1290
  push(T(t->text), S(t->text)-2, f);
1146
- push("<br/>\n", 6, f);
1291
+ push("\003\n", 2, f);
1147
1292
  }
1148
1293
  else {
1149
- ___mkd_tidy(t);
1294
+ ___mkd_tidy(&t->text);
1150
1295
  push(T(t->text), S(t->text), f);
1151
1296
  if ( t->next )
1152
1297
  push("\n", 1, f);