wikitext 3.0.1 → 3.1

Sign up to get free protection for your applications and to get access to all the features.
data/ext/parser.c CHANGED
@@ -1,4 +1,4 @@
1
- // Copyright 2007-2012 Wincent Colaiuta. All rights reserved.
1
+ // Copyright 2007-2013 Wincent Colaiuta. All rights reserved.
2
2
  //
3
3
  // Redistribution and use in source and binary forms, with or without
4
4
  // modification, are permitted provided that the following conditions are met:
@@ -279,6 +279,160 @@ void wiki_downcase_bang(char *ptr, long len)
279
279
  }
280
280
  }
281
281
 
282
+ void wiki_append_entity_from_utf32_char(str_t *output, uint32_t character)
283
+ {
284
+ char hex_string[8] = { '&', '#', 'x', 0, 0, 0, 0, ';' };
285
+ char scratch = (character & 0xf000) >> 12;
286
+ hex_string[3] = (scratch <= 9 ? scratch + 48 : scratch + 87);
287
+ scratch = (character & 0x0f00) >> 8;
288
+ hex_string[4] = (scratch <= 9 ? scratch + 48 : scratch + 87);
289
+ scratch = (character & 0x00f0) >> 4;
290
+ hex_string[5] = (scratch <= 9 ? scratch + 48 : scratch + 87);
291
+ scratch = character & 0x000f;
292
+ hex_string[6] = (scratch <= 9 ? scratch + 48 : scratch + 87);
293
+ str_append(output, hex_string, sizeof(hex_string));
294
+ }
295
+
296
+ // Convert a single UTF-8 codepoint to UTF-32
297
+ //
298
+ // Expects an input buffer, src, containing a UTF-8 encoded character (which
299
+ // may be multi-byte). The end of the input buffer, end, is also passed in to
300
+ // allow the detection of invalidly truncated codepoints. The number of bytes
301
+ // in the UTF-8 character (between 1 and 4) is returned by reference in
302
+ // width_out.
303
+ //
304
+ // Raises a RangeError if the supplied character is invalid UTF-8.
305
+ uint32_t wiki_utf8_to_utf32(char *src, char *end, long *width_out)
306
+ {
307
+ uint32_t dest = 0;
308
+ if ((unsigned char)src[0] <= 0x7f)
309
+ {
310
+ // ASCII
311
+ dest = src[0];
312
+ *width_out = 1;
313
+ }
314
+ else if ((src[0] & 0xe0) == 0xc0)
315
+ {
316
+ // byte starts with 110..... : this should be a two-byte sequence
317
+ if (src + 1 >= end)
318
+ // no second byte
319
+ rb_raise(eWikitextParserError, "invalid encoding: truncated byte sequence");
320
+ else if (((unsigned char)src[0] == 0xc0) ||
321
+ ((unsigned char)src[0] == 0xc1))
322
+ // overlong encoding: lead byte of 110..... but code point <= 127
323
+ rb_raise(eWikitextParserError, "invalid encoding: overlong encoding");
324
+ else if ((src[1] & 0xc0) != 0x80 )
325
+ // should have second byte starting with 10......
326
+ rb_raise(eWikitextParserError, "invalid encoding: malformed byte sequence");
327
+
328
+ dest =
329
+ ((uint32_t)(src[0] & 0x1f)) << 6 |
330
+ (src[1] & 0x3f);
331
+ *width_out = 2;
332
+ }
333
+ else if ((src[0] & 0xf0) == 0xe0)
334
+ {
335
+ // byte starts with 1110.... : this should be a three-byte sequence
336
+ if (src + 2 >= end)
337
+ // missing second or third byte
338
+ rb_raise(eWikitextParserError, "invalid encoding: truncated byte sequence");
339
+ else if (((src[1] & 0xc0) != 0x80 ) ||
340
+ ((src[2] & 0xc0) != 0x80 ))
341
+ // should have second and third bytes starting with 10......
342
+ rb_raise(eWikitextParserError, "invalid encoding: malformed byte sequence");
343
+
344
+ dest =
345
+ ((uint32_t)(src[0] & 0x0f)) << 12 |
346
+ ((uint32_t)(src[1] & 0x3f)) << 6 |
347
+ (src[2] & 0x3f);
348
+ *width_out = 3;
349
+ }
350
+ else if ((src[0] & 0xf8) == 0xf0)
351
+ {
352
+ // bytes starts with 11110... : this should be a four-byte sequence
353
+ if (src + 3 >= end)
354
+ // missing second, third, or fourth byte
355
+ rb_raise(eWikitextParserError, "invalid encoding: truncated byte sequence");
356
+ else if ((unsigned char)src[0] >= 0xf5 &&
357
+ (unsigned char)src[0] <= 0xf7)
358
+ // disallowed by RFC 3629 (codepoints above 0x10ffff)
359
+ rb_raise(eWikitextParserError, "invalid encoding: overlong encoding");
360
+ else if (((src[1] & 0xc0) != 0x80 ) ||
361
+ ((src[2] & 0xc0) != 0x80 ) ||
362
+ ((src[3] & 0xc0) != 0x80 ))
363
+ // should have second and third bytes starting with 10......
364
+ rb_raise(eWikitextParserError, "invalid encoding: malformed byte sequence");
365
+
366
+ dest =
367
+ ((uint32_t)(src[0] & 0x07)) << 18 |
368
+ ((uint32_t)(src[1] & 0x3f)) << 12 |
369
+ ((uint32_t)(src[1] & 0x3f)) << 6 |
370
+ (src[2] & 0x3f);
371
+ *width_out = 4;
372
+ }
373
+ else
374
+ rb_raise(eWikitextParserError, "invalid encoding: unexpected byte");
375
+ return dest;
376
+ }
377
+
378
+ // - non-printable (non-ASCII) characters converted to numeric entities
379
+ // - QUOT and AMP characters converted to named entities
380
+ // - if trim is true, leading and trailing whitespace trimmed
381
+ // - if trim is false, there is no special treatment of spaces
382
+ void wiki_append_sanitized_link_target(str_t *link_target, str_t *output, bool trim)
383
+ {
384
+ char *src = link_target->ptr;
385
+ char *start = src; // remember this so we can check if we're at the start
386
+ char *non_space = output->ptr + output->len; // remember last non-space character output
387
+ char *end = src + link_target->len;
388
+ while (src < end)
389
+ {
390
+ // need at most 8 bytes to display each input character (&#x0000;)
391
+ if (output->ptr + output->len + 8 > output->ptr + output->capacity) // outgrowing buffer, must grow
392
+ {
393
+ char *old_ptr = output->ptr;
394
+ str_grow(output, output->len + (end - src) * 8); // allocate enough for worst case
395
+ if (old_ptr != output->ptr) // may have moved
396
+ non_space += output->ptr - old_ptr;
397
+ }
398
+
399
+ if (*src == '"')
400
+ {
401
+ char quot_entity_literal[] = { '&', 'q', 'u', 'o', 't', ';' }; // no trailing NUL
402
+ str_append(output, quot_entity_literal, sizeof(quot_entity_literal));
403
+ }
404
+ else if (*src == '&')
405
+ {
406
+ char amp_entity_literal[] = { '&', 'a', 'm', 'p', ';' }; // no trailing NUL
407
+ str_append(output, amp_entity_literal, sizeof(amp_entity_literal));
408
+ }
409
+ else if (*src == '<' || *src == '>')
410
+ rb_raise(rb_eRangeError, "invalid link text (\"%c\" may not appear in link text)", *src);
411
+ else if (*src == ' ' && src == start && trim)
412
+ start++; // we eat leading space
413
+ else if (*src >= 0x20 && *src <= 0x7e) // printable ASCII
414
+ {
415
+ *(output->ptr + output->len) = *src;
416
+ output->len++;
417
+ }
418
+ else // all others: must convert to entities
419
+ {
420
+ long width;
421
+ wiki_append_entity_from_utf32_char(output, wiki_utf8_to_utf32(src, end, &width));
422
+ src += width;
423
+ non_space = output->ptr + output->len;
424
+ continue;
425
+ }
426
+ if (*src != ' ')
427
+ non_space = output->ptr + output->len;
428
+ src++;
429
+ }
430
+
431
+ // trim trailing space if necessary
432
+ if (trim && output->ptr + output->len != non_space)
433
+ output->len -= (output->ptr + output->len) - non_space;
434
+ }
435
+
282
436
  // prepare hyperlink and append it to parser->output
283
437
  // if check_autolink is true, checks parser->autolink to decide whether to emit a real hyperlink
284
438
  // or merely the literal link target
@@ -286,13 +440,13 @@ void wiki_downcase_bang(char *ptr, long len)
286
440
  void wiki_append_hyperlink(parser_t *parser, VALUE link_prefix, str_t *link_target, str_t *link_text, VALUE link_class, VALUE link_rel, bool check_autolink)
287
441
  {
288
442
  if (check_autolink && !parser->autolink)
289
- str_append_str(parser->output, link_target);
443
+ wiki_append_sanitized_link_target(link_target, parser->output, true);
290
444
  else
291
445
  {
292
446
  str_append(parser->output, a_start, sizeof(a_start) - 1); // <a href="
293
447
  if (!NIL_P(link_prefix))
294
448
  str_append_string(parser->output, link_prefix);
295
- str_append_str(parser->output, link_target);
449
+ wiki_append_sanitized_link_target(link_target, parser->output, true);
296
450
 
297
451
  // special handling for mailto URIs
298
452
  const char *mailto = "mailto:";
@@ -315,7 +469,7 @@ void wiki_append_hyperlink(parser_t *parser, VALUE link_prefix, str_t *link_targ
315
469
  }
316
470
  str_append(parser->output, a_start_close, sizeof(a_start_close) - 1); // ">
317
471
  if (!link_text || link_text->len == 0) // re-use link_target
318
- str_append_str(parser->output, link_target);
472
+ wiki_append_sanitized_link_target(link_target, parser->output, true);
319
473
  else
320
474
  str_append_str(parser->output, link_text);
321
475
  str_append(parser->output, a_end, sizeof(a_end) - 1); // </a>
@@ -649,102 +803,6 @@ void wiki_pop_excess_elements(parser_t *parser)
649
803
  }
650
804
  }
651
805
 
652
- // Convert a single UTF-8 codepoint to UTF-32
653
- //
654
- // Expects an input buffer, src, containing a UTF-8 encoded character (which
655
- // may be multi-byte). The end of the input buffer, end, is also passed in to
656
- // allow the detection of invalidly truncated codepoints. The number of bytes
657
- // in the UTF-8 character (between 1 and 4) is returned by reference in
658
- // width_out.
659
- //
660
- // Raises a RangeError if the supplied character is invalid UTF-8.
661
- uint32_t wiki_utf8_to_utf32(char *src, char *end, long *width_out)
662
- {
663
- uint32_t dest;
664
- if ((unsigned char)src[0] <= 0x7f)
665
- {
666
- // ASCII
667
- dest = src[0];
668
- *width_out = 1;
669
- }
670
- else if ((src[0] & 0xe0) == 0xc0)
671
- {
672
- // byte starts with 110..... : this should be a two-byte sequence
673
- if (src + 1 >= end)
674
- // no second byte
675
- rb_raise(eWikitextParserError, "invalid encoding: truncated byte sequence");
676
- else if (((unsigned char)src[0] == 0xc0) ||
677
- ((unsigned char)src[0] == 0xc1))
678
- // overlong encoding: lead byte of 110..... but code point <= 127
679
- rb_raise(eWikitextParserError, "invalid encoding: overlong encoding");
680
- else if ((src[1] & 0xc0) != 0x80 )
681
- // should have second byte starting with 10......
682
- rb_raise(eWikitextParserError, "invalid encoding: malformed byte sequence");
683
-
684
- dest =
685
- ((uint32_t)(src[0] & 0x1f)) << 6 |
686
- (src[1] & 0x3f);
687
- *width_out = 2;
688
- }
689
- else if ((src[0] & 0xf0) == 0xe0)
690
- {
691
- // byte starts with 1110.... : this should be a three-byte sequence
692
- if (src + 2 >= end)
693
- // missing second or third byte
694
- rb_raise(eWikitextParserError, "invalid encoding: truncated byte sequence");
695
- else if (((src[1] & 0xc0) != 0x80 ) ||
696
- ((src[2] & 0xc0) != 0x80 ))
697
- // should have second and third bytes starting with 10......
698
- rb_raise(eWikitextParserError, "invalid encoding: malformed byte sequence");
699
-
700
- dest =
701
- ((uint32_t)(src[0] & 0x0f)) << 12 |
702
- ((uint32_t)(src[1] & 0x3f)) << 6 |
703
- (src[2] & 0x3f);
704
- *width_out = 3;
705
- }
706
- else if ((src[0] & 0xf8) == 0xf0)
707
- {
708
- // bytes starts with 11110... : this should be a four-byte sequence
709
- if (src + 3 >= end)
710
- // missing second, third, or fourth byte
711
- rb_raise(eWikitextParserError, "invalid encoding: truncated byte sequence");
712
- else if ((unsigned char)src[0] >= 0xf5 &&
713
- (unsigned char)src[0] <= 0xf7)
714
- // disallowed by RFC 3629 (codepoints above 0x10ffff)
715
- rb_raise(eWikitextParserError, "invalid encoding: overlong encoding");
716
- else if (((src[1] & 0xc0) != 0x80 ) ||
717
- ((src[2] & 0xc0) != 0x80 ) ||
718
- ((src[3] & 0xc0) != 0x80 ))
719
- // should have second and third bytes starting with 10......
720
- rb_raise(eWikitextParserError, "invalid encoding: malformed byte sequence");
721
-
722
- dest =
723
- ((uint32_t)(src[0] & 0x07)) << 18 |
724
- ((uint32_t)(src[1] & 0x3f)) << 12 |
725
- ((uint32_t)(src[1] & 0x3f)) << 6 |
726
- (src[2] & 0x3f);
727
- *width_out = 4;
728
- }
729
- else
730
- rb_raise(eWikitextParserError, "invalid encoding: unexpected byte");
731
- return dest;
732
- }
733
-
734
- void wiki_append_entity_from_utf32_char(str_t *output, uint32_t character)
735
- {
736
- char hex_string[8] = { '&', '#', 'x', 0, 0, 0, 0, ';' };
737
- char scratch = (character & 0xf000) >> 12;
738
- hex_string[3] = (scratch <= 9 ? scratch + 48 : scratch + 87);
739
- scratch = (character & 0x0f00) >> 8;
740
- hex_string[4] = (scratch <= 9 ? scratch + 48 : scratch + 87);
741
- scratch = (character & 0x00f0) >> 4;
742
- hex_string[5] = (scratch <= 9 ? scratch + 48 : scratch + 87);
743
- scratch = character & 0x000f;
744
- hex_string[6] = (scratch <= 9 ? scratch + 48 : scratch + 87);
745
- str_append(output, hex_string, sizeof(hex_string));
746
- }
747
-
748
806
  // trim parser->link_text in place
749
807
  void wiki_trim_link_text(parser_t *parser)
750
808
  {
@@ -772,72 +830,13 @@ void wiki_trim_link_text(parser_t *parser)
772
830
  }
773
831
  }
774
832
 
775
- // - non-printable (non-ASCII) characters converted to numeric entities
776
- // - QUOT and AMP characters converted to named entities
777
- // - if trim is true, leading and trailing whitespace trimmed
778
- // - if trim is false, there is no special treatment of spaces
779
- void wiki_append_sanitized_link_target(parser_t *parser, str_t *output, bool trim)
780
- {
781
- char *src = parser->link_target->ptr;
782
- char *start = src; // remember this so we can check if we're at the start
783
- char *non_space = output->ptr + output->len; // remember last non-space character output
784
- char *end = src + parser->link_target->len;
785
- while (src < end)
786
- {
787
- // need at most 8 bytes to display each input character (&#x0000;)
788
- if (output->ptr + output->len + 8 > output->ptr + output->capacity) // outgrowing buffer, must grow
789
- {
790
- char *old_ptr = output->ptr;
791
- str_grow(output, output->len + (end - src) * 8); // allocate enough for worst case
792
- if (old_ptr != output->ptr) // may have moved
793
- non_space += output->ptr - old_ptr;
794
- }
795
-
796
- if (*src == '"')
797
- {
798
- char quot_entity_literal[] = { '&', 'q', 'u', 'o', 't', ';' }; // no trailing NUL
799
- str_append(output, quot_entity_literal, sizeof(quot_entity_literal));
800
- }
801
- else if (*src == '&')
802
- {
803
- char amp_entity_literal[] = { '&', 'a', 'm', 'p', ';' }; // no trailing NUL
804
- str_append(output, amp_entity_literal, sizeof(amp_entity_literal));
805
- }
806
- else if (*src == '<' || *src == '>')
807
- rb_raise(rb_eRangeError, "invalid link text (\"%c\" may not appear in link text)", *src);
808
- else if (*src == ' ' && src == start && trim)
809
- start++; // we eat leading space
810
- else if (*src >= 0x20 && *src <= 0x7e) // printable ASCII
811
- {
812
- *(output->ptr + output->len) = *src;
813
- output->len++;
814
- }
815
- else // all others: must convert to entities
816
- {
817
- long width;
818
- wiki_append_entity_from_utf32_char(output, wiki_utf8_to_utf32(src, end, &width));
819
- src += width;
820
- non_space = output->ptr + output->len;
821
- continue;
822
- }
823
- if (*src != ' ')
824
- non_space = output->ptr + output->len;
825
- src++;
826
- }
827
-
828
- // trim trailing space if necessary
829
- if (trim && output->ptr + output->len != non_space)
830
- output->len -= (output->ptr + output->len) - non_space;
831
- }
832
-
833
833
  VALUE Wikitext_parser_sanitize_link_target(VALUE self, VALUE string)
834
834
  {
835
- parser_t parser;
836
- parser.link_target = str_new_from_string(string);
837
- GC_WRAP_STR(parser.link_target, link_target_gc);
835
+ str_t *link_target = str_new_from_string(string);
836
+ GC_WRAP_STR(link_target, link_target_gc);
838
837
  str_t *output = str_new();
839
838
  GC_WRAP_STR(output, output_gc);
840
- wiki_append_sanitized_link_target(&parser, output, true);
839
+ wiki_append_sanitized_link_target(link_target, output, true);
841
840
  return string_from_str(output);
842
841
  }
843
842
 
@@ -950,7 +949,7 @@ void wiki_rollback_failed_internal_link(parser_t *parser)
950
949
  str_append(parser->output, link_start, sizeof(link_start) - 1);
951
950
  if (parser->link_target->len > 0)
952
951
  {
953
- wiki_append_sanitized_link_target(parser, parser->output, false);
952
+ wiki_append_sanitized_link_target(parser->link_target, parser->output, false);
954
953
  if (scope_includes_separator)
955
954
  {
956
955
  str_append(parser->output, separator, sizeof(separator) - 1);
@@ -1009,6 +1008,7 @@ VALUE Wikitext_parser_initialize(int argc, VALUE *argv, VALUE self)
1009
1008
  VALUE external_link_class = rb_str_new2("external");
1010
1009
  VALUE external_link_rel = Qnil;
1011
1010
  VALUE mailto_class = rb_str_new2("mailto");
1011
+ VALUE link_proc = Qnil;
1012
1012
  VALUE internal_link_prefix = rb_str_new2("/wiki/");
1013
1013
  VALUE img_prefix = rb_str_new2("/images/");
1014
1014
  VALUE output_style = ID2SYM(rb_intern("html"));
@@ -1026,6 +1026,7 @@ VALUE Wikitext_parser_initialize(int argc, VALUE *argv, VALUE self)
1026
1026
  external_link_class = OVERRIDE_IF_SET(external_link_class);
1027
1027
  external_link_rel = OVERRIDE_IF_SET(external_link_rel);
1028
1028
  mailto_class = OVERRIDE_IF_SET(mailto_class);
1029
+ link_proc = OVERRIDE_IF_SET(link_proc);
1029
1030
  internal_link_prefix = OVERRIDE_IF_SET(internal_link_prefix);
1030
1031
  img_prefix = OVERRIDE_IF_SET(img_prefix);
1031
1032
  output_style = OVERRIDE_IF_SET(output_style);
@@ -1040,6 +1041,7 @@ VALUE Wikitext_parser_initialize(int argc, VALUE *argv, VALUE self)
1040
1041
  rb_iv_set(self, "@external_link_class", external_link_class);
1041
1042
  rb_iv_set(self, "@external_link_rel", external_link_rel);
1042
1043
  rb_iv_set(self, "@mailto_class", mailto_class);
1044
+ rb_iv_set(self, "@link_proc", link_proc);
1043
1045
  rb_iv_set(self, "@internal_link_prefix", internal_link_prefix);
1044
1046
  rb_iv_set(self, "@img_prefix", img_prefix);
1045
1047
  rb_iv_set(self, "@output_style", output_style);
@@ -1084,6 +1086,7 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1084
1086
  link_class = NIL_P(link_class) ? Qnil : StringValue(link_class);
1085
1087
  VALUE link_rel = rb_iv_get(self, "@external_link_rel");
1086
1088
  link_rel = NIL_P(link_rel) ? Qnil : StringValue(link_rel);
1089
+ VALUE link_proc = rb_iv_get(self, "@link_proc");
1087
1090
  VALUE mailto_class = rb_iv_get(self, "@mailto_class");
1088
1091
  mailto_class = NIL_P(mailto_class) ? Qnil : StringValue(mailto_class);
1089
1092
  VALUE prefix = rb_iv_get(self, "@internal_link_prefix");
@@ -1092,7 +1095,6 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1092
1095
  // process options hash
1093
1096
  int base_indent = 0;
1094
1097
  int base_heading_level = NUM2INT(rb_iv_get(self, "@base_heading_level"));
1095
- VALUE link_proc = Qnil;
1096
1098
  if (!NIL_P(options) && TYPE(options) == T_HASH)
1097
1099
  {
1098
1100
  // :indent => 0 (or more)
@@ -1979,9 +1981,13 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1979
1981
 
1980
1982
  case URI:
1981
1983
  if (IN(NO_WIKI_START))
1984
+ {
1982
1985
  // user can temporarily suppress autolinking by using <nowiki></nowiki>
1983
1986
  // note that unlike MediaWiki, we do allow autolinking inside PRE blocks
1984
- str_append(parser->output, token->start, TOKEN_LEN(token));
1987
+ token_str->ptr = token->start;
1988
+ token_str->len = TOKEN_LEN(token);
1989
+ wiki_append_sanitized_link_target(token_str, parser->output, false);
1990
+ }
1985
1991
  else if (IN(LINK_START))
1986
1992
  {
1987
1993
  // if the URI were allowed it would have been handled already in LINK_START
@@ -2018,7 +2024,11 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
2018
2024
  }
2019
2025
  }
2020
2026
  else
2021
- str_append(parser->link_text, token->start, TOKEN_LEN(token));
2027
+ {
2028
+ token_str->ptr = token->start;
2029
+ token_str->len = TOKEN_LEN(token);
2030
+ wiki_append_sanitized_link_target(token_str, parser->link_text, false);
2031
+ }
2022
2032
  }
2023
2033
  else
2024
2034
  {
@@ -2213,7 +2223,7 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
2213
2223
  {
2214
2224
  // use link target as link text
2215
2225
  str_clear(parser->link_text);
2216
- wiki_append_sanitized_link_target(parser, parser->link_text, true);
2226
+ wiki_append_sanitized_link_target(parser->link_target, parser->link_text, true);
2217
2227
  }
2218
2228
  else
2219
2229
  wiki_trim_link_text(parser);
data/ext/wikitext.c CHANGED
@@ -1,4 +1,4 @@
1
- // Copyright 2008-2009 Wincent Colaiuta. All rights reserved.
1
+ // Copyright 2008-2013 Wincent Colaiuta. All rights reserved.
2
2
  //
3
3
  // Redistribution and use in source and binary forms, with or without
4
4
  // modification, are permitted provided that the following conditions are met:
@@ -80,6 +80,7 @@ void Init_wikitext()
80
80
  rb_define_attr(cWikitextParser, "external_link_class", Qtrue, Qtrue);
81
81
  rb_define_attr(cWikitextParser, "external_link_rel", Qtrue, Qtrue);
82
82
  rb_define_attr(cWikitextParser, "mailto_class", Qtrue, Qtrue);
83
+ rb_define_attr(cWikitextParser, "link_proc", Qtrue, Qtrue);
83
84
  rb_define_attr(cWikitextParser, "autolink", Qtrue, Qtrue);
84
85
  rb_define_attr(cWikitextParser, "space_to_underscore", Qtrue, Qtrue);
85
86
  rb_define_attr(cWikitextParser, "minimum_fulltext_token_length", Qtrue, Qtrue);
@@ -1,4 +1,4 @@
1
- # Copyright 2007-2012 Wincent Colaiuta. All rights reserved.
1
+ # Copyright 2007-2013 Wincent Colaiuta. All rights reserved.
2
2
  #
3
3
  # Redistribution and use in source and binary forms, with or without
4
4
  # modification, are permitted provided that the following conditions are met:
@@ -22,5 +22,5 @@
22
22
  # POSSIBILITY OF SUCH DAMAGE.
23
23
 
24
24
  module Wikitext
25
- VERSION = '3.0.1'
25
+ VERSION = '3.1'
26
26
  end # module Wikitext
@@ -1,4 +1,4 @@
1
- # Copyright 2007-2010 Wincent Colaiuta. All rights reserved.
1
+ # Copyright 2007-2012 Wincent Colaiuta. All rights reserved.
2
2
  #
3
3
  # Redistribution and use in source and binary forms, with or without
4
4
  # modification, are permitted provided that the following conditions are met:
@@ -58,7 +58,12 @@ describe Wikitext::Parser, 'autolinking' do
58
58
  @parser.parse(uri).should == %Q{<p><a href="svn://example.com/" class="external">svn://example.com/</a></p>\n}
59
59
  end
60
60
 
61
- it 'should apple the external_link_class CSS class if set' do
61
+ it 'converts ampersands into entities' do
62
+ expected = %{<p><a href="http://google.com/?q=1&amp;lang=en" class="external">http://google.com/?q=1&amp;lang=en</a></p>\n}
63
+ @parser.parse('http://google.com/?q=1&lang=en').should == expected
64
+ end
65
+
66
+ it 'should apply the external_link_class CSS class if set' do
62
67
  uri = 'http://example.com/'
63
68
  @parser.external_link_class = 'bar'
64
69
  @parser.parse(uri).should == %Q{<p><a href="http://example.com/" class="bar">http://example.com/</a></p>\n}
@@ -75,7 +75,16 @@ describe Wikitext::Parser, 'external links' do
75
75
  # more general case of bug reported here: https://wincent.com/issues/1955
76
76
  expected = %{<p><a href="http://google.com/?q=user@example.com" class="external">Google for user@example.com</a></p>\n}
77
77
  @parser.parse('[http://google.com/?q=user@example.com Google for user@example.com]').should == expected
78
+ end
79
+
80
+ it 'formats ampersands in link targets using entities' do
81
+ expected =%{<p><a href="http://google.com/?q=1&amp;lang=en" class="external">Google</a></p>\n}
82
+ @parser.parse('[http://google.com/?q=1&lang=en Google]').should == expected
83
+ end
78
84
 
85
+ it 'formats ampersands in URIs in link text' do
86
+ expected =%{<p><a href="http://google.com/?q=1&amp;lang=en" class="external">http://google.com/?q=1&amp;lang=en</a></p>\n}
87
+ @parser.parse('[http://google.com/?q=1&lang=en http://google.com/?q=1&lang=en]').should == expected
79
88
  end
80
89
 
81
90
  it 'should format absolute path links' do
@@ -1,5 +1,5 @@
1
1
  # encoding: utf-8
2
- # Copyright 2007-2010 Wincent Colaiuta. All rights reserved.
2
+ # Copyright 2007-2013 Wincent Colaiuta. All rights reserved.
3
3
  #
4
4
  # Redistribution and use in source and binary forms, with or without
5
5
  # modification, are permitted provided that the following conditions are met:
@@ -113,26 +113,41 @@ describe Wikitext::Parser, 'internal links (space to underscore off)' do
113
113
  end
114
114
 
115
115
  describe '"red link" support' do
116
- it 'should accept a Proc object via the optional "link_proc" parameter' do
116
+ it 'accepts a Proc object via the optional "link_proc" parameter' do
117
117
  @parser.parse('foo', :link_proc => Proc.new { }).should == %Q{<p>foo</p>\n}
118
118
  end
119
119
 
120
- it 'should accept a lambda via the optional "link_proc" parameter' do
120
+ it 'accepts a lambda via the optional "link_proc" parameter' do
121
121
  @parser.parse('foo', :link_proc => lambda { }).should == %Q{<p>foo</p>\n}
122
122
  end
123
123
 
124
- it 'should apply custom link CSS when supplied (Proc object version)' do
124
+ it 'applies custom link CSS when supplied (Proc object version)' do
125
125
  link_proc = Proc.new { |target| target == 'bar' ? 'redlink' : nil }
126
126
  expected = %Q{<p><a href="/wiki/foo">foo</a> <a href="/wiki/bar" class="redlink">bar</a></p>\n}
127
127
  @parser.parse('[[foo]] [[bar]]', :link_proc => link_proc).should == expected
128
128
  end
129
129
 
130
- it 'should apply custom link CSS when supplied (lambda version)' do
130
+ it 'applies custom link CSS when supplied (lambda version)' do
131
131
  link_proc = lambda { |target| target == 'bar' ? 'redlink' : nil }
132
132
  expected = %Q{<p><a href="/wiki/foo">foo</a> <a href="/wiki/bar" class="redlink">bar</a></p>\n}
133
133
  @parser.parse('[[foo]] [[bar]]', :link_proc => link_proc).should == expected
134
134
  end
135
135
 
136
+ it 'uses a lamba passed in when the Parser is initialized' do
137
+ link_proc = lambda { |target| target == 'bar' ? 'redlink' : nil }
138
+ parser = Wikitext::Parser.new :link_proc => link_proc
139
+ expected = %Q{<p><a href="/wiki/foo">foo</a> <a href="/wiki/bar" class="redlink">bar</a></p>\n}
140
+ parser.parse('[[foo]] [[bar]]').should == expected
141
+ end
142
+
143
+ it 'uses a lamba set as an attribute on the Parser' do
144
+ link_proc = lambda { |target| target == 'bar' ? 'redlink' : nil }
145
+ parser = Wikitext::Parser.new
146
+ parser.link_proc = link_proc
147
+ expected = %Q{<p><a href="/wiki/foo">foo</a> <a href="/wiki/bar" class="redlink">bar</a></p>\n}
148
+ parser.parse('[[foo]] [[bar]]').should == expected
149
+ end
150
+
136
151
  it 'should apply no custom link CSS when supplied nil (Proc object version)' do
137
152
  expected = %Q{<p><a href="/wiki/foo">foo</a></p>\n}
138
153
  @parser.parse('[[foo]]', :link_proc => Proc.new { |target| nil }).should == expected
@@ -0,0 +1,239 @@
1
+ # Copyright 2009-2011 Wincent Colaiuta. All rights reserved.
2
+ #
3
+ # Redistribution and use in source and binary forms, with or without
4
+ # modification, are permitted provided that the following conditions are met:
5
+ #
6
+ # 1. Redistributions of source code must retain the above copyright notice,
7
+ # this list of conditions and the following disclaimer.
8
+ # 2. Redistributions in binary form must reproduce the above copyright notice,
9
+ # this list of conditions and the following disclaimer in the documentation
10
+ # and/or other materials provided with the distribution.
11
+
12
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
13
+ # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
14
+ # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
15
+ # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
16
+ # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
17
+ # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
18
+ # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
19
+ # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
20
+ # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
21
+ # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
22
+ # POSSIBILITY OF SUCH DAMAGE.
23
+
24
+ require 'spec_helper'
25
+ require 'wikitext/version'
26
+ require 'fileutils'
27
+ require 'pathname'
28
+ require 'wopen3'
29
+
30
+ module RailsSpecs
31
+ TRASH_PATH = Pathname.new(__FILE__).dirname + '.trash'
32
+ AREL_CLONE_PATH = TRASH_PATH + 'arel.git'
33
+ AREL_REPO = 'git://github.com/rails/arel.git'
34
+ RAILS_CLONE_PATH = TRASH_PATH + 'rails.git'
35
+ RAILS_REPO = 'git://github.com/rails/rails.git'
36
+ WIKITEXT_GEM_PATH = TRASH_PATH + '..' + '..'
37
+ SUCCESSFUL_TEST_RESULT = /1 tests, 3 assertions, 0 failures, 0 errors/
38
+
39
+ def run cmd, *args
40
+ result = Wopen3.system(*([cmd] + args))
41
+ if result.status != 0
42
+ command_string = ([cmd] + args).join(' ')
43
+ puts "\n*** COMMAND #{command_string} EXITED WITH NON-ZERO EXIT STATUS (#{result.status})"
44
+ puts "*** STDOUT FOR COMMAND #{command_string}:", result.stdout
45
+ puts "*** STDERR FOR COMMAND #{command_string}:", result.stderr
46
+ raise "non-zero exit status (#{result.status}) for '#{command_string}'"
47
+ end
48
+ result
49
+ end
50
+
51
+ def clone repo, path
52
+ if File.exist? path
53
+ FileUtils.cd path do
54
+ run 'git', 'fetch'
55
+ end
56
+ else
57
+ run 'git', 'clone', repo, path
58
+ end
59
+ end
60
+
61
+ def app_path version
62
+ version = 'edge' if version.nil?
63
+ version = "v#{version}" if version =~ /\A\d\./
64
+ TRASH_PATH + "#{version}-app"
65
+ end
66
+
67
+ # if version is nil will create an "Edge" app
68
+ def create_rails3_app rails_version, arel_version = nil
69
+ app = app_path rails_version
70
+ clone AREL_REPO, AREL_CLONE_PATH
71
+ FileUtils.cd AREL_CLONE_PATH do
72
+ if arel_version
73
+ run 'git', 'reset', '--hard', "v#{arel_version}"
74
+ else # "Edge"
75
+ run 'git', 'reset', '--hard', 'origin/master'
76
+ end
77
+ run 'git', 'clean', '-f'
78
+ end
79
+
80
+ clone RAILS_REPO, RAILS_CLONE_PATH
81
+ FileUtils.cd RAILS_CLONE_PATH do
82
+ if rails_version
83
+ run 'git', 'reset', '--hard', "v#{rails_version}"
84
+ else # "Edge"
85
+ run 'git', 'reset', '--hard', 'origin/master'
86
+ end
87
+ run 'git', 'clean', '-f'
88
+
89
+ begin
90
+ clean_bundler_environment
91
+ run 'env', "AREL=#{AREL_CLONE_PATH}",
92
+ 'bundle', 'install', '--path', '../bundle', '--without', 'db'
93
+ FileUtils.rm_r(app) if File.exist?(app)
94
+ run 'env', "AREL=#{AREL_CLONE_PATH}",
95
+ 'bundle', 'exec', 'bin/rails', 'new', app, '--skip-activerecord', '--dev'
96
+ ensure
97
+ restore_bundler_environment
98
+ end
99
+ end
100
+
101
+ create_gemfile app
102
+ bundlerize app
103
+ end
104
+
105
+ def insert text, after, infile
106
+ output = []
107
+ found = false
108
+ File.read(infile).split("\n").each do |line|
109
+ output << line
110
+ if found == false && line =~ /#{Regexp.escape(after)}/
111
+ found = true
112
+ output << text
113
+ end
114
+ end
115
+ File.open(infile, 'wb') { |f| f.write(output.join("\n")) }
116
+ raise "text '#{after}' not found" unless found
117
+ end
118
+
119
+ def add_text_to_routes text, infile
120
+ insert text, 'Application.routes.draw', infile
121
+ end
122
+
123
+ def create_gemfile app
124
+ File.open(app + 'Gemfile', 'w') do |f|
125
+ f.write <<-GEMFILE
126
+ source :rubygems
127
+ gem 'arel', :path => "#{AREL_CLONE_PATH.realpath}"
128
+ gem 'rake'
129
+ gem 'rails', :path => "#{RAILS_CLONE_PATH.realpath}"
130
+ gem 'sqlite3'
131
+ gem 'wikitext', :path => "#{WIKITEXT_GEM_PATH.realpath}"
132
+ GEMFILE
133
+ end
134
+ end
135
+
136
+ def bundlerize app
137
+ clean_bundler_environment
138
+ Dir.chdir app do
139
+ run 'bundle', 'install', '--path', '../bundle', '--binstubs'
140
+ end
141
+ ensure
142
+ restore_bundler_environment
143
+ end
144
+
145
+ def create_controller app
146
+ File.open(app + 'app' + 'controllers' + 'wiki_controller.rb', 'w') do |f|
147
+ f.write 'class WikiController < ApplicationController; end'
148
+ end
149
+ end
150
+
151
+ def create_template app
152
+ template_dir = app + 'app' + 'views' + 'wiki'
153
+ FileUtils.mkdir template_dir
154
+ File.open(template_dir + 'index.html.wikitext', 'w') do |f|
155
+ f.write '* hello, world!'
156
+ end
157
+ end
158
+
159
+ def create_test app
160
+ # integration tests won't run without a schema.rb
161
+ FileUtils.touch app + 'db' + 'schema.rb'
162
+
163
+ File.open(app + 'test' + 'integration' + 'wiki_test.rb', 'w') do |f|
164
+ f.write <<'TEST'
165
+ require File.join(File.dirname(__FILE__), '..', 'test_helper')
166
+
167
+ class WikiTest < ActionController::IntegrationTest
168
+ def test_wiki_index
169
+ get "/wiki"
170
+ assert_response :success
171
+ assert_template "wiki/index"
172
+ assert_select 'ul>li', 'hello, world!'
173
+ end
174
+ end
175
+ TEST
176
+ end
177
+ end
178
+
179
+ def update_routes app
180
+ routes = app + 'config' + 'routes.rb'
181
+ add_text_to_routes 'match "/wiki" => "wiki#index"', routes
182
+ end
183
+
184
+ def setup_rails_app rails_version = nil, arel_version = nil
185
+ create_rails3_app rails_version, arel_version
186
+ path = app_path rails_version
187
+ update_routes path
188
+ create_controller path
189
+ create_template path
190
+ create_test path
191
+ end
192
+
193
+ def clean_bundler_environment
194
+ @bundler_env = ENV.select { |key, value| key =~ /\A(BUNDLE|GEM)_/ }
195
+ @bundler_env.each { |pair| ENV.delete(pair.first) }
196
+ end
197
+
198
+ def restore_bundler_environment
199
+ @bundler_env.each { |pair| ENV[pair[0]] = pair[1] }
200
+ end
201
+
202
+ def run_integration_test app
203
+ clean_bundler_environment
204
+ FileUtils.cd app do
205
+ return run('bin/rake', 'test:integration').stdout
206
+ end
207
+ ensure
208
+ restore_bundler_environment
209
+ end
210
+ end # module RailsSpecs
211
+
212
+ # different versions of Rails require different versions of Arel
213
+ { '3.1.0' => '2.1.1' }.each do |rails_version, arel_version|
214
+ describe "Template handler in Rails #{rails_version}" do
215
+ include RailsSpecs
216
+
217
+ before :all do
218
+ setup_rails_app rails_version, arel_version
219
+ @path = app_path rails_version
220
+ end
221
+
222
+ it 'should process the template using the wikitext module' do
223
+ run_integration_test(@path).should =~ RailsSpecs::SUCCESSFUL_TEST_RESULT
224
+ end
225
+ end
226
+ end
227
+
228
+ describe 'Template handler in Edge Rails' do
229
+ include RailsSpecs
230
+
231
+ before :all do
232
+ setup_rails_app
233
+ @path = app_path nil
234
+ end
235
+
236
+ it 'should process the template using the wikitext module' do
237
+ run_integration_test(@path).should =~ RailsSpecs::SUCCESSFUL_TEST_RESULT
238
+ end
239
+ end
metadata CHANGED
@@ -4,9 +4,8 @@ version: !ruby/object:Gem::Version
4
4
  prerelease: false
5
5
  segments:
6
6
  - 3
7
- - 0
8
7
  - 1
9
- version: 3.0.1
8
+ version: "3.1"
10
9
  platform: ruby
11
10
  authors:
12
11
  - Wincent Colaiuta
@@ -14,10 +13,11 @@ autorequire:
14
13
  bindir: bin
15
14
  cert_chain: []
16
15
 
17
- date: 2012-03-03 00:00:00 -08:00
16
+ date: 2013-02-16 00:00:00 -08:00
18
17
  default_executable:
19
18
  dependencies:
20
19
  - !ruby/object:Gem::Dependency
20
+ type: :development
21
21
  version_requirements: &id001 !ruby/object:Gem::Requirement
22
22
  requirements:
23
23
  - - ">="
@@ -25,24 +25,24 @@ dependencies:
25
25
  segments:
26
26
  - 0
27
27
  version: "0"
28
- requirement: *id001
29
28
  name: rake
29
+ requirement: *id001
30
30
  prerelease: false
31
- type: :development
32
31
  - !ruby/object:Gem::Dependency
32
+ type: :development
33
33
  version_requirements: &id002 !ruby/object:Gem::Requirement
34
34
  requirements:
35
- - - ~>
35
+ - - ">="
36
36
  - !ruby/object:Gem::Version
37
37
  segments:
38
38
  - 2
39
39
  - 0
40
40
  version: "2.0"
41
- requirement: *id002
42
41
  name: rspec
42
+ requirement: *id002
43
43
  prerelease: false
44
- type: :development
45
44
  - !ruby/object:Gem::Dependency
45
+ type: :development
46
46
  version_requirements: &id003 !ruby/object:Gem::Requirement
47
47
  requirements:
48
48
  - - ">="
@@ -50,11 +50,11 @@ dependencies:
50
50
  segments:
51
51
  - 0
52
52
  version: "0"
53
- requirement: *id003
54
53
  name: thor
54
+ requirement: *id003
55
55
  prerelease: false
56
- type: :development
57
56
  - !ruby/object:Gem::Dependency
57
+ type: :development
58
58
  version_requirements: &id004 !ruby/object:Gem::Requirement
59
59
  requirements:
60
60
  - - ">="
@@ -64,11 +64,11 @@ dependencies:
64
64
  - 5
65
65
  - 8
66
66
  version: 0.5.8
67
- requirement: *id004
68
67
  name: yard
68
+ requirement: *id004
69
69
  prerelease: false
70
- type: :development
71
70
  - !ruby/object:Gem::Dependency
71
+ type: :development
72
72
  version_requirements: &id005 !ruby/object:Gem::Requirement
73
73
  requirements:
74
74
  - - ">="
@@ -76,11 +76,11 @@ dependencies:
76
76
  segments:
77
77
  - 0
78
78
  version: "0"
79
- requirement: *id005
80
79
  name: wopen3
80
+ requirement: *id005
81
81
  prerelease: false
82
- type: :development
83
82
  - !ruby/object:Gem::Dependency
83
+ type: :development
84
84
  version_requirements: &id006 !ruby/object:Gem::Requirement
85
85
  requirements:
86
86
  - - ">="
@@ -88,10 +88,9 @@ dependencies:
88
88
  segments:
89
89
  - 0
90
90
  version: "0"
91
- requirement: *id006
92
91
  name: ZenTest
92
+ requirement: *id006
93
93
  prerelease: false
94
- type: :development
95
94
  description: " Wikitext is a fast wikitext-to-HTML translator written in C.\n"
96
95
  email: win@wincent.com
97
96
  executables:
@@ -149,6 +148,7 @@ files:
149
148
  - spec/p_spec.rb
150
149
  - spec/parser_spec.rb
151
150
  - spec/pre_spec.rb
151
+ - spec/rails_spec.rb
152
152
  - spec/regressions_spec.rb
153
153
  - spec/spec_helper.rb
154
154
  - spec/string_spec.rb