wikitext 3.0.1 → 3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/ext/parser.c CHANGED
@@ -1,4 +1,4 @@
1
- // Copyright 2007-2012 Wincent Colaiuta. All rights reserved.
1
+ // Copyright 2007-2013 Wincent Colaiuta. All rights reserved.
2
2
  //
3
3
  // Redistribution and use in source and binary forms, with or without
4
4
  // modification, are permitted provided that the following conditions are met:
@@ -279,6 +279,160 @@ void wiki_downcase_bang(char *ptr, long len)
279
279
  }
280
280
  }
281
281
 
282
+ void wiki_append_entity_from_utf32_char(str_t *output, uint32_t character)
283
+ {
284
+ char hex_string[8] = { '&', '#', 'x', 0, 0, 0, 0, ';' };
285
+ char scratch = (character & 0xf000) >> 12;
286
+ hex_string[3] = (scratch <= 9 ? scratch + 48 : scratch + 87);
287
+ scratch = (character & 0x0f00) >> 8;
288
+ hex_string[4] = (scratch <= 9 ? scratch + 48 : scratch + 87);
289
+ scratch = (character & 0x00f0) >> 4;
290
+ hex_string[5] = (scratch <= 9 ? scratch + 48 : scratch + 87);
291
+ scratch = character & 0x000f;
292
+ hex_string[6] = (scratch <= 9 ? scratch + 48 : scratch + 87);
293
+ str_append(output, hex_string, sizeof(hex_string));
294
+ }
295
+
296
+ // Convert a single UTF-8 codepoint to UTF-32
297
+ //
298
+ // Expects an input buffer, src, containing a UTF-8 encoded character (which
299
+ // may be multi-byte). The end of the input buffer, end, is also passed in to
300
+ // allow the detection of invalidly truncated codepoints. The number of bytes
301
+ // in the UTF-8 character (between 1 and 4) is returned by reference in
302
+ // width_out.
303
+ //
304
+ // Raises a RangeError if the supplied character is invalid UTF-8.
305
+ uint32_t wiki_utf8_to_utf32(char *src, char *end, long *width_out)
306
+ {
307
+ uint32_t dest = 0;
308
+ if ((unsigned char)src[0] <= 0x7f)
309
+ {
310
+ // ASCII
311
+ dest = src[0];
312
+ *width_out = 1;
313
+ }
314
+ else if ((src[0] & 0xe0) == 0xc0)
315
+ {
316
+ // byte starts with 110..... : this should be a two-byte sequence
317
+ if (src + 1 >= end)
318
+ // no second byte
319
+ rb_raise(eWikitextParserError, "invalid encoding: truncated byte sequence");
320
+ else if (((unsigned char)src[0] == 0xc0) ||
321
+ ((unsigned char)src[0] == 0xc1))
322
+ // overlong encoding: lead byte of 110..... but code point <= 127
323
+ rb_raise(eWikitextParserError, "invalid encoding: overlong encoding");
324
+ else if ((src[1] & 0xc0) != 0x80 )
325
+ // should have second byte starting with 10......
326
+ rb_raise(eWikitextParserError, "invalid encoding: malformed byte sequence");
327
+
328
+ dest =
329
+ ((uint32_t)(src[0] & 0x1f)) << 6 |
330
+ (src[1] & 0x3f);
331
+ *width_out = 2;
332
+ }
333
+ else if ((src[0] & 0xf0) == 0xe0)
334
+ {
335
+ // byte starts with 1110.... : this should be a three-byte sequence
336
+ if (src + 2 >= end)
337
+ // missing second or third byte
338
+ rb_raise(eWikitextParserError, "invalid encoding: truncated byte sequence");
339
+ else if (((src[1] & 0xc0) != 0x80 ) ||
340
+ ((src[2] & 0xc0) != 0x80 ))
341
+ // should have second and third bytes starting with 10......
342
+ rb_raise(eWikitextParserError, "invalid encoding: malformed byte sequence");
343
+
344
+ dest =
345
+ ((uint32_t)(src[0] & 0x0f)) << 12 |
346
+ ((uint32_t)(src[1] & 0x3f)) << 6 |
347
+ (src[2] & 0x3f);
348
+ *width_out = 3;
349
+ }
350
+ else if ((src[0] & 0xf8) == 0xf0)
351
+ {
352
+ // bytes starts with 11110... : this should be a four-byte sequence
353
+ if (src + 3 >= end)
354
+ // missing second, third, or fourth byte
355
+ rb_raise(eWikitextParserError, "invalid encoding: truncated byte sequence");
356
+ else if ((unsigned char)src[0] >= 0xf5 &&
357
+ (unsigned char)src[0] <= 0xf7)
358
+ // disallowed by RFC 3629 (codepoints above 0x10ffff)
359
+ rb_raise(eWikitextParserError, "invalid encoding: overlong encoding");
360
+ else if (((src[1] & 0xc0) != 0x80 ) ||
361
+ ((src[2] & 0xc0) != 0x80 ) ||
362
+ ((src[3] & 0xc0) != 0x80 ))
363
+ // should have second and third bytes starting with 10......
364
+ rb_raise(eWikitextParserError, "invalid encoding: malformed byte sequence");
365
+
366
+ dest =
367
+ ((uint32_t)(src[0] & 0x07)) << 18 |
368
+ ((uint32_t)(src[1] & 0x3f)) << 12 |
369
+ ((uint32_t)(src[1] & 0x3f)) << 6 |
370
+ (src[2] & 0x3f);
371
+ *width_out = 4;
372
+ }
373
+ else
374
+ rb_raise(eWikitextParserError, "invalid encoding: unexpected byte");
375
+ return dest;
376
+ }
377
+
378
+ // - non-printable (non-ASCII) characters converted to numeric entities
379
+ // - QUOT and AMP characters converted to named entities
380
+ // - if trim is true, leading and trailing whitespace trimmed
381
+ // - if trim is false, there is no special treatment of spaces
382
+ void wiki_append_sanitized_link_target(str_t *link_target, str_t *output, bool trim)
383
+ {
384
+ char *src = link_target->ptr;
385
+ char *start = src; // remember this so we can check if we're at the start
386
+ char *non_space = output->ptr + output->len; // remember last non-space character output
387
+ char *end = src + link_target->len;
388
+ while (src < end)
389
+ {
390
+ // need at most 8 bytes to display each input character (&#x0000;)
391
+ if (output->ptr + output->len + 8 > output->ptr + output->capacity) // outgrowing buffer, must grow
392
+ {
393
+ char *old_ptr = output->ptr;
394
+ str_grow(output, output->len + (end - src) * 8); // allocate enough for worst case
395
+ if (old_ptr != output->ptr) // may have moved
396
+ non_space += output->ptr - old_ptr;
397
+ }
398
+
399
+ if (*src == '"')
400
+ {
401
+ char quot_entity_literal[] = { '&', 'q', 'u', 'o', 't', ';' }; // no trailing NUL
402
+ str_append(output, quot_entity_literal, sizeof(quot_entity_literal));
403
+ }
404
+ else if (*src == '&')
405
+ {
406
+ char amp_entity_literal[] = { '&', 'a', 'm', 'p', ';' }; // no trailing NUL
407
+ str_append(output, amp_entity_literal, sizeof(amp_entity_literal));
408
+ }
409
+ else if (*src == '<' || *src == '>')
410
+ rb_raise(rb_eRangeError, "invalid link text (\"%c\" may not appear in link text)", *src);
411
+ else if (*src == ' ' && src == start && trim)
412
+ start++; // we eat leading space
413
+ else if (*src >= 0x20 && *src <= 0x7e) // printable ASCII
414
+ {
415
+ *(output->ptr + output->len) = *src;
416
+ output->len++;
417
+ }
418
+ else // all others: must convert to entities
419
+ {
420
+ long width;
421
+ wiki_append_entity_from_utf32_char(output, wiki_utf8_to_utf32(src, end, &width));
422
+ src += width;
423
+ non_space = output->ptr + output->len;
424
+ continue;
425
+ }
426
+ if (*src != ' ')
427
+ non_space = output->ptr + output->len;
428
+ src++;
429
+ }
430
+
431
+ // trim trailing space if necessary
432
+ if (trim && output->ptr + output->len != non_space)
433
+ output->len -= (output->ptr + output->len) - non_space;
434
+ }
435
+
282
436
  // prepare hyperlink and append it to parser->output
283
437
  // if check_autolink is true, checks parser->autolink to decide whether to emit a real hyperlink
284
438
  // or merely the literal link target
@@ -286,13 +440,13 @@ void wiki_downcase_bang(char *ptr, long len)
286
440
  void wiki_append_hyperlink(parser_t *parser, VALUE link_prefix, str_t *link_target, str_t *link_text, VALUE link_class, VALUE link_rel, bool check_autolink)
287
441
  {
288
442
  if (check_autolink && !parser->autolink)
289
- str_append_str(parser->output, link_target);
443
+ wiki_append_sanitized_link_target(link_target, parser->output, true);
290
444
  else
291
445
  {
292
446
  str_append(parser->output, a_start, sizeof(a_start) - 1); // <a href="
293
447
  if (!NIL_P(link_prefix))
294
448
  str_append_string(parser->output, link_prefix);
295
- str_append_str(parser->output, link_target);
449
+ wiki_append_sanitized_link_target(link_target, parser->output, true);
296
450
 
297
451
  // special handling for mailto URIs
298
452
  const char *mailto = "mailto:";
@@ -315,7 +469,7 @@ void wiki_append_hyperlink(parser_t *parser, VALUE link_prefix, str_t *link_targ
315
469
  }
316
470
  str_append(parser->output, a_start_close, sizeof(a_start_close) - 1); // ">
317
471
  if (!link_text || link_text->len == 0) // re-use link_target
318
- str_append_str(parser->output, link_target);
472
+ wiki_append_sanitized_link_target(link_target, parser->output, true);
319
473
  else
320
474
  str_append_str(parser->output, link_text);
321
475
  str_append(parser->output, a_end, sizeof(a_end) - 1); // </a>
@@ -649,102 +803,6 @@ void wiki_pop_excess_elements(parser_t *parser)
649
803
  }
650
804
  }
651
805
 
652
- // Convert a single UTF-8 codepoint to UTF-32
653
- //
654
- // Expects an input buffer, src, containing a UTF-8 encoded character (which
655
- // may be multi-byte). The end of the input buffer, end, is also passed in to
656
- // allow the detection of invalidly truncated codepoints. The number of bytes
657
- // in the UTF-8 character (between 1 and 4) is returned by reference in
658
- // width_out.
659
- //
660
- // Raises a RangeError if the supplied character is invalid UTF-8.
661
- uint32_t wiki_utf8_to_utf32(char *src, char *end, long *width_out)
662
- {
663
- uint32_t dest;
664
- if ((unsigned char)src[0] <= 0x7f)
665
- {
666
- // ASCII
667
- dest = src[0];
668
- *width_out = 1;
669
- }
670
- else if ((src[0] & 0xe0) == 0xc0)
671
- {
672
- // byte starts with 110..... : this should be a two-byte sequence
673
- if (src + 1 >= end)
674
- // no second byte
675
- rb_raise(eWikitextParserError, "invalid encoding: truncated byte sequence");
676
- else if (((unsigned char)src[0] == 0xc0) ||
677
- ((unsigned char)src[0] == 0xc1))
678
- // overlong encoding: lead byte of 110..... but code point <= 127
679
- rb_raise(eWikitextParserError, "invalid encoding: overlong encoding");
680
- else if ((src[1] & 0xc0) != 0x80 )
681
- // should have second byte starting with 10......
682
- rb_raise(eWikitextParserError, "invalid encoding: malformed byte sequence");
683
-
684
- dest =
685
- ((uint32_t)(src[0] & 0x1f)) << 6 |
686
- (src[1] & 0x3f);
687
- *width_out = 2;
688
- }
689
- else if ((src[0] & 0xf0) == 0xe0)
690
- {
691
- // byte starts with 1110.... : this should be a three-byte sequence
692
- if (src + 2 >= end)
693
- // missing second or third byte
694
- rb_raise(eWikitextParserError, "invalid encoding: truncated byte sequence");
695
- else if (((src[1] & 0xc0) != 0x80 ) ||
696
- ((src[2] & 0xc0) != 0x80 ))
697
- // should have second and third bytes starting with 10......
698
- rb_raise(eWikitextParserError, "invalid encoding: malformed byte sequence");
699
-
700
- dest =
701
- ((uint32_t)(src[0] & 0x0f)) << 12 |
702
- ((uint32_t)(src[1] & 0x3f)) << 6 |
703
- (src[2] & 0x3f);
704
- *width_out = 3;
705
- }
706
- else if ((src[0] & 0xf8) == 0xf0)
707
- {
708
- // bytes starts with 11110... : this should be a four-byte sequence
709
- if (src + 3 >= end)
710
- // missing second, third, or fourth byte
711
- rb_raise(eWikitextParserError, "invalid encoding: truncated byte sequence");
712
- else if ((unsigned char)src[0] >= 0xf5 &&
713
- (unsigned char)src[0] <= 0xf7)
714
- // disallowed by RFC 3629 (codepoints above 0x10ffff)
715
- rb_raise(eWikitextParserError, "invalid encoding: overlong encoding");
716
- else if (((src[1] & 0xc0) != 0x80 ) ||
717
- ((src[2] & 0xc0) != 0x80 ) ||
718
- ((src[3] & 0xc0) != 0x80 ))
719
- // should have second and third bytes starting with 10......
720
- rb_raise(eWikitextParserError, "invalid encoding: malformed byte sequence");
721
-
722
- dest =
723
- ((uint32_t)(src[0] & 0x07)) << 18 |
724
- ((uint32_t)(src[1] & 0x3f)) << 12 |
725
- ((uint32_t)(src[1] & 0x3f)) << 6 |
726
- (src[2] & 0x3f);
727
- *width_out = 4;
728
- }
729
- else
730
- rb_raise(eWikitextParserError, "invalid encoding: unexpected byte");
731
- return dest;
732
- }
733
-
734
- void wiki_append_entity_from_utf32_char(str_t *output, uint32_t character)
735
- {
736
- char hex_string[8] = { '&', '#', 'x', 0, 0, 0, 0, ';' };
737
- char scratch = (character & 0xf000) >> 12;
738
- hex_string[3] = (scratch <= 9 ? scratch + 48 : scratch + 87);
739
- scratch = (character & 0x0f00) >> 8;
740
- hex_string[4] = (scratch <= 9 ? scratch + 48 : scratch + 87);
741
- scratch = (character & 0x00f0) >> 4;
742
- hex_string[5] = (scratch <= 9 ? scratch + 48 : scratch + 87);
743
- scratch = character & 0x000f;
744
- hex_string[6] = (scratch <= 9 ? scratch + 48 : scratch + 87);
745
- str_append(output, hex_string, sizeof(hex_string));
746
- }
747
-
748
806
  // trim parser->link_text in place
749
807
  void wiki_trim_link_text(parser_t *parser)
750
808
  {
@@ -772,72 +830,13 @@ void wiki_trim_link_text(parser_t *parser)
772
830
  }
773
831
  }
774
832
 
775
- // - non-printable (non-ASCII) characters converted to numeric entities
776
- // - QUOT and AMP characters converted to named entities
777
- // - if trim is true, leading and trailing whitespace trimmed
778
- // - if trim is false, there is no special treatment of spaces
779
- void wiki_append_sanitized_link_target(parser_t *parser, str_t *output, bool trim)
780
- {
781
- char *src = parser->link_target->ptr;
782
- char *start = src; // remember this so we can check if we're at the start
783
- char *non_space = output->ptr + output->len; // remember last non-space character output
784
- char *end = src + parser->link_target->len;
785
- while (src < end)
786
- {
787
- // need at most 8 bytes to display each input character (&#x0000;)
788
- if (output->ptr + output->len + 8 > output->ptr + output->capacity) // outgrowing buffer, must grow
789
- {
790
- char *old_ptr = output->ptr;
791
- str_grow(output, output->len + (end - src) * 8); // allocate enough for worst case
792
- if (old_ptr != output->ptr) // may have moved
793
- non_space += output->ptr - old_ptr;
794
- }
795
-
796
- if (*src == '"')
797
- {
798
- char quot_entity_literal[] = { '&', 'q', 'u', 'o', 't', ';' }; // no trailing NUL
799
- str_append(output, quot_entity_literal, sizeof(quot_entity_literal));
800
- }
801
- else if (*src == '&')
802
- {
803
- char amp_entity_literal[] = { '&', 'a', 'm', 'p', ';' }; // no trailing NUL
804
- str_append(output, amp_entity_literal, sizeof(amp_entity_literal));
805
- }
806
- else if (*src == '<' || *src == '>')
807
- rb_raise(rb_eRangeError, "invalid link text (\"%c\" may not appear in link text)", *src);
808
- else if (*src == ' ' && src == start && trim)
809
- start++; // we eat leading space
810
- else if (*src >= 0x20 && *src <= 0x7e) // printable ASCII
811
- {
812
- *(output->ptr + output->len) = *src;
813
- output->len++;
814
- }
815
- else // all others: must convert to entities
816
- {
817
- long width;
818
- wiki_append_entity_from_utf32_char(output, wiki_utf8_to_utf32(src, end, &width));
819
- src += width;
820
- non_space = output->ptr + output->len;
821
- continue;
822
- }
823
- if (*src != ' ')
824
- non_space = output->ptr + output->len;
825
- src++;
826
- }
827
-
828
- // trim trailing space if necessary
829
- if (trim && output->ptr + output->len != non_space)
830
- output->len -= (output->ptr + output->len) - non_space;
831
- }
832
-
833
833
  VALUE Wikitext_parser_sanitize_link_target(VALUE self, VALUE string)
834
834
  {
835
- parser_t parser;
836
- parser.link_target = str_new_from_string(string);
837
- GC_WRAP_STR(parser.link_target, link_target_gc);
835
+ str_t *link_target = str_new_from_string(string);
836
+ GC_WRAP_STR(link_target, link_target_gc);
838
837
  str_t *output = str_new();
839
838
  GC_WRAP_STR(output, output_gc);
840
- wiki_append_sanitized_link_target(&parser, output, true);
839
+ wiki_append_sanitized_link_target(link_target, output, true);
841
840
  return string_from_str(output);
842
841
  }
843
842
 
@@ -950,7 +949,7 @@ void wiki_rollback_failed_internal_link(parser_t *parser)
950
949
  str_append(parser->output, link_start, sizeof(link_start) - 1);
951
950
  if (parser->link_target->len > 0)
952
951
  {
953
- wiki_append_sanitized_link_target(parser, parser->output, false);
952
+ wiki_append_sanitized_link_target(parser->link_target, parser->output, false);
954
953
  if (scope_includes_separator)
955
954
  {
956
955
  str_append(parser->output, separator, sizeof(separator) - 1);
@@ -1009,6 +1008,7 @@ VALUE Wikitext_parser_initialize(int argc, VALUE *argv, VALUE self)
1009
1008
  VALUE external_link_class = rb_str_new2("external");
1010
1009
  VALUE external_link_rel = Qnil;
1011
1010
  VALUE mailto_class = rb_str_new2("mailto");
1011
+ VALUE link_proc = Qnil;
1012
1012
  VALUE internal_link_prefix = rb_str_new2("/wiki/");
1013
1013
  VALUE img_prefix = rb_str_new2("/images/");
1014
1014
  VALUE output_style = ID2SYM(rb_intern("html"));
@@ -1026,6 +1026,7 @@ VALUE Wikitext_parser_initialize(int argc, VALUE *argv, VALUE self)
1026
1026
  external_link_class = OVERRIDE_IF_SET(external_link_class);
1027
1027
  external_link_rel = OVERRIDE_IF_SET(external_link_rel);
1028
1028
  mailto_class = OVERRIDE_IF_SET(mailto_class);
1029
+ link_proc = OVERRIDE_IF_SET(link_proc);
1029
1030
  internal_link_prefix = OVERRIDE_IF_SET(internal_link_prefix);
1030
1031
  img_prefix = OVERRIDE_IF_SET(img_prefix);
1031
1032
  output_style = OVERRIDE_IF_SET(output_style);
@@ -1040,6 +1041,7 @@ VALUE Wikitext_parser_initialize(int argc, VALUE *argv, VALUE self)
1040
1041
  rb_iv_set(self, "@external_link_class", external_link_class);
1041
1042
  rb_iv_set(self, "@external_link_rel", external_link_rel);
1042
1043
  rb_iv_set(self, "@mailto_class", mailto_class);
1044
+ rb_iv_set(self, "@link_proc", link_proc);
1043
1045
  rb_iv_set(self, "@internal_link_prefix", internal_link_prefix);
1044
1046
  rb_iv_set(self, "@img_prefix", img_prefix);
1045
1047
  rb_iv_set(self, "@output_style", output_style);
@@ -1084,6 +1086,7 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1084
1086
  link_class = NIL_P(link_class) ? Qnil : StringValue(link_class);
1085
1087
  VALUE link_rel = rb_iv_get(self, "@external_link_rel");
1086
1088
  link_rel = NIL_P(link_rel) ? Qnil : StringValue(link_rel);
1089
+ VALUE link_proc = rb_iv_get(self, "@link_proc");
1087
1090
  VALUE mailto_class = rb_iv_get(self, "@mailto_class");
1088
1091
  mailto_class = NIL_P(mailto_class) ? Qnil : StringValue(mailto_class);
1089
1092
  VALUE prefix = rb_iv_get(self, "@internal_link_prefix");
@@ -1092,7 +1095,6 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1092
1095
  // process options hash
1093
1096
  int base_indent = 0;
1094
1097
  int base_heading_level = NUM2INT(rb_iv_get(self, "@base_heading_level"));
1095
- VALUE link_proc = Qnil;
1096
1098
  if (!NIL_P(options) && TYPE(options) == T_HASH)
1097
1099
  {
1098
1100
  // :indent => 0 (or more)
@@ -1979,9 +1981,13 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1979
1981
 
1980
1982
  case URI:
1981
1983
  if (IN(NO_WIKI_START))
1984
+ {
1982
1985
  // user can temporarily suppress autolinking by using <nowiki></nowiki>
1983
1986
  // note that unlike MediaWiki, we do allow autolinking inside PRE blocks
1984
- str_append(parser->output, token->start, TOKEN_LEN(token));
1987
+ token_str->ptr = token->start;
1988
+ token_str->len = TOKEN_LEN(token);
1989
+ wiki_append_sanitized_link_target(token_str, parser->output, false);
1990
+ }
1985
1991
  else if (IN(LINK_START))
1986
1992
  {
1987
1993
  // if the URI were allowed it would have been handled already in LINK_START
@@ -2018,7 +2024,11 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
2018
2024
  }
2019
2025
  }
2020
2026
  else
2021
- str_append(parser->link_text, token->start, TOKEN_LEN(token));
2027
+ {
2028
+ token_str->ptr = token->start;
2029
+ token_str->len = TOKEN_LEN(token);
2030
+ wiki_append_sanitized_link_target(token_str, parser->link_text, false);
2031
+ }
2022
2032
  }
2023
2033
  else
2024
2034
  {
@@ -2213,7 +2223,7 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
2213
2223
  {
2214
2224
  // use link target as link text
2215
2225
  str_clear(parser->link_text);
2216
- wiki_append_sanitized_link_target(parser, parser->link_text, true);
2226
+ wiki_append_sanitized_link_target(parser->link_target, parser->link_text, true);
2217
2227
  }
2218
2228
  else
2219
2229
  wiki_trim_link_text(parser);
data/ext/wikitext.c CHANGED
@@ -1,4 +1,4 @@
1
- // Copyright 2008-2009 Wincent Colaiuta. All rights reserved.
1
+ // Copyright 2008-2013 Wincent Colaiuta. All rights reserved.
2
2
  //
3
3
  // Redistribution and use in source and binary forms, with or without
4
4
  // modification, are permitted provided that the following conditions are met:
@@ -80,6 +80,7 @@ void Init_wikitext()
80
80
  rb_define_attr(cWikitextParser, "external_link_class", Qtrue, Qtrue);
81
81
  rb_define_attr(cWikitextParser, "external_link_rel", Qtrue, Qtrue);
82
82
  rb_define_attr(cWikitextParser, "mailto_class", Qtrue, Qtrue);
83
+ rb_define_attr(cWikitextParser, "link_proc", Qtrue, Qtrue);
83
84
  rb_define_attr(cWikitextParser, "autolink", Qtrue, Qtrue);
84
85
  rb_define_attr(cWikitextParser, "space_to_underscore", Qtrue, Qtrue);
85
86
  rb_define_attr(cWikitextParser, "minimum_fulltext_token_length", Qtrue, Qtrue);
@@ -1,4 +1,4 @@
1
- # Copyright 2007-2012 Wincent Colaiuta. All rights reserved.
1
+ # Copyright 2007-2013 Wincent Colaiuta. All rights reserved.
2
2
  #
3
3
  # Redistribution and use in source and binary forms, with or without
4
4
  # modification, are permitted provided that the following conditions are met:
@@ -22,5 +22,5 @@
22
22
  # POSSIBILITY OF SUCH DAMAGE.
23
23
 
24
24
  module Wikitext
25
- VERSION = '3.0.1'
25
+ VERSION = '3.1'
26
26
  end # module Wikitext
@@ -1,4 +1,4 @@
1
- # Copyright 2007-2010 Wincent Colaiuta. All rights reserved.
1
+ # Copyright 2007-2012 Wincent Colaiuta. All rights reserved.
2
2
  #
3
3
  # Redistribution and use in source and binary forms, with or without
4
4
  # modification, are permitted provided that the following conditions are met:
@@ -58,7 +58,12 @@ describe Wikitext::Parser, 'autolinking' do
58
58
  @parser.parse(uri).should == %Q{<p><a href="svn://example.com/" class="external">svn://example.com/</a></p>\n}
59
59
  end
60
60
 
61
- it 'should apple the external_link_class CSS class if set' do
61
+ it 'converts ampersands into entities' do
62
+ expected = %{<p><a href="http://google.com/?q=1&amp;lang=en" class="external">http://google.com/?q=1&amp;lang=en</a></p>\n}
63
+ @parser.parse('http://google.com/?q=1&lang=en').should == expected
64
+ end
65
+
66
+ it 'should apply the external_link_class CSS class if set' do
62
67
  uri = 'http://example.com/'
63
68
  @parser.external_link_class = 'bar'
64
69
  @parser.parse(uri).should == %Q{<p><a href="http://example.com/" class="bar">http://example.com/</a></p>\n}
@@ -75,7 +75,16 @@ describe Wikitext::Parser, 'external links' do
75
75
  # more general case of bug reported here: https://wincent.com/issues/1955
76
76
  expected = %{<p><a href="http://google.com/?q=user@example.com" class="external">Google for user@example.com</a></p>\n}
77
77
  @parser.parse('[http://google.com/?q=user@example.com Google for user@example.com]').should == expected
78
+ end
79
+
80
+ it 'formats ampersands in link targets using entities' do
81
+ expected =%{<p><a href="http://google.com/?q=1&amp;lang=en" class="external">Google</a></p>\n}
82
+ @parser.parse('[http://google.com/?q=1&lang=en Google]').should == expected
83
+ end
78
84
 
85
+ it 'formats ampersands in URIs in link text' do
86
+ expected =%{<p><a href="http://google.com/?q=1&amp;lang=en" class="external">http://google.com/?q=1&amp;lang=en</a></p>\n}
87
+ @parser.parse('[http://google.com/?q=1&lang=en http://google.com/?q=1&lang=en]').should == expected
79
88
  end
80
89
 
81
90
  it 'should format absolute path links' do
@@ -1,5 +1,5 @@
1
1
  # encoding: utf-8
2
- # Copyright 2007-2010 Wincent Colaiuta. All rights reserved.
2
+ # Copyright 2007-2013 Wincent Colaiuta. All rights reserved.
3
3
  #
4
4
  # Redistribution and use in source and binary forms, with or without
5
5
  # modification, are permitted provided that the following conditions are met:
@@ -113,26 +113,41 @@ describe Wikitext::Parser, 'internal links (space to underscore off)' do
113
113
  end
114
114
 
115
115
  describe '"red link" support' do
116
- it 'should accept a Proc object via the optional "link_proc" parameter' do
116
+ it 'accepts a Proc object via the optional "link_proc" parameter' do
117
117
  @parser.parse('foo', :link_proc => Proc.new { }).should == %Q{<p>foo</p>\n}
118
118
  end
119
119
 
120
- it 'should accept a lambda via the optional "link_proc" parameter' do
120
+ it 'accepts a lambda via the optional "link_proc" parameter' do
121
121
  @parser.parse('foo', :link_proc => lambda { }).should == %Q{<p>foo</p>\n}
122
122
  end
123
123
 
124
- it 'should apply custom link CSS when supplied (Proc object version)' do
124
+ it 'applies custom link CSS when supplied (Proc object version)' do
125
125
  link_proc = Proc.new { |target| target == 'bar' ? 'redlink' : nil }
126
126
  expected = %Q{<p><a href="/wiki/foo">foo</a> <a href="/wiki/bar" class="redlink">bar</a></p>\n}
127
127
  @parser.parse('[[foo]] [[bar]]', :link_proc => link_proc).should == expected
128
128
  end
129
129
 
130
- it 'should apply custom link CSS when supplied (lambda version)' do
130
+ it 'applies custom link CSS when supplied (lambda version)' do
131
131
  link_proc = lambda { |target| target == 'bar' ? 'redlink' : nil }
132
132
  expected = %Q{<p><a href="/wiki/foo">foo</a> <a href="/wiki/bar" class="redlink">bar</a></p>\n}
133
133
  @parser.parse('[[foo]] [[bar]]', :link_proc => link_proc).should == expected
134
134
  end
135
135
 
136
+ it 'uses a lamba passed in when the Parser is initialized' do
137
+ link_proc = lambda { |target| target == 'bar' ? 'redlink' : nil }
138
+ parser = Wikitext::Parser.new :link_proc => link_proc
139
+ expected = %Q{<p><a href="/wiki/foo">foo</a> <a href="/wiki/bar" class="redlink">bar</a></p>\n}
140
+ parser.parse('[[foo]] [[bar]]').should == expected
141
+ end
142
+
143
+ it 'uses a lamba set as an attribute on the Parser' do
144
+ link_proc = lambda { |target| target == 'bar' ? 'redlink' : nil }
145
+ parser = Wikitext::Parser.new
146
+ parser.link_proc = link_proc
147
+ expected = %Q{<p><a href="/wiki/foo">foo</a> <a href="/wiki/bar" class="redlink">bar</a></p>\n}
148
+ parser.parse('[[foo]] [[bar]]').should == expected
149
+ end
150
+
136
151
  it 'should apply no custom link CSS when supplied nil (Proc object version)' do
137
152
  expected = %Q{<p><a href="/wiki/foo">foo</a></p>\n}
138
153
  @parser.parse('[[foo]]', :link_proc => Proc.new { |target| nil }).should == expected
@@ -0,0 +1,239 @@
1
+ # Copyright 2009-2011 Wincent Colaiuta. All rights reserved.
2
+ #
3
+ # Redistribution and use in source and binary forms, with or without
4
+ # modification, are permitted provided that the following conditions are met:
5
+ #
6
+ # 1. Redistributions of source code must retain the above copyright notice,
7
+ # this list of conditions and the following disclaimer.
8
+ # 2. Redistributions in binary form must reproduce the above copyright notice,
9
+ # this list of conditions and the following disclaimer in the documentation
10
+ # and/or other materials provided with the distribution.
11
+
12
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
13
+ # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
14
+ # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
15
+ # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
16
+ # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
17
+ # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
18
+ # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
19
+ # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
20
+ # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
21
+ # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
22
+ # POSSIBILITY OF SUCH DAMAGE.
23
+
24
+ require 'spec_helper'
25
+ require 'wikitext/version'
26
+ require 'fileutils'
27
+ require 'pathname'
28
+ require 'wopen3'
29
+
30
+ module RailsSpecs
31
+ TRASH_PATH = Pathname.new(__FILE__).dirname + '.trash'
32
+ AREL_CLONE_PATH = TRASH_PATH + 'arel.git'
33
+ AREL_REPO = 'git://github.com/rails/arel.git'
34
+ RAILS_CLONE_PATH = TRASH_PATH + 'rails.git'
35
+ RAILS_REPO = 'git://github.com/rails/rails.git'
36
+ WIKITEXT_GEM_PATH = TRASH_PATH + '..' + '..'
37
+ SUCCESSFUL_TEST_RESULT = /1 tests, 3 assertions, 0 failures, 0 errors/
38
+
39
+ def run cmd, *args
40
+ result = Wopen3.system(*([cmd] + args))
41
+ if result.status != 0
42
+ command_string = ([cmd] + args).join(' ')
43
+ puts "\n*** COMMAND #{command_string} EXITED WITH NON-ZERO EXIT STATUS (#{result.status})"
44
+ puts "*** STDOUT FOR COMMAND #{command_string}:", result.stdout
45
+ puts "*** STDERR FOR COMMAND #{command_string}:", result.stderr
46
+ raise "non-zero exit status (#{result.status}) for '#{command_string}'"
47
+ end
48
+ result
49
+ end
50
+
51
+ def clone repo, path
52
+ if File.exist? path
53
+ FileUtils.cd path do
54
+ run 'git', 'fetch'
55
+ end
56
+ else
57
+ run 'git', 'clone', repo, path
58
+ end
59
+ end
60
+
61
+ def app_path version
62
+ version = 'edge' if version.nil?
63
+ version = "v#{version}" if version =~ /\A\d\./
64
+ TRASH_PATH + "#{version}-app"
65
+ end
66
+
67
+ # if version is nil will create an "Edge" app
68
+ def create_rails3_app rails_version, arel_version = nil
69
+ app = app_path rails_version
70
+ clone AREL_REPO, AREL_CLONE_PATH
71
+ FileUtils.cd AREL_CLONE_PATH do
72
+ if arel_version
73
+ run 'git', 'reset', '--hard', "v#{arel_version}"
74
+ else # "Edge"
75
+ run 'git', 'reset', '--hard', 'origin/master'
76
+ end
77
+ run 'git', 'clean', '-f'
78
+ end
79
+
80
+ clone RAILS_REPO, RAILS_CLONE_PATH
81
+ FileUtils.cd RAILS_CLONE_PATH do
82
+ if rails_version
83
+ run 'git', 'reset', '--hard', "v#{rails_version}"
84
+ else # "Edge"
85
+ run 'git', 'reset', '--hard', 'origin/master'
86
+ end
87
+ run 'git', 'clean', '-f'
88
+
89
+ begin
90
+ clean_bundler_environment
91
+ run 'env', "AREL=#{AREL_CLONE_PATH}",
92
+ 'bundle', 'install', '--path', '../bundle', '--without', 'db'
93
+ FileUtils.rm_r(app) if File.exist?(app)
94
+ run 'env', "AREL=#{AREL_CLONE_PATH}",
95
+ 'bundle', 'exec', 'bin/rails', 'new', app, '--skip-activerecord', '--dev'
96
+ ensure
97
+ restore_bundler_environment
98
+ end
99
+ end
100
+
101
+ create_gemfile app
102
+ bundlerize app
103
+ end
104
+
105
+ def insert text, after, infile
106
+ output = []
107
+ found = false
108
+ File.read(infile).split("\n").each do |line|
109
+ output << line
110
+ if found == false && line =~ /#{Regexp.escape(after)}/
111
+ found = true
112
+ output << text
113
+ end
114
+ end
115
+ File.open(infile, 'wb') { |f| f.write(output.join("\n")) }
116
+ raise "text '#{after}' not found" unless found
117
+ end
118
+
119
+ def add_text_to_routes text, infile
120
+ insert text, 'Application.routes.draw', infile
121
+ end
122
+
123
+ def create_gemfile app
124
+ File.open(app + 'Gemfile', 'w') do |f|
125
+ f.write <<-GEMFILE
126
+ source :rubygems
127
+ gem 'arel', :path => "#{AREL_CLONE_PATH.realpath}"
128
+ gem 'rake'
129
+ gem 'rails', :path => "#{RAILS_CLONE_PATH.realpath}"
130
+ gem 'sqlite3'
131
+ gem 'wikitext', :path => "#{WIKITEXT_GEM_PATH.realpath}"
132
+ GEMFILE
133
+ end
134
+ end
135
+
136
+ def bundlerize app
137
+ clean_bundler_environment
138
+ Dir.chdir app do
139
+ run 'bundle', 'install', '--path', '../bundle', '--binstubs'
140
+ end
141
+ ensure
142
+ restore_bundler_environment
143
+ end
144
+
145
+ def create_controller app
146
+ File.open(app + 'app' + 'controllers' + 'wiki_controller.rb', 'w') do |f|
147
+ f.write 'class WikiController < ApplicationController; end'
148
+ end
149
+ end
150
+
151
+ def create_template app
152
+ template_dir = app + 'app' + 'views' + 'wiki'
153
+ FileUtils.mkdir template_dir
154
+ File.open(template_dir + 'index.html.wikitext', 'w') do |f|
155
+ f.write '* hello, world!'
156
+ end
157
+ end
158
+
159
+ def create_test app
160
+ # integration tests won't run without a schema.rb
161
+ FileUtils.touch app + 'db' + 'schema.rb'
162
+
163
+ File.open(app + 'test' + 'integration' + 'wiki_test.rb', 'w') do |f|
164
+ f.write <<'TEST'
165
+ require File.join(File.dirname(__FILE__), '..', 'test_helper')
166
+
167
+ class WikiTest < ActionController::IntegrationTest
168
+ def test_wiki_index
169
+ get "/wiki"
170
+ assert_response :success
171
+ assert_template "wiki/index"
172
+ assert_select 'ul>li', 'hello, world!'
173
+ end
174
+ end
175
+ TEST
176
+ end
177
+ end
178
+
179
+ def update_routes app
180
+ routes = app + 'config' + 'routes.rb'
181
+ add_text_to_routes 'match "/wiki" => "wiki#index"', routes
182
+ end
183
+
184
+ def setup_rails_app rails_version = nil, arel_version = nil
185
+ create_rails3_app rails_version, arel_version
186
+ path = app_path rails_version
187
+ update_routes path
188
+ create_controller path
189
+ create_template path
190
+ create_test path
191
+ end
192
+
193
+ def clean_bundler_environment
194
+ @bundler_env = ENV.select { |key, value| key =~ /\A(BUNDLE|GEM)_/ }
195
+ @bundler_env.each { |pair| ENV.delete(pair.first) }
196
+ end
197
+
198
+ def restore_bundler_environment
199
+ @bundler_env.each { |pair| ENV[pair[0]] = pair[1] }
200
+ end
201
+
202
+ def run_integration_test app
203
+ clean_bundler_environment
204
+ FileUtils.cd app do
205
+ return run('bin/rake', 'test:integration').stdout
206
+ end
207
+ ensure
208
+ restore_bundler_environment
209
+ end
210
+ end # module RailsSpecs
211
+
212
+ # different versions of Rails require different versions of Arel
213
+ { '3.1.0' => '2.1.1' }.each do |rails_version, arel_version|
214
+ describe "Template handler in Rails #{rails_version}" do
215
+ include RailsSpecs
216
+
217
+ before :all do
218
+ setup_rails_app rails_version, arel_version
219
+ @path = app_path rails_version
220
+ end
221
+
222
+ it 'should process the template using the wikitext module' do
223
+ run_integration_test(@path).should =~ RailsSpecs::SUCCESSFUL_TEST_RESULT
224
+ end
225
+ end
226
+ end
227
+
228
+ describe 'Template handler in Edge Rails' do
229
+ include RailsSpecs
230
+
231
+ before :all do
232
+ setup_rails_app
233
+ @path = app_path nil
234
+ end
235
+
236
+ it 'should process the template using the wikitext module' do
237
+ run_integration_test(@path).should =~ RailsSpecs::SUCCESSFUL_TEST_RESULT
238
+ end
239
+ end
metadata CHANGED
@@ -4,9 +4,8 @@ version: !ruby/object:Gem::Version
4
4
  prerelease: false
5
5
  segments:
6
6
  - 3
7
- - 0
8
7
  - 1
9
- version: 3.0.1
8
+ version: "3.1"
10
9
  platform: ruby
11
10
  authors:
12
11
  - Wincent Colaiuta
@@ -14,10 +13,11 @@ autorequire:
14
13
  bindir: bin
15
14
  cert_chain: []
16
15
 
17
- date: 2012-03-03 00:00:00 -08:00
16
+ date: 2013-02-16 00:00:00 -08:00
18
17
  default_executable:
19
18
  dependencies:
20
19
  - !ruby/object:Gem::Dependency
20
+ type: :development
21
21
  version_requirements: &id001 !ruby/object:Gem::Requirement
22
22
  requirements:
23
23
  - - ">="
@@ -25,24 +25,24 @@ dependencies:
25
25
  segments:
26
26
  - 0
27
27
  version: "0"
28
- requirement: *id001
29
28
  name: rake
29
+ requirement: *id001
30
30
  prerelease: false
31
- type: :development
32
31
  - !ruby/object:Gem::Dependency
32
+ type: :development
33
33
  version_requirements: &id002 !ruby/object:Gem::Requirement
34
34
  requirements:
35
- - - ~>
35
+ - - ">="
36
36
  - !ruby/object:Gem::Version
37
37
  segments:
38
38
  - 2
39
39
  - 0
40
40
  version: "2.0"
41
- requirement: *id002
42
41
  name: rspec
42
+ requirement: *id002
43
43
  prerelease: false
44
- type: :development
45
44
  - !ruby/object:Gem::Dependency
45
+ type: :development
46
46
  version_requirements: &id003 !ruby/object:Gem::Requirement
47
47
  requirements:
48
48
  - - ">="
@@ -50,11 +50,11 @@ dependencies:
50
50
  segments:
51
51
  - 0
52
52
  version: "0"
53
- requirement: *id003
54
53
  name: thor
54
+ requirement: *id003
55
55
  prerelease: false
56
- type: :development
57
56
  - !ruby/object:Gem::Dependency
57
+ type: :development
58
58
  version_requirements: &id004 !ruby/object:Gem::Requirement
59
59
  requirements:
60
60
  - - ">="
@@ -64,11 +64,11 @@ dependencies:
64
64
  - 5
65
65
  - 8
66
66
  version: 0.5.8
67
- requirement: *id004
68
67
  name: yard
68
+ requirement: *id004
69
69
  prerelease: false
70
- type: :development
71
70
  - !ruby/object:Gem::Dependency
71
+ type: :development
72
72
  version_requirements: &id005 !ruby/object:Gem::Requirement
73
73
  requirements:
74
74
  - - ">="
@@ -76,11 +76,11 @@ dependencies:
76
76
  segments:
77
77
  - 0
78
78
  version: "0"
79
- requirement: *id005
80
79
  name: wopen3
80
+ requirement: *id005
81
81
  prerelease: false
82
- type: :development
83
82
  - !ruby/object:Gem::Dependency
83
+ type: :development
84
84
  version_requirements: &id006 !ruby/object:Gem::Requirement
85
85
  requirements:
86
86
  - - ">="
@@ -88,10 +88,9 @@ dependencies:
88
88
  segments:
89
89
  - 0
90
90
  version: "0"
91
- requirement: *id006
92
91
  name: ZenTest
92
+ requirement: *id006
93
93
  prerelease: false
94
- type: :development
95
94
  description: " Wikitext is a fast wikitext-to-HTML translator written in C.\n"
96
95
  email: win@wincent.com
97
96
  executables:
@@ -149,6 +148,7 @@ files:
149
148
  - spec/p_spec.rb
150
149
  - spec/parser_spec.rb
151
150
  - spec/pre_spec.rb
151
+ - spec/rails_spec.rb
152
152
  - spec/regressions_spec.rb
153
153
  - spec/spec_helper.rb
154
154
  - spec/string_spec.rb