RubyGems - wikitext - Versions diffs - 0.1 → 0.2 - Mend

wikitext 0.1 → 0.2

Files changed (5) hide show

data/ext/parser.c CHANGED

@@ -36,6 +36,7 @@ typedef struct
     VALUE   pending_crlf;           // boolean (Qtrue or Qfalse)
     VALUE   autolink;               // boolean (Qtrue or Qfalse)
     VALUE   treat_slash_as_special; // boolean (Qtrue or Qfalse)
+    VALUE   space_to_underscore;    // boolean (Qtrue or Qfalse)
     VALUE   special_link;           // boolean (Qtrue or Qfalse): is the current link_target a "special" link?
     str_t   *line_ending;
     int     base_indent;            // controlled by the :indent option to Wikitext::Parser#parse
@@ -548,10 +549,11 @@ inline VALUE _Wikitext_parser_trim_link_target(VALUE string)
 // - non-printable (non-ASCII) characters converted to numeric entities
 // - QUOT and AMP characters converted to named entities
-// - leading and trailing whitespace trimmed if trim is Qtrue
-inline VALUE _Wikitext_parser_sanitize_link_target(VALUE string, VALUE trim)
+// - if rollback is Qtrue, there is no special treatment of spaces
+// - if rollback is Qfalse, leading and trailing whitespace is trimmed
+inline VALUE _Wikitext_parser_sanitize_link_target(parser_t *parser, VALUE rollback)
 {
-    string              = StringValue(string);  // raises if string is nil or doesn't quack like a string
+    VALUE string        = StringValue(parser->link_target); // raises if string is nil or doesn't quack like a string
     char    *src        = RSTRING_PTR(string);
     char    *start      = src;                  // remember this so we can check if we're at the start
     long    len         = RSTRING_LEN(string);
@@ -605,7 +607,7 @@ inline VALUE _Wikitext_parser_sanitize_link_target(VALUE string, VALUE trim)
             free(dest_ptr);
             rb_raise(rb_eRangeError, "invalid link text (\">\" may not appear in link text)");
         }
-        else if (*src == ' ' && src == start && trim == Qtrue)
+        else if (*src == ' ' && src == start && rollback == Qfalse)
             start++;                // we eat leading space
         else if (*src >= 0x20 && *src <= 0x7e)    // printable ASCII
         {
@@ -630,7 +632,7 @@ inline VALUE _Wikitext_parser_sanitize_link_target(VALUE string, VALUE trim)
     }
     // trim trailing space if necessary
-    if (trim == Qtrue && non_space > dest_ptr && dest != non_space)
+    if (rollback == Qfalse && non_space > dest_ptr && dest != non_space)
         len = non_space - dest_ptr;
     else
         len = dest - dest_ptr;
@@ -641,7 +643,9 @@ inline VALUE _Wikitext_parser_sanitize_link_target(VALUE string, VALUE trim)
 VALUE Wikitext_parser_sanitize_link_target(VALUE self, VALUE string)
 {
-    return (_Wikitext_parser_sanitize_link_target(string, Qtrue));
+    parser_t parser;
+    parser.link_target          = string;
+    return _Wikitext_parser_sanitize_link_target(&parser, Qfalse);
 }
 // encodes the input string according to RFCs 2396 and 2718
@@ -729,6 +733,8 @@ inline static void _Wikitext_parser_encode_link_target(parser_t *parser)
         }
         else if (*input == ' ' && input == start)
             start++;                    // we eat leading space
+        else if (*input == ' ' && parser->space_to_underscore == Qtrue)
+            *dest++     = '_';
         else    // everything else gets URL-encoded
         {
             *dest++     = '%';
@@ -740,7 +746,7 @@ inline static void _Wikitext_parser_encode_link_target(parser_t *parser)
     }
     // trim trailing space if necessary
-    if (non_space > dest_ptr && dest - 1 != non_space)
+    if (non_space > dest_ptr && dest != non_space)
         dest_len = non_space - dest_ptr;
     else
         dest_len = dest - dest_ptr;
@@ -753,6 +759,7 @@ VALUE Wikitext_parser_encode_link_target(VALUE self, VALUE in)
     parser_t parser;
     parser.link_target              = in;
     parser.treat_slash_as_special   = Qfalse;
+    parser.space_to_underscore      = Qfalse;
     _Wikitext_parser_encode_link_target(&parser);
     return parser.link_target;
 }
@@ -763,6 +770,7 @@ VALUE Wikitext_parser_encode_special_link_target(VALUE self, VALUE in)
     parser_t parser;
     parser.link_target              = in;
     parser.treat_slash_as_special   = Qtrue;
+    parser.space_to_underscore      = Qfalse;
     _Wikitext_parser_encode_link_target(&parser);
     return parser.link_target;
 }
@@ -777,7 +785,7 @@ inline void _Wikitext_rollback_failed_link(parser_t *parser)
     rb_str_cat(parser->output, link_start, sizeof(link_start) - 1);
     if (!NIL_P(parser->link_target))
     {
-        VALUE sanitized = _Wikitext_parser_sanitize_link_target(parser->link_target, Qfalse);
+        VALUE sanitized = _Wikitext_parser_sanitize_link_target(parser, Qtrue);
         rb_str_append(parser->output, sanitized);
         if (scope_includes_separator)
         {
@@ -823,6 +831,7 @@ VALUE Wikitext_parser_initialize(VALUE self)
     rb_iv_set(self, "@external_link_class",     rb_str_new2("external"));
     rb_iv_set(self, "@mailto_class",            rb_str_new2("mailto"));
     rb_iv_set(self, "@internal_link_prefix",    rb_str_new2("/wiki/"));
+    rb_iv_set(self, "@space_to_underscore",     Qfalse);
     rb_iv_set(self, "@treat_slash_as_special",  Qtrue);
     return self;
 }
@@ -883,6 +892,7 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
     parser->pending_crlf            = Qfalse;
     parser->autolink                = rb_iv_get(self, "@autolink");
     parser->treat_slash_as_special  = rb_iv_get(self, "@treat_slash_as_special");
+    parser->space_to_underscore     = rb_iv_get(self, "@space_to_underscore");
     parser->special_link            = Qfalse;
     parser->line_ending             = str_new_from_string(line_ending);
     parser->base_indent             = base_indent;
@@ -1872,7 +1882,7 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
                     // in internal link scope!
                     if (NIL_P(parser->link_text) || RSTRING_LEN(parser->link_text) == 0)
                         // use link target as link text
-                        parser->link_text = _Wikitext_parser_sanitize_link_target(parser->link_target, Qtrue);
+                        parser->link_text = _Wikitext_parser_sanitize_link_target(parser, Qfalse);
                     else
                         parser->link_text = _Wikitext_parser_trim_link_target(parser->link_text);
                     _Wikitext_parser_encode_link_target(parser);

data/ext/wikitext.c CHANGED

@@ -41,6 +41,7 @@ void Init_wikitext()
     rb_define_attr(cWikitextParser, "mailto_class", Qtrue, Qtrue);
     rb_define_attr(cWikitextParser, "autolink", Qtrue, Qtrue);
     rb_define_attr(cWikitextParser, "treat_slash_as_special", Qtrue, Qtrue);
+    rb_define_attr(cWikitextParser, "space_to_underscore", Qtrue, Qtrue);
     // Wikitext::Parser::Error
     eWikitextParserError = rb_define_class_under(cWikitextParser, "Error", rb_eException);

data/spec/internal_link_spec.rb CHANGED

@@ -16,7 +16,7 @@
 require File.join(File.dirname(__FILE__), 'spec_helper.rb')
 require 'wikitext'
-describe Wikitext::Parser, 'internal links' do
+describe Wikitext::Parser, 'internal links (space to underscore off)' do
   before do
     @parser = Wikitext::Parser.new
   end
@@ -443,3 +443,432 @@ describe Wikitext::Parser, 'internal links' do
     end
   end
 end
+describe Wikitext::Parser, 'internal links (space to underscore on)' do
+  before do
+    @parser = Wikitext::Parser.new
+    @parser.space_to_underscore = true
+  end
+  it 'should pass through unexpected link end tokens literally' do
+    @parser.parse('foo ]] bar').should == "<p>foo ]] bar</p>\n"                                     # in plain scope
+    @parser.parse("foo '']]'' bar").should == "<p>foo <em>]]</em> bar</p>\n"                        # in EM scope
+    @parser.parse("foo ''']]''' bar").should == "<p>foo <strong>]]</strong> bar</p>\n"              # in STRONG scope
+    @parser.parse("foo ''''']]''''' bar").should == "<p>foo <strong><em>]]</em></strong> bar</p>\n" # in STRONG_EM scope
+    @parser.parse('foo <tt>]]</tt> bar').should == "<p>foo <tt>]]</tt> bar</p>\n"                   # in TT scope
+    @parser.parse('= foo ]] bar =').should == "<h1>foo ]] bar</h1>\n"                               # in H1 scope
+    @parser.parse('== foo ]] bar ==').should == "<h2>foo ]] bar</h2>\n"                             # in H2 scope
+    @parser.parse('=== foo ]] bar ===').should == "<h3>foo ]] bar</h3>\n"                           # in H3 scope
+    @parser.parse('==== foo ]] bar ====').should == "<h4>foo ]] bar</h4>\n"                         # in H4 scope
+    @parser.parse('===== foo ]] bar =====').should == "<h5>foo ]] bar</h5>\n"                       # in H5 scope
+    @parser.parse('====== foo ]] bar ======').should == "<h6>foo ]] bar</h6>\n"                     # in H6 scope
+    @parser.parse('> ]]').should == "<blockquote>\n  <p>]]</p>\n</blockquote>\n"                    # in BLOCKQUOTE scope
+  end
+  it 'should turn single words into links' do
+    @parser.parse('[[foo]]').should == %Q{<p><a href="/wiki/foo">foo</a></p>\n}
+  end
+  it 'should turn multiple words into links, converting spaces into underscores' do
+    @parser.parse('[[foo bar]]').should == %Q{<p><a href="/wiki/foo_bar">foo bar</a></p>\n}
+  end
+  it 'should trim leading whitespace' do
+    @parser.parse('[[ foo]]').should == %Q{<p><a href="/wiki/foo">foo</a></p>\n}
+    @parser.parse('[[  foo]]').should == %Q{<p><a href="/wiki/foo">foo</a></p>\n}
+    @parser.parse('[[   foo]]').should == %Q{<p><a href="/wiki/foo">foo</a></p>\n}
+    @parser.parse('[[    foo]]').should == %Q{<p><a href="/wiki/foo">foo</a></p>\n}
+  end
+  it 'should trim trailing whitespace' do
+    @parser.parse('[[foo ]]').should == %Q{<p><a href="/wiki/foo">foo</a></p>\n}
+    @parser.parse('[[foo  ]]').should == %Q{<p><a href="/wiki/foo">foo</a></p>\n}
+    @parser.parse('[[foo   ]]').should == %Q{<p><a href="/wiki/foo">foo</a></p>\n}
+    @parser.parse('[[foo    ]]').should == %Q{<p><a href="/wiki/foo">foo</a></p>\n}   # was a bug (exception)
+    @parser.parse('[[foo     ]]').should == %Q{<p><a href="/wiki/foo">foo</a></p>\n}  # was a bug (crash)
+  end
+  it 'should trim leading and trailing whitespace (combined)' do
+    @parser.parse('[[ foo    ]]').should == %Q{<p><a href="/wiki/foo">foo</a></p>\n}
+    @parser.parse('[[  foo   ]]').should == %Q{<p><a href="/wiki/foo">foo</a></p>\n}
+    @parser.parse('[[   foo  ]]').should == %Q{<p><a href="/wiki/foo">foo</a></p>\n}
+    @parser.parse('[[    foo ]]').should == %Q{<p><a href="/wiki/foo">foo</a></p>\n}
+  end
+  it 'should convert embedded whitespace into underscores' do
+    @parser.parse('[[ foo bar ]]').should == %Q{<p><a href="/wiki/foo_bar">foo bar</a></p>\n}
+    @parser.parse('[[foo bar ]]').should == %Q{<p><a href="/wiki/foo_bar">foo bar</a></p>\n}
+    @parser.parse('[[ foo bar ]]').should == %Q{<p><a href="/wiki/foo_bar">foo bar</a></p>\n}
+  end
+  it 'should encode and sanitize quotes' do
+    # note how percent encoding is used in the href, and named entities in the link text
+    @parser.parse('[[hello "world"]]').should == %Q{<p><a href="/wiki/hello_%22world%22">hello &quot;world&quot;</a></p>\n}
+  end
+  it 'should encode and sanitize ampersands' do
+    @parser.parse('[[a & b]]').should == %Q{<p><a href="/wiki/a_%26_b">a &amp; b</a></p>\n}
+  end
+  it 'should allow ampersand entities (special exception)' do
+    @parser.parse('[[a &amp; b]]').should == %Q{<p><a href="/wiki/a_%26_b">a &amp; b</a></p>\n}
+  end
+  it 'should allow quote entities (special exception)' do
+    @parser.parse('[[a &quot; b]]').should == %Q{<p><a href="/wiki/a_%22_b">a &quot; b</a></p>\n}
+  end
+  it 'should handle mixed scenarios (quotes, ampersands, non-ASCII characers)' do
+    expected = %Q{<p><a href="/wiki/foo%2c_%22bar%22_%26_baz_%e2%82%ac">foo, &quot;bar&quot; &amp; baz &#x20ac;</a></p>\n}
+    @parser.parse('[[foo, "bar" & baz €]]').should == expected
+  end
+  it 'should handle links in paragraph flows' do
+    expected = %Q{<p>foo <a href="/wiki/bar">bar</a> baz</p>\n}
+    @parser.parse('foo [[bar]] baz').should == expected # was a bug
+  end
+  describe 'custom link text' do
+    it 'should recognize link text placed after the separator' do
+      @parser.parse('[[foo|bar]]').should == %Q{<p><a href="/wiki/foo">bar</a></p>\n}
+    end
+    it 'should trim whitespace to the left of the separator' do
+      @parser.parse('[[foo |bar]]').should == %Q{<p><a href="/wiki/foo">bar</a></p>\n}
+      @parser.parse('[[foo  |bar]]').should == %Q{<p><a href="/wiki/foo">bar</a></p>\n}
+      @parser.parse('[[foo   |bar]]').should == %Q{<p><a href="/wiki/foo">bar</a></p>\n}
+      @parser.parse('[[foo    |bar]]').should == %Q{<p><a href="/wiki/foo">bar</a></p>\n}
+      @parser.parse('[[foo     |bar]]').should == %Q{<p><a href="/wiki/foo">bar</a></p>\n}
+      @parser.parse('[[foo      |bar]]').should == %Q{<p><a href="/wiki/foo">bar</a></p>\n}
+    end
+    it 'should trim whitespace to the right of the separator' do
+      @parser.parse('[[foo| bar]]').should == %Q{<p><a href="/wiki/foo">bar</a></p>\n}
+      @parser.parse('[[foo|  bar]]').should == %Q{<p><a href="/wiki/foo">bar</a></p>\n}
+      @parser.parse('[[foo|   bar]]').should == %Q{<p><a href="/wiki/foo">bar</a></p>\n}
+      @parser.parse('[[foo|    bar]]').should == %Q{<p><a href="/wiki/foo">bar</a></p>\n}
+      @parser.parse('[[foo|     bar]]').should == %Q{<p><a href="/wiki/foo">bar</a></p>\n}
+      @parser.parse('[[foo|      bar]]').should == %Q{<p><a href="/wiki/foo">bar</a></p>\n}
+    end
+    it 'should trim whitespace on both sides of the separator (at the same time)' do
+      @parser.parse('[[foo      | bar]]').should == %Q{<p><a href="/wiki/foo">bar</a></p>\n}
+      @parser.parse('[[foo     |  bar]]').should == %Q{<p><a href="/wiki/foo">bar</a></p>\n}
+      @parser.parse('[[foo    |   bar]]').should == %Q{<p><a href="/wiki/foo">bar</a></p>\n}
+      @parser.parse('[[foo   |    bar]]').should == %Q{<p><a href="/wiki/foo">bar</a></p>\n}
+      @parser.parse('[[foo  |     bar]]').should == %Q{<p><a href="/wiki/foo">bar</a></p>\n}
+      @parser.parse('[[foo |      bar]]').should == %Q{<p><a href="/wiki/foo">bar</a></p>\n}
+    end
+    it 'should trim trailing whitespace from the link text' do
+      @parser.parse('[[foo|bar ]]').should == %Q{<p><a href="/wiki/foo">bar</a></p>\n}
+      @parser.parse('[[foo|bar  ]]').should == %Q{<p><a href="/wiki/foo">bar</a></p>\n}
+      @parser.parse('[[foo|bar   ]]').should == %Q{<p><a href="/wiki/foo">bar</a></p>\n}
+      @parser.parse('[[foo|bar    ]]').should == %Q{<p><a href="/wiki/foo">bar</a></p>\n}
+      @parser.parse('[[foo|bar     ]]').should == %Q{<p><a href="/wiki/foo">bar</a></p>\n}
+      @parser.parse('[[foo|bar      ]]').should == %Q{<p><a href="/wiki/foo">bar</a></p>\n}
+    end
+    it 'should trim leading and trailing whitespace from the link text' do
+      @parser.parse('[[foo|      bar ]]').should == %Q{<p><a href="/wiki/foo">bar</a></p>\n}
+      @parser.parse('[[foo|     bar  ]]').should == %Q{<p><a href="/wiki/foo">bar</a></p>\n}
+      @parser.parse('[[foo|    bar   ]]').should == %Q{<p><a href="/wiki/foo">bar</a></p>\n}
+      @parser.parse('[[foo|   bar    ]]').should == %Q{<p><a href="/wiki/foo">bar</a></p>\n}
+      @parser.parse('[[foo|  bar     ]]').should == %Q{<p><a href="/wiki/foo">bar</a></p>\n}
+      @parser.parse('[[foo| bar      ]]').should == %Q{<p><a href="/wiki/foo">bar</a></p>\n}
+    end
+    it 'should treat a separator inside the link text as part of the link text' do
+      @parser.parse('[[foo|bar|baz]]').should == %Q{<p><a href="/wiki/foo">bar|baz</a></p>\n}
+    end
+    it 'should treat separators outside of links as normal text' do
+      @parser.parse('foo|bar').should == %Q{<p>foo|bar</p>\n}
+    end
+    it 'should allow em markup in the custom link text' do
+      expected = %Q{<p><a href="/wiki/foo">bar <em>baz</em></a></p>\n}
+      @parser.parse("[[foo|bar ''baz'']]").should == expected
+    end
+    it 'should automatically close unclosed em markup in the custom link text' do
+      expected = %Q{<p><a href="/wiki/foo">bar <em>baz</em></a></p>\n}
+      @parser.parse("[[foo|bar ''baz]]").should == expected
+    end
+    it 'should allow strong markup in the custom link text' do
+      expected = %Q{<p><a href="/wiki/foo">bar <strong>baz</strong></a></p>\n}
+      @parser.parse("[[foo|bar '''baz''']]").should == expected
+    end
+    it 'should automatically close unclosed strong markup in the custom link text' do
+      expected = %Q{<p><a href="/wiki/foo">bar <strong>baz</strong></a></p>\n}
+      @parser.parse("[[foo|bar '''baz]]").should == expected
+    end
+    it 'should allow strong/em markup in the custom link text' do
+      expected = %Q{<p><a href="/wiki/foo">bar <strong><em>baz</em></strong></a></p>\n}
+      @parser.parse("[[foo|bar '''''baz''''']]").should == expected
+    end
+    it 'should automatically close unclosed strong/em markup in the custom link text' do
+      expected = %Q{<p><a href="/wiki/foo">bar <strong><em>baz</em></strong></a></p>\n}
+      @parser.parse("[[foo|bar '''''baz]]").should == expected
+    end
+    it 'should allow tt markup in the custom link text' do
+      expected = %Q{<p><a href="/wiki/foo">bar <tt>baz</tt></a></p>\n}
+      @parser.parse('[[foo|bar <tt>baz</tt>]]').should == expected
+    end
+    it 'should automatically close unclosed tt markup in the custom link text' do
+      expected = %Q{<p><a href="/wiki/foo">bar <tt>baz</tt></a></p>\n}
+      @parser.parse('[[foo|bar <tt>baz]]').should == expected
+    end
+    it 'should allow named entities in the custom link text' do
+      expected = %Q{<p><a href="/wiki/foo">bar &copy;</a></p>\n}
+      @parser.parse('[[foo|bar &copy;]]').should == expected
+      # explicitly test &quot; because it is tokenized separately from the other named entities
+      expected = %Q{<p><a href="/wiki/foo">bar &quot;</a></p>\n}
+      @parser.parse('[[foo|bar &quot;]]').should == expected
+      # explicitly test &amp; because it is tokenized separately from the other named entities
+      expected = %Q{<p><a href="/wiki/foo">bar &amp;</a></p>\n}
+      @parser.parse('[[foo|bar &amp;]]').should == expected
+    end
+    it 'should allow decimal entities in the custom link text' do
+      expected = %Q{<p><a href="/wiki/foo">bar &#8364;</a></p>\n}
+      @parser.parse('[[foo|bar &#8364;]]').should == expected
+    end
+    it 'should allow hexadecimal entities in the custom link text' do
+      expected = %Q{<p><a href="/wiki/foo">bar &#x20ac;</a></p>\n}
+      @parser.parse('[[foo|bar &#x20ac;]]').should == expected
+    end
+    it 'should sanitize non-ASCII characters in the custom link text' do
+      expected = %Q{<p><a href="/wiki/foo">bar &#x20ac;</a></p>\n}
+      @parser.parse('[[foo|bar €]]').should == expected
+    end
+    it 'should sanitize characters that have special meaning in HTML in the custom link text' do
+      expected = %Q{<p><a href="/wiki/foo">bar &lt;</a></p>\n}
+      @parser.parse('[[foo|bar <]]').should == expected
+      expected = %Q{<p><a href="/wiki/foo">bar &gt;</a></p>\n}
+      @parser.parse('[[foo|bar >]]').should == expected
+      expected = %Q{<p><a href="/wiki/foo">bar &amp;</a></p>\n}
+      @parser.parse('[[foo|bar &]]').should == expected
+      expected = %Q{<p><a href="/wiki/foo">bar &quot;baz&quot;</a></p>\n}
+      @parser.parse('[[foo|bar "baz"]]').should == expected
+    end
+    it 'should allow nowiki markup in the custom link text' do
+      expected = %Q{<p><a href="/wiki/foo">bar [[</a></p>\n}
+      @parser.parse("[[foo|bar <nowiki>[[</nowiki>]]").should == expected
+      expected = %Q{<p><a href="/wiki/foo">bar [</a></p>\n}
+      @parser.parse("[[foo|bar <nowiki>[</nowiki>]]").should == expected
+      expected = %Q{<p><a href="/wiki/foo">bar ]]</a></p>\n}
+      @parser.parse("[[foo|bar <nowiki>]]</nowiki>]]").should == expected
+      expected = %Q{<p><a href="/wiki/foo">bar ]</a></p>\n}
+      @parser.parse("[[foo|bar <nowiki>]</nowiki>]]").should == expected
+    end
+  end
+  describe 'overriding the link prefix' do
+    it 'should be able to override the link prefix' do
+      @parser.internal_link_prefix = '/custom/'
+      @parser.parse('[[foo]]').should == %Q{<p><a href="/custom/foo">foo</a></p>\n}
+    end
+    it 'should interpet a nil link prefix as meaning no prefix' do
+      @parser.internal_link_prefix = nil
+      @parser.parse('[[foo]]').should == %Q{<p><a href="foo">foo</a></p>\n}
+    end
+  end
+  describe 'special links' do
+    it 'should recognize links of the form "bug/10" as special links' do
+      @parser.parse('[[bug/10]]').should == %Q{<p><a href="/bug/10">bug/10</a></p>\n}
+      @parser.parse('[[issue/25]]').should == %Q{<p><a href="/issue/25">issue/25</a></p>\n}
+      @parser.parse('[[post/7]]').should == %Q{<p><a href="/post/7">post/7</a></p>\n}
+    end
+    it 'should not recognize special links when "treat_slash_as_special" is set to false' do
+      @parser.treat_slash_as_special = false
+      @parser.parse('[[bug/10]]').should == %Q{<p><a href="/wiki/bug%2f10">bug/10</a></p>\n}
+      @parser.parse('[[issue/25]]').should == %Q{<p><a href="/wiki/issue%2f25">issue/25</a></p>\n}
+      @parser.parse('[[post/7]]').should == %Q{<p><a href="/wiki/post%2f7">post/7</a></p>\n}
+    end
+    it 'should accept custom link text in conjunction with special links' do
+      @parser.parse('[[bug/10|bug #10]]').should == %Q{<p><a href="/bug/10">bug #10</a></p>\n}
+    end
+    it 'should ignore link prefix overrides when emitting special links' do
+      @parser.internal_link_prefix = '/custom/'
+      @parser.parse('[[bug/10]]').should == %Q{<p><a href="/bug/10">bug/10</a></p>\n}
+    end
+    it 'should not classify links as special merely because of the presence of a slash' do
+      # we want the syntax to be tight to minimize false positives
+      @parser.parse('[[foo/bar]]').should == %Q{<p><a href="/wiki/foo%2fbar">foo/bar</a></p>\n}
+    end
+    it 'should not accept special links which have a leading forward slash' do
+      # this is a syntax error
+      @parser.parse('[[/bug/10]]').should == %Q{<p><a href="/wiki/%2fbug%2f10">/bug/10</a></p>\n}
+    end
+  end
+  describe 'invalid links' do
+    it 'should not allow entities in the link text' do
+      @parser.parse('[[a &euro; b]]').should == "<p>[[a &euro; b]]</p>\n"
+    end
+    it 'should not allow URIs in the link text' do
+      expected = %Q{<p>[[hello <a href="http://example.com/" class="external">http://example.com/</a> world]]</p>\n}
+      @parser.parse('[[hello http://example.com/ world]]').should == expected
+    end
+    it 'should handle embedded [[ inside links' do
+      # note how first part "[[foo " in itself is invalid and so gets rejected and echoed literally
+      expected = %Q{<p>[[foo <a href="/wiki/bar">bar</a></p>\n}
+      @parser.parse('[[foo [[bar]]').should == expected
+    end
+    it 'should handled embedded ]] inside links' do
+      # note how the link gets terminated early and the trailing part is rejected and echoed literally
+      expected = %Q{<p><a href="/wiki/foo">foo</a>bar]]</p>\n}
+      @parser.parse('[[foo ]]bar]]').should == expected
+    end
+    it 'should handle embedded [ inside links' do
+      # [ is not allowed at all so the entire link is rendered invalid
+      expected = "<p>[[foo [bar]]</p>\n"
+      @parser.parse('[[foo [bar]]').should == expected
+    end
+    it 'should handle embedded ] inside links' do
+      # ] is not allowed at all so the entire link is rendered invalid
+      expected = "<p>[[foo ]bar]]</p>\n"
+      @parser.parse('[[foo ]bar]]').should == expected
+    end
+    describe 'unterminated link targets (end-of-file)' do
+      it 'should rollback and show the unterminated link' do
+        @parser.parse('[[foo').should == %Q{<p>[[foo</p>\n}
+      end
+      it 'should not trim leading whitespace when rolling back' do
+        @parser.parse('[[ foo').should    == %Q{<p>[[ foo</p>\n}
+        @parser.parse('[[  foo').should   == %Q{<p>[[  foo</p>\n}
+        @parser.parse('[[   foo').should  == %Q{<p>[[   foo</p>\n}
+        @parser.parse('[[    foo').should == %Q{<p>[[    foo</p>\n}
+      end
+      it 'should not trim trailing whitespace when rolling back' do
+        @parser.parse('[[foo ').should    == %Q{<p>[[foo </p>\n}
+        @parser.parse('[[foo  ').should   == %Q{<p>[[foo  </p>\n}
+        @parser.parse('[[foo   ').should  == %Q{<p>[[foo   </p>\n}
+        @parser.parse('[[foo    ').should == %Q{<p>[[foo    </p>\n}
+      end
+      it 'should not trim leadig and trailing whitespace (combined) when rolling back' do
+        @parser.parse('[[    foo ').should == %Q{<p>[[    foo </p>\n}
+        @parser.parse('[[   foo  ').should == %Q{<p>[[   foo  </p>\n}
+        @parser.parse('[[  foo   ').should == %Q{<p>[[  foo   </p>\n}
+        @parser.parse('[[ foo    ').should == %Q{<p>[[ foo    </p>\n}
+      end
+    end
+    describe 'unterminated link targets (end-of-line)' do
+      it 'should rollback and show the unterminated link' do
+        @parser.parse("[[foo\n").should == %Q{<p>[[foo</p>\n}
+      end
+      it 'should not trim leading whitespace when rolling back' do
+        @parser.parse("[[ foo\n").should    == %Q{<p>[[ foo</p>\n}
+        @parser.parse("[[  foo\n").should   == %Q{<p>[[  foo</p>\n}
+        @parser.parse("[[   foo\n").should  == %Q{<p>[[   foo</p>\n}
+        @parser.parse("[[    foo\n").should == %Q{<p>[[    foo</p>\n}
+      end
+      it 'should not trim trailing whitespace when rolling back' do
+        @parser.parse("[[foo \n").should    == %Q{<p>[[foo </p>\n}
+        @parser.parse("[[foo  \n").should   == %Q{<p>[[foo  </p>\n}
+        @parser.parse("[[foo   \n").should  == %Q{<p>[[foo   </p>\n}
+        @parser.parse("[[foo    \n").should == %Q{<p>[[foo    </p>\n}
+      end
+      it 'should not trim leading and trailing whitespace (combined) when rolling back' do
+        @parser.parse("[[ foo    \n").should == %Q{<p>[[ foo    </p>\n}
+        @parser.parse("[[  foo   \n").should == %Q{<p>[[  foo   </p>\n}
+        @parser.parse("[[   foo  \n").should == %Q{<p>[[   foo  </p>\n}
+        @parser.parse("[[    foo \n").should == %Q{<p>[[    foo </p>\n}
+      end
+    end
+    describe 'missing link text' do
+      it 'should use link target' do
+        @parser.parse('[[foo|]]').should == %Q{<p><a href="/wiki/foo">foo</a></p>\n}
+      end
+    end
+    describe 'link cut off at separator (end-of-file)' do
+      it 'should rollback and show the unterminated link' do
+        @parser.parse('[[foo|').should == %Q{<p>[[foo|</p>\n}
+        @parser.parse('[[foo| ').should == %Q{<p>[[foo| </p>\n}
+        @parser.parse('[[foo|  ').should == %Q{<p>[[foo|  </p>\n}
+        @parser.parse('[[foo|   ').should == %Q{<p>[[foo|   </p>\n}
+        @parser.parse('[[foo|    ').should == %Q{<p>[[foo|    </p>\n}
+        @parser.parse('[[foo|     ').should == %Q{<p>[[foo|     </p>\n}
+        @parser.parse('[[foo|      ').should == %Q{<p>[[foo|      </p>\n}
+      end
+    end
+    describe 'link cut off at separator (end-of-line)' do
+      it 'should rollback and show the unterminated link' do
+        @parser.parse("[[foo|\n").should == %Q{<p>[[foo|</p>\n}
+        @parser.parse("[[foo| \n").should == %Q{<p>[[foo| </p>\n}
+        @parser.parse("[[foo|  \n").should == %Q{<p>[[foo|  </p>\n}
+        @parser.parse("[[foo|   \n").should == %Q{<p>[[foo|   </p>\n}
+        @parser.parse("[[foo|    \n").should == %Q{<p>[[foo|    </p>\n}
+        @parser.parse("[[foo|     \n").should == %Q{<p>[[foo|     </p>\n}
+        @parser.parse("[[foo|      \n").should == %Q{<p>[[foo|      </p>\n}
+      end
+    end
+    describe 'unterminated link text (end-of-file)' do
+      it 'should rollback and show the unterminated link' do
+        @parser.parse('[[foo|hello').should == %Q{<p>[[foo|hello</p>\n}
+        @parser.parse('[[foo|hello ').should == %Q{<p>[[foo|hello </p>\n}
+        @parser.parse('[[foo|hello  ').should == %Q{<p>[[foo|hello  </p>\n}
+        @parser.parse('[[foo|hello   ').should == %Q{<p>[[foo|hello   </p>\n}
+        @parser.parse('[[foo|hello    ').should == %Q{<p>[[foo|hello    </p>\n}
+        @parser.parse('[[foo|hello     ').should == %Q{<p>[[foo|hello     </p>\n}
+        @parser.parse('[[foo|hello      ').should == %Q{<p>[[foo|hello      </p>\n}
+      end
+    end
+    describe 'unterminated link text (end-of-line)' do
+      it 'should rollback and show the unterminated link' do
+        @parser.parse("[[foo|hello\n").should == %Q{<p>[[foo|hello</p>\n}
+        @parser.parse("[[foo|hello \n").should == %Q{<p>[[foo|hello </p>\n}
+        @parser.parse("[[foo|hello  \n").should == %Q{<p>[[foo|hello  </p>\n}
+        @parser.parse("[[foo|hello   \n").should == %Q{<p>[[foo|hello   </p>\n}
+        @parser.parse("[[foo|hello    \n").should == %Q{<p>[[foo|hello    </p>\n}
+        @parser.parse("[[foo|hello     \n").should == %Q{<p>[[foo|hello     </p>\n}
+        @parser.parse("[[foo|hello      \n").should == %Q{<p>[[foo|hello      </p>\n}
+      end
+    end
+  end
+end

data/spec/wikitext_spec.rb CHANGED

@@ -16,6 +16,16 @@
 require File.join(File.dirname(__FILE__), 'spec_helper.rb')
 require 'wikitext'
+describe Wikitext::Parser do
+  before do
+    @parser = Wikitext::Parser.new
+  end
+  it 'should turn space-to-underscore off by default' do
+    @parser.space_to_underscore.should == false
+  end
+end
 describe Wikitext::Parser, 'parsing non-ASCII input' do
   before do
     @parser = Wikitext::Parser.new

metadata CHANGED

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: wikitext
 version: !ruby/object:Gem::Version
-  version: "0.1"
+  version: "0.2"
 platform: ruby
 authors:
 - Wincent Colaiuta
@@ -9,7 +9,7 @@ autorequire:
 bindir: bin
 cert_chain: []
-date: 2008-02-13 00:00:00 +01:00
+date: 2008-02-18 00:00:00 +01:00
 default_executable:
 dependencies: []