RubyGems - wikitext - Versions diffs - 0.1 - Mend

wikitext 0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (42) hide show

data/ext/ary.h +99 -0
data/ext/depend +22 -0
data/ext/extconf.rb +23 -0
data/ext/parser.c +2174 -0
data/ext/parser.h +31 -0
data/ext/str.h +135 -0
data/ext/token.c +109 -0
data/ext/token.h +95 -0
data/ext/wikitext.c +60 -0
data/ext/wikitext.h +30 -0
data/ext/wikitext_ragel.c +3354 -0
data/ext/wikitext_ragel.h +17 -0
data/spec/autolinking_spec.rb +122 -0
data/spec/blockquote_spec.rb +570 -0
data/spec/em_spec.rb +97 -0
data/spec/encoding_spec.rb +124 -0
data/spec/entity_spec.rb +40 -0
data/spec/external_link_spec.rb +289 -0
data/spec/h1_spec.rb +59 -0
data/spec/h2_spec.rb +59 -0
data/spec/h3_spec.rb +59 -0
data/spec/h4_spec.rb +59 -0
data/spec/h5_spec.rb +59 -0
data/spec/h6_spec.rb +59 -0
data/spec/indentation_spec.rb +70 -0
data/spec/integration_spec.rb +265 -0
data/spec/internal_link_spec.rb +445 -0
data/spec/line_endings_spec.rb +81 -0
data/spec/link_encoding_spec.rb +132 -0
data/spec/link_sanitizing_spec.rb +228 -0
data/spec/nowiki_spec.rb +155 -0
data/spec/p_spec.rb +44 -0
data/spec/pre_spec.rb +411 -0
data/spec/regressions_spec.rb +45 -0
data/spec/spec_helper.rb +77 -0
data/spec/strong_em_spec.rb +89 -0
data/spec/strong_spec.rb +99 -0
data/spec/tokenizing_spec.rb +190 -0
data/spec/tt_spec.rb +100 -0
data/spec/ul_spec.rb +307 -0
data/spec/wikitext_spec.rb +50 -0
metadata +93 -0

data/spec/regressions_spec.rb ADDED Viewed

@@ -0,0 +1,45 @@
+#!/usr/bin/env ruby
+# Copyright 2008 Wincent Colaiuta
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+require File.join(File.dirname(__FILE__), 'spec_helper.rb')
+require 'wikitext'
+# this is a general-purpose file in which I'll add specs for former bugs to make sure that they don't regress
+describe Wikitext::Parser, 'regressions' do
+  before do
+    @parser = Wikitext::Parser.new
+  end
+  it 'should correctly transform example #1' do
+    # turns out that this was never a bug in wikitext: it was a bug in the host application
+    input = dedent <<-END
+      = Leopard =
+      * punto 1
+      * punto 2
+      Y [[otro articulo]].
+    END
+    expected = dedent <<-END
+      <h1>Leopard</h1>
+      <ul>
+        <li>punto 1</li>
+        <li>punto 2</li>
+      </ul>
+      <p>Y <a href="/wiki/otro%20articulo">otro articulo</a>.</p>
+    END
+    @parser.parse(input).should == expected
+  end
+end

data/spec/spec_helper.rb ADDED Viewed

@@ -0,0 +1,77 @@
+# Copyright 2007-2008 Wincent Colaiuta
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+require 'pathname'
+require 'rubygems'
+require 'spec'
+# allow indenting of multiline spec data for better readability
+# but must dedent it before actually doing the comparison
+def dedent spaces, string = nil
+  if spaces.kind_of? String
+    if not string.nil?
+      raise 'When first argument is a String, second argument must be nil'
+    else
+      # default use: single String parameter, dedent by 6
+      string = spaces
+      spaces = 6
+    end
+  elsif spaces.kind_of? Integer
+    if string.nil? or not string.kind_of?(String)
+      raise 'When first argument is a number, second must be a String'
+    end
+  else
+    raise 'Invalid argument'
+  end
+  string.each do |line|
+    if not line =~ /\A {#{spaces.to_i}}/
+      raise "Underlength indent for line: #{line.inspect}"
+    end
+  end
+  string.gsub /^ {#{spaces.to_i}}/, ''
+end
+module Wikitext
+  if not const_defined? 'EXTDIR'
+    # append the local "ext" directory to search path if not already present
+    base        = File.join(File.dirname(__FILE__), '..')
+    EXTDIR      = Pathname.new(File.join(base, 'ext')).realpath
+    normalized  = $:.collect { |path| Pathname.new(path).realpath rescue path }
+    $:.push(EXTDIR) unless normalized.include?(EXTDIR)
+  end
+end # module Wikitext
+module UTF8
+  if not const_defined? 'Invalid'
+    module Invalid
+      TWO_BYTES_MISSING_SECOND_BYTE       = [0b11011111].pack('C*')
+      TWO_BYTES_MALFORMED_SECOND_BYTE     = [0b11011111, 0b00001111].pack('C*') # should be 10......
+      OVERLONG                            = [0b11000000, 0b10000000].pack('C*') # lead byte is 110..... but code point is <= 127
+      OVERLONG_ALT                        = [0b11000001, 0b10000000].pack('C*') # lead byte is 110..... but code point is <= 127
+      THREE_BYTES_MISSING_SECOND_BYTE     = [0b11100000].pack('C*')
+      THREE_BYTES_MISSING_THIRD_BYTE      = [0b11100000, 0b10000000].pack('C*')
+      THREE_BYTES_MALFORMED_SECOND_BYTE   = [0b11100000, 0b00001111, 0b10000000].pack('C*') # should be 10......
+      THREE_BYTES_MALFORMED_THIRD_BYTE    = [0b11100000, 0b10000000, 0b00001111].pack('C*') # should be 10......
+      FOUR_BYTES_MISSING_SECOND_BYTE      = [0b11110000].pack('C*')
+      FOUR_BYTES_MISSING_THIRD_BYTE       = [0b11110000, 0x10111111].pack('C*')
+      FOUR_BYTES_MISSING_FOURTH_BYTE      = [0b11110000, 0x10111111, 0x10111111].pack('C*')
+      FOUR_BYTES_ILLEGAL_FIRST_BYTE       = [0b11110101, 0x10111111, 0x10111111, 0x10111111].pack('C*')
+      FOUR_BYTES_ILLEGAL_FIRST_BYTE_ALT   = [0b11110101, 0x10111111, 0x10111111, 0x10111111].pack('C*')
+      FOUR_BYTES_ILLEGAL_FIRST_BYTE_ALT2  = [0b11110101, 0x10111111, 0x10111111, 0x10111111].pack('C*')
+      UNEXPECTED_BYTE                     = [0b11111000].pack('C*')
+    end # module Invalid
+  end
+end # module UTF8
+require 'wikitext'

data/spec/strong_em_spec.rb ADDED Viewed

@@ -0,0 +1,89 @@
+#!/usr/bin/env ruby
+# Copyright 2007-2008 Wincent Colaiuta
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+require File.join(File.dirname(__FILE__), 'spec_helper.rb')
+require 'wikitext'
+# Examples for the five-apostrophe token (''''') and how it combines with the separate
+# three-apostrophe (<strong>) and two-apostrophe (<em>) tokens. Each example feeds raw
+# wikitext to Wikitext::Parser#parse and compares the returned HTML string exactly.
+describe Wikitext::Parser, 'parsing combined <strong>/<em> spans' do
+  # a fresh parser for every example
+  before do
+    @parser = Wikitext::Parser.new
+  end
+  it 'should recognize paired "<strong><em>" tokens' do
+    @parser.parse("foo '''''bar''''' baz").should == "<p>foo <strong><em>bar</em></strong> baz</p>\n"
+  end
+  # whatever is left open at the end of input is closed in the output
+  it 'should automatically insert missing closing tags' do
+    @parser.parse("foo '''''bar").should == "<p>foo <strong><em>bar</em></strong></p>\n"    # totally missing
+    @parser.parse("foo '''''bar''").should == "<p>foo <strong><em>bar</em></strong></p>\n"  # only <strong> missing
+    @parser.parse("foo '''''bar'''").should == "<p>foo <strong><em>bar</em></strong></p>\n" # only <em> missing
+  end
+  # same inputs as above, but followed by a newline and more text: the spans end at the
+  # newline and the following text appears after a single space
+  it 'should automatically close unclosed spans upon hitting newlines' do
+    @parser.parse("foo '''''bar\nbaz").should == "<p>foo <strong><em>bar</em></strong> baz</p>\n"     # totally missing
+    @parser.parse("foo '''''bar''\nbaz").should == "<p>foo <strong><em>bar</em></strong> baz</p>\n"   # only <strong> missing
+    @parser.parse("foo '''''bar'''\nbaz").should == "<p>foo <strong><em>bar</em></strong> baz</p>\n"  # only <em> missing
+  end
+  # a combined token may close spans that were opened separately, and vice versa
+  it 'should allow combined "<strong><em>" tokens to interact with separate <strong> and <em> tokens' do
+    @parser.parse("foo '''bar ''baz'''''").should == "<p>foo <strong>bar <em>baz</em></strong></p>\n"
+    @parser.parse("foo ''bar '''baz'''''").should == "<p>foo <em>bar <strong>baz</strong></em></p>\n"
+    @parser.parse("'''''foo'' bar''' baz").should == "<p><strong><em>foo</em> bar</strong> baz</p>\n"
+  end
+  it 'should handle (illegal) interleaved spans' do
+    # ''''' means "<strong><em>" so when we see ''' we try to close the <strong> first, which makes for illegal nesting
+    @parser.parse("'''''foo''' bar'' baz").should == "<p><strong><em>foo</em></strong> bar<em> baz</em></p>\n"
+    # note that if you really want ''''' to be parsed as "<em><strong>" you have to use whitespace to disambiguate
+    # for more examples see the "disambiguation" specs below
+    @parser.parse("'' '''foo''' bar'' baz").should == "<p><em> <strong>foo</strong> bar</em> baz</p>\n"
+  end
+  # the leading space in the input is what produces the <pre> block in the expected output
+  it 'should have no effect inside <pre> blocks' do
+    @parser.parse(" '''''foo'''''").should == "<pre>'''''foo'''''</pre>\n"
+  end
+  it 'should have no effect inside <nowiki> spans' do
+    @parser.parse("<nowiki>'''''foo'''''</nowiki>").should == "<p>'''''foo'''''</p>\n"
+  end
+  # ways of forcing the <em><strong> ordering instead of the default <strong><em>
+  describe 'disambiguation' do
+    it 'should by default assume strong followed by em' do
+      @parser.parse("'''''foo'''''").should == "<p><strong><em>foo</em></strong></p>\n"
+    end
+    it 'should accept an empty nowiki span as a means of imposing em followed by strong' do
+      @parser.parse("''<nowiki></nowiki>'''foo'''''").should == "<p><em><strong>foo</strong></em></p>\n"
+    end
+    it 'should accept whitespace as a means of imposing em followed by strong' do
+      # when rendered in the browser the whitespace won't have any visual effect
+      @parser.parse("'' '''foo'''''").should == "<p><em> <strong>foo</strong></em></p>\n"
+    end
+    it 'should accept a literal <em> tag  as a means of imposing em followed by strong' do
+      @parser.parse("<em>'''foo'''</em>").should == "<p><em><strong>foo</strong></em></p>\n"
+    end
+    it 'should accept a literal <strong> tag  as a means of imposing em followed by strong' do
+      @parser.parse("''<strong>foo</strong>''").should == "<p><em><strong>foo</strong></em></p>\n"
+    end
+    it 'should accept literal <em> and <strong> tags  as a means of imposing em followed by strong' do
+      @parser.parse("<em><strong>foo</strong></em>").should == "<p><em><strong>foo</strong></em></p>\n"
+    end
+  end
+end

data/spec/strong_spec.rb ADDED Viewed

@@ -0,0 +1,99 @@
+#!/usr/bin/env ruby
+# Copyright 2007-2008 Wincent Colaiuta
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+require File.join(File.dirname(__FILE__), 'spec_helper.rb')
+require 'wikitext'
+describe Wikitext::Parser, 'parsing <strong> spans' do
+  before do
+    @parser = Wikitext::Parser.new
+  end
+  describe 'marked up using wikitext shorthand' do
+    it 'should recognize paired <strong> tokens' do
+      @parser.parse("foo '''bar''' baz").should == "<p>foo <strong>bar</strong> baz</p>\n"
+    end
+    it 'should automatically insert missing closing tags' do
+      @parser.parse("foo '''bar").should == "<p>foo <strong>bar</strong></p>\n"
+    end
+    it 'should automatically close unclosed spans upon hitting newlines' do
+      @parser.parse("foo '''bar\nbaz").should == "<p>foo <strong>bar</strong> baz</p>\n"
+    end
+    it 'should handle (illegal) interleaved spans' do
+      @parser.parse("foo '''bar ''inner''' baz''").should == "<p>foo <strong>bar <em>inner</em></strong> baz<em></em></p>\n"
+    end
+    it 'should have no effect inside <pre> blocks' do
+      @parser.parse(" '''foo'''").should == "<pre>'''foo'''</pre>\n"
+    end
+    it 'should have no effect inside <nowiki> spans' do
+      @parser.parse("<nowiki>'''foo'''</nowiki>").should == "<p>'''foo'''</p>\n"
+    end
+    it "should have no effect if a strong (<strong>) span is already open" do
+      @parser.parse("foo <strong>'''bar'''</strong> baz").should == "<p>foo <strong>'''bar'''</strong> baz</p>\n"
+    end
+  end
+  describe 'marked up using HTML tags' do
+    it 'should recognized paired <strong> tokens' do
+      @parser.parse("foo <strong>bar</strong> baz").should == "<p>foo <strong>bar</strong> baz</p>\n"
+    end
+    it 'should recognize <strong> tokens case-insensitively' do
+      @parser.parse("foo <STRong>bar</STRONG> baz").should == "<p>foo <strong>bar</strong> baz</p>\n"
+      @parser.parse("foo <strONG>bar</STRong> baz").should == "<p>foo <strong>bar</strong> baz</p>\n"
+      @parser.parse("foo <STRONG>bar</strONG> baz").should == "<p>foo <strong>bar</strong> baz</p>\n"
+    end
+    it 'should automatically insert missing closing tags' do
+      @parser.parse("foo <strong>bar").should == "<p>foo <strong>bar</strong></p>\n"
+    end
+    it 'should automatically close unclosed spans upon hitting newlines' do
+      @parser.parse("foo <strong>bar\nbaz").should == "<p>foo <strong>bar</strong> baz</p>\n"
+    end
+    it 'should handle (illegal) interleaved spans' do
+      expected = "<p>foo <strong>bar <em>inner</em></strong> baz&lt;/em&gt;</p>\n"
+      @parser.parse("foo <strong>bar <em>inner</strong> baz</em>").should == expected
+      expected = "<p>foo <strong>bar <em>inner</em></strong> baz<em></em></p>\n"
+      @parser.parse("foo <strong>bar ''inner</strong> baz''").should == expected
+    end
+    it 'should handle (illegal) nested <strong> spans' do
+      expected = "<p>foo <strong>bar &lt;strong&gt;inner</strong>&lt;/strong&gt; baz</p>\n"
+      @parser.parse('foo <strong>bar <strong>inner</strong></strong> baz').should == expected
+    end
+    it 'should have no effect inside <pre> blocks' do
+      @parser.parse(" <strong>foo</strong>").should == "<pre>&lt;strong&gt;foo&lt;/strong&gt;</pre>\n"
+    end
+    it 'should have no effect inside <nowiki> spans' do
+      @parser.parse("<nowiki><strong>foo</strong></nowiki>").should == "<p>&lt;strong&gt;foo&lt;/strong&gt;</p>\n"
+    end
+    it "should have no effect if an strong (''') span is already open" do
+      expected = "<p>foo <strong>&lt;strong&gt;bar&lt;/strong&gt;</strong> baz</p>\n"
+      @parser.parse("foo '''<strong>bar</strong>''' baz").should == expected
+    end
+  end
+end

data/spec/tokenizing_spec.rb ADDED Viewed

@@ -0,0 +1,190 @@
+#!/usr/bin/env ruby
+# Copyright 2008 Wincent Colaiuta
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+require File.join(File.dirname(__FILE__), 'spec_helper.rb')
+require 'wikitext'
+describe Wikitext::Parser::Token do
+  before do
+    @tokens = Wikitext::Parser::Token.types
+  end
+  it 'should report the available token types as a hash' do
+    @tokens.should be_kind_of(Hash)
+  end
+  it 'should report token names as symbols and values as numbers' do
+    @tokens.each do |k, v|
+      v.should be_kind_of(Symbol)
+      k.should be_kind_of(Integer)
+    end
+  end
+  it 'should report unique token names and values' do
+    keys = @tokens.keys
+    keys.uniq.length.should == keys.length
+    values = @tokens.values
+    values.uniq.length.should == values.length
+  end
+end
+describe Wikitext::Parser, 'tokenizing' do
+  before do
+    @parser = Wikitext::Parser.new
+  end
+  it 'should do nothing if passed nil' do
+    @parser.tokenize(nil).should == nil
+  end
+  it "should complain if passed an object that doesn't quack like a string" do
+    lambda { @parser.tokenize({}) }.should raise_error
+  end
+  it 'should tokenize strings containing a single symbol' do
+    @tokens = @parser.tokenize('foo')
+    @tokens.length.should == 2
+    @tokens[0].token_type.should    == :printable
+    @tokens[0].string_value.should  == 'foo'
+    @tokens[1].token_type.should    == :end_of_file
+    @tokens[1].string_value.should  == ''
+  end
+  it 'should tokenize strings containing multiple symbols' do
+    @tokens = @parser.tokenize('foo http://example.com/')
+    @tokens.length.should == 4
+    @tokens[0].token_type.should    == :printable
+    @tokens[0].string_value.should  == 'foo'
+    @tokens[1].token_type.should    == :space
+    @tokens[1].string_value.should  == ' '
+    @tokens[2].token_type.should    == :uri
+    @tokens[2].string_value.should  == 'http://example.com/'
+    @tokens[3].token_type.should    == :end_of_file
+    @tokens[3].string_value.should  == ''
+  end
+  it 'should tokenize runs of printable characters as as single symbol' do
+    @tokens = @parser.tokenize('foo')
+    @tokens.length.should == 2
+    @tokens[0].token_type.should    == :printable
+    @tokens[0].string_value.should  == 'foo'
+    @tokens[0].line_start.should    == 1
+    @tokens[0].column_start.should  == 1
+    @tokens[0].line_stop.should     == 1
+    @tokens[0].column_stop.should   == 4
+    @tokens[1].token_type.should    == :end_of_file
+    @tokens[1].string_value.should  == ''
+  end
+  it 'should tokenize END_OF_FILE tokens as zero-width tokens' do
+    @tokens = @parser.tokenize('')
+    @tokens.length.should == 1
+    @tokens[0].token_type.should    == :end_of_file
+    @tokens[0].line_start.should    == 1
+    @tokens[0].column_start.should  == 1
+    @tokens[0].line_stop.should     == 1
+    @tokens[0].column_stop.should   == 1
+    @tokens[0].string_value.should  == ''
+  end
+  it 'should be able to tokenize strings containing "}"' do
+    # was a bug: we were throwing an exception "failed before finding a token" because our PRINTABLE rule omitted this code point
+    lambda { @tokens = @parser.tokenize('}') }.should_not raise_error
+    @tokens.length.should == 2
+    @tokens[0].token_type.should    == :printable
+    @tokens[0].string_value.should  == '}'
+    @tokens[0].line_start.should    == 1
+    @tokens[0].column_start.should  == 1
+    @tokens[0].line_stop.should     == 1
+    @tokens[0].column_stop.should   == 2
+    @tokens[1].token_type.should    == :end_of_file
+    @tokens[1].string_value.should  == ''
+  end
+  it 'should be able to tokenize the full range of printable ASCII' do
+    # see the previous example: we just want to make sure that our PRINTABLE rule is adequate
+    printable_ascii = (0x20..0x7e).to_a.pack('C*')
+    lambda { @parser.tokenize(printable_ascii) }.should_not raise_error
+  end
+  it 'should be able to tokenize large blocks of text' do
+    large_block_of_text = dedent <<-END
+      paragraph
+      second line
+      new paragraph
+      = a heading =
+      > a blockquote
+      > second line of blockquote
+      >
+      > new paragraph within blockquote
+      == another heading ==
+      paragraph within ''multiple '''styles''''' and <tt>tt span</tt>
+      similar, but with '''styles in ''different'' order'''
+      again, a '''different ''order'''''
+      * list item 1
+      ** nested list item 1
+      ** nested list item 2
+      ** nested list item 3
+      * list item 2
+       // this is a code block
+       notice how it can contain ''markup''
+       which would '''otherwise''' have <tt>special</tt> meaning
+       although explicit entities &copy; are passed through unchanged
+      a normal paragraph again
+      This is where we show a link to an article on [[GCC]].
+      Related to that, [[GCC|a link]] to the same
+      article but with custom link text.
+      External links [http://example.com work too].
+      As well as autolinks as seen http://example.com/
+      here.
+      Look at how we handle bad syntax. [[This is an unterminated
+      link. And [http://example.com/ is another.
+      # this is an ordered list
+      # which continues
+      ## and has another ordered list
+      ## nested inside it
+      # and then falls back
+      #* and then nests another list
+      #* this time an unordered one
+      #** itself containing a nested list
+      #** which continues
+      #**# and finally nests yet another ordered list
+      #**# which continues
+      #* drops back quite a way
+      # and finally all the way
+      #****** and finishes with an invalid item
+      === heading with missing closing tag
+      * list
+      # new list
+    END
+    @tokens = @parser.tokenize(large_block_of_text)
+    @tokens.length.should > 0
+  end
+end