wikitext 1.0.3 → 1.1
- data/ext/parser.c +89 -31
- data/ext/parser.h +2 -0
- data/ext/token.c +1 -0
- data/ext/token.h +1 -0
- data/ext/wikitext.c +2 -0
- data/ext/wikitext_ragel.c +441 -525
- data/lib/wikitext/version.rb +1 -1
- data/spec/fulltext_spec.rb +91 -0
- data/spec/indentation_spec.rb +3 -3
- data/spec/tokenizing_spec.rb +3 -3
- data/spec/wikitext_spec.rb +6 -0
- metadata +3 -2
data/spec/fulltext_spec.rb
CHANGED
@@ -0,0 +1,91 @@
+#!/usr/bin/env ruby
+# Copyright 2008 Wincent Colaiuta
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+require File.join(File.dirname(__FILE__), 'spec_helper.rb')
+require 'wikitext'
+
+describe Wikitext::Parser, 'fulltext tokenizing' do
+  before do
+    @parser = Wikitext::Parser.new
+  end
+
+  it 'should default to a minimum fulltext token length of 3' do
+    @parser.minimum_fulltext_token_length.should == 3
+  end
+
+  it 'should accept overrides of minimum fulltext token length at initialization time' do
+    parser = Wikitext::Parser.new(:minimum_fulltext_token_length => 10)
+    parser.minimum_fulltext_token_length.should == 10
+    parser.fulltext_tokenize('short loooooooooong').should == ['loooooooooong']
+  end
+
+  it 'should return nil for nil input' do
+    @parser.fulltext_tokenize(nil).should be_nil
+  end
+
+  it 'should return an empty array for empty string input' do
+    @parser.fulltext_tokenize('').should == []
+  end
+
+  it 'should return an empty array for an input string that contains nothing tokenizable' do
+    @parser.fulltext_tokenize('#!?()/&').should == []
+  end
+
+  it 'should tokenize simple words' do
+    @parser.fulltext_tokenize('foo bar baz').should == ['foo', 'bar', 'baz']
+  end
+
+  it 'should omit tokens shorter than the minimum required length' do
+    @parser.fulltext_tokenize('a b baz longer').should == ['baz', 'longer']
+  end
+
+  it 'should accept overrides of minimum length at parse time' do
+    @parser.fulltext_tokenize('a bc baz longer', :minimum => 2).should == ['bc', 'baz', 'longer']
+  end
+
+  it 'should treat a minimum length of 0 as meaning "no minimum length"' do
+    @parser.fulltext_tokenize('a bc baz longer', :minimum => 0).should == ['a', 'bc', 'baz', 'longer']
+  end
+
+  it 'should interpret a minimum length of nil as meaning "default minumum length" (3)' do
+    @parser.minimum_fulltext_token_length = 10
+    @parser.fulltext_tokenize('a bc baz longer', :minimum => nil).should == ['baz', 'longer']
+  end
+
+  it 'should tokenize URLs' do
+    @parser.fulltext_tokenize('foo http://example.com/ bar').should == ['foo', 'http://example.com/', 'bar']
+  end
+
+  it 'should tokenize email addresses' do
+    @parser.fulltext_tokenize('foo user@example.com bar').should == ['foo', 'user@example.com', 'bar']
+  end
+
+  it 'should ignore punctuation' do
+    @parser.fulltext_tokenize("don't forget!").should == ['don', 'forget']
+  end
+
+  it 'should ignore non-ASCII' do
+    # note that a search for "información lingüística" will still work, but might return some false positives
+    @parser.fulltext_tokenize('buscando información lingüística').should == ['buscando', 'informaci', 'ling', 'stica']
+  end
+
+  it 'should ignore wikitext markup' do
+    @parser.fulltext_tokenize("this <nowiki>that</nowiki> [[foo bar]]").should == ['this', 'that', 'foo', 'bar']
+  end
+
+  it 'should tokenize alphanumerics' do
+    @parser.fulltext_tokenize("password99 2008").should == ['password99', '2008']
+  end
+end
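The new spec file above documents the fulltext tokenizer introduced in 1.1. A rough usage sketch, inferred only from the expectations in that spec (method and option names are taken from the spec; nothing beyond those examples is guaranteed):

    require 'wikitext'

    parser = Wikitext::Parser.new(:minimum_fulltext_token_length => 4)
    parser.fulltext_tokenize('foo bar longer')            # => ['longer']

    # the minimum can also be overridden per call; 0 means "no minimum"
    parser.fulltext_tokenize('a bc baz', :minimum => 2)   # => ['bc', 'baz']
    parser.fulltext_tokenize(nil)                         # => nil
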
data/spec/indentation_spec.rb
CHANGED
@@ -47,12 +47,12 @@ describe Wikitext::Parser, 'indentation' do
   end
 
   it 'should complain if the "indent" option is nil' do
-    lambda { @parser.parse('* foo', :
+    lambda { @parser.parse('* foo', :indent => nil) }.should raise_error(TypeError)
   end
 
   it 'should complain if the "indent" options is not an integer' do
-    lambda { @parser.parse('* foo', :
-    lambda { @parser.parse('* foo', :
+    lambda { @parser.parse('* foo', :indent => 'bar') }.should raise_error(TypeError)
+    lambda { @parser.parse('* foo', :indent => /baz/) }.should raise_error(TypeError)
   end
 
   it 'should treat a negative "indent" as though it were zero' do
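These changes tighten the type checking on the :indent parse-time option. A minimal sketch of what the updated specs exercise (the idea that :indent is a non-negative integer controlling output indentation is inferred from the option name and the negative-value spec, not stated in this diff):

    parser = Wikitext::Parser.new
    parser.parse('* foo')                     # default indentation
    parser.parse('* foo', :indent => 2)       # integer values are accepted
    parser.parse('* foo', :indent => nil)     # raises TypeError
    parser.parse('* foo', :indent => 'bar')   # raises TypeError
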
data/spec/tokenizing_spec.rb
CHANGED
@@ -56,7 +56,7 @@ describe Wikitext::Parser, 'tokenizing' do
   it 'should tokenize strings containing a single symbol' do
     @tokens = @parser.tokenize('foo')
     @tokens.length.should == 2
-    @tokens[0].token_type.should == :
+    @tokens[0].token_type.should == :alnum
     @tokens[0].string_value.should == 'foo'
     @tokens[1].token_type.should == :end_of_file
     @tokens[1].string_value.should == ''
@@ -65,7 +65,7 @@ describe Wikitext::Parser, 'tokenizing' do
   it 'should tokenize strings containing multiple symbols' do
     @tokens = @parser.tokenize('foo http://example.com/')
     @tokens.length.should == 4
-    @tokens[0].token_type.should == :
+    @tokens[0].token_type.should == :alnum
     @tokens[0].string_value.should == 'foo'
     @tokens[1].token_type.should == :space
     @tokens[1].string_value.should == ' '
@@ -78,7 +78,7 @@ describe Wikitext::Parser, 'tokenizing' do
   it 'should tokenize runs of printable characters as as single symbol' do
     @tokens = @parser.tokenize('foo')
     @tokens.length.should == 2
-    @tokens[0].token_type.should == :
+    @tokens[0].token_type.should == :alnum
     @tokens[0].string_value.should == 'foo'
     @tokens[0].line_start.should == 1
     @tokens[0].column_start.should == 1
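The tokenizing specs now expect an :alnum token type for runs of letters and digits. A small sketch of the token API, based only on the expectations above:

    tokens = Wikitext::Parser.new.tokenize('foo http://example.com/')
    tokens.length             # => 4
    tokens[0].token_type      # => :alnum
    tokens[0].string_value    # => 'foo'
    tokens[0].line_start      # => 1
    tokens[0].column_start    # => 1
    tokens[1].token_type      # => :space
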
data/spec/wikitext_spec.rb
CHANGED
@@ -88,6 +88,12 @@ describe Wikitext::Parser do
       Wikitext::Parser.new(:treat_slash_as_special => false).treat_slash_as_special.should == false
     end
   end
+
+  describe 'overriding defaults at parse time' do
+    it 'should ignore unknown options' do
+      @parser.parse('foo', :bar => 'baz').should == "<p>foo</p>\n"
+    end
+  end
 end
 
 describe Wikitext::Parser, 'parsing non-ASCII input' do
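The added spec documents that unknown parse-time options are silently ignored rather than raising:

    parser = Wikitext::Parser.new
    parser.parse('foo', :bar => 'baz')   # => "<p>foo</p>\n" (the unknown :bar option is ignored)
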
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: wikitext
 version: !ruby/object:Gem::Version
-  version: 1.0.3
+  version: "1.1"
 platform: ruby
 authors:
 - Wincent Colaiuta
@@ -9,7 +9,7 @@ autorequire:
 bindir: bin
 cert_chain: []
 
-date: 2008-04-
+date: 2008-04-25 00:00:00 +02:00
 default_executable:
 dependencies: []
 
@@ -28,6 +28,7 @@ files:
 - spec/encoding_spec.rb
 - spec/entity_spec.rb
 - spec/external_link_spec.rb
+- spec/fulltext_spec.rb
 - spec/h1_spec.rb
 - spec/h2_spec.rb
 - spec/h3_spec.rb