wikitext 1.0.3 → 1.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -13,5 +13,5 @@
13
13
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
14
14
 
15
15
  module Wikitext
16
- VERSION = '1.0.3'
16
+ VERSION = '1.1'
17
17
  end # module Wikitext
@@ -0,0 +1,91 @@
1
+ #!/usr/bin/env ruby
2
+ # Copyright 2008 Wincent Colaiuta
3
+ # This program is free software: you can redistribute it and/or modify
4
+ # it under the terms of the GNU General Public License as published by
5
+ # the Free Software Foundation, either version 3 of the License, or
6
+ # (at your option) any later version.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU General Public License
14
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
15
+
16
+ require File.join(File.dirname(__FILE__), 'spec_helper.rb')
17
+ require 'wikitext'
18
+
19
+ describe Wikitext::Parser, 'fulltext tokenizing' do
20
+ before do
21
+ @parser = Wikitext::Parser.new
22
+ end
23
+
24
+ it 'should default to a minimum fulltext token length of 3' do
25
+ @parser.minimum_fulltext_token_length.should == 3
26
+ end
27
+
28
+ it 'should accept overrides of minimum fulltext token length at initialization time' do
29
+ parser = Wikitext::Parser.new(:minimum_fulltext_token_length => 10)
30
+ parser.minimum_fulltext_token_length.should == 10
31
+ parser.fulltext_tokenize('short loooooooooong').should == ['loooooooooong']
32
+ end
33
+
34
+ it 'should return nil for nil input' do
35
+ @parser.fulltext_tokenize(nil).should be_nil
36
+ end
37
+
38
+ it 'should return an empty array for empty string input' do
39
+ @parser.fulltext_tokenize('').should == []
40
+ end
41
+
42
+ it 'should return an empty array for an input string that contains nothing tokenizable' do
43
+ @parser.fulltext_tokenize('#!?()/&').should == []
44
+ end
45
+
46
+ it 'should tokenize simple words' do
47
+ @parser.fulltext_tokenize('foo bar baz').should == ['foo', 'bar', 'baz']
48
+ end
49
+
50
+ it 'should omit tokens shorter than the minimum required length' do
51
+ @parser.fulltext_tokenize('a b baz longer').should == ['baz', 'longer']
52
+ end
53
+
54
+ it 'should accept overrides of minimum length at parse time' do
55
+ @parser.fulltext_tokenize('a bc baz longer', :minimum => 2).should == ['bc', 'baz', 'longer']
56
+ end
57
+
58
+ it 'should treat a minimum length of 0 as meaning "no minimum length"' do
59
+ @parser.fulltext_tokenize('a bc baz longer', :minimum => 0).should == ['a', 'bc', 'baz', 'longer']
60
+ end
61
+
62
+ it 'should interpret a minimum length of nil as meaning "default minimum length" (3)' do
63
+ @parser.minimum_fulltext_token_length = 10
64
+ @parser.fulltext_tokenize('a bc baz longer', :minimum => nil).should == ['baz', 'longer']
65
+ end
66
+
67
+ it 'should tokenize URLs' do
68
+ @parser.fulltext_tokenize('foo http://example.com/ bar').should == ['foo', 'http://example.com/', 'bar']
69
+ end
70
+
71
+ it 'should tokenize email addresses' do
72
+ @parser.fulltext_tokenize('foo user@example.com bar').should == ['foo', 'user@example.com', 'bar']
73
+ end
74
+
75
+ it 'should ignore punctuation' do
76
+ @parser.fulltext_tokenize("don't forget!").should == ['don', 'forget']
77
+ end
78
+
79
+ it 'should ignore non-ASCII' do
80
+ # note that a search for "información lingüística" will still work, but might return some false positives
81
+ @parser.fulltext_tokenize('buscando información lingüística').should == ['buscando', 'informaci', 'ling', 'stica']
82
+ end
83
+
84
+ it 'should ignore wikitext markup' do
85
+ @parser.fulltext_tokenize("this <nowiki>that</nowiki> [[foo bar]]").should == ['this', 'that', 'foo', 'bar']
86
+ end
87
+
88
+ it 'should tokenize alphanumerics' do
89
+ @parser.fulltext_tokenize("password99 2008").should == ['password99', '2008']
90
+ end
91
+ end
@@ -47,12 +47,12 @@ describe Wikitext::Parser, 'indentation' do
47
47
  end
48
48
 
49
49
  it 'should complain if the "indent" option is nil' do
50
- lambda { @parser.parse('* foo', :default => nil) }.should raise_error(TypeError)
50
+ lambda { @parser.parse('* foo', :indent => nil) }.should raise_error(TypeError)
51
51
  end
52
52
 
53
53
  it 'should complain if the "indent" options is not an integer' do
54
- lambda { @parser.parse('* foo', :default => 'bar') }.should raise_error(TypeError)
55
- lambda { @parser.parse('* foo', :default => /baz/) }.should raise_error(TypeError)
54
+ lambda { @parser.parse('* foo', :indent => 'bar') }.should raise_error(TypeError)
55
+ lambda { @parser.parse('* foo', :indent => /baz/) }.should raise_error(TypeError)
56
56
  end
57
57
 
58
58
  it 'should treat a negative "indent" as though it were zero' do
@@ -56,7 +56,7 @@ describe Wikitext::Parser, 'tokenizing' do
56
56
  it 'should tokenize strings containing a single symbol' do
57
57
  @tokens = @parser.tokenize('foo')
58
58
  @tokens.length.should == 2
59
- @tokens[0].token_type.should == :printable
59
+ @tokens[0].token_type.should == :alnum
60
60
  @tokens[0].string_value.should == 'foo'
61
61
  @tokens[1].token_type.should == :end_of_file
62
62
  @tokens[1].string_value.should == ''
@@ -65,7 +65,7 @@ describe Wikitext::Parser, 'tokenizing' do
65
65
  it 'should tokenize strings containing multiple symbols' do
66
66
  @tokens = @parser.tokenize('foo http://example.com/')
67
67
  @tokens.length.should == 4
68
- @tokens[0].token_type.should == :printable
68
+ @tokens[0].token_type.should == :alnum
69
69
  @tokens[0].string_value.should == 'foo'
70
70
  @tokens[1].token_type.should == :space
71
71
  @tokens[1].string_value.should == ' '
@@ -78,7 +78,7 @@ describe Wikitext::Parser, 'tokenizing' do
78
78
  it 'should tokenize runs of printable characters as as single symbol' do
79
79
  @tokens = @parser.tokenize('foo')
80
80
  @tokens.length.should == 2
81
- @tokens[0].token_type.should == :printable
81
+ @tokens[0].token_type.should == :alnum
82
82
  @tokens[0].string_value.should == 'foo'
83
83
  @tokens[0].line_start.should == 1
84
84
  @tokens[0].column_start.should == 1
@@ -88,6 +88,12 @@ describe Wikitext::Parser do
88
88
  Wikitext::Parser.new(:treat_slash_as_special => false).treat_slash_as_special.should == false
89
89
  end
90
90
  end
91
+
92
+ describe 'overriding defaults at parse time' do
93
+ it 'should ignore unknown options' do
94
+ @parser.parse('foo', :bar => 'baz').should == "<p>foo</p>\n"
95
+ end
96
+ end
91
97
  end
92
98
 
93
99
  describe Wikitext::Parser, 'parsing non-ASCII input' do
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: wikitext
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.3
4
+ version: "1.1"
5
5
  platform: ruby
6
6
  authors:
7
7
  - Wincent Colaiuta
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2008-04-17 00:00:00 +02:00
12
+ date: 2008-04-25 00:00:00 +02:00
13
13
  default_executable:
14
14
  dependencies: []
15
15
 
@@ -28,6 +28,7 @@ files:
28
28
  - spec/encoding_spec.rb
29
29
  - spec/entity_spec.rb
30
30
  - spec/external_link_spec.rb
31
+ - spec/fulltext_spec.rb
31
32
  - spec/h1_spec.rb
32
33
  - spec/h2_spec.rb
33
34
  - spec/h3_spec.rb