wikitext 1.0.3 → 1.1

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -13,5 +13,5 @@
13
13
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
14
14
 
15
15
  module Wikitext
16
- VERSION = '1.0.3'
16
+ VERSION = '1.1'
17
17
  end # module Wikitext
@@ -0,0 +1,91 @@
1
+ #!/usr/bin/env ruby
2
+ # Copyright 2008 Wincent Colaiuta
3
+ # This program is free software: you can redistribute it and/or modify
4
+ # it under the terms of the GNU General Public License as published by
5
+ # the Free Software Foundation, either version 3 of the License, or
6
+ # (at your option) any later version.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU General Public License
14
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
15
+
16
+ require File.join(File.dirname(__FILE__), 'spec_helper.rb')
17
+ require 'wikitext'
18
+
19
+ describe Wikitext::Parser, 'fulltext tokenizing' do
20
+ before do
21
+ @parser = Wikitext::Parser.new
22
+ end
23
+
24
+ it 'should default to a minimum fulltext token length of 3' do
25
+ @parser.minimum_fulltext_token_length.should == 3
26
+ end
27
+
28
+ it 'should accept overrides of minimum fulltext token length at initialization time' do
29
+ parser = Wikitext::Parser.new(:minimum_fulltext_token_length => 10)
30
+ parser.minimum_fulltext_token_length.should == 10
31
+ parser.fulltext_tokenize('short loooooooooong').should == ['loooooooooong']
32
+ end
33
+
34
+ it 'should return nil for nil input' do
35
+ @parser.fulltext_tokenize(nil).should be_nil
36
+ end
37
+
38
+ it 'should return an empty array for empty string input' do
39
+ @parser.fulltext_tokenize('').should == []
40
+ end
41
+
42
+ it 'should return an empty array for an input string that contains nothing tokenizable' do
43
+ @parser.fulltext_tokenize('#!?()/&').should == []
44
+ end
45
+
46
+ it 'should tokenize simple words' do
47
+ @parser.fulltext_tokenize('foo bar baz').should == ['foo', 'bar', 'baz']
48
+ end
49
+
50
+ it 'should omit tokens shorter than the minimum required length' do
51
+ @parser.fulltext_tokenize('a b baz longer').should == ['baz', 'longer']
52
+ end
53
+
54
+ it 'should accept overrides of minimum length at parse time' do
55
+ @parser.fulltext_tokenize('a bc baz longer', :minimum => 2).should == ['bc', 'baz', 'longer']
56
+ end
57
+
58
+ it 'should treat a minimum length of 0 as meaning "no minimum length"' do
59
+ @parser.fulltext_tokenize('a bc baz longer', :minimum => 0).should == ['a', 'bc', 'baz', 'longer']
60
+ end
61
+
62
+ it 'should interpret a minimum length of nil as meaning "default minumum length" (3)' do
63
+ @parser.minimum_fulltext_token_length = 10
64
+ @parser.fulltext_tokenize('a bc baz longer', :minimum => nil).should == ['baz', 'longer']
65
+ end
66
+
67
+ it 'should tokenize URLs' do
68
+ @parser.fulltext_tokenize('foo http://example.com/ bar').should == ['foo', 'http://example.com/', 'bar']
69
+ end
70
+
71
+ it 'should tokenize email addresses' do
72
+ @parser.fulltext_tokenize('foo user@example.com bar').should == ['foo', 'user@example.com', 'bar']
73
+ end
74
+
75
+ it 'should ignore punctuation' do
76
+ @parser.fulltext_tokenize("don't forget!").should == ['don', 'forget']
77
+ end
78
+
79
+ it 'should ignore non-ASCII' do
80
+ # note that a search for "información lingüística" will still work, but might return some false positives
81
+ @parser.fulltext_tokenize('buscando información lingüística').should == ['buscando', 'informaci', 'ling', 'stica']
82
+ end
83
+
84
+ it 'should ignore wikitext markup' do
85
+ @parser.fulltext_tokenize("this <nowiki>that</nowiki> [[foo bar]]").should == ['this', 'that', 'foo', 'bar']
86
+ end
87
+
88
+ it 'should tokenize alphanumerics' do
89
+ @parser.fulltext_tokenize("password99 2008").should == ['password99', '2008']
90
+ end
91
+ end
@@ -47,12 +47,12 @@ describe Wikitext::Parser, 'indentation' do
47
47
  end
48
48
 
49
49
  it 'should complain if the "indent" option is nil' do
50
- lambda { @parser.parse('* foo', :default => nil) }.should raise_error(TypeError)
50
+ lambda { @parser.parse('* foo', :indent => nil) }.should raise_error(TypeError)
51
51
  end
52
52
 
53
53
  it 'should complain if the "indent" options is not an integer' do
54
- lambda { @parser.parse('* foo', :default => 'bar') }.should raise_error(TypeError)
55
- lambda { @parser.parse('* foo', :default => /baz/) }.should raise_error(TypeError)
54
+ lambda { @parser.parse('* foo', :indent => 'bar') }.should raise_error(TypeError)
55
+ lambda { @parser.parse('* foo', :indent => /baz/) }.should raise_error(TypeError)
56
56
  end
57
57
 
58
58
  it 'should treat a negative "indent" as though it were zero' do
@@ -56,7 +56,7 @@ describe Wikitext::Parser, 'tokenizing' do
56
56
  it 'should tokenize strings containing a single symbol' do
57
57
  @tokens = @parser.tokenize('foo')
58
58
  @tokens.length.should == 2
59
- @tokens[0].token_type.should == :printable
59
+ @tokens[0].token_type.should == :alnum
60
60
  @tokens[0].string_value.should == 'foo'
61
61
  @tokens[1].token_type.should == :end_of_file
62
62
  @tokens[1].string_value.should == ''
@@ -65,7 +65,7 @@ describe Wikitext::Parser, 'tokenizing' do
65
65
  it 'should tokenize strings containing multiple symbols' do
66
66
  @tokens = @parser.tokenize('foo http://example.com/')
67
67
  @tokens.length.should == 4
68
- @tokens[0].token_type.should == :printable
68
+ @tokens[0].token_type.should == :alnum
69
69
  @tokens[0].string_value.should == 'foo'
70
70
  @tokens[1].token_type.should == :space
71
71
  @tokens[1].string_value.should == ' '
@@ -78,7 +78,7 @@ describe Wikitext::Parser, 'tokenizing' do
78
78
  it 'should tokenize runs of printable characters as as single symbol' do
79
79
  @tokens = @parser.tokenize('foo')
80
80
  @tokens.length.should == 2
81
- @tokens[0].token_type.should == :printable
81
+ @tokens[0].token_type.should == :alnum
82
82
  @tokens[0].string_value.should == 'foo'
83
83
  @tokens[0].line_start.should == 1
84
84
  @tokens[0].column_start.should == 1
@@ -88,6 +88,12 @@ describe Wikitext::Parser do
88
88
  Wikitext::Parser.new(:treat_slash_as_special => false).treat_slash_as_special.should == false
89
89
  end
90
90
  end
91
+
92
+ describe 'overriding defaults at parse time' do
93
+ it 'should ignore unknown options' do
94
+ @parser.parse('foo', :bar => 'baz').should == "<p>foo</p>\n"
95
+ end
96
+ end
91
97
  end
92
98
 
93
99
  describe Wikitext::Parser, 'parsing non-ASCII input' do
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: wikitext
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.3
4
+ version: "1.1"
5
5
  platform: ruby
6
6
  authors:
7
7
  - Wincent Colaiuta
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2008-04-17 00:00:00 +02:00
12
+ date: 2008-04-25 00:00:00 +02:00
13
13
  default_executable:
14
14
  dependencies: []
15
15
 
@@ -28,6 +28,7 @@ files:
28
28
  - spec/encoding_spec.rb
29
29
  - spec/entity_spec.rb
30
30
  - spec/external_link_spec.rb
31
+ - spec/fulltext_spec.rb
31
32
  - spec/h1_spec.rb
32
33
  - spec/h2_spec.rb
33
34
  - spec/h3_spec.rb