picky 1.4.0 → 1.4.1
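
The diff below covers the tokenizer specs and the gem metadata: the before(:each) hooks that assigned a @tokenizer instance variable are replaced with memoized let(:tokenizer) helpers, and several lambda { ... }.should_not raise_error expectations move to the block form expect { ... }.to_not raise_error. For reference, here is a minimal sketch of that pattern, not taken from the gem itself, reusing calls that appear in the diff:

# Hypothetical, reduced spec illustrating the refactor applied throughout this release.
require 'spec_helper'

describe Tokenizers::Base do

  # Before 1.4.1 the subject was built in a hook and stored in an instance variable:
  #
  #   before(:each) do
  #     @tokenizer = Tokenizers::Base.new
  #   end
  #
  # After: let defines a lazily built helper, memoized per example.
  let(:tokenizer) { Tokenizers::Base.new }

  it 'rejects empty tokens by default' do
    tokenizer.reject(['a', nil, '', 'b']).should == ['a', 'b']
  end

  it 'has remove_illegals' do
    # Replaces: lambda { tokenizer.remove_illegals('any') }.should_not raise_error
    expect { tokenizer.remove_illegals('any') }.to_not raise_error
  end
end

The expectations themselves are unchanged; the let form only removes the instance variable and defers construction of the tokenizer until its first use in each example.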

@@ -5,68 +5,65 @@ require 'spec_helper'
  describe Tokenizers::Base do

  context 'with special instance' do
- before(:each) do
- @tokenizer = Tokenizers::Base.new reject_token_if: lambda { |token| token.to_s.length < 2 || token == :hello }
- end
+ let (:tokenizer) { Tokenizers::Base.new reject_token_if: lambda { |token| token.to_s.length < 2 || token == :hello } }
  it 'rejects tokens with length < 2' do
- @tokenizer.reject([:'', :a, :ab, :abc]).should == [:ab, :abc]
+ tokenizer.reject([:'', :a, :ab, :abc]).should == [:ab, :abc]
  end
  it 'rejects tokens that are called :hello' do
- @tokenizer.reject([:hel, :hell, :hello]).should == [:hel, :hell]
+ tokenizer.reject([:hel, :hell, :hello]).should == [:hel, :hell]
  end
  end

  context 'with normal instance' do
- before(:each) do
- @tokenizer = Tokenizers::Base.new
- end
+ let(:tokenizer) { Tokenizers::Base.new }

  describe 'reject_token_if' do
  it 'rejects empty tokens by default' do
- @tokenizer.reject(['a', nil, '', 'b']).should == ['a', 'b']
+ tokenizer.reject(['a', nil, '', 'b']).should == ['a', 'b']
  end
  it 'rejects tokens based on the given rejection criteria if set' do
- @tokenizer.reject_token_if &:nil?
+ tokenizer.reject_token_if &:nil?

- @tokenizer.reject(['a', nil, '', 'b']).should == ['a', '', 'b']
+ tokenizer.reject(['a', nil, '', 'b']).should == ['a', '', 'b']
  end
  end

  describe "substitute(s)_characters*" do
  it "doesn't substitute if there is no substituter" do
- @tokenizer.substitute_characters('abcdefghijklmnopqrstuvwxyzäöü').should == 'abcdefghijklmnopqrstuvwxyzäöü'
+ tokenizer.substitute_characters('abcdefghijklmnopqrstuvwxyzäöü').should == 'abcdefghijklmnopqrstuvwxyzäöü'
  end
  it "uses the substituter to replace characters" do
- @tokenizer.substitutes_characters_with CharacterSubstituters::WestEuropean.new
+ tokenizer.substitutes_characters_with CharacterSubstituters::WestEuropean.new

- @tokenizer.substitute_characters('abcdefghijklmnopqrstuvwxyzäöü').should == 'abcdefghijklmnopqrstuvwxyzaeoeue'
+ tokenizer.substitute_characters('abcdefghijklmnopqrstuvwxyzäöü').should == 'abcdefghijklmnopqrstuvwxyzaeoeue'
  end
  it "uses the european substituter as default" do
- @tokenizer.substitutes_characters_with
+ tokenizer.substitutes_characters_with

- @tokenizer.substitute_characters('abcdefghijklmnopqrstuvwxyzäöü').should == 'abcdefghijklmnopqrstuvwxyzaeoeue'
+ tokenizer.substitute_characters('abcdefghijklmnopqrstuvwxyzäöü').should == 'abcdefghijklmnopqrstuvwxyzaeoeue'
  end
  end

  describe "removes_characters_after_splitting" do
  context "without removes_characters_after_splitting called" do
  it "has remove_after_normalizing_illegals" do
- lambda { @tokenizer.remove_after_normalizing_illegals('any') }.should_not raise_error
+ expect { tokenizer.remove_after_normalizing_illegals('any') }.to_not raise_error
  end
  it 'should define a remove_after_normalizing_illegals normalize_with_patterns does nothing' do
  unchanging = stub :unchanging
- @tokenizer.remove_after_normalizing_illegals unchanging
+
+ tokenizer.remove_after_normalizing_illegals unchanging
  end
  end
  context "with removes_characters_after_splitting called" do
  before(:each) do
- @tokenizer.removes_characters_after_splitting(/[afo]/)
+ tokenizer.removes_characters_after_splitting(/[afo]/)
  end
  it "has remove_after_normalizing_illegals" do
- lambda { @tokenizer.remove_after_normalizing_illegals('abcdefghijklmnop') }.should_not raise_error
+ expect { tokenizer.remove_after_normalizing_illegals('abcdefghijklmnop') }.to_not raise_error
  end
  it "removes illegal characters" do
- @tokenizer.remove_after_normalizing_illegals('abcdefghijklmnop').should == 'bcdeghijklmnp'
+ tokenizer.remove_after_normalizing_illegals('abcdefghijklmnop').should == 'bcdeghijklmnp'
  end
  end
  end
@@ -74,25 +71,26 @@ describe Tokenizers::Base do
  describe "normalizes_words" do
  context "without normalizes_words called" do
  it "has normalize_with_patterns" do
- lambda { @tokenizer.normalize_with_patterns('any') }.should_not raise_error
+ expect { tokenizer.normalize_with_patterns('any') }.to_not raise_error
  end
  it 'should define a method normalize_with_patterns does nothing' do
  unchanging = stub :unchanging
- @tokenizer.normalize_with_patterns(unchanging).should == unchanging
+
+ tokenizer.normalize_with_patterns(unchanging).should == unchanging
  end
  end
  context "with normalizes_words called" do
  before(:each) do
- @tokenizer.normalizes_words([
+ tokenizer.normalizes_words([
  [/st\./, 'sankt'],
  [/stras?s?e?/, 'str']
  ])
  end
  it "has normalize_with_patterns" do
- lambda { @tokenizer.normalize_with_patterns('a b/c.d') }.should_not raise_error
+ expect { tokenizer.normalize_with_patterns('a b/c.d') }.to_not raise_error
  end
  it "normalizes, but just the first one" do
- @tokenizer.normalize_with_patterns('st. wegstrasse').should == 'sankt wegstrasse'
+ tokenizer.normalize_with_patterns('st. wegstrasse').should == 'sankt wegstrasse'
  end
  end
  end
@@ -100,24 +98,24 @@ describe Tokenizers::Base do
  describe "splits_text_on" do
  context "without splits_text_on called" do
  it "has split" do
- lambda { @tokenizer.split('any') }.should_not raise_error
+ lambda { tokenizer.split('any') }.should_not raise_error
  end
  it 'should define a method split that splits by default on \s' do
- @tokenizer.split('a b/c.d').should == ['a', 'b/c.d']
+ tokenizer.split('a b/c.d').should == ['a', 'b/c.d']
  end
  it 'splits text on /\s/ by default' do
- @tokenizer.split('this is a test').should == ['this', 'is', 'a', 'test']
+ tokenizer.split('this is a test').should == ['this', 'is', 'a', 'test']
  end
  end
  context "with removes_characters called" do
  before(:each) do
- @tokenizer.splits_text_on(/[\s\.\/]/)
+ tokenizer.splits_text_on(/[\s\.\/]/)
  end
  it "has split" do
- lambda { @tokenizer.split('a b/c.d') }.should_not raise_error
+ expect { tokenizer.split('a b/c.d') }.to_not raise_error
  end
  it "removes illegal characters" do
- @tokenizer.split('a b/c.d').should == ['a','b','c','d']
+ tokenizer.split('a b/c.d').should == ['a','b','c','d']
  end
  end
  end
@@ -125,22 +123,23 @@ describe Tokenizers::Base do
  describe "removes_characters" do
  context "without removes_characters called" do
  it "has remove_illegals" do
- lambda { @tokenizer.remove_illegals('any') }.should_not raise_error
+ expect { tokenizer.remove_illegals('any') }.to_not raise_error
  end
  it 'should define a method remove_illegals that does nothing' do
  unchanging = stub :unchanging
- @tokenizer.remove_illegals unchanging
+
+ tokenizer.remove_illegals unchanging
  end
  end
  context "with removes_characters called" do
  before(:each) do
- @tokenizer.removes_characters(/[afo]/)
+ tokenizer.removes_characters(/[afo]/)
  end
  it "has remove_illegals" do
- lambda { @tokenizer.remove_illegals('abcdefghijklmnop') }.should_not raise_error
+ expect { tokenizer.remove_illegals('abcdefghijklmnop') }.to_not raise_error
  end
  it "removes illegal characters" do
- @tokenizer.remove_illegals('abcdefghijklmnop').should == 'bcdeghijklmnp'
+ tokenizer.remove_illegals('abcdefghijklmnop').should == 'bcdeghijklmnp'
  end
  end
  end
@@ -148,45 +147,44 @@ describe Tokenizers::Base do
  describe 'stopwords' do
  context 'without stopwords given' do
  it 'should define a method remove_stopwords' do
- lambda { @tokenizer.remove_stopwords('from this text') }.should_not raise_error
+ lambda { tokenizer.remove_stopwords('from this text') }.should_not raise_error
  end
  it 'should define a method remove_stopwords that does nothing' do
- @tokenizer.remove_stopwords('from this text').should == 'from this text'
+ tokenizer.remove_stopwords('from this text').should == 'from this text'
  end
  it 'should define a method remove_non_single_stopwords' do
- lambda { @tokenizer.remove_non_single_stopwords('from this text') }.should_not raise_error
-
+ expect { tokenizer.remove_non_single_stopwords('from this text') }.to_not raise_error
  end
  end
  context 'with stopwords given' do
  before(:each) do
- @tokenizer.stopwords(/r|e/)
+ tokenizer.stopwords(/r|e/)
  end
  it 'should define a method remove_stopwords' do
- lambda { @tokenizer.remove_stopwords('from this text') }.should_not raise_error
+ lambda { tokenizer.remove_stopwords('from this text') }.should_not raise_error
  end
  it 'should define a method stopwords that removes stopwords' do
- @tokenizer.remove_stopwords('from this text').should == 'fom this txt'
+ tokenizer.remove_stopwords('from this text').should == 'fom this txt'
  end
  it 'should define a method remove_non_single_stopwords' do
- lambda { @tokenizer.remove_non_single_stopwords('from this text') }.should_not raise_error
+ expect { tokenizer.remove_non_single_stopwords('from this text') }.to_not raise_error
  end
  it 'should define a method remove_non_single_stopwords that removes non-single stopwords' do
- @tokenizer.remove_non_single_stopwords('rerere rerere').should == ' '
+ tokenizer.remove_non_single_stopwords('rerere rerere').should == ' '
  end
  it 'should define a method remove_non_single_stopwords that does not single stopwords' do
- @tokenizer.remove_non_single_stopwords('rerere').should == 'rerere'
+ tokenizer.remove_non_single_stopwords('rerere').should == 'rerere'
  end
  end
  context 'error case' do
  before(:each) do
- @tokenizer.stopwords(/any/)
+ tokenizer.stopwords(/any/)
  end
  it 'should not remove non-single stopwords with a star' do
- @tokenizer.remove_non_single_stopwords('a*').should == 'a*'
+ tokenizer.remove_non_single_stopwords('a*').should == 'a*'
  end
  it 'should not remove non-single stopwords with a tilde' do
- @tokenizer.remove_non_single_stopwords('a~').should == 'a~'
+ tokenizer.remove_non_single_stopwords('a~').should == 'a~'
  end
  end
  end
@@ -4,9 +4,7 @@ require 'spec_helper'

  describe Tokenizers::Index do

- before(:each) do
- @tokenizer = Tokenizers::Index.new
- end
+ let(:tokenizer) { Tokenizers::Index.new }

  describe "default*" do
  before(:all) do
@@ -33,13 +31,13 @@ describe Tokenizers::Index do

  describe "remove_removes_characters" do
  it "should not remove ' from a query by default" do
- @tokenizer.remove_illegals("Lugi's").should == "Lugi's"
+ tokenizer.remove_illegals("Lugi's").should == "Lugi's"
  end
  end

  describe "reject!" do
  it "should reject tokens if blank" do
- @tokenizer.reject(['', 'not blank', '']).should == ['not blank']
+ tokenizer.reject(['', 'not blank', '']).should == ['not blank']
  end
  end

@@ -47,7 +45,7 @@ describe Tokenizers::Index do
  describe "normalizing" do
  def self.it_should_normalize_token(text, expected)
  it "should handle the #{text} case" do
- @tokenizer.tokenize(text).to_a.should == [expected].compact
+ tokenizer.tokenize(text).to_a.should == [expected].compact
  end
  end
  # defaults
@@ -57,7 +55,7 @@ describe Tokenizers::Index do
  describe "tokenizing" do
  def self.it_should_tokenize_token(text, expected)
  it "should handle the #{text} case" do
- @tokenizer.tokenize(text).to_a.should == expected
+ tokenizer.tokenize(text).to_a.should == expected
  end
  end
  # defaults
@@ -3,9 +3,7 @@ require 'spec_helper'

  describe Tokenizers::Query do

- before(:each) do
- @tokenizer = Tokenizers::Query.new
- end
+ let(:tokenizer) { Tokenizers::Query.new }

  describe "default*" do
  before(:all) do
@@ -32,7 +30,7 @@ describe Tokenizers::Query do

  describe "maximum_tokens" do
  it "should be set to 5 by default" do
- @tokenizer.maximum_tokens.should == 5
+ tokenizer.maximum_tokens.should == 5
  end
  it "should be settable" do
  Tokenizers::Query.new(maximum_tokens: 3).maximum_tokens.should == 3
@@ -43,15 +41,15 @@ describe Tokenizers::Query do
  it 'should call methods in order' do
  text = stub :text

- @tokenizer.should_receive(:remove_illegals).once.ordered.with text
- @tokenizer.should_receive(:remove_non_single_stopwords).once.ordered.with text
+ tokenizer.should_receive(:remove_illegals).once.ordered.with text
+ tokenizer.should_receive(:remove_non_single_stopwords).once.ordered.with text

- @tokenizer.preprocess text
+ tokenizer.preprocess text
  end
  it 'should return the text unchanged by default' do
  text = "some text"

- @tokenizer.preprocess(text).should == text
+ tokenizer.preprocess(text).should == text
  end
  end

@@ -60,9 +58,9 @@ describe Tokenizers::Query do
  @tokens = mock :tokens, :null_object => true
  end
  it 'should tokenize the tokens' do
- @tokens.should_receive(:tokenize_with).once.with @tokenizer
+ @tokens.should_receive(:tokenize_with).once.with tokenizer

- @tokenizer.process @tokens
+ tokenizer.process @tokens
  end
  it 'should call methods on the tokens in order' do
  @tokens.should_receive(:tokenize_with).once.ordered
@@ -70,17 +68,17 @@ describe Tokenizers::Query do
  @tokens.should_receive(:cap).once.ordered
  @tokens.should_receive(:partialize_last).once.ordered

- @tokenizer.process @tokens
+ tokenizer.process @tokens
  end
  it 'should return the tokens' do
- @tokenizer.process(@tokens).should == @tokens
+ tokenizer.process(@tokens).should == @tokens
  end
  end

  describe 'pretokenize' do
  def self.it_should_pretokenize text, expected
  it "should pretokenize #{text} as #{expected}" do
- @tokenizer.pretokenize(text).should == expected
+ tokenizer.pretokenize(text).should == expected
  end
  end
  it_should_pretokenize 'test miau test', ['test', 'miau', 'test']
@@ -89,7 +87,7 @@ describe Tokenizers::Query do
  describe "tokenizing" do
  def self.it_should_tokenize_token(text, expected)
  it "should handle the #{text} case" do
- @tokenizer.tokenize(text).map(&:text).should == expected
+ tokenizer.tokenize(text).map(&:text).should == expected
  end
  end
  it_should_tokenize_token 'simple tokenizing on \s', [:simple, :tokenizing, :on, :'\s']
@@ -98,7 +96,7 @@ describe Tokenizers::Query do
  describe 'normalize_with_patterns' do
  def self.it_should_pattern_normalize original, expected
  it "should normalize #{original} with pattern into #{expected}" do
- @tokenizer.normalize_with_patterns(original).should == expected
+ tokenizer.normalize_with_patterns(original).should == expected
  end
  end
  it_should_pattern_normalize 'no pattern normalization', 'no pattern normalization'
@@ -106,22 +104,22 @@ describe Tokenizers::Query do

  describe 'reject' do
  it 'should reject blank tokens' do
- @tokenizer.reject(["some token answering to blank?", nil, nil]).should == ["some token answering to blank?"]
+ tokenizer.reject(["some token answering to blank?", nil, nil]).should == ["some token answering to blank?"]
  end
  end

  describe "last token" do
  it "should be partial" do
- @tokenizer.tokenize("First Second Third Last").last.instance_variable_get(:@partial).should be_true
+ tokenizer.tokenize("First Second Third Last").last.instance_variable_get(:@partial).should be_true
  end
  end

  describe ".tokenize" do
  it "should return an Array of tokens" do
- @tokenizer.tokenize('test test').to_a.should be_instance_of(Array)
+ tokenizer.tokenize('test test').to_a.should be_instance_of(Array)
  end
  it "should return an empty tokenized query if the query string is blank or empty" do
- @tokenizer.tokenize('').map(&:to_s).should == []
+ tokenizer.tokenize('').map(&:to_s).should == []
  end
  end
  describe "token_for" do
@@ -129,7 +127,7 @@ describe Tokenizers::Query do
  text = stub(:text)
  Query::Token.should_receive(:processed).with text

- @tokenizer.token_for text
+ tokenizer.token_for text
  end
  end

metadata CHANGED
@@ -2,7 +2,7 @@
  name: picky
  version: !ruby/object:Gem::Version
  prerelease:
- version: 1.4.0
+ version: 1.4.1
  platform: ruby
  authors:
  - Florian Hanke