picky 1.4.0 → 1.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
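
Aside from the version bump, 1.4.1 is a pure spec cleanup: each tokenizer spec swaps its before(:each) hook and @tokenizer instance variable for RSpec's memoized let helper, and several lambda { ... }.should_not raise_error assertions move to block expect syntax. A minimal sketch of the two patterns, distilled from the hunks below (RSpec 2-era should syntax, matching these specs):

    # Before: the subject is built eagerly in a hook and read from an ivar.
    before(:each) do
      @tokenizer = Tokenizers::Base.new
    end

    # After: the subject is built lazily on first use and memoized per example.
    let(:tokenizer) { Tokenizers::Base.new }

    # Error expectations shift from lambda receivers to expect blocks:
    lambda { @tokenizer.split('any') }.should_not raise_error   # before
    expect { tokenizer.split('any') }.to_not raise_error        # after

Behavior is unchanged; let simply removes the per-example assignment boilerplate and only constructs the tokenizer in examples that actually use it.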
@@ -5,68 +5,65 @@ require 'spec_helper'
 describe Tokenizers::Base do
 
   context 'with special instance' do
-    before(:each) do
-      @tokenizer = Tokenizers::Base.new reject_token_if: lambda { |token| token.to_s.length < 2 || token == :hello }
-    end
+    let (:tokenizer) { Tokenizers::Base.new reject_token_if: lambda { |token| token.to_s.length < 2 || token == :hello } }
     it 'rejects tokens with length < 2' do
-      @tokenizer.reject([:'', :a, :ab, :abc]).should == [:ab, :abc]
+      tokenizer.reject([:'', :a, :ab, :abc]).should == [:ab, :abc]
     end
     it 'rejects tokens that are called :hello' do
-      @tokenizer.reject([:hel, :hell, :hello]).should == [:hel, :hell]
+      tokenizer.reject([:hel, :hell, :hello]).should == [:hel, :hell]
     end
   end
 
   context 'with normal instance' do
-    before(:each) do
-      @tokenizer = Tokenizers::Base.new
-    end
+    let(:tokenizer) { Tokenizers::Base.new }
 
     describe 'reject_token_if' do
       it 'rejects empty tokens by default' do
-        @tokenizer.reject(['a', nil, '', 'b']).should == ['a', 'b']
+        tokenizer.reject(['a', nil, '', 'b']).should == ['a', 'b']
       end
       it 'rejects tokens based on the given rejection criteria if set' do
-        @tokenizer.reject_token_if &:nil?
+        tokenizer.reject_token_if &:nil?
 
-        @tokenizer.reject(['a', nil, '', 'b']).should == ['a', '', 'b']
+        tokenizer.reject(['a', nil, '', 'b']).should == ['a', '', 'b']
      end
    end
 
    describe "substitute(s)_characters*" do
      it "doesn't substitute if there is no substituter" do
-        @tokenizer.substitute_characters('abcdefghijklmnopqrstuvwxyzäöü').should == 'abcdefghijklmnopqrstuvwxyzäöü'
+        tokenizer.substitute_characters('abcdefghijklmnopqrstuvwxyzäöü').should == 'abcdefghijklmnopqrstuvwxyzäöü'
      end
      it "uses the substituter to replace characters" do
-        @tokenizer.substitutes_characters_with CharacterSubstituters::WestEuropean.new
+        tokenizer.substitutes_characters_with CharacterSubstituters::WestEuropean.new
 
-        @tokenizer.substitute_characters('abcdefghijklmnopqrstuvwxyzäöü').should == 'abcdefghijklmnopqrstuvwxyzaeoeue'
+        tokenizer.substitute_characters('abcdefghijklmnopqrstuvwxyzäöü').should == 'abcdefghijklmnopqrstuvwxyzaeoeue'
      end
      it "uses the european substituter as default" do
-        @tokenizer.substitutes_characters_with
+        tokenizer.substitutes_characters_with
 
-        @tokenizer.substitute_characters('abcdefghijklmnopqrstuvwxyzäöü').should == 'abcdefghijklmnopqrstuvwxyzaeoeue'
+        tokenizer.substitute_characters('abcdefghijklmnopqrstuvwxyzäöü').should == 'abcdefghijklmnopqrstuvwxyzaeoeue'
      end
    end
 
    describe "removes_characters_after_splitting" do
      context "without removes_characters_after_splitting called" do
        it "has remove_after_normalizing_illegals" do
-          lambda { @tokenizer.remove_after_normalizing_illegals('any') }.should_not raise_error
+          expect { tokenizer.remove_after_normalizing_illegals('any') }.to_not raise_error
        end
        it 'should define a remove_after_normalizing_illegals normalize_with_patterns does nothing' do
          unchanging = stub :unchanging
-          @tokenizer.remove_after_normalizing_illegals unchanging
+
+          tokenizer.remove_after_normalizing_illegals unchanging
        end
      end
      context "with removes_characters_after_splitting called" do
        before(:each) do
-          @tokenizer.removes_characters_after_splitting(/[afo]/)
+          tokenizer.removes_characters_after_splitting(/[afo]/)
        end
        it "has remove_after_normalizing_illegals" do
-          lambda { @tokenizer.remove_after_normalizing_illegals('abcdefghijklmnop') }.should_not raise_error
+          expect { tokenizer.remove_after_normalizing_illegals('abcdefghijklmnop') }.to_not raise_error
        end
        it "removes illegal characters" do
-          @tokenizer.remove_after_normalizing_illegals('abcdefghijklmnop').should == 'bcdeghijklmnp'
+          tokenizer.remove_after_normalizing_illegals('abcdefghijklmnop').should == 'bcdeghijklmnp'
        end
      end
    end
@@ -74,25 +71,26 @@ describe Tokenizers::Base do
    describe "normalizes_words" do
      context "without normalizes_words called" do
        it "has normalize_with_patterns" do
-          lambda { @tokenizer.normalize_with_patterns('any') }.should_not raise_error
+          expect { tokenizer.normalize_with_patterns('any') }.to_not raise_error
        end
        it 'should define a method normalize_with_patterns does nothing' do
          unchanging = stub :unchanging
-          @tokenizer.normalize_with_patterns(unchanging).should == unchanging
+
+          tokenizer.normalize_with_patterns(unchanging).should == unchanging
        end
      end
      context "with normalizes_words called" do
        before(:each) do
-          @tokenizer.normalizes_words([
+          tokenizer.normalizes_words([
            [/st\./, 'sankt'],
            [/stras?s?e?/, 'str']
          ])
        end
        it "has normalize_with_patterns" do
-          lambda { @tokenizer.normalize_with_patterns('a b/c.d') }.should_not raise_error
+          expect { tokenizer.normalize_with_patterns('a b/c.d') }.to_not raise_error
        end
        it "normalizes, but just the first one" do
-          @tokenizer.normalize_with_patterns('st. wegstrasse').should == 'sankt wegstrasse'
+          tokenizer.normalize_with_patterns('st. wegstrasse').should == 'sankt wegstrasse'
        end
      end
    end
@@ -100,24 +98,24 @@ describe Tokenizers::Base do
    describe "splits_text_on" do
      context "without splits_text_on called" do
        it "has split" do
-          lambda { @tokenizer.split('any') }.should_not raise_error
+          lambda { tokenizer.split('any') }.should_not raise_error
        end
        it 'should define a method split that splits by default on \s' do
-          @tokenizer.split('a b/c.d').should == ['a', 'b/c.d']
+          tokenizer.split('a b/c.d').should == ['a', 'b/c.d']
        end
        it 'splits text on /\s/ by default' do
-          @tokenizer.split('this is a test').should == ['this', 'is', 'a', 'test']
+          tokenizer.split('this is a test').should == ['this', 'is', 'a', 'test']
        end
      end
      context "with removes_characters called" do
        before(:each) do
-          @tokenizer.splits_text_on(/[\s\.\/]/)
+          tokenizer.splits_text_on(/[\s\.\/]/)
        end
        it "has split" do
-          lambda { @tokenizer.split('a b/c.d') }.should_not raise_error
+          expect { tokenizer.split('a b/c.d') }.to_not raise_error
        end
        it "removes illegal characters" do
-          @tokenizer.split('a b/c.d').should == ['a','b','c','d']
+          tokenizer.split('a b/c.d').should == ['a','b','c','d']
        end
      end
    end
@@ -125,22 +123,23 @@ describe Tokenizers::Base do
    describe "removes_characters" do
      context "without removes_characters called" do
        it "has remove_illegals" do
-          lambda { @tokenizer.remove_illegals('any') }.should_not raise_error
+          expect { tokenizer.remove_illegals('any') }.to_not raise_error
        end
        it 'should define a method remove_illegals that does nothing' do
          unchanging = stub :unchanging
-          @tokenizer.remove_illegals unchanging
+
+          tokenizer.remove_illegals unchanging
        end
      end
      context "with removes_characters called" do
        before(:each) do
-          @tokenizer.removes_characters(/[afo]/)
+          tokenizer.removes_characters(/[afo]/)
        end
        it "has remove_illegals" do
-          lambda { @tokenizer.remove_illegals('abcdefghijklmnop') }.should_not raise_error
+          expect { tokenizer.remove_illegals('abcdefghijklmnop') }.to_not raise_error
        end
        it "removes illegal characters" do
-          @tokenizer.remove_illegals('abcdefghijklmnop').should == 'bcdeghijklmnp'
+          tokenizer.remove_illegals('abcdefghijklmnop').should == 'bcdeghijklmnp'
        end
      end
    end
@@ -148,45 +147,44 @@ describe Tokenizers::Base do
    describe 'stopwords' do
      context 'without stopwords given' do
        it 'should define a method remove_stopwords' do
-          lambda { @tokenizer.remove_stopwords('from this text') }.should_not raise_error
+          lambda { tokenizer.remove_stopwords('from this text') }.should_not raise_error
        end
        it 'should define a method remove_stopwords that does nothing' do
-          @tokenizer.remove_stopwords('from this text').should == 'from this text'
+          tokenizer.remove_stopwords('from this text').should == 'from this text'
        end
        it 'should define a method remove_non_single_stopwords' do
-          lambda { @tokenizer.remove_non_single_stopwords('from this text') }.should_not raise_error
-
+          expect { tokenizer.remove_non_single_stopwords('from this text') }.to_not raise_error
        end
      end
      context 'with stopwords given' do
        before(:each) do
-          @tokenizer.stopwords(/r|e/)
+          tokenizer.stopwords(/r|e/)
        end
        it 'should define a method remove_stopwords' do
-          lambda { @tokenizer.remove_stopwords('from this text') }.should_not raise_error
+          lambda { tokenizer.remove_stopwords('from this text') }.should_not raise_error
        end
        it 'should define a method stopwords that removes stopwords' do
-          @tokenizer.remove_stopwords('from this text').should == 'fom this txt'
+          tokenizer.remove_stopwords('from this text').should == 'fom this txt'
        end
        it 'should define a method remove_non_single_stopwords' do
-          lambda { @tokenizer.remove_non_single_stopwords('from this text') }.should_not raise_error
+          expect { tokenizer.remove_non_single_stopwords('from this text') }.to_not raise_error
        end
        it 'should define a method remove_non_single_stopwords that removes non-single stopwords' do
-          @tokenizer.remove_non_single_stopwords('rerere rerere').should == ' '
+          tokenizer.remove_non_single_stopwords('rerere rerere').should == ' '
        end
        it 'should define a method remove_non_single_stopwords that does not single stopwords' do
-          @tokenizer.remove_non_single_stopwords('rerere').should == 'rerere'
+          tokenizer.remove_non_single_stopwords('rerere').should == 'rerere'
        end
      end
      context 'error case' do
        before(:each) do
-          @tokenizer.stopwords(/any/)
+          tokenizer.stopwords(/any/)
        end
        it 'should not remove non-single stopwords with a star' do
-          @tokenizer.remove_non_single_stopwords('a*').should == 'a*'
+          tokenizer.remove_non_single_stopwords('a*').should == 'a*'
        end
        it 'should not remove non-single stopwords with a tilde' do
-          @tokenizer.remove_non_single_stopwords('a~').should == 'a~'
+          tokenizer.remove_non_single_stopwords('a~').should == 'a~'
        end
      end
    end
@@ -4,9 +4,7 @@ require 'spec_helper'
 
 describe Tokenizers::Index do
 
-  before(:each) do
-    @tokenizer = Tokenizers::Index.new
-  end
+  let(:tokenizer) { Tokenizers::Index.new }
 
   describe "default*" do
     before(:all) do
@@ -33,13 +31,13 @@ describe Tokenizers::Index do
 
   describe "remove_removes_characters" do
     it "should not remove ' from a query by default" do
-      @tokenizer.remove_illegals("Lugi's").should == "Lugi's"
+      tokenizer.remove_illegals("Lugi's").should == "Lugi's"
     end
   end
 
   describe "reject!" do
     it "should reject tokens if blank" do
-      @tokenizer.reject(['', 'not blank', '']).should == ['not blank']
+      tokenizer.reject(['', 'not blank', '']).should == ['not blank']
     end
   end
 
@@ -47,7 +45,7 @@ describe Tokenizers::Index do
   describe "normalizing" do
     def self.it_should_normalize_token(text, expected)
       it "should handle the #{text} case" do
-        @tokenizer.tokenize(text).to_a.should == [expected].compact
+        tokenizer.tokenize(text).to_a.should == [expected].compact
       end
     end
     # defaults
@@ -57,7 +55,7 @@ describe Tokenizers::Index do
   describe "tokenizing" do
     def self.it_should_tokenize_token(text, expected)
       it "should handle the #{text} case" do
-        @tokenizer.tokenize(text).to_a.should == expected
+        tokenizer.tokenize(text).to_a.should == expected
       end
     end
     # defaults
@@ -3,9 +3,7 @@ require 'spec_helper'
 
 describe Tokenizers::Query do
 
-  before(:each) do
-    @tokenizer = Tokenizers::Query.new
-  end
+  let(:tokenizer) { Tokenizers::Query.new }
 
   describe "default*" do
     before(:all) do
@@ -32,7 +30,7 @@ describe Tokenizers::Query do
 
   describe "maximum_tokens" do
     it "should be set to 5 by default" do
-      @tokenizer.maximum_tokens.should == 5
+      tokenizer.maximum_tokens.should == 5
     end
     it "should be settable" do
       Tokenizers::Query.new(maximum_tokens: 3).maximum_tokens.should == 3
@@ -43,15 +41,15 @@ describe Tokenizers::Query do
     it 'should call methods in order' do
       text = stub :text
 
-      @tokenizer.should_receive(:remove_illegals).once.ordered.with text
-      @tokenizer.should_receive(:remove_non_single_stopwords).once.ordered.with text
+      tokenizer.should_receive(:remove_illegals).once.ordered.with text
+      tokenizer.should_receive(:remove_non_single_stopwords).once.ordered.with text
 
-      @tokenizer.preprocess text
+      tokenizer.preprocess text
     end
     it 'should return the text unchanged by default' do
       text = "some text"
 
-      @tokenizer.preprocess(text).should == text
+      tokenizer.preprocess(text).should == text
     end
   end
 
@@ -60,9 +58,9 @@ describe Tokenizers::Query do
     before(:each) do
       @tokens = mock :tokens, :null_object => true
     end
     it 'should tokenize the tokens' do
-      @tokens.should_receive(:tokenize_with).once.with @tokenizer
+      @tokens.should_receive(:tokenize_with).once.with tokenizer
 
-      @tokenizer.process @tokens
+      tokenizer.process @tokens
     end
     it 'should call methods on the tokens in order' do
       @tokens.should_receive(:tokenize_with).once.ordered
@@ -70,17 +68,17 @@ describe Tokenizers::Query do
       @tokens.should_receive(:cap).once.ordered
       @tokens.should_receive(:partialize_last).once.ordered
 
-      @tokenizer.process @tokens
+      tokenizer.process @tokens
     end
     it 'should return the tokens' do
-      @tokenizer.process(@tokens).should == @tokens
+      tokenizer.process(@tokens).should == @tokens
     end
   end
 
   describe 'pretokenize' do
     def self.it_should_pretokenize text, expected
       it "should pretokenize #{text} as #{expected}" do
-        @tokenizer.pretokenize(text).should == expected
+        tokenizer.pretokenize(text).should == expected
       end
     end
     it_should_pretokenize 'test miau test', ['test', 'miau', 'test']
@@ -89,7 +87,7 @@ describe Tokenizers::Query do
   describe "tokenizing" do
     def self.it_should_tokenize_token(text, expected)
       it "should handle the #{text} case" do
-        @tokenizer.tokenize(text).map(&:text).should == expected
+        tokenizer.tokenize(text).map(&:text).should == expected
      end
    end
    it_should_tokenize_token 'simple tokenizing on \s', [:simple, :tokenizing, :on, :'\s']
@@ -98,7 +96,7 @@ describe Tokenizers::Query do
   describe 'normalize_with_patterns' do
     def self.it_should_pattern_normalize original, expected
       it "should normalize #{original} with pattern into #{expected}" do
-        @tokenizer.normalize_with_patterns(original).should == expected
+        tokenizer.normalize_with_patterns(original).should == expected
      end
    end
    it_should_pattern_normalize 'no pattern normalization', 'no pattern normalization'
@@ -106,22 +104,22 @@ describe Tokenizers::Query do
 
   describe 'reject' do
     it 'should reject blank tokens' do
-      @tokenizer.reject(["some token answering to blank?", nil, nil]).should == ["some token answering to blank?"]
+      tokenizer.reject(["some token answering to blank?", nil, nil]).should == ["some token answering to blank?"]
     end
   end
 
   describe "last token" do
     it "should be partial" do
-      @tokenizer.tokenize("First Second Third Last").last.instance_variable_get(:@partial).should be_true
+      tokenizer.tokenize("First Second Third Last").last.instance_variable_get(:@partial).should be_true
     end
   end
 
   describe ".tokenize" do
     it "should return an Array of tokens" do
-      @tokenizer.tokenize('test test').to_a.should be_instance_of(Array)
+      tokenizer.tokenize('test test').to_a.should be_instance_of(Array)
     end
     it "should return an empty tokenized query if the query string is blank or empty" do
-      @tokenizer.tokenize('').map(&:to_s).should == []
+      tokenizer.tokenize('').map(&:to_s).should == []
     end
   end
   describe "token_for" do
@@ -129,7 +127,7 @@ describe Tokenizers::Query do
       text = stub(:text)
       Query::Token.should_receive(:processed).with text
 
-      @tokenizer.token_for text
+      tokenizer.token_for text
     end
   end
 
metadata CHANGED
@@ -2,7 +2,7 @@
 name: picky
 version: !ruby/object:Gem::Version
   prerelease:
-  version: 1.4.0
+  version: 1.4.1
 platform: ruby
 authors:
 - Florian Hanke