picky 3.5.0 → 3.5.1
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/picky/bundle.rb +37 -4
- data/lib/picky/bundle_indexed.rb +12 -8
- data/lib/picky/bundle_indexing.rb +6 -26
- data/lib/picky/bundle_realtime.rb +26 -16
- data/lib/picky/category_indexing.rb +1 -3
- data/lib/picky/category_realtime.rb +1 -1
- data/lib/picky/character_substituters/west_european.rb +4 -4
- data/lib/picky/generators/partial/infix.rb +0 -47
- data/lib/picky/generators/partial/none.rb +0 -6
- data/lib/picky/generators/partial/substring.rb +0 -47
- data/lib/picky/generators/similarity/double_metaphone.rb +3 -3
- data/lib/picky/generators/similarity/metaphone.rb +3 -3
- data/lib/picky/generators/similarity/phonetic.rb +12 -24
- data/lib/picky/generators/similarity/soundex.rb +3 -3
- data/lib/picky/generators/weights/constant.rb +46 -0
- data/lib/picky/generators/weights/dynamic.rb +37 -0
- data/lib/picky/generators/weights/logarithmic.rb +0 -10
- data/lib/picky/generators/weights/runtime.rb +41 -0
- data/lib/picky/loader.rb +3 -3
- data/lib/picky/query/allocations.rb +2 -1
- data/lib/picky/query/tokens.rb +0 -10
- data/spec/lib/category_indexed_spec.rb +1 -1
- data/spec/lib/character_substituters/west_european_spec.rb +11 -13
- data/spec/lib/generators/partial/infix_spec.rb +161 -161
- data/spec/lib/generators/partial/none_spec.rb +3 -3
- data/spec/lib/generators/partial/postfix_spec.rb +109 -109
- data/spec/lib/generators/partial/substring_spec.rb +190 -190
- data/spec/lib/generators/similarity/double_metaphone_spec.rb +38 -38
- data/spec/lib/generators/similarity/metaphone_spec.rb +38 -38
- data/spec/lib/generators/similarity/soundex_spec.rb +38 -38
- data/spec/lib/generators/weights/constant_spec.rb +37 -0
- data/spec/lib/generators/weights/dynamic_spec.rb +27 -0
- data/spec/lib/generators/weights/logarithmic_spec.rb +10 -15
- data/spec/lib/indexed/bundle_spec.rb +3 -2
- data/spec/lib/indexed/wrappers/bundle/calculation_spec.rb +8 -8
- data/spec/lib/indexing/bundle_spec.rb +5 -3
- data/spec/specific/dynamic_weights_spec.rb +44 -0
- metadata +36 -28
- data/lib/picky/generators/base.rb +0 -19
data/lib/picky/loader.rb
CHANGED
@@ -91,6 +91,9 @@ module Picky
|
|
91
91
|
# Weight index generation strategies.
|
92
92
|
#
|
93
93
|
load_relative 'generators/weights/strategy'
|
94
|
+
load_relative 'generators/weights/runtime'
|
95
|
+
load_relative 'generators/weights/dynamic'
|
96
|
+
load_relative 'generators/weights/constant'
|
94
97
|
load_relative 'generators/weights/logarithmic'
|
95
98
|
load_relative 'generators/weights/default'
|
96
99
|
|
@@ -143,9 +146,6 @@ module Picky
|
|
143
146
|
load_relative 'wrappers/bundle/location'
|
144
147
|
load_relative 'wrappers/bundle/exact_partial'
|
145
148
|
|
146
|
-
# load_relative 'wrappers/sources/base'
|
147
|
-
# load_relative 'wrappers/sources/location'
|
148
|
-
|
149
149
|
# Tokens.
|
150
150
|
#
|
151
151
|
load_relative 'query/token'
|
data/lib/picky/query/tokens.rb
CHANGED
@@ -27,16 +27,6 @@ module Picky
|
|
27
27
|
new words.zip(originals).collect! { |word, original| Token.processed word, original }, ignore_unassigned
|
28
28
|
end
|
29
29
|
|
30
|
-
# Tokenizes each token.
|
31
|
-
#
|
32
|
-
# Note: Passed tokenizer needs to offer #normalize(text).
|
33
|
-
#
|
34
|
-
# TODO Still needed?
|
35
|
-
#
|
36
|
-
def tokenize_with tokenizer
|
37
|
-
@tokens.each { |token| token.tokenize_with(tokenizer) }
|
38
|
-
end
|
39
|
-
|
40
30
|
# Generates an array in the form of
|
41
31
|
# [
|
42
32
|
# [combination], # of token 1
|
@@ -7,7 +7,7 @@ describe Picky::Category do
|
|
7
7
|
source []
|
8
8
|
end
|
9
9
|
@partial_strategy = stub :partial, :use_exact_for_partial? => false
|
10
|
-
@weights_strategy = stub :weights
|
10
|
+
@weights_strategy = stub :weights, :saved? => true
|
11
11
|
@similarity_strategy = stub :similarity
|
12
12
|
|
13
13
|
@exact = stub :exact, :dump => nil
|
@@ -3,41 +3,39 @@
|
|
3
3
|
require 'spec_helper'
|
4
4
|
|
5
5
|
describe Picky::CharacterSubstituters::WestEuropean do
|
6
|
-
|
7
|
-
|
8
|
-
@substituter = described_class.new.tap { |s| s.substitute '' }
|
9
|
-
end
|
6
|
+
|
7
|
+
let(:substituter) { described_class.new.tap { |s| s.substitute '' } }
|
10
8
|
|
11
9
|
# A bit of metaprogramming to help with the myriads of its.
|
12
10
|
#
|
13
11
|
def self.it_should_substitute special_character, normal_character
|
14
12
|
it "should substitute #{special_character} with #{normal_character}" do
|
15
|
-
|
13
|
+
substituter.substitute(special_character).should == normal_character
|
16
14
|
end
|
17
15
|
end
|
18
|
-
def self.it_should_not_substitute special_character
|
16
|
+
def self.it_should_not_substitute special_character
|
19
17
|
it "should not substitute #{special_character}" do
|
20
|
-
|
18
|
+
substituter.substitute(special_character).should == special_character
|
21
19
|
end
|
22
20
|
end
|
23
|
-
|
21
|
+
|
24
22
|
# Speed spec at the top since the order of the describes made the
|
25
23
|
# speed spec trip. And not on mushrooms either.
|
26
24
|
#
|
27
25
|
describe "speed" do
|
28
26
|
it "is fast" do
|
29
|
-
result = performance_of {
|
27
|
+
result = performance_of { substituter.substitute('ä') }
|
30
28
|
result.should < 0.00009
|
31
29
|
end
|
32
30
|
it "is fast" do
|
33
|
-
result = performance_of {
|
31
|
+
result = performance_of { substituter.substitute('abcdefghijklmnopqrstuvwxyz1234567890') }
|
34
32
|
result.should < 0.00015
|
35
33
|
end
|
36
34
|
end
|
37
|
-
|
35
|
+
|
38
36
|
describe 'to_s' do
|
39
37
|
it 'outputs correctly' do
|
40
|
-
|
38
|
+
substituter.to_s.should == 'Picky::CharacterSubstituters::WestEuropean'
|
41
39
|
end
|
42
40
|
end
|
43
41
|
|
@@ -108,7 +106,7 @@ describe Picky::CharacterSubstituters::WestEuropean do
|
|
108
106
|
it_should_substitute 'å', 'a'
|
109
107
|
it_should_substitute 'Å', 'A'
|
110
108
|
end
|
111
|
-
|
109
|
+
|
112
110
|
describe "diacritic" do
|
113
111
|
it_should_substitute 'ñ', 'n'
|
114
112
|
end
|
@@ -15,100 +15,100 @@ describe Picky::Generators::Partial::Infix do
|
|
15
15
|
generator.min.should == 1
|
16
16
|
end
|
17
17
|
end
|
18
|
-
describe 'generate_from' do
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
end
|
18
|
+
# describe 'generate_from' do
|
19
|
+
# it 'should generate the right index' do
|
20
|
+
# generator.generate_from(florian: [1], flavia: [2]).should == {
|
21
|
+
# florian: [1],
|
22
|
+
# floria: [1],
|
23
|
+
# lorian: [1],
|
24
|
+
# flori: [1],
|
25
|
+
# loria: [1],
|
26
|
+
# orian: [1],
|
27
|
+
# flor: [1],
|
28
|
+
# lori: [1],
|
29
|
+
# oria: [1],
|
30
|
+
# rian: [1],
|
31
|
+
# flo: [1],
|
32
|
+
# lor: [1],
|
33
|
+
# ori: [1],
|
34
|
+
# ria: [1],
|
35
|
+
# ian: [1],
|
36
|
+
# fl: [1, 2],
|
37
|
+
# lo: [1],
|
38
|
+
# or: [1],
|
39
|
+
# ri: [1],
|
40
|
+
# ia: [1, 2],
|
41
|
+
# an: [1],
|
42
|
+
# f: [1, 2],
|
43
|
+
# l: [1, 2],
|
44
|
+
# o: [1],
|
45
|
+
# r: [1],
|
46
|
+
# i: [1, 2],
|
47
|
+
# a: [1, 2],
|
48
|
+
# n: [1],
|
49
|
+
# flavia: [2],
|
50
|
+
# flavi: [2],
|
51
|
+
# lavia: [2],
|
52
|
+
# flav: [2],
|
53
|
+
# lavi: [2],
|
54
|
+
# avia: [2],
|
55
|
+
# fla: [2],
|
56
|
+
# lav: [2],
|
57
|
+
# avi: [2],
|
58
|
+
# via: [2],
|
59
|
+
# la: [2],
|
60
|
+
# av: [2],
|
61
|
+
# vi: [2],
|
62
|
+
# v: [2]
|
63
|
+
# }
|
64
|
+
# end
|
65
|
+
# it "should be fast" do
|
66
|
+
# performance_of { generator.generate_from(florian: [1], flavia: [2]) }.should < 0.0001
|
67
|
+
# end
|
68
|
+
# it "should handle duplicate ids" do
|
69
|
+
# generator.generate_from(flo: [1], fla: [1]).should == {
|
70
|
+
# flo: [1],
|
71
|
+
# fl: [1],
|
72
|
+
# lo: [1],
|
73
|
+
# f: [1],
|
74
|
+
# l: [1],
|
75
|
+
# o: [1],
|
76
|
+
# a: [1],
|
77
|
+
# fla: [1],
|
78
|
+
# la: [1],
|
79
|
+
# }
|
80
|
+
# end
|
81
|
+
# end
|
82
82
|
end
|
83
83
|
context 'from set' do
|
84
|
-
describe 'negative min' do
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
end
|
84
|
+
# describe 'negative min' do
|
85
|
+
# before(:each) do
|
86
|
+
# @generator = described_class.new min: -2
|
87
|
+
# end
|
88
|
+
# it 'should generate the right index' do
|
89
|
+
# @generator.generate_from(florian: [1], flavia: [2]).should == {
|
90
|
+
# :florian => [1],
|
91
|
+
# :floria => [1],
|
92
|
+
# :lorian => [1],
|
93
|
+
# :flavia => [2],
|
94
|
+
# :flavi => [2],
|
95
|
+
# :lavia => [2]
|
96
|
+
# }
|
97
|
+
# end
|
98
|
+
# end
|
99
99
|
context "large min" do
|
100
100
|
before(:each) do
|
101
101
|
@generator = described_class.new min: 10
|
102
102
|
end
|
103
|
-
describe 'generate_from' do
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
end
|
103
|
+
# describe 'generate_from' do
|
104
|
+
# it 'should generate the right index' do
|
105
|
+
# @generator.generate_from(florian: [1], :'01234567890' => [2]).should == {
|
106
|
+
# :'01234567890' => [2],
|
107
|
+
# :'0123456789' => [2],
|
108
|
+
# :'1234567890' => [2]
|
109
|
+
# }
|
110
|
+
# end
|
111
|
+
# end
|
112
112
|
end
|
113
113
|
context 'default max' do
|
114
114
|
before(:each) do
|
@@ -124,54 +124,54 @@ describe Picky::Generators::Partial::Infix do
|
|
124
124
|
@generator.min.should == 4
|
125
125
|
end
|
126
126
|
end
|
127
|
-
describe 'generate_from' do
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
end
|
149
|
-
describe "a bigger example with disjunct symbols" do
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
end
|
163
|
-
describe "a bigger example with almost identical symbols" do
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
end
|
127
|
+
# describe 'generate_from' do
|
128
|
+
# it 'should generate the right index' do
|
129
|
+
# @generator.generate_from( :florian => [1], :flavia => [2] ).should == {
|
130
|
+
# :florian => [1],
|
131
|
+
# :floria => [1],
|
132
|
+
# :lorian => [1],
|
133
|
+
# :flori => [1],
|
134
|
+
# :loria => [1],
|
135
|
+
# :orian => [1],
|
136
|
+
# :flor => [1],
|
137
|
+
# :lori => [1],
|
138
|
+
# :oria => [1],
|
139
|
+
# :rian => [1],
|
140
|
+
# :flavia => [2],
|
141
|
+
# :flavi => [2],
|
142
|
+
# :lavia => [2],
|
143
|
+
# :flav => [2],
|
144
|
+
# :lavi => [2],
|
145
|
+
# :avia => [2]
|
146
|
+
# }
|
147
|
+
# end
|
148
|
+
# end
|
149
|
+
# describe "a bigger example with disjunct symbols" do
|
150
|
+
# before(:each) do
|
151
|
+
# abc = ('A'..'Z').to_a + ('a'..'z').to_a
|
152
|
+
# @index = {}
|
153
|
+
# 52.times do |i|
|
154
|
+
# @index[abc.join.to_sym] = [i]
|
155
|
+
# character = abc.shift
|
156
|
+
# abc << character
|
157
|
+
# end
|
158
|
+
# end
|
159
|
+
# it "should be fast" do
|
160
|
+
# performance_of { @generator.generate_from(@index) }.should < 0.07
|
161
|
+
# end
|
162
|
+
# end
|
163
|
+
# describe "a bigger example with almost identical symbols" do
|
164
|
+
# before(:each) do
|
165
|
+
# abc = ('A'..'Z').to_a + ('a'..'z').to_a
|
166
|
+
# @index = {}
|
167
|
+
# 52.times do |i|
|
168
|
+
# @index[(abc.join + abc[i].to_s).to_sym] = [i]
|
169
|
+
# end
|
170
|
+
# end
|
171
|
+
# it "should be fast" do
|
172
|
+
# performance_of { @generator.generate_from(@index) }.should < 0.07
|
173
|
+
# end
|
174
|
+
# end
|
175
175
|
end
|
176
176
|
context 'to set' do
|
177
177
|
before(:each) do
|
@@ -187,26 +187,26 @@ describe Picky::Generators::Partial::Infix do
|
|
187
187
|
@generator.min.should == 4
|
188
188
|
end
|
189
189
|
end
|
190
|
-
describe 'generate_from' do
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
end
|
190
|
+
# describe 'generate_from' do
|
191
|
+
# it 'should generate the right index' do
|
192
|
+
# @generator.generate_from( :florian => [1], :flavia => [2] ).should == {
|
193
|
+
# :floria => [1],
|
194
|
+
# :lorian => [1],
|
195
|
+
# :flori => [1],
|
196
|
+
# :loria => [1],
|
197
|
+
# :orian => [1],
|
198
|
+
# :flor => [1],
|
199
|
+
# :lori => [1],
|
200
|
+
# :oria => [1],
|
201
|
+
# :rian => [1],
|
202
|
+
# :flavi => [2],
|
203
|
+
# :lavia => [2],
|
204
|
+
# :flav => [2],
|
205
|
+
# :lavi => [2],
|
206
|
+
# :avia => [2]
|
207
|
+
# }
|
208
|
+
# end
|
209
|
+
# end
|
210
210
|
end
|
211
211
|
context 'min/max set' do
|
212
212
|
before(:each) do
|
@@ -222,11 +222,11 @@ describe Picky::Generators::Partial::Infix do
|
|
222
222
|
@generator.min.should == 4
|
223
223
|
end
|
224
224
|
end
|
225
|
-
describe 'generate_from' do
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
end
|
225
|
+
# describe 'generate_from' do
|
226
|
+
# it 'should generate the right index' do
|
227
|
+
# @generator.generate_from( :florian => [1], :flavia => [2] ).should == {}
|
228
|
+
# end
|
229
|
+
# end
|
230
230
|
end
|
231
231
|
end
|
232
232
|
|
@@ -8,9 +8,9 @@ describe Picky::Generators::Partial::None do
|
|
8
8
|
it "has the right superclass" do
|
9
9
|
described_class.should < Picky::Generators::Partial::Strategy
|
10
10
|
end
|
11
|
-
it "returns an empty index" do
|
12
|
-
|
13
|
-
end
|
11
|
+
# it "returns an empty index" do
|
12
|
+
# described_class.new.generate_from(:unimportant).should == {}
|
13
|
+
# end
|
14
14
|
describe 'use_exact_for_partial?' do
|
15
15
|
it 'returns true' do
|
16
16
|
described_class.new.use_exact_for_partial?.should == true
|