picky 0.0.9 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/picky/application.rb +38 -37
- data/lib/picky/cacher/partial/default.rb +1 -3
- data/lib/picky/cacher/partial/subtoken.rb +44 -18
- data/lib/picky/configuration/field.rb +6 -2
- data/lib/picky/configuration/indexes.rb +16 -7
- data/lib/picky/configuration/queries.rb +3 -13
- data/lib/picky/extensions/symbol.rb +19 -4
- data/lib/picky/generator.rb +9 -0
- data/lib/picky/helpers/measuring.rb +3 -3
- data/lib/picky/index/bundle.rb +5 -4
- data/lib/picky/index/category.rb +14 -7
- data/lib/picky/index/combined.rb +6 -1
- data/lib/picky/indexers/no_source_specified_error.rb +2 -0
- data/lib/picky/indexes.rb +3 -9
- data/lib/picky/query/allocation.rb +1 -1
- data/lib/picky/query/allocations.rb +2 -2
- data/lib/picky/rack/harakiri.rb +10 -8
- data/lib/picky/routing.rb +19 -21
- data/lib/picky/solr/schema_generator.rb +4 -4
- data/lib/picky/sources/base.rb +16 -4
- data/lib/picky/sources/csv.rb +3 -0
- data/lib/picky/sources/db.rb +30 -22
- data/lib/picky/tokenizers/base.rb +7 -5
- data/lib/picky/tokenizers/index.rb +5 -5
- data/lib/picky/tokenizers/query.rb +9 -9
- data/prototype_project/app/application.rb +36 -29
- data/prototype_project/app/db.yml +1 -1
- data/prototype_project/config.ru +3 -2
- data/spec/ext/performant_spec.rb +2 -2
- data/spec/lib/application_spec.rb +54 -8
- data/spec/lib/cacher/partial/default_spec.rb +15 -0
- data/spec/lib/cacher/partial/subtoken_spec.rb +54 -2
- data/spec/lib/extensions/symbol_spec.rb +124 -30
- data/spec/lib/index/bundle_partial_generation_speed_spec.rb +1 -1
- data/spec/lib/query/allocations_spec.rb +5 -5
- data/spec/lib/query/combinations_spec.rb +3 -3
- data/spec/lib/rack/harakiri_spec.rb +29 -0
- data/spec/lib/routing_spec.rb +22 -98
- data/spec/lib/tokenizers/index_spec.rb +1 -1
- data/spec/specific/speed_spec.rb +4 -5
- metadata +7 -3
data/prototype_project/config.ru
CHANGED
@@ -29,8 +29,9 @@ Indexes.load_from_cache
|
|
29
29
|
Rack::Harakiri.after = 50
|
30
30
|
use Rack::Harakiri
|
31
31
|
|
32
|
-
#
|
32
|
+
# Finalize the application and start accepting requests.
|
33
33
|
#
|
34
|
-
# Note: Needs to be the same name as in app/application.rb.
|
34
|
+
# Note: Needs to be the same constant name as in app/application.rb.
|
35
35
|
#
|
36
|
+
PickySearch.finalize
|
36
37
|
run PickySearch
|
data/spec/ext/performant_spec.rb
CHANGED
@@ -47,7 +47,7 @@ describe Performant::Array do
|
|
47
47
|
# brute force
|
48
48
|
Benchmark.realtime do
|
49
49
|
Performant::Array.memory_efficient_intersect(arys.sort_by(&:size))
|
50
|
-
end.should
|
50
|
+
end.should < 0.001
|
51
51
|
end
|
52
52
|
it "should be optimal for 2 small arrays of 50/10_000" do
|
53
53
|
arys = [(1..50).to_a, (10_000..20_000).to_a << 7]
|
@@ -57,7 +57,7 @@ describe Performant::Array do
|
|
57
57
|
arys.inject(arys.shift.dup) do |total, ary|
|
58
58
|
total & arys
|
59
59
|
end
|
60
|
-
end.should
|
60
|
+
end.should < 0.0015
|
61
61
|
end
|
62
62
|
end
|
63
63
|
|
@@ -4,6 +4,52 @@ require 'spec_helper'
|
|
4
4
|
|
5
5
|
describe Application do
|
6
6
|
|
7
|
+
describe "integration" do
|
8
|
+
it "should run ok" do
|
9
|
+
lambda {
|
10
|
+
# TODO Add all possible cases.
|
11
|
+
#
|
12
|
+
class TestApplication < Application
|
13
|
+
indexing.removes_characters(/[^a-zA-Z0-9\s\/\-\"\&\.]/)
|
14
|
+
indexing.contracts_expressions(/mr\.\s*|mister\s*/i, 'mr ')
|
15
|
+
indexing.stopwords(/\b(and|the|of|it|in|for)\b/)
|
16
|
+
indexing.splits_text_on(/[\s\/\-\"\&\.]/)
|
17
|
+
indexing.removes_characters_after_splitting(/[\.]/)
|
18
|
+
|
19
|
+
books_index = index Sources::DB.new('SELECT id, title, author, isbn13 as isbn FROM books', :file => 'app/db.yml'),
|
20
|
+
field(:title, :similarity => Similarity::DoubleLevenshtone.new(3)), # Up to three similar title words are indexed.
|
21
|
+
field(:author),
|
22
|
+
field(:isbn, :partial => Partial::None.new) # Partially searching on an ISBN does not make much sense.
|
23
|
+
|
24
|
+
# Note that Picky needs the following characters to
|
25
|
+
# pass through, as they are control characters: *"~:
|
26
|
+
#
|
27
|
+
querying.removes_characters(/[^a-zA-Z0-9\s\/\-\,\&\"\~\*\:]/)
|
28
|
+
querying.stopwords(/\b(and|the|of|it|in|for)\b/)
|
29
|
+
querying.splits_text_on(/[\s\/\-\,\&]+/)
|
30
|
+
querying.normalizes_words([
|
31
|
+
[/Deoxyribonucleic Acid/i, 'DNA']
|
32
|
+
])
|
33
|
+
querying.maximum_tokens 5
|
34
|
+
|
35
|
+
full = Query::Full.new books_index
|
36
|
+
live = Query::Live.new books_index
|
37
|
+
|
38
|
+
route %r{^/books/full} => full
|
39
|
+
route %r{^/books/live} => live
|
40
|
+
end
|
41
|
+
}.should_not raise_error
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
describe 'delegation' do
|
46
|
+
it "should delegate route" do
|
47
|
+
Application.routing.should_receive(:route).once.with :path => :query
|
48
|
+
|
49
|
+
Application.route :path => :query
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
7
53
|
describe 'routing' do
|
8
54
|
it 'should be there' do
|
9
55
|
lambda { Application.routing }.should_not raise_error
|
@@ -31,30 +77,30 @@ describe Application do
|
|
31
77
|
describe "indexes" do
|
32
78
|
|
33
79
|
end
|
34
|
-
describe "
|
80
|
+
describe "indexing" do
|
35
81
|
it 'should be there' do
|
36
|
-
lambda { Application.
|
82
|
+
lambda { Application.indexing }.should_not raise_error
|
37
83
|
end
|
38
84
|
it "should return a new Routing instance" do
|
39
|
-
Application.
|
85
|
+
Application.indexing.should be_kind_of(Configuration::Indexes)
|
40
86
|
end
|
41
87
|
it "should cache the instance" do
|
42
|
-
Application.
|
88
|
+
Application.indexing.should == Application.indexing
|
43
89
|
end
|
44
90
|
end
|
45
91
|
|
46
92
|
describe "queries" do
|
47
93
|
|
48
94
|
end
|
49
|
-
describe "
|
95
|
+
describe "querying" do
|
50
96
|
it 'should be there' do
|
51
|
-
lambda { Application.
|
97
|
+
lambda { Application.querying }.should_not raise_error
|
52
98
|
end
|
53
99
|
it "should return a new Routing instance" do
|
54
|
-
Application.
|
100
|
+
Application.querying.should be_kind_of(Configuration::Queries)
|
55
101
|
end
|
56
102
|
it "should cache the instance" do
|
57
|
-
Application.
|
103
|
+
Application.querying.should == Application.querying
|
58
104
|
end
|
59
105
|
end
|
60
106
|
|
@@ -0,0 +1,15 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe Cacher::Partial::Default do
|
4
|
+
|
5
|
+
it "should be a subtoken" do
|
6
|
+
Cacher::Partial::Default.should be_kind_of(Cacher::Partial::Subtoken)
|
7
|
+
end
|
8
|
+
it "should be a the right down to" do
|
9
|
+
Cacher::Partial::Default.down_to.should == 1
|
10
|
+
end
|
11
|
+
it "should be a the right starting at" do
|
12
|
+
Cacher::Partial::Default.starting_at.should == -1
|
13
|
+
end
|
14
|
+
|
15
|
+
end
|
@@ -27,9 +27,35 @@ describe Cacher::Partial::Subtoken do
|
|
27
27
|
:fla => [2]
|
28
28
|
}
|
29
29
|
end
|
30
|
+
it "should be fast" do
|
31
|
+
Benchmark.realtime { @cacher.generate_from( :florian => [1], :flavia => [2] ) }.should < 0.0001
|
32
|
+
end
|
33
|
+
it "should handle duplicate ids" do
|
34
|
+
@cacher.generate_from( :flo => [1], :fla => [1] ).should == {
|
35
|
+
:flo => [1],
|
36
|
+
:fl => [1],
|
37
|
+
:f => [1],
|
38
|
+
:fla => [1]
|
39
|
+
}
|
40
|
+
end
|
30
41
|
end
|
31
42
|
end
|
32
43
|
context 'down_to set' do
|
44
|
+
describe 'negative down_to' do
|
45
|
+
before(:each) do
|
46
|
+
@cacher = Cacher::Partial::Subtoken.new :down_to => -2
|
47
|
+
end
|
48
|
+
it 'should generate the right index' do
|
49
|
+
@cacher.generate_from( :florian => [1], :flavia => [2] ).should == {
|
50
|
+
:florian => [1],
|
51
|
+
:floria => [1],
|
52
|
+
:flori => [1],
|
53
|
+
:flavia => [2],
|
54
|
+
:flavi => [2],
|
55
|
+
:flav => [2]
|
56
|
+
}
|
57
|
+
end
|
58
|
+
end
|
33
59
|
context "large down_to" do
|
34
60
|
before(:each) do
|
35
61
|
@cacher = Cacher::Partial::Subtoken.new :down_to => 10
|
@@ -50,7 +76,7 @@ describe Cacher::Partial::Subtoken do
|
|
50
76
|
end
|
51
77
|
describe 'starting_at' do
|
52
78
|
it 'should return the right value' do
|
53
|
-
@cacher.starting_at.should ==
|
79
|
+
@cacher.starting_at.should == -1
|
54
80
|
end
|
55
81
|
end
|
56
82
|
describe 'down_to' do
|
@@ -71,10 +97,36 @@ describe Cacher::Partial::Subtoken do
|
|
71
97
|
}
|
72
98
|
end
|
73
99
|
end
|
100
|
+
describe "a bigger example with disjunct symbols" do
|
101
|
+
before(:each) do
|
102
|
+
abc = ('A'..'Z').to_a + ('a'..'z').to_a
|
103
|
+
@index = {}
|
104
|
+
52.times do |i|
|
105
|
+
@index[abc.join.to_sym] = [i]
|
106
|
+
character = abc.shift
|
107
|
+
abc << character
|
108
|
+
end
|
109
|
+
end
|
110
|
+
it "should be fast" do
|
111
|
+
Benchmark.realtime { @cacher.generate_from(@index) }.should < 0.005
|
112
|
+
end
|
113
|
+
end
|
114
|
+
describe "a bigger example with almost identical symbols" do
|
115
|
+
before(:each) do
|
116
|
+
abc = ('A'..'Z').to_a + ('a'..'z').to_a
|
117
|
+
@index = {}
|
118
|
+
52.times do |i|
|
119
|
+
@index[(abc.join + abc[i].to_s).to_sym] = [i]
|
120
|
+
end
|
121
|
+
end
|
122
|
+
it "should be fast" do
|
123
|
+
Benchmark.realtime { @cacher.generate_from(@index) }.should < 0.003
|
124
|
+
end
|
125
|
+
end
|
74
126
|
end
|
75
127
|
context 'starting_at -1' do
|
76
128
|
before(:each) do
|
77
|
-
@cacher = Cacher::Partial::Subtoken.new :down_to => 4, :starting_at => -
|
129
|
+
@cacher = Cacher::Partial::Subtoken.new :down_to => 4, :starting_at => -2
|
78
130
|
end
|
79
131
|
describe 'starting_at' do
|
80
132
|
it 'should return the right value' do
|
@@ -1,26 +1,120 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
3
|
describe Symbol do
|
4
|
-
|
5
|
-
before(:each) do
|
6
|
-
GC.disable
|
7
|
-
end
|
8
|
-
after(:each) do
|
9
|
-
GC.enable
|
10
|
-
GC.start
|
11
|
-
end
|
12
|
-
|
4
|
+
|
13
5
|
context 'performance' do
|
14
6
|
include Helpers::Measuring
|
7
|
+
before(:each) do
|
8
|
+
@token = (((0..9).to_a)*10).to_s.to_sym
|
9
|
+
GC.disable
|
10
|
+
end
|
11
|
+
after(:each) do
|
12
|
+
GC.enable
|
13
|
+
GC.start
|
14
|
+
end
|
15
|
+
# Note: They influence each other. each_subtoken is faster though.
|
16
|
+
#
|
15
17
|
it 'should be fast' do
|
16
|
-
s = (((0..9).to_a)*10).to_s.to_sym
|
17
|
-
|
18
18
|
timed do
|
19
|
-
|
20
|
-
end.should
|
19
|
+
@token.subtokens
|
20
|
+
end.should < 0.0009
|
21
|
+
end
|
22
|
+
it "should be fast" do
|
23
|
+
timed do
|
24
|
+
@token.each_subtoken do |subtoken| end
|
25
|
+
end.should < 0.0004
|
21
26
|
end
|
22
27
|
end
|
23
|
-
|
28
|
+
|
29
|
+
describe "each_subtoken" do
|
30
|
+
context 'normal symbol' do
|
31
|
+
before(:each) do
|
32
|
+
@sym = :reinke
|
33
|
+
end
|
34
|
+
context 'no downto' do
|
35
|
+
it "should return an array of pieces of the original token, each 1 smaller than the other" do
|
36
|
+
result = []
|
37
|
+
@sym.each_subtoken do |subtoken|
|
38
|
+
result << subtoken
|
39
|
+
end
|
40
|
+
result.should == [:reinke, :reink, :rein, :rei, :re, :r]
|
41
|
+
end
|
42
|
+
end
|
43
|
+
context 'downto is larger than the symbol' do
|
44
|
+
before(:each) do
|
45
|
+
@downto = 8
|
46
|
+
end
|
47
|
+
it "should return an array of pieces of the original token, each 1 smaller than the other" do
|
48
|
+
result = []
|
49
|
+
@sym.each_subtoken(@downto) do |subtoken|
|
50
|
+
result << subtoken
|
51
|
+
end
|
52
|
+
result.should == [:reinke]
|
53
|
+
end
|
54
|
+
end
|
55
|
+
context 'downto is exactly the same as symbol' do
|
56
|
+
before(:each) do
|
57
|
+
@downto = 6
|
58
|
+
end
|
59
|
+
it "should return an array of pieces of the original token, each 1 smaller than the other" do
|
60
|
+
result = []
|
61
|
+
@sym.each_subtoken(@downto) do |subtoken|
|
62
|
+
result << subtoken
|
63
|
+
end
|
64
|
+
result.should == [:reinke]
|
65
|
+
end
|
66
|
+
end
|
67
|
+
context 'downto is smaller than the length of the symbol' do
|
68
|
+
before(:each) do
|
69
|
+
@downto = 4
|
70
|
+
end
|
71
|
+
it "should return an array of pieces of the original token, each 1 smaller than the other" do
|
72
|
+
result = []
|
73
|
+
@sym.each_subtoken(@downto) do |subtoken|
|
74
|
+
result << subtoken
|
75
|
+
end
|
76
|
+
result.should == [:reinke, :reink, :rein]
|
77
|
+
end
|
78
|
+
end
|
79
|
+
context 'downto is 1' do
|
80
|
+
before(:each) do
|
81
|
+
@downto = 1
|
82
|
+
end
|
83
|
+
it "should return an array of pieces of the original token, each 1 smaller than the other" do
|
84
|
+
result = []
|
85
|
+
@sym.each_subtoken(@downto) do |subtoken|
|
86
|
+
result << subtoken
|
87
|
+
end
|
88
|
+
result.should == [:reinke, :reink, :rein, :rei, :re, :r]
|
89
|
+
end
|
90
|
+
end
|
91
|
+
context 'downto is 0' do
|
92
|
+
before(:each) do
|
93
|
+
@downto = 0
|
94
|
+
end
|
95
|
+
it "should return an array of pieces of the original token, each 1 smaller than the other" do
|
96
|
+
result = []
|
97
|
+
@sym.each_subtoken(@downto) do |subtoken|
|
98
|
+
result << subtoken
|
99
|
+
end
|
100
|
+
result.should == [:reinke, :reink, :rein, :rei, :re, :r, :'']
|
101
|
+
end
|
102
|
+
end
|
103
|
+
context 'downto is less than zero' do
|
104
|
+
before(:each) do
|
105
|
+
@downto = -2
|
106
|
+
end
|
107
|
+
it "should return an array of pieces of the original token, each 1 smaller than the other" do
|
108
|
+
result = []
|
109
|
+
@sym.each_subtoken(@downto) do |subtoken|
|
110
|
+
result << subtoken
|
111
|
+
end
|
112
|
+
result.should == [:reinke, :reink, :rein]
|
113
|
+
end
|
114
|
+
end
|
115
|
+
end
|
116
|
+
end
|
117
|
+
|
24
118
|
describe "subtokens" do
|
25
119
|
context 'normal symbol' do
|
26
120
|
before(:each) do
|
@@ -63,22 +157,22 @@ describe Symbol do
|
|
63
157
|
@sym.subtokens(@downto).should == [:reinke, :reink, :rein, :rei, :re, :r]
|
64
158
|
end
|
65
159
|
end
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
160
|
+
context 'downto is 0' do
|
161
|
+
before(:each) do
|
162
|
+
@downto = 0
|
163
|
+
end
|
164
|
+
it "should return an array of pieces of the original token, each 1 smaller than the other" do
|
165
|
+
@sym.subtokens(@downto).should == [:reinke, :reink, :rein, :rei, :re, :r, :""]
|
166
|
+
end
|
167
|
+
end
|
168
|
+
context 'downto is less than zero' do
|
169
|
+
before(:each) do
|
170
|
+
@downto = -2
|
171
|
+
end
|
172
|
+
it "should return an array of pieces of the original token, each 1 smaller than the other" do
|
173
|
+
@sym.subtokens(@downto).should == [:reinke, :reink, :rein]
|
174
|
+
end
|
175
|
+
end
|
82
176
|
end
|
83
177
|
end
|
84
178
|
|
@@ -164,7 +164,7 @@ describe Query::Allocations do
|
|
164
164
|
context 'enough ids' do
|
165
165
|
before(:each) do
|
166
166
|
@allocation1 = stub :allocation1, :ids => [1, 2, 3]
|
167
|
-
@allocation2 = stub :
|
167
|
+
@allocation2 = stub :allocation2, :ids => [4, 5, 6, 7]
|
168
168
|
@allocations = Query::Allocations.new [@allocation1, @allocation2]
|
169
169
|
end
|
170
170
|
it 'should return one random id from the first allocations by default' do
|
@@ -177,7 +177,7 @@ describe Query::Allocations do
|
|
177
177
|
(1..7).to_a.should include(@allocations.random_ids.first)
|
178
178
|
end
|
179
179
|
it 'should not contain the same id twice' do
|
180
|
-
|
180
|
+
20.times do
|
181
181
|
@allocations.random_ids(2).uniq.size.should_not == 1
|
182
182
|
end
|
183
183
|
end
|
@@ -185,7 +185,7 @@ describe Query::Allocations do
|
|
185
185
|
context 'just one id' do
|
186
186
|
before(:each) do
|
187
187
|
@allocation1 = stub :allocation1, :ids => [1]
|
188
|
-
@allocation2 = stub :
|
188
|
+
@allocation2 = stub :allocation2, :ids => []
|
189
189
|
@allocations = Query::Allocations.new [@allocation1, @allocation2]
|
190
190
|
end
|
191
191
|
it 'should return one random id from its allocations by default' do
|
@@ -201,7 +201,7 @@ describe Query::Allocations do
|
|
201
201
|
context 'no id' do
|
202
202
|
before(:each) do
|
203
203
|
@allocation1 = stub :allocation1, :ids => []
|
204
|
-
@allocation2 = stub :
|
204
|
+
@allocation2 = stub :allocation2, :ids => []
|
205
205
|
@allocations = Query::Allocations.new [@allocation1, @allocation2]
|
206
206
|
end
|
207
207
|
it 'should return one random id from its allocations by default' do
|
@@ -328,7 +328,7 @@ describe Query::Allocations do
|
|
328
328
|
@allocations.total.should == 110
|
329
329
|
end
|
330
330
|
it 'should be fast' do
|
331
|
-
Benchmark.realtime { @allocations.process!(20, 0) }.should
|
331
|
+
Benchmark.realtime { @allocations.process!(20, 0) }.should < 0.0001
|
332
332
|
end
|
333
333
|
end
|
334
334
|
end
|