picky 0.0.9 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/picky/application.rb +38 -37
- data/lib/picky/cacher/partial/default.rb +1 -3
- data/lib/picky/cacher/partial/subtoken.rb +44 -18
- data/lib/picky/configuration/field.rb +6 -2
- data/lib/picky/configuration/indexes.rb +16 -7
- data/lib/picky/configuration/queries.rb +3 -13
- data/lib/picky/extensions/symbol.rb +19 -4
- data/lib/picky/generator.rb +9 -0
- data/lib/picky/helpers/measuring.rb +3 -3
- data/lib/picky/index/bundle.rb +5 -4
- data/lib/picky/index/category.rb +14 -7
- data/lib/picky/index/combined.rb +6 -1
- data/lib/picky/indexers/no_source_specified_error.rb +2 -0
- data/lib/picky/indexes.rb +3 -9
- data/lib/picky/query/allocation.rb +1 -1
- data/lib/picky/query/allocations.rb +2 -2
- data/lib/picky/rack/harakiri.rb +10 -8
- data/lib/picky/routing.rb +19 -21
- data/lib/picky/solr/schema_generator.rb +4 -4
- data/lib/picky/sources/base.rb +16 -4
- data/lib/picky/sources/csv.rb +3 -0
- data/lib/picky/sources/db.rb +30 -22
- data/lib/picky/tokenizers/base.rb +7 -5
- data/lib/picky/tokenizers/index.rb +5 -5
- data/lib/picky/tokenizers/query.rb +9 -9
- data/prototype_project/app/application.rb +36 -29
- data/prototype_project/app/db.yml +1 -1
- data/prototype_project/config.ru +3 -2
- data/spec/ext/performant_spec.rb +2 -2
- data/spec/lib/application_spec.rb +54 -8
- data/spec/lib/cacher/partial/default_spec.rb +15 -0
- data/spec/lib/cacher/partial/subtoken_spec.rb +54 -2
- data/spec/lib/extensions/symbol_spec.rb +124 -30
- data/spec/lib/index/bundle_partial_generation_speed_spec.rb +1 -1
- data/spec/lib/query/allocations_spec.rb +5 -5
- data/spec/lib/query/combinations_spec.rb +3 -3
- data/spec/lib/rack/harakiri_spec.rb +29 -0
- data/spec/lib/routing_spec.rb +22 -98
- data/spec/lib/tokenizers/index_spec.rb +1 -1
- data/spec/specific/speed_spec.rb +4 -5
- metadata +7 -3
data/prototype_project/config.ru
CHANGED
@@ -29,8 +29,9 @@ Indexes.load_from_cache
|
|
29
29
|
Rack::Harakiri.after = 50
|
30
30
|
use Rack::Harakiri
|
31
31
|
|
32
|
-
#
|
32
|
+
# Finalize the application and start accepting requests.
|
33
33
|
#
|
34
|
-
# Note: Needs to be the same name as in app/application.rb.
|
34
|
+
# Note: Needs to be the same constant name as in app/application.rb.
|
35
35
|
#
|
36
|
+
PickySearch.finalize
|
36
37
|
run PickySearch
|
data/spec/ext/performant_spec.rb
CHANGED
@@ -47,7 +47,7 @@ describe Performant::Array do
|
|
47
47
|
# brute force
|
48
48
|
Benchmark.realtime do
|
49
49
|
Performant::Array.memory_efficient_intersect(arys.sort_by(&:size))
|
50
|
-
end.should
|
50
|
+
end.should < 0.001
|
51
51
|
end
|
52
52
|
it "should be optimal for 2 small arrays of 50/10_000" do
|
53
53
|
arys = [(1..50).to_a, (10_000..20_000).to_a << 7]
|
@@ -57,7 +57,7 @@ describe Performant::Array do
|
|
57
57
|
arys.inject(arys.shift.dup) do |total, ary|
|
58
58
|
total & arys
|
59
59
|
end
|
60
|
-
end.should
|
60
|
+
end.should < 0.0015
|
61
61
|
end
|
62
62
|
end
|
63
63
|
|
@@ -4,6 +4,52 @@ require 'spec_helper'
|
|
4
4
|
|
5
5
|
describe Application do
|
6
6
|
|
7
|
+
describe "integration" do
|
8
|
+
it "should run ok" do
|
9
|
+
lambda {
|
10
|
+
# TODO Add all possible cases.
|
11
|
+
#
|
12
|
+
class TestApplication < Application
|
13
|
+
indexing.removes_characters(/[^a-zA-Z0-9\s\/\-\"\&\.]/)
|
14
|
+
indexing.contracts_expressions(/mr\.\s*|mister\s*/i, 'mr ')
|
15
|
+
indexing.stopwords(/\b(and|the|of|it|in|for)\b/)
|
16
|
+
indexing.splits_text_on(/[\s\/\-\"\&\.]/)
|
17
|
+
indexing.removes_characters_after_splitting(/[\.]/)
|
18
|
+
|
19
|
+
books_index = index Sources::DB.new('SELECT id, title, author, isbn13 as isbn FROM books', :file => 'app/db.yml'),
|
20
|
+
field(:title, :similarity => Similarity::DoubleLevenshtone.new(3)), # Up to three similar title word indexed.
|
21
|
+
field(:author),
|
22
|
+
field(:isbn, :partial => Partial::None.new) # Partially searching on an ISBN makes not much sense.
|
23
|
+
|
24
|
+
# Note that Picky needs the following characters to
|
25
|
+
# pass through, as they are control characters: *"~:
|
26
|
+
#
|
27
|
+
querying.removes_characters(/[^a-zA-Z0-9\s\/\-\,\&\"\~\*\:]/)
|
28
|
+
querying.stopwords(/\b(and|the|of|it|in|for)\b/)
|
29
|
+
querying.splits_text_on(/[\s\/\-\,\&]+/)
|
30
|
+
querying.normalizes_words([
|
31
|
+
[/Deoxyribonucleic Acid/i, 'DNA']
|
32
|
+
])
|
33
|
+
querying.maximum_tokens 5
|
34
|
+
|
35
|
+
full = Query::Full.new books_index
|
36
|
+
live = Query::Live.new books_index
|
37
|
+
|
38
|
+
route %r{^/books/full} => full
|
39
|
+
route %r{^/books/live} => live
|
40
|
+
end
|
41
|
+
}.should_not raise_error
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
describe 'delegation' do
|
46
|
+
it "should delegate route" do
|
47
|
+
Application.routing.should_receive(:route).once.with :path => :query
|
48
|
+
|
49
|
+
Application.route :path => :query
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
7
53
|
describe 'routing' do
|
8
54
|
it 'should be there' do
|
9
55
|
lambda { Application.routing }.should_not raise_error
|
@@ -31,30 +77,30 @@ describe Application do
|
|
31
77
|
describe "indexes" do
|
32
78
|
|
33
79
|
end
|
34
|
-
describe "
|
80
|
+
describe "indexing" do
|
35
81
|
it 'should be there' do
|
36
|
-
lambda { Application.
|
82
|
+
lambda { Application.indexing }.should_not raise_error
|
37
83
|
end
|
38
84
|
it "should return a new Routing instance" do
|
39
|
-
Application.
|
85
|
+
Application.indexing.should be_kind_of(Configuration::Indexes)
|
40
86
|
end
|
41
87
|
it "should cache the instance" do
|
42
|
-
Application.
|
88
|
+
Application.indexing.should == Application.indexing
|
43
89
|
end
|
44
90
|
end
|
45
91
|
|
46
92
|
describe "queries" do
|
47
93
|
|
48
94
|
end
|
49
|
-
describe "
|
95
|
+
describe "querying" do
|
50
96
|
it 'should be there' do
|
51
|
-
lambda { Application.
|
97
|
+
lambda { Application.querying }.should_not raise_error
|
52
98
|
end
|
53
99
|
it "should return a new Routing instance" do
|
54
|
-
Application.
|
100
|
+
Application.querying.should be_kind_of(Configuration::Queries)
|
55
101
|
end
|
56
102
|
it "should cache the instance" do
|
57
|
-
Application.
|
103
|
+
Application.querying.should == Application.querying
|
58
104
|
end
|
59
105
|
end
|
60
106
|
|
@@ -0,0 +1,15 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe Cacher::Partial::Default do
|
4
|
+
|
5
|
+
it "should be a subtoken" do
|
6
|
+
Cacher::Partial::Default.should be_kind_of(Cacher::Partial::Subtoken)
|
7
|
+
end
|
8
|
+
it "should be a the right down to" do
|
9
|
+
Cacher::Partial::Default.down_to.should == 1
|
10
|
+
end
|
11
|
+
it "should be a the right starting at" do
|
12
|
+
Cacher::Partial::Default.starting_at.should == -1
|
13
|
+
end
|
14
|
+
|
15
|
+
end
|
@@ -27,9 +27,35 @@ describe Cacher::Partial::Subtoken do
|
|
27
27
|
:fla => [2]
|
28
28
|
}
|
29
29
|
end
|
30
|
+
it "should be fast" do
|
31
|
+
Benchmark.realtime { @cacher.generate_from( :florian => [1], :flavia => [2] ) }.should < 0.0001
|
32
|
+
end
|
33
|
+
it "should handle duplicate ids" do
|
34
|
+
@cacher.generate_from( :flo => [1], :fla => [1] ).should == {
|
35
|
+
:flo => [1],
|
36
|
+
:fl => [1],
|
37
|
+
:f => [1],
|
38
|
+
:fla => [1]
|
39
|
+
}
|
40
|
+
end
|
30
41
|
end
|
31
42
|
end
|
32
43
|
context 'down_to set' do
|
44
|
+
describe 'negative down_to' do
|
45
|
+
before(:each) do
|
46
|
+
@cacher = Cacher::Partial::Subtoken.new :down_to => -2
|
47
|
+
end
|
48
|
+
it 'should generate the right index' do
|
49
|
+
@cacher.generate_from( :florian => [1], :flavia => [2] ).should == {
|
50
|
+
:florian => [1],
|
51
|
+
:floria => [1],
|
52
|
+
:flori => [1],
|
53
|
+
:flavia => [2],
|
54
|
+
:flavi => [2],
|
55
|
+
:flav => [2]
|
56
|
+
}
|
57
|
+
end
|
58
|
+
end
|
33
59
|
context "large down_to" do
|
34
60
|
before(:each) do
|
35
61
|
@cacher = Cacher::Partial::Subtoken.new :down_to => 10
|
@@ -50,7 +76,7 @@ describe Cacher::Partial::Subtoken do
|
|
50
76
|
end
|
51
77
|
describe 'starting_at' do
|
52
78
|
it 'should return the right value' do
|
53
|
-
@cacher.starting_at.should ==
|
79
|
+
@cacher.starting_at.should == -1
|
54
80
|
end
|
55
81
|
end
|
56
82
|
describe 'down_to' do
|
@@ -71,10 +97,36 @@ describe Cacher::Partial::Subtoken do
|
|
71
97
|
}
|
72
98
|
end
|
73
99
|
end
|
100
|
+
describe "a bigger example with disjunct symbols" do
|
101
|
+
before(:each) do
|
102
|
+
abc = ('A'..'Z').to_a + ('a'..'z').to_a
|
103
|
+
@index = {}
|
104
|
+
52.times do |i|
|
105
|
+
@index[abc.join.to_sym] = [i]
|
106
|
+
character = abc.shift
|
107
|
+
abc << character
|
108
|
+
end
|
109
|
+
end
|
110
|
+
it "should be fast" do
|
111
|
+
Benchmark.realtime { @cacher.generate_from(@index) }.should < 0.005
|
112
|
+
end
|
113
|
+
end
|
114
|
+
describe "a bigger example with almost identical symbols" do
|
115
|
+
before(:each) do
|
116
|
+
abc = ('A'..'Z').to_a + ('a'..'z').to_a
|
117
|
+
@index = {}
|
118
|
+
52.times do |i|
|
119
|
+
@index[(abc.join + abc[i].to_s).to_sym] = [i]
|
120
|
+
end
|
121
|
+
end
|
122
|
+
it "should be fast" do
|
123
|
+
Benchmark.realtime { @cacher.generate_from(@index) }.should < 0.003
|
124
|
+
end
|
125
|
+
end
|
74
126
|
end
|
75
127
|
context 'starting_at -1' do
|
76
128
|
before(:each) do
|
77
|
-
@cacher = Cacher::Partial::Subtoken.new :down_to => 4, :starting_at => -
|
129
|
+
@cacher = Cacher::Partial::Subtoken.new :down_to => 4, :starting_at => -2
|
78
130
|
end
|
79
131
|
describe 'starting_at' do
|
80
132
|
it 'should return the right value' do
|
@@ -1,26 +1,120 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
3
|
describe Symbol do
|
4
|
-
|
5
|
-
before(:each) do
|
6
|
-
GC.disable
|
7
|
-
end
|
8
|
-
after(:each) do
|
9
|
-
GC.enable
|
10
|
-
GC.start
|
11
|
-
end
|
12
|
-
|
4
|
+
|
13
5
|
context 'performance' do
|
14
6
|
include Helpers::Measuring
|
7
|
+
before(:each) do
|
8
|
+
@token = (((0..9).to_a)*10).to_s.to_sym
|
9
|
+
GC.disable
|
10
|
+
end
|
11
|
+
after(:each) do
|
12
|
+
GC.enable
|
13
|
+
GC.start
|
14
|
+
end
|
15
|
+
# Note: They influence each other. each_subtoken is faster though.
|
16
|
+
#
|
15
17
|
it 'should be fast' do
|
16
|
-
s = (((0..9).to_a)*10).to_s.to_sym
|
17
|
-
|
18
18
|
timed do
|
19
|
-
|
20
|
-
end.should
|
19
|
+
@token.subtokens
|
20
|
+
end.should < 0.0009
|
21
|
+
end
|
22
|
+
it "should be fast" do
|
23
|
+
timed do
|
24
|
+
@token.each_subtoken do |subtoken| end
|
25
|
+
end.should < 0.0004
|
21
26
|
end
|
22
27
|
end
|
23
|
-
|
28
|
+
|
29
|
+
describe "each_subtoken" do
|
30
|
+
context 'normal symbol' do
|
31
|
+
before(:each) do
|
32
|
+
@sym = :reinke
|
33
|
+
end
|
34
|
+
context 'no downto' do
|
35
|
+
it "should return an array of pieces of the original token, each 1 smaller than the other" do
|
36
|
+
result = []
|
37
|
+
@sym.each_subtoken do |subtoken|
|
38
|
+
result << subtoken
|
39
|
+
end
|
40
|
+
result.should == [:reinke, :reink, :rein, :rei, :re, :r]
|
41
|
+
end
|
42
|
+
end
|
43
|
+
context 'downto is larger than the symbol' do
|
44
|
+
before(:each) do
|
45
|
+
@downto = 8
|
46
|
+
end
|
47
|
+
it "should return an array of pieces of the original token, each 1 smaller than the other" do
|
48
|
+
result = []
|
49
|
+
@sym.each_subtoken(@downto) do |subtoken|
|
50
|
+
result << subtoken
|
51
|
+
end
|
52
|
+
result.should == [:reinke]
|
53
|
+
end
|
54
|
+
end
|
55
|
+
context 'downto is exactly the same as symbol' do
|
56
|
+
before(:each) do
|
57
|
+
@downto = 6
|
58
|
+
end
|
59
|
+
it "should return an array of pieces of the original token, each 1 smaller than the other" do
|
60
|
+
result = []
|
61
|
+
@sym.each_subtoken(@downto) do |subtoken|
|
62
|
+
result << subtoken
|
63
|
+
end
|
64
|
+
result.should == [:reinke]
|
65
|
+
end
|
66
|
+
end
|
67
|
+
context 'downto is smaller than the length of the symbol' do
|
68
|
+
before(:each) do
|
69
|
+
@downto = 4
|
70
|
+
end
|
71
|
+
it "should return an array of pieces of the original token, each 1 smaller than the other" do
|
72
|
+
result = []
|
73
|
+
@sym.each_subtoken(@downto) do |subtoken|
|
74
|
+
result << subtoken
|
75
|
+
end
|
76
|
+
result.should == [:reinke, :reink, :rein]
|
77
|
+
end
|
78
|
+
end
|
79
|
+
context 'downto is 1' do
|
80
|
+
before(:each) do
|
81
|
+
@downto = 1
|
82
|
+
end
|
83
|
+
it "should return an array of pieces of the original token, each 1 smaller than the other" do
|
84
|
+
result = []
|
85
|
+
@sym.each_subtoken(@downto) do |subtoken|
|
86
|
+
result << subtoken
|
87
|
+
end
|
88
|
+
result.should == [:reinke, :reink, :rein, :rei, :re, :r]
|
89
|
+
end
|
90
|
+
end
|
91
|
+
context 'downto is 0' do
|
92
|
+
before(:each) do
|
93
|
+
@downto = 0
|
94
|
+
end
|
95
|
+
it "should return an array of pieces of the original token, each 1 smaller than the other" do
|
96
|
+
result = []
|
97
|
+
@sym.each_subtoken(@downto) do |subtoken|
|
98
|
+
result << subtoken
|
99
|
+
end
|
100
|
+
result.should == [:reinke, :reink, :rein, :rei, :re, :r, :'']
|
101
|
+
end
|
102
|
+
end
|
103
|
+
context 'downto is less than zero' do
|
104
|
+
before(:each) do
|
105
|
+
@downto = -2
|
106
|
+
end
|
107
|
+
it "should return an array of pieces of the original token, each 1 smaller than the other" do
|
108
|
+
result = []
|
109
|
+
@sym.each_subtoken(@downto) do |subtoken|
|
110
|
+
result << subtoken
|
111
|
+
end
|
112
|
+
result.should == [:reinke, :reink, :rein]
|
113
|
+
end
|
114
|
+
end
|
115
|
+
end
|
116
|
+
end
|
117
|
+
|
24
118
|
describe "subtokens" do
|
25
119
|
context 'normal symbol' do
|
26
120
|
before(:each) do
|
@@ -63,22 +157,22 @@ describe Symbol do
|
|
63
157
|
@sym.subtokens(@downto).should == [:reinke, :reink, :rein, :rei, :re, :r]
|
64
158
|
end
|
65
159
|
end
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
160
|
+
context 'downto is 0' do
|
161
|
+
before(:each) do
|
162
|
+
@downto = 0
|
163
|
+
end
|
164
|
+
it "should return an array of pieces of the original token, each 1 smaller than the other" do
|
165
|
+
@sym.subtokens(@downto).should == [:reinke, :reink, :rein, :rei, :re, :r, :""]
|
166
|
+
end
|
167
|
+
end
|
168
|
+
context 'downto is less than zero' do
|
169
|
+
before(:each) do
|
170
|
+
@downto = -2
|
171
|
+
end
|
172
|
+
it "should return an array of pieces of the original token, each 1 smaller than the other" do
|
173
|
+
@sym.subtokens(@downto).should == [:reinke, :reink, :rein]
|
174
|
+
end
|
175
|
+
end
|
82
176
|
end
|
83
177
|
end
|
84
178
|
|
@@ -164,7 +164,7 @@ describe Query::Allocations do
|
|
164
164
|
context 'enough ids' do
|
165
165
|
before(:each) do
|
166
166
|
@allocation1 = stub :allocation1, :ids => [1, 2, 3]
|
167
|
-
@allocation2 = stub :
|
167
|
+
@allocation2 = stub :allocation2, :ids => [4, 5, 6, 7]
|
168
168
|
@allocations = Query::Allocations.new [@allocation1, @allocation2]
|
169
169
|
end
|
170
170
|
it 'should return one random id from the first allocations by default' do
|
@@ -177,7 +177,7 @@ describe Query::Allocations do
|
|
177
177
|
(1..7).to_a.should include(@allocations.random_ids.first)
|
178
178
|
end
|
179
179
|
it 'should not contain the same id twice' do
|
180
|
-
|
180
|
+
20.times do
|
181
181
|
@allocations.random_ids(2).uniq.size.should_not == 1
|
182
182
|
end
|
183
183
|
end
|
@@ -185,7 +185,7 @@ describe Query::Allocations do
|
|
185
185
|
context 'just one id' do
|
186
186
|
before(:each) do
|
187
187
|
@allocation1 = stub :allocation1, :ids => [1]
|
188
|
-
@allocation2 = stub :
|
188
|
+
@allocation2 = stub :allocation2, :ids => []
|
189
189
|
@allocations = Query::Allocations.new [@allocation1, @allocation2]
|
190
190
|
end
|
191
191
|
it 'should return one random id from its allocations by default' do
|
@@ -201,7 +201,7 @@ describe Query::Allocations do
|
|
201
201
|
context 'no id' do
|
202
202
|
before(:each) do
|
203
203
|
@allocation1 = stub :allocation1, :ids => []
|
204
|
-
@allocation2 = stub :
|
204
|
+
@allocation2 = stub :allocation2, :ids => []
|
205
205
|
@allocations = Query::Allocations.new [@allocation1, @allocation2]
|
206
206
|
end
|
207
207
|
it 'should return one random id from its allocations by default' do
|
@@ -328,7 +328,7 @@ describe Query::Allocations do
|
|
328
328
|
@allocations.total.should == 110
|
329
329
|
end
|
330
330
|
it 'should be fast' do
|
331
|
-
Benchmark.realtime { @allocations.process!(20, 0) }.should
|
331
|
+
Benchmark.realtime { @allocations.process!(20, 0) }.should < 0.0001
|
332
332
|
end
|
333
333
|
end
|
334
334
|
end
|