picky 0.0.9 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (41)
  1. data/lib/picky/application.rb +38 -37
  2. data/lib/picky/cacher/partial/default.rb +1 -3
  3. data/lib/picky/cacher/partial/subtoken.rb +44 -18
  4. data/lib/picky/configuration/field.rb +6 -2
  5. data/lib/picky/configuration/indexes.rb +16 -7
  6. data/lib/picky/configuration/queries.rb +3 -13
  7. data/lib/picky/extensions/symbol.rb +19 -4
  8. data/lib/picky/generator.rb +9 -0
  9. data/lib/picky/helpers/measuring.rb +3 -3
  10. data/lib/picky/index/bundle.rb +5 -4
  11. data/lib/picky/index/category.rb +14 -7
  12. data/lib/picky/index/combined.rb +6 -1
  13. data/lib/picky/indexers/no_source_specified_error.rb +2 -0
  14. data/lib/picky/indexes.rb +3 -9
  15. data/lib/picky/query/allocation.rb +1 -1
  16. data/lib/picky/query/allocations.rb +2 -2
  17. data/lib/picky/rack/harakiri.rb +10 -8
  18. data/lib/picky/routing.rb +19 -21
  19. data/lib/picky/solr/schema_generator.rb +4 -4
  20. data/lib/picky/sources/base.rb +16 -4
  21. data/lib/picky/sources/csv.rb +3 -0
  22. data/lib/picky/sources/db.rb +30 -22
  23. data/lib/picky/tokenizers/base.rb +7 -5
  24. data/lib/picky/tokenizers/index.rb +5 -5
  25. data/lib/picky/tokenizers/query.rb +9 -9
  26. data/prototype_project/app/application.rb +36 -29
  27. data/prototype_project/app/db.yml +1 -1
  28. data/prototype_project/config.ru +3 -2
  29. data/spec/ext/performant_spec.rb +2 -2
  30. data/spec/lib/application_spec.rb +54 -8
  31. data/spec/lib/cacher/partial/default_spec.rb +15 -0
  32. data/spec/lib/cacher/partial/subtoken_spec.rb +54 -2
  33. data/spec/lib/extensions/symbol_spec.rb +124 -30
  34. data/spec/lib/index/bundle_partial_generation_speed_spec.rb +1 -1
  35. data/spec/lib/query/allocations_spec.rb +5 -5
  36. data/spec/lib/query/combinations_spec.rb +3 -3
  37. data/spec/lib/rack/harakiri_spec.rb +29 -0
  38. data/spec/lib/routing_spec.rb +22 -98
  39. data/spec/lib/tokenizers/index_spec.rb +1 -1
  40. data/spec/specific/speed_spec.rb +4 -5
  41. metadata +7 -3
@@ -6,5 +6,5 @@ adapter: mysql
6
6
  host: localhost
7
7
  username: root
8
8
  password:
9
- database: table_with_search_data
9
+ database: books_database # Needs to contain the table(s) queried by the DB source in app/application.rb.
10
10
  encoding: utf8
@@ -29,8 +29,9 @@ Indexes.load_from_cache
29
29
  Rack::Harakiri.after = 50
30
30
  use Rack::Harakiri
31
31
 
32
- # Start the application and start accepting requests.
32
+ # Finalize the application and start accepting requests.
33
33
  #
34
- # Note: Needs to be the same name as in app/application.rb.
34
+ # Note: Needs to be the same constant name as in app/application.rb.
35
35
  #
36
+ PickySearch.finalize
36
37
  run PickySearch
@@ -47,7 +47,7 @@ describe Performant::Array do
47
47
  # brute force
48
48
  Benchmark.realtime do
49
49
  Performant::Array.memory_efficient_intersect(arys.sort_by(&:size))
50
- end.should <= 0.001
50
+ end.should < 0.001
51
51
  end
52
52
  it "should be optimal for 2 small arrays of 50/10_000" do
53
53
  arys = [(1..50).to_a, (10_000..20_000).to_a << 7]
@@ -57,7 +57,7 @@ describe Performant::Array do
57
57
  arys.inject(arys.shift.dup) do |total, ary|
58
58
  total & arys
59
59
  end
60
- end.should <= 0.0015
60
+ end.should < 0.0015
61
61
  end
62
62
  end
63
63
 
@@ -4,6 +4,52 @@ require 'spec_helper'
4
4
 
5
5
  describe Application do
6
6
 
7
+ describe "integration" do
8
+ it "should run ok" do
9
+ lambda {
10
+ # TODO Add all possible cases.
11
+ #
12
+ class TestApplication < Application
13
+ indexing.removes_characters(/[^a-zA-Z0-9\s\/\-\"\&\.]/)
14
+ indexing.contracts_expressions(/mr\.\s*|mister\s*/i, 'mr ')
15
+ indexing.stopwords(/\b(and|the|of|it|in|for)\b/)
16
+ indexing.splits_text_on(/[\s\/\-\"\&\.]/)
17
+ indexing.removes_characters_after_splitting(/[\.]/)
18
+
19
+ books_index = index Sources::DB.new('SELECT id, title, author, isbn13 as isbn FROM books', :file => 'app/db.yml'),
20
+ field(:title, :similarity => Similarity::DoubleLevenshtone.new(3)), # Up to three similar title words are indexed.
21
+ field(:author),
22
+ field(:isbn, :partial => Partial::None.new) # Partially searching on an ISBN does not make much sense.
23
+
24
+ # Note that Picky needs the following characters to
25
+ # pass through, as they are control characters: *"~:
26
+ #
27
+ querying.removes_characters(/[^a-zA-Z0-9\s\/\-\,\&\"\~\*\:]/)
28
+ querying.stopwords(/\b(and|the|of|it|in|for)\b/)
29
+ querying.splits_text_on(/[\s\/\-\,\&]+/)
30
+ querying.normalizes_words([
31
+ [/Deoxyribonucleic Acid/i, 'DNA']
32
+ ])
33
+ querying.maximum_tokens 5
34
+
35
+ full = Query::Full.new books_index
36
+ live = Query::Live.new books_index
37
+
38
+ route %r{^/books/full} => full
39
+ route %r{^/books/live} => live
40
+ end
41
+ }.should_not raise_error
42
+ end
43
+ end
44
+
45
+ describe 'delegation' do
46
+ it "should delegate route" do
47
+ Application.routing.should_receive(:route).once.with :path => :query
48
+
49
+ Application.route :path => :query
50
+ end
51
+ end
52
+
7
53
  describe 'routing' do
8
54
  it 'should be there' do
9
55
  lambda { Application.routing }.should_not raise_error
@@ -31,30 +77,30 @@ describe Application do
31
77
  describe "indexes" do
32
78
 
33
79
  end
34
- describe "indexes_configuration" do
80
+ describe "indexing" do
35
81
  it 'should be there' do
36
- lambda { Application.indexes_configuration }.should_not raise_error
82
+ lambda { Application.indexing }.should_not raise_error
37
83
  end
38
84
  it "should return a new Routing instance" do
39
- Application.indexes_configuration.should be_kind_of(Configuration::Indexes)
85
+ Application.indexing.should be_kind_of(Configuration::Indexes)
40
86
  end
41
87
  it "should cache the instance" do
42
- Application.indexes_configuration.should == Application.indexes_configuration
88
+ Application.indexing.should == Application.indexing
43
89
  end
44
90
  end
45
91
 
46
92
  describe "queries" do
47
93
 
48
94
  end
49
- describe "queries_configuration" do
95
+ describe "querying" do
50
96
  it 'should be there' do
51
- lambda { Application.queries_configuration }.should_not raise_error
97
+ lambda { Application.querying }.should_not raise_error
52
98
  end
53
99
  it "should return a new Routing instance" do
54
- Application.queries_configuration.should be_kind_of(Configuration::Queries)
100
+ Application.querying.should be_kind_of(Configuration::Queries)
55
101
  end
56
102
  it "should cache the instance" do
57
- Application.queries_configuration.should == Application.queries_configuration
103
+ Application.querying.should == Application.querying
58
104
  end
59
105
  end
60
106
 
@@ -0,0 +1,15 @@
1
+ require 'spec_helper'
2
+
3
+ describe Cacher::Partial::Default do
4
+
5
+ it "should be a subtoken" do
6
+ Cacher::Partial::Default.should be_kind_of(Cacher::Partial::Subtoken)
7
+ end
8
+ it "should be a the right down to" do
9
+ Cacher::Partial::Default.down_to.should == 1
10
+ end
11
+ it "should be a the right starting at" do
12
+ Cacher::Partial::Default.starting_at.should == -1
13
+ end
14
+
15
+ end
@@ -27,9 +27,35 @@ describe Cacher::Partial::Subtoken do
27
27
  :fla => [2]
28
28
  }
29
29
  end
30
+ it "should be fast" do
31
+ Benchmark.realtime { @cacher.generate_from( :florian => [1], :flavia => [2] ) }.should < 0.0001
32
+ end
33
+ it "should handle duplicate ids" do
34
+ @cacher.generate_from( :flo => [1], :fla => [1] ).should == {
35
+ :flo => [1],
36
+ :fl => [1],
37
+ :f => [1],
38
+ :fla => [1]
39
+ }
40
+ end
30
41
  end
31
42
  end
32
43
  context 'down_to set' do
44
+ describe 'negative down_to' do
45
+ before(:each) do
46
+ @cacher = Cacher::Partial::Subtoken.new :down_to => -2
47
+ end
48
+ it 'should generate the right index' do
49
+ @cacher.generate_from( :florian => [1], :flavia => [2] ).should == {
50
+ :florian => [1],
51
+ :floria => [1],
52
+ :flori => [1],
53
+ :flavia => [2],
54
+ :flavi => [2],
55
+ :flav => [2]
56
+ }
57
+ end
58
+ end
33
59
  context "large down_to" do
34
60
  before(:each) do
35
61
  @cacher = Cacher::Partial::Subtoken.new :down_to => 10
@@ -50,7 +76,7 @@ describe Cacher::Partial::Subtoken do
50
76
  end
51
77
  describe 'starting_at' do
52
78
  it 'should return the right value' do
53
- @cacher.starting_at.should == 0
79
+ @cacher.starting_at.should == -1
54
80
  end
55
81
  end
56
82
  describe 'down_to' do
@@ -71,10 +97,36 @@ describe Cacher::Partial::Subtoken do
71
97
  }
72
98
  end
73
99
  end
100
+ describe "a bigger example with disjunct symbols" do
101
+ before(:each) do
102
+ abc = ('A'..'Z').to_a + ('a'..'z').to_a
103
+ @index = {}
104
+ 52.times do |i|
105
+ @index[abc.join.to_sym] = [i]
106
+ character = abc.shift
107
+ abc << character
108
+ end
109
+ end
110
+ it "should be fast" do
111
+ Benchmark.realtime { @cacher.generate_from(@index) }.should < 0.005
112
+ end
113
+ end
114
+ describe "a bigger example with almost identical symbols" do
115
+ before(:each) do
116
+ abc = ('A'..'Z').to_a + ('a'..'z').to_a
117
+ @index = {}
118
+ 52.times do |i|
119
+ @index[(abc.join + abc[i].to_s).to_sym] = [i]
120
+ end
121
+ end
122
+ it "should be fast" do
123
+ Benchmark.realtime { @cacher.generate_from(@index) }.should < 0.003
124
+ end
125
+ end
74
126
  end
75
127
  context 'starting_at -1' do
76
128
  before(:each) do
77
- @cacher = Cacher::Partial::Subtoken.new :down_to => 4, :starting_at => -1
129
+ @cacher = Cacher::Partial::Subtoken.new :down_to => 4, :starting_at => -2
78
130
  end
79
131
  describe 'starting_at' do
80
132
  it 'should return the right value' do
@@ -1,26 +1,120 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  describe Symbol do
4
-
5
- before(:each) do
6
- GC.disable
7
- end
8
- after(:each) do
9
- GC.enable
10
- GC.start
11
- end
12
-
4
+
13
5
  context 'performance' do
14
6
  include Helpers::Measuring
7
+ before(:each) do
8
+ @token = (((0..9).to_a)*10).to_s.to_sym
9
+ GC.disable
10
+ end
11
+ after(:each) do
12
+ GC.enable
13
+ GC.start
14
+ end
15
+ # Note: They influence each other. each_subtoken is faster though.
16
+ #
15
17
  it 'should be fast' do
16
- s = (((0..9).to_a)*10).to_s.to_sym
17
-
18
18
  timed do
19
- s.subtokens
20
- end.should <= 0.003 # was 0.0019
19
+ @token.subtokens
20
+ end.should < 0.0009
21
+ end
22
+ it "should be fast" do
23
+ timed do
24
+ @token.each_subtoken do |subtoken| end
25
+ end.should < 0.0004
21
26
  end
22
27
  end
23
-
28
+
29
+ describe "each_subtoken" do
30
+ context 'normal symbol' do
31
+ before(:each) do
32
+ @sym = :reinke
33
+ end
34
+ context 'no downto' do
35
+ it "should return an array of pieces of the original token, each 1 smaller than the other" do
36
+ result = []
37
+ @sym.each_subtoken do |subtoken|
38
+ result << subtoken
39
+ end
40
+ result.should == [:reinke, :reink, :rein, :rei, :re, :r]
41
+ end
42
+ end
43
+ context 'downto is larger than the symbol' do
44
+ before(:each) do
45
+ @downto = 8
46
+ end
47
+ it "should return an array of pieces of the original token, each 1 smaller than the other" do
48
+ result = []
49
+ @sym.each_subtoken(@downto) do |subtoken|
50
+ result << subtoken
51
+ end
52
+ result.should == [:reinke]
53
+ end
54
+ end
55
+ context 'downto is exactly the same as symbol' do
56
+ before(:each) do
57
+ @downto = 6
58
+ end
59
+ it "should return an array of pieces of the original token, each 1 smaller than the other" do
60
+ result = []
61
+ @sym.each_subtoken(@downto) do |subtoken|
62
+ result << subtoken
63
+ end
64
+ result.should == [:reinke]
65
+ end
66
+ end
67
+ context 'downto is smaller than the length of the symbol' do
68
+ before(:each) do
69
+ @downto = 4
70
+ end
71
+ it "should return an array of pieces of the original token, each 1 smaller than the other" do
72
+ result = []
73
+ @sym.each_subtoken(@downto) do |subtoken|
74
+ result << subtoken
75
+ end
76
+ result.should == [:reinke, :reink, :rein]
77
+ end
78
+ end
79
+ context 'downto is 1' do
80
+ before(:each) do
81
+ @downto = 1
82
+ end
83
+ it "should return an array of pieces of the original token, each 1 smaller than the other" do
84
+ result = []
85
+ @sym.each_subtoken(@downto) do |subtoken|
86
+ result << subtoken
87
+ end
88
+ result.should == [:reinke, :reink, :rein, :rei, :re, :r]
89
+ end
90
+ end
91
+ context 'downto is 0' do
92
+ before(:each) do
93
+ @downto = 0
94
+ end
95
+ it "should return an array of pieces of the original token, each 1 smaller than the other" do
96
+ result = []
97
+ @sym.each_subtoken(@downto) do |subtoken|
98
+ result << subtoken
99
+ end
100
+ result.should == [:reinke, :reink, :rein, :rei, :re, :r, :'']
101
+ end
102
+ end
103
+ context 'downto is less than zero' do
104
+ before(:each) do
105
+ @downto = -2
106
+ end
107
+ it "should return an array of pieces of the original token, each 1 smaller than the other" do
108
+ result = []
109
+ @sym.each_subtoken(@downto) do |subtoken|
110
+ result << subtoken
111
+ end
112
+ result.should == [:reinke, :reink, :rein]
113
+ end
114
+ end
115
+ end
116
+ end
117
+
24
118
  describe "subtokens" do
25
119
  context 'normal symbol' do
26
120
  before(:each) do
@@ -63,22 +157,22 @@ describe Symbol do
63
157
  @sym.subtokens(@downto).should == [:reinke, :reink, :rein, :rei, :re, :r]
64
158
  end
65
159
  end
66
- # context 'downto is 0' do
67
- # before(:each) do
68
- # @downto = 0
69
- # end
70
- # it "should return an array of pieces of the original token, each 1 smaller than the other" do
71
- # @sym.subtokens(@downto).should == [:reinke, :reink, :rein, :rei, :re, :r]
72
- # end
73
- # end
74
- # context 'downto is less than zero' do
75
- # before(:each) do
76
- # @downto = -2
77
- # end
78
- # it "should return an array of pieces of the original token, each 1 smaller than the other" do
79
- # @sym.subtokens(@downto).should == [:reinke, :reink, :rein, :rei, :re, :r]
80
- # end
81
- # end
160
+ context 'downto is 0' do
161
+ before(:each) do
162
+ @downto = 0
163
+ end
164
+ it "should return an array of pieces of the original token, each 1 smaller than the other" do
165
+ @sym.subtokens(@downto).should == [:reinke, :reink, :rein, :rei, :re, :r, :""]
166
+ end
167
+ end
168
+ context 'downto is less than zero' do
169
+ before(:each) do
170
+ @downto = -2
171
+ end
172
+ it "should return an array of pieces of the original token, each 1 smaller than the other" do
173
+ @sym.subtokens(@downto).should == [:reinke, :reink, :rein]
174
+ end
175
+ end
82
176
  end
83
177
  end
84
178
 
@@ -39,7 +39,7 @@ describe Index::Bundle do
39
39
  it 'should be fast' do
40
40
  Benchmark.realtime do
41
41
  @full.generate_partial
42
- end.should <= 0.2
42
+ end.should < 0.2
43
43
  end
44
44
  end
45
45
  end
@@ -164,7 +164,7 @@ describe Query::Allocations do
164
164
  context 'enough ids' do
165
165
  before(:each) do
166
166
  @allocation1 = stub :allocation1, :ids => [1, 2, 3]
167
- @allocation2 = stub :allocation1, :ids => [4, 5, 6, 7]
167
+ @allocation2 = stub :allocation2, :ids => [4, 5, 6, 7]
168
168
  @allocations = Query::Allocations.new [@allocation1, @allocation2]
169
169
  end
170
170
  it 'should return one random id from the first allocations by default' do
@@ -177,7 +177,7 @@ describe Query::Allocations do
177
177
  (1..7).to_a.should include(@allocations.random_ids.first)
178
178
  end
179
179
  it 'should not contain the same id twice' do
180
- 100.times do
180
+ 20.times do
181
181
  @allocations.random_ids(2).uniq.size.should_not == 1
182
182
  end
183
183
  end
@@ -185,7 +185,7 @@ describe Query::Allocations do
185
185
  context 'just one id' do
186
186
  before(:each) do
187
187
  @allocation1 = stub :allocation1, :ids => [1]
188
- @allocation2 = stub :allocation1, :ids => []
188
+ @allocation2 = stub :allocation2, :ids => []
189
189
  @allocations = Query::Allocations.new [@allocation1, @allocation2]
190
190
  end
191
191
  it 'should return one random id from its allocations by default' do
@@ -201,7 +201,7 @@ describe Query::Allocations do
201
201
  context 'no id' do
202
202
  before(:each) do
203
203
  @allocation1 = stub :allocation1, :ids => []
204
- @allocation2 = stub :allocation1, :ids => []
204
+ @allocation2 = stub :allocation2, :ids => []
205
205
  @allocations = Query::Allocations.new [@allocation1, @allocation2]
206
206
  end
207
207
  it 'should return one random id from its allocations by default' do
@@ -328,7 +328,7 @@ describe Query::Allocations do
328
328
  @allocations.total.should == 110
329
329
  end
330
330
  it 'should be fast' do
331
- Benchmark.realtime { @allocations.process!(20, 0) }.should <= 0.0001
331
+ Benchmark.realtime { @allocations.process!(20, 0) }.should < 0.0001
332
332
  end
333
333
  end
334
334
  end