picky 0.0.9 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. data/lib/picky/application.rb +38 -37
  2. data/lib/picky/cacher/partial/default.rb +1 -3
  3. data/lib/picky/cacher/partial/subtoken.rb +44 -18
  4. data/lib/picky/configuration/field.rb +6 -2
  5. data/lib/picky/configuration/indexes.rb +16 -7
  6. data/lib/picky/configuration/queries.rb +3 -13
  7. data/lib/picky/extensions/symbol.rb +19 -4
  8. data/lib/picky/generator.rb +9 -0
  9. data/lib/picky/helpers/measuring.rb +3 -3
  10. data/lib/picky/index/bundle.rb +5 -4
  11. data/lib/picky/index/category.rb +14 -7
  12. data/lib/picky/index/combined.rb +6 -1
  13. data/lib/picky/indexers/no_source_specified_error.rb +2 -0
  14. data/lib/picky/indexes.rb +3 -9
  15. data/lib/picky/query/allocation.rb +1 -1
  16. data/lib/picky/query/allocations.rb +2 -2
  17. data/lib/picky/rack/harakiri.rb +10 -8
  18. data/lib/picky/routing.rb +19 -21
  19. data/lib/picky/solr/schema_generator.rb +4 -4
  20. data/lib/picky/sources/base.rb +16 -4
  21. data/lib/picky/sources/csv.rb +3 -0
  22. data/lib/picky/sources/db.rb +30 -22
  23. data/lib/picky/tokenizers/base.rb +7 -5
  24. data/lib/picky/tokenizers/index.rb +5 -5
  25. data/lib/picky/tokenizers/query.rb +9 -9
  26. data/prototype_project/app/application.rb +36 -29
  27. data/prototype_project/app/db.yml +1 -1
  28. data/prototype_project/config.ru +3 -2
  29. data/spec/ext/performant_spec.rb +2 -2
  30. data/spec/lib/application_spec.rb +54 -8
  31. data/spec/lib/cacher/partial/default_spec.rb +15 -0
  32. data/spec/lib/cacher/partial/subtoken_spec.rb +54 -2
  33. data/spec/lib/extensions/symbol_spec.rb +124 -30
  34. data/spec/lib/index/bundle_partial_generation_speed_spec.rb +1 -1
  35. data/spec/lib/query/allocations_spec.rb +5 -5
  36. data/spec/lib/query/combinations_spec.rb +3 -3
  37. data/spec/lib/rack/harakiri_spec.rb +29 -0
  38. data/spec/lib/routing_spec.rb +22 -98
  39. data/spec/lib/tokenizers/index_spec.rb +1 -1
  40. data/spec/specific/speed_spec.rb +4 -5
  41. metadata +7 -3
@@ -6,5 +6,5 @@ adapter: mysql
6
6
  host: localhost
7
7
  username: root
8
8
  password:
9
- database: table_with_search_data
9
+ database: books_database # Needs to contain the DB source in app/application.rb.
10
10
  encoding: utf8
@@ -29,8 +29,9 @@ Indexes.load_from_cache
29
29
  Rack::Harakiri.after = 50
30
30
  use Rack::Harakiri
31
31
 
32
- # Start the application and start accepting requests.
32
+ # Finalize the application and start accepting requests.
33
33
  #
34
- # Note: Needs to be the same name as in app/application.rb.
34
+ # Note: Needs to be the same constant name as in app/application.rb.
35
35
  #
36
+ PickySearch.finalize
36
37
  run PickySearch
@@ -47,7 +47,7 @@ describe Performant::Array do
47
47
  # brute force
48
48
  Benchmark.realtime do
49
49
  Performant::Array.memory_efficient_intersect(arys.sort_by(&:size))
50
- end.should <= 0.001
50
+ end.should < 0.001
51
51
  end
52
52
  it "should be optimal for 2 small arrays of 50/10_000" do
53
53
  arys = [(1..50).to_a, (10_000..20_000).to_a << 7]
@@ -57,7 +57,7 @@ describe Performant::Array do
57
57
  arys.inject(arys.shift.dup) do |total, ary|
58
58
  total & arys
59
59
  end
60
- end.should <= 0.0015
60
+ end.should < 0.0015
61
61
  end
62
62
  end
63
63
 
@@ -4,6 +4,52 @@ require 'spec_helper'
4
4
 
5
5
  describe Application do
6
6
 
7
+ describe "integration" do
8
+ it "should run ok" do
9
+ lambda {
10
+ # TODO Add all possible cases.
11
+ #
12
+ class TestApplication < Application
13
+ indexing.removes_characters(/[^a-zA-Z0-9\s\/\-\"\&\.]/)
14
+ indexing.contracts_expressions(/mr\.\s*|mister\s*/i, 'mr ')
15
+ indexing.stopwords(/\b(and|the|of|it|in|for)\b/)
16
+ indexing.splits_text_on(/[\s\/\-\"\&\.]/)
17
+ indexing.removes_characters_after_splitting(/[\.]/)
18
+
19
+ books_index = index Sources::DB.new('SELECT id, title, author, isbn13 as isbn FROM books', :file => 'app/db.yml'),
20
+ field(:title, :similarity => Similarity::DoubleLevenshtone.new(3)), # Up to three similar title words are indexed.
21
+ field(:author),
22
+ field(:isbn, :partial => Partial::None.new) # Partially searching on an ISBN does not make much sense.
23
+
24
+ # Note that Picky needs the following characters to
25
+ # pass through, as they are control characters: *"~:
26
+ #
27
+ querying.removes_characters(/[^a-zA-Z0-9\s\/\-\,\&\"\~\*\:]/)
28
+ querying.stopwords(/\b(and|the|of|it|in|for)\b/)
29
+ querying.splits_text_on(/[\s\/\-\,\&]+/)
30
+ querying.normalizes_words([
31
+ [/Deoxyribonucleic Acid/i, 'DNA']
32
+ ])
33
+ querying.maximum_tokens 5
34
+
35
+ full = Query::Full.new books_index
36
+ live = Query::Live.new books_index
37
+
38
+ route %r{^/books/full} => full
39
+ route %r{^/books/live} => live
40
+ end
41
+ }.should_not raise_error
42
+ end
43
+ end
44
+
45
+ describe 'delegation' do
46
+ it "should delegate route" do
47
+ Application.routing.should_receive(:route).once.with :path => :query
48
+
49
+ Application.route :path => :query
50
+ end
51
+ end
52
+
7
53
  describe 'routing' do
8
54
  it 'should be there' do
9
55
  lambda { Application.routing }.should_not raise_error
@@ -31,30 +77,30 @@ describe Application do
31
77
  describe "indexes" do
32
78
 
33
79
  end
34
- describe "indexes_configuration" do
80
+ describe "indexing" do
35
81
  it 'should be there' do
36
- lambda { Application.indexes_configuration }.should_not raise_error
82
+ lambda { Application.indexing }.should_not raise_error
37
83
  end
38
84
  it "should return a new Routing instance" do
39
- Application.indexes_configuration.should be_kind_of(Configuration::Indexes)
85
+ Application.indexing.should be_kind_of(Configuration::Indexes)
40
86
  end
41
87
  it "should cache the instance" do
42
- Application.indexes_configuration.should == Application.indexes_configuration
88
+ Application.indexing.should == Application.indexing
43
89
  end
44
90
  end
45
91
 
46
92
  describe "queries" do
47
93
 
48
94
  end
49
- describe "queries_configuration" do
95
+ describe "querying" do
50
96
  it 'should be there' do
51
- lambda { Application.queries_configuration }.should_not raise_error
97
+ lambda { Application.querying }.should_not raise_error
52
98
  end
53
99
  it "should return a new Routing instance" do
54
- Application.queries_configuration.should be_kind_of(Configuration::Queries)
100
+ Application.querying.should be_kind_of(Configuration::Queries)
55
101
  end
56
102
  it "should cache the instance" do
57
- Application.queries_configuration.should == Application.queries_configuration
103
+ Application.querying.should == Application.querying
58
104
  end
59
105
  end
60
106
 
@@ -0,0 +1,15 @@
1
+ require 'spec_helper'
2
+
3
+ describe Cacher::Partial::Default do
4
+
5
+ it "should be a subtoken" do
6
+ Cacher::Partial::Default.should be_kind_of(Cacher::Partial::Subtoken)
7
+ end
8
+ it "should be a the right down to" do
9
+ Cacher::Partial::Default.down_to.should == 1
10
+ end
11
+ it "should be a the right starting at" do
12
+ Cacher::Partial::Default.starting_at.should == -1
13
+ end
14
+
15
+ end
@@ -27,9 +27,35 @@ describe Cacher::Partial::Subtoken do
27
27
  :fla => [2]
28
28
  }
29
29
  end
30
+ it "should be fast" do
31
+ Benchmark.realtime { @cacher.generate_from( :florian => [1], :flavia => [2] ) }.should < 0.0001
32
+ end
33
+ it "should handle duplicate ids" do
34
+ @cacher.generate_from( :flo => [1], :fla => [1] ).should == {
35
+ :flo => [1],
36
+ :fl => [1],
37
+ :f => [1],
38
+ :fla => [1]
39
+ }
40
+ end
30
41
  end
31
42
  end
32
43
  context 'down_to set' do
44
+ describe 'negative down_to' do
45
+ before(:each) do
46
+ @cacher = Cacher::Partial::Subtoken.new :down_to => -2
47
+ end
48
+ it 'should generate the right index' do
49
+ @cacher.generate_from( :florian => [1], :flavia => [2] ).should == {
50
+ :florian => [1],
51
+ :floria => [1],
52
+ :flori => [1],
53
+ :flavia => [2],
54
+ :flavi => [2],
55
+ :flav => [2]
56
+ }
57
+ end
58
+ end
33
59
  context "large down_to" do
34
60
  before(:each) do
35
61
  @cacher = Cacher::Partial::Subtoken.new :down_to => 10
@@ -50,7 +76,7 @@ describe Cacher::Partial::Subtoken do
50
76
  end
51
77
  describe 'starting_at' do
52
78
  it 'should return the right value' do
53
- @cacher.starting_at.should == 0
79
+ @cacher.starting_at.should == -1
54
80
  end
55
81
  end
56
82
  describe 'down_to' do
@@ -71,10 +97,36 @@ describe Cacher::Partial::Subtoken do
71
97
  }
72
98
  end
73
99
  end
100
+ describe "a bigger example with disjunct symbols" do
101
+ before(:each) do
102
+ abc = ('A'..'Z').to_a + ('a'..'z').to_a
103
+ @index = {}
104
+ 52.times do |i|
105
+ @index[abc.join.to_sym] = [i]
106
+ character = abc.shift
107
+ abc << character
108
+ end
109
+ end
110
+ it "should be fast" do
111
+ Benchmark.realtime { @cacher.generate_from(@index) }.should < 0.005
112
+ end
113
+ end
114
+ describe "a bigger example with almost identical symbols" do
115
+ before(:each) do
116
+ abc = ('A'..'Z').to_a + ('a'..'z').to_a
117
+ @index = {}
118
+ 52.times do |i|
119
+ @index[(abc.join + abc[i].to_s).to_sym] = [i]
120
+ end
121
+ end
122
+ it "should be fast" do
123
+ Benchmark.realtime { @cacher.generate_from(@index) }.should < 0.003
124
+ end
125
+ end
74
126
  end
75
127
  context 'starting_at -1' do
76
128
  before(:each) do
77
- @cacher = Cacher::Partial::Subtoken.new :down_to => 4, :starting_at => -1
129
+ @cacher = Cacher::Partial::Subtoken.new :down_to => 4, :starting_at => -2
78
130
  end
79
131
  describe 'starting_at' do
80
132
  it 'should return the right value' do
@@ -1,26 +1,120 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  describe Symbol do
4
-
5
- before(:each) do
6
- GC.disable
7
- end
8
- after(:each) do
9
- GC.enable
10
- GC.start
11
- end
12
-
4
+
13
5
  context 'performance' do
14
6
  include Helpers::Measuring
7
+ before(:each) do
8
+ @token = (((0..9).to_a)*10).to_s.to_sym
9
+ GC.disable
10
+ end
11
+ after(:each) do
12
+ GC.enable
13
+ GC.start
14
+ end
15
+ # Note: They influence each other. each_subtoken is faster though.
16
+ #
15
17
  it 'should be fast' do
16
- s = (((0..9).to_a)*10).to_s.to_sym
17
-
18
18
  timed do
19
- s.subtokens
20
- end.should <= 0.003 # was 0.0019
19
+ @token.subtokens
20
+ end.should < 0.0009
21
+ end
22
+ it "should be fast" do
23
+ timed do
24
+ @token.each_subtoken do |subtoken| end
25
+ end.should < 0.0004
21
26
  end
22
27
  end
23
-
28
+
29
+ describe "each_subtoken" do
30
+ context 'normal symbol' do
31
+ before(:each) do
32
+ @sym = :reinke
33
+ end
34
+ context 'no downto' do
35
+ it "should return an array of pieces of the original token, each 1 smaller than the other" do
36
+ result = []
37
+ @sym.each_subtoken do |subtoken|
38
+ result << subtoken
39
+ end
40
+ result.should == [:reinke, :reink, :rein, :rei, :re, :r]
41
+ end
42
+ end
43
+ context 'downto is larger than the symbol' do
44
+ before(:each) do
45
+ @downto = 8
46
+ end
47
+ it "should return an array of pieces of the original token, each 1 smaller than the other" do
48
+ result = []
49
+ @sym.each_subtoken(@downto) do |subtoken|
50
+ result << subtoken
51
+ end
52
+ result.should == [:reinke]
53
+ end
54
+ end
55
+ context 'downto is exactly the same as symbol' do
56
+ before(:each) do
57
+ @downto = 6
58
+ end
59
+ it "should return an array of pieces of the original token, each 1 smaller than the other" do
60
+ result = []
61
+ @sym.each_subtoken(@downto) do |subtoken|
62
+ result << subtoken
63
+ end
64
+ result.should == [:reinke]
65
+ end
66
+ end
67
+ context 'downto is smaller than the length of the symbol' do
68
+ before(:each) do
69
+ @downto = 4
70
+ end
71
+ it "should return an array of pieces of the original token, each 1 smaller than the other" do
72
+ result = []
73
+ @sym.each_subtoken(@downto) do |subtoken|
74
+ result << subtoken
75
+ end
76
+ result.should == [:reinke, :reink, :rein]
77
+ end
78
+ end
79
+ context 'downto is 1' do
80
+ before(:each) do
81
+ @downto = 1
82
+ end
83
+ it "should return an array of pieces of the original token, each 1 smaller than the other" do
84
+ result = []
85
+ @sym.each_subtoken(@downto) do |subtoken|
86
+ result << subtoken
87
+ end
88
+ result.should == [:reinke, :reink, :rein, :rei, :re, :r]
89
+ end
90
+ end
91
+ context 'downto is 0' do
92
+ before(:each) do
93
+ @downto = 0
94
+ end
95
+ it "should return an array of pieces of the original token, each 1 smaller than the other" do
96
+ result = []
97
+ @sym.each_subtoken(@downto) do |subtoken|
98
+ result << subtoken
99
+ end
100
+ result.should == [:reinke, :reink, :rein, :rei, :re, :r, :'']
101
+ end
102
+ end
103
+ context 'downto is less than zero' do
104
+ before(:each) do
105
+ @downto = -2
106
+ end
107
+ it "should return an array of pieces of the original token, each 1 smaller than the other" do
108
+ result = []
109
+ @sym.each_subtoken(@downto) do |subtoken|
110
+ result << subtoken
111
+ end
112
+ result.should == [:reinke, :reink, :rein]
113
+ end
114
+ end
115
+ end
116
+ end
117
+
24
118
  describe "subtokens" do
25
119
  context 'normal symbol' do
26
120
  before(:each) do
@@ -63,22 +157,22 @@ describe Symbol do
63
157
  @sym.subtokens(@downto).should == [:reinke, :reink, :rein, :rei, :re, :r]
64
158
  end
65
159
  end
66
- # context 'downto is 0' do
67
- # before(:each) do
68
- # @downto = 0
69
- # end
70
- # it "should return an array of pieces of the original token, each 1 smaller than the other" do
71
- # @sym.subtokens(@downto).should == [:reinke, :reink, :rein, :rei, :re, :r]
72
- # end
73
- # end
74
- # context 'downto is less than zero' do
75
- # before(:each) do
76
- # @downto = -2
77
- # end
78
- # it "should return an array of pieces of the original token, each 1 smaller than the other" do
79
- # @sym.subtokens(@downto).should == [:reinke, :reink, :rein, :rei, :re, :r]
80
- # end
81
- # end
160
+ context 'downto is 0' do
161
+ before(:each) do
162
+ @downto = 0
163
+ end
164
+ it "should return an array of pieces of the original token, each 1 smaller than the other" do
165
+ @sym.subtokens(@downto).should == [:reinke, :reink, :rein, :rei, :re, :r, :""]
166
+ end
167
+ end
168
+ context 'downto is less than zero' do
169
+ before(:each) do
170
+ @downto = -2
171
+ end
172
+ it "should return an array of pieces of the original token, each 1 smaller than the other" do
173
+ @sym.subtokens(@downto).should == [:reinke, :reink, :rein]
174
+ end
175
+ end
82
176
  end
83
177
  end
84
178
 
@@ -39,7 +39,7 @@ describe Index::Bundle do
39
39
  it 'should be fast' do
40
40
  Benchmark.realtime do
41
41
  @full.generate_partial
42
- end.should <= 0.2
42
+ end.should < 0.2
43
43
  end
44
44
  end
45
45
  end
@@ -164,7 +164,7 @@ describe Query::Allocations do
164
164
  context 'enough ids' do
165
165
  before(:each) do
166
166
  @allocation1 = stub :allocation1, :ids => [1, 2, 3]
167
- @allocation2 = stub :allocation1, :ids => [4, 5, 6, 7]
167
+ @allocation2 = stub :allocation2, :ids => [4, 5, 6, 7]
168
168
  @allocations = Query::Allocations.new [@allocation1, @allocation2]
169
169
  end
170
170
  it 'should return one random id from the first allocations by default' do
@@ -177,7 +177,7 @@ describe Query::Allocations do
177
177
  (1..7).to_a.should include(@allocations.random_ids.first)
178
178
  end
179
179
  it 'should not contain the same id twice' do
180
- 100.times do
180
+ 20.times do
181
181
  @allocations.random_ids(2).uniq.size.should_not == 1
182
182
  end
183
183
  end
@@ -185,7 +185,7 @@ describe Query::Allocations do
185
185
  context 'just one id' do
186
186
  before(:each) do
187
187
  @allocation1 = stub :allocation1, :ids => [1]
188
- @allocation2 = stub :allocation1, :ids => []
188
+ @allocation2 = stub :allocation2, :ids => []
189
189
  @allocations = Query::Allocations.new [@allocation1, @allocation2]
190
190
  end
191
191
  it 'should return one random id from its allocations by default' do
@@ -201,7 +201,7 @@ describe Query::Allocations do
201
201
  context 'no id' do
202
202
  before(:each) do
203
203
  @allocation1 = stub :allocation1, :ids => []
204
- @allocation2 = stub :allocation1, :ids => []
204
+ @allocation2 = stub :allocation2, :ids => []
205
205
  @allocations = Query::Allocations.new [@allocation1, @allocation2]
206
206
  end
207
207
  it 'should return one random id from its allocations by default' do
@@ -328,7 +328,7 @@ describe Query::Allocations do
328
328
  @allocations.total.should == 110
329
329
  end
330
330
  it 'should be fast' do
331
- Benchmark.realtime { @allocations.process!(20, 0) }.should <= 0.0001
331
+ Benchmark.realtime { @allocations.process!(20, 0) }.should < 0.0001
332
332
  end
333
333
  end
334
334
  end