picky 0.0.0 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (161) hide show
  1. data/bin/picky +14 -0
  2. data/lib/bundling.rb +10 -0
  3. data/lib/constants.rb +9 -0
  4. data/lib/deployment.rb +212 -0
  5. data/lib/picky/application.rb +40 -0
  6. data/lib/picky/cacher/convenience.rb +3 -0
  7. data/lib/picky/cacher/generator.rb +17 -0
  8. data/lib/picky/cacher/partial/default.rb +7 -0
  9. data/lib/picky/cacher/partial/none.rb +19 -0
  10. data/lib/picky/cacher/partial/strategy.rb +7 -0
  11. data/lib/picky/cacher/partial/subtoken.rb +91 -0
  12. data/lib/picky/cacher/partial_generator.rb +15 -0
  13. data/lib/picky/cacher/similarity/default.rb +7 -0
  14. data/lib/picky/cacher/similarity/double_levenshtone.rb +73 -0
  15. data/lib/picky/cacher/similarity/none.rb +25 -0
  16. data/lib/picky/cacher/similarity/strategy.rb +7 -0
  17. data/lib/picky/cacher/similarity_generator.rb +15 -0
  18. data/lib/picky/cacher/weights/default.rb +7 -0
  19. data/lib/picky/cacher/weights/logarithmic.rb +39 -0
  20. data/lib/picky/cacher/weights/strategy.rb +7 -0
  21. data/lib/picky/cacher/weights_generator.rb +15 -0
  22. data/lib/picky/configuration/configuration.rb +13 -0
  23. data/lib/picky/configuration/field.rb +68 -0
  24. data/lib/picky/configuration/indexes.rb +60 -0
  25. data/lib/picky/configuration/queries.rb +32 -0
  26. data/lib/picky/configuration/type.rb +52 -0
  27. data/lib/picky/cores.rb +101 -0
  28. data/lib/picky/db/configuration.rb +23 -0
  29. data/lib/picky/ext/ruby19/extconf.rb +7 -0
  30. data/lib/picky/ext/ruby19/performant.c +339 -0
  31. data/lib/picky/extensions/array.rb +45 -0
  32. data/lib/picky/extensions/hash.rb +11 -0
  33. data/lib/picky/extensions/module.rb +15 -0
  34. data/lib/picky/extensions/symbol.rb +18 -0
  35. data/lib/picky/generator.rb +156 -0
  36. data/lib/picky/helpers/cache.rb +23 -0
  37. data/lib/picky/helpers/gc.rb +11 -0
  38. data/lib/picky/helpers/measuring.rb +45 -0
  39. data/lib/picky/helpers/search.rb +27 -0
  40. data/lib/picky/index/bundle.rb +328 -0
  41. data/lib/picky/index/category.rb +109 -0
  42. data/lib/picky/index/combined.rb +38 -0
  43. data/lib/picky/index/type.rb +30 -0
  44. data/lib/picky/indexers/base.rb +77 -0
  45. data/lib/picky/indexers/default.rb +3 -0
  46. data/lib/picky/indexers/field.rb +13 -0
  47. data/lib/picky/indexers/no_source_specified_error.rb +5 -0
  48. data/lib/picky/indexers/solr.rb +60 -0
  49. data/lib/picky/indexes.rb +180 -0
  50. data/lib/picky/initializers/ext.rb +6 -0
  51. data/lib/picky/initializers/mysql.rb +22 -0
  52. data/lib/picky/loader.rb +287 -0
  53. data/lib/picky/loggers/search.rb +19 -0
  54. data/lib/picky/performant/array.rb +23 -0
  55. data/lib/picky/query/allocation.rb +82 -0
  56. data/lib/picky/query/allocations.rb +131 -0
  57. data/lib/picky/query/base.rb +124 -0
  58. data/lib/picky/query/combination.rb +69 -0
  59. data/lib/picky/query/combinations.rb +106 -0
  60. data/lib/picky/query/combinator.rb +92 -0
  61. data/lib/picky/query/full.rb +15 -0
  62. data/lib/picky/query/live.rb +22 -0
  63. data/lib/picky/query/qualifiers.rb +73 -0
  64. data/lib/picky/query/solr.rb +77 -0
  65. data/lib/picky/query/token.rb +215 -0
  66. data/lib/picky/query/tokens.rb +102 -0
  67. data/lib/picky/query/weigher.rb +159 -0
  68. data/lib/picky/query/weights.rb +55 -0
  69. data/lib/picky/rack/harakiri.rb +37 -0
  70. data/lib/picky/results/base.rb +103 -0
  71. data/lib/picky/results/full.rb +19 -0
  72. data/lib/picky/results/live.rb +19 -0
  73. data/lib/picky/routing.rb +165 -0
  74. data/lib/picky/signals.rb +11 -0
  75. data/lib/picky/solr/schema_generator.rb +73 -0
  76. data/lib/picky/sources/base.rb +19 -0
  77. data/lib/picky/sources/csv.rb +30 -0
  78. data/lib/picky/sources/db.rb +77 -0
  79. data/lib/picky/tokenizers/base.rb +130 -0
  80. data/lib/picky/tokenizers/default.rb +3 -0
  81. data/lib/picky/tokenizers/index.rb +73 -0
  82. data/lib/picky/tokenizers/query.rb +70 -0
  83. data/lib/picky/umlaut_substituter.rb +21 -0
  84. data/lib/picky-tasks.rb +6 -0
  85. data/lib/picky.rb +18 -0
  86. data/lib/tasks/application.rake +5 -0
  87. data/lib/tasks/cache.rake +53 -0
  88. data/lib/tasks/framework.rake +4 -0
  89. data/lib/tasks/index.rake +29 -0
  90. data/lib/tasks/server.rake +48 -0
  91. data/lib/tasks/shortcuts.rake +13 -0
  92. data/lib/tasks/solr.rake +36 -0
  93. data/lib/tasks/spec.rake +11 -0
  94. data/lib/tasks/statistics.rake +13 -0
  95. data/lib/tasks/try.rake +29 -0
  96. data/prototype_project/Gemfile +23 -0
  97. data/prototype_project/Rakefile +1 -0
  98. data/prototype_project/app/README +6 -0
  99. data/prototype_project/app/application.rb +50 -0
  100. data/prototype_project/app/application.ru +29 -0
  101. data/prototype_project/app/db.yml +10 -0
  102. data/prototype_project/app/logging.rb +20 -0
  103. data/prototype_project/app/unicorn.ru +10 -0
  104. data/prototype_project/log/README +1 -0
  105. data/prototype_project/script/console +34 -0
  106. data/prototype_project/tmp/README +0 -0
  107. data/prototype_project/tmp/pids/README +0 -0
  108. data/spec/ext/performant_spec.rb +64 -0
  109. data/spec/lib/application_spec.rb +61 -0
  110. data/spec/lib/cacher/partial/subtoken_spec.rb +89 -0
  111. data/spec/lib/cacher/partial_generator_spec.rb +35 -0
  112. data/spec/lib/cacher/similarity/double_levenshtone_spec.rb +60 -0
  113. data/spec/lib/cacher/similarity/none_spec.rb +23 -0
  114. data/spec/lib/cacher/similarity_generator_spec.rb +22 -0
  115. data/spec/lib/cacher/weights/logarithmic_spec.rb +30 -0
  116. data/spec/lib/cacher/weights_generator_spec.rb +21 -0
  117. data/spec/lib/configuration/configuration_spec.rb +38 -0
  118. data/spec/lib/configuration/type_spec.rb +49 -0
  119. data/spec/lib/configuration_spec.rb +8 -0
  120. data/spec/lib/cores_spec.rb +65 -0
  121. data/spec/lib/extensions/array_spec.rb +37 -0
  122. data/spec/lib/extensions/hash_spec.rb +11 -0
  123. data/spec/lib/extensions/module_spec.rb +27 -0
  124. data/spec/lib/extensions/symbol_spec.rb +85 -0
  125. data/spec/lib/generator_spec.rb +135 -0
  126. data/spec/lib/helpers/cache_spec.rb +35 -0
  127. data/spec/lib/helpers/gc_spec.rb +71 -0
  128. data/spec/lib/helpers/measuring_spec.rb +18 -0
  129. data/spec/lib/helpers/search_spec.rb +50 -0
  130. data/spec/lib/index/bundle_partial_generation_speed_spec.rb +47 -0
  131. data/spec/lib/index/bundle_spec.rb +260 -0
  132. data/spec/lib/index/category_spec.rb +203 -0
  133. data/spec/lib/indexers/base_spec.rb +73 -0
  134. data/spec/lib/indexers/field_spec.rb +20 -0
  135. data/spec/lib/loader_spec.rb +48 -0
  136. data/spec/lib/loggers/search_spec.rb +19 -0
  137. data/spec/lib/performant/array_spec.rb +13 -0
  138. data/spec/lib/query/allocation_spec.rb +194 -0
  139. data/spec/lib/query/allocations_spec.rb +336 -0
  140. data/spec/lib/query/base_spec.rb +104 -0
  141. data/spec/lib/query/combination_spec.rb +90 -0
  142. data/spec/lib/query/combinations_spec.rb +83 -0
  143. data/spec/lib/query/combinator_spec.rb +112 -0
  144. data/spec/lib/query/full_spec.rb +22 -0
  145. data/spec/lib/query/live_spec.rb +61 -0
  146. data/spec/lib/query/qualifiers_spec.rb +31 -0
  147. data/spec/lib/query/solr_spec.rb +51 -0
  148. data/spec/lib/query/token_spec.rb +297 -0
  149. data/spec/lib/query/tokens_spec.rb +189 -0
  150. data/spec/lib/query/weights_spec.rb +47 -0
  151. data/spec/lib/results/base_spec.rb +233 -0
  152. data/spec/lib/routing_spec.rb +318 -0
  153. data/spec/lib/solr/schema_generator_spec.rb +42 -0
  154. data/spec/lib/sources/db_spec.rb +91 -0
  155. data/spec/lib/tokenizers/base_spec.rb +61 -0
  156. data/spec/lib/tokenizers/index_spec.rb +51 -0
  157. data/spec/lib/tokenizers/query_spec.rb +105 -0
  158. data/spec/lib/umlaut_substituter_spec.rb +84 -0
  159. data/spec/specific/speed_spec.rb +55 -0
  160. metadata +371 -15
  161. data/README.textile +0 -9
@@ -0,0 +1,287 @@
# Loads the search engine and itself.
#
# All methods are module-level. Paths handed to the load_user* helpers and to
# extend_load_path are resolved against the SEARCH_ROOT constant, which is
# expected to be defined before any of them is called.
#
module Loader

  # Reloads the whole app.
  # First itself, then the framework, then the app.
  #
  # Changes the working directory to SEARCH_ROOT first.
  #
  def self.reload
    Dir.chdir(SEARCH_ROOT)
    exclaim 'Reloading loader.'
    load_self
    exclaim 'Reloading framework.'
    load_framework
    exclaim 'Reloading application.'
    load_application
  end

  # Loads this file anew.
  #
  def self.load_self
    exclaim 'Loader loading itself.'
    load __FILE__
  end

  # Requires a file relative to the directory this file lives in.
  #
  def self.require_relative filename
    require File.join(File.dirname(__FILE__), filename)
  end
  # Loads (on every call) a file relative to the directory this file lives in.
  #
  # Note: The filename is given without the .rb extension.
  #
  def self.load_relative filename_without_rb
    load File.join(File.dirname(__FILE__), "#{filename_without_rb}.rb")
  end

  # Loads a file of the user's project, relative to SEARCH_ROOT.
  #
  # Note: The filename is given without the .rb extension.
  #
  def self.load_user filename
    load File.join(SEARCH_ROOT, "#{filename}.rb")
  end
  # Loads a file from the lib directory of the user's project.
  #
  def self.load_user_lib filename
    load_user File.join('lib', filename)
  end
  # Loads every .rb file found (recursively) below the given directory
  # of the user's project.
  #
  # Note: The files are loaded in sorted order, so that the load order
  #       does not depend on the platform's directory listing order.
  #
  def self.load_all_user_in dirname
    Dir[File.join(SEARCH_ROOT, dirname, '**', '*.rb')].sort.each do |filename|
      load filename
    end
  end

  # def self.add_lib_dir
  #   lib_dir = File.join(SEARCH_ROOT, 'lib')
  #   $:.unshift lib_dir unless $:.include?(lib_dir)
  # end

  # Load the user's application.
  #
  # Raises a NameError if a constant referenced by the user's lib files
  # is still missing after its lib file has been loaded once.
  #
  def self.load_application
    # DB.connect # FIXME only needed when indexing.
    # Load the user's application.
    #
    exclaim 'Loading Application.'

    load_all_user_in 'app/initializers'

    # Add lib dir to load path.
    #
    # add_lib_dir

    # Picky autoloading.
    #
    # If a user file references a constant that is not yet defined, the
    # corresponding file in the user's lib directory is loaded and the
    # whole loading is tried again.
    #
    # Each missing constant is tried exactly once: If it is still missing
    # after its file has been loaded, the error is raised instead of
    # retrying forever.
    #
    already_tried = []
    begin
      load_all_user_in 'lib/tokenizers'
      load_all_user_in 'lib/indexers'
      load_all_user_in 'lib/query'
    rescue NameError => name_error
      # Only the constant name is extracted, the message might
      # contain more text after it.
      #
      namespaced_class_name = name_error.message[/\Auninitialized\sconstant\s(\S+)/, 1]

      # Other NameErrors (undefined methods or variables, for example)
      # cannot be fixed by loading a file.
      #
      raise if namespaced_class_name.nil? || already_tried.include?(namespaced_class_name)

      already_tried << namespaced_class_name
      load_user_lib namespaced_class_name.underscore # Try it once.
      retry
    end

    # TODO Rethink this.
    #
    load_user 'app/logging'
    # load_user 'app/config'
    # Configuration.apply

    # Require the user's application.
    #
    load_user 'app/application'

    # Setup Indexes from user definition
    #
    # TODO Make special task that reloads the indexes!
    #
    Query::Qualifiers.instance.prepare # TODO Rewrite

    exclaim "Application loaded."
  end

  # Loads the framework.
  #
  # Note: The order of the files is relevant,
  #       referenced modules are loaded first.
  #
  def self.load_framework
    # exclaim 'Loading the application.'

    # exclaim 'Compiling C code.'
    require_relative 'initializers/ext'

    require 'rack_fast_escape'
    # exclaim 'Loaded rack_fast_escape.'
    require 'text'
    # exclaim 'Loaded text.'

    # Extend path with lib
    #
    extend_load_path 'lib'

    # Load extensions.
    #
    load_relative 'extensions/array'
    load_relative 'extensions/symbol'
    load_relative 'extensions/module'
    load_relative 'extensions/hash'
    # exclaim "Loaded extensions."

    # Load harakiri.
    #
    load_relative 'rack/harakiri'

    # Requiring Helpers
    #
    load_relative 'helpers/gc'
    load_relative 'helpers/cache'
    load_relative 'helpers/measuring'
    load_relative 'helpers/search'
    # exclaim "Loaded helpers."

    # Signal handling
    #
    load_relative 'signals'
    # exclaim "Loaded signals handling."

    # Load and require the plugins.
    #
    # Note: The plugins directory is looked up relative to the
    #       current working directory.
    #
    Dir['plugins/*'].each do |directory|
      extend_load_path directory
      extend_load_path directory, 'lib'
      # The plugin's main file is named like its directory.
      #
      load "#{File.basename(directory)}.rb"
    end
    # exclaim "Loaded plugins."

    # Require the necessary libs. Referenced modules first.
    #
    load_relative 'loggers/search'
    load_relative 'umlaut_substituter'

    # Index generation strategies.
    #
    load_relative 'indexers/no_source_specified_error'
    load_relative 'indexers/base'
    load_relative 'indexers/field'
    load_relative 'indexers/default'
    #
    load_relative 'indexers/solr'

    # Partial index generation strategies.
    #
    load_relative 'cacher/partial/strategy'
    load_relative 'cacher/partial/none'
    load_relative 'cacher/partial/subtoken'
    load_relative 'cacher/partial/default'

    # Weight index generation strategies.
    #
    load_relative 'cacher/weights/strategy'
    load_relative 'cacher/weights/logarithmic'
    load_relative 'cacher/weights/default'

    # Similarity index generation strategies.
    #
    load_relative 'cacher/similarity/strategy'
    load_relative 'cacher/similarity/none'
    load_relative 'cacher/similarity/double_levenshtone'
    load_relative 'cacher/similarity/default'

    # Convenience accessors for generators.
    #
    # TODO Just remove from under Cacher?
    #
    load_relative 'cacher/convenience'

    # Index generators.
    #
    load_relative 'cacher/generator'
    load_relative 'cacher/partial_generator'
    load_relative 'cacher/weights_generator'
    load_relative 'cacher/similarity_generator'

    # Index types.
    #
    load_relative 'index/bundle'
    load_relative 'index/category'
    load_relative 'index/type'

    load_relative 'index/combined'

    # Tokens.
    #
    load_relative 'query/token'
    load_relative 'query/tokens'

    # Tokenizers types.
    #
    load_relative 'tokenizers/base'
    load_relative 'tokenizers/index'
    load_relative 'tokenizers/query'
    load_relative 'tokenizers/default'

    # Query combinations, qualifiers, weigher.
    #
    load_relative 'query/combination'
    load_relative 'query/combinations'

    load_relative 'query/allocation'
    load_relative 'query/allocations'

    load_relative 'query/qualifiers'
    load_relative 'query/weigher'
    load_relative 'query/combinator'

    load_relative 'query/weights'

    # Query.
    #
    load_relative 'query/base'
    load_relative 'query/live'
    load_relative 'query/full'
    load_relative 'query/solr' # TODO ?

    # Results.
    #
    load_relative 'results/base'
    load_relative 'results/full'
    load_relative 'results/live'

    # Sources.
    #
    load_relative 'sources/base'
    load_relative 'sources/db'

    # DB
    #
    load_relative 'db/configuration'

    # Indexes.
    #
    load_relative 'indexes'

    # Configuration.
    #
    load_relative 'configuration/field'
    load_relative 'configuration/type'
    load_relative 'configuration/indexes'
    load_relative 'configuration/configuration'

    # ... in Application.
    #
    load_relative 'configuration/queries'

    # Application and routing.
    #
    load_relative 'routing'
    load_relative 'application'

    # Load tools.
    #
    load_relative 'solr/schema_generator'
    load_relative 'cores'

    # Load generation.
    #
    load_relative 'generator'
  end

  # Prints the given text on standard out.
  #
  def self.exclaim text
    puts text
  end

  # Prepends the directory (given as path parts relative to SEARCH_ROOT)
  # to the load path, unless it is already in there.
  #
  def self.extend_load_path *dirs
    dir = File.join(SEARCH_ROOT, *dirs)
    $:.unshift dir unless $:.include? dir
  end

end
@@ -0,0 +1,19 @@
# encoding: utf-8
#
module Loggers
  # Thin proxy in front of a logger object.
  #
  # The wrapped object only needs to respond to #info.
  #
  class Search

    attr_reader :logger

    # Wraps the given logger.
    #
    def initialize(logger)
      @logger = logger
    end

    # Hands the message on to the wrapped logger, at info level.
    #
    # Returns whatever the wrapped logger's #info returns.
    #
    def log(message)
      logger.info(message)
    end

  end
end
@@ -0,0 +1,23 @@
module Performant
  # This class will be enriched with c-methods
  #
  class Array

    # Chooses a good algorithm for intersecting arrays.
    #
    # Below a total of 20'000 elements, brute_force_intersect is used
    # (not defined in this file, see the class comment), above that
    # the arrays are intersected pairwise, smallest first.
    #
    # Returns the elements that are contained in all of the given arrays.
    #
    # Note: The sort order will be changed, i.e. the passed in
    #       array of arrays is sorted in place by array size.
    #
    def self.intersect array_of_arrays
      array_of_arrays.sort_by!(&:size)

      if array_of_arrays.sum(&:size) < 20_000
        Performant::Array.brute_force_intersect array_of_arrays
      else
        # Folding without a start value: An empty intermediate result
        # stays empty instead of being mistaken for the start of the fold.
        #
        array_of_arrays.inject do |total, elements|
          elements & total
        end
      end
    end

  end
end
@@ -0,0 +1,82 @@
module Query
  # An allocation has a number of combinations:
  # [token, index] [other_token, other_index], ...
  #
  # Scoring, id calculation, filtering and the result form are all
  # delegated to the combinations object it has been initialized with.
  #
  class Allocation

    attr_reader :count, :ids, :score, :combinations
    attr_accessor :result_type

    #
    #
    def initialize combinations
      @combinations = combinations
    end

    # An allocation hashes like its combinations.
    #
    def hash
      @combinations.hash
    end
    # Two allocations are considered the same if they hash the same.
    #
    # Note: This keeps eql? consistent with hash, so that Array#uniq
    #       and Hash lookups behave as before.
    #
    # FIXME Compare the combinations themselves as soon as these are
    #       known to implement eql?.
    #
    def eql? other_allocation
      hash == other_allocation.hash
    end

    # Scores its combinations and caches the result.
    #
    # Note: Later calls return the cached score, whatever the weights.
    #
    def calculate_score weights
      @score ||= @combinations.calculate_score(weights)
    end

    # Asks the combinations for the (intersected) ids.
    #
    def calculate_ids
      @combinations.ids
    end

    # This starts the searching process.
    #
    # Parameters:
    #  * amount: how many ids to keep
    #  * offset: from where in the calculated ids to start
    #
    # Remembers the count of all calculated ids and returns the kept ids
    # (an empty array if the offset lies beyond the calculated ids).
    #
    def process! amount, offset
      ids    = calculate_ids
      @count = ids.size # cache the count before throwing away the ids
      # Slice out the relevant part, without changing the array
      # the combinations have handed out.
      #
      @ids   = ids.slice(offset, amount) || []
    end

    # Tells the combinations to keep the given identifiers.
    #
    def keep identifiers = [] # categories
      @combinations.keep identifiers
    end
    # Tells the combinations to remove the given identifiers.
    #
    def remove identifiers = [] # categories
      @combinations.remove identifiers
    end

    # Sort highest score first.
    #
    def <=> other_allocation
      other_allocation.score <=> self.score
    end

    # Transform the allocation into result form.
    #
    # Returns nil if the allocation has no ids,
    # or has not been processed yet.
    #
    def to_result
      return unless count && count > 0
      [result_type, score, count, @combinations.to_result, ids]
    end

    # Json representation of this allocation.
    #
    # Note: Delegates to to_result.
    #
    def to_json
      to_result.to_json
    end

    # Note: Also works for an allocation without result.
    #
    def to_s
      "Allocation: #{Array(to_result).join(', ')}"
    end

  end
end
@@ -0,0 +1,131 @@
module Query
  # Container class for allocations.
  #
  class Allocations

    # TODO Remove size
    #
    delegate :each, :inject, :empty?, :size, :to => :@allocations
    attr_reader :total

    def initialize allocations = []
      @allocations = allocations
    end

    # Score each allocation.
    #
    def calculate_score weights
      @allocations.each do |allocation|
        allocation.calculate_score weights
      end
    end
    # Sort the allocations.
    #
    # Note: Sorts in place, using the allocations' own <=>.
    #
    def sort
      @allocations.sort!
    end

    # Reduces the amount of allocations to x.
    #
    # Note: Keeps the first x, without changing the array
    #       that has been passed in on initialize.
    #
    def reduce_to amount
      @allocations = @allocations.first amount
    end

    # Keeps combinations.
    #
    # Only those passed in remain.
    #
    def keep identifiers = []
      @allocations.each { |allocation| allocation.keep identifiers } unless identifiers.empty?
    end
    # Removes combinations.
    #
    # Only those passed in are removed.
    #
    # TODO Rewrite
    #
    def remove identifiers = []
      @allocations.each { |allocation| allocation.remove identifiers } unless identifiers.empty?
    end

    # Returns the top amount ids.
    #
    # Note: Never returns more than amount ids, also if the last
    #       looked at allocation contributes more ids than needed.
    #
    def ids amount = 20
      collected = []
      @allocations.each do |allocation|
        break if collected.size >= amount # enough, no need to look at the rest
        collected.concat allocation.ids
      end
      collected.first amount
    end

    # Returns random ids from the first allocation.
    #
    # Note: No id is returned twice, so at most as many ids
    #       as the first allocation has are returned.
    #
    def random_ids amount = 1
      # TODO can there be no @allocations???
      return [] if @allocations.empty?
      @allocations.first.ids.sample amount
    end

    # This is the main method of this class that will replace ids and count.
    #
    # What it does is calculate the ids and counts of its allocations
    # for being used in the results. It also calculates the total.
    #
    # Parameters:
    #  * amount: the amount of ids to calculate
    #  * offset: the offset from where in the result set to take the ids
    #
    # Note: With an amount of 0, an offset > 0 doesn't make much
    #       sense, as seen in the live search.
    #
    # Note: Each allocation gets the amount that is still needed and the
    #       offset that is still left, and keeps its own ids and count.
    #
    # Note: It's possible that no ids are returned by an allocation, but a count. (In case of an offset)
    #
    def process! amount, offset = 0
      @total = 0
      @allocations.each do |allocation|
        ids = allocation.process! amount, offset
        @total = @total + allocation.count # the total mixed in
        if ids.empty?
          # This allocation has been skipped, so its ids
          # count against the offset (which never goes below 0).
          #
          offset = [offset - allocation.count, 0].max
        else
          amount = amount - ids.size # we need less results from the following allocation
          offset = 0                 # we have already passed the offset
        end
      end
    end

    # Removes double allocations, in place.
    #
    def uniq
      @allocations.uniq!
    end

    # Returns the internal allocations array.
    #
    def to_a
      @allocations
    end

    # Simply inspects the internal allocations.
    #
    def to_s
      @allocations.inspect
    end

    # Allocations for results are in the form:
    # [
    #   allocation1.to_result,
    #   allocation2.to_result
    #   ...
    # ]
    #
    # Note: Allocations without result (nil) are left out.
    #
    def to_result
      @allocations.map(&:to_result).compact
    end

  end
end
@@ -0,0 +1,124 @@
module Query
  # Superclass of the queries.
  #
  # A query is set up with the index types it searches on, optionally
  # followed by an options hash (:tokenizer, :weights).
  #
  # Note: result_type is called here, but not defined here.
  #
  class Base

    include Helpers::Measuring

    attr_writer :tokenizer
    attr_accessor :reduce_to_amount, :weights

    # Takes the index types to search on, and optionally
    # an options hash as last parameter:
    #  * tokenizer: used instead of a new Tokenizers::Query
    #  * weights:   used instead of a new Weights
    #
    def initialize(*index_types)
      options = {}
      options = index_types.pop if Hash === index_types.last

      @index_types = index_types
      @weigher     = Weigher.new(index_types)
      @tokenizer   = options[:tokenizer] || Tokenizers::Query.new
      @weights     = options[:weights] || Weights.new
    end

    # Tokenizes the text, then searches with the resulting tokens.
    #
    def search_with_text(text, offset = 0)
      search(tokenized(text), offset)
    end

    # Executes the search, measures how long it took
    # and notes the duration (rounded to 6 digits) on the results.
    #
    def search(tokens, offset = 0)
      found = nil

      elapsed = timed do
        found = execute(tokens, offset) || empty_results # TODO Does not work yet
      end
      found.duration = elapsed.round(6)

      found
    end

    # Gets the sorted allocations for the tokens and packs them into results.
    #
    def execute(tokens, offset)
      results_from(sorted_allocations(tokens), offset)
    end

    # A new, empty instance of the result type.
    #
    def empty_results
      result_type.new
    end

    # Has the query tokenizer tokenize the text.
    #
    def tokenized(text)
      @tokenizer.tokenize(text)
    end

    # Generates the possible allocations for the tokens, sorted.
    #
    # In order:
    #  1. Get the allocations from the weigher.
    #  2. Callbacks: reduce, then remove identifiers.
    #  3. Remove double allocations.
    #  4. Score the allocations using the weights.
    #  5. Sort the allocations.
    #
    # TODO Smallify.
    #
    # TODO allocations#calculate # or better name
    #
    def sorted_allocations(tokens)
      candidates = @weigher.allocations_for(tokens)

      reduce(candidates)
      remove_from(candidates)

      candidates.uniq
      candidates.calculate_score(weights)
      candidates.sort

      candidates
    end
    # Reduces the allocations, but only if reduce_to_amount is set.
    #
    def reduce(allocations)
      return unless reduce_to_amount
      allocations.reduce_to(reduce_to_amount)
    end
    # Are there identifiers to remove?
    #
    def remove_identifiers?
      identifiers_to_remove.present?
    end
    # Removes the identifiers from the allocations, if there are any to remove.
    #
    def remove_from(allocations)
      return unless remove_identifiers?
      allocations.remove(identifiers_to_remove)
    end
    # Override.
    #
    # Note: An empty array, unless set otherwise.
    #
    def identifiers_to_remove
      @identifiers_to_remove ||= []
    end

    # Wraps the allocations in a new instance of the result type
    # and prepares it with the offset.
    #
    # TODO Move to results. result_type.from allocations, offset
    #
    def results_from(allocations = nil, offset = 0)
      result_type.new(allocations).tap do |packed|
        packed.prepare!(offset)
      end
    end

  end
end