picky 0.0.0 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/picky +14 -0
- data/lib/bundling.rb +10 -0
- data/lib/constants.rb +9 -0
- data/lib/deployment.rb +212 -0
- data/lib/picky/application.rb +40 -0
- data/lib/picky/cacher/convenience.rb +3 -0
- data/lib/picky/cacher/generator.rb +17 -0
- data/lib/picky/cacher/partial/default.rb +7 -0
- data/lib/picky/cacher/partial/none.rb +19 -0
- data/lib/picky/cacher/partial/strategy.rb +7 -0
- data/lib/picky/cacher/partial/subtoken.rb +91 -0
- data/lib/picky/cacher/partial_generator.rb +15 -0
- data/lib/picky/cacher/similarity/default.rb +7 -0
- data/lib/picky/cacher/similarity/double_levenshtone.rb +73 -0
- data/lib/picky/cacher/similarity/none.rb +25 -0
- data/lib/picky/cacher/similarity/strategy.rb +7 -0
- data/lib/picky/cacher/similarity_generator.rb +15 -0
- data/lib/picky/cacher/weights/default.rb +7 -0
- data/lib/picky/cacher/weights/logarithmic.rb +39 -0
- data/lib/picky/cacher/weights/strategy.rb +7 -0
- data/lib/picky/cacher/weights_generator.rb +15 -0
- data/lib/picky/configuration/configuration.rb +13 -0
- data/lib/picky/configuration/field.rb +68 -0
- data/lib/picky/configuration/indexes.rb +60 -0
- data/lib/picky/configuration/queries.rb +32 -0
- data/lib/picky/configuration/type.rb +52 -0
- data/lib/picky/cores.rb +101 -0
- data/lib/picky/db/configuration.rb +23 -0
- data/lib/picky/ext/ruby19/extconf.rb +7 -0
- data/lib/picky/ext/ruby19/performant.c +339 -0
- data/lib/picky/extensions/array.rb +45 -0
- data/lib/picky/extensions/hash.rb +11 -0
- data/lib/picky/extensions/module.rb +15 -0
- data/lib/picky/extensions/symbol.rb +18 -0
- data/lib/picky/generator.rb +156 -0
- data/lib/picky/helpers/cache.rb +23 -0
- data/lib/picky/helpers/gc.rb +11 -0
- data/lib/picky/helpers/measuring.rb +45 -0
- data/lib/picky/helpers/search.rb +27 -0
- data/lib/picky/index/bundle.rb +328 -0
- data/lib/picky/index/category.rb +109 -0
- data/lib/picky/index/combined.rb +38 -0
- data/lib/picky/index/type.rb +30 -0
- data/lib/picky/indexers/base.rb +77 -0
- data/lib/picky/indexers/default.rb +3 -0
- data/lib/picky/indexers/field.rb +13 -0
- data/lib/picky/indexers/no_source_specified_error.rb +5 -0
- data/lib/picky/indexers/solr.rb +60 -0
- data/lib/picky/indexes.rb +180 -0
- data/lib/picky/initializers/ext.rb +6 -0
- data/lib/picky/initializers/mysql.rb +22 -0
- data/lib/picky/loader.rb +287 -0
- data/lib/picky/loggers/search.rb +19 -0
- data/lib/picky/performant/array.rb +23 -0
- data/lib/picky/query/allocation.rb +82 -0
- data/lib/picky/query/allocations.rb +131 -0
- data/lib/picky/query/base.rb +124 -0
- data/lib/picky/query/combination.rb +69 -0
- data/lib/picky/query/combinations.rb +106 -0
- data/lib/picky/query/combinator.rb +92 -0
- data/lib/picky/query/full.rb +15 -0
- data/lib/picky/query/live.rb +22 -0
- data/lib/picky/query/qualifiers.rb +73 -0
- data/lib/picky/query/solr.rb +77 -0
- data/lib/picky/query/token.rb +215 -0
- data/lib/picky/query/tokens.rb +102 -0
- data/lib/picky/query/weigher.rb +159 -0
- data/lib/picky/query/weights.rb +55 -0
- data/lib/picky/rack/harakiri.rb +37 -0
- data/lib/picky/results/base.rb +103 -0
- data/lib/picky/results/full.rb +19 -0
- data/lib/picky/results/live.rb +19 -0
- data/lib/picky/routing.rb +165 -0
- data/lib/picky/signals.rb +11 -0
- data/lib/picky/solr/schema_generator.rb +73 -0
- data/lib/picky/sources/base.rb +19 -0
- data/lib/picky/sources/csv.rb +30 -0
- data/lib/picky/sources/db.rb +77 -0
- data/lib/picky/tokenizers/base.rb +130 -0
- data/lib/picky/tokenizers/default.rb +3 -0
- data/lib/picky/tokenizers/index.rb +73 -0
- data/lib/picky/tokenizers/query.rb +70 -0
- data/lib/picky/umlaut_substituter.rb +21 -0
- data/lib/picky-tasks.rb +6 -0
- data/lib/picky.rb +18 -0
- data/lib/tasks/application.rake +5 -0
- data/lib/tasks/cache.rake +53 -0
- data/lib/tasks/framework.rake +4 -0
- data/lib/tasks/index.rake +29 -0
- data/lib/tasks/server.rake +48 -0
- data/lib/tasks/shortcuts.rake +13 -0
- data/lib/tasks/solr.rake +36 -0
- data/lib/tasks/spec.rake +11 -0
- data/lib/tasks/statistics.rake +13 -0
- data/lib/tasks/try.rake +29 -0
- data/prototype_project/Gemfile +23 -0
- data/prototype_project/Rakefile +1 -0
- data/prototype_project/app/README +6 -0
- data/prototype_project/app/application.rb +50 -0
- data/prototype_project/app/application.ru +29 -0
- data/prototype_project/app/db.yml +10 -0
- data/prototype_project/app/logging.rb +20 -0
- data/prototype_project/app/unicorn.ru +10 -0
- data/prototype_project/log/README +1 -0
- data/prototype_project/script/console +34 -0
- data/prototype_project/tmp/README +0 -0
- data/prototype_project/tmp/pids/README +0 -0
- data/spec/ext/performant_spec.rb +64 -0
- data/spec/lib/application_spec.rb +61 -0
- data/spec/lib/cacher/partial/subtoken_spec.rb +89 -0
- data/spec/lib/cacher/partial_generator_spec.rb +35 -0
- data/spec/lib/cacher/similarity/double_levenshtone_spec.rb +60 -0
- data/spec/lib/cacher/similarity/none_spec.rb +23 -0
- data/spec/lib/cacher/similarity_generator_spec.rb +22 -0
- data/spec/lib/cacher/weights/logarithmic_spec.rb +30 -0
- data/spec/lib/cacher/weights_generator_spec.rb +21 -0
- data/spec/lib/configuration/configuration_spec.rb +38 -0
- data/spec/lib/configuration/type_spec.rb +49 -0
- data/spec/lib/configuration_spec.rb +8 -0
- data/spec/lib/cores_spec.rb +65 -0
- data/spec/lib/extensions/array_spec.rb +37 -0
- data/spec/lib/extensions/hash_spec.rb +11 -0
- data/spec/lib/extensions/module_spec.rb +27 -0
- data/spec/lib/extensions/symbol_spec.rb +85 -0
- data/spec/lib/generator_spec.rb +135 -0
- data/spec/lib/helpers/cache_spec.rb +35 -0
- data/spec/lib/helpers/gc_spec.rb +71 -0
- data/spec/lib/helpers/measuring_spec.rb +18 -0
- data/spec/lib/helpers/search_spec.rb +50 -0
- data/spec/lib/index/bundle_partial_generation_speed_spec.rb +47 -0
- data/spec/lib/index/bundle_spec.rb +260 -0
- data/spec/lib/index/category_spec.rb +203 -0
- data/spec/lib/indexers/base_spec.rb +73 -0
- data/spec/lib/indexers/field_spec.rb +20 -0
- data/spec/lib/loader_spec.rb +48 -0
- data/spec/lib/loggers/search_spec.rb +19 -0
- data/spec/lib/performant/array_spec.rb +13 -0
- data/spec/lib/query/allocation_spec.rb +194 -0
- data/spec/lib/query/allocations_spec.rb +336 -0
- data/spec/lib/query/base_spec.rb +104 -0
- data/spec/lib/query/combination_spec.rb +90 -0
- data/spec/lib/query/combinations_spec.rb +83 -0
- data/spec/lib/query/combinator_spec.rb +112 -0
- data/spec/lib/query/full_spec.rb +22 -0
- data/spec/lib/query/live_spec.rb +61 -0
- data/spec/lib/query/qualifiers_spec.rb +31 -0
- data/spec/lib/query/solr_spec.rb +51 -0
- data/spec/lib/query/token_spec.rb +297 -0
- data/spec/lib/query/tokens_spec.rb +189 -0
- data/spec/lib/query/weights_spec.rb +47 -0
- data/spec/lib/results/base_spec.rb +233 -0
- data/spec/lib/routing_spec.rb +318 -0
- data/spec/lib/solr/schema_generator_spec.rb +42 -0
- data/spec/lib/sources/db_spec.rb +91 -0
- data/spec/lib/tokenizers/base_spec.rb +61 -0
- data/spec/lib/tokenizers/index_spec.rb +51 -0
- data/spec/lib/tokenizers/query_spec.rb +105 -0
- data/spec/lib/umlaut_substituter_spec.rb +84 -0
- data/spec/specific/speed_spec.rb +55 -0
- metadata +371 -15
- data/README.textile +0 -9
data/lib/picky/loader.rb
ADDED
|
@@ -0,0 +1,287 @@
|
|
|
1
|
+
# Loads the search engine and itself.
|
|
2
|
+
#
|
|
3
|
+
module Loader

  # Reloads the whole app.
  # First the loader itself, then the framework, then the application.
  #
  # Note: Changes the working directory to SEARCH_ROOT.
  #
  def self.reload
    Dir.chdir(SEARCH_ROOT)
    exclaim 'Reloading loader.'
    load_self
    exclaim 'Reloading framework.'
    load_framework
    exclaim 'Reloading application.'
    load_application
  end

  # Loads this file anew.
  #
  def self.load_self
    exclaim 'Loader loading itself.'
    load __FILE__
  end

  # Requires (once) a file given relative to this file's directory.
  #
  def self.require_relative filename
    require File.join(File.dirname(__FILE__), filename)
  end

  # Loads (every time) a file given relative to this file's directory.
  # The ".rb" extension is appended.
  #
  def self.load_relative filename_without_rb
    load File.join(File.dirname(__FILE__), "#{filename_without_rb}.rb")
  end

  # Loads a file of the user's project, relative to SEARCH_ROOT.
  # The ".rb" extension is appended.
  #
  def self.load_user filename
    load File.join(SEARCH_ROOT, "#{filename}.rb")
  end

  # Loads a file from the user's lib directory (SEARCH_ROOT/lib).
  #
  def self.load_user_lib filename
    load_user File.join('lib', filename)
  end

  # Loads all ruby files (recursively) below the given directory of the
  # user's project.
  #
  # The files are loaded in sorted path order, since the order in which
  # Dir[] returns its matches is not guaranteed on every platform/ruby.
  #
  def self.load_all_user_in dirname
    Dir[File.join(SEARCH_ROOT, dirname, '**', '*.rb')].sort.each do |filename|
      load filename
    end
  end

  # def self.add_lib_dir
  #   lib_dir = File.join(SEARCH_ROOT, 'lib')
  #   $:.unshift lib_dir unless $:.include?(lib_dir)
  # end

  # Load the user's application.
  #
  # Raises the NameError if a missing constant cannot be resolved by
  # loading the matching file from the user's lib directory.
  #
  def self.load_application
    # DB.connect # FIXME only needed when indexing.
    exclaim 'Loading Application.'

    load_all_user_in 'app/initializers'

    # Picky autoloading.
    #
    # If a user file references a constant that is not yet defined, the
    # matching lib file is loaded and the whole step is tried again.
    # Each constant is only tried once, so that a lib file which does not
    # define the constant cannot cause an endless retry loop.
    #
    attempted = []
    begin
      load_all_user_in 'lib/tokenizers'
      load_all_user_in 'lib/indexers'
      load_all_user_in 'lib/query'
    rescue NameError => name_error
      namespaced_class_name = name_error.message[/^uninitialized\sconstant\s(\S+)/, 1]

      # Not a missing constant, or already tried: Give up with the original error.
      #
      raise if namespaced_class_name.nil? || attempted.include?(namespaced_class_name)

      attempted << namespaced_class_name
      load_user_lib namespaced_class_name.underscore # Try it once.
      retry
    end

    # TODO Rethink this.
    #
    load_user 'app/logging'
    # load_user 'app/config'
    # Configuration.apply

    # Require the user's application.
    #
    load_user 'app/application'

    # Setup Indexes from user definition
    #
    # TODO Make special task that reloads the indexes!
    #
    Query::Qualifiers.instance.prepare # TODO Rewrite

    exclaim "Application loaded."
  end

  # Loads the framework.
  #
  # Note: The order of the files is significant, referenced
  #       classes and modules need to be loaded first.
  #
  def self.load_framework
    # Extension initializer (presumably sets up the C code).
    #
    require_relative 'initializers/ext'

    require 'rack_fast_escape'
    require 'text'

    # Extend path with lib
    #
    extend_load_path 'lib'

    [
      # Extensions.
      #
      'extensions/array',
      'extensions/symbol',
      'extensions/module',
      'extensions/hash',

      # Harakiri.
      #
      'rack/harakiri',

      # Helpers.
      #
      'helpers/gc',
      'helpers/cache',
      'helpers/measuring',
      'helpers/search',

      # Signal handling.
      #
      'signals'
    ].each { |file| load_relative file }

    # Load and require the plugins (in sorted, i.e. reproducible order).
    #
    Dir['plugins/*'].sort.each do |directory|
      extend_load_path directory
      extend_load_path directory, 'lib'
      load "#{directory.gsub(/plugins\//, '')}.rb"
    end

    [
      # Require the necessary libs. Referenced modules first.
      #
      'loggers/search',
      'umlaut_substituter',

      # Index generation strategies.
      #
      'indexers/no_source_specified_error',
      'indexers/base',
      'indexers/field',
      'indexers/default',
      'indexers/solr',

      # Partial index generation strategies.
      #
      'cacher/partial/strategy',
      'cacher/partial/none',
      'cacher/partial/subtoken',
      'cacher/partial/default',

      # Weight index generation strategies.
      #
      'cacher/weights/strategy',
      'cacher/weights/logarithmic',
      'cacher/weights/default',

      # Similarity index generation strategies.
      #
      'cacher/similarity/strategy',
      'cacher/similarity/none',
      'cacher/similarity/double_levenshtone',
      'cacher/similarity/default',

      # Convenience accessors for generators.
      #
      # TODO Just remove from under Cacher?
      #
      'cacher/convenience',

      # Index generators.
      #
      'cacher/generator',
      'cacher/partial_generator',
      'cacher/weights_generator',
      'cacher/similarity_generator',

      # Index types.
      #
      'index/bundle',
      'index/category',
      'index/type',
      'index/combined',

      # Tokens.
      #
      'query/token',
      'query/tokens',

      # Tokenizers types.
      #
      'tokenizers/base',
      'tokenizers/index',
      'tokenizers/query',
      'tokenizers/default',

      # Query combinations, qualifiers, weigher.
      #
      'query/combination',
      'query/combinations',
      'query/allocation',
      'query/allocations',
      'query/qualifiers',
      'query/weigher',
      'query/combinator',
      'query/weights',

      # Query.
      #
      'query/base',
      'query/live',
      'query/full',
      'query/solr', # TODO ?

      # Results.
      #
      'results/base',
      'results/full',
      'results/live',

      # Sources.
      #
      'sources/base',
      'sources/db',

      # DB
      #
      'db/configuration',

      # Indexes.
      #
      'indexes',

      # Configuration.
      #
      'configuration/field',
      'configuration/type',
      'configuration/indexes',
      'configuration/configuration',

      # ... in Application.
      #
      'configuration/queries',

      # Application and routing.
      #
      'routing',
      'application',

      # Load tools.
      #
      'solr/schema_generator',
      'cores'
    ].each { |file| load_relative file }

    # Load generation.
    #
    load_relative 'generator'
  end

  # Prints the given text to standard out.
  #
  def self.exclaim text
    puts text
  end

  # Prepends the directory given by the path parts (relative to
  # SEARCH_ROOT) to the load path, unless it is already in there.
  #
  def self.extend_load_path *dirs
    dir = File.join(SEARCH_ROOT, *dirs)
    $:.unshift dir unless $:.include? dir
  end

end
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
module Performant
  # This class will be enriched with c-methods
  #
  class Array

    # Intersects the given arrays, choosing a good algorithm for it.
    #
    # Parameters:
    # * array_of_arrays: the arrays to intersect.
    #
    # Returns the elements that are contained in all the given arrays.
    #
    # Note: The sort order will be changed. The passed in array of arrays
    #       is sorted in place, smallest array first.
    #
    def self.intersect array_of_arrays
      array_of_arrays.sort! { |a, b| a.size <=> b.size }

      if array_of_arrays.sum(&:size) < 20_000
        Performant::Array.brute_force_intersect array_of_arrays
      else
        # Note: No seed is needed, as there is always at least one array
        #       in this branch. An intermediate result that is empty needs
        #       to stay empty, it must not be replaced by the next array.
        #
        array_of_arrays.inject do |total, elements|
          elements & total
        end
      end
    end

  end
end
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
module Query
  # An allocation has a number of combinations:
  # [token, index] [other_token, other_index], ...
  #
  class Allocation

    attr_reader :count, :ids, :score, :combinations
    attr_accessor :result_type

    # Parameters:
    # * combinations: the combinations this allocation consists of. Needs to
    #   respond to hash, ids, calculate_score, keep, remove and to_result.
    #
    def initialize combinations
      @combinations = combinations
    end

    # Allocations with the same combinations hash the same.
    #
    def hash
      @combinations.hash
    end
    # Two allocations are considered the same if their combinations hash
    # the same.
    #
    # Note: Array#uniq only asks eql? for objects with the same hash, so
    #       this removes the same duplicates as before, but does not claim
    #       to be equal to just any other object anymore.
    #
    def eql? other_allocation
      other_allocation.respond_to?(:combinations) && hash == other_allocation.hash
    end

    # Scores its combinations and caches the result.
    #
    def calculate_score weights
      @score ||= @combinations.calculate_score(weights)
    end

    # Asks the combinations for the (intersected) ids.
    #
    def calculate_ids
      @combinations.ids
    end

    # This starts the searching process.
    #
    # Parameters:
    # * amount: the amount of ids to keep
    # * offset: the offset from where to take the ids
    #
    # Returns the kept ids (an empty array if the offset is out of range).
    #
    def process! amount, offset
      ids    = calculate_ids
      @count = ids.size                         # cache the count before throwing away the ids
      @ids   = ids.slice!(offset, amount) || [] # slice out the relevant part
    end

    # Keeps only the combinations with the given identifiers (categories).
    #
    def keep identifiers = []
      @combinations.keep identifiers
    end
    # Removes the combinations with the given identifiers (categories).
    #
    def remove identifiers = []
      @combinations.remove identifiers
    end

    # Sort highest score first.
    #
    def <=> other_allocation
      other_allocation.score <=> self.score
    end

    # Transform the allocation into result form.
    #
    # Returns nil if the allocation has no results (or has not been
    # processed yet).
    #
    def to_result
      return unless count && count > 0

      [result_type, score, count, @combinations.to_result, ids]
    end

    # Json representation of this allocation.
    #
    # Note: Delegates to to_result. Takes (and passes on) the arguments a
    #       JSON generator hands in when this is nested in another structure.
    #
    def to_json *args
      to_result.to_json(*args)
    end

    # String representation of this allocation.
    #
    # Note: Also works for an allocation without results.
    #
    def to_s
      "Allocation: #{Array(to_result).join(', ')}"
    end

  end
end
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
require 'forwardable'

module Query
  # Container class for allocations.
  #
  class Allocations

    extend Forwardable

    # TODO Remove size
    #
    def_delegators :@allocations, :each, :inject, :empty?, :size
    attr_reader :total

    # Parameters:
    # * allocations: an array of allocations.
    #
    def initialize allocations = []
      @allocations = allocations
    end

    # Score each allocation.
    #
    def calculate_score weights
      @allocations.each do |allocation|
        allocation.calculate_score weights
      end
    end
    # Sort the allocations (in place).
    #
    def sort
      @allocations.sort!
    end

    # Reduces the amount of allocations to x.
    #
    def reduce_to amount
      @allocations = @allocations.shift amount
    end

    # Keeps combinations.
    #
    # Only those passed in remain.
    #
    def keep identifiers = []
      @allocations.each { |allocation| allocation.keep identifiers } unless identifiers.empty?
    end
    # Removes combinations.
    #
    # Only those passed in are removed.
    #
    # TODO Rewrite
    #
    def remove identifiers = []
      @allocations.each { |allocation| allocation.remove identifiers } unless identifiers.empty?
    end

    # Returns the top amount ids.
    #
    # Note: Never returns more than amount ids, also if the last
    #       allocation looked at overshoots the amount.
    #
    def ids amount = 20
      collected = []
      @allocations.each do |allocation|
        break if collected.size >= amount
        collected.concat allocation.ids
      end
      collected.first amount
    end

    # Returns random ids from the first allocation.
    #
    # Parameters:
    # * amount: the amount of random ids (at most the amount of ids available)
    #
    def random_ids amount = 1
      # TODO can there be no @allocations???
      return [] if @allocations.empty?
      @allocations.first.ids.sample amount
    end

    # This is the main method of this class that will replace ids and count.
    #
    # What it does is calculate the ids and counts of its allocations
    # for being used in the results. It also calculates the total.
    #
    # Parameters:
    # * amount: the amount of ids to calculate
    # * offset: the offset from where in the result set to take the ids
    #
    # Note: With an amount of 0, an offset > 0 doesn't make much
    #       sense, as seen in the live search.
    #
    # Note: Each allocation caches its count, but not its ids (thrown away).
    #       The ids are cached in this class.
    #
    # Note: It's possible that no ids are returned by an allocation, but a count. (In case of an offset)
    #
    def process! amount, offset = 0
      @total = 0
      @allocations.each do |allocation|
        ids = allocation.process! amount, offset
        @total = @total + allocation.count # the total mixed in
        if ids.empty?
          # The whole allocation lies before the offset (or no more ids are needed).
          #
          offset = offset - allocation.count unless offset.zero?
        else
          amount = amount - ids.size # we need less results from the following allocation
          offset = 0                 # we have already passed the offset
        end
      end
    end

    # Removes double allocations (in place).
    #
    def uniq
      @allocations.uniq!
    end

    # Returns the internal array of allocations.
    #
    def to_a
      @allocations
    end

    # Simply inspects the internal allocations.
    #
    def to_s
      @allocations.inspect
    end

    # Allocations for results are in the form:
    # [
    #   allocation1.to_result,
    #   allocation2.to_result
    #   ...
    # ]
    #
    # Note: Allocations without result (nil) are left out.
    #
    def to_result
      @allocations.map(&:to_result).compact
    end

  end
end
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
module Query
  # Base query class.
  #
  # Initialized with the index types it should search on. An options hash
  # can be passed as last argument (:tokenizer, :weights).
  #
  # Note: Relies on result_type, which is not defined in this class.
  #
  class Base

    include Helpers::Measuring

    attr_writer   :tokenizer
    attr_accessor :reduce_to_amount, :weights

    # Takes the index types to search on, and optionally trailing options:
    # * tokenizer: the tokenizer to use for the query text
    # * weights:   the weights to score the allocations with
    #
    def initialize *index_types
      options = case index_types.last
                when Hash then index_types.pop
                else {}
                end

      @index_types = index_types
      @weigher     = Weigher.new index_types
      @tokenizer   = options[:tokenizer] || Tokenizers::Query.new
      @weights     = options[:weights] || Weights.new
    end

    # Convenience method: Tokenizes the text, then searches with the tokens.
    #
    def search_with_text text, offset = 0
      tokens = tokenized text
      search tokens, offset
    end

    # This runs the actual search and notes its duration in the results.
    #
    def search tokens, offset = 0
      found = nil

      elapsed = timed { found = execute(tokens, offset) || empty_results } # TODO Does not work yet
      found.duration = elapsed.round 6

      found
    end

    # Packs the sorted allocations for the tokens into results.
    #
    def execute tokens, offset
      allocations = sorted_allocations tokens
      results_from allocations, offset
    end

    # Returns an empty result with default values.
    #
    def empty_results
      result_type.new
    end

    # Delegates the tokenizing to the query tokenizer.
    #
    def tokenized text
      @tokenizer.tokenize text
    end

    # Gets sorted allocations for the tokens.
    #
    # Steps: generate the possible allocations, reduce them, remove the
    # unwanted identifiers, remove doubles, score, and sort.
    #
    # TODO Smallify.
    #
    def sorted_allocations tokens
      allocations = @weigher.allocations_for tokens

      # Callbacks.
      #
      reduce allocations
      remove_from allocations

      # TODO allocations#calculate # or better name
      #

      allocations.uniq                    # Remove double allocations.
      allocations.calculate_score weights # Score the allocations.
      allocations.sort                    # Highest score first.

      allocations
    end
    # Reduces the allocations, if an amount to reduce to has been set.
    #
    def reduce allocations
      return unless reduce_to_amount
      allocations.reduce_to reduce_to_amount
    end
    # Are there any identifiers to remove?
    #
    def remove_identifiers?
      identifiers_to_remove.present?
    end
    # Removes the identifiers to remove from the allocations, if there are any.
    #
    def remove_from allocations
      return unless remove_identifiers?
      allocations.remove(identifiers_to_remove)
    end
    # Override.
    #
    def identifiers_to_remove
      @identifiers_to_remove ||= []
    end

    # Packs the sorted allocations into results.
    #
    # This generates the id intersections. Lots of work going on.
    #
    # TODO Move to results. result_type.from allocations, offset
    #
    def results_from allocations = nil, offset = 0
      result_type.new(allocations).tap do |results|
        results.prepare! offset
      end
    end

  end
end
|