picky 3.0.1 → 3.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/picky/application.rb +12 -12
- data/lib/picky/backends/backend.rb +17 -0
- data/lib/picky/{backend → backends}/file/basic.rb +1 -1
- data/lib/picky/{backend → backends}/file/json.rb +1 -1
- data/lib/picky/{backend → backends}/file/marshal.rb +1 -1
- data/lib/picky/{backend → backends}/file/text.rb +1 -1
- data/lib/picky/backends/memory.rb +53 -0
- data/lib/picky/{backend → backends}/redis/basic.rb +9 -14
- data/lib/picky/backends/redis/float_hash.rb +26 -0
- data/lib/picky/{backend → backends}/redis/list_hash.rb +7 -11
- data/lib/picky/{backend → backends}/redis/string_hash.rb +7 -11
- data/lib/picky/backends/redis.rb +87 -0
- data/lib/picky/bundle.rb +107 -11
- data/lib/picky/category.rb +5 -5
- data/lib/picky/index.rb +329 -0
- data/lib/picky/index_indexed.rb +31 -0
- data/lib/picky/index_indexing.rb +161 -0
- data/lib/picky/indexed/bundle.rb +112 -0
- data/lib/picky/indexed/wrappers/exact_first.rb +1 -1
- data/lib/picky/indexers/parallel.rb +2 -1
- data/lib/picky/indexers/serial.rb +2 -1
- data/lib/picky/indexes_indexing.rb +1 -1
- data/lib/picky/indexing/bundle.rb +188 -0
- data/lib/picky/indexing/wrappers/category/location.rb +1 -1
- data/lib/picky/interfaces/live_parameters.rb +8 -8
- data/lib/picky/loader.rb +24 -38
- data/lib/picky/migrations/from_30_to_31.rb +61 -0
- data/lib/picky/query/allocation.rb +10 -5
- data/lib/picky/query/combinations.rb +70 -0
- data/lib/picky/query/indexes.rb +8 -7
- data/lib/picky/query/indexes_check.rb +47 -0
- data/lib/picky/query/token.rb +16 -29
- data/lib/picky/query/tokens.rb +4 -20
- data/lib/picky/search.rb +51 -58
- data/lib/picky/tokenizer.rb +231 -0
- data/lib/picky/tokenizers/location.rb +1 -1
- data/lib/tasks/try.rake +4 -12
- data/lib/tasks/try.rb +37 -0
- data/spec/lib/application_spec.rb +5 -5
- data/spec/lib/{backend → backends}/file/basic_spec.rb +2 -2
- data/spec/lib/{backend → backends}/file/json_spec.rb +2 -2
- data/spec/lib/{backend → backends}/file/marshal_spec.rb +2 -2
- data/spec/lib/{backend → backends}/file/text_spec.rb +1 -1
- data/spec/lib/backends/memory_spec.rb +77 -0
- data/spec/lib/{backend → backends}/redis/basic_spec.rb +19 -21
- data/spec/lib/backends/redis/float_hash_spec.rb +38 -0
- data/spec/lib/backends/redis/list_hash_spec.rb +27 -0
- data/spec/lib/backends/redis/string_hash_spec.rb +38 -0
- data/spec/lib/backends/redis_spec.rb +79 -0
- data/spec/lib/categories_indexed_spec.rb +3 -3
- data/spec/lib/category_indexed_spec.rb +6 -6
- data/spec/lib/category_indexing_spec.rb +1 -1
- data/spec/lib/category_spec.rb +1 -1
- data/spec/lib/frontend_adapters/rack_spec.rb +2 -2
- data/spec/lib/{indexes/index_indexed_spec.rb → index_indexed_spec.rb} +1 -1
- data/spec/lib/{indexes/index_indexing_spec.rb → index_indexing_spec.rb} +1 -1
- data/spec/lib/{indexes/index_spec.rb → index_spec.rb} +1 -1
- data/spec/lib/indexed/{bundle/memory_spec.rb → memory_spec.rb} +18 -18
- data/spec/lib/indexed/wrappers/exact_first_spec.rb +2 -2
- data/spec/lib/indexing/{bundle/memory_partial_generation_speed_spec.rb → bundle_partial_generation_speed_spec.rb} +3 -3
- data/spec/lib/indexing/bundle_spec.rb +302 -0
- data/spec/lib/query/allocation_spec.rb +21 -11
- data/spec/lib/query/combination_spec.rb +2 -2
- data/spec/lib/query/{combinations/base_spec.rb → combinations_spec.rb} +1 -1
- data/spec/lib/query/indexes_check_spec.rb +25 -0
- data/spec/lib/query/indexes_spec.rb +5 -1
- data/spec/lib/query/token_spec.rb +18 -20
- data/spec/lib/query/tokens_spec.rb +14 -65
- data/spec/lib/search_spec.rb +36 -37
- data/spec/lib/tasks/try_spec.rb +51 -0
- data/spec/lib/{tokenizers/base_spec.rb → tokenizer_spec.rb} +15 -44
- metadata +64 -81
- data/lib/picky/backend/base.rb +0 -121
- data/lib/picky/backend/files.rb +0 -28
- data/lib/picky/backend/redis.rb +0 -44
- data/lib/picky/indexed/bundle/base.rb +0 -47
- data/lib/picky/indexed/bundle/memory.rb +0 -88
- data/lib/picky/indexed/bundle/redis.rb +0 -91
- data/lib/picky/indexes/index.rb +0 -328
- data/lib/picky/indexes/index_indexed.rb +0 -35
- data/lib/picky/indexes/index_indexing.rb +0 -165
- data/lib/picky/indexes/memory.rb +0 -20
- data/lib/picky/indexes/redis.rb +0 -20
- data/lib/picky/indexing/bundle/base.rb +0 -242
- data/lib/picky/indexing/bundle/memory.rb +0 -26
- data/lib/picky/indexing/bundle/redis.rb +0 -26
- data/lib/picky/query/combinations/base.rb +0 -74
- data/lib/picky/query/combinations/memory.rb +0 -52
- data/lib/picky/query/combinations/redis.rb +0 -90
- data/lib/picky/query.rb +0 -6
- data/lib/picky/tokenizers/base.rb +0 -231
- data/lib/picky/tokenizers/index.rb +0 -34
- data/lib/picky/tokenizers/query.rb +0 -61
- data/spec/lib/backend/files_spec.rb +0 -189
- data/spec/lib/backend/redis/list_hash_spec.rb +0 -40
- data/spec/lib/backend/redis/string_hash_spec.rb +0 -47
- data/spec/lib/backend/redis_spec.rb +0 -170
- data/spec/lib/indexed/bundle/redis_spec.rb +0 -41
- data/spec/lib/indexes/redis_spec.rb +0 -15
- data/spec/lib/indexing/bundle/base_spec.rb +0 -38
- data/spec/lib/indexing/bundle/memory_spec.rb +0 -287
- data/spec/lib/indexing/bundle/redis_spec.rb +0 -283
- data/spec/lib/query/combinations/memory_spec.rb +0 -158
- data/spec/lib/query/combinations/redis_spec.rb +0 -172
- data/spec/lib/tokenizers/index_spec.rb +0 -69
- data/spec/lib/tokenizers/query_spec.rb +0 -121
data/lib/picky/indexes/index.rb
DELETED
@@ -1,328 +0,0 @@
|
|
1
|
-
module Picky
|
2
|
-
|
3
|
-
# = Picky Indexes
|
4
|
-
#
|
5
|
-
# A Picky Index defines
|
6
|
-
# * where its data comes from (a data source).
|
7
|
-
# * how this data is indexed.
|
8
|
-
# * a number of categories that may or may not map directly to data categories.
|
9
|
-
#
|
10
|
-
# == Howto
|
11
|
-
#
|
12
|
-
# This is a step-by-step description on how to create an index.
|
13
|
-
#
|
14
|
-
# Start by choosing an <tt>Indexes::Memory</tt> or an <tt>Indexes::Redis</tt>.
|
15
|
-
# In the example, we will be using an in-memory index, <tt>Indexes::Memory</tt>.
|
16
|
-
#
|
17
|
-
# books = Indexes::Memory.new(:books)
|
18
|
-
#
|
19
|
-
# That in itself won't do much good, that's why we add a data source:
|
20
|
-
#
|
21
|
-
# books = Indexes::Memory.new(:books) do
|
22
|
-
# source Sources::CSV.new(:title, :author, file: 'data/books.csv')
|
23
|
-
# end
|
24
|
-
#
|
25
|
-
# In the example, we use an explicit <tt>Sources::CSV</tt> of Picky.
|
26
|
-
# However, anything that responds to <tt>#each</tt>, and returns an object that
|
27
|
-
# answers to <tt>#id</tt>, works.
|
28
|
-
#
|
29
|
-
# For example, a 3.0 ActiveRecord class:
|
30
|
-
#
|
31
|
-
# books = Indexes::Memory.new(:books) do
|
32
|
-
# source Book.order('isbn ASC')
|
33
|
-
# end
|
34
|
-
#
|
35
|
-
# Now we know where the data comes from, but not how to categorize it.
|
36
|
-
#
|
37
|
-
# Let's add a few categories:
|
38
|
-
#
|
39
|
-
# books = Indexes::Memory.new(:books) do
|
40
|
-
# source Book.order('isbn ASC')
|
41
|
-
# category :title
|
42
|
-
# category :author
|
43
|
-
# category :isbn
|
44
|
-
# end
|
45
|
-
#
|
46
|
-
# Categories offer quite a few options, see <tt>Indexes::Index#category</tt> for details.
|
47
|
-
#
|
48
|
-
# After adding more options, it might look like this:
|
49
|
-
#
|
50
|
-
# books = Indexes::Memory.new(:books) do
|
51
|
-
# source Book.order('isbn ASC')
|
52
|
-
# category :title,
|
53
|
-
# partial: Partial::Substring.new(from: 1),
|
54
|
-
# similarity: Similarity::DoubleMetaphone.new(3),
|
55
|
-
# qualifiers: [:t, :title, :titulo]
|
56
|
-
# category :author,
|
57
|
-
# similarity: Similarity::Metaphone.new(2)
|
58
|
-
# category :isbn,
|
59
|
-
# partial: Partial::None.new,
|
60
|
-
# from: :legacy_isbn_name
|
61
|
-
# end
|
62
|
-
#
|
63
|
-
# For this to work, a <tt>Book</tt> should support methods <tt>#title</tt>, <tt>#author</tt> and <tt>#legacy_isbn_name</tt>.
|
64
|
-
#
|
65
|
-
# If it uses <tt>String</tt> ids, use <tt>#key_format</tt> to define a formatting method:
|
66
|
-
#
|
67
|
-
# books = Indexes::Memory.new(:books) do
|
68
|
-
# key_format :to_s
|
69
|
-
# source Book.order('isbn ASC')
|
70
|
-
# category :title
|
71
|
-
# category :author
|
72
|
-
# category :isbn
|
73
|
-
# end
|
74
|
-
#
|
75
|
-
# Finally, use the index for a <tt>Search</tt>:
|
76
|
-
#
|
77
|
-
# route %r{^/media$} => Search.new(books, dvds, mp3s)
|
78
|
-
#
|
79
|
-
class Indexes
|
80
|
-
|
81
|
-
# This class defines the indexing and index API that is exposed to the user
|
82
|
-
# as the #index method inside the Application class.
|
83
|
-
#
|
84
|
-
# It provides a single front for both indexing and index options. We suggest to always use the index API.
|
85
|
-
#
|
86
|
-
# Note: An Index holds both an *Indexed*::*Index* and an *Indexing*::*Index*.
|
87
|
-
#
|
88
|
-
class Index
|
89
|
-
|
90
|
-
attr_reader :name,
|
91
|
-
:categories
|
92
|
-
|
93
|
-
delegate :[],
|
94
|
-
:each_category,
|
95
|
-
:to => :categories
|
96
|
-
|
97
|
-
# Create a new index with a given source.
|
98
|
-
#
|
99
|
-
# === Parameters
|
100
|
-
# * name: A name that will be used for the index directory and in the Picky front end.
|
101
|
-
#
|
102
|
-
# === Options (all are used in the block, see examples)
|
103
|
-
# * source: Where the data comes from, e.g. Sources::CSV.new(...). Optional, can be defined in the block using #source.
|
104
|
-
# * result_identifier: Use if you'd like a different identifier/name in the results than the name of the index.
|
105
|
-
# * after_indexing: As of this writing only used in the db source. Executes the given after_indexing as SQL after the indexing process.
|
106
|
-
# * tokenizer: Call and pass either a tokenizer (responds to #tokenize) or the options for a tokenizer.
|
107
|
-
# * key_format: Call and pass in a format method for the ids (default is #to_i).
|
108
|
-
#
|
109
|
-
# Example:
|
110
|
-
# my_index = Indexes::Memory.new(:my_index) do
|
111
|
-
# source Sources::CSV.new(file: 'data/index.csv')
|
112
|
-
# key_format :to_sym
|
113
|
-
# category :bla
|
114
|
-
# result_identifier :my_special_results
|
115
|
-
# end
|
116
|
-
#
|
117
|
-
def initialize name, options = {}
|
118
|
-
@name = name.to_sym
|
119
|
-
|
120
|
-
# TODO Move ignore_unassigned_tokens to query, somehow. Then, remove options.
|
121
|
-
#
|
122
|
-
@categories = Categories.new ignore_unassigned_tokens: (options[:ignore_unassigned_tokens] || false)
|
123
|
-
|
124
|
-
# Centralized registry.
|
125
|
-
#
|
126
|
-
Indexes.register self
|
127
|
-
|
128
|
-
instance_eval(&Proc.new) if block_given?
|
129
|
-
end
|
130
|
-
|
131
|
-
# Default bundles.
|
132
|
-
#
|
133
|
-
def indexing_bundle_class
|
134
|
-
Indexing::Bundle::Memory
|
135
|
-
end
|
136
|
-
def indexed_bundle_class
|
137
|
-
Indexed::Bundle::Memory
|
138
|
-
end
|
139
|
-
|
140
|
-
# Defines a searchable category on the index.
|
141
|
-
#
|
142
|
-
# === Parameters
|
143
|
-
# * category_name: This identifier is used in the front end, but also to categorize query text. For example, “title:hobbit” will narrow the hobbit query on categories with the identifier :title.
|
144
|
-
#
|
145
|
-
# === Options
|
146
|
-
# * partial: Partial::None.new or Partial::Substring.new(from: starting_char, to: ending_char). Default is Partial::Substring.new(from: -3, to: -1).
|
147
|
-
# * similarity: Similarity::None.new or Similarity::DoubleMetaphone.new(similar_words_searched). Default is Similarity::None.new.
|
148
|
-
# * qualifiers: An array of qualifiers with which you can define which category you’d like to search, for example “title:hobbit” will search for hobbit in just title categories. Example: qualifiers: [:t, :titre, :title] (use it for example with multiple languages). Default is the name of the category.
|
149
|
-
# * qualifier: Convenience option if you just need a single qualifier, see above. Example: qualifier: :title. Default is the name of the category.
|
150
|
-
# * source: Use a different source than the index uses. If you think you need that, there might be a better solution to your problem. Please post to the mailing list first with your application.rb :)
|
151
|
-
# * from: Take the data from the data category with this name. Example: You have a source Sources::CSV.new(:title, file:'some_file.csv') but you want the category to be called differently. Then you use from: define_category(:similar_title, :from => :title).
|
152
|
-
#
|
153
|
-
def category category_name, options = {}
|
154
|
-
new_category = Category.new category_name.to_sym, self, options
|
155
|
-
categories << new_category
|
156
|
-
|
157
|
-
new_category = yield new_category if block_given?
|
158
|
-
|
159
|
-
new_category
|
160
|
-
end
|
161
|
-
alias define_category category
|
162
|
-
|
163
|
-
# Make this category range searchable with a fixed range. If you need other
|
164
|
-
# ranges, define another category with a different range value.
|
165
|
-
#
|
166
|
-
# Example:
|
167
|
-
# You have data values inside 1..100, and you want to have Picky return
|
168
|
-
# not only the results for 47 if you search for 47, but also results for
|
169
|
-
# 45, 46, or 47.2, 48.9, in a range of 2 around 47, so (45..49).
|
170
|
-
#
|
171
|
-
# Then you use:
|
172
|
-
# ranged_category :values_inside_1_100, 2
|
173
|
-
#
|
174
|
-
# Optionally, you give it a precision value to reduce the error margin
|
175
|
-
# around 47 (Picky is a bit liberal).
|
176
|
-
# Indexes::Memory.new :range do
|
177
|
-
# ranged_category :values_inside_1_100, 2, precision: 5
|
178
|
-
# end
|
179
|
-
#
|
180
|
-
# This will force Picky to be wrong by at most 5% of the given range value
|
181
|
-
# (5% of 2 = 0.1) instead of the default 20% (20% of 2 = 0.4).
|
182
|
-
#
|
183
|
-
# We suggest not to use much more than 5 as a higher precision is more
|
184
|
-
# performance intensive for less and less precision gain.
|
185
|
-
#
|
186
|
-
# == Protip 1
|
187
|
-
#
|
188
|
-
# Create two ranged categories to make an area search:
|
189
|
-
# Indexes::Memory.new :area do
|
190
|
-
# ranged_category :x, 1
|
191
|
-
# ranged_category :y, 1
|
192
|
-
# end
|
193
|
-
#
|
194
|
-
# Search for it using for example:
|
195
|
-
# x:133, y:120
|
196
|
-
#
|
197
|
-
# This will search this square area (* = 133, 120: The "search" point entered):
|
198
|
-
#
|
199
|
-
# 132 134
|
200
|
-
# | |
|
201
|
-
# --|---------|-- 121
|
202
|
-
# | |
|
203
|
-
# | * |
|
204
|
-
# | |
|
205
|
-
# --|---------|-- 119
|
206
|
-
# | |
|
207
|
-
#
|
208
|
-
# Note: The area does not need to be square, but can be rectangular.
|
209
|
-
#
|
210
|
-
# == Protip 2
|
211
|
-
#
|
212
|
-
# Create three ranged categories to make a volume search.
|
213
|
-
#
|
214
|
-
# Or go crazy and use 4 ranged categories for a space/time search! ;)
|
215
|
-
#
|
216
|
-
# === Parameters
|
217
|
-
# * category_name: The category_name as used in #define_category.
|
218
|
-
# * range: The range (in the units of your data values) around the query point where we search for results.
|
219
|
-
#
|
220
|
-
# -----|<- range ->*------------|-----
|
221
|
-
#
|
222
|
-
# === Options
|
223
|
-
# * precision: Default is 1 (20% error margin, very fast), up to 5 (5% error margin, slower) makes sense.
|
224
|
-
# * ... all options of #define_category.
|
225
|
-
#
|
226
|
-
def ranged_category category_name, range, options = {}
|
227
|
-
precision = options[:precision] || 1 # THINK options.delete?
|
228
|
-
|
229
|
-
# Note: :key_format => :to_f ?
|
230
|
-
#
|
231
|
-
options = { partial: Partial::None.new }.merge options
|
232
|
-
|
233
|
-
define_category category_name, options do |category|
|
234
|
-
Indexing::Wrappers::Category::Location.install_on category, range, precision
|
235
|
-
Indexed::Wrappers::Category::Location.install_on category, range, precision
|
236
|
-
end
|
237
|
-
end
|
238
|
-
alias define_ranged_category ranged_category
|
239
|
-
|
240
|
-
# HIGHLY EXPERIMENTAL Not correctly working yet. Try it if you feel "beta".
|
241
|
-
#
|
242
|
-
# Also a range search see #ranged_category, but on the earth's surface.
|
243
|
-
#
|
244
|
-
# Parameters:
|
245
|
-
# * lat_name: The latitude's name as used in #define_category.
|
246
|
-
# * lng_name: The longitude's name as used in #define_category.
|
247
|
-
# * radius: The distance (in km) around the query point which we search for results.
|
248
|
-
#
|
249
|
-
# Note: Picky uses a square, not a circle. That should be ok for most usages.
|
250
|
-
#
|
251
|
-
# -----------------------------
|
252
|
-
# | |
|
253
|
-
# | |
|
254
|
-
# | |
|
255
|
-
# | |
|
256
|
-
# | |
|
257
|
-
# | *<- radius ->|
|
258
|
-
# | |
|
259
|
-
# | |
|
260
|
-
# | |
|
261
|
-
# | |
|
262
|
-
# | |
|
263
|
-
# -----------------------------
|
264
|
-
#
|
265
|
-
# Options
|
266
|
-
# * precision: Default 1 (20% error margin, very fast), up to 5 (5% error margin, slower) makes sense.
|
267
|
-
# * lat_from: The data category to take the data for the latitude from.
|
268
|
-
# * lng_from: The data category to take the data for the longitude from.
|
269
|
-
#
|
270
|
-
# TODO Will have to write a wrapper that combines two categories that are
|
271
|
-
# indexed simultaneously, since lat/lng are correlated.
|
272
|
-
#
|
273
|
-
def geo_categories lat_name, lng_name, radius, options = {} # :nodoc:
|
274
|
-
|
275
|
-
# Extract lat/lng specific options.
|
276
|
-
#
|
277
|
-
lat_from = options.delete :lat_from
|
278
|
-
lng_from = options.delete :lng_from
|
279
|
-
|
280
|
-
# One can be a normal ranged_category.
|
281
|
-
#
|
282
|
-
ranged_category lat_name, radius*0.00898312, options.merge(from: lat_from)
|
283
|
-
|
284
|
-
# The other needs to adapt the radius depending on the one.
|
285
|
-
#
|
286
|
-
# Depending on the latitude, the radius of the longitude
|
287
|
-
# needs to enlarge, the closer we get to the pole.
|
288
|
-
#
|
289
|
-
# In our simplified case, the radius is given as if all the
|
290
|
-
# locations were on the 45 degree line.
|
291
|
-
#
|
292
|
-
# This calculates km -> longitude (degrees).
|
293
|
-
#
|
294
|
-
# A degree on the 45 degree line is equal to ~222.6398 km.
|
295
|
-
# So a km on the 45 degree line is equal to 0.01796624 degrees.
|
296
|
-
#
|
297
|
-
ranged_category lng_name, radius*0.01796624, options.merge(from: lng_from)
|
298
|
-
|
299
|
-
end
|
300
|
-
alias define_geo_categories geo_categories
|
301
|
-
|
302
|
-
def to_stats # :nodoc:
|
303
|
-
stats = <<-INDEX
|
304
|
-
#{name} (#{self.class}):
|
305
|
-
#{"source: #{source}".indented_to_s}
|
306
|
-
#{"categories: #{categories.map(&:name).join(', ')}".indented_to_s}
|
307
|
-
INDEX
|
308
|
-
stats << " result identifier: \"#{result_identifier}\"".indented_to_s unless result_identifier.to_s == name.to_s
|
309
|
-
stats
|
310
|
-
end
|
311
|
-
|
312
|
-
# Identifier used for technical output.
|
313
|
-
#
|
314
|
-
def identifier
|
315
|
-
"#{PICKY_ENVIRONMENT}:#{name}"
|
316
|
-
end
|
317
|
-
|
318
|
-
#
|
319
|
-
#
|
320
|
-
def to_s
|
321
|
-
"#{self.class}(#{name}, result_id: #{result_identifier}, source: #{@source}, categories: #{categories})"
|
322
|
-
end
|
323
|
-
|
324
|
-
end
|
325
|
-
|
326
|
-
end
|
327
|
-
|
328
|
-
end
|
@@ -1,35 +0,0 @@
|
|
1
|
-
module Picky
|
2
|
-
|
3
|
-
class Indexes
|
4
|
-
|
5
|
-
#
|
6
|
-
#
|
7
|
-
class Index
|
8
|
-
|
9
|
-
attr_reader :combinator
|
10
|
-
|
11
|
-
delegate :load_from_cache,
|
12
|
-
:analyze,
|
13
|
-
:reindex,
|
14
|
-
:possible_combinations,
|
15
|
-
:to => :categories
|
16
|
-
|
17
|
-
alias reload load_from_cache
|
18
|
-
|
19
|
-
# Define how the results of this index are identified.
|
20
|
-
# (Shown in the client, for example)
|
21
|
-
#
|
22
|
-
# Default is the name of the index.
|
23
|
-
#
|
24
|
-
def result_identifier result_identifier = nil
|
25
|
-
result_identifier ? define_result_identifier(result_identifier) : (@result_identifier || @name)
|
26
|
-
end
|
27
|
-
def define_result_identifier result_identifier
|
28
|
-
@result_identifier = result_identifier
|
29
|
-
end
|
30
|
-
|
31
|
-
end
|
32
|
-
|
33
|
-
end
|
34
|
-
|
35
|
-
end
|
@@ -1,165 +0,0 @@
|
|
1
|
-
module Picky
|
2
|
-
|
3
|
-
class Indexes
|
4
|
-
|
5
|
-
#
|
6
|
-
#
|
7
|
-
class Index
|
8
|
-
|
9
|
-
attr_reader :bundle_class
|
10
|
-
|
11
|
-
# Delegators for indexing.
|
12
|
-
#
|
13
|
-
delegate :cache,
|
14
|
-
:check,
|
15
|
-
:clear,
|
16
|
-
:backup,
|
17
|
-
:restore,
|
18
|
-
:to => :categories
|
19
|
-
|
20
|
-
# Calling index on an index will call index
|
21
|
-
# on every category.
|
22
|
-
#
|
23
|
-
# Decides whether to use a parallel indexer or whether to
|
24
|
-
# delegate to each category to index themselves.
|
25
|
-
#
|
26
|
-
def index
|
27
|
-
if source.respond_to?(:each)
|
28
|
-
check_source_empty
|
29
|
-
index_in_parallel
|
30
|
-
else
|
31
|
-
with_data_snapshot do
|
32
|
-
categories.each &:index
|
33
|
-
end
|
34
|
-
end
|
35
|
-
end
|
36
|
-
|
37
|
-
# Define an index tokenizer on the index.
|
38
|
-
#
|
39
|
-
# Parameters are the exact same as for indexing.
|
40
|
-
#
|
41
|
-
def indexing options = {}
|
42
|
-
@tokenizer = if options.respond_to?(:tokenize)
|
43
|
-
options
|
44
|
-
else
|
45
|
-
options && Tokenizers::Index.new(options)
|
46
|
-
end
|
47
|
-
end
|
48
|
-
alias define_indexing indexing
|
49
|
-
|
50
|
-
# Check if the given enumerable source is empty.
|
51
|
-
#
|
52
|
-
# Note: Checking as early as possible to tell the
|
53
|
-
# user as early as possible.
|
54
|
-
#
|
55
|
-
def check_source_empty
|
56
|
-
warn %Q{\n\033[1mWarning\033[m, source for index "#{name}" is empty: #{source} (responds true to empty?).\n} if source.respond_to?(:empty?) && source.empty?
|
57
|
-
end
|
58
|
-
|
59
|
-
# Note: Duplicated in category_indexing.rb.
|
60
|
-
#
|
61
|
-
# Take a data snapshot if the source offers it.
|
62
|
-
#
|
63
|
-
def with_data_snapshot
|
64
|
-
if source.respond_to? :with_snapshot
|
65
|
-
source.with_snapshot(self) do
|
66
|
-
yield
|
67
|
-
end
|
68
|
-
else
|
69
|
-
yield
|
70
|
-
end
|
71
|
-
end
|
72
|
-
|
73
|
-
# Indexes the categories in parallel.
|
74
|
-
#
|
75
|
-
# Only use where the category does have a #each source defined.
|
76
|
-
#
|
77
|
-
def index_in_parallel
|
78
|
-
indexer = Indexers::Parallel.new self
|
79
|
-
indexer.index categories
|
80
|
-
categories.each &:cache
|
81
|
-
end
|
82
|
-
|
83
|
-
# Returns the installed tokenizer or the default.
|
84
|
-
#
|
85
|
-
def tokenizer
|
86
|
-
@tokenizer || Indexes.tokenizer
|
87
|
-
end
|
88
|
-
|
89
|
-
# Define a source on the index.
|
90
|
-
#
|
91
|
-
# Parameter is a source, either one of the standard sources or
|
92
|
-
# anything responding to #each and returning objects that
|
93
|
-
# respond to id and the category names (or the category from option).
|
94
|
-
#
|
95
|
-
def source some_source = nil, &block
|
96
|
-
some_source ||= block
|
97
|
-
some_source ? define_source(some_source) : (@source && extract_source || raise_no_source)
|
98
|
-
end
|
99
|
-
# Extract the actual source if it is wrapped in a time
|
100
|
-
# capsule, i.e. a block/lambda.
|
101
|
-
#
|
102
|
-
# TODO Extract into module.
|
103
|
-
#
|
104
|
-
def extract_source
|
105
|
-
@source = @source.respond_to?(:call) ? @source.call : @source
|
106
|
-
end
|
107
|
-
def define_source source
|
108
|
-
check_source source
|
109
|
-
@source = source
|
110
|
-
end
|
111
|
-
def raise_no_source
|
112
|
-
raise NoSourceSpecifiedException.new(<<-NO_SOURCE
|
113
|
-
|
114
|
-
|
115
|
-
No source given for index #{name}. An index needs a source.
|
116
|
-
Example:
|
117
|
-
Indexes::Memory.new(:with_source) do
|
118
|
-
source Sources::CSV.new(:title, file: 'data/books.csv')
|
119
|
-
category :title
|
120
|
-
category :author
|
121
|
-
end
|
122
|
-
|
123
|
-
NO_SOURCE
|
124
|
-
)
|
125
|
-
end
|
126
|
-
def check_source source # :nodoc:
|
127
|
-
raise ArgumentError.new(<<-SOURCE
|
128
|
-
|
129
|
-
|
130
|
-
The index "#{name}" should use a data source that responds to either the method #each, or the method #harvest, which yields(id, text), OR it can be a lambda/block, returning such a source.
|
131
|
-
Or it could use one of the built-in sources:
|
132
|
-
Sources::#{(Sources.constants - [:Base, :Wrappers, :NoCSVFileGiven, :NoCouchDBGiven]).join(',
|
133
|
-
Sources::')}
|
134
|
-
|
135
|
-
|
136
|
-
SOURCE
|
137
|
-
) unless source.respond_to?(:each) || source.respond_to?(:harvest) || source.respond_to?(:call)
|
138
|
-
end
|
139
|
-
|
140
|
-
# Define a key_format on the index.
|
141
|
-
#
|
142
|
-
# Parameter is a method name to use on the key (e.g. :to_i, :to_s, :strip).
|
143
|
-
#
|
144
|
-
def key_format format = nil
|
145
|
-
format ? define_key_format(format) : @key_format
|
146
|
-
end
|
147
|
-
def define_key_format key_format
|
148
|
-
@key_format = key_format
|
149
|
-
end
|
150
|
-
|
151
|
-
# Define what to do after indexing.
|
152
|
-
# (Only used in the Sources::DB)
|
153
|
-
#
|
154
|
-
def after_indexing after_indexing = nil
|
155
|
-
after_indexing ? define_after_indexing(after_indexing) : @after_indexing
|
156
|
-
end
|
157
|
-
def define_after_indexing after_indexing
|
158
|
-
@after_indexing = after_indexing
|
159
|
-
end
|
160
|
-
|
161
|
-
end
|
162
|
-
|
163
|
-
end
|
164
|
-
|
165
|
-
end
|
data/lib/picky/indexes/memory.rb
DELETED
@@ -1,20 +0,0 @@
|
|
1
|
-
module Picky
|
2
|
-
|
3
|
-
class Indexes
|
4
|
-
|
5
|
-
# An index that is persisted in files, loaded at startup and kept in memory at runtime.
|
6
|
-
#
|
7
|
-
class Memory < Index
|
8
|
-
|
9
|
-
def indexing_bundle_class
|
10
|
-
Indexing::Bundle::Memory
|
11
|
-
end
|
12
|
-
def indexed_bundle_class
|
13
|
-
Indexed::Bundle::Memory
|
14
|
-
end
|
15
|
-
|
16
|
-
end
|
17
|
-
|
18
|
-
end
|
19
|
-
|
20
|
-
end
|
data/lib/picky/indexes/redis.rb
DELETED
@@ -1,20 +0,0 @@
|
|
1
|
-
module Picky
|
2
|
-
|
3
|
-
class Indexes
|
4
|
-
|
5
|
-
# An index that is persisted in Redis.
|
6
|
-
#
|
7
|
-
class Redis < Index
|
8
|
-
|
9
|
-
def indexing_bundle_class
|
10
|
-
Indexing::Bundle::Redis
|
11
|
-
end
|
12
|
-
def indexed_bundle_class
|
13
|
-
Indexed::Bundle::Redis
|
14
|
-
end
|
15
|
-
|
16
|
-
end
|
17
|
-
|
18
|
-
end
|
19
|
-
|
20
|
-
end
|