picky 2.5.2 → 2.6.0
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/picky/adapters/rack/base.rb +23 -0
- data/lib/picky/adapters/rack/live_parameters.rb +33 -0
- data/lib/picky/adapters/rack/query.rb +65 -0
- data/lib/picky/adapters/rack.rb +30 -0
- data/lib/picky/application.rb +5 -5
- data/lib/picky/backend/backend.rb +108 -0
- data/lib/picky/backend/file/basic.rb +101 -0
- data/lib/picky/backend/file/json.rb +34 -0
- data/lib/picky/backend/file/marshal.rb +34 -0
- data/lib/picky/backend/file/text.rb +56 -0
- data/lib/picky/backend/files.rb +30 -0
- data/lib/picky/backend/redis/basic.rb +85 -0
- data/lib/picky/backend/redis/list_hash.rb +49 -0
- data/lib/picky/backend/redis/string_hash.rb +40 -0
- data/lib/picky/backend/redis.rb +40 -0
- data/lib/picky/calculations/location.rb +57 -0
- data/lib/picky/categories.rb +62 -0
- data/lib/picky/categories_indexed.rb +93 -0
- data/lib/picky/categories_indexing.rb +12 -0
- data/lib/picky/category.rb +127 -0
- data/lib/picky/category_indexed.rb +64 -0
- data/lib/picky/category_indexing.rb +145 -0
- data/lib/picky/{internals/ext → ext}/maybe_compile.rb +0 -0
- data/lib/picky/{internals/ext → ext}/ruby19/extconf.rb +0 -0
- data/lib/picky/{internals/ext → ext}/ruby19/performant.c +0 -0
- data/lib/picky/{internals/extensions → extensions}/array.rb +0 -0
- data/lib/picky/extensions/class.rb +11 -0
- data/lib/picky/{internals/extensions → extensions}/hash.rb +0 -0
- data/lib/picky/{internals/extensions → extensions}/module.rb +0 -0
- data/lib/picky/{internals/extensions → extensions}/object.rb +0 -0
- data/lib/picky/{internals/extensions → extensions}/symbol.rb +0 -0
- data/lib/picky/frontend_adapters/rack.rb +146 -0
- data/lib/picky/generators/aliases.rb +3 -3
- data/lib/picky/generators/base.rb +15 -0
- data/lib/picky/generators/partial/default.rb +5 -0
- data/lib/picky/generators/partial/none.rb +31 -0
- data/lib/picky/generators/partial/strategy.rb +25 -0
- data/lib/picky/generators/partial/substring.rb +118 -0
- data/lib/picky/generators/partial_generator.rb +15 -0
- data/lib/picky/generators/similarity/default.rb +7 -0
- data/lib/picky/generators/similarity/double_metaphone.rb +28 -0
- data/lib/picky/generators/similarity/metaphone.rb +28 -0
- data/lib/picky/generators/similarity/none.rb +31 -0
- data/lib/picky/generators/similarity/phonetic.rb +65 -0
- data/lib/picky/generators/similarity/soundex.rb +28 -0
- data/lib/picky/generators/similarity/strategy.rb +9 -0
- data/lib/picky/generators/similarity_generator.rb +15 -0
- data/lib/picky/generators/strategy.rb +14 -0
- data/lib/picky/generators/weights/default.rb +7 -0
- data/lib/picky/generators/weights/logarithmic.rb +39 -0
- data/lib/picky/generators/weights/strategy.rb +9 -0
- data/lib/picky/generators/weights_generator.rb +15 -0
- data/lib/picky/{internals/helpers → helpers}/measuring.rb +0 -0
- data/lib/picky/index/base.rb +119 -104
- data/lib/picky/index/base_indexed.rb +27 -0
- data/lib/picky/index/base_indexing.rb +119 -0
- data/lib/picky/index/memory.rb +6 -18
- data/lib/picky/index/redis.rb +6 -18
- data/lib/picky/indexed/bundle/base.rb +110 -0
- data/lib/picky/indexed/bundle/memory.rb +91 -0
- data/lib/picky/indexed/bundle/redis.rb +45 -0
- data/lib/picky/indexed/wrappers/bundle/calculation.rb +35 -0
- data/lib/picky/indexed/wrappers/bundle/location.rb +42 -0
- data/lib/picky/indexed/wrappers/bundle/wrapper.rb +43 -0
- data/lib/picky/indexed/wrappers/category/location.rb +25 -0
- data/lib/picky/indexed/wrappers/exact_first.rb +55 -0
- data/lib/picky/{internals/indexers → indexers}/base.rb +0 -0
- data/lib/picky/{internals/indexers → indexers}/parallel.rb +0 -0
- data/lib/picky/{internals/indexers → indexers}/serial.rb +0 -0
- data/lib/picky/{internals/indexers → indexers}/solr.rb +0 -0
- data/lib/picky/indexes.rb +73 -0
- data/lib/picky/indexes_indexed.rb +29 -0
- data/lib/picky/indexes_indexing.rb +49 -0
- data/lib/picky/indexing/bundle/base.rb +212 -0
- data/lib/picky/indexing/bundle/memory.rb +25 -0
- data/lib/picky/indexing/bundle/redis.rb +24 -0
- data/lib/picky/indexing/bundle/super_base.rb +61 -0
- data/lib/picky/indexing/wrappers/category/location.rb +25 -0
- data/lib/picky/interfaces/live_parameters.rb +8 -8
- data/lib/picky/loader.rb +89 -95
- data/lib/picky/{internals/performant.rb → performant.rb} +0 -0
- data/lib/picky/query/allocation.rb +84 -0
- data/lib/picky/query/allocations.rb +114 -0
- data/lib/picky/query/combination.rb +76 -0
- data/lib/picky/query/combinations/base.rb +70 -0
- data/lib/picky/query/combinations/memory.rb +48 -0
- data/lib/picky/query/combinations/redis.rb +86 -0
- data/lib/picky/query/indexes.rb +195 -0
- data/lib/picky/query/qualifiers.rb +76 -0
- data/lib/picky/query/token.rb +198 -0
- data/lib/picky/query/tokens.rb +103 -0
- data/lib/picky/{internals/query → query}/weights.rb +0 -0
- data/lib/picky/results.rb +1 -1
- data/lib/picky/search.rb +6 -6
- data/lib/picky/{internals/solr → solr}/schema_generator.rb +0 -0
- data/lib/picky/sources/db.rb +7 -7
- data/lib/picky/sources/wrappers/location.rb +2 -2
- data/lib/picky/tokenizers/base.rb +224 -0
- data/lib/picky/tokenizers/index.rb +30 -0
- data/lib/picky/tokenizers/location.rb +49 -0
- data/lib/picky/tokenizers/query.rb +55 -0
- data/lib/tasks/index.rake +4 -3
- data/lib/tasks/try.rake +2 -2
- data/spec/lib/{internals/adapters → adapters}/rack/base_spec.rb +1 -1
- data/spec/lib/{internals/adapters → adapters}/rack/live_parameters_spec.rb +1 -1
- data/spec/lib/{internals/adapters → adapters}/rack/query_spec.rb +1 -1
- data/spec/lib/application_spec.rb +3 -3
- data/spec/lib/{internals/index → backend}/file/basic_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/file/json_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/file/marshal_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/file/text_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/files_spec.rb +3 -3
- data/spec/lib/{internals/index → backend}/redis/basic_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/redis/list_hash_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/redis/string_hash_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/redis_spec.rb +11 -5
- data/spec/lib/{internals/calculations → calculations}/location_spec.rb +1 -1
- data/spec/lib/{internals/indexed/categories_spec.rb → categories_indexed_spec.rb} +10 -10
- data/spec/lib/{internals/indexed/category_spec.rb → category_indexed_spec.rb} +12 -12
- data/spec/lib/{internals/indexing/category_spec.rb → category_indexing_spec.rb} +10 -10
- data/spec/lib/{internals/cores_spec.rb → cores_spec.rb} +0 -0
- data/spec/lib/{internals/extensions → extensions}/array_spec.rb +0 -0
- data/spec/lib/{internals/extensions → extensions}/hash_spec.rb +0 -0
- data/spec/lib/{internals/extensions → extensions}/module_spec.rb +0 -0
- data/spec/lib/{internals/extensions → extensions}/object_spec.rb +0 -0
- data/spec/lib/{internals/extensions → extensions}/symbol_spec.rb +0 -0
- data/spec/lib/{internals/frontend_adapters → frontend_adapters}/rack_spec.rb +10 -10
- data/spec/lib/generators/aliases_spec.rb +3 -3
- data/spec/lib/{internals/generators → generators}/cacher_strategy_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/partial/default_spec.rb +3 -3
- data/spec/lib/{internals/generators → generators}/partial/none_spec.rb +2 -2
- data/spec/lib/{internals/generators → generators}/partial/substring_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/partial_generator_spec.rb +3 -3
- data/spec/lib/{internals/generators → generators}/similarity/double_metaphone_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/similarity/metaphone_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/similarity/none_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/similarity/phonetic_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/similarity/soundex_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/similarity_generator_spec.rb +2 -2
- data/spec/lib/{internals/generators → generators}/weights/logarithmic_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/weights_generator_spec.rb +5 -5
- data/spec/lib/{internals/helpers → helpers}/measuring_spec.rb +0 -0
- data/spec/lib/{internals/indexed/index_spec.rb → index/base_indexed_spec.rb} +5 -5
- data/spec/lib/{internals/indexing/index_spec.rb → index/base_indexing_spec.rb} +6 -19
- data/spec/lib/index/base_spec.rb +10 -53
- data/spec/lib/{internals/indexed → indexed}/bundle/memory_spec.rb +5 -5
- data/spec/lib/{internals/indexed → indexed}/bundle/redis_spec.rb +4 -4
- data/spec/lib/{internals/indexed → indexed}/wrappers/bundle/calculation_spec.rb +1 -1
- data/spec/lib/{internals/indexed → indexed}/wrappers/bundle/wrapper_spec.rb +1 -1
- data/spec/lib/{internals/indexed → indexed}/wrappers/exact_first_spec.rb +7 -7
- data/spec/lib/{internals/indexers → indexers}/base_spec.rb +0 -0
- data/spec/lib/{internals/indexers → indexers}/parallel_spec.rb +0 -0
- data/spec/lib/{internals/indexers → indexers}/serial_spec.rb +0 -0
- data/spec/lib/indexes_class_spec.rb +30 -0
- data/spec/lib/{indexed/indexes_spec.rb → indexes_indexed_spec.rb} +1 -1
- data/spec/lib/{indexing/indexes_spec.rb → indexes_indexing_spec.rb} +8 -8
- data/spec/lib/{internals/indexing/indexes_spec.rb → indexes_spec.rb} +15 -12
- data/spec/lib/{internals/indexing → indexing}/bundle/memory_partial_generation_speed_spec.rb +4 -4
- data/spec/lib/{internals/indexing → indexing}/bundle/memory_spec.rb +3 -3
- data/spec/lib/{internals/indexing → indexing}/bundle/redis_spec.rb +3 -3
- data/spec/lib/{internals/indexing → indexing}/bundle/super_base_spec.rb +2 -2
- data/spec/lib/{internals/interfaces → interfaces}/live_parameters_spec.rb +0 -0
- data/spec/lib/query/allocation_spec.rb +1 -1
- data/spec/lib/query/allocations_spec.rb +1 -1
- data/spec/lib/query/combination_spec.rb +5 -5
- data/spec/lib/query/combinations/base_spec.rb +1 -1
- data/spec/lib/query/combinations/memory_spec.rb +1 -1
- data/spec/lib/query/combinations/redis_spec.rb +1 -1
- data/spec/lib/query/indexes_spec.rb +1 -1
- data/spec/lib/query/qualifiers_spec.rb +4 -4
- data/spec/lib/query/token_spec.rb +3 -3
- data/spec/lib/query/tokens_spec.rb +32 -32
- data/spec/lib/search_spec.rb +5 -5
- data/spec/lib/{internals/solr → solr}/schema_generator_spec.rb +0 -0
- data/spec/lib/sources/db_spec.rb +4 -8
- data/spec/lib/sources/wrappers/location_spec.rb +1 -1
- data/spec/lib/{internals/tokenizers → tokenizers}/base_spec.rb +1 -1
- data/spec/lib/{internals/tokenizers → tokenizers}/index_spec.rb +1 -1
- data/spec/lib/{internals/tokenizers → tokenizers}/query_spec.rb +1 -1
- metadata +214 -215
- data/lib/picky/aliases.rb +0 -4
- data/lib/picky/index_bundle.rb +0 -48
- data/lib/picky/indexed/indexes.rb +0 -59
- data/lib/picky/indexing/indexes.rb +0 -87
- data/lib/picky/internals/adapters/rack/base.rb +0 -27
- data/lib/picky/internals/adapters/rack/live_parameters.rb +0 -37
- data/lib/picky/internals/adapters/rack/query.rb +0 -69
- data/lib/picky/internals/adapters/rack.rb +0 -34
- data/lib/picky/internals/calculations/location.rb +0 -59
- data/lib/picky/internals/frontend_adapters/rack.rb +0 -150
- data/lib/picky/internals/generators/base.rb +0 -19
- data/lib/picky/internals/generators/partial/default.rb +0 -7
- data/lib/picky/internals/generators/partial/none.rb +0 -35
- data/lib/picky/internals/generators/partial/strategy.rb +0 -29
- data/lib/picky/internals/generators/partial/substring.rb +0 -122
- data/lib/picky/internals/generators/partial_generator.rb +0 -19
- data/lib/picky/internals/generators/similarity/default.rb +0 -9
- data/lib/picky/internals/generators/similarity/double_metaphone.rb +0 -32
- data/lib/picky/internals/generators/similarity/metaphone.rb +0 -32
- data/lib/picky/internals/generators/similarity/none.rb +0 -35
- data/lib/picky/internals/generators/similarity/phonetic.rb +0 -69
- data/lib/picky/internals/generators/similarity/soundex.rb +0 -32
- data/lib/picky/internals/generators/similarity/strategy.rb +0 -11
- data/lib/picky/internals/generators/similarity_generator.rb +0 -19
- data/lib/picky/internals/generators/strategy.rb +0 -18
- data/lib/picky/internals/generators/weights/default.rb +0 -9
- data/lib/picky/internals/generators/weights/logarithmic.rb +0 -43
- data/lib/picky/internals/generators/weights/strategy.rb +0 -11
- data/lib/picky/internals/generators/weights_generator.rb +0 -19
- data/lib/picky/internals/index/backend.rb +0 -112
- data/lib/picky/internals/index/file/basic.rb +0 -105
- data/lib/picky/internals/index/file/json.rb +0 -38
- data/lib/picky/internals/index/file/marshal.rb +0 -38
- data/lib/picky/internals/index/file/text.rb +0 -60
- data/lib/picky/internals/index/files.rb +0 -34
- data/lib/picky/internals/index/redis/basic.rb +0 -89
- data/lib/picky/internals/index/redis/list_hash.rb +0 -53
- data/lib/picky/internals/index/redis/string_hash.rb +0 -44
- data/lib/picky/internals/index/redis.rb +0 -44
- data/lib/picky/internals/indexed/bundle/base.rb +0 -114
- data/lib/picky/internals/indexed/bundle/memory.rb +0 -95
- data/lib/picky/internals/indexed/bundle/redis.rb +0 -49
- data/lib/picky/internals/indexed/categories.rb +0 -140
- data/lib/picky/internals/indexed/category.rb +0 -111
- data/lib/picky/internals/indexed/index.rb +0 -63
- data/lib/picky/internals/indexed/wrappers/bundle/calculation.rb +0 -37
- data/lib/picky/internals/indexed/wrappers/bundle/location.rb +0 -44
- data/lib/picky/internals/indexed/wrappers/bundle/wrapper.rb +0 -45
- data/lib/picky/internals/indexed/wrappers/category/location.rb +0 -27
- data/lib/picky/internals/indexed/wrappers/exact_first.rb +0 -59
- data/lib/picky/internals/indexing/bundle/base.rb +0 -216
- data/lib/picky/internals/indexing/bundle/memory.rb +0 -29
- data/lib/picky/internals/indexing/bundle/redis.rb +0 -28
- data/lib/picky/internals/indexing/bundle/super_base.rb +0 -65
- data/lib/picky/internals/indexing/category.rb +0 -153
- data/lib/picky/internals/indexing/index.rb +0 -142
- data/lib/picky/internals/indexing/wrappers/category/location.rb +0 -27
- data/lib/picky/internals/query/allocation.rb +0 -88
- data/lib/picky/internals/query/allocations.rb +0 -118
- data/lib/picky/internals/query/combination.rb +0 -80
- data/lib/picky/internals/query/combinations/base.rb +0 -74
- data/lib/picky/internals/query/combinations/memory.rb +0 -52
- data/lib/picky/internals/query/combinations/redis.rb +0 -90
- data/lib/picky/internals/query/indexes.rb +0 -199
- data/lib/picky/internals/query/qualifiers.rb +0 -82
- data/lib/picky/internals/query/token.rb +0 -202
- data/lib/picky/internals/query/tokens.rb +0 -109
- data/lib/picky/internals/shared/category.rb +0 -52
- data/lib/picky/internals/tokenizers/base.rb +0 -228
- data/lib/picky/internals/tokenizers/index.rb +0 -34
- data/lib/picky/internals/tokenizers/location.rb +0 -54
- data/lib/picky/internals/tokenizers/query.rb +0 -59
- data/lib/picky/internals.rb +0 -2
- data/spec/lib/aliases_spec.rb +0 -9
- data/spec/lib/index_bundle_spec.rb +0 -69
data/lib/picky/index/base.rb
CHANGED
@@ -85,7 +85,11 @@ module Index
|
|
85
85
|
#
|
86
86
|
class Base
|
87
87
|
|
88
|
-
attr_reader :name
|
88
|
+
attr_reader :name,
|
89
|
+
:categories
|
90
|
+
|
91
|
+
delegate :[],
|
92
|
+
:to => :categories
|
89
93
|
|
90
94
|
# Create a new index with a given source.
|
91
95
|
#
|
@@ -115,8 +119,22 @@ module Index
|
|
115
119
|
@name = name.to_sym
|
116
120
|
|
117
121
|
check_options options
|
118
|
-
|
119
|
-
@
|
122
|
+
|
123
|
+
@source = options[:source]
|
124
|
+
|
125
|
+
@after_indexing = options[:after_indexing]
|
126
|
+
@indexing_bundle_class = options[:indexing_bundle_class] # TODO This should probably be a fixed parameter.
|
127
|
+
@tokenizer = options[:tokenizer]
|
128
|
+
@key_format = options[:key_format]
|
129
|
+
|
130
|
+
# Indexed.
|
131
|
+
#
|
132
|
+
@result_identifier = options[:result_identifier] || name
|
133
|
+
@indexed_bundle_class = options[:indexed_bundle_class] # TODO This should probably be a fixed parameter.
|
134
|
+
|
135
|
+
# TODO Move ignore_unassigned_tokens to query, somehow.
|
136
|
+
#
|
137
|
+
@categories = Categories.new ignore_unassigned_tokens: (options[:ignore_unassigned_tokens] || false)
|
120
138
|
|
121
139
|
# Centralized registry.
|
122
140
|
#
|
@@ -126,104 +144,19 @@ module Index
|
|
126
144
|
#
|
127
145
|
instance_eval(&Proc.new) if block_given?
|
128
146
|
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
@indexing
|
133
|
-
end
|
134
|
-
def internal_indexed # :nodoc:
|
135
|
-
@indexed
|
136
|
-
end
|
137
|
-
#
|
138
|
-
# Since this is an API, we fail hard quickly.
|
139
|
-
#
|
140
|
-
def check_name name # :nodoc:
|
141
|
-
raise ArgumentError.new(<<-NAME
|
142
|
-
|
143
|
-
|
144
|
-
The index identifier (you gave "#{name}") for Index::Memory/Index::Redis should be a Symbol/String,
|
145
|
-
Examples:
|
146
|
-
Index::Memory.new(:my_cool_index) # Recommended
|
147
|
-
Index::Redis.new("a-redis-index")
|
148
|
-
NAME
|
149
|
-
|
150
|
-
|
151
|
-
) unless name.respond_to?(:to_sym)
|
152
|
-
end
|
153
|
-
def check_options options # :nodoc:
|
154
|
-
raise ArgumentError.new(<<-OPTIONS
|
155
|
-
|
156
|
-
|
157
|
-
Sources are not passed in as second parameter for #{self.class.name} anymore, but either
|
158
|
-
* as :source option:
|
159
|
-
#{self.class.name}.new(#{name.inspect}, source: #{options})
|
160
|
-
or
|
161
|
-
* given to the #source method inside the config block:
|
162
|
-
#{self.class.name}.new(#{name.inspect}) do
|
163
|
-
source #{options}
|
164
|
-
end
|
165
|
-
|
166
|
-
Sorry about that breaking change (in 2.2.0), didn't want to go to 3.0.0 yet!
|
167
|
-
|
168
|
-
All the best
|
169
|
-
-- Picky
|
170
|
-
|
171
|
-
|
172
|
-
OPTIONS
|
173
|
-
) unless options.respond_to?(:[])
|
174
|
-
end
|
175
|
-
def check_source source # :nodoc:
|
176
|
-
raise ArgumentError.new(<<-SOURCE
|
177
|
-
|
178
|
-
|
179
|
-
The index "#{name}" should use a data source that responds to either the method #each, or the method #harvest, which yields(id, text).
|
180
|
-
Or it could use one of the built-in sources:
|
181
|
-
Sources::#{(Sources.constants - [:Base, :Wrappers, :NoCSVFileGiven, :NoCouchDBGiven]).join(',
|
182
|
-
Sources::')}
|
183
|
-
|
184
|
-
|
185
|
-
SOURCE
|
186
|
-
) unless source.respond_to?(:each) || source.respond_to?(:harvest)
|
187
|
-
end
|
188
|
-
|
189
|
-
def to_stats # :nodoc:
|
190
|
-
stats = <<-INDEX
|
191
|
-
#{name} (#{self.class}):
|
192
|
-
#{"source: #{internal_indexing.source}".indented_to_s}
|
193
|
-
#{"categories: #{internal_indexing.categories.map(&:name).join(', ')}".indented_to_s}
|
194
|
-
INDEX
|
195
|
-
stats << " result identifier: \"#{internal_indexed.result_identifier}\"".indented_to_s unless internal_indexed.result_identifier.to_s == internal_indexed.name.to_s
|
196
|
-
stats
|
197
|
-
end
|
198
|
-
|
199
|
-
# Define an index tokenizer on the index.
|
200
|
-
#
|
201
|
-
# Parameters are the exact same as for indexing.
|
202
|
-
#
|
203
|
-
def indexing options = {}
|
204
|
-
internal_indexing.define_indexing options
|
147
|
+
# Check if any source has been given in the block or the options.
|
148
|
+
#
|
149
|
+
check_source @source
|
205
150
|
end
|
206
|
-
alias define_indexing indexing
|
207
151
|
|
208
|
-
#
|
209
|
-
#
|
210
|
-
# Parameter is a source, either one of the standard sources or
|
211
|
-
# anything responding to #each and returning objects that
|
212
|
-
# respond to id and the category names (or the category from option).
|
152
|
+
# Default bundles.
|
213
153
|
#
|
214
|
-
def
|
215
|
-
|
154
|
+
def indexing_bundle_class
|
155
|
+
Indexing::Bundle::Memory
|
216
156
|
end
|
217
|
-
|
218
|
-
|
219
|
-
# Define a key_format on the index.
|
220
|
-
#
|
221
|
-
# Parameter is a method name to use on the key (e.g. :to_i, :to_s, :strip).
|
222
|
-
#
|
223
|
-
def key_format key_format
|
224
|
-
internal_indexing.define_key_format key_format
|
157
|
+
def indexed_bundle_class
|
158
|
+
Indexed::Bundle::Memory
|
225
159
|
end
|
226
|
-
alias define_key_format key_format
|
227
160
|
|
228
161
|
# Defines a searchable category on the index.
|
229
162
|
#
|
@@ -239,17 +172,26 @@ INDEX
|
|
239
172
|
# * from: Take the data from the data category with this name. Example: You have a source Sources::CSV.new(:title, file:'some_file.csv') but you want the category to be called differently. Then you use from: define_category(:similar_title, :from => :title).
|
240
173
|
#
|
241
174
|
def category category_name, options = {}
|
242
|
-
|
175
|
+
options = default_category_options.merge options
|
243
176
|
|
244
|
-
|
245
|
-
|
177
|
+
new_category = Category.new category_name.to_sym, self, options
|
178
|
+
categories << new_category
|
246
179
|
|
247
|
-
yield
|
180
|
+
new_category = yield new_category if block_given?
|
248
181
|
|
249
|
-
|
182
|
+
new_category
|
250
183
|
end
|
251
184
|
alias define_category category
|
252
185
|
|
186
|
+
# By default, the category uses
|
187
|
+
# * the index's bundle type.
|
188
|
+
#
|
189
|
+
def default_category_options
|
190
|
+
{
|
191
|
+
:indexed_bundle_class => @indexed_bundle_class
|
192
|
+
}
|
193
|
+
end
|
194
|
+
|
253
195
|
# Make this category range searchable with a fixed range. If you need other
|
254
196
|
# ranges, define another category with a different range value.
|
255
197
|
#
|
@@ -318,9 +260,9 @@ INDEX
|
|
318
260
|
|
319
261
|
options = { partial: Partial::None.new }.merge options
|
320
262
|
|
321
|
-
define_category category_name, options do |
|
322
|
-
|
323
|
-
|
263
|
+
define_category category_name, options do |category|
|
264
|
+
Indexing::Wrappers::Category::Location.install_on category, range, precision
|
265
|
+
Indexed::Wrappers::Category::Location.install_on category, range, precision
|
324
266
|
end
|
325
267
|
end
|
326
268
|
alias define_ranged_category ranged_category
|
@@ -386,6 +328,79 @@ INDEX
|
|
386
328
|
|
387
329
|
end
|
388
330
|
alias define_geo_categories geo_categories
|
331
|
+
|
332
|
+
#
|
333
|
+
# Since this is an API, we fail hard quickly.
|
334
|
+
#
|
335
|
+
def check_name name # :nodoc:
|
336
|
+
raise ArgumentError.new(<<-NAME
|
337
|
+
|
338
|
+
|
339
|
+
The index identifier (you gave "#{name}") for Index::Memory/Index::Redis should be a Symbol/String,
|
340
|
+
Examples:
|
341
|
+
Index::Memory.new(:my_cool_index) # Recommended
|
342
|
+
Index::Redis.new("a-redis-index")
|
343
|
+
NAME
|
344
|
+
|
345
|
+
|
346
|
+
) unless name.respond_to?(:to_sym)
|
347
|
+
end
|
348
|
+
def check_options options # :nodoc:
|
349
|
+
raise ArgumentError.new(<<-OPTIONS
|
350
|
+
|
351
|
+
|
352
|
+
Sources are not passed in as second parameter for #{self.class.name} anymore, but either
|
353
|
+
* as :source option:
|
354
|
+
#{self.class.name}.new(#{name.inspect}, source: #{options})
|
355
|
+
or
|
356
|
+
* given to the #source method inside the config block:
|
357
|
+
#{self.class.name}.new(#{name.inspect}) do
|
358
|
+
source #{options}
|
359
|
+
end
|
360
|
+
|
361
|
+
Sorry about that breaking change (in 2.2.0), didn't want to go to 3.0.0 yet!
|
362
|
+
|
363
|
+
All the best
|
364
|
+
-- Picky
|
365
|
+
|
366
|
+
|
367
|
+
OPTIONS
|
368
|
+
) unless options.respond_to?(:[])
|
369
|
+
end
|
370
|
+
def check_source source # :nodoc:
|
371
|
+
raise ArgumentError.new(<<-SOURCE
|
372
|
+
|
373
|
+
|
374
|
+
The index "#{name}" should use a data source that responds to either the method #each, or the method #harvest, which yields(id, text).
|
375
|
+
Or it could use one of the built-in sources:
|
376
|
+
Sources::#{(Sources.constants - [:Base, :Wrappers, :NoCSVFileGiven, :NoCouchDBGiven]).join(',
|
377
|
+
Sources::')}
|
378
|
+
|
379
|
+
|
380
|
+
SOURCE
|
381
|
+
) unless source.respond_to?(:each) || source.respond_to?(:harvest)
|
382
|
+
end
|
383
|
+
|
384
|
+
def method_name
|
385
|
+
|
386
|
+
end
|
387
|
+
|
388
|
+
#
|
389
|
+
#
|
390
|
+
def to_s
|
391
|
+
"#{self.class}(#{name}, result_id: #{result_identifier}, source: #{source}, categories: #{categories})"
|
392
|
+
end
|
393
|
+
|
394
|
+
def to_stats # :nodoc:
|
395
|
+
stats = <<-INDEX
|
396
|
+
#{name} (#{self.class}):
|
397
|
+
#{"source: #{source}".indented_to_s}
|
398
|
+
#{"categories: #{categories.map(&:name).join(', ')}".indented_to_s}
|
399
|
+
INDEX
|
400
|
+
stats << " result identifier: \"#{result_identifier}\"".indented_to_s unless result_identifier.to_s == name.to_s
|
401
|
+
stats
|
402
|
+
end
|
403
|
+
|
389
404
|
end
|
390
405
|
|
391
406
|
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
module Index
|
2
|
+
|
3
|
+
#
|
4
|
+
#
|
5
|
+
class Base
|
6
|
+
|
7
|
+
attr_reader :result_identifier,
|
8
|
+
:combinator
|
9
|
+
|
10
|
+
delegate :load_from_cache,
|
11
|
+
:analyze,
|
12
|
+
:reindex,
|
13
|
+
:to => :categories
|
14
|
+
|
15
|
+
alias reload load_from_cache
|
16
|
+
|
17
|
+
# Return the possible combinations for this token.
|
18
|
+
#
|
19
|
+
# A combination is a tuple <token, index_bundle>.
|
20
|
+
#
|
21
|
+
def possible_combinations token
|
22
|
+
categories.possible_combinations_for token
|
23
|
+
end
|
24
|
+
|
25
|
+
end
|
26
|
+
|
27
|
+
end
|
@@ -0,0 +1,119 @@
|
|
1
|
+
module Index
|
2
|
+
|
3
|
+
#
|
4
|
+
#
|
5
|
+
class Base
|
6
|
+
|
7
|
+
attr_reader :after_indexing,
|
8
|
+
:bundle_class,
|
9
|
+
:tokenizer
|
10
|
+
|
11
|
+
# Delegators for indexing.
|
12
|
+
#
|
13
|
+
delegate :backup_caches,
|
14
|
+
:cache,
|
15
|
+
:check_caches,
|
16
|
+
:clear_caches,
|
17
|
+
:create_directory_structure,
|
18
|
+
:generate_caches,
|
19
|
+
:restore_caches,
|
20
|
+
:to => :categories
|
21
|
+
|
22
|
+
delegate :connect_backend,
|
23
|
+
:to => :source
|
24
|
+
|
25
|
+
# Calling index on an index will
|
26
|
+
# * prepare (the data)
|
27
|
+
# * cache (the data)
|
28
|
+
# on every category.
|
29
|
+
#
|
30
|
+
def index
|
31
|
+
prepare
|
32
|
+
cache
|
33
|
+
end
|
34
|
+
|
35
|
+
# Define an index tokenizer on the index.
|
36
|
+
#
|
37
|
+
# Parameters are the exact same as for indexing.
|
38
|
+
#
|
39
|
+
def indexing options = {}
|
40
|
+
@tokenizer = Tokenizers::Index.new options
|
41
|
+
end
|
42
|
+
alias define_indexing indexing
|
43
|
+
|
44
|
+
# Define a source on the index.
|
45
|
+
#
|
46
|
+
# Parameter is a source, either one of the standard sources or
|
47
|
+
# anything responding to #each and returning objects that
|
48
|
+
# respond to id and the category names (or the category from option).
|
49
|
+
#
|
50
|
+
def source some_source = nil
|
51
|
+
some_source ? define_source(some_source) : (@source || raise_no_source)
|
52
|
+
end
|
53
|
+
def define_source source
|
54
|
+
@source = source
|
55
|
+
end
|
56
|
+
def raise_no_source
|
57
|
+
raise NoSourceSpecifiedException.new(<<-NO_SOURCE
|
58
|
+
|
59
|
+
|
60
|
+
No source given for index #{name}. An index needs a source.
|
61
|
+
Example:
|
62
|
+
Index::Memory.new(:with_source) do
|
63
|
+
source Sources::CSV.new(:title, file: 'data/books.csv')
|
64
|
+
category :title
|
65
|
+
category :author
|
66
|
+
end
|
67
|
+
|
68
|
+
NO_SOURCE
|
69
|
+
)
|
70
|
+
end
|
71
|
+
|
72
|
+
# Define a key_format on the index.
|
73
|
+
#
|
74
|
+
# Parameter is a method name to use on the key (e.g. :to_i, :to_s, :strip).
|
75
|
+
#
|
76
|
+
def key_format format = nil
|
77
|
+
format ? define_key_format(format) : (@key_format || :to_i)
|
78
|
+
end
|
79
|
+
def define_key_format key_format
|
80
|
+
@key_format = key_format
|
81
|
+
end
|
82
|
+
|
83
|
+
# Decides whether to use a parallel indexer or whether to
|
84
|
+
# delegate to each category to index themselves.
|
85
|
+
#
|
86
|
+
# TODO Rename to prepare.
|
87
|
+
#
|
88
|
+
def prepare
|
89
|
+
# TODO Duplicated in category.rb def indexer.
|
90
|
+
#
|
91
|
+
if source.respond_to?(:each)
|
92
|
+
warn %Q{\n\033[1mWarning\033[m, source for index "#{name}" is empty: #{source} (responds true to empty?).\n} if source.respond_to?(:empty?) && source.empty?
|
93
|
+
index_parallel
|
94
|
+
else
|
95
|
+
categories.each &:prepare
|
96
|
+
end
|
97
|
+
end
|
98
|
+
|
99
|
+
# Indexes the categories in parallel.
|
100
|
+
#
|
101
|
+
# Only use where the category does not have a non-#each source defined.
|
102
|
+
#
|
103
|
+
def index_parallel
|
104
|
+
indexer = Indexers::Parallel.new self
|
105
|
+
categories.first.prepare_index_directory # TODO Unnice.
|
106
|
+
indexer.index
|
107
|
+
end
|
108
|
+
|
109
|
+
# Indexing.
|
110
|
+
#
|
111
|
+
# Note: If it is an each source we do not take a snapshot.
|
112
|
+
#
|
113
|
+
def take_snapshot
|
114
|
+
source.take_snapshot self unless source.respond_to? :each
|
115
|
+
end
|
116
|
+
|
117
|
+
end
|
118
|
+
|
119
|
+
end
|
data/lib/picky/index/memory.rb
CHANGED
@@ -3,24 +3,12 @@ module Index
|
|
3
3
|
# An index that is persisted in files, loaded at startup and kept in memory at runtime.
|
4
4
|
#
|
5
5
|
class Memory < Base
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
# - in the frontend to describe which index a result came from.
|
13
|
-
# - index directory naming (index/development/the_identifier/<lots of indexes>)
|
14
|
-
# * source: The source the data comes from. See Sources::Base.
|
15
|
-
#
|
16
|
-
# Options:
|
17
|
-
# * result_identifier: Use if you'd like a different identifier/name in the results JSON than the name of the index.
|
18
|
-
#
|
19
|
-
def initialize name, options = {}
|
20
|
-
options[:indexing_bundle_class] ||= Internals::Indexing::Bundle::Memory
|
21
|
-
options[:indexed_bundle_class] ||= Internals::Indexed::Bundle::Memory
|
22
|
-
|
23
|
-
super name, options
|
6
|
+
|
7
|
+
def indexing_bundle_class
|
8
|
+
Indexing::Bundle::Memory
|
9
|
+
end
|
10
|
+
def indexed_bundle_class
|
11
|
+
Indexed::Bundle::Memory
|
24
12
|
end
|
25
13
|
|
26
14
|
end
|
data/lib/picky/index/redis.rb
CHANGED
@@ -3,24 +3,12 @@ module Index
|
|
3
3
|
# An index that is persisted in Redis.
|
4
4
|
#
|
5
5
|
class Redis < Base
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
# - in the frontend to describe which index a result came from.
|
13
|
-
# - index directory naming (index/development/the_identifier/<lots of indexes>)
|
14
|
-
# * source: The source the data comes from. See Sources::Base.
|
15
|
-
#
|
16
|
-
# Options:
|
17
|
-
# * result_identifier: Use if you'd like a different identifier/name in the results JSON than the name of the index.
|
18
|
-
#
|
19
|
-
def initialize name, options = {}
|
20
|
-
options[:indexing_bundle_class] ||= Internals::Indexing::Bundle::Redis
|
21
|
-
options[:indexed_bundle_class] ||= Internals::Indexed::Bundle::Redis
|
22
|
-
|
23
|
-
super name, options
|
6
|
+
|
7
|
+
def indexing_bundle_class
|
8
|
+
Indexing::Bundle::Redis
|
9
|
+
end
|
10
|
+
def indexed_bundle_class
|
11
|
+
Indexed::Bundle::Redis
|
24
12
|
end
|
25
13
|
|
26
14
|
end
|
@@ -0,0 +1,110 @@
|
|
1
|
+
module Indexed # :nodoc:all
|
2
|
+
|
3
|
+
# A Bundle is a number of indexes
|
4
|
+
# per [index, category] combination.
|
5
|
+
#
|
6
|
+
# At most, there are three indexes:
|
7
|
+
# * *core* index (always used)
|
8
|
+
# * *weights* index (always used)
|
9
|
+
# * *similarity* index (used with similarity)
|
10
|
+
#
|
11
|
+
# In Picky, indexing is separated from the index
|
12
|
+
# handling itself through a parallel structure.
|
13
|
+
#
|
14
|
+
# Both use methods provided by this base class, but
|
15
|
+
# have very different goals:
|
16
|
+
#
|
17
|
+
# * *Indexing*::*Bundle* is just concerned with creating index files
|
18
|
+
# and providing helper functions to e.g. check the indexes.
|
19
|
+
#
|
20
|
+
# * *Indexed*::*Bundle* is concerned with loading these index files into
|
21
|
+
# memory and looking up search data as fast as possible.
|
22
|
+
#
|
23
|
+
module Bundle
|
24
|
+
|
25
|
+
class Base
|
26
|
+
|
27
|
+
attr_reader :identifier, :configuration
|
28
|
+
attr_accessor :similarity_strategy
|
29
|
+
attr_accessor :index, :weights, :similarity, :configuration
|
30
|
+
|
31
|
+
delegate :[], :to => :configuration
|
32
|
+
delegate :size, :to => :index
|
33
|
+
|
34
|
+
def initialize name, category, similarity_strategy
|
35
|
+
@identifier = "#{category.identifier}:#{name}"
|
36
|
+
|
37
|
+
@index = {}
|
38
|
+
@weights = {}
|
39
|
+
@similarity = {}
|
40
|
+
|
41
|
+
@similarity_strategy = similarity_strategy
|
42
|
+
end
|
43
|
+
|
44
|
+
# Get a list of similar texts.
|
45
|
+
#
|
46
|
+
# Note: Does not return itself.
|
47
|
+
#
|
48
|
+
def similar text
|
49
|
+
code = similarity_strategy.encoded text
|
50
|
+
similar_codes = code && @similarity[code]
|
51
|
+
similar_codes.delete text if similar_codes
|
52
|
+
similar_codes || []
|
53
|
+
end
|
54
|
+
|
55
|
+
# Loads all indexes.
|
56
|
+
#
|
57
|
+
def load
|
58
|
+
load_index
|
59
|
+
load_weights
|
60
|
+
load_similarity
|
61
|
+
load_configuration
|
62
|
+
end
|
63
|
+
|
64
|
+
# Loads the core index.
|
65
|
+
#
|
66
|
+
def load_index
|
67
|
+
# No loading needed.
|
68
|
+
end
|
69
|
+
# Loads the weights index.
|
70
|
+
#
|
71
|
+
def load_weights
|
72
|
+
# No loading needed.
|
73
|
+
end
|
74
|
+
# Loads the similarity index.
|
75
|
+
#
|
76
|
+
def load_similarity
|
77
|
+
# No loading needed.
|
78
|
+
end
|
79
|
+
# Loads the configuration.
|
80
|
+
#
|
81
|
+
def load_configuration
|
82
|
+
# No loading needed.
|
83
|
+
end
|
84
|
+
|
85
|
+
# Clears the core index.
|
86
|
+
#
|
87
|
+
def clear_index
|
88
|
+
# No clearing needed.
|
89
|
+
end
|
90
|
+
# Clears the weights index.
|
91
|
+
#
|
92
|
+
def clear_weights
|
93
|
+
# No clearing needed.
|
94
|
+
end
|
95
|
+
# Clears the similarity index.
|
96
|
+
#
|
97
|
+
def clear_similarity
|
98
|
+
# No clearing needed.
|
99
|
+
end
|
100
|
+
# Clears the configuration.
|
101
|
+
#
|
102
|
+
def clear_configuration
|
103
|
+
# No clearing needed.
|
104
|
+
end
|
105
|
+
|
106
|
+
end
|
107
|
+
|
108
|
+
end
|
109
|
+
|
110
|
+
end
|
@@ -0,0 +1,91 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
#
|
3
|
+
module Indexed # :nodoc:all
|
4
|
+
|
5
|
+
#
|
6
|
+
#
|
7
|
+
module Bundle
|
8
|
+
|
9
|
+
# This is the _actual_ index (based on memory).
|
10
|
+
#
|
11
|
+
# Handles exact/partial index, weights index, and similarity index.
|
12
|
+
#
|
13
|
+
# Delegates file handling and checking to a *Backend*::*Files* object.
|
14
|
+
#
|
15
|
+
class Memory < Base
|
16
|
+
|
17
|
+
delegate :[], :to => :configuration
|
18
|
+
|
19
|
+
def initialize name, configuration, *args
|
20
|
+
super name, configuration, *args
|
21
|
+
|
22
|
+
@configuration = {} # A hash with config options.
|
23
|
+
|
24
|
+
@backend = Backend::Files.new name, configuration
|
25
|
+
end
|
26
|
+
|
27
|
+
def to_s
|
28
|
+
<<-MEMORY
|
29
|
+
Memory
|
30
|
+
#{@backend.indented_to_s}
|
31
|
+
MEMORY
|
32
|
+
end
|
33
|
+
|
34
|
+
# Get the ids for the given symbol.
|
35
|
+
#
|
36
|
+
def ids sym
|
37
|
+
@index[sym] || []
|
38
|
+
end
|
39
|
+
# Get a weight for the given symbol.
|
40
|
+
#
|
41
|
+
def weight sym
|
42
|
+
@weights[sym]
|
43
|
+
end
|
44
|
+
|
45
|
+
# Loads the core index.
|
46
|
+
#
|
47
|
+
def load_index
|
48
|
+
self.index = @backend.load_index
|
49
|
+
end
|
50
|
+
# Loads the weights index.
|
51
|
+
#
|
52
|
+
def load_weights
|
53
|
+
self.weights = @backend.load_weights
|
54
|
+
end
|
55
|
+
# Loads the similarity index.
|
56
|
+
#
|
57
|
+
def load_similarity
|
58
|
+
self.similarity = @backend.load_similarity
|
59
|
+
end
|
60
|
+
# Loads the configuration.
|
61
|
+
#
|
62
|
+
def load_configuration
|
63
|
+
self.configuration = @backend.load_configuration
|
64
|
+
end
|
65
|
+
|
66
|
+
# Clears the core index.
|
67
|
+
#
|
68
|
+
def clear_index
|
69
|
+
self.index = {}
|
70
|
+
end
|
71
|
+
# Clears the weights index.
|
72
|
+
#
|
73
|
+
def clear_weights
|
74
|
+
self.weights = {}
|
75
|
+
end
|
76
|
+
# Clears the similarity index.
|
77
|
+
#
|
78
|
+
def clear_similarity
|
79
|
+
self.similarity = {}
|
80
|
+
end
|
81
|
+
# Clears the configuration.
|
82
|
+
#
|
83
|
+
def clear_configuration
|
84
|
+
self.configuration = {}
|
85
|
+
end
|
86
|
+
|
87
|
+
end
|
88
|
+
|
89
|
+
end
|
90
|
+
|
91
|
+
end
|