picky 2.5.2 → 2.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/picky/adapters/rack/base.rb +23 -0
- data/lib/picky/adapters/rack/live_parameters.rb +33 -0
- data/lib/picky/adapters/rack/query.rb +65 -0
- data/lib/picky/adapters/rack.rb +30 -0
- data/lib/picky/application.rb +5 -5
- data/lib/picky/backend/backend.rb +108 -0
- data/lib/picky/backend/file/basic.rb +101 -0
- data/lib/picky/backend/file/json.rb +34 -0
- data/lib/picky/backend/file/marshal.rb +34 -0
- data/lib/picky/backend/file/text.rb +56 -0
- data/lib/picky/backend/files.rb +30 -0
- data/lib/picky/backend/redis/basic.rb +85 -0
- data/lib/picky/backend/redis/list_hash.rb +49 -0
- data/lib/picky/backend/redis/string_hash.rb +40 -0
- data/lib/picky/backend/redis.rb +40 -0
- data/lib/picky/calculations/location.rb +57 -0
- data/lib/picky/categories.rb +62 -0
- data/lib/picky/categories_indexed.rb +93 -0
- data/lib/picky/categories_indexing.rb +12 -0
- data/lib/picky/category.rb +127 -0
- data/lib/picky/category_indexed.rb +64 -0
- data/lib/picky/category_indexing.rb +145 -0
- data/lib/picky/{internals/ext → ext}/maybe_compile.rb +0 -0
- data/lib/picky/{internals/ext → ext}/ruby19/extconf.rb +0 -0
- data/lib/picky/{internals/ext → ext}/ruby19/performant.c +0 -0
- data/lib/picky/{internals/extensions → extensions}/array.rb +0 -0
- data/lib/picky/extensions/class.rb +11 -0
- data/lib/picky/{internals/extensions → extensions}/hash.rb +0 -0
- data/lib/picky/{internals/extensions → extensions}/module.rb +0 -0
- data/lib/picky/{internals/extensions → extensions}/object.rb +0 -0
- data/lib/picky/{internals/extensions → extensions}/symbol.rb +0 -0
- data/lib/picky/frontend_adapters/rack.rb +146 -0
- data/lib/picky/generators/aliases.rb +3 -3
- data/lib/picky/generators/base.rb +15 -0
- data/lib/picky/generators/partial/default.rb +5 -0
- data/lib/picky/generators/partial/none.rb +31 -0
- data/lib/picky/generators/partial/strategy.rb +25 -0
- data/lib/picky/generators/partial/substring.rb +118 -0
- data/lib/picky/generators/partial_generator.rb +15 -0
- data/lib/picky/generators/similarity/default.rb +7 -0
- data/lib/picky/generators/similarity/double_metaphone.rb +28 -0
- data/lib/picky/generators/similarity/metaphone.rb +28 -0
- data/lib/picky/generators/similarity/none.rb +31 -0
- data/lib/picky/generators/similarity/phonetic.rb +65 -0
- data/lib/picky/generators/similarity/soundex.rb +28 -0
- data/lib/picky/generators/similarity/strategy.rb +9 -0
- data/lib/picky/generators/similarity_generator.rb +15 -0
- data/lib/picky/generators/strategy.rb +14 -0
- data/lib/picky/generators/weights/default.rb +7 -0
- data/lib/picky/generators/weights/logarithmic.rb +39 -0
- data/lib/picky/generators/weights/strategy.rb +9 -0
- data/lib/picky/generators/weights_generator.rb +15 -0
- data/lib/picky/{internals/helpers → helpers}/measuring.rb +0 -0
- data/lib/picky/index/base.rb +119 -104
- data/lib/picky/index/base_indexed.rb +27 -0
- data/lib/picky/index/base_indexing.rb +119 -0
- data/lib/picky/index/memory.rb +6 -18
- data/lib/picky/index/redis.rb +6 -18
- data/lib/picky/indexed/bundle/base.rb +110 -0
- data/lib/picky/indexed/bundle/memory.rb +91 -0
- data/lib/picky/indexed/bundle/redis.rb +45 -0
- data/lib/picky/indexed/wrappers/bundle/calculation.rb +35 -0
- data/lib/picky/indexed/wrappers/bundle/location.rb +42 -0
- data/lib/picky/indexed/wrappers/bundle/wrapper.rb +43 -0
- data/lib/picky/indexed/wrappers/category/location.rb +25 -0
- data/lib/picky/indexed/wrappers/exact_first.rb +55 -0
- data/lib/picky/{internals/indexers → indexers}/base.rb +0 -0
- data/lib/picky/{internals/indexers → indexers}/parallel.rb +0 -0
- data/lib/picky/{internals/indexers → indexers}/serial.rb +0 -0
- data/lib/picky/{internals/indexers → indexers}/solr.rb +0 -0
- data/lib/picky/indexes.rb +73 -0
- data/lib/picky/indexes_indexed.rb +29 -0
- data/lib/picky/indexes_indexing.rb +49 -0
- data/lib/picky/indexing/bundle/base.rb +212 -0
- data/lib/picky/indexing/bundle/memory.rb +25 -0
- data/lib/picky/indexing/bundle/redis.rb +24 -0
- data/lib/picky/indexing/bundle/super_base.rb +61 -0
- data/lib/picky/indexing/wrappers/category/location.rb +25 -0
- data/lib/picky/interfaces/live_parameters.rb +8 -8
- data/lib/picky/loader.rb +89 -95
- data/lib/picky/{internals/performant.rb → performant.rb} +0 -0
- data/lib/picky/query/allocation.rb +84 -0
- data/lib/picky/query/allocations.rb +114 -0
- data/lib/picky/query/combination.rb +76 -0
- data/lib/picky/query/combinations/base.rb +70 -0
- data/lib/picky/query/combinations/memory.rb +48 -0
- data/lib/picky/query/combinations/redis.rb +86 -0
- data/lib/picky/query/indexes.rb +195 -0
- data/lib/picky/query/qualifiers.rb +76 -0
- data/lib/picky/query/token.rb +198 -0
- data/lib/picky/query/tokens.rb +103 -0
- data/lib/picky/{internals/query → query}/weights.rb +0 -0
- data/lib/picky/results.rb +1 -1
- data/lib/picky/search.rb +6 -6
- data/lib/picky/{internals/solr → solr}/schema_generator.rb +0 -0
- data/lib/picky/sources/db.rb +7 -7
- data/lib/picky/sources/wrappers/location.rb +2 -2
- data/lib/picky/tokenizers/base.rb +224 -0
- data/lib/picky/tokenizers/index.rb +30 -0
- data/lib/picky/tokenizers/location.rb +49 -0
- data/lib/picky/tokenizers/query.rb +55 -0
- data/lib/tasks/index.rake +4 -3
- data/lib/tasks/try.rake +2 -2
- data/spec/lib/{internals/adapters → adapters}/rack/base_spec.rb +1 -1
- data/spec/lib/{internals/adapters → adapters}/rack/live_parameters_spec.rb +1 -1
- data/spec/lib/{internals/adapters → adapters}/rack/query_spec.rb +1 -1
- data/spec/lib/application_spec.rb +3 -3
- data/spec/lib/{internals/index → backend}/file/basic_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/file/json_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/file/marshal_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/file/text_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/files_spec.rb +3 -3
- data/spec/lib/{internals/index → backend}/redis/basic_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/redis/list_hash_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/redis/string_hash_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/redis_spec.rb +11 -5
- data/spec/lib/{internals/calculations → calculations}/location_spec.rb +1 -1
- data/spec/lib/{internals/indexed/categories_spec.rb → categories_indexed_spec.rb} +10 -10
- data/spec/lib/{internals/indexed/category_spec.rb → category_indexed_spec.rb} +12 -12
- data/spec/lib/{internals/indexing/category_spec.rb → category_indexing_spec.rb} +10 -10
- data/spec/lib/{internals/cores_spec.rb → cores_spec.rb} +0 -0
- data/spec/lib/{internals/extensions → extensions}/array_spec.rb +0 -0
- data/spec/lib/{internals/extensions → extensions}/hash_spec.rb +0 -0
- data/spec/lib/{internals/extensions → extensions}/module_spec.rb +0 -0
- data/spec/lib/{internals/extensions → extensions}/object_spec.rb +0 -0
- data/spec/lib/{internals/extensions → extensions}/symbol_spec.rb +0 -0
- data/spec/lib/{internals/frontend_adapters → frontend_adapters}/rack_spec.rb +10 -10
- data/spec/lib/generators/aliases_spec.rb +3 -3
- data/spec/lib/{internals/generators → generators}/cacher_strategy_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/partial/default_spec.rb +3 -3
- data/spec/lib/{internals/generators → generators}/partial/none_spec.rb +2 -2
- data/spec/lib/{internals/generators → generators}/partial/substring_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/partial_generator_spec.rb +3 -3
- data/spec/lib/{internals/generators → generators}/similarity/double_metaphone_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/similarity/metaphone_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/similarity/none_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/similarity/phonetic_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/similarity/soundex_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/similarity_generator_spec.rb +2 -2
- data/spec/lib/{internals/generators → generators}/weights/logarithmic_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/weights_generator_spec.rb +5 -5
- data/spec/lib/{internals/helpers → helpers}/measuring_spec.rb +0 -0
- data/spec/lib/{internals/indexed/index_spec.rb → index/base_indexed_spec.rb} +5 -5
- data/spec/lib/{internals/indexing/index_spec.rb → index/base_indexing_spec.rb} +6 -19
- data/spec/lib/index/base_spec.rb +10 -53
- data/spec/lib/{internals/indexed → indexed}/bundle/memory_spec.rb +5 -5
- data/spec/lib/{internals/indexed → indexed}/bundle/redis_spec.rb +4 -4
- data/spec/lib/{internals/indexed → indexed}/wrappers/bundle/calculation_spec.rb +1 -1
- data/spec/lib/{internals/indexed → indexed}/wrappers/bundle/wrapper_spec.rb +1 -1
- data/spec/lib/{internals/indexed → indexed}/wrappers/exact_first_spec.rb +7 -7
- data/spec/lib/{internals/indexers → indexers}/base_spec.rb +0 -0
- data/spec/lib/{internals/indexers → indexers}/parallel_spec.rb +0 -0
- data/spec/lib/{internals/indexers → indexers}/serial_spec.rb +0 -0
- data/spec/lib/indexes_class_spec.rb +30 -0
- data/spec/lib/{indexed/indexes_spec.rb → indexes_indexed_spec.rb} +1 -1
- data/spec/lib/{indexing/indexes_spec.rb → indexes_indexing_spec.rb} +8 -8
- data/spec/lib/{internals/indexing/indexes_spec.rb → indexes_spec.rb} +15 -12
- data/spec/lib/{internals/indexing → indexing}/bundle/memory_partial_generation_speed_spec.rb +4 -4
- data/spec/lib/{internals/indexing → indexing}/bundle/memory_spec.rb +3 -3
- data/spec/lib/{internals/indexing → indexing}/bundle/redis_spec.rb +3 -3
- data/spec/lib/{internals/indexing → indexing}/bundle/super_base_spec.rb +2 -2
- data/spec/lib/{internals/interfaces → interfaces}/live_parameters_spec.rb +0 -0
- data/spec/lib/query/allocation_spec.rb +1 -1
- data/spec/lib/query/allocations_spec.rb +1 -1
- data/spec/lib/query/combination_spec.rb +5 -5
- data/spec/lib/query/combinations/base_spec.rb +1 -1
- data/spec/lib/query/combinations/memory_spec.rb +1 -1
- data/spec/lib/query/combinations/redis_spec.rb +1 -1
- data/spec/lib/query/indexes_spec.rb +1 -1
- data/spec/lib/query/qualifiers_spec.rb +4 -4
- data/spec/lib/query/token_spec.rb +3 -3
- data/spec/lib/query/tokens_spec.rb +32 -32
- data/spec/lib/search_spec.rb +5 -5
- data/spec/lib/{internals/solr → solr}/schema_generator_spec.rb +0 -0
- data/spec/lib/sources/db_spec.rb +4 -8
- data/spec/lib/sources/wrappers/location_spec.rb +1 -1
- data/spec/lib/{internals/tokenizers → tokenizers}/base_spec.rb +1 -1
- data/spec/lib/{internals/tokenizers → tokenizers}/index_spec.rb +1 -1
- data/spec/lib/{internals/tokenizers → tokenizers}/query_spec.rb +1 -1
- metadata +214 -215
- data/lib/picky/aliases.rb +0 -4
- data/lib/picky/index_bundle.rb +0 -48
- data/lib/picky/indexed/indexes.rb +0 -59
- data/lib/picky/indexing/indexes.rb +0 -87
- data/lib/picky/internals/adapters/rack/base.rb +0 -27
- data/lib/picky/internals/adapters/rack/live_parameters.rb +0 -37
- data/lib/picky/internals/adapters/rack/query.rb +0 -69
- data/lib/picky/internals/adapters/rack.rb +0 -34
- data/lib/picky/internals/calculations/location.rb +0 -59
- data/lib/picky/internals/frontend_adapters/rack.rb +0 -150
- data/lib/picky/internals/generators/base.rb +0 -19
- data/lib/picky/internals/generators/partial/default.rb +0 -7
- data/lib/picky/internals/generators/partial/none.rb +0 -35
- data/lib/picky/internals/generators/partial/strategy.rb +0 -29
- data/lib/picky/internals/generators/partial/substring.rb +0 -122
- data/lib/picky/internals/generators/partial_generator.rb +0 -19
- data/lib/picky/internals/generators/similarity/default.rb +0 -9
- data/lib/picky/internals/generators/similarity/double_metaphone.rb +0 -32
- data/lib/picky/internals/generators/similarity/metaphone.rb +0 -32
- data/lib/picky/internals/generators/similarity/none.rb +0 -35
- data/lib/picky/internals/generators/similarity/phonetic.rb +0 -69
- data/lib/picky/internals/generators/similarity/soundex.rb +0 -32
- data/lib/picky/internals/generators/similarity/strategy.rb +0 -11
- data/lib/picky/internals/generators/similarity_generator.rb +0 -19
- data/lib/picky/internals/generators/strategy.rb +0 -18
- data/lib/picky/internals/generators/weights/default.rb +0 -9
- data/lib/picky/internals/generators/weights/logarithmic.rb +0 -43
- data/lib/picky/internals/generators/weights/strategy.rb +0 -11
- data/lib/picky/internals/generators/weights_generator.rb +0 -19
- data/lib/picky/internals/index/backend.rb +0 -112
- data/lib/picky/internals/index/file/basic.rb +0 -105
- data/lib/picky/internals/index/file/json.rb +0 -38
- data/lib/picky/internals/index/file/marshal.rb +0 -38
- data/lib/picky/internals/index/file/text.rb +0 -60
- data/lib/picky/internals/index/files.rb +0 -34
- data/lib/picky/internals/index/redis/basic.rb +0 -89
- data/lib/picky/internals/index/redis/list_hash.rb +0 -53
- data/lib/picky/internals/index/redis/string_hash.rb +0 -44
- data/lib/picky/internals/index/redis.rb +0 -44
- data/lib/picky/internals/indexed/bundle/base.rb +0 -114
- data/lib/picky/internals/indexed/bundle/memory.rb +0 -95
- data/lib/picky/internals/indexed/bundle/redis.rb +0 -49
- data/lib/picky/internals/indexed/categories.rb +0 -140
- data/lib/picky/internals/indexed/category.rb +0 -111
- data/lib/picky/internals/indexed/index.rb +0 -63
- data/lib/picky/internals/indexed/wrappers/bundle/calculation.rb +0 -37
- data/lib/picky/internals/indexed/wrappers/bundle/location.rb +0 -44
- data/lib/picky/internals/indexed/wrappers/bundle/wrapper.rb +0 -45
- data/lib/picky/internals/indexed/wrappers/category/location.rb +0 -27
- data/lib/picky/internals/indexed/wrappers/exact_first.rb +0 -59
- data/lib/picky/internals/indexing/bundle/base.rb +0 -216
- data/lib/picky/internals/indexing/bundle/memory.rb +0 -29
- data/lib/picky/internals/indexing/bundle/redis.rb +0 -28
- data/lib/picky/internals/indexing/bundle/super_base.rb +0 -65
- data/lib/picky/internals/indexing/category.rb +0 -153
- data/lib/picky/internals/indexing/index.rb +0 -142
- data/lib/picky/internals/indexing/wrappers/category/location.rb +0 -27
- data/lib/picky/internals/query/allocation.rb +0 -88
- data/lib/picky/internals/query/allocations.rb +0 -118
- data/lib/picky/internals/query/combination.rb +0 -80
- data/lib/picky/internals/query/combinations/base.rb +0 -74
- data/lib/picky/internals/query/combinations/memory.rb +0 -52
- data/lib/picky/internals/query/combinations/redis.rb +0 -90
- data/lib/picky/internals/query/indexes.rb +0 -199
- data/lib/picky/internals/query/qualifiers.rb +0 -82
- data/lib/picky/internals/query/token.rb +0 -202
- data/lib/picky/internals/query/tokens.rb +0 -109
- data/lib/picky/internals/shared/category.rb +0 -52
- data/lib/picky/internals/tokenizers/base.rb +0 -228
- data/lib/picky/internals/tokenizers/index.rb +0 -34
- data/lib/picky/internals/tokenizers/location.rb +0 -54
- data/lib/picky/internals/tokenizers/query.rb +0 -59
- data/lib/picky/internals.rb +0 -2
- data/spec/lib/aliases_spec.rb +0 -9
- data/spec/lib/index_bundle_spec.rb +0 -69
data/lib/picky/index/base.rb
CHANGED
|
@@ -85,7 +85,11 @@ module Index
|
|
|
85
85
|
#
|
|
86
86
|
class Base
|
|
87
87
|
|
|
88
|
-
attr_reader :name
|
|
88
|
+
attr_reader :name,
|
|
89
|
+
:categories
|
|
90
|
+
|
|
91
|
+
delegate :[],
|
|
92
|
+
:to => :categories
|
|
89
93
|
|
|
90
94
|
# Create a new index with a given source.
|
|
91
95
|
#
|
|
@@ -115,8 +119,22 @@ module Index
|
|
|
115
119
|
@name = name.to_sym
|
|
116
120
|
|
|
117
121
|
check_options options
|
|
118
|
-
|
|
119
|
-
@
|
|
122
|
+
|
|
123
|
+
@source = options[:source]
|
|
124
|
+
|
|
125
|
+
@after_indexing = options[:after_indexing]
|
|
126
|
+
@indexing_bundle_class = options[:indexing_bundle_class] # TODO This should probably be a fixed parameter.
|
|
127
|
+
@tokenizer = options[:tokenizer]
|
|
128
|
+
@key_format = options[:key_format]
|
|
129
|
+
|
|
130
|
+
# Indexed.
|
|
131
|
+
#
|
|
132
|
+
@result_identifier = options[:result_identifier] || name
|
|
133
|
+
@indexed_bundle_class = options[:indexed_bundle_class] # TODO This should probably be a fixed parameter.
|
|
134
|
+
|
|
135
|
+
# TODO Move ignore_unassigned_tokens to query, somehow.
|
|
136
|
+
#
|
|
137
|
+
@categories = Categories.new ignore_unassigned_tokens: (options[:ignore_unassigned_tokens] || false)
|
|
120
138
|
|
|
121
139
|
# Centralized registry.
|
|
122
140
|
#
|
|
@@ -126,104 +144,19 @@ module Index
|
|
|
126
144
|
#
|
|
127
145
|
instance_eval(&Proc.new) if block_given?
|
|
128
146
|
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
@indexing
|
|
133
|
-
end
|
|
134
|
-
def internal_indexed # :nodoc:
|
|
135
|
-
@indexed
|
|
136
|
-
end
|
|
137
|
-
#
|
|
138
|
-
# Since this is an API, we fail hard quickly.
|
|
139
|
-
#
|
|
140
|
-
def check_name name # :nodoc:
|
|
141
|
-
raise ArgumentError.new(<<-NAME
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
The index identifier (you gave "#{name}") for Index::Memory/Index::Redis should be a Symbol/String,
|
|
145
|
-
Examples:
|
|
146
|
-
Index::Memory.new(:my_cool_index) # Recommended
|
|
147
|
-
Index::Redis.new("a-redis-index")
|
|
148
|
-
NAME
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
) unless name.respond_to?(:to_sym)
|
|
152
|
-
end
|
|
153
|
-
def check_options options # :nodoc:
|
|
154
|
-
raise ArgumentError.new(<<-OPTIONS
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
Sources are not passed in as second parameter for #{self.class.name} anymore, but either
|
|
158
|
-
* as :source option:
|
|
159
|
-
#{self.class.name}.new(#{name.inspect}, source: #{options})
|
|
160
|
-
or
|
|
161
|
-
* given to the #source method inside the config block:
|
|
162
|
-
#{self.class.name}.new(#{name.inspect}) do
|
|
163
|
-
source #{options}
|
|
164
|
-
end
|
|
165
|
-
|
|
166
|
-
Sorry about that breaking change (in 2.2.0), didn't want to go to 3.0.0 yet!
|
|
167
|
-
|
|
168
|
-
All the best
|
|
169
|
-
-- Picky
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
OPTIONS
|
|
173
|
-
) unless options.respond_to?(:[])
|
|
174
|
-
end
|
|
175
|
-
def check_source source # :nodoc:
|
|
176
|
-
raise ArgumentError.new(<<-SOURCE
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
The index "#{name}" should use a data source that responds to either the method #each, or the method #harvest, which yields(id, text).
|
|
180
|
-
Or it could use one of the built-in sources:
|
|
181
|
-
Sources::#{(Sources.constants - [:Base, :Wrappers, :NoCSVFileGiven, :NoCouchDBGiven]).join(',
|
|
182
|
-
Sources::')}
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
SOURCE
|
|
186
|
-
) unless source.respond_to?(:each) || source.respond_to?(:harvest)
|
|
187
|
-
end
|
|
188
|
-
|
|
189
|
-
def to_stats # :nodoc:
|
|
190
|
-
stats = <<-INDEX
|
|
191
|
-
#{name} (#{self.class}):
|
|
192
|
-
#{"source: #{internal_indexing.source}".indented_to_s}
|
|
193
|
-
#{"categories: #{internal_indexing.categories.map(&:name).join(', ')}".indented_to_s}
|
|
194
|
-
INDEX
|
|
195
|
-
stats << " result identifier: \"#{internal_indexed.result_identifier}\"".indented_to_s unless internal_indexed.result_identifier.to_s == internal_indexed.name.to_s
|
|
196
|
-
stats
|
|
197
|
-
end
|
|
198
|
-
|
|
199
|
-
# Define an index tokenizer on the index.
|
|
200
|
-
#
|
|
201
|
-
# Parameters are the exact same as for indexing.
|
|
202
|
-
#
|
|
203
|
-
def indexing options = {}
|
|
204
|
-
internal_indexing.define_indexing options
|
|
147
|
+
# Check if any source has been given in the block or the options.
|
|
148
|
+
#
|
|
149
|
+
check_source @source
|
|
205
150
|
end
|
|
206
|
-
alias define_indexing indexing
|
|
207
151
|
|
|
208
|
-
#
|
|
209
|
-
#
|
|
210
|
-
# Parameter is a source, either one of the standard sources or
|
|
211
|
-
# anything responding to #each and returning objects that
|
|
212
|
-
# respond to id and the category names (or the category from option).
|
|
152
|
+
# Default bundles.
|
|
213
153
|
#
|
|
214
|
-
def
|
|
215
|
-
|
|
154
|
+
def indexing_bundle_class
|
|
155
|
+
Indexing::Bundle::Memory
|
|
216
156
|
end
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
# Define a key_format on the index.
|
|
220
|
-
#
|
|
221
|
-
# Parameter is a method name to use on the key (e.g. :to_i, :to_s, :strip).
|
|
222
|
-
#
|
|
223
|
-
def key_format key_format
|
|
224
|
-
internal_indexing.define_key_format key_format
|
|
157
|
+
def indexed_bundle_class
|
|
158
|
+
Indexed::Bundle::Memory
|
|
225
159
|
end
|
|
226
|
-
alias define_key_format key_format
|
|
227
160
|
|
|
228
161
|
# Defines a searchable category on the index.
|
|
229
162
|
#
|
|
@@ -239,17 +172,26 @@ INDEX
|
|
|
239
172
|
# * from: Take the data from the data category with this name. Example: You have a source Sources::CSV.new(:title, file:'some_file.csv') but you want the category to be called differently. The you use from: define_category(:similar_title, :from => :title).
|
|
240
173
|
#
|
|
241
174
|
def category category_name, options = {}
|
|
242
|
-
|
|
175
|
+
options = default_category_options.merge options
|
|
243
176
|
|
|
244
|
-
|
|
245
|
-
|
|
177
|
+
new_category = Category.new category_name.to_sym, self, options
|
|
178
|
+
categories << new_category
|
|
246
179
|
|
|
247
|
-
yield
|
|
180
|
+
new_category = yield new_category if block_given?
|
|
248
181
|
|
|
249
|
-
|
|
182
|
+
new_category
|
|
250
183
|
end
|
|
251
184
|
alias define_category category
|
|
252
185
|
|
|
186
|
+
# By default, the category uses
|
|
187
|
+
# * the index's bundle type.
|
|
188
|
+
#
|
|
189
|
+
def default_category_options
|
|
190
|
+
{
|
|
191
|
+
:indexed_bundle_class => @indexed_bundle_class
|
|
192
|
+
}
|
|
193
|
+
end
|
|
194
|
+
|
|
253
195
|
# Make this category range searchable with a fixed range. If you need other
|
|
254
196
|
# ranges, define another category with a different range value.
|
|
255
197
|
#
|
|
@@ -318,9 +260,9 @@ INDEX
|
|
|
318
260
|
|
|
319
261
|
options = { partial: Partial::None.new }.merge options
|
|
320
262
|
|
|
321
|
-
define_category category_name, options do |
|
|
322
|
-
|
|
323
|
-
|
|
263
|
+
define_category category_name, options do |category|
|
|
264
|
+
Indexing::Wrappers::Category::Location.install_on category, range, precision
|
|
265
|
+
Indexed::Wrappers::Category::Location.install_on category, range, precision
|
|
324
266
|
end
|
|
325
267
|
end
|
|
326
268
|
alias define_ranged_category ranged_category
|
|
@@ -386,6 +328,79 @@ INDEX
|
|
|
386
328
|
|
|
387
329
|
end
|
|
388
330
|
alias define_geo_categories geo_categories
|
|
331
|
+
|
|
332
|
+
#
|
|
333
|
+
# Since this is an API, we fail hard quickly.
|
|
334
|
+
#
|
|
335
|
+
def check_name name # :nodoc:
|
|
336
|
+
raise ArgumentError.new(<<-NAME
|
|
337
|
+
|
|
338
|
+
|
|
339
|
+
The index identifier (you gave "#{name}") for Index::Memory/Index::Redis should be a Symbol/String,
|
|
340
|
+
Examples:
|
|
341
|
+
Index::Memory.new(:my_cool_index) # Recommended
|
|
342
|
+
Index::Redis.new("a-redis-index")
|
|
343
|
+
NAME
|
|
344
|
+
|
|
345
|
+
|
|
346
|
+
) unless name.respond_to?(:to_sym)
|
|
347
|
+
end
|
|
348
|
+
def check_options options # :nodoc:
|
|
349
|
+
raise ArgumentError.new(<<-OPTIONS
|
|
350
|
+
|
|
351
|
+
|
|
352
|
+
Sources are not passed in as second parameter for #{self.class.name} anymore, but either
|
|
353
|
+
* as :source option:
|
|
354
|
+
#{self.class.name}.new(#{name.inspect}, source: #{options})
|
|
355
|
+
or
|
|
356
|
+
* given to the #source method inside the config block:
|
|
357
|
+
#{self.class.name}.new(#{name.inspect}) do
|
|
358
|
+
source #{options}
|
|
359
|
+
end
|
|
360
|
+
|
|
361
|
+
Sorry about that breaking change (in 2.2.0), didn't want to go to 3.0.0 yet!
|
|
362
|
+
|
|
363
|
+
All the best
|
|
364
|
+
-- Picky
|
|
365
|
+
|
|
366
|
+
|
|
367
|
+
OPTIONS
|
|
368
|
+
) unless options.respond_to?(:[])
|
|
369
|
+
end
|
|
370
|
+
def check_source source # :nodoc:
|
|
371
|
+
raise ArgumentError.new(<<-SOURCE
|
|
372
|
+
|
|
373
|
+
|
|
374
|
+
The index "#{name}" should use a data source that responds to either the method #each, or the method #harvest, which yields(id, text).
|
|
375
|
+
Or it could use one of the built-in sources:
|
|
376
|
+
Sources::#{(Sources.constants - [:Base, :Wrappers, :NoCSVFileGiven, :NoCouchDBGiven]).join(',
|
|
377
|
+
Sources::')}
|
|
378
|
+
|
|
379
|
+
|
|
380
|
+
SOURCE
|
|
381
|
+
) unless source.respond_to?(:each) || source.respond_to?(:harvest)
|
|
382
|
+
end
|
|
383
|
+
|
|
384
|
+
def method_name
|
|
385
|
+
|
|
386
|
+
end
|
|
387
|
+
|
|
388
|
+
#
|
|
389
|
+
#
|
|
390
|
+
def to_s
|
|
391
|
+
"#{self.class}(#{name}, result_id: #{result_identifier}, source: #{source}, categories: #{categories})"
|
|
392
|
+
end
|
|
393
|
+
|
|
394
|
+
def to_stats # :nodoc:
|
|
395
|
+
stats = <<-INDEX
|
|
396
|
+
#{name} (#{self.class}):
|
|
397
|
+
#{"source: #{source}".indented_to_s}
|
|
398
|
+
#{"categories: #{categories.map(&:name).join(', ')}".indented_to_s}
|
|
399
|
+
INDEX
|
|
400
|
+
stats << " result identifier: \"#{result_identifier}\"".indented_to_s unless result_identifier.to_s == name.to_s
|
|
401
|
+
stats
|
|
402
|
+
end
|
|
403
|
+
|
|
389
404
|
end
|
|
390
405
|
|
|
391
406
|
end
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
module Index
|
|
2
|
+
|
|
3
|
+
#
|
|
4
|
+
#
|
|
5
|
+
class Base
|
|
6
|
+
|
|
7
|
+
attr_reader :result_identifier,
|
|
8
|
+
:combinator
|
|
9
|
+
|
|
10
|
+
delegate :load_from_cache,
|
|
11
|
+
:analyze,
|
|
12
|
+
:reindex,
|
|
13
|
+
:to => :categories
|
|
14
|
+
|
|
15
|
+
alias reload load_from_cache
|
|
16
|
+
|
|
17
|
+
# Return the possible combinations for this token.
|
|
18
|
+
#
|
|
19
|
+
# A combination is a tuple <token, index_bundle>.
|
|
20
|
+
#
|
|
21
|
+
def possible_combinations token
|
|
22
|
+
categories.possible_combinations_for token
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
end
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
module Index
|
|
2
|
+
|
|
3
|
+
#
|
|
4
|
+
#
|
|
5
|
+
class Base
|
|
6
|
+
|
|
7
|
+
attr_reader :after_indexing,
|
|
8
|
+
:bundle_class,
|
|
9
|
+
:tokenizer
|
|
10
|
+
|
|
11
|
+
# Delegators for indexing.
|
|
12
|
+
#
|
|
13
|
+
delegate :backup_caches,
|
|
14
|
+
:cache,
|
|
15
|
+
:check_caches,
|
|
16
|
+
:clear_caches,
|
|
17
|
+
:create_directory_structure,
|
|
18
|
+
:generate_caches,
|
|
19
|
+
:restore_caches,
|
|
20
|
+
:to => :categories
|
|
21
|
+
|
|
22
|
+
delegate :connect_backend,
|
|
23
|
+
:to => :source
|
|
24
|
+
|
|
25
|
+
# Calling index on an index will
|
|
26
|
+
# * prepare (the data)
|
|
27
|
+
# * cache (the data)
|
|
28
|
+
# on every category.
|
|
29
|
+
#
|
|
30
|
+
def index
|
|
31
|
+
prepare
|
|
32
|
+
cache
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
# Define an index tokenizer on the index.
|
|
36
|
+
#
|
|
37
|
+
# Parameters are the exact same as for indexing.
|
|
38
|
+
#
|
|
39
|
+
def indexing options = {}
|
|
40
|
+
@tokenizer = Tokenizers::Index.new options
|
|
41
|
+
end
|
|
42
|
+
alias define_indexing indexing
|
|
43
|
+
|
|
44
|
+
# Define a source on the index.
|
|
45
|
+
#
|
|
46
|
+
# Parameter is a source, either one of the standard sources or
|
|
47
|
+
# anything responding to #each and returning objects that
|
|
48
|
+
# respond to id and the category names (or the category from option).
|
|
49
|
+
#
|
|
50
|
+
def source some_source = nil
|
|
51
|
+
some_source ? define_source(some_source) : (@source || raise_no_source)
|
|
52
|
+
end
|
|
53
|
+
def define_source source
|
|
54
|
+
@source = source
|
|
55
|
+
end
|
|
56
|
+
def raise_no_source
|
|
57
|
+
raise NoSourceSpecifiedException.new(<<-NO_SOURCE
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
No source given for index #{name}. An index needs a source.
|
|
61
|
+
Example:
|
|
62
|
+
Index::Memory.new(:with_source) do
|
|
63
|
+
source Sources::CSV.new(:title, file: 'data/books.csv')
|
|
64
|
+
category :title
|
|
65
|
+
category :author
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
NO_SOURCE
|
|
69
|
+
)
|
|
70
|
+
end
|
|
71
|
+
|
|
72
|
+
# Define a key_format on the index.
|
|
73
|
+
#
|
|
74
|
+
# Parameter is a method name to use on the key (e.g. :to_i, :to_s, :strip).
|
|
75
|
+
#
|
|
76
|
+
def key_format format = nil
|
|
77
|
+
format ? define_key_format(format) : (@key_format || :to_i)
|
|
78
|
+
end
|
|
79
|
+
def define_key_format key_format
|
|
80
|
+
@key_format = key_format
|
|
81
|
+
end
|
|
82
|
+
|
|
83
|
+
# Decides whether to use a parallel indexer or whether to
|
|
84
|
+
# delegate to each category to index themselves.
|
|
85
|
+
#
|
|
86
|
+
# TODO Rename to prepare.
|
|
87
|
+
#
|
|
88
|
+
def prepare
|
|
89
|
+
# TODO Duplicated in category.rb def indexer.
|
|
90
|
+
#
|
|
91
|
+
if source.respond_to?(:each)
|
|
92
|
+
warn %Q{\n\033[1mWarning\033[m, source for index "#{name}" is empty: #{source} (responds true to empty?).\n} if source.respond_to?(:empty?) && source.empty?
|
|
93
|
+
index_parallel
|
|
94
|
+
else
|
|
95
|
+
categories.each &:prepare
|
|
96
|
+
end
|
|
97
|
+
end
|
|
98
|
+
|
|
99
|
+
# Indexes the categories in parallel.
|
|
100
|
+
#
|
|
101
|
+
# Only use where the category does not have a non-#each source defined.
|
|
102
|
+
#
|
|
103
|
+
def index_parallel
|
|
104
|
+
indexer = Indexers::Parallel.new self
|
|
105
|
+
categories.first.prepare_index_directory # TODO Unnice.
|
|
106
|
+
indexer.index
|
|
107
|
+
end
|
|
108
|
+
|
|
109
|
+
# Indexing.
|
|
110
|
+
#
|
|
111
|
+
# Note: If it is an each source we do not take a snapshot.
|
|
112
|
+
#
|
|
113
|
+
def take_snapshot
|
|
114
|
+
source.take_snapshot self unless source.respond_to? :each
|
|
115
|
+
end
|
|
116
|
+
|
|
117
|
+
end
|
|
118
|
+
|
|
119
|
+
end
|
data/lib/picky/index/memory.rb
CHANGED
|
@@ -3,24 +3,12 @@ module Index
|
|
|
3
3
|
# An index that is persisted in files, loaded at startup and kept in memory at runtime.
|
|
4
4
|
#
|
|
5
5
|
class Memory < Base
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
# - in the frontend to describe which index a result came from.
|
|
13
|
-
# - index directory naming (index/development/the_identifier/<lots of indexes>)
|
|
14
|
-
# * source: The source the data comes from. See Sources::Base.
|
|
15
|
-
#
|
|
16
|
-
# Options:
|
|
17
|
-
# * result_identifier: Use if you'd like a different identifier/name in the results JSON than the name of the index.
|
|
18
|
-
#
|
|
19
|
-
def initialize name, options = {}
|
|
20
|
-
options[:indexing_bundle_class] ||= Internals::Indexing::Bundle::Memory
|
|
21
|
-
options[:indexed_bundle_class] ||= Internals::Indexed::Bundle::Memory
|
|
22
|
-
|
|
23
|
-
super name, options
|
|
6
|
+
|
|
7
|
+
def indexing_bundle_class
|
|
8
|
+
Indexing::Bundle::Memory
|
|
9
|
+
end
|
|
10
|
+
def indexed_bundle_class
|
|
11
|
+
Indexed::Bundle::Memory
|
|
24
12
|
end
|
|
25
13
|
|
|
26
14
|
end
|
data/lib/picky/index/redis.rb
CHANGED
|
@@ -3,24 +3,12 @@ module Index
|
|
|
3
3
|
# An index that is persisted in Redis.
|
|
4
4
|
#
|
|
5
5
|
class Redis < Base
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
# - in the frontend to describe which index a result came from.
|
|
13
|
-
# - index directory naming (index/development/the_identifier/<lots of indexes>)
|
|
14
|
-
# * source: The source the data comes from. See Sources::Base.
|
|
15
|
-
#
|
|
16
|
-
# Options:
|
|
17
|
-
# * result_identifier: Use if you'd like a different identifier/name in the results JSON than the name of the index.
|
|
18
|
-
#
|
|
19
|
-
def initialize name, options = {}
|
|
20
|
-
options[:indexing_bundle_class] ||= Internals::Indexing::Bundle::Redis
|
|
21
|
-
options[:indexed_bundle_class] ||= Internals::Indexed::Bundle::Redis
|
|
22
|
-
|
|
23
|
-
super name, options
|
|
6
|
+
|
|
7
|
+
def indexing_bundle_class
|
|
8
|
+
Indexing::Bundle::Redis
|
|
9
|
+
end
|
|
10
|
+
def indexed_bundle_class
|
|
11
|
+
Indexed::Bundle::Redis
|
|
24
12
|
end
|
|
25
13
|
|
|
26
14
|
end
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
module Indexed # :nodoc:all
|
|
2
|
+
|
|
3
|
+
# A Bundle is a number of indexes
|
|
4
|
+
# per [index, category] combination.
|
|
5
|
+
#
|
|
6
|
+
# At most, there are three indexes:
|
|
7
|
+
# * *core* index (always used)
|
|
8
|
+
# * *weights* index (always used)
|
|
9
|
+
# * *similarity* index (used with similarity)
|
|
10
|
+
#
|
|
11
|
+
# In Picky, indexing is separated from the index
|
|
12
|
+
# handling itself through a parallel structure.
|
|
13
|
+
#
|
|
14
|
+
# Both use methods provided by this base class, but
|
|
15
|
+
# have very different goals:
|
|
16
|
+
#
|
|
17
|
+
# * *Indexing*::*Bundle* is just concerned with creating index files
|
|
18
|
+
# and providing helper functions to e.g. check the indexes.
|
|
19
|
+
#
|
|
20
|
+
# * *Indexed*::*Bundle* is concerned with loading these index files into
|
|
21
|
+
# memory and looking up search data as fast as possible.
|
|
22
|
+
#
|
|
23
|
+
module Bundle
|
|
24
|
+
|
|
25
|
+
class Base
|
|
26
|
+
|
|
27
|
+
attr_reader :identifier, :configuration
|
|
28
|
+
attr_accessor :similarity_strategy
|
|
29
|
+
attr_accessor :index, :weights, :similarity, :configuration
|
|
30
|
+
|
|
31
|
+
delegate :[], :to => :configuration
|
|
32
|
+
delegate :size, :to => :index
|
|
33
|
+
|
|
34
|
+
def initialize name, category, similarity_strategy
|
|
35
|
+
@identifier = "#{category.identifier}:#{name}"
|
|
36
|
+
|
|
37
|
+
@index = {}
|
|
38
|
+
@weights = {}
|
|
39
|
+
@similarity = {}
|
|
40
|
+
|
|
41
|
+
@similarity_strategy = similarity_strategy
|
|
42
|
+
end
|
|
43
|
+
|
|
44
|
+
# Get a list of texts that are similar to the given text.
#
# The text is encoded with the similarity strategy and the resulting
# code is looked up in the similarity index.
#
# Note: Does not return the given text itself.
#
# Returns an empty array if the strategy yields no code for the text,
# or if nothing is stored under that code.
#
def similar text
  code = similarity_strategy.encoded text
  similar_codes = code && @similarity[code]
  return [] unless similar_codes

  # Build a new array instead of calling #delete on the stored one:
  # deleting in place would remove the text from the similarity index
  # itself, so later lookups would no longer find it.
  #
  similar_codes.reject { |similar| similar == text }
end
|
|
54
|
+
|
|
55
|
+
# Loads all indexes.
|
|
56
|
+
#
|
|
57
|
+
def load
|
|
58
|
+
load_index
|
|
59
|
+
load_weights
|
|
60
|
+
load_similarity
|
|
61
|
+
load_configuration
|
|
62
|
+
end
|
|
63
|
+
|
|
64
|
+
# Loads the core index.
|
|
65
|
+
#
|
|
66
|
+
def load_index
|
|
67
|
+
# No loading needed.
|
|
68
|
+
end
|
|
69
|
+
# Loads the weights index.
|
|
70
|
+
#
|
|
71
|
+
def load_weights
|
|
72
|
+
# No loading needed.
|
|
73
|
+
end
|
|
74
|
+
# Loads the similarity index.
|
|
75
|
+
#
|
|
76
|
+
def load_similarity
|
|
77
|
+
# No loading needed.
|
|
78
|
+
end
|
|
79
|
+
# Loads the configuration.
|
|
80
|
+
#
|
|
81
|
+
def load_configuration
|
|
82
|
+
# No loading needed.
|
|
83
|
+
end
|
|
84
|
+
|
|
85
|
+
# Clears the core index.
|
|
86
|
+
#
|
|
87
|
+
def clear_index
|
|
88
|
+
# No clearing needed.
|
|
89
|
+
end
|
|
90
|
+
# Clears the weights index.
|
|
91
|
+
#
|
|
92
|
+
def clear_weights
|
|
93
|
+
# No clearing needed.
|
|
94
|
+
end
|
|
95
|
+
# Clears the similarity index.
|
|
96
|
+
#
|
|
97
|
+
def clear_similarity
|
|
98
|
+
# No clearing needed.
|
|
99
|
+
end
|
|
100
|
+
# Clears the configuration.
|
|
101
|
+
#
|
|
102
|
+
def clear_configuration
|
|
103
|
+
# No clearing needed.
|
|
104
|
+
end
|
|
105
|
+
|
|
106
|
+
end
|
|
107
|
+
|
|
108
|
+
end
|
|
109
|
+
|
|
110
|
+
end
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
# encoding: utf-8
|
|
2
|
+
#
|
|
3
|
+
module Indexed # :nodoc:all
|
|
4
|
+
|
|
5
|
+
#
|
|
6
|
+
#
|
|
7
|
+
module Bundle
|
|
8
|
+
|
|
9
|
+
# This is the _actual_ index (based on memory).
|
|
10
|
+
#
|
|
11
|
+
# Handles exact/partial index, weights index, and similarity index.
|
|
12
|
+
#
|
|
13
|
+
# Delegates file handling and checking to a *Backend*::*Files* object.
|
|
14
|
+
#
|
|
15
|
+
class Memory < Base
|
|
16
|
+
|
|
17
|
+
delegate :[], :to => :configuration
|
|
18
|
+
|
|
19
|
+
def initialize name, configuration, *args
|
|
20
|
+
super name, configuration, *args
|
|
21
|
+
|
|
22
|
+
@configuration = {} # A hash with config options.
|
|
23
|
+
|
|
24
|
+
@backend = Backend::Files.new name, configuration
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
def to_s
|
|
28
|
+
<<-MEMORY
|
|
29
|
+
Memory
|
|
30
|
+
#{@backend.indented_to_s}
|
|
31
|
+
MEMORY
|
|
32
|
+
end
|
|
33
|
+
|
|
34
|
+
# Get the ids for the given symbol.
|
|
35
|
+
#
|
|
36
|
+
def ids sym
|
|
37
|
+
@index[sym] || []
|
|
38
|
+
end
|
|
39
|
+
# Get a weight for the given symbol.
|
|
40
|
+
#
|
|
41
|
+
def weight sym
|
|
42
|
+
@weights[sym]
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
# Loads the core index.
|
|
46
|
+
#
|
|
47
|
+
def load_index
|
|
48
|
+
self.index = @backend.load_index
|
|
49
|
+
end
|
|
50
|
+
# Loads the weights index.
|
|
51
|
+
#
|
|
52
|
+
def load_weights
|
|
53
|
+
self.weights = @backend.load_weights
|
|
54
|
+
end
|
|
55
|
+
# Loads the similarity index.
|
|
56
|
+
#
|
|
57
|
+
def load_similarity
|
|
58
|
+
self.similarity = @backend.load_similarity
|
|
59
|
+
end
|
|
60
|
+
# Loads the configuration.
|
|
61
|
+
#
|
|
62
|
+
def load_configuration
|
|
63
|
+
self.configuration = @backend.load_configuration
|
|
64
|
+
end
|
|
65
|
+
|
|
66
|
+
# Clears the core index.
|
|
67
|
+
#
|
|
68
|
+
def clear_index
|
|
69
|
+
self.index = {}
|
|
70
|
+
end
|
|
71
|
+
# Clears the weights index.
|
|
72
|
+
#
|
|
73
|
+
def clear_weights
|
|
74
|
+
self.weights = {}
|
|
75
|
+
end
|
|
76
|
+
# Clears the similarity index.
|
|
77
|
+
#
|
|
78
|
+
def clear_similarity
|
|
79
|
+
self.similarity = {}
|
|
80
|
+
end
|
|
81
|
+
# Clears the configuration.
|
|
82
|
+
#
|
|
83
|
+
def clear_configuration
|
|
84
|
+
self.configuration = {}
|
|
85
|
+
end
|
|
86
|
+
|
|
87
|
+
end
|
|
88
|
+
|
|
89
|
+
end
|
|
90
|
+
|
|
91
|
+
end
|