picky 1.4.1 → 1.4.2
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/picky/{alias_instances.rb → aliases.rb} +1 -3
- data/lib/picky/application.rb +18 -19
- data/lib/picky/cores.rb +1 -1
- data/lib/picky/generators/aliases.rb +3 -0
- data/lib/picky/index/base.rb +179 -0
- data/lib/picky/index/memory.rb +28 -0
- data/lib/picky/index/redis.rb +28 -0
- data/lib/picky/{indexes_api.rb → index_bundle.rb} +16 -16
- data/lib/picky/indexed/indexes.rb +11 -7
- data/lib/picky/indexing/indexes.rb +14 -8
- data/lib/picky/internals/adapters/rack/base.rb +27 -0
- data/lib/picky/internals/adapters/rack/live_parameters.rb +37 -0
- data/lib/picky/internals/adapters/rack/query.rb +63 -0
- data/lib/picky/internals/adapters/rack.rb +34 -0
- data/lib/picky/{calculations → internals/calculations}/location.rb +0 -0
- data/lib/picky/{cli.rb → internals/cli.rb} +0 -0
- data/lib/picky/{configuration → internals/configuration}/index.rb +8 -2
- data/lib/picky/{ext → internals/ext}/maybe_compile.rb +0 -0
- data/lib/picky/{ext → internals/ext}/ruby19/extconf.rb +0 -0
- data/lib/picky/{ext → internals/ext}/ruby19/performant.c +0 -0
- data/lib/picky/{extensions → internals/extensions}/array.rb +0 -0
- data/lib/picky/{extensions → internals/extensions}/hash.rb +0 -0
- data/lib/picky/{extensions → internals/extensions}/module.rb +0 -0
- data/lib/picky/{extensions → internals/extensions}/object.rb +0 -0
- data/lib/picky/{extensions → internals/extensions}/symbol.rb +0 -0
- data/lib/picky/internals/frontend_adapters/rack.rb +154 -0
- data/lib/picky/internals/generators/base.rb +19 -0
- data/lib/picky/internals/generators/partial/default.rb +7 -0
- data/lib/picky/internals/generators/partial/none.rb +35 -0
- data/lib/picky/internals/generators/partial/strategy.rb +29 -0
- data/lib/picky/internals/generators/partial/substring.rb +122 -0
- data/lib/picky/internals/generators/partial_generator.rb +19 -0
- data/lib/picky/internals/generators/similarity/default.rb +9 -0
- data/lib/picky/internals/generators/similarity/double_levenshtone.rb +81 -0
- data/lib/picky/internals/generators/similarity/none.rb +35 -0
- data/lib/picky/internals/generators/similarity/strategy.rb +11 -0
- data/lib/picky/internals/generators/similarity_generator.rb +19 -0
- data/lib/picky/internals/generators/strategy.rb +18 -0
- data/lib/picky/internals/generators/weights/default.rb +9 -0
- data/lib/picky/internals/generators/weights/logarithmic.rb +43 -0
- data/lib/picky/internals/generators/weights/strategy.rb +11 -0
- data/lib/picky/internals/generators/weights_generator.rb +19 -0
- data/lib/picky/{helpers → internals/helpers}/measuring.rb +0 -0
- data/lib/picky/internals/index/backend.rb +113 -0
- data/lib/picky/internals/index/file/basic.rb +101 -0
- data/lib/picky/internals/index/file/json.rb +38 -0
- data/lib/picky/internals/index/file/marshal.rb +38 -0
- data/lib/picky/internals/index/file/text.rb +60 -0
- data/lib/picky/internals/index/files.rb +24 -0
- data/lib/picky/internals/index/redis/basic.rb +77 -0
- data/lib/picky/internals/index/redis/list_hash.rb +46 -0
- data/lib/picky/internals/index/redis/string_hash.rb +35 -0
- data/lib/picky/internals/index/redis.rb +44 -0
- data/lib/picky/internals/indexed/bundle/base.rb +72 -0
- data/lib/picky/internals/indexed/bundle/memory.rb +69 -0
- data/lib/picky/internals/indexed/bundle/redis.rb +70 -0
- data/lib/picky/internals/indexed/categories.rb +135 -0
- data/lib/picky/internals/indexed/category.rb +90 -0
- data/lib/picky/internals/indexed/index.rb +57 -0
- data/lib/picky/{indexed → internals/indexed}/wrappers/bundle/calculation.rb +0 -0
- data/lib/picky/{indexed → internals/indexed}/wrappers/bundle/location.rb +4 -2
- data/lib/picky/{indexed → internals/indexed}/wrappers/bundle/wrapper.rb +1 -1
- data/lib/picky/internals/indexed/wrappers/exact_first.rb +65 -0
- data/lib/picky/{indexers → internals/indexers}/no_source_specified_error.rb +0 -0
- data/lib/picky/{indexers → internals/indexers}/serial.rb +2 -2
- data/lib/picky/{indexers → internals/indexers}/solr.rb +0 -0
- data/lib/picky/internals/indexing/bundle/base.rb +219 -0
- data/lib/picky/internals/indexing/bundle/memory.rb +25 -0
- data/lib/picky/internals/indexing/bundle/redis.rb +28 -0
- data/lib/picky/internals/indexing/bundle/super_base.rb +65 -0
- data/lib/picky/internals/indexing/categories.rb +42 -0
- data/lib/picky/internals/indexing/category.rb +120 -0
- data/lib/picky/internals/indexing/index.rb +67 -0
- data/lib/picky/{performant.rb → internals/performant.rb} +0 -0
- data/lib/picky/internals/query/allocation.rb +88 -0
- data/lib/picky/internals/query/allocations.rb +137 -0
- data/lib/picky/internals/query/combination.rb +80 -0
- data/lib/picky/internals/query/combinations/base.rb +84 -0
- data/lib/picky/internals/query/combinations/memory.rb +58 -0
- data/lib/picky/internals/query/combinations/redis.rb +59 -0
- data/lib/picky/internals/query/indexes.rb +180 -0
- data/lib/picky/internals/query/qualifiers.rb +81 -0
- data/lib/picky/internals/query/token.rb +215 -0
- data/lib/picky/internals/query/tokens.rb +89 -0
- data/lib/picky/{query → internals/query}/weights.rb +0 -0
- data/lib/picky/internals/results/base.rb +106 -0
- data/lib/picky/internals/results/full.rb +17 -0
- data/lib/picky/internals/results/live.rb +17 -0
- data/lib/picky/{solr → internals/solr}/schema_generator.rb +0 -0
- data/lib/picky/internals/tokenizers/base.rb +166 -0
- data/lib/picky/internals/tokenizers/index.rb +63 -0
- data/lib/picky/internals/tokenizers/query.rb +79 -0
- data/lib/picky/loader.rb +148 -112
- data/lib/picky/query/base.rb +57 -26
- data/lib/picky/query/full.rb +1 -1
- data/lib/picky/query/live.rb +1 -1
- data/lib/picky/sources/db.rb +27 -6
- data/lib/tasks/index.rake +3 -3
- data/lib/tasks/try.rake +2 -2
- data/spec/lib/aliases_spec.rb +9 -0
- data/spec/lib/application_spec.rb +3 -3
- data/spec/lib/generators/aliases_spec.rb +1 -0
- data/spec/lib/{index_api_spec.rb → index/base_spec.rb} +7 -7
- data/spec/lib/index_bundle_spec.rb +71 -0
- data/spec/lib/indexed/indexes_spec.rb +61 -0
- data/spec/lib/indexing/indexes_spec.rb +94 -24
- data/spec/lib/{adapters → internals/adapters}/rack/base_spec.rb +2 -2
- data/spec/lib/{adapters → internals/adapters}/rack/live_parameters_spec.rb +2 -2
- data/spec/lib/{adapters → internals/adapters}/rack/query_spec.rb +2 -2
- data/spec/lib/{calculations → internals/calculations}/location_spec.rb +0 -0
- data/spec/lib/{cli_spec.rb → internals/cli_spec.rb} +4 -1
- data/spec/lib/{configuration → internals/configuration}/index_spec.rb +1 -1
- data/spec/lib/{cores_spec.rb → internals/cores_spec.rb} +0 -0
- data/spec/lib/{extensions → internals/extensions}/array_spec.rb +0 -0
- data/spec/lib/{extensions → internals/extensions}/hash_spec.rb +0 -0
- data/spec/lib/{extensions → internals/extensions}/module_spec.rb +0 -0
- data/spec/lib/{extensions → internals/extensions}/object_spec.rb +0 -0
- data/spec/lib/{extensions → internals/extensions}/symbol_spec.rb +0 -0
- data/spec/lib/{frontend_adapters → internals/frontend_adapters}/rack_spec.rb +11 -11
- data/spec/lib/{cacher → internals/generators}/cacher_strategy_spec.rb +2 -2
- data/spec/lib/internals/generators/partial/default_spec.rb +17 -0
- data/spec/lib/internals/generators/partial/none_spec.rb +17 -0
- data/spec/lib/{cacher → internals/generators}/partial/substring_spec.rb +26 -27
- data/spec/lib/{cacher → internals/generators}/partial_generator_spec.rb +5 -5
- data/spec/lib/{cacher → internals/generators}/similarity/double_levenshtone_spec.rb +4 -4
- data/spec/lib/{cacher → internals/generators}/similarity/none_spec.rb +2 -2
- data/spec/lib/{cacher → internals/generators}/similarity_generator_spec.rb +4 -4
- data/spec/lib/{cacher → internals/generators}/weights/logarithmic_spec.rb +2 -2
- data/spec/lib/internals/generators/weights_generator_spec.rb +21 -0
- data/spec/lib/{helpers → internals/helpers}/measuring_spec.rb +0 -0
- data/spec/lib/{index → internals/index}/file/basic_spec.rb +2 -2
- data/spec/lib/{index → internals/index}/file/json_spec.rb +2 -2
- data/spec/lib/{index → internals/index}/file/marshal_spec.rb +2 -2
- data/spec/lib/{index → internals/index}/file/text_spec.rb +2 -2
- data/spec/lib/{index → internals/index}/files_spec.rb +2 -2
- data/spec/lib/{indexed/bundle_spec.rb → internals/indexed/bundle/memory_spec.rb} +4 -5
- data/spec/lib/{indexed → internals/indexed}/categories_spec.rb +13 -13
- data/spec/lib/{indexed → internals/indexed}/category_spec.rb +59 -32
- data/spec/lib/{indexed → internals/indexed}/index_spec.rb +5 -5
- data/spec/lib/{indexed → internals/indexed}/wrappers/bundle/calculation_spec.rb +0 -0
- data/spec/lib/{indexed → internals/indexed}/wrappers/bundle/wrapper_spec.rb +0 -0
- data/spec/lib/{indexed → internals/indexed}/wrappers/exact_first_spec.rb +5 -5
- data/spec/lib/{indexers → internals/indexers}/serial_spec.rb +0 -0
- data/spec/lib/{indexing/bundle_partial_generation_speed_spec.rb → internals/indexing/bundle/memory_partial_generation_speed_spec.rb} +3 -3
- data/spec/lib/{indexing/bundle_spec.rb → internals/indexing/bundle/memory_spec.rb} +3 -3
- data/spec/lib/{index/bundle_spec.rb → internals/indexing/bundle/super_base_spec.rb} +9 -3
- data/spec/lib/{indexing → internals/indexing}/category_spec.rb +3 -3
- data/spec/lib/{indexing → internals/indexing}/index_spec.rb +3 -3
- data/spec/lib/internals/indexing/indexes_spec.rb +36 -0
- data/spec/lib/{interfaces → internals/interfaces}/live_parameters_spec.rb +0 -0
- data/spec/lib/internals/results/base_spec.rb +105 -0
- data/spec/lib/internals/results/full_spec.rb +78 -0
- data/spec/lib/internals/results/live_spec.rb +88 -0
- data/spec/lib/{solr → internals/solr}/schema_generator_spec.rb +0 -0
- data/spec/lib/{tokenizers → internals/tokenizers}/base_spec.rb +3 -3
- data/spec/lib/{tokenizers → internals/tokenizers}/index_spec.rb +9 -9
- data/spec/lib/{tokenizers → internals/tokenizers}/query_spec.rb +11 -11
- data/spec/lib/query/allocation_spec.rb +12 -12
- data/spec/lib/query/allocations_spec.rb +19 -19
- data/spec/lib/query/base_spec.rb +28 -4
- data/spec/lib/query/combination_spec.rb +8 -9
- data/spec/lib/query/combinations/base_spec.rb +116 -0
- data/spec/lib/query/{combinations_spec.rb → combinations/memory_spec.rb} +14 -14
- data/spec/lib/query/combinations/redis_spec.rb +132 -0
- data/spec/lib/query/full_spec.rb +2 -2
- data/spec/lib/query/indexes_spec.rb +81 -0
- data/spec/lib/query/live_spec.rb +3 -3
- data/spec/lib/query/qualifiers_spec.rb +6 -6
- data/spec/lib/query/token_spec.rb +38 -38
- data/spec/lib/query/tokens_spec.rb +35 -35
- data/spec/lib/sources/db_spec.rb +23 -18
- metadata +212 -181
- data/lib/picky/adapters/rack/base.rb +0 -23
- data/lib/picky/adapters/rack/live_parameters.rb +0 -33
- data/lib/picky/adapters/rack/query.rb +0 -59
- data/lib/picky/adapters/rack.rb +0 -28
- data/lib/picky/cacher/convenience.rb +0 -3
- data/lib/picky/cacher/generator.rb +0 -15
- data/lib/picky/cacher/partial/default.rb +0 -5
- data/lib/picky/cacher/partial/none.rb +0 -31
- data/lib/picky/cacher/partial/strategy.rb +0 -21
- data/lib/picky/cacher/partial/substring.rb +0 -118
- data/lib/picky/cacher/partial_generator.rb +0 -15
- data/lib/picky/cacher/similarity/default.rb +0 -7
- data/lib/picky/cacher/similarity/double_levenshtone.rb +0 -77
- data/lib/picky/cacher/similarity/none.rb +0 -31
- data/lib/picky/cacher/similarity/strategy.rb +0 -9
- data/lib/picky/cacher/similarity_generator.rb +0 -15
- data/lib/picky/cacher/strategy.rb +0 -12
- data/lib/picky/cacher/weights/default.rb +0 -7
- data/lib/picky/cacher/weights/logarithmic.rb +0 -39
- data/lib/picky/cacher/weights/strategy.rb +0 -9
- data/lib/picky/cacher/weights_generator.rb +0 -15
- data/lib/picky/frontend_adapters/rack.rb +0 -150
- data/lib/picky/index/bundle.rb +0 -54
- data/lib/picky/index/file/basic.rb +0 -97
- data/lib/picky/index/file/json.rb +0 -34
- data/lib/picky/index/file/marshal.rb +0 -34
- data/lib/picky/index/file/text.rb +0 -56
- data/lib/picky/index/files.rb +0 -118
- data/lib/picky/index_api.rb +0 -175
- data/lib/picky/indexed/bundle.rb +0 -54
- data/lib/picky/indexed/categories.rb +0 -131
- data/lib/picky/indexed/category.rb +0 -85
- data/lib/picky/indexed/index.rb +0 -39
- data/lib/picky/indexed/wrappers/exact_first.rb +0 -61
- data/lib/picky/indexing/bundle.rb +0 -213
- data/lib/picky/indexing/categories.rb +0 -38
- data/lib/picky/indexing/category.rb +0 -117
- data/lib/picky/indexing/index.rb +0 -55
- data/lib/picky/query/allocation.rb +0 -82
- data/lib/picky/query/allocations.rb +0 -130
- data/lib/picky/query/combination.rb +0 -74
- data/lib/picky/query/combinations.rb +0 -105
- data/lib/picky/query/qualifiers.rb +0 -77
- data/lib/picky/query/token.rb +0 -202
- data/lib/picky/query/tokens.rb +0 -86
- data/lib/picky/query/weigher.rb +0 -165
- data/lib/picky/results/base.rb +0 -102
- data/lib/picky/results/full.rb +0 -13
- data/lib/picky/results/live.rb +0 -13
- data/lib/picky/tokenizers/base.rb +0 -161
- data/lib/picky/tokenizers/index.rb +0 -58
- data/lib/picky/tokenizers/query.rb +0 -74
- data/spec/lib/cacher/partial/default_spec.rb +0 -15
- data/spec/lib/cacher/partial/none_spec.rb +0 -17
- data/spec/lib/cacher/weights_generator_spec.rb +0 -21
- data/spec/lib/results/base_spec.rb +0 -257
- data/spec/lib/results/live_spec.rb +0 -15
@@ -0,0 +1,219 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
#
|
3
|
+
module Internals
|
4
|
+
|
5
|
+
module Indexing # :nodoc:all
|
6
|
+
|
7
|
+
module Bundle
|
8
|
+
|
9
|
+
# This is the indexing bundle.
|
10
|
+
# It does all menial tasks that have nothing to do
|
11
|
+
# with the actual index running etc.
|
12
|
+
#
|
13
|
+
class Base < SuperBase
|
14
|
+
|
15
|
+
attr_accessor :partial_strategy, :weights_strategy
|
16
|
+
|
17
|
+
# Takes the bundle name, the configuration and the similarity, partial and weights strategies.
|
18
|
+
#
|
19
|
+
def initialize name, configuration, similarity_strategy, partial_strategy, weights_strategy
|
20
|
+
super name, configuration, similarity_strategy
|
21
|
+
|
22
|
+
@partial_strategy = partial_strategy
|
23
|
+
@weights_strategy = weights_strategy
|
24
|
+
end
|
25
|
+
|
26
|
+
# Sets up a piece of the index for the given token.
|
27
|
+
#
|
28
|
+
def initialize_index_for token
|
29
|
+
index[token] ||= []
|
30
|
+
end
|
31
|
+
|
32
|
+
# Generation
|
33
|
+
#
|
34
|
+
|
35
|
+
# This method
|
36
|
+
# * loads the base index from the prepared index file
|
37
|
+
# * generates derived indexes
|
38
|
+
# * dumps all the indexes into files
|
39
|
+
#
|
40
|
+
def generate_caches_from_source
|
41
|
+
load_from_index_file
|
42
|
+
generate_caches_from_memory
|
43
|
+
end
|
44
|
+
# Generates derived indexes from the index and dumps.
|
45
|
+
#
|
46
|
+
# Note: assumes that there is something in the index
|
47
|
+
#
|
48
|
+
def generate_caches_from_memory
|
49
|
+
cache_from_memory_generation_message
|
50
|
+
generate_derived
|
51
|
+
end
|
52
|
+
def cache_from_memory_generation_message
|
53
|
+
timed_exclaim "CACHE FROM MEMORY #{identifier}."
|
54
|
+
end
|
55
|
+
|
56
|
+
# Generates the weights and similarity from the main index.
|
57
|
+
#
|
58
|
+
def generate_derived
|
59
|
+
generate_weights
|
60
|
+
generate_similarity
|
61
|
+
end
|
62
|
+
|
63
|
+
# Loads the data from the prepared index file (see #retrieve).
|
64
|
+
#
|
65
|
+
def load_from_index_file
|
66
|
+
load_from_index_generation_message
|
67
|
+
clear
|
68
|
+
retrieve
|
69
|
+
end
|
70
|
+
def load_from_index_generation_message
|
71
|
+
timed_exclaim "LOAD INDEX #{identifier}."
|
72
|
+
end
|
73
|
+
# Retrieves the prepared index data into the index.
|
74
|
+
#
|
75
|
+
# This is in preparation for generating
|
76
|
+
# derived indexes (like weights, similarity)
|
77
|
+
# and later dumping the optimized index.
|
78
|
+
#
|
79
|
+
def retrieve
|
80
|
+
key_format = self[:key_format] || :to_i
|
81
|
+
files.retrieve do |id, token|
|
82
|
+
initialize_index_for token
|
83
|
+
index[token] << id.send(key_format) # TODO Rewrite. Move this into the specific indexing.
|
84
|
+
end
|
85
|
+
end
|
86
|
+
|
87
|
+
# Generates a new index (writes its index) using the
|
88
|
+
# partial caching strategy of this bundle.
|
89
|
+
#
|
90
|
+
def generate_partial
|
91
|
+
generator = Generators::PartialGenerator.new self.index
|
92
|
+
self.index = generator.generate self.partial_strategy
|
93
|
+
end
|
94
|
+
# Generate a partial index from the given exact index.
|
95
|
+
#
|
96
|
+
def generate_partial_from exact_index
|
97
|
+
timed_exclaim "PARTIAL GENERATE #{identifier}."
|
98
|
+
self.index = exact_index
|
99
|
+
self.generate_partial
|
100
|
+
self
|
101
|
+
end
|
102
|
+
# Generates a new similarity index (writes its index) using the
|
103
|
+
# given similarity caching strategy.
|
104
|
+
#
|
105
|
+
def generate_similarity
|
106
|
+
generator = Generators::SimilarityGenerator.new self.index
|
107
|
+
self.similarity = generator.generate self.similarity_strategy
|
108
|
+
end
|
109
|
+
# Generates a new weights index (writes its index) using the
|
110
|
+
# given weight caching strategy.
|
111
|
+
#
|
112
|
+
def generate_weights
|
113
|
+
generator = Generators::WeightsGenerator.new self.index
|
114
|
+
self.weights = generator.generate self.weights_strategy
|
115
|
+
end
|
116
|
+
|
117
|
+
# Saves the indexes in a dump file.
|
118
|
+
#
|
119
|
+
def dump
|
120
|
+
dump_index
|
121
|
+
dump_similarity
|
122
|
+
dump_weights
|
123
|
+
dump_configuration
|
124
|
+
end
|
125
|
+
# Dumps the core index.
|
126
|
+
#
|
127
|
+
def dump_index
|
128
|
+
timed_exclaim "DUMP INDEX #{identifier}."
|
129
|
+
backend.dump_index index
|
130
|
+
end
|
131
|
+
# Dumps the weights index.
|
132
|
+
#
|
133
|
+
def dump_weights
|
134
|
+
timed_exclaim "DUMP WEIGHTS #{identifier}."
|
135
|
+
backend.dump_weights weights
|
136
|
+
end
|
137
|
+
# Dumps the similarity index.
|
138
|
+
#
|
139
|
+
def dump_similarity
|
140
|
+
timed_exclaim "DUMP SIMILARITY #{identifier}."
|
141
|
+
backend.dump_similarity similarity
|
142
|
+
end
|
143
|
+
# Dumps the configuration.
|
144
|
+
#
|
145
|
+
def dump_configuration
|
146
|
+
timed_exclaim "DUMP CONFIGURATION #{identifier}."
|
147
|
+
backend.dump_configuration configuration
|
148
|
+
end
|
149
|
+
|
150
|
+
# Alerts the user if an index is missing.
|
151
|
+
#
|
152
|
+
def raise_unless_cache_exists
|
153
|
+
raise_unless_index_exists
|
154
|
+
raise_unless_similarity_exists
|
155
|
+
end
|
156
|
+
# Alerts the user if one of the necessary indexes
|
157
|
+
# (core, weights) is missing.
|
158
|
+
#
|
159
|
+
def raise_unless_index_exists
|
160
|
+
if partial_strategy.saved?
|
161
|
+
warn_if_index_small
|
162
|
+
raise_unless_index_ok
|
163
|
+
end
|
164
|
+
end
|
165
|
+
# Alerts the user if the similarity
|
166
|
+
# index is missing (given that it's used).
|
167
|
+
#
|
168
|
+
def raise_unless_similarity_exists
|
169
|
+
if similarity_strategy.saved?
|
170
|
+
warn_if_similarity_small
|
171
|
+
raise_unless_similarity_ok
|
172
|
+
end
|
173
|
+
end
|
174
|
+
|
175
|
+
# Outputs a warning for the given cache.
|
176
|
+
#
|
177
|
+
def warn_cache_small what
|
178
|
+
puts "Warning: #{what} cache for #{identifier} smaller than 16 bytes."
|
179
|
+
end
|
180
|
+
# Raises an appropriate error message for the given cache.
|
181
|
+
#
|
182
|
+
def raise_cache_missing what
|
183
|
+
raise "#{what} cache for #{identifier} missing."
|
184
|
+
end
|
185
|
+
|
186
|
+
# Warns the user if the similarity index is small.
|
187
|
+
#
|
188
|
+
def warn_if_similarity_small
|
189
|
+
warn_cache_small :similarity if backend.similarity_cache_small?
|
190
|
+
end
|
191
|
+
# Alerts the user if the similarity index is not there.
|
192
|
+
#
|
193
|
+
def raise_unless_similarity_ok
|
194
|
+
raise_cache_missing :similarity unless backend.similarity_cache_ok?
|
195
|
+
end
|
196
|
+
|
197
|
+
# TODO Spec on down.
|
198
|
+
#
|
199
|
+
|
200
|
+
# Warns the user if the core or weights indexes are small.
|
201
|
+
#
|
202
|
+
def warn_if_index_small
|
203
|
+
warn_cache_small :index if backend.index_cache_small?
|
204
|
+
warn_cache_small :weights if backend.weights_cache_small?
|
205
|
+
end
|
206
|
+
# Alerts the user if the core or weights indexes are not there.
|
207
|
+
#
|
208
|
+
def raise_unless_index_ok
|
209
|
+
raise_cache_missing :index unless backend.index_cache_ok?
|
210
|
+
raise_cache_missing :weights unless backend.weights_cache_ok?
|
211
|
+
end
|
212
|
+
|
213
|
+
end
|
214
|
+
|
215
|
+
end
|
216
|
+
|
217
|
+
end
|
218
|
+
|
219
|
+
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
#
|
3
|
+
module Internals
|
4
|
+
|
5
|
+
module Indexing # :nodoc:all
|
6
|
+
|
7
|
+
module Bundle
|
8
|
+
|
9
|
+
# The memory version dumps its generated indexes to disk
|
10
|
+
# (mostly JSON) to load them into memory on startup.
|
11
|
+
#
|
12
|
+
class Memory < Base
|
13
|
+
|
14
|
+
# We're using files for the memory backend.
|
15
|
+
# E.g. dump writes files.
|
16
|
+
#
|
17
|
+
alias backend files
|
18
|
+
|
19
|
+
end
|
20
|
+
|
21
|
+
end
|
22
|
+
|
23
|
+
end
|
24
|
+
|
25
|
+
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
#
|
3
|
+
module Internals
|
4
|
+
|
5
|
+
module Indexing # :nodoc:all
|
6
|
+
|
7
|
+
module Bundle
|
8
|
+
|
9
|
+
# The Redis version uses an Internals::Index::Redis backend
|
10
|
+
# instead of files, e.g. dump writes to that backend.
|
11
|
+
#
|
12
|
+
class Redis < Base
|
13
|
+
|
14
|
+
attr_reader :backend
|
15
|
+
|
16
|
+
def initialize name, configuration, *args
|
17
|
+
super name, configuration, *args
|
18
|
+
|
19
|
+
@backend = Internals::Index::Redis.new name, configuration # TODO Needed?
|
20
|
+
end
|
21
|
+
|
22
|
+
end
|
23
|
+
|
24
|
+
end
|
25
|
+
|
26
|
+
end
|
27
|
+
|
28
|
+
end
|
@@ -0,0 +1,65 @@
|
|
1
|
+
module Internals
|
2
|
+
|
3
|
+
# FIXME Merge into Base, extract common with Indexed::Base.
|
4
|
+
#
|
5
|
+
module Indexing # :nodoc:all
|
6
|
+
# A Bundle is a number of indexes
|
7
|
+
# per [index, category] combination.
|
8
|
+
#
|
9
|
+
# At most, there are three indexes:
|
10
|
+
# * *core* index (always used)
|
11
|
+
# * *weights* index (always used)
|
12
|
+
# * *similarity* index (used with similarity)
|
13
|
+
#
|
14
|
+
# In Picky, indexing is separated from the index
|
15
|
+
# handling itself through a parallel structure.
|
16
|
+
#
|
17
|
+
# Both use methods provided by this base class, but
|
18
|
+
# have very different goals:
|
19
|
+
#
|
20
|
+
# * *Indexing*::*Bundle* is just concerned with creating index files
|
21
|
+
# and providing helper functions to e.g. check the indexes.
|
22
|
+
#
|
23
|
+
# * *Index*::*Bundle* is concerned with loading these index files into
|
24
|
+
# memory and looking up search data as fast as possible.
|
25
|
+
#
|
26
|
+
module Bundle
|
27
|
+
|
28
|
+
class SuperBase
|
29
|
+
|
30
|
+
attr_reader :identifier, :files
|
31
|
+
attr_accessor :index, :weights, :similarity, :configuration, :similarity_strategy
|
32
|
+
|
33
|
+
delegate :clear, :to => :index
|
34
|
+
delegate :[], :[]=, :to => :configuration
|
35
|
+
|
36
|
+
def initialize name, configuration, similarity_strategy
|
37
|
+
@identifier = "#{configuration.identifier}:#{name}"
|
38
|
+
@files = Internals::Index::Files.new name, configuration
|
39
|
+
|
40
|
+
@index = {}
|
41
|
+
@weights = {}
|
42
|
+
@similarity = {}
|
43
|
+
@configuration = {} # A hash with config options.
|
44
|
+
|
45
|
+
@similarity_strategy = similarity_strategy
|
46
|
+
end
|
47
|
+
|
48
|
+
# Get a list of similar texts.
#
# Looks up the texts stored in the similarity index under the
# code that the similarity strategy assigns to the given text.
#
# Note: Does not return the given text itself.
#
# Returns a new array on every call, so the similarity index
# is never modified by a lookup. Returns an empty array if the
# strategy yields no code or nothing is stored under the code.
#
def similar text
  code = similarity_strategy.encoded text
  similar_codes = code && @similarity[code]
  return [] unless similar_codes

  # Filter instead of Array#delete: delete would remove the text
  # from the array held by the similarity index itself.
  #
  similar_codes.reject { |candidate| candidate == text }
end
|
58
|
+
|
59
|
+
end
|
60
|
+
|
61
|
+
end
|
62
|
+
|
63
|
+
end
|
64
|
+
|
65
|
+
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
module Internals
|
2
|
+
|
3
|
+
module Indexing
|
4
|
+
|
5
|
+
class Categories
|
6
|
+
|
7
|
+
attr_reader :categories
|
8
|
+
|
9
|
+
each_delegate :index,
|
10
|
+
:cache,
|
11
|
+
:generate_caches,
|
12
|
+
:backup_caches,
|
13
|
+
:restore_caches,
|
14
|
+
:check_caches,
|
15
|
+
:clear_caches,
|
16
|
+
:create_directory_structure,
|
17
|
+
:to => :categories
|
18
|
+
|
19
|
+
def initialize
|
20
|
+
@categories = []
|
21
|
+
end
|
22
|
+
|
23
|
+
def << category
|
24
|
+
categories << category
|
25
|
+
end
|
26
|
+
|
27
|
+
# Returns the first category whose name matches the given
# category name (converted to a symbol).
#
# Raises if no category has that name; the message lists
# the names of all known categories.
#
def find category_name
  category_name = category_name.to_sym

  match = categories.detect { |candidate| candidate.name == category_name }
  return match if match

  raise %Q{Index category "#{category_name}" not found. Possible categories: "#{categories.map(&:name).join('", "')}".}
end
|
37
|
+
|
38
|
+
end
|
39
|
+
|
40
|
+
end
|
41
|
+
|
42
|
+
end
|
@@ -0,0 +1,120 @@
|
|
1
|
+
module Internals
|
2
|
+
|
3
|
+
module Indexing
|
4
|
+
|
5
|
+
class Category
|
6
|
+
|
7
|
+
attr_reader :exact, :partial, :name, :configuration, :indexer
|
8
|
+
|
9
|
+
# Mandatory params:
|
10
|
+
# * name: Category name to use as identifier and file names.
|
11
|
+
# * index: Index to which this category is attached.
|
12
|
+
# Options:
|
13
|
+
# * partial: Partial::None.new, Partial::Substring.new(from:start_char, to:up_to_char) (defaults from:-3, to:-1)
|
14
|
+
# * similarity: Similarity::None.new (default), Similarity::Phonetic.new(amount_of_similarly_linked_words)
|
15
|
+
# * source: Use if the category should use a different source.
|
16
|
+
# * from: The source category identifier to take the data from.
|
17
|
+
#
|
18
|
+
# Advanced Options (TODO):
|
19
|
+
#
|
20
|
+
# * weights:
|
21
|
+
# * tokenizer:
|
22
|
+
#
|
23
|
+
def initialize name, index, options = {}
|
24
|
+
@name = name
|
25
|
+
@from = options[:from]
|
26
|
+
|
27
|
+
# Now we have enough info to combine the index and the category.
|
28
|
+
#
|
29
|
+
@configuration = Configuration::Index.new index, self
|
30
|
+
|
31
|
+
@tokenizer = options[:tokenizer] || Tokenizers::Index.default
|
32
|
+
@indexer = Indexers::Serial.new configuration, options[:source], @tokenizer
|
33
|
+
|
34
|
+
# TODO Push into Bundle. At least the weights.
|
35
|
+
#
|
36
|
+
partial = options[:partial] || Generators::Partial::Default
|
37
|
+
weights = options[:weights] || Generators::Weights::Default
|
38
|
+
similarity = options[:similarity] || Generators::Similarity::Default
|
39
|
+
|
40
|
+
bundle_class = options[:indexing_bundle_class] || Bundle::Memory
|
41
|
+
@exact = bundle_class.new(:exact, configuration, similarity, Generators::Partial::None.new, weights)
|
42
|
+
@partial = bundle_class.new(:partial, configuration, Generators::Similarity::None.new, partial, weights)
|
43
|
+
end
|
44
|
+
|
45
|
+
delegate :identifier, :prepare_index_directory, :to => :configuration
|
46
|
+
delegate :source, :source=, :tokenizer, :tokenizer=, :to => :indexer
|
47
|
+
|
48
|
+
def from
|
49
|
+
@from || name
|
50
|
+
end
|
51
|
+
|
52
|
+
# TODO Spec.
|
53
|
+
#
|
54
|
+
def backup_caches
|
55
|
+
timed_exclaim "Backing up #{identifier}."
|
56
|
+
exact.backup
|
57
|
+
partial.backup
|
58
|
+
end
|
59
|
+
def restore_caches
|
60
|
+
timed_exclaim "Restoring #{identifier}."
|
61
|
+
exact.restore
|
62
|
+
partial.restore
|
63
|
+
end
|
64
|
+
def check_caches
|
65
|
+
timed_exclaim "Checking #{identifier}."
|
66
|
+
exact.raise_unless_cache_exists
|
67
|
+
partial.raise_unless_cache_exists
|
68
|
+
end
|
69
|
+
def clear_caches
|
70
|
+
timed_exclaim "Deleting #{identifier}."
|
71
|
+
exact.delete
|
72
|
+
partial.delete
|
73
|
+
end
|
74
|
+
|
75
|
+
def index
|
76
|
+
prepare_index_directory
|
77
|
+
indexer.index
|
78
|
+
end
|
79
|
+
|
80
|
+
# Generates all caches for this category.
|
81
|
+
#
|
82
|
+
def cache
|
83
|
+
prepare_index_directory
|
84
|
+
configure
|
85
|
+
generate_caches
|
86
|
+
end
|
87
|
+
# We need to set what formatting method should be used.
|
88
|
+
# Uses the one defined in the indexer.
|
89
|
+
#
|
90
|
+
def configure
|
91
|
+
key_format = indexer.key_format
|
92
|
+
exact[:key_format] = key_format
|
93
|
+
partial[:key_format] = key_format
|
94
|
+
end
|
95
|
+
def generate_caches
|
96
|
+
generate_caches_from_source
|
97
|
+
generate_partial
|
98
|
+
generate_caches_from_memory
|
99
|
+
dump_caches
|
100
|
+
timed_exclaim "CACHE FINISHED #{identifier}."
|
101
|
+
end
|
102
|
+
def generate_caches_from_source
|
103
|
+
exact.generate_caches_from_source
|
104
|
+
end
|
105
|
+
def generate_partial
|
106
|
+
partial.generate_partial_from exact.index
|
107
|
+
end
|
108
|
+
def generate_caches_from_memory
|
109
|
+
partial.generate_caches_from_memory
|
110
|
+
end
|
111
|
+
def dump_caches
|
112
|
+
exact.dump
|
113
|
+
partial.dump
|
114
|
+
end
|
115
|
+
|
116
|
+
end
|
117
|
+
|
118
|
+
end
|
119
|
+
|
120
|
+
end
|
@@ -0,0 +1,67 @@
|
|
1
|
+
# TODO Move to the API.
|
2
|
+
#
|
3
|
+
module Internals
|
4
|
+
|
5
|
+
module Indexing
|
6
|
+
|
7
|
+
class Index
|
8
|
+
|
9
|
+
attr_reader :name, :source, :categories, :after_indexing
|
10
|
+
|
11
|
+
# Delegators for indexing.
|
12
|
+
#
|
13
|
+
delegate :connect_backend,
|
14
|
+
:to => :source
|
15
|
+
|
16
|
+
delegate :index,
|
17
|
+
:cache,
|
18
|
+
:generate_caches,
|
19
|
+
:backup_caches,
|
20
|
+
:restore_caches,
|
21
|
+
:check_caches,
|
22
|
+
:clear_caches,
|
23
|
+
:create_directory_structure,
|
24
|
+
:to => :categories
|
25
|
+
|
26
|
+
def initialize name, source, options = {}
|
27
|
+
@name = name
|
28
|
+
@source = source
|
29
|
+
|
30
|
+
@after_indexing = options[:after_indexing]
|
31
|
+
@bundle_class = options[:indexing_bundle_class] # TODO This should actually be a fixed parameter.
|
32
|
+
|
33
|
+
@categories = Categories.new
|
34
|
+
end
|
35
|
+
|
36
|
+
# TODO Spec. Doc.
|
37
|
+
#
|
38
|
+
def define_category category_name, options = {}
|
39
|
+
options = default_category_options.merge options
|
40
|
+
|
41
|
+
new_category = Category.new category_name, self, options
|
42
|
+
categories << new_category
|
43
|
+
new_category
|
44
|
+
end
|
45
|
+
|
46
|
+
# By default, the category uses
|
47
|
+
# * the index's source.
|
48
|
+
# * the index's bundle type.
|
49
|
+
#
|
50
|
+
def default_category_options
|
51
|
+
{
|
52
|
+
:source => @source,
|
53
|
+
:indexing_bundle_class => @bundle_class
|
54
|
+
}
|
55
|
+
end
|
56
|
+
|
57
|
+
# Indexing.
|
58
|
+
#
|
59
|
+
def take_snapshot
|
60
|
+
source.take_snapshot self
|
61
|
+
end
|
62
|
+
|
63
|
+
end
|
64
|
+
|
65
|
+
end
|
66
|
+
|
67
|
+
end
|
File without changes
|
@@ -0,0 +1,88 @@
|
|
1
|
+
module Internals
|
2
|
+
|
3
|
+
module Query
|
4
|
+
|
5
|
+
# An allocation has a number of combinations:
# [token, index] [other_token, other_index], ...
#
# It is created with these combinations and the result identifier
# under which it is reported in #to_result.
#
class Allocation # :nodoc:all

  attr_reader :count, :ids, :score, :combinations, :result_identifier

  # combinations:      Object answering to hash, calculate_score, ids,
  #                    keep, remove and to_result (see the methods below).
  # result_identifier: First element of the result form.
  #
  def initialize combinations, result_identifier
    @combinations      = combinations
    @result_identifier = result_identifier
  end

  # The hash of an allocation is the hash of its combinations.
  #
  def hash
    @combinations.hash
  end
  # Always true, so hash based lookups treat allocations with
  # the same hash as the same allocation.
  #
  # FIXME Compare the combinations instead:
  #       @combinations.eql? other_allocation.combinations
  #
  def eql? other_allocation
    true
  end

  # Scores its combinations and caches the result.
  #
  # Note: Once a score has been calculated, later calls return
  #       the cached score, regardless of the weights given.
  #
  def calculate_score weights
    @score ||= @combinations.calculate_score(weights)
  end

  # Asks the combinations for the (intersected) ids.
  #
  def calculate_ids amount, offset
    @combinations.ids amount, offset # Calculate as many ids as are necessary.
  end

  # This starts the searching process.
  #
  # Remembers the total number of ids in count and keeps only
  # the slice starting at offset, of length amount, in ids.
  #
  def process! amount, offset
    ids    = calculate_ids amount, offset
    @count = ids.size                       # cache the count before throwing away the ids
    @ids   = ids.slice!(offset, amount) || [] # slice out the relevant part
  end

  # Passes the identifiers to keep on to the combinations.
  #
  def keep identifiers = [] # categories
    @combinations.keep identifiers
  end
  # Passes the identifiers to remove on to the combinations.
  #
  def remove identifiers = [] # categories
    @combinations.remove identifiers
  end

  # Sort highest score first.
  #
  def <=> other_allocation
    other_allocation.score <=> self.score
  end

  # Transform the allocation into result form:
  #   [result_identifier, score, count, combinations in result form, ids]
  #
  # Returns nil if the count is zero.
  #
  def to_result
    [self.result_identifier, self.score, self.count, @combinations.to_result, self.ids] if self.count > 0
  end

  # Json representation of this allocation.
  #
  # Note: Delegates to to_result.
  #
  # Accepts and passes on any arguments, since JSON generators hand
  # their state to to_json when the allocation is nested in another
  # structure, e.g. an array of allocations.
  #
  def to_json *args
    to_result.to_json(*args)
  end

  # String representation: the elements of the result form, joined.
  #
  # An allocation without results has no result form (to_result is
  # nil), in which case just the prefix is returned.
  #
  def to_s
    "Allocation: #{Array(to_result).join(', ')}"
  end

end
|
85
|
+
|
86
|
+
end
|
87
|
+
|
88
|
+
end
|