picky 1.4.1 → 1.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/picky/{alias_instances.rb → aliases.rb} +1 -3
- data/lib/picky/application.rb +18 -19
- data/lib/picky/cores.rb +1 -1
- data/lib/picky/generators/aliases.rb +3 -0
- data/lib/picky/index/base.rb +179 -0
- data/lib/picky/index/memory.rb +28 -0
- data/lib/picky/index/redis.rb +28 -0
- data/lib/picky/{indexes_api.rb → index_bundle.rb} +16 -16
- data/lib/picky/indexed/indexes.rb +11 -7
- data/lib/picky/indexing/indexes.rb +14 -8
- data/lib/picky/internals/adapters/rack/base.rb +27 -0
- data/lib/picky/internals/adapters/rack/live_parameters.rb +37 -0
- data/lib/picky/internals/adapters/rack/query.rb +63 -0
- data/lib/picky/internals/adapters/rack.rb +34 -0
- data/lib/picky/{calculations → internals/calculations}/location.rb +0 -0
- data/lib/picky/{cli.rb → internals/cli.rb} +0 -0
- data/lib/picky/{configuration → internals/configuration}/index.rb +8 -2
- data/lib/picky/{ext → internals/ext}/maybe_compile.rb +0 -0
- data/lib/picky/{ext → internals/ext}/ruby19/extconf.rb +0 -0
- data/lib/picky/{ext → internals/ext}/ruby19/performant.c +0 -0
- data/lib/picky/{extensions → internals/extensions}/array.rb +0 -0
- data/lib/picky/{extensions → internals/extensions}/hash.rb +0 -0
- data/lib/picky/{extensions → internals/extensions}/module.rb +0 -0
- data/lib/picky/{extensions → internals/extensions}/object.rb +0 -0
- data/lib/picky/{extensions → internals/extensions}/symbol.rb +0 -0
- data/lib/picky/internals/frontend_adapters/rack.rb +154 -0
- data/lib/picky/internals/generators/base.rb +19 -0
- data/lib/picky/internals/generators/partial/default.rb +7 -0
- data/lib/picky/internals/generators/partial/none.rb +35 -0
- data/lib/picky/internals/generators/partial/strategy.rb +29 -0
- data/lib/picky/internals/generators/partial/substring.rb +122 -0
- data/lib/picky/internals/generators/partial_generator.rb +19 -0
- data/lib/picky/internals/generators/similarity/default.rb +9 -0
- data/lib/picky/internals/generators/similarity/double_levenshtone.rb +81 -0
- data/lib/picky/internals/generators/similarity/none.rb +35 -0
- data/lib/picky/internals/generators/similarity/strategy.rb +11 -0
- data/lib/picky/internals/generators/similarity_generator.rb +19 -0
- data/lib/picky/internals/generators/strategy.rb +18 -0
- data/lib/picky/internals/generators/weights/default.rb +9 -0
- data/lib/picky/internals/generators/weights/logarithmic.rb +43 -0
- data/lib/picky/internals/generators/weights/strategy.rb +11 -0
- data/lib/picky/internals/generators/weights_generator.rb +19 -0
- data/lib/picky/{helpers → internals/helpers}/measuring.rb +0 -0
- data/lib/picky/internals/index/backend.rb +113 -0
- data/lib/picky/internals/index/file/basic.rb +101 -0
- data/lib/picky/internals/index/file/json.rb +38 -0
- data/lib/picky/internals/index/file/marshal.rb +38 -0
- data/lib/picky/internals/index/file/text.rb +60 -0
- data/lib/picky/internals/index/files.rb +24 -0
- data/lib/picky/internals/index/redis/basic.rb +77 -0
- data/lib/picky/internals/index/redis/list_hash.rb +46 -0
- data/lib/picky/internals/index/redis/string_hash.rb +35 -0
- data/lib/picky/internals/index/redis.rb +44 -0
- data/lib/picky/internals/indexed/bundle/base.rb +72 -0
- data/lib/picky/internals/indexed/bundle/memory.rb +69 -0
- data/lib/picky/internals/indexed/bundle/redis.rb +70 -0
- data/lib/picky/internals/indexed/categories.rb +135 -0
- data/lib/picky/internals/indexed/category.rb +90 -0
- data/lib/picky/internals/indexed/index.rb +57 -0
- data/lib/picky/{indexed → internals/indexed}/wrappers/bundle/calculation.rb +0 -0
- data/lib/picky/{indexed → internals/indexed}/wrappers/bundle/location.rb +4 -2
- data/lib/picky/{indexed → internals/indexed}/wrappers/bundle/wrapper.rb +1 -1
- data/lib/picky/internals/indexed/wrappers/exact_first.rb +65 -0
- data/lib/picky/{indexers → internals/indexers}/no_source_specified_error.rb +0 -0
- data/lib/picky/{indexers → internals/indexers}/serial.rb +2 -2
- data/lib/picky/{indexers → internals/indexers}/solr.rb +0 -0
- data/lib/picky/internals/indexing/bundle/base.rb +219 -0
- data/lib/picky/internals/indexing/bundle/memory.rb +25 -0
- data/lib/picky/internals/indexing/bundle/redis.rb +28 -0
- data/lib/picky/internals/indexing/bundle/super_base.rb +65 -0
- data/lib/picky/internals/indexing/categories.rb +42 -0
- data/lib/picky/internals/indexing/category.rb +120 -0
- data/lib/picky/internals/indexing/index.rb +67 -0
- data/lib/picky/{performant.rb → internals/performant.rb} +0 -0
- data/lib/picky/internals/query/allocation.rb +88 -0
- data/lib/picky/internals/query/allocations.rb +137 -0
- data/lib/picky/internals/query/combination.rb +80 -0
- data/lib/picky/internals/query/combinations/base.rb +84 -0
- data/lib/picky/internals/query/combinations/memory.rb +58 -0
- data/lib/picky/internals/query/combinations/redis.rb +59 -0
- data/lib/picky/internals/query/indexes.rb +180 -0
- data/lib/picky/internals/query/qualifiers.rb +81 -0
- data/lib/picky/internals/query/token.rb +215 -0
- data/lib/picky/internals/query/tokens.rb +89 -0
- data/lib/picky/{query → internals/query}/weights.rb +0 -0
- data/lib/picky/internals/results/base.rb +106 -0
- data/lib/picky/internals/results/full.rb +17 -0
- data/lib/picky/internals/results/live.rb +17 -0
- data/lib/picky/{solr → internals/solr}/schema_generator.rb +0 -0
- data/lib/picky/internals/tokenizers/base.rb +166 -0
- data/lib/picky/internals/tokenizers/index.rb +63 -0
- data/lib/picky/internals/tokenizers/query.rb +79 -0
- data/lib/picky/loader.rb +148 -112
- data/lib/picky/query/base.rb +57 -26
- data/lib/picky/query/full.rb +1 -1
- data/lib/picky/query/live.rb +1 -1
- data/lib/picky/sources/db.rb +27 -6
- data/lib/tasks/index.rake +3 -3
- data/lib/tasks/try.rake +2 -2
- data/spec/lib/aliases_spec.rb +9 -0
- data/spec/lib/application_spec.rb +3 -3
- data/spec/lib/generators/aliases_spec.rb +1 -0
- data/spec/lib/{index_api_spec.rb → index/base_spec.rb} +7 -7
- data/spec/lib/index_bundle_spec.rb +71 -0
- data/spec/lib/indexed/indexes_spec.rb +61 -0
- data/spec/lib/indexing/indexes_spec.rb +94 -24
- data/spec/lib/{adapters → internals/adapters}/rack/base_spec.rb +2 -2
- data/spec/lib/{adapters → internals/adapters}/rack/live_parameters_spec.rb +2 -2
- data/spec/lib/{adapters → internals/adapters}/rack/query_spec.rb +2 -2
- data/spec/lib/{calculations → internals/calculations}/location_spec.rb +0 -0
- data/spec/lib/{cli_spec.rb → internals/cli_spec.rb} +4 -1
- data/spec/lib/{configuration → internals/configuration}/index_spec.rb +1 -1
- data/spec/lib/{cores_spec.rb → internals/cores_spec.rb} +0 -0
- data/spec/lib/{extensions → internals/extensions}/array_spec.rb +0 -0
- data/spec/lib/{extensions → internals/extensions}/hash_spec.rb +0 -0
- data/spec/lib/{extensions → internals/extensions}/module_spec.rb +0 -0
- data/spec/lib/{extensions → internals/extensions}/object_spec.rb +0 -0
- data/spec/lib/{extensions → internals/extensions}/symbol_spec.rb +0 -0
- data/spec/lib/{frontend_adapters → internals/frontend_adapters}/rack_spec.rb +11 -11
- data/spec/lib/{cacher → internals/generators}/cacher_strategy_spec.rb +2 -2
- data/spec/lib/internals/generators/partial/default_spec.rb +17 -0
- data/spec/lib/internals/generators/partial/none_spec.rb +17 -0
- data/spec/lib/{cacher → internals/generators}/partial/substring_spec.rb +26 -27
- data/spec/lib/{cacher → internals/generators}/partial_generator_spec.rb +5 -5
- data/spec/lib/{cacher → internals/generators}/similarity/double_levenshtone_spec.rb +4 -4
- data/spec/lib/{cacher → internals/generators}/similarity/none_spec.rb +2 -2
- data/spec/lib/{cacher → internals/generators}/similarity_generator_spec.rb +4 -4
- data/spec/lib/{cacher → internals/generators}/weights/logarithmic_spec.rb +2 -2
- data/spec/lib/internals/generators/weights_generator_spec.rb +21 -0
- data/spec/lib/{helpers → internals/helpers}/measuring_spec.rb +0 -0
- data/spec/lib/{index → internals/index}/file/basic_spec.rb +2 -2
- data/spec/lib/{index → internals/index}/file/json_spec.rb +2 -2
- data/spec/lib/{index → internals/index}/file/marshal_spec.rb +2 -2
- data/spec/lib/{index → internals/index}/file/text_spec.rb +2 -2
- data/spec/lib/{index → internals/index}/files_spec.rb +2 -2
- data/spec/lib/{indexed/bundle_spec.rb → internals/indexed/bundle/memory_spec.rb} +4 -5
- data/spec/lib/{indexed → internals/indexed}/categories_spec.rb +13 -13
- data/spec/lib/{indexed → internals/indexed}/category_spec.rb +59 -32
- data/spec/lib/{indexed → internals/indexed}/index_spec.rb +5 -5
- data/spec/lib/{indexed → internals/indexed}/wrappers/bundle/calculation_spec.rb +0 -0
- data/spec/lib/{indexed → internals/indexed}/wrappers/bundle/wrapper_spec.rb +0 -0
- data/spec/lib/{indexed → internals/indexed}/wrappers/exact_first_spec.rb +5 -5
- data/spec/lib/{indexers → internals/indexers}/serial_spec.rb +0 -0
- data/spec/lib/{indexing/bundle_partial_generation_speed_spec.rb → internals/indexing/bundle/memory_partial_generation_speed_spec.rb} +3 -3
- data/spec/lib/{indexing/bundle_spec.rb → internals/indexing/bundle/memory_spec.rb} +3 -3
- data/spec/lib/{index/bundle_spec.rb → internals/indexing/bundle/super_base_spec.rb} +9 -3
- data/spec/lib/{indexing → internals/indexing}/category_spec.rb +3 -3
- data/spec/lib/{indexing → internals/indexing}/index_spec.rb +3 -3
- data/spec/lib/internals/indexing/indexes_spec.rb +36 -0
- data/spec/lib/{interfaces → internals/interfaces}/live_parameters_spec.rb +0 -0
- data/spec/lib/internals/results/base_spec.rb +105 -0
- data/spec/lib/internals/results/full_spec.rb +78 -0
- data/spec/lib/internals/results/live_spec.rb +88 -0
- data/spec/lib/{solr → internals/solr}/schema_generator_spec.rb +0 -0
- data/spec/lib/{tokenizers → internals/tokenizers}/base_spec.rb +3 -3
- data/spec/lib/{tokenizers → internals/tokenizers}/index_spec.rb +9 -9
- data/spec/lib/{tokenizers → internals/tokenizers}/query_spec.rb +11 -11
- data/spec/lib/query/allocation_spec.rb +12 -12
- data/spec/lib/query/allocations_spec.rb +19 -19
- data/spec/lib/query/base_spec.rb +28 -4
- data/spec/lib/query/combination_spec.rb +8 -9
- data/spec/lib/query/combinations/base_spec.rb +116 -0
- data/spec/lib/query/{combinations_spec.rb → combinations/memory_spec.rb} +14 -14
- data/spec/lib/query/combinations/redis_spec.rb +132 -0
- data/spec/lib/query/full_spec.rb +2 -2
- data/spec/lib/query/indexes_spec.rb +81 -0
- data/spec/lib/query/live_spec.rb +3 -3
- data/spec/lib/query/qualifiers_spec.rb +6 -6
- data/spec/lib/query/token_spec.rb +38 -38
- data/spec/lib/query/tokens_spec.rb +35 -35
- data/spec/lib/sources/db_spec.rb +23 -18
- metadata +212 -181
- data/lib/picky/adapters/rack/base.rb +0 -23
- data/lib/picky/adapters/rack/live_parameters.rb +0 -33
- data/lib/picky/adapters/rack/query.rb +0 -59
- data/lib/picky/adapters/rack.rb +0 -28
- data/lib/picky/cacher/convenience.rb +0 -3
- data/lib/picky/cacher/generator.rb +0 -15
- data/lib/picky/cacher/partial/default.rb +0 -5
- data/lib/picky/cacher/partial/none.rb +0 -31
- data/lib/picky/cacher/partial/strategy.rb +0 -21
- data/lib/picky/cacher/partial/substring.rb +0 -118
- data/lib/picky/cacher/partial_generator.rb +0 -15
- data/lib/picky/cacher/similarity/default.rb +0 -7
- data/lib/picky/cacher/similarity/double_levenshtone.rb +0 -77
- data/lib/picky/cacher/similarity/none.rb +0 -31
- data/lib/picky/cacher/similarity/strategy.rb +0 -9
- data/lib/picky/cacher/similarity_generator.rb +0 -15
- data/lib/picky/cacher/strategy.rb +0 -12
- data/lib/picky/cacher/weights/default.rb +0 -7
- data/lib/picky/cacher/weights/logarithmic.rb +0 -39
- data/lib/picky/cacher/weights/strategy.rb +0 -9
- data/lib/picky/cacher/weights_generator.rb +0 -15
- data/lib/picky/frontend_adapters/rack.rb +0 -150
- data/lib/picky/index/bundle.rb +0 -54
- data/lib/picky/index/file/basic.rb +0 -97
- data/lib/picky/index/file/json.rb +0 -34
- data/lib/picky/index/file/marshal.rb +0 -34
- data/lib/picky/index/file/text.rb +0 -56
- data/lib/picky/index/files.rb +0 -118
- data/lib/picky/index_api.rb +0 -175
- data/lib/picky/indexed/bundle.rb +0 -54
- data/lib/picky/indexed/categories.rb +0 -131
- data/lib/picky/indexed/category.rb +0 -85
- data/lib/picky/indexed/index.rb +0 -39
- data/lib/picky/indexed/wrappers/exact_first.rb +0 -61
- data/lib/picky/indexing/bundle.rb +0 -213
- data/lib/picky/indexing/categories.rb +0 -38
- data/lib/picky/indexing/category.rb +0 -117
- data/lib/picky/indexing/index.rb +0 -55
- data/lib/picky/query/allocation.rb +0 -82
- data/lib/picky/query/allocations.rb +0 -130
- data/lib/picky/query/combination.rb +0 -74
- data/lib/picky/query/combinations.rb +0 -105
- data/lib/picky/query/qualifiers.rb +0 -77
- data/lib/picky/query/token.rb +0 -202
- data/lib/picky/query/tokens.rb +0 -86
- data/lib/picky/query/weigher.rb +0 -165
- data/lib/picky/results/base.rb +0 -102
- data/lib/picky/results/full.rb +0 -13
- data/lib/picky/results/live.rb +0 -13
- data/lib/picky/tokenizers/base.rb +0 -161
- data/lib/picky/tokenizers/index.rb +0 -58
- data/lib/picky/tokenizers/query.rb +0 -74
- data/spec/lib/cacher/partial/default_spec.rb +0 -15
- data/spec/lib/cacher/partial/none_spec.rb +0 -17
- data/spec/lib/cacher/weights_generator_spec.rb +0 -21
- data/spec/lib/results/base_spec.rb +0 -257
- data/spec/lib/results/live_spec.rb +0 -15
|
@@ -0,0 +1,219 @@
|
|
|
1
|
+
# encoding: utf-8
|
|
2
|
+
#
|
|
3
|
+
module Internals

  module Indexing # :nodoc:all

    module Bundle

      # Indexing-side bundle.
      #
      # Adds the partial and weights strategies to SuperBase and takes care
      # of the housekeeping around an index: loading the prepared data,
      # generating the derived indexes (weights, similarity), dumping
      # everything through the backend and checking the resulting caches.
      #
      # Note: #backend is called in here but not defined in this class;
      # concrete bundles have to provide it.
      #
      class Base < SuperBase

        attr_accessor :partial_strategy, :weights_strategy

        # name, configuration and similarity_strategy are handed on to
        # SuperBase; the partial and weights strategies are kept here.
        #
        def initialize(name, configuration, similarity_strategy, partial_strategy, weights_strategy)
          super(name, configuration, similarity_strategy)

          @partial_strategy = partial_strategy
          @weights_strategy = weights_strategy
        end

        # Makes sure the index has an entry (an array) for the given token
        # and returns that entry.
        #
        def initialize_index_for(token)
          index[token] || (index[token] = [])
        end

        # Generation
        #

        # Loads the prepared index data (see #load_from_index_file) and then
        # generates the derived indexes from it.
        #
        # Note: Nothing is dumped in here, see #dump for that.
        #
        def generate_caches_from_source
          load_from_index_file
          generate_caches_from_memory
        end

        # Generates the derived indexes from whatever is currently in the index.
        #
        # Note: Assumes that there is something in the index.
        #
        def generate_caches_from_memory
          cache_from_memory_generation_message
          generate_derived
        end

        # Announces the generation of the derived indexes.
        #
        def cache_from_memory_generation_message
          timed_exclaim("CACHE FROM MEMORY #{identifier}.")
        end

        # Generates the weights, then the similarity, from the main index.
        #
        def generate_derived
          generate_weights
          generate_similarity
        end

        # Empties the index and fills it again using #retrieve.
        #
        def load_from_index_file
          load_from_index_generation_message
          clear
          retrieve
        end

        # Announces the loading of the index.
        #
        def load_from_index_generation_message
          timed_exclaim("LOAD INDEX #{identifier}.")
        end

        # Pulls the prepared [id, token] pairs from the files into the index.
        #
        # Each id is converted with the configured :key_format method
        # (:to_i if none has been configured) before it is stored.
        #
        # This is in preparation for generating derived indexes
        # (like weights, similarity) and later dumping the optimized index.
        #
        def retrieve
          conversion = self[:key_format] || :to_i
          files.retrieve do |raw_id, token|
            initialize_index_for(token)
            index[token] << raw_id.send(conversion) # TODO Rewrite. Move this into the specific indexing.
          end
        end

        # Replaces the index with the one generated from it
        # by the partial strategy of this bundle.
        #
        def generate_partial
          self.index = Generators::PartialGenerator.new(index).generate(partial_strategy)
        end

        # Takes over the given exact index and generates
        # the partial index from it. Returns the bundle itself.
        #
        def generate_partial_from(exact_index)
          timed_exclaim("PARTIAL GENERATE #{identifier}.")
          self.index = exact_index
          generate_partial
          self
        end

        # Generates the similarity index from the index
        # using the similarity strategy of this bundle.
        #
        def generate_similarity
          self.similarity = Generators::SimilarityGenerator.new(index).generate(similarity_strategy)
        end

        # Generates the weights index from the index
        # using the weights strategy of this bundle.
        #
        def generate_weights
          self.weights = Generators::WeightsGenerator.new(index).generate(weights_strategy)
        end

        # Dumps index, similarity, weights and configuration
        # (in this order) through the backend.
        #
        def dump
          dump_index
          dump_similarity
          dump_weights
          dump_configuration
        end

        # Dumps the core index.
        #
        def dump_index
          timed_exclaim("DUMP INDEX #{identifier}.")
          backend.dump_index(index)
        end

        # Dumps the weights index.
        #
        def dump_weights
          timed_exclaim("DUMP WEIGHTS #{identifier}.")
          backend.dump_weights(weights)
        end

        # Dumps the similarity index.
        #
        def dump_similarity
          timed_exclaim("DUMP SIMILARITY #{identifier}.")
          backend.dump_similarity(similarity)
        end

        # Dumps the configuration.
        #
        def dump_configuration
          timed_exclaim("DUMP CONFIGURATION #{identifier}.")
          backend.dump_configuration(configuration)
        end

        # Checks the index caches and the similarity cache,
        # raising if a needed one is missing.
        #
        def raise_unless_cache_exists
          raise_unless_index_exists
          raise_unless_similarity_exists
        end

        # Warns about small and raises on missing core/weights caches.
        #
        # Note: Only checked if the partial strategy says it is saved.
        #
        def raise_unless_index_exists
          return unless partial_strategy.saved?

          warn_if_index_small
          raise_unless_index_ok
        end

        # Warns about a small and raises on a missing similarity cache.
        #
        # Note: Only checked if the similarity strategy says it is saved.
        #
        def raise_unless_similarity_exists
          return unless similarity_strategy.saved?

          warn_if_similarity_small
          raise_unless_similarity_ok
        end

        # Prints a warning for the given cache.
        #
        def warn_cache_small(what)
          puts "Warning: #{what} cache for #{identifier} smaller than 16 bytes."
        end

        # Raises with a message naming the given cache.
        #
        def raise_cache_missing(what)
          raise "#{what} cache for #{identifier} missing."
        end

        # Warns if the backend reports a small similarity cache.
        #
        def warn_if_similarity_small
          if backend.similarity_cache_small?
            warn_cache_small(:similarity)
          end
        end

        # Raises unless the backend reports the similarity cache as ok.
        #
        def raise_unless_similarity_ok
          unless backend.similarity_cache_ok?
            raise_cache_missing(:similarity)
          end
        end

        # TODO Spec on down.
        #

        # Warns if the backend reports a small core or weights cache.
        #
        def warn_if_index_small
          if backend.index_cache_small?
            warn_cache_small(:index)
          end
          if backend.weights_cache_small?
            warn_cache_small(:weights)
          end
        end

        # Raises unless the backend reports both the core
        # and the weights cache as ok.
        #
        def raise_unless_index_ok
          unless backend.index_cache_ok?
            raise_cache_missing(:index)
          end
          unless backend.weights_cache_ok?
            raise_cache_missing(:weights)
          end
        end

      end

    end

  end

end
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# encoding: utf-8
|
|
2
|
+
#
|
|
3
|
+
module Internals

  module Indexing # :nodoc:all

    module Bundle

      # Indexing bundle for the memory variant.
      #
      # The generated indexes are dumped to disk (mostly JSON)
      # so they can be loaded into memory on startup.
      #
      class Memory < Base

        # The files double as the backend here,
        # i.e. everything dumped via the backend ends up in files.
        #
        alias_method :backend, :files

      end

    end

  end

end
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
# encoding: utf-8
|
|
2
|
+
#
|
|
3
|
+
module Internals

  module Indexing # :nodoc:all

    module Bundle

      # Indexing bundle for the Redis variant.
      #
      # Instead of using the files as backend, it sets up its own
      # Internals::Index::Redis backend, so everything that is
      # dumped via the backend is handed to that object.
      #
      class Redis < Base

        attr_reader :backend

        # Takes the same parameters as Base and additionally creates
        # the Redis backend from the name and the configuration.
        #
        def initialize(name, configuration, *args)
          super

          @backend = Internals::Index::Redis.new(name, configuration) # TODO Needed?
        end

      end

    end

  end

end
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
module Internals

  # FIXME Merge into Base, extract common with Indexed::Base.
  #
  module Indexing # :nodoc:all
    # A Bundle is a number of indexes
    # per [index, category] combination.
    #
    # At most, there are three indexes:
    # * *core* index (always used)
    # * *weights* index (always used)
    # * *similarity* index (used with similarity)
    #
    # In Picky, indexing is separated from the index
    # handling itself through a parallel structure.
    #
    # Both use methods provided by this base class, but
    # have very different goals:
    #
    # * *Indexing*::*Bundle* is just concerned with creating index files
    #   and providing helper functions to e.g. check the indexes.
    #
    # * *Index*::*Bundle* is concerned with loading these index files into
    #   memory and looking up search data as fast as possible.
    #
    module Bundle

      # Holds the state every indexing bundle has: identifier, files,
      # the three indexes, a configuration hash and the similarity strategy.
      #
      class SuperBase

        attr_reader :identifier, :files
        attr_accessor :index, :weights, :similarity, :configuration, :similarity_strategy

        # bundle.clear empties the index,
        # bundle[key] / bundle[key] = value access the configuration hash.
        #
        delegate :clear, :to => :index
        delegate :[], :[]=, :to => :configuration

        # Parameters:
        # * name: Name of the bundle, used for the identifier and the files.
        # * configuration: Provides the identifier prefix and is handed to the files.
        # * similarity_strategy: Used to encode texts in #similar.
        #
        # Note: The configuration accessor of the bundle is NOT the given
        # configuration, but a fresh hash with config options.
        #
        def initialize name, configuration, similarity_strategy
          @identifier = "#{configuration.identifier}:#{name}"
          @files      = Internals::Index::Files.new name, configuration

          @index         = {}
          @weights       = {}
          @similarity    = {}
          @configuration = {} # A hash with config options.

          @similarity_strategy = similarity_strategy
        end

        # Get a list of similar texts.
        #
        # Returns an empty array if the text cannot be encoded
        # or if nothing is stored for its code.
        #
        # Note: Does not return itself.
        #
        def similar text
          code = similarity_strategy.encoded text
          similar_codes = code && @similarity[code]
          return [] unless similar_codes

          # Work on a copy: deleting from the stored list would remove
          # the text from the similarity index for all later lookups.
          #
          similar_codes = similar_codes.dup
          similar_codes.delete text
          similar_codes
        end

      end

    end

  end

end
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
module Internals

  module Indexing

    # A list of indexing categories.
    #
    class Categories

      attr_reader :categories

      # The indexing steps are handed on to the categories.
      #
      # (each_delegate is defined elsewhere; presumably it calls the
      # method on each of the categories - verify against its definition.)
      #
      each_delegate :index,
                    :cache,
                    :generate_caches,
                    :backup_caches,
                    :restore_caches,
                    :check_caches,
                    :clear_caches,
                    :create_directory_structure,
                    :to => :categories

      # Starts out without any categories.
      #
      def initialize
        @categories = []
      end

      # Appends the given category.
      #
      def <<(category)
        categories << category
      end

      # Returns the first category with the given name
      # (the name is converted to a symbol before comparing).
      #
      # Raises if there is no such category, listing the possible names.
      #
      def find(category_name)
        category_name = category_name.to_sym

        match = categories.find { |candidate| candidate.name == category_name }
        return match if match

        raise %Q{Index category "#{category_name}" not found. Possible categories: "#{categories.map(&:name).join('", "')}".}
      end

    end

  end

end
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
module Internals

  module Indexing

    # An indexing category: combines an index with a category name and
    # owns an exact and a partial bundle plus the indexer filling them.
    #
    class Category

      attr_reader :exact, :partial, :name, :configuration, :indexer

      delegate :identifier, :prepare_index_directory, :to => :configuration
      delegate :source, :source=, :tokenizer, :tokenizer=, :to => :indexer

      # Mandatory params:
      # * name: Category name to use as identifier and file names.
      # * index: Index to which this category is attached to.
      # Options:
      # * partial: Partial::None.new, Partial::Substring.new(from:start_char, to:up_to_char) (defaults from:-3, to:-1)
      # * similarity: Similarity::None.new (default), Similarity::Phonetic.new(amount_of_similarly_linked_words)
      # * source: Use if the category should use a different source.
      # * from: The source category identifier to take the data from.
      # * indexing_bundle_class: Class of the exact and partial bundles (Bundle::Memory if not given).
      #
      # Advanced Options (TODO):
      #
      # * weights:
      # * tokenizer:
      #
      def initialize(name, index, options = {})
        @name = name
        @from = options[:from]

        # Now we have enough info to combine the index and the category.
        #
        @configuration = Configuration::Index.new(index, self)

        @tokenizer = options[:tokenizer] || Tokenizers::Index.default
        @indexer   = Indexers::Serial.new(configuration, options[:source], @tokenizer)

        # TODO Push into Bundle. At least the weights.
        #
        partial_strategy    = options[:partial]    || Generators::Partial::Default
        weights_strategy    = options[:weights]    || Generators::Weights::Default
        similarity_strategy = options[:similarity] || Generators::Similarity::Default

        # The exact bundle never generates partials,
        # the partial bundle never generates similarity.
        #
        bundle_class = options[:indexing_bundle_class] || Bundle::Memory
        @exact   = bundle_class.new(:exact,   configuration, similarity_strategy,              Generators::Partial::None.new, weights_strategy)
        @partial = bundle_class.new(:partial, configuration, Generators::Similarity::None.new, partial_strategy,              weights_strategy)
      end

      # The :from option if one was given, the category name otherwise.
      #
      def from
        @from || name
      end

      # TODO Spec.
      #

      # Backs up the exact, then the partial bundle.
      #
      def backup_caches
        timed_exclaim("Backing up #{identifier}.")
        exact.backup
        partial.backup
      end

      # Restores the exact, then the partial bundle.
      #
      def restore_caches
        timed_exclaim("Restoring #{identifier}.")
        exact.restore
        partial.restore
      end

      # Lets both bundles raise if one of their caches is missing.
      #
      def check_caches
        timed_exclaim("Checking #{identifier}.")
        exact.raise_unless_cache_exists
        partial.raise_unless_cache_exists
      end

      # Deletes the exact, then the partial bundle.
      #
      def clear_caches
        timed_exclaim("Deleting #{identifier}.")
        exact.delete
        partial.delete
      end

      # Prepares the index directory and runs the indexer.
      #
      def index
        prepare_index_directory
        indexer.index
      end

      # Generates all caches for this category.
      #
      def cache
        prepare_index_directory
        configure
        generate_caches
      end

      # We need to set what formatting method should be used.
      # Uses the one defined in the indexer.
      #
      def configure
        format = indexer.key_format
        exact[:key_format]   = format
        partial[:key_format] = format
      end

      # Runs the complete cache generation:
      # exact from source, partial from exact, derived indexes
      # of the partial bundle, dump of both bundles.
      #
      def generate_caches
        generate_caches_from_source
        generate_partial
        generate_caches_from_memory
        dump_caches
        timed_exclaim("CACHE FINISHED #{identifier}.")
      end

      # Lets the exact bundle load its data and generate its derived indexes.
      #
      def generate_caches_from_source
        exact.generate_caches_from_source
      end

      # Lets the partial bundle generate its index from the exact index.
      #
      def generate_partial
        partial.generate_partial_from(exact.index)
      end

      # Lets the partial bundle generate its derived indexes.
      #
      def generate_caches_from_memory
        partial.generate_caches_from_memory
      end

      # Dumps the exact, then the partial bundle.
      #
      def dump_caches
        exact.dump
        partial.dump
      end

    end

  end

end
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
# TODO Move to the API.
|
|
2
|
+
#
|
|
3
|
+
module Internals

  module Indexing

    # An indexing index: a name, a source and the categories defined on it.
    #
    class Index

      attr_reader :name, :source, :categories, :after_indexing

      # Delegators for indexing.
      #
      delegate :connect_backend,
               :to => :source

      delegate :index,
               :cache,
               :generate_caches,
               :backup_caches,
               :restore_caches,
               :check_caches,
               :clear_caches,
               :create_directory_structure,
               :to => :categories

      # Params:
      # * name: Name of the index.
      # * source: Source of the index, also the default source of its categories.
      # Options:
      # * after_indexing: Kept as is, readable via #after_indexing.
      # * indexing_bundle_class: Default bundle class of the categories.
      #
      def initialize(name, source, options = {})
        @name   = name
        @source = source

        @after_indexing = options[:after_indexing]
        @bundle_class   = options[:indexing_bundle_class] # TODO This should actually be a fixed parameter.

        @categories = Categories.new
      end

      # TODO Spec. Doc.
      #
      # Creates a category with the given name on this index, adds it
      # to the categories and returns it.
      #
      # The given options override the default category options.
      #
      def define_category(category_name, options = {})
        merged_options = default_category_options.merge(options)

        Category.new(category_name, self, merged_options).tap do |category|
          categories << category
        end
      end

      # By default, the category uses
      # * the index's source.
      # * the index's bundle type.
      #
      def default_category_options
        { :source => @source, :indexing_bundle_class => @bundle_class }
      end

      # Indexing.
      #

      # Asks the source to take a snapshot for this index.
      #
      def take_snapshot
        source.take_snapshot(self)
      end

    end

  end

end
|
|
File without changes
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
module Internals

  module Query

    # An allocation has a number of combinations:
    # [token, index] [other_token, other_index], ...
    #
    class Allocation # :nodoc:all

      attr_reader :count, :ids, :score, :combinations, :result_identifier

      # Params:
      # * combinations: The combinations this allocation consists of.
      # * result_identifier: Put first into the result, see #to_result.
      #
      def initialize(combinations, result_identifier)
        @combinations      = combinations
        @result_identifier = result_identifier
      end

      # An allocation hashes like its combinations.
      #
      def hash
        @combinations.hash
      end

      # Note: Always true, so allocations with the same hash
      # are treated as the same allocation by hash based lookups.
      #
      def eql?(other_allocation)
        true # FIXME
        # @combinations.eql? other_allocation.combinations
      end

      # Scores its combinations and caches the result.
      #
      def calculate_score(weights)
        @score ||= @combinations.calculate_score(weights)
      end

      # Asks the combinations for the (intersected) ids.
      #
      def calculate_ids(amount, offset)
        @combinations.ids amount, offset # Calculate as many ids as are necessary.
      end

      # This starts the searching process.
      #
      # Sets count to the number of calculated ids and ids to the
      # slice of at most amount ids starting at offset ([] if the
      # offset is beyond the calculated ids).
      #
      def process!(amount, offset)
        ids    = calculate_ids amount, offset
        @count = ids.size                        # cache the count before throwing away the ids
        @ids   = ids.slice!(offset, amount) || [] # slice out the relevant part
      end

      # Hands the identifiers (categories) to keep on to the combinations.
      #
      def keep(identifiers = []) # categories
        @combinations.keep identifiers
      end

      # Hands the identifiers (categories) to remove on to the combinations.
      #
      def remove(identifiers = []) # categories
        @combinations.remove identifiers
      end

      # Sort highest score first.
      #
      def <=>(other_allocation)
        other_allocation.score <=> score
      end

      # Transform the allocation into result form:
      # [result_identifier, score, count, combinations result, ids]
      #
      # Returns nil if the allocation has no results, which includes
      # an allocation that has not been processed yet (see #process!).
      #
      def to_result
        return unless count && count > 0

        [result_identifier, score, count, @combinations.to_result, ids]
      end

      # Json representation of this allocation.
      #
      # Accepts (and passes on) whatever the JSON generator hands in,
      # so allocations can also be rendered as part of arrays and hashes.
      #
      # Note: Delegates to to_result.
      #
      def to_json(*args)
        to_result.to_json(*args)
      end

      # The result, joined with commas. Just "Allocation: " if
      # there is no result.
      #
      def to_s
        "Allocation: #{Array(to_result).join(', ')}"
      end

    end

  end

end
|