picky 2.7.0 → 3.0.0.pre1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/picky/adapters/rack/base.rb +20 -16
- data/lib/picky/adapters/rack/live_parameters.rb +28 -24
- data/lib/picky/adapters/rack/search.rb +67 -0
- data/lib/picky/adapters/rack.rb +27 -23
- data/lib/picky/application.rb +246 -236
- data/lib/picky/backend/base.rb +115 -119
- data/lib/picky/backend/file/basic.rb +102 -98
- data/lib/picky/backend/file/json.rb +27 -23
- data/lib/picky/backend/file/marshal.rb +32 -28
- data/lib/picky/backend/file/text.rb +45 -41
- data/lib/picky/backend/files.rb +19 -15
- data/lib/picky/backend/redis/basic.rb +76 -72
- data/lib/picky/backend/redis/list_hash.rb +40 -36
- data/lib/picky/backend/redis/string_hash.rb +30 -26
- data/lib/picky/backend/redis.rb +32 -28
- data/lib/picky/bundle.rb +82 -57
- data/lib/{bundling.rb → picky/bundling.rb} +0 -0
- data/lib/picky/calculations/location.rb +51 -47
- data/lib/picky/categories.rb +60 -56
- data/lib/picky/categories_indexed.rb +73 -82
- data/lib/picky/categories_indexing.rb +12 -8
- data/lib/picky/category.rb +109 -120
- data/lib/picky/category_indexed.rb +39 -41
- data/lib/picky/category_indexing.rb +123 -125
- data/lib/picky/character_substituters/west_european.rb +32 -26
- data/lib/{constants.rb → picky/constants.rb} +0 -0
- data/lib/picky/cores.rb +96 -92
- data/lib/{deployment.rb → picky/deployment.rb} +0 -0
- data/lib/picky/frontend_adapters/rack.rb +133 -118
- data/lib/picky/generators/aliases.rb +5 -3
- data/lib/picky/generators/base.rb +11 -7
- data/lib/picky/generators/partial/default.rb +7 -3
- data/lib/picky/generators/partial/none.rb +24 -20
- data/lib/picky/generators/partial/strategy.rb +20 -16
- data/lib/picky/generators/partial/substring.rb +94 -90
- data/lib/picky/generators/partial_generator.rb +11 -7
- data/lib/picky/generators/similarity/default.rb +9 -5
- data/lib/picky/generators/similarity/double_metaphone.rb +20 -16
- data/lib/picky/generators/similarity/metaphone.rb +20 -16
- data/lib/picky/generators/similarity/none.rb +23 -19
- data/lib/picky/generators/similarity/phonetic.rb +49 -45
- data/lib/picky/generators/similarity/soundex.rb +20 -16
- data/lib/picky/generators/similarity/strategy.rb +10 -6
- data/lib/picky/generators/similarity_generator.rb +11 -7
- data/lib/picky/generators/strategy.rb +14 -10
- data/lib/picky/generators/weights/default.rb +9 -5
- data/lib/picky/generators/weights/logarithmic.rb +30 -26
- data/lib/picky/generators/weights/strategy.rb +10 -6
- data/lib/picky/generators/weights_generator.rb +11 -7
- data/lib/picky/helpers/measuring.rb +20 -16
- data/lib/picky/indexed/bundle/base.rb +39 -37
- data/lib/picky/indexed/bundle/memory.rb +68 -64
- data/lib/picky/indexed/bundle/redis.rb +73 -69
- data/lib/picky/indexed/wrappers/bundle/calculation.rb +26 -22
- data/lib/picky/indexed/wrappers/bundle/location.rb +30 -26
- data/lib/picky/indexed/wrappers/bundle/wrapper.rb +36 -32
- data/lib/picky/indexed/wrappers/category/location.rb +17 -13
- data/lib/picky/indexed/wrappers/exact_first.rb +46 -42
- data/lib/picky/indexers/base.rb +26 -22
- data/lib/picky/indexers/parallel.rb +62 -58
- data/lib/picky/indexers/serial.rb +41 -37
- data/lib/picky/indexes/index.rb +400 -0
- data/lib/picky/indexes/index_indexed.rb +24 -0
- data/lib/picky/indexes/index_indexing.rb +138 -0
- data/lib/picky/indexes/memory.rb +20 -0
- data/lib/picky/indexes/redis.rb +20 -0
- data/lib/picky/indexes.rb +68 -61
- data/lib/picky/indexes_indexed.rb +16 -12
- data/lib/picky/indexes_indexing.rb +41 -37
- data/lib/picky/indexing/bundle/base.rb +216 -205
- data/lib/picky/indexing/bundle/memory.rb +16 -11
- data/lib/picky/indexing/bundle/redis.rb +14 -12
- data/lib/picky/indexing/wrappers/category/location.rb +17 -13
- data/lib/picky/interfaces/live_parameters.rb +159 -154
- data/lib/picky/loader.rb +267 -304
- data/lib/picky/loggers/search.rb +20 -13
- data/lib/picky/no_source_specified_exception.rb +7 -3
- data/lib/picky/performant.rb +6 -2
- data/lib/picky/query/allocation.rb +71 -67
- data/lib/picky/query/allocations.rb +99 -94
- data/lib/picky/query/combination.rb +70 -66
- data/lib/picky/query/combinations/base.rb +56 -52
- data/lib/picky/query/combinations/memory.rb +36 -32
- data/lib/picky/query/combinations/redis.rb +66 -62
- data/lib/picky/query/indexes.rb +175 -160
- data/lib/picky/query/qualifier_category_mapper.rb +43 -0
- data/lib/picky/query/token.rb +165 -172
- data/lib/picky/query/tokens.rb +86 -82
- data/lib/picky/query/weights.rb +44 -48
- data/lib/picky/query.rb +5 -1
- data/lib/picky/rack/harakiri.rb +51 -47
- data/lib/picky/results.rb +81 -77
- data/lib/picky/search.rb +169 -158
- data/lib/picky/sinatra.rb +34 -0
- data/lib/picky/sources/base.rb +73 -70
- data/lib/picky/sources/couch.rb +61 -57
- data/lib/picky/sources/csv.rb +68 -64
- data/lib/picky/sources/db.rb +139 -135
- data/lib/picky/sources/delicious.rb +52 -48
- data/lib/picky/sources/mongo.rb +68 -63
- data/lib/picky/sources/wrappers/base.rb +20 -16
- data/lib/picky/sources/wrappers/location.rb +37 -33
- data/lib/picky/statistics.rb +46 -43
- data/lib/picky/tasks.rb +3 -0
- data/lib/picky/tokenizers/base.rb +192 -187
- data/lib/picky/tokenizers/index.rb +25 -21
- data/lib/picky/tokenizers/location.rb +33 -29
- data/lib/picky/tokenizers/query.rb +49 -43
- data/lib/picky.rb +21 -13
- data/lib/tasks/application.rake +1 -1
- data/lib/tasks/index.rake +3 -3
- data/lib/tasks/routes.rake +1 -1
- data/lib/tasks/server.rake +1 -1
- data/spec/lib/adapters/rack/base_spec.rb +1 -1
- data/spec/lib/adapters/rack/live_parameters_spec.rb +1 -1
- data/spec/lib/adapters/rack/query_spec.rb +1 -1
- data/spec/lib/application_spec.rb +39 -32
- data/spec/lib/backend/file/basic_spec.rb +2 -2
- data/spec/lib/backend/file/json_spec.rb +2 -2
- data/spec/lib/backend/file/marshal_spec.rb +2 -2
- data/spec/lib/backend/file/text_spec.rb +1 -1
- data/spec/lib/backend/files_spec.rb +14 -24
- data/spec/lib/backend/redis/basic_spec.rb +2 -2
- data/spec/lib/backend/redis/list_hash_spec.rb +3 -3
- data/spec/lib/backend/redis/string_hash_spec.rb +3 -3
- data/spec/lib/backend/redis_spec.rb +20 -13
- data/spec/lib/calculations/location_spec.rb +1 -1
- data/spec/lib/categories_indexed_spec.rb +16 -34
- data/spec/lib/category_indexed_spec.rb +9 -27
- data/spec/lib/category_indexing_spec.rb +2 -3
- data/spec/lib/category_spec.rb +10 -10
- data/spec/lib/character_substituters/west_european_spec.rb +6 -5
- data/spec/lib/cores_spec.rb +17 -17
- data/spec/lib/extensions/symbol_spec.rb +15 -1
- data/spec/lib/frontend_adapters/rack_spec.rb +20 -20
- data/spec/lib/generators/aliases_spec.rb +3 -3
- data/spec/lib/generators/cacher_strategy_spec.rb +1 -1
- data/spec/lib/generators/partial/default_spec.rb +3 -3
- data/spec/lib/generators/partial/none_spec.rb +2 -2
- data/spec/lib/generators/partial/substring_spec.rb +1 -1
- data/spec/lib/generators/partial_generator_spec.rb +3 -3
- data/spec/lib/generators/similarity/double_metaphone_spec.rb +1 -1
- data/spec/lib/generators/similarity/metaphone_spec.rb +1 -1
- data/spec/lib/generators/similarity/none_spec.rb +1 -1
- data/spec/lib/generators/similarity/phonetic_spec.rb +1 -1
- data/spec/lib/generators/similarity/soundex_spec.rb +1 -1
- data/spec/lib/generators/similarity_generator_spec.rb +2 -2
- data/spec/lib/generators/weights/logarithmic_spec.rb +1 -1
- data/spec/lib/generators/weights_generator_spec.rb +1 -1
- data/spec/lib/helpers/measuring_spec.rb +2 -2
- data/spec/lib/indexed/bundle/memory_spec.rb +6 -6
- data/spec/lib/indexed/bundle/redis_spec.rb +4 -4
- data/spec/lib/indexed/wrappers/bundle/calculation_spec.rb +2 -3
- data/spec/lib/indexed/wrappers/bundle/wrapper_spec.rb +2 -2
- data/spec/lib/indexed/wrappers/exact_first_spec.rb +5 -5
- data/spec/lib/indexers/base_spec.rb +1 -1
- data/spec/lib/indexers/parallel_spec.rb +1 -1
- data/spec/lib/indexers/serial_spec.rb +1 -1
- data/spec/lib/{index/base_indexed_spec.rb → indexes/index_indexed_spec.rb} +3 -3
- data/spec/lib/{index/base_indexing_spec.rb → indexes/index_indexing_spec.rb} +19 -2
- data/spec/lib/{index/base_spec.rb → indexes/index_spec.rb} +6 -25
- data/spec/lib/{index → indexes}/redis_spec.rb +1 -1
- data/spec/lib/indexes_class_spec.rb +2 -2
- data/spec/lib/indexes_indexed_spec.rb +1 -1
- data/spec/lib/indexes_indexing_spec.rb +1 -1
- data/spec/lib/indexes_spec.rb +1 -1
- data/spec/lib/indexing/bundle/base_spec.rb +7 -5
- data/spec/lib/indexing/bundle/memory_partial_generation_speed_spec.rb +4 -4
- data/spec/lib/indexing/bundle/memory_spec.rb +15 -15
- data/spec/lib/indexing/bundle/redis_spec.rb +9 -9
- data/spec/lib/interfaces/live_parameters_spec.rb +5 -5
- data/spec/lib/loader_spec.rb +17 -19
- data/spec/lib/loggers/search_spec.rb +2 -2
- data/spec/lib/query/allocation_spec.rb +1 -1
- data/spec/lib/query/allocations_spec.rb +1 -1
- data/spec/lib/query/combination_spec.rb +4 -4
- data/spec/lib/query/combinations/base_spec.rb +1 -1
- data/spec/lib/query/combinations/memory_spec.rb +1 -1
- data/spec/lib/query/combinations/redis_spec.rb +1 -1
- data/spec/lib/query/indexes_spec.rb +7 -2
- data/spec/lib/query/qualifier_category_mapper_spec.rb +34 -0
- data/spec/lib/query/token_spec.rb +32 -53
- data/spec/lib/query/tokens_spec.rb +30 -35
- data/spec/lib/query/weights_spec.rb +16 -16
- data/spec/lib/rack/harakiri_spec.rb +5 -5
- data/spec/lib/results_spec.rb +1 -1
- data/spec/lib/search_spec.rb +24 -22
- data/spec/lib/sinatra_spec.rb +36 -0
- data/spec/lib/sources/base_spec.rb +1 -1
- data/spec/lib/sources/couch_spec.rb +9 -9
- data/spec/lib/sources/csv_spec.rb +7 -7
- data/spec/lib/sources/db_spec.rb +2 -2
- data/spec/lib/sources/delicious_spec.rb +5 -5
- data/spec/lib/sources/mongo_spec.rb +7 -7
- data/spec/lib/sources/wrappers/base_spec.rb +2 -2
- data/spec/lib/sources/wrappers/location_spec.rb +1 -1
- data/spec/lib/statistics_spec.rb +1 -1
- data/spec/lib/tokenizers/base_spec.rb +2 -2
- data/spec/lib/tokenizers/index_spec.rb +1 -1
- data/spec/lib/tokenizers/query_spec.rb +1 -1
- metadata +30 -30
- data/lib/picky/adapters/rack/query.rb +0 -65
- data/lib/picky/index/base.rb +0 -409
- data/lib/picky/index/base_indexed.rb +0 -29
- data/lib/picky/index/base_indexing.rb +0 -127
- data/lib/picky/index/memory.rb +0 -16
- data/lib/picky/index/redis.rb +0 -16
- data/lib/picky/query/qualifiers.rb +0 -76
- data/lib/picky/query/solr.rb +0 -60
- data/lib/picky/signals.rb +0 -8
- data/lib/picky-tasks.rb +0 -6
- data/lib/tasks/spec.rake +0 -11
- data/spec/lib/query/qualifiers_spec.rb +0 -31
|
@@ -1,227 +1,238 @@
|
|
|
1
|
-
module
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
# memory and looking up search data as fast as possible.
|
|
22
|
-
#
|
|
23
|
-
module Bundle
|
|
24
|
-
|
|
25
|
-
# This is the indexing bundle.
|
|
1
|
+
module Picky
|
|
2
|
+
|
|
3
|
+
module Indexing # :nodoc:all
|
|
4
|
+
|
|
5
|
+
# A Bundle is a number of indexes
|
|
6
|
+
# per [index, category] combination.
|
|
7
|
+
#
|
|
8
|
+
# At most, there are three indexes:
|
|
9
|
+
# * *core* index (always used)
|
|
10
|
+
# * *weights* index (always used)
|
|
11
|
+
# * *similarity* index (used with similarity)
|
|
12
|
+
#
|
|
13
|
+
# In Picky, indexing is separated from the index
|
|
14
|
+
# handling itself through a parallel structure.
|
|
15
|
+
#
|
|
16
|
+
# Both use methods provided by this base class, but
|
|
17
|
+
# have very different goals:
|
|
18
|
+
#
|
|
19
|
+
# * *Indexing*::*Bundle* is just concerned with creating index files
|
|
20
|
+
# and providing helper functions to e.g. check the indexes.
|
|
26
21
|
#
|
|
27
|
-
#
|
|
28
|
-
#
|
|
22
|
+
# * *Index*::*Bundle* is concerned with loading these index files into
|
|
23
|
+
# memory and looking up search data as fast as possible.
|
|
29
24
|
#
|
|
30
|
-
|
|
25
|
+
module Bundle
|
|
31
26
|
|
|
32
|
-
|
|
33
|
-
|
|
27
|
+
# This is the indexing bundle.
|
|
28
|
+
#
|
|
29
|
+
# It does all menial tasks that have nothing to do
|
|
30
|
+
# with the actual index running etc.
|
|
31
|
+
#
|
|
32
|
+
class Base < Picky::Bundle
|
|
34
33
|
|
|
35
|
-
|
|
36
|
-
|
|
34
|
+
attr_reader :backend,
|
|
35
|
+
:prepared
|
|
37
36
|
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
end
|
|
37
|
+
attr_accessor :partial_strategy,
|
|
38
|
+
:weights_strategy
|
|
41
39
|
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
def initialize_inverted_index_for token
|
|
45
|
-
self.inverted[token] ||= []
|
|
46
|
-
end
|
|
40
|
+
def initialize name, category, weights_strategy, partial_strategy, similarity_strategy, options = {}
|
|
41
|
+
super name, category, similarity_strategy, options
|
|
47
42
|
|
|
48
|
-
|
|
49
|
-
|
|
43
|
+
@weights_strategy = weights_strategy
|
|
44
|
+
@partial_strategy = partial_strategy
|
|
45
|
+
@key_format = options[:key_format]
|
|
46
|
+
@prepared = Backend::File::Text.new category.prepared_index_path
|
|
47
|
+
end
|
|
50
48
|
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
def generate_caches_from_source
|
|
57
|
-
load_from_prepared_index_file
|
|
58
|
-
generate_caches_from_memory
|
|
59
|
-
end
|
|
60
|
-
# Generates derived indexes from the index and dumps.
|
|
61
|
-
#
|
|
62
|
-
# Note: assumes that there is something in the index
|
|
63
|
-
#
|
|
64
|
-
def generate_caches_from_memory
|
|
65
|
-
cache_from_memory_generation_message
|
|
66
|
-
generate_derived
|
|
67
|
-
end
|
|
68
|
-
def cache_from_memory_generation_message
|
|
69
|
-
timed_exclaim %Q{"#{identifier}": Caching from intermediate in-memory index.}
|
|
70
|
-
end
|
|
49
|
+
# Sets up a piece of the index for the given token.
|
|
50
|
+
#
|
|
51
|
+
def initialize_inverted_index_for token
|
|
52
|
+
self.inverted[token] ||= []
|
|
53
|
+
end
|
|
71
54
|
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
55
|
+
# Generation
|
|
56
|
+
#
|
|
57
|
+
|
|
58
|
+
# This method
|
|
59
|
+
# * Loads the base index from the "prepared..." file.
|
|
60
|
+
# * Generates derived indexes.
|
|
61
|
+
# * Dumps all the indexes into files.
|
|
62
|
+
#
|
|
63
|
+
def generate_caches_from_source
|
|
64
|
+
load_from_prepared_index_file
|
|
65
|
+
generate_caches_from_memory
|
|
66
|
+
end
|
|
67
|
+
# Generates derived indexes from the index and dumps.
|
|
68
|
+
#
|
|
69
|
+
# Note: assumes that there is something in the index
|
|
70
|
+
#
|
|
71
|
+
def generate_caches_from_memory
|
|
72
|
+
cache_from_memory_generation_message
|
|
73
|
+
generate_derived
|
|
74
|
+
end
|
|
75
|
+
def cache_from_memory_generation_message
|
|
76
|
+
timed_exclaim %Q{"#{identifier}": Caching from intermediate in-memory index.}
|
|
77
|
+
end
|
|
78
78
|
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
retrieve
|
|
85
|
-
end
|
|
86
|
-
def load_from_prepared_index_generation_message
|
|
87
|
-
timed_exclaim %Q{"#{identifier}": Loading prepared data into memory.}
|
|
88
|
-
end
|
|
89
|
-
# Retrieves the prepared index data into the index.
|
|
90
|
-
#
|
|
91
|
-
# This is in preparation for generating
|
|
92
|
-
# derived indexes (like weights, similarity)
|
|
93
|
-
# and later dumping the optimized index.
|
|
94
|
-
#
|
|
95
|
-
def retrieve
|
|
96
|
-
key_format = self[:key_format] || :to_i
|
|
97
|
-
files.retrieve do |id, token|
|
|
98
|
-
initialize_inverted_index_for token
|
|
99
|
-
self.inverted[token] << id.send(key_format)
|
|
79
|
+
# Generates the weights and similarity from the main index.
|
|
80
|
+
#
|
|
81
|
+
def generate_derived
|
|
82
|
+
generate_weights
|
|
83
|
+
generate_similarity
|
|
100
84
|
end
|
|
101
|
-
end
|
|
102
85
|
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
def generate_similarity
|
|
129
|
-
generator = Generators::SimilarityGenerator.new self.inverted
|
|
130
|
-
self.similarity = generator.generate self.similarity_strategy
|
|
131
|
-
end
|
|
86
|
+
# Loads the data from the prepared index file.
|
|
87
|
+
#
|
|
88
|
+
def load_from_prepared_index_file
|
|
89
|
+
load_from_prepared_index_generation_message
|
|
90
|
+
clear
|
|
91
|
+
retrieve
|
|
92
|
+
end
|
|
93
|
+
def load_from_prepared_index_generation_message
|
|
94
|
+
timed_exclaim %Q{"#{identifier}": Loading prepared data into memory.}
|
|
95
|
+
end
|
|
96
|
+
# Retrieves the prepared index data into the index.
|
|
97
|
+
#
|
|
98
|
+
# This is in preparation for generating
|
|
99
|
+
# derived indexes (like weights, similarity)
|
|
100
|
+
# and later dumping the optimized index.
|
|
101
|
+
#
|
|
102
|
+
# TODO Move this out to the category?
|
|
103
|
+
#
|
|
104
|
+
def retrieve
|
|
105
|
+
format = category.key_format || :to_i # Optimization.
|
|
106
|
+
prepared.retrieve do |id, token|
|
|
107
|
+
initialize_inverted_index_for token
|
|
108
|
+
self.inverted[token] << id.send(format)
|
|
109
|
+
end
|
|
110
|
+
end
|
|
132
111
|
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
#
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
def dump_configuration
|
|
163
|
-
# timed_exclaim %Q{"#{identifier}": Dumping configuration.}
|
|
164
|
-
backend.dump_configuration self.configuration
|
|
165
|
-
end
|
|
112
|
+
# Generates a new index (writes its index) using the
|
|
113
|
+
# partial caching strategy of this bundle.
|
|
114
|
+
#
|
|
115
|
+
def generate_partial
|
|
116
|
+
generator = Generators::PartialGenerator.new self.inverted
|
|
117
|
+
self.inverted = generator.generate self.partial_strategy
|
|
118
|
+
end
|
|
119
|
+
# Generate a partial index from the given exact inverted index.
|
|
120
|
+
#
|
|
121
|
+
def generate_partial_from exact_inverted_index
|
|
122
|
+
timed_exclaim %Q{"#{identifier}": Generating partial index for index.}
|
|
123
|
+
self.inverted = exact_inverted_index
|
|
124
|
+
self.generate_partial
|
|
125
|
+
self
|
|
126
|
+
end
|
|
127
|
+
# Generates a new weights index (writes its index) using the
|
|
128
|
+
# given weight caching strategy.
|
|
129
|
+
#
|
|
130
|
+
def generate_weights
|
|
131
|
+
generator = Generators::WeightsGenerator.new self.inverted
|
|
132
|
+
self.weights = generator.generate self.weights_strategy
|
|
133
|
+
end
|
|
134
|
+
# Generates a new similarity index (writes its index) using the
|
|
135
|
+
# given similarity caching strategy.
|
|
136
|
+
#
|
|
137
|
+
def generate_similarity
|
|
138
|
+
generator = Generators::SimilarityGenerator.new self.inverted
|
|
139
|
+
self.similarity = generator.generate self.similarity_strategy
|
|
140
|
+
end
|
|
166
141
|
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
#
|
|
176
|
-
def raise_unless_index_exists
|
|
177
|
-
if partial_strategy.saved?
|
|
178
|
-
warn_if_index_small
|
|
179
|
-
raise_unless_index_ok
|
|
142
|
+
# Saves the indexes in a dump file.
|
|
143
|
+
#
|
|
144
|
+
def dump
|
|
145
|
+
timed_exclaim %Q{"#{identifier}": Dumping data.}
|
|
146
|
+
dump_inverted
|
|
147
|
+
dump_similarity
|
|
148
|
+
dump_weights
|
|
149
|
+
dump_configuration
|
|
180
150
|
end
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
151
|
+
# Dumps the core index.
|
|
152
|
+
#
|
|
153
|
+
def dump_inverted
|
|
154
|
+
# timed_exclaim %Q{"#{identifier}": Dumping inverted index.}
|
|
155
|
+
backend.dump_inverted self.inverted
|
|
156
|
+
end
|
|
157
|
+
# Dumps the weights index.
|
|
158
|
+
#
|
|
159
|
+
def dump_weights
|
|
160
|
+
# timed_exclaim %Q{"#{identifier}": Dumping index weights.}
|
|
161
|
+
backend.dump_weights self.weights
|
|
162
|
+
end
|
|
163
|
+
# Dumps the similarity index.
|
|
164
|
+
#
|
|
165
|
+
def dump_similarity
|
|
166
|
+
# timed_exclaim %Q{"#{identifier}": Dumping similarity index.}
|
|
167
|
+
backend.dump_similarity self.similarity
|
|
168
|
+
end
|
|
169
|
+
# Dumps the configuration.
|
|
170
|
+
#
|
|
171
|
+
def dump_configuration
|
|
172
|
+
# timed_exclaim %Q{"#{identifier}": Dumping configuration.}
|
|
173
|
+
backend.dump_configuration self.configuration
|
|
189
174
|
end
|
|
190
|
-
end
|
|
191
175
|
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
176
|
+
# Alerts the user if an index is missing.
|
|
177
|
+
#
|
|
178
|
+
def raise_unless_cache_exists
|
|
179
|
+
raise_unless_index_exists
|
|
180
|
+
raise_unless_similarity_exists
|
|
181
|
+
end
|
|
182
|
+
# Alerts the user if one of the necessary indexes
|
|
183
|
+
# (core, weights) is missing.
|
|
184
|
+
#
|
|
185
|
+
def raise_unless_index_exists
|
|
186
|
+
if partial_strategy.saved?
|
|
187
|
+
warn_if_index_small
|
|
188
|
+
raise_unless_index_ok
|
|
189
|
+
end
|
|
190
|
+
end
|
|
191
|
+
# Alerts the user if the similarity
|
|
192
|
+
# index is missing (given that it's used).
|
|
193
|
+
#
|
|
194
|
+
def raise_unless_similarity_exists
|
|
195
|
+
if similarity_strategy.saved?
|
|
196
|
+
warn_if_similarity_small
|
|
197
|
+
raise_unless_similarity_ok
|
|
198
|
+
end
|
|
199
|
+
end
|
|
202
200
|
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
201
|
+
# Outputs a warning for the given cache.
|
|
202
|
+
#
|
|
203
|
+
def warn_cache_small what
|
|
204
|
+
warn "Warning: #{what} cache for #{identifier} smaller than 16 bytes."
|
|
205
|
+
end
|
|
206
|
+
# Raises an appropriate error message for the given cache.
|
|
207
|
+
#
|
|
208
|
+
def raise_cache_missing what
|
|
209
|
+
raise "Error: The #{what} cache for #{identifier} is missing."
|
|
210
|
+
end
|
|
211
|
+
|
|
212
|
+
# Warns the user if the similarity index is small.
|
|
213
|
+
#
|
|
214
|
+
def warn_if_similarity_small
|
|
215
|
+
warn_cache_small :similarity if backend.similarity_cache_small?
|
|
216
|
+
end
|
|
217
|
+
# Alerts the user if the similarity index is not there.
|
|
218
|
+
#
|
|
219
|
+
def raise_unless_similarity_ok
|
|
220
|
+
raise_cache_missing :similarity unless backend.similarity_cache_ok?
|
|
221
|
+
end
|
|
222
|
+
|
|
223
|
+
# Warns the user if the core or weights indexes are small.
|
|
224
|
+
#
|
|
225
|
+
def warn_if_index_small
|
|
226
|
+
warn_cache_small :inverted if backend.inverted_cache_small?
|
|
227
|
+
warn_cache_small :weights if backend.weights_cache_small?
|
|
228
|
+
end
|
|
229
|
+
# Alerts the user if the core or weights indexes are not there.
|
|
230
|
+
#
|
|
231
|
+
def raise_unless_index_ok
|
|
232
|
+
raise_cache_missing :inverted unless backend.inverted_cache_ok?
|
|
233
|
+
raise_cache_missing :weights unless backend.weights_cache_ok?
|
|
234
|
+
end
|
|
213
235
|
|
|
214
|
-
# Warns the user if the core or weights indexes are small.
|
|
215
|
-
#
|
|
216
|
-
def warn_if_index_small
|
|
217
|
-
warn_cache_small :inverted if backend.inverted_cache_small?
|
|
218
|
-
warn_cache_small :weights if backend.weights_cache_small?
|
|
219
|
-
end
|
|
220
|
-
# Alerts the user if the core or weights indexes are not there.
|
|
221
|
-
#
|
|
222
|
-
def raise_unless_index_ok
|
|
223
|
-
raise_cache_missing :inverted unless backend.inverted_cache_ok?
|
|
224
|
-
raise_cache_missing :weights unless backend.weights_cache_ok?
|
|
225
236
|
end
|
|
226
237
|
|
|
227
238
|
end
|
|
@@ -1,18 +1,23 @@
|
|
|
1
|
-
|
|
2
|
-
#
|
|
3
|
-
module Indexing # :nodoc:all
|
|
1
|
+
module Picky
|
|
4
2
|
|
|
5
|
-
|
|
3
|
+
# encoding: utf-8
|
|
4
|
+
#
|
|
5
|
+
module Indexing # :nodoc:all
|
|
6
6
|
|
|
7
|
-
|
|
8
|
-
# (mostly JSON) to load them into memory on startup.
|
|
9
|
-
#
|
|
10
|
-
class Memory < Base
|
|
7
|
+
module Bundle
|
|
11
8
|
|
|
12
|
-
#
|
|
13
|
-
#
|
|
9
|
+
# The memory version dumps its generated indexes to disk
|
|
10
|
+
# (mostly JSON) to load them into memory on startup.
|
|
14
11
|
#
|
|
15
|
-
|
|
12
|
+
class Memory < Base
|
|
13
|
+
|
|
14
|
+
def initialize name, category, *args
|
|
15
|
+
super name, category, *args
|
|
16
|
+
|
|
17
|
+
@backend = Backend::Files.new self
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
end
|
|
16
21
|
|
|
17
22
|
end
|
|
18
23
|
|
|
@@ -1,20 +1,22 @@
|
|
|
1
|
-
|
|
2
|
-
#
|
|
3
|
-
module Indexing # :nodoc:all
|
|
1
|
+
module Picky
|
|
4
2
|
|
|
5
|
-
|
|
3
|
+
# encoding: utf-8
|
|
4
|
+
#
|
|
5
|
+
module Indexing # :nodoc:all
|
|
6
6
|
|
|
7
|
-
|
|
8
|
-
# the Redis backend.
|
|
9
|
-
#
|
|
10
|
-
class Redis < Base
|
|
7
|
+
module Bundle
|
|
11
8
|
|
|
12
|
-
|
|
9
|
+
# The Redis version dumps its generated indexes to
|
|
10
|
+
# the Redis backend.
|
|
11
|
+
#
|
|
12
|
+
class Redis < Base
|
|
13
13
|
|
|
14
|
-
|
|
15
|
-
|
|
14
|
+
def initialize name, category, *args
|
|
15
|
+
super name, category, *args
|
|
16
|
+
|
|
17
|
+
@backend = Backend::Redis.new self
|
|
18
|
+
end
|
|
16
19
|
|
|
17
|
-
@backend = Backend::Redis.new name, category
|
|
18
20
|
end
|
|
19
21
|
|
|
20
22
|
end
|
|
@@ -1,25 +1,29 @@
|
|
|
1
|
-
module
|
|
2
|
-
module Wrappers
|
|
3
|
-
module Category
|
|
1
|
+
module Picky
|
|
4
2
|
|
|
5
|
-
|
|
3
|
+
module Indexing
|
|
4
|
+
module Wrappers
|
|
5
|
+
module Category
|
|
6
6
|
|
|
7
|
-
|
|
8
|
-
new_source = Sources::Wrappers::Location.new category.source, grid, precision
|
|
7
|
+
module Location
|
|
9
8
|
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
9
|
+
def self.install_on category, grid, precision = 1
|
|
10
|
+
new_source = Sources::Wrappers::Location.new category.source, grid, precision
|
|
11
|
+
|
|
12
|
+
category.class_eval do
|
|
13
|
+
def tokenizer
|
|
14
|
+
@tokenizer ||= Tokenizers::Index.new
|
|
15
|
+
end
|
|
16
|
+
define_method :source do
|
|
17
|
+
new_source
|
|
18
|
+
end
|
|
16
19
|
end
|
|
20
|
+
|
|
17
21
|
end
|
|
18
22
|
|
|
19
23
|
end
|
|
20
24
|
|
|
21
25
|
end
|
|
22
|
-
|
|
23
26
|
end
|
|
24
27
|
end
|
|
28
|
+
|
|
25
29
|
end
|