picky 3.2.0 → 3.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/aux/picky/cli.rb +3 -1
- data/lib/picky/backends/backend.rb +16 -0
- data/lib/picky/backends/file/basic.rb +18 -9
- data/lib/picky/backends/file/json.rb +1 -0
- data/lib/picky/backends/file.rb +8 -4
- data/lib/picky/backends/helpers/file.rb +6 -0
- data/lib/picky/backends/memory/basic.rb +10 -2
- data/lib/picky/backends/memory/json.rb +1 -6
- data/lib/picky/backends/memory/marshal.rb +1 -6
- data/lib/picky/backends/memory/text.rb +1 -0
- data/lib/picky/backends/memory.rb +8 -4
- data/lib/picky/backends/redis/basic.rb +12 -9
- data/lib/picky/backends/redis.rb +10 -4
- data/lib/picky/bundle.rb +14 -0
- data/lib/picky/bundle_indexed.rb +110 -0
- data/lib/picky/bundle_indexing.rb +177 -0
- data/lib/picky/bundle_realtime.rb +80 -0
- data/lib/picky/categories.rb +5 -1
- data/lib/picky/category.rb +12 -20
- data/lib/picky/category_indexed.rb +3 -6
- data/lib/picky/category_indexing.rb +19 -18
- data/lib/picky/category_realtime.rb +5 -10
- data/lib/picky/extensions/symbol.rb +1 -1
- data/lib/picky/generators/partial/default.rb +1 -1
- data/lib/picky/generators/partial/postfix.rb +30 -0
- data/lib/picky/generators/partial/substring.rb +8 -2
- data/lib/picky/index.rb +3 -3
- data/lib/picky/index_indexing.rb +3 -2
- data/lib/picky/indexers/base.rb +0 -8
- data/lib/picky/indexers/parallel.rb +1 -1
- data/lib/picky/loader.rb +15 -15
- data/lib/picky/query/qualifier_category_mapper.rb +1 -1
- data/lib/picky/rack/harakiri.rb +3 -1
- data/lib/picky/sources/db.rb +11 -0
- data/lib/picky/statistics.rb +2 -2
- data/lib/picky/tokenizer.rb +1 -1
- data/lib/picky/tokenizers/location.rb +1 -1
- data/lib/picky/wrappers/bundle/calculation.rb +45 -0
- data/lib/picky/wrappers/bundle/delegators.rb +69 -0
- data/lib/picky/wrappers/bundle/exact_partial.rb +38 -0
- data/lib/picky/{indexed/wrappers → wrappers}/bundle/location.rb +6 -4
- data/lib/picky/wrappers/bundle/wrapper.rb +29 -0
- data/lib/picky/wrappers/category/exact_first.rb +55 -0
- data/lib/picky/wrappers/category/location.rb +33 -0
- data/lib/picky/{sources/wrappers → wrappers/sources}/base.rb +7 -3
- data/lib/picky/{sources/wrappers → wrappers/sources}/location.rb +3 -3
- data/lib/picky.rb +10 -11
- data/spec/aux/picky/cli_spec.rb +5 -5
- data/spec/lib/backends/backend_spec.rb +39 -0
- data/spec/lib/backends/file/basic_spec.rb +59 -0
- data/spec/lib/backends/file_spec.rb +105 -0
- data/spec/lib/backends/memory/basic_spec.rb +43 -15
- data/spec/lib/backends/memory_spec.rb +108 -54
- data/spec/lib/backends/redis/basic_spec.rb +81 -57
- data/spec/lib/backends/redis_spec.rb +120 -66
- data/spec/lib/category_indexed_spec.rb +12 -12
- data/spec/lib/category_indexing_spec.rb +23 -23
- data/spec/lib/category_spec.rb +14 -14
- data/spec/lib/cores_spec.rb +2 -2
- data/spec/lib/extensions/object_spec.rb +7 -7
- data/spec/lib/generators/partial/postfix_spec.rb +131 -0
- data/spec/lib/generators/partial/substring_spec.rb +29 -4
- data/spec/lib/generators/weights_generator_spec.rb +3 -3
- data/spec/lib/index_indexing_spec.rb +11 -15
- data/spec/lib/index_spec.rb +8 -8
- data/spec/lib/indexed/bundle_realtime_spec.rb +18 -18
- data/spec/lib/indexed/bundle_spec.rb +21 -21
- data/spec/lib/indexed/wrappers/bundle/calculation_spec.rb +9 -9
- data/spec/lib/indexed/wrappers/bundle/wrapper_spec.rb +8 -8
- data/spec/lib/indexed/wrappers/exact_first_spec.rb +16 -16
- data/spec/lib/indexers/base_spec.rb +6 -25
- data/spec/lib/indexes_spec.rb +33 -22
- data/spec/lib/indexing/bundle_partial_generation_speed_spec.rb +2 -2
- data/spec/lib/indexing/bundle_spec.rb +27 -28
- data/spec/lib/sources/wrappers/base_spec.rb +7 -7
- data/spec/lib/sources/wrappers/location_spec.rb +8 -8
- metadata +48 -38
- data/lib/picky/indexed/bundle.rb +0 -125
- data/lib/picky/indexed/bundle_realtime.rb +0 -76
- data/lib/picky/indexed/wrappers/bundle/calculation.rb +0 -47
- data/lib/picky/indexed/wrappers/bundle/wrapper.rb +0 -47
- data/lib/picky/indexed/wrappers/category/location.rb +0 -31
- data/lib/picky/indexed/wrappers/exact_first.rb +0 -59
- data/lib/picky/indexing/bundle.rb +0 -183
- data/lib/picky/indexing/wrappers/category/location.rb +0 -29
@@ -1,183 +0,0 @@
|
|
1
|
-
module Picky
|
2
|
-
|
3
|
-
module Indexing # :nodoc:all
|
4
|
-
|
5
|
-
# A Bundle is a number of indexes
|
6
|
-
# per [index, category] combination.
|
7
|
-
#
|
8
|
-
# At most, there are three indexes:
|
9
|
-
# * *core* index (always used)
|
10
|
-
# * *weights* index (always used)
|
11
|
-
# * *similarity* index (used with similarity)
|
12
|
-
#
|
13
|
-
# In Picky, indexing is separated from the index
|
14
|
-
# handling itself through a parallel structure.
|
15
|
-
#
|
16
|
-
# Both use methods provided by this base class, but
|
17
|
-
# have very different goals:
|
18
|
-
#
|
19
|
-
# * *Indexing*::*Bundle* is just concerned with creating index files
|
20
|
-
# and providing helper functions to e.g. check the indexes.
|
21
|
-
#
|
22
|
-
# * *Index*::*Bundle* is concerned with loading these index files into
|
23
|
-
# memory and looking up search data as fast as possible.
|
24
|
-
#
|
25
|
-
# This is the indexing bundle.
|
26
|
-
#
|
27
|
-
# It does all menial tasks that have nothing to do
|
28
|
-
# with the actual index running etc.
|
29
|
-
# (Find these in Indexed::Bundle)
|
30
|
-
#
|
31
|
-
class Bundle < Picky::Bundle
|
32
|
-
|
33
|
-
attr_reader :backend,
|
34
|
-
:prepared
|
35
|
-
|
36
|
-
# When indexing, clear only clears the inverted index.
|
37
|
-
#
|
38
|
-
delegate :clear, :to => :inverted
|
39
|
-
|
40
|
-
def initialize name, category, backend, weights_strategy, partial_strategy, similarity_strategy, options = {}
|
41
|
-
super name, category, backend, weights_strategy, partial_strategy, similarity_strategy, options
|
42
|
-
|
43
|
-
@key_format = options[:key_format]
|
44
|
-
@prepared = Backends::Memory::Text.new category.prepared_index_path
|
45
|
-
|
46
|
-
@inverted = @backend_inverted.empty
|
47
|
-
@weights = @backend_weights.empty
|
48
|
-
@similarity = @backend_similarity.empty
|
49
|
-
@configuration = @backend_configuration.empty
|
50
|
-
end
|
51
|
-
|
52
|
-
# Sets up a piece of the index for the given token.
|
53
|
-
#
|
54
|
-
def initialize_inverted_index_for token
|
55
|
-
self.inverted[token] ||= []
|
56
|
-
end
|
57
|
-
|
58
|
-
# Generation
|
59
|
-
#
|
60
|
-
|
61
|
-
# This method
|
62
|
-
# * Loads the base index from the "prepared..." file.
|
63
|
-
# * Generates derived indexes.
|
64
|
-
# * Dumps all the indexes into files.
|
65
|
-
#
|
66
|
-
def generate_caches_from_source
|
67
|
-
load_from_prepared_index_file
|
68
|
-
generate_caches_from_memory
|
69
|
-
end
|
70
|
-
# Generates derived indexes from the index and dumps.
|
71
|
-
#
|
72
|
-
# Note: assumes that there is something in the index
|
73
|
-
#
|
74
|
-
def generate_caches_from_memory
|
75
|
-
cache_from_memory_generation_message
|
76
|
-
generate_derived
|
77
|
-
end
|
78
|
-
def cache_from_memory_generation_message
|
79
|
-
timed_exclaim %Q{"#{identifier}": Caching from intermediate in-memory index.}
|
80
|
-
end
|
81
|
-
|
82
|
-
# Generates the weights and similarity from the main index.
|
83
|
-
#
|
84
|
-
def generate_derived
|
85
|
-
generate_weights
|
86
|
-
generate_similarity
|
87
|
-
end
|
88
|
-
|
89
|
-
# Load the data from the db.
|
90
|
-
#
|
91
|
-
def load_from_prepared_index_file
|
92
|
-
load_from_prepared_index_generation_message
|
93
|
-
clear
|
94
|
-
retrieve
|
95
|
-
end
|
96
|
-
def load_from_prepared_index_generation_message
|
97
|
-
timed_exclaim %Q{"#{identifier}": Loading prepared data into memory.}
|
98
|
-
end
|
99
|
-
# Retrieves the prepared index data into the index.
|
100
|
-
#
|
101
|
-
# This is in preparation for generating
|
102
|
-
# derived indexes (like weights, similarity)
|
103
|
-
# and later dumping the optimized index.
|
104
|
-
#
|
105
|
-
# TODO Move this out to the category?
|
106
|
-
#
|
107
|
-
def retrieve
|
108
|
-
format = category.key_format || :to_i # Optimization.
|
109
|
-
prepared.retrieve do |id, token|
|
110
|
-
initialize_inverted_index_for token
|
111
|
-
self.inverted[token] << id.send(format)
|
112
|
-
end
|
113
|
-
end
|
114
|
-
|
115
|
-
# Generates a new index (writes its index) using the
|
116
|
-
# partial caching strategy of this bundle.
|
117
|
-
#
|
118
|
-
def generate_partial
|
119
|
-
generator = Generators::PartialGenerator.new self.inverted
|
120
|
-
self.inverted = generator.generate self.partial_strategy
|
121
|
-
end
|
122
|
-
# Generate a partial index from the given exact inverted index.
|
123
|
-
#
|
124
|
-
def generate_partial_from exact_inverted_index
|
125
|
-
timed_exclaim %Q{"#{identifier}": Generating partial index for index.}
|
126
|
-
self.inverted = exact_inverted_index
|
127
|
-
self.generate_partial
|
128
|
-
self
|
129
|
-
end
|
130
|
-
# Generates a new weights index (writes its index) using the
|
131
|
-
# given weight caching strategy.
|
132
|
-
#
|
133
|
-
def generate_weights
|
134
|
-
generator = Generators::WeightsGenerator.new self.inverted
|
135
|
-
self.weights = generator.generate self.weights_strategy
|
136
|
-
end
|
137
|
-
# Generates a new similarity index (writes its index) using the
|
138
|
-
# given similarity caching strategy.
|
139
|
-
#
|
140
|
-
def generate_similarity
|
141
|
-
generator = Generators::SimilarityGenerator.new self.inverted
|
142
|
-
self.similarity = generator.generate self.similarity_strategy
|
143
|
-
end
|
144
|
-
|
145
|
-
# Saves the indexes in a dump file.
|
146
|
-
#
|
147
|
-
def dump
|
148
|
-
timed_exclaim %Q{"#{identifier}": Dumping data.}
|
149
|
-
dump_inverted
|
150
|
-
dump_similarity
|
151
|
-
dump_weights
|
152
|
-
dump_configuration
|
153
|
-
end
|
154
|
-
# Dumps the core index.
|
155
|
-
#
|
156
|
-
def dump_inverted
|
157
|
-
# timed_exclaim %Q{"#{identifier}": Dumping inverted index.}
|
158
|
-
@backend_inverted.dump self.inverted
|
159
|
-
end
|
160
|
-
# Dumps the weights index.
|
161
|
-
#
|
162
|
-
def dump_weights
|
163
|
-
# timed_exclaim %Q{"#{identifier}": Dumping index weights.}
|
164
|
-
@backend_weights.dump self.weights
|
165
|
-
end
|
166
|
-
# Dumps the similarity index.
|
167
|
-
#
|
168
|
-
def dump_similarity
|
169
|
-
# timed_exclaim %Q{"#{identifier}": Dumping similarity index.}
|
170
|
-
@backend_similarity.dump self.similarity
|
171
|
-
end
|
172
|
-
# Dumps the similarity index.
|
173
|
-
#
|
174
|
-
def dump_configuration
|
175
|
-
# timed_exclaim %Q{"#{identifier}": Dumping configuration.}
|
176
|
-
@backend_configuration.dump self.configuration
|
177
|
-
end
|
178
|
-
|
179
|
-
end
|
180
|
-
|
181
|
-
end
|
182
|
-
|
183
|
-
end
|
@@ -1,29 +0,0 @@
|
|
1
|
-
module Picky
|
2
|
-
|
3
|
-
module Indexing
|
4
|
-
module Wrappers
|
5
|
-
module Category
|
6
|
-
|
7
|
-
module Location
|
8
|
-
|
9
|
-
def self.install_on category, grid, precision = 1
|
10
|
-
new_source = Sources::Wrappers::Location.new category.source, grid, precision
|
11
|
-
|
12
|
-
category.class_eval do
|
13
|
-
def tokenizer
|
14
|
-
@tokenizer ||= Tokenizer.new
|
15
|
-
end
|
16
|
-
define_method :source do
|
17
|
-
new_source
|
18
|
-
end
|
19
|
-
end
|
20
|
-
|
21
|
-
end
|
22
|
-
|
23
|
-
end
|
24
|
-
|
25
|
-
end
|
26
|
-
end
|
27
|
-
end
|
28
|
-
|
29
|
-
end
|