picky 3.0.1 → 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106)
  1. data/lib/picky/application.rb +12 -12
  2. data/lib/picky/backends/backend.rb +17 -0
  3. data/lib/picky/{backend → backends}/file/basic.rb +1 -1
  4. data/lib/picky/{backend → backends}/file/json.rb +1 -1
  5. data/lib/picky/{backend → backends}/file/marshal.rb +1 -1
  6. data/lib/picky/{backend → backends}/file/text.rb +1 -1
  7. data/lib/picky/backends/memory.rb +53 -0
  8. data/lib/picky/{backend → backends}/redis/basic.rb +9 -14
  9. data/lib/picky/backends/redis/float_hash.rb +26 -0
  10. data/lib/picky/{backend → backends}/redis/list_hash.rb +7 -11
  11. data/lib/picky/{backend → backends}/redis/string_hash.rb +7 -11
  12. data/lib/picky/backends/redis.rb +87 -0
  13. data/lib/picky/bundle.rb +107 -11
  14. data/lib/picky/category.rb +5 -5
  15. data/lib/picky/index.rb +329 -0
  16. data/lib/picky/index_indexed.rb +31 -0
  17. data/lib/picky/index_indexing.rb +161 -0
  18. data/lib/picky/indexed/bundle.rb +112 -0
  19. data/lib/picky/indexed/wrappers/exact_first.rb +1 -1
  20. data/lib/picky/indexers/parallel.rb +2 -1
  21. data/lib/picky/indexers/serial.rb +2 -1
  22. data/lib/picky/indexes_indexing.rb +1 -1
  23. data/lib/picky/indexing/bundle.rb +188 -0
  24. data/lib/picky/indexing/wrappers/category/location.rb +1 -1
  25. data/lib/picky/interfaces/live_parameters.rb +8 -8
  26. data/lib/picky/loader.rb +24 -38
  27. data/lib/picky/migrations/from_30_to_31.rb +61 -0
  28. data/lib/picky/query/allocation.rb +10 -5
  29. data/lib/picky/query/combinations.rb +70 -0
  30. data/lib/picky/query/indexes.rb +8 -7
  31. data/lib/picky/query/indexes_check.rb +47 -0
  32. data/lib/picky/query/token.rb +16 -29
  33. data/lib/picky/query/tokens.rb +4 -20
  34. data/lib/picky/search.rb +51 -58
  35. data/lib/picky/tokenizer.rb +231 -0
  36. data/lib/picky/tokenizers/location.rb +1 -1
  37. data/lib/tasks/try.rake +4 -12
  38. data/lib/tasks/try.rb +37 -0
  39. data/spec/lib/application_spec.rb +5 -5
  40. data/spec/lib/{backend → backends}/file/basic_spec.rb +2 -2
  41. data/spec/lib/{backend → backends}/file/json_spec.rb +2 -2
  42. data/spec/lib/{backend → backends}/file/marshal_spec.rb +2 -2
  43. data/spec/lib/{backend → backends}/file/text_spec.rb +1 -1
  44. data/spec/lib/backends/memory_spec.rb +77 -0
  45. data/spec/lib/{backend → backends}/redis/basic_spec.rb +19 -21
  46. data/spec/lib/backends/redis/float_hash_spec.rb +38 -0
  47. data/spec/lib/backends/redis/list_hash_spec.rb +27 -0
  48. data/spec/lib/backends/redis/string_hash_spec.rb +38 -0
  49. data/spec/lib/backends/redis_spec.rb +79 -0
  50. data/spec/lib/categories_indexed_spec.rb +3 -3
  51. data/spec/lib/category_indexed_spec.rb +6 -6
  52. data/spec/lib/category_indexing_spec.rb +1 -1
  53. data/spec/lib/category_spec.rb +1 -1
  54. data/spec/lib/frontend_adapters/rack_spec.rb +2 -2
  55. data/spec/lib/{indexes/index_indexed_spec.rb → index_indexed_spec.rb} +1 -1
  56. data/spec/lib/{indexes/index_indexing_spec.rb → index_indexing_spec.rb} +1 -1
  57. data/spec/lib/{indexes/index_spec.rb → index_spec.rb} +1 -1
  58. data/spec/lib/indexed/{bundle/memory_spec.rb → memory_spec.rb} +18 -18
  59. data/spec/lib/indexed/wrappers/exact_first_spec.rb +2 -2
  60. data/spec/lib/indexing/{bundle/memory_partial_generation_speed_spec.rb → bundle_partial_generation_speed_spec.rb} +3 -3
  61. data/spec/lib/indexing/bundle_spec.rb +302 -0
  62. data/spec/lib/query/allocation_spec.rb +21 -11
  63. data/spec/lib/query/combination_spec.rb +2 -2
  64. data/spec/lib/query/{combinations/base_spec.rb → combinations_spec.rb} +1 -1
  65. data/spec/lib/query/indexes_check_spec.rb +25 -0
  66. data/spec/lib/query/indexes_spec.rb +5 -1
  67. data/spec/lib/query/token_spec.rb +18 -20
  68. data/spec/lib/query/tokens_spec.rb +14 -65
  69. data/spec/lib/search_spec.rb +36 -37
  70. data/spec/lib/tasks/try_spec.rb +51 -0
  71. data/spec/lib/{tokenizers/base_spec.rb → tokenizer_spec.rb} +15 -44
  72. metadata +64 -81
  73. data/lib/picky/backend/base.rb +0 -121
  74. data/lib/picky/backend/files.rb +0 -28
  75. data/lib/picky/backend/redis.rb +0 -44
  76. data/lib/picky/indexed/bundle/base.rb +0 -47
  77. data/lib/picky/indexed/bundle/memory.rb +0 -88
  78. data/lib/picky/indexed/bundle/redis.rb +0 -91
  79. data/lib/picky/indexes/index.rb +0 -328
  80. data/lib/picky/indexes/index_indexed.rb +0 -35
  81. data/lib/picky/indexes/index_indexing.rb +0 -165
  82. data/lib/picky/indexes/memory.rb +0 -20
  83. data/lib/picky/indexes/redis.rb +0 -20
  84. data/lib/picky/indexing/bundle/base.rb +0 -242
  85. data/lib/picky/indexing/bundle/memory.rb +0 -26
  86. data/lib/picky/indexing/bundle/redis.rb +0 -26
  87. data/lib/picky/query/combinations/base.rb +0 -74
  88. data/lib/picky/query/combinations/memory.rb +0 -52
  89. data/lib/picky/query/combinations/redis.rb +0 -90
  90. data/lib/picky/query.rb +0 -6
  91. data/lib/picky/tokenizers/base.rb +0 -231
  92. data/lib/picky/tokenizers/index.rb +0 -34
  93. data/lib/picky/tokenizers/query.rb +0 -61
  94. data/spec/lib/backend/files_spec.rb +0 -189
  95. data/spec/lib/backend/redis/list_hash_spec.rb +0 -40
  96. data/spec/lib/backend/redis/string_hash_spec.rb +0 -47
  97. data/spec/lib/backend/redis_spec.rb +0 -170
  98. data/spec/lib/indexed/bundle/redis_spec.rb +0 -41
  99. data/spec/lib/indexes/redis_spec.rb +0 -15
  100. data/spec/lib/indexing/bundle/base_spec.rb +0 -38
  101. data/spec/lib/indexing/bundle/memory_spec.rb +0 -287
  102. data/spec/lib/indexing/bundle/redis_spec.rb +0 -283
  103. data/spec/lib/query/combinations/memory_spec.rb +0 -158
  104. data/spec/lib/query/combinations/redis_spec.rb +0 -172
  105. data/spec/lib/tokenizers/index_spec.rb +0 -69
  106. data/spec/lib/tokenizers/query_spec.rb +0 -121
@@ -1,242 +0,0 @@
1
- module Picky
2
-
3
- module Indexing # :nodoc:all
4
-
5
- # A Bundle is a number of indexes
6
- # per [index, category] combination.
7
- #
8
- # At most, there are three indexes:
9
- # * *core* index (always used)
10
- # * *weights* index (always used)
11
- # * *similarity* index (used with similarity)
12
- #
13
- # In Picky, indexing is separated from the index
14
- # handling itself through a parallel structure.
15
- #
16
- # Both use methods provided by this base class, but
17
- # have very different goals:
18
- #
19
- # * *Indexing*::*Bundle* is just concerned with creating index files
20
- # and providing helper functions to e.g. check the indexes.
21
- #
22
- # * *Index*::*Bundle* is concerned with loading these index files into
23
- # memory and looking up search data as fast as possible.
24
- #
25
- module Bundle
26
-
27
- # This is the indexing bundle.
28
- #
29
- # It does all menial tasks that have nothing to do
30
- # with the actual index running etc.
31
- #
32
- class Base < Picky::Bundle
33
-
34
- attr_reader :backend,
35
- :prepared
36
-
37
- attr_accessor :partial_strategy,
38
- :weights_strategy
39
-
40
- def initialize name, category, weights_strategy, partial_strategy, similarity_strategy, options = {}
41
- super name, category, similarity_strategy, options
42
-
43
- @weights_strategy = weights_strategy
44
- @partial_strategy = partial_strategy
45
- @key_format = options[:key_format]
46
- @prepared = Backend::File::Text.new category.prepared_index_path
47
- end
48
-
49
- # Sets up a piece of the index for the given token.
50
- #
51
- def initialize_inverted_index_for token
52
- self.inverted[token] ||= []
53
- end
54
-
55
- # Generation
56
- #
57
-
58
- # This method
59
- # * Loads the base index from the "prepared..." file.
60
- # * Generates derived indexes.
61
- # * Dumps all the indexes into files.
62
- #
63
- def generate_caches_from_source
64
- load_from_prepared_index_file
65
- generate_caches_from_memory
66
- end
67
- # Generates derived indexes from the index and dumps.
68
- #
69
- # Note: assumes that there is something in the index
70
- #
71
- def generate_caches_from_memory
72
- cache_from_memory_generation_message
73
- generate_derived
74
- end
75
- def cache_from_memory_generation_message
76
- timed_exclaim %Q{"#{identifier}": Caching from intermediate in-memory index.}
77
- end
78
-
79
- # Generates the weights and similarity from the main index.
80
- #
81
- def generate_derived
82
- generate_weights
83
- generate_similarity
84
- end
85
-
86
- # Load the data from the db.
87
- #
88
- def load_from_prepared_index_file
89
- load_from_prepared_index_generation_message
90
- clear
91
- retrieve
92
- end
93
- def load_from_prepared_index_generation_message
94
- timed_exclaim %Q{"#{identifier}": Loading prepared data into memory.}
95
- end
96
- # Retrieves the prepared index data into the index.
97
- #
98
- # This is in preparation for generating
99
- # derived indexes (like weights, similarity)
100
- # and later dumping the optimized index.
101
- #
102
- # TODO Move this out to the category?
103
- #
104
- def retrieve
105
- format = category.key_format || :to_i # Optimization.
106
- prepared.retrieve do |id, token|
107
- initialize_inverted_index_for token
108
- self.inverted[token] << id.send(format)
109
- end
110
- end
111
-
112
- # Generates a new index (writes its index) using the
113
- # partial caching strategy of this bundle.
114
- #
115
- def generate_partial
116
- generator = Generators::PartialGenerator.new self.inverted
117
- self.inverted = generator.generate self.partial_strategy
118
- end
119
- # Generate a partial index from the given exact inverted index.
120
- #
121
- def generate_partial_from exact_inverted_index
122
- timed_exclaim %Q{"#{identifier}": Generating partial index for index.}
123
- self.inverted = exact_inverted_index
124
- self.generate_partial
125
- self
126
- end
127
- # Generates a new weights index (writes its index) using the
128
- # given weight caching strategy.
129
- #
130
- def generate_weights
131
- generator = Generators::WeightsGenerator.new self.inverted
132
- self.weights = generator.generate self.weights_strategy
133
- end
134
- # Generates a new similarity index (writes its index) using the
135
- # given similarity caching strategy.
136
- #
137
- def generate_similarity
138
- generator = Generators::SimilarityGenerator.new self.inverted
139
- self.similarity = generator.generate self.similarity_strategy
140
- end
141
-
142
- # Saves the indexes in a dump file.
143
- #
144
- def dump
145
- timed_exclaim %Q{"#{identifier}": Dumping data.}
146
- dump_inverted
147
- dump_similarity
148
- dump_weights
149
- dump_configuration
150
- end
151
- # Dumps the core index.
152
- #
153
- def dump_inverted
154
- # timed_exclaim %Q{"#{identifier}": Dumping inverted index.}
155
- backend.dump_inverted self.inverted
156
- end
157
- # Dumps the weights index.
158
- #
159
- def dump_weights
160
- # timed_exclaim %Q{"#{identifier}": Dumping index weights.}
161
- backend.dump_weights self.weights
162
- end
163
- # Dumps the similarity index.
164
- #
165
- def dump_similarity
166
- # timed_exclaim %Q{"#{identifier}": Dumping similarity index.}
167
- backend.dump_similarity self.similarity
168
- end
169
- # Dumps the similarity index.
170
- #
171
- def dump_configuration
172
- # timed_exclaim %Q{"#{identifier}": Dumping configuration.}
173
- backend.dump_configuration self.configuration
174
- end
175
-
176
- # Alerts the user if an index is missing.
177
- #
178
- def raise_unless_cache_exists
179
- raise_unless_index_exists
180
- raise_unless_similarity_exists
181
- end
182
- # Alerts the user if one of the necessary indexes
183
- # (core, weights) is missing.
184
- #
185
- def raise_unless_index_exists
186
- if partial_strategy.saved?
187
- warn_if_index_small
188
- raise_unless_index_ok
189
- end
190
- end
191
- # Alerts the user if the similarity
192
- # index is missing (given that it's used).
193
- #
194
- def raise_unless_similarity_exists
195
- if similarity_strategy.saved?
196
- warn_if_similarity_small
197
- raise_unless_similarity_ok
198
- end
199
- end
200
-
201
- # Outputs a warning for the given cache.
202
- #
203
- def warn_cache_small what
204
- warn "Warning: #{what} cache for #{identifier} smaller than 16 bytes."
205
- end
206
- # Raises an appropriate error message for the given cache.
207
- #
208
- def raise_cache_missing what
209
- raise "Error: The #{what} cache for #{identifier} is missing."
210
- end
211
-
212
- # Warns the user if the similarity index is small.
213
- #
214
- def warn_if_similarity_small
215
- warn_cache_small :similarity if backend.similarity_cache_small?
216
- end
217
- # Alerts the user if the similarity index is not there.
218
- #
219
- def raise_unless_similarity_ok
220
- raise_cache_missing :similarity unless backend.similarity_cache_ok?
221
- end
222
-
223
- # Warns the user if the core or weights indexes are small.
224
- #
225
- def warn_if_index_small
226
- warn_cache_small :inverted if backend.inverted_cache_small?
227
- warn_cache_small :weights if backend.weights_cache_small?
228
- end
229
- # Alerts the user if the core or weights indexes are not there.
230
- #
231
- def raise_unless_index_ok
232
- raise_cache_missing :inverted unless backend.inverted_cache_ok?
233
- raise_cache_missing :weights unless backend.weights_cache_ok?
234
- end
235
-
236
- end
237
-
238
- end
239
-
240
- end
241
-
242
- end
@@ -1,26 +0,0 @@
1
- module Picky
2
-
3
- # encoding: utf-8
4
- #
5
- module Indexing # :nodoc:all
6
-
7
- module Bundle
8
-
9
- # The memory version dumps its generated indexes to disk
10
- # (mostly JSON) to load them into memory on startup.
11
- #
12
- class Memory < Base
13
-
14
- def initialize name, category, *args
15
- super name, category, *args
16
-
17
- @backend = Backend::Files.new self
18
- end
19
-
20
- end
21
-
22
- end
23
-
24
- end
25
-
26
- end
@@ -1,26 +0,0 @@
1
- module Picky
2
-
3
- # encoding: utf-8
4
- #
5
- module Indexing # :nodoc:all
6
-
7
- module Bundle
8
-
9
- # The Redis version dumps its generated indexes to
10
- # the Redis backend.
11
- #
12
- class Redis < Base
13
-
14
- def initialize name, category, *args
15
- super name, category, *args
16
-
17
- @backend = Backend::Redis.new self
18
- end
19
-
20
- end
21
-
22
- end
23
-
24
- end
25
-
26
- end
@@ -1,74 +0,0 @@
1
- module Picky
2
-
3
- module Query
4
-
5
- # Combinations are a number of Combination-s.
6
- #
7
- # They are the core of an allocation.
8
- # An allocation consists of a number of combinations.
9
- #
10
- module Combinations # :nodoc:all
11
-
12
- # Base Combinations contain methods for calculating score and ids.
13
- #
14
- class Base
15
-
16
- attr_reader :combinations
17
-
18
- delegate :empty?, :to => :@combinations
19
-
20
- def initialize combinations = []
21
- @combinations = combinations
22
- end
23
-
24
- def hash
25
- @combinations.hash
26
- end
27
-
28
- # Uses user specific weights to calculate a score for the combinations.
29
- #
30
- def calculate_score weights
31
- total_score + weighted_score(weights)
32
- end
33
- def total_score
34
- @combinations.sum &:weight
35
- end
36
- def weighted_score weights
37
- weights.score_for @combinations
38
- end
39
-
40
- # Filters the tokens and identifiers such that only identifiers
41
- # that are passed in, remain, including their tokens.
42
- #
43
- # Note: This method is not totally independent of the calculate_ids one.
44
- # Since identifiers are only nullified, we need to not include the
45
- # ids that have an associated identifier that is nil.
46
- #
47
- def keep identifiers = []
48
- @combinations.reject! { |combination| !combination.in?(identifiers) }
49
- end
50
-
51
- # Filters the tokens and identifiers such that identifiers
52
- # that are passed in, are removed, including their tokens.
53
- #
54
- # Note: This method is not totally independent of the calculate_ids one.
55
- # Since identifiers are only nullified, we need to not include the
56
- # ids that have an associated identifier that is nil.
57
- #
58
- def remove identifiers = []
59
- @combinations.reject! { |combination| combination.in?(identifiers) }
60
- end
61
-
62
- #
63
- #
64
- def to_result
65
- @combinations.map &:to_result
66
- end
67
-
68
- end
69
-
70
- end
71
-
72
- end
73
-
74
- end
@@ -1,52 +0,0 @@
1
- module Picky
2
-
3
- module Query
4
-
5
- # Combinations are a number of Combination-s.
6
- #
7
- # They are the core of an allocation.
8
- # An allocation consists of a number of combinations.
9
- #
10
- module Combinations # :nodoc:all
11
-
12
- # Memory Combinations contain specific methods for
13
- # calculating score and ids in memory.
14
- #
15
- class Memory < Base
16
-
17
- # Returns the result ids for the allocation.
18
- #
19
- # Sorts the ids by size and & through them in the following order (sizes):
20
- # 0. [100_000, 400, 30, 2]
21
- # 1. [2, 30, 400, 100_000]
22
- # 2. (100_000 & (400 & (30 & 2))) # => result
23
- #
24
- # Note: Uses a C-optimized intersection routine (in performant.c)
25
- # for speed and memory efficiency.
26
- #
27
- # Note: In the memory based version we ignore the (amount) needed hint.
28
- # We cannot use the information to speed up the algorithm, unfortunately.
29
- #
30
- def ids _, _
31
- return [] if @combinations.empty?
32
-
33
- # Get the ids for each combination.
34
- #
35
- id_arrays = @combinations.inject([]) do |total, combination|
36
- total << combination.ids
37
- end
38
-
39
- # Call the optimized C algorithm.
40
- #
41
- # Note: It orders the passed arrays by size.
42
- #
43
- Performant::Array.memory_efficient_intersect id_arrays
44
- end
45
-
46
- end
47
-
48
- end
49
-
50
- end
51
-
52
- end
@@ -1,90 +0,0 @@
1
- module Picky
2
-
3
- module Query
4
-
5
- # Combinations are a number of Combination-s.
6
- #
7
- # They are the core of an allocation.
8
- # An allocation consists of a number of combinations.
9
- #
10
- module Combinations # :nodoc:all
11
-
12
- # Redis Combinations contain specific methods for
13
- # calculating score and ids in memory.
14
- #
15
- class Redis < Base
16
-
17
- # Connect to the backend.
18
- #
19
- # TODO Use specific Picky Redis wrapper.
20
- #
21
- def self.redis
22
- @redis ||= ::Redis.new :db => 15
23
- end
24
-
25
- attr_reader :redis
26
-
27
- #
28
- #
29
- def initialize combinations
30
- super combinations
31
-
32
- @redis = self.class.redis
33
- end
34
-
35
- # Returns the result ids for the allocation.
36
- #
37
- def ids amount, offset
38
- return [] if @combinations.empty?
39
-
40
- identifiers = @combinations.inject([]) do |identifiers, combination|
41
- identifiers << "#{combination.identifier}"
42
- end
43
-
44
- result_id = generate_intermediate_result_id
45
-
46
- # Intersect and store.
47
- #
48
- redis.zinterstore result_id, identifiers
49
-
50
- # Get the stored result.
51
- #
52
- results = redis.zrange result_id, offset, (offset + amount)
53
-
54
- # Delete the stored result as it was only for temporary purposes.
55
- #
56
- # Note: I could also not delete it, but that would not be clean at all.
57
- #
58
- redis.del result_id
59
-
60
- results
61
- end
62
-
63
- # Generate a multiple host/process safe result id.
64
- #
65
- # Note: Generated when this class loads.
66
- #
67
- require 'socket'
68
- def self.extract_host
69
- @host ||= Socket.gethostname
70
- end
71
- def host
72
- self.class.extract_host
73
- end
74
- extract_host
75
- def pid
76
- @pid ||= Process.pid
77
- end
78
- # Use the host and pid (generated lazily in child processes) for the result.
79
- #
80
- def generate_intermediate_result_id
81
- :"#{host}:#{pid}:picky:result"
82
- end
83
-
84
- end
85
-
86
- end
87
-
88
- end
89
-
90
- end
data/lib/picky/query.rb DELETED
@@ -1,6 +0,0 @@
1
- module Picky
2
-
3
- module Query # :nodoc:all
4
- end
5
-
6
- end