picky 1.4.1 → 1.4.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (229) hide show
  1. data/lib/picky/{alias_instances.rb → aliases.rb} +1 -3
  2. data/lib/picky/application.rb +18 -19
  3. data/lib/picky/cores.rb +1 -1
  4. data/lib/picky/generators/aliases.rb +3 -0
  5. data/lib/picky/index/base.rb +179 -0
  6. data/lib/picky/index/memory.rb +28 -0
  7. data/lib/picky/index/redis.rb +28 -0
  8. data/lib/picky/{indexes_api.rb → index_bundle.rb} +16 -16
  9. data/lib/picky/indexed/indexes.rb +11 -7
  10. data/lib/picky/indexing/indexes.rb +14 -8
  11. data/lib/picky/internals/adapters/rack/base.rb +27 -0
  12. data/lib/picky/internals/adapters/rack/live_parameters.rb +37 -0
  13. data/lib/picky/internals/adapters/rack/query.rb +63 -0
  14. data/lib/picky/internals/adapters/rack.rb +34 -0
  15. data/lib/picky/{calculations → internals/calculations}/location.rb +0 -0
  16. data/lib/picky/{cli.rb → internals/cli.rb} +0 -0
  17. data/lib/picky/{configuration → internals/configuration}/index.rb +8 -2
  18. data/lib/picky/{ext → internals/ext}/maybe_compile.rb +0 -0
  19. data/lib/picky/{ext → internals/ext}/ruby19/extconf.rb +0 -0
  20. data/lib/picky/{ext → internals/ext}/ruby19/performant.c +0 -0
  21. data/lib/picky/{extensions → internals/extensions}/array.rb +0 -0
  22. data/lib/picky/{extensions → internals/extensions}/hash.rb +0 -0
  23. data/lib/picky/{extensions → internals/extensions}/module.rb +0 -0
  24. data/lib/picky/{extensions → internals/extensions}/object.rb +0 -0
  25. data/lib/picky/{extensions → internals/extensions}/symbol.rb +0 -0
  26. data/lib/picky/internals/frontend_adapters/rack.rb +154 -0
  27. data/lib/picky/internals/generators/base.rb +19 -0
  28. data/lib/picky/internals/generators/partial/default.rb +7 -0
  29. data/lib/picky/internals/generators/partial/none.rb +35 -0
  30. data/lib/picky/internals/generators/partial/strategy.rb +29 -0
  31. data/lib/picky/internals/generators/partial/substring.rb +122 -0
  32. data/lib/picky/internals/generators/partial_generator.rb +19 -0
  33. data/lib/picky/internals/generators/similarity/default.rb +9 -0
  34. data/lib/picky/internals/generators/similarity/double_levenshtone.rb +81 -0
  35. data/lib/picky/internals/generators/similarity/none.rb +35 -0
  36. data/lib/picky/internals/generators/similarity/strategy.rb +11 -0
  37. data/lib/picky/internals/generators/similarity_generator.rb +19 -0
  38. data/lib/picky/internals/generators/strategy.rb +18 -0
  39. data/lib/picky/internals/generators/weights/default.rb +9 -0
  40. data/lib/picky/internals/generators/weights/logarithmic.rb +43 -0
  41. data/lib/picky/internals/generators/weights/strategy.rb +11 -0
  42. data/lib/picky/internals/generators/weights_generator.rb +19 -0
  43. data/lib/picky/{helpers → internals/helpers}/measuring.rb +0 -0
  44. data/lib/picky/internals/index/backend.rb +113 -0
  45. data/lib/picky/internals/index/file/basic.rb +101 -0
  46. data/lib/picky/internals/index/file/json.rb +38 -0
  47. data/lib/picky/internals/index/file/marshal.rb +38 -0
  48. data/lib/picky/internals/index/file/text.rb +60 -0
  49. data/lib/picky/internals/index/files.rb +24 -0
  50. data/lib/picky/internals/index/redis/basic.rb +77 -0
  51. data/lib/picky/internals/index/redis/list_hash.rb +46 -0
  52. data/lib/picky/internals/index/redis/string_hash.rb +35 -0
  53. data/lib/picky/internals/index/redis.rb +44 -0
  54. data/lib/picky/internals/indexed/bundle/base.rb +72 -0
  55. data/lib/picky/internals/indexed/bundle/memory.rb +69 -0
  56. data/lib/picky/internals/indexed/bundle/redis.rb +70 -0
  57. data/lib/picky/internals/indexed/categories.rb +135 -0
  58. data/lib/picky/internals/indexed/category.rb +90 -0
  59. data/lib/picky/internals/indexed/index.rb +57 -0
  60. data/lib/picky/{indexed → internals/indexed}/wrappers/bundle/calculation.rb +0 -0
  61. data/lib/picky/{indexed → internals/indexed}/wrappers/bundle/location.rb +4 -2
  62. data/lib/picky/{indexed → internals/indexed}/wrappers/bundle/wrapper.rb +1 -1
  63. data/lib/picky/internals/indexed/wrappers/exact_first.rb +65 -0
  64. data/lib/picky/{indexers → internals/indexers}/no_source_specified_error.rb +0 -0
  65. data/lib/picky/{indexers → internals/indexers}/serial.rb +2 -2
  66. data/lib/picky/{indexers → internals/indexers}/solr.rb +0 -0
  67. data/lib/picky/internals/indexing/bundle/base.rb +219 -0
  68. data/lib/picky/internals/indexing/bundle/memory.rb +25 -0
  69. data/lib/picky/internals/indexing/bundle/redis.rb +28 -0
  70. data/lib/picky/internals/indexing/bundle/super_base.rb +65 -0
  71. data/lib/picky/internals/indexing/categories.rb +42 -0
  72. data/lib/picky/internals/indexing/category.rb +120 -0
  73. data/lib/picky/internals/indexing/index.rb +67 -0
  74. data/lib/picky/{performant.rb → internals/performant.rb} +0 -0
  75. data/lib/picky/internals/query/allocation.rb +88 -0
  76. data/lib/picky/internals/query/allocations.rb +137 -0
  77. data/lib/picky/internals/query/combination.rb +80 -0
  78. data/lib/picky/internals/query/combinations/base.rb +84 -0
  79. data/lib/picky/internals/query/combinations/memory.rb +58 -0
  80. data/lib/picky/internals/query/combinations/redis.rb +59 -0
  81. data/lib/picky/internals/query/indexes.rb +180 -0
  82. data/lib/picky/internals/query/qualifiers.rb +81 -0
  83. data/lib/picky/internals/query/token.rb +215 -0
  84. data/lib/picky/internals/query/tokens.rb +89 -0
  85. data/lib/picky/{query → internals/query}/weights.rb +0 -0
  86. data/lib/picky/internals/results/base.rb +106 -0
  87. data/lib/picky/internals/results/full.rb +17 -0
  88. data/lib/picky/internals/results/live.rb +17 -0
  89. data/lib/picky/{solr → internals/solr}/schema_generator.rb +0 -0
  90. data/lib/picky/internals/tokenizers/base.rb +166 -0
  91. data/lib/picky/internals/tokenizers/index.rb +63 -0
  92. data/lib/picky/internals/tokenizers/query.rb +79 -0
  93. data/lib/picky/loader.rb +148 -112
  94. data/lib/picky/query/base.rb +57 -26
  95. data/lib/picky/query/full.rb +1 -1
  96. data/lib/picky/query/live.rb +1 -1
  97. data/lib/picky/sources/db.rb +27 -6
  98. data/lib/tasks/index.rake +3 -3
  99. data/lib/tasks/try.rake +2 -2
  100. data/spec/lib/aliases_spec.rb +9 -0
  101. data/spec/lib/application_spec.rb +3 -3
  102. data/spec/lib/generators/aliases_spec.rb +1 -0
  103. data/spec/lib/{index_api_spec.rb → index/base_spec.rb} +7 -7
  104. data/spec/lib/index_bundle_spec.rb +71 -0
  105. data/spec/lib/indexed/indexes_spec.rb +61 -0
  106. data/spec/lib/indexing/indexes_spec.rb +94 -24
  107. data/spec/lib/{adapters → internals/adapters}/rack/base_spec.rb +2 -2
  108. data/spec/lib/{adapters → internals/adapters}/rack/live_parameters_spec.rb +2 -2
  109. data/spec/lib/{adapters → internals/adapters}/rack/query_spec.rb +2 -2
  110. data/spec/lib/{calculations → internals/calculations}/location_spec.rb +0 -0
  111. data/spec/lib/{cli_spec.rb → internals/cli_spec.rb} +4 -1
  112. data/spec/lib/{configuration → internals/configuration}/index_spec.rb +1 -1
  113. data/spec/lib/{cores_spec.rb → internals/cores_spec.rb} +0 -0
  114. data/spec/lib/{extensions → internals/extensions}/array_spec.rb +0 -0
  115. data/spec/lib/{extensions → internals/extensions}/hash_spec.rb +0 -0
  116. data/spec/lib/{extensions → internals/extensions}/module_spec.rb +0 -0
  117. data/spec/lib/{extensions → internals/extensions}/object_spec.rb +0 -0
  118. data/spec/lib/{extensions → internals/extensions}/symbol_spec.rb +0 -0
  119. data/spec/lib/{frontend_adapters → internals/frontend_adapters}/rack_spec.rb +11 -11
  120. data/spec/lib/{cacher → internals/generators}/cacher_strategy_spec.rb +2 -2
  121. data/spec/lib/internals/generators/partial/default_spec.rb +17 -0
  122. data/spec/lib/internals/generators/partial/none_spec.rb +17 -0
  123. data/spec/lib/{cacher → internals/generators}/partial/substring_spec.rb +26 -27
  124. data/spec/lib/{cacher → internals/generators}/partial_generator_spec.rb +5 -5
  125. data/spec/lib/{cacher → internals/generators}/similarity/double_levenshtone_spec.rb +4 -4
  126. data/spec/lib/{cacher → internals/generators}/similarity/none_spec.rb +2 -2
  127. data/spec/lib/{cacher → internals/generators}/similarity_generator_spec.rb +4 -4
  128. data/spec/lib/{cacher → internals/generators}/weights/logarithmic_spec.rb +2 -2
  129. data/spec/lib/internals/generators/weights_generator_spec.rb +21 -0
  130. data/spec/lib/{helpers → internals/helpers}/measuring_spec.rb +0 -0
  131. data/spec/lib/{index → internals/index}/file/basic_spec.rb +2 -2
  132. data/spec/lib/{index → internals/index}/file/json_spec.rb +2 -2
  133. data/spec/lib/{index → internals/index}/file/marshal_spec.rb +2 -2
  134. data/spec/lib/{index → internals/index}/file/text_spec.rb +2 -2
  135. data/spec/lib/{index → internals/index}/files_spec.rb +2 -2
  136. data/spec/lib/{indexed/bundle_spec.rb → internals/indexed/bundle/memory_spec.rb} +4 -5
  137. data/spec/lib/{indexed → internals/indexed}/categories_spec.rb +13 -13
  138. data/spec/lib/{indexed → internals/indexed}/category_spec.rb +59 -32
  139. data/spec/lib/{indexed → internals/indexed}/index_spec.rb +5 -5
  140. data/spec/lib/{indexed → internals/indexed}/wrappers/bundle/calculation_spec.rb +0 -0
  141. data/spec/lib/{indexed → internals/indexed}/wrappers/bundle/wrapper_spec.rb +0 -0
  142. data/spec/lib/{indexed → internals/indexed}/wrappers/exact_first_spec.rb +5 -5
  143. data/spec/lib/{indexers → internals/indexers}/serial_spec.rb +0 -0
  144. data/spec/lib/{indexing/bundle_partial_generation_speed_spec.rb → internals/indexing/bundle/memory_partial_generation_speed_spec.rb} +3 -3
  145. data/spec/lib/{indexing/bundle_spec.rb → internals/indexing/bundle/memory_spec.rb} +3 -3
  146. data/spec/lib/{index/bundle_spec.rb → internals/indexing/bundle/super_base_spec.rb} +9 -3
  147. data/spec/lib/{indexing → internals/indexing}/category_spec.rb +3 -3
  148. data/spec/lib/{indexing → internals/indexing}/index_spec.rb +3 -3
  149. data/spec/lib/internals/indexing/indexes_spec.rb +36 -0
  150. data/spec/lib/{interfaces → internals/interfaces}/live_parameters_spec.rb +0 -0
  151. data/spec/lib/internals/results/base_spec.rb +105 -0
  152. data/spec/lib/internals/results/full_spec.rb +78 -0
  153. data/spec/lib/internals/results/live_spec.rb +88 -0
  154. data/spec/lib/{solr → internals/solr}/schema_generator_spec.rb +0 -0
  155. data/spec/lib/{tokenizers → internals/tokenizers}/base_spec.rb +3 -3
  156. data/spec/lib/{tokenizers → internals/tokenizers}/index_spec.rb +9 -9
  157. data/spec/lib/{tokenizers → internals/tokenizers}/query_spec.rb +11 -11
  158. data/spec/lib/query/allocation_spec.rb +12 -12
  159. data/spec/lib/query/allocations_spec.rb +19 -19
  160. data/spec/lib/query/base_spec.rb +28 -4
  161. data/spec/lib/query/combination_spec.rb +8 -9
  162. data/spec/lib/query/combinations/base_spec.rb +116 -0
  163. data/spec/lib/query/{combinations_spec.rb → combinations/memory_spec.rb} +14 -14
  164. data/spec/lib/query/combinations/redis_spec.rb +132 -0
  165. data/spec/lib/query/full_spec.rb +2 -2
  166. data/spec/lib/query/indexes_spec.rb +81 -0
  167. data/spec/lib/query/live_spec.rb +3 -3
  168. data/spec/lib/query/qualifiers_spec.rb +6 -6
  169. data/spec/lib/query/token_spec.rb +38 -38
  170. data/spec/lib/query/tokens_spec.rb +35 -35
  171. data/spec/lib/sources/db_spec.rb +23 -18
  172. metadata +212 -181
  173. data/lib/picky/adapters/rack/base.rb +0 -23
  174. data/lib/picky/adapters/rack/live_parameters.rb +0 -33
  175. data/lib/picky/adapters/rack/query.rb +0 -59
  176. data/lib/picky/adapters/rack.rb +0 -28
  177. data/lib/picky/cacher/convenience.rb +0 -3
  178. data/lib/picky/cacher/generator.rb +0 -15
  179. data/lib/picky/cacher/partial/default.rb +0 -5
  180. data/lib/picky/cacher/partial/none.rb +0 -31
  181. data/lib/picky/cacher/partial/strategy.rb +0 -21
  182. data/lib/picky/cacher/partial/substring.rb +0 -118
  183. data/lib/picky/cacher/partial_generator.rb +0 -15
  184. data/lib/picky/cacher/similarity/default.rb +0 -7
  185. data/lib/picky/cacher/similarity/double_levenshtone.rb +0 -77
  186. data/lib/picky/cacher/similarity/none.rb +0 -31
  187. data/lib/picky/cacher/similarity/strategy.rb +0 -9
  188. data/lib/picky/cacher/similarity_generator.rb +0 -15
  189. data/lib/picky/cacher/strategy.rb +0 -12
  190. data/lib/picky/cacher/weights/default.rb +0 -7
  191. data/lib/picky/cacher/weights/logarithmic.rb +0 -39
  192. data/lib/picky/cacher/weights/strategy.rb +0 -9
  193. data/lib/picky/cacher/weights_generator.rb +0 -15
  194. data/lib/picky/frontend_adapters/rack.rb +0 -150
  195. data/lib/picky/index/bundle.rb +0 -54
  196. data/lib/picky/index/file/basic.rb +0 -97
  197. data/lib/picky/index/file/json.rb +0 -34
  198. data/lib/picky/index/file/marshal.rb +0 -34
  199. data/lib/picky/index/file/text.rb +0 -56
  200. data/lib/picky/index/files.rb +0 -118
  201. data/lib/picky/index_api.rb +0 -175
  202. data/lib/picky/indexed/bundle.rb +0 -54
  203. data/lib/picky/indexed/categories.rb +0 -131
  204. data/lib/picky/indexed/category.rb +0 -85
  205. data/lib/picky/indexed/index.rb +0 -39
  206. data/lib/picky/indexed/wrappers/exact_first.rb +0 -61
  207. data/lib/picky/indexing/bundle.rb +0 -213
  208. data/lib/picky/indexing/categories.rb +0 -38
  209. data/lib/picky/indexing/category.rb +0 -117
  210. data/lib/picky/indexing/index.rb +0 -55
  211. data/lib/picky/query/allocation.rb +0 -82
  212. data/lib/picky/query/allocations.rb +0 -130
  213. data/lib/picky/query/combination.rb +0 -74
  214. data/lib/picky/query/combinations.rb +0 -105
  215. data/lib/picky/query/qualifiers.rb +0 -77
  216. data/lib/picky/query/token.rb +0 -202
  217. data/lib/picky/query/tokens.rb +0 -86
  218. data/lib/picky/query/weigher.rb +0 -165
  219. data/lib/picky/results/base.rb +0 -102
  220. data/lib/picky/results/full.rb +0 -13
  221. data/lib/picky/results/live.rb +0 -13
  222. data/lib/picky/tokenizers/base.rb +0 -161
  223. data/lib/picky/tokenizers/index.rb +0 -58
  224. data/lib/picky/tokenizers/query.rb +0 -74
  225. data/spec/lib/cacher/partial/default_spec.rb +0 -15
  226. data/spec/lib/cacher/partial/none_spec.rb +0 -17
  227. data/spec/lib/cacher/weights_generator_spec.rb +0 -21
  228. data/spec/lib/results/base_spec.rb +0 -257
  229. data/spec/lib/results/live_spec.rb +0 -15
@@ -0,0 +1,219 @@
1
+ # encoding: utf-8
2
+ #
3
+ module Internals
4
+
5
+ module Indexing # :nodoc:all
6
+
7
+ module Bundle
8
+
9
+ # This is the indexing bundle.
10
+ # It does all menial tasks that have nothing to do
11
+ # with the actual index running etc.
12
+ #
13
+ class Base < SuperBase
14
+
15
+ attr_accessor :partial_strategy, :weights_strategy
16
+
17
+ # Takes the bundle name, the configuration, and the similarity, partial and weights strategies.
18
+ #
19
+ def initialize name, configuration, similarity_strategy, partial_strategy, weights_strategy
20
+ super name, configuration, similarity_strategy
21
+
22
+ @partial_strategy = partial_strategy
23
+ @weights_strategy = weights_strategy
24
+ end
25
+
26
+ # Sets up a piece of the index for the given token.
27
+ #
28
+ def initialize_index_for token
29
+ index[token] ||= []
30
+ end
31
+
32
+ # Generation
33
+ #
34
+
35
+ # This method
36
+ # * loads the base index from the prepared index file
37
+ # * generates derived indexes
38
+ # * dumps all the indexes into files
39
+ #
40
+ def generate_caches_from_source
41
+ load_from_index_file
42
+ generate_caches_from_memory
43
+ end
44
+ # Generates derived indexes from the index and dumps.
45
+ #
46
+ # Note: assumes that there is something in the index
47
+ #
48
+ def generate_caches_from_memory
49
+ cache_from_memory_generation_message
50
+ generate_derived
51
+ end
52
+ def cache_from_memory_generation_message
53
+ timed_exclaim "CACHE FROM MEMORY #{identifier}."
54
+ end
55
+
56
+ # Generates the weights and similarity from the main index.
57
+ #
58
+ def generate_derived
59
+ generate_weights
60
+ generate_similarity
61
+ end
62
+
63
+ # Loads the prepared index data from the index file.
64
+ #
65
+ def load_from_index_file
66
+ load_from_index_generation_message
67
+ clear
68
+ retrieve
69
+ end
70
+ def load_from_index_generation_message
71
+ timed_exclaim "LOAD INDEX #{identifier}."
72
+ end
73
+ # Retrieves the prepared index data into the index.
74
+ #
75
+ # This is in preparation for generating
76
+ # derived indexes (like weights, similarity)
77
+ # and later dumping the optimized index.
78
+ #
79
+ def retrieve
80
+ key_format = self[:key_format] || :to_i
81
+ files.retrieve do |id, token|
82
+ initialize_index_for token
83
+ index[token] << id.send(key_format) # TODO Rewrite. Move this into the specific indexing.
84
+ end
85
+ end
86
+
87
+ # Generates a new index (writes its index) using the
88
+ # partial caching strategy of this bundle.
89
+ #
90
+ def generate_partial
91
+ generator = Generators::PartialGenerator.new self.index
92
+ self.index = generator.generate self.partial_strategy
93
+ end
94
+ # Generate a partial index from the given exact index.
95
+ #
96
+ def generate_partial_from exact_index
97
+ timed_exclaim "PARTIAL GENERATE #{identifier}."
98
+ self.index = exact_index
99
+ self.generate_partial
100
+ self
101
+ end
102
+ # Generates a new similarity index (writes its index) using the
103
+ # given similarity caching strategy.
104
+ #
105
+ def generate_similarity
106
+ generator = Generators::SimilarityGenerator.new self.index
107
+ self.similarity = generator.generate self.similarity_strategy
108
+ end
109
+ # Generates a new weights index (writes its index) using the
110
+ # given weight caching strategy.
111
+ #
112
+ def generate_weights
113
+ generator = Generators::WeightsGenerator.new self.index
114
+ self.weights = generator.generate self.weights_strategy
115
+ end
116
+
117
+ # Saves the indexes in a dump file.
118
+ #
119
+ def dump
120
+ dump_index
121
+ dump_similarity
122
+ dump_weights
123
+ dump_configuration
124
+ end
125
+ # Dumps the core index.
126
+ #
127
+ def dump_index
128
+ timed_exclaim "DUMP INDEX #{identifier}."
129
+ backend.dump_index index
130
+ end
131
+ # Dumps the weights index.
132
+ #
133
+ def dump_weights
134
+ timed_exclaim "DUMP WEIGHTS #{identifier}."
135
+ backend.dump_weights weights
136
+ end
137
+ # Dumps the similarity index.
138
+ #
139
+ def dump_similarity
140
+ timed_exclaim "DUMP SIMILARITY #{identifier}."
141
+ backend.dump_similarity similarity
142
+ end
143
+ # Dumps the configuration.
144
+ #
145
+ def dump_configuration
146
+ timed_exclaim "DUMP CONFIGURATION #{identifier}."
147
+ backend.dump_configuration configuration
148
+ end
149
+
150
+ # Alerts the user if an index is missing.
151
+ #
152
+ def raise_unless_cache_exists
153
+ raise_unless_index_exists
154
+ raise_unless_similarity_exists
155
+ end
156
+ # Alerts the user if one of the necessary indexes
157
+ # (core, weights) is missing.
158
+ #
159
+ def raise_unless_index_exists
160
+ if partial_strategy.saved?
161
+ warn_if_index_small
162
+ raise_unless_index_ok
163
+ end
164
+ end
165
+ # Alerts the user if the similarity
166
+ # index is missing (given that it's used).
167
+ #
168
+ def raise_unless_similarity_exists
169
+ if similarity_strategy.saved?
170
+ warn_if_similarity_small
171
+ raise_unless_similarity_ok
172
+ end
173
+ end
174
+
175
+ # Outputs a warning for the given cache.
176
+ #
177
+ def warn_cache_small what
178
+ puts "Warning: #{what} cache for #{identifier} smaller than 16 bytes."
179
+ end
180
+ # Raises an appropriate error message for the given cache.
181
+ #
182
+ def raise_cache_missing what
183
+ raise "#{what} cache for #{identifier} missing."
184
+ end
185
+
186
+ # Warns the user if the similarity index is small.
187
+ #
188
+ def warn_if_similarity_small
189
+ warn_cache_small :similarity if backend.similarity_cache_small?
190
+ end
191
+ # Alerts the user if the similarity index is not there.
192
+ #
193
+ def raise_unless_similarity_ok
194
+ raise_cache_missing :similarity unless backend.similarity_cache_ok?
195
+ end
196
+
197
+ # TODO Spec on down.
198
+ #
199
+
200
+ # Warns the user if the core or weights indexes are small.
201
+ #
202
+ def warn_if_index_small
203
+ warn_cache_small :index if backend.index_cache_small?
204
+ warn_cache_small :weights if backend.weights_cache_small?
205
+ end
206
+ # Alerts the user if the core or weights indexes are not there.
207
+ #
208
+ def raise_unless_index_ok
209
+ raise_cache_missing :index unless backend.index_cache_ok?
210
+ raise_cache_missing :weights unless backend.weights_cache_ok?
211
+ end
212
+
213
+ end
214
+
215
+ end
216
+
217
+ end
218
+
219
+ end
@@ -0,0 +1,25 @@
1
+ # encoding: utf-8
2
+ #
3
+ module Internals
4
+
5
+ module Indexing # :nodoc:all
6
+
7
+ module Bundle
8
+
9
+ # The memory version dumps its generated indexes to disk
10
+ # (mostly JSON) to load them into memory on startup.
11
+ #
12
+ class Memory < Base
13
+
14
+ # We're using files for the memory backend.
15
+ # E.g. dump writes files.
16
+ #
17
+ alias backend files
18
+
19
+ end
20
+
21
+ end
22
+
23
+ end
24
+
25
+ end
@@ -0,0 +1,28 @@
1
+ # encoding: utf-8
2
+ #
3
+ module Internals
4
+
5
+ module Indexing # :nodoc:all
6
+
7
+ module Bundle
8
+
9
+ # The Redis version uses an Internals::Index::Redis instance
10
+ # as the backend for its generated indexes.
11
+ #
12
+ class Redis < Base
13
+
14
+ attr_reader :backend
15
+
16
+ def initialize name, configuration, *args
17
+ super name, configuration, *args
18
+
19
+ @backend = Internals::Index::Redis.new name, configuration # TODO Needed?
20
+ end
21
+
22
+ end
23
+
24
+ end
25
+
26
+ end
27
+
28
+ end
@@ -0,0 +1,65 @@
1
+ module Internals
2
+
3
+ # FIXME Merge into Base, extract common with Indexed::Base.
4
+ #
5
+ module Indexing # :nodoc:all
6
+ # A Bundle is a number of indexes
7
+ # per [index, category] combination.
8
+ #
9
+ # At most, there are three indexes:
10
+ # * *core* index (always used)
11
+ # * *weights* index (always used)
12
+ # * *similarity* index (used with similarity)
13
+ #
14
+ # In Picky, indexing is separated from the index
15
+ # handling itself through a parallel structure.
16
+ #
17
+ # Both use methods provided by this base class, but
18
+ # have very different goals:
19
+ #
20
+ # * *Indexing*::*Bundle* is just concerned with creating index files
21
+ # and providing helper functions to e.g. check the indexes.
22
+ #
23
+ # * *Indexed*::*Bundle* is concerned with loading these index files into
24
+ # memory and looking up search data as fast as possible.
25
+ #
26
+ module Bundle
27
+
28
+ class SuperBase
29
+
30
+ attr_reader :identifier, :files
31
+ attr_accessor :index, :weights, :similarity, :configuration, :similarity_strategy
32
+
33
+ delegate :clear, :to => :index
34
+ delegate :[], :[]=, :to => :configuration
35
+
36
+ def initialize name, configuration, similarity_strategy
37
+ @identifier = "#{configuration.identifier}:#{name}"
38
+ @files = Internals::Index::Files.new name, configuration
39
+
40
+ @index = {}
41
+ @weights = {}
42
+ @similarity = {}
43
+ @configuration = {} # A hash with config options.
44
+
45
+ @similarity_strategy = similarity_strategy
46
+ end
47
+
48
+ # Get a list of similar texts.
49
+ #
50
+ # Note: Does not return itself.
51
+ #
52
+ def similar text
53
+ code = similarity_strategy.encoded text
54
+ similar_codes = code && @similarity[code]
55
+ similar_codes.delete text if similar_codes
56
+ similar_codes || []
57
+ end
58
+
59
+ end
60
+
61
+ end
62
+
63
+ end
64
+
65
+ end
@@ -0,0 +1,42 @@
1
+ module Internals
2
+
3
+ module Indexing
4
+
5
+ class Categories
6
+
7
+ attr_reader :categories
8
+
9
+ each_delegate :index,
10
+ :cache,
11
+ :generate_caches,
12
+ :backup_caches,
13
+ :restore_caches,
14
+ :check_caches,
15
+ :clear_caches,
16
+ :create_directory_structure,
17
+ :to => :categories
18
+
19
+ def initialize
20
+ @categories = []
21
+ end
22
+
23
+ def << category
24
+ categories << category
25
+ end
26
+
27
+ def find category_name
28
+ category_name = category_name.to_sym
29
+
30
+ categories.each do |category|
31
+ next unless category.name == category_name
32
+ return category
33
+ end
34
+
35
+ raise %Q{Index category "#{category_name}" not found. Possible categories: "#{categories.map(&:name).join('", "')}".}
36
+ end
37
+
38
+ end
39
+
40
+ end
41
+
42
+ end
@@ -0,0 +1,120 @@
1
+ module Internals
2
+
3
+ module Indexing
4
+
5
+ class Category
6
+
7
+ attr_reader :exact, :partial, :name, :configuration, :indexer
8
+
9
+ # Mandatory params:
10
+ # * name: Category name to use as identifier and file names.
11
+ # * index: Index to which this category is attached.
12
+ # Options:
13
+ # * partial: Partial::None.new, Partial::Substring.new(from:start_char, to:up_to_char) (defaults from:-3, to:-1)
14
+ # * similarity: Similarity::None.new (default), Similarity::Phonetic.new(amount_of_similarly_linked_words)
15
+ # * source: Use if the category should use a different source.
16
+ # * from: The source category identifier to take the data from.
17
+ #
18
+ # Advanced Options (TODO):
19
+ #
20
+ # * weights:
21
+ # * tokenizer:
22
+ #
23
+ def initialize name, index, options = {}
24
+ @name = name
25
+ @from = options[:from]
26
+
27
+ # Now we have enough info to combine the index and the category.
28
+ #
29
+ @configuration = Configuration::Index.new index, self
30
+
31
+ @tokenizer = options[:tokenizer] || Tokenizers::Index.default
32
+ @indexer = Indexers::Serial.new configuration, options[:source], @tokenizer
33
+
34
+ # TODO Push into Bundle. At least the weights.
35
+ #
36
+ partial = options[:partial] || Generators::Partial::Default
37
+ weights = options[:weights] || Generators::Weights::Default
38
+ similarity = options[:similarity] || Generators::Similarity::Default
39
+
40
+ bundle_class = options[:indexing_bundle_class] || Bundle::Memory
41
+ @exact = bundle_class.new(:exact, configuration, similarity, Generators::Partial::None.new, weights)
42
+ @partial = bundle_class.new(:partial, configuration, Generators::Similarity::None.new, partial, weights)
43
+ end
44
+
45
+ delegate :identifier, :prepare_index_directory, :to => :configuration
46
+ delegate :source, :source=, :tokenizer, :tokenizer=, :to => :indexer
47
+
48
+ def from
49
+ @from || name
50
+ end
51
+
52
+ # TODO Spec.
53
+ #
54
+ def backup_caches
55
+ timed_exclaim "Backing up #{identifier}."
56
+ exact.backup
57
+ partial.backup
58
+ end
59
+ def restore_caches
60
+ timed_exclaim "Restoring #{identifier}."
61
+ exact.restore
62
+ partial.restore
63
+ end
64
+ def check_caches
65
+ timed_exclaim "Checking #{identifier}."
66
+ exact.raise_unless_cache_exists
67
+ partial.raise_unless_cache_exists
68
+ end
69
+ def clear_caches
70
+ timed_exclaim "Deleting #{identifier}."
71
+ exact.delete
72
+ partial.delete
73
+ end
74
+
75
+ def index
76
+ prepare_index_directory
77
+ indexer.index
78
+ end
79
+
80
+ # Generates all caches for this category.
81
+ #
82
+ def cache
83
+ prepare_index_directory
84
+ configure
85
+ generate_caches
86
+ end
87
+ # We need to set what formatting method should be used.
88
+ # Uses the one defined in the indexer.
89
+ #
90
+ def configure
91
+ key_format = indexer.key_format
92
+ exact[:key_format] = key_format
93
+ partial[:key_format] = key_format
94
+ end
95
+ def generate_caches
96
+ generate_caches_from_source
97
+ generate_partial
98
+ generate_caches_from_memory
99
+ dump_caches
100
+ timed_exclaim "CACHE FINISHED #{identifier}."
101
+ end
102
+ def generate_caches_from_source
103
+ exact.generate_caches_from_source
104
+ end
105
+ def generate_partial
106
+ partial.generate_partial_from exact.index
107
+ end
108
+ def generate_caches_from_memory
109
+ partial.generate_caches_from_memory
110
+ end
111
+ def dump_caches
112
+ exact.dump
113
+ partial.dump
114
+ end
115
+
116
+ end
117
+
118
+ end
119
+
120
+ end
@@ -0,0 +1,67 @@
1
+ # TODO Move to the API.
2
+ #
3
+ module Internals
4
+
5
+ module Indexing
6
+
7
+ class Index
8
+
9
+ attr_reader :name, :source, :categories, :after_indexing
10
+
11
+ # Delegators for indexing.
12
+ #
13
+ delegate :connect_backend,
14
+ :to => :source
15
+
16
+ delegate :index,
17
+ :cache,
18
+ :generate_caches,
19
+ :backup_caches,
20
+ :restore_caches,
21
+ :check_caches,
22
+ :clear_caches,
23
+ :create_directory_structure,
24
+ :to => :categories
25
+
26
+ def initialize name, source, options = {}
27
+ @name = name
28
+ @source = source
29
+
30
+ @after_indexing = options[:after_indexing]
31
+ @bundle_class = options[:indexing_bundle_class] # TODO This should actually be a fixed parameter.
32
+
33
+ @categories = Categories.new
34
+ end
35
+
36
+ # TODO Spec. Doc.
37
+ #
38
+ def define_category category_name, options = {}
39
+ options = default_category_options.merge options
40
+
41
+ new_category = Category.new category_name, self, options
42
+ categories << new_category
43
+ new_category
44
+ end
45
+
46
+ # By default, the category uses
47
+ # * the index's source.
48
+ # * the index's bundle type.
49
+ #
50
+ def default_category_options
51
+ {
52
+ :source => @source,
53
+ :indexing_bundle_class => @bundle_class
54
+ }
55
+ end
56
+
57
+ # Indexing.
58
+ #
59
+ def take_snapshot
60
+ source.take_snapshot self
61
+ end
62
+
63
+ end
64
+
65
+ end
66
+
67
+ end
@@ -0,0 +1,88 @@
1
+ module Internals
2
+
3
+ module Query
4
+
5
+ # An allocation has a number of combinations:
6
+ # [token, index] [other_token, other_index], ...
7
+ #
8
+ class Allocation # :nodoc:all
9
+
10
+ attr_reader :count, :ids, :score, :combinations, :result_identifier
11
+
12
+ #
13
+ #
14
+ def initialize combinations, result_identifier
15
+ @combinations = combinations
16
+ @result_identifier = result_identifier
17
+ end
18
+
19
+ def hash
20
+ @combinations.hash
21
+ end
22
+ def eql? other_allocation
23
+ true # FIXME
24
+ # @combinations.eql? other_allocation.combinations
25
+ end
26
+
27
+ # Scores its combinations and caches the result.
28
+ #
29
+ def calculate_score weights
30
+ @score ||= @combinations.calculate_score(weights)
31
+ end
32
+
33
+ # Asks the combinations for the (intersected) ids.
34
+ #
35
+ def calculate_ids amount, offset
36
+ @combinations.ids amount, offset # Calculate as many ids as are necessary.
37
+ end
38
+
39
+ # This starts the searching process.
40
+ #
41
+ def process! amount, offset
42
+ ids = calculate_ids amount, offset
43
+ @count = ids.size # cache the count before throwing away the ids
44
+ @ids = ids.slice!(offset, amount) || [] # slice out the relevant part
45
+ end
46
+
47
+ #
48
+ #
49
+ def keep identifiers = [] # categories
50
+ @combinations.keep identifiers
51
+ end
52
+ #
53
+ #
54
+ def remove identifiers = [] # categories
55
+ @combinations.remove identifiers
56
+ end
57
+
58
+ # Sort highest score first.
59
+ #
60
+ def <=> other_allocation
61
+ other_allocation.score <=> self.score
62
+ end
63
+
64
+ # Transform the allocation into result form.
65
+ #
66
+ def to_result
67
+ [self.result_identifier, self.score, self.count, @combinations.to_result, self.ids] if self.count > 0
68
+ end
69
+
70
+ # Json representation of this allocation.
71
+ #
72
+ # Note: Delegates to to_result.
73
+ #
74
+ def to_json
75
+ to_result.to_json
76
+ end
77
+
78
+ #
79
+ #
80
+ def to_s
81
+ "Allocation: #{to_result.join(', ')}"
82
+ end
83
+
84
+ end
85
+
86
+ end
87
+
88
+ end