picky 1.4.1 → 1.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (229) hide show
  1. data/lib/picky/{alias_instances.rb → aliases.rb} +1 -3
  2. data/lib/picky/application.rb +18 -19
  3. data/lib/picky/cores.rb +1 -1
  4. data/lib/picky/generators/aliases.rb +3 -0
  5. data/lib/picky/index/base.rb +179 -0
  6. data/lib/picky/index/memory.rb +28 -0
  7. data/lib/picky/index/redis.rb +28 -0
  8. data/lib/picky/{indexes_api.rb → index_bundle.rb} +16 -16
  9. data/lib/picky/indexed/indexes.rb +11 -7
  10. data/lib/picky/indexing/indexes.rb +14 -8
  11. data/lib/picky/internals/adapters/rack/base.rb +27 -0
  12. data/lib/picky/internals/adapters/rack/live_parameters.rb +37 -0
  13. data/lib/picky/internals/adapters/rack/query.rb +63 -0
  14. data/lib/picky/internals/adapters/rack.rb +34 -0
  15. data/lib/picky/{calculations → internals/calculations}/location.rb +0 -0
  16. data/lib/picky/{cli.rb → internals/cli.rb} +0 -0
  17. data/lib/picky/{configuration → internals/configuration}/index.rb +8 -2
  18. data/lib/picky/{ext → internals/ext}/maybe_compile.rb +0 -0
  19. data/lib/picky/{ext → internals/ext}/ruby19/extconf.rb +0 -0
  20. data/lib/picky/{ext → internals/ext}/ruby19/performant.c +0 -0
  21. data/lib/picky/{extensions → internals/extensions}/array.rb +0 -0
  22. data/lib/picky/{extensions → internals/extensions}/hash.rb +0 -0
  23. data/lib/picky/{extensions → internals/extensions}/module.rb +0 -0
  24. data/lib/picky/{extensions → internals/extensions}/object.rb +0 -0
  25. data/lib/picky/{extensions → internals/extensions}/symbol.rb +0 -0
  26. data/lib/picky/internals/frontend_adapters/rack.rb +154 -0
  27. data/lib/picky/internals/generators/base.rb +19 -0
  28. data/lib/picky/internals/generators/partial/default.rb +7 -0
  29. data/lib/picky/internals/generators/partial/none.rb +35 -0
  30. data/lib/picky/internals/generators/partial/strategy.rb +29 -0
  31. data/lib/picky/internals/generators/partial/substring.rb +122 -0
  32. data/lib/picky/internals/generators/partial_generator.rb +19 -0
  33. data/lib/picky/internals/generators/similarity/default.rb +9 -0
  34. data/lib/picky/internals/generators/similarity/double_levenshtone.rb +81 -0
  35. data/lib/picky/internals/generators/similarity/none.rb +35 -0
  36. data/lib/picky/internals/generators/similarity/strategy.rb +11 -0
  37. data/lib/picky/internals/generators/similarity_generator.rb +19 -0
  38. data/lib/picky/internals/generators/strategy.rb +18 -0
  39. data/lib/picky/internals/generators/weights/default.rb +9 -0
  40. data/lib/picky/internals/generators/weights/logarithmic.rb +43 -0
  41. data/lib/picky/internals/generators/weights/strategy.rb +11 -0
  42. data/lib/picky/internals/generators/weights_generator.rb +19 -0
  43. data/lib/picky/{helpers → internals/helpers}/measuring.rb +0 -0
  44. data/lib/picky/internals/index/backend.rb +113 -0
  45. data/lib/picky/internals/index/file/basic.rb +101 -0
  46. data/lib/picky/internals/index/file/json.rb +38 -0
  47. data/lib/picky/internals/index/file/marshal.rb +38 -0
  48. data/lib/picky/internals/index/file/text.rb +60 -0
  49. data/lib/picky/internals/index/files.rb +24 -0
  50. data/lib/picky/internals/index/redis/basic.rb +77 -0
  51. data/lib/picky/internals/index/redis/list_hash.rb +46 -0
  52. data/lib/picky/internals/index/redis/string_hash.rb +35 -0
  53. data/lib/picky/internals/index/redis.rb +44 -0
  54. data/lib/picky/internals/indexed/bundle/base.rb +72 -0
  55. data/lib/picky/internals/indexed/bundle/memory.rb +69 -0
  56. data/lib/picky/internals/indexed/bundle/redis.rb +70 -0
  57. data/lib/picky/internals/indexed/categories.rb +135 -0
  58. data/lib/picky/internals/indexed/category.rb +90 -0
  59. data/lib/picky/internals/indexed/index.rb +57 -0
  60. data/lib/picky/{indexed → internals/indexed}/wrappers/bundle/calculation.rb +0 -0
  61. data/lib/picky/{indexed → internals/indexed}/wrappers/bundle/location.rb +4 -2
  62. data/lib/picky/{indexed → internals/indexed}/wrappers/bundle/wrapper.rb +1 -1
  63. data/lib/picky/internals/indexed/wrappers/exact_first.rb +65 -0
  64. data/lib/picky/{indexers → internals/indexers}/no_source_specified_error.rb +0 -0
  65. data/lib/picky/{indexers → internals/indexers}/serial.rb +2 -2
  66. data/lib/picky/{indexers → internals/indexers}/solr.rb +0 -0
  67. data/lib/picky/internals/indexing/bundle/base.rb +219 -0
  68. data/lib/picky/internals/indexing/bundle/memory.rb +25 -0
  69. data/lib/picky/internals/indexing/bundle/redis.rb +28 -0
  70. data/lib/picky/internals/indexing/bundle/super_base.rb +65 -0
  71. data/lib/picky/internals/indexing/categories.rb +42 -0
  72. data/lib/picky/internals/indexing/category.rb +120 -0
  73. data/lib/picky/internals/indexing/index.rb +67 -0
  74. data/lib/picky/{performant.rb → internals/performant.rb} +0 -0
  75. data/lib/picky/internals/query/allocation.rb +88 -0
  76. data/lib/picky/internals/query/allocations.rb +137 -0
  77. data/lib/picky/internals/query/combination.rb +80 -0
  78. data/lib/picky/internals/query/combinations/base.rb +84 -0
  79. data/lib/picky/internals/query/combinations/memory.rb +58 -0
  80. data/lib/picky/internals/query/combinations/redis.rb +59 -0
  81. data/lib/picky/internals/query/indexes.rb +180 -0
  82. data/lib/picky/internals/query/qualifiers.rb +81 -0
  83. data/lib/picky/internals/query/token.rb +215 -0
  84. data/lib/picky/internals/query/tokens.rb +89 -0
  85. data/lib/picky/{query → internals/query}/weights.rb +0 -0
  86. data/lib/picky/internals/results/base.rb +106 -0
  87. data/lib/picky/internals/results/full.rb +17 -0
  88. data/lib/picky/internals/results/live.rb +17 -0
  89. data/lib/picky/{solr → internals/solr}/schema_generator.rb +0 -0
  90. data/lib/picky/internals/tokenizers/base.rb +166 -0
  91. data/lib/picky/internals/tokenizers/index.rb +63 -0
  92. data/lib/picky/internals/tokenizers/query.rb +79 -0
  93. data/lib/picky/loader.rb +148 -112
  94. data/lib/picky/query/base.rb +57 -26
  95. data/lib/picky/query/full.rb +1 -1
  96. data/lib/picky/query/live.rb +1 -1
  97. data/lib/picky/sources/db.rb +27 -6
  98. data/lib/tasks/index.rake +3 -3
  99. data/lib/tasks/try.rake +2 -2
  100. data/spec/lib/aliases_spec.rb +9 -0
  101. data/spec/lib/application_spec.rb +3 -3
  102. data/spec/lib/generators/aliases_spec.rb +1 -0
  103. data/spec/lib/{index_api_spec.rb → index/base_spec.rb} +7 -7
  104. data/spec/lib/index_bundle_spec.rb +71 -0
  105. data/spec/lib/indexed/indexes_spec.rb +61 -0
  106. data/spec/lib/indexing/indexes_spec.rb +94 -24
  107. data/spec/lib/{adapters → internals/adapters}/rack/base_spec.rb +2 -2
  108. data/spec/lib/{adapters → internals/adapters}/rack/live_parameters_spec.rb +2 -2
  109. data/spec/lib/{adapters → internals/adapters}/rack/query_spec.rb +2 -2
  110. data/spec/lib/{calculations → internals/calculations}/location_spec.rb +0 -0
  111. data/spec/lib/{cli_spec.rb → internals/cli_spec.rb} +4 -1
  112. data/spec/lib/{configuration → internals/configuration}/index_spec.rb +1 -1
  113. data/spec/lib/{cores_spec.rb → internals/cores_spec.rb} +0 -0
  114. data/spec/lib/{extensions → internals/extensions}/array_spec.rb +0 -0
  115. data/spec/lib/{extensions → internals/extensions}/hash_spec.rb +0 -0
  116. data/spec/lib/{extensions → internals/extensions}/module_spec.rb +0 -0
  117. data/spec/lib/{extensions → internals/extensions}/object_spec.rb +0 -0
  118. data/spec/lib/{extensions → internals/extensions}/symbol_spec.rb +0 -0
  119. data/spec/lib/{frontend_adapters → internals/frontend_adapters}/rack_spec.rb +11 -11
  120. data/spec/lib/{cacher → internals/generators}/cacher_strategy_spec.rb +2 -2
  121. data/spec/lib/internals/generators/partial/default_spec.rb +17 -0
  122. data/spec/lib/internals/generators/partial/none_spec.rb +17 -0
  123. data/spec/lib/{cacher → internals/generators}/partial/substring_spec.rb +26 -27
  124. data/spec/lib/{cacher → internals/generators}/partial_generator_spec.rb +5 -5
  125. data/spec/lib/{cacher → internals/generators}/similarity/double_levenshtone_spec.rb +4 -4
  126. data/spec/lib/{cacher → internals/generators}/similarity/none_spec.rb +2 -2
  127. data/spec/lib/{cacher → internals/generators}/similarity_generator_spec.rb +4 -4
  128. data/spec/lib/{cacher → internals/generators}/weights/logarithmic_spec.rb +2 -2
  129. data/spec/lib/internals/generators/weights_generator_spec.rb +21 -0
  130. data/spec/lib/{helpers → internals/helpers}/measuring_spec.rb +0 -0
  131. data/spec/lib/{index → internals/index}/file/basic_spec.rb +2 -2
  132. data/spec/lib/{index → internals/index}/file/json_spec.rb +2 -2
  133. data/spec/lib/{index → internals/index}/file/marshal_spec.rb +2 -2
  134. data/spec/lib/{index → internals/index}/file/text_spec.rb +2 -2
  135. data/spec/lib/{index → internals/index}/files_spec.rb +2 -2
  136. data/spec/lib/{indexed/bundle_spec.rb → internals/indexed/bundle/memory_spec.rb} +4 -5
  137. data/spec/lib/{indexed → internals/indexed}/categories_spec.rb +13 -13
  138. data/spec/lib/{indexed → internals/indexed}/category_spec.rb +59 -32
  139. data/spec/lib/{indexed → internals/indexed}/index_spec.rb +5 -5
  140. data/spec/lib/{indexed → internals/indexed}/wrappers/bundle/calculation_spec.rb +0 -0
  141. data/spec/lib/{indexed → internals/indexed}/wrappers/bundle/wrapper_spec.rb +0 -0
  142. data/spec/lib/{indexed → internals/indexed}/wrappers/exact_first_spec.rb +5 -5
  143. data/spec/lib/{indexers → internals/indexers}/serial_spec.rb +0 -0
  144. data/spec/lib/{indexing/bundle_partial_generation_speed_spec.rb → internals/indexing/bundle/memory_partial_generation_speed_spec.rb} +3 -3
  145. data/spec/lib/{indexing/bundle_spec.rb → internals/indexing/bundle/memory_spec.rb} +3 -3
  146. data/spec/lib/{index/bundle_spec.rb → internals/indexing/bundle/super_base_spec.rb} +9 -3
  147. data/spec/lib/{indexing → internals/indexing}/category_spec.rb +3 -3
  148. data/spec/lib/{indexing → internals/indexing}/index_spec.rb +3 -3
  149. data/spec/lib/internals/indexing/indexes_spec.rb +36 -0
  150. data/spec/lib/{interfaces → internals/interfaces}/live_parameters_spec.rb +0 -0
  151. data/spec/lib/internals/results/base_spec.rb +105 -0
  152. data/spec/lib/internals/results/full_spec.rb +78 -0
  153. data/spec/lib/internals/results/live_spec.rb +88 -0
  154. data/spec/lib/{solr → internals/solr}/schema_generator_spec.rb +0 -0
  155. data/spec/lib/{tokenizers → internals/tokenizers}/base_spec.rb +3 -3
  156. data/spec/lib/{tokenizers → internals/tokenizers}/index_spec.rb +9 -9
  157. data/spec/lib/{tokenizers → internals/tokenizers}/query_spec.rb +11 -11
  158. data/spec/lib/query/allocation_spec.rb +12 -12
  159. data/spec/lib/query/allocations_spec.rb +19 -19
  160. data/spec/lib/query/base_spec.rb +28 -4
  161. data/spec/lib/query/combination_spec.rb +8 -9
  162. data/spec/lib/query/combinations/base_spec.rb +116 -0
  163. data/spec/lib/query/{combinations_spec.rb → combinations/memory_spec.rb} +14 -14
  164. data/spec/lib/query/combinations/redis_spec.rb +132 -0
  165. data/spec/lib/query/full_spec.rb +2 -2
  166. data/spec/lib/query/indexes_spec.rb +81 -0
  167. data/spec/lib/query/live_spec.rb +3 -3
  168. data/spec/lib/query/qualifiers_spec.rb +6 -6
  169. data/spec/lib/query/token_spec.rb +38 -38
  170. data/spec/lib/query/tokens_spec.rb +35 -35
  171. data/spec/lib/sources/db_spec.rb +23 -18
  172. metadata +212 -181
  173. data/lib/picky/adapters/rack/base.rb +0 -23
  174. data/lib/picky/adapters/rack/live_parameters.rb +0 -33
  175. data/lib/picky/adapters/rack/query.rb +0 -59
  176. data/lib/picky/adapters/rack.rb +0 -28
  177. data/lib/picky/cacher/convenience.rb +0 -3
  178. data/lib/picky/cacher/generator.rb +0 -15
  179. data/lib/picky/cacher/partial/default.rb +0 -5
  180. data/lib/picky/cacher/partial/none.rb +0 -31
  181. data/lib/picky/cacher/partial/strategy.rb +0 -21
  182. data/lib/picky/cacher/partial/substring.rb +0 -118
  183. data/lib/picky/cacher/partial_generator.rb +0 -15
  184. data/lib/picky/cacher/similarity/default.rb +0 -7
  185. data/lib/picky/cacher/similarity/double_levenshtone.rb +0 -77
  186. data/lib/picky/cacher/similarity/none.rb +0 -31
  187. data/lib/picky/cacher/similarity/strategy.rb +0 -9
  188. data/lib/picky/cacher/similarity_generator.rb +0 -15
  189. data/lib/picky/cacher/strategy.rb +0 -12
  190. data/lib/picky/cacher/weights/default.rb +0 -7
  191. data/lib/picky/cacher/weights/logarithmic.rb +0 -39
  192. data/lib/picky/cacher/weights/strategy.rb +0 -9
  193. data/lib/picky/cacher/weights_generator.rb +0 -15
  194. data/lib/picky/frontend_adapters/rack.rb +0 -150
  195. data/lib/picky/index/bundle.rb +0 -54
  196. data/lib/picky/index/file/basic.rb +0 -97
  197. data/lib/picky/index/file/json.rb +0 -34
  198. data/lib/picky/index/file/marshal.rb +0 -34
  199. data/lib/picky/index/file/text.rb +0 -56
  200. data/lib/picky/index/files.rb +0 -118
  201. data/lib/picky/index_api.rb +0 -175
  202. data/lib/picky/indexed/bundle.rb +0 -54
  203. data/lib/picky/indexed/categories.rb +0 -131
  204. data/lib/picky/indexed/category.rb +0 -85
  205. data/lib/picky/indexed/index.rb +0 -39
  206. data/lib/picky/indexed/wrappers/exact_first.rb +0 -61
  207. data/lib/picky/indexing/bundle.rb +0 -213
  208. data/lib/picky/indexing/categories.rb +0 -38
  209. data/lib/picky/indexing/category.rb +0 -117
  210. data/lib/picky/indexing/index.rb +0 -55
  211. data/lib/picky/query/allocation.rb +0 -82
  212. data/lib/picky/query/allocations.rb +0 -130
  213. data/lib/picky/query/combination.rb +0 -74
  214. data/lib/picky/query/combinations.rb +0 -105
  215. data/lib/picky/query/qualifiers.rb +0 -77
  216. data/lib/picky/query/token.rb +0 -202
  217. data/lib/picky/query/tokens.rb +0 -86
  218. data/lib/picky/query/weigher.rb +0 -165
  219. data/lib/picky/results/base.rb +0 -102
  220. data/lib/picky/results/full.rb +0 -13
  221. data/lib/picky/results/live.rb +0 -13
  222. data/lib/picky/tokenizers/base.rb +0 -161
  223. data/lib/picky/tokenizers/index.rb +0 -58
  224. data/lib/picky/tokenizers/query.rb +0 -74
  225. data/spec/lib/cacher/partial/default_spec.rb +0 -15
  226. data/spec/lib/cacher/partial/none_spec.rb +0 -17
  227. data/spec/lib/cacher/weights_generator_spec.rb +0 -21
  228. data/spec/lib/results/base_spec.rb +0 -257
  229. data/spec/lib/results/live_spec.rb +0 -15
@@ -0,0 +1,219 @@
1
+ # encoding: utf-8
2
+ #
3
+ module Internals
4
+
5
+ module Indexing # :nodoc:all
6
+
7
+ module Bundle
8
+
9
+ # This is the indexing bundle.
10
+ # It does all menial tasks that have nothing to do
11
+ # with the actual index running etc.
12
+ #
13
+ class Base < SuperBase
14
+
15
+ attr_accessor :partial_strategy, :weights_strategy
16
+
17
+ # Path is in which directory the cache is located.
18
+ #
19
+ def initialize name, configuration, similarity_strategy, partial_strategy, weights_strategy
20
+ super name, configuration, similarity_strategy
21
+
22
+ @partial_strategy = partial_strategy
23
+ @weights_strategy = weights_strategy
24
+ end
25
+
26
+ # Sets up a piece of the index for the given token.
27
+ #
28
+ def initialize_index_for token
29
+ index[token] ||= []
30
+ end
31
+
32
+ # Generation
33
+ #
34
+
35
+ # This method
36
+ # * loads the base index from the prepared index file
37
+ # * generates derived indexes
38
+ # * does not dump them; dumping is done by #dump
39
+ #
40
+ def generate_caches_from_source
41
+ load_from_index_file
42
+ generate_caches_from_memory
43
+ end
44
+ # Generates derived indexes from the index; dumping is done by #dump.
45
+ #
46
+ # Note: assumes that there is something in the index
47
+ #
48
+ def generate_caches_from_memory
49
+ cache_from_memory_generation_message
50
+ generate_derived
51
+ end
52
+ def cache_from_memory_generation_message
53
+ timed_exclaim "CACHE FROM MEMORY #{identifier}."
54
+ end
55
+
56
+ # Generates the weights and similarity from the main index.
57
+ #
58
+ def generate_derived
59
+ generate_weights
60
+ generate_similarity
61
+ end
62
+
63
+ # Clears the index and loads the prepared data from the index file.
64
+ #
65
+ def load_from_index_file
66
+ load_from_index_generation_message
67
+ clear
68
+ retrieve
69
+ end
70
+ def load_from_index_generation_message
71
+ timed_exclaim "LOAD INDEX #{identifier}."
72
+ end
73
+ # Retrieves the prepared index data into the index.
74
+ #
75
+ # This is in preparation for generating
76
+ # derived indexes (like weights, similarity)
77
+ # and later dumping the optimized index.
78
+ #
79
+ def retrieve
80
+ key_format = self[:key_format] || :to_i
81
+ files.retrieve do |id, token|
82
+ initialize_index_for token
83
+ index[token] << id.send(key_format) # TODO Rewrite. Move this into the specific indexing.
84
+ end
85
+ end
86
+
87
+ # Generates a new index (writes its index) using the
88
+ # partial caching strategy of this bundle.
89
+ #
90
+ def generate_partial
91
+ generator = Generators::PartialGenerator.new self.index
92
+ self.index = generator.generate self.partial_strategy
93
+ end
94
+ # Generate a partial index from the given exact index.
95
+ #
96
+ def generate_partial_from exact_index
97
+ timed_exclaim "PARTIAL GENERATE #{identifier}."
98
+ self.index = exact_index
99
+ self.generate_partial
100
+ self
101
+ end
102
+ # Generates a new similarity index (writes its index) using the
103
+ # given similarity caching strategy.
104
+ #
105
+ def generate_similarity
106
+ generator = Generators::SimilarityGenerator.new self.index
107
+ self.similarity = generator.generate self.similarity_strategy
108
+ end
109
+ # Generates a new weights index (writes its index) using the
110
+ # given weight caching strategy.
111
+ #
112
+ def generate_weights
113
+ generator = Generators::WeightsGenerator.new self.index
114
+ self.weights = generator.generate self.weights_strategy
115
+ end
116
+
117
+ # Saves the indexes in a dump file.
118
+ #
119
+ def dump
120
+ dump_index
121
+ dump_similarity
122
+ dump_weights
123
+ dump_configuration
124
+ end
125
+ # Dumps the core index.
126
+ #
127
+ def dump_index
128
+ timed_exclaim "DUMP INDEX #{identifier}."
129
+ backend.dump_index index
130
+ end
131
+ # Dumps the weights index.
132
+ #
133
+ def dump_weights
134
+ timed_exclaim "DUMP WEIGHTS #{identifier}."
135
+ backend.dump_weights weights
136
+ end
137
+ # Dumps the similarity index.
138
+ #
139
+ def dump_similarity
140
+ timed_exclaim "DUMP SIMILARITY #{identifier}."
141
+ backend.dump_similarity similarity
142
+ end
143
+ # Dumps the configuration.
144
+ #
145
+ def dump_configuration
146
+ timed_exclaim "DUMP CONFIGURATION #{identifier}."
147
+ backend.dump_configuration configuration
148
+ end
149
+
150
+ # Alerts the user if an index is missing.
151
+ #
152
+ def raise_unless_cache_exists
153
+ raise_unless_index_exists
154
+ raise_unless_similarity_exists
155
+ end
156
+ # Alerts the user if one of the necessary indexes
157
+ # (core, weights) is missing.
158
+ #
159
+ def raise_unless_index_exists
160
+ if partial_strategy.saved?
161
+ warn_if_index_small
162
+ raise_unless_index_ok
163
+ end
164
+ end
165
+ # Alerts the user if the similarity
166
+ # index is missing (given that it's used).
167
+ #
168
+ def raise_unless_similarity_exists
169
+ if similarity_strategy.saved?
170
+ warn_if_similarity_small
171
+ raise_unless_similarity_ok
172
+ end
173
+ end
174
+
175
+ # Outputs a warning for the given cache.
176
+ #
177
+ def warn_cache_small what
178
+ puts "Warning: #{what} cache for #{identifier} smaller than 16 bytes."
179
+ end
180
+ # Raises an appropriate error message for the given cache.
181
+ #
182
+ def raise_cache_missing what
183
+ raise "#{what} cache for #{identifier} missing."
184
+ end
185
+
186
+ # Warns the user if the similarity index is small.
187
+ #
188
+ def warn_if_similarity_small
189
+ warn_cache_small :similarity if backend.similarity_cache_small?
190
+ end
191
+ # Alerts the user if the similarity index is not there.
192
+ #
193
+ def raise_unless_similarity_ok
194
+ raise_cache_missing :similarity unless backend.similarity_cache_ok?
195
+ end
196
+
197
+ # TODO Spec on down.
198
+ #
199
+
200
+ # Warns the user if the core or weights indexes are small.
201
+ #
202
+ def warn_if_index_small
203
+ warn_cache_small :index if backend.index_cache_small?
204
+ warn_cache_small :weights if backend.weights_cache_small?
205
+ end
206
+ # Alerts the user if the core or weights indexes are not there.
207
+ #
208
+ def raise_unless_index_ok
209
+ raise_cache_missing :index unless backend.index_cache_ok?
210
+ raise_cache_missing :weights unless backend.weights_cache_ok?
211
+ end
212
+
213
+ end
214
+
215
+ end
216
+
217
+ end
218
+
219
+ end
@@ -0,0 +1,25 @@
1
+ # encoding: utf-8
2
+ #
3
+ module Internals
4
+
5
+ module Indexing # :nodoc:all
6
+
7
+ module Bundle
8
+
9
+ # The memory version dumps its generated indexes to disk
10
+ # (mostly JSON) to load them into memory on startup.
11
+ #
12
+ class Memory < Base
13
+
14
+ # We're using files for the memory backend.
15
+ # E.g. dump writes files.
16
+ #
17
+ alias backend files
18
+
19
+ end
20
+
21
+ end
22
+
23
+ end
24
+
25
+ end
@@ -0,0 +1,28 @@
1
+ # encoding: utf-8
2
+ #
3
+ module Internals
4
+
5
+ module Indexing # :nodoc:all
6
+
7
+ module Bundle
8
+
9
+ # The Redis version uses an Internals::Index::Redis instance
10
+ # as its backend (the memory version uses its files instead).
11
+ #
12
+ class Redis < Base
13
+
14
+ attr_reader :backend
15
+
16
+ def initialize name, configuration, *args
17
+ super name, configuration, *args
18
+
19
+ @backend = Internals::Index::Redis.new name, configuration # TODO Needed?
20
+ end
21
+
22
+ end
23
+
24
+ end
25
+
26
+ end
27
+
28
+ end
@@ -0,0 +1,65 @@
1
+ module Internals
2
+
3
+ # FIXME Merge into Base, extract common with Indexed::Base.
4
+ #
5
+ module Indexing # :nodoc:all
6
+ # A Bundle is a number of indexes
7
+ # per [index, category] combination.
8
+ #
9
+ # At most, there are three indexes:
10
+ # * *core* index (always used)
11
+ # * *weights* index (always used)
12
+ # * *similarity* index (used with similarity)
13
+ #
14
+ # In Picky, indexing is separated from the index
15
+ # handling itself through a parallel structure.
16
+ #
17
+ # Both use methods provided by this base class, but
18
+ # have very different goals:
19
+ #
20
+ # * *Indexing*::*Bundle* is just concerned with creating index files
21
+ # and providing helper functions to e.g. check the indexes.
22
+ #
23
+ # * *Indexed*::*Bundle* is concerned with loading these index files into
24
+ # memory and looking up search data as fast as possible.
25
+ #
26
+ module Bundle
27
+
28
+ class SuperBase
29
+
30
+ attr_reader :identifier, :files
31
+ attr_accessor :index, :weights, :similarity, :configuration, :similarity_strategy
32
+
33
+ delegate :clear, :to => :index
34
+ delegate :[], :[]=, :to => :configuration
35
+
36
+ def initialize name, configuration, similarity_strategy
37
+ @identifier = "#{configuration.identifier}:#{name}"
38
+ @files = Internals::Index::Files.new name, configuration
39
+
40
+ @index = {}
41
+ @weights = {}
42
+ @similarity = {}
43
+ @configuration = {} # A hash with config options.
44
+
45
+ @similarity_strategy = similarity_strategy
46
+ end
47
+
48
+ # Get a list of similar texts.
49
+ #
50
+ # Note: Does not return itself.
51
+ #
52
+ def similar text
53
+ code = similarity_strategy.encoded text
54
+ similar_codes = code && @similarity[code]
55
+ similar_codes.delete text if similar_codes
56
+ similar_codes || []
57
+ end
58
+
59
+ end
60
+
61
+ end
62
+
63
+ end
64
+
65
+ end
@@ -0,0 +1,42 @@
1
+ module Internals
2
+
3
+ module Indexing
4
+
5
+ class Categories
6
+
7
+ attr_reader :categories
8
+
9
+ each_delegate :index,
10
+ :cache,
11
+ :generate_caches,
12
+ :backup_caches,
13
+ :restore_caches,
14
+ :check_caches,
15
+ :clear_caches,
16
+ :create_directory_structure,
17
+ :to => :categories
18
+
19
+ def initialize
20
+ @categories = []
21
+ end
22
+
23
+ def << category
24
+ categories << category
25
+ end
26
+
27
+ def find category_name
28
+ category_name = category_name.to_sym
29
+
30
+ categories.each do |category|
31
+ next unless category.name == category_name
32
+ return category
33
+ end
34
+
35
+ raise %Q{Index category "#{category_name}" not found. Possible categories: "#{categories.map(&:name).join('", "')}".}
36
+ end
37
+
38
+ end
39
+
40
+ end
41
+
42
+ end
@@ -0,0 +1,120 @@
1
+ module Internals
2
+
3
+ module Indexing
4
+
5
+ class Category
6
+
7
+ attr_reader :exact, :partial, :name, :configuration, :indexer
8
+
9
+ # Mandatory params:
10
+ # * name: Category name to use as identifier and file names.
11
+ # * index: Index to which this category is attached.
12
+ # Options:
13
+ # * partial: Partial::None.new, Partial::Substring.new(from:start_char, to:up_to_char) (defaults from:-3, to:-1)
14
+ # * similarity: Similarity::None.new (default), Similarity::Phonetic.new(amount_of_similarly_linked_words)
15
+ # * source: Use if the category should use a different source.
16
+ # * from: The source category identifier to take the data from.
17
+ #
18
+ # Advanced Options (TODO):
19
+ #
20
+ # * weights:
21
+ # * tokenizer:
22
+ #
23
+ def initialize name, index, options = {}
24
+ @name = name
25
+ @from = options[:from]
26
+
27
+ # Now we have enough info to combine the index and the category.
28
+ #
29
+ @configuration = Configuration::Index.new index, self
30
+
31
+ @tokenizer = options[:tokenizer] || Tokenizers::Index.default
32
+ @indexer = Indexers::Serial.new configuration, options[:source], @tokenizer
33
+
34
+ # TODO Push into Bundle. At least the weights.
35
+ #
36
+ partial = options[:partial] || Generators::Partial::Default
37
+ weights = options[:weights] || Generators::Weights::Default
38
+ similarity = options[:similarity] || Generators::Similarity::Default
39
+
40
+ bundle_class = options[:indexing_bundle_class] || Bundle::Memory
41
+ @exact = bundle_class.new(:exact, configuration, similarity, Generators::Partial::None.new, weights)
42
+ @partial = bundle_class.new(:partial, configuration, Generators::Similarity::None.new, partial, weights)
43
+ end
44
+
45
+ delegate :identifier, :prepare_index_directory, :to => :configuration
46
+ delegate :source, :source=, :tokenizer, :tokenizer=, :to => :indexer
47
+
48
+ def from
49
+ @from || name
50
+ end
51
+
52
+ # TODO Spec.
53
+ #
54
+ def backup_caches
55
+ timed_exclaim "Backing up #{identifier}."
56
+ exact.backup
57
+ partial.backup
58
+ end
59
+ def restore_caches
60
+ timed_exclaim "Restoring #{identifier}."
61
+ exact.restore
62
+ partial.restore
63
+ end
64
+ def check_caches
65
+ timed_exclaim "Checking #{identifier}."
66
+ exact.raise_unless_cache_exists
67
+ partial.raise_unless_cache_exists
68
+ end
69
+ def clear_caches
70
+ timed_exclaim "Deleting #{identifier}."
71
+ exact.delete
72
+ partial.delete
73
+ end
74
+
75
+ def index
76
+ prepare_index_directory
77
+ indexer.index
78
+ end
79
+
80
+ # Generates all caches for this category.
81
+ #
82
+ def cache
83
+ prepare_index_directory
84
+ configure
85
+ generate_caches
86
+ end
87
+ # We need to set what formatting method should be used.
88
+ # Uses the one defined in the indexer.
89
+ #
90
+ def configure
91
+ key_format = indexer.key_format
92
+ exact[:key_format] = key_format
93
+ partial[:key_format] = key_format
94
+ end
95
+ def generate_caches
96
+ generate_caches_from_source
97
+ generate_partial
98
+ generate_caches_from_memory
99
+ dump_caches
100
+ timed_exclaim "CACHE FINISHED #{identifier}."
101
+ end
102
+ def generate_caches_from_source
103
+ exact.generate_caches_from_source
104
+ end
105
+ def generate_partial
106
+ partial.generate_partial_from exact.index
107
+ end
108
+ def generate_caches_from_memory
109
+ partial.generate_caches_from_memory
110
+ end
111
+ def dump_caches
112
+ exact.dump
113
+ partial.dump
114
+ end
115
+
116
+ end
117
+
118
+ end
119
+
120
+ end
@@ -0,0 +1,67 @@
1
+ # TODO Move to the API.
2
+ #
3
+ module Internals
4
+
5
+ module Indexing
6
+
7
+ class Index
8
+
9
+ attr_reader :name, :source, :categories, :after_indexing
10
+
11
+ # Delegators for indexing.
12
+ #
13
+ delegate :connect_backend,
14
+ :to => :source
15
+
16
+ delegate :index,
17
+ :cache,
18
+ :generate_caches,
19
+ :backup_caches,
20
+ :restore_caches,
21
+ :check_caches,
22
+ :clear_caches,
23
+ :create_directory_structure,
24
+ :to => :categories
25
+
26
+ def initialize name, source, options = {}
27
+ @name = name
28
+ @source = source
29
+
30
+ @after_indexing = options[:after_indexing]
31
+ @bundle_class = options[:indexing_bundle_class] # TODO This should actually be a fixed parameter.
32
+
33
+ @categories = Categories.new
34
+ end
35
+
36
+ # TODO Spec. Doc.
37
+ #
38
+ def define_category category_name, options = {}
39
+ options = default_category_options.merge options
40
+
41
+ new_category = Category.new category_name, self, options
42
+ categories << new_category
43
+ new_category
44
+ end
45
+
46
+ # By default, the category uses
47
+ # * the index's source.
48
+ # * the index's bundle type.
49
+ #
50
+ def default_category_options
51
+ {
52
+ :source => @source,
53
+ :indexing_bundle_class => @bundle_class
54
+ }
55
+ end
56
+
57
+ # Indexing: asks the source to take a snapshot for this index.
58
+ #
59
+ def take_snapshot
60
+ source.take_snapshot self
61
+ end
62
+
63
+ end
64
+
65
+ end
66
+
67
+ end
@@ -0,0 +1,88 @@
1
+ module Internals
2
+
3
+ module Query
4
+
5
+ # An allocation has a number of combinations:
6
+ # [token, index], [other_token, other_index], ...
7
+ #
8
+ class Allocation # :nodoc:all
9
+
10
+ attr_reader :count, :ids, :score, :combinations, :result_identifier
11
+
12
+ #
13
+ #
14
+ def initialize combinations, result_identifier
15
+ @combinations = combinations
16
+ @result_identifier = result_identifier
17
+ end
18
+
19
+ def hash
20
+ @combinations.hash
21
+ end
22
+ def eql? other_allocation
23
+ true # FIXME
24
+ # @combinations.eql? other_allocation.combinations
25
+ end
26
+
27
+ # Scores its combinations and caches the result.
28
+ #
29
+ def calculate_score weights
30
+ @score ||= @combinations.calculate_score(weights)
31
+ end
32
+
33
+ # Asks the combinations for the (intersected) ids.
34
+ #
35
+ def calculate_ids amount, offset
36
+ @combinations.ids amount, offset # Calculate as many ids as are necessary.
37
+ end
38
+
39
+ # This starts the searching process.
40
+ #
41
+ def process! amount, offset
42
+ ids = calculate_ids amount, offset
43
+ @count = ids.size # cache the count before throwing away the ids
44
+ @ids = ids.slice!(offset, amount) || [] # slice out the relevant part
45
+ end
46
+
47
+ #
48
+ #
49
+ def keep identifiers = [] # categories
50
+ @combinations.keep identifiers
51
+ end
52
+ #
53
+ #
54
+ def remove identifiers = [] # categories
55
+ @combinations.remove identifiers
56
+ end
57
+
58
+ # Sort highest score first.
59
+ #
60
+ def <=> other_allocation
61
+ other_allocation.score <=> self.score
62
+ end
63
+
64
+ # Transform the allocation into result form.
65
+ #
66
+ def to_result
67
+ [self.result_identifier, self.score, self.count, @combinations.to_result, self.ids] if self.count > 0
68
+ end
69
+
70
+ # Json representation of this allocation.
71
+ #
72
+ # Note: Delegates to to_result.
73
+ #
74
+ def to_json
75
+ to_result.to_json
76
+ end
77
+
78
+ #
79
+ #
80
+ def to_s
81
+ "Allocation: #{to_result.join(', ')}"
82
+ end
83
+
84
+ end
85
+
86
+ end
87
+
88
+ end