picky 2.5.2 → 2.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (255) hide show
  1. data/lib/picky/adapters/rack/base.rb +23 -0
  2. data/lib/picky/adapters/rack/live_parameters.rb +33 -0
  3. data/lib/picky/adapters/rack/query.rb +65 -0
  4. data/lib/picky/adapters/rack.rb +30 -0
  5. data/lib/picky/application.rb +5 -5
  6. data/lib/picky/backend/backend.rb +108 -0
  7. data/lib/picky/backend/file/basic.rb +101 -0
  8. data/lib/picky/backend/file/json.rb +34 -0
  9. data/lib/picky/backend/file/marshal.rb +34 -0
  10. data/lib/picky/backend/file/text.rb +56 -0
  11. data/lib/picky/backend/files.rb +30 -0
  12. data/lib/picky/backend/redis/basic.rb +85 -0
  13. data/lib/picky/backend/redis/list_hash.rb +49 -0
  14. data/lib/picky/backend/redis/string_hash.rb +40 -0
  15. data/lib/picky/backend/redis.rb +40 -0
  16. data/lib/picky/calculations/location.rb +57 -0
  17. data/lib/picky/categories.rb +62 -0
  18. data/lib/picky/categories_indexed.rb +93 -0
  19. data/lib/picky/categories_indexing.rb +12 -0
  20. data/lib/picky/category.rb +127 -0
  21. data/lib/picky/category_indexed.rb +64 -0
  22. data/lib/picky/category_indexing.rb +145 -0
  23. data/lib/picky/{internals/ext → ext}/maybe_compile.rb +0 -0
  24. data/lib/picky/{internals/ext → ext}/ruby19/extconf.rb +0 -0
  25. data/lib/picky/{internals/ext → ext}/ruby19/performant.c +0 -0
  26. data/lib/picky/{internals/extensions → extensions}/array.rb +0 -0
  27. data/lib/picky/extensions/class.rb +11 -0
  28. data/lib/picky/{internals/extensions → extensions}/hash.rb +0 -0
  29. data/lib/picky/{internals/extensions → extensions}/module.rb +0 -0
  30. data/lib/picky/{internals/extensions → extensions}/object.rb +0 -0
  31. data/lib/picky/{internals/extensions → extensions}/symbol.rb +0 -0
  32. data/lib/picky/frontend_adapters/rack.rb +146 -0
  33. data/lib/picky/generators/aliases.rb +3 -3
  34. data/lib/picky/generators/base.rb +15 -0
  35. data/lib/picky/generators/partial/default.rb +5 -0
  36. data/lib/picky/generators/partial/none.rb +31 -0
  37. data/lib/picky/generators/partial/strategy.rb +25 -0
  38. data/lib/picky/generators/partial/substring.rb +118 -0
  39. data/lib/picky/generators/partial_generator.rb +15 -0
  40. data/lib/picky/generators/similarity/default.rb +7 -0
  41. data/lib/picky/generators/similarity/double_metaphone.rb +28 -0
  42. data/lib/picky/generators/similarity/metaphone.rb +28 -0
  43. data/lib/picky/generators/similarity/none.rb +31 -0
  44. data/lib/picky/generators/similarity/phonetic.rb +65 -0
  45. data/lib/picky/generators/similarity/soundex.rb +28 -0
  46. data/lib/picky/generators/similarity/strategy.rb +9 -0
  47. data/lib/picky/generators/similarity_generator.rb +15 -0
  48. data/lib/picky/generators/strategy.rb +14 -0
  49. data/lib/picky/generators/weights/default.rb +7 -0
  50. data/lib/picky/generators/weights/logarithmic.rb +39 -0
  51. data/lib/picky/generators/weights/strategy.rb +9 -0
  52. data/lib/picky/generators/weights_generator.rb +15 -0
  53. data/lib/picky/{internals/helpers → helpers}/measuring.rb +0 -0
  54. data/lib/picky/index/base.rb +119 -104
  55. data/lib/picky/index/base_indexed.rb +27 -0
  56. data/lib/picky/index/base_indexing.rb +119 -0
  57. data/lib/picky/index/memory.rb +6 -18
  58. data/lib/picky/index/redis.rb +6 -18
  59. data/lib/picky/indexed/bundle/base.rb +110 -0
  60. data/lib/picky/indexed/bundle/memory.rb +91 -0
  61. data/lib/picky/indexed/bundle/redis.rb +45 -0
  62. data/lib/picky/indexed/wrappers/bundle/calculation.rb +35 -0
  63. data/lib/picky/indexed/wrappers/bundle/location.rb +42 -0
  64. data/lib/picky/indexed/wrappers/bundle/wrapper.rb +43 -0
  65. data/lib/picky/indexed/wrappers/category/location.rb +25 -0
  66. data/lib/picky/indexed/wrappers/exact_first.rb +55 -0
  67. data/lib/picky/{internals/indexers → indexers}/base.rb +0 -0
  68. data/lib/picky/{internals/indexers → indexers}/parallel.rb +0 -0
  69. data/lib/picky/{internals/indexers → indexers}/serial.rb +0 -0
  70. data/lib/picky/{internals/indexers → indexers}/solr.rb +0 -0
  71. data/lib/picky/indexes.rb +73 -0
  72. data/lib/picky/indexes_indexed.rb +29 -0
  73. data/lib/picky/indexes_indexing.rb +49 -0
  74. data/lib/picky/indexing/bundle/base.rb +212 -0
  75. data/lib/picky/indexing/bundle/memory.rb +25 -0
  76. data/lib/picky/indexing/bundle/redis.rb +24 -0
  77. data/lib/picky/indexing/bundle/super_base.rb +61 -0
  78. data/lib/picky/indexing/wrappers/category/location.rb +25 -0
  79. data/lib/picky/interfaces/live_parameters.rb +8 -8
  80. data/lib/picky/loader.rb +89 -95
  81. data/lib/picky/{internals/performant.rb → performant.rb} +0 -0
  82. data/lib/picky/query/allocation.rb +84 -0
  83. data/lib/picky/query/allocations.rb +114 -0
  84. data/lib/picky/query/combination.rb +76 -0
  85. data/lib/picky/query/combinations/base.rb +70 -0
  86. data/lib/picky/query/combinations/memory.rb +48 -0
  87. data/lib/picky/query/combinations/redis.rb +86 -0
  88. data/lib/picky/query/indexes.rb +195 -0
  89. data/lib/picky/query/qualifiers.rb +76 -0
  90. data/lib/picky/query/token.rb +198 -0
  91. data/lib/picky/query/tokens.rb +103 -0
  92. data/lib/picky/{internals/query → query}/weights.rb +0 -0
  93. data/lib/picky/results.rb +1 -1
  94. data/lib/picky/search.rb +6 -6
  95. data/lib/picky/{internals/solr → solr}/schema_generator.rb +0 -0
  96. data/lib/picky/sources/db.rb +7 -7
  97. data/lib/picky/sources/wrappers/location.rb +2 -2
  98. data/lib/picky/tokenizers/base.rb +224 -0
  99. data/lib/picky/tokenizers/index.rb +30 -0
  100. data/lib/picky/tokenizers/location.rb +49 -0
  101. data/lib/picky/tokenizers/query.rb +55 -0
  102. data/lib/tasks/index.rake +4 -3
  103. data/lib/tasks/try.rake +2 -2
  104. data/spec/lib/{internals/adapters → adapters}/rack/base_spec.rb +1 -1
  105. data/spec/lib/{internals/adapters → adapters}/rack/live_parameters_spec.rb +1 -1
  106. data/spec/lib/{internals/adapters → adapters}/rack/query_spec.rb +1 -1
  107. data/spec/lib/application_spec.rb +3 -3
  108. data/spec/lib/{internals/index → backend}/file/basic_spec.rb +1 -1
  109. data/spec/lib/{internals/index → backend}/file/json_spec.rb +1 -1
  110. data/spec/lib/{internals/index → backend}/file/marshal_spec.rb +1 -1
  111. data/spec/lib/{internals/index → backend}/file/text_spec.rb +1 -1
  112. data/spec/lib/{internals/index → backend}/files_spec.rb +3 -3
  113. data/spec/lib/{internals/index → backend}/redis/basic_spec.rb +1 -1
  114. data/spec/lib/{internals/index → backend}/redis/list_hash_spec.rb +1 -1
  115. data/spec/lib/{internals/index → backend}/redis/string_hash_spec.rb +1 -1
  116. data/spec/lib/{internals/index → backend}/redis_spec.rb +11 -5
  117. data/spec/lib/{internals/calculations → calculations}/location_spec.rb +1 -1
  118. data/spec/lib/{internals/indexed/categories_spec.rb → categories_indexed_spec.rb} +10 -10
  119. data/spec/lib/{internals/indexed/category_spec.rb → category_indexed_spec.rb} +12 -12
  120. data/spec/lib/{internals/indexing/category_spec.rb → category_indexing_spec.rb} +10 -10
  121. data/spec/lib/{internals/cores_spec.rb → cores_spec.rb} +0 -0
  122. data/spec/lib/{internals/extensions → extensions}/array_spec.rb +0 -0
  123. data/spec/lib/{internals/extensions → extensions}/hash_spec.rb +0 -0
  124. data/spec/lib/{internals/extensions → extensions}/module_spec.rb +0 -0
  125. data/spec/lib/{internals/extensions → extensions}/object_spec.rb +0 -0
  126. data/spec/lib/{internals/extensions → extensions}/symbol_spec.rb +0 -0
  127. data/spec/lib/{internals/frontend_adapters → frontend_adapters}/rack_spec.rb +10 -10
  128. data/spec/lib/generators/aliases_spec.rb +3 -3
  129. data/spec/lib/{internals/generators → generators}/cacher_strategy_spec.rb +1 -1
  130. data/spec/lib/{internals/generators → generators}/partial/default_spec.rb +3 -3
  131. data/spec/lib/{internals/generators → generators}/partial/none_spec.rb +2 -2
  132. data/spec/lib/{internals/generators → generators}/partial/substring_spec.rb +1 -1
  133. data/spec/lib/{internals/generators → generators}/partial_generator_spec.rb +3 -3
  134. data/spec/lib/{internals/generators → generators}/similarity/double_metaphone_spec.rb +1 -1
  135. data/spec/lib/{internals/generators → generators}/similarity/metaphone_spec.rb +1 -1
  136. data/spec/lib/{internals/generators → generators}/similarity/none_spec.rb +1 -1
  137. data/spec/lib/{internals/generators → generators}/similarity/phonetic_spec.rb +1 -1
  138. data/spec/lib/{internals/generators → generators}/similarity/soundex_spec.rb +1 -1
  139. data/spec/lib/{internals/generators → generators}/similarity_generator_spec.rb +2 -2
  140. data/spec/lib/{internals/generators → generators}/weights/logarithmic_spec.rb +1 -1
  141. data/spec/lib/{internals/generators → generators}/weights_generator_spec.rb +5 -5
  142. data/spec/lib/{internals/helpers → helpers}/measuring_spec.rb +0 -0
  143. data/spec/lib/{internals/indexed/index_spec.rb → index/base_indexed_spec.rb} +5 -5
  144. data/spec/lib/{internals/indexing/index_spec.rb → index/base_indexing_spec.rb} +6 -19
  145. data/spec/lib/index/base_spec.rb +10 -53
  146. data/spec/lib/{internals/indexed → indexed}/bundle/memory_spec.rb +5 -5
  147. data/spec/lib/{internals/indexed → indexed}/bundle/redis_spec.rb +4 -4
  148. data/spec/lib/{internals/indexed → indexed}/wrappers/bundle/calculation_spec.rb +1 -1
  149. data/spec/lib/{internals/indexed → indexed}/wrappers/bundle/wrapper_spec.rb +1 -1
  150. data/spec/lib/{internals/indexed → indexed}/wrappers/exact_first_spec.rb +7 -7
  151. data/spec/lib/{internals/indexers → indexers}/base_spec.rb +0 -0
  152. data/spec/lib/{internals/indexers → indexers}/parallel_spec.rb +0 -0
  153. data/spec/lib/{internals/indexers → indexers}/serial_spec.rb +0 -0
  154. data/spec/lib/indexes_class_spec.rb +30 -0
  155. data/spec/lib/{indexed/indexes_spec.rb → indexes_indexed_spec.rb} +1 -1
  156. data/spec/lib/{indexing/indexes_spec.rb → indexes_indexing_spec.rb} +8 -8
  157. data/spec/lib/{internals/indexing/indexes_spec.rb → indexes_spec.rb} +15 -12
  158. data/spec/lib/{internals/indexing → indexing}/bundle/memory_partial_generation_speed_spec.rb +4 -4
  159. data/spec/lib/{internals/indexing → indexing}/bundle/memory_spec.rb +3 -3
  160. data/spec/lib/{internals/indexing → indexing}/bundle/redis_spec.rb +3 -3
  161. data/spec/lib/{internals/indexing → indexing}/bundle/super_base_spec.rb +2 -2
  162. data/spec/lib/{internals/interfaces → interfaces}/live_parameters_spec.rb +0 -0
  163. data/spec/lib/query/allocation_spec.rb +1 -1
  164. data/spec/lib/query/allocations_spec.rb +1 -1
  165. data/spec/lib/query/combination_spec.rb +5 -5
  166. data/spec/lib/query/combinations/base_spec.rb +1 -1
  167. data/spec/lib/query/combinations/memory_spec.rb +1 -1
  168. data/spec/lib/query/combinations/redis_spec.rb +1 -1
  169. data/spec/lib/query/indexes_spec.rb +1 -1
  170. data/spec/lib/query/qualifiers_spec.rb +4 -4
  171. data/spec/lib/query/token_spec.rb +3 -3
  172. data/spec/lib/query/tokens_spec.rb +32 -32
  173. data/spec/lib/search_spec.rb +5 -5
  174. data/spec/lib/{internals/solr → solr}/schema_generator_spec.rb +0 -0
  175. data/spec/lib/sources/db_spec.rb +4 -8
  176. data/spec/lib/sources/wrappers/location_spec.rb +1 -1
  177. data/spec/lib/{internals/tokenizers → tokenizers}/base_spec.rb +1 -1
  178. data/spec/lib/{internals/tokenizers → tokenizers}/index_spec.rb +1 -1
  179. data/spec/lib/{internals/tokenizers → tokenizers}/query_spec.rb +1 -1
  180. metadata +214 -215
  181. data/lib/picky/aliases.rb +0 -4
  182. data/lib/picky/index_bundle.rb +0 -48
  183. data/lib/picky/indexed/indexes.rb +0 -59
  184. data/lib/picky/indexing/indexes.rb +0 -87
  185. data/lib/picky/internals/adapters/rack/base.rb +0 -27
  186. data/lib/picky/internals/adapters/rack/live_parameters.rb +0 -37
  187. data/lib/picky/internals/adapters/rack/query.rb +0 -69
  188. data/lib/picky/internals/adapters/rack.rb +0 -34
  189. data/lib/picky/internals/calculations/location.rb +0 -59
  190. data/lib/picky/internals/frontend_adapters/rack.rb +0 -150
  191. data/lib/picky/internals/generators/base.rb +0 -19
  192. data/lib/picky/internals/generators/partial/default.rb +0 -7
  193. data/lib/picky/internals/generators/partial/none.rb +0 -35
  194. data/lib/picky/internals/generators/partial/strategy.rb +0 -29
  195. data/lib/picky/internals/generators/partial/substring.rb +0 -122
  196. data/lib/picky/internals/generators/partial_generator.rb +0 -19
  197. data/lib/picky/internals/generators/similarity/default.rb +0 -9
  198. data/lib/picky/internals/generators/similarity/double_metaphone.rb +0 -32
  199. data/lib/picky/internals/generators/similarity/metaphone.rb +0 -32
  200. data/lib/picky/internals/generators/similarity/none.rb +0 -35
  201. data/lib/picky/internals/generators/similarity/phonetic.rb +0 -69
  202. data/lib/picky/internals/generators/similarity/soundex.rb +0 -32
  203. data/lib/picky/internals/generators/similarity/strategy.rb +0 -11
  204. data/lib/picky/internals/generators/similarity_generator.rb +0 -19
  205. data/lib/picky/internals/generators/strategy.rb +0 -18
  206. data/lib/picky/internals/generators/weights/default.rb +0 -9
  207. data/lib/picky/internals/generators/weights/logarithmic.rb +0 -43
  208. data/lib/picky/internals/generators/weights/strategy.rb +0 -11
  209. data/lib/picky/internals/generators/weights_generator.rb +0 -19
  210. data/lib/picky/internals/index/backend.rb +0 -112
  211. data/lib/picky/internals/index/file/basic.rb +0 -105
  212. data/lib/picky/internals/index/file/json.rb +0 -38
  213. data/lib/picky/internals/index/file/marshal.rb +0 -38
  214. data/lib/picky/internals/index/file/text.rb +0 -60
  215. data/lib/picky/internals/index/files.rb +0 -34
  216. data/lib/picky/internals/index/redis/basic.rb +0 -89
  217. data/lib/picky/internals/index/redis/list_hash.rb +0 -53
  218. data/lib/picky/internals/index/redis/string_hash.rb +0 -44
  219. data/lib/picky/internals/index/redis.rb +0 -44
  220. data/lib/picky/internals/indexed/bundle/base.rb +0 -114
  221. data/lib/picky/internals/indexed/bundle/memory.rb +0 -95
  222. data/lib/picky/internals/indexed/bundle/redis.rb +0 -49
  223. data/lib/picky/internals/indexed/categories.rb +0 -140
  224. data/lib/picky/internals/indexed/category.rb +0 -111
  225. data/lib/picky/internals/indexed/index.rb +0 -63
  226. data/lib/picky/internals/indexed/wrappers/bundle/calculation.rb +0 -37
  227. data/lib/picky/internals/indexed/wrappers/bundle/location.rb +0 -44
  228. data/lib/picky/internals/indexed/wrappers/bundle/wrapper.rb +0 -45
  229. data/lib/picky/internals/indexed/wrappers/category/location.rb +0 -27
  230. data/lib/picky/internals/indexed/wrappers/exact_first.rb +0 -59
  231. data/lib/picky/internals/indexing/bundle/base.rb +0 -216
  232. data/lib/picky/internals/indexing/bundle/memory.rb +0 -29
  233. data/lib/picky/internals/indexing/bundle/redis.rb +0 -28
  234. data/lib/picky/internals/indexing/bundle/super_base.rb +0 -65
  235. data/lib/picky/internals/indexing/category.rb +0 -153
  236. data/lib/picky/internals/indexing/index.rb +0 -142
  237. data/lib/picky/internals/indexing/wrappers/category/location.rb +0 -27
  238. data/lib/picky/internals/query/allocation.rb +0 -88
  239. data/lib/picky/internals/query/allocations.rb +0 -118
  240. data/lib/picky/internals/query/combination.rb +0 -80
  241. data/lib/picky/internals/query/combinations/base.rb +0 -74
  242. data/lib/picky/internals/query/combinations/memory.rb +0 -52
  243. data/lib/picky/internals/query/combinations/redis.rb +0 -90
  244. data/lib/picky/internals/query/indexes.rb +0 -199
  245. data/lib/picky/internals/query/qualifiers.rb +0 -82
  246. data/lib/picky/internals/query/token.rb +0 -202
  247. data/lib/picky/internals/query/tokens.rb +0 -109
  248. data/lib/picky/internals/shared/category.rb +0 -52
  249. data/lib/picky/internals/tokenizers/base.rb +0 -228
  250. data/lib/picky/internals/tokenizers/index.rb +0 -34
  251. data/lib/picky/internals/tokenizers/location.rb +0 -54
  252. data/lib/picky/internals/tokenizers/query.rb +0 -59
  253. data/lib/picky/internals.rb +0 -2
  254. data/spec/lib/aliases_spec.rb +0 -9
  255. data/spec/lib/index_bundle_spec.rb +0 -69
@@ -0,0 +1,70 @@
1
+ module Query
2
+
3
+ # Combinations are a number of Combination-s.
4
+ #
5
+ # They are the core of an allocation.
6
+ # An allocation consists of a number of combinations.
7
+ #
8
+ module Combinations # :nodoc:all
9
+
10
+ # Base Combinations contain methods for calculating score and ids.
11
+ #
12
+ class Base
13
+
14
+ attr_reader :combinations
15
+
16
+ delegate :empty?, :to => :@combinations
17
+
18
+ def initialize combinations = []
19
+ @combinations = combinations
20
+ end
21
+
22
+ def hash
23
+ @combinations.hash
24
+ end
25
+
26
+ # Uses user specific weights to calculate a score for the combinations.
27
+ #
28
+ def calculate_score weights
29
+ total_score + weighted_score(weights)
30
+ end
31
+ def total_score
32
+ @combinations.sum &:weight
33
+ end
34
+ def weighted_score weights
35
+ weights.score @combinations
36
+ end
37
+
38
+ # Filters the tokens and identifiers such that only identifiers
39
+ # that are passed in, remain, including their tokens.
40
+ #
41
+ # Note: This method is not totally independent of the calculate_ids one.
42
+ # Since identifiers are only nullified, we need to not include the
43
+ # ids that have an associated identifier that is nil.
44
+ #
45
+ def keep identifiers = []
46
+ @combinations.reject! { |combination| !combination.in?(identifiers) }
47
+ end
48
+
49
+ # Filters the tokens and identifiers such that identifiers
50
+ # that are passed in, are removed, including their tokens.
51
+ #
52
+ # Note: This method is not totally independent of the calculate_ids one.
53
+ # Since identifiers are only nullified, we need to not include the
54
+ # ids that have an associated identifier that is nil.
55
+ #
56
+ def remove identifiers = []
57
+ @combinations.reject! { |combination| combination.in?(identifiers) }
58
+ end
59
+
60
+ #
61
+ #
62
+ def to_result
63
+ @combinations.map &:to_result
64
+ end
65
+
66
+ end
67
+
68
+ end
69
+
70
+ end
@@ -0,0 +1,48 @@
1
+ module Query
2
+
3
+ # Combinations are a number of Combination-s.
4
+ #
5
+ # They are the core of an allocation.
6
+ # An allocation consists of a number of combinations.
7
+ #
8
+ module Combinations # :nodoc:all
9
+
10
+ # Memory Combinations contain specific methods for
11
+ # calculating score and ids in memory.
12
+ #
13
+ class Memory < Base
14
+
15
+ # Returns the result ids for the allocation.
16
+ #
17
+ # Sorts the ids by size and & through them in the following order (sizes):
18
+ # 0. [100_000, 400, 30, 2]
19
+ # 1. [2, 30, 400, 100_000]
20
+ # 2. (100_000 & (400 & (30 & 2))) # => result
21
+ #
22
+ # Note: Uses a C-optimized intersection routine (in performant.c)
23
+ # for speed and memory efficiency.
24
+ #
25
+ # Note: In the memory based version we ignore the (amount) needed hint.
26
+ # We cannot use the information to speed up the algorithm, unfortunately.
27
+ #
28
+ def ids _, _
29
+ return [] if @combinations.empty?
30
+
31
+ # Get the ids for each combination.
32
+ #
33
+ id_arrays = @combinations.inject([]) do |total, combination|
34
+ total << combination.ids
35
+ end
36
+
37
+ # Call the optimized C algorithm.
38
+ #
39
+ # Note: It orders the passed arrays by size.
40
+ #
41
+ Performant::Array.memory_efficient_intersect id_arrays
42
+ end
43
+
44
+ end
45
+
46
+ end
47
+
48
+ end
@@ -0,0 +1,86 @@
1
+ module Query
2
+
3
+ # Combinations are a number of Combination-s.
4
+ #
5
+ # They are the core of an allocation.
6
+ # An allocation consists of a number of combinations.
7
+ #
8
+ module Combinations # :nodoc:all
9
+
10
+ # Redis Combinations contain specific methods for
11
+ # calculating score and ids in Redis.
12
+ #
13
+ class Redis < Base
14
+
15
+ # Connect to the backend.
16
+ #
17
+ # TODO Use specific Picky Redis wrapper.
18
+ #
19
+ def self.redis
20
+ @redis ||= ::Redis.new :db => 15
21
+ end
22
+
23
+ attr_reader :redis
24
+
25
+ #
26
+ #
27
+ def initialize combinations
28
+ super combinations
29
+
30
+ @redis = self.class.redis
31
+ end
32
+
33
# Returns the result ids for the allocation.
#
# Intersects the sorted sets of all combinations in Redis, stores the
# intersection under a temporary key, reads at most `amount` ids
# starting at index `offset` from it and deletes the temporary key again.
#
# Returns an empty array if there are no combinations, or if no
# ids are asked for (amount is zero or negative).
#
def ids amount, offset
  return [] if @combinations.empty?

  # A ZRANGE stop index of -1 means "up to the last element", so a
  # request for nothing has to be answered before asking Redis.
  #
  return [] unless amount > 0

  identifiers = @combinations.map { |combination| "#{combination.identifier}" }

  result_id = generate_intermediate_result_id

  # Intersect and store.
  #
  redis.zinterstore result_id, identifiers

  # Get the stored result.
  #
  # Note: ZRANGE includes both the start and the stop index,
  #       so the last wanted index is offset + amount - 1.
  #
  results = redis.zrange result_id, offset, (offset + amount - 1)

  # Delete the stored result as it was only for temporary purposes.
  #
  # Note: I could also not delete it, but that would not be clean at all.
  #
  redis.del result_id

  results
end
60
+
61
+ # Generate a multiple host/process safe result id.
62
+ #
63
+ # Note: The host name is extracted once, when this class loads.
64
+ #
65
+ require 'socket'
66
+ def self.extract_host
67
+ @host ||= Socket.gethostname
68
+ end
69
+ def host
70
+ self.class.extract_host
71
+ end
72
+ extract_host
73
+ def pid
74
+ @pid ||= Process.pid
75
+ end
76
+ # Use the host and pid (generated lazily in child processes) for the result.
77
+ #
78
+ def generate_intermediate_result_id
79
+ :"#{host}:#{pid}:picky:result"
80
+ end
81
+
82
+ end
83
+
84
+ end
85
+
86
+ end
@@ -0,0 +1,195 @@
1
+ module Query
2
+
3
+ # The query indexes class bundles indexes given to a query.
4
+ #
5
+ # Example:
6
+ # # If you call
7
+ # Search.new dvd_index, mp3_index, video_index
8
+ #
9
+ # # What it does is take the three given (API-) indexes and
10
+ # # bundle them in an index bundle.
11
+ #
12
+ class Indexes
13
+
14
+ attr_reader :indexes
15
+
16
+ # Creates a new Query::Indexes.
17
+ #
18
+ # Its job is to generate all possible combinations.
19
+ # Note: We cannot mix memory and redis indexes just yet.
20
+ #
21
+ def initialize *indexes, combinations_type
22
+ @indexes = indexes
23
+ @combinations_type = combinations_type
24
+ end
25
+
26
+ # Returns a number of prepared (sorted, reduced etc.) allocations for the given tokens.
27
+ #
28
+ def prepared_allocations_for tokens, weights = {}
29
+ allocations = allocations_for tokens
30
+
31
+ # Remove double allocations.
32
+ #
33
+ allocations.uniq
34
+
35
+ # Score the allocations using weights as bias.
36
+ #
37
+ allocations.calculate_score weights
38
+
39
+ # Sort the allocations.
40
+ # (allocations are sorted according to score, highest to lowest)
41
+ #
42
+ allocations.sort!
43
+
44
+ # Reduce the amount of allocations.
45
+ #
46
+ # allocations.reduce_to some_amount
47
+
48
+ # Remove identifiers from allocations.
49
+ #
50
+ # allocations.remove some_array_of_identifiers_to_remove
51
+
52
+ allocations
53
+ end
54
+ # Returns a number of possible allocations for the given tokens.
55
+ #
56
+ def allocations_for tokens
57
+ Allocations.new allocations_ary_for(tokens)
58
+ end
59
+ def allocations_ary_for tokens
60
+ indexes.inject([]) do |allocations, index|
61
+ allocations + allocation_for(tokens, index)
62
+ end
63
+ end
64
+ def allocation_for tokens, index
65
+ # Expand the combinations.
66
+ #
67
+ possible_combinations = tokens.possible_combinations_in index
68
+
69
+ # Generate all possible combinations.
70
+ #
71
+ expanded_combinations = expand_combinations_from possible_combinations
72
+
73
+ # Add the wrapped possible allocations to the ones we already have.
74
+ #
75
+ expanded_combinations.map! do |expanded_combination|
76
+ Allocation.new @combinations_type.new(expanded_combination), index.result_identifier # TODO Do not extract result_identifier.
77
+ end
78
+ end
79
+
80
# This is the core of the search engine. No kidding.
#
# Gets an array of
#   [
#     [<combinations for token1>],
#     [<combinations for token2>],
#     [<combinations for token3>]
#   ]
#
# Generates all possible allocations of combinations, picking
# exactly one combination per token:
#   [
#     [first combination of token1, first c of t2, first c of t3],
#     [first combination of token1, first c of t2, second c of t3]
#     ...
#   ]
#
# In other words, the cartesian product of the given arrays:
#   [1,2,3] x [a,b,c] x [k,l,m] => [[1,a,k], [1,a,l], [1,a,m], [1,b,k], [1,b,l], [1,b,m], [1,c,k], ..., [3,c,m]]
#
# The combinations of the last token vary fastest, the
# ones of the first token slowest.
#
# Returns an empty array if there are no tokens, or if one
# of the tokens could not be allocated (has no combinations).
#
# Note: Does not modify the passed in array.
#
def expand_combinations_from possible_combinations
  # No tokens, no allocations.
  #
  return [] if possible_combinations.empty?

  # If an element has size 0, this means one of the
  # tokens could not be allocated.
  #
  return [] if possible_combinations.any?(&:empty?)

  first, *rest = possible_combinations.map(&:to_a)

  # Array#product lines up and recombines the combinations
  # in one go, in the order described above.
  #
  first.product(*rest)
end
192
+
193
+ end
194
+
195
+ end
@@ -0,0 +1,76 @@
1
+ # coding: utf-8
2
+ #
3
+ module Query
4
+
5
+ # A single qualifier.
6
+ #
7
+ class Qualifier # :nodoc:all
8
+
9
+ attr_reader :normalized_qualifier, :codes
10
+
11
+ #
12
+ #
13
+ # codes is an array.
14
+ #
15
+ def initialize normalized_qualifier, codes
16
+ @normalized_qualifier = normalized_qualifier
17
+ @codes = codes.map &:to_sym
18
+ end
19
+
20
+ # Will overwrite if the key is present in the hash.
21
+ #
22
+ def inject_into hash
23
+ codes.each do |code|
24
+ hash[code] = normalized_qualifier
25
+ end
26
+ end
27
+
28
+ end
29
+
30
+ # Collection class for qualifiers.
31
+ #
32
+ class Qualifiers # :nodoc:all
33
+
34
+ attr_reader :qualifiers, :normalization_mapping
35
+
36
+ delegate :<<, :to => :qualifiers
37
+
38
+ #
39
+ #
40
+ def initialize
41
+ @qualifiers = []
42
+ @normalization_mapping = {}
43
+ end
44
# Returns the single, shared Qualifiers instance.
#
# Note: It is created on first access and then remembered on the class.
#
def self.instance
  @instance ||= new
end
47
+
48
+ # TODO Spec.
49
+ #
50
+ def self.add name, qualifiers
51
+ instance << Qualifier.new(name, qualifiers)
52
+ end
53
+
54
+ # Uses the qualifiers to prepare (optimize) the qualifier handling.
55
+ #
56
+ def prepare
57
+ qualifiers.each do |qualifier|
58
+ qualifier.inject_into normalization_mapping
59
+ end
60
+ end
61
+
62
+ # Normalizes the given qualifier.
63
+ #
64
+ # Returns nil if it is not allowed, the normalized qualifier if it is.
65
+ #
66
+ # Note: Normalizes.
67
+ #
68
+ def normalize qualifier
69
+ return nil if qualifier.blank?
70
+
71
+ normalization_mapping[qualifier.to_sym]
72
+ end
73
+
74
+ end
75
+
76
+ end
@@ -0,0 +1,198 @@
1
+ module Query
2
+
3
+ # This is a query token. Together with other tokens it makes up a query.
4
+ #
5
+ # It remembers the original form, and a normalized form.
6
+ #
7
+ # It also knows whether it needs to look for similarity (bla~), or whether it is a partial (bla*).
8
+ #
9
+ class Token # :nodoc:all
10
+
11
+ attr_reader :text, :original
12
+ attr_writer :similar
13
+
14
+ delegate :blank?, :to => :text
15
+
16
+ # Normal initializer.
17
+ #
18
+ # Note: Use this if you do not want a qualified and normalized token.
19
+ #
20
+ def initialize text
21
+ @text = text
22
+ end
23
+
24
+ # Returns a qualified and normalized token.
25
+ #
26
+ # Note: Use this in the search engine if you need a qualified
27
+ # and normalized token. I.e. one prepared for a search.
28
+ #
29
+ def self.processed text, downcase = true
30
+ new(text).process downcase
31
+ end
32
+ def process downcases = true
33
+ qualify
34
+ extract_original
35
+ downcase if downcases
36
+ partialize
37
+ similarize
38
+ remove_illegals
39
+ symbolize
40
+ self
41
+ end
42
+
43
+ # This returns an array of predefined category names if the user has given any.
44
+ #
45
+ def user_defined_category_names
46
+ @qualifiers
47
+ end
48
+
49
+ # Extracts a qualifier for this token and pre-assigns an allocation.
50
+ #
51
+ # Note: Removes the qualifier if it is not allowed.
52
+ #
53
+ def qualify
54
+ @qualifiers, @text = split @text
55
+ @qualifiers && @qualifiers.collect! { |qualifier| Query::Qualifiers.instance.normalize qualifier }.compact!
56
+ @qualifiers
57
+ end
58
+ def extract_original
59
+ @original = @text.dup
60
+ end
61
+
62
+ # Downcases the text.
63
+ #
64
+ def downcase
65
+ @text.downcase!
66
+ end
67
+
68
+ # Partial is a conditional setter.
69
+ #
70
+ # It is only settable if it hasn't been set yet.
71
+ #
72
+ def partial= partial
73
+ @partial = partial if @partial.nil?
74
+ end
75
+ def partial?
76
+ !@similar && @partial
77
+ end
78
+
79
+ # If the text ends with *, partialize it. If with ", don't.
80
+ #
81
+ # The latter wins. So "hello*" will not be partially searched.
82
+ #
83
+ @@no_partial = /\"\Z/
84
+ @@partial = /\*\Z/
85
+ def partialize
86
+ self.partial = false and return unless @text !~ @@no_partial
87
+ self.partial = true unless @text !~ @@partial
88
+ end
89
+
90
+ # If the text ends with ~ similarize it. If with ", don't.
91
+ #
92
+ # The latter wins.
93
+ #
94
+ @@no_similar = /\"\Z/
95
+ @@similar = /\~\Z/
96
+ def similarize
97
+ self.similar = false and return if @text =~ @@no_similar
98
+ self.similar = true if @text =~ @@similar
99
+ end
100
+
101
+ def similar?
102
+ @similar
103
+ end
104
+
105
+ # Removes the illegal characters (", * and ~) from this token's text.
106
+ #
107
+ @@illegals = /["*~]/
108
+ def remove_illegals
109
+ @text.gsub! @@illegals, '' unless @text.blank?
110
+ end
111
+
112
+ #
113
+ #
114
+ def symbolize
115
+ @text = @text.to_sym
116
+ end
117
+
118
+ # Returns an array of possible combinations.
119
+ #
120
+ def possible_combinations_in index
121
+ index.possible_combinations self
122
+ end
123
+
124
+ # Returns a token with the next similar text.
125
+ #
126
+ # TODO Rewrite this. It is hard to understand. Also spec performance.
127
+ #
128
+ def next_similar_token category
129
+ token = self.dup
130
+ token if token.next_similar category.bundle_for(token)
131
+ end
132
+ # Sets and returns the next similar word.
133
+ #
134
+ # Note: Also overrides the original.
135
+ #
136
+ def next_similar bundle
137
+ @text = @original = (similarity(bundle).shift || return) if similar?
138
+ end
139
+ # Lazy similar reader.
140
+ #
141
+ def similarity bundle = nil
142
+ @similarity || @similarity = generate_similarity_for(bundle)
143
+ end
144
# Returns an array of the texts the given bundle considers
# similar to this token's text.
#
# Returns an empty array if the bundle returns nil.
#
# Note: A copy is returned on account of the shift in next_similar,
#       which consumes the array element by element.
#       (We avoid a StopIteration exception. Which of both is less evil?)
#
def generate_similarity_for bundle
  # Fall back to an empty array before copying, not after:
  # on older rubies nil cannot be dup-ed.
  #
  (bundle.similar(@text) || []).dup
end
152
+
153
+ #
154
+ #
155
+ def to_result
156
+ [@original, @text]
157
+ end
158
+
159
+ # Internal identifier.
160
+ #
161
+ def identifier
162
+ "#{similar?? :similarity : :index}:#{@text}"
163
+ end
164
+
165
+ # Two tokens are equal if both their originals and their texts are equal.
166
+ #
167
+ def == other
168
+ self.original == other.original && self.text == other.text
169
+ end
170
+
171
+ # Displays the qualifier text and the text, joined.
172
+ #
173
+ # e.g. name:meier
174
+ #
175
+ @@split_qualifier_text = ':'
176
+ @@split_qualifiers = ','
177
+ def to_s
178
+ [@qualifiers && @qualifiers.join(@@split_qualifiers), @text].compact.join @@split_qualifier_text
179
+ end
180
+
181
+ private
182
+
183
+ # Splits text into a qualifier and text.
184
+ #
185
+ # Returns [qualifier, text].
186
+ #
187
+ def split unqualified_text
188
+ qualifiers, text = (unqualified_text || '').split(@@split_qualifier_text, 2)
189
+ if text.blank?
190
+ [nil, (qualifiers || '')]
191
+ else
192
+ [qualifiers.split(@@split_qualifiers), text]
193
+ end
194
+ end
195
+
196
+ end
197
+
198
+ end