picky 2.5.2 → 2.6.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (255) hide show
  1. data/lib/picky/adapters/rack/base.rb +23 -0
  2. data/lib/picky/adapters/rack/live_parameters.rb +33 -0
  3. data/lib/picky/adapters/rack/query.rb +65 -0
  4. data/lib/picky/adapters/rack.rb +30 -0
  5. data/lib/picky/application.rb +5 -5
  6. data/lib/picky/backend/backend.rb +108 -0
  7. data/lib/picky/backend/file/basic.rb +101 -0
  8. data/lib/picky/backend/file/json.rb +34 -0
  9. data/lib/picky/backend/file/marshal.rb +34 -0
  10. data/lib/picky/backend/file/text.rb +56 -0
  11. data/lib/picky/backend/files.rb +30 -0
  12. data/lib/picky/backend/redis/basic.rb +85 -0
  13. data/lib/picky/backend/redis/list_hash.rb +49 -0
  14. data/lib/picky/backend/redis/string_hash.rb +40 -0
  15. data/lib/picky/backend/redis.rb +40 -0
  16. data/lib/picky/calculations/location.rb +57 -0
  17. data/lib/picky/categories.rb +62 -0
  18. data/lib/picky/categories_indexed.rb +93 -0
  19. data/lib/picky/categories_indexing.rb +12 -0
  20. data/lib/picky/category.rb +127 -0
  21. data/lib/picky/category_indexed.rb +64 -0
  22. data/lib/picky/category_indexing.rb +145 -0
  23. data/lib/picky/{internals/ext → ext}/maybe_compile.rb +0 -0
  24. data/lib/picky/{internals/ext → ext}/ruby19/extconf.rb +0 -0
  25. data/lib/picky/{internals/ext → ext}/ruby19/performant.c +0 -0
  26. data/lib/picky/{internals/extensions → extensions}/array.rb +0 -0
  27. data/lib/picky/extensions/class.rb +11 -0
  28. data/lib/picky/{internals/extensions → extensions}/hash.rb +0 -0
  29. data/lib/picky/{internals/extensions → extensions}/module.rb +0 -0
  30. data/lib/picky/{internals/extensions → extensions}/object.rb +0 -0
  31. data/lib/picky/{internals/extensions → extensions}/symbol.rb +0 -0
  32. data/lib/picky/frontend_adapters/rack.rb +146 -0
  33. data/lib/picky/generators/aliases.rb +3 -3
  34. data/lib/picky/generators/base.rb +15 -0
  35. data/lib/picky/generators/partial/default.rb +5 -0
  36. data/lib/picky/generators/partial/none.rb +31 -0
  37. data/lib/picky/generators/partial/strategy.rb +25 -0
  38. data/lib/picky/generators/partial/substring.rb +118 -0
  39. data/lib/picky/generators/partial_generator.rb +15 -0
  40. data/lib/picky/generators/similarity/default.rb +7 -0
  41. data/lib/picky/generators/similarity/double_metaphone.rb +28 -0
  42. data/lib/picky/generators/similarity/metaphone.rb +28 -0
  43. data/lib/picky/generators/similarity/none.rb +31 -0
  44. data/lib/picky/generators/similarity/phonetic.rb +65 -0
  45. data/lib/picky/generators/similarity/soundex.rb +28 -0
  46. data/lib/picky/generators/similarity/strategy.rb +9 -0
  47. data/lib/picky/generators/similarity_generator.rb +15 -0
  48. data/lib/picky/generators/strategy.rb +14 -0
  49. data/lib/picky/generators/weights/default.rb +7 -0
  50. data/lib/picky/generators/weights/logarithmic.rb +39 -0
  51. data/lib/picky/generators/weights/strategy.rb +9 -0
  52. data/lib/picky/generators/weights_generator.rb +15 -0
  53. data/lib/picky/{internals/helpers → helpers}/measuring.rb +0 -0
  54. data/lib/picky/index/base.rb +119 -104
  55. data/lib/picky/index/base_indexed.rb +27 -0
  56. data/lib/picky/index/base_indexing.rb +119 -0
  57. data/lib/picky/index/memory.rb +6 -18
  58. data/lib/picky/index/redis.rb +6 -18
  59. data/lib/picky/indexed/bundle/base.rb +110 -0
  60. data/lib/picky/indexed/bundle/memory.rb +91 -0
  61. data/lib/picky/indexed/bundle/redis.rb +45 -0
  62. data/lib/picky/indexed/wrappers/bundle/calculation.rb +35 -0
  63. data/lib/picky/indexed/wrappers/bundle/location.rb +42 -0
  64. data/lib/picky/indexed/wrappers/bundle/wrapper.rb +43 -0
  65. data/lib/picky/indexed/wrappers/category/location.rb +25 -0
  66. data/lib/picky/indexed/wrappers/exact_first.rb +55 -0
  67. data/lib/picky/{internals/indexers → indexers}/base.rb +0 -0
  68. data/lib/picky/{internals/indexers → indexers}/parallel.rb +0 -0
  69. data/lib/picky/{internals/indexers → indexers}/serial.rb +0 -0
  70. data/lib/picky/{internals/indexers → indexers}/solr.rb +0 -0
  71. data/lib/picky/indexes.rb +73 -0
  72. data/lib/picky/indexes_indexed.rb +29 -0
  73. data/lib/picky/indexes_indexing.rb +49 -0
  74. data/lib/picky/indexing/bundle/base.rb +212 -0
  75. data/lib/picky/indexing/bundle/memory.rb +25 -0
  76. data/lib/picky/indexing/bundle/redis.rb +24 -0
  77. data/lib/picky/indexing/bundle/super_base.rb +61 -0
  78. data/lib/picky/indexing/wrappers/category/location.rb +25 -0
  79. data/lib/picky/interfaces/live_parameters.rb +8 -8
  80. data/lib/picky/loader.rb +89 -95
  81. data/lib/picky/{internals/performant.rb → performant.rb} +0 -0
  82. data/lib/picky/query/allocation.rb +84 -0
  83. data/lib/picky/query/allocations.rb +114 -0
  84. data/lib/picky/query/combination.rb +76 -0
  85. data/lib/picky/query/combinations/base.rb +70 -0
  86. data/lib/picky/query/combinations/memory.rb +48 -0
  87. data/lib/picky/query/combinations/redis.rb +86 -0
  88. data/lib/picky/query/indexes.rb +195 -0
  89. data/lib/picky/query/qualifiers.rb +76 -0
  90. data/lib/picky/query/token.rb +198 -0
  91. data/lib/picky/query/tokens.rb +103 -0
  92. data/lib/picky/{internals/query → query}/weights.rb +0 -0
  93. data/lib/picky/results.rb +1 -1
  94. data/lib/picky/search.rb +6 -6
  95. data/lib/picky/{internals/solr → solr}/schema_generator.rb +0 -0
  96. data/lib/picky/sources/db.rb +7 -7
  97. data/lib/picky/sources/wrappers/location.rb +2 -2
  98. data/lib/picky/tokenizers/base.rb +224 -0
  99. data/lib/picky/tokenizers/index.rb +30 -0
  100. data/lib/picky/tokenizers/location.rb +49 -0
  101. data/lib/picky/tokenizers/query.rb +55 -0
  102. data/lib/tasks/index.rake +4 -3
  103. data/lib/tasks/try.rake +2 -2
  104. data/spec/lib/{internals/adapters → adapters}/rack/base_spec.rb +1 -1
  105. data/spec/lib/{internals/adapters → adapters}/rack/live_parameters_spec.rb +1 -1
  106. data/spec/lib/{internals/adapters → adapters}/rack/query_spec.rb +1 -1
  107. data/spec/lib/application_spec.rb +3 -3
  108. data/spec/lib/{internals/index → backend}/file/basic_spec.rb +1 -1
  109. data/spec/lib/{internals/index → backend}/file/json_spec.rb +1 -1
  110. data/spec/lib/{internals/index → backend}/file/marshal_spec.rb +1 -1
  111. data/spec/lib/{internals/index → backend}/file/text_spec.rb +1 -1
  112. data/spec/lib/{internals/index → backend}/files_spec.rb +3 -3
  113. data/spec/lib/{internals/index → backend}/redis/basic_spec.rb +1 -1
  114. data/spec/lib/{internals/index → backend}/redis/list_hash_spec.rb +1 -1
  115. data/spec/lib/{internals/index → backend}/redis/string_hash_spec.rb +1 -1
  116. data/spec/lib/{internals/index → backend}/redis_spec.rb +11 -5
  117. data/spec/lib/{internals/calculations → calculations}/location_spec.rb +1 -1
  118. data/spec/lib/{internals/indexed/categories_spec.rb → categories_indexed_spec.rb} +10 -10
  119. data/spec/lib/{internals/indexed/category_spec.rb → category_indexed_spec.rb} +12 -12
  120. data/spec/lib/{internals/indexing/category_spec.rb → category_indexing_spec.rb} +10 -10
  121. data/spec/lib/{internals/cores_spec.rb → cores_spec.rb} +0 -0
  122. data/spec/lib/{internals/extensions → extensions}/array_spec.rb +0 -0
  123. data/spec/lib/{internals/extensions → extensions}/hash_spec.rb +0 -0
  124. data/spec/lib/{internals/extensions → extensions}/module_spec.rb +0 -0
  125. data/spec/lib/{internals/extensions → extensions}/object_spec.rb +0 -0
  126. data/spec/lib/{internals/extensions → extensions}/symbol_spec.rb +0 -0
  127. data/spec/lib/{internals/frontend_adapters → frontend_adapters}/rack_spec.rb +10 -10
  128. data/spec/lib/generators/aliases_spec.rb +3 -3
  129. data/spec/lib/{internals/generators → generators}/cacher_strategy_spec.rb +1 -1
  130. data/spec/lib/{internals/generators → generators}/partial/default_spec.rb +3 -3
  131. data/spec/lib/{internals/generators → generators}/partial/none_spec.rb +2 -2
  132. data/spec/lib/{internals/generators → generators}/partial/substring_spec.rb +1 -1
  133. data/spec/lib/{internals/generators → generators}/partial_generator_spec.rb +3 -3
  134. data/spec/lib/{internals/generators → generators}/similarity/double_metaphone_spec.rb +1 -1
  135. data/spec/lib/{internals/generators → generators}/similarity/metaphone_spec.rb +1 -1
  136. data/spec/lib/{internals/generators → generators}/similarity/none_spec.rb +1 -1
  137. data/spec/lib/{internals/generators → generators}/similarity/phonetic_spec.rb +1 -1
  138. data/spec/lib/{internals/generators → generators}/similarity/soundex_spec.rb +1 -1
  139. data/spec/lib/{internals/generators → generators}/similarity_generator_spec.rb +2 -2
  140. data/spec/lib/{internals/generators → generators}/weights/logarithmic_spec.rb +1 -1
  141. data/spec/lib/{internals/generators → generators}/weights_generator_spec.rb +5 -5
  142. data/spec/lib/{internals/helpers → helpers}/measuring_spec.rb +0 -0
  143. data/spec/lib/{internals/indexed/index_spec.rb → index/base_indexed_spec.rb} +5 -5
  144. data/spec/lib/{internals/indexing/index_spec.rb → index/base_indexing_spec.rb} +6 -19
  145. data/spec/lib/index/base_spec.rb +10 -53
  146. data/spec/lib/{internals/indexed → indexed}/bundle/memory_spec.rb +5 -5
  147. data/spec/lib/{internals/indexed → indexed}/bundle/redis_spec.rb +4 -4
  148. data/spec/lib/{internals/indexed → indexed}/wrappers/bundle/calculation_spec.rb +1 -1
  149. data/spec/lib/{internals/indexed → indexed}/wrappers/bundle/wrapper_spec.rb +1 -1
  150. data/spec/lib/{internals/indexed → indexed}/wrappers/exact_first_spec.rb +7 -7
  151. data/spec/lib/{internals/indexers → indexers}/base_spec.rb +0 -0
  152. data/spec/lib/{internals/indexers → indexers}/parallel_spec.rb +0 -0
  153. data/spec/lib/{internals/indexers → indexers}/serial_spec.rb +0 -0
  154. data/spec/lib/indexes_class_spec.rb +30 -0
  155. data/spec/lib/{indexed/indexes_spec.rb → indexes_indexed_spec.rb} +1 -1
  156. data/spec/lib/{indexing/indexes_spec.rb → indexes_indexing_spec.rb} +8 -8
  157. data/spec/lib/{internals/indexing/indexes_spec.rb → indexes_spec.rb} +15 -12
  158. data/spec/lib/{internals/indexing → indexing}/bundle/memory_partial_generation_speed_spec.rb +4 -4
  159. data/spec/lib/{internals/indexing → indexing}/bundle/memory_spec.rb +3 -3
  160. data/spec/lib/{internals/indexing → indexing}/bundle/redis_spec.rb +3 -3
  161. data/spec/lib/{internals/indexing → indexing}/bundle/super_base_spec.rb +2 -2
  162. data/spec/lib/{internals/interfaces → interfaces}/live_parameters_spec.rb +0 -0
  163. data/spec/lib/query/allocation_spec.rb +1 -1
  164. data/spec/lib/query/allocations_spec.rb +1 -1
  165. data/spec/lib/query/combination_spec.rb +5 -5
  166. data/spec/lib/query/combinations/base_spec.rb +1 -1
  167. data/spec/lib/query/combinations/memory_spec.rb +1 -1
  168. data/spec/lib/query/combinations/redis_spec.rb +1 -1
  169. data/spec/lib/query/indexes_spec.rb +1 -1
  170. data/spec/lib/query/qualifiers_spec.rb +4 -4
  171. data/spec/lib/query/token_spec.rb +3 -3
  172. data/spec/lib/query/tokens_spec.rb +32 -32
  173. data/spec/lib/search_spec.rb +5 -5
  174. data/spec/lib/{internals/solr → solr}/schema_generator_spec.rb +0 -0
  175. data/spec/lib/sources/db_spec.rb +4 -8
  176. data/spec/lib/sources/wrappers/location_spec.rb +1 -1
  177. data/spec/lib/{internals/tokenizers → tokenizers}/base_spec.rb +1 -1
  178. data/spec/lib/{internals/tokenizers → tokenizers}/index_spec.rb +1 -1
  179. data/spec/lib/{internals/tokenizers → tokenizers}/query_spec.rb +1 -1
  180. metadata +214 -215
  181. data/lib/picky/aliases.rb +0 -4
  182. data/lib/picky/index_bundle.rb +0 -48
  183. data/lib/picky/indexed/indexes.rb +0 -59
  184. data/lib/picky/indexing/indexes.rb +0 -87
  185. data/lib/picky/internals/adapters/rack/base.rb +0 -27
  186. data/lib/picky/internals/adapters/rack/live_parameters.rb +0 -37
  187. data/lib/picky/internals/adapters/rack/query.rb +0 -69
  188. data/lib/picky/internals/adapters/rack.rb +0 -34
  189. data/lib/picky/internals/calculations/location.rb +0 -59
  190. data/lib/picky/internals/frontend_adapters/rack.rb +0 -150
  191. data/lib/picky/internals/generators/base.rb +0 -19
  192. data/lib/picky/internals/generators/partial/default.rb +0 -7
  193. data/lib/picky/internals/generators/partial/none.rb +0 -35
  194. data/lib/picky/internals/generators/partial/strategy.rb +0 -29
  195. data/lib/picky/internals/generators/partial/substring.rb +0 -122
  196. data/lib/picky/internals/generators/partial_generator.rb +0 -19
  197. data/lib/picky/internals/generators/similarity/default.rb +0 -9
  198. data/lib/picky/internals/generators/similarity/double_metaphone.rb +0 -32
  199. data/lib/picky/internals/generators/similarity/metaphone.rb +0 -32
  200. data/lib/picky/internals/generators/similarity/none.rb +0 -35
  201. data/lib/picky/internals/generators/similarity/phonetic.rb +0 -69
  202. data/lib/picky/internals/generators/similarity/soundex.rb +0 -32
  203. data/lib/picky/internals/generators/similarity/strategy.rb +0 -11
  204. data/lib/picky/internals/generators/similarity_generator.rb +0 -19
  205. data/lib/picky/internals/generators/strategy.rb +0 -18
  206. data/lib/picky/internals/generators/weights/default.rb +0 -9
  207. data/lib/picky/internals/generators/weights/logarithmic.rb +0 -43
  208. data/lib/picky/internals/generators/weights/strategy.rb +0 -11
  209. data/lib/picky/internals/generators/weights_generator.rb +0 -19
  210. data/lib/picky/internals/index/backend.rb +0 -112
  211. data/lib/picky/internals/index/file/basic.rb +0 -105
  212. data/lib/picky/internals/index/file/json.rb +0 -38
  213. data/lib/picky/internals/index/file/marshal.rb +0 -38
  214. data/lib/picky/internals/index/file/text.rb +0 -60
  215. data/lib/picky/internals/index/files.rb +0 -34
  216. data/lib/picky/internals/index/redis/basic.rb +0 -89
  217. data/lib/picky/internals/index/redis/list_hash.rb +0 -53
  218. data/lib/picky/internals/index/redis/string_hash.rb +0 -44
  219. data/lib/picky/internals/index/redis.rb +0 -44
  220. data/lib/picky/internals/indexed/bundle/base.rb +0 -114
  221. data/lib/picky/internals/indexed/bundle/memory.rb +0 -95
  222. data/lib/picky/internals/indexed/bundle/redis.rb +0 -49
  223. data/lib/picky/internals/indexed/categories.rb +0 -140
  224. data/lib/picky/internals/indexed/category.rb +0 -111
  225. data/lib/picky/internals/indexed/index.rb +0 -63
  226. data/lib/picky/internals/indexed/wrappers/bundle/calculation.rb +0 -37
  227. data/lib/picky/internals/indexed/wrappers/bundle/location.rb +0 -44
  228. data/lib/picky/internals/indexed/wrappers/bundle/wrapper.rb +0 -45
  229. data/lib/picky/internals/indexed/wrappers/category/location.rb +0 -27
  230. data/lib/picky/internals/indexed/wrappers/exact_first.rb +0 -59
  231. data/lib/picky/internals/indexing/bundle/base.rb +0 -216
  232. data/lib/picky/internals/indexing/bundle/memory.rb +0 -29
  233. data/lib/picky/internals/indexing/bundle/redis.rb +0 -28
  234. data/lib/picky/internals/indexing/bundle/super_base.rb +0 -65
  235. data/lib/picky/internals/indexing/category.rb +0 -153
  236. data/lib/picky/internals/indexing/index.rb +0 -142
  237. data/lib/picky/internals/indexing/wrappers/category/location.rb +0 -27
  238. data/lib/picky/internals/query/allocation.rb +0 -88
  239. data/lib/picky/internals/query/allocations.rb +0 -118
  240. data/lib/picky/internals/query/combination.rb +0 -80
  241. data/lib/picky/internals/query/combinations/base.rb +0 -74
  242. data/lib/picky/internals/query/combinations/memory.rb +0 -52
  243. data/lib/picky/internals/query/combinations/redis.rb +0 -90
  244. data/lib/picky/internals/query/indexes.rb +0 -199
  245. data/lib/picky/internals/query/qualifiers.rb +0 -82
  246. data/lib/picky/internals/query/token.rb +0 -202
  247. data/lib/picky/internals/query/tokens.rb +0 -109
  248. data/lib/picky/internals/shared/category.rb +0 -52
  249. data/lib/picky/internals/tokenizers/base.rb +0 -228
  250. data/lib/picky/internals/tokenizers/index.rb +0 -34
  251. data/lib/picky/internals/tokenizers/location.rb +0 -54
  252. data/lib/picky/internals/tokenizers/query.rb +0 -59
  253. data/lib/picky/internals.rb +0 -2
  254. data/spec/lib/aliases_spec.rb +0 -9
  255. data/spec/lib/index_bundle_spec.rb +0 -69
@@ -0,0 +1,70 @@
1
+ module Query
2
+
3
+ # Combinations are a number of Combination-s.
4
+ #
5
+ # They are the core of an allocation.
6
+ # An allocation consists of a number of combinations.
7
+ #
8
+ module Combinations # :nodoc:all
9
+
10
+ # Base Combinations contain methods for calculating score and ids.
11
+ #
12
class Base

  attr_reader :combinations

  delegate :empty?, :to => :@combinations

  # Wraps the given array of combinations (an empty array by default).
  #
  def initialize(combinations = [])
    @combinations = combinations
  end

  # Hashes exactly like the wrapped combinations array does.
  #
  def hash
    @combinations.hash
  end

  # Score of these combinations: the sum of their own weights plus
  # whatever the given (user specific) weights add on top.
  #
  def calculate_score(weights)
    total_score + weighted_score(weights)
  end

  # Sum of the weight of every combination.
  #
  def total_score
    @combinations.sum(&:weight)
  end

  # Score that the given weights object assigns to the combinations.
  #
  def weighted_score(weights)
    weights.score(@combinations)
  end

  # Keeps only the combinations that are in? the given identifiers;
  # all others are dropped from the wrapped array in place.
  #
  # Note: This method is not totally independent of the calculate_ids one.
  # Since identifiers are only nullified, we need to not include the
  # ids that have an associated identifier that is nil.
  #
  def keep(identifiers = [])
    @combinations.reject! { |candidate| !candidate.in?(identifiers) }
  end

  # Drops the combinations that are in? the given identifiers from the
  # wrapped array in place.
  #
  # Note: This method is not totally independent of the calculate_ids one.
  # Since identifiers are only nullified, we need to not include the
  # ids that have an associated identifier that is nil.
  #
  def remove(identifiers = [])
    @combinations.reject! { |candidate| candidate.in?(identifiers) }
  end

  # Array with the to_result of every combination, in order.
  #
  def to_result
    @combinations.map(&:to_result)
  end

end
67
+
68
+ end
69
+
70
+ end
@@ -0,0 +1,48 @@
1
+ module Query
2
+
3
+ # Combinations are a number of Combination-s.
4
+ #
5
+ # They are the core of an allocation.
6
+ # An allocation consists of a number of combinations.
7
+ #
8
+ module Combinations # :nodoc:all
9
+
10
+ # Memory Combinations contain specific methods for
11
+ # calculating score and ids in memory.
12
+ #
13
+ class Memory < Base
14
+
15
+ # Returns the result ids for the allocation.
16
+ #
17
+ # Sorts the ids by size and & through them in the following order (sizes):
18
+ # 0. [100_000, 400, 30, 2]
19
+ # 1. [2, 30, 400, 100_000]
20
+ # 2. (100_000 & (400 & (30 & 2))) # => result
21
+ #
22
+ # Note: Uses a C-optimized intersection routine (in performant.c)
23
+ # for speed and memory efficiency.
24
+ #
25
+ # Note: In the memory based version we ignore the (amount) needed hint.
26
+ # We cannot use the information to speed up the algorithm, unfortunately.
27
+ #
28
# Returns the result ids for the allocation.
#
# Both arguments (the amount and offset hints) are ignored in the
# memory based version.
#
def ids(_, _)
  return [] if @combinations.empty?

  # Gather the ids of each combination and hand them to the optimized
  # C intersection routine.
  #
  # Note: It orders the passed arrays by size.
  #
  Performant::Array.memory_efficient_intersect @combinations.map(&:ids)
end
43
+
44
+ end
45
+
46
+ end
47
+
48
+ end
@@ -0,0 +1,86 @@
1
+ module Query
2
+
3
+ # Combinations are a number of Combination-s.
4
+ #
5
+ # They are the core of an allocation.
6
+ # An allocation consists of a number of combinations.
7
+ #
8
+ module Combinations # :nodoc:all
9
+
10
+ # Redis Combinations contain specific methods for
11
+ # calculating score and ids in Redis.
12
+ #
13
+ class Redis < Base
14
+
15
+ # Connect to the backend.
16
+ #
17
+ # TODO Use specific Picky Redis wrapper.
18
+ #
19
+ def self.redis
20
+ @redis ||= ::Redis.new :db => 15
21
+ end
22
+
23
+ attr_reader :redis
24
+
25
+ #
26
+ #
27
+ def initialize combinations
28
+ super combinations
29
+
30
+ @redis = self.class.redis
31
+ end
32
+
33
+ # Returns the result ids for the allocation.
34
+ #
35
# Returns the result ids for the allocation.
#
# amount - maximum number of ids to return.
# offset - zero-based position of the first id to return.
#
# The identifiers of all combinations are intersected into a temporary
# key, the requested window is read from it, and the key is removed again.
#
def ids amount, offset
  return [] if @combinations.empty?

  # ZRANGE bounds are both inclusive, so a window of +amount+ ids ends at
  # offset + amount - 1. Without this guard a non-positive amount would
  # produce a negative stop index, which Redis counts from the end.
  #
  return [] unless amount > 0

  identifiers = @combinations.map { |combination| combination.identifier.to_s }
  result_id   = generate_intermediate_result_id

  begin
    # Intersect and store.
    #
    redis.zinterstore result_id, identifiers

    # Get the stored result.
    #
    redis.zrange result_id, offset, (offset + amount - 1)
  ensure
    # Delete the stored result as it was only for temporary purposes,
    # also when reading it failed.
    #
    redis.del result_id
  end
end
60
+
61
+ # Generate a multiple host/process safe result id.
62
+ #
63
+ # Note: Generated when this class loads.
64
+ #
65
+ require 'socket'
66
+ def self.extract_host
67
+ @host ||= Socket.gethostname
68
+ end
69
+ def host
70
+ self.class.extract_host
71
+ end
72
+ extract_host
73
+ def pid
74
+ @pid ||= Process.pid
75
+ end
76
+ # Use the host and pid (generated lazily in child processes) for the result.
77
+ #
78
+ def generate_intermediate_result_id
79
+ :"#{host}:#{pid}:picky:result"
80
+ end
81
+
82
+ end
83
+
84
+ end
85
+
86
+ end
@@ -0,0 +1,195 @@
1
+ module Query
2
+
3
+ # The query indexes class bundles indexes given to a query.
4
+ #
5
+ # Example:
6
+ # # If you call
7
+ # Search.new dvd_index, mp3_index, video_index
8
+ #
9
+ # # What it does is take the three given (API-) indexes and
10
+ # # bundle them in an index bundle.
11
+ #
12
class Indexes

  attr_reader :indexes

  # Creates a new Query::Indexes.
  #
  # indexes           - the indexes in which allocations are generated.
  # combinations_type - class that wraps each expanded combination; it is
  #                     instantiated with an array of combinations.
  #
  # Its job is to generate all possible combinations.
  # Note: We cannot mix memory and redis indexes just yet.
  #
  def initialize *indexes, combinations_type
    @indexes           = indexes
    @combinations_type = combinations_type
  end

  # Returns a number of prepared (sorted, reduced etc.) allocations for the given tokens.
  #
  def prepared_allocations_for tokens, weights = {}
    allocations = allocations_for tokens

    # Remove double allocations.
    #
    # NOTE(review): the return value is not used, so this only has an
    # effect if Allocations#uniq changes its receiver - confirm.
    #
    allocations.uniq

    # Score the allocations using weights as bias.
    #
    allocations.calculate_score weights

    # Sort the allocations.
    # (allocations are sorted according to score, highest to lowest)
    #
    allocations.sort!

    # Reduce the amount of allocations.
    #
    # allocations.reduce_to some_amount

    # Remove identifiers from allocations.
    #
    # allocations.remove some_array_of_identifiers_to_remove

    allocations
  end

  # Returns a number of possible allocations for the given tokens.
  #
  def allocations_for tokens
    Allocations.new allocations_ary_for(tokens)
  end

  # Returns a flat array with the allocations of all indexes, in index order.
  #
  def allocations_ary_for tokens
    indexes.inject([]) do |allocations, index|
      allocations + allocation_for(tokens, index)
    end
  end

  # Returns an array of Allocation-s for the tokens in a single index.
  #
  def allocation_for tokens, index
    # Expand the combinations.
    #
    possible_combinations = tokens.possible_combinations_in index

    # Generate all possible combinations.
    #
    expanded_combinations = expand_combinations_from possible_combinations

    # Wrap each expanded combination in an allocation.
    #
    expanded_combinations.map! do |expanded_combination|
      Allocation.new @combinations_type.new(expanded_combination), index.result_identifier # TODO Do not extract result_identifier.
    end
  end

  # This is the core of the search engine. No kidding.
  #
  # Gets an array of
  #   [
  #     [<combinations for token1>],
  #     [<combinations for token2>],
  #     [<combinations for token3>]
  #   ]
  #
  # Generates all possible allocations of combinations.
  #   [
  #     [first combination of token1, first c of t2, first c of t3],
  #     [first combination of token1, first c of t2, second c of t3]
  #     ...
  #   ]
  #
  # That is, all possible combinations of array elements, where the
  # elements of the last array vary fastest:
  #   [1,2,3] x [a,b,c] x [k,l,m] => [[1,a,k], [1,a,l], [1,a,m], [1,b,k], [1,b,l], [1,b,m], [1,c,k], ..., [3,c,m]]
  #
  # Returns an empty array if there are no combinations at all, or if one
  # of the tokens has no combinations.
  #
  # Note: Expects an array of arrays. The passed in array is left untouched.
  #
  def expand_combinations_from possible_combinations
    # No tokens, nothing to combine.
    #
    return [] if possible_combinations.empty?

    # If an element has size 0, this means one of the
    # tokens could not be allocated.
    #
    return [] if possible_combinations.any?(&:empty?)

    # The cartesian product yields exactly the ordering described above.
    #
    first, *rest = possible_combinations
    first.product(*rest)
  end

end
194
+
195
+ end
@@ -0,0 +1,76 @@
1
+ # coding: utf-8
2
+ #
3
+ module Query
4
+
5
+ # A single qualifier.
6
+ #
7
class Qualifier # :nodoc:all

  attr_reader :normalized_qualifier, :codes

  # normalized_qualifier - the value every code is mapped to.
  # codes                - an array; each element is converted with to_sym.
  #
  def initialize(normalized_qualifier, codes)
    @normalized_qualifier = normalized_qualifier
    @codes                = codes.map { |code| code.to_sym }
  end

  # Maps each code to the normalized qualifier in the given hash.
  #
  # Will overwrite if the key is present in the hash.
  #
  def inject_into(hash)
    codes.each { |code| hash[code] = normalized_qualifier }
  end

end
29
+
30
+ # Collection class for qualifiers.
31
+ #
32
+ class Qualifiers # :nodoc:all
33
+
34
+ attr_reader :qualifiers, :normalization_mapping
35
+
36
+ delegate :<<, :to => :qualifiers
37
+
38
+ #
39
+ #
40
+ def initialize
41
+ @qualifiers = []
42
+ @normalization_mapping = {}
43
+ end
44
# Returns the shared Qualifiers instance, creating it on first use.
#
def self.instance
  @instance ||= new
end
47
+
48
+ # TODO Spec.
49
+ #
50
+ def self.add name, qualifiers
51
+ instance << Qualifier.new(name, qualifiers)
52
+ end
53
+
54
+ # Uses the qualifiers to prepare (optimize) the qualifier handling.
55
+ #
56
+ def prepare
57
+ qualifiers.each do |qualifier|
58
+ qualifier.inject_into normalization_mapping
59
+ end
60
+ end
61
+
62
+ # Normalizes the given qualifier.
63
+ #
64
+ # Returns nil if it is not allowed, the normalized qualifier if it is.
65
+ #
66
+ # Note: Normalizes.
67
+ #
68
+ def normalize qualifier
69
+ return nil if qualifier.blank?
70
+
71
+ normalization_mapping[qualifier.to_sym]
72
+ end
73
+
74
+ end
75
+
76
+ end
@@ -0,0 +1,198 @@
1
+ module Query
2
+
3
+ # This is a query token. Together with other tokens it makes up a query.
4
+ #
5
+ # It remembers the original form, and a normalized form.
6
+ #
7
+ # It also knows whether it needs to look for similarity (bla~), or whether it is a partial (bla*).
8
+ #
9
+ class Token # :nodoc:all
10
+
11
+ attr_reader :text, :original
12
+ attr_writer :similar
13
+
14
+ delegate :blank?, :to => :text
15
+
16
+ # Normal initializer.
17
+ #
18
+ # Note: Use this if you do not want a qualified and normalized token.
19
+ #
20
+ def initialize text
21
+ @text = text
22
+ end
23
+
24
+ # Returns a qualified and normalized token.
25
+ #
26
+ # Note: Use this in the search engine if you need a qualified
27
+ # and normalized token. I.e. one prepared for a search.
28
+ #
29
+ def self.processed text, downcase = true
30
+ new(text).process downcase
31
+ end
32
+ def process downcases = true
33
+ qualify
34
+ extract_original
35
+ downcase if downcases
36
+ partialize
37
+ similarize
38
+ remove_illegals
39
+ symbolize
40
+ self
41
+ end
42
+
43
+ # This returns an array of predefined category names if the user has given any.
44
+ #
45
+ def user_defined_category_names
46
+ @qualifiers
47
+ end
48
+
49
+ # Extracts a qualifier for this token and pre-assigns an allocation.
50
+ #
51
+ # Note: Removes the qualifier if it is not allowed.
52
+ #
53
+ def qualify
54
+ @qualifiers, @text = split @text
55
+ @qualifiers && @qualifiers.collect! { |qualifier| Query::Qualifiers.instance.normalize qualifier }.compact!
56
+ @qualifiers
57
+ end
58
+ def extract_original
59
+ @original = @text.dup
60
+ end
61
+
62
+ # Downcases the text.
63
+ #
64
+ def downcase
65
+ @text.downcase!
66
+ end
67
+
68
+ # Partial is a conditional setter.
69
+ #
70
+ # It is only settable if it hasn't been set yet.
71
+ #
72
+ def partial= partial
73
+ @partial = partial if @partial.nil?
74
+ end
75
+ def partial?
76
+ !@similar && @partial
77
+ end
78
+
79
+ # If the text ends with *, partialize it. If with ", don't.
80
+ #
81
+ # The latter wins. So "hello*" will not be partially searched.
82
+ #
83
+ @@no_partial = /\"\Z/
84
+ @@partial = /\*\Z/
85
def partialize
  # A trailing " switches partial off. Since partial= only sets a value
  # once, nothing below can switch it back on.
  #
  if @text =~ @@no_partial
    self.partial = false
    return
  end

  # A trailing * switches partial on.
  #
  self.partial = true if @text =~ @@partial
end
89
+
90
+ # If the text ends with ~ similarize it. If with ", don't.
91
+ #
92
+ # The latter wins.
93
+ #
94
+ @@no_similar = /\"\Z/
95
+ @@similar = /\~\Z/
96
def similarize
  # A trailing " switches similarity off.
  #
  if @text =~ @@no_similar
    self.similar = false
    return
  end

  # A trailing ~ switches similarity on.
  #
  self.similar = true if @text =~ @@similar
end
100
+
101
+ def similar?
102
+ @similar
103
+ end
104
+
105
+ # Normalizes this token's text.
106
+ #
107
+ @@illegals = /["*~]/
108
+ def remove_illegals
109
+ @text.gsub! @@illegals, '' unless @text.blank?
110
+ end
111
+
112
+ #
113
+ #
114
+ def symbolize
115
+ @text = @text.to_sym
116
+ end
117
+
118
+ # Returns an array of possible combinations.
119
+ #
120
+ def possible_combinations_in index
121
+ index.possible_combinations self
122
+ end
123
+
124
+ # Returns a token with the next similar text.
125
+ #
126
+ # TODO Rewrite this. It is hard to understand. Also spec performance.
127
+ #
128
+ def next_similar_token category
129
+ token = self.dup
130
+ token if token.next_similar category.bundle_for(token)
131
+ end
132
+ # Sets and returns the next similar word.
133
+ #
134
+ # Note: Also overrides the original.
135
+ #
136
+ def next_similar bundle
137
+ @text = @original = (similarity(bundle).shift || return) if similar?
138
+ end
139
+ # Lazy similar reader.
140
+ #
141
# Generates the similarity for the bundle on first use and returns the
# remembered value on every later call (the bundle is then ignored).
#
def similarity(bundle = nil)
  @similarity ||= generate_similarity_for(bundle)
end
144
+ # Returns an enumerator that traverses over the similar.
145
+ #
146
+ # Note: The dup isn't too nice – since it is needed on account of the shift, above.
147
+ # (We avoid a StopIteration exception. Which of both is less evil?)
148
+ #
149
# Returns a copy of what the bundle returns from similar for this token's
# text, or an empty array if the bundle returns nil.
#
# Note: The nil fallback has to come before the dup: nil.dup raises a
# TypeError on Rubies before 2.4, so falling back afterwards never helps there.
#
def generate_similarity_for bundle
  (bundle.similar(@text) || []).dup
end
152
+
153
+ #
154
+ #
155
+ def to_result
156
+ [@original, @text]
157
+ end
158
+
159
+ # Internal identifier.
160
+ #
161
+ def identifier
162
+ "#{similar?? :similarity : :index}:#{@text}"
163
+ end
164
+
165
+ # If the originals & the text are the same, they are the same.
166
+ #
167
+ def == other
168
+ self.original == other.original && self.text == other.text
169
+ end
170
+
171
+ # Displays the qualifier text and the text, joined.
172
+ #
173
+ # e.g. name:meier
174
+ #
175
+ @@split_qualifier_text = ':'
176
+ @@split_qualifiers = ','
177
+ def to_s
178
+ [@qualifiers && @qualifiers.join(@@split_qualifiers), @text].compact.join @@split_qualifier_text
179
+ end
180
+
181
+ private
182
+
183
+ # Splits text into a qualifier and text.
184
+ #
185
+ # Returns [qualifier, text].
186
+ #
187
+ def split unqualified_text
188
+ qualifiers, text = (unqualified_text || '').split(@@split_qualifier_text, 2)
189
+ if text.blank?
190
+ [nil, (qualifiers || '')]
191
+ else
192
+ [qualifiers.split(@@split_qualifiers), text]
193
+ end
194
+ end
195
+
196
+ end
197
+
198
+ end