picky 1.4.1 → 1.4.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (229) hide show
  1. data/lib/picky/{alias_instances.rb → aliases.rb} +1 -3
  2. data/lib/picky/application.rb +18 -19
  3. data/lib/picky/cores.rb +1 -1
  4. data/lib/picky/generators/aliases.rb +3 -0
  5. data/lib/picky/index/base.rb +179 -0
  6. data/lib/picky/index/memory.rb +28 -0
  7. data/lib/picky/index/redis.rb +28 -0
  8. data/lib/picky/{indexes_api.rb → index_bundle.rb} +16 -16
  9. data/lib/picky/indexed/indexes.rb +11 -7
  10. data/lib/picky/indexing/indexes.rb +14 -8
  11. data/lib/picky/internals/adapters/rack/base.rb +27 -0
  12. data/lib/picky/internals/adapters/rack/live_parameters.rb +37 -0
  13. data/lib/picky/internals/adapters/rack/query.rb +63 -0
  14. data/lib/picky/internals/adapters/rack.rb +34 -0
  15. data/lib/picky/{calculations → internals/calculations}/location.rb +0 -0
  16. data/lib/picky/{cli.rb → internals/cli.rb} +0 -0
  17. data/lib/picky/{configuration → internals/configuration}/index.rb +8 -2
  18. data/lib/picky/{ext → internals/ext}/maybe_compile.rb +0 -0
  19. data/lib/picky/{ext → internals/ext}/ruby19/extconf.rb +0 -0
  20. data/lib/picky/{ext → internals/ext}/ruby19/performant.c +0 -0
  21. data/lib/picky/{extensions → internals/extensions}/array.rb +0 -0
  22. data/lib/picky/{extensions → internals/extensions}/hash.rb +0 -0
  23. data/lib/picky/{extensions → internals/extensions}/module.rb +0 -0
  24. data/lib/picky/{extensions → internals/extensions}/object.rb +0 -0
  25. data/lib/picky/{extensions → internals/extensions}/symbol.rb +0 -0
  26. data/lib/picky/internals/frontend_adapters/rack.rb +154 -0
  27. data/lib/picky/internals/generators/base.rb +19 -0
  28. data/lib/picky/internals/generators/partial/default.rb +7 -0
  29. data/lib/picky/internals/generators/partial/none.rb +35 -0
  30. data/lib/picky/internals/generators/partial/strategy.rb +29 -0
  31. data/lib/picky/internals/generators/partial/substring.rb +122 -0
  32. data/lib/picky/internals/generators/partial_generator.rb +19 -0
  33. data/lib/picky/internals/generators/similarity/default.rb +9 -0
  34. data/lib/picky/internals/generators/similarity/double_levenshtone.rb +81 -0
  35. data/lib/picky/internals/generators/similarity/none.rb +35 -0
  36. data/lib/picky/internals/generators/similarity/strategy.rb +11 -0
  37. data/lib/picky/internals/generators/similarity_generator.rb +19 -0
  38. data/lib/picky/internals/generators/strategy.rb +18 -0
  39. data/lib/picky/internals/generators/weights/default.rb +9 -0
  40. data/lib/picky/internals/generators/weights/logarithmic.rb +43 -0
  41. data/lib/picky/internals/generators/weights/strategy.rb +11 -0
  42. data/lib/picky/internals/generators/weights_generator.rb +19 -0
  43. data/lib/picky/{helpers → internals/helpers}/measuring.rb +0 -0
  44. data/lib/picky/internals/index/backend.rb +113 -0
  45. data/lib/picky/internals/index/file/basic.rb +101 -0
  46. data/lib/picky/internals/index/file/json.rb +38 -0
  47. data/lib/picky/internals/index/file/marshal.rb +38 -0
  48. data/lib/picky/internals/index/file/text.rb +60 -0
  49. data/lib/picky/internals/index/files.rb +24 -0
  50. data/lib/picky/internals/index/redis/basic.rb +77 -0
  51. data/lib/picky/internals/index/redis/list_hash.rb +46 -0
  52. data/lib/picky/internals/index/redis/string_hash.rb +35 -0
  53. data/lib/picky/internals/index/redis.rb +44 -0
  54. data/lib/picky/internals/indexed/bundle/base.rb +72 -0
  55. data/lib/picky/internals/indexed/bundle/memory.rb +69 -0
  56. data/lib/picky/internals/indexed/bundle/redis.rb +70 -0
  57. data/lib/picky/internals/indexed/categories.rb +135 -0
  58. data/lib/picky/internals/indexed/category.rb +90 -0
  59. data/lib/picky/internals/indexed/index.rb +57 -0
  60. data/lib/picky/{indexed → internals/indexed}/wrappers/bundle/calculation.rb +0 -0
  61. data/lib/picky/{indexed → internals/indexed}/wrappers/bundle/location.rb +4 -2
  62. data/lib/picky/{indexed → internals/indexed}/wrappers/bundle/wrapper.rb +1 -1
  63. data/lib/picky/internals/indexed/wrappers/exact_first.rb +65 -0
  64. data/lib/picky/{indexers → internals/indexers}/no_source_specified_error.rb +0 -0
  65. data/lib/picky/{indexers → internals/indexers}/serial.rb +2 -2
  66. data/lib/picky/{indexers → internals/indexers}/solr.rb +0 -0
  67. data/lib/picky/internals/indexing/bundle/base.rb +219 -0
  68. data/lib/picky/internals/indexing/bundle/memory.rb +25 -0
  69. data/lib/picky/internals/indexing/bundle/redis.rb +28 -0
  70. data/lib/picky/internals/indexing/bundle/super_base.rb +65 -0
  71. data/lib/picky/internals/indexing/categories.rb +42 -0
  72. data/lib/picky/internals/indexing/category.rb +120 -0
  73. data/lib/picky/internals/indexing/index.rb +67 -0
  74. data/lib/picky/{performant.rb → internals/performant.rb} +0 -0
  75. data/lib/picky/internals/query/allocation.rb +88 -0
  76. data/lib/picky/internals/query/allocations.rb +137 -0
  77. data/lib/picky/internals/query/combination.rb +80 -0
  78. data/lib/picky/internals/query/combinations/base.rb +84 -0
  79. data/lib/picky/internals/query/combinations/memory.rb +58 -0
  80. data/lib/picky/internals/query/combinations/redis.rb +59 -0
  81. data/lib/picky/internals/query/indexes.rb +180 -0
  82. data/lib/picky/internals/query/qualifiers.rb +81 -0
  83. data/lib/picky/internals/query/token.rb +215 -0
  84. data/lib/picky/internals/query/tokens.rb +89 -0
  85. data/lib/picky/{query → internals/query}/weights.rb +0 -0
  86. data/lib/picky/internals/results/base.rb +106 -0
  87. data/lib/picky/internals/results/full.rb +17 -0
  88. data/lib/picky/internals/results/live.rb +17 -0
  89. data/lib/picky/{solr → internals/solr}/schema_generator.rb +0 -0
  90. data/lib/picky/internals/tokenizers/base.rb +166 -0
  91. data/lib/picky/internals/tokenizers/index.rb +63 -0
  92. data/lib/picky/internals/tokenizers/query.rb +79 -0
  93. data/lib/picky/loader.rb +148 -112
  94. data/lib/picky/query/base.rb +57 -26
  95. data/lib/picky/query/full.rb +1 -1
  96. data/lib/picky/query/live.rb +1 -1
  97. data/lib/picky/sources/db.rb +27 -6
  98. data/lib/tasks/index.rake +3 -3
  99. data/lib/tasks/try.rake +2 -2
  100. data/spec/lib/aliases_spec.rb +9 -0
  101. data/spec/lib/application_spec.rb +3 -3
  102. data/spec/lib/generators/aliases_spec.rb +1 -0
  103. data/spec/lib/{index_api_spec.rb → index/base_spec.rb} +7 -7
  104. data/spec/lib/index_bundle_spec.rb +71 -0
  105. data/spec/lib/indexed/indexes_spec.rb +61 -0
  106. data/spec/lib/indexing/indexes_spec.rb +94 -24
  107. data/spec/lib/{adapters → internals/adapters}/rack/base_spec.rb +2 -2
  108. data/spec/lib/{adapters → internals/adapters}/rack/live_parameters_spec.rb +2 -2
  109. data/spec/lib/{adapters → internals/adapters}/rack/query_spec.rb +2 -2
  110. data/spec/lib/{calculations → internals/calculations}/location_spec.rb +0 -0
  111. data/spec/lib/{cli_spec.rb → internals/cli_spec.rb} +4 -1
  112. data/spec/lib/{configuration → internals/configuration}/index_spec.rb +1 -1
  113. data/spec/lib/{cores_spec.rb → internals/cores_spec.rb} +0 -0
  114. data/spec/lib/{extensions → internals/extensions}/array_spec.rb +0 -0
  115. data/spec/lib/{extensions → internals/extensions}/hash_spec.rb +0 -0
  116. data/spec/lib/{extensions → internals/extensions}/module_spec.rb +0 -0
  117. data/spec/lib/{extensions → internals/extensions}/object_spec.rb +0 -0
  118. data/spec/lib/{extensions → internals/extensions}/symbol_spec.rb +0 -0
  119. data/spec/lib/{frontend_adapters → internals/frontend_adapters}/rack_spec.rb +11 -11
  120. data/spec/lib/{cacher → internals/generators}/cacher_strategy_spec.rb +2 -2
  121. data/spec/lib/internals/generators/partial/default_spec.rb +17 -0
  122. data/spec/lib/internals/generators/partial/none_spec.rb +17 -0
  123. data/spec/lib/{cacher → internals/generators}/partial/substring_spec.rb +26 -27
  124. data/spec/lib/{cacher → internals/generators}/partial_generator_spec.rb +5 -5
  125. data/spec/lib/{cacher → internals/generators}/similarity/double_levenshtone_spec.rb +4 -4
  126. data/spec/lib/{cacher → internals/generators}/similarity/none_spec.rb +2 -2
  127. data/spec/lib/{cacher → internals/generators}/similarity_generator_spec.rb +4 -4
  128. data/spec/lib/{cacher → internals/generators}/weights/logarithmic_spec.rb +2 -2
  129. data/spec/lib/internals/generators/weights_generator_spec.rb +21 -0
  130. data/spec/lib/{helpers → internals/helpers}/measuring_spec.rb +0 -0
  131. data/spec/lib/{index → internals/index}/file/basic_spec.rb +2 -2
  132. data/spec/lib/{index → internals/index}/file/json_spec.rb +2 -2
  133. data/spec/lib/{index → internals/index}/file/marshal_spec.rb +2 -2
  134. data/spec/lib/{index → internals/index}/file/text_spec.rb +2 -2
  135. data/spec/lib/{index → internals/index}/files_spec.rb +2 -2
  136. data/spec/lib/{indexed/bundle_spec.rb → internals/indexed/bundle/memory_spec.rb} +4 -5
  137. data/spec/lib/{indexed → internals/indexed}/categories_spec.rb +13 -13
  138. data/spec/lib/{indexed → internals/indexed}/category_spec.rb +59 -32
  139. data/spec/lib/{indexed → internals/indexed}/index_spec.rb +5 -5
  140. data/spec/lib/{indexed → internals/indexed}/wrappers/bundle/calculation_spec.rb +0 -0
  141. data/spec/lib/{indexed → internals/indexed}/wrappers/bundle/wrapper_spec.rb +0 -0
  142. data/spec/lib/{indexed → internals/indexed}/wrappers/exact_first_spec.rb +5 -5
  143. data/spec/lib/{indexers → internals/indexers}/serial_spec.rb +0 -0
  144. data/spec/lib/{indexing/bundle_partial_generation_speed_spec.rb → internals/indexing/bundle/memory_partial_generation_speed_spec.rb} +3 -3
  145. data/spec/lib/{indexing/bundle_spec.rb → internals/indexing/bundle/memory_spec.rb} +3 -3
  146. data/spec/lib/{index/bundle_spec.rb → internals/indexing/bundle/super_base_spec.rb} +9 -3
  147. data/spec/lib/{indexing → internals/indexing}/category_spec.rb +3 -3
  148. data/spec/lib/{indexing → internals/indexing}/index_spec.rb +3 -3
  149. data/spec/lib/internals/indexing/indexes_spec.rb +36 -0
  150. data/spec/lib/{interfaces → internals/interfaces}/live_parameters_spec.rb +0 -0
  151. data/spec/lib/internals/results/base_spec.rb +105 -0
  152. data/spec/lib/internals/results/full_spec.rb +78 -0
  153. data/spec/lib/internals/results/live_spec.rb +88 -0
  154. data/spec/lib/{solr → internals/solr}/schema_generator_spec.rb +0 -0
  155. data/spec/lib/{tokenizers → internals/tokenizers}/base_spec.rb +3 -3
  156. data/spec/lib/{tokenizers → internals/tokenizers}/index_spec.rb +9 -9
  157. data/spec/lib/{tokenizers → internals/tokenizers}/query_spec.rb +11 -11
  158. data/spec/lib/query/allocation_spec.rb +12 -12
  159. data/spec/lib/query/allocations_spec.rb +19 -19
  160. data/spec/lib/query/base_spec.rb +28 -4
  161. data/spec/lib/query/combination_spec.rb +8 -9
  162. data/spec/lib/query/combinations/base_spec.rb +116 -0
  163. data/spec/lib/query/{combinations_spec.rb → combinations/memory_spec.rb} +14 -14
  164. data/spec/lib/query/combinations/redis_spec.rb +132 -0
  165. data/spec/lib/query/full_spec.rb +2 -2
  166. data/spec/lib/query/indexes_spec.rb +81 -0
  167. data/spec/lib/query/live_spec.rb +3 -3
  168. data/spec/lib/query/qualifiers_spec.rb +6 -6
  169. data/spec/lib/query/token_spec.rb +38 -38
  170. data/spec/lib/query/tokens_spec.rb +35 -35
  171. data/spec/lib/sources/db_spec.rb +23 -18
  172. metadata +212 -181
  173. data/lib/picky/adapters/rack/base.rb +0 -23
  174. data/lib/picky/adapters/rack/live_parameters.rb +0 -33
  175. data/lib/picky/adapters/rack/query.rb +0 -59
  176. data/lib/picky/adapters/rack.rb +0 -28
  177. data/lib/picky/cacher/convenience.rb +0 -3
  178. data/lib/picky/cacher/generator.rb +0 -15
  179. data/lib/picky/cacher/partial/default.rb +0 -5
  180. data/lib/picky/cacher/partial/none.rb +0 -31
  181. data/lib/picky/cacher/partial/strategy.rb +0 -21
  182. data/lib/picky/cacher/partial/substring.rb +0 -118
  183. data/lib/picky/cacher/partial_generator.rb +0 -15
  184. data/lib/picky/cacher/similarity/default.rb +0 -7
  185. data/lib/picky/cacher/similarity/double_levenshtone.rb +0 -77
  186. data/lib/picky/cacher/similarity/none.rb +0 -31
  187. data/lib/picky/cacher/similarity/strategy.rb +0 -9
  188. data/lib/picky/cacher/similarity_generator.rb +0 -15
  189. data/lib/picky/cacher/strategy.rb +0 -12
  190. data/lib/picky/cacher/weights/default.rb +0 -7
  191. data/lib/picky/cacher/weights/logarithmic.rb +0 -39
  192. data/lib/picky/cacher/weights/strategy.rb +0 -9
  193. data/lib/picky/cacher/weights_generator.rb +0 -15
  194. data/lib/picky/frontend_adapters/rack.rb +0 -150
  195. data/lib/picky/index/bundle.rb +0 -54
  196. data/lib/picky/index/file/basic.rb +0 -97
  197. data/lib/picky/index/file/json.rb +0 -34
  198. data/lib/picky/index/file/marshal.rb +0 -34
  199. data/lib/picky/index/file/text.rb +0 -56
  200. data/lib/picky/index/files.rb +0 -118
  201. data/lib/picky/index_api.rb +0 -175
  202. data/lib/picky/indexed/bundle.rb +0 -54
  203. data/lib/picky/indexed/categories.rb +0 -131
  204. data/lib/picky/indexed/category.rb +0 -85
  205. data/lib/picky/indexed/index.rb +0 -39
  206. data/lib/picky/indexed/wrappers/exact_first.rb +0 -61
  207. data/lib/picky/indexing/bundle.rb +0 -213
  208. data/lib/picky/indexing/categories.rb +0 -38
  209. data/lib/picky/indexing/category.rb +0 -117
  210. data/lib/picky/indexing/index.rb +0 -55
  211. data/lib/picky/query/allocation.rb +0 -82
  212. data/lib/picky/query/allocations.rb +0 -130
  213. data/lib/picky/query/combination.rb +0 -74
  214. data/lib/picky/query/combinations.rb +0 -105
  215. data/lib/picky/query/qualifiers.rb +0 -77
  216. data/lib/picky/query/token.rb +0 -202
  217. data/lib/picky/query/tokens.rb +0 -86
  218. data/lib/picky/query/weigher.rb +0 -165
  219. data/lib/picky/results/base.rb +0 -102
  220. data/lib/picky/results/full.rb +0 -13
  221. data/lib/picky/results/live.rb +0 -13
  222. data/lib/picky/tokenizers/base.rb +0 -161
  223. data/lib/picky/tokenizers/index.rb +0 -58
  224. data/lib/picky/tokenizers/query.rb +0 -74
  225. data/spec/lib/cacher/partial/default_spec.rb +0 -15
  226. data/spec/lib/cacher/partial/none_spec.rb +0 -17
  227. data/spec/lib/cacher/weights_generator_spec.rb +0 -21
  228. data/spec/lib/results/base_spec.rb +0 -257
  229. data/spec/lib/results/live_spec.rb +0 -15
@@ -0,0 +1,137 @@
1
+ module Internals
2
+
3
+ module Query
4
+ # Container class for allocations.
5
+ #
6
+ class Allocations # :nodoc:all
7
+
8
+ # TODO Remove size
9
+ #
10
+ delegate :each, :inject, :empty?, :size, :to => :@allocations
11
+ attr_reader :total
12
+
13
# Wraps the given array of allocations (an empty array by default).
#
def initialize allocations = []
  @allocations = allocations
end
16
+
17
# Lets every allocation calculate its score with the given weights.
#
def calculate_score weights
  @allocations.each { |allocation| allocation.calculate_score weights }
end
24
# Sorts the allocations in place (Array#sort!).
#
def sort
  @allocations.sort!
end
29
+
30
# Reduces the amount of allocations to x.
#
# Keeps only the first +amount+ allocations. The previously held array is
# left untouched, so an array that was handed to the constructor is not
# emptied as a side effect.
#
def reduce_to amount
  @allocations = @allocations.first amount
end
35
+
36
# Keeps combinations.
#
# Forwards the identifiers to each allocation's #keep.
# Does nothing (and returns nil) if no identifiers are given.
#
def keep identifiers = []
  return if identifiers.empty?

  @allocations.each { |allocation| allocation.keep identifiers }
end
43
# Removes combinations.
#
# Forwards the identifiers to each allocation's #remove.
# Does nothing (and returns nil) if no identifiers are given.
#
# TODO Rewrite.
#
def remove identifiers = []
  return if identifiers.empty?

  @allocations.each { |allocation| allocation.remove identifiers }
end
52
+
53
# Returns the top amount ids.
#
# Collects the ids allocation by allocation, in order, and stops asking
# further allocations as soon as enough ids have been gathered.
# Never returns more than +amount+ ids, also when the last allocation
# that was asked pushes the collected ids over the requested amount.
#
def ids amount = 20
  collected = []
  @allocations.each do |allocation|
    break if collected.size >= amount
    collected.concat allocation.ids
  end
  collected.first amount
end
60
+
61
# Returns random ids from the first allocation.
#
# Picks up to +amount+ distinct positions of the first allocation's ids
# at random (Array#sample), without shuffling a full index array first.
# Returns an empty array if there are no allocations.
#
def random_ids amount = 1
  return [] if @allocations.empty?

  @allocations.first.ids.sample amount
end
75
+
76
# This is the main method of this class that will replace ids and count.
#
# Asks each allocation to process itself and sums up the allocation
# counts in @total (readable through #total).
#
# Parameters:
#  * amount: the amount of ids to calculate
#  * offset: the offset from where in the result set to take the ids
#
# While an allocation returns no ids, the remaining offset is reduced by
# that allocation's count. As soon as an allocation returns ids, the
# amount still needed is reduced by their number and the offset is reset
# to 0 for the following allocations.
#
# Note: With an amount of 0, an offset > 0 doesn't make much
#       sense, as seen in the live search.
#
# Note: It's possible that no ids are returned by an allocation, but a count. (In case of an offset)
#
def process! amount, offset = 0
  @total = 0
  @allocations.each do |allocation|
    ids = allocation.process! amount, offset
    @total += allocation.count # the total mixed in
    if ids.empty?
      offset -= allocation.count unless offset.zero?
    else
      amount -= ids.size # we need less results from the following allocation
      offset = 0         # we have already passed the offset
    end
  end
end
107
+
108
# Removes duplicate allocations in place (Array#uniq!).
#
# Note: Like Array#uniq!, returns nil if nothing was removed.
#
def uniq
  @allocations.uniq!
end
111
+
112
# Returns the internal allocations array itself (not a copy).
#
def to_a
  @allocations
end
115
+
116
+ # Simply inspects the internal allocations.
117
+ #
118
+ def to_s
119
+ @allocations.inspect
120
+ end
121
+
122
# Allocations for results are in the form:
# [
#   allocation1.to_result,
#   allocation2.to_result
#   ...
# ]
#
# Allocations whose to_result is nil are left out.
#
def to_result
  results = @allocations.map { |allocation| allocation.to_result }
  results.compact
end
132
+
133
+ end
134
+
135
+ end
136
+
137
+ end
@@ -0,0 +1,80 @@
1
+ module Internals
2
+
3
+ module Query
4
+
5
# Describes the combination of a token (the text) and
# the index (the bundle): [text, index_bundle]
#
# A combination is a single part of an allocation:
# [..., [text2, index_bundle2], ...]
#
# An allocation consists of a number of combinations:
# [[text1, index_bundle1], [text2, index_bundle2], [text3, index_bundle1]]
#
class Combination # :nodoc:all

  attr_reader :token, :bundle, :category_name

  # Parameters:
  #  * token: needs to respond to #text (and #to_s, #identifier, #to_result)
  #  * category: needs to respond to #name and #bundle_for(token)
  #
  def initialize token, category
    @token         = token
    @category_name = category.name
    @bundle        = category.bundle_for token
    @text          = @token.text # don't want to use reset_similar already
  end

  # Note: Required for uniq!
  #
  def hash
    hash_key.hash
  end

  # Two combinations are eql? if their token string and bundle are.
  #
  # Note: uniq! and Hash lookups use #hash AND #eql?, so overriding #hash
  #       alone never lets two equal combinations count as duplicates.
  #       #== is deliberately left as it was.
  #
  def eql? other
    other.instance_of?(self.class) && hash_key.eql?(other.hash_key)
  end

  # Returns the weight of this combination.
  #
  # Note: Caching is most of the time useful.
  #
  def weight
    @weight ||= @bundle.weight(@text)
  end

  # Returns an array of ids for the given text.
  #
  # Note: Caching is most of the time useful.
  #
  def ids
    @ids ||= @bundle.ids(@text)
  end

  # The identifier for this combination.
  #
  def identifier
    "#{bundle.identifier}:#{@token.identifier}"
  end

  # Is the identifier in the given identifiers?
  #
  def in? identifiers
    identifiers.include? identifier
  end

  # Combines the category names with the original names.
  # [
  #   [:title,    'Flarbl', :flarbl],
  #   [:category, 'Gnorf',  :gnorf]
  # ]
  #
  def to_result
    [@category_name, *@token.to_result]
  end

  # Example:
  #  "exact title:Peter*:peter"
  #
  def to_s
    "#{bundle.identifier} #{to_result.join(':')}"
  end

  protected

    # The values that identify a combination, shared by #hash and #eql?.
    #
    def hash_key
      [@token.to_s, @bundle]
    end

end
77
+
78
+ end
79
+
80
+ end
@@ -0,0 +1,84 @@
1
+ module Internals
2
+
3
+ module Query
4
+
5
+ # Combinations are a number of Combination-s.
6
+ #
7
+ # They are the core of an allocation.
8
+ # An allocation consists of a number of combinations.
9
+ #
10
+ module Combinations # :nodoc:all
11
+
12
+ # Base Combinations contain methods for calculating score and ids.
13
+ #
14
+ class Base
15
+
16
+ attr_reader :combinations
17
+
18
+ delegate :empty?, :to => :@combinations
19
+
20
# Wraps the given array of combinations (an empty array by default).
#
def initialize combinations = []
  @combinations = combinations
end
23
+
24
# The hash of the wrapped combinations array.
#
def hash
  @combinations.hash
end
27
+
28
# Uses user specific weights to calculate a score for the combinations:
# the total score plus the weighted score.
#
def calculate_score weights
  base  = total_score
  bonus = weighted_score weights
  base + bonus
end
33
# Sums up the weights of all combinations.
#
def total_score
  @combinations.sum(&:weight)
end
36
# Asks the given weights for the score of the combinations.
#
def weighted_score weights
  weights.score(@combinations)
end
39
+
40
# Wraps these combinations into a new Allocation together
# with the given result_identifier.
#
def pack_into_allocation result_identifier
  Allocation.new(self, result_identifier)
end
45
+
46
# Filters the combinations such that only those whose identifier
# is in the given identifiers remain.
#
# Note: Removes in place. With the default (no identifiers),
#       all combinations are removed.
#
# Note: Like Array#reject!, returns nil if nothing was removed.
#
def keep identifiers = []
  # TODO Rewrite to use the category!!!
  #
  @combinations.reject! do |combination|
    !combination.in?(identifiers)
  end
end
58
+
59
# Filters the combinations such that those whose identifier
# is in the given identifiers are removed.
#
# Note: Removes in place. With the default (no identifiers),
#       nothing is removed.
#
# Note: Like Array#reject!, returns nil if nothing was removed.
#
def remove identifiers = []
  # TODO Rewrite to use the category!!!
  #
  @combinations.reject! do |combination|
    combination.in?(identifiers)
  end
end
71
+
72
# Returns an array with the to_result of each combination.
#
def to_result
  @combinations.map { |combination| combination.to_result }
end
77
+
78
+ end
79
+
80
+ end
81
+
82
+ end
83
+
84
+ end
@@ -0,0 +1,58 @@
1
+ module Internals
2
+
3
+ module Query
4
+
5
+ # Combinations are a number of Combination-s.
6
+ #
7
+ # They are the core of an allocation.
8
+ # An allocation consists of a number of combinations.
9
+ #
10
+ module Combinations # :nodoc:all
11
+
12
+ # Memory Combinations contain specific methods for
13
+ # calculating score and ids in memory.
14
+ #
15
+ class Memory < Base
16
+
17
# Returns the result ids for the allocation.
#
# Gets the ids of each combination, orders the id arrays by size
# (smallest first) and hands them to
# Performant::Array.memory_efficient_intersect, e.g. (sizes):
#   0. [100_000, 400, 30, 2]
#   1. [2, 30, 400, 100_000]
#   2. (100_000 & (400 & (30 & 2))) # => result
#
# Note: In the memory based version we ignore the (amount) needed hint
#       and the offset (both parameters are unused).
#       TODO Not ignore it?
#
def ids _, _
  return [] if @combinations.empty?

  # Get the ids for each combination.
  #
  # TODO For combinations with Redis
  #
  id_arrays = @combinations.map(&:ids)

  # Smallest first, such that the intersect can be performed faster.
  #
  # TODO Move into the memory_efficient_intersect such that
  #      this precondition for a fast algorithm is always given.
  #
  id_arrays.sort! { |this_array, that_array| this_array.size <=> that_array.size }

  Performant::Array.memory_efficient_intersect id_arrays
end
51
+
52
+ end
53
+
54
+ end
55
+
56
+ end
57
+
58
+ end
@@ -0,0 +1,59 @@
1
+ module Internals
2
+
3
+ module Query
4
+
5
+ # Combinations are a number of Combination-s.
6
+ #
7
+ # They are the core of an allocation.
8
+ # An allocation consists of a number of combinations.
9
+ #
10
+ module Combinations # :nodoc:all
11
+
12
+ # Redis Combinations contain specific methods for
13
+ # calculating score and ids in memory.
14
+ #
15
+ class Redis < Base
16
+
17
# Hands the combinations to the superclass and makes sure the
# @@redis class variable holds a ::Redis client (created once,
# on first use).
#
# TODO Err… yeah. Wrap in Picky specific wrapper.
#
def initialize combinations
  super(combinations)

  @@redis ||= ::Redis.new
end
24
+
25
# Returns the result ids for the allocation.
#
# Stores the intersection of the keys named by the combination
# identifiers under an intermediate result key (zinterstore) and
# returns a range of that result (zrange).
#
# NOTE(review): Redis ZRANGE treats the stop index as inclusive, so
#               offset..(offset + amount) asks for amount + 1 members —
#               confirm whether callers rely on that.
# NOTE(review): The intermediate key is the same for every call in a
#               process and is not deleted here — confirm that this is ok.
#
def ids amount, offset
  return [] if @combinations.empty?

  identifiers = @combinations.map { |combination| "#{combination.identifier}" }
  result_id   = generate_intermediate_result_id

  # TODO multi?
  #
  @@redis.zinterstore result_id, identifiers
  @@redis.zrange result_id, offset, (offset + amount)
end
43
+
44
# Generates the id (a Symbol) under which the intermediate result is
# stored. It contains the current process id.
#
# TODO How expensive is Process.pid? If it changes once, remember forever?
#
def generate_intermediate_result_id
  # TODO host -> extract host.
  "host:#{Process.pid}:picky:result".to_sym
end
52
+
53
+ end
54
+
55
+ end
56
+
57
+ end
58
+
59
+ end
@@ -0,0 +1,180 @@
1
+ module Internals
2
+
3
+ module Query
4
+
5
# The query indexes class bundles indexes given to a query.
#
# Example:
#   # If you call
#   Query::Full.new dvd_index, mp3_index, video_index
#
#   # What it does is take the three given (API-) indexes and
#   # bundle them in an index bundle.
#
class Indexes

  attr_reader :indexes

  # Creates a new Query::Indexes.
  #
  # Parameters:
  #  * index_definitions: any number of objects responding to #indexed
  #  * combinations_type: the last argument, the class used to wrap
  #    each expanded combination
  #
  # Note: We cannot mix memory and redis indexes just yet.
  #
  def initialize *index_definitions, combinations_type
    @combinations_type = combinations_type
    @indexes           = index_definitions.map(&:indexed)
  end

  # Returns a number of possible allocations for the given tokens.
  #
  def allocations_for tokens
    allocations = indexes.inject([]) do |previous_allocations, index|
      # Optimization for ignoring tokens that allocate to nothing and
      # can be ignored (nil entries).
      # For example in a special search, where "florian" is not
      # mapped to any category.
      #
      possible_combinations = tokens.possible_combinations_in(index).compact

      # Generate all possible combinations.
      #
      expanded_combinations = expand_combinations_from possible_combinations

      # If there are none, try the next index.
      #
      next previous_allocations unless expanded_combinations

      # Add the wrapped possible allocations to the ones we already have.
      #
      # TODO Do not extract result_identifier. Remove pack_into_allocation.
      #
      wrapped = expanded_combinations.map do |expanded_combination|
        @combinations_type.new(expanded_combination).pack_into_allocation(index.result_identifier)
      end

      previous_allocations + wrapped
    end

    Allocations.new allocations
  end

  # This is the core of the search engine.
  #
  # Gets an array of
  # [
  #  [<combinations for token1>],
  #  [<combinations for token2>],
  #  [<combinations for token3>]
  # ]
  #
  # Generates all possible allocations of combinations.
  # [
  #  [first combination of token1, first c of t2, first c of t3],
  #  [first combination of token1, first c of t2, second c of t3]
  #  ...
  # ]
  #
  # That is, all possible combinations of array elements:
  # [1,2,3] x [a,b,c] x [k,l,m] => [[1,a,k], [1,a,l], [1,a,m], [1,b,k], [1,b,l], [1,b,m], [1,c,k], ..., [3,c,m]]
  #
  # Returns nil if there are no combinations, or if one of
  # the tokens has no combinations (it cannot be allocated).
  #
  # Note: Uses Array#product, which yields the combinations in the order
  #       shown above and leaves the given array untouched.
  #
  def expand_combinations_from possible_combinations
    return if possible_combinations.empty?
    return if possible_combinations.any?(&:empty?)

    first, *rest = possible_combinations
    first.product(*rest)
  end

end
177
+
178
+ end
179
+
180
+ end
@@ -0,0 +1,81 @@
1
+ # coding: utf-8
2
+ #
3
+ module Internals
4
+
5
+ #
6
+ #
7
+ module Query
8
+
9
# A single qualifier: a normalized qualifier together with
# the codes that map to it.
#
class Qualifier # :nodoc:all

  attr_reader :normalized_qualifier, :codes

  # codes is an array. Each code is converted to a symbol.
  #
  def initialize normalized_qualifier, codes
    @normalized_qualifier = normalized_qualifier
    @codes                = codes.map { |code| code.to_sym }
  end

  # Maps each code to the normalized qualifier in the given hash.
  #
  # Will overwrite if the key is present in the hash.
  #
  def inject_into hash
    codes.each { |code| hash[code] = normalized_qualifier }
  end

end
33
+
34
+ # Collection class for qualifiers.
35
+ #
36
+ class Qualifiers # :nodoc:all
37
+
38
+ include Singleton
39
+
40
+ attr_reader :qualifiers, :normalization_mapping
41
+
42
+ delegate :<<, :to => :qualifiers
43
+
44
# Starts out with no qualifiers and an empty normalization mapping.
#
def initialize
  @normalization_mapping = {}
  @qualifiers            = []
end
50
+
51
# Wraps name and qualifiers in a new Qualifier and
# appends it to the singleton instance.
#
# TODO Spec.
#
def self.add name, qualifiers
  qualifier = Qualifier.new name, qualifiers
  instance << qualifier
end
56
+
57
# Uses the qualifiers to prepare (optimize) the qualifier handling:
# each qualifier injects itself into the normalization mapping.
#
def prepare
  qualifiers.each { |qualifier| qualifier.inject_into normalization_mapping }
end
64
+
65
# Normalizes the given qualifier.
#
# Returns nil if the qualifier is blank or not in the normalization
# mapping (i.e. not allowed), the normalized qualifier if it is.
#
def normalize qualifier
  qualifier.blank? ? nil : normalization_mapping[qualifier.to_sym]
end
76
+
77
+ end
78
+
79
+ end
80
+
81
+ end