picky 1.4.1 → 1.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (229) hide show
  1. data/lib/picky/{alias_instances.rb → aliases.rb} +1 -3
  2. data/lib/picky/application.rb +18 -19
  3. data/lib/picky/cores.rb +1 -1
  4. data/lib/picky/generators/aliases.rb +3 -0
  5. data/lib/picky/index/base.rb +179 -0
  6. data/lib/picky/index/memory.rb +28 -0
  7. data/lib/picky/index/redis.rb +28 -0
  8. data/lib/picky/{indexes_api.rb → index_bundle.rb} +16 -16
  9. data/lib/picky/indexed/indexes.rb +11 -7
  10. data/lib/picky/indexing/indexes.rb +14 -8
  11. data/lib/picky/internals/adapters/rack/base.rb +27 -0
  12. data/lib/picky/internals/adapters/rack/live_parameters.rb +37 -0
  13. data/lib/picky/internals/adapters/rack/query.rb +63 -0
  14. data/lib/picky/internals/adapters/rack.rb +34 -0
  15. data/lib/picky/{calculations → internals/calculations}/location.rb +0 -0
  16. data/lib/picky/{cli.rb → internals/cli.rb} +0 -0
  17. data/lib/picky/{configuration → internals/configuration}/index.rb +8 -2
  18. data/lib/picky/{ext → internals/ext}/maybe_compile.rb +0 -0
  19. data/lib/picky/{ext → internals/ext}/ruby19/extconf.rb +0 -0
  20. data/lib/picky/{ext → internals/ext}/ruby19/performant.c +0 -0
  21. data/lib/picky/{extensions → internals/extensions}/array.rb +0 -0
  22. data/lib/picky/{extensions → internals/extensions}/hash.rb +0 -0
  23. data/lib/picky/{extensions → internals/extensions}/module.rb +0 -0
  24. data/lib/picky/{extensions → internals/extensions}/object.rb +0 -0
  25. data/lib/picky/{extensions → internals/extensions}/symbol.rb +0 -0
  26. data/lib/picky/internals/frontend_adapters/rack.rb +154 -0
  27. data/lib/picky/internals/generators/base.rb +19 -0
  28. data/lib/picky/internals/generators/partial/default.rb +7 -0
  29. data/lib/picky/internals/generators/partial/none.rb +35 -0
  30. data/lib/picky/internals/generators/partial/strategy.rb +29 -0
  31. data/lib/picky/internals/generators/partial/substring.rb +122 -0
  32. data/lib/picky/internals/generators/partial_generator.rb +19 -0
  33. data/lib/picky/internals/generators/similarity/default.rb +9 -0
  34. data/lib/picky/internals/generators/similarity/double_levenshtone.rb +81 -0
  35. data/lib/picky/internals/generators/similarity/none.rb +35 -0
  36. data/lib/picky/internals/generators/similarity/strategy.rb +11 -0
  37. data/lib/picky/internals/generators/similarity_generator.rb +19 -0
  38. data/lib/picky/internals/generators/strategy.rb +18 -0
  39. data/lib/picky/internals/generators/weights/default.rb +9 -0
  40. data/lib/picky/internals/generators/weights/logarithmic.rb +43 -0
  41. data/lib/picky/internals/generators/weights/strategy.rb +11 -0
  42. data/lib/picky/internals/generators/weights_generator.rb +19 -0
  43. data/lib/picky/{helpers → internals/helpers}/measuring.rb +0 -0
  44. data/lib/picky/internals/index/backend.rb +113 -0
  45. data/lib/picky/internals/index/file/basic.rb +101 -0
  46. data/lib/picky/internals/index/file/json.rb +38 -0
  47. data/lib/picky/internals/index/file/marshal.rb +38 -0
  48. data/lib/picky/internals/index/file/text.rb +60 -0
  49. data/lib/picky/internals/index/files.rb +24 -0
  50. data/lib/picky/internals/index/redis/basic.rb +77 -0
  51. data/lib/picky/internals/index/redis/list_hash.rb +46 -0
  52. data/lib/picky/internals/index/redis/string_hash.rb +35 -0
  53. data/lib/picky/internals/index/redis.rb +44 -0
  54. data/lib/picky/internals/indexed/bundle/base.rb +72 -0
  55. data/lib/picky/internals/indexed/bundle/memory.rb +69 -0
  56. data/lib/picky/internals/indexed/bundle/redis.rb +70 -0
  57. data/lib/picky/internals/indexed/categories.rb +135 -0
  58. data/lib/picky/internals/indexed/category.rb +90 -0
  59. data/lib/picky/internals/indexed/index.rb +57 -0
  60. data/lib/picky/{indexed → internals/indexed}/wrappers/bundle/calculation.rb +0 -0
  61. data/lib/picky/{indexed → internals/indexed}/wrappers/bundle/location.rb +4 -2
  62. data/lib/picky/{indexed → internals/indexed}/wrappers/bundle/wrapper.rb +1 -1
  63. data/lib/picky/internals/indexed/wrappers/exact_first.rb +65 -0
  64. data/lib/picky/{indexers → internals/indexers}/no_source_specified_error.rb +0 -0
  65. data/lib/picky/{indexers → internals/indexers}/serial.rb +2 -2
  66. data/lib/picky/{indexers → internals/indexers}/solr.rb +0 -0
  67. data/lib/picky/internals/indexing/bundle/base.rb +219 -0
  68. data/lib/picky/internals/indexing/bundle/memory.rb +25 -0
  69. data/lib/picky/internals/indexing/bundle/redis.rb +28 -0
  70. data/lib/picky/internals/indexing/bundle/super_base.rb +65 -0
  71. data/lib/picky/internals/indexing/categories.rb +42 -0
  72. data/lib/picky/internals/indexing/category.rb +120 -0
  73. data/lib/picky/internals/indexing/index.rb +67 -0
  74. data/lib/picky/{performant.rb → internals/performant.rb} +0 -0
  75. data/lib/picky/internals/query/allocation.rb +88 -0
  76. data/lib/picky/internals/query/allocations.rb +137 -0
  77. data/lib/picky/internals/query/combination.rb +80 -0
  78. data/lib/picky/internals/query/combinations/base.rb +84 -0
  79. data/lib/picky/internals/query/combinations/memory.rb +58 -0
  80. data/lib/picky/internals/query/combinations/redis.rb +59 -0
  81. data/lib/picky/internals/query/indexes.rb +180 -0
  82. data/lib/picky/internals/query/qualifiers.rb +81 -0
  83. data/lib/picky/internals/query/token.rb +215 -0
  84. data/lib/picky/internals/query/tokens.rb +89 -0
  85. data/lib/picky/{query → internals/query}/weights.rb +0 -0
  86. data/lib/picky/internals/results/base.rb +106 -0
  87. data/lib/picky/internals/results/full.rb +17 -0
  88. data/lib/picky/internals/results/live.rb +17 -0
  89. data/lib/picky/{solr → internals/solr}/schema_generator.rb +0 -0
  90. data/lib/picky/internals/tokenizers/base.rb +166 -0
  91. data/lib/picky/internals/tokenizers/index.rb +63 -0
  92. data/lib/picky/internals/tokenizers/query.rb +79 -0
  93. data/lib/picky/loader.rb +148 -112
  94. data/lib/picky/query/base.rb +57 -26
  95. data/lib/picky/query/full.rb +1 -1
  96. data/lib/picky/query/live.rb +1 -1
  97. data/lib/picky/sources/db.rb +27 -6
  98. data/lib/tasks/index.rake +3 -3
  99. data/lib/tasks/try.rake +2 -2
  100. data/spec/lib/aliases_spec.rb +9 -0
  101. data/spec/lib/application_spec.rb +3 -3
  102. data/spec/lib/generators/aliases_spec.rb +1 -0
  103. data/spec/lib/{index_api_spec.rb → index/base_spec.rb} +7 -7
  104. data/spec/lib/index_bundle_spec.rb +71 -0
  105. data/spec/lib/indexed/indexes_spec.rb +61 -0
  106. data/spec/lib/indexing/indexes_spec.rb +94 -24
  107. data/spec/lib/{adapters → internals/adapters}/rack/base_spec.rb +2 -2
  108. data/spec/lib/{adapters → internals/adapters}/rack/live_parameters_spec.rb +2 -2
  109. data/spec/lib/{adapters → internals/adapters}/rack/query_spec.rb +2 -2
  110. data/spec/lib/{calculations → internals/calculations}/location_spec.rb +0 -0
  111. data/spec/lib/{cli_spec.rb → internals/cli_spec.rb} +4 -1
  112. data/spec/lib/{configuration → internals/configuration}/index_spec.rb +1 -1
  113. data/spec/lib/{cores_spec.rb → internals/cores_spec.rb} +0 -0
  114. data/spec/lib/{extensions → internals/extensions}/array_spec.rb +0 -0
  115. data/spec/lib/{extensions → internals/extensions}/hash_spec.rb +0 -0
  116. data/spec/lib/{extensions → internals/extensions}/module_spec.rb +0 -0
  117. data/spec/lib/{extensions → internals/extensions}/object_spec.rb +0 -0
  118. data/spec/lib/{extensions → internals/extensions}/symbol_spec.rb +0 -0
  119. data/spec/lib/{frontend_adapters → internals/frontend_adapters}/rack_spec.rb +11 -11
  120. data/spec/lib/{cacher → internals/generators}/cacher_strategy_spec.rb +2 -2
  121. data/spec/lib/internals/generators/partial/default_spec.rb +17 -0
  122. data/spec/lib/internals/generators/partial/none_spec.rb +17 -0
  123. data/spec/lib/{cacher → internals/generators}/partial/substring_spec.rb +26 -27
  124. data/spec/lib/{cacher → internals/generators}/partial_generator_spec.rb +5 -5
  125. data/spec/lib/{cacher → internals/generators}/similarity/double_levenshtone_spec.rb +4 -4
  126. data/spec/lib/{cacher → internals/generators}/similarity/none_spec.rb +2 -2
  127. data/spec/lib/{cacher → internals/generators}/similarity_generator_spec.rb +4 -4
  128. data/spec/lib/{cacher → internals/generators}/weights/logarithmic_spec.rb +2 -2
  129. data/spec/lib/internals/generators/weights_generator_spec.rb +21 -0
  130. data/spec/lib/{helpers → internals/helpers}/measuring_spec.rb +0 -0
  131. data/spec/lib/{index → internals/index}/file/basic_spec.rb +2 -2
  132. data/spec/lib/{index → internals/index}/file/json_spec.rb +2 -2
  133. data/spec/lib/{index → internals/index}/file/marshal_spec.rb +2 -2
  134. data/spec/lib/{index → internals/index}/file/text_spec.rb +2 -2
  135. data/spec/lib/{index → internals/index}/files_spec.rb +2 -2
  136. data/spec/lib/{indexed/bundle_spec.rb → internals/indexed/bundle/memory_spec.rb} +4 -5
  137. data/spec/lib/{indexed → internals/indexed}/categories_spec.rb +13 -13
  138. data/spec/lib/{indexed → internals/indexed}/category_spec.rb +59 -32
  139. data/spec/lib/{indexed → internals/indexed}/index_spec.rb +5 -5
  140. data/spec/lib/{indexed → internals/indexed}/wrappers/bundle/calculation_spec.rb +0 -0
  141. data/spec/lib/{indexed → internals/indexed}/wrappers/bundle/wrapper_spec.rb +0 -0
  142. data/spec/lib/{indexed → internals/indexed}/wrappers/exact_first_spec.rb +5 -5
  143. data/spec/lib/{indexers → internals/indexers}/serial_spec.rb +0 -0
  144. data/spec/lib/{indexing/bundle_partial_generation_speed_spec.rb → internals/indexing/bundle/memory_partial_generation_speed_spec.rb} +3 -3
  145. data/spec/lib/{indexing/bundle_spec.rb → internals/indexing/bundle/memory_spec.rb} +3 -3
  146. data/spec/lib/{index/bundle_spec.rb → internals/indexing/bundle/super_base_spec.rb} +9 -3
  147. data/spec/lib/{indexing → internals/indexing}/category_spec.rb +3 -3
  148. data/spec/lib/{indexing → internals/indexing}/index_spec.rb +3 -3
  149. data/spec/lib/internals/indexing/indexes_spec.rb +36 -0
  150. data/spec/lib/{interfaces → internals/interfaces}/live_parameters_spec.rb +0 -0
  151. data/spec/lib/internals/results/base_spec.rb +105 -0
  152. data/spec/lib/internals/results/full_spec.rb +78 -0
  153. data/spec/lib/internals/results/live_spec.rb +88 -0
  154. data/spec/lib/{solr → internals/solr}/schema_generator_spec.rb +0 -0
  155. data/spec/lib/{tokenizers → internals/tokenizers}/base_spec.rb +3 -3
  156. data/spec/lib/{tokenizers → internals/tokenizers}/index_spec.rb +9 -9
  157. data/spec/lib/{tokenizers → internals/tokenizers}/query_spec.rb +11 -11
  158. data/spec/lib/query/allocation_spec.rb +12 -12
  159. data/spec/lib/query/allocations_spec.rb +19 -19
  160. data/spec/lib/query/base_spec.rb +28 -4
  161. data/spec/lib/query/combination_spec.rb +8 -9
  162. data/spec/lib/query/combinations/base_spec.rb +116 -0
  163. data/spec/lib/query/{combinations_spec.rb → combinations/memory_spec.rb} +14 -14
  164. data/spec/lib/query/combinations/redis_spec.rb +132 -0
  165. data/spec/lib/query/full_spec.rb +2 -2
  166. data/spec/lib/query/indexes_spec.rb +81 -0
  167. data/spec/lib/query/live_spec.rb +3 -3
  168. data/spec/lib/query/qualifiers_spec.rb +6 -6
  169. data/spec/lib/query/token_spec.rb +38 -38
  170. data/spec/lib/query/tokens_spec.rb +35 -35
  171. data/spec/lib/sources/db_spec.rb +23 -18
  172. metadata +212 -181
  173. data/lib/picky/adapters/rack/base.rb +0 -23
  174. data/lib/picky/adapters/rack/live_parameters.rb +0 -33
  175. data/lib/picky/adapters/rack/query.rb +0 -59
  176. data/lib/picky/adapters/rack.rb +0 -28
  177. data/lib/picky/cacher/convenience.rb +0 -3
  178. data/lib/picky/cacher/generator.rb +0 -15
  179. data/lib/picky/cacher/partial/default.rb +0 -5
  180. data/lib/picky/cacher/partial/none.rb +0 -31
  181. data/lib/picky/cacher/partial/strategy.rb +0 -21
  182. data/lib/picky/cacher/partial/substring.rb +0 -118
  183. data/lib/picky/cacher/partial_generator.rb +0 -15
  184. data/lib/picky/cacher/similarity/default.rb +0 -7
  185. data/lib/picky/cacher/similarity/double_levenshtone.rb +0 -77
  186. data/lib/picky/cacher/similarity/none.rb +0 -31
  187. data/lib/picky/cacher/similarity/strategy.rb +0 -9
  188. data/lib/picky/cacher/similarity_generator.rb +0 -15
  189. data/lib/picky/cacher/strategy.rb +0 -12
  190. data/lib/picky/cacher/weights/default.rb +0 -7
  191. data/lib/picky/cacher/weights/logarithmic.rb +0 -39
  192. data/lib/picky/cacher/weights/strategy.rb +0 -9
  193. data/lib/picky/cacher/weights_generator.rb +0 -15
  194. data/lib/picky/frontend_adapters/rack.rb +0 -150
  195. data/lib/picky/index/bundle.rb +0 -54
  196. data/lib/picky/index/file/basic.rb +0 -97
  197. data/lib/picky/index/file/json.rb +0 -34
  198. data/lib/picky/index/file/marshal.rb +0 -34
  199. data/lib/picky/index/file/text.rb +0 -56
  200. data/lib/picky/index/files.rb +0 -118
  201. data/lib/picky/index_api.rb +0 -175
  202. data/lib/picky/indexed/bundle.rb +0 -54
  203. data/lib/picky/indexed/categories.rb +0 -131
  204. data/lib/picky/indexed/category.rb +0 -85
  205. data/lib/picky/indexed/index.rb +0 -39
  206. data/lib/picky/indexed/wrappers/exact_first.rb +0 -61
  207. data/lib/picky/indexing/bundle.rb +0 -213
  208. data/lib/picky/indexing/categories.rb +0 -38
  209. data/lib/picky/indexing/category.rb +0 -117
  210. data/lib/picky/indexing/index.rb +0 -55
  211. data/lib/picky/query/allocation.rb +0 -82
  212. data/lib/picky/query/allocations.rb +0 -130
  213. data/lib/picky/query/combination.rb +0 -74
  214. data/lib/picky/query/combinations.rb +0 -105
  215. data/lib/picky/query/qualifiers.rb +0 -77
  216. data/lib/picky/query/token.rb +0 -202
  217. data/lib/picky/query/tokens.rb +0 -86
  218. data/lib/picky/query/weigher.rb +0 -165
  219. data/lib/picky/results/base.rb +0 -102
  220. data/lib/picky/results/full.rb +0 -13
  221. data/lib/picky/results/live.rb +0 -13
  222. data/lib/picky/tokenizers/base.rb +0 -161
  223. data/lib/picky/tokenizers/index.rb +0 -58
  224. data/lib/picky/tokenizers/query.rb +0 -74
  225. data/spec/lib/cacher/partial/default_spec.rb +0 -15
  226. data/spec/lib/cacher/partial/none_spec.rb +0 -17
  227. data/spec/lib/cacher/weights_generator_spec.rb +0 -21
  228. data/spec/lib/results/base_spec.rb +0 -257
  229. data/spec/lib/results/live_spec.rb +0 -15
@@ -0,0 +1,137 @@
1
+ module Internals
2
+
3
+ module Query
4
+ # Container class for allocations.
5
+ #
6
+ class Allocations # :nodoc:all
7
+
8
+ # TODO Remove size
9
+ #
10
+ delegate :each, :inject, :empty?, :size, :to => :@allocations
11
+ attr_reader :total
12
+
13
# Wraps the given allocations. The array is stored as-is (not copied).
#
def initialize(allocations = [])
  @allocations = allocations
end
16
+
17
+ # Score each allocation.
18
+ #
19
# Asks every allocation to calculate its score with the given weights.
#
# Returns the internal allocations array (the result of #each).
#
def calculate_score(weights)
  @allocations.each { |allocation| allocation.calculate_score(weights) }
end
24
+ # Sort the allocations.
25
+ #
26
# Sorts the allocations in place, using the allocations' own <=>.
#
def sort
  @allocations.sort!
end
29
+
30
+ # Reduces the amount of allocations to x.
31
+ #
32
# Keeps only the first +amount+ allocations.
#
# Note: Array#shift(n) removes the kept elements from the array that
# was held before, so that array is left with the remainder.
#
def reduce_to(amount)
  kept = @allocations.shift(amount)
  @allocations = kept
end
35
+
36
+ # Keeps combinations.
37
+ #
38
+ # Only those passed in remain.
39
+ #
40
# Tells each allocation to keep only the given identifiers.
#
# Does nothing (and returns nil) if no identifiers are given.
#
def keep(identifiers = [])
  return if identifiers.empty?

  @allocations.each { |allocation| allocation.keep(identifiers) }
end
43
+ # Removes combinations.
44
+ #
45
+ # Only those passed in are removed.
46
+ #
47
+ # TODO Rewrite.
48
+ #
49
# Tells each allocation to remove the given identifiers.
#
# Does nothing (and returns nil) if no identifiers are given.
#
def remove(identifiers = [])
  return if identifiers.empty?

  @allocations.each { |allocation| allocation.remove(identifiers) }
end
52
+
53
+ # Returns the top amount ids.
54
+ #
55
# Returns at most +amount+ ids, collected from the allocations in order.
#
# Parameters:
#  * amount: the maximum number of ids to return (default 20)
#
# Allocations are only asked for their ids until enough have been
# collected. The collected ids are always trimmed to +amount+, also
# when the last allocation that was looked at overshoots it.
#
def ids(amount = 20)
  collected = []
  @allocations.each do |allocation|
    break if collected.size >= amount
    # concat only mutates our own accumulator, never the allocation's ids.
    collected.concat(allocation.ids)
  end
  collected.first(amount)
end
60
+
61
+ # Returns a random id from the allocations.
62
+ #
63
+ # Note: This is an ok algorithm for small id sets.
64
+ #
65
+ # But still TODO try for a faster one.
66
+ #
67
+ # TODO For the 1 amount random case this needs to be improved.
68
+ #
69
# Returns up to +amount+ randomly chosen ids from the first allocation.
#
# Parameters:
#  * amount: how many ids to pick (default 1)
#
# Returns an empty array if there are no allocations. Only the first
# allocation is looked at. Picks are made without repetition of
# positions, so at most as many ids as the allocation has are returned.
#
def random_ids(amount = 1)
  return [] if @allocations.empty?

  # Array#sample picks distinct positions at random without having to
  # shuffle the whole id list first.
  @allocations.first.ids.sample(amount)
end
75
+
76
+ # This is the main method of this class that will replace ids and count.
77
+ #
78
+ # What it does is calculate the ids and counts of its allocations
79
+ # for being used in the results. It also calculates the total count.
80
+ #
81
+ # Parameters:
82
+ # * amount: the amount of ids to calculate
83
+ # * offset: the offset from where in the result set to take the ids
84
+ #
85
+ # Note: With an amount of 0, an offset > 0 doesn't make much
86
+ # sense, as seen in the live search.
87
+ #
88
+ # Note: Each allocation caches its count, but not its ids (thrown away).
89
+ # The ids are cached in this class.
90
+ #
91
+ # Note: It's possible that no ids are returned by an allocation, but a count. (In case of an offset)
92
+ #
93
# Processes each allocation in order and sums up the total count.
#
# Parameters:
#  * amount: the amount of ids still wanted
#  * offset: the offset still to be skipped (default 0)
#
# Each allocation is processed with the remaining amount and offset.
# Its count is always added to @total. If it returned no ids, its
# count is subtracted from a non-zero offset. If it returned ids, the
# amount is reduced by their number and the offset is reset to 0.
#
# Returns the internal allocations array (the result of #each).
#
def process!(amount, offset = 0)
  @total = 0
  @allocations.each do |allocation|
    ids = allocation.process!(amount, offset)
    @total += allocation.count # the total mixed in
    if ids.empty?
      offset -= allocation.count unless offset.zero?
    else
      amount -= ids.size # we need less results from the following allocation
      offset = 0         # we have already passed the offset
    end
  end
end
107
+
108
# Removes duplicate allocations in place.
#
# Note: Like Array#uniq!, returns nil if nothing was removed.
#
def uniq
  @allocations.uniq!
end
111
+
112
# Returns the internal allocations array itself (not a copy).
#
def to_a
  @allocations
end
115
+
116
+ # Simply inspects the internal allocations.
117
+ #
118
# The string form is the inspect output of the internal allocations.
#
def to_s
  @allocations.inspect
end
121
+
122
+ # Allocations for results are in the form:
123
+ # [
124
+ # allocation1.to_result,
125
+ # allocation2.to_result
126
+ # ...
127
+ # ]
128
+ #
129
# Collects the results of all allocations, leaving out nil results.
#
def to_result
  results = @allocations.map { |allocation| allocation.to_result }
  results.compact
end
132
+
133
+ end
134
+
135
+ end
136
+
137
+ end
@@ -0,0 +1,80 @@
1
+ module Internals
2
+
3
+ module Query
4
+
5
+ # Describes the combination of a token (the text) and
6
+ # the index (the bundle): [text, index_bundle]
7
+ #
8
+ # A combination is a single part of an allocation:
9
+ # [..., [text2, index_bundle2], ...]
10
+ #
11
+ # An allocation consists of a number of combinations:
12
+ # [[text1, index_bundle1], [text2, index_bundle2], [text3, index_bundle1]]
13
+ #
14
+ class Combination # :nodoc:all
15
+
16
+ attr_reader :token, :bundle, :category_name
17
+
18
# Builds a combination from a token and a category.
#
# Remembers the token, the category's name, the bundle the category
# returns for the token, and the token's text as it is right now.
#
def initialize(token, category)
  @token         = token
  @category_name = category.name
  @bundle        = category.bundle_for(token)
  @text          = token.text # don't want to use reset_similar already
end
24
+
25
+ # Note: Required for uniq!
26
+ #
27
# The hash is derived from the token's string form and the bundle.
#
def hash
  key = [@token.to_s, @bundle]
  key.hash
end
30
+
31
+ # Returns the weight of this combination.
32
+ #
33
+ # Note: Caching is most of the time useful.
34
+ #
35
# The weight the bundle holds for this combination's text.
#
# Note: A truthy weight is remembered and not looked up again.
#
def weight
  return @weight if @weight

  @weight = @bundle.weight(@text)
end
38
+
39
+ # Returns an array of ids for the given text.
40
+ #
41
+ # Note: Caching is most of the time useful.
42
+ #
43
# The ids the bundle holds for this combination's text.
#
# Note: A truthy result is remembered and not looked up again.
#
def ids
  return @ids if @ids

  @ids = @bundle.ids(@text)
end
46
+
47
+ # The identifier for this combination.
48
+ #
49
# Bundle identifier and token identifier, joined by a colon.
#
def identifier
  bundle_part = bundle.identifier
  token_part  = @token.identifier
  "#{bundle_part}:#{token_part}"
end
52
+
53
+ # Is the identifier in the given identifiers?
54
+ #
55
# True if the given identifiers include this combination's identifier.
#
def in?(identifiers)
  identifiers.include?(identifier)
end
58
+
59
+ # Combines the category names with the original names.
60
+ # [
61
+ # [:title, 'Flarbl', :flarbl],
62
+ # [:category, 'Gnorf', :gnorf]
63
+ # ]
64
+ #
65
# The category name, followed by the parts of the token's result.
#
def to_result
  token_parts = @token.to_result
  [@category_name, *token_parts]
end
68
+
69
+ # Example:
70
+ # "exact title:Peter*:peter"
71
+ #
72
# The bundle identifier, a space, then the result parts joined by colons.
#
def to_s
  joined_result = to_result.join(':')
  "#{bundle.identifier} #{joined_result}"
end
75
+
76
+ end
77
+
78
+ end
79
+
80
+ end
@@ -0,0 +1,84 @@
1
+ module Internals
2
+
3
+ module Query
4
+
5
+ # Combinations are a number of Combination-s.
6
+ #
7
+ # They are the core of an allocation.
8
+ # An allocation consists of a number of combinations.
9
+ #
10
+ module Combinations # :nodoc:all
11
+
12
+ # Base Combinations contain methods for calculating score and ids.
13
+ #
14
+ class Base
15
+
16
+ attr_reader :combinations
17
+
18
+ delegate :empty?, :to => :@combinations
19
+
20
# Wraps the given combinations. The array is stored as-is (not copied).
#
def initialize(combinations = [])
  @combinations = combinations
end
23
+
24
# The hash is the hash of the wrapped combinations array.
#
def hash
  @combinations.hash
end
27
+
28
+ # Uses user specific weights to calculate a score for the combinations.
29
+ #
30
# The score is the total score plus the score from the given weights.
#
def calculate_score(weights)
  base  = total_score
  bonus = weighted_score(weights)
  base + bonus
end
33
# Sums up the weights of all combinations.
#
def total_score
  @combinations.sum { |combination| combination.weight }
end
36
# Lets the given weights score the combinations.
#
def weighted_score(weights)
  weights.score(@combinations)
end
39
+
40
+ # Wrap the combinations into an allocation with the result_identifier.
41
+ #
42
# Creates an Allocation from these combinations and the result identifier.
#
def pack_into_allocation(result_identifier)
  Allocation.new(self, result_identifier)
end
45
+
46
+ # Filters the tokens and identifiers such that only identifiers
47
+ # that are passed in, remain, including their tokens.
48
+ #
49
+ # Note: This method is not totally independent of the calculate_ids one.
50
+ # Since identifiers are only nullified, we need to not include the
51
+ # ids that have an associated identifier that is nil.
52
+ #
53
# Drops, in place, every combination that is not in the given identifiers.
#
# Note: Like Array#reject!, returns nil if nothing was dropped.
#
# TODO Rewrite to use the category!!!
#
def keep(identifiers = [])
  @combinations.reject! do |combination|
    wanted = combination.in?(identifiers)
    !wanted
  end
end
58
+
59
+ # Filters the tokens and identifiers such that identifiers
60
+ # that are passed in, are removed, including their tokens.
61
+ #
62
+ # Note: This method is not totally independent of the calculate_ids one.
63
+ # Since identifiers are only nullified, we need to not include the
64
+ # ids that have an associated identifier that is nil.
65
+ #
66
# Drops, in place, every combination that is in the given identifiers.
#
# Note: Like Array#reject!, returns nil if nothing was dropped.
#
# TODO Rewrite to use the category!!!
#
def remove(identifiers = [])
  @combinations.reject! do |combination|
    combination.in?(identifiers)
  end
end
71
+
72
+ #
73
+ #
74
# Returns the results of all combinations, in order.
#
def to_result
  @combinations.map { |combination| combination.to_result }
end
77
+
78
+ end
79
+
80
+ end
81
+
82
+ end
83
+
84
+ end
@@ -0,0 +1,58 @@
1
+ module Internals
2
+
3
+ module Query
4
+
5
+ # Combinations are a number of Combination-s.
6
+ #
7
+ # They are the core of an allocation.
8
+ # An allocation consists of a number of combinations.
9
+ #
10
+ module Combinations # :nodoc:all
11
+
12
+ # Memory Combinations contain specific methods for
13
+ # calculating score and ids in memory.
14
+ #
15
+ class Memory < Base
16
+
17
+ # Returns the result ids for the allocation.
18
+ #
19
+ # Sorts the ids by size and & through them in the following order (sizes):
20
+ # 0. [100_000, 400, 30, 2]
21
+ # 1. [2, 30, 400, 100_000]
22
+ # 2. (100_000 & (400 & (30 & 2))) # => result
23
+ #
24
+ # Note: Uses a C-optimized intersection routine for speed and memory efficiency.
25
+ #
26
+ # Note: In the memory based version we ignore the (amount) needed hint.
27
+ # TODO Not ignore it?
28
+ #
29
# Intersects the ids of all combinations.
#
# Both parameters are ignored here.
#
# Returns an empty array if there are no combinations. Otherwise the
# id arrays of the combinations are ordered by ascending size and
# handed to Performant::Array.memory_efficient_intersect.
#
def ids(_, _)
  return [] if @combinations.empty?

  id_arrays = @combinations.map { |combination| combination.ids }

  # Smallest first, such that the intersect can be performed faster.
  #
  id_arrays.sort! { |this_array, that_array| this_array.size <=> that_array.size }

  Performant::Array.memory_efficient_intersect(id_arrays)
end
51
+
52
+ end
53
+
54
+ end
55
+
56
+ end
57
+
58
+ end
@@ -0,0 +1,59 @@
1
+ module Internals
2
+
3
+ module Query
4
+
5
+ # Combinations are a number of Combination-s.
6
+ #
7
+ # They are the core of an allocation.
8
+ # An allocation consists of a number of combinations.
9
+ #
10
+ module Combinations # :nodoc:all
11
+
12
+ # Redis Combinations contain specific methods for
13
+ # calculating score and ids with Redis.
14
+ #
15
+ class Redis < Base
16
+
17
+ # TODO Err… yeah. Wrap in Picky specific wrapper.
18
+ #
19
# Hands the combinations to the superclass and makes sure the
# class-wide Redis client exists (it is created once and then reused).
#
def initialize(combinations)
  super

  @@redis ||= ::Redis.new
end
24
+
25
+ # Returns the result ids for the allocation.
26
+ #
27
# Lets Redis intersect the sets of all combinations and returns
# a range of the stored result.
#
# Returns an empty array if there are no combinations.
#
# NOTE(review): Redis documents both ZRANGE bounds as inclusive, so
# offset..(offset + amount) can yield amount + 1 members. Confirm
# whether callers rely on that before changing it.
#
def ids(amount, offset)
  return [] if @combinations.empty?

  keys = @combinations.map { |combination| "#{combination.identifier}" }

  result_id = generate_intermediate_result_id

  # TODO multi?
  #
  @@redis.zinterstore(result_id, keys)
  @@redis.zrange(result_id, offset, (offset + amount))
end
43
+
44
+ # Generate a multiple host/process safe result id.
45
+ #
46
+ # TODO How expensive is Process.pid? If it changes once, remember forever?
47
+ #
48
# Builds the key under which the intermediate result is stored.
#
# The key is a symbol containing the current process id.
#
# TODO host -> extract host.
#
def generate_intermediate_result_id
  "host:#{Process.pid}:picky:result".to_sym
end
52
+
53
+ end
54
+
55
+ end
56
+
57
+ end
58
+
59
+ end
@@ -0,0 +1,180 @@
1
+ module Internals
2
+
3
+ module Query
4
+
5
+ # The query indexes class bundles indexes given to a query.
6
+ #
7
+ # Example:
8
+ # # If you call
9
+ # Query::Full.new dvd_index, mp3_index, video_index
10
+ #
11
+ # # What it does is take the three given (API-) indexes and
12
+ # # bundle them in an index bundle.
13
+ #
14
+ class Indexes
15
+
16
+ attr_reader :indexes
17
+
18
+ # Creates a new Query::Indexes.
19
+ #
20
+ # Its job is to generate all possible combinations, but also
21
+ # checking whether the query indexes are all of the same type.
22
+ # Note: We cannot mix memory and redis indexes just yet.
23
+ #
24
# Takes any number of index definitions, followed by the combinations type.
#
# Stores the combinations type and the #indexed of each definition.
#
def initialize(*index_definitions, combinations_type)
  @combinations_type = combinations_type
  @indexes = index_definitions.map { |definition| definition.indexed }
end
28
+
29
+ # Returns a number of possible allocations for the given tokens.
30
+ #
31
# Builds the Allocations for the given tokens over all indexes.
#
# For each index, the tokens' possible combinations are compacted
# and expanded. Each expanded combination is wrapped in the
# combinations type and packed into an allocation with the index's
# result identifier. Indexes that expand to nothing are skipped.
#
def allocations_for(tokens)
  collected = []

  indexes.each do |index|
    possible_combinations = tokens.possible_combinations_in(index)

    # Tokens that allocate to nothing in this index are ignored.
    #
    possible_combinations.compact!

    expanded_combinations = expand_combinations_from(possible_combinations)

    # If there are none, try the next index.
    #
    next unless expanded_combinations

    # TODO Do not extract result_identifier. Remove pack_into_allocation.
    #
    packed = expanded_combinations.map! do |expanded_combination|
      @combinations_type.new(expanded_combination).pack_into_allocation(index.result_identifier)
    end

    collected += packed
  end

  Allocations.new(collected)
end
61
+
62
+ # This is the core of the search engine.
63
+ #
64
+ # Gets an array of
65
+ # [
66
+ # [<combinations for token1>],
67
+ # [<combinations for token2>],
68
+ # [<combinations for token3>]
69
+ # ]
70
+ #
71
+ # Generates all possible allocations of combinations.
72
+ # [
73
+ # [first combination of token1, first c of t2, first c of t3],
74
+ # [first combination of token1, first c of t2, second c of t3]
75
+ # ...
76
+ # ]
77
+ #
78
+ # Generates all possible combinations of array elements:
79
+ # [1,2,3] x [a,b,c] x [k,l,m] => [[1,a,k], [1,a,l], [1,a,m], [1,b,k], [1,b,l], [1,b,m], [1,c,k], ..., [3,c,m]]
80
+ # Note: Also calculates the weights and sorts them accordingly.
81
+ #
82
+ # Note: This is a heavily optimized ruby version.
83
+ #
84
+ # Works like this:
85
+ # [1,2,3], [a,b,c], [k,l,m] are expanded to
86
+ # group mult: 1
87
+ # <- single mult ->
88
+ # [1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3] = 27 elements
89
+ # group mult: 3
90
+ # <- -> s/m
91
+ # [a,a,a,b,b,b,c,c,c,a,a,a,b,b,b,c,c,c,a,a,a,b,b,b,c,c,c] = 27 elements
92
+ # group mult: 9
93
+ # <> s/m
94
+ # [k,l,m,k,l,m,k,l,m,k,l,m,k,l,m,k,l,m,k,l,m,k,l,m,k,l,m] = 27 elements
95
+ #
96
+ # It is then recombined, where
97
+ # [
98
+ # [a,a,b,b,c,c]
99
+ # [d,e,d,e,d,e]
100
+ # ]
101
+ # becomes
102
+ # [
103
+ # [a,d],
104
+ # [a,e],
105
+ # [b,d],
106
+ # [b,e],
107
+ # [c,d],
108
+ # [c,e]
109
+ # ]
110
+ #
111
+ # Note: Not using transpose as it is slower.
112
+ #
113
+ # Returns nil if there are no combinations.
114
+ #
115
+ # Note: Of course I could split this method up into smaller
116
+ # ones, but I guess I am a bit sentimental.
117
+ #
118
# Generates all possible allocations of combinations.
#
# Parameters:
#  * possible_combinations: an array with one array of combinations
#    per token, e.g. [[1,2,3], [a,b,c], [k,l,m]]
#
# Returns every way of picking one combination per token, in the
# order of the given arrays:
#   [[1,a,k], [1,a,l], [1,a,m], [1,b,k], ..., [3,c,m]]
#
# Returns nil if there are no combinations at all, or if one of the
# tokens has no combination (it cannot be allocated then).
#
# Note: The given array is not modified.
#
def expand_combinations_from(possible_combinations)
  # One token without combinations means there is no allocation.
  #
  return if possible_combinations.any?(&:empty?)
  return if possible_combinations.empty?

  # Array#product yields the cartesian product in exactly the order
  # described above.
  #
  first, *rest = possible_combinations
  first.product(*rest)
end
175
+
176
+ end
177
+
178
+ end
179
+
180
+ end
@@ -0,0 +1,81 @@
1
+ # coding: utf-8
2
+ #
3
+ module Internals
4
+
5
+ #
6
+ #
7
+ module Query
8
+
9
+ # A single qualifier.
10
+ #
11
+ class Qualifier # :nodoc:all
12
+
13
+ attr_reader :normalized_qualifier, :codes
14
+
15
+ #
16
+ #
17
+ # codes is an array.
18
+ #
19
# Stores the normalized qualifier and its codes.
#
# codes is an array; each code is converted to a symbol.
#
def initialize(normalized_qualifier, codes)
  @normalized_qualifier = normalized_qualifier
  @codes = codes.map { |code| code.to_sym }
end
23
+
24
+ # Will overwrite if the key is present in the hash.
25
+ #
26
# Maps each code to the normalized qualifier in the given hash.
#
# Note: Existing entries for a code are overwritten.
#
def inject_into(hash)
  codes.each { |code| hash.store(code, normalized_qualifier) }
end
31
+
32
+ end
33
+
34
+ # Collection class for qualifiers.
35
+ #
36
+ class Qualifiers # :nodoc:all
37
+
38
+ include Singleton
39
+
40
+ attr_reader :qualifiers, :normalization_mapping
41
+
42
+ delegate :<<, :to => :qualifiers
43
+
44
+ #
45
+ #
46
# Starts out with no qualifiers and an empty normalization mapping.
#
def initialize
  @normalization_mapping = {}
  @qualifiers = []
end
50
+
51
+ # TODO Spec.
52
+ #
53
# Appends a new Qualifier, built from the name and the qualifiers,
# to the instance.
#
# TODO Spec.
#
def self.add(name, qualifiers)
  registry = instance
  registry << Qualifier.new(name, qualifiers)
end
56
+
57
+ # Uses the qualifiers to prepare (optimize) the qualifier handling.
58
+ #
59
# Lets every qualifier inject itself into the normalization mapping.
#
def prepare
  qualifiers.each { |qualifier| qualifier.inject_into(normalization_mapping) }
end
64
+
65
+ # Normalizes the given qualifier.
66
+ #
67
+ # Returns nil if it is not allowed, the normalized qualifier if it is.
68
+ #
69
+ # Note: Normalizes.
70
+ #
71
# Looks up the normalized form of the given qualifier.
#
# Returns nil for a blank qualifier, and for one that is not
# in the normalization mapping.
#
def normalize(qualifier)
  qualifier.blank? ? nil : normalization_mapping[qualifier.to_sym]
end
76
+
77
+ end
78
+
79
+ end
80
+
81
+ end