picky 2.5.2 → 2.6.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (255) hide show
  1. data/lib/picky/adapters/rack/base.rb +23 -0
  2. data/lib/picky/adapters/rack/live_parameters.rb +33 -0
  3. data/lib/picky/adapters/rack/query.rb +65 -0
  4. data/lib/picky/adapters/rack.rb +30 -0
  5. data/lib/picky/application.rb +5 -5
  6. data/lib/picky/backend/backend.rb +108 -0
  7. data/lib/picky/backend/file/basic.rb +101 -0
  8. data/lib/picky/backend/file/json.rb +34 -0
  9. data/lib/picky/backend/file/marshal.rb +34 -0
  10. data/lib/picky/backend/file/text.rb +56 -0
  11. data/lib/picky/backend/files.rb +30 -0
  12. data/lib/picky/backend/redis/basic.rb +85 -0
  13. data/lib/picky/backend/redis/list_hash.rb +49 -0
  14. data/lib/picky/backend/redis/string_hash.rb +40 -0
  15. data/lib/picky/backend/redis.rb +40 -0
  16. data/lib/picky/calculations/location.rb +57 -0
  17. data/lib/picky/categories.rb +62 -0
  18. data/lib/picky/categories_indexed.rb +93 -0
  19. data/lib/picky/categories_indexing.rb +12 -0
  20. data/lib/picky/category.rb +127 -0
  21. data/lib/picky/category_indexed.rb +64 -0
  22. data/lib/picky/category_indexing.rb +145 -0
  23. data/lib/picky/{internals/ext → ext}/maybe_compile.rb +0 -0
  24. data/lib/picky/{internals/ext → ext}/ruby19/extconf.rb +0 -0
  25. data/lib/picky/{internals/ext → ext}/ruby19/performant.c +0 -0
  26. data/lib/picky/{internals/extensions → extensions}/array.rb +0 -0
  27. data/lib/picky/extensions/class.rb +11 -0
  28. data/lib/picky/{internals/extensions → extensions}/hash.rb +0 -0
  29. data/lib/picky/{internals/extensions → extensions}/module.rb +0 -0
  30. data/lib/picky/{internals/extensions → extensions}/object.rb +0 -0
  31. data/lib/picky/{internals/extensions → extensions}/symbol.rb +0 -0
  32. data/lib/picky/frontend_adapters/rack.rb +146 -0
  33. data/lib/picky/generators/aliases.rb +3 -3
  34. data/lib/picky/generators/base.rb +15 -0
  35. data/lib/picky/generators/partial/default.rb +5 -0
  36. data/lib/picky/generators/partial/none.rb +31 -0
  37. data/lib/picky/generators/partial/strategy.rb +25 -0
  38. data/lib/picky/generators/partial/substring.rb +118 -0
  39. data/lib/picky/generators/partial_generator.rb +15 -0
  40. data/lib/picky/generators/similarity/default.rb +7 -0
  41. data/lib/picky/generators/similarity/double_metaphone.rb +28 -0
  42. data/lib/picky/generators/similarity/metaphone.rb +28 -0
  43. data/lib/picky/generators/similarity/none.rb +31 -0
  44. data/lib/picky/generators/similarity/phonetic.rb +65 -0
  45. data/lib/picky/generators/similarity/soundex.rb +28 -0
  46. data/lib/picky/generators/similarity/strategy.rb +9 -0
  47. data/lib/picky/generators/similarity_generator.rb +15 -0
  48. data/lib/picky/generators/strategy.rb +14 -0
  49. data/lib/picky/generators/weights/default.rb +7 -0
  50. data/lib/picky/generators/weights/logarithmic.rb +39 -0
  51. data/lib/picky/generators/weights/strategy.rb +9 -0
  52. data/lib/picky/generators/weights_generator.rb +15 -0
  53. data/lib/picky/{internals/helpers → helpers}/measuring.rb +0 -0
  54. data/lib/picky/index/base.rb +119 -104
  55. data/lib/picky/index/base_indexed.rb +27 -0
  56. data/lib/picky/index/base_indexing.rb +119 -0
  57. data/lib/picky/index/memory.rb +6 -18
  58. data/lib/picky/index/redis.rb +6 -18
  59. data/lib/picky/indexed/bundle/base.rb +110 -0
  60. data/lib/picky/indexed/bundle/memory.rb +91 -0
  61. data/lib/picky/indexed/bundle/redis.rb +45 -0
  62. data/lib/picky/indexed/wrappers/bundle/calculation.rb +35 -0
  63. data/lib/picky/indexed/wrappers/bundle/location.rb +42 -0
  64. data/lib/picky/indexed/wrappers/bundle/wrapper.rb +43 -0
  65. data/lib/picky/indexed/wrappers/category/location.rb +25 -0
  66. data/lib/picky/indexed/wrappers/exact_first.rb +55 -0
  67. data/lib/picky/{internals/indexers → indexers}/base.rb +0 -0
  68. data/lib/picky/{internals/indexers → indexers}/parallel.rb +0 -0
  69. data/lib/picky/{internals/indexers → indexers}/serial.rb +0 -0
  70. data/lib/picky/{internals/indexers → indexers}/solr.rb +0 -0
  71. data/lib/picky/indexes.rb +73 -0
  72. data/lib/picky/indexes_indexed.rb +29 -0
  73. data/lib/picky/indexes_indexing.rb +49 -0
  74. data/lib/picky/indexing/bundle/base.rb +212 -0
  75. data/lib/picky/indexing/bundle/memory.rb +25 -0
  76. data/lib/picky/indexing/bundle/redis.rb +24 -0
  77. data/lib/picky/indexing/bundle/super_base.rb +61 -0
  78. data/lib/picky/indexing/wrappers/category/location.rb +25 -0
  79. data/lib/picky/interfaces/live_parameters.rb +8 -8
  80. data/lib/picky/loader.rb +89 -95
  81. data/lib/picky/{internals/performant.rb → performant.rb} +0 -0
  82. data/lib/picky/query/allocation.rb +84 -0
  83. data/lib/picky/query/allocations.rb +114 -0
  84. data/lib/picky/query/combination.rb +76 -0
  85. data/lib/picky/query/combinations/base.rb +70 -0
  86. data/lib/picky/query/combinations/memory.rb +48 -0
  87. data/lib/picky/query/combinations/redis.rb +86 -0
  88. data/lib/picky/query/indexes.rb +195 -0
  89. data/lib/picky/query/qualifiers.rb +76 -0
  90. data/lib/picky/query/token.rb +198 -0
  91. data/lib/picky/query/tokens.rb +103 -0
  92. data/lib/picky/{internals/query → query}/weights.rb +0 -0
  93. data/lib/picky/results.rb +1 -1
  94. data/lib/picky/search.rb +6 -6
  95. data/lib/picky/{internals/solr → solr}/schema_generator.rb +0 -0
  96. data/lib/picky/sources/db.rb +7 -7
  97. data/lib/picky/sources/wrappers/location.rb +2 -2
  98. data/lib/picky/tokenizers/base.rb +224 -0
  99. data/lib/picky/tokenizers/index.rb +30 -0
  100. data/lib/picky/tokenizers/location.rb +49 -0
  101. data/lib/picky/tokenizers/query.rb +55 -0
  102. data/lib/tasks/index.rake +4 -3
  103. data/lib/tasks/try.rake +2 -2
  104. data/spec/lib/{internals/adapters → adapters}/rack/base_spec.rb +1 -1
  105. data/spec/lib/{internals/adapters → adapters}/rack/live_parameters_spec.rb +1 -1
  106. data/spec/lib/{internals/adapters → adapters}/rack/query_spec.rb +1 -1
  107. data/spec/lib/application_spec.rb +3 -3
  108. data/spec/lib/{internals/index → backend}/file/basic_spec.rb +1 -1
  109. data/spec/lib/{internals/index → backend}/file/json_spec.rb +1 -1
  110. data/spec/lib/{internals/index → backend}/file/marshal_spec.rb +1 -1
  111. data/spec/lib/{internals/index → backend}/file/text_spec.rb +1 -1
  112. data/spec/lib/{internals/index → backend}/files_spec.rb +3 -3
  113. data/spec/lib/{internals/index → backend}/redis/basic_spec.rb +1 -1
  114. data/spec/lib/{internals/index → backend}/redis/list_hash_spec.rb +1 -1
  115. data/spec/lib/{internals/index → backend}/redis/string_hash_spec.rb +1 -1
  116. data/spec/lib/{internals/index → backend}/redis_spec.rb +11 -5
  117. data/spec/lib/{internals/calculations → calculations}/location_spec.rb +1 -1
  118. data/spec/lib/{internals/indexed/categories_spec.rb → categories_indexed_spec.rb} +10 -10
  119. data/spec/lib/{internals/indexed/category_spec.rb → category_indexed_spec.rb} +12 -12
  120. data/spec/lib/{internals/indexing/category_spec.rb → category_indexing_spec.rb} +10 -10
  121. data/spec/lib/{internals/cores_spec.rb → cores_spec.rb} +0 -0
  122. data/spec/lib/{internals/extensions → extensions}/array_spec.rb +0 -0
  123. data/spec/lib/{internals/extensions → extensions}/hash_spec.rb +0 -0
  124. data/spec/lib/{internals/extensions → extensions}/module_spec.rb +0 -0
  125. data/spec/lib/{internals/extensions → extensions}/object_spec.rb +0 -0
  126. data/spec/lib/{internals/extensions → extensions}/symbol_spec.rb +0 -0
  127. data/spec/lib/{internals/frontend_adapters → frontend_adapters}/rack_spec.rb +10 -10
  128. data/spec/lib/generators/aliases_spec.rb +3 -3
  129. data/spec/lib/{internals/generators → generators}/cacher_strategy_spec.rb +1 -1
  130. data/spec/lib/{internals/generators → generators}/partial/default_spec.rb +3 -3
  131. data/spec/lib/{internals/generators → generators}/partial/none_spec.rb +2 -2
  132. data/spec/lib/{internals/generators → generators}/partial/substring_spec.rb +1 -1
  133. data/spec/lib/{internals/generators → generators}/partial_generator_spec.rb +3 -3
  134. data/spec/lib/{internals/generators → generators}/similarity/double_metaphone_spec.rb +1 -1
  135. data/spec/lib/{internals/generators → generators}/similarity/metaphone_spec.rb +1 -1
  136. data/spec/lib/{internals/generators → generators}/similarity/none_spec.rb +1 -1
  137. data/spec/lib/{internals/generators → generators}/similarity/phonetic_spec.rb +1 -1
  138. data/spec/lib/{internals/generators → generators}/similarity/soundex_spec.rb +1 -1
  139. data/spec/lib/{internals/generators → generators}/similarity_generator_spec.rb +2 -2
  140. data/spec/lib/{internals/generators → generators}/weights/logarithmic_spec.rb +1 -1
  141. data/spec/lib/{internals/generators → generators}/weights_generator_spec.rb +5 -5
  142. data/spec/lib/{internals/helpers → helpers}/measuring_spec.rb +0 -0
  143. data/spec/lib/{internals/indexed/index_spec.rb → index/base_indexed_spec.rb} +5 -5
  144. data/spec/lib/{internals/indexing/index_spec.rb → index/base_indexing_spec.rb} +6 -19
  145. data/spec/lib/index/base_spec.rb +10 -53
  146. data/spec/lib/{internals/indexed → indexed}/bundle/memory_spec.rb +5 -5
  147. data/spec/lib/{internals/indexed → indexed}/bundle/redis_spec.rb +4 -4
  148. data/spec/lib/{internals/indexed → indexed}/wrappers/bundle/calculation_spec.rb +1 -1
  149. data/spec/lib/{internals/indexed → indexed}/wrappers/bundle/wrapper_spec.rb +1 -1
  150. data/spec/lib/{internals/indexed → indexed}/wrappers/exact_first_spec.rb +7 -7
  151. data/spec/lib/{internals/indexers → indexers}/base_spec.rb +0 -0
  152. data/spec/lib/{internals/indexers → indexers}/parallel_spec.rb +0 -0
  153. data/spec/lib/{internals/indexers → indexers}/serial_spec.rb +0 -0
  154. data/spec/lib/indexes_class_spec.rb +30 -0
  155. data/spec/lib/{indexed/indexes_spec.rb → indexes_indexed_spec.rb} +1 -1
  156. data/spec/lib/{indexing/indexes_spec.rb → indexes_indexing_spec.rb} +8 -8
  157. data/spec/lib/{internals/indexing/indexes_spec.rb → indexes_spec.rb} +15 -12
  158. data/spec/lib/{internals/indexing → indexing}/bundle/memory_partial_generation_speed_spec.rb +4 -4
  159. data/spec/lib/{internals/indexing → indexing}/bundle/memory_spec.rb +3 -3
  160. data/spec/lib/{internals/indexing → indexing}/bundle/redis_spec.rb +3 -3
  161. data/spec/lib/{internals/indexing → indexing}/bundle/super_base_spec.rb +2 -2
  162. data/spec/lib/{internals/interfaces → interfaces}/live_parameters_spec.rb +0 -0
  163. data/spec/lib/query/allocation_spec.rb +1 -1
  164. data/spec/lib/query/allocations_spec.rb +1 -1
  165. data/spec/lib/query/combination_spec.rb +5 -5
  166. data/spec/lib/query/combinations/base_spec.rb +1 -1
  167. data/spec/lib/query/combinations/memory_spec.rb +1 -1
  168. data/spec/lib/query/combinations/redis_spec.rb +1 -1
  169. data/spec/lib/query/indexes_spec.rb +1 -1
  170. data/spec/lib/query/qualifiers_spec.rb +4 -4
  171. data/spec/lib/query/token_spec.rb +3 -3
  172. data/spec/lib/query/tokens_spec.rb +32 -32
  173. data/spec/lib/search_spec.rb +5 -5
  174. data/spec/lib/{internals/solr → solr}/schema_generator_spec.rb +0 -0
  175. data/spec/lib/sources/db_spec.rb +4 -8
  176. data/spec/lib/sources/wrappers/location_spec.rb +1 -1
  177. data/spec/lib/{internals/tokenizers → tokenizers}/base_spec.rb +1 -1
  178. data/spec/lib/{internals/tokenizers → tokenizers}/index_spec.rb +1 -1
  179. data/spec/lib/{internals/tokenizers → tokenizers}/query_spec.rb +1 -1
  180. metadata +214 -215
  181. data/lib/picky/aliases.rb +0 -4
  182. data/lib/picky/index_bundle.rb +0 -48
  183. data/lib/picky/indexed/indexes.rb +0 -59
  184. data/lib/picky/indexing/indexes.rb +0 -87
  185. data/lib/picky/internals/adapters/rack/base.rb +0 -27
  186. data/lib/picky/internals/adapters/rack/live_parameters.rb +0 -37
  187. data/lib/picky/internals/adapters/rack/query.rb +0 -69
  188. data/lib/picky/internals/adapters/rack.rb +0 -34
  189. data/lib/picky/internals/calculations/location.rb +0 -59
  190. data/lib/picky/internals/frontend_adapters/rack.rb +0 -150
  191. data/lib/picky/internals/generators/base.rb +0 -19
  192. data/lib/picky/internals/generators/partial/default.rb +0 -7
  193. data/lib/picky/internals/generators/partial/none.rb +0 -35
  194. data/lib/picky/internals/generators/partial/strategy.rb +0 -29
  195. data/lib/picky/internals/generators/partial/substring.rb +0 -122
  196. data/lib/picky/internals/generators/partial_generator.rb +0 -19
  197. data/lib/picky/internals/generators/similarity/default.rb +0 -9
  198. data/lib/picky/internals/generators/similarity/double_metaphone.rb +0 -32
  199. data/lib/picky/internals/generators/similarity/metaphone.rb +0 -32
  200. data/lib/picky/internals/generators/similarity/none.rb +0 -35
  201. data/lib/picky/internals/generators/similarity/phonetic.rb +0 -69
  202. data/lib/picky/internals/generators/similarity/soundex.rb +0 -32
  203. data/lib/picky/internals/generators/similarity/strategy.rb +0 -11
  204. data/lib/picky/internals/generators/similarity_generator.rb +0 -19
  205. data/lib/picky/internals/generators/strategy.rb +0 -18
  206. data/lib/picky/internals/generators/weights/default.rb +0 -9
  207. data/lib/picky/internals/generators/weights/logarithmic.rb +0 -43
  208. data/lib/picky/internals/generators/weights/strategy.rb +0 -11
  209. data/lib/picky/internals/generators/weights_generator.rb +0 -19
  210. data/lib/picky/internals/index/backend.rb +0 -112
  211. data/lib/picky/internals/index/file/basic.rb +0 -105
  212. data/lib/picky/internals/index/file/json.rb +0 -38
  213. data/lib/picky/internals/index/file/marshal.rb +0 -38
  214. data/lib/picky/internals/index/file/text.rb +0 -60
  215. data/lib/picky/internals/index/files.rb +0 -34
  216. data/lib/picky/internals/index/redis/basic.rb +0 -89
  217. data/lib/picky/internals/index/redis/list_hash.rb +0 -53
  218. data/lib/picky/internals/index/redis/string_hash.rb +0 -44
  219. data/lib/picky/internals/index/redis.rb +0 -44
  220. data/lib/picky/internals/indexed/bundle/base.rb +0 -114
  221. data/lib/picky/internals/indexed/bundle/memory.rb +0 -95
  222. data/lib/picky/internals/indexed/bundle/redis.rb +0 -49
  223. data/lib/picky/internals/indexed/categories.rb +0 -140
  224. data/lib/picky/internals/indexed/category.rb +0 -111
  225. data/lib/picky/internals/indexed/index.rb +0 -63
  226. data/lib/picky/internals/indexed/wrappers/bundle/calculation.rb +0 -37
  227. data/lib/picky/internals/indexed/wrappers/bundle/location.rb +0 -44
  228. data/lib/picky/internals/indexed/wrappers/bundle/wrapper.rb +0 -45
  229. data/lib/picky/internals/indexed/wrappers/category/location.rb +0 -27
  230. data/lib/picky/internals/indexed/wrappers/exact_first.rb +0 -59
  231. data/lib/picky/internals/indexing/bundle/base.rb +0 -216
  232. data/lib/picky/internals/indexing/bundle/memory.rb +0 -29
  233. data/lib/picky/internals/indexing/bundle/redis.rb +0 -28
  234. data/lib/picky/internals/indexing/bundle/super_base.rb +0 -65
  235. data/lib/picky/internals/indexing/category.rb +0 -153
  236. data/lib/picky/internals/indexing/index.rb +0 -142
  237. data/lib/picky/internals/indexing/wrappers/category/location.rb +0 -27
  238. data/lib/picky/internals/query/allocation.rb +0 -88
  239. data/lib/picky/internals/query/allocations.rb +0 -118
  240. data/lib/picky/internals/query/combination.rb +0 -80
  241. data/lib/picky/internals/query/combinations/base.rb +0 -74
  242. data/lib/picky/internals/query/combinations/memory.rb +0 -52
  243. data/lib/picky/internals/query/combinations/redis.rb +0 -90
  244. data/lib/picky/internals/query/indexes.rb +0 -199
  245. data/lib/picky/internals/query/qualifiers.rb +0 -82
  246. data/lib/picky/internals/query/token.rb +0 -202
  247. data/lib/picky/internals/query/tokens.rb +0 -109
  248. data/lib/picky/internals/shared/category.rb +0 -52
  249. data/lib/picky/internals/tokenizers/base.rb +0 -228
  250. data/lib/picky/internals/tokenizers/index.rb +0 -34
  251. data/lib/picky/internals/tokenizers/location.rb +0 -54
  252. data/lib/picky/internals/tokenizers/query.rb +0 -59
  253. data/lib/picky/internals.rb +0 -2
  254. data/spec/lib/aliases_spec.rb +0 -9
  255. data/spec/lib/index_bundle_spec.rb +0 -69
@@ -0,0 +1,40 @@
1
+ module Backend
2
+
3
+ class Redis
4
+
5
+ class StringHash < Basic
6
+
7
+ # Writes the hash into Redis.
8
+ #
9
+ # Note: We could use multi, but it did not help.
10
+ #
11
+ def dump hash
12
+ clear
13
+ hash.each_pair do |key, value|
14
+ backend.hset namespace, key, value
15
+ end
16
+ end
17
+
18
+ # Clears the hash.
19
+ #
20
+ def clear
21
+ backend.del namespace
22
+ end
23
+
24
+ # Get a collection.
25
+ #
26
+ def collection sym
27
+ raise "Can't retrieve a collection from a StringHash. Use Index::Redis::ListHash."
28
+ end
29
+
30
+ # Get a single value.
31
+ #
32
+ def member sym
33
+ backend.hget namespace, sym
34
+ end
35
+
36
+ end
37
+
38
+ end
39
+
40
+ end
@@ -0,0 +1,40 @@
1
+ module Backend
2
+
3
+ # TODO Needs a reconnect to be run after forking.
4
+ #
5
+ class Redis < Backend
6
+
7
+ def initialize bundle_name, category
8
+ super bundle_name, category
9
+
10
+ # Refine a few Redis "types".
11
+ #
12
+ @index = Redis::ListHash.new "#{category.identifier}:#{bundle_name}:index"
13
+ @weights = Redis::StringHash.new "#{category.identifier}:#{bundle_name}:weights"
14
+ @similarity = Redis::ListHash.new "#{category.identifier}:#{bundle_name}:similarity"
15
+ @configuration = Redis::StringHash.new "#{category.identifier}:#{bundle_name}:configuration"
16
+ end
17
+
18
+ # Delegate to the right collection.
19
+ #
20
+ def ids sym
21
+ index.collection sym
22
+ end
23
+
24
+ # Delegate to the right member value.
25
+ #
26
+ # Note: Converts to float.
27
+ #
28
+ def weight sym
29
+ weights.member(sym).to_f
30
+ end
31
+
32
+ # Delegate to a member value.
33
+ #
34
+ def setting sym
35
+ configuration.member sym
36
+ end
37
+
38
+ end
39
+
40
+ end
@@ -0,0 +1,57 @@
1
+ module Calculations # :nodoc:all
2
+
3
+ # A location calculation recalculates a 1-d location
4
+ # to the Picky internal 1-d "grid".
5
+ #
6
+ # For example, if you have a location x == 12.3456,
7
+ # it will be recalculated into 3, if the minimum is 9
8
+ # and the grid length is 1.
9
+ #
10
+ class Location
11
+
12
+ attr_reader :minimum, :precision, :grid
13
+
14
+ def initialize user_grid, precision = nil
15
+ @user_grid = user_grid
16
+ @precision = precision || 1
17
+ @grid = @user_grid / (@precision + 0.5)
18
+ end
19
+
20
+ def minimum= minimum
21
+ # Add a margin of 1 user grid.
22
+ #
23
+ minimum -= @user_grid
24
+
25
+ # Subtract one more grid so that the index key never falls on 0.
26
+ # Why? to_i maps by default to 0.
27
+ #
28
+ minimum -= @grid
29
+
30
+ @minimum = minimum
31
+ end
32
+
33
+ #
34
+ #
35
+ def add_margin length
36
+ @minimum -= length
37
+ end
38
+
39
+ #
40
+ #
41
+ def recalculated_range location
42
+ range recalculate(location)
43
+ end
44
+ #
45
+ #
46
+ def range around_location
47
+ (around_location - @precision)..(around_location + @precision)
48
+ end
49
+ #
50
+ #
51
+ def recalculate location
52
+ ((location - @minimum) / @grid).floor
53
+ end
54
+
55
+ end
56
+
57
+ end
@@ -0,0 +1,62 @@
1
+ class Categories
2
+
3
+ attr_reader :categories, :category_hash
4
+
5
+ delegate :each,
6
+ :first,
7
+ :map,
8
+ :to => :categories
9
+
10
+ each_delegate :reindex,
11
+ :to => :categories
12
+
13
+ # A list of indexed categories.
14
+ #
15
+ # Options:
16
+ # * ignore_unassigned_tokens: Ignore the given token if it cannot be matched to a category.
17
+ # The default behaviour is that if a token does not match to
18
+ # any category, the query will not return anything (since a
19
+ # single token cannot be matched). If you set this option to
20
+ # true, any token that cannot be matched to a category will be
21
+ # simply ignored.
22
+ # Use this if only a few matched words are important, for example
23
+ # if, of the query "Jonathan Myers 86455 Las Cucarachas",
24
+ # you only want to match the zipcode, to have the search engine
25
+ # display advertisements on the side for the zipcode.
26
+ # Nifty! :)
27
+ #
28
+ def initialize options = {}
29
+ clear
30
+
31
+ @ignore_unassigned_tokens = options[:ignore_unassigned_tokens] || false
32
+ end
33
+
34
+ # Clears both the array of categories and the hash of categories.
35
+ #
36
+ def clear
37
+ @categories = []
38
+ @category_hash = {}
39
+ end
40
+
41
+ # Find a given category in the categories.
42
+ #
43
+ def [] category_name
44
+ category_name = category_name.to_sym
45
+ category_hash[category_name] || raise_not_found(category_name)
46
+ end
47
+ def raise_not_found category_name
48
+ raise %Q{Index category "#{category_name}" not found. Possible categories: "#{categories.map(&:name).join('", "')}".}
49
+ end
50
+
51
+ # Add the given category to the list of categories.
52
+ #
53
+ def << category
54
+ categories << category
55
+ category_hash[category.name] = category
56
+ end
57
+
58
+ def to_s
59
+ categories.join(', ')
60
+ end
61
+
62
+ end
@@ -0,0 +1,93 @@
1
+ class Categories
2
+
3
+ attr_reader :ignore_unassigned_tokens
4
+
5
+ each_delegate :load_from_cache,
6
+ :analyze,
7
+ :to => :categories
8
+
9
+ # Return all possible combinations for the given token.
10
+ #
11
+ # This checks if it needs to also search through similar
12
+ # tokens, if for example, the token is one with ~.
13
+ # If yes, it puts together all solutions.
14
+ #
15
+ def possible_combinations_for token
16
+ token.similar? ? similar_possible_for(token) : possible_for(token)
17
+ end
18
+ # Gets all similar tokens and puts together the possible combinations
19
+ # for each found similar token.
20
+ #
21
+ def similar_possible_for token
22
+ # Get as many tokens as necessary
23
+ #
24
+ tokens = similar_tokens_for token
25
+ # possible combinations
26
+ #
27
+ inject_possible_for tokens
28
+ end
29
+ def similar_tokens_for token
30
+ text = token.text
31
+ categories.inject([]) do |result, category|
32
+ next_token = token
33
+ # Note: We could also break off here if not all the available
34
+ # similars are needed.
35
+ # Wait for a concrete case that needs this before taking
36
+ # action.
37
+ #
38
+ while next_token = next_token.next_similar_token(category)
39
+ result << next_token if next_token && next_token.text != text
40
+ end
41
+ result
42
+ end
43
+ end
44
+ def inject_possible_for tokens
45
+ tokens.inject([]) do |result, token|
46
+ possible = possible_categories token
47
+ result + possible_for(token, possible)
48
+ end
49
+ end
50
+
51
+ # Returns possible Combinations for the token.
52
+ #
53
+ # Note: The preselected_categories param is an optimization.
54
+ #
55
+ # Note: Returns [] if no categories matched (will produce no result).
56
+ # Returns nil if this token needs to be removed from the query.
57
+ # (Also none of the categories matched, but the ignore unassigned
58
+ # tokens option is true)
59
+ #
60
+ def possible_for token, preselected_categories = nil
61
+ possible = (preselected_categories || possible_categories(token)).inject([]) do |combinations, category|
62
+ combination = category.combination_for token
63
+ combination ? combinations << combination : combinations
64
+ end
65
+ # This is an optimization to mark tokens that are ignored.
66
+ #
67
+ return if ignore_unassigned_tokens && possible.empty?
68
+ possible # wrap in combinations
69
+ end
70
+ # This returns the possible categories for this token.
71
+ # If the user has already preselected a category for this token,
72
+ # like "artist:moby", only those are returned; if not, just return all for the given token,
73
+ # since all are possible.
74
+ #
75
+ # Note: Once I thought this was called too often. But it is not (18.01.2011).
76
+ #
77
+ def possible_categories token
78
+ user_defined_categories(token) || categories
79
+ end
80
+ # This returns the array of categories if the user has defined
81
+ # an existing category.
82
+ #
83
+ # Note: Returns nil if the user did not define one
84
+ # or [] if he/she has defined a non-existing one.
85
+ #
86
+ def user_defined_categories token
87
+ names = token.user_defined_category_names
88
+ names && names.map do |name|
89
+ category_hash[name]
90
+ end.compact
91
+ end
92
+
93
+ end
@@ -0,0 +1,12 @@
1
+ class Categories
2
+
3
+ each_delegate :backup_caches,
4
+ :cache,
5
+ :check_caches,
6
+ :clear_caches,
7
+ :create_directory_structure,
8
+ :generate_caches,
9
+ :restore_caches,
10
+ :to => :categories
11
+
12
+ end
@@ -0,0 +1,127 @@
1
+ class Category
2
+
3
+ attr_reader :name,
4
+ :index
5
+
6
+ # Mandatory params:
7
+ # * name: Category name to use as identifier and file names.
8
+ # * index: Index to which this category is attached.
9
+ #
10
+ # Options:
11
+ # * partial: Partial::None.new, Partial::Substring.new(from:start_char, to:up_to_char) (defaults from:-3, to:-1)
12
+ # * similarity: Similarity::None.new (default), Similarity::DoubleMetaphone.new(amount_of_similarly_linked_words)
13
+ # * from: The source category identifier to take the data from.
14
+ #
15
+ # Advanced Options:
16
+ # * source: Use if the category should use a different source.
17
+ # * weights: Query::Weights.new( [:category1, :category2] => +2, ... )
18
+ # * tokenizer: Use a subclass of Tokenizers::Base that implements #tokens_for and #empty_tokens.
19
+ # * key_format: What this category's keys are formatted with (default is :to_i)
20
+ #
21
+ def initialize name, index, options = {}
22
+ @name = name
23
+ @index = index
24
+
25
+ # Indexing.
26
+ #
27
+ @source = options[:source]
28
+ @from = options[:from]
29
+ @tokenizer = options[:tokenizer]
30
+ @key_format = options[:key_format]
31
+
32
+ # TODO Push into Bundle. At least the weights.
33
+ #
34
+ partial = options[:partial] || Generators::Partial::Default
35
+ weights = options[:weights] || Generators::Weights::Default
36
+ similarity = options[:similarity] || Generators::Similarity::Default
37
+
38
+ @indexing_exact = index.indexing_bundle_class.new(:exact, self, similarity, Generators::Partial::None.new, weights)
39
+ @indexing_partial = index.indexing_bundle_class.new(:partial, self, Generators::Similarity::None.new, partial, weights)
40
+
41
+ # Indexed.
42
+ #
43
+ # TODO Push the defaults out into the index.
44
+ #
45
+ @partial_strategy = partial # TODO Duplicate work.
46
+
47
+ @indexed_exact = index.indexed_bundle_class.new :exact, self, similarity
48
+ @indexed_partial = index.indexed_bundle_class.new :partial, self, similarity
49
+
50
+ # @exact = exact_lambda.call(@exact, @partial) if exact_lambda = options[:exact_lambda]
51
+ # @partial = partial_lambda.call(@exact, @partial) if partial_lambda = options[:partial_lambda]
52
+
53
+ # TODO Extract? Yes.
54
+ #
55
+ Query::Qualifiers.add(name, generate_qualifiers_from(options) || [name])
56
+ end
57
+
58
+ # Indexes and reloads the category.
59
+ #
60
+ def reindex
61
+ index
62
+ reload
63
+ end
64
+
65
+ # Category name.
66
+ #
67
+ def category_name
68
+ name
69
+ end
70
+
71
+ # Index name.
72
+ #
73
+ def index_name
74
+ @index.name
75
+ end
76
+
77
+ # Path and partial filename of a specific index on this category.
78
+ #
79
+ def index_path bundle_name, type
80
+ "#{index_directory}/#{name}_#{bundle_name}_#{type}"
81
+ end
82
+
83
+ # Path and partial filename of the prepared index on this category.
84
+ #
85
+ def prepared_index_path
86
+ @prepared_index_path ||= "#{index_directory}/prepared_#{name}_index"
87
+ end
88
+ def prepared_index_file &block
89
+ @prepared_index_file ||= Backend::File::Text.new prepared_index_path
90
+ @prepared_index_file.open_for_indexing &block
91
+ end
92
+
93
+ # The index directory for this category.
94
+ #
95
+ def index_directory
96
+ @index_directory ||= "#{PICKY_ROOT}/index/#{PICKY_ENVIRONMENT}/#{@index.name}"
97
+ end
98
+
99
+ # Creates the index directory including all necessary paths above it.
100
+ #
101
+ def prepare_index_directory
102
+ FileUtils.mkdir_p index_directory
103
+ end
104
+
105
+ # Identifier for internal use.
106
+ #
107
+ # TODO What internal use?
108
+ #
109
+ def identifier
110
+ @identifier ||= "#{@index.name}:#{name}"
111
+ end
112
+
113
+ def to_info
114
+ <<-CATEGORY
115
+ Category(#{name}):
116
+ Exact:
117
+ #{exact.indented_to_s(4)}
118
+ Partial:
119
+ #{partial.indented_to_s(4)}
120
+ CATEGORY
121
+ end
122
+
123
+ def to_s
124
+ "Category(#{name})"
125
+ end
126
+
127
+ end
@@ -0,0 +1,64 @@
1
+ #
2
+ #
3
+ class Category
4
+
5
+ attr_reader :indexed_exact
6
+
7
+ # TODO Move to Index.
8
+ #
9
+ def generate_qualifiers_from options
10
+ options[:qualifiers] || options[:qualifier] && [options[:qualifier]]
11
+ end
12
+
13
+ # Loads the index from cache.
14
+ #
15
+ def load_from_cache
16
+ timed_exclaim %Q{"#{identifier}": Loading index from cache.}
17
+ indexed_exact.load
18
+ indexed_partial.load
19
+ end
20
+ alias reload load_from_cache
21
+
22
+ # Loads, analyzes, and clears the index.
23
+ #
24
+ # Note: The idea is not to run this while the search engine is running.
25
+ #
26
+ def analyze collector
27
+ collector[identifier] = {
28
+ :exact => Analyzer.new.analyze(indexed_exact),
29
+ :partial => Analyzer.new.analyze(indexed_partial)
30
+ }
31
+ collector
32
+ end
33
+
34
+ # Gets the weight for this token's text.
35
+ #
36
+ def weight token
37
+ bundle_for(token).weight token.text
38
+ end
39
+
40
+ # Gets the ids for this token's text.
41
+ #
42
+ def ids token
43
+ bundle_for(token).ids token.text
44
+ end
45
+
46
+ # Returns the right index bundle for this token.
47
+ #
48
+ def bundle_for token
49
+ token.partial? ? indexed_partial : indexed_exact
50
+ end
51
+
52
+ # The partial strategy defines whether to really use the partial index.
53
+ #
54
+ def indexed_partial
55
+ @partial_strategy.use_exact_for_partial? ? @indexed_exact : @indexed_partial
56
+ end
57
+
58
+ #
59
+ #
60
+ def combination_for token
61
+ weight(token) && Query::Combination.new(token, self)
62
+ end
63
+
64
+ end
@@ -0,0 +1,145 @@
1
+ #
2
+ #
3
+ class Category
4
+
5
+ attr_reader :indexing_exact,
6
+ :indexing_partial
7
+
8
+ # Prepares and caches this category.
9
+ #
10
+ # This one should be used by users.
11
+ #
12
+ def index
13
+ prepare
14
+ cache
15
+ end
16
+
17
+ # Return an appropriate source.
18
+ #
19
+ def source
20
+ @source || @index.source
21
+ end
22
+
23
+ # Return the key format.
24
+ #
25
+ # If the source has no key format, then
26
+ # check for an explicit key format, and
27
+ # if none is defined, ask the index for
28
+ # one.
29
+ #
30
+ def key_format
31
+ source.respond_to?(:key_format) && source.key_format || @key_format || @index.key_format
32
+ end
33
+
34
+ # Where the data is taken from.
35
+ #
36
+ def from
37
+ @from || name
38
+ end
39
+
40
+ # The indexer is lazily generated and cached.
41
+ #
42
+ def indexer
43
+ @indexer ||= source.respond_to?(:each) ? Indexers::Parallel.new(self) : Indexers::Serial.new(self)
44
+ end
45
+
46
+ # TODO This is a hack to get the parallel indexer working.
47
+ #
48
+ def categories
49
+ [self]
50
+ end
51
+
52
+ # Returns an appropriate tokenizer.
53
+ # If one isn't set on this category, will try the index,
54
+ # and finally the default index tokenizer.
55
+ #
56
+ def tokenizer
57
+ @tokenizer || @index.tokenizer || Tokenizers::Index.default
58
+ end
59
+
60
+ # Backup the caches.
61
+ # (Revert with restore_caches)
62
+ #
63
+ def backup_caches
64
+ timed_exclaim "Backing up #{identifier}."
65
+ indexing_exact.backup
66
+ indexing_partial.backup
67
+ end
68
+
69
+ # Restore the caches.
70
+ # (Counterpart to backup_caches)
71
+ #
72
+ def restore_caches
73
+ timed_exclaim "Restoring #{identifier}."
74
+ indexing_exact.restore
75
+ indexing_partial.restore
76
+ end
77
+
78
+ # Checks the caches for existence.
79
+ #
80
+ def check_caches
81
+ timed_exclaim "Checking #{identifier}."
82
+ indexing_exact.raise_unless_cache_exists
83
+ indexing_partial.raise_unless_cache_exists
84
+ end
85
+
86
+ # Deletes the caches.
87
+ #
88
+ def clear_caches
89
+ timed_exclaim "Deleting #{identifier}."
90
+ indexing_exact.delete
91
+ indexing_partial.delete
92
+ end
93
+
94
+ # We need to set what formatting method should be used.
95
+ # Uses this category's key_format.
96
+ #
97
+ # TODO Make this more dynamic.
98
+ #
99
+ def configure
100
+ indexing_exact[:key_format] = self.key_format
101
+ indexing_partial[:key_format] = self.key_format
102
+ end
103
+
104
+ # Indexes, creates the "prepared_..." file.
105
+ #
106
+ # TODO This step could already prepare the id (if a
107
+ # per category key_format is not really needed).
108
+ #
109
+ def prepare
110
+ prepare_index_directory
111
+ indexer.index
112
+ end
113
+
114
+ # Generates all caches for this category.
115
+ #
116
+ def cache
117
+ prepare_index_directory
118
+ generate_caches
119
+ end
120
+
121
+ # Generate the cache data.
122
+ #
123
+ def generate_caches
124
+ configure
125
+ generate_caches_from_source
126
+ generate_partial
127
+ generate_caches_from_memory
128
+ dump_caches
129
+ timed_exclaim %Q{"#{identifier}": Caching finished.}
130
+ end
131
+ def generate_caches_from_source
132
+ indexing_exact.generate_caches_from_source
133
+ end
134
+ def generate_partial
135
+ indexing_partial.generate_partial_from indexing_exact.index
136
+ end
137
+ def generate_caches_from_memory
138
+ indexing_partial.generate_caches_from_memory
139
+ end
140
+ def dump_caches
141
+ indexing_exact.dump
142
+ indexing_partial.dump
143
+ end
144
+
145
+ end
File without changes
File without changes
@@ -0,0 +1,11 @@
1
+ # The original Class class.
2
+ #
3
+ class Class # :nodoc:all
4
+
5
+ def instance_delegate *methods
6
+ methods.each do |method|
7
+ module_eval("def self.#{method}(*args, &block)\nself.instance.__send__(#{method.inspect}, *args, &block)\nend\n", "(__DELEGATION__)", 1)
8
+ end
9
+ end
10
+
11
+ end