picky 2.5.2 → 2.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (255) hide show
  1. data/lib/picky/adapters/rack/base.rb +23 -0
  2. data/lib/picky/adapters/rack/live_parameters.rb +33 -0
  3. data/lib/picky/adapters/rack/query.rb +65 -0
  4. data/lib/picky/adapters/rack.rb +30 -0
  5. data/lib/picky/application.rb +5 -5
  6. data/lib/picky/backend/backend.rb +108 -0
  7. data/lib/picky/backend/file/basic.rb +101 -0
  8. data/lib/picky/backend/file/json.rb +34 -0
  9. data/lib/picky/backend/file/marshal.rb +34 -0
  10. data/lib/picky/backend/file/text.rb +56 -0
  11. data/lib/picky/backend/files.rb +30 -0
  12. data/lib/picky/backend/redis/basic.rb +85 -0
  13. data/lib/picky/backend/redis/list_hash.rb +49 -0
  14. data/lib/picky/backend/redis/string_hash.rb +40 -0
  15. data/lib/picky/backend/redis.rb +40 -0
  16. data/lib/picky/calculations/location.rb +57 -0
  17. data/lib/picky/categories.rb +62 -0
  18. data/lib/picky/categories_indexed.rb +93 -0
  19. data/lib/picky/categories_indexing.rb +12 -0
  20. data/lib/picky/category.rb +127 -0
  21. data/lib/picky/category_indexed.rb +64 -0
  22. data/lib/picky/category_indexing.rb +145 -0
  23. data/lib/picky/{internals/ext → ext}/maybe_compile.rb +0 -0
  24. data/lib/picky/{internals/ext → ext}/ruby19/extconf.rb +0 -0
  25. data/lib/picky/{internals/ext → ext}/ruby19/performant.c +0 -0
  26. data/lib/picky/{internals/extensions → extensions}/array.rb +0 -0
  27. data/lib/picky/extensions/class.rb +11 -0
  28. data/lib/picky/{internals/extensions → extensions}/hash.rb +0 -0
  29. data/lib/picky/{internals/extensions → extensions}/module.rb +0 -0
  30. data/lib/picky/{internals/extensions → extensions}/object.rb +0 -0
  31. data/lib/picky/{internals/extensions → extensions}/symbol.rb +0 -0
  32. data/lib/picky/frontend_adapters/rack.rb +146 -0
  33. data/lib/picky/generators/aliases.rb +3 -3
  34. data/lib/picky/generators/base.rb +15 -0
  35. data/lib/picky/generators/partial/default.rb +5 -0
  36. data/lib/picky/generators/partial/none.rb +31 -0
  37. data/lib/picky/generators/partial/strategy.rb +25 -0
  38. data/lib/picky/generators/partial/substring.rb +118 -0
  39. data/lib/picky/generators/partial_generator.rb +15 -0
  40. data/lib/picky/generators/similarity/default.rb +7 -0
  41. data/lib/picky/generators/similarity/double_metaphone.rb +28 -0
  42. data/lib/picky/generators/similarity/metaphone.rb +28 -0
  43. data/lib/picky/generators/similarity/none.rb +31 -0
  44. data/lib/picky/generators/similarity/phonetic.rb +65 -0
  45. data/lib/picky/generators/similarity/soundex.rb +28 -0
  46. data/lib/picky/generators/similarity/strategy.rb +9 -0
  47. data/lib/picky/generators/similarity_generator.rb +15 -0
  48. data/lib/picky/generators/strategy.rb +14 -0
  49. data/lib/picky/generators/weights/default.rb +7 -0
  50. data/lib/picky/generators/weights/logarithmic.rb +39 -0
  51. data/lib/picky/generators/weights/strategy.rb +9 -0
  52. data/lib/picky/generators/weights_generator.rb +15 -0
  53. data/lib/picky/{internals/helpers → helpers}/measuring.rb +0 -0
  54. data/lib/picky/index/base.rb +119 -104
  55. data/lib/picky/index/base_indexed.rb +27 -0
  56. data/lib/picky/index/base_indexing.rb +119 -0
  57. data/lib/picky/index/memory.rb +6 -18
  58. data/lib/picky/index/redis.rb +6 -18
  59. data/lib/picky/indexed/bundle/base.rb +110 -0
  60. data/lib/picky/indexed/bundle/memory.rb +91 -0
  61. data/lib/picky/indexed/bundle/redis.rb +45 -0
  62. data/lib/picky/indexed/wrappers/bundle/calculation.rb +35 -0
  63. data/lib/picky/indexed/wrappers/bundle/location.rb +42 -0
  64. data/lib/picky/indexed/wrappers/bundle/wrapper.rb +43 -0
  65. data/lib/picky/indexed/wrappers/category/location.rb +25 -0
  66. data/lib/picky/indexed/wrappers/exact_first.rb +55 -0
  67. data/lib/picky/{internals/indexers → indexers}/base.rb +0 -0
  68. data/lib/picky/{internals/indexers → indexers}/parallel.rb +0 -0
  69. data/lib/picky/{internals/indexers → indexers}/serial.rb +0 -0
  70. data/lib/picky/{internals/indexers → indexers}/solr.rb +0 -0
  71. data/lib/picky/indexes.rb +73 -0
  72. data/lib/picky/indexes_indexed.rb +29 -0
  73. data/lib/picky/indexes_indexing.rb +49 -0
  74. data/lib/picky/indexing/bundle/base.rb +212 -0
  75. data/lib/picky/indexing/bundle/memory.rb +25 -0
  76. data/lib/picky/indexing/bundle/redis.rb +24 -0
  77. data/lib/picky/indexing/bundle/super_base.rb +61 -0
  78. data/lib/picky/indexing/wrappers/category/location.rb +25 -0
  79. data/lib/picky/interfaces/live_parameters.rb +8 -8
  80. data/lib/picky/loader.rb +89 -95
  81. data/lib/picky/{internals/performant.rb → performant.rb} +0 -0
  82. data/lib/picky/query/allocation.rb +84 -0
  83. data/lib/picky/query/allocations.rb +114 -0
  84. data/lib/picky/query/combination.rb +76 -0
  85. data/lib/picky/query/combinations/base.rb +70 -0
  86. data/lib/picky/query/combinations/memory.rb +48 -0
  87. data/lib/picky/query/combinations/redis.rb +86 -0
  88. data/lib/picky/query/indexes.rb +195 -0
  89. data/lib/picky/query/qualifiers.rb +76 -0
  90. data/lib/picky/query/token.rb +198 -0
  91. data/lib/picky/query/tokens.rb +103 -0
  92. data/lib/picky/{internals/query → query}/weights.rb +0 -0
  93. data/lib/picky/results.rb +1 -1
  94. data/lib/picky/search.rb +6 -6
  95. data/lib/picky/{internals/solr → solr}/schema_generator.rb +0 -0
  96. data/lib/picky/sources/db.rb +7 -7
  97. data/lib/picky/sources/wrappers/location.rb +2 -2
  98. data/lib/picky/tokenizers/base.rb +224 -0
  99. data/lib/picky/tokenizers/index.rb +30 -0
  100. data/lib/picky/tokenizers/location.rb +49 -0
  101. data/lib/picky/tokenizers/query.rb +55 -0
  102. data/lib/tasks/index.rake +4 -3
  103. data/lib/tasks/try.rake +2 -2
  104. data/spec/lib/{internals/adapters → adapters}/rack/base_spec.rb +1 -1
  105. data/spec/lib/{internals/adapters → adapters}/rack/live_parameters_spec.rb +1 -1
  106. data/spec/lib/{internals/adapters → adapters}/rack/query_spec.rb +1 -1
  107. data/spec/lib/application_spec.rb +3 -3
  108. data/spec/lib/{internals/index → backend}/file/basic_spec.rb +1 -1
  109. data/spec/lib/{internals/index → backend}/file/json_spec.rb +1 -1
  110. data/spec/lib/{internals/index → backend}/file/marshal_spec.rb +1 -1
  111. data/spec/lib/{internals/index → backend}/file/text_spec.rb +1 -1
  112. data/spec/lib/{internals/index → backend}/files_spec.rb +3 -3
  113. data/spec/lib/{internals/index → backend}/redis/basic_spec.rb +1 -1
  114. data/spec/lib/{internals/index → backend}/redis/list_hash_spec.rb +1 -1
  115. data/spec/lib/{internals/index → backend}/redis/string_hash_spec.rb +1 -1
  116. data/spec/lib/{internals/index → backend}/redis_spec.rb +11 -5
  117. data/spec/lib/{internals/calculations → calculations}/location_spec.rb +1 -1
  118. data/spec/lib/{internals/indexed/categories_spec.rb → categories_indexed_spec.rb} +10 -10
  119. data/spec/lib/{internals/indexed/category_spec.rb → category_indexed_spec.rb} +12 -12
  120. data/spec/lib/{internals/indexing/category_spec.rb → category_indexing_spec.rb} +10 -10
  121. data/spec/lib/{internals/cores_spec.rb → cores_spec.rb} +0 -0
  122. data/spec/lib/{internals/extensions → extensions}/array_spec.rb +0 -0
  123. data/spec/lib/{internals/extensions → extensions}/hash_spec.rb +0 -0
  124. data/spec/lib/{internals/extensions → extensions}/module_spec.rb +0 -0
  125. data/spec/lib/{internals/extensions → extensions}/object_spec.rb +0 -0
  126. data/spec/lib/{internals/extensions → extensions}/symbol_spec.rb +0 -0
  127. data/spec/lib/{internals/frontend_adapters → frontend_adapters}/rack_spec.rb +10 -10
  128. data/spec/lib/generators/aliases_spec.rb +3 -3
  129. data/spec/lib/{internals/generators → generators}/cacher_strategy_spec.rb +1 -1
  130. data/spec/lib/{internals/generators → generators}/partial/default_spec.rb +3 -3
  131. data/spec/lib/{internals/generators → generators}/partial/none_spec.rb +2 -2
  132. data/spec/lib/{internals/generators → generators}/partial/substring_spec.rb +1 -1
  133. data/spec/lib/{internals/generators → generators}/partial_generator_spec.rb +3 -3
  134. data/spec/lib/{internals/generators → generators}/similarity/double_metaphone_spec.rb +1 -1
  135. data/spec/lib/{internals/generators → generators}/similarity/metaphone_spec.rb +1 -1
  136. data/spec/lib/{internals/generators → generators}/similarity/none_spec.rb +1 -1
  137. data/spec/lib/{internals/generators → generators}/similarity/phonetic_spec.rb +1 -1
  138. data/spec/lib/{internals/generators → generators}/similarity/soundex_spec.rb +1 -1
  139. data/spec/lib/{internals/generators → generators}/similarity_generator_spec.rb +2 -2
  140. data/spec/lib/{internals/generators → generators}/weights/logarithmic_spec.rb +1 -1
  141. data/spec/lib/{internals/generators → generators}/weights_generator_spec.rb +5 -5
  142. data/spec/lib/{internals/helpers → helpers}/measuring_spec.rb +0 -0
  143. data/spec/lib/{internals/indexed/index_spec.rb → index/base_indexed_spec.rb} +5 -5
  144. data/spec/lib/{internals/indexing/index_spec.rb → index/base_indexing_spec.rb} +6 -19
  145. data/spec/lib/index/base_spec.rb +10 -53
  146. data/spec/lib/{internals/indexed → indexed}/bundle/memory_spec.rb +5 -5
  147. data/spec/lib/{internals/indexed → indexed}/bundle/redis_spec.rb +4 -4
  148. data/spec/lib/{internals/indexed → indexed}/wrappers/bundle/calculation_spec.rb +1 -1
  149. data/spec/lib/{internals/indexed → indexed}/wrappers/bundle/wrapper_spec.rb +1 -1
  150. data/spec/lib/{internals/indexed → indexed}/wrappers/exact_first_spec.rb +7 -7
  151. data/spec/lib/{internals/indexers → indexers}/base_spec.rb +0 -0
  152. data/spec/lib/{internals/indexers → indexers}/parallel_spec.rb +0 -0
  153. data/spec/lib/{internals/indexers → indexers}/serial_spec.rb +0 -0
  154. data/spec/lib/indexes_class_spec.rb +30 -0
  155. data/spec/lib/{indexed/indexes_spec.rb → indexes_indexed_spec.rb} +1 -1
  156. data/spec/lib/{indexing/indexes_spec.rb → indexes_indexing_spec.rb} +8 -8
  157. data/spec/lib/{internals/indexing/indexes_spec.rb → indexes_spec.rb} +15 -12
  158. data/spec/lib/{internals/indexing → indexing}/bundle/memory_partial_generation_speed_spec.rb +4 -4
  159. data/spec/lib/{internals/indexing → indexing}/bundle/memory_spec.rb +3 -3
  160. data/spec/lib/{internals/indexing → indexing}/bundle/redis_spec.rb +3 -3
  161. data/spec/lib/{internals/indexing → indexing}/bundle/super_base_spec.rb +2 -2
  162. data/spec/lib/{internals/interfaces → interfaces}/live_parameters_spec.rb +0 -0
  163. data/spec/lib/query/allocation_spec.rb +1 -1
  164. data/spec/lib/query/allocations_spec.rb +1 -1
  165. data/spec/lib/query/combination_spec.rb +5 -5
  166. data/spec/lib/query/combinations/base_spec.rb +1 -1
  167. data/spec/lib/query/combinations/memory_spec.rb +1 -1
  168. data/spec/lib/query/combinations/redis_spec.rb +1 -1
  169. data/spec/lib/query/indexes_spec.rb +1 -1
  170. data/spec/lib/query/qualifiers_spec.rb +4 -4
  171. data/spec/lib/query/token_spec.rb +3 -3
  172. data/spec/lib/query/tokens_spec.rb +32 -32
  173. data/spec/lib/search_spec.rb +5 -5
  174. data/spec/lib/{internals/solr → solr}/schema_generator_spec.rb +0 -0
  175. data/spec/lib/sources/db_spec.rb +4 -8
  176. data/spec/lib/sources/wrappers/location_spec.rb +1 -1
  177. data/spec/lib/{internals/tokenizers → tokenizers}/base_spec.rb +1 -1
  178. data/spec/lib/{internals/tokenizers → tokenizers}/index_spec.rb +1 -1
  179. data/spec/lib/{internals/tokenizers → tokenizers}/query_spec.rb +1 -1
  180. metadata +214 -215
  181. data/lib/picky/aliases.rb +0 -4
  182. data/lib/picky/index_bundle.rb +0 -48
  183. data/lib/picky/indexed/indexes.rb +0 -59
  184. data/lib/picky/indexing/indexes.rb +0 -87
  185. data/lib/picky/internals/adapters/rack/base.rb +0 -27
  186. data/lib/picky/internals/adapters/rack/live_parameters.rb +0 -37
  187. data/lib/picky/internals/adapters/rack/query.rb +0 -69
  188. data/lib/picky/internals/adapters/rack.rb +0 -34
  189. data/lib/picky/internals/calculations/location.rb +0 -59
  190. data/lib/picky/internals/frontend_adapters/rack.rb +0 -150
  191. data/lib/picky/internals/generators/base.rb +0 -19
  192. data/lib/picky/internals/generators/partial/default.rb +0 -7
  193. data/lib/picky/internals/generators/partial/none.rb +0 -35
  194. data/lib/picky/internals/generators/partial/strategy.rb +0 -29
  195. data/lib/picky/internals/generators/partial/substring.rb +0 -122
  196. data/lib/picky/internals/generators/partial_generator.rb +0 -19
  197. data/lib/picky/internals/generators/similarity/default.rb +0 -9
  198. data/lib/picky/internals/generators/similarity/double_metaphone.rb +0 -32
  199. data/lib/picky/internals/generators/similarity/metaphone.rb +0 -32
  200. data/lib/picky/internals/generators/similarity/none.rb +0 -35
  201. data/lib/picky/internals/generators/similarity/phonetic.rb +0 -69
  202. data/lib/picky/internals/generators/similarity/soundex.rb +0 -32
  203. data/lib/picky/internals/generators/similarity/strategy.rb +0 -11
  204. data/lib/picky/internals/generators/similarity_generator.rb +0 -19
  205. data/lib/picky/internals/generators/strategy.rb +0 -18
  206. data/lib/picky/internals/generators/weights/default.rb +0 -9
  207. data/lib/picky/internals/generators/weights/logarithmic.rb +0 -43
  208. data/lib/picky/internals/generators/weights/strategy.rb +0 -11
  209. data/lib/picky/internals/generators/weights_generator.rb +0 -19
  210. data/lib/picky/internals/index/backend.rb +0 -112
  211. data/lib/picky/internals/index/file/basic.rb +0 -105
  212. data/lib/picky/internals/index/file/json.rb +0 -38
  213. data/lib/picky/internals/index/file/marshal.rb +0 -38
  214. data/lib/picky/internals/index/file/text.rb +0 -60
  215. data/lib/picky/internals/index/files.rb +0 -34
  216. data/lib/picky/internals/index/redis/basic.rb +0 -89
  217. data/lib/picky/internals/index/redis/list_hash.rb +0 -53
  218. data/lib/picky/internals/index/redis/string_hash.rb +0 -44
  219. data/lib/picky/internals/index/redis.rb +0 -44
  220. data/lib/picky/internals/indexed/bundle/base.rb +0 -114
  221. data/lib/picky/internals/indexed/bundle/memory.rb +0 -95
  222. data/lib/picky/internals/indexed/bundle/redis.rb +0 -49
  223. data/lib/picky/internals/indexed/categories.rb +0 -140
  224. data/lib/picky/internals/indexed/category.rb +0 -111
  225. data/lib/picky/internals/indexed/index.rb +0 -63
  226. data/lib/picky/internals/indexed/wrappers/bundle/calculation.rb +0 -37
  227. data/lib/picky/internals/indexed/wrappers/bundle/location.rb +0 -44
  228. data/lib/picky/internals/indexed/wrappers/bundle/wrapper.rb +0 -45
  229. data/lib/picky/internals/indexed/wrappers/category/location.rb +0 -27
  230. data/lib/picky/internals/indexed/wrappers/exact_first.rb +0 -59
  231. data/lib/picky/internals/indexing/bundle/base.rb +0 -216
  232. data/lib/picky/internals/indexing/bundle/memory.rb +0 -29
  233. data/lib/picky/internals/indexing/bundle/redis.rb +0 -28
  234. data/lib/picky/internals/indexing/bundle/super_base.rb +0 -65
  235. data/lib/picky/internals/indexing/category.rb +0 -153
  236. data/lib/picky/internals/indexing/index.rb +0 -142
  237. data/lib/picky/internals/indexing/wrappers/category/location.rb +0 -27
  238. data/lib/picky/internals/query/allocation.rb +0 -88
  239. data/lib/picky/internals/query/allocations.rb +0 -118
  240. data/lib/picky/internals/query/combination.rb +0 -80
  241. data/lib/picky/internals/query/combinations/base.rb +0 -74
  242. data/lib/picky/internals/query/combinations/memory.rb +0 -52
  243. data/lib/picky/internals/query/combinations/redis.rb +0 -90
  244. data/lib/picky/internals/query/indexes.rb +0 -199
  245. data/lib/picky/internals/query/qualifiers.rb +0 -82
  246. data/lib/picky/internals/query/token.rb +0 -202
  247. data/lib/picky/internals/query/tokens.rb +0 -109
  248. data/lib/picky/internals/shared/category.rb +0 -52
  249. data/lib/picky/internals/tokenizers/base.rb +0 -228
  250. data/lib/picky/internals/tokenizers/index.rb +0 -34
  251. data/lib/picky/internals/tokenizers/location.rb +0 -54
  252. data/lib/picky/internals/tokenizers/query.rb +0 -59
  253. data/lib/picky/internals.rb +0 -2
  254. data/spec/lib/aliases_spec.rb +0 -9
  255. data/spec/lib/index_bundle_spec.rb +0 -69
@@ -0,0 +1,40 @@
1
+ module Backend
2
+
3
+ class Redis
4
+
5
+ class StringHash < Basic
6
+
7
+ # Writes the hash into Redis.
8
+ #
9
+ # Note: We could use multi, but it did not help.
10
+ #
11
+ def dump hash
12
+ clear
13
+ hash.each_pair do |key, value|
14
+ backend.hset namespace, key, value
15
+ end
16
+ end
17
+
18
+ # Clears the hash.
19
+ #
20
+ def clear
21
+ backend.del namespace
22
+ end
23
+
24
+ # Get a collection.
25
+ #
26
+ def collection sym
27
+ raise "Can't retrieve a collection from a StringHash. Use Index::Redis::ListHash."
28
+ end
29
+
30
+ # Get a single value.
31
+ #
32
+ def member sym
33
+ backend.hget namespace, sym
34
+ end
35
+
36
+ end
37
+
38
+ end
39
+
40
+ end
@@ -0,0 +1,40 @@
1
+ module Backend
2
+
3
+ # TODO Needs a reconnect to be run after forking.
4
+ #
5
+ class Redis < Backend
6
+
7
+ def initialize bundle_name, category
8
+ super bundle_name, category
9
+
10
+ # Refine a few Redis "types".
11
+ #
12
+ @index = Redis::ListHash.new "#{category.identifier}:#{bundle_name}:index"
13
+ @weights = Redis::StringHash.new "#{category.identifier}:#{bundle_name}:weights"
14
+ @similarity = Redis::ListHash.new "#{category.identifier}:#{bundle_name}:similarity"
15
+ @configuration = Redis::StringHash.new "#{category.identifier}:#{bundle_name}:configuration"
16
+ end
17
+
18
+ # Delegate to the right collection.
19
+ #
20
+ def ids sym
21
+ index.collection sym
22
+ end
23
+
24
+ # Delegate to the right member value.
25
+ #
26
+ # Note: Converts to float.
27
+ #
28
+ def weight sym
29
+ weights.member(sym).to_f
30
+ end
31
+
32
+ # Delegate to a member value.
33
+ #
34
+ def setting sym
35
+ configuration.member sym
36
+ end
37
+
38
+ end
39
+
40
+ end
@@ -0,0 +1,57 @@
1
+ module Calculations # :nodoc:all
2
+
3
+ # A location calculation recalculates a 1-d location
4
+ # to the Picky internal 1-d "grid".
5
+ #
6
+ # For example, if you have a location x == 12.3456,
7
+ # it will be recalculated into 3, if the minimum is 9
8
+ # and the gridlength is 1.
9
+ #
10
+ class Location
11
+
12
+ attr_reader :minimum, :precision, :grid
13
+
14
+ def initialize user_grid, precision = nil
15
+ @user_grid = user_grid
16
+ @precision = precision || 1
17
+ @grid = @user_grid / (@precision + 0.5)
18
+ end
19
+
20
+ def minimum= minimum
21
+ # Add a margin of 1 user grid.
22
+ #
23
+ minimum -= @user_grid
24
+
25
+ # Add 1 more grid of margin so that the index key never falls on 0.
26
+ # Why? to_i maps by default to 0.
27
+ #
28
+ minimum -= @grid
29
+
30
+ @minimum = minimum
31
+ end
32
+
33
+ #
34
+ #
35
+ def add_margin length
36
+ @minimum -= length
37
+ end
38
+
39
+ #
40
+ #
41
+ def recalculated_range location
42
+ range recalculate(location)
43
+ end
44
+ #
45
+ #
46
+ def range around_location
47
+ (around_location - @precision)..(around_location + @precision)
48
+ end
49
+ #
50
+ #
51
+ def recalculate location
52
+ ((location - @minimum) / @grid).floor
53
+ end
54
+
55
+ end
56
+
57
+ end
@@ -0,0 +1,62 @@
1
+ class Categories
2
+
3
+ attr_reader :categories, :category_hash
4
+
5
+ delegate :each,
6
+ :first,
7
+ :map,
8
+ :to => :categories
9
+
10
+ each_delegate :reindex,
11
+ :to => :categories
12
+
13
+ # A list of indexed categories.
14
+ #
15
+ # Options:
16
+ # * ignore_unassigned_tokens: Ignore the given token if it cannot be matched to a category.
17
+ # The default behaviour is that if a token does not match to
18
+ # any category, the query will not return anything (since a
19
+ # single token cannot be matched). If you set this option to
20
+ # true, any token that cannot be matched to a category will be
21
+ # simply ignored.
22
+ # Use this if only a few matched words are important, for
22
+ # example if, of the query "Jonathan Myers 86455 Las Cucarachas",
24
+ # you only want to match the zipcode, to have the search engine
25
+ # display advertisements on the side for the zipcode.
26
+ # Nifty! :)
27
+ #
28
+ def initialize options = {}
29
+ clear
30
+
31
+ @ignore_unassigned_tokens = options[:ignore_unassigned_tokens] || false
32
+ end
33
+
34
+ # Clears both the array of categories and the hash of categories.
35
+ #
36
+ def clear
37
+ @categories = []
38
+ @category_hash = {}
39
+ end
40
+
41
+ # Find a given category in the categories.
42
+ #
43
+ def [] category_name
44
+ category_name = category_name.to_sym
45
+ category_hash[category_name] || raise_not_found(category_name)
46
+ end
47
+ def raise_not_found category_name
48
+ raise %Q{Index category "#{category_name}" not found. Possible categories: "#{categories.map(&:name).join('", "')}".}
49
+ end
50
+
51
+ # Add the given category to the list of categories.
52
+ #
53
+ def << category
54
+ categories << category
55
+ category_hash[category.name] = category
56
+ end
57
+
58
+ def to_s
59
+ categories.join(', ')
60
+ end
61
+
62
+ end
@@ -0,0 +1,93 @@
1
+ class Categories
2
+
3
+ attr_reader :ignore_unassigned_tokens
4
+
5
+ each_delegate :load_from_cache,
6
+ :analyze,
7
+ :to => :categories
8
+
9
+ # Return all possible combinations for the given token.
10
+ #
11
+ # This checks if it needs to also search through similar
12
+ # tokens, if for example, the token is one with ~.
13
+ # If yes, it puts together all solutions.
14
+ #
15
+ def possible_combinations_for token
16
+ token.similar? ? similar_possible_for(token) : possible_for(token)
17
+ end
18
+ # Gets all similar tokens and puts together the possible combinations
19
+ # for each found similar token.
20
+ #
21
+ def similar_possible_for token
22
+ # Get as many tokens as necessary
23
+ #
24
+ tokens = similar_tokens_for token
25
+ # possible combinations
26
+ #
27
+ inject_possible_for tokens
28
+ end
29
+ def similar_tokens_for token
30
+ text = token.text
31
+ categories.inject([]) do |result, category|
32
+ next_token = token
33
+ # Note: We could also break off here if not all the available
34
+ # similars are needed.
35
+ # Wait for a concrete case that needs this before taking
36
+ # action.
37
+ #
38
+ while next_token = next_token.next_similar_token(category)
39
+ result << next_token if next_token && next_token.text != text
40
+ end
41
+ result
42
+ end
43
+ end
44
+ def inject_possible_for tokens
45
+ tokens.inject([]) do |result, token|
46
+ possible = possible_categories token
47
+ result + possible_for(token, possible)
48
+ end
49
+ end
50
+
51
+ # Returns possible Combinations for the token.
52
+ #
53
+ # Note: The preselected_categories param is an optimization.
54
+ #
55
+ # Note: Returns [] if no categories matched (will produce no result).
56
+ # Returns nil if this token needs to be removed from the query.
57
+ # (Also none of the categories matched, but the ignore unassigned
58
+ # tokens option is true)
59
+ #
60
+ def possible_for token, preselected_categories = nil
61
+ possible = (preselected_categories || possible_categories(token)).inject([]) do |combinations, category|
62
+ combination = category.combination_for token
63
+ combination ? combinations << combination : combinations
64
+ end
65
+ # This is an optimization to mark tokens that are ignored.
66
+ #
67
+ return if ignore_unassigned_tokens && possible.empty?
68
+ possible # wrap in combinations
69
+ end
70
+ # This returns the possible categories for this token.
71
+ # If the user has already preselected a category for this token,
72
+ # like "artist:moby", only those are returned; if not, just return all for the given token,
73
+ # since all are possible.
74
+ #
75
+ # Note: Once I thought this was called too often. But it is not (18.01.2011).
76
+ #
77
+ def possible_categories token
78
+ user_defined_categories(token) || categories
79
+ end
80
+ # This returns the array of categories if the user has defined
81
+ # an existing category.
82
+ #
83
+ # Note: Returns nil if the user did not define one
84
+ # or [] if he/she has defined a non-existing one.
85
+ #
86
+ def user_defined_categories token
87
+ names = token.user_defined_category_names
88
+ names && names.map do |name|
89
+ category_hash[name]
90
+ end.compact
91
+ end
92
+
93
+ end
@@ -0,0 +1,12 @@
1
+ class Categories
2
+
3
+ each_delegate :backup_caches,
4
+ :cache,
5
+ :check_caches,
6
+ :clear_caches,
7
+ :create_directory_structure,
8
+ :generate_caches,
9
+ :restore_caches,
10
+ :to => :categories
11
+
12
+ end
@@ -0,0 +1,127 @@
1
+ class Category
2
+
3
+ attr_reader :name,
4
+ :index
5
+
6
+ # Mandatory params:
7
+ # * name: Category name to use as identifier and file names.
8
+ # * index: Index to which this category is attached.
9
+ #
10
+ # Options:
11
+ # * partial: Partial::None.new, Partial::Substring.new(from:start_char, to:up_to_char) (defaults from:-3, to:-1)
12
+ # * similarity: Similarity::None.new (default), Similarity::DoubleMetaphone.new(amount_of_similarly_linked_words)
13
+ # * from: The source category identifier to take the data from.
14
+ #
15
+ # Advanced Options:
16
+ # * source: Use if the category should use a different source.
17
+ # * weights: Query::Weights.new( [:category1, :category2] => +2, ... )
18
+ # * tokenizer: Use a subclass of Tokenizers::Base that implements #tokens_for and #empty_tokens.
19
+ # * key_format: What this category's keys are formatted with (default is :to_i)
20
+ #
21
+ def initialize name, index, options = {}
22
+ @name = name
23
+ @index = index
24
+
25
+ # Indexing.
26
+ #
27
+ @source = options[:source]
28
+ @from = options[:from]
29
+ @tokenizer = options[:tokenizer]
30
+ @key_format = options[:key_format]
31
+
32
+ # TODO Push into Bundle. At least the weights.
33
+ #
34
+ partial = options[:partial] || Generators::Partial::Default
35
+ weights = options[:weights] || Generators::Weights::Default
36
+ similarity = options[:similarity] || Generators::Similarity::Default
37
+
38
+ @indexing_exact = index.indexing_bundle_class.new(:exact, self, similarity, Generators::Partial::None.new, weights)
39
+ @indexing_partial = index.indexing_bundle_class.new(:partial, self, Generators::Similarity::None.new, partial, weights)
40
+
41
+ # Indexed.
42
+ #
43
+ # TODO Push the defaults out into the index.
44
+ #
45
+ @partial_strategy = partial # TODO Duplicate work.
46
+
47
+ @indexed_exact = index.indexed_bundle_class.new :exact, self, similarity
48
+ @indexed_partial = index.indexed_bundle_class.new :partial, self, similarity
49
+
50
+ # @exact = exact_lambda.call(@exact, @partial) if exact_lambda = options[:exact_lambda]
51
+ # @partial = partial_lambda.call(@exact, @partial) if partial_lambda = options[:partial_lambda]
52
+
53
+ # TODO Extract? Yes.
54
+ #
55
+ Query::Qualifiers.add(name, generate_qualifiers_from(options) || [name])
56
+ end
57
+
58
+ # Indexes and reloads the category.
59
+ #
60
+ def reindex
61
+ index
62
+ reload
63
+ end
64
+
65
+ # Category name.
66
+ #
67
+ def category_name
68
+ name
69
+ end
70
+
71
+ # Index name.
72
+ #
73
+ def index_name
74
+ @index.name
75
+ end
76
+
77
+ # Path and partial filename of a specific index on this category.
78
+ #
79
+ def index_path bundle_name, type
80
+ "#{index_directory}/#{name}_#{bundle_name}_#{type}"
81
+ end
82
+
83
+ # Path and partial filename of the prepared index on this category.
84
+ #
85
+ def prepared_index_path
86
+ @prepared_index_path ||= "#{index_directory}/prepared_#{name}_index"
87
+ end
88
+ def prepared_index_file &block
89
+ @prepared_index_file ||= Backend::File::Text.new prepared_index_path
90
+ @prepared_index_file.open_for_indexing &block
91
+ end
92
+
93
+ # The index directory for this category.
94
+ #
95
+ def index_directory
96
+ @index_directory ||= "#{PICKY_ROOT}/index/#{PICKY_ENVIRONMENT}/#{@index.name}"
97
+ end
98
+
99
+ # Creates the index directory including all necessary paths above it.
100
+ #
101
+ def prepare_index_directory
102
+ FileUtils.mkdir_p index_directory
103
+ end
104
+
105
+ # Identifier for internal use.
106
+ #
107
+ # TODO What internal use?
108
+ #
109
+ def identifier
110
+ @identifier ||= "#{@index.name}:#{name}"
111
+ end
112
+
113
+ def to_info
114
+ <<-CATEGORY
115
+ Category(#{name}):
116
+ Exact:
117
+ #{exact.indented_to_s(4)}
118
+ Partial:
119
+ #{partial.indented_to_s(4)}
120
+ CATEGORY
121
+ end
122
+
123
+ def to_s
124
+ "Category(#{name})"
125
+ end
126
+
127
+ end
@@ -0,0 +1,64 @@
1
+ #
2
+ #
3
+ class Category
4
+
5
+ attr_reader :indexed_exact
6
+
7
+ # TODO Move to Index.
8
+ #
9
+ def generate_qualifiers_from options
10
+ options[:qualifiers] || options[:qualifier] && [options[:qualifier]]
11
+ end
12
+
13
+ # Loads the index from cache.
14
+ #
15
+ def load_from_cache
16
+ timed_exclaim %Q{"#{identifier}": Loading index from cache.}
17
+ indexed_exact.load
18
+ indexed_partial.load
19
+ end
20
+ alias reload load_from_cache
21
+
22
+ # Loads, analyzes, and clears the index.
23
+ #
24
+ # Note: The idea is not to run this while the search engine is running.
25
+ #
26
+ def analyze collector
27
+ collector[identifier] = {
28
+ :exact => Analyzer.new.analyze(indexed_exact),
29
+ :partial => Analyzer.new.analyze(indexed_partial)
30
+ }
31
+ collector
32
+ end
33
+
34
+ # Gets the weight for this token's text.
35
+ #
36
+ def weight token
37
+ bundle_for(token).weight token.text
38
+ end
39
+
40
+ # Gets the ids for this token's text.
41
+ #
42
+ def ids token
43
+ bundle_for(token).ids token.text
44
+ end
45
+
46
+ # Returns the right index bundle for this token.
47
+ #
48
+ def bundle_for token
49
+ token.partial? ? indexed_partial : indexed_exact
50
+ end
51
+
52
+ # The partial strategy defines whether to really use the partial index.
53
+ #
54
+ def indexed_partial
55
+ @partial_strategy.use_exact_for_partial? ? @indexed_exact : @indexed_partial
56
+ end
57
+
58
+ #
59
+ #
60
+ def combination_for token
61
+ weight(token) && Query::Combination.new(token, self)
62
+ end
63
+
64
+ end
@@ -0,0 +1,145 @@
1
+ #
2
+ #
3
+ class Category
4
+
5
+ attr_reader :indexing_exact,
6
+ :indexing_partial
7
+
8
+ # Prepares and caches this category.
9
+ #
10
+ # This one should be used by users.
11
+ #
12
+ def index
13
+ prepare
14
+ cache
15
+ end
16
+
17
+ # Return an appropriate source.
18
+ #
19
+ def source
20
+ @source || @index.source
21
+ end
22
+
23
+ # Return the key format.
24
+ #
25
+ # If the source has no key format, then
26
+ # check for an explicit key format, and
27
+ # if none is defined, ask the index for
28
+ # one.
29
+ #
30
+ def key_format
31
+ source.respond_to?(:key_format) && source.key_format || @key_format || @index.key_format
32
+ end
33
+
34
+ # Where the data is taken from.
35
+ #
36
+ def from
37
+ @from || name
38
+ end
39
+
40
+ # The indexer is lazily generated and cached.
41
+ #
42
+ def indexer
43
+ @indexer ||= source.respond_to?(:each) ? Indexers::Parallel.new(self) : Indexers::Serial.new(self)
44
+ end
45
+
46
+ # TODO This is a hack to get the parallel indexer working.
47
+ #
48
+ def categories
49
+ [self]
50
+ end
51
+
52
+ # Returns an appropriate tokenizer.
53
+ # If one isn't set on this category, will try the index,
54
+ # and finally the default index tokenizer.
55
+ #
56
+ def tokenizer
57
+ @tokenizer || @index.tokenizer || Tokenizers::Index.default
58
+ end
59
+
60
+ # Backup the caches.
61
+ # (Revert with restore_caches)
62
+ #
63
+ def backup_caches
64
+ timed_exclaim "Backing up #{identifier}."
65
+ indexing_exact.backup
66
+ indexing_partial.backup
67
+ end
68
+
69
+ # Restore the caches.
70
+ # (Revert with backup_caches)
71
+ #
72
+ def restore_caches
73
+ timed_exclaim "Restoring #{identifier}."
74
+ indexing_exact.restore
75
+ indexing_partial.restore
76
+ end
77
+
78
+ # Checks the caches for existence.
79
+ #
80
+ def check_caches
81
+ timed_exclaim "Checking #{identifier}."
82
+ indexing_exact.raise_unless_cache_exists
83
+ indexing_partial.raise_unless_cache_exists
84
+ end
85
+
86
+ # Deletes the caches.
87
+ #
88
+ def clear_caches
89
+ timed_exclaim "Deleting #{identifier}."
90
+ indexing_exact.delete
91
+ indexing_partial.delete
92
+ end
93
+
94
+ # We need to set what formatting method should be used.
95
+ # Uses the one defined in the indexer.
96
+ #
97
+ # TODO Make this more dynamic.
98
+ #
99
+ def configure
100
+ indexing_exact[:key_format] = self.key_format
101
+ indexing_partial[:key_format] = self.key_format
102
+ end
103
+
104
+ # Indexes, creates the "prepared_..." file.
105
+ #
106
+ # TODO This step could already prepare the id (if a
107
+ # per category key_format is not really needed).
108
+ #
109
+ def prepare
110
+ prepare_index_directory
111
+ indexer.index
112
+ end
113
+
114
+ # Generates all caches for this category.
115
+ #
116
+ def cache
117
+ prepare_index_directory
118
+ generate_caches
119
+ end
120
+
121
+ # Generate the cache data.
122
+ #
123
+ def generate_caches
124
+ configure
125
+ generate_caches_from_source
126
+ generate_partial
127
+ generate_caches_from_memory
128
+ dump_caches
129
+ timed_exclaim %Q{"#{identifier}": Caching finished.}
130
+ end
131
+ def generate_caches_from_source
132
+ indexing_exact.generate_caches_from_source
133
+ end
134
+ def generate_partial
135
+ indexing_partial.generate_partial_from indexing_exact.index
136
+ end
137
+ def generate_caches_from_memory
138
+ indexing_partial.generate_caches_from_memory
139
+ end
140
+ def dump_caches
141
+ indexing_exact.dump
142
+ indexing_partial.dump
143
+ end
144
+
145
+ end
File without changes
File without changes
@@ -0,0 +1,11 @@
1
+ # The original Class class.
2
+ #
3
+ class Class # :nodoc:all
4
+
5
+ def instance_delegate *methods
6
+ methods.each do |method|
7
+ module_eval("def self.#{method}(*args, &block)\nself.instance.__send__(#{method.inspect}, *args, &block)\nend\n", "(__DELEGATION__)", 1)
8
+ end
9
+ end
10
+
11
+ end