picky 1.4.1 → 1.4.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (229) hide show
  1. data/lib/picky/{alias_instances.rb → aliases.rb} +1 -3
  2. data/lib/picky/application.rb +18 -19
  3. data/lib/picky/cores.rb +1 -1
  4. data/lib/picky/generators/aliases.rb +3 -0
  5. data/lib/picky/index/base.rb +179 -0
  6. data/lib/picky/index/memory.rb +28 -0
  7. data/lib/picky/index/redis.rb +28 -0
  8. data/lib/picky/{indexes_api.rb → index_bundle.rb} +16 -16
  9. data/lib/picky/indexed/indexes.rb +11 -7
  10. data/lib/picky/indexing/indexes.rb +14 -8
  11. data/lib/picky/internals/adapters/rack/base.rb +27 -0
  12. data/lib/picky/internals/adapters/rack/live_parameters.rb +37 -0
  13. data/lib/picky/internals/adapters/rack/query.rb +63 -0
  14. data/lib/picky/internals/adapters/rack.rb +34 -0
  15. data/lib/picky/{calculations → internals/calculations}/location.rb +0 -0
  16. data/lib/picky/{cli.rb → internals/cli.rb} +0 -0
  17. data/lib/picky/{configuration → internals/configuration}/index.rb +8 -2
  18. data/lib/picky/{ext → internals/ext}/maybe_compile.rb +0 -0
  19. data/lib/picky/{ext → internals/ext}/ruby19/extconf.rb +0 -0
  20. data/lib/picky/{ext → internals/ext}/ruby19/performant.c +0 -0
  21. data/lib/picky/{extensions → internals/extensions}/array.rb +0 -0
  22. data/lib/picky/{extensions → internals/extensions}/hash.rb +0 -0
  23. data/lib/picky/{extensions → internals/extensions}/module.rb +0 -0
  24. data/lib/picky/{extensions → internals/extensions}/object.rb +0 -0
  25. data/lib/picky/{extensions → internals/extensions}/symbol.rb +0 -0
  26. data/lib/picky/internals/frontend_adapters/rack.rb +154 -0
  27. data/lib/picky/internals/generators/base.rb +19 -0
  28. data/lib/picky/internals/generators/partial/default.rb +7 -0
  29. data/lib/picky/internals/generators/partial/none.rb +35 -0
  30. data/lib/picky/internals/generators/partial/strategy.rb +29 -0
  31. data/lib/picky/internals/generators/partial/substring.rb +122 -0
  32. data/lib/picky/internals/generators/partial_generator.rb +19 -0
  33. data/lib/picky/internals/generators/similarity/default.rb +9 -0
  34. data/lib/picky/internals/generators/similarity/double_levenshtone.rb +81 -0
  35. data/lib/picky/internals/generators/similarity/none.rb +35 -0
  36. data/lib/picky/internals/generators/similarity/strategy.rb +11 -0
  37. data/lib/picky/internals/generators/similarity_generator.rb +19 -0
  38. data/lib/picky/internals/generators/strategy.rb +18 -0
  39. data/lib/picky/internals/generators/weights/default.rb +9 -0
  40. data/lib/picky/internals/generators/weights/logarithmic.rb +43 -0
  41. data/lib/picky/internals/generators/weights/strategy.rb +11 -0
  42. data/lib/picky/internals/generators/weights_generator.rb +19 -0
  43. data/lib/picky/{helpers → internals/helpers}/measuring.rb +0 -0
  44. data/lib/picky/internals/index/backend.rb +113 -0
  45. data/lib/picky/internals/index/file/basic.rb +101 -0
  46. data/lib/picky/internals/index/file/json.rb +38 -0
  47. data/lib/picky/internals/index/file/marshal.rb +38 -0
  48. data/lib/picky/internals/index/file/text.rb +60 -0
  49. data/lib/picky/internals/index/files.rb +24 -0
  50. data/lib/picky/internals/index/redis/basic.rb +77 -0
  51. data/lib/picky/internals/index/redis/list_hash.rb +46 -0
  52. data/lib/picky/internals/index/redis/string_hash.rb +35 -0
  53. data/lib/picky/internals/index/redis.rb +44 -0
  54. data/lib/picky/internals/indexed/bundle/base.rb +72 -0
  55. data/lib/picky/internals/indexed/bundle/memory.rb +69 -0
  56. data/lib/picky/internals/indexed/bundle/redis.rb +70 -0
  57. data/lib/picky/internals/indexed/categories.rb +135 -0
  58. data/lib/picky/internals/indexed/category.rb +90 -0
  59. data/lib/picky/internals/indexed/index.rb +57 -0
  60. data/lib/picky/{indexed → internals/indexed}/wrappers/bundle/calculation.rb +0 -0
  61. data/lib/picky/{indexed → internals/indexed}/wrappers/bundle/location.rb +4 -2
  62. data/lib/picky/{indexed → internals/indexed}/wrappers/bundle/wrapper.rb +1 -1
  63. data/lib/picky/internals/indexed/wrappers/exact_first.rb +65 -0
  64. data/lib/picky/{indexers → internals/indexers}/no_source_specified_error.rb +0 -0
  65. data/lib/picky/{indexers → internals/indexers}/serial.rb +2 -2
  66. data/lib/picky/{indexers → internals/indexers}/solr.rb +0 -0
  67. data/lib/picky/internals/indexing/bundle/base.rb +219 -0
  68. data/lib/picky/internals/indexing/bundle/memory.rb +25 -0
  69. data/lib/picky/internals/indexing/bundle/redis.rb +28 -0
  70. data/lib/picky/internals/indexing/bundle/super_base.rb +65 -0
  71. data/lib/picky/internals/indexing/categories.rb +42 -0
  72. data/lib/picky/internals/indexing/category.rb +120 -0
  73. data/lib/picky/internals/indexing/index.rb +67 -0
  74. data/lib/picky/{performant.rb → internals/performant.rb} +0 -0
  75. data/lib/picky/internals/query/allocation.rb +88 -0
  76. data/lib/picky/internals/query/allocations.rb +137 -0
  77. data/lib/picky/internals/query/combination.rb +80 -0
  78. data/lib/picky/internals/query/combinations/base.rb +84 -0
  79. data/lib/picky/internals/query/combinations/memory.rb +58 -0
  80. data/lib/picky/internals/query/combinations/redis.rb +59 -0
  81. data/lib/picky/internals/query/indexes.rb +180 -0
  82. data/lib/picky/internals/query/qualifiers.rb +81 -0
  83. data/lib/picky/internals/query/token.rb +215 -0
  84. data/lib/picky/internals/query/tokens.rb +89 -0
  85. data/lib/picky/{query → internals/query}/weights.rb +0 -0
  86. data/lib/picky/internals/results/base.rb +106 -0
  87. data/lib/picky/internals/results/full.rb +17 -0
  88. data/lib/picky/internals/results/live.rb +17 -0
  89. data/lib/picky/{solr → internals/solr}/schema_generator.rb +0 -0
  90. data/lib/picky/internals/tokenizers/base.rb +166 -0
  91. data/lib/picky/internals/tokenizers/index.rb +63 -0
  92. data/lib/picky/internals/tokenizers/query.rb +79 -0
  93. data/lib/picky/loader.rb +148 -112
  94. data/lib/picky/query/base.rb +57 -26
  95. data/lib/picky/query/full.rb +1 -1
  96. data/lib/picky/query/live.rb +1 -1
  97. data/lib/picky/sources/db.rb +27 -6
  98. data/lib/tasks/index.rake +3 -3
  99. data/lib/tasks/try.rake +2 -2
  100. data/spec/lib/aliases_spec.rb +9 -0
  101. data/spec/lib/application_spec.rb +3 -3
  102. data/spec/lib/generators/aliases_spec.rb +1 -0
  103. data/spec/lib/{index_api_spec.rb → index/base_spec.rb} +7 -7
  104. data/spec/lib/index_bundle_spec.rb +71 -0
  105. data/spec/lib/indexed/indexes_spec.rb +61 -0
  106. data/spec/lib/indexing/indexes_spec.rb +94 -24
  107. data/spec/lib/{adapters → internals/adapters}/rack/base_spec.rb +2 -2
  108. data/spec/lib/{adapters → internals/adapters}/rack/live_parameters_spec.rb +2 -2
  109. data/spec/lib/{adapters → internals/adapters}/rack/query_spec.rb +2 -2
  110. data/spec/lib/{calculations → internals/calculations}/location_spec.rb +0 -0
  111. data/spec/lib/{cli_spec.rb → internals/cli_spec.rb} +4 -1
  112. data/spec/lib/{configuration → internals/configuration}/index_spec.rb +1 -1
  113. data/spec/lib/{cores_spec.rb → internals/cores_spec.rb} +0 -0
  114. data/spec/lib/{extensions → internals/extensions}/array_spec.rb +0 -0
  115. data/spec/lib/{extensions → internals/extensions}/hash_spec.rb +0 -0
  116. data/spec/lib/{extensions → internals/extensions}/module_spec.rb +0 -0
  117. data/spec/lib/{extensions → internals/extensions}/object_spec.rb +0 -0
  118. data/spec/lib/{extensions → internals/extensions}/symbol_spec.rb +0 -0
  119. data/spec/lib/{frontend_adapters → internals/frontend_adapters}/rack_spec.rb +11 -11
  120. data/spec/lib/{cacher → internals/generators}/cacher_strategy_spec.rb +2 -2
  121. data/spec/lib/internals/generators/partial/default_spec.rb +17 -0
  122. data/spec/lib/internals/generators/partial/none_spec.rb +17 -0
  123. data/spec/lib/{cacher → internals/generators}/partial/substring_spec.rb +26 -27
  124. data/spec/lib/{cacher → internals/generators}/partial_generator_spec.rb +5 -5
  125. data/spec/lib/{cacher → internals/generators}/similarity/double_levenshtone_spec.rb +4 -4
  126. data/spec/lib/{cacher → internals/generators}/similarity/none_spec.rb +2 -2
  127. data/spec/lib/{cacher → internals/generators}/similarity_generator_spec.rb +4 -4
  128. data/spec/lib/{cacher → internals/generators}/weights/logarithmic_spec.rb +2 -2
  129. data/spec/lib/internals/generators/weights_generator_spec.rb +21 -0
  130. data/spec/lib/{helpers → internals/helpers}/measuring_spec.rb +0 -0
  131. data/spec/lib/{index → internals/index}/file/basic_spec.rb +2 -2
  132. data/spec/lib/{index → internals/index}/file/json_spec.rb +2 -2
  133. data/spec/lib/{index → internals/index}/file/marshal_spec.rb +2 -2
  134. data/spec/lib/{index → internals/index}/file/text_spec.rb +2 -2
  135. data/spec/lib/{index → internals/index}/files_spec.rb +2 -2
  136. data/spec/lib/{indexed/bundle_spec.rb → internals/indexed/bundle/memory_spec.rb} +4 -5
  137. data/spec/lib/{indexed → internals/indexed}/categories_spec.rb +13 -13
  138. data/spec/lib/{indexed → internals/indexed}/category_spec.rb +59 -32
  139. data/spec/lib/{indexed → internals/indexed}/index_spec.rb +5 -5
  140. data/spec/lib/{indexed → internals/indexed}/wrappers/bundle/calculation_spec.rb +0 -0
  141. data/spec/lib/{indexed → internals/indexed}/wrappers/bundle/wrapper_spec.rb +0 -0
  142. data/spec/lib/{indexed → internals/indexed}/wrappers/exact_first_spec.rb +5 -5
  143. data/spec/lib/{indexers → internals/indexers}/serial_spec.rb +0 -0
  144. data/spec/lib/{indexing/bundle_partial_generation_speed_spec.rb → internals/indexing/bundle/memory_partial_generation_speed_spec.rb} +3 -3
  145. data/spec/lib/{indexing/bundle_spec.rb → internals/indexing/bundle/memory_spec.rb} +3 -3
  146. data/spec/lib/{index/bundle_spec.rb → internals/indexing/bundle/super_base_spec.rb} +9 -3
  147. data/spec/lib/{indexing → internals/indexing}/category_spec.rb +3 -3
  148. data/spec/lib/{indexing → internals/indexing}/index_spec.rb +3 -3
  149. data/spec/lib/internals/indexing/indexes_spec.rb +36 -0
  150. data/spec/lib/{interfaces → internals/interfaces}/live_parameters_spec.rb +0 -0
  151. data/spec/lib/internals/results/base_spec.rb +105 -0
  152. data/spec/lib/internals/results/full_spec.rb +78 -0
  153. data/spec/lib/internals/results/live_spec.rb +88 -0
  154. data/spec/lib/{solr → internals/solr}/schema_generator_spec.rb +0 -0
  155. data/spec/lib/{tokenizers → internals/tokenizers}/base_spec.rb +3 -3
  156. data/spec/lib/{tokenizers → internals/tokenizers}/index_spec.rb +9 -9
  157. data/spec/lib/{tokenizers → internals/tokenizers}/query_spec.rb +11 -11
  158. data/spec/lib/query/allocation_spec.rb +12 -12
  159. data/spec/lib/query/allocations_spec.rb +19 -19
  160. data/spec/lib/query/base_spec.rb +28 -4
  161. data/spec/lib/query/combination_spec.rb +8 -9
  162. data/spec/lib/query/combinations/base_spec.rb +116 -0
  163. data/spec/lib/query/{combinations_spec.rb → combinations/memory_spec.rb} +14 -14
  164. data/spec/lib/query/combinations/redis_spec.rb +132 -0
  165. data/spec/lib/query/full_spec.rb +2 -2
  166. data/spec/lib/query/indexes_spec.rb +81 -0
  167. data/spec/lib/query/live_spec.rb +3 -3
  168. data/spec/lib/query/qualifiers_spec.rb +6 -6
  169. data/spec/lib/query/token_spec.rb +38 -38
  170. data/spec/lib/query/tokens_spec.rb +35 -35
  171. data/spec/lib/sources/db_spec.rb +23 -18
  172. metadata +212 -181
  173. data/lib/picky/adapters/rack/base.rb +0 -23
  174. data/lib/picky/adapters/rack/live_parameters.rb +0 -33
  175. data/lib/picky/adapters/rack/query.rb +0 -59
  176. data/lib/picky/adapters/rack.rb +0 -28
  177. data/lib/picky/cacher/convenience.rb +0 -3
  178. data/lib/picky/cacher/generator.rb +0 -15
  179. data/lib/picky/cacher/partial/default.rb +0 -5
  180. data/lib/picky/cacher/partial/none.rb +0 -31
  181. data/lib/picky/cacher/partial/strategy.rb +0 -21
  182. data/lib/picky/cacher/partial/substring.rb +0 -118
  183. data/lib/picky/cacher/partial_generator.rb +0 -15
  184. data/lib/picky/cacher/similarity/default.rb +0 -7
  185. data/lib/picky/cacher/similarity/double_levenshtone.rb +0 -77
  186. data/lib/picky/cacher/similarity/none.rb +0 -31
  187. data/lib/picky/cacher/similarity/strategy.rb +0 -9
  188. data/lib/picky/cacher/similarity_generator.rb +0 -15
  189. data/lib/picky/cacher/strategy.rb +0 -12
  190. data/lib/picky/cacher/weights/default.rb +0 -7
  191. data/lib/picky/cacher/weights/logarithmic.rb +0 -39
  192. data/lib/picky/cacher/weights/strategy.rb +0 -9
  193. data/lib/picky/cacher/weights_generator.rb +0 -15
  194. data/lib/picky/frontend_adapters/rack.rb +0 -150
  195. data/lib/picky/index/bundle.rb +0 -54
  196. data/lib/picky/index/file/basic.rb +0 -97
  197. data/lib/picky/index/file/json.rb +0 -34
  198. data/lib/picky/index/file/marshal.rb +0 -34
  199. data/lib/picky/index/file/text.rb +0 -56
  200. data/lib/picky/index/files.rb +0 -118
  201. data/lib/picky/index_api.rb +0 -175
  202. data/lib/picky/indexed/bundle.rb +0 -54
  203. data/lib/picky/indexed/categories.rb +0 -131
  204. data/lib/picky/indexed/category.rb +0 -85
  205. data/lib/picky/indexed/index.rb +0 -39
  206. data/lib/picky/indexed/wrappers/exact_first.rb +0 -61
  207. data/lib/picky/indexing/bundle.rb +0 -213
  208. data/lib/picky/indexing/categories.rb +0 -38
  209. data/lib/picky/indexing/category.rb +0 -117
  210. data/lib/picky/indexing/index.rb +0 -55
  211. data/lib/picky/query/allocation.rb +0 -82
  212. data/lib/picky/query/allocations.rb +0 -130
  213. data/lib/picky/query/combination.rb +0 -74
  214. data/lib/picky/query/combinations.rb +0 -105
  215. data/lib/picky/query/qualifiers.rb +0 -77
  216. data/lib/picky/query/token.rb +0 -202
  217. data/lib/picky/query/tokens.rb +0 -86
  218. data/lib/picky/query/weigher.rb +0 -165
  219. data/lib/picky/results/base.rb +0 -102
  220. data/lib/picky/results/full.rb +0 -13
  221. data/lib/picky/results/live.rb +0 -13
  222. data/lib/picky/tokenizers/base.rb +0 -161
  223. data/lib/picky/tokenizers/index.rb +0 -58
  224. data/lib/picky/tokenizers/query.rb +0 -74
  225. data/spec/lib/cacher/partial/default_spec.rb +0 -15
  226. data/spec/lib/cacher/partial/none_spec.rb +0 -17
  227. data/spec/lib/cacher/weights_generator_spec.rb +0 -21
  228. data/spec/lib/results/base_spec.rb +0 -257
  229. data/spec/lib/results/live_spec.rb +0 -15
@@ -1,85 +0,0 @@
1
- module Indexed
2
-
3
- # An index category holds an exact and a partial index for a given category.
4
- #
5
- # For example an index category for names holds an exact and
6
- # a partial index bundle for names.
7
- #
8
- class Category
9
-
10
- attr_accessor :exact
11
- attr_reader :identifier, :name
12
- attr_writer :partial
13
-
14
- #
15
- #
16
- def initialize name, index, options = {}
17
- @name = name
18
-
19
- configuration = Configuration::Index.new index, self
20
-
21
- @identifier = configuration.identifier
22
-
23
- # TODO Push the defaults out into the index.
24
- #
25
- @partial_strategy = options[:partial] || Cacher::Partial::Default
26
- similarity = options[:similarity] || Cacher::Similarity::Default
27
-
28
- @exact = options[:exact_index_bundle] || Bundle.new(:exact, configuration, similarity)
29
- @partial = options[:partial_index_bundle] || Bundle.new(:partial, configuration, similarity)
30
-
31
- @exact = exact_lambda.call(@exact, @partial) if exact_lambda = options[:exact_lambda]
32
- @partial = partial_lambda.call(@exact, @partial) if partial_lambda = options[:partial_lambda]
33
-
34
- # TODO Extract?
35
- #
36
- Query::Qualifiers.add(configuration.category_name, generate_qualifiers_from(options) || [name])
37
- end
38
-
39
- # TODO Move to Index.
40
- #
41
- def generate_qualifiers_from options
42
- options[:qualifiers] || options[:qualifier] && [options[:qualifier]]
43
- end
44
-
45
- # Loads the index from cache.
46
- #
47
- def load_from_cache
48
- timed_exclaim "Loading index #{identifier}."
49
- exact.load
50
- partial.load
51
- end
52
-
53
- # Gets the weight for this token's text.
54
- #
55
- def weight token
56
- bundle_for(token).weight token.text
57
- end
58
-
59
- # Gets the ids for this token's text.
60
- #
61
- def ids token
62
- bundle_for(token).ids token.text
63
- end
64
-
65
- # Returns the right index bundle for this token.
66
- #
67
- def bundle_for token
68
- token.partial?? partial : exact
69
- end
70
-
71
- # The partial strategy defines whether to really use the partial index.
72
- #
73
- def partial
74
- @partial_strategy.use_exact_for_partial?? @exact : @partial
75
- end
76
-
77
- #
78
- #
79
- def combination_for token
80
- weight(token) && ::Query::Combination.new(token, self)
81
- end
82
-
83
- end
84
-
85
- end
@@ -1,39 +0,0 @@
1
- module Indexed
2
-
3
- #
4
- #
5
- class Index
6
-
7
- attr_reader :name, :result_identifier, :combinator, :categories
8
-
9
- delegate :load_from_cache,
10
- :to => :categories
11
-
12
- def initialize name, options = {}
13
- @name = name
14
-
15
- @result_identifier = options[:result_identifier] || name
16
- ignore_unassigned_tokens = options[:ignore_unassigned_tokens] || false # TODO Move to query, somehow.
17
-
18
- @categories = Categories.new ignore_unassigned_tokens: ignore_unassigned_tokens
19
- end
20
-
21
- # TODO Doc.
22
- #
23
- def define_category category_name, options = {}
24
- new_category = Category.new category_name, self, options
25
- categories << new_category
26
- new_category
27
- end
28
-
29
- # Return the possible combinations for this token.
30
- #
31
- # A combination is a tuple <token, index_bundle>.
32
- #
33
- def possible_combinations token
34
- categories.possible_combinations_for token
35
- end
36
-
37
- end
38
-
39
- end
@@ -1,61 +0,0 @@
1
- # encoding: utf-8
2
- #
3
- module Indexed
4
-
5
- # TODO Spec
6
- #
7
- module Wrappers
8
-
9
- # This index combines an exact and partial index.
10
- # It serves to order the results such that exact hits are found first.
11
- #
12
- # TODO Need to use the right subtokens. Bake in?
13
- #
14
- class ExactFirst < Bundle
15
-
16
- delegate :similar,
17
- :identifier,
18
- :name,
19
- :to => :@exact
20
- delegate :index,
21
- :category,
22
- :weight,
23
- :generate_partial_from,
24
- :generate_caches_from_memory,
25
- :generate_derived,
26
- :dump,
27
- :load,
28
- :to => :@partial
29
-
30
- def initialize category
31
- @exact = category.exact
32
- @partial = category.partial
33
- end
34
-
35
- def self.wrap index_or_category
36
- if index_or_category.respond_to? :categories
37
- wrap_each_of index_or_category.categories
38
- index_or_category
39
- else
40
- new index_or_category
41
- end
42
- end
43
- # TODO Do not extract categories!
44
- #
45
- def self.wrap_each_of categories
46
- categories.categories.collect! { |category| new(category) }
47
- end
48
-
49
- def ids text
50
- @exact.ids(text) + @partial.ids(text)
51
- end
52
-
53
- def weight text
54
- [@exact.weight(text) || 0, @partial.weight(text) || 0].max
55
- end
56
-
57
- end
58
-
59
- end
60
-
61
- end
@@ -1,213 +0,0 @@
1
- # encoding: utf-8
2
- #
3
- module Indexing # :nodoc:all
4
-
5
- # This is the indexing bundle.
6
- # It does all menial tasks that have nothing to do
7
- # with the actual index running etc.
8
- #
9
- class Bundle < Index::Bundle
10
-
11
- attr_accessor :partial_strategy, :weights_strategy
12
- attr_reader :files
13
-
14
- # Path is in which directory the cache is located.
15
- #
16
- def initialize name, configuration, similarity_strategy, partial_strategy, weights_strategy
17
- super name, configuration, similarity_strategy
18
-
19
- @partial_strategy = partial_strategy
20
- @weights_strategy = weights_strategy
21
- end
22
-
23
- # Generation
24
- #
25
-
26
- # This method
27
- # * loads the base index from the db
28
- # * generates derived indexes
29
- # * dumps all the indexes into files
30
- #
31
- def generate_caches_from_source
32
- load_from_index_file
33
- generate_caches_from_memory
34
- end
35
- # Generates derived indexes from the index and dumps.
36
- #
37
- # Note: assumes that there is something in the index
38
- #
39
- def generate_caches_from_memory
40
- cache_from_memory_generation_message
41
- generate_derived
42
- end
43
- def cache_from_memory_generation_message
44
- timed_exclaim "CACHE FROM MEMORY #{identifier}."
45
- end
46
-
47
- # Generates the weights and similarity from the main index.
48
- #
49
- def generate_derived
50
- generate_weights
51
- generate_similarity
52
- end
53
-
54
- # Load the data from the db.
55
- #
56
- def load_from_index_file
57
- load_from_index_generation_message
58
- clear
59
- retrieve
60
- end
61
- def load_from_index_generation_message
62
- timed_exclaim "LOAD INDEX #{identifier}."
63
- end
64
- # Retrieves the prepared index data into the index.
65
- #
66
- # This is in preparation for generating
67
- # derived indexes (like weights, similarity)
68
- # and later dumping the optimized index.
69
- #
70
- def retrieve
71
- key_format = self[:key_format] || :to_i
72
- files.retrieve do |id, token|
73
- initialize_index_for token
74
- index[token] << id.send(key_format)
75
- end
76
- end
77
- # Sets up a piece of the index for the given token.
78
- #
79
- def initialize_index_for token
80
- index[token] ||= []
81
- end
82
-
83
- # Generators.
84
- #
85
- # TODO Move somewhere more fitting.
86
- #
87
-
88
- # Generates a new index (writes its index) using the
89
- # partial caching strategy of this bundle.
90
- #
91
- def generate_partial
92
- generator = Cacher::PartialGenerator.new self.index
93
- self.index = generator.generate self.partial_strategy
94
- end
95
- # Generate a partial index from the given exact index.
96
- #
97
- def generate_partial_from exact_index
98
- timed_exclaim "PARTIAL GENERATE #{identifier}."
99
- self.index = exact_index
100
- self.generate_partial
101
- self
102
- end
103
- # Generates a new similarity index (writes its index) using the
104
- # given similarity caching strategy.
105
- #
106
- def generate_similarity
107
- generator = Cacher::SimilarityGenerator.new self.index
108
- self.similarity = generator.generate self.similarity_strategy
109
- end
110
- # Generates a new weights index (writes its index) using the
111
- # given weight caching strategy.
112
- #
113
- def generate_weights
114
- generator = Cacher::WeightsGenerator.new self.index
115
- self.weights = generator.generate self.weights_strategy
116
- end
117
-
118
- # Saves the indexes in a dump file.
119
- #
120
- def dump
121
- dump_index
122
- dump_similarity
123
- dump_weights
124
- dump_configuration
125
- end
126
- # Dumps the core index.
127
- #
128
- def dump_index
129
- timed_exclaim "DUMP INDEX #{identifier}."
130
- files.dump_index index
131
- end
132
- # Dumps the weights index.
133
- #
134
- def dump_weights
135
- timed_exclaim "DUMP WEIGHTS #{identifier}."
136
- files.dump_weights weights
137
- end
138
- # Dumps the similarity index.
139
- #
140
- def dump_similarity
141
- timed_exclaim "DUMP SIMILARITY #{identifier}."
142
- files.dump_similarity similarity
143
- end
144
- # Dumps the configuration.
145
- #
146
- def dump_configuration
147
- timed_exclaim "DUMP CONFIGURATION #{identifier}."
148
- files.dump_configuration configuration
149
- end
150
-
151
- # Alerts the user if an index is missing.
152
- #
153
- def raise_unless_cache_exists
154
- raise_unless_index_exists
155
- raise_unless_similarity_exists
156
- end
157
- # Alerts the user if one of the necessary indexes
158
- # (core, weights) is missing.
159
- #
160
- def raise_unless_index_exists
161
- if partial_strategy.saved?
162
- warn_if_index_small
163
- raise_unless_index_ok
164
- end
165
- end
166
- # Alerts the user if the similarity
167
- # index is missing (given that it's used).
168
- #
169
- def raise_unless_similarity_exists
170
- if similarity_strategy.saved?
171
- warn_if_similarity_small
172
- raise_unless_similarity_ok
173
- end
174
- end
175
- # Warns the user if the similarity index is small.
176
- #
177
- def warn_if_similarity_small
178
- warn_cache_small :similarity if files.similarity_cache_small?
179
- end
180
- # Alerts the user if the similarity index is not there.
181
- #
182
- def raise_unless_similarity_ok
183
- raise_cache_missing :similarity unless files.similarity_cache_ok?
184
- end
185
-
186
- # TODO Spec on down.
187
- #
188
-
189
- # Warns the user if the core or weights indexes are small.
190
- #
191
- def warn_if_index_small
192
- warn_cache_small :index if files.index_cache_small?
193
- warn_cache_small :weights if files.weights_cache_small?
194
- end
195
- # Alerts the user if the core or weights indexes are not there.
196
- #
197
- def raise_unless_index_ok
198
- raise_cache_missing :index unless files.index_cache_ok?
199
- raise_cache_missing :weights unless files.weights_cache_ok?
200
- end
201
- # Outputs a warning for the given cache.
202
- #
203
- def warn_cache_small what
204
- puts "Warning: #{what} cache for #{identifier} smaller than 16 bytes."
205
- end
206
- # Raises an appropriate error message for the given cache.
207
- #
208
- def raise_cache_missing what
209
- raise "#{what} cache for #{identifier} missing."
210
- end
211
-
212
- end
213
- end
@@ -1,38 +0,0 @@
1
- module Indexing
2
-
3
- class Categories
4
-
5
- attr_reader :categories
6
-
7
- each_delegate :index,
8
- :cache,
9
- :generate_caches,
10
- :backup_caches,
11
- :restore_caches,
12
- :check_caches,
13
- :clear_caches,
14
- :create_directory_structure,
15
- :to => :categories
16
-
17
- def initialize
18
- @categories = []
19
- end
20
-
21
- def << category
22
- categories << category
23
- end
24
-
25
- def find category_name
26
- category_name = category_name.to_sym
27
-
28
- categories.each do |category|
29
- next unless category.name == category_name
30
- return category
31
- end
32
-
33
- raise %Q{Index category "#{category_name}" not found. Possible categories: "#{categories.map(&:name).join('", "')}".}
34
- end
35
-
36
- end
37
-
38
- end
@@ -1,117 +0,0 @@
1
- module Indexing
2
-
3
- class Category
4
-
5
- attr_reader :exact, :partial, :name, :configuration, :indexer
6
-
7
- # Mandatory params:
8
- # * name: Category name to use as identifier and file names.
9
- # * index: Index to which this category is attached.
10
- # Options:
11
- # * partial: Partial::None.new, Partial::Substring.new(from:start_char, to:up_to_char) (defaults from:-3, to:-1)
12
- # * similarity: Similarity::None.new (default), Similarity::Phonetic.new(amount_of_similarly_linked_words)
13
- # * source: Use if the category should use a different source.
14
- # * from: The source category identifier to take the data from.
15
- #
16
- # Advanced Options (TODO):
17
- #
18
- # * weights:
19
- # * tokenizer:
20
- # * exact_indexing_bundle:
21
- # * partial_indexing_bundle:
22
- #
23
- def initialize name, index, options = {}
24
- @name = name
25
- @from = options[:from]
26
-
27
- # Now we have enough info to combine the index and the category.
28
- #
29
- @configuration = Configuration::Index.new index, self
30
-
31
- @tokenizer = options[:tokenizer] || Tokenizers::Index.default
32
- @indexer = Indexers::Serial.new configuration, options[:source], @tokenizer
33
-
34
- # TODO Push into Bundle.
35
- #
36
- partial = options[:partial] || Cacher::Partial::Default
37
- weights = options[:weights] || Cacher::Weights::Default
38
- similarity = options[:similarity] || Cacher::Similarity::Default
39
-
40
- @exact = options[:exact_indexing_bundle] || Bundle.new(:exact, configuration, similarity, Cacher::Partial::None.new, weights)
41
- @partial = options[:partial_indexing_bundle] || Bundle.new(:partial, configuration, Cacher::Similarity::None.new, partial, weights)
42
- end
43
-
44
- delegate :identifier, :prepare_index_directory, :to => :configuration
45
- delegate :source, :source=, :tokenizer, :tokenizer=, :to => :indexer
46
-
47
- def from
48
- @from || name
49
- end
50
-
51
- # TODO Spec.
52
- #
53
- def backup_caches
54
- timed_exclaim "Backing up #{identifier}."
55
- exact.backup
56
- partial.backup
57
- end
58
- def restore_caches
59
- timed_exclaim "Restoring #{identifier}."
60
- exact.restore
61
- partial.restore
62
- end
63
- def check_caches
64
- timed_exclaim "Checking #{identifier}."
65
- exact.raise_unless_cache_exists
66
- partial.raise_unless_cache_exists
67
- end
68
- def clear_caches
69
- timed_exclaim "Deleting #{identifier}."
70
- exact.delete
71
- partial.delete
72
- end
73
-
74
- def index
75
- prepare_index_directory
76
- indexer.index
77
- end
78
-
79
- # Generates all caches for this category.
80
- #
81
- def cache
82
- prepare_index_directory
83
- configure
84
- generate_caches
85
- end
86
- # We need to set what formatting method should be used.
87
- # Uses the one defined in the indexer.
88
- #
89
- def configure
90
- key_format = indexer.key_format
91
- exact[:key_format] = key_format
92
- partial[:key_format] = key_format
93
- end
94
- def generate_caches
95
- generate_caches_from_source
96
- generate_partial
97
- generate_caches_from_memory
98
- dump_caches
99
- timed_exclaim "CACHE FINISHED #{identifier}."
100
- end
101
- def generate_caches_from_source
102
- exact.generate_caches_from_source
103
- end
104
- def generate_partial
105
- partial.generate_partial_from exact.index
106
- end
107
- def generate_caches_from_memory
108
- partial.generate_caches_from_memory
109
- end
110
- def dump_caches
111
- exact.dump
112
- partial.dump
113
- end
114
-
115
- end
116
-
117
- end
@@ -1,55 +0,0 @@
1
- module Indexing
2
-
3
- class Index
4
-
5
- attr_reader :name, :source, :categories, :after_indexing
6
-
7
- # Delegators for indexing.
8
- #
9
- delegate :connect_backend,
10
- :to => :source
11
-
12
- delegate :index,
13
- :cache,
14
- :generate_caches,
15
- :backup_caches,
16
- :restore_caches,
17
- :check_caches,
18
- :clear_caches,
19
- :create_directory_structure,
20
- :to => :categories
21
-
22
- def initialize name, source, options = {}
23
- @name = name
24
- @source = source
25
-
26
- @after_indexing = options[:after_indexing]
27
-
28
- @categories = Categories.new
29
- end
30
-
31
- # TODO Spec. Doc.
32
- #
33
- def define_category category_name, options = {}
34
- options = default_category_options.merge options
35
-
36
- new_category = Category.new category_name, self, options
37
- categories << new_category
38
- new_category
39
- end
40
-
41
- # By default, the category uses the index's source.
42
- #
43
- def default_category_options
44
- { :source => @source }
45
- end
46
-
47
- # Indexing.
48
- #
49
- def take_snapshot
50
- source.take_snapshot self
51
- end
52
-
53
- end
54
-
55
- end
@@ -1,82 +0,0 @@
1
- module Query
2
- # An allocation has a number of combinations:
3
- # [token, index] [other_token, other_index], ...
4
- #
5
- class Allocation # :nodoc:all
6
-
7
- attr_reader :count, :ids, :score, :combinations, :result_identifier
8
-
9
- #
10
- #
11
- def initialize combinations, result_identifier
12
- @combinations = combinations
13
- @result_identifier = result_identifier
14
- end
15
-
16
- def hash
17
- @combinations.hash
18
- end
19
- def eql? other_allocation
20
- true # FIXME
21
- # @combinations.eql? other_allocation.combinations
22
- end
23
-
24
- # Scores its combinations and caches the result.
25
- #
26
- def calculate_score weights
27
- @score ||= @combinations.calculate_score(weights)
28
- end
29
-
30
- # Asks the combinations for the (intersected) ids.
31
- #
32
- def calculate_ids
33
- @combinations.ids
34
- end
35
-
36
- # This starts the searching process.
37
- #
38
- def process! amount, offset
39
- ids = calculate_ids
40
- @count = ids.size # cache the count before throwing away the ids
41
- @ids = ids.slice!(offset, amount) || [] # slice out the relevant part
42
- end
43
-
44
- #
45
- #
46
- def keep identifiers = [] # categories
47
- @combinations.keep identifiers
48
- end
49
- #
50
- #
51
- def remove identifiers = [] # categories
52
- @combinations.remove identifiers
53
- end
54
-
55
- # Sort highest score first.
56
- #
57
- def <=> other_allocation
58
- other_allocation.score <=> self.score
59
- end
60
-
61
- # Transform the allocation into result form.
62
- #
63
- def to_result
64
- [self.result_identifier, self.score, self.count, @combinations.to_result, self.ids] if self.count > 0
65
- end
66
-
67
- # Json representation of this allocation.
68
- #
69
- # Note: Delegates to to_result.
70
- #
71
- def to_json
72
- to_result.to_json
73
- end
74
-
75
- #
76
- #
77
- def to_s
78
- "Allocation: #{to_result.join(', ')}"
79
- end
80
-
81
- end
82
- end