picky 1.4.1 → 1.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (229) hide show
  1. data/lib/picky/{alias_instances.rb → aliases.rb} +1 -3
  2. data/lib/picky/application.rb +18 -19
  3. data/lib/picky/cores.rb +1 -1
  4. data/lib/picky/generators/aliases.rb +3 -0
  5. data/lib/picky/index/base.rb +179 -0
  6. data/lib/picky/index/memory.rb +28 -0
  7. data/lib/picky/index/redis.rb +28 -0
  8. data/lib/picky/{indexes_api.rb → index_bundle.rb} +16 -16
  9. data/lib/picky/indexed/indexes.rb +11 -7
  10. data/lib/picky/indexing/indexes.rb +14 -8
  11. data/lib/picky/internals/adapters/rack/base.rb +27 -0
  12. data/lib/picky/internals/adapters/rack/live_parameters.rb +37 -0
  13. data/lib/picky/internals/adapters/rack/query.rb +63 -0
  14. data/lib/picky/internals/adapters/rack.rb +34 -0
  15. data/lib/picky/{calculations → internals/calculations}/location.rb +0 -0
  16. data/lib/picky/{cli.rb → internals/cli.rb} +0 -0
  17. data/lib/picky/{configuration → internals/configuration}/index.rb +8 -2
  18. data/lib/picky/{ext → internals/ext}/maybe_compile.rb +0 -0
  19. data/lib/picky/{ext → internals/ext}/ruby19/extconf.rb +0 -0
  20. data/lib/picky/{ext → internals/ext}/ruby19/performant.c +0 -0
  21. data/lib/picky/{extensions → internals/extensions}/array.rb +0 -0
  22. data/lib/picky/{extensions → internals/extensions}/hash.rb +0 -0
  23. data/lib/picky/{extensions → internals/extensions}/module.rb +0 -0
  24. data/lib/picky/{extensions → internals/extensions}/object.rb +0 -0
  25. data/lib/picky/{extensions → internals/extensions}/symbol.rb +0 -0
  26. data/lib/picky/internals/frontend_adapters/rack.rb +154 -0
  27. data/lib/picky/internals/generators/base.rb +19 -0
  28. data/lib/picky/internals/generators/partial/default.rb +7 -0
  29. data/lib/picky/internals/generators/partial/none.rb +35 -0
  30. data/lib/picky/internals/generators/partial/strategy.rb +29 -0
  31. data/lib/picky/internals/generators/partial/substring.rb +122 -0
  32. data/lib/picky/internals/generators/partial_generator.rb +19 -0
  33. data/lib/picky/internals/generators/similarity/default.rb +9 -0
  34. data/lib/picky/internals/generators/similarity/double_levenshtone.rb +81 -0
  35. data/lib/picky/internals/generators/similarity/none.rb +35 -0
  36. data/lib/picky/internals/generators/similarity/strategy.rb +11 -0
  37. data/lib/picky/internals/generators/similarity_generator.rb +19 -0
  38. data/lib/picky/internals/generators/strategy.rb +18 -0
  39. data/lib/picky/internals/generators/weights/default.rb +9 -0
  40. data/lib/picky/internals/generators/weights/logarithmic.rb +43 -0
  41. data/lib/picky/internals/generators/weights/strategy.rb +11 -0
  42. data/lib/picky/internals/generators/weights_generator.rb +19 -0
  43. data/lib/picky/{helpers → internals/helpers}/measuring.rb +0 -0
  44. data/lib/picky/internals/index/backend.rb +113 -0
  45. data/lib/picky/internals/index/file/basic.rb +101 -0
  46. data/lib/picky/internals/index/file/json.rb +38 -0
  47. data/lib/picky/internals/index/file/marshal.rb +38 -0
  48. data/lib/picky/internals/index/file/text.rb +60 -0
  49. data/lib/picky/internals/index/files.rb +24 -0
  50. data/lib/picky/internals/index/redis/basic.rb +77 -0
  51. data/lib/picky/internals/index/redis/list_hash.rb +46 -0
  52. data/lib/picky/internals/index/redis/string_hash.rb +35 -0
  53. data/lib/picky/internals/index/redis.rb +44 -0
  54. data/lib/picky/internals/indexed/bundle/base.rb +72 -0
  55. data/lib/picky/internals/indexed/bundle/memory.rb +69 -0
  56. data/lib/picky/internals/indexed/bundle/redis.rb +70 -0
  57. data/lib/picky/internals/indexed/categories.rb +135 -0
  58. data/lib/picky/internals/indexed/category.rb +90 -0
  59. data/lib/picky/internals/indexed/index.rb +57 -0
  60. data/lib/picky/{indexed → internals/indexed}/wrappers/bundle/calculation.rb +0 -0
  61. data/lib/picky/{indexed → internals/indexed}/wrappers/bundle/location.rb +4 -2
  62. data/lib/picky/{indexed → internals/indexed}/wrappers/bundle/wrapper.rb +1 -1
  63. data/lib/picky/internals/indexed/wrappers/exact_first.rb +65 -0
  64. data/lib/picky/{indexers → internals/indexers}/no_source_specified_error.rb +0 -0
  65. data/lib/picky/{indexers → internals/indexers}/serial.rb +2 -2
  66. data/lib/picky/{indexers → internals/indexers}/solr.rb +0 -0
  67. data/lib/picky/internals/indexing/bundle/base.rb +219 -0
  68. data/lib/picky/internals/indexing/bundle/memory.rb +25 -0
  69. data/lib/picky/internals/indexing/bundle/redis.rb +28 -0
  70. data/lib/picky/internals/indexing/bundle/super_base.rb +65 -0
  71. data/lib/picky/internals/indexing/categories.rb +42 -0
  72. data/lib/picky/internals/indexing/category.rb +120 -0
  73. data/lib/picky/internals/indexing/index.rb +67 -0
  74. data/lib/picky/{performant.rb → internals/performant.rb} +0 -0
  75. data/lib/picky/internals/query/allocation.rb +88 -0
  76. data/lib/picky/internals/query/allocations.rb +137 -0
  77. data/lib/picky/internals/query/combination.rb +80 -0
  78. data/lib/picky/internals/query/combinations/base.rb +84 -0
  79. data/lib/picky/internals/query/combinations/memory.rb +58 -0
  80. data/lib/picky/internals/query/combinations/redis.rb +59 -0
  81. data/lib/picky/internals/query/indexes.rb +180 -0
  82. data/lib/picky/internals/query/qualifiers.rb +81 -0
  83. data/lib/picky/internals/query/token.rb +215 -0
  84. data/lib/picky/internals/query/tokens.rb +89 -0
  85. data/lib/picky/{query → internals/query}/weights.rb +0 -0
  86. data/lib/picky/internals/results/base.rb +106 -0
  87. data/lib/picky/internals/results/full.rb +17 -0
  88. data/lib/picky/internals/results/live.rb +17 -0
  89. data/lib/picky/{solr → internals/solr}/schema_generator.rb +0 -0
  90. data/lib/picky/internals/tokenizers/base.rb +166 -0
  91. data/lib/picky/internals/tokenizers/index.rb +63 -0
  92. data/lib/picky/internals/tokenizers/query.rb +79 -0
  93. data/lib/picky/loader.rb +148 -112
  94. data/lib/picky/query/base.rb +57 -26
  95. data/lib/picky/query/full.rb +1 -1
  96. data/lib/picky/query/live.rb +1 -1
  97. data/lib/picky/sources/db.rb +27 -6
  98. data/lib/tasks/index.rake +3 -3
  99. data/lib/tasks/try.rake +2 -2
  100. data/spec/lib/aliases_spec.rb +9 -0
  101. data/spec/lib/application_spec.rb +3 -3
  102. data/spec/lib/generators/aliases_spec.rb +1 -0
  103. data/spec/lib/{index_api_spec.rb → index/base_spec.rb} +7 -7
  104. data/spec/lib/index_bundle_spec.rb +71 -0
  105. data/spec/lib/indexed/indexes_spec.rb +61 -0
  106. data/spec/lib/indexing/indexes_spec.rb +94 -24
  107. data/spec/lib/{adapters → internals/adapters}/rack/base_spec.rb +2 -2
  108. data/spec/lib/{adapters → internals/adapters}/rack/live_parameters_spec.rb +2 -2
  109. data/spec/lib/{adapters → internals/adapters}/rack/query_spec.rb +2 -2
  110. data/spec/lib/{calculations → internals/calculations}/location_spec.rb +0 -0
  111. data/spec/lib/{cli_spec.rb → internals/cli_spec.rb} +4 -1
  112. data/spec/lib/{configuration → internals/configuration}/index_spec.rb +1 -1
  113. data/spec/lib/{cores_spec.rb → internals/cores_spec.rb} +0 -0
  114. data/spec/lib/{extensions → internals/extensions}/array_spec.rb +0 -0
  115. data/spec/lib/{extensions → internals/extensions}/hash_spec.rb +0 -0
  116. data/spec/lib/{extensions → internals/extensions}/module_spec.rb +0 -0
  117. data/spec/lib/{extensions → internals/extensions}/object_spec.rb +0 -0
  118. data/spec/lib/{extensions → internals/extensions}/symbol_spec.rb +0 -0
  119. data/spec/lib/{frontend_adapters → internals/frontend_adapters}/rack_spec.rb +11 -11
  120. data/spec/lib/{cacher → internals/generators}/cacher_strategy_spec.rb +2 -2
  121. data/spec/lib/internals/generators/partial/default_spec.rb +17 -0
  122. data/spec/lib/internals/generators/partial/none_spec.rb +17 -0
  123. data/spec/lib/{cacher → internals/generators}/partial/substring_spec.rb +26 -27
  124. data/spec/lib/{cacher → internals/generators}/partial_generator_spec.rb +5 -5
  125. data/spec/lib/{cacher → internals/generators}/similarity/double_levenshtone_spec.rb +4 -4
  126. data/spec/lib/{cacher → internals/generators}/similarity/none_spec.rb +2 -2
  127. data/spec/lib/{cacher → internals/generators}/similarity_generator_spec.rb +4 -4
  128. data/spec/lib/{cacher → internals/generators}/weights/logarithmic_spec.rb +2 -2
  129. data/spec/lib/internals/generators/weights_generator_spec.rb +21 -0
  130. data/spec/lib/{helpers → internals/helpers}/measuring_spec.rb +0 -0
  131. data/spec/lib/{index → internals/index}/file/basic_spec.rb +2 -2
  132. data/spec/lib/{index → internals/index}/file/json_spec.rb +2 -2
  133. data/spec/lib/{index → internals/index}/file/marshal_spec.rb +2 -2
  134. data/spec/lib/{index → internals/index}/file/text_spec.rb +2 -2
  135. data/spec/lib/{index → internals/index}/files_spec.rb +2 -2
  136. data/spec/lib/{indexed/bundle_spec.rb → internals/indexed/bundle/memory_spec.rb} +4 -5
  137. data/spec/lib/{indexed → internals/indexed}/categories_spec.rb +13 -13
  138. data/spec/lib/{indexed → internals/indexed}/category_spec.rb +59 -32
  139. data/spec/lib/{indexed → internals/indexed}/index_spec.rb +5 -5
  140. data/spec/lib/{indexed → internals/indexed}/wrappers/bundle/calculation_spec.rb +0 -0
  141. data/spec/lib/{indexed → internals/indexed}/wrappers/bundle/wrapper_spec.rb +0 -0
  142. data/spec/lib/{indexed → internals/indexed}/wrappers/exact_first_spec.rb +5 -5
  143. data/spec/lib/{indexers → internals/indexers}/serial_spec.rb +0 -0
  144. data/spec/lib/{indexing/bundle_partial_generation_speed_spec.rb → internals/indexing/bundle/memory_partial_generation_speed_spec.rb} +3 -3
  145. data/spec/lib/{indexing/bundle_spec.rb → internals/indexing/bundle/memory_spec.rb} +3 -3
  146. data/spec/lib/{index/bundle_spec.rb → internals/indexing/bundle/super_base_spec.rb} +9 -3
  147. data/spec/lib/{indexing → internals/indexing}/category_spec.rb +3 -3
  148. data/spec/lib/{indexing → internals/indexing}/index_spec.rb +3 -3
  149. data/spec/lib/internals/indexing/indexes_spec.rb +36 -0
  150. data/spec/lib/{interfaces → internals/interfaces}/live_parameters_spec.rb +0 -0
  151. data/spec/lib/internals/results/base_spec.rb +105 -0
  152. data/spec/lib/internals/results/full_spec.rb +78 -0
  153. data/spec/lib/internals/results/live_spec.rb +88 -0
  154. data/spec/lib/{solr → internals/solr}/schema_generator_spec.rb +0 -0
  155. data/spec/lib/{tokenizers → internals/tokenizers}/base_spec.rb +3 -3
  156. data/spec/lib/{tokenizers → internals/tokenizers}/index_spec.rb +9 -9
  157. data/spec/lib/{tokenizers → internals/tokenizers}/query_spec.rb +11 -11
  158. data/spec/lib/query/allocation_spec.rb +12 -12
  159. data/spec/lib/query/allocations_spec.rb +19 -19
  160. data/spec/lib/query/base_spec.rb +28 -4
  161. data/spec/lib/query/combination_spec.rb +8 -9
  162. data/spec/lib/query/combinations/base_spec.rb +116 -0
  163. data/spec/lib/query/{combinations_spec.rb → combinations/memory_spec.rb} +14 -14
  164. data/spec/lib/query/combinations/redis_spec.rb +132 -0
  165. data/spec/lib/query/full_spec.rb +2 -2
  166. data/spec/lib/query/indexes_spec.rb +81 -0
  167. data/spec/lib/query/live_spec.rb +3 -3
  168. data/spec/lib/query/qualifiers_spec.rb +6 -6
  169. data/spec/lib/query/token_spec.rb +38 -38
  170. data/spec/lib/query/tokens_spec.rb +35 -35
  171. data/spec/lib/sources/db_spec.rb +23 -18
  172. metadata +212 -181
  173. data/lib/picky/adapters/rack/base.rb +0 -23
  174. data/lib/picky/adapters/rack/live_parameters.rb +0 -33
  175. data/lib/picky/adapters/rack/query.rb +0 -59
  176. data/lib/picky/adapters/rack.rb +0 -28
  177. data/lib/picky/cacher/convenience.rb +0 -3
  178. data/lib/picky/cacher/generator.rb +0 -15
  179. data/lib/picky/cacher/partial/default.rb +0 -5
  180. data/lib/picky/cacher/partial/none.rb +0 -31
  181. data/lib/picky/cacher/partial/strategy.rb +0 -21
  182. data/lib/picky/cacher/partial/substring.rb +0 -118
  183. data/lib/picky/cacher/partial_generator.rb +0 -15
  184. data/lib/picky/cacher/similarity/default.rb +0 -7
  185. data/lib/picky/cacher/similarity/double_levenshtone.rb +0 -77
  186. data/lib/picky/cacher/similarity/none.rb +0 -31
  187. data/lib/picky/cacher/similarity/strategy.rb +0 -9
  188. data/lib/picky/cacher/similarity_generator.rb +0 -15
  189. data/lib/picky/cacher/strategy.rb +0 -12
  190. data/lib/picky/cacher/weights/default.rb +0 -7
  191. data/lib/picky/cacher/weights/logarithmic.rb +0 -39
  192. data/lib/picky/cacher/weights/strategy.rb +0 -9
  193. data/lib/picky/cacher/weights_generator.rb +0 -15
  194. data/lib/picky/frontend_adapters/rack.rb +0 -150
  195. data/lib/picky/index/bundle.rb +0 -54
  196. data/lib/picky/index/file/basic.rb +0 -97
  197. data/lib/picky/index/file/json.rb +0 -34
  198. data/lib/picky/index/file/marshal.rb +0 -34
  199. data/lib/picky/index/file/text.rb +0 -56
  200. data/lib/picky/index/files.rb +0 -118
  201. data/lib/picky/index_api.rb +0 -175
  202. data/lib/picky/indexed/bundle.rb +0 -54
  203. data/lib/picky/indexed/categories.rb +0 -131
  204. data/lib/picky/indexed/category.rb +0 -85
  205. data/lib/picky/indexed/index.rb +0 -39
  206. data/lib/picky/indexed/wrappers/exact_first.rb +0 -61
  207. data/lib/picky/indexing/bundle.rb +0 -213
  208. data/lib/picky/indexing/categories.rb +0 -38
  209. data/lib/picky/indexing/category.rb +0 -117
  210. data/lib/picky/indexing/index.rb +0 -55
  211. data/lib/picky/query/allocation.rb +0 -82
  212. data/lib/picky/query/allocations.rb +0 -130
  213. data/lib/picky/query/combination.rb +0 -74
  214. data/lib/picky/query/combinations.rb +0 -105
  215. data/lib/picky/query/qualifiers.rb +0 -77
  216. data/lib/picky/query/token.rb +0 -202
  217. data/lib/picky/query/tokens.rb +0 -86
  218. data/lib/picky/query/weigher.rb +0 -165
  219. data/lib/picky/results/base.rb +0 -102
  220. data/lib/picky/results/full.rb +0 -13
  221. data/lib/picky/results/live.rb +0 -13
  222. data/lib/picky/tokenizers/base.rb +0 -161
  223. data/lib/picky/tokenizers/index.rb +0 -58
  224. data/lib/picky/tokenizers/query.rb +0 -74
  225. data/spec/lib/cacher/partial/default_spec.rb +0 -15
  226. data/spec/lib/cacher/partial/none_spec.rb +0 -17
  227. data/spec/lib/cacher/weights_generator_spec.rb +0 -21
  228. data/spec/lib/results/base_spec.rb +0 -257
  229. data/spec/lib/results/live_spec.rb +0 -15
@@ -1,85 +0,0 @@
1
- module Indexed
2
-
3
- # An index category holds a exact and a partial index for a given category.
4
- #
5
- # For example an index category for names holds a exact and
6
- # a partial index bundle for names.
7
- #
8
- class Category
9
-
10
- attr_accessor :exact
11
- attr_reader :identifier, :name
12
- attr_writer :partial
13
-
14
- #
15
- #
16
- def initialize name, index, options = {}
17
- @name = name
18
-
19
- configuration = Configuration::Index.new index, self
20
-
21
- @identifier = configuration.identifier
22
-
23
- # TODO Push the defaults out into the index.
24
- #
25
- @partial_strategy = options[:partial] || Cacher::Partial::Default
26
- similarity = options[:similarity] || Cacher::Similarity::Default
27
-
28
- @exact = options[:exact_index_bundle] || Bundle.new(:exact, configuration, similarity)
29
- @partial = options[:partial_index_bundle] || Bundle.new(:partial, configuration, similarity)
30
-
31
- @exact = exact_lambda.call(@exact, @partial) if exact_lambda = options[:exact_lambda]
32
- @partial = partial_lambda.call(@exact, @partial) if partial_lambda = options[:partial_lambda]
33
-
34
- # TODO Extract?
35
- #
36
- Query::Qualifiers.add(configuration.category_name, generate_qualifiers_from(options) || [name])
37
- end
38
-
39
- # TODO Move to Index.
40
- #
41
- def generate_qualifiers_from options
42
- options[:qualifiers] || options[:qualifier] && [options[:qualifier]]
43
- end
44
-
45
- # Loads the index from cache.
46
- #
47
- def load_from_cache
48
- timed_exclaim "Loading index #{identifier}."
49
- exact.load
50
- partial.load
51
- end
52
-
53
- # Gets the weight for this token's text.
54
- #
55
- def weight token
56
- bundle_for(token).weight token.text
57
- end
58
-
59
- # Gets the ids for this token's text.
60
- #
61
- def ids token
62
- bundle_for(token).ids token.text
63
- end
64
-
65
- # Returns the right index bundle for this token.
66
- #
67
- def bundle_for token
68
- token.partial?? partial : exact
69
- end
70
-
71
- # The partial strategy defines whether to really use the partial index.
72
- #
73
- def partial
74
- @partial_strategy.use_exact_for_partial?? @exact : @partial
75
- end
76
-
77
- #
78
- #
79
- def combination_for token
80
- weight(token) && ::Query::Combination.new(token, self)
81
- end
82
-
83
- end
84
-
85
- end
@@ -1,39 +0,0 @@
1
- module Indexed
2
-
3
- #
4
- #
5
- class Index
6
-
7
- attr_reader :name, :result_identifier, :combinator, :categories
8
-
9
- delegate :load_from_cache,
10
- :to => :categories
11
-
12
- def initialize name, options = {}
13
- @name = name
14
-
15
- @result_identifier = options[:result_identifier] || name
16
- ignore_unassigned_tokens = options[:ignore_unassigned_tokens] || false # TODO Move to query, somehow.
17
-
18
- @categories = Categories.new ignore_unassigned_tokens: ignore_unassigned_tokens
19
- end
20
-
21
- # TODO Doc.
22
- #
23
- def define_category category_name, options = {}
24
- new_category = Category.new category_name, self, options
25
- categories << new_category
26
- new_category
27
- end
28
-
29
- # Return the possible combinations for this token.
30
- #
31
- # A combination is a tuple <token, index_bundle>.
32
- #
33
- def possible_combinations token
34
- categories.possible_combinations_for token
35
- end
36
-
37
- end
38
-
39
- end
@@ -1,61 +0,0 @@
1
- # encoding: utf-8
2
- #
3
- module Indexed
4
-
5
- # TODO Spec
6
- #
7
- module Wrappers
8
-
9
- # This index combines an exact and partial index.
10
- # It serves to order the results such that exact hits are found first.
11
- #
12
- # TODO Need to use the right subtokens. Bake in?
13
- #
14
- class ExactFirst < Bundle
15
-
16
- delegate :similar,
17
- :identifier,
18
- :name,
19
- :to => :@exact
20
- delegate :index,
21
- :category,
22
- :weight,
23
- :generate_partial_from,
24
- :generate_caches_from_memory,
25
- :generate_derived,
26
- :dump,
27
- :load,
28
- :to => :@partial
29
-
30
- def initialize category
31
- @exact = category.exact
32
- @partial = category.partial
33
- end
34
-
35
- def self.wrap index_or_category
36
- if index_or_category.respond_to? :categories
37
- wrap_each_of index_or_category.categories
38
- index_or_category
39
- else
40
- new index_or_category
41
- end
42
- end
43
- # TODO Do not extract categories!
44
- #
45
- def self.wrap_each_of categories
46
- categories.categories.collect! { |category| new(category) }
47
- end
48
-
49
- def ids text
50
- @exact.ids(text) + @partial.ids(text)
51
- end
52
-
53
- def weight text
54
- [@exact.weight(text) || 0, @partial.weight(text) || 0].max
55
- end
56
-
57
- end
58
-
59
- end
60
-
61
- end
@@ -1,213 +0,0 @@
1
- # encoding: utf-8
2
- #
3
- module Indexing # :nodoc:all
4
-
5
- # This is the indexing bundle.
6
- # It does all menial tasks that have nothing to do
7
- # with the actual index running etc.
8
- #
9
- class Bundle < Index::Bundle
10
-
11
- attr_accessor :partial_strategy, :weights_strategy
12
- attr_reader :files
13
-
14
- # Path is in which directory the cache is located.
15
- #
16
- def initialize name, configuration, similarity_strategy, partial_strategy, weights_strategy
17
- super name, configuration, similarity_strategy
18
-
19
- @partial_strategy = partial_strategy
20
- @weights_strategy = weights_strategy
21
- end
22
-
23
- # Generation
24
- #
25
-
26
- # This method
27
- # * loads the base index from the db
28
- # * generates derived indexes
29
- # * dumps all the indexes into files
30
- #
31
- def generate_caches_from_source
32
- load_from_index_file
33
- generate_caches_from_memory
34
- end
35
- # Generates derived indexes from the index and dumps.
36
- #
37
- # Note: assumes that there is something in the index
38
- #
39
- def generate_caches_from_memory
40
- cache_from_memory_generation_message
41
- generate_derived
42
- end
43
- def cache_from_memory_generation_message
44
- timed_exclaim "CACHE FROM MEMORY #{identifier}."
45
- end
46
-
47
- # Generates the weights and similarity from the main index.
48
- #
49
- def generate_derived
50
- generate_weights
51
- generate_similarity
52
- end
53
-
54
- # Load the data from the db.
55
- #
56
- def load_from_index_file
57
- load_from_index_generation_message
58
- clear
59
- retrieve
60
- end
61
- def load_from_index_generation_message
62
- timed_exclaim "LOAD INDEX #{identifier}."
63
- end
64
- # Retrieves the prepared index data into the index.
65
- #
66
- # This is in preparation for generating
67
- # derived indexes (like weights, similarity)
68
- # and later dumping the optimized index.
69
- #
70
- def retrieve
71
- key_format = self[:key_format] || :to_i
72
- files.retrieve do |id, token|
73
- initialize_index_for token
74
- index[token] << id.send(key_format)
75
- end
76
- end
77
- # Sets up a piece of the index for the given token.
78
- #
79
- def initialize_index_for token
80
- index[token] ||= []
81
- end
82
-
83
- # Generators.
84
- #
85
- # TODO Move somewhere more fitting.
86
- #
87
-
88
- # Generates a new index (writes its index) using the
89
- # partial caching strategy of this bundle.
90
- #
91
- def generate_partial
92
- generator = Cacher::PartialGenerator.new self.index
93
- self.index = generator.generate self.partial_strategy
94
- end
95
- # Generate a partial index from the given exact index.
96
- #
97
- def generate_partial_from exact_index
98
- timed_exclaim "PARTIAL GENERATE #{identifier}."
99
- self.index = exact_index
100
- self.generate_partial
101
- self
102
- end
103
- # Generates a new similarity index (writes its index) using the
104
- # given similarity caching strategy.
105
- #
106
- def generate_similarity
107
- generator = Cacher::SimilarityGenerator.new self.index
108
- self.similarity = generator.generate self.similarity_strategy
109
- end
110
- # Generates a new weights index (writes its index) using the
111
- # given weight caching strategy.
112
- #
113
- def generate_weights
114
- generator = Cacher::WeightsGenerator.new self.index
115
- self.weights = generator.generate self.weights_strategy
116
- end
117
-
118
- # Saves the indexes in a dump file.
119
- #
120
- def dump
121
- dump_index
122
- dump_similarity
123
- dump_weights
124
- dump_configuration
125
- end
126
- # Dumps the core index.
127
- #
128
- def dump_index
129
- timed_exclaim "DUMP INDEX #{identifier}."
130
- files.dump_index index
131
- end
132
- # Dumps the weights index.
133
- #
134
- def dump_weights
135
- timed_exclaim "DUMP WEIGHTS #{identifier}."
136
- files.dump_weights weights
137
- end
138
- # Dumps the similarity index.
139
- #
140
- def dump_similarity
141
- timed_exclaim "DUMP SIMILARITY #{identifier}."
142
- files.dump_similarity similarity
143
- end
144
- # Dumps the similarity index.
145
- #
146
- def dump_configuration
147
- timed_exclaim "DUMP CONFIGURATION #{identifier}."
148
- files.dump_configuration configuration
149
- end
150
-
151
- # Alerts the user if an index is missing.
152
- #
153
- def raise_unless_cache_exists
154
- raise_unless_index_exists
155
- raise_unless_similarity_exists
156
- end
157
- # Alerts the user if one of the necessary indexes
158
- # (core, weights) is missing.
159
- #
160
- def raise_unless_index_exists
161
- if partial_strategy.saved?
162
- warn_if_index_small
163
- raise_unless_index_ok
164
- end
165
- end
166
- # Alerts the user if the similarity
167
- # index is missing (given that it's used).
168
- #
169
- def raise_unless_similarity_exists
170
- if similarity_strategy.saved?
171
- warn_if_similarity_small
172
- raise_unless_similarity_ok
173
- end
174
- end
175
- # Warns the user if the similarity index is small.
176
- #
177
- def warn_if_similarity_small
178
- warn_cache_small :similarity if files.similarity_cache_small?
179
- end
180
- # Alerts the user if the similarity index is not there.
181
- #
182
- def raise_unless_similarity_ok
183
- raise_cache_missing :similarity unless files.similarity_cache_ok?
184
- end
185
-
186
- # TODO Spec on down.
187
- #
188
-
189
- # Warns the user if the core or weights indexes are small.
190
- #
191
- def warn_if_index_small
192
- warn_cache_small :index if files.index_cache_small?
193
- warn_cache_small :weights if files.weights_cache_small?
194
- end
195
- # Alerts the user if the core or weights indexes are not there.
196
- #
197
- def raise_unless_index_ok
198
- raise_cache_missing :index unless files.index_cache_ok?
199
- raise_cache_missing :weights unless files.weights_cache_ok?
200
- end
201
- # Outputs a warning for the given cache.
202
- #
203
- def warn_cache_small what
204
- puts "Warning: #{what} cache for #{identifier} smaller than 16 bytes."
205
- end
206
- # Raises an appropriate error message for the given cache.
207
- #
208
- def raise_cache_missing what
209
- raise "#{what} cache for #{identifier} missing."
210
- end
211
-
212
- end
213
- end
@@ -1,38 +0,0 @@
1
- module Indexing
2
-
3
- class Categories
4
-
5
- attr_reader :categories
6
-
7
- each_delegate :index,
8
- :cache,
9
- :generate_caches,
10
- :backup_caches,
11
- :restore_caches,
12
- :check_caches,
13
- :clear_caches,
14
- :create_directory_structure,
15
- :to => :categories
16
-
17
- def initialize
18
- @categories = []
19
- end
20
-
21
- def << category
22
- categories << category
23
- end
24
-
25
- def find category_name
26
- category_name = category_name.to_sym
27
-
28
- categories.each do |category|
29
- next unless category.name == category_name
30
- return category
31
- end
32
-
33
- raise %Q{Index category "#{category_name}" not found. Possible categories: "#{categories.map(&:name).join('", "')}".}
34
- end
35
-
36
- end
37
-
38
- end
@@ -1,117 +0,0 @@
1
- module Indexing
2
-
3
- class Category
4
-
5
- attr_reader :exact, :partial, :name, :configuration, :indexer
6
-
7
- # Mandatory params:
8
- # * name: Category name to use as identifier and file names.
9
- # * index: Index to which this category is attached to.
10
- # Options:
11
- # * partial: Partial::None.new, Partial::Substring.new(from:start_char, to:up_to_char) (defaults from:-3, to:-1)
12
- # * similarity: Similarity::None.new (default), Similarity::Phonetic.new(amount_of_similarly_linked_words)
13
- # * source: Use if the category should use a different source.
14
- # * from: The source category identifier to take the data from.
15
- #
16
- # Advanced Options (TODO):
17
- #
18
- # * weights:
19
- # * tokenizer:
20
- # * exact_indexing_bundle:
21
- # * partial_indexing_bundle:
22
- #
23
- def initialize name, index, options = {}
24
- @name = name
25
- @from = options[:from]
26
-
27
- # Now we have enough info to combine the index and the category.
28
- #
29
- @configuration = Configuration::Index.new index, self
30
-
31
- @tokenizer = options[:tokenizer] || Tokenizers::Index.default
32
- @indexer = Indexers::Serial.new configuration, options[:source], @tokenizer
33
-
34
- # TODO Push into Bundle.
35
- #
36
- partial = options[:partial] || Cacher::Partial::Default
37
- weights = options[:weights] || Cacher::Weights::Default
38
- similarity = options[:similarity] || Cacher::Similarity::Default
39
-
40
- @exact = options[:exact_indexing_bundle] || Bundle.new(:exact, configuration, similarity, Cacher::Partial::None.new, weights)
41
- @partial = options[:partial_indexing_bundle] || Bundle.new(:partial, configuration, Cacher::Similarity::None.new, partial, weights)
42
- end
43
-
44
- delegate :identifier, :prepare_index_directory, :to => :configuration
45
- delegate :source, :source=, :tokenizer, :tokenizer=, :to => :indexer
46
-
47
- def from
48
- @from || name
49
- end
50
-
51
- # TODO Spec.
52
- #
53
- def backup_caches
54
- timed_exclaim "Backing up #{identifier}."
55
- exact.backup
56
- partial.backup
57
- end
58
- def restore_caches
59
- timed_exclaim "Restoring #{identifier}."
60
- exact.restore
61
- partial.restore
62
- end
63
- def check_caches
64
- timed_exclaim "Checking #{identifier}."
65
- exact.raise_unless_cache_exists
66
- partial.raise_unless_cache_exists
67
- end
68
- def clear_caches
69
- timed_exclaim "Deleting #{identifier}."
70
- exact.delete
71
- partial.delete
72
- end
73
-
74
- def index
75
- prepare_index_directory
76
- indexer.index
77
- end
78
-
79
- # Generates all caches for this category.
80
- #
81
- def cache
82
- prepare_index_directory
83
- configure
84
- generate_caches
85
- end
86
- # We need to set what formatting method should be used.
87
- # Uses the one defined in the indexer.
88
- #
89
- def configure
90
- key_format = indexer.key_format
91
- exact[:key_format] = key_format
92
- partial[:key_format] = key_format
93
- end
94
- def generate_caches
95
- generate_caches_from_source
96
- generate_partial
97
- generate_caches_from_memory
98
- dump_caches
99
- timed_exclaim "CACHE FINISHED #{identifier}."
100
- end
101
- def generate_caches_from_source
102
- exact.generate_caches_from_source
103
- end
104
- def generate_partial
105
- partial.generate_partial_from exact.index
106
- end
107
- def generate_caches_from_memory
108
- partial.generate_caches_from_memory
109
- end
110
- def dump_caches
111
- exact.dump
112
- partial.dump
113
- end
114
-
115
- end
116
-
117
- end
@@ -1,55 +0,0 @@
1
- module Indexing
2
-
3
- class Index
4
-
5
- attr_reader :name, :source, :categories, :after_indexing
6
-
7
- # Delegators for indexing.
8
- #
9
- delegate :connect_backend,
10
- :to => :source
11
-
12
- delegate :index,
13
- :cache,
14
- :generate_caches,
15
- :backup_caches,
16
- :restore_caches,
17
- :check_caches,
18
- :clear_caches,
19
- :create_directory_structure,
20
- :to => :categories
21
-
22
- def initialize name, source, options = {}
23
- @name = name
24
- @source = source
25
-
26
- @after_indexing = options[:after_indexing]
27
-
28
- @categories = Categories.new
29
- end
30
-
31
- # TODO Spec. Doc.
32
- #
33
- def define_category category_name, options = {}
34
- options = default_category_options.merge options
35
-
36
- new_category = Category.new category_name, self, options
37
- categories << new_category
38
- new_category
39
- end
40
-
41
- # By default, the category uses the index's source.
42
- #
43
- def default_category_options
44
- { :source => @source }
45
- end
46
-
47
- # Indexing.
48
- #
49
- def take_snapshot
50
- source.take_snapshot self
51
- end
52
-
53
- end
54
-
55
- end
@@ -1,82 +0,0 @@
1
- module Query
2
- # An allocation has a number of combinations:
3
- # [token, index] [other_token, other_index], ...
4
- #
5
- class Allocation # :nodoc:all
6
-
7
- attr_reader :count, :ids, :score, :combinations, :result_identifier
8
-
9
- #
10
- #
11
- def initialize combinations, result_identifier
12
- @combinations = combinations
13
- @result_identifier = result_identifier
14
- end
15
-
16
- def hash
17
- @combinations.hash
18
- end
19
- def eql? other_allocation
20
- true # FIXME
21
- # @combinations.eql? other_allocation.combinations
22
- end
23
-
24
- # Scores its combinations and caches the result.
25
- #
26
- def calculate_score weights
27
- @score ||= @combinations.calculate_score(weights)
28
- end
29
-
30
- # Asks the combinations for the (intersected) ids.
31
- #
32
- def calculate_ids
33
- @combinations.ids
34
- end
35
-
36
- # This starts the searching process.
37
- #
38
- def process! amount, offset
39
- ids = calculate_ids
40
- @count = ids.size # cache the count before throwing away the ids
41
- @ids = ids.slice!(offset, amount) || [] # slice out the relevant part
42
- end
43
-
44
- #
45
- #
46
- def keep identifiers = [] # categories
47
- @combinations.keep identifiers
48
- end
49
- #
50
- #
51
- def remove identifiers = [] # categories
52
- @combinations.remove identifiers
53
- end
54
-
55
- # Sort highest score first.
56
- #
57
- def <=> other_allocation
58
- other_allocation.score <=> self.score
59
- end
60
-
61
- # Transform the allocation into result form.
62
- #
63
- def to_result
64
- [self.result_identifier, self.score, self.count, @combinations.to_result, self.ids] if self.count > 0
65
- end
66
-
67
- # Json representation of this allocation.
68
- #
69
- # Note: Delegates to to_result.
70
- #
71
- def to_json
72
- to_result.to_json
73
- end
74
-
75
- #
76
- #
77
- def to_s
78
- "Allocation: #{to_result.join(', ')}"
79
- end
80
-
81
- end
82
- end