picky 2.5.2 → 2.6.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (255) hide show
  1. data/lib/picky/adapters/rack/base.rb +23 -0
  2. data/lib/picky/adapters/rack/live_parameters.rb +33 -0
  3. data/lib/picky/adapters/rack/query.rb +65 -0
  4. data/lib/picky/adapters/rack.rb +30 -0
  5. data/lib/picky/application.rb +5 -5
  6. data/lib/picky/backend/backend.rb +108 -0
  7. data/lib/picky/backend/file/basic.rb +101 -0
  8. data/lib/picky/backend/file/json.rb +34 -0
  9. data/lib/picky/backend/file/marshal.rb +34 -0
  10. data/lib/picky/backend/file/text.rb +56 -0
  11. data/lib/picky/backend/files.rb +30 -0
  12. data/lib/picky/backend/redis/basic.rb +85 -0
  13. data/lib/picky/backend/redis/list_hash.rb +49 -0
  14. data/lib/picky/backend/redis/string_hash.rb +40 -0
  15. data/lib/picky/backend/redis.rb +40 -0
  16. data/lib/picky/calculations/location.rb +57 -0
  17. data/lib/picky/categories.rb +62 -0
  18. data/lib/picky/categories_indexed.rb +93 -0
  19. data/lib/picky/categories_indexing.rb +12 -0
  20. data/lib/picky/category.rb +127 -0
  21. data/lib/picky/category_indexed.rb +64 -0
  22. data/lib/picky/category_indexing.rb +145 -0
  23. data/lib/picky/{internals/ext → ext}/maybe_compile.rb +0 -0
  24. data/lib/picky/{internals/ext → ext}/ruby19/extconf.rb +0 -0
  25. data/lib/picky/{internals/ext → ext}/ruby19/performant.c +0 -0
  26. data/lib/picky/{internals/extensions → extensions}/array.rb +0 -0
  27. data/lib/picky/extensions/class.rb +11 -0
  28. data/lib/picky/{internals/extensions → extensions}/hash.rb +0 -0
  29. data/lib/picky/{internals/extensions → extensions}/module.rb +0 -0
  30. data/lib/picky/{internals/extensions → extensions}/object.rb +0 -0
  31. data/lib/picky/{internals/extensions → extensions}/symbol.rb +0 -0
  32. data/lib/picky/frontend_adapters/rack.rb +146 -0
  33. data/lib/picky/generators/aliases.rb +3 -3
  34. data/lib/picky/generators/base.rb +15 -0
  35. data/lib/picky/generators/partial/default.rb +5 -0
  36. data/lib/picky/generators/partial/none.rb +31 -0
  37. data/lib/picky/generators/partial/strategy.rb +25 -0
  38. data/lib/picky/generators/partial/substring.rb +118 -0
  39. data/lib/picky/generators/partial_generator.rb +15 -0
  40. data/lib/picky/generators/similarity/default.rb +7 -0
  41. data/lib/picky/generators/similarity/double_metaphone.rb +28 -0
  42. data/lib/picky/generators/similarity/metaphone.rb +28 -0
  43. data/lib/picky/generators/similarity/none.rb +31 -0
  44. data/lib/picky/generators/similarity/phonetic.rb +65 -0
  45. data/lib/picky/generators/similarity/soundex.rb +28 -0
  46. data/lib/picky/generators/similarity/strategy.rb +9 -0
  47. data/lib/picky/generators/similarity_generator.rb +15 -0
  48. data/lib/picky/generators/strategy.rb +14 -0
  49. data/lib/picky/generators/weights/default.rb +7 -0
  50. data/lib/picky/generators/weights/logarithmic.rb +39 -0
  51. data/lib/picky/generators/weights/strategy.rb +9 -0
  52. data/lib/picky/generators/weights_generator.rb +15 -0
  53. data/lib/picky/{internals/helpers → helpers}/measuring.rb +0 -0
  54. data/lib/picky/index/base.rb +119 -104
  55. data/lib/picky/index/base_indexed.rb +27 -0
  56. data/lib/picky/index/base_indexing.rb +119 -0
  57. data/lib/picky/index/memory.rb +6 -18
  58. data/lib/picky/index/redis.rb +6 -18
  59. data/lib/picky/indexed/bundle/base.rb +110 -0
  60. data/lib/picky/indexed/bundle/memory.rb +91 -0
  61. data/lib/picky/indexed/bundle/redis.rb +45 -0
  62. data/lib/picky/indexed/wrappers/bundle/calculation.rb +35 -0
  63. data/lib/picky/indexed/wrappers/bundle/location.rb +42 -0
  64. data/lib/picky/indexed/wrappers/bundle/wrapper.rb +43 -0
  65. data/lib/picky/indexed/wrappers/category/location.rb +25 -0
  66. data/lib/picky/indexed/wrappers/exact_first.rb +55 -0
  67. data/lib/picky/{internals/indexers → indexers}/base.rb +0 -0
  68. data/lib/picky/{internals/indexers → indexers}/parallel.rb +0 -0
  69. data/lib/picky/{internals/indexers → indexers}/serial.rb +0 -0
  70. data/lib/picky/{internals/indexers → indexers}/solr.rb +0 -0
  71. data/lib/picky/indexes.rb +73 -0
  72. data/lib/picky/indexes_indexed.rb +29 -0
  73. data/lib/picky/indexes_indexing.rb +49 -0
  74. data/lib/picky/indexing/bundle/base.rb +212 -0
  75. data/lib/picky/indexing/bundle/memory.rb +25 -0
  76. data/lib/picky/indexing/bundle/redis.rb +24 -0
  77. data/lib/picky/indexing/bundle/super_base.rb +61 -0
  78. data/lib/picky/indexing/wrappers/category/location.rb +25 -0
  79. data/lib/picky/interfaces/live_parameters.rb +8 -8
  80. data/lib/picky/loader.rb +89 -95
  81. data/lib/picky/{internals/performant.rb → performant.rb} +0 -0
  82. data/lib/picky/query/allocation.rb +84 -0
  83. data/lib/picky/query/allocations.rb +114 -0
  84. data/lib/picky/query/combination.rb +76 -0
  85. data/lib/picky/query/combinations/base.rb +70 -0
  86. data/lib/picky/query/combinations/memory.rb +48 -0
  87. data/lib/picky/query/combinations/redis.rb +86 -0
  88. data/lib/picky/query/indexes.rb +195 -0
  89. data/lib/picky/query/qualifiers.rb +76 -0
  90. data/lib/picky/query/token.rb +198 -0
  91. data/lib/picky/query/tokens.rb +103 -0
  92. data/lib/picky/{internals/query → query}/weights.rb +0 -0
  93. data/lib/picky/results.rb +1 -1
  94. data/lib/picky/search.rb +6 -6
  95. data/lib/picky/{internals/solr → solr}/schema_generator.rb +0 -0
  96. data/lib/picky/sources/db.rb +7 -7
  97. data/lib/picky/sources/wrappers/location.rb +2 -2
  98. data/lib/picky/tokenizers/base.rb +224 -0
  99. data/lib/picky/tokenizers/index.rb +30 -0
  100. data/lib/picky/tokenizers/location.rb +49 -0
  101. data/lib/picky/tokenizers/query.rb +55 -0
  102. data/lib/tasks/index.rake +4 -3
  103. data/lib/tasks/try.rake +2 -2
  104. data/spec/lib/{internals/adapters → adapters}/rack/base_spec.rb +1 -1
  105. data/spec/lib/{internals/adapters → adapters}/rack/live_parameters_spec.rb +1 -1
  106. data/spec/lib/{internals/adapters → adapters}/rack/query_spec.rb +1 -1
  107. data/spec/lib/application_spec.rb +3 -3
  108. data/spec/lib/{internals/index → backend}/file/basic_spec.rb +1 -1
  109. data/spec/lib/{internals/index → backend}/file/json_spec.rb +1 -1
  110. data/spec/lib/{internals/index → backend}/file/marshal_spec.rb +1 -1
  111. data/spec/lib/{internals/index → backend}/file/text_spec.rb +1 -1
  112. data/spec/lib/{internals/index → backend}/files_spec.rb +3 -3
  113. data/spec/lib/{internals/index → backend}/redis/basic_spec.rb +1 -1
  114. data/spec/lib/{internals/index → backend}/redis/list_hash_spec.rb +1 -1
  115. data/spec/lib/{internals/index → backend}/redis/string_hash_spec.rb +1 -1
  116. data/spec/lib/{internals/index → backend}/redis_spec.rb +11 -5
  117. data/spec/lib/{internals/calculations → calculations}/location_spec.rb +1 -1
  118. data/spec/lib/{internals/indexed/categories_spec.rb → categories_indexed_spec.rb} +10 -10
  119. data/spec/lib/{internals/indexed/category_spec.rb → category_indexed_spec.rb} +12 -12
  120. data/spec/lib/{internals/indexing/category_spec.rb → category_indexing_spec.rb} +10 -10
  121. data/spec/lib/{internals/cores_spec.rb → cores_spec.rb} +0 -0
  122. data/spec/lib/{internals/extensions → extensions}/array_spec.rb +0 -0
  123. data/spec/lib/{internals/extensions → extensions}/hash_spec.rb +0 -0
  124. data/spec/lib/{internals/extensions → extensions}/module_spec.rb +0 -0
  125. data/spec/lib/{internals/extensions → extensions}/object_spec.rb +0 -0
  126. data/spec/lib/{internals/extensions → extensions}/symbol_spec.rb +0 -0
  127. data/spec/lib/{internals/frontend_adapters → frontend_adapters}/rack_spec.rb +10 -10
  128. data/spec/lib/generators/aliases_spec.rb +3 -3
  129. data/spec/lib/{internals/generators → generators}/cacher_strategy_spec.rb +1 -1
  130. data/spec/lib/{internals/generators → generators}/partial/default_spec.rb +3 -3
  131. data/spec/lib/{internals/generators → generators}/partial/none_spec.rb +2 -2
  132. data/spec/lib/{internals/generators → generators}/partial/substring_spec.rb +1 -1
  133. data/spec/lib/{internals/generators → generators}/partial_generator_spec.rb +3 -3
  134. data/spec/lib/{internals/generators → generators}/similarity/double_metaphone_spec.rb +1 -1
  135. data/spec/lib/{internals/generators → generators}/similarity/metaphone_spec.rb +1 -1
  136. data/spec/lib/{internals/generators → generators}/similarity/none_spec.rb +1 -1
  137. data/spec/lib/{internals/generators → generators}/similarity/phonetic_spec.rb +1 -1
  138. data/spec/lib/{internals/generators → generators}/similarity/soundex_spec.rb +1 -1
  139. data/spec/lib/{internals/generators → generators}/similarity_generator_spec.rb +2 -2
  140. data/spec/lib/{internals/generators → generators}/weights/logarithmic_spec.rb +1 -1
  141. data/spec/lib/{internals/generators → generators}/weights_generator_spec.rb +5 -5
  142. data/spec/lib/{internals/helpers → helpers}/measuring_spec.rb +0 -0
  143. data/spec/lib/{internals/indexed/index_spec.rb → index/base_indexed_spec.rb} +5 -5
  144. data/spec/lib/{internals/indexing/index_spec.rb → index/base_indexing_spec.rb} +6 -19
  145. data/spec/lib/index/base_spec.rb +10 -53
  146. data/spec/lib/{internals/indexed → indexed}/bundle/memory_spec.rb +5 -5
  147. data/spec/lib/{internals/indexed → indexed}/bundle/redis_spec.rb +4 -4
  148. data/spec/lib/{internals/indexed → indexed}/wrappers/bundle/calculation_spec.rb +1 -1
  149. data/spec/lib/{internals/indexed → indexed}/wrappers/bundle/wrapper_spec.rb +1 -1
  150. data/spec/lib/{internals/indexed → indexed}/wrappers/exact_first_spec.rb +7 -7
  151. data/spec/lib/{internals/indexers → indexers}/base_spec.rb +0 -0
  152. data/spec/lib/{internals/indexers → indexers}/parallel_spec.rb +0 -0
  153. data/spec/lib/{internals/indexers → indexers}/serial_spec.rb +0 -0
  154. data/spec/lib/indexes_class_spec.rb +30 -0
  155. data/spec/lib/{indexed/indexes_spec.rb → indexes_indexed_spec.rb} +1 -1
  156. data/spec/lib/{indexing/indexes_spec.rb → indexes_indexing_spec.rb} +8 -8
  157. data/spec/lib/{internals/indexing/indexes_spec.rb → indexes_spec.rb} +15 -12
  158. data/spec/lib/{internals/indexing → indexing}/bundle/memory_partial_generation_speed_spec.rb +4 -4
  159. data/spec/lib/{internals/indexing → indexing}/bundle/memory_spec.rb +3 -3
  160. data/spec/lib/{internals/indexing → indexing}/bundle/redis_spec.rb +3 -3
  161. data/spec/lib/{internals/indexing → indexing}/bundle/super_base_spec.rb +2 -2
  162. data/spec/lib/{internals/interfaces → interfaces}/live_parameters_spec.rb +0 -0
  163. data/spec/lib/query/allocation_spec.rb +1 -1
  164. data/spec/lib/query/allocations_spec.rb +1 -1
  165. data/spec/lib/query/combination_spec.rb +5 -5
  166. data/spec/lib/query/combinations/base_spec.rb +1 -1
  167. data/spec/lib/query/combinations/memory_spec.rb +1 -1
  168. data/spec/lib/query/combinations/redis_spec.rb +1 -1
  169. data/spec/lib/query/indexes_spec.rb +1 -1
  170. data/spec/lib/query/qualifiers_spec.rb +4 -4
  171. data/spec/lib/query/token_spec.rb +3 -3
  172. data/spec/lib/query/tokens_spec.rb +32 -32
  173. data/spec/lib/search_spec.rb +5 -5
  174. data/spec/lib/{internals/solr → solr}/schema_generator_spec.rb +0 -0
  175. data/spec/lib/sources/db_spec.rb +4 -8
  176. data/spec/lib/sources/wrappers/location_spec.rb +1 -1
  177. data/spec/lib/{internals/tokenizers → tokenizers}/base_spec.rb +1 -1
  178. data/spec/lib/{internals/tokenizers → tokenizers}/index_spec.rb +1 -1
  179. data/spec/lib/{internals/tokenizers → tokenizers}/query_spec.rb +1 -1
  180. metadata +214 -215
  181. data/lib/picky/aliases.rb +0 -4
  182. data/lib/picky/index_bundle.rb +0 -48
  183. data/lib/picky/indexed/indexes.rb +0 -59
  184. data/lib/picky/indexing/indexes.rb +0 -87
  185. data/lib/picky/internals/adapters/rack/base.rb +0 -27
  186. data/lib/picky/internals/adapters/rack/live_parameters.rb +0 -37
  187. data/lib/picky/internals/adapters/rack/query.rb +0 -69
  188. data/lib/picky/internals/adapters/rack.rb +0 -34
  189. data/lib/picky/internals/calculations/location.rb +0 -59
  190. data/lib/picky/internals/frontend_adapters/rack.rb +0 -150
  191. data/lib/picky/internals/generators/base.rb +0 -19
  192. data/lib/picky/internals/generators/partial/default.rb +0 -7
  193. data/lib/picky/internals/generators/partial/none.rb +0 -35
  194. data/lib/picky/internals/generators/partial/strategy.rb +0 -29
  195. data/lib/picky/internals/generators/partial/substring.rb +0 -122
  196. data/lib/picky/internals/generators/partial_generator.rb +0 -19
  197. data/lib/picky/internals/generators/similarity/default.rb +0 -9
  198. data/lib/picky/internals/generators/similarity/double_metaphone.rb +0 -32
  199. data/lib/picky/internals/generators/similarity/metaphone.rb +0 -32
  200. data/lib/picky/internals/generators/similarity/none.rb +0 -35
  201. data/lib/picky/internals/generators/similarity/phonetic.rb +0 -69
  202. data/lib/picky/internals/generators/similarity/soundex.rb +0 -32
  203. data/lib/picky/internals/generators/similarity/strategy.rb +0 -11
  204. data/lib/picky/internals/generators/similarity_generator.rb +0 -19
  205. data/lib/picky/internals/generators/strategy.rb +0 -18
  206. data/lib/picky/internals/generators/weights/default.rb +0 -9
  207. data/lib/picky/internals/generators/weights/logarithmic.rb +0 -43
  208. data/lib/picky/internals/generators/weights/strategy.rb +0 -11
  209. data/lib/picky/internals/generators/weights_generator.rb +0 -19
  210. data/lib/picky/internals/index/backend.rb +0 -112
  211. data/lib/picky/internals/index/file/basic.rb +0 -105
  212. data/lib/picky/internals/index/file/json.rb +0 -38
  213. data/lib/picky/internals/index/file/marshal.rb +0 -38
  214. data/lib/picky/internals/index/file/text.rb +0 -60
  215. data/lib/picky/internals/index/files.rb +0 -34
  216. data/lib/picky/internals/index/redis/basic.rb +0 -89
  217. data/lib/picky/internals/index/redis/list_hash.rb +0 -53
  218. data/lib/picky/internals/index/redis/string_hash.rb +0 -44
  219. data/lib/picky/internals/index/redis.rb +0 -44
  220. data/lib/picky/internals/indexed/bundle/base.rb +0 -114
  221. data/lib/picky/internals/indexed/bundle/memory.rb +0 -95
  222. data/lib/picky/internals/indexed/bundle/redis.rb +0 -49
  223. data/lib/picky/internals/indexed/categories.rb +0 -140
  224. data/lib/picky/internals/indexed/category.rb +0 -111
  225. data/lib/picky/internals/indexed/index.rb +0 -63
  226. data/lib/picky/internals/indexed/wrappers/bundle/calculation.rb +0 -37
  227. data/lib/picky/internals/indexed/wrappers/bundle/location.rb +0 -44
  228. data/lib/picky/internals/indexed/wrappers/bundle/wrapper.rb +0 -45
  229. data/lib/picky/internals/indexed/wrappers/category/location.rb +0 -27
  230. data/lib/picky/internals/indexed/wrappers/exact_first.rb +0 -59
  231. data/lib/picky/internals/indexing/bundle/base.rb +0 -216
  232. data/lib/picky/internals/indexing/bundle/memory.rb +0 -29
  233. data/lib/picky/internals/indexing/bundle/redis.rb +0 -28
  234. data/lib/picky/internals/indexing/bundle/super_base.rb +0 -65
  235. data/lib/picky/internals/indexing/category.rb +0 -153
  236. data/lib/picky/internals/indexing/index.rb +0 -142
  237. data/lib/picky/internals/indexing/wrappers/category/location.rb +0 -27
  238. data/lib/picky/internals/query/allocation.rb +0 -88
  239. data/lib/picky/internals/query/allocations.rb +0 -118
  240. data/lib/picky/internals/query/combination.rb +0 -80
  241. data/lib/picky/internals/query/combinations/base.rb +0 -74
  242. data/lib/picky/internals/query/combinations/memory.rb +0 -52
  243. data/lib/picky/internals/query/combinations/redis.rb +0 -90
  244. data/lib/picky/internals/query/indexes.rb +0 -199
  245. data/lib/picky/internals/query/qualifiers.rb +0 -82
  246. data/lib/picky/internals/query/token.rb +0 -202
  247. data/lib/picky/internals/query/tokens.rb +0 -109
  248. data/lib/picky/internals/shared/category.rb +0 -52
  249. data/lib/picky/internals/tokenizers/base.rb +0 -228
  250. data/lib/picky/internals/tokenizers/index.rb +0 -34
  251. data/lib/picky/internals/tokenizers/location.rb +0 -54
  252. data/lib/picky/internals/tokenizers/query.rb +0 -59
  253. data/lib/picky/internals.rb +0 -2
  254. data/spec/lib/aliases_spec.rb +0 -9
  255. data/spec/lib/index_bundle_spec.rb +0 -69
@@ -1,65 +0,0 @@
1
- module Internals
2
-
3
- # TODO Merge into Base, extract common with Indexed::Base.
4
- #
5
- module Indexing # :nodoc:all
6
- # A Bundle is a number of indexes
7
- # per [index, category] combination.
8
- #
9
- # At most, there are three indexes:
10
- # * *core* index (always used)
11
- # * *weights* index (always used)
12
- # * *similarity* index (used with similarity)
13
- #
14
- # In Picky, indexing is separated from the index
15
- # handling itself through a parallel structure.
16
- #
17
- # Both use methods provided by this base class, but
18
- # have very different goals:
19
- #
20
- # * *Indexing*::*Bundle* is just concerned with creating index files
21
- # and providing helper functions to e.g. check the indexes.
22
- #
23
- # * *Index*::*Bundle* is concerned with loading these index files into
24
- # memory and looking up search data as fast as possible.
25
- #
26
- module Bundle
27
-
28
- class SuperBase
29
-
30
- attr_reader :identifier, :files
31
- attr_accessor :index, :weights, :similarity, :configuration, :similarity_strategy
32
-
33
- delegate :clear, :to => :index
34
- delegate :[], :[]=, :to => :configuration
35
-
36
- def initialize name, category, similarity_strategy
37
- @identifier = "#{category.identifier}:#{name}"
38
- @files = Internals::Index::Files.new name, category
39
-
40
- @index = {}
41
- @weights = {}
42
- @similarity = {}
43
- @configuration = {} # A hash with config options.
44
-
45
- @similarity_strategy = similarity_strategy
46
- end
47
-
48
- # Get a list of similar texts.
49
- #
50
- # Note: Does not return itself.
51
- #
52
- def similar text
53
- code = similarity_strategy.encoded text
54
- similar_codes = code && @similarity[code]
55
- similar_codes.delete text if similar_codes
56
- similar_codes || []
57
- end
58
-
59
- end
60
-
61
- end
62
-
63
- end
64
-
65
- end
@@ -1,153 +0,0 @@
1
- module Internals
2
-
3
- module Indexing
4
-
5
- class Category
6
-
7
- include Internals::Shared::Category
8
-
9
- attr_reader :name, :index, :exact, :partial
10
-
11
- # Mandatory params:
12
- # * name: Category name to use as identifier and file names.
13
- # * index: Index to which this category is attached to.
14
- #
15
- # Options:
16
- # * partial: Partial::None.new, Partial::Substring.new(from:start_char, to:up_to_char) (defaults from:-3, to:-1)
17
- # * similarity: Similarity::None.new (default), Similarity::DoubleMetaphone.new(amount_of_similarly_linked_words)
18
- # * from: The source category identifier to take the data from.
19
- #
20
- # Advanced Options:
21
- # * source: Use if the category should use a different source.
22
- # * weights: Query::Weights.new( [:category1, :category2] => +2, ... )
23
- # * tokenizer: Use a subclass of Tokenizers::Base that implements #tokens_for and #empty_tokens.
24
- # * key_format: What this category's keys are formatted with (default is :to_i)
25
- #
26
- def initialize name, index, options = {}
27
- @name = name
28
- @index = index
29
-
30
- @source = options[:source]
31
- @from = options[:from]
32
- @tokenizer = options[:tokenizer]
33
- @key_format = options[:key_format]
34
-
35
- # TODO Push into Bundle. At least the weights.
36
- #
37
- partial = options[:partial] || Generators::Partial::Default
38
- weights = options[:weights] || Generators::Weights::Default
39
- similarity = options[:similarity] || Generators::Similarity::Default
40
-
41
- bundle_class = index.bundle_class || Bundle::Memory
42
-
43
- @exact = bundle_class.new(:exact, self, similarity, Generators::Partial::None.new, weights)
44
- @partial = bundle_class.new(:partial, self, Generators::Similarity::None.new, partial, weights)
45
- end
46
-
47
- # Return an appropriate source.
48
- #
49
- def source
50
- @source || @index.source
51
- end
52
- # Return the key format.
53
- #
54
- # If the source has no key format, then
55
- # check for an explicit key format, and
56
- # if none is defined, ask the index for
57
- # one.
58
- #
59
- def key_format
60
- source.respond_to?(:key_format) && source.key_format || @key_format || index.key_format
61
- end
62
- # The indexer is lazily generated and cached.
63
- #
64
- def indexer
65
- @indexer ||= source.respond_to?(:each) ? Indexers::Parallel.new(self) : Indexers::Serial.new(self)
66
- end
67
- # TODO This is a hack to get the parallel indexer working.
68
- #
69
- def categories
70
- [self]
71
- end
72
- # Returns an appropriate tokenizer.
73
- # If one isn't set on this category, will try the index,
74
- # and finally the default index tokenizer.
75
- #
76
- def tokenizer
77
- @tokenizer || @index.tokenizer || Tokenizers::Index.default
78
- end
79
-
80
- # Where the data is taken from.
81
- #
82
- def from
83
- @from || name
84
- end
85
-
86
- def backup_caches
87
- timed_exclaim "Backing up #{identifier}."
88
- exact.backup
89
- partial.backup
90
- end
91
- def restore_caches
92
- timed_exclaim "Restoring #{identifier}."
93
- exact.restore
94
- partial.restore
95
- end
96
- def check_caches
97
- timed_exclaim "Checking #{identifier}."
98
- exact.raise_unless_cache_exists
99
- partial.raise_unless_cache_exists
100
- end
101
- def clear_caches
102
- timed_exclaim "Deleting #{identifier}."
103
- exact.delete
104
- partial.delete
105
- end
106
-
107
- # Indexes, creates the "prepared_..." file.
108
- #
109
- def index!
110
- prepare_index_directory
111
- indexer.index
112
- end
113
-
114
- # Generates all caches for this category.
115
- #
116
- def cache!
117
- prepare_index_directory
118
- generate_caches
119
- end
120
- # We need to set what formatting method should be used.
121
- # Uses the one defined in the indexer.
122
- #
123
- def configure
124
- exact[:key_format] = self.key_format
125
- partial[:key_format] = self.key_format
126
- end
127
- def generate_caches
128
- configure
129
- generate_caches_from_source
130
- generate_partial
131
- generate_caches_from_memory
132
- dump_caches
133
- timed_exclaim %Q{"#{identifier}": Caching finished.}
134
- end
135
- def generate_caches_from_source
136
- exact.generate_caches_from_source
137
- end
138
- def generate_partial
139
- partial.generate_partial_from exact.index
140
- end
141
- def generate_caches_from_memory
142
- partial.generate_caches_from_memory
143
- end
144
- def dump_caches
145
- exact.dump
146
- partial.dump
147
- end
148
-
149
- end
150
-
151
- end
152
-
153
- end
@@ -1,142 +0,0 @@
1
- # TODO Move to the API.
2
- #
3
- module Internals
4
-
5
- module Indexing
6
-
7
- class Index
8
-
9
- attr_reader :name, :categories, :after_indexing, :bundle_class, :tokenizer
10
-
11
- # Delegators for indexing.
12
- #
13
- delegate :connect_backend,
14
- :to => :source
15
-
16
- each_delegate :backup_caches,
17
- :cache!,
18
- :check_caches,
19
- :clear_caches,
20
- :create_directory_structure,
21
- :generate_caches,
22
- :restore_caches,
23
- :to => :categories
24
-
25
- def initialize name, options = {}
26
- @name = name
27
- @source = options[:source]
28
- @after_indexing = options[:after_indexing]
29
- @bundle_class = options[:indexing_bundle_class] # TODO This should actually be a fixed parameter.
30
- @tokenizer = options[:tokenizer]
31
- @key_format = options[:key_format]
32
-
33
- @categories = []
34
- end
35
-
36
- # TODO Spec. Doc.
37
- #
38
- def define_category category_name, options = {}
39
- new_category = Category.new category_name, self, options
40
- new_category = yield new_category if block_given?
41
- categories << new_category
42
- new_category
43
- end
44
-
45
- # TODO Spec. Doc.
46
- #
47
- def define_indexing options = {}
48
- @tokenizer = Internals::Tokenizers::Index.new options
49
- end
50
-
51
- #
52
- #
53
- def define_source source
54
- @source = source
55
- end
56
- def source
57
- @source || raise_no_source
58
- end
59
- def raise_no_source
60
- raise NoSourceSpecifiedException.new(<<-NO_SOURCE
61
-
62
-
63
- No source given for index #{name}. An index needs a source.
64
- Example:
65
- Index::Memory.new(:with_source) do
66
- source Sources::CSV.new(:title, file: 'data/books.csv')
67
- category :title
68
- category :author
69
- end
70
-
71
- NO_SOURCE
72
- )
73
- end
74
-
75
- #
76
- #
77
- def define_key_format key_format
78
- @key_format = key_format
79
- end
80
- def key_format
81
- @key_format || :to_i
82
- end
83
-
84
- #
85
- #
86
- def find category_name
87
- category_name = category_name.to_sym
88
-
89
- categories.each do |category|
90
- next unless category.name == category_name
91
- return category
92
- end
93
-
94
- raise %Q{Index category "#{category_name}" not found. Possible categories: "#{categories.map(&:name).join('", "')}".}
95
- end
96
-
97
- # Decides whether to use a parallel indexer or whether to
98
- # delegate to each category to index themselves.
99
- #
100
- def index!
101
- # TODO Duplicated in category.rb def indexer.
102
- #
103
- if source.respond_to?(:each)
104
- warn %Q{\n\033[1mWarning\033[m, source for index "#{name}" is empty: #{source} (responds true to empty?).\n} if source.respond_to?(:empty?) && source.empty?
105
- index_parallel
106
- else
107
- categories.each &:index!
108
- end
109
- end
110
- # Indexes the categories in parallel.
111
- #
112
- # Only use where the category does not have a non-#each source defined.
113
- #
114
- def index_parallel
115
- indexer = Indexers::Parallel.new self
116
- categories.first.prepare_index_directory # TODO Unnice.
117
- indexer.index
118
- end
119
-
120
- # Indexing.
121
- #
122
- # Note: If it is an each source we do not take a snapshot.
123
- #
124
- def take_snapshot
125
- source.take_snapshot self unless source.respond_to? :each
126
- end
127
-
128
- #
129
- #
130
- def to_s
131
- <<-INDEX
132
- Indexing(#{name}):
133
- #{"source: #{source}".indented_to_s}
134
- #{"Categories:\n#{categories.indented_to_s}".indented_to_s}
135
- INDEX
136
- end
137
-
138
- end
139
-
140
- end
141
-
142
- end
@@ -1,27 +0,0 @@
1
- module Internals
2
- module Indexing
3
- module Wrappers
4
- module Category
5
-
6
- module Location
7
-
8
- def self.install_on category, grid, precision = 1
9
- new_source = Sources::Wrappers::Location.new category.source, grid, precision
10
-
11
- category.class_eval do
12
- def tokenizer
13
- @tokenizer ||= Internals::Tokenizers::Index.new
14
- end
15
- define_method :source do
16
- new_source
17
- end
18
- end
19
-
20
- end
21
-
22
- end
23
-
24
- end
25
- end
26
- end
27
- end
@@ -1,88 +0,0 @@
1
- module Internals
2
-
3
- module Query
4
-
5
- # An allocation has a number of combinations:
6
- # [token, index] [other_token, other_index], ...
7
- #
8
- class Allocation # :nodoc:all
9
-
10
- attr_reader :count, :ids, :score, :combinations, :result_identifier
11
-
12
- #
13
- #
14
- def initialize combinations, result_identifier
15
- @combinations = combinations
16
- @result_identifier = result_identifier
17
- end
18
-
19
- def hash
20
- @combinations.hash
21
- end
22
- def eql? other_allocation
23
- true # FIXME
24
- # @combinations.eql? other_allocation.combinations
25
- end
26
-
27
- # Scores its combinations and caches the result.
28
- #
29
- def calculate_score weights
30
- @score ||= @combinations.calculate_score(weights)
31
- end
32
-
33
- # Asks the combinations for the (intersected) ids.
34
- #
35
- def calculate_ids amount, offset
36
- @combinations.ids amount, offset # Calculate as many ids as are necessary.
37
- end
38
-
39
- # This starts the searching process.
40
- #
41
- def process! amount, offset
42
- ids = calculate_ids amount, offset
43
- @count = ids.size # cache the count before throwing away the ids
44
- @ids = ids.slice!(offset, amount) || [] # slice out the relevant part
45
- end
46
-
47
- #
48
- #
49
- def keep identifiers = [] # categories
50
- @combinations.keep identifiers
51
- end
52
- #
53
- #
54
- def remove identifiers = [] # categories
55
- @combinations.remove identifiers
56
- end
57
-
58
- # Sort highest score first.
59
- #
60
- def <=> other_allocation
61
- other_allocation.score <=> self.score
62
- end
63
-
64
- # Transform the allocation into result form.
65
- #
66
- def to_result
67
- [self.result_identifier, self.score, self.count, @combinations.to_result, self.ids] if self.count > 0
68
- end
69
-
70
- # Json representation of this allocation.
71
- #
72
- # Note: Delegates to to_result.
73
- #
74
- def to_json
75
- to_result.to_json
76
- end
77
-
78
- #
79
- #
80
- def to_s
81
- "Allocation: #{to_result.join(', ')}"
82
- end
83
-
84
- end
85
-
86
- end
87
-
88
- end
@@ -1,118 +0,0 @@
1
- module Internals
2
-
3
- module Query
4
- # Container class for allocations.
5
- #
6
- class Allocations # :nodoc:all
7
-
8
- delegate :each, :inject, :empty?, :size, :to => :@allocations
9
- attr_reader :total
10
-
11
- def initialize allocations = []
12
- @allocations = allocations
13
- end
14
-
15
- # Score each allocation.
16
- #
17
- def calculate_score weights
18
- @allocations.each do |allocation|
19
- allocation.calculate_score weights
20
- end
21
- end
22
- # Sort the allocations.
23
- #
24
- def sort!
25
- @allocations.sort!
26
- end
27
-
28
- # Reduces the amount of allocations to x.
29
- #
30
- def reduce_to amount
31
- @allocations = @allocations.shift amount
32
- end
33
-
34
- # Keeps combinations.
35
- #
36
- # Only those passed in remain.
37
- #
38
- def keep identifiers = []
39
- @allocations.each { |allocation| allocation.keep identifiers } unless identifiers.empty?
40
- end
41
- # Removes combinations.
42
- #
43
- # Only those passed in are removed.
44
- #
45
- def remove identifiers = []
46
- @allocations.each { |allocation| allocation.remove identifiers } unless identifiers.empty?
47
- end
48
-
49
- # Returns the top amount ids.
50
- #
51
- def ids amount = 20
52
- @allocations.inject([]) do |total, allocation|
53
- total.size >= amount ? (return total.shift(amount)) : total + allocation.ids
54
- end
55
- end
56
-
57
- # This is the main method of this class that will replace ids and count.
58
- #
59
- # What it does is calculate the ids and counts of its allocations
60
- # for being used in the results. It also calculates the total
61
- #
62
- # Parameters:
63
- # * amount: the amount of ids to calculate
64
- # * offset: the offset from where in the result set to take the ids
65
- #
66
- # Note: With an amount of 0, an offset > 0 doesn't make much
67
- # sense, as seen in the live search.
68
- #
69
- # Note: Each allocation caches its count, but not its ids (thrown away).
70
- # The ids are cached in this class.
71
- #
72
- # Note: It's possible that no ids are returned by an allocation, but a count. (In case of an offset)
73
- #
74
- def process! amount, offset = 0
75
- @total = 0
76
- current_offset = 0
77
- @allocations.each do |allocation|
78
- ids = allocation.process! amount, offset
79
- @total = @total + allocation.count # the total mixed in
80
- if ids.empty?
81
- offset = offset - allocation.count unless offset.zero?
82
- else
83
- amount = amount - ids.size # we need less results from the following allocation
84
- offset = 0 # we have already passed the offset
85
- end
86
- end
87
- end
88
-
89
- def uniq
90
- @allocations.uniq!
91
- end
92
-
93
- def to_a
94
- @allocations
95
- end
96
-
97
- # Simply inspects the internal allocations.
98
- #
99
- def to_s
100
- @allocations.inspect
101
- end
102
-
103
- # Allocations for results are in the form:
104
- # [
105
- # allocation1.to_result,
106
- # allocation2.to_result
107
- # ...
108
- # ]
109
- #
110
- def to_result
111
- @allocations.map(&:to_result).compact
112
- end
113
-
114
- end
115
-
116
- end
117
-
118
- end
@@ -1,80 +0,0 @@
1
- module Internals
2
-
3
- module Query
4
-
5
- # Describes the combination of a token (the text) and
6
- # the index (the bundle): [text, index_bundle]
7
- #
8
- # A combination is a single part of an allocation:
9
- # [..., [text2, index_bundle2], ...]
10
- #
11
- # An allocation consists of a number of combinations:
12
- # [[text1, index_bundle1], [text2, index_bundle2], [text3, index_bundle1]]
13
- #
14
- class Combination # :nodoc:all
15
-
16
- attr_reader :token, :bundle, :category_name
17
-
18
- def initialize token, category
19
- @token = token
20
- @category_name = category.name
21
- @bundle = category.bundle_for token
22
- @text = @token.text # don't want to use reset_similar already
23
- end
24
-
25
- # Note: Required for uniq!
26
- #
27
- def hash
28
- [@token.to_s, @bundle].hash
29
- end
30
-
31
- # Returns the weight of this combination.
32
- #
33
- # Note: Caching is most of the time useful.
34
- #
35
- def weight
36
- @weight ||= @bundle.weight(@text)
37
- end
38
-
39
- # Returns an array of ids for the given text.
40
- #
41
- # Note: Caching is most of the time useful.
42
- #
43
- def ids
44
- @ids ||= @bundle.ids(@text)
45
- end
46
-
47
- # The identifier for this combination.
48
- #
49
- def identifier
50
- "#{bundle.identifier}:#{@token.identifier}"
51
- end
52
-
53
- # Is the identifier in the given identifiers?
54
- #
55
- def in? identifiers
56
- identifiers.include? identifier
57
- end
58
-
59
- # Combines the category names with the original names.
60
- # [
61
- # [:title, 'Flarbl', :flarbl],
62
- # [:category, 'Gnorf', :gnorf]
63
- # ]
64
- #
65
- def to_result
66
- [@category_name, *@token.to_result]
67
- end
68
-
69
- # Example:
70
- # "exact title:Peter*:peter"
71
- #
72
- def to_s
73
- "#{bundle.identifier} #{to_result.join(':')}"
74
- end
75
-
76
- end
77
-
78
- end
79
-
80
- end