picky 2.5.2 → 2.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (255) hide show
  1. data/lib/picky/adapters/rack/base.rb +23 -0
  2. data/lib/picky/adapters/rack/live_parameters.rb +33 -0
  3. data/lib/picky/adapters/rack/query.rb +65 -0
  4. data/lib/picky/adapters/rack.rb +30 -0
  5. data/lib/picky/application.rb +5 -5
  6. data/lib/picky/backend/backend.rb +108 -0
  7. data/lib/picky/backend/file/basic.rb +101 -0
  8. data/lib/picky/backend/file/json.rb +34 -0
  9. data/lib/picky/backend/file/marshal.rb +34 -0
  10. data/lib/picky/backend/file/text.rb +56 -0
  11. data/lib/picky/backend/files.rb +30 -0
  12. data/lib/picky/backend/redis/basic.rb +85 -0
  13. data/lib/picky/backend/redis/list_hash.rb +49 -0
  14. data/lib/picky/backend/redis/string_hash.rb +40 -0
  15. data/lib/picky/backend/redis.rb +40 -0
  16. data/lib/picky/calculations/location.rb +57 -0
  17. data/lib/picky/categories.rb +62 -0
  18. data/lib/picky/categories_indexed.rb +93 -0
  19. data/lib/picky/categories_indexing.rb +12 -0
  20. data/lib/picky/category.rb +127 -0
  21. data/lib/picky/category_indexed.rb +64 -0
  22. data/lib/picky/category_indexing.rb +145 -0
  23. data/lib/picky/{internals/ext → ext}/maybe_compile.rb +0 -0
  24. data/lib/picky/{internals/ext → ext}/ruby19/extconf.rb +0 -0
  25. data/lib/picky/{internals/ext → ext}/ruby19/performant.c +0 -0
  26. data/lib/picky/{internals/extensions → extensions}/array.rb +0 -0
  27. data/lib/picky/extensions/class.rb +11 -0
  28. data/lib/picky/{internals/extensions → extensions}/hash.rb +0 -0
  29. data/lib/picky/{internals/extensions → extensions}/module.rb +0 -0
  30. data/lib/picky/{internals/extensions → extensions}/object.rb +0 -0
  31. data/lib/picky/{internals/extensions → extensions}/symbol.rb +0 -0
  32. data/lib/picky/frontend_adapters/rack.rb +146 -0
  33. data/lib/picky/generators/aliases.rb +3 -3
  34. data/lib/picky/generators/base.rb +15 -0
  35. data/lib/picky/generators/partial/default.rb +5 -0
  36. data/lib/picky/generators/partial/none.rb +31 -0
  37. data/lib/picky/generators/partial/strategy.rb +25 -0
  38. data/lib/picky/generators/partial/substring.rb +118 -0
  39. data/lib/picky/generators/partial_generator.rb +15 -0
  40. data/lib/picky/generators/similarity/default.rb +7 -0
  41. data/lib/picky/generators/similarity/double_metaphone.rb +28 -0
  42. data/lib/picky/generators/similarity/metaphone.rb +28 -0
  43. data/lib/picky/generators/similarity/none.rb +31 -0
  44. data/lib/picky/generators/similarity/phonetic.rb +65 -0
  45. data/lib/picky/generators/similarity/soundex.rb +28 -0
  46. data/lib/picky/generators/similarity/strategy.rb +9 -0
  47. data/lib/picky/generators/similarity_generator.rb +15 -0
  48. data/lib/picky/generators/strategy.rb +14 -0
  49. data/lib/picky/generators/weights/default.rb +7 -0
  50. data/lib/picky/generators/weights/logarithmic.rb +39 -0
  51. data/lib/picky/generators/weights/strategy.rb +9 -0
  52. data/lib/picky/generators/weights_generator.rb +15 -0
  53. data/lib/picky/{internals/helpers → helpers}/measuring.rb +0 -0
  54. data/lib/picky/index/base.rb +119 -104
  55. data/lib/picky/index/base_indexed.rb +27 -0
  56. data/lib/picky/index/base_indexing.rb +119 -0
  57. data/lib/picky/index/memory.rb +6 -18
  58. data/lib/picky/index/redis.rb +6 -18
  59. data/lib/picky/indexed/bundle/base.rb +110 -0
  60. data/lib/picky/indexed/bundle/memory.rb +91 -0
  61. data/lib/picky/indexed/bundle/redis.rb +45 -0
  62. data/lib/picky/indexed/wrappers/bundle/calculation.rb +35 -0
  63. data/lib/picky/indexed/wrappers/bundle/location.rb +42 -0
  64. data/lib/picky/indexed/wrappers/bundle/wrapper.rb +43 -0
  65. data/lib/picky/indexed/wrappers/category/location.rb +25 -0
  66. data/lib/picky/indexed/wrappers/exact_first.rb +55 -0
  67. data/lib/picky/{internals/indexers → indexers}/base.rb +0 -0
  68. data/lib/picky/{internals/indexers → indexers}/parallel.rb +0 -0
  69. data/lib/picky/{internals/indexers → indexers}/serial.rb +0 -0
  70. data/lib/picky/{internals/indexers → indexers}/solr.rb +0 -0
  71. data/lib/picky/indexes.rb +73 -0
  72. data/lib/picky/indexes_indexed.rb +29 -0
  73. data/lib/picky/indexes_indexing.rb +49 -0
  74. data/lib/picky/indexing/bundle/base.rb +212 -0
  75. data/lib/picky/indexing/bundle/memory.rb +25 -0
  76. data/lib/picky/indexing/bundle/redis.rb +24 -0
  77. data/lib/picky/indexing/bundle/super_base.rb +61 -0
  78. data/lib/picky/indexing/wrappers/category/location.rb +25 -0
  79. data/lib/picky/interfaces/live_parameters.rb +8 -8
  80. data/lib/picky/loader.rb +89 -95
  81. data/lib/picky/{internals/performant.rb → performant.rb} +0 -0
  82. data/lib/picky/query/allocation.rb +84 -0
  83. data/lib/picky/query/allocations.rb +114 -0
  84. data/lib/picky/query/combination.rb +76 -0
  85. data/lib/picky/query/combinations/base.rb +70 -0
  86. data/lib/picky/query/combinations/memory.rb +48 -0
  87. data/lib/picky/query/combinations/redis.rb +86 -0
  88. data/lib/picky/query/indexes.rb +195 -0
  89. data/lib/picky/query/qualifiers.rb +76 -0
  90. data/lib/picky/query/token.rb +198 -0
  91. data/lib/picky/query/tokens.rb +103 -0
  92. data/lib/picky/{internals/query → query}/weights.rb +0 -0
  93. data/lib/picky/results.rb +1 -1
  94. data/lib/picky/search.rb +6 -6
  95. data/lib/picky/{internals/solr → solr}/schema_generator.rb +0 -0
  96. data/lib/picky/sources/db.rb +7 -7
  97. data/lib/picky/sources/wrappers/location.rb +2 -2
  98. data/lib/picky/tokenizers/base.rb +224 -0
  99. data/lib/picky/tokenizers/index.rb +30 -0
  100. data/lib/picky/tokenizers/location.rb +49 -0
  101. data/lib/picky/tokenizers/query.rb +55 -0
  102. data/lib/tasks/index.rake +4 -3
  103. data/lib/tasks/try.rake +2 -2
  104. data/spec/lib/{internals/adapters → adapters}/rack/base_spec.rb +1 -1
  105. data/spec/lib/{internals/adapters → adapters}/rack/live_parameters_spec.rb +1 -1
  106. data/spec/lib/{internals/adapters → adapters}/rack/query_spec.rb +1 -1
  107. data/spec/lib/application_spec.rb +3 -3
  108. data/spec/lib/{internals/index → backend}/file/basic_spec.rb +1 -1
  109. data/spec/lib/{internals/index → backend}/file/json_spec.rb +1 -1
  110. data/spec/lib/{internals/index → backend}/file/marshal_spec.rb +1 -1
  111. data/spec/lib/{internals/index → backend}/file/text_spec.rb +1 -1
  112. data/spec/lib/{internals/index → backend}/files_spec.rb +3 -3
  113. data/spec/lib/{internals/index → backend}/redis/basic_spec.rb +1 -1
  114. data/spec/lib/{internals/index → backend}/redis/list_hash_spec.rb +1 -1
  115. data/spec/lib/{internals/index → backend}/redis/string_hash_spec.rb +1 -1
  116. data/spec/lib/{internals/index → backend}/redis_spec.rb +11 -5
  117. data/spec/lib/{internals/calculations → calculations}/location_spec.rb +1 -1
  118. data/spec/lib/{internals/indexed/categories_spec.rb → categories_indexed_spec.rb} +10 -10
  119. data/spec/lib/{internals/indexed/category_spec.rb → category_indexed_spec.rb} +12 -12
  120. data/spec/lib/{internals/indexing/category_spec.rb → category_indexing_spec.rb} +10 -10
  121. data/spec/lib/{internals/cores_spec.rb → cores_spec.rb} +0 -0
  122. data/spec/lib/{internals/extensions → extensions}/array_spec.rb +0 -0
  123. data/spec/lib/{internals/extensions → extensions}/hash_spec.rb +0 -0
  124. data/spec/lib/{internals/extensions → extensions}/module_spec.rb +0 -0
  125. data/spec/lib/{internals/extensions → extensions}/object_spec.rb +0 -0
  126. data/spec/lib/{internals/extensions → extensions}/symbol_spec.rb +0 -0
  127. data/spec/lib/{internals/frontend_adapters → frontend_adapters}/rack_spec.rb +10 -10
  128. data/spec/lib/generators/aliases_spec.rb +3 -3
  129. data/spec/lib/{internals/generators → generators}/cacher_strategy_spec.rb +1 -1
  130. data/spec/lib/{internals/generators → generators}/partial/default_spec.rb +3 -3
  131. data/spec/lib/{internals/generators → generators}/partial/none_spec.rb +2 -2
  132. data/spec/lib/{internals/generators → generators}/partial/substring_spec.rb +1 -1
  133. data/spec/lib/{internals/generators → generators}/partial_generator_spec.rb +3 -3
  134. data/spec/lib/{internals/generators → generators}/similarity/double_metaphone_spec.rb +1 -1
  135. data/spec/lib/{internals/generators → generators}/similarity/metaphone_spec.rb +1 -1
  136. data/spec/lib/{internals/generators → generators}/similarity/none_spec.rb +1 -1
  137. data/spec/lib/{internals/generators → generators}/similarity/phonetic_spec.rb +1 -1
  138. data/spec/lib/{internals/generators → generators}/similarity/soundex_spec.rb +1 -1
  139. data/spec/lib/{internals/generators → generators}/similarity_generator_spec.rb +2 -2
  140. data/spec/lib/{internals/generators → generators}/weights/logarithmic_spec.rb +1 -1
  141. data/spec/lib/{internals/generators → generators}/weights_generator_spec.rb +5 -5
  142. data/spec/lib/{internals/helpers → helpers}/measuring_spec.rb +0 -0
  143. data/spec/lib/{internals/indexed/index_spec.rb → index/base_indexed_spec.rb} +5 -5
  144. data/spec/lib/{internals/indexing/index_spec.rb → index/base_indexing_spec.rb} +6 -19
  145. data/spec/lib/index/base_spec.rb +10 -53
  146. data/spec/lib/{internals/indexed → indexed}/bundle/memory_spec.rb +5 -5
  147. data/spec/lib/{internals/indexed → indexed}/bundle/redis_spec.rb +4 -4
  148. data/spec/lib/{internals/indexed → indexed}/wrappers/bundle/calculation_spec.rb +1 -1
  149. data/spec/lib/{internals/indexed → indexed}/wrappers/bundle/wrapper_spec.rb +1 -1
  150. data/spec/lib/{internals/indexed → indexed}/wrappers/exact_first_spec.rb +7 -7
  151. data/spec/lib/{internals/indexers → indexers}/base_spec.rb +0 -0
  152. data/spec/lib/{internals/indexers → indexers}/parallel_spec.rb +0 -0
  153. data/spec/lib/{internals/indexers → indexers}/serial_spec.rb +0 -0
  154. data/spec/lib/indexes_class_spec.rb +30 -0
  155. data/spec/lib/{indexed/indexes_spec.rb → indexes_indexed_spec.rb} +1 -1
  156. data/spec/lib/{indexing/indexes_spec.rb → indexes_indexing_spec.rb} +8 -8
  157. data/spec/lib/{internals/indexing/indexes_spec.rb → indexes_spec.rb} +15 -12
  158. data/spec/lib/{internals/indexing → indexing}/bundle/memory_partial_generation_speed_spec.rb +4 -4
  159. data/spec/lib/{internals/indexing → indexing}/bundle/memory_spec.rb +3 -3
  160. data/spec/lib/{internals/indexing → indexing}/bundle/redis_spec.rb +3 -3
  161. data/spec/lib/{internals/indexing → indexing}/bundle/super_base_spec.rb +2 -2
  162. data/spec/lib/{internals/interfaces → interfaces}/live_parameters_spec.rb +0 -0
  163. data/spec/lib/query/allocation_spec.rb +1 -1
  164. data/spec/lib/query/allocations_spec.rb +1 -1
  165. data/spec/lib/query/combination_spec.rb +5 -5
  166. data/spec/lib/query/combinations/base_spec.rb +1 -1
  167. data/spec/lib/query/combinations/memory_spec.rb +1 -1
  168. data/spec/lib/query/combinations/redis_spec.rb +1 -1
  169. data/spec/lib/query/indexes_spec.rb +1 -1
  170. data/spec/lib/query/qualifiers_spec.rb +4 -4
  171. data/spec/lib/query/token_spec.rb +3 -3
  172. data/spec/lib/query/tokens_spec.rb +32 -32
  173. data/spec/lib/search_spec.rb +5 -5
  174. data/spec/lib/{internals/solr → solr}/schema_generator_spec.rb +0 -0
  175. data/spec/lib/sources/db_spec.rb +4 -8
  176. data/spec/lib/sources/wrappers/location_spec.rb +1 -1
  177. data/spec/lib/{internals/tokenizers → tokenizers}/base_spec.rb +1 -1
  178. data/spec/lib/{internals/tokenizers → tokenizers}/index_spec.rb +1 -1
  179. data/spec/lib/{internals/tokenizers → tokenizers}/query_spec.rb +1 -1
  180. metadata +214 -215
  181. data/lib/picky/aliases.rb +0 -4
  182. data/lib/picky/index_bundle.rb +0 -48
  183. data/lib/picky/indexed/indexes.rb +0 -59
  184. data/lib/picky/indexing/indexes.rb +0 -87
  185. data/lib/picky/internals/adapters/rack/base.rb +0 -27
  186. data/lib/picky/internals/adapters/rack/live_parameters.rb +0 -37
  187. data/lib/picky/internals/adapters/rack/query.rb +0 -69
  188. data/lib/picky/internals/adapters/rack.rb +0 -34
  189. data/lib/picky/internals/calculations/location.rb +0 -59
  190. data/lib/picky/internals/frontend_adapters/rack.rb +0 -150
  191. data/lib/picky/internals/generators/base.rb +0 -19
  192. data/lib/picky/internals/generators/partial/default.rb +0 -7
  193. data/lib/picky/internals/generators/partial/none.rb +0 -35
  194. data/lib/picky/internals/generators/partial/strategy.rb +0 -29
  195. data/lib/picky/internals/generators/partial/substring.rb +0 -122
  196. data/lib/picky/internals/generators/partial_generator.rb +0 -19
  197. data/lib/picky/internals/generators/similarity/default.rb +0 -9
  198. data/lib/picky/internals/generators/similarity/double_metaphone.rb +0 -32
  199. data/lib/picky/internals/generators/similarity/metaphone.rb +0 -32
  200. data/lib/picky/internals/generators/similarity/none.rb +0 -35
  201. data/lib/picky/internals/generators/similarity/phonetic.rb +0 -69
  202. data/lib/picky/internals/generators/similarity/soundex.rb +0 -32
  203. data/lib/picky/internals/generators/similarity/strategy.rb +0 -11
  204. data/lib/picky/internals/generators/similarity_generator.rb +0 -19
  205. data/lib/picky/internals/generators/strategy.rb +0 -18
  206. data/lib/picky/internals/generators/weights/default.rb +0 -9
  207. data/lib/picky/internals/generators/weights/logarithmic.rb +0 -43
  208. data/lib/picky/internals/generators/weights/strategy.rb +0 -11
  209. data/lib/picky/internals/generators/weights_generator.rb +0 -19
  210. data/lib/picky/internals/index/backend.rb +0 -112
  211. data/lib/picky/internals/index/file/basic.rb +0 -105
  212. data/lib/picky/internals/index/file/json.rb +0 -38
  213. data/lib/picky/internals/index/file/marshal.rb +0 -38
  214. data/lib/picky/internals/index/file/text.rb +0 -60
  215. data/lib/picky/internals/index/files.rb +0 -34
  216. data/lib/picky/internals/index/redis/basic.rb +0 -89
  217. data/lib/picky/internals/index/redis/list_hash.rb +0 -53
  218. data/lib/picky/internals/index/redis/string_hash.rb +0 -44
  219. data/lib/picky/internals/index/redis.rb +0 -44
  220. data/lib/picky/internals/indexed/bundle/base.rb +0 -114
  221. data/lib/picky/internals/indexed/bundle/memory.rb +0 -95
  222. data/lib/picky/internals/indexed/bundle/redis.rb +0 -49
  223. data/lib/picky/internals/indexed/categories.rb +0 -140
  224. data/lib/picky/internals/indexed/category.rb +0 -111
  225. data/lib/picky/internals/indexed/index.rb +0 -63
  226. data/lib/picky/internals/indexed/wrappers/bundle/calculation.rb +0 -37
  227. data/lib/picky/internals/indexed/wrappers/bundle/location.rb +0 -44
  228. data/lib/picky/internals/indexed/wrappers/bundle/wrapper.rb +0 -45
  229. data/lib/picky/internals/indexed/wrappers/category/location.rb +0 -27
  230. data/lib/picky/internals/indexed/wrappers/exact_first.rb +0 -59
  231. data/lib/picky/internals/indexing/bundle/base.rb +0 -216
  232. data/lib/picky/internals/indexing/bundle/memory.rb +0 -29
  233. data/lib/picky/internals/indexing/bundle/redis.rb +0 -28
  234. data/lib/picky/internals/indexing/bundle/super_base.rb +0 -65
  235. data/lib/picky/internals/indexing/category.rb +0 -153
  236. data/lib/picky/internals/indexing/index.rb +0 -142
  237. data/lib/picky/internals/indexing/wrappers/category/location.rb +0 -27
  238. data/lib/picky/internals/query/allocation.rb +0 -88
  239. data/lib/picky/internals/query/allocations.rb +0 -118
  240. data/lib/picky/internals/query/combination.rb +0 -80
  241. data/lib/picky/internals/query/combinations/base.rb +0 -74
  242. data/lib/picky/internals/query/combinations/memory.rb +0 -52
  243. data/lib/picky/internals/query/combinations/redis.rb +0 -90
  244. data/lib/picky/internals/query/indexes.rb +0 -199
  245. data/lib/picky/internals/query/qualifiers.rb +0 -82
  246. data/lib/picky/internals/query/token.rb +0 -202
  247. data/lib/picky/internals/query/tokens.rb +0 -109
  248. data/lib/picky/internals/shared/category.rb +0 -52
  249. data/lib/picky/internals/tokenizers/base.rb +0 -228
  250. data/lib/picky/internals/tokenizers/index.rb +0 -34
  251. data/lib/picky/internals/tokenizers/location.rb +0 -54
  252. data/lib/picky/internals/tokenizers/query.rb +0 -59
  253. data/lib/picky/internals.rb +0 -2
  254. data/spec/lib/aliases_spec.rb +0 -9
  255. data/spec/lib/index_bundle_spec.rb +0 -69
@@ -1,65 +0,0 @@
1
- module Internals
2
-
3
- # TODO Merge into Base, extract common with Indexed::Base.
4
- #
5
- module Indexing # :nodoc:all
6
- # A Bundle is a number of indexes
7
- # per [index, category] combination.
8
- #
9
- # At most, there are three indexes:
10
- # * *core* index (always used)
11
- # * *weights* index (always used)
12
- # * *similarity* index (used with similarity)
13
- #
14
- # In Picky, indexing is separated from the index
15
- # handling itself through a parallel structure.
16
- #
17
- # Both use methods provided by this base class, but
18
- # have very different goals:
19
- #
20
- # * *Indexing*::*Bundle* is just concerned with creating index files
21
- # and providing helper functions to e.g. check the indexes.
22
- #
23
- # * *Index*::*Bundle* is concerned with loading these index files into
24
- # memory and looking up search data as fast as possible.
25
- #
26
- module Bundle
27
-
28
- class SuperBase
29
-
30
- attr_reader :identifier, :files
31
- attr_accessor :index, :weights, :similarity, :configuration, :similarity_strategy
32
-
33
- delegate :clear, :to => :index
34
- delegate :[], :[]=, :to => :configuration
35
-
36
- def initialize name, category, similarity_strategy
37
- @identifier = "#{category.identifier}:#{name}"
38
- @files = Internals::Index::Files.new name, category
39
-
40
- @index = {}
41
- @weights = {}
42
- @similarity = {}
43
- @configuration = {} # A hash with config options.
44
-
45
- @similarity_strategy = similarity_strategy
46
- end
47
-
48
- # Get a list of similar texts.
49
- #
50
- # Note: Does not return itself.
51
- #
52
- def similar text
53
- code = similarity_strategy.encoded text
54
- similar_codes = code && @similarity[code]
55
- similar_codes.delete text if similar_codes
56
- similar_codes || []
57
- end
58
-
59
- end
60
-
61
- end
62
-
63
- end
64
-
65
- end
@@ -1,153 +0,0 @@
1
- module Internals
2
-
3
- module Indexing
4
-
5
- class Category
6
-
7
- include Internals::Shared::Category
8
-
9
- attr_reader :name, :index, :exact, :partial
10
-
11
- # Mandatory params:
12
- # * name: Category name to use as identifier and file names.
13
- # * index: Index to which this category is attached.
14
- #
15
- # Options:
16
- # * partial: Partial::None.new, Partial::Substring.new(from:start_char, to:up_to_char) (defaults from:-3, to:-1)
17
- # * similarity: Similarity::None.new (default), Similarity::DoubleMetaphone.new(amount_of_similarly_linked_words)
18
- # * from: The source category identifier to take the data from.
19
- #
20
- # Advanced Options:
21
- # * source: Use if the category should use a different source.
22
- # * weights: Query::Weights.new( [:category1, :category2] => +2, ... )
23
- # * tokenizer: Use a subclass of Tokenizers::Base that implements #tokens_for and #empty_tokens.
24
- # * key_format: What this category's keys are formatted with (default is :to_i)
25
- #
26
- def initialize name, index, options = {}
27
- @name = name
28
- @index = index
29
-
30
- @source = options[:source]
31
- @from = options[:from]
32
- @tokenizer = options[:tokenizer]
33
- @key_format = options[:key_format]
34
-
35
- # TODO Push into Bundle. At least the weights.
36
- #
37
- partial = options[:partial] || Generators::Partial::Default
38
- weights = options[:weights] || Generators::Weights::Default
39
- similarity = options[:similarity] || Generators::Similarity::Default
40
-
41
- bundle_class = index.bundle_class || Bundle::Memory
42
-
43
- @exact = bundle_class.new(:exact, self, similarity, Generators::Partial::None.new, weights)
44
- @partial = bundle_class.new(:partial, self, Generators::Similarity::None.new, partial, weights)
45
- end
46
-
47
- # Return an appropriate source.
48
- #
49
- def source
50
- @source || @index.source
51
- end
52
- # Return the key format.
53
- #
54
- # If the source has no key format, then
55
- # check for an explicit key format, and
56
- # if none is defined, ask the index for
57
- # one.
58
- #
59
- def key_format
60
- source.respond_to?(:key_format) && source.key_format || @key_format || index.key_format
61
- end
62
- # The indexer is lazily generated and cached.
63
- #
64
- def indexer
65
- @indexer ||= source.respond_to?(:each) ? Indexers::Parallel.new(self) : Indexers::Serial.new(self)
66
- end
67
- # TODO This is a hack to get the parallel indexer working.
68
- #
69
- def categories
70
- [self]
71
- end
72
- # Returns an appropriate tokenizer.
73
- # If one isn't set on this category, will try the index,
74
- # and finally the default index tokenizer.
75
- #
76
- def tokenizer
77
- @tokenizer || @index.tokenizer || Tokenizers::Index.default
78
- end
79
-
80
- # Where the data is taken from.
81
- #
82
- def from
83
- @from || name
84
- end
85
-
86
- def backup_caches
87
- timed_exclaim "Backing up #{identifier}."
88
- exact.backup
89
- partial.backup
90
- end
91
- def restore_caches
92
- timed_exclaim "Restoring #{identifier}."
93
- exact.restore
94
- partial.restore
95
- end
96
- def check_caches
97
- timed_exclaim "Checking #{identifier}."
98
- exact.raise_unless_cache_exists
99
- partial.raise_unless_cache_exists
100
- end
101
- def clear_caches
102
- timed_exclaim "Deleting #{identifier}."
103
- exact.delete
104
- partial.delete
105
- end
106
-
107
- # Indexes, creates the "prepared_..." file.
108
- #
109
- def index!
110
- prepare_index_directory
111
- indexer.index
112
- end
113
-
114
- # Generates all caches for this category.
115
- #
116
- def cache!
117
- prepare_index_directory
118
- generate_caches
119
- end
120
- # We need to set what formatting method should be used.
121
- # Uses the one defined in the indexer.
122
- #
123
- def configure
124
- exact[:key_format] = self.key_format
125
- partial[:key_format] = self.key_format
126
- end
127
- def generate_caches
128
- configure
129
- generate_caches_from_source
130
- generate_partial
131
- generate_caches_from_memory
132
- dump_caches
133
- timed_exclaim %Q{"#{identifier}": Caching finished.}
134
- end
135
- def generate_caches_from_source
136
- exact.generate_caches_from_source
137
- end
138
- def generate_partial
139
- partial.generate_partial_from exact.index
140
- end
141
- def generate_caches_from_memory
142
- partial.generate_caches_from_memory
143
- end
144
- def dump_caches
145
- exact.dump
146
- partial.dump
147
- end
148
-
149
- end
150
-
151
- end
152
-
153
- end
@@ -1,142 +0,0 @@
1
- # TODO Move to the API.
2
- #
3
- module Internals
4
-
5
- module Indexing
6
-
7
- class Index
8
-
9
- attr_reader :name, :categories, :after_indexing, :bundle_class, :tokenizer
10
-
11
- # Delegators for indexing.
12
- #
13
- delegate :connect_backend,
14
- :to => :source
15
-
16
- each_delegate :backup_caches,
17
- :cache!,
18
- :check_caches,
19
- :clear_caches,
20
- :create_directory_structure,
21
- :generate_caches,
22
- :restore_caches,
23
- :to => :categories
24
-
25
- def initialize name, options = {}
26
- @name = name
27
- @source = options[:source]
28
- @after_indexing = options[:after_indexing]
29
- @bundle_class = options[:indexing_bundle_class] # TODO This should actually be a fixed parameter.
30
- @tokenizer = options[:tokenizer]
31
- @key_format = options[:key_format]
32
-
33
- @categories = []
34
- end
35
-
36
- # TODO Spec. Doc.
37
- #
38
- def define_category category_name, options = {}
39
- new_category = Category.new category_name, self, options
40
- new_category = yield new_category if block_given?
41
- categories << new_category
42
- new_category
43
- end
44
-
45
- # TODO Spec. Doc.
46
- #
47
- def define_indexing options = {}
48
- @tokenizer = Internals::Tokenizers::Index.new options
49
- end
50
-
51
- #
52
- #
53
- def define_source source
54
- @source = source
55
- end
56
- def source
57
- @source || raise_no_source
58
- end
59
- def raise_no_source
60
- raise NoSourceSpecifiedException.new(<<-NO_SOURCE
61
-
62
-
63
- No source given for index #{name}. An index needs a source.
64
- Example:
65
- Index::Memory.new(:with_source) do
66
- source Sources::CSV.new(:title, file: 'data/books.csv')
67
- category :title
68
- category :author
69
- end
70
-
71
- NO_SOURCE
72
- )
73
- end
74
-
75
- #
76
- #
77
- def define_key_format key_format
78
- @key_format = key_format
79
- end
80
- def key_format
81
- @key_format || :to_i
82
- end
83
-
84
- #
85
- #
86
- def find category_name
87
- category_name = category_name.to_sym
88
-
89
- categories.each do |category|
90
- next unless category.name == category_name
91
- return category
92
- end
93
-
94
- raise %Q{Index category "#{category_name}" not found. Possible categories: "#{categories.map(&:name).join('", "')}".}
95
- end
96
-
97
- # Decides whether to use a parallel indexer or whether to
98
- # delegate to each category to index themselves.
99
- #
100
- def index!
101
- # TODO Duplicated in category.rb def indexer.
102
- #
103
- if source.respond_to?(:each)
104
- warn %Q{\n\033[1mWarning\033[m, source for index "#{name}" is empty: #{source} (responds true to empty?).\n} if source.respond_to?(:empty?) && source.empty?
105
- index_parallel
106
- else
107
- categories.each &:index!
108
- end
109
- end
110
- # Indexes the categories in parallel.
111
- #
112
- # Only use where the category does not have a non-#each source defined.
113
- #
114
- def index_parallel
115
- indexer = Indexers::Parallel.new self
116
- categories.first.prepare_index_directory # TODO Unnice.
117
- indexer.index
118
- end
119
-
120
- # Indexing.
121
- #
122
- # Note: If it is an each source we do not take a snapshot.
123
- #
124
- def take_snapshot
125
- source.take_snapshot self unless source.respond_to? :each
126
- end
127
-
128
- #
129
- #
130
- def to_s
131
- <<-INDEX
132
- Indexing(#{name}):
133
- #{"source: #{source}".indented_to_s}
134
- #{"Categories:\n#{categories.indented_to_s}".indented_to_s}
135
- INDEX
136
- end
137
-
138
- end
139
-
140
- end
141
-
142
- end
@@ -1,27 +0,0 @@
1
- module Internals
2
- module Indexing
3
- module Wrappers
4
- module Category
5
-
6
- module Location
7
-
8
- def self.install_on category, grid, precision = 1
9
- new_source = Sources::Wrappers::Location.new category.source, grid, precision
10
-
11
- category.class_eval do
12
- def tokenizer
13
- @tokenizer ||= Internals::Tokenizers::Index.new
14
- end
15
- define_method :source do
16
- new_source
17
- end
18
- end
19
-
20
- end
21
-
22
- end
23
-
24
- end
25
- end
26
- end
27
- end
@@ -1,88 +0,0 @@
1
- module Internals
2
-
3
- module Query
4
-
5
- # An allocation has a number of combinations:
6
- # [token, index] [other_token, other_index], ...
7
- #
8
- class Allocation # :nodoc:all
9
-
10
- attr_reader :count, :ids, :score, :combinations, :result_identifier
11
-
12
- #
13
- #
14
- def initialize combinations, result_identifier
15
- @combinations = combinations
16
- @result_identifier = result_identifier
17
- end
18
-
19
- def hash
20
- @combinations.hash
21
- end
22
- def eql? other_allocation
23
- true # FIXME
24
- # @combinations.eql? other_allocation.combinations
25
- end
26
-
27
- # Scores its combinations and caches the result.
28
- #
29
- def calculate_score weights
30
- @score ||= @combinations.calculate_score(weights)
31
- end
32
-
33
- # Asks the combinations for the (intersected) ids.
34
- #
35
- def calculate_ids amount, offset
36
- @combinations.ids amount, offset # Calculate as many ids as are necessary.
37
- end
38
-
39
- # This starts the searching process.
40
- #
41
- def process! amount, offset
42
- ids = calculate_ids amount, offset
43
- @count = ids.size # cache the count before throwing away the ids
44
- @ids = ids.slice!(offset, amount) || [] # slice out the relevant part
45
- end
46
-
47
- #
48
- #
49
- def keep identifiers = [] # categories
50
- @combinations.keep identifiers
51
- end
52
- #
53
- #
54
- def remove identifiers = [] # categories
55
- @combinations.remove identifiers
56
- end
57
-
58
- # Sort highest score first.
59
- #
60
- def <=> other_allocation
61
- other_allocation.score <=> self.score
62
- end
63
-
64
- # Transform the allocation into result form.
65
- #
66
- def to_result
67
- [self.result_identifier, self.score, self.count, @combinations.to_result, self.ids] if self.count > 0
68
- end
69
-
70
- # Json representation of this allocation.
71
- #
72
- # Note: Delegates to to_result.
73
- #
74
- def to_json
75
- to_result.to_json
76
- end
77
-
78
- #
79
- #
80
- def to_s
81
- "Allocation: #{to_result.join(', ')}"
82
- end
83
-
84
- end
85
-
86
- end
87
-
88
- end
@@ -1,118 +0,0 @@
1
- module Internals
2
-
3
- module Query
4
- # Container class for allocations.
5
- #
6
- class Allocations # :nodoc:all
7
-
8
- delegate :each, :inject, :empty?, :size, :to => :@allocations
9
- attr_reader :total
10
-
11
- def initialize allocations = []
12
- @allocations = allocations
13
- end
14
-
15
- # Score each allocation.
16
- #
17
- def calculate_score weights
18
- @allocations.each do |allocation|
19
- allocation.calculate_score weights
20
- end
21
- end
22
- # Sort the allocations.
23
- #
24
- def sort!
25
- @allocations.sort!
26
- end
27
-
28
- # Reduces the amount of allocations to x.
29
- #
30
- def reduce_to amount
31
- @allocations = @allocations.shift amount
32
- end
33
-
34
- # Keeps combinations.
35
- #
36
- # Only those passed in remain.
37
- #
38
- def keep identifiers = []
39
- @allocations.each { |allocation| allocation.keep identifiers } unless identifiers.empty?
40
- end
41
- # Removes combinations.
42
- #
43
- # Only those passed in are removed.
44
- #
45
- def remove identifiers = []
46
- @allocations.each { |allocation| allocation.remove identifiers } unless identifiers.empty?
47
- end
48
-
49
- # Returns the top amount ids.
50
- #
51
- def ids amount = 20
52
- @allocations.inject([]) do |total, allocation|
53
- total.size >= amount ? (return total.shift(amount)) : total + allocation.ids
54
- end
55
- end
56
-
57
- # This is the main method of this class that will replace ids and count.
58
- #
59
- # What it does is calculate the ids and counts of its allocations
60
- # for use in the results. It also calculates the total count.
61
- #
62
- # Parameters:
63
- # * amount: the amount of ids to calculate
64
- # * offset: the offset from where in the result set to take the ids
65
- #
66
- # Note: With an amount of 0, an offset > 0 doesn't make much
67
- # sense, as seen in the live search.
68
- #
69
- # Note: Each allocation caches its count, but not its ids (thrown away).
70
- # The ids are cached in this class.
71
- #
72
- # Note: It's possible that no ids are returned by an allocation, but a count. (In case of an offset)
73
- #
74
- def process! amount, offset = 0
75
- @total = 0
76
- current_offset = 0
77
- @allocations.each do |allocation|
78
- ids = allocation.process! amount, offset
79
- @total = @total + allocation.count # the total mixed in
80
- if ids.empty?
81
- offset = offset - allocation.count unless offset.zero?
82
- else
83
- amount = amount - ids.size # we need less results from the following allocation
84
- offset = 0 # we have already passed the offset
85
- end
86
- end
87
- end
88
-
89
- def uniq
90
- @allocations.uniq!
91
- end
92
-
93
- def to_a
94
- @allocations
95
- end
96
-
97
- # Simply inspects the internal allocations.
98
- #
99
- def to_s
100
- @allocations.inspect
101
- end
102
-
103
- # Allocations for results are in the form:
104
- # [
105
- # allocation1.to_result,
106
- # allocation2.to_result
107
- # ...
108
- # ]
109
- #
110
- def to_result
111
- @allocations.map(&:to_result).compact
112
- end
113
-
114
- end
115
-
116
- end
117
-
118
- end
@@ -1,80 +0,0 @@
1
- module Internals
2
-
3
- module Query
4
-
5
- # Describes the combination of a token (the text) and
6
- # the index (the bundle): [text, index_bundle]
7
- #
8
- # A combination is a single part of an allocation:
9
- # [..., [text2, index_bundle2], ...]
10
- #
11
- # An allocation consists of a number of combinations:
12
- # [[text1, index_bundle1], [text2, index_bundle2], [text3, index_bundle1]]
13
- #
14
- class Combination # :nodoc:all
15
-
16
- attr_reader :token, :bundle, :category_name
17
-
18
- def initialize token, category
19
- @token = token
20
- @category_name = category.name
21
- @bundle = category.bundle_for token
22
- @text = @token.text # don't want to use reset_similar already
23
- end
24
-
25
- # Note: Required for uniq!
26
- #
27
- def hash
28
- [@token.to_s, @bundle].hash
29
- end
30
-
31
- # Returns the weight of this combination.
32
- #
33
- # Note: Caching is most of the time useful.
34
- #
35
- def weight
36
- @weight ||= @bundle.weight(@text)
37
- end
38
-
39
- # Returns an array of ids for the given text.
40
- #
41
- # Note: Caching is most of the time useful.
42
- #
43
- def ids
44
- @ids ||= @bundle.ids(@text)
45
- end
46
-
47
- # The identifier for this combination.
48
- #
49
- def identifier
50
- "#{bundle.identifier}:#{@token.identifier}"
51
- end
52
-
53
- # Is the identifier in the given identifiers?
54
- #
55
- def in? identifiers
56
- identifiers.include? identifier
57
- end
58
-
59
- # Combines the category names with the original names.
60
- # [
61
- # [:title, 'Flarbl', :flarbl],
62
- # [:category, 'Gnorf', :gnorf]
63
- # ]
64
- #
65
- def to_result
66
- [@category_name, *@token.to_result]
67
- end
68
-
69
- # Example:
70
- # "exact title:Peter*:peter"
71
- #
72
- def to_s
73
- "#{bundle.identifier} #{to_result.join(':')}"
74
- end
75
-
76
- end
77
-
78
- end
79
-
80
- end