picky 2.5.2 → 2.6.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (255) hide show
  1. data/lib/picky/adapters/rack/base.rb +23 -0
  2. data/lib/picky/adapters/rack/live_parameters.rb +33 -0
  3. data/lib/picky/adapters/rack/query.rb +65 -0
  4. data/lib/picky/adapters/rack.rb +30 -0
  5. data/lib/picky/application.rb +5 -5
  6. data/lib/picky/backend/backend.rb +108 -0
  7. data/lib/picky/backend/file/basic.rb +101 -0
  8. data/lib/picky/backend/file/json.rb +34 -0
  9. data/lib/picky/backend/file/marshal.rb +34 -0
  10. data/lib/picky/backend/file/text.rb +56 -0
  11. data/lib/picky/backend/files.rb +30 -0
  12. data/lib/picky/backend/redis/basic.rb +85 -0
  13. data/lib/picky/backend/redis/list_hash.rb +49 -0
  14. data/lib/picky/backend/redis/string_hash.rb +40 -0
  15. data/lib/picky/backend/redis.rb +40 -0
  16. data/lib/picky/calculations/location.rb +57 -0
  17. data/lib/picky/categories.rb +62 -0
  18. data/lib/picky/categories_indexed.rb +93 -0
  19. data/lib/picky/categories_indexing.rb +12 -0
  20. data/lib/picky/category.rb +127 -0
  21. data/lib/picky/category_indexed.rb +64 -0
  22. data/lib/picky/category_indexing.rb +145 -0
  23. data/lib/picky/{internals/ext → ext}/maybe_compile.rb +0 -0
  24. data/lib/picky/{internals/ext → ext}/ruby19/extconf.rb +0 -0
  25. data/lib/picky/{internals/ext → ext}/ruby19/performant.c +0 -0
  26. data/lib/picky/{internals/extensions → extensions}/array.rb +0 -0
  27. data/lib/picky/extensions/class.rb +11 -0
  28. data/lib/picky/{internals/extensions → extensions}/hash.rb +0 -0
  29. data/lib/picky/{internals/extensions → extensions}/module.rb +0 -0
  30. data/lib/picky/{internals/extensions → extensions}/object.rb +0 -0
  31. data/lib/picky/{internals/extensions → extensions}/symbol.rb +0 -0
  32. data/lib/picky/frontend_adapters/rack.rb +146 -0
  33. data/lib/picky/generators/aliases.rb +3 -3
  34. data/lib/picky/generators/base.rb +15 -0
  35. data/lib/picky/generators/partial/default.rb +5 -0
  36. data/lib/picky/generators/partial/none.rb +31 -0
  37. data/lib/picky/generators/partial/strategy.rb +25 -0
  38. data/lib/picky/generators/partial/substring.rb +118 -0
  39. data/lib/picky/generators/partial_generator.rb +15 -0
  40. data/lib/picky/generators/similarity/default.rb +7 -0
  41. data/lib/picky/generators/similarity/double_metaphone.rb +28 -0
  42. data/lib/picky/generators/similarity/metaphone.rb +28 -0
  43. data/lib/picky/generators/similarity/none.rb +31 -0
  44. data/lib/picky/generators/similarity/phonetic.rb +65 -0
  45. data/lib/picky/generators/similarity/soundex.rb +28 -0
  46. data/lib/picky/generators/similarity/strategy.rb +9 -0
  47. data/lib/picky/generators/similarity_generator.rb +15 -0
  48. data/lib/picky/generators/strategy.rb +14 -0
  49. data/lib/picky/generators/weights/default.rb +7 -0
  50. data/lib/picky/generators/weights/logarithmic.rb +39 -0
  51. data/lib/picky/generators/weights/strategy.rb +9 -0
  52. data/lib/picky/generators/weights_generator.rb +15 -0
  53. data/lib/picky/{internals/helpers → helpers}/measuring.rb +0 -0
  54. data/lib/picky/index/base.rb +119 -104
  55. data/lib/picky/index/base_indexed.rb +27 -0
  56. data/lib/picky/index/base_indexing.rb +119 -0
  57. data/lib/picky/index/memory.rb +6 -18
  58. data/lib/picky/index/redis.rb +6 -18
  59. data/lib/picky/indexed/bundle/base.rb +110 -0
  60. data/lib/picky/indexed/bundle/memory.rb +91 -0
  61. data/lib/picky/indexed/bundle/redis.rb +45 -0
  62. data/lib/picky/indexed/wrappers/bundle/calculation.rb +35 -0
  63. data/lib/picky/indexed/wrappers/bundle/location.rb +42 -0
  64. data/lib/picky/indexed/wrappers/bundle/wrapper.rb +43 -0
  65. data/lib/picky/indexed/wrappers/category/location.rb +25 -0
  66. data/lib/picky/indexed/wrappers/exact_first.rb +55 -0
  67. data/lib/picky/{internals/indexers → indexers}/base.rb +0 -0
  68. data/lib/picky/{internals/indexers → indexers}/parallel.rb +0 -0
  69. data/lib/picky/{internals/indexers → indexers}/serial.rb +0 -0
  70. data/lib/picky/{internals/indexers → indexers}/solr.rb +0 -0
  71. data/lib/picky/indexes.rb +73 -0
  72. data/lib/picky/indexes_indexed.rb +29 -0
  73. data/lib/picky/indexes_indexing.rb +49 -0
  74. data/lib/picky/indexing/bundle/base.rb +212 -0
  75. data/lib/picky/indexing/bundle/memory.rb +25 -0
  76. data/lib/picky/indexing/bundle/redis.rb +24 -0
  77. data/lib/picky/indexing/bundle/super_base.rb +61 -0
  78. data/lib/picky/indexing/wrappers/category/location.rb +25 -0
  79. data/lib/picky/interfaces/live_parameters.rb +8 -8
  80. data/lib/picky/loader.rb +89 -95
  81. data/lib/picky/{internals/performant.rb → performant.rb} +0 -0
  82. data/lib/picky/query/allocation.rb +84 -0
  83. data/lib/picky/query/allocations.rb +114 -0
  84. data/lib/picky/query/combination.rb +76 -0
  85. data/lib/picky/query/combinations/base.rb +70 -0
  86. data/lib/picky/query/combinations/memory.rb +48 -0
  87. data/lib/picky/query/combinations/redis.rb +86 -0
  88. data/lib/picky/query/indexes.rb +195 -0
  89. data/lib/picky/query/qualifiers.rb +76 -0
  90. data/lib/picky/query/token.rb +198 -0
  91. data/lib/picky/query/tokens.rb +103 -0
  92. data/lib/picky/{internals/query → query}/weights.rb +0 -0
  93. data/lib/picky/results.rb +1 -1
  94. data/lib/picky/search.rb +6 -6
  95. data/lib/picky/{internals/solr → solr}/schema_generator.rb +0 -0
  96. data/lib/picky/sources/db.rb +7 -7
  97. data/lib/picky/sources/wrappers/location.rb +2 -2
  98. data/lib/picky/tokenizers/base.rb +224 -0
  99. data/lib/picky/tokenizers/index.rb +30 -0
  100. data/lib/picky/tokenizers/location.rb +49 -0
  101. data/lib/picky/tokenizers/query.rb +55 -0
  102. data/lib/tasks/index.rake +4 -3
  103. data/lib/tasks/try.rake +2 -2
  104. data/spec/lib/{internals/adapters → adapters}/rack/base_spec.rb +1 -1
  105. data/spec/lib/{internals/adapters → adapters}/rack/live_parameters_spec.rb +1 -1
  106. data/spec/lib/{internals/adapters → adapters}/rack/query_spec.rb +1 -1
  107. data/spec/lib/application_spec.rb +3 -3
  108. data/spec/lib/{internals/index → backend}/file/basic_spec.rb +1 -1
  109. data/spec/lib/{internals/index → backend}/file/json_spec.rb +1 -1
  110. data/spec/lib/{internals/index → backend}/file/marshal_spec.rb +1 -1
  111. data/spec/lib/{internals/index → backend}/file/text_spec.rb +1 -1
  112. data/spec/lib/{internals/index → backend}/files_spec.rb +3 -3
  113. data/spec/lib/{internals/index → backend}/redis/basic_spec.rb +1 -1
  114. data/spec/lib/{internals/index → backend}/redis/list_hash_spec.rb +1 -1
  115. data/spec/lib/{internals/index → backend}/redis/string_hash_spec.rb +1 -1
  116. data/spec/lib/{internals/index → backend}/redis_spec.rb +11 -5
  117. data/spec/lib/{internals/calculations → calculations}/location_spec.rb +1 -1
  118. data/spec/lib/{internals/indexed/categories_spec.rb → categories_indexed_spec.rb} +10 -10
  119. data/spec/lib/{internals/indexed/category_spec.rb → category_indexed_spec.rb} +12 -12
  120. data/spec/lib/{internals/indexing/category_spec.rb → category_indexing_spec.rb} +10 -10
  121. data/spec/lib/{internals/cores_spec.rb → cores_spec.rb} +0 -0
  122. data/spec/lib/{internals/extensions → extensions}/array_spec.rb +0 -0
  123. data/spec/lib/{internals/extensions → extensions}/hash_spec.rb +0 -0
  124. data/spec/lib/{internals/extensions → extensions}/module_spec.rb +0 -0
  125. data/spec/lib/{internals/extensions → extensions}/object_spec.rb +0 -0
  126. data/spec/lib/{internals/extensions → extensions}/symbol_spec.rb +0 -0
  127. data/spec/lib/{internals/frontend_adapters → frontend_adapters}/rack_spec.rb +10 -10
  128. data/spec/lib/generators/aliases_spec.rb +3 -3
  129. data/spec/lib/{internals/generators → generators}/cacher_strategy_spec.rb +1 -1
  130. data/spec/lib/{internals/generators → generators}/partial/default_spec.rb +3 -3
  131. data/spec/lib/{internals/generators → generators}/partial/none_spec.rb +2 -2
  132. data/spec/lib/{internals/generators → generators}/partial/substring_spec.rb +1 -1
  133. data/spec/lib/{internals/generators → generators}/partial_generator_spec.rb +3 -3
  134. data/spec/lib/{internals/generators → generators}/similarity/double_metaphone_spec.rb +1 -1
  135. data/spec/lib/{internals/generators → generators}/similarity/metaphone_spec.rb +1 -1
  136. data/spec/lib/{internals/generators → generators}/similarity/none_spec.rb +1 -1
  137. data/spec/lib/{internals/generators → generators}/similarity/phonetic_spec.rb +1 -1
  138. data/spec/lib/{internals/generators → generators}/similarity/soundex_spec.rb +1 -1
  139. data/spec/lib/{internals/generators → generators}/similarity_generator_spec.rb +2 -2
  140. data/spec/lib/{internals/generators → generators}/weights/logarithmic_spec.rb +1 -1
  141. data/spec/lib/{internals/generators → generators}/weights_generator_spec.rb +5 -5
  142. data/spec/lib/{internals/helpers → helpers}/measuring_spec.rb +0 -0
  143. data/spec/lib/{internals/indexed/index_spec.rb → index/base_indexed_spec.rb} +5 -5
  144. data/spec/lib/{internals/indexing/index_spec.rb → index/base_indexing_spec.rb} +6 -19
  145. data/spec/lib/index/base_spec.rb +10 -53
  146. data/spec/lib/{internals/indexed → indexed}/bundle/memory_spec.rb +5 -5
  147. data/spec/lib/{internals/indexed → indexed}/bundle/redis_spec.rb +4 -4
  148. data/spec/lib/{internals/indexed → indexed}/wrappers/bundle/calculation_spec.rb +1 -1
  149. data/spec/lib/{internals/indexed → indexed}/wrappers/bundle/wrapper_spec.rb +1 -1
  150. data/spec/lib/{internals/indexed → indexed}/wrappers/exact_first_spec.rb +7 -7
  151. data/spec/lib/{internals/indexers → indexers}/base_spec.rb +0 -0
  152. data/spec/lib/{internals/indexers → indexers}/parallel_spec.rb +0 -0
  153. data/spec/lib/{internals/indexers → indexers}/serial_spec.rb +0 -0
  154. data/spec/lib/indexes_class_spec.rb +30 -0
  155. data/spec/lib/{indexed/indexes_spec.rb → indexes_indexed_spec.rb} +1 -1
  156. data/spec/lib/{indexing/indexes_spec.rb → indexes_indexing_spec.rb} +8 -8
  157. data/spec/lib/{internals/indexing/indexes_spec.rb → indexes_spec.rb} +15 -12
  158. data/spec/lib/{internals/indexing → indexing}/bundle/memory_partial_generation_speed_spec.rb +4 -4
  159. data/spec/lib/{internals/indexing → indexing}/bundle/memory_spec.rb +3 -3
  160. data/spec/lib/{internals/indexing → indexing}/bundle/redis_spec.rb +3 -3
  161. data/spec/lib/{internals/indexing → indexing}/bundle/super_base_spec.rb +2 -2
  162. data/spec/lib/{internals/interfaces → interfaces}/live_parameters_spec.rb +0 -0
  163. data/spec/lib/query/allocation_spec.rb +1 -1
  164. data/spec/lib/query/allocations_spec.rb +1 -1
  165. data/spec/lib/query/combination_spec.rb +5 -5
  166. data/spec/lib/query/combinations/base_spec.rb +1 -1
  167. data/spec/lib/query/combinations/memory_spec.rb +1 -1
  168. data/spec/lib/query/combinations/redis_spec.rb +1 -1
  169. data/spec/lib/query/indexes_spec.rb +1 -1
  170. data/spec/lib/query/qualifiers_spec.rb +4 -4
  171. data/spec/lib/query/token_spec.rb +3 -3
  172. data/spec/lib/query/tokens_spec.rb +32 -32
  173. data/spec/lib/search_spec.rb +5 -5
  174. data/spec/lib/{internals/solr → solr}/schema_generator_spec.rb +0 -0
  175. data/spec/lib/sources/db_spec.rb +4 -8
  176. data/spec/lib/sources/wrappers/location_spec.rb +1 -1
  177. data/spec/lib/{internals/tokenizers → tokenizers}/base_spec.rb +1 -1
  178. data/spec/lib/{internals/tokenizers → tokenizers}/index_spec.rb +1 -1
  179. data/spec/lib/{internals/tokenizers → tokenizers}/query_spec.rb +1 -1
  180. metadata +214 -215
  181. data/lib/picky/aliases.rb +0 -4
  182. data/lib/picky/index_bundle.rb +0 -48
  183. data/lib/picky/indexed/indexes.rb +0 -59
  184. data/lib/picky/indexing/indexes.rb +0 -87
  185. data/lib/picky/internals/adapters/rack/base.rb +0 -27
  186. data/lib/picky/internals/adapters/rack/live_parameters.rb +0 -37
  187. data/lib/picky/internals/adapters/rack/query.rb +0 -69
  188. data/lib/picky/internals/adapters/rack.rb +0 -34
  189. data/lib/picky/internals/calculations/location.rb +0 -59
  190. data/lib/picky/internals/frontend_adapters/rack.rb +0 -150
  191. data/lib/picky/internals/generators/base.rb +0 -19
  192. data/lib/picky/internals/generators/partial/default.rb +0 -7
  193. data/lib/picky/internals/generators/partial/none.rb +0 -35
  194. data/lib/picky/internals/generators/partial/strategy.rb +0 -29
  195. data/lib/picky/internals/generators/partial/substring.rb +0 -122
  196. data/lib/picky/internals/generators/partial_generator.rb +0 -19
  197. data/lib/picky/internals/generators/similarity/default.rb +0 -9
  198. data/lib/picky/internals/generators/similarity/double_metaphone.rb +0 -32
  199. data/lib/picky/internals/generators/similarity/metaphone.rb +0 -32
  200. data/lib/picky/internals/generators/similarity/none.rb +0 -35
  201. data/lib/picky/internals/generators/similarity/phonetic.rb +0 -69
  202. data/lib/picky/internals/generators/similarity/soundex.rb +0 -32
  203. data/lib/picky/internals/generators/similarity/strategy.rb +0 -11
  204. data/lib/picky/internals/generators/similarity_generator.rb +0 -19
  205. data/lib/picky/internals/generators/strategy.rb +0 -18
  206. data/lib/picky/internals/generators/weights/default.rb +0 -9
  207. data/lib/picky/internals/generators/weights/logarithmic.rb +0 -43
  208. data/lib/picky/internals/generators/weights/strategy.rb +0 -11
  209. data/lib/picky/internals/generators/weights_generator.rb +0 -19
  210. data/lib/picky/internals/index/backend.rb +0 -112
  211. data/lib/picky/internals/index/file/basic.rb +0 -105
  212. data/lib/picky/internals/index/file/json.rb +0 -38
  213. data/lib/picky/internals/index/file/marshal.rb +0 -38
  214. data/lib/picky/internals/index/file/text.rb +0 -60
  215. data/lib/picky/internals/index/files.rb +0 -34
  216. data/lib/picky/internals/index/redis/basic.rb +0 -89
  217. data/lib/picky/internals/index/redis/list_hash.rb +0 -53
  218. data/lib/picky/internals/index/redis/string_hash.rb +0 -44
  219. data/lib/picky/internals/index/redis.rb +0 -44
  220. data/lib/picky/internals/indexed/bundle/base.rb +0 -114
  221. data/lib/picky/internals/indexed/bundle/memory.rb +0 -95
  222. data/lib/picky/internals/indexed/bundle/redis.rb +0 -49
  223. data/lib/picky/internals/indexed/categories.rb +0 -140
  224. data/lib/picky/internals/indexed/category.rb +0 -111
  225. data/lib/picky/internals/indexed/index.rb +0 -63
  226. data/lib/picky/internals/indexed/wrappers/bundle/calculation.rb +0 -37
  227. data/lib/picky/internals/indexed/wrappers/bundle/location.rb +0 -44
  228. data/lib/picky/internals/indexed/wrappers/bundle/wrapper.rb +0 -45
  229. data/lib/picky/internals/indexed/wrappers/category/location.rb +0 -27
  230. data/lib/picky/internals/indexed/wrappers/exact_first.rb +0 -59
  231. data/lib/picky/internals/indexing/bundle/base.rb +0 -216
  232. data/lib/picky/internals/indexing/bundle/memory.rb +0 -29
  233. data/lib/picky/internals/indexing/bundle/redis.rb +0 -28
  234. data/lib/picky/internals/indexing/bundle/super_base.rb +0 -65
  235. data/lib/picky/internals/indexing/category.rb +0 -153
  236. data/lib/picky/internals/indexing/index.rb +0 -142
  237. data/lib/picky/internals/indexing/wrappers/category/location.rb +0 -27
  238. data/lib/picky/internals/query/allocation.rb +0 -88
  239. data/lib/picky/internals/query/allocations.rb +0 -118
  240. data/lib/picky/internals/query/combination.rb +0 -80
  241. data/lib/picky/internals/query/combinations/base.rb +0 -74
  242. data/lib/picky/internals/query/combinations/memory.rb +0 -52
  243. data/lib/picky/internals/query/combinations/redis.rb +0 -90
  244. data/lib/picky/internals/query/indexes.rb +0 -199
  245. data/lib/picky/internals/query/qualifiers.rb +0 -82
  246. data/lib/picky/internals/query/token.rb +0 -202
  247. data/lib/picky/internals/query/tokens.rb +0 -109
  248. data/lib/picky/internals/shared/category.rb +0 -52
  249. data/lib/picky/internals/tokenizers/base.rb +0 -228
  250. data/lib/picky/internals/tokenizers/index.rb +0 -34
  251. data/lib/picky/internals/tokenizers/location.rb +0 -54
  252. data/lib/picky/internals/tokenizers/query.rb +0 -59
  253. data/lib/picky/internals.rb +0 -2
  254. data/spec/lib/aliases_spec.rb +0 -9
  255. data/spec/lib/index_bundle_spec.rb +0 -69
@@ -0,0 +1,61 @@
1
+ # TODO Merge into Base, extract common with Indexed::Base.
2
+ #
3
+ module Indexing # :nodoc:all
4
+ # A Bundle is a number of indexes
5
+ # per [index, category] combination.
6
+ #
7
+ # At most, there are three indexes:
8
+ # * *core* index (always used)
9
+ # * *weights* index (always used)
10
+ # * *similarity* index (used with similarity)
11
+ #
12
+ # In Picky, indexing is separated from the index
13
+ # handling itself through a parallel structure.
14
+ #
15
+ # Both use methods provided by this base class, but
16
+ # have very different goals:
17
+ #
18
+ # * *Indexing*::*Bundle* is just concerned with creating index files
19
+ # and providing helper functions to e.g. check the indexes.
20
+ #
21
+ # * *Index*::*Bundle* is concerned with loading these index files into
22
+ # memory and looking up search data as fast as possible.
23
+ #
24
+ module Bundle
25
+
26
+ class SuperBase
27
+
28
+ attr_reader :identifier, :files
29
+ attr_accessor :index, :weights, :similarity, :configuration, :similarity_strategy
30
+
31
+ delegate :clear, :to => :index
32
+ delegate :[], :[]=, :to => :configuration
33
+
34
# Sets up an empty bundle for the given name and category.
#
# Parameters:
# * name: the bundle name; forms the second half of the identifier
# * category: must respond to #identifier; also handed on to Backend::Files
# * similarity_strategy: stored as is, used by #similar to encode texts
#
def initialize name, category, similarity_strategy
  @identifier          = "#{category.identifier}:#{name}"
  @files               = Backend::Files.new name, category
  @similarity_strategy = similarity_strategy

  # Every store starts out as its own empty hash.
  #
  @index         = {}
  @weights       = {}
  @similarity    = {}
  @configuration = {} # A hash with config options.
end
45
+
46
+ # Get a list of similar texts.
47
+ #
48
+ # Note: Does not return itself.
49
+ #
50
def similar text
  # Returns the texts stored under the same similarity code as the
  # given text, without the text itself. Returns [] if the text has no
  # code or nothing is stored under its code.
  #
  code = similarity_strategy.encoded text
  similar_codes = code && @similarity[code]
  return [] unless similar_codes

  # Build a filtered copy instead of deleting in place: the array
  # belongs to the similarity index and must survive repeated lookups.
  #
  similar_codes.reject { |candidate| candidate == text }
end
56
+
57
+ end
58
+
59
+ end
60
+
61
+ end
@@ -0,0 +1,25 @@
1
module Indexing
  module Wrappers
    module Category

      # Installs location behaviour on a single category.
      #
      module Location

        # Wraps the category's current source in a
        # Sources::Wrappers::Location (built with the given grid and
        # precision) and overrides two methods on the category:
        #
        # * tokenizer: returns a memoized Tokenizers::Index instance
        # * source: returns the wrapped source
        #
        # NOTE(review): relies on the category responding to class_eval.
        #
        def self.install_on category, grid, precision = 1
          location_source = Sources::Wrappers::Location.new category.source, grid, precision

          category.class_eval do
            def tokenizer
              @tokenizer ||= Tokenizers::Index.new
            end

            # A block is used so the wrapped source stays reachable
            # through the closure.
            #
            define_method(:source) { location_source }
          end
        end

      end

    end
  end
end
@@ -141,29 +141,29 @@ module Interfaces
141
141
  # TODO Move to Interface object.
142
142
  #
143
143
  def querying_removes_characters
144
- regexp = Internals::Tokenizers::Query.default.instance_variable_get :@removes_characters_regexp
144
+ regexp = Tokenizers::Query.default.instance_variable_get :@removes_characters_regexp
145
145
  regexp && regexp.source
146
146
  end
147
147
  def querying_removes_characters= new_value
148
- Internals::Tokenizers::Query.default.instance_variable_set(:@removes_characters_regexp, %r{#{new_value}})
148
+ Tokenizers::Query.default.instance_variable_set(:@removes_characters_regexp, %r{#{new_value}})
149
149
  end
150
150
  def querying_stopwords
151
- regexp = Internals::Tokenizers::Query.default.instance_variable_get :@remove_stopwords_regexp
151
+ regexp = Tokenizers::Query.default.instance_variable_get :@remove_stopwords_regexp
152
152
  regexp && regexp.source
153
153
  end
154
154
  def querying_stopwords= new_value
155
- Internals::Tokenizers::Query.default.instance_variable_set(:@remove_stopwords_regexp, %r{#{new_value}})
155
+ Tokenizers::Query.default.instance_variable_set(:@remove_stopwords_regexp, %r{#{new_value}})
156
156
  end
157
157
  def querying_splits_text_on
158
- splits = Internals::Tokenizers::Query.default.instance_variable_get :@splits_text_on
158
+ splits = Tokenizers::Query.default.instance_variable_get :@splits_text_on
159
159
  splits && splits.respond_to?(:source) ? splits.source : splits
160
160
  end
161
161
  def querying_splits_text_on= new_value
162
- splits = Internals::Tokenizers::Query.default.instance_variable_get :@splits_text_on
162
+ splits = Tokenizers::Query.default.instance_variable_get :@splits_text_on
163
163
  if splits.respond_to?(:source)
164
- Internals::Tokenizers::Query.default.instance_variable_set(:@splits_text_on, %r{#{new_value}})
164
+ Tokenizers::Query.default.instance_variable_set(:@splits_text_on, %r{#{new_value}})
165
165
  else
166
- Internals::Tokenizers::Query.default.instance_variable_set(:@splits_text_on, new_value)
166
+ Tokenizers::Query.default.instance_variable_set(:@splits_text_on, new_value)
167
167
  end
168
168
  end
169
169
 
data/lib/picky/loader.rb CHANGED
@@ -25,9 +25,6 @@ module Loader # :nodoc:all
25
25
  def self.load_relative filename_without_rb
26
26
  load File.join(File.dirname(__FILE__), "#{filename_without_rb}.rb")
27
27
  end
28
- def self.load_internals filename_without_rb
29
- load File.join(File.dirname(__FILE__), "internals/#{filename_without_rb}.rb")
30
- end
31
28
 
32
29
  def self.load_user filename
33
30
  load File.join(PICKY_ROOT, "#{filename}.rb")
@@ -76,7 +73,7 @@ module Loader # :nodoc:all
76
73
 
77
74
  # TODO Rewrite
78
75
  #
79
- Internals::Query::Qualifiers.instance.prepare
76
+ Query::Qualifiers.instance.prepare
80
77
 
81
78
  exclaim "Application #{Application.apps.map(&:name).join(', ')} loaded."
82
79
  end
@@ -85,143 +82,132 @@ module Loader # :nodoc:all
85
82
  # (Not for the user)
86
83
  #
87
84
  def self.load_framework_internals
88
- load_relative 'internals'
89
-
90
85
  # Load compiled C code.
91
86
  #
92
- load_internals 'ext/maybe_compile'
87
+ load_relative 'ext/maybe_compile'
93
88
 
94
89
  # Load extensions.
95
90
  #
96
- load_internals 'extensions/object'
97
- load_internals 'extensions/array'
98
- load_internals 'extensions/symbol'
99
- load_internals 'extensions/module'
100
- load_internals 'extensions/hash'
91
+ load_relative 'extensions/object'
92
+ load_relative 'extensions/array'
93
+ load_relative 'extensions/symbol'
94
+ load_relative 'extensions/module'
95
+ load_relative 'extensions/class'
96
+ load_relative 'extensions/hash'
101
97
 
102
98
  # Requiring Helpers
103
99
  #
104
- load_internals 'helpers/measuring'
100
+ load_relative 'helpers/measuring'
105
101
 
106
102
  # Calculations.
107
103
  #
108
- load_internals 'calculations/location'
104
+ load_relative 'calculations/location'
109
105
 
110
106
  # Index generation strategies.
111
107
  #
112
- load_internals 'indexers/base'
113
- load_internals 'indexers/serial'
114
- load_internals 'indexers/parallel'
108
+ load_relative 'indexers/base'
109
+ load_relative 'indexers/serial'
110
+ load_relative 'indexers/parallel'
115
111
 
116
112
  # Generators.
117
113
  #
118
- load_internals 'generators/strategy'
114
+ load_relative 'generators/strategy'
119
115
 
120
116
  # Partial index generation strategies.
121
117
  #
122
- load_internals 'generators/partial/strategy'
123
- load_internals 'generators/partial/none'
124
- load_internals 'generators/partial/substring'
125
- load_internals 'generators/partial/default'
118
+ load_relative 'generators/partial/strategy'
119
+ load_relative 'generators/partial/none'
120
+ load_relative 'generators/partial/substring'
121
+ load_relative 'generators/partial/default'
126
122
 
127
123
  # Weight index generation strategies.
128
124
  #
129
- load_internals 'generators/weights/strategy'
130
- load_internals 'generators/weights/logarithmic'
131
- load_internals 'generators/weights/default'
125
+ load_relative 'generators/weights/strategy'
126
+ load_relative 'generators/weights/logarithmic'
127
+ load_relative 'generators/weights/default'
132
128
 
133
129
  # Similarity index generation strategies.
134
130
  #
135
- load_internals 'generators/similarity/strategy'
136
- load_internals 'generators/similarity/none'
137
- load_internals 'generators/similarity/phonetic'
138
- load_internals 'generators/similarity/metaphone'
139
- load_internals 'generators/similarity/double_metaphone'
140
- load_internals 'generators/similarity/soundex'
141
- load_internals 'generators/similarity/default'
131
+ load_relative 'generators/similarity/strategy'
132
+ load_relative 'generators/similarity/none'
133
+ load_relative 'generators/similarity/phonetic'
134
+ load_relative 'generators/similarity/metaphone'
135
+ load_relative 'generators/similarity/double_metaphone'
136
+ load_relative 'generators/similarity/soundex'
137
+ load_relative 'generators/similarity/default'
142
138
 
143
139
  # Index generators.
144
140
  #
145
- load_internals 'generators/base'
146
- load_internals 'generators/partial_generator'
147
- load_internals 'generators/weights_generator'
148
- load_internals 'generators/similarity_generator'
149
-
150
- # Shared index elements.
151
- #
152
- load_internals 'shared/category'
141
+ load_relative 'generators/base'
142
+ load_relative 'generators/partial_generator'
143
+ load_relative 'generators/weights_generator'
144
+ load_relative 'generators/similarity_generator'
153
145
 
154
146
  # Index store handling.
155
147
  #
156
- load_internals 'index/backend'
148
+ load_relative 'backend/backend'
157
149
 
158
- load_internals 'index/redis'
159
- load_internals 'index/redis/basic'
160
- load_internals 'index/redis/list_hash'
161
- load_internals 'index/redis/string_hash'
150
+ load_relative 'backend/redis'
151
+ load_relative 'backend/redis/basic'
152
+ load_relative 'backend/redis/list_hash'
153
+ load_relative 'backend/redis/string_hash'
162
154
 
163
- load_internals 'index/file/basic'
164
- load_internals 'index/file/text'
165
- load_internals 'index/file/marshal'
166
- load_internals 'index/file/json'
167
-
168
- load_internals 'index/files'
155
+ load_relative 'backend/file/basic'
156
+ load_relative 'backend/file/text'
157
+ load_relative 'backend/file/marshal'
158
+ load_relative 'backend/file/json'
169
159
 
160
+ load_relative 'backend/files'
161
+
170
162
  # Indexing and Indexed things.
171
163
  #
172
- load_internals 'indexing/bundle/super_base' # TODO Remove.
173
- load_internals 'indexing/bundle/base'
174
- load_internals 'indexing/bundle/memory'
175
- load_internals 'indexing/bundle/redis'
176
- load_internals 'indexing/category'
177
- # load_internals 'indexing/categories'
178
- load_internals 'indexing/index'
164
+ load_relative 'indexing/bundle/super_base' # TODO Remove.
165
+ load_relative 'indexing/bundle/base'
166
+ load_relative 'indexing/bundle/memory'
167
+ load_relative 'indexing/bundle/redis'
179
168
 
180
- load_internals 'indexing/wrappers/category/location'
169
+ load_relative 'indexing/wrappers/category/location'
181
170
 
182
- load_internals 'indexed/bundle/base'
183
- load_internals 'indexed/bundle/memory'
184
- load_internals 'indexed/bundle/redis'
185
- load_internals 'indexed/category'
186
- load_internals 'indexed/categories'
187
- load_internals 'indexed/index'
171
+ load_relative 'indexed/bundle/base'
172
+ load_relative 'indexed/bundle/memory'
173
+ load_relative 'indexed/bundle/redis'
188
174
 
189
- load_internals 'indexed/wrappers/exact_first'
175
+ load_relative 'indexed/wrappers/exact_first'
190
176
 
191
177
  # Bundle Wrapper
192
178
  #
193
- load_internals 'indexed/wrappers/bundle/wrapper'
194
- load_internals 'indexed/wrappers/bundle/calculation'
195
- load_internals 'indexed/wrappers/bundle/location'
179
+ load_relative 'indexed/wrappers/bundle/wrapper'
180
+ load_relative 'indexed/wrappers/bundle/calculation'
181
+ load_relative 'indexed/wrappers/bundle/location'
196
182
 
197
- load_internals 'indexed/wrappers/category/location'
183
+ load_relative 'indexed/wrappers/category/location'
198
184
 
199
185
  # Tokens.
200
186
  #
201
- load_internals 'query/token'
202
- load_internals 'query/tokens'
187
+ load_relative 'query/token'
188
+ load_relative 'query/tokens'
203
189
 
204
190
  # Tokenizers types.
205
191
  #
206
- load_internals 'tokenizers/base'
207
- load_internals 'tokenizers/index'
208
- load_internals 'tokenizers/query'
192
+ load_relative 'tokenizers/base'
193
+ load_relative 'tokenizers/index'
194
+ load_relative 'tokenizers/query'
209
195
 
210
196
  # Query combinations, qualifiers, weigher.
211
197
  #
212
- load_internals 'query/combination'
213
- load_internals 'query/combinations/base'
214
- load_internals 'query/combinations/memory'
215
- load_internals 'query/combinations/redis'
198
+ load_relative 'query/combination'
199
+ load_relative 'query/combinations/base'
200
+ load_relative 'query/combinations/memory'
201
+ load_relative 'query/combinations/redis'
216
202
 
217
- load_internals 'query/allocation'
218
- load_internals 'query/allocations'
203
+ load_relative 'query/allocation'
204
+ load_relative 'query/allocations'
219
205
 
220
- load_internals 'query/qualifiers'
206
+ load_relative 'query/qualifiers'
221
207
 
222
- load_internals 'query/weights'
208
+ load_relative 'query/weights'
223
209
 
224
- load_internals 'query/indexes'
210
+ load_relative 'query/indexes'
225
211
 
226
212
  # Configuration.
227
213
  #
@@ -229,14 +215,14 @@ module Loader # :nodoc:all
229
215
 
230
216
  # Adapters.
231
217
  #
232
- load_internals 'adapters/rack/base'
233
- load_internals 'adapters/rack/query'
234
- load_internals 'adapters/rack/live_parameters'
235
- load_internals 'adapters/rack'
218
+ load_relative 'adapters/rack/base'
219
+ load_relative 'adapters/rack/query'
220
+ load_relative 'adapters/rack/live_parameters'
221
+ load_relative 'adapters/rack'
236
222
 
237
223
  # Routing.
238
224
  #
239
- load_internals 'frontend_adapters/rack'
225
+ load_relative 'frontend_adapters/rack'
240
226
  end
241
227
  # Loads the user interface parts.
242
228
  #
@@ -271,15 +257,23 @@ module Loader # :nodoc:all
271
257
 
272
258
  # API.
273
259
  #
260
+ load_relative 'category'
261
+ load_relative 'category_indexed'
262
+ load_relative 'category_indexing'
263
+
264
+ load_relative 'categories'
265
+ load_relative 'categories_indexed'
266
+ load_relative 'categories_indexing'
267
+
274
268
  load_relative 'index/base'
269
+ load_relative 'index/base_indexed'
270
+ load_relative 'index/base_indexing'
275
271
  load_relative 'index/memory'
276
272
  load_relative 'index/redis'
277
-
278
- load_relative 'indexing/indexes'
279
- load_relative 'indexed/indexes'
280
-
281
- load_relative 'index_bundle'
282
- load_relative 'aliases'
273
+
274
+ load_relative 'indexes'
275
+ load_relative 'indexes_indexed'
276
+ load_relative 'indexes_indexing'
283
277
 
284
278
  # Results.
285
279
  #
@@ -0,0 +1,84 @@
1
module Query

  # An allocation has a number of combinations:
  # [token, index] [other_token, other_index], ...
  #
  # It scores the combinations, asks them for ids and
  # renders itself in result form.
  #
  class Allocation # :nodoc:all

    attr_reader :count, :ids, :score, :combinations, :result_identifier

    # Parameters:
    # * combinations: the object that is scored and asked for ids
    # * result_identifier: becomes the first element of the result form
    #
    def initialize combinations, result_identifier
      @combinations      = combinations
      @result_identifier = result_identifier
    end

    # An allocation hashes like its combinations.
    #
    def hash
      @combinations.hash
    end

    # Two allocations are eql? if their combinations hash to the same value.
    #
    # Hash lookups and Array#uniq only call eql? after the hashes
    # matched, so for allocations this gives them the same answer as
    # before. Unlike before, other kinds of objects are never eql?.
    #
    def eql? other_allocation
      other_allocation.is_a?(Allocation) && hash == other_allocation.hash
    end

    # Scores its combinations and caches the result.
    #
    def calculate_score weights
      @score ||= @combinations.calculate_score(weights)
    end

    # Asks the combinations for the (intersected) ids.
    #
    def calculate_ids amount, offset
      @combinations.ids amount, offset # Calculate as many ids as are necessary.
    end

    # This starts the searching process.
    #
    # Remembers the count of all calculated ids, then keeps
    # only the window given by offset and amount.
    #
    def process! amount, offset
      calculated = calculate_ids amount, offset
      @count = calculated.size                       # cache the count before throwing away the ids
      @ids   = calculated.slice!(offset, amount) || [] # slice out the relevant part
    end

    # Hands the identifiers (categories) to keep on to the combinations.
    #
    def keep identifiers = [] # categories
      @combinations.keep identifiers
    end

    # Hands the identifiers (categories) to remove on to the combinations.
    #
    def remove identifiers = [] # categories
      @combinations.remove identifiers
    end

    # Sort highest score first.
    #
    def <=> other_allocation
      other_allocation.score <=> score
    end

    # Transform the allocation into result form.
    #
    # Returns nil if the allocation has no results, or
    # if it has not been processed yet (no count).
    #
    def to_result
      return unless count && count > 0

      [result_identifier, score, count, @combinations.to_result, ids]
    end

    # Json representation of this allocation.
    #
    # Note: Delegates to to_result. Accepts and passes on any
    # generator arguments so that an allocation can also be
    # serialised when nested in an array or hash.
    #
    def to_json *options
      to_result.to_json(*options)
    end

    # Note: An allocation without results has no result form,
    # in which case nothing follows the label.
    #
    def to_s
      "Allocation: #{Array(to_result).join(', ')}"
    end

  end

end
@@ -0,0 +1,114 @@
1
module Query
  # Container class for allocations.
  #
  # Wraps an array of allocations and offers the operations needed
  # to score, sort, filter and process them as a whole.
  #
  class Allocations # :nodoc:all

    delegate :each, :inject, :empty?, :size, :to => :@allocations
    attr_reader :total

    # Parameters:
    #  * allocations: the array of allocations to wrap (used as is, not copied).
    #
    def initialize allocations = []
      @allocations = allocations
    end

    # Score each allocation.
    #
    def calculate_score weights
      @allocations.each do |allocation|
        allocation.calculate_score weights
      end
    end

    # Sort the allocations (in place, using the allocations' <=>).
    #
    def sort!
      @allocations.sort!
    end

    # Reduces the amount of allocations to x.
    #
    # Note: Only the first x allocations remain.
    #
    def reduce_to amount
      @allocations = @allocations.shift amount
    end

    # Keeps combinations.
    #
    # Only those passed in remain.
    #
    # Note: Does nothing if no identifiers are given.
    #
    def keep identifiers = []
      @allocations.each { |allocation| allocation.keep identifiers } unless identifiers.empty?
    end

    # Removes combinations.
    #
    # Only those passed in are removed.
    #
    # Note: Does nothing if no identifiers are given.
    #
    def remove identifiers = []
      @allocations.each { |allocation| allocation.remove identifiers } unless identifiers.empty?
    end

    # Returns the top amount ids.
    #
    # Collects the ids of the allocations in order and stops as soon as
    # enough have been gathered.
    #
    # Note: The result is always cut down to amount. The last allocation
    #       looked at may contribute more ids than are still needed.
    #
    # Note: Always returns a new array, never one owned by an allocation.
    #
    def ids amount = 20
      collected = []
      @allocations.each do |allocation|
        break if collected.size >= amount
        collected.concat allocation.ids
      end
      collected.first amount
    end

    # This is the main method of this class that will replace ids and count.
    #
    # What it does is calculate the ids and counts of its allocations
    # for being used in the results. It also calculates the total
    # (the sum of all allocation counts).
    #
    # Parameters:
    #  * amount: the amount of ids to calculate
    #  * offset: the offset from where in the result set to take the ids
    #
    # Note: With an amount of 0, an offset > 0 doesn't make much
    #       sense, as seen in the live search.
    #
    # Note: Each allocation caches its count and the ids of its window.
    #
    # Note: It's possible that no ids are returned by an allocation, but a count. (In case of an offset)
    #
    def process! amount, offset = 0
      @total = 0
      @allocations.each do |allocation|
        ids = allocation.process! amount, offset
        @total += allocation.count # the total mixed in
        if ids.empty?
          # This allocation was skipped: use up its count from the offset.
          # Never go below zero (can happen with an amount of 0), as a
          # negative offset would be handed to the following allocations.
          offset = [offset - allocation.count, 0].max
        else
          amount -= ids.size # we need less results from the following allocation
          offset  = 0        # we have already passed the offset
        end
      end
    end

    # Removes duplicate allocations in place.
    #
    # Note: As this uses Array#uniq!, nil is returned if nothing was removed.
    #
    def uniq
      @allocations.uniq!
    end

    # Returns the wrapped array of allocations (not a copy).
    #
    def to_a
      @allocations
    end

    # Simply inspects the internal allocations.
    #
    def to_s
      @allocations.inspect
    end

    # Allocations for results are in the form:
    # [
    #   allocation1.to_result,
    #   allocation2.to_result
    #   ...
    # ]
    #
    # Note: Allocations whose to_result is nil are left out.
    #
    def to_result
      @allocations.map(&:to_result).compact
    end

  end

end
@@ -0,0 +1,76 @@
1
module Query

  # A combination pairs a token (the text) with the index
  # bundle that is searched for it: [text, index_bundle]
  #
  # Combinations are the building blocks of an allocation:
  #   [[text1, index_bundle1], [text2, index_bundle2], [text3, index_bundle1]]
  #
  class Combination # :nodoc:all

    attr_reader :token, :bundle, :category_name

    # Parameters:
    #  * token: the query token; its text is remembered right away.
    #  * category: provides the category name and the bundle for the token.
    #
    def initialize(token, category)
      @token         = token
      @category_name = category.name
      @bundle        = category.bundle_for(token)
      @text          = token.text # don't want to use reset_similar already
    end

    # Hash built from the token's string form and the bundle.
    #
    # Note: Required for uniq!
    #
    def hash
      [token.to_s, bundle].hash
    end

    # The weight the bundle has for this combination's text.
    #
    # Note: The value is cached, as that is useful most of the time.
    #
    def weight
      @weight || (@weight = bundle.weight(@text))
    end

    # The array of ids the bundle has for this combination's text.
    #
    # Note: The value is cached, as that is useful most of the time.
    #
    def ids
      @ids || (@ids = bundle.ids(@text))
    end

    # The identifier for this combination:
    # the bundle's identifier and the token's identifier, joined by a colon.
    #
    def identifier
      bundle_part = bundle.identifier
      token_part  = token.identifier
      "#{bundle_part}:#{token_part}"
    end

    # Is this combination's identifier one of the given identifiers?
    #
    def in?(identifiers)
      identifiers.include?(identifier)
    end

    # The category name, followed by the elements of the token's result:
    #   [:title, 'Flarbl', :flarbl]
    #
    def to_result
      [category_name, *token.to_result]
    end

    # Example:
    #   "exact title:Peter*:peter"
    #
    def to_s
      joined_result = to_result.join(':')
      "#{bundle.identifier} #{joined_result}"
    end

  end

end