picky 2.5.2 → 2.6.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (255) hide show
  1. data/lib/picky/adapters/rack/base.rb +23 -0
  2. data/lib/picky/adapters/rack/live_parameters.rb +33 -0
  3. data/lib/picky/adapters/rack/query.rb +65 -0
  4. data/lib/picky/adapters/rack.rb +30 -0
  5. data/lib/picky/application.rb +5 -5
  6. data/lib/picky/backend/backend.rb +108 -0
  7. data/lib/picky/backend/file/basic.rb +101 -0
  8. data/lib/picky/backend/file/json.rb +34 -0
  9. data/lib/picky/backend/file/marshal.rb +34 -0
  10. data/lib/picky/backend/file/text.rb +56 -0
  11. data/lib/picky/backend/files.rb +30 -0
  12. data/lib/picky/backend/redis/basic.rb +85 -0
  13. data/lib/picky/backend/redis/list_hash.rb +49 -0
  14. data/lib/picky/backend/redis/string_hash.rb +40 -0
  15. data/lib/picky/backend/redis.rb +40 -0
  16. data/lib/picky/calculations/location.rb +57 -0
  17. data/lib/picky/categories.rb +62 -0
  18. data/lib/picky/categories_indexed.rb +93 -0
  19. data/lib/picky/categories_indexing.rb +12 -0
  20. data/lib/picky/category.rb +127 -0
  21. data/lib/picky/category_indexed.rb +64 -0
  22. data/lib/picky/category_indexing.rb +145 -0
  23. data/lib/picky/{internals/ext → ext}/maybe_compile.rb +0 -0
  24. data/lib/picky/{internals/ext → ext}/ruby19/extconf.rb +0 -0
  25. data/lib/picky/{internals/ext → ext}/ruby19/performant.c +0 -0
  26. data/lib/picky/{internals/extensions → extensions}/array.rb +0 -0
  27. data/lib/picky/extensions/class.rb +11 -0
  28. data/lib/picky/{internals/extensions → extensions}/hash.rb +0 -0
  29. data/lib/picky/{internals/extensions → extensions}/module.rb +0 -0
  30. data/lib/picky/{internals/extensions → extensions}/object.rb +0 -0
  31. data/lib/picky/{internals/extensions → extensions}/symbol.rb +0 -0
  32. data/lib/picky/frontend_adapters/rack.rb +146 -0
  33. data/lib/picky/generators/aliases.rb +3 -3
  34. data/lib/picky/generators/base.rb +15 -0
  35. data/lib/picky/generators/partial/default.rb +5 -0
  36. data/lib/picky/generators/partial/none.rb +31 -0
  37. data/lib/picky/generators/partial/strategy.rb +25 -0
  38. data/lib/picky/generators/partial/substring.rb +118 -0
  39. data/lib/picky/generators/partial_generator.rb +15 -0
  40. data/lib/picky/generators/similarity/default.rb +7 -0
  41. data/lib/picky/generators/similarity/double_metaphone.rb +28 -0
  42. data/lib/picky/generators/similarity/metaphone.rb +28 -0
  43. data/lib/picky/generators/similarity/none.rb +31 -0
  44. data/lib/picky/generators/similarity/phonetic.rb +65 -0
  45. data/lib/picky/generators/similarity/soundex.rb +28 -0
  46. data/lib/picky/generators/similarity/strategy.rb +9 -0
  47. data/lib/picky/generators/similarity_generator.rb +15 -0
  48. data/lib/picky/generators/strategy.rb +14 -0
  49. data/lib/picky/generators/weights/default.rb +7 -0
  50. data/lib/picky/generators/weights/logarithmic.rb +39 -0
  51. data/lib/picky/generators/weights/strategy.rb +9 -0
  52. data/lib/picky/generators/weights_generator.rb +15 -0
  53. data/lib/picky/{internals/helpers → helpers}/measuring.rb +0 -0
  54. data/lib/picky/index/base.rb +119 -104
  55. data/lib/picky/index/base_indexed.rb +27 -0
  56. data/lib/picky/index/base_indexing.rb +119 -0
  57. data/lib/picky/index/memory.rb +6 -18
  58. data/lib/picky/index/redis.rb +6 -18
  59. data/lib/picky/indexed/bundle/base.rb +110 -0
  60. data/lib/picky/indexed/bundle/memory.rb +91 -0
  61. data/lib/picky/indexed/bundle/redis.rb +45 -0
  62. data/lib/picky/indexed/wrappers/bundle/calculation.rb +35 -0
  63. data/lib/picky/indexed/wrappers/bundle/location.rb +42 -0
  64. data/lib/picky/indexed/wrappers/bundle/wrapper.rb +43 -0
  65. data/lib/picky/indexed/wrappers/category/location.rb +25 -0
  66. data/lib/picky/indexed/wrappers/exact_first.rb +55 -0
  67. data/lib/picky/{internals/indexers → indexers}/base.rb +0 -0
  68. data/lib/picky/{internals/indexers → indexers}/parallel.rb +0 -0
  69. data/lib/picky/{internals/indexers → indexers}/serial.rb +0 -0
  70. data/lib/picky/{internals/indexers → indexers}/solr.rb +0 -0
  71. data/lib/picky/indexes.rb +73 -0
  72. data/lib/picky/indexes_indexed.rb +29 -0
  73. data/lib/picky/indexes_indexing.rb +49 -0
  74. data/lib/picky/indexing/bundle/base.rb +212 -0
  75. data/lib/picky/indexing/bundle/memory.rb +25 -0
  76. data/lib/picky/indexing/bundle/redis.rb +24 -0
  77. data/lib/picky/indexing/bundle/super_base.rb +61 -0
  78. data/lib/picky/indexing/wrappers/category/location.rb +25 -0
  79. data/lib/picky/interfaces/live_parameters.rb +8 -8
  80. data/lib/picky/loader.rb +89 -95
  81. data/lib/picky/{internals/performant.rb → performant.rb} +0 -0
  82. data/lib/picky/query/allocation.rb +84 -0
  83. data/lib/picky/query/allocations.rb +114 -0
  84. data/lib/picky/query/combination.rb +76 -0
  85. data/lib/picky/query/combinations/base.rb +70 -0
  86. data/lib/picky/query/combinations/memory.rb +48 -0
  87. data/lib/picky/query/combinations/redis.rb +86 -0
  88. data/lib/picky/query/indexes.rb +195 -0
  89. data/lib/picky/query/qualifiers.rb +76 -0
  90. data/lib/picky/query/token.rb +198 -0
  91. data/lib/picky/query/tokens.rb +103 -0
  92. data/lib/picky/{internals/query → query}/weights.rb +0 -0
  93. data/lib/picky/results.rb +1 -1
  94. data/lib/picky/search.rb +6 -6
  95. data/lib/picky/{internals/solr → solr}/schema_generator.rb +0 -0
  96. data/lib/picky/sources/db.rb +7 -7
  97. data/lib/picky/sources/wrappers/location.rb +2 -2
  98. data/lib/picky/tokenizers/base.rb +224 -0
  99. data/lib/picky/tokenizers/index.rb +30 -0
  100. data/lib/picky/tokenizers/location.rb +49 -0
  101. data/lib/picky/tokenizers/query.rb +55 -0
  102. data/lib/tasks/index.rake +4 -3
  103. data/lib/tasks/try.rake +2 -2
  104. data/spec/lib/{internals/adapters → adapters}/rack/base_spec.rb +1 -1
  105. data/spec/lib/{internals/adapters → adapters}/rack/live_parameters_spec.rb +1 -1
  106. data/spec/lib/{internals/adapters → adapters}/rack/query_spec.rb +1 -1
  107. data/spec/lib/application_spec.rb +3 -3
  108. data/spec/lib/{internals/index → backend}/file/basic_spec.rb +1 -1
  109. data/spec/lib/{internals/index → backend}/file/json_spec.rb +1 -1
  110. data/spec/lib/{internals/index → backend}/file/marshal_spec.rb +1 -1
  111. data/spec/lib/{internals/index → backend}/file/text_spec.rb +1 -1
  112. data/spec/lib/{internals/index → backend}/files_spec.rb +3 -3
  113. data/spec/lib/{internals/index → backend}/redis/basic_spec.rb +1 -1
  114. data/spec/lib/{internals/index → backend}/redis/list_hash_spec.rb +1 -1
  115. data/spec/lib/{internals/index → backend}/redis/string_hash_spec.rb +1 -1
  116. data/spec/lib/{internals/index → backend}/redis_spec.rb +11 -5
  117. data/spec/lib/{internals/calculations → calculations}/location_spec.rb +1 -1
  118. data/spec/lib/{internals/indexed/categories_spec.rb → categories_indexed_spec.rb} +10 -10
  119. data/spec/lib/{internals/indexed/category_spec.rb → category_indexed_spec.rb} +12 -12
  120. data/spec/lib/{internals/indexing/category_spec.rb → category_indexing_spec.rb} +10 -10
  121. data/spec/lib/{internals/cores_spec.rb → cores_spec.rb} +0 -0
  122. data/spec/lib/{internals/extensions → extensions}/array_spec.rb +0 -0
  123. data/spec/lib/{internals/extensions → extensions}/hash_spec.rb +0 -0
  124. data/spec/lib/{internals/extensions → extensions}/module_spec.rb +0 -0
  125. data/spec/lib/{internals/extensions → extensions}/object_spec.rb +0 -0
  126. data/spec/lib/{internals/extensions → extensions}/symbol_spec.rb +0 -0
  127. data/spec/lib/{internals/frontend_adapters → frontend_adapters}/rack_spec.rb +10 -10
  128. data/spec/lib/generators/aliases_spec.rb +3 -3
  129. data/spec/lib/{internals/generators → generators}/cacher_strategy_spec.rb +1 -1
  130. data/spec/lib/{internals/generators → generators}/partial/default_spec.rb +3 -3
  131. data/spec/lib/{internals/generators → generators}/partial/none_spec.rb +2 -2
  132. data/spec/lib/{internals/generators → generators}/partial/substring_spec.rb +1 -1
  133. data/spec/lib/{internals/generators → generators}/partial_generator_spec.rb +3 -3
  134. data/spec/lib/{internals/generators → generators}/similarity/double_metaphone_spec.rb +1 -1
  135. data/spec/lib/{internals/generators → generators}/similarity/metaphone_spec.rb +1 -1
  136. data/spec/lib/{internals/generators → generators}/similarity/none_spec.rb +1 -1
  137. data/spec/lib/{internals/generators → generators}/similarity/phonetic_spec.rb +1 -1
  138. data/spec/lib/{internals/generators → generators}/similarity/soundex_spec.rb +1 -1
  139. data/spec/lib/{internals/generators → generators}/similarity_generator_spec.rb +2 -2
  140. data/spec/lib/{internals/generators → generators}/weights/logarithmic_spec.rb +1 -1
  141. data/spec/lib/{internals/generators → generators}/weights_generator_spec.rb +5 -5
  142. data/spec/lib/{internals/helpers → helpers}/measuring_spec.rb +0 -0
  143. data/spec/lib/{internals/indexed/index_spec.rb → index/base_indexed_spec.rb} +5 -5
  144. data/spec/lib/{internals/indexing/index_spec.rb → index/base_indexing_spec.rb} +6 -19
  145. data/spec/lib/index/base_spec.rb +10 -53
  146. data/spec/lib/{internals/indexed → indexed}/bundle/memory_spec.rb +5 -5
  147. data/spec/lib/{internals/indexed → indexed}/bundle/redis_spec.rb +4 -4
  148. data/spec/lib/{internals/indexed → indexed}/wrappers/bundle/calculation_spec.rb +1 -1
  149. data/spec/lib/{internals/indexed → indexed}/wrappers/bundle/wrapper_spec.rb +1 -1
  150. data/spec/lib/{internals/indexed → indexed}/wrappers/exact_first_spec.rb +7 -7
  151. data/spec/lib/{internals/indexers → indexers}/base_spec.rb +0 -0
  152. data/spec/lib/{internals/indexers → indexers}/parallel_spec.rb +0 -0
  153. data/spec/lib/{internals/indexers → indexers}/serial_spec.rb +0 -0
  154. data/spec/lib/indexes_class_spec.rb +30 -0
  155. data/spec/lib/{indexed/indexes_spec.rb → indexes_indexed_spec.rb} +1 -1
  156. data/spec/lib/{indexing/indexes_spec.rb → indexes_indexing_spec.rb} +8 -8
  157. data/spec/lib/{internals/indexing/indexes_spec.rb → indexes_spec.rb} +15 -12
  158. data/spec/lib/{internals/indexing → indexing}/bundle/memory_partial_generation_speed_spec.rb +4 -4
  159. data/spec/lib/{internals/indexing → indexing}/bundle/memory_spec.rb +3 -3
  160. data/spec/lib/{internals/indexing → indexing}/bundle/redis_spec.rb +3 -3
  161. data/spec/lib/{internals/indexing → indexing}/bundle/super_base_spec.rb +2 -2
  162. data/spec/lib/{internals/interfaces → interfaces}/live_parameters_spec.rb +0 -0
  163. data/spec/lib/query/allocation_spec.rb +1 -1
  164. data/spec/lib/query/allocations_spec.rb +1 -1
  165. data/spec/lib/query/combination_spec.rb +5 -5
  166. data/spec/lib/query/combinations/base_spec.rb +1 -1
  167. data/spec/lib/query/combinations/memory_spec.rb +1 -1
  168. data/spec/lib/query/combinations/redis_spec.rb +1 -1
  169. data/spec/lib/query/indexes_spec.rb +1 -1
  170. data/spec/lib/query/qualifiers_spec.rb +4 -4
  171. data/spec/lib/query/token_spec.rb +3 -3
  172. data/spec/lib/query/tokens_spec.rb +32 -32
  173. data/spec/lib/search_spec.rb +5 -5
  174. data/spec/lib/{internals/solr → solr}/schema_generator_spec.rb +0 -0
  175. data/spec/lib/sources/db_spec.rb +4 -8
  176. data/spec/lib/sources/wrappers/location_spec.rb +1 -1
  177. data/spec/lib/{internals/tokenizers → tokenizers}/base_spec.rb +1 -1
  178. data/spec/lib/{internals/tokenizers → tokenizers}/index_spec.rb +1 -1
  179. data/spec/lib/{internals/tokenizers → tokenizers}/query_spec.rb +1 -1
  180. metadata +214 -215
  181. data/lib/picky/aliases.rb +0 -4
  182. data/lib/picky/index_bundle.rb +0 -48
  183. data/lib/picky/indexed/indexes.rb +0 -59
  184. data/lib/picky/indexing/indexes.rb +0 -87
  185. data/lib/picky/internals/adapters/rack/base.rb +0 -27
  186. data/lib/picky/internals/adapters/rack/live_parameters.rb +0 -37
  187. data/lib/picky/internals/adapters/rack/query.rb +0 -69
  188. data/lib/picky/internals/adapters/rack.rb +0 -34
  189. data/lib/picky/internals/calculations/location.rb +0 -59
  190. data/lib/picky/internals/frontend_adapters/rack.rb +0 -150
  191. data/lib/picky/internals/generators/base.rb +0 -19
  192. data/lib/picky/internals/generators/partial/default.rb +0 -7
  193. data/lib/picky/internals/generators/partial/none.rb +0 -35
  194. data/lib/picky/internals/generators/partial/strategy.rb +0 -29
  195. data/lib/picky/internals/generators/partial/substring.rb +0 -122
  196. data/lib/picky/internals/generators/partial_generator.rb +0 -19
  197. data/lib/picky/internals/generators/similarity/default.rb +0 -9
  198. data/lib/picky/internals/generators/similarity/double_metaphone.rb +0 -32
  199. data/lib/picky/internals/generators/similarity/metaphone.rb +0 -32
  200. data/lib/picky/internals/generators/similarity/none.rb +0 -35
  201. data/lib/picky/internals/generators/similarity/phonetic.rb +0 -69
  202. data/lib/picky/internals/generators/similarity/soundex.rb +0 -32
  203. data/lib/picky/internals/generators/similarity/strategy.rb +0 -11
  204. data/lib/picky/internals/generators/similarity_generator.rb +0 -19
  205. data/lib/picky/internals/generators/strategy.rb +0 -18
  206. data/lib/picky/internals/generators/weights/default.rb +0 -9
  207. data/lib/picky/internals/generators/weights/logarithmic.rb +0 -43
  208. data/lib/picky/internals/generators/weights/strategy.rb +0 -11
  209. data/lib/picky/internals/generators/weights_generator.rb +0 -19
  210. data/lib/picky/internals/index/backend.rb +0 -112
  211. data/lib/picky/internals/index/file/basic.rb +0 -105
  212. data/lib/picky/internals/index/file/json.rb +0 -38
  213. data/lib/picky/internals/index/file/marshal.rb +0 -38
  214. data/lib/picky/internals/index/file/text.rb +0 -60
  215. data/lib/picky/internals/index/files.rb +0 -34
  216. data/lib/picky/internals/index/redis/basic.rb +0 -89
  217. data/lib/picky/internals/index/redis/list_hash.rb +0 -53
  218. data/lib/picky/internals/index/redis/string_hash.rb +0 -44
  219. data/lib/picky/internals/index/redis.rb +0 -44
  220. data/lib/picky/internals/indexed/bundle/base.rb +0 -114
  221. data/lib/picky/internals/indexed/bundle/memory.rb +0 -95
  222. data/lib/picky/internals/indexed/bundle/redis.rb +0 -49
  223. data/lib/picky/internals/indexed/categories.rb +0 -140
  224. data/lib/picky/internals/indexed/category.rb +0 -111
  225. data/lib/picky/internals/indexed/index.rb +0 -63
  226. data/lib/picky/internals/indexed/wrappers/bundle/calculation.rb +0 -37
  227. data/lib/picky/internals/indexed/wrappers/bundle/location.rb +0 -44
  228. data/lib/picky/internals/indexed/wrappers/bundle/wrapper.rb +0 -45
  229. data/lib/picky/internals/indexed/wrappers/category/location.rb +0 -27
  230. data/lib/picky/internals/indexed/wrappers/exact_first.rb +0 -59
  231. data/lib/picky/internals/indexing/bundle/base.rb +0 -216
  232. data/lib/picky/internals/indexing/bundle/memory.rb +0 -29
  233. data/lib/picky/internals/indexing/bundle/redis.rb +0 -28
  234. data/lib/picky/internals/indexing/bundle/super_base.rb +0 -65
  235. data/lib/picky/internals/indexing/category.rb +0 -153
  236. data/lib/picky/internals/indexing/index.rb +0 -142
  237. data/lib/picky/internals/indexing/wrappers/category/location.rb +0 -27
  238. data/lib/picky/internals/query/allocation.rb +0 -88
  239. data/lib/picky/internals/query/allocations.rb +0 -118
  240. data/lib/picky/internals/query/combination.rb +0 -80
  241. data/lib/picky/internals/query/combinations/base.rb +0 -74
  242. data/lib/picky/internals/query/combinations/memory.rb +0 -52
  243. data/lib/picky/internals/query/combinations/redis.rb +0 -90
  244. data/lib/picky/internals/query/indexes.rb +0 -199
  245. data/lib/picky/internals/query/qualifiers.rb +0 -82
  246. data/lib/picky/internals/query/token.rb +0 -202
  247. data/lib/picky/internals/query/tokens.rb +0 -109
  248. data/lib/picky/internals/shared/category.rb +0 -52
  249. data/lib/picky/internals/tokenizers/base.rb +0 -228
  250. data/lib/picky/internals/tokenizers/index.rb +0 -34
  251. data/lib/picky/internals/tokenizers/location.rb +0 -54
  252. data/lib/picky/internals/tokenizers/query.rb +0 -59
  253. data/lib/picky/internals.rb +0 -2
  254. data/spec/lib/aliases_spec.rb +0 -9
  255. data/spec/lib/index_bundle_spec.rb +0 -69
@@ -85,7 +85,11 @@ module Index
85
85
  #
86
86
  class Base
87
87
 
88
- attr_reader :name
88
+ attr_reader :name,
89
+ :categories
90
+
91
+ delegate :[],
92
+ :to => :categories
89
93
 
90
94
  # Create a new index with a given source.
91
95
  #
@@ -115,8 +119,22 @@ module Index
115
119
  @name = name.to_sym
116
120
 
117
121
  check_options options
118
- @indexing = Internals::Indexing::Index.new name, options
119
- @indexed = Internals::Indexed::Index.new name, options
122
+
123
+ @source = options[:source]
124
+
125
+ @after_indexing = options[:after_indexing]
126
+ @indexing_bundle_class = options[:indexing_bundle_class] # TODO This should probably be a fixed parameter.
127
+ @tokenizer = options[:tokenizer]
128
+ @key_format = options[:key_format]
129
+
130
+ # Indexed.
131
+ #
132
+ @result_identifier = options[:result_identifier] || name
133
+ @indexed_bundle_class = options[:indexed_bundle_class] # TODO This should probably be a fixed parameter.
134
+
135
+ # TODO Move ignore_unassigned_tokens to query, somehow.
136
+ #
137
+ @categories = Categories.new ignore_unassigned_tokens: (options[:ignore_unassigned_tokens] || false)
120
138
 
121
139
  # Centralized registry.
122
140
  #
@@ -126,104 +144,19 @@ module Index
126
144
  #
127
145
  instance_eval(&Proc.new) if block_given?
128
146
 
129
- check_source internal_indexing.source
130
- end
131
- def internal_indexing # :nodoc:
132
- @indexing
133
- end
134
- def internal_indexed # :nodoc:
135
- @indexed
136
- end
137
- #
138
- # Since this is an API, we fail hard quickly.
139
- #
140
- def check_name name # :nodoc:
141
- raise ArgumentError.new(<<-NAME
142
-
143
-
144
- The index identifier (you gave "#{name}") for Index::Memory/Index::Redis should be a Symbol/String,
145
- Examples:
146
- Index::Memory.new(:my_cool_index) # Recommended
147
- Index::Redis.new("a-redis-index")
148
- NAME
149
-
150
-
151
- ) unless name.respond_to?(:to_sym)
152
- end
153
- def check_options options # :nodoc:
154
- raise ArgumentError.new(<<-OPTIONS
155
-
156
-
157
- Sources are not passed in as second parameter for #{self.class.name} anymore, but either
158
- * as :source option:
159
- #{self.class.name}.new(#{name.inspect}, source: #{options})
160
- or
161
- * given to the #source method inside the config block:
162
- #{self.class.name}.new(#{name.inspect}) do
163
- source #{options}
164
- end
165
-
166
- Sorry about that breaking change (in 2.2.0), didn't want to go to 3.0.0 yet!
167
-
168
- All the best
169
- -- Picky
170
-
171
-
172
- OPTIONS
173
- ) unless options.respond_to?(:[])
174
- end
175
- def check_source source # :nodoc:
176
- raise ArgumentError.new(<<-SOURCE
177
-
178
-
179
- The index "#{name}" should use a data source that responds to either the method #each, or the method #harvest, which yields(id, text).
180
- Or it could use one of the built-in sources:
181
- Sources::#{(Sources.constants - [:Base, :Wrappers, :NoCSVFileGiven, :NoCouchDBGiven]).join(',
182
- Sources::')}
183
-
184
-
185
- SOURCE
186
- ) unless source.respond_to?(:each) || source.respond_to?(:harvest)
187
- end
188
-
189
- def to_stats # :nodoc:
190
- stats = <<-INDEX
191
- #{name} (#{self.class}):
192
- #{"source: #{internal_indexing.source}".indented_to_s}
193
- #{"categories: #{internal_indexing.categories.map(&:name).join(', ')}".indented_to_s}
194
- INDEX
195
- stats << " result identifier: \"#{internal_indexed.result_identifier}\"".indented_to_s unless internal_indexed.result_identifier.to_s == internal_indexed.name.to_s
196
- stats
197
- end
198
-
199
- # Define an index tokenizer on the index.
200
- #
201
- # Parameters are the exact same as for indexing.
202
- #
203
- def indexing options = {}
204
- internal_indexing.define_indexing options
147
+ # Check if any source has been given in the block or the options.
148
+ #
149
+ check_source @source
205
150
  end
206
- alias define_indexing indexing
207
151
 
208
- # Define a source on the index.
209
- #
210
- # Parameter is a source, either one of the standard sources or
211
- # anything responding to #each and returning objects that
212
- # respond to id and the category names (or the category from option).
152
+ # Default bundles.
213
153
  #
214
- def source source
215
- internal_indexing.define_source source
154
+ def indexing_bundle_class
155
+ Indexing::Bundle::Memory
216
156
  end
217
- alias define_source source
218
-
219
- # Define a key_format on the index.
220
- #
221
- # Parameter is a method name to use on the key (e.g. :to_i, :to_s, :strip).
222
- #
223
- def key_format key_format
224
- internal_indexing.define_key_format key_format
157
+ def indexed_bundle_class
158
+ Indexed::Bundle::Memory
225
159
  end
226
- alias define_key_format key_format
227
160
 
228
161
  # Defines a searchable category on the index.
229
162
  #
@@ -239,17 +172,26 @@ INDEX
239
172
  # * from: Take the data from the data category with this name. Example: You have a source Sources::CSV.new(:title, file:'some_file.csv') but you want the category to be called differently. Then you use from: define_category(:similar_title, :from => :title).
240
173
  #
241
174
  def category category_name, options = {}
242
- category_name = category_name.to_sym
175
+ options = default_category_options.merge options
243
176
 
244
- indexing_category = internal_indexing.define_category category_name, options
245
- indexed_category = internal_indexed.define_category category_name, options
177
+ new_category = Category.new category_name.to_sym, self, options
178
+ categories << new_category
246
179
 
247
- yield indexing_category, indexed_category if block_given?
180
+ new_category = yield new_category if block_given?
248
181
 
249
- self
182
+ new_category
250
183
  end
251
184
  alias define_category category
252
185
 
186
+ # By default, the category uses
187
+ # * the index's bundle type.
188
+ #
189
+ def default_category_options
190
+ {
191
+ :indexed_bundle_class => @indexed_bundle_class
192
+ }
193
+ end
194
+
253
195
  # Make this category range searchable with a fixed range. If you need other
254
196
  # ranges, define another category with a different range value.
255
197
  #
@@ -318,9 +260,9 @@ INDEX
318
260
 
319
261
  options = { partial: Partial::None.new }.merge options
320
262
 
321
- define_category category_name, options do |indexing_category, indexed_category|
322
- Internals::Indexing::Wrappers::Category::Location.install_on indexing_category, range, precision
323
- Internals::Indexed::Wrappers::Category::Location.install_on indexed_category, range, precision
263
+ define_category category_name, options do |category|
264
+ Indexing::Wrappers::Category::Location.install_on category, range, precision
265
+ Indexed::Wrappers::Category::Location.install_on category, range, precision
324
266
  end
325
267
  end
326
268
  alias define_ranged_category ranged_category
@@ -386,6 +328,79 @@ INDEX
386
328
 
387
329
  end
388
330
  alias define_geo_categories geo_categories
331
+
332
+ #
333
+ # Since this is an API, we fail hard quickly.
334
+ #
335
+ def check_name name # :nodoc:
336
+ raise ArgumentError.new(<<-NAME
337
+
338
+
339
+ The index identifier (you gave "#{name}") for Index::Memory/Index::Redis should be a Symbol/String,
340
+ Examples:
341
+ Index::Memory.new(:my_cool_index) # Recommended
342
+ Index::Redis.new("a-redis-index")
343
+ NAME
344
+
345
+
346
+ ) unless name.respond_to?(:to_sym)
347
+ end
348
+ def check_options options # :nodoc:
349
+ raise ArgumentError.new(<<-OPTIONS
350
+
351
+
352
+ Sources are not passed in as second parameter for #{self.class.name} anymore, but either
353
+ * as :source option:
354
+ #{self.class.name}.new(#{name.inspect}, source: #{options})
355
+ or
356
+ * given to the #source method inside the config block:
357
+ #{self.class.name}.new(#{name.inspect}) do
358
+ source #{options}
359
+ end
360
+
361
+ Sorry about that breaking change (in 2.2.0), didn't want to go to 3.0.0 yet!
362
+
363
+ All the best
364
+ -- Picky
365
+
366
+
367
+ OPTIONS
368
+ ) unless options.respond_to?(:[])
369
+ end
370
+ def check_source source # :nodoc:
371
+ raise ArgumentError.new(<<-SOURCE
372
+
373
+
374
+ The index "#{name}" should use a data source that responds to either the method #each, or the method #harvest, which yields(id, text).
375
+ Or it could use one of the built-in sources:
376
+ Sources::#{(Sources.constants - [:Base, :Wrappers, :NoCSVFileGiven, :NoCouchDBGiven]).join(',
377
+ Sources::')}
378
+
379
+
380
+ SOURCE
381
+ ) unless source.respond_to?(:each) || source.respond_to?(:harvest)
382
+ end
383
+
384
+ def method_name
385
+
386
+ end
387
+
388
+ #
389
+ #
390
+ def to_s
391
+ "#{self.class}(#{name}, result_id: #{result_identifier}, source: #{source}, categories: #{categories})"
392
+ end
393
+
394
+ def to_stats # :nodoc:
395
+ stats = <<-INDEX
396
+ #{name} (#{self.class}):
397
+ #{"source: #{source}".indented_to_s}
398
+ #{"categories: #{categories.map(&:name).join(', ')}".indented_to_s}
399
+ INDEX
400
+ stats << " result identifier: \"#{result_identifier}\"".indented_to_s unless result_identifier.to_s == name.to_s
401
+ stats
402
+ end
403
+
389
404
  end
390
405
 
391
406
  end
@@ -0,0 +1,27 @@
1
+ module Index
2
+
3
+ #
4
+ #
5
+ class Base
6
+
7
+ attr_reader :result_identifier,
8
+ :combinator
9
+
10
+ delegate :load_from_cache,
11
+ :analyze,
12
+ :reindex,
13
+ :to => :categories
14
+
15
+ alias reload load_from_cache
16
+
17
+ # Return the possible combinations for this token.
18
+ #
19
+ # A combination is a tuple <token, index_bundle>.
20
+ #
21
+ def possible_combinations token
22
+ categories.possible_combinations_for token
23
+ end
24
+
25
+ end
26
+
27
+ end
@@ -0,0 +1,119 @@
1
+ module Index
2
+
3
+ #
4
+ #
5
+ class Base
6
+
7
+ attr_reader :after_indexing,
8
+ :bundle_class,
9
+ :tokenizer
10
+
11
+ # Delegators for indexing.
12
+ #
13
+ delegate :backup_caches,
14
+ :cache,
15
+ :check_caches,
16
+ :clear_caches,
17
+ :create_directory_structure,
18
+ :generate_caches,
19
+ :restore_caches,
20
+ :to => :categories
21
+
22
+ delegate :connect_backend,
23
+ :to => :source
24
+
25
+ # Calling index on an index will
26
+ # * prepare (the data)
27
+ # * cache (the data)
28
+ # on every category.
29
+ #
30
+ def index
31
+ prepare
32
+ cache
33
+ end
34
+
35
+ # Define an index tokenizer on the index.
36
+ #
37
+ # Parameters are the exact same as for indexing.
38
+ #
39
+ def indexing options = {}
40
+ @tokenizer = Tokenizers::Index.new options
41
+ end
42
+ alias define_indexing indexing
43
+
44
+ # Define a source on the index.
45
+ #
46
+ # Parameter is a source, either one of the standard sources or
47
+ # anything responding to #each and returning objects that
48
+ # respond to id and the category names (or the category from option).
49
+ #
50
+ def source some_source = nil
51
+ some_source ? define_source(some_source) : (@source || raise_no_source)
52
+ end
53
+ def define_source source
54
+ @source = source
55
+ end
56
+ def raise_no_source
57
+ raise NoSourceSpecifiedException.new(<<-NO_SOURCE
58
+
59
+
60
+ No source given for index #{name}. An index needs a source.
61
+ Example:
62
+ Index::Memory.new(:with_source) do
63
+ source Sources::CSV.new(:title, file: 'data/books.csv')
64
+ category :title
65
+ category :author
66
+ end
67
+
68
+ NO_SOURCE
69
+ )
70
+ end
71
+
72
+ # Define a key_format on the index.
73
+ #
74
+ # Parameter is a method name to use on the key (e.g. :to_i, :to_s, :strip).
75
+ #
76
+ def key_format format = nil
77
+ format ? define_key_format(format) : (@key_format || :to_i)
78
+ end
79
+ def define_key_format key_format
80
+ @key_format = key_format
81
+ end
82
+
83
+ # Decides whether to use a parallel indexer or whether to
84
+ # delegate to each category to index themselves.
85
+ #
86
+ # TODO Rename to prepare.
87
+ #
88
+ def prepare
89
+ # TODO Duplicated in category.rb def indexer.
90
+ #
91
+ if source.respond_to?(:each)
92
+ warn %Q{\n\033[1mWarning\033[m, source for index "#{name}" is empty: #{source} (responds true to empty?).\n} if source.respond_to?(:empty?) && source.empty?
93
+ index_parallel
94
+ else
95
+ categories.each &:prepare
96
+ end
97
+ end
98
+
99
+ # Indexes the categories in parallel.
100
+ #
101
+ # Only use where the category does not have a non-#each source defined.
102
+ #
103
+ def index_parallel
104
+ indexer = Indexers::Parallel.new self
105
+ categories.first.prepare_index_directory # TODO Unnice.
106
+ indexer.index
107
+ end
108
+
109
+ # Indexing.
110
+ #
111
+ # Note: If it is an each source we do not take a snapshot.
112
+ #
113
+ def take_snapshot
114
+ source.take_snapshot self unless source.respond_to? :each
115
+ end
116
+
117
+ end
118
+
119
+ end
@@ -3,24 +3,12 @@ module Index
3
3
  # An index that is persisted in files, loaded at startup and kept in memory at runtime.
4
4
  #
5
5
  class Memory < Base
6
-
7
- # Create a new memory index for indexing and for querying.
8
- #
9
- # Parameters:
10
- # * name: The identifier of the index. Used:
11
- # - to identify an index (e.g. by you in Rake tasks: Indexes[:the_identifier]).
12
- # - in the frontend to describe which index a result came from.
13
- # - index directory naming (index/development/the_identifier/<lots of indexes>)
14
- # * source: The source the data comes from. See Sources::Base.
15
- #
16
- # Options:
17
- # * result_identifier: Use if you'd like a different identifier/name in the results JSON than the name of the index.
18
- #
19
- def initialize name, options = {}
20
- options[:indexing_bundle_class] ||= Internals::Indexing::Bundle::Memory
21
- options[:indexed_bundle_class] ||= Internals::Indexed::Bundle::Memory
22
-
23
- super name, options
6
+
7
+ def indexing_bundle_class
8
+ Indexing::Bundle::Memory
9
+ end
10
+ def indexed_bundle_class
11
+ Indexed::Bundle::Memory
24
12
  end
25
13
 
26
14
  end
@@ -3,24 +3,12 @@ module Index
3
3
  # An index that is persisted in Redis.
4
4
  #
5
5
  class Redis < Base
6
-
7
- # Create a new Redis index for indexing and for querying.
8
- #
9
- # Parameters:
10
- # * name: The identifier of the index. Used:
11
- # - to identify an index (e.g. by you in Rake tasks: Indexes[:the_identifier]).
12
- # - in the frontend to describe which index a result came from.
13
- # - index directory naming (index/development/the_identifier/<lots of indexes>)
14
- # * source: The source the data comes from. See Sources::Base.
15
- #
16
- # Options:
17
- # * result_identifier: Use if you'd like a different identifier/name in the results JSON than the name of the index.
18
- #
19
- def initialize name, options = {}
20
- options[:indexing_bundle_class] ||= Internals::Indexing::Bundle::Redis
21
- options[:indexed_bundle_class] ||= Internals::Indexed::Bundle::Redis
22
-
23
- super name, options
6
+
7
+ def indexing_bundle_class
8
+ Indexing::Bundle::Redis
9
+ end
10
+ def indexed_bundle_class
11
+ Indexed::Bundle::Redis
24
12
  end
25
13
 
26
14
  end
@@ -0,0 +1,110 @@
1
+ module Indexed # :nodoc:all
2
+
3
+ # A Bundle is a number of indexes
4
+ # per [index, category] combination.
5
+ #
6
+ # At most, there are three indexes:
7
+ # * *core* index (always used)
8
+ # * *weights* index (always used)
9
+ # * *similarity* index (used with similarity)
10
+ #
11
+ # In Picky, indexing is separated from the index
12
+ # handling itself through a parallel structure.
13
+ #
14
+ # Both use methods provided by this base class, but
15
+ # have very different goals:
16
+ #
17
+ # * *Indexing*::*Bundle* is just concerned with creating index files
18
+ # and providing helper functions to e.g. check the indexes.
19
+ #
20
+ # * *Indexed*::*Bundle* is concerned with loading these index files into
21
+ # memory and looking up search data as fast as possible.
22
+ #
23
+ module Bundle
24
+
25
+ class Base
26
+
27
+ attr_reader :identifier, :configuration
28
+ attr_accessor :similarity_strategy
29
+ attr_accessor :index, :weights, :similarity, :configuration
30
+
31
+ delegate :[], :to => :configuration
32
+ delegate :size, :to => :index
33
+
34
+ def initialize name, category, similarity_strategy
35
+ @identifier = "#{category.identifier}:#{name}"
36
+
37
+ @index = {}
38
+ @weights = {}
39
+ @similarity = {}
40
+
41
+ @similarity_strategy = similarity_strategy
42
+ end
43
+
44
+ # Get a list of similar texts.
45
+ #
46
+ # Note: Does not return itself.
47
+ #
48
+ def similar text
49
+ code = similarity_strategy.encoded text
50
+ similar_codes = code && @similarity[code]
51
+ similar_codes.delete text if similar_codes
52
+ similar_codes || []
53
+ end
54
+
55
+ # Loads all indexes.
56
+ #
57
+ def load
58
+ load_index
59
+ load_weights
60
+ load_similarity
61
+ load_configuration
62
+ end
63
+
64
+ # Loads the core index.
65
+ #
66
+ def load_index
67
+ # No loading needed.
68
+ end
69
+ # Loads the weights index.
70
+ #
71
+ def load_weights
72
+ # No loading needed.
73
+ end
74
+ # Loads the similarity index.
75
+ #
76
+ def load_similarity
77
+ # No loading needed.
78
+ end
79
+ # Loads the configuration.
80
+ #
81
+ def load_configuration
82
+ # No loading needed.
83
+ end
84
+
85
+ # Clears the core index.
86
+ #
87
+ def clear_index
88
+ # No clearing needed.
89
+ end
90
+ # Clears the weights index.
91
+ #
92
+ def clear_weights
93
+ # No clearing needed.
94
+ end
95
+ # Clears the similarity index.
96
+ #
97
+ def clear_similarity
98
+ # No clearing needed.
99
+ end
100
+ # Clears the configuration.
101
+ #
102
+ def clear_configuration
103
+ # No clearing needed.
104
+ end
105
+
106
+ end
107
+
108
+ end
109
+
110
+ end
@@ -0,0 +1,91 @@
1
+ # encoding: utf-8
2
+ #
3
+ module Indexed # :nodoc:all
4
+
5
+ #
6
+ #
7
+ module Bundle
8
+
9
+ # This is the _actual_ index (based on memory).
10
+ #
11
+ # Handles exact/partial index, weights index, and similarity index.
12
+ #
13
+ # Delegates file handling and checking to a *Backend*::*Files* object.
14
+ #
15
+ class Memory < Base
16
+
17
+ delegate :[], :to => :configuration
18
+
19
+ def initialize name, configuration, *args
20
+ super name, configuration, *args
21
+
22
+ @configuration = {} # A hash with config options.
23
+
24
+ @backend = Backend::Files.new name, configuration
25
+ end
26
+
27
+ def to_s
28
+ <<-MEMORY
29
+ Memory
30
+ #{@backend.indented_to_s}
31
+ MEMORY
32
+ end
33
+
34
+ # Get the ids for the given symbol.
35
+ #
36
+ def ids sym
37
+ @index[sym] || []
38
+ end
39
+ # Get a weight for the given symbol.
40
+ #
41
+ def weight sym
42
+ @weights[sym]
43
+ end
44
+
45
+ # Loads the core index.
46
+ #
47
+ def load_index
48
+ self.index = @backend.load_index
49
+ end
50
+ # Loads the weights index.
51
+ #
52
+ def load_weights
53
+ self.weights = @backend.load_weights
54
+ end
55
+ # Loads the similarity index.
56
+ #
57
+ def load_similarity
58
+ self.similarity = @backend.load_similarity
59
+ end
60
+ # Loads the configuration.
61
+ #
62
+ def load_configuration
63
+ self.configuration = @backend.load_configuration
64
+ end
65
+
66
+ # Clears the core index.
67
+ #
68
+ def clear_index
69
+ self.index = {}
70
+ end
71
+ # Clears the weights index.
72
+ #
73
+ def clear_weights
74
+ self.weights = {}
75
+ end
76
+ # Clears the similarity index.
77
+ #
78
+ def clear_similarity
79
+ self.similarity = {}
80
+ end
81
+ # Clears the configuration.
82
+ #
83
+ def clear_configuration
84
+ self.configuration = {}
85
+ end
86
+
87
+ end
88
+
89
+ end
90
+
91
+ end