picky 2.5.2 → 2.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (255) hide show
  1. data/lib/picky/adapters/rack/base.rb +23 -0
  2. data/lib/picky/adapters/rack/live_parameters.rb +33 -0
  3. data/lib/picky/adapters/rack/query.rb +65 -0
  4. data/lib/picky/adapters/rack.rb +30 -0
  5. data/lib/picky/application.rb +5 -5
  6. data/lib/picky/backend/backend.rb +108 -0
  7. data/lib/picky/backend/file/basic.rb +101 -0
  8. data/lib/picky/backend/file/json.rb +34 -0
  9. data/lib/picky/backend/file/marshal.rb +34 -0
  10. data/lib/picky/backend/file/text.rb +56 -0
  11. data/lib/picky/backend/files.rb +30 -0
  12. data/lib/picky/backend/redis/basic.rb +85 -0
  13. data/lib/picky/backend/redis/list_hash.rb +49 -0
  14. data/lib/picky/backend/redis/string_hash.rb +40 -0
  15. data/lib/picky/backend/redis.rb +40 -0
  16. data/lib/picky/calculations/location.rb +57 -0
  17. data/lib/picky/categories.rb +62 -0
  18. data/lib/picky/categories_indexed.rb +93 -0
  19. data/lib/picky/categories_indexing.rb +12 -0
  20. data/lib/picky/category.rb +127 -0
  21. data/lib/picky/category_indexed.rb +64 -0
  22. data/lib/picky/category_indexing.rb +145 -0
  23. data/lib/picky/{internals/ext → ext}/maybe_compile.rb +0 -0
  24. data/lib/picky/{internals/ext → ext}/ruby19/extconf.rb +0 -0
  25. data/lib/picky/{internals/ext → ext}/ruby19/performant.c +0 -0
  26. data/lib/picky/{internals/extensions → extensions}/array.rb +0 -0
  27. data/lib/picky/extensions/class.rb +11 -0
  28. data/lib/picky/{internals/extensions → extensions}/hash.rb +0 -0
  29. data/lib/picky/{internals/extensions → extensions}/module.rb +0 -0
  30. data/lib/picky/{internals/extensions → extensions}/object.rb +0 -0
  31. data/lib/picky/{internals/extensions → extensions}/symbol.rb +0 -0
  32. data/lib/picky/frontend_adapters/rack.rb +146 -0
  33. data/lib/picky/generators/aliases.rb +3 -3
  34. data/lib/picky/generators/base.rb +15 -0
  35. data/lib/picky/generators/partial/default.rb +5 -0
  36. data/lib/picky/generators/partial/none.rb +31 -0
  37. data/lib/picky/generators/partial/strategy.rb +25 -0
  38. data/lib/picky/generators/partial/substring.rb +118 -0
  39. data/lib/picky/generators/partial_generator.rb +15 -0
  40. data/lib/picky/generators/similarity/default.rb +7 -0
  41. data/lib/picky/generators/similarity/double_metaphone.rb +28 -0
  42. data/lib/picky/generators/similarity/metaphone.rb +28 -0
  43. data/lib/picky/generators/similarity/none.rb +31 -0
  44. data/lib/picky/generators/similarity/phonetic.rb +65 -0
  45. data/lib/picky/generators/similarity/soundex.rb +28 -0
  46. data/lib/picky/generators/similarity/strategy.rb +9 -0
  47. data/lib/picky/generators/similarity_generator.rb +15 -0
  48. data/lib/picky/generators/strategy.rb +14 -0
  49. data/lib/picky/generators/weights/default.rb +7 -0
  50. data/lib/picky/generators/weights/logarithmic.rb +39 -0
  51. data/lib/picky/generators/weights/strategy.rb +9 -0
  52. data/lib/picky/generators/weights_generator.rb +15 -0
  53. data/lib/picky/{internals/helpers → helpers}/measuring.rb +0 -0
  54. data/lib/picky/index/base.rb +119 -104
  55. data/lib/picky/index/base_indexed.rb +27 -0
  56. data/lib/picky/index/base_indexing.rb +119 -0
  57. data/lib/picky/index/memory.rb +6 -18
  58. data/lib/picky/index/redis.rb +6 -18
  59. data/lib/picky/indexed/bundle/base.rb +110 -0
  60. data/lib/picky/indexed/bundle/memory.rb +91 -0
  61. data/lib/picky/indexed/bundle/redis.rb +45 -0
  62. data/lib/picky/indexed/wrappers/bundle/calculation.rb +35 -0
  63. data/lib/picky/indexed/wrappers/bundle/location.rb +42 -0
  64. data/lib/picky/indexed/wrappers/bundle/wrapper.rb +43 -0
  65. data/lib/picky/indexed/wrappers/category/location.rb +25 -0
  66. data/lib/picky/indexed/wrappers/exact_first.rb +55 -0
  67. data/lib/picky/{internals/indexers → indexers}/base.rb +0 -0
  68. data/lib/picky/{internals/indexers → indexers}/parallel.rb +0 -0
  69. data/lib/picky/{internals/indexers → indexers}/serial.rb +0 -0
  70. data/lib/picky/{internals/indexers → indexers}/solr.rb +0 -0
  71. data/lib/picky/indexes.rb +73 -0
  72. data/lib/picky/indexes_indexed.rb +29 -0
  73. data/lib/picky/indexes_indexing.rb +49 -0
  74. data/lib/picky/indexing/bundle/base.rb +212 -0
  75. data/lib/picky/indexing/bundle/memory.rb +25 -0
  76. data/lib/picky/indexing/bundle/redis.rb +24 -0
  77. data/lib/picky/indexing/bundle/super_base.rb +61 -0
  78. data/lib/picky/indexing/wrappers/category/location.rb +25 -0
  79. data/lib/picky/interfaces/live_parameters.rb +8 -8
  80. data/lib/picky/loader.rb +89 -95
  81. data/lib/picky/{internals/performant.rb → performant.rb} +0 -0
  82. data/lib/picky/query/allocation.rb +84 -0
  83. data/lib/picky/query/allocations.rb +114 -0
  84. data/lib/picky/query/combination.rb +76 -0
  85. data/lib/picky/query/combinations/base.rb +70 -0
  86. data/lib/picky/query/combinations/memory.rb +48 -0
  87. data/lib/picky/query/combinations/redis.rb +86 -0
  88. data/lib/picky/query/indexes.rb +195 -0
  89. data/lib/picky/query/qualifiers.rb +76 -0
  90. data/lib/picky/query/token.rb +198 -0
  91. data/lib/picky/query/tokens.rb +103 -0
  92. data/lib/picky/{internals/query → query}/weights.rb +0 -0
  93. data/lib/picky/results.rb +1 -1
  94. data/lib/picky/search.rb +6 -6
  95. data/lib/picky/{internals/solr → solr}/schema_generator.rb +0 -0
  96. data/lib/picky/sources/db.rb +7 -7
  97. data/lib/picky/sources/wrappers/location.rb +2 -2
  98. data/lib/picky/tokenizers/base.rb +224 -0
  99. data/lib/picky/tokenizers/index.rb +30 -0
  100. data/lib/picky/tokenizers/location.rb +49 -0
  101. data/lib/picky/tokenizers/query.rb +55 -0
  102. data/lib/tasks/index.rake +4 -3
  103. data/lib/tasks/try.rake +2 -2
  104. data/spec/lib/{internals/adapters → adapters}/rack/base_spec.rb +1 -1
  105. data/spec/lib/{internals/adapters → adapters}/rack/live_parameters_spec.rb +1 -1
  106. data/spec/lib/{internals/adapters → adapters}/rack/query_spec.rb +1 -1
  107. data/spec/lib/application_spec.rb +3 -3
  108. data/spec/lib/{internals/index → backend}/file/basic_spec.rb +1 -1
  109. data/spec/lib/{internals/index → backend}/file/json_spec.rb +1 -1
  110. data/spec/lib/{internals/index → backend}/file/marshal_spec.rb +1 -1
  111. data/spec/lib/{internals/index → backend}/file/text_spec.rb +1 -1
  112. data/spec/lib/{internals/index → backend}/files_spec.rb +3 -3
  113. data/spec/lib/{internals/index → backend}/redis/basic_spec.rb +1 -1
  114. data/spec/lib/{internals/index → backend}/redis/list_hash_spec.rb +1 -1
  115. data/spec/lib/{internals/index → backend}/redis/string_hash_spec.rb +1 -1
  116. data/spec/lib/{internals/index → backend}/redis_spec.rb +11 -5
  117. data/spec/lib/{internals/calculations → calculations}/location_spec.rb +1 -1
  118. data/spec/lib/{internals/indexed/categories_spec.rb → categories_indexed_spec.rb} +10 -10
  119. data/spec/lib/{internals/indexed/category_spec.rb → category_indexed_spec.rb} +12 -12
  120. data/spec/lib/{internals/indexing/category_spec.rb → category_indexing_spec.rb} +10 -10
  121. data/spec/lib/{internals/cores_spec.rb → cores_spec.rb} +0 -0
  122. data/spec/lib/{internals/extensions → extensions}/array_spec.rb +0 -0
  123. data/spec/lib/{internals/extensions → extensions}/hash_spec.rb +0 -0
  124. data/spec/lib/{internals/extensions → extensions}/module_spec.rb +0 -0
  125. data/spec/lib/{internals/extensions → extensions}/object_spec.rb +0 -0
  126. data/spec/lib/{internals/extensions → extensions}/symbol_spec.rb +0 -0
  127. data/spec/lib/{internals/frontend_adapters → frontend_adapters}/rack_spec.rb +10 -10
  128. data/spec/lib/generators/aliases_spec.rb +3 -3
  129. data/spec/lib/{internals/generators → generators}/cacher_strategy_spec.rb +1 -1
  130. data/spec/lib/{internals/generators → generators}/partial/default_spec.rb +3 -3
  131. data/spec/lib/{internals/generators → generators}/partial/none_spec.rb +2 -2
  132. data/spec/lib/{internals/generators → generators}/partial/substring_spec.rb +1 -1
  133. data/spec/lib/{internals/generators → generators}/partial_generator_spec.rb +3 -3
  134. data/spec/lib/{internals/generators → generators}/similarity/double_metaphone_spec.rb +1 -1
  135. data/spec/lib/{internals/generators → generators}/similarity/metaphone_spec.rb +1 -1
  136. data/spec/lib/{internals/generators → generators}/similarity/none_spec.rb +1 -1
  137. data/spec/lib/{internals/generators → generators}/similarity/phonetic_spec.rb +1 -1
  138. data/spec/lib/{internals/generators → generators}/similarity/soundex_spec.rb +1 -1
  139. data/spec/lib/{internals/generators → generators}/similarity_generator_spec.rb +2 -2
  140. data/spec/lib/{internals/generators → generators}/weights/logarithmic_spec.rb +1 -1
  141. data/spec/lib/{internals/generators → generators}/weights_generator_spec.rb +5 -5
  142. data/spec/lib/{internals/helpers → helpers}/measuring_spec.rb +0 -0
  143. data/spec/lib/{internals/indexed/index_spec.rb → index/base_indexed_spec.rb} +5 -5
  144. data/spec/lib/{internals/indexing/index_spec.rb → index/base_indexing_spec.rb} +6 -19
  145. data/spec/lib/index/base_spec.rb +10 -53
  146. data/spec/lib/{internals/indexed → indexed}/bundle/memory_spec.rb +5 -5
  147. data/spec/lib/{internals/indexed → indexed}/bundle/redis_spec.rb +4 -4
  148. data/spec/lib/{internals/indexed → indexed}/wrappers/bundle/calculation_spec.rb +1 -1
  149. data/spec/lib/{internals/indexed → indexed}/wrappers/bundle/wrapper_spec.rb +1 -1
  150. data/spec/lib/{internals/indexed → indexed}/wrappers/exact_first_spec.rb +7 -7
  151. data/spec/lib/{internals/indexers → indexers}/base_spec.rb +0 -0
  152. data/spec/lib/{internals/indexers → indexers}/parallel_spec.rb +0 -0
  153. data/spec/lib/{internals/indexers → indexers}/serial_spec.rb +0 -0
  154. data/spec/lib/indexes_class_spec.rb +30 -0
  155. data/spec/lib/{indexed/indexes_spec.rb → indexes_indexed_spec.rb} +1 -1
  156. data/spec/lib/{indexing/indexes_spec.rb → indexes_indexing_spec.rb} +8 -8
  157. data/spec/lib/{internals/indexing/indexes_spec.rb → indexes_spec.rb} +15 -12
  158. data/spec/lib/{internals/indexing → indexing}/bundle/memory_partial_generation_speed_spec.rb +4 -4
  159. data/spec/lib/{internals/indexing → indexing}/bundle/memory_spec.rb +3 -3
  160. data/spec/lib/{internals/indexing → indexing}/bundle/redis_spec.rb +3 -3
  161. data/spec/lib/{internals/indexing → indexing}/bundle/super_base_spec.rb +2 -2
  162. data/spec/lib/{internals/interfaces → interfaces}/live_parameters_spec.rb +0 -0
  163. data/spec/lib/query/allocation_spec.rb +1 -1
  164. data/spec/lib/query/allocations_spec.rb +1 -1
  165. data/spec/lib/query/combination_spec.rb +5 -5
  166. data/spec/lib/query/combinations/base_spec.rb +1 -1
  167. data/spec/lib/query/combinations/memory_spec.rb +1 -1
  168. data/spec/lib/query/combinations/redis_spec.rb +1 -1
  169. data/spec/lib/query/indexes_spec.rb +1 -1
  170. data/spec/lib/query/qualifiers_spec.rb +4 -4
  171. data/spec/lib/query/token_spec.rb +3 -3
  172. data/spec/lib/query/tokens_spec.rb +32 -32
  173. data/spec/lib/search_spec.rb +5 -5
  174. data/spec/lib/{internals/solr → solr}/schema_generator_spec.rb +0 -0
  175. data/spec/lib/sources/db_spec.rb +4 -8
  176. data/spec/lib/sources/wrappers/location_spec.rb +1 -1
  177. data/spec/lib/{internals/tokenizers → tokenizers}/base_spec.rb +1 -1
  178. data/spec/lib/{internals/tokenizers → tokenizers}/index_spec.rb +1 -1
  179. data/spec/lib/{internals/tokenizers → tokenizers}/query_spec.rb +1 -1
  180. metadata +214 -215
  181. data/lib/picky/aliases.rb +0 -4
  182. data/lib/picky/index_bundle.rb +0 -48
  183. data/lib/picky/indexed/indexes.rb +0 -59
  184. data/lib/picky/indexing/indexes.rb +0 -87
  185. data/lib/picky/internals/adapters/rack/base.rb +0 -27
  186. data/lib/picky/internals/adapters/rack/live_parameters.rb +0 -37
  187. data/lib/picky/internals/adapters/rack/query.rb +0 -69
  188. data/lib/picky/internals/adapters/rack.rb +0 -34
  189. data/lib/picky/internals/calculations/location.rb +0 -59
  190. data/lib/picky/internals/frontend_adapters/rack.rb +0 -150
  191. data/lib/picky/internals/generators/base.rb +0 -19
  192. data/lib/picky/internals/generators/partial/default.rb +0 -7
  193. data/lib/picky/internals/generators/partial/none.rb +0 -35
  194. data/lib/picky/internals/generators/partial/strategy.rb +0 -29
  195. data/lib/picky/internals/generators/partial/substring.rb +0 -122
  196. data/lib/picky/internals/generators/partial_generator.rb +0 -19
  197. data/lib/picky/internals/generators/similarity/default.rb +0 -9
  198. data/lib/picky/internals/generators/similarity/double_metaphone.rb +0 -32
  199. data/lib/picky/internals/generators/similarity/metaphone.rb +0 -32
  200. data/lib/picky/internals/generators/similarity/none.rb +0 -35
  201. data/lib/picky/internals/generators/similarity/phonetic.rb +0 -69
  202. data/lib/picky/internals/generators/similarity/soundex.rb +0 -32
  203. data/lib/picky/internals/generators/similarity/strategy.rb +0 -11
  204. data/lib/picky/internals/generators/similarity_generator.rb +0 -19
  205. data/lib/picky/internals/generators/strategy.rb +0 -18
  206. data/lib/picky/internals/generators/weights/default.rb +0 -9
  207. data/lib/picky/internals/generators/weights/logarithmic.rb +0 -43
  208. data/lib/picky/internals/generators/weights/strategy.rb +0 -11
  209. data/lib/picky/internals/generators/weights_generator.rb +0 -19
  210. data/lib/picky/internals/index/backend.rb +0 -112
  211. data/lib/picky/internals/index/file/basic.rb +0 -105
  212. data/lib/picky/internals/index/file/json.rb +0 -38
  213. data/lib/picky/internals/index/file/marshal.rb +0 -38
  214. data/lib/picky/internals/index/file/text.rb +0 -60
  215. data/lib/picky/internals/index/files.rb +0 -34
  216. data/lib/picky/internals/index/redis/basic.rb +0 -89
  217. data/lib/picky/internals/index/redis/list_hash.rb +0 -53
  218. data/lib/picky/internals/index/redis/string_hash.rb +0 -44
  219. data/lib/picky/internals/index/redis.rb +0 -44
  220. data/lib/picky/internals/indexed/bundle/base.rb +0 -114
  221. data/lib/picky/internals/indexed/bundle/memory.rb +0 -95
  222. data/lib/picky/internals/indexed/bundle/redis.rb +0 -49
  223. data/lib/picky/internals/indexed/categories.rb +0 -140
  224. data/lib/picky/internals/indexed/category.rb +0 -111
  225. data/lib/picky/internals/indexed/index.rb +0 -63
  226. data/lib/picky/internals/indexed/wrappers/bundle/calculation.rb +0 -37
  227. data/lib/picky/internals/indexed/wrappers/bundle/location.rb +0 -44
  228. data/lib/picky/internals/indexed/wrappers/bundle/wrapper.rb +0 -45
  229. data/lib/picky/internals/indexed/wrappers/category/location.rb +0 -27
  230. data/lib/picky/internals/indexed/wrappers/exact_first.rb +0 -59
  231. data/lib/picky/internals/indexing/bundle/base.rb +0 -216
  232. data/lib/picky/internals/indexing/bundle/memory.rb +0 -29
  233. data/lib/picky/internals/indexing/bundle/redis.rb +0 -28
  234. data/lib/picky/internals/indexing/bundle/super_base.rb +0 -65
  235. data/lib/picky/internals/indexing/category.rb +0 -153
  236. data/lib/picky/internals/indexing/index.rb +0 -142
  237. data/lib/picky/internals/indexing/wrappers/category/location.rb +0 -27
  238. data/lib/picky/internals/query/allocation.rb +0 -88
  239. data/lib/picky/internals/query/allocations.rb +0 -118
  240. data/lib/picky/internals/query/combination.rb +0 -80
  241. data/lib/picky/internals/query/combinations/base.rb +0 -74
  242. data/lib/picky/internals/query/combinations/memory.rb +0 -52
  243. data/lib/picky/internals/query/combinations/redis.rb +0 -90
  244. data/lib/picky/internals/query/indexes.rb +0 -199
  245. data/lib/picky/internals/query/qualifiers.rb +0 -82
  246. data/lib/picky/internals/query/token.rb +0 -202
  247. data/lib/picky/internals/query/tokens.rb +0 -109
  248. data/lib/picky/internals/shared/category.rb +0 -52
  249. data/lib/picky/internals/tokenizers/base.rb +0 -228
  250. data/lib/picky/internals/tokenizers/index.rb +0 -34
  251. data/lib/picky/internals/tokenizers/location.rb +0 -54
  252. data/lib/picky/internals/tokenizers/query.rb +0 -59
  253. data/lib/picky/internals.rb +0 -2
  254. data/spec/lib/aliases_spec.rb +0 -9
  255. data/spec/lib/index_bundle_spec.rb +0 -69
@@ -85,7 +85,11 @@ module Index
85
85
  #
86
86
  class Base
87
87
 
88
- attr_reader :name
88
+ attr_reader :name,
89
+ :categories
90
+
91
+ delegate :[],
92
+ :to => :categories
89
93
 
90
94
  # Create a new index with a given source.
91
95
  #
@@ -115,8 +119,22 @@ module Index
115
119
  @name = name.to_sym
116
120
 
117
121
  check_options options
118
- @indexing = Internals::Indexing::Index.new name, options
119
- @indexed = Internals::Indexed::Index.new name, options
122
+
123
+ @source = options[:source]
124
+
125
+ @after_indexing = options[:after_indexing]
126
+ @indexing_bundle_class = options[:indexing_bundle_class] # TODO This should probably be a fixed parameter.
127
+ @tokenizer = options[:tokenizer]
128
+ @key_format = options[:key_format]
129
+
130
+ # Indexed.
131
+ #
132
+ @result_identifier = options[:result_identifier] || name
133
+ @indexed_bundle_class = options[:indexed_bundle_class] # TODO This should probably be a fixed parameter.
134
+
135
+ # TODO Move ignore_unassigned_tokens to query, somehow.
136
+ #
137
+ @categories = Categories.new ignore_unassigned_tokens: (options[:ignore_unassigned_tokens] || false)
120
138
 
121
139
  # Centralized registry.
122
140
  #
@@ -126,104 +144,19 @@ module Index
126
144
  #
127
145
  instance_eval(&Proc.new) if block_given?
128
146
 
129
- check_source internal_indexing.source
130
- end
131
- def internal_indexing # :nodoc:
132
- @indexing
133
- end
134
- def internal_indexed # :nodoc:
135
- @indexed
136
- end
137
- #
138
- # Since this is an API, we fail hard quickly.
139
- #
140
- def check_name name # :nodoc:
141
- raise ArgumentError.new(<<-NAME
142
-
143
-
144
- The index identifier (you gave "#{name}") for Index::Memory/Index::Redis should be a Symbol/String,
145
- Examples:
146
- Index::Memory.new(:my_cool_index) # Recommended
147
- Index::Redis.new("a-redis-index")
148
- NAME
149
-
150
-
151
- ) unless name.respond_to?(:to_sym)
152
- end
153
- def check_options options # :nodoc:
154
- raise ArgumentError.new(<<-OPTIONS
155
-
156
-
157
- Sources are not passed in as second parameter for #{self.class.name} anymore, but either
158
- * as :source option:
159
- #{self.class.name}.new(#{name.inspect}, source: #{options})
160
- or
161
- * given to the #source method inside the config block:
162
- #{self.class.name}.new(#{name.inspect}) do
163
- source #{options}
164
- end
165
-
166
- Sorry about that breaking change (in 2.2.0), didn't want to go to 3.0.0 yet!
167
-
168
- All the best
169
- -- Picky
170
-
171
-
172
- OPTIONS
173
- ) unless options.respond_to?(:[])
174
- end
175
- def check_source source # :nodoc:
176
- raise ArgumentError.new(<<-SOURCE
177
-
178
-
179
- The index "#{name}" should use a data source that responds to either the method #each, or the method #harvest, which yields(id, text).
180
- Or it could use one of the built-in sources:
181
- Sources::#{(Sources.constants - [:Base, :Wrappers, :NoCSVFileGiven, :NoCouchDBGiven]).join(',
182
- Sources::')}
183
-
184
-
185
- SOURCE
186
- ) unless source.respond_to?(:each) || source.respond_to?(:harvest)
187
- end
188
-
189
- def to_stats # :nodoc:
190
- stats = <<-INDEX
191
- #{name} (#{self.class}):
192
- #{"source: #{internal_indexing.source}".indented_to_s}
193
- #{"categories: #{internal_indexing.categories.map(&:name).join(', ')}".indented_to_s}
194
- INDEX
195
- stats << " result identifier: \"#{internal_indexed.result_identifier}\"".indented_to_s unless internal_indexed.result_identifier.to_s == internal_indexed.name.to_s
196
- stats
197
- end
198
-
199
- # Define an index tokenizer on the index.
200
- #
201
- # Parameters are the exact same as for indexing.
202
- #
203
- def indexing options = {}
204
- internal_indexing.define_indexing options
147
+ # Check if any source has been given in the block or the options.
148
+ #
149
+ check_source @source
205
150
  end
206
- alias define_indexing indexing
207
151
 
208
- # Define a source on the index.
209
- #
210
- # Parameter is a source, either one of the standard sources or
211
- # anything responding to #each and returning objects that
212
- # respond to id and the category names (or the category from option).
152
+ # Default bundles.
213
153
  #
214
- def source source
215
- internal_indexing.define_source source
154
+ def indexing_bundle_class
155
+ Indexing::Bundle::Memory
216
156
  end
217
- alias define_source source
218
-
219
- # Define a key_format on the index.
220
- #
221
- # Parameter is a method name to use on the key (e.g. :to_i, :to_s, :strip).
222
- #
223
- def key_format key_format
224
- internal_indexing.define_key_format key_format
157
+ def indexed_bundle_class
158
+ Indexed::Bundle::Memory
225
159
  end
226
- alias define_key_format key_format
227
160
 
228
161
  # Defines a searchable category on the index.
229
162
  #
@@ -239,17 +172,26 @@ INDEX
239
172
  # * from: Take the data from the data category with this name. Example: You have a source Sources::CSV.new(:title, file:'some_file.csv') but you want the category to be called differently. Then you use from: define_category(:similar_title, :from => :title).
240
173
  #
241
174
  def category category_name, options = {}
242
- category_name = category_name.to_sym
175
+ options = default_category_options.merge options
243
176
 
244
- indexing_category = internal_indexing.define_category category_name, options
245
- indexed_category = internal_indexed.define_category category_name, options
177
+ new_category = Category.new category_name.to_sym, self, options
178
+ categories << new_category
246
179
 
247
- yield indexing_category, indexed_category if block_given?
180
+ new_category = yield new_category if block_given?
248
181
 
249
- self
182
+ new_category
250
183
  end
251
184
  alias define_category category
252
185
 
186
+ # By default, the category uses
187
+ # * the index's bundle type.
188
+ #
189
+ def default_category_options
190
+ {
191
+ :indexed_bundle_class => @indexed_bundle_class
192
+ }
193
+ end
194
+
253
195
  # Make this category range searchable with a fixed range. If you need other
254
196
  # ranges, define another category with a different range value.
255
197
  #
@@ -318,9 +260,9 @@ INDEX
318
260
 
319
261
  options = { partial: Partial::None.new }.merge options
320
262
 
321
- define_category category_name, options do |indexing_category, indexed_category|
322
- Internals::Indexing::Wrappers::Category::Location.install_on indexing_category, range, precision
323
- Internals::Indexed::Wrappers::Category::Location.install_on indexed_category, range, precision
263
+ define_category category_name, options do |category|
264
+ Indexing::Wrappers::Category::Location.install_on category, range, precision
265
+ Indexed::Wrappers::Category::Location.install_on category, range, precision
324
266
  end
325
267
  end
326
268
  alias define_ranged_category ranged_category
@@ -386,6 +328,79 @@ INDEX
386
328
 
387
329
  end
388
330
  alias define_geo_categories geo_categories
331
+
332
+ #
333
+ # Since this is an API, we fail hard quickly.
334
+ #
335
+ def check_name name # :nodoc:
336
+ raise ArgumentError.new(<<-NAME
337
+
338
+
339
+ The index identifier (you gave "#{name}") for Index::Memory/Index::Redis should be a Symbol/String,
340
+ Examples:
341
+ Index::Memory.new(:my_cool_index) # Recommended
342
+ Index::Redis.new("a-redis-index")
343
+ NAME
344
+
345
+
346
+ ) unless name.respond_to?(:to_sym)
347
+ end
348
+ def check_options options # :nodoc:
349
+ raise ArgumentError.new(<<-OPTIONS
350
+
351
+
352
+ Sources are not passed in as second parameter for #{self.class.name} anymore, but either
353
+ * as :source option:
354
+ #{self.class.name}.new(#{name.inspect}, source: #{options})
355
+ or
356
+ * given to the #source method inside the config block:
357
+ #{self.class.name}.new(#{name.inspect}) do
358
+ source #{options}
359
+ end
360
+
361
+ Sorry about that breaking change (in 2.2.0), didn't want to go to 3.0.0 yet!
362
+
363
+ All the best
364
+ -- Picky
365
+
366
+
367
+ OPTIONS
368
+ ) unless options.respond_to?(:[])
369
+ end
370
+ def check_source source # :nodoc:
371
+ raise ArgumentError.new(<<-SOURCE
372
+
373
+
374
+ The index "#{name}" should use a data source that responds to either the method #each, or the method #harvest, which yields(id, text).
375
+ Or it could use one of the built-in sources:
376
+ Sources::#{(Sources.constants - [:Base, :Wrappers, :NoCSVFileGiven, :NoCouchDBGiven]).join(',
377
+ Sources::')}
378
+
379
+
380
+ SOURCE
381
+ ) unless source.respond_to?(:each) || source.respond_to?(:harvest)
382
+ end
383
+
384
+ def method_name
385
+
386
+ end
387
+
388
+ #
389
+ #
390
+ def to_s
391
+ "#{self.class}(#{name}, result_id: #{result_identifier}, source: #{source}, categories: #{categories})"
392
+ end
393
+
394
+ def to_stats # :nodoc:
395
+ stats = <<-INDEX
396
+ #{name} (#{self.class}):
397
+ #{"source: #{source}".indented_to_s}
398
+ #{"categories: #{categories.map(&:name).join(', ')}".indented_to_s}
399
+ INDEX
400
+ stats << " result identifier: \"#{result_identifier}\"".indented_to_s unless result_identifier.to_s == name.to_s
401
+ stats
402
+ end
403
+
389
404
  end
390
405
 
391
406
  end
@@ -0,0 +1,27 @@
1
+ module Index
2
+
3
+ #
4
+ #
5
+ class Base
6
+
7
+ attr_reader :result_identifier,
8
+ :combinator
9
+
10
+ delegate :load_from_cache,
11
+ :analyze,
12
+ :reindex,
13
+ :to => :categories
14
+
15
+ alias reload load_from_cache
16
+
17
+ # Return the possible combinations for this token.
18
+ #
19
+ # A combination is a tuple <token, index_bundle>.
20
+ #
21
+ def possible_combinations token
22
+ categories.possible_combinations_for token
23
+ end
24
+
25
+ end
26
+
27
+ end
@@ -0,0 +1,119 @@
1
+ module Index
2
+
3
+ #
4
+ #
5
+ class Base
6
+
7
+ attr_reader :after_indexing,
8
+ :bundle_class,
9
+ :tokenizer
10
+
11
+ # Delegators for indexing.
12
+ #
13
+ delegate :backup_caches,
14
+ :cache,
15
+ :check_caches,
16
+ :clear_caches,
17
+ :create_directory_structure,
18
+ :generate_caches,
19
+ :restore_caches,
20
+ :to => :categories
21
+
22
+ delegate :connect_backend,
23
+ :to => :source
24
+
25
+ # Calling index on an index will
26
+ # * prepare (the data)
27
+ # * cache (the data)
28
+ # on every category.
29
+ #
30
+ def index
31
+ prepare
32
+ cache
33
+ end
34
+
35
+ # Define an index tokenizer on the index.
36
+ #
37
+ # Parameters are the exact same as for indexing.
38
+ #
39
+ def indexing options = {}
40
+ @tokenizer = Tokenizers::Index.new options
41
+ end
42
+ alias define_indexing indexing
43
+
44
+ # Define a source on the index.
45
+ #
46
+ # Parameter is a source, either one of the standard sources or
47
+ # anything responding to #each and returning objects that
48
+ # respond to id and the category names (or the category from option).
49
+ #
50
+ def source some_source = nil
51
+ some_source ? define_source(some_source) : (@source || raise_no_source)
52
+ end
53
+ def define_source source
54
+ @source = source
55
+ end
56
+ def raise_no_source
57
+ raise NoSourceSpecifiedException.new(<<-NO_SOURCE
58
+
59
+
60
+ No source given for index #{name}. An index needs a source.
61
+ Example:
62
+ Index::Memory.new(:with_source) do
63
+ source Sources::CSV.new(:title, file: 'data/books.csv')
64
+ category :title
65
+ category :author
66
+ end
67
+
68
+ NO_SOURCE
69
+ )
70
+ end
71
+
72
+ # Define a key_format on the index.
73
+ #
74
+ # Parameter is a method name to use on the key (e.g. :to_i, :to_s, :strip).
75
+ #
76
+ def key_format format = nil
77
+ format ? define_key_format(format) : (@key_format || :to_i)
78
+ end
79
+ def define_key_format key_format
80
+ @key_format = key_format
81
+ end
82
+
83
+ # Decides whether to use a parallel indexer or whether to
84
+ # delegate to each category to index themselves.
85
+ #
86
+ # TODO Rename to prepare.
87
+ #
88
+ def prepare
89
+ # TODO Duplicated in category.rb def indexer.
90
+ #
91
+ if source.respond_to?(:each)
92
+ warn %Q{\n\033[1mWarning\033[m, source for index "#{name}" is empty: #{source} (responds true to empty?).\n} if source.respond_to?(:empty?) && source.empty?
93
+ index_parallel
94
+ else
95
+ categories.each &:prepare
96
+ end
97
+ end
98
+
99
+ # Indexes the categories in parallel.
100
+ #
101
+ # Only use where the category does not have a non-#each source defined.
102
+ #
103
+ def index_parallel
104
+ indexer = Indexers::Parallel.new self
105
+ categories.first.prepare_index_directory # TODO Unnice.
106
+ indexer.index
107
+ end
108
+
109
+ # Indexing.
110
+ #
111
+ # Note: If it is an each source we do not take a snapshot.
112
+ #
113
+ def take_snapshot
114
+ source.take_snapshot self unless source.respond_to? :each
115
+ end
116
+
117
+ end
118
+
119
+ end
@@ -3,24 +3,12 @@ module Index
3
3
  # An index that is persisted in files, loaded at startup and kept in memory at runtime.
4
4
  #
5
5
  class Memory < Base
6
-
7
- # Create a new memory index for indexing and for querying.
8
- #
9
- # Parameters:
10
- # * name: The identifier of the index. Used:
11
- # - to identify an index (e.g. by you in Rake tasks: Indexes[:the_identifier]).
12
- # - in the frontend to describe which index a result came from.
13
- # - index directory naming (index/development/the_identifier/<lots of indexes>)
14
- # * source: The source the data comes from. See Sources::Base.
15
- #
16
- # Options:
17
- # * result_identifier: Use if you'd like a different identifier/name in the results JSON than the name of the index.
18
- #
19
- def initialize name, options = {}
20
- options[:indexing_bundle_class] ||= Internals::Indexing::Bundle::Memory
21
- options[:indexed_bundle_class] ||= Internals::Indexed::Bundle::Memory
22
-
23
- super name, options
6
+
7
+ def indexing_bundle_class
8
+ Indexing::Bundle::Memory
9
+ end
10
+ def indexed_bundle_class
11
+ Indexed::Bundle::Memory
24
12
  end
25
13
 
26
14
  end
@@ -3,24 +3,12 @@ module Index
3
3
  # An index that is persisted in Redis.
4
4
  #
5
5
  class Redis < Base
6
-
7
- # Create a new Redis index for indexing and for querying.
8
- #
9
- # Parameters:
10
- # * name: The identifier of the index. Used:
11
- # - to identify an index (e.g. by you in Rake tasks: Indexes[:the_identifier]).
12
- # - in the frontend to describe which index a result came from.
13
- # - index directory naming (index/development/the_identifier/<lots of indexes>)
14
- # * source: The source the data comes from. See Sources::Base.
15
- #
16
- # Options:
17
- # * result_identifier: Use if you'd like a different identifier/name in the results JSON than the name of the index.
18
- #
19
- def initialize name, options = {}
20
- options[:indexing_bundle_class] ||= Internals::Indexing::Bundle::Redis
21
- options[:indexed_bundle_class] ||= Internals::Indexed::Bundle::Redis
22
-
23
- super name, options
6
+
7
+ def indexing_bundle_class
8
+ Indexing::Bundle::Redis
9
+ end
10
+ def indexed_bundle_class
11
+ Indexed::Bundle::Redis
24
12
  end
25
13
 
26
14
  end
@@ -0,0 +1,110 @@
1
+ module Indexed # :nodoc:all
2
+
3
+ # A Bundle is a number of indexes
4
+ # per [index, category] combination.
5
+ #
6
+ # At most, there are three indexes:
7
+ # * *core* index (always used)
8
+ # * *weights* index (always used)
9
+ # * *similarity* index (used with similarity)
10
+ #
11
+ # In Picky, indexing is separated from the index
12
+ # handling itself through a parallel structure.
13
+ #
14
+ # Both use methods provided by this base class, but
15
+ # have very different goals:
16
+ #
17
+ # * *Indexing*::*Bundle* is just concerned with creating index files
18
+ # and providing helper functions to e.g. check the indexes.
19
+ #
20
+ # * *Indexed*::*Bundle* is concerned with loading these index files into
21
+ # memory and looking up search data as fast as possible.
22
+ #
23
+ module Bundle
24
+
25
+ class Base
26
+
27
+ attr_reader :identifier, :configuration
28
+ attr_accessor :similarity_strategy
29
+ attr_accessor :index, :weights, :similarity, :configuration
30
+
31
+ delegate :[], :to => :configuration
32
+ delegate :size, :to => :index
33
+
34
+ def initialize name, category, similarity_strategy
35
+ @identifier = "#{category.identifier}:#{name}"
36
+
37
+ @index = {}
38
+ @weights = {}
39
+ @similarity = {}
40
+
41
+ @similarity_strategy = similarity_strategy
42
+ end
43
+
44
+ # Get a list of similar texts.
45
+ #
46
+ # Note: Does not return itself.
47
+ #
48
+ def similar text
49
+ code = similarity_strategy.encoded text
50
+ similar_codes = code && @similarity[code]
51
+ similar_codes.delete text if similar_codes
52
+ similar_codes || []
53
+ end
54
+
55
+ # Loads all indexes.
56
+ #
57
+ def load
58
+ load_index
59
+ load_weights
60
+ load_similarity
61
+ load_configuration
62
+ end
63
+
64
+ # Loads the core index.
65
+ #
66
+ def load_index
67
+ # No loading needed.
68
+ end
69
+ # Loads the weights index.
70
+ #
71
+ def load_weights
72
+ # No loading needed.
73
+ end
74
+ # Loads the similarity index.
75
+ #
76
+ def load_similarity
77
+ # No loading needed.
78
+ end
79
+ # Loads the configuration.
80
+ #
81
+ def load_configuration
82
+ # No loading needed.
83
+ end
84
+
85
+ # Clears the core index (no-op by default).
86
+ #
87
+ def clear_index
88
+ # No loading needed.
89
+ end
90
+ # Clears the weights index (no-op by default).
91
+ #
92
+ def clear_weights
93
+ # No loading needed.
94
+ end
95
+ # Clears the similarity index (no-op by default).
96
+ #
97
+ def clear_similarity
98
+ # No loading needed.
99
+ end
100
+ # Clears the configuration (no-op by default).
101
+ #
102
+ def clear_configuration
103
+ # No loading needed.
104
+ end
105
+
106
+ end
107
+
108
+ end
109
+
110
+ end
@@ -0,0 +1,91 @@
1
+ # encoding: utf-8
2
+ #
3
+ module Indexed # :nodoc:all
4
+
5
+ #
6
+ #
7
+ module Bundle
8
+
9
+ # This is the _actual_ index (based on memory).
10
+ #
11
+ # Handles exact/partial index, weights index, and similarity index.
12
+ #
13
+ # Delegates file handling and checking to a *Backend*::*Files* object.
14
+ #
15
+ class Memory < Base
16
+
17
+ delegate :[], :to => :configuration
18
+
19
+ def initialize name, configuration, *args
20
+ super name, configuration, *args
21
+
22
+ @configuration = {} # A hash with config options.
23
+
24
+ @backend = Backend::Files.new name, configuration
25
+ end
26
+
27
+ def to_s
28
+ <<-MEMORY
29
+ Memory
30
+ #{@backend.indented_to_s}
31
+ MEMORY
32
+ end
33
+
34
+ # Get the ids for the given symbol.
35
+ #
36
+ def ids sym
37
+ @index[sym] || []
38
+ end
39
+ # Get a weight for the given symbol.
40
+ #
41
+ def weight sym
42
+ @weights[sym]
43
+ end
44
+
45
+ # Loads the core index.
46
+ #
47
+ def load_index
48
+ self.index = @backend.load_index
49
+ end
50
+ # Loads the weights index.
51
+ #
52
+ def load_weights
53
+ self.weights = @backend.load_weights
54
+ end
55
+ # Loads the similarity index.
56
+ #
57
+ def load_similarity
58
+ self.similarity = @backend.load_similarity
59
+ end
60
+ # Loads the configuration.
61
+ #
62
+ def load_configuration
63
+ self.configuration = @backend.load_configuration
64
+ end
65
+
66
+ # Clears the core index.
67
+ #
68
+ def clear_index
69
+ self.index = {}
70
+ end
71
+ # Clears the weights index.
72
+ #
73
+ def clear_weights
74
+ self.weights = {}
75
+ end
76
+ # Clears the similarity index.
77
+ #
78
+ def clear_similarity
79
+ self.similarity = {}
80
+ end
81
+ # Clears the configuration.
82
+ #
83
+ def clear_configuration
84
+ self.configuration = {}
85
+ end
86
+
87
+ end
88
+
89
+ end
90
+
91
+ end