sunspot_padrino 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,497 @@
1
+ module Sunspot #:nodoc:
2
+ module Padrino #:nodoc:
3
+ #
4
+ # This module adds Sunspot functionality to ActiveRecord models. As well as
5
+ # providing class and instance methods, it optionally adds lifecycle hooks
6
+ # to automatically add and remove models from the Solr index as they are
7
+ # created and destroyed.
8
+ #
9
+ module Searchable
10
+ class <<self
# Mixin hook: when this module is included into +base+, expose the
# ActsAsMethods macros (+searchable+ and +searchable?+) as class-level
# methods of +base+.
def included(base) #:nodoc:
  base.extend(ActsAsMethods)
end
16
+ end
17
+
18
+ module ActsAsMethods
#
# Makes a class searchable if it is not already, or adds search
# configuration if it is. Note that the options passed in are only used
# the first time this method is called for a particular class (the one
# exception is +:include+, which is appended to on later calls); so,
# search should be defined before activating any mixins that extend
# search configuration.
#
# The block passed into this method is evaluated by the
# <code>Sunspot.setup</code> method. See the Sunspot documentation for
# complete information on the functionality provided by that method.
#
# The +options+ hash supplied by the caller is never modified, and an
# already-stored +:include+ list is replaced rather than mutated in
# place, so includes added by one class do not leak into other classes
# sharing the same inherited +sunspot_options+ value.
#
# ==== Options (+options+)
#
# :auto_index<Boolean>::
#   Automatically index models in Solr when they are saved.
#   Default: true
# :auto_remove<Boolean>::
#   Automatically remove models from the Solr index when they are
#   destroyed. <b>Setting this option to +false+ is not recommended
#   </b>(see the README).
# :if<Mixed>::
#   Only index models in Solr if the method, proc or string evaluates
#   to true (e.g. <code>:if => :should_index?</code> or <code>:if =>
#   proc { |model| model.foo > 2 }</code>). Models that do not match
#   the constraint will be removed from the index upon save. Multiple
#   constraints can be specified by passing an array (e.g. <code>:if =>
#   [:method1, :method2]</code>).
# :ignore_attribute_changes_of<Array>::
#   Define attributes that should not trigger a reindex of that
#   object. Usual suspects are updated_at or counters.
# :only_reindex_attribute_changes_of<Array>::
#   Define attributes that are the only attributes that should
#   trigger a reindex of that object. Useful if there are a small
#   number of searchable attributes and a large number of attributes
#   to ignore.
# :include<Mixed>::
#   Define default ActiveRecord includes, set this to allow ActiveRecord
#   to load required associations when indexing. See ActiveRecord's
#   documentation on eager-loading for examples on how to set this.
#   Default: []
# :unless<Mixed>::
#   Only index models in Solr if the method, proc or string evaluates
#   to false (e.g. <code>:unless => :should_not_index?</code> or <code>
#   :unless => proc { |model| model.foo <= 2 }</code>). Models that do
#   not match the constraint will be removed from the index upon save.
#   Multiple constraints can be specified by passing an array (e.g.
#   <code>:unless => [:method1, :method2]</code>).
#
# ==== Example
#
#   class Post < ActiveRecord::Base
#     searchable do
#       text :title, :body
#       string :sort_title do
#         title.downcase.sub(/^(an?|the)/, '')
#       end
#       integer :blog_id
#       time :updated_at
#     end
#   end
#
def searchable(options = {}, &block)
  Sunspot.setup(self, &block)

  extra_includes = Util::Array(options[:include])

  if searchable?
    # Already searchable: only the eager-load list is extended. Assign a
    # fresh hash holding a fresh array instead of concat-ing in place, so
    # the previously stored (possibly inherited) value stays untouched.
    self.sunspot_options = sunspot_options.merge(
      :include => sunspot_options[:include] + extra_includes
    )
  else
    extend ClassMethods
    include InstanceMethods

    class_attribute :sunspot_options

    unless options[:auto_index] == false
      before_save :mark_for_auto_indexing_or_removal
      after_save :perform_index_tasks
    end

    unless options[:auto_remove] == false
      after_destroy { |record| record.remove_from_index }
    end

    # Store a normalized copy; the caller's hash is left as it was passed.
    self.sunspot_options = options.merge(:include => extra_includes)
  end
end
106
+
#
# Default answer for classes that received the ActsAsMethods macros but
# have not called #searchable. Once #searchable has run, the class is
# extended with ClassMethods, whose #searchable? returns +true+ instead.
#
# ==== Returns
#
# +false+
#
def searchable?
  false
end
119
+ end
120
+
121
+ module ClassMethods
# Hook run when ClassMethods is extended onto +base+. For each public
# class-level operation it installs an un-prefixed alias of the
# corresponding +solr_+ method (e.g. +search+ for +solr_search+), but
# only when +base+ does not already have a method with that name.
def self.extended(base) #:nodoc:
  short_names = %w[
    search search_ids remove_all_from_index remove_all_from_index!
    reindex index index_orphans clean_index_orphans
  ]

  base.singleton_class.class_eval do
    short_names.each do |short_name|
      alias_method short_name, "solr_#{short_name}" unless method_defined?(short_name)
    end
  end
end
#
# Search for instances of this class in Solr. The block is handed to
# Sunspot.new_search - see the Sunspot documentation for the full API.
# The resulting search is then run through #solr_execute_search, which
# applies +options+ and executes it.
#
# ==== Example
#
#   Post.search(:include => [:blog]) do
#     keywords 'best pizza'
#     with :blog_id, 1
#     order :updated_at, :desc
#     facet :category_ids
#   end
#
# ==== Options
#
# :include:: Specify associations to eager load
# :select:: Specify columns to select from database when loading results
#
# ==== Returns
#
# Sunspot::Search:: Object containing results, totals, facets, etc.
#
def solr_search(options = {}, &block)
  solr_execute_search(options) { Sunspot.new_search(self, &block) }
end
162
+
#
# Get IDs of matching results without loading the result objects from
# the database. This method may be useful if search is used as an
# intermediate step in a larger find operation. The block is the same
# as the block provided to the #search method; no search options are
# passed along.
#
# ==== Returns
#
# Array:: Array of IDs, in the order returned by the search
#
def solr_search_ids(&block)
  solr_execute_search_ids { solr_search(&block) }
end
178
+
#
# Remove instances of this class from the Solr index by delegating to
# Sunspot.remove_all. See #solr_remove_all_from_index! for the variant
# that commits immediately.
#
def solr_remove_all_from_index
  Sunspot.remove_all(self)
end
185
+
#
# Remove all instances of this class from the Solr index and immediately
# commit, by delegating to Sunspot.remove_all!.
#
def solr_remove_all_from_index!
  Sunspot.remove_all!(self)
end
194
+
#
# Completely rebuild the index for this class. First removes all
# instances from the index (#solr_remove_all_from_index), then loads
# records and indexes them (#solr_index).
#
# +options+ is passed through to #solr_index unchanged; see that method
# for the supported keys.
#
def solr_reindex(options = {})
  solr_remove_all_from_index
  solr_index(options)
end
205
+
#
# Add/update all existing records in the Solr index. The
# +batch_size+ argument specifies how many records to load out of the
# database at a time. The default batch size is
# <code>Sunspot.config.indexing.default_batch_size</code>; if nil (or any
# value whose +to_i+ is not positive) is passed, records will not be
# indexed in batches. By default, a commit is issued after each batch;
# passing +false+ for +batch_commit+ will disable this, and only issue a
# commit at the end of the process. If associated objects need to be
# indexed also, you can specify +include+ in the format accepted by
# ActiveRecord to improve your SQL select performance.
#
# Only records for which +indexable?+ is true are sent to Solr. The
# +opts+ hash passed by the caller is not modified.
#
# ==== Options (passed as a hash)
#
# batch_size<Integer>:: Batch size with which to load records. Passing
#                       'nil' will skip batches.
# batch_commit<Boolean>:: Flag signalling if a commit should be done
#                         after each batch is indexed, default is 'true'
# include<Mixed>:: include option to be passed to the ActiveRecord find,
#                  used for including associated objects that need to be
#                  indexed with the parent object, accepts all formats
#                  ActiveRecord::Base.find does. Defaults to
#                  <code>sunspot_options[:include]</code>.
# first_id:: The lowest possible ID for this class; handed to
#            +find_in_batches+ as +:start+. When omitted, +nil+ is
#            passed. String primary keys will need to specify something
#            reasonable here.
# progress_bar:: Optional object; when given, its +increment!+ method is
#                called with the number of records loaded in each batch.
#
# ==== Examples
#
#   # index in batches of the default size, commit after each
#   Post.index
#
#   # index all rows at once, then commit
#   Post.index(:batch_size => nil)
#
#   # index in batches of the default size, commit when all batches complete
#   Post.index(:batch_commit => false)
#
#   # include the associated +author+ object when loading to index
#   Post.index(:include => :author)
#
def solr_index(opts={})
  # Work on a copy so that pulling :first_id out does not alter the
  # caller's hash.
  opts = opts.dup
  first_id = opts.delete(:first_id)

  options = {
    :batch_size => Sunspot.config.indexing.default_batch_size,
    :batch_commit => true,
    :include => sunspot_options[:include],
    :start => first_id
  }.merge(opts)

  if options[:batch_size].to_i > 0
    batch_counter = 0
    includes(options[:include]).find_in_batches(options.slice(:batch_size, :start)) do |records|
      solr_benchmark(options[:batch_size], batch_counter += 1) do
        Sunspot.index(records.select(&:indexable?))
        Sunspot.commit if options[:batch_commit]
      end

      options[:progress_bar].increment!(records.length) if options[:progress_bar]
    end
  else
    Sunspot.index!(includes(options[:include]).select(&:indexable?))
  end

  # perform a final commit if not committing in batches
  Sunspot.commit unless options[:batch_commit]
end
270
+
#
# Return the IDs of records of this class that are indexed in Solr but
# do not exist in the database. Under normal circumstances, this should
# never happen, but this method is provided in case something goes
# wrong. Usually you will want to rectify the situation by calling
# #clean_index_orphans or #reindex
#
# IDs are fetched from Solr one page at a time until an empty page is
# returned, then compared against the +id+ column of this class's table.
# Database IDs are converted with +to_i+ before the comparison.
#
# ==== Options (passed as a hash)
#
# batch_size<Integer>:: Number of IDs to request from Solr per page.
#                       Default is
#                       <code>Sunspot.config.indexing.default_batch_size</code>.
#
# ==== Returns
#
# Array:: Collection of IDs that exist in Solr but not in the database
def solr_index_orphans(opts={})
  batch_size = opts[:batch_size] || Sunspot.config.indexing.default_batch_size

  solr_ids = []
  page = 0
  loop do
    page += 1
    # Honour the requested batch size instead of a fixed page size.
    ids = solr_search_ids { paginate(:page => page, :per_page => batch_size) }.to_a
    break if ids.empty?
    solr_ids.concat(ids)
  end

  database_ids = connection.select_values("SELECT id FROM #{quoted_table_name}").map(&:to_i)
  solr_ids - database_ids
end
299
+
#
# Find IDs of records of this class that are indexed in Solr but do not
# exist in the database, and remove them from Solr. Under normal
# circumstances, this should not be necessary; this method is provided
# in case something goes wrong.
#
# For every orphaned ID a throwaway, unsaved instance carrying that ID is
# built, and #solr_remove_from_index is called on it.
#
# ==== Options (passed as a hash)
#
# +opts+ is forwarded unchanged to #solr_index_orphans; see that method
# for the supported keys (+batch_size+).
#
def solr_clean_index_orphans(opts={})
  solr_index_orphans(opts).each do |orphan_id|
    placeholder = new { |record| record.id = orphan_id }
    placeholder.solr_remove_from_index
  end
end
318
+
#
# Classes that have been set up through #searchable are extended with
# this module and therefore answer +true+ here.
#
# ==== Returns
#
# +true+
#
def searchable?
  true
end
330
+
# Runs the search object produced by the given block.
#
# +options+ may only contain +:include+ and +:select+ (enforced with
# +assert_valid_keys+). Each one that is set is copied onto the search's
# data accessor for this class before the search is executed.
#
# Returns whatever the search's +execute+ returns.
def solr_execute_search(options = {})
  options.assert_valid_keys(:include, :select)
  search = yield

  if options.any?
    search.build do |query|
      [:include, :select].each do |setting|
        value = options[setting]
        query.data_accessor_for(self).public_send("#{setting}=", value) if value
      end
    end
  end

  search.execute
end
346
+
# Takes the search object returned by the block and maps its raw results
# to their primary keys, each converted with +to_i+. +options+ is
# accepted but not used.
def solr_execute_search_ids(options = {})
  hits = yield.raw_results
  hits.map { |hit| hit.primary_key.to_i }
end
351
+
352
+ protected
353
+
#
# Does some logging for benchmarking indexing performance: writes one
# info line before the given block runs and one after it, the latter
# reporting <code>counter * batch_size</code> as rows indexed together
# with the elapsed time and the rows-per-second rate for this batch.
#
def solr_benchmark(batch_size, counter, &block)
  started_at = Time.now
  logger.info("[#{Time.now}] Start Indexing")
  yield
  elapsed = Time.now - started_at
  rows_indexed = counter * batch_size
  rows_per_second = batch_size / elapsed.to_f
  logger.info("[#{Time.now}] Completed Indexing. Rows indexed #{rows_indexed}. Rows/sec: #{rows_per_second} (Elapsed: #{elapsed} sec.)")
end
364
+
365
+ end
366
+
367
+ module InstanceMethods
# Hook run when InstanceMethods is included into +base+. Installs an
# un-prefixed alias for each +solr_+ instance method (e.g. +index+ for
# +solr_index+), skipping any name +base+ already defines.
def self.included(base) #:nodoc:
  short_names = %w[
    index index! remove_from_index remove_from_index!
    more_like_this more_like_this_ids
  ]

  base.module_eval do
    short_names.each do |short_name|
      alias_method short_name, "solr_#{short_name}" unless method_defined?(short_name)
    end
  end
end
#
# Index the model in Solr by handing it to Sunspot.index. If the model
# is already indexed, it will be updated. Using the defaults, you will
# usually not need to call this method, as models are indexed
# automatically when they are created or updated. If you have disabled
# automatic indexing (see the +:auto_index+ option of #searchable), this
# method allows you to manage indexing manually.
#
def solr_index
  Sunspot.index(self)
end
389
+
#
# Index the model in Solr and immediately commit, by delegating to
# Sunspot.index!. See #index
#
def solr_index!
  Sunspot.index!(self)
end
396
+
#
# Remove the model from the Solr index by handing it to Sunspot.remove.
# Using the defaults, this should not be necessary, as models will
# automatically be removed from the index when they are destroyed. If
# you disable automatic removal (which is not recommended!), you can use
# this method to manage removal manually.
#
def solr_remove_from_index
  Sunspot.remove(self)
end
407
+
#
# Remove the model from the Solr index and commit immediately, by
# delegating to Sunspot.remove!. See #remove_from_index
#
def solr_remove_from_index!
  Sunspot.remove!(self)
end
415
+
# Builds a "more like this" search for this record via
# Sunspot.new_more_like_this and runs it through the class-level
# #solr_execute_search.
#
# A trailing hash in +args+ is taken as the search options for
# #solr_execute_search; the remaining arguments and the block are passed
# on to Sunspot.new_more_like_this.
def solr_more_like_this(*args, &block)
  search_options = args.extract_options!
  self.class.solr_execute_search(search_options) { Sunspot.new_more_like_this(self, *args, &block) }
end
422
+
# Same search as #solr_more_like_this, but run through the class-level
# #solr_execute_search_ids wrapper instead of being returned directly.
# The block is forwarded to #solr_more_like_this.
def solr_more_like_this_ids(&block)
  self.class.solr_execute_search_ids { solr_more_like_this(&block) }
end
428
+
# Whether this record should currently be in the index, according to the
# +:if+ and +:unless+ entries of the class's +sunspot_options+.
#
# A missing (nil) constraint counts as satisfied. Both constraints are
# always evaluated, even when the first one already fails.
def indexable?
  settings = self.class.sunspot_options
  if_constraint = settings[:if]
  unless_constraint = settings[:unless]

  # :if is not specified, or it passes
  satisfies_if = if_constraint.nil? || constraint_passes?(if_constraint)

  # :unless is not specified, or it does not pass
  satisfies_unless = unless_constraint.nil? || !constraint_passes?(unless_constraint)

  satisfies_if && satisfies_unless
end
440
+
441
+ private
442
+
# Evaluates one +:if+ / +:unless+ constraint against this record.
#
# * Symbol or String - the method of that name is invoked on the record.
# * Enumerable - every element is evaluated recursively; all must pass.
# * anything responding to +call+ (e.g. a Proc) - called with the record.
#
# Raises ArgumentError for any other kind of constraint.
def constraint_passes?(constraint)
  case constraint
  when Symbol, String
    __send__(constraint.to_sym)
  when Enumerable
    # All constraints must pass
    constraint.all? { |inner| constraint_passes?(inner) }
  else
    unless constraint.respond_to?(:call)
      raise ArgumentError, "Unknown constraint type: #{constraint} (#{constraint.class})"
    end

    constraint.call(self)
  end
end
460
+
# before_save hook: decides what #perform_index_tasks should do after the
# save and records the decision in two instance variables.
#
# * Not indexable (:if/:unless constraints failed): mark for removal.
# * Indexable and a new record: mark for indexing.
# * Indexable, persisted, +:ignore_attribute_changes_of+ set: mark for
#   indexing only if some changed attribute is outside that list.
# * Indexable, persisted, +:only_reindex_attribute_changes_of+ set: mark
#   for indexing only if some changed attribute is in that list.
# * Otherwise: mark for indexing.
#
# Always returns +true+.
def mark_for_auto_indexing_or_removal
  unless indexable?
    # :if/:unless constraints did not pass; do not auto index and
    # actually go one step further by removing it from the index
    @marked_for_auto_indexing = false
    @marked_for_auto_removal = true
    return true
  end

  # :if/:unless constraints pass or were not present
  settings = self.class.sunspot_options
  ignored = settings[:ignore_attribute_changes_of]
  reindex_only = settings[:only_reindex_attribute_changes_of]

  @marked_for_auto_indexing =
    if new_record?
      true
    elsif ignored
      !(changed.map(&:to_sym) - ignored).empty?
    elsif reindex_only
      !(changed.map(&:to_sym) & reindex_only).empty?
    else
      true
    end
  @marked_for_auto_removal = false

  true
end
484
+
# after_save hook: acts on the flags set by
# #mark_for_auto_indexing_or_removal. Indexes the record when it was
# marked for indexing, otherwise removes it from the index when it was
# marked for removal. Only the flag that was acted upon is cleared;
# nothing happens when neither flag is set.
def perform_index_tasks
  if @marked_for_auto_indexing
    solr_index
    return remove_instance_variable(:@marked_for_auto_indexing)
  end

  return unless @marked_for_auto_removal

  solr_remove_from_index
  remove_instance_variable(:@marked_for_auto_removal)
end
494
+ end
495
+ end
496
+ end
497
+ end