search_do 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/search_do.rb ADDED
@@ -0,0 +1,330 @@
1
+ # Copyright (c) 2006 Patrick Lenz
2
+ #
3
+ # Permission is hereby granted, free of charge, to any person obtaining
4
+ # a copy of this software and associated documentation files (the
5
+ # "Software"), to deal in the Software without restriction, including
6
+ # without limitation the rights to use, copy, modify, merge, publish,
7
+ # distribute, sublicense, and/or sell copies of the Software, and to
8
+ # permit persons to whom the Software is furnished to do so, subject to
9
+ # the following conditions:
10
+ #
11
+ # The above copyright notice and this permission notice shall be
12
+ # included in all copies or substantial portions of the Software.
13
+ #
14
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21
+ #
22
+ # Thanks: Rick Olson (technoweenie) for his numerous plugins that served
23
+ # as an example
24
+
25
+ require 'search_do/indexer'
26
+ require 'search_do/dirty_tracking'
27
+ require 'search_do/backends'
28
+ require 'vendor/estraierpure'
29
+
30
+ # Specify this act if you want to provide fulltext search capabilities to your model via Hyper Estraier. This
31
+ # assumes a setup and running Hyper Estraier node accessible through the HTTP API provided by the EstraierPure
32
+ # Ruby module (which is bundled with this plugin).
33
+ #
34
+ # The act supplies appropriate hooks to insert, update and remove documents from the index when you update your
35
+ # model data, create new objects or remove them from your database. For the initial indexing a convenience
36
+ # class method <tt>reindex!</tt> is provided.
37
+ #
38
+ # Example:
39
+ #
40
+ # class Article < ActiveRecord::Base
41
+ # attr_accessor :snippet
42
+ # acts_as_searchable
43
+ # end
44
+ #
45
+ # Article.reindex!
46
+ #
47
+ # As soon as your model data has been indexed you can make use of the <tt>fulltext_search</tt> class method
48
+ # to search the index and get back instantiated matches.
49
+ #
50
+ # results = Article.fulltext_search('rails')
51
+ # results.size # => 3
52
+ #
53
+ # results.first.class # => Article
54
+ # results.first.body # => "Ruby on Rails is an open-source web framework"
55
+ #
56
+ # Connectivity configuration can be either inherited from conventions or setup globally in the Rails
57
+ # database configuration file <tt>config/database.yml</tt>.
58
+ #
59
+ # Example:
60
+ #
61
+ # development:
62
+ # adapter: mysql
63
+ # database: rails_development
64
+ # host: localhost
65
+ # user: root
66
+ # password:
67
+ # estraier:
68
+ # host: localhost
69
+ # user: admin
70
+ # password: admin
71
+ # port: 1978
72
+ # node: development
73
+ #
74
+ # That way you can configure separate connections for each environment. The values shown above represent the
75
+ # defaults. If you don't need to change any of these it is safe to not specify the <tt>estraier</tt> hash
76
+ # at all.
77
+ #
78
+ # See SearchDo::ClassMethods#acts_as_searchable for per-model configuration options
79
+ #
80
+ module SearchDo
81
+
82
# Mixin hook: when SearchDo is included into +base+, extend +base+ with
# ClassMethods so that they become callable on +base+ itself.
def self.included(base) #:nodoc:
  base.extend(ClassMethods)
end
85
+
86
+ module ClassMethods
87
# Option keys documented for +fulltext_search+. Frozen so the shared list
# cannot be modified at runtime by accident.
VALID_FULLTEXT_OPTIONS = [:limit, :offset, :order, :attributes, :raw_matches, :find, :count].freeze
88
+
89
+ # == Configuration options
90
+ #
91
+ # * <tt>searchable_fields</tt> - Fields to provide searching and indexing for (default: 'body')
92
+ # * <tt>attributes</tt> - Additional attributes to store in Hyper Estraier with the appropriate method supplying the value
93
+ # * <tt>if_changed</tt> - Extra list of attributes to add to the list of attributes that trigger an index update when changed
94
+ #
95
+ # Examples:
96
+ #
97
+ # acts_as_searchable :attributes => { :title => nil, :blog => :blog_title }, :searchable_fields => [ :title, :body ]
98
+ #
99
+ # This would store the return value of the <tt>title</tt> method in the <tt>title</tt> attribute and the return value of the
100
+ # <tt>blog_title</tt> method in the <tt>blog</tt> attribute. The contents of the <tt>title</tt> and <tt>body</tt> columns
101
+ # would end up being indexed for searching.
102
+ #
103
+ # == Attribute naming
104
+ #
105
+ # Attributes that match the reserved names of the Hyper Estraier system attributes are mapped automatically. This is something
106
+ # to keep in mind for custom ordering options or additional query constraints in <tt>fulltext_search</tt>
107
+ # For a list of these attributes see <tt>EstraierPure::SYSTEM_ATTRIBUTES</tt> or visit:
108
+ #
109
+ # http://hyperestraier.sourceforge.net/uguide-en.html#attributes
110
+ #
111
+ # From the example above:
112
+ #
113
+ # Model.fulltext_search('query', :order => '@title STRA') # Returns results ordered by title in ascending order
114
+ # Model.fulltext_search('query', :attributes => 'blog STREQ poocs.net') # Returns results with a blog attribute of 'poocs.net'
115
+ #
116
# Turns the calling class into a searchable model.
#
# Reads these keys from +options+:
# * <tt>:searchable_fields</tt> - stored on the indexer (default: <tt>[ :body ]</tt>)
# * <tt>:attributes</tt> - stored on the indexer as +attributes_to_store+ (default: <tt>{}</tt>)
# * <tt>:if_changed</tt> - stored on the indexer (default: <tt>[]</tt>)
# * <tt>:ignore_timestamp</tt> - when set, +record_timestamps!+ is not called on the indexer
# * <tt>:auto_update</tt> - pass +false+ to skip +add_callbacks!+ on the indexer
#
# Does nothing if SearchDo::InstanceMethods is already included in the class.
def acts_as_searchable(options = {})
  return if included_modules.include?(SearchDo::InstanceMethods)

  cattr_accessor :search_indexer, :search_backend

  # Configure the indexer completely before publishing it on the class.
  indexer = SearchDo::Indexer.new(self, configurations)
  indexer.searchable_fields   = options[:searchable_fields] || [ :body ]
  indexer.attributes_to_store = options[:attributes] || {}
  indexer.if_changed          = options[:if_changed] || []
  self.search_indexer = indexer

  search_indexer.record_timestamps! if !options[:ignore_timestamp] && record_timestamps
  search_indexer.add_callbacks! unless options[:auto_update] == false

  include SearchDo::InstanceMethods
  include SearchDo::DirtyTracking

  connect_backend(configurations)
end
140
+
141
+ # Perform a fulltext search against the Hyper Estraier index.
142
+ #
143
+ # Adds a snippet (the text surrounding the place where the word was found) to each result if the model responds to <tt>snippet=</tt>
144
+ #
145
+ # Options taken:
146
+ # * <tt>limit</tt> - Maximum number of records to retrieve (default: <tt>100</tt>)
147
+ # * <tt>offset</tt> - Number of records to skip (default: <tt>0</tt>)
148
+ # * <tt>order</tt> - Hyper Estraier expression to sort the results (example: <tt>@title STRA</tt>, default: ordering by score)
149
+ # * <tt>attributes</tt> - String to append to Hyper Estraier search query
150
+ # * <tt>raw_matches</tt> - Returns raw Hyper Estraier documents instead of instantiated AR objects
151
+ # * <tt>find</tt> - Options to pass on to the <tt>ActiveRecord::Base#find</tt> call
152
+ # * <tt>count</tt> - Set this to <tt>true</tt> if you're using <tt>fulltext_search</tt> in conjunction with <tt>ActionController::Pagination</tt> to return the number of matches only
153
+ #
154
+ # Examples:
155
+ #
156
+ # Article.fulltext_search("biscuits AND gravy")
157
+ # Article.fulltext_search("biscuits AND gravy", :limit => 15, :offset => 14)
158
+ # Article.fulltext_search("biscuits AND gravy", :attributes => "tag STRINC food")
159
+ # Article.fulltext_search("biscuits AND gravy", :attributes => {:user_id=>1})
160
+ # Article.fulltext_search("biscuits AND gravy", :attributes => {:tag=>'food'})
161
+ # Article.fulltext_search("biscuits AND gravy", :attributes => ["tag STRINC food", "@title STRBW Biscuit"])
162
+ # Article.fulltext_search("biscuits AND gravy", :order => "created_at DESC")
163
+ # Article.fulltext_search("biscuits AND gravy", :raw_matches => true)
164
+ # Article.fulltext_search("biscuits AND gravy", :find => { :order => :title, :include => :comments })
165
+ #
166
+ # Consult the Hyper Estraier documentation on proper query syntax:
167
+ #
168
+ # http://hyperestraier.sourceforge.net/uguide-en.html#searchcond
169
+ #
170
# Searches the index for +query+ and returns the matching records.
#
# +options+ is forwarded unchanged to +matched_ids_and_raw+. The hash under
# <tt>options[:find]</tt> is passed on to +find_by_ids_scope+ with its
# <tt>:limit</tt> and <tt>:offset</tt> keys removed.
#
# Snippets are attached via +add_snippets+ unless +query+ is blank.
def fulltext_search(query = "", options = {})
  # Work on a copy so the caller's :find hash keeps its :limit/:offset keys.
  find_options = (options[:find] || {}).dup
  [ :limit, :offset ].each { |key| find_options.delete(key) }

  ids_and_raw = matched_ids_and_raw(query, options)
  ids = ids_and_raw.map { |id, _raw| id }

  results = find_by_ids_scope(ids, find_options)
  add_snippets(results, ids_and_raw) unless query.blank?
  results
end
181
+
182
# Runs +fulltext_search+ for one page and wraps the result in a
# WillPaginate::Collection.
#
# +options+ is parsed with +wp_parse_options+ into page, per-page and total.
# The search itself receives a copy of +options+ in which <tt>:page</tt> and
# <tt>:per_page</tt> are replaced by the equivalent <tt>:limit</tt> and
# <tt>:offset</tt>; the caller's hash is left untouched.
def paginate_by_fulltext_search(query, options={})
  page, per_page, total = wp_parse_options(options)

  # Translate the will_paginate options into fulltext_search options.
  search_options = options.merge(:limit => per_page, :offset => (page.to_i - 1) * per_page)
  [ :page, :per_page ].each { |key| search_options.delete(key) }

  WillPaginate::Collection.create(page, per_page, total) do |pager|
    pager.replace fulltext_search(query, search_options)

    # replace sets the total itself when it can work it out; count only otherwise.
    unless pager.total_entries
      pager.total_entries = count_fulltext(query, :attributes => search_options[:attributes] || {})
    end
  end
end
201
+
202
# Delegates to the search backend's +count+, passing +query+ and +options+ through.
def count_fulltext(query, options={})
  backend = search_backend
  backend.count(query, options)
end
205
+
206
+ # This method is NOT compatible with the original AAS
207
+ # FIXME: I see no need for this method
208
# Looks up the ids matching +query+ and loads them via +find_by_ids_scope+,
# passing +options+ to that lookup.
#
# When +with_mdate_desc_order+ is true the id search is given
# <tt>:order => :updated_at</tt>; otherwise no search options are passed.
# NOTE(review): the direction implied by the parameter name is decided by
# the backend, not here - confirm.
def find_fulltext(query, options={}, with_mdate_desc_order=true)
  fulltext_option = with_mdate_desc_order ? { :order => :updated_at } : {}
  find_by_ids_scope(matched_ids(query, fulltext_option), options)
end
214
+
215
+ #[[1,Raw],[4,Raw],...]
216
# Delegates to the backend's +search_all_ids_and_raw+ with +query+ and +options+.
# Callers in this module treat the result as a list of [id, raw] pairs.
def matched_ids_and_raw(query = "", options = {})
  backend = search_backend
  backend.search_all_ids_and_raw(query, options)
end
219
+
220
# Returns only the id half of each pair produced by +matched_ids_and_raw+.
def matched_ids(query = "", options = {})
  pairs = matched_ids_and_raw(query, options)
  pairs.map { |doc_id, _raw| doc_id }
end
223
+
224
# Returns only the raw half of each pair produced by +matched_ids_and_raw+.
# Also available under the name +raw_matches+.
def matched_raw(query = "", options = {})
  pairs = matched_ids_and_raw(query, options)
  pairs.map { |_doc_id, raw_doc| raw_doc }
end
alias raw_matches matched_raw
228
+
229
# Returns whatever the search backend exposes as its +index+.
def raw_fulltext_index
  backend = search_backend
  backend.index
end
232
+
233
+ # Clear all entries from index
234
# Delegates to the search backend's +clear_index!+.
def clear_index!
  backend = search_backend
  backend.clear_index!
end
237
+
238
+ # Perform a full re-index of the model data for this model
239
# Loads every record with <tt>find(:all)</tt> and forces an index update on each.
def reindex!
  find(:all).each do |record|
    record.update_index(true)
  end
end
242
+
243
+ private
244
+
245
# Copies snippet data from the raw search documents onto the loaded records.
#
# +results+ is the list of records; +ids_and_raw+ is a list of [id, raw]
# pairs. For each record whose id has a raw document:
# * <tt>snippet=</tt> receives <tt>raw.snippet</tt>, if the record responds to it
# * <tt>html_snippet=</tt> receives the HTML built from <tt>raw.snippet_a</tt>,
#   if the record responds to it
#
# Records without a matching raw document are left untouched instead of
# raising. Returns +results+.
def add_snippets(results, ids_and_raw)
  # Index the raw documents once; the first entry per id wins, as with Array#assoc.
  raw_by_id = {}
  ids_and_raw.each do |id, raw|
    raw_by_id[id] = raw unless raw_by_id.key?(id)
  end

  results.each do |result|
    raw = raw_by_id[result.id]
    next if raw.nil?

    result.snippet = raw.snippet if result.respond_to?(:snippet=)
    result.html_snippet = snippet_to_html(raw.snippet_a) if result.respond_to?(:html_snippet=)
  end
end
252
+
253
# Builds an HTML string from +snippet+, a list of [text, highlight] pairs.
# Each text has its tags stripped; highlighted pieces are wrapped in <b>.
def snippet_to_html(snippet)
  fragments = snippet.map do |text, highlite|
    plain = strip_tags(text)
    highlite ? "<b>#{plain}</b>" : plain
  end
  fragments.join('')
end
259
+
260
# Strips markup from +text+ using the ActionController view helper of the same name.
def strip_tags(text)
  #TODO better performance?
  require 'action_controller'
  helpers = ::ActionController::Base.helpers
  helpers.strip_tags(text)
end
265
+
266
# Picks the backend settings for the current RAILS_ENV out of
# +active_record_config+ - the 'search' entry first, then 'estraier', then an
# empty hash - and stores the connected backend in +search_backend+.
def connect_backend(active_record_config) #:nodoc:
  env_config = active_record_config[RAILS_ENV]
  backend_config = env_config['search'] || env_config['estraier'] || {}
  self.search_backend = Backends.connect(self, backend_config)
end
271
+
272
# Loads the records whose id is in +ids+, applying +options+ to the find,
# and returns them arranged by +apply_ids_order_to+.
# Returns an empty array without querying when +ids+ is blank.
def find_by_ids_scope(ids, options={})
  return [] if ids.blank?

  id_scope = { :find => { :conditions => ["#{table_name}.id IN (?)", ids] } }
  records = []
  with_scope(id_scope) { records = find(:all, options) }
  apply_ids_order_to(ids, records)
end
280
+
281
# Returns the records in +results+ arranged in the order their ids appear in
# +ids+. Ids without a matching record are dropped.
#
# Builds a new array: +ids+ is not modified, no reference is made to
# Fixnum (removed in Ruby 3.2), and a record whose id is missing from +ids+
# is ignored rather than raising.
def apply_ids_order_to(ids, results)
  record_by_id = {}
  results.each { |record| record_by_id[record.id] = record }

  # uniq: a repeated id yields its record once.
  ids.uniq.map { |id| record_by_id[id] }.compact
end
287
+ end
288
+
289
+ module InstanceMethods
290
+ # Update index for current instance
291
# Re-indexes this record by removing it from the index and adding it again.
# Runs only when +need_update_index?+ is true or +force+ is given.
def update_index(force = false)
  if need_update_index? || force
    remove_from_index
    add_to_index
  end
end
296
+
297
# Hands this record's search texts and search attributes to the backend.
def add_to_index #:nodoc:
  backend = search_backend
  backend.add_to_index(search_texts, search_attrs)
end
300
+
301
# Asks the backend to remove the entry for this record's id.
def remove_from_index #:nodoc:
  backend = search_backend
  backend.remove_from_index(id)
end
304
+
305
+ private
306
# Calls each of the indexer's searchable fields on this record and
# returns the resulting values as an array.
def search_texts
  fields = search_indexer.searchable_fields
  fields.map { |field| send(field) }
end
309
+
310
# Builds the attribute hash stored alongside this record in the index.
#
# Always contains 'db_id' (the id as a string) and '@uri'
# ("/ClassName/id"). 'type_base' is added when the class reports
# +descends_from_active_record?+. Every entry of the indexer's
# +attributes_to_store+ is added as well: the value comes from the mapped
# method, or from the attribute name itself when no method is given, and is
# stored as a string (Time values via +xmlschema+).
def search_attrs
  klass = self.class
  attrs = { 'db_id' => id.to_s,
            '@uri'  => "/#{klass.to_s}/#{id}" }

  # for STI
  # NOTE(review): the condition is true for classes that descend directly
  # from ActiveRecord::Base - confirm this is the intended STI behaviour.
  attrs["type_base"] = klass.base_class.to_s if klass.descends_from_active_record?

  to_stores = search_indexer.attributes_to_store
  return attrs if to_stores.blank?

  to_stores.each do |attribute, reader|
    value = send(reader || attribute)
    value = value.xmlschema if value.is_a?(Time)
    attrs[attribute] = value.to_s
  end
  attrs
end
327
+ end
328
+ end
329
+
330
# Mix SearchDo into every ActiveRecord model so acts_as_searchable is available.
ActiveRecord::Base.send(:include, SearchDo)