search_do 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +1 -0
- data/MIT-LICENSE +20 -0
- data/README.rdoc +63 -0
- data/Rakefile +47 -0
- data/TESTING +6 -0
- data/VERSION +1 -0
- data/examples/he_search.rb +13 -0
- data/examples/person.rb +20 -0
- data/init.rb +1 -0
- data/lib/estraier_admin.rb +47 -0
- data/lib/search_do/backends/hyper_estraier/estraier_pure_extention.rb +61 -0
- data/lib/search_do/backends/hyper_estraier.rb +213 -0
- data/lib/search_do/backends.rb +17 -0
- data/lib/search_do/dirty_tracking/bridge.rb +22 -0
- data/lib/search_do/dirty_tracking/self_made.rb +36 -0
- data/lib/search_do/dirty_tracking.rb +15 -0
- data/lib/search_do/indexer.rb +65 -0
- data/lib/search_do/utils.rb +11 -0
- data/lib/search_do.rb +330 -0
- data/lib/vendor/estraierpure.rb +1025 -0
- data/lib/vendor/overview +100 -0
- data/recipes/mode_maintenance.rb +52 -0
- data/spec/backends/hyper_estraier_spec.rb +220 -0
- data/spec/backends/result_document_spec.rb +26 -0
- data/spec/dirty_tracking/bridge_spec.rb +33 -0
- data/spec/estraier_admin_spec.rb +26 -0
- data/spec/fixtures/stories.yml +27 -0
- data/spec/indexer_spec.rb +59 -0
- data/spec/search_do_spec.rb +335 -0
- data/spec/setup_test_model.rb +38 -0
- data/spec/spec_helper.rb +52 -0
- data/tasks/acts_as_searchable_tasks.rake +70 -0
- metadata +95 -0
data/lib/search_do.rb
ADDED
@@ -0,0 +1,330 @@
|
|
1
|
+
# Copyright (c) 2006 Patrick Lenz
|
2
|
+
#
|
3
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
# a copy of this software and associated documentation files (the
|
5
|
+
# "Software"), to deal in the Software without restriction, including
|
6
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
# the following conditions:
|
10
|
+
#
|
11
|
+
# The above copyright notice and this permission notice shall be
|
12
|
+
# included in all copies or substantial portions of the Software.
|
13
|
+
#
|
14
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
21
|
+
#
|
22
|
+
# Thanks: Rick Olson (technoweenie) for his numerous plugins that served
|
23
|
+
# as an example
|
24
|
+
|
25
|
+
require 'search_do/indexer'
|
26
|
+
require 'search_do/dirty_tracking'
|
27
|
+
require 'search_do/backends'
|
28
|
+
require 'vendor/estraierpure'
|
29
|
+
|
30
|
+
# Specify this act if you want to provide fulltext search capabilities to your model via Hyper Estraier. This
|
31
|
+
# assumes a setup and running Hyper Estraier node accessible through the HTTP API provided by the EstraierPure
|
32
|
+
# Ruby module (which is bundled with this plugin).
|
33
|
+
#
|
34
|
+
# The act supplies appropriate hooks to insert, update and remove documents from the index when you update your
|
35
|
+
# model data, create new objects or remove them from your database. For the initial indexing a convenience
|
36
|
+
# class method <tt>reindex!</tt> is provided.
|
37
|
+
#
|
38
|
+
# Example:
|
39
|
+
#
|
40
|
+
# class Article < ActiveRecord::Base
|
41
|
+
# attr_accessor :snippet
|
42
|
+
# acts_as_searchable
|
43
|
+
# end
|
44
|
+
#
|
45
|
+
# Article.reindex!
|
46
|
+
#
|
47
|
+
# As soon as your model data has been indexed you can make use of the <tt>fulltext_search</tt> class method
|
48
|
+
# to search the index and get back instantiated matches.
|
49
|
+
#
|
50
|
+
# results = Article.fulltext_search('rails')
|
51
|
+
# results.size # => 3
|
52
|
+
#
|
53
|
+
# results.first.class # => Article
|
54
|
+
# results.first.body # => "Ruby on Rails is an open-source web framework"
|
55
|
+
#
|
56
|
+
# Connectivity configuration can be either inherited from conventions or setup globally in the Rails
|
57
|
+
# database configuration file <tt>config/database.yml</tt>.
|
58
|
+
#
|
59
|
+
# Example:
|
60
|
+
#
|
61
|
+
# development:
|
62
|
+
# adapter: mysql
|
63
|
+
# database: rails_development
|
64
|
+
# host: localhost
|
65
|
+
# user: root
|
66
|
+
# password:
|
67
|
+
# estraier:
|
68
|
+
# host: localhost
|
69
|
+
# user: admin
|
70
|
+
# password: admin
|
71
|
+
# port: 1978
|
72
|
+
# node: development
|
73
|
+
#
|
74
|
+
# That way you can configure separate connections for each environment. The values shown above represent the
|
75
|
+
# defaults. If you don't need to change any of these it is safe to not specify the <tt>estraier</tt> hash
|
76
|
+
# at all.
|
77
|
+
#
|
78
|
+
# See SearchDo::ClassMethods#acts_as_searchable for per-model configuration options
|
79
|
+
#
|
80
|
+
module SearchDo
|
81
|
+
|
82
|
+
# Mixin hook: extends the including class with ClassMethods, which is where
# +acts_as_searchable+ is defined.
def self.included(base) #:nodoc:
  base.extend(ClassMethods)
end
|
85
|
+
|
86
|
+
module ClassMethods
|
87
|
+
VALID_FULLTEXT_OPTIONS = [:limit, :offset, :order, :attributes, :raw_matches, :find, :count]
|
88
|
+
|
89
|
+
# == Configuration options
|
90
|
+
#
|
91
|
+
# * <tt>searchable_fields</tt> - Fields to provide searching and indexing for (default: 'body')
|
92
|
+
# * <tt>attributes</tt> - Additional attributes to store in Hyper Estraier with the appropriate method supplying the value
|
93
|
+
# * <tt>if_changed</tt> - Extra list of attributes to add to the list of attributes that trigger an index update when changed
|
94
|
+
#
|
95
|
+
# Examples:
|
96
|
+
#
|
97
|
+
# acts_as_searchable :attributes => { :title => nil, :blog => :blog_title }, :searchable_fields => [ :title, :body ]
|
98
|
+
#
|
99
|
+
# This would store the return value of the <tt>title</tt> method in the <tt>title</tt> attribute and the return value of the
|
100
|
+
# <tt>blog_title</tt> method in the <tt>blog</tt> attribute. The contents of the <tt>title</tt> and <tt>body</tt> columns
|
101
|
+
# would end up being indexed for searching.
|
102
|
+
#
|
103
|
+
# == Attribute naming
|
104
|
+
#
|
105
|
+
# Attributes that match the reserved names of the Hyper Estraier system attributes are mapped automatically. This is something
|
106
|
+
# to keep in mind for custom ordering options or additional query constraints in <tt>fulltext_search</tt>
|
107
|
+
# For a list of these attributes see <tt>EstraierPure::SYSTEM_ATTRIBUTES</tt> or visit:
|
108
|
+
#
|
109
|
+
# http://hyperestraier.sourceforge.net/uguide-en.html#attributes
|
110
|
+
#
|
111
|
+
# From the example above:
|
112
|
+
#
|
113
|
+
# Model.fulltext_search('query', :order => '@title STRA') # Returns results ordered by title in ascending order
|
114
|
+
# Model.fulltext_search('query', :attributes => 'blog STREQ poocs.net') # Returns results with a blog attribute of 'poocs.net'
|
115
|
+
#
|
116
|
+
def acts_as_searchable(options = {})
  # Nothing to do when SearchDo::InstanceMethods is already among the
  # included modules.
  return if included_modules.include?(SearchDo::InstanceMethods)

  cattr_accessor :search_indexer, :search_backend

  # Build and configure the indexer first, then publish it on the class.
  indexer = SearchDo::Indexer.new(self, configurations)
  indexer.searchable_fields   = options[:searchable_fields] || [ :body ]
  indexer.attributes_to_store = options[:attributes] || {}
  indexer.if_changed          = options[:if_changed] || []
  self.search_indexer = indexer

  track_timestamps = !options[:ignore_timestamp] && record_timestamps
  search_indexer.record_timestamps! if track_timestamps

  # Callbacks stay enabled unless :auto_update is explicitly false
  # (leaving the option out keeps them on).
  search_indexer.add_callbacks! unless options[:auto_update] == false

  include SearchDo::InstanceMethods
  include SearchDo::DirtyTracking

  connect_backend(configurations)
end
|
140
|
+
|
141
|
+
# Perform a fulltext search against the Hyper Estraier index.
|
142
|
+
#
|
143
|
+
# Adds a snippet (the text surrounding the place where the word was found) to each result if the model responds to snippet=
|
144
|
+
#
|
145
|
+
# Options taken:
|
146
|
+
# * <tt>limit</tt> - Maximum number of records to retrieve (default: <tt>100</tt>)
|
147
|
+
# * <tt>offset</tt> - Number of records to skip (default: <tt>0</tt>)
|
148
|
+
# * <tt>order</tt> - Hyper Estraier expression to sort the results (example: <tt>@title STRA</tt>, default: ordering by score)
|
149
|
+
# * <tt>attributes</tt> - String to append to Hyper Estraier search query
|
150
|
+
# * <tt>raw_matches</tt> - Returns raw Hyper Estraier documents instead of instantiated AR objects
|
151
|
+
# * <tt>find</tt> - Options to pass on to the <tt>ActiveRecord::Base#find</tt> call
|
152
|
+
# * <tt>count</tt> - Set this to <tt>true</tt> if you're using <tt>fulltext_search</tt> in conjunction with <tt>ActionController::Pagination</tt> to return the number of matches only
|
153
|
+
#
|
154
|
+
# Examples:
|
155
|
+
#
|
156
|
+
# Article.fulltext_search("biscuits AND gravy")
|
157
|
+
# Article.fulltext_search("biscuits AND gravy", :limit => 15, :offset => 14)
|
158
|
+
# Article.fulltext_search("biscuits AND gravy", :attributes => "tag STRINC food")
|
159
|
+
# Article.fulltext_search("biscuits AND gravy", :attributes => {:user_id=>1})
|
160
|
+
# Article.fulltext_search("biscuits AND gravy", :attributes => {:tag=>'food'})
|
161
|
+
# Article.fulltext_search("biscuits AND gravy", :attributes => ["tag STRINC food", "@title STRBW Biscuit"])
|
162
|
+
# Article.fulltext_search("biscuits AND gravy", :order => "created_at DESC")
|
163
|
+
# Article.fulltext_search("biscuits AND gravy", :raw_matches => true)
|
164
|
+
# Article.fulltext_search("biscuits AND gravy", :find => { :order => :title, :include => :comments })
|
165
|
+
#
|
166
|
+
# Consult the Hyper Estraier documentation on proper query syntax:
|
167
|
+
#
|
168
|
+
# http://hyperestraier.sourceforge.net/uguide-en.html#searchcond
|
169
|
+
#
|
170
|
+
def fulltext_search(query = "", options = {})
  # Work on a copy so the caller's :find hash is never modified. :limit and
  # :offset are removed from the options forwarded to find; the top-level
  # +options+ (including any :limit/:offset) go to the index lookup instead.
  find_options = (options[:find] || {}).dup
  [ :limit, :offset ].each { |key| find_options.delete(key) }

  ids_and_raw = matched_ids_and_raw(query, options)
  ids = ids_and_raw.map { |id, _raw| id }

  results = find_by_ids_scope(ids, find_options)
  # Snippets are only attached when a non-blank query was given.
  add_snippets(results, ids_and_raw) unless query.blank?
  results
end
|
181
|
+
|
182
|
+
# Paginated variant of +fulltext_search+ built on WillPaginate::Collection.
#
# +options+ carries the paging keys read by +wp_parse_options+
# (<tt>:page</tt>, <tt>:per_page</tt>) plus any +fulltext_search+ options.
# The caller's hash is not modified.
def paginate_by_fulltext_search(query, options={})
  paging = wp_parse_options(options)
  page, per_page, _total = paging

  # Translate the paging keys into the :limit/:offset options understood by
  # fulltext_search, on a copy so the caller can reuse its options hash.
  search_options = options.dup
  search_options.delete(:page)
  search_options.delete(:per_page)
  search_options[:limit]  = per_page
  search_options[:offset] = (page.to_i - 1) * per_page

  WillPaginate::Collection.create(*paging) do |pager|
    pager.replace fulltext_search(query, search_options)

    # replace may already have set total_entries; only ask the backend for
    # the overall match count when it has not.
    unless pager.total_entries
      pager.total_entries = count_fulltext(query, :attributes => search_options[:attributes] || {})
    end
  end
end
|
201
|
+
|
202
|
+
# Returns the search backend's count for +query+; +options+ are handed to the
# backend unchanged.
def count_fulltext(query, options={})
  backend = search_backend
  backend.count(query, options)
end
|
205
|
+
|
206
|
+
# This method is NOT compatible with the original acts_as_searchable (AAS).
|
207
|
+
# FIXME: I see no need for this method
|
208
|
+
def find_fulltext(query, options={}, with_mdate_desc_order=true)
  # Only the ordering is passed to the index lookup; +options+ are used for
  # the database find alone.
  fulltext_option = with_mdate_desc_order ? { :order => :updated_at } : {}
  find_by_ids_scope(matched_ids(query, fulltext_option), options)
end
|
214
|
+
|
215
|
+
#[[1,Raw],[4,Raw],...]
|
216
|
+
# Asks the search backend for every match of +query+ and returns whatever
# +search_all_ids_and_raw+ yields.
def matched_ids_and_raw(query = "", options = {})
  backend = search_backend
  backend.search_all_ids_and_raw(query, options)
end
|
219
|
+
|
220
|
+
# Ids only: the first element of every pair returned by matched_ids_and_raw.
def matched_ids(query = "", options = {})
  pairs = matched_ids_and_raw(query, options)
  pairs.map { |doc_id, _raw| doc_id }
end
|
223
|
+
|
224
|
+
# Raw documents only: the second element of every pair returned by
# matched_ids_and_raw.
def matched_raw(query = "", options = {})
  pairs = matched_ids_and_raw(query, options)
  pairs.map { |_doc_id, raw_doc| raw_doc }
end
|
227
|
+
alias :raw_matches :matched_raw
|
228
|
+
|
229
|
+
# Returns the search backend's +index+ object.
def raw_fulltext_index
  backend = search_backend
  backend.index
end
|
232
|
+
|
233
|
+
# Clear all entries from index
|
234
|
+
def clear_index!
  # Delegated entirely to the search backend.
  backend = search_backend
  backend.clear_index!
end
|
237
|
+
|
238
|
+
# Perform a full re-index of the model data for this model
|
239
|
+
def reindex!
  # force = true: every record is re-indexed whether or not it needs it.
  find(:all).each do |record|
    record.update_index(true)
  end
end
|
242
|
+
|
243
|
+
private
|
244
|
+
|
245
|
+
# Copies snippet data from the raw search documents onto the matching
# records. +ids_and_raw+ is a list of [id, raw] pairs; a record only receives
# the values it has writers for (snippet= and/or html_snippet=).
def add_snippets(results,ids_and_raw)
  results.each do |record|
    # Find the [id, raw] pair for this record and take the raw document.
    raw_doc = ids_and_raw.assoc(record.id).at(1)
    if record.respond_to?(:snippet=)
      record.snippet = raw_doc.snippet
    end
    if record.respond_to?(:html_snippet=)
      record.html_snippet = snippet_to_html(raw_doc.snippet_a)
    end
  end
end
|
252
|
+
|
253
|
+
# Renders snippet fragments as one HTML string. Each fragment is a
# [text, highlighted] pair: the text is run through strip_tags, and
# highlighted fragments are wrapped in <b> tags.
def snippet_to_html(snippet)
  fragments = snippet.map do |text, highlite|
    clean = strip_tags(text)
    highlite ? "<b>#{clean}</b>" : clean
  end
  fragments.join('')
end
|
259
|
+
|
260
|
+
# Delegates to the +strip_tags+ view helper exposed by
# ActionController::Base.helpers.
def strip_tags(text)
  #TODO better performance?
  # action_controller is required here, at call time, rather than at file load.
  require 'action_controller'
  view_helpers = ::ActionController::Base.helpers
  view_helpers.strip_tags(text)
end
|
265
|
+
|
266
|
+
# Picks the backend settings for the current RAILS_ENV out of the given
# configuration hash ('search' key first, then 'estraier', else an empty
# hash) and stores the connected backend in +search_backend+.
def connect_backend(active_record_config) #:nodoc:
  backend_config = active_record_config[RAILS_ENV]['search']
  backend_config ||= active_record_config[RAILS_ENV]['estraier']
  backend_config ||= {}
  self.search_backend = Backends.connect(self, backend_config)
end
|
271
|
+
|
272
|
+
# Loads the records whose primary key is in +ids+ (with +options+ passed on
# to find) and hands them to apply_ids_order_to together with +ids+.
# Returns an empty array when +ids+ is blank.
def find_by_ids_scope(ids, options={})
  return [] if ids.blank?

  id_scope = { :find => { :conditions => ["#{table_name}.id IN (?)", ids] } }
  found = []
  with_scope(id_scope) { found = find(:all, options) }
  apply_ids_order_to(ids, found)
end
|
280
|
+
|
281
|
+
# Returns the records in +results+ arranged in the order their ids appear in
# +ids+. Ids that have no matching record are dropped. +ids+ itself is not
# modified.
#
# Works for any id type: it does not rely on ids being Integers (nor on the
# Fixnum constant, which no longer exists in current Ruby).
def apply_ids_order_to(ids,results)
  # Index the records by id once instead of scanning +ids+ for every record.
  records_by_id = {}
  results.each { |item| records_by_id[item.id] ||= item }

  # Walk the ids in order; compact removes the ids nothing was found for.
  ids.uniq.map { |id| records_by_id[id] }.compact
end
|
287
|
+
end
|
288
|
+
|
289
|
+
module InstanceMethods
|
290
|
+
# Update index for current instance
|
291
|
+
def update_index(force = false)
  # Refresh this record's index entry (remove, then add again) when it needs
  # one or when +force+ is set; otherwise do nothing and return nil.
  if need_update_index? || force
    remove_from_index
    add_to_index
  end
end
|
296
|
+
|
297
|
+
# Sends this record's searchable texts and attributes to the search backend.
def add_to_index #:nodoc:
  backend = search_backend
  backend.add_to_index(search_texts, search_attrs)
end
|
300
|
+
|
301
|
+
# Asks the search backend to drop the entry for this record's id.
def remove_from_index #:nodoc:
  backend = search_backend
  backend.remove_from_index(id)
end
|
304
|
+
|
305
|
+
private
|
306
|
+
# Values of all searchable fields, obtained by calling the method named after
# each field, in the order the indexer lists them.
def search_texts
  fields = search_indexer.searchable_fields
  fields.map { |field| send(field) }
end
|
309
|
+
|
310
|
+
# Builds the attribute hash stored alongside the indexed document: the record
# id ('db_id'), a '@uri' of the form "/ClassName/id", optionally 'type_base',
# and one entry per configured attribute to store (Time values are written in
# xmlschema format, everything as a String).
def search_attrs
  model = self.class
  attrs = { 'db_id' => id.to_s, '@uri' => "/#{model.to_s}/#{id}" }

  # for STI
  # NOTE(review): descends_from_active_record? looks like it would be false
  # for STI subclasses, so this guard may be inverted -- confirm against the
  # backend's use of type_base before changing it.
  attrs["type_base"] = model.base_class.to_s if model.descends_from_active_record?

  stored = search_indexer.attributes_to_store
  unless stored.blank?
    stored.each do |attribute, reader|
      # A nil reader means the value comes from the method named after the attribute.
      value = send(reader || attribute)
      value = value.xmlschema if value.is_a?(Time)
      attrs[attribute] = value.to_s
    end
  end
  attrs
end
|
327
|
+
end
|
328
|
+
end
|
329
|
+
|
330
|
+
ActiveRecord::Base.send :include, SearchDo
|