cehoffman-acts_as_ferret 0.4.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE +20 -0
- data/README +68 -0
- data/bin/aaf_install +23 -0
- data/config/ferret_server.yml +24 -0
- data/doc/README.win32 +23 -0
- data/doc/demo/README +154 -0
- data/doc/demo/README_DEMO +23 -0
- data/doc/demo/Rakefile +10 -0
- data/doc/demo/app/controllers/admin/backend_controller.rb +14 -0
- data/doc/demo/app/controllers/admin_area_controller.rb +4 -0
- data/doc/demo/app/controllers/application.rb +5 -0
- data/doc/demo/app/controllers/contents_controller.rb +49 -0
- data/doc/demo/app/controllers/searches_controller.rb +8 -0
- data/doc/demo/app/helpers/admin/backend_helper.rb +2 -0
- data/doc/demo/app/helpers/application_helper.rb +3 -0
- data/doc/demo/app/helpers/content_helper.rb +2 -0
- data/doc/demo/app/helpers/search_helper.rb +2 -0
- data/doc/demo/app/models/comment.rb +48 -0
- data/doc/demo/app/models/content.rb +12 -0
- data/doc/demo/app/models/content_base.rb +28 -0
- data/doc/demo/app/models/search.rb +19 -0
- data/doc/demo/app/models/shared_index1.rb +3 -0
- data/doc/demo/app/models/shared_index2.rb +3 -0
- data/doc/demo/app/models/special_content.rb +3 -0
- data/doc/demo/app/models/stats.rb +20 -0
- data/doc/demo/app/views/admin/backend/search.rhtml +18 -0
- data/doc/demo/app/views/contents/_form.rhtml +10 -0
- data/doc/demo/app/views/contents/edit.rhtml +9 -0
- data/doc/demo/app/views/contents/index.rhtml +24 -0
- data/doc/demo/app/views/contents/new.rhtml +8 -0
- data/doc/demo/app/views/contents/show.rhtml +8 -0
- data/doc/demo/app/views/layouts/application.html.erb +17 -0
- data/doc/demo/app/views/searches/_content.html.erb +2 -0
- data/doc/demo/app/views/searches/search.html.erb +20 -0
- data/doc/demo/config/boot.rb +109 -0
- data/doc/demo/config/database.yml +38 -0
- data/doc/demo/config/environment.rb +69 -0
- data/doc/demo/config/environments/development.rb +16 -0
- data/doc/demo/config/environments/production.rb +19 -0
- data/doc/demo/config/environments/test.rb +21 -0
- data/doc/demo/config/ferret_server.yml +18 -0
- data/doc/demo/config/lighttpd.conf +40 -0
- data/doc/demo/config/routes.rb +9 -0
- data/doc/demo/db/development_structure.sql +15 -0
- data/doc/demo/db/migrate/001_initial_migration.rb +18 -0
- data/doc/demo/db/migrate/002_add_type_to_contents.rb +9 -0
- data/doc/demo/db/migrate/003_create_shared_index1s.rb +11 -0
- data/doc/demo/db/migrate/004_create_shared_index2s.rb +11 -0
- data/doc/demo/db/migrate/005_special_field.rb +9 -0
- data/doc/demo/db/migrate/006_create_stats.rb +15 -0
- data/doc/demo/db/schema.sql +18 -0
- data/doc/demo/doc/README_FOR_APP +2 -0
- data/doc/demo/doc/howto.txt +70 -0
- data/doc/demo/public/.htaccess +40 -0
- data/doc/demo/public/404.html +8 -0
- data/doc/demo/public/500.html +8 -0
- data/doc/demo/public/dispatch.cgi +10 -0
- data/doc/demo/public/dispatch.fcgi +24 -0
- data/doc/demo/public/dispatch.rb +10 -0
- data/doc/demo/public/favicon.ico +0 -0
- data/doc/demo/public/images/rails.png +0 -0
- data/doc/demo/public/index.html +277 -0
- data/doc/demo/public/robots.txt +1 -0
- data/doc/demo/public/stylesheets/scaffold.css +74 -0
- data/doc/demo/script/about +3 -0
- data/doc/demo/script/breakpointer +3 -0
- data/doc/demo/script/console +3 -0
- data/doc/demo/script/destroy +3 -0
- data/doc/demo/script/ferret_server +10 -0
- data/doc/demo/script/generate +3 -0
- data/doc/demo/script/performance/benchmarker +3 -0
- data/doc/demo/script/performance/profiler +3 -0
- data/doc/demo/script/plugin +3 -0
- data/doc/demo/script/process/inspector +3 -0
- data/doc/demo/script/process/reaper +3 -0
- data/doc/demo/script/process/spawner +3 -0
- data/doc/demo/script/process/spinner +3 -0
- data/doc/demo/script/runner +3 -0
- data/doc/demo/script/server +3 -0
- data/doc/demo/test/fixtures/comments.yml +12 -0
- data/doc/demo/test/fixtures/contents.yml +13 -0
- data/doc/demo/test/fixtures/remote_contents.yml +9 -0
- data/doc/demo/test/fixtures/shared_index1s.yml +7 -0
- data/doc/demo/test/fixtures/shared_index2s.yml +7 -0
- data/doc/demo/test/functional/admin/backend_controller_test.rb +35 -0
- data/doc/demo/test/functional/contents_controller_test.rb +81 -0
- data/doc/demo/test/functional/searches_controller_test.rb +71 -0
- data/doc/demo/test/smoke/drb_smoke_test.rb +321 -0
- data/doc/demo/test/smoke/process_stats.rb +21 -0
- data/doc/demo/test/test_helper.rb +30 -0
- data/doc/demo/test/unit/comment_test.rb +217 -0
- data/doc/demo/test/unit/content_test.rb +705 -0
- data/doc/demo/test/unit/ferret_result_test.rb +24 -0
- data/doc/demo/test/unit/multi_index_test.rb +329 -0
- data/doc/demo/test/unit/remote_index_test.rb +23 -0
- data/doc/demo/test/unit/shared_index1_test.rb +108 -0
- data/doc/demo/test/unit/shared_index2_test.rb +13 -0
- data/doc/demo/test/unit/sort_test.rb +21 -0
- data/doc/demo/test/unit/special_content_test.rb +25 -0
- data/doc/demo/vendor/plugins/will_paginate/LICENSE +18 -0
- data/doc/demo/vendor/plugins/will_paginate/README +108 -0
- data/doc/demo/vendor/plugins/will_paginate/Rakefile +23 -0
- data/doc/demo/vendor/plugins/will_paginate/init.rb +21 -0
- data/doc/demo/vendor/plugins/will_paginate/lib/will_paginate/collection.rb +45 -0
- data/doc/demo/vendor/plugins/will_paginate/lib/will_paginate/core_ext.rb +44 -0
- data/doc/demo/vendor/plugins/will_paginate/lib/will_paginate/finder.rb +159 -0
- data/doc/demo/vendor/plugins/will_paginate/lib/will_paginate/view_helpers.rb +95 -0
- data/doc/demo/vendor/plugins/will_paginate/test/array_pagination_test.rb +23 -0
- data/doc/demo/vendor/plugins/will_paginate/test/boot.rb +27 -0
- data/doc/demo/vendor/plugins/will_paginate/test/console +10 -0
- data/doc/demo/vendor/plugins/will_paginate/test/finder_test.rb +219 -0
- data/doc/demo/vendor/plugins/will_paginate/test/fixtures/admin.rb +3 -0
- data/doc/demo/vendor/plugins/will_paginate/test/fixtures/companies.yml +24 -0
- data/doc/demo/vendor/plugins/will_paginate/test/fixtures/company.rb +23 -0
- data/doc/demo/vendor/plugins/will_paginate/test/fixtures/developer.rb +11 -0
- data/doc/demo/vendor/plugins/will_paginate/test/fixtures/developers_projects.yml +13 -0
- data/doc/demo/vendor/plugins/will_paginate/test/fixtures/project.rb +4 -0
- data/doc/demo/vendor/plugins/will_paginate/test/fixtures/projects.yml +7 -0
- data/doc/demo/vendor/plugins/will_paginate/test/fixtures/replies.yml +20 -0
- data/doc/demo/vendor/plugins/will_paginate/test/fixtures/reply.rb +5 -0
- data/doc/demo/vendor/plugins/will_paginate/test/fixtures/schema.sql +44 -0
- data/doc/demo/vendor/plugins/will_paginate/test/fixtures/topic.rb +19 -0
- data/doc/demo/vendor/plugins/will_paginate/test/fixtures/topics.yml +30 -0
- data/doc/demo/vendor/plugins/will_paginate/test/fixtures/user.rb +2 -0
- data/doc/demo/vendor/plugins/will_paginate/test/fixtures/users.yml +35 -0
- data/doc/demo/vendor/plugins/will_paginate/test/helper.rb +42 -0
- data/doc/demo/vendor/plugins/will_paginate/test/lib/activerecord_test_connector.rb +64 -0
- data/doc/demo/vendor/plugins/will_paginate/test/lib/load_fixtures.rb +10 -0
- data/doc/demo/vendor/plugins/will_paginate/test/pagination_test.rb +136 -0
- data/doc/monit-example +22 -0
- data/init.rb +24 -0
- data/install.rb +18 -0
- data/lib/act_methods.rb +147 -0
- data/lib/acts_as_ferret.rb +584 -0
- data/lib/ar_mysql_auto_reconnect_patch.rb +41 -0
- data/lib/blank_slate.rb +53 -0
- data/lib/bulk_indexer.rb +38 -0
- data/lib/class_methods.rb +270 -0
- data/lib/ferret_extensions.rb +188 -0
- data/lib/ferret_find_methods.rb +141 -0
- data/lib/ferret_result.rb +53 -0
- data/lib/ferret_server.rb +238 -0
- data/lib/index.rb +99 -0
- data/lib/instance_methods.rb +171 -0
- data/lib/local_index.rb +205 -0
- data/lib/more_like_this.rb +217 -0
- data/lib/multi_index.rb +126 -0
- data/lib/rdig_adapter.rb +148 -0
- data/lib/remote_functions.rb +23 -0
- data/lib/remote_index.rb +54 -0
- data/lib/remote_multi_index.rb +20 -0
- data/lib/search_results.rb +50 -0
- data/lib/server_manager.rb +58 -0
- data/lib/unix_daemon.rb +64 -0
- data/lib/without_ar.rb +52 -0
- data/rakefile +141 -0
- data/recipes/aaf_recipes.rb +114 -0
- data/script/ferret_daemon +94 -0
- data/script/ferret_server +10 -0
- data/script/ferret_service +178 -0
- data/tasks/ferret.rake +22 -0
- metadata +258 -0
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
# Source: http://pastie.caboo.se/154842
|
|
2
|
+
#
|
|
3
|
+
# in /etc/my.cnf on the MySQL server, you can set the interactive-timeout parameter,
|
|
4
|
+
# for example, 12 hours = 28800 sec
|
|
5
|
+
# interactive-timeout=28800
|
|
6
|
+
|
|
7
|
+
# in ActiveRecord, setting the verification_timeout to something less than
|
|
8
|
+
# the interactive-timeout parameter; 14400 sec = 6 hours
|
|
9
|
+
ActiveRecord::Base.verification_timeout = 14400
|
|
10
|
+
ActiveRecord::Base.establish_connection
|
|
11
|
+
|
|
12
|
+
# Below is a monkey patch for keeping ActiveRecord connections alive.
|
|
13
|
+
# http://www.sparecycles.org/2007/7/2/saying-goodbye-to-lost-connections-in-rails
|
|
14
|
+
|
|
15
|
+
module ActiveRecord
  module ConnectionAdapters
    class MysqlAdapter
      # Runs +sql+ through the adapter's +log+ wrapper on the raw connection.
      #
      # If the statement fails because the MySQL connection was lost, the
      # adapter reconnects and retries the statement exactly once. A
      # 'Packets out of order' failure is re-raised with a more helpful
      # message; every other failure is re-raised untouched.
      #
      # sql::  the SQL statement to execute
      # name:: optional label handed to +log+
      def execute(sql, name = nil) #:nodoc:
        # only one reconnect attempt per call, so a dead server cannot make us loop forever
        reconnect_lost_connections = true
        begin
          log(sql, name) { @connection.query(sql) }
        rescue ActiveRecord::StatementInvalid => exception
          # The pattern has to stay on a single line: a line break inside the
          # literal would become part of the regexp and stop it from matching.
          if reconnect_lost_connections && exception.message =~ /(Lost connection to MySQL server during query|MySQL server has gone away)/
            reconnect_lost_connections = false
            reconnect!
            retry
          elsif exception.message.split(":").first =~ /Packets out of order/
            raise ActiveRecord::StatementInvalid,
                  "'Packets out of order' error was received from the database. " \
                  "Please update your mysql bindings (gem install mysql) and read " \
                  "http://dev.mysql.com/doc/mysql/en/password-hashing.html for more information. " \
                  "If you're on Windows, use the Instant Rails installer to get the updated mysql bindings."
          else
            raise
          end
        end
      end
    end
  end
end
|
|
41
|
+
|
data/lib/blank_slate.rb
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
if defined?(BlankSlate)
  # Rails 2.x has it already
  module ActsAsFerret
    class BlankSlate < ::BlankSlate
    end
  end
else
  module ActsAsFerret
    # 'backported' for Rails pre 2.0
    #
    #--
    # Copyright 2004, 2006 by Jim Weirich (jim@weirichhouse.org).
    # All rights reserved.

    # Permission is granted for use, copying, modification, distribution,
    # and distribution of modified versions of this work as long as the
    # above copyright notice is included.
    #++

    ######################################################################
    # BlankSlate provides an abstract base class with no predefined
    # methods (except for <tt>\_\_send__</tt> and <tt>\_\_id__</tt>).
    # BlankSlate is useful as a base class when writing classes that
    # depend upon <tt>method_missing</tt> (e.g. dynamic proxies).
    #
    class BlankSlate
      class << self
        # Hide the method named +name+ in the BlankSlate class. Methods
        # whose name starts with "__", "instance_eval" or "methods" are
        # never hidden. +name+ may be a String or a Symbol.
        #
        # Returns nil when nothing was hidden.
        def hide(name)
          # method_defined? accepts Strings and Symbols alike, so the check
          # does not depend on whether instance_methods returns Strings
          # (Ruby 1.8) or Symbols (Ruby 1.9 and later).
          if method_defined?(name) and name.to_s !~ /^(__|instance_eval|methods)/
            @hidden_methods ||= {}
            @hidden_methods[name.to_sym] = instance_method(name)
            undef_method name
          end
        end

        # Redefine a previously hidden method so that it may be called on a blank
        # slate object.
        #
        # no-op here since we don't hide the methods we reveal where this is
        # used in this implementation
        def reveal(name)
        end
      end

      # strip every public/protected instance method inherited so far
      instance_methods.each { |m| hide(m) }

    end
  end

end
|
|
53
|
+
|
data/lib/bulk_indexer.rb
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
module ActsAsFerret
  # Pushes batches of records into an index and logs the progress made.
  class BulkIndexer
    # Recognized +args+ keys:
    # batch_size:: records per batch, used for the time estimate (default 1000)
    # logger::     receives one +info+ line per indexed batch
    # model::      the model class; asked for +count+ when reindexing, and for +name+ when logging
    # index::      the index the documents are handed to via +update_batch+
    # reindex::    when set, the total is taken from <tt>model.count</tt>
    # total::      total number of records, used when +reindex+ is not set
    def initialize(args = {})
      @batch_size = args[:batch_size] || 1000
      @logger     = args[:logger]
      @model      = args[:model]
      @index      = args[:index]
      @work_done  = 0
      @reindex    = true if args[:reindex]
      @model_count = @reindex ? @model.count.to_f : args[:total]
    end

    # Hands the documents of all ferret enabled +records+ to the index in a
    # single batch, then logs the percentage done and an estimate of the
    # remaining time. +offset+ is the position reported by the caller.
    def index_records(records, offset)
      elapsed = measure_time do
        pairs = []
        records.each do |record|
          next unless record.ferret_enabled?(true)
          pairs << [record.to_doc, record.ferret_analyzer]
        end
        @index.update_batch(pairs)
      end.to_f

      @work_done = (offset.to_f / @model_count) * 100.0 if @model_count > 0
      seconds_per_record = elapsed / @batch_size
      remaining_time = seconds_per_record * (@model_count - offset + @batch_size)
      mode = @reindex ? 're' : 'bulk '
      @logger.info "#{mode}index model #{@model.name} : #{'%.2f' % @work_done}% complete : #{'%.2f' % remaining_time} secs to finish"
    end

    # Returns the wall clock time, in seconds, spent inside the given block.
    def measure_time
      started_at = Time.now
      yield
      Time.now - started_at
    end

  end

end
|
|
@@ -0,0 +1,270 @@
|
|
|
1
|
+
module ActsAsFerret
|
|
2
|
+
|
|
3
|
+
module ClassMethods
|
|
4
|
+
|
|
5
|
+
# Disables ferret index updates for this model. When a block is given,
|
|
6
|
+
# Ferret will be re-enabled again after executing the block.
|
|
7
|
+
def disable_ferret
  aaf_configuration[:enabled] = false
  return unless block_given?
  result = nil
  begin
    yield
  ensure
    # re-enable even if the block raised, so one failing block cannot leave
    # index updates switched off for this model
    result = enable_ferret
  end
  # as before, the block form returns whatever enable_ferret returned
  result
end
|
|
14
|
+
|
|
15
|
+
# Switches ferret index updates for this model on by setting the
# +:enabled+ entry of the model's aaf_configuration.
def enable_ferret
  config = aaf_configuration
  config[:enabled] = true
end
|
|
18
|
+
|
|
19
|
+
# Returns the +:enabled+ entry of the model's aaf_configuration.
def ferret_enabled?
  config = aaf_configuration
  config[:enabled]
end
|
|
22
|
+
|
|
23
|
+
# rebuild the index from all data stored for this model, and any other
|
|
24
|
+
# model classes associated with the same index.
|
|
25
|
+
# This is called automatically when no index exists yet.
|
|
26
|
+
#
|
|
27
|
+
# Delegates to +rebuild_index+ of this model's index (see +aaf_index+).
def rebuild_index
  index = aaf_index
  index.rebuild_index
end
|
|
30
|
+
|
|
31
|
+
# re-index a number of records specified by the given ids. Use for large
|
|
32
|
+
# indexing jobs i.e. after modifying a lot of records with Ferret disabled.
|
|
33
|
+
# Please note that the state of Ferret (enabled or disabled at class or
|
|
34
|
+
# record level) is not checked by this method, so if you need to do so
|
|
35
|
+
# (e.g. because of a custom ferret_enabled? implementation), you have to do
|
|
36
|
+
# so yourself.
|
|
37
|
+
# Accepts the ids either as separate arguments or as a single Enumerable;
# a trailing Hash is split off and passed on as options.
def bulk_index(*ids)
  options = case ids.last
            when Hash then ids.pop
            else {}
            end
  if ids.size == 1 && ids.first.is_a?(Enumerable)
    ids = ids.first
  end
  aaf_index.bulk_index(self.name, ids, options)
end
|
|
42
|
+
|
|
43
|
+
# true if our db and table appear to be suitable for the mysql fast batch
|
|
44
|
+
# hack (see
|
|
45
|
+
# http://weblog.jamisbuck.org/2007/4/6/faking-cursors-in-activerecord)
|
|
46
|
+
# Returns true (after logging a hint) when the connection class name
# contains "Mysql", the primary key is 'id' and the +:mysql_fast_batches+
# configuration entry is set; returns nil otherwise.
def use_fast_batches?
  mysql = connection.class.name =~ /Mysql/
  return unless mysql && primary_key == 'id' && aaf_configuration[:mysql_fast_batches]
  logger.info "using mysql specific batched find :all. Turn off with :mysql_fast_batches => false if you encounter problems (i.e. because of non-integer UUIDs in the id column)"
  true
end
|
|
52
|
+
|
|
53
|
+
# Returns all records modified or created after the specified time.
|
|
54
|
+
# Used by the rake rebuild task to find models that need to be updated in
|
|
55
|
+
# the index after the rebuild finished because they changed while the
|
|
56
|
+
# rebuild was running.
|
|
57
|
+
# Override if your models don't stick to the created_at/updated_at
|
|
58
|
+
# convention.
|
|
59
|
+
def records_modified_since(time)
  # one "col >= ?" clause per timestamp column this model actually has
  condition = %w(updated_at created_at).select { |col| column_names.include?(col) }.map { |col| "#{col} >= ?" }
  if condition.empty?
    logger.warn "#{self.name}: Override records_modified_since(time) to keep the index up to date with records changed during rebuild."
    []
  else
    # The clauses are ORed: a record counts when it was modified OR created
    # since +time+. ANDing them would drop records that were created earlier
    # but changed while the rebuild was running.
    find :all, :conditions => [ condition.join(' OR '), *([time] * condition.size) ]
  end
end
|
|
71
|
+
|
|
72
|
+
# runs across all records yielding those to be indexed when the index is rebuilt
|
|
73
|
+
# Yields <tt>rows, offset</tt> for every batch of at most +batch_size+
# records, inside a single transaction.
#
# With fast batches each query selects rows whose id is greater than the
# last id seen, and the yielded offset is that last id. Otherwise the
# records are paged with :limit/:offset, ordered by primary key, and the
# yielded offset is the page offset.
def records_for_rebuild(batch_size = 1000)
  transaction do
    if use_fast_batches?
      last_id = 0
      while true
        rows = find :all, :conditions => [ "#{table_name}.id > ?", last_id ], :limit => batch_size
        break unless rows.any?
        last_id = rows.last.id
        yield rows, last_id
      end
    else
      ordering = "#{primary_key} ASC" # fixes #212
      0.step(self.count, batch_size) do |offset|
        page = find( :all, :limit => batch_size, :offset => offset, :order => ordering )
        yield page, offset
      end
    end
  end
end
|
|
89
|
+
|
|
90
|
+
# yields the records with the given ids, in batches of batch_size
|
|
91
|
+
# Loads the records with the given +ids+ in slices of +batch_size+ and
# yields <tt>records, offset</tt> for each slice, inside one transaction.
# +offset+ starts at 0 and grows by +batch_size+ per slice.
def records_for_bulk_index(ids, batch_size = 1000)
  transaction do
    offset = 0
    ids.each_slice(batch_size) do |id_slice|
      # query each slice exactly once
      records = find( :all, :conditions => ["id in (?)", id_slice] )
      yield records, offset
      offset += batch_size
    end
  end
end
|
|
102
|
+
|
|
103
|
+
# Retrieve the index instance for this model class. This can either be a
|
|
104
|
+
# LocalIndex, or a RemoteIndex instance.
|
|
105
|
+
#
|
|
106
|
+
# Looks the index up by the +:name+ entry of aaf_configuration on first
# use and memoizes it in @index.
def aaf_index
  return @index if @index
  @index = ActsAsFerret.get_index(aaf_configuration[:name])
end
|
|
109
|
+
|
|
110
|
+
# Finds instances by searching the Ferret index. Terms are ANDed by default, use
|
|
111
|
+
# OR between terms for ORed queries. Or specify +:or_default => true+ in the
|
|
112
|
+
# +:ferret+ options hash of acts_as_ferret.
|
|
113
|
+
#
|
|
114
|
+
# You may either use the +offset+ and +limit+ options to implement your own
|
|
115
|
+
# pagination logic, or use the +page+ and +per_page+ options to use the
|
|
116
|
+
# built in pagination support which is compatible with will_paginate's view
|
|
117
|
+
# helpers. If +page+ and +per_page+ are given, +offset+ and +limit+ will be
|
|
118
|
+
# ignored.
|
|
119
|
+
#
|
|
120
|
+
# == options:
|
|
121
|
+
# page:: page of search results to retrieve
|
|
122
|
+
# per_page:: number of search results that are displayed per page
|
|
123
|
+
# offset:: first hit to retrieve (useful for paging)
|
|
124
|
+
# limit:: number of hits to retrieve, or :all to retrieve
|
|
125
|
+
# all results
|
|
126
|
+
# lazy:: Array of field names whose contents should be read directly
|
|
127
|
+
# from the index. Those fields have to be marked
|
|
128
|
+
# +:store => :yes+ in their field options. Give true to get all
|
|
129
|
+
# stored fields. Note that if you have a shared index, you have
|
|
130
|
+
# to explicitly state the fields you want to fetch, true won't
|
|
131
|
+
# work here)
|
|
132
|
+
#
|
|
133
|
+
# +find_options+ is a hash passed on to active_record's find when
|
|
134
|
+
# retrieving the data from db, useful to i.e. prefetch relationships with
|
|
135
|
+
# :include or to specify additional filter criteria with :conditions.
|
|
136
|
+
#
|
|
137
|
+
# This method returns a +SearchResults+ instance, which really is an Array that has
|
|
138
|
+
# been decorated with a total_hits attribute holding the total number of hits.
|
|
139
|
+
# Additionally, SearchResults is compatible with the pagination helper
|
|
140
|
+
# methods of the will_paginate plugin.
|
|
141
|
+
#
|
|
142
|
+
# Please keep in mind that the number of results delivered might be less than
|
|
143
|
+
# +limit+ if you specify any active record conditions that further limit
|
|
144
|
+
# the result. Use +limit+ and +offset+ as AR find_options instead.
|
|
145
|
+
# +page+ and +per_page+ are supposed to work regardless of any
|
|
146
|
+
# +conditions+ present in +find_options+.
|
|
147
|
+
# When this class responds to +scope+ and a :conditions find scope is
# active, that scope is merged into <tt>find_options[:conditions]</tt>
# (modifying the given hash) before the search is delegated to
# ActsAsFerret::find.
def find_with_ferret(q, options = {}, find_options = {})
  if respond_to?(:scope) && scope(:find, :conditions)
    find_options[:conditions] =
      if find_options[:conditions]
        "(#{find_options[:conditions]}) AND (#{scope(:find, :conditions)})"
      else
        scope(:find, :conditions)
      end
  end
  ActsAsFerret.find(q, self, options, find_options)
end
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
# Returns the total number of hits for the given query
|
|
160
|
+
#
|
|
161
|
+
# Note that since we don't query the database here, this method won't deliver
|
|
162
|
+
# the expected results when used on an AR association.
|
|
163
|
+
#
|
|
164
|
+
# Delegates to +total_hits+ of this model's index (see +aaf_index+).
def total_hits(q, options={})
  index = aaf_index
  index.total_hits(q, options)
end
|
|
167
|
+
|
|
168
|
+
# Finds instance model name, ids and scores by contents.
|
|
169
|
+
# Useful e.g. if you want to search across models or do not want to fetch
|
|
170
|
+
# all result records (yet).
|
|
171
|
+
#
|
|
172
|
+
# Options are the same as for find_with_ferret
|
|
173
|
+
#
|
|
174
|
+
# A block can be given too, it will be executed with every result:
|
|
175
|
+
# find_ids_with_ferret(q, options) do |model, id, score|
|
|
176
|
+
# id_array << id
|
|
177
|
+
# scores_by_id[id] = score
|
|
178
|
+
# end
|
|
179
|
+
# NOTE: in case a block is given, only the total_hits value will be returned
|
|
180
|
+
# instead of the [total_hits, results] array!
|
|
181
|
+
#
|
|
182
|
+
# Delegates to +find_ids+ of this model's index, forwarding the block.
def find_ids_with_ferret(q, options = {}, &block)
  index = aaf_index
  index.find_ids(q, options, &block)
end
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
protected
|
|
188
|
+
|
|
189
|
+
# def find_records_lazy_or_not(q, options = {}, find_options = {})
|
|
190
|
+
# if options[:lazy]
|
|
191
|
+
# logger.warn "find_options #{find_options} are ignored because :lazy => true" unless find_options.empty?
|
|
192
|
+
# lazy_find_by_contents q, options
|
|
193
|
+
# else
|
|
194
|
+
# ar_find_by_contents q, options, find_options
|
|
195
|
+
# end
|
|
196
|
+
# end
|
|
197
|
+
#
|
|
198
|
+
# def ar_find_by_contents(q, options = {}, find_options = {})
|
|
199
|
+
# result_ids = {}
|
|
200
|
+
# total_hits = find_ids_with_ferret(q, options) do |model, id, score, data|
|
|
201
|
+
# # stores ids, index and score of each hit for later ordering of
|
|
202
|
+
# # results
|
|
203
|
+
# result_ids[id] = [ result_ids.size + 1, score ]
|
|
204
|
+
# end
|
|
205
|
+
#
|
|
206
|
+
# result = ActsAsFerret::retrieve_records( { self.name => result_ids }, find_options )
|
|
207
|
+
#
|
|
208
|
+
# # count total_hits via sql when using conditions or when we're called
|
|
209
|
+
# # from an ActiveRecord association.
|
|
210
|
+
# if find_options[:conditions] or caller.find{ |call| call =~ %r{active_record/associations} }
|
|
211
|
+
# # chances are the ferret result count is not our total_hits value, so
|
|
212
|
+
# # we correct this here.
|
|
213
|
+
# if options[:limit] != :all || options[:page] || options[:offset] || find_options[:limit] || find_options[:offset]
|
|
214
|
+
# # our ferret result has been limited, so we need to re-run that
|
|
215
|
+
# # search to get the full result set from ferret.
|
|
216
|
+
# result_ids = {}
|
|
217
|
+
# find_ids_with_ferret(q, options.update(:limit => :all, :offset => 0)) do |model, id, score, data|
|
|
218
|
+
# result_ids[id] = [ result_ids.size + 1, score ]
|
|
219
|
+
# end
|
|
220
|
+
# # Now ask the database for the total size of the final result set.
|
|
221
|
+
# total_hits = count_records( { self.name => result_ids }, find_options )
|
|
222
|
+
# else
|
|
223
|
+
# # what we got from the database is our full result set, so take
|
|
224
|
+
# # it's size
|
|
225
|
+
# total_hits = result.length
|
|
226
|
+
# end
|
|
227
|
+
# end
|
|
228
|
+
#
|
|
229
|
+
# [ total_hits, result ]
|
|
230
|
+
# end
|
|
231
|
+
#
|
|
232
|
+
# def lazy_find_by_contents(q, options = {})
|
|
233
|
+
# logger.debug "lazy_find_by_contents: #{q}"
|
|
234
|
+
# result = []
|
|
235
|
+
# rank = 0
|
|
236
|
+
# total_hits = find_ids_with_ferret(q, options) do |model, id, score, data|
|
|
237
|
+
# logger.debug "model: #{model}, id: #{id}, data: #{data}"
|
|
238
|
+
# result << FerretResult.new(model, id, score, rank += 1, data)
|
|
239
|
+
# end
|
|
240
|
+
# [ total_hits, result ]
|
|
241
|
+
# end
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
# Resolves +model+ via +constantize+ and calls +find+ on the result with
# +id+ and +find_options+.
def model_find(model, id, find_options = {})
  klass = model.constantize
  klass.find(id, find_options)
end
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
# def count_records(id_arrays, find_options = {})
|
|
250
|
+
# count_options = find_options.dup
|
|
251
|
+
# count_options.delete :limit
|
|
252
|
+
# count_options.delete :offset
|
|
253
|
+
# count = 0
|
|
254
|
+
# id_arrays.each do |model, id_array|
|
|
255
|
+
# next if id_array.empty?
|
|
256
|
+
# model = model.constantize
|
|
257
|
+
# # merge conditions
|
|
258
|
+
# conditions = ActsAsFerret::combine_conditions([ "#{model.table_name}.#{model.primary_key} in (?)", id_array.keys ],
|
|
259
|
+
# find_options[:conditions])
|
|
260
|
+
# opts = find_options.merge :conditions => conditions
|
|
261
|
+
# opts.delete :limit; opts.delete :offset
|
|
262
|
+
# count += model.count opts
|
|
263
|
+
# end
|
|
264
|
+
# count
|
|
265
|
+
# end
|
|
266
|
+
|
|
267
|
+
end
|
|
268
|
+
|
|
269
|
+
end
|
|
270
|
+
|
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
module Ferret
|
|
2
|
+
|
|
3
|
+
module Analysis
|
|
4
|
+
|
|
5
|
+
# = PerFieldAnalyzer
|
|
6
|
+
#
|
|
7
|
+
# This PerFieldAnalyzer is a workaround to a memory leak in
|
|
8
|
+
# ferret 0.11.4. It does basically do the same as the original
|
|
9
|
+
# Ferret::Analysis::PerFieldAnalyzer, but without the leak :)
|
|
10
|
+
#
|
|
11
|
+
# http://ferret.davebalmain.com/api/classes/Ferret/Analysis/PerFieldAnalyzer.html
|
|
12
|
+
#
|
|
13
|
+
# Thanks to Ben from omdb.org for tracking this down and creating this
|
|
14
|
+
# workaround.
|
|
15
|
+
# You can read more about the issue there:
|
|
16
|
+
# http://blog.omdb-beta.org/2007/7/29/tracking-down-a-memory-leak-in-ferret-0-11-4
|
|
17
|
+
class PerFieldAnalyzer < ::Ferret::Analysis::Analyzer
  # default_analyzer:: analyzer used for fields without an analyzer of their own
  def initialize( default_analyzer = StandardAnalyzer.new )
    @default_analyzer = default_analyzer
    @analyzers = {}
  end

  # Registers +analyzer+ for +field+. Also available as <tt>[]=</tt>.
  def add_field( field, analyzer )
    @analyzers[field] = analyzer
  end
  alias_method :[]=, :add_field

  # Returns the token stream of the analyzer registered for +field+, or of
  # the default analyzer if none was registered.
  def token_stream(field, string)
    analyzer = @analyzers.has_key?(field) ? @analyzers[field] : @default_analyzer
    analyzer.token_stream(field, string)
  end
end
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
class Index::Index
|
|
36
|
+
attr_accessor :batch_size, :logger
|
|
37
|
+
|
|
38
|
+
# Indexes every model in +models+ via +index_model+, then flushes,
# optimizes and closes this index and finally calls
# ActsAsFerret::close_multi_indexes.
def index_models(models)
  models.each do |model|
    index_model(model)
  end
  flush
  optimize
  close
  ActsAsFerret.close_multi_indexes
end
|
|
45
|
+
|
|
46
|
+
# Reindexes a single model: every batch yielded by
# <tt>model.records_for_rebuild</tt> is passed to a BulkIndexer created in
# reindex mode.
def index_model(model)
  indexer_options = {
    :batch_size => @batch_size,
    :logger     => logger,
    :model      => model,
    :index      => self,
    :reindex    => true
  }
  bulk_indexer = ActsAsFerret::BulkIndexer.new(indexer_options)
  logger.info "reindexing model #{model.name}"

  model.records_for_rebuild(@batch_size) { |records, offset| bulk_indexer.index_records(records, offset) }
end
|
|
55
|
+
|
|
56
|
+
# Indexes the records of +model+ with the given +ids+ in batches, with
# auto flushing switched off for the duration of the run.
#
# options:
# optimize:: optimize the index after indexing (default true)
#
# Returns the value @auto_flush had before the call.
def bulk_index(model, ids, options = {})
  options.reverse_merge! :optimize => true
  orig_flush = @auto_flush
  @auto_flush = false
  begin
    bulk_indexer = ActsAsFerret::BulkIndexer.new(:batch_size => @batch_size, :logger => logger,
                                                 :model => model, :index => self, :total => ids.size)
    model.records_for_bulk_index(ids, @batch_size) do |records, offset|
      logger.debug "#{model} bulk indexing #{records.size} at #{offset}"
      bulk_indexer.index_records(records, offset)
    end
    logger.info 'finishing bulk index...'
    flush
    if options[:optimize]
      logger.info 'optimizing...'
      optimize
    end
  ensure
    # put the auto flush setting back even when indexing failed, so that a
    # failed bulk run does not leave the index with auto flush switched off
    @auto_flush = orig_flush
  end
  # return value kept as it was before the ensure block was introduced
  orig_flush
end
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
# bulk-inserts a number of ferret documents.
|
|
77
|
+
# The argument has to be an array of two-element arrays each holding the document data and the analyzer to
|
|
78
|
+
# use for this document (which may be nil).
|
|
79
|
+
# Replaces the given documents in one go: the ids found under @id_field
# are deleted first, then every document is added again. A document that
# comes with an analyzer is added with that analyzer set on the writer,
# after which the writer's previous analyzer is put back.
def update_batch(document_analyzer_pairs)
  ids = document_analyzer_pairs.map { |pair| pair.first[@id_field] }
  @dir.synchrolock do
    batch_delete(ids)
    ensure_writer_open()
    document_analyzer_pairs.each do |doc, analyzer|
      unless analyzer
        @writer.add_document(doc)
        next
      end
      previous_analyzer = @writer.analyzer
      @writer.analyzer = analyzer
      @writer.add_document(doc)
      @writer.analyzer = previous_analyzer
    end
    flush()
  end
end
|
|
97
|
+
|
|
98
|
+
# If +docs+ is a Hash or an Array then a batch delete will be performed.
|
|
99
|
+
# If +docs+ is an Array then it will be considered an array of +id+'s. If
|
|
100
|
+
# it is a Hash, then its keys will be used instead as the Array of
|
|
101
|
+
# document +id+'s. If the +id+ is an Integer then it is considered a
|
|
102
|
+
# Ferret document number and the corresponding document will be deleted.
|
|
103
|
+
# If the +id+ is a String or a Symbol then the +id+ will be considered a
|
|
104
|
+
# term and the documents that contain that term in the +:id_field+ will
|
|
105
|
+
# be deleted.
|
|
106
|
+
#
|
|
107
|
+
# docs:: An Array of docs to be deleted, or a Hash (in which case the keys
|
|
108
|
+
# are used)
|
|
109
|
+
#
|
|
110
|
+
# ripped from Ferret trunk.
|
|
111
|
+
# Raises ArgumentError if +docs+ is neither an Array nor a Hash, or if it
# contains an entry that is not a String, Symbol or Integer.
# Returns self.
def batch_delete(docs)
  docs = docs.keys if docs.is_a?(Hash)
  raise ArgumentError, "must pass Array or Hash" unless docs.is_a? Array
  ids = []
  terms = []
  # sort the entries into document numbers (Integers) and id terms
  docs.each do |doc|
    case doc
    when String  then terms << doc
    when Symbol  then terms << doc.to_s
    when Integer then ids << doc
    else
      raise ArgumentError, "Cannot delete for arg of type #{doc.class}"
    end
  end
  if ids.size > 0
    ensure_reader_open
    ids.each {|id| @reader.delete(id)}
  end
  if terms.size > 0
    ensure_writer_open()
    terms.each { |t| @writer.delete(@id_field, t) }
    # TODO with Ferret trunk this would work:
    # @writer.delete(@id_field, terms)
  end
  return self
end
|
|
137
|
+
|
|
138
|
+
# search for the first document with +arg+ in the +id+ field and return its internal document number.
|
|
139
|
+
# The +id+ field is either :id or whatever you set
|
|
140
|
+
# :id_field parameter to when you create the Index object.
|
|
141
|
+
# Returns the document number of the first entry the reader lists for
# <tt>id.to_s</tt> in @id_field, or nil if there is none.
def doc_number(id)
  @dir.synchronize do
    ensure_reader_open()
    hits = @reader.term_docs_for(@id_field, id.to_s)
    return hits.doc if hits.next?
    return nil
  end
end
|
|
148
|
+
end
|
|
149
|
+
|
|
150
|
+
# add marshalling support to SortFields
|
|
151
|
+
class Search::SortField
|
|
152
|
+
# Marshal hook: the dumped form is this object's +to_s+ string.
# +depth+ is not used.
def _dump(depth)
  self.to_s
end
|
|
155
|
+
|
|
156
|
+
# Marshal hook: maps a string as returned by +_dump+ back to one of the
# predefined sort field constants, or to a new instance for strings of the
# form <tt>name:<type></tt> (with a trailing "!" meaning reverse).
#
# Raises a RuntimeError for any other string.
def self._load(string)
  # `then` instead of the old `when ... :` form, which is a syntax error
  # from Ruby 1.9 on. The reversed ("!") patterns have to come first, as
  # the plain patterns would match those strings as well.
  case string
  when /<DOC(_ID)?>!/ then Ferret::Search::SortField::DOC_ID_REV
  when /<DOC(_ID)?>/ then Ferret::Search::SortField::DOC_ID
  when '<SCORE>!' then Ferret::Search::SortField::SCORE_REV
  when '<SCORE>' then Ferret::Search::SortField::SCORE
  when /^(\w+):<(\w+)>(!)?$/ then new($1.to_sym, :type => $2.to_sym, :reverse => !$3.nil?)
  else raise "invalid value: #{string}"
  end
end
|
|
166
|
+
end
|
|
167
|
+
|
|
168
|
+
# add marshalling support to Sort
|
|
169
|
+
class Search::Sort
|
|
170
|
+
# Marshal hook: the dumped form is this object's +to_s+ string.
# +depth+ is not used.
def _dump(depth)
  self.to_s
end
|
|
173
|
+
|
|
174
|
+
# Marshal hook: rebuilds a sort from a string of the form
# <tt>Sort[field, field, ...]</tt>. Each non-blank, comma separated entry
# is loaded through Ferret::Search::SortField._load.
#
# Raises a RuntimeError if the string does not have that form.
def self._load(string)
  # we exclude the last <DOC> sorting as it is appended by new anyway
  unless string =~ /^Sort\[(.*?)(<DOC>(!)?)?\]$/
    raise "invalid value: #{string}"
  end
  loaded = $1.split(',').map do |value|
    value.strip!
    if value.blank?
      nil
    else
      Ferret::Search::SortField._load(value)
    end
  end
  new(loaded.compact)
end
|
|
186
|
+
end
|
|
187
|
+
|
|
188
|
+
end
|