watson-acts_as_ferret 0.4.8.2
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +20 -0
- data/README +104 -0
- data/acts_as_ferret.gemspec +58 -0
- data/bin/aaf_install +29 -0
- data/config/ferret_server.yml +24 -0
- data/doc/README.win32 +23 -0
- data/doc/demo/README +154 -0
- data/doc/demo/README_DEMO +23 -0
- data/doc/demo/Rakefile +10 -0
- data/doc/demo/app/controllers/admin/backend_controller.rb +14 -0
- data/doc/demo/app/controllers/admin_area_controller.rb +4 -0
- data/doc/demo/app/controllers/application.rb +5 -0
- data/doc/demo/app/controllers/contents_controller.rb +49 -0
- data/doc/demo/app/controllers/searches_controller.rb +8 -0
- data/doc/demo/app/helpers/admin/backend_helper.rb +2 -0
- data/doc/demo/app/helpers/application_helper.rb +3 -0
- data/doc/demo/app/helpers/content_helper.rb +2 -0
- data/doc/demo/app/helpers/search_helper.rb +2 -0
- data/doc/demo/app/models/comment.rb +48 -0
- data/doc/demo/app/models/content.rb +12 -0
- data/doc/demo/app/models/content_base.rb +28 -0
- data/doc/demo/app/models/search.rb +19 -0
- data/doc/demo/app/models/shared_index1.rb +3 -0
- data/doc/demo/app/models/shared_index2.rb +3 -0
- data/doc/demo/app/models/special_content.rb +3 -0
- data/doc/demo/app/models/stats.rb +20 -0
- data/doc/demo/app/views/admin/backend/search.rhtml +18 -0
- data/doc/demo/app/views/contents/_form.rhtml +10 -0
- data/doc/demo/app/views/contents/edit.rhtml +9 -0
- data/doc/demo/app/views/contents/index.rhtml +24 -0
- data/doc/demo/app/views/contents/new.rhtml +8 -0
- data/doc/demo/app/views/contents/show.rhtml +8 -0
- data/doc/demo/app/views/layouts/application.html.erb +17 -0
- data/doc/demo/app/views/searches/_content.html.erb +2 -0
- data/doc/demo/app/views/searches/search.html.erb +20 -0
- data/doc/demo/config/boot.rb +109 -0
- data/doc/demo/config/database.yml +38 -0
- data/doc/demo/config/environment.rb +69 -0
- data/doc/demo/config/environments/development.rb +16 -0
- data/doc/demo/config/environments/production.rb +19 -0
- data/doc/demo/config/environments/test.rb +21 -0
- data/doc/demo/config/ferret_server.yml +18 -0
- data/doc/demo/config/lighttpd.conf +40 -0
- data/doc/demo/config/routes.rb +9 -0
- data/doc/demo/db/development_structure.sql +15 -0
- data/doc/demo/db/migrate/001_initial_migration.rb +18 -0
- data/doc/demo/db/migrate/002_add_type_to_contents.rb +9 -0
- data/doc/demo/db/migrate/003_create_shared_index1s.rb +11 -0
- data/doc/demo/db/migrate/004_create_shared_index2s.rb +11 -0
- data/doc/demo/db/migrate/005_special_field.rb +9 -0
- data/doc/demo/db/migrate/006_create_stats.rb +15 -0
- data/doc/demo/db/schema.sql +18 -0
- data/doc/demo/db/schema.sqlite +14 -0
- data/doc/demo/doc/README_FOR_APP +2 -0
- data/doc/demo/doc/howto.txt +70 -0
- data/doc/demo/public/404.html +8 -0
- data/doc/demo/public/500.html +8 -0
- data/doc/demo/public/dispatch.cgi +10 -0
- data/doc/demo/public/dispatch.fcgi +24 -0
- data/doc/demo/public/dispatch.rb +10 -0
- data/doc/demo/public/favicon.ico +0 -0
- data/doc/demo/public/images/rails.png +0 -0
- data/doc/demo/public/index.html +277 -0
- data/doc/demo/public/robots.txt +1 -0
- data/doc/demo/public/stylesheets/scaffold.css +74 -0
- data/doc/demo/script/about +3 -0
- data/doc/demo/script/breakpointer +3 -0
- data/doc/demo/script/console +3 -0
- data/doc/demo/script/destroy +3 -0
- data/doc/demo/script/ferret_server +10 -0
- data/doc/demo/script/generate +3 -0
- data/doc/demo/script/performance/benchmarker +3 -0
- data/doc/demo/script/performance/profiler +3 -0
- data/doc/demo/script/plugin +3 -0
- data/doc/demo/script/process/inspector +3 -0
- data/doc/demo/script/process/reaper +3 -0
- data/doc/demo/script/process/spawner +3 -0
- data/doc/demo/script/process/spinner +3 -0
- data/doc/demo/script/runner +3 -0
- data/doc/demo/script/server +3 -0
- data/doc/demo/test/fixtures/comments.yml +12 -0
- data/doc/demo/test/fixtures/contents.yml +13 -0
- data/doc/demo/test/fixtures/remote_contents.yml +9 -0
- data/doc/demo/test/fixtures/shared_index1s.yml +7 -0
- data/doc/demo/test/fixtures/shared_index2s.yml +7 -0
- data/doc/demo/test/functional/admin/backend_controller_test.rb +35 -0
- data/doc/demo/test/functional/contents_controller_test.rb +81 -0
- data/doc/demo/test/functional/searches_controller_test.rb +71 -0
- data/doc/demo/test/smoke/drb_smoke_test.rb +321 -0
- data/doc/demo/test/smoke/process_stats.rb +21 -0
- data/doc/demo/test/test_helper.rb +30 -0
- data/doc/demo/test/unit/comment_test.rb +217 -0
- data/doc/demo/test/unit/content_test.rb +705 -0
- data/doc/demo/test/unit/ferret_result_test.rb +24 -0
- data/doc/demo/test/unit/multi_index_test.rb +329 -0
- data/doc/demo/test/unit/remote_index_test.rb +23 -0
- data/doc/demo/test/unit/shared_index1_test.rb +108 -0
- data/doc/demo/test/unit/shared_index2_test.rb +13 -0
- data/doc/demo/test/unit/sort_test.rb +21 -0
- data/doc/demo/test/unit/special_content_test.rb +25 -0
- data/doc/demo/vendor/plugins/will_paginate/LICENSE +18 -0
- data/doc/demo/vendor/plugins/will_paginate/README +108 -0
- data/doc/demo/vendor/plugins/will_paginate/Rakefile +23 -0
- data/doc/demo/vendor/plugins/will_paginate/init.rb +21 -0
- data/doc/demo/vendor/plugins/will_paginate/lib/will_paginate/collection.rb +45 -0
- data/doc/demo/vendor/plugins/will_paginate/lib/will_paginate/core_ext.rb +44 -0
- data/doc/demo/vendor/plugins/will_paginate/lib/will_paginate/finder.rb +159 -0
- data/doc/demo/vendor/plugins/will_paginate/lib/will_paginate/view_helpers.rb +95 -0
- data/doc/demo/vendor/plugins/will_paginate/test/array_pagination_test.rb +23 -0
- data/doc/demo/vendor/plugins/will_paginate/test/boot.rb +27 -0
- data/doc/demo/vendor/plugins/will_paginate/test/console +10 -0
- data/doc/demo/vendor/plugins/will_paginate/test/finder_test.rb +219 -0
- data/doc/demo/vendor/plugins/will_paginate/test/fixtures/admin.rb +3 -0
- data/doc/demo/vendor/plugins/will_paginate/test/fixtures/companies.yml +24 -0
- data/doc/demo/vendor/plugins/will_paginate/test/fixtures/company.rb +23 -0
- data/doc/demo/vendor/plugins/will_paginate/test/fixtures/developer.rb +11 -0
- data/doc/demo/vendor/plugins/will_paginate/test/fixtures/developers_projects.yml +13 -0
- data/doc/demo/vendor/plugins/will_paginate/test/fixtures/project.rb +4 -0
- data/doc/demo/vendor/plugins/will_paginate/test/fixtures/projects.yml +7 -0
- data/doc/demo/vendor/plugins/will_paginate/test/fixtures/replies.yml +20 -0
- data/doc/demo/vendor/plugins/will_paginate/test/fixtures/reply.rb +5 -0
- data/doc/demo/vendor/plugins/will_paginate/test/fixtures/schema.sql +44 -0
- data/doc/demo/vendor/plugins/will_paginate/test/fixtures/topic.rb +19 -0
- data/doc/demo/vendor/plugins/will_paginate/test/fixtures/topics.yml +30 -0
- data/doc/demo/vendor/plugins/will_paginate/test/fixtures/user.rb +2 -0
- data/doc/demo/vendor/plugins/will_paginate/test/fixtures/users.yml +35 -0
- data/doc/demo/vendor/plugins/will_paginate/test/helper.rb +42 -0
- data/doc/demo/vendor/plugins/will_paginate/test/lib/activerecord_test_connector.rb +64 -0
- data/doc/demo/vendor/plugins/will_paginate/test/lib/load_fixtures.rb +10 -0
- data/doc/demo/vendor/plugins/will_paginate/test/pagination_test.rb +136 -0
- data/doc/monit-example +22 -0
- data/init.rb +24 -0
- data/install.rb +18 -0
- data/lib/act_methods.rb +147 -0
- data/lib/acts_as_ferret.rb +593 -0
- data/lib/ar_mysql_auto_reconnect_patch.rb +41 -0
- data/lib/blank_slate.rb +54 -0
- data/lib/bulk_indexer.rb +56 -0
- data/lib/class_methods.rb +279 -0
- data/lib/ferret_extensions.rb +192 -0
- data/lib/ferret_find_methods.rb +142 -0
- data/lib/ferret_result.rb +58 -0
- data/lib/ferret_server.rb +238 -0
- data/lib/index.rb +99 -0
- data/lib/instance_methods.rb +172 -0
- data/lib/local_index.rb +202 -0
- data/lib/more_like_this.rb +217 -0
- data/lib/multi_index.rb +133 -0
- data/lib/rdig_adapter.rb +149 -0
- data/lib/remote_functions.rb +43 -0
- data/lib/remote_index.rb +54 -0
- data/lib/remote_multi_index.rb +20 -0
- data/lib/search_results.rb +50 -0
- data/lib/server_manager.rb +71 -0
- data/lib/unix_daemon.rb +86 -0
- data/lib/without_ar.rb +52 -0
- data/recipes/aaf_recipes.rb +116 -0
- data/script/ferret_daemon +94 -0
- data/script/ferret_server +12 -0
- data/script/ferret_service +178 -0
- data/tasks/ferret.rake +39 -0
- metadata +246 -0
@@ -0,0 +1,41 @@
|
|
1
|
+
# Source: http://pastie.caboo.se/154842
|
2
|
+
#
|
3
|
+
# in /etc/my.cnf on the MySQL server, you can set the interactive-timeout parameter,
|
4
|
+
# for example, 8 hours = 28800 sec
|
5
|
+
# interactive-timeout=28800
|
6
|
+
|
7
|
+
# in ActiveRecord, set the verification_timeout to something less than
# the interactive-timeout parameter; 14400 sec = 4 hours
|
9
|
+
ActiveRecord::Base.verification_timeout = 14_400
ActiveRecord::Base.establish_connection
|
11
|
+
|
12
|
+
# Below is a monkey patch for keeping ActiveRecord connections alive.
|
13
|
+
# http://www.sparecycles.org/2007/7/2/saying-goodbye-to-lost-connections-in-rails
|
14
|
+
|
15
|
+
module ActiveRecord
  module ConnectionAdapters
    class MysqlAdapter
      # Runs +sql+ through the adapter's +log+ wrapper. When the statement
      # fails because the MySQL connection was dropped, reconnects and retries
      # the statement exactly once; a second failure is re-raised.
      #
      # sql::  the statement to execute
      # name:: optional label handed to +log+
      #
      # Raises ActiveRecord::StatementInvalid for every other failure. A
      # 'Packets out of order' failure is re-raised with an explanatory message.
      def execute(sql, name = nil) #:nodoc:
        # Flipped to false after the first reconnect so that +retry+ cannot loop.
        reconnect_lost_connections = true
        begin
          log(sql, name) { @connection.query(sql) }
        rescue ActiveRecord::StatementInvalid => exception
          # The pattern has to stay on a single line: a line break inside the
          # literal becomes part of the regexp and stops it from matching.
          if reconnect_lost_connections && exception.message =~ /(Lost connection to MySQL server during query|MySQL server has gone away)/
            reconnect_lost_connections = false
            reconnect!
            retry
          elsif exception.message.split(":").first =~ /Packets out of order/
            raise ActiveRecord::StatementInvalid,
                  "'Packets out of order' error was received from the database. " \
                  "Please update your mysql bindings (gem install mysql) and read " \
                  "http://dev.mysql.com/doc/mysql/en/password-hashing.html for more information. " \
                  "If you're on Windows, use the Instant Rails installer to get the updated mysql bindings."
          else
            raise
          end
        end
      end
    end
  end
end
|
41
|
+
|
data/lib/blank_slate.rb
ADDED
@@ -0,0 +1,54 @@
|
|
1
|
+
module ActsAsFerret
  if defined?(BasicObject)
    # Ruby 1.9.x: BasicObject is the base class.
    class BlankSlate < BasicObject
    end
  elsif defined?(BlankSlate)
    # Rails 2.x has it already: inherit from the existing BlankSlate.
    class BlankSlate < ::BlankSlate
    end
  else
    # 'backported' for Rails pre 2.0
    #
    #--
    # Copyright 2004, 2006 by Jim Weirich (jim@weirichhouse.org).
    # All rights reserved.

    # Permission is granted for use, copying, modification, distribution,
    # and distribution of modified versions of this work as long as the
    # above copyright notice is included.
    #++

    ######################################################################
    # BlankSlate provides an abstract base class with no predefined
    # methods (except for <tt>\_\_send__</tt> and <tt>\_\_id__</tt>).
    # BlankSlate is useful as a base class when writing classes that
    # depend upon <tt>method_missing</tt> (e.g. dynamic proxies).
    #
    class BlankSlate
      class << self
        # Undefines the instance method +name+ and keeps the original
        # implementation in @hidden_methods. Names starting with "__",
        # "instance_eval" or "methods" are never hidden.
        def hide(name)
          return unless instance_methods.include?(name.to_s)
          return if name =~ /^(__|instance_eval|methods)/

          @hidden_methods ||= {}
          @hidden_methods[name.to_sym] = instance_method(name)
          undef_method name
        end

        # Counterpart of +hide+. Deliberately a no-op in this implementation:
        # the methods revealed where this is used are never hidden here.
        def reveal(name)
        end
      end

      instance_methods.each do |method_name|
        hide(method_name)
      end
    end
  end
end
|
data/lib/bulk_indexer.rb
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
module ActsAsFerret
  # Pushes batches of records into an index and logs progress figures
  # (percentage done, average time per record, estimated time remaining).
  class BulkIndexer
    # args::
    #   :batch_size:: stored batch size, defaults to 1000
    #   :logger::     receives +debug+/+info+ progress lines; may be nil
    #   :model::      must respond to +name+, and to +count+ when :reindex is set
    #   :index::      must respond to +update_batch+
    #   :reindex::    when true the expected total is taken from +model.count+
    #   :total::      expected number of records when not reindexing
    def initialize(args = {})
      @batch_size      = args[:batch_size] || 1000
      @logger          = args[:logger]
      @model           = args[:model]
      @index           = args[:index]
      @work_done       = 0
      @indexed_records = 0
      @total_time      = 0.0
      if args[:reindex]
        @reindex = true
        @model_count = @model.count.to_f
      else
        # A missing :total used to leave nil here and blow up on the first
        # batch; treat it as "total unknown".
        @model_count = args[:total] || 0
      end
    end

    # Converts +records+ to documents (skipping those for which
    # ferret_enabled?(true) is false), hands them to the index in one
    # update_batch call and logs the progress made so far.
    # +offset+ is accepted for interface compatibility and is not used.
    def index_records(records, offset)
      batch_time = measure_time {
        docs = []
        records.each { |rec| docs << [rec.to_doc, rec.ferret_analyzer] if rec.ferret_enabled?(true) }
        @index.update_batch(docs)
      }.to_f
      rec_count = records.size
      @indexed_records += rec_count
      @total_time += batch_time
      if @model_count > 0
        # Cap at 100: more records than initially counted may show up.
        @work_done = [@indexed_records.to_f / @model_count * 100.0, 100.0].min
      end
      log_progress(batch_time, rec_count)
    end

    # Yields and returns the time the block took, as computed by
    # subtracting two Time.now readings.
    def measure_time
      t1 = Time.now
      yield
      Time.now - t1
    end

    protected

    # Average indexing time per record so far, 0 before the first record.
    def avg_time_per_record
      if @indexed_records > 0
        @total_time / @indexed_records
      else
        0
      end
    end

    # Records still expected, never negative.
    def records_waiting
      [@model_count - @indexed_records, 0].max
    end

    # Writes the debug and info progress lines; does nothing without a logger.
    def log_progress(batch_time, rec_count)
      return unless @logger
      @logger.debug "took #{batch_time} to index last #{rec_count} records. #{records_waiting} records to go. Avg time per record: #{avg_time_per_record}"
      remaining_time = avg_time_per_record * records_waiting
      @logger.info "#{@reindex ? 're' : 'bulk '}index model #{@model.name} : #{'%.2f' % @work_done}% complete : #{'%.2f' % remaining_time} secs to finish"
    end
  end
end
|
@@ -0,0 +1,279 @@
|
|
1
|
+
module ActsAsFerret
|
2
|
+
|
3
|
+
module ClassMethods
|
4
|
+
|
5
|
+
# Disables ferret index updates for this model. When a block is given,
|
6
|
+
# Ferret will be re-enabled again after executing the block.
|
7
|
+
def disable_ferret
  aaf_configuration[:enabled] = false
  return unless block_given?

  begin
    yield
  ensure
    # Switch index updates back on even when the block raises; otherwise a
    # failing block would leave Ferret disabled for this model.
    enable_ferret
  end
  # Same return value as before when a block is given.
  true
end
|
14
|
+
|
15
|
+
# Sets the :enabled flag in this model's aaf_configuration to true.
def enable_ferret
  config = aaf_configuration
  config[:enabled] = true
end
|
18
|
+
|
19
|
+
# Returns the :enabled flag stored in this model's aaf_configuration.
def ferret_enabled?
  config = aaf_configuration
  config[:enabled]
end
|
22
|
+
|
23
|
+
# rebuild the index from all data stored for this model, and any other
|
24
|
+
# model classes associated with the same index.
|
25
|
+
# This is called automatically when no index exists yet.
|
26
|
+
#
|
27
|
+
def rebuild_index
  # the work is done by the index object this model is bound to
  index = aaf_index
  index.rebuild_index
end
|
30
|
+
|
31
|
+
# re-index a number of records specified by the given ids. Use for large
|
32
|
+
# indexing jobs i.e. after modifying a lot of records with Ferret disabled.
|
33
|
+
# Please note that the state of Ferret (enabled or disabled at class or
|
34
|
+
# record level) is not checked by this method, so if you need to do so
|
35
|
+
# (e.g. because of a custom ferret_enabled? implementation), you have to do
|
36
|
+
# so yourself.
|
37
|
+
def bulk_index(*ids)
  # a trailing Hash is the options argument, not an id
  options = case ids.last
            when Hash then ids.pop
            else {}
            end
  # accept both bulk_index(1, 2, 3) and bulk_index([1, 2, 3])
  single_collection = ids.size == 1 && ids.first.is_a?(Enumerable)
  ids = ids.first if single_collection
  aaf_index.bulk_index(self.name, ids, options)
end
|
42
|
+
|
43
|
+
# true if our db and table appear to be suitable for the mysql fast batch
|
44
|
+
# hack (see
|
45
|
+
# http://weblog.jamisbuck.org/2007/4/6/faking-cursors-in-activerecord)
|
46
|
+
def use_fast_batches?
  mysql_adapter = connection.class.name =~ /Mysql/
  return unless mysql_adapter && primary_key == 'id' && aaf_configuration[:mysql_fast_batches]

  logger.info "using mysql specific batched find :all. Turn off with :mysql_fast_batches => false if you encounter problems (i.e. because of non-integer UUIDs in the id column)"
  true
end
|
52
|
+
|
53
|
+
# Returns all records modified or created after the specified time.
|
54
|
+
# Used by the rake rebuild task to find models that need to be updated in
|
55
|
+
# the index after the rebuild finished because they changed while the
|
56
|
+
# rebuild was running.
|
57
|
+
# Override if your models don't stick to the created_at/updated_at
|
58
|
+
# convention.
|
59
|
+
def records_modified_since(time)
  # only use the timestamp columns this model actually has
  timestamp_columns = %w(updated_at created_at).select { |col| column_names.include?(col) }
  if timestamp_columns.empty?
    logger.warn "#{self.name}: Override records_modified_since(time) to keep the index up to date with records changed during rebuild."
    return []
  end

  # The columns are combined with OR: a record created before +time+ but
  # updated afterwards has to be returned too. Requiring both timestamps
  # to be >= time would drop exactly those records.
  sql = timestamp_columns.map { |col| "#{col} >= ?" }.join(' OR ')
  find :all, :conditions => [ sql, *([time] * timestamp_columns.size) ]
end
|
71
|
+
|
72
|
+
# runs across all records yielding those to be indexed when the index is rebuilt
|
73
|
+
def records_for_rebuild(batch_size = 1000)
  transaction do
    unless use_fast_batches?
      # generic path: page through the table with LIMIT/OFFSET
      ordering = "#{primary_key} ASC" # fixes #212
      0.step(self.count, batch_size) do |start|
        batch = find(:all, :limit => batch_size, :offset => start, :order => ordering)
        yield batch, start
      end
    else
      # fast path: each query starts after the last id of the previous batch
      last_id = 0
      while true
        rows = find(:all, :conditions => [ "#{table_name}.id > ?", last_id ], :limit => batch_size)
        break unless rows.any?
        last_id = rows.last.id
        yield rows, last_id
      end
    end
  end
end
|
89
|
+
|
90
|
+
# yields the records with the given ids, in batches of batch_size
|
91
|
+
def records_for_bulk_index(ids, batch_size = 1000)
  transaction do
    offset = 0
    ids.each_slice(batch_size) do |id_slice|
      # One query per slice. The same find used to run twice per slice, with
      # the first result thrown away.
      records = find( :all, :conditions => ["id in (?)", id_slice] )
      yield records, offset
      # offset advances by the nominal batch size, also for a short last slice
      offset += batch_size
    end
  end
end
|
102
|
+
|
103
|
+
# Retrieve the index instance for this model class. This can either be a
|
104
|
+
# LocalIndex, or a RemoteIndex instance.
|
105
|
+
#
|
106
|
+
def aaf_index
  # looked up once by configured index name, then cached in @index
  return @index if @index
  @index = ActsAsFerret.get_index(aaf_configuration[:name])
end
|
109
|
+
|
110
|
+
# Finds instances by searching the Ferret index. Terms are ANDed by default, use
|
111
|
+
# OR between terms for ORed queries. Or specify +:or_default => true+ in the
|
112
|
+
# +:ferret+ options hash of acts_as_ferret.
|
113
|
+
#
|
114
|
+
# You may either use the +offset+ and +limit+ options to implement your own
|
115
|
+
# pagination logic, or use the +page+ and +per_page+ options to use the
|
116
|
+
# built in pagination support which is compatible with will_paginate's view
|
117
|
+
# helpers. If +page+ and +per_page+ are given, +offset+ and +limit+ will be
|
118
|
+
# ignored.
|
119
|
+
#
|
120
|
+
# == options:
|
121
|
+
# page:: page of search results to retrieve
|
122
|
+
# per_page:: number of search results that are displayed per page
|
123
|
+
# offset:: first hit to retrieve (useful for paging)
|
124
|
+
# limit:: number of hits to retrieve, or :all to retrieve
|
125
|
+
# all results
|
126
|
+
# lazy:: Array of field names whose contents should be read directly
|
127
|
+
# from the index. Those fields have to be marked
|
128
|
+
# +:store => :yes+ in their field options. Give true to get all
|
129
|
+
# stored fields. Note that if you have a shared index, you have
|
130
|
+
# to explicitly state the fields you want to fetch, true won't
|
131
|
+
# work here)
|
132
|
+
#
|
133
|
+
# +find_options+ is a hash passed on to active_record's find when
|
134
|
+
# retrieving the data from db, useful to i.e. prefetch relationships with
|
135
|
+
# :include or to specify additional filter criteria with :conditions (only string and array syntax supported).
|
136
|
+
# You can also call find_with_ferret inside named or dynamic scopes, if you like the conditions hash syntax more.
|
137
|
+
#
|
138
|
+
# This method returns a +SearchResults+ instance, which really is an Array that has
|
139
|
+
# been decorated with a total_hits attribute holding the total number of hits.
|
140
|
+
# Additionally, SearchResults is compatible with the pagination helper
|
141
|
+
# methods of the will_paginate plugin.
|
142
|
+
#
|
143
|
+
# Please keep in mind that the number of results delivered might be less than
|
144
|
+
# +limit+ if you specify any active record conditions that further limit
|
145
|
+
# the result. Use +limit+ and +offset+ as AR find_options instead.
|
146
|
+
# +page+ and +per_page+ are supposed to work regardless of any
|
147
|
+
# +conditions+ present in +find_options+.
|
148
|
+
def find_with_ferret(q, options = {}, find_options = {})
  externally_scoped = respond_to?(:scope) && scope(:find, :conditions)
  # treat external scope the same as if :conditions present (i.e. when it comes to counting results)
  find_options[:conditions] ||= '1=1' if externally_scoped
  ActsAsFerret.find(q, self, options, find_options)
end
|
154
|
+
|
155
|
+
|
156
|
+
# Returns the total number of hits for the given query
|
157
|
+
#
|
158
|
+
# Note that since we don't query the database here, this method won't deliver
|
159
|
+
# the expected results when used on an AR association.
|
160
|
+
#
|
161
|
+
def total_hits(q, options={})
  # answered by the index alone
  index = aaf_index
  index.total_hits(q, options)
end
|
164
|
+
|
165
|
+
# Finds instance model name, ids and scores by contents.
|
166
|
+
# Useful e.g. if you want to search across models or do not want to fetch
|
167
|
+
# all result records (yet).
|
168
|
+
#
|
169
|
+
# Options are the same as for find_with_ferret
|
170
|
+
#
|
171
|
+
# A block can be given too, it will be executed with every result:
|
172
|
+
# find_ids_with_ferret(q, options) do |model, id, score|
|
173
|
+
# id_array << id
|
174
|
+
# scores_by_id[id] = score
|
175
|
+
# end
|
176
|
+
# NOTE: in case a block is given, only the total_hits value will be returned
|
177
|
+
# instead of the [total_hits, results] array!
|
178
|
+
#
|
179
|
+
def find_ids_with_ferret(q, options = {}, &block)
  # query, options and the optional block are passed on unchanged
  index = aaf_index
  index.find_ids(q, options, &block)
end
|
182
|
+
|
183
|
+
# An implementation of http://rm.jkraemer.net/issues/show/161
|
184
|
+
def highlight(id, query, options = {})
  # highlighting is done by the index
  index = aaf_index
  index.highlight(id, query, options)
end
|
187
|
+
|
188
|
+
# Returns whatever this model's index reports as document_number for +id+.
def document_number(id)
  index = aaf_index
  index.document_number(id)
end
|
191
|
+
|
192
|
+
# Returns whatever this model's index reports as query_for_record for +id+.
def query_for_record(id)
  index = aaf_index
  index.query_for_record(id)
end
|
195
|
+
|
196
|
+
protected
|
197
|
+
|
198
|
+
# def find_records_lazy_or_not(q, options = {}, find_options = {})
|
199
|
+
# if options[:lazy]
|
200
|
+
# logger.warn "find_options #{find_options} are ignored because :lazy => true" unless find_options.empty?
|
201
|
+
# lazy_find_by_contents q, options
|
202
|
+
# else
|
203
|
+
# ar_find_by_contents q, options, find_options
|
204
|
+
# end
|
205
|
+
# end
|
206
|
+
#
|
207
|
+
# def ar_find_by_contents(q, options = {}, find_options = {})
|
208
|
+
# result_ids = {}
|
209
|
+
# total_hits = find_ids_with_ferret(q, options) do |model, id, score, data|
|
210
|
+
# # stores ids, index and score of each hit for later ordering of
|
211
|
+
# # results
|
212
|
+
# result_ids[id] = [ result_ids.size + 1, score ]
|
213
|
+
# end
|
214
|
+
#
|
215
|
+
# result = ActsAsFerret::retrieve_records( { self.name => result_ids }, find_options )
|
216
|
+
#
|
217
|
+
# # count total_hits via sql when using conditions or when we're called
|
218
|
+
# # from an ActiveRecord association.
|
219
|
+
# if find_options[:conditions] or caller.find{ |call| call =~ %r{active_record/associations} }
|
220
|
+
# # chances are the ferret result count is not our total_hits value, so
|
221
|
+
# # we correct this here.
|
222
|
+
# if options[:limit] != :all || options[:page] || options[:offset] || find_options[:limit] || find_options[:offset]
|
223
|
+
# # our ferret result has been limited, so we need to re-run that
|
224
|
+
# # search to get the full result set from ferret.
|
225
|
+
# result_ids = {}
|
226
|
+
# find_ids_with_ferret(q, options.update(:limit => :all, :offset => 0)) do |model, id, score, data|
|
227
|
+
# result_ids[id] = [ result_ids.size + 1, score ]
|
228
|
+
# end
|
229
|
+
# # Now ask the database for the total size of the final result set.
|
230
|
+
# total_hits = count_records( { self.name => result_ids }, find_options )
|
231
|
+
# else
|
232
|
+
# # what we got from the database is our full result set, so take
|
233
|
+
# # it's size
|
234
|
+
# total_hits = result.length
|
235
|
+
# end
|
236
|
+
# end
|
237
|
+
#
|
238
|
+
# [ total_hits, result ]
|
239
|
+
# end
|
240
|
+
#
|
241
|
+
# def lazy_find_by_contents(q, options = {})
|
242
|
+
# logger.debug "lazy_find_by_contents: #{q}"
|
243
|
+
# result = []
|
244
|
+
# rank = 0
|
245
|
+
# total_hits = find_ids_with_ferret(q, options) do |model, id, score, data|
|
246
|
+
# logger.debug "model: #{model}, id: #{id}, data: #{data}"
|
247
|
+
# result << FerretResult.new(model, id, score, rank += 1, data)
|
248
|
+
# end
|
249
|
+
# [ total_hits, result ]
|
250
|
+
# end
|
251
|
+
|
252
|
+
|
253
|
+
# Resolves +model+ via +constantize+ and calls +find+ on the result with
# +id+ and +find_options+.
def model_find(model, id, find_options = {})
  klass = model.constantize
  klass.find(id, find_options)
end
|
256
|
+
|
257
|
+
|
258
|
+
# def count_records(id_arrays, find_options = {})
|
259
|
+
# count_options = find_options.dup
|
260
|
+
# count_options.delete :limit
|
261
|
+
# count_options.delete :offset
|
262
|
+
# count = 0
|
263
|
+
# id_arrays.each do |model, id_array|
|
264
|
+
# next if id_array.empty?
|
265
|
+
# model = model.constantize
|
266
|
+
# # merge conditions
|
267
|
+
# conditions = ActsAsFerret::combine_conditions([ "#{model.table_name}.#{model.primary_key} in (?)", id_array.keys ],
|
268
|
+
# find_options[:conditions])
|
269
|
+
# opts = find_options.merge :conditions => conditions
|
270
|
+
# opts.delete :limit; opts.delete :offset
|
271
|
+
# count += model.count opts
|
272
|
+
# end
|
273
|
+
# count
|
274
|
+
# end
|
275
|
+
|
276
|
+
end
|
277
|
+
|
278
|
+
end
|
279
|
+
|
@@ -0,0 +1,192 @@
|
|
1
|
+
module Ferret
|
2
|
+
|
3
|
+
module Analysis
|
4
|
+
|
5
|
+
# = PerFieldAnalyzer
|
6
|
+
#
|
7
|
+
# This PerFieldAnalyzer is a workaround to a memory leak in
|
8
|
+
# ferret 0.11.4. It does basically do the same as the original
|
9
|
+
# Ferret::Analysis::PerFieldAnalyzer, but without the leak :)
|
10
|
+
#
|
11
|
+
# http://ferret.davebalmain.com/api/classes/Ferret/Analysis/PerFieldAnalyzer.html
|
12
|
+
#
|
13
|
+
# Thanks to Ben from omdb.org for tracking this down and creating this
|
14
|
+
# workaround.
|
15
|
+
# You can read more about the issue there:
|
16
|
+
# http://blog.omdb-beta.org/2007/7/29/tracking-down-a-memory-leak-in-ferret-0-11-4
|
17
|
+
class PerFieldAnalyzer < ::Ferret::Analysis::Analyzer
  # default_analyzer:: analyzer for every field without an analyzer of its own
  def initialize( default_analyzer = StandardAnalyzer.new )
    @default_analyzer = default_analyzer
    @analyzers = {}
  end

  # Registers +analyzer+ for +field+. Also available as <tt>[]=</tt>.
  def add_field( field, analyzer )
    @analyzers[field] = analyzer
  end
  alias_method :[]=, :add_field

  # Token stream of the analyzer registered for +field+, or of the default
  # analyzer when none is registered.
  def token_stream(field, string)
    analyzer = @analyzers.key?(field) ? @analyzers[field] : @default_analyzer
    analyzer.token_stream(field, string)
  end
end
|
33
|
+
end
|
34
|
+
|
35
|
+
class Index::Index
|
36
|
+
attr_accessor :batch_size, :logger
|
37
|
+
|
38
|
+
# Indexes every model in +models+ via index_model, then flushes, optimizes
# and closes this index and finally calls ActsAsFerret.close_multi_indexes.
def index_models(models)
  models.each do |klass|
    index_model(klass)
  end
  flush
  optimize
  close
  ActsAsFerret.close_multi_indexes
end
|
45
|
+
|
46
|
+
# Feeds all batches yielded by model.records_for_rebuild into a BulkIndexer
# that was created in reindex mode for this index.
def index_model(model)
  indexer_args = {
    :batch_size => @batch_size,
    :logger     => logger,
    :model      => model,
    :index      => self,
    :reindex    => true
  }
  bulk_indexer = ActsAsFerret::BulkIndexer.new(indexer_args)
  logger.info "reindexing model #{model.name}"

  model.records_for_rebuild(@batch_size) { |records, offset| bulk_indexer.index_records(records, offset) }
end
|
55
|
+
|
56
|
+
# Indexes the records of +model+ with the given +ids+ in batches, with
# auto-flush switched off for the duration.
#
# model::   must respond to records_for_bulk_index(ids, batch_size)
# ids::     ids of the records to index; ids.size is used as expected total
# options:: :optimize (default true) - optimize the index when done. Note
#           that the default is merged into the hash that was passed in.
#
# Returns the auto-flush setting that was in effect before the call.
def bulk_index(model, ids, options = {})
  options.reverse_merge! :optimize => true
  orig_flush = @auto_flush
  @auto_flush = false
  begin
    bulk_indexer = ActsAsFerret::BulkIndexer.new(:batch_size => @batch_size, :logger => logger,
                                                 :model => model, :index => self, :total => ids.size)
    model.records_for_bulk_index(ids, @batch_size) do |records, offset|
      logger.debug "#{model} bulk indexing #{records.size} at #{offset}"
      bulk_indexer.index_records(records, offset)
    end
    logger.info 'finishing bulk index...'
    flush
    if options[:optimize]
      logger.info 'optimizing...'
      optimize
    end
  ensure
    # Restore the previous setting even when indexing fails; otherwise the
    # index would be left with auto-flush switched off.
    @auto_flush = orig_flush
  end
  # Keep the method's previous return value.
  orig_flush
end
|
74
|
+
|
75
|
+
|
76
|
+
# bulk-inserts a number of ferret documents.
|
77
|
+
# The argument has to be an array of two-element arrays each holding the document data and the analyzer to
|
78
|
+
# use for this document (which may be nil).
|
79
|
+
def update_batch(document_analyzer_pairs)
  # ids of the incoming documents, read from each document's id field
  ids = document_analyzer_pairs.map { |doc, _analyzer| doc[@id_field] }
  @dir.synchronize do
    # delete first, so existing documents are replaced instead of duplicated
    batch_delete(ids)
    ensure_writer_open()
    document_analyzer_pairs.each do |doc, analyzer|
      if analyzer
        old_analyzer = @writer.analyzer
        begin
          @writer.analyzer = analyzer
          @writer.add_document(doc)
        ensure
          # Put the writer's own analyzer back even when add_document fails,
          # so a per-document analyzer cannot leak into later writes.
          @writer.analyzer = old_analyzer
        end
      else
        @writer.add_document(doc)
      end
    end
    flush()
  end
end
|
97
|
+
|
98
|
+
# search for the first document with +arg+ in the +id+ field and return its internal document number.
|
99
|
+
# The +id+ field is either :id or whatever you set
|
100
|
+
# :id_field parameter to when you create the Index object.
|
101
|
+
def doc_number(id)
  @dir.synchronize do
    ensure_reader_open
    # the id is looked up in its string form
    matches = @reader.term_docs_for(@id_field, id.to_s)
    if matches.next?
      return matches.doc
    else
      return nil
    end
  end
end
|
108
|
+
|
109
|
+
private
|
110
|
+
|
111
|
+
|
112
|
+
# If +docs+ is a Hash or an Array then a batch delete will be performed.
|
113
|
+
# If +docs+ is an Array then it will be considered an array of +id+'s. If
|
114
|
+
# it is a Hash, then its keys will be used instead as the Array of
|
115
|
+
# document +id+'s. If the +id+ is an Integer then it is considered a
|
116
|
+
# Ferret document number and the corresponding document will be deleted.
|
117
|
+
# If the +id+ is a String or a Symbol then the +id+ will be considered a
|
118
|
+
# term and the documents that contain that term in the +:id_field+ will
|
119
|
+
# be deleted.
|
120
|
+
#
|
121
|
+
# docs:: An Array of docs to be deleted, or a Hash (in which case the keys
|
122
|
+
# are used)
|
123
|
+
#
|
124
|
+
# ripped from Ferret trunk.
|
125
|
+
def batch_delete(docs)
  docs = docs.keys if docs.is_a?(Hash)
  raise ArgumentError, "must pass Array or Hash" unless docs.is_a? Array
  ids = []
  terms = []
  # split the arguments into document numbers and id-field terms
  docs.each do |doc|
    case doc
    when String then terms << doc
    when Symbol then terms << doc.to_s
    when Integer then ids << doc
    else
      # Report the class of the offending element. The message used to
      # interpolate +id+, which is not defined in this scope.
      raise ArgumentError, "Cannot delete for arg of type #{doc.class}"
    end
  end
  if ids.size > 0
    ensure_reader_open
    ids.each {|id| @reader.delete(id)}
  end
  if terms.size > 0
    ensure_writer_open()
    terms.each { |t| @writer.delete(@id_field, t) }
    # TODO with Ferret trunk this would work:
    # @writer.delete(@id_field, terms)
  end
  return self
end
|
151
|
+
|
152
|
+
end
|
153
|
+
|
154
|
+
# add marshalling support to SortFields
|
155
|
+
class Search::SortField
  # Marshal hook: a sort field is dumped as its +to_s+ representation.
  def _dump(depth)
    to_s
  end

  # Marshal hook: rebuilds a sort field from the string written by +_dump+.
  # Raises a RuntimeError for a string in none of the known formats.
  def self._load(string)
    case string
    when /<DOC(_ID)?>!/
      Ferret::Search::SortField::DOC_ID_REV
    when /<DOC(_ID)?>/
      Ferret::Search::SortField::DOC_ID
    when '<SCORE>!'
      Ferret::Search::SortField::SCORE_REV
    when '<SCORE>'
      Ferret::Search::SortField::SCORE
    when /^(\w+):<(\w+)>(!)?$/
      # field:<type> with an optional trailing "!" for reverse order
      field_name, field_type, reverse_marker = $1, $2, $3
      new(field_name.to_sym, :type => field_type.to_sym, :reverse => !reverse_marker.nil?)
    else
      raise "invalid value: #{string}"
    end
  end
end
|
171
|
+
|
172
|
+
# add marshalling support to Sort
|
173
|
+
class Search::Sort
  # Marshal hook: a sort is dumped as its +to_s+ representation.
  def _dump(depth)
    to_s
  end

  # Marshal hook: rebuilds a sort from the string written by +_dump+.
  # Raises a RuntimeError when the string does not look like "Sort[...]".
  def self._load(string)
    # we exclude the last <DOC> sorting as it is appended by new anyway
    unless string =~ /^Sort\[(.*?)(<DOC>(!)?)?\]$/
      raise "invalid value: #{string}"
    end

    sort_fields = $1.split(',').map { |raw|
      raw.strip!
      raw.blank? ? nil : Ferret::Search::SortField._load(raw)
    }
    new sort_fields.compact
  end
end
|
191
|
+
|
192
|
+
end
|