cehoffman-acts_as_ferret 0.4.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (162) hide show
  1. data/LICENSE +20 -0
  2. data/README +68 -0
  3. data/bin/aaf_install +23 -0
  4. data/config/ferret_server.yml +24 -0
  5. data/doc/README.win32 +23 -0
  6. data/doc/demo/README +154 -0
  7. data/doc/demo/README_DEMO +23 -0
  8. data/doc/demo/Rakefile +10 -0
  9. data/doc/demo/app/controllers/admin/backend_controller.rb +14 -0
  10. data/doc/demo/app/controllers/admin_area_controller.rb +4 -0
  11. data/doc/demo/app/controllers/application.rb +5 -0
  12. data/doc/demo/app/controllers/contents_controller.rb +49 -0
  13. data/doc/demo/app/controllers/searches_controller.rb +8 -0
  14. data/doc/demo/app/helpers/admin/backend_helper.rb +2 -0
  15. data/doc/demo/app/helpers/application_helper.rb +3 -0
  16. data/doc/demo/app/helpers/content_helper.rb +2 -0
  17. data/doc/demo/app/helpers/search_helper.rb +2 -0
  18. data/doc/demo/app/models/comment.rb +48 -0
  19. data/doc/demo/app/models/content.rb +12 -0
  20. data/doc/demo/app/models/content_base.rb +28 -0
  21. data/doc/demo/app/models/search.rb +19 -0
  22. data/doc/demo/app/models/shared_index1.rb +3 -0
  23. data/doc/demo/app/models/shared_index2.rb +3 -0
  24. data/doc/demo/app/models/special_content.rb +3 -0
  25. data/doc/demo/app/models/stats.rb +20 -0
  26. data/doc/demo/app/views/admin/backend/search.rhtml +18 -0
  27. data/doc/demo/app/views/contents/_form.rhtml +10 -0
  28. data/doc/demo/app/views/contents/edit.rhtml +9 -0
  29. data/doc/demo/app/views/contents/index.rhtml +24 -0
  30. data/doc/demo/app/views/contents/new.rhtml +8 -0
  31. data/doc/demo/app/views/contents/show.rhtml +8 -0
  32. data/doc/demo/app/views/layouts/application.html.erb +17 -0
  33. data/doc/demo/app/views/searches/_content.html.erb +2 -0
  34. data/doc/demo/app/views/searches/search.html.erb +20 -0
  35. data/doc/demo/config/boot.rb +109 -0
  36. data/doc/demo/config/database.yml +38 -0
  37. data/doc/demo/config/environment.rb +69 -0
  38. data/doc/demo/config/environments/development.rb +16 -0
  39. data/doc/demo/config/environments/production.rb +19 -0
  40. data/doc/demo/config/environments/test.rb +21 -0
  41. data/doc/demo/config/ferret_server.yml +18 -0
  42. data/doc/demo/config/lighttpd.conf +40 -0
  43. data/doc/demo/config/routes.rb +9 -0
  44. data/doc/demo/db/development_structure.sql +15 -0
  45. data/doc/demo/db/migrate/001_initial_migration.rb +18 -0
  46. data/doc/demo/db/migrate/002_add_type_to_contents.rb +9 -0
  47. data/doc/demo/db/migrate/003_create_shared_index1s.rb +11 -0
  48. data/doc/demo/db/migrate/004_create_shared_index2s.rb +11 -0
  49. data/doc/demo/db/migrate/005_special_field.rb +9 -0
  50. data/doc/demo/db/migrate/006_create_stats.rb +15 -0
  51. data/doc/demo/db/schema.sql +18 -0
  52. data/doc/demo/doc/README_FOR_APP +2 -0
  53. data/doc/demo/doc/howto.txt +70 -0
  54. data/doc/demo/public/.htaccess +40 -0
  55. data/doc/demo/public/404.html +8 -0
  56. data/doc/demo/public/500.html +8 -0
  57. data/doc/demo/public/dispatch.cgi +10 -0
  58. data/doc/demo/public/dispatch.fcgi +24 -0
  59. data/doc/demo/public/dispatch.rb +10 -0
  60. data/doc/demo/public/favicon.ico +0 -0
  61. data/doc/demo/public/images/rails.png +0 -0
  62. data/doc/demo/public/index.html +277 -0
  63. data/doc/demo/public/robots.txt +1 -0
  64. data/doc/demo/public/stylesheets/scaffold.css +74 -0
  65. data/doc/demo/script/about +3 -0
  66. data/doc/demo/script/breakpointer +3 -0
  67. data/doc/demo/script/console +3 -0
  68. data/doc/demo/script/destroy +3 -0
  69. data/doc/demo/script/ferret_server +10 -0
  70. data/doc/demo/script/generate +3 -0
  71. data/doc/demo/script/performance/benchmarker +3 -0
  72. data/doc/demo/script/performance/profiler +3 -0
  73. data/doc/demo/script/plugin +3 -0
  74. data/doc/demo/script/process/inspector +3 -0
  75. data/doc/demo/script/process/reaper +3 -0
  76. data/doc/demo/script/process/spawner +3 -0
  77. data/doc/demo/script/process/spinner +3 -0
  78. data/doc/demo/script/runner +3 -0
  79. data/doc/demo/script/server +3 -0
  80. data/doc/demo/test/fixtures/comments.yml +12 -0
  81. data/doc/demo/test/fixtures/contents.yml +13 -0
  82. data/doc/demo/test/fixtures/remote_contents.yml +9 -0
  83. data/doc/demo/test/fixtures/shared_index1s.yml +7 -0
  84. data/doc/demo/test/fixtures/shared_index2s.yml +7 -0
  85. data/doc/demo/test/functional/admin/backend_controller_test.rb +35 -0
  86. data/doc/demo/test/functional/contents_controller_test.rb +81 -0
  87. data/doc/demo/test/functional/searches_controller_test.rb +71 -0
  88. data/doc/demo/test/smoke/drb_smoke_test.rb +321 -0
  89. data/doc/demo/test/smoke/process_stats.rb +21 -0
  90. data/doc/demo/test/test_helper.rb +30 -0
  91. data/doc/demo/test/unit/comment_test.rb +217 -0
  92. data/doc/demo/test/unit/content_test.rb +705 -0
  93. data/doc/demo/test/unit/ferret_result_test.rb +24 -0
  94. data/doc/demo/test/unit/multi_index_test.rb +329 -0
  95. data/doc/demo/test/unit/remote_index_test.rb +23 -0
  96. data/doc/demo/test/unit/shared_index1_test.rb +108 -0
  97. data/doc/demo/test/unit/shared_index2_test.rb +13 -0
  98. data/doc/demo/test/unit/sort_test.rb +21 -0
  99. data/doc/demo/test/unit/special_content_test.rb +25 -0
  100. data/doc/demo/vendor/plugins/will_paginate/LICENSE +18 -0
  101. data/doc/demo/vendor/plugins/will_paginate/README +108 -0
  102. data/doc/demo/vendor/plugins/will_paginate/Rakefile +23 -0
  103. data/doc/demo/vendor/plugins/will_paginate/init.rb +21 -0
  104. data/doc/demo/vendor/plugins/will_paginate/lib/will_paginate/collection.rb +45 -0
  105. data/doc/demo/vendor/plugins/will_paginate/lib/will_paginate/core_ext.rb +44 -0
  106. data/doc/demo/vendor/plugins/will_paginate/lib/will_paginate/finder.rb +159 -0
  107. data/doc/demo/vendor/plugins/will_paginate/lib/will_paginate/view_helpers.rb +95 -0
  108. data/doc/demo/vendor/plugins/will_paginate/test/array_pagination_test.rb +23 -0
  109. data/doc/demo/vendor/plugins/will_paginate/test/boot.rb +27 -0
  110. data/doc/demo/vendor/plugins/will_paginate/test/console +10 -0
  111. data/doc/demo/vendor/plugins/will_paginate/test/finder_test.rb +219 -0
  112. data/doc/demo/vendor/plugins/will_paginate/test/fixtures/admin.rb +3 -0
  113. data/doc/demo/vendor/plugins/will_paginate/test/fixtures/companies.yml +24 -0
  114. data/doc/demo/vendor/plugins/will_paginate/test/fixtures/company.rb +23 -0
  115. data/doc/demo/vendor/plugins/will_paginate/test/fixtures/developer.rb +11 -0
  116. data/doc/demo/vendor/plugins/will_paginate/test/fixtures/developers_projects.yml +13 -0
  117. data/doc/demo/vendor/plugins/will_paginate/test/fixtures/project.rb +4 -0
  118. data/doc/demo/vendor/plugins/will_paginate/test/fixtures/projects.yml +7 -0
  119. data/doc/demo/vendor/plugins/will_paginate/test/fixtures/replies.yml +20 -0
  120. data/doc/demo/vendor/plugins/will_paginate/test/fixtures/reply.rb +5 -0
  121. data/doc/demo/vendor/plugins/will_paginate/test/fixtures/schema.sql +44 -0
  122. data/doc/demo/vendor/plugins/will_paginate/test/fixtures/topic.rb +19 -0
  123. data/doc/demo/vendor/plugins/will_paginate/test/fixtures/topics.yml +30 -0
  124. data/doc/demo/vendor/plugins/will_paginate/test/fixtures/user.rb +2 -0
  125. data/doc/demo/vendor/plugins/will_paginate/test/fixtures/users.yml +35 -0
  126. data/doc/demo/vendor/plugins/will_paginate/test/helper.rb +42 -0
  127. data/doc/demo/vendor/plugins/will_paginate/test/lib/activerecord_test_connector.rb +64 -0
  128. data/doc/demo/vendor/plugins/will_paginate/test/lib/load_fixtures.rb +10 -0
  129. data/doc/demo/vendor/plugins/will_paginate/test/pagination_test.rb +136 -0
  130. data/doc/monit-example +22 -0
  131. data/init.rb +24 -0
  132. data/install.rb +18 -0
  133. data/lib/act_methods.rb +147 -0
  134. data/lib/acts_as_ferret.rb +584 -0
  135. data/lib/ar_mysql_auto_reconnect_patch.rb +41 -0
  136. data/lib/blank_slate.rb +53 -0
  137. data/lib/bulk_indexer.rb +38 -0
  138. data/lib/class_methods.rb +270 -0
  139. data/lib/ferret_extensions.rb +188 -0
  140. data/lib/ferret_find_methods.rb +141 -0
  141. data/lib/ferret_result.rb +53 -0
  142. data/lib/ferret_server.rb +238 -0
  143. data/lib/index.rb +99 -0
  144. data/lib/instance_methods.rb +171 -0
  145. data/lib/local_index.rb +205 -0
  146. data/lib/more_like_this.rb +217 -0
  147. data/lib/multi_index.rb +126 -0
  148. data/lib/rdig_adapter.rb +148 -0
  149. data/lib/remote_functions.rb +23 -0
  150. data/lib/remote_index.rb +54 -0
  151. data/lib/remote_multi_index.rb +20 -0
  152. data/lib/search_results.rb +50 -0
  153. data/lib/server_manager.rb +58 -0
  154. data/lib/unix_daemon.rb +64 -0
  155. data/lib/without_ar.rb +52 -0
  156. data/rakefile +141 -0
  157. data/recipes/aaf_recipes.rb +114 -0
  158. data/script/ferret_daemon +94 -0
  159. data/script/ferret_server +10 -0
  160. data/script/ferret_service +178 -0
  161. data/tasks/ferret.rake +22 -0
  162. metadata +258 -0
data/doc/monit-example ADDED
@@ -0,0 +1,22 @@
1
+ # monit configuration snippet to watch the Ferret DRb server shipped with
2
+ # acts_as_ferret
3
+ check process ferret with pidfile /path/to/ferret.pid
4
+
5
+ # username is the user the drb server should be running as (It's good practice
6
+ # to run such services as a non-privileged user)
7
+ start program = "/bin/su -c 'cd /path/to/your/app/current/ && script/ferret_server -e production start' username"
8
+ stop program = "/bin/su -c 'cd /path/to/your/app/current/ && script/ferret_server -e production stop' username"
9
+
10
+ # cpu usage boundaries
11
+ if cpu > 60% for 2 cycles then alert
12
+ if cpu > 90% for 5 cycles then restart
13
+
14
+ # memory usage varies with index size and usage scenarios, so check how
15
+ # much memory your DRb server uses up usually and add some spare to that
16
+ # before enabling this rule:
17
+ # if totalmem > 50.0 MB for 5 cycles then restart
18
+
19
+ # adjust port numbers according to your setup:
20
+ if failed port 9010 then alert
21
+ if failed port 9010 for 2 cycles then restart
22
+ group ferret
data/init.rb ADDED
@@ -0,0 +1,24 @@
1
+ # Copyright (c) 2006 Kasper Weibel Nielsen-Refs, Thomas Lockney, Jens Krämer
2
+ #
3
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ # of this software and associated documentation files (the "Software"), to deal
5
+ # in the Software without restriction, including without limitation the rights
6
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
+ # copies of the Software, and to permit persons to whom the Software is
8
+ # furnished to do so, subject to the following conditions:
9
+ #
10
+ # The above copyright notice and this permission notice shall be included in all
11
+ # copies or substantial portions of the Software.
12
+ #
13
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19
+ # SOFTWARE.
20
+
21
+ require 'acts_as_ferret'
22
+
23
+ config.after_initialize { ActsAsFerret::load_config }
24
+ config.to_prepare { ActsAsFerret::load_config }
data/install.rb ADDED
@@ -0,0 +1,18 @@
1
+ # acts_as_ferret install script
2
+ require 'fileutils'
3
+
4
+ def install(file)
5
+ puts "Installing: #{file}"
6
+ target = File.join(File.dirname(__FILE__), '..', '..', '..', file)
7
+ if File.exists?(target)
8
+ puts "target #{target} already exists, skipping"
9
+ else
10
+ FileUtils.cp File.join(File.dirname(__FILE__), file), target
11
+ end
12
+ end
13
+
14
+ install File.join( 'script', 'ferret_server' )
15
+ install File.join( 'config', 'ferret_server.yml' )
16
+
17
+ puts IO.read(File.join(File.dirname(__FILE__), 'README'))
18
+
@@ -0,0 +1,147 @@
1
+ module ActsAsFerret #:nodoc:
2
+
3
+ # This module defines the acts_as_ferret method and is included into
4
+ # ActiveRecord::Base
5
+ module ActMethods
6
+
7
+
8
+ def reloadable?; false end
9
+
10
+ # declares a class as ferret-searchable.
11
+ #
12
+ # ====options:
13
+ # fields:: names all fields to include in the index. If not given,
14
+ # all attributes of the class will be indexed. You may also give
15
+ # symbols pointing to instance methods of your model here, i.e.
16
+ # to retrieve and index data from a related model.
17
+ #
18
+ # additional_fields:: names fields to include in the index, in addition
19
+ # to those derived from the db schema. Use if you want
20
+ # to add custom fields derived from methods to the db
21
+ # fields (which will be picked by aaf). This option will
22
+ # be ignored when the fields option is given, in that
23
+ # case additional fields get specified there.
24
+ #
25
+ # if:: Can be set to a block that will be called with the record in question
26
+ # to determine if it should be indexed or not.
27
+ #
28
+ # index_dir:: declares the directory where to put the index for this class.
29
+ # The default is RAILS_ROOT/index/RAILS_ENV/CLASSNAME.
30
+ # The index directory will be created if it doesn't exist.
31
+ #
32
+ # reindex_batch_size:: reindexing is done in batches of this size, default is 1000
33
+ # mysql_fast_batches:: set this to false to disable the faster mysql batching
34
+ # algorithm if this model uses a non-integer primary key named
35
+ # 'id' on MySQL.
36
+ #
37
+ # ferret:: Hash of Options that directly influence the way the Ferret engine works. You
38
+ # can use most of the options the Ferret::I class accepts here, too. Among the
39
+ # more useful are:
40
+ #
41
+ # or_default:: whether query terms are required by
42
+ # default (the default, false), or not (true)
43
+ #
44
+ # analyzer:: the analyzer to use for query parsing (default: nil,
45
+ # which means the ferret StandardAnalyzer gets used)
46
+ #
47
+ # default_field:: use to set one or more fields that are searched for query terms
48
+ # that don't have an explicit field list. This list should *not*
49
+ # contain any untokenized fields. If it does, you're asking
50
+ # for trouble (i.e. not getting results for queries having
51
+ # stop words in them). Aaf by default initializes the default field
52
+ # list to contain all tokenized fields. If you use :single_index => true,
53
+ # you really should set this option specifying your default field
54
+ # list (which should be equal in all your classes sharing the index).
55
+ # Otherwise you might get incorrect search results and you won't get
56
+ # any lazy loading of stored field data.
57
+ #
58
+ # For backwards compatibility reasons you can also specify the Ferret options in the
59
+ # last Hash argument.
60
+ def acts_as_ferret(options={})
61
+
62
+ extend ClassMethods
63
+
64
+ include InstanceMethods
65
+ include MoreLikeThis::InstanceMethods
66
+
67
+ if options[:rdig]
68
+ cattr_accessor :rdig_configuration
69
+ self.rdig_configuration = options[:rdig]
70
+ require 'rdig_adapter'
71
+ include ActsAsFerret::RdigAdapter
72
+ end
73
+
74
+ unless included_modules.include?(ActsAsFerret::WithoutAR)
75
+ # set up AR hooks
76
+ after_create :ferret_create
77
+ after_update :ferret_update
78
+ after_destroy :ferret_destroy
79
+ end
80
+
81
+ cattr_accessor :aaf_configuration
82
+
83
+ # apply default config for rdig based models
84
+ if options[:rdig]
85
+ options[:fields] ||= { :title => { :boost => 3, :store => :yes },
86
+ :content => { :store => :yes } }
87
+ end
88
+
89
+ # name of this index
90
+ index_name = options.delete(:index) || self.name.underscore
91
+
92
+ index = ActsAsFerret::register_class_with_index(self, index_name, options)
93
+ self.aaf_configuration = index.index_definition.dup
94
+ # logger.debug "configured index for class #{self.name}:\n#{aaf_configuration.inspect}"
95
+
96
+ # update our copy of the global index config with options local to this class
97
+ aaf_configuration[:class_name] ||= self.name
98
+ aaf_configuration[:if] ||= options[:if]
99
+
100
+ # add methods for retrieving field values
101
+ add_fields options[:fields]
102
+ add_fields options[:additional_fields]
103
+ add_fields aaf_configuration[:fields]
104
+ add_fields aaf_configuration[:additional_fields]
105
+
106
+ end
107
+
108
+
109
+ protected
110
+
111
+
112
+ # helper that defines a method which adds the given field to a ferret
113
+ # document instance
114
+ def define_to_field_method(field, options = {})
115
+ method_name = "#{field}_to_ferret"
116
+ return if instance_methods.include?(method_name) # already defined
117
+ aaf_configuration[:defined_fields] ||= {}
118
+ aaf_configuration[:defined_fields][field] = options
119
+ dynamic_boost = options[:boost] if options[:boost].is_a?(Symbol)
120
+ via = options[:via] || field
121
+ define_method(method_name.to_sym) do
122
+ val = begin
123
+ content_for_field_name(field, via, dynamic_boost)
124
+ rescue
125
+ logger.warn("Error retrieving value for field #{field}: #{$!}")
126
+ ''
127
+ end
128
+ logger.debug("Adding field #{field} with value '#{val}' to index")
129
+ val
130
+ end
131
+ end
132
+
133
+ def add_fields(field_config)
134
+ if field_config.is_a? Hash
135
+ field_config.each_pair do |field, options|
136
+ define_to_field_method field, options
137
+ end
138
+ elsif field_config.respond_to?(:each)
139
+ field_config.each do |field|
140
+ define_to_field_method field
141
+ end
142
+ end
143
+ end
144
+
145
+ end
146
+
147
+ end
@@ -0,0 +1,584 @@
1
+ # Copyright (c) 2006 Kasper Weibel Nielsen-Refs, Thomas Lockney, Jens Krämer
2
+ #
3
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ # of this software and associated documentation files (the "Software"), to deal
5
+ # in the Software without restriction, including without limitation the rights
6
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
+ # copies of the Software, and to permit persons to whom the Software is
8
+ # furnished to do so, subject to the following conditions:
9
+ #
10
+ # The above copyright notice and this permission notice shall be included in all
11
+ # copies or substantial portions of the Software.
12
+ #
13
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19
+ # SOFTWARE.
20
+
21
+ require 'active_support'
22
+ require 'active_record'
23
+ require 'set'
24
+ require 'enumerator'
25
+ require 'ferret'
26
+
27
+ require 'ferret_find_methods'
28
+ require 'remote_functions'
29
+ require 'blank_slate'
30
+ require 'bulk_indexer'
31
+ require 'ferret_extensions'
32
+ require 'act_methods'
33
+ require 'search_results'
34
+ require 'class_methods'
35
+ require 'ferret_result'
36
+ require 'instance_methods'
37
+ require 'without_ar'
38
+
39
+ require 'multi_index'
40
+ require 'remote_multi_index'
41
+ require 'more_like_this'
42
+
43
+ require 'index'
44
+ require 'local_index'
45
+ require 'remote_index'
46
+
47
+ require 'ferret_server'
48
+
49
+ require 'rdig_adapter'
50
+
51
+ # The Rails ActiveRecord Ferret Mixin.
52
+ #
53
+ # This mixin adds full text search capabilities to any Rails model.
54
+ #
55
+ # The current version emerged from the original acts_as_ferret plugin done by
56
+ # Kasper Weibel and a modified version done by Thomas Lockney, which both can be
57
+ # found on the Ferret Wiki: http://ferret.davebalmain.com/trac/wiki/FerretOnRails.
58
+ #
59
+ # basic usage:
60
+ # include the following in your model class (specifying the fields you want to get indexed):
61
+ # acts_as_ferret :fields => [ :title, :description ]
62
+ #
63
+ # now you can use ModelClass.find_with_ferret(query) to find instances of your model
64
+ # whose indexed fields match a given query. All query terms are required by default, but
65
+ # explicit OR queries are possible. This differs from the ferret default, but imho is the more
66
+ # often needed/expected behaviour (more query terms result in fewer results).
67
+ #
68
+ # Released under the MIT license.
69
+ #
70
+ # Authors:
71
+ # Kasper Weibel Nielsen-Refs (original author)
72
+ # Jens Kraemer <jk@jkraemer.net> (active maintainer since 2006)
73
+ #
74
+ #
75
+ # == Global properties
76
+ #
77
+ # raise_drb_errors:: Set this to true if you want aaf to raise Exceptions
78
+ # in case the DRb server cannot be reached (in other words - behave like
79
+ # versions up to 0.4.3). Defaults to false so DRb exceptions
80
+ # are logged but not raised. Be sure to set up some
81
+ # monitoring so you still detect when your DRb server died for
82
+ # whatever reason.
83
+ #
84
+ # remote:: Set this to false to force acts_as_ferret into local (non-DRb) mode even if
85
+ # config/ferret_server.yml contains a section for the current RAILS_ENV.
86
+ # Usually you won't need to touch this option - just configure DRb for
87
+ # production mode in ferret_server.yml.
88
+ #
89
+ module ActsAsFerret
90
+
91
+ class ActsAsFerretError < StandardError; end
92
+ class IndexNotDefined < ActsAsFerretError; end
93
+ class IndexAlreadyDefined < ActsAsFerretError; end
94
+
95
+ # global Hash containing all multi indexes created by all classes using the plugin
96
+ # key is the concatenation of alphabetically sorted names of the classes the
97
+ # searcher searches.
98
+ @@multi_indexes = Hash.new
99
+ def self.multi_indexes; @@multi_indexes end
100
+
101
+ # global Hash containing the ferret indexes of all classes using the plugin
102
+ # key is the index name.
103
+ @@ferret_indexes = Hash.new
104
+ def self.ferret_indexes; @@ferret_indexes end
105
+
106
+ # mapping from class name to index name
107
+ @@index_using_classes = {}
108
+
109
+ @@logger = Logger.new "#{RAILS_ROOT}/log/acts_as_ferret.log"
110
+ @@logger.level = ActiveRecord::Base.logger.level rescue Logger::DEBUG
111
+ mattr_accessor :logger
112
+
113
+
114
+ # Default ferret configuration for index fields
115
+ DEFAULT_FIELD_OPTIONS = {
116
+ :store => :no,
117
+ :highlight => :yes,
118
+ :index => :yes,
119
+ :term_vector => :with_positions_offsets,
120
+ :boost => 1.0
121
+ }
122
+
123
+ @@raise_drb_errors = false
124
+ mattr_writer :raise_drb_errors
125
+ def self.raise_drb_errors?; @@raise_drb_errors end
126
+
127
+ @@remote = nil
128
+ mattr_accessor :remote
129
+ def self.remote?
130
+ if @@remote.nil?
131
+ if ENV["FERRET_USE_LOCAL_INDEX"] || ActsAsFerret::Remote::Server.running
132
+ @@remote = false
133
+ else
134
+ @@remote = ActsAsFerret::Remote::Config.new.uri rescue false
135
+ end
136
+ if @@remote
137
+ logger.info "Will use remote index server which should be available at #{@@remote}"
138
+ else
139
+ logger.info "Will use local index."
140
+ end
141
+ end
142
+ @@remote
143
+ end
144
+ remote?
145
+
146
+
147
+ # Globally declares an index.
148
+ #
149
+ # This method is also used to implicitly declare an index when you use the
150
+ # acts_as_ferret call in your class. Returns the created index instance.
151
+ #
152
+ # === Options are:
153
+ #
154
+ # +models+:: Hash of model classes and their per-class option hashes which should
155
+ # use this index. Any models mentioned here will automatically use
156
+ # the index, there is no need to explicitly call +acts_as_ferret+ in the
157
+ # model class definition.
158
+ def self.define_index(name, options = {})
159
+ name = name.to_sym
160
+ pending_classes = nil
161
+ if ferret_indexes.has_key?(name)
162
+ # seems models have been already loaded. remove that index for now,
163
+ # re-register any already loaded classes later on.
164
+ idx = get_index(name)
165
+ pending_classes = idx.index_definition[:registered_models]
166
+ pending_classes_configs = idx.registered_models_config
167
+ idx.close
168
+ ferret_indexes.delete(name)
169
+ end
170
+
171
+ index_definition = {
172
+ :index_dir => "#{ActsAsFerret::index_dir}/#{name}",
173
+ :name => name,
174
+ :single_index => false,
175
+ :reindex_batch_size => 1000,
176
+ :ferret => {},
177
+ :ferret_fields => {}, # list of indexed fields that will be filled later
178
+ :enabled => true, # used for class-wide disabling of Ferret
179
+ :mysql_fast_batches => true, # turn off to disable the faster, id based batching mechanism for MySQL
180
+ :raise_drb_errors => false # handle DRb connection errors by default
181
+ }.update( options )
182
+
183
+ index_definition[:registered_models] = []
184
+
185
+ # build ferret configuration
186
+ index_definition[:ferret] = {
187
+ :or_default => false,
188
+ :handle_parse_errors => true,
189
+ :default_field => nil, # will be set later on
190
+ #:max_clauses => 512,
191
+ #:analyzer => Ferret::Analysis::StandardAnalyzer.new,
192
+ # :wild_card_downcase => true
193
+ }.update( options[:ferret] || {} )
194
+
195
+ index_definition[:user_default_field] = index_definition[:ferret][:default_field]
196
+
197
+ unless remote?
198
+ ActsAsFerret::ensure_directory index_definition[:index_dir]
199
+ index_definition[:index_base_dir] = index_definition[:index_dir]
200
+ index_definition[:index_dir] = find_last_index_version(index_definition[:index_dir])
201
+ logger.debug "using index in #{index_definition[:index_dir]}"
202
+ end
203
+
204
+ # these properties are somewhat vital to the plugin and shouldn't
205
+ # be overwritten by the user:
206
+ index_definition[:ferret].update(
207
+ :key => :key,
208
+ :path => index_definition[:index_dir],
209
+ :auto_flush => true, # slower but more secure in terms of locking problems TODO disable when running in drb mode?
210
+ :create_if_missing => true
211
+ )
212
+
213
+ # field config
214
+ index_definition[:ferret_fields] = build_field_config( options[:fields] )
215
+ index_definition[:ferret_fields].update build_field_config( options[:additional_fields] )
216
+
217
+ idx = ferret_indexes[name] = create_index_instance( index_definition )
218
+
219
+ # re-register early loaded classes
220
+ if pending_classes
221
+ pending_classes.each { |clazz| idx.register_class clazz, { :force_re_registration => true }.merge(pending_classes_configs[clazz]) }
222
+ end
223
+
224
+ if models = options[:models]
225
+ models.each do |clazz, config|
226
+ clazz.send :include, ActsAsFerret::WithoutAR unless clazz.respond_to?(:acts_as_ferret)
227
+ clazz.acts_as_ferret config.merge(:index => name)
228
+ end
229
+ end
230
+
231
+ return idx
232
+ end
233
+
234
+ # called internally by the acts_as_ferret method
235
+ #
236
+ # returns the index
237
+ def self.register_class_with_index(clazz, index_name, options = {})
238
+ index_name = index_name.to_sym
239
+ @@index_using_classes[clazz.name] = index_name
240
+ unless index = ferret_indexes[index_name]
241
+ # index definition on the fly
242
+ # default to all attributes of this class
243
+ options[:fields] ||= clazz.new.attributes.keys.map { |k| k.to_sym }
244
+ index = define_index index_name, options
245
+ end
246
+ index.register_class(clazz, options)
247
+ return index
248
+ end
249
+
250
+ def self.load_config
251
+ # using require_dependency to make the reloading in dev mode work.
252
+ require_dependency "#{RAILS_ROOT}/config/aaf.rb"
253
+ ActsAsFerret::logger.info "loaded configuration file aaf.rb"
254
+ rescue LoadError
255
+ ensure
256
+ @aaf_config_loaded = true
257
+ end
258
+
259
+ # returns the index with the given name.
260
+ def self.get_index(name)
261
+ name = name.to_sym rescue nil
262
+ unless ferret_indexes.has_key?(name)
263
+ if @aaf_config_loaded
264
+ raise IndexNotDefined.new(name.to_s)
265
+ else
266
+ load_config and return get_index name
267
+ end
268
+ end
269
+ ferret_indexes[name]
270
+ end
271
+
272
+ # count hits for a query
273
+ def self.total_hits(query, models_or_index_name, options = {})
274
+ options = add_models_to_options_if_necessary options, models_or_index_name
275
+ find_index(models_or_index_name).total_hits query, options
276
+ end
277
+
278
+ # find ids of records
279
+ def self.find_ids(query, models_or_index_name, options = {}, &block)
280
+ options = add_models_to_options_if_necessary options, models_or_index_name
281
+ find_index(models_or_index_name).find_ids query, options, &block
282
+ end
283
+
284
+ # returns an index instance suitable for searching/updating the named index. Will
285
+ # return a read only MultiIndex when multiple model classes are given that do not
286
+ # share the same physical index.
287
+ def self.find_index(models_or_index_name)
288
+ case models_or_index_name
289
+ when Symbol
290
+ get_index models_or_index_name
291
+ when String
292
+ get_index models_or_index_name.to_sym
293
+ else
294
+ get_index_for models_or_index_name
295
+ end
296
+ end
297
+
298
+ # models_or_index_name may be an index name as declared in config/aaf.rb,
299
+ # a single class or an array of classes to limit search to these classes.
300
+ def self.find(query, models_or_index_name, options = {}, ar_options = {})
301
+ models = case models_or_index_name
302
+ when Array
303
+ models_or_index_name
304
+ when Class
305
+ [ models_or_index_name ]
306
+ else
307
+ nil
308
+ end
309
+ index = find_index(models_or_index_name)
310
+ multi = (MultiIndexBase === index or index.shared?)
311
+ unless options[:per_page]
312
+ options[:limit] ||= ar_options.delete :limit
313
+ options[:offset] ||= ar_options.delete :offset
314
+ end
315
+ if options[:limit] || options[:per_page]
316
+ # need pagination
317
+ options[:page] = if options[:per_page]
318
+ options[:page] ? options[:page].to_i : 1
319
+ else
320
+ nil
321
+ end
322
+ limit = options[:limit] || options[:per_page]
323
+ offset = options[:offset] || (options[:page] ? (options[:page] - 1) * limit : 0)
324
+ options.delete :offset
325
+ options[:limit] = :all
326
+
327
+ if multi or ((ar_options[:conditions] || ar_options[:order]) && options[:sort])
328
+ # do pagination as the last step after everything has been fetched
329
+ options[:late_pagination] = { :limit => limit, :offset => offset }
330
+ elsif ar_options[:conditions] or ar_options[:order]
331
+ # late limiting in AR call
332
+ unless limit == :all
333
+ ar_options[:limit] = limit
334
+ ar_options[:offset] = offset
335
+ end
336
+ else
337
+ options[:limit] = limit
338
+ options[:offset] = offset
339
+ end
340
+ end
341
+ ActsAsFerret::logger.debug "options: #{options.inspect}\nar_options: #{ar_options.inspect}"
342
+ total_hits, result = index.find_records query, options.merge(:models => models), ar_options
343
+ ActsAsFerret::logger.debug "Query: #{query}\ntotal hits: #{total_hits}, results delivered: #{result.size}"
344
+ SearchResults.new(result, total_hits, options[:page], options[:per_page])
345
+ end
346
+
347
+ def self.filter_include_list_for_model(model, include_options)
348
+ filtered_include_options = []
349
+ include_options = Array(include_options)
350
+ include_options.each do |include_option|
351
+ filtered_include_options << include_option if model.reflections.has_key?(include_option.is_a?(Hash) ? include_option.keys[0].to_sym : include_option.to_sym)
352
+ end
353
+ return filtered_include_options
354
+ end
355
+
356
# Returns the index used by the given class.
#
# If multiple classes are given, either the single index shared by these
# classes, or a multi index (to be used for search only) across the indexes
# of all models, is returned.
#
# classes - one or more classes (or class names, which are constantized);
#           nested arrays are flattened.
#
# Raises ArgumentError if no class is given, and IndexNotDefined if neither
# the class nor any of its ancestors has a registered index.
def self.get_index_for(*classes)
  classes.flatten!
  raise ArgumentError.new("no class specified") unless classes.any?
  classes.map!(&:constantize) unless Class === classes.first
  logger.debug "index_for #{classes.inspect}"
  index = if classes.size > 1
    indexes = classes.map { |c| get_index_for c }.uniq
    indexes.size > 1 ? multi_index(indexes) : indexes.first
  else
    # walk up the inheritance chain until a class with a registered index is found
    clazz = classes.first
    clazz = clazz.superclass while clazz && !@@index_using_classes.has_key?(clazz.name)
    # clazz is nil when the whole chain was walked without a match; fall
    # through to IndexNotDefined below instead of calling #name on nil
    clazz && get_index(@@index_using_classes[clazz.name])
  end
  raise IndexNotDefined.new("no index found for class: #{classes.map(&:name).join(',')}") if index.nil?
  index
end
377
+
378
+
379
# Builds a new index object for the given definition: a RemoteIndex when
# remote? is true, a LocalIndex otherwise.
def self.create_index_instance(definition)
  index_class = remote? ? RemoteIndex : LocalIndex
  index_class.new(definition)
end
383
+
384
# Looks up the index registered under +name+ and asks it to rebuild itself.
def self.rebuild_index(name)
  index = get_index(name)
  index.rebuild_index
end
387
+
388
# Looks up the index registered under +name+ and switches it to +new_dir+.
def self.change_index_dir(name, new_dir)
  index = get_index(name)
  index.change_index_dir(new_dir)
end
391
+
392
# Finds the most recent version of an index below +basedir+.
#
# A versioned index is a sub directory named "<digits>" or "<digits>_<digits>"
# that contains a 'segments' file. Versions are compared numerically (main
# number first, then the optional suffix), so "10" is newer than "9" and
# "5_10" is newer than "5_9".
#
# Returns the path of the newest version directory, or +basedir+ itself when
# no versioned index exists.
def self.find_last_index_version(basedir)
  versions = Dir.entries(basedir).select do |entry|
    dir = File.join(basedir, entry)
    # \A..\z anchor the whole name; ^..$ would only anchor a single line
    entry =~ /\A\d+(_\d+)?\z/ && File.directory?(dir) && File.file?(File.join(dir, 'segments'))
  end
  return basedir if versions.empty?

  # numeric sort key, with the raw name as a deterministic tie breaker
  latest = versions.sort_by { |version| [ version.split('_').map { |part| part.to_i }, version ] }.last
  File.join(basedir, latest)
end
407
+
408
# Returns a multi index spanning the given indexes, creating and caching it
# on first use.
#
# indexes - Array of index names (all Symbols or all Strings, judged by the
#           first element) or of index objects responding to +index_name+.
#
# The cache key is the sorted, comma-joined list of index names; the instance
# is a RemoteMultiIndex when remote? is true, a MultiIndex otherwise.
def self.multi_index(indexes)
  first = indexes.first
  if first.is_a?(Symbol) || first.is_a?(String)
    names = first.is_a?(Symbol) ? indexes.map { |name| name.to_s } : indexes.dup
    # names were given, so resolve them to the actual index objects
    indexes = names.map { |name| get_index(name) }
  else
    names = indexes.map { |index| index.index_name.to_s }
  end

  cache_key = names.sort.join(",")
  ActsAsFerret::multi_indexes[cache_key] ||= begin
    multi_class = remote? ? ActsAsFerret::RemoteMultiIndex : ActsAsFerret::MultiIndex
    multi_class.new(indexes)
  end
end
420
+
421
# Picks the conditions that apply to +model+.
#
# When +conditions+ is a Hash it is treated as a per-model lookup keyed by the
# underscored model name as a Symbol, and the entry for +model+ (or nil) is
# returned. Any other value is returned unchanged.
def self.conditions_for_model(model, conditions = {})
  return conditions unless conditions.is_a?(Hash)
  conditions[model.name.underscore.to_sym]
end
429
+
430
# Retrieves search result records from a data structure like this:
#   { 'Model1' => { '1' => [ rank, score ], '2' => [ rank, score ] } }
#
# id_arrays    - Hash mapping model class names to a Hash of
#                id (String) => [ rank, score ].
# find_options - options passed on to the model's find(:all, ...) call;
#                :conditions may be per-model (see conditions_for_model) and
#                :include is filtered per model.
#
# Every record gets its ferret_rank and ferret_score assigned. Unless an AR
# :order option was given, the combined result is sorted by ferret_rank.
#
# TODO: in case of STI AR will filter out hits from other classes for us, but
# this will lead to less results retrieved --> scoping of ferret query to
# self.class is still needed.
# from the ferret ML (thanks Curtis Hatter)
# > I created a method in my base STI class so I can scope my query. For scoping
# > I used something like the following line:
# >
# > query << " role:#{self.class.eql?(Contents) ? '*' : self.class}"
# >
# > Though you could make it more generic by simply asking
# > "self.descends_from_active_record?" which is how rails decides if it should
# > scope your "find" query for STI models. You can check out "base.rb" in
# > activerecord to see that.
# but maybe better do the scoping in find_ids_with_ferret...
def self.retrieve_records(id_arrays, find_options = {})
  records = []

  id_arrays.each do |model_name, hits|
    next if hits.empty?
    model_class = model_name.constantize

    # restrict to the ids ferret found, combined with the user's conditions
    user_conditions = conditions_for_model(model_class, find_options[:conditions])
    id_condition = [ "#{model_class.table_name}.#{model_class.primary_key} in (?)", hits.keys ]
    conditions = combine_conditions(id_condition, user_conditions)

    # an included association might only exist on some models in case of multi_search
    requested_includes = find_options[:include]
    includes = requested_includes ? filter_include_list_for_model(model_class, requested_includes) : nil

    found = model_class.find(:all, find_options.merge(:conditions => conditions, :include => includes))

    # attach rank and score as delivered by ferret
    found.each do |record|
      rank, score = hits[record.id.to_s]
      record.ferret_rank = rank
      record.ferret_score = score
    end

    records.concat(found)
  end

  # keep ferret's ordering unless the caller asked AR to order the records
  ordered_by_ar = find_options[:order]
  records.sort! { |left, right| left.ferret_rank <=> right.ferret_rank } unless ordered_by_ar
  records
end
484
+
485
# Combines our own conditions with those given by the user, if any.
#
# conditions            - Array of [ sql, bind values... ]; it is extended in
#                         place and returned.
# additional_conditions - nil, a SQL String, or an Array of
#                         [ sql, bind values... ]; never modified.
#
# The user's SQL fragment is wrapped in parentheses so that an "or" inside it
# cannot bypass the conditions it is combined with.
#
# Returns +conditions+ (the same Array object).
def self.combine_conditions(conditions, additional_conditions = [])
  has_additional = case additional_conditions
                   when nil, false then false
                   # String lost #any? with Ruby 1.9, so test it explicitly
                   when String then !additional_conditions.empty?
                   else additional_conditions.any?
                   end
  return conditions unless has_additional

  cust_opts = (Array === additional_conditions) ? additional_conditions.dup : [ additional_conditions ]
  logger.debug "cust_opts: #{cust_opts.inspect}"
  # String#+ keeps raising TypeError for non-String fragments
  conditions[0] = conditions.first + " and (" + cust_opts.shift + ")"
  conditions.concat(cust_opts)
end
496
+
497
# Builds the per-field configuration hash from the :fields option.
#
# fields - nil/false (no fields), an Array of field names (each configured
#          via field_config_for with default options), or a Hash of
#          field name => options.
#
# Returns a Hash of field name => configuration; empty when +fields+ is not
# given. Any other type of +fields+ is rejected.
#
# NOTE(review): InvalidArgumentError is not defined in the visible part of
# this file - confirm it exists, otherwise this line raises NameError.
def self.build_field_config(fields)
  return {} unless fields

  case fields
  when Array
    fields.inject({}) do |config, name|
      config[name] = field_config_for(name)
      config
    end
  when Hash
    fields.inject({}) do |config, (name, options)|
      config[name] = field_config_for(name, options)
      config
    end
  else
    raise InvalidArgumentError.new(":fields option must be Hash or Array")
  end
end
508
+
509
# Creates +dir+ (including missing parents) unless it already is a directory
# or a symlink. Returns nil when nothing had to be created.
def self.ensure_directory(dir)
  return if File.directory?(dir) || File.symlink?(dir)
  FileUtils.mkdir_p(dir)
end
512
+
513
+
514
# Sets the default index base dir. By default, all indexes are created under
# RAILS_ROOT/index/RAILS_ENV. This only builds the path string and stores it
# in @@index_dir; it does not create the directory.
def self.init_index_basedir
  @@index_dir = "#{RAILS_ROOT}/index/#{RAILS_ENV}"
end

# module-level accessor for the index directory, initialized to the default
mattr_accessor :index_dir
init_index_basedir
523
+
524
# Hook run when this module is included into +base+: performs the regular
# inclusion and additionally extends +base+ with ClassMethods.
def self.append_features(base)
  super(base)
  base.extend ClassMethods
end
528
+
529
# Builds a FieldInfos instance for creation of an index.
#
# index_definition - Hash whose :ferret_fields entry maps field names to
#                    their option hashes.
#
# Returns the populated Ferret::Index::FieldInfos.
def self.field_infos(index_definition)
  # default attributes for fields
  infos = Ferret::Index::FieldInfos.new(:store => :no,
                                        :index => :yes,
                                        :term_vector => :no,
                                        :boost => 1.0)

  # fixed, untokenized fields:
  #   :key        - unique key composed of classname and id
  #   :id         - primary key
  #   :class_name - class name
  [ [ :key, :no ], [ :id, :yes ], [ :class_name, :yes ] ].each do |name, store|
    infos.add_field(name, :store => store, :index => :untokenized)
  end

  # other fields
  index_definition[:ferret_fields].each_pair do |field, options|
    ferret_options = options.dup # leave the index definition untouched
    ferret_options.delete(:via)
    # a Symbol boost is a dynamic boost and is not passed on to add_field
    dynamic_boost = ferret_options[:boost].is_a?(Symbol)
    ferret_options.delete(:boost) if dynamic_boost
    infos.add_field(field, ferret_options)
  end

  infos
end
552
+
553
# Closes every cached multi index and empties the cache.
#
# Closing the combined index readers, just in case: this seems to fix a
# strange test failure that seems to relate to a multi_index looking at an
# old version of the content_base index.
#
# TODO only close those where necessary (watch inheritance, where self.name
# is base class of a class where the cache key is made from), e.g. only
# those whose key matches /#{self.name}/
def self.close_multi_indexes
  multi_indexes.each_value { |index| index.close }
  multi_indexes.clear
end
565
+
566
+ protected
567
+
568
# Adds a :models entry to +options+ when the second argument is a list of
# models rather than an index name.
#
# Returns +options+ itself when +models_or_index_name+ is a String or Symbol
# (i.e. an index name), otherwise a merged copy with :models set.
#
# Note: this is defined with `def self.`, so the bare `protected` preceding it
# does not change its visibility.
def self.add_models_to_options_if_necessary(options, models_or_index_name)
  case models_or_index_name
  when String, Symbol
    options
  else
    options.merge(:models => models_or_index_name)
  end
end
572
+
573
# Builds the configuration for a single field: DEFAULT_FIELD_OPTIONS
# overridden by +options+.
#
# :via defaults to +fieldname+, and :term_vector is forced to :no for fields
# whose :index setting is :no.
def self.field_config_for(fieldname, options = {})
  settings = DEFAULT_FIELD_OPTIONS.merge(options)
  settings[:via] = fieldname unless settings[:via]
  not_indexed = settings[:index] == :no
  settings[:term_vector] = :no if not_indexed
  settings
end
579
+
580
+ end
581
+
582
# make the methods of ActsAsFerret::ActMethods available as class methods on
# ActiveRecord::Base
ActiveRecord::Base.extend(ActsAsFerret::ActMethods)
584
+