watson-acts_as_ferret 0.4.8.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (162) hide show
  1. data/LICENSE +20 -0
  2. data/README +104 -0
  3. data/acts_as_ferret.gemspec +58 -0
  4. data/bin/aaf_install +29 -0
  5. data/config/ferret_server.yml +24 -0
  6. data/doc/README.win32 +23 -0
  7. data/doc/demo/README +154 -0
  8. data/doc/demo/README_DEMO +23 -0
  9. data/doc/demo/Rakefile +10 -0
  10. data/doc/demo/app/controllers/admin/backend_controller.rb +14 -0
  11. data/doc/demo/app/controllers/admin_area_controller.rb +4 -0
  12. data/doc/demo/app/controllers/application.rb +5 -0
  13. data/doc/demo/app/controllers/contents_controller.rb +49 -0
  14. data/doc/demo/app/controllers/searches_controller.rb +8 -0
  15. data/doc/demo/app/helpers/admin/backend_helper.rb +2 -0
  16. data/doc/demo/app/helpers/application_helper.rb +3 -0
  17. data/doc/demo/app/helpers/content_helper.rb +2 -0
  18. data/doc/demo/app/helpers/search_helper.rb +2 -0
  19. data/doc/demo/app/models/comment.rb +48 -0
  20. data/doc/demo/app/models/content.rb +12 -0
  21. data/doc/demo/app/models/content_base.rb +28 -0
  22. data/doc/demo/app/models/search.rb +19 -0
  23. data/doc/demo/app/models/shared_index1.rb +3 -0
  24. data/doc/demo/app/models/shared_index2.rb +3 -0
  25. data/doc/demo/app/models/special_content.rb +3 -0
  26. data/doc/demo/app/models/stats.rb +20 -0
  27. data/doc/demo/app/views/admin/backend/search.rhtml +18 -0
  28. data/doc/demo/app/views/contents/_form.rhtml +10 -0
  29. data/doc/demo/app/views/contents/edit.rhtml +9 -0
  30. data/doc/demo/app/views/contents/index.rhtml +24 -0
  31. data/doc/demo/app/views/contents/new.rhtml +8 -0
  32. data/doc/demo/app/views/contents/show.rhtml +8 -0
  33. data/doc/demo/app/views/layouts/application.html.erb +17 -0
  34. data/doc/demo/app/views/searches/_content.html.erb +2 -0
  35. data/doc/demo/app/views/searches/search.html.erb +20 -0
  36. data/doc/demo/config/boot.rb +109 -0
  37. data/doc/demo/config/database.yml +38 -0
  38. data/doc/demo/config/environment.rb +69 -0
  39. data/doc/demo/config/environments/development.rb +16 -0
  40. data/doc/demo/config/environments/production.rb +19 -0
  41. data/doc/demo/config/environments/test.rb +21 -0
  42. data/doc/demo/config/ferret_server.yml +18 -0
  43. data/doc/demo/config/lighttpd.conf +40 -0
  44. data/doc/demo/config/routes.rb +9 -0
  45. data/doc/demo/db/development_structure.sql +15 -0
  46. data/doc/demo/db/migrate/001_initial_migration.rb +18 -0
  47. data/doc/demo/db/migrate/002_add_type_to_contents.rb +9 -0
  48. data/doc/demo/db/migrate/003_create_shared_index1s.rb +11 -0
  49. data/doc/demo/db/migrate/004_create_shared_index2s.rb +11 -0
  50. data/doc/demo/db/migrate/005_special_field.rb +9 -0
  51. data/doc/demo/db/migrate/006_create_stats.rb +15 -0
  52. data/doc/demo/db/schema.sql +18 -0
  53. data/doc/demo/db/schema.sqlite +14 -0
  54. data/doc/demo/doc/README_FOR_APP +2 -0
  55. data/doc/demo/doc/howto.txt +70 -0
  56. data/doc/demo/public/404.html +8 -0
  57. data/doc/demo/public/500.html +8 -0
  58. data/doc/demo/public/dispatch.cgi +10 -0
  59. data/doc/demo/public/dispatch.fcgi +24 -0
  60. data/doc/demo/public/dispatch.rb +10 -0
  61. data/doc/demo/public/favicon.ico +0 -0
  62. data/doc/demo/public/images/rails.png +0 -0
  63. data/doc/demo/public/index.html +277 -0
  64. data/doc/demo/public/robots.txt +1 -0
  65. data/doc/demo/public/stylesheets/scaffold.css +74 -0
  66. data/doc/demo/script/about +3 -0
  67. data/doc/demo/script/breakpointer +3 -0
  68. data/doc/demo/script/console +3 -0
  69. data/doc/demo/script/destroy +3 -0
  70. data/doc/demo/script/ferret_server +10 -0
  71. data/doc/demo/script/generate +3 -0
  72. data/doc/demo/script/performance/benchmarker +3 -0
  73. data/doc/demo/script/performance/profiler +3 -0
  74. data/doc/demo/script/plugin +3 -0
  75. data/doc/demo/script/process/inspector +3 -0
  76. data/doc/demo/script/process/reaper +3 -0
  77. data/doc/demo/script/process/spawner +3 -0
  78. data/doc/demo/script/process/spinner +3 -0
  79. data/doc/demo/script/runner +3 -0
  80. data/doc/demo/script/server +3 -0
  81. data/doc/demo/test/fixtures/comments.yml +12 -0
  82. data/doc/demo/test/fixtures/contents.yml +13 -0
  83. data/doc/demo/test/fixtures/remote_contents.yml +9 -0
  84. data/doc/demo/test/fixtures/shared_index1s.yml +7 -0
  85. data/doc/demo/test/fixtures/shared_index2s.yml +7 -0
  86. data/doc/demo/test/functional/admin/backend_controller_test.rb +35 -0
  87. data/doc/demo/test/functional/contents_controller_test.rb +81 -0
  88. data/doc/demo/test/functional/searches_controller_test.rb +71 -0
  89. data/doc/demo/test/smoke/drb_smoke_test.rb +321 -0
  90. data/doc/demo/test/smoke/process_stats.rb +21 -0
  91. data/doc/demo/test/test_helper.rb +30 -0
  92. data/doc/demo/test/unit/comment_test.rb +217 -0
  93. data/doc/demo/test/unit/content_test.rb +705 -0
  94. data/doc/demo/test/unit/ferret_result_test.rb +24 -0
  95. data/doc/demo/test/unit/multi_index_test.rb +329 -0
  96. data/doc/demo/test/unit/remote_index_test.rb +23 -0
  97. data/doc/demo/test/unit/shared_index1_test.rb +108 -0
  98. data/doc/demo/test/unit/shared_index2_test.rb +13 -0
  99. data/doc/demo/test/unit/sort_test.rb +21 -0
  100. data/doc/demo/test/unit/special_content_test.rb +25 -0
  101. data/doc/demo/vendor/plugins/will_paginate/LICENSE +18 -0
  102. data/doc/demo/vendor/plugins/will_paginate/README +108 -0
  103. data/doc/demo/vendor/plugins/will_paginate/Rakefile +23 -0
  104. data/doc/demo/vendor/plugins/will_paginate/init.rb +21 -0
  105. data/doc/demo/vendor/plugins/will_paginate/lib/will_paginate/collection.rb +45 -0
  106. data/doc/demo/vendor/plugins/will_paginate/lib/will_paginate/core_ext.rb +44 -0
  107. data/doc/demo/vendor/plugins/will_paginate/lib/will_paginate/finder.rb +159 -0
  108. data/doc/demo/vendor/plugins/will_paginate/lib/will_paginate/view_helpers.rb +95 -0
  109. data/doc/demo/vendor/plugins/will_paginate/test/array_pagination_test.rb +23 -0
  110. data/doc/demo/vendor/plugins/will_paginate/test/boot.rb +27 -0
  111. data/doc/demo/vendor/plugins/will_paginate/test/console +10 -0
  112. data/doc/demo/vendor/plugins/will_paginate/test/finder_test.rb +219 -0
  113. data/doc/demo/vendor/plugins/will_paginate/test/fixtures/admin.rb +3 -0
  114. data/doc/demo/vendor/plugins/will_paginate/test/fixtures/companies.yml +24 -0
  115. data/doc/demo/vendor/plugins/will_paginate/test/fixtures/company.rb +23 -0
  116. data/doc/demo/vendor/plugins/will_paginate/test/fixtures/developer.rb +11 -0
  117. data/doc/demo/vendor/plugins/will_paginate/test/fixtures/developers_projects.yml +13 -0
  118. data/doc/demo/vendor/plugins/will_paginate/test/fixtures/project.rb +4 -0
  119. data/doc/demo/vendor/plugins/will_paginate/test/fixtures/projects.yml +7 -0
  120. data/doc/demo/vendor/plugins/will_paginate/test/fixtures/replies.yml +20 -0
  121. data/doc/demo/vendor/plugins/will_paginate/test/fixtures/reply.rb +5 -0
  122. data/doc/demo/vendor/plugins/will_paginate/test/fixtures/schema.sql +44 -0
  123. data/doc/demo/vendor/plugins/will_paginate/test/fixtures/topic.rb +19 -0
  124. data/doc/demo/vendor/plugins/will_paginate/test/fixtures/topics.yml +30 -0
  125. data/doc/demo/vendor/plugins/will_paginate/test/fixtures/user.rb +2 -0
  126. data/doc/demo/vendor/plugins/will_paginate/test/fixtures/users.yml +35 -0
  127. data/doc/demo/vendor/plugins/will_paginate/test/helper.rb +42 -0
  128. data/doc/demo/vendor/plugins/will_paginate/test/lib/activerecord_test_connector.rb +64 -0
  129. data/doc/demo/vendor/plugins/will_paginate/test/lib/load_fixtures.rb +10 -0
  130. data/doc/demo/vendor/plugins/will_paginate/test/pagination_test.rb +136 -0
  131. data/doc/monit-example +22 -0
  132. data/init.rb +24 -0
  133. data/install.rb +18 -0
  134. data/lib/act_methods.rb +147 -0
  135. data/lib/acts_as_ferret.rb +593 -0
  136. data/lib/ar_mysql_auto_reconnect_patch.rb +41 -0
  137. data/lib/blank_slate.rb +54 -0
  138. data/lib/bulk_indexer.rb +56 -0
  139. data/lib/class_methods.rb +279 -0
  140. data/lib/ferret_extensions.rb +192 -0
  141. data/lib/ferret_find_methods.rb +142 -0
  142. data/lib/ferret_result.rb +58 -0
  143. data/lib/ferret_server.rb +238 -0
  144. data/lib/index.rb +99 -0
  145. data/lib/instance_methods.rb +172 -0
  146. data/lib/local_index.rb +202 -0
  147. data/lib/more_like_this.rb +217 -0
  148. data/lib/multi_index.rb +133 -0
  149. data/lib/rdig_adapter.rb +149 -0
  150. data/lib/remote_functions.rb +43 -0
  151. data/lib/remote_index.rb +54 -0
  152. data/lib/remote_multi_index.rb +20 -0
  153. data/lib/search_results.rb +50 -0
  154. data/lib/server_manager.rb +71 -0
  155. data/lib/unix_daemon.rb +86 -0
  156. data/lib/without_ar.rb +52 -0
  157. data/recipes/aaf_recipes.rb +116 -0
  158. data/script/ferret_daemon +94 -0
  159. data/script/ferret_server +12 -0
  160. data/script/ferret_service +178 -0
  161. data/tasks/ferret.rake +39 -0
  162. metadata +246 -0
data/init.rb ADDED
@@ -0,0 +1,24 @@
1
+ # Copyright (c) 2006 Kasper Weibel Nielsen-Refs, Thomas Lockney, Jens Krämer
2
+ #
3
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ # of this software and associated documentation files (the "Software"), to deal
5
+ # in the Software without restriction, including without limitation the rights
6
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
+ # copies of the Software, and to permit persons to whom the Software is
8
+ # furnished to do so, subject to the following conditions:
9
+ #
10
+ # The above copyright notice and this permission notice shall be included in all
11
+ # copies or substantial portions of the Software.
12
+ #
13
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19
+ # SOFTWARE.
20
+
21
+ require 'acts_as_ferret'
22
+
23
+ config.after_initialize { ActsAsFerret::load_config }
24
+ config.to_prepare { ActsAsFerret::load_config }
@@ -0,0 +1,18 @@
1
+ # acts_as_ferret install script
2
+ require 'fileutils'
3
+
4
# Copies +file+ (a path relative to this script's directory) to the same
# relative path three directory levels above this script, unless something
# already exists at the target.
#
# file:: relative path of the file to install, e.g. 'script/ferret_server'
#
# Prints a progress message, and a notice when the target is skipped.
def install(file)
  puts "Installing: #{file}"
  target = File.join(File.dirname(__FILE__), '..', '..', '..', file)
  # File.exist? is used because the File.exists? alias was removed in Ruby 3.2
  if File.exist?(target)
    puts "target #{target} already exists, skipping"
  else
    FileUtils.cp File.join(File.dirname(__FILE__), file), target
  end
end
13
+
14
+ install File.join( 'script', 'ferret_server' )
15
+ install File.join( 'config', 'ferret_server.yml' )
16
+
17
+ puts IO.read(File.join(File.dirname(__FILE__), 'README'))
18
+
@@ -0,0 +1,147 @@
1
module ActsAsFerret #:nodoc:

  # This module defines the acts_as_ferret method and is included into
  # ActiveRecord::Base
  module ActMethods

    # Always answers false.
    # NOTE(review): presumably consulted by Rails' class reloading - confirm.
    def reloadable?; false end

    # declares a class as ferret-searchable.
    #
    # ====options:
    # fields:: names all fields to include in the index. If not given,
    #          all attributes of the class will be indexed. You may also give
    #          symbols pointing to instance methods of your model here, i.e.
    #          to retrieve and index data from a related model.
    #
    # additional_fields:: names fields to include in the index, in addition
    #                     to those derived from the db schema. Use if you want
    #                     to add custom fields derived from methods to the db
    #                     fields (which will be picked by aaf). This option will
    #                     be ignored when the fields option is given, in that
    #                     case additional fields get specified there.
    #
    # if:: Can be set to a block that will be called with the record in question
    #      to determine if it should be indexed or not.
    #
    # index_dir:: declares the directory where to put the index for this class.
    #             The default is RAILS_ROOT/index/RAILS_ENV/CLASSNAME.
    #             The index directory will be created if it doesn't exist.
    #
    # reindex_batch_size:: reindexing is done in batches of this size, default is 1000
    # mysql_fast_batches:: set this to false to disable the faster mysql batching
    #                      algorithm if this model uses a non-integer primary key named
    #                      'id' on MySQL.
    #
    # ferret:: Hash of Options that directly influence the way the Ferret engine works. You
    #          can use most of the options the Ferret::I class accepts here, too. Among the
    #          more useful are:
    #
    #     or_default:: whether query terms are required by
    #                  default (the default, false), or not (true)
    #
    #     analyzer:: the analyzer to use for query parsing (default: nil,
    #                which means the ferret StandardAnalyzer gets used)
    #
    #     default_field:: use to set one or more fields that are searched for query terms
    #                     that don't have an explicit field list. This list should *not*
    #                     contain any untokenized fields. If it does, you're asking
    #                     for trouble (i.e. not getting results for queries having
    #                     stop words in them). Aaf by default initializes the default field
    #                     list to contain all tokenized fields. If you use :single_index => true,
    #                     you really should set this option specifying your default field
    #                     list (which should be equal in all your classes sharing the index).
    #                     Otherwise you might get incorrect search results and you won't get
    #                     any lazy loading of stored field data.
    #
    # For downwards compatibility reasons you can also specify the Ferret options in the
    # last Hash argument.
    #
    # Note that the :index key is removed from the given options hash.
    def acts_as_ferret(options={})

      extend ClassMethods

      include InstanceMethods
      include MoreLikeThis::InstanceMethods

      if options[:rdig]
        cattr_accessor :rdig_configuration
        self.rdig_configuration = options[:rdig]
        require 'rdig_adapter'
        include ActsAsFerret::RdigAdapter
      end

      unless included_modules.include?(ActsAsFerret::WithoutAR)
        # set up AR hooks
        after_create  :ferret_create
        after_update  :ferret_update
        after_destroy :ferret_destroy
      end

      cattr_accessor :aaf_configuration

      # apply default config for rdig based models
      if options[:rdig]
        options[:fields] ||= { :title   => { :boost => 3, :store => :yes },
                               :content => { :store => :yes } }
      end

      # name of this index
      index_name = options.delete(:index) || self.name.underscore

      index = ActsAsFerret::register_class_with_index(self, index_name, options)
      self.aaf_configuration = index.index_definition.dup
      # logger.debug "configured index for class #{self.name}:\n#{aaf_configuration.inspect}"

      # update our copy of the global index config with options local to this class
      aaf_configuration[:class_name] ||= self.name
      aaf_configuration[:if] ||= options[:if]

      # add methods for retrieving field values
      add_fields options[:fields]
      add_fields options[:additional_fields]
      add_fields aaf_configuration[:fields]
      add_fields aaf_configuration[:additional_fields]

    end


    protected


    # helper that defines a method (named "<field>_to_ferret") which retrieves
    # the value of the given field for a ferret document instance.
    #
    # field::   name of the field
    # options:: per-field options; :via names the source passed on to
    #           content_for_field_name (defaults to the field itself), and a
    #           Symbol given as :boost is passed on as dynamic boost.
    #
    # Does nothing if a method of that name already exists, so hand-written
    # *_to_ferret methods take precedence.
    def define_to_field_method(field, options = {})
      method_name = "#{field}_to_ferret"
      # method_defined? accepts Strings and Symbols alike; checking
      # instance_methods.include?(String) never matches on Ruby >= 1.9, where
      # instance_methods returns Symbols.
      return if method_defined?(method_name) # already defined
      aaf_configuration[:defined_fields] ||= {}
      aaf_configuration[:defined_fields][field] = options
      dynamic_boost = options[:boost] if options[:boost].is_a?(Symbol)
      via = options[:via] || field
      define_method(method_name.to_sym) do
        val = begin
          content_for_field_name(field, via, dynamic_boost)
        rescue
          # best effort: a failing field is logged and indexed as empty string
          logger.warn("Error retrieving value for field #{field}: #{$!}")
          ''
        end
        logger.debug("Adding field #{field} with value '#{val}' to index")
        val
      end
    end

    # Defines the *_to_ferret methods for a field configuration, which may be
    # a Hash (field name => options), any other object responding to each
    # (yielding field names), or nil (ignored).
    def add_fields(field_config)
      if field_config.is_a? Hash
        field_config.each_pair do |field, options|
          define_to_field_method field, options
        end
      elsif field_config.respond_to?(:each)
        field_config.each do |field|
          define_to_field_method field
        end
      end
    end

  end

end
@@ -0,0 +1,593 @@
1
+ # Copyright (c) 2006 Kasper Weibel Nielsen-Refs, Thomas Lockney, Jens Krämer
2
+ #
3
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ # of this software and associated documentation files (the "Software"), to deal
5
+ # in the Software without restriction, including without limitation the rights
6
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
+ # copies of the Software, and to permit persons to whom the Software is
8
+ # furnished to do so, subject to the following conditions:
9
+ #
10
+ # The above copyright notice and this permission notice shall be included in all
11
+ # copies or substantial portions of the Software.
12
+ #
13
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19
+ # SOFTWARE.
20
+
21
+ require 'active_support'
22
+ require 'active_record'
23
+ require 'set'
24
+ require 'enumerator'
25
+ require 'ferret'
26
+
27
+ require 'ferret_find_methods'
28
+ require 'remote_functions'
29
+ require 'blank_slate'
30
+ require 'bulk_indexer'
31
+ require 'ferret_extensions'
32
+ require 'act_methods'
33
+ require 'search_results'
34
+ require 'class_methods'
35
+ require 'ferret_result'
36
+ require 'instance_methods'
37
+ require 'without_ar'
38
+
39
+ require 'multi_index'
40
+ require 'remote_multi_index'
41
+ require 'more_like_this'
42
+
43
+ require 'index'
44
+ require 'local_index'
45
+ require 'remote_index'
46
+
47
+ require 'ferret_server'
48
+
49
+ require 'rdig_adapter'
50
+
51
+ # The Rails ActiveRecord Ferret Mixin.
52
+ #
53
+ # This mixin adds full text search capabilities to any Rails model.
54
+ #
55
+ # The current version emerged from on the original acts_as_ferret plugin done by
56
+ # Kasper Weibel and a modified version done by Thomas Lockney, which both can be
57
+ # found on the Ferret Wiki: http://ferret.davebalmain.com/trac/wiki/FerretOnRails.
58
+ #
59
+ # basic usage:
60
+ # include the following in your model class (specifiying the fields you want to get indexed):
61
+ # acts_as_ferret :fields => [ :title, :description ]
62
+ #
63
+ # now you can use ModelClass.find_with_ferret(query) to find instances of your model
64
+ # whose indexed fields match a given query. All query terms are required by default, but
65
+ # explicit OR queries are possible. This differs from the ferret default, but imho is the more
66
+ # often needed/expected behaviour (more query terms result in less results).
67
+ #
68
+ # Released under the MIT license.
69
+ #
70
+ # Authors:
71
+ # Kasper Weibel Nielsen-Refs (original author)
72
+ # Jens Kraemer <jk@jkraemer.net> (active maintainer since 2006)
73
+ #
74
+ #
75
+ # == Global properties
76
+ #
77
+ # raise_drb_errors:: Set this to true if you want aaf to raise Exceptions
78
+ # in case the DRb server cannot be reached (in other word - behave like
79
+ # versions up to 0.4.3). Defaults to false so DRb exceptions
80
+ # are logged but not raised. Be sure to set up some
81
+ # monitoring so you still detect when your DRb server died for
82
+ # whatever reason.
83
+ #
84
+ # remote:: Set this to false to force acts_as_ferret into local (non-DRb) mode even if
85
+ # config/ferret_server.yml contains a section for the current RAILS_ENV
86
+ # Usually you won't need to touch this option - just configure DRb for
87
+ # production mode in ferret_server.yml.
88
+ #
89
+ module ActsAsFerret
90
+
91
+ class ActsAsFerretError < StandardError; end
92
+ class IndexNotDefined < ActsAsFerretError; end
93
+ class IndexAlreadyDefined < ActsAsFerretError; end
94
+
95
+ # global Hash containing all multi indexes created by all classes using the plugin
96
+ # key is the concatenation of alphabetically sorted names of the classes the
97
+ # searcher searches.
98
+ @@multi_indexes = Hash.new
99
+ def self.multi_indexes; @@multi_indexes end
100
+
101
+ # global Hash containing the ferret indexes of all classes using the plugin
102
+ # key is the index name.
103
+ @@ferret_indexes = Hash.new
104
+ def self.ferret_indexes; @@ferret_indexes end
105
+
106
+ # mapping from class name to index name
107
+ @@index_using_classes = {}
108
+ def self.index_using_classes; @@index_using_classes end
109
+
110
+ @@logger = Logger.new "#{RAILS_ROOT}/log/acts_as_ferret.log"
111
+ @@logger.level = ActiveRecord::Base.logger.level rescue Logger::DEBUG
112
+ mattr_accessor :logger
113
+
114
+
115
+ # Default ferret configuration for index fields
116
+ DEFAULT_FIELD_OPTIONS = {
117
+ :store => :no,
118
+ :highlight => :yes,
119
+ :index => :yes,
120
+ :term_vector => :with_positions_offsets,
121
+ :boost => 1.0
122
+ }
123
+
124
+ @@raise_drb_errors = false
125
+ mattr_writer :raise_drb_errors
126
+ def self.raise_drb_errors?; @@raise_drb_errors end
127
+
128
+ @@remote = nil
129
+ mattr_accessor :remote
130
+ def self.remote?
131
+ if @@remote.nil?
132
+ if ENV["FERRET_USE_LOCAL_INDEX"] || ActsAsFerret::Remote::Server.running
133
+ @@remote = false
134
+ else
135
+ @@remote = ActsAsFerret::Remote::Config.new.uri rescue false
136
+ end
137
+ if @@remote
138
+ logger.info "Will use remote index server which should be available at #{@@remote}"
139
+ else
140
+ logger.info "Will use local index."
141
+ end
142
+ end
143
+ @@remote
144
+ end
145
+ remote?
146
+
147
+
148
+ # Declares an index.
149
+ #
150
+ # Use this method to define your indexes in a global initializer (i.e. config/initializers/aaf.rb).
151
+ # This is especially useful if you want to have multiple classes share the same index for cross-model
152
+ # searching as you only need a single call to declare the index for all models.
153
+ #
154
+ # This method is also used internally to declare an index when you use the
155
+ # acts_as_ferret call inside your class (which in turn can be omitted if the initializer is used).
156
+ # Returns the created index instance.
157
+ #
158
+ # === Options are:
159
+ #
160
+ # +models+:: Hash of model classes and their per-class option hashes which should
161
+ # use this index. Any models mentioned here will automatically use
162
+ # the index, there is no need to explicitly call +acts_as_ferret+ in the
163
+ # model class definition.
164
+ def self.define_index(name, options = {})
165
+ name = name.to_sym
166
+ pending_classes = nil
167
+ if ferret_indexes.has_key?(name)
168
+ # seems models have been already loaded. remove that index for now,
169
+ # re-register any already loaded classes later on.
170
+ idx = get_index(name)
171
+ pending_classes = idx.index_definition[:registered_models]
172
+ pending_classes_configs = idx.registered_models_config
173
+ idx.close
174
+ ferret_indexes.delete(name)
175
+ end
176
+
177
+ index_definition = {
178
+ :index_dir => "#{ActsAsFerret::index_dir}/#{name}",
179
+ :name => name,
180
+ :single_index => false,
181
+ :reindex_batch_size => 1000,
182
+ :ferret => {},
183
+ :ferret_fields => {}, # list of indexed fields that will be filled later
184
+ :enabled => true, # used for class-wide disabling of Ferret
185
+ :mysql_fast_batches => true, # turn off to disable the faster, id based batching mechanism for MySQL
186
+ :raise_drb_errors => false # handle DRb connection errors by default
187
+ }.update( options )
188
+
189
+ index_definition[:registered_models] = []
190
+
191
+ # build ferret configuration
192
+ index_definition[:ferret] = {
193
+ :or_default => false,
194
+ :handle_parse_errors => true,
195
+ :default_field => nil, # will be set later on
196
+ #:max_clauses => 512,
197
+ #:analyzer => Ferret::Analysis::StandardAnalyzer.new,
198
+ # :wild_card_downcase => true
199
+ }.update( options[:ferret] || {} )
200
+
201
+ index_definition[:user_default_field] = index_definition[:ferret][:default_field]
202
+
203
+ unless remote?
204
+ ActsAsFerret::ensure_directory index_definition[:index_dir]
205
+ index_definition[:index_base_dir] = index_definition[:index_dir]
206
+ index_definition[:index_dir] = find_last_index_version(index_definition[:index_dir])
207
+ logger.debug "using index in #{index_definition[:index_dir]}"
208
+ end
209
+
210
+ # these properties are somewhat vital to the plugin and shouldn't
211
+ # be overwritten by the user:
212
+ index_definition[:ferret].update(
213
+ :key => :key,
214
+ :path => index_definition[:index_dir],
215
+ :auto_flush => true, # slower but more secure in terms of locking problems TODO disable when running in drb mode?
216
+ :create_if_missing => true
217
+ )
218
+
219
+ # field config
220
+ index_definition[:ferret_fields] = build_field_config( options[:fields] )
221
+ index_definition[:ferret_fields].update build_field_config( options[:additional_fields] )
222
+
223
+ idx = ferret_indexes[name] = create_index_instance( index_definition )
224
+
225
+ # re-register early loaded classes
226
+ if pending_classes
227
+ pending_classes.each { |clazz| idx.register_class clazz, { :force_re_registration => true }.merge(pending_classes_configs[clazz]) }
228
+ end
229
+
230
+ if models = options[:models]
231
+ models.each do |clazz, config|
232
+ clazz.send :include, ActsAsFerret::WithoutAR unless clazz.respond_to?(:acts_as_ferret)
233
+ clazz.acts_as_ferret config.merge(:index => name)
234
+ end
235
+ end
236
+
237
+ return idx
238
+ end
239
+
240
+ # called internally by the acts_as_ferret method
241
+ #
242
+ # returns the index
243
+ def self.register_class_with_index(clazz, index_name, options = {})
244
+ index_name = index_name.to_sym
245
+ @@index_using_classes[clazz.name] = index_name
246
+ unless index = ferret_indexes[index_name]
247
+ # index definition on the fly
248
+ # default to all attributes of this class
249
+ options[:fields] ||= clazz.new.attributes.keys.map { |k| k.to_sym }
250
+ index = define_index index_name, options
251
+ end
252
+ index.register_class(clazz, options)
253
+ return index
254
+ end
255
+
256
+ def self.load_config
257
+ # using require_dependency to make the reloading in dev mode work.
258
+ require_dependency "#{RAILS_ROOT}/config/aaf.rb"
259
+ ActsAsFerret::logger.info "loaded configuration file aaf.rb"
260
+ rescue LoadError
261
+ ensure
262
+ @aaf_config_loaded = true
263
+ end
264
+
265
+ # returns the index with the given name.
266
+ def self.get_index(name)
267
+ name = name.to_sym rescue nil
268
+ unless ferret_indexes.has_key?(name)
269
+ if @aaf_config_loaded
270
+ raise IndexNotDefined.new(name.to_s)
271
+ else
272
+ load_config and return get_index name
273
+ end
274
+ end
275
+ ferret_indexes[name]
276
+ end
277
+
278
+ # count hits for a query
279
+ def self.total_hits(query, models_or_index_name, options = {})
280
+ options = add_models_to_options_if_necessary options, models_or_index_name
281
+ find_index(models_or_index_name).total_hits query, options
282
+ end
283
+
284
+ # find ids of records
285
+ def self.find_ids(query, models_or_index_name, options = {}, &block)
286
+ options = add_models_to_options_if_necessary options, models_or_index_name
287
+ find_index(models_or_index_name).find_ids query, options, &block
288
+ end
289
+
290
+ # returns an index instance suitable for searching/updating the named index. Will
291
+ # return a read only MultiIndex when multiple model classes are given that do not
292
+ # share the same physical index.
293
+ def self.find_index(models_or_index_name)
294
+ case models_or_index_name
295
+ when Symbol
296
+ get_index models_or_index_name
297
+ when String
298
+ get_index models_or_index_name.to_sym
299
+ else
300
+ get_index_for models_or_index_name
301
+ end
302
+ end
303
+
304
+ # models_or_index_name may be an index name as declared in config/aaf.rb,
305
+ # a single class or an array of classes to limit search to these classes.
306
+ def self.find(query, models_or_index_name, options = {}, ar_options = {})
307
+ models = case models_or_index_name
308
+ when Array
309
+ models_or_index_name
310
+ when Class
311
+ [ models_or_index_name ]
312
+ else
313
+ nil
314
+ end
315
+ index = find_index(models_or_index_name)
316
+ multi = (MultiIndexBase === index or index.shared?)
317
+ unless options[:per_page]
318
+ options[:limit] ||= ar_options.delete :limit
319
+ options[:offset] ||= ar_options.delete :offset
320
+ end
321
+ if options[:limit] || options[:per_page]
322
+ # need pagination
323
+ options[:page] = if options[:per_page]
324
+ options[:page] ? options[:page].to_i : 1
325
+ else
326
+ nil
327
+ end
328
+ limit = options[:limit] || options[:per_page]
329
+ offset = options[:offset] || (options[:page] ? (options[:page] - 1) * limit : 0)
330
+ options.delete :offset
331
+ options[:limit] = :all
332
+
333
+ if multi or ((ar_options[:conditions] || ar_options[:order]) && options[:sort])
334
+ # do pagination as the last step after everything has been fetched
335
+ options[:late_pagination] = { :limit => limit, :offset => offset }
336
+ elsif ar_options[:conditions] or ar_options[:order]
337
+ # late limiting in AR call
338
+ unless limit == :all
339
+ ar_options[:limit] = limit
340
+ ar_options[:offset] = offset
341
+ end
342
+ else
343
+ options[:limit] = limit
344
+ options[:offset] = offset
345
+ end
346
+ end
347
+ ActsAsFerret::logger.debug "options: #{options.inspect}\nar_options: #{ar_options.inspect}"
348
+ total_hits, result = index.find_records query, options.merge(:models => models), ar_options
349
+ ActsAsFerret::logger.debug "Query: #{query}\ntotal hits: #{total_hits}, results delivered: #{result.size}"
350
+ SearchResults.new(result, total_hits, options[:page], options[:per_page])
351
+ end
352
+
353
# Reduces an :include list to those entries that name an association
# declared by +model+.
#
# model::           object whose +reflections+ Hash is keyed by association
#                   name (as Symbol)
# include_options:: nil, a single name, a Hash, or an Array of names
#                   and/or Hashes. For a Hash entry only its first key is
#                   checked against the model's reflections.
#
# Returns a new Array holding the accepted entries in their original order.
def self.filter_include_list_for_model(model, include_options)
  # Kernel#Array would split a top-level Hash into [key, value] pairs, which
  # cannot be converted to a Symbol below - so wrap it explicitly.
  candidates = include_options.is_a?(Hash) ? [include_options] : Array(include_options)
  candidates.select do |include_option|
    association = include_option.is_a?(Hash) ? include_option.keys[0] : include_option
    model.reflections.has_key?(association.to_sym)
  end
end
361
+
362
+ # returns the index used by the given class.
363
+ #
364
+ # If multiple classes are given, either the single index shared by these
365
+ # classes, or a multi index (to be used for search only) across the indexes
366
+ # of all models, is returned.
367
# Returns the index used by the given class(es).
#
# classes:: one or more classes (or class names, which get constantized),
#           given as separate arguments and/or arrays.
#
# For a single class the class hierarchy is walked upwards until a class
# registered in index_using_classes is found. For several classes either
# the one index they all share or a multi index (search only) across their
# indexes is returned.
#
# Raises ArgumentError when no class is given and IndexNotDefined when no
# index can be determined.
def self.get_index_for(*classes)
  classes.flatten!
  raise ArgumentError.new("no class specified") unless classes.any?
  classes.map!(&:constantize) unless Class === classes.first
  logger.debug "index_for #{classes.inspect}"
  index = if classes.size > 1
    indexes = classes.map { |c| get_index_for c }.uniq
    indexes.size > 1 ? multi_index(indexes) : indexes.first
  else
    clazz = classes.first
    clazz = clazz.superclass while clazz && !index_using_classes.has_key?(clazz.name)
    # clazz is nil here when neither the class nor any ancestor is
    # registered; fall through to the IndexNotDefined below instead of
    # failing with NoMethodError on nil.name
    clazz && get_index(index_using_classes[clazz.name])
  end
  raise IndexNotDefined.new("no index found for class: #{classes.map(&:name).join(',')}") if index.nil?
  return index
end
383
+
384
+
385
# Instantiates an index object for +definition+: a RemoteIndex when remote?
# is true, a LocalIndex otherwise.
def self.create_index_instance(definition)
  index_class = remote? ? RemoteIndex : LocalIndex
  index_class.new(definition)
end
389
+
390
# Fetches the index for +name+ through get_index and tells it to rebuild
# itself; returns whatever the index's rebuild_index returns.
def self.rebuild_index(name)
  index = get_index(name)
  index.rebuild_index
end
393
+
394
# Fetches the index for +name+ through get_index and forwards +new_dir+ to
# its change_index_dir; returns that call's result.
def self.change_index_dir(name, new_dir)
  index = get_index(name)
  index.change_index_dir(new_dir)
end
397
+
398
# find the most recent version of an index
#
# A versioned index is a sub directory of +basedir+ whose name consists of
# digits, optionally followed by "_" and more digits, and which contains a
# 'segments' file. Returns the path of the highest such version, or
# +basedir+ itself when no versioned index exists.
def self.find_last_index_version(basedir)
  # check for versioned index
  versions = Dir.entries(basedir).select do |f|
    dir = File.join(basedir, f)
    File.directory?(dir) && File.file?(File.join(dir, 'segments')) && f =~ /\A\d+(_\d+)?\z/
  end
  return basedir unless versions.any?

  # select latest version; compare the numeric parts so that "10" ranks
  # above "9" and "5_10" above "5_2" (a plain string sort gets both wrong)
  latest = versions.sort_by { |version| version.split('_').map { |part| part.to_i } }.last
  File.join basedir, latest
end
413
+
414
# returns a MultiIndex instance operating on a MultiReader
#
# +indexes+ is a list of index names (Strings or Symbols, resolved through
# get_index) or of index objects responding to +index_name+. Instances are
# cached in ActsAsFerret::multi_indexes under the sorted, comma separated
# index names; a RemoteMultiIndex is built instead when remote? is true.
def self.multi_index(indexes)
  first = indexes.first
  if Symbol === first || String === first
    # names were given: normalize symbols and look up the index objects
    names = Symbol === first ? indexes.map { |name| name.to_s } : indexes.dup
    members = names.map { |name| get_index name }
  else
    # index objects were given: derive the names from them
    members = indexes
    names = indexes.map { |index| index.index_name.to_s }
  end
  cache_key = names.sort.join(",")
  ActsAsFerret::multi_indexes[cache_key] ||= begin
    multi_class = remote? ? ActsAsFerret::RemoteMultiIndex : ActsAsFerret::MultiIndex
    multi_class.new(members)
  end
end
426
+
427
# check for per-model conditions and return these if provided
#
# When +conditions+ is a Hash it is treated as a per-model lookup table keyed
# by the underscored model name as a symbol; the entry for +model+ (or nil)
# is returned. Any other value is handed back unchanged.
def self.conditions_for_model(model, conditions = {})
  return conditions unless Hash === conditions
  conditions[model.name.underscore.to_sym]
end
435
+
436
+ # retrieves search result records from a data structure like this:
437
+ # { 'Model1' => { '1' => [ rank, score ], '2' => [ rank, score ] } }
438
+ #
439
+ # TODO: in case of STI AR will filter out hits from other
440
+ # classes for us, but this
441
+ # will lead to less results retrieved --> scoping of ferret query
442
+ # to self.class is still needed.
443
+ # from the ferret ML (thanks Curtis Hatter)
444
+ # > I created a method in my base STI class so I can scope my query. For scoping
445
+ # > I used something like the following line:
446
+ # >
447
+ # > query << " role:#{self.class.eql?(Contents) ? '*' : self.class}"
448
+ # >
449
+ # > Though you could make it more generic by simply asking
450
+ # > "self.descends_from_active_record?" which is how rails decides if it should
451
+ # > scope your "find" query for STI models. You can check out "base.rb" in
452
+ # > activerecord to see that.
453
+ # but maybe better do the scoping in find_ids_with_ferret...
454
# id_arrays::    Hash mapping a model class name to a Hash of
#                id string => [ rank, score ]
# find_options:: options handed on to each model's find(:all, ...);
#                :conditions and :include are adjusted per model
#
# Returns one Array with the records of all models, each carrying
# ferret_rank and ferret_score.
def self.retrieve_records(id_arrays, find_options = {})
  include_options = find_options[:include]
  records = []

  # get objects for each model
  id_arrays.each do |model_name, hits|
    next if hits.empty?

    klass = model_name.constantize

    # merge per-model user conditions with the restriction to the ids found
    user_conditions = conditions_for_model(klass, find_options[:conditions])
    id_restriction = ["#{klass.table_name}.#{klass.primary_key} in (?)", hits.keys]
    conditions = combine_conditions(id_restriction, user_conditions)

    # check for include association that might only exist on some models in case of multi_search
    includes = include_options ? filter_include_list_for_model(klass, include_options) : nil

    # fetch
    found = klass.find(:all, find_options.merge(:conditions => conditions, :include => includes))

    # set scores and rank
    found.each do |record|
      rank, score = hits[record.id.to_s]
      record.ferret_rank = rank
      record.ferret_score = score
    end

    # merge with result array
    records.concat(found)
  end

  # order results as they were found by ferret, unless an AR :order
  # option was given
  records.sort! { |a, b| a.ferret_rank <=> b.ferret_rank } unless find_options[:order]
  records
end
494
+
495
# combine our conditions with those given by user, if any
#
# conditions::            our own condition array, [ sql_fragment, *bind_values ];
#                         its first element is extended in place
# additional_conditions:: nil, an SQL String, or an Array of
#                         [ sql_fragment, *bind_values ]
#
# The user fragment is wrapped in parentheses so that an "or" inside it
# cannot bypass our own restriction. Returns +conditions+.
def self.combine_conditions(conditions, additional_conditions = [])
  return conditions unless additional_conditions
  # String does not respond to any? on Ruby >= 1.9, so test it separately
  present = if String === additional_conditions
    !additional_conditions.strip.empty?
  else
    additional_conditions.any?
  end
  return conditions unless present

  # work on a copy so the caller's array is not modified by the shift below
  cust_opts = (Array === additional_conditions) ? additional_conditions.dup : [ additional_conditions ]
  logger.debug "cust_opts: #{cust_opts.inspect}"
  conditions.first << " and (" << cust_opts.shift << ")"
  conditions.concat(cust_opts)
end
505
+
506
# Builds the per-field configuration Hash for the :fields option.
#
# fields:: nil, an Array of field names, or a Hash mapping field names to
#          their option Hashes (a nil options value counts as no options)
#
# Returns a Hash of field name => config as produced by field_config_for;
# empty when +fields+ is nil or false.
# Raises ArgumentError when +fields+ is neither a Hash nor an Array.
def self.build_field_config(fields)
  field_config = {}
  return field_config unless fields

  case fields
  when Array
    fields.each { |name| field_config[name] = field_config_for(name) }
  when Hash
    fields.each { |name, options| field_config[name] = field_config_for(name, options || {}) }
  else
    # the InvalidArgumentError raised here before is not a Ruby core class
    # and would only have surfaced as a NameError
    raise ArgumentError.new(":fields option must be Hash or Array")
  end
  field_config
end
517
+
518
# Creates +dir+ (including missing parents) unless it already is a directory
# or a symlink.
def self.ensure_directory(dir)
  return if File.directory?(dir) || File.symlink?(dir)
  FileUtils.mkdir_p dir
end
521
+
522
+
523
# Sets the default index base dir. By default, all indexes are created
# under RAILS_ROOT/index/RAILS_ENV. The path is only computed and stored in
# @@index_dir here; the directory itself is not created by this method.
def self.init_index_basedir
  @@index_dir = "#{RAILS_ROOT}/index/#{RAILS_ENV}"
end

# declare the index_dir accessor and set the default as soon as this part
# of the module body is evaluated
mattr_accessor :index_dir
init_index_basedir
532
+
533
# Inclusion hook: runs the inherited append_features for +base+ and then
# extends +base+ with ClassMethods.
def self.append_features(base)
  super(base)
  base.extend ClassMethods
end
537
+
538
# builds a FieldInfos instance for creation of an index
#
# index_definition:: Hash whose :ferret_fields entry maps field names to
#                    their option Hashes
#
# The option Hashes are copied before keys are removed, so the definition
# itself is left untouched.
def self.field_infos(index_definition)
  # default attributes for fields
  infos = Ferret::Index::FieldInfos.new(:store => :no,
                                        :index => :yes,
                                        :term_vector => :no,
                                        :boost => 1.0)

  # fixed fields, all untokenized:
  #   key        - unique key composed of classname and id
  #   id         - primary key
  #   class_name - class_name
  [[:key, :no], [:id, :yes], [:class_name, :yes]].each do |name, store|
    infos.add_field(name, :store => store, :index => :untokenized)
  end

  # other fields
  index_definition[:ferret_fields].each_pair do |field, options|
    field_options = options.dup
    field_options.delete(:via)
    field_options.delete(:boost) if field_options[:boost].is_a?(Symbol) # dynamic boost
    infos.add_field(field, field_options)
  end
  infos
end
561
+
562
# Closes every cached multi index and empties the multi_indexes cache.
def self.close_multi_indexes
  cache = multi_indexes
  # close combined index readers, just in case
  # this seems to fix a strange test failure that seems to relate to a
  # multi_index looking at an old version of the content_base index.
  #
  # TODO only close those where necessary (watch inheritance, where
  # self.name is base class of a class where key is made from),
  # e.g. by closing only if key =~ /#{self.name}/
  cache.each_value { |index| index.close }
  cache.clear
end
574
+
575
+ protected
576
+
577
# Returns +options+ untouched when +models_or_index_name+ is an index name
# (String or Symbol); otherwise returns a copy of +options+ with the given
# value stored under :models.
def self.add_models_to_options_if_necessary(options, models_or_index_name)
  case models_or_index_name
  when String, Symbol
    options
  else
    options.merge(:models => models_or_index_name)
  end
end
581
+
582
# Builds the configuration Hash for one field: DEFAULT_FIELD_OPTIONS
# overridden by +options+. :via falls back to +fieldname+ when unset, and
# :term_vector is forced to :no for fields that are not indexed.
def self.field_config_for(fieldname, options = {})
  merged = DEFAULT_FIELD_OPTIONS.merge(options)
  merged[:via] = fieldname unless merged[:via]
  merged[:term_vector] = :no if merged[:index] == :no
  merged
end
588
+
589
+ end
590
+
591
+ # extend ActiveRecord::Base with ActsAsFerret::ActMethods so its methods become class methods of every model (presumably this provides the acts_as_ferret macro; ActMethods is defined outside this chunk -- confirm)
592
+ ActiveRecord::Base.extend ActsAsFerret::ActMethods
593
+