cehoffman-acts_as_ferret 0.4.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (162) hide show
  1. data/LICENSE +20 -0
  2. data/README +68 -0
  3. data/bin/aaf_install +23 -0
  4. data/config/ferret_server.yml +24 -0
  5. data/doc/README.win32 +23 -0
  6. data/doc/demo/README +154 -0
  7. data/doc/demo/README_DEMO +23 -0
  8. data/doc/demo/Rakefile +10 -0
  9. data/doc/demo/app/controllers/admin/backend_controller.rb +14 -0
  10. data/doc/demo/app/controllers/admin_area_controller.rb +4 -0
  11. data/doc/demo/app/controllers/application.rb +5 -0
  12. data/doc/demo/app/controllers/contents_controller.rb +49 -0
  13. data/doc/demo/app/controllers/searches_controller.rb +8 -0
  14. data/doc/demo/app/helpers/admin/backend_helper.rb +2 -0
  15. data/doc/demo/app/helpers/application_helper.rb +3 -0
  16. data/doc/demo/app/helpers/content_helper.rb +2 -0
  17. data/doc/demo/app/helpers/search_helper.rb +2 -0
  18. data/doc/demo/app/models/comment.rb +48 -0
  19. data/doc/demo/app/models/content.rb +12 -0
  20. data/doc/demo/app/models/content_base.rb +28 -0
  21. data/doc/demo/app/models/search.rb +19 -0
  22. data/doc/demo/app/models/shared_index1.rb +3 -0
  23. data/doc/demo/app/models/shared_index2.rb +3 -0
  24. data/doc/demo/app/models/special_content.rb +3 -0
  25. data/doc/demo/app/models/stats.rb +20 -0
  26. data/doc/demo/app/views/admin/backend/search.rhtml +18 -0
  27. data/doc/demo/app/views/contents/_form.rhtml +10 -0
  28. data/doc/demo/app/views/contents/edit.rhtml +9 -0
  29. data/doc/demo/app/views/contents/index.rhtml +24 -0
  30. data/doc/demo/app/views/contents/new.rhtml +8 -0
  31. data/doc/demo/app/views/contents/show.rhtml +8 -0
  32. data/doc/demo/app/views/layouts/application.html.erb +17 -0
  33. data/doc/demo/app/views/searches/_content.html.erb +2 -0
  34. data/doc/demo/app/views/searches/search.html.erb +20 -0
  35. data/doc/demo/config/boot.rb +109 -0
  36. data/doc/demo/config/database.yml +38 -0
  37. data/doc/demo/config/environment.rb +69 -0
  38. data/doc/demo/config/environments/development.rb +16 -0
  39. data/doc/demo/config/environments/production.rb +19 -0
  40. data/doc/demo/config/environments/test.rb +21 -0
  41. data/doc/demo/config/ferret_server.yml +18 -0
  42. data/doc/demo/config/lighttpd.conf +40 -0
  43. data/doc/demo/config/routes.rb +9 -0
  44. data/doc/demo/db/development_structure.sql +15 -0
  45. data/doc/demo/db/migrate/001_initial_migration.rb +18 -0
  46. data/doc/demo/db/migrate/002_add_type_to_contents.rb +9 -0
  47. data/doc/demo/db/migrate/003_create_shared_index1s.rb +11 -0
  48. data/doc/demo/db/migrate/004_create_shared_index2s.rb +11 -0
  49. data/doc/demo/db/migrate/005_special_field.rb +9 -0
  50. data/doc/demo/db/migrate/006_create_stats.rb +15 -0
  51. data/doc/demo/db/schema.sql +18 -0
  52. data/doc/demo/doc/README_FOR_APP +2 -0
  53. data/doc/demo/doc/howto.txt +70 -0
  54. data/doc/demo/public/.htaccess +40 -0
  55. data/doc/demo/public/404.html +8 -0
  56. data/doc/demo/public/500.html +8 -0
  57. data/doc/demo/public/dispatch.cgi +10 -0
  58. data/doc/demo/public/dispatch.fcgi +24 -0
  59. data/doc/demo/public/dispatch.rb +10 -0
  60. data/doc/demo/public/favicon.ico +0 -0
  61. data/doc/demo/public/images/rails.png +0 -0
  62. data/doc/demo/public/index.html +277 -0
  63. data/doc/demo/public/robots.txt +1 -0
  64. data/doc/demo/public/stylesheets/scaffold.css +74 -0
  65. data/doc/demo/script/about +3 -0
  66. data/doc/demo/script/breakpointer +3 -0
  67. data/doc/demo/script/console +3 -0
  68. data/doc/demo/script/destroy +3 -0
  69. data/doc/demo/script/ferret_server +10 -0
  70. data/doc/demo/script/generate +3 -0
  71. data/doc/demo/script/performance/benchmarker +3 -0
  72. data/doc/demo/script/performance/profiler +3 -0
  73. data/doc/demo/script/plugin +3 -0
  74. data/doc/demo/script/process/inspector +3 -0
  75. data/doc/demo/script/process/reaper +3 -0
  76. data/doc/demo/script/process/spawner +3 -0
  77. data/doc/demo/script/process/spinner +3 -0
  78. data/doc/demo/script/runner +3 -0
  79. data/doc/demo/script/server +3 -0
  80. data/doc/demo/test/fixtures/comments.yml +12 -0
  81. data/doc/demo/test/fixtures/contents.yml +13 -0
  82. data/doc/demo/test/fixtures/remote_contents.yml +9 -0
  83. data/doc/demo/test/fixtures/shared_index1s.yml +7 -0
  84. data/doc/demo/test/fixtures/shared_index2s.yml +7 -0
  85. data/doc/demo/test/functional/admin/backend_controller_test.rb +35 -0
  86. data/doc/demo/test/functional/contents_controller_test.rb +81 -0
  87. data/doc/demo/test/functional/searches_controller_test.rb +71 -0
  88. data/doc/demo/test/smoke/drb_smoke_test.rb +321 -0
  89. data/doc/demo/test/smoke/process_stats.rb +21 -0
  90. data/doc/demo/test/test_helper.rb +30 -0
  91. data/doc/demo/test/unit/comment_test.rb +217 -0
  92. data/doc/demo/test/unit/content_test.rb +705 -0
  93. data/doc/demo/test/unit/ferret_result_test.rb +24 -0
  94. data/doc/demo/test/unit/multi_index_test.rb +329 -0
  95. data/doc/demo/test/unit/remote_index_test.rb +23 -0
  96. data/doc/demo/test/unit/shared_index1_test.rb +108 -0
  97. data/doc/demo/test/unit/shared_index2_test.rb +13 -0
  98. data/doc/demo/test/unit/sort_test.rb +21 -0
  99. data/doc/demo/test/unit/special_content_test.rb +25 -0
  100. data/doc/demo/vendor/plugins/will_paginate/LICENSE +18 -0
  101. data/doc/demo/vendor/plugins/will_paginate/README +108 -0
  102. data/doc/demo/vendor/plugins/will_paginate/Rakefile +23 -0
  103. data/doc/demo/vendor/plugins/will_paginate/init.rb +21 -0
  104. data/doc/demo/vendor/plugins/will_paginate/lib/will_paginate/collection.rb +45 -0
  105. data/doc/demo/vendor/plugins/will_paginate/lib/will_paginate/core_ext.rb +44 -0
  106. data/doc/demo/vendor/plugins/will_paginate/lib/will_paginate/finder.rb +159 -0
  107. data/doc/demo/vendor/plugins/will_paginate/lib/will_paginate/view_helpers.rb +95 -0
  108. data/doc/demo/vendor/plugins/will_paginate/test/array_pagination_test.rb +23 -0
  109. data/doc/demo/vendor/plugins/will_paginate/test/boot.rb +27 -0
  110. data/doc/demo/vendor/plugins/will_paginate/test/console +10 -0
  111. data/doc/demo/vendor/plugins/will_paginate/test/finder_test.rb +219 -0
  112. data/doc/demo/vendor/plugins/will_paginate/test/fixtures/admin.rb +3 -0
  113. data/doc/demo/vendor/plugins/will_paginate/test/fixtures/companies.yml +24 -0
  114. data/doc/demo/vendor/plugins/will_paginate/test/fixtures/company.rb +23 -0
  115. data/doc/demo/vendor/plugins/will_paginate/test/fixtures/developer.rb +11 -0
  116. data/doc/demo/vendor/plugins/will_paginate/test/fixtures/developers_projects.yml +13 -0
  117. data/doc/demo/vendor/plugins/will_paginate/test/fixtures/project.rb +4 -0
  118. data/doc/demo/vendor/plugins/will_paginate/test/fixtures/projects.yml +7 -0
  119. data/doc/demo/vendor/plugins/will_paginate/test/fixtures/replies.yml +20 -0
  120. data/doc/demo/vendor/plugins/will_paginate/test/fixtures/reply.rb +5 -0
  121. data/doc/demo/vendor/plugins/will_paginate/test/fixtures/schema.sql +44 -0
  122. data/doc/demo/vendor/plugins/will_paginate/test/fixtures/topic.rb +19 -0
  123. data/doc/demo/vendor/plugins/will_paginate/test/fixtures/topics.yml +30 -0
  124. data/doc/demo/vendor/plugins/will_paginate/test/fixtures/user.rb +2 -0
  125. data/doc/demo/vendor/plugins/will_paginate/test/fixtures/users.yml +35 -0
  126. data/doc/demo/vendor/plugins/will_paginate/test/helper.rb +42 -0
  127. data/doc/demo/vendor/plugins/will_paginate/test/lib/activerecord_test_connector.rb +64 -0
  128. data/doc/demo/vendor/plugins/will_paginate/test/lib/load_fixtures.rb +10 -0
  129. data/doc/demo/vendor/plugins/will_paginate/test/pagination_test.rb +136 -0
  130. data/doc/monit-example +22 -0
  131. data/init.rb +24 -0
  132. data/install.rb +18 -0
  133. data/lib/act_methods.rb +147 -0
  134. data/lib/acts_as_ferret.rb +584 -0
  135. data/lib/ar_mysql_auto_reconnect_patch.rb +41 -0
  136. data/lib/blank_slate.rb +53 -0
  137. data/lib/bulk_indexer.rb +38 -0
  138. data/lib/class_methods.rb +270 -0
  139. data/lib/ferret_extensions.rb +188 -0
  140. data/lib/ferret_find_methods.rb +141 -0
  141. data/lib/ferret_result.rb +53 -0
  142. data/lib/ferret_server.rb +238 -0
  143. data/lib/index.rb +99 -0
  144. data/lib/instance_methods.rb +171 -0
  145. data/lib/local_index.rb +205 -0
  146. data/lib/more_like_this.rb +217 -0
  147. data/lib/multi_index.rb +126 -0
  148. data/lib/rdig_adapter.rb +148 -0
  149. data/lib/remote_functions.rb +23 -0
  150. data/lib/remote_index.rb +54 -0
  151. data/lib/remote_multi_index.rb +20 -0
  152. data/lib/search_results.rb +50 -0
  153. data/lib/server_manager.rb +58 -0
  154. data/lib/unix_daemon.rb +64 -0
  155. data/lib/without_ar.rb +52 -0
  156. data/rakefile +141 -0
  157. data/recipes/aaf_recipes.rb +114 -0
  158. data/script/ferret_daemon +94 -0
  159. data/script/ferret_server +10 -0
  160. data/script/ferret_service +178 -0
  161. data/tasks/ferret.rake +22 -0
  162. metadata +258 -0
@@ -0,0 +1,41 @@
1
+ # Source: http://pastie.caboo.se/154842
2
+ #
3
+ # in /etc/my.cnf on the MySQL server, you can set the interactive-timeout parameter,
4
+ # for example, 8 hours = 28800 sec
5
+ # interactive-timeout=28800
6
+
7
+ # in ActiveRecord, setting the verification_timeout to something less than
8
+ # the interactive-timeout parameter; 14400 sec = 4 hours
9
+ ActiveRecord::Base.verification_timeout = 14400 # value is in seconds: 14400 s = 4 hours
10
+ ActiveRecord::Base.establish_connection # called without arguments; presumably uses the default connection configuration -- confirm
11
+
12
+ # Below is a monkey patch for keeping ActiveRecord connections alive.
13
+ # http://www.sparecycles.org/2007/7/2/saying-goodbye-to-lost-connections-in-rails
14
+
15
module ActiveRecord
  module ConnectionAdapters
    class MysqlAdapter
      # Runs +sql+ through <tt>@connection.query</tt> (wrapped in +log+) and
      # reconnects once if the server dropped the connection.
      #
      # sql::  the SQL statement to execute
      # name:: optional label passed on to +log+
      #
      # Returns the result of <tt>@connection.query(sql)</tt>.
      #
      # Raises ActiveRecord::StatementInvalid when the statement fails for any
      # other reason, when it fails again after the single reconnect, or (with
      # an explanatory message) on a 'Packets out of order' error.
      def execute(sql, name = nil) #:nodoc:
        # Set outside the begin block on purpose: +retry+ only re-runs the
        # begin block, so the flag stays false after the first reconnect and
        # a second failure is re-raised instead of looping forever.
        reconnect_lost_connections = true
        begin
          log(sql, name) { @connection.query(sql) }
        rescue ActiveRecord::StatementInvalid => exception
          # The pattern must stay on one line: a line break inside the regexp
          # literal would become part of the pattern and never match.
          if reconnect_lost_connections && exception.message =~ /(Lost connection to MySQL server during query|MySQL server has gone away)/
            reconnect_lost_connections = false
            reconnect!
            retry
          elsif exception.message.split(":").first =~ /Packets out of order/
            raise ActiveRecord::StatementInvalid,
                  "'Packets out of order' error was received from the database. " \
                  "Please update your mysql bindings (gem install mysql) and read " \
                  "http://dev.mysql.com/doc/mysql/en/password-hashing.html for more information. " \
                  "If you're on Windows, use the Instant Rails installer to get the updated mysql bindings."
          else
            raise
          end
        end
      end
    end
  end
end
41
+
@@ -0,0 +1,53 @@
1
if defined?(BlankSlate)
  # Rails 2.x has it already
  module ActsAsFerret
    class BlankSlate < ::BlankSlate
    end
  end
else
  module ActsAsFerret
    # 'backported' for Rails pre 2.0
    #
    #--
    # Copyright 2004, 2006 by Jim Weirich (jim@weirichhouse.org).
    # All rights reserved.

    # Permission is granted for use, copying, modification, distribution,
    # and distribution of modified versions of this work as long as the
    # above copyright notice is included.
    #++

    ######################################################################
    # BlankSlate provides an abstract base class with no predefined
    # methods (except for <tt>__send__</tt> and <tt>__id__</tt>).
    # BlankSlate is useful as a base class when writing classes that
    # depend upon <tt>method_missing</tt> (e.g. dynamic proxies).
    #
    class BlankSlate
      class << self
        # Hide the method named +name+ in the BlankSlate class. Methods whose
        # names start with "__", "instance_eval" or "methods" are never hidden.
        #
        # name:: method name, as a String or Symbol
        #
        # The removed method object is remembered in <tt>@hidden_methods</tt>.
        def hide(name)
          # method_defined? accepts Strings and Symbols alike, so this works
          # whether instance_methods returns Strings (Ruby 1.8) or Symbols
          # (Ruby 1.9+); comparing against name.to_s only matched on 1.8.
          if method_defined?(name) && name.to_s !~ /^(__|instance_eval|methods)/
            @hidden_methods ||= {}
            @hidden_methods[name.to_sym] = instance_method(name)
            undef_method name
          end
        end

        # Redefine a previously hidden method so that it may be called on a blank
        # slate object.
        #
        # no-op here since we don't hide the methods we reveal where this is
        # used in this implementation
        def reveal(name)
        end
      end

      # Strip every public/protected instance method inherited at definition time.
      instance_methods.each { |m| hide(m) }

    end
  end

end
53
+
@@ -0,0 +1,38 @@
1
module ActsAsFerret
  # Pushes batches of records into an index via +update_batch+ and logs the
  # progress of the overall run.
  class BulkIndexer
    # args is a Hash with the following keys:
    # batch_size:: records per batch, used for the time estimate (default 1000)
    # logger::     object responding to +info+; may be nil, then nothing is logged
    # model::      the model being indexed; +name+ is called on it for logging
    #              and +count+ when :reindex is set
    # index::      object responding to +update_batch+
    # reindex::    when true the total is taken from <tt>model.count</tt>
    # total::      total number of records when :reindex is not set (default 0)
    def initialize(args = {})
      @batch_size = args[:batch_size] || 1000
      @logger = args[:logger]
      @model = args[:model]
      @work_done = 0
      @index = args[:index]
      if args[:reindex]
        @reindex = true
        @model_count = @model.count.to_f
      else
        # Fall back to 0 so the progress arithmetic below never sees nil.
        @model_count = args[:total] || 0
      end
    end

    # Indexes one batch of records and logs progress.
    #
    # records:: the records of this batch; only those answering true to
    #           <tt>ferret_enabled?(true)</tt> are sent to the index
    # offset::  position of this batch, used to compute the percentage done
    def index_records(records, offset)
      batch_time = measure_time {
        docs = []
        records.each { |rec| docs << [rec.to_doc, rec.ferret_analyzer] if rec.ferret_enabled?(true) }
        @index.update_batch(docs)
        # records.each { |rec| @index.add_document(rec.to_doc, rec.ferret_analyzer) if rec.ferret_enabled?(true) }
      }.to_f
      @work_done = offset.to_f / @model_count * 100.0 if @model_count > 0
      # Estimate: time per record of this batch times the records still counted as outstanding.
      remaining_time = ( batch_time / @batch_size ) * ( @model_count - offset + @batch_size )
      if @logger
        @logger.info "#{@reindex ? 're' : 'bulk '}index model #{@model.name} : #{'%.2f' % @work_done}% complete : #{'%.2f' % remaining_time} secs to finish"
      end
    end

    # Runs the given block and returns the elapsed wall-clock time in seconds.
    def measure_time
      t1 = Time.now
      yield
      Time.now - t1
    end

  end

end
@@ -0,0 +1,270 @@
1
+ module ActsAsFerret
2
+
3
+ module ClassMethods
4
+
5
+ # Disables ferret index updates for this model. When a block is given,
6
+ # Ferret will be re-enabled again after executing the block.
7
def disable_ferret
  aaf_configuration[:enabled] = false
  return unless block_given?
  begin
    yield
  ensure
    # Re-enable even when the block raises, so a failing block cannot leave
    # index updates switched off.
    reenabled = enable_ferret
  end
  reenabled
end

# Turns ferret index updates for this model (back) on.
def enable_ferret
  aaf_configuration[:enabled] = true
end

# Returns the current value of the :enabled setting of this model.
def ferret_enabled?
  aaf_configuration[:enabled]
end
22
+
23
+ # rebuild the index from all data stored for this model, and any other
24
+ # model classes associated with the same index.
25
+ # This is called automatically when no index exists yet.
26
+ #
27
def rebuild_index
  # Delegates to the index object backing this model class.
  index = aaf_index
  index.rebuild_index
end
30
+
31
+ # re-index a number of records specified by the given ids. Use for large
32
+ # indexing jobs i.e. after modifying a lot of records with Ferret disabled.
33
+ # Please note that the state of Ferret (enabled or disabled at class or
34
+ # record level) is not checked by this method, so if you need to do so
35
+ # (e.g. because of a custom ferret_enabled? implementation), you have to do
36
+ # so yourself.
37
def bulk_index(*ids)
  # A trailing Hash is taken off the argument list and used as options.
  options = ids.last.is_a?(Hash) ? ids.pop : {}
  # A single enumerable argument (e.g. one array of ids) is unwrapped.
  if ids.size == 1 && ids.first.is_a?(Enumerable)
    ids = ids.first
  end
  aaf_index.bulk_index(self.name, ids, options)
end
42
+
43
+ # true if our db and table appear to be suitable for the mysql fast batch
44
+ # hack (see
45
+ # http://weblog.jamisbuck.org/2007/4/6/faking-cursors-in-activerecord)
46
def use_fast_batches?
  mysql = connection.class.name =~ /Mysql/
  # All three must hold: MySQL adapter, primary key named 'id', option switched on.
  return nil unless mysql && primary_key == 'id' && aaf_configuration[:mysql_fast_batches]

  logger.info "using mysql specific batched find :all. Turn off with :mysql_fast_batches => false if you encounter problems (i.e. because of non-integer UUIDs in the id column)"
  true
end
52
+
53
+ # Returns all records modified or created after the specified time.
54
+ # Used by the rake rebuild task to find models that need to be updated in
55
+ # the index after the rebuild finished because they changed while the
56
+ # rebuild was running.
57
+ # Override if your models don't stick to the created_at/updated_at
58
+ # convention.
59
def records_modified_since(time)
  timestamp_columns = %w(updated_at created_at).select { |col| column_names.include?(col) }
  if timestamp_columns.empty?
    logger.warn "#{self.name}: Override records_modified_since(time) to keep the index up to date with records changed during rebuild."
    return []
  end
  # OR, not AND: a record counts as changed when either timestamp is recent.
  # With AND, a record created before +time+ but updated after it was skipped.
  sql = timestamp_columns.map { |col| "#{col} >= ?" }.join(' OR ')
  find :all, :conditions => [ sql, *([time] * timestamp_columns.size) ]
end
71
+
72
+ # runs across all records yielding those to be indexed when the index is rebuilt
73
def records_for_rebuild(batch_size = 1000)
  transaction do
    if use_fast_batches?
      # Cursor-style paging: fetch the rows whose id is greater than the last
      # id seen. The explicit ORDER BY guarantees rows.last carries the
      # highest id of the batch; without it the row order is up to the database.
      id_order = "#{table_name}.id ASC"
      offset = 0
      while (rows = find(:all, :conditions => [ "#{table_name}.id > ?", offset ], :order => id_order, :limit => batch_size)).any?
        offset = rows.last.id
        yield rows, offset
      end
    else
      order = "#{primary_key} ASC" # fixes #212
      0.step(self.count, batch_size) do |offset|
        yield find( :all, :limit => batch_size, :offset => offset, :order => order ), offset
      end
    end
  end
end
89
+
90
+ # yields the records with the given ids, in batches of batch_size
91
def records_for_bulk_index(ids, batch_size = 1000)
  transaction do
    offset = 0
    ids.each_slice(batch_size) do |id_slice|
      # Query each slice exactly once and yield that result.
      records = find( :all, :conditions => ["id in (?)", id_slice] )
      yield records, offset
      offset += batch_size
    end
  end
end
102
+
103
+ # Retrieve the index instance for this model class. This can either be a
104
+ # LocalIndex, or a RemoteIndex instance.
105
+ #
106
def aaf_index
  # Looked up once by configured name, then cached in @index.
  return @index if @index
  @index = ActsAsFerret.get_index(aaf_configuration[:name])
end
109
+
110
+ # Finds instances by searching the Ferret index. Terms are ANDed by default, use
111
+ # OR between terms for ORed queries. Or specify +:or_default => true+ in the
112
+ # +:ferret+ options hash of acts_as_ferret.
113
+ #
114
+ # You may either use the +offset+ and +limit+ options to implement your own
115
+ # pagination logic, or use the +page+ and +per_page+ options to use the
116
+ # built in pagination support which is compatible with will_paginate's view
117
+ # helpers. If +page+ and +per_page+ are given, +offset+ and +limit+ will be
118
+ # ignored.
119
+ #
120
+ # == options:
121
+ # page:: page of search results to retrieve
122
+ # per_page:: number of search results that are displayed per page
123
+ # offset:: first hit to retrieve (useful for paging)
124
+ # limit:: number of hits to retrieve, or :all to retrieve
125
+ # all results
126
+ # lazy:: Array of field names whose contents should be read directly
127
+ # from the index. Those fields have to be marked
128
+ # +:store => :yes+ in their field options. Give true to get all
129
+ # stored fields. Note that if you have a shared index, you have
130
+ # to explicitly state the fields you want to fetch, true won't
131
+ # work here)
132
+ #
133
+ # +find_options+ is a hash passed on to active_record's find when
134
+ # retrieving the data from db, useful to i.e. prefetch relationships with
135
+ # :include or to specify additional filter criteria with :conditions.
136
+ #
137
+ # This method returns a +SearchResults+ instance, which really is an Array that has
138
+ # been decorated with a total_hits attribute holding the total number of hits.
139
+ # Additionally, SearchResults is compatible with the pagination helper
140
+ # methods of the will_paginate plugin.
141
+ #
142
+ # Please keep in mind that the number of results delivered might be less than
143
+ # +limit+ if you specify any active record conditions that further limit
144
+ # the result. Use +limit+ and +offset+ as AR find_options instead.
145
+ # +page+ and +per_page+ are supposed to work regardless of any
146
+ # +conditions+ present in +find_options+.
147
def find_with_ferret(q, options = {}, find_options = {})
  scoped_conditions = scope(:find, :conditions) if respond_to?(:scope)
  if scoped_conditions
    # Work on a copy: the caller's hash must not pick up the scope conditions,
    # otherwise reusing it for another search would wrap them in again.
    find_options = find_options.dup
    find_options[:conditions] =
      if find_options[:conditions]
        "(#{find_options[:conditions]}) AND (#{scoped_conditions})"
      else
        scoped_conditions
      end
  end
  ActsAsFerret.find(q, self, options, find_options)
end
157
+
158
+
159
+ # Returns the total number of hits for the given query
160
+ #
161
+ # Note that since we don't query the database here, this method won't deliver
162
+ # the expected results when used on an AR association.
163
+ #
164
def total_hits(q, options={})
  # Answered by the index alone; the query and options are passed through as given.
  index = aaf_index
  index.total_hits(q, options)
end
167
+
168
+ # Finds instance model name, ids and scores by contents.
169
+ # Useful e.g. if you want to search across models or do not want to fetch
170
+ # all result records (yet).
171
+ #
172
+ # Options are the same as for find_with_ferret
173
+ #
174
+ # A block can be given too, it will be executed with every result:
175
+ # find_ids_with_ferret(q, options) do |model, id, score|
176
+ # id_array << id
177
+ # scores_by_id[id] = score
178
+ # end
179
+ # NOTE: in case a block is given, only the total_hits value will be returned
180
+ # instead of the [total_hits, results] array!
181
+ #
182
def find_ids_with_ferret(q, options = {}, &block)
  # Query, options and the optional block are handed to the index unchanged.
  index = aaf_index
  index.find_ids(q, options, &block)
end
185
+
186
+
187
+ protected
188
+
189
+ # def find_records_lazy_or_not(q, options = {}, find_options = {})
190
+ # if options[:lazy]
191
+ # logger.warn "find_options #{find_options} are ignored because :lazy => true" unless find_options.empty?
192
+ # lazy_find_by_contents q, options
193
+ # else
194
+ # ar_find_by_contents q, options, find_options
195
+ # end
196
+ # end
197
+ #
198
+ # def ar_find_by_contents(q, options = {}, find_options = {})
199
+ # result_ids = {}
200
+ # total_hits = find_ids_with_ferret(q, options) do |model, id, score, data|
201
+ # # stores ids, index and score of each hit for later ordering of
202
+ # # results
203
+ # result_ids[id] = [ result_ids.size + 1, score ]
204
+ # end
205
+ #
206
+ # result = ActsAsFerret::retrieve_records( { self.name => result_ids }, find_options )
207
+ #
208
+ # # count total_hits via sql when using conditions or when we're called
209
+ # # from an ActiveRecord association.
210
+ # if find_options[:conditions] or caller.find{ |call| call =~ %r{active_record/associations} }
211
+ # # chances are the ferret result count is not our total_hits value, so
212
+ # # we correct this here.
213
+ # if options[:limit] != :all || options[:page] || options[:offset] || find_options[:limit] || find_options[:offset]
214
+ # # our ferret result has been limited, so we need to re-run that
215
+ # # search to get the full result set from ferret.
216
+ # result_ids = {}
217
+ # find_ids_with_ferret(q, options.update(:limit => :all, :offset => 0)) do |model, id, score, data|
218
+ # result_ids[id] = [ result_ids.size + 1, score ]
219
+ # end
220
+ # # Now ask the database for the total size of the final result set.
221
+ # total_hits = count_records( { self.name => result_ids }, find_options )
222
+ # else
223
+ # # what we got from the database is our full result set, so take
224
+ # # it's size
225
+ # total_hits = result.length
226
+ # end
227
+ # end
228
+ #
229
+ # [ total_hits, result ]
230
+ # end
231
+ #
232
+ # def lazy_find_by_contents(q, options = {})
233
+ # logger.debug "lazy_find_by_contents: #{q}"
234
+ # result = []
235
+ # rank = 0
236
+ # total_hits = find_ids_with_ferret(q, options) do |model, id, score, data|
237
+ # logger.debug "model: #{model}, id: #{id}, data: #{data}"
238
+ # result << FerretResult.new(model, id, score, rank += 1, data)
239
+ # end
240
+ # [ total_hits, result ]
241
+ # end
242
+
243
+
244
# Resolves +model+ via +constantize+ and calls +find+ on the result with
# +id+ and +find_options+.
def model_find(model, id, find_options = {})
  klass = model.constantize
  klass.find(id, find_options)
end
247
+
248
+
249
+ # def count_records(id_arrays, find_options = {})
250
+ # count_options = find_options.dup
251
+ # count_options.delete :limit
252
+ # count_options.delete :offset
253
+ # count = 0
254
+ # id_arrays.each do |model, id_array|
255
+ # next if id_array.empty?
256
+ # model = model.constantize
257
+ # # merge conditions
258
+ # conditions = ActsAsFerret::combine_conditions([ "#{model.table_name}.#{model.primary_key} in (?)", id_array.keys ],
259
+ # find_options[:conditions])
260
+ # opts = find_options.merge :conditions => conditions
261
+ # opts.delete :limit; opts.delete :offset
262
+ # count += model.count opts
263
+ # end
264
+ # count
265
+ # end
266
+
267
+ end
268
+
269
+ end
270
+
@@ -0,0 +1,188 @@
1
+ module Ferret
2
+
3
+ module Analysis
4
+
5
+ # = PerFieldAnalyzer
6
+ #
7
+ # This PerFieldAnalyzer is a workaround to a memory leak in
8
+ # ferret 0.11.4. It basically does the same as the original
9
+ # Ferret::Analysis::PerFieldAnalyzer, but without the leak :)
10
+ #
11
+ # http://ferret.davebalmain.com/api/classes/Ferret/Analysis/PerFieldAnalyzer.html
12
+ #
13
+ # Thanks to Ben from omdb.org for tracking this down and creating this
14
+ # workaround.
15
+ # You can read more about the issue there:
16
+ # http://blog.omdb-beta.org/2007/7/29/tracking-down-a-memory-leak-in-ferret-0-11-4
17
class PerFieldAnalyzer < ::Ferret::Analysis::Analyzer
  # default_analyzer:: analyzer used for every field that has no analyzer of
  #                    its own registered via +add_field+
  def initialize( default_analyzer = StandardAnalyzer.new )
    @default_analyzer = default_analyzer
    @analyzers = {}
  end

  # Registers +analyzer+ as the analyzer to use for +field+.
  # Also available as <tt>analyzer[field] = other_analyzer</tt>.
  def add_field( field, analyzer )
    @analyzers[field] = analyzer
  end
  alias []= add_field

  # Returns the token stream for +string+, produced by the analyzer
  # registered for +field+ or by the default analyzer if there is none.
  def token_stream(field, string)
    analyzer = @analyzers.fetch(field, @default_analyzer)
    analyzer.token_stream(field, string)
  end
end
33
+ end
34
+
35
+ class Index::Index
36
+ attr_accessor :batch_size, :logger
37
+
38
# Indexes every model in +models+, then flushes, optimizes and closes this
# index and finally calls ActsAsFerret::close_multi_indexes.
def index_models(models)
  models.each do |model|
    index_model(model)
  end
  flush
  optimize
  close
  ActsAsFerret.close_multi_indexes
end
45
+
46
# Re-indexes all records of +model+: the batches yielded by
# <tt>model.records_for_rebuild</tt> are passed to a BulkIndexer created in
# :reindex mode for this index.
def index_model(model)
  indexer_args = { :batch_size => @batch_size, :logger => logger,
                   :model => model, :index => self, :reindex => true }
  bulk_indexer = ActsAsFerret::BulkIndexer.new(indexer_args)
  logger.info "reindexing model #{model.name}"

  model.records_for_rebuild(@batch_size) { |records, offset| bulk_indexer.index_records(records, offset) }
end
55
+
56
# Indexes the records of +model+ with the given +ids+ in batches, with
# auto-flush switched off for the duration of the run.
#
# model::   object responding to <tt>records_for_bulk_index(ids, batch_size)</tt>
# ids::     the ids to index; +size+ is used as the total for progress logging
# options:: :optimize (default true) -- optimize the index when done
#
# Returns the auto-flush setting that was in effect before the call.
def bulk_index(model, ids, options = {})
  options.reverse_merge! :optimize => true
  orig_flush = @auto_flush
  @auto_flush = false
  begin
    bulk_indexer = ActsAsFerret::BulkIndexer.new(:batch_size => @batch_size, :logger => logger,
                                                 :model => model, :index => self, :total => ids.size)
    model.records_for_bulk_index(ids, @batch_size) do |records, offset|
      logger.debug "#{model} bulk indexing #{records.size} at #{offset}"
      bulk_indexer.index_records(records, offset)
    end
    logger.info 'finishing bulk index...'
    flush
    if options[:optimize]
      logger.info 'optimizing...'
      optimize
    end
  ensure
    # Restore the previous setting even if indexing fails part-way through,
    # so an error cannot leave auto-flush permanently disabled.
    @auto_flush = orig_flush
  end
  orig_flush
end
74
+
75
+
76
+ # bulk-inserts a number of ferret documents.
77
+ # The argument has to be an array of two-element arrays each holding the document data and the analyzer to
78
+ # use for this document (which may be nil).
79
def update_batch(document_analyzer_pairs)
  ids = document_analyzer_pairs.collect {|da| da.first[@id_field] }
  @dir.synchrolock do
    # Remove existing documents with the same ids first, then add the new ones.
    batch_delete(ids)
    ensure_writer_open()
    document_analyzer_pairs.each do |doc, analyzer|
      if analyzer
        # Swap in the per-document analyzer for this one document only.
        old_analyzer = @writer.analyzer
        @writer.analyzer = analyzer
        begin
          @writer.add_document(doc)
        ensure
          # Put the writer's own analyzer back even when adding fails, so a
          # bad document cannot leak its analyzer into later writes.
          @writer.analyzer = old_analyzer
        end
      else
        @writer.add_document(doc)
      end
    end
    flush()
  end
end
97
+
98
+ # If +docs+ is a Hash or an Array then a batch delete will be performed.
99
+ # If +docs+ is an Array then it will be considered an array of +id+'s. If
100
+ # it is a Hash, then its keys will be used instead as the Array of
101
+ # document +id+'s. If the +id+ is an Integer then it is considered a
102
+ # Ferret document number and the corresponding document will be deleted.
103
+ # If the +id+ is a String or a Symbol then the +id+ will be considered a
104
+ # term and the documents that contain that term in the +:id_field+ will
105
+ # be deleted.
106
+ #
107
+ # docs:: An Array of docs to be deleted, or a Hash (in which case the keys
108
+ # are used)
109
+ #
110
+ # ripped from Ferret trunk.
111
def batch_delete(docs)
  docs = docs.keys if docs.is_a?(Hash)
  raise ArgumentError, "must pass Array or Hash" unless docs.is_a? Array
  ids = []
  terms = []
  # Split the arguments into document numbers (Integers) and id terms
  # (Strings / Symbols). 'then' is used because 'when X:' no longer parses
  # on Ruby 1.9 and later.
  docs.each do |doc|
    case doc
    when String  then terms << doc
    when Symbol  then terms << doc.to_s
    when Integer then ids << doc
    else
      # Report the class of the offending element.
      raise ArgumentError, "Cannot delete for arg of type #{doc.class}"
    end
  end
  if ids.size > 0
    ensure_reader_open
    ids.each {|id| @reader.delete(id)}
  end
  if terms.size > 0
    ensure_writer_open()
    terms.each { |t| @writer.delete(@id_field, t) }
    # TODO with Ferret trunk this would work:
    # @writer.delete(@id_field, terms)
  end
  return self
end
137
+
138
+ # search for the first document with +arg+ in the +id+ field and return its internal document number.
139
+ # The +id+ field is either :id or whatever you set
140
+ # :id_field parameter to when you create the Index object.
141
def doc_number(id)
  @dir.synchronize do
    ensure_reader_open
    # The id is looked up by its string form.
    matches = @reader.term_docs_for(@id_field, id.to_s)
    return nil unless matches.next?
    return matches.doc
  end
end
148
+ end
149
+
150
+ # add marshalling support to SortFields
151
class Search::SortField
  # Marshal hook: a SortField is serialized as its string representation.
  def _dump(depth)
    to_s
  end

  # Marshal hook: rebuilds a SortField from the string written by +_dump+.
  #
  # The predefined DOC_ID / SCORE constants (and their reversed variants) are
  # returned for their string forms; strings of the form "name:<type>",
  # optionally followed by "!" for reverse order, create a new SortField.
  #
  # Raises a RuntimeError ("invalid value: ...") for anything else.
  def self._load(string)
    # 'then' instead of the 'when ... :' form, which Ruby 1.9+ rejects as a
    # syntax error. Order matters: the reversed patterns must come first.
    case string
    when /<DOC(_ID)?>!/        then Ferret::Search::SortField::DOC_ID_REV
    when /<DOC(_ID)?>/         then Ferret::Search::SortField::DOC_ID
    when '<SCORE>!'            then Ferret::Search::SortField::SCORE_REV
    when '<SCORE>'             then Ferret::Search::SortField::SCORE
    when /^(\w+):<(\w+)>(!)?$/ then new($1.to_sym, :type => $2.to_sym, :reverse => !$3.nil?)
    else raise "invalid value: #{string}"
    end
  end
end
167
+
168
+ # add marshalling support to Sort
169
class Search::Sort
  # Marshal hook: a Sort is serialized as its string representation.
  def _dump(depth)
    to_s
  end

  # Marshal hook: rebuilds a Sort from the string written by +_dump+ by
  # loading each comma-separated entry through SortField._load.
  #
  # Raises a RuntimeError ("invalid value: ...") unless the string has the
  # form "Sort[...]".
  def self._load(string)
    # we exclude the last <DOC> sorting as it is appended by new anyway
    match = /^Sort\[(.*?)(<DOC>(!)?)?\]$/.match(string)
    raise "invalid value: #{string}" unless match

    sort_fields = []
    match[1].split(',').each do |value|
      value.strip!
      # Blank entries are skipped.
      sort_fields << Ferret::Search::SortField._load(value) unless value.blank?
    end
    new sort_fields.compact
  end
end
187
+
188
+ end