watson-acts_as_ferret 0.4.8.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (162) hide show
  1. data/LICENSE +20 -0
  2. data/README +104 -0
  3. data/acts_as_ferret.gemspec +58 -0
  4. data/bin/aaf_install +29 -0
  5. data/config/ferret_server.yml +24 -0
  6. data/doc/README.win32 +23 -0
  7. data/doc/demo/README +154 -0
  8. data/doc/demo/README_DEMO +23 -0
  9. data/doc/demo/Rakefile +10 -0
  10. data/doc/demo/app/controllers/admin/backend_controller.rb +14 -0
  11. data/doc/demo/app/controllers/admin_area_controller.rb +4 -0
  12. data/doc/demo/app/controllers/application.rb +5 -0
  13. data/doc/demo/app/controllers/contents_controller.rb +49 -0
  14. data/doc/demo/app/controllers/searches_controller.rb +8 -0
  15. data/doc/demo/app/helpers/admin/backend_helper.rb +2 -0
  16. data/doc/demo/app/helpers/application_helper.rb +3 -0
  17. data/doc/demo/app/helpers/content_helper.rb +2 -0
  18. data/doc/demo/app/helpers/search_helper.rb +2 -0
  19. data/doc/demo/app/models/comment.rb +48 -0
  20. data/doc/demo/app/models/content.rb +12 -0
  21. data/doc/demo/app/models/content_base.rb +28 -0
  22. data/doc/demo/app/models/search.rb +19 -0
  23. data/doc/demo/app/models/shared_index1.rb +3 -0
  24. data/doc/demo/app/models/shared_index2.rb +3 -0
  25. data/doc/demo/app/models/special_content.rb +3 -0
  26. data/doc/demo/app/models/stats.rb +20 -0
  27. data/doc/demo/app/views/admin/backend/search.rhtml +18 -0
  28. data/doc/demo/app/views/contents/_form.rhtml +10 -0
  29. data/doc/demo/app/views/contents/edit.rhtml +9 -0
  30. data/doc/demo/app/views/contents/index.rhtml +24 -0
  31. data/doc/demo/app/views/contents/new.rhtml +8 -0
  32. data/doc/demo/app/views/contents/show.rhtml +8 -0
  33. data/doc/demo/app/views/layouts/application.html.erb +17 -0
  34. data/doc/demo/app/views/searches/_content.html.erb +2 -0
  35. data/doc/demo/app/views/searches/search.html.erb +20 -0
  36. data/doc/demo/config/boot.rb +109 -0
  37. data/doc/demo/config/database.yml +38 -0
  38. data/doc/demo/config/environment.rb +69 -0
  39. data/doc/demo/config/environments/development.rb +16 -0
  40. data/doc/demo/config/environments/production.rb +19 -0
  41. data/doc/demo/config/environments/test.rb +21 -0
  42. data/doc/demo/config/ferret_server.yml +18 -0
  43. data/doc/demo/config/lighttpd.conf +40 -0
  44. data/doc/demo/config/routes.rb +9 -0
  45. data/doc/demo/db/development_structure.sql +15 -0
  46. data/doc/demo/db/migrate/001_initial_migration.rb +18 -0
  47. data/doc/demo/db/migrate/002_add_type_to_contents.rb +9 -0
  48. data/doc/demo/db/migrate/003_create_shared_index1s.rb +11 -0
  49. data/doc/demo/db/migrate/004_create_shared_index2s.rb +11 -0
  50. data/doc/demo/db/migrate/005_special_field.rb +9 -0
  51. data/doc/demo/db/migrate/006_create_stats.rb +15 -0
  52. data/doc/demo/db/schema.sql +18 -0
  53. data/doc/demo/db/schema.sqlite +14 -0
  54. data/doc/demo/doc/README_FOR_APP +2 -0
  55. data/doc/demo/doc/howto.txt +70 -0
  56. data/doc/demo/public/404.html +8 -0
  57. data/doc/demo/public/500.html +8 -0
  58. data/doc/demo/public/dispatch.cgi +10 -0
  59. data/doc/demo/public/dispatch.fcgi +24 -0
  60. data/doc/demo/public/dispatch.rb +10 -0
  61. data/doc/demo/public/favicon.ico +0 -0
  62. data/doc/demo/public/images/rails.png +0 -0
  63. data/doc/demo/public/index.html +277 -0
  64. data/doc/demo/public/robots.txt +1 -0
  65. data/doc/demo/public/stylesheets/scaffold.css +74 -0
  66. data/doc/demo/script/about +3 -0
  67. data/doc/demo/script/breakpointer +3 -0
  68. data/doc/demo/script/console +3 -0
  69. data/doc/demo/script/destroy +3 -0
  70. data/doc/demo/script/ferret_server +10 -0
  71. data/doc/demo/script/generate +3 -0
  72. data/doc/demo/script/performance/benchmarker +3 -0
  73. data/doc/demo/script/performance/profiler +3 -0
  74. data/doc/demo/script/plugin +3 -0
  75. data/doc/demo/script/process/inspector +3 -0
  76. data/doc/demo/script/process/reaper +3 -0
  77. data/doc/demo/script/process/spawner +3 -0
  78. data/doc/demo/script/process/spinner +3 -0
  79. data/doc/demo/script/runner +3 -0
  80. data/doc/demo/script/server +3 -0
  81. data/doc/demo/test/fixtures/comments.yml +12 -0
  82. data/doc/demo/test/fixtures/contents.yml +13 -0
  83. data/doc/demo/test/fixtures/remote_contents.yml +9 -0
  84. data/doc/demo/test/fixtures/shared_index1s.yml +7 -0
  85. data/doc/demo/test/fixtures/shared_index2s.yml +7 -0
  86. data/doc/demo/test/functional/admin/backend_controller_test.rb +35 -0
  87. data/doc/demo/test/functional/contents_controller_test.rb +81 -0
  88. data/doc/demo/test/functional/searches_controller_test.rb +71 -0
  89. data/doc/demo/test/smoke/drb_smoke_test.rb +321 -0
  90. data/doc/demo/test/smoke/process_stats.rb +21 -0
  91. data/doc/demo/test/test_helper.rb +30 -0
  92. data/doc/demo/test/unit/comment_test.rb +217 -0
  93. data/doc/demo/test/unit/content_test.rb +705 -0
  94. data/doc/demo/test/unit/ferret_result_test.rb +24 -0
  95. data/doc/demo/test/unit/multi_index_test.rb +329 -0
  96. data/doc/demo/test/unit/remote_index_test.rb +23 -0
  97. data/doc/demo/test/unit/shared_index1_test.rb +108 -0
  98. data/doc/demo/test/unit/shared_index2_test.rb +13 -0
  99. data/doc/demo/test/unit/sort_test.rb +21 -0
  100. data/doc/demo/test/unit/special_content_test.rb +25 -0
  101. data/doc/demo/vendor/plugins/will_paginate/LICENSE +18 -0
  102. data/doc/demo/vendor/plugins/will_paginate/README +108 -0
  103. data/doc/demo/vendor/plugins/will_paginate/Rakefile +23 -0
  104. data/doc/demo/vendor/plugins/will_paginate/init.rb +21 -0
  105. data/doc/demo/vendor/plugins/will_paginate/lib/will_paginate/collection.rb +45 -0
  106. data/doc/demo/vendor/plugins/will_paginate/lib/will_paginate/core_ext.rb +44 -0
  107. data/doc/demo/vendor/plugins/will_paginate/lib/will_paginate/finder.rb +159 -0
  108. data/doc/demo/vendor/plugins/will_paginate/lib/will_paginate/view_helpers.rb +95 -0
  109. data/doc/demo/vendor/plugins/will_paginate/test/array_pagination_test.rb +23 -0
  110. data/doc/demo/vendor/plugins/will_paginate/test/boot.rb +27 -0
  111. data/doc/demo/vendor/plugins/will_paginate/test/console +10 -0
  112. data/doc/demo/vendor/plugins/will_paginate/test/finder_test.rb +219 -0
  113. data/doc/demo/vendor/plugins/will_paginate/test/fixtures/admin.rb +3 -0
  114. data/doc/demo/vendor/plugins/will_paginate/test/fixtures/companies.yml +24 -0
  115. data/doc/demo/vendor/plugins/will_paginate/test/fixtures/company.rb +23 -0
  116. data/doc/demo/vendor/plugins/will_paginate/test/fixtures/developer.rb +11 -0
  117. data/doc/demo/vendor/plugins/will_paginate/test/fixtures/developers_projects.yml +13 -0
  118. data/doc/demo/vendor/plugins/will_paginate/test/fixtures/project.rb +4 -0
  119. data/doc/demo/vendor/plugins/will_paginate/test/fixtures/projects.yml +7 -0
  120. data/doc/demo/vendor/plugins/will_paginate/test/fixtures/replies.yml +20 -0
  121. data/doc/demo/vendor/plugins/will_paginate/test/fixtures/reply.rb +5 -0
  122. data/doc/demo/vendor/plugins/will_paginate/test/fixtures/schema.sql +44 -0
  123. data/doc/demo/vendor/plugins/will_paginate/test/fixtures/topic.rb +19 -0
  124. data/doc/demo/vendor/plugins/will_paginate/test/fixtures/topics.yml +30 -0
  125. data/doc/demo/vendor/plugins/will_paginate/test/fixtures/user.rb +2 -0
  126. data/doc/demo/vendor/plugins/will_paginate/test/fixtures/users.yml +35 -0
  127. data/doc/demo/vendor/plugins/will_paginate/test/helper.rb +42 -0
  128. data/doc/demo/vendor/plugins/will_paginate/test/lib/activerecord_test_connector.rb +64 -0
  129. data/doc/demo/vendor/plugins/will_paginate/test/lib/load_fixtures.rb +10 -0
  130. data/doc/demo/vendor/plugins/will_paginate/test/pagination_test.rb +136 -0
  131. data/doc/monit-example +22 -0
  132. data/init.rb +24 -0
  133. data/install.rb +18 -0
  134. data/lib/act_methods.rb +147 -0
  135. data/lib/acts_as_ferret.rb +593 -0
  136. data/lib/ar_mysql_auto_reconnect_patch.rb +41 -0
  137. data/lib/blank_slate.rb +54 -0
  138. data/lib/bulk_indexer.rb +56 -0
  139. data/lib/class_methods.rb +279 -0
  140. data/lib/ferret_extensions.rb +192 -0
  141. data/lib/ferret_find_methods.rb +142 -0
  142. data/lib/ferret_result.rb +58 -0
  143. data/lib/ferret_server.rb +238 -0
  144. data/lib/index.rb +99 -0
  145. data/lib/instance_methods.rb +172 -0
  146. data/lib/local_index.rb +202 -0
  147. data/lib/more_like_this.rb +217 -0
  148. data/lib/multi_index.rb +133 -0
  149. data/lib/rdig_adapter.rb +149 -0
  150. data/lib/remote_functions.rb +43 -0
  151. data/lib/remote_index.rb +54 -0
  152. data/lib/remote_multi_index.rb +20 -0
  153. data/lib/search_results.rb +50 -0
  154. data/lib/server_manager.rb +71 -0
  155. data/lib/unix_daemon.rb +86 -0
  156. data/lib/without_ar.rb +52 -0
  157. data/recipes/aaf_recipes.rb +116 -0
  158. data/script/ferret_daemon +94 -0
  159. data/script/ferret_server +12 -0
  160. data/script/ferret_service +178 -0
  161. data/tasks/ferret.rake +39 -0
  162. metadata +246 -0
@@ -0,0 +1,41 @@
1
+ # Source: http://pastie.caboo.se/154842
2
+ #
3
+ # in /etc/my.cnf on the MySQL server, you can set the interactive-timeout parameter,
4
+ # for example, 8 hours = 28800 sec
5
+ # interactive-timeout=28800
6
+
7
+ # in ActiveRecord, set the verification_timeout to something less than
8
+ # the interactive-timeout parameter; 14400 sec = 4 hours
9
+ ActiveRecord::Base.verification_timeout = 14400
10
+ ActiveRecord::Base.establish_connection
11
+
12
+ # Below is a monkey patch for keeping ActiveRecord connections alive.
13
+ # http://www.sparecycles.org/2007/7/2/saying-goodbye-to-lost-connections-in-rails
14
+
15
module ActiveRecord
  module ConnectionAdapters
    class MysqlAdapter
      # Runs +sql+ on the underlying connection (logged under +name+).
      #
      # If the statement fails because the connection was lost ("Lost
      # connection to MySQL server during query" or "MySQL server has gone
      # away"), the adapter reconnects and retries the statement exactly once;
      # a second failure is raised to the caller.
      #
      # A "Packets out of order" failure is re-raised as
      # ActiveRecord::StatementInvalid with a hint about outdated bindings.
      # Every other ActiveRecord::StatementInvalid is re-raised unchanged.
      def execute(sql, name = nil) #:nodoc:
        reconnect_lost_connections = true
        begin
          log(sql, name) { @connection.query(sql) }
        rescue ActiveRecord::StatementInvalid => exception
          # The pattern has to live on a single line: a line break inside the
          # literal becomes part of the pattern and keeps it from matching.
          if reconnect_lost_connections &&
             exception.message =~ /(Lost connection to MySQL server during query|MySQL server has gone away)/
            # only retry once, otherwise a dead server would loop forever
            reconnect_lost_connections = false
            reconnect!
            retry
          elsif exception.message.split(":").first =~ /Packets out of order/
            raise ActiveRecord::StatementInvalid,
                  "'Packets out of order' error was received from the database. " \
                  "Please update your mysql bindings (gem install mysql) and read " \
                  "http://dev.mysql.com/doc/mysql/en/password-hashing.html for more information. " \
                  "If you're on Windows, use the Instant Rails installer to get the updated mysql bindings."
          else
            raise
          end
        end
      end
    end
  end
end
41
+
@@ -0,0 +1,54 @@
1
module ActsAsFerret
  if defined?(BasicObject)
    # Ruby 1.9.x ships a method-free base class of its own
    class BlankSlate < BasicObject; end
  elsif defined?(BlankSlate)
    # Rails 2.x already provides a BlankSlate we can build on
    class BlankSlate < ::BlankSlate; end
  else
    # 'backported' for Rails pre 2.0
    #
    #--
    # Copyright 2004, 2006 by Jim Weirich (jim@weirichhouse.org).
    # All rights reserved.

    # Permission is granted for use, copying, modification, distribution,
    # and distribution of modified versions of this work as long as the
    # above copyright notice is included.
    #++

    ######################################################################
    # BlankSlate provides an abstract base class with no predefined
    # methods (except for <tt>\_\_send__</tt> and <tt>\_\_id__</tt>).
    # BlankSlate is useful as a base class when writing classes that
    # depend upon <tt>method_missing</tt> (e.g. dynamic proxies).
    #
    class BlankSlate
      class << self
        # Undefines the instance method +name+ on this class, remembering the
        # original implementation in @hidden_methods. Names starting with
        # "__", "instance_eval" or "methods" are left alone, as are names
        # that are not instance methods of the class.
        def hide(name)
          return unless instance_methods.include?(name.to_s)
          return if name =~ /^(__|instance_eval|methods)/

          (@hidden_methods ||= {})[name.to_sym] = instance_method(name)
          undef_method name
        end

        # Redefine a previously hidden method so that it may be called on a blank
        # slate object.
        #
        # Deliberately empty in this implementation: the methods that get
        # revealed where this is used are never hidden in the first place.
        def reveal(name)
        end
      end

      instance_methods.each { |method_name| hide(method_name) }

    end

  end
end
@@ -0,0 +1,56 @@
1
module ActsAsFerret
  # Pushes batches of records into an index and logs progress figures
  # (percentage done, average time per record, estimated time remaining).
  class BulkIndexer
    # args:
    # batch_size:: stored as @batch_size, defaults to 1000
    # logger::     receives the debug / info progress messages
    # model::      model class; its +name+ is logged, and its +count+ is the
    #              total when :reindex is set
    # index::      object whose +update_batch+ receives the documents
    # reindex::    when set, the total is taken from <tt>model.count</tt>
    # total::      total number of records, used when :reindex is not set
    def initialize(args = {})
      @batch_size      = args[:batch_size] || 1000
      @logger          = args[:logger]
      @model           = args[:model]
      @index           = args[:index]
      @work_done       = 0
      @indexed_records = 0
      @total_time      = 0.0
      @reindex         = true if args[:reindex]
      @model_count     = args[:reindex] ? @model.count.to_f : args[:total]
    end

    # Sends the documents of all +records+ that answer true to
    # <tt>ferret_enabled?(true)</tt> to the index in one batch, then updates
    # and logs the progress statistics. +offset+ is accepted but not used.
    def index_records(records, offset)
      elapsed = measure_time do
        pairs = records.map do |rec|
          [rec.to_doc, rec.ferret_analyzer] if rec.ferret_enabled?(true)
        end
        @index.update_batch(pairs.compact)
      end
      batch_time = elapsed.to_f
      rec_count = records.size

      # every record of the batch counts as processed, indexed or not
      @indexed_records += rec_count
      @total_time += batch_time
      if @model_count > 0
        @work_done = @indexed_records.to_f / @model_count * 100.0
      end

      @logger.debug "took #{batch_time} to index last #{rec_count} records. #{records_waiting} records to go. Avg time per record: #{avg_time_per_record}"
      remaining_time = avg_time_per_record * records_waiting
      @logger.info "#{@reindex ? 're' : 'bulk '}index model #{@model.name} : #{'%.2f' % @work_done}% complete : #{'%.2f' % remaining_time} secs to finish"
    end

    # Runs the given block and returns the elapsed wall clock time in seconds.
    def measure_time
      started_at = Time.now
      yield
      Time.now - started_at
    end

    protected

    # Mean time spent per record so far; 0 before anything was processed.
    def avg_time_per_record
      @indexed_records > 0 ? @total_time / @indexed_records : 0
    end

    # Number of records still to be processed.
    def records_waiting
      @model_count - @indexed_records
    end

  end

end
@@ -0,0 +1,279 @@
1
module ActsAsFerret

  module ClassMethods

    # Disables ferret index updates for this model. When a block is given,
    # Ferret is re-enabled after the block has run -- also when the block
    # raises, so a failing block cannot leave index updates switched off.
    #
    # Returns true when a block was given, nil otherwise.
    def disable_ferret
      aaf_configuration[:enabled] = false
      return unless block_given?
      begin
        yield
      ensure
        enable_ferret
      end
      true
    end

    # Switches ferret index updates for this model (back) on.
    def enable_ferret
      aaf_configuration[:enabled] = true
    end

    # The current value of the :enabled flag in this model's aaf_configuration.
    def ferret_enabled?
      aaf_configuration[:enabled]
    end

    # rebuild the index from all data stored for this model, and any other
    # model classes associated with the same index.
    # This is called automatically when no index exists yet.
    #
    def rebuild_index
      aaf_index.rebuild_index
    end

    # re-index a number of records specified by the given ids. Use for large
    # indexing jobs i.e. after modifying a lot of records with Ferret disabled.
    # Please note that the state of Ferret (enabled or disabled at class or
    # record level) is not checked by this method, so if you need to do so
    # (e.g. because of a custom ferret_enabled? implementation), you have to do
    # so yourself.
    #
    # The ids may be given as separate arguments or as a single Enumerable; a
    # trailing Hash is taken as options and handed on to the index.
    def bulk_index(*ids)
      options = Hash === ids.last ? ids.pop : {}
      ids = ids.first if ids.size == 1 && ids.first.is_a?(Enumerable)
      aaf_index.bulk_index(self.name, ids, options)
    end

    # true if our db and table appear to be suitable for the mysql fast batch
    # hack (see
    # http://weblog.jamisbuck.org/2007/4/6/faking-cursors-in-activerecord),
    # nil otherwise.
    def use_fast_batches?
      if connection.class.name =~ /Mysql/ && primary_key == 'id' && aaf_configuration[:mysql_fast_batches]
        logger.info "using mysql specific batched find :all. Turn off with :mysql_fast_batches => false if you encounter problems (i.e. because of non-integer UUIDs in the id column)"
        true
      end
    end

    # Returns all records modified or created after the specified time.
    # Used by the rake rebuild task to find models that need to be updated in
    # the index after the rebuild finished because they changed while the
    # rebuild was running.
    # Override if your models don't stick to the created_at/updated_at
    # convention.
    #
    # Returns an empty array (and logs a warning) when the table has neither
    # an updated_at nor a created_at column.
    def records_modified_since(time)
      condition = []
      %w(updated_at created_at).each do |col|
        condition << "#{col} >= ?" if column_names.include? col
      end
      if condition.empty?
        logger.warn "#{self.name}: Override records_modified_since(time) to keep the index up to date with records changed during rebuild."
        []
      else
        # OR, not AND: a record that was created before +time+ but updated
        # afterwards has to be picked up as well.
        find :all, :conditions => [ condition.join(' OR '), *([time]*condition.size) ]
      end
    end

    # runs across all records yielding those to be indexed when the index is rebuilt
    #
    # Yields an array of records together with an offset: the id of the last
    # record of the batch on the fast batch path, the row offset otherwise.
    def records_for_rebuild(batch_size = 1000)
      transaction do
        if use_fast_batches?
          offset = 0
          # The id of the last row of a batch is the starting point of the
          # next one, so the rows have to come back ordered by id.
          order = "#{table_name}.id ASC"
          loop do
            rows = find(:all, :conditions => [ "#{table_name}.id > ?", offset ],
                              :limit => batch_size, :order => order)
            break unless rows.any?
            offset = rows.last.id
            yield rows, offset
          end
        else
          order = "#{primary_key} ASC" # fixes #212
          0.step(self.count, batch_size) do |offset|
            yield find( :all, :limit => batch_size, :offset => offset, :order => order ), offset
          end
        end
      end
    end

    # yields the records with the given ids, in batches of batch_size
    #
    # The second block argument is the position of the batch's first id within
    # +ids+. Each slice of ids is looked up with a single query.
    def records_for_bulk_index(ids, batch_size = 1000)
      transaction do
        offset = 0
        ids.each_slice(batch_size) do |id_slice|
          yield find( :all, :conditions => ["id in (?)", id_slice] ), offset
          offset += batch_size
        end
      end
    end

    # Retrieve the index instance for this model class. This can either be a
    # LocalIndex, or a RemoteIndex instance.
    #
    def aaf_index
      @index ||= ActsAsFerret::get_index(aaf_configuration[:name])
    end

    # Finds instances by searching the Ferret index. Terms are ANDed by default, use
    # OR between terms for ORed queries. Or specify +:or_default => true+ in the
    # +:ferret+ options hash of acts_as_ferret.
    #
    # You may either use the +offset+ and +limit+ options to implement your own
    # pagination logic, or use the +page+ and +per_page+ options to use the
    # built in pagination support which is compatible with will_paginate's view
    # helpers. If +page+ and +per_page+ are given, +offset+ and +limit+ will be
    # ignored.
    #
    # == options:
    # page::     page of search results to retrieve
    # per_page:: number of search results that are displayed per page
    # offset::   first hit to retrieve (useful for paging)
    # limit::    number of hits to retrieve, or :all to retrieve
    #            all results
    # lazy::     Array of field names whose contents should be read directly
    #            from the index. Those fields have to be marked
    #            +:store => :yes+ in their field options. Give true to get all
    #            stored fields. Note that if you have a shared index, you have
    #            to explicitly state the fields you want to fetch, true won't
    #            work here)
    #
    # +find_options+ is a hash passed on to active_record's find when
    # retrieving the data from db, useful to i.e. prefetch relationships with
    # :include or to specify additional filter criteria with :conditions (only string and array syntax supported).
    # You can also call find_with_ferret inside named or dynamic scopes, if you like the conditions hash syntax more.
    #
    # This method returns a +SearchResults+ instance, which really is an Array that has
    # been decorated with a total_hits attribute holding the total number of hits.
    # Additionally, SearchResults is compatible with the pagination helper
    # methods of the will_paginate plugin.
    #
    # Please keep in mind that the number of results delivered might be less than
    # +limit+ if you specify any active record conditions that further limit
    # the result. Use +limit+ and +offset+ as AR find_options instead.
    # +page+ and +per_page+ are supposed to work regardless of any
    # +conditions+ present in +find_options+.
    def find_with_ferret(q, options = {}, find_options = {})
      if respond_to?(:scope) && scope(:find, :conditions)
        find_options[:conditions] ||= '1=1' # treat external scope the same as if :conditions present (i.e. when it comes to counting results)
      end
      return ActsAsFerret.find(q, self, options, find_options)
    end


    # Returns the total number of hits for the given query
    #
    # Note that since we don't query the database here, this method won't deliver
    # the expected results when used on an AR association.
    #
    def total_hits(q, options={})
      aaf_index.total_hits(q, options)
    end

    # Finds instance model name, ids and scores by contents.
    # Useful e.g. if you want to search across models or do not want to fetch
    # all result records (yet).
    #
    # Options are the same as for find_with_ferret
    #
    # A block can be given too, it will be executed with every result:
    # find_ids_with_ferret(q, options) do |model, id, score|
    #    id_array << id
    #    scores_by_id[id] = score
    # end
    # NOTE: in case a block is given, only the total_hits value will be returned
    # instead of the [total_hits, results] array!
    #
    def find_ids_with_ferret(q, options = {}, &block)
      aaf_index.find_ids(q, options, &block)
    end

    # An implementation of http://rm.jkraemer.net/issues/show/161
    # (hands +id+, +query+ and +options+ on to the index's +highlight+).
    def highlight(id, query, options = {})
      aaf_index.highlight(id, query, options)
    end

    # Hands +id+ on to the index's +document_number+.
    def document_number(id)
      aaf_index.document_number(id)
    end

    # Hands +id+ on to the index's +query_for_record+.
    def query_for_record(id)
      aaf_index.query_for_record(id)
    end

    protected

#    def find_records_lazy_or_not(q, options = {}, find_options = {})
#      if options[:lazy]
#        logger.warn "find_options #{find_options} are ignored because :lazy => true" unless find_options.empty?
#        lazy_find_by_contents q, options
#      else
#        ar_find_by_contents q, options, find_options
#      end
#    end
#
#    def ar_find_by_contents(q, options = {}, find_options = {})
#      result_ids = {}
#      total_hits = find_ids_with_ferret(q, options) do |model, id, score, data|
#        # stores ids, index and score of each hit for later ordering of
#        # results
#        result_ids[id] = [ result_ids.size + 1, score ]
#      end
#
#      result = ActsAsFerret::retrieve_records( { self.name => result_ids }, find_options )
#
#      # count total_hits via sql when using conditions or when we're called
#      # from an ActiveRecord association.
#      if find_options[:conditions] or caller.find{ |call| call =~ %r{active_record/associations} }
#        # chances are the ferret result count is not our total_hits value, so
#        # we correct this here.
#        if options[:limit] != :all || options[:page] || options[:offset] || find_options[:limit] || find_options[:offset]
#          # our ferret result has been limited, so we need to re-run that
#          # search to get the full result set from ferret.
#          result_ids = {}
#          find_ids_with_ferret(q, options.update(:limit => :all, :offset => 0)) do |model, id, score, data|
#            result_ids[id] = [ result_ids.size + 1, score ]
#          end
#          # Now ask the database for the total size of the final result set.
#          total_hits = count_records( { self.name => result_ids }, find_options )
#        else
#          # what we got from the database is our full result set, so take
#          # it's size
#          total_hits = result.length
#        end
#      end
#
#      [ total_hits, result ]
#    end
#
#    def lazy_find_by_contents(q, options = {})
#      logger.debug "lazy_find_by_contents: #{q}"
#      result = []
#      rank   = 0
#      total_hits = find_ids_with_ferret(q, options) do |model, id, score, data|
#        logger.debug "model: #{model}, id: #{id}, data: #{data}"
#        result << FerretResult.new(model, id, score, rank += 1, data)
#      end
#      [ total_hits, result ]
#    end


    # Looks up the record +id+ of the model class named +model+ (a String
    # that is constantized), handing +find_options+ on to find.
    def model_find(model, id, find_options = {})
      model.constantize.find(id, find_options)
    end


#    def count_records(id_arrays, find_options = {})
#      count_options = find_options.dup
#      count_options.delete :limit
#      count_options.delete :offset
#      count = 0
#      id_arrays.each do |model, id_array|
#        next if id_array.empty?
#        model = model.constantize
#        # merge conditions
#        conditions = ActsAsFerret::combine_conditions([ "#{model.table_name}.#{model.primary_key} in (?)", id_array.keys ],
#                                        find_options[:conditions])
#        opts = find_options.merge :conditions => conditions
#        opts.delete :limit; opts.delete :offset
#        count += model.count opts
#      end
#      count
#    end

  end

end
279
+
@@ -0,0 +1,192 @@
1
+ module Ferret
2
+
3
module Analysis

  # = PerFieldAnalyzer
  #
  # This PerFieldAnalyzer is a workaround to a memory leak in
  # ferret 0.11.4. It does basically do the same as the original
  # Ferret::Analysis::PerFieldAnalyzer, but without the leak :)
  #
  # http://ferret.davebalmain.com/api/classes/Ferret/Analysis/PerFieldAnalyzer.html
  #
  # Thanks to Ben from omdb.org for tracking this down and creating this
  # workaround.
  # You can read more about the issue there:
  # http://blog.omdb-beta.org/2007/7/29/tracking-down-a-memory-leak-in-ferret-0-11-4
  class PerFieldAnalyzer < ::Ferret::Analysis::Analyzer
    # +default_analyzer+ is used for every field that has no analyzer of its
    # own registered through add_field.
    def initialize( default_analyzer = StandardAnalyzer.new )
      @default_analyzer = default_analyzer
      @analyzers = {}
    end

    # Registers +analyzer+ as the analyzer for +field+ (also available as []=).
    def add_field( field, analyzer )
      @analyzers[field] = analyzer
    end
    alias []= add_field

    # Returns the token stream for +string+, produced by the analyzer
    # registered for +field+ or by the default analyzer if there is none.
    def token_stream(field, string)
      analyzer = @analyzers.has_key?(field) ? @analyzers[field] : @default_analyzer
      analyzer.token_stream(field, string)
    end
  end
end
34
+
35
+ class Index::Index
36
+ attr_accessor :batch_size, :logger
37
+
38
# Indexes each of the given +models+ via index_model, then flushes,
# optimizes and closes this index and finally closes the multi indexes
# through ActsAsFerret.
def index_models(models)
  models.each { |klass| index_model(klass) }
  flush
  optimize
  close
  ActsAsFerret.close_multi_indexes
end
45
+
46
# Re-indexes +model+: every batch yielded by the model's records_for_rebuild
# (called with @batch_size) is handed to a BulkIndexer that writes into this
# index.
def index_model(model)
  indexer_args = { :batch_size => @batch_size, :logger => logger,
                   :model => model, :index => self, :reindex => true }
  bulk_indexer = ActsAsFerret::BulkIndexer.new(indexer_args)
  logger.info "reindexing model #{model.name}"

  model.records_for_rebuild(@batch_size) { |batch, position| bulk_indexer.index_records(batch, position) }
end
55
+
56
# Indexes the records of +model+ with the given +ids+ in batches of
# @batch_size, then flushes the index and, unless <tt>:optimize => false</tt>
# was given in +options+, optimizes it.
#
# @auto_flush is switched off while the batches are written. It is restored
# in an +ensure+ so that a failure during indexing cannot leave the index
# with auto flushing permanently disabled.
#
# Returns the restored auto flush setting (as before).
def bulk_index(model, ids, options = {})
  options.reverse_merge! :optimize => true
  orig_flush = @auto_flush
  @auto_flush = false
  begin
    bulk_indexer = ActsAsFerret::BulkIndexer.new(:batch_size => @batch_size, :logger => logger,
                                                 :model => model, :index => self, :total => ids.size)
    model.records_for_bulk_index(ids, @batch_size) do |records, offset|
      logger.debug "#{model} bulk indexing #{records.size} at #{offset}"
      bulk_indexer.index_records(records, offset)
    end
    logger.info 'finishing bulk index...'
    flush
    if options[:optimize]
      logger.info 'optimizing...'
      optimize
    end
  ensure
    @auto_flush = orig_flush
  end
  orig_flush
end
74
+
75
+
76
+ # bulk-inserts a number of ferret documents.
77
+ # The argument has to be an array of two-element arrays each holding the document data and the analyzer to
78
+ # use for this document (which may be nil).
79
# Replaces the given documents in the index while holding the directory
# lock: documents with the same ids are deleted first, then every document
# is added and the index is flushed.
#
# +document_analyzer_pairs+ is an array of [document, analyzer] pairs. When
# an analyzer is given it is set on the writer for that one document only;
# the writer's previous analyzer is restored even if adding the document
# raises.
def update_batch(document_analyzer_pairs)
  ids = document_analyzer_pairs.collect { |da| da.first[@id_field] }
  @dir.synchronize do
    batch_delete(ids)
    ensure_writer_open()
    document_analyzer_pairs.each do |doc, analyzer|
      if analyzer
        old_analyzer = @writer.analyzer
        @writer.analyzer = analyzer
        begin
          @writer.add_document(doc)
        ensure
          # never leave a per-document analyzer installed on the writer
          @writer.analyzer = old_analyzer
        end
      else
        @writer.add_document(doc)
      end
    end
    flush()
  end
end
97
+
98
+ # search for the first document with +id+ in the id field and return its internal document number.
99
+ # The +id+ field is either :id or whatever you set
100
+ # :id_field parameter to when you create the Index object.
101
# Returns the document number of the first document whose id field contains
# +id+ (compared as a string), or nil if there is no such document. Runs
# while holding the directory lock.
def doc_number(id)
  @dir.synchronize do
    ensure_reader_open()
    matches = @reader.term_docs_for(@id_field, id.to_s)
    return nil unless matches.next?
    return matches.doc
  end
end
108
+
109
+ private
110
+
111
+
112
+ # If +docs+ is a Hash or an Array then a batch delete will be performed.
113
+ # If +docs+ is an Array then it will be considered an array of +id+'s. If
114
+ # it is a Hash, then its keys will be used instead as the Array of
115
+ # document +id+'s. If the +id+ is an Integer then it is considered a
116
+ # Ferret document number and the corresponding document will be deleted.
117
+ # If the +id+ is a String or a Symbol then the +id+ will be considered a
118
+ # term and the documents that contain that term in the +:id_field+ will
119
+ # be deleted.
120
+ #
121
+ # docs:: An Array of docs to be deleted, or a Hash (in which case the keys
122
+ # are used)
123
+ #
124
+ # ripped from Ferret trunk.
125
# Deletes a batch of documents. Integers are deleted through the reader,
# Strings and Symbols through the writer as terms of the id field.
#
# Raises ArgumentError when +docs+ is neither an Array nor a Hash, or when
# it contains an entry that is not a String, Symbol or Integer. All entries
# are classified before the first delete is issued.
#
# Returns self.
def batch_delete(docs)
  docs = docs.keys if docs.is_a?(Hash)
  raise ArgumentError, "must pass Array or Hash" unless docs.is_a? Array
  ids = []
  terms = []
  docs.each do |doc|
    case doc
    when String  then terms << doc
    when Symbol  then terms << doc.to_s
    when Integer then ids << doc
    else
      # report the class of the offending entry
      raise ArgumentError, "Cannot delete for arg of type #{doc.class}"
    end
  end
  if ids.size > 0
    ensure_reader_open
    ids.each { |id| @reader.delete(id) }
  end
  if terms.size > 0
    ensure_writer_open()
    terms.each { |t| @writer.delete(@id_field, t) }
    # TODO with Ferret trunk this would work:
    # @writer.delete(@id_field, terms)
  end
  return self
end
151
+
152
+ end
153
+
154
+ # add marshalling support to SortFields
155
class Search::SortField
  # Marshal hook: a sort field is serialized as its to_s representation.
  def _dump(depth)
    to_s
  end

  # Marshal hook: rebuilds a sort field from the string written by _dump.
  # The document id and score orderings map to the predefined constants,
  # "name:<type>" (with an optional trailing "!" for reverse order) creates a
  # new instance. Anything else raises a RuntimeError.
  def self._load(string)
    case string
    when /<DOC(_ID)?>!/ then Ferret::Search::SortField::DOC_ID_REV
    when /<DOC(_ID)?>/  then Ferret::Search::SortField::DOC_ID
    when '<SCORE>!'     then Ferret::Search::SortField::SCORE_REV
    when '<SCORE>'      then Ferret::Search::SortField::SCORE
    when /^(\w+):<(\w+)>(!)?$/
      field_name  = Regexp.last_match(1).to_sym
      sort_type   = Regexp.last_match(2).to_sym
      reverse_flag = !Regexp.last_match(3).nil?
      new(field_name, :type => sort_type, :reverse => reverse_flag)
    else
      raise "invalid value: #{string}"
    end
  end
end
171
+
172
+ # add marshalling support to Sort
173
class Search::Sort
  # Marshal hook: a sort is serialized as its to_s representation.
  def _dump(depth)
    to_s
  end

  # Marshal hook: rebuilds a sort from the "Sort[...]" string written by
  # _dump, loading each comma separated entry through SortField._load.
  # Raises a RuntimeError for strings that do not have that shape.
  def self._load(string)
    # we exclude the last <DOC> sorting as it is appended by new anyway
    unless string =~ /^Sort\[(.*?)(<DOC>(!)?)?\]$/
      raise "invalid value: #{string}"
    end

    loaded = Regexp.last_match(1).split(',').map do |entry|
      entry.strip!
      entry.blank? ? nil : Ferret::Search::SortField._load(entry)
    end
    new loaded.compact
  end
end
191
+
192
+ end