ebeigarts-thinking-sphinx 1.1.22 → 1.2.10

Sign up to get free protection for your applications and to get access to all the features.
Files changed (61) hide show
  1. data/README.textile +14 -0
  2. data/VERSION.yml +4 -0
  3. data/lib/thinking_sphinx.rb +60 -64
  4. data/lib/thinking_sphinx/active_record.rb +35 -7
  5. data/lib/thinking_sphinx/active_record/scopes.rb +39 -0
  6. data/lib/thinking_sphinx/adapters/postgresql_adapter.rb +3 -2
  7. data/lib/thinking_sphinx/attribute.rb +62 -22
  8. data/lib/thinking_sphinx/configuration.rb +21 -1
  9. data/lib/thinking_sphinx/core/array.rb +7 -0
  10. data/lib/thinking_sphinx/deltas/delayed_delta.rb +3 -0
  11. data/lib/thinking_sphinx/deploy/capistrano.rb +26 -8
  12. data/lib/thinking_sphinx/excerpter.rb +22 -0
  13. data/lib/thinking_sphinx/facet.rb +8 -2
  14. data/lib/thinking_sphinx/facet_search.rb +134 -0
  15. data/lib/thinking_sphinx/index.rb +2 -2
  16. data/lib/thinking_sphinx/index/builder.rb +0 -1
  17. data/lib/thinking_sphinx/property.rb +2 -0
  18. data/lib/thinking_sphinx/rails_additions.rb +14 -0
  19. data/lib/thinking_sphinx/search.rb +633 -671
  20. data/lib/thinking_sphinx/search_methods.rb +421 -0
  21. data/lib/thinking_sphinx/source.rb +5 -5
  22. data/lib/thinking_sphinx/source/internal_properties.rb +1 -1
  23. data/lib/thinking_sphinx/source/sql.rb +10 -8
  24. data/lib/thinking_sphinx/tasks.rb +14 -9
  25. data/spec/{unit → lib}/thinking_sphinx/active_record/delta_spec.rb +1 -1
  26. data/spec/{unit → lib}/thinking_sphinx/active_record/has_many_association_spec.rb +0 -0
  27. data/spec/lib/thinking_sphinx/active_record/scopes_spec.rb +96 -0
  28. data/spec/{unit → lib}/thinking_sphinx/active_record_spec.rb +44 -5
  29. data/spec/{unit → lib}/thinking_sphinx/association_spec.rb +0 -0
  30. data/spec/{unit → lib}/thinking_sphinx/attribute_spec.rb +110 -3
  31. data/spec/{unit → lib}/thinking_sphinx/configuration_spec.rb +87 -41
  32. data/spec/lib/thinking_sphinx/core/array_spec.rb +9 -0
  33. data/spec/{unit → lib}/thinking_sphinx/core/string_spec.rb +0 -0
  34. data/spec/lib/thinking_sphinx/excerpter_spec.rb +49 -0
  35. data/spec/lib/thinking_sphinx/facet_search_spec.rb +176 -0
  36. data/spec/{unit → lib}/thinking_sphinx/facet_spec.rb +34 -15
  37. data/spec/{unit → lib}/thinking_sphinx/field_spec.rb +0 -0
  38. data/spec/{unit → lib}/thinking_sphinx/index/builder_spec.rb +100 -0
  39. data/spec/{unit → lib}/thinking_sphinx/index/faux_column_spec.rb +0 -0
  40. data/spec/{unit → lib}/thinking_sphinx/index_spec.rb +0 -0
  41. data/spec/{unit → lib}/thinking_sphinx/rails_additions_spec.rb +12 -0
  42. data/spec/lib/thinking_sphinx/search_methods_spec.rb +152 -0
  43. data/spec/lib/thinking_sphinx/search_spec.rb +1066 -0
  44. data/spec/{unit → lib}/thinking_sphinx/source_spec.rb +10 -0
  45. data/spec/{unit → lib}/thinking_sphinx_spec.rb +10 -0
  46. data/tasks/distribution.rb +20 -38
  47. data/tasks/testing.rb +3 -1
  48. data/vendor/riddle/lib/riddle.rb +1 -1
  49. data/vendor/riddle/lib/riddle/client.rb +3 -0
  50. data/vendor/riddle/lib/riddle/client/message.rb +4 -3
  51. data/vendor/riddle/lib/riddle/configuration/section.rb +1 -1
  52. data/vendor/riddle/lib/riddle/controller.rb +17 -7
  53. metadata +63 -83
  54. data/lib/thinking_sphinx/active_record/search.rb +0 -57
  55. data/lib/thinking_sphinx/collection.rb +0 -148
  56. data/lib/thinking_sphinx/facet_collection.rb +0 -59
  57. data/lib/thinking_sphinx/search/facets.rb +0 -104
  58. data/spec/unit/thinking_sphinx/active_record/search_spec.rb +0 -107
  59. data/spec/unit/thinking_sphinx/collection_spec.rb +0 -15
  60. data/spec/unit/thinking_sphinx/facet_collection_spec.rb +0 -64
  61. data/spec/unit/thinking_sphinx/search_spec.rb +0 -228
@@ -159,7 +159,9 @@ module ThinkingSphinx
159
159
  # messy dependencies issues).
160
160
  #
161
161
  def load_models
162
- return if defined?(Rails) && Rails.configuration.cache_classes
162
+ return if defined?(Rails) &&
163
+ Rails.configuration.cache_classes &&
164
+ Rails::VERSION::STRING.to_f > 2.1
163
165
 
164
166
  self.model_directories.each do |base|
165
167
  Dir["#{base}**/*.rb"].each do |file|
@@ -233,6 +235,24 @@ module ThinkingSphinx
233
235
  end
234
236
  end
235
237
 
238
+ def client
239
+ client = Riddle::Client.new address, port
240
+ client.max_matches = configuration.searchd.max_matches || 1000
241
+ client
242
+ end
243
+
244
+ def models_by_crc
245
+ @models_by_crc ||= begin
246
+ ThinkingSphinx.indexed_models.inject({}) do |hash, model|
247
+ hash[model.constantize.to_crc32] = model
248
+ Object.subclasses_of(model.constantize).each { |subclass|
249
+ hash[subclass.to_crc32] = subclass.name
250
+ }
251
+ hash
252
+ end
253
+ end
254
+ end
255
+
236
256
  private
237
257
 
238
258
  # Parse the config/sphinx.yml file - if it exists - then use the attribute
@@ -0,0 +1,7 @@
1
+ module SearchAsArray
2
+ def ===(object)
3
+ object.is_a?(ThinkingSphinx::Search) || super
4
+ end
5
+ end
6
+
7
+ Array.extend SearchAsArray
@@ -8,6 +8,9 @@ module ThinkingSphinx
8
8
  module Deltas
9
9
  class DelayedDelta < ThinkingSphinx::Deltas::DefaultDelta
10
10
  def index(model, instance = nil)
11
+ return true unless ThinkingSphinx.updates_enabled? && ThinkingSphinx.deltas_enabled?
12
+ return true if instance && !toggled(instance)
13
+
11
14
  ThinkingSphinx::Deltas::Job.enqueue(
12
15
  ThinkingSphinx::Deltas::DeltaJob.new(delta_index_name(model)),
13
16
  ThinkingSphinx::Configuration.instance.delayed_job_priority
@@ -1,11 +1,26 @@
1
1
  Capistrano::Configuration.instance(:must_exist).load do
2
2
  namespace :thinking_sphinx do
3
3
  namespace :install do
4
- desc "Install Sphinx by source"
4
+ desc <<-DESC
5
+ Install Sphinx by source
6
+
7
+ If Postgres is available, Sphinx will use it.
8
+
9
+ If the variable :thinking_sphinx_configure_args is set, it will
10
+ be passed to the Sphinx configure script. You can use this to
11
+ install Sphinx in a non-standard location:
12
+
13
+ set :thinking_sphinx_configure_args, "--prefix=$HOME/software"
14
+ DESC
15
+
5
16
  task :sphinx do
6
17
  with_postgres = false
7
- run "which pg_config" do |channel, stream, data|
8
- with_postgres = !(data.nil? || data == "")
18
+ begin
19
+ run "which pg_config" do |channel, stream, data|
20
+ with_postgres = !(data.nil? || data == "")
21
+ end
22
+ rescue Capistrano::CommandError => e
23
+ puts "Continuing despite error: #{e.message}"
9
24
  end
10
25
 
11
26
  args = []
@@ -14,14 +29,15 @@ Capistrano::Configuration.instance(:must_exist).load do
14
29
  args << "--with-pgsql=#{data}"
15
30
  end
16
31
  end
17
-
32
+ args << fetch(:thinking_sphinx_configure_args, '')
33
+
18
34
  commands = <<-CMD
19
35
  wget -q http://www.sphinxsearch.com/downloads/sphinx-0.9.8.1.tar.gz >> sphinx.log
20
36
  tar xzvf sphinx-0.9.8.1.tar.gz
21
37
  cd sphinx-0.9.8.1
22
38
  ./configure #{args.join(" ")}
23
39
  make
24
- sudo make install
40
+ #{try_sudo} make install
25
41
  rm -rf sphinx-0.9.8.1 sphinx-0.9.8.1.tar.gz
26
42
  CMD
27
43
  run commands.split(/\n\s+/).join(" && ")
@@ -29,7 +45,7 @@ Capistrano::Configuration.instance(:must_exist).load do
29
45
 
30
46
  desc "Install Thinking Sphinx as a gem from GitHub"
31
47
  task :ts do
32
- sudo "gem install freelancing-god-thinking-sphinx --source http://gems.github.com"
48
+ run "#{try_sudo} gem install freelancing-god-thinking-sphinx --source http://gems.github.com"
33
49
  end
34
50
  end
35
51
 
@@ -70,12 +86,14 @@ Capistrano::Configuration.instance(:must_exist).load do
70
86
 
71
87
  desc "Add the shared folder for sphinx files for the production environment"
72
88
  task :shared_sphinx_folder, :roles => :web do
73
- sudo "mkdir -p #{shared_path}/db/sphinx/production"
89
+ run "mkdir -p #{shared_path}/db/sphinx/production"
74
90
  end
75
91
 
76
92
  def rake(*tasks)
93
+ rails_env = fetch(:rails_env, "production")
94
+ rake = fetch(:rake, "rake")
77
95
  tasks.each do |t|
78
- run "cd #{current_path} && rake #{t} RAILS_ENV=production"
96
+ run "cd #{current_path}; #{rake} RAILS_ENV=#{rails_env} #{t}"
79
97
  end
80
98
  end
81
99
  end
@@ -0,0 +1,22 @@
1
+ module ThinkingSphinx
2
+ class Excerpter
3
+ CoreMethods = %w( kind_of? object_id respond_to? should should_not stub! )
4
+ # Hide most methods, to allow them to be passed through to the instance.
5
+ instance_methods.select { |method|
6
+ method.to_s[/^__/].nil? && !CoreMethods.include?(method.to_s)
7
+ }.each { |method|
8
+ undef_method method
9
+ }
10
+
11
+ def initialize(search, instance)
12
+ @search = search
13
+ @instance = instance
14
+ end
15
+
16
+ def method_missing(method, *args, &block)
17
+ string = @instance.send(method, *args, &block).to_s
18
+
19
+ @search.excerpt_for(string, @instance.class)
20
+ end
21
+ end
22
+ end
@@ -72,7 +72,7 @@ module ThinkingSphinx
72
72
  end
73
73
 
74
74
  def value(object, attribute_value)
75
- return translate(object, attribute_value) if translate?
75
+ return translate(object, attribute_value) if translate? || float?
76
76
 
77
77
  case @property.type
78
78
  when :datetime
@@ -95,7 +95,9 @@ module ThinkingSphinx
95
95
  return nil unless object = object.send(method)
96
96
  }
97
97
  if object.is_a?(Array)
98
- object.collect { |item| item.send(column.__name) }
98
+ object.collect { |item| item.send(column.__name) }.detect { |item|
99
+ item.to_crc32 == attribute_value
100
+ }
99
101
  else
100
102
  object.send(column.__name)
101
103
  end
@@ -104,5 +106,9 @@ module ThinkingSphinx
104
106
  def column
105
107
  @property.columns.first
106
108
  end
109
+
110
+ def float?
111
+ @property.type == :float
112
+ end
107
113
  end
108
114
  end
@@ -0,0 +1,134 @@
1
+ module ThinkingSphinx
2
+ class FacetSearch < Hash
3
+ attr_accessor :args, :options
4
+
5
+ def initialize(*args)
6
+ @options = args.extract_options!
7
+ @args = args
8
+
9
+ set_default_options
10
+
11
+ populate
12
+ end
13
+
14
+ def for(hash = {})
15
+ for_options = {:with => {}}.merge(options)
16
+
17
+ hash.each do |key, value|
18
+ attrib = ThinkingSphinx::Facet.attribute_name_from_value(key, value)
19
+ for_options[:with][attrib] = underlying_value key, value
20
+ end
21
+
22
+ ThinkingSphinx.search *(args + [for_options])
23
+ end
24
+
25
+ def facet_names
26
+ @facet_names ||= begin
27
+ names = options[:all_facets] ?
28
+ facet_names_for_all_classes : facet_names_common_to_all_classes
29
+
30
+ names.delete "class_crc" unless options[:class_facet]
31
+ names
32
+ end
33
+ end
34
+
35
+ private
36
+
37
+ def set_default_options
38
+ options[:all_facets] ||= false
39
+ if options[:class_facet].nil?
40
+ options[:class_facet] = ((options[:classes] || []).length != 1)
41
+ end
42
+ end
43
+
44
+ def populate
45
+ facet_names.each do |name|
46
+ search_options = facet_search_options.merge(:group_by => name)
47
+ add_from_results name, ThinkingSphinx.search(
48
+ *(args + [search_options])
49
+ )
50
+ end
51
+ end
52
+
53
+ def facet_search_options
54
+ config = ThinkingSphinx::Configuration.instance
55
+ max = config.configuration.searchd.max_matches || 1000
56
+
57
+ options.merge(
58
+ :group_function => :attr,
59
+ :limit => max,
60
+ :max_matches => max,
61
+ :page => 1
62
+ )
63
+ end
64
+
65
+ def facet_classes
66
+ (
67
+ options[:classes] || ThinkingSphinx.indexed_models.collect { |model|
68
+ model.constantize
69
+ }
70
+ ).select { |klass| klass.sphinx_facets.any? }
71
+ end
72
+
73
+ def all_facets
74
+ facet_classes.collect { |klass|
75
+ klass.sphinx_facets
76
+ }.flatten.select { |facet|
77
+ options[:facets].blank? || Array(options[:facets]).include?(facet.name)
78
+ }
79
+ end
80
+
81
+ def facet_names_for_all_classes
82
+ all_facets.group_by { |facet|
83
+ facet.name
84
+ }.collect { |name, facets|
85
+ if facets.collect { |facet| facet.type }.uniq.length > 1
86
+ raise "Facet #{name} exists in more than one model with different types"
87
+ end
88
+ facets.first.attribute_name
89
+ }
90
+ end
91
+
92
+ def facet_names_common_to_all_classes
93
+ facet_names_for_all_classes.select { |name|
94
+ facet_classes.all? { |klass|
95
+ klass.sphinx_facets.detect { |facet|
96
+ facet.attribute_name == name
97
+ }
98
+ }
99
+ }
100
+ end
101
+
102
+ def add_from_results(facet, results)
103
+ name = ThinkingSphinx::Facet.name_for(facet)
104
+
105
+ self[name] ||= {}
106
+
107
+ return if results.empty?
108
+
109
+ facet = facet_from_object(results.first, facet) if facet.is_a?(String)
110
+
111
+ results.each_with_groupby_and_count { |result, group, count|
112
+ facet_value = facet.value(result, group)
113
+
114
+ self[name][facet_value] ||= 0
115
+ self[name][facet_value] += count
116
+ }
117
+ end
118
+
119
+ def underlying_value(key, value)
120
+ case value
121
+ when Array
122
+ value.collect { |item| underlying_value(key, item) }
123
+ when String
124
+ value.to_crc32
125
+ else
126
+ value
127
+ end
128
+ end
129
+
130
+ def facet_from_object(object, name)
131
+ object.sphinx_facets.detect { |facet| facet.attribute_name == name }
132
+ end
133
+ end
134
+ end
@@ -41,10 +41,10 @@ module ThinkingSphinx
41
41
  end
42
42
 
43
43
  def name
44
- self.class.name(@model)
44
+ self.class.name_for @model
45
45
  end
46
46
 
47
- def self.name(model)
47
+ def self.name_for(model)
48
48
  model.name.underscore.tr(':/\\', '_')
49
49
  end
50
50
 
@@ -198,7 +198,6 @@ module ThinkingSphinx
198
198
  # set_property :delta => true
199
199
  # set_property :field_weights => {"name" => 100}
200
200
  # set_property :order => "name ASC"
201
- # set_property :include => :picture
202
201
  # set_property :select => 'name'
203
202
  #
204
203
  # Also, the following two properties are particularly relevant for
@@ -14,6 +14,8 @@ module ThinkingSphinx
14
14
  @faceted = options[:facet]
15
15
  @admin = options[:admin]
16
16
 
17
+ @alias = @alias.to_sym unless @alias.blank?
18
+
17
19
  @columns.each { |col|
18
20
  @associations[col] = association_stack(col.__stack.clone).each { |assoc|
19
21
  assoc.join_to(source.base)
@@ -134,3 +134,17 @@ end
134
134
  Class.extend(
135
135
  ThinkingSphinx::ClassAttributeMethods
136
136
  ) unless Class.respond_to?(:cattr_reader)
137
+
138
+ module ThinkingSphinx
139
+ module MetaClass
140
+ def metaclass
141
+ class << self
142
+ self
143
+ end
144
+ end
145
+ end
146
+ end
147
+
148
+ unless Object.new.respond_to?(:metaclass)
149
+ Object.send(:include, ThinkingSphinx::MetaClass)
150
+ end
@@ -1,5 +1,4 @@
1
- require 'thinking_sphinx/search/facets'
2
-
1
+ # encoding: UTF-8
3
2
  module ThinkingSphinx
4
3
  # Once you've got those indexes in and built, this is the stuff that
5
4
  # matters - how to search! This class provides a generic search
@@ -9,718 +8,681 @@ module ThinkingSphinx
9
8
  # called from a model.
10
9
  #
11
10
  class Search
12
- GlobalFacetOptions = {
13
- :all_attributes => false,
14
- :class_facet => true
11
+ CoreMethods = %w( == class class_eval extend frozen? id instance_eval
12
+ instance_of? instance_values instance_variable_defined?
13
+ instance_variable_get instance_variable_set instance_variables is_a?
14
+ kind_of? member? method methods nil? object_id respond_to? send should
15
+ type )
16
+ SafeMethods = %w( partition private_methods protected_methods
17
+ public_methods send )
18
+
19
+ instance_methods.select { |method|
20
+ method.to_s[/^__/].nil? && !CoreMethods.include?(method.to_s)
21
+ }.each { |method|
22
+ undef_method method
15
23
  }
16
24
 
17
- class << self
18
- include ThinkingSphinx::Search::Facets
25
+ HashOptions = [:conditions, :with, :without, :with_all]
26
+ ArrayOptions = [:classes, :without_ids]
27
+
28
+ attr_reader :args, :options
29
+
30
+ # Deprecated. Use ThinkingSphinx.search
31
+ def self.search(*args)
32
+ log 'ThinkingSphinx::Search.search is deprecated. Please use ThinkingSphinx.search instead.'
33
+ ThinkingSphinx.search *args
34
+ end
35
+
36
+ # Deprecated. Use ThinkingSphinx.search_for_ids
37
+ def self.search_for_ids(*args)
38
+ log 'ThinkingSphinx::Search.search_for_ids is deprecated. Please use ThinkingSphinx.search_for_ids instead.'
39
+ ThinkingSphinx.search_for_ids *args
40
+ end
41
+
42
+ # Deprecated. Use ThinkingSphinx.search_for_ids
43
+ def self.search_for_id(*args)
44
+ log 'ThinkingSphinx::Search.search_for_id is deprecated. Please use ThinkingSphinx.search_for_id instead.'
45
+ ThinkingSphinx.search_for_id *args
46
+ end
47
+
48
+ # Deprecated. Use ThinkingSphinx.count
49
+ def self.count(*args)
50
+ log 'ThinkingSphinx::Search.count is deprecated. Please use ThinkingSphinx.count instead.'
51
+ ThinkingSphinx.count *args
52
+ end
53
+
54
+ # Deprecated. Use ThinkingSphinx.facets
55
+ def self.facets(*args)
56
+ log 'ThinkingSphinx::Search.facets is deprecated. Please use ThinkingSphinx.facets instead.'
57
+ ThinkingSphinx.facets *args
58
+ end
59
+
60
+ def initialize(*args)
61
+ @array = []
62
+ @options = args.extract_options!
63
+ @args = args
64
+ end
65
+
66
+ def to_a
67
+ populate
68
+ @array
69
+ end
70
+
71
+ # Indication of whether the request has been made to Sphinx for the search
72
+ # query.
73
+ #
74
+ # @return [Boolean] true if the results have been requested.
75
+ #
76
+ def populated?
77
+ !!@populated
78
+ end
79
+
80
+ # The query result hash from Riddle.
81
+ #
82
+ # @return [Hash] Raw Sphinx results
83
+ #
84
+ def results
85
+ populate
86
+ @results
87
+ end
88
+
89
+ def method_missing(method, *args, &block)
90
+ if is_scope?(method)
91
+ add_scope(method, *args, &block)
92
+ return self
93
+ elsif method.to_s[/^each_with_.*/].nil? && !@array.respond_to?(method)
94
+ super
95
+ elsif !SafeMethods.include?(method.to_s)
96
+ populate
97
+ end
19
98
 
20
- # Searches for results that match the parameters provided. Will only
21
- # return the ids for the matching objects. See #search for syntax
22
- # examples.
23
- #
24
- # Note that this only searches the Sphinx index, with no ActiveRecord
25
- # queries. Thus, if your index is not in sync with the database, this
26
- # method may return ids that no longer exist there.
27
- #
28
- def search_for_ids(*args)
29
- results, client = search_results(*args.clone)
30
-
31
- options = args.extract_options!
32
- page = options[:page] ? options[:page].to_i : 1
33
-
34
- ThinkingSphinx::Collection.ids_from_results(results, page, client.limit, options)
99
+ if method.to_s[/^each_with_.*/] && !@array.respond_to?(method)
100
+ each_with_attribute method.to_s.gsub(/^each_with_/, ''), &block
101
+ else
102
+ @array.send(method, *args, &block)
35
103
  end
36
-
37
- # Searches through the Sphinx indexes for relevant matches. There's
38
- # various ways to search, sort, group and filter - which are covered
39
- # below.
40
- #
41
- # Also, if you have WillPaginate installed, the search method can be used
42
- # just like paginate. The same parameters - :page and :per_page - work as
43
- # expected, and the returned result set can be used by the will_paginate
44
- # helper.
45
- #
46
- # == Basic Searching
47
- #
48
- # The simplest way of searching is straight text.
49
- #
50
- # ThinkingSphinx::Search.search "pat"
51
- # ThinkingSphinx::Search.search "google"
52
- # User.search "pat", :page => (params[:page] || 1)
53
- # Article.search "relevant news issue of the day"
54
- #
55
- # If you specify :include, like in an #find call, this will be respected
56
- # when loading the relevant models from the search results.
57
- #
58
- # User.search "pat", :include => :posts
59
- #
60
- # == Match Modes
61
- #
62
- # Sphinx supports 5 different matching modes. By default Thinking Sphinx
63
- # uses :all, which unsurprisingly requires all the supplied search terms
64
- # to match a result.
65
- #
66
- # Alternative modes include:
67
- #
68
- # User.search "pat allan", :match_mode => :any
69
- # User.search "pat allan", :match_mode => :phrase
70
- # User.search "pat | allan", :match_mode => :boolean
71
- # User.search "@name pat | @username pat", :match_mode => :extended
72
- #
73
- # Any will find results with any of the search terms. Phrase treats the search
74
- # terms a single phrase instead of individual words. Boolean and extended allow
75
- # for more complex query syntax, refer to the sphinx documentation for further
76
- # details.
77
- #
78
- # == Weighting
79
- #
80
- # Sphinx has support for weighting, where matches in one field can be considered
81
- # more important than in another. Weights are integers, with 1 as the default.
82
- # They can be set per-search like this:
83
- #
84
- # User.search "pat allan", :field_weights => { :alias => 4, :aka => 2 }
85
- #
86
- # If you're searching multiple models, you can set per-index weights:
87
- #
88
- # ThinkingSphinx::Search.search "pat", :index_weights => { User => 10 }
89
- #
90
- # See http://sphinxsearch.com/doc.html#weighting for further details.
91
- #
92
- # == Searching by Fields
93
- #
94
- # If you want to step it up a level, you can limit your search terms to
95
- # specific fields:
96
- #
97
- # User.search :conditions => {:name => "pat"}
98
- #
99
- # This uses Sphinx's extended match mode, unless you specify a different
100
- # match mode explicitly (but then this way of searching won't work). Also
101
- # note that you don't need to put in a search string.
102
- #
103
- # == Searching by Attributes
104
- #
105
- # Also known as filters, you can limit your searches to documents that
106
- # have specific values for their attributes. There are three ways to do
107
- # this. The first two techniques work in all scenarios - using the :with
108
- # or :with_all options.
109
- #
110
- # ThinkingSphinx::Search.search :with => {:tag_ids => 10}
111
- # ThinkingSphinx::Search.search :with => {:tag_ids => [10,12]}
112
- # ThinkingSphinx::Search.search :with_all => {:tag_ids => [10,12]}
113
- #
114
- # The first :with search will match records with a tag_id attribute of 10.
115
- # The second :with will match records with a tag_id attribute of 10 OR 12.
116
- # If you need to find records that are tagged with ids 10 AND 12, you
117
- # will need to use the :with_all search parameter. This is particuarly
118
- # useful in conjunction with Multi Value Attributes (MVAs).
119
- #
120
- # The third filtering technique is only viable if you're searching with a
121
- # specific model (not multi-model searching). With a single model,
122
- # Thinking Sphinx can figure out what attributes and fields are available,
123
- # so you can put it all in the :conditions hash, and it will sort it out.
124
- #
125
- # Node.search :conditions => {:parent_id => 10}
126
- #
127
- # Filters can be single values, arrays of values, or ranges.
128
- #
129
- # Article.search "East Timor", :conditions => {:rating => 3..5}
130
- #
131
- # == Excluding by Attributes
132
- #
133
- # Sphinx also supports negative filtering - where the filters are of
134
- # attribute values to exclude. This is done with the :without option:
135
- #
136
- # User.search :without => {:role_id => 1}
137
- #
138
- # == Excluding by Primary Key
139
- #
140
- # There is a shortcut to exclude records by their ActiveRecord primary key:
141
- #
142
- # User.search :without_ids => 1
143
- #
144
- # Pass an array or a single value.
145
- #
146
- # The primary key must be an integer as a negative filter is used. Note
147
- # that for multi-model search, an id may occur in more than one model.
148
- #
149
- # == Infix (Star) Searching
150
- #
151
- # By default, Sphinx uses English stemming, e.g. matching "shoes" if you
152
- # search for "shoe". It won't find "Melbourne" if you search for
153
- # "elbourn", though.
154
- #
155
- # Enable infix searching by something like this in config/sphinx.yml:
156
- #
157
- # development:
158
- # enable_star: 1
159
- # min_infix_length: 2
160
- #
161
- # Note that this will make indexing take longer.
162
- #
163
- # With those settings (and after reindexing), wildcard asterisks can be used
164
- # in queries:
165
- #
166
- # Location.search "*elbourn*"
167
- #
168
- # To automatically add asterisks around every token (but not operators),
169
- # pass the :star option:
170
- #
171
- # Location.search "elbourn -ustrali", :star => true, :match_mode => :boolean
172
- #
173
- # This would become "*elbourn* -*ustrali*". The :star option only adds the
174
- # asterisks. You need to make the config/sphinx.yml changes yourself.
175
- #
176
- # By default, the tokens are assumed to match the regular expression /\w+/u.
177
- # If you've modified the charset_table, pass another regular expression, e.g.
178
- #
179
- # User.search("oo@bar.c", :star => /[\w@.]+/u)
180
- #
181
- # to search for "*oo@bar.c*" and not "*oo*@*bar*.*c*".
182
- #
183
- # == Sorting
184
- #
185
- # Sphinx can only sort by attributes, so generally you will need to avoid
186
- # using field names in your :order option. However, if you're searching
187
- # on a single model, and have specified some fields as sortable, you can
188
- # use those field names and Thinking Sphinx will interpret accordingly.
189
- # Remember: this will only happen for single-model searches, and only
190
- # through the :order option.
191
- #
192
- # Location.search "Melbourne", :order => :state
193
- # User.search :conditions => {:role_id => 2}, :order => "name ASC"
194
- #
195
- # Keep in mind that if you use a string, you *must* specify the direction
196
- # (ASC or DESC) else Sphinx won't return any results. If you use a symbol
197
- # then Thinking Sphinx assumes ASC, but if you wish to state otherwise,
198
- # use the :sort_mode option:
199
- #
200
- # Location.search "Melbourne", :order => :state, :sort_mode => :desc
201
- #
202
- # Of course, there are other sort modes - check out the Sphinx
203
- # documentation[http://sphinxsearch.com/doc.html] for that level of
204
- # detail though.
205
- #
206
- # If desired, you can sort by a column in your model instead of a sphinx
207
- # field or attribute. This sort only applies to the current page, so is
208
- # most useful when performing a search with a single page of results.
209
- #
210
- # User.search("pat", :sql_order => "name")
211
- #
212
- # == Grouping
213
- #
214
- # For this you can use the group_by, group_clause and group_function
215
- # options - which are all directly linked to Sphinx's expectations. No
216
- # magic from Thinking Sphinx. It can get a little tricky, so make sure
217
- # you read all the relevant
218
- # documentation[http://sphinxsearch.com/doc.html#clustering] first.
219
- #
220
- # Grouping is done via three parameters within the options hash
221
- # * <tt>:group_function</tt> determines the way grouping is done
222
- # * <tt>:group_by</tt> determines the field which is used for grouping
223
- # * <tt>:group_clause</tt> determines the sorting order
224
- #
225
- # As a convenience, you can also use
226
- # * <tt>:group</tt>
227
- # which sets :group_by and defaults to :group_function of :attr
228
- #
229
- # === group_function
230
- #
231
- # Valid values for :group_function are
232
- # * <tt>:day</tt>, <tt>:week</tt>, <tt>:month</tt>, <tt>:year</tt> - Grouping is done by the respective timeframes.
233
- # * <tt>:attr</tt>, <tt>:attrpair</tt> - Grouping is done by the specified attributes(s)
234
- #
235
- # === group_by
236
- #
237
- # This parameter denotes the field by which grouping is done. Note that the
238
- # specified field must be a sphinx attribute or index.
239
- #
240
- # === group_clause
241
- #
242
- # This determines the sorting order of the groups. In a grouping search,
243
- # the matches within a group will sorted by the <tt>:sort_mode</tt> and <tt>:order</tt> parameters.
244
- # The group matches themselves however, will be sorted by <tt>:group_clause</tt>.
245
- #
246
- # The syntax for this is the same as an order parameter in extended sort mode.
247
- # Namely, you can specify an SQL-like sort expression with up to 5 attributes
248
- # (including internal attributes), eg: "@relevance DESC, price ASC, @id DESC"
249
- #
250
- # === Grouping by timestamp
251
- #
252
- # Timestamp grouping groups off items by the day, week, month or year of the
253
- # attribute given. In order to do this you need to define a timestamp attribute,
254
- # which pretty much looks like the standard defintion for any attribute.
255
- #
256
- # define_index do
257
- # #
258
- # # All your other stuff
259
- # #
260
- # has :created_at
261
- # end
262
- #
263
- # When you need to fire off your search, it'll go something to the tune of
264
- #
265
- # Fruit.search "apricot", :group_function => :day, :group_by => 'created_at'
266
- #
267
- # The <tt>@groupby</tt> special attribute will contain the date for that group.
268
- # Depending on the <tt>:group_function</tt> parameter, the date format will be
269
- #
270
- # * <tt>:day</tt> - YYYYMMDD
271
- # * <tt>:week</tt> - YYYYNNN (NNN is the first day of the week in question,
272
- # counting from the start of the year )
273
- # * <tt>:month</tt> - YYYYMM
274
- # * <tt>:year</tt> - YYYY
275
- #
276
- #
277
- # === Grouping by attribute
278
- #
279
- # The syntax is the same as grouping by timestamp, except for the fact that the
280
- # <tt>:group_function</tt> parameter is changed
281
- #
282
- # Fruit.search "apricot", :group_function => :attr, :group_by => 'size'
283
- #
284
- #
285
- # == Geo/Location Searching
286
- #
287
- # Sphinx - and therefore Thinking Sphinx - has the facility to search
288
- # around a geographical point, using a given latitude and longitude. To
289
- # take advantage of this, you will need to have both of those values in
290
- # attributes. To search with that point, you can then use one of the
291
- # following syntax examples:
292
- #
293
- # Address.search "Melbourne", :geo => [1.4, -2.217], :order => "@geodist asc"
294
- # Address.search "Australia", :geo => [-0.55, 3.108], :order => "@geodist asc"
295
- # :latitude_attr => "latit", :longitude_attr => "longit"
296
- #
297
- # The first example applies when your latitude and longitude attributes
298
- # are named any of lat, latitude, lon, long or longitude. If that's not
299
- # the case, you will need to explicitly state them in your search, _or_
300
- # you can do so in your model:
301
- #
302
- # define_index do
303
- # has :latit # Float column, stored in radians
304
- # has :longit # Float column, stored in radians
305
- #
306
- # set_property :latitude_attr => "latit"
307
- # set_property :longitude_attr => "longit"
308
- # end
309
- #
310
- # Now, geo-location searching really only has an affect if you have a
311
- # filter, sort or grouping clause related to it - otherwise it's just a
312
- # normal search, and _will not_ return a distance value otherwise. To
313
- # make use of the positioning difference, use the special attribute
314
- # "@geodist" in any of your filters or sorting or grouping clauses.
315
- #
316
- # And don't forget - both the latitude and longitude you use in your
317
- # search, and the values in your indexes, need to be stored as a float in radians,
318
- # _not_ degrees. Keep in mind that if you do this conversion in SQL
319
- # you will need to explicitly declare a column type of :float.
320
- #
321
- # define_index do
322
- # has 'RADIANS(lat)', :as => :lat, :type => :float
323
- # # ...
324
- # end
325
- #
326
- # Once you've got your results set, you can access the distances as
327
- # follows:
328
- #
329
- # @results.each_with_geodist do |result, distance|
330
- # # ...
331
- # end
332
- #
333
- # The distance value is returned as a float, representing the distance in
334
- # metres.
335
- #
336
- # == Handling a Stale Index
337
- #
338
- # Especially if you don't use delta indexing, you risk having records in the
339
- # Sphinx index that are no longer in the database. By default, those will simply
340
- # come back as nils:
341
- #
342
- # >> pat_user.delete
343
- # >> User.search("pat")
344
- # Sphinx Result: [1,2]
345
- # => [nil, <#User id: 2>]
346
- #
347
- # (If you search across multiple models, you'll get ActiveRecord::RecordNotFound.)
348
- #
349
- # You can simply Array#compact these results or handle the nils in some other way, but
350
- # Sphinx will still report two results, and the missing records may upset your layout.
351
- #
352
- # If you pass :retry_stale => true to a single-model search, missing records will
353
- # cause Thinking Sphinx to retry the query but excluding those records. Since search
354
- # is paginated, the new search could potentially include missing records as well, so by
355
- # default Thinking Sphinx will retry three times. Pass :retry_stale => 5 to retry five
356
- # times, and so on. If there are still missing ids on the last retry, they are
357
- # shown as nils.
358
- #
359
- def search(*args)
360
- query = args.clone # an array
361
- options = query.extract_options!
362
-
363
- retry_search_on_stale_index(query, options) do
364
- results, client = search_results(*(query + [options]))
365
-
366
- log "Sphinx Error: #{results[:error]}", :error if results[:error]
367
-
368
- klass = options[:class]
369
- page = options[:page] ? options[:page].to_i : 1
104
+ end
370
105
 
371
- ThinkingSphinx::Collection.create_from_results(results, page, client.limit, options)
372
- end
373
- end
106
+ # Returns true if the Search object or the underlying Array object respond
107
+ # to the requested method.
108
+ #
109
+ # @param [Symbol] method The method name
110
+ # @return [Boolean] true if either Search or Array responds to the method.
111
+ #
112
def respond_to?(method, include_private = false)
  # Object#respond_to? takes an optional second argument; accepting it here
  # keeps two-argument callers from raising ArgumentError. The flag is passed
  # on to both the superclass check and the wrapped array.
  super || @array.respond_to?(method, include_private)
end
115
+
116
+ # The current page number of the result set. Defaults to 1 if no page was
117
+ # explicitly requested.
118
+ #
119
+ # @return [Integer]
120
+ #
121
def current_page
  requested = @options[:page]
  # Values with no integer form (nil, false, ...) count as "no page requested".
  page = requested.respond_to?(:to_i) ? requested.to_i : 0
  # Pages are 1-based: zero, negative and non-numeric requests fall back to
  # the first page so that the computed offset can never go negative.
  page > 0 ? page : 1
end
124
+
125
+ # The next page number of the result set. If there are no more pages
126
+ # available, nil is returned.
127
+ #
128
+ # @return [Integer, nil]
129
+ #
130
def next_page
  # Nothing follows the last page.
  return nil if current_page >= total_pages

  current_page + 1
end
133
+
134
+ # The previous page number of the result set. If this is the first page,
135
+ # then nil is returned.
136
+ #
137
+ # @return [Integer, nil]
138
+ #
139
def previous_page
  # Nothing precedes the first page.
  return nil if current_page == 1

  current_page - 1
end
142
+
143
+ # The amount of records per set of paged results. Defaults to 20 unless a
144
+ # specific page size is requested.
145
+ #
146
+ # @return [Integer]
147
+ #
148
def per_page
  # :limit wins over :per_page; 20 is the fallback. The value is coerced to
  # an Integer because it is used in arithmetic (see #offset), and a String
  # such as "10" would otherwise raise there.
  (@options[:limit] || @options[:per_page] || 20).to_i
end
151
+
152
+ # The total number of pages available if the results are paginated.
153
+ #
154
+ # @return [Integer]
155
+ #
156
def total_pages
  populate
  # Without a :total in the result hash there is nothing to paginate;
  # report zero pages instead of failing on nil.
  return 0 if @results[:total].nil?

  @total_pages ||= (@results[:total] / per_page.to_f).ceil
end
160
+ # Compatibility with older versions of will_paginate
161
+ alias_method :page_count, :total_pages
162
+
163
+ # The total number of search results available.
164
+ #
165
+ # @return [Integer]
166
+ #
167
def total_entries
  populate
  # A result hash without :total_found is reported as zero matches, the
  # same fallback the previous count implementation used.
  @total_entries ||= (@results[:total_found] || 0)
end
171
+
172
+ # The current page's offset, based on the number of records per page.
173
+ #
174
+ # @return [Integer]
175
+ #
176
def offset
  # Number of full pages that precede the current one.
  pages_before = current_page - 1
  pages_before * per_page
end
179
+
180
+ def indexes
181
+ return options[:index] if options[:index]
182
+ return '*' if classes.empty?
374
183
 
375
- def retry_search_on_stale_index(query, options, &block)
376
- stale_ids = []
377
- stale_retries_left = case options[:retry_stale]
378
- when true
379
- 3 # default to three retries
380
- when nil, false
381
- 0 # no retries
382
- else options[:retry_stale].to_i
383
- end
384
- begin
385
- # Passing this in an option so Collection.create_from_results can see it.
386
- # It should only raise on stale records if there are any retries left.
387
- options[:raise_on_stale] = stale_retries_left > 0
388
- block.call
389
- # If ThinkingSphinx::Collection.create_from_results found records in Sphinx but not
390
- # in the DB and the :raise_on_stale option is set, this exception is raised. We retry
391
- # a limited number of times, excluding the stale ids from the search.
392
- rescue StaleIdsException => e
393
- stale_retries_left -= 1
394
-
395
- stale_ids |= e.ids # For logging
396
- options[:without_ids] = Array(options[:without_ids]) | e.ids # Actual exclusion
397
-
398
- tries = stale_retries_left
399
- log "Sphinx Stale Ids (%s %s left): %s" % [
400
- tries, (tries==1 ? 'try' : 'tries'), stale_ids.join(', ')
401
- ]
402
-
403
- retry
404
- end
184
+ classes.collect { |klass| klass.sphinx_index_names }.flatten.join(',')
185
+ end
186
+
187
+ def each_with_groupby_and_count(&block)
188
+ populate
189
+ results[:matches].each_with_index do |match, index|
190
+ yield self[index],
191
+ match[:attributes]["@groupby"],
192
+ match[:attributes]["@count"]
405
193
  end
406
-
407
- def count(*args)
408
- results, client = search_results(*args.clone)
409
- results[:total_found] || 0
194
+ end
195
+
196
+ def each_with_weighting(&block)
197
+ populate
198
+ results[:matches].each_with_index do |match, index|
199
+ yield self[index], match[:weight]
410
200
  end
411
-
412
- # Checks if a document with the given id exists within a specific index.
413
- # Expected parameters:
414
- #
415
- # - ID of the document
416
- # - Index to check within
417
- # - Options hash (defaults to {})
418
- #
419
- # Example:
420
- #
421
- # ThinkingSphinx::Search.search_for_id(10, "user_core", :class => User)
422
- #
423
- def search_for_id(*args)
424
- options = args.extract_options!
425
- client = client_from_options options
426
-
427
- query, filters = search_conditions(
428
- options[:class], options[:conditions] || {}
429
- )
430
- client.filters += filters
431
- client.match_mode = :extended unless query.empty?
432
- client.id_range = args.first..args.first
433
-
434
- begin
435
- return client.query(query, args[1])[:matches].length > 0
436
- rescue Errno::ECONNREFUSED => err
437
- raise ThinkingSphinx::ConnectionError, "Connection to Sphinx Daemon (searchd) failed."
438
- end
201
+ end
202
+
203
+ def excerpt_for(string, model = nil)
204
+ if model.nil? && one_class
205
+ model ||= one_class
439
206
  end
440
207
 
441
- private
208
+ populate
209
+ client.excerpts(
210
+ :docs => [string],
211
+ :words => results[:words].keys.join(' '),
212
+ :index => "#{model.source_of_sphinx_index.sphinx_name}_core"
213
+ ).first
214
+ end
215
+
216
# Builds a new ThinkingSphinx::Search from the given arguments, merges it
# into this search via merge_search, and returns self so calls can chain.
def search(*args)
  additional = ThinkingSphinx::Search.new(*args)
  merge_search(additional)
  self
end
220
+
221
+ private
222
+
223
# Shortcut to the ThinkingSphinx::Configuration instance.
def config
  ThinkingSphinx::Configuration.instance
end
226
+
227
+ def populate
228
+ return if @populated
229
+ @populated = true
442
230
 
443
- # This method handles the common search functionality, and returns both
444
- # the result hash and the client. Not super elegant, but it'll do for
445
- # the moment.
446
- #
447
- def search_results(*args)
448
- options = args.extract_options!
449
- query = args.join(' ')
450
- client = client_from_options options
451
-
452
- query = star_query(query, options[:star]) if options[:star]
453
-
454
- extra_query, filters = search_conditions(
455
- options[:class], options[:conditions] || {}
456
- )
457
- client.filters += filters
458
- client.match_mode = :extended unless extra_query.empty?
459
- query = [query, extra_query].join(' ')
460
- query.strip! # Because "" and " " are not equivalent
461
-
462
- set_sort_options! client, options
463
-
464
- client.limit = options[:per_page].to_i if options[:per_page]
465
- page = options[:page] ? options[:page].to_i : 1
466
- page = 1 if page <= 0
467
- client.offset = (page - 1) * client.limit
468
-
231
+ retry_on_stale_index do
469
232
  begin
470
- log "Sphinx: #{query}"
471
- results = client.query(query, '*', options[:comment] || '')
472
- log "Sphinx Result:"
473
- log results[:matches].collect { |m|
474
- m[:attributes]["sphinx_internal_id"]
475
- }.inspect
233
+ log "Querying Sphinx: #{query}"
234
+ @results = client.query query, indexes, comment
476
235
  rescue Errno::ECONNREFUSED => err
477
- raise ThinkingSphinx::ConnectionError, "Connection to Sphinx Daemon (searchd) failed."
236
+ raise ThinkingSphinx::ConnectionError,
237
+ 'Connection to Sphinx Daemon (searchd) failed.'
478
238
  end
479
-
480
- return results, client
481
- end
482
239
 
483
- # Set all the appropriate settings for the client, using the provided
484
- # options hash.
485
- #
486
- def client_from_options(options = {})
487
- config = ThinkingSphinx::Configuration.instance
488
- client = Riddle::Client.new config.address, config.port
489
- klass = options[:class]
490
- index_options = klass ? klass.sphinx_index_options : {}
491
-
492
- # The Riddle default is per-query max_matches=1000. If we set the
493
- # per-server max to a smaller value in sphinx.yml, we need to override
494
- # the Riddle default or else we get search errors like
495
- # "per-query max_matches=1000 out of bounds (per-server max_matches=200)"
496
- if per_server_max_matches = config.configuration.searchd.max_matches
497
- options[:max_matches] ||= per_server_max_matches
498
- end
499
-
500
- # Turn :index_weights => { "foo" => 2, User => 1 }
501
- # into :index_weights => { "foo" => 2, "user_core" => 1, "user_delta" => 1 }
502
- if iw = options[:index_weights]
503
- options[:index_weights] = iw.inject({}) do |hash, (index,weight)|
504
- if index.is_a?(Class)
505
- name = ThinkingSphinx::Index.name(index)
506
- hash["#{name}_core"] = weight
507
- hash["#{name}_delta"] = weight
508
- else
509
- hash[index] = weight
510
- end
511
- hash
512
- end
240
+ if options[:ids_only]
241
+ replace @results[:matches].collect { |match|
242
+ match[:attributes]["sphinx_internal_id"]
243
+ }
244
+ else
245
+ replace instances_from_matches
246
+ add_excerpter
513
247
  end
248
+ end
249
+ end
250
+
251
+ def add_excerpter
252
+ each do |object|
253
+ next if object.respond_to?(:excerpts)
514
254
 
515
- # Group by defaults using :group
516
- if options[:group]
517
- options[:group_by] = options[:group].to_s
518
- options[:group_function] ||= :attr
519
- end
255
+ excerpter = ThinkingSphinx::Excerpter.new self, object
256
+ block = lambda { excerpter }
520
257
 
521
- [
522
- :max_matches, :match_mode, :sort_mode, :sort_by, :id_range,
523
- :group_by, :group_function, :group_clause, :group_distinct, :cut_off,
524
- :retry_count, :retry_delay, :index_weights, :rank_mode,
525
- :max_query_time, :field_weights, :filters, :anchor, :limit
526
- ].each do |key|
527
- client.send(
528
- key.to_s.concat("=").to_sym,
529
- options[key] || index_options[key] || client.send(key)
530
- )
258
+ object.metaclass.instance_eval do
259
+ define_method(:excerpts, &block)
531
260
  end
532
-
533
- options[:classes] = [klass] if klass
534
-
535
- client.anchor = anchor_conditions(klass, options) || {} if client.anchor.empty?
536
-
537
- client.filters << Riddle::Client::Filter.new(
538
- "sphinx_deleted", [0]
539
- )
540
-
541
- # class filters
542
- client.filters << Riddle::Client::Filter.new(
543
- "class_crc", options[:classes].collect { |k| k.to_crc32s }.flatten
544
- ) if options[:classes]
545
-
546
- # normal attribute filters
547
- client.filters += options[:with].collect { |attr,val|
548
- Riddle::Client::Filter.new attr.to_s, filter_value(val)
549
- } if options[:with]
550
-
551
- # exclusive attribute filters
552
- client.filters += options[:without].collect { |attr,val|
553
- Riddle::Client::Filter.new attr.to_s, filter_value(val), true
554
- } if options[:without]
555
-
556
- # every-match attribute filters
557
- client.filters += options[:with_all].collect { |attr,vals|
558
- Array(vals).collect { |val|
559
- Riddle::Client::Filter.new attr.to_s, filter_value(val)
560
- }
561
- }.flatten if options[:with_all]
562
-
563
- # exclusive attribute filter on primary key
564
- client.filters += Array(options[:without_ids]).collect { |id|
565
- Riddle::Client::Filter.new 'sphinx_internal_id', filter_value(id), true
566
- } if options[:without_ids]
567
-
568
- client
569
261
  end
262
+ end
263
+
264
# Sends +message+ to the ActiveRecord logger at the given level (+method+,
# :debug by default). Does nothing when no logger is configured.
def self.log(message, method = :debug)
  logger = ::ActiveRecord::Base.logger
  logger.send(method, message) unless logger.nil?
end
268
+
269
# Instance-level convenience that delegates to the class-level log method.
def log(message, method = :debug)
  self.class.log message, method
end
272
+
273
# Obtains a client from the configuration and applies this search's
# settings to it. For the pass-through settings an explicit search option
# wins over the first index's local option; unset values are left alone.
def client
  sphinx = config.client

  index_options = if one_class
    one_class.sphinx_indexes.first.local_options
  else
    {}
  end

  pass_through = [
    :max_matches, :group_by, :group_function, :group_clause,
    :group_distinct, :id_range, :cut_off, :retry_count, :retry_delay,
    :rank_mode, :max_query_time, :field_weights
  ]
  pass_through.each do |setting|
    value = options[setting] || index_options[setting]
    next unless value

    sphinx.send("#{setting}=", value)
  end

  sphinx.limit      = per_page
  sphinx.offset     = offset
  sphinx.match_mode = match_mode
  sphinx.filters    = filters
  sphinx.sort_mode  = sort_mode
  sphinx.sort_by    = sort_by

  # The :group shorthand only overrides the pass-through values when given.
  grouping = group_by
  sphinx.group_by = grouping if grouping
  grouping_function = group_function
  sphinx.group_function = grouping_function if grouping_function

  sphinx.index_weights = index_weights
  sphinx.anchor        = anchor

  sphinx
end
303
+
304
# Runs the block, re-running it whenever a StaleIdsException is raised.
# Each retry adds the stale ids to options[:without_ids]. Once no attempts
# remain, options[:raise_on_stale] is set to false for the final run.
def retry_on_stale_index(&block)
  seen_stale_ids = []
  attempts_left  = stale_retries

  begin
    # Only ask for an exception while there is still a retry to spend.
    options[:raise_on_stale] = attempts_left > 0
    block.call
  rescue StaleIdsException => stale
    attempts_left -= 1

    # Accumulated for the log line only.
    seen_stale_ids |= stale.ids
    # This is what excludes the stale ids from the next attempt.
    options[:without_ids] = Array(options[:without_ids]) | stale.ids

    unit = attempts_left == 1 ? 'try' : 'tries'
    log 'Sphinx Stale Ids (%s %s left): %s' % [
      attempts_left, unit, seen_stale_ids.join(', ')
    ]
    retry
  end
end
330
+
331
# The classes this search is restricted to (options[:classes]), or an empty
# array when none were given. Memoized.
def classes
  return @classes if @classes

  @classes = options[:classes] || []
end
334
+
335
# The single class being searched, or nil unless exactly one class is set.
def one_class
  @one_class ||= (classes.length == 1 ? classes.first : nil)
end
338
+
339
# The query string sent to Sphinx: the search arguments joined by spaces,
# followed by the field conditions, optionally wildcarded when :star is
# set, with surrounding whitespace stripped. Memoized.
def query
  @query ||= begin
    text = @args.join(' ') << conditions_as_query
    text = star_query(text) if options[:star]
    text.strip
  end
end
345
+
346
+ def conditions_as_query
347
+ return '' if @options[:conditions].blank?
570
348
 
571
- def star_query(query, custom_token = nil)
572
- token = custom_token.is_a?(Regexp) ? custom_token : /\w+/u
349
+ # Soon to be deprecated.
350
+ keys = @options[:conditions].keys.reject { |key|
351
+ attributes.include?(key.to_sym)
352
+ }
353
+
354
+ ' ' + keys.collect { |key|
355
+ "@#{key} #{options[:conditions][key]}"
356
+ }.join(' ')
357
+ end
358
+
359
+ def star_query(query)
360
+ token = options[:star].is_a?(Regexp) ? options[:star] : /\w+/u
573
361
 
574
- query.gsub(/("#{token}(.*?#{token})?"|(?![!-])#{token})/u) do
575
- pre, proper, post = $`, $&, $'
576
- is_operator = pre.match(%r{(\W|^)[@~/]\Z}) # E.g. "@foo", "/2", "~3", but not as part of a token
577
- is_quote = proper.starts_with?('"') && proper.ends_with?('"') # E.g. "foo bar", with quotes
578
- has_star = pre.ends_with?("*") || post.starts_with?("*")
579
- if is_operator || is_quote || has_star
580
- proper
581
- else
582
- "*#{proper}*"
583
- end
362
+ query.gsub(/("#{token}(.*?#{token})?"|(?![!-])#{token})/u) do
363
+ pre, proper, post = $`, $&, $'
364
+ # E.g. "@foo", "/2", "~3", but not as part of a token
365
+ is_operator = pre.match(%r{(\W|^)[@~/]\Z})
366
+ # E.g. "foo bar", with quotes
367
+ is_quote = proper.starts_with?('"') && proper.ends_with?('"')
368
+ has_star = pre.ends_with?("*") || post.starts_with?("*")
369
+ if is_operator || is_quote || has_star
370
+ proper
371
+ else
372
+ "*#{proper}*"
584
373
  end
585
374
  end
586
-
587
- def filter_value(value)
588
- case value
589
- when Range
590
- value.first.is_a?(Time) ? timestamp(value.first)..timestamp(value.last) : value
591
- when Array
592
- value.collect { |val| val.is_a?(Time) ? timestamp(val) : val }
375
+ end
376
+
377
# The :comment option, or an empty string when none was given.
def comment
  text = options[:comment]
  text ? text : ''
end
380
+
381
# An explicit :match_mode wins; otherwise :extended is used when there are
# :conditions and :all when there are none.
def match_mode
  return options[:match_mode] if options[:match_mode]

  options[:conditions].blank? ? :all : :extended
end
384
+
385
+ def sort_mode
386
+ @sort_mode ||= case options[:sort_mode]
387
+ when :asc
388
+ :attr_asc
389
+ when :desc
390
+ :attr_desc
391
+ when nil
392
+ case options[:order]
393
+ when String
394
+ :extended
395
+ when Symbol
396
+ :attr_asc
593
397
  else
594
- Array(value)
398
+ :relevance
595
399
  end
400
+ else
401
+ options[:sort_mode]
596
402
  end
597
-
598
- # Returns the integer timestamp for a Time object.
599
- #
600
- # If using Rails 2.1+, need to handle timezones to translate them back to
601
- # UTC, as that's what datetimes will be stored as by MySQL.
602
- #
603
- # in_time_zone is a method that was added for the timezone support in
604
- # Rails 2.1, which is why it's used for testing. I'm sure there's better
605
- # ways, but this does the job.
606
- #
607
- def timestamp(value)
608
- value.respond_to?(:in_time_zone) ? value.utc.to_i : value.to_i
403
+ end
404
+
405
# The sort clause, taken from :sort_by or else :order. A String is passed
# through sorted_fields_to_attributes; a Symbol naming a field gets the
# "_sort" suffix; any other value yields an empty string.
def sort_by
  @sort_by = options[:sort_by] || options[:order]

  case @sort_by
  when String
    sorted_fields_to_attributes(@sort_by)
  when Symbol
    if field_names.include?(@sort_by)
      "#{@sort_by}_sort"
    else
      @sort_by.to_s
    end
  else
    ''
  end
end
416
+
417
+ def field_names
418
+ return [] unless one_class
610
419
 
611
- # Translate field and attribute conditions to the relevant search string
612
- # and filters.
613
- #
614
- def search_conditions(klass, conditions={})
615
- attributes = klass ? klass.sphinx_indexes.collect { |index|
616
- index.attributes.collect { |attrib| attrib.unique_name }
617
- }.flatten : []
618
-
619
- search_string = []
620
- filters = []
621
-
622
- conditions.each do |key,val|
623
- if attributes.include?(key.to_sym)
624
- filters << Riddle::Client::Filter.new(
625
- key.to_s, filter_value(val)
626
- )
627
- else
628
- search_string << "@#{key} #{val}"
629
- end
420
+ one_class.sphinx_indexes.collect { |index|
421
+ index.fields.collect { |field| field.unique_name }
422
+ }.flatten
423
+ end
424
+
425
# Returns a copy of +order_string+ in which every name from field_names that
# appears as a whole word is given a "_sort" suffix.
#
# The argument itself is never modified, so a frozen string or one still
# held by the caller is safe to pass in.
def sorted_fields_to_attributes(order_string)
  field_names.inject(order_string.dup) do |clause, field|
    name = field.to_s
    # Escape the name so any regex metacharacters in it match literally.
    clause.gsub(/(^|\s)#{Regexp.escape(name)}(,?\s|$)/) do |match|
      match.gsub(name, "#{name}_sort")
    end
  end
end
434
+
435
+ # Turn :index_weights => { "foo" => 2, User => 1 } into :index_weights =>
436
+ # { "foo" => 2, "user_core" => 1, "user_delta" => 1 }
437
+ #
438
+ def index_weights
439
+ weights = options[:index_weights] || {}
440
+ weights.keys.inject({}) do |hash, key|
441
+ if key.is_a?(Class)
442
+ name = ThinkingSphinx::Index.name_for(key)
443
+ hash["#{name}_core"] = weights[key]
444
+ hash["#{name}_delta"] = weights[key]
445
+ else
446
+ hash[key] = weights[key]
630
447
  end
631
448
 
632
- return search_string.join(' '), filters
449
+ hash
450
+ end
451
+ end
452
+
453
# The :group option as a String, or nil when no grouping was requested.
def group_by
  return nil unless options[:group]

  options[:group].to_s
end
456
+
457
# :attr when the :group option is set, otherwise nil.
def group_function
  return nil unless options[:group]

  :attr
end
460
+
461
+ def internal_filters
462
+ filters = [Riddle::Client::Filter.new('sphinx_deleted', [0])]
463
+
464
+ class_crcs = classes.collect { |klass|
465
+ klass.to_crc32s
466
+ }.flatten
467
+
468
+ unless class_crcs.empty?
469
+ filters << Riddle::Client::Filter.new('class_crc', class_crcs)
633
470
  end
634
471
 
635
- # Return the appropriate latitude and longitude values, depending on
636
- # whether the relevant attributes have been defined, and also whether
637
- # there's actually any values.
638
- #
639
- def anchor_conditions(klass, options)
640
- attributes = klass ? klass.sphinx_indexes.collect { |index|
641
- index.attributes.collect { |attrib| attrib.unique_name }
642
- }.flatten : []
643
-
644
- lat_attr = klass ? klass.sphinx_indexes.collect { |index|
645
- index.local_options[:latitude_attr]
646
- }.compact.first : nil
647
-
648
- lon_attr = klass ? klass.sphinx_indexes.collect { |index|
649
- index.local_options[:longitude_attr]
650
- }.compact.first : nil
651
-
652
- lat_attr = options[:latitude_attr] if options[:latitude_attr]
653
- lat_attr ||= :lat if attributes.include?(:lat)
654
- lat_attr ||= :latitude if attributes.include?(:latitude)
655
-
656
- lon_attr = options[:longitude_attr] if options[:longitude_attr]
657
- lon_attr ||= :lng if attributes.include?(:lng)
658
- lon_attr ||= :lon if attributes.include?(:lon)
659
- lon_attr ||= :long if attributes.include?(:long)
660
- lon_attr ||= :longitude if attributes.include?(:longitude)
661
-
662
- lat = options[:lat]
663
- lon = options[:lon]
664
-
665
- if options[:geo]
666
- lat = options[:geo].first
667
- lon = options[:geo].last
472
+ filters << Riddle::Client::Filter.new(
473
+ 'sphinx_internal_id', filter_value(options[:without_ids]), true
474
+ ) if options[:without_ids]
475
+
476
+ filters
477
+ end
478
+
479
+ def condition_filters
480
+ (options[:conditions] || {}).collect { |attrib, value|
481
+ if attributes.include?(attrib.to_sym)
482
+ puts <<-MSG
483
+ Deprecation Warning: filters on attributes should be done using the :with
484
+ option, not :conditions. For example:
485
+ :with => {:#{attrib} => #{value.inspect}}
486
+ MSG
487
+ Riddle::Client::Filter.new attrib.to_s, filter_value(value)
488
+ else
489
+ nil
668
490
  end
669
-
670
- lat && lon ? {
671
- :latitude_attribute => lat_attr.to_s,
672
- :latitude => lat,
673
- :longitude_attribute => lon_attr.to_s,
674
- :longitude => lon
675
- } : nil
491
+ }.compact
492
+ end
493
+
494
# All filters for the query, in order: internal filters, attribute filters
# from :conditions, then :with (inclusive), :without (exclusive) and
# :with_all (one inclusive filter per value).
def filters
  collected = internal_filters + condition_filters

  (options[:with] || {}).each do |attrib, value|
    collected << Riddle::Client::Filter.new(attrib.to_s, filter_value(value))
  end

  (options[:without] || {}).each do |attrib, value|
    collected << Riddle::Client::Filter.new(
      attrib.to_s, filter_value(value), true
    )
  end

  (options[:with_all] || {}).each do |attrib, values|
    Array(values).each do |value|
      collected << Riddle::Client::Filter.new(
        attrib.to_s, filter_value(value)
      )
    end
  end

  collected
end
509
+
510
+ # When passed a Time instance, returns the integer timestamp.
511
+ #
512
+ # If using Rails 2.1+, need to handle timezones to translate them back to
513
+ # UTC, as that's what datetimes will be stored as by MySQL.
514
+ #
515
+ # in_time_zone is a method that was added for the timezone support in
516
+ # Rails 2.1, which is why it's used for testing. I'm sure there's better
517
+ # ways, but this does the job.
518
+ #
519
def filter_value(value)
  case value
  when Range
    # Each endpoint is normalised on its own; every scalar branch below
    # returns an Array, so .first recovers the single converted value.
    filter_value(value.first).first..filter_value(value.last).first
  when Array
    value.collect { |v| filter_value(v) }.flatten
  when Time
    # to_i is seconds since the epoch whatever the zone, so no UTC
    # conversion is needed - and Time#utc would modify the caller's object.
    [value.to_i]
  when NilClass
    # nil is filtered as 0; wrapped in an Array like every other scalar so
    # the Range branch above can call .first on it.
    [0]
  else
    Array(value)
  end
end
533
+
534
# The geographic anchor for the query: the point's latitude and longitude
# plus the names of the attributes holding them. Returns an empty hash when
# neither :geo nor a :lat/:lng pair was given.
#
# :lon is accepted as an alias of :lng, because the previous implementation
# read the longitude from :lon.
def anchor
  geo       = options[:geo]
  longitude = options[:lng] || options[:lon]
  return {} unless geo || (options[:lat] && longitude)

  {
    :latitude            => geo ? geo.first : options[:lat],
    :longitude           => geo ? geo.last : longitude,
    :latitude_attribute  => latitude_attr.to_s,
    :longitude_attribute => longitude_attr.to_s
  }
end
544
+
545
# Name of the latitude attribute: the :latitude_attr search option, else
# the same index option, else the first of :lat / :latitude that is an
# indexed attribute.
def latitude_attr
  explicit = options[:latitude_attr]
  return explicit if explicit

  configured = index_option(:latitude_attr)
  return configured if configured

  attribute(:lat, :latitude)
end
550
+
551
# Name of the longitude attribute: the :longitude_attr search option, else
# the same index option, else the first of :lon / :lng / :longitude that is
# an indexed attribute.
def longitude_attr
  explicit = options[:longitude_attr]
  return explicit if explicit

  configured = index_option(:longitude_attr)
  return configured if configured

  attribute(:lon, :lng, :longitude)
end
556
+
557
# The first non-nil value stored under +key+ in the local options of the
# searched class's indexes; nil when more than one (or no) class is searched.
def index_option(key)
  return nil unless one_class

  values = one_class.sphinx_indexes.collect do |index|
    index.local_options[key]
  end
  values.compact.first
end
564
+
565
# The first of +keys+ that is one of the searched class's attribute names,
# or nil when none matches or no single class is being searched.
def attribute(*keys)
  return nil unless one_class

  known = attributes
  keys.detect { |key| known.include?(key) }
end
572
+
573
+ def attributes
574
+ return [] unless one_class
677
575
 
678
- # Set the sort options using the :order key as well as the appropriate
679
- # Riddle settings.
680
- #
681
- def set_sort_options!(client, options)
682
- klass = options[:class]
683
- fields = klass ? klass.sphinx_indexes.collect { |index|
684
- index.fields.collect { |field| field.unique_name }
685
- }.flatten : []
686
- index_options = klass ? klass.sphinx_index_options : {}
576
+ attributes = one_class.sphinx_indexes.collect { |index|
577
+ index.attributes.collect { |attrib| attrib.unique_name }
578
+ }.flatten
579
+ end
580
+
581
# Number of retries allowed by the :retry_stale option: true means 3,
# nil or false means 0, anything else is converted with to_i.
def stale_retries
  setting = options[:retry_stale]

  if setting.equal?(true)
    3
  elsif !setting
    0
  else
    setting.to_i
  end
end
591
+
592
+ def instances_from_class(klass, matches)
593
+ index_options = klass.sphinx_index_options
687
594
 
688
- order = options[:order] || index_options[:order]
689
- case order
690
- when Symbol
691
- client.sort_mode = :attr_asc if client.sort_mode == :relevance || client.sort_mode.nil?
692
- if fields.include?(order)
693
- client.sort_by = order.to_s.concat("_sort")
694
- else
695
- client.sort_by = order.to_s
696
- end
697
- when String
698
- client.sort_mode = :extended unless options[:sort_mode]
699
- client.sort_by = sorted_fields_to_attributes(order, fields)
700
- else
701
- # do nothing
595
+ ids = matches.collect { |match| match[:attributes]["sphinx_internal_id"] }
596
+ instances = ids.length > 0 ? klass.find(
597
+ :all,
598
+ :joins => options[:joins],
599
+ :conditions => {klass.primary_key_for_sphinx.to_sym => ids},
600
+ :include => (options[:include] || index_options[:include]),
601
+ :select => (options[:select] || index_options[:select]),
602
+ :order => (options[:sql_order] || index_options[:sql_order])
603
+ ) : []
604
+
605
+ # Raise an exception if we find records in Sphinx but not in the DB, so
606
+ # the search method can retry without them. See
607
+ # ThinkingSphinx::Search.retry_search_on_stale_index.
608
+ if options[:raise_on_stale] && instances.length < ids.length
609
+ stale_ids = ids - instances.map { |i| i.id }
610
+ raise StaleIdsException, stale_ids
611
+ end
612
+
613
+ # if the user has specified an SQL order, return the collection
614
+ # without rearranging it into the Sphinx order
615
+ return instances if (options[:sql_order] || index_options[:sql_order])
616
+
617
+ ids.collect { |obj_id|
618
+ instances.detect do |obj|
619
+ obj.primary_key_for_sphinx == obj_id
702
620
  end
703
-
704
- client.sort_mode = :attr_asc if client.sort_mode == :asc
705
- client.sort_mode = :attr_desc if client.sort_mode == :desc
621
+ }
622
+ end
623
+
624
+ # Group results by class and call #find(:all) once for each group to reduce
625
+ # the number of #find's in multi-model searches.
626
+ #
627
+ def instances_from_matches
628
+ return single_class_results if one_class
629
+
630
+ groups = results[:matches].group_by { |match|
631
+ match[:attributes]["class_crc"]
632
+ }
633
+ groups.each do |crc, group|
634
+ group.replace(
635
+ instances_from_class(class_from_crc(crc), group)
636
+ )
706
637
  end
707
638
 
708
- # Search through a collection of fields and translate any appearances
709
- # of them in a string to their attribute equivalent for sorting.
710
- #
711
- def sorted_fields_to_attributes(string, fields)
712
- fields.each { |field|
713
- string.gsub!(/(^|\s)#{field}(,?\s|$)/) { |match|
714
- match.gsub field.to_s, field.to_s.concat("_sort")
715
- }
639
+ results[:matches].collect do |match|
640
+ groups.detect { |crc, group|
641
+ crc == match[:attributes]["class_crc"]
642
+ }[1].compact.detect { |obj|
643
+ obj.primary_key_for_sphinx == match[:attributes]["sphinx_internal_id"]
716
644
  }
717
-
718
- string
719
645
  end
646
+ end
647
+
648
# Loads the instances for every match when a single class is searched.
def single_class_results
  instances_from_class(one_class, results[:matches])
end
651
+
652
# Looks the CRC up in the configuration's models_by_crc table and
# constantizes the entry found there.
def class_from_crc(crc)
  model_name = config.models_by_crc[crc]
  model_name.constantize
end
655
+
656
# Yields each result together with the value of +attribute+ from its Sphinx
# match, falling back to the "@"-prefixed attribute name when the plain one
# has no value.
def each_with_attribute(attribute, &block)
  populate

  results[:matches].each_with_index do |match, index|
    match_attributes = match[:attributes]
    value = match_attributes[attribute] || match_attributes["@#{attribute}"]

    yield self[index], value
  end
end
663
+
664
# Whether +method+ names a Sphinx scope of the single searched class.
# Falsy when no single class is being searched.
def is_scope?(method)
  klass = one_class
  return klass unless klass

  klass.sphinx_scopes.include?(method)
end
667
+
668
# Calls the named method on the searched class and merges the search it
# returns into this one.
def add_scope(method, *args, &block)
  scoped = one_class.send(method, *args, &block)
  merge_search(scoped)
end
671
+
672
+ def merge_search(search)
673
+ search.args.each { |arg| args << arg }
720
674
 
721
- def log(message, method = :debug)
722
- return if ::ActiveRecord::Base.logger.nil?
723
- ::ActiveRecord::Base.logger.send method, message
675
+ search.options.keys.each do |key|
676
+ if HashOptions.include?(key)
677
+ options[key] ||= {}
678
+ options[key].merge! search.options[key]
679
+ elsif ArrayOptions.include?(key)
680
+ options[key] ||= []
681
+ options[key] += search.options[key]
682
+ options[key].uniq!
683
+ else
684
+ options[key] = search.options[key]
685
+ end
724
686
  end
725
687
  end
726
688
  end