ultrasphinx 1 → 1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,4 @@
1
1
 
2
- require 'chronic'
3
-
4
2
  class Array
5
3
  def _flatten_once
6
4
  self.inject([]) do |set, element|
@@ -15,6 +13,11 @@ class Object
15
13
  self
16
14
  end
17
15
  end
16
+
17
+ def _deep_dup
18
+ # Cause Ruby's dup sucks.
19
+ Marshal.load(Marshal.dump(self))
20
+ end
18
21
  end
19
22
 
20
23
  class String
@@ -36,16 +39,45 @@ end
36
39
  class Hash
37
40
  def _coerce_basic_types
38
41
  Hash[*self.map do |key, value|
39
- [key.to_sym,
42
+ [key.to_s,
40
43
  if value.respond_to?(:to_i) && value.to_i.to_s == value
41
44
  value.to_i
42
45
  elsif value == ""
43
46
  nil
44
- elsif value.is_a? String
45
- value.to_sym
46
47
  else
47
48
  value
48
49
  end]
49
50
  end._flatten_once]
50
51
  end
52
+
53
+ def _to_conf_string(section = nil)
54
+ inner = self.map do |key, value|
55
+ " #{key} = #{value}"
56
+ end.join("\n")
57
+ section ? "#{section} {\n#{inner}\n}\n" : inner
58
+ end
59
+
60
+ def _deep_stringify_keys
61
+ Hash[*(self.map do |key, value|
62
+ # puts "#{key.inspect}, #{value.inspect}"
63
+ z = [key.to_s,
64
+ case value
65
+ when Hash
66
+ value._deep_stringify_keys
67
+ when Array
68
+ value.map do |subvalue|
69
+ if subvalue.is_a? Hash or subvalue.is_a? Array
70
+ subvalue._deep_stringify_keys
71
+ else
72
+ subvalue
73
+ end
74
+ end
75
+ else
76
+ value
77
+ end
78
+ ]
79
+ # p z
80
+ # z
81
+ end._flatten_once)]
82
+ end
51
83
  end
@@ -3,26 +3,58 @@ require 'singleton'
3
3
 
4
4
  module Ultrasphinx
5
5
 
6
- class Fields < Hash
6
+ class Fields
7
7
  include Singleton
8
8
 
9
+ TYPE_MAP = {
10
+ 'string' => 'text',
11
+ 'text' => 'text',
12
+ 'integer' => 'numeric',
13
+ 'date' => 'date',
14
+ 'datetime' => 'date'
15
+ }
16
+
17
+ attr_accessor :classes, :types
18
+
9
19
  def initialize
10
- self["class_id"] = "numeric"
11
- self["class"] = "text"
20
+ @types = {}
21
+ @classes = Hash.new([])
22
+ @groups = []
23
+ end
24
+
25
+ def groups
26
+ @groups.compact.sort_by do |string|
27
+ string[/= (.*)/, 1]
28
+ end
12
29
  end
13
30
 
14
- def check_type_match(field, new_type)
15
- # tries to smoosh fields together by name in the sphinx query schema; raises if their types don't match
16
- field, new_type = field.to_s, COLUMN_TYPES[new_type.to_sym]
17
- if self[field]
18
- raise ConfigurationError, "Column type mismatch for #{field.inspect}; was already #{self[field].inspect}, but is now #{new_type.inspect}." unless self[field] == new_type
31
+ def save_and_verify_type(field, new_type, string_sortable, klass)
32
+ # Smoosh fields together based on their name in the Sphinx query schema
33
+ field, new_type = field.to_s, TYPE_MAP[new_type.to_s]
34
+
35
+ if types[field]
36
+ # Existing field name; verify its type
37
+ raise ConfigurationError, "Column type mismatch for #{field.inspect}; was already #{types[field].inspect}, but is now #{new_type.inspect}." unless types[field] == new_type
38
+ classes[field] = (classes[field] + [klass]).uniq
39
+
19
40
  else
20
- self[field] = new_type
41
+ # New field
42
+ types[field] = new_type
43
+ classes[field] = [klass]
44
+
45
+ @groups << case new_type
46
+ when 'numeric'
47
+ "sql_group_column = #{field}"
48
+ when 'date'
49
+ "sql_date_column = #{field}"
50
+ when 'text'
51
+ "sql_str2ordinal_column = #{field}" if string_sortable
52
+ end
21
53
  end
22
54
  end
23
55
 
24
56
  def cast(source_string, field)
25
- if self[field] == "date"
57
+ if types[field] == "date"
26
58
  "UNIX_TIMESTAMP(#{source_string})"
27
59
  elsif source_string =~ /GROUP_CONCAT/
28
60
  "CAST(#{source_string} AS CHAR)"
@@ -32,38 +64,56 @@ module Ultrasphinx
32
64
  end
33
65
 
34
66
  def null(field)
35
- case self[field]
67
+ case types[field]
36
68
  when 'text'
37
69
  "''"
38
70
  when 'numeric'
39
71
  "0"
40
72
  when 'date'
41
73
  "UNIX_TIMESTAMP('1970-01-01 00:00:00')"
74
+ else
75
+ raise "Field #{field} does not have a valid type."
42
76
  end + " AS #{field}"
43
77
  end
44
78
 
45
79
  def configure(configuration)
46
80
 
47
81
  configuration.each do |model, options|
48
- klass = model.constantize
82
+
83
+ klass = model.constantize
84
+ save_and_verify_type('class_id', 'integer', nil, klass)
85
+ save_and_verify_type('class', 'string', nil, klass)
49
86
 
50
87
  begin
51
- # fields are from the model
52
- options[:fields].to_a.each do |entry|
53
- entry = {:field => entry, :as => entry} unless entry.is_a? Hash
54
- unless klass.columns_hash[entry[:field]]
55
- ActiveRecord::Base.logger.warn "ultrasphinx: WARNING: field #{entry[:field]} is not present in #{model}"
88
+
89
+ # Fields are from the model. We destructively canonicalize them back onto the configuration hash.
90
+ options['fields'] = options['fields'].to_a.map do |entry|
91
+
92
+ entry = {'field' => entry} unless entry.is_a? Hash
93
+ entry['as'] = entry['field'] unless entry['as']
94
+
95
+ unless klass.columns_hash[entry['field']]
96
+ ActiveRecord::Base.logger.warn "ultrasphinx: WARNING: field #{entry['field']} is not present in #{model}"
56
97
  else
57
- check_type_match(entry[:as], klass.columns_hash[entry[:field]].type)
98
+ save_and_verify_type(entry['as'], klass.columns_hash[entry['field']].type, entry['sortable'], klass)
99
+ end
100
+
101
+ if entry['facet']
102
+ save_and_verify_type(entry['as'], 'text', nil, klass) # source must be a string
103
+ save_and_verify_type("#{entry['as']}_facet", 'integer', nil, klass)
58
104
  end
105
+
106
+ entry
59
107
  end
60
- # joins are whatever they are in the target
61
- options[:includes].to_a.each do |join|
62
- check_type_match(join[:as] || join[:field], join[:model].constantize.columns_hash[join[:field]].type)
108
+
109
+ # Joins are whatever they are in the target
110
+ options['include'].to_a.each do |entry|
111
+ save_and_verify_type(entry['as'] || entry['field'], entry['class_name'].constantize.columns_hash[entry['field']].type, entry['sortable'], klass)
63
112
  end
64
- # regular concats are CHAR (I think), group_concats are BLOB and need to be cast to CHAR, e.g. :text
65
- options[:concats].to_a.each do |concats|
66
- check_type_match(concats[:as], :text)
113
+
114
+ # Regular concats are CHAR (I think), group_concats are BLOB and need to be cast to CHAR, e.g. :text
115
+ options['concatenate'].to_a.each do |entry|
116
+ save_and_verify_type(entry['as'], 'text', entry['sortable'], klass)
67
117
  end
68
118
  rescue ActiveRecord::StatementInvalid
69
119
  ActiveRecord::Base.logger.warn "ultrasphinx: WARNING: model #{model} does not exist in the database yet"
@@ -6,24 +6,41 @@ module ActiveRecord
6
6
 
7
7
  =begin rdoc
8
8
 
9
- The is_indexed macro configures a model for indexing. Its parameters are used to generate SQL queries for Sphinx.
9
+ The is_indexed method configures a model for indexing. Its parameters help generate SQL queries for Sphinx.
10
10
 
11
- == Indexing single fields
11
+ = Options
12
+
13
+ == Including regular fields
12
14
 
13
15
  Use the <tt>:fields</tt> key.
14
16
 
15
- Accepts an array of field names.
16
- :fields => ["created_at", "title", "body"]
17
+ Accepts an array of field names or field hashes.
18
+ :fields => [
19
+ 'created_at',
20
+ 'title',
21
+ {:field => 'body', :as => 'description'},
22
+ {:field => 'user_category', :facet => true, :as => 'category' }
23
+ ]
24
+
25
+ To alias a field, pass a hash instead of a string and set the <tt>:as</tt> key.
26
+
27
+ To allow faceting support on a text field, also pass a hash and set the <tt>:facet</tt> key to <tt>true</tt>. Faceting is off by default for text fields because there is some indexing overhead associated with it. Faceting is always on for numeric or date fields.
17
28
 
18
- == Indexing fields from belongs_to associations
29
+ To allow sorting by a text field, also pass a hash and set the <tt>:sortable</tt> key to true. This is turned off by default for the same reason as above. Sorting is always on for numeric or date fields.
19
30
 
20
- Use the <tt>:includes</tt> key.
31
+ To apply an SQL function to a field before it is indexed, use the key <tt>:function_sql</tt>. Pass a string such as <tt>"REPLACE(?, '_', ' ')"</tt>. The table and column name for your field will be interpolated into the first <tt>?</tt> in the string.
32
+
33
+ == Including a field from an association
34
+
35
+ Use the <tt>:include</tt> key.
21
36
 
22
37
  Accepts an array of hashes.
23
38
 
24
- Each should contain a <tt>:model</tt> key (the class name of the included model), a <tt>:field</tt> key (the name of the field to include), and an optional <tt>:as</tt> key (what to name the field in the parent). You can use the optional key <tt>:association_sql</tt> if you need to pass a custom JOIN string, in which case the default JOIN will not be generated.
39
+ Each should contain a <tt>:class_name</tt> key (the class name of the included model), a <tt>:field</tt> key (the name of the field to include), and an optional <tt>:as</tt> key (what to name the field in the parent). You can use the optional key <tt>:association_sql</tt> if you need to pass a custom JOIN string, in which case the default JOIN for <tt>belongs_to</tt> will not be generated.
40
+
41
+ The keys <tt>:facet</tt>, <tt>:sortable</tt>, and <tt>:function_sql</tt> are also recognized, just like for regular fields.
25
42
 
26
- == Scoping the searchable records
43
+ == Requiring conditions
27
44
 
28
45
  Use the <tt>:conditions</tt> key.
29
46
 
@@ -31,56 +48,95 @@ SQL conditions, to scope which records are selected for indexing. Accepts a stri
31
48
  :conditions => "created_at < NOW() AND deleted IS NOT NULL"
32
49
  The <tt>:conditions</tt> key is especially useful if you delete records by marking them deleted rather than removing them from the database.
33
50
 
34
- == Concatenating multiple fields
51
+ == Concatenating several fields within a record
35
52
 
36
- Use the <tt>:concats</tt> key (MySQL only).
53
+ Use the <tt>:concatenate</tt> key (MySQL only).
37
54
 
38
- Accepts an array of option hashes, which can be of two types:
55
+ Accepts an array of option hashes.
39
56
 
40
- 1. To concatenate many fields within one record, use a regular (or horizontal) concatenation. Regular concatenations contain a <tt>:fields</tt> key (again, an array of field names), and a mandatory <tt>:as</tt> key (the name of the result of the concatenation). For example, to concatenate the <tt>title</tt> and <tt>body</tt> into one field called <tt>text</tt>:
41
- :concats => [{:fields => ["title", "body"], :as => "text"}]
57
+ To concatenate several fields within one record as a combined field, use a regular (or horizontal) concatenation. Regular concatenations contain a <tt>:fields</tt> key (again, an array of field names), and a mandatory <tt>:as</tt> key (the name of the result of the concatenation). For example, to concatenate the <tt>title</tt> and <tt>body</tt> into one field called <tt>text</tt>:
58
+ :concatenate => [{:fields => ['title', 'body'], :as => 'text'}]
59
+
60
+ The keys <tt>:facet</tt>, <tt>:sortable</tt>, and <tt>:function_sql</tt> are also recognized, just like for regular fields.
61
+
62
+ == Concatenating one field from a set of associated records
63
+
64
+ Also use the <tt>:concatenate</tt> key.
42
65
 
43
- 2. To group and concatenate a field from a set of associated records, use a group (or vertical) concatenation. Group concatenations join into another table, and can be used to index a number of associated models as one field in a parent model. Group concatenations contain a <tt>:model</tt> key (the class name of the included model), a <tt>:field</tt> key (the field on the included model to concatenate), and an optional <tt>:as</tt> key (also the name of the result of the concatenation). For example, to concatenate all <tt>Post#body</tt> contents into the parent's <tt>responses</tt> field:
44
- :concats => {:model => "Post", :field => "body", :as => "responses"}
66
+ To concatenate one field from a set of associated records as a combined field in the parent record, use a group (or vertical) concatenation. A group concatenation should contain a <tt>:class_name</tt> key (the class name of the included model), a <tt>:field</tt> key (the field on the included model to concatenate), and an optional <tt>:as</tt> key (also the name of the result of the concatenation). For example, to concatenate all <tt>Post#body</tt> contents into the parent's <tt>responses</tt> field:
67
+ :concatenate => [{:class_name => 'Post', :field => 'body', :as => 'responses'}]
45
68
 
46
69
  Optional group concatenation keys are <tt>:association_name</tt> (if your <tt>has_many</tt> association can't be derived from the model name), <tt>:association_sql</tt>, if you need to pass a custom JOIN string (for example, a double JOIN for a <tt>has_many :through</tt>), and <tt>:conditions</tt> (if you need custom WHERE conditions for this particular association).
47
70
 
48
- == Example
71
+ The keys <tt>:facet</tt>, <tt>:sortable</tt>, and <tt>:function_sql</tt> are also recognized, just like for regular fields.
72
+
73
+ Ultrasphinx is not an object-relational mapper, and the association generation is intended to stay minimal--don't be afraid of <tt>:association_sql</tt>.
74
+
75
+ = Examples
76
+
77
+ == Complex configuration
49
78
 
50
79
  Here's an example configuration using most of the options, taken from production code:
51
80
 
52
81
  class Story < ActiveRecord::Base
53
82
  is_indexed :fields => [
54
- "title",
55
- "published_at"
83
+ 'title',
84
+ 'published_at',
85
+ {:field => 'author', :facet => true}
56
86
  ],
57
- :includes => [
58
- {:model => "Category", :field => "name", :as => "category"}
87
+ :include => [
88
+ {:class_name => 'Category', :field => 'name', :as => 'category'}
59
89
  ],
60
- :concats => [
61
- {:fields => ["title", "long_description", "short_description"], :as => "editorial"},
62
- {:model => "Page", :field => "body", :as => "body", :association_name => "pages"},
63
- {:model => "Comment", :field => "body", :as => "comments",
90
+ :concatenate => [
91
+ {:fields => ['title', 'long_description', 'short_description'],
92
+ :as => 'editorial'},
93
+ {:class_name => 'Page', :field => 'body', :as => 'body',
94
+ :association_name => 'pages'},
95
+ {:class_name => 'Comment', :field => 'body', :as => 'comments',
64
96
  :conditions => "comments.item_type = '#{base_class}'"}
65
97
  ],
66
98
  :conditions => self.live_condition_string
67
99
  end
68
100
 
101
+ Note how setting the <tt>:conditions</tt> on Comment is enough to configure a polymorphic <tt>has_many</tt>.
102
+
103
+ == Association scoping
104
+
105
+ A common use case is to only search records that belong to a particular parent model. Ultrasphinx configures Sphinx to support a <tt>:filter</tt> element on any date or numeric field, so any <tt>*_id</tt> fields you have will be filterable.
106
+
107
+ For example, say a Company <tt>has_many :users</tt> and each User <tt>has_many :articles</tt>. If you want to filter Articles by Company, add <tt>company_id</tt> to the Article's <tt>is_indexed</tt> method. The best way is to grab it from the User association:
108
+
109
+ class Article < ActiveRecord::Base
110
+ is_indexed :include => [{:class_name => 'User', :field => 'company_id'}]
111
+ end
112
+
113
+ Now you can run:
114
+
115
+ @search = Ultrasphinx::Search.new('something',
116
+ :filter => {'company_id' => 493})
117
+
118
+ If the associations weren't just <tt>has_many</tt> and <tt>belongs_to</tt>, you would need to use the <tt>:association_sql</tt> key to set up a custom JOIN.
119
+
69
120
  =end
70
121
 
71
- def self.is_indexed opts = {}
72
-
73
- opts.assert_valid_keys [:fields, :concats, :conditions, :includes, :nulls]
122
+ def self.is_indexed opts = {}
123
+ opts = opts._deep_stringify_keys
124
+
125
+ opts.assert_valid_keys ['fields', 'concatenate', 'conditions', 'include']
126
+
127
+ Array(opts[:fields]).each do |field|
128
+ field.assert_valid_keys ['field', 'as', 'facet', 'function_sql', 'sortable'] if field.is_a? Hash
129
+ end
74
130
 
75
- Array(opts[:concats]).each do |concat|
76
- concat.assert_valid_keys [:model, :conditions, :field, :as, :fields, :association_name, :association_sql]
77
- raise Ultrasphinx::ConfigurationError, "You can't mix regular concat and group concats" if concat[:fields] and (concat[:field] or concat[:model] or concat[:association_name])
78
- raise Ultrasphinx::ConfigurationError, "Group concats must not have multiple fields" if concat[:field].is_a? Array
79
- raise Ultrasphinx::ConfigurationError, "Regular concats should have multiple fields" if concat[:fields] and !concat[:fields].is_a?(Array)
131
+ Array(opts[:concatenate]).each do |concat|
132
+ concat.assert_valid_keys ['class_name', 'conditions', 'field', 'as', 'fields', 'association_name', 'association_sql', 'facet', 'function_sql', 'sortable']
133
+ raise Ultrasphinx::ConfigurationError, "You can't mix regular concat and group concats" if concat['fields'] and (concat['field'] or concat['class_name'] or concat['association_name'])
134
+ raise Ultrasphinx::ConfigurationError, "Group concats must not have multiple fields" if concat['field'].is_a? Array
135
+ raise Ultrasphinx::ConfigurationError, "Regular concats should have multiple fields" if concat['fields'] and !concat['fields'].is_a?(Array)
80
136
  end
81
137
 
82
- Array(opts[:joins]).each do |join|
83
- join.assert_valid_keys [:model, :field, :as]
138
+ Array(opts[:include]).each do |inc|
139
+ inc.assert_valid_keys ['class_name', 'field', 'as', 'association_sql', 'facet', 'function_sql', 'sortable']
84
140
  end
85
141
 
86
142
  Ultrasphinx::MODEL_CONFIGURATION[self.name] = opts
@@ -4,40 +4,65 @@ module Ultrasphinx
4
4
  =begin rdoc
5
5
  Command-interface Search object.
6
6
 
7
- == Making a search
7
+ == Basic usage
8
8
 
9
- To perform a search, instantiate an Ultrasphinx::Search object. Parameters are the query string, and an optional hash of query options.
9
+ To set up a search, instantiate an Ultrasphinx::Search object with a hash of parameters. Only the <tt>'query'</tt> key is mandatory.
10
10
  @search = Ultrasphinx::Search.new(
11
- @query,
12
- :sort_mode => 'descending',
13
- :sort_by => 'created_at'
11
+ 'query' => @query,
12
+ 'sort_mode' => 'descending',
13
+ 'sort_by' => 'created_at'
14
14
  )
15
15
 
16
- Now, to run the query, call its <tt>run()</tt> method. Your results will be available as ActiveRecord instances via <tt>results()</tt>. Example:
16
+ Now, to run the query, call its <tt>run</tt> method. Your results will be available as ActiveRecord instances via the <tt>results</tt> method. Example:
17
17
  @search.run
18
18
  @search.results
19
19
 
20
- == Query options
21
- <tt>:per_page</tt>:: An integer.. How many results per page.
22
- <tt>:page</tt>:: An integer. Which page of the paginated results to return.
23
- <tt>:models</tt>:: An array or string. The class name of the model you want to search, an array of models names to search, or nil for all available models.
24
- <tt>:sort_mode</tt>:: 'relevance' or 'ascending' or 'descending'. How to order the result set. Note that 'time' and 'extended' modes are available, but not tested.
25
- <tt>:sort_by</tt>:: A field name. What field to order by for 'ascending' or 'descending' mode. Has no effect for 'relevance'.
26
- <tt>:weights</tt>:: A hash. Text-field names and associated query weighting. The default weight for every field is 1.0. Example: <tt>:weights => {"title" => 2.0}</tt>
27
- <tt>:raw_filters</tt>:: A hash. Field names and associated values. You can use a single value, an array of values, or a range.
20
+ = Options
21
+
22
+ == Query format
23
+
24
+ The query string supports boolean operations, parentheses, phrases, and field-specific search. Query words are stemmed and joined by an implicit <tt>AND</tt> by default.
25
+
26
+ * Valid boolean operators are <tt>AND</tt>, <tt>OR</tt>, and <tt>NOT</tt>.
27
+ * Field-specific searches should be formatted as <tt>fieldname:contents</tt>. (This will only work for text fields. For numeric and date fields, see the <tt>'filter'</tt> parameter, below.)
28
+ * Phrases must be enclosed in double quotes.
29
+
30
+ A Sphinx::SphinxInternalError will be raised on invalid queries. In general, queries can only be nested to one level.
31
+ @query = 'dog OR cat OR "white tigers" NOT (lions OR bears) AND title:animals'
32
+
33
+ == Hash parameters
34
+
35
+ The hash lets you customize internal aspects of the search.
36
+
37
+ <tt>'per_page'</tt>:: An integer. How many results per page.
38
+ <tt>'page'</tt>:: An integer. Which page of the results to return.
39
+ <tt>'class_name'</tt>:: An array or string. The class name of the model you want to search, an array of model names to search, or <tt>nil</tt> for all available models.
40
+ <tt>'sort_mode'</tt>:: 'relevance' or 'ascending' or 'descending'. How to order the result set. Note that 'time' and 'extended' modes are available, but not tested.
41
+ <tt>'sort_by'</tt>:: A field name. What field to order by for 'ascending' or 'descending' mode. Has no effect for 'relevance'.
42
+ <tt>'weight'</tt>:: A hash. Text-field names and associated query weighting. The default weight for every field is 1.0. Example: <tt>'weight' => {'title' => 2.0}</tt>
43
+ <tt>'filter'</tt>:: A hash. Names of numeric or date fields and associated values. You can use a single value, an array of values, or a range. (See the bottom of the ActiveRecord::Base page for an example.)
44
+ <tt>'facets'</tt>:: An array of fields for grouping/faceting. You can access the returned facet values and their result counts with the <tt>facets</tt> method.
28
45
 
29
46
  Note that you can set up your own query defaults in <tt>environment.rb</tt>:
30
47
 
31
48
  Ultrasphinx::Search.query_defaults = {
32
- :per_page => 10,
33
- :sort_mode => :relevance,
34
- :weights => {"title" => 2.0}
49
+ 'per_page' => 10,
50
+ 'sort_mode' => 'relevance',
51
+ 'weight' => {'title' => 2.0}
35
52
  }
36
53
 
54
+ = Advanced features
55
+
37
56
  == Cache_fu integration
38
57
 
39
58
  The <tt>get_cache</tt> method will be used to instantiate records for models that respond to it. Otherwise, <tt>find</tt> is used.
40
59
 
60
+ == Will_paginate integration
61
+
62
+ The Search instance responds to the same methods as a WillPaginate::Collection object, so once you have called <tt>run</tt> or <tt>excerpt</tt> you can use it directly in your views:
63
+
64
+ will_paginate(@search)
65
+
41
66
  == Excerpt mode
42
67
 
43
68
  You can have Sphinx excerpt and highlight the matched sections in the associated fields. Instead of calling <tt>run</tt>, call <tt>excerpt</tt>.
@@ -46,17 +71,17 @@ You can have Sphinx excerpt and highlight the matched sections in the associated
46
71
 
47
72
  The returned models will be frozen and have their field contents temporarily changed to the excerpted and highlighted results.
48
73
 
49
- You need to set the <tt>content_methods</tt> key on Ultrasphinx::Search.excerpting_options to whatever methods you need the excerpter to try to excerpt. This way Ruby-only methods are supported (for example, a metadata method which combines various model fields, or an aliased field so that the original record contents are still available).
74
+ You need to set the <tt>content_methods</tt> key on Ultrasphinx::Search.excerpting_options to whatever groups of methods you need the excerpter to try to excerpt. The first responding method in each group for each record will be excerpted. This way Ruby-only methods are supported (for example, a metadata method which combines various model fields, or an aliased field so that the original record contents are still available).
50
75
 
51
76
  There are some other keys you can set, such as excerpt size, HTML tags to highlight with, and number of words on either side of each excerpt chunk. Example (in <tt>environment.rb</tt>):
52
77
 
53
78
  Ultrasphinx::Search.excerpting_options = {
54
- 'before_match' => "<strong>",
55
- 'after_match' => "</strong>",
79
+ 'before_match' => '<strong>',
80
+ 'after_match' => '</strong>',
56
81
  'chunk_separator' => "...",
57
82
  'limit' => 256,
58
83
  'around' => 3,
59
- 'content_methods' => [[:title], [:body, :description, :content], [:metadata]]
84
+ 'content_methods' => [['title'], ['body', 'description', 'content'], ['metadata']]
60
85
  }
61
86
 
62
87
  Note that your database is never changed by anything Ultrasphinx does.
@@ -64,16 +89,22 @@ Note that your database is never changed by anything Ultrasphinx does.
64
89
  =end
65
90
 
66
91
  class Search
67
- unloadable if RAILS_ENV == "development"
92
+
93
+ include Internals
94
+ include Parser
68
95
 
69
96
  cattr_accessor :query_defaults
70
- self.query_defaults ||= {:page => 1,
71
- :models => nil,
72
- :per_page => 20,
73
- :sort_by => 'created_at',
74
- :sort_mode => :relevance,
75
- :weights => nil,
76
- :raw_filters => nil}
97
+ self.query_defaults ||= {
98
+ 'query' => nil,
99
+ 'page' => 1,
100
+ 'class_name' => nil,
101
+ 'per_page' => 20,
102
+ 'sort_by' => 'created_at',
103
+ 'sort_mode' => 'relevance',
104
+ 'weight' => nil,
105
+ 'filter' => nil,
106
+ 'facets' => nil
107
+ }
77
108
 
78
109
  cattr_accessor :excerpting_options
79
110
  self.excerpting_options ||= {
@@ -81,29 +112,35 @@ Note that your database is never changed by anything Ultrasphinx does.
81
112
  'chunk_separator' => "...",
82
113
  'limit' => 256,
83
114
  'around' => 3,
84
- # results should respond to one in each group of these, in precedence order, in order for the excerpting to fire
85
- 'content_methods' => [[:title, :name], [:body, :description, :content], [:metadata]]
115
+ # results should respond to one in each group of these, in precedence order, for the excerpting to fire
116
+ 'content_methods' => [['title', 'name'], ['body', 'description', 'content'], ['metadata']]
86
117
  }
87
118
 
88
119
  cattr_accessor :client_options
89
120
  self.client_options ||= {
90
- :with_subtotals => true,
91
- :max_retries => 4,
92
- :retry_sleep_time => 3
121
+ 'with_subtotals' => false,
122
+ 'max_retries' => 4,
123
+ 'retry_sleep_time' => 3,
124
+ 'max_facets' => 100,
125
+ 'finder_methods' => ['get_cache', 'find']
93
126
  }
94
127
 
95
128
  # mode to integer mappings
96
129
  SPHINX_CLIENT_PARAMS = {
97
- :sort_mode => {
98
- :relevance => Sphinx::Client::SPH_SORT_RELEVANCE,
99
- :descending => Sphinx::Client::SPH_SORT_ATTR_DESC,
100
- :ascending => Sphinx::Client::SPH_SORT_ATTR_ASC,
101
- :time => Sphinx::Client::SPH_SORT_TIME_SEGMENTS,
102
- :extended => Sphinx::Client::SPH_SORT_EXTENDED,
103
- :desc => Sphinx::Client::SPH_SORT_ATTR_DESC, # legacy compatibility
104
- :asc => Sphinx::Client::SPH_SORT_ATTR_ASC
130
+ 'sort_mode' => {
131
+ 'relevance' => Sphinx::Client::SPH_SORT_RELEVANCE,
132
+ 'descending' => Sphinx::Client::SPH_SORT_ATTR_DESC,
133
+ 'ascending' => Sphinx::Client::SPH_SORT_ATTR_ASC,
134
+ 'time' => Sphinx::Client::SPH_SORT_TIME_SEGMENTS,
135
+ 'extended' => Sphinx::Client::SPH_SORT_EXTENDED,
136
+ 'desc' => Sphinx::Client::SPH_SORT_ATTR_DESC, # legacy compatibility
137
+ 'asc' => Sphinx::Client::SPH_SORT_ATTR_ASC
105
138
  }
106
139
  }
140
+
141
+ LEGACY_QUERY_KEYS = ['raw_filters'] #:nodoc:
142
+
143
+ INTERNAL_KEYS = ['parsed_query'] #:nodoc:
107
144
 
108
145
  def self.get_models_to_class_ids #:nodoc:
109
146
  # reading the conf file makes sure that we are in sync with the actual sphinx index,
@@ -132,30 +169,52 @@ Note that your database is never changed by anything Ultrasphinx does.
132
169
 
133
170
  MAX_MATCHES = DAEMON_SETTINGS["max_matches"].to_i
134
171
 
135
- # Returns the options hash you used.
136
- def options; @options; end
172
+ FACET_CACHE = {} #:nodoc:
173
+
174
+ # Returns the options hash.
175
+ def options
176
+ @options
177
+ end
137
178
 
138
179
  # Returns the query string used.
139
- def query; @query; end
180
+ def query
181
+ # redundant with method_missing
182
+ @options['query']
183
+ end
184
+
185
+ def parsed_query #:nodoc:
186
+ # redundant with method_missing
187
+ @options['parsed_query']
188
+ end
140
189
 
141
190
  # Returns an array of result objects.
142
- def results; @results; end
191
+ def results
192
+ run?(true)
193
+ @results
194
+ end
195
+
196
+ def facets
197
+ raise UsageError, "No facet field was configured" unless @options['facets']
198
+ run?(true)
199
+ @facets
200
+ end
201
+
143
202
 
144
203
  # Returns the raw response from the Sphinx client.
145
- def response; @response; end
204
+ def response
205
+ @response
206
+ end
146
207
 
147
- # Returns a hash of total result counts, scoped to each available model.
148
- def subtotals; @subtotals; end
208
+ # Returns a hash of total result counts, scoped to each available model. This requires extra queries against the search daemon right now. Set <tt>Ultrasphinx::Search.client_options['with_subtotals'] = true</tt> to enable the extra queries. Most of the overhead is in instantiating the AR result sets, so the performance hit is not usually significant.
209
+ def subtotals
210
+ raise UsageError, "Subtotals are not enabled" unless self.class.client_options['with_subtotals']
211
+ @subtotals
212
+ end
149
213
 
150
214
  # Returns the total result count.
151
- def total
215
+ def total_entries
152
216
  [response['total_found'] || 0, MAX_MATCHES].min
153
- end
154
-
155
- # Returns the number of results on this particular page, and may range from 0 up to per_page().
156
- def found
157
- results.size
158
- end
217
+ end
159
218
 
160
219
  # Returns the response time of the query, in milliseconds.
161
220
  def time
@@ -163,60 +222,103 @@ Note that your database is never changed by anything Ultrasphinx does.
163
222
  end
164
223
 
165
224
  # Returns whether the query has been run.
166
- def run?
167
- !response.blank?
225
+ def run?(should_raise = false)
226
+ if response.blank? and should_raise
227
+ raise UsageError, "Search has not yet been run" unless run?
228
+ else
229
+ !response.blank?
230
+ end
168
231
  end
169
232
 
170
233
  # Returns the current page number of the result set. (Page indexes begin at 1.)
171
- def page
172
- options[:page]
234
+ def current_page
235
+ @options['page']
173
236
  end
174
237
 
175
238
  # Returns the number of records per page.
176
239
  def per_page
177
- options[:per_page]
240
+ @options['per_page']
241
+ end
242
+
243
+ # Clear the associated facet caches. They will be rebuilt on your next <tt>run</tt> or <tt>excerpt</tt>.
244
+ def clear_facet_caches
245
+ Array(@options['facets']).each do |facet|
246
+ FACET_CACHE.delete(facet)
247
+ end
178
248
  end
179
249
 
180
250
  # Returns the last available page number in the result set.
181
- def last_page
182
- (total / per_page) + (total % per_page == 0 ? 0 : 1)
251
+ def page_count
252
+ (total_entries / per_page) + (total_entries % per_page == 0 ? 0 : 1)
253
+ end
254
+
255
+ # Returns the previous page number.
256
+ def previous_page
257
+ current_page > 1 ? (current_page - 1) : nil
258
+ end
259
+
260
+ # Returns the next page number.
261
+ def next_page
262
+ current_page < page_count ? (current_page + 1) : nil
183
263
  end
184
264
 
265
+ # Returns the global index position of the first result on this page.
266
+ def offset
267
+ (current_page - 1) * per_page
268
+ end
269
+
185
270
  # Builds a new command-interface Search object.
186
- def initialize query, opts = {}
187
- @query = query || ""
188
- @parsed_query = parse_google_to_sphinx(@query)
189
-
190
- @options = self.class.query_defaults.merge(opts._coerce_basic_types)
191
- @options[:raw_filters] ||= {}
192
- @options[:models] = Array(@options[:models])
271
+ def initialize opts = {}
272
+
273
+ opts = opts._deep_stringify_keys
274
+
275
+ @options = self.class.query_defaults.merge(opts._deep_dup._coerce_basic_types)
276
+
277
+ @options['filter'] ||= @options['raw_filters'] || {} # XXX legacy name
278
+
279
+ @options['query'] = @options['query'].to_s
280
+ @options['class_name'] = Array(@options['class_name'])
281
+
282
+ @options['parsed_query'] = if query.blank?
283
+ "@empty_searchable #{EMPTY_SEARCHABLE}"
284
+ else
285
+ parse(query)
286
+ end
193
287
 
194
- @results, @subtotals, @response = [], {}, {}
195
-
196
- raise Sphinx::SphinxArgumentError, "Invalid options: #{@extra * ', '}" if (@extra = (@options.keys - (SPHINX_CLIENT_PARAMS.merge(self.class.query_defaults).keys))).size > 0
288
+ @results, @subtotals, @facets, @response = [], {}, {}, {}
289
+
290
+ extra_keys = @options.keys - (SPHINX_CLIENT_PARAMS.merge(self.class.query_defaults).keys + LEGACY_QUERY_KEYS + INTERNAL_KEYS)
291
+ logger.warn "Discarded invalid keys: #{extra_keys * ', '}" if extra_keys.any?
197
292
  end
198
293
 
199
- # Run the search, filling results with an array of ActiveRecord objects.
200
- def run(reify = true)
294
+ # Run the search, filling results with an array of ActiveRecord objects. Set the parameter to false if you only want the ids returned.
295
+ def run(reify = true)
201
296
  @request = build_request_with_options(@options)
297
+ @paginate = nil # clear cache
202
298
  tries = 0
203
299
 
204
- logger.info "** ultrasphinx: searching for #{query.inspect} (parsed as #{@parsed_query.inspect}), options #{@options.inspect}"
300
+ logger.info "** ultrasphinx: searching for #{@options.inspect}"
205
301
 
206
302
  begin
207
- @response = @request.Query(@parsed_query)
208
- logger.info "** ultrasphinx: search returned, error #{@request.GetLastError.inspect}, warning #{@request.GetLastWarning.inspect}, returned #{total}/#{response['total_found']} in #{time} seconds."
303
+
304
+ @response = @request.Query(parsed_query)
305
+ logger.info "** ultrasphinx: search returned, error #{@request.GetLastError.inspect}, warning #{@request.GetLastWarning.inspect}, returned #{total_entries}/#{response['total_found']} in #{time} seconds."
209
306
 
210
- @subtotals = get_subtotals(@request, @parsed_query) if self.class.client_options[:with_subtotals]
307
+ @subtotals = get_subtotals(@request, parsed_query) if self.class.client_options['with_subtotals']
308
+
309
+ Array(@options['facets']).each do |facet|
310
+ @facets[facet] = get_facets(@request, parsed_query, facet)
311
+ end
312
+
211
313
  @results = response['matches']
212
314
 
213
315
  # if you don't reify, you'll have to do the modulus reversal yourself to get record ids
214
316
  @results = reify_results(@results) if reify
215
-
317
+
216
318
  rescue Sphinx::SphinxResponseError, Sphinx::SphinxTemporaryError, Errno::EPIPE => e
217
- if (tries += 1) <= self.class.client_options[:max_retries]
319
+ if (tries += 1) <= self.class.client_options['max_retries']
218
320
  logger.warn "** ultrasphinx: restarting query (#{tries} attempts already) (#{e})"
219
- sleep(self.class.client_options[:retry_sleep_time]) if tries == self.class.client_options[:max_retries]
321
+ sleep(self.class.client_options['retry_sleep_time']) if tries == self.class.client_options['max_retries']
220
322
  retry
221
323
  else
222
324
  logger.warn "** ultrasphinx: query failed"
@@ -253,7 +355,7 @@ Note that your database is never changed by anything Ultrasphinx does.
253
355
  responses = @request.BuildExcerpts(
254
356
  texts,
255
357
  UNIFIED_INDEX_NAME,
256
- strip_query_commands(@parsed_query),
358
+ strip_query_commands(parsed_query),
257
359
  self.class.excerpting_options.except('content_methods')
258
360
  ).in_groups_of(self.class.excerpting_options['content_methods'].size)
259
361
 
@@ -261,179 +363,30 @@ Note that your database is never changed by anything Ultrasphinx does.
261
363
  # override the individual model accessors with the excerpted data
262
364
  result, methods = result_and_methods
263
365
  methods.each_with_index do |method, j|
264
- result._metaclass.send(:define_method, method) { responses[i][j] } if method
366
+ result._metaclass.send('define_method', method) { responses[i][j] } if method
265
367
  end
266
368
  end
267
369
 
268
- @results = results_with_content_methods.map(&:first).map(&:freeze)
370
+ @results = results_with_content_methods.map do |result_and_content_method|
371
+ result_and_content_method.first.freeze
372
+ end
269
373
 
270
374
  self
271
375
  end
272
-
273
-
274
- private
275
376
 
276
- def build_request_with_options opts
277
-
278
- request = Sphinx::Client.new
279
-
280
- request.SetServer(PLUGIN_SETTINGS['server_host'], PLUGIN_SETTINGS['server_port'])
281
- request.SetMatchMode Sphinx::Client::SPH_MATCH_EXTENDED # force extended query mode
282
-
283
- offset, limit = opts[:per_page] * (opts[:page] - 1), opts[:per_page]
284
-
285
- request.SetLimits offset, limit, [offset + limit, MAX_MATCHES].min
286
- request.SetSortMode SPHINX_CLIENT_PARAMS[:sort_mode][opts[:sort_mode]], opts[:sort_by].to_s
287
-
288
- if weights = opts[:weights]
289
- # order the weights hash according to the field order for sphinx, and set the missing fields to 1.0
290
- # XXX we shouldn't really have to hit Fields.instance from within Ultrasphinx::Search
291
- request.SetWeights(Fields.instance.select{|n,t| t == 'text'}.map(&:first).sort.inject([]) do |array, field|
292
- array << (weights[field] || 1.0)
293
- end)
294
- end
295
-
296
- unless opts[:models].compact.empty?
297
- request.SetFilter 'class_id', opts[:models].map{|m| MODELS_TO_IDS[m.to_s]}
298
- end
299
-
300
- # extract ranged raw filters
301
- # XXX some of this mangling might not be necessary
302
- opts[:raw_filters].each do |field, value|
303
- begin
304
- unless value.is_a? Range
305
- request.SetFilter field, Array(value)
306
- else
307
- min, max = [value.first, value.last].map do |x|
308
- x._to_numeric if x.is_a? String
309
- end
310
- unless min.class != max.class
311
- min, max = max, min if min > max
312
- request.SetFilterRange field, min, max
313
- end
314
- end
315
- rescue NoMethodError => e
316
- raise Sphinx::SphinxArgumentError, "filter: #{field.inspect}:#{value.inspect} is invalid"
317
- end
318
- end
319
-
320
- # request.SetIdRange # never useful
321
- # request.SetGroup # never useful
322
-
323
- request
324
- end
325
-
326
- def get_subtotals(request, query)
327
- # XXX andrew says there's a better way to do this
328
- subtotals, filtered_request = {}, request.dup
329
-
330
- MODELS_TO_IDS.each do |name, class_id|
331
- filtered_request.instance_eval { @filters.delete_if {|f| f['attr'] == 'class_id'} }
332
- filtered_request.SetFilter 'class_id', [class_id]
333
- subtotals[name] = request.Query(query)['total_found']
334
- end
335
-
336
- subtotals
337
- end
338
-
339
- def strip_bogus_characters(s)
340
- # used to remove some garbage before highlighting
341
- s.gsub(/<.*?>|\.\.\.|\342\200\246|\n|\r/, " ").gsub(/http.*?( |$)/, ' ') if s
342
- end
343
-
344
- def strip_query_commands(s)
345
- # XXX dumb hack for query commands, since sphinx doesn't intelligently parse the query in excerpt mode
346
- s.gsub(/AND|OR|NOT|\@\w+/, "")
347
- end
348
-
349
- def parse_google_to_sphinx query
350
- # alters google-style querystring into sphinx-style
351
- return if query.blank?
352
-
353
- # remove AND's, always
354
- query = " #{query} ".gsub(" AND ", " ")
355
-
356
- # split query on spaces that are not inside sets of quotes or parens
357
- query = query.scan(/[^"() ]*["(][^")]*[")]|[^"() ]+/)
358
-
359
- query.each_with_index do |token, index|
360
-
361
- # recurse for parens, if necessary
362
- if token =~ /^(.*?)\((.*)\)(.*?$)/
363
- token = query[index] = "#{$1}(#{parse_google_to_sphinx $2})#{$3}"
364
- end
365
-
366
- # translate to sphinx-language
367
- case token
368
- when "OR"
369
- query[index] = "|"
370
- when "NOT"
371
- query[index] = "-#{query[index+1]}"
372
- query[index+1] = ""
373
- when "AND"
374
- query[index] = ""
375
- when /:/
376
- query[query.size] = "@" + query[index].sub(":", " ")
377
- query[index] = ""
378
- end
379
-
377
+
378
+ # Delegates enumerable methods to @results, if possible. This allows us to behave directly like a WillPaginate::Collection. Failing that, we delegate to the options hash if a key is set. This lets us use the <tt>self</tt> directly in view helpers.
379
+ def method_missing(*args, &block)
380
+ if @results.respond_to? args.first
381
+ @results.send(*args, &block)
382
+ elsif options.has_key? args.first.to_s
383
+ @options[args.first.to_s]
384
+ else
385
+ super
380
386
  end
381
- query.join(" ").squeeze(" ")
382
387
  end
383
388
 
384
- def reify_results(sphinx_ids)
385
-
386
- # order by position and then toss the rest of the data
387
- # make sure you are using the bundled Sphinx client, which has a patch
388
- sphinx_ids = sphinx_ids.sort_by do |key, value|
389
- value['index'] or raise ConfigurationError, "Your Sphinx client is not properly patched."
390
- end.map(&:first)
391
-
392
- # inverse-modulus map the sphinx ids to the table-specific ids
393
- ids = Hash.new([])
394
- sphinx_ids.each do |id|
395
- ids[MODELS_TO_IDS.invert[id % MODELS_TO_IDS.size]] += [id / MODELS_TO_IDS.size] # yay math
396
- end
397
- raise Sphinx::SphinxResponseError, "impossible document id in query result" unless ids.values.flatten.size == sphinx_ids.size
398
-
399
- # fetch them for real
400
- results = []
401
- ids.each do |model, id_set|
402
- klass = model.constantize
403
- finder = klass.respond_to?(:get_cache) ? :get_cache : :find
404
- logger.debug "** ultrasphinx: using #{klass.name}\##{finder} as finder method"
405
-
406
- begin
407
- results += case instances = id_set.map {|id| klass.send(finder, id)} # XXX temporary until we update cache_fu
408
- when Hash
409
- instances.values
410
- when Array
411
- instances
412
- else
413
- Array(instances)
414
- end
415
- rescue ActiveRecord:: ActiveRecordError => e
416
- raise Sphinx::SphinxResponseError, e.inspect
417
- end
418
- end
419
-
420
- # put them back in order
421
- results.sort_by do |r|
422
- raise Sphinx::SphinxResponseError, "Bogus ActiveRecord id for #{r.class}:#{r.id}" unless r.id
423
- index = (sphinx_ids.index(sphinx_id = r.id * MODELS_TO_IDS.size + MODELS_TO_IDS[r.class.base_class.name]))
424
- raise Sphinx::SphinxResponseError, "Bogus reverse id for #{r.class}:#{r.id} (Sphinx:#{sphinx_id})" unless index
425
- index / sphinx_ids.size.to_f
426
- end
427
-
428
- # add an accessor for absolute search rank for each record
429
- results.each_with_index do |r, index|
430
- i = per_page * page + index
431
- r._metaclass.send(:define_method, "result_index") { i }
432
- end
433
-
434
- end
435
-
436
- def logger
389
+ def logger #:nodoc:
437
390
  RAILS_DEFAULT_LOGGER
438
391
  end
439
392