initforthe-thinking-sphinx 1.1.21
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENCE +20 -0
- data/README.textile +141 -0
- data/lib/thinking_sphinx.rb +215 -0
- data/lib/thinking_sphinx/active_record.rb +278 -0
- data/lib/thinking_sphinx/active_record/attribute_updates.rb +48 -0
- data/lib/thinking_sphinx/active_record/delta.rb +87 -0
- data/lib/thinking_sphinx/active_record/has_many_association.rb +29 -0
- data/lib/thinking_sphinx/active_record/search.rb +57 -0
- data/lib/thinking_sphinx/adapters/abstract_adapter.rb +42 -0
- data/lib/thinking_sphinx/adapters/mysql_adapter.rb +54 -0
- data/lib/thinking_sphinx/adapters/postgresql_adapter.rb +135 -0
- data/lib/thinking_sphinx/association.rb +164 -0
- data/lib/thinking_sphinx/attribute.rb +268 -0
- data/lib/thinking_sphinx/class_facet.rb +15 -0
- data/lib/thinking_sphinx/collection.rb +148 -0
- data/lib/thinking_sphinx/configuration.rb +262 -0
- data/lib/thinking_sphinx/core/string.rb +15 -0
- data/lib/thinking_sphinx/deltas.rb +30 -0
- data/lib/thinking_sphinx/deltas/datetime_delta.rb +50 -0
- data/lib/thinking_sphinx/deltas/default_delta.rb +68 -0
- data/lib/thinking_sphinx/deltas/delayed_delta.rb +27 -0
- data/lib/thinking_sphinx/deltas/delayed_delta/delta_job.rb +24 -0
- data/lib/thinking_sphinx/deltas/delayed_delta/flag_as_deleted_job.rb +27 -0
- data/lib/thinking_sphinx/deltas/delayed_delta/job.rb +26 -0
- data/lib/thinking_sphinx/deploy/capistrano.rb +82 -0
- data/lib/thinking_sphinx/facet.rb +108 -0
- data/lib/thinking_sphinx/facet_collection.rb +59 -0
- data/lib/thinking_sphinx/field.rb +82 -0
- data/lib/thinking_sphinx/index.rb +99 -0
- data/lib/thinking_sphinx/index/builder.rb +287 -0
- data/lib/thinking_sphinx/index/faux_column.rb +110 -0
- data/lib/thinking_sphinx/property.rb +160 -0
- data/lib/thinking_sphinx/rails_additions.rb +136 -0
- data/lib/thinking_sphinx/search.rb +727 -0
- data/lib/thinking_sphinx/search/facets.rb +104 -0
- data/lib/thinking_sphinx/source.rb +150 -0
- data/lib/thinking_sphinx/source/internal_properties.rb +46 -0
- data/lib/thinking_sphinx/source/sql.rb +126 -0
- data/lib/thinking_sphinx/tasks.rb +162 -0
- data/rails/init.rb +14 -0
- data/spec/unit/thinking_sphinx/active_record/delta_spec.rb +136 -0
- data/spec/unit/thinking_sphinx/active_record/has_many_association_spec.rb +53 -0
- data/spec/unit/thinking_sphinx/active_record/search_spec.rb +107 -0
- data/spec/unit/thinking_sphinx/active_record_spec.rb +329 -0
- data/spec/unit/thinking_sphinx/association_spec.rb +246 -0
- data/spec/unit/thinking_sphinx/attribute_spec.rb +338 -0
- data/spec/unit/thinking_sphinx/collection_spec.rb +15 -0
- data/spec/unit/thinking_sphinx/configuration_spec.rb +222 -0
- data/spec/unit/thinking_sphinx/core/string_spec.rb +9 -0
- data/spec/unit/thinking_sphinx/facet_collection_spec.rb +64 -0
- data/spec/unit/thinking_sphinx/facet_spec.rb +302 -0
- data/spec/unit/thinking_sphinx/field_spec.rb +154 -0
- data/spec/unit/thinking_sphinx/index/builder_spec.rb +355 -0
- data/spec/unit/thinking_sphinx/index/faux_column_spec.rb +30 -0
- data/spec/unit/thinking_sphinx/index_spec.rb +45 -0
- data/spec/unit/thinking_sphinx/rails_additions_spec.rb +191 -0
- data/spec/unit/thinking_sphinx/search_spec.rb +228 -0
- data/spec/unit/thinking_sphinx/source_spec.rb +217 -0
- data/spec/unit/thinking_sphinx_spec.rb +151 -0
- data/tasks/distribution.rb +67 -0
- data/tasks/rails.rake +1 -0
- data/tasks/testing.rb +78 -0
- data/vendor/after_commit/LICENSE +20 -0
- data/vendor/after_commit/README +16 -0
- data/vendor/after_commit/Rakefile +22 -0
- data/vendor/after_commit/init.rb +8 -0
- data/vendor/after_commit/lib/after_commit.rb +45 -0
- data/vendor/after_commit/lib/after_commit/active_record.rb +114 -0
- data/vendor/after_commit/lib/after_commit/connection_adapters.rb +103 -0
- data/vendor/after_commit/test/after_commit_test.rb +53 -0
- data/vendor/delayed_job/lib/delayed/job.rb +251 -0
- data/vendor/delayed_job/lib/delayed/message_sending.rb +7 -0
- data/vendor/delayed_job/lib/delayed/performable_method.rb +55 -0
- data/vendor/delayed_job/lib/delayed/worker.rb +54 -0
- data/vendor/riddle/lib/riddle.rb +30 -0
- data/vendor/riddle/lib/riddle/client.rb +619 -0
- data/vendor/riddle/lib/riddle/client/filter.rb +53 -0
- data/vendor/riddle/lib/riddle/client/message.rb +65 -0
- data/vendor/riddle/lib/riddle/client/response.rb +84 -0
- data/vendor/riddle/lib/riddle/configuration.rb +33 -0
- data/vendor/riddle/lib/riddle/configuration/distributed_index.rb +48 -0
- data/vendor/riddle/lib/riddle/configuration/index.rb +142 -0
- data/vendor/riddle/lib/riddle/configuration/indexer.rb +19 -0
- data/vendor/riddle/lib/riddle/configuration/remote_index.rb +17 -0
- data/vendor/riddle/lib/riddle/configuration/searchd.rb +25 -0
- data/vendor/riddle/lib/riddle/configuration/section.rb +43 -0
- data/vendor/riddle/lib/riddle/configuration/source.rb +23 -0
- data/vendor/riddle/lib/riddle/configuration/sql_source.rb +34 -0
- data/vendor/riddle/lib/riddle/configuration/xml_source.rb +28 -0
- data/vendor/riddle/lib/riddle/controller.rb +44 -0
- metadata +190 -0
@@ -0,0 +1,136 @@
|
|
1
|
+
require 'set'

module ThinkingSphinx
  # Provides Hash#except and Hash#except! for environments where Hash does
  # not already define them.
  module HashExcept
    # Returns a new hash without the given keys.
    #
    # When the receiver responds to convert_key, each key is passed through
    # it before the comparison.
    def except(*keys)
      rejected = Set.new(respond_to?(:convert_key) ? keys.map { |key| convert_key(key) } : keys)
      reject { |key,| rejected.include?(key) }
    end

    # Replaces the contents of this hash with a copy that lacks the given
    # keys, and returns the hash itself.
    def except!(*keys)
      replace(except(*keys))
    end
  end
end

# Module#method_defined? works whether instance_methods returns Strings
# (Ruby 1.8) or Symbols (Ruby 1.9 and later).
Hash.send(
  :include, ThinkingSphinx::HashExcept
) unless Hash.method_defined?(:except)
|
19
|
+
|
20
|
+
module ThinkingSphinx
  # Provides Array#extract_options! for environments where Array does not
  # already define it.
  module ArrayExtractOptions
    # Removes and returns the last element when it is a Hash; otherwise
    # leaves the array untouched and returns an empty Hash.
    def extract_options!
      last.is_a?(::Hash) ? pop : {}
    end
  end
end

# Module#method_defined? works whether instance_methods returns Strings
# (Ruby 1.8) or Symbols (Ruby 1.9 and later).
Array.send(
  :include, ThinkingSphinx::ArrayExtractOptions
) unless Array.method_defined?(:extract_options!)
|
31
|
+
|
32
|
+
module ThinkingSphinx
  # Provides quote_table_name for connection adapters that do not define it.
  module AbstractQuotedTableName
    # Quotes a table name by delegating to the adapter's quote_column_name.
    def quote_table_name(name)
      quote_column_name(name)
    end
  end
end

# Module#method_defined? works whether instance_methods returns Strings
# (Ruby 1.8) or Symbols (Ruby 1.9 and later).
ActiveRecord::ConnectionAdapters::AbstractAdapter.send(
  :include, ThinkingSphinx::AbstractQuotedTableName
) unless ActiveRecord::ConnectionAdapters::AbstractAdapter.method_defined?(:quote_table_name)
|
43
|
+
|
44
|
+
module ThinkingSphinx
  # Provides quote_table_name for MySQL adapters that do not define it.
  module MysqlQuotedTableName
    # Quotes the name with quote_column_name, then replaces every '.' in the
    # quoted result with '`.`'.
    def quote_table_name(name) #:nodoc:
      quote_column_name(name).gsub('.', '`.`')
    end
  end
end

# Constant names are Strings on Ruby 1.8 and Symbols on 1.9 and later, so
# compare them as Strings.
mysql_adapter_loaded = ActiveRecord::ConnectionAdapters.constants.map { |const|
  const.to_s
}.include?("MysqlAdapter")

if mysql_adapter_loaded || ActiveRecord::Base.respond_to?(:jdbcmysql_connection)
  adapter = ActiveRecord::ConnectionAdapters.const_get(
    defined?(JRUBY_VERSION) ? :JdbcAdapter : :MysqlAdapter
  )
  # Module#method_defined? works regardless of whether instance_methods
  # returns Strings or Symbols.
  unless adapter.method_defined?(:quote_table_name)
    adapter.send(:include, ThinkingSphinx::MysqlQuotedTableName)
  end
end
|
60
|
+
|
61
|
+
module ThinkingSphinx
  # Class-level helper added to ActiveRecord::Base when it does not already
  # respond to quoted_table_name.
  module ActiveRecordQuotedName
    # Returns table_name quoted by the connection's quote_table_name.
    def quoted_table_name
      connection.quote_table_name(table_name)
    end
  end
end

unless ActiveRecord::Base.respond_to?("quoted_table_name")
  ActiveRecord::Base.extend(ThinkingSphinx::ActiveRecordQuotedName)
end
|
72
|
+
|
73
|
+
module ThinkingSphinx
  # Class-level helper added to ActiveRecord::Base when it does not already
  # respond to store_full_sti_class.
  module ActiveRecordStoreFullSTIClass
    # Always false in this fallback implementation.
    def store_full_sti_class
      false
    end
  end
end

unless ActiveRecord::Base.respond_to?(:store_full_sti_class)
  ActiveRecord::Base.extend(ThinkingSphinx::ActiveRecordStoreFullSTIClass)
end
|
84
|
+
|
85
|
+
module ThinkingSphinx
  # Provides cattr_reader, cattr_writer and cattr_accessor for environments
  # where classes do not already have them. Values are stored in class
  # variables (@@name) of the class the macro is called on.
  module ClassAttributeMethods
    # Defines a class-level and an instance-level reader for each name.
    # Hash arguments (options) are skipped.
    def cattr_reader(*syms)
      syms.flatten.each do |sym|
        next if sym.is_a?(Hash)
        # __LINE__ + 1 so backtraces point at the first heredoc line.
        class_eval(<<-EOS, __FILE__, __LINE__ + 1)
          unless defined? @@#{sym}
            @@#{sym} = nil
          end

          def self.#{sym}
            @@#{sym}
          end

          def #{sym}
            @@#{sym}
          end
        EOS
      end
    end

    # Defines a class-level writer for each name, plus an instance-level
    # writer unless the trailing options hash has :instance_writer => false.
    def cattr_writer(*syms)
      options = syms.extract_options!
      syms.flatten.each do |sym|
        # __LINE__ + 1 so backtraces point at the first heredoc line.
        class_eval(<<-EOS, __FILE__, __LINE__ + 1)
          unless defined? @@#{sym}
            @@#{sym} = nil
          end

          def self.#{sym}=(obj)
            @@#{sym} = obj
          end

          #{"
          def #{sym}=(obj)
            @@#{sym} = obj
          end
          " unless options[:instance_writer] == false }
        EOS
      end
    end

    # Defines both the readers and the writers for each name.
    def cattr_accessor(*syms)
      cattr_reader(*syms)
      cattr_writer(*syms)
    end
  end
end

# Include into Class (rather than extending the Class object) so that every
# class gains the macros; extending only adds them to Class itself.
Class.send(
  :include, ThinkingSphinx::ClassAttributeMethods
) unless Class.method_defined?(:cattr_reader)
|
@@ -0,0 +1,727 @@
|
|
1
|
+
require 'thinking_sphinx/search/facets'
|
2
|
+
|
3
|
+
module ThinkingSphinx
|
4
|
+
# Once you've got those indexes in and built, this is the stuff that
|
5
|
+
# matters - how to search! This class provides a generic search
|
6
|
+
# interface - which you can use to search all your indexed models at once.
|
7
|
+
# Most times, you will just want a specific model's results - the search and
|
8
|
+
# search_for_ids methods will do the job in exactly the same manner when
|
9
|
+
# called from a model.
|
10
|
+
#
|
11
|
+
class Search
|
12
|
+
GlobalFacetOptions = {
|
13
|
+
:all_attributes => false,
|
14
|
+
:class_facet => true
|
15
|
+
}
|
16
|
+
|
17
|
+
class << self
|
18
|
+
include ThinkingSphinx::Search::Facets
|
19
|
+
|
20
|
+
# Searches for results that match the parameters provided. Will only
|
21
|
+
# return the ids for the matching objects. See #search for syntax
|
22
|
+
# examples.
|
23
|
+
#
|
24
|
+
# Note that this only searches the Sphinx index, with no ActiveRecord
|
25
|
+
# queries. Thus, if your index is not in sync with the database, this
|
26
|
+
# method may return ids that no longer exist there.
|
27
|
+
#
|
28
|
+
def search_for_ids(*args)
  # search_results is handed a copy, so the options hash is still the last
  # element of args when it is extracted below.
  sphinx_results, sphinx_client = search_results(*args.clone)

  opts        = args.extract_options!
  page_number = 1
  page_number = opts[:page].to_i if opts[:page]

  ThinkingSphinx::Collection.ids_from_results(
    sphinx_results, page_number, sphinx_client.limit, opts
  )
end
|
36
|
+
|
37
|
+
# Searches through the Sphinx indexes for relevant matches. There's
|
38
|
+
# various ways to search, sort, group and filter - which are covered
|
39
|
+
# below.
|
40
|
+
#
|
41
|
+
# Also, if you have WillPaginate installed, the search method can be used
|
42
|
+
# just like paginate. The same parameters - :page and :per_page - work as
|
43
|
+
# expected, and the returned result set can be used by the will_paginate
|
44
|
+
# helper.
|
45
|
+
#
|
46
|
+
# == Basic Searching
|
47
|
+
#
|
48
|
+
# The simplest way of searching is straight text.
|
49
|
+
#
|
50
|
+
# ThinkingSphinx::Search.search "pat"
|
51
|
+
# ThinkingSphinx::Search.search "google"
|
52
|
+
# User.search "pat", :page => (params[:page] || 1)
|
53
|
+
# Article.search "relevant news issue of the day"
|
54
|
+
#
|
55
|
+
# If you specify :include, as in a #find call, this will be respected
|
56
|
+
# when loading the relevant models from the search results.
|
57
|
+
#
|
58
|
+
# User.search "pat", :include => :posts
|
59
|
+
#
|
60
|
+
# == Match Modes
|
61
|
+
#
|
62
|
+
# Sphinx supports 5 different matching modes. By default Thinking Sphinx
|
63
|
+
# uses :all, which unsurprisingly requires all the supplied search terms
|
64
|
+
# to match a result.
|
65
|
+
#
|
66
|
+
# Alternative modes include:
|
67
|
+
#
|
68
|
+
# User.search "pat allan", :match_mode => :any
|
69
|
+
# User.search "pat allan", :match_mode => :phrase
|
70
|
+
# User.search "pat | allan", :match_mode => :boolean
|
71
|
+
# User.search "@name pat | @username pat", :match_mode => :extended
|
72
|
+
#
|
73
|
+
# Any will find results with any of the search terms. Phrase treats the search
|
74
|
+
# terms as a single phrase instead of individual words. Boolean and extended allow
|
75
|
+
# for more complex query syntax, refer to the sphinx documentation for further
|
76
|
+
# details.
|
77
|
+
#
|
78
|
+
# == Weighting
|
79
|
+
#
|
80
|
+
# Sphinx has support for weighting, where matches in one field can be considered
|
81
|
+
# more important than in another. Weights are integers, with 1 as the default.
|
82
|
+
# They can be set per-search like this:
|
83
|
+
#
|
84
|
+
# User.search "pat allan", :field_weights => { :alias => 4, :aka => 2 }
|
85
|
+
#
|
86
|
+
# If you're searching multiple models, you can set per-index weights:
|
87
|
+
#
|
88
|
+
# ThinkingSphinx::Search.search "pat", :index_weights => { User => 10 }
|
89
|
+
#
|
90
|
+
# See http://sphinxsearch.com/doc.html#weighting for further details.
|
91
|
+
#
|
92
|
+
# == Searching by Fields
|
93
|
+
#
|
94
|
+
# If you want to step it up a level, you can limit your search terms to
|
95
|
+
# specific fields:
|
96
|
+
#
|
97
|
+
# User.search :conditions => {:name => "pat"}
|
98
|
+
#
|
99
|
+
# This uses Sphinx's extended match mode, unless you specify a different
|
100
|
+
# match mode explicitly (but then this way of searching won't work). Also
|
101
|
+
# note that you don't need to put in a search string.
|
102
|
+
#
|
103
|
+
# == Searching by Attributes
|
104
|
+
#
|
105
|
+
# Also known as filters, you can limit your searches to documents that
|
106
|
+
# have specific values for their attributes. There are three ways to do
|
107
|
+
# this. The first two techniques work in all scenarios - using the :with
|
108
|
+
# or :with_all options.
|
109
|
+
#
|
110
|
+
# ThinkingSphinx::Search.search :with => {:tag_ids => 10}
|
111
|
+
# ThinkingSphinx::Search.search :with => {:tag_ids => [10,12]}
|
112
|
+
# ThinkingSphinx::Search.search :with_all => {:tag_ids => [10,12]}
|
113
|
+
#
|
114
|
+
# The first :with search will match records with a tag_id attribute of 10.
|
115
|
+
# The second :with will match records with a tag_id attribute of 10 OR 12.
|
116
|
+
# If you need to find records that are tagged with ids 10 AND 12, you
|
117
|
+
# will need to use the :with_all search parameter. This is particularly
|
118
|
+
# useful in conjunction with Multi Value Attributes (MVAs).
|
119
|
+
#
|
120
|
+
# The third filtering technique is only viable if you're searching with a
|
121
|
+
# specific model (not multi-model searching). With a single model,
|
122
|
+
# Thinking Sphinx can figure out what attributes and fields are available,
|
123
|
+
# so you can put it all in the :conditions hash, and it will sort it out.
|
124
|
+
#
|
125
|
+
# Node.search :conditions => {:parent_id => 10}
|
126
|
+
#
|
127
|
+
# Filters can be single values, arrays of values, or ranges.
|
128
|
+
#
|
129
|
+
# Article.search "East Timor", :conditions => {:rating => 3..5}
|
130
|
+
#
|
131
|
+
# == Excluding by Attributes
|
132
|
+
#
|
133
|
+
# Sphinx also supports negative filtering - where the filters are of
|
134
|
+
# attribute values to exclude. This is done with the :without option:
|
135
|
+
#
|
136
|
+
# User.search :without => {:role_id => 1}
|
137
|
+
#
|
138
|
+
# == Excluding by Primary Key
|
139
|
+
#
|
140
|
+
# There is a shortcut to exclude records by their ActiveRecord primary key:
|
141
|
+
#
|
142
|
+
# User.search :without_ids => 1
|
143
|
+
#
|
144
|
+
# Pass an array or a single value.
|
145
|
+
#
|
146
|
+
# The primary key must be an integer as a negative filter is used. Note
|
147
|
+
# that for multi-model search, an id may occur in more than one model.
|
148
|
+
#
|
149
|
+
# == Infix (Star) Searching
|
150
|
+
#
|
151
|
+
# By default, Sphinx uses English stemming, e.g. matching "shoes" if you
|
152
|
+
# search for "shoe". It won't find "Melbourne" if you search for
|
153
|
+
# "elbourn", though.
|
154
|
+
#
|
155
|
+
# Enable infix searching by something like this in config/sphinx.yml:
|
156
|
+
#
|
157
|
+
# development:
|
158
|
+
# enable_star: 1
|
159
|
+
# min_infix_length: 2
|
160
|
+
#
|
161
|
+
# Note that this will make indexing take longer.
|
162
|
+
#
|
163
|
+
# With those settings (and after reindexing), wildcard asterisks can be used
|
164
|
+
# in queries:
|
165
|
+
#
|
166
|
+
# Location.search "*elbourn*"
|
167
|
+
#
|
168
|
+
# To automatically add asterisks around every token (but not operators),
|
169
|
+
# pass the :star option:
|
170
|
+
#
|
171
|
+
# Location.search "elbourn -ustrali", :star => true, :match_mode => :boolean
|
172
|
+
#
|
173
|
+
# This would become "*elbourn* -*ustrali*". The :star option only adds the
|
174
|
+
# asterisks. You need to make the config/sphinx.yml changes yourself.
|
175
|
+
#
|
176
|
+
# By default, the tokens are assumed to match the regular expression /\w+/u.
|
177
|
+
# If you've modified the charset_table, pass another regular expression, e.g.
|
178
|
+
#
|
179
|
+
# User.search("oo@bar.c", :star => /[\w@.]+/u)
|
180
|
+
#
|
181
|
+
# to search for "*oo@bar.c*" and not "*oo*@*bar*.*c*".
|
182
|
+
#
|
183
|
+
# == Sorting
|
184
|
+
#
|
185
|
+
# Sphinx can only sort by attributes, so generally you will need to avoid
|
186
|
+
# using field names in your :order option. However, if you're searching
|
187
|
+
# on a single model, and have specified some fields as sortable, you can
|
188
|
+
# use those field names and Thinking Sphinx will interpret accordingly.
|
189
|
+
# Remember: this will only happen for single-model searches, and only
|
190
|
+
# through the :order option.
|
191
|
+
#
|
192
|
+
# Location.search "Melbourne", :order => :state
|
193
|
+
# User.search :conditions => {:role_id => 2}, :order => "name ASC"
|
194
|
+
#
|
195
|
+
# Keep in mind that if you use a string, you *must* specify the direction
|
196
|
+
# (ASC or DESC) else Sphinx won't return any results. If you use a symbol
|
197
|
+
# then Thinking Sphinx assumes ASC, but if you wish to state otherwise,
|
198
|
+
# use the :sort_mode option:
|
199
|
+
#
|
200
|
+
# Location.search "Melbourne", :order => :state, :sort_mode => :desc
|
201
|
+
#
|
202
|
+
# Of course, there are other sort modes - check out the Sphinx
|
203
|
+
# documentation[http://sphinxsearch.com/doc.html] for that level of
|
204
|
+
# detail though.
|
205
|
+
#
|
206
|
+
# If desired, you can sort by a column in your model instead of a sphinx
|
207
|
+
# field or attribute. This sort only applies to the current page, so is
|
208
|
+
# most useful when performing a search with a single page of results.
|
209
|
+
#
|
210
|
+
# User.search("pat", :sql_order => "name")
|
211
|
+
#
|
212
|
+
# == Grouping
|
213
|
+
#
|
214
|
+
# For this you can use the group_by, group_clause and group_function
|
215
|
+
# options - which are all directly linked to Sphinx's expectations. No
|
216
|
+
# magic from Thinking Sphinx. It can get a little tricky, so make sure
|
217
|
+
# you read all the relevant
|
218
|
+
# documentation[http://sphinxsearch.com/doc.html#clustering] first.
|
219
|
+
#
|
220
|
+
# Grouping is done via three parameters within the options hash
|
221
|
+
# * <tt>:group_function</tt> determines the way grouping is done
|
222
|
+
# * <tt>:group_by</tt> determines the field which is used for grouping
|
223
|
+
# * <tt>:group_clause</tt> determines the sorting order
|
224
|
+
#
|
225
|
+
# As a convenience, you can also use
|
226
|
+
# * <tt>:group</tt>
|
227
|
+
# which sets :group_by and defaults to :group_function of :attr
|
228
|
+
#
|
229
|
+
# === group_function
|
230
|
+
#
|
231
|
+
# Valid values for :group_function are
|
232
|
+
# * <tt>:day</tt>, <tt>:week</tt>, <tt>:month</tt>, <tt>:year</tt> - Grouping is done by the respective timeframes.
|
233
|
+
# * <tt>:attr</tt>, <tt>:attrpair</tt> - Grouping is done by the specified attribute(s)
|
234
|
+
#
|
235
|
+
# === group_by
|
236
|
+
#
|
237
|
+
# This parameter denotes the field by which grouping is done. Note that the
|
238
|
+
# specified field must be a sphinx attribute or index.
|
239
|
+
#
|
240
|
+
# === group_clause
|
241
|
+
#
|
242
|
+
# This determines the sorting order of the groups. In a grouping search,
|
243
|
+
# the matches within a group will be sorted by the <tt>:sort_mode</tt> and <tt>:order</tt> parameters.
|
244
|
+
# The group matches themselves however, will be sorted by <tt>:group_clause</tt>.
|
245
|
+
#
|
246
|
+
# The syntax for this is the same as an order parameter in extended sort mode.
|
247
|
+
# Namely, you can specify an SQL-like sort expression with up to 5 attributes
|
248
|
+
# (including internal attributes), eg: "@relevance DESC, price ASC, @id DESC"
|
249
|
+
#
|
250
|
+
# === Grouping by timestamp
|
251
|
+
#
|
252
|
+
# Timestamp grouping groups off items by the day, week, month or year of the
|
253
|
+
# attribute given. In order to do this you need to define a timestamp attribute,
|
254
|
+
# which pretty much looks like the standard definition for any attribute.
|
255
|
+
#
|
256
|
+
# define_index do
|
257
|
+
# #
|
258
|
+
# # All your other stuff
|
259
|
+
# #
|
260
|
+
# has :created_at
|
261
|
+
# end
|
262
|
+
#
|
263
|
+
# When you need to fire off your search, it'll go something to the tune of
|
264
|
+
#
|
265
|
+
# Fruit.search "apricot", :group_function => :day, :group_by => 'created_at'
|
266
|
+
#
|
267
|
+
# The <tt>@groupby</tt> special attribute will contain the date for that group.
|
268
|
+
# Depending on the <tt>:group_function</tt> parameter, the date format will be
|
269
|
+
#
|
270
|
+
# * <tt>:day</tt> - YYYYMMDD
|
271
|
+
# * <tt>:week</tt> - YYYYNNN (NNN is the first day of the week in question,
|
272
|
+
# counting from the start of the year )
|
273
|
+
# * <tt>:month</tt> - YYYYMM
|
274
|
+
# * <tt>:year</tt> - YYYY
|
275
|
+
#
|
276
|
+
#
|
277
|
+
# === Grouping by attribute
|
278
|
+
#
|
279
|
+
# The syntax is the same as grouping by timestamp, except for the fact that the
|
280
|
+
# <tt>:group_function</tt> parameter is changed
|
281
|
+
#
|
282
|
+
# Fruit.search "apricot", :group_function => :attr, :group_by => 'size'
|
283
|
+
#
|
284
|
+
#
|
285
|
+
# == Geo/Location Searching
|
286
|
+
#
|
287
|
+
# Sphinx - and therefore Thinking Sphinx - has the facility to search
|
288
|
+
# around a geographical point, using a given latitude and longitude. To
|
289
|
+
# take advantage of this, you will need to have both of those values in
|
290
|
+
# attributes. To search with that point, you can then use one of the
|
291
|
+
# following syntax examples:
|
292
|
+
#
|
293
|
+
# Address.search "Melbourne", :geo => [1.4, -2.217], :order => "@geodist asc"
|
294
|
+
# Address.search "Australia", :geo => [-0.55, 3.108], :order => "@geodist asc"
|
295
|
+
# :latitude_attr => "latit", :longitude_attr => "longit"
|
296
|
+
#
|
297
|
+
# The first example applies when your latitude and longitude attributes
|
298
|
+
# are named any of lat, latitude, lon, long or longitude. If that's not
|
299
|
+
# the case, you will need to explicitly state them in your search, _or_
|
300
|
+
# you can do so in your model:
|
301
|
+
#
|
302
|
+
# define_index do
|
303
|
+
# has :latit # Float column, stored in radians
|
304
|
+
# has :longit # Float column, stored in radians
|
305
|
+
#
|
306
|
+
# set_property :latitude_attr => "latit"
|
307
|
+
# set_property :longitude_attr => "longit"
|
308
|
+
# end
|
309
|
+
#
|
310
|
+
# Now, geo-location searching really only has an effect if you have a
|
311
|
+
# filter, sort or grouping clause related to it - otherwise it's just a
|
312
|
+
# normal search, and _will not_ return a distance value otherwise. To
|
313
|
+
# make use of the positioning difference, use the special attribute
|
314
|
+
# "@geodist" in any of your filters or sorting or grouping clauses.
|
315
|
+
#
|
316
|
+
# And don't forget - both the latitude and longitude you use in your
|
317
|
+
# search, and the values in your indexes, need to be stored as a float in radians,
|
318
|
+
# _not_ degrees. Keep in mind that if you do this conversion in SQL
|
319
|
+
# you will need to explicitly declare a column type of :float.
|
320
|
+
#
|
321
|
+
# define_index do
|
322
|
+
# has 'RADIANS(lat)', :as => :lat, :type => :float
|
323
|
+
# # ...
|
324
|
+
# end
|
325
|
+
#
|
326
|
+
# Once you've got your results set, you can access the distances as
|
327
|
+
# follows:
|
328
|
+
#
|
329
|
+
# @results.each_with_geodist do |result, distance|
|
330
|
+
# # ...
|
331
|
+
# end
|
332
|
+
#
|
333
|
+
# The distance value is returned as a float, representing the distance in
|
334
|
+
# metres.
|
335
|
+
#
|
336
|
+
# == Handling a Stale Index
|
337
|
+
#
|
338
|
+
# Especially if you don't use delta indexing, you risk having records in the
|
339
|
+
# Sphinx index that are no longer in the database. By default, those will simply
|
340
|
+
# come back as nils:
|
341
|
+
#
|
342
|
+
# >> pat_user.delete
|
343
|
+
# >> User.search("pat")
|
344
|
+
# Sphinx Result: [1,2]
|
345
|
+
# => [nil, <#User id: 2>]
|
346
|
+
#
|
347
|
+
# (If you search across multiple models, you'll get ActiveRecord::RecordNotFound.)
|
348
|
+
#
|
349
|
+
# You can simply Array#compact these results or handle the nils in some other way, but
|
350
|
+
# Sphinx will still report two results, and the missing records may upset your layout.
|
351
|
+
#
|
352
|
+
# If you pass :retry_stale => true to a single-model search, missing records will
|
353
|
+
# cause Thinking Sphinx to retry the query but excluding those records. Since search
|
354
|
+
# is paginated, the new search could potentially include missing records as well, so by
|
355
|
+
# default Thinking Sphinx will retry three times. Pass :retry_stale => 5 to retry five
|
356
|
+
# times, and so on. If there are still missing ids on the last retry, they are
|
357
|
+
# shown as nils.
|
358
|
+
#
|
359
|
+
def search(*args)
  # Work on a copy so the caller's argument array is not modified.
  query   = args.clone # an array
  options = query.extract_options!

  # The block may be run more than once: retry_search_on_stale_index
  # re-runs it after adding stale ids to options[:without_ids].
  retry_search_on_stale_index(query, options) do
    results, client = search_results(*(query + [options]))

    log "Sphinx Error: #{results[:error]}", :error if results[:error]

    page = options[:page] ? options[:page].to_i : 1

    ThinkingSphinx::Collection.create_from_results(results, page, client.limit, options)
  end
end
|
374
|
+
|
375
|
+
def retry_search_on_stale_index(query, options, &block)
  # :retry_stale => true means three retries, nil/false means none, and
  # anything else is converted with to_i.
  setting = options[:retry_stale]
  retries_remaining =
    if true.equal?(setting)
      3
    elsif !setting
      0
    else
      setting.to_i
    end
  seen_stale_ids = []

  begin
    # Passed through the options so Collection.create_from_results can see
    # it: stale records should only raise while retries remain.
    options[:raise_on_stale] = retries_remaining > 0
    block.call
  rescue StaleIdsException => e
    # Raised when records were found in Sphinx but not in the database and
    # :raise_on_stale is set. Exclude those ids and run the block again.
    retries_remaining -= 1

    seen_stale_ids |= e.ids # kept only for the log message
    options[:without_ids] = Array(options[:without_ids]) | e.ids # the actual exclusion

    noun = (retries_remaining == 1 ? 'try' : 'tries')
    log("Sphinx Stale Ids (%s %s left): %s" % [
      retries_remaining, noun, seen_stale_ids.join(', ')
    ])

    retry
  end
end
|
406
|
+
|
407
|
+
# Runs the search and returns the :total_found value from the results,
# or 0 when that value is missing.
def count(*args)
  results, _client = search_results(*args.clone)
  results[:total_found] || 0
end
|
411
|
+
|
412
|
+
# Checks if a document with the given id exists within a specific index.
|
413
|
+
# Expected parameters:
|
414
|
+
#
|
415
|
+
# - ID of the document
|
416
|
+
# - Index to check within
|
417
|
+
# - Options hash (defaults to {})
|
418
|
+
#
|
419
|
+
# Example:
|
420
|
+
#
|
421
|
+
# ThinkingSphinx::Search.search_for_id(10, "user_core", :class => User)
|
422
|
+
#
|
423
|
+
def search_for_id(*args)
  options = args.extract_options!
  client  = client_from_options options

  query, filters = search_conditions(
    options[:class], options[:conditions] || {}
  )
  client.filters   += filters
  client.match_mode = :extended unless query.empty?
  # Restrict the id range to exactly the requested document id.
  client.id_range   = args.first..args.first

  begin
    # args[1] is the index name; true when at least one match came back.
    client.query(query, args[1])[:matches].length > 0
  rescue Errno::ECONNREFUSED
    raise ThinkingSphinx::ConnectionError, "Connection to Sphinx Daemon (searchd) failed."
  end
end
|
440
|
+
|
441
|
+
private
|
442
|
+
|
443
|
+
# This method handles the common search functionality, and returns both
|
444
|
+
# the result hash and the client. Not super elegant, but it'll do for
|
445
|
+
# the moment.
|
446
|
+
#
|
447
|
+
def search_results(*args)
  options = args.extract_options!
  query   = args.join(' ')
  client  = client_from_options options

  query = star_query(query, options[:star]) if options[:star]

  extra_query, filters = search_conditions(
    options[:class], options[:conditions] || {}
  )
  client.filters   += filters
  client.match_mode = :extended unless extra_query.empty?
  query             = [query, extra_query].join(' ')
  query.strip! # Because "" and " " are not equivalent

  set_sort_options! client, options

  # Pagination: pages below 1 are treated as page 1.
  client.limit  = options[:per_page].to_i if options[:per_page]
  page          = options[:page] ? options[:page].to_i : 1
  page          = 1 if page <= 0
  client.offset = (page - 1) * client.limit

  begin
    log "Sphinx: #{query}"
    results = client.query(query, '*', options[:comment] || '')
    log "Sphinx Result:"
    log results[:matches].map { |match|
      match[:attributes]["sphinx_internal_id"]
    }.inspect
  rescue Errno::ECONNREFUSED
    raise ThinkingSphinx::ConnectionError, "Connection to Sphinx Daemon (searchd) failed."
  end

  # Callers need both the result hash and the client.
  [results, client]
end
|
482
|
+
|
483
|
+
# Build a Riddle client and apply all the appropriate settings to it, using
# the provided options hash.
#
# Each supported client setting is taken from +options+, falling back to the
# searched class's index options (when options[:class] is given) and then to
# the value the client already has. After that the anchor is resolved and
# the filters are assembled in this order: sphinx_deleted, class_crc, :with,
# :without, :with_all, :without_ids.
#
# Note that +options+ itself is written to: :max_matches, :index_weights,
# :group_by, :group_function and :classes may be set or replaced.
#
# Returns the configured client.
#
def client_from_options(options = {})
  config = ThinkingSphinx::Configuration.instance
  client = Riddle::Client.new config.address, config.port
  klass  = options[:class]
  index_options = klass ? klass.sphinx_index_options : {}

  # The Riddle default is per-query max_matches=1000. If we set the
  # per-server max to a smaller value in sphinx.yml, we need to override
  # the Riddle default or else we get search errors like
  # "per-query max_matches=1000 out of bounds (per-server max_matches=200)"
  if per_server_max_matches = config.configuration.searchd.max_matches
    options[:max_matches] ||= per_server_max_matches
  end

  # Turn :index_weights => { "foo" => 2, User => 1 }
  # into :index_weights => { "foo" => 2, "user_core" => 1, "user_delta" => 1 }
  if iw = options[:index_weights]
    options[:index_weights] = iw.inject({}) do |hash, (index, weight)|
      if index.is_a?(Class)
        name = ThinkingSphinx::Index.name(index)
        hash["#{name}_core"]  = weight
        hash["#{name}_delta"] = weight
      else
        hash[index] = weight
      end

      hash
    end
  end

  # Group by defaults using :group
  if options[:group]
    options[:group_by]         = options[:group].to_s
    options[:group_function] ||= :attr
  end

  [
    :max_matches, :match_mode, :sort_mode, :sort_by, :id_range,
    :group_by, :group_function, :group_clause, :group_distinct, :cut_off,
    :retry_count, :retry_delay, :index_weights, :rank_mode,
    :max_query_time, :field_weights, :filters, :anchor, :limit
  ].each do |key|
    client.send(
      "#{key}=",
      options[key] || index_options[key] || client.send(key)
    )
  end

  options[:classes] = [klass] if klass

  client.anchor = anchor_conditions(klass, options) || {} if client.anchor.empty?

  # At this point client.filters may be the very array the caller passed as
  # options[:filters] (or the one held in the index options). Build the final
  # list on a copy so the built-in filters below are never appended to an
  # array owned by someone else.
  filters = client.filters.dup

  filters << Riddle::Client::Filter.new("sphinx_deleted", [0])

  # class filters
  filters << Riddle::Client::Filter.new(
    "class_crc", options[:classes].collect { |k| k.to_crc32s }.flatten
  ) if options[:classes]

  # normal attribute filters
  filters += options[:with].collect { |attr, val|
    Riddle::Client::Filter.new attr.to_s, filter_value(val)
  } if options[:with]

  # exclusive attribute filters
  filters += options[:without].collect { |attr, val|
    Riddle::Client::Filter.new attr.to_s, filter_value(val), true
  } if options[:without]

  # every-match attribute filters
  filters += options[:with_all].collect { |attr, vals|
    Array(vals).collect { |val|
      Riddle::Client::Filter.new attr.to_s, filter_value(val)
    }
  }.flatten if options[:with_all]

  # exclusive attribute filter on primary key
  filters += Array(options[:without_ids]).collect { |id|
    Riddle::Client::Filter.new 'sphinx_internal_id', filter_value(id), true
  } if options[:without_ids]

  client.filters = filters
  client
end
|
570
|
+
|
571
|
+
# Wrap each bare token of +query+ in wildcard stars ("foo" => "*foo*").
#
# +custom_token+ may be a Regexp describing what counts as a token; any
# other value (including nil or true) selects the default /\w+/u.
#
# A match is left untouched when it is:
# * an operator argument, i.e. directly preceded by "@", "~" or "/" that is
#   not itself part of a token (e.g. "@foo", "/2", "~3");
# * a quoted phrase (e.g. "foo bar", with quotes);
# * already adjacent to a star on either side.
#
# Returns a new string; +query+ is not modified.
#
def star_query(query, custom_token = nil)
  token = custom_token.is_a?(Regexp) ? custom_token : /\w+/u

  query.gsub(/("#{token}(.*?#{token})?"|(?![!-])#{token})/u) do
    # Capture the match data first: the operator check below runs another
    # match and would overwrite it.
    current = Regexp.last_match
    pre, proper, post = current.pre_match, current[0], current.post_match

    is_operator = pre.match(%r{(\W|^)[@~/]\Z})
    is_quote    = proper.start_with?('"') && proper.end_with?('"')
    has_star    = pre.end_with?("*") || post.start_with?("*")

    if is_operator || is_quote || has_star
      proper
    else
      "*#{proper}*"
    end
  end
end
|
586
|
+
|
587
|
+
# Normalise a filter value into what is handed to Riddle::Client::Filter.
#
# * Range - returned as is, unless it starts with a Time, in which case
#           both ends are converted to integer timestamps.
# * Array - returned with any Time elements converted to timestamps.
# * Time  - returned as a one-element array holding its timestamp.
# * other - wrapped with Array().
#
def filter_value(value)
  case value
  when Range
    value.first.is_a?(Time) ? timestamp(value.first)..timestamp(value.last) : value
  when Array
    value.collect { |val| val.is_a?(Time) ? timestamp(val) : val }
  else
    # Array(time) would call Time#to_a and yield the calendar components
    # (sec, min, hour, ...) rather than a single value, so a lone Time has
    # to be converted explicitly.
    value.is_a?(Time) ? [timestamp(value)] : Array(value)
  end
end
|
597
|
+
|
598
|
+
# Returns the integer timestamp for a Time object.
#
# If using Rails 2.1+, need to handle timezones to translate them back to
# UTC, as that's what datetimes will be stored as by MySQL.
#
# in_time_zone is a method that was added for the timezone support in
# Rails 2.1, which is why it's used for testing. I'm sure there's better
# ways, but this does the job.
#
# The value passed in is never modified.
#
def timestamp(value)
  return value.to_i unless value.respond_to?(:in_time_zone)

  # Time#utc converts its receiver in place (and raises on a frozen Time),
  # so do the conversion on a copy rather than on the caller's object.
  value.dup.utc.to_i
end
|
610
|
+
|
611
|
+
# Split a conditions hash into field query text and attribute filters.
#
# A condition whose key (as a symbol) is the unique name of one of the
# class's index attributes becomes a Riddle filter; every other condition
# becomes an "@key value" fragment. Without a class there are no known
# attributes, so everything is treated as a field condition.
#
# Returns a two-element array: the fragments joined with single spaces, and
# the list of filters.
#
def search_conditions(klass, conditions={})
  attribute_names = if klass
    klass.sphinx_indexes.collect { |index|
      index.attributes.collect { |attrib| attrib.unique_name }
    }.flatten
  else
    []
  end

  field_terms       = []
  attribute_filters = []

  conditions.each do |key, val|
    unless attribute_names.include?(key.to_sym)
      field_terms << "@#{key} #{val}"
      next
    end

    attribute_filters << Riddle::Client::Filter.new(key.to_s, filter_value(val))
  end

  [field_terms.join(' '), attribute_filters]
end
|
634
|
+
|
635
|
+
# Work out the geo anchor (attribute names plus coordinates) for a search.
#
# Attribute names are resolved, in order of precedence, from
# options[:latitude_attr] / options[:longitude_attr], then the first index
# that sets :latitude_attr / :longitude_attr in its options, then the first
# conventionally named attribute the class defines (:lat, :latitude for
# latitude; :lng, :lon, :long, :longitude for longitude).
#
# Coordinates come from options[:geo] (first and last element) when given,
# otherwise from options[:lat] and options[:lon].
#
# Returns nil unless both coordinates are present; otherwise a hash with
# :latitude_attribute, :latitude, :longitude_attribute and :longitude. The
# attribute names are stringified even when none could be resolved, in which
# case they are empty strings.
#
def anchor_conditions(klass, options)
  indexes = klass ? klass.sphinx_indexes : []

  known_attributes = indexes.collect { |index|
    index.attributes.collect { |attrib| attrib.unique_name }
  }.flatten

  # Shared resolution steps for both the latitude and longitude attribute.
  resolve = lambda do |option_key, conventional_names|
    chosen = indexes.collect { |index| index.options[option_key] }.compact.first
    chosen = options[option_key] if options[option_key]
    conventional_names.each do |name|
      chosen ||= name if known_attributes.include?(name)
    end
    chosen
  end

  lat_attr = resolve.call(:latitude_attr,  [:lat, :latitude])
  lon_attr = resolve.call(:longitude_attr, [:lng, :lon, :long, :longitude])

  if options[:geo]
    lat, lon = options[:geo].first, options[:geo].last
  else
    lat, lon = options[:lat], options[:lon]
  end

  return nil unless lat && lon

  {
    :latitude_attribute  => lat_attr.to_s,
    :latitude            => lat,
    :longitude_attribute => lon_attr.to_s,
    :longitude           => lon
  }
end
|
677
|
+
|
678
|
+
# Apply ordering to the client, based on the :order option (or the class's
# index-level :order when the option is absent).
#
# * Symbol order - sorts by that attribute; when the symbol names one of the
#   class's fields, the matching "<field>_sort" attribute is used instead.
#   The sort mode becomes :attr_asc only if it is currently :relevance or
#   nil.
# * String order - the string is passed through
#   sorted_fields_to_attributes, and the sort mode becomes :extended unless
#   options[:sort_mode] was supplied.
# * Anything else - sort_by is left alone.
#
# Finally the shorthand modes :asc and :desc are expanded to :attr_asc and
# :attr_desc.
#
def set_sort_options!(client, options)
  klass = options[:class]

  if klass
    field_names = klass.sphinx_indexes.collect { |index|
      index.fields.collect { |field| field.unique_name }
    }.flatten
    index_options = klass.sphinx_index_options
  else
    field_names   = []
    index_options = {}
  end

  order = options[:order] || index_options[:order]

  case order
  when String
    client.sort_mode = :extended unless options[:sort_mode]
    client.sort_by   = sorted_fields_to_attributes(order, field_names)
  when Symbol
    if client.sort_mode.nil? || client.sort_mode == :relevance
      client.sort_mode = :attr_asc
    end
    client.sort_by = field_names.include?(order) ? "#{order}_sort" : order.to_s
  end

  client.sort_mode = :attr_asc  if client.sort_mode == :asc
  client.sort_mode = :attr_desc if client.sort_mode == :desc
end
|
707
|
+
|
708
|
+
# Search through a collection of fields and translate any appearances
# of them in a string to their attribute equivalent for sorting.
#
# A field name is only rewritten (to "<field>_sort") where it stands alone:
# preceded by the start of a line or whitespace, and followed by whitespace,
# a comma plus whitespace, or the end of a line.
#
# Returns a new string. The string passed in is left untouched, so order
# clauses held in an options hash, or frozen strings, are safe to pass.
#
def sorted_fields_to_attributes(string, fields)
  fields.inject(string.dup) do |sorted, field|
    name = field.to_s

    # Escape the name so it is matched literally, not as a pattern.
    sorted.gsub(/(^|\s)#{Regexp.escape(name)}(,?\s|$)/) do
      "#{Regexp.last_match(1)}#{name}_sort#{Regexp.last_match(2)}"
    end
  end
end
|
720
|
+
|
721
|
+
# Send +message+ to the ActiveRecord logger at the level named by +method+
# (:debug unless told otherwise). Does nothing when no logger is set.
#
def log(message, method = :debug)
  logger = ::ActiveRecord::Base.logger
  logger.send(method, message) unless logger.nil?
end
|
725
|
+
end
|
726
|
+
end
|
727
|
+
end
|