pixeltrix-thinking-sphinx 1.1.5
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENCE +20 -0
- data/README +107 -0
- data/lib/thinking_sphinx.rb +144 -0
- data/lib/thinking_sphinx/active_record.rb +245 -0
- data/lib/thinking_sphinx/active_record/delta.rb +74 -0
- data/lib/thinking_sphinx/active_record/has_many_association.rb +29 -0
- data/lib/thinking_sphinx/active_record/search.rb +57 -0
- data/lib/thinking_sphinx/adapters/abstract_adapter.rb +34 -0
- data/lib/thinking_sphinx/adapters/mysql_adapter.rb +53 -0
- data/lib/thinking_sphinx/adapters/postgresql_adapter.rb +129 -0
- data/lib/thinking_sphinx/association.rb +144 -0
- data/lib/thinking_sphinx/attribute.rb +258 -0
- data/lib/thinking_sphinx/collection.rb +142 -0
- data/lib/thinking_sphinx/configuration.rb +236 -0
- data/lib/thinking_sphinx/core/string.rb +22 -0
- data/lib/thinking_sphinx/deltas.rb +22 -0
- data/lib/thinking_sphinx/deltas/datetime_delta.rb +50 -0
- data/lib/thinking_sphinx/deltas/default_delta.rb +65 -0
- data/lib/thinking_sphinx/deltas/delayed_delta.rb +25 -0
- data/lib/thinking_sphinx/deltas/delayed_delta/delta_job.rb +24 -0
- data/lib/thinking_sphinx/deltas/delayed_delta/flag_as_deleted_job.rb +27 -0
- data/lib/thinking_sphinx/deltas/delayed_delta/job.rb +26 -0
- data/lib/thinking_sphinx/facet.rb +58 -0
- data/lib/thinking_sphinx/facet_collection.rb +44 -0
- data/lib/thinking_sphinx/field.rb +172 -0
- data/lib/thinking_sphinx/index.rb +414 -0
- data/lib/thinking_sphinx/index/builder.rb +233 -0
- data/lib/thinking_sphinx/index/faux_column.rb +110 -0
- data/lib/thinking_sphinx/rails_additions.rb +133 -0
- data/lib/thinking_sphinx/search.rb +638 -0
- data/lib/thinking_sphinx/tasks.rb +128 -0
- data/spec/unit/thinking_sphinx/active_record/delta_spec.rb +136 -0
- data/spec/unit/thinking_sphinx/active_record/has_many_association_spec.rb +53 -0
- data/spec/unit/thinking_sphinx/active_record/search_spec.rb +107 -0
- data/spec/unit/thinking_sphinx/active_record_spec.rb +256 -0
- data/spec/unit/thinking_sphinx/association_spec.rb +247 -0
- data/spec/unit/thinking_sphinx/attribute_spec.rb +212 -0
- data/spec/unit/thinking_sphinx/collection_spec.rb +14 -0
- data/spec/unit/thinking_sphinx/configuration_spec.rb +136 -0
- data/spec/unit/thinking_sphinx/core/string_spec.rb +9 -0
- data/spec/unit/thinking_sphinx/field_spec.rb +145 -0
- data/spec/unit/thinking_sphinx/index/builder_spec.rb +5 -0
- data/spec/unit/thinking_sphinx/index/faux_column_spec.rb +30 -0
- data/spec/unit/thinking_sphinx/index_spec.rb +54 -0
- data/spec/unit/thinking_sphinx/search_spec.rb +59 -0
- data/spec/unit/thinking_sphinx_spec.rb +129 -0
- data/tasks/distribution.rb +48 -0
- data/tasks/rails.rake +1 -0
- data/tasks/testing.rb +86 -0
- data/vendor/after_commit/LICENSE +20 -0
- data/vendor/after_commit/README +16 -0
- data/vendor/after_commit/Rakefile +22 -0
- data/vendor/after_commit/init.rb +5 -0
- data/vendor/after_commit/lib/after_commit.rb +42 -0
- data/vendor/after_commit/lib/after_commit/active_record.rb +91 -0
- data/vendor/after_commit/lib/after_commit/connection_adapters.rb +103 -0
- data/vendor/after_commit/test/after_commit_test.rb +53 -0
- data/vendor/delayed_job/lib/delayed/job.rb +251 -0
- data/vendor/delayed_job/lib/delayed/message_sending.rb +7 -0
- data/vendor/delayed_job/lib/delayed/performable_method.rb +55 -0
- data/vendor/delayed_job/lib/delayed/worker.rb +54 -0
- data/vendor/riddle/lib/riddle.rb +30 -0
- data/vendor/riddle/lib/riddle/client.rb +619 -0
- data/vendor/riddle/lib/riddle/client/filter.rb +53 -0
- data/vendor/riddle/lib/riddle/client/message.rb +65 -0
- data/vendor/riddle/lib/riddle/client/response.rb +84 -0
- data/vendor/riddle/lib/riddle/configuration.rb +33 -0
- data/vendor/riddle/lib/riddle/configuration/distributed_index.rb +48 -0
- data/vendor/riddle/lib/riddle/configuration/index.rb +142 -0
- data/vendor/riddle/lib/riddle/configuration/indexer.rb +19 -0
- data/vendor/riddle/lib/riddle/configuration/remote_index.rb +17 -0
- data/vendor/riddle/lib/riddle/configuration/searchd.rb +25 -0
- data/vendor/riddle/lib/riddle/configuration/section.rb +37 -0
- data/vendor/riddle/lib/riddle/configuration/source.rb +23 -0
- data/vendor/riddle/lib/riddle/configuration/sql_source.rb +34 -0
- data/vendor/riddle/lib/riddle/configuration/xml_source.rb +28 -0
- data/vendor/riddle/lib/riddle/controller.rb +44 -0
- metadata +157 -0
@@ -0,0 +1,110 @@
|
|
1
|
+
module ThinkingSphinx
|
2
|
+
class Index
|
3
|
+
# Instances of this class represent database columns and the stack of
|
4
|
+
# associations that lead from the base model to them.
|
5
|
+
#
|
6
|
+
# The name and stack are accessible through methods starting with __ to
|
7
|
+
# avoid conflicting with the method_missing calls that build the stack.
|
8
|
+
#
|
9
|
+
class FauxColumn
  # Create a new column with a pre-defined stack. The last element given is
  # popped off to become the name; everything before it stays as the stack.
  #
  def initialize(*stack)
    @name  = stack.pop
    @stack = stack
  end

  # Converts the given value into FauxColumn form:
  #
  # * a Symbol or String becomes a new FauxColumn with that name,
  # * an Array is converted element by element (recursively),
  # * an existing FauxColumn is returned as-is,
  # * anything else yields nil.
  #
  def self.coerce(columns)
    case columns
    when Symbol, String
      FauxColumn.new(columns)
    when Array
      columns.collect { |col| FauxColumn.coerce(col) }
    when FauxColumn
      columns
    else
      nil
    end
  end

  # Can't use normal method name, as that could be an association or
  # column name.
  #
  def __name
    @name
  end

  # Can't use normal method name, as that could be an association or
  # column name.
  #
  def __stack
    @stack
  end

  # Returns true if the stack is empty *and* if the name is a string -
  # which is an indication of raw SQL, as opposed to a value from a
  # table's column.
  #
  def is_string?
    @name.is_a?(String) && @stack.empty?
  end

  # This handles any 'invalid' method calls and sets them as the name,
  # pushing the previous name onto the stack. The object returns itself.
  #
  # If there's a single argument, it becomes the name, and the method
  # symbol goes into the stack as well. With multiple arguments, one new
  # column per argument is returned, each with a copy of the (updated)
  # stack and the argument as its name. Note that the receiver is still
  # modified in that case: its name becomes the called method.
  #
  # Easier to explain with examples:
  #
  #   col = FauxColumn.new :a, :b, :c
  #   col.__name  #=> :c
  #   col.__stack #=> [:a, :b]
  #
  #   col.whatever #=> col
  #   col.__name   #=> :whatever
  #   col.__stack  #=> [:a, :b, :c]
  #
  #   col.something(:id) #=> col
  #   col.__name         #=> :id
  #   col.__stack        #=> [:a, :b, :c, :whatever, :something]
  #
  #   cols = col.short(:x, :y, :z)
  #   cols[0].__name  #=> :x
  #   cols[0].__stack #=> [:a, :b, :c, :whatever, :something, :id, :short]
  #   cols[1].__name  #=> :y
  #   cols[1].__stack #=> [:a, :b, :c, :whatever, :something, :id, :short]
  #   cols[2].__name  #=> :z
  #   cols[2].__stack #=> [:a, :b, :c, :whatever, :something, :id, :short]
  #
  # Also, this allows method chaining to build up a relevant stack:
  #
  #   col = FauxColumn.new :a, :b
  #   col.__name  #=> :b
  #   col.__stack #=> [:a]
  #
  #   col.one.two.three #=> col
  #   col.__name        #=> :three
  #   col.__stack       #=> [:a, :b, :one, :two]
  #
  def method_missing(method, *args)
    @stack << @name
    @name   = method

    if args.empty?
      self
    elsif args.length == 1
      method_missing(args.first)
    else
      args.collect { |arg|
        # The splat matters: initialize takes the stack as separate
        # arguments. Passing the array itself would make the whole array
        # the name and leave the stack empty.
        FauxColumn.new(*(@stack + [@name, arg]))
      }
    end
  end
end
|
109
|
+
end
|
110
|
+
end
|
@@ -0,0 +1,133 @@
|
|
1
|
+
# Set is used by HashExcept#except; load it here rather than relying on
# another library having required it already.
require 'set'

module ThinkingSphinx
  module HashExcept
    # Returns a new hash without the given keys.
    #
    # If the receiver has a convert_key method (public or not), each given
    # key is passed through it before comparison.
    def except(*keys)
      keys     = keys.map { |key| convert_key(key) } if respond_to?(:convert_key, true)
      rejected = Set.new(keys)
      reject { |key, _value| rejected.include?(key) }
    end

    # Removes the given keys from this hash in place, and returns the hash.
    def except!(*keys)
      replace(except(*keys))
    end
  end
end

# Only mix the module in when Hash is missing one of the two methods.
# method_defined? is used because instance_methods returns Strings on Ruby
# 1.8 but Symbols from 1.9 onwards, so a String comparison never matches there.
# Methods Hash defines itself take precedence over the ones mixed in here.
unless Hash.method_defined?(:except) && Hash.method_defined?(:except!)
  Hash.send(:include, ThinkingSphinx::HashExcept)
end
|
19
|
+
|
20
|
+
module ThinkingSphinx
  module ArrayExtractOptions
    # If the last element is a Hash, removes it from the array and returns
    # it. Otherwise returns a new empty Hash and leaves the array untouched.
    def extract_options!
      last.is_a?(::Hash) ? pop : {}
    end
  end
end

# method_defined? is used because instance_methods returns Strings on Ruby
# 1.8 but Symbols from 1.9 onwards, so a String comparison never matches there.
unless Array.method_defined?(:extract_options!)
  Array.send(:include, ThinkingSphinx::ArrayExtractOptions)
end
|
31
|
+
|
32
|
+
module ThinkingSphinx
  module AbstractQuotedTableName
    # Fallback table-name quoting: delegates to the adapter's
    # quote_column_name.
    def quote_table_name(name)
      quote_column_name(name)
    end
  end
end

# Only add the fallback when the abstract adapter has no quote_table_name.
# method_defined? is used because instance_methods returns Strings on Ruby
# 1.8 but Symbols from 1.9 onwards, so a String comparison never matches there.
unless ActiveRecord::ConnectionAdapters::AbstractAdapter.method_defined?(:quote_table_name)
  ActiveRecord::ConnectionAdapters::AbstractAdapter.send(
    :include, ThinkingSphinx::AbstractQuotedTableName
  )
end
|
43
|
+
|
44
|
+
module ThinkingSphinx
  module MysqlQuotedTableName
    # Quotes a table name via quote_column_name, then replaces every "."
    # with "`.`" so each dot-separated part ends up quoted separately.
    # NOTE(review): assumes quote_column_name wraps the name in backticks.
    def quote_table_name(name) #:nodoc:
      quote_column_name(name).gsub('.', '`.`')
    end
  end
end

# Names are compared as Strings after to_s, because constants and
# instance_methods return Strings on Ruby 1.8 but Symbols from 1.9 onwards.
#
# instance_methods(false) is deliberate: the check must ignore a
# quote_table_name inherited from the abstract adapter (which this file may
# just have added there), otherwise the MySQL version is never installed.
if ActiveRecord::ConnectionAdapters.constants.map { |const| const.to_s }.include?("MysqlAdapter")
  unless ActiveRecord::ConnectionAdapters::MysqlAdapter.instance_methods(false).map { |meth| meth.to_s }.include?("quote_table_name")
    ActiveRecord::ConnectionAdapters::MysqlAdapter.send(
      :include, ThinkingSphinx::MysqlQuotedTableName
    )
  end
end
|
57
|
+
|
58
|
+
module ThinkingSphinx
  module ActiveRecordQuotedName
    # The model's table name, quoted by the model's own connection.
    def quoted_table_name
      connection.quote_table_name(table_name)
    end
  end
end

# Only extend ActiveRecord::Base when it does not already respond to
# quoted_table_name.
unless ActiveRecord::Base.respond_to?("quoted_table_name")
  ActiveRecord::Base.extend(ThinkingSphinx::ActiveRecordQuotedName)
end
|
69
|
+
|
70
|
+
module ThinkingSphinx
  module ActiveRecordStoreFullSTIClass
    # Fallback reader that always answers false.
    def store_full_sti_class
      false
    end
  end
end

# Only extend ActiveRecord::Base when it does not already respond to
# store_full_sti_class.
unless ActiveRecord::Base.respond_to?(:store_full_sti_class)
  ActiveRecord::Base.extend(ThinkingSphinx::ActiveRecordStoreFullSTIClass)
end
|
81
|
+
|
82
|
+
module ThinkingSphinx
  # Minimal cattr_reader / cattr_writer / cattr_accessor macros, backed by
  # class variables (@@name), for use when nothing else provides them.
  module ClassAttributeMethods
    # Defines a class-level and an instance-level reader for each given
    # name. The class variable is initialised to nil if not yet defined.
    # A Hash among the arguments (i.e. an options hash) is ignored.
    def cattr_reader(*syms)
      syms.flatten.each do |sym|
        next if sym.is_a?(Hash)
        # __LINE__ + 1 so backtraces point at the first heredoc line.
        class_eval(<<-EOS, __FILE__, __LINE__ + 1)
          unless defined? @@#{sym}
            @@#{sym} = nil
          end

          def self.#{sym}
            @@#{sym}
          end

          def #{sym}
            @@#{sym}
          end
        EOS
      end
    end

    # Defines a class-level writer for each given name, plus an
    # instance-level writer unless a trailing options hash contains
    # :instance_writer => false.
    def cattr_writer(*syms)
      # Pull a trailing options hash off directly, so this module does not
      # depend on Array#extract_options! being available.
      options = syms.last.is_a?(::Hash) ? syms.pop : {}
      syms.flatten.each do |sym|
        # __LINE__ + 1 so backtraces point at the first heredoc line.
        class_eval(<<-EOS, __FILE__, __LINE__ + 1)
          unless defined? @@#{sym}
            @@#{sym} = nil
          end

          def self.#{sym}=(obj)
            @@#{sym} = obj
          end

          #{"
          def #{sym}=(obj)
            @@#{sym} = obj
          end
          " unless options[:instance_writer] == false }
        EOS
      end
    end

    # Defines both the readers and the writers for each given name.
    def cattr_accessor(*syms)
      cattr_reader(*syms)
      cattr_writer(*syms)
    end
  end
end

# The macros are included into Class (as instance methods) so that every
# class can call them in its body. Extending Class would only define them
# on the Class object itself, leaving ordinary classes without them.
unless Class.method_defined?(:cattr_reader)
  Class.send(:include, ThinkingSphinx::ClassAttributeMethods)
end
|
@@ -0,0 +1,638 @@
|
|
1
|
+
module ThinkingSphinx
|
2
|
+
# Once you've got those indexes in and built, this is the stuff that
|
3
|
+
# matters - how to search! This class provides a generic search
|
4
|
+
# interface - which you can use to search all your indexed models at once.
|
5
|
+
# Most times, you will just want a specific model's results - the search and
|
6
|
+
# search_for_ids methods will do the job in exactly the same manner when
|
7
|
+
# called from a model.
|
8
|
+
#
|
9
|
+
class Search
|
10
|
+
class << self
|
11
|
+
# Searches for results that match the parameters provided. Will only
|
12
|
+
# return the ids for the matching objects. See #search for syntax
|
13
|
+
# examples.
|
14
|
+
#
|
15
|
+
# Note that this only searches the Sphinx index, with no ActiveRecord
|
16
|
+
# queries. Thus, if your index is not in sync with the database, this
|
17
|
+
# method may return ids that no longer exist there.
|
18
|
+
#
|
19
|
+
def search_for_ids(*args)
  # Query with a copy of the arguments; the options hash is read from the
  # original array afterwards.
  results, client = search_results(*args.clone)

  options     = args.extract_options!
  page_number = (options[:page] || 1).to_i

  ThinkingSphinx::Collection.ids_from_results(
    results, page_number, client.limit, options
  )
end
|
27
|
+
|
28
|
+
# Searches through the Sphinx indexes for relevant matches. There's
|
29
|
+
# various ways to search, sort, group and filter - which are covered
|
30
|
+
# below.
|
31
|
+
#
|
32
|
+
# Also, if you have WillPaginate installed, the search method can be used
|
33
|
+
# just like paginate. The same parameters - :page and :per_page - work as
|
34
|
+
# expected, and the returned result set can be used by the will_paginate
|
35
|
+
# helper.
|
36
|
+
#
|
37
|
+
# == Basic Searching
|
38
|
+
#
|
39
|
+
# The simplest way of searching is straight text.
|
40
|
+
#
|
41
|
+
# ThinkingSphinx::Search.search "pat"
|
42
|
+
# ThinkingSphinx::Search.search "google"
|
43
|
+
# User.search "pat", :page => (params[:page] || 1)
|
44
|
+
# Article.search "relevant news issue of the day"
|
45
|
+
#
|
46
|
+
# If you specify :include, like in an #find call, this will be respected
|
47
|
+
# when loading the relevant models from the search results.
|
48
|
+
#
|
49
|
+
# User.search "pat", :include => :posts
|
50
|
+
#
|
51
|
+
# == Match Modes
|
52
|
+
#
|
53
|
+
# Sphinx supports 5 different matching modes. By default Thinking Sphinx
|
54
|
+
# uses :all, which unsurprisingly requires all the supplied search terms
|
55
|
+
# to match a result.
|
56
|
+
#
|
57
|
+
# Alternative modes include:
|
58
|
+
#
|
59
|
+
# User.search "pat allan", :match_mode => :any
|
60
|
+
# User.search "pat allan", :match_mode => :phrase
|
61
|
+
# User.search "pat | allan", :match_mode => :boolean
|
62
|
+
# User.search "@name pat | @username pat", :match_mode => :extended
|
63
|
+
#
|
64
|
+
# Any will find results with any of the search terms. Phrase treats the search
|
65
|
+
# terms as a single phrase instead of individual words. Boolean and extended allow
|
66
|
+
# for more complex query syntax, refer to the sphinx documentation for further
|
67
|
+
# details.
|
68
|
+
#
|
69
|
+
# == Weighting
|
70
|
+
#
|
71
|
+
# Sphinx has support for weighting, where matches in one field can be considered
|
72
|
+
# more important than in another. Weights are integers, with 1 as the default.
|
73
|
+
# They can be set per-search like this:
|
74
|
+
#
|
75
|
+
# User.search "pat allan", :field_weights => { :alias => 4, :aka => 2 }
|
76
|
+
#
|
77
|
+
# If you're searching multiple models, you can set per-index weights:
|
78
|
+
#
|
79
|
+
# ThinkingSphinx::Search.search "pat", :index_weights => { User => 10 }
|
80
|
+
#
|
81
|
+
# See http://sphinxsearch.com/doc.html#weighting for further details.
|
82
|
+
#
|
83
|
+
# == Searching by Fields
|
84
|
+
#
|
85
|
+
# If you want to step it up a level, you can limit your search terms to
|
86
|
+
# specific fields:
|
87
|
+
#
|
88
|
+
# User.search :conditions => {:name => "pat"}
|
89
|
+
#
|
90
|
+
# This uses Sphinx's extended match mode, unless you specify a different
|
91
|
+
# match mode explicitly (but then this way of searching won't work). Also
|
92
|
+
# note that you don't need to put in a search string.
|
93
|
+
#
|
94
|
+
# == Searching by Attributes
|
95
|
+
#
|
96
|
+
# Also known as filters, you can limit your searches to documents that
|
97
|
+
# have specific values for their attributes. There are two ways to do
|
98
|
+
# this. The first is one that works in all scenarios - using the :with
|
99
|
+
# option.
|
100
|
+
#
|
101
|
+
# ThinkingSphinx::Search.search :with => {:parent_id => 10}
|
102
|
+
#
|
103
|
+
# The second is only viable if you're searching with a specific model
|
104
|
+
# (not multi-model searching). With a single model, Thinking Sphinx
|
105
|
+
# can figure out what attributes and fields are available, so you can
|
106
|
+
# put it all in the :conditions hash, and it will sort it out.
|
107
|
+
#
|
108
|
+
# Node.search :conditions => {:parent_id => 10}
|
109
|
+
#
|
110
|
+
# Filters can be single values, arrays of values, or ranges.
|
111
|
+
#
|
112
|
+
# Article.search "East Timor", :conditions => {:rating => 3..5}
|
113
|
+
#
|
114
|
+
# == Excluding by Attributes
|
115
|
+
#
|
116
|
+
# Sphinx also supports negative filtering - where the filters are of
|
117
|
+
# attribute values to exclude. This is done with the :without option:
|
118
|
+
#
|
119
|
+
# User.search :without => {:role_id => 1}
|
120
|
+
#
|
121
|
+
# == Excluding by Primary Key
|
122
|
+
#
|
123
|
+
# There is a shortcut to exclude records by their ActiveRecord primary key:
|
124
|
+
#
|
125
|
+
# User.search :without_ids => 1
|
126
|
+
#
|
127
|
+
# Pass an array or a single value.
|
128
|
+
#
|
129
|
+
# The primary key must be an integer as a negative filter is used. Note
|
130
|
+
# that for multi-model search, an id may occur in more than one model.
|
131
|
+
#
|
132
|
+
# == Infix (Star) Searching
|
133
|
+
#
|
134
|
+
# By default, Sphinx uses English stemming, e.g. matching "shoes" if you
|
135
|
+
# search for "shoe". It won't find "Melbourne" if you search for
|
136
|
+
# "elbourn", though.
|
137
|
+
#
|
138
|
+
# Enable infix searching by something like this in config/sphinx.yml:
|
139
|
+
#
|
140
|
+
# development:
|
141
|
+
# enable_star: 1
|
142
|
+
# min_infix_length: 2
|
143
|
+
#
|
144
|
+
# Note that this will make indexing take longer.
|
145
|
+
#
|
146
|
+
# With those settings (and after reindexing), wildcard asterisks can be used
|
147
|
+
# in queries:
|
148
|
+
#
|
149
|
+
# Location.search "*elbourn*"
|
150
|
+
#
|
151
|
+
# To automatically add asterisks around every token (but not operators),
|
152
|
+
# pass the :star option:
|
153
|
+
#
|
154
|
+
# Location.search "elbourn -ustrali", :star => true, :match_mode => :boolean
|
155
|
+
#
|
156
|
+
# This would become "*elbourn* -*ustrali*". The :star option only adds the
|
157
|
+
# asterisks. You need to make the config/sphinx.yml changes yourself.
|
158
|
+
#
|
159
|
+
# By default, the tokens are assumed to match the regular expression /\w+/u.
|
160
|
+
# If you've modified the charset_table, pass another regular expression, e.g.
|
161
|
+
#
|
162
|
+
# User.search("oo@bar.c", :star => /[\w@.]+/u)
|
163
|
+
#
|
164
|
+
# to search for "*oo@bar.c*" and not "*oo*@*bar*.*c*".
|
165
|
+
#
|
166
|
+
# == Sorting
|
167
|
+
#
|
168
|
+
# Sphinx can only sort by attributes, so generally you will need to avoid
|
169
|
+
# using field names in your :order option. However, if you're searching
|
170
|
+
# on a single model, and have specified some fields as sortable, you can
|
171
|
+
# use those field names and Thinking Sphinx will interpret accordingly.
|
172
|
+
# Remember: this will only happen for single-model searches, and only
|
173
|
+
# through the :order option.
|
174
|
+
#
|
175
|
+
# Location.search "Melbourne", :order => :state
|
176
|
+
# User.search :conditions => {:role_id => 2}, :order => "name ASC"
|
177
|
+
#
|
178
|
+
# Keep in mind that if you use a string, you *must* specify the direction
|
179
|
+
# (ASC or DESC) else Sphinx won't return any results. If you use a symbol
|
180
|
+
# then Thinking Sphinx assumes ASC, but if you wish to state otherwise,
|
181
|
+
# use the :sort_mode option:
|
182
|
+
#
|
183
|
+
# Location.search "Melbourne", :order => :state, :sort_mode => :desc
|
184
|
+
#
|
185
|
+
# Of course, there are other sort modes - check out the Sphinx
|
186
|
+
# documentation[http://sphinxsearch.com/doc.html] for that level of
|
187
|
+
# detail though.
|
188
|
+
#
|
189
|
+
# == Grouping
|
190
|
+
#
|
191
|
+
# For this you can use the group_by, group_clause and group_function
|
192
|
+
# options - which are all directly linked to Sphinx's expectations. No
|
193
|
+
# magic from Thinking Sphinx. It can get a little tricky, so make sure
|
194
|
+
# you read all the relevant
|
195
|
+
# documentation[http://sphinxsearch.com/doc.html#clustering] first.
|
196
|
+
#
|
197
|
+
# Yes this section will be expanded, but this is a start.
|
198
|
+
#
|
199
|
+
# == Geo/Location Searching
|
200
|
+
#
|
201
|
+
# Sphinx - and therefore Thinking Sphinx - has the facility to search
|
202
|
+
# around a geographical point, using a given latitude and longitude. To
|
203
|
+
# take advantage of this, you will need to have both of those values in
|
204
|
+
# attributes. To search with that point, you can then use one of the
|
205
|
+
# following syntax examples:
|
206
|
+
#
|
207
|
+
# Address.search "Melbourne", :geo => [1.4, -2.217], :order => "@geodist asc"
|
208
|
+
# Address.search "Australia", :geo => [-0.55, 3.108], :order => "@geodist asc"
|
209
|
+
# :latitude_attr => "latit", :longitude_attr => "longit"
|
210
|
+
#
|
211
|
+
# The first example applies when your latitude and longitude attributes
|
212
|
+
# are named any of lat, latitude, lon, long or longitude. If that's not
|
213
|
+
# the case, you will need to explicitly state them in your search, _or_
|
214
|
+
# you can do so in your model:
|
215
|
+
#
|
216
|
+
# define_index do
|
217
|
+
# has :latit # Float column, stored in radians
|
218
|
+
# has :longit # Float column, stored in radians
|
219
|
+
#
|
220
|
+
# set_property :latitude_attr => "latit"
|
221
|
+
# set_property :longitude_attr => "longit"
|
222
|
+
# end
|
223
|
+
#
|
224
|
+
# Now, geo-location searching really only has an effect if you have a
|
225
|
+
# filter, sort or grouping clause related to it - otherwise it's just a
|
226
|
+
# normal search, and _will not_ return a distance value otherwise. To
|
227
|
+
# make use of the positioning difference, use the special attribute
|
228
|
+
# "@geodist" in any of your filters or sorting or grouping clauses.
|
229
|
+
#
|
230
|
+
# And don't forget - both the latitude and longitude you use in your
|
231
|
+
# search, and the values in your indexes, need to be stored as a float in radians,
|
232
|
+
# _not_ degrees. Keep in mind that if you do this conversion in SQL
|
233
|
+
# you will need to explicitly declare a column type of :float.
|
234
|
+
#
|
235
|
+
# define_index do
|
236
|
+
# has 'RADIANS(lat)', :as => :lat, :type => :float
|
237
|
+
# # ...
|
238
|
+
# end
|
239
|
+
#
|
240
|
+
# Once you've got your results set, you can access the distances as
|
241
|
+
# follows:
|
242
|
+
#
|
243
|
+
# @results.each_with_geodist do |result, distance|
|
244
|
+
# # ...
|
245
|
+
# end
|
246
|
+
#
|
247
|
+
# The distance value is returned as a float, representing the distance in
|
248
|
+
# metres.
|
249
|
+
#
|
250
|
+
# == Handling a Stale Index
|
251
|
+
#
|
252
|
+
# Especially if you don't use delta indexing, you risk having records in the
|
253
|
+
# Sphinx index that are no longer in the database. By default, those will simply
|
254
|
+
# come back as nils:
|
255
|
+
#
|
256
|
+
# >> pat_user.delete
|
257
|
+
# >> User.search("pat")
|
258
|
+
# Sphinx Result: [1,2]
|
259
|
+
# => [nil, <#User id: 2>]
|
260
|
+
#
|
261
|
+
# (If you search across multiple models, you'll get ActiveRecord::RecordNotFound.)
|
262
|
+
#
|
263
|
+
# You can simply Array#compact these results or handle the nils in some other way, but
|
264
|
+
# Sphinx will still report two results, and the missing records may upset your layout.
|
265
|
+
#
|
266
|
+
# If you pass :retry_stale => true to a single-model search, missing records will
|
267
|
+
# cause Thinking Sphinx to retry the query but excluding those records. Since search
|
268
|
+
# is paginated, the new search could potentially include missing records as well, so by
|
269
|
+
# default Thinking Sphinx will retry three times. Pass :retry_stale => 5 to retry five
|
270
|
+
# times, and so on. If there are still missing ids on the last retry, they are
|
271
|
+
# shown as nils.
|
272
|
+
#
|
273
|
+
def search(*args)
  query   = args.clone # an array
  options = query.extract_options!

  # The block below is re-run by retry_search_on_stale_index whenever stale
  # ids are reported and retries remain.
  retry_search_on_stale_index(query, options) do
    results, client = search_results(*(query + [options]))

    if results[:error]
      ::ActiveRecord::Base.logger.error("Sphinx Error: #{results[:error]}")
    end

    page_number = (options[:page] || 1).to_i

    ThinkingSphinx::Collection.create_from_results(
      results, page_number, client.limit, options
    )
  end
end
|
290
|
+
|
291
|
+
# Runs the given block, re-running it when it raises StaleIdsException,
# up to a limited number of times.
#
# options[:retry_stale] sets the number of retries: true means three,
# nil/false means none, anything else is converted with to_i.
#
# The options hash is modified in place: :raise_on_stale is set before each
# attempt, and the stale ids reported by the exception are merged into
# :without_ids before each retry. The query argument is not used here.
#
# Returns whatever the block returns on its final attempt.
#
def retry_search_on_stale_index(query, options, &block)
  stale_ids = []
  # "when ... then" rather than the "when ...:" form, which Ruby 1.9+
  # rejects as a syntax error.
  stale_retries_left = case options[:retry_stale]
    when true       then 3 # default to three retries
    when nil, false then 0 # no retries
    else options[:retry_stale].to_i
  end

  begin
    # Passing this in an option so Collection.create_from_results can see it.
    # It should only raise on stale records if there are any retries left.
    options[:raise_on_stale] = stale_retries_left > 0
    block.call
  # If ThinkingSphinx::Collection.create_from_results found records in Sphinx but not
  # in the DB and the :raise_on_stale option is set, this exception is raised. We retry
  # a limited number of times, excluding the stale ids from the search.
  rescue StaleIdsException => e
    stale_retries_left -= 1

    stale_ids |= e.ids # For logging
    options[:without_ids] = Array(options[:without_ids]) | e.ids # Actual exclusion

    tries = stale_retries_left
    ::ActiveRecord::Base.logger.debug("Sphinx Stale Ids (%s %s left): %s" % [
      tries, (tries==1 ? 'try' : 'tries'), stale_ids.join(', ')
    ])

    retry
  end
end
|
320
|
+
|
321
|
+
def count(*args)
  # Only the results hash is needed; the client is discarded.
  results, _client = search_results(*args.clone)
  results[:total_found] || 0
end
|
325
|
+
|
326
|
+
# Checks if a document with the given id exists within a specific index.
|
327
|
+
# Expected parameters:
|
328
|
+
#
|
329
|
+
# - ID of the document
|
330
|
+
# - Index to check within
|
331
|
+
# - Options hash (defaults to {})
|
332
|
+
#
|
333
|
+
# Example:
|
334
|
+
#
|
335
|
+
# ThinkingSphinx::Search.search_for_id(10, "user_core", :class => User)
|
336
|
+
#
|
337
|
+
def search_for_id(*args)
  options = args.extract_options!
  client  = client_from_options options

  conditions     = options[:conditions] || {}
  query, filters = search_conditions(options[:class], conditions)

  client.filters   += filters
  client.match_mode = :extended unless query.empty?
  # Restrict the search to exactly the requested document id.
  client.id_range   = args.first..args.first

  begin
    # args[1] is the index to check within.
    matches = client.query(query, args[1])[:matches]
    return matches.length > 0
  rescue Errno::ECONNREFUSED
    raise ThinkingSphinx::ConnectionError, "Connection to Sphinx Daemon (searchd) failed."
  end
end
|
354
|
+
|
355
|
+
def facets(*args)
  collection = ThinkingSphinx::FacetCollection.new args
  # Work on a copy of the caller's options, grouping by attribute.
  options    = args.extract_options!.clone.merge! :group_function => :attr

  # One grouped search per facet of the class; each set of results is
  # added to the collection.
  options[:class].sphinx_facets.each do |facet|
    options[:group_by] = facet.attribute_name

    collection.add_from_results facet, search(*(args + [options]))
  end

  collection
end
|
366
|
+
|
367
|
+
private
|
368
|
+
|
369
|
+
# This method handles the common search functionality, and returns both
|
370
|
+
# the result hash and the client. Not super elegant, but it'll do for
|
371
|
+
# the moment.
|
372
|
+
#
|
373
|
+
def search_results(*args)
  options = args.extract_options!
  client  = client_from_options options

  # Everything left after the options hash is joined into the query text.
  query = args.join(' ')
  query = star_query(query, options[:star]) if options[:star]

  extra_query, filters = search_conditions(
    options[:class], options[:conditions] || {}
  )
  client.filters   += filters
  client.match_mode = :extended unless extra_query.empty?

  # Strip, because "" and " " are not equivalent.
  query = [query, extra_query].join(' ').strip

  set_sort_options! client, options

  # Pagination: page numbers start at 1.
  client.limit  = options[:per_page].to_i if options[:per_page]
  page_number   = (options[:page] || 1).to_i
  client.offset = (page_number - 1) * client.limit

  begin
    ::ActiveRecord::Base.logger.debug "Sphinx: #{query}"
    results = client.query query
    ids     = results[:matches].collect { |match|
      match[:attributes]["sphinx_internal_id"]
    }
    ::ActiveRecord::Base.logger.debug "Sphinx Result: #{ids.inspect}"
  rescue Errno::ECONNREFUSED
    raise ThinkingSphinx::ConnectionError, "Connection to Sphinx Daemon (searchd) failed."
  end

  [results, client]
end
|
404
|
+
|
405
|
+
# Set all the appropriate settings for the client, using the provided
|
406
|
+
# options hash.
|
407
|
+
#
|
408
|
+
def client_from_options(options = {})
  config        = ThinkingSphinx::Configuration.instance
  client        = Riddle::Client.new config.address, config.port
  klass         = options[:class]
  index_options = klass ? klass.sphinx_index_options : {}

  # The Riddle default is per-query max_matches=1000. If we set the
  # per-server max to a smaller value in sphinx.yml, we need to override
  # the Riddle default or else we get search errors like
  # "per-query max_matches=1000 out of bounds (per-server max_matches=200)"
  per_server_max_matches = config.configuration.searchd.max_matches
  options[:max_matches] ||= per_server_max_matches if per_server_max_matches

  # Turn :index_weights => { "foo" => 2, User => 1 }
  # into :index_weights => { "foo" => 2, "user_core" => 1, "user_delta" => 1 }
  if options[:index_weights]
    weights = {}
    options[:index_weights].each do |index, weight|
      if index.is_a?(Class)
        name = ThinkingSphinx::Index.name(index)
        weights["#{name}_core"]  = weight
        weights["#{name}_delta"] = weight
      else
        weights[index] = weight
      end
    end
    options[:index_weights] = weights
  end

  # For each client setting, the search options win over the index options,
  # which win over the client's current value.
  [
    :max_matches, :match_mode, :sort_mode, :sort_by, :id_range,
    :group_by, :group_function, :group_clause, :group_distinct, :cut_off,
    :retry_count, :retry_delay, :index_weights, :rank_mode,
    :max_query_time, :field_weights, :filters, :anchor, :limit
  ].each do |key|
    setter = "#{key}=".to_sym
    value  = options[key] || index_options[key] || client.send(key)
    client.send(setter, value)
  end

  options[:classes] = [klass] if klass

  if client.anchor.empty?
    client.anchor = anchor_conditions(klass, options) || {}
  end

  # Only match documents whose sphinx_deleted attribute is 0.
  client.filters << Riddle::Client::Filter.new("sphinx_deleted", [0])

  # class filters
  if options[:classes]
    class_crcs = options[:classes].collect { |k| k.to_crc32s }.flatten
    client.filters << Riddle::Client::Filter.new("class_crc", class_crcs)
  end

  # normal attribute filters
  if options[:with]
    client.filters += options[:with].collect { |attr, val|
      Riddle::Client::Filter.new attr.to_s, filter_value(val)
    }
  end

  # exclusive attribute filters
  if options[:without]
    client.filters += options[:without].collect { |attr, val|
      Riddle::Client::Filter.new attr.to_s, filter_value(val), true
    }
  end

  # every-match attribute filters: one filter per value
  if options[:with_all]
    client.filters += options[:with_all].collect { |attr, vals|
      Array(vals).collect { |val|
        Riddle::Client::Filter.new attr.to_s, filter_value(val)
      }
    }.flatten
  end

  # exclusive attribute filter on primary key
  if options[:without_ids]
    client.filters += Array(options[:without_ids]).collect { |id|
      Riddle::Client::Filter.new 'sphinx_internal_id', filter_value(id), true
    }
  end

  client
end
|
486
|
+
|
487
|
+
# Wraps the plain tokens of a query string in "*" wildcards.
#
# Every token found in +query+ is rewritten as "*token*" unless one of the
# following holds, in which case it is left exactly as written:
#
# * it is directly preceded by "@", "~" or "/" (e.g. "@foo", "/2", "~3"),
#   and that character sits at the start of a line or after a non-word
#   character (so it is not part of a larger token);
# * it is a double-quoted phrase (e.g. "foo bar");
# * it already touches a "*" on either side.
#
# query        - the String to rewrite. It is not modified; a new String is
#                returned.
# custom_token - optional Regexp describing what a token looks like. Any
#                other value (including nil) falls back to /\w+/u.
#
# Returns the rewritten String.
def star_query(query, custom_token = nil)
  token = custom_token.is_a?(Regexp) ? custom_token : /\w+/u

  query.gsub(/("#{token}(.*?#{token})?"|(?![!-])#{token})/u) do
    # Capture the match data first: the regexp test below replaces it.
    match  = Regexp.last_match
    pre    = match.pre_match
    proper = match[0]
    post   = match.post_match

    # E.g. "@foo", "/2", "~3", but not as part of a token
    is_operator = pre =~ %r{(\W|^)[@~/]\Z}
    # E.g. "foo bar", with quotes. Core start_with?/end_with? are used so
    # the method does not depend on ActiveSupport's String extensions.
    is_quote = proper.start_with?('"') && proper.end_with?('"')
    has_star = pre.end_with?("*") || post.start_with?("*")

    if is_operator || is_quote || has_star
      proper
    else
      "*#{proper}*"
    end
  end
end
|
502
|
+
|
503
|
+
# Normalises a filter value into the form handed to Riddle::Client::Filter
# by the callers in this file: either a Range or an Array.
#
# * Range - returned unchanged, unless its first element is a Time, in
#           which case an inclusive Range of the two integer timestamps is
#           returned.
# * Array - returned as a new Array with every Time element replaced by its
#           integer timestamp; other elements are kept as they are.
# * Time  - returned as a one-element Array holding its integer timestamp.
# * other - wrapped with Array() (so nil becomes an empty Array).
#
# Time conversion is delegated to #timestamp.
def filter_value(value)
  case value
  when Range
    value.first.is_a?(Time) ? timestamp(value.first)..timestamp(value.last) : value
  when Array
    value.collect { |val| val.is_a?(Time) ? timestamp(val) : val }
  else
    # A lone Time must be converted explicitly: Array(time) would call
    # Time#to_a and yield the broken-down components (sec, min, hour, ...)
    # rather than a single timestamp.
    value.is_a?(Time) ? [timestamp(value)] : Array(value)
  end
end
|
513
|
+
|
514
|
+
# Returns the integer timestamp (value.to_i) for a Time-like object.
#
# If the value responds to in_time_zone (a method that was added for the
# timezone support in Rails 2.1, which is why it's used for testing), it is
# translated to UTC before being converted, as that's what datetimes will be
# stored as by MySQL.
#
# The UTC translation is done with getutc where available: Time#utc converts
# its receiver in place, which would silently change the caller's object,
# whereas getutc returns a converted copy.
#
# value - the object to convert; must respond to to_i.
#
# Returns an Integer.
def timestamp(value)
  return value.to_i unless value.respond_to?(:in_time_zone)

  utc_value = value.respond_to?(:getutc) ? value.getutc : value.utc
  utc_value.to_i
end
|
526
|
+
|
527
|
+
# Translate field and attribute conditions to the relevant search string
# and filters.
#
# klass      - the model class whose sphinx_indexes supply the attribute
#              names (each attribute's unique_name), or nil, in which case
#              no key is treated as an attribute.
# conditions - Hash of name => value pairs. nil is accepted and treated as
#              an empty Hash.
#
# Keys naming an attribute (compared via to_sym) become
# Riddle::Client::Filter objects built from the key and filter_value(value).
# All other keys become "@key value" fragments of the search string.
#
# Returns a two-element Array: the fragments joined by single spaces, and
# the Array of filters.
def search_conditions(klass, conditions={})
  indexes = klass ? klass.sphinx_indexes : []
  attributes = indexes.collect { |index|
    index.attributes.collect { |attrib| attrib.unique_name }
  }.flatten

  search_string = []
  filters       = []

  # Guard against an explicit nil so callers may pass an optional hash
  # straight through.
  (conditions || {}).each do |key, val|
    if attributes.include?(key.to_sym)
      filters << Riddle::Client::Filter.new(
        key.to_s, filter_value(val)
      )
    else
      search_string << "@#{key} #{val}"
    end
  end

  return search_string.join(' '), filters
end
|
550
|
+
|
551
|
+
# Builds the geographical anchor for a search.
#
# klass   - the model class being searched (may be nil). Its sphinx_indexes
#           are consulted for attribute names and for the :latitude_attr /
#           :longitude_attr index options.
# options - the search options Hash. Keys read here: :latitude_attr,
#           :longitude_attr, :lat, :lon and :geo.
#
# Attribute names are chosen in this order of preference:
#   1. options[:latitude_attr] / options[:longitude_attr]
#   2. the first index that sets :latitude_attr / :longitude_attr
#   3. an indexed attribute called :lat or :latitude (latitude), or
#      :lng, :lon, :long or :longitude (longitude)
#
# Coordinates come from options[:geo] (first and last element) when it is
# given, otherwise from options[:lat] and options[:lon].
#
# Returns a Hash with :latitude_attribute, :latitude, :longitude_attribute
# and :longitude (attribute names as Strings), or nil unless both
# coordinates are present.
def anchor_conditions(klass, options)
  indexes = klass ? klass.sphinx_indexes : []

  attribute_names = indexes.collect { |index|
    index.attributes.collect { |attrib| attrib.unique_name }
  }.flatten

  # First non-nil value of an index-level option across all indexes.
  index_setting = lambda { |key|
    indexes.collect { |index| index.options[key] }.compact.first
  }

  lat_attr = options[:latitude_attr] || index_setting.call(:latitude_attr)
  [:lat, :latitude].each { |name|
    lat_attr ||= name if attribute_names.include?(name)
  }

  lon_attr = options[:longitude_attr] || index_setting.call(:longitude_attr)
  [:lng, :lon, :long, :longitude].each { |name|
    lon_attr ||= name if attribute_names.include?(name)
  }

  lat, lon = options[:lat], options[:lon]
  geo = options[:geo]
  lat, lon = geo.first, geo.last if geo

  return nil unless lat && lon

  {
    :latitude_attribute  => lat_attr.to_s,
    :latitude            => lat,
    :longitude_attribute => lon_attr.to_s,
    :longitude           => lon
  }
end
|
593
|
+
|
594
|
+
# Set the sort options using the :order key as well as the appropriate
# Riddle settings.
#
# client  - the client object to configure; its sort_mode and sort_by are
#           read and assigned.
# options - the search options Hash. Keys read here: :class and :order.
#           When :order is absent, the :order entry of the class's
#           sphinx_index_options is used instead.
#
# Behaviour by type of the order value:
#
# * Symbol - sort_mode becomes :attr_asc if it was :relevance or nil, and
#            sort_by becomes the symbol's name, with "_sort" appended when
#            the symbol is the unique_name of one of the class's fields.
# * String - sort_mode becomes :extended and sort_by becomes the string as
#            translated by sorted_fields_to_attributes.
# * other  - nothing is changed.
#
# Finally the shorthand modes :asc and :desc are expanded to :attr_asc and
# :attr_desc.
def set_sort_options!(client, options)
  klass = options[:class]
  fields = klass ? klass.sphinx_indexes.collect { |index|
    index.fields.collect { |field| field.unique_name }
  }.flatten : []
  index_options = klass ? klass.sphinx_index_options : {}

  order = options[:order] || index_options[:order]
  case order
  when Symbol
    client.sort_mode = :attr_asc if client.sort_mode == :relevance || client.sort_mode.nil?
    client.sort_by = fields.include?(order) ? "#{order}_sort" : order.to_s
  when String
    client.sort_mode = :extended
    # Hand over a copy: the order string belongs to the caller's options or
    # to the class-level index options, and must not be edited (or required
    # to be unfrozen) just because a search was run.
    client.sort_by = sorted_fields_to_attributes(order.dup, fields)
  end

  client.sort_mode = :attr_asc  if client.sort_mode == :asc
  client.sort_mode = :attr_desc if client.sort_mode == :desc
end
|
623
|
+
|
624
|
+
# Search through a collection of fields and translate any appearances
# of them in a string to their attribute equivalent for sorting.
#
# A field name is translated (suffixed with "_sort") only where it stands
# as a whole word: at the start of a line or after whitespace, and followed
# by whitespace, by a comma plus whitespace, or by the end of a line.
#
# string - the sort clause, e.g. "name ASC, created_at DESC". It is left
#          untouched, so frozen and shared strings are safe to pass.
# fields - the field names (anything responding to to_s). They are not
#          modified either.
#
# Returns a new String with the translated sort clause.
def sorted_fields_to_attributes(string, fields)
  fields.inject(string.dup) do |clause, field|
    name = field.to_s

    # Escape the name so it is always matched literally.
    clause.gsub(/(^|\s)#{Regexp.escape(name)}(,?\s|$)/) do
      "#{$1}#{name}_sort#{$2}"
    end
  end
end
|
636
|
+
end
|
637
|
+
end
|
638
|
+
end
|