thinking-sphinx 1.2.12
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENCE +20 -0
- data/README.textile +157 -0
- data/VERSION.yml +4 -0
- data/lib/thinking_sphinx.rb +211 -0
- data/lib/thinking_sphinx/active_record.rb +307 -0
- data/lib/thinking_sphinx/active_record/attribute_updates.rb +48 -0
- data/lib/thinking_sphinx/active_record/delta.rb +87 -0
- data/lib/thinking_sphinx/active_record/has_many_association.rb +28 -0
- data/lib/thinking_sphinx/active_record/scopes.rb +39 -0
- data/lib/thinking_sphinx/adapters/abstract_adapter.rb +42 -0
- data/lib/thinking_sphinx/adapters/mysql_adapter.rb +54 -0
- data/lib/thinking_sphinx/adapters/postgresql_adapter.rb +136 -0
- data/lib/thinking_sphinx/association.rb +164 -0
- data/lib/thinking_sphinx/attribute.rb +342 -0
- data/lib/thinking_sphinx/class_facet.rb +15 -0
- data/lib/thinking_sphinx/configuration.rb +282 -0
- data/lib/thinking_sphinx/core/array.rb +7 -0
- data/lib/thinking_sphinx/core/string.rb +15 -0
- data/lib/thinking_sphinx/deltas.rb +30 -0
- data/lib/thinking_sphinx/deltas/datetime_delta.rb +50 -0
- data/lib/thinking_sphinx/deltas/default_delta.rb +68 -0
- data/lib/thinking_sphinx/deltas/delayed_delta.rb +30 -0
- data/lib/thinking_sphinx/deltas/delayed_delta/delta_job.rb +24 -0
- data/lib/thinking_sphinx/deltas/delayed_delta/flag_as_deleted_job.rb +27 -0
- data/lib/thinking_sphinx/deltas/delayed_delta/job.rb +26 -0
- data/lib/thinking_sphinx/deploy/capistrano.rb +100 -0
- data/lib/thinking_sphinx/excerpter.rb +22 -0
- data/lib/thinking_sphinx/facet.rb +125 -0
- data/lib/thinking_sphinx/facet_search.rb +134 -0
- data/lib/thinking_sphinx/field.rb +82 -0
- data/lib/thinking_sphinx/index.rb +99 -0
- data/lib/thinking_sphinx/index/builder.rb +286 -0
- data/lib/thinking_sphinx/index/faux_column.rb +110 -0
- data/lib/thinking_sphinx/property.rb +162 -0
- data/lib/thinking_sphinx/rails_additions.rb +150 -0
- data/lib/thinking_sphinx/search.rb +707 -0
- data/lib/thinking_sphinx/search_methods.rb +421 -0
- data/lib/thinking_sphinx/source.rb +150 -0
- data/lib/thinking_sphinx/source/internal_properties.rb +46 -0
- data/lib/thinking_sphinx/source/sql.rb +128 -0
- data/lib/thinking_sphinx/tasks.rb +165 -0
- data/rails/init.rb +14 -0
- data/spec/lib/thinking_sphinx/active_record/delta_spec.rb +130 -0
- data/spec/lib/thinking_sphinx/active_record/has_many_association_spec.rb +49 -0
- data/spec/lib/thinking_sphinx/active_record/scopes_spec.rb +96 -0
- data/spec/lib/thinking_sphinx/active_record_spec.rb +364 -0
- data/spec/lib/thinking_sphinx/association_spec.rb +239 -0
- data/spec/lib/thinking_sphinx/attribute_spec.rb +500 -0
- data/spec/lib/thinking_sphinx/configuration_spec.rb +268 -0
- data/spec/lib/thinking_sphinx/core/array_spec.rb +9 -0
- data/spec/lib/thinking_sphinx/core/string_spec.rb +9 -0
- data/spec/lib/thinking_sphinx/excerpter_spec.rb +49 -0
- data/spec/lib/thinking_sphinx/facet_search_spec.rb +176 -0
- data/spec/lib/thinking_sphinx/facet_spec.rb +333 -0
- data/spec/lib/thinking_sphinx/field_spec.rb +154 -0
- data/spec/lib/thinking_sphinx/index/builder_spec.rb +455 -0
- data/spec/lib/thinking_sphinx/index/faux_column_spec.rb +30 -0
- data/spec/lib/thinking_sphinx/index_spec.rb +45 -0
- data/spec/lib/thinking_sphinx/rails_additions_spec.rb +203 -0
- data/spec/lib/thinking_sphinx/search_methods_spec.rb +152 -0
- data/spec/lib/thinking_sphinx/search_spec.rb +1092 -0
- data/spec/lib/thinking_sphinx/source_spec.rb +227 -0
- data/spec/lib/thinking_sphinx_spec.rb +162 -0
- data/tasks/distribution.rb +50 -0
- data/tasks/rails.rake +1 -0
- data/tasks/testing.rb +83 -0
- data/vendor/after_commit/LICENSE +20 -0
- data/vendor/after_commit/README +16 -0
- data/vendor/after_commit/Rakefile +22 -0
- data/vendor/after_commit/init.rb +8 -0
- data/vendor/after_commit/lib/after_commit.rb +45 -0
- data/vendor/after_commit/lib/after_commit/active_record.rb +114 -0
- data/vendor/after_commit/lib/after_commit/connection_adapters.rb +103 -0
- data/vendor/after_commit/test/after_commit_test.rb +53 -0
- data/vendor/delayed_job/lib/delayed/job.rb +251 -0
- data/vendor/delayed_job/lib/delayed/message_sending.rb +7 -0
- data/vendor/delayed_job/lib/delayed/performable_method.rb +55 -0
- data/vendor/delayed_job/lib/delayed/worker.rb +54 -0
- data/vendor/riddle/lib/riddle.rb +30 -0
- data/vendor/riddle/lib/riddle/client.rb +635 -0
- data/vendor/riddle/lib/riddle/client/filter.rb +53 -0
- data/vendor/riddle/lib/riddle/client/message.rb +66 -0
- data/vendor/riddle/lib/riddle/client/response.rb +84 -0
- data/vendor/riddle/lib/riddle/configuration.rb +33 -0
- data/vendor/riddle/lib/riddle/configuration/distributed_index.rb +48 -0
- data/vendor/riddle/lib/riddle/configuration/index.rb +142 -0
- data/vendor/riddle/lib/riddle/configuration/indexer.rb +19 -0
- data/vendor/riddle/lib/riddle/configuration/remote_index.rb +17 -0
- data/vendor/riddle/lib/riddle/configuration/searchd.rb +25 -0
- data/vendor/riddle/lib/riddle/configuration/section.rb +43 -0
- data/vendor/riddle/lib/riddle/configuration/source.rb +23 -0
- data/vendor/riddle/lib/riddle/configuration/sql_source.rb +34 -0
- data/vendor/riddle/lib/riddle/configuration/xml_source.rb +28 -0
- data/vendor/riddle/lib/riddle/controller.rb +53 -0
- metadata +172 -0
@@ -0,0 +1,55 @@
|
|
1
|
+
module Delayed
  # A serialisable description of a deferred method call: the receiver, the
  # method name and the arguments.
  #
  # Classes and ActiveRecord instances are not stored directly. They are
  # replaced by marker strings ("CLASS:Name" and "AR:Name:id") when the job is
  # built, and turned back into objects when the job is performed.
  class PerformableMethod < Struct.new(:object, :method, :args)
    # \A and \z anchor the whole string, so a multi-line argument that merely
    # contains a marker-looking line is not mistaken for a reference.
    CLASS_STRING_FORMAT = /\ACLASS\:([A-Z][\w\:]+)\z/
    AR_STRING_FORMAT    = /\AAR\:([A-Z][\w\:]+)\:(\d+)\z/

    # object - the receiver of the deferred call.
    # method - name of the method to invoke on it.
    # args   - array of arguments for the call.
    #
    # Raises NoMethodError if the receiver does not respond to the method.
    def initialize(object, method, args)
      unless object.respond_to?(method)
        # Report the intended receiver, not this (still empty) struct.
        raise NoMethodError, "undefined method `#{method}' for #{object.inspect}"
      end

      self.object = dump(object)
      self.args   = args.map { |a| dump(a) }
      self.method = method.to_sym
    end

    # Human readable label: "Klass.method" for class receivers,
    # "Klass#method" for record receivers, "Unknown#method" otherwise.
    def display_name
      case self.object
      when CLASS_STRING_FORMAT then "#{$1}.#{method}"
      when AR_STRING_FORMAT    then "#{$1}##{method}"
      else "Unknown##{method}"
      end
    end

    # Restores the receiver and arguments and invokes the method. Returns the
    # method's result, or true when a referenced record no longer exists.
    def perform
      load(object).send(method, *args.map{|a| load(a)})
    rescue ActiveRecord::RecordNotFound
      # We cannot do anything about objects which were deleted in the meantime
      true
    end

    private

    # Turns a marker string back into the class or record it refers to; any
    # other value is returned untouched.
    def load(arg)
      case arg
      when CLASS_STRING_FORMAT then $1.constantize
      when AR_STRING_FORMAT    then $1.constantize.find($2)
      else arg
      end
    end

    # Replaces classes and ActiveRecord instances with marker strings; any
    # other value is returned untouched.
    def dump(arg)
      case arg
      when Class              then class_to_string(arg)
      when ActiveRecord::Base then ar_to_string(arg)
      else arg
      end
    end

    def ar_to_string(obj)
      "AR:#{obj.class}:#{obj.id}"
    end

    def class_to_string(obj)
      "CLASS:#{obj.name}"
    end
  end
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
module Delayed
  # Runs Delayed::Job.work_off in a loop until a TERM or INT signal arrives,
  # sleeping between rounds in which no job was processed.
  class Worker
    # Seconds to wait after a round that processed nothing.
    SLEEP = 5

    cattr_accessor :logger
    self.logger =
      if defined?(Merb::Logger)
        Merb.logger
      elsif defined?(RAILS_DEFAULT_LOGGER)
        RAILS_DEFAULT_LOGGER
      end

    # options - :quiet suppresses console output; :min_priority and
    #           :max_priority, when present, are forwarded to Delayed::Job.
    def initialize(options={})
      @quiet = options[:quiet]

      [:min_priority, :max_priority].each do |setting|
        next unless options.has_key?(setting)
        Delayed::Job.send("#{setting}=", options[setting])
      end
    end

    # Main loop. Locks are always released on the way out, even when the
    # loop is left through an exception.
    def start
      say "*** Starting job worker #{Delayed::Job.worker_name}"

      %w(TERM INT).each do |signal|
        trap(signal) { say 'Exiting...'; $exit = true }
      end

      loop do
        outcome = nil
        elapsed = Benchmark.realtime { outcome = Delayed::Job.work_off }
        processed = outcome.sum

        break if $exit

        if processed.zero?
          sleep(SLEEP)
        else
          say "#{processed} jobs processed at %.4f j/s, %d failed ..." % [processed / elapsed, outcome.last]
        end

        break if $exit
      end

    ensure
      Delayed::Job.clear_locks!
    end

    # Prints the text (unless quiet) and logs it when a logger is set.
    def say(text)
      puts text unless @quiet
      logger.info text if logger
    end

  end
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
require 'socket'
|
2
|
+
require 'timeout'
|
3
|
+
|
4
|
+
require 'riddle/client'
|
5
|
+
require 'riddle/configuration'
|
6
|
+
require 'riddle/controller'
|
7
|
+
|
8
|
+
module Riddle #:nodoc:
  # Error class for connection failures.
  class ConnectionError < StandardError #:nodoc:
  end

  module Version #:nodoc:
    Major = 0
    Minor = 9
    Tiny  = 8
    # Revision number for RubyForge's sake, taken from what Sphinx
    # outputs to the command line.
    Rev = 1533
    # Release number to mark my own fixes, beyond feature parity with
    # Sphinx itself.
    Release = 10

    String     = "#{Major}.#{Minor}.#{Tiny}"
    GemVersion = "#{String}.#{Rev}.#{Release}"
  end

  class << self
    # Returns a copy of the string in which each of the characters
    # ( ) | - ! @ ~ " & / is preceded by a backslash.
    def escape(string)
      string.gsub(/[\(\)\|\-!@~"&\/]/) { |special| "\\" + special }
    end
  end
end
|
@@ -0,0 +1,635 @@
|
|
1
|
+
require 'riddle/client/filter'
|
2
|
+
require 'riddle/client/message'
|
3
|
+
require 'riddle/client/response'
|
4
|
+
|
5
|
+
module Riddle
|
6
|
+
# Raised when the version searchd reports during the handshake is below 1.
class VersionError < StandardError
end

# Raised when searchd sends no usable response or reports an error status.
class ResponseError < StandardError
end
|
8
|
+
|
9
|
+
# This class was heavily based on the existing Client API by Dmytro Shteflyuk
|
10
|
+
# and Alexy Kovyrin. Their code worked fine, I just wanted something a bit
|
11
|
+
# more Ruby-ish (ie. lowercase and underscored method names). I also have
|
12
|
+
# used a few helper classes, just to neaten things up.
|
13
|
+
#
|
14
|
+
# Feel free to use it wherever. Send bug reports, patches, comments and
|
15
|
+
# suggestions to pat at freelancing-gods dot com.
|
16
|
+
#
|
17
|
+
# Most properties of the client are accessible through attribute accessors,
|
18
|
+
# and where relevant use symbols instead of the long constants common in
|
19
|
+
# other clients.
|
20
|
+
# Some examples:
|
21
|
+
#
|
22
|
+
# client.sort_mode = :extended
|
23
|
+
# client.sort_by = "birthday DESC"
|
24
|
+
# client.match_mode = :extended
|
25
|
+
#
|
26
|
+
# To add a filter, you will need to create a Filter object:
|
27
|
+
#
|
28
|
+
# client.filters << Riddle::Client::Filter.new("birthday",
|
29
|
+
# Time.local(1975, 1, 1).to_i..Time.local(1985, 1, 1).to_i, false)
|
30
|
+
#
|
31
|
+
class Client
|
32
|
+
# Lookup tables translating the symbols used by this client into the
# integer codes of the searchd protocol. The trailing comment on each
# entry names the matching Sphinx constant.

# Command identifiers.
Commands = {
  :search   => 0, # SEARCHD_COMMAND_SEARCH
  :excerpt  => 1, # SEARCHD_COMMAND_EXCERPT
  :update   => 2, # SEARCHD_COMMAND_UPDATE
  :keywords => 3  # SEARCHD_COMMAND_KEYWORDS
}

# Protocol version sent with each command.
Versions = {
  :search   => 0x113, # VER_COMMAND_SEARCH
  :excerpt  => 0x100, # VER_COMMAND_EXCERPT
  :update   => 0x101, # VER_COMMAND_UPDATE
  :keywords => 0x100  # VER_COMMAND_KEYWORDS
}

# Response status codes.
Statuses = {
  :ok      => 0, # SEARCHD_OK
  :error   => 1, # SEARCHD_ERROR
  :retry   => 2, # SEARCHD_RETRY
  :warning => 3  # SEARCHD_WARNING
}

# Values accepted by match_mode.
MatchModes = {
  :all       => 0, # SPH_MATCH_ALL
  :any       => 1, # SPH_MATCH_ANY
  :phrase    => 2, # SPH_MATCH_PHRASE
  :boolean   => 3, # SPH_MATCH_BOOLEAN
  :extended  => 4, # SPH_MATCH_EXTENDED
  :fullscan  => 5, # SPH_MATCH_FULLSCAN
  :extended2 => 6  # SPH_MATCH_EXTENDED2
}

# Values accepted by rank_mode.
RankModes = {
  :proximity_bm25 => 0, # SPH_RANK_PROXIMITY_BM25
  :bm25           => 1, # SPH_RANK_BM25
  :none           => 2, # SPH_RANK_NONE
  :wordcount      => 3  # SPH_RANK_WORDCOUNT
}

# Values accepted by sort_mode.
SortModes = {
  :relevance     => 0, # SPH_SORT_RELEVANCE
  :attr_desc     => 1, # SPH_SORT_ATTR_DESC
  :attr_asc      => 2, # SPH_SORT_ATTR_ASC
  :time_segments => 3, # SPH_SORT_TIME_SEGMENTS
  :extended      => 4, # SPH_SORT_EXTENDED
  :expr          => 5  # SPH_SORT_EXPR
}

# Attribute type codes found in search responses.
AttributeTypes = {
  :integer   => 1,         # SPH_ATTR_INTEGER
  :timestamp => 2,         # SPH_ATTR_TIMESTAMP
  :ordinal   => 3,         # SPH_ATTR_ORDINAL
  :bool      => 4,         # SPH_ATTR_BOOL
  :float     => 5,         # SPH_ATTR_FLOAT
  :multi     => 0x40000000 # SPH_ATTR_MULTI
}

# Values accepted by group_function.
GroupFunctions = {
  :day      => 0, # SPH_GROUPBY_DAY
  :week     => 1, # SPH_GROUPBY_WEEK
  :month    => 2, # SPH_GROUPBY_MONTH
  :year     => 3, # SPH_GROUPBY_YEAR
  :attr     => 4, # SPH_GROUPBY_ATTR
  :attrpair => 5  # SPH_GROUPBY_ATTRPAIR
}

# Filter kinds.
FilterTypes = {
  :values      => 0, # SPH_FILTER_VALUES
  :range       => 1, # SPH_FILTER_RANGE
  :float_range => 2  # SPH_FILTER_FLOATRANGE
}
|
102
|
+
|
103
|
+
# Where to connect, and how long to wait for the connection.
attr_accessor :server, :port, :timeout
# Result window and query limits.
attr_accessor :offset, :limit, :max_matches, :cut_off, :max_query_time
attr_accessor :retry_count, :retry_delay
# Matching, ranking and ordering.
attr_accessor :match_mode, :rank_mode, :sort_mode, :sort_by
# Weightings.
attr_accessor :weights, :index_weights, :field_weights
# Restrictions on the result set.
attr_accessor :id_range, :filters, :anchor
# Grouping.
attr_accessor :group_by, :group_function, :group_clause, :group_distinct
# Messages waiting to be sent by the next run.
attr_reader :queue
|
109
|
+
|
110
|
+
# Can instantiate with a specific server and port - otherwise it assumes
|
111
|
+
# defaults of localhost and 3312 respectively. All other settings can be
|
112
|
+
# accessed and changed via the attribute accessors.
|
113
|
+
# server - host name of the Sphinx daemon ("localhost" when not given).
# port   - port of the Sphinx daemon (3312 when not given).
#
# All search settings start at the defaults applied by reset, and the
# query queue starts empty.
def initialize(server=nil, port=nil)
  @server = server ? server : "localhost"
  @port   = port ? port : 3312

  reset

  @queue = []
end
|
121
|
+
|
122
|
+
# Reset attributes and settings to defaults.
|
123
|
+
# Restores every search setting to its default. The server, port and
# query queue are left alone.
def reset
  # result window
  @offset, @limit, @max_matches = 0, 20, 1000

  # matching, ranking and ordering
  @match_mode = :all
  @rank_mode  = :proximity_bm25
  @sort_mode  = :relevance
  @sort_by    = ''

  # restrictions
  @weights  = []
  @id_range = 0..0
  @filters  = []
  @anchor   = {}

  # grouping
  @group_by       = ''
  @group_function = :day
  @group_clause   = '@group desc'
  @group_distinct = ''

  # string keys are index names, integer values are weightings
  @index_weights = {}
  # string keys are field names, integer values are weightings
  @field_weights = {}

  # numeric settings that default to zero
  @cut_off = 0
  @retry_count = 0
  @retry_delay = 0
  @max_query_time = 0
  @timeout = 0
end
|
150
|
+
|
151
|
+
# Set the geo-anchor point - with the names of the attributes that contain
|
152
|
+
# the latitude and longitude (in radians), and the reference position.
|
153
|
+
# Note that for geocoding to work properly, you must also set
|
154
|
+
# match_mode to :extended. To sort results by distance, you will
|
155
|
+
# need to set sort_by to '@geodist asc' (with sort_mode :extended). Sphinx
|
156
|
+
# expects latitude and longitude to be returned from your SQL source
|
157
|
+
# in radians.
|
158
|
+
#
|
159
|
+
# Example:
|
160
|
+
# client.set_anchor('lat', -0.6591741, 'long', 2.530770)
|
161
|
+
#
|
162
|
+
# Stores the geo-anchor: the attribute names holding latitude and
# longitude, plus the reference position. Returns the stored hash.
def set_anchor(lat_attr, lat, long_attr, long)
  anchor = {}
  anchor[:latitude_attribute]  = lat_attr
  anchor[:latitude]            = lat
  anchor[:longitude_attribute] = long_attr
  anchor[:longitude]           = long
  @anchor = anchor
end
|
170
|
+
|
171
|
+
# Append a query to the queue. This uses the same parameters as the query
|
172
|
+
# method.
|
173
|
+
# Builds the message for one search and adds it to the queue without
# sending it. Returns the queue.
def append_query(search, index = '*', comments = '')
  @queue.push(query_message(search, index, comments))
end
|
176
|
+
|
177
|
+
# Run all the queries currently in the queue. This will return an array of
|
178
|
+
# results hashes.
|
179
|
+
# Sends every queued query in a single request and returns an array with
# one result hash per query, in queue order.
#
# The queue is emptied whether or not the request succeeds. Otherwise a
# failed request would leave its message queued, and the next call to
# query would return the result of that stale message.
def run
  response = Response.new request(:search, @queue)

  @queue.collect do
    result = {
      :matches         => [],
      :fields          => [],
      :attributes      => {},
      :attribute_names => [],
      :words           => {}
    }

    result[:status] = response.next_int
    case result[:status]
    when Statuses[:warning]
      result[:warning] = response.next
    when Statuses[:error]
      # Nothing else follows for a failed query.
      result[:error] = response.next
      next result
    end

    result[:fields] = response.next_array

    # Attribute schema: name/type pairs, kept in response order.
    response.next_int.times do
      attribute_name = response.next
      type           = response.next_int

      result[:attributes][attribute_name] = type
      result[:attribute_names] << attribute_name
    end

    match_count = response.next_int
    is_64_bit   = response.next_int
    match_count.times do |i|
      doc    = is_64_bit > 0 ? response.next_64bit_int : response.next_int
      weight = response.next_int

      match = {:doc => doc, :weight => weight, :index => i, :attributes => {}}
      result[:matches] << match
      result[:attribute_names].each do |attr|
        match[:attributes][attr] = attribute_from_type(
          result[:attributes][attr], response
        )
      end
    end

    result[:total]       = response.next_int.to_i
    result[:total_found] = response.next_int.to_i
    # Three decimal places, expressed in seconds.
    result[:time]        = ('%.3f' % (response.next_int / 1000.0)).to_f

    response.next_int.times do
      word = response.next
      docs = response.next_int
      hits = response.next_int
      result[:words][word] = {:docs => docs, :hits => hits}
    end

    result
  end
ensure
  @queue.clear
end
|
243
|
+
|
244
|
+
# Query the Sphinx daemon - defaulting to all indexes, but you can specify
|
245
|
+
# a specific one if you wish. The search parameter should be a string
|
246
|
+
# following Sphinx's expectations.
|
247
|
+
#
|
248
|
+
# The object returned from this method is a hash with the following keys:
|
249
|
+
#
|
250
|
+
# * :matches
|
251
|
+
# * :fields
|
252
|
+
# * :attributes
|
253
|
+
# * :attribute_names
|
254
|
+
# * :words
|
255
|
+
# * :total
|
256
|
+
# * :total_found
|
257
|
+
# * :time
|
258
|
+
# * :status
|
259
|
+
# * :warning (if appropriate)
|
260
|
+
# * :error (if appropriate)
|
261
|
+
#
|
262
|
+
# The key <tt>:matches</tt> returns an array of hashes - the actual search
|
263
|
+
# results. Each hash has the document id (<tt>:doc</tt>), the result
|
264
|
+
# weighting (<tt>:weight</tt>), and a hash of the attributes for the
|
265
|
+
# document (<tt>:attributes</tt>).
|
266
|
+
#
|
267
|
+
# The <tt>:fields</tt> and <tt>:attribute_names</tt> keys return list of
|
268
|
+
# fields and attributes for the documents. The key <tt>:attributes</tt>
|
269
|
+
# will return a hash of attribute name and type pairs, and <tt>:words</tt>
|
270
|
+
# returns a hash of hashes representing the words from the search, with the
|
271
|
+
# number of documents and hits for each, along the lines of:
|
272
|
+
#
|
273
|
+
# results[:words]["Pat"] #=> {:docs => 12, :hits => 15}
|
274
|
+
#
|
275
|
+
# <tt>:total</tt>, <tt>:total_found</tt> and <tt>:time</tt> return the
|
276
|
+
# number of matches available, the total number of matches (which may be
|
277
|
+
# greater than the maximum available, depending on the number of matches
|
278
|
+
# and your sphinx configuration), and the time in milliseconds that the
|
279
|
+
# query took to run.
|
280
|
+
#
|
281
|
+
# <tt>:status</tt> is the error code for the query - and if there was a
|
282
|
+
# related warning, it will be under the <tt>:warning</tt> key. Fatal errors
|
283
|
+
# will be described under <tt>:error</tt>.
|
284
|
+
#
|
285
|
+
# Queues a single search, runs the queue and returns the first result
# hash.
def query(search, index = '*', comments = '')
  @queue.push query_message(search, index, comments)
  run.first
end
|
289
|
+
|
290
|
+
# Build excerpts from search terms (the +words+) and the text of documents. Excerpts are bodies of text that have the +words+ highlighted.
|
291
|
+
# They may also be abbreviated to fit within a word limit.
|
292
|
+
#
|
293
|
+
# As part of the options hash, you will need to
|
294
|
+
# define:
|
295
|
+
# * :docs
|
296
|
+
# * :words
|
297
|
+
# * :index
|
298
|
+
#
|
299
|
+
# Optional settings include:
|
300
|
+
# * :before_match (defaults to <span class="match">)
|
301
|
+
# * :after_match (defaults to </span>)
|
302
|
+
# * :chunk_separator (defaults to ' … ' - which is an HTML ellipsis)
|
303
|
+
# * :limit (defaults to 256)
|
304
|
+
# * :around (defaults to 5)
|
305
|
+
# * :exact_phrase (defaults to false)
|
306
|
+
# * :single_passage (defaults to false)
|
307
|
+
#
|
308
|
+
# The defaults differ from the official PHP client, as I've opted for
|
309
|
+
# semantic HTML markup.
|
310
|
+
#
|
311
|
+
# Example:
|
312
|
+
#
|
313
|
+
# client.excerpts(:docs => ["Pat Allan, Pat Cash"], :words => 'Pat', :index => 'pats')
|
314
|
+
# #=> ["<span class=\"match\">Pat</span> Allan, <span class=\"match\">Pat</span> Cash"]
|
315
|
+
#
|
316
|
+
# lorem_lipsum = "Lorem ipsum dolor..."
|
317
|
+
#
|
318
|
+
# client.excerpts(:docs => ["Pat Allan, #{lorem_lipsum} Pat Cash"], :words => 'Pat', :index => 'pats')
|
319
|
+
# #=> ["<span class=\"match\">Pat</span> Allan, Lorem ipsum dolor sit amet, consectetur adipisicing
|
320
|
+
# elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua … . Excepteur
|
321
|
+
# sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est
|
322
|
+
# laborum. <span class=\"match\">Pat</span> Cash"]
|
323
|
+
#
|
324
|
+
# Workflow:
|
325
|
+
#
|
326
|
+
# Excerpt creation is completely isolated from searching the index. The nominated index is only used to
|
327
|
+
# discover encoding and charset information.
|
328
|
+
#
|
329
|
+
# Therefore, the workflow goes:
|
330
|
+
#
|
331
|
+
# 1. Do the sphinx query.
|
332
|
+
# 2. Fetch the documents found by sphinx from their repositories.
|
333
|
+
# 3. Pass the documents' text to +excerpts+ for marking up of matched terms.
|
334
|
+
#
|
335
|
+
# Requests excerpts for the given documents and returns one string per
# entry of options[:docs], in the same order.
#
# Options that are missing (or nil/false) fall back to the defaults
# below. The defaults are applied to a copy, so the hash passed in by
# the caller is not modified.
def excerpts(options = {})
  defaults = {
    :index           => '*',
    :before_match    => '<span class="match">',
    :after_match     => '</span>',
    :chunk_separator => ' … ', # ellipsis
    :limit           => 256,
    :around          => 5,
    :exact_phrase    => false,
    :single_passage  => false
  }
  # A given value wins unless it is nil or false.
  settings = defaults.merge(options) { |_key, default, given| given || default }

  response = Response.new request(:excerpt, excerpts_message(settings))

  settings[:docs].collect { response.next }
end
|
349
|
+
|
350
|
+
# Update attributes - first parameter is the relevant index, second is an
|
351
|
+
# array of attributes to be updated, and the third is a hash, where the
|
352
|
+
# keys are the document ids, and the values are arrays with the attribute
|
353
|
+
# values - in the same order as the second parameter.
|
354
|
+
#
|
355
|
+
# Example:
|
356
|
+
#
|
357
|
+
# client.update('people', ['birthday'], {1 => [Time.local(1982, 8, 20).to_i]})
|
358
|
+
#
|
359
|
+
# Sends an attribute update and returns the integer that searchd
# answers with.
def update(index, attributes, values_by_doc)
  payload = update_message(index, attributes, values_by_doc)
  reply   = Response.new(request(:update, payload))
  reply.next_int
end
|
367
|
+
|
368
|
+
# Generates a keyword list for a given query. Each keyword is represented
|
369
|
+
# by a hash, with keys :tokenised and :normalised. If return_hits is set to
|
370
|
+
# true it will also report on the number of hits and documents for each
|
371
|
+
# keyword (see :hits and :docs keys respectively).
|
372
|
+
# Asks searchd for the keywords of a query. Returns an array of hashes
# with :tokenised and :normalised keys, plus :docs and :hits when
# return_hits is true.
def keywords(query, index, return_hits = false)
  payload = keywords_message(query, index, return_hits)
  reply   = Response.new(request(:keywords, payload))

  (0...reply.next_int).map do
    entry = {}
    entry[:tokenised]  = reply.next
    entry[:normalised] = reply.next

    if return_hits
      entry[:docs] = reply.next_int
      entry[:hits] = reply.next_int
    end

    entry
  end
end
|
391
|
+
|
392
|
+
private
|
393
|
+
|
394
|
+
# Connects to the Sphinx daemon, and yields a socket to use. The socket is
|
395
|
+
# closed at the end of the block.
|
396
|
+
# Opens a connection and yields the socket, closing it once the block
# has finished. When @timeout is non-zero, connecting is limited to that
# many seconds and a Riddle::ConnectionError is raised on expiry.
def connect(&block)
  socket =
    if @timeout == 0
      initialise_connection
    else
      begin
        Timeout.timeout(@timeout) { initialise_connection }
      rescue Timeout::Error
        raise Riddle::ConnectionError,
          "Connection to #{@server} on #{@port} timed out after #{@timeout} seconds"
      end
    end

  begin
    yield socket
  ensure
    socket.close
  end
end
|
415
|
+
|
416
|
+
# Opens a socket and performs the version handshake: reads the 4-byte
# version sent by searchd, then answers with client version 1.
#
# Returns the socket. Raises VersionError when searchd reports a version
# below 1. If the handshake fails for any reason (including a timeout
# raised from outside), the socket is closed instead of being leaked.
def initialise_connection
  socket     = initialise_socket
  handshaken = false

  # Checking version
  version = socket.recv(4).unpack('N*').first
  if version < 1
    raise VersionError, "Can only connect to searchd version 1.0 or better, not version #{version}"
  end

  # Send version
  socket.send [1].pack('N'), 0

  handshaken = true
  socket
ensure
  socket.close if socket && !handshaken
end
|
431
|
+
|
432
|
+
# Opens a TCP socket to the configured server and port. A refused
# connection is retried immediately, up to five attempts in total, after
# which a Riddle::ConnectionError is raised.
def initialise_socket
  attempts = 0

  begin
    TCPSocket.new(@server, @port)
  rescue Errno::ECONNREFUSED => error
    attempts += 1
    retry if attempts < 5
    raise Riddle::ConnectionError,
      "Connection to #{@server} on #{@port} failed. #{error.message}"
  end
end
|
444
|
+
|
445
|
+
# Send a collection of messages, for a command type (eg, search, excerpts,
|
446
|
+
# update), to the Sphinx daemon.
|
447
|
+
# Sends the message(s) for one command to searchd and returns the body
# of the response.
#
# command  - key of Commands/Versions (:search, :excerpt, :update,
#            :keywords).
# messages - a message string, or an array of them for :search.
#
# Raises ResponseError when the response is missing or incomplete, when
# searchd reports an error or retry status, or when the status is
# unknown. Warnings are printed and the remaining body is returned.
def request(command, messages)
  response = String.new
  status   = -1
  version  = 0
  length   = 0
  message  = Array(messages).join("")
  if message.respond_to?(:force_encoding)
    message = message.force_encoding('ASCII-8BIT')
  end

  connect do |socket|
    case command
    when :search
      # Message length is +4 to account for the following count value for
      # the number of messages (well, that's what I'm assuming).
      socket.send [
        Commands[command], Versions[command],
        4+message.length, messages.length
      ].pack("nnNN") + message, 0
    else
      socket.send [
        Commands[command], Versions[command], message.length
      ].pack("nnN") + message, 0
    end

    header = socket.recv(8)
    status, version, length = header.unpack('n2N')

    while response.length < (length || 0)
      part = socket.recv(length - response.length)
      # Nothing more to read: stop instead of looping forever. The
      # length check below reports the short response.
      break if part.nil? || part.empty?
      response << part
    end
  end

  if response.empty? || response.length != length
    raise ResponseError, "No response from searchd (status: #{status}, version: #{version})"
  end

  case status
  when Statuses[:ok]
    if version < Versions[command]
      puts format("searchd command v.%d.%d older than client (v.%d.%d)",
        version >> 8, version & 0xff,
        Versions[command] >> 8, Versions[command] & 0xff)
    end
    response
  when Statuses[:warning]
    # The body starts with a length-prefixed warning text.
    length = response[0, 4].unpack('N*').first
    puts response[4, length]
    response[4 + length, response.length - 4 - length]
  when Statuses[:error], Statuses[:retry]
    raise ResponseError, "searchd error (status: #{status}): #{response[4, response.length - 4]}"
  else
    raise ResponseError, "Unknown searchd error (status: #{status})"
  end
end
|
503
|
+
|
504
|
+
# Generation of the message to send to Sphinx for a search.
|
505
|
+
# Serialises the current settings together with one search into the
# message sent to searchd. The order of the append calls is the order of
# the fields in the message. Returns the message as a string.
def query_message(search, index, comments = '')
  message = Message.new

  # Mode, Limits, Sort Mode
  message.append_ints(
    @offset, @limit,
    MatchModes[@match_mode], RankModes[@rank_mode], SortModes[@sort_mode]
  )
  message.append_string(@sort_by)

  # Query
  message.append_string(search)

  # Weights
  message.append_int(@weights.length)
  message.append_ints(*@weights)

  # Index
  message.append_string(index)

  # ID Range
  message.append_int(1)
  message.append_64bit_ints(@id_range.first, @id_range.last)

  # Filters
  message.append_int(@filters.length)
  @filters.each do |filter|
    message.append(filter.query_message)
  end

  # Grouping
  message.append_int(GroupFunctions[@group_function])
  message.append_string(@group_by)
  message.append_int(@max_matches)
  message.append_string(@group_clause)
  message.append_ints(@cut_off, @retry_count, @retry_delay)
  message.append_string(@group_distinct)

  # Anchor Point
  unless @anchor.empty?
    message.append_int(1)
    message.append_string(@anchor[:latitude_attribute])
    message.append_string(@anchor[:longitude_attribute])
    message.append_floats(@anchor[:latitude], @anchor[:longitude])
  else
    message.append_int(0)
  end

  # Per Index Weights
  message.append_int(@index_weights.length)
  @index_weights.each do |name, weight|
    message.append_string(name.to_s)
    message.append_int(weight)
  end

  # Max Query Time
  message.append_int(@max_query_time)

  # Per Field Weights
  message.append_int(@field_weights.length)
  @field_weights.each do |name, weight|
    message.append_string(name.to_s)
    message.append_int(weight)
  end

  message.append_string(comments)

  message.to_s
end
|
570
|
+
|
571
|
+
# Generation of the message to send to Sphinx for an excerpts request.
|
572
|
+
# Serialises an excerpts request. Bit 1 of the flags is always set; the
# :exact_phrase, :single_passage, :use_boundaries and :weight_order
# options add bits 2, 4, 8 and 16. Returns the message as a string.
def excerpts_message(options)
  message = Message.new

  flag_bits = [
    [:exact_phrase, 2], [:single_passage, 4],
    [:use_boundaries, 8], [:weight_order, 16]
  ]
  flags = 1
  flag_bits.each do |option, bit|
    flags |= bit if options[option]
  end

  message.append([0, flags].pack('N2')) # 0 = mode
  message.append_string(options[:index])
  message.append_string(options[:words])

  # options
  [:before_match, :after_match, :chunk_separator].each do |option|
    message.append_string(options[option])
  end
  message.append_ints(options[:limit], options[:around])

  message.append_array(options[:docs])

  message.to_s
end
|
595
|
+
|
596
|
+
# Generation of the message to send to Sphinx to update attributes of a
|
597
|
+
# document.
|
598
|
+
# Serialises an attribute update: the index name, the attribute names,
# the number of documents, then for each document its id followed by its
# new values. Returns the message as a string.
def update_message(index, attributes, values_by_doc)
  message = Message.new

  message.append_string(index)
  message.append_array(attributes)

  message.append_int(values_by_doc.length)
  values_by_doc.each do |doc_id, new_values|
    message.append_64bit_int(doc_id)   # document ID
    message.append_ints(*new_values)   # array of new values (integers)
  end

  message.to_s
end
|
612
|
+
|
613
|
+
# Generates the simple message to send to the daemon for a keywords request.
|
614
|
+
# Serialises a keywords request: the query, the index and a 1/0 flag
# saying whether hit counts are wanted. Returns the message as a string.
def keywords_message(query, index, return_hits)
  hits_flag = return_hits ? 1 : 0

  message = Message.new
  [query, index].each { |text| message.append_string(text) }
  message.append_int(hits_flag)
  message.to_s
end
|
623
|
+
|
624
|
+
# Reads one attribute value from the response. A type code above
# AttributeTypes[:multi] is read as an array of the underlying type;
# floats are read as floats and every other type as an integer.
def attribute_from_type(type, response)
  multi_marker = AttributeTypes[:multi]
  is_multi     = type > multi_marker
  type -= multi_marker if is_multi

  if AttributeTypes[:float] === type
    is_multi ? response.next_float_array : response.next_float
  else
    is_multi ? response.next_int_array : response.next_int
  end
end
|
634
|
+
end
|
635
|
+
end
|