sphinx 0.9.9.2117
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +4 -0
- data/README.rdoc +243 -0
- data/Rakefile +45 -0
- data/VERSION.yml +5 -0
- data/init.rb +1 -0
- data/lib/sphinx/buffered_io.rb +26 -0
- data/lib/sphinx/client.rb +2426 -0
- data/lib/sphinx/constants.rb +179 -0
- data/lib/sphinx/indifferent_access.rb +152 -0
- data/lib/sphinx/request.rb +121 -0
- data/lib/sphinx/response.rb +71 -0
- data/lib/sphinx/server.rb +170 -0
- data/lib/sphinx/timeout.rb +31 -0
- data/lib/sphinx.rb +51 -0
- data/spec/client_response_spec.rb +170 -0
- data/spec/client_spec.rb +669 -0
- data/spec/client_validations_spec.rb +859 -0
- data/spec/fixtures/default_search.php +8 -0
- data/spec/fixtures/default_search_index.php +8 -0
- data/spec/fixtures/excerpt_custom.php +11 -0
- data/spec/fixtures/excerpt_default.php +8 -0
- data/spec/fixtures/excerpt_flags.php +12 -0
- data/spec/fixtures/field_weights.php +9 -0
- data/spec/fixtures/filter.php +9 -0
- data/spec/fixtures/filter_exclude.php +9 -0
- data/spec/fixtures/filter_float_range.php +9 -0
- data/spec/fixtures/filter_float_range_exclude.php +9 -0
- data/spec/fixtures/filter_range.php +9 -0
- data/spec/fixtures/filter_range_exclude.php +9 -0
- data/spec/fixtures/filter_range_int64.php +10 -0
- data/spec/fixtures/filter_ranges.php +10 -0
- data/spec/fixtures/filters.php +10 -0
- data/spec/fixtures/filters_different.php +13 -0
- data/spec/fixtures/geo_anchor.php +9 -0
- data/spec/fixtures/group_by_attr.php +9 -0
- data/spec/fixtures/group_by_attrpair.php +9 -0
- data/spec/fixtures/group_by_day.php +9 -0
- data/spec/fixtures/group_by_day_sort.php +9 -0
- data/spec/fixtures/group_by_month.php +9 -0
- data/spec/fixtures/group_by_week.php +9 -0
- data/spec/fixtures/group_by_year.php +9 -0
- data/spec/fixtures/group_distinct.php +10 -0
- data/spec/fixtures/id_range.php +9 -0
- data/spec/fixtures/id_range64.php +9 -0
- data/spec/fixtures/index_weights.php +9 -0
- data/spec/fixtures/keywords.php +8 -0
- data/spec/fixtures/limits.php +9 -0
- data/spec/fixtures/limits_cutoff.php +9 -0
- data/spec/fixtures/limits_max.php +9 -0
- data/spec/fixtures/limits_max_cutoff.php +9 -0
- data/spec/fixtures/match_all.php +9 -0
- data/spec/fixtures/match_any.php +9 -0
- data/spec/fixtures/match_boolean.php +9 -0
- data/spec/fixtures/match_extended.php +9 -0
- data/spec/fixtures/match_extended2.php +9 -0
- data/spec/fixtures/match_fullscan.php +9 -0
- data/spec/fixtures/match_phrase.php +9 -0
- data/spec/fixtures/max_query_time.php +9 -0
- data/spec/fixtures/miltiple_queries.php +12 -0
- data/spec/fixtures/ranking_bm25.php +9 -0
- data/spec/fixtures/ranking_fieldmask.php +9 -0
- data/spec/fixtures/ranking_matchany.php +9 -0
- data/spec/fixtures/ranking_none.php +9 -0
- data/spec/fixtures/ranking_proximity.php +9 -0
- data/spec/fixtures/ranking_proximity_bm25.php +9 -0
- data/spec/fixtures/ranking_wordcount.php +9 -0
- data/spec/fixtures/retries.php +9 -0
- data/spec/fixtures/retries_delay.php +9 -0
- data/spec/fixtures/select.php +9 -0
- data/spec/fixtures/set_override.php +11 -0
- data/spec/fixtures/sort_attr_asc.php +9 -0
- data/spec/fixtures/sort_attr_desc.php +9 -0
- data/spec/fixtures/sort_expr.php +9 -0
- data/spec/fixtures/sort_extended.php +9 -0
- data/spec/fixtures/sort_relevance.php +9 -0
- data/spec/fixtures/sort_time_segments.php +9 -0
- data/spec/fixtures/sphinxapi.php +1633 -0
- data/spec/fixtures/update_attributes.php +8 -0
- data/spec/fixtures/update_attributes_mva.php +8 -0
- data/spec/fixtures/weights.php +9 -0
- data/spec/spec_helper.rb +24 -0
- data/spec/sphinx/sphinx-id64.conf +67 -0
- data/spec/sphinx/sphinx.conf +67 -0
- data/spec/sphinx/sphinx_test.sql +88 -0
- data/sphinx.gemspec +127 -0
- metadata +142 -0
data/.gitignore
ADDED
data/README.rdoc
ADDED
@@ -0,0 +1,243 @@
|
|
1
|
+
= Sphinx Client API
|
2
|
+
|
3
|
+
This document gives an overview of what is Sphinx itself and how to use it
|
4
|
+
from your Ruby on Rails application. For more information or documentation,
|
5
|
+
please go to http://www.sphinxsearch.com
|
6
|
+
|
7
|
+
== Sphinx
|
8
|
+
|
9
|
+
Sphinx is a standalone full-text search engine, meant to provide fast,
|
10
|
+
size-efficient and relevant fulltext search functions to other applications.
|
11
|
+
Sphinx was specially designed to integrate well with SQL databases and
|
12
|
+
scripting languages. Currently built-in data sources support fetching data
|
13
|
+
either via direct connection to MySQL, or from an XML pipe.
|
14
|
+
|
15
|
+
Simplest way to communicate with Sphinx is to use <tt>searchd</tt> —
|
16
|
+
a daemon to search through full text indexes from external software.
|
17
|
+
|
18
|
+
== Installation
|
19
|
+
|
20
|
+
There are two options when approaching sphinx plugin installation:
|
21
|
+
|
22
|
+
* using the gem (recommended)
|
23
|
+
* install as a Rails plugin
|
24
|
+
|
25
|
+
To install as a gem, add this to your environment.rb:
|
26
|
+
|
27
|
+
config.gem 'sphinx', :source => 'http://gemcutter.org'
|
28
|
+
|
29
|
+
And then run the command:
|
30
|
+
|
31
|
+
sudo rake gems:install
|
32
|
+
|
33
|
+
To install Sphinx as a Rails plugin use this:
|
34
|
+
|
35
|
+
script/plugin install git://github.com/kpumuk/sphinx.git
|
36
|
+
|
37
|
+
== Documentation
|
38
|
+
|
39
|
+
Complete Sphinx plugin documentation could be found here:
|
40
|
+
http://kpumuk.github.com/sphinx
|
41
|
+
|
42
|
+
Also you can find documentation on rdoc.info:
|
43
|
+
http://rdoc.info/projects/kpumuk/sphinx
|
44
|
+
|
45
|
+
You can build the documentation locally by running:
|
46
|
+
|
47
|
+
rake yard
|
48
|
+
|
49
|
+
Please note: you should have yard gem installed on your system:
|
50
|
+
|
51
|
+
sudo gem install yard --source http://gemcutter.org
|
52
|
+
|
53
|
+
Complete Sphinx API documentation could be found on Sphinx Search Engine
|
54
|
+
site: http://www.sphinxsearch.com/docs/current.html
|
55
|
+
This plugin is fully compatible with original PHP API implementation.
|
56
|
+
|
57
|
+
== Ruby naming conventions
|
58
|
+
|
59
|
+
Sphinx Client API supports Ruby naming conventions, so every API
|
60
|
+
method name is in underscored, lowercase form:
|
61
|
+
|
62
|
+
SetServer -> set_server
|
63
|
+
RunQueries -> run_queries
|
64
|
+
SetMatchMode -> set_match_mode
|
65
|
+
|
66
|
+
Every method is aliased to a corresponding one from standard Sphinx
|
67
|
+
API, so you can use both <tt>SetServer</tt> and <tt>set_server</tt>
|
68
|
+
with no difference.
|
69
|
+
|
70
|
+
There are three exceptions to this naming rule:
|
71
|
+
|
72
|
+
GetLastError -> last_error
|
73
|
+
GetLastWarning -> last_warning
|
74
|
+
IsConnectError -> connect_error?
|
75
|
+
|
76
|
+
Of course, all of them are aliased to the original method names.
|
77
|
+
|
78
|
+
== Using multiple Sphinx servers
|
79
|
+
|
80
|
+
Since we actively use this plugin in our Scribd development workflow,
|
81
|
+
several methods have been added to accommodate our needs.
|
82
|
+
You can find documentation on Ruby-specific methods in documentation:
|
83
|
+
http://rdoc.info/projects/kpumuk/sphinx
|
84
|
+
|
85
|
+
First of all, we added support of multiple Sphinx servers to balance
|
86
|
+
load between them. Also it means that in case of any problems with one
|
87
|
+
of servers, library will try to fetch the results from another one.
|
88
|
+
Every subsequent request will be executed on the next server in the list
|
89
|
+
(round-robin technique).
|
90
|
+
|
91
|
+
sphinx.set_servers([
|
92
|
+
{ :host => 'browse01.local', :port => 3312 },
|
93
|
+
{ :host => 'browse02.local', :port => 3312 },
|
94
|
+
{ :host => 'browse03.local', :port => 3312 }
|
95
|
+
])
|
96
|
+
|
97
|
+
By default library will try to fetch results from a single server, and
|
98
|
+
fail if it does not respond. To set up the number of retries being performed,
|
99
|
+
you can use second (additional) parameter of the <tt>set_connect_timeout</tt>
|
100
|
+
and <tt>set_request_timeout</tt> methods:
|
101
|
+
|
102
|
+
sphinx.set_connect_timeout(1, 3)
|
103
|
+
sphinx.set_request_timeout(1, 3)
|
104
|
+
|
105
|
+
There is a big difference between these two methods. First will affect
|
106
|
+
only requests experiencing problems with connection (socket error,
|
107
|
+
pipe error, etc), second will be used when request is broken somehow
|
108
|
+
(temporary searchd error, incomplete reply, etc). The workflow looks like
|
109
|
+
this:
|
110
|
+
|
111
|
+
1. Increase retries number. If it is less than or equal to the configured value,
|
112
|
+
try to connect to the next server. Otherwise, raise an error.
|
113
|
+
2. In case of connection problem go to 1.
|
114
|
+
3. Increase request retries number. If it is less than or equal to the configured
|
115
|
+
value, try to perform request. Otherwise, raise an error.
|
116
|
+
4. In case of connection problem go to 1.
|
117
|
+
5. In case of request problem, go to 3.
|
118
|
+
6. Parse and return response.
|
119
|
+
|
120
|
+
Conclusions:
|
121
|
+
|
122
|
+
1. Request could be performed <tt>connect_retries</tt> * <tt>request_retries</tt>
|
123
|
+
times. E.g., it could be tried <tt>request_retries</tt> times on each
|
124
|
+
of <tt>connect_retries</tt> servers (when you have 1 server configured,
|
125
|
+
but <tt>connect_retries</tt> is 5, library will try to connect to this
|
126
|
+
server 5 times).
|
127
|
+
2. Request could be tried to execute on each server <tt>1..request_retries</tt>
|
128
|
+
times. In case of connection problem, request will be moved to another
|
129
|
+
server immediately.
|
130
|
+
|
131
|
+
Usually you will set <tt>connect_retries</tt> equal to servers number,
|
132
|
+
so you will be sure each failing request will be performed on all servers.
|
133
|
+
This means that if one of the servers is live, but others are dead, your request
|
134
|
+
will be finally executed successfully.
|
135
|
+
|
136
|
+
== Sphinx constants
|
137
|
+
|
138
|
+
Most Sphinx API methods expect special constants to be passed.
|
139
|
+
For example:
|
140
|
+
|
141
|
+
sphinx.set_match_mode(Sphinx::SPH_MATCH_ANY)
|
142
|
+
|
143
|
+
Please note that these constants are defined in the <tt>Sphinx</tt>
|
144
|
+
module. You can use symbols or strings instead of these awful
|
145
|
+
constants:
|
146
|
+
|
147
|
+
sphinx.set_match_mode(:any)
|
148
|
+
sphinx.set_match_mode('any')
|
149
|
+
|
150
|
+
== Setting query filters
|
151
|
+
|
152
|
+
Every <tt>set_</tt> method returns <tt>Sphinx::Client</tt> object itself.
|
153
|
+
It means that you can chain filtering methods:
|
154
|
+
|
155
|
+
results = Sphinx::Client.new.
|
156
|
+
set_match_mode(:any).
|
157
|
+
set_ranking_mode(:bm25).
|
158
|
+
set_id_range(10, 1000).
|
159
|
+
query('test')
|
160
|
+
|
161
|
+
There is a handy ability to set query parameters directly in the <tt>query</tt>
|
162
|
+
call. If block does not accept any parameters, it will be eval'ed inside
|
163
|
+
Sphinx::Client instance:
|
164
|
+
|
165
|
+
results = Sphinx::Client.new.query('test') do
|
166
|
+
match_mode :any
|
167
|
+
ranking_mode :bm25
|
168
|
+
id_range 10, 1000
|
169
|
+
end
|
170
|
+
|
171
|
+
As you can see, in this case you can omit the <tt>set_</tt> prefix for
|
172
|
+
these methods. If block accepts a parameter, sphinx instance will be
|
173
|
+
passed into the block. In this case you should use full method names
|
174
|
+
including the <tt>set_</tt> prefix:
|
175
|
+
|
176
|
+
results = Sphinx::Client.new.query('test') do |sphinx|
|
177
|
+
sphinx.set_match_mode :any
|
178
|
+
sphinx.set_ranking_mode :bm25
|
179
|
+
sphinx.set_id_range 10, 1000
|
180
|
+
end
|
181
|
+
|
182
|
+
== Example
|
183
|
+
|
184
|
+
This simple example illustrates base connection establishing,
|
185
|
+
search results retrieving, and excerpts building. Please note
|
186
|
+
how it performs the database select using ActiveRecord to
|
187
|
+
preserve the order of records established by Sphinx.
|
188
|
+
|
189
|
+
sphinx = Sphinx::Client.new
|
190
|
+
result = sphinx.query('test')
|
191
|
+
ids = result['matches'].map { |match| match['id'] }
|
192
|
+
posts = Post.all :conditions => { :id => ids },
|
193
|
+
:order => "FIELD(id,#{ids.join(',')})"
|
194
|
+
|
195
|
+
docs = posts.map(&:body)
|
196
|
+
excerpts = sphinx.build_excerpts(docs, 'index', 'test')
|
197
|
+
|
198
|
+
== Logging
|
199
|
+
|
200
|
+
You can ask Sphinx client API to log its activity to some log. In
|
201
|
+
order to do that you can pass a logger object into the <tt>Sphinx::Client</tt>
|
202
|
+
constructor:
|
203
|
+
|
204
|
+
require 'logger'
|
205
|
+
Sphinx::Client.new(Logger.new(STDOUT)).query('test')
|
206
|
+
|
207
|
+
Logger object should respond to methods :debug, :info, and :warn, and
|
208
|
+
accept blocks (this is what standard Ruby <tt>Logger</tt> class does).
|
209
|
+
Here is what you will see in your log:
|
210
|
+
|
211
|
+
* <tt>DEBUG</tt> -- <tt>query</tt>, <tt>add_query</tt>, <tt>run_queries</tt>
|
212
|
+
method calls with configured filters.
|
213
|
+
* <tt>INFO</tt> -- initialization with Sphinx version, servers change,
|
214
|
+
attempts to re-connect, and all attempts to do an API call with server
|
215
|
+
where request being performed.
|
216
|
+
* <tt>WARN</tt> -- various connection and socket errors.
|
217
|
+
|
218
|
+
== Support
|
219
|
+
|
220
|
+
Source code:
|
221
|
+
http://github.com/kpumuk/sphinx
|
222
|
+
|
223
|
+
To suggest a feature or report a bug:
|
224
|
+
http://github.com/kpumuk/sphinx/issues
|
225
|
+
|
226
|
+
Project home page:
|
227
|
+
http://kpumuk.info/projects/ror-plugins/sphinx
|
228
|
+
|
229
|
+
== Credits
|
230
|
+
|
231
|
+
Dmytro Shteflyuk <kpumuk@kpumuk.info> http://kpumuk.info
|
232
|
+
|
233
|
+
Andrew Aksyonoff http://sphinxsearch.com
|
234
|
+
|
235
|
+
Special thanks to Alexey Kovyrin <alexey@kovyrin.net> http://blog.kovyrin.net
|
236
|
+
|
237
|
+
Special thanks to Mike Perham http://www.mikeperham.com for his awesome
|
238
|
+
memcache-client gem, from which the latest Sphinx gem borrowed its new socket handling.
|
239
|
+
|
240
|
+
== License
|
241
|
+
|
242
|
+
This library is distributed under the terms of the Ruby license.
|
243
|
+
You can freely distribute/modify this library.
|
data/Rakefile
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
require 'rake'
|
2
|
+
|
3
|
+
# Gem packaging and Gemcutter release tasks. When Jeweler cannot be loaded,
# the tasks are skipped and an installation hint is printed instead.
begin
  require 'jeweler'

  Jeweler::Tasks.new do |spec|
    spec.name        = 'sphinx'
    spec.summary     = 'Sphinx Client API for Ruby'
    spec.description = 'An easy interface to Sphinx standalone full-text search engine. It is implemented as plugin for Ruby on Rails, but can be easily used as standalone library.'
    spec.email       = 'kpumuk@kpumuk.info'
    spec.homepage    = 'http://github.com/kpumuk/sphinx'
    spec.authors     = ['Dmytro Shteflyuk']
  end

  Jeweler::GemcutterTasks.new
rescue LoadError
  puts 'Jeweler not available. Install it with: sudo gem install jeweler'
end

# Spec runner task, also wired up as the default rake task. When RSpec cannot
# be loaded, neither task is defined and an installation hint is printed.
begin
  require 'spec/rake/spectask'

  desc 'Default: run specs'
  task :default => [:spec]

  desc 'Test the sphinx plugin'
  Spec::Rake::SpecTask.new do |spec_task|
    spec_task.libs << 'lib'
    spec_task.pattern = 'spec/*_spec.rb'
  end
rescue LoadError
  puts 'RSpec not available. Install it with: sudo gem install rspec'
end

# Documentation task. Setting the PRIVATE environment variable includes
# protected and private members; otherwise private members are excluded.
begin
  require 'yard'

  YARD::Rake::YardocTask.new(:yard) do |yardoc|
    yardoc.options = ['--title', 'Sphinx Client API Documentation']
    yardoc.options.concat(ENV['PRIVATE'] ? ['--protected', '--private'] : ['--no-private'])
  end
rescue LoadError
  puts 'Yard not available. Install it with: sudo gem install yard'
end
|
data/VERSION.yml
ADDED
data/init.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
# Load the Sphinx client library from the lib/ directory next to this file.
require File.dirname(__FILE__) + '/lib/sphinx'
|
@@ -0,0 +1,26 @@
|
|
1
|
+
# A simple wrapper around <tt>Net::BufferedIO</tt> performing
# non-blocking select.
#
# @private
class Sphinx::BufferedIO < Net::BufferedIO # :nodoc:
  # Maximum number of bytes requested from the underlying IO per read.
  BUFSIZE = 1024 * 16

  # The override below is only installed on interpreters older than 1.9.1.
  if RUBY_VERSION < '1.9.1'
    # Appends up to BUFSIZE bytes read from the underlying IO to the read
    # buffer, using a non-blocking read.
    #
    # When no data is available yet, the method waits in <tt>IO.select</tt>
    # for the IO to become readable and then reads again. The wait is bounded
    # by <tt>@read_timeout</tt>; when that is +nil+ the wait is unbounded.
    #
    # @raise [Timeout::Error] when the IO does not become readable within
    #   <tt>@read_timeout</tt> seconds.
    def rbuf_fill
      @rbuf << @io.read_nonblock(BUFSIZE)
    rescue Errno::EWOULDBLOCK, Errno::EAGAIN
      # Always park in IO.select instead of retrying immediately: a nil
      # timeout makes select block until the IO is readable, which avoids
      # spinning the CPU in a tight read loop when no timeout is configured.
      retry if IO.select([@io], nil, nil, @read_timeout)
      raise Timeout::Error, 'IO timeout'
    end
  end

  # Forwards socket option changes to the wrapped IO object.
  #
  # @param args arguments passed through unchanged to <tt>@io.setsockopt</tt>.
  def setsockopt(*args)
    @io.setsockopt(*args)
  end
end
|