sphinx 0.9.9.2117

Sign up to get free protection for your applications and to get access to all the features.
Files changed (86) hide show
  1. data/.gitignore +4 -0
  2. data/README.rdoc +243 -0
  3. data/Rakefile +45 -0
  4. data/VERSION.yml +5 -0
  5. data/init.rb +1 -0
  6. data/lib/sphinx/buffered_io.rb +26 -0
  7. data/lib/sphinx/client.rb +2426 -0
  8. data/lib/sphinx/constants.rb +179 -0
  9. data/lib/sphinx/indifferent_access.rb +152 -0
  10. data/lib/sphinx/request.rb +121 -0
  11. data/lib/sphinx/response.rb +71 -0
  12. data/lib/sphinx/server.rb +170 -0
  13. data/lib/sphinx/timeout.rb +31 -0
  14. data/lib/sphinx.rb +51 -0
  15. data/spec/client_response_spec.rb +170 -0
  16. data/spec/client_spec.rb +669 -0
  17. data/spec/client_validations_spec.rb +859 -0
  18. data/spec/fixtures/default_search.php +8 -0
  19. data/spec/fixtures/default_search_index.php +8 -0
  20. data/spec/fixtures/excerpt_custom.php +11 -0
  21. data/spec/fixtures/excerpt_default.php +8 -0
  22. data/spec/fixtures/excerpt_flags.php +12 -0
  23. data/spec/fixtures/field_weights.php +9 -0
  24. data/spec/fixtures/filter.php +9 -0
  25. data/spec/fixtures/filter_exclude.php +9 -0
  26. data/spec/fixtures/filter_float_range.php +9 -0
  27. data/spec/fixtures/filter_float_range_exclude.php +9 -0
  28. data/spec/fixtures/filter_range.php +9 -0
  29. data/spec/fixtures/filter_range_exclude.php +9 -0
  30. data/spec/fixtures/filter_range_int64.php +10 -0
  31. data/spec/fixtures/filter_ranges.php +10 -0
  32. data/spec/fixtures/filters.php +10 -0
  33. data/spec/fixtures/filters_different.php +13 -0
  34. data/spec/fixtures/geo_anchor.php +9 -0
  35. data/spec/fixtures/group_by_attr.php +9 -0
  36. data/spec/fixtures/group_by_attrpair.php +9 -0
  37. data/spec/fixtures/group_by_day.php +9 -0
  38. data/spec/fixtures/group_by_day_sort.php +9 -0
  39. data/spec/fixtures/group_by_month.php +9 -0
  40. data/spec/fixtures/group_by_week.php +9 -0
  41. data/spec/fixtures/group_by_year.php +9 -0
  42. data/spec/fixtures/group_distinct.php +10 -0
  43. data/spec/fixtures/id_range.php +9 -0
  44. data/spec/fixtures/id_range64.php +9 -0
  45. data/spec/fixtures/index_weights.php +9 -0
  46. data/spec/fixtures/keywords.php +8 -0
  47. data/spec/fixtures/limits.php +9 -0
  48. data/spec/fixtures/limits_cutoff.php +9 -0
  49. data/spec/fixtures/limits_max.php +9 -0
  50. data/spec/fixtures/limits_max_cutoff.php +9 -0
  51. data/spec/fixtures/match_all.php +9 -0
  52. data/spec/fixtures/match_any.php +9 -0
  53. data/spec/fixtures/match_boolean.php +9 -0
  54. data/spec/fixtures/match_extended.php +9 -0
  55. data/spec/fixtures/match_extended2.php +9 -0
  56. data/spec/fixtures/match_fullscan.php +9 -0
  57. data/spec/fixtures/match_phrase.php +9 -0
  58. data/spec/fixtures/max_query_time.php +9 -0
  59. data/spec/fixtures/miltiple_queries.php +12 -0
  60. data/spec/fixtures/ranking_bm25.php +9 -0
  61. data/spec/fixtures/ranking_fieldmask.php +9 -0
  62. data/spec/fixtures/ranking_matchany.php +9 -0
  63. data/spec/fixtures/ranking_none.php +9 -0
  64. data/spec/fixtures/ranking_proximity.php +9 -0
  65. data/spec/fixtures/ranking_proximity_bm25.php +9 -0
  66. data/spec/fixtures/ranking_wordcount.php +9 -0
  67. data/spec/fixtures/retries.php +9 -0
  68. data/spec/fixtures/retries_delay.php +9 -0
  69. data/spec/fixtures/select.php +9 -0
  70. data/spec/fixtures/set_override.php +11 -0
  71. data/spec/fixtures/sort_attr_asc.php +9 -0
  72. data/spec/fixtures/sort_attr_desc.php +9 -0
  73. data/spec/fixtures/sort_expr.php +9 -0
  74. data/spec/fixtures/sort_extended.php +9 -0
  75. data/spec/fixtures/sort_relevance.php +9 -0
  76. data/spec/fixtures/sort_time_segments.php +9 -0
  77. data/spec/fixtures/sphinxapi.php +1633 -0
  78. data/spec/fixtures/update_attributes.php +8 -0
  79. data/spec/fixtures/update_attributes_mva.php +8 -0
  80. data/spec/fixtures/weights.php +9 -0
  81. data/spec/spec_helper.rb +24 -0
  82. data/spec/sphinx/sphinx-id64.conf +67 -0
  83. data/spec/sphinx/sphinx.conf +67 -0
  84. data/spec/sphinx/sphinx_test.sql +88 -0
  85. data/sphinx.gemspec +127 -0
  86. metadata +142 -0
data/.gitignore ADDED
@@ -0,0 +1,4 @@
1
+ rdoc
2
+ doc
3
+ .yardoc
4
+ pkg
data/README.rdoc ADDED
@@ -0,0 +1,243 @@
1
+ = Sphinx Client API
2
+
3
+ This document gives an overview of what Sphinx itself is and how to use it
4
+ from your Ruby on Rails application. For more information or documentation,
5
+ please go to http://www.sphinxsearch.com
6
+
7
+ == Sphinx
8
+
9
+ Sphinx is a standalone full-text search engine, meant to provide fast,
10
+ size-efficient and relevant fulltext search functions to other applications.
11
+ Sphinx was specially designed to integrate well with SQL databases and
12
+ scripting languages. Currently built-in data sources support fetching data
13
+ either via direct connection to MySQL, or from an XML pipe.
14
+
15
+ The simplest way to communicate with Sphinx is to use <tt>searchd</tt> —
16
+ a daemon to search through full text indexes from external software.
17
+
18
+ == Installation
19
+
20
+ There are two options when approaching sphinx plugin installation:
21
+
22
+ * using the gem (recommended)
23
+ * install as a Rails plugin
24
+
25
+ To install as a gem, add this to your environment.rb:
26
+
27
+ config.gem 'sphinx', :source => 'http://gemcutter.org'
28
+
29
+ And then run the command:
30
+
31
+ sudo rake gems:install
32
+
33
+ To install Sphinx as a Rails plugin use this:
34
+
35
+ script/plugin install git://github.com/kpumuk/sphinx.git
36
+
37
+ == Documentation
38
+
39
+ Complete Sphinx plugin documentation can be found here:
40
+ http://kpumuk.github.com/sphinx
41
+
42
+ Also you can find documentation on rdoc.info:
43
+ http://rdoc.info/projects/kpumuk/sphinx
44
+
45
+ You can build the documentation locally by running:
46
+
47
+ rake yard
48
+
49
+ Please note: you should have yard gem installed on your system:
50
+
51
+ sudo gem install yard --source http://gemcutter.org
52
+
53
+ Complete Sphinx API documentation can be found on the Sphinx Search Engine
54
+ site: http://www.sphinxsearch.com/docs/current.html
55
+ This plugin is fully compatible with original PHP API implementation.
56
+
57
+ == Ruby naming conventions
58
+
59
+ Sphinx Client API supports Ruby naming conventions, so every API
60
+ method name is in underscored, lowercase form:
61
+
62
+ SetServer -> set_server
63
+ RunQueries -> run_queries
64
+ SetMatchMode -> set_match_mode
65
+
66
+ Every method is aliased to a corresponding one from standard Sphinx
67
+ API, so you can use both <tt>SetServer</tt> and <tt>set_server</tt>
68
+ with no difference.
69
+
70
+ There are three exceptions to this naming rule:
71
+
72
+ GetLastError -> last_error
73
+ GetLastWarning -> last_warning
74
+ IsConnectError -> connect_error?
75
+
76
+ Of course, all of them are aliased to the original method names.
77
+
78
+ == Using multiple Sphinx servers
79
+
80
+ Since we actively use this plugin in our Scribd development workflow,
81
+ several methods have been added to accommodate our needs.
82
+ You can find documentation on Ruby-specific methods in documentation:
83
+ http://rdoc.info/projects/kpumuk/sphinx
84
+
85
+ First of all, we added support of multiple Sphinx servers to balance
86
+ load between them. Also it means that in case of any problems with one
87
+ of servers, library will try to fetch the results from another one.
88
+ Every subsequent request will be executed on the next server in the list
89
+ (round-robin technique).
90
+
91
+ sphinx.set_servers([
92
+ { :host => 'browse01.local', :port => 3312 },
93
+ { :host => 'browse02.local', :port => 3312 },
94
+ { :host => 'browse03.local', :port => 3312 }
95
+ ])
96
+
97
+ By default library will try to fetch results from a single server, and
98
+ fail if it does not respond. To setup number of retries being performed,
99
+ you can use second (additional) parameter of the <tt>set_connect_timeout</tt>
100
+ and <tt>set_request_timeout</tt> methods:
101
+
102
+ sphinx.set_connect_timeout(1, 3)
103
+ sphinx.set_request_timeout(1, 3)
104
+
105
+ There is a big difference between these two methods. First will affect
106
+ only requests experiencing problems with connection (socket error,
107
+ pipe error, etc), second will be used when request is broken somehow
108
+ (temporary searchd error, incomplete reply, etc). The workflow looks like
109
+ this:
110
+
111
+ 1. Increase retries number. If it is less than or equal to the configured value,
112
+ try to connect to the next server. Otherwise, raise an error.
113
+ 2. In case of connection problem go to 1.
114
+ 3. Increase request retries number. If it is less than or equal to the configured
115
+ value, try to perform request. Otherwise, raise an error.
116
+ 4. In case of connection problem go to 1.
117
+ 5. In case of request problem, go to 3.
118
+ 6. Parse and return response.
119
+
120
+ Conclusions:
121
+
122
+ 1. Request could be performed <tt>connect_retries</tt> * <tt>request_retries</tt>
123
+ times. E.g., it could be tried <tt>request_retries</tt> times on each
124
+ of <tt>connect_retries</tt> servers (when you have 1 server configured,
125
+ but <tt>connect_retries</tt> is 5, library will try to connect to this
126
+ server 5 times).
127
+ 2. Request could be tried to execute on each server <tt>1..request_retries</tt>
128
+ times. In case of connection problem, request will be moved to another
129
+ server immediately.
130
+
131
+ Usually you will set <tt>connect_retries</tt> equal to servers number,
132
+ so you will be sure each failing request will be performed on all servers.
133
+ This means that if one of the servers is live, but others are dead, your request
134
+ will be finally executed successfully.
135
+
136
+ == Sphinx constants
137
+
138
+ Most Sphinx API methods expect special constants to be passed.
139
+ For example:
140
+
141
+ sphinx.set_match_mode(Sphinx::SPH_MATCH_ANY)
142
+
143
+ Please note that these constants are defined in the <tt>Sphinx</tt>
144
+ module. You can use symbols or strings instead of these awful
145
+ constants:
146
+
147
+ sphinx.set_match_mode(:any)
148
+ sphinx.set_match_mode('any')
149
+
150
+ == Setting query filters
151
+
152
+ Every <tt>set_</tt> method returns <tt>Sphinx::Client</tt> object itself.
153
+ It means that you can chain filtering methods:
154
+
155
+ results = Sphinx::Client.new.
156
+ set_match_mode(:any).
157
+ set_ranking_mode(:bm25).
158
+ set_id_range(10, 1000).
159
+ query('test')
160
+
161
+ There is a handy ability to set query parameters directly in the <tt>query</tt>
162
+ call. If block does not accept any parameters, it will be eval'ed inside
163
+ Sphinx::Client instance:
164
+
165
+ results = Sphinx::Client.new.query('test') do
166
+ match_mode :any
167
+ ranking_mode :bm25
168
+ id_range 10, 1000
169
+ end
170
+
171
+ As you can see, in this case you can omit the <tt>set_</tt> prefix for
172
+ these methods. If the block accepts a parameter, the sphinx instance will be
173
+ passed into the block. In this case you should use full method names
174
+ including the <tt>set_</tt> prefix:
175
+
176
+ results = Sphinx::Client.new.query('test') do |sphinx|
177
+ sphinx.set_match_mode :any
178
+ sphinx.set_ranking_mode :bm25
179
+ sphinx.set_id_range 10, 1000
180
+ end
181
+
182
+ == Example
183
+
184
+ This simple example illustrates base connection establishing,
185
+ search results retrieving, and excerpts building. Please note
186
+ how it performs the database select using ActiveRecord to
187
+ save the order of records established by Sphinx.
188
+
189
+ sphinx = Sphinx::Client.new
190
+ result = sphinx.query('test')
191
+ ids = result['matches'].map { |match| match['id'] }
192
+ posts = Post.all :conditions => { :id => ids },
193
+ :order => "FIELD(id,#{ids.join(',')})"
194
+
195
+ docs = posts.map(&:body)
196
+ excerpts = sphinx.build_excerpts(docs, 'index', 'test')
197
+
198
+ == Logging
199
+
200
+ You can ask Sphinx client API to log its activity to some log. In
201
+ order to do that you can pass a logger object into the <tt>Sphinx::Client</tt>
202
+ constructor:
203
+
204
+ require 'logger'
205
+ Sphinx::Client.new(Logger.new(STDOUT)).query('test')
206
+
207
+ Logger object should respond to methods :debug, :info, and :warn, and
208
+ accept blocks (this is what standard Ruby <tt>Logger</tt> class does).
209
+ Here is what you will see in your log:
210
+
211
+ * <tt>DEBUG</tt> -- <tt>query</tt>, <tt>add_query</tt>, <tt>run_queries</tt>
212
+ method calls with configured filters.
213
+ * <tt>INFO</tt> -- initialization with Sphinx version, servers change,
214
+ attempts to re-connect, and all attempts to do an API call with server
215
+ where request being performed.
216
+ * <tt>WARN</tt> -- various connection and socket errors.
217
+
218
+ == Support
219
+
220
+ Source code:
221
+ http://github.com/kpumuk/sphinx
222
+
223
+ To suggest a feature or report a bug:
224
+ http://github.com/kpumuk/sphinx/issues
225
+
226
+ Project home page:
227
+ http://kpumuk.info/projects/ror-plugins/sphinx
228
+
229
+ == Credits
230
+
231
+ Dmytro Shteflyuk <kpumuk@kpumuk.info> http://kpumuk.info
232
+
233
+ Andrew Aksyonoff http://sphinxsearch.com
234
+
235
+ Special thanks to Alexey Kovyrin <alexey@kovyrin.net> http://blog.kovyrin.net
236
+
237
+ Special thanks to Mike Perham http://www.mikeperham.com for his awesome
238
+ memcache-client gem, from which the latest Sphinx gem got its new socket handling.
239
+
240
+ == License
241
+
242
+ This library is distributed under the terms of the Ruby license.
243
+ You can freely distribute/modify this library.
data/Rakefile ADDED
@@ -0,0 +1,45 @@
1
+ require 'rake'
2
+
3
+ begin
4
+ require 'jeweler'
5
+ Jeweler::Tasks.new do |gemspec|
6
+ gemspec.name = 'sphinx'
7
+ gemspec.summary = 'Sphinx Client API for Ruby'
8
+ gemspec.description = 'An easy interface to Sphinx standalone full-text search engine. It is implemented as plugin for Ruby on Rails, but can be easily used as standalone library.'
9
+ gemspec.email = 'kpumuk@kpumuk.info'
10
+ gemspec.homepage = 'http://github.com/kpumuk/sphinx'
11
+ gemspec.authors = ['Dmytro Shteflyuk']
12
+ end
13
+ Jeweler::GemcutterTasks.new
14
+ rescue LoadError
15
+ puts 'Jeweler not available. Install it with: sudo gem install jeweler'
16
+ end
17
+
18
+ begin
19
+ require 'spec/rake/spectask'
20
+
21
+ desc 'Default: run specs'
22
+ task :default => :spec
23
+
24
+ desc 'Test the sphinx plugin'
25
+ Spec::Rake::SpecTask.new do |t|
26
+ t.libs << 'lib'
27
+ t.pattern = 'spec/*_spec.rb'
28
+ end
29
+ rescue LoadError
30
+ puts 'RSpec not available. Install it with: sudo gem install rspec'
31
+ end
32
+
33
+ begin
34
+ require 'yard'
35
+ YARD::Rake::YardocTask.new(:yard) do |t|
36
+ t.options = ['--title', 'Sphinx Client API Documentation']
37
+ if ENV['PRIVATE']
38
+ t.options.concat ['--protected', '--private']
39
+ else
40
+ t.options << '--no-private'
41
+ end
42
+ end
43
+ rescue LoadError
44
+ puts 'Yard not available. Install it with: sudo gem install yard'
45
+ end
data/VERSION.yml ADDED
@@ -0,0 +1,5 @@
1
+ ---
2
+ :major: 0
3
+ :minor: 9
4
+ :patch: 9
5
+ :build: 2117
data/init.rb ADDED
@@ -0,0 +1 @@
1
+ require File.dirname(__FILE__) + '/lib/sphinx'
data/lib/sphinx/buffered_io.rb ADDED
@@ -0,0 +1,26 @@
1
+ # A simple wrapper around <tt>Net::BufferedIO</tt> performing
2
+ # non-blocking select.
3
+ #
4
+ # @private
5
+ class Sphinx::BufferedIO < Net::BufferedIO # :nodoc:
6
+ BUFSIZE = 1024 * 16
7
+
8
+ if RUBY_VERSION < '1.9.1'
9
+ def rbuf_fill
10
+ begin
11
+ @rbuf << @io.read_nonblock(BUFSIZE)
12
+ rescue Errno::EWOULDBLOCK
13
+ retry unless @read_timeout
14
+ if IO.select([@io], nil, nil, @read_timeout)
15
+ retry
16
+ else
17
+ raise Timeout::Error, 'IO timeout'
18
+ end
19
+ end
20
+ end
21
+ end
22
+
23
+ def setsockopt(*args)
24
+ @io.setsockopt(*args)
25
+ end
26
+ end