sphinx 0.9.9.2117

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. data/.gitignore +4 -0
  2. data/README.rdoc +243 -0
  3. data/Rakefile +45 -0
  4. data/VERSION.yml +5 -0
  5. data/init.rb +1 -0
  6. data/lib/sphinx/buffered_io.rb +26 -0
  7. data/lib/sphinx/client.rb +2426 -0
  8. data/lib/sphinx/constants.rb +179 -0
  9. data/lib/sphinx/indifferent_access.rb +152 -0
  10. data/lib/sphinx/request.rb +121 -0
  11. data/lib/sphinx/response.rb +71 -0
  12. data/lib/sphinx/server.rb +170 -0
  13. data/lib/sphinx/timeout.rb +31 -0
  14. data/lib/sphinx.rb +51 -0
  15. data/spec/client_response_spec.rb +170 -0
  16. data/spec/client_spec.rb +669 -0
  17. data/spec/client_validations_spec.rb +859 -0
  18. data/spec/fixtures/default_search.php +8 -0
  19. data/spec/fixtures/default_search_index.php +8 -0
  20. data/spec/fixtures/excerpt_custom.php +11 -0
  21. data/spec/fixtures/excerpt_default.php +8 -0
  22. data/spec/fixtures/excerpt_flags.php +12 -0
  23. data/spec/fixtures/field_weights.php +9 -0
  24. data/spec/fixtures/filter.php +9 -0
  25. data/spec/fixtures/filter_exclude.php +9 -0
  26. data/spec/fixtures/filter_float_range.php +9 -0
  27. data/spec/fixtures/filter_float_range_exclude.php +9 -0
  28. data/spec/fixtures/filter_range.php +9 -0
  29. data/spec/fixtures/filter_range_exclude.php +9 -0
  30. data/spec/fixtures/filter_range_int64.php +10 -0
  31. data/spec/fixtures/filter_ranges.php +10 -0
  32. data/spec/fixtures/filters.php +10 -0
  33. data/spec/fixtures/filters_different.php +13 -0
  34. data/spec/fixtures/geo_anchor.php +9 -0
  35. data/spec/fixtures/group_by_attr.php +9 -0
  36. data/spec/fixtures/group_by_attrpair.php +9 -0
  37. data/spec/fixtures/group_by_day.php +9 -0
  38. data/spec/fixtures/group_by_day_sort.php +9 -0
  39. data/spec/fixtures/group_by_month.php +9 -0
  40. data/spec/fixtures/group_by_week.php +9 -0
  41. data/spec/fixtures/group_by_year.php +9 -0
  42. data/spec/fixtures/group_distinct.php +10 -0
  43. data/spec/fixtures/id_range.php +9 -0
  44. data/spec/fixtures/id_range64.php +9 -0
  45. data/spec/fixtures/index_weights.php +9 -0
  46. data/spec/fixtures/keywords.php +8 -0
  47. data/spec/fixtures/limits.php +9 -0
  48. data/spec/fixtures/limits_cutoff.php +9 -0
  49. data/spec/fixtures/limits_max.php +9 -0
  50. data/spec/fixtures/limits_max_cutoff.php +9 -0
  51. data/spec/fixtures/match_all.php +9 -0
  52. data/spec/fixtures/match_any.php +9 -0
  53. data/spec/fixtures/match_boolean.php +9 -0
  54. data/spec/fixtures/match_extended.php +9 -0
  55. data/spec/fixtures/match_extended2.php +9 -0
  56. data/spec/fixtures/match_fullscan.php +9 -0
  57. data/spec/fixtures/match_phrase.php +9 -0
  58. data/spec/fixtures/max_query_time.php +9 -0
  59. data/spec/fixtures/miltiple_queries.php +12 -0
  60. data/spec/fixtures/ranking_bm25.php +9 -0
  61. data/spec/fixtures/ranking_fieldmask.php +9 -0
  62. data/spec/fixtures/ranking_matchany.php +9 -0
  63. data/spec/fixtures/ranking_none.php +9 -0
  64. data/spec/fixtures/ranking_proximity.php +9 -0
  65. data/spec/fixtures/ranking_proximity_bm25.php +9 -0
  66. data/spec/fixtures/ranking_wordcount.php +9 -0
  67. data/spec/fixtures/retries.php +9 -0
  68. data/spec/fixtures/retries_delay.php +9 -0
  69. data/spec/fixtures/select.php +9 -0
  70. data/spec/fixtures/set_override.php +11 -0
  71. data/spec/fixtures/sort_attr_asc.php +9 -0
  72. data/spec/fixtures/sort_attr_desc.php +9 -0
  73. data/spec/fixtures/sort_expr.php +9 -0
  74. data/spec/fixtures/sort_extended.php +9 -0
  75. data/spec/fixtures/sort_relevance.php +9 -0
  76. data/spec/fixtures/sort_time_segments.php +9 -0
  77. data/spec/fixtures/sphinxapi.php +1633 -0
  78. data/spec/fixtures/update_attributes.php +8 -0
  79. data/spec/fixtures/update_attributes_mva.php +8 -0
  80. data/spec/fixtures/weights.php +9 -0
  81. data/spec/spec_helper.rb +24 -0
  82. data/spec/sphinx/sphinx-id64.conf +67 -0
  83. data/spec/sphinx/sphinx.conf +67 -0
  84. data/spec/sphinx/sphinx_test.sql +88 -0
  85. data/sphinx.gemspec +127 -0
  86. metadata +142 -0
data/.gitignore ADDED
@@ -0,0 +1,4 @@
1
+ rdoc
2
+ doc
3
+ .yardoc
4
+ pkg
data/README.rdoc ADDED
@@ -0,0 +1,243 @@
1
+ = Sphinx Client API
2
+
3
+ This document gives an overview of what is Sphinx itself and how to use it
4
+ from your Ruby on Rails application. For more information or documentation,
5
+ please go to http://www.sphinxsearch.com
6
+
7
+ == Sphinx
8
+
9
+ Sphinx is a standalone full-text search engine, meant to provide fast,
10
+ size-efficient and relevant fulltext search functions to other applications.
11
+ Sphinx was specially designed to integrate well with SQL databases and
12
+ scripting languages. Currently built-in data sources support fetching data
13
+ either via direct connection to MySQL, or from an XML pipe.
14
+
15
+ Simplest way to communicate with Sphinx is to use <tt>searchd</tt> —
16
+ a daemon to search through full text indexes from external software.
17
+
18
+ == Installation
19
+
20
+ There are two options when approaching sphinx plugin installation:
21
+
22
+ * using the gem (recommended)
23
+ * install as a Rails plugin
24
+
25
+ To install as a gem, add this to your environment.rb:
26
+
27
+ config.gem 'sphinx', :source => 'http://gemcutter.org'
28
+
29
+ And then run the command:
30
+
31
+ sudo rake gems:install
32
+
33
+ To install Sphinx as a Rails plugin use this:
34
+
35
+ script/plugin install git://github.com/kpumuk/sphinx.git
36
+
37
+ == Documentation
38
+
39
+ Complete Sphinx plugin documentation can be found here:
40
+ http://kpumuk.github.com/sphinx
41
+
42
+ Also you can find documentation on rdoc.info:
43
+ http://rdoc.info/projects/kpumuk/sphinx
44
+
45
+ You can build the documentation locally by running:
46
+
47
+ rake yard
48
+
49
+ Please note: you should have yard gem installed on your system:
50
+
51
+ sudo gem install yard --source http://gemcutter.org
52
+
53
+ Complete Sphinx API documentation can be found on the Sphinx Search Engine
54
+ site: http://www.sphinxsearch.com/docs/current.html
55
+ This plugin is fully compatible with original PHP API implementation.
56
+
57
+ == Ruby naming conventions
58
+
59
+ Sphinx Client API supports Ruby naming conventions, so every API
60
+ method name is in underscored, lowercase form:
61
+
62
+ SetServer -> set_server
63
+ RunQueries -> run_queries
64
+ SetMatchMode -> set_match_mode
65
+
66
+ Every method is aliased to a corresponding one from standard Sphinx
67
+ API, so you can use both <tt>SetServer</tt> and <tt>set_server</tt>
68
+ with no difference.
69
+
70
+ There are three exceptions to this naming rule:
71
+
72
+ GetLastError -> last_error
73
+ GetLastWarning -> last_warning
74
+ IsConnectError -> connect_error?
75
+
76
+ Of course, all of them are aliased to the original method names.
77
+
78
+ == Using multiple Sphinx servers
79
+
80
+ Since we actively use this plugin in our Scribd development workflow,
81
+ several methods have been added to accommodate our needs.
82
+ You can find documentation on Ruby-specific methods in documentation:
83
+ http://rdoc.info/projects/kpumuk/sphinx
84
+
85
+ First of all, we added support of multiple Sphinx servers to balance
86
+ load between them. Also it means that in case of any problems with one
87
+ of servers, library will try to fetch the results from another one.
88
+ Every subsequent request will be executed on the next server in the list
89
+ (round-robin technique).
90
+
91
+ sphinx.set_servers([
92
+ { :host => 'browse01.local', :port => 3312 },
93
+ { :host => 'browse02.local', :port => 3312 },
94
+ { :host => 'browse03.local', :port => 3312 }
95
+ ])
96
+
97
+ By default library will try to fetch results from a single server, and
98
+ fail if it does not respond. To setup number of retries being performed,
99
+ you can use second (additional) parameter of the <tt>set_connect_timeout</tt>
100
+ and <tt>set_request_timeout</tt> methods:
101
+
102
+ sphinx.set_connect_timeout(1, 3)
103
+ sphinx.set_request_timeout(1, 3)
104
+
105
+ There is a big difference between these two methods. The first affects
106
+ only requests experiencing connection problems (socket error,
107
+ pipe error, etc.); the second is used when the request is broken somehow
108
+ (temporary searchd error, incomplete reply, etc). The workflow looks like
109
+ this:
110
+
111
+ 1. Increase retries number. If it is less than or equal to the configured value,
112
+ try to connect to the next server. Otherwise, raise an error.
113
+ 2. In case of connection problem go to 1.
114
+ 3. Increase request retries number. If it is less than or equal to the configured
115
+ value, try to perform request. Otherwise, raise an error.
116
+ 4. In case of connection problem go to 1.
117
+ 5. In case of request problem, go to 3.
118
+ 6. Parse and return response.
119
+
120
+ Conclusions:
121
+
122
+ 1. Request could be performed <tt>connect_retries</tt> * <tt>request_retries</tt>
123
+ times. E.g., it could be tried <tt>request_retries</tt> times on each
124
+ of <tt>connect_retries</tt> servers (when you have 1 server configured,
125
+ but <tt>connect_retries</tt> is 5, library will try to connect to this
126
+ server 5 times).
127
+ 2. Request could be tried to execute on each server <tt>1..request_retries</tt>
128
+ times. In case of connection problem, request will be moved to another
129
+ server immediately.
130
+
131
+ Usually you will set <tt>connect_retries</tt> equal to servers number,
132
+ so you will be sure each failing request will be performed on all servers.
133
+ This means that if one of the servers is live, but the others are dead, your request
134
+ will eventually be executed successfully.
135
+
136
+ == Sphinx constants
137
+
138
+ Most Sphinx API methods expect special constants to be passed.
139
+ For example:
140
+
141
+ sphinx.set_match_mode(Sphinx::SPH_MATCH_ANY)
142
+
143
+ Please note that these constants are defined in the <tt>Sphinx</tt>
144
+ module. You can use symbols or strings instead of these awful
145
+ constants:
146
+
147
+ sphinx.set_match_mode(:any)
148
+ sphinx.set_match_mode('any')
149
+
150
+ == Setting query filters
151
+
152
+ Every <tt>set_</tt> method returns <tt>Sphinx::Client</tt> object itself.
153
+ It means that you can chain filtering methods:
154
+
155
+ results = Sphinx::Client.new.
156
+ set_match_mode(:any).
157
+ set_ranking_mode(:bm25).
158
+ set_id_range(10, 1000).
159
+ query('test')
160
+
161
+ There is also a handy way to set query parameters directly in the <tt>query</tt>
162
+ call. If block does not accept any parameters, it will be eval'ed inside
163
+ Sphinx::Client instance:
164
+
165
+ results = Sphinx::Client.new.query('test') do
166
+ match_mode :any
167
+ ranking_mode :bm25
168
+ id_range 10, 1000
169
+ end
170
+
171
+ As you can see, in this case you can omit the <tt>set_</tt> prefix for
172
+ these methods. If the block accepts a parameter, the sphinx instance will be
173
+ passed into the block. In this case you should use full method names
174
+ including the <tt>set_</tt> prefix:
175
+
176
+ results = Sphinx::Client.new.query('test') do |sphinx|
177
+ sphinx.set_match_mode :any
178
+ sphinx.set_ranking_mode :bm25
179
+ sphinx.set_id_range 10, 1000
180
+ end
181
+
182
+ == Example
183
+
184
+ This simple example illustrates base connection establishing,
185
+ search results retrieving, and excerpts building. Please note
186
+ how it performs the database select using ActiveRecord to
187
+ preserve the order of records established by Sphinx.
188
+
189
+ sphinx = Sphinx::Client.new
190
+ result = sphinx.query('test')
191
+ ids = result['matches'].map { |match| match['id'] }
192
+ posts = Post.all :conditions => { :id => ids },
193
+ :order => "FIELD(id,#{ids.join(',')})"
194
+
195
+ docs = posts.map(&:body)
196
+ excerpts = sphinx.build_excerpts(docs, 'index', 'test')
197
+
198
+ == Logging
199
+
200
+ You can ask Sphinx client API to log its activity to some log. In
201
+ order to do that you can pass a logger object into the <tt>Sphinx::Client</tt>
202
+ constructor:
203
+
204
+ require 'logger'
205
+ Sphinx::Client.new(Logger.new(STDOUT)).query('test')
206
+
207
+ Logger object should respond to methods :debug, :info, and :warn, and
208
+ accept blocks (this is what standard Ruby <tt>Logger</tt> class does).
209
+ Here is what you will see in your log:
210
+
211
+ * <tt>DEBUG</tt> -- <tt>query</tt>, <tt>add_query</tt>, <tt>run_queries</tt>
212
+ method calls with configured filters.
213
+ * <tt>INFO</tt> -- initialization with Sphinx version, servers change,
214
+ attempts to re-connect, and all attempts to do an API call with server
215
+ where request being performed.
216
+ * <tt>WARN</tt> -- various connection and socket errors.
217
+
218
+ == Support
219
+
220
+ Source code:
221
+ http://github.com/kpumuk/sphinx
222
+
223
+ To suggest a feature or report a bug:
224
+ http://github.com/kpumuk/sphinx/issues
225
+
226
+ Project home page:
227
+ http://kpumuk.info/projects/ror-plugins/sphinx
228
+
229
+ == Credits
230
+
231
+ Dmytro Shteflyuk <kpumuk@kpumuk.info> http://kpumuk.info
232
+
233
+ Andrew Aksyonoff http://sphinxsearch.com
234
+
235
+ Special thanks to Alexey Kovyrin <alexey@kovyrin.net> http://blog.kovyrin.net
236
+
237
+ Special thanks to Mike Perham http://www.mikeperham.com for his awesome
238
+ memcache-client gem, from which the latest Sphinx gem got its new socket handling.
239
+
240
+ == License
241
+
242
+ This library is distributed under the terms of the Ruby license.
243
+ You can freely distribute/modify this library.
data/Rakefile ADDED
@@ -0,0 +1,45 @@
1
+ require 'rake'
2
+
3
+ begin # Gem packaging tasks; skipped with an install hint when jeweler cannot be loaded
4
+ require 'jeweler'
5
+ Jeweler::Tasks.new do |gemspec|
6
+ gemspec.name = 'sphinx'
7
+ gemspec.summary = 'Sphinx Client API for Ruby'
8
+ gemspec.description = 'An easy interface to Sphinx standalone full-text search engine. It is implemented as plugin for Ruby on Rails, but can be easily used as standalone library.'
9
+ gemspec.email = 'kpumuk@kpumuk.info'
10
+ gemspec.homepage = 'http://github.com/kpumuk/sphinx'
11
+ gemspec.authors = ['Dmytro Shteflyuk']
12
+ end
13
+ Jeweler::GemcutterTasks.new # NOTE(review): presumably registers the gemcutter release tasks -- confirm against jeweler docs
14
+ rescue LoadError
15
+ puts 'Jeweler not available. Install it with: sudo gem install jeweler'
16
+ end
17
+
18
+ begin # Spec tasks; skipped with an install hint when the RSpec rake task cannot be loaded
19
+ require 'spec/rake/spectask'
20
+
21
+ desc 'Default: run specs'
22
+ task :default => :spec
23
+
24
+ desc 'Test the sphinx plugin'
25
+ Spec::Rake::SpecTask.new do |t|
26
+ t.libs << 'lib'
27
+ t.pattern = 'spec/*_spec.rb' # only spec files directly under spec/, not in subdirectories
28
+ end
29
+ rescue LoadError
30
+ puts 'RSpec not available. Install it with: sudo gem install rspec'
31
+ end
32
+
33
+ begin # Documentation task (rake yard); skipped with an install hint when yard cannot be loaded
34
+ require 'yard'
35
+ YARD::Rake::YardocTask.new(:yard) do |t|
36
+ t.options = ['--title', 'Sphinx Client API Documentation']
37
+ if ENV['PRIVATE'] # set PRIVATE in the environment to pass --protected and --private to yard
38
+ t.options.concat ['--protected', '--private']
39
+ else
40
+ t.options << '--no-private'
41
+ end
42
+ end
43
+ rescue LoadError
44
+ puts 'Yard not available. Install it with: sudo gem install yard'
45
+ end
data/VERSION.yml ADDED
@@ -0,0 +1,5 @@
1
+ ---
2
+ :major: 0
3
+ :minor: 9
4
+ :patch: 9
5
+ :build: 2117
data/init.rb ADDED
@@ -0,0 +1 @@
1
+ require File.dirname(__FILE__) + '/lib/sphinx'
data/lib/sphinx/buffered_io.rb ADDED
@@ -0,0 +1,26 @@
1
+ # A thin subclass of <tt>Net::BufferedIO</tt> that, on Ruby versions below 1.9.1,
2
+ # fills the read buffer with non-blocking reads and waits in IO.select for up to @read_timeout.
3
+ # It also exposes setsockopt, forwarding the call to the underlying @io object.
4
+ # @private
5
+ class Sphinx::BufferedIO < Net::BufferedIO # :nodoc:
6
+ BUFSIZE = 1024 * 16 # maximum number of bytes requested per read_nonblock call
7
+
8
+ if RUBY_VERSION < '1.9.1' # String comparison; rbuf_fill is overridden only when it holds
9
+ def rbuf_fill # appends the next chunk read from @io to @rbuf
10
+ begin
11
+ @rbuf << @io.read_nonblock(BUFSIZE)
12
+ rescue Errno::EWOULDBLOCK # the non-blocking read had nothing to return yet
13
+ retry unless @read_timeout # no timeout configured: re-attempt the read immediately
14
+ if IO.select([@io], nil, nil, @read_timeout) # wait until @io is readable or the timeout elapses
15
+ retry
16
+ else
17
+ raise Timeout::Error, 'IO timeout' # select gave up: @io did not become readable in time
18
+ end
19
+ end
20
+ end
21
+ end
22
+
23
+ def setsockopt(*args) # forwards all arguments unchanged to @io.setsockopt
24
+ @io.setsockopt(*args)
25
+ end
26
+ end