freelancing-god-thinking-sphinx 0.9.8 → 0.9.10

Sign up to get free protection for your applications and to get access to all the features.
Files changed (40) hide show
  1. data/README +20 -1
  2. data/lib/thinking_sphinx.rb +30 -2
  3. data/lib/thinking_sphinx/active_record.rb +25 -11
  4. data/lib/thinking_sphinx/active_record/delta.rb +46 -53
  5. data/lib/thinking_sphinx/active_record/has_many_association.rb +1 -1
  6. data/lib/thinking_sphinx/active_record/search.rb +8 -1
  7. data/lib/thinking_sphinx/adapters/abstract_adapter.rb +27 -0
  8. data/lib/thinking_sphinx/adapters/mysql_adapter.rb +9 -0
  9. data/lib/thinking_sphinx/adapters/postgresql_adapter.rb +84 -0
  10. data/lib/thinking_sphinx/association.rb +4 -0
  11. data/lib/thinking_sphinx/attribute.rb +4 -2
  12. data/lib/thinking_sphinx/collection.rb +105 -0
  13. data/lib/thinking_sphinx/configuration.rb +112 -75
  14. data/lib/thinking_sphinx/field.rb +11 -3
  15. data/lib/thinking_sphinx/index.rb +119 -26
  16. data/lib/thinking_sphinx/index/builder.rb +30 -22
  17. data/lib/thinking_sphinx/index/faux_column.rb +13 -0
  18. data/lib/thinking_sphinx/rails_additions.rb +13 -1
  19. data/lib/thinking_sphinx/search.rb +40 -81
  20. data/spec/unit/thinking_sphinx/active_record/delta_spec.rb +73 -127
  21. data/spec/unit/thinking_sphinx/active_record/has_many_association_spec.rb +2 -2
  22. data/spec/unit/thinking_sphinx/active_record/search_spec.rb +26 -0
  23. data/spec/unit/thinking_sphinx/active_record_spec.rb +94 -22
  24. data/spec/unit/thinking_sphinx/attribute_spec.rb +8 -4
  25. data/spec/unit/thinking_sphinx/collection_spec.rb +71 -0
  26. data/spec/unit/thinking_sphinx/configuration_spec.rb +149 -113
  27. data/spec/unit/thinking_sphinx/field_spec.rb +13 -4
  28. data/spec/unit/thinking_sphinx/index/builder_spec.rb +1 -0
  29. data/spec/unit/thinking_sphinx/index/faux_column_spec.rb +27 -0
  30. data/spec/unit/thinking_sphinx/index_spec.rb +79 -29
  31. data/spec/unit/thinking_sphinx/search_spec.rb +114 -74
  32. data/spec/unit/thinking_sphinx_spec.rb +21 -0
  33. data/tasks/thinking_sphinx_tasks.rb +24 -10
  34. metadata +21 -8
  35. data/lib/riddle.rb +0 -26
  36. data/lib/riddle/client.rb +0 -639
  37. data/lib/riddle/client/filter.rb +0 -44
  38. data/lib/riddle/client/message.rb +0 -65
  39. data/lib/riddle/client/response.rb +0 -84
  40. data/lib/test.rb +0 -46
@@ -40,6 +40,23 @@ describe ThinkingSphinx do
40
40
  ThinkingSphinx.deltas_enabled?.should be_true
41
41
  end
42
42
 
43
+ it "should update indexes by default" do
44
+ ThinkingSphinx.updates_enabled = nil
45
+ ThinkingSphinx.updates_enabled?.should be_true
46
+ end
47
+
48
+ it "should disable index updating" do
49
+ ThinkingSphinx.updates_enabled = false
50
+ ThinkingSphinx.updates_enabled?.should be_false
51
+ end
52
+
53
+ it "should enable index updating" do
54
+ ThinkingSphinx.updates_enabled = false
55
+ ThinkingSphinx.updates_enabled?.should be_false
56
+ ThinkingSphinx.updates_enabled = true
57
+ ThinkingSphinx.updates_enabled?.should be_true
58
+ end
59
+
43
60
  describe "use_group_by_shortcut? method" do
44
61
  after :each do
45
62
  ::ActiveRecord::Base.connection.unstub_method(:select_all)
@@ -82,6 +99,10 @@ describe ThinkingSphinx do
82
99
 
83
100
  describe "if not using MySQL" do
84
101
  before :each do
102
+ unless ::ActiveRecord::ConnectionAdapters.const_defined?(:PostgreSQLAdapter)
103
+ pending "No PostgreSQL"
104
+ return
105
+ end
85
106
  @connection = ::ActiveRecord::ConnectionAdapters::PostgreSQLAdapter.stub_instance(
86
107
  :select_all => true
87
108
  )
@@ -6,16 +6,22 @@ namespace :thinking_sphinx do
6
6
  Rake::Task[:merb_env].invoke if defined?(Merb)
7
7
  end
8
8
 
9
+ desc "Stop if running, then start a Sphinx searchd daemon using Thinking Sphinx's settings"
10
+ task :running_start => :app_env do
11
+ Rake::Task["thinking_sphinx:stop"].invoke if sphinx_running?
12
+ Rake::Task["thinking_sphinx:start"].invoke
13
+ end
14
+
9
15
  desc "Start a Sphinx searchd daemon using Thinking Sphinx's settings"
10
16
  task :start => :app_env do
11
- config = ThinkingSphinx::Configuration.new
17
+ config = ThinkingSphinx::Configuration.instance
12
18
 
13
19
  FileUtils.mkdir_p config.searchd_file_path
14
20
  raise RuntimeError, "searchd is already running." if sphinx_running?
15
21
 
16
22
  Dir["#{config.searchd_file_path}/*.spl"].each { |file| File.delete(file) }
17
-
18
- cmd = "searchd --config #{config.config_file}"
23
+
24
+ cmd = "#{config.bin_path}searchd --config #{config.config_file}"
19
25
  puts cmd
20
26
  system cmd
21
27
 
@@ -41,15 +47,21 @@ namespace :thinking_sphinx do
41
47
 
42
48
  desc "Generate the Sphinx configuration file using Thinking Sphinx's settings"
43
49
  task :configure => :app_env do
44
- ThinkingSphinx::Configuration.new.build
50
+ config = ThinkingSphinx::Configuration.instance
51
+ puts "Generating Configuration to #{config.config_file}"
52
+ config.build
45
53
  end
46
54
 
47
55
  desc "Index data for Sphinx using Thinking Sphinx's settings"
48
- task :index => [:app_env, :configure] do
49
- config = ThinkingSphinx::Configuration.new
50
-
56
+ task :index => :app_env do
57
+ config = ThinkingSphinx::Configuration.instance
58
+ unless ENV["INDEX_ONLY"] == "true"
59
+ puts "Generating Configuration to #{config.config_file}"
60
+ config.build
61
+ end
62
+
51
63
  FileUtils.mkdir_p config.searchd_file_path
52
- cmd = "indexer --config #{config.config_file} --all"
64
+ cmd = "#{config.bin_path}indexer --config #{config.config_file} --all"
53
65
  cmd << " --rotate" if sphinx_running?
54
66
  puts cmd
55
67
  system cmd
@@ -57,6 +69,8 @@ namespace :thinking_sphinx do
57
69
  end
58
70
 
59
71
  namespace :ts do
72
+ desc "Stop if running, then start a Sphinx searchd daemon using Thinking Sphinx's settings"
73
+ task :run => "thinking_sphinx:running_start"
60
74
  desc "Start a Sphinx searchd daemon using Thinking Sphinx's settings"
61
75
  task :start => "thinking_sphinx:start"
62
76
  desc "Stop Sphinx using Thinking Sphinx's settings"
@@ -72,7 +86,7 @@ namespace :ts do
72
86
  end
73
87
 
74
88
  def sphinx_pid
75
- config = ThinkingSphinx::Configuration.new
89
+ config = ThinkingSphinx::Configuration.instance
76
90
 
77
91
  if File.exists?(config.pid_file)
78
92
  `cat #{config.pid_file}`[/\d+/]
@@ -83,4 +97,4 @@ end
83
97
 
84
98
  def sphinx_running?
85
99
  sphinx_pid && `ps -p #{sphinx_pid} | wc -l`.to_i > 1
86
- end
100
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: freelancing-god-thinking-sphinx
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.8
4
+ version: 0.9.10
5
5
  platform: ruby
6
6
  authors:
7
7
  - Pat Allan
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2008-07-08 00:00:00 -07:00
12
+ date: 2008-10-18 00:00:00 -07:00
13
13
  default_executable:
14
14
  dependencies: []
15
15
 
@@ -22,18 +22,16 @@ extensions: []
22
22
  extra_rdoc_files: []
23
23
 
24
24
  files:
25
- - lib/riddle/client/filter.rb
26
- - lib/riddle/client/message.rb
27
- - lib/riddle/client/response.rb
28
- - lib/riddle/client.rb
29
- - lib/riddle.rb
30
- - lib/test.rb
31
25
  - lib/thinking_sphinx/active_record/delta.rb
32
26
  - lib/thinking_sphinx/active_record/has_many_association.rb
33
27
  - lib/thinking_sphinx/active_record/search.rb
34
28
  - lib/thinking_sphinx/active_record.rb
29
+ - lib/thinking_sphinx/adapters/abstract_adapter.rb
30
+ - lib/thinking_sphinx/adapters/mysql_adapter.rb
31
+ - lib/thinking_sphinx/adapters/postgresql_adapter.rb
35
32
  - lib/thinking_sphinx/association.rb
36
33
  - lib/thinking_sphinx/attribute.rb
34
+ - lib/thinking_sphinx/collection.rb
37
35
  - lib/thinking_sphinx/configuration.rb
38
36
  - lib/thinking_sphinx/field.rb
39
37
  - lib/thinking_sphinx/index/builder.rb
@@ -46,6 +44,20 @@ files:
46
44
  - README
47
45
  - tasks/thinking_sphinx_tasks.rb
48
46
  - tasks/thinking_sphinx_tasks.rake
47
+ - spec/unit/thinking_sphinx/active_record/delta_spec.rb
48
+ - spec/unit/thinking_sphinx/active_record/has_many_association_spec.rb
49
+ - spec/unit/thinking_sphinx/active_record/search_spec.rb
50
+ - spec/unit/thinking_sphinx/active_record_spec.rb
51
+ - spec/unit/thinking_sphinx/association_spec.rb
52
+ - spec/unit/thinking_sphinx/attribute_spec.rb
53
+ - spec/unit/thinking_sphinx/collection_spec.rb
54
+ - spec/unit/thinking_sphinx/configuration_spec.rb
55
+ - spec/unit/thinking_sphinx/field_spec.rb
56
+ - spec/unit/thinking_sphinx/index/builder_spec.rb
57
+ - spec/unit/thinking_sphinx/index/faux_column_spec.rb
58
+ - spec/unit/thinking_sphinx/index_spec.rb
59
+ - spec/unit/thinking_sphinx/search_spec.rb
60
+ - spec/unit/thinking_sphinx_spec.rb
49
61
  has_rdoc: true
50
62
  homepage: http://ts.freelancing-gods.com
51
63
  post_install_message:
@@ -81,6 +93,7 @@ test_files:
81
93
  - spec/unit/thinking_sphinx/active_record_spec.rb
82
94
  - spec/unit/thinking_sphinx/association_spec.rb
83
95
  - spec/unit/thinking_sphinx/attribute_spec.rb
96
+ - spec/unit/thinking_sphinx/collection_spec.rb
84
97
  - spec/unit/thinking_sphinx/configuration_spec.rb
85
98
  - spec/unit/thinking_sphinx/field_spec.rb
86
99
  - spec/unit/thinking_sphinx/index/builder_spec.rb
data/lib/riddle.rb DELETED
@@ -1,26 +0,0 @@
1
- require 'socket'
2
- require 'timeout'
3
- require 'riddle/client'
4
- require 'riddle/client/filter'
5
- require 'riddle/client/message'
6
- require 'riddle/client/response'
7
-
8
- module Riddle #:nodoc:
9
- class ConnectionError < StandardError #:nodoc:
10
- end
11
-
12
- module Version #:nodoc:
13
- Major = 0
14
- Minor = 9
15
- Tiny = 8
16
- # Revision number for RubyForge's sake, taken from what Sphinx
17
- # outputs to the command line.
18
- Rev = 1371
19
- # Release number to mark my own fixes, beyond feature parity with
20
- # Sphinx itself.
21
- Release = 0
22
-
23
- String = [Major, Minor, Tiny].join('.')
24
- GemVersion = [Major, Minor, Tiny, Rev, Release].join('.')
25
- end
26
- end
data/lib/riddle/client.rb DELETED
@@ -1,639 +0,0 @@
1
- module Riddle
2
- class VersionError < StandardError; end
3
- class ResponseError < StandardError; end
4
-
5
- # This class was heavily based on the existing Client API by Dmytro Shteflyuk
6
- # and Alexy Kovyrin. Their code worked fine, I just wanted something a bit
7
- # more Ruby-ish (ie. lowercase and underscored method names). I also have
8
- # used a few helper classes, just to neaten things up.
9
- #
10
- # Feel free to use it wherever. Send bug reports, patches, comments and
11
- # suggestions to pat at freelancing-gods dot com.
12
- #
13
- # Most properties of the client are accessible through attribute accessors,
14
- # and where relevant use symbols instead of the long constants common in
15
- # other clients.
16
- # Some examples:
17
- #
18
- # client.sort_mode = :extended
19
- # client.sort_by = "birthday DESC"
20
- # client.match_mode = :extended
21
- #
22
- # To add a filter, you will need to create a Filter object:
23
- #
24
- # client.filters << Riddle::Client::Filter.new("birthday",
25
- # Time.at(1975, 1, 1).to_i..Time.at(1985, 1, 1).to_i, false)
26
- #
27
- class Client
28
- Commands = {
29
- :search => 0, # SEARCHD_COMMAND_SEARCH
30
- :excerpt => 1, # SEARCHD_COMMAND_EXCERPT
31
- :update => 2, # SEARCHD_COMMAND_UPDATE
32
- :keywords => 3 # SEARCHD_COMMAND_KEYWORDS
33
- }
34
-
35
- Versions = {
36
- :search => 0x113, # VER_COMMAND_SEARCH
37
- :excerpt => 0x100, # VER_COMMAND_EXCERPT
38
- :update => 0x101, # VER_COMMAND_UPDATE
39
- :keywords => 0x100 # VER_COMMAND_KEYWORDS
40
- }
41
-
42
- Statuses = {
43
- :ok => 0, # SEARCHD_OK
44
- :error => 1, # SEARCHD_ERROR
45
- :retry => 2, # SEARCHD_RETRY
46
- :warning => 3 # SEARCHD_WARNING
47
- }
48
-
49
- MatchModes = {
50
- :all => 0, # SPH_MATCH_ALL
51
- :any => 1, # SPH_MATCH_ANY
52
- :phrase => 2, # SPH_MATCH_PHRASE
53
- :boolean => 3, # SPH_MATCH_BOOLEAN
54
- :extended => 4, # SPH_MATCH_EXTENDED
55
- :fullscan => 5, # SPH_MATCH_FULLSCAN
56
- :extended2 => 6 # SPH_MATCH_EXTENDED2
57
- }
58
-
59
- RankModes = {
60
- :proximity_bm25 => 0, # SPH_RANK_PROXIMITY_BM25
61
- :bm25 => 1, # SPH_RANK_BM25
62
- :none => 2, # SPH_RANK_NONE
63
- :wordcount => 3 # SPH_RANK_WORDCOUNT
64
- }
65
-
66
- SortModes = {
67
- :relevance => 0, # SPH_SORT_RELEVANCE
68
- :attr_desc => 1, # SPH_SORT_ATTR_DESC
69
- :attr_asc => 2, # SPH_SORT_ATTR_ASC
70
- :time_segments => 3, # SPH_SORT_TIME_SEGMENTS
71
- :extended => 4, # SPH_SORT_EXTENDED
72
- :expr => 5 # SPH_SORT_EXPR
73
- }
74
-
75
- AttributeTypes = {
76
- :integer => 1, # SPH_ATTR_INTEGER
77
- :timestamp => 2, # SPH_ATTR_TIMESTAMP
78
- :ordinal => 3, # SPH_ATTR_ORDINAL
79
- :bool => 4, # SPH_ATTR_BOOL
80
- :float => 5, # SPH_ATTR_FLOAT
81
- :multi => 0x40000000 # SPH_ATTR_MULTI
82
- }
83
-
84
- GroupFunctions = {
85
- :day => 0, # SPH_GROUPBY_DAY
86
- :week => 1, # SPH_GROUPBY_WEEK
87
- :month => 2, # SPH_GROUPBY_MONTH
88
- :year => 3, # SPH_GROUPBY_YEAR
89
- :attr => 4, # SPH_GROUPBY_ATTR
90
- :attrpair => 5 # SPH_GROUPBY_ATTRPAIR
91
- }
92
-
93
- FilterTypes = {
94
- :values => 0, # SPH_FILTER_VALUES
95
- :range => 1, # SPH_FILTER_RANGE
96
- :float_range => 2 # SPH_FILTER_FLOATRANGE
97
- }
98
-
99
- attr_accessor :server, :port, :offset, :limit, :max_matches,
100
- :match_mode, :sort_mode, :sort_by, :weights, :id_range, :filters,
101
- :group_by, :group_function, :group_clause, :group_distinct, :cut_off,
102
- :retry_count, :retry_delay, :anchor, :index_weights, :rank_mode,
103
- :max_query_time, :field_weights, :timeout
104
- attr_reader :queue
105
-
106
- # Can instantiate with a specific server and port - otherwise it assumes
107
- # defaults of localhost and 3312 respectively. All other settings can be
108
- # accessed and changed via the attribute accessors.
109
- def initialize(server=nil, port=nil)
110
- @server = server || "localhost"
111
- @port = port || 3312
112
-
113
- # defaults
114
- @offset = 0
115
- @limit = 20
116
- @max_matches = 1000
117
- @match_mode = :all
118
- @sort_mode = :relevance
119
- @sort_by = ''
120
- @weights = []
121
- @id_range = 0..0
122
- @filters = []
123
- @group_by = ''
124
- @group_function = :day
125
- @group_clause = '@group desc'
126
- @group_distinct = ''
127
- @cut_off = 0
128
- @retry_count = 0
129
- @retry_delay = 0
130
- @anchor = {}
131
- # string keys are index names, integer values are weightings
132
- @index_weights = {}
133
- @rank_mode = :proximity_bm25
134
- @max_query_time = 0
135
- # string keys are field names, integer values are weightings
136
- @field_weights = {}
137
- @timeout = 0
138
-
139
- @queue = []
140
- end
141
-
142
- # Reset attributes and settings to defaults.
143
- def reset
144
- # defaults
145
- @offset = 0
146
- @limit = 20
147
- @max_matches = 1000
148
- @match_mode = :all
149
- @sort_mode = :relevance
150
- @sort_by = ''
151
- @weights = []
152
- @id_range = 0..0
153
- @filters = []
154
- @group_by = ''
155
- @group_function = :day
156
- @group_clause = '@group desc'
157
- @group_distinct = ''
158
- @cut_off = 0
159
- @retry_count = 0
160
- @retry_delay = 0
161
- @anchor = {}
162
- # string keys are index names, integer values are weightings
163
- @index_weights = {}
164
- @rank_mode = :proximity_bm25
165
- @max_query_time = 0
166
- # string keys are field names, integer values are weightings
167
- @field_weights = {}
168
- @timeout = 0
169
- end
170
-
171
- # Set the geo-anchor point - with the names of the attributes that contain
172
- # the latitude and longitude (in radians), and the reference position.
173
- # Note that for geocoding to work properly, you must also set
174
- # match_mode to :extended. To sort results by distance, you will
175
- # need to set sort_by to '@geodist asc' (with sort_mode :extended). Sphinx
176
- # expects latitude and longitude to be returned from your SQL source
177
- # in radians.
178
- #
179
- # Example:
180
- # client.set_anchor('lat', -0.6591741, 'long', 2.530770)
181
- #
182
- def set_anchor(lat_attr, lat, long_attr, long)
183
- @anchor = {
184
- :latitude_attribute => lat_attr,
185
- :latitude => lat,
186
- :longitude_attribute => long_attr,
187
- :longitude => long
188
- }
189
- end
190
-
191
- # Append a query to the queue. This uses the same parameters as the query
192
- # method.
193
- def append_query(search, index = '*', comments = '')
194
- @queue << query_message(search, index, comments)
195
- end
196
-
197
- # Run all the queries currently in the queue. This will return an array of
198
- # results hashes.
199
- def run
200
- response = Response.new request(:search, @queue)
201
-
202
- results = @queue.collect do
203
- result = {
204
- :matches => [],
205
- :fields => [],
206
- :attributes => {},
207
- :attribute_names => [],
208
- :words => {}
209
- }
210
-
211
- result[:status] = response.next_int
212
- case result[:status]
213
- when Statuses[:warning]
214
- result[:warning] = response.next
215
- when Statuses[:error]
216
- result[:error] = response.next
217
- next result
218
- end
219
-
220
- result[:fields] = response.next_array
221
-
222
- attributes = response.next_int
223
- for i in 0...attributes
224
- attribute_name = response.next
225
- type = response.next_int
226
-
227
- result[:attributes][attribute_name] = type
228
- result[:attribute_names] << attribute_name
229
- end
230
-
231
- matches = response.next_int
232
- is_64_bit = response.next_int
233
- for i in 0...matches
234
- doc = is_64_bit > 0 ? response.next_64bit_int : response.next_int
235
- weight = response.next_int
236
-
237
- result[:matches] << {:doc => doc, :weight => weight, :index => i, :attributes => {}}
238
- result[:attribute_names].each do |attr|
239
- result[:matches].last[:attributes][attr] = attribute_from_type(
240
- result[:attributes][attr], response
241
- )
242
- end
243
- end
244
-
245
- result[:total] = response.next_int.to_i || 0
246
- result[:total_found] = response.next_int.to_i || 0
247
- result[:time] = ('%.3f' % (response.next_int / 1000.0)).to_f || 0.0
248
-
249
- words = response.next_int
250
- for i in 0...words
251
- word = response.next
252
- docs = response.next_int
253
- hits = response.next_int
254
- result[:words][word] = {:docs => docs, :hits => hits}
255
- end
256
-
257
- result
258
- end
259
-
260
- @queue.clear
261
- results
262
- end
263
-
264
- # Query the Sphinx daemon - defaulting to all indexes, but you can specify
265
- # a specific one if you wish. The search parameter should be a string
266
- # following Sphinx's expectations.
267
- #
268
- # The object returned from this method is a hash with the following keys:
269
- #
270
- # * :matches
271
- # * :fields
272
- # * :attributes
273
- # * :attribute_names
274
- # * :words
275
- # * :total
276
- # * :total_found
277
- # * :time
278
- # * :status
279
- # * :warning (if appropriate)
280
- # * :error (if appropriate)
281
- #
282
- # The key <tt>:matches</tt> returns an array of hashes - the actual search
283
- # results. Each hash has the document id (<tt>:doc</tt>), the result
284
- # weighting (<tt>:weight</tt>), and a hash of the attributes for the
285
- # document (<tt>:attributes</tt>).
286
- #
287
- # The <tt>:fields</tt> and <tt>:attribute_names</tt> keys return lists of
288
- # fields and attributes for the documents. The key <tt>:attributes</tt>
289
- # will return a hash of attribute name and type pairs, and <tt>:words</tt>
290
- # returns a hash of hashes representing the words from the search, with the
291
- # number of documents and hits for each, along the lines of:
292
- #
293
- # results[:words]["Pat"] #=> {:docs => 12, :hits => 15}
294
- #
295
- # <tt>:total</tt>, <tt>:total_found</tt> and <tt>:time</tt> return the
296
- # number of matches available, the total number of matches (which may be
297
- # greater than the maximum available, depending on the number of matches
298
- # and your sphinx configuration), and the time in milliseconds that the
299
- # query took to run.
300
- #
301
- # <tt>:status</tt> is the error code for the query - and if there was a
302
- # related warning, it will be under the <tt>:warning</tt> key. Fatal errors
303
- # will be described under <tt>:error</tt>.
304
- #
305
- def query(search, index = '*', comments = '')
306
- @queue << query_message(search, index, comments)
307
- self.run.first
308
- end
309
-
310
- # Build excerpts from search terms (the +words+) and the text of documents. Excerpts are bodies of text that have the +words+ highlighted.
311
- # They may also be abbreviated to fit within a word limit.
312
- #
313
- # As part of the options hash, you will need to
314
- # define:
315
- # * :docs
316
- # * :words
317
- # * :index
318
- #
319
- # Optional settings include:
320
- # * :before_match (defaults to <span class="match">)
321
- # * :after_match (defaults to </span>)
322
- # * :chunk_separator (defaults to ' &#8230; ' - which is an HTML ellipsis)
323
- # * :limit (defaults to 256)
324
- # * :around (defaults to 5)
325
- # * :exact_phrase (defaults to false)
326
- # * :single_passage (defaults to false)
327
- #
328
- # The defaults differ from the official PHP client, as I've opted for
329
- # semantic HTML markup.
330
- #
331
- # Example:
332
- #
333
- # client.excerpts(:docs => ["Pat Allan, Pat Cash"], :words => 'Pat', :index => 'pats')
334
- # #=> ["<span class=\"match\">Pat</span> Allan, <span class=\"match\">Pat</span> Cash"]
335
- #
336
- # lorem_lipsum = "Lorem ipsum dolor..."
337
- #
338
- # client.excerpts(:docs => ["Pat Allan, #{lorem_lipsum} Pat Cash"], :words => 'Pat', :index => 'pats')
339
- # #=> ["<span class=\"match\">Pat</span> Allan, Lorem ipsum dolor sit amet, consectetur adipisicing
340
- # elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua &#8230; . Excepteur
341
- # sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est
342
- # laborum. <span class=\"match\">Pat</span> Cash"]
343
- #
344
- # Workflow:
345
- #
346
- # Excerpt creation is completely isolated from searching the index. The nominated index is only used to
347
- # discover encoding and charset information.
348
- #
349
- # Therefore, the workflow goes:
350
- #
351
- # 1. Do the sphinx query.
352
- # 2. Fetch the documents found by sphinx from their repositories.
353
- # 3. Pass the documents' text to +excerpts+ for marking up of matched terms.
354
- #
355
- def excerpts(options = {})
356
- options[:index] ||= '*'
357
- options[:before_match] ||= '<span class="match">'
358
- options[:after_match] ||= '</span>'
359
- options[:chunk_separator] ||= ' &#8230; ' # ellipsis
360
- options[:limit] ||= 256
361
- options[:around] ||= 5
362
- options[:exact_phrase] ||= false
363
- options[:single_passage] ||= false
364
-
365
- response = Response.new request(:excerpt, excerpts_message(options))
366
-
367
- options[:docs].collect { response.next }
368
- end
369
-
370
- # Update attributes - first parameter is the relevant index, second is an
371
- # array of attributes to be updated, and the third is a hash, where the
372
- # keys are the document ids, and the values are arrays with the attribute
373
- # values - in the same order as the second parameter.
374
- #
375
- # Example:
376
- #
377
- # client.update('people', ['birthday'], {1 => [Time.at(1982, 20, 8).to_i]})
378
- #
379
- def update(index, attributes, values_by_doc)
380
- response = Response.new request(
381
- :update,
382
- update_message(index, attributes, values_by_doc)
383
- )
384
-
385
- response.next_int
386
- end
387
-
388
- # Generates a keyword list for a given query. Each keyword is represented
389
- # by a hash, with keys :tokenised and :normalised. If return_hits is set to
390
- # true it will also report on the number of hits and documents for each
391
- # keyword (see :hits and :docs keys respectively).
392
- def keywords(query, index, return_hits = false)
393
- response = Response.new request(
394
- :keywords,
395
- keywords_message(query, index, return_hits)
396
- )
397
-
398
- (0...response.next_int).collect do
399
- hash = {}
400
- hash[:tokenised] = response.next
401
- hash[:normalised] = response.next
402
-
403
- if return_hits
404
- hash[:docs] = response.next_int
405
- hash[:hits] = response.next_int
406
- end
407
-
408
- hash
409
- end
410
- end
411
-
412
- private
413
-
414
- # Connects to the Sphinx daemon, and yields a socket to use. The socket is
415
- # closed at the end of the block.
416
- def connect(&block)
417
- socket = nil
418
- if @timeout == 0
419
- socket = initialise_connection
420
- else
421
- begin
422
- Timeout.timeout(@timeout) { socket = initialise_connection }
423
- rescue Timeout::Error
424
- raise Riddle::ConnectionError,
425
- "Connection to #{@server} on #{@port} timed out after #{@timeout} seconds"
426
- end
427
- end
428
-
429
- begin
430
- yield socket
431
- ensure
432
- socket.close
433
- end
434
- end
435
-
436
- def initialise_connection
437
- socket = TCPSocket.new @server, @port
438
-
439
- # Checking version
440
- version = socket.recv(4).unpack('N*').first
441
- if version < 1
442
- socket.close
443
- raise VersionError, "Can only connect to searchd version 1.0 or better, not version #{version}"
444
- end
445
-
446
- # Send version
447
- socket.send [1].pack('N'), 0
448
-
449
- socket
450
- end
451
-
452
- # Send a collection of messages, for a command type (eg, search, excerpts,
453
- # update), to the Sphinx daemon.
454
- def request(command, messages)
455
- response = ""
456
- status = -1
457
- version = 0
458
- length = 0
459
- message = Array(messages).join("")
460
-
461
- connect do |socket|
462
- case command
463
- when :search
464
- # Message length is +4 to account for the following count value for
465
- # the number of messages (well, that's what I'm assuming).
466
- socket.send [
467
- Commands[command], Versions[command],
468
- 4+message.length, messages.length
469
- ].pack("nnNN") + message, 0
470
- else
471
- socket.send [
472
- Commands[command], Versions[command], message.length
473
- ].pack("nnN") + message, 0
474
- end
475
-
476
- header = socket.recv(8)
477
- status, version, length = header.unpack('n2N')
478
-
479
- while response.length < length
480
- part = socket.recv(length - response.length)
481
- response << part if part
482
- end
483
- end
484
-
485
- if response.empty? || response.length != length
486
- raise ResponseError, "No response from searchd (status: #{status}, version: #{version})"
487
- end
488
-
489
- case status
490
- when Statuses[:ok]
491
- if version < Versions[command]
492
- puts format("searchd command v.%d.%d older than client (v.%d.%d)",
493
- version >> 8, version & 0xff,
494
- Versions[command] >> 8, Versions[command] & 0xff)
495
- end
496
- response
497
- when Statuses[:warning]
498
- length = response[0, 4].unpack('N*').first
499
- puts response[4, length]
500
- response[4 + length, response.length - 4 - length]
501
- when Statuses[:error], Statuses[:retry]
502
- raise ResponseError, "searchd error (status: #{status}): #{response[4, response.length - 4]}"
503
- else
504
- raise ResponseError, "Unknown searchd error (status: #{status})"
505
- end
506
- end
507
-
508
- # Generation of the message to send to Sphinx for a search.
509
- def query_message(search, index, comments = '')
510
- message = Message.new
511
-
512
- # Mode, Limits, Sort Mode
513
- message.append_ints @offset, @limit, MatchModes[@match_mode],
514
- RankModes[@rank_mode], SortModes[@sort_mode]
515
- message.append_string @sort_by
516
-
517
- # Query
518
- message.append_string search
519
-
520
- # Weights
521
- message.append_int @weights.length
522
- message.append_ints *@weights
523
-
524
- # Index
525
- message.append_string index
526
-
527
- # ID Range
528
- message.append_int 1
529
- message.append_64bit_ints @id_range.first, @id_range.last
530
-
531
- # Filters
532
- message.append_int @filters.length
533
- @filters.each { |filter| message.append filter.query_message }
534
-
535
- # Grouping
536
- message.append_int GroupFunctions[@group_function]
537
- message.append_string @group_by
538
- message.append_int @max_matches
539
- message.append_string @group_clause
540
- message.append_ints @cut_off, @retry_count, @retry_delay
541
- message.append_string @group_distinct
542
-
543
- # Anchor Point
544
- if @anchor.empty?
545
- message.append_int 0
546
- else
547
- message.append_int 1
548
- message.append_string @anchor[:latitude_attribute]
549
- message.append_string @anchor[:longitude_attribute]
550
- message.append_floats @anchor[:latitude], @anchor[:longitude]
551
- end
552
-
553
- # Per Index Weights
554
- message.append_int @index_weights.length
555
- @index_weights.each do |key,val|
556
- message.append_string key.to_s
557
- message.append_int val
558
- end
559
-
560
- # Max Query Time
561
- message.append_int @max_query_time
562
-
563
- # Per Field Weights
564
- message.append_int @field_weights.length
565
- @field_weights.each do |key,val|
566
- message.append_string key.to_s
567
- message.append_int val
568
- end
569
-
570
- message.append_string comments
571
-
572
- message.to_s
573
- end
574
-
575
- # Generation of the message to send to Sphinx for an excerpts request.
576
- def excerpts_message(options)
577
- message = Message.new
578
-
579
- flags = 1
580
- flags |= 2 if options[:exact_phrase]
581
- flags |= 4 if options[:single_passage]
582
- flags |= 8 if options[:use_boundaries]
583
- flags |= 16 if options[:weight_order]
584
-
585
- message.append [0, flags].pack('N2') # 0 = mode
586
- message.append_string options[:index]
587
- message.append_string options[:words]
588
-
589
- # options
590
- message.append_string options[:before_match]
591
- message.append_string options[:after_match]
592
- message.append_string options[:chunk_separator]
593
- message.append_ints options[:limit], options[:around]
594
-
595
- message.append_array options[:docs]
596
-
597
- message.to_s
598
- end
599
-
600
- # Generation of the message to send to Sphinx to update attributes of a
601
- # document.
602
- def update_message(index, attributes, values_by_doc)
603
- message = Message.new
604
-
605
- message.append_string index
606
- message.append_array attributes
607
-
608
- message.append_int values_by_doc.length
609
- values_by_doc.each do |key,values|
610
- message.append_64bit_int key # document ID
611
- message.append_ints *values # array of new values (integers)
612
- end
613
-
614
- message.to_s
615
- end
616
-
617
- # Generates the simple message to send to the daemon for a keywords request.
618
- def keywords_message(query, index, return_hits)
619
- message = Message.new
620
-
621
- message.append_string query
622
- message.append_string index
623
- message.append_int return_hits ? 1 : 0
624
-
625
- message.to_s
626
- end
627
-
628
- def attribute_from_type(type, response)
629
- type -= AttributeTypes[:multi] if is_multi = type > AttributeTypes[:multi]
630
-
631
- case type
632
- when AttributeTypes[:float]
633
- is_multi ? response.next_float_array : response.next_float
634
- else
635
- is_multi ? response.next_int_array : response.next_int
636
- end
637
- end
638
- end
639
- end