warp-thinking-sphinx 1.2.12 → 1.3.10

Sign up to get free protection for your applications and to get access to all the features.
Files changed (144) hide show
  1. data/README.textile +21 -4
  2. data/VERSION +1 -0
  3. data/features/abstract_inheritance.feature +10 -0
  4. data/features/alternate_primary_key.feature +1 -1
  5. data/features/attribute_updates.feature +22 -5
  6. data/features/deleting_instances.feature +3 -0
  7. data/features/facets.feature +6 -0
  8. data/features/facets_across_model.feature +2 -2
  9. data/features/searching_across_models.feature +1 -1
  10. data/features/searching_by_index.feature +40 -0
  11. data/features/sphinx_scopes.feature +7 -0
  12. data/features/step_definitions/alpha_steps.rb +14 -1
  13. data/features/step_definitions/beta_steps.rb +1 -1
  14. data/features/step_definitions/common_steps.rb +12 -2
  15. data/features/step_definitions/facet_steps.rb +5 -1
  16. data/features/step_definitions/scope_steps.rb +4 -0
  17. data/features/step_definitions/sphinx_steps.rb +8 -4
  18. data/features/sti_searching.feature +5 -0
  19. data/features/support/{db/database.example.yml → database.example.yml} +0 -0
  20. data/features/support/db/fixtures/foxes.rb +3 -0
  21. data/features/support/db/fixtures/music.rb +4 -0
  22. data/features/support/db/fixtures/robots.rb +1 -1
  23. data/features/support/db/fixtures/tags.rb +1 -1
  24. data/features/support/db/migrations/create_alphas.rb +1 -0
  25. data/features/support/db/migrations/create_genres.rb +3 -0
  26. data/features/support/db/migrations/create_music.rb +6 -0
  27. data/features/support/db/migrations/create_robots.rb +1 -2
  28. data/features/support/env.rb +16 -1
  29. data/features/support/models/alpha.rb +12 -0
  30. data/features/support/models/comment.rb +3 -3
  31. data/features/support/models/fox.rb +5 -0
  32. data/features/support/models/genre.rb +3 -0
  33. data/features/support/models/medium.rb +5 -0
  34. data/features/support/models/music.rb +8 -0
  35. data/features/support/models/post.rb +2 -1
  36. data/features/support/models/robot.rb +4 -0
  37. data/lib/cucumber/thinking_sphinx/external_world.rb +8 -0
  38. data/lib/cucumber/thinking_sphinx/internal_world.rb +126 -0
  39. data/lib/cucumber/thinking_sphinx/sql_logger.rb +20 -0
  40. data/lib/thinking_sphinx.rb +56 -37
  41. data/lib/thinking_sphinx/active_record.rb +257 -192
  42. data/lib/thinking_sphinx/active_record/attribute_updates.rb +10 -12
  43. data/lib/thinking_sphinx/active_record/delta.rb +0 -26
  44. data/lib/thinking_sphinx/active_record/scopes.rb +37 -1
  45. data/lib/thinking_sphinx/adapters/mysql_adapter.rb +1 -1
  46. data/lib/thinking_sphinx/adapters/postgresql_adapter.rb +18 -11
  47. data/lib/thinking_sphinx/attribute.rb +19 -4
  48. data/lib/thinking_sphinx/auto_version.rb +22 -0
  49. data/lib/thinking_sphinx/configuration.rb +57 -59
  50. data/lib/thinking_sphinx/context.rb +74 -0
  51. data/lib/thinking_sphinx/deltas.rb +0 -2
  52. data/lib/thinking_sphinx/deltas/default_delta.rb +14 -20
  53. data/lib/thinking_sphinx/deploy/capistrano.rb +1 -1
  54. data/lib/thinking_sphinx/facet_search.rb +3 -1
  55. data/lib/thinking_sphinx/index.rb +77 -19
  56. data/lib/thinking_sphinx/index/builder.rb +2 -2
  57. data/lib/thinking_sphinx/search.rb +47 -9
  58. data/lib/thinking_sphinx/search_methods.rb +22 -4
  59. data/lib/thinking_sphinx/source.rb +9 -8
  60. data/lib/thinking_sphinx/source/sql.rb +5 -3
  61. data/lib/thinking_sphinx/tasks.rb +13 -57
  62. data/lib/thinking_sphinx/test.rb +52 -0
  63. data/rails/init.rb +4 -2
  64. data/spec/{lib/thinking_sphinx → thinking_sphinx}/active_record/delta_spec.rb +4 -6
  65. data/spec/{lib/thinking_sphinx → thinking_sphinx}/active_record/has_many_association_spec.rb +0 -0
  66. data/spec/thinking_sphinx/active_record/scopes_spec.rb +177 -0
  67. data/spec/thinking_sphinx/active_record_spec.rb +622 -0
  68. data/spec/{lib/thinking_sphinx → thinking_sphinx}/association_spec.rb +0 -0
  69. data/spec/{lib/thinking_sphinx → thinking_sphinx}/attribute_spec.rb +39 -0
  70. data/spec/thinking_sphinx/auto_version_spec.rb +39 -0
  71. data/spec/{lib/thinking_sphinx → thinking_sphinx}/configuration_spec.rb +27 -61
  72. data/spec/thinking_sphinx/context_spec.rb +119 -0
  73. data/spec/{lib/thinking_sphinx → thinking_sphinx}/core/array_spec.rb +0 -0
  74. data/spec/{lib/thinking_sphinx → thinking_sphinx}/core/string_spec.rb +0 -0
  75. data/spec/{lib/thinking_sphinx → thinking_sphinx}/excerpter_spec.rb +0 -0
  76. data/spec/{lib/thinking_sphinx → thinking_sphinx}/facet_search_spec.rb +0 -0
  77. data/spec/{lib/thinking_sphinx → thinking_sphinx}/facet_spec.rb +0 -0
  78. data/spec/{lib/thinking_sphinx → thinking_sphinx}/field_spec.rb +0 -0
  79. data/spec/{lib/thinking_sphinx → thinking_sphinx}/index/builder_spec.rb +24 -0
  80. data/spec/{lib/thinking_sphinx → thinking_sphinx}/index/faux_column_spec.rb +0 -0
  81. data/spec/thinking_sphinx/index_spec.rb +183 -0
  82. data/spec/{lib/thinking_sphinx → thinking_sphinx}/rails_additions_spec.rb +0 -0
  83. data/spec/{lib/thinking_sphinx → thinking_sphinx}/search_methods_spec.rb +0 -0
  84. data/spec/{lib/thinking_sphinx → thinking_sphinx}/search_spec.rb +41 -0
  85. data/spec/{lib/thinking_sphinx → thinking_sphinx}/source_spec.rb +1 -1
  86. data/spec/thinking_sphinx_spec.rb +204 -0
  87. data/tasks/distribution.rb +6 -20
  88. data/tasks/testing.rb +8 -19
  89. metadata +117 -142
  90. data/VERSION.yml +0 -4
  91. data/features/a.rb +0 -17
  92. data/features/datetime_deltas.feature +0 -66
  93. data/features/delayed_delta_indexing.feature +0 -37
  94. data/features/step_definitions/datetime_delta_steps.rb +0 -15
  95. data/features/step_definitions/delayed_delta_indexing_steps.rb +0 -7
  96. data/features/support/db/active_record.rb +0 -40
  97. data/features/support/db/fixtures/delayed_betas.rb +0 -10
  98. data/features/support/db/fixtures/thetas.rb +0 -10
  99. data/features/support/db/migrations/create_delayed_betas.rb +0 -17
  100. data/features/support/db/migrations/create_thetas.rb +0 -5
  101. data/features/support/db/mysql.rb +0 -3
  102. data/features/support/db/postgresql.rb +0 -3
  103. data/features/support/models/delayed_beta.rb +0 -7
  104. data/features/support/models/theta.rb +0 -7
  105. data/features/support/post_database.rb +0 -43
  106. data/features/support/z.rb +0 -19
  107. data/lib/thinking_sphinx/deltas/datetime_delta.rb +0 -50
  108. data/lib/thinking_sphinx/deltas/delayed_delta.rb +0 -30
  109. data/lib/thinking_sphinx/deltas/delayed_delta/delta_job.rb +0 -24
  110. data/lib/thinking_sphinx/deltas/delayed_delta/flag_as_deleted_job.rb +0 -27
  111. data/lib/thinking_sphinx/deltas/delayed_delta/job.rb +0 -26
  112. data/spec/lib/thinking_sphinx/active_record/scopes_spec.rb +0 -96
  113. data/spec/lib/thinking_sphinx/active_record_spec.rb +0 -353
  114. data/spec/lib/thinking_sphinx/deltas/job_spec.rb +0 -32
  115. data/spec/lib/thinking_sphinx/index_spec.rb +0 -45
  116. data/spec/lib/thinking_sphinx_spec.rb +0 -162
  117. data/vendor/after_commit/LICENSE +0 -20
  118. data/vendor/after_commit/README +0 -16
  119. data/vendor/after_commit/Rakefile +0 -22
  120. data/vendor/after_commit/init.rb +0 -8
  121. data/vendor/after_commit/lib/after_commit.rb +0 -45
  122. data/vendor/after_commit/lib/after_commit/active_record.rb +0 -114
  123. data/vendor/after_commit/lib/after_commit/connection_adapters.rb +0 -103
  124. data/vendor/after_commit/test/after_commit_test.rb +0 -53
  125. data/vendor/delayed_job/lib/delayed/job.rb +0 -251
  126. data/vendor/delayed_job/lib/delayed/message_sending.rb +0 -7
  127. data/vendor/delayed_job/lib/delayed/performable_method.rb +0 -55
  128. data/vendor/delayed_job/lib/delayed/worker.rb +0 -54
  129. data/vendor/riddle/lib/riddle.rb +0 -30
  130. data/vendor/riddle/lib/riddle/client.rb +0 -635
  131. data/vendor/riddle/lib/riddle/client/filter.rb +0 -53
  132. data/vendor/riddle/lib/riddle/client/message.rb +0 -66
  133. data/vendor/riddle/lib/riddle/client/response.rb +0 -84
  134. data/vendor/riddle/lib/riddle/configuration.rb +0 -33
  135. data/vendor/riddle/lib/riddle/configuration/distributed_index.rb +0 -48
  136. data/vendor/riddle/lib/riddle/configuration/index.rb +0 -142
  137. data/vendor/riddle/lib/riddle/configuration/indexer.rb +0 -19
  138. data/vendor/riddle/lib/riddle/configuration/remote_index.rb +0 -17
  139. data/vendor/riddle/lib/riddle/configuration/searchd.rb +0 -25
  140. data/vendor/riddle/lib/riddle/configuration/section.rb +0 -43
  141. data/vendor/riddle/lib/riddle/configuration/source.rb +0 -23
  142. data/vendor/riddle/lib/riddle/configuration/sql_source.rb +0 -34
  143. data/vendor/riddle/lib/riddle/configuration/xml_source.rb +0 -28
  144. data/vendor/riddle/lib/riddle/controller.rb +0 -53
@@ -1,54 +0,0 @@
1
- module Delayed
2
- class Worker
3
- SLEEP = 5
4
-
5
- cattr_accessor :logger
6
- self.logger = if defined?(Merb::Logger)
7
- Merb.logger
8
- elsif defined?(RAILS_DEFAULT_LOGGER)
9
- RAILS_DEFAULT_LOGGER
10
- end
11
-
12
- def initialize(options={})
13
- @quiet = options[:quiet]
14
- Delayed::Job.min_priority = options[:min_priority] if options.has_key?(:min_priority)
15
- Delayed::Job.max_priority = options[:max_priority] if options.has_key?(:max_priority)
16
- end
17
-
18
- def start
19
- say "*** Starting job worker #{Delayed::Job.worker_name}"
20
-
21
- trap('TERM') { say 'Exiting...'; $exit = true }
22
- trap('INT') { say 'Exiting...'; $exit = true }
23
-
24
- loop do
25
- result = nil
26
-
27
- realtime = Benchmark.realtime do
28
- result = Delayed::Job.work_off
29
- end
30
-
31
- count = result.sum
32
-
33
- break if $exit
34
-
35
- if count.zero?
36
- sleep(SLEEP)
37
- else
38
- say "#{count} jobs processed at %.4f j/s, %d failed ..." % [count / realtime, result.last]
39
- end
40
-
41
- break if $exit
42
- end
43
-
44
- ensure
45
- Delayed::Job.clear_locks!
46
- end
47
-
48
- def say(text)
49
- puts text unless @quiet
50
- logger.info text if logger
51
- end
52
-
53
- end
54
- end
@@ -1,30 +0,0 @@
1
- require 'socket'
2
- require 'timeout'
3
-
4
- require 'riddle/client'
5
- require 'riddle/configuration'
6
- require 'riddle/controller'
7
-
8
- module Riddle #:nodoc:
9
- class ConnectionError < StandardError #:nodoc:
10
- end
11
-
12
- module Version #:nodoc:
13
- Major = 0
14
- Minor = 9
15
- Tiny = 8
16
- # Revision number for RubyForge's sake, taken from what Sphinx
17
- # outputs to the command line.
18
- Rev = 1533
19
- # Release number to mark my own fixes, beyond feature parity with
20
- # Sphinx itself.
21
- Release = 10
22
-
23
- String = [Major, Minor, Tiny].join('.')
24
- GemVersion = [Major, Minor, Tiny, Rev, Release].join('.')
25
- end
26
-
27
- def self.escape(string)
28
- string.gsub(/[\(\)\|\-!@~"&\/]/) { |char| "\\#{char}" }
29
- end
30
- end
@@ -1,635 +0,0 @@
1
- require 'riddle/client/filter'
2
- require 'riddle/client/message'
3
- require 'riddle/client/response'
4
-
5
- module Riddle
6
- class VersionError < StandardError; end
7
- class ResponseError < StandardError; end
8
-
9
- # This class was heavily based on the existing Client API by Dmytro Shteflyuk
10
- # and Alexy Kovyrin. Their code worked fine, I just wanted something a bit
11
- # more Ruby-ish (ie. lowercase and underscored method names). I also have
12
- # used a few helper classes, just to neaten things up.
13
- #
14
- # Feel free to use it wherever. Send bug reports, patches, comments and
15
- # suggestions to pat at freelancing-gods dot com.
16
- #
17
- # Most properties of the client are accessible through attribute accessors,
18
- # and where relevant use symbols instead of the long constants common in
19
- # other clients.
20
- # Some examples:
21
- #
22
- # client.sort_mode = :extended
23
- # client.sort_by = "birthday DESC"
24
- # client.match_mode = :extended
25
- #
26
- # To add a filter, you will need to create a Filter object:
27
- #
28
- # client.filters << Riddle::Client::Filter.new("birthday",
29
- # Time.at(1975, 1, 1).to_i..Time.at(1985, 1, 1).to_i, false)
30
- #
31
- class Client
32
- Commands = {
33
- :search => 0, # SEARCHD_COMMAND_SEARCH
34
- :excerpt => 1, # SEARCHD_COMMAND_EXCERPT
35
- :update => 2, # SEARCHD_COMMAND_UPDATE
36
- :keywords => 3 # SEARCHD_COMMAND_KEYWORDS
37
- }
38
-
39
- Versions = {
40
- :search => 0x113, # VER_COMMAND_SEARCH
41
- :excerpt => 0x100, # VER_COMMAND_EXCERPT
42
- :update => 0x101, # VER_COMMAND_UPDATE
43
- :keywords => 0x100 # VER_COMMAND_KEYWORDS
44
- }
45
-
46
- Statuses = {
47
- :ok => 0, # SEARCHD_OK
48
- :error => 1, # SEARCHD_ERROR
49
- :retry => 2, # SEARCHD_RETRY
50
- :warning => 3 # SEARCHD_WARNING
51
- }
52
-
53
- MatchModes = {
54
- :all => 0, # SPH_MATCH_ALL
55
- :any => 1, # SPH_MATCH_ANY
56
- :phrase => 2, # SPH_MATCH_PHRASE
57
- :boolean => 3, # SPH_MATCH_BOOLEAN
58
- :extended => 4, # SPH_MATCH_EXTENDED
59
- :fullscan => 5, # SPH_MATCH_FULLSCAN
60
- :extended2 => 6 # SPH_MATCH_EXTENDED2
61
- }
62
-
63
- RankModes = {
64
- :proximity_bm25 => 0, # SPH_RANK_PROXIMITY_BM25
65
- :bm25 => 1, # SPH_RANK_BM25
66
- :none => 2, # SPH_RANK_NONE
67
- :wordcount => 3 # SPH_RANK_WORDCOUNT
68
- }
69
-
70
- SortModes = {
71
- :relevance => 0, # SPH_SORT_RELEVANCE
72
- :attr_desc => 1, # SPH_SORT_ATTR_DESC
73
- :attr_asc => 2, # SPH_SORT_ATTR_ASC
74
- :time_segments => 3, # SPH_SORT_TIME_SEGMENTS
75
- :extended => 4, # SPH_SORT_EXTENDED
76
- :expr => 5 # SPH_SORT_EXPR
77
- }
78
-
79
- AttributeTypes = {
80
- :integer => 1, # SPH_ATTR_INTEGER
81
- :timestamp => 2, # SPH_ATTR_TIMESTAMP
82
- :ordinal => 3, # SPH_ATTR_ORDINAL
83
- :bool => 4, # SPH_ATTR_BOOL
84
- :float => 5, # SPH_ATTR_FLOAT
85
- :multi => 0x40000000 # SPH_ATTR_MULTI
86
- }
87
-
88
- GroupFunctions = {
89
- :day => 0, # SPH_GROUPBY_DAY
90
- :week => 1, # SPH_GROUPBY_WEEK
91
- :month => 2, # SPH_GROUPBY_MONTH
92
- :year => 3, # SPH_GROUPBY_YEAR
93
- :attr => 4, # SPH_GROUPBY_ATTR
94
- :attrpair => 5 # SPH_GROUPBY_ATTRPAIR
95
- }
96
-
97
- FilterTypes = {
98
- :values => 0, # SPH_FILTER_VALUES
99
- :range => 1, # SPH_FILTER_RANGE
100
- :float_range => 2 # SPH_FILTER_FLOATRANGE
101
- }
102
-
103
- attr_accessor :server, :port, :offset, :limit, :max_matches,
104
- :match_mode, :sort_mode, :sort_by, :weights, :id_range, :filters,
105
- :group_by, :group_function, :group_clause, :group_distinct, :cut_off,
106
- :retry_count, :retry_delay, :anchor, :index_weights, :rank_mode,
107
- :max_query_time, :field_weights, :timeout
108
- attr_reader :queue
109
-
110
- # Can instantiate with a specific server and port - otherwise it assumes
111
- # defaults of localhost and 3312 respectively. All other settings can be
112
- # accessed and changed via the attribute accessors.
113
- def initialize(server=nil, port=nil)
114
- @server = server || "localhost"
115
- @port = port || 3312
116
-
117
- reset
118
-
119
- @queue = []
120
- end
121
-
122
- # Reset attributes and settings to defaults.
123
- def reset
124
- # defaults
125
- @offset = 0
126
- @limit = 20
127
- @max_matches = 1000
128
- @match_mode = :all
129
- @sort_mode = :relevance
130
- @sort_by = ''
131
- @weights = []
132
- @id_range = 0..0
133
- @filters = []
134
- @group_by = ''
135
- @group_function = :day
136
- @group_clause = '@group desc'
137
- @group_distinct = ''
138
- @cut_off = 0
139
- @retry_count = 0
140
- @retry_delay = 0
141
- @anchor = {}
142
- # string keys are index names, integer values are weightings
143
- @index_weights = {}
144
- @rank_mode = :proximity_bm25
145
- @max_query_time = 0
146
- # string keys are field names, integer values are weightings
147
- @field_weights = {}
148
- @timeout = 0
149
- end
150
-
151
- # Set the geo-anchor point - with the names of the attributes that contain
152
- # the latitude and longitude (in radians), and the reference position.
153
- # Note that for geocoding to work properly, you must also set
154
- # match_mode to :extended. To sort results by distance, you will
155
- # need to set sort_mode to '@geodist asc' for example. Sphinx
156
- # expects latitude and longitude to be returned from your SQL source
157
- # in radians.
158
- #
159
- # Example:
160
- # client.set_anchor('lat', -0.6591741, 'long', 2.530770)
161
- #
162
- def set_anchor(lat_attr, lat, long_attr, long)
163
- @anchor = {
164
- :latitude_attribute => lat_attr,
165
- :latitude => lat,
166
- :longitude_attribute => long_attr,
167
- :longitude => long
168
- }
169
- end
170
-
171
- # Append a query to the queue. This uses the same parameters as the query
172
- # method.
173
- def append_query(search, index = '*', comments = '')
174
- @queue << query_message(search, index, comments)
175
- end
176
-
177
- # Run all the queries currently in the queue. This will return an array of
178
- # results hashes.
179
- def run
180
- response = Response.new request(:search, @queue)
181
-
182
- results = @queue.collect do
183
- result = {
184
- :matches => [],
185
- :fields => [],
186
- :attributes => {},
187
- :attribute_names => [],
188
- :words => {}
189
- }
190
-
191
- result[:status] = response.next_int
192
- case result[:status]
193
- when Statuses[:warning]
194
- result[:warning] = response.next
195
- when Statuses[:error]
196
- result[:error] = response.next
197
- next result
198
- end
199
-
200
- result[:fields] = response.next_array
201
-
202
- attributes = response.next_int
203
- for i in 0...attributes
204
- attribute_name = response.next
205
- type = response.next_int
206
-
207
- result[:attributes][attribute_name] = type
208
- result[:attribute_names] << attribute_name
209
- end
210
-
211
- matches = response.next_int
212
- is_64_bit = response.next_int
213
- for i in 0...matches
214
- doc = is_64_bit > 0 ? response.next_64bit_int : response.next_int
215
- weight = response.next_int
216
-
217
- result[:matches] << {:doc => doc, :weight => weight, :index => i, :attributes => {}}
218
- result[:attribute_names].each do |attr|
219
- result[:matches].last[:attributes][attr] = attribute_from_type(
220
- result[:attributes][attr], response
221
- )
222
- end
223
- end
224
-
225
- result[:total] = response.next_int.to_i || 0
226
- result[:total_found] = response.next_int.to_i || 0
227
- result[:time] = ('%.3f' % (response.next_int / 1000.0)).to_f || 0.0
228
-
229
- words = response.next_int
230
- for i in 0...words
231
- word = response.next
232
- docs = response.next_int
233
- hits = response.next_int
234
- result[:words][word] = {:docs => docs, :hits => hits}
235
- end
236
-
237
- result
238
- end
239
-
240
- @queue.clear
241
- results
242
- end
243
-
244
- # Query the Sphinx daemon - defaulting to all indexes, but you can specify
245
- # a specific one if you wish. The search parameter should be a string
246
- # following Sphinx's expectations.
247
- #
248
- # The object returned from this method is a hash with the following keys:
249
- #
250
- # * :matches
251
- # * :fields
252
- # * :attributes
253
- # * :attribute_names
254
- # * :words
255
- # * :total
256
- # * :total_found
257
- # * :time
258
- # * :status
259
- # * :warning (if appropriate)
260
- # * :error (if appropriate)
261
- #
262
- # The key <tt>:matches</tt> returns an array of hashes - the actual search
263
- # results. Each hash has the document id (<tt>:doc</tt>), the result
264
- # weighting (<tt>:weight</tt>), and a hash of the attributes for the
265
- # document (<tt>:attributes</tt>).
266
- #
267
- # The <tt>:fields</tt> and <tt>:attribute_names</tt> keys return list of
268
- # fields and attributes for the documents. The key <tt>:attributes</tt>
269
- # will return a hash of attribute name and type pairs, and <tt>:words</tt>
270
- # returns a hash of hashes representing the words from the search, with the
271
- # number of documents and hits for each, along the lines of:
272
- #
273
- # results[:words]["Pat"] #=> {:docs => 12, :hits => 15}
274
- #
275
- # <tt>:total</tt>, <tt>:total_found</tt> and <tt>:time</tt> return the
276
- # number of matches available, the total number of matches (which may be
277
- # greater than the maximum available, depending on the number of matches
278
- # and your sphinx configuration), and the time in milliseconds that the
279
- # query took to run.
280
- #
281
- # <tt>:status</tt> is the error code for the query - and if there was a
282
- # related warning, it will be under the <tt>:warning</tt> key. Fatal errors
283
- # will be described under <tt>:error</tt>.
284
- #
285
- def query(search, index = '*', comments = '')
286
- @queue << query_message(search, index, comments)
287
- self.run.first
288
- end
289
-
290
- # Build excerpts from search terms (the +words+) and the text of documents. Excerpts are bodies of text that have the +words+ highlighted.
291
- # They may also be abbreviated to fit within a word limit.
292
- #
293
- # As part of the options hash, you will need to
294
- # define:
295
- # * :docs
296
- # * :words
297
- # * :index
298
- #
299
- # Optional settings include:
300
- # * :before_match (defaults to <span class="match">)
301
- # * :after_match (defaults to </span>)
302
- # * :chunk_separator (defaults to ' &#8230; ' - which is an HTML ellipsis)
303
- # * :limit (defaults to 256)
304
- # * :around (defaults to 5)
305
- # * :exact_phrase (defaults to false)
306
- # * :single_passage (defaults to false)
307
- #
308
- # The defaults differ from the official PHP client, as I've opted for
309
- # semantic HTML markup.
310
- #
311
- # Example:
312
- #
313
- # client.excerpts(:docs => ["Pat Allan, Pat Cash"], :words => 'Pat', :index => 'pats')
314
- # #=> ["<span class=\"match\">Pat</span> Allan, <span class=\"match\">Pat</span> Cash"]
315
- #
316
- # lorem_lipsum = "Lorem ipsum dolor..."
317
- #
318
- # client.excerpts(:docs => ["Pat Allan, #{lorem_lipsum} Pat Cash"], :words => 'Pat', :index => 'pats')
319
- # #=> ["<span class=\"match\">Pat</span> Allan, Lorem ipsum dolor sit amet, consectetur adipisicing
320
- # elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua &#8230; . Excepteur
321
- # sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est
322
- # laborum. <span class=\"match\">Pat</span> Cash"]
323
- #
324
- # Workflow:
325
- #
326
- # Excerpt creation is completely isolated from searching the index. The nominated index is only used to
327
- # discover encoding and charset information.
328
- #
329
- # Therefore, the workflow goes:
330
- #
331
- # 1. Do the sphinx query.
332
- # 2. Fetch the documents found by sphinx from their repositories.
333
- # 3. Pass the documents' text to +excerpts+ for marking up of matched terms.
334
- #
335
- def excerpts(options = {})
336
- options[:index] ||= '*'
337
- options[:before_match] ||= '<span class="match">'
338
- options[:after_match] ||= '</span>'
339
- options[:chunk_separator] ||= ' &#8230; ' # ellipsis
340
- options[:limit] ||= 256
341
- options[:around] ||= 5
342
- options[:exact_phrase] ||= false
343
- options[:single_passage] ||= false
344
-
345
- response = Response.new request(:excerpt, excerpts_message(options))
346
-
347
- options[:docs].collect { response.next }
348
- end
349
-
350
- # Update attributes - first parameter is the relevant index, second is an
351
- # array of attributes to be updated, and the third is a hash, where the
352
- # keys are the document ids, and the values are arrays with the attribute
353
- # values - in the same order as the second parameter.
354
- #
355
- # Example:
356
- #
357
- # client.update('people', ['birthday'], {1 => [Time.at(1982, 20, 8).to_i]})
358
- #
359
- def update(index, attributes, values_by_doc)
360
- response = Response.new request(
361
- :update,
362
- update_message(index, attributes, values_by_doc)
363
- )
364
-
365
- response.next_int
366
- end
367
-
368
- # Generates a keyword list for a given query. Each keyword is represented
369
- # by a hash, with keys :tokenised and :normalised. If return_hits is set to
370
- # true it will also report on the number of hits and documents for each
371
- # keyword (see :hits and :docs keys respectively).
372
- def keywords(query, index, return_hits = false)
373
- response = Response.new request(
374
- :keywords,
375
- keywords_message(query, index, return_hits)
376
- )
377
-
378
- (0...response.next_int).collect do
379
- hash = {}
380
- hash[:tokenised] = response.next
381
- hash[:normalised] = response.next
382
-
383
- if return_hits
384
- hash[:docs] = response.next_int
385
- hash[:hits] = response.next_int
386
- end
387
-
388
- hash
389
- end
390
- end
391
-
392
- private
393
-
394
- # Connects to the Sphinx daemon, and yields a socket to use. The socket is
395
- # closed at the end of the block.
396
- def connect(&block)
397
- socket = nil
398
- if @timeout == 0
399
- socket = initialise_connection
400
- else
401
- begin
402
- Timeout.timeout(@timeout) { socket = initialise_connection }
403
- rescue Timeout::Error
404
- raise Riddle::ConnectionError,
405
- "Connection to #{@server} on #{@port} timed out after #{@timeout} seconds"
406
- end
407
- end
408
-
409
- begin
410
- yield socket
411
- ensure
412
- socket.close
413
- end
414
- end
415
-
416
- def initialise_connection
417
- socket = initialise_socket
418
-
419
- # Checking version
420
- version = socket.recv(4).unpack('N*').first
421
- if version < 1
422
- socket.close
423
- raise VersionError, "Can only connect to searchd version 1.0 or better, not version #{version}"
424
- end
425
-
426
- # Send version
427
- socket.send [1].pack('N'), 0
428
-
429
- socket
430
- end
431
-
432
- def initialise_socket
433
- tries = 0
434
- begin
435
- socket = TCPSocket.new @server, @port
436
- rescue Errno::ECONNREFUSED => e
437
- retry if (tries += 1) < 5
438
- raise Riddle::ConnectionError,
439
- "Connection to #{@server} on #{@port} failed. #{e.message}"
440
- end
441
-
442
- socket
443
- end
444
-
445
- # Send a collection of messages, for a command type (eg, search, excerpts,
446
- # update), to the Sphinx daemon.
447
- def request(command, messages)
448
- response = ""
449
- status = -1
450
- version = 0
451
- length = 0
452
- message = Array(messages).join("")
453
- if message.respond_to?(:force_encoding)
454
- message = message.force_encoding('ASCII-8BIT')
455
- end
456
-
457
- connect do |socket|
458
- case command
459
- when :search
460
- # Message length is +4 to account for the following count value for
461
- # the number of messages (well, that's what I'm assuming).
462
- socket.send [
463
- Commands[command], Versions[command],
464
- 4+message.length, messages.length
465
- ].pack("nnNN") + message, 0
466
- else
467
- socket.send [
468
- Commands[command], Versions[command], message.length
469
- ].pack("nnN") + message, 0
470
- end
471
-
472
- header = socket.recv(8)
473
- status, version, length = header.unpack('n2N')
474
-
475
- while response.length < (length || 0)
476
- part = socket.recv(length - response.length)
477
- response << part if part
478
- end
479
- end
480
-
481
- if response.empty? || response.length != length
482
- raise ResponseError, "No response from searchd (status: #{status}, version: #{version})"
483
- end
484
-
485
- case status
486
- when Statuses[:ok]
487
- if version < Versions[command]
488
- puts format("searchd command v.%d.%d older than client (v.%d.%d)",
489
- version >> 8, version & 0xff,
490
- Versions[command] >> 8, Versions[command] & 0xff)
491
- end
492
- response
493
- when Statuses[:warning]
494
- length = response[0, 4].unpack('N*').first
495
- puts response[4, length]
496
- response[4 + length, response.length - 4 - length]
497
- when Statuses[:error], Statuses[:retry]
498
- raise ResponseError, "searchd error (status: #{status}): #{response[4, response.length - 4]}"
499
- else
500
- raise ResponseError, "Unknown searchd error (status: #{status})"
501
- end
502
- end
503
-
504
- # Generation of the message to send to Sphinx for a search.
505
- def query_message(search, index, comments = '')
506
- message = Message.new
507
-
508
- # Mode, Limits, Sort Mode
509
- message.append_ints @offset, @limit, MatchModes[@match_mode],
510
- RankModes[@rank_mode], SortModes[@sort_mode]
511
- message.append_string @sort_by
512
-
513
- # Query
514
- message.append_string search
515
-
516
- # Weights
517
- message.append_int @weights.length
518
- message.append_ints *@weights
519
-
520
- # Index
521
- message.append_string index
522
-
523
- # ID Range
524
- message.append_int 1
525
- message.append_64bit_ints @id_range.first, @id_range.last
526
-
527
- # Filters
528
- message.append_int @filters.length
529
- @filters.each { |filter| message.append filter.query_message }
530
-
531
- # Grouping
532
- message.append_int GroupFunctions[@group_function]
533
- message.append_string @group_by
534
- message.append_int @max_matches
535
- message.append_string @group_clause
536
- message.append_ints @cut_off, @retry_count, @retry_delay
537
- message.append_string @group_distinct
538
-
539
- # Anchor Point
540
- if @anchor.empty?
541
- message.append_int 0
542
- else
543
- message.append_int 1
544
- message.append_string @anchor[:latitude_attribute]
545
- message.append_string @anchor[:longitude_attribute]
546
- message.append_floats @anchor[:latitude], @anchor[:longitude]
547
- end
548
-
549
- # Per Index Weights
550
- message.append_int @index_weights.length
551
- @index_weights.each do |key,val|
552
- message.append_string key.to_s
553
- message.append_int val
554
- end
555
-
556
- # Max Query Time
557
- message.append_int @max_query_time
558
-
559
- # Per Field Weights
560
- message.append_int @field_weights.length
561
- @field_weights.each do |key,val|
562
- message.append_string key.to_s
563
- message.append_int val
564
- end
565
-
566
- message.append_string comments
567
-
568
- message.to_s
569
- end
570
-
571
- # Generation of the message to send to Sphinx for an excerpts request.
572
- def excerpts_message(options)
573
- message = Message.new
574
-
575
- flags = 1
576
- flags |= 2 if options[:exact_phrase]
577
- flags |= 4 if options[:single_passage]
578
- flags |= 8 if options[:use_boundaries]
579
- flags |= 16 if options[:weight_order]
580
-
581
- message.append [0, flags].pack('N2') # 0 = mode
582
- message.append_string options[:index]
583
- message.append_string options[:words]
584
-
585
- # options
586
- message.append_string options[:before_match]
587
- message.append_string options[:after_match]
588
- message.append_string options[:chunk_separator]
589
- message.append_ints options[:limit], options[:around]
590
-
591
- message.append_array options[:docs]
592
-
593
- message.to_s
594
- end
595
-
596
- # Generation of the message to send to Sphinx to update attributes of a
597
- # document.
598
- def update_message(index, attributes, values_by_doc)
599
- message = Message.new
600
-
601
- message.append_string index
602
- message.append_array attributes
603
-
604
- message.append_int values_by_doc.length
605
- values_by_doc.each do |key,values|
606
- message.append_64bit_int key # document ID
607
- message.append_ints *values # array of new values (integers)
608
- end
609
-
610
- message.to_s
611
- end
612
-
613
- # Generates the simple message to send to the daemon for a keywords request.
614
- def keywords_message(query, index, return_hits)
615
- message = Message.new
616
-
617
- message.append_string query
618
- message.append_string index
619
- message.append_int return_hits ? 1 : 0
620
-
621
- message.to_s
622
- end
623
-
624
- def attribute_from_type(type, response)
625
- type -= AttributeTypes[:multi] if is_multi = type > AttributeTypes[:multi]
626
-
627
- case type
628
- when AttributeTypes[:float]
629
- is_multi ? response.next_float_array : response.next_float
630
- else
631
- is_multi ? response.next_int_array : response.next_int
632
- end
633
- end
634
- end
635
- end