skalee-thinking-sphinx 1.3.14.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (199) hide show
  1. data/LICENCE +20 -0
  2. data/README.textile +201 -0
  3. data/Rakefile +3 -0
  4. data/VERSION +1 -0
  5. data/contribute.rb +385 -0
  6. data/cucumber.yml +1 -0
  7. data/features/abstract_inheritance.feature +10 -0
  8. data/features/alternate_primary_key.feature +27 -0
  9. data/features/attribute_transformation.feature +22 -0
  10. data/features/attribute_updates.feature +51 -0
  11. data/features/deleting_instances.feature +67 -0
  12. data/features/direct_attributes.feature +11 -0
  13. data/features/excerpts.feature +13 -0
  14. data/features/extensible_delta_indexing.feature +9 -0
  15. data/features/facets.feature +82 -0
  16. data/features/facets_across_model.feature +29 -0
  17. data/features/handling_edits.feature +92 -0
  18. data/features/retry_stale_indexes.feature +24 -0
  19. data/features/searching_across_models.feature +20 -0
  20. data/features/searching_by_index.feature +40 -0
  21. data/features/searching_by_model.feature +175 -0
  22. data/features/searching_with_find_arguments.feature +56 -0
  23. data/features/sphinx_detection.feature +25 -0
  24. data/features/sphinx_scopes.feature +42 -0
  25. data/features/step_definitions/alpha_steps.rb +16 -0
  26. data/features/step_definitions/beta_steps.rb +7 -0
  27. data/features/step_definitions/common_steps.rb +188 -0
  28. data/features/step_definitions/extensible_delta_indexing_steps.rb +7 -0
  29. data/features/step_definitions/facet_steps.rb +96 -0
  30. data/features/step_definitions/find_arguments_steps.rb +36 -0
  31. data/features/step_definitions/gamma_steps.rb +15 -0
  32. data/features/step_definitions/scope_steps.rb +15 -0
  33. data/features/step_definitions/search_steps.rb +89 -0
  34. data/features/step_definitions/sphinx_steps.rb +35 -0
  35. data/features/sti_searching.feature +19 -0
  36. data/features/support/database.example.yml +3 -0
  37. data/features/support/db/.gitignore +1 -0
  38. data/features/support/db/fixtures/alphas.rb +10 -0
  39. data/features/support/db/fixtures/authors.rb +1 -0
  40. data/features/support/db/fixtures/betas.rb +10 -0
  41. data/features/support/db/fixtures/boxes.rb +9 -0
  42. data/features/support/db/fixtures/categories.rb +1 -0
  43. data/features/support/db/fixtures/cats.rb +3 -0
  44. data/features/support/db/fixtures/comments.rb +24 -0
  45. data/features/support/db/fixtures/developers.rb +29 -0
  46. data/features/support/db/fixtures/dogs.rb +3 -0
  47. data/features/support/db/fixtures/extensible_betas.rb +10 -0
  48. data/features/support/db/fixtures/foxes.rb +3 -0
  49. data/features/support/db/fixtures/gammas.rb +10 -0
  50. data/features/support/db/fixtures/music.rb +4 -0
  51. data/features/support/db/fixtures/people.rb +1001 -0
  52. data/features/support/db/fixtures/posts.rb +6 -0
  53. data/features/support/db/fixtures/robots.rb +14 -0
  54. data/features/support/db/fixtures/tags.rb +27 -0
  55. data/features/support/db/migrations/create_alphas.rb +8 -0
  56. data/features/support/db/migrations/create_animals.rb +5 -0
  57. data/features/support/db/migrations/create_authors.rb +3 -0
  58. data/features/support/db/migrations/create_authors_posts.rb +6 -0
  59. data/features/support/db/migrations/create_betas.rb +5 -0
  60. data/features/support/db/migrations/create_boxes.rb +5 -0
  61. data/features/support/db/migrations/create_categories.rb +3 -0
  62. data/features/support/db/migrations/create_comments.rb +10 -0
  63. data/features/support/db/migrations/create_developers.rb +9 -0
  64. data/features/support/db/migrations/create_extensible_betas.rb +5 -0
  65. data/features/support/db/migrations/create_gammas.rb +3 -0
  66. data/features/support/db/migrations/create_genres.rb +3 -0
  67. data/features/support/db/migrations/create_music.rb +6 -0
  68. data/features/support/db/migrations/create_people.rb +13 -0
  69. data/features/support/db/migrations/create_posts.rb +5 -0
  70. data/features/support/db/migrations/create_robots.rb +4 -0
  71. data/features/support/db/migrations/create_taggings.rb +5 -0
  72. data/features/support/db/migrations/create_tags.rb +4 -0
  73. data/features/support/env.rb +21 -0
  74. data/features/support/lib/generic_delta_handler.rb +8 -0
  75. data/features/support/models/alpha.rb +22 -0
  76. data/features/support/models/animal.rb +5 -0
  77. data/features/support/models/author.rb +3 -0
  78. data/features/support/models/beta.rb +8 -0
  79. data/features/support/models/box.rb +8 -0
  80. data/features/support/models/cat.rb +3 -0
  81. data/features/support/models/category.rb +4 -0
  82. data/features/support/models/comment.rb +10 -0
  83. data/features/support/models/developer.rb +16 -0
  84. data/features/support/models/dog.rb +3 -0
  85. data/features/support/models/extensible_beta.rb +9 -0
  86. data/features/support/models/fox.rb +5 -0
  87. data/features/support/models/gamma.rb +5 -0
  88. data/features/support/models/genre.rb +3 -0
  89. data/features/support/models/medium.rb +5 -0
  90. data/features/support/models/music.rb +8 -0
  91. data/features/support/models/person.rb +23 -0
  92. data/features/support/models/post.rb +21 -0
  93. data/features/support/models/robot.rb +12 -0
  94. data/features/support/models/tag.rb +3 -0
  95. data/features/support/models/tagging.rb +4 -0
  96. data/ginger_scenarios.rb +28 -0
  97. data/init.rb +5 -0
  98. data/install.rb +5 -0
  99. data/lib/cucumber/thinking_sphinx/external_world.rb +8 -0
  100. data/lib/cucumber/thinking_sphinx/internal_world.rb +126 -0
  101. data/lib/cucumber/thinking_sphinx/sql_logger.rb +20 -0
  102. data/lib/thinking_sphinx/active_record/attribute_updates.rb +19 -0
  103. data/lib/thinking_sphinx/active_record/delta.rb +47 -0
  104. data/lib/thinking_sphinx/active_record/has_many_association.rb +29 -0
  105. data/lib/thinking_sphinx/active_record/scopes.rb +75 -0
  106. data/lib/thinking_sphinx/active_record.rb +348 -0
  107. data/lib/thinking_sphinx/adapters/abstract_adapter.rb +42 -0
  108. data/lib/thinking_sphinx/adapters/mysql_adapter.rb +54 -0
  109. data/lib/thinking_sphinx/adapters/postgresql_adapter.rb +143 -0
  110. data/lib/thinking_sphinx/association.rb +164 -0
  111. data/lib/thinking_sphinx/attribute.rb +362 -0
  112. data/lib/thinking_sphinx/auto_version.rb +22 -0
  113. data/lib/thinking_sphinx/class_facet.rb +15 -0
  114. data/lib/thinking_sphinx/configuration.rb +300 -0
  115. data/lib/thinking_sphinx/context.rb +68 -0
  116. data/lib/thinking_sphinx/core/array.rb +7 -0
  117. data/lib/thinking_sphinx/core/string.rb +15 -0
  118. data/lib/thinking_sphinx/deltas/default_delta.rb +62 -0
  119. data/lib/thinking_sphinx/deltas.rb +28 -0
  120. data/lib/thinking_sphinx/deploy/capistrano.rb +100 -0
  121. data/lib/thinking_sphinx/excerpter.rb +22 -0
  122. data/lib/thinking_sphinx/facet.rb +125 -0
  123. data/lib/thinking_sphinx/facet_search.rb +136 -0
  124. data/lib/thinking_sphinx/field.rb +82 -0
  125. data/lib/thinking_sphinx/index/builder.rb +296 -0
  126. data/lib/thinking_sphinx/index/faux_column.rb +110 -0
  127. data/lib/thinking_sphinx/index.rb +157 -0
  128. data/lib/thinking_sphinx/property.rb +162 -0
  129. data/lib/thinking_sphinx/rails_additions.rb +150 -0
  130. data/lib/thinking_sphinx/search.rb +769 -0
  131. data/lib/thinking_sphinx/search_methods.rb +439 -0
  132. data/lib/thinking_sphinx/source/internal_properties.rb +46 -0
  133. data/lib/thinking_sphinx/source/sql.rb +130 -0
  134. data/lib/thinking_sphinx/source.rb +153 -0
  135. data/lib/thinking_sphinx/tasks.rb +131 -0
  136. data/lib/thinking_sphinx/test.rb +52 -0
  137. data/lib/thinking_sphinx.rb +225 -0
  138. data/rails/init.rb +16 -0
  139. data/recipes/thinking_sphinx.rb +3 -0
  140. data/spec/fixtures/data.sql +32 -0
  141. data/spec/fixtures/database.yml.default +3 -0
  142. data/spec/fixtures/models.rb +145 -0
  143. data/spec/fixtures/structure.sql +125 -0
  144. data/spec/spec_helper.rb +60 -0
  145. data/spec/sphinx_helper.rb +81 -0
  146. data/spec/thinking_sphinx/active_record/delta_spec.rb +128 -0
  147. data/spec/thinking_sphinx/active_record/has_many_association_spec.rb +55 -0
  148. data/spec/thinking_sphinx/active_record/scopes_spec.rb +177 -0
  149. data/spec/thinking_sphinx/active_record_spec.rb +622 -0
  150. data/spec/thinking_sphinx/association_spec.rb +239 -0
  151. data/spec/thinking_sphinx/attribute_spec.rb +570 -0
  152. data/spec/thinking_sphinx/auto_version_spec.rb +39 -0
  153. data/spec/thinking_sphinx/configuration_spec.rb +234 -0
  154. data/spec/thinking_sphinx/context_spec.rb +119 -0
  155. data/spec/thinking_sphinx/core/array_spec.rb +9 -0
  156. data/spec/thinking_sphinx/core/string_spec.rb +9 -0
  157. data/spec/thinking_sphinx/excerpter_spec.rb +57 -0
  158. data/spec/thinking_sphinx/facet_search_spec.rb +176 -0
  159. data/spec/thinking_sphinx/facet_spec.rb +333 -0
  160. data/spec/thinking_sphinx/field_spec.rb +154 -0
  161. data/spec/thinking_sphinx/index/builder_spec.rb +479 -0
  162. data/spec/thinking_sphinx/index/faux_column_spec.rb +30 -0
  163. data/spec/thinking_sphinx/index_spec.rb +183 -0
  164. data/spec/thinking_sphinx/rails_additions_spec.rb +203 -0
  165. data/spec/thinking_sphinx/search_methods_spec.rb +152 -0
  166. data/spec/thinking_sphinx/search_spec.rb +1181 -0
  167. data/spec/thinking_sphinx/source_spec.rb +235 -0
  168. data/spec/thinking_sphinx_spec.rb +204 -0
  169. data/tasks/distribution.rb +41 -0
  170. data/tasks/rails.rake +1 -0
  171. data/tasks/testing.rb +72 -0
  172. data/vendor/after_commit/.gitignore +1 -0
  173. data/vendor/after_commit/lib/after_commit/active_record.rb +122 -0
  174. data/vendor/after_commit/lib/after_commit/connection_adapters.rb +168 -0
  175. data/vendor/after_commit/lib/after_commit/test_bypass.rb +30 -0
  176. data/vendor/after_commit/lib/after_commit.rb +70 -0
  177. data/vendor/riddle/lib/riddle/0.9.8.rb +1 -0
  178. data/vendor/riddle/lib/riddle/0.9.9/client/filter.rb +22 -0
  179. data/vendor/riddle/lib/riddle/0.9.9/client.rb +49 -0
  180. data/vendor/riddle/lib/riddle/0.9.9/configuration/searchd.rb +28 -0
  181. data/vendor/riddle/lib/riddle/0.9.9.rb +7 -0
  182. data/vendor/riddle/lib/riddle/auto_version.rb +11 -0
  183. data/vendor/riddle/lib/riddle/client/filter.rb +62 -0
  184. data/vendor/riddle/lib/riddle/client/message.rb +70 -0
  185. data/vendor/riddle/lib/riddle/client/response.rb +94 -0
  186. data/vendor/riddle/lib/riddle/client.rb +745 -0
  187. data/vendor/riddle/lib/riddle/configuration/distributed_index.rb +49 -0
  188. data/vendor/riddle/lib/riddle/configuration/index.rb +149 -0
  189. data/vendor/riddle/lib/riddle/configuration/indexer.rb +20 -0
  190. data/vendor/riddle/lib/riddle/configuration/remote_index.rb +17 -0
  191. data/vendor/riddle/lib/riddle/configuration/searchd.rb +28 -0
  192. data/vendor/riddle/lib/riddle/configuration/section.rb +43 -0
  193. data/vendor/riddle/lib/riddle/configuration/source.rb +23 -0
  194. data/vendor/riddle/lib/riddle/configuration/sql_source.rb +53 -0
  195. data/vendor/riddle/lib/riddle/configuration/xml_source.rb +29 -0
  196. data/vendor/riddle/lib/riddle/configuration.rb +33 -0
  197. data/vendor/riddle/lib/riddle/controller.rb +78 -0
  198. data/vendor/riddle/lib/riddle.rb +51 -0
  199. metadata +312 -0
@@ -0,0 +1,745 @@
1
+ require 'riddle/client/filter'
2
+ require 'riddle/client/message'
3
+ require 'riddle/client/response'
4
+
5
+ module Riddle
6
+ class VersionError < StandardError; end
7
+ class ResponseError < StandardError; end
8
+
9
+ # This class was heavily based on the existing Client API by Dmytro Shteflyuk
10
+ # and Alexey Kovyrin. Their code worked fine, I just wanted something a bit
11
+ # more Ruby-ish (ie. lowercase and underscored method names). I also have
12
+ # used a few helper classes, just to neaten things up.
13
+ #
14
+ # Feel free to use it wherever. Send bug reports, patches, comments and
15
+ # suggestions to pat at freelancing-gods dot com.
16
+ #
17
+ # Most properties of the client are accessible through attribute accessors,
18
+ # and where relevant use symbols instead of the long constants common in
19
+ # other clients.
20
+ # Some examples:
21
+ #
22
+ # client.sort_mode = :extended
23
+ # client.sort_by = "birthday DESC"
24
+ # client.match_mode = :extended
25
+ #
26
+ # To add a filter, you will need to create a Filter object:
27
+ #
28
+ # client.filters << Riddle::Client::Filter.new("birthday",
29
+ # Time.local(1975, 1, 1).to_i..Time.local(1985, 1, 1).to_i, false)
30
+ #
31
+ class Client
32
# Lookup tables translating the symbols used by this client into the integer
# codes named in the trailing comments.
# NOTE(review): the tables are deliberately left unfrozen; the 0x116 checks
# elsewhere in this class suggest Versions may be adjusted after load - confirm.

# Command identifiers placed in the request header.
Commands = {
  :search   => 0, # SEARCHD_COMMAND_SEARCH
  :excerpt  => 1, # SEARCHD_COMMAND_EXCERPT
  :update   => 2, # SEARCHD_COMMAND_UPDATE
  :keywords => 3, # SEARCHD_COMMAND_KEYWORDS
  :persist  => 4, # SEARCHD_COMMAND_PERSIST
  :status   => 5, # SEARCHD_COMMAND_STATUS
  :query    => 6  # SEARCHD_COMMAND_QUERY
}

# Protocol version sent with each command.
Versions = {
  :search   => 0x113, # VER_COMMAND_SEARCH
  :excerpt  => 0x100, # VER_COMMAND_EXCERPT
  :update   => 0x101, # VER_COMMAND_UPDATE
  :keywords => 0x100, # VER_COMMAND_KEYWORDS
  :status   => 0x100, # VER_COMMAND_STATUS
  :query    => 0x100  # VER_COMMAND_QUERY
}

# Status codes found in a response header.
Statuses = {
  :ok      => 0, # SEARCHD_OK
  :error   => 1, # SEARCHD_ERROR
  :retry   => 2, # SEARCHD_RETRY
  :warning => 3  # SEARCHD_WARNING
}

# Values accepted by match_mode.
MatchModes = {
  :all       => 0, # SPH_MATCH_ALL
  :any       => 1, # SPH_MATCH_ANY
  :phrase    => 2, # SPH_MATCH_PHRASE
  :boolean   => 3, # SPH_MATCH_BOOLEAN
  :extended  => 4, # SPH_MATCH_EXTENDED
  :fullscan  => 5, # SPH_MATCH_FULLSCAN
  :extended2 => 6  # SPH_MATCH_EXTENDED2
}

# Values accepted by rank_mode.
RankModes = {
  :proximity_bm25 => 0, # SPH_RANK_PROXIMITY_BM25
  :bm25           => 1, # SPH_RANK_BM25
  :none           => 2, # SPH_RANK_NONE
  :wordcount      => 3, # SPH_RANK_WORDCOUNT
  :proximity      => 4, # SPH_RANK_PROXIMITY
  :match_any      => 5, # SPH_RANK_MATCHANY
  :fieldmask      => 6  # SPH_RANK_FIELDMASK
}

# Values accepted by sort_mode.
SortModes = {
  :relevance     => 0, # SPH_SORT_RELEVANCE
  :attr_desc     => 1, # SPH_SORT_ATTR_DESC
  :attr_asc      => 2, # SPH_SORT_ATTR_ASC
  :time_segments => 3, # SPH_SORT_TIME_SEGMENTS
  :extended      => 4, # SPH_SORT_EXTENDED
  :expr          => 5  # SPH_SORT_EXPR
}

# Attribute type codes; :multi is a flag added on top of a base type.
AttributeTypes = {
  :integer   => 1,         # SPH_ATTR_INTEGER
  :timestamp => 2,         # SPH_ATTR_TIMESTAMP
  :ordinal   => 3,         # SPH_ATTR_ORDINAL
  :bool      => 4,         # SPH_ATTR_BOOL
  :float     => 5,         # SPH_ATTR_FLOAT
  :bigint    => 6,         # SPH_ATTR_BIGINT
  :multi     => 0x40000000 # SPH_ATTR_MULTI
}

# Values accepted by group_function.
GroupFunctions = {
  :day      => 0, # SPH_GROUPBY_DAY
  :week     => 1, # SPH_GROUPBY_WEEK
  :month    => 2, # SPH_GROUPBY_MONTH
  :year     => 3, # SPH_GROUPBY_YEAR
  :attr     => 4, # SPH_GROUPBY_ATTR
  :attrpair => 5  # SPH_GROUPBY_ATTRPAIR
}

# Filter kinds.
FilterTypes = {
  :values      => 0, # SPH_FILTER_VALUES
  :range       => 1, # SPH_FILTER_RANGE
  :float_range => 2  # SPH_FILTER_FLOATRANGE
}
111
+
112
+ attr_accessor :server, :port, :offset, :limit, :max_matches,
113
+ :match_mode, :sort_mode, :sort_by, :weights, :id_range, :filters,
114
+ :group_by, :group_function, :group_clause, :group_distinct, :cut_off,
115
+ :retry_count, :retry_delay, :anchor, :index_weights, :rank_mode,
116
+ :max_query_time, :field_weights, :timeout, :overrides, :select,
117
+ :connection
118
+ attr_reader :queue
119
+
120
# Stores +value+ under the :riddle_connection key of the current thread.
def self.connection=(value)
  current = Thread.current
  current[:riddle_connection] = value
end
123
+
124
# Returns whatever is stored under the :riddle_connection key of the current
# thread (nil when nothing has been stored).
def self.connection
  current = Thread.current
  current[:riddle_connection]
end
127
+
128
+ # Can instantiate with a specific server and port - otherwise it assumes
129
+ # defaults of localhost and 9312 respectively. All other settings can be
130
+ # accessed and changed via the attribute accessors.
131
def initialize(server=nil, port=nil)
  Riddle.version_warning

  # Fall back to the default host and port when none are given.
  @server, @port = (server || "localhost"), (port || 9312)
  @socket = nil

  # Populate every query setting with its default value.
  reset

  # Pending query messages, consumed by #run.
  @queue = []
end
142
+
143
+ # Reset attributes and settings to defaults.
144
def reset
  # Built inside the method so that every call hands out fresh arrays,
  # hashes and strings rather than sharing them between resets.
  defaults = [
    [:offset,         0],
    [:limit,          20],
    [:max_matches,    1000],
    [:match_mode,     :all],
    [:sort_mode,      :relevance],
    [:sort_by,        ''],
    [:weights,        []],
    [:id_range,       0..0],
    [:filters,        []],
    [:group_by,       ''],
    [:group_function, :day],
    [:group_clause,   '@group desc'],
    [:group_distinct, ''],
    [:cut_off,        0],
    [:retry_count,    0],
    [:retry_delay,    0],
    [:anchor,         {}],
    # string keys are index names, integer values are weightings
    [:index_weights,  {}],
    [:rank_mode,      :proximity_bm25],
    [:max_query_time, 0],
    # string keys are field names, integer values are weightings
    [:field_weights,  {}],
    [:timeout,        0],
    [:overrides,      {}]
  ]
  defaults.each { |name, value| instance_variable_set("@#{name}", value) }

  @select = "*"
end
173
+
174
+ # Set the geo-anchor point - with the names of the attributes that contain
175
+ # the latitude and longitude (in radians), and the reference position.
176
+ # Note that for geocoding to work properly, you must also set
177
+ # match_mode to :extended. To sort results by distance, you will
178
+ # need to set sort_mode to :extended and sort_by to '@geodist asc'. Sphinx
179
+ # expects latitude and longitude to be returned from your SQL source
180
+ # in radians.
181
+ #
182
+ # Example:
183
+ # client.set_anchor('lat', -0.6591741, 'long', 2.530770)
184
+ #
185
def set_anchor(lat_attr, lat, long_attr, long)
  # Assemble the anchor first, then publish it in one assignment; the new
  # hash is also the return value.
  anchor = {}
  anchor[:latitude_attribute]  = lat_attr
  anchor[:latitude]            = lat
  anchor[:longitude_attribute] = long_attr
  anchor[:longitude]           = long
  @anchor = anchor
end
193
+
194
+ # Append a query to the queue. This uses the same parameters as the query
195
+ # method.
196
def append_query(search, index = '*', comments = '')
  # Build the wire message now; it is only sent once #run is called.
  message = query_message(search, index, comments)
  @queue.push(message)
end
199
+
200
+ # Run all the queries currently in the queue. This will return an array of
201
+ # results hashes.
202
def run
  response = Response.new request(:search, @queue)

  # One result hash is decoded per queued query, in queue order.
  @queue.collect do
    result = {
      :matches         => [],
      :fields          => [],
      :attributes      => {},
      :attribute_names => [],
      :words           => {}
    }

    result[:status] = response.next_int
    case result[:status]
    when Statuses[:warning]
      result[:warning] = response.next
    when Statuses[:error]
      # A failed query carries only its error message; skip to the next one.
      result[:error] = response.next
      next result
    end

    result[:fields] = response.next_array

    # Attribute schema: name/type pairs, kept in the order received.
    attribute_count = response.next_int
    (0...attribute_count).each do
      attribute_name = response.next
      result[:attributes][attribute_name] = response.next_int
      result[:attribute_names] << attribute_name
    end

    match_count = response.next_int
    id_flag     = response.next_int # > 0 means document ids are 64 bit
    (0...match_count).each do |position|
      doc    = id_flag > 0 ? response.next_64bit_int : response.next_int
      weight = response.next_int

      match = {:doc => doc, :weight => weight, :index => position, :attributes => {}}
      result[:matches] << match
      result[:attribute_names].each do |attr|
        match[:attributes][attr] = attribute_from_type(
          result[:attributes][attr], response
        )
      end
    end

    result[:total]       = response.next_int.to_i
    result[:total_found] = response.next_int.to_i
    # Reported in milliseconds; stored as seconds rounded to three places.
    result[:time]        = ('%.3f' % (response.next_int / 1000.0)).to_f

    word_count = response.next_int
    (0...word_count).each do
      word = response.next
      docs = response.next_int
      hits = response.next_int
      result[:words][word] = {:docs => docs, :hits => hits}
    end

    result
  end
ensure
  # Always empty the queue, even when the request or decoding fails.
  # Otherwise a later #query would resend the stale messages and return the
  # result of the old query instead of the new one.
  @queue.clear
end
266
+
267
+ # Query the Sphinx daemon - defaulting to all indexes, but you can specify
268
+ # a specific one if you wish. The search parameter should be a string
269
+ # following Sphinx's expectations.
270
+ #
271
+ # The object returned from this method is a hash with the following keys:
272
+ #
273
+ # * :matches
274
+ # * :fields
275
+ # * :attributes
276
+ # * :attribute_names
277
+ # * :words
278
+ # * :total
279
+ # * :total_found
280
+ # * :time
281
+ # * :status
282
+ # * :warning (if appropriate)
283
+ # * :error (if appropriate)
284
+ #
285
+ # The key <tt>:matches</tt> returns an array of hashes - the actual search
286
+ # results. Each hash has the document id (<tt>:doc</tt>), the result
287
+ # weighting (<tt>:weight</tt>), and a hash of the attributes for the
288
+ # document (<tt>:attributes</tt>).
289
+ #
290
+ # The <tt>:fields</tt> and <tt>:attribute_names</tt> keys return list of
291
+ # fields and attributes for the documents. The key <tt>:attributes</tt>
292
+ # will return a hash of attribute name and type pairs, and <tt>:words</tt>
293
+ # returns a hash of hashes representing the words from the search, with the
294
+ # number of documents and hits for each, along the lines of:
295
+ #
296
+ # results[:words]["Pat"] #=> {:docs => 12, :hits => 15}
297
+ #
298
+ # <tt>:total</tt>, <tt>:total_found</tt> and <tt>:time</tt> return the
299
+ # number of matches available, the total number of matches (which may be
300
+ # greater than the maximum available, depending on the number of matches
301
+ # and your sphinx configuration), and the time in milliseconds that the
302
+ # query took to run.
303
+ #
304
+ # <tt>:status</tt> is the error code for the query - and if there was a
305
+ # related warning, it will be under the <tt>:warning</tt> key. Fatal errors
306
+ # will be described under <tt>:error</tt>.
307
+ #
308
def query(search, index = '*', comments = '')
  # Queue this search, run everything in the queue, and hand back the first
  # result hash.
  @queue.push query_message(search, index, comments)
  results = self.run
  results.first
end
312
+
313
+ # Build excerpts from search terms (the +words+) and the text of documents. Excerpts are bodies of text that have the +words+ highlighted.
314
+ # They may also be abbreviated to fit within a word limit.
315
+ #
316
+ # As part of the options hash, you will need to
317
+ # define:
318
+ # * :docs
319
+ # * :words
320
+ # * :index
321
+ #
322
+ # Optional settings include:
323
+ # * :before_match (defaults to <span class="match">)
324
+ # * :after_match (defaults to </span>)
325
+ # * :chunk_separator (defaults to ' &#8230; ' - which is an HTML ellipsis)
326
+ # * :limit (defaults to 256)
327
+ # * :around (defaults to 5)
328
+ # * :exact_phrase (defaults to false)
329
+ # * :single_passage (defaults to false)
330
+ #
331
+ # The defaults differ from the official PHP client, as I've opted for
332
+ # semantic HTML markup.
333
+ #
334
+ # Example:
335
+ #
336
+ # client.excerpts(:docs => ["Pat Allan, Pat Cash"], :words => 'Pat', :index => 'pats')
337
+ # #=> ["<span class=\"match\">Pat</span> Allan, <span class=\"match\">Pat</span> Cash"]
338
+ #
339
+ # lorem_lipsum = "Lorem ipsum dolor..."
340
+ #
341
+ # client.excerpts(:docs => ["Pat Allan, #{lorem_lipsum} Pat Cash"], :words => 'Pat', :index => 'pats')
342
+ # #=> ["<span class=\"match\">Pat</span> Allan, Lorem ipsum dolor sit amet, consectetur adipisicing
343
+ # elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua &#8230; . Excepteur
344
+ # sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est
345
+ # laborum. <span class=\"match\">Pat</span> Cash"]
346
+ #
347
+ # Workflow:
348
+ #
349
+ # Excerpt creation is completely isolated from searching the index. The nominated index is only used to
350
+ # discover encoding and charset information.
351
+ #
352
+ # Therefore, the workflow goes:
353
+ #
354
+ # 1. Do the sphinx query.
355
+ # 2. Fetch the documents found by sphinx from their repositories.
356
+ # 3. Pass the documents' text to +excerpts+ for marking up of matched terms.
357
+ #
358
def excerpts(options = {})
  # Fill in any option that is missing (or nil/false) with its default. The
  # caller's hash is updated in place.
  [
    [:index,           '*'],
    [:before_match,    '<span class="match">'],
    [:after_match,     '</span>'],
    [:chunk_separator, ' &#8230; '], # ellipsis
    [:limit,           256],
    [:around,          5],
    [:exact_phrase,    false],
    [:single_passage,  false]
  ].each { |key, fallback| options[key] ||= fallback }

  reply = Response.new(request(:excerpt, excerpts_message(options)))

  # One excerpt string is read from the reply per document passed in.
  options[:docs].map { reply.next }
end
372
+
373
+ # Update attributes - first parameter is the relevant index, second is an
374
+ # array of attributes to be updated, and the third is a hash, where the
375
+ # keys are the document ids, and the values are arrays with the attribute
376
+ # values - in the same order as the second parameter.
377
+ #
378
+ # Example:
379
+ #
380
+ # client.update('people', ['birthday'], {1 => [Time.local(1982, 8, 20).to_i]})
381
+ #
382
def update(index, attributes, values_by_doc)
  payload = update_message(index, attributes, values_by_doc)
  reply   = Response.new(request(:update, payload))

  # The reply consists of a single integer, returned as-is.
  reply.next_int
end
390
+
391
+ # Generates a keyword list for a given query. Each keyword is represented
392
+ # by a hash, with keys :tokenised and :normalised. If return_hits is set to
393
+ # true it will also report on the number of hits and documents for each
394
+ # keyword (see :hits and :docs keys respectively).
395
def keywords(query, index, return_hits = false)
  payload = keywords_message(query, index, return_hits)
  reply   = Response.new(request(:keywords, payload))

  # The reply starts with the keyword count, followed by one record each.
  (0...reply.next_int).map do
    entry = {}
    entry[:tokenised]  = reply.next
    entry[:normalised] = reply.next

    # Statistics are only present in the reply when they were requested.
    if return_hits
      entry[:docs] = reply.next_int
      entry[:hits] = reply.next_int
    end

    entry
  end
end
414
+
415
# Sends a status request and returns the reply as a hash with one entry per
# row: the first string of the row (as a symbol) maps to the second.
def status
  reply = Response.new(request(:status, Message.new))

  row_count = reply.next_int
  reply.next_int # column count: read to advance the reply, value unused

  details = {}
  (0...row_count).each do
    key = reply.next.to_sym
    details[key] = reply.next
  end
  details
end
427
+
428
# Registers an override for +attribute+, replacing any earlier one. +type+ is
# a key of AttributeTypes; +values+ is iterated as id/value pairs when the
# query message is built.
def add_override(attribute, type, values)
  override = {:type => type, :values => values}
  @overrides.store(attribute, override)
end
431
+
432
# Opens the socket to searchd. When the configured search protocol version is
# 0x116 or newer, the persist command is sent as well.
def open
  open_socket

  if Versions[:search] >= 0x116
    persist = [Commands[:persist], 0, 4, 1].pack("nnNN")
    @socket.send persist, 0
  end
end
441
+
442
# Public counterpart of #open: closes the socket by delegating to the private
# close_socket, whose result is returned.
def close
  close_socket
end
445
+
446
+ private
447
+
448
# Establishes @socket through initialise_connection. Raises if a socket is
# already set. A non-zero @timeout bounds the attempt, and expiry is reported
# as Riddle::ConnectionError. Returns true.
def open_socket
  raise "Already Connected" if !@socket.nil?

  if @timeout != 0
    begin
      Timeout.timeout(@timeout) { @socket = initialise_connection }
    rescue Timeout::Error
      raise Riddle::ConnectionError,
        "Connection to #{@server} on #{@port} timed out after #{@timeout} seconds"
    end
  else
    # No limit configured: connect without a time bound.
    @socket = initialise_connection
  end

  true
end
464
+
465
# Closes and forgets the current socket. Raises when there is none. Returns
# true.
def close_socket
  socket = @socket
  raise "Not Connected" if socket.nil?

  socket.close
  @socket = nil

  true
end
473
+
474
+ # Connects to the Sphinx daemon, and yields a socket to use. The socket is
475
+ # closed at the end of the block, unless it was already open beforehand.
476
def connect(&block)
  # A socket that is already open is reused and left open afterwards.
  return yield(@socket) if @socket && !@socket.closed?

  # Otherwise open a socket for this one block and always close it again,
  # even when the block raises.
  @socket = nil
  open_socket
  begin
    yield @socket
  ensure
    close_socket
  end
end
489
+
490
# Obtains a socket from initialise_socket and performs the version handshake:
# reads the four-byte version sent by the server, rejects anything below 1
# with VersionError (closing the socket first), then replies with version 1.
# Returns the ready socket.
def initialise_connection
  socket = initialise_socket

  server_version = socket.recv(4).unpack('N*').first
  if server_version >= 1
    socket.send [1].pack('N'), 0
    return socket
  end

  socket.close
  raise VersionError, "Can only connect to searchd version 1.0 or better, not version #{server_version}"
end
505
+
506
# Creates the raw socket. An instance-level connection callable wins over the
# class-level one; both are invoked with this client. Without either, a plain
# TCPSocket to @server/@port is opened. A refused connection is retried
# immediately, up to five attempts in total, before Riddle::ConnectionError is
# raised.
def initialise_socket
  attempts = 0
  begin
    factory = self.connection || self.class.connection
    socket  = factory ? factory.call(self) : TCPSocket.new(@server, @port)
  rescue Errno::ECONNREFUSED => e
    attempts += 1
    retry if attempts < 5
    raise Riddle::ConnectionError,
      "Connection to #{@server} on #{@port} failed. #{e.message}"
  end

  socket
end
524
+
525
+ # Send a collection of messages, for a command type (eg, search, excerpts,
526
+ # update), to the Sphinx daemon.
527
def request(command, messages)
  response = ""
  status   = -1
  version  = 0
  length   = 0
  message  = Array(messages).join("")
  # The payload is raw bytes; mark it as binary where strings carry encodings.
  if message.respond_to?(:force_encoding)
    message = message.force_encoding('ASCII-8BIT')
  end

  connect do |socket|
    case command
    when :search
      # Message length is +4 to account for the following count value for
      # the number of messages.
      socket.send [
        Commands[command], Versions[command],
        4+message.length, messages.length
      ].pack("nnNN") + message, 0
    when :status
      socket.send [
        Commands[command], Versions[command], 4, 1
      ].pack("nnNN"), 0
    else
      socket.send [
        Commands[command], Versions[command], message.length
      ].pack("nnN") + message, 0
    end

    # Reply header: status (16 bit), version (16 bit), body length (32 bit).
    # to_s guards against recv returning nil at end of stream.
    header = socket.recv(8)
    status, version, length = header.to_s.unpack('n2N')

    while response.length < (length || 0)
      part = socket.recv(length - response.length)

      # An empty or nil read means the peer closed the connection. Stop here
      # so the length check below reports it, rather than looping forever.
      break if part.nil? || part.empty?

      response << part
    end
  end

  if response.empty? || response.length != length
    raise ResponseError, "No response from searchd (status: #{status}, version: #{version})"
  end

  case status
  when Statuses[:ok]
    if version < Versions[command]
      puts format("searchd command v.%d.%d older than client (v.%d.%d)",
        version >> 8, version & 0xff,
        Versions[command] >> 8, Versions[command] & 0xff)
    end
    response
  when Statuses[:warning]
    # Body starts with a length-prefixed warning; print it and return the rest.
    length = response[0, 4].unpack('N*').first
    puts response[4, length]
    response[4 + length, response.length - 4 - length]
  when Statuses[:error], Statuses[:retry]
    raise ResponseError, "searchd error (status: #{status}): #{response[4, response.length - 4]}"
  else
    raise ResponseError, "Unknown searchd error (status: #{status})"
  end
end
587
+
588
+ # Generation of the message to send to Sphinx for a search.
589
def query_message(search, index, comments = '')
  msg = Message.new

  # Paging, match/rank/sort modes and the sort expression
  msg.append_ints @offset, @limit, MatchModes[@match_mode],
    RankModes[@rank_mode], SortModes[@sort_mode]
  msg.append_string @sort_by

  # The search string itself
  msg.append_string search

  # Positional weights, length-prefixed
  msg.append_int @weights.length
  msg.append_ints(*@weights)

  # Target index
  msg.append_string index

  # Document id range, preceded by a constant 1
  msg.append_int 1
  msg.append_64bit_ints @id_range.first, @id_range.last

  # Filters, each one contributing its own encoded form
  msg.append_int @filters.length
  @filters.each { |filter| msg.append filter.query_message }

  # Grouping, match cap, cut-off and retry settings
  msg.append_int GroupFunctions[@group_function]
  msg.append_string @group_by
  msg.append_int @max_matches
  msg.append_string @group_clause
  msg.append_ints @cut_off, @retry_count, @retry_delay
  msg.append_string @group_distinct

  # Geo anchor: a presence flag, then attribute names and coordinates
  unless @anchor.empty?
    msg.append_int 1
    msg.append_string @anchor[:latitude_attribute]
    msg.append_string @anchor[:longitude_attribute]
    msg.append_floats @anchor[:latitude], @anchor[:longitude]
  else
    msg.append_int 0
  end

  # Per-index weights
  msg.append_int @index_weights.length
  @index_weights.each do |name, weight|
    msg.append_string name.to_s
    msg.append_int weight
  end

  # Maximum query time
  msg.append_int @max_query_time

  # Per-field weights
  msg.append_int @field_weights.length
  @field_weights.each do |name, weight|
    msg.append_string name.to_s
    msg.append_int weight
  end

  msg.append_string comments

  # Overrides and the select clause are only written from version 0x116 on.
  if Versions[:search] >= 0x116
    msg.append_int @overrides.length
    @overrides.each do |attribute, override|
      # The value encoding follows the declared override type.
      appender = {
        :float  => :append_float,
        :bigint => :append_64bit_int
      }.fetch(override[:type], :append_int)

      msg.append_string attribute.to_s
      msg.append_int AttributeTypes[override[:type]]
      msg.append_int override[:values].length
      override[:values].each do |id, value|
        msg.append_64bit_int id
        msg.send appender, value
      end
    end

    msg.append_string @select
  end

  msg.to_s
end
678
+
679
+ # Generation of the message to send to Sphinx for an excerpts request.
680
def excerpts_message(options)
  msg = Message.new

  # Bit 1 is always set; each truthy option below adds its own bit.
  flag_bits = [
    [:exact_phrase,   2],
    [:single_passage, 4],
    [:use_boundaries, 8],
    [:weight_order,   16]
  ]
  flags = flag_bits.inject(1) do |bits, (option, bit)|
    options[option] ? (bits | bit) : bits
  end

  msg.append [0, flags].pack('N2') # 0 = mode
  msg.append_string options[:index]
  msg.append_string options[:words]

  # Markup and sizing options
  msg.append_string options[:before_match]
  msg.append_string options[:after_match]
  msg.append_string options[:chunk_separator]
  msg.append_ints options[:limit], options[:around]

  # The documents to build excerpts from
  msg.append_array options[:docs]

  msg.to_s
end
703
+
704
+ # Generation of the message to send to Sphinx to update attributes of a
705
+ # document.
706
def update_message(index, attributes, values_by_doc)
  msg = Message.new

  msg.append_string index
  msg.append_array attributes

  # Document count, then for each document its 64 bit id followed by the new
  # integer values, in the same order as +attributes+.
  msg.append_int values_by_doc.length
  values_by_doc.each do |doc_id, new_values|
    msg.append_64bit_int doc_id
    msg.append_ints(*new_values)
  end

  msg.to_s
end
720
+
721
+ # Generates the simple message to send to the daemon for a keywords request.
722
def keywords_message(query, index, return_hits)
  # The flag is sent as an integer: 1 asks for hit statistics, 0 does not.
  hits_flag = return_hits ? 1 : 0

  msg = Message.new
  msg.append_string query
  msg.append_string index
  msg.append_int hits_flag
  msg.to_s
end
731
+
732
# Reads one attribute value from +response+, choosing the reader by +type+ (a
# value from AttributeTypes, optionally with the :multi flag added on top).
# Multi-value types use the array reader of the underlying base type.
def attribute_from_type(type, response)
  is_multi = type > AttributeTypes[:multi]
  type -= AttributeTypes[:multi] if is_multi

  case type
  when AttributeTypes[:float]
    is_multi ? response.next_float_array : response.next_float
  when AttributeTypes[:bigint]
    # Was spelled next_64bit_int_arry, which cannot match the naming of the
    # other array readers.
    # NOTE(review): assumes Response defines next_64bit_int_array - confirm.
    is_multi ? response.next_64bit_int_array : response.next_64bit_int
  else
    is_multi ? response.next_int_array : response.next_int
  end
end
744
+ end
745
+ end