sphinx 0.9.9.2117

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. data/.gitignore +4 -0
  2. data/README.rdoc +243 -0
  3. data/Rakefile +45 -0
  4. data/VERSION.yml +5 -0
  5. data/init.rb +1 -0
  6. data/lib/sphinx/buffered_io.rb +26 -0
  7. data/lib/sphinx/client.rb +2426 -0
  8. data/lib/sphinx/constants.rb +179 -0
  9. data/lib/sphinx/indifferent_access.rb +152 -0
  10. data/lib/sphinx/request.rb +121 -0
  11. data/lib/sphinx/response.rb +71 -0
  12. data/lib/sphinx/server.rb +170 -0
  13. data/lib/sphinx/timeout.rb +31 -0
  14. data/lib/sphinx.rb +51 -0
  15. data/spec/client_response_spec.rb +170 -0
  16. data/spec/client_spec.rb +669 -0
  17. data/spec/client_validations_spec.rb +859 -0
  18. data/spec/fixtures/default_search.php +8 -0
  19. data/spec/fixtures/default_search_index.php +8 -0
  20. data/spec/fixtures/excerpt_custom.php +11 -0
  21. data/spec/fixtures/excerpt_default.php +8 -0
  22. data/spec/fixtures/excerpt_flags.php +12 -0
  23. data/spec/fixtures/field_weights.php +9 -0
  24. data/spec/fixtures/filter.php +9 -0
  25. data/spec/fixtures/filter_exclude.php +9 -0
  26. data/spec/fixtures/filter_float_range.php +9 -0
  27. data/spec/fixtures/filter_float_range_exclude.php +9 -0
  28. data/spec/fixtures/filter_range.php +9 -0
  29. data/spec/fixtures/filter_range_exclude.php +9 -0
  30. data/spec/fixtures/filter_range_int64.php +10 -0
  31. data/spec/fixtures/filter_ranges.php +10 -0
  32. data/spec/fixtures/filters.php +10 -0
  33. data/spec/fixtures/filters_different.php +13 -0
  34. data/spec/fixtures/geo_anchor.php +9 -0
  35. data/spec/fixtures/group_by_attr.php +9 -0
  36. data/spec/fixtures/group_by_attrpair.php +9 -0
  37. data/spec/fixtures/group_by_day.php +9 -0
  38. data/spec/fixtures/group_by_day_sort.php +9 -0
  39. data/spec/fixtures/group_by_month.php +9 -0
  40. data/spec/fixtures/group_by_week.php +9 -0
  41. data/spec/fixtures/group_by_year.php +9 -0
  42. data/spec/fixtures/group_distinct.php +10 -0
  43. data/spec/fixtures/id_range.php +9 -0
  44. data/spec/fixtures/id_range64.php +9 -0
  45. data/spec/fixtures/index_weights.php +9 -0
  46. data/spec/fixtures/keywords.php +8 -0
  47. data/spec/fixtures/limits.php +9 -0
  48. data/spec/fixtures/limits_cutoff.php +9 -0
  49. data/spec/fixtures/limits_max.php +9 -0
  50. data/spec/fixtures/limits_max_cutoff.php +9 -0
  51. data/spec/fixtures/match_all.php +9 -0
  52. data/spec/fixtures/match_any.php +9 -0
  53. data/spec/fixtures/match_boolean.php +9 -0
  54. data/spec/fixtures/match_extended.php +9 -0
  55. data/spec/fixtures/match_extended2.php +9 -0
  56. data/spec/fixtures/match_fullscan.php +9 -0
  57. data/spec/fixtures/match_phrase.php +9 -0
  58. data/spec/fixtures/max_query_time.php +9 -0
  59. data/spec/fixtures/miltiple_queries.php +12 -0
  60. data/spec/fixtures/ranking_bm25.php +9 -0
  61. data/spec/fixtures/ranking_fieldmask.php +9 -0
  62. data/spec/fixtures/ranking_matchany.php +9 -0
  63. data/spec/fixtures/ranking_none.php +9 -0
  64. data/spec/fixtures/ranking_proximity.php +9 -0
  65. data/spec/fixtures/ranking_proximity_bm25.php +9 -0
  66. data/spec/fixtures/ranking_wordcount.php +9 -0
  67. data/spec/fixtures/retries.php +9 -0
  68. data/spec/fixtures/retries_delay.php +9 -0
  69. data/spec/fixtures/select.php +9 -0
  70. data/spec/fixtures/set_override.php +11 -0
  71. data/spec/fixtures/sort_attr_asc.php +9 -0
  72. data/spec/fixtures/sort_attr_desc.php +9 -0
  73. data/spec/fixtures/sort_expr.php +9 -0
  74. data/spec/fixtures/sort_extended.php +9 -0
  75. data/spec/fixtures/sort_relevance.php +9 -0
  76. data/spec/fixtures/sort_time_segments.php +9 -0
  77. data/spec/fixtures/sphinxapi.php +1633 -0
  78. data/spec/fixtures/update_attributes.php +8 -0
  79. data/spec/fixtures/update_attributes_mva.php +8 -0
  80. data/spec/fixtures/weights.php +9 -0
  81. data/spec/spec_helper.rb +24 -0
  82. data/spec/sphinx/sphinx-id64.conf +67 -0
  83. data/spec/sphinx/sphinx.conf +67 -0
  84. data/spec/sphinx/sphinx_test.sql +88 -0
  85. data/sphinx.gemspec +127 -0
  86. metadata +142 -0
@@ -0,0 +1,1633 @@
1
+ <?php
2
+
3
+ //
4
+ // $Id: sphinxapi.php 2055 2009-11-06 23:09:58Z shodan $
5
+ //
6
+
7
+ //
8
+ // Copyright (c) 2001-2008, Andrew Aksyonoff. All rights reserved.
9
+ //
10
+ // This program is free software; you can redistribute it and/or modify
11
+ // it under the terms of the GNU General Public License. You should have
12
+ // received a copy of the GPL license along with this program; if you
13
+ // did not, you can find it at http://www.gnu.org/
14
+ //
15
+
16
+ /////////////////////////////////////////////////////////////////////////////
17
+ // PHP version of Sphinx searchd client (PHP API)
18
+ /////////////////////////////////////////////////////////////////////////////
19
+
20
/// searchd command identifiers
define ( 'SEARCHD_COMMAND_SEARCH',		0 );
define ( 'SEARCHD_COMMAND_EXCERPT',		1 );
define ( 'SEARCHD_COMMAND_UPDATE',		2 );
define ( 'SEARCHD_COMMAND_KEYWORDS',	3 );
define ( 'SEARCHD_COMMAND_PERSIST',		4 );
define ( 'SEARCHD_COMMAND_STATUS',		5 );
define ( 'SEARCHD_COMMAND_QUERY',		6 );

/// client-side implementation version of each command
define ( 'VER_COMMAND_SEARCH',			0x116 );
define ( 'VER_COMMAND_EXCERPT',			0x100 );
define ( 'VER_COMMAND_UPDATE',			0x102 );
define ( 'VER_COMMAND_KEYWORDS',		0x100 );
define ( 'VER_COMMAND_STATUS',			0x100 );
define ( 'VER_COMMAND_QUERY',			0x100 );

/// status codes found in a searchd response header
define ( 'SEARCHD_OK',					0 );
define ( 'SEARCHD_ERROR',				1 );
define ( 'SEARCHD_RETRY',				2 );
define ( 'SEARCHD_WARNING',				3 );

/// match modes
define ( 'SPH_MATCH_ALL',				0 );
define ( 'SPH_MATCH_ANY',				1 );
define ( 'SPH_MATCH_PHRASE',			2 );
define ( 'SPH_MATCH_BOOLEAN',			3 );
define ( 'SPH_MATCH_EXTENDED',			4 );
define ( 'SPH_MATCH_FULLSCAN',			5 );
define ( 'SPH_MATCH_EXTENDED2',			6 ); // extended engine V2 (TEMPORARY, WILL BE REMOVED)

/// ranking modes (ext2 only)
define ( 'SPH_RANK_PROXIMITY_BM25',		0 ); ///< default mode, phrase proximity major factor and BM25 minor one
define ( 'SPH_RANK_BM25',				1 ); ///< statistical mode, BM25 ranking only (faster but worse quality)
define ( 'SPH_RANK_NONE',				2 ); ///< no ranking, all matches get a weight of 1
define ( 'SPH_RANK_WORDCOUNT',			3 ); ///< simple word-count weighting, rank is a weighted sum of per-field keyword occurrence counts
define ( 'SPH_RANK_PROXIMITY',			4 );
define ( 'SPH_RANK_MATCHANY',			5 );
define ( 'SPH_RANK_FIELDMASK',			6 );

/// sort modes
define ( 'SPH_SORT_RELEVANCE',			0 );
define ( 'SPH_SORT_ATTR_DESC',			1 );
define ( 'SPH_SORT_ATTR_ASC',			2 );
define ( 'SPH_SORT_TIME_SEGMENTS',		3 );
define ( 'SPH_SORT_EXTENDED',			4 );
define ( 'SPH_SORT_EXPR',				5 );

/// filter types
define ( 'SPH_FILTER_VALUES',			0 );
define ( 'SPH_FILTER_RANGE',			1 );
define ( 'SPH_FILTER_FLOATRANGE',		2 );

/// attribute types
define ( 'SPH_ATTR_INTEGER',			1 );
define ( 'SPH_ATTR_TIMESTAMP',			2 );
define ( 'SPH_ATTR_ORDINAL',			3 );
define ( 'SPH_ATTR_BOOL',				4 );
define ( 'SPH_ATTR_FLOAT',				5 );
define ( 'SPH_ATTR_BIGINT',				6 );
define ( 'SPH_ATTR_MULTI',				0x40000000 );

/// grouping functions
define ( 'SPH_GROUPBY_DAY',				0 );
define ( 'SPH_GROUPBY_WEEK',			1 );
define ( 'SPH_GROUPBY_MONTH',			2 );
define ( 'SPH_GROUPBY_YEAR',			3 );
define ( 'SPH_GROUPBY_ATTR',			4 );
define ( 'SPH_GROUPBY_ATTRPAIR',		5 );
90
+
91
+ // important properties of PHP's integers:
92
+ // - always signed (one bit short of PHP_INT_SIZE)
93
+ // - conversion from string to int is saturated
94
+ // - float is double
95
+ // - div converts arguments to floats
96
+ // - mod converts arguments to ints
97
+
98
+ // the packing code below works as follows:
99
+ // - when we got an int, just pack it
100
+ // if performance is a problem, this is the branch users should aim for
101
+ //
102
+ // - otherwise, we got a number in string form
103
+ // this might be due to different reasons, but we assume that this is
104
+ // because it didn't fit into PHP int
105
+ //
106
+ // - factor the string into high and low ints for packing
107
+ // - if we have bcmath, then it is used
108
+ // - if we don't, we have to do it manually (this is the fun part)
109
+ //
110
+ // - x64 branch does factoring using ints
111
+ // - x32 (ab)uses floats, since we can't fit unsigned 32-bit number into an int
112
+ //
113
+ // unpacking routines are pretty much the same.
114
+ // - return ints if we can
115
+ // - otherwise format number into a string
116
+
117
+ /// pack 64-bit signed
118
/// $v is an int or a numeric string; the result is 8 bytes produced by
/// pack("NN"): the high 32-bit word followed by the low 32-bit word
function sphPackI64 ( $v )
{
	assert ( is_numeric($v) );

	// 64-bit PHP: the whole value fits into a native int
	if ( PHP_INT_SIZE>=8 )
	{
		$num = (int)$v;
		$high = $num>>32;
		$low = $num&0xFFFFFFFF;
		return pack ( "NN", $high, $low );
	}

	// 32-bit PHP, native int: the high word is just the sign extension
	if ( is_int($v) )
	{
		$high = ( $v<0 ) ? -1 : 0;
		return pack ( "NN", $high, $v );
	}

	// 32-bit PHP, numeric string, bcmath available
	if ( function_exists("bcmul") )
	{
		$unsigned = $v;
		if ( bccomp ( $unsigned, 0 ) == -1 )
			$unsigned = bcadd ( "18446744073709551616", $unsigned ); // add 2^64 to get the two's complement form
		$high = bcdiv ( $unsigned, "4294967296", 0 );
		$low = bcmod ( $unsigned, "4294967296" );
		// floats on purpose: casting to int would lose the 31st bit
		return pack ( "NN", (float)$high, (float)$low );
	}

	// 32-bit PHP, numeric string, no bcmath:
	// split the decimal string into head*10^13 + tail and factor with floats
	$cut = max ( 0, strlen($v) - 13 );
	$tail = abs ( (float)substr ( $v, $cut ) );
	$head = abs ( (float)substr ( $v, 0, $cut ) );

	$mixed = $tail + $head*1316134912.0; // (10 ^ 13) % (1 << 32) = 1316134912
	$carry = floor ( $mixed/4294967296.0 );
	$low = $mixed - ( $carry*4294967296.0 );
	$high = $head*2328.0 + $carry; // (10 ^ 13) / (1 << 32) = 2328

	// negative input: negate the (high, low) magnitude pair
	if ( $v<0 )
	{
		if ( $low==0 )
		{
			$high = 4294967296.0 - $high;
		}
		else
		{
			$high = 4294967295.0 - $high;
			$low = 4294967296.0 - $low;
		}
	}
	return pack ( "NN", $high, $low );
}
165
+
166
+ /// pack 64-bit unsigned
167
/// $v is a non-negative int or a numeric string; the result is 8 bytes
/// produced by pack("NN"): high 32-bit word first, then the low one
function sphPackU64 ( $v )
{
	assert ( is_numeric($v) );

	if ( PHP_INT_SIZE>=8 )
	{
		// 64-bit PHP
		assert ( $v>=0 );

		// native int: shift and mask
		if ( is_int($v) )
			return pack ( "NN", $v>>32, $v&0xFFFFFFFF );

		// numeric string, bcmath available
		if ( function_exists("bcmul") )
		{
			$high = bcdiv ( $v, 4294967296, 0 );
			$low = bcmod ( $v, 4294967296 );
			return pack ( "NN", $high, $low );
		}

		// numeric string, no bcmath: value = head*10^13 + tail, factored with ints
		$cut = max ( 0, strlen($v) - 13 );
		$tail = (int)substr ( $v, $cut );
		$head = (int)substr ( $v, 0, $cut );

		$mixed = $tail + $head*1316134912; // (10 ^ 13) % (1 << 32) = 1316134912
		$low = $mixed % 4294967296;
		$high = $head*2328 + (int)($mixed/4294967296); // (10 ^ 13) / (1 << 32) = 2328

		return pack ( "NN", $high, $low );
	}

	// 32-bit PHP, native int: the high word is always zero
	if ( is_int($v) )
		return pack ( "NN", 0, $v );

	// 32-bit PHP, numeric string, bcmath available
	if ( function_exists("bcmul") )
	{
		$high = bcdiv ( $v, "4294967296", 0 );
		$low = bcmod ( $v, "4294967296" );
		// floats on purpose: casting to int would lose the 31st bit
		return pack ( "NN", (float)$high, (float)$low );
	}

	// 32-bit PHP, numeric string, no bcmath: same factoring, done with floats
	$cut = max ( 0, strlen($v) - 13 );
	$tail = (float)substr ( $v, $cut );
	$head = (float)substr ( $v, 0, $cut );

	$mixed = $tail + $head*1316134912.0;
	$carry = floor ( $mixed / 4294967296.0 );
	$low = $mixed - ( $carry * 4294967296.0 );
	$high = $head*2328.0 + $carry;

	return pack ( "NN", $high, $low );
}
224
+
225
+ // unpack 64-bit unsigned
226
/// $v holds two 32-bit big-endian words (high, low); returns a native int
/// when the value fits into one, otherwise a decimal string
function sphUnpackU64 ( $v )
{
	$words = array_values ( unpack ( "N*N*", $v ) );
	$hi = $words[0];
	$lo = $words[1];

	if ( PHP_INT_SIZE>=8 )
	{
		// some PHP builds (5.2.2 to 5.2.5) return negative values from unpack() here
		if ( $hi<0 ) $hi += (1<<32);
		if ( $lo<0 ) $lo += (1<<32);

		// 64-bit PHP: fits into a signed native int
		if ( $hi<=2147483647 )
			return ($hi<<32) + $lo;

		// 64-bit PHP, bcmath available
		if ( function_exists("bcmul") )
			return bcadd ( $lo, bcmul ( $hi, "4294967296" ) );

		// 64-bit PHP, no bcmath: produce the decimal digits in two pieces,
		// everything above the last five digits and the last five digits
		$base = 100000;
		$upper = ((int)($hi / $base) << 32) + (int)($lo / $base);
		$lower = (($hi % $base) << 32) + ($lo % $base);
		if ( $lower>$base )
		{
			$upper += (int)($lower / $base);
			$lower = $lower % $base;
		}

		if ( $upper==0 )
			return $lower;
		return sprintf ( "%d%05d", $upper, $lower );
	}

	// 32-bit PHP: value fits into the low word
	if ( $hi==0 )
	{
		if ( $lo>0 )
			return $lo;
		return sprintf ( "%u", $lo );
	}

	$hi = sprintf ( "%u", $hi );
	$lo = sprintf ( "%u", $lo );

	// 32-bit PHP, bcmath available
	if ( function_exists("bcmul") )
		return bcadd ( $lo, bcmul ( $hi, "4294967296" ) );

	// 32-bit PHP, no bcmath: float arithmetic in chunks of seven decimal digits
	$hi = (float)$hi;
	$lo = (float)$lo;

	$quot = floor ( $hi/10000000.0 );
	$rem = $hi - $quot*10000000.0;
	$mixed = $lo + $rem*4967296.0;
	$carry = floor ( $mixed/10000000.0 );
	$lower = $mixed - $carry*10000000.0;
	$upper = $quot*4294967296.0 + $rem*429.0 + $carry;

	$upper = sprintf ( "%.0f", $upper );
	$lower = sprintf ( "%07.0f", $lower );
	if ( $upper=="0" )
		return sprintf( "%.0f", (float)$lower );
	return $upper . $lower;
}
290
+
291
+ // unpack 64-bit signed
292
/// $v holds two 32-bit big-endian words (high, low) of a signed value;
/// returns a native int when possible, otherwise a decimal string
function sphUnpackI64 ( $v )
{
	$words = array_values ( unpack ( "N*N*", $v ) );
	$hi = $words[0];
	$lo = $words[1];

	// 64-bit PHP: recombine directly
	if ( PHP_INT_SIZE>=8 )
	{
		// some PHP builds (5.2.2 to 5.2.5) return negative values from unpack() here
		if ( $hi<0 ) $hi += (1<<32);
		if ( $lo<0 ) $lo += (1<<32);

		return ($hi<<32) + $lo;
	}

	// 32-bit PHP: small non-negative value
	if ( $hi==0 )
	{
		if ( $lo>0 )
			return $lo;
		return sprintf ( "%u", $lo );
	}

	// 32-bit PHP: small negative value
	if ( $hi==-1 )
	{
		if ( $lo<0 )
			return $lo;
		return sprintf ( "%.0f", $lo - 4294967296.0 );
	}

	// negative value: invert both words, remember the sign and the +1 to add back
	$sign = "";
	$plusOne = 0;
	if ( $hi<0 )
	{
		$hi = ~$hi;
		$lo = ~$lo;
		$plusOne = 1;
		$sign = "-";
	}

	$hi = sprintf ( "%u", $hi );
	$lo = sprintf ( "%u", $lo );

	// 32-bit PHP, bcmath available
	if ( function_exists("bcmul") )
		return $sign . bcadd ( bcadd ( $lo, bcmul ( $hi, "4294967296" ) ), $plusOne );

	// 32-bit PHP, no bcmath: float arithmetic in chunks of seven decimal digits
	$hi = (float)$hi;
	$lo = (float)$lo;

	$quot = floor ( $hi/10000000.0 );
	$rem = $hi - $quot*10000000.0;
	$mixed = $lo + $rem*4967296.0;
	$carry = floor ( $mixed/10000000.0 );
	$lower = $mixed - $carry*10000000.0 + $plusOne;
	$upper = $quot*4294967296.0 + $rem*429.0 + $carry;
	if ( $lower==10000000 )
	{
		// the +1 overflowed the low seven digits
		$lower = 0;
		$upper += 1;
	}

	$upper = sprintf ( "%.0f", $upper );
	$lower = sprintf ( "%07.0f", $lower );
	if ( $upper=="0" )
		return $sign . sprintf( "%.0f", (float)$lower );
	return $sign . $upper . $lower;
}
359
+
360
+
361
/// normalize a value meant to be an unsigned 32-bit integer:
/// 64-bit PHP gets an int (negative inputs are shifted up by 2^32),
/// 32-bit PHP gets the "%u"-formatted string
function sphFixUint ( $value )
{
	// x32 route: plain ints cannot hold the upper half of the range, so format as unsigned
	if ( PHP_INT_SIZE<8 )
		return sprintf ( "%u", $value );

	// x64 route: work around unpack() returning negative values in 5.2.2+
	if ( $value<0 )
		$value += (1<<32);
	return $value;
}
375
+
376
+
377
+ /// sphinx searchd client class
378
+ class SphinxClient
379
+ {
380
+ var $_host; ///< searchd host (default is "localhost")
381
+ var $_port; ///< searchd port (default is 9312)
382
+ var $_offset; ///< how many records to seek from result-set start (default is 0)
383
+ var $_limit; ///< how many records to return from result-set starting at offset (default is 20)
384
+ var $_mode; ///< query matching mode (default is SPH_MATCH_ALL)
385
+ var $_weights; ///< per-field weights (default is 1 for all fields)
386
+ var $_sort; ///< match sorting mode (default is SPH_SORT_RELEVANCE)
387
+ var $_sortby; ///< attribute to sort by (defualt is "")
388
+ var $_min_id; ///< min ID to match (default is 0, which means no limit)
389
+ var $_max_id; ///< max ID to match (default is 0, which means no limit)
390
+ var $_filters; ///< search filters
391
+ var $_groupby; ///< group-by attribute name
392
+ var $_groupfunc; ///< group-by function (to pre-process group-by attribute value with)
393
+ var $_groupsort; ///< group-by sorting clause (to sort groups in result set with)
394
+ var $_groupdistinct;///< group-by count-distinct attribute
395
+ var $_maxmatches; ///< max matches to retrieve
396
+ var $_cutoff; ///< cutoff to stop searching at (default is 0)
397
+ var $_retrycount; ///< distributed retries count
398
+ var $_retrydelay; ///< distributed retries delay
399
+ var $_anchor; ///< geographical anchor point
400
+ var $_indexweights; ///< per-index weights
401
+ var $_ranker; ///< ranking mode (default is SPH_RANK_PROXIMITY_BM25)
402
+ var $_maxquerytime; ///< max query time, milliseconds (default is 0, do not limit)
403
+ var $_fieldweights; ///< per-field-name weights
404
+ var $_overrides; ///< per-query attribute values overrides
405
+ var $_select; ///< select-list (attributes or expressions, with optional aliases)
406
+
407
+ var $_error; ///< last error message
408
+ var $_warning; ///< last warning message
409
+ var $_connerror; ///< connection error vs remote error flag
410
+
411
+ var $_reqs; ///< requests array for multi-query
412
+ var $_mbenc; ///< stored mbstring encoding
413
+ var $_arrayresult; ///< whether $result["matches"] should be a hash or an array
414
+ var $_timeout; ///< connect timeout
415
+
416
+ /////////////////////////////////////////////////////////////////////////////
417
+ // common stuff
418
+ /////////////////////////////////////////////////////////////////////////////
419
+
420
+ /// create a new client object and fill defaults
421
/// create a new client object and fill defaults
/// (PHP 8 no longer treats a method named after the class as a constructor,
/// so the initialization lives in __construct)
function __construct ()
{
	// per-client-object settings
	$this->_host		= "localhost";
	$this->_port		= 9312;
	$this->_path		= false;
	$this->_socket		= false;

	// per-query settings
	$this->_offset		= 0;
	$this->_limit		= 20;
	$this->_mode		= SPH_MATCH_ALL;
	$this->_weights		= array ();
	$this->_sort		= SPH_SORT_RELEVANCE;
	$this->_sortby		= "";
	$this->_min_id		= 0;
	$this->_max_id		= 0;
	$this->_filters		= array ();
	$this->_groupby		= "";
	$this->_groupfunc	= SPH_GROUPBY_DAY;
	$this->_groupsort	= "@group desc";
	$this->_groupdistinct= "";
	$this->_maxmatches	= 1000;
	$this->_cutoff		= 0;
	$this->_retrycount	= 0;
	$this->_retrydelay	= 0;
	$this->_anchor		= array ();
	$this->_indexweights= array ();
	$this->_ranker		= SPH_RANK_PROXIMITY_BM25;
	$this->_maxquerytime= 0;
	$this->_fieldweights= array();
	$this->_overrides	= array();
	$this->_select		= "*";

	$this->_error		= ""; // per-reply fields (for single-query case)
	$this->_warning		= "";
	$this->_connerror	= false;

	$this->_reqs		= array ();	// requests storage (for multi-query case)
	$this->_mbenc		= "";
	$this->_arrayresult	= false;
	$this->_timeout		= 0;
}

/// old-style constructor name, kept so that explicit calls
/// (for instance parent::SphinxClient() in a subclass) keep working
function SphinxClient ()
{
	$this->__construct ();
}
464
+
465
/// close the stored socket, if there is one, when the object goes away
function __destruct()
{
	if ( $this->_socket===false )
		return;

	fclose ( $this->_socket );
}
470
+
471
+ /// get last error message (string)
472
/// returns the message stored in $_error ("" right after construction)
function GetLastError ()
{
	$message = $this->_error;
	return $message;
}
476
+
477
+ /// get last warning message (string)
478
/// returns the message stored in $_warning ("" right after construction)
function GetLastWarning ()
{
	$message = $this->_warning;
	return $message;
}
482
+
483
+ /// get last error flag (to tell network connection errors from searchd errors or broken responses)
484
/// returns the $_connerror flag, which is raised when sending to or
/// connecting to searchd fails
function IsConnectError()
{
	$flag = $this->_connerror;
	return $flag;
}
488
+
489
+ /// set searchd host name (string) and port (integer)
490
/// $host is a host name, an absolute unix socket path ("/..."), or a
/// "unix://" URL; $port is only used for TCP hosts, and 0 (the default)
/// means "use the standard port 9312"
function SetServer ( $host, $port = 0 )
{
	assert ( is_string($host) );

	// absolute path: unix socket; substr() instead of $host[0] so that
	// an empty host does not raise a string offset warning
	if ( substr ( $host, 0, 1 )=="/" )
	{
		$this->_path = 'unix://' . $host;
		return;
	}
	if ( substr ( $host, 0, 7 )=="unix://" )
	{
		$this->_path = $host;
		return;
	}

	assert ( is_int($port) );
	$this->_host = $host;
	// port 0 is not connectable; do not let the default argument clobber 9312
	$this->_port = ( $port==0 ) ? 9312 : $port;
	$this->_path = '';
}
510
+
511
+ /// set server connection timeout (0 to remove)
512
/// $timeout must be numeric; values <= 0 make _OldConnect() call
/// fsockopen() without a timeout argument
function SetConnectTimeout ( $timeout )
{
	$valid = is_numeric ( $timeout );
	assert ( $valid );

	$this->_timeout = $timeout;
}
517
+
518
+
519
/// write $length bytes of $data to $handle;
/// returns true on a complete write, otherwise records a connection
/// error in $_error / $_connerror and returns false
function _Send ( $handle, $data, $length )
{
	// fwrite() is only attempted when the stream is not already at EOF
	$sent = !feof($handle) && fwrite ( $handle, $data, $length )===$length;
	if ( $sent )
		return true;

	$this->_error = 'connection unexpectedly closed (timed out?)';
	$this->_connerror = true;
	return false;
}
529
+
530
+ /////////////////////////////////////////////////////////////////////////////
531
+
532
+ /// enter mbstring workaround mode
533
/// when mbstring overloads the string functions (func_overload bit 2),
/// remember the current internal encoding and switch to latin1
function _MBPush ()
{
	$this->_mbenc = "";

	$overload = ini_get ( "mbstring.func_overload" );
	if ( !( $overload & 2 ) )
		return;

	$this->_mbenc = mb_internal_encoding();
	mb_internal_encoding ( "latin1" );
}
542
+
543
+ /// leave mbstring workaround mode
544
/// restore the internal encoding remembered by _MBPush(), if any
function _MBPop ()
{
	if ( !$this->_mbenc )
		return;

	mb_internal_encoding ( $this->_mbenc );
}
549
+
550
+ /// connect to searchd server
551
/// fixture variant: instead of opening a socket, hand back a writable
/// handle on php://stdout, so whatever gets written to the "connection"
/// ends up on standard output
/// NOTE(review): presumably lets the specs capture the raw request bytes -- confirm
function _Connect ()
{
	$stream = fopen ( 'php://stdout', 'w' );
	return $stream;
}
555
+
556
/// open a connection to searchd (unix socket or TCP) and do the protocol
/// version handshake; returns the stream on success, or false with
/// $_error filled in on failure
function _OldConnect ()
{
	if ( $this->_socket!==false )
	{
		// we are in persistent connection mode, so we have a socket
		// however, need to check whether it's still alive
		if ( !@feof ( $this->_socket ) )
			return $this->_socket;

		// force reopen
		$this->_socket = false;
	}

	$errno = 0;
	$errstr = "";
	$this->_connerror = false;

	if ( $this->_path )
	{
		$host = $this->_path;
		$port = 0;
	}
	else
	{
		$host = $this->_host;
		$port = $this->_port;
	}

	if ( $this->_timeout<=0 )
		$fp = @fsockopen ( $host, $port, $errno, $errstr );
	else
		$fp = @fsockopen ( $host, $port, $errno, $errstr, $this->_timeout );

	if ( !$fp )
	{
		if ( $this->_path )
			$location = $this->_path;
		else
			$location = "{$this->_host}:{$this->_port}";

		$errstr = trim ( $errstr );
		$this->_error = "connection to $location failed (errno=$errno, msg=$errstr)";
		$this->_connerror = true;
		return false;
	}

	// send my version
	// this is a subtle part. we must do it before (!) reading back from searchd.
	// because otherwise under some conditions (reported on FreeBSD for instance)
	// TCP stack could throttle write-write-read pattern because of Nagle.
	if ( !$this->_Send ( $fp, pack ( "N", 1 ), 4 ) )
	{
		fclose ( $fp );
		$this->_error = "failed to send client protocol version";
		return false;
	}

	// check version; only unpack a complete 4-byte reply, so that a short
	// or failed read is reported as version 0 instead of raising warnings
	$v = 0;
	$reply = fread ( $fp, 4 );
	if ( is_string($reply) && strlen($reply)==4 )
	{
		list(,$v) = unpack ( "N*", $reply );
		$v = (int)$v;
	}
	if ( $v<1 )
	{
		fclose ( $fp );
		$this->_error = "expected searchd protocol version 1+, got version '$v'";
		return false;
	}

	return $fp;
}
625
+
626
+ /// get and check response packet from searchd server
627
/// read one response packet from $fp: an 8-byte header (status, version,
/// body length) followed by the body;
/// returns the body on success (minus the warning text when the status is
/// SEARCHD_WARNING), or false with $_error filled in;
/// $client_ver is the command version the client implements, used only to
/// warn when searchd reports an older one
function _GetResponse ( $fp, $client_ver )
{
	$response = "";
	$len = 0;

	$header = fread ( $fp, 8 );
	if ( strlen($header)==8 )
	{
		list ( $status, $ver, $len ) = array_values ( unpack ( "n2a/Nb", $header ) );
		$left = $len;
		while ( $left>0 && !feof($fp) )
		{
			$chunk = fread ( $fp, $left );
			if ( $chunk===false )
				break; // read error; the length check below reports it

			// compare against "" explicitly: a chunk consisting of the
			// single byte "0" is falsy in PHP and must not be dropped
			if ( $chunk!=="" )
			{
				$response .= $chunk;
				$left -= strlen($chunk);
			}
		}
	}
	if ( $this->_socket === false )
		fclose ( $fp );

	// check response (by length, so that a body of "0" is not taken for empty)
	$read = strlen ( $response );
	if ( $read==0 || $read!=$len )
	{
		$this->_error = $len
			? "failed to read searchd response (status=$status, ver=$ver, len=$len, read=$read)"
			: "received zero-sized searchd response";
		return false;
	}

	// check status
	if ( $status==SEARCHD_WARNING )
	{
		// body starts with a length-prefixed warning message
		list(,$wlen) = unpack ( "N*", substr ( $response, 0, 4 ) );
		$this->_warning = substr ( $response, 4, $wlen );
		return substr ( $response, 4+$wlen );
	}
	if ( $status==SEARCHD_ERROR )
	{
		$this->_error = "searchd error: " . substr ( $response, 4 );
		return false;
	}
	if ( $status==SEARCHD_RETRY )
	{
		$this->_error = "temporary searchd error: " . substr ( $response, 4 );
		return false;
	}
	if ( $status!=SEARCHD_OK )
	{
		$this->_error = "unknown status code '$status'";
		return false;
	}

	// check version
	if ( $ver<$client_ver )
	{
		$this->_warning = sprintf ( "searchd command v.%d.%d older than client's v.%d.%d, some options might not work",
			$ver>>8, $ver&0xff, $client_ver>>8, $client_ver&0xff );
	}

	return $response;
}
692
+
693
+ /////////////////////////////////////////////////////////////////////////////
694
+ // searching
695
+ /////////////////////////////////////////////////////////////////////////////
696
+
697
+ /// set offset and count into result set,
698
+ /// and optionally set max-matches and cutoff limits
699
/// $offset and $limit are always stored; $max and $cutoff only replace
/// the current max-matches / cutoff settings when they are positive
function SetLimits ( $offset, $limit, $max=0, $cutoff=0 )
{
	assert ( is_int($offset) );
	assert ( is_int($limit) );
	assert ( $offset>=0 );
	assert ( $limit>0 );
	assert ( $max>=0 );

	$this->_offset = $offset;
	$this->_limit = $limit;

	if ( $max>0 )
	{
		$this->_maxmatches = $max;
	}
	if ( $cutoff>0 )
	{
		$this->_cutoff = $cutoff;
	}
}
713
+
714
+ /// set maximum query time, in milliseconds, per-index
715
+ /// integer, 0 means "do not limit"
716
/// $max must be a non-negative int; it is stored as is
function SetMaxQueryTime ( $max )
{
	$isInt = is_int ( $max );
	assert ( $isInt );
	assert ( $max>=0 );

	$this->_maxquerytime = $max;
}
722
+
723
+ /// set matching mode
724
/// $mode must be one of the SPH_MATCH_xxx constants
function SetMatchMode ( $mode )
{
	$known = array (
		SPH_MATCH_ALL,
		SPH_MATCH_ANY,
		SPH_MATCH_PHRASE,
		SPH_MATCH_BOOLEAN,
		SPH_MATCH_EXTENDED,
		SPH_MATCH_FULLSCAN,
		SPH_MATCH_EXTENDED2 );

	// loose comparison, same as the == checks it stands for
	assert ( in_array ( $mode, $known ) );
	$this->_mode = $mode;
}
735
+
736
+ /// set ranking mode
737
/// $ranker must be one of the SPH_RANK_xxx constants
function SetRankingMode ( $ranker )
{
	$known = array (
		SPH_RANK_PROXIMITY_BM25,
		SPH_RANK_BM25,
		SPH_RANK_NONE,
		SPH_RANK_WORDCOUNT,
		SPH_RANK_PROXIMITY,
		SPH_RANK_MATCHANY,
		SPH_RANK_FIELDMASK );

	// loose comparison, same as the == checks it stands for
	assert ( in_array ( $ranker, $known ) );
	$this->_ranker = $ranker;
}
748
+
749
+ /// set matches sorting mode
750
/// $mode must be one of the SPH_SORT_xxx constants; $sortby is the sorting
/// clause and may only be empty for SPH_SORT_RELEVANCE
function SetSortMode ( $mode, $sortby="" )
{
	$known = array (
		SPH_SORT_RELEVANCE,
		SPH_SORT_ATTR_DESC,
		SPH_SORT_ATTR_ASC,
		SPH_SORT_TIME_SEGMENTS,
		SPH_SORT_EXTENDED,
		SPH_SORT_EXPR );

	// loose comparison, same as the == checks it stands for
	assert ( in_array ( $mode, $known ) );
	assert ( is_string($sortby) );
	assert ( $mode==SPH_SORT_RELEVANCE || strlen($sortby)>0 );

	$this->_sortby = $sortby;
	$this->_sort = $mode;
}
765
+
766
+ /// bind per-field weights by order
767
+ /// DEPRECATED; use SetFieldWeights() instead
768
/// $weights must be an array of ints; it is stored as is
function SetWeights ( $weights )
{
	assert ( is_array($weights) );
	foreach ( $weights as $w )
	{
		assert ( is_int($w) );
	}

	$this->_weights = $weights;
}
776
+
777
+ /// bind per-field weights by name
778
/// $weights must map field names (strings) to int weights
function SetFieldWeights ( $weights )
{
	assert ( is_array($weights) );
	foreach ( $weights as $field=>$w )
	{
		assert ( is_string($field) );
		assert ( is_int($w) );
	}

	$this->_fieldweights = $weights;
}
788
+
789
+ /// bind per-index weights by name
790
/// $weights must map index names (strings) to int weights
function SetIndexWeights ( $weights )
{
	assert ( is_array($weights) );
	foreach ( $weights as $name=>$w )
	{
		assert ( is_string($name) );
		assert ( is_int($w) );
	}

	$this->_indexweights = $weights;
}
800
+
801
+ /// set IDs range to match
802
+ /// only match records if document ID is between $min and $max (inclusive)
803
/// both bounds must be numeric (ints or numeric strings) with $min<=$max
function SetIDRange ( $min, $max )
{
	assert ( is_numeric($min) );
	assert ( is_numeric($max) );
	assert ( $min<=$max );

	$this->_max_id = $max;
	$this->_min_id = $min;
}
811
+
812
+ /// set values set filter
813
+ /// only match records where $attribute value is in given set
814
/// appends a SPH_FILTER_VALUES entry to the filter list; $values must be a
/// non-empty array of numeric values, $exclude is stored with the entry
function SetFilter ( $attribute, $values, $exclude=false )
{
	assert ( is_string($attribute) );
	assert ( is_array($values) );
	assert ( count($values) );

	// with assertions off, silently ignore anything but a non-empty array
	if ( !is_array($values) || !count($values) )
		return;

	foreach ( $values as $item )
		assert ( is_numeric($item) );

	$filter = array ( "type"=>SPH_FILTER_VALUES, "attr"=>$attribute, "exclude"=>$exclude, "values"=>$values );
	$this->_filters[] = $filter;
}
828
+
829
+ /// set range filter
830
+ /// only match records if $attribute value is between $min and $max (inclusive)
831
/// appends a SPH_FILTER_RANGE entry to the filter list; bounds must be
/// numeric with $min<=$max, $exclude is stored with the entry
function SetFilterRange ( $attribute, $min, $max, $exclude=false )
{
	assert ( is_string($attribute) );
	assert ( is_numeric($min) );
	assert ( is_numeric($max) );
	assert ( $min<=$max );

	$filter = array ( "type"=>SPH_FILTER_RANGE, "attr"=>$attribute, "exclude"=>$exclude, "min"=>$min, "max"=>$max );
	$this->_filters[] = $filter;
}
840
+
841
+ /// set float range filter
842
+ /// only match records if $attribute value is between $min and $max (inclusive)
843
/// appends a SPH_FILTER_FLOATRANGE entry to the filter list; bounds must be
/// floats with $min<=$max, $exclude is stored with the entry
function SetFilterFloatRange ( $attribute, $min, $max, $exclude=false )
{
	assert ( is_string($attribute) );
	assert ( is_float($min) );
	assert ( is_float($max) );
	assert ( $min<=$max );

	$filter = array ( "type"=>SPH_FILTER_FLOATRANGE, "attr"=>$attribute, "exclude"=>$exclude, "min"=>$min, "max"=>$max );
	$this->_filters[] = $filter;
}
852
+
853
+ /// setup anchor point for geosphere distance calculations
854
+ /// required to use @geodist in filters and sorting
855
+ /// latitude and longitude must be in radians
856
/// $attrlat / $attrlong are attribute names (strings), $lat / $long are
/// floats; the four values replace any previously stored anchor
function SetGeoAnchor ( $attrlat, $attrlong, $lat, $long )
{
	assert ( is_string($attrlat) );
	assert ( is_string($attrlong) );
	assert ( is_float($lat) );
	assert ( is_float($long) );

	$anchor = array ( "attrlat"=>$attrlat, "attrlong"=>$attrlong, "lat"=>$lat, "long"=>$long );
	$this->_anchor = $anchor;
}
865
+
866
+ /// set grouping attribute and function
867
/// $func must be one of the SPH_GROUPBY_xxx constants; $groupsort is the
/// clause used to sort the groups
function SetGroupBy ( $attribute, $func, $groupsort="@group desc" )
{
	assert ( is_string($attribute) );
	assert ( is_string($groupsort) );

	$known = array (
		SPH_GROUPBY_DAY,
		SPH_GROUPBY_WEEK,
		SPH_GROUPBY_MONTH,
		SPH_GROUPBY_YEAR,
		SPH_GROUPBY_ATTR,
		SPH_GROUPBY_ATTRPAIR );

	// loose comparison, same as the == checks it stands for
	assert ( in_array ( $func, $known ) );

	$this->_groupsort = $groupsort;
	$this->_groupfunc = $func;
	$this->_groupby = $attribute;
}
882
+
883
+ /// set count-distinct attribute for group-by queries
884
/// $attribute must be a string; it is stored as is
function SetGroupDistinct ( $attribute )
{
	$isString = is_string ( $attribute );
	assert ( $isString );

	$this->_groupdistinct = $attribute;
}
889
+
890
+ /// set distributed retries count and delay
891
/// both $count and $delay must be non-negative ints
function SetRetries ( $count, $delay=0 )
{
	assert ( is_int($count) && $count>=0 );
	assert ( is_int($delay) && $delay>=0 );

	$this->_retrydelay = $delay;
	$this->_retrycount = $count;
}
898
+
899
+ /// set result set format (hash or array; hash by default)
900
+ /// PHP specific; needed for group-by-MVA result sets that may contain duplicate IDs
901
/// $arrayresult must be a bool; it is stored as is
function SetArrayResult ( $arrayresult )
{
	$isBool = is_bool ( $arrayresult );
	assert ( $isBool );

	$this->_arrayresult = $arrayresult;
}
906
+
907
/// register a per-document override of attribute values
/// overrides are keyed by attribute name, so a later call for the same
/// attribute replaces the earlier one
/// $values must be a hash that maps document IDs to attribute values
function SetOverride ( $attrname, $attrtype, $values )
{
    assert ( is_string ( $attrname ) );
    assert ( in_array ( $attrtype, array ( SPH_ATTR_INTEGER, SPH_ATTR_TIMESTAMP, SPH_ATTR_BOOL, SPH_ATTR_FLOAT, SPH_ATTR_BIGINT ) ) );
    assert ( is_array ( $values ) );

    $override = array ();
    $override["attr"] = $attrname;
    $override["type"] = $attrtype;
    $override["values"] = $values;

    $this->_overrides[$attrname] = $override;
}
918
+
919
/// set the select-list (attributes or expressions, SQL-like syntax)
/// $select must be a string; it is sent verbatim as the last field of the search request
function SetSelect ( $select )
{
    assert ( is_string ( $select ) );

    $this->_select = $select;
}
925
+
926
+ //////////////////////////////////////////////////////////////////////////////
927
+
928
/// drop all filters and the geo anchor (for multi-queries)
function ResetFilters ()
{
    $this->_anchor = array();
    $this->_filters = array();
}
934
+
935
/// reset the group-by settings (for multi-queries):
/// no grouping attribute, SPH_GROUPBY_DAY function, "@group desc" sorting,
/// and no count-distinct attribute
function ResetGroupBy ()
{
    $this->_groupdistinct = "";
    $this->_groupsort = "@group desc";
    $this->_groupfunc = SPH_GROUPBY_DAY;
    $this->_groupby = "";
}
943
+
944
/// drop every attribute value override (for multi-queries)
function ResetOverrides ()
{
    $this->_overrides = array ();
}
949
+
950
+ //////////////////////////////////////////////////////////////////////////////
951
+
952
/// run a single search query through the given indexes and return its result set
/// this is a one-query batch: AddQuery() followed by RunQueries()
/// returns false on failure (network error or SEARCHD_ERROR status);
/// the error and warning messages of the result are copied into the client
function Query ( $query, $index="*", $comment="" )
{
    assert ( empty($this->_reqs) );

    $this->AddQuery ( $query, $index, $comment );
    $batch = $this->RunQueries ();

    // just in case it failed too early and left the request queued
    $this->_reqs = array ();

    // probably network error; error message should be already filled
    if ( !is_array($batch) )
        return false;

    $first = $batch[0];
    $this->_error = $first["error"];
    $this->_warning = $first["warning"];

    return ( $first["status"]==SEARCHD_ERROR ) ? false : $first;
}
972
+
973
/// helper to pack a float as 4 bytes in network byte order
function _PackFloat ( $f )
{
    // reinterpret the machine-order float bytes as a machine-order 32-bit integer...
    $bits = unpack ( "L*", pack ( "f", $f ) );

    // ...and emit that integer big-endian
    return pack ( "N", $bits[1] );
}
980
+
981
/// serialize the current client settings together with the given query
/// and append the resulting request to the multi-query batch
/// returns index into results array from RunQueries() call
function AddQuery ( $query, $index="*", $comment="" )
{
    // mbstring workaround
    $this->_MBPush ();

    // request pieces are collected in wire order and glued together at the end
    $parts = array ();

    // mode and limits, sort-by clause, query itself
    $parts[] = pack ( "NNNNN", $this->_offset, $this->_limit, $this->_mode, $this->_ranker, $this->_sort );
    $parts[] = pack ( "N", strlen($this->_sortby) ) . $this->_sortby;
    $parts[] = pack ( "N", strlen($query) ) . $query;

    // weights
    $parts[] = pack ( "N", count($this->_weights) );
    foreach ( $this->_weights as $w )
        $parts[] = pack ( "N", (int)$w );

    // indexes, id64 range marker, id64 range
    $parts[] = pack ( "N", strlen($index) ) . $index;
    $parts[] = pack ( "N", 1 );
    $parts[] = sphPackU64 ( $this->_min_id ) . sphPackU64 ( $this->_max_id );

    // filters: attribute name, type, type-specific payload, exclude flag
    $parts[] = pack ( "N", count($this->_filters) );
    foreach ( $this->_filters as $flt )
    {
        $kind = $flt["type"];
        $parts[] = pack ( "N", strlen($flt["attr"]) ) . $flt["attr"];
        $parts[] = pack ( "N", $kind );

        if ( $kind==SPH_FILTER_VALUES )
        {
            $parts[] = pack ( "N", count($flt["values"]) );
            foreach ( $flt["values"] as $v )
                $parts[] = sphPackI64 ( $v );

        } elseif ( $kind==SPH_FILTER_RANGE )
        {
            $parts[] = sphPackI64 ( $flt["min"] ) . sphPackI64 ( $flt["max"] );

        } elseif ( $kind==SPH_FILTER_FLOATRANGE )
        {
            $parts[] = $this->_PackFloat ( $flt["min"] ) . $this->_PackFloat ( $flt["max"] );

        } else
        {
            assert ( 0 && "internal error: unhandled filter type" );
        }

        $parts[] = pack ( "N", $flt["exclude"] );
    }

    // group-by clause, max-matches count, group-sort clause, cutoff count
    $parts[] = pack ( "NN", $this->_groupfunc, strlen($this->_groupby) ) . $this->_groupby;
    $parts[] = pack ( "N", $this->_maxmatches );
    $parts[] = pack ( "N", strlen($this->_groupsort) ) . $this->_groupsort;
    $parts[] = pack ( "NNN", $this->_cutoff, $this->_retrycount, $this->_retrydelay );
    $parts[] = pack ( "N", strlen($this->_groupdistinct) ) . $this->_groupdistinct;

    // anchor point: a 0/1 presence flag, followed by the anchor data when present
    $anchor = $this->_anchor;
    if ( !empty($anchor) )
    {
        $parts[] = pack ( "N", 1 );
        $parts[] = pack ( "N", strlen($anchor["attrlat"]) ) . $anchor["attrlat"];
        $parts[] = pack ( "N", strlen($anchor["attrlong"]) ) . $anchor["attrlong"];
        $parts[] = $this->_PackFloat ( $anchor["lat"] ) . $this->_PackFloat ( $anchor["long"] );
    } else
    {
        $parts[] = pack ( "N", 0 );
    }

    // per-index weights
    $parts[] = pack ( "N", count($this->_indexweights) );
    foreach ( $this->_indexweights as $name=>$w )
        $parts[] = pack ( "N", strlen($name) ) . $name . pack ( "N", $w );

    // max query time
    $parts[] = pack ( "N", $this->_maxquerytime );

    // per-field weights
    $parts[] = pack ( "N", count($this->_fieldweights) );
    foreach ( $this->_fieldweights as $name=>$w )
        $parts[] = pack ( "N", strlen($name) ) . $name . pack ( "N", $w );

    // comment
    $parts[] = pack ( "N", strlen($comment) ) . $comment;

    // attribute overrides: name, type, value count, then (document id, value) pairs
    $parts[] = pack ( "N", count($this->_overrides) );
    foreach ( $this->_overrides as $override )
    {
        $otype = $override["type"];
        $parts[] = pack ( "N", strlen($override["attr"]) ) . $override["attr"];
        $parts[] = pack ( "NN", $otype, count($override["values"]) );

        foreach ( $override["values"] as $docid=>$value )
        {
            assert ( is_numeric($docid) );
            assert ( is_numeric($value) );

            $parts[] = sphPackU64 ( $docid );
            if ( $otype==SPH_ATTR_FLOAT )
                $parts[] = $this->_PackFloat ( $value );
            elseif ( $otype==SPH_ATTR_BIGINT )
                $parts[] = sphPackI64 ( $value );
            else
                $parts[] = pack ( "N", $value );
        }
    }

    // select-list
    $parts[] = pack ( "N", strlen($this->_select) ) . $this->_select;

    $req = implode ( "", $parts );

    // mbstring workaround
    $this->_MBPop ();

    // store request to requests array
    $this->_reqs[] = $req;
    return count($this->_reqs)-1;
}
1094
+
1095
/// connect to searchd, send every request queued by AddQuery() as one batch,
/// and return an array of result sets (false on failure)
/// the queue is cleared only once the batch was sent and a response received
function RunQueries ()
{
    if ( empty($this->_reqs) )
    {
        $this->_error = "no queries defined, issue AddQuery() first";
        return false;
    }

    // mbstring workaround
    $this->_MBPush ();

    $fp = $this->_Connect ();
    if ( !$fp )
    {
        $this->_MBPop ();
        return false;
    }

    // build the packet: header, query count, then all queued requests back to back
    $nreqs = count($this->_reqs);
    $body = join ( "", $this->_reqs );
    $len = 4+strlen($body); // body length includes the 4-byte query count
    $packet = pack ( "nnNN", SEARCHD_COMMAND_SEARCH, VER_COMMAND_SEARCH, $len, $nreqs ) . $body;

    // send query, get response; the response is only read if the send succeeded
    $response = false;
    if ( $this->_Send ( $fp, $packet, $len+8 ) )
        $response = $this->_GetResponse ( $fp, VER_COMMAND_SEARCH );

    if ( !$response )
    {
        $this->_MBPop ();
        return false;
    }

    // query sent ok; we can reset reqs now
    $this->_reqs = array ();

    // parse and return response (the parser calls _MBPop() itself before returning)
    return $this->_ParseSearchResponse ( $response, $nreqs );
}
1132
+
1133
/// parse and return search query (or queries) response
/// $response is the raw reply body and $nreqs the number of queries in the batch;
/// returns an array with one result set per parsed query
/// calls _MBPop() before returning, balancing the _MBPush() done in RunQueries()
function _ParseSearchResponse ( $response, $nreqs )
{
    $p = 0; // current read position
    $max = strlen($response); // upper bound for position checks, to protect against broken responses

    $results = array ();
    for ( $ires=0; $ires<$nreqs && $p<$max; $ires++ )
    {
        $result = array ( "error"=>"", "warning"=>"" );

        // extract status; any status other than SEARCHD_OK is followed by a message
        list(,$status) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
        $result["status"] = $status;
        if ( $status!=SEARCHD_OK )
        {
            list(,$len) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
            $message = substr ( $response, $p, $len ); $p += $len;

            if ( $status!=SEARCHD_WARNING )
            {
                // not a warning: record the message as an error and move on to the next result set
                $result["error"] = $message;
                $results[] = $result;
                continue;
            }
            $result["warning"] = $message;
        }

        // read schema: field names first
        $fields = array ();
        list(,$nfields) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
        for ( $i=0; $i<$nfields && $p<$max; $i++ )
        {
            list(,$len) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
            $fields[] = substr ( $response, $p, $len ); $p += $len;
        }
        $result["fields"] = $fields;

        // read schema: attribute name => attribute type
        $attrs = array ();
        list(,$nattrs) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
        for ( $i=0; $i<$nattrs && $p<$max; $i++ )
        {
            list(,$len) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
            $attr = substr ( $response, $p, $len ); $p += $len;
            list(,$type) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
            $attrs[$attr] = $type;
        }
        $result["attrs"] = $attrs;

        // read match count and the 64-bit document id flag
        list(,$count) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
        list(,$id64) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;

        // read matches; $idx is the index into the result array (used in array-result mode)
        for ( $idx=0; $idx<$count && $p<$max; $idx++ )
        {
            // parse document id and weight
            if ( $id64 )
            {
                $doc = sphUnpackU64 ( substr ( $response, $p, 8 ) ); $p += 8;
                list(,$weight) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
            } else
            {
                list ( $doc, $weight ) = array_values ( unpack ( "N*N*",
                    substr ( $response, $p, 8 ) ) );
                $p += 8;
                $doc = sphFixUint($doc);
            }
            $weight = sprintf ( "%u", $weight );

            // parse attribute values, in schema order
            $attrvals = array ();
            foreach ( $attrs as $attr=>$type )
            {
                if ( $type==SPH_ATTR_BIGINT )
                {
                    // handle 64bit ints
                    $attrvals[$attr] = sphUnpackI64 ( substr ( $response, $p, 8 ) ); $p += 8;

                } elseif ( $type==SPH_ATTR_FLOAT )
                {
                    // handle floats: read the 32 bits as an integer, then reinterpret them
                    list(,$uval) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
                    list(,$fval) = unpack ( "f*", pack ( "L", $uval ) );
                    $attrvals[$attr] = $fval;

                } else
                {
                    // handle everything else as unsigned ints
                    list(,$val) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
                    if ( $type & SPH_ATTR_MULTI )
                    {
                        // multi-value attribute: the integer just read is the value count
                        $attrvals[$attr] = array ();
                        for ( $i=0; $i<$val && $p<$max; $i++ )
                        {
                            list(,$item) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
                            $attrvals[$attr][] = sphFixUint($item);
                        }
                    } else
                    {
                        $attrvals[$attr] = sphFixUint($val);
                    }
                }
            }

            // create match entry: positional in array-result mode, keyed by document id otherwise
            if ( $this->_arrayresult )
            {
                $result["matches"][$idx] = array ( "id"=>$doc, "weight"=>$weight, "attrs"=>$attrvals );
            } else
            {
                $result["matches"][$doc]["weight"] = $weight;
                $result["matches"][$doc]["attrs"] = $attrvals;
            }
        }

        // totals, query time, and per-word statistics count
        list ( $total, $total_found, $msecs, $nwords ) =
            array_values ( unpack ( "N*N*N*N*", substr ( $response, $p, 16 ) ) );
        $p += 16;
        $result["total"] = sprintf ( "%u", $total );
        $result["total_found"] = sprintf ( "%u", $total_found );
        $result["time"] = sprintf ( "%.3f", $msecs/1000 );

        for ( $i=0; $i<$nwords && $p<$max; $i++ )
        {
            list(,$len) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
            $word = substr ( $response, $p, $len ); $p += $len;
            list ( $docs, $hits ) = array_values ( unpack ( "N*N*", substr ( $response, $p, 8 ) ) ); $p += 8;
            $result["words"][$word] = array (
                "docs"=>sprintf ( "%u", $docs ),
                "hits"=>sprintf ( "%u", $hits ) );
        }

        $results[] = $result;
    }

    $this->_MBPop ();
    return $results;
}
1284
+
1285
+ /////////////////////////////////////////////////////////////////////////////
1286
+ // excerpts generation
1287
+ /////////////////////////////////////////////////////////////////////////////
1288
+
1289
/// connect to searchd server, and generate excerpts (snippets)
/// of given documents for given query
/// $docs is an array of document strings, $index and $words are strings,
/// $opts is an optional hash of settings (missing ones get the defaults below)
/// returns false on failure, an array of snippets (one per document) on success
function BuildExcerpts ( $docs, $index, $words, $opts=array() )
{
    assert ( is_array($docs) );
    assert ( is_string($index) );
    assert ( is_string($words) );
    assert ( is_array($opts) );

    $this->_MBPush ();

    $fp = $this->_Connect ();
    if ( !$fp )
    {
        $this->_MBPop();
        return false;
    }

    /////////////////
    // fixup options
    /////////////////

    $defaults = array (
        "before_match"    => "<b>",
        "after_match"     => "</b>",
        "chunk_separator" => " ... ",
        "limit"           => 256,
        "around"          => 5,
        "exact_phrase"    => false,
        "single_passage"  => false,
        "use_boundaries"  => false,
        "weight_order"    => false );
    foreach ( $defaults as $name=>$value )
        if ( !isset($opts[$name]) )
            $opts[$name] = $value;

    /////////////////
    // build request
    /////////////////

    // v.1.0 req; boolean options map onto bits of the flags word
    $flags = 1; // remove spaces
    $bits = array ( "exact_phrase"=>2, "single_passage"=>4, "use_boundaries"=>8, "weight_order"=>16 );
    foreach ( $bits as $name=>$bit )
        if ( $opts[$name] )
            $flags |= $bit;

    $parts = array ();
    $parts[] = pack ( "NN", 0, $flags ); // mode=0, flags=$flags
    $parts[] = pack ( "N", strlen($index) ) . $index; // req index
    $parts[] = pack ( "N", strlen($words) ) . $words; // req words

    // options
    $parts[] = pack ( "N", strlen($opts["before_match"]) ) . $opts["before_match"];
    $parts[] = pack ( "N", strlen($opts["after_match"]) ) . $opts["after_match"];
    $parts[] = pack ( "N", strlen($opts["chunk_separator"]) ) . $opts["chunk_separator"];
    $parts[] = pack ( "N", (int)$opts["limit"] );
    $parts[] = pack ( "N", (int)$opts["around"] );

    // documents
    $ndocs = count($docs);
    $parts[] = pack ( "N", $ndocs );
    foreach ( $docs as $doc )
    {
        assert ( is_string($doc) );
        $parts[] = pack ( "N", strlen($doc) ) . $doc;
    }

    ////////////////////////////
    // send query, get response
    ////////////////////////////

    $body = implode ( "", $parts );
    $len = strlen($body);
    $packet = pack ( "nnN", SEARCHD_COMMAND_EXCERPT, VER_COMMAND_EXCERPT, $len ) . $body; // add header

    // the response is only read if the send succeeded
    $response = false;
    if ( $this->_Send ( $fp, $packet, $len+8 ) )
        $response = $this->_GetResponse ( $fp, VER_COMMAND_EXCERPT );

    if ( !$response )
    {
        $this->_MBPop ();
        return false;
    }

    //////////////////
    // parse response
    //////////////////

    // one length-prefixed snippet is expected per input document
    $pos = 0;
    $res = array ();
    $rlen = strlen($response);
    for ( $i=0; $i<$ndocs; $i++ )
    {
        list(,$len) = unpack ( "N*", substr ( $response, $pos, 4 ) );
        $pos += 4;

        if ( $pos+$len > $rlen )
        {
            $this->_error = "incomplete reply";
            $this->_MBPop ();
            return false;
        }

        $res[] = $len ? substr ( $response, $pos, $len ) : "";
        $pos += $len;
    }

    $this->_MBPop ();
    return $res;
}
1388
+
1389
+
1390
+ /////////////////////////////////////////////////////////////////////////////
1391
+ // keyword generation
1392
+ /////////////////////////////////////////////////////////////////////////////
1393
+
1394
/// connect to searchd server, and generate keyword list for a given query
/// $query and $index are strings; $hits is a bool, and when it is true
/// every returned entry also carries "docs" and "hits" values
/// returns false on failure, or an array of entries with
/// "tokenized" and "normalized" keys on success
function BuildKeywords ( $query, $index, $hits )
{
    assert ( is_string($query) );
    assert ( is_string($index) );
    assert ( is_bool($hits) );

    $this->_MBPush ();

    $fp = $this->_Connect ();
    if ( !$fp )
    {
        $this->_MBPop();
        return false;
    }

    /////////////////
    // build request
    /////////////////

    // v.1.0 req: query, index, hits flag
    $body = pack ( "N", strlen($query) ) . $query
        . pack ( "N", strlen($index) ) . $index
        . pack ( "N", (int)$hits );

    ////////////////////////////
    // send query, get response
    ////////////////////////////

    $len = strlen($body);
    $packet = pack ( "nnN", SEARCHD_COMMAND_KEYWORDS, VER_COMMAND_KEYWORDS, $len ) . $body; // add header

    // the response is only read if the send succeeded
    $response = false;
    if ( $this->_Send ( $fp, $packet, $len+8 ) )
        $response = $this->_GetResponse ( $fp, VER_COMMAND_KEYWORDS );

    if ( !$response )
    {
        $this->_MBPop ();
        return false;
    }

    //////////////////
    // parse response
    //////////////////

    $pos = 0;
    $res = array ();
    $rlen = strlen($response);
    list(,$nwords) = unpack ( "N*", substr ( $response, $pos, 4 ) );
    $pos += 4;

    $i = 0;
    while ( $i<$nwords )
    {
        $entry = array ();

        // tokenized form, length-prefixed
        list(,$len) = unpack ( "N*", substr ( $response, $pos, 4 ) ); $pos += 4;
        $entry["tokenized"] = $len ? substr ( $response, $pos, $len ) : "";
        $pos += $len;

        // normalized form, length-prefixed
        list(,$len) = unpack ( "N*", substr ( $response, $pos, 4 ) ); $pos += 4;
        $entry["normalized"] = $len ? substr ( $response, $pos, $len ) : "";
        $pos += $len;

        // statistics are only parsed when they were requested
        if ( $hits )
        {
            list($ndocs,$nhits) = array_values ( unpack ( "N*N*", substr ( $response, $pos, 8 ) ) );
            $pos += 8;
            $entry["docs"] = $ndocs;
            $entry["hits"] = $nhits;
        }

        // running past the end of the reply means it was truncated
        if ( $pos > $rlen )
        {
            $this->_error = "incomplete reply";
            $this->_MBPop ();
            return false;
        }

        $res[] = $entry;
        $i++;
    }

    $this->_MBPop ();
    return $res;
}
1473
+
1474
/// backslash-escape every occurrence of the characters listed in $from
/// (presumably the characters that are special in the query syntax; the list is fixed here)
/// the backslash itself comes first, so backslashes added for later characters are not doubled
function EscapeString ( $string )
{
    $from = array ( '\\', '(',')','|','-','!','@','~','"','&', '/', '^', '$', '=' );

    // each replacement is simply a backslash followed by the character itself
    $to = array ();
    foreach ( $from as $ch )
        $to[] = '\\' . $ch;

    return str_replace ( $from, $to, $string );
}
1481
+
1482
+ /////////////////////////////////////////////////////////////////////////////
1483
+ // attribute updates
1484
+ /////////////////////////////////////////////////////////////////////////////
1485
+
1486
/// batch update given attributes in given rows in given indexes
/// $index is the index name (string), $attrs an array of attribute names,
/// $values a hash that maps document IDs to arrays of new values (one value per
/// attribute, in $attrs order); with $mva=true every value is itself an array of ints
/// returns amount of updated documents (0 or more) on success, or -1 on failure
function UpdateAttributes ( $index, $attrs, $values, $mva=false )
{
    // verify everything
    assert ( is_string($index) );
    assert ( is_bool($mva) );

    assert ( is_array($attrs) );
    foreach ( $attrs as $attr )
        assert ( is_string($attr) );

    assert ( is_array($values) );
    foreach ( $values as $id=>$entry )
    {
        assert ( is_numeric($id) );
        assert ( is_array($entry) );
        assert ( count($entry)==count($attrs) );
        foreach ( $entry as $v )
        {
            if ( $mva )
            {
                assert ( is_array($v) );
                foreach ( $v as $vv )
                    assert ( is_int($vv) );
            } else
                assert ( is_int($v) );
        }
    }

    // mbstring workaround: the request is framed with strlen(), so it is built
    // inside the same _MBPush()/_MBPop() bracket as every other request in this class
    $this->_MBPush ();

    // build request
    $req = pack ( "N", strlen($index) ) . $index;

    $req .= pack ( "N", count($attrs) );
    foreach ( $attrs as $attr )
    {
        $req .= pack ( "N", strlen($attr) ) . $attr;
        $req .= pack ( "N", $mva ? 1 : 0 );
    }

    $req .= pack ( "N", count($values) );
    foreach ( $values as $id=>$entry )
    {
        $req .= sphPackU64 ( $id );
        foreach ( $entry as $v )
        {
            // plain attribute: the value itself; MVA: the value count, then the values
            $req .= pack ( "N", $mva ? count($v) : $v );
            if ( $mva )
                foreach ( $v as $vv )
                    $req .= pack ( "N", $vv );
        }
    }

    // connect, send query, get response
    if (!( $fp = $this->_Connect() ))
    {
        $this->_MBPop ();
        return -1;
    }

    $len = strlen($req);
    $req = pack ( "nnN", SEARCHD_COMMAND_UPDATE, VER_COMMAND_UPDATE, $len ) . $req; // add header
    if ( !$this->_Send ( $fp, $req, $len+8 ) )
    {
        $this->_MBPop ();
        return -1;
    }

    if (!( $response = $this->_GetResponse ( $fp, VER_COMMAND_UPDATE ) ))
    {
        $this->_MBPop ();
        return -1;
    }

    // parse response: the first 4 bytes hold the updated documents count
    list(,$updated) = unpack ( "N*", substr ( $response, 0, 4 ) );

    $this->_MBPop ();
    return $updated;
}
1555
+
1556
+ /////////////////////////////////////////////////////////////////////////////
1557
+ // persistent connections
1558
+ /////////////////////////////////////////////////////////////////////////////
1559
+
1560
/// open a persistent connection: connect, send the PERSIST command,
/// and keep the socket in $this->_socket
/// returns true on success; false if already connected, or if connecting or sending fails
function Open()
{
    if ( $this->_socket !== false )
    {
        $this->_error = 'already connected';
        return false;
    }

    $fp = $this->_Connect ();
    if ( !$fp )
        return false;

    // command, command version = 0, body length = 4, body = 1
    $packet = pack ( "nnNN", SEARCHD_COMMAND_PERSIST, 0, 4, 1 );
    if ( !$this->_Send ( $fp, $packet, 12 ) )
        return false;

    $this->_socket = $fp;
    return true;
}
1578
+
1579
/// close the persistent connection kept in $this->_socket
/// returns true on success, false (with the error message set) when not connected
function Close()
{
    $sock = $this->_socket;
    if ( $sock === false )
    {
        $this->_error = 'not connected';
        return false;
    }

    fclose ( $sock );
    $this->_socket = false;

    return true;
}
1592
+
1593
+ //////////////////////////////////////////////////////////////////////////
1594
+ // status
1595
+ //////////////////////////////////////////////////////////////////////////
1596
+
1597
/// send the STATUS command to searchd and parse the reply
/// returns false on failure, or an array of rows on success,
/// where every row is a list of column strings
function Status ()
{
    $this->_MBPush ();
    if (!( $fp = $this->_Connect() ))
    {
        $this->_MBPop();
        return false;
    }

    $req = pack ( "nnNN", SEARCHD_COMMAND_STATUS, VER_COMMAND_STATUS, 4, 1 ); // len=4, body=1
    if ( !( $this->_Send ( $fp, $req, 12 ) ) ||
        !( $response = $this->_GetResponse ( $fp, VER_COMMAND_STATUS ) ) )
    {
        $this->_MBPop ();
        return false;
    }

    // reply layout as parsed here: row count, column count,
    // then rows*cols length-prefixed strings in row-major order
    $p = 0;
    list ( $rows, $cols ) = array_values ( unpack ( "N*N*", substr ( $response, $p, 8 ) ) ); $p += 8;

    $res = array();
    for ( $i=0; $i<$rows; $i++ )
        for ( $j=0; $j<$cols; $j++ )
    {
        list(,$len) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
        $res[$i][] = substr ( $response, $p, $len ); $p += $len;
    }

    $this->_MBPop ();
    return $res;
}
1629
+ }
1630
+
1631
+ //
1632
+ // $Id: sphinxapi.php 2055 2009-11-06 23:09:58Z shodan $
1633
+ //