riddle 0.9.8.1112 → 0.9.8.1198

Sign up to get free protection for your applications and to get access to all the features.
Files changed (52) hide show
  1. data/README +17 -4
  2. data/lib/riddle.rb +4 -2
  3. data/lib/riddle/client.rb +51 -12
  4. data/spec/functional/keywords_spec.rb +40 -0
  5. data/spec/unit/client_spec.rb +26 -0
  6. metadata +3 -61
  7. data/spec/fixtures/data/anchor.bin +0 -0
  8. data/spec/fixtures/data/any.bin +0 -0
  9. data/spec/fixtures/data/boolean.bin +0 -0
  10. data/spec/fixtures/data/distinct.bin +0 -0
  11. data/spec/fixtures/data/field_weights.bin +0 -0
  12. data/spec/fixtures/data/filter.bin +0 -0
  13. data/spec/fixtures/data/filter_array.bin +0 -0
  14. data/spec/fixtures/data/filter_array_exclude.bin +0 -0
  15. data/spec/fixtures/data/filter_floats.bin +0 -0
  16. data/spec/fixtures/data/filter_floats_exclude.bin +0 -0
  17. data/spec/fixtures/data/filter_floats_range.bin +0 -0
  18. data/spec/fixtures/data/filter_range.bin +0 -0
  19. data/spec/fixtures/data/filter_range_exclude.bin +0 -0
  20. data/spec/fixtures/data/group.bin +0 -0
  21. data/spec/fixtures/data/index.bin +0 -0
  22. data/spec/fixtures/data/index_weights.bin +0 -0
  23. data/spec/fixtures/data/phrase.bin +0 -0
  24. data/spec/fixtures/data/rank_mode.bin +0 -0
  25. data/spec/fixtures/data/simple.bin +0 -0
  26. data/spec/fixtures/data/sort.bin +0 -0
  27. data/spec/fixtures/data/update_simple.bin +0 -0
  28. data/spec/fixtures/data/weights.bin +0 -0
  29. data/spec/fixtures/data_generator.php +0 -130
  30. data/spec/fixtures/sphinx/configuration.erb +0 -38
  31. data/spec/fixtures/sphinx/people.old.spa +0 -0
  32. data/spec/fixtures/sphinx/people.old.spd +0 -0
  33. data/spec/fixtures/sphinx/people.old.sph +0 -0
  34. data/spec/fixtures/sphinx/people.old.spi +0 -0
  35. data/spec/fixtures/sphinx/people.old.spm +0 -0
  36. data/spec/fixtures/sphinx/people.old.spp +0 -0
  37. data/spec/fixtures/sphinx/people.spa +0 -0
  38. data/spec/fixtures/sphinx/people.spd +0 -0
  39. data/spec/fixtures/sphinx/people.sph +0 -0
  40. data/spec/fixtures/sphinx/people.spi +0 -0
  41. data/spec/fixtures/sphinx/people.spm +0 -0
  42. data/spec/fixtures/sphinx/people.spp +0 -0
  43. data/spec/fixtures/sphinx/searchd.log +0 -4732
  44. data/spec/fixtures/sphinx/searchd.query.log +0 -783
  45. data/spec/fixtures/sphinx/spec.conf +0 -38
  46. data/spec/fixtures/sphinxapi.php +0 -1066
  47. data/spec/fixtures/sql/conf.example.yml +0 -3
  48. data/spec/fixtures/sql/conf.yml +0 -3
  49. data/spec/fixtures/sql/data.sql +0 -25000
  50. data/spec/fixtures/sql/structure.sql +0 -16
  51. data/spec/spec_helper.rb +0 -26
  52. data/spec/sphinx_helper.rb +0 -92
@@ -1,38 +0,0 @@
1
- indexer
2
- {
3
- mem_limit = 64M
4
- }
5
-
6
- searchd
7
- {
8
- port = 3313
9
- log = /Users/pat/Code/ruby/riddle/spec/fixtures/sphinx/searchd.log
10
- query_log = /Users/pat/Code/ruby/riddle/spec/fixtures/sphinx/searchd.query.log
11
- read_timeout = 5
12
- max_children = 30
13
- pid_file = /Users/pat/Code/ruby/riddle/spec/fixtures/sphinx/searchd.pid
14
- }
15
-
16
- source peoples
17
- {
18
- type = mysql
19
- sql_host = localhost
20
- sql_user = riddle
21
- sql_pass = wossname
22
- sql_db = riddle_sphinx_spec
23
-
24
- sql_query = SELECT id, first_name, middle_initial, last_name, gender, street_address, city, state, postcode, email, UNIX_TIMESTAMP(birthday) AS birthday FROM people WHERE id >= $start AND id <= $end
25
- sql_query_range = SELECT MIN(id), MAX(id) FROM people
26
- sql_query_info = SELECT * FROM people WHERE id = $id
27
- sql_date_column = birthday
28
- }
29
-
30
- index people
31
- {
32
- source = peoples
33
- morphology = stem_en
34
- path = /Users/pat/Code/ruby/riddle/spec/fixtures/sphinx/people
35
- charset_type = utf-8
36
- enable_star = 1
37
- min_prefix_len = 1
38
- }
@@ -1,1066 +0,0 @@
1
- <?php
2
-
3
- //
4
- // $Id: sphinxapi.php 1103 2008-01-24 18:42:57Z shodan $
5
- //
6
-
7
- //
8
- // Copyright (c) 2001-2008, Andrew Aksyonoff. All rights reserved.
9
- //
10
- // This program is free software; you can redistribute it and/or modify
11
- // it under the terms of the GNU General Public License. You should have
12
- // received a copy of the GPL license along with this program; if you
13
- // did not, you can find it at http://www.gnu.org/
14
- //
15
-
16
- /////////////////////////////////////////////////////////////////////////////
17
- // PHP version of Sphinx searchd client (PHP API)
18
- /////////////////////////////////////////////////////////////////////////////
19
-
20
- /// known searchd commands
21
- define ( "SEARCHD_COMMAND_SEARCH", 0 );
22
- define ( "SEARCHD_COMMAND_EXCERPT", 1 );
23
- define ( "SEARCHD_COMMAND_UPDATE", 2 );
24
-
25
- /// current client-side command implementation versions
26
- define ( "VER_COMMAND_SEARCH", 0x112 );
27
- define ( "VER_COMMAND_EXCERPT", 0x100 );
28
- define ( "VER_COMMAND_UPDATE", 0x101 );
29
-
30
- /// known searchd status codes
31
- define ( "SEARCHD_OK", 0 );
32
- define ( "SEARCHD_ERROR", 1 );
33
- define ( "SEARCHD_RETRY", 2 );
34
- define ( "SEARCHD_WARNING", 3 );
35
-
36
- /// known match modes
37
- define ( "SPH_MATCH_ALL", 0 );
38
- define ( "SPH_MATCH_ANY", 1 );
39
- define ( "SPH_MATCH_PHRASE", 2 );
40
- define ( "SPH_MATCH_BOOLEAN", 3 );
41
- define ( "SPH_MATCH_EXTENDED", 4 );
42
- define ( "SPH_MATCH_FULLSCAN", 5 );
43
- define ( "SPH_MATCH_EXTENDED2", 6 ); // extended engine V2 (TEMPORARY, WILL BE REMOVED)
44
-
45
- /// known ranking modes (ext2 only)
46
- define ( "SPH_RANK_PROXIMITY_BM25", 0 ); ///< default mode, phrase proximity major factor and BM25 minor one
47
- define ( "SPH_RANK_BM25", 1 ); ///< statistical mode, BM25 ranking only (faster but worse quality)
48
- define ( "SPH_RANK_NONE", 2 ); ///< no ranking, all matches get a weight of 1
49
- define ( "SPH_RANK_WORDCOUNT", 3 ); ///< simple word-count weighting, rank is a weighted sum of per-field keyword occurence counts
50
-
51
- /// known sort modes
52
- define ( "SPH_SORT_RELEVANCE", 0 );
53
- define ( "SPH_SORT_ATTR_DESC", 1 );
54
- define ( "SPH_SORT_ATTR_ASC", 2 );
55
- define ( "SPH_SORT_TIME_SEGMENTS", 3 );
56
- define ( "SPH_SORT_EXTENDED", 4 );
57
- define ( "SPH_SORT_EXPR", 5 );
58
-
59
- /// known filter types
60
- define ( "SPH_FILTER_VALUES", 0 );
61
- define ( "SPH_FILTER_RANGE", 1 );
62
- define ( "SPH_FILTER_FLOATRANGE", 2 );
63
-
64
- /// known attribute types
65
- define ( "SPH_ATTR_INTEGER", 1 );
66
- define ( "SPH_ATTR_TIMESTAMP", 2 );
67
- define ( "SPH_ATTR_ORDINAL", 3 );
68
- define ( "SPH_ATTR_BOOL", 4 );
69
- define ( "SPH_ATTR_FLOAT", 5 );
70
- define ( "SPH_ATTR_MULTI", 0x40000000 );
71
-
72
- /// known grouping functions
73
- define ( "SPH_GROUPBY_DAY", 0 );
74
- define ( "SPH_GROUPBY_WEEK", 1 );
75
- define ( "SPH_GROUPBY_MONTH", 2 );
76
- define ( "SPH_GROUPBY_YEAR", 3 );
77
- define ( "SPH_GROUPBY_ATTR", 4 );
78
- define ( "SPH_GROUPBY_ATTRPAIR", 5 );
79
-
80
-
81
- /// portably pack numeric to 64 unsigned bits, network order
82
- function sphPack64 ( $v )
83
- {
84
- assert ( is_numeric($v) );
85
-
86
- // x64 route
87
- if ( PHP_INT_SIZE>=8 )
88
- {
89
- $i = (int)$v;
90
- return pack ( "NN", $i>>32, $i&((1<<32)-1) );
91
- }
92
-
93
- // x32 route, bcmath
94
- $x = "4294967296";
95
- if ( function_exists("bcmul") )
96
- {
97
- $h = bcdiv ( $v, $x, 0 );
98
- $l = bcmod ( $v, $x );
99
- return pack ( "NN", (float)$h, (float)$l ); // conversion to float is intentional; int would lose 31st bit
100
- }
101
-
102
- // x32 route, 15 or less decimal digits
103
- // we can use float, because its actually double and has 52 precision bits
104
- if ( strlen($v)<=15 )
105
- {
106
- $f = (float)$v;
107
- $h = (int)($f/$x);
108
- $l = (int)($f-$x*$h);
109
- return pack ( "NN", $h, $l );
110
- }
111
-
112
- // x32 route, 16 or more decimal digits
113
- // well, let me know if you *really* need this
114
- die ( "INTERNAL ERROR: packing more than 15-digit numeric on 32-bit PHP is not implemented yet (contact support)" );
115
- }
116
-
117
-
118
- /// portably unpack 64 unsigned bits, network order to numeric
119
- function sphUnpack64 ( $v )
120
- {
121
- list($h,$l) = array_values ( unpack ( "N*N*", $v ) );
122
-
123
- // x64 route
124
- if ( PHP_INT_SIZE>=8 )
125
- {
126
- if ( $h<0 ) $h += (1<<32); // because php 5.2.2 to 5.2.5 is totally fucked up again
127
- if ( $l<0 ) $l += (1<<32);
128
- return ($h<<32) + $l;
129
- }
130
-
131
- // x32 route
132
- $h = sprintf ( "%u", $h );
133
- $l = sprintf ( "%u", $l );
134
- $x = "4294967296";
135
-
136
- // bcmath
137
- if ( function_exists("bcmul") )
138
- return bcadd ( $l, bcmul ( $x, $h ) );
139
-
140
- // no bcmath, 15 or less decimal digits
141
- // we can use float, because its actually double and has 52 precision bits
142
- if ( $h<1048576 )
143
- {
144
- $f = ((float)$h)*$x + (float)$l;
145
- return sprintf ( "%.0f", $f ); // builtin conversion is only about 39-40 bits precise!
146
- }
147
-
148
- // x32 route, 16 or more decimal digits
149
- // well, let me know if you *really* need this
150
- die ( "INTERNAL ERROR: unpacking more than 15-digit numeric on 32-bit PHP is not implemented yet (contact support)" );
151
- }
152
-
153
-
154
- /// sphinx searchd client class
155
- class SphinxClient
156
- {
157
- var $_host; ///< searchd host (default is "localhost")
158
- var $_port; ///< searchd port (default is 3312)
159
- var $_offset; ///< how many records to seek from result-set start (default is 0)
160
- var $_limit; ///< how many records to return from result-set starting at offset (default is 20)
161
- var $_mode; ///< query matching mode (default is SPH_MATCH_ALL)
162
- var $_weights; ///< per-field weights (default is 1 for all fields)
163
- var $_sort; ///< match sorting mode (default is SPH_SORT_RELEVANCE)
164
- var $_sortby; ///< attribute to sort by (defualt is "")
165
- var $_min_id; ///< min ID to match (default is 0, which means no limit)
166
- var $_max_id; ///< max ID to match (default is 0, which means no limit)
167
- var $_filters; ///< search filters
168
- var $_groupby; ///< group-by attribute name
169
- var $_groupfunc; ///< group-by function (to pre-process group-by attribute value with)
170
- var $_groupsort; ///< group-by sorting clause (to sort groups in result set with)
171
- var $_groupdistinct;///< group-by count-distinct attribute
172
- var $_maxmatches; ///< max matches to retrieve
173
- var $_cutoff; ///< cutoff to stop searching at (default is 0)
174
- var $_retrycount; ///< distributed retries count
175
- var $_retrydelay; ///< distributed retries delay
176
- var $_anchor; ///< geographical anchor point
177
- var $_indexweights; ///< per-index weights
178
- var $_ranker; ///< ranking mode (default is SPH_RANK_PROXIMITY_BM25)
179
- var $_maxquerytime; ///< max query time, milliseconds (default is 0, do not limit)
180
- var $_fieldweights; ///< per-field-name weights
181
-
182
- var $_error; ///< last error message
183
- var $_warning; ///< last warning message
184
-
185
- var $_reqs; ///< requests array for multi-query
186
- var $_mbenc; ///< stored mbstring encoding
187
- var $_arrayresult; ///< whether $result["matches"] should be a hash or an array
188
-
189
- /////////////////////////////////////////////////////////////////////////////
190
- // common stuff
191
- /////////////////////////////////////////////////////////////////////////////
192
-
193
- /// create a new client object and fill defaults
194
- function SphinxClient ()
195
- {
196
- // per-client-object settings
197
- $this->_host = "localhost";
198
- $this->_port = 3312;
199
-
200
- // per-query settings
201
- $this->_offset = 0;
202
- $this->_limit = 20;
203
- $this->_mode = SPH_MATCH_ALL;
204
- $this->_weights = array ();
205
- $this->_sort = SPH_SORT_RELEVANCE;
206
- $this->_sortby = "";
207
- $this->_min_id = 0;
208
- $this->_max_id = 0;
209
- $this->_filters = array ();
210
- $this->_groupby = "";
211
- $this->_groupfunc = SPH_GROUPBY_DAY;
212
- $this->_groupsort = "@group desc";
213
- $this->_groupdistinct= "";
214
- $this->_maxmatches = 1000;
215
- $this->_cutoff = 0;
216
- $this->_retrycount = 0;
217
- $this->_retrydelay = 0;
218
- $this->_anchor = array ();
219
- $this->_indexweights= array ();
220
- $this->_ranker = SPH_RANK_PROXIMITY_BM25;
221
- $this->_maxquerytime= 0;
222
- $this->_fieldweights= array();
223
-
224
- $this->_error = ""; // per-reply fields (for single-query case)
225
- $this->_warning = "";
226
- $this->_reqs = array (); // requests storage (for multi-query case)
227
- $this->_mbenc = "";
228
- $this->_arrayresult = false;
229
- }
230
-
231
- /// get last error message (string)
232
- function GetLastError ()
233
- {
234
- return $this->_error;
235
- }
236
-
237
- /// get last warning message (string)
238
- function GetLastWarning ()
239
- {
240
- return $this->_warning;
241
- }
242
-
243
- /// set searchd host name (string) and port (integer)
244
- function SetServer ( $host, $port )
245
- {
246
- assert ( is_string($host) );
247
- assert ( is_int($port) );
248
- $this->_host = $host;
249
- $this->_port = $port;
250
- }
251
-
252
- /////////////////////////////////////////////////////////////////////////////
253
-
254
- /// enter mbstring workaround mode
255
- function _MBPush ()
256
- {
257
- $this->_mbenc = "";
258
- if ( ini_get ( "mbstring.func_overload" ) & 2 )
259
- {
260
- $this->_mbenc = mb_internal_encoding();
261
- mb_internal_encoding ( "latin1" );
262
- }
263
- }
264
-
265
- /// leave mbstring workaround mode
266
- function _MBPop ()
267
- {
268
- if ( $this->_mbenc )
269
- mb_internal_encoding ( $this->_mbenc );
270
- }
271
-
272
- /// connect to searchd server
273
- function _Connect ()
274
- {
275
- if (!( $fp = @fsockopen ( $this->_host, $this->_port ) ) )
276
- {
277
- $this->_error = "connection to {$this->_host}:{$this->_port} failed";
278
- return false;
279
- }
280
-
281
- // check version
282
- list(,$v) = unpack ( "N*", fread ( $fp, 4 ) );
283
- $v = (int)$v;
284
- if ( $v<1 )
285
- {
286
- fclose ( $fp );
287
- $this->_error = "expected searchd protocol version 1+, got version '$v'";
288
- return false;
289
- }
290
-
291
- // all ok, send my version
292
- fwrite ( $fp, pack ( "N", 1 ) );
293
- return $fp;
294
- }
295
-
296
- /// get and check response packet from searchd server
297
- function _GetResponse ( $fp, $client_ver )
298
- {
299
- $response = "";
300
- $len = 0;
301
-
302
- $header = fread ( $fp, 8 );
303
- if ( strlen($header)==8 )
304
- {
305
- list ( $status, $ver, $len ) = array_values ( unpack ( "n2a/Nb", $header ) );
306
- $left = $len;
307
- while ( $left>0 && !feof($fp) )
308
- {
309
- $chunk = fread ( $fp, $left );
310
- if ( $chunk )
311
- {
312
- $response .= $chunk;
313
- $left -= strlen($chunk);
314
- }
315
- }
316
- }
317
- fclose ( $fp );
318
-
319
- // check response
320
- $read = strlen ( $response );
321
- if ( !$response || $read!=$len )
322
- {
323
- $this->_error = $len
324
- ? "failed to read searchd response (status=$status, ver=$ver, len=$len, read=$read)"
325
- : "received zero-sized searchd response";
326
- return false;
327
- }
328
-
329
- // check status
330
- if ( $status==SEARCHD_WARNING )
331
- {
332
- list(,$wlen) = unpack ( "N*", substr ( $response, 0, 4 ) );
333
- $this->_warning = substr ( $response, 4, $wlen );
334
- return substr ( $response, 4+$wlen );
335
- }
336
- if ( $status==SEARCHD_ERROR )
337
- {
338
- $this->_error = "searchd error: " . substr ( $response, 4 );
339
- return false;
340
- }
341
- if ( $status==SEARCHD_RETRY )
342
- {
343
- $this->_error = "temporary searchd error: " . substr ( $response, 4 );
344
- return false;
345
- }
346
- if ( $status!=SEARCHD_OK )
347
- {
348
- $this->_error = "unknown status code '$status'";
349
- return false;
350
- }
351
-
352
- // check version
353
- if ( $ver<$client_ver )
354
- {
355
- $this->_warning = sprintf ( "searchd command v.%d.%d older than client's v.%d.%d, some options might not work",
356
- $ver>>8, $ver&0xff, $client_ver>>8, $client_ver&0xff );
357
- }
358
-
359
- return $response;
360
- }
361
-
362
- /////////////////////////////////////////////////////////////////////////////
363
- // searching
364
- /////////////////////////////////////////////////////////////////////////////
365
-
366
- /// set offset and count into result set,
367
- /// and optionally set max-matches and cutoff limits
368
- function SetLimits ( $offset, $limit, $max=0, $cutoff=0 )
369
- {
370
- assert ( is_int($offset) );
371
- assert ( is_int($limit) );
372
- assert ( $offset>=0 );
373
- assert ( $limit>0 );
374
- assert ( $max>=0 );
375
- $this->_offset = $offset;
376
- $this->_limit = $limit;
377
- if ( $max>0 )
378
- $this->_maxmatches = $max;
379
- if ( $cutoff>0 )
380
- $this->_cutoff = $cutoff;
381
- }
382
-
383
- /// set maximum query time, in milliseconds, per-index
384
- /// integer, 0 means "do not limit"
385
- function SetMaxQueryTime ( $max )
386
- {
387
- assert ( is_int($max) );
388
- assert ( $max>=0 );
389
- $this->_maxquerytime = $max;
390
- }
391
-
392
- /// set matching mode
393
- function SetMatchMode ( $mode )
394
- {
395
- assert ( $mode==SPH_MATCH_ALL
396
- || $mode==SPH_MATCH_ANY
397
- || $mode==SPH_MATCH_PHRASE
398
- || $mode==SPH_MATCH_BOOLEAN
399
- || $mode==SPH_MATCH_EXTENDED
400
- || $mode==SPH_MATCH_EXTENDED2 );
401
- $this->_mode = $mode;
402
- }
403
-
404
- /// set ranking mode
405
- function SetRankingMode ( $ranker )
406
- {
407
- assert ( $ranker==SPH_RANK_PROXIMITY_BM25
408
- || $ranker==SPH_RANK_BM25
409
- || $ranker==SPH_RANK_NONE
410
- || $ranker==SPH_RANK_WORDCOUNT );
411
- $this->_ranker = $ranker;
412
- }
413
-
414
- /// set matches sorting mode
415
- function SetSortMode ( $mode, $sortby="" )
416
- {
417
- assert (
418
- $mode==SPH_SORT_RELEVANCE ||
419
- $mode==SPH_SORT_ATTR_DESC ||
420
- $mode==SPH_SORT_ATTR_ASC ||
421
- $mode==SPH_SORT_TIME_SEGMENTS ||
422
- $mode==SPH_SORT_EXTENDED ||
423
- $mode==SPH_SORT_EXPR );
424
- assert ( is_string($sortby) );
425
- assert ( $mode==SPH_SORT_RELEVANCE || strlen($sortby)>0 );
426
-
427
- $this->_sort = $mode;
428
- $this->_sortby = $sortby;
429
- }
430
-
431
- /// bind per-field weights by order
432
- /// DEPRECATED; use SetFieldWeights() instead
433
- function SetWeights ( $weights )
434
- {
435
- assert ( is_array($weights) );
436
- foreach ( $weights as $weight )
437
- assert ( is_int($weight) );
438
-
439
- $this->_weights = $weights;
440
- }
441
-
442
- /// bind per-field weights by name
443
- function SetFieldWeights ( $weights )
444
- {
445
- assert ( is_array($weights) );
446
- foreach ( $weights as $name=>$weight )
447
- {
448
- assert ( is_string($name) );
449
- assert ( is_int($weight) );
450
- }
451
- $this->_fieldweights = $weights;
452
- }
453
-
454
- /// bind per-index weights by name
455
- function SetIndexWeights ( $weights )
456
- {
457
- assert ( is_array($weights) );
458
- foreach ( $weights as $index=>$weight )
459
- {
460
- assert ( is_string($index) );
461
- assert ( is_int($weight) );
462
- }
463
- $this->_indexweights = $weights;
464
- }
465
-
466
- /// set IDs range to match
467
- /// only match records if document ID is beetwen $min and $max (inclusive)
468
- function SetIDRange ( $min, $max )
469
- {
470
- assert ( is_numeric($min) );
471
- assert ( is_numeric($max) );
472
- assert ( $min<=$max );
473
- $this->_min_id = $min;
474
- $this->_max_id = $max;
475
- }
476
-
477
- /// set values set filter
478
- /// only match records where $attribute value is in given set
479
- function SetFilter ( $attribute, $values, $exclude=false )
480
- {
481
- assert ( is_string($attribute) );
482
- assert ( is_array($values) );
483
- assert ( count($values) );
484
-
485
- if ( is_array($values) && count($values) )
486
- {
487
- foreach ( $values as $value )
488
- assert ( is_numeric($value) );
489
-
490
- $this->_filters[] = array ( "type"=>SPH_FILTER_VALUES, "attr"=>$attribute, "exclude"=>$exclude, "values"=>$values );
491
- }
492
- }
493
-
494
- /// set range filter
495
- /// only match records if $attribute value is beetwen $min and $max (inclusive)
496
- function SetFilterRange ( $attribute, $min, $max, $exclude=false )
497
- {
498
- assert ( is_string($attribute) );
499
- assert ( is_int($min) );
500
- assert ( is_int($max) );
501
- assert ( $min<=$max );
502
-
503
- $this->_filters[] = array ( "type"=>SPH_FILTER_RANGE, "attr"=>$attribute, "exclude"=>$exclude, "min"=>$min, "max"=>$max );
504
- }
505
-
506
- /// set float range filter
507
- /// only match records if $attribute value is beetwen $min and $max (inclusive)
508
- function SetFilterFloatRange ( $attribute, $min, $max, $exclude=false )
509
- {
510
- assert ( is_string($attribute) );
511
- assert ( is_float($min) );
512
- assert ( is_float($max) );
513
- assert ( $min<=$max );
514
-
515
- $this->_filters[] = array ( "type"=>SPH_FILTER_FLOATRANGE, "attr"=>$attribute, "exclude"=>$exclude, "min"=>$min, "max"=>$max );
516
- }
517
-
518
- /// setup anchor point for geosphere distance calculations
519
- /// required to use @geodist in filters and sorting
520
- /// latitude and longitude must be in radians
521
- function SetGeoAnchor ( $attrlat, $attrlong, $lat, $long )
522
- {
523
- assert ( is_string($attrlat) );
524
- assert ( is_string($attrlong) );
525
- assert ( is_float($lat) );
526
- assert ( is_float($long) );
527
-
528
- $this->_anchor = array ( "attrlat"=>$attrlat, "attrlong"=>$attrlong, "lat"=>$lat, "long"=>$long );
529
- }
530
-
531
- /// set grouping attribute and function
532
- function SetGroupBy ( $attribute, $func, $groupsort="@group desc" )
533
- {
534
- assert ( is_string($attribute) );
535
- assert ( is_string($groupsort) );
536
- assert ( $func==SPH_GROUPBY_DAY
537
- || $func==SPH_GROUPBY_WEEK
538
- || $func==SPH_GROUPBY_MONTH
539
- || $func==SPH_GROUPBY_YEAR
540
- || $func==SPH_GROUPBY_ATTR
541
- || $func==SPH_GROUPBY_ATTRPAIR );
542
-
543
- $this->_groupby = $attribute;
544
- $this->_groupfunc = $func;
545
- $this->_groupsort = $groupsort;
546
- }
547
-
548
- /// set count-distinct attribute for group-by queries
549
- function SetGroupDistinct ( $attribute )
550
- {
551
- assert ( is_string($attribute) );
552
- $this->_groupdistinct = $attribute;
553
- }
554
-
555
- /// set distributed retries count and delay
556
- function SetRetries ( $count, $delay=0 )
557
- {
558
- assert ( is_int($count) && $count>=0 );
559
- assert ( is_int($delay) && $delay>=0 );
560
- $this->_retrycount = $count;
561
- $this->_retrydelay = $delay;
562
- }
563
-
564
- /// set result set format (hash or array; hash by default)
565
- /// PHP specific; needed for group-by-MVA result sets that may contain duplicate IDs
566
- function SetArrayResult ( $arrayresult )
567
- {
568
- assert ( is_bool($arrayresult) );
569
- $this->_arrayresult = $arrayresult;
570
- }
571
-
572
- //////////////////////////////////////////////////////////////////////////////
573
-
574
- /// clear all filters (for multi-queries)
575
- function ResetFilters ()
576
- {
577
- $this->_filters = array();
578
- $this->_anchor = array();
579
- }
580
-
581
- /// clear groupby settings (for multi-queries)
582
- function ResetGroupBy ()
583
- {
584
- $this->_groupby = "";
585
- $this->_groupfunc = SPH_GROUPBY_DAY;
586
- $this->_groupsort = "@group desc";
587
- $this->_groupdistinct= "";
588
- }
589
-
590
- //////////////////////////////////////////////////////////////////////////////
591
-
592
- /// connect to searchd server, run given search query through given indexes,
593
- /// and return the search results
594
- function Query ( $query, $index="*" )
595
- {
596
- assert ( empty($this->_reqs) );
597
-
598
- $this->AddQuery ( $query, $index );
599
- $results = $this->RunQueries ();
600
-
601
- if ( !is_array($results) )
602
- return false; // probably network error; error message should be already filled
603
-
604
- $this->_error = $results[0]["error"];
605
- $this->_warning = $results[0]["warning"];
606
- if ( $results[0]["status"]==SEARCHD_ERROR )
607
- return false;
608
- else
609
- return $results[0];
610
- }
611
-
612
- /// helper to pack floats in network byte order
613
- function _PackFloat ( $f )
614
- {
615
- $t1 = pack ( "f", $f ); // machine order
616
- list(,$t2) = unpack ( "L*", $t1 ); // int in machine order
617
- return pack ( "N", $t2 );
618
- }
619
-
620
- /// add query to multi-query batch
621
- /// returns index into results array from RunQueries() call
622
- function AddQuery ( $query, $index="*" )
623
- {
624
- // mbstring workaround
625
- $this->_MBPush ();
626
-
627
- // build request
628
- $req = pack ( "NNNNN", $this->_offset, $this->_limit, $this->_mode, $this->_ranker, $this->_sort ); // mode and limits
629
- $req .= pack ( "N", strlen($this->_sortby) ) . $this->_sortby;
630
- $req .= pack ( "N", strlen($query) ) . $query; // query itself
631
- $req .= pack ( "N", count($this->_weights) ); // weights
632
- foreach ( $this->_weights as $weight )
633
- $req .= pack ( "N", (int)$weight );
634
- $req .= pack ( "N", strlen($index) ) . $index; // indexes
635
- $req .= pack ( "N", 1 ); // id64 range marker
636
- $req .= sphPack64 ( $this->_min_id ) . sphPack64 ( $this->_max_id ); // id64 range
637
-
638
- // filters
639
- $req .= pack ( "N", count($this->_filters) );
640
- foreach ( $this->_filters as $filter )
641
- {
642
- $req .= pack ( "N", strlen($filter["attr"]) ) . $filter["attr"];
643
- $req .= pack ( "N", $filter["type"] );
644
- switch ( $filter["type"] )
645
- {
646
- case SPH_FILTER_VALUES:
647
- $req .= pack ( "N", count($filter["values"]) );
648
- foreach ( $filter["values"] as $value )
649
- $req .= pack ( "N", floatval($value) ); // this uberhack is to workaround 32bit signed int limit on x32 platforms
650
- break;
651
-
652
- case SPH_FILTER_RANGE:
653
- $req .= pack ( "NN", $filter["min"], $filter["max"] );
654
- break;
655
-
656
- case SPH_FILTER_FLOATRANGE:
657
- $req .= $this->_PackFloat ( $filter["min"] ) . $this->_PackFloat ( $filter["max"] );
658
- break;
659
-
660
- default:
661
- assert ( 0 && "internal error: unhandled filter type" );
662
- }
663
- $req .= pack ( "N", $filter["exclude"] );
664
- }
665
-
666
- // group-by clause, max-matches count, group-sort clause, cutoff count
667
- $req .= pack ( "NN", $this->_groupfunc, strlen($this->_groupby) ) . $this->_groupby;
668
- $req .= pack ( "N", $this->_maxmatches );
669
- $req .= pack ( "N", strlen($this->_groupsort) ) . $this->_groupsort;
670
- $req .= pack ( "NNN", $this->_cutoff, $this->_retrycount, $this->_retrydelay );
671
- $req .= pack ( "N", strlen($this->_groupdistinct) ) . $this->_groupdistinct;
672
-
673
- // anchor point
674
- if ( empty($this->_anchor) )
675
- {
676
- $req .= pack ( "N", 0 );
677
- } else
678
- {
679
- $a =& $this->_anchor;
680
- $req .= pack ( "N", 1 );
681
- $req .= pack ( "N", strlen($a["attrlat"]) ) . $a["attrlat"];
682
- $req .= pack ( "N", strlen($a["attrlong"]) ) . $a["attrlong"];
683
- $req .= $this->_PackFloat ( $a["lat"] ) . $this->_PackFloat ( $a["long"] );
684
- }
685
-
686
- // per-index weights
687
- $req .= pack ( "N", count($this->_indexweights) );
688
- foreach ( $this->_indexweights as $idx=>$weight )
689
- $req .= pack ( "N", strlen($idx) ) . $idx . pack ( "N", $weight );
690
-
691
- // max query time
692
- $req .= pack ( "N", $this->_maxquerytime );
693
-
694
- // per-field weights
695
- $req .= pack ( "N", count($this->_fieldweights) );
696
- foreach ( $this->_fieldweights as $field=>$weight )
697
- $req .= pack ( "N", strlen($field) ) . $field . pack ( "N", $weight );
698
-
699
- // mbstring workaround
700
- $this->_MBPop ();
701
-
702
- // store request to requests array
703
- $this->_reqs[] = $req;
704
- return count($this->_reqs)-1;
705
- }
706
-
707
- /// connect to searchd, run queries batch, and return an array of result sets
708
- function RunQueries ()
709
- {
710
- if ( empty($this->_reqs) )
711
- {
712
- $this->_error = "no queries defined, issue AddQuery() first";
713
- return false;
714
- }
715
-
716
- // mbstring workaround
717
- $this->_MBPush ();
718
-
719
- if (!( $fp = $this->_Connect() ))
720
- {
721
- $this->_MBPop ();
722
- return false;
723
- }
724
-
725
- ////////////////////////////
726
- // send query, get response
727
- ////////////////////////////
728
-
729
- $nreqs = count($this->_reqs);
730
- $req = join ( "", $this->_reqs );
731
- $len = 4+strlen($req);
732
- $req = pack ( "nnNN", SEARCHD_COMMAND_SEARCH, VER_COMMAND_SEARCH, $len, $nreqs ) . $req; // add header
733
-
734
- fwrite ( $fp, $req, $len+8 );
735
- if (!( $response = $this->_GetResponse ( $fp, VER_COMMAND_SEARCH ) ))
736
- {
737
- $this->_MBPop ();
738
- return false;
739
- }
740
-
741
- $this->_reqs = array ();
742
-
743
- //////////////////
744
- // parse response
745
- //////////////////
746
-
747
- $p = 0; // current position
748
- $max = strlen($response); // max position for checks, to protect against broken responses
749
-
750
- $results = array ();
751
- for ( $ires=0; $ires<$nreqs && $p<$max; $ires++ )
752
- {
753
- $results[] = array();
754
- $result =& $results[$ires];
755
-
756
- $result["error"] = "";
757
- $result["warning"] = "";
758
-
759
- // extract status
760
- list(,$status) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
761
- $result["status"] = $status;
762
- if ( $status!=SEARCHD_OK )
763
- {
764
- list(,$len) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
765
- $message = substr ( $response, $p, $len ); $p += $len;
766
-
767
- if ( $status==SEARCHD_WARNING )
768
- {
769
- $result["warning"] = $message;
770
- } else
771
- {
772
- $result["error"] = $message;
773
- continue;
774
- }
775
- }
776
-
777
- // read schema
778
- $fields = array ();
779
- $attrs = array ();
780
-
781
- list(,$nfields) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
782
- while ( $nfields-->0 && $p<$max )
783
- {
784
- list(,$len) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
785
- $fields[] = substr ( $response, $p, $len ); $p += $len;
786
- }
787
- $result["fields"] = $fields;
788
-
789
- list(,$nattrs) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
790
- while ( $nattrs-->0 && $p<$max )
791
- {
792
- list(,$len) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
793
- $attr = substr ( $response, $p, $len ); $p += $len;
794
- list(,$type) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
795
- $attrs[$attr] = $type;
796
- }
797
- $result["attrs"] = $attrs;
798
-
799
- // read match count
800
- list(,$count) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
801
- list(,$id64) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
802
-
803
- // read matches
804
- $idx = -1;
805
- while ( $count-->0 && $p<$max )
806
- {
807
- // index into result array
808
- $idx++;
809
-
810
- // parse document id and weight
811
- if ( $id64 )
812
- {
813
- $doc = sphUnpack64 ( substr ( $response, $p, 8 ) ); $p += 8;
814
- list(,$weight) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
815
- } else
816
- {
817
- list ( $doc, $weight ) = array_values ( unpack ( "N*N*",
818
- substr ( $response, $p, 8 ) ) );
819
- $p += 8;
820
- $doc = sprintf ( "%u", $doc ); // workaround for php signed/unsigned braindamage
821
- }
822
- $weight = sprintf ( "%u", $weight );
823
-
824
- // create match entry
825
- if ( $this->_arrayresult )
826
- $result["matches"][$idx] = array ( "id"=>$doc, "weight"=>$weight );
827
- else
828
- $result["matches"][$doc]["weight"] = $weight;
829
-
830
- // parse and create attributes
831
- $attrvals = array ();
832
- foreach ( $attrs as $attr=>$type )
833
- {
834
- // handle floats
835
- if ( $type==SPH_ATTR_FLOAT )
836
- {
837
- list(,$uval) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
838
- list(,$fval) = unpack ( "f*", pack ( "L", $uval ) );
839
- $attrvals[$attr] = $fval;
840
- continue;
841
- }
842
-
843
- // handle everything else as unsigned ints
844
- list(,$val) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
845
- if ( $type & SPH_ATTR_MULTI )
846
- {
847
- $attrvals[$attr] = array ();
848
- $nvalues = $val;
849
- while ( $nvalues-->0 && $p<$max )
850
- {
851
- list(,$val) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
852
- $attrvals[$attr][] = sprintf ( "%u", $val );
853
- }
854
- } else
855
- {
856
- $attrvals[$attr] = sprintf ( "%u", $val );
857
- }
858
- }
859
-
860
- if ( $this->_arrayresult )
861
- $result["matches"][$idx]["attrs"] = $attrvals;
862
- else
863
- $result["matches"][$doc]["attrs"] = $attrvals;
864
- }
865
-
866
- list ( $total, $total_found, $msecs, $words ) =
867
- array_values ( unpack ( "N*N*N*N*", substr ( $response, $p, 16 ) ) );
868
- $result["total"] = sprintf ( "%u", $total );
869
- $result["total_found"] = sprintf ( "%u", $total_found );
870
- $result["time"] = sprintf ( "%.3f", $msecs/1000 );
871
- $p += 16;
872
-
873
- while ( $words-->0 && $p<$max )
874
- {
875
- list(,$len) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
876
- $word = substr ( $response, $p, $len ); $p += $len;
877
- list ( $docs, $hits ) = array_values ( unpack ( "N*N*", substr ( $response, $p, 8 ) ) ); $p += 8;
878
- $result["words"][$word] = array (
879
- "docs"=>sprintf ( "%u", $docs ),
880
- "hits"=>sprintf ( "%u", $hits ) );
881
- }
882
- }
883
-
884
- $this->_MBPop ();
885
- return $results;
886
- }
887
-
888
- /////////////////////////////////////////////////////////////////////////////
889
- // excerpts generation
890
- /////////////////////////////////////////////////////////////////////////////
891
-
892
/// connect to searchd server, and generate excerpts (snippets)
/// of given documents for given query.
///
/// $docs	- array of strings, one per document to build a snippet from
/// $index	- string, sent to searchd as the request index
/// $words	- string, sent to searchd as the request words
/// $opts	- optional array of settings; any key that is missing (or null)
///		  falls back to its default:
///		  "before_match" ("<b>"), "after_match" ("</b>"),
///		  "chunk_separator" (" ... "), "limit" (256), "around" (5),
///		  "exact_phrase", "single_passage", "use_boundaries",
///		  "weight_order" (all false)
///
/// returns false on failure (no connection, no response, or a reply that
/// is shorter than its own length prefixes announce; the latter also sets
/// $this->_error to "incomplete reply"), or an array with one snippet
/// string per entry of $docs on success
function BuildExcerpts ( $docs, $index, $words, $opts=array() )
{
	assert ( is_array($docs) );
	assert ( is_string($index) );
	assert ( is_string($words) );
	assert ( is_array($opts) );

	$this->_MBPush ();

	if (!( $fp = $this->_Connect() ))
	{
		$this->_MBPop();
		return false;
	}

	/////////////////
	// fixup options
	/////////////////

	// isset() is used on purpose (rather than array union) so that an
	// explicit null value is replaced by the default as well
	$defaults = array (
		"before_match"		=> "<b>",
		"after_match"		=> "</b>",
		"chunk_separator"	=> " ... ",
		"limit"				=> 256,
		"around"			=> 5,
		"exact_phrase"		=> false,
		"single_passage"	=> false,
		"use_boundaries"	=> false,
		"weight_order"		=> false );
	foreach ( $defaults as $key=>$value )
		if ( !isset($opts[$key]) )
			$opts[$key] = $value;

	/////////////////
	// build request
	/////////////////

	// v.1.0 req
	$flags = 1; // remove spaces
	if ( $opts["exact_phrase"] )	$flags |= 2;
	if ( $opts["single_passage"] )	$flags |= 4;
	if ( $opts["use_boundaries"] )	$flags |= 8;
	if ( $opts["weight_order"] )	$flags |= 16;
	$req = pack ( "NN", 0, $flags ); // mode=0, flags=$flags
	$req .= pack ( "N", strlen($index) ) . $index; // req index
	$req .= pack ( "N", strlen($words) ) . $words; // req words

	// options
	$req .= pack ( "N", strlen($opts["before_match"]) ) . $opts["before_match"];
	$req .= pack ( "N", strlen($opts["after_match"]) ) . $opts["after_match"];
	$req .= pack ( "N", strlen($opts["chunk_separator"]) ) . $opts["chunk_separator"];
	$req .= pack ( "N", (int)$opts["limit"] );
	$req .= pack ( "N", (int)$opts["around"] );

	// documents
	$ndocs = count($docs);
	$req .= pack ( "N", $ndocs );
	foreach ( $docs as $doc )
	{
		assert ( is_string($doc) );
		$req .= pack ( "N", strlen($doc) ) . $doc;
	}

	////////////////////////////
	// send query, get response
	////////////////////////////

	$len = strlen($req);
	$req = pack ( "nnN", SEARCHD_COMMAND_EXCERPT, VER_COMMAND_EXCERPT, $len ) . $req; // add header
	fwrite ( $fp, $req, $len+8 ); // 8 = size of the header packed above
	if (!( $response = $this->_GetResponse ( $fp, VER_COMMAND_EXCERPT ) ))
	{
		$this->_MBPop ();
		return false;
	}

	//////////////////
	// parse response
	//////////////////

	// the reply is a sequence of length-prefixed strings, one per document
	$pos = 0;
	$res = array ();
	$rlen = strlen($response);
	for ( $i=0; $i<$ndocs; $i++ )
	{
		// the 4-byte length prefix itself must be fully present; without
		// this check a truncated reply makes unpack() return nothing and
		// list() raise a notice before the truncation is reported
		if ( $pos+4 > $rlen )
		{
			$this->_error = "incomplete reply";
			$this->_MBPop ();
			return false;
		}
		list(,$len) = unpack ( "N*", substr ( $response, $pos, 4 ) );
		$pos += 4;

		// ...and so must the snippet body it announces
		if ( $pos+$len > $rlen )
		{
			$this->_error = "incomplete reply";
			$this->_MBPop ();
			return false;
		}
		$res[] = $len ? substr ( $response, $pos, $len ) : "";
		$pos += $len;
	}

	$this->_MBPop ();
	return $res;
}
991
-
992
- /////////////////////////////////////////////////////////////////////////////
993
- // attribute updates
994
- /////////////////////////////////////////////////////////////////////////////
995
-
996
/// update given attribute values on given documents in given indexes
///
/// $index		- string, index name(s) to update
/// $attrs		- array of attribute name strings
/// $values		- array mapping numeric document id to an array of integer
///			  values, one value per entry of $attrs, in the same order
/// $requestOnly	- when true (the default), nothing is sent: the packed
///			  request body (without the command header) is returned
///			  instead. this is the hook marked "for testing Riddle";
///			  it used to be an unconditional return that left the
///			  rest of this method unreachable. pass false to actually
///			  talk to searchd
///
/// returns the request body string when $requestOnly is true; otherwise
/// returns amount of updated documents (0 or more) on success, or -1 on failure
function UpdateAttributes ( $index, $attrs, $values, $requestOnly=true )
{
	// verify everything
	assert ( is_string($index) );

	assert ( is_array($attrs) );
	foreach ( $attrs as $attr )
		assert ( is_string($attr) );

	assert ( is_array($values) );
	foreach ( $values as $id=>$entry )
	{
		assert ( is_numeric($id) );
		assert ( is_array($entry) );
		assert ( count($entry)==count($attrs) );
		foreach ( $entry as $v )
			assert ( is_int($v) );
	}

	// build request
	$req = pack ( "N", strlen($index) ) . $index;

	$req .= pack ( "N", count($attrs) );
	foreach ( $attrs as $attr )
		$req .= pack ( "N", strlen($attr) ) . $attr;

	$req .= pack ( "N", count($values) );
	foreach ( $values as $id=>$entry )
	{
		// document id goes out as a 64-bit value, attribute values as 32-bit
		$req .= sphPack64 ( $id );
		foreach ( $entry as $v )
			$req .= pack ( "N", $v );
	}

	// testing hook for Riddle: hand back the raw request body
	if ( $requestOnly )
		return $req;

	// mbstring workaround
	$this->_MBPush ();

	// connect, send query, get response
	if (!( $fp = $this->_Connect() ))
	{
		$this->_MBPop ();
		return -1;
	}

	$len = strlen($req);
	$req = pack ( "nnN", SEARCHD_COMMAND_UPDATE, VER_COMMAND_UPDATE, $len ) . $req; // add header
	fwrite ( $fp, $req, $len+8 ); // 8 = size of the header packed above

	if (!( $response = $this->_GetResponse ( $fp, VER_COMMAND_UPDATE ) ))
	{
		$this->_MBPop ();
		return -1;
	}

	// parse response
	list(,$updated) = unpack ( "N*", substr ( $response, 0, 4 ) );
	$this->_MBPop ();
	return $updated;
}
1060
- }
1061
-
1062
- //
1063
- // $Id: sphinxapi.php 1103 2008-01-24 18:42:57Z shodan $
1064
- //
1065
-
1066
- ?>