rlibsphinxclient 0.2.2
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +3 -0
- data/CHANGELOG.rdoc +18 -0
- data/MIT-LICENSE +20 -0
- data/README.rdoc +151 -0
- data/Rakefile +39 -0
- data/VERSION +1 -0
- data/ext/extconf.rb +20 -0
- data/ext/rlibsphinxclient.i +314 -0
- data/ext/rlibsphinxclient_wrap.c +5931 -0
- data/init.rb +1 -0
- data/lib/sphinx.rb +22 -0
- data/lib/sphinx/client.rb +1070 -0
- data/lib/sphinx/fast_client.rb +184 -0
- data/lib/sphinx/request.rb +49 -0
- data/lib/sphinx/response.rb +69 -0
- data/lib/sphinx/safe_executor.rb +11 -0
- data/lib/sphinx/timeout.rb +9 -0
- data/rlibsphinxclient.gemspec +117 -0
- data/spec/client_response_spec.rb +135 -0
- data/spec/client_spec.rb +548 -0
- data/spec/fixtures/default_search.php +8 -0
- data/spec/fixtures/default_search_index.php +8 -0
- data/spec/fixtures/excerpt_custom.php +11 -0
- data/spec/fixtures/excerpt_default.php +8 -0
- data/spec/fixtures/excerpt_flags.php +11 -0
- data/spec/fixtures/field_weights.php +9 -0
- data/spec/fixtures/filter.php +9 -0
- data/spec/fixtures/filter_exclude.php +9 -0
- data/spec/fixtures/filter_float_range.php +9 -0
- data/spec/fixtures/filter_float_range_exclude.php +9 -0
- data/spec/fixtures/filter_range.php +9 -0
- data/spec/fixtures/filter_range_exclude.php +9 -0
- data/spec/fixtures/filter_ranges.php +10 -0
- data/spec/fixtures/filters.php +10 -0
- data/spec/fixtures/filters_different.php +13 -0
- data/spec/fixtures/geo_anchor.php +9 -0
- data/spec/fixtures/group_by_attr.php +9 -0
- data/spec/fixtures/group_by_attrpair.php +9 -0
- data/spec/fixtures/group_by_day.php +9 -0
- data/spec/fixtures/group_by_day_sort.php +9 -0
- data/spec/fixtures/group_by_month.php +9 -0
- data/spec/fixtures/group_by_week.php +9 -0
- data/spec/fixtures/group_by_year.php +9 -0
- data/spec/fixtures/group_distinct.php +10 -0
- data/spec/fixtures/id_range.php +9 -0
- data/spec/fixtures/id_range64.php +9 -0
- data/spec/fixtures/index_weights.php +9 -0
- data/spec/fixtures/keywords.php +8 -0
- data/spec/fixtures/limits.php +9 -0
- data/spec/fixtures/limits_cutoff.php +9 -0
- data/spec/fixtures/limits_max.php +9 -0
- data/spec/fixtures/limits_max_cutoff.php +9 -0
- data/spec/fixtures/match_all.php +9 -0
- data/spec/fixtures/match_any.php +9 -0
- data/spec/fixtures/match_boolean.php +9 -0
- data/spec/fixtures/match_extended.php +9 -0
- data/spec/fixtures/match_extended2.php +9 -0
- data/spec/fixtures/match_fullscan.php +9 -0
- data/spec/fixtures/match_phrase.php +9 -0
- data/spec/fixtures/max_query_time.php +9 -0
- data/spec/fixtures/miltiple_queries.php +12 -0
- data/spec/fixtures/ranking_bm25.php +9 -0
- data/spec/fixtures/ranking_none.php +9 -0
- data/spec/fixtures/ranking_proximity_bm25.php +9 -0
- data/spec/fixtures/ranking_wordcount.php +9 -0
- data/spec/fixtures/retries.php +9 -0
- data/spec/fixtures/retries_delay.php +9 -0
- data/spec/fixtures/sort_attr_asc.php +9 -0
- data/spec/fixtures/sort_attr_desc.php +9 -0
- data/spec/fixtures/sort_expr.php +9 -0
- data/spec/fixtures/sort_extended.php +9 -0
- data/spec/fixtures/sort_relevance.php +9 -0
- data/spec/fixtures/sort_time_segments.php +9 -0
- data/spec/fixtures/sphinxapi.php +1181 -0
- data/spec/fixtures/update_attributes.php +8 -0
- data/spec/fixtures/weights.php +9 -0
- data/spec/sphinx/sphinx.conf +67 -0
- data/spec/sphinx/sphinx_test.sql +86 -0
- metadata +133 -0
@@ -0,0 +1,1181 @@
|
|
1
|
+
<?php
|
2
|
+
|
3
|
+
//
|
4
|
+
// $Id$
|
5
|
+
//
|
6
|
+
|
7
|
+
//
|
8
|
+
// Copyright (c) 2001-2008, Andrew Aksyonoff. All rights reserved.
|
9
|
+
//
|
10
|
+
// This program is free software; you can redistribute it and/or modify
|
11
|
+
// it under the terms of the GNU General Public License. You should have
|
12
|
+
// received a copy of the GPL license along with this program; if you
|
13
|
+
// did not, you can find it at http://www.gnu.org/
|
14
|
+
//
|
15
|
+
|
16
|
+
/////////////////////////////////////////////////////////////////////////////
|
17
|
+
// PHP version of Sphinx searchd client (PHP API)
|
18
|
+
/////////////////////////////////////////////////////////////////////////////
|
19
|
+
|
20
|
+
/// searchd command identifiers (sent in the request header)
define('SEARCHD_COMMAND_SEARCH', 0);
define('SEARCHD_COMMAND_EXCERPT', 1);
define('SEARCHD_COMMAND_UPDATE', 2);
define('SEARCHD_COMMAND_KEYWORDS', 3);

/// client-side implementation version of each command
define('VER_COMMAND_SEARCH', 0x113);
define('VER_COMMAND_EXCERPT', 0x100);
define('VER_COMMAND_UPDATE', 0x101);
define('VER_COMMAND_KEYWORDS', 0x100);

/// status codes reported by searchd
define('SEARCHD_OK', 0);
define('SEARCHD_ERROR', 1);
define('SEARCHD_RETRY', 2);
define('SEARCHD_WARNING', 3);

/// match modes
define('SPH_MATCH_ALL', 0);
define('SPH_MATCH_ANY', 1);
define('SPH_MATCH_PHRASE', 2);
define('SPH_MATCH_BOOLEAN', 3);
define('SPH_MATCH_EXTENDED', 4);
define('SPH_MATCH_FULLSCAN', 5);
define('SPH_MATCH_EXTENDED2', 6); // extended engine V2 (TEMPORARY, WILL BE REMOVED)

/// ranking modes (ext2 only)
define('SPH_RANK_PROXIMITY_BM25', 0); ///< default mode, phrase proximity major factor and BM25 minor one
define('SPH_RANK_BM25', 1); ///< statistical mode, BM25 ranking only (faster but worse quality)
define('SPH_RANK_NONE', 2); ///< no ranking, all matches get a weight of 1
define('SPH_RANK_WORDCOUNT', 3); ///< simple word-count weighting, rank is a weighted sum of per-field keyword occurrence counts

/// sort modes
define('SPH_SORT_RELEVANCE', 0);
define('SPH_SORT_ATTR_DESC', 1);
define('SPH_SORT_ATTR_ASC', 2);
define('SPH_SORT_TIME_SEGMENTS', 3);
define('SPH_SORT_EXTENDED', 4);
define('SPH_SORT_EXPR', 5);

/// filter types
define('SPH_FILTER_VALUES', 0);
define('SPH_FILTER_RANGE', 1);
define('SPH_FILTER_FLOATRANGE', 2);

/// attribute types
define('SPH_ATTR_INTEGER', 1);
define('SPH_ATTR_TIMESTAMP', 2);
define('SPH_ATTR_ORDINAL', 3);
define('SPH_ATTR_BOOL', 4);
define('SPH_ATTR_FLOAT', 5);
define('SPH_ATTR_MULTI', 0x40000000);

/// grouping functions
define('SPH_GROUPBY_DAY', 0);
define('SPH_GROUPBY_WEEK', 1);
define('SPH_GROUPBY_MONTH', 2);
define('SPH_GROUPBY_YEAR', 3);
define('SPH_GROUPBY_ATTR', 4);
define('SPH_GROUPBY_ATTRPAIR', 5);
|
81
|
+
|
82
|
+
|
83
|
+
/// portably pack numeric to 64 unsigned bits, network order
|
84
|
+
/// @param $v  numeric value (int, float or numeric string) to encode
/// @return    8-byte string: high 32 bits then low 32 bits, both big-endian
function sphPack64 ( $v )
{
    assert ( is_numeric($v) );

    // native 64-bit integers: split with shifts and masks
    if ( PHP_INT_SIZE>=8 )
    {
        $whole = (int)$v;
        $high = $whole>>32;
        $low = $whole&((1<<32)-1);
        return pack ( "NN", $high, $low );
    }

    // 32-bit PHP from here on; 2^32 kept as a decimal string for bcmath
    $x = "4294967296";

    if ( function_exists("bcmul") )
    {
        $high = bcdiv ( $v, $x, 0 );
        $low = bcmod ( $v, $x );
        // floats on purpose: an int cast would lose the top bit of each half
        return pack ( "NN", (float)$high, (float)$low );
    }

    // without bcmath only values of up to 15 digits are handled, via doubles
    if ( strlen($v)<=15 )
    {
        $asFloat = (float)$v;
        $high = (int)($asFloat/$x);
        $low = (int)($asFloat-$x*$high);
        return pack ( "NN", $high, $low );
    }

    // longer values are not supported on 32-bit PHP without bcmath
    die ( "INTERNAL ERROR: packing more than 15-digit numeric on 32-bit PHP is not implemented yet (contact support)" );
}
|
118
|
+
|
119
|
+
|
120
|
+
/// portably unpack 64 unsigned bits, network order to numeric
|
121
|
+
/// @param $v  8-byte string holding two big-endian 32-bit words (high, low)
/// @return    int on 64-bit PHP; decimal string on 32-bit PHP
function sphUnpack64 ( $v )
{
    list ( $high, $low ) = array_values ( unpack ( "N*N*", $v ) );

    // native 64-bit integers
    if ( PHP_INT_SIZE>=8 )
    {
        // undo negative halves (the original code notes PHP 5.2.2 to 5.2.5 as affected)
        $wrap = 1<<32;
        if ( $high<0 )
            $high += $wrap;
        if ( $low<0 )
            $low += $wrap;
        return ($high<<32) + $low;
    }

    // 32-bit PHP: render both halves as unsigned decimal strings
    $high = sprintf ( "%u", $high );
    $low = sprintf ( "%u", $low );
    $x = "4294967296";

    // arbitrary precision when bcmath is available
    if ( function_exists("bcmul") )
        return bcadd ( $low, bcmul ( $x, $high ) );

    // otherwise go through a double, only while the high half is below 2^20
    if ( $high<1048576 )
    {
        $sum = ((float)$high)*$x + (float)$low;
        return sprintf ( "%.0f", $sum ); // plain string conversion would lose precision
    }

    // larger values are not supported on 32-bit PHP without bcmath
    die ( "INTERNAL ERROR: unpacking more than 15-digit numeric on 32-bit PHP is not implemented yet (contact support)" );
}
|
154
|
+
|
155
|
+
|
156
|
+
/// sphinx searchd client class
|
157
|
+
class SphinxClient
|
158
|
+
{
|
159
|
+
var $_host; ///< searchd host (default is "localhost")
|
160
|
+
var $_port; ///< searchd port (default is 3312)
|
161
|
+
var $_offset; ///< how many records to seek from result-set start (default is 0)
|
162
|
+
var $_limit; ///< how many records to return from result-set starting at offset (default is 20)
|
163
|
+
var $_mode; ///< query matching mode (default is SPH_MATCH_ALL)
|
164
|
+
var $_weights; ///< per-field weights (default is 1 for all fields)
|
165
|
+
var $_sort; ///< match sorting mode (default is SPH_SORT_RELEVANCE)
|
166
|
+
var $_sortby; ///< attribute to sort by (default is "")
|
167
|
+
var $_min_id; ///< min ID to match (default is 0, which means no limit)
|
168
|
+
var $_max_id; ///< max ID to match (default is 0, which means no limit)
|
169
|
+
var $_filters; ///< search filters
|
170
|
+
var $_groupby; ///< group-by attribute name
|
171
|
+
var $_groupfunc; ///< group-by function (to pre-process group-by attribute value with)
|
172
|
+
var $_groupsort; ///< group-by sorting clause (to sort groups in result set with)
|
173
|
+
var $_groupdistinct;///< group-by count-distinct attribute
|
174
|
+
var $_maxmatches; ///< max matches to retrieve
|
175
|
+
var $_cutoff; ///< cutoff to stop searching at (default is 0)
|
176
|
+
var $_retrycount; ///< distributed retries count
|
177
|
+
var $_retrydelay; ///< distributed retries delay
|
178
|
+
var $_anchor; ///< geographical anchor point
|
179
|
+
var $_indexweights; ///< per-index weights
|
180
|
+
var $_ranker; ///< ranking mode (default is SPH_RANK_PROXIMITY_BM25)
|
181
|
+
var $_maxquerytime; ///< max query time, milliseconds (default is 0, do not limit)
|
182
|
+
var $_fieldweights; ///< per-field-name weights
|
183
|
+
|
184
|
+
var $_error; ///< last error message
|
185
|
+
var $_warning; ///< last warning message
|
186
|
+
|
187
|
+
var $_reqs; ///< requests array for multi-query
|
188
|
+
var $_mbenc; ///< stored mbstring encoding
|
189
|
+
var $_arrayresult; ///< whether $result["matches"] should be a hash or an array
|
190
|
+
|
191
|
+
/////////////////////////////////////////////////////////////////////////////
|
192
|
+
// common stuff
|
193
|
+
/////////////////////////////////////////////////////////////////////////////
|
194
|
+
|
195
|
+
/// create a new client object and fill defaults
|
196
|
+
/// create a new client object and fill defaults
/// Declared as __construct() because PHP 8 no longer treats a method named
/// after its class as a constructor; without this, "new SphinxClient()"
/// would leave every setting below unset.
function __construct ()
{
    // per-client-object settings
    $this->_host = "localhost";
    $this->_port = 3312;

    // per-query settings
    $this->_offset = 0;
    $this->_limit = 20;
    $this->_mode = SPH_MATCH_ALL;
    $this->_weights = array ();
    $this->_sort = SPH_SORT_RELEVANCE;
    $this->_sortby = "";
    $this->_min_id = 0;
    $this->_max_id = 0;
    $this->_filters = array ();
    $this->_groupby = "";
    $this->_groupfunc = SPH_GROUPBY_DAY;
    $this->_groupsort = "@group desc";
    $this->_groupdistinct= "";
    $this->_maxmatches = 1000;
    $this->_cutoff = 0;
    $this->_retrycount = 0;
    $this->_retrydelay = 0;
    $this->_anchor = array ();
    $this->_indexweights= array ();
    $this->_ranker = SPH_RANK_PROXIMITY_BM25;
    $this->_maxquerytime= 0;
    $this->_fieldweights= array();

    $this->_error = ""; // per-reply fields (for single-query case)
    $this->_warning = "";
    $this->_reqs = array (); // requests storage (for multi-query case)
    $this->_mbenc = "";
    $this->_arrayresult = false;
}

/// legacy PHP 4 style constructor name
/// Kept so explicit calls ($client->SphinxClient(), parent::SphinxClient())
/// keep working; it simply re-applies the defaults.
function SphinxClient ()
{
    $this->__construct ();
}
|
232
|
+
|
233
|
+
/// get last error message (string)
|
234
|
+
/// @return string  the message currently stored in $this->_error
function GetLastError ()
{
    $message = $this->_error;
    return $message;
}
|
238
|
+
|
239
|
+
/// get last warning message (string)
|
240
|
+
/// @return string  the message currently stored in $this->_warning
function GetLastWarning ()
{
    $message = $this->_warning;
    return $message;
}
|
244
|
+
|
245
|
+
/// set searchd host name (string) and port (integer)
|
246
|
+
/// @param $host  searchd host name; must be a string
/// @param $port  searchd port; must be an integer
function SetServer ( $host, $port )
{
    // validate both arguments before touching any state
    assert ( is_string($host) );
    assert ( is_int($port) );

    $this->_port = $port;
    $this->_host = $host;
}
|
253
|
+
|
254
|
+
/////////////////////////////////////////////////////////////////////////////
|
255
|
+
|
256
|
+
/// enter mbstring workaround mode
|
257
|
+
/// Remembers the current mbstring internal encoding and switches to "latin1"
/// when bit 2 of mbstring.func_overload is set; otherwise records "".
function _MBPush ()
{
    $overloaded = ini_get ( "mbstring.func_overload" ) & 2;
    if ( !$overloaded )
    {
        // nothing to undo later
        $this->_mbenc = "";
        return;
    }

    $this->_mbenc = mb_internal_encoding();
    mb_internal_encoding ( "latin1" );
}
|
266
|
+
|
267
|
+
/// leave mbstring workaround mode
|
268
|
+
/// Restores the encoding saved by _MBPush(), if one was saved.
function _MBPop ()
{
    if ( !$this->_mbenc )
        return; // _MBPush() did not change anything

    mb_internal_encoding ( $this->_mbenc );
}
|
273
|
+
|
274
|
+
/// connect to searchd server
|
275
|
+
/// Opens php://stdout for writing and returns that stream (whatever fopen()
/// returns). No network connection is made here; see _OldConnect() for the
/// socket-based variant.
/// NOTE(review): presumably this lets the spec fixtures dump the packed
/// request to standard output - confirm against the specs.
function _Connect ()
{
    $stream = fopen('php://stdout', 'w');
    return $stream;
}
|
279
|
+
|
280
|
+
/// Opens a socket to $this->_host:$this->_port and performs the version
/// handshake.
/// @return resource|false  the connected socket, or false with $this->_error set
function _OldConnect()
{
    $fp = @fsockopen ( $this->_host, $this->_port );
    if ( !$fp )
    {
        $this->_error = "connection to {$this->_host}:{$this->_port} failed";
        return false;
    }

    // the server speaks first: a 4-byte big-endian protocol version
    list(,$v) = unpack ( "N*", fread ( $fp, 4 ) );
    $v = (int)$v;
    if ( $v<1 )
    {
        fclose ( $fp );
        $this->_error = "expected searchd protocol version 1+, got version '$v'";
        return false;
    }

    // reply with the client's protocol version (1)
    $hello = pack ( "N", 1 );
    fwrite ( $fp, $hello );
    return $fp;
}
|
302
|
+
|
303
|
+
/// get and check response packet from searchd server
|
304
|
+
/// Stub: ignores both arguments, reads nothing from $fp and always returns
/// false. The real reader is _OldGetResponse().
function _GetResponse ( $fp, $client_ver )
{
    $noResponse = false;
    return $noResponse;
}
|
308
|
+
|
309
|
+
/// Reads one response packet from $fp, closes $fp, and validates the packet.
/// @param $fp          open stream to read from; always closed before returning
/// @param $client_ver  client-side command version, compared against the server's
/// @return string|false  response body (warning prefix stripped), or false
///                       with $this->_error set
function _OldGetResponse ( $fp, $client_ver )
{
    $response = "";
    $len = 0;
    $status = null;
    $ver = null;

    // header: 16-bit status, 16-bit version, 32-bit body length
    $header = fread ( $fp, 8 );
    if ( strlen($header)==8 )
    {
        list ( $status, $ver, $len ) = array_values ( unpack ( "n2a/Nb", $header ) );
        $left = $len;
        while ( $left>0 && !feof($fp) )
        {
            $chunk = fread ( $fp, $left );
            if ( $chunk===false )
                break; // read error: stop instead of spinning on a dead stream

            // compare by length, not truthiness: a chunk of exactly "0" is
            // valid data and must not be dropped
            $got = strlen ( $chunk );
            if ( $got>0 )
            {
                $response .= $chunk;
                $left -= $got;
            }
        }
    }
    fclose ( $fp );

    // check response (by length, so that a body of "0" is not treated as empty)
    $read = strlen ( $response );
    if ( $read==0 || $read!=$len )
    {
        $this->_error = $len
            ? "failed to read searchd response (status=$status, ver=$ver, len=$len, read=$read)"
            : "received zero-sized searchd response";
        return false;
    }

    // check status
    if ( $status==SEARCHD_WARNING )
    {
        // body starts with a length-prefixed warning, followed by the payload
        list(,$wlen) = unpack ( "N*", substr ( $response, 0, 4 ) );
        $this->_warning = substr ( $response, 4, $wlen );
        return substr ( $response, 4+$wlen );
    }
    if ( $status==SEARCHD_ERROR )
    {
        $this->_error = "searchd error: " . substr ( $response, 4 );
        return false;
    }
    if ( $status==SEARCHD_RETRY )
    {
        $this->_error = "temporary searchd error: " . substr ( $response, 4 );
        return false;
    }
    if ( $status!=SEARCHD_OK )
    {
        $this->_error = "unknown status code '$status'";
        return false;
    }

    // check version: an older server only produces a warning
    if ( $ver<$client_ver )
    {
        $this->_warning = sprintf ( "searchd command v.%d.%d older than client's v.%d.%d, some options might not work",
            $ver>>8, $ver&0xff, $client_ver>>8, $client_ver&0xff );
    }

    return $response;
}
|
373
|
+
|
374
|
+
/////////////////////////////////////////////////////////////////////////////
|
375
|
+
// searching
|
376
|
+
/////////////////////////////////////////////////////////////////////////////
|
377
|
+
|
378
|
+
/// set offset and count into result set,
|
379
|
+
/// and optionally set max-matches and cutoff limits
|
380
|
+
/// @param $offset  non-negative integer, records to skip
/// @param $limit   positive integer, records to return
/// @param $max     max-matches; only applied when greater than 0
/// @param $cutoff  cutoff; only applied when greater than 0
function SetLimits ( $offset, $limit, $max=0, $cutoff=0 )
{
    assert ( is_int($offset) );
    assert ( is_int($limit) );
    assert ( $offset>=0 );
    assert ( $limit>0 );
    assert ( $max>=0 );

    $this->_offset = $offset;
    $this->_limit = $limit;

    // zero (the default) leaves the previously configured value untouched
    if ( $max>0 )
    {
        $this->_maxmatches = $max;
    }
    if ( $cutoff>0 )
    {
        $this->_cutoff = $cutoff;
    }
}
|
394
|
+
|
395
|
+
/// set maximum query time, in milliseconds, per-index
|
396
|
+
/// integer, 0 means "do not limit"
|
397
|
+
/// @param $max  non-negative integer, milliseconds
function SetMaxQueryTime ( $max )
{
    assert ( is_int($max) );
    assert ( $max>=0 );

    $this->_maxquerytime = $max;
}
|
403
|
+
|
404
|
+
/// set matching mode
|
405
|
+
/// @param $mode  one of the SPH_MATCH_* constants
function SetMatchMode ( $mode )
{
    // loose comparison, same as a chain of == checks
    $known = array (
        SPH_MATCH_ALL,
        SPH_MATCH_ANY,
        SPH_MATCH_PHRASE,
        SPH_MATCH_BOOLEAN,
        SPH_MATCH_EXTENDED,
        SPH_MATCH_FULLSCAN,
        SPH_MATCH_EXTENDED2 );
    assert ( in_array ( $mode, $known ) );

    $this->_mode = $mode;
}
|
416
|
+
|
417
|
+
/// set ranking mode
|
418
|
+
/// @param $ranker  one of the SPH_RANK_* constants
function SetRankingMode ( $ranker )
{
    // loose comparison, same as a chain of == checks
    $known = array (
        SPH_RANK_PROXIMITY_BM25,
        SPH_RANK_BM25,
        SPH_RANK_NONE,
        SPH_RANK_WORDCOUNT );
    assert ( in_array ( $ranker, $known ) );

    $this->_ranker = $ranker;
}
|
426
|
+
|
427
|
+
/// set matches sorting mode
|
428
|
+
/// @param $mode    one of the SPH_SORT_* constants
/// @param $sortby  sort clause; must be non-empty unless $mode is SPH_SORT_RELEVANCE
function SetSortMode ( $mode, $sortby="" )
{
    // loose comparison, same as a chain of == checks
    $known = array (
        SPH_SORT_RELEVANCE,
        SPH_SORT_ATTR_DESC,
        SPH_SORT_ATTR_ASC,
        SPH_SORT_TIME_SEGMENTS,
        SPH_SORT_EXTENDED,
        SPH_SORT_EXPR );
    assert ( in_array ( $mode, $known ) );
    assert ( is_string($sortby) );
    assert ( $mode==SPH_SORT_RELEVANCE || strlen($sortby)>0 );

    $this->_sortby = $sortby;
    $this->_sort = $mode;
}
|
443
|
+
|
444
|
+
/// bind per-field weights by order
|
445
|
+
/// DEPRECATED; use SetFieldWeights() instead
|
446
|
+
/// @param $weights  array of integer weights, in field order
function SetWeights ( $weights )
{
    assert ( is_array($weights) );
    foreach ( $weights as $w )
    {
        assert ( is_int($w) );
    }

    $this->_weights = $weights;
}
|
454
|
+
|
455
|
+
/// bind per-field weights by name
|
456
|
+
/// @param $weights  array mapping field name (string) to integer weight
function SetFieldWeights ( $weights )
{
    assert ( is_array($weights) );
    foreach ( $weights as $fieldName=>$w )
    {
        assert ( is_string($fieldName) );
        assert ( is_int($w) );
    }

    $this->_fieldweights = $weights;
}
|
466
|
+
|
467
|
+
/// bind per-index weights by name
|
468
|
+
/// @param $weights  array mapping index name (string) to integer weight
function SetIndexWeights ( $weights )
{
    assert ( is_array($weights) );
    foreach ( $weights as $indexName=>$w )
    {
        assert ( is_string($indexName) );
        assert ( is_int($w) );
    }

    $this->_indexweights = $weights;
}
|
478
|
+
|
479
|
+
/// set IDs range to match
|
480
|
+
/// only match records if document ID is between $min and $max (inclusive)
|
481
|
+
/// @param $min  numeric lower bound
/// @param $max  numeric upper bound; must not be below $min
function SetIDRange ( $min, $max )
{
    assert ( is_numeric($min) );
    assert ( is_numeric($max) );
    assert ( $min<=$max );

    $this->_max_id = $max;
    $this->_min_id = $min;
}
|
489
|
+
|
490
|
+
/// set values set filter
|
491
|
+
/// only match records where $attribute value is in given set
|
492
|
+
/// @param $attribute  attribute name (string)
/// @param $values     non-empty array of numeric values
/// @param $exclude    stored with the filter as its "exclude" flag
function SetFilter ( $attribute, $values, $exclude=false )
{
    assert ( is_string($attribute) );
    assert ( is_array($values) );
    assert ( count($values) );

    // with assertions disabled, a non-array or empty set adds no filter
    if ( !is_array($values) || !count($values) )
        return;

    foreach ( $values as $candidate )
    {
        assert ( is_numeric($candidate) );
    }

    $this->_filters[] = array (
        "type"=>SPH_FILTER_VALUES,
        "attr"=>$attribute,
        "exclude"=>$exclude,
        "values"=>$values );
}
|
506
|
+
|
507
|
+
/// set range filter
|
508
|
+
/// only match records if $attribute value is between $min and $max (inclusive)
|
509
|
+
/// @param $attribute  attribute name (string)
/// @param $min        integer lower bound
/// @param $max        integer upper bound; must not be below $min
/// @param $exclude    stored with the filter as its "exclude" flag
function SetFilterRange ( $attribute, $min, $max, $exclude=false )
{
    assert ( is_string($attribute) );
    assert ( is_int($min) );
    assert ( is_int($max) );
    assert ( $min<=$max );

    $filter = array (
        "type"=>SPH_FILTER_RANGE,
        "attr"=>$attribute,
        "exclude"=>$exclude,
        "min"=>$min,
        "max"=>$max );
    $this->_filters[] = $filter;
}
|
518
|
+
|
519
|
+
/// set float range filter
|
520
|
+
/// only match records if $attribute value is between $min and $max (inclusive)
|
521
|
+
/// @param $attribute  attribute name (string)
/// @param $min        float lower bound
/// @param $max        float upper bound; must not be below $min
/// @param $exclude    stored with the filter as its "exclude" flag
function SetFilterFloatRange ( $attribute, $min, $max, $exclude=false )
{
    assert ( is_string($attribute) );
    assert ( is_float($min) );
    assert ( is_float($max) );
    assert ( $min<=$max );

    $filter = array (
        "type"=>SPH_FILTER_FLOATRANGE,
        "attr"=>$attribute,
        "exclude"=>$exclude,
        "min"=>$min,
        "max"=>$max );
    $this->_filters[] = $filter;
}
|
530
|
+
|
531
|
+
/// setup anchor point for geosphere distance calculations
|
532
|
+
/// required to use @geodist in filters and sorting
|
533
|
+
/// latitude and longitude must be in radians
|
534
|
+
/// @param $attrlat   name of the latitude attribute (string)
/// @param $attrlong  name of the longitude attribute (string)
/// @param $lat       anchor latitude (float)
/// @param $long      anchor longitude (float)
function SetGeoAnchor ( $attrlat, $attrlong, $lat, $long )
{
    assert ( is_string($attrlat) );
    assert ( is_string($attrlong) );
    assert ( is_float($lat) );
    assert ( is_float($long) );

    $anchor = array (
        "attrlat"=>$attrlat,
        "attrlong"=>$attrlong,
        "lat"=>$lat,
        "long"=>$long );
    $this->_anchor = $anchor;
}
|
543
|
+
|
544
|
+
/// set grouping attribute and function
|
545
|
+
/// @param $attribute  group-by attribute name (string)
/// @param $func       one of the SPH_GROUPBY_* constants
/// @param $groupsort  group sorting clause (string)
function SetGroupBy ( $attribute, $func, $groupsort="@group desc" )
{
    assert ( is_string($attribute) );
    assert ( is_string($groupsort) );

    // loose comparison, same as a chain of == checks
    $known = array (
        SPH_GROUPBY_DAY,
        SPH_GROUPBY_WEEK,
        SPH_GROUPBY_MONTH,
        SPH_GROUPBY_YEAR,
        SPH_GROUPBY_ATTR,
        SPH_GROUPBY_ATTRPAIR );
    assert ( in_array ( $func, $known ) );

    $this->_groupsort = $groupsort;
    $this->_groupfunc = $func;
    $this->_groupby = $attribute;
}
|
560
|
+
|
561
|
+
/// set count-distinct attribute for group-by queries
|
562
|
+
/// @param $attribute  count-distinct attribute name (string)
function SetGroupDistinct ( $attribute )
{
    assert ( is_string($attribute) );

    $this->_groupdistinct = $attribute;
}
|
567
|
+
|
568
|
+
/// set distributed retries count and delay
|
569
|
+
/// @param $count  non-negative integer retry count
/// @param $delay  non-negative integer retry delay
function SetRetries ( $count, $delay=0 )
{
    assert ( is_int($count) && $count>=0 );
    assert ( is_int($delay) && $delay>=0 );

    $this->_retrydelay = $delay;
    $this->_retrycount = $count;
}
|
576
|
+
|
577
|
+
/// set result set format (hash or array; hash by default)
|
578
|
+
/// PHP specific; needed for group-by-MVA result sets that may contain duplicate IDs
|
579
|
+
/// @param $arrayresult  boolean flag stored in $this->_arrayresult
function SetArrayResult ( $arrayresult )
{
    assert ( is_bool($arrayresult) );

    $this->_arrayresult = $arrayresult;
}
|
584
|
+
|
585
|
+
//////////////////////////////////////////////////////////////////////////////
|
586
|
+
|
587
|
+
/// clear all filters (for multi-queries)
|
588
|
+
/// Empties both the filter list and the geo anchor.
function ResetFilters ()
{
    $this->_anchor = array();
    $this->_filters = array();
}
|
593
|
+
|
594
|
+
/// clear groupby settings (for multi-queries)
|
595
|
+
/// Puts the four group-by settings back to the values the constructor assigns.
function ResetGroupBy ()
{
    $this->_groupdistinct= "";
    $this->_groupsort = "@group desc";
    $this->_groupfunc = SPH_GROUPBY_DAY;
    $this->_groupby = "";
}
|
602
|
+
|
603
|
+
//////////////////////////////////////////////////////////////////////////////
|
604
|
+
|
605
|
+
/// connect to searchd server, run given search query through given indexes,
|
606
|
+
/// and return the search results
|
607
|
+
/// Runs a single query: queues it with AddQuery(), executes the batch with
/// RunQueries(), and returns the first result set.
/// @param $query    query text
/// @param $index    index name(s); "*" by default
/// @param $comment  comment passed along with the request
/// @return array|false  the result set, or false on failure (message is in
///                      $this->_error)
function Query ( $query, $index="*", $comment="" )
{
    // single-query mode must not be mixed with a pending multi-query batch
    assert ( empty($this->_reqs) );

    $this->AddQuery ( $query, $index, $comment );
    $results = $this->RunQueries ();

    if ( !is_array($results) )
    {
        // RunQueries() only empties the queue after a successful response.
        // Drop the failed request here, otherwise the next Query() call would
        // find it still queued and send it again alongside the new one.
        $this->_reqs = array ();
        return false; // probably network error; error message should be already filled
    }

    $this->_error = $results[0]["error"];
    $this->_warning = $results[0]["warning"];
    if ( $results[0]["status"]==SEARCHD_ERROR )
        return false;

    return $results[0];
}
|
624
|
+
|
625
|
+
/// helper to pack floats in network byte order
|
626
|
+
/// @param $f  value to encode as a 32-bit float
/// @return    4-byte string: the float's bit pattern as a big-endian integer
function _PackFloat ( $f )
{
    // reinterpret the machine-order float bytes as a machine-order 32-bit int
    $words = unpack ( "L*", pack ( "f", $f ) );

    // unpack() numbers unnamed elements from 1
    return pack ( "N", $words[1] );
}
|
632
|
+
|
633
|
+
/// add query to multi-query batch
|
634
|
+
/// returns index into results array from RunQueries() call
|
635
|
+
/// Serializes the current settings plus the given query into one binary
/// request and appends it to $this->_reqs.
/// @param $query    query text
/// @param $index    index name(s); "*" by default
/// @param $comment  comment appended to the request
/// @return int      position of the new request inside $this->_reqs
function AddQuery ( $query, $index="*", $comment="" )
{
    // mbstring workaround (undone before the request is stored)
    $this->_MBPush ();

    // offset, limit, match mode, ranker, sort mode
    $packet = pack ( "NNNNN", $this->_offset, $this->_limit, $this->_mode, $this->_ranker, $this->_sort );

    // sort-by clause and the query text, each length-prefixed
    $packet .= pack ( "N", strlen($this->_sortby) ) . $this->_sortby;
    $packet .= pack ( "N", strlen($query) ) . $query;

    // positional per-field weights
    $packet .= pack ( "N", count($this->_weights) );
    foreach ( $this->_weights as $w )
    {
        $packet .= pack ( "N", (int)$w );
    }

    // index list, id64 range marker, then the 64-bit id range
    $packet .= pack ( "N", strlen($index) ) . $index;
    $packet .= pack ( "N", 1 );
    $packet .= sphPack64 ( $this->_min_id );
    $packet .= sphPack64 ( $this->_max_id );

    // filters
    $packet .= pack ( "N", count($this->_filters) );
    foreach ( $this->_filters as $f )
    {
        $kind = $f["type"];
        $packet .= pack ( "N", strlen($f["attr"]) ) . $f["attr"];
        $packet .= pack ( "N", $kind );

        if ( $kind==SPH_FILTER_VALUES )
        {
            $packet .= pack ( "N", count($f["values"]) );
            foreach ( $f["values"] as $value )
            {
                // floatval() works around the 32-bit signed int limit on x32 platforms
                $packet .= pack ( "N", floatval($value) );
            }
        }
        elseif ( $kind==SPH_FILTER_RANGE )
        {
            $packet .= pack ( "NN", $f["min"], $f["max"] );
        }
        elseif ( $kind==SPH_FILTER_FLOATRANGE )
        {
            $packet .= $this->_PackFloat ( $f["min"] ) . $this->_PackFloat ( $f["max"] );
        }
        else
        {
            assert ( 0 && "internal error: unhandled filter type" );
        }

        $packet .= pack ( "N", $f["exclude"] );
    }

    // group-by clause, max-matches count, group-sort clause, cutoff count
    $packet .= pack ( "NN", $this->_groupfunc, strlen($this->_groupby) ) . $this->_groupby;
    $packet .= pack ( "N", $this->_maxmatches );
    $packet .= pack ( "N", strlen($this->_groupsort) ) . $this->_groupsort;
    $packet .= pack ( "NNN", $this->_cutoff, $this->_retrycount, $this->_retrydelay );
    $packet .= pack ( "N", strlen($this->_groupdistinct) ) . $this->_groupdistinct;

    // anchor point: a 0/1 presence flag, then the anchor itself when set
    if ( !empty($this->_anchor) )
    {
        $packet .= pack ( "N", 1 );
        $packet .= pack ( "N", strlen($this->_anchor["attrlat"]) ) . $this->_anchor["attrlat"];
        $packet .= pack ( "N", strlen($this->_anchor["attrlong"]) ) . $this->_anchor["attrlong"];
        $packet .= $this->_PackFloat ( $this->_anchor["lat"] ) . $this->_PackFloat ( $this->_anchor["long"] );
    }
    else
    {
        $packet .= pack ( "N", 0 );
    }

    // per-index weights
    $packet .= pack ( "N", count($this->_indexweights) );
    foreach ( $this->_indexweights as $idx=>$weight )
    {
        $packet .= pack ( "N", strlen($idx) ) . $idx . pack ( "N", $weight );
    }

    // max query time
    $packet .= pack ( "N", $this->_maxquerytime );

    // per-field weights
    $packet .= pack ( "N", count($this->_fieldweights) );
    foreach ( $this->_fieldweights as $field=>$weight )
    {
        $packet .= pack ( "N", strlen($field) ) . $field . pack ( "N", $weight );
    }

    // comment
    $packet .= pack ( "N", strlen($comment) ) . $comment;

    // mbstring workaround
    $this->_MBPop ();

    // queue the request; its position is the count after appending, minus one
    $this->_reqs[] = $packet;
    return count($this->_reqs)-1;
}
|
722
|
+
|
723
|
+
/// connect to searchd, run queries batch, and return an array of result sets
|
724
|
+
function RunQueries ()
{
	// Sends every request queued in $this->_reqs to searchd as a single batch
	// and decodes the reply into one result array per query.
	//
	// Returns false when nothing is queued (with $this->_error set), when the
	// connection cannot be made, or when no response is obtained. Otherwise
	// returns a list of result sets; each one always carries "error",
	// "warning" and "status", and, unless the server reported an error for
	// that query, also "fields", "attrs", "total", "total_found", "time" and,
	// when present in the reply, "matches" and "words".

	if ( empty($this->_reqs) )
	{
		$this->_error = "no queries defined, issue AddQuery() first";
		return false;
	}

	// mbstring workaround
	$this->_MBPush ();

	$fp = $this->_Connect();
	if ( !$fp )
	{
		$this->_MBPop ();
		return false;
	}

	////////////////////////////
	// send query, get response
	////////////////////////////

	$nreqs = count($this->_reqs);
	$body = join ( "", $this->_reqs );
	$bodylen = 4+strlen($body); // the 4-byte query counter is part of the body
	$packet = pack ( "nnNN", SEARCHD_COMMAND_SEARCH, VER_COMMAND_SEARCH, $bodylen, $nreqs ) . $body;

	fwrite ( $fp, $packet, $bodylen+8 );
	$response = $this->_GetResponse ( $fp, VER_COMMAND_SEARCH );
	if ( !$response )
	{
		$this->_MBPop ();
		return false;
	}

	// a response came back, so the queue can be emptied
	$this->_reqs = array ();

	//////////////////
	// parse response
	//////////////////

	$pos = 0; // read cursor into $response
	$rlen = strlen($response); // loops stop here to survive broken responses
	$results = array ();

	for ( $ires=0; $ires<$nreqs && $pos<$rlen; $ires++ )
	{
		$result = array ( "error"=>"", "warning"=>"" );

		// per-query status, followed by a message unless the status is OK
		list(,$status) = unpack ( "N*", substr ( $response, $pos, 4 ) );
		$pos += 4;
		$result["status"] = $status;

		if ( $status!=SEARCHD_OK )
		{
			list(,$len) = unpack ( "N*", substr ( $response, $pos, 4 ) );
			$pos += 4;
			$message = substr ( $response, $pos, $len );
			$pos += $len;

			if ( $status!=SEARCHD_WARNING )
			{
				// anything but a warning ends this result set right here
				$result["error"] = $message;
				$results[] = $result;
				continue;
			}
			$result["warning"] = $message;
		}

		// schema: field names, then attribute name => type pairs
		$fields = array ();
		$attrs = array ();

		list(,$nfields) = unpack ( "N*", substr ( $response, $pos, 4 ) );
		$pos += 4;
		for ( $i=0; $i<$nfields && $pos<$rlen; $i++ )
		{
			list(,$len) = unpack ( "N*", substr ( $response, $pos, 4 ) );
			$pos += 4;
			$fields[] = substr ( $response, $pos, $len );
			$pos += $len;
		}
		$result["fields"] = $fields;

		list(,$nattrs) = unpack ( "N*", substr ( $response, $pos, 4 ) );
		$pos += 4;
		for ( $i=0; $i<$nattrs && $pos<$rlen; $i++ )
		{
			list(,$len) = unpack ( "N*", substr ( $response, $pos, 4 ) );
			$pos += 4;
			$name = substr ( $response, $pos, $len );
			$pos += $len;
			list(,$type) = unpack ( "N*", substr ( $response, $pos, 4 ) );
			$pos += 4;
			$attrs[$name] = $type;
		}
		$result["attrs"] = $attrs;

		// match count and the flag telling whether document ids are 64-bit
		list(,$count) = unpack ( "N*", substr ( $response, $pos, 4 ) );
		$pos += 4;
		list(,$id64) = unpack ( "N*", substr ( $response, $pos, 4 ) );
		$pos += 4;

		// matches; $idx doubles as the position in array-result mode
		for ( $idx=0; $idx<$count && $pos<$rlen; $idx++ )
		{
			// document id and weight
			if ( $id64 )
			{
				$doc = sphUnpack64 ( substr ( $response, $pos, 8 ) );
				$pos += 8;
				list(,$weight) = unpack ( "N*", substr ( $response, $pos, 4 ) );
				$pos += 4;
			} else
			{
				list ( $doc, $weight ) = array_values ( unpack ( "N*N*",
					substr ( $response, $pos, 8 ) ) );
				$pos += 8;

				if ( PHP_INT_SIZE>=8 )
				{
					// x64 route, workaround broken unpack() in 5.2.2+
					if ( $doc<0 )
						$doc += (1<<32);
				} else
				{
					// x32 route, render the id as an unsigned decimal string
					$doc = sprintf ( "%u", $doc );
				}
			}
			$weight = sprintf ( "%u", $weight );

			// attribute values, in schema order
			$attrvals = array ();
			foreach ( $attrs as $name=>$type )
			{
				// every attribute starts with one 32-bit word
				list(,$raw) = unpack ( "N*", substr ( $response, $pos, 4 ) );
				$pos += 4;

				if ( $type==SPH_ATTR_FLOAT )
				{
					// reinterpret the 32-bit word as a float
					list(,$fval) = unpack ( "f*", pack ( "L", $raw ) );
					$attrvals[$name] = $fval;
				} elseif ( $type & SPH_ATTR_MULTI )
				{
					// the word is a value count; that many words follow
					$values = array ();
					for ( $i=0; $i<$raw && $pos<$rlen; $i++ )
					{
						list(,$item) = unpack ( "N*", substr ( $response, $pos, 4 ) );
						$pos += 4;
						$values[] = sprintf ( "%u", $item );
					}
					$attrvals[$name] = $values;
				} else
				{
					// everything else is an unsigned int
					$attrvals[$name] = sprintf ( "%u", $raw );
				}
			}

			// store the match, keyed by position or by document id
			if ( $this->_arrayresult )
				$result["matches"][$idx] = array ( "id"=>$doc, "weight"=>$weight, "attrs"=>$attrvals );
			else
				$result["matches"][$doc] = array ( "weight"=>$weight, "attrs"=>$attrvals );
		}

		// totals, query time and number of per-word statistics entries
		list ( $total, $total_found, $msecs, $nwords ) =
			array_values ( unpack ( "N*N*N*N*", substr ( $response, $pos, 16 ) ) );
		$result["total"] = sprintf ( "%u", $total );
		$result["total_found"] = sprintf ( "%u", $total_found );
		$result["time"] = sprintf ( "%.3f", $msecs/1000 );
		$pos += 16;

		for ( $i=0; $i<$nwords && $pos<$rlen; $i++ )
		{
			list(,$len) = unpack ( "N*", substr ( $response, $pos, 4 ) );
			$pos += 4;
			$word = substr ( $response, $pos, $len );
			$pos += $len;
			list ( $docs, $hits ) = array_values ( unpack ( "N*N*", substr ( $response, $pos, 8 ) ) );
			$pos += 8;
			$result["words"][$word] = array (
				"docs"=>sprintf ( "%u", $docs ),
				"hits"=>sprintf ( "%u", $hits ) );
		}

		$results[] = $result;
	}

	$this->_MBPop ();
	return $results;
}
|
912
|
+
|
913
|
+
/////////////////////////////////////////////////////////////////////////////
|
914
|
+
// excerpts generation
|
915
|
+
/////////////////////////////////////////////////////////////////////////////
|
916
|
+
|
917
|
+
/// connect to searchd server, and generate excerpts (snippets)
|
918
|
+
/// of given documents for given query. returns false on failure,
|
919
|
+
/// an array of snippets on success
|
920
|
+
function BuildExcerpts ( $docs, $index, $words, $opts=array() )
{
	// Asks searchd to build an excerpt (snippet) for every string in $docs,
	// highlighting $words, using the settings of index $index.
	//
	// $opts may override: before_match, after_match, chunk_separator, limit,
	// around, exact_phrase, single_passage, use_boundaries, weight_order.
	//
	// Returns false on failure (connection, empty response or a truncated
	// reply, the latter with $this->_error set to "incomplete reply"),
	// otherwise a list with one excerpt string per input document.

	assert ( is_array($docs) );
	assert ( is_string($index) );
	assert ( is_string($words) );
	assert ( is_array($opts) );

	$this->_MBPush ();

	if (!( $fp = $this->_Connect() ))
	{
		$this->_MBPop();
		return false;
	}

	/////////////////
	// fixup options
	/////////////////

	if ( !isset($opts["before_match"]) )		$opts["before_match"] = "<b>";
	if ( !isset($opts["after_match"]) )			$opts["after_match"] = "</b>";
	if ( !isset($opts["chunk_separator"]) )		$opts["chunk_separator"] = " ... ";
	if ( !isset($opts["limit"]) )				$opts["limit"] = 256;
	if ( !isset($opts["around"]) )				$opts["around"] = 5;
	if ( !isset($opts["exact_phrase"]) )		$opts["exact_phrase"] = false;
	if ( !isset($opts["single_passage"]) )		$opts["single_passage"] = false;
	if ( !isset($opts["use_boundaries"]) )		$opts["use_boundaries"] = false;
	if ( !isset($opts["weight_order"]) )		$opts["weight_order"] = false;

	/////////////////
	// build request
	/////////////////

	// v.1.0 req
	$flags = 1; // remove spaces
	if ( $opts["exact_phrase"] )	$flags |= 2;
	if ( $opts["single_passage"] )	$flags |= 4;
	if ( $opts["use_boundaries"] )	$flags |= 8;
	if ( $opts["weight_order"] )	$flags |= 16;
	$req = pack ( "NN", 0, $flags ); // mode=0, flags=$flags
	$req .= pack ( "N", strlen($index) ) . $index; // req index
	$req .= pack ( "N", strlen($words) ) . $words; // req words

	// options
	$req .= pack ( "N", strlen($opts["before_match"]) ) . $opts["before_match"];
	$req .= pack ( "N", strlen($opts["after_match"]) ) . $opts["after_match"];
	$req .= pack ( "N", strlen($opts["chunk_separator"]) ) . $opts["chunk_separator"];
	$req .= pack ( "N", (int)$opts["limit"] );
	$req .= pack ( "N", (int)$opts["around"] );

	// documents
	$ndocs = count($docs);
	$req .= pack ( "N", $ndocs );
	foreach ( $docs as $doc )
	{
		assert ( is_string($doc) );
		$req .= pack ( "N", strlen($doc) ) . $doc;
	}

	////////////////////////////
	// send query, get response
	////////////////////////////

	$len = strlen($req);
	$req = pack ( "nnN", SEARCHD_COMMAND_EXCERPT, VER_COMMAND_EXCERPT, $len ) . $req; // add header
	fwrite ( $fp, $req, $len+8 );
	if (!( $response = $this->_GetResponse ( $fp, VER_COMMAND_EXCERPT ) ))
	{
		$this->_MBPop ();
		return false;
	}

	//////////////////
	// parse response
	//////////////////

	$pos = 0;
	$res = array ();
	$rlen = strlen($response);
	for ( $i=0; $i<$ndocs; $i++ )
	{
		// the 4-byte length prefix must be there before it is unpacked,
		// otherwise unpack() yields nothing and PHP raises warnings
		if ( $pos+4 > $rlen )
		{
			$this->_error = "incomplete reply";
			$this->_MBPop ();
			return false;
		}
		list(,$len) = unpack ( "N*", substr ( $response, $pos, 4 ) );
		$pos += 4;

		// a negative length can only come from a 32-bit overflow of a bogus
		// prefix; treat it like any other truncated reply
		if ( $len<0 || $pos+$len > $rlen )
		{
			$this->_error = "incomplete reply";
			$this->_MBPop ();
			return false;
		}
		$res[] = $len ? substr ( $response, $pos, $len ) : "";
		$pos += $len;
	}

	$this->_MBPop ();
	return $res;
}
|
1016
|
+
|
1017
|
+
|
1018
|
+
/////////////////////////////////////////////////////////////////////////////
|
1019
|
+
// keyword generation
|
1020
|
+
/////////////////////////////////////////////////////////////////////////////
|
1021
|
+
|
1022
|
+
/// connect to searchd server, and generate keyword list for a given query
|
1023
|
+
/// returns false on failure,
|
1024
|
+
/// an array of words on success
|
1025
|
+
function BuildKeywords ( $query, $index, $hits )
{
	// Asks searchd to split $query into keywords using the settings of
	// index $index.
	//
	// Returns false on failure (connection, empty response or a truncated
	// reply, the latter with $this->_error set to "incomplete reply"),
	// otherwise a list of arrays with "tokenized" and "normalized" keys,
	// plus "docs" and "hits" when $hits is true.

	assert ( is_string($query) );
	assert ( is_string($index) );
	assert ( is_bool($hits) );

	$this->_MBPush ();

	if (!( $fp = $this->_Connect() ))
	{
		$this->_MBPop();
		return false;
	}

	/////////////////
	// build request
	/////////////////

	// v.1.0 req
	$req = pack ( "N", strlen($query) ) . $query; // req query
	$req .= pack ( "N", strlen($index) ) . $index; // req index
	$req .= pack ( "N", (int)$hits );

	////////////////////////////
	// send query, get response
	////////////////////////////

	$len = strlen($req);
	$req = pack ( "nnN", SEARCHD_COMMAND_KEYWORDS, VER_COMMAND_KEYWORDS, $len ) . $req; // add header
	fwrite ( $fp, $req, $len+8 );
	if (!( $response = $this->_GetResponse ( $fp, VER_COMMAND_KEYWORDS ) ))
	{
		$this->_MBPop ();
		return false;
	}

	//////////////////
	// parse response
	//////////////////

	$pos = 0;
	$res = array ();
	$rlen = strlen($response);

	// every read below is bounds-checked first; $ok drops to false as soon
	// as the reply turns out to be shorter than its own contents claim
	$ok = ( $rlen>=4 );
	$nwords = 0;
	if ( $ok )
	{
		list(,$nwords) = unpack ( "N*", substr ( $response, $pos, 4 ) );
		$pos += 4;
	}

	for ( $i=0; $ok && $i<$nwords; $i++ )
	{
		$entry = array ();

		// two length-prefixed strings, in this order
		foreach ( array ( "tokenized", "normalized" ) as $key )
		{
			if ( $pos+4 > $rlen )
			{
				$ok = false;
				break;
			}
			list(,$len) = unpack ( "N*", substr ( $response, $pos, 4 ) );
			$pos += 4;

			if ( $len<0 || $pos+$len > $rlen )
			{
				$ok = false;
				break;
			}
			$entry[$key] = $len ? substr ( $response, $pos, $len ) : "";
			$pos += $len;
		}
		if ( !$ok )
			break;

		// per-keyword statistics are only present when they were requested
		if ( $hits )
		{
			if ( $pos+8 > $rlen )
			{
				$ok = false;
				break;
			}
			list($ndocs,$nhits) = array_values ( unpack ( "N*N*", substr ( $response, $pos, 8 ) ) );
			$pos += 8;
			$entry["docs"] = $ndocs;
			$entry["hits"] = $nhits;
		}

		$res[] = $entry;
	}

	if ( !$ok )
	{
		$this->_error = "incomplete reply";
		$this->_MBPop ();
		return false;
	}

	$this->_MBPop ();
	return $res;
}
|
1101
|
+
|
1102
|
+
function EscapeString ( $string )
{
	// Prefixes every character that is special in the query syntax with a
	// backslash, so that $string is matched as plain text.
	//
	// Returns the escaped copy of $string.

	// The backslash comes first: str_replace() applies the pairs in order,
	// so escaping it later would double the backslashes inserted for the
	// other characters. The double quote is written as '"' because inside
	// single quotes '\"' is two characters and never matches a bare quote.
	$from = array ( '\\', '(',')','|','-','!','@','~','"','&' );
	$to   = array ( '\\\\', '\\(','\\)','\\|','\\-','\\!','\\@','\\~','\\"','\\&' );

	return str_replace ( $from, $to, $string );
}
|
1109
|
+
|
1110
|
+
/////////////////////////////////////////////////////////////////////////////
|
1111
|
+
// attribute updates
|
1112
|
+
/////////////////////////////////////////////////////////////////////////////
|
1113
|
+
|
1114
|
+
/// update given attribute values on given documents in given indexes
|
1115
|
+
/// returns amount of updated documents (0 or more) on success, or -1 on failure
|
1116
|
+
function UpdateAttributes ( $index, $attrs, $values )
{
	// Sends an attribute update for index $index to searchd.
	//
	// $attrs is a list of attribute names; $values maps a document id to a
	// list of integer values, one per name in $attrs.
	//
	// Returns the number unpacked from the first four bytes of the response
	// (the amount of updated documents), or -1 when the connection fails or
	// no response is obtained.

	// verify everything
	assert ( is_string($index) );

	assert ( is_array($attrs) );
	foreach ( $attrs as $name )
	{
		assert ( is_string($name) );
	}

	assert ( is_array($values) );
	foreach ( $values as $docid=>$row )
	{
		assert ( is_numeric($docid) );
		assert ( is_array($row) );
		assert ( count($row)==count($attrs) );
		foreach ( $row as $value )
		{
			assert ( is_int($value) );
		}
	}

	// request body: index name, attribute names, then one row per document
	$payload = pack ( "N", strlen($index) ) . $index;

	$payload .= pack ( "N", count($attrs) );
	foreach ( $attrs as $name )
	{
		$payload .= pack ( "N", strlen($name) ) . $name;
	}

	$payload .= pack ( "N", count($values) );
	foreach ( $values as $docid=>$row )
	{
		// 64-bit document id followed by its 32-bit values
		$payload .= sphPack64 ( $docid );
		foreach ( $row as $value )
		{
			$payload .= pack ( "N", $value );
		}
	}

	// mbstring workaround
	$this->_MBPush ();

	// connect, send query, get response
	$fp = $this->_Connect();
	if ( !$fp )
	{
		$this->_MBPop ();
		return -1;
	}

	$paylen = strlen($payload);
	$packet = pack ( "nnN", SEARCHD_COMMAND_UPDATE, VER_COMMAND_UPDATE, $paylen ) . $payload; // header + body
	fwrite ( $fp, $packet, $paylen+8 );

	$response = $this->_GetResponse ( $fp, VER_COMMAND_UPDATE );
	if ( !$response )
	{
		$this->_MBPop ();
		return -1;
	}

	// parse response
	list(,$updated) = unpack ( "N*", substr ( $response, 0, 4 ) );
	$this->_MBPop ();
	return $updated;
}
|
1175
|
+
}
|
1176
|
+
|
1177
|
+
//
|
1178
|
+
// $Id$
|
1179
|
+
//
|
1180
|
+
|
1181
|
+
?>
|