riddle 0.9.8.1112
Sign up to get free protection for your applications and to get access to all the features.
- data/MIT-LICENCE +20 -0
- data/README +60 -0
- data/lib/riddle.rb +20 -0
- data/lib/riddle/client.rb +548 -0
- data/lib/riddle/client/filter.rb +44 -0
- data/lib/riddle/client/message.rb +65 -0
- data/lib/riddle/client/response.rb +74 -0
- data/spec/fixtures/data/anchor.bin +0 -0
- data/spec/fixtures/data/any.bin +0 -0
- data/spec/fixtures/data/boolean.bin +0 -0
- data/spec/fixtures/data/distinct.bin +0 -0
- data/spec/fixtures/data/field_weights.bin +0 -0
- data/spec/fixtures/data/filter.bin +0 -0
- data/spec/fixtures/data/filter_array.bin +0 -0
- data/spec/fixtures/data/filter_array_exclude.bin +0 -0
- data/spec/fixtures/data/filter_floats.bin +0 -0
- data/spec/fixtures/data/filter_floats_exclude.bin +0 -0
- data/spec/fixtures/data/filter_floats_range.bin +0 -0
- data/spec/fixtures/data/filter_range.bin +0 -0
- data/spec/fixtures/data/filter_range_exclude.bin +0 -0
- data/spec/fixtures/data/group.bin +0 -0
- data/spec/fixtures/data/index.bin +0 -0
- data/spec/fixtures/data/index_weights.bin +0 -0
- data/spec/fixtures/data/phrase.bin +0 -0
- data/spec/fixtures/data/rank_mode.bin +0 -0
- data/spec/fixtures/data/simple.bin +0 -0
- data/spec/fixtures/data/sort.bin +0 -0
- data/spec/fixtures/data/update_simple.bin +0 -0
- data/spec/fixtures/data/weights.bin +0 -0
- data/spec/fixtures/data_generator.php +130 -0
- data/spec/fixtures/sphinx/configuration.erb +38 -0
- data/spec/fixtures/sphinx/people.old.spa +0 -0
- data/spec/fixtures/sphinx/people.old.spd +0 -0
- data/spec/fixtures/sphinx/people.old.sph +0 -0
- data/spec/fixtures/sphinx/people.old.spi +0 -0
- data/spec/fixtures/sphinx/people.old.spm +0 -0
- data/spec/fixtures/sphinx/people.old.spp +0 -0
- data/spec/fixtures/sphinx/people.spa +0 -0
- data/spec/fixtures/sphinx/people.spd +0 -0
- data/spec/fixtures/sphinx/people.sph +0 -0
- data/spec/fixtures/sphinx/people.spi +0 -0
- data/spec/fixtures/sphinx/people.spm +0 -0
- data/spec/fixtures/sphinx/people.spp +0 -0
- data/spec/fixtures/sphinx/searchd.log +4732 -0
- data/spec/fixtures/sphinx/searchd.query.log +783 -0
- data/spec/fixtures/sphinx/spec.conf +38 -0
- data/spec/fixtures/sphinxapi.php +1066 -0
- data/spec/fixtures/sql/conf.example.yml +3 -0
- data/spec/fixtures/sql/conf.yml +3 -0
- data/spec/fixtures/sql/data.sql +25000 -0
- data/spec/fixtures/sql/structure.sql +16 -0
- data/spec/functional/excerpt_spec.rb +102 -0
- data/spec/functional/search_spec.rb +69 -0
- data/spec/functional/update_spec.rb +41 -0
- data/spec/spec_helper.rb +26 -0
- data/spec/sphinx_helper.rb +92 -0
- data/spec/unit/client_spec.rb +154 -0
- data/spec/unit/filter_spec.rb +33 -0
- data/spec/unit/message_spec.rb +63 -0
- data/spec/unit/response_spec.rb +64 -0
- metadata +128 -0
@@ -0,0 +1,38 @@
|
|
1
|
+
indexer
|
2
|
+
{
|
3
|
+
mem_limit = 64M
|
4
|
+
}
|
5
|
+
|
6
|
+
searchd
|
7
|
+
{
|
8
|
+
port = 3313
|
9
|
+
log = /Users/pat/Code/ruby/riddle/spec/fixtures/sphinx/searchd.log
|
10
|
+
query_log = /Users/pat/Code/ruby/riddle/spec/fixtures/sphinx/searchd.query.log
|
11
|
+
read_timeout = 5
|
12
|
+
max_children = 30
|
13
|
+
pid_file = /Users/pat/Code/ruby/riddle/spec/fixtures/sphinx/searchd.pid
|
14
|
+
}
|
15
|
+
|
16
|
+
source peoples
|
17
|
+
{
|
18
|
+
type = mysql
|
19
|
+
sql_host = localhost
|
20
|
+
sql_user = riddle
|
21
|
+
sql_pass = wossname
|
22
|
+
sql_db = riddle_sphinx_spec
|
23
|
+
|
24
|
+
sql_query = SELECT id, first_name, middle_initial, last_name, gender, street_address, city, state, postcode, email, UNIX_TIMESTAMP(birthday) AS birthday FROM people WHERE id >= $start AND id <= $end
|
25
|
+
sql_query_range = SELECT MIN(id), MAX(id) FROM people
|
26
|
+
sql_query_info = SELECT * FROM people WHERE id = $id
|
27
|
+
sql_date_column = birthday
|
28
|
+
}
|
29
|
+
|
30
|
+
index people
|
31
|
+
{
|
32
|
+
source = peoples
|
33
|
+
morphology = stem_en
|
34
|
+
path = /Users/pat/Code/ruby/riddle/spec/fixtures/sphinx/people
|
35
|
+
charset_type = utf-8
|
36
|
+
enable_star = 1
|
37
|
+
min_prefix_len = 1
|
38
|
+
}
|
@@ -0,0 +1,1066 @@
|
|
1
|
+
<?php
|
2
|
+
|
3
|
+
//
|
4
|
+
// $Id: sphinxapi.php 1103 2008-01-24 18:42:57Z shodan $
|
5
|
+
//
|
6
|
+
|
7
|
+
//
|
8
|
+
// Copyright (c) 2001-2008, Andrew Aksyonoff. All rights reserved.
|
9
|
+
//
|
10
|
+
// This program is free software; you can redistribute it and/or modify
|
11
|
+
// it under the terms of the GNU General Public License. You should have
|
12
|
+
// received a copy of the GPL license along with this program; if you
|
13
|
+
// did not, you can find it at http://www.gnu.org/
|
14
|
+
//
|
15
|
+
|
16
|
+
/////////////////////////////////////////////////////////////////////////////
|
17
|
+
// PHP version of Sphinx searchd client (PHP API)
|
18
|
+
/////////////////////////////////////////////////////////////////////////////
|
19
|
+
|
20
|
+
/// known searchd commands
|
21
|
+
define ( "SEARCHD_COMMAND_SEARCH", 0 );
|
22
|
+
define ( "SEARCHD_COMMAND_EXCERPT", 1 );
|
23
|
+
define ( "SEARCHD_COMMAND_UPDATE", 2 );
|
24
|
+
|
25
|
+
/// current client-side command implementation versions
|
26
|
+
define ( "VER_COMMAND_SEARCH", 0x112 );
|
27
|
+
define ( "VER_COMMAND_EXCERPT", 0x100 );
|
28
|
+
define ( "VER_COMMAND_UPDATE", 0x101 );
|
29
|
+
|
30
|
+
/// known searchd status codes
|
31
|
+
define ( "SEARCHD_OK", 0 );
|
32
|
+
define ( "SEARCHD_ERROR", 1 );
|
33
|
+
define ( "SEARCHD_RETRY", 2 );
|
34
|
+
define ( "SEARCHD_WARNING", 3 );
|
35
|
+
|
36
|
+
/// known match modes
|
37
|
+
define ( "SPH_MATCH_ALL", 0 );
|
38
|
+
define ( "SPH_MATCH_ANY", 1 );
|
39
|
+
define ( "SPH_MATCH_PHRASE", 2 );
|
40
|
+
define ( "SPH_MATCH_BOOLEAN", 3 );
|
41
|
+
define ( "SPH_MATCH_EXTENDED", 4 );
|
42
|
+
define ( "SPH_MATCH_FULLSCAN", 5 );
|
43
|
+
define ( "SPH_MATCH_EXTENDED2", 6 ); // extended engine V2 (TEMPORARY, WILL BE REMOVED)
|
44
|
+
|
45
|
+
/// known ranking modes (ext2 only)
|
46
|
+
define ( "SPH_RANK_PROXIMITY_BM25", 0 ); ///< default mode, phrase proximity major factor and BM25 minor one
|
47
|
+
define ( "SPH_RANK_BM25", 1 ); ///< statistical mode, BM25 ranking only (faster but worse quality)
|
48
|
+
define ( "SPH_RANK_NONE", 2 ); ///< no ranking, all matches get a weight of 1
|
49
|
+
define ( "SPH_RANK_WORDCOUNT", 3 ); ///< simple word-count weighting, rank is a weighted sum of per-field keyword occurrence counts
|
50
|
+
|
51
|
+
/// known sort modes
|
52
|
+
define ( "SPH_SORT_RELEVANCE", 0 );
|
53
|
+
define ( "SPH_SORT_ATTR_DESC", 1 );
|
54
|
+
define ( "SPH_SORT_ATTR_ASC", 2 );
|
55
|
+
define ( "SPH_SORT_TIME_SEGMENTS", 3 );
|
56
|
+
define ( "SPH_SORT_EXTENDED", 4 );
|
57
|
+
define ( "SPH_SORT_EXPR", 5 );
|
58
|
+
|
59
|
+
/// known filter types
|
60
|
+
define ( "SPH_FILTER_VALUES", 0 );
|
61
|
+
define ( "SPH_FILTER_RANGE", 1 );
|
62
|
+
define ( "SPH_FILTER_FLOATRANGE", 2 );
|
63
|
+
|
64
|
+
/// known attribute types
|
65
|
+
define ( "SPH_ATTR_INTEGER", 1 );
|
66
|
+
define ( "SPH_ATTR_TIMESTAMP", 2 );
|
67
|
+
define ( "SPH_ATTR_ORDINAL", 3 );
|
68
|
+
define ( "SPH_ATTR_BOOL", 4 );
|
69
|
+
define ( "SPH_ATTR_FLOAT", 5 );
|
70
|
+
define ( "SPH_ATTR_MULTI", 0x40000000 );
|
71
|
+
|
72
|
+
/// known grouping functions
|
73
|
+
define ( "SPH_GROUPBY_DAY", 0 );
|
74
|
+
define ( "SPH_GROUPBY_WEEK", 1 );
|
75
|
+
define ( "SPH_GROUPBY_MONTH", 2 );
|
76
|
+
define ( "SPH_GROUPBY_YEAR", 3 );
|
77
|
+
define ( "SPH_GROUPBY_ATTR", 4 );
|
78
|
+
define ( "SPH_GROUPBY_ATTRPAIR", 5 );
|
79
|
+
|
80
|
+
|
81
|
+
/// portably pack numeric to 64 unsigned bits, network order
|
82
|
+
function sphPack64 ( $v )
{
	// Packs a numeric value (int or numeric string) into 8 bytes:
	// two 32-bit words, high word first, each in network byte order.
	assert ( is_numeric($v) );

	// 64-bit PHP: native integers are wide enough, split with a shift and a mask
	if ( PHP_INT_SIZE>=8 )
	{
		$num = (int)$v;
		$hi = $num>>32;
		$lo = $num&((1<<32)-1);
		return pack ( "NN", $hi, $lo );
	}

	// everything below is the 32-bit PHP route
	$base = "4294967296"; // 2^32, kept as a decimal string for bcmath

	// bcmath available: split the decimal string with arbitrary-precision math
	if ( function_exists("bcmul") )
	{
		$hi = bcdiv ( $v, $base, 0 );
		$lo = bcmod ( $v, $base );
		// the float casts are deliberate; an int cast would lose the 31st bit
		return pack ( "NN", (float)$hi, (float)$lo );
	}

	// no bcmath: only values of 15 or fewer decimal digits are handled,
	// because a PHP float is a double and still represents those exactly
	if ( strlen($v)>15 )
		die ( "INTERNAL ERROR: packing more than 15-digit numeric on 32-bit PHP is not implemented yet (contact support)" );

	$f = (float)$v;
	$hi = (int)($f/$base);
	$lo = (int)($f-$base*$hi);
	return pack ( "NN", $hi, $lo );
}
|
116
|
+
|
117
|
+
|
118
|
+
/// portably unpack 64 unsigned bits, network order to numeric
|
119
|
+
function sphUnpack64 ( $v )
{
	// Decodes 8 bytes (two network-order 32-bit words, high word first)
	// into a number; on 32-bit PHP the result is a decimal string.
	list($hi,$lo) = array_values ( unpack ( "N*N*", $v ) );

	// 64-bit PHP: recombine with native integer arithmetic
	if ( PHP_INT_SIZE>=8 )
	{
		// some PHP builds (5.2.2 to 5.2.5) hand back negative words; wrap them to unsigned
		$wrap = 1<<32;
		if ( $hi<0 )
			$hi += $wrap;
		if ( $lo<0 )
			$lo += $wrap;
		return ($hi<<32) + $lo;
	}

	// 32-bit PHP: work on unsigned decimal strings
	$hi = sprintf ( "%u", $hi );
	$lo = sprintf ( "%u", $lo );
	$base = "4294967296"; // 2^32

	// bcmath available: exact arbitrary-precision recombination
	if ( function_exists("bcmul") )
		return bcadd ( $lo, bcmul ( $base, $hi ) );

	// no bcmath: a high word of 2^20 or more does not fit the float route
	if ( $hi>=1048576 )
		die ( "INTERNAL ERROR: unpacking more than 15-digit numeric on 32-bit PHP is not implemented yet (contact support)" );

	// small enough for a double; format explicitly because the builtin
	// float-to-string conversion is only about 39-40 bits precise
	$f = ((float)$hi)*$base + (float)$lo;
	return sprintf ( "%.0f", $f );
}
|
152
|
+
|
153
|
+
|
154
|
+
/// sphinx searchd client class
|
155
|
+
class SphinxClient
|
156
|
+
{
|
157
|
+
var $_host; ///< searchd host (default is "localhost")
|
158
|
+
var $_port; ///< searchd port (default is 3312)
|
159
|
+
var $_offset; ///< how many records to seek from result-set start (default is 0)
|
160
|
+
var $_limit; ///< how many records to return from result-set starting at offset (default is 20)
|
161
|
+
var $_mode; ///< query matching mode (default is SPH_MATCH_ALL)
|
162
|
+
var $_weights; ///< per-field weights (default is 1 for all fields)
|
163
|
+
var $_sort; ///< match sorting mode (default is SPH_SORT_RELEVANCE)
|
164
|
+
var $_sortby; ///< attribute to sort by (default is "")
|
165
|
+
var $_min_id; ///< min ID to match (default is 0, which means no limit)
|
166
|
+
var $_max_id; ///< max ID to match (default is 0, which means no limit)
|
167
|
+
var $_filters; ///< search filters
|
168
|
+
var $_groupby; ///< group-by attribute name
|
169
|
+
var $_groupfunc; ///< group-by function (to pre-process group-by attribute value with)
|
170
|
+
var $_groupsort; ///< group-by sorting clause (to sort groups in result set with)
|
171
|
+
var $_groupdistinct;///< group-by count-distinct attribute
|
172
|
+
var $_maxmatches; ///< max matches to retrieve
|
173
|
+
var $_cutoff; ///< cutoff to stop searching at (default is 0)
|
174
|
+
var $_retrycount; ///< distributed retries count
|
175
|
+
var $_retrydelay; ///< distributed retries delay
|
176
|
+
var $_anchor; ///< geographical anchor point
|
177
|
+
var $_indexweights; ///< per-index weights
|
178
|
+
var $_ranker; ///< ranking mode (default is SPH_RANK_PROXIMITY_BM25)
|
179
|
+
var $_maxquerytime; ///< max query time, milliseconds (default is 0, do not limit)
|
180
|
+
var $_fieldweights; ///< per-field-name weights
|
181
|
+
|
182
|
+
var $_error; ///< last error message
|
183
|
+
var $_warning; ///< last warning message
|
184
|
+
|
185
|
+
var $_reqs; ///< requests array for multi-query
|
186
|
+
var $_mbenc; ///< stored mbstring encoding
|
187
|
+
var $_arrayresult; ///< whether $result["matches"] should be a hash or an array
|
188
|
+
|
189
|
+
/////////////////////////////////////////////////////////////////////////////
|
190
|
+
// common stuff
|
191
|
+
/////////////////////////////////////////////////////////////////////////////
|
192
|
+
|
193
|
+
/// create a new client object and fill defaults
|
194
|
+
function __construct ()
{
	// Unified constructor. PHP 8 no longer treats a method named after the
	// class as a constructor, so the defaults must be filled in here or a
	// freshly created client would start with every setting unset.

	// per-client-object settings
	$this->_host		= "localhost";
	$this->_port		= 3312;

	// per-query settings
	$this->_offset		= 0;
	$this->_limit		= 20;
	$this->_mode		= SPH_MATCH_ALL;
	$this->_weights		= array ();
	$this->_sort		= SPH_SORT_RELEVANCE;
	$this->_sortby		= "";
	$this->_min_id		= 0;
	$this->_max_id		= 0;
	$this->_filters		= array ();
	$this->_groupby		= "";
	$this->_groupfunc	= SPH_GROUPBY_DAY;
	$this->_groupsort	= "@group desc";
	$this->_groupdistinct= "";
	$this->_maxmatches	= 1000;
	$this->_cutoff		= 0;
	$this->_retrycount	= 0;
	$this->_retrydelay	= 0;
	$this->_anchor		= array ();
	$this->_indexweights= array ();
	$this->_ranker		= SPH_RANK_PROXIMITY_BM25;
	$this->_maxquerytime= 0;
	$this->_fieldweights= array();

	$this->_error		= ""; // per-reply fields (for single-query case)
	$this->_warning		= "";
	$this->_reqs		= array ();	// requests storage (for multi-query case)
	$this->_mbenc		= "";
	$this->_arrayresult	= false;
}

/// legacy PHP 4 style constructor name; kept as a plain method so existing
/// explicit calls (e.g. from subclasses) still reset the object to defaults
function SphinxClient ()
{
	$this->__construct ();
}
|
230
|
+
|
231
|
+
/// get last error message (string)
|
232
|
+
function GetLastError ()
{
	// hand back whatever error text was recorded last on this client
	$message = $this->_error;
	return $message;
}
|
236
|
+
|
237
|
+
/// get last warning message (string)
|
238
|
+
function GetLastWarning ()
{
	// hand back whatever warning text was recorded last on this client
	$message = $this->_warning;
	return $message;
}
|
242
|
+
|
243
|
+
/// set searchd host name (string) and port (integer)
|
244
|
+
function SetServer ( $host, $port )
{
	// host must be a string and port an integer; both are stored as given
	// and used by the next connection attempt
	assert ( is_string($host) );
	assert ( is_int($port) );

	$this->_port = $port;
	$this->_host = $host;
}
|
251
|
+
|
252
|
+
/////////////////////////////////////////////////////////////////////////////
|
253
|
+
|
254
|
+
/// enter mbstring workaround mode
|
255
|
+
function _MBPush ()
{
	// start with "nothing saved", so _MBPop() is a no-op unless we switch below
	$this->_mbenc = "";

	// bit 2 of mbstring.func_overload is the one tested here; without it
	// there is nothing to work around
	if ( !( ini_get ( "mbstring.func_overload" ) & 2 ) )
		return;

	// remember the current internal encoding, then switch to latin1
	$this->_mbenc = mb_internal_encoding();
	mb_internal_encoding ( "latin1" );
}
|
264
|
+
|
265
|
+
/// leave mbstring workaround mode
|
266
|
+
function _MBPop ()
{
	// nothing was saved by _MBPush(), so nothing to restore
	if ( !$this->_mbenc )
		return;

	// put back the internal encoding that was active before _MBPush()
	mb_internal_encoding ( $this->_mbenc );
}
|
271
|
+
|
272
|
+
/// connect to searchd server
|
273
|
+
function _Connect ()
{
	// Opens a socket to the configured searchd host/port and performs the
	// version handshake. Returns the open stream on success; on failure
	// stores a message in $this->_error and returns false.
	$errno = 0;
	$errstr = "";
	$fp = @fsockopen ( $this->_host, $this->_port, $errno, $errstr );
	if ( !$fp )
	{
		// keep the system-level reason, otherwise a refused connection, a DNS
		// failure and a timeout are indistinguishable to the caller
		$errstr = trim ( $errstr );
		$this->_error = "connection to {$this->_host}:{$this->_port} failed (errno=$errno, msg=$errstr)";
		return false;
	}

	// check version; a short or failed read counts as version 0 instead of
	// being passed to unpack(), which would raise a warning
	$v = 0;
	$hello = fread ( $fp, 4 );
	if ( is_string($hello) && strlen($hello)==4 )
	{
		list(,$v) = unpack ( "N*", $hello );
		$v = (int)$v;
	}
	if ( $v<1 )
	{
		fclose ( $fp );
		$this->_error = "expected searchd protocol version 1+, got version '$v'";
		return false;
	}

	// all ok, send my version
	fwrite ( $fp, pack ( "N", 1 ) );
	return $fp;
}
|
295
|
+
|
296
|
+
/// get and check response packet from searchd server
|
297
|
+
function _GetResponse ( $fp, $client_ver )
{
	// Reads one response packet (8-byte header: status, version, body length;
	// then the body) from $fp, closes $fp, and validates it.
	// Returns the response body string (with the warning text stripped when
	// the status is SEARCHD_WARNING), or false after setting $this->_error.
	// $client_ver is the client-side command version to compare against.
	$response = "";
	$len = 0;
	$status = -1; // placeholders, overwritten once a full header was read
	$ver = 0;

	$header = fread ( $fp, 8 );
	if ( is_string($header) && strlen($header)==8 )
	{
		list ( $status, $ver, $len ) = array_values ( unpack ( "n2a/Nb", $header ) );
		$left = $len;
		while ( $left>0 && !feof($fp) )
		{
			$chunk = fread ( $fp, $left );

			// read error: give up instead of looping forever
			if ( $chunk===false )
				break;

			// nothing arrived; stop if the stream timed out, otherwise retry
			if ( $chunk==="" )
			{
				$meta = stream_get_meta_data ( $fp );
				if ( !empty($meta["timed_out"]) )
					break;
				continue;
			}

			// compare against false/"" explicitly above: a chunk consisting of
			// the single character "0" is falsy in PHP but is valid payload
			$response .= $chunk;
			$left -= strlen($chunk);
		}
	}
	fclose ( $fp );

	// check response (by length, so that a body of "0" is not mistaken for empty)
	$read = strlen ( $response );
	if ( $read==0 || $read!=$len )
	{
		$this->_error = $len
			? "failed to read searchd response (status=$status, ver=$ver, len=$len, read=$read)"
			: "received zero-sized searchd response";
		return false;
	}

	// check status
	if ( $status==SEARCHD_WARNING )
	{
		// body starts with a length-prefixed warning string; strip and remember it
		list(,$wlen) = unpack ( "N*", substr ( $response, 0, 4 ) );
		$this->_warning = substr ( $response, 4, $wlen );
		return substr ( $response, 4+$wlen );
	}
	if ( $status==SEARCHD_ERROR )
	{
		$this->_error = "searchd error: " . substr ( $response, 4 );
		return false;
	}
	if ( $status==SEARCHD_RETRY )
	{
		$this->_error = "temporary searchd error: " . substr ( $response, 4 );
		return false;
	}
	if ( $status!=SEARCHD_OK )
	{
		$this->_error = "unknown status code '$status'";
		return false;
	}

	// check version; an older server is only worth a warning
	if ( $ver<$client_ver )
	{
		$this->_warning = sprintf ( "searchd command v.%d.%d older than client's v.%d.%d, some options might not work",
			$ver>>8, $ver&0xff, $client_ver>>8, $client_ver&0xff );
	}

	return $response;
}
|
361
|
+
|
362
|
+
/////////////////////////////////////////////////////////////////////////////
|
363
|
+
// searching
|
364
|
+
/////////////////////////////////////////////////////////////////////////////
|
365
|
+
|
366
|
+
/// set offset and count into result set,
|
367
|
+
/// and optionally set max-matches and cutoff limits
|
368
|
+
function SetLimits ( $offset, $limit, $max=0, $cutoff=0 )
{
	// $offset: how many matches to skip (integer, >=0)
	// $limit:  how many matches to return (integer, >0)
	// $max:    max-matches limit; 0 keeps the current setting
	// $cutoff: cutoff limit; 0 keeps the current setting
	assert ( is_int($offset) );
	assert ( is_int($limit) );
	assert ( $offset>=0 );
	assert ( $limit>0 );
	assert ( $max>=0 );
	assert ( $cutoff>=0 ); // validated like $max; a negative cutoff used to be ignored silently

	$this->_offset = $offset;
	$this->_limit = $limit;
	if ( $max>0 )
		$this->_maxmatches = $max;
	if ( $cutoff>0 )
		$this->_cutoff = $cutoff;
}
|
382
|
+
|
383
|
+
/// set maximum query time, in milliseconds, per-index
|
384
|
+
/// integer, 0 means "do not limit"
|
385
|
+
function SetMaxQueryTime ( $max )
{
	// $max must be a non-negative integer; it is stored as the max query time
	assert ( is_int($max) );
	assert ( $max>=0 );

	$this->_maxquerytime = $max;
}
|
391
|
+
|
392
|
+
/// set matching mode
|
393
|
+
function SetMatchMode ( $mode )
{
	// modes this setter accepts (loose comparison, same as a chain of ==)
	$accepted = array (
		SPH_MATCH_ALL,
		SPH_MATCH_ANY,
		SPH_MATCH_PHRASE,
		SPH_MATCH_BOOLEAN,
		SPH_MATCH_EXTENDED,
		SPH_MATCH_EXTENDED2 );
	assert ( in_array ( $mode, $accepted ) );

	$this->_mode = $mode;
}
|
403
|
+
|
404
|
+
/// set ranking mode
|
405
|
+
function SetRankingMode ( $ranker )
{
	// rankers this setter accepts (loose comparison, same as a chain of ==)
	$accepted = array (
		SPH_RANK_PROXIMITY_BM25,
		SPH_RANK_BM25,
		SPH_RANK_NONE,
		SPH_RANK_WORDCOUNT );
	assert ( in_array ( $ranker, $accepted ) );

	$this->_ranker = $ranker;
}
|
413
|
+
|
414
|
+
/// set matches sorting mode
|
415
|
+
function SetSortMode ( $mode, $sortby="" )
{
	// sort modes this setter accepts (loose comparison, same as a chain of ==)
	$accepted = array (
		SPH_SORT_RELEVANCE,
		SPH_SORT_ATTR_DESC,
		SPH_SORT_ATTR_ASC,
		SPH_SORT_TIME_SEGMENTS,
		SPH_SORT_EXTENDED,
		SPH_SORT_EXPR );
	assert ( in_array ( $mode, $accepted ) );
	assert ( is_string($sortby) );

	// every mode except plain relevance needs a non-empty sort-by clause
	assert ( !( $mode!=SPH_SORT_RELEVANCE && strlen($sortby)==0 ) );

	$this->_sortby = $sortby;
	$this->_sort = $mode;
}
|
430
|
+
|
431
|
+
/// bind per-field weights by order
|
432
|
+
/// DEPRECATED; use SetFieldWeights() instead
|
433
|
+
function SetWeights ( $weights )
{
	// expects an array of integer weights, bound to fields by position
	assert ( is_array($weights) );
	foreach ( $weights as $w )
	{
		assert ( is_int($w) );
	}

	$this->_weights = $weights;
}
|
441
|
+
|
442
|
+
/// bind per-field weights by name
|
443
|
+
function SetFieldWeights ( $weights )
{
	// expects an array mapping field name (string) to weight (integer)
	assert ( is_array($weights) );
	foreach ( $weights as $field=>$w )
	{
		assert ( is_string($field) );
		assert ( is_int($w) );
	}

	$this->_fieldweights = $weights;
}
|
453
|
+
|
454
|
+
/// bind per-index weights by name
|
455
|
+
function SetIndexWeights ( $weights )
{
	// expects an array mapping index name (string) to weight (integer)
	assert ( is_array($weights) );
	foreach ( $weights as $name=>$w )
	{
		assert ( is_string($name) );
		assert ( is_int($w) );
	}

	$this->_indexweights = $weights;
}
|
465
|
+
|
466
|
+
/// set IDs range to match
|
467
|
+
/// only match records if document ID is between $min and $max (inclusive)
|
468
|
+
function SetIDRange ( $min, $max )
{
	// both bounds must be numeric (ints or numeric strings) and ordered
	assert ( is_numeric($min) );
	assert ( is_numeric($max) );
	assert ( $min<=$max );

	$this->_max_id = $max;
	$this->_min_id = $min;
}
|
476
|
+
|
477
|
+
/// set values set filter
|
478
|
+
/// only match records where $attribute value is in given set
|
479
|
+
function SetFilter ( $attribute, $values, $exclude=false )
{
	// $attribute: attribute name (string)
	// $values:    non-empty array of numeric values to match against
	// $exclude:   stored with the filter and sent as its exclude flag
	assert ( is_string($attribute) );
	assert ( is_array($values) );
	assert ( count($values) );

	// same conditions once more as a real guard, so that nothing is
	// registered for a non-array or empty set when assertions are off
	if ( !is_array($values) || !count($values) )
		return;

	foreach ( $values as $candidate )
		assert ( is_numeric($candidate) );

	$this->_filters[] = array ( "type"=>SPH_FILTER_VALUES, "attr"=>$attribute, "exclude"=>$exclude, "values"=>$values );
}
|
493
|
+
|
494
|
+
/// set range filter
|
495
|
+
/// only match records if $attribute value is between $min and $max (inclusive)
|
496
|
+
function SetFilterRange ( $attribute, $min, $max, $exclude=false )
{
	// integer bounds, $min must not exceed $max
	assert ( is_string($attribute) );
	assert ( is_int($min) );
	assert ( is_int($max) );
	assert ( $min<=$max );

	$entry = array ( "type"=>SPH_FILTER_RANGE, "attr"=>$attribute, "exclude"=>$exclude, "min"=>$min, "max"=>$max );
	$this->_filters[] = $entry;
}
|
505
|
+
|
506
|
+
/// set float range filter
|
507
|
+
/// only match records if $attribute value is between $min and $max (inclusive)
|
508
|
+
function SetFilterFloatRange ( $attribute, $min, $max, $exclude=false )
{
	// float bounds, $min must not exceed $max
	assert ( is_string($attribute) );
	assert ( is_float($min) );
	assert ( is_float($max) );
	assert ( $min<=$max );

	$entry = array ( "type"=>SPH_FILTER_FLOATRANGE, "attr"=>$attribute, "exclude"=>$exclude, "min"=>$min, "max"=>$max );
	$this->_filters[] = $entry;
}
|
517
|
+
|
518
|
+
/// setup anchor point for geosphere distance calculations
|
519
|
+
/// required to use @geodist in filters and sorting
|
520
|
+
/// latitude and longitude must be in radians
|
521
|
+
function SetGeoAnchor ( $attrlat, $attrlong, $lat, $long )
{
	// $attrlat/$attrlong: attribute names (strings)
	// $lat/$long:         anchor point coordinates (floats)
	assert ( is_string($attrlat) );
	assert ( is_string($attrlong) );
	assert ( is_float($lat) );
	assert ( is_float($long) );

	$anchor = array ( "attrlat"=>$attrlat, "attrlong"=>$attrlong, "lat"=>$lat, "long"=>$long );
	$this->_anchor = $anchor;
}
|
530
|
+
|
531
|
+
/// set grouping attribute and function
|
532
|
+
function SetGroupBy ( $attribute, $func, $groupsort="@group desc" )
{
	assert ( is_string($attribute) );
	assert ( is_string($groupsort) );

	// grouping functions this setter accepts (loose comparison, same as a chain of ==)
	$accepted = array (
		SPH_GROUPBY_DAY,
		SPH_GROUPBY_WEEK,
		SPH_GROUPBY_MONTH,
		SPH_GROUPBY_YEAR,
		SPH_GROUPBY_ATTR,
		SPH_GROUPBY_ATTRPAIR );
	assert ( in_array ( $func, $accepted ) );

	$this->_groupsort = $groupsort;
	$this->_groupfunc = $func;
	$this->_groupby = $attribute;
}
|
547
|
+
|
548
|
+
/// set count-distinct attribute for group-by queries
|
549
|
+
function SetGroupDistinct ( $attribute )
{
	// attribute name must be a string; it is stored as the count-distinct attribute
	assert ( is_string($attribute) );

	$this->_groupdistinct = $attribute;
}
|
554
|
+
|
555
|
+
/// set distributed retries count and delay
|
556
|
+
function SetRetries ( $count, $delay=0 )
{
	// both values must be non-negative integers
	assert ( is_int($count) && $count>=0 );
	assert ( is_int($delay) && $delay>=0 );

	$this->_retrydelay = $delay;
	$this->_retrycount = $count;
}
|
563
|
+
|
564
|
+
/// set result set format (hash or array; hash by default)
|
565
|
+
/// PHP specific; needed for group-by-MVA result sets that may contain duplicate IDs
|
566
|
+
function SetArrayResult ( $arrayresult )
{
	// strictly boolean flag; stored as the result-set format switch
	assert ( is_bool($arrayresult) );

	$this->_arrayresult = $arrayresult;
}
|
571
|
+
|
572
|
+
//////////////////////////////////////////////////////////////////////////////
|
573
|
+
|
574
|
+
/// clear all filters (for multi-queries)
|
575
|
+
function ResetFilters ()
{
	// the anchor point is dropped together with the filters
	$this->_anchor = array();
	$this->_filters = array();
}
|
580
|
+
|
581
|
+
/// clear groupby settings (for multi-queries)
|
582
|
+
function ResetGroupBy ()
{
	// back to the same group-by values the constructor assigns
	$this->_groupdistinct	= "";
	$this->_groupsort		= "@group desc";
	$this->_groupfunc		= SPH_GROUPBY_DAY;
	$this->_groupby			= "";
}
|
589
|
+
|
590
|
+
//////////////////////////////////////////////////////////////////////////////
|
591
|
+
|
592
|
+
/// connect to searchd server, run given search query through given indexes,
|
593
|
+
/// and return the search results
|
594
|
+
function Query ( $query, $index="*" )
{
	// Runs a single query: queues it with AddQuery(), executes the batch with
	// RunQueries() and unwraps the only result.
	// Returns the result array, or false on failure (see GetLastError()).
	assert ( empty($this->_reqs) );

	$this->AddQuery ( $query, $index );
	$results = $this->RunQueries ();

	// RunQueries() only empties the queue after a successful round trip; when
	// it bails out early (connect or response failure) the request would stay
	// queued and be re-sent ahead of the next Query(), whose caller would then
	// receive the result of this stale query instead of its own
	$this->_reqs = array ();

	if ( !is_array($results) )
		return false; // probably network error; error message should be already filled

	$this->_error = $results[0]["error"];
	$this->_warning = $results[0]["warning"];
	if ( $results[0]["status"]==SEARCHD_ERROR )
		return false;
	else
		return $results[0];
}
|
611
|
+
|
612
|
+
/// helper to pack floats in network byte order
|
613
|
+
function _PackFloat ( $f )
{
	// serialize as a machine-order float, reread the same 4 bytes as a
	// machine-order unsigned long, then emit that integer in network order
	$native = pack ( "f", $f );
	$words = unpack ( "L*", $native ); // unpack() numbers unnamed elements from 1
	return pack ( "N", $words[1] );
}
|
619
|
+
|
620
|
+
/// add query to multi-query batch
|
621
|
+
/// returns index into results array from RunQueries() call
|
622
|
+
function AddQuery ( $query, $index="*" )
{
	// Serializes the current client settings plus $query/$index into one
	// binary request, appends it to the pending batch and returns its
	// position in that batch. The order of the pieces below is the wire
	// format and must not be changed.

	// mbstring workaround (lengths below are taken with strlen())
	$this->_MBPush ();

	$parts = array ();

	// limits, match mode, ranker, sort mode and sort-by clause
	$parts[] = pack ( "NNNNN", $this->_offset, $this->_limit, $this->_mode, $this->_ranker, $this->_sort );
	$parts[] = pack ( "N", strlen($this->_sortby) ) . $this->_sortby;

	// query text
	$parts[] = pack ( "N", strlen($query) ) . $query;

	// positional per-field weights
	$parts[] = pack ( "N", count($this->_weights) );
	foreach ( $this->_weights as $w )
		$parts[] = pack ( "N", (int)$w );

	// index list
	$parts[] = pack ( "N", strlen($index) ) . $index;

	// id64 range marker, then the 64-bit min and max IDs
	$parts[] = pack ( "N", 1 );
	$parts[] = sphPack64 ( $this->_min_id ) . sphPack64 ( $this->_max_id );

	// filters
	$parts[] = pack ( "N", count($this->_filters) );
	foreach ( $this->_filters as $flt )
	{
		$kind = $flt["type"];
		$parts[] = pack ( "N", strlen($flt["attr"]) ) . $flt["attr"];
		$parts[] = pack ( "N", $kind );

		if ( $kind==SPH_FILTER_VALUES )
		{
			$parts[] = pack ( "N", count($flt["values"]) );
			foreach ( $flt["values"] as $val )
				$parts[] = pack ( "N", floatval($val) ); // float cast works around the 32-bit signed int limit on x32 platforms

		} elseif ( $kind==SPH_FILTER_RANGE )
		{
			$parts[] = pack ( "NN", $flt["min"], $flt["max"] );

		} elseif ( $kind==SPH_FILTER_FLOATRANGE )
		{
			$parts[] = $this->_PackFloat ( $flt["min"] ) . $this->_PackFloat ( $flt["max"] );

		} else
		{
			assert ( 0 && "internal error: unhandled filter type" );
		}

		$parts[] = pack ( "N", $flt["exclude"] );
	}

	// group-by clause, max-matches count, group-sort clause, cutoff count
	$parts[] = pack ( "NN", $this->_groupfunc, strlen($this->_groupby) ) . $this->_groupby;
	$parts[] = pack ( "N", $this->_maxmatches );
	$parts[] = pack ( "N", strlen($this->_groupsort) ) . $this->_groupsort;
	$parts[] = pack ( "NNN", $this->_cutoff, $this->_retrycount, $this->_retrydelay );
	$parts[] = pack ( "N", strlen($this->_groupdistinct) ) . $this->_groupdistinct;

	// anchor point: a 0/1 presence flag, then the anchor data when present
	if ( empty($this->_anchor) )
	{
		$parts[] = pack ( "N", 0 );
	} else
	{
		$anchor = $this->_anchor;
		$parts[] = pack ( "N", 1 );
		$parts[] = pack ( "N", strlen($anchor["attrlat"]) ) . $anchor["attrlat"];
		$parts[] = pack ( "N", strlen($anchor["attrlong"]) ) . $anchor["attrlong"];
		$parts[] = $this->_PackFloat ( $anchor["lat"] ) . $this->_PackFloat ( $anchor["long"] );
	}

	// per-index weights
	$parts[] = pack ( "N", count($this->_indexweights) );
	foreach ( $this->_indexweights as $name=>$w )
		$parts[] = pack ( "N", strlen($name) ) . $name . pack ( "N", $w );

	// max query time
	$parts[] = pack ( "N", $this->_maxquerytime );

	// per-field weights by name
	$parts[] = pack ( "N", count($this->_fieldweights) );
	foreach ( $this->_fieldweights as $name=>$w )
		$parts[] = pack ( "N", strlen($name) ) . $name . pack ( "N", $w );

	$packed = join ( "", $parts );

	// mbstring workaround
	$this->_MBPop ();

	// store request to requests array; its position is the result index
	$this->_reqs[] = $packed;
	return count($this->_reqs)-1;
}
|
706
|
+
|
707
|
+
/// connect to searchd, send every request queued in $this->_reqs as one batch,
/// and return an array of result sets (one entry per parsed reply);
/// returns false on failure
///
/// each result set always carries "error", "warning" and "status"; unless the
/// status is an error it also carries "fields", "attrs", "total", "total_found"
/// and "time"; the "matches" and "words" keys are only created when the reply
/// holds at least one match / one word
function RunQueries ()
{
	if ( empty($this->_reqs) )
	{
		$this->_error = "no queries defined, issue AddQuery() first";
		return false;
	}

	// mbstring workaround
	$this->_MBPush ();

	if (!( $fp = $this->_Connect() ))
	{
		$this->_MBPop ();
		return false;
	}

	////////////////////////////
	// send query, get response
	////////////////////////////

	$nreqs = count($this->_reqs);
	$req = join ( "", $this->_reqs );
	$len = 4+strlen($req); // body length: 4-byte query count plus the packed queries
	$req = pack ( "nnNN", SEARCHD_COMMAND_SEARCH, VER_COMMAND_SEARCH, $len, $nreqs ) . $req; // add header

	// the header puts 8 more bytes (two 16-bit words and the 32-bit length) in
	// front of the body; if the write fails or comes up short the full request
	// was not sent, so give up now instead of waiting for a reply to it
	if ( fwrite ( $fp, $req, $len+8 )!==$len+8 )
	{
		fclose ( $fp );
		$this->_error = "failed to send search request to searchd";
		$this->_MBPop ();
		return false;
	}

	if (!( $response = $this->_GetResponse ( $fp, VER_COMMAND_SEARCH ) ))
	{
		$this->_MBPop ();
		return false;
	}

	// request went through; the queue is only reset at this point, so a failed
	// run leaves the queued requests in place
	$this->_reqs = array ();

	//////////////////
	// parse response
	//////////////////

	$p = 0; // current position
	$max = strlen($response); // max position for checks, to protect against broken responses

	$results = array ();
	for ( $ires=0; $ires<$nreqs && $p<$max; $ires++ )
	{
		$results[] = array();
		$result =& $results[$ires];

		$result["error"] = "";
		$result["warning"] = "";

		// extract status
		list(,$status) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
		$result["status"] = $status;
		if ( $status!=SEARCHD_OK )
		{
			// any non-OK status is followed by a length-prefixed message
			list(,$len) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
			$message = substr ( $response, $p, $len ); $p += $len;

			if ( $status==SEARCHD_WARNING )
			{
				// a warning still has a regular result set after the message
				$result["warning"] = $message;
			} else
			{
				// nothing more is parsed for this query; move on to the next one
				$result["error"] = $message;
				continue;
			}
		}

		// read schema
		$fields = array ();
		$attrs = array ();

		list(,$nfields) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
		while ( $nfields-->0 && $p<$max )
		{
			list(,$len) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
			$fields[] = substr ( $response, $p, $len ); $p += $len;
		}
		$result["fields"] = $fields;

		// attributes are kept as name => type, in the order they were received;
		// that order is reused below to decode each match
		list(,$nattrs) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
		while ( $nattrs-->0 && $p<$max )
		{
			list(,$len) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
			$attr = substr ( $response, $p, $len ); $p += $len;
			list(,$type) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
			$attrs[$attr] = $type;
		}
		$result["attrs"] = $attrs;

		// read match count, then the flag that selects 8-byte document ids
		list(,$count) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
		list(,$id64) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;

		// read matches
		$idx = -1;
		while ( $count-->0 && $p<$max )
		{
			// index into result array
			$idx++;

			// parse document id and weight
			if ( $id64 )
			{
				$doc = sphUnpack64 ( substr ( $response, $p, 8 ) ); $p += 8;
				list(,$weight) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
			} else
			{
				list ( $doc, $weight ) = array_values ( unpack ( "N*N*",
					substr ( $response, $p, 8 ) ) );
				$p += 8;
				$doc = sprintf ( "%u", $doc ); // workaround for php signed/unsigned braindamage
			}
			$weight = sprintf ( "%u", $weight );

			// create match entry: a plain list when _arrayresult is set,
			// otherwise a hash keyed by document id
			if ( $this->_arrayresult )
				$result["matches"][$idx] = array ( "id"=>$doc, "weight"=>$weight );
			else
				$result["matches"][$doc]["weight"] = $weight;

			// parse and create attributes
			$attrvals = array ();
			foreach ( $attrs as $attr=>$type )
			{
				// handle floats
				if ( $type==SPH_ATTR_FLOAT )
				{
					// read the 32 bits as an integer, then reinterpret them as a float
					list(,$uval) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
					list(,$fval) = unpack ( "f*", pack ( "L", $uval ) );
					$attrvals[$attr] = $fval;
					continue;
				}

				// handle everything else as unsigned ints
				list(,$val) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
				if ( $type & SPH_ATTR_MULTI )
				{
					// for multi-value attributes the first integer is the value count
					$attrvals[$attr] = array ();
					$nvalues = $val;
					while ( $nvalues-->0 && $p<$max )
					{
						list(,$val) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
						$attrvals[$attr][] = sprintf ( "%u", $val );
					}
				} else
				{
					$attrvals[$attr] = sprintf ( "%u", $val );
				}
			}

			if ( $this->_arrayresult )
				$result["matches"][$idx]["attrs"] = $attrvals;
			else
				$result["matches"][$doc]["attrs"] = $attrvals;
		}

		// totals, query time (received in msec, reported in seconds) and word count
		list ( $total, $total_found, $msecs, $words ) =
			array_values ( unpack ( "N*N*N*N*", substr ( $response, $p, 16 ) ) );
		$result["total"] = sprintf ( "%u", $total );
		$result["total_found"] = sprintf ( "%u", $total_found );
		$result["time"] = sprintf ( "%.3f", $msecs/1000 );
		$p += 16;

		// per-word statistics
		while ( $words-->0 && $p<$max )
		{
			list(,$len) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
			$word = substr ( $response, $p, $len ); $p += $len;
			list ( $docs, $hits ) = array_values ( unpack ( "N*N*", substr ( $response, $p, 8 ) ) ); $p += 8;
			$result["words"][$word] = array (
				"docs"=>sprintf ( "%u", $docs ),
				"hits"=>sprintf ( "%u", $hits ) );
		}
	}

	$this->_MBPop ();
	return $results;
}
|
887
|
+
|
888
|
+
/////////////////////////////////////////////////////////////////////////////
|
889
|
+
// excerpts generation
|
890
|
+
/////////////////////////////////////////////////////////////////////////////
|
891
|
+
|
892
|
+
/// connect to searchd server, and generate excerpts (snippets)
/// of given documents for given query. returns false on failure,
/// an array of snippets (one per document, in the order given) on success
///
/// $docs  is an array of document body strings
/// $index is the index name passed along with the request
/// $words is the query string whose words get highlighted
/// $opts  is an optional hash; recognized keys and their defaults are
///        "before_match" ("<b>"), "after_match" ("</b>"),
///        "chunk_separator" (" ... "), "limit" (256), "around" (5),
///        "exact_phrase", "single_passage", "use_boundaries" and
///        "weight_order" (all false)
function BuildExcerpts ( $docs, $index, $words, $opts=array() )
{
	assert ( is_array($docs) );
	assert ( is_string($index) );
	assert ( is_string($words) );
	assert ( is_array($opts) );

	$this->_MBPush ();

	if (!( $fp = $this->_Connect() ))
	{
		$this->_MBPop();
		return false;
	}

	/////////////////
	// fixup options
	/////////////////

	if ( !isset($opts["before_match"]) )		$opts["before_match"] = "<b>";
	if ( !isset($opts["after_match"]) )			$opts["after_match"] = "</b>";
	if ( !isset($opts["chunk_separator"]) )		$opts["chunk_separator"] = " ... ";
	if ( !isset($opts["limit"]) )				$opts["limit"] = 256;
	if ( !isset($opts["around"]) )				$opts["around"] = 5;
	if ( !isset($opts["exact_phrase"]) )		$opts["exact_phrase"] = false;
	if ( !isset($opts["single_passage"]) )		$opts["single_passage"] = false;
	if ( !isset($opts["use_boundaries"]) )		$opts["use_boundaries"] = false;
	if ( !isset($opts["weight_order"]) )		$opts["weight_order"] = false;

	/////////////////
	// build request
	/////////////////

	// v.1.0 req
	$flags = 1; // remove spaces
	if ( $opts["exact_phrase"] )	$flags |= 2;
	if ( $opts["single_passage"] )	$flags |= 4;
	if ( $opts["use_boundaries"] )	$flags |= 8;
	if ( $opts["weight_order"] )	$flags |= 16;
	$req = pack ( "NN", 0, $flags ); // mode=0, flags=$flags
	$req .= pack ( "N", strlen($index) ) . $index; // req index
	$req .= pack ( "N", strlen($words) ) . $words; // req words

	// options
	$req .= pack ( "N", strlen($opts["before_match"]) ) . $opts["before_match"];
	$req .= pack ( "N", strlen($opts["after_match"]) ) . $opts["after_match"];
	$req .= pack ( "N", strlen($opts["chunk_separator"]) ) . $opts["chunk_separator"];
	$req .= pack ( "N", (int)$opts["limit"] );
	$req .= pack ( "N", (int)$opts["around"] );

	// documents
	$ndocs = count($docs);
	$req .= pack ( "N", $ndocs );
	foreach ( $docs as $doc )
	{
		assert ( is_string($doc) );
		$req .= pack ( "N", strlen($doc) ) . $doc;
	}

	////////////////////////////
	// send query, get response
	////////////////////////////

	$len = strlen($req);
	$req = pack ( "nnN", SEARCHD_COMMAND_EXCERPT, VER_COMMAND_EXCERPT, $len ) . $req; // add header

	// the header is 8 bytes long; if the write fails or comes up short the
	// full request was not sent, so there is no reply worth waiting for
	if ( fwrite ( $fp, $req, $len+8 )!==$len+8 )
	{
		fclose ( $fp );
		$this->_error = "failed to send excerpts request to searchd";
		$this->_MBPop ();
		return false;
	}

	if (!( $response = $this->_GetResponse ( $fp, VER_COMMAND_EXCERPT ) ))
	{
		$this->_MBPop ();
		return false;
	}

	//////////////////
	// parse response
	//////////////////

	// the reply is read as one length-prefixed string per requested document
	$pos = 0;
	$res = array ();
	$rlen = strlen($response);
	for ( $i=0; $i<$ndocs; $i++ )
	{
		// make sure the 4-byte length prefix itself is there before decoding it
		if ( $pos+4 > $rlen )
		{
			$this->_error = "incomplete reply";
			$this->_MBPop ();
			return false;
		}
		list(,$len) = unpack ( "N*", substr ( $response, $pos, 4 ) );
		$pos += 4;

		// $len<0 guards against a huge length wrapping negative on 32-bit PHP
		if ( $len<0 || $pos+$len > $rlen )
		{
			$this->_error = "incomplete reply";
			$this->_MBPop ();
			return false;
		}
		$res[] = $len ? substr ( $response, $pos, $len ) : "";
		$pos += $len;
	}

	$this->_MBPop ();
	return $res;
}
|
991
|
+
|
992
|
+
/////////////////////////////////////////////////////////////////////////////
|
993
|
+
// attribute updates
|
994
|
+
/////////////////////////////////////////////////////////////////////////////
|
995
|
+
|
996
|
+
/// build the request that updates given attribute values on given documents
/// in given indexes
///
/// NOTE: this copy is patched for testing Riddle: it returns the packed
/// request body (a binary string) and does not connect to searchd, so no
/// updated-document count and no -1 failure code is ever returned from here
///
/// $index  is the index name (string)
/// $attrs  is an array of attribute names (strings)
/// $values is a hash of document id => array of integer values, with as many
///         values per document as there are entries in $attrs
function UpdateAttributes ( $index, $attrs, $values )
{
	// verify everything
	assert ( is_string($index) );

	assert ( is_array($attrs) );
	foreach ( $attrs as $attr )
		assert ( is_string($attr) );

	assert ( is_array($values) );
	foreach ( $values as $id=>$entry )
	{
		assert ( is_numeric($id) );
		assert ( is_array($entry) );
		assert ( count($entry)==count($attrs) );
		foreach ( $entry as $v )
			assert ( is_int($v) );
	}

	// build request: length-prefixed index name first
	$req = pack ( "N", strlen($index) ) . $index;

	// attribute count, then each name as a length-prefixed string
	$req .= pack ( "N", count($attrs) );
	foreach ( $attrs as $attr )
		$req .= pack ( "N", strlen($attr) ) . $attr;

	// document count, then per document its id (packed by sphPack64)
	// followed by one 32-bit value per entry
	$req .= pack ( "N", count($values) );
	foreach ( $values as $id=>$entry )
	{
		$req .= sphPack64 ( $id );
		foreach ( $entry as $v )
			$req .= pack ( "N", $v );
	}

	// Line for testing Riddle: hand the raw request body back to the caller
	return $req;
}
|
1060
|
+
}
|
1061
|
+
|
1062
|
+
//
|
1063
|
+
// $Id: sphinxapi.php 1103 2008-01-24 18:42:57Z shodan $
|
1064
|
+
//
|
1065
|
+
|
1066
|
+
?>
|