model_set 0.10.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95) hide show
  1. data/LICENSE +20 -0
  2. data/README.rdoc +39 -0
  3. data/VERSION.yml +5 -0
  4. data/lib/model_set/conditioned.rb +33 -0
  5. data/lib/model_set/conditions.rb +103 -0
  6. data/lib/model_set/query.rb +132 -0
  7. data/lib/model_set/raw_query.rb +41 -0
  8. data/lib/model_set/raw_sql_query.rb +19 -0
  9. data/lib/model_set/set_query.rb +34 -0
  10. data/lib/model_set/solr_query.rb +70 -0
  11. data/lib/model_set/sphinx_query.rb +206 -0
  12. data/lib/model_set/sql_base_query.rb +52 -0
  13. data/lib/model_set/sql_query.rb +109 -0
  14. data/lib/model_set.rb +743 -0
  15. data/lib/multi_set.rb +67 -0
  16. data/test/model_set_test.rb +329 -0
  17. data/test/multi_set_test.rb +65 -0
  18. data/test/test_helper.rb +23 -0
  19. data/vendor/sphinx_client/README.rdoc +41 -0
  20. data/vendor/sphinx_client/Rakefile +21 -0
  21. data/vendor/sphinx_client/init.rb +1 -0
  22. data/vendor/sphinx_client/install.rb +5 -0
  23. data/vendor/sphinx_client/lib/sphinx/client.rb +1093 -0
  24. data/vendor/sphinx_client/lib/sphinx/request.rb +50 -0
  25. data/vendor/sphinx_client/lib/sphinx/response.rb +69 -0
  26. data/vendor/sphinx_client/lib/sphinx.rb +6 -0
  27. data/vendor/sphinx_client/spec/client_response_spec.rb +112 -0
  28. data/vendor/sphinx_client/spec/client_spec.rb +469 -0
  29. data/vendor/sphinx_client/spec/fixtures/default_search.php +8 -0
  30. data/vendor/sphinx_client/spec/fixtures/default_search_index.php +8 -0
  31. data/vendor/sphinx_client/spec/fixtures/excerpt_custom.php +11 -0
  32. data/vendor/sphinx_client/spec/fixtures/excerpt_default.php +8 -0
  33. data/vendor/sphinx_client/spec/fixtures/excerpt_flags.php +11 -0
  34. data/vendor/sphinx_client/spec/fixtures/field_weights.php +9 -0
  35. data/vendor/sphinx_client/spec/fixtures/filter.php +9 -0
  36. data/vendor/sphinx_client/spec/fixtures/filter_exclude.php +9 -0
  37. data/vendor/sphinx_client/spec/fixtures/filter_float_range.php +9 -0
  38. data/vendor/sphinx_client/spec/fixtures/filter_float_range_exclude.php +9 -0
  39. data/vendor/sphinx_client/spec/fixtures/filter_range.php +9 -0
  40. data/vendor/sphinx_client/spec/fixtures/filter_range_exclude.php +9 -0
  41. data/vendor/sphinx_client/spec/fixtures/filter_range_int64.php +10 -0
  42. data/vendor/sphinx_client/spec/fixtures/filter_ranges.php +10 -0
  43. data/vendor/sphinx_client/spec/fixtures/filters.php +10 -0
  44. data/vendor/sphinx_client/spec/fixtures/filters_different.php +13 -0
  45. data/vendor/sphinx_client/spec/fixtures/geo_anchor.php +9 -0
  46. data/vendor/sphinx_client/spec/fixtures/group_by_attr.php +9 -0
  47. data/vendor/sphinx_client/spec/fixtures/group_by_attrpair.php +9 -0
  48. data/vendor/sphinx_client/spec/fixtures/group_by_day.php +9 -0
  49. data/vendor/sphinx_client/spec/fixtures/group_by_day_sort.php +9 -0
  50. data/vendor/sphinx_client/spec/fixtures/group_by_month.php +9 -0
  51. data/vendor/sphinx_client/spec/fixtures/group_by_week.php +9 -0
  52. data/vendor/sphinx_client/spec/fixtures/group_by_year.php +9 -0
  53. data/vendor/sphinx_client/spec/fixtures/group_distinct.php +10 -0
  54. data/vendor/sphinx_client/spec/fixtures/id_range.php +9 -0
  55. data/vendor/sphinx_client/spec/fixtures/id_range64.php +9 -0
  56. data/vendor/sphinx_client/spec/fixtures/index_weights.php +9 -0
  57. data/vendor/sphinx_client/spec/fixtures/keywords.php +8 -0
  58. data/vendor/sphinx_client/spec/fixtures/limits.php +9 -0
  59. data/vendor/sphinx_client/spec/fixtures/limits_cutoff.php +9 -0
  60. data/vendor/sphinx_client/spec/fixtures/limits_max.php +9 -0
  61. data/vendor/sphinx_client/spec/fixtures/limits_max_cutoff.php +9 -0
  62. data/vendor/sphinx_client/spec/fixtures/match_all.php +9 -0
  63. data/vendor/sphinx_client/spec/fixtures/match_any.php +9 -0
  64. data/vendor/sphinx_client/spec/fixtures/match_boolean.php +9 -0
  65. data/vendor/sphinx_client/spec/fixtures/match_extended.php +9 -0
  66. data/vendor/sphinx_client/spec/fixtures/match_extended2.php +9 -0
  67. data/vendor/sphinx_client/spec/fixtures/match_fullscan.php +9 -0
  68. data/vendor/sphinx_client/spec/fixtures/match_phrase.php +9 -0
  69. data/vendor/sphinx_client/spec/fixtures/max_query_time.php +9 -0
  70. data/vendor/sphinx_client/spec/fixtures/miltiple_queries.php +12 -0
  71. data/vendor/sphinx_client/spec/fixtures/ranking_bm25.php +9 -0
  72. data/vendor/sphinx_client/spec/fixtures/ranking_none.php +9 -0
  73. data/vendor/sphinx_client/spec/fixtures/ranking_proximity.php +9 -0
  74. data/vendor/sphinx_client/spec/fixtures/ranking_proximity_bm25.php +9 -0
  75. data/vendor/sphinx_client/spec/fixtures/ranking_wordcount.php +9 -0
  76. data/vendor/sphinx_client/spec/fixtures/retries.php +9 -0
  77. data/vendor/sphinx_client/spec/fixtures/retries_delay.php +9 -0
  78. data/vendor/sphinx_client/spec/fixtures/select.php +9 -0
  79. data/vendor/sphinx_client/spec/fixtures/set_override.php +11 -0
  80. data/vendor/sphinx_client/spec/fixtures/sort_attr_asc.php +9 -0
  81. data/vendor/sphinx_client/spec/fixtures/sort_attr_desc.php +9 -0
  82. data/vendor/sphinx_client/spec/fixtures/sort_expr.php +9 -0
  83. data/vendor/sphinx_client/spec/fixtures/sort_extended.php +9 -0
  84. data/vendor/sphinx_client/spec/fixtures/sort_relevance.php +9 -0
  85. data/vendor/sphinx_client/spec/fixtures/sort_time_segments.php +9 -0
  86. data/vendor/sphinx_client/spec/fixtures/sphinxapi.php +1269 -0
  87. data/vendor/sphinx_client/spec/fixtures/update_attributes.php +8 -0
  88. data/vendor/sphinx_client/spec/fixtures/update_attributes_mva.php +8 -0
  89. data/vendor/sphinx_client/spec/fixtures/weights.php +9 -0
  90. data/vendor/sphinx_client/spec/sphinx/sphinx-id64.conf +67 -0
  91. data/vendor/sphinx_client/spec/sphinx/sphinx.conf +67 -0
  92. data/vendor/sphinx_client/spec/sphinx/sphinx_test.sql +86 -0
  93. data/vendor/sphinx_client/sphinx.yml.tpl +3 -0
  94. data/vendor/sphinx_client/tasks/sphinx.rake +75 -0
  95. metadata +151 -0
@@ -0,0 +1,1269 @@
1
+ <?php
2
+
3
+ //
4
+ // $Id$
5
+ //
6
+
7
+ //
8
+ // Copyright (c) 2001-2008, Andrew Aksyonoff. All rights reserved.
9
+ //
10
+ // This program is free software; you can redistribute it and/or modify
11
+ // it under the terms of the GNU General Public License. You should have
12
+ // received a copy of the GPL license along with this program; if you
13
+ // did not, you can find it at http://www.gnu.org/
14
+ //
15
+
16
+ /////////////////////////////////////////////////////////////////////////////
17
+ // PHP version of Sphinx searchd client (PHP API)
18
+ /////////////////////////////////////////////////////////////////////////////
19
+
20
/// searchd command identifiers
define ( 'SEARCHD_COMMAND_SEARCH',   0 );
define ( 'SEARCHD_COMMAND_EXCERPT',  1 );
define ( 'SEARCHD_COMMAND_UPDATE',   2 );
define ( 'SEARCHD_COMMAND_KEYWORDS', 3 );

/// client-side implementation version of each command
define ( 'VER_COMMAND_SEARCH',   0x116 );
define ( 'VER_COMMAND_EXCERPT',  0x100 );
define ( 'VER_COMMAND_UPDATE',   0x102 );
define ( 'VER_COMMAND_KEYWORDS', 0x100 );

/// status codes returned by searchd
define ( 'SEARCHD_OK',      0 );
define ( 'SEARCHD_ERROR',   1 );
define ( 'SEARCHD_RETRY',   2 );
define ( 'SEARCHD_WARNING', 3 );

/// match modes
define ( 'SPH_MATCH_ALL',       0 );
define ( 'SPH_MATCH_ANY',       1 );
define ( 'SPH_MATCH_PHRASE',    2 );
define ( 'SPH_MATCH_BOOLEAN',   3 );
define ( 'SPH_MATCH_EXTENDED',  4 );
define ( 'SPH_MATCH_FULLSCAN',  5 );
define ( 'SPH_MATCH_EXTENDED2', 6 ); // extended engine V2 (TEMPORARY, WILL BE REMOVED)

/// ranking modes (ext2 only)
define ( 'SPH_RANK_PROXIMITY_BM25', 0 ); ///< default mode, phrase proximity major factor and BM25 minor one
define ( 'SPH_RANK_BM25',           1 ); ///< statistical mode, BM25 ranking only (faster but worse quality)
define ( 'SPH_RANK_NONE',           2 ); ///< no ranking, all matches get a weight of 1
define ( 'SPH_RANK_WORDCOUNT',      3 ); ///< simple word-count weighting, rank is a weighted sum of per-field keyword occurrence counts
define ( 'SPH_RANK_PROXIMITY',      4 );

/// sort modes
define ( 'SPH_SORT_RELEVANCE',     0 );
define ( 'SPH_SORT_ATTR_DESC',     1 );
define ( 'SPH_SORT_ATTR_ASC',      2 );
define ( 'SPH_SORT_TIME_SEGMENTS', 3 );
define ( 'SPH_SORT_EXTENDED',      4 );
define ( 'SPH_SORT_EXPR',          5 );

/// filter types
define ( 'SPH_FILTER_VALUES',     0 );
define ( 'SPH_FILTER_RANGE',      1 );
define ( 'SPH_FILTER_FLOATRANGE', 2 );

/// attribute types
define ( 'SPH_ATTR_INTEGER',   1 );
define ( 'SPH_ATTR_TIMESTAMP', 2 );
define ( 'SPH_ATTR_ORDINAL',   3 );
define ( 'SPH_ATTR_BOOL',      4 );
define ( 'SPH_ATTR_FLOAT',     5 );
define ( 'SPH_ATTR_BIGINT',    6 );
define ( 'SPH_ATTR_MULTI',     0x40000000 );

/// grouping functions
define ( 'SPH_GROUPBY_DAY',      0 );
define ( 'SPH_GROUPBY_WEEK',     1 );
define ( 'SPH_GROUPBY_MONTH',    2 );
define ( 'SPH_GROUPBY_YEAR',     3 );
define ( 'SPH_GROUPBY_ATTR',     4 );
define ( 'SPH_GROUPBY_ATTRPAIR', 5 );
83
+
84
+
85
/// portably pack a numeric value into 64 bits, network (big-endian) order
///
/// $v may be an int, a float or a numeric string. Negative values are packed
/// in two's complement form. Returns an 8-byte binary string.
///
/// On 64-bit PHP, all-digit strings of 19-20 characters (which may exceed
/// PHP_INT_MAX) are packed as unsigned values; anything else goes through a
/// plain int cast. On 32-bit PHP the value is split with bcmath when available,
/// or with floats for values of up to 15 characters; longer values without
/// bcmath terminate the script via die().
function sphPack64 ( $v )
{
    assert ( is_numeric($v) );

    // x64 route
    if ( PHP_INT_SIZE>=8 )
    {
        // casting a decimal string above PHP_INT_MAX to int saturates and would
        // silently pack the wrong value; split such strings as hi*10^13 + lo and
        // use 10^13 = 2328*2^32 + 1316134912 so every intermediate fits in 63 bits
        if ( is_string($v) )
        {
            $digits = strlen ( $v );
            if ( $digits>=19 && $digits<=20 && strspn ( $v, "0123456789" )==$digits )
            {
                $split = $digits-13;
                $hi = (int)substr ( $v, 0, $split );
                $lo = (int)substr ( $v, $split );
                $m = $lo + $hi*1316134912;
                return pack ( "NN", ( ($m>>32) + $hi*2328 ) & 0xFFFFFFFF, $m & 0xFFFFFFFF );
            }
        }

        $i = (int)$v;
        return pack ( "NN", ($i>>32) & 0xFFFFFFFF, $i & 0xFFFFFFFF );
    }

    // x32 route, bcmath
    $x = "4294967296";
    if ( function_exists("bcmul") )
    {
        $h = bcdiv ( $v, $x, 0 );
        $l = bcmod ( $v, $x );
        // borrow from the high word only when the remainder is really negative;
        // a negative exact multiple of 2^32 has a zero low word and must not borrow
        if ( $l<0 )
        {
            $h = -1+(float)$h;
            $l = $l+(float)$x;
        }
        return pack ( "NN", (float)$h, (float)$l ); // conversion to float is intentional; int would lose 31st bit
    }

    // x32 route, 15 or less decimal digits
    // we can use float, because it's actually double and has 52 precision bits
    if ( strlen($v)<=15 )
    {
        $f = (float)$v;
        $h = (int)($f/$x);
        $l = $f-$x*(float)$h;
        // same borrow rule as in the bcmath route
        if ( $l<0 )
        {
            $h = -1+(float)$h;
            $l = $l+(float)$x;
        }
        return pack ( "NN", $h, $l );
    }

    // x32 route, 16 or more decimal digits
    // well, let me know if you *really* need this
    die ( "INTERNAL ERROR: packing more than 15-digit numeric on 32-bit PHP is not implemented yet (contact support)" );
}
130
+
131
/// portably unpack 64 signed bits in network order into a numeric value
///
/// $v is the packed binary string. On 64-bit PHP the result is an int;
/// on 32-bit PHP it is a decimal string, optionally prefixed with "-".
function sphUnpack64 ( $v )
{
    $words = array_values ( unpack ( "N*N*", $v ) );
    list ( $hi, $lo ) = $words;

    // x64 route
    if ( PHP_INT_SIZE>=8 )
    {
        // some PHP 5.2.x builds hand back negative values for "N"; normalise them
        $wrap = 1<<32;
        $hi += ( $hi<0 ) ? $wrap : 0;
        $lo += ( $lo<0 ) ? $wrap : 0;
        return ($hi<<32) + $lo;
    }

    // x32 route: a negative value is rebuilt as -( ~value + 1 )
    $x = "4294967296";
    $negative = ( $hi<0 );
    if ( $negative )
    {
        $hi = ~$hi;
        $lo = ~$lo;
    }
    $carry = $negative ? 1 : 0;
    $sign = $negative ? "-" : "";
    $hi = sprintf ( "%u", $hi );
    $lo = sprintf ( "%u", $lo );

    // bcmath
    if ( function_exists("bcmul") )
    {
        $magnitude = bcadd ( $lo, bcmul ( $x, $hi ) );
        return $sign . bcadd ( $magnitude, $carry );
    }

    // no bcmath, 15 or less decimal digits
    // a float is really a double with 52 precision bits, which is enough here
    if ( $hi<1048576 )
    {
        $f = ((float)$hi)*$x + (float)$lo + (float)$carry;
        return $sign . sprintf ( "%.0f", $f ); // builtin conversion is only about 39-40 bits precise!
    }

    // x32 route, 16 or more decimal digits
    // well, let me know if you *really* need this
    die ( "INTERNAL ERROR: unpacking more than 15-digit numeric on 32-bit PHP is not implemented yet (contact support)" );
}
174
+
175
+
176
+ /// sphinx searchd client class
177
+ class SphinxClient
178
+ {
179
+ var $_host; ///< searchd host (default is "localhost")
180
+ var $_port; ///< searchd port (default is 3312)
181
+ var $_offset; ///< how many records to seek from result-set start (default is 0)
182
+ var $_limit; ///< how many records to return from result-set starting at offset (default is 20)
183
+ var $_mode; ///< query matching mode (default is SPH_MATCH_ALL)
184
+ var $_weights; ///< per-field weights (default is 1 for all fields)
185
+ var $_sort; ///< match sorting mode (default is SPH_SORT_RELEVANCE)
186
+ var $_sortby; ///< attribute to sort by (default is "")
187
+ var $_min_id; ///< min ID to match (default is 0, which means no limit)
188
+ var $_max_id; ///< max ID to match (default is 0, which means no limit)
189
+ var $_filters; ///< search filters
190
+ var $_groupby; ///< group-by attribute name
191
+ var $_groupfunc; ///< group-by function (to pre-process group-by attribute value with)
192
+ var $_groupsort; ///< group-by sorting clause (to sort groups in result set with)
193
+ var $_groupdistinct;///< group-by count-distinct attribute
194
+ var $_maxmatches; ///< max matches to retrieve
195
+ var $_cutoff; ///< cutoff to stop searching at (default is 0)
196
+ var $_retrycount; ///< distributed retries count
197
+ var $_retrydelay; ///< distributed retries delay
198
+ var $_anchor; ///< geographical anchor point
199
+ var $_indexweights; ///< per-index weights
200
+ var $_ranker; ///< ranking mode (default is SPH_RANK_PROXIMITY_BM25)
201
+ var $_maxquerytime; ///< max query time, milliseconds (default is 0, do not limit)
202
+ var $_fieldweights; ///< per-field-name weights
203
+ var $_overrides; ///< per-query attribute values overrides
204
+ var $_select; ///< select-list (attributes or expressions, with optional aliases)
205
+
206
+ var $_error; ///< last error message
207
+ var $_warning; ///< last warning message
208
+
209
+ var $_reqs; ///< requests array for multi-query
210
+ var $_mbenc; ///< stored mbstring encoding
211
+ var $_arrayresult; ///< whether $result["matches"] should be a hash or an array
212
+
213
+ /////////////////////////////////////////////////////////////////////////////
214
+ // common stuff
215
+ /////////////////////////////////////////////////////////////////////////////
216
+
217
/// create a new client object and fill defaults
///
/// PHP 8 no longer treats a method named after its class as a constructor,
/// so the initialisation lives in __construct(); the SphinxClient() method
/// declared right after it keeps explicit legacy calls working
function __construct ()
{
    // per-client-object settings
    $this->_host = "localhost";
    $this->_port = 3312;

    // per-query settings
    $this->_offset = 0;
    $this->_limit = 20;
    $this->_mode = SPH_MATCH_ALL;
    $this->_weights = array ();
    $this->_sort = SPH_SORT_RELEVANCE;
    $this->_sortby = "";
    $this->_min_id = 0;
    $this->_max_id = 0;
    $this->_filters = array ();
    $this->_groupby = "";
    $this->_groupfunc = SPH_GROUPBY_DAY;
    $this->_groupsort = "@group desc";
    $this->_groupdistinct = "";
    $this->_maxmatches = 1000;
    $this->_cutoff = 0;
    $this->_retrycount = 0;
    $this->_retrydelay = 0;
    $this->_anchor = array ();
    $this->_indexweights = array ();
    $this->_ranker = SPH_RANK_PROXIMITY_BM25;
    $this->_maxquerytime = 0;
    $this->_fieldweights = array ();
    $this->_overrides = array ();
    $this->_select = "*";

    $this->_error = ""; // per-reply fields (for single-query case)
    $this->_warning = "";
    $this->_reqs = array (); // requests storage (for multi-query case)
    $this->_mbenc = "";
    $this->_arrayresult = false;
}

/// legacy PHP 4 style constructor name, kept for backward compatibility;
/// it simply re-runs the default initialisation
function SphinxClient ()
{
    $this->__construct ();
}
256
+
257
/// return the most recent error message stored on this client
/// (an empty string until an error has been recorded)
function GetLastError ()
{
    $message = $this->_error;
    return $message;
}
262
+
263
/// return the most recent warning message stored on this client
/// (an empty string until a warning has been recorded)
function GetLastWarning ()
{
    $message = $this->_warning;
    return $message;
}
268
+
269
/// remember which searchd instance to talk to
/// $host must be a string, $port must be an integer
function SetServer ( $host, $port )
{
    assert ( is_string($host) );
    assert ( is_int($port) );

    list ( $this->_host, $this->_port ) = array ( $host, $port );
}
277
+
278
+ /////////////////////////////////////////////////////////////////////////////
279
+
280
/// enter mbstring workaround mode
///
/// when bit 2 of mbstring.func_overload is set, remember the current internal
/// encoding in $_mbenc and switch to latin1; otherwise $_mbenc is left empty
function _MBPush ()
{
    $this->_mbenc = "";

    $overload = ini_get ( "mbstring.func_overload" );
    if ( !( $overload & 2 ) )
        return;

    $this->_mbenc = mb_internal_encoding();
    mb_internal_encoding ( "latin1" );
}
290
+
291
/// leave mbstring workaround mode
/// restores the internal encoding saved by _MBPush(), if one was saved
function _MBPop ()
{
    if ( !$this->_mbenc )
        return;

    mb_internal_encoding ( $this->_mbenc );
}
297
+
298
/// obtain the stream that requests are written to
///
/// this variant opens php://stdout for writing instead of a socket;
/// the socket-based implementation is kept in _OldConnect()
function _Connect ()
{
    $stream = fopen ( 'php://stdout', 'w' );
    return $stream;
}
303
+
304
/// open a socket to searchd and perform the protocol version handshake
///
/// returns the connected stream, or false after storing a message in $_error
/// when the connection fails or the server reports a protocol version below 1
function _OldConnect()
{
    $fp = @fsockopen ( $this->_host, $this->_port );
    if ( !$fp )
    {
        $this->_error = "connection to {$this->_host}:{$this->_port} failed";
        return false;
    }

    // the server speaks first: read its 4-byte protocol version
    list(,$v) = unpack ( "N*", fread ( $fp, 4 ) );
    $v = (int)$v;
    if ( $v>=1 )
    {
        // all ok, answer with our own version
        fwrite ( $fp, pack ( "N", 1 ) );
        return $fp;
    }

    fclose ( $fp );
    $this->_error = "expected searchd protocol version 1+, got version '$v'";
    return false;
}
326
+
327
/// get and check response packet from searchd server
///
/// this variant never reads anything and always reports failure by returning
/// false; the real reader is kept in _OldGetResponse()
function _GetResponse ( $fp, $client_ver )
{
    $response = false;
    return $response;
}
332
+
333
/// read one response packet from $fp, close the stream and validate the reply
///
/// the packet starts with an 8-byte header (status, version, body length)
/// followed by the body. Returns the body on success (with the warning text
/// stripped off and stored in $_warning for SEARCHD_WARNING replies), or
/// false after storing a message in $_error.
function _OldGetResponse ( $fp, $client_ver )
{
    $response = "";
    $len = 0;

    $header = fread ( $fp, 8 );
    if ( strlen($header)==8 )
    {
        list ( $status, $ver, $len ) = array_values ( unpack ( "n2a/Nb", $header ) );

        // keep reading until the announced body length has arrived or the stream ends
        for ( $left = $len; $left>0 && !feof($fp); )
        {
            $chunk = fread ( $fp, $left );
            if ( !$chunk )
                continue;

            $response .= $chunk;
            $left -= strlen($chunk);
        }
    }
    fclose ( $fp );

    // check response
    $read = strlen ( $response );
    if ( !$response || $read!=$len )
    {
        if ( $len )
            $this->_error = "failed to read searchd response (status=$status, ver=$ver, len=$len, read=$read)";
        else
            $this->_error = "received zero-sized searchd response";
        return false;
    }

    // check status
    switch ( $status )
    {
        case SEARCHD_WARNING:
            // body is: warning length, warning text, then the actual payload
            list(,$wlen) = unpack ( "N*", substr ( $response, 0, 4 ) );
            $this->_warning = substr ( $response, 4, $wlen );
            return substr ( $response, 4+$wlen );

        case SEARCHD_ERROR:
            $this->_error = "searchd error: " . substr ( $response, 4 );
            return false;

        case SEARCHD_RETRY:
            $this->_error = "temporary searchd error: " . substr ( $response, 4 );
            return false;

        case SEARCHD_OK:
            break;

        default:
            $this->_error = "unknown status code '$status'";
            return false;
    }

    // check version
    if ( $ver<$client_ver )
    {
        $this->_warning = sprintf ( "searchd command v.%d.%d older than client's v.%d.%d, some options might not work",
            $ver>>8, $ver&0xff, $client_ver>>8, $client_ver&0xff );
    }

    return $response;
}
397
+
398
+ /////////////////////////////////////////////////////////////////////////////
399
+ // searching
400
+ /////////////////////////////////////////////////////////////////////////////
401
+
402
/// set the result window ($offset, $limit) and, optionally, the max-matches
/// and cutoff limits; $max and $cutoff are only applied when greater than zero
function SetLimits ( $offset, $limit, $max=0, $cutoff=0 )
{
    assert ( is_int($offset) );
    assert ( is_int($limit) );
    assert ( $offset>=0 );
    assert ( $limit>0 );
    assert ( $max>=0 );

    $this->_offset = $offset;
    $this->_limit = $limit;

    if ( $max>0 )
    {
        $this->_maxmatches = $max;
    }
    if ( $cutoff>0 )
    {
        $this->_cutoff = $cutoff;
    }
}
418
+
419
/// set the per-index maximum query time in milliseconds
/// $max must be a non-negative integer; 0 means "do not limit"
function SetMaxQueryTime ( $max )
{
    assert ( is_int($max) );
    assert ( $max>=0 );

    $this->_maxquerytime = $max;
}
427
+
428
/// set matching mode; $mode must be one of the SPH_MATCH_xxx constants
function SetMatchMode ( $mode )
{
    $known = array (
        SPH_MATCH_ALL,
        SPH_MATCH_ANY,
        SPH_MATCH_PHRASE,
        SPH_MATCH_BOOLEAN,
        SPH_MATCH_EXTENDED,
        SPH_MATCH_FULLSCAN,
        SPH_MATCH_EXTENDED2,
    );
    assert ( in_array ( $mode, $known ) );

    $this->_mode = $mode;
}
440
+
441
/// set ranking mode; $ranker must be one of the SPH_RANK_xxx constants
function SetRankingMode ( $ranker )
{
    $known = array (
        SPH_RANK_PROXIMITY_BM25,
        SPH_RANK_BM25,
        SPH_RANK_NONE,
        SPH_RANK_WORDCOUNT,
        SPH_RANK_PROXIMITY,
    );
    assert ( in_array ( $ranker, $known ) );

    $this->_ranker = $ranker;
}
451
+
452
/// set matches sorting mode
/// $mode must be one of the SPH_SORT_xxx constants; every mode except
/// SPH_SORT_RELEVANCE requires a non-empty $sortby clause
function SetSortMode ( $mode, $sortby="" )
{
    $known = array (
        SPH_SORT_RELEVANCE,
        SPH_SORT_ATTR_DESC,
        SPH_SORT_ATTR_ASC,
        SPH_SORT_TIME_SEGMENTS,
        SPH_SORT_EXTENDED,
        SPH_SORT_EXPR,
    );
    assert ( in_array ( $mode, $known ) );
    assert ( is_string($sortby) );
    assert ( $mode==SPH_SORT_RELEVANCE || strlen($sortby)>0 );

    $this->_sort = $mode;
    $this->_sortby = $sortby;
}
468
+
469
/// bind per-field weights by position; $weights is an array of integers
/// DEPRECATED; use SetFieldWeights() instead
function SetWeights ( $weights )
{
    assert ( is_array($weights) );
    foreach ( $weights as $w )
    {
        assert ( is_int($w) );
    }

    $this->_weights = $weights;
}
479
+
480
/// bind per-field weights by name
/// $weights maps field names (strings) to integer weights
function SetFieldWeights ( $weights )
{
    assert ( is_array($weights) );
    foreach ( $weights as $field=>$w )
    {
        assert ( is_string($field) );
        assert ( is_int($w) );
    }

    $this->_fieldweights = $weights;
}
491
+
492
/// bind per-index weights by name
/// $weights maps index names (strings) to integer weights
function SetIndexWeights ( $weights )
{
    assert ( is_array($weights) );
    foreach ( $weights as $name=>$w )
    {
        assert ( is_string($name) );
        assert ( is_int($w) );
    }

    $this->_indexweights = $weights;
}
503
+
504
/// set IDs range to match
/// only match records if document ID is between $min and $max (inclusive);
/// both bounds must be numeric and $min must not exceed $max
function SetIDRange ( $min, $max )
{
    assert ( is_numeric($min) );
    assert ( is_numeric($max) );
    assert ( $min<=$max );

    list ( $this->_min_id, $this->_max_id ) = array ( $min, $max );
}
514
+
515
/// add a value-set filter
/// only match records where $attribute value is in the given set of numeric
/// $values (or is not in it, when $exclude is true); an empty or non-array
/// $values adds no filter
function SetFilter ( $attribute, $values, $exclude=false )
{
    assert ( is_string($attribute) );
    assert ( is_array($values) );
    assert ( count($values) );

    if ( !is_array($values) || !count($values) )
        return;

    foreach ( $values as $candidate )
    {
        assert ( is_numeric($candidate) );
    }

    $filter = array (
        "type"    => SPH_FILTER_VALUES,
        "attr"    => $attribute,
        "exclude" => $exclude,
        "values"  => $values,
    );
    $this->_filters[] = $filter;
}
531
+
532
/// add a numeric range filter
/// only match records if $attribute value is between $min and $max (inclusive),
/// or outside that range when $exclude is true
function SetFilterRange ( $attribute, $min, $max, $exclude=false )
{
    assert ( is_string($attribute) );
    assert ( is_numeric($min) );
    assert ( is_numeric($max) );
    assert ( $min<=$max );

    $filter = array (
        "type"    => SPH_FILTER_RANGE,
        "attr"    => $attribute,
        "exclude" => $exclude,
        "min"     => $min,
        "max"     => $max,
    );
    $this->_filters[] = $filter;
}
543
+
544
/// add a floating-point range filter
/// only match records if $attribute value is between $min and $max (inclusive),
/// or outside that range when $exclude is true; both bounds must be floats
function SetFilterFloatRange ( $attribute, $min, $max, $exclude=false )
{
    assert ( is_string($attribute) );
    assert ( is_float($min) );
    assert ( is_float($max) );
    assert ( $min<=$max );

    $filter = array (
        "type"    => SPH_FILTER_FLOATRANGE,
        "attr"    => $attribute,
        "exclude" => $exclude,
        "min"     => $min,
        "max"     => $max,
    );
    $this->_filters[] = $filter;
}
555
+
556
/// set up the anchor point for geosphere distance calculations
/// required to use @geodist in filters and sorting
/// $attrlat / $attrlong name the latitude / longitude attributes;
/// $lat and $long are floats and must be in radians
function SetGeoAnchor ( $attrlat, $attrlong, $lat, $long )
{
    assert ( is_string($attrlat) );
    assert ( is_string($attrlong) );
    assert ( is_float($lat) );
    assert ( is_float($long) );

    $anchor = array (
        "attrlat"  => $attrlat,
        "attrlong" => $attrlong,
        "lat"      => $lat,
        "long"     => $long,
    );
    $this->_anchor = $anchor;
}
568
+
569
/// set grouping attribute, grouping function and group sorting clause
/// $func must be one of the SPH_GROUPBY_xxx constants
function SetGroupBy ( $attribute, $func, $groupsort="@group desc" )
{
    assert ( is_string($attribute) );
    assert ( is_string($groupsort) );

    $known = array (
        SPH_GROUPBY_DAY,
        SPH_GROUPBY_WEEK,
        SPH_GROUPBY_MONTH,
        SPH_GROUPBY_YEAR,
        SPH_GROUPBY_ATTR,
        SPH_GROUPBY_ATTRPAIR,
    );
    assert ( in_array ( $func, $known ) );

    $this->_groupby = $attribute;
    $this->_groupfunc = $func;
    $this->_groupsort = $groupsort;
}
585
+
586
/// set the count-distinct attribute name (a string) for group-by queries
function SetGroupDistinct ( $attribute )
{
    assert ( is_string($attribute) );

    $this->_groupdistinct = $attribute;
}
592
+
593
/// set distributed retries count and delay
/// both must be non-negative integers
function SetRetries ( $count, $delay=0 )
{
    assert ( is_int($count) && $count>=0 );
    assert ( is_int($delay) && $delay>=0 );

    list ( $this->_retrycount, $this->_retrydelay ) = array ( $count, $delay );
}
601
+
602
/// choose the result set format: hash (false, the default) or array (true)
/// PHP specific; needed for group-by-MVA result sets that may contain duplicate IDs
function SetArrayResult ( $arrayresult )
{
    assert ( is_bool($arrayresult) );

    $this->_arrayresult = $arrayresult;
}
609
+
610
/// register per-document value overrides for one attribute
/// a later call for the same $attrname replaces the earlier override
/// $values must be a hash that maps document IDs to attribute values
function SetOverride ( $attrname, $attrtype, $values )
{
    $allowed = array ( SPH_ATTR_INTEGER, SPH_ATTR_TIMESTAMP, SPH_ATTR_BOOL, SPH_ATTR_FLOAT, SPH_ATTR_BIGINT );

    assert ( is_string ( $attrname ) );
    assert ( in_array ( $attrtype, $allowed ) );
    assert ( is_array ( $values ) );

    $override = array (
        "attr"   => $attrname,
        "type"   => $attrtype,
        "values" => $values,
    );
    $this->_overrides[$attrname] = $override;
}
621
+
622
/// set the select-list (attributes or expressions, SQL-like syntax)
/// $select must be a string
function SetSelect ( $select )
{
    assert ( is_string ( $select ) );

    $this->_select = $select;
}
628
+
629
+ //////////////////////////////////////////////////////////////////////////////
630
+
631
/// drop every filter and the geo anchor (for multi-queries)
function ResetFilters ()
{
    $this->_anchor = array();
    $this->_filters = array();
}
637
+
638
/// restore the group-by settings to their defaults (for multi-queries)
function ResetGroupBy ()
{
    $this->_groupby = $this->_groupdistinct = "";
    $this->_groupfunc = SPH_GROUPBY_DAY;
    $this->_groupsort = "@group desc";
}
646
+
647
/// forget every attribute value override (for multi-queries)
function ResetOverrides ()
{
    $none = array ();
    $this->_overrides = $none;
}
652
+
653
+ //////////////////////////////////////////////////////////////////////////////
654
+
655
/// run a single search query through the given indexes and return its result
///
/// queues the query with AddQuery(), executes it with RunQueries() and copies
/// the first result's error and warning onto the client. Returns false when
/// RunQueries() does not return an array or the result status is SEARCHD_ERROR.
function Query ( $query, $index="*", $comment="" )
{
    assert ( empty($this->_reqs) );

    $this->AddQuery ( $query, $index, $comment );
    $results = $this->RunQueries ();

    // probably network error; error message should be already filled
    if ( !is_array($results) )
        return false;

    $first = $results[0];
    $this->_error = $first["error"];
    $this->_warning = $first["warning"];

    return ( $first["status"]==SEARCHD_ERROR ) ? false : $first;
}
674
+
675
/// pack a float as 4 bytes in network byte order
/// the value is packed in machine order first, re-read as a machine-order
/// 32-bit integer, and that integer is then packed big-endian
function _PackFloat ( $f )
{
    $machine = pack ( "f", $f );
    $bits = unpack ( "L*", $machine );
    return pack ( "N", $bits[1] );
}
682
+
683
/// serialise the current query settings plus $query into a request and append
/// it to the multi-query batch
/// returns the index of this query in the results array from RunQueries()
function AddQuery ( $query, $index="*", $comment="" )
{
    // mbstring workaround
    $this->_MBPush ();

    // mode and limits, sort-by clause, query text
    $packet = pack ( "NNNNN", $this->_offset, $this->_limit, $this->_mode, $this->_ranker, $this->_sort );
    $packet .= pack ( "N", strlen($this->_sortby) ) . $this->_sortby;
    $packet .= pack ( "N", strlen($query) ) . $query;

    // positional weights
    $packet .= pack ( "N", count($this->_weights) );
    foreach ( $this->_weights as $w )
    {
        $packet .= pack ( "N", (int)$w );
    }

    // indexes, then the id64 range marker followed by the id64 range itself
    $packet .= pack ( "N", strlen($index) ) . $index;
    $packet .= pack ( "N", 1 );
    $packet .= sphPack64 ( $this->_min_id );
    $packet .= sphPack64 ( $this->_max_id );

    // filters
    $packet .= pack ( "N", count($this->_filters) );
    foreach ( $this->_filters as $f )
    {
        $type = $f["type"];
        $packet .= pack ( "N", strlen($f["attr"]) ) . $f["attr"];
        $packet .= pack ( "N", $type );

        if ( $type==SPH_FILTER_VALUES )
        {
            $packet .= pack ( "N", count($f["values"]) );
            foreach ( $f["values"] as $item )
                $packet .= sphPack64 ( $item );
        }
        elseif ( $type==SPH_FILTER_RANGE )
        {
            $packet .= sphPack64 ( $f["min"] ) . sphPack64 ( $f["max"] );
        }
        elseif ( $type==SPH_FILTER_FLOATRANGE )
        {
            $packet .= $this->_PackFloat ( $f["min"] ) . $this->_PackFloat ( $f["max"] );
        }
        else
        {
            assert ( 0 && "internal error: unhandled filter type" );
        }

        $packet .= pack ( "N", $f["exclude"] );
    }

    // group-by clause, max-matches count, group-sort clause, cutoff count
    $packet .= pack ( "NN", $this->_groupfunc, strlen($this->_groupby) ) . $this->_groupby;
    $packet .= pack ( "N", $this->_maxmatches );
    $packet .= pack ( "N", strlen($this->_groupsort) ) . $this->_groupsort;
    $packet .= pack ( "NNN", $this->_cutoff, $this->_retrycount, $this->_retrydelay );
    $packet .= pack ( "N", strlen($this->_groupdistinct) ) . $this->_groupdistinct;

    // anchor point: a presence flag, then attribute names and coordinates
    if ( !empty($this->_anchor) )
    {
        $anchor = $this->_anchor;
        $packet .= pack ( "N", 1 );
        $packet .= pack ( "N", strlen($anchor["attrlat"]) ) . $anchor["attrlat"];
        $packet .= pack ( "N", strlen($anchor["attrlong"]) ) . $anchor["attrlong"];
        $packet .= $this->_PackFloat ( $anchor["lat"] ) . $this->_PackFloat ( $anchor["long"] );
    }
    else
    {
        $packet .= pack ( "N", 0 );
    }

    // per-index weights
    $packet .= pack ( "N", count($this->_indexweights) );
    foreach ( $this->_indexweights as $name=>$w )
    {
        $packet .= pack ( "N", strlen($name) ) . $name . pack ( "N", $w );
    }

    // max query time
    $packet .= pack ( "N", $this->_maxquerytime );

    // per-field weights
    $packet .= pack ( "N", count($this->_fieldweights) );
    foreach ( $this->_fieldweights as $name=>$w )
    {
        $packet .= pack ( "N", strlen($name) ) . $name . pack ( "N", $w );
    }

    // comment
    $packet .= pack ( "N", strlen($comment) ) . $comment;

    // attribute overrides
    $packet .= pack ( "N", count($this->_overrides) );
    foreach ( $this->_overrides as $override )
    {
        $type = $override["type"];
        $packet .= pack ( "N", strlen($override["attr"]) ) . $override["attr"];
        $packet .= pack ( "NN", $type, count($override["values"]) );

        foreach ( $override["values"] as $docid=>$value )
        {
            assert ( is_numeric($docid) );
            assert ( is_numeric($value) );

            $packet .= sphPack64 ( $docid );
            if ( $type==SPH_ATTR_FLOAT )
                $packet .= $this->_PackFloat ( $value );
            elseif ( $type==SPH_ATTR_BIGINT )
                $packet .= sphPack64 ( $value );
            else
                $packet .= pack ( "N", $value );
        }
    }

    // select-list
    $packet .= pack ( "N", strlen($this->_select) ) . $this->_select;

    // mbstring workaround
    $this->_MBPop ();

    // store request to requests array
    $this->_reqs[] = $packet;
    return count($this->_reqs)-1;
}
796
+
797
/// send every request queued via AddQuery() to searchd as a single batch.
/// returns false on failure, or an array holding one result set per
/// answered request on success
function RunQueries ()
{
	if ( empty($this->_reqs) )
	{
		$this->_error = "no queries defined, issue AddQuery() first";
		return false;
	}

	// mbstring workaround
	$this->_MBPush ();

	$fp = $this->_Connect();
	if ( !$fp )
	{
		$this->_MBPop ();
		return false;
	}

	////////////////////////////
	// send query, get response
	////////////////////////////

	$nreqs = count($this->_reqs);
	$body = join ( "", $this->_reqs );
	$bodylen = 4+strlen($body); // the 4 extra bytes carry the request count
	$packet = pack ( "nnNN", SEARCHD_COMMAND_SEARCH, VER_COMMAND_SEARCH, $bodylen, $nreqs ) . $body;

	// the command/version/length header adds 8 more bytes on the wire
	fwrite ( $fp, $packet, $bodylen+8 );

	$response = $this->_GetResponse ( $fp, VER_COMMAND_SEARCH );
	if ( !$response )
	{
		$this->_MBPop ();
		return false;
	}

	// the queue is only emptied once a response has been received
	$this->_reqs = array ();

	//////////////////
	// parse response
	//////////////////

	$pos = 0; // read cursor into the response
	$end = strlen($response); // cursor limit, guards the loops against broken responses

	$results = array ();
	for ( $ires=0; $ires<$nreqs && $pos<$end; $ires++ )
	{
		$result = array ( "error"=>"", "warning"=>"" );

		// per-query status, followed by a message unless the status is OK
		list(,$status) = unpack ( "N*", substr ( $response, $pos, 4 ) ); $pos += 4;
		$result["status"] = $status;
		if ( $status!=SEARCHD_OK )
		{
			list(,$msglen) = unpack ( "N*", substr ( $response, $pos, 4 ) ); $pos += 4;
			$message = substr ( $response, $pos, $msglen ); $pos += $msglen;

			if ( $status!=SEARCHD_WARNING )
			{
				// failed query: nothing else to parse for this result set
				$result["error"] = $message;
				$results[] = $result;
				continue;
			}
			$result["warning"] = $message;
		}

		// schema: full-text field names
		$fields = array ();
		list(,$nfields) = unpack ( "N*", substr ( $response, $pos, 4 ) ); $pos += 4;
		for ( $i=0; $i<$nfields && $pos<$end; $i++ )
		{
			list(,$namelen) = unpack ( "N*", substr ( $response, $pos, 4 ) ); $pos += 4;
			$fields[] = substr ( $response, $pos, $namelen ); $pos += $namelen;
		}
		$result["fields"] = $fields;

		// schema: attribute name => attribute type
		$attrs = array ();
		list(,$nattrs) = unpack ( "N*", substr ( $response, $pos, 4 ) ); $pos += 4;
		for ( $i=0; $i<$nattrs && $pos<$end; $i++ )
		{
			list(,$namelen) = unpack ( "N*", substr ( $response, $pos, 4 ) ); $pos += 4;
			$attrname = substr ( $response, $pos, $namelen ); $pos += $namelen;
			list(,$attrtype) = unpack ( "N*", substr ( $response, $pos, 4 ) ); $pos += 4;
			$attrs[$attrname] = $attrtype;
		}
		$result["attrs"] = $attrs;

		// match count, and whether document ids are sent as 64bit values
		list(,$count) = unpack ( "N*", substr ( $response, $pos, 4 ) ); $pos += 4;
		list(,$id64) = unpack ( "N*", substr ( $response, $pos, 4 ) ); $pos += 4;

		// matches
		for ( $idx=0; $idx<$count && $pos<$end; $idx++ )
		{
			// document id and weight
			if ( $id64 )
			{
				$doc = sphUnpack64 ( substr ( $response, $pos, 8 ) ); $pos += 8;
				list(,$weight) = unpack ( "N*", substr ( $response, $pos, 4 ) ); $pos += 4;
			} else
			{
				list ( $doc, $weight ) = array_values ( unpack ( "N*N*",
					substr ( $response, $pos, 8 ) ) );
				$pos += 8;

				if ( PHP_INT_SIZE>=8 )
				{
					// x64 route, workaround broken unpack() in 5.2.2+
					if ( $doc<0 )
						$doc += (1<<32);
				} else
				{
					// x32 route, render the id as an unsigned decimal string
					$doc = sprintf ( "%u", $doc );
				}
			}
			$weight = sprintf ( "%u", $weight );

			// attribute values, in schema order
			$attrvals = array ();
			foreach ( $attrs as $attrname=>$attrtype )
			{
				if ( $attrtype==SPH_ATTR_BIGINT )
				{
					// 64bit ints
					$attrvals[$attrname] = sphUnpack64 ( substr ( $response, $pos, 8 ) );
					$pos += 8;
				}
				elseif ( $attrtype==SPH_ATTR_FLOAT )
				{
					// floats: read the raw 32 bits, then reinterpret them
					list(,$bits) = unpack ( "N*", substr ( $response, $pos, 4 ) ); $pos += 4;
					list(,$fval) = unpack ( "f*", pack ( "L", $bits ) );
					$attrvals[$attrname] = $fval;
				}
				else
				{
					// everything else is handled as unsigned ints
					list(,$val) = unpack ( "N*", substr ( $response, $pos, 4 ) ); $pos += 4;
					if ( $attrtype & SPH_ATTR_MULTI )
					{
						// multi-value: the int just read is the number of values
						$attrvals[$attrname] = array ();
						for ( $i=0; $i<$val && $pos<$end; $i++ )
						{
							list(,$item) = unpack ( "N*", substr ( $response, $pos, 4 ) ); $pos += 4;
							$attrvals[$attrname][] = sprintf ( "%u", $item );
						}
					} else
					{
						$attrvals[$attrname] = sprintf ( "%u", $val );
					}
				}
			}

			// store the match: by position in array-result mode, by document id otherwise
			if ( $this->_arrayresult )
			{
				$result["matches"][$idx] = array ( "id"=>$doc, "weight"=>$weight, "attrs"=>$attrvals );
			} else
			{
				$result["matches"][$doc]["weight"] = $weight;
				$result["matches"][$doc]["attrs"] = $attrvals;
			}
		}

		// totals, elapsed time, per-word statistics count
		list ( $total, $total_found, $msecs, $nwords ) =
			array_values ( unpack ( "N*N*N*N*", substr ( $response, $pos, 16 ) ) );
		$pos += 16;
		$result["total"] = sprintf ( "%u", $total );
		$result["total_found"] = sprintf ( "%u", $total_found );
		$result["time"] = sprintf ( "%.3f", $msecs/1000 );

		for ( $i=0; $i<$nwords && $pos<$end; $i++ )
		{
			list(,$wordlen) = unpack ( "N*", substr ( $response, $pos, 4 ) ); $pos += 4;
			$word = substr ( $response, $pos, $wordlen ); $pos += $wordlen;
			list ( $docs, $hits ) = array_values ( unpack ( "N*N*", substr ( $response, $pos, 8 ) ) ); $pos += 8;
			$result["words"][$word] = array (
				"docs"=>sprintf ( "%u", $docs ),
				"hits"=>sprintf ( "%u", $hits ) );
		}

		$results[] = $result;
	}

	$this->_MBPop ();
	return $results;
}
993
+
994
+ /////////////////////////////////////////////////////////////////////////////
995
+ // excerpts generation
996
+ /////////////////////////////////////////////////////////////////////////////
997
+
998
/// connect to searchd server, and generate excerpts (snippets)
/// of the given documents for the given query words. returns false
/// on failure, or an array with one snippet per document on success
function BuildExcerpts ( $docs, $index, $words, $opts=array() )
{
	assert ( is_array($docs) );
	assert ( is_string($index) );
	assert ( is_string($words) );
	assert ( is_array($opts) );

	// mbstring workaround
	$this->_MBPush ();

	$fp = $this->_Connect();
	if ( !$fp )
	{
		$this->_MBPop();
		return false;
	}

	/////////////////
	// fixup options
	/////////////////

	// every option the caller left unset (or set to null) gets its default
	$defaults = array (
		"before_match"		=> "<b>",
		"after_match"		=> "</b>",
		"chunk_separator"	=> " ... ",
		"limit"				=> 256,
		"around"			=> 5,
		"exact_phrase"		=> false,
		"single_passage"	=> false,
		"use_boundaries"	=> false,
		"weight_order"		=> false );
	foreach ( $defaults as $name=>$value )
	{
		if ( !isset($opts[$name]) )
			$opts[$name] = $value;
	}

	/////////////////
	// build request
	/////////////////

	// v.1.0 req; bit 1 (remove spaces) is always set, the rest follow the boolean options
	$flagbits = array (
		"exact_phrase"		=> 2,
		"single_passage"	=> 4,
		"use_boundaries"	=> 8,
		"weight_order"		=> 16 );
	$flags = 1;
	foreach ( $flagbits as $name=>$bit )
	{
		if ( $opts[$name] )
			$flags |= $bit;
	}

	$body = pack ( "NN", 0, $flags ) // mode=0, flags=$flags
		. pack ( "N", strlen($index) ) . $index // req index
		. pack ( "N", strlen($words) ) . $words; // req words

	// options
	foreach ( array ( "before_match", "after_match", "chunk_separator" ) as $name )
		$body .= pack ( "N", strlen($opts[$name]) ) . $opts[$name];
	$body .= pack ( "N", (int)$opts["limit"] );
	$body .= pack ( "N", (int)$opts["around"] );

	// documents
	$ndocs = count($docs);
	$body .= pack ( "N", $ndocs );
	foreach ( $docs as $doc )
	{
		assert ( is_string($doc) );
		$body .= pack ( "N", strlen($doc) ) . $doc;
	}

	////////////////////////////
	// send query, get response
	////////////////////////////

	$bodylen = strlen($body);
	$packet = pack ( "nnN", SEARCHD_COMMAND_EXCERPT, VER_COMMAND_EXCERPT, $bodylen ) . $body; // add header
	fwrite ( $fp, $packet, $bodylen+8 );

	$response = $this->_GetResponse ( $fp, VER_COMMAND_EXCERPT );
	if ( !$response )
	{
		$this->_MBPop ();
		return false;
	}

	//////////////////
	// parse response
	//////////////////

	// the reply is one length-prefixed string per requested document
	$cursor = 0;
	$limit = strlen($response);
	$excerpts = array ();
	for ( $i=0; $i<$ndocs; $i++ )
	{
		list(,$size) = unpack ( "N*", substr ( $response, $cursor, 4 ) );
		$cursor += 4;

		if ( $cursor+$size > $limit )
		{
			$this->_error = "incomplete reply";
			$this->_MBPop ();
			return false;
		}

		if ( $size )
			$excerpts[] = substr ( $response, $cursor, $size );
		else
			$excerpts[] = "";
		$cursor += $size;
	}

	$this->_MBPop ();
	return $excerpts;
}
1097
+
1098
+
1099
+ /////////////////////////////////////////////////////////////////////////////
1100
+ // keyword generation
1101
+ /////////////////////////////////////////////////////////////////////////////
1102
+
1103
/// connect to searchd server, and have it split the given query into keywords
/// using the given index. returns false on failure, or an array with one
/// entry per keyword ("tokenized" and "normalized" forms, plus "docs" and
/// "hits" counters when $hits is true) on success
function BuildKeywords ( $query, $index, $hits )
{
	assert ( is_string($query) );
	assert ( is_string($index) );
	assert ( is_bool($hits) );

	// mbstring workaround
	$this->_MBPush ();

	$fp = $this->_Connect();
	if ( !$fp )
	{
		$this->_MBPop();
		return false;
	}

	/////////////////
	// build request
	/////////////////

	// v.1.0 req: query string, index name, hits flag
	$body = pack ( "N", strlen($query) ) . $query
		. pack ( "N", strlen($index) ) . $index
		. pack ( "N", (int)$hits );

	////////////////////////////
	// send query, get response
	////////////////////////////

	$bodylen = strlen($body);
	$packet = pack ( "nnN", SEARCHD_COMMAND_KEYWORDS, VER_COMMAND_KEYWORDS, $bodylen ) . $body; // add header
	fwrite ( $fp, $packet, $bodylen+8 );

	$response = $this->_GetResponse ( $fp, VER_COMMAND_KEYWORDS );
	if ( !$response )
	{
		$this->_MBPop ();
		return false;
	}

	//////////////////
	// parse response
	//////////////////

	$limit = strlen($response);
	$keywords = array ();

	list(,$nwords) = unpack ( "N*", substr ( $response, 0, 4 ) );
	$cursor = 4;

	for ( $i=0; $i<$nwords; $i++ )
	{
		$entry = array ();

		// two length-prefixed strings: the tokenized form, then the normalized form
		list(,$size) = unpack ( "N*", substr ( $response, $cursor, 4 ) ); $cursor += 4;
		$entry["tokenized"] = $size ? substr ( $response, $cursor, $size ) : "";
		$cursor += $size;

		list(,$size) = unpack ( "N*", substr ( $response, $cursor, 4 ) ); $cursor += 4;
		$entry["normalized"] = $size ? substr ( $response, $cursor, $size ) : "";
		$cursor += $size;

		// statistics are only present when they were requested
		if ( $hits )
		{
			list($ndocs,$nhits) = array_values ( unpack ( "N*N*", substr ( $response, $cursor, 8 ) ) );
			$cursor += 8;
			$entry["docs"] = $ndocs;
			$entry["hits"] = $nhits;
		}

		$keywords[] = $entry;

		// the cursor ran past the end of the reply: it was truncated
		if ( $cursor > $limit )
		{
			$this->_error = "incomplete reply";
			$this->_MBPop ();
			return false;
		}
	}

	$this->_MBPop ();
	return $keywords;
}
1182
+
1183
/// escape characters that act as operators in the query syntax by
/// prefixing each of them with a backslash; returns the escaped string
function EscapeString ( $string )
{
	// the backslash MUST be the first pair: str_replace() applies the pairs
	// one after another, so escaping it later would double the backslashes
	// that the other replacements have just inserted. leaving it unescaped
	// (as before) lets input like '\(' turn into '\\(' and re-activate the
	// operator it was supposed to neutralize
	$from = array ( '\\', '(',')','|','-','!','@','~','"','&', '/', '^', '$', '=' );
	$to = array ( '\\\\', '\(','\)','\|','\-','\!','\@','\~','\"', '\&', '\/', '\^', '\$', '\=' );

	return str_replace ( $from, $to, $string );
}
1190
+
1191
+ /////////////////////////////////////////////////////////////////////////////
1192
+ // attribute updates
1193
+ /////////////////////////////////////////////////////////////////////////////
1194
+
1195
/// batch update given attributes in given rows in given indexes.
/// $attrs is an array of attribute names; $values maps a document id to an
/// array of new values, one per attribute (each value is an int, or an array
/// of ints when $mva is true).
/// returns amount of updated documents (0 or more) on success, or -1 on failure
function UpdateAttributes ( $index, $attrs, $values, $mva=false )
{
	// verify everything
	assert ( is_string($index) );
	assert ( is_bool($mva) );

	assert ( is_array($attrs) );
	foreach ( $attrs as $attr )
		assert ( is_string($attr) );

	assert ( is_array($values) );
	foreach ( $values as $id=>$entry )
	{
		assert ( is_numeric($id) );
		assert ( is_array($entry) );
		assert ( count($entry)==count($attrs) );
		foreach ( $entry as $v )
		{
			if ( $mva )
			{
				assert ( is_array($v) );
				foreach ( $v as $vv )
					assert ( is_int($vv) );
			} else
				assert ( is_int($v) );
		}
	}

	// mbstring workaround, applied the same way as in RunQueries(),
	// BuildExcerpts() and BuildKeywords(): the strlen()/substr() calls below
	// build and parse a binary packet. NOTE(review): presumably _MBPush()
	// makes those calls byte-oriented under mbstring overloading - confirm
	// against its definition
	$this->_MBPush ();

	// build request
	$req = pack ( "N", strlen($index) ) . $index;

	// attribute names, each followed by its multi-value flag
	$req .= pack ( "N", count($attrs) );
	foreach ( $attrs as $attr )
	{
		$req .= pack ( "N", strlen($attr) ) . $attr;
		$req .= pack ( "N", $mva ? 1 : 0 );
	}

	// rows: 64bit document id, then one value (or count + values for MVA) per attribute
	$req .= pack ( "N", count($values) );
	foreach ( $values as $id=>$entry )
	{
		$req .= sphPack64 ( $id );
		foreach ( $entry as $v )
		{
			$req .= pack ( "N", $mva ? count($v) : $v );
			if ( $mva )
				foreach ( $v as $vv )
					$req .= pack ( "N", $vv );
		}
	}

	// connect, send query, get response
	if (!( $fp = $this->_Connect() ))
	{
		$this->_MBPop ();
		return -1;
	}

	$len = strlen($req);
	$req = pack ( "nnN", SEARCHD_COMMAND_UPDATE, VER_COMMAND_UPDATE, $len ) . $req; // add header
	fwrite ( $fp, $req, $len+8 );

	if (!( $response = $this->_GetResponse ( $fp, VER_COMMAND_UPDATE ) ))
	{
		$this->_MBPop ();
		return -1;
	}

	// parse response: a single int with the number of updated documents
	list(,$updated) = unpack ( "N*", substr ( $response, 0, 4 ) );

	$this->_MBPop ();
	return $updated;
}
1263
+ }
1264
+
1265
+ //
1266
+ // $Id$
1267
+ //
1268
+
1269
+ ?>