whistlepig 0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,537 @@
1
+ #include <stdio.h>
2
+ #include <ruby.h>
3
+ #include "whistlepig.h"
4
+
5
+ static VALUE m_whistlepig;
6
+ static VALUE c_index;
7
+ static VALUE c_entry;
8
+ static VALUE c_query;
9
+ static VALUE c_error;
10
+ static VALUE c_parseerror;
11
+
/*
 * Local replacement for strdup(): returns a freshly malloc'ed copy of the
 * NUL-terminated string +old+. The caller owns the copy and releases it with
 * free().
 *
 * Returns NULL when +old+ is NULL or when the allocation fails, rather than
 * copying into a NULL destination.
 */
static char* strdup(const char* old) { // wtf stupid
  if(old == NULL) return NULL;

  size_t len = strlen(old) + 1; // +1 so the terminating NUL is copied too
  char* copy = malloc(len);
  if(copy == NULL) return NULL;

  return memcpy(copy, old, len);
}
17
+
18
+ static void index_free(wp_index* index) {
19
+ wp_error* e = wp_index_free(index);
20
+ //printf("# index free at %p with error %p\n", index, e);
21
+ if(e != NULL) {
22
+ PRINT_ERROR(e, stderr); // why not?
23
+ wp_error_free(e);
24
+ }
25
+ }
26
+
/*
 * Raises a Whistlepig::Error carrying the message of the wp_error* +e+ when
 * it is non-NULL; does nothing otherwise. The wp_error is released before
 * the exception is raised, so the message is copied into the Ruby exception
 * first.
 *
 * The argument is evaluated exactly once, so it is safe to pass an
 * expression (e.g. a function call) rather than only a variable.
 */
#define RAISE_IF_NECESSARY(e) do { \
  wp_error* raise_if_necessary_err_ = (e); \
  if(raise_if_necessary_err_ != NULL) { \
    VALUE exc = rb_exc_new2(c_error, raise_if_necessary_err_->msg); \
    wp_error_free(raise_if_necessary_err_); \
    rb_exc_raise(exc); \
  } \
} while(0)

// support 1.9 and 1.8: older headers do not provide RSTRING_PTR
#ifndef RSTRING_PTR
#define RSTRING_PTR(v) (RSTRING(v)->ptr)
#endif
39
+
40
+ /*
41
+ * call-seq: Index.new(pathname_base)
42
+ *
43
+ * Creates or loads a new index. The on-disk representation will be multiple
44
+ * files starting * with +pathname_base+.
45
+ *
46
+ * The index may be later be explicitly closed with Index#close. It will also
47
+ * be automatically closed when Ruby exits.
48
+ *
49
+ */
50
+
51
+ static VALUE index_new(VALUE class, VALUE v_pathname_base) {
52
+ Check_Type(v_pathname_base, T_STRING);
53
+
54
+ wp_index* index;
55
+ wp_error* e;
56
+ char* pathname_base = RSTRING_PTR(v_pathname_base);
57
+
58
+ if(wp_index_exists(pathname_base)) e = wp_index_load(&index, strdup(pathname_base));
59
+ else e = wp_index_create(&index, strdup(pathname_base));
60
+ RAISE_IF_NECESSARY(e);
61
+
62
+ VALUE o_index = Data_Wrap_Struct(class, NULL, index_free, index);
63
+ VALUE argv[1] = { v_pathname_base };
64
+ rb_obj_call_init(o_index, 1, argv);
65
+ return o_index;
66
+ }
67
+
68
+ /*
69
+ * call-seq: Index.create(pathname_base)
70
+ *
71
+ * Creates a new index, raising an error if it already exists. The on-disk
72
+ * representation will be multiple files starting with
73
+ * +pathname_base+.
74
+ *
75
+ */
76
+
77
+ static VALUE index_create(VALUE class, VALUE v_pathname_base) {
78
+ Check_Type(v_pathname_base, T_STRING);
79
+
80
+ wp_index* index;
81
+ wp_error* e = wp_index_create(&index, strdup(RSTRING_PTR(v_pathname_base)));
82
+ //printf("# index create at %p, error is %p\n", index, e);
83
+ RAISE_IF_NECESSARY(e);
84
+
85
+ VALUE o_index = Data_Wrap_Struct(class, NULL, index_free, index);
86
+ VALUE argv[1] = { v_pathname_base };
87
+ rb_obj_call_init(o_index, 1, argv);
88
+ return o_index;
89
+ }
90
+
91
+ /*
92
+ * call-seq: Index.load(pathname_base)
93
+ *
94
+ * Loads a new index, raising an error if it doesn't exists. The on-disk *
95
+ * representation will be multiple files starting with
96
+ * +pathname_base+.
97
+ *
98
+ */
99
+
100
+ static VALUE index_load(VALUE class, VALUE v_pathname_base) {
101
+ Check_Type(v_pathname_base, T_STRING);
102
+
103
+ wp_index* index;
104
+ wp_error* e = wp_index_load(&index, strdup(RSTRING_PTR(v_pathname_base)));
105
+ //printf("# index load at %p, error is %p\n", index, e);
106
+ RAISE_IF_NECESSARY(e);
107
+
108
+ VALUE o_index = Data_Wrap_Struct(class, NULL, index_free, index);
109
+ VALUE argv[1] = { v_pathname_base };
110
+ rb_obj_call_init(o_index, 1, argv);
111
+ return o_index;
112
+ }
113
+
114
+ /*
115
+ * call-seq: Index.exists?(pathname_base)
116
+ *
117
+ * Returns true iff an index with base pathname of +pathname_base+
118
+ * exists on disk.
119
+ *
120
+ */
121
+ static VALUE index_exists(VALUE class, VALUE v_pathname_base) {
122
+ Check_Type(v_pathname_base, T_STRING);
123
+
124
+ if(wp_index_exists(RSTRING_PTR(v_pathname_base))) return Qtrue;
125
+ else return Qfalse;
126
+ }
127
+
128
+ /*
129
+ * call-seq: Index.delete!(pathname_base)
130
+ *
131
+ * Deletes the index with base pathname +pathname_base+ from disk.
132
+ * Does nothing if the index does not exist. If that index is currently loaded
133
+ * in memory, expect may to see segfaults when you try to access it.
134
+ *
135
+ */
136
+ static VALUE index_delete(VALUE class, VALUE v_pathname_base) {
137
+ Check_Type(v_pathname_base, T_STRING);
138
+
139
+ wp_error* e = wp_index_delete(RSTRING_PTR(v_pathname_base));
140
+ RAISE_IF_NECESSARY(e);
141
+
142
+ return v_pathname_base;
143
+ }
144
+
145
+ /*
146
+ * Returns the number of entries in the index.
147
+ *
148
+ */
149
+ static VALUE index_size(VALUE self) {
150
+ wp_index* index;
151
+ Data_Get_Struct(self, wp_index, index);
152
+ return INT2NUM(wp_index_num_docs(index));
153
+ }
154
+
155
+ static VALUE index_init(VALUE self, VALUE v_pathname_base) {
156
+ rb_iv_set(self, "@pathname_base", v_pathname_base);
157
+ return self;
158
+ }
159
+
160
+ /*
161
+ * call-seq: count(query)
162
+ *
163
+ * Returns the number of entries matched by +query+, which should be a Query object.
164
+ * Note that in the current implementation, this is almost as expensive as retrieving all the
165
+ * results directly.
166
+ *
167
+ */
168
+ static VALUE index_count(VALUE self, VALUE v_query) {
169
+ if(CLASS_OF(v_query) != c_query) {
170
+ rb_raise(rb_eTypeError, "query must be a Whistlepig::Query object"); // would be nice to support subclasses somehow...
171
+ // not reached
172
+ }
173
+
174
+ wp_index* index; Data_Get_Struct(self, wp_index, index);
175
+ wp_query* query; Data_Get_Struct(v_query, wp_query, query);
176
+ uint32_t num_results;
177
+ // clone the query because we don't want to interrupt any search state
178
+ // which may otherwise be being used for pagination.
179
+ wp_error* e = wp_index_count_results(index, wp_query_clone(query), &num_results);
180
+ RAISE_IF_NECESSARY(e);
181
+
182
+ return INT2NUM(num_results);
183
+ }
184
+
185
+ /*
186
+ * Closes the index, flushing all changes to disk. Future calls to this index
187
+ * may result in a segfault.
188
+ *
189
+ */
190
+ static VALUE index_close(VALUE self) {
191
+ wp_index* index; Data_Get_Struct(self, wp_index, index);
192
+ wp_error* e = wp_index_unload(index);
193
+ RAISE_IF_NECESSARY(e);
194
+
195
+ return Qnil;
196
+ }
197
+
198
+ static void entry_free(wp_entry* entry) {
199
+ wp_error* e = wp_entry_free(entry);
200
+ //printf("# entry free at %p with error %p\n", entry, e);
201
+ if(e != NULL) {
202
+ PRINT_ERROR(e, stderr); // why not?
203
+ wp_error_free(e);
204
+ }
205
+ }
206
+
207
+ /* Creates a new, empty entry. */
208
+ static VALUE entry_new(VALUE class) {
209
+ wp_entry* entry = wp_entry_new();
210
+
211
+ //printf("# entry create at %p\n", entry);
212
+ VALUE o_entry = Data_Wrap_Struct(class, NULL, entry_free, entry);
213
+ rb_obj_call_init(o_entry, 0, NULL);
214
+ return o_entry;
215
+ }
216
+
217
+ /*
218
+ * call-seq: add_token(field, token)
219
+ *
220
+ * Adds a single token +token+ with field +field</field> to an entry. Both
221
+ * +token+ and +field</field> must be strings.
222
+ *
223
+ * Returns itself.
224
+ */
225
+ static VALUE entry_add_token(VALUE self, VALUE field, VALUE term) {
226
+ Check_Type(field, T_STRING);
227
+ Check_Type(term, T_STRING);
228
+
229
+ wp_entry* entry; Data_Get_Struct(self, wp_entry, entry);
230
+ wp_error* e = wp_entry_add_token(entry, RSTRING_PTR(field), RSTRING_PTR(term));
231
+ RAISE_IF_NECESSARY(e);
232
+
233
+ return self;
234
+ }
235
+
236
+ /*
237
+ * call-seq: add_string(field, string)
238
+ *
239
+ * Adds a String +string+ with field +field</field> to an entry. The string
240
+ * will be tokenized on whitespace. Both +token+ and +string</field> must be
241
+ * strings.
242
+ *
243
+ * Returns itself.
244
+ */
245
+ static VALUE entry_add_string(VALUE self, VALUE field, VALUE string) {
246
+ Check_Type(field, T_STRING);
247
+ Check_Type(string, T_STRING);
248
+
249
+ wp_entry* entry; Data_Get_Struct(self, wp_entry, entry);
250
+ wp_error* e = wp_entry_add_string(entry, RSTRING_PTR(field), RSTRING_PTR(string));
251
+ RAISE_IF_NECESSARY(e);
252
+
253
+ return self;
254
+ }
255
+
256
+ /*
257
+ * Returns the number of tokens in the entry.
258
+ */
259
+ static VALUE entry_size(VALUE self) {
260
+ wp_entry* entry; Data_Get_Struct(self, wp_entry, entry);
261
+ return INT2NUM(wp_entry_size(entry));
262
+ }
263
+
264
+ /*
265
+ * call-seq: add_entry(entry)
266
+ *
267
+ * Adds the entry +entry+ to the index. Returns the document id
268
+ * corresponding to this entry.
269
+ */
270
+ static VALUE index_add_entry(VALUE self, VALUE v_entry) {
271
+ if(CLASS_OF(v_entry) != c_entry) {
272
+ rb_raise(rb_eTypeError, "entry must be a Whistlepig::Entry object"); // would be nice to support subclasses somehow...
273
+ // not reached
274
+ }
275
+
276
+ wp_index* index; Data_Get_Struct(self, wp_index, index);
277
+ wp_entry* entry; Data_Get_Struct(v_entry, wp_entry, entry);
278
+ uint64_t doc_id;
279
+ wp_error* e = wp_index_add_entry(index, entry, &doc_id);
280
+ RAISE_IF_NECESSARY(e);
281
+
282
+ return INT2NUM(doc_id);
283
+ }
284
+
285
+ /*
286
+ * call-seq: add_label(doc_id, label)
287
+ *
288
+ * Adds the label +label+ to the document corresponding to doc id
289
+ * +doc_id+ in the index. +label+ must be a String.
290
+ * If the label has already been added to the document, does nothing.
291
+ */
292
+ static VALUE index_add_label(VALUE self, VALUE v_doc_id, VALUE v_label) {
293
+ Check_Type(v_doc_id, T_FIXNUM);
294
+ Check_Type(v_label, T_STRING);
295
+
296
+ wp_index* index; Data_Get_Struct(self, wp_index, index);
297
+ wp_error* e = wp_index_add_label(index, RSTRING_PTR(v_label), NUM2INT(v_doc_id));
298
+ RAISE_IF_NECESSARY(e);
299
+
300
+ return v_label;
301
+ }
302
+
303
+ /*
304
+ * call-seq: remove_label(doc_id, label)
305
+ *
306
+ * Removes the label +label+ from the document corresponding to doc id
307
+ * +doc_id+ in the index. +label+ must be a String.
308
+ * If the label has not been added to the document, does nothing.
309
+ */
310
+ static VALUE index_remove_label(VALUE self, VALUE v_doc_id, VALUE v_label) {
311
+ Check_Type(v_doc_id, T_FIXNUM);
312
+ Check_Type(v_label, T_STRING);
313
+
314
+ wp_index* index; Data_Get_Struct(self, wp_index, index);
315
+ wp_error* e = wp_index_remove_label(index, RSTRING_PTR(v_label), NUM2INT(v_doc_id));
316
+ RAISE_IF_NECESSARY(e);
317
+
318
+ return v_label;
319
+ }
320
+
321
+ /*
322
+ * call-seq: Query.new(default_field, query_string)
323
+ *
324
+ * Creates a new query by parsing the string +query_string+, which must be a
325
+ * String. Any non-fielded terms will used the field +default_field+, which
326
+ * must also be a String. Raises a ParseError if the query cannot be parsed.
327
+ *
328
+ */
329
+ static VALUE query_new(VALUE class, VALUE default_field, VALUE string) {
330
+ Check_Type(default_field, T_STRING);
331
+ Check_Type(string, T_STRING);
332
+
333
+ wp_query* query;
334
+ wp_error* e = wp_query_parse(RSTRING_PTR(string), RSTRING_PTR(default_field), &query);
335
+ if(e != NULL) {
336
+ VALUE exc = rb_exc_new2(c_parseerror, e->msg);
337
+ wp_error_free(e);
338
+ rb_exc_raise(exc);
339
+ }
340
+
341
+ VALUE o_query = Data_Wrap_Struct(class, NULL, wp_query_free, query);
342
+ VALUE argv[2] = { string, default_field };
343
+ rb_obj_call_init(o_query, 2, argv);
344
+
345
+ return o_query;
346
+ }
347
+
348
+ /*
349
+ * Returns a parsed representation of a String, useful for debugging.
350
+ */
351
+ static VALUE query_to_s(VALUE self) {
352
+ char buf[1024];
353
+
354
+ wp_query* query; Data_Get_Struct(self, wp_query, query);
355
+ wp_query_to_s(query, 1024, buf);
356
+
357
+ return rb_str_new2(buf);
358
+ }
359
+
360
+ /*
361
+ * call-seq: and(other)
362
+ *
363
+ * Returns a new Query that is a conjunction of this query and +other+, which
364
+ * must also be a Query object.
365
+ *
366
+ */
367
+ static VALUE query_and(VALUE self, VALUE v_other) {
368
+ if(CLASS_OF(v_other) != c_query) {
369
+ rb_raise(rb_eTypeError, "query must be a Whistlepig::Query object"); // would be nice to support subclasses somehow...
370
+ // not reached
371
+ }
372
+
373
+ wp_query* query; Data_Get_Struct(self, wp_query, query);
374
+ wp_query* other; Data_Get_Struct(v_other, wp_query, other);
375
+
376
+ wp_query* result = wp_query_new_conjunction();
377
+ result = wp_query_add(result, wp_query_clone(query));
378
+ result = wp_query_add(result, wp_query_clone(other));
379
+
380
+ VALUE o_result = Data_Wrap_Struct(c_query, NULL, wp_query_free, result);
381
+ VALUE argv[2] = { Qnil, Qnil }; // i guess
382
+ rb_obj_call_init(o_result, 2, argv);
383
+
384
+ return o_result;
385
+ }
386
+
387
+ /*
388
+ * call-seq: or(other)
389
+ *
390
+ * Returns a new Query that is a disjunction of this query and +other+, which
391
+ * must also be a Query object.
392
+ *
393
+ */
394
+ static VALUE query_or(VALUE self, VALUE v_other) {
395
+ if(CLASS_OF(v_other) != c_query) {
396
+ rb_raise(rb_eTypeError, "query must be a Whistlepig::Query object"); // would be nice to support subclasses somehow...
397
+ // not reached
398
+ }
399
+
400
+ wp_query* query; Data_Get_Struct(self, wp_query, query);
401
+ wp_query* other; Data_Get_Struct(v_other, wp_query, other);
402
+
403
+ wp_query* result = wp_query_new_disjunction();
404
+ result = wp_query_add(result, wp_query_clone(query));
405
+ result = wp_query_add(result, wp_query_clone(other));
406
+
407
+ VALUE o_result = Data_Wrap_Struct(c_query, NULL, wp_query_free, result);
408
+ VALUE argv[2] = { Qnil, Qnil }; // i guess
409
+ rb_obj_call_init(o_result, 2, argv);
410
+
411
+ return o_result;
412
+ }
413
+
414
+ static VALUE query_init(VALUE self, VALUE query) {
415
+ rb_iv_set(self, "@query", query);
416
+ return self;
417
+ }
418
+
419
+ /*
420
+ * call-seq: setup_query(query)
421
+ *
422
+ * Initializes query for use with run_query. If you do not call teardown_query
423
+ * on this query later, you will leak memory.
424
+ */
425
+ static VALUE index_setup_query(VALUE self, VALUE v_query) {
426
+ if(CLASS_OF(v_query) != c_query) {
427
+ rb_raise(rb_eTypeError, "query must be a Whistlepig::Query object"); // would be nice to support subclasses somehow...
428
+ // not reached
429
+ }
430
+
431
+ wp_index* index; Data_Get_Struct(self, wp_index, index);
432
+ wp_query* query; Data_Get_Struct(v_query, wp_query, query);
433
+ wp_error* e = wp_index_setup_query(index, query);
434
+ RAISE_IF_NECESSARY(e);
435
+
436
+ return self;
437
+ }
438
+
439
+ /*
440
+ * call-seq: teardown_query(query)
441
+ *
442
+ * Releases any held state used by the query, if it has been first passed to
443
+ * setup_query. If you call run_query on this query after calling this
444
+ * function, terrible things will happen.
445
+ */
446
+ static VALUE index_teardown_query(VALUE self, VALUE v_query) {
447
+ if(CLASS_OF(v_query) != c_query) {
448
+ rb_raise(rb_eTypeError, "query must be a Whistlepig::Query object"); // would be nice to support subclasses somehow...
449
+ // not reached
450
+ }
451
+
452
+ wp_index* index; Data_Get_Struct(self, wp_index, index);
453
+ wp_query* query; Data_Get_Struct(v_query, wp_query, query);
454
+ wp_error* e = wp_index_teardown_query(index, query);
455
+ RAISE_IF_NECESSARY(e);
456
+
457
+ return self;
458
+ }
459
+
460
+ /*
461
+ * call-seq: run_query(query, max_num_results)
462
+ *
463
+ * Runs a query which has been first passed to setup_query, and returns an
464
+ * array of at most +max_num_results+ doc ids. Can be called
465
+ * multiple times to retrieve successive results from the query. The query
466
+ * must have been passed to setup_query first, or terrible things will happen.
467
+ * The query must be passed to teardown_query when done, or memory leaks will
468
+ * occur.
469
+ *
470
+ */
471
+ static VALUE index_run_query(VALUE self, VALUE v_query, VALUE v_max_num_results) {
472
+ Check_Type(v_max_num_results, T_FIXNUM);
473
+ if(CLASS_OF(v_query) != c_query) {
474
+ rb_raise(rb_eTypeError, "query must be a Whistlepig::Query object"); // would be nice to support subclasses somehow...
475
+ // not reached
476
+ }
477
+
478
+ wp_index* index; Data_Get_Struct(self, wp_index, index);
479
+ wp_query* query; Data_Get_Struct(v_query, wp_query, query);
480
+
481
+ uint32_t max_num_results = NUM2INT(v_max_num_results);
482
+ uint32_t num_results;
483
+ uint64_t* results = malloc(sizeof(uint64_t) * max_num_results);
484
+
485
+ wp_error* e = wp_index_run_query(index, query, max_num_results, &num_results, results);
486
+ RAISE_IF_NECESSARY(e);
487
+
488
+ VALUE array = rb_ary_new2(num_results);
489
+ for(uint32_t i = 0; i < num_results; i++) {
490
+ rb_ary_store(array, i, INT2NUM(results[i]));
491
+ }
492
+ free(results);
493
+
494
+ return array;
495
+ }
496
+
497
+ void Init_whistlepigc() {
498
+ VALUE m_whistlepig;
499
+
500
+ m_whistlepig = rb_define_module("Whistlepig");
501
+
502
+ c_index = rb_define_class_under(m_whistlepig, "Index", rb_cObject);
503
+ rb_define_singleton_method(c_index, "new", index_new, 1);
504
+ rb_define_singleton_method(c_index, "create", index_create, 1);
505
+ rb_define_singleton_method(c_index, "load", index_load, 1);
506
+ rb_define_singleton_method(c_index, "delete!", index_delete, 1);
507
+ rb_define_singleton_method(c_index, "exists?", index_exists, 1);
508
+ rb_define_method(c_index, "initialize", index_init, 1);
509
+ rb_define_method(c_index, "close", index_close, 0);
510
+ rb_define_method(c_index, "size", index_size, 0);
511
+ rb_define_method(c_index, "add_entry", index_add_entry, 1);
512
+ rb_define_method(c_index, "add_label", index_add_label, 2);
513
+ rb_define_method(c_index, "remove_label", index_remove_label, 2);
514
+ rb_define_method(c_index, "count", index_count, 1);
515
+ rb_define_method(c_index, "setup_query", index_setup_query, 1);
516
+ rb_define_method(c_index, "run_query", index_run_query, 2);
517
+ rb_define_method(c_index, "teardown_query", index_teardown_query, 1);
518
+ rb_define_attr(c_index, "pathname_base", 1, 0);
519
+
520
+ c_entry = rb_define_class_under(m_whistlepig, "Entry", rb_cObject);
521
+ rb_define_singleton_method(c_entry, "new", entry_new, 0);
522
+ rb_define_method(c_entry, "size", entry_size, 0);
523
+ rb_define_method(c_entry, "add_token", entry_add_token, 2);
524
+ rb_define_method(c_entry, "add_string", entry_add_string, 2);
525
+ //rb_define_method(c_entry, "add_file", entry_add_file, 2);
526
+
527
+ c_query = rb_define_class_under(m_whistlepig, "Query", rb_cObject);
528
+ rb_define_singleton_method(c_query, "new", query_new, 2);
529
+ rb_define_method(c_query, "initialize", query_init, 2);
530
+ rb_define_method(c_query, "and", query_and, 1);
531
+ rb_define_method(c_query, "or", query_or, 1);
532
+ rb_define_method(c_query, "to_s", query_to_s, 0);
533
+ rb_define_attr(c_query, "query", 1, 0);
534
+
535
+ c_error = rb_define_class_under(m_whistlepig, "Error", rb_eStandardError);
536
+ c_parseerror = rb_define_class_under(m_whistlepig, "ParseError", rb_eStandardError);
537
+ }
data/lib/whistlepig.rb ADDED
@@ -0,0 +1,119 @@
1
+ require "whistlepigc"
2
+
3
+ module Whistlepig
4
+ ## A full-text index. You can add entries to it, and you can run queries
5
+ ## against it.
6
+ ##
7
+ ## To add documents, create Entry objects and call add_entry. Entries
8
+ ## represent the document before addition; add_entry will return an integer
9
+ ## docid and the entry can be discarded at that point.
10
+ ##
11
+ ## To run queries, the simplest option is to call Index#search or
12
+ ## Index#each_result_for.
13
+ ##
14
+ ## The more complex option is to use setup_query, run_query, and
15
+ ## teardown_query, in that order. The advantage of this approach is that
16
+ ## run_query can be called multiple times, and each call will return more
17
+ ## results, allowing for query pagination.
18
+ class Index
19
+ ## Runs a query and yield each matching doc id. Handles the mechanics of
20
+ ## setting up and tearing down the query.
21
+ def each_result_for query, chunk_size=10
22
+ setup_query query
23
+ begin
24
+ while true
25
+ results = run_query query, chunk_size
26
+ results.each { |r| yield r }
27
+ break if results.size < chunk_size
28
+ end
29
+ ensure
30
+ teardown_query query
31
+ end
32
+ self
33
+ end
34
+
35
+ ## Convenience method. Runs a query and returns up to +max_results+
36
+ ## matching doc ids. Handles the mechanics of setting up and tearing down
37
+ ## the query.
38
+ def search query, max_results=nil
39
+ setup_query query
40
+ ret = []
41
+ num_per_call = max_results || 100
42
+ begin
43
+ while true
44
+ results = run_query query, num_per_call
45
+ ret += results
46
+ break if max_results || results.size < num_per_call
47
+ end
48
+ ensure
49
+ teardown_query query
50
+ end
51
+
52
+ ret
53
+ end
54
+ end
55
+
56
+ ## Represents a document before it is added to the index.
57
+ ##
58
+ ## Entries allow you to build up a document in memory before indexing it.
59
+ ## Once you've built it, pass it to Index#add_entry.
60
+ class Entry
61
+ end
62
+
63
+ ## A generic error.
64
+ class Error
65
+ end
66
+
67
+ ## A parser error.
68
+ class ParseError
69
+ end
70
+
71
+ ## A query. Queries are created from strings with Query#new. If parsing the
72
+ ## string fails, a ParseError is thrown.
73
+ ##
74
+ ## At the lowest level, queries are composed of space-separated terms.
75
+ ## Matches against that term are restricted to the default field specified at
76
+ ## parse time.
77
+ ##
78
+ ## hello # search for "hello" in the default field
79
+ ##
80
+ ## Term matches can be restricted to another field by
81
+ ## prefixing them with the field name and ":", e.g. "subject:hello".
82
+ ##
83
+ ## subject:hello # search for "hello" in the "subject" field
84
+ ##
85
+ ## Multiple terms are considered conjunctive (i.e. all must match) unless the
86
+ ## special token "OR" appears between them. The "OR" must be capitalized
87
+ ## in this case.
88
+ ## word1 word2 # search for word1 and word2
89
+ ## word1 OR word2 # search for word1 or word2
90
+ ## subject:hello bob # "hello" in the subject field and "bob" in the
91
+ ## # default field
92
+ ##
93
+ ## Parentheses can be used to group disjunctions, conjunctions or fields.
94
+ ## (word1 OR word2) word3 # "word3" and either "word1" or "word2"
95
+ ## field:(word1 OR word2) # "word1" or "word2" in field "field"
96
+ ##
97
+ ## Phrases are specified by surrounding the terms with double quotes.
98
+ ## "bob jones" # documents with the phrase "bob jones"
99
+ ##
100
+ ## Negations can be specified with a - prefix.
101
+ ## -word # docs without "word"
102
+ ## -subject:(bob OR joe) # docs with neither "bob" nor "joe" in subject
103
+ ##
104
+ ## Labels are specified with a ~ prefix. Labels do not have fields.
105
+ ## ~inbox # docs with the "inbox" label
106
+ ## -~inbox # docs without the "inbox" label
107
+ ## -~inbox subject:hello # docs with subject "hello" and without the
108
+ ## # inbox label
109
+ ##
110
+ ## All of the above can be mixed and matched, of course.
111
+ ## -subject:"spam email" ~inbox (money OR cash)
112
+ ## ("love you" OR "hate you") -(~deleted OR ~spam)
113
+ ## etc...
114
+ ##
115
+ ## Existing query objects can also be altered programmatically, at least to
116
+ ## a limited extent, by calling Query#and and Query#or.
117
+ class Query
118
+ end
119
+ end