whistlepig 0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,537 @@
1
+ #include <stdio.h>
2
+ #include <ruby.h>
3
+ #include "whistlepig.h"
4
+
5
+ static VALUE m_whistlepig;
6
+ static VALUE c_index;
7
+ static VALUE c_entry;
8
+ static VALUE c_query;
9
+ static VALUE c_error;
10
+ static VALUE c_parseerror;
11
+
12
/*
 * Local stand-in for strdup(): returns a freshly malloc'd copy of the
 * NUL-terminated string `old`, or NULL if the allocation fails. The caller
 * owns the returned buffer.
 */
static char* strdup(const char* old) {
  size_t len = strlen(old) + 1; // +1 so the terminating NUL is copied too
  char* copy = malloc(len);
  if(copy == NULL) return NULL; // never hand a NULL destination to memcpy
  return memcpy(copy, old, len);
}
17
+
18
+ static void index_free(wp_index* index) {
19
+ wp_error* e = wp_index_free(index);
20
+ //printf("# index free at %p with error %p\n", index, e);
21
+ if(e != NULL) {
22
+ PRINT_ERROR(e, stderr); // why not?
23
+ wp_error_free(e);
24
+ }
25
+ }
26
+
27
/*
 * If `e` (a wp_error*) is non-NULL, raises a Whistlepig::Error carrying its
 * message. The exception object is built before the wp_error is freed, and
 * rb_exc_raise does not return. The argument is evaluated exactly once, so
 * it is safe to pass an expression with side effects.
 */
#define RAISE_IF_NECESSARY(e) do { \
  wp_error* wp_raise_err_ = (e); \
  if(wp_raise_err_ != NULL) { \
    VALUE wp_raise_exc_ = rb_exc_new2(c_error, wp_raise_err_->msg); \
    wp_error_free(wp_raise_err_); \
    rb_exc_raise(wp_raise_exc_); \
  } \
} while(0)
34
+
35
// support 1.9 and 1.8: fall back to direct struct access when the Ruby
// headers do not define RSTRING_PTR. The expansion is parenthesized so it
// behaves as a single expression wherever it is used.
#ifndef RSTRING_PTR
#define RSTRING_PTR(v) (RSTRING(v)->ptr)
#endif
39
+
40
+ /*
41
+ * call-seq: Index.new(pathname_base)
42
+ *
43
+ * Creates or loads a new index. The on-disk representation will be multiple
44
+ * files starting * with +pathname_base+.
45
+ *
46
+ * The index may be later be explicitly closed with Index#close. It will also
47
+ * be automatically closed when Ruby exits.
48
+ *
49
+ */
50
+
51
+ static VALUE index_new(VALUE class, VALUE v_pathname_base) {
52
+ Check_Type(v_pathname_base, T_STRING);
53
+
54
+ wp_index* index;
55
+ wp_error* e;
56
+ char* pathname_base = RSTRING_PTR(v_pathname_base);
57
+
58
+ if(wp_index_exists(pathname_base)) e = wp_index_load(&index, strdup(pathname_base));
59
+ else e = wp_index_create(&index, strdup(pathname_base));
60
+ RAISE_IF_NECESSARY(e);
61
+
62
+ VALUE o_index = Data_Wrap_Struct(class, NULL, index_free, index);
63
+ VALUE argv[1] = { v_pathname_base };
64
+ rb_obj_call_init(o_index, 1, argv);
65
+ return o_index;
66
+ }
67
+
68
+ /*
69
+ * call-seq: Index.create(pathname_base)
70
+ *
71
+ * Creates a new index, raising an error if it already exists. The on-disk
72
+ * representation will be multiple files starting with
73
+ * +pathname_base+.
74
+ *
75
+ */
76
+
77
+ static VALUE index_create(VALUE class, VALUE v_pathname_base) {
78
+ Check_Type(v_pathname_base, T_STRING);
79
+
80
+ wp_index* index;
81
+ wp_error* e = wp_index_create(&index, strdup(RSTRING_PTR(v_pathname_base)));
82
+ //printf("# index create at %p, error is %p\n", index, e);
83
+ RAISE_IF_NECESSARY(e);
84
+
85
+ VALUE o_index = Data_Wrap_Struct(class, NULL, index_free, index);
86
+ VALUE argv[1] = { v_pathname_base };
87
+ rb_obj_call_init(o_index, 1, argv);
88
+ return o_index;
89
+ }
90
+
91
+ /*
92
+ * call-seq: Index.load(pathname_base)
93
+ *
94
+ * Loads a new index, raising an error if it doesn't exists. The on-disk *
95
+ * representation will be multiple files starting with
96
+ * +pathname_base+.
97
+ *
98
+ */
99
+
100
+ static VALUE index_load(VALUE class, VALUE v_pathname_base) {
101
+ Check_Type(v_pathname_base, T_STRING);
102
+
103
+ wp_index* index;
104
+ wp_error* e = wp_index_load(&index, strdup(RSTRING_PTR(v_pathname_base)));
105
+ //printf("# index load at %p, error is %p\n", index, e);
106
+ RAISE_IF_NECESSARY(e);
107
+
108
+ VALUE o_index = Data_Wrap_Struct(class, NULL, index_free, index);
109
+ VALUE argv[1] = { v_pathname_base };
110
+ rb_obj_call_init(o_index, 1, argv);
111
+ return o_index;
112
+ }
113
+
114
+ /*
115
+ * call-seq: Index.exists?(pathname_base)
116
+ *
117
+ * Returns true iff an index with base pathname of +pathname_base+
118
+ * exists on disk.
119
+ *
120
+ */
121
+ static VALUE index_exists(VALUE class, VALUE v_pathname_base) {
122
+ Check_Type(v_pathname_base, T_STRING);
123
+
124
+ if(wp_index_exists(RSTRING_PTR(v_pathname_base))) return Qtrue;
125
+ else return Qfalse;
126
+ }
127
+
128
+ /*
129
+ * call-seq: Index.delete!(pathname_base)
130
+ *
131
+ * Deletes the index with base pathname +pathname_base+ from disk.
132
+ * Does nothing if the index does not exist. If that index is currently loaded
133
+ * in memory, expect may to see segfaults when you try to access it.
134
+ *
135
+ */
136
+ static VALUE index_delete(VALUE class, VALUE v_pathname_base) {
137
+ Check_Type(v_pathname_base, T_STRING);
138
+
139
+ wp_error* e = wp_index_delete(RSTRING_PTR(v_pathname_base));
140
+ RAISE_IF_NECESSARY(e);
141
+
142
+ return v_pathname_base;
143
+ }
144
+
145
+ /*
146
+ * Returns the number of entries in the index.
147
+ *
148
+ */
149
+ static VALUE index_size(VALUE self) {
150
+ wp_index* index;
151
+ Data_Get_Struct(self, wp_index, index);
152
+ return INT2NUM(wp_index_num_docs(index));
153
+ }
154
+
155
+ static VALUE index_init(VALUE self, VALUE v_pathname_base) {
156
+ rb_iv_set(self, "@pathname_base", v_pathname_base);
157
+ return self;
158
+ }
159
+
160
+ /*
161
+ * call-seq: count(query)
162
+ *
163
+ * Returns the number of entries matched by +query+, which should be a Query object.
164
+ * Note that in the current implementation, this is almost as expensive as retrieving all the
165
+ * results directly.
166
+ *
167
+ */
168
+ static VALUE index_count(VALUE self, VALUE v_query) {
169
+ if(CLASS_OF(v_query) != c_query) {
170
+ rb_raise(rb_eTypeError, "query must be a Whistlepig::Query object"); // would be nice to support subclasses somehow...
171
+ // not reached
172
+ }
173
+
174
+ wp_index* index; Data_Get_Struct(self, wp_index, index);
175
+ wp_query* query; Data_Get_Struct(v_query, wp_query, query);
176
+ uint32_t num_results;
177
+ // clone the query because we don't want to interrupt any search state
178
+ // which may otherwise be being used for pagination.
179
+ wp_error* e = wp_index_count_results(index, wp_query_clone(query), &num_results);
180
+ RAISE_IF_NECESSARY(e);
181
+
182
+ return INT2NUM(num_results);
183
+ }
184
+
185
+ /*
186
+ * Closes the index, flushing all changes to disk. Future calls to this index
187
+ * may result in a segfault.
188
+ *
189
+ */
190
+ static VALUE index_close(VALUE self) {
191
+ wp_index* index; Data_Get_Struct(self, wp_index, index);
192
+ wp_error* e = wp_index_unload(index);
193
+ RAISE_IF_NECESSARY(e);
194
+
195
+ return Qnil;
196
+ }
197
+
198
+ static void entry_free(wp_entry* entry) {
199
+ wp_error* e = wp_entry_free(entry);
200
+ //printf("# entry free at %p with error %p\n", entry, e);
201
+ if(e != NULL) {
202
+ PRINT_ERROR(e, stderr); // why not?
203
+ wp_error_free(e);
204
+ }
205
+ }
206
+
207
+ /* Creates a new, empty entry. */
208
+ static VALUE entry_new(VALUE class) {
209
+ wp_entry* entry = wp_entry_new();
210
+
211
+ //printf("# entry create at %p\n", entry);
212
+ VALUE o_entry = Data_Wrap_Struct(class, NULL, entry_free, entry);
213
+ rb_obj_call_init(o_entry, 0, NULL);
214
+ return o_entry;
215
+ }
216
+
217
+ /*
218
+ * call-seq: add_token(field, token)
219
+ *
220
+ * Adds a single token +token+ with field +field</field> to an entry. Both
221
+ * +token+ and +field</field> must be strings.
222
+ *
223
+ * Returns itself.
224
+ */
225
+ static VALUE entry_add_token(VALUE self, VALUE field, VALUE term) {
226
+ Check_Type(field, T_STRING);
227
+ Check_Type(term, T_STRING);
228
+
229
+ wp_entry* entry; Data_Get_Struct(self, wp_entry, entry);
230
+ wp_error* e = wp_entry_add_token(entry, RSTRING_PTR(field), RSTRING_PTR(term));
231
+ RAISE_IF_NECESSARY(e);
232
+
233
+ return self;
234
+ }
235
+
236
+ /*
237
+ * call-seq: add_string(field, string)
238
+ *
239
+ * Adds a String +string+ with field +field</field> to an entry. The string
240
+ * will be tokenized on whitespace. Both +token+ and +string</field> must be
241
+ * strings.
242
+ *
243
+ * Returns itself.
244
+ */
245
+ static VALUE entry_add_string(VALUE self, VALUE field, VALUE string) {
246
+ Check_Type(field, T_STRING);
247
+ Check_Type(string, T_STRING);
248
+
249
+ wp_entry* entry; Data_Get_Struct(self, wp_entry, entry);
250
+ wp_error* e = wp_entry_add_string(entry, RSTRING_PTR(field), RSTRING_PTR(string));
251
+ RAISE_IF_NECESSARY(e);
252
+
253
+ return self;
254
+ }
255
+
256
+ /*
257
+ * Returns the number of tokens in the entry.
258
+ */
259
+ static VALUE entry_size(VALUE self) {
260
+ wp_entry* entry; Data_Get_Struct(self, wp_entry, entry);
261
+ return INT2NUM(wp_entry_size(entry));
262
+ }
263
+
264
+ /*
265
+ * call-seq: add_entry(entry)
266
+ *
267
+ * Adds the entry +entry+ to the index. Returns the document id
268
+ * corresponding to this entry.
269
+ */
270
+ static VALUE index_add_entry(VALUE self, VALUE v_entry) {
271
+ if(CLASS_OF(v_entry) != c_entry) {
272
+ rb_raise(rb_eTypeError, "entry must be a Whistlepig::Entry object"); // would be nice to support subclasses somehow...
273
+ // not reached
274
+ }
275
+
276
+ wp_index* index; Data_Get_Struct(self, wp_index, index);
277
+ wp_entry* entry; Data_Get_Struct(v_entry, wp_entry, entry);
278
+ uint64_t doc_id;
279
+ wp_error* e = wp_index_add_entry(index, entry, &doc_id);
280
+ RAISE_IF_NECESSARY(e);
281
+
282
+ return INT2NUM(doc_id);
283
+ }
284
+
285
+ /*
286
+ * call-seq: add_label(doc_id, label)
287
+ *
288
+ * Adds the label +label+ to the document corresponding to doc id
289
+ * +doc_id+ in the index. +label+ must be a String.
290
+ * If the label has already been added to the document, does nothing.
291
+ */
292
+ static VALUE index_add_label(VALUE self, VALUE v_doc_id, VALUE v_label) {
293
+ Check_Type(v_doc_id, T_FIXNUM);
294
+ Check_Type(v_label, T_STRING);
295
+
296
+ wp_index* index; Data_Get_Struct(self, wp_index, index);
297
+ wp_error* e = wp_index_add_label(index, RSTRING_PTR(v_label), NUM2INT(v_doc_id));
298
+ RAISE_IF_NECESSARY(e);
299
+
300
+ return v_label;
301
+ }
302
+
303
+ /*
304
+ * call-seq: remove_label(doc_id, label)
305
+ *
306
+ * Removes the label +label+ from the document corresponding to doc id
307
+ * +doc_id+ in the index. +label+ must be a String.
308
+ * If the label has not been added to the document, does nothing.
309
+ */
310
+ static VALUE index_remove_label(VALUE self, VALUE v_doc_id, VALUE v_label) {
311
+ Check_Type(v_doc_id, T_FIXNUM);
312
+ Check_Type(v_label, T_STRING);
313
+
314
+ wp_index* index; Data_Get_Struct(self, wp_index, index);
315
+ wp_error* e = wp_index_remove_label(index, RSTRING_PTR(v_label), NUM2INT(v_doc_id));
316
+ RAISE_IF_NECESSARY(e);
317
+
318
+ return v_label;
319
+ }
320
+
321
+ /*
322
+ * call-seq: Query.new(default_field, query_string)
323
+ *
324
+ * Creates a new query by parsing the string +query_string+, which must be a
325
+ * String. Any non-fielded terms will used the field +default_field+, which
326
+ * must also be a String. Raises a ParseError if the query cannot be parsed.
327
+ *
328
+ */
329
+ static VALUE query_new(VALUE class, VALUE default_field, VALUE string) {
330
+ Check_Type(default_field, T_STRING);
331
+ Check_Type(string, T_STRING);
332
+
333
+ wp_query* query;
334
+ wp_error* e = wp_query_parse(RSTRING_PTR(string), RSTRING_PTR(default_field), &query);
335
+ if(e != NULL) {
336
+ VALUE exc = rb_exc_new2(c_parseerror, e->msg);
337
+ wp_error_free(e);
338
+ rb_exc_raise(exc);
339
+ }
340
+
341
+ VALUE o_query = Data_Wrap_Struct(class, NULL, wp_query_free, query);
342
+ VALUE argv[2] = { string, default_field };
343
+ rb_obj_call_init(o_query, 2, argv);
344
+
345
+ return o_query;
346
+ }
347
+
348
+ /*
349
+ * Returns a parsed representation of a String, useful for debugging.
350
+ */
351
+ static VALUE query_to_s(VALUE self) {
352
+ char buf[1024];
353
+
354
+ wp_query* query; Data_Get_Struct(self, wp_query, query);
355
+ wp_query_to_s(query, 1024, buf);
356
+
357
+ return rb_str_new2(buf);
358
+ }
359
+
360
+ /*
361
+ * call-seq: and(other)
362
+ *
363
+ * Returns a new Query that is a conjunction of this query and +other+, which
364
+ * must also be a Query object.
365
+ *
366
+ */
367
+ static VALUE query_and(VALUE self, VALUE v_other) {
368
+ if(CLASS_OF(v_other) != c_query) {
369
+ rb_raise(rb_eTypeError, "query must be a Whistlepig::Query object"); // would be nice to support subclasses somehow...
370
+ // not reached
371
+ }
372
+
373
+ wp_query* query; Data_Get_Struct(self, wp_query, query);
374
+ wp_query* other; Data_Get_Struct(v_other, wp_query, other);
375
+
376
+ wp_query* result = wp_query_new_conjunction();
377
+ result = wp_query_add(result, wp_query_clone(query));
378
+ result = wp_query_add(result, wp_query_clone(other));
379
+
380
+ VALUE o_result = Data_Wrap_Struct(c_query, NULL, wp_query_free, result);
381
+ VALUE argv[2] = { Qnil, Qnil }; // i guess
382
+ rb_obj_call_init(o_result, 2, argv);
383
+
384
+ return o_result;
385
+ }
386
+
387
+ /*
388
+ * call-seq: or(other)
389
+ *
390
+ * Returns a new Query that is a disjunction of this query and +other+, which
391
+ * must also be a Query object.
392
+ *
393
+ */
394
+ static VALUE query_or(VALUE self, VALUE v_other) {
395
+ if(CLASS_OF(v_other) != c_query) {
396
+ rb_raise(rb_eTypeError, "query must be a Whistlepig::Query object"); // would be nice to support subclasses somehow...
397
+ // not reached
398
+ }
399
+
400
+ wp_query* query; Data_Get_Struct(self, wp_query, query);
401
+ wp_query* other; Data_Get_Struct(v_other, wp_query, other);
402
+
403
+ wp_query* result = wp_query_new_disjunction();
404
+ result = wp_query_add(result, wp_query_clone(query));
405
+ result = wp_query_add(result, wp_query_clone(other));
406
+
407
+ VALUE o_result = Data_Wrap_Struct(c_query, NULL, wp_query_free, result);
408
+ VALUE argv[2] = { Qnil, Qnil }; // i guess
409
+ rb_obj_call_init(o_result, 2, argv);
410
+
411
+ return o_result;
412
+ }
413
+
414
+ static VALUE query_init(VALUE self, VALUE query) {
415
+ rb_iv_set(self, "@query", query);
416
+ return self;
417
+ }
418
+
419
+ /*
420
+ * call-seq: setup_query(query)
421
+ *
422
+ * Initializes query for use with run_query. If you do not call teardown_query
423
+ * on this query later, you will leak memory.
424
+ */
425
+ static VALUE index_setup_query(VALUE self, VALUE v_query) {
426
+ if(CLASS_OF(v_query) != c_query) {
427
+ rb_raise(rb_eTypeError, "query must be a Whistlepig::Query object"); // would be nice to support subclasses somehow...
428
+ // not reached
429
+ }
430
+
431
+ wp_index* index; Data_Get_Struct(self, wp_index, index);
432
+ wp_query* query; Data_Get_Struct(v_query, wp_query, query);
433
+ wp_error* e = wp_index_setup_query(index, query);
434
+ RAISE_IF_NECESSARY(e);
435
+
436
+ return self;
437
+ }
438
+
439
+ /*
440
+ * call-seq: teardown_query(query)
441
+ *
442
+ * Releases any held state used by the query, if it has been first passed to
443
+ * setup_query. If you call run_query on this query after calling this
444
+ * function, terrible things will happen.
445
+ */
446
+ static VALUE index_teardown_query(VALUE self, VALUE v_query) {
447
+ if(CLASS_OF(v_query) != c_query) {
448
+ rb_raise(rb_eTypeError, "query must be a Whistlepig::Query object"); // would be nice to support subclasses somehow...
449
+ // not reached
450
+ }
451
+
452
+ wp_index* index; Data_Get_Struct(self, wp_index, index);
453
+ wp_query* query; Data_Get_Struct(v_query, wp_query, query);
454
+ wp_error* e = wp_index_teardown_query(index, query);
455
+ RAISE_IF_NECESSARY(e);
456
+
457
+ return self;
458
+ }
459
+
460
+ /*
461
+ * call-seq: run_query(query, max_num_results)
462
+ *
463
+ * Runs a query which has been first passed to setup_query, and returns an
464
+ * array of at most +max_num_results+ doc ids. Can be called
465
+ * multiple times to retrieve successive results from the query. The query
466
+ * must have been passed to setup_query first, or terrible things will happen.
467
+ * The query must be passed to teardown_query when done, or memory leaks will
468
+ * occur.
469
+ *
470
+ */
471
+ static VALUE index_run_query(VALUE self, VALUE v_query, VALUE v_max_num_results) {
472
+ Check_Type(v_max_num_results, T_FIXNUM);
473
+ if(CLASS_OF(v_query) != c_query) {
474
+ rb_raise(rb_eTypeError, "query must be a Whistlepig::Query object"); // would be nice to support subclasses somehow...
475
+ // not reached
476
+ }
477
+
478
+ wp_index* index; Data_Get_Struct(self, wp_index, index);
479
+ wp_query* query; Data_Get_Struct(v_query, wp_query, query);
480
+
481
+ uint32_t max_num_results = NUM2INT(v_max_num_results);
482
+ uint32_t num_results;
483
+ uint64_t* results = malloc(sizeof(uint64_t) * max_num_results);
484
+
485
+ wp_error* e = wp_index_run_query(index, query, max_num_results, &num_results, results);
486
+ RAISE_IF_NECESSARY(e);
487
+
488
+ VALUE array = rb_ary_new2(num_results);
489
+ for(uint32_t i = 0; i < num_results; i++) {
490
+ rb_ary_store(array, i, INT2NUM(results[i]));
491
+ }
492
+ free(results);
493
+
494
+ return array;
495
+ }
496
+
497
+ void Init_whistlepigc() {
498
+ VALUE m_whistlepig;
499
+
500
+ m_whistlepig = rb_define_module("Whistlepig");
501
+
502
+ c_index = rb_define_class_under(m_whistlepig, "Index", rb_cObject);
503
+ rb_define_singleton_method(c_index, "new", index_new, 1);
504
+ rb_define_singleton_method(c_index, "create", index_create, 1);
505
+ rb_define_singleton_method(c_index, "load", index_load, 1);
506
+ rb_define_singleton_method(c_index, "delete!", index_delete, 1);
507
+ rb_define_singleton_method(c_index, "exists?", index_exists, 1);
508
+ rb_define_method(c_index, "initialize", index_init, 1);
509
+ rb_define_method(c_index, "close", index_close, 0);
510
+ rb_define_method(c_index, "size", index_size, 0);
511
+ rb_define_method(c_index, "add_entry", index_add_entry, 1);
512
+ rb_define_method(c_index, "add_label", index_add_label, 2);
513
+ rb_define_method(c_index, "remove_label", index_remove_label, 2);
514
+ rb_define_method(c_index, "count", index_count, 1);
515
+ rb_define_method(c_index, "setup_query", index_setup_query, 1);
516
+ rb_define_method(c_index, "run_query", index_run_query, 2);
517
+ rb_define_method(c_index, "teardown_query", index_teardown_query, 1);
518
+ rb_define_attr(c_index, "pathname_base", 1, 0);
519
+
520
+ c_entry = rb_define_class_under(m_whistlepig, "Entry", rb_cObject);
521
+ rb_define_singleton_method(c_entry, "new", entry_new, 0);
522
+ rb_define_method(c_entry, "size", entry_size, 0);
523
+ rb_define_method(c_entry, "add_token", entry_add_token, 2);
524
+ rb_define_method(c_entry, "add_string", entry_add_string, 2);
525
+ //rb_define_method(c_entry, "add_file", entry_add_file, 2);
526
+
527
+ c_query = rb_define_class_under(m_whistlepig, "Query", rb_cObject);
528
+ rb_define_singleton_method(c_query, "new", query_new, 2);
529
+ rb_define_method(c_query, "initialize", query_init, 2);
530
+ rb_define_method(c_query, "and", query_and, 1);
531
+ rb_define_method(c_query, "or", query_or, 1);
532
+ rb_define_method(c_query, "to_s", query_to_s, 0);
533
+ rb_define_attr(c_query, "query", 1, 0);
534
+
535
+ c_error = rb_define_class_under(m_whistlepig, "Error", rb_eStandardError);
536
+ c_parseerror = rb_define_class_under(m_whistlepig, "ParseError", rb_eStandardError);
537
+ }
data/lib/whistlepig.rb ADDED
@@ -0,0 +1,119 @@
1
+ require "whistlepigc"
2
+
3
+ module Whistlepig
4
+ ## A full-text index. You can add entries to it, and you can run queries
5
+ ## against it.
6
+ ##
7
+ ## To add documents, create Entry objects and call add_entry. Entries
8
+ ## represent the document before addition; add_entry will return an integer
9
+ ## docid and the entry can be discarded at that point.
10
+ ##
11
+ ## To run queries, the simplest option is to call Index#search or
12
+ ## Index#each_result_for.
13
+ ##
14
+ ## The more complex option is to use setup_query, run_query, and
15
+ ## teardown_query, in that order. The advantage of this approach is that
16
+ ## run_query can be called multiple times, and each call will return more
17
+ ## results, allowing for query pagination.
18
class Index
  ## Runs a query and yields each matching doc id. Handles the mechanics of
  ## setting up and tearing down the query. Results are fetched +chunk_size+
  ## at a time; the query is torn down even if the block raises. Returns self.
  def each_result_for query, chunk_size=10
    setup_query query
    begin
      while true
        results = run_query query, chunk_size
        results.each { |r| yield r }
        # a short chunk means there is nothing left to fetch
        break if results.size < chunk_size
      end
    ensure
      teardown_query query
    end
    self
  end

  ## Convenience method. Runs a query and returns up to +max_results+
  ## matching doc ids, or all of them if +max_results+ is nil. Handles the
  ## mechanics of setting up and tearing down the query.
  def search query, max_results=nil
    setup_query query
    ret = []
    num_per_call = max_results || 100
    begin
      while true
        results = run_query query, num_per_call
        # append in place rather than building a new array for every chunk
        ret.concat results
        # with an explicit limit a single call is enough; otherwise stop on
        # the first short chunk
        break if max_results || results.size < num_per_call
      end
    ensure
      teardown_query query
    end

    ret
  end
end
55
+
56
+ ## Represents a document, before being added to the index.
57
+ ##
58
+ ## Entries allow you to build up a document in memory before indexing it.
59
+ ## Once you've built it, pass it to Index#add_entry.
60
+ class Entry
61
+ end
62
+
63
+ ## A generic error.
64
+ class Error
65
+ end
66
+
67
+ ## A parser error.
68
+ class ParseError
69
+ end
70
+
71
+ ## A query. Queries are created from strings with Query#new. If parsing the
72
+ ## string fails, a ParseError is thrown.
73
+ ##
74
+ ## At the lowest level, queries are composed of space-separated terms.
75
+ ## Matches against that term are restricted to the default field specified at
76
+ ## parse time.
77
+ ##
78
+ ## hello # search for "hello" in the default field
79
+ ##
80
+ ## Term matches can be restricted to another field by
81
+ ## prefixing them with the field name and ":", e.g. "subject:hello".
82
+ ##
83
+ ## subject:hello # search for "hello" in the "subject" field
84
+ ##
85
+ ## Multiple terms are considered conjunctive (i.e. all must match) unless the
86
+ ## special token "OR" appears between them. The "OR" must be capitalized
87
+ ## in this case.
88
+ ## word1 word2 # search for word1 and word2
89
+ ## word1 OR word2 # search for word1 or word2
90
+ ## subject:hello bob # "hello" in the subject field and "bob" in the
91
+ ## # default field
92
+ ##
93
+ ## Parentheses can be used to group disjunctions, conjunctions or fields.
94
+ ## (word1 OR word2) word3 # "word3" and either "word1" or "word2"
95
+ ## field:(word1 OR word2) # "word1" or "word2" in field "field"
96
+ ##
97
+ ## Phrases are specified by surrounding the terms with double quotes.
98
+ ## "bob jones" # documents with the phrase "bob jones"
99
+ ##
100
+ ## Negations can be specified with a - prefix.
101
+ ## -word # docs without "word"
102
+ ## -subject:(bob OR joe) # docs with neither "bob" nor "joe" in subject
103
+ ##
104
+ ## Labels are specified with a ~ prefix. Labels do not have fields.
105
+ ## ~inbox # docs with the "inbox" label
106
+ ## -~inbox # docs without the "inbox" label
107
+ ## -~inbox subject:hello # docs with subject "hello" and without the
108
+ ## # inbox label
109
+ ##
110
+ ## All of the above can be mixed and matched, of course.
111
+ ## -subject:"spam email" ~inbox (money OR cash)
112
+ ## ("love you" OR "hate you") -(~deleted OR ~spam)
113
+ ## etc...
114
+ ##
115
+ ## Existing query objects can also be altered programmatically, at least to
116
+ ## a limited extent, by calling Query#and and Query#or.
117
+ class Query
118
+ end
119
+ end