ferret 0.9.4 → 0.9.5

Sign up to get free protection for your applications and to get access to all the features.
data/README CHANGED
@@ -26,7 +26,7 @@ Run the following;
26
26
  $ rake ext
27
27
  $ ruby setup.rb config
28
28
  $ ruby setup.rb setup
29
- # ruby setup.rb install
29
+ # sudo ruby setup.rb install
30
30
 
31
31
  These simple steps install ferret in the default location of Ruby libraries.
32
32
  You can also install files into your favorite directory by supplying setup.rb
data/Rakefile CHANGED
@@ -211,6 +211,7 @@ else
211
211
  #### Load-time details: library and application (you will need one or both).
212
212
 
213
213
  s.require_path = 'lib' # Use these for libraries.
214
+ s.autorequire = 'ferret'
214
215
 
215
216
  #s.bindir = "bin" # Use these for applications.
216
217
  #s.executables = ["rake"]
data/ext/field.c CHANGED
@@ -213,93 +213,6 @@ FieldInfos *fis_add_doc(FieldInfos *fis, Document *doc)
213
213
  return fis;
214
214
  }
215
215
 
216
- /****************************************************************************
217
- *
218
- * FieldsWriter
219
- *
220
- ****************************************************************************/
221
-
222
- FieldsWriter *fw_open(Store *store, char *segment, FieldInfos *fis)
223
- {
224
- FieldsWriter *fw = ALLOC(FieldsWriter);
225
- char buf[SEGMENT_NAME_MAX_LENGTH];
226
- int slen = (int)strlen(segment);
227
-
228
- strcpy(buf, segment);
229
-
230
- fw->fis = fis;
231
- strcpy(buf+slen, ".fdt");
232
- fw->fields_out = store->create_output(store, buf);
233
- strcpy(buf+slen, ".fdx");
234
- fw->index_out = store->create_output(store, buf);
235
- return fw;
236
- }
237
-
238
- void fw_close(FieldsWriter *fw)
239
- {
240
- os_close(fw->fields_out);
241
- os_close(fw->index_out);
242
- free(fw);
243
- }
244
-
245
- void save_data(OutStream *fout, char *data, int dlen)
246
- {
247
- os_write_vint(fout, dlen);
248
- os_write_bytes(fout, (uchar *)data, dlen);
249
- }
250
-
251
- void fw_add_doc(FieldsWriter *fw, Document *doc)
252
- {
253
- int i, bits;
254
- DocField *df;
255
- char *data;
256
- int stored_count = 0;
257
- OutStream *fout = fw->fields_out, *iout = fw->index_out;
258
-
259
- os_write_long(iout, os_pos(fout));
260
-
261
- for (i = 0; i < doc->dfcnt; i++) {
262
- if (doc->df_arr[i]->is_stored)
263
- stored_count++;
264
- }
265
- os_write_vint(fout, stored_count);
266
-
267
- for (i = 0; i < doc->dfcnt; i++) {
268
- df = doc->df_arr[i];
269
- if (df->is_stored) {
270
- os_write_vint(fout, ((FieldInfo *)ht_get(fw->fis->by_name, df->name))->number);
271
-
272
- bits = 0;
273
- if (df->is_tokenized) {
274
- bits |= FIELD_IS_TOKENIZED;
275
- }
276
- if (df->is_binary) {
277
- bits |= FIELD_IS_BINARY;
278
- }
279
- if (df->is_compressed) {
280
- bits |= FIELD_IS_COMPRESSED;
281
- }
282
- os_write_byte(fout, bits);
283
-
284
- data = NULL;
285
- if (df->is_compressed) {
286
- /* Not compressing just yet but we'll save it anyway */
287
- if (df->is_binary) {
288
- save_data(fout, df->data, df->blen);
289
- } else {
290
- os_write_string(fout, df->data);
291
- }
292
- } else {
293
- if (df->is_binary) {
294
- save_data(fout, df->data, df->blen);
295
- } else {
296
- os_write_string(fout, df->data);
297
- }
298
- }
299
- }
300
- }
301
- }
302
-
303
216
  /****************************************************************************
304
217
  *
305
218
  * FieldsReader
@@ -406,3 +319,90 @@ Document *fr_get_doc(FieldsReader *fr, int doc_num)
406
319
  return doc;
407
320
  }
408
321
 
322
+ /****************************************************************************
323
+ *
324
+ * FieldsWriter
325
+ *
326
+ ****************************************************************************/
327
+
328
+ FieldsWriter *fw_open(Store *store, char *segment, FieldInfos *fis)
329
+ {
330
+ FieldsWriter *fw = ALLOC(FieldsWriter);
331
+ char buf[SEGMENT_NAME_MAX_LENGTH];
332
+ int slen = (int)strlen(segment);
333
+
334
+ strcpy(buf, segment);
335
+
336
+ fw->fis = fis;
337
+ strcpy(buf+slen, ".fdt");
338
+ fw->fields_out = store->create_output(store, buf);
339
+ strcpy(buf+slen, ".fdx");
340
+ fw->index_out = store->create_output(store, buf);
341
+ return fw;
342
+ }
343
+
344
+ void fw_close(FieldsWriter *fw)
345
+ {
346
+ os_close(fw->fields_out);
347
+ os_close(fw->index_out);
348
+ free(fw);
349
+ }
350
+
351
+ void save_data(OutStream *fout, char *data, int dlen)
352
+ {
353
+ os_write_vint(fout, dlen);
354
+ os_write_bytes(fout, (uchar *)data, dlen);
355
+ }
356
+
357
+ void fw_add_doc(FieldsWriter *fw, Document *doc)
358
+ {
359
+ int i, bits;
360
+ DocField *df;
361
+ char *data;
362
+ int stored_count = 0;
363
+ OutStream *fout = fw->fields_out, *iout = fw->index_out;
364
+
365
+ os_write_long(iout, os_pos(fout));
366
+
367
+ for (i = 0; i < doc->dfcnt; i++) {
368
+ if (doc->df_arr[i]->is_stored)
369
+ stored_count++;
370
+ }
371
+ os_write_vint(fout, stored_count);
372
+
373
+ for (i = 0; i < doc->dfcnt; i++) {
374
+ df = doc->df_arr[i];
375
+ if (df->is_stored) {
376
+ os_write_vint(fout, ((FieldInfo *)ht_get(fw->fis->by_name, df->name))->number);
377
+
378
+ bits = 0;
379
+ if (df->is_tokenized) {
380
+ bits |= FIELD_IS_TOKENIZED;
381
+ }
382
+ if (df->is_binary) {
383
+ bits |= FIELD_IS_BINARY;
384
+ }
385
+ if (df->is_compressed) {
386
+ bits |= FIELD_IS_COMPRESSED;
387
+ }
388
+ os_write_byte(fout, bits);
389
+
390
+ data = NULL;
391
+ if (df->is_compressed) {
392
+ /* Not compressing just yet but we'll save it anyway */
393
+ if (df->is_binary) {
394
+ save_data(fout, df->data, df->blen);
395
+ } else {
396
+ os_write_string(fout, df->data);
397
+ }
398
+ } else {
399
+ if (df->is_binary) {
400
+ save_data(fout, df->data, df->blen);
401
+ } else {
402
+ os_write_string(fout, df->data);
403
+ }
404
+ }
405
+ }
406
+ }
407
+ }
408
+
data/ext/index.h CHANGED
@@ -162,7 +162,6 @@ int ti_eq(TermInfo *ti1, TermInfo *ti2);
162
162
  *
163
163
  ****************************************************************************/
164
164
 
165
- typedef struct TermEnumFilter TermEnumFilter;
166
165
  typedef struct TermEnum TermEnum;
167
166
  struct TermEnum {
168
167
  void *data;
@@ -301,10 +300,11 @@ void tvf_destroy(void *p);
301
300
  ****************************************************************************/
302
301
 
303
302
  typedef struct TVTerm {
304
- char *text;
305
- int freq;
306
- int *positions;
307
- TVOffsetInfo **offsets;
303
+ int field_num;
304
+ char *text;
305
+ int freq;
306
+ int *positions;
307
+ TVOffsetInfo **offsets;
308
308
  } TVTerm;
309
309
 
310
310
  TVTerm *tvt_create(char *text,
@@ -320,12 +320,12 @@ void tvt_destroy(void *p);
320
320
  ****************************************************************************/
321
321
 
322
322
  typedef struct TermVector {
323
- char *field;
324
- char **terms;
325
- int tcnt;
326
- int *freqs;
327
- int **positions;
328
- TVOffsetInfo ***offsets;
323
+ char *field;
324
+ char **terms;
325
+ int tcnt;
326
+ int *freqs;
327
+ int **positions;
328
+ TVOffsetInfo ***offsets;
329
329
  } TermVector;
330
330
 
331
331
  TermVector *tv_create(const char *field,
@@ -344,27 +344,27 @@ void tv_destroy(TermVector *tv);
344
344
 
345
345
  #define STORE_POSITIONS_WITH_TERMVECTOR 0x1
346
346
  #define STORE_OFFSET_WITH_TERMVECTOR 0x2
347
-
347
+
348
348
  #define FORMAT_VERSION 2
349
349
  #define FORMAT_SIZE 4
350
-
350
+
351
351
  #define TVX_EXTENSION ".tvx"
352
352
  #define TVD_EXTENSION ".tvd"
353
353
  #define TVF_EXTENSION ".tvf"
354
354
 
355
355
  typedef struct TermVectorsWriter {
356
- TVField *curr_field;
357
- int curr_doc_pointer;
358
- OutStream *tvx;
359
- OutStream *tvd;
360
- OutStream *tvf;
361
- FieldInfos *fis;
362
- TVField **fields;
363
- int fcnt;
364
- int fsize;
365
- TVTerm **terms;
366
- int tcnt;
367
- int tsize;
356
+ TVField *curr_field;
357
+ int curr_doc_pointer;
358
+ OutStream *tvx;
359
+ OutStream *tvd;
360
+ OutStream *tvf;
361
+ FieldInfos *fis;
362
+ TVField **fields;
363
+ int fcnt;
364
+ int fsize;
365
+ TVTerm **terms;
366
+ int tcnt;
367
+ int tsize;
368
368
  } TermVectorsWriter;
369
369
 
370
370
  TermVectorsWriter *tvw_open(Store *store, char *segment, FieldInfos *fis);
@@ -384,23 +384,40 @@ void tvw_add_all_doc_vectors(TermVectorsWriter *tvw, Array *vectors);
384
384
  ****************************************************************************/
385
385
 
386
386
  typedef struct TermVectorsReader {
387
- int size;
388
- InStream *tvx;
389
- InStream *tvd;
390
- InStream *tvf;
391
- FieldInfos *fis;
392
- int tvd_format;
393
- int tvf_format;
387
+ int size;
388
+ InStream *tvx;
389
+ InStream *tvd;
390
+ InStream *tvf;
391
+ FieldInfos *fis;
392
+ int tvd_format;
393
+ int tvf_format;
394
394
  } TermVectorsReader;
395
395
 
396
396
  TermVectorsReader *tvr_open(Store *store, char *segment, FieldInfos *fis);
397
397
  TermVectorsReader *tvr_clone(TermVectorsReader *orig);
398
398
  void tvr_close(TermVectorsReader *tvr);
399
399
  TermVector *tvr_read_term_vector(TermVectorsReader *tvr,
400
- char *field, int tvf_pointer);
400
+ char *field, int tvf_pointer);
401
401
  Array *tvr_get_tv(TermVectorsReader *tvr, int doc_num);
402
402
  TermVector *tvr_get_field_tv(TermVectorsReader *tvr, int doc_num, char *field);
403
403
 
404
+ /****************************************************************************
405
+ *
406
+ * FieldsReader
407
+ *
408
+ ****************************************************************************/
409
+
410
+ typedef struct FieldsReader {
411
+ int len;
412
+ FieldInfos *fis;
413
+ InStream *fields_in;
414
+ InStream *index_in;
415
+ } FieldsReader;
416
+
417
+ FieldsReader *fr_open(Store *store, char *segment, FieldInfos *fis);
418
+ void fr_close(FieldsReader *fr);
419
+ Document *fr_get_doc(FieldsReader *fr, int doc_num);
420
+
404
421
  /****************************************************************************
405
422
  *
406
423
  * FieldsWriter
@@ -412,9 +429,9 @@ TermVector *tvr_get_field_tv(TermVectorsReader *tvr, int doc_num, char *field);
412
429
  #define FIELD_IS_COMPRESSED 0X4
413
430
 
414
431
  typedef struct FieldsWriter {
415
- FieldInfos *fis;
416
- OutStream *fields_out;
417
- OutStream *index_out;
432
+ FieldInfos *fis;
433
+ OutStream *fields_out;
434
+ OutStream *index_out;
418
435
  } FieldsWriter;
419
436
 
420
437
  FieldsWriter *fw_open(Store *store, char *segment, FieldInfos *fis);
@@ -429,15 +446,15 @@ void fw_add_doc(FieldsWriter *fw, Document *doc);
429
446
 
430
447
  typedef struct TermDocEnum TermDocEnum;
431
448
  struct TermDocEnum {
432
- void *data;
433
- void (*seek)(TermDocEnum *tde, Term *term);
434
- int (*doc_num)(TermDocEnum *tde);
435
- int (*freq)(TermDocEnum *tde);
436
- bool (*next)(TermDocEnum *tde);
437
- int (*read)(TermDocEnum *tde, int *docs, int *freqs, int req_num);
438
- bool (*skip_to)(TermDocEnum *tde, int target);
439
- int (*next_position)(TermDocEnum *tde);
440
- void (*close)(TermDocEnum *tde);
449
+ void *data;
450
+ void (*seek)(TermDocEnum *tde, Term *term);
451
+ int (*doc_num)(TermDocEnum *tde);
452
+ int (*freq)(TermDocEnum *tde);
453
+ bool (*next)(TermDocEnum *tde);
454
+ int (*read)(TermDocEnum *tde, int *docs, int *freqs, int req_num);
455
+ bool (*skip_to)(TermDocEnum *tde, int target);
456
+ int (*next_position)(TermDocEnum *tde);
457
+ void (*close)(TermDocEnum *tde);
441
458
  };
442
459
 
443
460
  /* * SegmentTermDocEnum * */
@@ -445,27 +462,27 @@ struct TermDocEnum {
445
462
  typedef struct SegmentTermDocEnum SegmentTermDocEnum;
446
463
 
447
464
  struct SegmentTermDocEnum {
448
- SegmentReader *parent;
449
- InStream *freq_in;
450
- int count; /* number of docs for this term skipped */
451
- int doc_freq; /* number of doc this term appears in */
452
- BitVector *deleted_docs;
453
- int doc_num;
454
- int freq;
455
- int skip_interval;
456
- int num_skips;
457
- int skip_count;
458
- InStream *skip_in;
459
- int skip_doc;
460
- int freq_pointer;
461
- int prox_pointer;
462
- int skip_pointer;
463
- unsigned int have_skipped : 1;
464
- void (*skip_prox)(SegmentTermDocEnum *stde);
465
- InStream *prox_in;
466
- int prox_cnt;
467
- int position;
468
- void (*seek_prox)(SegmentTermDocEnum *stde, int prox_pointer);
465
+ SegmentReader *parent;
466
+ InStream *freq_in;
467
+ int count; /* number of docs for this term skipped */
468
+ int doc_freq; /* number of doc this term appears in */
469
+ BitVector *deleted_docs;
470
+ int doc_num;
471
+ int freq;
472
+ int skip_interval;
473
+ int num_skips;
474
+ int skip_count;
475
+ InStream *skip_in;
476
+ int skip_doc;
477
+ int freq_pointer;
478
+ int prox_pointer;
479
+ int skip_pointer;
480
+ unsigned int have_skipped : 1;
481
+ void (*skip_prox)(SegmentTermDocEnum *stde);
482
+ InStream *prox_in;
483
+ int prox_cnt;
484
+ int position;
485
+ void (*seek_prox)(SegmentTermDocEnum *stde, int prox_pointer);
469
486
  };
470
487
 
471
488
  TermDocEnum *stde_create(IndexReader *ir);
@@ -477,15 +494,15 @@ TermDocEnum *stpe_create(IndexReader *ir);
477
494
  /* * MultiTermDocEnum * */
478
495
  typedef struct MultiTermDocEnum MultiTermDocEnum;
479
496
  struct MultiTermDocEnum {
480
- IndexReader **irs;
481
- int *starts;
482
- int ir_cnt;
483
- Term *term;
484
- int base;
485
- int pointer;
486
- TermDocEnum **irs_tde;
487
- TermDocEnum *curr_tde;
488
- TermDocEnum *(*term_docs_from_reader)(IndexReader *ir);
497
+ IndexReader **irs;
498
+ int *starts;
499
+ int ir_cnt;
500
+ Term *term;
501
+ int base;
502
+ int pointer;
503
+ TermDocEnum **irs_tde;
504
+ TermDocEnum *curr_tde;
505
+ TermDocEnum *(*term_docs_from_reader)(IndexReader *ir);
489
506
  };
490
507
 
491
508
  TermDocEnum *mtde_create(IndexReader **readers, int *starts, int ir_cnt);
@@ -499,33 +516,16 @@ TermDocEnum *mtpe_create(IndexReader **readers, int *starts, int ir_cnt);
499
516
 
500
517
  #define MTDPE_POS_QUEUE_INIT_CAPA 8
501
518
  typedef struct {
502
- int doc_num;
503
- int freq;
504
- PriorityQueue *pq;
505
- int *pos_queue;
506
- int pos_queue_index;
507
- int pos_queue_capa;
519
+ int doc_num;
520
+ int freq;
521
+ PriorityQueue *pq;
522
+ int *pos_queue;
523
+ int pos_queue_index;
524
+ int pos_queue_capa;
508
525
  } MultipleTermDocPosEnum;
509
526
 
510
527
  TermDocEnum *mtdpe_create(IndexReader *ir, Term **terms, int t_cnt);
511
528
 
512
- /****************************************************************************
513
- *
514
- * FieldsReader
515
- *
516
- ****************************************************************************/
517
-
518
- typedef struct FieldsReader {
519
- int len;
520
- FieldInfos *fis;
521
- InStream *fields_in;
522
- InStream *index_in;
523
- } FieldsReader;
524
-
525
- FieldsReader *fr_open(Store *store, char *segment, FieldInfos *fis);
526
- void fr_close(FieldsReader *fr);
527
- Document *fr_get_doc(FieldsReader *fr, int doc_num);
528
-
529
529
  /****************************************************************************
530
530
  *
531
531
  * Posting
@@ -533,11 +533,11 @@ Document *fr_get_doc(FieldsReader *fr, int doc_num);
533
533
  ****************************************************************************/
534
534
 
535
535
  typedef struct Posting {
536
- Term *term;
537
- int freq;
538
- int size;
539
- int *positions;
540
- TVOffsetInfo **offsets;
536
+ Term *term;
537
+ int freq;
538
+ int size;
539
+ int *positions;
540
+ TVOffsetInfo **offsets;
541
541
  } Posting;
542
542
 
543
543
  Posting *p_create(Term *term, int position, TVOffsetInfo *offset);
@@ -552,22 +552,22 @@ void p_add_occurance(Posting *self, int position, TVOffsetInfo *offset);
552
552
  ****************************************************************************/
553
553
 
554
554
  typedef struct DocumentWriter {
555
- Store *store;
556
- Analyzer *analyzer;
557
- Similarity *similarity;
558
- HshTable *postingtable;
559
- int pcnt;
560
- FieldInfos *fis;
561
- float *field_boosts;
562
- int *field_lengths;
563
- int *field_positions;
564
- int *field_offsets;
565
- int max_field_length;
566
- int term_index_interval;
555
+ Store *store;
556
+ Analyzer *analyzer;
557
+ Similarity *similarity;
558
+ HshTable *postingtable;
559
+ int pcnt;
560
+ FieldInfos *fis;
561
+ float *field_boosts;
562
+ int *field_lengths;
563
+ int *field_positions;
564
+ int *field_offsets;
565
+ int max_field_length;
566
+ int term_index_interval;
567
567
  } DocumentWriter;
568
568
 
569
569
  DocumentWriter *dw_open(Store *store, Analyzer *analyzer,
570
- Similarity *similarity, int max_field_length, int term_index_interval);
570
+ Similarity *similarity, int max_field_length, int term_index_interval);
571
571
  void dw_close(DocumentWriter *dw);
572
572
  void dw_add_doc(DocumentWriter *dw, char *segment, Document *doc);
573
573
 
@@ -578,9 +578,9 @@ void dw_add_doc(DocumentWriter *dw, char *segment, Document *doc);
578
578
  ****************************************************************************/
579
579
 
580
580
  typedef struct SegmentInfo {
581
- char *name;
582
- int doc_cnt;
583
- Store *store;
581
+ char *name;
582
+ int doc_cnt;
583
+ Store *store;
584
584
  } SegmentInfo;
585
585
 
586
586
  SegmentInfo *si_create(char *name, int doc_cnt, Store *store);
@@ -596,13 +596,13 @@ bool si_has_separate_norms(SegmentInfo *si);
596
596
  ****************************************************************************/
597
597
 
598
598
  typedef struct SegmentInfos {
599
- Store *store;
600
- SegmentInfo **segs;
601
- int scnt;
602
- int size;
603
- int counter;
604
- int version;
605
- int format;
599
+ Store *store;
600
+ SegmentInfo **segs;
601
+ int scnt;
602
+ int size;
603
+ int counter;
604
+ int version;
605
+ int format;
606
606
  } SegmentInfos;
607
607
 
608
608
  SegmentInfos *sis_create();
@@ -622,65 +622,63 @@ int sis_read_current_version(Store *store);
622
622
  ****************************************************************************/
623
623
 
624
624
  enum FIELD_TYPE {
625
- /* all fields */
626
- IR_ALL,
627
- /* all indexed fields */
628
- IR_INDEXED,
629
- /* all fields which are not indexed */
630
- IR_UNINDEXED,
631
- /* all fields which are indexed with termvectors enables */
632
- IR_INDEXED_WITH_TERM_VECTOR,
633
- /* all fields which are indexed but don't have termvectors enabled */
634
- IR_INDEXED_NO_TERM_VECTOR,
635
- /* all fields where termvectors are enabled. Please note that only standard */
636
- /* termvector fields are returned */
637
- IR_TERM_VECTOR,
638
- /* all field with termvectors wiht positions enabled */
639
- IR_TERM_VECTOR_WITH_POSITION,
640
- /* all fields where termvectors with offset position are set */
641
- IR_TERM_VECTOR_WITH_OFFSET,
642
- /* all fields where termvectors with offset and position values set */
643
- IR_TERM_VECTOR_WITH_POSITION_OFFSET
625
+ /* all fields */
626
+ IR_ALL,
627
+ /* all indexed fields */
628
+ IR_INDEXED,
629
+ /* all fields which are not indexed */
630
+ IR_UNINDEXED,
631
+ /* all fields which are indexed with termvectors enables */
632
+ IR_INDEXED_WITH_TERM_VECTOR,
633
+ /* all fields which are indexed but don't have termvectors enabled */
634
+ IR_INDEXED_NO_TERM_VECTOR,
635
+ /* all fields where termvectors are enabled. Please note that only standard */
636
+ /* termvector fields are returned */
637
+ IR_TERM_VECTOR,
638
+ /* all field with termvectors wiht positions enabled */
639
+ IR_TERM_VECTOR_WITH_POSITION,
640
+ /* all fields where termvectors with offset position are set */
641
+ IR_TERM_VECTOR_WITH_OFFSET,
642
+ /* all fields where termvectors with offset and position values set */
643
+ IR_TERM_VECTOR_WITH_POSITION_OFFSET
644
644
  };
645
645
 
646
646
  struct IndexReader {
647
- mutex_t mutex;
648
- HshTable *cache;
649
- HshTable *sort_cache;
650
- void *data;
651
- Store *store;
652
- Lock *write_lock;
653
- SegmentInfos *sis;
654
- bool has_changes : 1;
655
- bool is_stale : 1;
656
- bool is_owner : 1;
657
- TermVector *(*get_term_vector)(IndexReader *ir, int doc_num, char *field);
658
- Array *(*get_term_vectors)(IndexReader *ir, int doc_num);
659
- int (*num_docs)(IndexReader *ir);
660
- int (*max_doc)(IndexReader *ir);
661
- Document *(*get_doc)(IndexReader *ir, int doc_num);
662
- uchar *(*get_norms)(IndexReader *ir, char *field);
663
- uchar *(*get_norms_always)(IndexReader *ir, char *field);
664
- void (*do_set_norm)(IndexReader *ir, int doc_num, char *field,
665
- uchar val);
666
- void (*get_norms_into)(IndexReader *ir, char *field, uchar *buf,
667
- int offset);
668
- TermEnum *(*terms)(IndexReader *ir);
669
- TermEnum *(*terms_from)(IndexReader *ir, Term *term);
670
- int (*doc_freq)(IndexReader *ir, Term *t);
671
- TermDocEnum *(*term_docs)(IndexReader *ir);
672
- TermDocEnum *(*term_positions)(IndexReader *ir);
673
- void (*do_delete_doc)(IndexReader *ir, int doc_num);
674
- void (*do_undelete_all)(IndexReader *ir);
675
- bool (*is_deleted)(IndexReader *ir, int doc_num);
676
- bool (*has_deletions)(IndexReader *ir);
677
- bool (*has_norms)(IndexReader *ir, char *field);
678
- HashSet *(*get_field_names)(IndexReader *ir, int field_type);
679
- void (*do_commit)(IndexReader *ir);
680
- void (*do_close)(IndexReader *ir);
681
- void (*acquire_write_lock)(IndexReader *ir);
682
- int (*write_fields_i)(IndexReader *ir, OutStream *fdt_out,
683
- OutStream *fdx_out);
647
+ mutex_t mutex;
648
+ HshTable *cache;
649
+ HshTable *sort_cache;
650
+ void *data;
651
+ Store *store;
652
+ Lock *write_lock;
653
+ SegmentInfos *sis;
654
+ bool has_changes : 1;
655
+ bool is_stale : 1;
656
+ bool is_owner : 1;
657
+ TermVector *(*get_term_vector)(IndexReader *ir, int doc_num, char *field);
658
+ Array *(*get_term_vectors)(IndexReader *ir, int doc_num);
659
+ int (*num_docs)(IndexReader *ir);
660
+ int (*max_doc)(IndexReader *ir);
661
+ Document *(*get_doc)(IndexReader *ir, int doc_num);
662
+ uchar *(*get_norms)(IndexReader *ir, char *field);
663
+ uchar *(*get_norms_always)(IndexReader *ir, char *field);
664
+ void (*do_set_norm)(IndexReader *ir, int doc_num, char *field,
665
+ uchar val);
666
+ void (*get_norms_into)(IndexReader *ir, char *field, uchar *buf,
667
+ int offset);
668
+ TermEnum *(*terms)(IndexReader *ir);
669
+ TermEnum *(*terms_from)(IndexReader *ir, Term *term);
670
+ int (*doc_freq)(IndexReader *ir, Term *t);
671
+ TermDocEnum *(*term_docs)(IndexReader *ir);
672
+ TermDocEnum *(*term_positions)(IndexReader *ir);
673
+ void (*do_delete_doc)(IndexReader *ir, int doc_num);
674
+ void (*do_undelete_all)(IndexReader *ir);
675
+ bool (*is_deleted)(IndexReader *ir, int doc_num);
676
+ bool (*has_deletions)(IndexReader *ir);
677
+ bool (*has_norms)(IndexReader *ir, char *field);
678
+ HashSet *(*get_field_names)(IndexReader *ir, int field_type);
679
+ void (*do_commit)(IndexReader *ir);
680
+ void (*do_close)(IndexReader *ir);
681
+ void (*acquire_write_lock)(IndexReader *ir);
684
682
  };
685
683
 
686
684
  IndexReader *ir_create(Store *store, SegmentInfos *sis, int is_owner);
@@ -705,10 +703,10 @@ bool ir_is_latest(IndexReader *ir);
705
703
  ****************************************************************************/
706
704
 
707
705
  typedef struct Norm {
708
- bool is_dirty : 1;
709
- int field_num;
710
- InStream *is;
711
- uchar *bytes;
706
+ bool is_dirty : 1;
707
+ int field_num;
708
+ InStream *is;
709
+ uchar *bytes;
712
710
  } Norm;
713
711
 
714
712
  /****************************************************************************
@@ -718,22 +716,22 @@ typedef struct Norm {
718
716
  ****************************************************************************/
719
717
 
720
718
  struct SegmentReader {
721
- FieldInfos *fis;
722
- FieldsReader *fr;
723
- char *segment;
724
- BitVector *deleted_docs;
725
- bool deleted_docs_dirty : 1;
726
- bool undelete_all : 1;
727
- bool norms_dirty : 1;
728
- InStream *freq_in;
729
- InStream *prox_in;
730
- TermInfosReader *tir;
731
- TermVectorsReader *orig_tvr;
732
- thread_key_t thread_tvr;
733
- Array *tvr_bucket;
734
- HshTable *norms;
735
- Store *cfs_store;
736
- uchar *fake_norms;
719
+ FieldInfos *fis;
720
+ FieldsReader *fr;
721
+ char *segment;
722
+ BitVector *deleted_docs;
723
+ bool deleted_docs_dirty : 1;
724
+ bool undelete_all : 1;
725
+ bool norms_dirty : 1;
726
+ InStream *freq_in;
727
+ InStream *prox_in;
728
+ TermInfosReader *tir;
729
+ TermVectorsReader *orig_tvr;
730
+ thread_key_t thread_tvr;
731
+ Array *tvr_bucket;
732
+ HshTable *norms;
733
+ Store *cfs_store;
734
+ uchar *fake_norms;
737
735
  };
738
736
 
739
737
  IndexReader *sr_open(SegmentInfos *sis, int si_num, bool is_owner);
@@ -746,17 +744,17 @@ IndexReader *sr_open_si(SegmentInfo *si);
746
744
  ****************************************************************************/
747
745
 
748
746
  typedef struct MultiReader {
749
- bool has_deletions : 1;
750
- int max_doc;
751
- int num_docs_cache;
752
- int rcnt;
753
- int *starts;
754
- IndexReader **sub_readers;
755
- HshTable *norms_cache;
747
+ bool has_deletions : 1;
748
+ int max_doc;
749
+ int num_docs_cache;
750
+ int rcnt;
751
+ int *starts;
752
+ IndexReader **sub_readers;
753
+ HshTable *norms_cache;
756
754
  } MultiReader;
757
755
 
758
756
  IndexReader *mr_open(Store *store, SegmentInfos *sis, IndexReader **readers,
759
- int rcnt);
757
+ int rcnt);
760
758
 
761
759
  /****************************************************************************
762
760
  *
@@ -765,12 +763,12 @@ IndexReader *mr_open(Store *store, SegmentInfos *sis, IndexReader **readers,
765
763
  ****************************************************************************/
766
764
 
767
765
  typedef struct SegmentMergeInfo {
768
- int base;
769
- IndexReader *ir;
770
- TermEnum *te;
771
- TermBuffer *tb;
772
- TermDocEnum *postings;
773
- int *doc_map;
766
+ int base;
767
+ IndexReader *ir;
768
+ TermEnum *te;
769
+ TermBuffer *tb;
770
+ TermDocEnum *postings;
771
+ int *doc_map;
774
772
  } SegmentMergeInfo;
775
773
 
776
774
  SegmentMergeInfo *smi_create(int base, TermEnum *te, IndexReader *ir);
@@ -785,24 +783,24 @@ bool smi_lt(SegmentMergeInfo *smi1, SegmentMergeInfo *smi2);
785
783
  ****************************************************************************/
786
784
 
787
785
  typedef struct SegmentMerger {
788
- Store *store;
789
- char *name;
790
- Array *readers;
791
- FieldInfos *fis;
792
- OutStream *freq_out;
793
- OutStream *prox_out;
794
- TermInfosWriter *tiw;
795
- Term *terms_buf;
796
- int terms_buf_pointer;
797
- int terms_buf_size;
798
- PriorityQueue *queue;
799
- TermInfo *ti;
800
- int term_index_interval;
801
- OutStream *skip_buffer;
802
- int skip_interval;
803
- int last_skip_doc;
804
- int last_skip_freq_pointer;
805
- int last_skip_prox_pointer;
786
+ Store *store;
787
+ char *name;
788
+ Array *readers;
789
+ FieldInfos *fis;
790
+ OutStream *freq_out;
791
+ OutStream *prox_out;
792
+ TermInfosWriter *tiw;
793
+ Term *terms_buf;
794
+ int terms_buf_pointer;
795
+ int terms_buf_size;
796
+ PriorityQueue *queue;
797
+ TermInfo *ti;
798
+ int term_index_interval;
799
+ OutStream *skip_buffer;
800
+ int skip_interval;
801
+ int last_skip_doc;
802
+ int last_skip_freq_pointer;
803
+ int last_skip_prox_pointer;
806
804
  } SegmentMerger;
807
805
 
808
806
  SegmentMerger *sm_create(Store *store, char *name, int term_index_interval);
@@ -821,25 +819,25 @@ Array *sm_create_compound_file(SegmentMerger *sm, char *fname);
821
819
  #define WRITE_LOCK_NAME "write"
822
820
  #define COMMIT_LOCK_NAME "commit"
823
821
  struct IndexWriter {
824
- mutex_t mutex;
825
- HshTable *postings;
826
- FieldInfos *fis;
827
- int merge_factor;
828
- int min_merge_docs;
829
- int max_merge_docs;
830
- int max_field_length;
831
- int term_index_interval;
832
- Store *store;
833
- Analyzer *analyzer;
834
- Similarity *similarity;
835
- SegmentInfos *sis;
836
- Store *ram_store;
837
- Lock *write_lock;
838
- bool use_compound_file : 1;
822
+ mutex_t mutex;
823
+ HshTable *postings;
824
+ FieldInfos *fis;
825
+ int merge_factor;
826
+ int min_merge_docs;
827
+ int max_merge_docs;
828
+ int max_field_length;
829
+ int term_index_interval;
830
+ Store *store;
831
+ Analyzer *analyzer;
832
+ Similarity *similarity;
833
+ SegmentInfos *sis;
834
+ Store *ram_store;
835
+ Lock *write_lock;
836
+ bool use_compound_file : 1;
839
837
  };
840
838
 
841
839
  IndexWriter *iw_open(Store *store, Analyzer *analyzer,
842
- bool create);
840
+ bool create);
843
841
  void iw_flush_ram_segments(IndexWriter *iw);
844
842
  void iw_close(IndexWriter *iw);
845
843
  int iw_doc_count(IndexWriter *iw);
@@ -855,11 +853,11 @@ void iw_add_readers(IndexWriter *iw, IndexReader **stores, int cnt);
855
853
  ****************************************************************************/
856
854
 
857
855
  typedef struct CompoundWriter {
858
- Store *store;
859
- const char *name;
860
- HashSet *ids;
861
- Array *file_entries;
862
- bool merged;
856
+ Store *store;
857
+ const char *name;
858
+ HashSet *ids;
859
+ Array *file_entries;
860
+ bool merged;
863
861
  } CompoundWriter;
864
862
 
865
863
  CompoundWriter *open_cw(Store *store, char *name);