ferret 0.9.1 → 0.9.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (105)
  1. data/README +6 -5
  2. data/Rakefile +34 -13
  3. data/TODO +1 -0
  4. data/TUTORIAL +1 -1
  5. data/ext/analysis.c +87 -70
  6. data/ext/analysis.h +18 -6
  7. data/ext/array.c +1 -2
  8. data/ext/array.h +1 -1
  9. data/ext/bitvector.c +10 -6
  10. data/ext/bitvector.h +2 -2
  11. data/ext/compound_io.c +30 -27
  12. data/ext/document.c +15 -15
  13. data/ext/document.h +5 -5
  14. data/ext/except.c +2 -0
  15. data/ext/except.h +25 -23
  16. data/ext/extconf.rb +1 -0
  17. data/ext/ferret.c +10 -8
  18. data/ext/ferret.h +9 -8
  19. data/ext/field.c +29 -25
  20. data/ext/filter.c +52 -14
  21. data/ext/frtio.h +13 -0
  22. data/ext/fs_store.c +115 -170
  23. data/ext/global.c +9 -8
  24. data/ext/global.h +17 -13
  25. data/ext/hash.c +13 -19
  26. data/ext/hash.h +11 -11
  27. data/ext/hashset.c +5 -7
  28. data/ext/hashset.h +9 -8
  29. data/ext/helper.c +1 -1
  30. data/ext/helper.h +2 -1
  31. data/ext/inc/except.h +25 -23
  32. data/ext/inc/lang.h +11 -1
  33. data/ext/ind.c +33 -21
  34. data/ext/index.h +44 -39
  35. data/ext/index_io.c +61 -57
  36. data/ext/index_rw.c +418 -361
  37. data/ext/lang.c +10 -0
  38. data/ext/lang.h +11 -1
  39. data/ext/nix_io.c +135 -0
  40. data/ext/priorityqueue.c +16 -16
  41. data/ext/priorityqueue.h +9 -6
  42. data/ext/q_boolean.c +128 -76
  43. data/ext/q_const_score.c +20 -20
  44. data/ext/q_filtered_query.c +20 -20
  45. data/ext/q_fuzzy.c +37 -23
  46. data/ext/q_match_all.c +15 -19
  47. data/ext/q_multi_phrase.c +87 -46
  48. data/ext/q_parser.c +247 -119
  49. data/ext/q_phrase.c +86 -52
  50. data/ext/q_prefix.c +25 -14
  51. data/ext/q_range.c +59 -14
  52. data/ext/q_span.c +263 -172
  53. data/ext/q_term.c +62 -51
  54. data/ext/q_wildcard.c +24 -13
  55. data/ext/r_analysis.c +328 -80
  56. data/ext/r_doc.c +11 -6
  57. data/ext/r_index_io.c +40 -32
  58. data/ext/r_qparser.c +15 -14
  59. data/ext/r_search.c +270 -152
  60. data/ext/r_store.c +32 -17
  61. data/ext/ram_store.c +38 -22
  62. data/ext/search.c +617 -87
  63. data/ext/search.h +227 -163
  64. data/ext/similarity.c +54 -45
  65. data/ext/similarity.h +3 -3
  66. data/ext/sort.c +132 -53
  67. data/ext/store.c +21 -2
  68. data/ext/store.h +14 -14
  69. data/ext/tags +4322 -232
  70. data/ext/term.c +140 -109
  71. data/ext/termdocs.c +74 -60
  72. data/ext/vector.c +181 -152
  73. data/ext/w32_io.c +150 -0
  74. data/lib/ferret.rb +1 -1
  75. data/lib/ferret/analysis/standard_tokenizer.rb +4 -3
  76. data/lib/ferret/document/field.rb +1 -1
  77. data/lib/ferret/index/field_infos.rb +1 -1
  78. data/lib/ferret/index/term.rb +1 -1
  79. data/lib/ferret/query_parser/query_parser.tab.rb +8 -24
  80. data/lib/ferret/search.rb +1 -0
  81. data/lib/ferret/search/boolean_query.rb +0 -4
  82. data/lib/ferret/search/index_searcher.rb +21 -8
  83. data/lib/ferret/search/multi_phrase_query.rb +7 -0
  84. data/lib/ferret/search/multi_searcher.rb +261 -0
  85. data/lib/ferret/search/phrase_query.rb +1 -1
  86. data/lib/ferret/search/query.rb +34 -5
  87. data/lib/ferret/search/sort.rb +7 -3
  88. data/lib/ferret/search/sort_field.rb +8 -4
  89. data/lib/ferret/store/fs_store.rb +13 -6
  90. data/lib/ferret/store/index_io.rb +0 -14
  91. data/lib/ferret/store/ram_store.rb +3 -2
  92. data/lib/rferret.rb +1 -1
  93. data/test/unit/analysis/ctc_analyzer.rb +131 -0
  94. data/test/unit/analysis/ctc_tokenstream.rb +98 -9
  95. data/test/unit/index/tc_index.rb +40 -1
  96. data/test/unit/index/tc_term.rb +7 -0
  97. data/test/unit/index/th_doc.rb +8 -0
  98. data/test/unit/query_parser/tc_query_parser.rb +6 -4
  99. data/test/unit/search/rtc_sort_field.rb +6 -6
  100. data/test/unit/search/tc_index_searcher.rb +8 -0
  101. data/test/unit/search/tc_multi_searcher.rb +275 -0
  102. data/test/unit/search/tc_multi_searcher2.rb +126 -0
  103. data/test/unit/search/tc_search_and_sort.rb +66 -0
  104. metadata +31 -26
  105. data/test/unit/query_parser/rtc_query_parser.rb +0 -138
data/ext/term.c CHANGED
@@ -1,7 +1,7 @@
1
- #include <index.h>
1
+ #include "index.h"
2
+ #include "helper.h"
3
+ #include "hash.h"
2
4
  #include <string.h>
3
- #include <helper.h>
4
- #include <hash.h>
5
5
 
6
6
  static char * const FORMAT_VERSION_ERROR_MSG = "Unknown format version";
7
7
  static char * const TERM_ORDER_ERROR_MSG = "term out of order";
@@ -32,11 +32,10 @@ Term *term_create(const char *field, char *text)
32
32
  return t;
33
33
  }
34
34
 
35
- void term_destroy(void *p)
35
+ void term_destroy(Term *self)
36
36
  {
37
- Term *t = (Term *)p;
38
- free(t->text);
39
- free(t);
37
+ free(self->text);
38
+ free(self);
40
39
  }
41
40
 
42
41
  int term_cmp(void *t1, void *t2)
@@ -87,9 +86,9 @@ TermBuffer *tb_create()
87
86
  return tb;
88
87
  }
89
88
 
90
- void tb_destroy(void *p)
89
+ void tb_destroy(TermBuffer *tb)
91
90
  {
92
- free(p);
91
+ free(tb);
93
92
  }
94
93
 
95
94
  TermBuffer *tb_set_term(TermBuffer *tb, Term *t)
@@ -106,8 +105,9 @@ Term *tb_get_term(TermBuffer *tb)
106
105
 
107
106
  int tb_cmp(TermBuffer *tb1, TermBuffer *tb2)
108
107
  {
109
- int res = strcmp(tb1->field, tb2->field);
110
- if (res != 0) {
108
+ int res;
109
+ if ((tb1->field != tb2->field) &&
110
+ (0 != (res = strcmp(tb1->field, tb2->field)))) {
111
111
  return res;
112
112
  } else {
113
113
  return strcmp(tb1->text, tb2->text);
@@ -133,12 +133,13 @@ TermBuffer *tb_cpy(TermBuffer *tb1, TermBuffer *tb2)
133
133
 
134
134
  TermBuffer *tb_read(TermBuffer *tb, InStream *is, FieldInfos *fis)
135
135
  {
136
- int start = is_read_vint(is);
137
- int length = is_read_vint(is);
136
+ signed int fnum;
137
+ int start = (int)is_read_vint(is);
138
+ int length = (int)is_read_vint(is);
138
139
  int total_length = start + length;
139
140
  is_read_bytes(is, (uchar *)tb->text, start, length);
140
141
  tb->text[total_length] = '\0';
141
- int fnum = is_read_vint(is);
142
+ fnum = (signed int)is_read_vint(is);
142
143
  if (fnum < 0)
143
144
  tb->field = (char *)EMPTY_STRING;
144
145
  else
@@ -171,9 +172,9 @@ TermInfo *ti_set(TermInfo *ti, int doc_freq, int freq_pointer, int prox_pointer,
171
172
  return ti;
172
173
  }
173
174
 
174
- void ti_destroy(void *p)
175
+ void ti_destroy(TermInfo *ti)
175
176
  {
176
- free(p);
177
+ free(ti);
177
178
  }
178
179
 
179
180
  TermInfo *ti_cpy(TermInfo *ti, TermInfo *other)
@@ -208,13 +209,12 @@ TermEnum *te_create()
208
209
  return te;
209
210
  }
210
211
 
211
- void te_destroy(void *p)
212
+ void te_destroy(TermEnum *te)
212
213
  {
213
- TermEnum *te = (TermEnum *)p;
214
214
  tb_destroy(te->tb_curr);
215
215
  tb_destroy(te->tb_prev);
216
216
  ti_destroy(te->ti_curr);
217
- free(p);
217
+ free(te);
218
218
  }
219
219
 
220
220
  Term *te_get_term(TermEnum *te)
@@ -246,11 +246,12 @@ TermBuffer *te_skip_to(TermEnum *te, Term *t)
246
246
  *
247
247
  ****************************************************************************/
248
248
 
249
- #define GET_STE SegmentTermEnum *ste = (SegmentTermEnum *)te->data;
249
+ #define GET_STE SegmentTermEnum *ste = (SegmentTermEnum *)te->data
250
250
 
251
251
  TermBuffer *ste_next(TermEnum *te)
252
252
  {
253
253
  GET_STE;
254
+ TermInfo *ti;
254
255
  InStream *is = ste->is;
255
256
  ste->pos++;
256
257
  if (ste->pos > ste->size - 1) {
@@ -261,38 +262,43 @@ TermBuffer *ste_next(TermEnum *te)
261
262
  tb_cpy(te->tb_prev, te->tb_curr);
262
263
  tb_read(te->tb_curr, is, ste->fis);
263
264
 
264
- TermInfo *ti = te->ti_curr;
265
- ti->doc_freq = is_read_vint(is); // read doc freq
266
- ti->freq_pointer += is_read_vint(is); // read freq pointer
267
- ti->prox_pointer += is_read_vint(is); // read prox pointer
265
+ ti = te->ti_curr;
266
+ ti->doc_freq = (int)is_read_vint(is); /* read doc freq */
267
+ ti->freq_pointer += (int)is_read_vint(is);/* read freq pointer */
268
+ ti->prox_pointer += (int)is_read_vint(is);/* read prox pointer */
268
269
 
269
270
  if (ste->format == -1) {
270
- // just read skip_offset in order to increment file pointer
271
- // value is never used since skip_to is switched off
271
+ /* just read skip_offset in order to increment file pointer
272
+ * value is never used since skip_to is switched off */
272
273
  if (!ste->is_index) {
273
- if (ti->doc_freq > ste->format_m1skip_interval)
274
- ti->skip_offset = is_read_vint(is);
274
+ if (ti->doc_freq > ste->format_m1skip_interval) {
275
+ ti->skip_offset = (int)is_read_vint(is);
276
+ }
275
277
  }
276
278
  } else {
277
- if (ti->doc_freq >= ste->skip_interval)
278
- ti->skip_offset = is_read_vint(is);
279
+ if (ti->doc_freq >= ste->skip_interval) {
280
+ ti->skip_offset = (int)is_read_vint(is);
281
+ }
279
282
  }
280
283
 
281
- if (ste->is_index)
282
- ste->index_pointer += is_read_vint(is); // read index pointer
284
+ if (ste->is_index) {
285
+ ste->index_pointer += (int)is_read_vint(is); /* read index pointer */
286
+ }
283
287
 
284
288
  return te->tb_curr;
285
289
  }
286
290
 
287
291
  TermEnum *ste_clone(TermEnum *other_te);
292
+
288
293
  TermEnum *ste_allocate()
289
294
  {
290
295
  TermEnum *te = te_create();
296
+ SegmentTermEnum *ste;
297
+
291
298
  te->next = &ste_next;
292
299
  te->close = &ste_close;
293
300
  te->clone = &ste_clone;
294
- SegmentTermEnum *ste =
295
- ALLOC(SegmentTermEnum);
301
+ ste = ALLOC(SegmentTermEnum);
296
302
  te->data = ste;
297
303
  return te;
298
304
  }
@@ -302,6 +308,7 @@ TermEnum *ste_clone(TermEnum *other_te)
302
308
  SegmentTermEnum *other_ste = (SegmentTermEnum *)other_te->data;
303
309
  TermEnum *te = ste_allocate();
304
310
  SegmentTermEnum *ste = (SegmentTermEnum *)te->data;
311
+
305
312
  memcpy(ste, other_ste, sizeof(SegmentTermEnum));
306
313
  ste->is = is_clone(other_ste->is);
307
314
  tb_cpy(te->tb_curr, other_te->tb_curr);
@@ -323,6 +330,8 @@ TermEnum *ste_create(InStream *is, FieldInfos *fis, int is_index)
323
330
  {
324
331
  TermEnum *te = ste_allocate();
325
332
  GET_STE;
333
+ int first_int;
334
+
326
335
  ste->fis = fis;
327
336
  ste->is_index = is_index;
328
337
  ste->is = is;
@@ -330,36 +339,36 @@ TermEnum *ste_create(InStream *is, FieldInfos *fis, int is_index)
330
339
  ste->index_pointer = 0;
331
340
  ste->format_m1skip_interval = -1;
332
341
 
333
- int first_int = is_read_int(is);
342
+ first_int = (int)is_read_int(is);
334
343
 
335
344
  if (first_int >= 0) {
336
- // original-format file, without explicit format version number
345
+ /* original-format file, without explicit format version number */
337
346
  ste->format = 0;
338
347
  ste->size = first_int;
339
348
 
340
- // back-compatible settings
349
+ /* back-compatible settings */
341
350
  ste->index_interval = 128;
342
- ste->skip_interval = INT_MAX; // switch off skip_to optimization
351
+ ste->skip_interval = INT_MAX; /* switch off skip_to optimization */
343
352
 
344
353
  } else {
345
- // check that it is a format we can understand
354
+ /* check that it is a format we can understand */
346
355
  if (first_int < TERM_INFO_FORMAT)
347
- RAISE(ERROR, FORMAT_VERSION_ERROR_MSG);
356
+ RAISE(EXCEPTION, FORMAT_VERSION_ERROR_MSG);
348
357
 
349
- // we have a format version number
358
+ /* we have a format version number */
350
359
  ste->format = first_int;
351
360
 
352
361
 
353
- ste->size = is_read_long(is); // read the size
362
+ ste->size = (int)is_read_long(is); /* read the size */
354
363
 
355
364
  if (ste->format == -1) {
356
365
  if (!ste->is_index) {
357
366
  ste->index_interval = is_read_int(is);
358
367
  ste->format_m1skip_interval = is_read_int(is);
359
368
  }
360
- // switch off skip_to optimization for file format prior to
361
- // 1.4rc2 in order to avoid a bug in skip_to implementation
362
- // of these versions
369
+ /* switch off skip_to optimization for file format prior to
370
+ * 1.4rc2 in order to avoid a bug in skip_to implementation
371
+ * of these versions */
363
372
  ste->skip_interval = INT_MAX;
364
373
  } else {
365
374
  ste->index_interval = is_read_int(is);
@@ -407,7 +416,7 @@ Term *ste_scan_for_term(TermEnum *te, int pos)
407
416
  *
408
417
  ****************************************************************************/
409
418
 
410
- #define GET_MTE MultiTermEnum *mte = (MultiTermEnum *)te->data;
419
+ #define GET_MTE MultiTermEnum *mte = (MultiTermEnum *)te->data
411
420
 
412
421
  TermBuffer *mte_next(TermEnum *te)
413
422
  {
@@ -426,11 +435,11 @@ TermBuffer *mte_next(TermEnum *te)
426
435
 
427
436
  while ((top != NULL) && (tb_cmp(te->tb_curr, top->tb) == 0)) {
428
437
  pq_pop(mte->smi_queue);
429
- te->ti_curr->doc_freq += top->te->ti_curr->doc_freq; // increment freq
438
+ te->ti_curr->doc_freq += top->te->ti_curr->doc_freq;/* increment freq */
430
439
  if (smi_next(top)) {
431
- pq_push(mte->smi_queue, top); // restore queue
440
+ pq_push(mte->smi_queue, top); /* restore queue */
432
441
  } else {
433
- smi_destroy(top); // done with a segment
442
+ smi_destroy(top); /* done with a segment */
434
443
  }
435
444
  top = (SegmentMergeInfo *)pq_top(mte->smi_queue);
436
445
  }
@@ -455,21 +464,21 @@ TermEnum *mte_clone(TermEnum *te)
455
464
  TermEnum *mte_create(IndexReader **readers, int *starts, int rcnt, Term *t)
456
465
  {
457
466
  int i;
467
+ IndexReader *reader;
468
+ TermEnum *sub_te;
469
+ MultiTermEnum *mte = ALLOC(MultiTermEnum);
458
470
  TermEnum *te = te_create();
459
471
  te->next = &mte_next;
460
472
  te->clone = &mte_clone;
461
473
  te->close = &mte_close;
462
474
 
463
- MultiTermEnum *mte = ALLOC(MultiTermEnum);
464
475
  te->data = mte;
465
476
 
466
- IndexReader *reader;
467
- TermEnum *sub_te;
468
-
469
- mte->smi_queue = pq_create(rcnt, &smi_lt);
470
- mte->smi_queue->free_elem = &smi_destroy;
477
+ mte->smi_queue = pq_create(rcnt, (lt_ft)&smi_lt);
478
+ mte->smi_queue->free_elem = (free_ft)&smi_destroy;
471
479
 
472
480
  for (i = 0; i < rcnt; i++) {
481
+ SegmentMergeInfo *smi;
473
482
  reader = readers[i];
474
483
 
475
484
  if (t != NULL) {
@@ -478,10 +487,10 @@ TermEnum *mte_create(IndexReader **readers, int *starts, int rcnt, Term *t)
478
487
  sub_te = reader->terms(reader);
479
488
  }
480
489
 
481
- SegmentMergeInfo *smi = smi_create(starts[i], sub_te, reader);
490
+ smi = smi_create(starts[i], sub_te, reader);
482
491
  if (((t == NULL) && smi_next(smi)) ||
483
492
  (sub_te->tb_curr->field != (char *)EMPTY_STRING)) {
484
- pq_push(mte->smi_queue, smi); // initialize queue
493
+ pq_push(mte->smi_queue, smi); /* initialize queue */
485
494
  } else {
486
495
  smi_destroy(smi);
487
496
  }
@@ -508,7 +517,10 @@ TermInfosWriter *tiw_open_internal(Store *store,
508
517
  int interval,
509
518
  int is_index)
510
519
  {
520
+ char fname[SEGMENT_NAME_MAX_LENGTH];
511
521
  TermInfosWriter *tiw = ALLOC(TermInfosWriter);
522
+ OutStream *os;
523
+
512
524
  tiw->index_interval = interval;
513
525
  tiw->skip_interval = 16;
514
526
  tiw->last_index_pointer = 0;
@@ -520,14 +532,13 @@ TermInfosWriter *tiw_open_internal(Store *store,
520
532
  tiw->curr_field = NULL;
521
533
  tiw->curr_field_num = -1;
522
534
 
523
- char fname[SEGMENT_NAME_MAX_LENGTH];
524
535
  strcpy(fname, segment);
525
536
  strcat(fname, (is_index ? ".tii" : ".tis"));
526
- OutStream *os = tiw->os = store->create_output(store, fname);
527
- os_write_int(os, TERM_INFO_FORMAT); // write format
528
- os_write_long(os, 0); // leave space for size
529
- os_write_int(os, tiw->index_interval); // write index_interval
530
- os_write_int(os, tiw->skip_interval); // write skip_interval
537
+ os = tiw->os = store->create_output(store, fname);
538
+ os_write_int(os, TERM_INFO_FORMAT); /* write format */
539
+ os_write_long(os, 0); /* leave space for size */
540
+ os_write_int(os, tiw->index_interval); /* write index_interval */
541
+ os_write_int(os, tiw->skip_interval); /* write skip_interval */
531
542
  if (!is_index) {
532
543
  tiw->other = tiw_open_internal(store, segment, fis, interval, true);
533
544
  tiw->other->other = tiw;
@@ -544,11 +555,11 @@ void tiw_write_term(TermInfosWriter *tiw, OutStream *os, Term *t)
544
555
  {
545
556
  //printf("%s, %s\n", tiw->last_term->text, t->text);
546
557
  int start = hlp_string_diff(tiw->last_term->text, t->text);
547
- int length = strlen(t->text) - start;
558
+ int length = (int)strlen(t->text) - start;
548
559
 
549
- os_write_vint(os, start); // write shared prefix length
550
- os_write_vint(os, length); // write delta length
551
- os_write_chars(os, t->text, start, length); // write delta chars
560
+ os_write_vint(os, start); /* write shared prefix length */
561
+ os_write_vint(os, length); /* write delta length */
562
+ os_write_chars(os, t->text, start, length); /* write delta chars */
552
563
  if (tiw->curr_field != t->field) {
553
564
  tiw->curr_field = t->field;
554
565
  tiw->curr_field_num = fis_get_number(tiw->fis, t->field);
@@ -559,6 +570,7 @@ void tiw_write_term(TermInfosWriter *tiw, OutStream *os, Term *t)
559
570
 
560
571
  void tiw_add(TermInfosWriter *tiw, Term *t, TermInfo *ti)
561
572
  {
573
+ #ifdef DEBUG
562
574
  if (tiw->is_index && term_cmp(tiw->last_term, t) > 0) {
563
575
  RAISE(STATE_ERROR, TERM_ORDER_ERROR_MSG);
564
576
  }
@@ -568,22 +580,27 @@ void tiw_add(TermInfosWriter *tiw, Term *t, TermInfo *ti)
568
580
  if (ti->prox_pointer < tiw->last_term_info->prox_pointer) {
569
581
  RAISE(STATE_ERROR, PP_ORDER_ERROR_MSG);
570
582
  }
583
+ #endif
571
584
 
572
- if (!tiw->is_index && (tiw->size % tiw->index_interval) == 0)
573
- tiw_add(tiw->other, tiw->last_term, tiw->last_term_info); // add an index term
585
+ if (!tiw->is_index && (tiw->size % tiw->index_interval) == 0) {
586
+ /* add an index term */
587
+ tiw_add(tiw->other, tiw->last_term, tiw->last_term_info);
588
+ }
574
589
 
575
- tiw_write_term(tiw, tiw->os, t); // write term
576
- os_write_vint(tiw->os, ti->doc_freq); // write doc freq
590
+ tiw_write_term(tiw, tiw->os, t); /* write term */
591
+ os_write_vint(tiw->os, ti->doc_freq); /* write doc freq */
577
592
  os_write_vint(tiw->os, ti->freq_pointer - tiw->last_term_info->freq_pointer);
578
593
  os_write_vint(tiw->os, ti->prox_pointer - tiw->last_term_info->prox_pointer);
579
- if (ti->doc_freq >= tiw->skip_interval)
594
+
595
+ if (ti->doc_freq >= tiw->skip_interval) {
580
596
  os_write_vint(tiw->os, ti->skip_offset);
597
+ }
581
598
 
582
599
  if (tiw->is_index) {
583
600
  OutStream *other_os = tiw->other->os;
584
601
  int other_pos = os_pos(other_os);
585
602
  os_write_vint(tiw->os, other_pos - tiw->last_index_pointer);
586
- tiw->last_index_pointer = other_pos; // write pointer
603
+ tiw->last_index_pointer = other_pos; /* write pointer */
587
604
  }
588
605
 
589
606
  ti_cpy(tiw->last_term_info, ti);
@@ -593,7 +610,7 @@ void tiw_add(TermInfosWriter *tiw, Term *t, TermInfo *ti)
593
610
  void tiw_close(TermInfosWriter *tiw)
594
611
  {
595
612
  OutStream *os = tiw->os;
596
- os_seek(os, 4); // write @size after format
613
+ os_seek(os, 4); /* write @size after format */
597
614
  os_write_long(os, tiw->size);
598
615
  os_close(os);
599
616
 
@@ -632,17 +649,20 @@ void tir_close(TermInfosReader *tir)
632
649
 
633
650
  TermInfosReader *tir_open(Store *store, char *segment, FieldInfos *fis)
634
651
  {
652
+ SegmentTermEnum *ste;
635
653
  TermInfosReader *tir = ALLOC(TermInfosReader);
636
654
  char fname[SEGMENT_NAME_MAX_LENGTH];
655
+ InStream *is;
656
+
637
657
  mutex_init(&tir->mutex, NULL);
638
658
  strcpy(fname, segment);
639
659
  strcpy(fname + strlen(segment), ".tis");
640
- InStream *is = store->open_input(store, fname);
660
+ is = store->open_input(store, fname);
641
661
  tir->orig_te = ste_create(is, fis, false);
642
662
  thread_key_create(&tir->thread_te, NULL);
643
- tir->te_bucket = ary_create(1, (destroy_func_t)tir->orig_te->close);
663
+ tir->te_bucket = ary_create(1, (free_ft)tir->orig_te->close);
644
664
 
645
- SegmentTermEnum *ste = tir->orig_te->data;
665
+ ste = tir->orig_te->data;
646
666
  tir->size = ste->size;
647
667
  tir->skip_interval = ste->skip_interval;
648
668
 
@@ -659,6 +679,9 @@ void tir_ensure_index_is_read(TermInfosReader *tir)
659
679
  {
660
680
  mutex_lock(&tir->mutex);
661
681
  if (tir->index_terms == NULL) {
682
+ TermEnum *index_te;
683
+ SegmentTermEnum *ste;
684
+ int i = 0;
662
685
  int index_size = ((SegmentTermEnum *)tir->index_te->data)->size;
663
686
  tir->index_size = index_size;
664
687
 
@@ -666,9 +689,8 @@ void tir_ensure_index_is_read(TermInfosReader *tir)
666
689
  tir->index_term_infos = ALLOC_N(TermInfo *, index_size);
667
690
  tir->index_pointers = ALLOC_N(int, index_size);
668
691
 
669
- int i = 0;
670
- TermEnum *index_te = tir->index_te;
671
- SegmentTermEnum *ste = index_te->data;
692
+ index_te = tir->index_te;
693
+ ste = index_te->data;
672
694
 
673
695
  TRY
674
696
  while (ste_next(index_te) != NULL) {
@@ -708,7 +730,7 @@ void tir_seek_enum(TermInfosReader *tir, int ind_offset)
708
730
 
709
731
  int tir_get_index_offset(TermInfosReader *tir, Term *t)
710
732
  {
711
- int lo = 0; // binary search tir->index_terms[]
733
+ int lo = 0; /* binary search tir->index_terms[] */
712
734
  int hi = tir->index_size - 1;
713
735
  int mid, delta;
714
736
  Term **index_terms = tir->index_terms;
@@ -729,61 +751,70 @@ int tir_get_index_offset(TermInfosReader *tir, Term *t)
729
751
 
730
752
  TermInfo *tir_get_ti(TermInfosReader *tir, Term *t)
731
753
  {
732
- if (tir->size == 0)
754
+ TermEnum *te;
755
+ SegmentTermEnum *ste;
756
+ if (tir->size == 0) {
733
757
  return NULL;
758
+ }
734
759
 
735
760
  tir_ensure_index_is_read(tir);
736
761
 
737
- // optimize sequential access: first try scanning cached enum w/o seeking
738
- TermEnum *te = tir_enum(tir);
739
- SegmentTermEnum *ste = (SegmentTermEnum *)te->data;
762
+ /* optimize sequential access: first try scanning cached enum w/o seeking */
763
+ te = tir_enum(tir);
764
+ ste = (SegmentTermEnum *)te->data;
740
765
  if (ste->pos < ste->size && tb_term_cmp(te->tb_curr, t) <= 0) {
741
766
  SegmentTermEnum *ste = (SegmentTermEnum *)te->data;
742
767
  int enum_offset = (int)(ste->pos / ste->index_interval) + 1;
743
768
  if (tir->index_size == enum_offset ||
744
- term_cmp(t, tir->index_terms[enum_offset]) < 0) { // but before end of block
745
- return ste_scan_for_term_info(te, t); // no need to seek
769
+ term_cmp(t, tir->index_terms[enum_offset]) < 0) { /* but before end of block */
770
+ return ste_scan_for_term_info(te, t); /* no need to seek */
746
771
  }
747
772
  }
748
773
 
749
- // random-access: must seek
774
+ /* random-access: must seek */
750
775
  tir_seek_enum(tir, tir_get_index_offset(tir, t));
751
776
  return ste_scan_for_term_info(te, t);
752
777
  }
753
778
 
754
779
  Term *tir_get_term(TermInfosReader *tir, int pos)
755
780
  {
756
- if (tir->size == 0)
781
+ if (tir->size == 0) {
757
782
  return NULL;
783
+ } else {
784
+ TermEnum *te = tir_enum(tir);
785
+ SegmentTermEnum *ste = (SegmentTermEnum *)te->data;
786
+ if (pos >= ste->pos &&
787
+ pos < (ste->pos + ste->index_interval)) {
788
+ return ste_scan_for_term(te, pos); /* can avoid seek */
789
+ }
758
790
 
759
- TermEnum *te = tir_enum(tir);
760
- SegmentTermEnum *ste = (SegmentTermEnum *)te->data;
761
- if (pos >= ste->pos &&
762
- pos < (ste->pos + ste->index_interval)) {
763
- return ste_scan_for_term(te, pos); // can avoid seek
791
+ tir_seek_enum(tir, (int)(pos / ste->index_interval)); /* must seek */
792
+ return ste_scan_for_term(te, pos);
764
793
  }
765
-
766
- tir_seek_enum(tir, (int)(pos / ste->index_interval)); // must seek
767
- return ste_scan_for_term(te, pos);
768
794
  }
769
795
 
770
796
  int tir_get_term_pos(TermInfosReader *tir, Term *t)
771
797
  {
772
- if (tir->size == 0)
798
+ if (tir->size == 0) {
773
799
  return -1;
800
+ } else {
801
+ TermEnum *te;
802
+ int ind_offset;
803
+
804
+ tir_ensure_index_is_read(tir);
774
805
 
775
- tir_ensure_index_is_read(tir);
776
-
777
- int ind_offset = tir_get_index_offset(tir, t);
778
- tir_seek_enum(tir, ind_offset);
806
+ ind_offset = tir_get_index_offset(tir, t);
807
+ tir_seek_enum(tir, ind_offset);
779
808
 
780
- TermEnum *te = tir_enum(tir);
781
- while ((tb_term_cmp(te->tb_curr, t) < 0) && (ste_next(te) != NULL))
782
- ;
809
+ te = tir_enum(tir);
810
+ while ((tb_term_cmp(te->tb_curr, t) < 0) && (ste_next(te) != NULL)) {
811
+ }
783
812
 
784
- if (tb_term_cmp(te->tb_curr, t) == 0)
785
- return ((SegmentTermEnum *)te->data)->pos;
786
- else
787
- return -1;
813
+ if (tb_term_cmp(te->tb_curr, t) == 0) {
814
+ return ((SegmentTermEnum *)te->data)->pos;
815
+ } else {
816
+ return -1;
817
+ }
818
+ }
788
819
  }
789
820