ferret 0.9.1 → 0.9.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105)
  1. data/README +6 -5
  2. data/Rakefile +34 -13
  3. data/TODO +1 -0
  4. data/TUTORIAL +1 -1
  5. data/ext/analysis.c +87 -70
  6. data/ext/analysis.h +18 -6
  7. data/ext/array.c +1 -2
  8. data/ext/array.h +1 -1
  9. data/ext/bitvector.c +10 -6
  10. data/ext/bitvector.h +2 -2
  11. data/ext/compound_io.c +30 -27
  12. data/ext/document.c +15 -15
  13. data/ext/document.h +5 -5
  14. data/ext/except.c +2 -0
  15. data/ext/except.h +25 -23
  16. data/ext/extconf.rb +1 -0
  17. data/ext/ferret.c +10 -8
  18. data/ext/ferret.h +9 -8
  19. data/ext/field.c +29 -25
  20. data/ext/filter.c +52 -14
  21. data/ext/frtio.h +13 -0
  22. data/ext/fs_store.c +115 -170
  23. data/ext/global.c +9 -8
  24. data/ext/global.h +17 -13
  25. data/ext/hash.c +13 -19
  26. data/ext/hash.h +11 -11
  27. data/ext/hashset.c +5 -7
  28. data/ext/hashset.h +9 -8
  29. data/ext/helper.c +1 -1
  30. data/ext/helper.h +2 -1
  31. data/ext/inc/except.h +25 -23
  32. data/ext/inc/lang.h +11 -1
  33. data/ext/ind.c +33 -21
  34. data/ext/index.h +44 -39
  35. data/ext/index_io.c +61 -57
  36. data/ext/index_rw.c +418 -361
  37. data/ext/lang.c +10 -0
  38. data/ext/lang.h +11 -1
  39. data/ext/nix_io.c +135 -0
  40. data/ext/priorityqueue.c +16 -16
  41. data/ext/priorityqueue.h +9 -6
  42. data/ext/q_boolean.c +128 -76
  43. data/ext/q_const_score.c +20 -20
  44. data/ext/q_filtered_query.c +20 -20
  45. data/ext/q_fuzzy.c +37 -23
  46. data/ext/q_match_all.c +15 -19
  47. data/ext/q_multi_phrase.c +87 -46
  48. data/ext/q_parser.c +247 -119
  49. data/ext/q_phrase.c +86 -52
  50. data/ext/q_prefix.c +25 -14
  51. data/ext/q_range.c +59 -14
  52. data/ext/q_span.c +263 -172
  53. data/ext/q_term.c +62 -51
  54. data/ext/q_wildcard.c +24 -13
  55. data/ext/r_analysis.c +328 -80
  56. data/ext/r_doc.c +11 -6
  57. data/ext/r_index_io.c +40 -32
  58. data/ext/r_qparser.c +15 -14
  59. data/ext/r_search.c +270 -152
  60. data/ext/r_store.c +32 -17
  61. data/ext/ram_store.c +38 -22
  62. data/ext/search.c +617 -87
  63. data/ext/search.h +227 -163
  64. data/ext/similarity.c +54 -45
  65. data/ext/similarity.h +3 -3
  66. data/ext/sort.c +132 -53
  67. data/ext/store.c +21 -2
  68. data/ext/store.h +14 -14
  69. data/ext/tags +4322 -232
  70. data/ext/term.c +140 -109
  71. data/ext/termdocs.c +74 -60
  72. data/ext/vector.c +181 -152
  73. data/ext/w32_io.c +150 -0
  74. data/lib/ferret.rb +1 -1
  75. data/lib/ferret/analysis/standard_tokenizer.rb +4 -3
  76. data/lib/ferret/document/field.rb +1 -1
  77. data/lib/ferret/index/field_infos.rb +1 -1
  78. data/lib/ferret/index/term.rb +1 -1
  79. data/lib/ferret/query_parser/query_parser.tab.rb +8 -24
  80. data/lib/ferret/search.rb +1 -0
  81. data/lib/ferret/search/boolean_query.rb +0 -4
  82. data/lib/ferret/search/index_searcher.rb +21 -8
  83. data/lib/ferret/search/multi_phrase_query.rb +7 -0
  84. data/lib/ferret/search/multi_searcher.rb +261 -0
  85. data/lib/ferret/search/phrase_query.rb +1 -1
  86. data/lib/ferret/search/query.rb +34 -5
  87. data/lib/ferret/search/sort.rb +7 -3
  88. data/lib/ferret/search/sort_field.rb +8 -4
  89. data/lib/ferret/store/fs_store.rb +13 -6
  90. data/lib/ferret/store/index_io.rb +0 -14
  91. data/lib/ferret/store/ram_store.rb +3 -2
  92. data/lib/rferret.rb +1 -1
  93. data/test/unit/analysis/ctc_analyzer.rb +131 -0
  94. data/test/unit/analysis/ctc_tokenstream.rb +98 -9
  95. data/test/unit/index/tc_index.rb +40 -1
  96. data/test/unit/index/tc_term.rb +7 -0
  97. data/test/unit/index/th_doc.rb +8 -0
  98. data/test/unit/query_parser/tc_query_parser.rb +6 -4
  99. data/test/unit/search/rtc_sort_field.rb +6 -6
  100. data/test/unit/search/tc_index_searcher.rb +8 -0
  101. data/test/unit/search/tc_multi_searcher.rb +275 -0
  102. data/test/unit/search/tc_multi_searcher2.rb +126 -0
  103. data/test/unit/search/tc_search_and_sort.rb +66 -0
  104. metadata +31 -26
  105. data/test/unit/query_parser/rtc_query_parser.rb +0 -138
data/ext/search.h CHANGED
@@ -9,6 +9,9 @@ typedef struct Scorer Scorer;
9
9
  #include "bitvector.h"
10
10
  #include "similarity.h"
11
11
 
12
+ #define term_set_create() \
13
+ hs_create((hash_ft)&term_hash, (eq_ft)&term_eq, (free_ft)&term_destroy)
14
+
12
15
  /***************************************************************************
13
16
  *
14
17
  * Explanation
@@ -24,11 +27,11 @@ typedef struct Explanation {
24
27
  int dcapa;
25
28
  } Explanation;
26
29
 
27
- Explanation *expl_create(float value, char *description);
28
- void expl_destoy(void *p);
29
- Explanation *expl_add_detail(Explanation *self, Explanation *detail);
30
- char *expl_to_s(Explanation *self, int depth);
31
- char *expl_to_html(Explanation *self);
30
+ extern Explanation *expl_create(float value, char *description);
31
+ extern void expl_destoy(void *p);
32
+ extern Explanation *expl_add_detail(Explanation *self, Explanation *detail);
33
+ extern char *expl_to_s(Explanation *self, int depth);
34
+ extern char *expl_to_html(Explanation *self);
32
35
 
33
36
  /***************************************************************************
34
37
  *
@@ -41,7 +44,7 @@ typedef struct Hit {
41
44
  float score;
42
45
  } Hit;
43
46
 
44
- bool hit_less_than(void *p1, void *p2);
47
+ extern bool hit_less_than(void *p1, void *p2);
45
48
 
46
49
  /***************************************************************************
47
50
  *
@@ -55,9 +58,9 @@ typedef struct TopDocs {
55
58
  Hit **hits;
56
59
  } TopDocs;
57
60
 
58
- TopDocs *td_create(int total_hits, int size, Hit **hits);
59
- void td_destroy(void *p);
60
- char *td_to_s(TopDocs *td);
61
+ extern TopDocs *td_create(int total_hits, int size, Hit **hits);
62
+ extern void td_destroy(TopDocs *td);
63
+ extern char *td_to_s(TopDocs *td);
61
64
 
62
65
  /***************************************************************************
63
66
  *
@@ -71,13 +74,17 @@ typedef struct Filter {
71
74
  HshTable *cache;
72
75
  BitVector *(*get_bv)(struct Filter *self, IndexReader *ir);
73
76
  char *(*to_s)(struct Filter *self);
74
- void (*destroy)(void *p);
77
+ uint (*hash)(struct Filter *self);
78
+ int (*eq)(struct Filter *self, struct Filter *o);
79
+ void (*destroy)(struct Filter *self);
75
80
  } Filter;
76
81
 
77
- Filter *filt_create(char *name);
78
- char *filt_to_s(Filter *self);
79
- BitVector *filt_get_bv(Filter *self, IndexReader *ir);
80
- void filt_destroy(void *p);
82
+ extern Filter *filt_create(char *name);
83
+ extern char *filt_to_s_i(Filter *self);
84
+ extern BitVector *filt_get_bv(Filter *self, IndexReader *ir);
85
+ extern void filt_destroy(Filter *self);
86
+ extern uint filt_hash(Filter *self);
87
+ extern int filt_eq(Filter *self, Filter *o);
81
88
 
82
89
  /***************************************************************************
83
90
  *
@@ -85,9 +92,8 @@ void filt_destroy(void *p);
85
92
  *
86
93
  ***************************************************************************/
87
94
 
88
- Filter *rfilt_create(const char *field, char *lower_term, char *upper_term,
89
- bool include_lower, bool include_upper);
90
- void rfilt_destroy(void *p);
95
+ extern Filter *rfilt_create(const char *field, char *lower_term,
96
+ char *upper_term, bool include_lower, bool include_upper);
91
97
 
92
98
  /***************************************************************************
93
99
  *
@@ -99,8 +105,7 @@ typedef struct QueryFilter {
99
105
  Query *query;
100
106
  } QueryFilter;
101
107
 
102
- Filter *qfilt_create(Query *query);
103
-
108
+ extern Filter *qfilt_create(Query *query);
104
109
 
105
110
  /***************************************************************************
106
111
  *
@@ -123,13 +128,16 @@ struct Weight {
123
128
  Explanation *(*explain)(Weight *self, IndexReader *ir, int doc_num);
124
129
  float (*sum_of_squared_weights)(Weight *self);
125
130
  char *(*to_s)(Weight *self);
126
- void (*destroy)(void *p);
131
+ void (*destroy)(Weight *self);
127
132
  };
128
133
 
129
- Query *w_get_query(Weight *self);
130
- float w_get_value(Weight *self);
131
- float w_sum_of_squared_weights(Weight *self);
132
- void w_normalize(Weight *self, float normalization_factor);
134
+ extern Weight *w_create(Query *query);
135
+ extern void w_destroy(Weight *self);
136
+
137
+ extern Query *w_get_query(Weight *self);
138
+ extern float w_get_value(Weight *self);
139
+ extern float w_sum_of_squared_weights(Weight *self);
140
+ extern void w_normalize(Weight *self, float normalization_factor);
133
141
 
134
142
  /***************************************************************************
135
143
  *
@@ -137,7 +145,7 @@ void w_normalize(Weight *self, float normalization_factor);
137
145
  *
138
146
  ***************************************************************************/
139
147
 
140
- Weight *tw_create(Query *query, Searcher *searcher);
148
+ extern Weight *tw_create(Query *query, Searcher *searcher);
141
149
 
142
150
  /***************************************************************************
143
151
  *
@@ -149,7 +157,8 @@ typedef struct BooleanWeight {
149
157
  Weight **weights;
150
158
  int w_cnt;
151
159
  } BooleanWeight;
152
- Weight *bw_create(Query *query, Searcher *searcher);
160
+
161
+ extern Weight *bw_create(Query *query, Searcher *searcher);
153
162
 
154
163
  /***************************************************************************
155
164
  *
@@ -157,7 +166,7 @@ Weight *bw_create(Query *query, Searcher *searcher);
157
166
  *
158
167
  ***************************************************************************/
159
168
 
160
- Weight *phw_create(Query *query, Searcher *searcher);
169
+ extern Weight *phw_create(Query *query, Searcher *searcher);
161
170
 
162
171
  /***************************************************************************
163
172
  *
@@ -165,7 +174,7 @@ Weight *phw_create(Query *query, Searcher *searcher);
165
174
  *
166
175
  ***************************************************************************/
167
176
 
168
- Weight *csw_create(Query *query, Searcher *searcher);
177
+ extern Weight *csw_create(Query *query, Searcher *searcher);
169
178
 
170
179
  /***************************************************************************
171
180
  *
@@ -173,7 +182,7 @@ Weight *csw_create(Query *query, Searcher *searcher);
173
182
  *
174
183
  ***************************************************************************/
175
184
 
176
- Weight *maw_create(Query *query, Searcher *searcher);
185
+ extern Weight *maw_create(Query *query, Searcher *searcher);
177
186
 
178
187
  /***************************************************************************
179
188
  *
@@ -181,7 +190,7 @@ Weight *maw_create(Query *query, Searcher *searcher);
181
190
  *
182
191
  ***************************************************************************/
183
192
 
184
- Weight *spanw_create(Query *query, Searcher *searcher);
193
+ extern Weight *spanw_create(Query *query, Searcher *searcher);
185
194
 
186
195
  /***************************************************************************
187
196
  *
@@ -211,24 +220,32 @@ enum QUERY_TYPE {
211
220
  struct Query {
212
221
  bool destroy_all : 1;
213
222
  uchar type;
223
+ int ref_cnt;
214
224
  void *data;
215
225
  float boost;
216
- float original_boost;
217
226
  Weight *weight;
218
- Query *rewritten;
219
- Weight *(*create_weight)(Query *self, Searcher *searcher);
220
227
  Query *(*rewrite)(Query *self, IndexReader *ir);
221
- void (*extract_terms)(Query *self, Array *terms);
228
+ void (*extract_terms)(Query *self, HashSet *terms);
222
229
  Similarity *(*get_similarity)(Query *self, Searcher *searcher);
223
230
  char *(*to_s)(Query *self, char *field);
224
- void (*destroy)(void *p);
231
+ uint (*hash)(Query *self);
232
+ int (*eq)(Query *self, Query *o);
233
+ void (*destroy_i)(Query *self);
234
+ Weight *(*create_weight_i)(Query *self, Searcher *searcher);
225
235
  };
226
236
 
227
- Weight *q_weight(Query *self, Searcher *searcher);
228
- void q_destroy(Query *self);
229
- Similarity *q_get_similarity(Query *self, Searcher *searcher);
230
- void q_extract_terms(Query *self, Array *terms);
231
- Query *q_create();
237
+ /* Internal Query Functions */
238
+ extern Query *q_create();
239
+ extern Similarity *q_get_similarity_i(Query *self, Searcher *searcher);
240
+ extern void q_destroy_i(Query *self);
241
+ extern Weight *q_create_weight_unsup(Query *self, Searcher *searcher);
242
+
243
+
244
+ extern void q_deref(Query *self);
245
+ extern Weight *q_weight(Query *self, Searcher *searcher);
246
+ extern Query *q_combine(Query **queries, int q_cnt);
247
+ extern uint q_hash(Query *self);
248
+ extern int q_eq(Query *self, Query *o);
232
249
 
233
250
  /***************************************************************************
234
251
  *
@@ -240,7 +257,7 @@ typedef struct TermQuery {
240
257
  Term *term;
241
258
  } TermQuery;
242
259
 
243
- Query *tq_create(Term *term);
260
+ extern Query *tq_create(Term *term);
244
261
 
245
262
  /***************************************************************************
246
263
  *
@@ -259,6 +276,7 @@ enum BC_TYPE {
259
276
  };
260
277
 
261
278
  typedef struct BooleanClause {
279
+ int ref_cnt;
262
280
  Query *query;
263
281
  Query *rewritten;
264
282
  unsigned int occur : 4;
@@ -266,9 +284,9 @@ typedef struct BooleanClause {
266
284
  bool is_required : 1;
267
285
  } BooleanClause;
268
286
 
269
- BooleanClause *bc_create(Query *query, unsigned int occur);
270
- void bc_destroy(BooleanClause *self);
271
- void bc_set_occur(BooleanClause *self, unsigned int occur);
287
+ extern BooleanClause *bc_create(Query *query, unsigned int occur);
288
+ extern void bc_deref(BooleanClause *self);
289
+ extern void bc_set_occur(BooleanClause *self, unsigned int occur);
272
290
 
273
291
  /***************************************************************************
274
292
  * BooleanQuery
@@ -283,13 +301,15 @@ typedef struct BooleanQuery {
283
301
  int max_clause_cnt;
284
302
  int clause_cnt;
285
303
  int clause_capa;
304
+ float original_boost;
286
305
  BooleanClause **clauses;
287
306
  Similarity *similarity;
288
307
  } BooleanQuery;
289
308
 
290
- Query *bq_create(bool coord_disabled);
291
- BooleanClause *bq_add_query(Query *self, Query *sub_query, unsigned int occur);
292
- BooleanClause *bq_add_clause(Query *self, BooleanClause *bc);
309
+ extern Query *bq_create(bool coord_disabled);
310
+ extern BooleanClause *bq_add_query(Query *self, Query *sub_query,
311
+ unsigned int occur);
312
+ extern BooleanClause *bq_add_clause(Query *self, BooleanClause *bc);
293
313
 
294
314
  /***************************************************************************
295
315
  *
@@ -307,8 +327,8 @@ typedef struct PhraseQuery {
307
327
  char *field;
308
328
  } PhraseQuery;
309
329
 
310
- Query *phq_create();
311
- void phq_add_term(Query *self, Term *term, int pos_inc);
330
+ extern Query *phq_create();
331
+ extern void phq_add_term(Query *self, Term *term, int pos_inc);
312
332
 
313
333
  /***************************************************************************
314
334
  *
@@ -326,8 +346,8 @@ typedef struct MultiPhraseQuery {
326
346
  char *field;
327
347
  } MultiPhraseQuery;
328
348
 
329
- Query *mphq_create();
330
- void mphq_add_terms(Query *self, Term **terms, int t_cnt, int pos_inc);
349
+ extern Query *mphq_create();
350
+ extern void mphq_add_terms(Query *self, Term **ts, int t_cnt, int pos_inc);
331
351
 
332
352
  /***************************************************************************
333
353
  *
@@ -335,7 +355,7 @@ void mphq_add_terms(Query *self, Term **terms, int t_cnt, int pos_inc);
335
355
  *
336
356
  ***************************************************************************/
337
357
 
338
- Query *prefixq_create(Term *prefix);
358
+ extern Query *prefixq_create(Term *prefix);
339
359
 
340
360
  /***************************************************************************
341
361
  *
@@ -345,8 +365,9 @@ Query *prefixq_create(Term *prefix);
345
365
 
346
366
  #define WILD_CHAR '?'
347
367
  #define WILD_STRING '*'
348
- Query *wcq_create(Term *term);
349
- bool wc_match(char *pattern, char *text);
368
+
369
+ extern Query *wcq_create(Term *term);
370
+ extern bool wc_match(char *pattern, char *text);
350
371
 
351
372
  /***************************************************************************
352
373
  *
@@ -370,8 +391,8 @@ typedef struct FuzzyQuery {
370
391
  int da_capa;
371
392
  } FuzzyQuery;
372
393
 
373
- Query *fuzq_create(Term *term);
374
- Query *fuzq_create_mp(Term *term, float min_sim, int pre_len);
394
+ extern Query *fuzq_create(Term *term);
395
+ extern Query *fuzq_create_mp(Term *term, float min_sim, int pre_len);
375
396
 
376
397
  /***************************************************************************
377
398
  *
@@ -379,7 +400,7 @@ Query *fuzq_create_mp(Term *term, float min_sim, int pre_len);
379
400
  *
380
401
  ***************************************************************************/
381
402
 
382
- Query *csq_create(Filter *filter);
403
+ extern Query *csq_create(Filter *filter);
383
404
 
384
405
  /***************************************************************************
385
406
  *
@@ -387,7 +408,7 @@ Query *csq_create(Filter *filter);
387
408
  *
388
409
  ***************************************************************************/
389
410
 
390
- Query *fq_create(Query *query, Filter *filter);
411
+ extern Query *fq_create(Query *query, Filter *filter);
391
412
 
392
413
  /***************************************************************************
393
414
  *
@@ -395,7 +416,7 @@ Query *fq_create(Query *query, Filter *filter);
395
416
  *
396
417
  ***************************************************************************/
397
418
 
398
- Query *maq_create();
419
+ extern Query *maq_create();
399
420
 
400
421
  /***************************************************************************
401
422
  *
@@ -407,14 +428,16 @@ typedef struct Range {
407
428
  char *field;
408
429
  char *lower_term;
409
430
  char *upper_term;
410
- bool include_lower;
411
- bool include_upper;
431
+ bool include_lower : 1;
432
+ bool include_upper : 1;
412
433
  } Range;
413
434
 
414
- Query *rq_create(const char *field, char *lower_term, char *upper_term,
415
- bool include_lower, bool include_upper);
416
- Query *rq_create_less(const char *field, char *upper_term, bool include_upper);
417
- Query *rq_create_more(const char *field, char *lower_term, bool include_lower);
435
+ extern Query *rq_create(const char *field, char *lower_term,
436
+ char *upper_term, bool include_lower, bool include_upper);
437
+ extern Query *rq_create_less(const char *field, char *upper_term,
438
+ bool include_upper);
439
+ extern Query *rq_create_more(const char *field, char *lower_term,
440
+ bool include_lower);
418
441
 
419
442
  /***************************************************************************
420
443
  *
@@ -447,7 +470,7 @@ struct SpanEnum {
447
470
  int (*start)(SpanEnum *self);
448
471
  int (*end)(SpanEnum *self);
449
472
  char *(*to_s)(SpanEnum *self);
450
- void (*destroy)(void *p);
473
+ void (*destroy)(SpanEnum *self);
451
474
  };
452
475
 
453
476
  /***************************************************************************
@@ -463,13 +486,13 @@ struct SpanTermEnum {
463
486
  int freq;
464
487
  };
465
488
 
466
- SpanEnum *spante_create(Query *query, IndexReader *ir);
489
+ extern SpanEnum *spante_create(Query *query, IndexReader *ir);
467
490
 
468
491
  /***************************************************************************
469
492
  * SpanFirstEnum
470
493
  ***************************************************************************/
471
494
 
472
- SpanEnum *spanfe_create(Query *query, IndexReader *ir);
495
+ extern SpanEnum *spanfe_create(Query *query, IndexReader *ir);
473
496
 
474
497
  /***************************************************************************
475
498
  * SpanOrEnum
@@ -481,7 +504,8 @@ typedef struct SpanOrEnum {
481
504
  int s_cnt;
482
505
  bool first_time;
483
506
  } SpanOrEnum;
484
- SpanEnum *spanoe_create(Query *query, IndexReader *ir);
507
+
508
+ extern SpanEnum *spanoe_create(Query *query, IndexReader *ir);
485
509
 
486
510
  /***************************************************************************
487
511
  * SpanEnumCell
@@ -493,7 +517,8 @@ typedef struct SpanEnumCell {
493
517
  int index;
494
518
  int length;
495
519
  } SpanEnumCell;
496
- SpanEnum *spanec_create(Query *parent, Query *child, int index);
520
+
521
+ extern SpanEnum *spanec_create(Query *parent, Query *child, int index);
497
522
 
498
523
  /***************************************************************************
499
524
  * SpanNearEnum
@@ -511,7 +536,7 @@ typedef struct SpanNearEnum {
511
536
  int end;
512
537
  } SpanNearEnum;
513
538
 
514
- SpanEnum *spanne_create(Query *query, IndexReader *ir);
539
+ extern SpanEnum *spanne_create(Query *query, IndexReader *ir);
515
540
 
516
541
  /***************************************************************************
517
542
  * SpanNotEnum
@@ -524,7 +549,7 @@ typedef struct SpanNotEnum {
524
549
  bool more_exc : 1;
525
550
  } SpanNotEnum;
526
551
 
527
- SpanEnum *spanxe_create(Query *query, IndexReader *ir);
552
+ extern SpanEnum *spanxe_create(Query *query, IndexReader *ir);
528
553
 
529
554
  /***************************************************************************
530
555
  * SpanQuery
@@ -535,14 +560,14 @@ struct SpanQuery {
535
560
  void *data;
536
561
  char *field;
537
562
  SpanEnum *(*get_spans)(Query *self, IndexReader *ir);
538
- Array *(*get_terms)(Query *self);
563
+ HashSet *(*get_terms)(Query *self);
539
564
  };
540
565
 
541
566
  /***************************************************************************
542
567
  * SpanTermQuery
543
568
  ***************************************************************************/
544
569
 
545
- Query *spantq_create(Term *term);
570
+ extern Query *spantq_create(Term *term);
546
571
 
547
572
  /***************************************************************************
548
573
  * SpanFirstQuery
@@ -553,7 +578,7 @@ typedef struct SpanFirstQuery {
553
578
  Query *match;
554
579
  } SpanFirstQuery;
555
580
 
556
- Query *spanfq_create(Query *match, int end);
581
+ extern Query *spanfq_create(Query *match, int end);
557
582
 
558
583
  /***************************************************************************
559
584
  * SpanOrQuery
@@ -564,7 +589,7 @@ typedef struct SpanOrQuery {
564
589
  int c_cnt;
565
590
  } SpanOrQuery;
566
591
 
567
- Query *spanoq_create(Query **clauses, int c_cnt);
592
+ extern Query *spanoq_create(Query **clauses, int c_cnt);
568
593
 
569
594
  /***************************************************************************
570
595
  * SpanNearQuery
@@ -577,7 +602,8 @@ typedef struct SpanNearQuery {
577
602
  bool in_order;
578
603
  } SpanNearQuery;
579
604
 
580
- Query *spannq_create(Query **clauses, int c_cnt, int slop, bool in_order);
605
+ extern Query *spannq_create(Query **clauses, int c_cnt, int slop,
606
+ bool in_order);
581
607
 
582
608
 
583
609
  /***************************************************************************
@@ -589,7 +615,7 @@ typedef struct SpanNotQuery {
589
615
  Query *exc;
590
616
  } SpanNotQuery;
591
617
 
592
- Query *spanxq_create(Query *inc, Query *exc);
618
+ extern Query *spanxq_create(Query *inc, Query *exc);
593
619
 
594
620
  /***************************************************************************
595
621
  *
@@ -607,14 +633,16 @@ struct Scorer {
607
633
  bool (*next)(Scorer *self);
608
634
  bool (*skip_to)(Scorer *self, int doc_num);
609
635
  Explanation *(*explain)(Scorer *self, int doc_num);
610
- void (*destroy)(void *p);
636
+ void (*destroy)(Scorer *self);
611
637
  };
612
638
 
613
- void scorer_destroy(void *p);
614
- Scorer *scorer_create(Similarity *similarity);
615
- bool scorer_less_than(void *p1, void *p2);
616
- bool scorer_doc_less_than(void *p1, void *p2);
617
- int scorer_doc_cmp(const void *p1, const void *p2);
639
+ /* Internal Scorer Function */
640
+ extern void scorer_destroy_i(Scorer *self);
641
+
642
+ extern Scorer *scorer_create(Similarity *similarity);
643
+ extern bool scorer_less_than(void *p1, void *p2);
644
+ extern bool scorer_doc_less_than(void *p1, void *p2);
645
+ extern int scorer_doc_cmp(const void *p1, const void *p2);
618
646
 
619
647
  /***************************************************************************
620
648
  *
@@ -637,7 +665,7 @@ typedef struct TermScorer {
637
665
  float weight_value;
638
666
  } TermScorer;
639
667
 
640
- Scorer *tsc_create(Weight *weight, TermDocEnum *tde, uchar *norms);
668
+ extern Scorer *tsc_create(Weight *weight, TermDocEnum *tde, uchar *norms);
641
669
 
642
670
  /***************************************************************************
643
671
  *
@@ -656,9 +684,6 @@ typedef struct Coordinator {
656
684
  int num_matches;
657
685
  } Coordinator;
658
686
 
659
- Coordinator *coo_create(Similarity *similarity);
660
- Coordinator *coo_init(Coordinator *self);
661
-
662
687
  /***************************************************************************
663
688
  * DisjunctionSumScorer
664
689
  ***************************************************************************/
@@ -680,7 +705,7 @@ typedef struct DisjunctionSumScorer{
680
705
  typedef struct ConjunctionScorer{
681
706
  bool first_time : 1;
682
707
  bool more : 1;
683
- int coord;
708
+ float coord;
684
709
  int ss_cnt;
685
710
  int ss_capa;
686
711
  Scorer **sub_scorers;
@@ -737,8 +762,8 @@ typedef struct BooleanScorer {
737
762
  Coordinator *coordinator;
738
763
  } BooleanScorer;
739
764
 
740
- Scorer *bsc_create(Similarity *similarity);
741
- void bsc_add_scorer(Scorer *self, Scorer *scorer, unsigned int occur);
765
+ extern Scorer *bsc_create(Similarity *similarity);
766
+ extern void bsc_add_scorer(Scorer *self, Scorer *scorer, unsigned int occur);
742
767
 
743
768
  /***************************************************************************
744
769
  *
@@ -757,7 +782,8 @@ typedef struct PhrasePosition {
757
782
  int position;
758
783
  } PhrasePosition;
759
784
 
760
- PhrasePosition *pp_create(TermDocEnum *tpe, int offset);
785
+ extern PhrasePosition *pp_create(TermDocEnum *tpe, int offset);
786
+
761
787
  /***************************************************************************
762
788
  * PhraseScorer
763
789
  ***************************************************************************/
@@ -777,22 +803,24 @@ typedef struct PhraseScorer {
777
803
  int slop;
778
804
  } PhraseScorer;
779
805
 
780
- Scorer *phsc_create(Weight *weight, TermDocEnum **term_pos_enum,
806
+ extern Scorer *phsc_create(Weight *weight, TermDocEnum **term_pos_enum,
781
807
  int *positions, int t_cnt, Similarity *similarity, uchar *norms);
782
808
 
783
809
  /***************************************************************************
784
810
  * ExactPhraseScorer
785
811
  ***************************************************************************/
786
812
 
787
- Scorer *exact_phrase_scorer_create(Weight *weight, TermDocEnum **term_pos_enum,
788
- int *positions, int t_cnt, Similarity *similarity, uchar *norms);
813
+ extern Scorer *exact_phrase_scorer_create(Weight *weight,
814
+ TermDocEnum **term_pos_enum, int *positions, int t_cnt,
815
+ Similarity *similarity, uchar *norms);
789
816
 
790
817
  /***************************************************************************
791
818
  * SloppyPhraseScorer
792
819
  ***************************************************************************/
793
820
 
794
- Scorer *sloppy_phrase_scorer_create(Weight *weight, TermDocEnum **term_pos_enum,
795
- int *positions, int t_cnt, Similarity *similarity, int slop, uchar *norms);
821
+ extern Scorer *sloppy_phrase_scorer_create(Weight *weight,
822
+ TermDocEnum **term_pos_enum, int *positions, int t_cnt,
823
+ Similarity *similarity, int slop, uchar *norms);
796
824
 
797
825
  /***************************************************************************
798
826
  *
@@ -805,7 +833,7 @@ typedef struct ConstantScoreScorer {
805
833
  float score;
806
834
  } ConstantScoreScorer;
807
835
 
808
- Scorer *cssc_create(Weight *weight, IndexReader *ir);
836
+ extern Scorer *cssc_create(Weight *weight, IndexReader *ir);
809
837
 
810
838
 
811
839
  /***************************************************************************
@@ -820,7 +848,7 @@ typedef struct MatchAllScorer {
820
848
  float score;
821
849
  } MatchAllScorer;
822
850
 
823
- Scorer *masc_create(Weight *weight, IndexReader *ir);
851
+ extern Scorer *masc_create(Weight *weight, IndexReader *ir);
824
852
 
825
853
 
826
854
  /***************************************************************************
@@ -841,7 +869,7 @@ typedef struct SpanScorer {
841
869
  float freq;
842
870
  } SpanScorer;
843
871
 
844
- Scorer *spansc_create(Weight *weight, IndexReader *ir);
872
+ extern Scorer *spansc_create(Weight *weight, IndexReader *ir);
845
873
 
846
874
  /***************************************************************************
847
875
  *
@@ -874,14 +902,15 @@ typedef struct SortField {
874
902
  void (*handle_term)(void *index, TermDocEnum *tde, char *text);
875
903
  } SortField;
876
904
 
877
- SortField *sort_field_create(char *field, int type, bool reverse);
878
- SortField *sort_field_score_create(bool reverse);
879
- SortField *sort_field_doc_create(bool reverse);
880
- SortField *sort_field_int_create(char *field, bool reverse);
881
- SortField *sort_field_float_create(char *field, bool reverse);
882
- SortField *sort_field_string_create(char *field, bool reverse);
883
- SortField *sort_field_auto_create(char *field, bool reverse);
884
- void sort_field_destroy(void *p);
905
+ extern SortField *sort_field_create(char *field, int type, bool reverse);
906
+ extern SortField *sort_field_score_create(bool reverse);
907
+ extern SortField *sort_field_doc_create(bool reverse);
908
+ extern SortField *sort_field_int_create(char *field, bool reverse);
909
+ extern SortField *sort_field_float_create(char *field, bool reverse);
910
+ extern SortField *sort_field_string_create(char *field, bool reverse);
911
+ extern SortField *sort_field_auto_create(char *field, bool reverse);
912
+ extern void sort_field_destroy(void *p);
913
+ extern char *sort_field_to_s(SortField *self);
885
914
 
886
915
  extern SortField SORT_FIELD_SCORE;
887
916
  extern SortField SORT_FIELD_SCORE_REV;
@@ -899,20 +928,21 @@ typedef struct Sort {
899
928
  bool destroy_all : 1;
900
929
  } Sort;
901
930
 
902
- Sort *sort_create();
903
- void sort_destroy(void *p);
904
- void sort_add_sort_field(Sort *self, SortField *sf);
905
- void sort_clear(Sort *self);
931
+ extern Sort *sort_create();
932
+ extern void sort_destroy(void *p);
933
+ extern void sort_add_sort_field(Sort *self, SortField *sf);
934
+ extern void sort_clear(Sort *self);
935
+ extern char *sort_to_s(Sort *self);
906
936
 
907
937
  /***************************************************************************
908
938
  * FieldSortedHitQueue
909
939
  ***************************************************************************/
910
940
 
911
- Hit *fshq_pq_pop(PriorityQueue *pq);
912
- void fshq_pq_down(PriorityQueue *pq);
913
- void fshq_pq_push(PriorityQueue *pq, void *elem);
914
- void fshq_pq_destroy(void *p);
915
- PriorityQueue *fshq_pq_create(int size, Sort *sort, IndexReader *ir);
941
+ extern Hit *fshq_pq_pop(PriorityQueue *pq);
942
+ extern void fshq_pq_down(PriorityQueue *pq);
943
+ extern void fshq_pq_insert(PriorityQueue *pq, Hit *hit);
944
+ extern void fshq_pq_destroy(PriorityQueue *pq);
945
+ extern PriorityQueue *fshq_pq_create(int size, Sort *sort, IndexReader *ir);
916
946
 
917
947
  /***************************************************************************
918
948
  *
@@ -921,6 +951,7 @@ PriorityQueue *fshq_pq_create(int size, Sort *sort, IndexReader *ir);
921
951
  ***************************************************************************/
922
952
 
923
953
  struct Searcher {
954
+ void *data;
924
955
  IndexReader *ir;
925
956
  Similarity *similarity;
926
957
  bool close_ir : 1;
@@ -930,25 +961,53 @@ struct Searcher {
930
961
  int (*max_doc)(Searcher *self);
931
962
  Weight *(*create_weight)(Searcher *self, Query *query);
932
963
  TopDocs *(*search)(Searcher *self, Query *query, int first_doc,
933
- int num_docs, Filter *filter, Sort *sort);
964
+ int num_docs, Filter *filter, Sort *sort);
965
+ void (*search_each)(Searcher *self, Query *query, Filter *filter,
966
+ void (*fn)(Searcher *, int, float, void *), void *arg);
967
+ void (*search_each_w)(Searcher *self, Weight *weight,
968
+ Filter *filter, void (*fn)(Searcher *, int, float, void *),
969
+ void *arg);
934
970
  Query *(*rewrite)(Searcher *self, Query *original);
935
971
  Explanation *(*explain)(Searcher *self, Query *query, int doc_num);
972
+ Explanation *(*explain_w)(Searcher *self, Weight *weight, int doc_num);
936
973
  Similarity *(*get_similarity)(Searcher *self);
937
974
  void (*close)(Searcher *self);
938
975
  };
939
976
 
940
- Searcher *sea_create(IndexReader *ir);
941
- TopDocs *sea_search(Searcher *self, Query *query, int first_doc,
942
- int num_docs, Filter *filter, Sort *sort);
943
- void sea_search_each(Searcher *self, Query *query, Filter *filter,
944
- void (*fn)(Searcher *self, int doc_num, void *arg), void *arg);
945
- Explanation *sea_explain(Searcher *self, Query *query, int doc_num);
946
- Similarity *sea_get_similarity(Searcher *self);
947
- Query *sea_rewrite(Searcher *self, Query *original);
948
- void sea_close(Searcher *self);
949
- Document *sea_get_doc(Searcher *self, int doc_num);
950
- Weight *sea_create_weight(Searcher *self, Query *query);
951
- int sea_doc_freq(Searcher *self, Term *term);
977
+ #define sea_doc_freq(s, t) s->doc_freq(s, t)
978
+ #define sea_doc_freqs(s, t, c) s->doc_freqs(s, t, c)
979
+ #define sea_get_doc(s, dn) s->get_doc(s, dn)
980
+ #define sea_max_doc(s) s->max_doc(s)
981
+ #define sea_search(s, q, fd, nd, filt, sort)\
982
+ s->search(s, q, fd, nd, filt, sort)
983
+ #define sea_search_each(s, q, filt, fn, arg)\
984
+ s->search_each(s, q, filt, fn, arg)
985
+ #define sea_search_each_w(s, q, filt, fn, arg)\
986
+ s->search_each_w(s, q, filt, fn, arg)
987
+ #define sea_rewrite(s, q) s->rewrite(s, q)
988
+ #define sea_explain(s, q, dn) s->explain(s, q, dn)
989
+ #define sea_explain_w(s, q, dn) s->explain_w(s, q, dn)
990
+ #define sea_get_similarity(s) s->get_similarity(s)
991
+ #define sea_close(s) s->close(s)
992
+
993
+ extern Searcher *sea_create(IndexReader *ir);
994
+
995
+ /***************************************************************************
996
+ *
997
+ * MultiSearcher
998
+ *
999
+ ***************************************************************************/
1000
+
1001
+ typedef struct MultiSearcher {
1002
+ int s_cnt;
1003
+ Searcher **searchers;
1004
+ int *starts;
1005
+ int max_doc;
1006
+ bool close_subs : 1;
1007
+ } MultiSearcher;
1008
+
1009
+ extern Searcher *msea_create(Searcher **searchers, int s_cnt,
1010
+ bool close_subs);
952
1011
 
953
1012
  /***************************************************************************
954
1013
  *
@@ -979,10 +1038,11 @@ typedef struct QParser {
979
1038
  Query *result;
980
1039
  } QParser;
981
1040
 
982
- QParser *qp_create(HashSet *all_fields, HashSet *def_fields, Analyzer *analyzer);
983
- void qp_destroy(void *p);
984
- Query *qp_parse(QParser *self, char *qstr);
985
- char *qp_clean_str(char *str);
1041
+ extern QParser *qp_create(HashSet *all_fields, HashSet *def_fields,
1042
+ Analyzer *analyzer);
1043
+ extern void qp_destroy(QParser *self);
1044
+ extern Query *qp_parse(QParser *self, char *qstr);
1045
+ extern char *qp_clean_str(char *str);
986
1046
 
987
1047
  /***************************************************************************
988
1048
  *
@@ -1001,8 +1061,6 @@ typedef struct Index {
1001
1061
  HashSet *key;
1002
1062
  char *id_field;
1003
1063
  char *def_field;
1004
- bool close_analyzer : 1;
1005
- bool close_store : 1;
1006
1064
  /* for IndexWriter */
1007
1065
  bool use_compound_file : 1;
1008
1066
  bool auto_flush : 1;
@@ -1010,32 +1068,38 @@ typedef struct Index {
1010
1068
  bool check_latest : 1;
1011
1069
  } Index;
1012
1070
 
1013
- Index *index_create(Store *store, Analyzer *analyzer, HashSet *def_fields,
1014
- bool create);
1015
- void index_destroy(Index *self);
1016
- void index_flush(Index *self);
1017
- int index_size(Index *self);
1018
- void index_optimize(Index *self);
1019
- bool index_has_del(Index *self);
1020
- bool index_is_deleted(Index *self, int doc_num);
1021
- void index_add_doc(Index *self, Document *doc);
1022
- void index_add_doc_a(Index *self, Document *doc, Analyzer *analyzer);
1023
- void index_add_string(Index *self, char *str, Analyzer *analyzer);
1024
- void index_add_array(Index *self, Array *ary, Analyzer *analyzer);
1025
- TopDocs *index_search_str(Index *self, char *query, int first_doc,
1071
+ extern Index *index_create(Store *store, Analyzer *analyzer,
1072
+ HashSet *def_fields, bool create);
1073
+ extern void index_destroy(Index *self);
1074
+ extern void index_flush(Index *self);
1075
+ extern int index_size(Index *self);
1076
+ extern void index_optimize(Index *self);
1077
+ extern bool index_has_del(Index *self);
1078
+ extern bool index_is_deleted(Index *self, int doc_num);
1079
+ extern void index_add_doc(Index *self, Document *doc);
1080
+ extern void index_add_doc_a(Index *self, Document *doc, Analyzer *analyzer);
1081
+ extern void index_add_string(Index *self, char *str, Analyzer *analyzer);
1082
+ extern void index_add_array(Index *self, Array *ary, Analyzer *analyzer);
1083
+ extern TopDocs *index_search_str(Index *self, char *query, int first_doc,
1026
1084
  int num_docs, Filter *filter, Sort *sort);
1027
- Query *index_get_query(Index *self, char *qstr);
1028
- Document *index_get_doc(Index *self, int doc_num);
1029
- Document *index_get_doc_ts(Index *self, int doc_num);
1030
- Document *index_get_doc_id(Index *self, char *id);
1031
- Document *index_get_doc_term(Index *self, Term *term);
1032
- void index_delete(Index *self, int doc_num);
1033
- void index_delete_term(Index *self, Term *term);
1034
- void index_delete_id(Index *self, char *id);
1035
- void index_delete_query(Index *self, Query *q, Filter *f);
1036
- void index_delete_query_str(Index *self, char *qstr, Filter *f);
1037
- int index_term_id(Index *self, Term *term);
1038
- Explanation *index_explain(Index *self, Query *q, int doc_num);
1039
- void index_auto_flush_ir(Index *self);
1040
- void index_auto_flush_iw(Index *self);
1085
+ extern Query *index_get_query(Index *self, char *qstr);
1086
+ extern Document *index_get_doc(Index *self, int doc_num);
1087
+ extern Document *index_get_doc_ts(Index *self, int doc_num);
1088
+ extern Document *index_get_doc_id(Index *self, char *id);
1089
+ extern Document *index_get_doc_term(Index *self, Term *term);
1090
+ extern void index_delete(Index *self, int doc_num);
1091
+ extern void index_delete_term(Index *self, Term *term);
1092
+ extern void index_delete_id(Index *self, char *id);
1093
+ extern void index_delete_query(Index *self, Query *q, Filter *f);
1094
+ extern void index_delete_query_str(Index *self, char *qstr, Filter *f);
1095
+ extern int index_term_id(Index *self, Term *term);
1096
+ extern Explanation *index_explain(Index *self, Query *q, int doc_num);
1097
+ extern void index_auto_flush_ir(Index *self);
1098
+ extern void index_auto_flush_iw(Index *self);
1099
+
1100
+ extern inline void ensure_searcher_open(Index *self);
1101
+ extern inline void ensure_reader_open(Index *self);
1102
+ extern inline void ensure_writer_open(Index *self);
1103
+
1041
1104
  #endif
1105
+