ferret 0.11.6 → 0.11.8.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (185) hide show
  1. data/README +10 -22
  2. data/RELEASE_CHANGES +137 -0
  3. data/RELEASE_NOTES +60 -0
  4. data/Rakefile +379 -274
  5. data/TODO +100 -8
  6. data/bin/ferret-browser +0 -0
  7. data/ext/BZLIB_blocksort.c +1094 -0
  8. data/ext/BZLIB_bzlib.c +1578 -0
  9. data/ext/BZLIB_compress.c +672 -0
  10. data/ext/BZLIB_crctable.c +104 -0
  11. data/ext/BZLIB_decompress.c +626 -0
  12. data/ext/BZLIB_huffman.c +205 -0
  13. data/ext/BZLIB_randtable.c +84 -0
  14. data/ext/{api.c → STEMMER_api.c} +7 -10
  15. data/ext/{libstemmer.c → STEMMER_libstemmer.c} +3 -2
  16. data/ext/{stem_ISO_8859_1_danish.c → STEMMER_stem_ISO_8859_1_danish.c} +123 -124
  17. data/ext/{stem_ISO_8859_1_dutch.c → STEMMER_stem_ISO_8859_1_dutch.c} +177 -188
  18. data/ext/STEMMER_stem_ISO_8859_1_english.c +1117 -0
  19. data/ext/{stem_ISO_8859_1_finnish.c → STEMMER_stem_ISO_8859_1_finnish.c} +276 -306
  20. data/ext/STEMMER_stem_ISO_8859_1_french.c +1246 -0
  21. data/ext/{stem_ISO_8859_1_german.c → STEMMER_stem_ISO_8859_1_german.c} +161 -170
  22. data/ext/STEMMER_stem_ISO_8859_1_hungarian.c +1230 -0
  23. data/ext/STEMMER_stem_ISO_8859_1_italian.c +1065 -0
  24. data/ext/STEMMER_stem_ISO_8859_1_norwegian.c +297 -0
  25. data/ext/{stem_ISO_8859_1_porter.c → STEMMER_stem_ISO_8859_1_porter.c} +263 -290
  26. data/ext/{stem_ISO_8859_1_portuguese.c → STEMMER_stem_ISO_8859_1_portuguese.c} +362 -380
  27. data/ext/STEMMER_stem_ISO_8859_1_spanish.c +1093 -0
  28. data/ext/STEMMER_stem_ISO_8859_1_swedish.c +307 -0
  29. data/ext/STEMMER_stem_ISO_8859_2_romanian.c +998 -0
  30. data/ext/{stem_KOI8_R_russian.c → STEMMER_stem_KOI8_R_russian.c} +244 -245
  31. data/ext/STEMMER_stem_UTF_8_danish.c +339 -0
  32. data/ext/{stem_UTF_8_dutch.c → STEMMER_stem_UTF_8_dutch.c} +192 -211
  33. data/ext/STEMMER_stem_UTF_8_english.c +1125 -0
  34. data/ext/{stem_UTF_8_finnish.c → STEMMER_stem_UTF_8_finnish.c} +284 -324
  35. data/ext/STEMMER_stem_UTF_8_french.c +1256 -0
  36. data/ext/{stem_UTF_8_german.c → STEMMER_stem_UTF_8_german.c} +170 -187
  37. data/ext/STEMMER_stem_UTF_8_hungarian.c +1234 -0
  38. data/ext/STEMMER_stem_UTF_8_italian.c +1073 -0
  39. data/ext/STEMMER_stem_UTF_8_norwegian.c +299 -0
  40. data/ext/{stem_UTF_8_porter.c → STEMMER_stem_UTF_8_porter.c} +271 -310
  41. data/ext/STEMMER_stem_UTF_8_portuguese.c +1023 -0
  42. data/ext/STEMMER_stem_UTF_8_romanian.c +1004 -0
  43. data/ext/STEMMER_stem_UTF_8_russian.c +694 -0
  44. data/ext/STEMMER_stem_UTF_8_spanish.c +1097 -0
  45. data/ext/STEMMER_stem_UTF_8_swedish.c +309 -0
  46. data/ext/STEMMER_stem_UTF_8_turkish.c +2205 -0
  47. data/ext/{utilities.c → STEMMER_utilities.c} +100 -68
  48. data/ext/analysis.c +276 -121
  49. data/ext/analysis.h +190 -143
  50. data/ext/api.h +3 -4
  51. data/ext/array.c +5 -3
  52. data/ext/array.h +52 -43
  53. data/ext/bitvector.c +38 -482
  54. data/ext/bitvector.h +446 -124
  55. data/ext/bzlib.h +282 -0
  56. data/ext/bzlib_private.h +503 -0
  57. data/ext/compound_io.c +23 -22
  58. data/ext/config.h +21 -11
  59. data/ext/document.c +43 -40
  60. data/ext/document.h +31 -21
  61. data/ext/except.c +20 -38
  62. data/ext/except.h +89 -76
  63. data/ext/extconf.rb +3 -2
  64. data/ext/ferret.c +49 -35
  65. data/ext/ferret.h +14 -11
  66. data/ext/field_index.c +262 -0
  67. data/ext/field_index.h +52 -0
  68. data/ext/filter.c +11 -10
  69. data/ext/fs_store.c +65 -47
  70. data/ext/global.c +245 -165
  71. data/ext/global.h +252 -54
  72. data/ext/hash.c +200 -243
  73. data/ext/hash.h +205 -163
  74. data/ext/hashset.c +118 -96
  75. data/ext/hashset.h +110 -82
  76. data/ext/header.h +19 -19
  77. data/ext/helper.c +11 -10
  78. data/ext/helper.h +14 -6
  79. data/ext/index.c +745 -366
  80. data/ext/index.h +503 -529
  81. data/ext/internal.h +1020 -0
  82. data/ext/lang.c +10 -0
  83. data/ext/lang.h +35 -15
  84. data/ext/mempool.c +5 -4
  85. data/ext/mempool.h +30 -22
  86. data/ext/modules.h +35 -7
  87. data/ext/multimapper.c +43 -2
  88. data/ext/multimapper.h +32 -23
  89. data/ext/posh.c +0 -0
  90. data/ext/posh.h +4 -38
  91. data/ext/priorityqueue.c +10 -12
  92. data/ext/priorityqueue.h +33 -21
  93. data/ext/q_boolean.c +22 -9
  94. data/ext/q_const_score.c +3 -2
  95. data/ext/q_filtered_query.c +15 -12
  96. data/ext/q_fuzzy.c +147 -135
  97. data/ext/q_match_all.c +3 -2
  98. data/ext/q_multi_term.c +28 -32
  99. data/ext/q_parser.c +451 -173
  100. data/ext/q_phrase.c +158 -79
  101. data/ext/q_prefix.c +16 -18
  102. data/ext/q_range.c +363 -31
  103. data/ext/q_span.c +130 -141
  104. data/ext/q_term.c +21 -21
  105. data/ext/q_wildcard.c +19 -23
  106. data/ext/r_analysis.c +369 -242
  107. data/ext/r_index.c +421 -434
  108. data/ext/r_qparser.c +142 -92
  109. data/ext/r_search.c +790 -407
  110. data/ext/r_store.c +44 -44
  111. data/ext/r_utils.c +264 -96
  112. data/ext/ram_store.c +29 -23
  113. data/ext/scanner.c +895 -0
  114. data/ext/scanner.h +36 -0
  115. data/ext/scanner_mb.c +6701 -0
  116. data/ext/scanner_utf8.c +4415 -0
  117. data/ext/search.c +210 -87
  118. data/ext/search.h +556 -488
  119. data/ext/similarity.c +17 -16
  120. data/ext/similarity.h +51 -44
  121. data/ext/sort.c +157 -354
  122. data/ext/stem_ISO_8859_1_hungarian.h +16 -0
  123. data/ext/stem_ISO_8859_2_romanian.h +16 -0
  124. data/ext/stem_UTF_8_hungarian.h +16 -0
  125. data/ext/stem_UTF_8_romanian.h +16 -0
  126. data/ext/stem_UTF_8_turkish.h +16 -0
  127. data/ext/stopwords.c +287 -278
  128. data/ext/store.c +57 -51
  129. data/ext/store.h +308 -286
  130. data/ext/symbol.c +10 -0
  131. data/ext/symbol.h +23 -0
  132. data/ext/term_vectors.c +14 -293
  133. data/ext/threading.h +22 -22
  134. data/ext/win32.h +12 -4
  135. data/lib/ferret.rb +2 -1
  136. data/lib/ferret/browser.rb +1 -1
  137. data/lib/ferret/field_symbol.rb +94 -0
  138. data/lib/ferret/index.rb +221 -34
  139. data/lib/ferret/number_tools.rb +6 -6
  140. data/lib/ferret/version.rb +3 -0
  141. data/test/{unit → long_running}/largefile/tc_largefile.rb +1 -1
  142. data/test/test_helper.rb +7 -2
  143. data/test/test_installed.rb +1 -0
  144. data/test/threading/thread_safety_index_test.rb +10 -1
  145. data/test/threading/thread_safety_read_write_test.rb +4 -7
  146. data/test/threading/thread_safety_test.rb +0 -0
  147. data/test/unit/analysis/tc_analyzer.rb +29 -27
  148. data/test/unit/analysis/tc_token_stream.rb +23 -16
  149. data/test/unit/index/tc_index.rb +116 -11
  150. data/test/unit/index/tc_index_reader.rb +27 -27
  151. data/test/unit/index/tc_index_writer.rb +10 -0
  152. data/test/unit/index/th_doc.rb +38 -21
  153. data/test/unit/search/tc_filter.rb +31 -10
  154. data/test/unit/search/tc_index_searcher.rb +6 -0
  155. data/test/unit/search/tm_searcher.rb +53 -1
  156. data/test/unit/store/tc_fs_store.rb +40 -2
  157. data/test/unit/store/tc_ram_store.rb +0 -0
  158. data/test/unit/store/tm_store.rb +0 -0
  159. data/test/unit/store/tm_store_lock.rb +7 -6
  160. data/test/unit/tc_field_symbol.rb +26 -0
  161. data/test/unit/ts_analysis.rb +0 -0
  162. data/test/unit/ts_index.rb +0 -0
  163. data/test/unit/ts_store.rb +0 -0
  164. data/test/unit/ts_utils.rb +0 -0
  165. data/test/unit/utils/tc_number_tools.rb +0 -0
  166. data/test/utils/content_generator.rb +226 -0
  167. metadata +262 -221
  168. data/ext/inc/lang.h +0 -48
  169. data/ext/inc/threading.h +0 -31
  170. data/ext/stem_ISO_8859_1_english.c +0 -1156
  171. data/ext/stem_ISO_8859_1_french.c +0 -1276
  172. data/ext/stem_ISO_8859_1_italian.c +0 -1091
  173. data/ext/stem_ISO_8859_1_norwegian.c +0 -296
  174. data/ext/stem_ISO_8859_1_spanish.c +0 -1119
  175. data/ext/stem_ISO_8859_1_swedish.c +0 -307
  176. data/ext/stem_UTF_8_danish.c +0 -344
  177. data/ext/stem_UTF_8_english.c +0 -1176
  178. data/ext/stem_UTF_8_french.c +0 -1296
  179. data/ext/stem_UTF_8_italian.c +0 -1113
  180. data/ext/stem_UTF_8_norwegian.c +0 -302
  181. data/ext/stem_UTF_8_portuguese.c +0 -1055
  182. data/ext/stem_UTF_8_russian.c +0 -709
  183. data/ext/stem_UTF_8_spanish.c +0 -1137
  184. data/ext/stem_UTF_8_swedish.c +0 -313
  185. data/lib/ferret_version.rb +0 -3
@@ -1,5 +1,7 @@
1
1
  #include <string.h>
2
2
  #include "search.h"
3
+ #include "symbol.h"
4
+ #include "internal.h"
3
5
 
4
6
  /****************************************************************************
5
7
  *
@@ -9,23 +11,20 @@
9
11
 
10
12
  #define PfxQ(query) ((PrefixQuery *)(query))
11
13
 
12
- static char *prq_to_s(Query *self, const char *current_field)
14
+ static char *prq_to_s(Query *self, Symbol default_field)
13
15
  {
14
16
  char *buffer, *bptr;
15
17
  const char *prefix = PfxQ(self)->prefix;
16
- const char *field = PfxQ(self)->field;
17
18
  size_t plen = strlen(prefix);
18
- size_t flen = strlen(field);
19
+ size_t flen = sym_len(PfxQ(self)->field);
19
20
 
20
21
  bptr = buffer = ALLOC_N(char, plen + flen + 35);
21
22
 
22
- if (strcmp(field, current_field) != 0) {
23
- sprintf(bptr, "%s:", field);
24
- bptr += flen + 1;
23
+ if (PfxQ(self)->field != default_field) {
24
+ bptr += sprintf(bptr, "%s:", S(PfxQ(self)->field));
25
25
  }
26
26
 
27
- sprintf(bptr, "%s*", prefix);
28
- bptr += plen + 1;
27
+ bptr += sprintf(bptr, "%s*", prefix);
29
28
  if (self->boost != 1.0) {
30
29
  *bptr = '^';
31
30
  dbl_to_s(++bptr, self->boost);
@@ -36,9 +35,9 @@ static char *prq_to_s(Query *self, const char *current_field)
36
35
 
37
36
  static Query *prq_rewrite(Query *self, IndexReader *ir)
38
37
  {
39
- const char *field = PfxQ(self)->field;
40
- const int field_num = fis_get_field_num(ir->fis, field);
41
- Query *volatile q = multi_tq_new_conf(field, MTQMaxTerms(self), 0.0);
38
+ const int field_num = fis_get_field_num(ir->fis, PfxQ(self)->field);
39
+ Query *volatile q = multi_tq_new_conf(PfxQ(self)->field,
40
+ MTQMaxTerms(self), 0.0);
42
41
  q->boost = self->boost; /* set the boost */
43
42
 
44
43
  if (field_num >= 0) {
@@ -48,7 +47,7 @@ static Query *prq_rewrite(Query *self, IndexReader *ir)
48
47
  size_t prefix_len = strlen(prefix);
49
48
 
50
49
  TRY
51
- do {
50
+ do {
52
51
  if (strncmp(term, prefix, prefix_len) != 0) {
53
52
  break;
54
53
  }
@@ -64,27 +63,26 @@ static Query *prq_rewrite(Query *self, IndexReader *ir)
64
63
 
65
64
  static void prq_destroy(Query *self)
66
65
  {
67
- free(PfxQ(self)->field);
68
66
  free(PfxQ(self)->prefix);
69
67
  q_destroy_i(self);
70
68
  }
71
69
 
72
70
  static unsigned long prq_hash(Query *self)
73
71
  {
74
- return str_hash(PfxQ(self)->field) ^ str_hash(PfxQ(self)->prefix);
72
+ return sym_hash(PfxQ(self)->field) ^ str_hash(PfxQ(self)->prefix);
75
73
  }
76
74
 
77
75
  static int prq_eq(Query *self, Query *o)
78
76
  {
79
- return (strcmp(PfxQ(self)->prefix, PfxQ(o)->prefix) == 0)
80
- && (strcmp(PfxQ(self)->field, PfxQ(o)->field) == 0);
77
+ return (strcmp(PfxQ(self)->prefix, PfxQ(o)->prefix) == 0)
78
+ && (PfxQ(self)->field == PfxQ(o)->field);
81
79
  }
82
80
 
83
- Query *prefixq_new(const char *field, const char *prefix)
81
+ Query *prefixq_new(Symbol field, const char *prefix)
84
82
  {
85
83
  Query *self = q_new(PrefixQuery);
86
84
 
87
- PfxQ(self)->field = estrdup(field);
85
+ PfxQ(self)->field = field;
88
86
  PfxQ(self)->prefix = estrdup(prefix);
89
87
  MTQMaxTerms(self) = PREFIX_QUERY_MAX_TERMS;
90
88
 
@@ -1,5 +1,7 @@
1
1
  #include <string.h>
2
2
  #include "search.h"
3
+ #include "symbol.h"
4
+ #include "internal.h"
3
5
 
4
6
  /*****************************************************************************
5
7
  *
@@ -9,26 +11,27 @@
9
11
 
10
12
  typedef struct Range
11
13
  {
12
- char *field;
14
+ Symbol field;
13
15
  char *lower_term;
14
16
  char *upper_term;
15
17
  bool include_lower : 1;
16
18
  bool include_upper : 1;
17
19
  } Range;
18
20
 
19
- static char *range_to_s(Range *range, const char *field, float boost)
21
+ static char *range_to_s(Range *range, Symbol default_field, float boost)
20
22
  {
21
23
  char *buffer, *b;
22
24
  size_t flen, llen, ulen;
25
+ const char *field = S(range->field);
23
26
 
24
- flen = strlen(range->field);
27
+ flen = strlen(field);
25
28
  llen = range->lower_term ? strlen(range->lower_term) : 0;
26
29
  ulen = range->upper_term ? strlen(range->upper_term) : 0;
27
30
  buffer = ALLOC_N(char, flen + llen + ulen + 40);
28
31
  b = buffer;
29
32
 
30
- if (strcmp(field, range->field)) {
31
- memcpy(buffer, range->field, flen * sizeof(char));
33
+ if (default_field != range->field) {
34
+ memcpy(buffer, field, flen * sizeof(char));
32
35
  b += flen;
33
36
  *b = ':';
34
37
  b++;
@@ -68,7 +71,6 @@ static char *range_to_s(Range *range, const char *field, float boost)
68
71
 
69
72
  static void range_destroy(Range *range)
70
73
  {
71
- free(range->field);
72
74
  free(range->lower_term);
73
75
  free(range->upper_term);
74
76
  free(range);
@@ -77,26 +79,26 @@ static void range_destroy(Range *range)
77
79
  static unsigned long range_hash(Range *filt)
78
80
  {
79
81
  return filt->include_lower | (filt->include_upper << 1)
80
- | ((str_hash(filt->field)
82
+ | ((sym_hash(filt->field)
81
83
  ^ (filt->lower_term ? str_hash(filt->lower_term) : 0)
82
84
  ^ (filt->upper_term ? str_hash(filt->upper_term) : 0)) << 2);
83
85
  }
84
86
 
85
- static int str_eq(char *s1, char *s2)
87
+ static int str_eq(const char *s1, const char *s2)
86
88
  {
87
89
  return (s1 && s2 && (strcmp(s1, s2) == 0)) || (s1 == s2);
88
90
  }
89
91
 
90
92
  static int range_eq(Range *filt, Range *o)
91
93
  {
92
- return (str_eq(filt->field, o->field)
94
+ return ((filt->field == o->field)
93
95
  && str_eq(filt->lower_term, o->lower_term)
94
96
  && str_eq(filt->upper_term, o->upper_term)
95
97
  && (filt->include_lower == o->include_lower)
96
98
  && (filt->include_upper == o->include_upper));
97
99
  }
98
100
 
99
- Range *range_new(const char *field, const char *lower_term,
101
+ static Range *range_new(Symbol field, const char *lower_term,
100
102
  const char *upper_term, bool include_lower,
101
103
  bool include_upper)
102
104
  {
@@ -123,7 +125,61 @@ Range *range_new(const char *field, const char *lower_term,
123
125
 
124
126
  range = ALLOC(Range);
125
127
 
126
- range->field = estrdup((char *)field);
128
+ range->field = field;
129
+ range->lower_term = lower_term ? estrdup(lower_term) : NULL;
130
+ range->upper_term = upper_term ? estrdup(upper_term) : NULL;
131
+ range->include_lower = include_lower;
132
+ range->include_upper = include_upper;
133
+ return range;
134
+ }
135
+
136
+ static Range *trange_new(Symbol field, const char *lower_term,
137
+ const char *upper_term, bool include_lower,
138
+ bool include_upper)
139
+ {
140
+ Range *range;
141
+ int len;
142
+ double upper_num, lower_num;
143
+
144
+ if (!lower_term && !upper_term) {
145
+ RAISE(ARG_ERROR, "Nil bounds for range. A range must include either "
146
+ "lower bound or an upper bound");
147
+ }
148
+ if (include_lower && !lower_term) {
149
+ RAISE(ARG_ERROR, "Lower bound must be non-nil to be inclusive. That "
150
+ "is, if you specify :include_lower => true when you create a "
151
+ "range you must include a :lower_term");
152
+ }
153
+ if (include_upper && !upper_term) {
154
+ RAISE(ARG_ERROR, "Upper bound must be non-nil to be inclusive. That "
155
+ "is, if you specify :include_upper => true when you create a "
156
+ "range you must include a :upper_term");
157
+ }
158
+ if (upper_term && lower_term) {
159
+ if ((!lower_term ||
160
+ (sscanf(lower_term, "%lg%n", &lower_num, &len) &&
161
+ (int)strlen(lower_term) == len)) &&
162
+ (!upper_term ||
163
+ (sscanf(upper_term, "%lg%n", &upper_num, &len) &&
164
+ (int)strlen(upper_term) == len)))
165
+ {
166
+ if (upper_num < lower_num) {
167
+ RAISE(ARG_ERROR, "Upper bound must be greater than lower bound."
168
+ " numbers \"%lg\" < \"%lg\"", upper_num, lower_num);
169
+ }
170
+ }
171
+ else {
172
+ if (upper_term && lower_term &&
173
+ (strcmp(upper_term, lower_term) < 0)) {
174
+ RAISE(ARG_ERROR, "Upper bound must be greater than lower bound."
175
+ " \"%s\" < \"%s\"", upper_term, lower_term);
176
+ }
177
+ }
178
+ }
179
+
180
+ range = ALLOC(Range);
181
+
182
+ range->field = field;
127
183
  range->lower_term = lower_term ? estrdup(lower_term) : NULL;
128
184
  range->upper_term = upper_term ? estrdup(upper_term) : NULL;
129
185
  range->include_lower = include_lower;
@@ -153,7 +209,7 @@ static void rfilt_destroy_i(Filter *filt)
153
209
 
154
210
  static char *rfilt_to_s(Filter *filt)
155
211
  {
156
- char *rstr = range_to_s(RF(filt)->range, "", 1.0);
212
+ char *rstr = range_to_s(RF(filt)->range, NULL, 1.0);
157
213
  char *rfstr = strfmt("RangeFilter< %s >", rstr);
158
214
  free(rstr);
159
215
  return rfstr;
@@ -229,7 +285,7 @@ static int rfilt_eq(Filter *filt, Filter *o)
229
285
  return range_eq(RF(filt)->range, RF(o)->range);
230
286
  }
231
287
 
232
- Filter *rfilt_new(const char *field,
288
+ Filter *rfilt_new(Symbol field,
233
289
  const char *lower_term, const char *upper_term,
234
290
  bool include_lower, bool include_upper)
235
291
  {
@@ -245,6 +301,143 @@ Filter *rfilt_new(const char *field,
245
301
  return filt;
246
302
  }
247
303
 
304
+ /***************************************************************************
305
+ *
306
+ * RangeFilter
307
+ *
308
+ ***************************************************************************/
309
+
310
+ static char *trfilt_to_s(Filter *filt)
311
+ {
312
+ char *rstr = range_to_s(RF(filt)->range, NULL, 1.0);
313
+ char *rfstr = strfmt("TypedRangeFilter< %s >", rstr);
314
+ free(rstr);
315
+ return rfstr;
316
+ }
317
+
318
+ typedef enum {
319
+ TRC_NONE = 0x00,
320
+ TRC_LE = 0x01,
321
+ TRC_LT = 0x02,
322
+ TRC_GE = 0x04,
323
+ TRC_GE_LE = 0x05,
324
+ TRC_GE_LT = 0x06,
325
+ TRC_GT = 0x08,
326
+ TRC_GT_LE = 0x09,
327
+ TRC_GT_LT = 0x0a
328
+ } TypedRangeCheck;
329
+
330
+ #define SET_DOCS(cond)\
331
+ do {\
332
+ if (term[0] > '9') break; /* done */\
333
+ sscanf(term, "%lg%n", &num, &len);\
334
+ if (len == te->curr_term_len) { /* We have a number */\
335
+ if (cond) {\
336
+ tde->seek_te(tde, te);\
337
+ while (tde->next(tde)) {\
338
+ bv_set(bv, tde->doc_num(tde));\
339
+ }\
340
+ }\
341
+ }\
342
+ } while (te->next(te))
343
+
344
+
345
+ static BitVector *trfilt_get_bv_i(Filter *filt, IndexReader *ir)
346
+ {
347
+ Range *range = RF(filt)->range;
348
+ double lnum = 0.0, unum = 0.0;
349
+ int len = 0;
350
+ const char *lt = range->lower_term;
351
+ const char *ut = range->upper_term;
352
+ if ((!lt || (sscanf(lt, "%lg%n", &lnum, &len) && (int)strlen(lt) == len)) &&
353
+ (!ut || (sscanf(ut, "%lg%n", &unum, &len) && (int)strlen(ut) == len)))
354
+ {
355
+ BitVector *bv = bv_new_capa(ir->max_doc(ir));
356
+ FieldInfo *fi = fis_get_field(ir->fis, range->field);
357
+ /* the field info exists we need to add docs to the bit vector,
358
+ * otherwise we just return an empty bit vector */
359
+ if (fi) {
360
+ const int field_num = fi->number;
361
+ char *term;
362
+ double num;
363
+ TermEnum* te;
364
+ TermDocEnum *tde;
365
+ TypedRangeCheck check = TRC_NONE;
366
+
367
+ te = ir->terms(ir, field_num);
368
+ if (te->skip_to(te, "+.") == NULL) {
369
+ te->close(te);
370
+ return bv;
371
+ }
372
+
373
+ tde = ir->term_docs(ir);
374
+ term = te->curr_term;
375
+
376
+ if (lt) {
377
+ check = range->include_lower ? TRC_GE : TRC_GT;
378
+ }
379
+ if (ut) {
380
+ check = (TypedRangeCheck)(check | (range->include_upper
381
+ ? TRC_LE
382
+ : TRC_LT));
383
+ }
384
+
385
+ switch(check) {
386
+ case TRC_LE:
387
+ SET_DOCS(num <= unum);
388
+ break;
389
+ case TRC_LT:
390
+ SET_DOCS(num < unum);
391
+ break;
392
+ case TRC_GE:
393
+ SET_DOCS(num >= lnum);
394
+ break;
395
+ case TRC_GE_LE:
396
+ SET_DOCS(num >= lnum && num <= unum);
397
+ break;
398
+ case TRC_GE_LT:
399
+ SET_DOCS(num >= lnum && num < unum);
400
+ break;
401
+ case TRC_GT:
402
+ SET_DOCS(num > lnum);
403
+ break;
404
+ case TRC_GT_LE:
405
+ SET_DOCS(num > lnum && num <= unum);
406
+ break;
407
+ case TRC_GT_LT:
408
+ SET_DOCS(num > lnum && num < unum);
409
+ break;
410
+ case TRC_NONE:
411
+ /* should never happen. Error should have been raised */
412
+ assert(false);
413
+ }
414
+ tde->close(tde);
415
+ te->close(te);
416
+ }
417
+
418
+ return bv;
419
+ }
420
+ else {
421
+ return rfilt_get_bv_i(filt, ir);
422
+ }
423
+ }
424
+
425
+ Filter *trfilt_new(Symbol field,
426
+ const char *lower_term, const char *upper_term,
427
+ bool include_lower, bool include_upper)
428
+ {
429
+ Filter *filt = filt_new(RangeFilter);
430
+ RF(filt)->range = trange_new(field, lower_term, upper_term,
431
+ include_lower, include_upper);
432
+
433
+ filt->get_bv_i = &trfilt_get_bv_i;
434
+ filt->hash = &rfilt_hash;
435
+ filt->eq = &rfilt_eq;
436
+ filt->to_s = &trfilt_to_s;
437
+ filt->destroy_i = &rfilt_destroy_i;
438
+ return filt;
439
+ }
440
+
248
441
  /*****************************************************************************
249
442
  *
250
443
  * RangeQuery
@@ -258,7 +451,7 @@ typedef struct RangeQuery
258
451
  Range *range;
259
452
  } RangeQuery;
260
453
 
261
- static char *rq_to_s(Query *self, const char *field)
454
+ static char *rq_to_s(Query *self, Symbol field)
262
455
  {
263
456
  return range_to_s(RQ(self)->range, field, self->boost);
264
457
  }
@@ -273,23 +466,29 @@ static MatchVector *rq_get_matchv_i(Query *self, MatchVector *mv,
273
466
  TermVector *tv)
274
467
  {
275
468
  Range *range = RQ(((ConstantScoreQuery *)self)->original)->range;
276
- if (strcmp(tv->field, range->field) == 0) {
469
+ if (tv->field == range->field) {
470
+ const int term_cnt = tv->term_cnt;
277
471
  int i, j;
278
472
  char *upper_text = range->upper_term;
279
473
  char *lower_text = range->lower_term;
280
474
  int upper_limit = range->include_upper ? 1 : 0;
281
- int lower_limit = range->include_lower ? 1 : 0;
282
475
 
283
- for (i = tv->term_cnt - 1; i >= 0; i--) {
476
+ i = lower_text ? tv_scan_to_term_index(tv, lower_text) : 0;
477
+ if (i < term_cnt && !range->include_lower && lower_text
478
+ && 0 == strcmp(lower_text, tv->terms[i].text)) {
479
+ i++;
480
+ }
481
+
482
+ for (; i < term_cnt; i++) {
284
483
  TVTerm *tv_term = &(tv->terms[i]);
285
484
  char *text = tv_term->text;
286
- if ((!upper_text || strcmp(text, upper_text) < upper_limit) &&
287
- (!lower_text || strcmp(lower_text, text) < lower_limit)) {
288
-
289
- for (j = 0; j < tv_term->freq; j++) {
290
- int pos = tv_term->positions[j];
291
- matchv_add(mv, pos, pos);
292
- }
485
+ const int tv_term_freq = tv_term->freq;
486
+ if (upper_text && strcmp(text, upper_text) >= upper_limit) {
487
+ break;
488
+ }
489
+ for (j = 0; j < tv_term_freq; j++) {
490
+ int pos = tv_term->positions[j];
491
+ matchv_add(mv, pos, pos);
293
492
  }
294
493
  }
295
494
  }
@@ -319,25 +518,26 @@ static int rq_eq(Query *self, Query *o)
319
518
  return range_eq(RQ(self)->range, RQ(o)->range);
320
519
  }
321
520
 
322
- Query *rq_new_less(const char *field, const char *upper_term,
521
+ Query *rq_new_less(Symbol field, const char *upper_term,
323
522
  bool include_upper)
324
523
  {
325
524
  return rq_new(field, NULL, upper_term, false, include_upper);
326
525
  }
327
526
 
328
- Query *rq_new_more(const char *field, const char *lower_term,
527
+ Query *rq_new_more(Symbol field, const char *lower_term,
329
528
  bool include_lower)
330
529
  {
331
530
  return rq_new(field, lower_term, NULL, include_lower, false);
332
531
  }
333
532
 
334
- Query *rq_new(const char *field, const char *lower_term,
533
+ Query *rq_new(Symbol field, const char *lower_term,
335
534
  const char *upper_term, bool include_lower, bool include_upper)
336
535
  {
337
- Query *self = q_new(RangeQuery);
338
-
339
- RQ(self)->range = range_new(field, lower_term, upper_term,
340
- include_lower, include_upper);
536
+ Query *self;
537
+ Range *range = range_new(field, lower_term, upper_term,
538
+ include_lower, include_upper);
539
+ self = q_new(RangeQuery);
540
+ RQ(self)->range = range;
341
541
 
342
542
  self->type = RANGE_QUERY;
343
543
  self->rewrite = &rq_rewrite;
@@ -348,3 +548,135 @@ Query *rq_new(const char *field, const char *lower_term,
348
548
  self->create_weight_i = &q_create_weight_unsup;
349
549
  return self;
350
550
  }
551
+
552
+ /*****************************************************************************
553
+ *
554
+ * TypedRangeQuery
555
+ *
556
+ *****************************************************************************/
557
+
558
+ #define SET_TERMS(cond)\
559
+ for (i = tv->term_cnt - 1; i >= 0; i--) {\
560
+ TVTerm *tv_term = &(tv->terms[i]);\
561
+ char *text = tv_term->text;\
562
+ double num;\
563
+ sscanf(text, "%lg%n", &num, &len);\
564
+ if ((int)strlen(text) == len) { /* We have a number */\
565
+ if (cond) {\
566
+ const int tv_term_freq = tv_term->freq;\
567
+ for (j = 0; j < tv_term_freq; j++) {\
568
+ int pos = tv_term->positions[j];\
569
+ matchv_add(mv, pos, pos);\
570
+ }\
571
+ }\
572
+ }\
573
+ }\
574
+
575
+ static MatchVector *trq_get_matchv_i(Query *self, MatchVector *mv,
576
+ TermVector *tv)
577
+ {
578
+ Range *range = RQ(((ConstantScoreQuery *)self)->original)->range;
579
+ if (tv->field == range->field) {
580
+ double lnum = 0.0, unum = 0.0;
581
+ int len = 0;
582
+ const char *lt = range->lower_term;
583
+ const char *ut = range->upper_term;
584
+ if ((!lt
585
+ || (sscanf(lt,"%lg%n",&lnum,&len) && (int)strlen(lt) == len))
586
+ &&
587
+ (!ut
588
+ || (sscanf(ut,"%lg%n",&unum,&len) && (int)strlen(ut) == len)))
589
+ {
590
+ TypedRangeCheck check = TRC_NONE;
591
+ int i = 0, j = 0;
592
+
593
+ if (lt) {
594
+ check = range->include_lower ? TRC_GE : TRC_GT;
595
+ }
596
+ if (ut) {
597
+ check = (TypedRangeCheck)(check | (range->include_upper
598
+ ? TRC_LE
599
+ : TRC_LT));
600
+ }
601
+
602
+ switch(check) {
603
+ case TRC_LE:
604
+ SET_TERMS(num <= unum);
605
+ break;
606
+ case TRC_LT:
607
+ SET_TERMS(num < unum);
608
+ break;
609
+ case TRC_GE:
610
+ SET_TERMS(num >= lnum);
611
+ break;
612
+ case TRC_GE_LE:
613
+ SET_TERMS(num >= lnum && num <= unum);
614
+ break;
615
+ case TRC_GE_LT:
616
+ SET_TERMS(num >= lnum && num < unum);
617
+ break;
618
+ case TRC_GT:
619
+ SET_TERMS(num > lnum);
620
+ break;
621
+ case TRC_GT_LE:
622
+ SET_TERMS(num > lnum && num <= unum);
623
+ break;
624
+ case TRC_GT_LT:
625
+ SET_TERMS(num > lnum && num < unum);
626
+ break;
627
+ case TRC_NONE:
628
+ /* should never happen. Error should have been raised */
629
+ assert(false);
630
+ }
631
+
632
+ }
633
+ else {
634
+ return rq_get_matchv_i(self, mv, tv);
635
+ }
636
+ }
637
+ return mv;
638
+ }
639
+
640
+ static Query *trq_rewrite(Query *self, IndexReader *ir)
641
+ {
642
+ Query *csq;
643
+ Range *r = RQ(self)->range;
644
+ Filter *filter = trfilt_new(r->field, r->lower_term, r->upper_term,
645
+ r->include_lower, r->include_upper);
646
+ (void)ir;
647
+ csq = csq_new_nr(filter);
648
+ ((ConstantScoreQuery *)csq)->original = self;
649
+ csq->get_matchv_i = &trq_get_matchv_i;
650
+ return (Query *)csq;
651
+ }
652
+
653
+ Query *trq_new_less(Symbol field, const char *upper_term,
654
+ bool include_upper)
655
+ {
656
+ return trq_new(field, NULL, upper_term, false, include_upper);
657
+ }
658
+
659
+ Query *trq_new_more(Symbol field, const char *lower_term,
660
+ bool include_lower)
661
+ {
662
+ return trq_new(field, lower_term, NULL, include_lower, false);
663
+ }
664
+
665
+ Query *trq_new(Symbol field, const char *lower_term,
666
+ const char *upper_term, bool include_lower, bool include_upper)
667
+ {
668
+ Query *self;
669
+ Range *range = trange_new(field, lower_term, upper_term,
670
+ include_lower, include_upper);
671
+ self = q_new(RangeQuery);
672
+ RQ(self)->range = range;
673
+
674
+ self->type = TYPED_RANGE_QUERY;
675
+ self->rewrite = &trq_rewrite;
676
+ self->to_s = &rq_to_s;
677
+ self->hash = &rq_hash;
678
+ self->eq = &rq_eq;
679
+ self->destroy_i = &rq_destroy;
680
+ self->create_weight_i = &q_create_weight_unsup;
681
+ return self;
682
+ }