ferret 0.10.2 → 0.10.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +31 -36
- data/ext/analysis.c +97 -37
- data/ext/analysis.h +11 -0
- data/ext/ferret.c +10 -0
- data/ext/ferret.h +2 -0
- data/ext/inc/lang.h +1 -0
- data/ext/index.c +2 -2
- data/ext/lang.h +1 -0
- data/ext/q_parser.c +25 -5
- data/ext/r_analysis.c +97 -53
- data/ext/r_index.c +0 -1
- data/ext/r_search.c +1 -1
- data/ext/search.c +7 -3
- data/ext/term_vectors.c +1 -1
- data/lib/ferret/index.rb +94 -48
- data/lib/ferret_version.rb +1 -1
- data/test/unit/analysis/tc_analyzer.rb +24 -8
- data/test/unit/analysis/tc_token_stream.rb +7 -0
- data/test/unit/index/tc_index.rb +2 -2
- data/test/unit/query_parser/tc_query_parser.rb +3 -3
- metadata +12 -7
- data/ext/tags +0 -7841
data/Rakefile
CHANGED
@@ -121,7 +121,17 @@ file "ext/#{EXT}" => ["ext/Makefile"] do
|
|
121
121
|
cp "ext/inc/threading.h", "ext/threading.h"
|
122
122
|
cd "ext"
|
123
123
|
if (/mswin/ =~ RUBY_PLATFORM) and ENV['make'].nil?
|
124
|
-
|
124
|
+
begin
|
125
|
+
sh "nmake"
|
126
|
+
rescue Exception => e
|
127
|
+
puts
|
128
|
+
puts "**********************************************************************"
|
129
|
+
puts "You may need to call VCVARS32.BAT to set the environment variables."
|
130
|
+
puts ' c:\Program Files\Microsoft Visual Studio\VC98\Bin\VCVARS32.BAT'
|
131
|
+
puts "**********************************************************************"
|
132
|
+
puts
|
133
|
+
raise e
|
134
|
+
end
|
125
135
|
else
|
126
136
|
sh "make"
|
127
137
|
end
|
@@ -132,6 +142,7 @@ file "ext/lang.h" => ["ext/inc/lang.h"] do
|
|
132
142
|
rm_f "ext/lang.h"
|
133
143
|
cp "ext/inc/lang.h", "ext/lang.h"
|
134
144
|
end
|
145
|
+
|
135
146
|
file "ext/threading.h" => ["ext/inc/threading.h"] do
|
136
147
|
rm_f "ext/threading.h"
|
137
148
|
cp "ext/inc/threading.h", "ext/threading.h"
|
@@ -158,7 +169,7 @@ end
|
|
158
169
|
PKG_FILES = FileList[
|
159
170
|
'setup.rb',
|
160
171
|
'[-A-Z]*',
|
161
|
-
'ext
|
172
|
+
'ext/**/*.[ch]',
|
162
173
|
'lib/**/*.rb',
|
163
174
|
'test/**/*.rb',
|
164
175
|
'test/**/wordfile',
|
@@ -176,7 +187,6 @@ else
|
|
176
187
|
spec = Gem::Specification.new do |s|
|
177
188
|
|
178
189
|
#### Basic information.
|
179
|
-
|
180
190
|
s.name = 'ferret'
|
181
191
|
s.version = PKG_VERSION
|
182
192
|
s.summary = "Ruby indexing library."
|
@@ -186,29 +196,17 @@ else
|
|
186
196
|
EOF
|
187
197
|
|
188
198
|
#### Dependencies and requirements.
|
189
|
-
|
190
|
-
#s.add_dependency('log4r', '> 1.0.4')
|
191
|
-
#s.requirements << ""
|
192
|
-
|
193
|
-
#### Which files are to be included in this gem? Everything! (Except CVS directories.)
|
194
|
-
|
199
|
+
s.add_dependency('rake')
|
195
200
|
s.files = PKG_FILES.to_a
|
196
|
-
|
197
|
-
#### C code extensions.
|
198
|
-
|
199
201
|
s.extensions << "ext/extconf.rb"
|
200
|
-
|
201
|
-
#### Load-time details: library and application (you will need one or both).
|
202
|
-
|
203
|
-
s.require_path = 'lib' # Use these for libraries.
|
202
|
+
s.require_path = 'lib'
|
204
203
|
s.autorequire = 'ferret'
|
205
204
|
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
#### Documentation and testing.
|
205
|
+
#### Author and project details.
|
206
|
+
s.author = "David Balmain"
|
207
|
+
s.email = "dbalmain@gmail.com"
|
208
|
+
s.homepage = "http://ferret.davebalmain.com/trac"
|
209
|
+
s.rubyforge_project = "ferret"
|
212
210
|
|
213
211
|
s.has_rdoc = true
|
214
212
|
s.extra_rdoc_files = rd.rdoc_files.reject { |fn| fn =~ /\.rb$/ }.to_a
|
@@ -217,21 +215,18 @@ else
|
|
217
215
|
'--main' << 'README' << '--line-numbers' <<
|
218
216
|
'TUTORIAL' << 'TODO'
|
219
217
|
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
s.rubyforge_project = "ferret"
|
226
|
-
# if ENV['CERT_DIR']
|
227
|
-
# s.signing_key = File.join(ENV['CERT_DIR'], 'gem-private_key.pem')
|
228
|
-
# s.cert_chain = [File.join(ENV['CERT_DIR'], 'gem-public_cert.pem')]
|
229
|
-
# end
|
218
|
+
if RUBY_PLATFORM =~ /mswin/
|
219
|
+
s.files = PKG_FILES.to_a + ["ext/#{EXT}"]
|
220
|
+
s.extensions.clear
|
221
|
+
s.platform = Gem::Platform::WIN32
|
222
|
+
end
|
230
223
|
end
|
231
224
|
|
232
225
|
package_task = Rake::GemPackageTask.new(spec) do |pkg|
|
233
|
-
|
234
|
-
|
226
|
+
unless RUBY_PLATFORM =~ /mswin/
|
227
|
+
pkg.need_zip = true
|
228
|
+
pkg.need_tar = true
|
229
|
+
end
|
235
230
|
end
|
236
231
|
end
|
237
232
|
|
@@ -309,11 +304,11 @@ task :update_version => [:prerelease] do
|
|
309
304
|
announce "No version change ... skipping version update"
|
310
305
|
else
|
311
306
|
announce "Updating Ferret version to #{PKG_VERSION}"
|
312
|
-
reversion("lib/
|
307
|
+
reversion("lib/ferret_version.rb")
|
313
308
|
if ENV['RELTEST']
|
314
309
|
announce "Release Task Testing, skipping commiting of new version"
|
315
310
|
else
|
316
|
-
sh %{svn ci -m "Updated to version #{PKG_VERSION}" lib/
|
311
|
+
sh %{svn ci -m "Updated to version #{PKG_VERSION}" lib/ferret_version.rb}
|
317
312
|
end
|
318
313
|
end
|
319
314
|
end
|
data/ext/analysis.c
CHANGED
@@ -55,7 +55,8 @@ __inline Token *w_tk_set(Token *tk, wchar_t *text, int start, int end,
|
|
55
55
|
int tk_eq(Token *tk1, Token *tk2)
|
56
56
|
{
|
57
57
|
return (strcmp((char *)tk1->text, (char *)tk2->text) == 0 &&
|
58
|
-
tk1->start == tk2->start && tk1->end == tk2->end
|
58
|
+
tk1->start == tk2->start && tk1->end == tk2->end &&
|
59
|
+
tk1->pos_inc == tk2->pos_inc);
|
59
60
|
}
|
60
61
|
|
61
62
|
int tk_cmp(Token *tk1, Token *tk2)
|
@@ -724,7 +725,7 @@ static int std_get_url(char *input, char *token, int i)
|
|
724
725
|
{
|
725
726
|
while (isurlc(input[i])) {
|
726
727
|
if (isurlpunc(input[i]) && isurlpunc(input[i - 1])) {
|
727
|
-
break;
|
728
|
+
break; /* can't have two puncs in a row */
|
728
729
|
}
|
729
730
|
if (i < MAX_WORD_SIZE) {
|
730
731
|
token[i] = input[i];
|
@@ -1061,18 +1062,18 @@ static TokenStream *sf_clone_i(TokenStream *orig_ts)
|
|
1061
1062
|
|
1062
1063
|
static Token *sf_next(TokenStream *ts)
|
1063
1064
|
{
|
1064
|
-
int pos_inc =
|
1065
|
+
int pos_inc = 0;
|
1065
1066
|
HashTable *words = StopFilt(ts)->words;
|
1066
1067
|
TokenFilter *tf = TkFilt(ts);
|
1067
1068
|
Token *tk = tf->sub_ts->next(tf->sub_ts);
|
1068
1069
|
|
1069
1070
|
while ((tk != NULL) && (h_get(words, tk->text) != NULL)) {
|
1071
|
+
pos_inc += tk->pos_inc;
|
1070
1072
|
tk = tf->sub_ts->next(tf->sub_ts);
|
1071
|
-
pos_inc++;
|
1072
1073
|
}
|
1073
1074
|
|
1074
1075
|
if (tk != NULL) {
|
1075
|
-
tk->pos_inc
|
1076
|
+
tk->pos_inc += pos_inc;
|
1076
1077
|
}
|
1077
1078
|
|
1078
1079
|
return tk;
|
@@ -1122,6 +1123,85 @@ TokenStream *stop_filter_new(TokenStream *ts)
|
|
1122
1123
|
return stop_filter_new_with_words(ts, FULL_ENGLISH_STOP_WORDS);
|
1123
1124
|
}
|
1124
1125
|
|
1126
|
+
/****************************************************************************
|
1127
|
+
* HyphenFilter
|
1128
|
+
****************************************************************************/
|
1129
|
+
|
1130
|
+
#define HyphenFilt(filter) ((HyphenFilter *)(filter))
|
1131
|
+
|
1132
|
+
static TokenStream *hf_clone_i(TokenStream *orig_ts)
|
1133
|
+
{
|
1134
|
+
TokenStream *new_ts = filter_clone_size(orig_ts, sizeof(HyphenFilter));
|
1135
|
+
return new_ts;
|
1136
|
+
}
|
1137
|
+
|
1138
|
+
static Token *hf_next(TokenStream *ts)
|
1139
|
+
{
|
1140
|
+
HyphenFilter *hf = HyphenFilt(ts);
|
1141
|
+
TokenFilter *tf = TkFilt(ts);
|
1142
|
+
Token *tk = hf->tk;
|
1143
|
+
|
1144
|
+
if (hf->pos < hf->len) {
|
1145
|
+
const int pos = hf->pos;
|
1146
|
+
const int text_len = strlen(hf->text + pos);
|
1147
|
+
strcpy(tk->text, hf->text + pos);
|
1148
|
+
tk->pos_inc = ((pos != 0) ? 1 : 0);
|
1149
|
+
tk->start = hf->start + pos;
|
1150
|
+
tk->end = tk->start + text_len;
|
1151
|
+
hf->pos += text_len + 1;
|
1152
|
+
tk->len = text_len;
|
1153
|
+
return tk;
|
1154
|
+
}
|
1155
|
+
else {
|
1156
|
+
char *p;
|
1157
|
+
bool seen_hyphen = false;
|
1158
|
+
bool seen_other_punc = false;
|
1159
|
+
hf->tk = tk = tf->sub_ts->next(tf->sub_ts);
|
1160
|
+
if (NULL == tk) return NULL;
|
1161
|
+
p = tk->text + 1;
|
1162
|
+
while (*p) {
|
1163
|
+
if (*p == '-') {
|
1164
|
+
seen_hyphen = true;
|
1165
|
+
}
|
1166
|
+
else if (!isalpha(*p)) {
|
1167
|
+
seen_other_punc = true;
|
1168
|
+
break;
|
1169
|
+
}
|
1170
|
+
p++;
|
1171
|
+
}
|
1172
|
+
if (seen_hyphen && !seen_other_punc) {
|
1173
|
+
char *q = hf->text;
|
1174
|
+
char *r = tk->text;
|
1175
|
+
p = tk->text;
|
1176
|
+
while (*p) {
|
1177
|
+
if (*p == '-') {
|
1178
|
+
*q = '\0';
|
1179
|
+
}
|
1180
|
+
else {
|
1181
|
+
*r = *q = *p;
|
1182
|
+
r++;
|
1183
|
+
}
|
1184
|
+
q++;
|
1185
|
+
p++;
|
1186
|
+
}
|
1187
|
+
*r = *q = '\0';
|
1188
|
+
hf->start = tk->start;
|
1189
|
+
hf->pos = 0;
|
1190
|
+
hf->len = q - hf->text;
|
1191
|
+
tk->len = r - tk->text;
|
1192
|
+
}
|
1193
|
+
}
|
1194
|
+
return tk;
|
1195
|
+
}
|
1196
|
+
|
1197
|
+
TokenStream *hyphen_filter_new(TokenStream *sub_ts)
|
1198
|
+
{
|
1199
|
+
TokenStream *ts = tf_new(HyphenFilter, sub_ts);
|
1200
|
+
ts->next = &hf_next;
|
1201
|
+
ts->clone_i = &hf_clone_i;
|
1202
|
+
return ts;
|
1203
|
+
}
|
1204
|
+
|
1125
1205
|
/****************************************************************************
|
1126
1206
|
* LowerCaseFilter
|
1127
1207
|
****************************************************************************/
|
@@ -1257,64 +1337,44 @@ TokenStream *stem_filter_new(TokenStream *ts, const char *algorithm,
|
|
1257
1337
|
Analyzer *standard_analyzer_new_with_words_len(const char **words, int len,
|
1258
1338
|
bool lowercase)
|
1259
1339
|
{
|
1260
|
-
TokenStream *ts;
|
1340
|
+
TokenStream *ts = standard_tokenizer_new();
|
1261
1341
|
if (lowercase) {
|
1262
|
-
ts =
|
1263
|
-
(standard_tokenizer_new()),
|
1264
|
-
words, len);
|
1265
|
-
}
|
1266
|
-
else {
|
1267
|
-
ts = stop_filter_new_with_words_len(standard_tokenizer_new(),
|
1268
|
-
words, len);
|
1342
|
+
ts = lowercase_filter_new(ts);
|
1269
1343
|
}
|
1344
|
+
ts = hyphen_filter_new(stop_filter_new_with_words_len(ts, words, len));
|
1270
1345
|
return analyzer_new(ts, NULL, NULL);
|
1271
1346
|
}
|
1272
1347
|
|
1273
1348
|
Analyzer *standard_analyzer_new_with_words(const char **words,
|
1274
1349
|
bool lowercase)
|
1275
1350
|
{
|
1276
|
-
TokenStream *ts;
|
1351
|
+
TokenStream *ts = standard_tokenizer_new();
|
1277
1352
|
if (lowercase) {
|
1278
|
-
ts =
|
1279
|
-
(standard_tokenizer_new()),
|
1280
|
-
words);
|
1281
|
-
}
|
1282
|
-
else {
|
1283
|
-
ts = stop_filter_new_with_words(standard_tokenizer_new(),
|
1284
|
-
words);
|
1353
|
+
ts = lowercase_filter_new(ts);
|
1285
1354
|
}
|
1355
|
+
ts = hyphen_filter_new(stop_filter_new_with_words(ts, words));
|
1286
1356
|
return analyzer_new(ts, NULL, NULL);
|
1287
1357
|
}
|
1288
1358
|
|
1289
1359
|
Analyzer *mb_standard_analyzer_new_with_words_len(const char **words,
|
1290
1360
|
int len, bool lowercase)
|
1291
1361
|
{
|
1292
|
-
TokenStream *ts;
|
1362
|
+
TokenStream *ts = mb_standard_tokenizer_new();
|
1293
1363
|
if (lowercase) {
|
1294
|
-
ts =
|
1295
|
-
(mb_standard_tokenizer_new
|
1296
|
-
()), words, len);
|
1297
|
-
}
|
1298
|
-
else {
|
1299
|
-
ts = stop_filter_new_with_words_len(mb_standard_tokenizer_new(),
|
1300
|
-
words, len);
|
1364
|
+
ts = mb_lowercase_filter_new(ts);
|
1301
1365
|
}
|
1366
|
+
ts = hyphen_filter_new(stop_filter_new_with_words_len(ts, words, len));
|
1302
1367
|
return analyzer_new(ts, NULL, NULL);
|
1303
1368
|
}
|
1304
1369
|
|
1305
1370
|
Analyzer *mb_standard_analyzer_new_with_words(const char **words,
|
1306
1371
|
bool lowercase)
|
1307
1372
|
{
|
1308
|
-
TokenStream *ts;
|
1373
|
+
TokenStream *ts = mb_standard_tokenizer_new();
|
1309
1374
|
if (lowercase) {
|
1310
|
-
ts =
|
1311
|
-
(mb_standard_tokenizer_new()),
|
1312
|
-
words);
|
1313
|
-
}
|
1314
|
-
else {
|
1315
|
-
ts = stop_filter_new_with_words(mb_standard_tokenizer_new(),
|
1316
|
-
words);
|
1375
|
+
ts = mb_lowercase_filter_new(ts);
|
1317
1376
|
}
|
1377
|
+
ts = hyphen_filter_new(stop_filter_new_with_words(ts, words));
|
1318
1378
|
return analyzer_new(ts, NULL, NULL);
|
1319
1379
|
}
|
1320
1380
|
|
data/ext/analysis.h
CHANGED
@@ -89,6 +89,16 @@ typedef struct StopFilter
|
|
89
89
|
HashTable *words;
|
90
90
|
} StopFilter;
|
91
91
|
|
92
|
+
typedef struct HyphenFilter
|
93
|
+
{
|
94
|
+
TokenFilter super;
|
95
|
+
char text[MAX_WORD_SIZE];
|
96
|
+
int start;
|
97
|
+
int pos;
|
98
|
+
int len;
|
99
|
+
Token *tk;
|
100
|
+
} HyphenFilter;
|
101
|
+
|
92
102
|
typedef struct StemFilter
|
93
103
|
{
|
94
104
|
TokenFilter super;
|
@@ -111,6 +121,7 @@ extern TokenStream *mb_letter_tokenizer_new(bool lowercase);
|
|
111
121
|
extern TokenStream *standard_tokenizer_new();
|
112
122
|
extern TokenStream *mb_standard_tokenizer_new();
|
113
123
|
|
124
|
+
extern TokenStream *hyphen_filter_new(TokenStream *ts);
|
114
125
|
extern TokenStream *lowercase_filter_new(TokenStream *ts);
|
115
126
|
extern TokenStream *mb_lowercase_filter_new(TokenStream *ts);
|
116
127
|
|
data/ext/ferret.c
CHANGED
@@ -16,6 +16,7 @@ ID id_lt;
|
|
16
16
|
ID id_call;
|
17
17
|
ID id_is_directory;
|
18
18
|
ID id_close;
|
19
|
+
ID id_cclass;
|
19
20
|
ID id_data;
|
20
21
|
|
21
22
|
static ID id_mkdir_p;
|
@@ -97,6 +98,13 @@ VALUE frt_data_alloc(VALUE klass)
|
|
97
98
|
return Frt_Make_Struct(klass);
|
98
99
|
}
|
99
100
|
|
101
|
+
VALUE frt_define_class_under(VALUE module, char *name, VALUE super)
|
102
|
+
{
|
103
|
+
VALUE klass = rb_define_class_under(module, name, super);
|
104
|
+
rb_ivar_set(klass, id_cclass, Qtrue);
|
105
|
+
return klass;
|
106
|
+
}
|
107
|
+
|
100
108
|
void frt_deref_free(void *p)
|
101
109
|
{
|
102
110
|
object_del(p);
|
@@ -255,6 +263,8 @@ void Init_ferret_ext(void)
|
|
255
263
|
id_is_directory = rb_intern("directory?");
|
256
264
|
id_close = rb_intern("close");
|
257
265
|
|
266
|
+
id_cclass = rb_intern("cclass");
|
267
|
+
|
258
268
|
id_data = rb_intern("@data");
|
259
269
|
|
260
270
|
/* Symbols */
|
data/ext/ferret.h
CHANGED
@@ -13,6 +13,7 @@ extern ID id_lt;
|
|
13
13
|
extern ID id_call;
|
14
14
|
extern ID id_is_directory;
|
15
15
|
extern ID id_close;
|
16
|
+
extern ID id_cclass;
|
16
17
|
extern ID id_data;
|
17
18
|
|
18
19
|
/* Symbols */
|
@@ -60,6 +61,7 @@ extern void frt_create_dir(VALUE rpath);
|
|
60
61
|
extern VALUE frt_hs_to_rb_ary(HashSet *hs);
|
61
62
|
extern void *frt_rb_data_ptr(VALUE val);
|
62
63
|
extern char * frt_field(VALUE rfield);
|
64
|
+
extern VALUE frt_define_class_under(VALUE module, char *name, VALUE super);
|
63
65
|
|
64
66
|
#define Frt_Make_Struct(klass)\
|
65
67
|
rb_data_object_alloc(klass,NULL,(RUBY_DATA_FUNC)NULL,(RUBY_DATA_FUNC)NULL)
|
data/ext/inc/lang.h
CHANGED
data/ext/index.c
CHANGED
@@ -722,8 +722,8 @@ void lazy_df_get_bytes(LazyDocField *self, char *buf, int start, int len)
|
|
722
722
|
RAISE(IO_ERROR, "start out of range in LazyDocField#get_bytes. %d "
|
723
723
|
"is not between 0 and %d", start, self->len);
|
724
724
|
}
|
725
|
-
if (len
|
726
|
-
RAISE(IO_ERROR, "len %d should be greater than 0", len);
|
725
|
+
if (len <= 0) {
|
726
|
+
RAISE(IO_ERROR, "len = %d, but should be greater than 0", len);
|
727
727
|
}
|
728
728
|
if (start + len > self->len) {
|
729
729
|
RAISE(IO_ERROR, "Tried to read past end of field. Field is only %d "
|
data/ext/lang.h
CHANGED
data/ext/q_parser.c
CHANGED
@@ -1984,7 +1984,14 @@ static Query *get_term_q(QParser *qp, char *field, char *word)
|
|
1984
1984
|
q->destroy_i(q);
|
1985
1985
|
q = phq;
|
1986
1986
|
do {
|
1987
|
-
|
1987
|
+
if (token->pos_inc) {
|
1988
|
+
phq_add_term(q, token->text, token->pos_inc);
|
1989
|
+
/* add some slop since single term was expected */
|
1990
|
+
((PhraseQuery *)q)->slop++;
|
1991
|
+
}
|
1992
|
+
else {
|
1993
|
+
phq_append_multi_term(q, token->text);
|
1994
|
+
}
|
1988
1995
|
} while ((token = ts_next(stream)) != NULL);
|
1989
1996
|
}
|
1990
1997
|
}
|
@@ -2157,7 +2164,7 @@ static Phrase *ph_add_multi_word(Phrase *self, char *word)
|
|
2157
2164
|
}
|
2158
2165
|
|
2159
2166
|
static Query *get_phrase_query(QParser *qp, char *field,
|
2160
|
-
|
2167
|
+
Phrase *phrase, char *slop_str)
|
2161
2168
|
{
|
2162
2169
|
const int pos_cnt = phrase->size;
|
2163
2170
|
Query *q = NULL;
|
@@ -2180,6 +2187,7 @@ static Query *get_phrase_query(QParser *qp, char *field,
|
|
2180
2187
|
Token *token;
|
2181
2188
|
TokenStream *stream;
|
2182
2189
|
int i, j;
|
2190
|
+
int pos_inc = 0;
|
2183
2191
|
q = phq_new(field);
|
2184
2192
|
if (slop_str) {
|
2185
2193
|
int slop;
|
@@ -2188,14 +2196,24 @@ static Query *get_phrase_query(QParser *qp, char *field,
|
|
2188
2196
|
}
|
2189
2197
|
|
2190
2198
|
for (i = 0; i < pos_cnt; i++) {
|
2191
|
-
int pos_inc = phrase->positions[i].pos; /* Actually holds pos_inc */
|
2192
2199
|
char **words = phrase->positions[i].terms;
|
2193
2200
|
const int word_count = ary_size(words);
|
2201
|
+
if (pos_inc) {
|
2202
|
+
((PhraseQuery *)q)->slop++;
|
2203
|
+
}
|
2204
|
+
pos_inc += phrase->positions[i].pos + 1; /* Actually holds pos_inc*/
|
2194
2205
|
|
2195
2206
|
if (word_count == 1) {
|
2196
2207
|
stream = get_cached_ts(qp, field, words[0]);
|
2197
2208
|
while ((token = ts_next(stream))) {
|
2198
|
-
|
2209
|
+
if (token->pos_inc) {
|
2210
|
+
phq_add_term(q, token->text,
|
2211
|
+
pos_inc ? pos_inc : token->pos_inc);
|
2212
|
+
}
|
2213
|
+
else {
|
2214
|
+
phq_append_multi_term(q, token->text);
|
2215
|
+
((PhraseQuery *)q)->slop++;
|
2216
|
+
}
|
2199
2217
|
pos_inc = 0;
|
2200
2218
|
}
|
2201
2219
|
}
|
@@ -2206,8 +2224,10 @@ static Query *get_phrase_query(QParser *qp, char *field,
|
|
2206
2224
|
stream = get_cached_ts(qp, field, words[j]);
|
2207
2225
|
if ((token = ts_next(stream))) {
|
2208
2226
|
if (!added_position) {
|
2209
|
-
phq_add_term(q, token->text,
|
2227
|
+
phq_add_term(q, token->text,
|
2228
|
+
pos_inc ? pos_inc : token->pos_inc);
|
2210
2229
|
added_position = true;
|
2230
|
+
pos_inc = 0;
|
2211
2231
|
}
|
2212
2232
|
else {
|
2213
2233
|
phq_append_multi_term(q, token->text);
|