ferret 0.10.6 → 0.10.7
Sign up to get free protection for your applications and to get access to all the features.
- data/ext/analysis.c +136 -107
- data/ext/analysis.h +4 -0
- data/ext/bitvector.c +2 -2
- data/ext/bitvector.h +1 -1
- data/ext/compound_io.c +4 -4
- data/ext/defines.h +0 -2
- data/ext/filter.c +3 -3
- data/ext/fs_store.c +4 -4
- data/ext/hash.c +29 -18
- data/ext/hash.h +34 -16
- data/ext/hashset.c +6 -3
- data/ext/hashset.h +1 -1
- data/ext/index.c +22 -20
- data/ext/q_boolean.c +3 -3
- data/ext/q_const_score.c +1 -1
- data/ext/q_fuzzy.c +1 -1
- data/ext/q_match_all.c +1 -1
- data/ext/q_multi_term.c +2 -2
- data/ext/q_parser.c +21 -6
- data/ext/q_phrase.c +2 -2
- data/ext/q_prefix.c +1 -1
- data/ext/q_range.c +3 -3
- data/ext/q_span.c +8 -8
- data/ext/q_term.c +1 -1
- data/ext/q_wildcard.c +1 -1
- data/ext/r_analysis.c +10 -4
- data/ext/r_index.c +89 -12
- data/ext/r_qparser.c +67 -4
- data/ext/r_search.c +11 -1
- data/ext/r_store.c +51 -35
- data/ext/ram_store.c +18 -18
- data/ext/search.c +1 -1
- data/ext/search.h +25 -23
- data/ext/similarity.c +1 -1
- data/ext/sort.c +1 -1
- data/ext/store.c +22 -3
- data/ext/store.h +8 -2
- data/lib/ferret/index.rb +14 -4
- data/lib/ferret_version.rb +1 -1
- data/test/test_helper.rb +3 -0
- data/test/unit/analysis/tc_analyzer.rb +5 -5
- data/test/unit/analysis/tc_token_stream.rb +3 -3
- data/test/unit/index/tc_index_writer.rb +1 -1
- data/test/unit/query_parser/tc_query_parser.rb +7 -5
- data/test/unit/search/tc_filter.rb +1 -1
- data/test/unit/search/tc_fuzzy_query.rb +1 -1
- data/test/unit/search/tc_index_searcher.rb +1 -1
- data/test/unit/search/tc_multi_searcher.rb +1 -1
- data/test/unit/search/tc_search_and_sort.rb +1 -1
- data/test/unit/search/tc_spans.rb +1 -1
- metadata +4 -3
data/ext/q_phrase.c
CHANGED
@@ -1015,11 +1015,11 @@ static Query *phq_rewrite(Query *self, IndexReader *ir)
|
|
1015
1015
|
}
|
1016
1016
|
}
|
1017
1017
|
|
1018
|
-
static
|
1018
|
+
static unsigned long phq_hash(Query *self)
|
1019
1019
|
{
|
1020
1020
|
int i, j;
|
1021
1021
|
PhraseQuery *phq = PhQ(self);
|
1022
|
-
|
1022
|
+
unsigned long hash = str_hash(phq->field);
|
1023
1023
|
for (i = 0; i < phq->pos_cnt; i++) {
|
1024
1024
|
char **terms = phq->positions[i].terms;
|
1025
1025
|
for (j = ary_size(terms) - 1; j >= 0; j--) {
|
data/ext/q_prefix.c
CHANGED
data/ext/q_range.c
CHANGED
@@ -74,7 +74,7 @@ static void range_destroy(Range *range)
|
|
74
74
|
free(range);
|
75
75
|
}
|
76
76
|
|
77
|
-
static
|
77
|
+
static unsigned long range_hash(Range *filt)
|
78
78
|
{
|
79
79
|
return filt->include_lower | (filt->include_upper << 1)
|
80
80
|
| ((str_hash(filt->field)
|
@@ -219,7 +219,7 @@ static BitVector *rfilt_get_bv_i(Filter *filt, IndexReader *ir)
|
|
219
219
|
return bv;
|
220
220
|
}
|
221
221
|
|
222
|
-
static
|
222
|
+
static unsigned long rfilt_hash(Filter *filt)
|
223
223
|
{
|
224
224
|
return range_hash(RF(filt)->range);
|
225
225
|
}
|
@@ -278,7 +278,7 @@ static Query *rq_rewrite(Query *self, IndexReader *ir)
|
|
278
278
|
return csq_new_nr(filter);
|
279
279
|
}
|
280
280
|
|
281
|
-
static
|
281
|
+
static unsigned long rq_hash(Query *self)
|
282
282
|
{
|
283
283
|
return range_hash(RQ(self)->range);
|
284
284
|
}
|
data/ext/q_span.c
CHANGED
@@ -17,7 +17,7 @@
|
|
17
17
|
|
18
18
|
#define SpQ(query) ((SpanQuery *)(query))
|
19
19
|
|
20
|
-
static
|
20
|
+
static unsigned long spanq_hash(Query *self)
|
21
21
|
{
|
22
22
|
return str_hash(SpQ(self)->field);
|
23
23
|
}
|
@@ -1355,7 +1355,7 @@ static HashSet *spantq_get_terms(Query *self)
|
|
1355
1355
|
return terms;
|
1356
1356
|
}
|
1357
1357
|
|
1358
|
-
static
|
1358
|
+
static unsigned long spantq_hash(Query *self)
|
1359
1359
|
{
|
1360
1360
|
return spanq_hash(self) ^ str_hash(SpTQ(self)->term);
|
1361
1361
|
}
|
@@ -1430,7 +1430,7 @@ static void spanfq_destroy_i(Query *self)
|
|
1430
1430
|
spanq_destroy_i(self);
|
1431
1431
|
}
|
1432
1432
|
|
1433
|
-
static
|
1433
|
+
static unsigned long spanfq_hash(Query *self)
|
1434
1434
|
{
|
1435
1435
|
return spanq_hash(self) ^ SpFQ(self)->match->hash(SpFQ(self)->match)
|
1436
1436
|
^ SpFQ(self)->end;
|
@@ -1573,10 +1573,10 @@ static void spanoq_destroy_i(Query *self)
|
|
1573
1573
|
spanq_destroy_i(self);
|
1574
1574
|
}
|
1575
1575
|
|
1576
|
-
static
|
1576
|
+
static unsigned long spanoq_hash(Query *self)
|
1577
1577
|
{
|
1578
1578
|
int i;
|
1579
|
-
|
1579
|
+
unsigned long hash = spanq_hash(self);
|
1580
1580
|
SpanOrQuery *soq = SpOQ(self);
|
1581
1581
|
|
1582
1582
|
for (i = 0; i < soq->c_cnt; i++) {
|
@@ -1756,10 +1756,10 @@ static void spannq_destroy(Query *self)
|
|
1756
1756
|
spanq_destroy_i(self);
|
1757
1757
|
}
|
1758
1758
|
|
1759
|
-
static
|
1759
|
+
static unsigned long spannq_hash(Query *self)
|
1760
1760
|
{
|
1761
1761
|
int i;
|
1762
|
-
|
1762
|
+
unsigned long hash = spanq_hash(self);
|
1763
1763
|
SpanNearQuery *snq = SpNQ(self);
|
1764
1764
|
|
1765
1765
|
for (i = 0; i < snq->c_cnt; i++) {
|
@@ -1907,7 +1907,7 @@ static void spanxq_destroy(Query *self)
|
|
1907
1907
|
spanq_destroy_i(self);
|
1908
1908
|
}
|
1909
1909
|
|
1910
|
-
static
|
1910
|
+
static unsigned long spanxq_hash(Query *self)
|
1911
1911
|
{
|
1912
1912
|
SpanNotQuery *sxq = SpXQ(self);
|
1913
1913
|
return spanq_hash(self) ^ sxq->inc->hash(sxq->inc)
|
data/ext/q_term.c
CHANGED
@@ -289,7 +289,7 @@ static void tq_extract_terms(Query *self, HashSet *terms)
|
|
289
289
|
hs_add(terms, term_new(TQ(self)->field, TQ(self)->term));
|
290
290
|
}
|
291
291
|
|
292
|
-
static
|
292
|
+
static unsigned long tq_hash(Query *self)
|
293
293
|
{
|
294
294
|
return str_hash(TQ(self)->term) ^ str_hash(TQ(self)->field);
|
295
295
|
}
|
data/ext/q_wildcard.c
CHANGED
data/ext/r_analysis.c
CHANGED
@@ -4,6 +4,8 @@
|
|
4
4
|
#include "ferret.h"
|
5
5
|
#include "analysis.h"
|
6
6
|
|
7
|
+
static char *frt_locale = NULL;
|
8
|
+
|
7
9
|
static VALUE mAnalysis;
|
8
10
|
|
9
11
|
static VALUE cToken;
|
@@ -808,6 +810,7 @@ static VALUE
|
|
808
810
|
frt_letter_tokenizer_init(int argc, VALUE *argv, VALUE self)
|
809
811
|
{
|
810
812
|
TS_ARGS(false);
|
813
|
+
if (!frt_locale) frt_locale = setlocale(LC_CTYPE, "");
|
811
814
|
return get_wrapped_ts(self, rstr, mb_letter_tokenizer_new(lower));
|
812
815
|
}
|
813
816
|
|
@@ -836,6 +839,7 @@ static VALUE
|
|
836
839
|
frt_whitespace_tokenizer_init(int argc, VALUE *argv, VALUE self)
|
837
840
|
{
|
838
841
|
TS_ARGS(false);
|
842
|
+
if (!frt_locale) frt_locale = setlocale(LC_CTYPE, "");
|
839
843
|
return get_wrapped_ts(self, rstr, mb_whitespace_tokenizer_new(lower));
|
840
844
|
}
|
841
845
|
|
@@ -863,6 +867,7 @@ frt_a_standard_tokenizer_init(VALUE self, VALUE rstr)
|
|
863
867
|
static VALUE
|
864
868
|
frt_standard_tokenizer_init(VALUE self, VALUE rstr)
|
865
869
|
{
|
870
|
+
if (!frt_locale) frt_locale = setlocale(LC_CTYPE, "");
|
866
871
|
return get_wrapped_ts(self, rstr, mb_standard_tokenizer_new());
|
867
872
|
}
|
868
873
|
|
@@ -902,6 +907,7 @@ static VALUE
|
|
902
907
|
frt_lowercase_filter_init(VALUE self, VALUE rsub_ts)
|
903
908
|
{
|
904
909
|
TokenStream *ts = frt_get_cwrapped_rts(rsub_ts);
|
910
|
+
if (!frt_locale) frt_locale = setlocale(LC_CTYPE, "");
|
905
911
|
ts = mb_lowercase_filter_new(ts);
|
906
912
|
object_add(&(TkFilt(ts)->sub_ts), rsub_ts);
|
907
913
|
|
@@ -1150,6 +1156,7 @@ frt_white_space_analyzer_init(int argc, VALUE *argv, VALUE self)
|
|
1150
1156
|
{
|
1151
1157
|
Analyzer *a;
|
1152
1158
|
GET_LOWER(false);
|
1159
|
+
if (!frt_locale) frt_locale = setlocale(LC_CTYPE, "");
|
1153
1160
|
a = mb_whitespace_analyzer_new(lower);
|
1154
1161
|
Frt_Wrap_Struct(self, NULL, &frt_analyzer_free, a);
|
1155
1162
|
object_add(a, self);
|
@@ -1192,6 +1199,7 @@ frt_letter_analyzer_init(int argc, VALUE *argv, VALUE self)
|
|
1192
1199
|
{
|
1193
1200
|
Analyzer *a;
|
1194
1201
|
GET_LOWER(true);
|
1202
|
+
if (!frt_locale) frt_locale = setlocale(LC_CTYPE, "");
|
1195
1203
|
a = mb_letter_analyzer_new(lower);
|
1196
1204
|
Frt_Wrap_Struct(self, NULL, &frt_analyzer_free, a);
|
1197
1205
|
object_add(a, self);
|
@@ -1263,6 +1271,7 @@ frt_standard_analyzer_init(int argc, VALUE *argv, VALUE self)
|
|
1263
1271
|
bool lower;
|
1264
1272
|
VALUE rlower, rstop_words;
|
1265
1273
|
Analyzer *a;
|
1274
|
+
if (!frt_locale) frt_locale = setlocale(LC_CTYPE, "");
|
1266
1275
|
rb_scan_args(argc, argv, "02", &rstop_words, &rlower);
|
1267
1276
|
lower = ((rlower == Qnil) ? true : RTEST(rlower));
|
1268
1277
|
if (rstop_words != Qnil) {
|
@@ -1390,8 +1399,6 @@ frt_re_analyzer_init(int argc, VALUE *argv, VALUE self)
|
|
1390
1399
|
*
|
1391
1400
|
****************************************************************************/
|
1392
1401
|
|
1393
|
-
static char *frt_locale = NULL;
|
1394
|
-
|
1395
1402
|
/*
|
1396
1403
|
* call-seq:
|
1397
1404
|
* Ferret.locale -> locale_str
|
@@ -1415,7 +1422,7 @@ static VALUE frt_get_locale(VALUE self, VALUE locale)
|
|
1415
1422
|
static VALUE frt_set_locale(VALUE self, VALUE locale)
|
1416
1423
|
{
|
1417
1424
|
char *l = ((locale == Qnil) ? NULL : RSTRING(rb_obj_as_string(locale))->ptr);
|
1418
|
-
frt_locale = setlocale(
|
1425
|
+
frt_locale = setlocale(LC_CTYPE, l);
|
1419
1426
|
return frt_locale ? rb_str_new2(frt_locale) : Qnil;
|
1420
1427
|
}
|
1421
1428
|
|
@@ -2188,7 +2195,6 @@ Init_Analysis(void)
|
|
2188
2195
|
rb_define_const(mFerret, "OBJECT_SPACE", object_space);
|
2189
2196
|
|
2190
2197
|
/*** * * Locale stuff * * ***/
|
2191
|
-
frt_locale = setlocale(LC_ALL, "");
|
2192
2198
|
rb_define_singleton_method(mFerret, "locale=", frt_set_locale, 1);
|
2193
2199
|
rb_define_singleton_method(mFerret, "locale", frt_get_locale, 0);
|
2194
2200
|
|
data/ext/r_index.c
CHANGED
@@ -240,9 +240,11 @@ frt_fi_is_indexed(VALUE self)
|
|
240
240
|
* fi.tokenized? -> bool
|
241
241
|
*
|
242
242
|
* Return true if the field is tokenized. Tokenizing is the process of
|
243
|
-
* breaking the field up into tokens. That is "the quick brown fox" becomes
|
244
|
-
*
|
245
|
-
*
|
243
|
+
* breaking the field up into tokens. That is "the quick brown fox" becomes:
|
244
|
+
*
|
245
|
+
* ["the", "quick", "brown", "fox"]
|
246
|
+
*
|
247
|
+
* A field can only be tokenized if it is indexed.
|
246
248
|
*/
|
247
249
|
static VALUE
|
248
250
|
frt_fi_is_tokenized(VALUE self)
|
@@ -595,7 +597,8 @@ frt_fis_create_index(VALUE self, VALUE rdir)
|
|
595
597
|
* call-seq:
|
596
598
|
* fis.fields -> symbol array
|
597
599
|
*
|
598
|
-
* Return a list of the
|
600
|
+
* Return a list of the field names (as symbols) of all the fields in the
|
601
|
+
* index.
|
599
602
|
*/
|
600
603
|
static VALUE
|
601
604
|
frt_fis_get_fields(VALUE self)
|
@@ -609,6 +612,26 @@ frt_fis_get_fields(VALUE self)
|
|
609
612
|
return rfield_names;
|
610
613
|
}
|
611
614
|
|
615
|
+
/*
|
616
|
+
* call-seq:
|
617
|
+
* fis.tokenized_fields -> symbol array
|
618
|
+
*
|
619
|
+
* Return a list of the field names (as symbols) of all the tokenized fields
|
620
|
+
* in the index.
|
621
|
+
*/
|
622
|
+
static VALUE
|
623
|
+
frt_fis_get_tk_fields(VALUE self)
|
624
|
+
{
|
625
|
+
FieldInfos *fis = (FieldInfos *)DATA_PTR(self);
|
626
|
+
VALUE rfield_names = rb_ary_new();
|
627
|
+
int i;
|
628
|
+
for (i = 0; i < fis->size; i++) {
|
629
|
+
if (!fi_is_tokenized(fis->fields[i])) continue;
|
630
|
+
rb_ary_push(rfield_names, ID2SYM(rb_intern(fis->fields[i]->name)));
|
631
|
+
}
|
632
|
+
return rfield_names;
|
633
|
+
}
|
634
|
+
|
612
635
|
/****************************************************************************
|
613
636
|
*
|
614
637
|
* TermEnum Methods
|
@@ -2375,7 +2398,7 @@ frt_ir_terms_from(VALUE self, VALUE rfield, VALUE rterm)
|
|
2375
2398
|
|
2376
2399
|
/*
|
2377
2400
|
* call-seq:
|
2378
|
-
* index_reader.
|
2401
|
+
* index_reader.fields -> array of field-names
|
2379
2402
|
*
|
2380
2403
|
* Returns an array of field names in the index. This can be used to pass to
|
2381
2404
|
* the QueryParser so that the QueryParser knows how to expand the "*"
|
@@ -2383,7 +2406,7 @@ frt_ir_terms_from(VALUE self, VALUE rfield, VALUE rterm)
|
|
2383
2406
|
* gathered from the FieldInfos object.
|
2384
2407
|
*/
|
2385
2408
|
static VALUE
|
2386
|
-
|
2409
|
+
frt_ir_fields(VALUE self)
|
2387
2410
|
{
|
2388
2411
|
IndexReader *ir = (IndexReader *)DATA_PTR(self);
|
2389
2412
|
FieldInfos *fis = ir->fis;
|
@@ -2408,6 +2431,29 @@ frt_ir_field_infos(VALUE self)
|
|
2408
2431
|
return frt_get_field_infos(ir->fis);
|
2409
2432
|
}
|
2410
2433
|
|
2434
|
+
/*
|
2435
|
+
* call-seq:
|
2436
|
+
* index_reader.tokenized_fields -> array of field-names
|
2437
|
+
*
|
2438
|
+
* Returns an array of field names of all of the tokenized fields in the
|
2439
|
+
* index. This can be used to pass to the QueryParser so that the QueryParser
|
2440
|
+
* knows how to expand the "*" wild-card to all fields in the index. A list
|
2441
|
+
* of field names can also be gathered from the FieldInfos object.
|
2442
|
+
*/
|
2443
|
+
static VALUE
|
2444
|
+
frt_ir_tk_fields(VALUE self)
|
2445
|
+
{
|
2446
|
+
IndexReader *ir = (IndexReader *)DATA_PTR(self);
|
2447
|
+
FieldInfos *fis = ir->fis;
|
2448
|
+
VALUE rfield_names = rb_ary_new();
|
2449
|
+
int i;
|
2450
|
+
for (i = 0; i < fis->size; i++) {
|
2451
|
+
if (!fi_is_tokenized(fis->fields[i])) continue;
|
2452
|
+
rb_ary_push(rfield_names, ID2SYM(rb_intern(fis->fields[i]->name)));
|
2453
|
+
}
|
2454
|
+
return rfield_names;
|
2455
|
+
}
|
2456
|
+
|
2411
2457
|
/****************************************************************************
|
2412
2458
|
*
|
2413
2459
|
* Init Functions
|
@@ -2515,6 +2561,16 @@ frt_ir_field_infos(VALUE self)
|
|
2515
2561
|
* | |
|
2516
2562
|
* | :with_positions_offsets | Store term-vectors with
|
2517
2563
|
* | (default) | positions and offsets.
|
2564
|
+
* -------------|-------------------------|------------------------------
|
2565
|
+
* :boost | Float | The boost property is used to
|
2566
|
+
* | | set the default boost for a
|
2567
|
+
* | | field. This boost value will
|
2568
|
+
* | | be used for all instances of the
|
2569
|
+
* | | field in the index unless
|
2570
|
+
* | | otherwise specified when you
|
2571
|
+
* | | create the field. All values
|
2572
|
+
* | | should be positive.
|
2573
|
+
* | |
|
2518
2574
|
*
|
2519
2575
|
* == Examples
|
2520
2576
|
*
|
@@ -2625,7 +2681,8 @@ Init_FieldInfos(void)
|
|
2625
2681
|
rb_define_method(cFieldInfos, "to_s", frt_fis_to_s, 0);
|
2626
2682
|
rb_define_method(cFieldInfos, "create_index",
|
2627
2683
|
frt_fis_create_index, 1);
|
2628
|
-
rb_define_method(cFieldInfos, "fields", frt_fis_get_fields,
|
2684
|
+
rb_define_method(cFieldInfos, "fields", frt_fis_get_fields, 0);
|
2685
|
+
rb_define_method(cFieldInfos, "tokenized_fields", frt_fis_get_tk_fields, 0);
|
2629
2686
|
}
|
2630
2687
|
|
2631
2688
|
/*
|
@@ -2717,21 +2774,33 @@ Init_TermDocEnum(void)
|
|
2717
2774
|
rb_define_method(cTermDocEnum, "skip_to", frt_tde_skip_to, 1);
|
2718
2775
|
}
|
2719
2776
|
|
2777
|
+
/* rdochack
|
2778
|
+
cTermVector = rb_define_class_under(mIndex, "TermVector", rb_cObject);
|
2779
|
+
*/
|
2780
|
+
|
2720
2781
|
/*
|
2721
2782
|
* Document-class: Ferret::Index::TermVector::TVOffsets
|
2722
2783
|
*
|
2723
2784
|
* == Summary
|
2724
2785
|
*
|
2725
2786
|
* Holds the start and end byte-offsets of a term in a field. For example, if
|
2726
|
-
* the field was "the quick brown fox" then the start and end offsets of
|
2727
|
-
*
|
2728
|
-
*
|
2729
|
-
*
|
2787
|
+
* the field was "the quick brown fox" then the start and end offsets of:
|
2788
|
+
*
|
2789
|
+
* ["the", "quick", "brown", "fox"]
|
2790
|
+
*
|
2791
|
+
* Would be:
|
2792
|
+
*
|
2793
|
+
* [(0,3), (4,9), (10,15), (16,19)]
|
2794
|
+
*
|
2795
|
+
* See the Analysis module for more information on setting the offsets.
|
2730
2796
|
*/
|
2731
2797
|
static void
|
2732
2798
|
Init_TVOffsets(void)
|
2733
2799
|
{
|
2734
2800
|
const char *tv_offsets_class = "TVOffsets";
|
2801
|
+
/* rdochack
|
2802
|
+
cTVOffsets = rb_define_class_under(cTermVector, "TVOffsets", rb_cObject);
|
2803
|
+
*/
|
2735
2804
|
cTVOffsets = rb_struct_define(tv_offsets_class, "start", "end", NULL);
|
2736
2805
|
rb_set_class_path(cTVOffsets, cTermVector, tv_offsets_class);
|
2737
2806
|
rb_const_set(mIndex, rb_intern(tv_offsets_class), cTVOffsets);
|
@@ -2756,6 +2825,9 @@ static void
|
|
2756
2825
|
Init_TVTerm(void)
|
2757
2826
|
{
|
2758
2827
|
const char *tv_term_class = "TVTerm";
|
2828
|
+
/* rdochack
|
2829
|
+
cTVTerm = rb_define_class_under(cTermVector, "TVTerm", rb_cObject);
|
2830
|
+
*/
|
2759
2831
|
cTVTerm = rb_struct_define(tv_term_class, "text", "positions", NULL);
|
2760
2832
|
rb_set_class_path(cTVTerm, cTermVector, tv_term_class);
|
2761
2833
|
rb_const_set(mIndex, rb_intern(tv_term_class), cTVTerm);
|
@@ -2795,6 +2867,9 @@ static void
|
|
2795
2867
|
Init_TermVector(void)
|
2796
2868
|
{
|
2797
2869
|
const char *tv_class = "TermVector";
|
2870
|
+
/* rdochack
|
2871
|
+
cTermVector = rb_define_class_under(mIndex, "TermVector", rb_cObject);
|
2872
|
+
*/
|
2798
2873
|
cTermVector = rb_struct_define(tv_class,
|
2799
2874
|
"field", "terms", "offsets", NULL);
|
2800
2875
|
rb_set_class_path(cTermVector, mIndex, tv_class);
|
@@ -3108,8 +3183,10 @@ Init_IndexReader(void)
|
|
3108
3183
|
rb_define_method(cIndexReader, "doc_freq", frt_ir_doc_freq, 2);
|
3109
3184
|
rb_define_method(cIndexReader, "terms", frt_ir_terms, 1);
|
3110
3185
|
rb_define_method(cIndexReader, "terms_from", frt_ir_terms_from, 2);
|
3111
|
-
rb_define_method(cIndexReader, "
|
3186
|
+
rb_define_method(cIndexReader, "fields", frt_ir_fields, 0);
|
3187
|
+
rb_define_method(cIndexReader, "field_names", frt_ir_fields, 0);
|
3112
3188
|
rb_define_method(cIndexReader, "field_infos", frt_ir_field_infos, 0);
|
3189
|
+
rb_define_method(cIndexReader, "tokenized_fields", frt_ir_tk_fields, 0);
|
3113
3190
|
}
|
3114
3191
|
|
3115
3192
|
/* rdoc hack
|
data/ext/r_qparser.c
CHANGED
@@ -6,7 +6,9 @@ VALUE cQueryParseException;
|
|
6
6
|
|
7
7
|
extern VALUE sym_analyzer;
|
8
8
|
static VALUE sym_wild_card_downcase;
|
9
|
+
static VALUE sym_fields;
|
9
10
|
static VALUE sym_all_fields;
|
11
|
+
static VALUE sym_tkz_fields;
|
10
12
|
static VALUE sym_default_field;
|
11
13
|
static VALUE sym_validate_fields;
|
12
14
|
static VALUE sym_or_default;
|
@@ -42,9 +44,12 @@ static HashSet *
|
|
42
44
|
frt_get_fields(VALUE rfields)
|
43
45
|
{
|
44
46
|
VALUE rval;
|
45
|
-
HashSet *fields
|
47
|
+
HashSet *fields;
|
46
48
|
char *s, *p, *str;
|
47
49
|
|
50
|
+
if (rfields == Qnil) return NULL;
|
51
|
+
|
52
|
+
fields = hs_new_str(&free);
|
48
53
|
if (TYPE(rfields) == T_ARRAY) {
|
49
54
|
int i;
|
50
55
|
for (i = 0; i < RARRAY(rfields)->len; i++) {
|
@@ -87,9 +92,12 @@ frt_get_fields(VALUE rfields)
|
|
87
92
|
* :wild_card_downcase:: Default: true. Specifies whether wild-card queries
|
88
93
|
* should be downcased or not since they are not
|
89
94
|
* passed through the parser
|
90
|
-
* :
|
95
|
+
* :fields:: Default: []. Lets the query parser know what
|
91
96
|
* fields are available for searching, particularly
|
92
97
|
* when the "*" is specified as the search field
|
98
|
+
* :tokenized_fields:: Default: :fields. Lets the query parser know which
|
99
|
+
* fields are tokenized so it knows which fields to
|
100
|
+
* run the analyzer over.
|
93
101
|
* :validate_fields:: Default: false. Set to true if you want an
|
94
102
|
* exception to be raised if there is an attempt to
|
95
103
|
* search a non-existent field
|
@@ -118,6 +126,7 @@ frt_qp_init(int argc, VALUE *argv, VALUE self)
|
|
118
126
|
bool has_options = false;
|
119
127
|
|
120
128
|
HashSet *all_fields = NULL;
|
129
|
+
HashSet *tkz_fields = NULL;
|
121
130
|
HashSet *def_fields = NULL;
|
122
131
|
QParser *qp;
|
123
132
|
|
@@ -133,6 +142,12 @@ frt_qp_init(int argc, VALUE *argv, VALUE self)
|
|
133
142
|
if (Qnil != (rval = rb_hash_aref(roptions, sym_all_fields))) {
|
134
143
|
all_fields = frt_get_fields(rval);
|
135
144
|
}
|
145
|
+
if (Qnil != (rval = rb_hash_aref(roptions, sym_fields))) {
|
146
|
+
all_fields = frt_get_fields(rval);
|
147
|
+
}
|
148
|
+
if (Qnil != (rval = rb_hash_aref(roptions, sym_tkz_fields))) {
|
149
|
+
tkz_fields = frt_get_fields(rval);
|
150
|
+
}
|
136
151
|
} else {
|
137
152
|
def_fields = frt_get_fields(roptions);
|
138
153
|
}
|
@@ -145,7 +160,7 @@ frt_qp_init(int argc, VALUE *argv, VALUE self)
|
|
145
160
|
analyzer = mb_standard_analyzer_new(true);
|
146
161
|
}
|
147
162
|
|
148
|
-
qp = qp_new(all_fields, def_fields, analyzer);
|
163
|
+
qp = qp_new(all_fields, def_fields, tkz_fields, analyzer);
|
149
164
|
qp->allow_any_fields = true;
|
150
165
|
qp->clean_str = true;
|
151
166
|
/* handle options */
|
@@ -255,6 +270,48 @@ frt_qp_set_fields(VALUE self, VALUE rfields)
|
|
255
270
|
return self;
|
256
271
|
}
|
257
272
|
|
273
|
+
/*
|
274
|
+
* call-seq:
|
275
|
+
* query_parser.tokenized_fields -> Array of Symbols
|
276
|
+
*
|
277
|
+
* Returns the list of all tokenized_fields that the QueryParser knows about.
|
278
|
+
*/
|
279
|
+
static VALUE
|
280
|
+
frt_qp_get_tkz_fields(VALUE self)
|
281
|
+
{
|
282
|
+
GET_QP;
|
283
|
+
int i;
|
284
|
+
HashSet *fields = qp->tokenized_fields;
|
285
|
+
if (fields) {
|
286
|
+
VALUE rfields = rb_ary_new();
|
287
|
+
|
288
|
+
for (i = 0; i < fields->size; i++) {
|
289
|
+
rb_ary_push(rfields, ID2SYM(rb_intern((char *)fields->elems[i])));
|
290
|
+
}
|
291
|
+
|
292
|
+
return rfields;
|
293
|
+
}
|
294
|
+
else {
|
295
|
+
return Qnil;
|
296
|
+
}
|
297
|
+
}
|
298
|
+
|
299
|
+
/*
|
300
|
+
* call-seq:
|
301
|
+
* query_parser.tokenized_fields = fields -> self
|
302
|
+
*
|
303
|
+
* Set the list of tokenized_fields. These tokenized_fields are tokenized in
|
304
|
+
* the queries. If this is set to nil then all fields will be tokenized.
|
305
|
+
*/
|
306
|
+
static VALUE
|
307
|
+
frt_qp_set_tkz_fields(VALUE self, VALUE rfields)
|
308
|
+
{
|
309
|
+
GET_QP;
|
310
|
+
if (qp->tokenized_fields) hs_destroy(qp->tokenized_fields);
|
311
|
+
qp->tokenized_fields = frt_get_fields(rfields);
|
312
|
+
return self;
|
313
|
+
}
|
314
|
+
|
258
315
|
/****************************************************************************
|
259
316
|
*
|
260
317
|
* Init function
|
@@ -483,7 +540,9 @@ Init_QueryParser(void)
|
|
483
540
|
{
|
484
541
|
/* hash keys */
|
485
542
|
sym_wild_card_downcase = ID2SYM(rb_intern("wild_card_downcase"));
|
486
|
-
|
543
|
+
sym_fields = ID2SYM(rb_intern("fields"));
|
544
|
+
sym_all_fields = ID2SYM(rb_intern("all_fields"));
|
545
|
+
sym_tkz_fields = ID2SYM(rb_intern("tokenized_fields"));
|
487
546
|
sym_default_field = ID2SYM(rb_intern("default_field"));
|
488
547
|
sym_validate_fields = ID2SYM(rb_intern("validate_fields"));
|
489
548
|
sym_or_default = ID2SYM(rb_intern("or_default"));
|
@@ -500,6 +559,10 @@ Init_QueryParser(void)
|
|
500
559
|
rb_define_method(cQueryParser, "parse", frt_qp_parse, 1);
|
501
560
|
rb_define_method(cQueryParser, "fields", frt_qp_get_fields, 0);
|
502
561
|
rb_define_method(cQueryParser, "fields=", frt_qp_set_fields, 1);
|
562
|
+
rb_define_method(cQueryParser, "tokenized_fields",
|
563
|
+
frt_qp_get_tkz_fields, 0);
|
564
|
+
rb_define_method(cQueryParser, "tokenized_fields=",
|
565
|
+
frt_qp_set_tkz_fields, 1);
|
503
566
|
|
504
567
|
Init_QueryParseException();
|
505
568
|
}
|