ferret 0.10.6 → 0.10.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ext/analysis.c +136 -107
- data/ext/analysis.h +4 -0
- data/ext/bitvector.c +2 -2
- data/ext/bitvector.h +1 -1
- data/ext/compound_io.c +4 -4
- data/ext/defines.h +0 -2
- data/ext/filter.c +3 -3
- data/ext/fs_store.c +4 -4
- data/ext/hash.c +29 -18
- data/ext/hash.h +34 -16
- data/ext/hashset.c +6 -3
- data/ext/hashset.h +1 -1
- data/ext/index.c +22 -20
- data/ext/q_boolean.c +3 -3
- data/ext/q_const_score.c +1 -1
- data/ext/q_fuzzy.c +1 -1
- data/ext/q_match_all.c +1 -1
- data/ext/q_multi_term.c +2 -2
- data/ext/q_parser.c +21 -6
- data/ext/q_phrase.c +2 -2
- data/ext/q_prefix.c +1 -1
- data/ext/q_range.c +3 -3
- data/ext/q_span.c +8 -8
- data/ext/q_term.c +1 -1
- data/ext/q_wildcard.c +1 -1
- data/ext/r_analysis.c +10 -4
- data/ext/r_index.c +89 -12
- data/ext/r_qparser.c +67 -4
- data/ext/r_search.c +11 -1
- data/ext/r_store.c +51 -35
- data/ext/ram_store.c +18 -18
- data/ext/search.c +1 -1
- data/ext/search.h +25 -23
- data/ext/similarity.c +1 -1
- data/ext/sort.c +1 -1
- data/ext/store.c +22 -3
- data/ext/store.h +8 -2
- data/lib/ferret/index.rb +14 -4
- data/lib/ferret_version.rb +1 -1
- data/test/test_helper.rb +3 -0
- data/test/unit/analysis/tc_analyzer.rb +5 -5
- data/test/unit/analysis/tc_token_stream.rb +3 -3
- data/test/unit/index/tc_index_writer.rb +1 -1
- data/test/unit/query_parser/tc_query_parser.rb +7 -5
- data/test/unit/search/tc_filter.rb +1 -1
- data/test/unit/search/tc_fuzzy_query.rb +1 -1
- data/test/unit/search/tc_index_searcher.rb +1 -1
- data/test/unit/search/tc_multi_searcher.rb +1 -1
- data/test/unit/search/tc_search_and_sort.rb +1 -1
- data/test/unit/search/tc_spans.rb +1 -1
- metadata +4 -3
data/ext/q_phrase.c
CHANGED
@@ -1015,11 +1015,11 @@ static Query *phq_rewrite(Query *self, IndexReader *ir)
|
|
1015
1015
|
}
|
1016
1016
|
}
|
1017
1017
|
|
1018
|
-
static
|
1018
|
+
static unsigned long phq_hash(Query *self)
|
1019
1019
|
{
|
1020
1020
|
int i, j;
|
1021
1021
|
PhraseQuery *phq = PhQ(self);
|
1022
|
-
|
1022
|
+
unsigned long hash = str_hash(phq->field);
|
1023
1023
|
for (i = 0; i < phq->pos_cnt; i++) {
|
1024
1024
|
char **terms = phq->positions[i].terms;
|
1025
1025
|
for (j = ary_size(terms) - 1; j >= 0; j--) {
|
data/ext/q_prefix.c
CHANGED
data/ext/q_range.c
CHANGED
@@ -74,7 +74,7 @@ static void range_destroy(Range *range)
|
|
74
74
|
free(range);
|
75
75
|
}
|
76
76
|
|
77
|
-
static
|
77
|
+
static unsigned long range_hash(Range *filt)
|
78
78
|
{
|
79
79
|
return filt->include_lower | (filt->include_upper << 1)
|
80
80
|
| ((str_hash(filt->field)
|
@@ -219,7 +219,7 @@ static BitVector *rfilt_get_bv_i(Filter *filt, IndexReader *ir)
|
|
219
219
|
return bv;
|
220
220
|
}
|
221
221
|
|
222
|
-
static
|
222
|
+
static unsigned long rfilt_hash(Filter *filt)
|
223
223
|
{
|
224
224
|
return range_hash(RF(filt)->range);
|
225
225
|
}
|
@@ -278,7 +278,7 @@ static Query *rq_rewrite(Query *self, IndexReader *ir)
|
|
278
278
|
return csq_new_nr(filter);
|
279
279
|
}
|
280
280
|
|
281
|
-
static
|
281
|
+
static unsigned long rq_hash(Query *self)
|
282
282
|
{
|
283
283
|
return range_hash(RQ(self)->range);
|
284
284
|
}
|
data/ext/q_span.c
CHANGED
@@ -17,7 +17,7 @@
|
|
17
17
|
|
18
18
|
#define SpQ(query) ((SpanQuery *)(query))
|
19
19
|
|
20
|
-
static
|
20
|
+
static unsigned long spanq_hash(Query *self)
|
21
21
|
{
|
22
22
|
return str_hash(SpQ(self)->field);
|
23
23
|
}
|
@@ -1355,7 +1355,7 @@ static HashSet *spantq_get_terms(Query *self)
|
|
1355
1355
|
return terms;
|
1356
1356
|
}
|
1357
1357
|
|
1358
|
-
static
|
1358
|
+
static unsigned long spantq_hash(Query *self)
|
1359
1359
|
{
|
1360
1360
|
return spanq_hash(self) ^ str_hash(SpTQ(self)->term);
|
1361
1361
|
}
|
@@ -1430,7 +1430,7 @@ static void spanfq_destroy_i(Query *self)
|
|
1430
1430
|
spanq_destroy_i(self);
|
1431
1431
|
}
|
1432
1432
|
|
1433
|
-
static
|
1433
|
+
static unsigned long spanfq_hash(Query *self)
|
1434
1434
|
{
|
1435
1435
|
return spanq_hash(self) ^ SpFQ(self)->match->hash(SpFQ(self)->match)
|
1436
1436
|
^ SpFQ(self)->end;
|
@@ -1573,10 +1573,10 @@ static void spanoq_destroy_i(Query *self)
|
|
1573
1573
|
spanq_destroy_i(self);
|
1574
1574
|
}
|
1575
1575
|
|
1576
|
-
static
|
1576
|
+
static unsigned long spanoq_hash(Query *self)
|
1577
1577
|
{
|
1578
1578
|
int i;
|
1579
|
-
|
1579
|
+
unsigned long hash = spanq_hash(self);
|
1580
1580
|
SpanOrQuery *soq = SpOQ(self);
|
1581
1581
|
|
1582
1582
|
for (i = 0; i < soq->c_cnt; i++) {
|
@@ -1756,10 +1756,10 @@ static void spannq_destroy(Query *self)
|
|
1756
1756
|
spanq_destroy_i(self);
|
1757
1757
|
}
|
1758
1758
|
|
1759
|
-
static
|
1759
|
+
static unsigned long spannq_hash(Query *self)
|
1760
1760
|
{
|
1761
1761
|
int i;
|
1762
|
-
|
1762
|
+
unsigned long hash = spanq_hash(self);
|
1763
1763
|
SpanNearQuery *snq = SpNQ(self);
|
1764
1764
|
|
1765
1765
|
for (i = 0; i < snq->c_cnt; i++) {
|
@@ -1907,7 +1907,7 @@ static void spanxq_destroy(Query *self)
|
|
1907
1907
|
spanq_destroy_i(self);
|
1908
1908
|
}
|
1909
1909
|
|
1910
|
-
static
|
1910
|
+
static unsigned long spanxq_hash(Query *self)
|
1911
1911
|
{
|
1912
1912
|
SpanNotQuery *sxq = SpXQ(self);
|
1913
1913
|
return spanq_hash(self) ^ sxq->inc->hash(sxq->inc)
|
data/ext/q_term.c
CHANGED
@@ -289,7 +289,7 @@ static void tq_extract_terms(Query *self, HashSet *terms)
|
|
289
289
|
hs_add(terms, term_new(TQ(self)->field, TQ(self)->term));
|
290
290
|
}
|
291
291
|
|
292
|
-
static
|
292
|
+
static unsigned long tq_hash(Query *self)
|
293
293
|
{
|
294
294
|
return str_hash(TQ(self)->term) ^ str_hash(TQ(self)->field);
|
295
295
|
}
|
data/ext/q_wildcard.c
CHANGED
data/ext/r_analysis.c
CHANGED
@@ -4,6 +4,8 @@
|
|
4
4
|
#include "ferret.h"
|
5
5
|
#include "analysis.h"
|
6
6
|
|
7
|
+
static char *frt_locale = NULL;
|
8
|
+
|
7
9
|
static VALUE mAnalysis;
|
8
10
|
|
9
11
|
static VALUE cToken;
|
@@ -808,6 +810,7 @@ static VALUE
|
|
808
810
|
frt_letter_tokenizer_init(int argc, VALUE *argv, VALUE self)
|
809
811
|
{
|
810
812
|
TS_ARGS(false);
|
813
|
+
if (!frt_locale) frt_locale = setlocale(LC_CTYPE, "");
|
811
814
|
return get_wrapped_ts(self, rstr, mb_letter_tokenizer_new(lower));
|
812
815
|
}
|
813
816
|
|
@@ -836,6 +839,7 @@ static VALUE
|
|
836
839
|
frt_whitespace_tokenizer_init(int argc, VALUE *argv, VALUE self)
|
837
840
|
{
|
838
841
|
TS_ARGS(false);
|
842
|
+
if (!frt_locale) frt_locale = setlocale(LC_CTYPE, "");
|
839
843
|
return get_wrapped_ts(self, rstr, mb_whitespace_tokenizer_new(lower));
|
840
844
|
}
|
841
845
|
|
@@ -863,6 +867,7 @@ frt_a_standard_tokenizer_init(VALUE self, VALUE rstr)
|
|
863
867
|
static VALUE
|
864
868
|
frt_standard_tokenizer_init(VALUE self, VALUE rstr)
|
865
869
|
{
|
870
|
+
if (!frt_locale) frt_locale = setlocale(LC_CTYPE, "");
|
866
871
|
return get_wrapped_ts(self, rstr, mb_standard_tokenizer_new());
|
867
872
|
}
|
868
873
|
|
@@ -902,6 +907,7 @@ static VALUE
|
|
902
907
|
frt_lowercase_filter_init(VALUE self, VALUE rsub_ts)
|
903
908
|
{
|
904
909
|
TokenStream *ts = frt_get_cwrapped_rts(rsub_ts);
|
910
|
+
if (!frt_locale) frt_locale = setlocale(LC_CTYPE, "");
|
905
911
|
ts = mb_lowercase_filter_new(ts);
|
906
912
|
object_add(&(TkFilt(ts)->sub_ts), rsub_ts);
|
907
913
|
|
@@ -1150,6 +1156,7 @@ frt_white_space_analyzer_init(int argc, VALUE *argv, VALUE self)
|
|
1150
1156
|
{
|
1151
1157
|
Analyzer *a;
|
1152
1158
|
GET_LOWER(false);
|
1159
|
+
if (!frt_locale) frt_locale = setlocale(LC_CTYPE, "");
|
1153
1160
|
a = mb_whitespace_analyzer_new(lower);
|
1154
1161
|
Frt_Wrap_Struct(self, NULL, &frt_analyzer_free, a);
|
1155
1162
|
object_add(a, self);
|
@@ -1192,6 +1199,7 @@ frt_letter_analyzer_init(int argc, VALUE *argv, VALUE self)
|
|
1192
1199
|
{
|
1193
1200
|
Analyzer *a;
|
1194
1201
|
GET_LOWER(true);
|
1202
|
+
if (!frt_locale) frt_locale = setlocale(LC_CTYPE, "");
|
1195
1203
|
a = mb_letter_analyzer_new(lower);
|
1196
1204
|
Frt_Wrap_Struct(self, NULL, &frt_analyzer_free, a);
|
1197
1205
|
object_add(a, self);
|
@@ -1263,6 +1271,7 @@ frt_standard_analyzer_init(int argc, VALUE *argv, VALUE self)
|
|
1263
1271
|
bool lower;
|
1264
1272
|
VALUE rlower, rstop_words;
|
1265
1273
|
Analyzer *a;
|
1274
|
+
if (!frt_locale) frt_locale = setlocale(LC_CTYPE, "");
|
1266
1275
|
rb_scan_args(argc, argv, "02", &rstop_words, &rlower);
|
1267
1276
|
lower = ((rlower == Qnil) ? true : RTEST(rlower));
|
1268
1277
|
if (rstop_words != Qnil) {
|
@@ -1390,8 +1399,6 @@ frt_re_analyzer_init(int argc, VALUE *argv, VALUE self)
|
|
1390
1399
|
*
|
1391
1400
|
****************************************************************************/
|
1392
1401
|
|
1393
|
-
static char *frt_locale = NULL;
|
1394
|
-
|
1395
1402
|
/*
|
1396
1403
|
* call-seq:
|
1397
1404
|
* Ferret.locale -> locale_str
|
@@ -1415,7 +1422,7 @@ static VALUE frt_get_locale(VALUE self, VALUE locale)
|
|
1415
1422
|
static VALUE frt_set_locale(VALUE self, VALUE locale)
|
1416
1423
|
{
|
1417
1424
|
char *l = ((locale == Qnil) ? NULL : RSTRING(rb_obj_as_string(locale))->ptr);
|
1418
|
-
frt_locale = setlocale(
|
1425
|
+
frt_locale = setlocale(LC_CTYPE, l);
|
1419
1426
|
return frt_locale ? rb_str_new2(frt_locale) : Qnil;
|
1420
1427
|
}
|
1421
1428
|
|
@@ -2188,7 +2195,6 @@ Init_Analysis(void)
|
|
2188
2195
|
rb_define_const(mFerret, "OBJECT_SPACE", object_space);
|
2189
2196
|
|
2190
2197
|
/*** * * Locale stuff * * ***/
|
2191
|
-
frt_locale = setlocale(LC_ALL, "");
|
2192
2198
|
rb_define_singleton_method(mFerret, "locale=", frt_set_locale, 1);
|
2193
2199
|
rb_define_singleton_method(mFerret, "locale", frt_get_locale, 0);
|
2194
2200
|
|
data/ext/r_index.c
CHANGED
@@ -240,9 +240,11 @@ frt_fi_is_indexed(VALUE self)
|
|
240
240
|
* fi.tokenized? -> bool
|
241
241
|
*
|
242
242
|
* Return true if the field is tokenized. Tokenizing is the process of
|
243
|
-
* breaking the field up into tokens. That is "the quick brown fox" becomes
|
244
|
-
*
|
245
|
-
*
|
243
|
+
* breaking the field up into tokens. That is "the quick brown fox" becomes:
|
244
|
+
*
|
245
|
+
* ["the", "quick", "brown", "fox"]
|
246
|
+
*
|
247
|
+
* A field can only be tokenized if it is indexed.
|
246
248
|
*/
|
247
249
|
static VALUE
|
248
250
|
frt_fi_is_tokenized(VALUE self)
|
@@ -595,7 +597,8 @@ frt_fis_create_index(VALUE self, VALUE rdir)
|
|
595
597
|
* call-seq:
|
596
598
|
* fis.fields -> symbol array
|
597
599
|
*
|
598
|
-
* Return a list of the
|
600
|
+
* Return a list of the field names (as symbols) of all the fields in the
|
601
|
+
* index.
|
599
602
|
*/
|
600
603
|
static VALUE
|
601
604
|
frt_fis_get_fields(VALUE self)
|
@@ -609,6 +612,26 @@ frt_fis_get_fields(VALUE self)
|
|
609
612
|
return rfield_names;
|
610
613
|
}
|
611
614
|
|
615
|
+
/*
|
616
|
+
* call-seq:
|
617
|
+
* fis.tokenized_fields -> symbol array
|
618
|
+
*
|
619
|
+
* Return a list of the field names (as symbols) of all the tokenized fields
|
620
|
+
* in the index.
|
621
|
+
*/
|
622
|
+
static VALUE
|
623
|
+
frt_fis_get_tk_fields(VALUE self)
|
624
|
+
{
|
625
|
+
FieldInfos *fis = (FieldInfos *)DATA_PTR(self);
|
626
|
+
VALUE rfield_names = rb_ary_new();
|
627
|
+
int i;
|
628
|
+
for (i = 0; i < fis->size; i++) {
|
629
|
+
if (!fi_is_tokenized(fis->fields[i])) continue;
|
630
|
+
rb_ary_push(rfield_names, ID2SYM(rb_intern(fis->fields[i]->name)));
|
631
|
+
}
|
632
|
+
return rfield_names;
|
633
|
+
}
|
634
|
+
|
612
635
|
/****************************************************************************
|
613
636
|
*
|
614
637
|
* TermEnum Methods
|
@@ -2375,7 +2398,7 @@ frt_ir_terms_from(VALUE self, VALUE rfield, VALUE rterm)
|
|
2375
2398
|
|
2376
2399
|
/*
|
2377
2400
|
* call-seq:
|
2378
|
-
* index_reader.
|
2401
|
+
* index_reader.fields -> array of field-names
|
2379
2402
|
*
|
2380
2403
|
* Returns an array of field names in the index. This can be used to pass to
|
2381
2404
|
* the QueryParser so that the QueryParser knows how to expand the "*"
|
@@ -2383,7 +2406,7 @@ frt_ir_terms_from(VALUE self, VALUE rfield, VALUE rterm)
|
|
2383
2406
|
* gathered from the FieldInfos object.
|
2384
2407
|
*/
|
2385
2408
|
static VALUE
|
2386
|
-
|
2409
|
+
frt_ir_fields(VALUE self)
|
2387
2410
|
{
|
2388
2411
|
IndexReader *ir = (IndexReader *)DATA_PTR(self);
|
2389
2412
|
FieldInfos *fis = ir->fis;
|
@@ -2408,6 +2431,29 @@ frt_ir_field_infos(VALUE self)
|
|
2408
2431
|
return frt_get_field_infos(ir->fis);
|
2409
2432
|
}
|
2410
2433
|
|
2434
|
+
/*
|
2435
|
+
* call-seq:
|
2436
|
+
* index_reader.tokenized_fields -> array of field-names
|
2437
|
+
*
|
2438
|
+
* Returns an array of field names of all of the tokenized fields in the
|
2439
|
+
* index. This can be used to pass to the QueryParser so that the QueryParser
|
2440
|
+
* knows how to expand the "*" wild-card to all fields in the index. A list
|
2441
|
+
* of field names can also be gathered from the FieldInfos object.
|
2442
|
+
*/
|
2443
|
+
static VALUE
|
2444
|
+
frt_ir_tk_fields(VALUE self)
|
2445
|
+
{
|
2446
|
+
IndexReader *ir = (IndexReader *)DATA_PTR(self);
|
2447
|
+
FieldInfos *fis = ir->fis;
|
2448
|
+
VALUE rfield_names = rb_ary_new();
|
2449
|
+
int i;
|
2450
|
+
for (i = 0; i < fis->size; i++) {
|
2451
|
+
if (!fi_is_tokenized(fis->fields[i])) continue;
|
2452
|
+
rb_ary_push(rfield_names, ID2SYM(rb_intern(fis->fields[i]->name)));
|
2453
|
+
}
|
2454
|
+
return rfield_names;
|
2455
|
+
}
|
2456
|
+
|
2411
2457
|
/****************************************************************************
|
2412
2458
|
*
|
2413
2459
|
* Init Functions
|
@@ -2515,6 +2561,16 @@ frt_ir_field_infos(VALUE self)
|
|
2515
2561
|
* | |
|
2516
2562
|
* | :with_positions_offsets | Store term-vectors with
|
2517
2563
|
* | (default) | positions and offsets.
|
2564
|
+
* -------------|-------------------------|------------------------------
|
2565
|
+
* :boost | Float | The boost property is used to
|
2566
|
+
* | | set the default boost for a
|
2567
|
+
* | | field. This boost value will
|
2568
|
+
* | | be used for all instances of the
|
2569
|
+
* | | field in the index unless
|
2570
|
+
* | | otherwise specified when you
|
2571
|
+
* | | create the field. All values
|
2572
|
+
* | | should be positive.
|
2573
|
+
* | |
|
2518
2574
|
*
|
2519
2575
|
* == Examples
|
2520
2576
|
*
|
@@ -2625,7 +2681,8 @@ Init_FieldInfos(void)
|
|
2625
2681
|
rb_define_method(cFieldInfos, "to_s", frt_fis_to_s, 0);
|
2626
2682
|
rb_define_method(cFieldInfos, "create_index",
|
2627
2683
|
frt_fis_create_index, 1);
|
2628
|
-
rb_define_method(cFieldInfos, "fields", frt_fis_get_fields,
|
2684
|
+
rb_define_method(cFieldInfos, "fields", frt_fis_get_fields, 0);
|
2685
|
+
rb_define_method(cFieldInfos, "tokenized_fields", frt_fis_get_tk_fields, 0);
|
2629
2686
|
}
|
2630
2687
|
|
2631
2688
|
/*
|
@@ -2717,21 +2774,33 @@ Init_TermDocEnum(void)
|
|
2717
2774
|
rb_define_method(cTermDocEnum, "skip_to", frt_tde_skip_to, 1);
|
2718
2775
|
}
|
2719
2776
|
|
2777
|
+
/* rdochack
|
2778
|
+
cTermVector = rb_define_class_under(mIndex, "TermVector", rb_cObject);
|
2779
|
+
*/
|
2780
|
+
|
2720
2781
|
/*
|
2721
2782
|
* Document-class: Ferret::Index::TermVector::TVOffsets
|
2722
2783
|
*
|
2723
2784
|
* == Summary
|
2724
2785
|
*
|
2725
2786
|
* Holds the start and end byte-offsets of a term in a field. For example, if
|
2726
|
-
* the field was "the quick brown fox" then the start and end offsets of
|
2727
|
-
*
|
2728
|
-
*
|
2729
|
-
*
|
2787
|
+
* the field was "the quick brown fox" then the start and end offsets of:
|
2788
|
+
*
|
2789
|
+
* ["the", "quick", "brown", "fox"]
|
2790
|
+
*
|
2791
|
+
* Would be:
|
2792
|
+
*
|
2793
|
+
* [(0,3), (4,9), (10,15), (16,19)]
|
2794
|
+
*
|
2795
|
+
* See the Analysis module for more information on setting the offsets.
|
2730
2796
|
*/
|
2731
2797
|
static void
|
2732
2798
|
Init_TVOffsets(void)
|
2733
2799
|
{
|
2734
2800
|
const char *tv_offsets_class = "TVOffsets";
|
2801
|
+
/* rdochack
|
2802
|
+
cTVOffsets = rb_define_class_under(cTermVector, "TVOffsets", rb_cObject);
|
2803
|
+
*/
|
2735
2804
|
cTVOffsets = rb_struct_define(tv_offsets_class, "start", "end", NULL);
|
2736
2805
|
rb_set_class_path(cTVOffsets, cTermVector, tv_offsets_class);
|
2737
2806
|
rb_const_set(mIndex, rb_intern(tv_offsets_class), cTVOffsets);
|
@@ -2756,6 +2825,9 @@ static void
|
|
2756
2825
|
Init_TVTerm(void)
|
2757
2826
|
{
|
2758
2827
|
const char *tv_term_class = "TVTerm";
|
2828
|
+
/* rdochack
|
2829
|
+
cTVTerm = rb_define_class_under(cTermVector, "TVTerm", rb_cObject);
|
2830
|
+
*/
|
2759
2831
|
cTVTerm = rb_struct_define(tv_term_class, "text", "positions", NULL);
|
2760
2832
|
rb_set_class_path(cTVTerm, cTermVector, tv_term_class);
|
2761
2833
|
rb_const_set(mIndex, rb_intern(tv_term_class), cTVTerm);
|
@@ -2795,6 +2867,9 @@ static void
|
|
2795
2867
|
Init_TermVector(void)
|
2796
2868
|
{
|
2797
2869
|
const char *tv_class = "TermVector";
|
2870
|
+
/* rdochack
|
2871
|
+
cTermVector = rb_define_class_under(mIndex, "TermVector", rb_cObject);
|
2872
|
+
*/
|
2798
2873
|
cTermVector = rb_struct_define(tv_class,
|
2799
2874
|
"field", "terms", "offsets", NULL);
|
2800
2875
|
rb_set_class_path(cTermVector, mIndex, tv_class);
|
@@ -3108,8 +3183,10 @@ Init_IndexReader(void)
|
|
3108
3183
|
rb_define_method(cIndexReader, "doc_freq", frt_ir_doc_freq, 2);
|
3109
3184
|
rb_define_method(cIndexReader, "terms", frt_ir_terms, 1);
|
3110
3185
|
rb_define_method(cIndexReader, "terms_from", frt_ir_terms_from, 2);
|
3111
|
-
rb_define_method(cIndexReader, "
|
3186
|
+
rb_define_method(cIndexReader, "fields", frt_ir_fields, 0);
|
3187
|
+
rb_define_method(cIndexReader, "field_names", frt_ir_fields, 0);
|
3112
3188
|
rb_define_method(cIndexReader, "field_infos", frt_ir_field_infos, 0);
|
3189
|
+
rb_define_method(cIndexReader, "tokenized_fields", frt_ir_tk_fields, 0);
|
3113
3190
|
}
|
3114
3191
|
|
3115
3192
|
/* rdoc hack
|
data/ext/r_qparser.c
CHANGED
@@ -6,7 +6,9 @@ VALUE cQueryParseException;
|
|
6
6
|
|
7
7
|
extern VALUE sym_analyzer;
|
8
8
|
static VALUE sym_wild_card_downcase;
|
9
|
+
static VALUE sym_fields;
|
9
10
|
static VALUE sym_all_fields;
|
11
|
+
static VALUE sym_tkz_fields;
|
10
12
|
static VALUE sym_default_field;
|
11
13
|
static VALUE sym_validate_fields;
|
12
14
|
static VALUE sym_or_default;
|
@@ -42,9 +44,12 @@ static HashSet *
|
|
42
44
|
frt_get_fields(VALUE rfields)
|
43
45
|
{
|
44
46
|
VALUE rval;
|
45
|
-
HashSet *fields
|
47
|
+
HashSet *fields;
|
46
48
|
char *s, *p, *str;
|
47
49
|
|
50
|
+
if (rfields == Qnil) return NULL;
|
51
|
+
|
52
|
+
fields = hs_new_str(&free);
|
48
53
|
if (TYPE(rfields) == T_ARRAY) {
|
49
54
|
int i;
|
50
55
|
for (i = 0; i < RARRAY(rfields)->len; i++) {
|
@@ -87,9 +92,12 @@ frt_get_fields(VALUE rfields)
|
|
87
92
|
* :wild_card_downcase:: Default: true. Specifies whether wild-card queries
|
88
93
|
* should be downcased or not since they are not
|
89
94
|
* passed through the parser
|
90
|
-
* :
|
95
|
+
* :fields:: Default: []. Lets the query parser know what
|
91
96
|
* fields are available for searching, particularly
|
92
97
|
* when the "*" is specified as the search field
|
98
|
+
* :tokenized_fields:: Default: :fields. Lets the query parser know which
|
99
|
+
* fields are tokenized so it knows which fields to
|
100
|
+
* run the analyzer over.
|
93
101
|
* :validate_fields:: Default: false. Set to true if you want an
|
94
102
|
* exception to be raised if there is an attempt to
|
95
103
|
* search a non-existent field
|
@@ -118,6 +126,7 @@ frt_qp_init(int argc, VALUE *argv, VALUE self)
|
|
118
126
|
bool has_options = false;
|
119
127
|
|
120
128
|
HashSet *all_fields = NULL;
|
129
|
+
HashSet *tkz_fields = NULL;
|
121
130
|
HashSet *def_fields = NULL;
|
122
131
|
QParser *qp;
|
123
132
|
|
@@ -133,6 +142,12 @@ frt_qp_init(int argc, VALUE *argv, VALUE self)
|
|
133
142
|
if (Qnil != (rval = rb_hash_aref(roptions, sym_all_fields))) {
|
134
143
|
all_fields = frt_get_fields(rval);
|
135
144
|
}
|
145
|
+
if (Qnil != (rval = rb_hash_aref(roptions, sym_fields))) {
|
146
|
+
all_fields = frt_get_fields(rval);
|
147
|
+
}
|
148
|
+
if (Qnil != (rval = rb_hash_aref(roptions, sym_tkz_fields))) {
|
149
|
+
tkz_fields = frt_get_fields(rval);
|
150
|
+
}
|
136
151
|
} else {
|
137
152
|
def_fields = frt_get_fields(roptions);
|
138
153
|
}
|
@@ -145,7 +160,7 @@ frt_qp_init(int argc, VALUE *argv, VALUE self)
|
|
145
160
|
analyzer = mb_standard_analyzer_new(true);
|
146
161
|
}
|
147
162
|
|
148
|
-
qp = qp_new(all_fields, def_fields, analyzer);
|
163
|
+
qp = qp_new(all_fields, def_fields, tkz_fields, analyzer);
|
149
164
|
qp->allow_any_fields = true;
|
150
165
|
qp->clean_str = true;
|
151
166
|
/* handle options */
|
@@ -255,6 +270,48 @@ frt_qp_set_fields(VALUE self, VALUE rfields)
|
|
255
270
|
return self;
|
256
271
|
}
|
257
272
|
|
273
|
+
/*
|
274
|
+
* call-seq:
|
275
|
+
* query_parser.tokenized_fields -> Array of Symbols
|
276
|
+
*
|
277
|
+
* Returns the list of all tokenized_fields that the QueryParser knows about.
|
278
|
+
*/
|
279
|
+
static VALUE
|
280
|
+
frt_qp_get_tkz_fields(VALUE self)
|
281
|
+
{
|
282
|
+
GET_QP;
|
283
|
+
int i;
|
284
|
+
HashSet *fields = qp->tokenized_fields;
|
285
|
+
if (fields) {
|
286
|
+
VALUE rfields = rb_ary_new();
|
287
|
+
|
288
|
+
for (i = 0; i < fields->size; i++) {
|
289
|
+
rb_ary_push(rfields, ID2SYM(rb_intern((char *)fields->elems[i])));
|
290
|
+
}
|
291
|
+
|
292
|
+
return rfields;
|
293
|
+
}
|
294
|
+
else {
|
295
|
+
return Qnil;
|
296
|
+
}
|
297
|
+
}
|
298
|
+
|
299
|
+
/*
|
300
|
+
* call-seq:
|
301
|
+
* query_parser.tokenized_fields = fields -> self
|
302
|
+
*
|
303
|
+
* Set the list of tokenized_fields. These tokenized_fields are tokenized in
|
304
|
+
* the queries. If this is set to nil then all fields will be tokenized.
|
305
|
+
*/
|
306
|
+
static VALUE
|
307
|
+
frt_qp_set_tkz_fields(VALUE self, VALUE rfields)
|
308
|
+
{
|
309
|
+
GET_QP;
|
310
|
+
if (qp->tokenized_fields) hs_destroy(qp->tokenized_fields);
|
311
|
+
qp->tokenized_fields = frt_get_fields(rfields);
|
312
|
+
return self;
|
313
|
+
}
|
314
|
+
|
258
315
|
/****************************************************************************
|
259
316
|
*
|
260
317
|
* Init function
|
@@ -483,7 +540,9 @@ Init_QueryParser(void)
|
|
483
540
|
{
|
484
541
|
/* hash keys */
|
485
542
|
sym_wild_card_downcase = ID2SYM(rb_intern("wild_card_downcase"));
|
486
|
-
|
543
|
+
sym_fields = ID2SYM(rb_intern("fields"));
|
544
|
+
sym_all_fields = ID2SYM(rb_intern("all_fields"));
|
545
|
+
sym_tkz_fields = ID2SYM(rb_intern("tokenized_fields"));
|
487
546
|
sym_default_field = ID2SYM(rb_intern("default_field"));
|
488
547
|
sym_validate_fields = ID2SYM(rb_intern("validate_fields"));
|
489
548
|
sym_or_default = ID2SYM(rb_intern("or_default"));
|
@@ -500,6 +559,10 @@ Init_QueryParser(void)
|
|
500
559
|
rb_define_method(cQueryParser, "parse", frt_qp_parse, 1);
|
501
560
|
rb_define_method(cQueryParser, "fields", frt_qp_get_fields, 0);
|
502
561
|
rb_define_method(cQueryParser, "fields=", frt_qp_set_fields, 1);
|
562
|
+
rb_define_method(cQueryParser, "tokenized_fields",
|
563
|
+
frt_qp_get_tkz_fields, 0);
|
564
|
+
rb_define_method(cQueryParser, "tokenized_fields=",
|
565
|
+
frt_qp_set_tkz_fields, 1);
|
503
566
|
|
504
567
|
Init_QueryParseException();
|
505
568
|
}
|