ferret 0.10.6 → 0.10.7

Sign up to get free protection for your applications and to get access to all the features.
Files changed (51) hide show
  1. data/ext/analysis.c +136 -107
  2. data/ext/analysis.h +4 -0
  3. data/ext/bitvector.c +2 -2
  4. data/ext/bitvector.h +1 -1
  5. data/ext/compound_io.c +4 -4
  6. data/ext/defines.h +0 -2
  7. data/ext/filter.c +3 -3
  8. data/ext/fs_store.c +4 -4
  9. data/ext/hash.c +29 -18
  10. data/ext/hash.h +34 -16
  11. data/ext/hashset.c +6 -3
  12. data/ext/hashset.h +1 -1
  13. data/ext/index.c +22 -20
  14. data/ext/q_boolean.c +3 -3
  15. data/ext/q_const_score.c +1 -1
  16. data/ext/q_fuzzy.c +1 -1
  17. data/ext/q_match_all.c +1 -1
  18. data/ext/q_multi_term.c +2 -2
  19. data/ext/q_parser.c +21 -6
  20. data/ext/q_phrase.c +2 -2
  21. data/ext/q_prefix.c +1 -1
  22. data/ext/q_range.c +3 -3
  23. data/ext/q_span.c +8 -8
  24. data/ext/q_term.c +1 -1
  25. data/ext/q_wildcard.c +1 -1
  26. data/ext/r_analysis.c +10 -4
  27. data/ext/r_index.c +89 -12
  28. data/ext/r_qparser.c +67 -4
  29. data/ext/r_search.c +11 -1
  30. data/ext/r_store.c +51 -35
  31. data/ext/ram_store.c +18 -18
  32. data/ext/search.c +1 -1
  33. data/ext/search.h +25 -23
  34. data/ext/similarity.c +1 -1
  35. data/ext/sort.c +1 -1
  36. data/ext/store.c +22 -3
  37. data/ext/store.h +8 -2
  38. data/lib/ferret/index.rb +14 -4
  39. data/lib/ferret_version.rb +1 -1
  40. data/test/test_helper.rb +3 -0
  41. data/test/unit/analysis/tc_analyzer.rb +5 -5
  42. data/test/unit/analysis/tc_token_stream.rb +3 -3
  43. data/test/unit/index/tc_index_writer.rb +1 -1
  44. data/test/unit/query_parser/tc_query_parser.rb +7 -5
  45. data/test/unit/search/tc_filter.rb +1 -1
  46. data/test/unit/search/tc_fuzzy_query.rb +1 -1
  47. data/test/unit/search/tc_index_searcher.rb +1 -1
  48. data/test/unit/search/tc_multi_searcher.rb +1 -1
  49. data/test/unit/search/tc_search_and_sort.rb +1 -1
  50. data/test/unit/search/tc_spans.rb +1 -1
  51. metadata +4 -3
data/ext/q_phrase.c CHANGED
@@ -1015,11 +1015,11 @@ static Query *phq_rewrite(Query *self, IndexReader *ir)
1015
1015
  }
1016
1016
  }
1017
1017
 
1018
- static ulong phq_hash(Query *self)
1018
+ static unsigned long phq_hash(Query *self)
1019
1019
  {
1020
1020
  int i, j;
1021
1021
  PhraseQuery *phq = PhQ(self);
1022
- ulong hash = str_hash(phq->field);
1022
+ unsigned long hash = str_hash(phq->field);
1023
1023
  for (i = 0; i < phq->pos_cnt; i++) {
1024
1024
  char **terms = phq->positions[i].terms;
1025
1025
  for (j = ary_size(terms) - 1; j >= 0; j--) {
data/ext/q_prefix.c CHANGED
@@ -69,7 +69,7 @@ static void prq_destroy(Query *self)
69
69
  q_destroy_i(self);
70
70
  }
71
71
 
72
- static ulong prq_hash(Query *self)
72
+ static unsigned long prq_hash(Query *self)
73
73
  {
74
74
  return str_hash(PfxQ(self)->field) ^ str_hash(PfxQ(self)->prefix);
75
75
  }
data/ext/q_range.c CHANGED
@@ -74,7 +74,7 @@ static void range_destroy(Range *range)
74
74
  free(range);
75
75
  }
76
76
 
77
- static ulong range_hash(Range *filt)
77
+ static unsigned long range_hash(Range *filt)
78
78
  {
79
79
  return filt->include_lower | (filt->include_upper << 1)
80
80
  | ((str_hash(filt->field)
@@ -219,7 +219,7 @@ static BitVector *rfilt_get_bv_i(Filter *filt, IndexReader *ir)
219
219
  return bv;
220
220
  }
221
221
 
222
- static ulong rfilt_hash(Filter *filt)
222
+ static unsigned long rfilt_hash(Filter *filt)
223
223
  {
224
224
  return range_hash(RF(filt)->range);
225
225
  }
@@ -278,7 +278,7 @@ static Query *rq_rewrite(Query *self, IndexReader *ir)
278
278
  return csq_new_nr(filter);
279
279
  }
280
280
 
281
- static ulong rq_hash(Query *self)
281
+ static unsigned long rq_hash(Query *self)
282
282
  {
283
283
  return range_hash(RQ(self)->range);
284
284
  }
data/ext/q_span.c CHANGED
@@ -17,7 +17,7 @@
17
17
 
18
18
  #define SpQ(query) ((SpanQuery *)(query))
19
19
 
20
- static ulong spanq_hash(Query *self)
20
+ static unsigned long spanq_hash(Query *self)
21
21
  {
22
22
  return str_hash(SpQ(self)->field);
23
23
  }
@@ -1355,7 +1355,7 @@ static HashSet *spantq_get_terms(Query *self)
1355
1355
  return terms;
1356
1356
  }
1357
1357
 
1358
- static ulong spantq_hash(Query *self)
1358
+ static unsigned long spantq_hash(Query *self)
1359
1359
  {
1360
1360
  return spanq_hash(self) ^ str_hash(SpTQ(self)->term);
1361
1361
  }
@@ -1430,7 +1430,7 @@ static void spanfq_destroy_i(Query *self)
1430
1430
  spanq_destroy_i(self);
1431
1431
  }
1432
1432
 
1433
- static ulong spanfq_hash(Query *self)
1433
+ static unsigned long spanfq_hash(Query *self)
1434
1434
  {
1435
1435
  return spanq_hash(self) ^ SpFQ(self)->match->hash(SpFQ(self)->match)
1436
1436
  ^ SpFQ(self)->end;
@@ -1573,10 +1573,10 @@ static void spanoq_destroy_i(Query *self)
1573
1573
  spanq_destroy_i(self);
1574
1574
  }
1575
1575
 
1576
- static ulong spanoq_hash(Query *self)
1576
+ static unsigned long spanoq_hash(Query *self)
1577
1577
  {
1578
1578
  int i;
1579
- ulong hash = spanq_hash(self);
1579
+ unsigned long hash = spanq_hash(self);
1580
1580
  SpanOrQuery *soq = SpOQ(self);
1581
1581
 
1582
1582
  for (i = 0; i < soq->c_cnt; i++) {
@@ -1756,10 +1756,10 @@ static void spannq_destroy(Query *self)
1756
1756
  spanq_destroy_i(self);
1757
1757
  }
1758
1758
 
1759
- static ulong spannq_hash(Query *self)
1759
+ static unsigned long spannq_hash(Query *self)
1760
1760
  {
1761
1761
  int i;
1762
- ulong hash = spanq_hash(self);
1762
+ unsigned long hash = spanq_hash(self);
1763
1763
  SpanNearQuery *snq = SpNQ(self);
1764
1764
 
1765
1765
  for (i = 0; i < snq->c_cnt; i++) {
@@ -1907,7 +1907,7 @@ static void spanxq_destroy(Query *self)
1907
1907
  spanq_destroy_i(self);
1908
1908
  }
1909
1909
 
1910
- static ulong spanxq_hash(Query *self)
1910
+ static unsigned long spanxq_hash(Query *self)
1911
1911
  {
1912
1912
  SpanNotQuery *sxq = SpXQ(self);
1913
1913
  return spanq_hash(self) ^ sxq->inc->hash(sxq->inc)
data/ext/q_term.c CHANGED
@@ -289,7 +289,7 @@ static void tq_extract_terms(Query *self, HashSet *terms)
289
289
  hs_add(terms, term_new(TQ(self)->field, TQ(self)->term));
290
290
  }
291
291
 
292
- static ulong tq_hash(Query *self)
292
+ static unsigned long tq_hash(Query *self)
293
293
  {
294
294
  return str_hash(TQ(self)->term) ^ str_hash(TQ(self)->field);
295
295
  }
data/ext/q_wildcard.c CHANGED
@@ -140,7 +140,7 @@ static void wcq_destroy(Query *self)
140
140
  q_destroy_i(self);
141
141
  }
142
142
 
143
- static ulong wcq_hash(Query *self)
143
+ static unsigned long wcq_hash(Query *self)
144
144
  {
145
145
  return str_hash(WCQ(self)->field) ^ str_hash(WCQ(self)->pattern);
146
146
  }
data/ext/r_analysis.c CHANGED
@@ -4,6 +4,8 @@
4
4
  #include "ferret.h"
5
5
  #include "analysis.h"
6
6
 
7
+ static char *frt_locale = NULL;
8
+
7
9
  static VALUE mAnalysis;
8
10
 
9
11
  static VALUE cToken;
@@ -808,6 +810,7 @@ static VALUE
808
810
  frt_letter_tokenizer_init(int argc, VALUE *argv, VALUE self)
809
811
  {
810
812
  TS_ARGS(false);
813
+ if (!frt_locale) frt_locale = setlocale(LC_CTYPE, "");
811
814
  return get_wrapped_ts(self, rstr, mb_letter_tokenizer_new(lower));
812
815
  }
813
816
 
@@ -836,6 +839,7 @@ static VALUE
836
839
  frt_whitespace_tokenizer_init(int argc, VALUE *argv, VALUE self)
837
840
  {
838
841
  TS_ARGS(false);
842
+ if (!frt_locale) frt_locale = setlocale(LC_CTYPE, "");
839
843
  return get_wrapped_ts(self, rstr, mb_whitespace_tokenizer_new(lower));
840
844
  }
841
845
 
@@ -863,6 +867,7 @@ frt_a_standard_tokenizer_init(VALUE self, VALUE rstr)
863
867
  static VALUE
864
868
  frt_standard_tokenizer_init(VALUE self, VALUE rstr)
865
869
  {
870
+ if (!frt_locale) frt_locale = setlocale(LC_CTYPE, "");
866
871
  return get_wrapped_ts(self, rstr, mb_standard_tokenizer_new());
867
872
  }
868
873
 
@@ -902,6 +907,7 @@ static VALUE
902
907
  frt_lowercase_filter_init(VALUE self, VALUE rsub_ts)
903
908
  {
904
909
  TokenStream *ts = frt_get_cwrapped_rts(rsub_ts);
910
+ if (!frt_locale) frt_locale = setlocale(LC_CTYPE, "");
905
911
  ts = mb_lowercase_filter_new(ts);
906
912
  object_add(&(TkFilt(ts)->sub_ts), rsub_ts);
907
913
 
@@ -1150,6 +1156,7 @@ frt_white_space_analyzer_init(int argc, VALUE *argv, VALUE self)
1150
1156
  {
1151
1157
  Analyzer *a;
1152
1158
  GET_LOWER(false);
1159
+ if (!frt_locale) frt_locale = setlocale(LC_CTYPE, "");
1153
1160
  a = mb_whitespace_analyzer_new(lower);
1154
1161
  Frt_Wrap_Struct(self, NULL, &frt_analyzer_free, a);
1155
1162
  object_add(a, self);
@@ -1192,6 +1199,7 @@ frt_letter_analyzer_init(int argc, VALUE *argv, VALUE self)
1192
1199
  {
1193
1200
  Analyzer *a;
1194
1201
  GET_LOWER(true);
1202
+ if (!frt_locale) frt_locale = setlocale(LC_CTYPE, "");
1195
1203
  a = mb_letter_analyzer_new(lower);
1196
1204
  Frt_Wrap_Struct(self, NULL, &frt_analyzer_free, a);
1197
1205
  object_add(a, self);
@@ -1263,6 +1271,7 @@ frt_standard_analyzer_init(int argc, VALUE *argv, VALUE self)
1263
1271
  bool lower;
1264
1272
  VALUE rlower, rstop_words;
1265
1273
  Analyzer *a;
1274
+ if (!frt_locale) frt_locale = setlocale(LC_CTYPE, "");
1266
1275
  rb_scan_args(argc, argv, "02", &rstop_words, &rlower);
1267
1276
  lower = ((rlower == Qnil) ? true : RTEST(rlower));
1268
1277
  if (rstop_words != Qnil) {
@@ -1390,8 +1399,6 @@ frt_re_analyzer_init(int argc, VALUE *argv, VALUE self)
1390
1399
  *
1391
1400
  ****************************************************************************/
1392
1401
 
1393
- static char *frt_locale = NULL;
1394
-
1395
1402
  /*
1396
1403
  * call-seq:
1397
1404
  * Ferret.locale -> locale_str
@@ -1415,7 +1422,7 @@ static VALUE frt_get_locale(VALUE self, VALUE locale)
1415
1422
  static VALUE frt_set_locale(VALUE self, VALUE locale)
1416
1423
  {
1417
1424
  char *l = ((locale == Qnil) ? NULL : RSTRING(rb_obj_as_string(locale))->ptr);
1418
- frt_locale = setlocale(LC_ALL, l);
1425
+ frt_locale = setlocale(LC_CTYPE, l);
1419
1426
  return frt_locale ? rb_str_new2(frt_locale) : Qnil;
1420
1427
  }
1421
1428
 
@@ -2188,7 +2195,6 @@ Init_Analysis(void)
2188
2195
  rb_define_const(mFerret, "OBJECT_SPACE", object_space);
2189
2196
 
2190
2197
  /*** * * Locale stuff * * ***/
2191
- frt_locale = setlocale(LC_ALL, "");
2192
2198
  rb_define_singleton_method(mFerret, "locale=", frt_set_locale, 1);
2193
2199
  rb_define_singleton_method(mFerret, "locale", frt_get_locale, 0);
2194
2200
 
data/ext/r_index.c CHANGED
@@ -240,9 +240,11 @@ frt_fi_is_indexed(VALUE self)
240
240
  * fi.tokenized? -> bool
241
241
  *
242
242
  * Return true if the field is tokenized. Tokenizing is the process of
243
- * breaking the field up into tokens. That is "the quick brown fox" becomes
244
- * ["the", "quick", "brown", "fox"] This is only possible if the field in
245
- * indexed.
243
+ * breaking the field up into tokens. That is "the quick brown fox" becomes:
244
+ *
245
+ * ["the", "quick", "brown", "fox"]
246
+ *
247
+ * A field can only be tokenized if it is indexed.
246
248
  */
247
249
  static VALUE
248
250
  frt_fi_is_tokenized(VALUE self)
@@ -595,7 +597,8 @@ frt_fis_create_index(VALUE self, VALUE rdir)
595
597
  * call-seq:
596
598
  * fis.fields -> symbol array
597
599
  *
598
- * Return a list of the the field names (as symbols) in the index.
600
+ * Return a list of the field names (as symbols) of all the fields in the
601
+ * index.
599
602
  */
600
603
  static VALUE
601
604
  frt_fis_get_fields(VALUE self)
@@ -609,6 +612,26 @@ frt_fis_get_fields(VALUE self)
609
612
  return rfield_names;
610
613
  }
611
614
 
615
+ /*
616
+ * call-seq:
617
+ * fis.tokenized_fields -> symbol array
618
+ *
619
+ * Return a list of the field names (as symbols) of all the tokenized fields
620
+ * in the index.
621
+ */
622
+ static VALUE
623
+ frt_fis_get_tk_fields(VALUE self)
624
+ {
625
+ FieldInfos *fis = (FieldInfos *)DATA_PTR(self);
626
+ VALUE rfield_names = rb_ary_new();
627
+ int i;
628
+ for (i = 0; i < fis->size; i++) {
629
+ if (!fi_is_tokenized(fis->fields[i])) continue;
630
+ rb_ary_push(rfield_names, ID2SYM(rb_intern(fis->fields[i]->name)));
631
+ }
632
+ return rfield_names;
633
+ }
634
+
612
635
  /****************************************************************************
613
636
  *
614
637
  * TermEnum Methods
@@ -2375,7 +2398,7 @@ frt_ir_terms_from(VALUE self, VALUE rfield, VALUE rterm)
2375
2398
 
2376
2399
  /*
2377
2400
  * call-seq:
2378
- * index_reader.field_names -> array of field-names
2401
+ * index_reader.fields -> array of field-names
2379
2402
  *
2380
2403
  * Returns an array of field names in the index. This can be used to pass to
2381
2404
  * the QueryParser so that the QueryParser knows how to expand the "*"
@@ -2383,7 +2406,7 @@ frt_ir_terms_from(VALUE self, VALUE rfield, VALUE rterm)
2383
2406
  * gathered from the FieldInfos object.
2384
2407
  */
2385
2408
  static VALUE
2386
- frt_ir_field_names(VALUE self)
2409
+ frt_ir_fields(VALUE self)
2387
2410
  {
2388
2411
  IndexReader *ir = (IndexReader *)DATA_PTR(self);
2389
2412
  FieldInfos *fis = ir->fis;
@@ -2408,6 +2431,29 @@ frt_ir_field_infos(VALUE self)
2408
2431
  return frt_get_field_infos(ir->fis);
2409
2432
  }
2410
2433
 
2434
+ /*
2435
+ * call-seq:
2436
+ * index_reader.tokenized_fields -> array of field-names
2437
+ *
2438
+ * Returns an array of field names of all of the tokenized fields in the
2439
+ * index. This can be used to pass to the QueryParser so that the QueryParser
2440
+ * knows how to expand the "*" wild-card to all fields in the index. A list
2441
+ * of field names can also be gathered from the FieldInfos object.
2442
+ */
2443
+ static VALUE
2444
+ frt_ir_tk_fields(VALUE self)
2445
+ {
2446
+ IndexReader *ir = (IndexReader *)DATA_PTR(self);
2447
+ FieldInfos *fis = ir->fis;
2448
+ VALUE rfield_names = rb_ary_new();
2449
+ int i;
2450
+ for (i = 0; i < fis->size; i++) {
2451
+ if (!fi_is_tokenized(fis->fields[i])) continue;
2452
+ rb_ary_push(rfield_names, ID2SYM(rb_intern(fis->fields[i]->name)));
2453
+ }
2454
+ return rfield_names;
2455
+ }
2456
+
2411
2457
  /****************************************************************************
2412
2458
  *
2413
2459
  * Init Functions
@@ -2515,6 +2561,16 @@ frt_ir_field_infos(VALUE self)
2515
2561
  * | |
2516
2562
  * | :with_positions_offsets | Store term-vectors with
2517
2563
  * | (default) | positions and offsets.
2564
+ * -------------|-------------------------|------------------------------
2565
+ * :boost | Float | The boost property is used to
2566
+ * | | set the default boost for a
2567
+ * | | field. This boost value will
2568
+ * | | be used for all instances of the
2569
+ * | | field in the index unless
2570
+ * | | otherwise specified when you
2571
+ * | | create the field. All values
2572
+ * | | should be positive.
2573
+ * | |
2518
2574
  *
2519
2575
  * == Examples
2520
2576
  *
@@ -2625,7 +2681,8 @@ Init_FieldInfos(void)
2625
2681
  rb_define_method(cFieldInfos, "to_s", frt_fis_to_s, 0);
2626
2682
  rb_define_method(cFieldInfos, "create_index",
2627
2683
  frt_fis_create_index, 1);
2628
- rb_define_method(cFieldInfos, "fields", frt_fis_get_fields, -1);
2684
+ rb_define_method(cFieldInfos, "fields", frt_fis_get_fields, 0);
2685
+ rb_define_method(cFieldInfos, "tokenized_fields", frt_fis_get_tk_fields, 0);
2629
2686
  }
2630
2687
 
2631
2688
  /*
@@ -2717,21 +2774,33 @@ Init_TermDocEnum(void)
2717
2774
  rb_define_method(cTermDocEnum, "skip_to", frt_tde_skip_to, 1);
2718
2775
  }
2719
2776
 
2777
+ /* rdochack
2778
+ cTermVector = rb_define_class_under(mIndex, "TermVector", rb_cObject);
2779
+ */
2780
+
2720
2781
  /*
2721
2782
  * Document-class: Ferret::Index::TermVector::TVOffsets
2722
2783
  *
2723
2784
  * == Summary
2724
2785
  *
2725
2786
  * Holds the start and end byte-offsets of a term in a field. For example, if
2726
- * the field was "the quick brown fox" then the start and end offsets of
2727
- * ["the", "quick", "brown", "fox"] would be [(0,3), (4,9), (10,15), (16,19)]
2728
- * respectively. See the Analysis module for more information on setting the
2729
- * offsets.
2787
+ * the field was "the quick brown fox" then the start and end offsets of:
2788
+ *
2789
+ * ["the", "quick", "brown", "fox"]
2790
+ *
2791
+ * Would be:
2792
+ *
2793
+ * [(0,3), (4,9), (10,15), (16,19)]
2794
+ *
2795
+ * See the Analysis module for more information on setting the offsets.
2730
2796
  */
2731
2797
  static void
2732
2798
  Init_TVOffsets(void)
2733
2799
  {
2734
2800
  const char *tv_offsets_class = "TVOffsets";
2801
+ /* rdochack
2802
+ cTVOffsets = rb_define_class_under(cTermVector, "TVOffsets", rb_cObject);
2803
+ */
2735
2804
  cTVOffsets = rb_struct_define(tv_offsets_class, "start", "end", NULL);
2736
2805
  rb_set_class_path(cTVOffsets, cTermVector, tv_offsets_class);
2737
2806
  rb_const_set(mIndex, rb_intern(tv_offsets_class), cTVOffsets);
@@ -2756,6 +2825,9 @@ static void
2756
2825
  Init_TVTerm(void)
2757
2826
  {
2758
2827
  const char *tv_term_class = "TVTerm";
2828
+ /* rdochack
2829
+ cTVTerm = rb_define_class_under(cTermVector, "TVTerm", rb_cObject);
2830
+ */
2759
2831
  cTVTerm = rb_struct_define(tv_term_class, "text", "positions", NULL);
2760
2832
  rb_set_class_path(cTVTerm, cTermVector, tv_term_class);
2761
2833
  rb_const_set(mIndex, rb_intern(tv_term_class), cTVTerm);
@@ -2795,6 +2867,9 @@ static void
2795
2867
  Init_TermVector(void)
2796
2868
  {
2797
2869
  const char *tv_class = "TermVector";
2870
+ /* rdochack
2871
+ cTermVector = rb_define_class_under(mIndex, "TermVector", rb_cObject);
2872
+ */
2798
2873
  cTermVector = rb_struct_define(tv_class,
2799
2874
  "field", "terms", "offsets", NULL);
2800
2875
  rb_set_class_path(cTermVector, mIndex, tv_class);
@@ -3108,8 +3183,10 @@ Init_IndexReader(void)
3108
3183
  rb_define_method(cIndexReader, "doc_freq", frt_ir_doc_freq, 2);
3109
3184
  rb_define_method(cIndexReader, "terms", frt_ir_terms, 1);
3110
3185
  rb_define_method(cIndexReader, "terms_from", frt_ir_terms_from, 2);
3111
- rb_define_method(cIndexReader, "field_names", frt_ir_field_names, 0);
3186
+ rb_define_method(cIndexReader, "fields", frt_ir_fields, 0);
3187
+ rb_define_method(cIndexReader, "field_names", frt_ir_fields, 0);
3112
3188
  rb_define_method(cIndexReader, "field_infos", frt_ir_field_infos, 0);
3189
+ rb_define_method(cIndexReader, "tokenized_fields", frt_ir_tk_fields, 0);
3113
3190
  }
3114
3191
 
3115
3192
  /* rdoc hack
data/ext/r_qparser.c CHANGED
@@ -6,7 +6,9 @@ VALUE cQueryParseException;
6
6
 
7
7
  extern VALUE sym_analyzer;
8
8
  static VALUE sym_wild_card_downcase;
9
+ static VALUE sym_fields;
9
10
  static VALUE sym_all_fields;
11
+ static VALUE sym_tkz_fields;
10
12
  static VALUE sym_default_field;
11
13
  static VALUE sym_validate_fields;
12
14
  static VALUE sym_or_default;
@@ -42,9 +44,12 @@ static HashSet *
42
44
  frt_get_fields(VALUE rfields)
43
45
  {
44
46
  VALUE rval;
45
- HashSet *fields = hs_new_str(&free);
47
+ HashSet *fields;
46
48
  char *s, *p, *str;
47
49
 
50
+ if (rfields == Qnil) return NULL;
51
+
52
+ fields = hs_new_str(&free);
48
53
  if (TYPE(rfields) == T_ARRAY) {
49
54
  int i;
50
55
  for (i = 0; i < RARRAY(rfields)->len; i++) {
@@ -87,9 +92,12 @@ frt_get_fields(VALUE rfields)
87
92
  * :wild_card_downcase:: Default: true. Specifies whether wild-card queries
88
93
  * should be downcased or not since they are not
89
94
  * passed through the parser
90
- * :all_fields:: Default: []. Lets the query parser know what
95
+ * :fields:: Default: []. Lets the query parser know what
91
96
  * fields are available for searching, particularly
92
97
  * when the "*" is specified as the search field
98
+ * :tokenized_fields:: Default: :fields. Lets the query parser know which
99
+ * fields are tokenized so it knows which fields to
100
+ * run the analyzer over.
93
101
  * :validate_fields:: Default: false. Set to true if you want an
94
102
  * exception to be raised if there is an attempt to
95
103
  * search a non-existent field
@@ -118,6 +126,7 @@ frt_qp_init(int argc, VALUE *argv, VALUE self)
118
126
  bool has_options = false;
119
127
 
120
128
  HashSet *all_fields = NULL;
129
+ HashSet *tkz_fields = NULL;
121
130
  HashSet *def_fields = NULL;
122
131
  QParser *qp;
123
132
 
@@ -133,6 +142,12 @@ frt_qp_init(int argc, VALUE *argv, VALUE self)
133
142
  if (Qnil != (rval = rb_hash_aref(roptions, sym_all_fields))) {
134
143
  all_fields = frt_get_fields(rval);
135
144
  }
145
+ if (Qnil != (rval = rb_hash_aref(roptions, sym_fields))) {
146
+ all_fields = frt_get_fields(rval);
147
+ }
148
+ if (Qnil != (rval = rb_hash_aref(roptions, sym_tkz_fields))) {
149
+ tkz_fields = frt_get_fields(rval);
150
+ }
136
151
  } else {
137
152
  def_fields = frt_get_fields(roptions);
138
153
  }
@@ -145,7 +160,7 @@ frt_qp_init(int argc, VALUE *argv, VALUE self)
145
160
  analyzer = mb_standard_analyzer_new(true);
146
161
  }
147
162
 
148
- qp = qp_new(all_fields, def_fields, analyzer);
163
+ qp = qp_new(all_fields, def_fields, tkz_fields, analyzer);
149
164
  qp->allow_any_fields = true;
150
165
  qp->clean_str = true;
151
166
  /* handle options */
@@ -255,6 +270,48 @@ frt_qp_set_fields(VALUE self, VALUE rfields)
255
270
  return self;
256
271
  }
257
272
 
273
+ /*
274
+ * call-seq:
275
+ * query_parser.tokenized_fields -> Array of Symbols
276
+ *
277
+ * Returns the list of all tokenized_fields that the QueryParser knows about.
278
+ */
279
+ static VALUE
280
+ frt_qp_get_tkz_fields(VALUE self)
281
+ {
282
+ GET_QP;
283
+ int i;
284
+ HashSet *fields = qp->tokenized_fields;
285
+ if (fields) {
286
+ VALUE rfields = rb_ary_new();
287
+
288
+ for (i = 0; i < fields->size; i++) {
289
+ rb_ary_push(rfields, ID2SYM(rb_intern((char *)fields->elems[i])));
290
+ }
291
+
292
+ return rfields;
293
+ }
294
+ else {
295
+ return Qnil;
296
+ }
297
+ }
298
+
299
+ /*
300
+ * call-seq:
301
+ * query_parser.tokenized_fields = fields -> self
302
+ *
303
+ * Set the list of tokenized_fields. These tokenized_fields are tokenized in
304
+ * the queries. If this is set to nil then all fields will be tokenized.
305
+ */
306
+ static VALUE
307
+ frt_qp_set_tkz_fields(VALUE self, VALUE rfields)
308
+ {
309
+ GET_QP;
310
+ if (qp->tokenized_fields) hs_destroy(qp->tokenized_fields);
311
+ qp->tokenized_fields = frt_get_fields(rfields);
312
+ return self;
313
+ }
314
+
258
315
  /****************************************************************************
259
316
  *
260
317
  * Init function
@@ -483,7 +540,9 @@ Init_QueryParser(void)
483
540
  {
484
541
  /* hash keys */
485
542
  sym_wild_card_downcase = ID2SYM(rb_intern("wild_card_downcase"));
486
- sym_all_fields = ID2SYM(rb_intern("fields"));
543
+ sym_fields = ID2SYM(rb_intern("fields"));
544
+ sym_all_fields = ID2SYM(rb_intern("all_fields"));
545
+ sym_tkz_fields = ID2SYM(rb_intern("tokenized_fields"));
487
546
  sym_default_field = ID2SYM(rb_intern("default_field"));
488
547
  sym_validate_fields = ID2SYM(rb_intern("validate_fields"));
489
548
  sym_or_default = ID2SYM(rb_intern("or_default"));
@@ -500,6 +559,10 @@ Init_QueryParser(void)
500
559
  rb_define_method(cQueryParser, "parse", frt_qp_parse, 1);
501
560
  rb_define_method(cQueryParser, "fields", frt_qp_get_fields, 0);
502
561
  rb_define_method(cQueryParser, "fields=", frt_qp_set_fields, 1);
562
+ rb_define_method(cQueryParser, "tokenized_fields",
563
+ frt_qp_get_tkz_fields, 0);
564
+ rb_define_method(cQueryParser, "tokenized_fields=",
565
+ frt_qp_set_tkz_fields, 1);
503
566
 
504
567
  Init_QueryParseException();
505
568
  }