ferret 0.10.6 → 0.10.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. data/ext/analysis.c +136 -107
  2. data/ext/analysis.h +4 -0
  3. data/ext/bitvector.c +2 -2
  4. data/ext/bitvector.h +1 -1
  5. data/ext/compound_io.c +4 -4
  6. data/ext/defines.h +0 -2
  7. data/ext/filter.c +3 -3
  8. data/ext/fs_store.c +4 -4
  9. data/ext/hash.c +29 -18
  10. data/ext/hash.h +34 -16
  11. data/ext/hashset.c +6 -3
  12. data/ext/hashset.h +1 -1
  13. data/ext/index.c +22 -20
  14. data/ext/q_boolean.c +3 -3
  15. data/ext/q_const_score.c +1 -1
  16. data/ext/q_fuzzy.c +1 -1
  17. data/ext/q_match_all.c +1 -1
  18. data/ext/q_multi_term.c +2 -2
  19. data/ext/q_parser.c +21 -6
  20. data/ext/q_phrase.c +2 -2
  21. data/ext/q_prefix.c +1 -1
  22. data/ext/q_range.c +3 -3
  23. data/ext/q_span.c +8 -8
  24. data/ext/q_term.c +1 -1
  25. data/ext/q_wildcard.c +1 -1
  26. data/ext/r_analysis.c +10 -4
  27. data/ext/r_index.c +89 -12
  28. data/ext/r_qparser.c +67 -4
  29. data/ext/r_search.c +11 -1
  30. data/ext/r_store.c +51 -35
  31. data/ext/ram_store.c +18 -18
  32. data/ext/search.c +1 -1
  33. data/ext/search.h +25 -23
  34. data/ext/similarity.c +1 -1
  35. data/ext/sort.c +1 -1
  36. data/ext/store.c +22 -3
  37. data/ext/store.h +8 -2
  38. data/lib/ferret/index.rb +14 -4
  39. data/lib/ferret_version.rb +1 -1
  40. data/test/test_helper.rb +3 -0
  41. data/test/unit/analysis/tc_analyzer.rb +5 -5
  42. data/test/unit/analysis/tc_token_stream.rb +3 -3
  43. data/test/unit/index/tc_index_writer.rb +1 -1
  44. data/test/unit/query_parser/tc_query_parser.rb +7 -5
  45. data/test/unit/search/tc_filter.rb +1 -1
  46. data/test/unit/search/tc_fuzzy_query.rb +1 -1
  47. data/test/unit/search/tc_index_searcher.rb +1 -1
  48. data/test/unit/search/tc_multi_searcher.rb +1 -1
  49. data/test/unit/search/tc_search_and_sort.rb +1 -1
  50. data/test/unit/search/tc_spans.rb +1 -1
  51. metadata +4 -3
data/ext/q_phrase.c CHANGED
@@ -1015,11 +1015,11 @@ static Query *phq_rewrite(Query *self, IndexReader *ir)
1015
1015
  }
1016
1016
  }
1017
1017
 
1018
- static ulong phq_hash(Query *self)
1018
+ static unsigned long phq_hash(Query *self)
1019
1019
  {
1020
1020
  int i, j;
1021
1021
  PhraseQuery *phq = PhQ(self);
1022
- ulong hash = str_hash(phq->field);
1022
+ unsigned long hash = str_hash(phq->field);
1023
1023
  for (i = 0; i < phq->pos_cnt; i++) {
1024
1024
  char **terms = phq->positions[i].terms;
1025
1025
  for (j = ary_size(terms) - 1; j >= 0; j--) {
data/ext/q_prefix.c CHANGED
@@ -69,7 +69,7 @@ static void prq_destroy(Query *self)
69
69
  q_destroy_i(self);
70
70
  }
71
71
 
72
- static ulong prq_hash(Query *self)
72
+ static unsigned long prq_hash(Query *self)
73
73
  {
74
74
  return str_hash(PfxQ(self)->field) ^ str_hash(PfxQ(self)->prefix);
75
75
  }
data/ext/q_range.c CHANGED
@@ -74,7 +74,7 @@ static void range_destroy(Range *range)
74
74
  free(range);
75
75
  }
76
76
 
77
- static ulong range_hash(Range *filt)
77
+ static unsigned long range_hash(Range *filt)
78
78
  {
79
79
  return filt->include_lower | (filt->include_upper << 1)
80
80
  | ((str_hash(filt->field)
@@ -219,7 +219,7 @@ static BitVector *rfilt_get_bv_i(Filter *filt, IndexReader *ir)
219
219
  return bv;
220
220
  }
221
221
 
222
- static ulong rfilt_hash(Filter *filt)
222
+ static unsigned long rfilt_hash(Filter *filt)
223
223
  {
224
224
  return range_hash(RF(filt)->range);
225
225
  }
@@ -278,7 +278,7 @@ static Query *rq_rewrite(Query *self, IndexReader *ir)
278
278
  return csq_new_nr(filter);
279
279
  }
280
280
 
281
- static ulong rq_hash(Query *self)
281
+ static unsigned long rq_hash(Query *self)
282
282
  {
283
283
  return range_hash(RQ(self)->range);
284
284
  }
data/ext/q_span.c CHANGED
@@ -17,7 +17,7 @@
17
17
 
18
18
  #define SpQ(query) ((SpanQuery *)(query))
19
19
 
20
- static ulong spanq_hash(Query *self)
20
+ static unsigned long spanq_hash(Query *self)
21
21
  {
22
22
  return str_hash(SpQ(self)->field);
23
23
  }
@@ -1355,7 +1355,7 @@ static HashSet *spantq_get_terms(Query *self)
1355
1355
  return terms;
1356
1356
  }
1357
1357
 
1358
- static ulong spantq_hash(Query *self)
1358
+ static unsigned long spantq_hash(Query *self)
1359
1359
  {
1360
1360
  return spanq_hash(self) ^ str_hash(SpTQ(self)->term);
1361
1361
  }
@@ -1430,7 +1430,7 @@ static void spanfq_destroy_i(Query *self)
1430
1430
  spanq_destroy_i(self);
1431
1431
  }
1432
1432
 
1433
- static ulong spanfq_hash(Query *self)
1433
+ static unsigned long spanfq_hash(Query *self)
1434
1434
  {
1435
1435
  return spanq_hash(self) ^ SpFQ(self)->match->hash(SpFQ(self)->match)
1436
1436
  ^ SpFQ(self)->end;
@@ -1573,10 +1573,10 @@ static void spanoq_destroy_i(Query *self)
1573
1573
  spanq_destroy_i(self);
1574
1574
  }
1575
1575
 
1576
- static ulong spanoq_hash(Query *self)
1576
+ static unsigned long spanoq_hash(Query *self)
1577
1577
  {
1578
1578
  int i;
1579
- ulong hash = spanq_hash(self);
1579
+ unsigned long hash = spanq_hash(self);
1580
1580
  SpanOrQuery *soq = SpOQ(self);
1581
1581
 
1582
1582
  for (i = 0; i < soq->c_cnt; i++) {
@@ -1756,10 +1756,10 @@ static void spannq_destroy(Query *self)
1756
1756
  spanq_destroy_i(self);
1757
1757
  }
1758
1758
 
1759
- static ulong spannq_hash(Query *self)
1759
+ static unsigned long spannq_hash(Query *self)
1760
1760
  {
1761
1761
  int i;
1762
- ulong hash = spanq_hash(self);
1762
+ unsigned long hash = spanq_hash(self);
1763
1763
  SpanNearQuery *snq = SpNQ(self);
1764
1764
 
1765
1765
  for (i = 0; i < snq->c_cnt; i++) {
@@ -1907,7 +1907,7 @@ static void spanxq_destroy(Query *self)
1907
1907
  spanq_destroy_i(self);
1908
1908
  }
1909
1909
 
1910
- static ulong spanxq_hash(Query *self)
1910
+ static unsigned long spanxq_hash(Query *self)
1911
1911
  {
1912
1912
  SpanNotQuery *sxq = SpXQ(self);
1913
1913
  return spanq_hash(self) ^ sxq->inc->hash(sxq->inc)
data/ext/q_term.c CHANGED
@@ -289,7 +289,7 @@ static void tq_extract_terms(Query *self, HashSet *terms)
289
289
  hs_add(terms, term_new(TQ(self)->field, TQ(self)->term));
290
290
  }
291
291
 
292
- static ulong tq_hash(Query *self)
292
+ static unsigned long tq_hash(Query *self)
293
293
  {
294
294
  return str_hash(TQ(self)->term) ^ str_hash(TQ(self)->field);
295
295
  }
data/ext/q_wildcard.c CHANGED
@@ -140,7 +140,7 @@ static void wcq_destroy(Query *self)
140
140
  q_destroy_i(self);
141
141
  }
142
142
 
143
- static ulong wcq_hash(Query *self)
143
+ static unsigned long wcq_hash(Query *self)
144
144
  {
145
145
  return str_hash(WCQ(self)->field) ^ str_hash(WCQ(self)->pattern);
146
146
  }
data/ext/r_analysis.c CHANGED
@@ -4,6 +4,8 @@
4
4
  #include "ferret.h"
5
5
  #include "analysis.h"
6
6
 
7
+ static char *frt_locale = NULL;
8
+
7
9
  static VALUE mAnalysis;
8
10
 
9
11
  static VALUE cToken;
@@ -808,6 +810,7 @@ static VALUE
808
810
  frt_letter_tokenizer_init(int argc, VALUE *argv, VALUE self)
809
811
  {
810
812
  TS_ARGS(false);
813
+ if (!frt_locale) frt_locale = setlocale(LC_CTYPE, "");
811
814
  return get_wrapped_ts(self, rstr, mb_letter_tokenizer_new(lower));
812
815
  }
813
816
 
@@ -836,6 +839,7 @@ static VALUE
836
839
  frt_whitespace_tokenizer_init(int argc, VALUE *argv, VALUE self)
837
840
  {
838
841
  TS_ARGS(false);
842
+ if (!frt_locale) frt_locale = setlocale(LC_CTYPE, "");
839
843
  return get_wrapped_ts(self, rstr, mb_whitespace_tokenizer_new(lower));
840
844
  }
841
845
 
@@ -863,6 +867,7 @@ frt_a_standard_tokenizer_init(VALUE self, VALUE rstr)
863
867
  static VALUE
864
868
  frt_standard_tokenizer_init(VALUE self, VALUE rstr)
865
869
  {
870
+ if (!frt_locale) frt_locale = setlocale(LC_CTYPE, "");
866
871
  return get_wrapped_ts(self, rstr, mb_standard_tokenizer_new());
867
872
  }
868
873
 
@@ -902,6 +907,7 @@ static VALUE
902
907
  frt_lowercase_filter_init(VALUE self, VALUE rsub_ts)
903
908
  {
904
909
  TokenStream *ts = frt_get_cwrapped_rts(rsub_ts);
910
+ if (!frt_locale) frt_locale = setlocale(LC_CTYPE, "");
905
911
  ts = mb_lowercase_filter_new(ts);
906
912
  object_add(&(TkFilt(ts)->sub_ts), rsub_ts);
907
913
 
@@ -1150,6 +1156,7 @@ frt_white_space_analyzer_init(int argc, VALUE *argv, VALUE self)
1150
1156
  {
1151
1157
  Analyzer *a;
1152
1158
  GET_LOWER(false);
1159
+ if (!frt_locale) frt_locale = setlocale(LC_CTYPE, "");
1153
1160
  a = mb_whitespace_analyzer_new(lower);
1154
1161
  Frt_Wrap_Struct(self, NULL, &frt_analyzer_free, a);
1155
1162
  object_add(a, self);
@@ -1192,6 +1199,7 @@ frt_letter_analyzer_init(int argc, VALUE *argv, VALUE self)
1192
1199
  {
1193
1200
  Analyzer *a;
1194
1201
  GET_LOWER(true);
1202
+ if (!frt_locale) frt_locale = setlocale(LC_CTYPE, "");
1195
1203
  a = mb_letter_analyzer_new(lower);
1196
1204
  Frt_Wrap_Struct(self, NULL, &frt_analyzer_free, a);
1197
1205
  object_add(a, self);
@@ -1263,6 +1271,7 @@ frt_standard_analyzer_init(int argc, VALUE *argv, VALUE self)
1263
1271
  bool lower;
1264
1272
  VALUE rlower, rstop_words;
1265
1273
  Analyzer *a;
1274
+ if (!frt_locale) frt_locale = setlocale(LC_CTYPE, "");
1266
1275
  rb_scan_args(argc, argv, "02", &rstop_words, &rlower);
1267
1276
  lower = ((rlower == Qnil) ? true : RTEST(rlower));
1268
1277
  if (rstop_words != Qnil) {
@@ -1390,8 +1399,6 @@ frt_re_analyzer_init(int argc, VALUE *argv, VALUE self)
1390
1399
  *
1391
1400
  ****************************************************************************/
1392
1401
 
1393
- static char *frt_locale = NULL;
1394
-
1395
1402
  /*
1396
1403
  * call-seq:
1397
1404
  * Ferret.locale -> locale_str
@@ -1415,7 +1422,7 @@ static VALUE frt_get_locale(VALUE self, VALUE locale)
1415
1422
  static VALUE frt_set_locale(VALUE self, VALUE locale)
1416
1423
  {
1417
1424
  char *l = ((locale == Qnil) ? NULL : RSTRING(rb_obj_as_string(locale))->ptr);
1418
- frt_locale = setlocale(LC_ALL, l);
1425
+ frt_locale = setlocale(LC_CTYPE, l);
1419
1426
  return frt_locale ? rb_str_new2(frt_locale) : Qnil;
1420
1427
  }
1421
1428
 
@@ -2188,7 +2195,6 @@ Init_Analysis(void)
2188
2195
  rb_define_const(mFerret, "OBJECT_SPACE", object_space);
2189
2196
 
2190
2197
  /*** * * Locale stuff * * ***/
2191
- frt_locale = setlocale(LC_ALL, "");
2192
2198
  rb_define_singleton_method(mFerret, "locale=", frt_set_locale, 1);
2193
2199
  rb_define_singleton_method(mFerret, "locale", frt_get_locale, 0);
2194
2200
 
data/ext/r_index.c CHANGED
@@ -240,9 +240,11 @@ frt_fi_is_indexed(VALUE self)
240
240
  * fi.tokenized? -> bool
241
241
  *
242
242
  * Return true if the field is tokenized. Tokenizing is the process of
243
- * breaking the field up into tokens. That is "the quick brown fox" becomes
244
- * ["the", "quick", "brown", "fox"] This is only possible if the field in
245
- * indexed.
243
+ * breaking the field up into tokens. That is "the quick brown fox" becomes:
244
+ *
245
+ * ["the", "quick", "brown", "fox"]
246
+ *
247
+ * A field can only be tokenized if it is indexed.
246
248
  */
247
249
  static VALUE
248
250
  frt_fi_is_tokenized(VALUE self)
@@ -595,7 +597,8 @@ frt_fis_create_index(VALUE self, VALUE rdir)
595
597
  * call-seq:
596
598
  * fis.fields -> symbol array
597
599
  *
598
- * Return a list of the the field names (as symbols) in the index.
600
+ * Return a list of the field names (as symbols) of all the fieldcs in the
601
+ * index.
599
602
  */
600
603
  static VALUE
601
604
  frt_fis_get_fields(VALUE self)
@@ -609,6 +612,26 @@ frt_fis_get_fields(VALUE self)
609
612
  return rfield_names;
610
613
  }
611
614
 
615
+ /*
616
+ * call-seq:
617
+ * fis.tokenized_fields -> symbol array
618
+ *
619
+ * Return a list of the field names (as symbols) of all the tokenized fields
620
+ * in the index.
621
+ */
622
+ static VALUE
623
+ frt_fis_get_tk_fields(VALUE self)
624
+ {
625
+ FieldInfos *fis = (FieldInfos *)DATA_PTR(self);
626
+ VALUE rfield_names = rb_ary_new();
627
+ int i;
628
+ for (i = 0; i < fis->size; i++) {
629
+ if (!fi_is_tokenized(fis->fields[i])) continue;
630
+ rb_ary_push(rfield_names, ID2SYM(rb_intern(fis->fields[i]->name)));
631
+ }
632
+ return rfield_names;
633
+ }
634
+
612
635
  /****************************************************************************
613
636
  *
614
637
  * TermEnum Methods
@@ -2375,7 +2398,7 @@ frt_ir_terms_from(VALUE self, VALUE rfield, VALUE rterm)
2375
2398
 
2376
2399
  /*
2377
2400
  * call-seq:
2378
- * index_reader.field_names -> array of field-names
2401
+ * index_reader.fields -> array of field-names
2379
2402
  *
2380
2403
  * Returns an array of field names in the index. This can be used to pass to
2381
2404
  * the QueryParser so that the QueryParser knows how to expand the "*"
@@ -2383,7 +2406,7 @@ frt_ir_terms_from(VALUE self, VALUE rfield, VALUE rterm)
2383
2406
  * gathered from the FieldInfos object.
2384
2407
  */
2385
2408
  static VALUE
2386
- frt_ir_field_names(VALUE self)
2409
+ frt_ir_fields(VALUE self)
2387
2410
  {
2388
2411
  IndexReader *ir = (IndexReader *)DATA_PTR(self);
2389
2412
  FieldInfos *fis = ir->fis;
@@ -2408,6 +2431,29 @@ frt_ir_field_infos(VALUE self)
2408
2431
  return frt_get_field_infos(ir->fis);
2409
2432
  }
2410
2433
 
2434
+ /*
2435
+ * call-seq:
2436
+ * index_reader.tokenized_fields -> array of field-names
2437
+ *
2438
+ * Returns an array of field names of all of the tokenized fields in the
2439
+ * index. This can be used to pass to the QueryParser so that the QueryParser
2440
+ * knows how to expand the "*" wild-card to all fields in the index. A list
2441
+ * of field names can also be gathered from the FieldInfos object.
2442
+ */
2443
+ static VALUE
2444
+ frt_ir_tk_fields(VALUE self)
2445
+ {
2446
+ IndexReader *ir = (IndexReader *)DATA_PTR(self);
2447
+ FieldInfos *fis = ir->fis;
2448
+ VALUE rfield_names = rb_ary_new();
2449
+ int i;
2450
+ for (i = 0; i < fis->size; i++) {
2451
+ if (!fi_is_tokenized(fis->fields[i])) continue;
2452
+ rb_ary_push(rfield_names, ID2SYM(rb_intern(fis->fields[i]->name)));
2453
+ }
2454
+ return rfield_names;
2455
+ }
2456
+
2411
2457
  /****************************************************************************
2412
2458
  *
2413
2459
  * Init Functions
@@ -2515,6 +2561,16 @@ frt_ir_field_infos(VALUE self)
2515
2561
  * | |
2516
2562
  * | :with_positions_offsets | Store term-vectors with
2517
2563
  * | (default) | positions and offsets.
2564
+ * -------------|-------------------------|------------------------------
2565
+ * :boost | Float | The boost property is used to
2566
+ * | | set the default boost for a
2567
+ * | | field. This boost value will
2568
+ * | | used for all instances of the
2569
+ * | | field in the index unless
2570
+ * | | otherwise specified when you
2571
+ * | | create the field. All values
2572
+ * | | should be positive.
2573
+ * | |
2518
2574
  *
2519
2575
  * == Examples
2520
2576
  *
@@ -2625,7 +2681,8 @@ Init_FieldInfos(void)
2625
2681
  rb_define_method(cFieldInfos, "to_s", frt_fis_to_s, 0);
2626
2682
  rb_define_method(cFieldInfos, "create_index",
2627
2683
  frt_fis_create_index, 1);
2628
- rb_define_method(cFieldInfos, "fields", frt_fis_get_fields, -1);
2684
+ rb_define_method(cFieldInfos, "fields", frt_fis_get_fields, 0);
2685
+ rb_define_method(cFieldInfos, "tokenized_fields", frt_fis_get_tk_fields, 0);
2629
2686
  }
2630
2687
 
2631
2688
  /*
@@ -2717,21 +2774,33 @@ Init_TermDocEnum(void)
2717
2774
  rb_define_method(cTermDocEnum, "skip_to", frt_tde_skip_to, 1);
2718
2775
  }
2719
2776
 
2777
+ /* rdochack
2778
+ cTermVector = rb_define_class_under(mIndex, "TermVector", rb_cObject);
2779
+ */
2780
+
2720
2781
  /*
2721
2782
  * Document-class: Ferret::Index::TermVector::TVOffsets
2722
2783
  *
2723
2784
  * == Summary
2724
2785
  *
2725
2786
  * Holds the start and end byte-offsets of a term in a field. For example, if
2726
- * the field was "the quick brown fox" then the start and end offsets of
2727
- * ["the", "quick", "brown", "fox"] would be [(0,3), (4,9), (10,15), (16,19)]
2728
- * respectively. See the Analysis module for more information on setting the
2729
- * offsets.
2787
+ * the field was "the quick brown fox" then the start and end offsets of:
2788
+ *
2789
+ * ["the", "quick", "brown", "fox"]
2790
+ *
2791
+ * Would be:
2792
+ *
2793
+ * [(0,3), (4,9), (10,15), (16,19)]
2794
+ *
2795
+ * See the Analysis module for more information on setting the offsets.
2730
2796
  */
2731
2797
  static void
2732
2798
  Init_TVOffsets(void)
2733
2799
  {
2734
2800
  const char *tv_offsets_class = "TVOffsets";
2801
+ /* rdochack
2802
+ cTVOffsets = rb_define_class_under(cTermVector, "TVOffsets", rb_cObject);
2803
+ */
2735
2804
  cTVOffsets = rb_struct_define(tv_offsets_class, "start", "end", NULL);
2736
2805
  rb_set_class_path(cTVOffsets, cTermVector, tv_offsets_class);
2737
2806
  rb_const_set(mIndex, rb_intern(tv_offsets_class), cTVOffsets);
@@ -2756,6 +2825,9 @@ static void
2756
2825
  Init_TVTerm(void)
2757
2826
  {
2758
2827
  const char *tv_term_class = "TVTerm";
2828
+ /* rdochack
2829
+ cTVTerm = rb_define_class_under(cTermVector, "TVTerm", rb_cObject);
2830
+ */
2759
2831
  cTVTerm = rb_struct_define(tv_term_class, "text", "positions", NULL);
2760
2832
  rb_set_class_path(cTVTerm, cTermVector, tv_term_class);
2761
2833
  rb_const_set(mIndex, rb_intern(tv_term_class), cTVTerm);
@@ -2795,6 +2867,9 @@ static void
2795
2867
  Init_TermVector(void)
2796
2868
  {
2797
2869
  const char *tv_class = "TermVector";
2870
+ /* rdochack
2871
+ cTermVector = rb_define_class_under(mIndex, "TermVector", rb_cObject);
2872
+ */
2798
2873
  cTermVector = rb_struct_define(tv_class,
2799
2874
  "field", "terms", "offsets", NULL);
2800
2875
  rb_set_class_path(cTermVector, mIndex, tv_class);
@@ -3108,8 +3183,10 @@ Init_IndexReader(void)
3108
3183
  rb_define_method(cIndexReader, "doc_freq", frt_ir_doc_freq, 2);
3109
3184
  rb_define_method(cIndexReader, "terms", frt_ir_terms, 1);
3110
3185
  rb_define_method(cIndexReader, "terms_from", frt_ir_terms_from, 2);
3111
- rb_define_method(cIndexReader, "field_names", frt_ir_field_names, 0);
3186
+ rb_define_method(cIndexReader, "fields", frt_ir_fields, 0);
3187
+ rb_define_method(cIndexReader, "field_names", frt_ir_fields, 0);
3112
3188
  rb_define_method(cIndexReader, "field_infos", frt_ir_field_infos, 0);
3189
+ rb_define_method(cIndexReader, "tokenized_fields", frt_ir_tk_fields, 0);
3113
3190
  }
3114
3191
 
3115
3192
  /* rdoc hack
data/ext/r_qparser.c CHANGED
@@ -6,7 +6,9 @@ VALUE cQueryParseException;
6
6
 
7
7
  extern VALUE sym_analyzer;
8
8
  static VALUE sym_wild_card_downcase;
9
+ static VALUE sym_fields;
9
10
  static VALUE sym_all_fields;
11
+ static VALUE sym_tkz_fields;
10
12
  static VALUE sym_default_field;
11
13
  static VALUE sym_validate_fields;
12
14
  static VALUE sym_or_default;
@@ -42,9 +44,12 @@ static HashSet *
42
44
  frt_get_fields(VALUE rfields)
43
45
  {
44
46
  VALUE rval;
45
- HashSet *fields = hs_new_str(&free);
47
+ HashSet *fields;
46
48
  char *s, *p, *str;
47
49
 
50
+ if (rfields == Qnil) return NULL;
51
+
52
+ fields = hs_new_str(&free);
48
53
  if (TYPE(rfields) == T_ARRAY) {
49
54
  int i;
50
55
  for (i = 0; i < RARRAY(rfields)->len; i++) {
@@ -87,9 +92,12 @@ frt_get_fields(VALUE rfields)
87
92
  * :wild_card_downcase:: Default: true. Specifies whether wild-card queries
88
93
  * should be downcased or not since they are not
89
94
  * passed through the parser
90
- * :all_fields:: Default: []. Lets the query parser know what
95
+ * :fields:: Default: []. Lets the query parser know what
91
96
  * fields are available for searching, particularly
92
97
  * when the "*" is specified as the search field
98
+ * :tokenized_fields:: Default: :fields. Lets the query parser know which
99
+ * fields are tokenized so it knows which fields to
100
+ * run the analyzer over.
93
101
  * :validate_fields:: Default: false. Set to true if you want an
94
102
  * exception to be raised if there is an attempt to
95
103
  * search a non-existent field
@@ -118,6 +126,7 @@ frt_qp_init(int argc, VALUE *argv, VALUE self)
118
126
  bool has_options = false;
119
127
 
120
128
  HashSet *all_fields = NULL;
129
+ HashSet *tkz_fields = NULL;
121
130
  HashSet *def_fields = NULL;
122
131
  QParser *qp;
123
132
 
@@ -133,6 +142,12 @@ frt_qp_init(int argc, VALUE *argv, VALUE self)
133
142
  if (Qnil != (rval = rb_hash_aref(roptions, sym_all_fields))) {
134
143
  all_fields = frt_get_fields(rval);
135
144
  }
145
+ if (Qnil != (rval = rb_hash_aref(roptions, sym_fields))) {
146
+ all_fields = frt_get_fields(rval);
147
+ }
148
+ if (Qnil != (rval = rb_hash_aref(roptions, sym_tkz_fields))) {
149
+ tkz_fields = frt_get_fields(rval);
150
+ }
136
151
  } else {
137
152
  def_fields = frt_get_fields(roptions);
138
153
  }
@@ -145,7 +160,7 @@ frt_qp_init(int argc, VALUE *argv, VALUE self)
145
160
  analyzer = mb_standard_analyzer_new(true);
146
161
  }
147
162
 
148
- qp = qp_new(all_fields, def_fields, analyzer);
163
+ qp = qp_new(all_fields, def_fields, tkz_fields, analyzer);
149
164
  qp->allow_any_fields = true;
150
165
  qp->clean_str = true;
151
166
  /* handle options */
@@ -255,6 +270,48 @@ frt_qp_set_fields(VALUE self, VALUE rfields)
255
270
  return self;
256
271
  }
257
272
 
273
+ /*
274
+ * call-seq:
275
+ * query_parser.tokenized_fields -> Array of Symbols
276
+ *
277
+ * Returns the list of all tokenized_fields that the QueryParser knows about.
278
+ */
279
+ static VALUE
280
+ frt_qp_get_tkz_fields(VALUE self)
281
+ {
282
+ GET_QP;
283
+ int i;
284
+ HashSet *fields = qp->tokenized_fields;
285
+ if (fields) {
286
+ VALUE rfields = rb_ary_new();
287
+
288
+ for (i = 0; i < fields->size; i++) {
289
+ rb_ary_push(rfields, ID2SYM(rb_intern((char *)fields->elems[i])));
290
+ }
291
+
292
+ return rfields;
293
+ }
294
+ else {
295
+ return Qnil;
296
+ }
297
+ }
298
+
299
+ /*
300
+ * call-seq:
301
+ * query_parser.tokenized_fields = fields -> self
302
+ *
303
+ * Set the list of tokenized_fields. These tokenized_fields are tokenized in
304
+ * the queries. If this is set to Qnil then all fields will be tokenized.
305
+ */
306
+ static VALUE
307
+ frt_qp_set_tkz_fields(VALUE self, VALUE rfields)
308
+ {
309
+ GET_QP;
310
+ if (qp->tokenized_fields) hs_destroy(qp->tokenized_fields);
311
+ qp->tokenized_fields = frt_get_fields(rfields);
312
+ return self;
313
+ }
314
+
258
315
  /****************************************************************************
259
316
  *
260
317
  * Init function
@@ -483,7 +540,9 @@ Init_QueryParser(void)
483
540
  {
484
541
  /* hash keys */
485
542
  sym_wild_card_downcase = ID2SYM(rb_intern("wild_card_downcase"));
486
- sym_all_fields = ID2SYM(rb_intern("fields"));
543
+ sym_fields = ID2SYM(rb_intern("fields"));
544
+ sym_all_fields = ID2SYM(rb_intern("all_fields"));
545
+ sym_tkz_fields = ID2SYM(rb_intern("tokenized_fields"));
487
546
  sym_default_field = ID2SYM(rb_intern("default_field"));
488
547
  sym_validate_fields = ID2SYM(rb_intern("validate_fields"));
489
548
  sym_or_default = ID2SYM(rb_intern("or_default"));
@@ -500,6 +559,10 @@ Init_QueryParser(void)
500
559
  rb_define_method(cQueryParser, "parse", frt_qp_parse, 1);
501
560
  rb_define_method(cQueryParser, "fields", frt_qp_get_fields, 0);
502
561
  rb_define_method(cQueryParser, "fields=", frt_qp_set_fields, 1);
562
+ rb_define_method(cQueryParser, "tokenized_fields",
563
+ frt_qp_get_tkz_fields, 0);
564
+ rb_define_method(cQueryParser, "tokenized_fields=",
565
+ frt_qp_set_tkz_fields, 1);
503
566
 
504
567
  Init_QueryParseException();
505
568
  }