ferret 0.10.7 → 0.10.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/ext/q_multi_term.c CHANGED
@@ -524,7 +524,7 @@ static char *multi_tq_to_s(Query *self, const char *curr_field)
524
524
  bptr += flen + 1;
525
525
  }
526
526
 
527
- *(bptr++) = '<';
527
+ *(bptr++) = '"';
528
528
  bt_pq_clone = pq_clone(boosted_terms);
529
529
  while ((bt = (BoostedTerm *)pq_pop(bt_pq_clone)) != NULL) {
530
530
  sprintf(bptr, "%s", bt->term);
@@ -540,10 +540,10 @@ static char *multi_tq_to_s(Query *self, const char *curr_field)
540
540
  }
541
541
  pq_destroy(bt_pq_clone);
542
542
 
543
- if (bptr[-1] == '<') {
543
+ if (bptr[-1] == '"') {
544
544
  bptr++; /* handle zero term case */
545
545
  }
546
- bptr[-1] = '>'; /* delete last '|' char */
546
+ bptr[-1] = '"'; /* delete last '|' char */
547
547
  bptr[ 0] = '\0';
548
548
 
549
549
  if (self->boost != 1.0) {
data/ext/q_parser.c CHANGED
@@ -1754,6 +1754,11 @@ static int get_word(YYSTYPE *lvalp, QParser *qp)
1754
1754
  char *bufp = buf;
1755
1755
  qp->buf_index = (qp->buf_index + 1) % QP_CONC_WORDS;
1756
1756
 
1757
+ if (qp->dynbuf) {
1758
+ free(qp->dynbuf);
1759
+ qp->dynbuf = NULL;
1760
+ }
1761
+
1757
1762
  qp->qstrp--; /* need to back up one character */
1758
1763
 
1759
1764
  while (!strchr(not_word, (c=*qp->qstrp++))) {
@@ -1773,6 +1778,14 @@ static int get_word(YYSTYPE *lvalp, QParser *qp)
1773
1778
  default:
1774
1779
  *bufp++ = c;
1775
1780
  }
1781
+ /* we've exceeded the static buffer. switch to the dynamic
1782
+ one. */
1783
+ if (!qp->dynbuf && ((bufp - buf) == MAX_WORD_SIZE)) {
1784
+ qp->dynbuf = ALLOC_AND_ZERO_N(char, strlen(qp->qstr) + 1);
1785
+ strncpy(qp->dynbuf, buf, MAX_WORD_SIZE);
1786
+ buf = qp->dynbuf;
1787
+ bufp = buf + MAX_WORD_SIZE;
1788
+ }
1776
1789
  }
1777
1790
  qp->qstrp--;
1778
1791
  /* check for keywords. There are only four so we have a bit of a hack which
@@ -1843,7 +1856,7 @@ static int yyerror(QParser *qp, char const *msg)
1843
1856
  }
1844
1857
  mutex_unlock(&qp->mutex);
1845
1858
  RAISE(PARSE_ERROR, "couldn't parse query ``%s''. Error message "
1846
- " was %se", buf, (char *)msg);
1859
+ " was %s", buf, (char *)msg);
1847
1860
  }
1848
1861
  return 0;
1849
1862
  }
@@ -2288,6 +2301,9 @@ void qp_destroy(QParser *self)
2288
2301
  if (self->tokenized_fields) {
2289
2302
  hs_destroy(self->tokenized_fields);
2290
2303
  }
2304
+ if (self->dynbuf) {
2305
+ free(self->dynbuf);
2306
+ }
2291
2307
  hs_destroy(self->all_fields);
2292
2308
  hs_destroy(self->fields_buf);
2293
2309
  h_destroy(self->field_cache);
@@ -2335,6 +2351,7 @@ QParser *qp_new(HashSet *all_fields, HashSet *def_fields,
2335
2351
  self->analyzer = analyzer;
2336
2352
  self->ts_cache = h_new_str(&free, (free_ft)&ts_deref);
2337
2353
  self->buf_index = 0;
2354
+ self->dynbuf = 0;
2338
2355
  self->non_tokenizer = non_tokenizer_new();
2339
2356
  mutex_init(&self->mutex, NULL);
2340
2357
  return self;
data/ext/r_analysis.c CHANGED
@@ -1040,7 +1040,7 @@ static TokenStream *
1040
1040
  cwa_get_ts(Analyzer *a, char *field, char *text)
1041
1041
  {
1042
1042
  VALUE rts = rb_funcall(CWA(a)->ranalyzer, id_token_stream, 2,
1043
- rb_intern(field), rb_str_new2(text));
1043
+ ID2SYM(rb_intern(field)), rb_str_new2(text));
1044
1044
  return frt_get_cwrapped_rts(rts);
1045
1045
  }
1046
1046
 
@@ -1104,10 +1104,9 @@ frt_analyzer_token_stream(VALUE self, VALUE rfield, VALUE rstring)
1104
1104
  Analyzer *a;
1105
1105
  GET_A(a, self);
1106
1106
 
1107
- StringValue(rfield);
1108
1107
  StringValue(rstring);
1109
1108
 
1110
- ts = a_get_ts(a, RSTRING(rfield)->ptr, RSTRING(rstring)->ptr);
1109
+ ts = a_get_ts(a, frt_field(rfield), RSTRING(rstring)->ptr);
1111
1110
 
1112
1111
  /* Make sure that there is no entry already */
1113
1112
  object_set(&ts->text, rstring);
data/ext/r_search.c CHANGED
@@ -57,6 +57,8 @@ static VALUE sym_upper;
57
57
  static VALUE sym_lower;
58
58
  static VALUE sym_include_upper;
59
59
  static VALUE sym_include_lower;
60
+ static VALUE sym_upper_exclusive;
61
+ static VALUE sym_lower_exclusive;
60
62
 
61
63
  static VALUE sym_less_than;
62
64
  static VALUE sym_less_than_or_equal_to;
@@ -170,6 +172,12 @@ frt_get_td(TopDocs *td)
170
172
  return rtop_docs;
171
173
  }
172
174
 
175
+ /*
176
+ * call-seq:
177
+ * top_doc.to_s -> string
178
+ *
179
+ * Returns a string representation of the top_doc in readable format.
180
+ */
173
181
  static VALUE
174
182
  frt_td_to_s(VALUE self)
175
183
  {
@@ -180,7 +188,7 @@ frt_td_to_s(VALUE self)
180
188
  char *s = str;
181
189
  VALUE rstr;
182
190
 
183
- sprintf(s, "TopDocs: totalhits = %d, max_score = %f [\n",
191
+ sprintf(s, "TopDocs: total_hits = %d, max_score = %f [\n",
184
192
  FIX2INT(rb_funcall(self, id_total_hits, 0)),
185
193
  NUM2DBL(rb_funcall(self, id_max_score, 0)));
186
194
  s += strlen(s);
@@ -900,6 +908,14 @@ get_range_params(VALUE roptions, char **lterm, char **uterm,
900
908
  *uterm = StringValuePtr(v);
901
909
  *include_upper = true;
902
910
  }
911
+ if (Qnil != (v = rb_hash_aref(roptions, sym_lower_exclusive))) {
912
+ *lterm = StringValuePtr(v);
913
+ *include_lower = false;
914
+ }
915
+ if (Qnil != (v = rb_hash_aref(roptions, sym_upper_exclusive))) {
916
+ *uterm = StringValuePtr(v);
917
+ *include_upper = false;
918
+ }
903
919
  if (Qnil != (v = rb_hash_aref(roptions, sym_include_lower))) {
904
920
  *include_lower = RTEST(v);
905
921
  }
@@ -956,6 +972,8 @@ get_range_params(VALUE roptions, char **lterm, char **uterm,
956
972
  * q = RangeQuery.new(:date, :lower => "200501", :include_lower => false)
957
973
  * # is equivalent to
958
974
  * q = RangeQuery.new(:date, :< => "200501")
975
+ * # is equivalent to
976
+ * q = RangeQuery.new(:date, :lower_exclusive => "200501")
959
977
  *
960
978
  * q = RangeQuery.new(:date, :lower => "200501", :upper => 200502)
961
979
  * # is equivalent to
@@ -993,9 +1011,15 @@ frt_rq_init(VALUE self, VALUE rfield, VALUE roptions)
993
1011
  * the query it will do anything of value. See PhraseQuery#add_term.
994
1012
  */
995
1013
  static VALUE
996
- frt_phq_init(VALUE self, VALUE rfield)
1014
+ frt_phq_init(int argc, VALUE *argv, VALUE self)
997
1015
  {
998
- Query *q = phq_new(frt_field(rfield));
1016
+ VALUE rfield, rslop;
1017
+ Query *q;
1018
+ rb_scan_args(argc, argv, "11", &rfield, &rslop);
1019
+ q = phq_new(frt_field(rfield));
1020
+ if (argc == 2) {
1021
+ ((PhraseQuery *)q)->slop = FIX2INT(rslop);
1022
+ }
999
1023
  Frt_Wrap_Struct(self, NULL, &frt_q_free, q);
1000
1024
  object_add(q, self);
1001
1025
  return self;
@@ -1715,6 +1739,8 @@ frt_f_to_s(VALUE self)
1715
1739
  * f = RangeFilter.new(:date, :lower => "200501", :include_lower => false)
1716
1740
  * # is equivalent to
1717
1741
  * f = RangeFilter.new(:date, :< => "200501")
1742
+ * # is equivalent to
1743
+ * f = RangeFilter.new(:date, :lower_exclusive => "200501")
1718
1744
  *
1719
1745
  * f = RangeFilter.new(:date, :lower => "200501", :upper => 200502)
1720
1746
  * # is equivalent to
@@ -2991,6 +3017,8 @@ Init_RangeQuery(void)
2991
3017
  {
2992
3018
  sym_upper = ID2SYM(rb_intern("upper"));
2993
3019
  sym_lower = ID2SYM(rb_intern("lower"));
3020
+ sym_upper_exclusive = ID2SYM(rb_intern("upper_exclusive"));
3021
+ sym_lower_exclusive = ID2SYM(rb_intern("lower_exclusive"));
2994
3022
  sym_include_upper = ID2SYM(rb_intern("include_upper"));
2995
3023
  sym_include_lower = ID2SYM(rb_intern("include_lower"));
2996
3024
 
@@ -3074,7 +3102,7 @@ Init_PhraseQuery(void)
3074
3102
  cPhraseQuery = rb_define_class_under(mSearch, "PhraseQuery", cQuery);
3075
3103
  rb_define_alloc_func(cPhraseQuery, frt_data_alloc);
3076
3104
 
3077
- rb_define_method(cPhraseQuery, "initialize", frt_phq_init, 1);
3105
+ rb_define_method(cPhraseQuery, "initialize", frt_phq_init, -1);
3078
3106
  rb_define_method(cPhraseQuery, "add_term", frt_phq_add, -1);
3079
3107
  rb_define_method(cPhraseQuery, "<<", frt_phq_add, -1);
3080
3108
  rb_define_method(cPhraseQuery, "slop", frt_phq_get_slop, 0);
data/ext/search.h CHANGED
@@ -819,6 +819,7 @@ typedef struct QParser
819
819
  char *qstr;
820
820
  char *qstrp;
821
821
  char buf[QP_CONC_WORDS][MAX_WORD_SIZE];
822
+ char *dynbuf;
822
823
  int buf_index;
823
824
  HashTable *field_cache;
824
825
  HashSet *fields;
data/lib/ferret.rb CHANGED
@@ -26,3 +26,4 @@ require 'ferret_ext'
26
26
  require 'ferret_version'
27
27
  require 'ferret/document'
28
28
  require 'ferret/index'
29
+ require 'ferret/field_infos'
@@ -0,0 +1,42 @@
1
+ require 'yaml'
2
+
3
+ class Ferret::Index::FieldInfos
4
+ # Load FieldInfos from a YAML string. The YAML should look something like
5
+ # this:
6
+ # default:
7
+ # store: :yes
8
+ # index: :yes
9
+ # term_vector: :no
10
+ #
11
+ # fields:
12
+ # id:
13
+ # index: :untokenized
14
+ # term_vector: :no
15
+ #
16
+ # title:
17
+ # boost: 20.0
18
+ # term_vector: :no
19
+ #
20
+ # content:
21
+ # term_vector: :with_positions_offsets
22
+ #
23
+ def self.load(yaml_str)
24
+ info = YAML.load(yaml_str)
25
+ convert_strings_to_symbols(info)
26
+ fis = FieldInfos.new(info[:default])
27
+ fields = info[:fields]
28
+ fields.keys.each {|key| fis.add_field(key, fields[key])} if fields
29
+ fis
30
+ end
31
+
32
+ private
33
+ def self.convert_strings_to_symbols(hash)
34
+ hash.keys.each do |key|
35
+ convert_strings_to_symbols(hash[key]) if hash[key].is_a?(Hash)
36
+ if key.is_a?(String)
37
+ hash[key.intern] = hash[key]
38
+ hash.delete(key)
39
+ end
40
+ end
41
+ end
42
+ end
data/lib/ferret/index.rb CHANGED
@@ -90,6 +90,10 @@ module Ferret::Index
90
90
  end
91
91
  end
92
92
 
93
+ if (fi = options[:field_infos]).is_a?(String)
94
+ options[:field_infos] = FieldInfos.load(options[:field_infos])
95
+ end
96
+
93
97
  @close_dir = options[:close_dir]
94
98
  if options[:dir].is_a?(String)
95
99
  options[:path] = options[:dir]
@@ -1,3 +1,3 @@
1
1
  module Ferret
2
- VERSION = '0.10.7'
2
+ VERSION = '0.10.8'
3
3
  end
@@ -59,14 +59,14 @@ module SearcherTests
59
59
  def test_multi_term_query
60
60
  mtq = MultiTermQuery.new(:field, :max_terms => 4, :min_score => 0.5)
61
61
  check_hits(mtq, [])
62
- assert_equal("<>", mtq.to_s(:field))
63
- assert_equal("field:<>", mtq.to_s())
62
+ assert_equal('""', mtq.to_s(:field))
63
+ assert_equal('field:""', mtq.to_s)
64
64
 
65
65
  [
66
- ["brown", 1.0, "<brown>"],
67
- ["fox", 0.1, "<brown>"],
68
- ["fox", 0.6, "<fox^0.6|brown>"],
69
- ["fast", 50.0, "<fox^0.6|brown|fast^50.0>"]
66
+ ["brown", 1.0, '"brown"'],
67
+ ["fox", 0.1, '"brown"'],
68
+ ["fox", 0.6, '"fox^0.6|brown"'],
69
+ ["fast", 50.0, '"fox^0.6|brown|fast^50.0"']
70
70
  ].each do |term, boost, str|
71
71
  mtq.add_term(term, boost)
72
72
  assert_equal(str, mtq.to_s(:field))
@@ -74,13 +74,13 @@ module SearcherTests
74
74
  end
75
75
 
76
76
  mtq.boost = 80.1
77
- assert_equal("field:<fox^0.6|brown|fast^50.0>^80.1", mtq.to_s())
77
+ assert_equal('field:"fox^0.6|brown|fast^50.0"^80.1', mtq.to_s())
78
78
  mtq << "word1"
79
- assert_equal("field:<fox^0.6|brown|word1|fast^50.0>^80.1", mtq.to_s())
79
+ assert_equal('field:"fox^0.6|brown|word1|fast^50.0"^80.1', mtq.to_s())
80
80
  mtq << "word2"
81
- assert_equal("field:<brown|word1|word2|fast^50.0>^80.1", mtq.to_s())
81
+ assert_equal('field:"brown|word1|word2|fast^50.0"^80.1', mtq.to_s())
82
82
  mtq << "word3"
83
- assert_equal("field:<brown|word1|word2|fast^50.0>^80.1", mtq.to_s())
83
+ assert_equal('field:"brown|word1|word2|fast^50.0"^80.1', mtq.to_s())
84
84
 
85
85
  terms = mtq.terms(@searcher)
86
86
  assert(terms.index(Ferret::Term.new(:field, "brown")))
metadata CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.0
3
3
  specification_version: 1
4
4
  name: ferret
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.10.7
7
- date: 2006-09-24 00:00:00 +09:00
6
+ version: 0.10.8
7
+ date: 2006-09-25 00:00:00 +09:00
8
8
  summary: Ruby indexing library.
9
9
  require_paths:
10
10
  - lib
@@ -165,6 +165,7 @@ files:
165
165
  - lib/ferret/number_tools.rb
166
166
  - lib/ferret/index.rb
167
167
  - lib/ferret/document.rb
168
+ - lib/ferret/field_infos.rb
168
169
  - test/test_all.rb
169
170
  - test/test_helper.rb
170
171
  - test/unit/ts_analysis.rb