ferret 0.10.7 → 0.10.8

Sign up to get free protection for your applications and to get access to all the features.
data/ext/q_multi_term.c CHANGED
@@ -524,7 +524,7 @@ static char *multi_tq_to_s(Query *self, const char *curr_field)
524
524
  bptr += flen + 1;
525
525
  }
526
526
 
527
- *(bptr++) = '<';
527
+ *(bptr++) = '"';
528
528
  bt_pq_clone = pq_clone(boosted_terms);
529
529
  while ((bt = (BoostedTerm *)pq_pop(bt_pq_clone)) != NULL) {
530
530
  sprintf(bptr, "%s", bt->term);
@@ -540,10 +540,10 @@ static char *multi_tq_to_s(Query *self, const char *curr_field)
540
540
  }
541
541
  pq_destroy(bt_pq_clone);
542
542
 
543
- if (bptr[-1] == '<') {
543
+ if (bptr[-1] == '"') {
544
544
  bptr++; /* handle zero term case */
545
545
  }
546
- bptr[-1] = '>'; /* delete last '|' char */
546
+ bptr[-1] = '"'; /* delete last '|' char */
547
547
  bptr[ 0] = '\0';
548
548
 
549
549
  if (self->boost != 1.0) {
data/ext/q_parser.c CHANGED
@@ -1754,6 +1754,11 @@ static int get_word(YYSTYPE *lvalp, QParser *qp)
1754
1754
  char *bufp = buf;
1755
1755
  qp->buf_index = (qp->buf_index + 1) % QP_CONC_WORDS;
1756
1756
 
1757
+ if (qp->dynbuf) {
1758
+ free(qp->dynbuf);
1759
+ qp->dynbuf = NULL;
1760
+ }
1761
+
1757
1762
  qp->qstrp--; /* need to back up one character */
1758
1763
 
1759
1764
  while (!strchr(not_word, (c=*qp->qstrp++))) {
@@ -1773,6 +1778,14 @@ static int get_word(YYSTYPE *lvalp, QParser *qp)
1773
1778
  default:
1774
1779
  *bufp++ = c;
1775
1780
  }
1781
+ /* we've exceeded the static buffer. switch to the dynamic
1782
+ one. */
1783
+ if (!qp->dynbuf && ((bufp - buf) == MAX_WORD_SIZE)) {
1784
+ qp->dynbuf = ALLOC_AND_ZERO_N(char, strlen(qp->qstr) + 1);
1785
+ strncpy(qp->dynbuf, buf, MAX_WORD_SIZE);
1786
+ buf = qp->dynbuf;
1787
+ bufp = buf + MAX_WORD_SIZE;
1788
+ }
1776
1789
  }
1777
1790
  qp->qstrp--;
1778
1791
  /* check for keywords. There are only four so we have a bit of a hack which
@@ -1843,7 +1856,7 @@ static int yyerror(QParser *qp, char const *msg)
1843
1856
  }
1844
1857
  mutex_unlock(&qp->mutex);
1845
1858
  RAISE(PARSE_ERROR, "couldn't parse query ``%s''. Error message "
1846
- " was %se", buf, (char *)msg);
1859
+ " was %s", buf, (char *)msg);
1847
1860
  }
1848
1861
  return 0;
1849
1862
  }
@@ -2288,6 +2301,9 @@ void qp_destroy(QParser *self)
2288
2301
  if (self->tokenized_fields) {
2289
2302
  hs_destroy(self->tokenized_fields);
2290
2303
  }
2304
+ if (self->dynbuf) {
2305
+ free(self->dynbuf);
2306
+ }
2291
2307
  hs_destroy(self->all_fields);
2292
2308
  hs_destroy(self->fields_buf);
2293
2309
  h_destroy(self->field_cache);
@@ -2335,6 +2351,7 @@ QParser *qp_new(HashSet *all_fields, HashSet *def_fields,
2335
2351
  self->analyzer = analyzer;
2336
2352
  self->ts_cache = h_new_str(&free, (free_ft)&ts_deref);
2337
2353
  self->buf_index = 0;
2354
+ self->dynbuf = 0;
2338
2355
  self->non_tokenizer = non_tokenizer_new();
2339
2356
  mutex_init(&self->mutex, NULL);
2340
2357
  return self;
data/ext/r_analysis.c CHANGED
@@ -1040,7 +1040,7 @@ static TokenStream *
1040
1040
  cwa_get_ts(Analyzer *a, char *field, char *text)
1041
1041
  {
1042
1042
  VALUE rts = rb_funcall(CWA(a)->ranalyzer, id_token_stream, 2,
1043
- rb_intern(field), rb_str_new2(text));
1043
+ ID2SYM(rb_intern(field)), rb_str_new2(text));
1044
1044
  return frt_get_cwrapped_rts(rts);
1045
1045
  }
1046
1046
 
@@ -1104,10 +1104,9 @@ frt_analyzer_token_stream(VALUE self, VALUE rfield, VALUE rstring)
1104
1104
  Analyzer *a;
1105
1105
  GET_A(a, self);
1106
1106
 
1107
- StringValue(rfield);
1108
1107
  StringValue(rstring);
1109
1108
 
1110
- ts = a_get_ts(a, RSTRING(rfield)->ptr, RSTRING(rstring)->ptr);
1109
+ ts = a_get_ts(a, frt_field(rfield), RSTRING(rstring)->ptr);
1111
1110
 
1112
1111
  /* Make sure that there is no entry already */
1113
1112
  object_set(&ts->text, rstring);
data/ext/r_search.c CHANGED
@@ -57,6 +57,8 @@ static VALUE sym_upper;
57
57
  static VALUE sym_lower;
58
58
  static VALUE sym_include_upper;
59
59
  static VALUE sym_include_lower;
60
+ static VALUE sym_upper_exclusive;
61
+ static VALUE sym_lower_exclusive;
60
62
 
61
63
  static VALUE sym_less_than;
62
64
  static VALUE sym_less_than_or_equal_to;
@@ -170,6 +172,12 @@ frt_get_td(TopDocs *td)
170
172
  return rtop_docs;
171
173
  }
172
174
 
175
+ /*
176
+ * call-seq:
177
+ * top_doc.to_s -> string
178
+ *
179
+ * Returns a string representation of the top_doc in readable format.
180
+ */
173
181
  static VALUE
174
182
  frt_td_to_s(VALUE self)
175
183
  {
@@ -180,7 +188,7 @@ frt_td_to_s(VALUE self)
180
188
  char *s = str;
181
189
  VALUE rstr;
182
190
 
183
- sprintf(s, "TopDocs: totalhits = %d, max_score = %f [\n",
191
+ sprintf(s, "TopDocs: total_hits = %d, max_score = %f [\n",
184
192
  FIX2INT(rb_funcall(self, id_total_hits, 0)),
185
193
  NUM2DBL(rb_funcall(self, id_max_score, 0)));
186
194
  s += strlen(s);
@@ -900,6 +908,14 @@ get_range_params(VALUE roptions, char **lterm, char **uterm,
900
908
  *uterm = StringValuePtr(v);
901
909
  *include_upper = true;
902
910
  }
911
+ if (Qnil != (v = rb_hash_aref(roptions, sym_lower_exclusive))) {
912
+ *lterm = StringValuePtr(v);
913
+ *include_lower = false;
914
+ }
915
+ if (Qnil != (v = rb_hash_aref(roptions, sym_upper_exclusive))) {
916
+ *uterm = StringValuePtr(v);
917
+ *include_upper = false;
918
+ }
903
919
  if (Qnil != (v = rb_hash_aref(roptions, sym_include_lower))) {
904
920
  *include_lower = RTEST(v);
905
921
  }
@@ -956,6 +972,8 @@ get_range_params(VALUE roptions, char **lterm, char **uterm,
956
972
  * q = RangeQuery.new(:date, :lower => "200501", :include_lower => false)
957
973
  * # is equivalent to
958
974
  * q = RangeQuery.new(:date, :< => "200501")
975
+ * # is equivalent to
976
+ * q = RangeQuery.new(:date, :lower_exclusive => "200501")
959
977
  *
960
978
  * q = RangeQuery.new(:date, :lower => "200501", :upper => 200502)
961
979
  * # is equivalent to
@@ -993,9 +1011,15 @@ frt_rq_init(VALUE self, VALUE rfield, VALUE roptions)
993
1011
  * the query it will do anything of value. See PhraseQuery#add_term.
994
1012
  */
995
1013
  static VALUE
996
- frt_phq_init(VALUE self, VALUE rfield)
1014
+ frt_phq_init(int argc, VALUE *argv, VALUE self)
997
1015
  {
998
- Query *q = phq_new(frt_field(rfield));
1016
+ VALUE rfield, rslop;
1017
+ Query *q;
1018
+ rb_scan_args(argc, argv, "11", &rfield, &rslop);
1019
+ q = phq_new(frt_field(rfield));
1020
+ if (argc == 2) {
1021
+ ((PhraseQuery *)q)->slop = FIX2INT(rslop);
1022
+ }
999
1023
  Frt_Wrap_Struct(self, NULL, &frt_q_free, q);
1000
1024
  object_add(q, self);
1001
1025
  return self;
@@ -1715,6 +1739,8 @@ frt_f_to_s(VALUE self)
1715
1739
  * f = RangeFilter.new(:date, :lower => "200501", :include_lower => false)
1716
1740
  * # is equivalent to
1717
1741
  * f = RangeFilter.new(:date, :< => "200501")
1742
+ * # is equivalent to
1743
+ * f = RangeFilter.new(:date, :lower_exclusive => "200501")
1718
1744
  *
1719
1745
  * f = RangeFilter.new(:date, :lower => "200501", :upper => 200502)
1720
1746
  * # is equivalent to
@@ -2991,6 +3017,8 @@ Init_RangeQuery(void)
2991
3017
  {
2992
3018
  sym_upper = ID2SYM(rb_intern("upper"));
2993
3019
  sym_lower = ID2SYM(rb_intern("lower"));
3020
+ sym_upper_exclusive = ID2SYM(rb_intern("upper_exclusive"));
3021
+ sym_lower_exclusive = ID2SYM(rb_intern("lower_exclusive"));
2994
3022
  sym_include_upper = ID2SYM(rb_intern("include_upper"));
2995
3023
  sym_include_lower = ID2SYM(rb_intern("include_lower"));
2996
3024
 
@@ -3074,7 +3102,7 @@ Init_PhraseQuery(void)
3074
3102
  cPhraseQuery = rb_define_class_under(mSearch, "PhraseQuery", cQuery);
3075
3103
  rb_define_alloc_func(cPhraseQuery, frt_data_alloc);
3076
3104
 
3077
- rb_define_method(cPhraseQuery, "initialize", frt_phq_init, 1);
3105
+ rb_define_method(cPhraseQuery, "initialize", frt_phq_init, -1);
3078
3106
  rb_define_method(cPhraseQuery, "add_term", frt_phq_add, -1);
3079
3107
  rb_define_method(cPhraseQuery, "<<", frt_phq_add, -1);
3080
3108
  rb_define_method(cPhraseQuery, "slop", frt_phq_get_slop, 0);
data/ext/search.h CHANGED
@@ -819,6 +819,7 @@ typedef struct QParser
819
819
  char *qstr;
820
820
  char *qstrp;
821
821
  char buf[QP_CONC_WORDS][MAX_WORD_SIZE];
822
+ char *dynbuf;
822
823
  int buf_index;
823
824
  HashTable *field_cache;
824
825
  HashSet *fields;
data/lib/ferret.rb CHANGED
@@ -26,3 +26,4 @@ require 'ferret_ext'
26
26
  require 'ferret_version'
27
27
  require 'ferret/document'
28
28
  require 'ferret/index'
29
+ require 'ferret/field_infos'
@@ -0,0 +1,42 @@
1
+ require 'yaml'
2
+
3
+ class Ferret::Index::FieldInfos
4
+ # Load FieldInfos from a YAML string. The YAML should look something like
5
+ # this:
6
+ # default:
7
+ # store: :yes
8
+ # index: :yes
9
+ # term_vector: :no
10
+ #
11
+ # fields:
12
+ # id:
13
+ # index: :untokenized
14
+ # term_vector: :no
15
+ #
16
+ # title:
17
+ # boost: 20.0
18
+ # term_vector: :no
19
+ #
20
+ # content:
21
+ # term_vector: :with_positions_offsets
22
+ #
23
+ def self.load(yaml_str)
24
+ info = YAML.load(yaml_str)
25
+ convert_strings_to_symbols(info)
26
+ fis = FieldInfos.new(info[:default])
27
+ fields = info[:fields]
28
+ fields.keys.each {|key| fis.add_field(key, fields[key])} if fields
29
+ fis
30
+ end
31
+
32
+ private
33
+ def self.convert_strings_to_symbols(hash)
34
+ hash.keys.each do |key|
35
+ convert_strings_to_symbols(hash[key]) if hash[key].is_a?(Hash)
36
+ if key.is_a?(String)
37
+ hash[key.intern] = hash[key]
38
+ hash.delete(key)
39
+ end
40
+ end
41
+ end
42
+ end
data/lib/ferret/index.rb CHANGED
@@ -90,6 +90,10 @@ module Ferret::Index
90
90
  end
91
91
  end
92
92
 
93
+ if (fi = options[:field_infos]).is_a?(String)
94
+ options[:field_infos] = FieldInfos.load(options[:field_infos])
95
+ end
96
+
93
97
  @close_dir = options[:close_dir]
94
98
  if options[:dir].is_a?(String)
95
99
  options[:path] = options[:dir]
@@ -1,3 +1,3 @@
1
1
  module Ferret
2
- VERSION = '0.10.7'
2
+ VERSION = '0.10.8'
3
3
  end
@@ -59,14 +59,14 @@ module SearcherTests
59
59
  def test_multi_term_query
60
60
  mtq = MultiTermQuery.new(:field, :max_terms => 4, :min_score => 0.5)
61
61
  check_hits(mtq, [])
62
- assert_equal("<>", mtq.to_s(:field))
63
- assert_equal("field:<>", mtq.to_s())
62
+ assert_equal('""', mtq.to_s(:field))
63
+ assert_equal('field:""', mtq.to_s)
64
64
 
65
65
  [
66
- ["brown", 1.0, "<brown>"],
67
- ["fox", 0.1, "<brown>"],
68
- ["fox", 0.6, "<fox^0.6|brown>"],
69
- ["fast", 50.0, "<fox^0.6|brown|fast^50.0>"]
66
+ ["brown", 1.0, '"brown"'],
67
+ ["fox", 0.1, '"brown"'],
68
+ ["fox", 0.6, '"fox^0.6|brown"'],
69
+ ["fast", 50.0, '"fox^0.6|brown|fast^50.0"']
70
70
  ].each do |term, boost, str|
71
71
  mtq.add_term(term, boost)
72
72
  assert_equal(str, mtq.to_s(:field))
@@ -74,13 +74,13 @@ module SearcherTests
74
74
  end
75
75
 
76
76
  mtq.boost = 80.1
77
- assert_equal("field:<fox^0.6|brown|fast^50.0>^80.1", mtq.to_s())
77
+ assert_equal('field:"fox^0.6|brown|fast^50.0"^80.1', mtq.to_s())
78
78
  mtq << "word1"
79
- assert_equal("field:<fox^0.6|brown|word1|fast^50.0>^80.1", mtq.to_s())
79
+ assert_equal('field:"fox^0.6|brown|word1|fast^50.0"^80.1', mtq.to_s())
80
80
  mtq << "word2"
81
- assert_equal("field:<brown|word1|word2|fast^50.0>^80.1", mtq.to_s())
81
+ assert_equal('field:"brown|word1|word2|fast^50.0"^80.1', mtq.to_s())
82
82
  mtq << "word3"
83
- assert_equal("field:<brown|word1|word2|fast^50.0>^80.1", mtq.to_s())
83
+ assert_equal('field:"brown|word1|word2|fast^50.0"^80.1', mtq.to_s())
84
84
 
85
85
  terms = mtq.terms(@searcher)
86
86
  assert(terms.index(Ferret::Term.new(:field, "brown")))
metadata CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.0
3
3
  specification_version: 1
4
4
  name: ferret
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.10.7
7
- date: 2006-09-24 00:00:00 +09:00
6
+ version: 0.10.8
7
+ date: 2006-09-25 00:00:00 +09:00
8
8
  summary: Ruby indexing library.
9
9
  require_paths:
10
10
  - lib
@@ -165,6 +165,7 @@ files:
165
165
  - lib/ferret/number_tools.rb
166
166
  - lib/ferret/index.rb
167
167
  - lib/ferret/document.rb
168
+ - lib/ferret/field_infos.rb
168
169
  - test/test_all.rb
169
170
  - test/test_helper.rb
170
171
  - test/unit/ts_analysis.rb