RubyGems - ferret - Versions diffs - 0.10.7 → 0.10.8 - Mend

ferret 0.10.7 → 0.10.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11)

data/ext/q_multi_term.c +3 -3
data/ext/q_parser.c +18 -1
data/ext/r_analysis.c +2 -3
data/ext/r_search.c +32 -4
data/ext/search.h +1 -0
data/lib/ferret.rb +1 -0
data/lib/ferret/field_infos.rb +42 -0
data/lib/ferret/index.rb +4 -0
data/lib/ferret_version.rb +1 -1
data/test/unit/search/tm_searcher.rb +10 -10
metadata +3 -2

data/ext/q_multi_term.c CHANGED Viewed

@@ -524,7 +524,7 @@ static char *multi_tq_to_s(Query *self, const char *curr_field)
         bptr += flen + 1;
     }
-    *(bptr++) = '<';
+    *(bptr++) = '"';
     bt_pq_clone = pq_clone(boosted_terms);
     while ((bt = (BoostedTerm *)pq_pop(bt_pq_clone)) != NULL) {
         sprintf(bptr, "%s", bt->term);
@@ -540,10 +540,10 @@ static char *multi_tq_to_s(Query *self, const char *curr_field)
     }
     pq_destroy(bt_pq_clone);
-    if (bptr[-1] == '<') {
+    if (bptr[-1] == '"') {
         bptr++; /* handle zero term case */
     }
-    bptr[-1] =  '>'; /* delete last '|' char */
+    bptr[-1] =  '"'; /* delete last '|' char */
     bptr[ 0] = '\0';
     if (self->boost != 1.0) {

data/ext/q_parser.c CHANGED Viewed

@@ -1754,6 +1754,11 @@ static int get_word(YYSTYPE *lvalp, QParser *qp)
     char *bufp = buf;
     qp->buf_index = (qp->buf_index + 1) % QP_CONC_WORDS;
+    if (qp->dynbuf) {
+        free(qp->dynbuf);
+        qp->dynbuf = NULL;
+    }
     qp->qstrp--; /* need to back up one character */
     while (!strchr(not_word, (c=*qp->qstrp++))) {
@@ -1773,6 +1778,14 @@ static int get_word(YYSTYPE *lvalp, QParser *qp)
             default:
                 *bufp++ = c;
         }
+        /* we've exceeded the static buffer. switch to the dynamic
+           one. */
+        if (!qp->dynbuf && ((bufp - buf) == MAX_WORD_SIZE)) {
+            qp->dynbuf = ALLOC_AND_ZERO_N(char, strlen(qp->qstr) + 1);
+            strncpy(qp->dynbuf, buf, MAX_WORD_SIZE);
+            buf = qp->dynbuf;
+            bufp = buf + MAX_WORD_SIZE;
+        }
     }
     qp->qstrp--;
     /* check for keywords. There are only four so we have a bit of a hack which
@@ -1843,7 +1856,7 @@ static int yyerror(QParser *qp, char const *msg)
         }
         mutex_unlock(&qp->mutex);
         RAISE(PARSE_ERROR, "couldn't parse query ``%s''. Error message "
-              " was %se", buf, (char *)msg);
+              " was %s", buf, (char *)msg);
     }
     return 0;
 }
@@ -2288,6 +2301,9 @@ void qp_destroy(QParser *self)
     if (self->tokenized_fields) {
         hs_destroy(self->tokenized_fields);
     }
+    if (self->dynbuf) {
+        free(self->dynbuf);
+    }
     hs_destroy(self->all_fields);
     hs_destroy(self->fields_buf);
     h_destroy(self->field_cache);
@@ -2335,6 +2351,7 @@ QParser *qp_new(HashSet *all_fields, HashSet *def_fields,
     self->analyzer = analyzer;
     self->ts_cache = h_new_str(&free, (free_ft)&ts_deref);
     self->buf_index = 0;
+    self->dynbuf = 0;
     self->non_tokenizer = non_tokenizer_new();
     mutex_init(&self->mutex, NULL);
     return self;

data/ext/r_analysis.c CHANGED Viewed

@@ -1040,7 +1040,7 @@ static TokenStream *
 cwa_get_ts(Analyzer *a, char *field, char *text)
 {
     VALUE rts = rb_funcall(CWA(a)->ranalyzer, id_token_stream, 2,
-                           rb_intern(field), rb_str_new2(text));
+                           ID2SYM(rb_intern(field)), rb_str_new2(text));
     return frt_get_cwrapped_rts(rts);
 }
@@ -1104,10 +1104,9 @@ frt_analyzer_token_stream(VALUE self, VALUE rfield, VALUE rstring)
     Analyzer *a;
     GET_A(a, self);
-    StringValue(rfield);
     StringValue(rstring);
-    ts = a_get_ts(a, RSTRING(rfield)->ptr, RSTRING(rstring)->ptr);
+    ts = a_get_ts(a, frt_field(rfield), RSTRING(rstring)->ptr);
     /* Make sure that there is no entry already */
     object_set(&ts->text, rstring);

data/ext/r_search.c CHANGED Viewed

@@ -57,6 +57,8 @@ static VALUE sym_upper;
 static VALUE sym_lower;
 static VALUE sym_include_upper;
 static VALUE sym_include_lower;
+static VALUE sym_upper_exclusive;
+static VALUE sym_lower_exclusive;
 static VALUE sym_less_than;
 static VALUE sym_less_than_or_equal_to;
@@ -170,6 +172,12 @@ frt_get_td(TopDocs *td)
     return rtop_docs;
 }
+/*
+ *  call-seq:
+ *     top_doc.to_s -> string
+ *
+ *  Returns a string represention of the top_doc in readable format.
+ */
 static VALUE
 frt_td_to_s(VALUE self)
 {
@@ -180,7 +188,7 @@ frt_td_to_s(VALUE self)
     char *s = str;
     VALUE rstr;
-    sprintf(s, "TopDocs: totalhits = %d, max_score = %f [\n",
+    sprintf(s, "TopDocs: total_hits = %d, max_score = %f [\n",
             FIX2INT(rb_funcall(self, id_total_hits, 0)),
             NUM2DBL(rb_funcall(self, id_max_score, 0)));
     s += strlen(s);
@@ -900,6 +908,14 @@ get_range_params(VALUE roptions, char **lterm, char **uterm,
         *uterm = StringValuePtr(v);
         *include_upper = true;
     }
+    if (Qnil != (v = rb_hash_aref(roptions, sym_lower_exclusive))) {
+        *lterm = StringValuePtr(v);
+        *include_lower = false;
+    }
+    if (Qnil != (v = rb_hash_aref(roptions, sym_upper_exclusive))) {
+        *uterm = StringValuePtr(v);
+        *include_upper = false;
+    }
     if (Qnil != (v = rb_hash_aref(roptions, sym_include_lower))) {
         *include_lower = RTEST(v);
     }
@@ -956,6 +972,8 @@ get_range_params(VALUE roptions, char **lterm, char **uterm,
  *    q = RangeQuery.new(:date, :lower => "200501", :include_lower => false)
  *    # is equivalent to
  *    q = RangeQuery.new(:date, :< => "200501")
+ *    # is equivalent to
+ *    q = RangeQuery.new(:date, :lower_exclusive => "200501")
  *
  *    q = RangeQuery.new(:date, :lower => "200501", :upper => 200502)
  *    # is equivalent to
@@ -993,9 +1011,15 @@ frt_rq_init(VALUE self, VALUE rfield, VALUE roptions)
  *  the query it will do anything of value. See PhraseQuery#add_term.
  */
 static VALUE
-frt_phq_init(VALUE self, VALUE rfield)
+frt_phq_init(int argc, VALUE *argv, VALUE self)
 {
-    Query *q = phq_new(frt_field(rfield));
+    VALUE rfield, rslop;
+    Query *q;
+    rb_scan_args(argc, argv, "11", &rfield, &rslop);
+    q = phq_new(frt_field(rfield));
+    if (argc == 2) {
+        ((PhraseQuery *)q)->slop = FIX2INT(rslop);
+    }
     Frt_Wrap_Struct(self, NULL, &frt_q_free, q);
     object_add(q, self);
     return self;
@@ -1715,6 +1739,8 @@ frt_f_to_s(VALUE self)
  *    f = RangeFilter.new(:date, :lower => "200501", :include_lower => false)
  *    # is equivalent to
  *    f = RangeFilter.new(:date, :< => "200501")
+ *    # is equivalent to
+ *    f = RangeFilter.new(:date, :lower_exclusive => "200501")
  *
  *    f = RangeFilter.new(:date, :lower => "200501", :upper => 200502)
  *    # is equivalent to
@@ -2991,6 +3017,8 @@ Init_RangeQuery(void)
 {
     sym_upper = ID2SYM(rb_intern("upper"));
     sym_lower = ID2SYM(rb_intern("lower"));
+    sym_upper_exclusive = ID2SYM(rb_intern("upper_exclusive"));
+    sym_lower_exclusive = ID2SYM(rb_intern("lower_exclusive"));
     sym_include_upper = ID2SYM(rb_intern("include_upper"));
     sym_include_lower = ID2SYM(rb_intern("include_lower"));
@@ -3074,7 +3102,7 @@ Init_PhraseQuery(void)
     cPhraseQuery = rb_define_class_under(mSearch, "PhraseQuery", cQuery);
     rb_define_alloc_func(cPhraseQuery, frt_data_alloc);
-    rb_define_method(cPhraseQuery, "initialize", frt_phq_init, 1);
+    rb_define_method(cPhraseQuery, "initialize", frt_phq_init, -1);
     rb_define_method(cPhraseQuery, "add_term", frt_phq_add, -1);
     rb_define_method(cPhraseQuery, "<<", frt_phq_add, -1);
     rb_define_method(cPhraseQuery, "slop", frt_phq_get_slop, 0);

data/ext/search.h CHANGED Viewed

@@ -819,6 +819,7 @@ typedef struct QParser
     char *qstr;
     char *qstrp;
     char buf[QP_CONC_WORDS][MAX_WORD_SIZE];
+    char *dynbuf;
     int  buf_index;
     HashTable *field_cache;
     HashSet *fields;

data/lib/ferret.rb CHANGED Viewed

@@ -26,3 +26,4 @@ require 'ferret_ext'
 require 'ferret_version'
 require 'ferret/document'
 require 'ferret/index'
+require 'ferret/field_infos'

data/lib/ferret/field_infos.rb ADDED Viewed

@@ -0,0 +1,42 @@
+require 'yaml'
+class Ferret::Index::FieldInfos
+  # Load FieldInfos from a YAML file. The YAML file should look something like
+  # this:
+  # default:
+  #   store: :yes
+  #   index: :yes
+  #   term_vector: :no
+  #
+  # fields:
+  #   id:
+  #     index: :untokenized
+  #     term_vector: :no
+  #
+  #   title:
+  #     boost: 20.0
+  #     term_vector: :no
+  #
+  #   content:
+  #     term_vector: :with_positions_offsets
+  #
+  def self.load(yaml_str)
+    info = YAML.load(yaml_str)
+    convert_strings_to_symbols(info)
+    fis = FieldInfos.new(info[:default])
+    fields = info[:fields]
+    fields.keys.each {|key| fis.add_field(key, fields[key])} if fields
+    fis
+  end
+  private
+  def self.convert_strings_to_symbols(hash)
+    hash.keys.each do |key|
+      convert_strings_to_symbols(hash[key]) if hash[key].is_a?(Hash)
+      if key.is_a?(String)
+        hash[key.intern] = hash[key]
+        hash.delete(key)
+      end
+    end
+  end
+end

data/lib/ferret/index.rb CHANGED Viewed

@@ -90,6 +90,10 @@ module Ferret::Index
         end
       end
+      if (fi = options[:field_infos]).is_a?(String)
+        options[:field_infos] = FieldInfos.load(options[:field_infos])
+      end
       @close_dir = options[:close_dir]
       if options[:dir].is_a?(String)
         options[:path] = options[:dir]

data/lib/ferret_version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module Ferret
-  VERSION = '0.10.7'
+  VERSION = '0.10.8'
 end

data/test/unit/search/tm_searcher.rb CHANGED Viewed

@@ -59,14 +59,14 @@ module SearcherTests
   def test_multi_term_query
     mtq = MultiTermQuery.new(:field, :max_terms => 4, :min_score => 0.5)
     check_hits(mtq, [])
-    assert_equal("<>", mtq.to_s(:field))
-    assert_equal("field:<>", mtq.to_s())
+    assert_equal('""', mtq.to_s(:field))
+    assert_equal('field:""', mtq.to_s)
     [
-      ["brown", 1.0, "<brown>"],
-      ["fox",   0.1, "<brown>"],
-      ["fox",   0.6, "<fox^0.6|brown>"],
-      ["fast", 50.0, "<fox^0.6|brown|fast^50.0>"]
+      ["brown", 1.0, '"brown"'],
+      ["fox",   0.1, '"brown"'],
+      ["fox",   0.6, '"fox^0.6|brown"'],
+      ["fast", 50.0, '"fox^0.6|brown|fast^50.0"']
     ].each do |term, boost, str|
       mtq.add_term(term, boost)
       assert_equal(str, mtq.to_s(:field))
@@ -74,13 +74,13 @@ module SearcherTests
     end
     mtq.boost = 80.1
-    assert_equal("field:<fox^0.6|brown|fast^50.0>^80.1", mtq.to_s())
+    assert_equal('field:"fox^0.6|brown|fast^50.0"^80.1', mtq.to_s())
     mtq << "word1"
-    assert_equal("field:<fox^0.6|brown|word1|fast^50.0>^80.1", mtq.to_s())
+    assert_equal('field:"fox^0.6|brown|word1|fast^50.0"^80.1', mtq.to_s())
     mtq << "word2"
-    assert_equal("field:<brown|word1|word2|fast^50.0>^80.1", mtq.to_s())
+    assert_equal('field:"brown|word1|word2|fast^50.0"^80.1', mtq.to_s())
     mtq << "word3"
-    assert_equal("field:<brown|word1|word2|fast^50.0>^80.1", mtq.to_s())
+    assert_equal('field:"brown|word1|word2|fast^50.0"^80.1', mtq.to_s())
     terms = mtq.terms(@searcher)
     assert(terms.index(Ferret::Term.new(:field, "brown")))

metadata CHANGED Viewed

@@ -3,8 +3,8 @@ rubygems_version: 0.9.0
 specification_version: 1
 name: ferret
 version: !ruby/object:Gem::Version
-  version: 0.10.7
-date: 2006-09-24 00:00:00 +09:00
+  version: 0.10.8
+date: 2006-09-25 00:00:00 +09:00
 summary: Ruby indexing library.
 require_paths:
 - lib
@@ -165,6 +165,7 @@ files:
 - lib/ferret/number_tools.rb
 - lib/ferret/index.rb
 - lib/ferret/document.rb
+- lib/ferret/field_infos.rb
 - test/test_all.rb
 - test/test_helper.rb
 - test/unit/ts_analysis.rb