RubyGems - ferret - Versions diffs - 0.10.13 → 0.10.14 - Mend

ferret 0.10.13 → 0.10.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15)

data/Rakefile +5 -2
data/ext/index.c +1 -1
data/ext/q_phrase.c +6 -1
data/ext/q_span.c +444 -6
data/ext/r_search.c +95 -1
data/ext/search.c +2 -1
data/ext/search.h +32 -1
data/ext/sort.c +1 -1
data/lib/ferret/index.rb +2 -0
data/lib/ferret_version.rb +1 -1
data/test/unit/analysis/tc_analyzer.rb +13 -13
data/test/unit/analysis/tc_token_stream.rb +5 -5
data/test/unit/search/tc_spans.rb +19 -3
data/test/unit/search/tm_searcher.rb +7 -1
metadata +123 -123

data/ext/r_search.c CHANGED Viewed

@@ -30,6 +30,8 @@ static VALUE cMatchAllQuery;
 static VALUE cConstantScoreQuery;
 static VALUE cFilteredQuery;
 static VALUE cSpanTermQuery;
+static VALUE cSpanMultiTermQuery;
+static VALUE cSpanPrefixQuery;
 static VALUE cSpanFirstQuery;
 static VALUE cSpanNearQuery;
 static VALUE cSpanOrQuery;
@@ -557,6 +559,12 @@ frt_get_q(Query *q)
                 self = MK_QUERY(cPrefixQuery, q);
                 break;
             case SPAN_TERM_QUERY:
                 self = MK_QUERY(cSpanTermQuery, q);
                 break;
+            case SPAN_MULTI_TERM_QUERY:
+                self = MK_QUERY(cSpanMultiTermQuery, q);
+                break;
+            case SPAN_PREFIX_QUERY:
+                self = MK_QUERY(cSpanPrefixQuery, q);
+                break;
             case SPAN_FIRST_QUERY:
@@ -1127,7 +1135,7 @@ frt_rq_init(VALUE self, VALUE rfield, VALUE roptions)
 /*
  *  call-seq:
- *     PhraseQuery.new(field) -> phrase_query
+ *     PhraseQuery.new(field, slop = 0) -> phrase_query
  *
  *  Create a new PhraseQuery on the field +field+. You need to add terms to
  *  the query before it will do anything of value. See PhraseQuery#add_term.
@@ -1590,6 +1598,54 @@ frt_spantq_init(VALUE self, VALUE rfield, VALUE rterm)
     return self;
 }
+/****************************************************************************
+ *
+ * SpanMultiTermQuery Methods
+ *
+ ****************************************************************************/
+/*
+ *  call-seq:
+ *     SpanMultiTermQuery.new(field, terms) -> query
+ *
+ *  Create a new SpanMultiTermQuery which matches documents containing any of
+ *  the terms +terms+ in the field +field+. +terms+ must be an Array of
+ *  Strings; a TypeError is raised if it is not an Array or if one of its
+ *  elements cannot be converted to a String.
+ */
+static VALUE
+frt_spanmtq_init(VALUE self, VALUE rfield, VALUE rterms)
+{
+    Query *q;
+    int i;
+    /* RARRAY() is an unchecked cast, so reject non-Array arguments before
+     * reading ->len and ->ptr. */
+    Check_Type(rterms, T_ARRAY);
+    /* Coerce every element up front so that a non-String term raises before
+     * the query is allocated; raising later would leak the query because no
+     * Ruby object owns it yet. */
+    for (i = 0; i < RARRAY(rterms)->len; i++) {
+        StringValue(RARRAY(rterms)->ptr[i]);
+    }
+    q = spanmtq_new(frt_field(rfield));
+    for (i = RARRAY(rterms)->len - 1; i >= 0; i--) {
+        spanmtq_add_term(q, StringValuePtr(RARRAY(rterms)->ptr[i]));
+    }
+    Frt_Wrap_Struct(self, NULL, &frt_q_free, q);
+    object_add(q, self);
+    return self;
+}
+/****************************************************************************
+ *
+ * SpanPrefixQuery Methods
+ *
+ ****************************************************************************/
+/*
+ *  call-seq:
+ *     SpanPrefixQuery.new(field, prefix) -> query
+ *
+ *  Build a SpanPrefixQuery that matches every document in which the field
+ *  +field+ contains a term starting with +prefix+.
+ */
+static VALUE
+frt_spanprq_init(VALUE self, VALUE rfield, VALUE rprefix)
+{
+    const char *field = frt_field(rfield);
+    const char *prefix = StringValuePtr(rprefix);
+    Query *prefix_query = spanprq_new(field, prefix);
+    /* Hand the C query to the Ruby object, then register the pairing. */
+    Frt_Wrap_Struct(self, NULL, &frt_q_free, prefix_query);
+    object_add(prefix_query, self);
+    return self;
+}
 /****************************************************************************
  *
  * SpanFirstQuery Methods
@@ -3467,6 +3523,42 @@ Init_SpanTermQuery(void)
     rb_define_method(cSpanTermQuery, "initialize", frt_spantq_init, 2);
 }
+/*
+ *  Document-class: Ferret::Search::Spans::SpanMultiTermQuery
+ *
+ *  == Summary
+ *
+ *  A SpanMultiTermQuery is the Spans version of MultiTermQuery, the only
+ *  difference being that it returns the start and end offset of all of its
+ *  matches for use by enclosing SpanQueries.
+ */
+static void
+Init_SpanMultiTermQuery(void) /* defines the Ruby class, its allocator and its initializer */
+{
+    cSpanMultiTermQuery = rb_define_class_under(mSpans, "SpanMultiTermQuery", cQuery); /* subclass of cQuery, defined under mSpans */
+    rb_define_alloc_func(cSpanMultiTermQuery, frt_data_alloc);
+    rb_define_method(cSpanMultiTermQuery, "initialize", frt_spanmtq_init, 2); /* two arguments: field, terms */
+}
+/*
+ *  Document-class: Ferret::Search::Spans::SpanPrefixQuery
+ *
+ *  == Summary
+ *
+ *  A SpanPrefixQuery is the Spans version of PrefixQuery, the only difference
+ *  being that it returns the start and end offset of all of its matches for
+ *  use by enclosing SpanQueries.
+ */
+static void
+Init_SpanPrefixQuery(void) /* defines the Ruby class, its allocator and its initializer */
+{
+    cSpanPrefixQuery = rb_define_class_under(mSpans, "SpanPrefixQuery", cQuery); /* subclass of cQuery, defined under mSpans */
+    rb_define_alloc_func(cSpanPrefixQuery, frt_data_alloc);
+    rb_define_method(cSpanPrefixQuery, "initialize", frt_spanprq_init, 2); /* two arguments: field, prefix */
+}
 /*
  *  Document-class: Ferret::Search::Spans::SpanFirstQuery
  *
@@ -3652,6 +3744,8 @@ Init_Spans(void)
 {
     mSpans = rb_define_module_under(mSearch, "Spans");
     Init_SpanTermQuery();
+    Init_SpanMultiTermQuery();
+    Init_SpanPrefixQuery();
     Init_SpanFirstQuery();
     Init_SpanNearQuery();
     Init_SpanOrQuery();

data/ext/search.c CHANGED Viewed

@@ -281,7 +281,6 @@ static const char *QUERY_NAMES[] = {
     "MultiTermQuery",
     "BooleanQuery",
     "PhraseQuery",
-    "MultiPhraseQuery",
     "ConstantScoreQuery",
     "FilteredQuery",
     "MatchAllQuery",
@@ -290,6 +289,8 @@ static const char *QUERY_NAMES[] = {
     "FuzzyQuery",
     "PrefixQuery",
     "SpanTermQuery",
+    "SpanMultiTermQuery",
+    "SpanPrefixQuery",
     "SpanFirstQuery",
     "SpanOrQuery",
     "SpanNotQuery",

data/ext/search.h CHANGED Viewed

@@ -188,6 +188,8 @@ enum QUERY_TYPE
     FUZZY_QUERY,
     PREFIX_QUERY,
     SPAN_TERM_QUERY,
+    SPAN_MULTI_TERM_QUERY,
+    SPAN_PREFIX_QUERY,
     SPAN_FIRST_QUERY,
     SPAN_OR_QUERY,
     SPAN_NOT_QUERY,
@@ -342,7 +344,6 @@ typedef struct MTQSubQuery
 #define PREFIX_QUERY_MAX_TERMS 256
 typedef struct PrefixQuery
 {
     MTQSubQuery super;
@@ -480,6 +481,22 @@ typedef struct SpanTermQuery
 } SpanTermQuery;
 extern Query *spantq_new(const char *field, const char *term);
+/***************************************************************************
+ * SpanMultiTermQuery
+ ***************************************************************************/
+#define SPAN_MULTI_TERM_QUERY_CAPA 1024
+typedef struct SpanMultiTermQuery
+{
+    SpanQuery super;      /* embedded SpanQuery, kept as the first member */
+    char    **terms;      /* term strings; presumably filled by spanmtq_add_term() - confirm in q_span.c */
+    int       term_cnt;   /* presumably the number of entries in use in terms - TODO confirm */
+    int       term_capa;  /* presumably the capacity limit for terms (see spanmtq_new_conf max_size) - TODO confirm */
+} SpanMultiTermQuery;
+extern Query *spanmtq_new(const char *field);
+extern Query *spanmtq_new_conf(const char *field, int max_size);
+extern void spanmtq_add_term(Query *self, const char *term);
 /***************************************************************************
  * SpanFirstQuery
@@ -544,6 +561,20 @@ extern Query *spanxq_new(Query *inc, Query *exc);
 extern Query *spanxq_new_nr(Query *inc, Query *exc);
+/***************************************************************************
+ * SpanPrefixQuery
+ ***************************************************************************/
+#define SPAN_PREFIX_QUERY_MAX_TERMS 256
+typedef struct SpanPrefixQuery
+{
+    SpanQuery   super;    /* embedded SpanQuery, kept as the first member */
+    char       *prefix;   /* presumably the prefix given to spanprq_new() - TODO confirm ownership in q_span.c */
+} SpanPrefixQuery;
+extern Query *spanprq_new(const char *field, const char *prefix);
 /***************************************************************************
  *

data/ext/sort.c CHANGED Viewed

@@ -559,7 +559,7 @@ void *field_cache_get_index(IndexReader *ir, SortField *sf)
     if (sf->type == SORT_TYPE_AUTO) {
         te = ir->terms(ir, field_num);
-        if (!te->next(te)) {
+        if (!te->next(te) && (ir->num_docs(ir) > 0)) {
             RAISE(ARG_ERROR,
                   "Cannot sort by field \"%s\" as there are no terms "
                   "in that field in the index.", sf->field);

data/lib/ferret/index.rb CHANGED Viewed

@@ -88,6 +88,8 @@ module Ferret::Index
         if @key.is_a?(Array)
           @key.flatten.map {|k| k.to_s.intern}
         end
+      else
+        @key = nil
       end
       if (fi = options[:field_infos]).is_a?(String)

data/lib/ferret_version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module Ferret
-  VERSION = '0.10.13'
+  VERSION = '0.10.14'
 end

data/test/unit/analysis/tc_analyzer.rb CHANGED Viewed

@@ -3,7 +3,7 @@ require File.dirname(__FILE__) + "/../../test_helper"
 class AnalyzerTest < Test::Unit::TestCase
   include Ferret::Analysis
-  def test_c_analyzer()
+  def test_analyzer()
     input = 'DBalmain@gmail.com is My E-Mail 523@#$ ADDRESS. 23#@$'
     a = Analyzer.new()
     t = a.token_stream("fieldname", input)
@@ -38,12 +38,12 @@ class AnalyzerTest < Test::Unit::TestCase
     assert_equal(Token.new("ADDRESS", 39, 46), t.next())
     assert(! t.next())
   end
-end if (/mswin/i !~ RUBY_PLATFORM)
+end if (/utf-8/i !~ Ferret.locale)
 class AsciiLetterAnalyzerTest < Test::Unit::TestCase
   include Ferret::Analysis
-  def test_c_letter_analyzer()
+  def test_letter_analyzer()
     input = 'DBalmain@gmail.com is My E-Mail 523@#$ ADDRESS. 23#@$'
     a = AsciiLetterAnalyzer.new()
     t = a.token_stream("fieldname", input)
@@ -83,7 +83,7 @@ end
 class LetterAnalyzerTest < Test::Unit::TestCase
   include Ferret::Analysis
-  def test_c_letter_analyzer()
+  def test_letter_analyzer()
     Ferret.locale = ""
     input = 'DBalmän@gmail.com is My e-mail 52   #$ address. 23#@$ ÁÄGÇ®ÊËÌ¯ÚØÃ¬ÖÎÍ'
     a = LetterAnalyzer.new(false)
@@ -131,12 +131,12 @@ class LetterAnalyzerTest < Test::Unit::TestCase
     assert_equal(Token.new("öîí", 80, 86), t.next)
     assert(! t.next())
   end
-end if (/mswin/i !~ RUBY_PLATFORM)
+end if (/utf-8/i !~ Ferret.locale)
 class AsciiWhiteSpaceAnalyzerTest < Test::Unit::TestCase
   include Ferret::Analysis
-  def test_c_white_space_analyzer()
+  def test_white_space_analyzer()
     input = 'DBalmain@gmail.com is My E-Mail 52   #$ ADDRESS. 23#@$'
     a = AsciiWhiteSpaceAnalyzer.new()
     t = a.token_stream("fieldname", input)
@@ -176,7 +176,7 @@ end
 class WhiteSpaceAnalyzerTest < Test::Unit::TestCase
   include Ferret::Analysis
-  def test_c_white_space_analyzer()
+  def test_white_space_analyzer()
     input = 'DBalmän@gmail.com is My e-mail 52   #$ address. 23#@$ ÁÄGÇ®ÊËÌ¯ÚØÃ¬ÖÎÍ'
     a = WhiteSpaceAnalyzer.new()
     t = a.token_stream("fieldname", input)
@@ -214,12 +214,12 @@ class WhiteSpaceAnalyzerTest < Test::Unit::TestCase
     assert_equal(Token.new('áägç®êëì¯úøã¬öîí', 55, 86), t.next)
     assert(! t.next())
   end
-end if (/mswin/i !~ RUBY_PLATFORM)
+end if (/utf-8/i !~ Ferret.locale)
 class AsciiStandardAnalyzerTest < Test::Unit::TestCase
   include Ferret::Analysis
-  def test_c_standard_analyzer()
+  def test_standard_analyzer()
     input = 'DBalmain@gmail.com is My e-mail 52   #$ Address. 23#@$ http://www.google.com/results/ T.N.T. 123-1235-ASD-1234'
     a = AsciiStandardAnalyzer.new()
     t = a.token_stream("fieldname", input)
@@ -267,7 +267,7 @@ end
 class StandardAnalyzerTest < Test::Unit::TestCase
   include Ferret::Analysis
-  def test_c_standard_analyzer()
+  def test_standard_analyzer()
     input = 'DBalmán@gmail.com is My e-mail and the Address. 23#@$ http://www.google.com/results/ T.N.T. 123-1235-ASD-1234 23#@$ ÁÄGÇ®ÊËÌ¯ÚØÃ¬ÖÎÍ'
     a = StandardAnalyzer.new()
     t = a.token_stream("fieldname", input)
@@ -350,11 +350,11 @@ class StandardAnalyzerTest < Test::Unit::TestCase
     assert_equal(Token.new('öîí', 142, 148), t2.next)
     assert(! t2.next())
   end
-end if (/mswin/i !~ RUBY_PLATFORM)
+end if (/utf-8/i !~ Ferret.locale)
 class PerFieldAnalyzerTest < Test::Unit::TestCase
   include Ferret::Analysis
-  def test_c_per_field_analyzer()
+  def test_per_field_analyzer()
     input = 'DBalmain@gmail.com is My e-mail 52   #$ address. 23#@$'
     pfa = PerFieldAnalyzer.new(StandardAnalyzer.new())
     pfa['white'] = WhiteSpaceAnalyzer.new(false)
@@ -545,4 +545,4 @@ class CustomAnalyzerTest < Test::Unit::TestCase
     assert_equal(Token.new("dêbater", 36, 44), t.next)
     assert(! t.next())
   end
-end if (/mswin/i !~ RUBY_PLATFORM)
+end if (/utf-8/i !~ Ferret.locale)

data/test/unit/analysis/tc_token_stream.rb CHANGED Viewed

@@ -109,7 +109,7 @@ class LetterTokenizerTest < Test::Unit::TestCase
     assert_equal(Token.new('öîí', 80, 86), t.next)
     assert(! t.next())
   end
-end if (/mswin/i !~ RUBY_PLATFORM)
+end if (/utf-8/i !~ Ferret.locale)
 class AsciiWhiteSpaceTokenizerTest < Test::Unit::TestCase
   include Ferret::Analysis
@@ -186,7 +186,7 @@ class WhiteSpaceTokenizerTest < Test::Unit::TestCase
     assert_equal(Token.new('áägç®êëì¯úøã¬öîí', 55, 86), t.next)
     assert(! t.next())
   end
-end if (/mswin/i !~ RUBY_PLATFORM)
+end if (/utf-8/i !~ Ferret.locale)
 class AsciiStandardTokenizerTest < Test::Unit::TestCase
   include Ferret::Analysis
@@ -275,7 +275,7 @@ class StandardTokenizerTest < Test::Unit::TestCase
     assert_equal(Token.new('www.davebalmain.com/trac-site', 25, 61), t.next)
     assert(! t.next())
   end
-end if (/mswin/i !~ RUBY_PLATFORM)
+end if (/utf-8/i !~ Ferret.locale)
 class RegExpTokenizerTest < Test::Unit::TestCase
   include Ferret::Analysis
@@ -428,7 +428,7 @@ END
     assert_equal(Token.new('szzzt', 256, 264), t.next)
     assert(! t.next())
   end
-end if (/mswin/i !~ RUBY_PLATFORM)
+end if (/utf-8/i !~ Ferret.locale)
 class StopFilterTest < Test::Unit::TestCase
   include Ferret::Analysis
@@ -467,7 +467,7 @@ class StemFilterTest < Test::Unit::TestCase
     assert_equal(Token.new("DEBate", 23, 31), t.next)
     assert_equal(Token.new("Debat", 32, 39), t.next)
-    if Ferret.locale.downcase.index("utf")
+    if Ferret.locale and Ferret.locale.downcase.index("utf")
       input = "Dêbate dêbates DÊBATED DÊBATing dêbater";
       t = StemFilter.new(LowerCaseFilter.new(LetterTokenizer.new(input)), :english)
       assert_equal(Token.new("dêbate", 0, 7), t.next)

data/test/unit/search/tc_spans.rb CHANGED Viewed

@@ -16,7 +16,7 @@ class SpansBasicTest < Test::Unit::TestCase
     [
       "start finish one two three four five six seven",
       "start one finish two three four five six seven",
-      "start one two finish three four five six seven",
+      "start one two finish three four five six seven flip",
       "start one two three finish four five six seven",
       "start one two three four finish five six seven",
       "start one two three four five finish six seven",
@@ -26,12 +26,12 @@ class SpansBasicTest < Test::Unit::TestCase
       "start one two three four five finish six seven",
       "start one two three four finish five six seven",
       "start one two three finish four five six seven",
-      "start one two finish three four five six seven",
+      "start one two finish three four five six seven flop",
       "start one finish two three four five six seven",
       "start finish one two three four five six seven",
       "start start  one two three four five six seven",
       "finish start one two three four five six seven",
-      "finish one start two three four five six seven",
+      "finish one start two three four five six seven toot",
       "finish one two start three four five six seven",
       "finish one two three start four five six seven",
       "finish one two three four start five six seven",
@@ -89,6 +89,18 @@ class SpansBasicTest < Test::Unit::TestCase
     tq = SpanTermQuery.new(:field, "eight")
     check_hits(tq, [6,7,8,22,23,24])
   end
+  def test_span_multi_term_query()
+    tq = SpanMultiTermQuery.new(:field, ["eight", "nine"])
+    check_hits(tq, [6,7,8,22,23,24], true)
+    tq = SpanMultiTermQuery.new(:field, ["flip", "flop", "toot", "nine"])
+    check_hits(tq, [2,7,12,17,23])
+  end
+  def test_span_prefix_query()
+    tq = SpanPrefixQuery.new(:field, "fl")
+    check_hits(tq, [2, 12], true)
+  end
   def test_span_near_query()
     tq1 = SpanTermQuery.new(:field, "start")
@@ -108,6 +120,10 @@ class SpansBasicTest < Test::Unit::TestCase
     check_hits(q, [0,1,2,3,4,10,11,12,13,14])
     q = SpanNearQuery.new(:clauses => [tq1, tq2], :slop => 4)
     check_hits(q, [0,1,2,3,4,10,11,12,13,14,16,17,18,19,20,26,27,28,29,30])
+    q = SpanNearQuery.new(:clauses => [
+                          SpanPrefixQuery.new(:field, 'se'),
+                          SpanPrefixQuery.new(:field, 'fl')], :slop => 0)
+    check_hits(q, [2, 12], true)
   end
   def test_span_not_query()

data/test/unit/search/tm_searcher.rb CHANGED Viewed

@@ -125,12 +125,18 @@ module SearcherTests
   def test_phrase_query()
     pq = PhraseQuery.new(:field)
+    assert_equal("\"\"", pq.to_s(:field))
+    assert_equal("field:\"\"", pq.to_s)
     pq << "quick" << "brown" << "fox"
     check_hits(pq, [1])
-    pq = PhraseQuery.new(:field)
+    pq = PhraseQuery.new(:field, 1)
     pq << "quick"
     pq.add_term("fox", 2)
+    check_hits(pq, [1,11,14,16])
+    pq.slop = 0
     check_hits(pq, [1,11,14])
     pq.slop = 1