ferret 0.10.13 → 0.10.14

Sign up to get free protection for your applications and to get access to all the features.
data/ext/r_search.c CHANGED
@@ -30,6 +30,8 @@ static VALUE cMatchAllQuery;
30
30
  static VALUE cConstantScoreQuery;
31
31
  static VALUE cFilteredQuery;
32
32
  static VALUE cSpanTermQuery;
33
+ static VALUE cSpanMultiTermQuery;
34
+ static VALUE cSpanPrefixQuery;
33
35
  static VALUE cSpanFirstQuery;
34
36
  static VALUE cSpanNearQuery;
35
37
  static VALUE cSpanOrQuery;
@@ -557,6 +559,12 @@ frt_get_q(Query *q)
557
559
  self = MK_QUERY(cPrefixQuery, q);
558
560
  break;
559
561
  case SPAN_TERM_QUERY:
562
+ self = MK_QUERY(cSpanMultiTermQuery, q);
563
+ break;
564
+ case SPAN_MULTI_TERM_QUERY:
565
+ self = MK_QUERY(cSpanPrefixQuery, q);
566
+ break;
567
+ case SPAN_PREFIX_QUERY:
560
568
  self = MK_QUERY(cSpanTermQuery, q);
561
569
  break;
562
570
  case SPAN_FIRST_QUERY:
@@ -1127,7 +1135,7 @@ frt_rq_init(VALUE self, VALUE rfield, VALUE roptions)
1127
1135
 
1128
1136
  /*
1129
1137
  * call-seq:
1130
- * PhraseQuery.new(field) -> phrase_query
1138
+ * PhraseQuery.new(field, slop = 0) -> phrase_query
1131
1139
  *
1132
1140
  * Create a new PhraseQuery on the field +field+. You need to add terms to
1133
1141
  * the query before it will do anything of value. See PhraseQuery#add_term.
@@ -1590,6 +1598,54 @@ frt_spantq_init(VALUE self, VALUE rfield, VALUE rterm)
1590
1598
  return self;
1591
1599
  }
1592
1600
 
1601
+ /****************************************************************************
1602
+ *
1603
+ * SpanMultiTermQuery Methods
1604
+ *
1605
+ ****************************************************************************/
1606
+
1607
+ /*
1608
+ * call-seq:
1609
+ * SpanMultiTermQuery.new(field, terms) -> query
1610
+ *
1611
+ * Create a new SpanMultiTermQuery which matches all documents with the terms
1612
+ * +terms+ in the field +field+. +terms+ should be an array of Strings.
1613
+ */
1614
+ static VALUE
1615
+ frt_spanmtq_init(VALUE self, VALUE rfield, VALUE rterms)
1616
+ {
1617
+ Query *q = spanmtq_new(frt_field(rfield));
1618
+ int i;
1619
+ for (i = RARRAY(rterms)->len - 1; i >= 0; i--) {
1620
+ spanmtq_add_term(q, StringValuePtr(RARRAY(rterms)->ptr[i]));
1621
+ }
1622
+ Frt_Wrap_Struct(self, NULL, &frt_q_free, q);
1623
+ object_add(q, self);
1624
+ return self;
1625
+ }
1626
+
1627
+ /****************************************************************************
1628
+ *
1629
+ * SpanPrefixQuery Methods
1630
+ *
1631
+ ****************************************************************************/
1632
+
1633
+ /*
1634
+ * call-seq:
1635
+ * SpanPrefixQuery.new(field, prefix) -> query
1636
+ *
1637
+ * Create a new SpanPrefixQuery which matches all documents with the prefix
1638
+ * +prefix+ in the field +field+.
1639
+ */
1640
+ static VALUE
1641
+ frt_spanprq_init(VALUE self, VALUE rfield, VALUE rprefix)
1642
+ {
1643
+ Query *q = spanprq_new(frt_field(rfield), StringValuePtr(rprefix));
1644
+ Frt_Wrap_Struct(self, NULL, &frt_q_free, q);
1645
+ object_add(q, self);
1646
+ return self;
1647
+ }
1648
+
1593
1649
  /****************************************************************************
1594
1650
  *
1595
1651
  * SpanFirstQuery Methods
@@ -3467,6 +3523,42 @@ Init_SpanTermQuery(void)
3467
3523
  rb_define_method(cSpanTermQuery, "initialize", frt_spantq_init, 2);
3468
3524
  }
3469
3525
 
3526
+ /*
3527
+ * Document-class: Ferret::Search::Spans::SpanMultiTermQuery
3528
+ *
3529
+ * == Summary
3530
+ *
3531
+ * A SpanMultiTermQuery is the Spans version of MultiTermQuery, the only
3532
+ * difference being that it returns the start and end offset of all of its
3533
+ * matches for use by enclosing SpanQueries.
3534
+ */
3535
+ static void
3536
+ Init_SpanMultiTermQuery(void)
3537
+ {
3538
+ cSpanMultiTermQuery = rb_define_class_under(mSpans, "SpanMultiTermQuery", cQuery);
3539
+ rb_define_alloc_func(cSpanMultiTermQuery, frt_data_alloc);
3540
+
3541
+ rb_define_method(cSpanMultiTermQuery, "initialize", frt_spanmtq_init, 2);
3542
+ }
3543
+
3544
+ /*
3545
+ * Document-class: Ferret::Search::Spans::SpanPrefixQuery
3546
+ *
3547
+ * == Summary
3548
+ *
3549
+ * A SpanPrefixQuery is the Spans version of PrefixQuery, the only difference
3550
+ * being that it returns the start and end offset of all of its matches for
3551
+ * use by enclosing SpanQueries.
3552
+ */
3553
+ static void
3554
+ Init_SpanPrefixQuery(void)
3555
+ {
3556
+ cSpanPrefixQuery = rb_define_class_under(mSpans, "SpanPrefixQuery", cQuery);
3557
+ rb_define_alloc_func(cSpanPrefixQuery, frt_data_alloc);
3558
+
3559
+ rb_define_method(cSpanPrefixQuery, "initialize", frt_spanprq_init, 2);
3560
+ }
3561
+
3470
3562
  /*
3471
3563
  * Document-class: Ferret::Search::Spans::SpanFirstQuery
3472
3564
  *
@@ -3652,6 +3744,8 @@ Init_Spans(void)
3652
3744
  {
3653
3745
  mSpans = rb_define_module_under(mSearch, "Spans");
3654
3746
  Init_SpanTermQuery();
3747
+ Init_SpanMultiTermQuery();
3748
+ Init_SpanPrefixQuery();
3655
3749
  Init_SpanFirstQuery();
3656
3750
  Init_SpanNearQuery();
3657
3751
  Init_SpanOrQuery();
data/ext/search.c CHANGED
@@ -281,7 +281,6 @@ static const char *QUERY_NAMES[] = {
281
281
  "MultiTermQuery",
282
282
  "BooleanQuery",
283
283
  "PhraseQuery",
284
- "MultiPhraseQuery",
285
284
  "ConstantScoreQuery",
286
285
  "FilteredQuery",
287
286
  "MatchAllQuery",
@@ -290,6 +289,8 @@ static const char *QUERY_NAMES[] = {
290
289
  "FuzzyQuery",
291
290
  "PrefixQuery",
292
291
  "SpanTermQuery",
292
+ "SpanMultiTermQuery",
293
+ "SpanPrefixQuery",
293
294
  "SpanFirstQuery",
294
295
  "SpanOrQuery",
295
296
  "SpanNotQuery",
data/ext/search.h CHANGED
@@ -188,6 +188,8 @@ enum QUERY_TYPE
188
188
  FUZZY_QUERY,
189
189
  PREFIX_QUERY,
190
190
  SPAN_TERM_QUERY,
191
+ SPAN_MULTI_TERM_QUERY,
192
+ SPAN_PREFIX_QUERY,
191
193
  SPAN_FIRST_QUERY,
192
194
  SPAN_OR_QUERY,
193
195
  SPAN_NOT_QUERY,
@@ -342,7 +344,6 @@ typedef struct MTQSubQuery
342
344
 
343
345
  #define PREFIX_QUERY_MAX_TERMS 256
344
346
 
345
-
346
347
  typedef struct PrefixQuery
347
348
  {
348
349
  MTQSubQuery super;
@@ -480,6 +481,22 @@ typedef struct SpanTermQuery
480
481
  } SpanTermQuery;
481
482
  extern Query *spantq_new(const char *field, const char *term);
482
483
 
484
+ /***************************************************************************
485
+ * SpanMultiTermQuery
486
+ ***************************************************************************/
487
+
488
+ #define SPAN_MULTI_TERM_QUERY_CAPA 1024
489
+ typedef struct SpanMultiTermQuery
490
+ {
491
+ SpanQuery super;
492
+ char **terms;
493
+ int term_cnt;
494
+ int term_capa;
495
+ } SpanMultiTermQuery;
496
+
497
+ extern Query *spanmtq_new(const char *field);
498
+ extern Query *spanmtq_new_conf(const char *field, int max_size);
499
+ extern void spanmtq_add_term(Query *self, const char *term);
483
500
 
484
501
  /***************************************************************************
485
502
  * SpanFirstQuery
@@ -544,6 +561,20 @@ extern Query *spanxq_new(Query *inc, Query *exc);
544
561
  extern Query *spanxq_new_nr(Query *inc, Query *exc);
545
562
 
546
563
 
564
+ /***************************************************************************
565
+ * SpanPrefixQuery
566
+ ***************************************************************************/
567
+
568
+ #define SPAN_PREFIX_QUERY_MAX_TERMS 256
569
+
570
+ typedef struct SpanPrefixQuery
571
+ {
572
+ SpanQuery super;
573
+ char *prefix;
574
+ } SpanPrefixQuery;
575
+
576
+ extern Query *spanprq_new(const char *field, const char *prefix);
577
+
547
578
 
548
579
  /***************************************************************************
549
580
  *
data/ext/sort.c CHANGED
@@ -559,7 +559,7 @@ void *field_cache_get_index(IndexReader *ir, SortField *sf)
559
559
 
560
560
  if (sf->type == SORT_TYPE_AUTO) {
561
561
  te = ir->terms(ir, field_num);
562
- if (!te->next(te)) {
562
+ if (!te->next(te) && (ir->num_docs(ir) > 0)) {
563
563
  RAISE(ARG_ERROR,
564
564
  "Cannot sort by field \"%s\" as there are no terms "
565
565
  "in that field in the index.", sf->field);
data/lib/ferret/index.rb CHANGED
@@ -88,6 +88,8 @@ module Ferret::Index
88
88
  if @key.is_a?(Array)
89
89
  @key.flatten.map {|k| k.to_s.intern}
90
90
  end
91
+ else
92
+ @key = nil
91
93
  end
92
94
 
93
95
  if (fi = options[:field_infos]).is_a?(String)
@@ -1,3 +1,3 @@
1
1
  module Ferret
2
- VERSION = '0.10.13'
2
+ VERSION = '0.10.14'
3
3
  end
@@ -3,7 +3,7 @@ require File.dirname(__FILE__) + "/../../test_helper"
3
3
  class AnalyzerTest < Test::Unit::TestCase
4
4
  include Ferret::Analysis
5
5
 
6
- def test_c_analyzer()
6
+ def test_analyzer()
7
7
  input = 'DBalmain@gmail.com is My E-Mail 523@#$ ADDRESS. 23#@$'
8
8
  a = Analyzer.new()
9
9
  t = a.token_stream("fieldname", input)
@@ -38,12 +38,12 @@ class AnalyzerTest < Test::Unit::TestCase
38
38
  assert_equal(Token.new("ADDRESS", 39, 46), t.next())
39
39
  assert(! t.next())
40
40
  end
41
- end if (/mswin/i !~ RUBY_PLATFORM)
41
+ end if (/utf-8/i !~ Ferret.locale)
42
42
 
43
43
  class AsciiLetterAnalyzerTest < Test::Unit::TestCase
44
44
  include Ferret::Analysis
45
45
 
46
- def test_c_letter_analyzer()
46
+ def test_letter_analyzer()
47
47
  input = 'DBalmain@gmail.com is My E-Mail 523@#$ ADDRESS. 23#@$'
48
48
  a = AsciiLetterAnalyzer.new()
49
49
  t = a.token_stream("fieldname", input)
@@ -83,7 +83,7 @@ end
83
83
  class LetterAnalyzerTest < Test::Unit::TestCase
84
84
  include Ferret::Analysis
85
85
 
86
- def test_c_letter_analyzer()
86
+ def test_letter_analyzer()
87
87
  Ferret.locale = ""
88
88
  input = 'DBalmän@gmail.com is My e-mail 52 #$ address. 23#@$ ÁÄGÇ®ÊË̯ÚØìÖÎÍ'
89
89
  a = LetterAnalyzer.new(false)
@@ -131,12 +131,12 @@ class LetterAnalyzerTest < Test::Unit::TestCase
131
131
  assert_equal(Token.new("öîí", 80, 86), t.next)
132
132
  assert(! t.next())
133
133
  end
134
- end if (/mswin/i !~ RUBY_PLATFORM)
134
+ end if (/utf-8/i !~ Ferret.locale)
135
135
 
136
136
  class AsciiWhiteSpaceAnalyzerTest < Test::Unit::TestCase
137
137
  include Ferret::Analysis
138
138
 
139
- def test_c_white_space_analyzer()
139
+ def test_white_space_analyzer()
140
140
  input = 'DBalmain@gmail.com is My E-Mail 52 #$ ADDRESS. 23#@$'
141
141
  a = AsciiWhiteSpaceAnalyzer.new()
142
142
  t = a.token_stream("fieldname", input)
@@ -176,7 +176,7 @@ end
176
176
  class WhiteSpaceAnalyzerTest < Test::Unit::TestCase
177
177
  include Ferret::Analysis
178
178
 
179
- def test_c_white_space_analyzer()
179
+ def test_white_space_analyzer()
180
180
  input = 'DBalmän@gmail.com is My e-mail 52 #$ address. 23#@$ ÁÄGÇ®ÊË̯ÚØìÖÎÍ'
181
181
  a = WhiteSpaceAnalyzer.new()
182
182
  t = a.token_stream("fieldname", input)
@@ -214,12 +214,12 @@ class WhiteSpaceAnalyzerTest < Test::Unit::TestCase
214
214
  assert_equal(Token.new('áägç®êëì¯úøã¬öîí', 55, 86), t.next)
215
215
  assert(! t.next())
216
216
  end
217
- end if (/mswin/i !~ RUBY_PLATFORM)
217
+ end if (/utf-8/i !~ Ferret.locale)
218
218
 
219
219
  class AsciiStandardAnalyzerTest < Test::Unit::TestCase
220
220
  include Ferret::Analysis
221
221
 
222
- def test_c_standard_analyzer()
222
+ def test_standard_analyzer()
223
223
  input = 'DBalmain@gmail.com is My e-mail 52 #$ Address. 23#@$ http://www.google.com/results/ T.N.T. 123-1235-ASD-1234'
224
224
  a = AsciiStandardAnalyzer.new()
225
225
  t = a.token_stream("fieldname", input)
@@ -267,7 +267,7 @@ end
267
267
  class StandardAnalyzerTest < Test::Unit::TestCase
268
268
  include Ferret::Analysis
269
269
 
270
- def test_c_standard_analyzer()
270
+ def test_standard_analyzer()
271
271
  input = 'DBalmán@gmail.com is My e-mail and the Address. 23#@$ http://www.google.com/results/ T.N.T. 123-1235-ASD-1234 23#@$ ÁÄGÇ®ÊË̯ÚØìÖÎÍ'
272
272
  a = StandardAnalyzer.new()
273
273
  t = a.token_stream("fieldname", input)
@@ -350,11 +350,11 @@ class StandardAnalyzerTest < Test::Unit::TestCase
350
350
  assert_equal(Token.new('öîí', 142, 148), t2.next)
351
351
  assert(! t2.next())
352
352
  end
353
- end if (/mswin/i !~ RUBY_PLATFORM)
353
+ end if (/utf-8/i !~ Ferret.locale)
354
354
 
355
355
  class PerFieldAnalyzerTest < Test::Unit::TestCase
356
356
  include Ferret::Analysis
357
- def test_c_per_field_analyzer()
357
+ def test_per_field_analyzer()
358
358
  input = 'DBalmain@gmail.com is My e-mail 52 #$ address. 23#@$'
359
359
  pfa = PerFieldAnalyzer.new(StandardAnalyzer.new())
360
360
  pfa['white'] = WhiteSpaceAnalyzer.new(false)
@@ -545,4 +545,4 @@ class CustomAnalyzerTest < Test::Unit::TestCase
545
545
  assert_equal(Token.new("dêbater", 36, 44), t.next)
546
546
  assert(! t.next())
547
547
  end
548
- end if (/mswin/i !~ RUBY_PLATFORM)
548
+ end if (/utf-8/i !~ Ferret.locale)
@@ -109,7 +109,7 @@ class LetterTokenizerTest < Test::Unit::TestCase
109
109
  assert_equal(Token.new('öîí', 80, 86), t.next)
110
110
  assert(! t.next())
111
111
  end
112
- end if (/mswin/i !~ RUBY_PLATFORM)
112
+ end if (/utf-8/i !~ Ferret.locale)
113
113
 
114
114
  class AsciiWhiteSpaceTokenizerTest < Test::Unit::TestCase
115
115
  include Ferret::Analysis
@@ -186,7 +186,7 @@ class WhiteSpaceTokenizerTest < Test::Unit::TestCase
186
186
  assert_equal(Token.new('áägç®êëì¯úøã¬öîí', 55, 86), t.next)
187
187
  assert(! t.next())
188
188
  end
189
- end if (/mswin/i !~ RUBY_PLATFORM)
189
+ end if (/utf-8/i !~ Ferret.locale)
190
190
 
191
191
  class AsciiStandardTokenizerTest < Test::Unit::TestCase
192
192
  include Ferret::Analysis
@@ -275,7 +275,7 @@ class StandardTokenizerTest < Test::Unit::TestCase
275
275
  assert_equal(Token.new('www.davebalmain.com/trac-site', 25, 61), t.next)
276
276
  assert(! t.next())
277
277
  end
278
- end if (/mswin/i !~ RUBY_PLATFORM)
278
+ end if (/utf-8/i !~ Ferret.locale)
279
279
 
280
280
  class RegExpTokenizerTest < Test::Unit::TestCase
281
281
  include Ferret::Analysis
@@ -428,7 +428,7 @@ END
428
428
  assert_equal(Token.new('szzzt', 256, 264), t.next)
429
429
  assert(! t.next())
430
430
  end
431
- end if (/mswin/i !~ RUBY_PLATFORM)
431
+ end if (/utf-8/i !~ Ferret.locale)
432
432
 
433
433
  class StopFilterTest < Test::Unit::TestCase
434
434
  include Ferret::Analysis
@@ -467,7 +467,7 @@ class StemFilterTest < Test::Unit::TestCase
467
467
  assert_equal(Token.new("DEBate", 23, 31), t.next)
468
468
  assert_equal(Token.new("Debat", 32, 39), t.next)
469
469
 
470
- if Ferret.locale.downcase.index("utf")
470
+ if Ferret.locale and Ferret.locale.downcase.index("utf")
471
471
  input = "Dêbate dêbates DÊBATED DÊBATing dêbater";
472
472
  t = StemFilter.new(LowerCaseFilter.new(LetterTokenizer.new(input)), :english)
473
473
  assert_equal(Token.new("dêbate", 0, 7), t.next)
@@ -16,7 +16,7 @@ class SpansBasicTest < Test::Unit::TestCase
16
16
  [
17
17
  "start finish one two three four five six seven",
18
18
  "start one finish two three four five six seven",
19
- "start one two finish three four five six seven",
19
+ "start one two finish three four five six seven flip",
20
20
  "start one two three finish four five six seven",
21
21
  "start one two three four finish five six seven",
22
22
  "start one two three four five finish six seven",
@@ -26,12 +26,12 @@ class SpansBasicTest < Test::Unit::TestCase
26
26
  "start one two three four five finish six seven",
27
27
  "start one two three four finish five six seven",
28
28
  "start one two three finish four five six seven",
29
- "start one two finish three four five six seven",
29
+ "start one two finish three four five six seven flop",
30
30
  "start one finish two three four five six seven",
31
31
  "start finish one two three four five six seven",
32
32
  "start start one two three four five six seven",
33
33
  "finish start one two three four five six seven",
34
- "finish one start two three four five six seven",
34
+ "finish one start two three four five six seven toot",
35
35
  "finish one two start three four five six seven",
36
36
  "finish one two three start four five six seven",
37
37
  "finish one two three four start five six seven",
@@ -89,6 +89,18 @@ class SpansBasicTest < Test::Unit::TestCase
89
89
  tq = SpanTermQuery.new(:field, "eight")
90
90
  check_hits(tq, [6,7,8,22,23,24])
91
91
  end
92
+
93
+ def test_span_multi_term_query()
94
+ tq = SpanMultiTermQuery.new(:field, ["eight", "nine"])
95
+ check_hits(tq, [6,7,8,22,23,24], true)
96
+ tq = SpanMultiTermQuery.new(:field, ["flip", "flop", "toot", "nine"])
97
+ check_hits(tq, [2,7,12,17,23])
98
+ end
99
+
100
+ def test_span_prefix_query()
101
+ tq = SpanPrefixQuery.new(:field, "fl")
102
+ check_hits(tq, [2, 12], true)
103
+ end
92
104
 
93
105
  def test_span_near_query()
94
106
  tq1 = SpanTermQuery.new(:field, "start")
@@ -108,6 +120,10 @@ class SpansBasicTest < Test::Unit::TestCase
108
120
  check_hits(q, [0,1,2,3,4,10,11,12,13,14])
109
121
  q = SpanNearQuery.new(:clauses => [tq1, tq2], :slop => 4)
110
122
  check_hits(q, [0,1,2,3,4,10,11,12,13,14,16,17,18,19,20,26,27,28,29,30])
123
+ q = SpanNearQuery.new(:clauses => [
124
+ SpanPrefixQuery.new(:field, 'se'),
125
+ SpanPrefixQuery.new(:field, 'fl')], :slop => 0)
126
+ check_hits(q, [2, 12], true)
111
127
  end
112
128
 
113
129
  def test_span_not_query()
@@ -125,12 +125,18 @@ module SearcherTests
125
125
 
126
126
  def test_phrase_query()
127
127
  pq = PhraseQuery.new(:field)
128
+ assert_equal("\"\"", pq.to_s(:field))
129
+ assert_equal("field:\"\"", pq.to_s)
130
+
128
131
  pq << "quick" << "brown" << "fox"
129
132
  check_hits(pq, [1])
130
133
 
131
- pq = PhraseQuery.new(:field)
134
+ pq = PhraseQuery.new(:field, 1)
132
135
  pq << "quick"
133
136
  pq.add_term("fox", 2)
137
+ check_hits(pq, [1,11,14,16])
138
+
139
+ pq.slop = 0
134
140
  check_hits(pq, [1,11,14])
135
141
 
136
142
  pq.slop = 1