ferret 0.10.11 → 0.10.12

Sign up to get free protection for your applications and to get access to all the features.
data/ext/search.c CHANGED
@@ -1690,8 +1690,8 @@ static void msea_close(Searcher *self)
1690
1690
  s = msea->searchers[i];
1691
1691
  s->close(s);
1692
1692
  }
1693
- free(msea->searchers);
1694
1693
  }
1694
+ free(msea->searchers);
1695
1695
  free(msea->starts);
1696
1696
  free(self);
1697
1697
  }
data/ext/sort.c CHANGED
@@ -538,7 +538,7 @@ SortField *sort_field_auto_new(char *field, bool reverse)
538
538
 
539
539
  void *field_cache_get_index(IndexReader *ir, SortField *sf)
540
540
  {
541
- void *index = NULL;
541
+ void *volatile index = NULL;
542
542
  int length = 0;
543
543
  TermEnum *volatile te = NULL;
544
544
  TermDocEnum *volatile tde = NULL;
data/ext/stopwords.c CHANGED
@@ -10,10 +10,9 @@
10
10
 
11
11
  const char *ENGLISH_STOP_WORDS[] = {
12
12
  "a", "an", "and", "are", "as", "at", "be", "but", "by", "for", "if", "in",
13
- "into", "is", "it", "no", "not", "of", "on", "or", "s", "such", "t",
14
- "that",
13
+ "into", "is", "it", "no", "not", "of", "on", "or", "such", "that",
15
14
  "the", "their", "then", "there", "these", "they", "this", "to", "was",
16
- "will", "with", NULL
15
+ "with", NULL
17
16
  };
18
17
 
19
18
  const char *FULL_ENGLISH_STOP_WORDS[] = {
data/lib/ferret/index.rb CHANGED
@@ -527,6 +527,7 @@ module Ferret::Index
527
527
  @searcher = nil
528
528
  end
529
529
  end
530
+ alias :commit :flush
530
531
 
531
532
  # optimizes the index. This should only be called when the index will no
532
533
  # longer be updated very often, but will be read a lot.
@@ -670,7 +671,7 @@ module Ferret::Index
670
671
  latest = false
671
672
  begin
672
673
  latest = @reader.latest?
673
- rescue LockException => le
674
+ rescue LockError => le
674
675
  sleep(@options[:lock_retry_time]) # sleep for 2 seconds and try again
675
676
  latest = @reader.latest?
676
677
  end
@@ -1,3 +1,3 @@
1
1
  module Ferret
2
- VERSION = '0.10.11'
2
+ VERSION = '0.10.12'
3
3
  end
@@ -368,6 +368,68 @@ class RegExpTokenizerTest < Test::Unit::TestCase
368
368
  end
369
369
  end
370
370
 
371
+ class MappingFilterTest < Test::Unit::TestCase
372
+ include Ferret::Analysis
373
+
374
+ def test_mapping_filter()
375
+ mapping = {
376
+ ['à','á','â','ã','ä','å','ā','ă'] => 'a',
377
+ 'æ' => 'ae',
378
+ ['ď','đ'] => 'd',
379
+ ['ç','ć','č','ĉ','ċ'] => 'c',
380
+ ['è','é','ê','ë','ē','ę','ě','ĕ','ė',] => 'e',
381
+ ['ƒ'] => 'f',
382
+ ['ĝ','ğ','ġ','ģ'] => 'g',
383
+ ['ĥ','ħ'] => 'h',
384
+ ['ì','ì','í','î','ï','ī','ĩ','ĭ'] => 'i',
385
+ ['į','ı','ĳ','ĵ'] => 'j',
386
+ ['ķ','ĸ'] => 'k',
387
+ ['ł','ľ','ĺ','ļ','ŀ'] => 'l',
388
+ ['ñ','ń','ň','ņ','ŉ','ŋ'] => 'n',
389
+ ['ò','ó','ô','õ','ö','ø','ō','ő','ŏ','ŏ'] => 'o',
390
+ 'œ' => 'oek',
391
+ 'ą' => 'q',
392
+ ['ŕ','ř','ŗ'] => 'r',
393
+ ['ś','š','ş','ŝ','ș'] => 's',
394
+ ['ť','ţ','ŧ','ț'] => 't',
395
+ ['ù','ú','û','ü','ū','ů','ű','ŭ','ũ','ų'] => 'u',
396
+ 'ŵ' => 'w',
397
+ ['ý','ÿ','ŷ'] => 'y',
398
+ ['ž','ż','ź'] => 'z'
399
+ }
400
+ input = <<END
401
+ aàáâãäåāăb cæd eďđf gçćčĉċh ièéêëēęěĕėj kƒl mĝğġģn oĥħp qììíîïīĩĭr sįıĳĵt uķĸv
402
+ włľĺļŀx yñńňņŉŋz aòóôõöøōőŏŏb cœd eąf gŕřŗh iśšşŝșj kťţŧțl mùúûüūůűŭũųn oŵp
403
+ qýÿŷr sžżźt
404
+ END
405
+ t = MappingFilter.new(LetterTokenizer.new(input), mapping)
406
+ assert_equal(Token.new('aaaaaaaaab', 0, 18), t.next)
407
+ assert_equal(Token.new('caed', 19, 23), t.next)
408
+ assert_equal(Token.new('eddf', 24, 30), t.next)
409
+ assert_equal(Token.new('gccccch', 31, 43), t.next)
410
+ assert_equal(Token.new('ieeeeeeeeej', 44, 64), t.next)
411
+ assert_equal(Token.new('kfl', 65, 69), t.next)
412
+ assert_equal(Token.new('mggggn', 70, 80), t.next)
413
+ assert_equal(Token.new('ohhp', 81, 87), t.next)
414
+ assert_equal(Token.new('qiiiiiiiir', 88, 106), t.next)
415
+ assert_equal(Token.new('sjjjjt', 107, 117), t.next)
416
+ assert_equal(Token.new('ukkv', 118, 124), t.next)
417
+ assert_equal(Token.new('wlllllx', 125, 137), t.next)
418
+ assert_equal(Token.new('ynnnnnnz', 138, 152), t.next)
419
+ assert_equal(Token.new('aoooooooooob', 153, 175), t.next)
420
+ assert_equal(Token.new('coekd', 176, 180), t.next)
421
+ assert_equal(Token.new('eqf', 181, 185), t.next)
422
+ assert_equal(Token.new('grrrh', 186, 194), t.next)
423
+ assert_equal(Token.new('isssssj', 195, 207), t.next)
424
+ assert_equal(Token.new('kttttl', 208, 218), t.next)
425
+ assert_equal(Token.new('muuuuuuuuuun', 219, 241), t.next)
426
+ assert_equal(Token.new('owp', 242, 246), t.next)
427
+ assert_equal(Token.new('qyyyr', 247, 255), t.next)
428
+ assert_equal(Token.new('szzzt', 256, 264), t.next)
429
+ assert(! t.next())
430
+ end
431
+ end if (/mswin/i !~ RUBY_PLATFORM)
432
+
371
433
  class StopFilterTest < Test::Unit::TestCase
372
434
  include Ferret::Analysis
373
435
 
@@ -525,7 +525,7 @@ class IndexTest < Test::Unit::TestCase
525
525
  {:content => "four", :date => "19390912"}
526
526
  ].each {|doc| index << doc}
527
527
 
528
- sf_date = SortField.new("date", {:sort_type => :integer})
528
+ sf_date = SortField.new("date", {:type => :integer})
529
529
  #top_docs = index.search("one", :sort => [sf_date, SortField::SCORE])
530
530
  top_docs = index.search("one", :sort => Sort.new("date"))
531
531
  assert_equal(3, top_docs.total_hits)
@@ -773,5 +773,23 @@ class IndexTest < Test::Unit::TestCase
773
773
  index.add_document({:content => "Content With Capitals"}, a)
774
774
  tv = index.reader.term_vector(0, :content)
775
775
  assert_equal("Capitals", tv.terms[0].text)
776
+ index.close
777
+ end
778
+
779
+ def test_top_doc_to_json
780
+ index = Ferret::I.new
781
+ [
782
+ {:f1 => "one"},
783
+ {:f2 => ["two",2,2.0]},
784
+ {:f3 => 3},
785
+ {:f4 => 4.0},
786
+ {:f5 => "five", :funny => '"' * 10_000}
787
+ ].each {|doc| index << doc}
788
+ json_str = index.search("one two 3 4.0 five",
789
+ :sort => Ferret::Search::Sort::INDEX_ORDER).to_json
790
+ assert(json_str == '[{"f1":"one"},{"f2":["two","2","2.0"]},{"f3":"3"},{"f4":"4.0"},{"f5":"five","funny":"' + '\'"\'' * 10_000 + '"}]' ||
791
+ json_str == '[{"f1":"one"},{"f2":["two","2","2.0"]},{"f3":"3"},{"f4":"4.0"},{"funny":"' + '\'"\'' * 10_000 + '","f5":"five"}]')
792
+ assert_equal('[]', index.search("xxx").to_json)
793
+ index.close
776
794
  end
777
795
  end
@@ -128,7 +128,7 @@ class SearchAndSortTest < Test::Unit::TestCase
128
128
 
129
129
  ## byte
130
130
  do_test_top_docs(is, q, [3,2,7,4,8,5,9,1,6,0],
131
- [SortField.new(:int, :type => :byte)])
131
+ SortField.new(:int, :type => :byte))
132
132
  do_test_top_docs(is, q, [0,1,6,5,9,4,8,2,7,3],
133
133
  [SortField.new(:int, :type => :byte, :reverse => true)])
134
134
 
@@ -133,6 +133,13 @@ class BitVectorTest < Test::Unit::TestCase
133
133
 
134
134
  assert_equal(bv2, and_bv, "and_bv should be empty")
135
135
  assert_equal(0, and_bv.count)
136
+
137
+ bv1 = BitVector.new
138
+ bv2 = BitVector.new.not!
139
+ bv1.set(10)
140
+ bv1.set(11)
141
+ bv1.set(20)
142
+ assert_equal(bv1, bv1 & bv2, "bv anded with empty not bv should be same")
136
143
  end
137
144
 
138
145
  def test_bv_or
metadata CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.0
3
3
  specification_version: 1
4
4
  name: ferret
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.10.11
7
- date: 2006-10-11 00:00:00 +09:00
6
+ version: 0.10.12
7
+ date: 2006-10-20 00:00:00 +09:00
8
8
  summary: Ruby indexing library.
9
9
  require_paths:
10
10
  - lib
@@ -55,7 +55,6 @@ files:
55
55
  - ext/stopwords.c
56
56
  - ext/array.c
57
57
  - ext/index.c
58
- - ext/mem_pool.c
59
58
  - ext/compound_io.c
60
59
  - ext/q_prefix.c
61
60
  - ext/q_range.c
@@ -72,8 +71,10 @@ files:
72
71
  - ext/q_parser.c
73
72
  - ext/q_span.c
74
73
  - ext/term_vectors.c
74
+ - ext/multimapper.c
75
+ - ext/mempool.c
75
76
  - ext/priorityqueue.h
76
- - ext/defines.h
77
+ - ext/mempool.h
77
78
  - ext/posh.h
78
79
  - ext/store.h
79
80
  - ext/hashset.h
@@ -89,9 +90,10 @@ files:
89
90
  - ext/win32.h
90
91
  - ext/analysis.h
91
92
  - ext/search.h
92
- - ext/mem_pool.h
93
93
  - ext/array.h
94
94
  - ext/lang.h
95
+ - ext/config.h
96
+ - ext/multimapper.h
95
97
  - ext/stem_UTF_8_norwegian.c
96
98
  - ext/stem_UTF_8_danish.c
97
99
  - ext/stem_UTF_8_dutch.c
@@ -149,10 +151,10 @@ files:
149
151
  - ext/api.h
150
152
  - ext/header.h
151
153
  - ext/libstemmer.c
152
- - ext/modules.h
153
- - ext/libstemmer.h
154
154
  - ext/ferret.h
155
155
  - ext/ferret.c
156
+ - ext/modules.h
157
+ - ext/libstemmer.h
156
158
  - ext/r_analysis.c
157
159
  - ext/r_utils.c
158
160
  - ext/r_store.c