ferret 0.10.11 → 0.10.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/ext/search.c CHANGED
@@ -1690,8 +1690,8 @@ static void msea_close(Searcher *self)
1690
1690
  s = msea->searchers[i];
1691
1691
  s->close(s);
1692
1692
  }
1693
- free(msea->searchers);
1694
1693
  }
1694
+ free(msea->searchers);
1695
1695
  free(msea->starts);
1696
1696
  free(self);
1697
1697
  }
data/ext/sort.c CHANGED
@@ -538,7 +538,7 @@ SortField *sort_field_auto_new(char *field, bool reverse)
538
538
 
539
539
  void *field_cache_get_index(IndexReader *ir, SortField *sf)
540
540
  {
541
- void *index = NULL;
541
+ void *volatile index = NULL;
542
542
  int length = 0;
543
543
  TermEnum *volatile te = NULL;
544
544
  TermDocEnum *volatile tde = NULL;
data/ext/stopwords.c CHANGED
@@ -10,10 +10,9 @@
10
10
 
11
11
  const char *ENGLISH_STOP_WORDS[] = {
12
12
  "a", "an", "and", "are", "as", "at", "be", "but", "by", "for", "if", "in",
13
- "into", "is", "it", "no", "not", "of", "on", "or", "s", "such", "t",
14
- "that",
13
+ "into", "is", "it", "no", "not", "of", "on", "or", "such", "that",
15
14
  "the", "their", "then", "there", "these", "they", "this", "to", "was",
16
- "will", "with", NULL
15
+ "with", NULL
17
16
  };
18
17
 
19
18
  const char *FULL_ENGLISH_STOP_WORDS[] = {
data/lib/ferret/index.rb CHANGED
@@ -527,6 +527,7 @@ module Ferret::Index
527
527
  @searcher = nil
528
528
  end
529
529
  end
530
+ alias :commit :flush
530
531
 
531
532
  # optimizes the index. This should only be called when the index will no
532
533
  # longer be updated very often, but will be read a lot.
@@ -670,7 +671,7 @@ module Ferret::Index
670
671
  latest = false
671
672
  begin
672
673
  latest = @reader.latest?
673
- rescue LockException => le
674
+ rescue LockError => le
674
675
  sleep(@options[:lock_retry_time]) # sleep for 2 seconds and try again
675
676
  latest = @reader.latest?
676
677
  end
@@ -1,3 +1,3 @@
1
1
  module Ferret
2
- VERSION = '0.10.11'
2
+ VERSION = '0.10.12'
3
3
  end
@@ -368,6 +368,68 @@ class RegExpTokenizerTest < Test::Unit::TestCase
368
368
  end
369
369
  end
370
370
 
371
+ class MappingFilterTest < Test::Unit::TestCase
372
+ include Ferret::Analysis
373
+
374
+ def test_mapping_filter()
375
+ mapping = {
376
+ ['à','á','â','ã','ä','å','ā','ă'] => 'a',
377
+ 'æ' => 'ae',
378
+ ['ď','đ'] => 'd',
379
+ ['ç','ć','č','ĉ','ċ'] => 'c',
380
+ ['è','é','ê','ë','ē','ę','ě','ĕ','ė',] => 'e',
381
+ ['ƒ'] => 'f',
382
+ ['ĝ','ğ','ġ','ģ'] => 'g',
383
+ ['ĥ','ħ'] => 'h',
384
+ ['ì','ì','í','î','ï','ī','ĩ','ĭ'] => 'i',
385
+ ['į','ı','ĳ','ĵ'] => 'j',
386
+ ['ķ','ĸ'] => 'k',
387
+ ['ł','ľ','ĺ','ļ','ŀ'] => 'l',
388
+ ['ñ','ń','ň','ņ','ŉ','ŋ'] => 'n',
389
+ ['ò','ó','ô','õ','ö','ø','ō','ő','ŏ','ŏ'] => 'o',
390
+ 'œ' => 'oek',
391
+ 'ą' => 'q',
392
+ ['ŕ','ř','ŗ'] => 'r',
393
+ ['ś','š','ş','ŝ','ș'] => 's',
394
+ ['ť','ţ','ŧ','ț'] => 't',
395
+ ['ù','ú','û','ü','ū','ů','ű','ŭ','ũ','ų'] => 'u',
396
+ 'ŵ' => 'w',
397
+ ['ý','ÿ','ŷ'] => 'y',
398
+ ['ž','ż','ź'] => 'z'
399
+ }
400
+ input = <<END
401
+ aàáâãäåāăb cæd eďđf gçćčĉċh ièéêëēęěĕėj kƒl mĝğġģn oĥħp qììíîïīĩĭr sįıĳĵt uķĸv
402
+ włľĺļŀx yñńňņŉŋz aòóôõöøōőŏŏb cœd eąf gŕřŗh iśšşŝșj kťţŧțl mùúûüūůűŭũųn oŵp
403
+ qýÿŷr sžżźt
404
+ END
405
+ t = MappingFilter.new(LetterTokenizer.new(input), mapping)
406
+ assert_equal(Token.new('aaaaaaaaab', 0, 18), t.next)
407
+ assert_equal(Token.new('caed', 19, 23), t.next)
408
+ assert_equal(Token.new('eddf', 24, 30), t.next)
409
+ assert_equal(Token.new('gccccch', 31, 43), t.next)
410
+ assert_equal(Token.new('ieeeeeeeeej', 44, 64), t.next)
411
+ assert_equal(Token.new('kfl', 65, 69), t.next)
412
+ assert_equal(Token.new('mggggn', 70, 80), t.next)
413
+ assert_equal(Token.new('ohhp', 81, 87), t.next)
414
+ assert_equal(Token.new('qiiiiiiiir', 88, 106), t.next)
415
+ assert_equal(Token.new('sjjjjt', 107, 117), t.next)
416
+ assert_equal(Token.new('ukkv', 118, 124), t.next)
417
+ assert_equal(Token.new('wlllllx', 125, 137), t.next)
418
+ assert_equal(Token.new('ynnnnnnz', 138, 152), t.next)
419
+ assert_equal(Token.new('aoooooooooob', 153, 175), t.next)
420
+ assert_equal(Token.new('coekd', 176, 180), t.next)
421
+ assert_equal(Token.new('eqf', 181, 185), t.next)
422
+ assert_equal(Token.new('grrrh', 186, 194), t.next)
423
+ assert_equal(Token.new('isssssj', 195, 207), t.next)
424
+ assert_equal(Token.new('kttttl', 208, 218), t.next)
425
+ assert_equal(Token.new('muuuuuuuuuun', 219, 241), t.next)
426
+ assert_equal(Token.new('owp', 242, 246), t.next)
427
+ assert_equal(Token.new('qyyyr', 247, 255), t.next)
428
+ assert_equal(Token.new('szzzt', 256, 264), t.next)
429
+ assert(! t.next())
430
+ end
431
+ end if (/mswin/i !~ RUBY_PLATFORM)
432
+
371
433
  class StopFilterTest < Test::Unit::TestCase
372
434
  include Ferret::Analysis
373
435
 
@@ -525,7 +525,7 @@ class IndexTest < Test::Unit::TestCase
525
525
  {:content => "four", :date => "19390912"}
526
526
  ].each {|doc| index << doc}
527
527
 
528
- sf_date = SortField.new("date", {:sort_type => :integer})
528
+ sf_date = SortField.new("date", {:type => :integer})
529
529
  #top_docs = index.search("one", :sort => [sf_date, SortField::SCORE])
530
530
  top_docs = index.search("one", :sort => Sort.new("date"))
531
531
  assert_equal(3, top_docs.total_hits)
@@ -773,5 +773,23 @@ class IndexTest < Test::Unit::TestCase
773
773
  index.add_document({:content => "Content With Capitals"}, a)
774
774
  tv = index.reader.term_vector(0, :content)
775
775
  assert_equal("Capitals", tv.terms[0].text)
776
+ index.close
777
+ end
778
+
779
+ def test_top_doc_to_json
780
+ index = Ferret::I.new
781
+ [
782
+ {:f1 => "one"},
783
+ {:f2 => ["two",2,2.0]},
784
+ {:f3 => 3},
785
+ {:f4 => 4.0},
786
+ {:f5 => "five", :funny => '"' * 10_000}
787
+ ].each {|doc| index << doc}
788
+ json_str = index.search("one two 3 4.0 five",
789
+ :sort => Ferret::Search::Sort::INDEX_ORDER).to_json
790
+ assert(json_str == '[{"f1":"one"},{"f2":["two","2","2.0"]},{"f3":"3"},{"f4":"4.0"},{"f5":"five","funny":"' + '\'"\'' * 10_000 + '"}]' ||
791
+ json_str == '[{"f1":"one"},{"f2":["two","2","2.0"]},{"f3":"3"},{"f4":"4.0"},{"funny":"' + '\'"\'' * 10_000 + '","f5":"five"}]')
792
+ assert_equal('[]', index.search("xxx").to_json)
793
+ index.close
776
794
  end
777
795
  end
@@ -128,7 +128,7 @@ class SearchAndSortTest < Test::Unit::TestCase
128
128
 
129
129
  ## byte
130
130
  do_test_top_docs(is, q, [3,2,7,4,8,5,9,1,6,0],
131
- [SortField.new(:int, :type => :byte)])
131
+ SortField.new(:int, :type => :byte))
132
132
  do_test_top_docs(is, q, [0,1,6,5,9,4,8,2,7,3],
133
133
  [SortField.new(:int, :type => :byte, :reverse => true)])
134
134
 
@@ -133,6 +133,13 @@ class BitVectorTest < Test::Unit::TestCase
133
133
 
134
134
  assert_equal(bv2, and_bv, "and_bv should be empty")
135
135
  assert_equal(0, and_bv.count)
136
+
137
+ bv1 = BitVector.new
138
+ bv2 = BitVector.new.not!
139
+ bv1.set(10)
140
+ bv1.set(11)
141
+ bv1.set(20)
142
+ assert_equal(bv1, bv1 & bv2, "bv anded with empty not bv should be same")
136
143
  end
137
144
 
138
145
  def test_bv_or
metadata CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.0
3
3
  specification_version: 1
4
4
  name: ferret
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.10.11
7
- date: 2006-10-11 00:00:00 +09:00
6
+ version: 0.10.12
7
+ date: 2006-10-20 00:00:00 +09:00
8
8
  summary: Ruby indexing library.
9
9
  require_paths:
10
10
  - lib
@@ -55,7 +55,6 @@ files:
55
55
  - ext/stopwords.c
56
56
  - ext/array.c
57
57
  - ext/index.c
58
- - ext/mem_pool.c
59
58
  - ext/compound_io.c
60
59
  - ext/q_prefix.c
61
60
  - ext/q_range.c
@@ -72,8 +71,10 @@ files:
72
71
  - ext/q_parser.c
73
72
  - ext/q_span.c
74
73
  - ext/term_vectors.c
74
+ - ext/multimapper.c
75
+ - ext/mempool.c
75
76
  - ext/priorityqueue.h
76
- - ext/defines.h
77
+ - ext/mempool.h
77
78
  - ext/posh.h
78
79
  - ext/store.h
79
80
  - ext/hashset.h
@@ -89,9 +90,10 @@ files:
89
90
  - ext/win32.h
90
91
  - ext/analysis.h
91
92
  - ext/search.h
92
- - ext/mem_pool.h
93
93
  - ext/array.h
94
94
  - ext/lang.h
95
+ - ext/config.h
96
+ - ext/multimapper.h
95
97
  - ext/stem_UTF_8_norwegian.c
96
98
  - ext/stem_UTF_8_danish.c
97
99
  - ext/stem_UTF_8_dutch.c
@@ -149,10 +151,10 @@ files:
149
151
  - ext/api.h
150
152
  - ext/header.h
151
153
  - ext/libstemmer.c
152
- - ext/modules.h
153
- - ext/libstemmer.h
154
154
  - ext/ferret.h
155
155
  - ext/ferret.c
156
+ - ext/modules.h
157
+ - ext/libstemmer.h
156
158
  - ext/r_analysis.c
157
159
  - ext/r_utils.c
158
160
  - ext/r_store.c