ferret 0.11.6 → 0.11.8.4

Files changed (185)
  1. data/README +10 -22
  2. data/RELEASE_CHANGES +137 -0
  3. data/RELEASE_NOTES +60 -0
  4. data/Rakefile +379 -274
  5. data/TODO +100 -8
  6. data/bin/ferret-browser +0 -0
  7. data/ext/BZLIB_blocksort.c +1094 -0
  8. data/ext/BZLIB_bzlib.c +1578 -0
  9. data/ext/BZLIB_compress.c +672 -0
  10. data/ext/BZLIB_crctable.c +104 -0
  11. data/ext/BZLIB_decompress.c +626 -0
  12. data/ext/BZLIB_huffman.c +205 -0
  13. data/ext/BZLIB_randtable.c +84 -0
  14. data/ext/{api.c → STEMMER_api.c} +7 -10
  15. data/ext/{libstemmer.c → STEMMER_libstemmer.c} +3 -2
  16. data/ext/{stem_ISO_8859_1_danish.c → STEMMER_stem_ISO_8859_1_danish.c} +123 -124
  17. data/ext/{stem_ISO_8859_1_dutch.c → STEMMER_stem_ISO_8859_1_dutch.c} +177 -188
  18. data/ext/STEMMER_stem_ISO_8859_1_english.c +1117 -0
  19. data/ext/{stem_ISO_8859_1_finnish.c → STEMMER_stem_ISO_8859_1_finnish.c} +276 -306
  20. data/ext/STEMMER_stem_ISO_8859_1_french.c +1246 -0
  21. data/ext/{stem_ISO_8859_1_german.c → STEMMER_stem_ISO_8859_1_german.c} +161 -170
  22. data/ext/STEMMER_stem_ISO_8859_1_hungarian.c +1230 -0
  23. data/ext/STEMMER_stem_ISO_8859_1_italian.c +1065 -0
  24. data/ext/STEMMER_stem_ISO_8859_1_norwegian.c +297 -0
  25. data/ext/{stem_ISO_8859_1_porter.c → STEMMER_stem_ISO_8859_1_porter.c} +263 -290
  26. data/ext/{stem_ISO_8859_1_portuguese.c → STEMMER_stem_ISO_8859_1_portuguese.c} +362 -380
  27. data/ext/STEMMER_stem_ISO_8859_1_spanish.c +1093 -0
  28. data/ext/STEMMER_stem_ISO_8859_1_swedish.c +307 -0
  29. data/ext/STEMMER_stem_ISO_8859_2_romanian.c +998 -0
  30. data/ext/{stem_KOI8_R_russian.c → STEMMER_stem_KOI8_R_russian.c} +244 -245
  31. data/ext/STEMMER_stem_UTF_8_danish.c +339 -0
  32. data/ext/{stem_UTF_8_dutch.c → STEMMER_stem_UTF_8_dutch.c} +192 -211
  33. data/ext/STEMMER_stem_UTF_8_english.c +1125 -0
  34. data/ext/{stem_UTF_8_finnish.c → STEMMER_stem_UTF_8_finnish.c} +284 -324
  35. data/ext/STEMMER_stem_UTF_8_french.c +1256 -0
  36. data/ext/{stem_UTF_8_german.c → STEMMER_stem_UTF_8_german.c} +170 -187
  37. data/ext/STEMMER_stem_UTF_8_hungarian.c +1234 -0
  38. data/ext/STEMMER_stem_UTF_8_italian.c +1073 -0
  39. data/ext/STEMMER_stem_UTF_8_norwegian.c +299 -0
  40. data/ext/{stem_UTF_8_porter.c → STEMMER_stem_UTF_8_porter.c} +271 -310
  41. data/ext/STEMMER_stem_UTF_8_portuguese.c +1023 -0
  42. data/ext/STEMMER_stem_UTF_8_romanian.c +1004 -0
  43. data/ext/STEMMER_stem_UTF_8_russian.c +694 -0
  44. data/ext/STEMMER_stem_UTF_8_spanish.c +1097 -0
  45. data/ext/STEMMER_stem_UTF_8_swedish.c +309 -0
  46. data/ext/STEMMER_stem_UTF_8_turkish.c +2205 -0
  47. data/ext/{utilities.c → STEMMER_utilities.c} +100 -68
  48. data/ext/analysis.c +276 -121
  49. data/ext/analysis.h +190 -143
  50. data/ext/api.h +3 -4
  51. data/ext/array.c +5 -3
  52. data/ext/array.h +52 -43
  53. data/ext/bitvector.c +38 -482
  54. data/ext/bitvector.h +446 -124
  55. data/ext/bzlib.h +282 -0
  56. data/ext/bzlib_private.h +503 -0
  57. data/ext/compound_io.c +23 -22
  58. data/ext/config.h +21 -11
  59. data/ext/document.c +43 -40
  60. data/ext/document.h +31 -21
  61. data/ext/except.c +20 -38
  62. data/ext/except.h +89 -76
  63. data/ext/extconf.rb +3 -2
  64. data/ext/ferret.c +49 -35
  65. data/ext/ferret.h +14 -11
  66. data/ext/field_index.c +262 -0
  67. data/ext/field_index.h +52 -0
  68. data/ext/filter.c +11 -10
  69. data/ext/fs_store.c +65 -47
  70. data/ext/global.c +245 -165
  71. data/ext/global.h +252 -54
  72. data/ext/hash.c +200 -243
  73. data/ext/hash.h +205 -163
  74. data/ext/hashset.c +118 -96
  75. data/ext/hashset.h +110 -82
  76. data/ext/header.h +19 -19
  77. data/ext/helper.c +11 -10
  78. data/ext/helper.h +14 -6
  79. data/ext/index.c +745 -366
  80. data/ext/index.h +503 -529
  81. data/ext/internal.h +1020 -0
  82. data/ext/lang.c +10 -0
  83. data/ext/lang.h +35 -15
  84. data/ext/mempool.c +5 -4
  85. data/ext/mempool.h +30 -22
  86. data/ext/modules.h +35 -7
  87. data/ext/multimapper.c +43 -2
  88. data/ext/multimapper.h +32 -23
  89. data/ext/posh.c +0 -0
  90. data/ext/posh.h +4 -38
  91. data/ext/priorityqueue.c +10 -12
  92. data/ext/priorityqueue.h +33 -21
  93. data/ext/q_boolean.c +22 -9
  94. data/ext/q_const_score.c +3 -2
  95. data/ext/q_filtered_query.c +15 -12
  96. data/ext/q_fuzzy.c +147 -135
  97. data/ext/q_match_all.c +3 -2
  98. data/ext/q_multi_term.c +28 -32
  99. data/ext/q_parser.c +451 -173
  100. data/ext/q_phrase.c +158 -79
  101. data/ext/q_prefix.c +16 -18
  102. data/ext/q_range.c +363 -31
  103. data/ext/q_span.c +130 -141
  104. data/ext/q_term.c +21 -21
  105. data/ext/q_wildcard.c +19 -23
  106. data/ext/r_analysis.c +369 -242
  107. data/ext/r_index.c +421 -434
  108. data/ext/r_qparser.c +142 -92
  109. data/ext/r_search.c +790 -407
  110. data/ext/r_store.c +44 -44
  111. data/ext/r_utils.c +264 -96
  112. data/ext/ram_store.c +29 -23
  113. data/ext/scanner.c +895 -0
  114. data/ext/scanner.h +36 -0
  115. data/ext/scanner_mb.c +6701 -0
  116. data/ext/scanner_utf8.c +4415 -0
  117. data/ext/search.c +210 -87
  118. data/ext/search.h +556 -488
  119. data/ext/similarity.c +17 -16
  120. data/ext/similarity.h +51 -44
  121. data/ext/sort.c +157 -354
  122. data/ext/stem_ISO_8859_1_hungarian.h +16 -0
  123. data/ext/stem_ISO_8859_2_romanian.h +16 -0
  124. data/ext/stem_UTF_8_hungarian.h +16 -0
  125. data/ext/stem_UTF_8_romanian.h +16 -0
  126. data/ext/stem_UTF_8_turkish.h +16 -0
  127. data/ext/stopwords.c +287 -278
  128. data/ext/store.c +57 -51
  129. data/ext/store.h +308 -286
  130. data/ext/symbol.c +10 -0
  131. data/ext/symbol.h +23 -0
  132. data/ext/term_vectors.c +14 -293
  133. data/ext/threading.h +22 -22
  134. data/ext/win32.h +12 -4
  135. data/lib/ferret.rb +2 -1
  136. data/lib/ferret/browser.rb +1 -1
  137. data/lib/ferret/field_symbol.rb +94 -0
  138. data/lib/ferret/index.rb +221 -34
  139. data/lib/ferret/number_tools.rb +6 -6
  140. data/lib/ferret/version.rb +3 -0
  141. data/test/{unit → long_running}/largefile/tc_largefile.rb +1 -1
  142. data/test/test_helper.rb +7 -2
  143. data/test/test_installed.rb +1 -0
  144. data/test/threading/thread_safety_index_test.rb +10 -1
  145. data/test/threading/thread_safety_read_write_test.rb +4 -7
  146. data/test/threading/thread_safety_test.rb +0 -0
  147. data/test/unit/analysis/tc_analyzer.rb +29 -27
  148. data/test/unit/analysis/tc_token_stream.rb +23 -16
  149. data/test/unit/index/tc_index.rb +116 -11
  150. data/test/unit/index/tc_index_reader.rb +27 -27
  151. data/test/unit/index/tc_index_writer.rb +10 -0
  152. data/test/unit/index/th_doc.rb +38 -21
  153. data/test/unit/search/tc_filter.rb +31 -10
  154. data/test/unit/search/tc_index_searcher.rb +6 -0
  155. data/test/unit/search/tm_searcher.rb +53 -1
  156. data/test/unit/store/tc_fs_store.rb +40 -2
  157. data/test/unit/store/tc_ram_store.rb +0 -0
  158. data/test/unit/store/tm_store.rb +0 -0
  159. data/test/unit/store/tm_store_lock.rb +7 -6
  160. data/test/unit/tc_field_symbol.rb +26 -0
  161. data/test/unit/ts_analysis.rb +0 -0
  162. data/test/unit/ts_index.rb +0 -0
  163. data/test/unit/ts_store.rb +0 -0
  164. data/test/unit/ts_utils.rb +0 -0
  165. data/test/unit/utils/tc_number_tools.rb +0 -0
  166. data/test/utils/content_generator.rb +226 -0
  167. metadata +262 -221
  168. data/ext/inc/lang.h +0 -48
  169. data/ext/inc/threading.h +0 -31
  170. data/ext/stem_ISO_8859_1_english.c +0 -1156
  171. data/ext/stem_ISO_8859_1_french.c +0 -1276
  172. data/ext/stem_ISO_8859_1_italian.c +0 -1091
  173. data/ext/stem_ISO_8859_1_norwegian.c +0 -296
  174. data/ext/stem_ISO_8859_1_spanish.c +0 -1119
  175. data/ext/stem_ISO_8859_1_swedish.c +0 -307
  176. data/ext/stem_UTF_8_danish.c +0 -344
  177. data/ext/stem_UTF_8_english.c +0 -1176
  178. data/ext/stem_UTF_8_french.c +0 -1296
  179. data/ext/stem_UTF_8_italian.c +0 -1113
  180. data/ext/stem_UTF_8_norwegian.c +0 -302
  181. data/ext/stem_UTF_8_portuguese.c +0 -1055
  182. data/ext/stem_UTF_8_russian.c +0 -709
  183. data/ext/stem_UTF_8_spanish.c +0 -1137
  184. data/ext/stem_UTF_8_swedish.c +0 -313
  185. data/lib/ferret_version.rb +0 -3
@@ -353,7 +353,7 @@ class IndexTest < Test::Unit::TestCase
  assert_equal(0, top_docs.hits.size)
 
  iw = IndexWriter.new(:path => fs_path, :analyzer => WhiteSpaceAnalyzer.new)
- iw << {:f, "content3"}
+ iw << {:f => "content3"}
  iw.close()
 
  top_docs = index.search("content3")
@@ -462,6 +462,95 @@ class IndexTest < Test::Unit::TestCase
  index.close
  end
 
+ def test_index_key_batch0
+ data = {
+ "0" => {:id => "0", :val => "one"},
+ "0" => {:id => "0", :val => "two"},
+ "1" =>{:id => "1", :val => "three"},
+ "1" => {:id => "1", :val => "four"},
+ }
+
+ index = Index.new(:analyzer => WhiteSpaceAnalyzer.new,
+ :key => :id)
+ index.batch_update data
+ assert_equal(2, index.size)
+ index.close
+ end
+
+ def test_index_key_batch1
+ data0 = {
+ "0" => {:id => "0", :val => "one"},
+ "0" => {:id => "0", :val => "two"},
+ "1" =>{:id => "1", :val => "three"},
+ "2" => {:id => "1", :val => "four"},
+ }
+
+ data1 = {
+ "0" => {:id => "0", :val => "one"},
+ "3" => {:id => "3", :val => "two"},
+ "2" =>{:id => "2", :val => "three"},
+ "1" => {:id => "1", :val => "four"},
+ "4" => {:id => "4", :val => "four"},
+ }
+
+ index = Index.new(:analyzer => WhiteSpaceAnalyzer.new,
+ :key => :id)
+ index.batch_update data0
+ assert_equal(3, index.size)
+ index.batch_update data1
+ assert_equal(5, index.size)
+ index.close
+ end
+
+ def test_index_key_delete_batch0
+ data0 = {
+ "0" => {:id => "0", :val => "one"},
+ "0" => {:id => "0", :val => "two"},
+ "1" =>{:id => "1", :val => "three"},
+ "2" => {:id => "2", :val => "four"},
+ "0" => {:id => "0", :val => "four"},
+ }
+
+ data1 = ["0", "1"];
+
+ index = Index.new(:analyzer => WhiteSpaceAnalyzer.new, :key => :id)
+ index.batch_update data0
+
+ assert_equal("four", index["0"][:val])
+ assert_equal("three", index["1"][:val])
+ assert_equal("four", index["2"][:val])
+
+ assert_equal(3, index.size)
+ index.delete data1
+ assert_equal(1, index.size)
+ assert_equal("four", index["2"][:val])
+
+ index.close
+ end
+
+ def test_index_key_delete_batch0
+ index = Index.new(:analyzer => WhiteSpaceAnalyzer.new)
+ 1000.times {|i| index << {:id => "#{i}", :content => "content #{i}"}}
+ assert_equal(1000, index.size)
+ assert_equal("content 876", index['876'][:content])
+
+ new_docs = Array.new(1000) {|i| {:id => i, :content => "#{i} > content"}}
+ index.batch_update(new_docs)
+ assert_equal(1000, index.size)
+ assert_equal("128 > content", index['128'][:content])
+
+ new_docs = Array.new(1000) {|i| {:id => i.to_s, :content => "_(#{i})_"}}
+ index.batch_update(new_docs)
+ assert_equal(1000, index.size)
+ assert_equal("_(287)_", index['287'][:content])
+
+ new_docs = {}
+ 1000.times {|i| new_docs[i.to_s] = {:id => i, :content => "Hash(#{i})"}}
+ index.batch_update(new_docs)
+ assert_equal(1000, index.size)
+ assert_equal("Hash(78)", index['78'][:content])
+ end
+
  def test_index_multi_key
  index = Index.new(:analyzer => WhiteSpaceAnalyzer.new,
  :key => [:id, :table])
@@ -555,19 +644,23 @@ class IndexTest < Test::Unit::TestCase
 
  index.close
  end
-
+
+ # this test has been corrected to work as intended
+ # it now fails the same way on both 1.8 and 1.9 -- sds
  def test_auto_flush
  fs_path = File.expand_path(File.join(File.dirname(__FILE__), '../../temp/fsdir'))
  Dir[File.join(fs_path, "*")].each {|path| begin File.delete(path) rescue nil end}
 
- data = %q(one two three four five six seven eight nine ten eleven twelve)
+ data = %w(one two three four five six seven eight nine ten eleven twelve)
  index1 = Index.new(:path => fs_path, :auto_flush => true, :key => :id)
- index1 << "zero"
+ index1 << {:id => 0, :content => "zero"}
  index2 = Index.new(:path => fs_path, :auto_flush => true)
  begin
+ n = 1
  data.each do |datum|
- index1 << {:id => datum[0], :content => datum}
- index2 << {:id => datum[0], :content => datum}
+ index1 << {:id => n, :content => datum}
+ index2 << {:id => n, :content => datum}
+ n += 1
  end
  5.times do |i|
  index1.delete(i)
@@ -593,9 +686,9 @@ class IndexTest < Test::Unit::TestCase
 
  # Note: Adding keywords to either field1 or field2 gets rid of the error
 
- index << {:field1, ''}
- index << {:field2, ''}
- index << {:field3, 'foo bar baz'}
+ index << {:field1 => ''}
+ index << {:field2 => ''}
+ index << {:field3 => 'foo bar baz'}
 
  index.flush
  index.close
@@ -644,7 +737,7 @@ class IndexTest < Test::Unit::TestCase
  end
 
  def test_wildcard
- i = nil
+ j = nil
  Ferret::I.new do |i|
  i << "one"
  assert_equal(1, i.search("*").total_hits)
@@ -654,8 +747,9 @@ class IndexTest < Test::Unit::TestCase
  assert_equal(3, i.search("*").total_hits)
  assert_equal(3, i.search("id:*").total_hits)
  assert_equal(2, i.search('id:?*').total_hits)
+ j = i
  end
- assert_raise(StandardError) {i.close}
+ assert_raise(StandardError) {j.close}
  end
 
  def check_highlight(index, q, excerpt_length, num_excerpts, expected, field = :field)
@@ -759,4 +853,15 @@ class IndexTest < Test::Unit::TestCase
  index.query_delete('id:one')
  assert_equal(20, index.size)
  end
+
+ def test_query_update_delete_more_than_ten
+ index = Ferret::I.new
+ 20.times {|i| index << {:id => i, :find => 'match', :change => 'one'} }
+
+ assert_equal(20, index.search('find:match').total_hits)
+ index.query_update('find:match', {:change => 'two'})
+ assert_equal(20, index.search('find:match AND change:two').total_hits)
+ index.query_delete('find:match')
+ assert_equal(0, index.size)
+ end
  end
@@ -191,10 +191,10 @@ module IndexReaderCommon
  def do_test_term_vectors()
  expected_tv = TermVector.new(:body,
  [
- TVTerm.new("word1", [2, 4, 7]),
- TVTerm.new("word2", [3]),
- TVTerm.new("word3", [0, 5, 8, 9]),
- TVTerm.new("word4", [1, 6])
+ TVTerm.new("word1", 3, [2, 4, 7]),
+ TVTerm.new("word2", 1, [3]),
+ TVTerm.new("word3", 4, [0, 5, 8, 9]),
+ TVTerm.new("word4", 2, [1, 6])
  ],
  [*(0...10)].collect {|i| TVOffsets.new(i*6, (i+1)*6 - 1)})
 
@@ -209,13 +209,13 @@ module IndexReaderCommon
 
  tv = tvs[:author]
  assert_equal(:author, tv.field)
- assert_equal([TVTerm.new("Leo", [0]), TVTerm.new("Tolstoy", [1])], tv.terms)
+ assert_equal([TVTerm.new("Leo", 1, [0]), TVTerm.new("Tolstoy", 1, [1])], tv.terms)
  assert(tv.offsets.nil?)
 
 
  tv = tvs[:title]
  assert_equal(:title, tv.field)
- assert_equal([TVTerm.new("War And Peace", nil)], tv.terms)
+ assert_equal([TVTerm.new("War And Peace", 1, nil)], tv.terms)
  assert_equal([TVOffsets.new(0, 13)], tv.offsets)
  end
 
@@ -254,19 +254,19 @@ module IndexReaderCommon
 
  norms = @ir.norms(:text)
 
- assert_equal(202, norms[ 3])
- assert_equal( 20, norms[25])
- assert_equal(200, norms[50])
- assert_equal(155, norms[63])
+ assert_equal(202, norms.bytes.to_a[ 3])
+ assert_equal( 20, norms.bytes.to_a[25])
+ assert_equal(200, norms.bytes.to_a[50])
+ assert_equal(155, norms.bytes.to_a[63])
 
  norms = @ir.norms(:title)
- assert_equal(1, norms[3])
+ assert_equal(1, norms.bytes.to_a[3])
 
  norms = @ir.norms(:body)
- assert_equal(12, norms[3])
+ assert_equal(12, norms.bytes.to_a[3])
 
  norms = @ir.norms(:author)
- assert_equal(145, norms[3])
+ assert_equal(145, norms.bytes.to_a[3])
 
  norms = @ir.norms(:year)
  # TODO: this returns two possible results depending on whether it is
@@ -277,10 +277,10 @@ module IndexReaderCommon
 
  norms = " " * 164
  @ir.get_norms_into(:text, norms, 100)
- assert_equal(202, norms[103])
- assert_equal( 20, norms[125])
- assert_equal(200, norms[150])
- assert_equal(155, norms[163])
+ assert_equal(202, norms.bytes.to_a[103])
+ assert_equal( 20, norms.bytes.to_a[125])
+ assert_equal(200, norms.bytes.to_a[150])
+ assert_equal(155, norms.bytes.to_a[163])
 
  @ir.commit()
 
@@ -290,10 +290,10 @@ module IndexReaderCommon
 
  norms = " " * 164
  ir2.get_norms_into(:text, norms, 100)
- assert_equal(202, norms[103])
- assert_equal( 20, norms[125])
- assert_equal(200, norms[150])
- assert_equal(155, norms[163])
+ assert_equal(202, norms.bytes.to_a[103])
+ assert_equal( 20, norms.bytes.to_a[125])
+ assert_equal(200, norms.bytes.to_a[150])
+ assert_equal(155, norms.bytes.to_a[163])
  ir2.close()
  end
 
@@ -608,10 +608,10 @@ class IndexReaderTest < Test::Unit::TestCase
  def do_test_term_vectors(ir)
  expected_tv = TermVector.new(:body,
  [
- TVTerm.new("word1", [2, 4, 7]),
- TVTerm.new("word2", [3]),
- TVTerm.new("word3", [0, 5, 8, 9]),
- TVTerm.new("word4", [1, 6])
+ TVTerm.new("word1", 3, [2, 4, 7]),
+ TVTerm.new("word2", 1, [3]),
+ TVTerm.new("word3", 4, [0, 5, 8, 9]),
+ TVTerm.new("word4", 2, [1, 6])
  ],
  [*(0...10)].collect {|i| TVOffsets.new(i*6, (i+1)*6 - 1)})
 
@@ -626,13 +626,13 @@ class IndexReaderTest < Test::Unit::TestCase
 
  tv = tvs[:author]
  assert_equal(:author, tv.field)
- assert_equal([TVTerm.new("Leo", [0]), TVTerm.new("Tolstoy", [1])], tv.terms)
+ assert_equal([TVTerm.new("Leo", 1, [0]), TVTerm.new("Tolstoy", 1, [1])], tv.terms)
  assert(tv.offsets.nil?)
 
 
  tv = tvs[:title]
  assert_equal(:title, tv.field)
- assert_equal([TVTerm.new("War And Peace", nil)], tv.terms)
+ assert_equal([TVTerm.new("War And Peace", 1, nil)], tv.terms)
  assert_equal([TVOffsets.new(0, 13)], tv.offsets)
  end
 
@@ -1,3 +1,5 @@
+ # encoding: utf-8
+
  require File.dirname(__FILE__) + "/../../test_helper"
 
 
@@ -57,6 +59,14 @@ class IndexWriterTest < Test::Unit::TestCase
  iw.close()
  end
 
+ def test_adding_long_url
+ iw = IndexWriter.new(:dir => @dir,
+ :default_field => 'content')
+ iw << {:content => "http://" + 'x' * 255}
+ # The following line will cause a segfault prior to 0.11.6
+ iw << {:content => "http://" + 'x' * 1_000_000}
+ end
+
  private
 
  WORDS = [
@@ -281,32 +281,49 @@ module IndexTestHelper
  def self.prepare_search_docs
  i = 1
  [
- ["20050930", "cat1/", "word1" ],
- ["20051001", "cat1/sub1", "word1 word2 the quick brown fox" ],
- ["20051002", "cat1/sub1/subsub1", "word1 word3" ],
- ["20051003", "cat1/sub2", "word1 word3" ],
- ["20051004", "cat1/sub2/subsub2", "word1 word2" ],
- ["20051005", "cat2/sub1", "word1" ],
- ["20051006", "cat2/sub1", "word1 word3" ],
- ["20051007", "cat2/sub1", "word1" ],
- ["20051008", "cat2/sub1", "word1 word2 word3 the fast brown fox"],
- ["20051009", "cat3/sub1", "word1" ],
- ["20051010", "cat3/sub1", "word1" ],
- ["20051011", "cat3/sub1", "word1 word3 the quick red fox" ],
- ["20051012", "cat3/sub1", "word1" ],
- ["20051013", "cat1/sub2", "word1" ],
- ["20051014", "cat1/sub1", "word1 word3 the quick hairy fox" ],
- ["20051015", "cat1/sub2/subsub1", "word1" ],
- ["20051016", "cat1/sub1/subsub2",
- "word1 the quick fox is brown and hairy and a little red" ],
- ["20051017", "cat1/",
- "word1 the brown fox is quick and red" ]
- ].map do |date, category, field|
+ ["20050930", "cat1/", 0.123,
+ "word1" ],
+ ["20051001", "cat1/sub1", 0.954,
+ "word1 word2 the quick brown fox" ],
+ ["20051002", "cat1/sub1/subsub1", 908.125,
+ "word1 word3" ],
+ ["20051003", "cat1/sub2", 3999,
+ "word1 word3" ],
+ ["20051004", "cat1/sub2/subsub2", "+.3412",
+ "word1 word2" ],
+ ["20051005", "cat2/sub1", -1.298,
+ "word1" ],
+ ["20051006", "cat2/sub1", "2",
+ "word1 word3" ],
+ ["20051007", "cat2/sub1", "+8.894",
+ "word1" ],
+ ["20051008", "cat2/sub1", "+21235.2135",
+ "word1 word2 word3 the fast brown fox" ],
+ ["20051009", "cat3/sub1", "10.0",
+ "word1" ],
+ ["20051010", "cat3/sub1", 1,
+ "word1" ],
+ ["20051011", "cat3/sub1", -12518419,
+ "word1 word3 the quick red fox" ],
+ ["20051012", "cat3/sub1", "10",
+ "word1" ],
+ ["20051013", "cat1/sub2", "15682954",
+ "word1" ],
+ ["20051014", "cat1/sub1", "91239",
+ "word1 word3 the quick hairy fox" ],
+ ["20051015", "cat1/sub2/subsub1", "-.89321",
+ "word1" ],
+ ["20051016", "cat1/sub1/subsub2", -89,
+ "word1 the quick fox is brown and hairy and a little red" ],
+ ["20051017", "cat1/", "-1.0",
+ "word1 the brown fox is quick and red" ]
+ ].map do |date, category, number, field|
  doc = Ferret::Document.new(i)
  i += 1
  doc[:date] = date
  doc[:category] = category
  doc[:field] = field
+ doc[:number] = number
  doc
  end
  end
@@ -1,4 +1,5 @@
  require File.dirname(__FILE__) + "/../../test_helper"
+ require 'date'
 
 
  class FilterTest < Test::Unit::TestCase
@@ -39,16 +40,6 @@ class FilterTest < Test::Unit::TestCase
  end
  end
 
- def test_filter_proc
- searcher = Searcher.new(@dir)
- q = MatchAllQuery.new()
- filter_proc = lambda {|doc, score, s| (s[doc][:int] % 2) == 0}
- top_docs = searcher.search(q, :filter_proc => filter_proc)
- top_docs.hits.each do |hit|
- assert_equal(0, searcher[hit.doc][:int] % 2)
- end
- end
-
  def test_range_filter
  searcher = Searcher.new(@dir)
  q = MatchAllQuery.new()
@@ -132,4 +123,34 @@ class FilterTest < Test::Unit::TestCase
  filt = CustomFilter.new
  do_test_top_docs(searcher, q, [0, 2, 4], filt)
  end
+
+ def test_filter_proc
+ searcher = Searcher.new(@dir)
+ q = MatchAllQuery.new()
+ filter_proc = lambda {|doc, score, s| (s[doc][:int] % 2) == 0}
+ top_docs = searcher.search(q, :filter_proc => filter_proc)
+ top_docs.hits.each do |hit|
+ assert_equal(0, searcher[hit.doc][:int] % 2)
+ end
+ end
+
+ def test_score_modifying_filter_proc
+ searcher = Searcher.new(@dir)
+ q = MatchAllQuery.new()
+ start_date = Date.parse('2008-02-08')
+ date_half_life_50 = lambda do |doc, score, s|
+ days = (start_date - Date.parse(s[doc][:date], '%Y%m%d')).to_i
+ 1.0 / (2.0 ** (days.to_f / 50.0))
+ end
+ top_docs = searcher.search(q, :filter_proc => date_half_life_50)
+ docs = top_docs.hits.collect {|hit| hit.doc}
+ assert_equal(docs, [2,4,9,8,6,3,5,1,7,0])
+ rev_date_half_life_50 = lambda do |doc, score, s|
+ days = (start_date - Date.parse(s[doc][:date], '%Y%m%d')).to_i
+ 1.0 - 1.0 / (2.0 ** (days.to_f / 50.0))
+ end
+ top_docs = searcher.search(q, :filter_proc => rev_date_half_life_50)
+ docs = top_docs.hits.collect {|hit| hit.doc}
+ assert_equal(docs, [0,7,1,3,5,6,8,9,2,4])
+ end
  end
@@ -50,6 +50,12 @@ class SearcherTest < Test::Unit::TestCase
  assert(score_doc.score.approx_eql?(@searcher.explain(query, score_doc.doc).score),
  "Scores(#{score_doc.score} != #{@searcher.explain(query, score_doc.doc).score})")
  end
+
+ assert_equal(expected.sort, @searcher.scan(query))
+ if expected.size > 5
+ assert_equal(expected[0...5], @searcher.scan(query, :limit => 5))
+ assert_equal(expected[5..-1], @searcher.scan(query, :start_doc => expected[5]))
+ end
  end
 
  def test_get_doc()