sdsykes-ferret 0.11.6.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (195) hide show
  1. data/CHANGELOG +24 -0
  2. data/MIT-LICENSE +20 -0
  3. data/README +102 -0
  4. data/Rakefile +338 -0
  5. data/TODO +17 -0
  6. data/TUTORIAL +231 -0
  7. data/bin/ferret-browser +79 -0
  8. data/ext/analysis.c +1555 -0
  9. data/ext/analysis.h +219 -0
  10. data/ext/api.c +69 -0
  11. data/ext/api.h +27 -0
  12. data/ext/array.c +123 -0
  13. data/ext/array.h +53 -0
  14. data/ext/bitvector.c +540 -0
  15. data/ext/bitvector.h +272 -0
  16. data/ext/compound_io.c +383 -0
  17. data/ext/config.h +42 -0
  18. data/ext/document.c +156 -0
  19. data/ext/document.h +53 -0
  20. data/ext/except.c +120 -0
  21. data/ext/except.h +168 -0
  22. data/ext/extconf.rb +14 -0
  23. data/ext/ferret.c +402 -0
  24. data/ext/ferret.h +91 -0
  25. data/ext/filter.c +156 -0
  26. data/ext/fs_store.c +483 -0
  27. data/ext/global.c +418 -0
  28. data/ext/global.h +117 -0
  29. data/ext/hash.c +567 -0
  30. data/ext/hash.h +473 -0
  31. data/ext/hashset.c +170 -0
  32. data/ext/hashset.h +187 -0
  33. data/ext/header.h +58 -0
  34. data/ext/helper.c +62 -0
  35. data/ext/helper.h +13 -0
  36. data/ext/inc/lang.h +48 -0
  37. data/ext/inc/threading.h +31 -0
  38. data/ext/index.c +6425 -0
  39. data/ext/index.h +961 -0
  40. data/ext/lang.h +66 -0
  41. data/ext/libstemmer.c +92 -0
  42. data/ext/libstemmer.h +79 -0
  43. data/ext/mempool.c +87 -0
  44. data/ext/mempool.h +35 -0
  45. data/ext/modules.h +162 -0
  46. data/ext/multimapper.c +310 -0
  47. data/ext/multimapper.h +51 -0
  48. data/ext/posh.c +1006 -0
  49. data/ext/posh.h +1007 -0
  50. data/ext/priorityqueue.c +151 -0
  51. data/ext/priorityqueue.h +143 -0
  52. data/ext/q_boolean.c +1608 -0
  53. data/ext/q_const_score.c +161 -0
  54. data/ext/q_filtered_query.c +209 -0
  55. data/ext/q_fuzzy.c +268 -0
  56. data/ext/q_match_all.c +148 -0
  57. data/ext/q_multi_term.c +677 -0
  58. data/ext/q_parser.c +2825 -0
  59. data/ext/q_phrase.c +1126 -0
  60. data/ext/q_prefix.c +100 -0
  61. data/ext/q_range.c +350 -0
  62. data/ext/q_span.c +2402 -0
  63. data/ext/q_term.c +337 -0
  64. data/ext/q_wildcard.c +171 -0
  65. data/ext/r_analysis.c +2575 -0
  66. data/ext/r_index.c +3472 -0
  67. data/ext/r_qparser.c +585 -0
  68. data/ext/r_search.c +4105 -0
  69. data/ext/r_store.c +513 -0
  70. data/ext/r_utils.c +963 -0
  71. data/ext/ram_store.c +471 -0
  72. data/ext/search.c +1741 -0
  73. data/ext/search.h +885 -0
  74. data/ext/similarity.c +150 -0
  75. data/ext/similarity.h +82 -0
  76. data/ext/sort.c +983 -0
  77. data/ext/stem_ISO_8859_1_danish.c +338 -0
  78. data/ext/stem_ISO_8859_1_danish.h +16 -0
  79. data/ext/stem_ISO_8859_1_dutch.c +635 -0
  80. data/ext/stem_ISO_8859_1_dutch.h +16 -0
  81. data/ext/stem_ISO_8859_1_english.c +1156 -0
  82. data/ext/stem_ISO_8859_1_english.h +16 -0
  83. data/ext/stem_ISO_8859_1_finnish.c +792 -0
  84. data/ext/stem_ISO_8859_1_finnish.h +16 -0
  85. data/ext/stem_ISO_8859_1_french.c +1276 -0
  86. data/ext/stem_ISO_8859_1_french.h +16 -0
  87. data/ext/stem_ISO_8859_1_german.c +512 -0
  88. data/ext/stem_ISO_8859_1_german.h +16 -0
  89. data/ext/stem_ISO_8859_1_italian.c +1091 -0
  90. data/ext/stem_ISO_8859_1_italian.h +16 -0
  91. data/ext/stem_ISO_8859_1_norwegian.c +296 -0
  92. data/ext/stem_ISO_8859_1_norwegian.h +16 -0
  93. data/ext/stem_ISO_8859_1_porter.c +776 -0
  94. data/ext/stem_ISO_8859_1_porter.h +16 -0
  95. data/ext/stem_ISO_8859_1_portuguese.c +1035 -0
  96. data/ext/stem_ISO_8859_1_portuguese.h +16 -0
  97. data/ext/stem_ISO_8859_1_spanish.c +1119 -0
  98. data/ext/stem_ISO_8859_1_spanish.h +16 -0
  99. data/ext/stem_ISO_8859_1_swedish.c +307 -0
  100. data/ext/stem_ISO_8859_1_swedish.h +16 -0
  101. data/ext/stem_KOI8_R_russian.c +701 -0
  102. data/ext/stem_KOI8_R_russian.h +16 -0
  103. data/ext/stem_UTF_8_danish.c +344 -0
  104. data/ext/stem_UTF_8_danish.h +16 -0
  105. data/ext/stem_UTF_8_dutch.c +653 -0
  106. data/ext/stem_UTF_8_dutch.h +16 -0
  107. data/ext/stem_UTF_8_english.c +1176 -0
  108. data/ext/stem_UTF_8_english.h +16 -0
  109. data/ext/stem_UTF_8_finnish.c +808 -0
  110. data/ext/stem_UTF_8_finnish.h +16 -0
  111. data/ext/stem_UTF_8_french.c +1296 -0
  112. data/ext/stem_UTF_8_french.h +16 -0
  113. data/ext/stem_UTF_8_german.c +526 -0
  114. data/ext/stem_UTF_8_german.h +16 -0
  115. data/ext/stem_UTF_8_italian.c +1113 -0
  116. data/ext/stem_UTF_8_italian.h +16 -0
  117. data/ext/stem_UTF_8_norwegian.c +302 -0
  118. data/ext/stem_UTF_8_norwegian.h +16 -0
  119. data/ext/stem_UTF_8_porter.c +794 -0
  120. data/ext/stem_UTF_8_porter.h +16 -0
  121. data/ext/stem_UTF_8_portuguese.c +1055 -0
  122. data/ext/stem_UTF_8_portuguese.h +16 -0
  123. data/ext/stem_UTF_8_russian.c +709 -0
  124. data/ext/stem_UTF_8_russian.h +16 -0
  125. data/ext/stem_UTF_8_spanish.c +1137 -0
  126. data/ext/stem_UTF_8_spanish.h +16 -0
  127. data/ext/stem_UTF_8_swedish.c +313 -0
  128. data/ext/stem_UTF_8_swedish.h +16 -0
  129. data/ext/stopwords.c +401 -0
  130. data/ext/store.c +692 -0
  131. data/ext/store.h +777 -0
  132. data/ext/term_vectors.c +352 -0
  133. data/ext/threading.h +31 -0
  134. data/ext/utilities.c +446 -0
  135. data/ext/win32.h +54 -0
  136. data/lib/ferret.rb +29 -0
  137. data/lib/ferret/browser.rb +246 -0
  138. data/lib/ferret/browser/s/global.js +192 -0
  139. data/lib/ferret/browser/s/style.css +148 -0
  140. data/lib/ferret/browser/views/document/list.rhtml +49 -0
  141. data/lib/ferret/browser/views/document/show.rhtml +27 -0
  142. data/lib/ferret/browser/views/error/index.rhtml +7 -0
  143. data/lib/ferret/browser/views/help/index.rhtml +8 -0
  144. data/lib/ferret/browser/views/home/index.rhtml +29 -0
  145. data/lib/ferret/browser/views/layout.rhtml +22 -0
  146. data/lib/ferret/browser/views/term-vector/index.rhtml +4 -0
  147. data/lib/ferret/browser/views/term/index.rhtml +199 -0
  148. data/lib/ferret/browser/views/term/termdocs.rhtml +1 -0
  149. data/lib/ferret/browser/webrick.rb +14 -0
  150. data/lib/ferret/document.rb +130 -0
  151. data/lib/ferret/field_infos.rb +44 -0
  152. data/lib/ferret/index.rb +786 -0
  153. data/lib/ferret/number_tools.rb +157 -0
  154. data/lib/ferret_version.rb +3 -0
  155. data/setup.rb +1555 -0
  156. data/test/test_all.rb +5 -0
  157. data/test/test_helper.rb +24 -0
  158. data/test/threading/number_to_spoken.rb +132 -0
  159. data/test/threading/thread_safety_index_test.rb +79 -0
  160. data/test/threading/thread_safety_read_write_test.rb +76 -0
  161. data/test/threading/thread_safety_test.rb +133 -0
  162. data/test/unit/analysis/tc_analyzer.rb +548 -0
  163. data/test/unit/analysis/tc_token_stream.rb +646 -0
  164. data/test/unit/index/tc_index.rb +762 -0
  165. data/test/unit/index/tc_index_reader.rb +699 -0
  166. data/test/unit/index/tc_index_writer.rb +437 -0
  167. data/test/unit/index/th_doc.rb +315 -0
  168. data/test/unit/largefile/tc_largefile.rb +46 -0
  169. data/test/unit/query_parser/tc_query_parser.rb +238 -0
  170. data/test/unit/search/tc_filter.rb +135 -0
  171. data/test/unit/search/tc_fuzzy_query.rb +147 -0
  172. data/test/unit/search/tc_index_searcher.rb +61 -0
  173. data/test/unit/search/tc_multi_searcher.rb +128 -0
  174. data/test/unit/search/tc_multiple_search_requests.rb +58 -0
  175. data/test/unit/search/tc_search_and_sort.rb +179 -0
  176. data/test/unit/search/tc_sort.rb +49 -0
  177. data/test/unit/search/tc_sort_field.rb +27 -0
  178. data/test/unit/search/tc_spans.rb +190 -0
  179. data/test/unit/search/tm_searcher.rb +384 -0
  180. data/test/unit/store/tc_fs_store.rb +77 -0
  181. data/test/unit/store/tc_ram_store.rb +35 -0
  182. data/test/unit/store/tm_store.rb +34 -0
  183. data/test/unit/store/tm_store_lock.rb +68 -0
  184. data/test/unit/tc_document.rb +81 -0
  185. data/test/unit/ts_analysis.rb +2 -0
  186. data/test/unit/ts_index.rb +2 -0
  187. data/test/unit/ts_largefile.rb +4 -0
  188. data/test/unit/ts_query_parser.rb +2 -0
  189. data/test/unit/ts_search.rb +2 -0
  190. data/test/unit/ts_store.rb +2 -0
  191. data/test/unit/ts_utils.rb +2 -0
  192. data/test/unit/utils/tc_bit_vector.rb +295 -0
  193. data/test/unit/utils/tc_number_tools.rb +117 -0
  194. data/test/unit/utils/tc_priority_queue.rb +106 -0
  195. metadata +285 -0
@@ -0,0 +1,77 @@
1
+ require File.dirname(__FILE__) + "/../../test_helper"
2
+ require File.dirname(__FILE__) + "/tm_store"
3
+ require File.dirname(__FILE__) + "/tm_store_lock"
4
+
5
# Runs the shared StoreTest and StoreLockTest suites against an on-disk
# FSDirectory, and additionally checks that obtaining a lock creates a real
# lock file on the filesystem and releasing it removes that file again.
class FSStoreTest < Test::Unit::TestCase
  include Ferret::Store
  include StoreTest
  include StoreLockTest

  # Opens the directory under test at temp/fsdir, two levels above the
  # directory containing this file.
  # NOTE(review): the second FSDirectory.new argument presumably asks for the
  # directory to be (re)created -- confirm against the FSDirectory API.
  def setup
    @dpath = File.expand_path(File.join(File.dirname(__FILE__),
                                        '../../temp/fsdir'))
    @dir = FSDirectory.new(@dpath, true)
  end

  # Refreshes and then closes the directory opened in setup.
  def teardown
    @dir.refresh
    @dir.close
  end

  # Walks one lock through make -> obtain -> release and checks, at each
  # step, both the lock's own state and the presence of its lock file.
  def test_fslock
    lock_name = "lfile"
    lock_file_path = make_lock_file_path(lock_name)
    # File.exist? is used throughout: File.exists? is deprecated and no
    # longer available on current Ruby versions.
    assert(!File.exist?(lock_file_path), "There should be no lock file")
    lock = @dir.make_lock(lock_name)
    assert(!File.exist?(lock_file_path), "There should still be no lock file")
    assert(!lock.locked?, "lock shouldn't be locked yet")

    lock.obtain

    assert(lock.locked?, "lock should now be locked")

    assert(File.exist?(lock_file_path), "A lock file should have been created")

    assert(@dir.exists?(lfname(lock_name)), "The lock should exist")

    lock.release

    assert(!lock.locked?, "lock should be freed again")
    assert(!File.exist?(lock_file_path), "The lock file should have been deleted")
  end

  # The finalizer test below is disabled in the original file and is kept
  # here, still disabled, for reference.
  #
  # def make_and_loose_lock
  #   lock = @dir.make_lock("finalizer_lock")
  #   lock.obtain
  #   lock = nil
  # end
  #
  # def test_fslock_finalizer
  #   lock_name = "finalizer_lock"
  #   lock_file_path = make_lock_file_path(lock_name)
  #   assert(! File.exist?(lock_file_path), "There should be no lock file")
  #
  #   make_and_loose_lock
  #
  #   #assert(File.exist?(lock_file_path), "There should now be a lock file")
  #
  #   lock = @dir.make_lock(lock_name)
  #   assert(lock.locked?, "lock should now be locked")
  #
  #   GC.start
  #
  #   assert(! lock.locked?, "lock should be freed again")
  #   assert(! File.exist?(lock_file_path), "The lock file should have been deleted")
  # end
  #

  # Returns the absolute path of the lock file for +name+ inside @dpath,
  # deleting any file already at that path so the test starts clean.
  def make_lock_file_path(name)
    lock_file_path = File.join(@dpath, lfname(name))
    File.delete(lock_file_path) if File.exist?(lock_file_path)
    lock_file_path
  end

  # Builds the lock file name this test expects for the lock called +name+.
  def lfname(name)
    "ferret-#{name}.lck"
  end
end
@@ -0,0 +1,35 @@
1
+ require File.dirname(__FILE__) + "/../../test_helper"
2
+ require File.dirname(__FILE__) + "/tm_store"
3
+ require File.dirname(__FILE__) + "/tm_store_lock"
4
+
5
# Runs the shared StoreTest and StoreLockTest suites against an in-memory
# RAMDirectory, plus one lock life-cycle test specific to that directory.
class RAMStoreTest < Test::Unit::TestCase
  include StoreTest
  include StoreLockTest

  # Every test gets its own RAMDirectory.
  def setup
    @dir = Ferret::Store::RAMDirectory.new
  end

  # Closes the directory created in setup.
  def teardown
    @dir.close
  end

  # Walks one lock through make -> obtain -> release, checking at each step
  # whether the directory reports the corresponding lock entry as existing.
  def test_ramlock
    lock_name = "lfile"
    lock_entry = "#{Ferret::Store::Directory::LOCK_PREFIX}#{lock_name}.lck"

    assert(!@dir.exists?(lock_entry), "There should be no lock file")

    ram_lock = @dir.make_lock(lock_name)
    # Creating the lock object alone must not create the entry; the original
    # test performs this same check twice with two different messages.
    assert(!@dir.exists?(lock_entry), "There should still be no lock file")
    assert(!@dir.exists?(lock_entry),
           "The lock should be hidden by the FSDirectories directory scan")
    assert(!ram_lock.locked?, "lock shouldn't be locked yet")

    ram_lock.obtain
    assert(ram_lock.locked?, "lock should now be locked")
    assert(@dir.exists?(lock_entry), "A lock file should have been created")

    ram_lock.release
    assert(!ram_lock.locked?, "lock should be freed again")
    assert(!@dir.exists?(lock_entry), "The lock file should have been deleted")
  end
end
@@ -0,0 +1,34 @@
1
# Directory tests shared by every store implementation. The including test
# case is expected to assign the directory under test to @dir in its setup.
module StoreTest
  # Exposes the directory under test to the including test case.
  attr_accessor :dir

  # Exercises the basic file manipulation methods of the directory:
  # exists?, touch, delete and file_count.
  def test_basic_file_ops
    assert_equal(0, @dir.file_count, "directory should be empty")
    assert(!@dir.exists?('filename'), "File should not exist")

    # Touch two files, checking the running file count after each one.
    [
      ['tmpfile1', 1, "directory should have one file"],
      ['tmpfile2', 2, "directory should have two files"]
    ].each do |file_name, expected_count, message|
      @dir.touch(file_name)
      assert_equal(expected_count, @dir.file_count, message)
    end

    assert(@dir.exists?('tmpfile1'), "'tmpfile1' should exist")
    @dir.delete('tmpfile1')
    assert(!@dir.exists?('tmpfile1'), "'tmpfile1' should no longer exist")
    assert_equal(1, @dir.file_count, "directory should have one file")
  end

  # Renaming must move the entry to its new name without changing how many
  # files the directory reports.
  def test_rename
    @dir.touch("from")
    assert(@dir.exists?('from'), "File should exist")
    assert(!@dir.exists?('to'), "File should not exist")

    files_before = @dir.file_count
    @dir.rename('from', 'to')
    files_after = @dir.file_count

    assert_equal(files_before, files_after, "the number of files shouldn't have changed")
    assert(@dir.exists?('to'), "File should now exist")
    assert(!@dir.exists?('from'), "File should no longer exist")
  end
end
@@ -0,0 +1,68 @@
1
# Lock tests shared by every store implementation. The including test case
# is expected to assign the directory under test to @dir and to provide
# an assert method.
module StoreLockTest
  # Process-wide hand-off counter used by test_locking to sequence the main
  # thread and the lock-holding worker thread:
  #   1 - the worker is inside while_locked
  #   2 - the main thread has finished its "cannot obtain" check
  #   3 - the worker is about to leave while_locked
  class Switch
    @@counter = 0
    def self.counter
      @@counter
    end

    def self.counter=(counter)
      @@counter = counter
    end
  end

  # Checks that two lock objects with the same name exclude each other, both
  # within one thread and while another thread is holding the lock.
  def test_locking
    lock_time_out = 0.001 # we want this test to run quickly

    # The counter lives in a class variable and this module is mixed into
    # more than one test case, so a previous run leaves it at 3. Reset it,
    # otherwise the "wait for the worker" loop below would not wait at all.
    Switch.counter = 0

    lock1 = @dir.make_lock("l.lck")
    lock2 = @dir.make_lock("l.lck")

    assert(!lock2.locked?)
    assert(lock1.obtain(lock_time_out))
    assert(lock2.locked?)

    assert(!can_obtain_lock?(lock2))

    # while_locked must raise instead of running its block. The block only
    # records that it ran: raising an assertion failure inside the rescued
    # region could be swallowed by the bare rescue in frameworks whose
    # assertion error is a StandardError.
    block_ran = false
    exception_thrown = false
    begin
      lock2.while_locked(lock_time_out) { block_ran = true }
    rescue
      exception_thrown = true
    end
    assert(!block_ran, "lock should not have been obtained")
    assert(exception_thrown)

    lock1.release
    assert(lock2.obtain(lock_time_out))
    lock2.release

    # Thread.new starts the worker immediately; no explicit wake-up needed.
    holder = Thread.new do
      lock1.while_locked(lock_time_out) do
        Switch.counter = 1
        # keep holding the lock until the main thread has run its check
        Thread.pass while Switch.counter < 2
        Switch.counter = 3
      end
    end

    # Make sure the worker has started and obtained the lock. Stop waiting
    # if the worker died, so a failure there cannot hang the test.
    Thread.pass while Switch.counter < 1 && holder.alive?

    assert(!can_obtain_lock?(lock2))

    Switch.counter = 2
    # Joining (rather than polling for 3) guarantees the worker has left
    # while_locked before the lock is requested again, and re-raises any
    # exception the worker hit.
    holder.join

    assert(lock2.obtain(lock_time_out))
    lock2.release
  end

  # Returns true when lock.obtain returns without raising within a very
  # short timeout, false when it raises. On success the lock is NOT released
  # here; it stays held by +lock+.
  def can_obtain_lock?(lock)
    lock.obtain(0.001) # we want this test to run quickly
    true
  rescue
    false
  end
end
@@ -0,0 +1,81 @@
1
+ require File.dirname(__FILE__) + "/../test_helper"
2
+
3
+ class DocumentTest < Test::Unit::TestCase # unit tests for Ferret::Field and Ferret::Document
4
+ def test_field # checks size, boost, indexing, to_s and equality of Ferret::Field
5
+ f = Ferret::Field.new
6
+ assert_equal(0, f.size) # a new field is expected to be empty
7
+ assert_equal(1.0, f.boost) # ... and to carry a boost of 1.0
8
+
9
+ f2 = Ferret::Field.new
10
+ assert_equal(f, f2) # two freshly built fields are expected to compare equal
11
+
12
+ f << "section0"
13
+ assert_equal(1, f.size)
14
+ assert_equal(1.0, f.boost) # appending a value is expected to leave the boost alone
15
+ assert_equal("section0", f[0])
16
+ assert_not_equal(f, f2) # contents now differ from the still-empty f2
17
+
18
+ f << "section1"
19
+ assert_equal(2, f.size)
20
+ assert_equal(1.0, f.boost)
21
+ assert_equal("section0", f[0])
22
+ assert_equal("section1", f[1])
23
+ assert_equal('["section0", "section1"]', f.to_s) # no boost suffix expected while boost is 1.0
24
+ assert_not_equal(f, f2)
25
+ f2 += f # rebinds f2 to the result of f2 + f
26
+ assert_equal(f, f2) # empty field plus f is expected to equal f
27
+
28
+ f.boost = 4.0
29
+ assert_not_equal(f, f2) # same contents, different boost: expected to be unequal
30
+ assert_equal('["section0", "section1"]^4.0', f.to_s) # boost is expected to show up as a ^ suffix
31
+
32
+ f2.boost = 4.0
33
+ assert_equal(f, f2)
34
+
35
+ f3 = Ferret::Field.new(["section0", "section1"], 4.0) # values and boost passed to the constructor
36
+ assert_equal(f, f3)
37
+ end
38
+
39
+ def test_document # checks size, boost, to_s and equality of Ferret::Document
40
+ d = Ferret::Document.new
41
+
42
+ d[:name] = Ferret::Field.new
43
+ d[:name] << "section0"
44
+ d[:name] << "section1"
45
+
46
+ assert_equal(1, d.size) # one key stored so far
47
+ assert_equal(1.0, d.boost)
48
+ assert_equal(%(
49
+ Document {
50
+ :name => ["section0", "section1"]
51
+ }).strip, d.to_s) # NOTE(review): leading whitespace inside this expected string is not visible in this rendering
52
+
53
+
54
+ d.boost = 123.0
55
+ d[:name] << "section2"
56
+ d[:name].boost = 321.0
57
+ assert_equal(123.0, d.boost) # document boost and field boost are expected to be independent
58
+ assert_equal(321.0, d[:name].boost)
59
+ assert_equal(%(
60
+ Document {
61
+ :name => ["section0", "section1", "section2"]^321.0
62
+ }^123.0).strip, d.to_s) # both boosts are expected to appear as ^ suffixes
63
+
64
+ d[:title] = "Shawshank Redemption" # a plain String value
65
+ d[:actors] = ["Tim Robbins", "Morgan Freeman"] # a plain Array value
66
+
67
+ assert_equal(3, d.size)
68
+ assert_equal(%(
69
+ Document {
70
+ :actors => ["Tim Robbins", "Morgan Freeman"]
71
+ :name => ["section0", "section1", "section2"]^321.0
72
+ :title => "Shawshank Redemption"
73
+ }^123.0).strip, d.to_s) # expected output lists :actors, :name, :title although they were assigned in another order
74
+
75
+ d2 = Ferret::Document.new(123.0) # boost passed to the constructor
76
+ d2[:name] = Ferret::Field.new(["section0", "section1", "section2"], 321.0)
77
+ d2[:title] = "Shawshank Redemption"
78
+ d2[:actors] = ["Tim Robbins", "Morgan Freeman"]
79
+ assert_equal(d, d2) # a document built in one go is expected to equal the incrementally built one
80
+ end
81
+ end
@@ -0,0 +1,2 @@
1
+ require File.join(File.dirname(__FILE__), "../test_helper.rb") # shared test helper, one directory above this file
2
+ load_test_dir('unit/analysis') # load_test_dir is defined outside this view; presumably loads the tests under unit/analysis -- confirm
@@ -0,0 +1,2 @@
1
+ require File.join(File.dirname(__FILE__), "../test_helper.rb") # shared test helper, one directory above this file
2
+ load_test_dir('unit/index') # load_test_dir is defined outside this view; presumably loads the tests under unit/index -- confirm
@@ -0,0 +1,4 @@
1
+ if ENV['FERRET_DEV'] # this suite is only loaded when the FERRET_DEV environment variable is set
2
+ require File.join(File.dirname(__FILE__), "../test_helper.rb") # shared test helper, one directory above this file
3
+ load_test_dir('unit/largefile') # load_test_dir is defined outside this view; presumably loads the tests under unit/largefile -- confirm
4
+ end
@@ -0,0 +1,2 @@
1
+ require File.join(File.dirname(__FILE__), "../test_helper.rb") # shared test helper, one directory above this file
2
+ load_test_dir('unit/query_parser') # load_test_dir is defined outside this view; presumably loads the tests under unit/query_parser -- confirm
@@ -0,0 +1,2 @@
1
+ require File.join(File.dirname(__FILE__), "../test_helper.rb") # shared test helper, one directory above this file
2
+ load_test_dir('unit/search') # load_test_dir is defined outside this view; presumably loads the tests under unit/search -- confirm
@@ -0,0 +1,2 @@
1
+ require File.join(File.dirname(__FILE__), "../test_helper.rb") # shared test helper, one directory above this file
2
+ load_test_dir('unit/store') # load_test_dir is defined outside this view; presumably loads the tests under unit/store -- confirm
@@ -0,0 +1,2 @@
1
+ require File.join(File.dirname(__FILE__), "../test_helper.rb") # shared test helper, one directory above this file
2
+ load_test_dir('unit/utils') # load_test_dir is defined outside this view; presumably loads the tests under unit/utils -- confirm
@@ -0,0 +1,295 @@
1
+ require File.dirname(__FILE__) + "/../../test_helper"
2
+
3
+
4
# Unit tests for BitVector (from Ferret::Utils): single-bit access, counting,
# equality and hashing, the boolean operators, scanning and array conversion.
# Several tests mirror the vector in a plain Integer used as a reference
# bit mask and compare the two bit by bit.
class BitVectorTest < Test::Unit::TestCase
  include Ferret::Utils

  # Sets, clears and reads bit 10 through every accessor the vector offers.
  def test_bv_get_set
    bits = BitVector.new
    assert_equal(0, bits.count)

    bits.set(10)
    assert(bits.get(10))
    assert(bits[10])
    assert_equal(1, bits.count)

    # assigning false clears the bit
    bits[10] = false
    assert(!bits[10])

    bits[10] = true
    assert(bits[10])

    # assigning nil is expected to clear the bit as well
    bits[10] = nil
    assert(!bits[10])

    bits[10] = true
    assert(bits[10])

    bits.unset(10)
    assert(!bits[10])

    bits[10] = true
    assert(bits[10])
  end

  # Follows the population count while bits are set, unset and cleared.
  def test_bv_count
    bits = BitVector.new
    bits.set(10)
    assert_equal(1, bits.count)

    bits.set(20)
    assert(bits.get(20))
    assert_equal(2, bits.count)

    bits.set(21)
    assert(bits.get(21))
    assert_equal(3, bits.count)

    bits.unset(21)
    assert(!bits.get(21))
    assert_equal(2, bits.count)

    bits[20] = nil
    assert(!bits.get(20))
    assert_equal(1, bits.count)

    # 51 bits in 50..100 plus the one still set at 10
    50.upto(100) { |pos| bits.set(pos) }
    50.upto(100) { |pos| assert(bits[pos]) }
    assert(bits.get(10))
    assert_equal(52, bits.count)

    bits.clear
    assert_equal(0, bits.count)
    50.upto(100) { |pos| assert(!bits[pos]) }
    assert(!bits.get(10))
  end

  # Equality and hash must agree: equal vectors hash alike, and the test
  # expects differing vectors to produce differing hashes.
  def test_bv_eql_hash
    left = BitVector.new
    right = BitVector.new
    assert_equal(left, right)
    assert_equal(left.hash, right.hash)

    left.set(10)
    assert_not_equal(left, right)
    assert_not_equal(left.hash, right.hash)

    right.set(10)
    assert_equal(left, right)
    assert_equal(left.hash, right.hash)

    (0...10).each { |k| left.set(k * 31) }
    assert_not_equal(left, right)
    assert_not_equal(left.hash, right.hash)

    (0...10).each { |k| right.set(k * 31) }
    assert_equal(left, right)
    assert_equal(left.hash, right.hash)

    left.clear
    assert_not_equal(left, right)
    assert_not_equal(left.hash, right.hash)

    right.clear
    assert_equal(left, right)
    assert_equal(left.hash, right.hash)
  end

  # number of random bits set per vector / exclusive upper bound for them
  BV_COUNT = 500
  BV_SIZE = 1000

  # Checks & and and! against an Integer reference mask, then against an
  # empty vector and against the negation of an empty vector.
  def test_bv_and
    left = BitVector.new
    right = BitVector.new
    left_mask = 0
    common_mask = 0
    common_count = 0

    BV_COUNT.times do
      pos = rand(BV_SIZE)
      left.set(pos)
      left_mask |= 1 << pos
    end

    BV_COUNT.times do
      pos = rand(BV_SIZE)
      right.set(pos)
      # count each position present in both vectors exactly once
      next unless left_mask[pos] == 1 && common_mask[pos].zero?

      common_mask |= 1 << pos
      common_count += 1
    end

    anded = left & right
    assert_equal(common_count, anded.count)
    (0...BV_SIZE).each do |pos|
      assert_equal(common_mask[pos] == 1, anded[pos])
    end

    right.and!(left)
    assert_equal(right, anded)

    right = BitVector.new
    anded = left & right

    assert_equal(right, anded, "and_bv should be empty")
    assert_equal(0, anded.count)

    left = BitVector.new
    right = BitVector.new.not!
    [10, 11, 20].each { |pos| left.set(pos) }
    assert_equal(left, left & right, "bv anded with empty not bv should be same")
  end

  # Checks | and or! against an Integer reference mask.
  def test_bv_or
    left = BitVector.new
    right = BitVector.new
    union_mask = 0
    union_count = 0

    BV_COUNT.times do
      pos = rand(BV_SIZE)
      left.set(pos)
      next unless union_mask[pos].zero?

      union_count += 1
      union_mask |= 1 << pos
    end

    BV_COUNT.times do
      pos = rand(BV_SIZE)
      right.set(pos)
      next unless union_mask[pos].zero?

      union_count += 1
      union_mask |= 1 << pos
    end

    ored = left | right
    assert_equal(union_count, ored.count)
    (0...BV_SIZE).each do |pos|
      assert_equal(union_mask[pos] == 1, ored[pos])
    end

    right.or!(left)
    assert_equal(right, ored)

    # or-ing with an empty vector must give back the other operand
    right = BitVector.new
    ored = left | right

    assert_equal(left, ored)
  end

  # Checks ^ and xor! against an Integer reference mask.
  def test_bv_xor
    left = BitVector.new
    right = BitVector.new
    left_mask = 0
    right_mask = 0

    BV_COUNT.times do
      pos = rand(BV_SIZE)
      left.set(pos)
      left_mask |= 1 << pos
    end

    BV_COUNT.times do
      pos = rand(BV_SIZE)
      right.set(pos)
      right_mask |= 1 << pos
    end

    diff_mask = left_mask ^ right_mask
    diff_count = 0
    # Integer#[] yields 0 or 1, so summing it counts the set bits
    (0...BV_SIZE).each { |pos| diff_count += diff_mask[pos] }

    xored = left ^ right
    (0...BV_SIZE).each do |pos|
      assert_equal(diff_mask[pos] == 1, xored[pos])
    end
    assert_equal(diff_count, xored.count)

    right.xor!(left)
    assert_equal(right, xored)

    # xor-ing with an empty vector must give back the other operand
    right = BitVector.new
    xored = left ^ right

    assert_equal(left, xored)
  end

  # ~ must flip every bit, and not! applied to the result must restore the
  # original vector. The test also expects both vectors to report the same
  # count.
  def test_bv_not
    bits = BitVector.new
    [1, 5, 25, 41, 97, 185].each { |pos| bits.set(pos) }
    inverted = ~bits
    assert_equal(bits.count, inverted.count)
    (0...200).each { |pos| assert(bits[pos] != inverted[pos]) }

    inverted.not!
    assert_equal(bits, inverted)
  end


  # number of bits set in test_scan / distance between neighbouring bits
  SCAN_SIZE = 200
  SCAN_INC = 97

  # Exercises the scanning API (next_from, next_unset_from, next,
  # next_unset, reset_scan, each) on a sparse vector with every SCAN_INC-th
  # bit set, then on a dense vector, each time alongside its negation.
  def test_scan
    bits = BitVector.new

    (0...SCAN_SIZE).each { |step| bits.set(step * SCAN_INC) }
    inverted = ~bits

    (0...SCAN_SIZE).each do |step|
      target = step * SCAN_INC
      # start one past the previous set bit
      start = (step - 1) * SCAN_INC + 1
      assert_equal(target, bits.next_from(start))
      assert_equal(target, inverted.next_unset_from(start))
    end
    past_last = (SCAN_SIZE - 1) * SCAN_INC + 1
    assert_equal(-1, bits.next_from(past_last))
    assert_equal(-1, inverted.next_unset_from(past_last))

    expected = 0
    bits.each do |pos|
      assert_equal(expected, pos)
      expected += SCAN_INC
    end
    assert_equal(expected, SCAN_SIZE * SCAN_INC)

    expected = 0
    inverted.each do |pos|
      assert_equal(expected, pos)
      expected += SCAN_INC
    end
    assert_equal(expected, SCAN_SIZE * SCAN_INC)

    bits.reset_scan
    inverted.reset_scan
    (0...SCAN_SIZE).each do |step|
      assert_equal(step * SCAN_INC, bits.next)
      assert_equal(step * SCAN_INC, inverted.next_unset)
    end
    assert_equal(-1, bits.next)
    assert_equal(-1, inverted.next_unset)

    # dense case: the first SCAN_SIZE bits are all set
    bits.clear
    (0...SCAN_SIZE).each { |pos| bits.set(pos) }
    inverted = ~bits

    (0...SCAN_SIZE).each do |pos|
      assert_equal(pos, bits.next)
      assert_equal(pos, inverted.next_unset)
    end
    assert_equal(-1, bits.next)
    assert_equal(-1, inverted.next_unset)

    expected = 0
    bits.each do |pos|
      assert_equal(expected, pos)
      expected += 1
    end
    assert_equal(expected, SCAN_SIZE)

    expected = 0
    inverted.each do |pos|
      assert_equal(expected, pos)
      expected += 1
    end
    assert_equal(expected, SCAN_SIZE)
  end

  # to_a must return the set positions in ascending order without
  # duplicates.
  def test_to_a
    bits = BitVector.new
    positions = Array.new(100) { rand(1000) }.sort.uniq
    positions.each { |pos| bits.set(pos) }
    assert_equal(positions, bits.to_a)
  end
end