sdsykes-ferret 0.11.6.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +24 -0
- data/MIT-LICENSE +20 -0
- data/README +102 -0
- data/Rakefile +338 -0
- data/TODO +17 -0
- data/TUTORIAL +231 -0
- data/bin/ferret-browser +79 -0
- data/ext/analysis.c +1555 -0
- data/ext/analysis.h +219 -0
- data/ext/api.c +69 -0
- data/ext/api.h +27 -0
- data/ext/array.c +123 -0
- data/ext/array.h +53 -0
- data/ext/bitvector.c +540 -0
- data/ext/bitvector.h +272 -0
- data/ext/compound_io.c +383 -0
- data/ext/config.h +42 -0
- data/ext/document.c +156 -0
- data/ext/document.h +53 -0
- data/ext/except.c +120 -0
- data/ext/except.h +168 -0
- data/ext/extconf.rb +14 -0
- data/ext/ferret.c +402 -0
- data/ext/ferret.h +91 -0
- data/ext/filter.c +156 -0
- data/ext/fs_store.c +483 -0
- data/ext/global.c +418 -0
- data/ext/global.h +117 -0
- data/ext/hash.c +567 -0
- data/ext/hash.h +473 -0
- data/ext/hashset.c +170 -0
- data/ext/hashset.h +187 -0
- data/ext/header.h +58 -0
- data/ext/helper.c +62 -0
- data/ext/helper.h +13 -0
- data/ext/inc/lang.h +48 -0
- data/ext/inc/threading.h +31 -0
- data/ext/index.c +6425 -0
- data/ext/index.h +961 -0
- data/ext/lang.h +66 -0
- data/ext/libstemmer.c +92 -0
- data/ext/libstemmer.h +79 -0
- data/ext/mempool.c +87 -0
- data/ext/mempool.h +35 -0
- data/ext/modules.h +162 -0
- data/ext/multimapper.c +310 -0
- data/ext/multimapper.h +51 -0
- data/ext/posh.c +1006 -0
- data/ext/posh.h +1007 -0
- data/ext/priorityqueue.c +151 -0
- data/ext/priorityqueue.h +143 -0
- data/ext/q_boolean.c +1608 -0
- data/ext/q_const_score.c +161 -0
- data/ext/q_filtered_query.c +209 -0
- data/ext/q_fuzzy.c +268 -0
- data/ext/q_match_all.c +148 -0
- data/ext/q_multi_term.c +677 -0
- data/ext/q_parser.c +2825 -0
- data/ext/q_phrase.c +1126 -0
- data/ext/q_prefix.c +100 -0
- data/ext/q_range.c +350 -0
- data/ext/q_span.c +2402 -0
- data/ext/q_term.c +337 -0
- data/ext/q_wildcard.c +171 -0
- data/ext/r_analysis.c +2575 -0
- data/ext/r_index.c +3472 -0
- data/ext/r_qparser.c +585 -0
- data/ext/r_search.c +4105 -0
- data/ext/r_store.c +513 -0
- data/ext/r_utils.c +963 -0
- data/ext/ram_store.c +471 -0
- data/ext/search.c +1741 -0
- data/ext/search.h +885 -0
- data/ext/similarity.c +150 -0
- data/ext/similarity.h +82 -0
- data/ext/sort.c +983 -0
- data/ext/stem_ISO_8859_1_danish.c +338 -0
- data/ext/stem_ISO_8859_1_danish.h +16 -0
- data/ext/stem_ISO_8859_1_dutch.c +635 -0
- data/ext/stem_ISO_8859_1_dutch.h +16 -0
- data/ext/stem_ISO_8859_1_english.c +1156 -0
- data/ext/stem_ISO_8859_1_english.h +16 -0
- data/ext/stem_ISO_8859_1_finnish.c +792 -0
- data/ext/stem_ISO_8859_1_finnish.h +16 -0
- data/ext/stem_ISO_8859_1_french.c +1276 -0
- data/ext/stem_ISO_8859_1_french.h +16 -0
- data/ext/stem_ISO_8859_1_german.c +512 -0
- data/ext/stem_ISO_8859_1_german.h +16 -0
- data/ext/stem_ISO_8859_1_italian.c +1091 -0
- data/ext/stem_ISO_8859_1_italian.h +16 -0
- data/ext/stem_ISO_8859_1_norwegian.c +296 -0
- data/ext/stem_ISO_8859_1_norwegian.h +16 -0
- data/ext/stem_ISO_8859_1_porter.c +776 -0
- data/ext/stem_ISO_8859_1_porter.h +16 -0
- data/ext/stem_ISO_8859_1_portuguese.c +1035 -0
- data/ext/stem_ISO_8859_1_portuguese.h +16 -0
- data/ext/stem_ISO_8859_1_spanish.c +1119 -0
- data/ext/stem_ISO_8859_1_spanish.h +16 -0
- data/ext/stem_ISO_8859_1_swedish.c +307 -0
- data/ext/stem_ISO_8859_1_swedish.h +16 -0
- data/ext/stem_KOI8_R_russian.c +701 -0
- data/ext/stem_KOI8_R_russian.h +16 -0
- data/ext/stem_UTF_8_danish.c +344 -0
- data/ext/stem_UTF_8_danish.h +16 -0
- data/ext/stem_UTF_8_dutch.c +653 -0
- data/ext/stem_UTF_8_dutch.h +16 -0
- data/ext/stem_UTF_8_english.c +1176 -0
- data/ext/stem_UTF_8_english.h +16 -0
- data/ext/stem_UTF_8_finnish.c +808 -0
- data/ext/stem_UTF_8_finnish.h +16 -0
- data/ext/stem_UTF_8_french.c +1296 -0
- data/ext/stem_UTF_8_french.h +16 -0
- data/ext/stem_UTF_8_german.c +526 -0
- data/ext/stem_UTF_8_german.h +16 -0
- data/ext/stem_UTF_8_italian.c +1113 -0
- data/ext/stem_UTF_8_italian.h +16 -0
- data/ext/stem_UTF_8_norwegian.c +302 -0
- data/ext/stem_UTF_8_norwegian.h +16 -0
- data/ext/stem_UTF_8_porter.c +794 -0
- data/ext/stem_UTF_8_porter.h +16 -0
- data/ext/stem_UTF_8_portuguese.c +1055 -0
- data/ext/stem_UTF_8_portuguese.h +16 -0
- data/ext/stem_UTF_8_russian.c +709 -0
- data/ext/stem_UTF_8_russian.h +16 -0
- data/ext/stem_UTF_8_spanish.c +1137 -0
- data/ext/stem_UTF_8_spanish.h +16 -0
- data/ext/stem_UTF_8_swedish.c +313 -0
- data/ext/stem_UTF_8_swedish.h +16 -0
- data/ext/stopwords.c +401 -0
- data/ext/store.c +692 -0
- data/ext/store.h +777 -0
- data/ext/term_vectors.c +352 -0
- data/ext/threading.h +31 -0
- data/ext/utilities.c +446 -0
- data/ext/win32.h +54 -0
- data/lib/ferret.rb +29 -0
- data/lib/ferret/browser.rb +246 -0
- data/lib/ferret/browser/s/global.js +192 -0
- data/lib/ferret/browser/s/style.css +148 -0
- data/lib/ferret/browser/views/document/list.rhtml +49 -0
- data/lib/ferret/browser/views/document/show.rhtml +27 -0
- data/lib/ferret/browser/views/error/index.rhtml +7 -0
- data/lib/ferret/browser/views/help/index.rhtml +8 -0
- data/lib/ferret/browser/views/home/index.rhtml +29 -0
- data/lib/ferret/browser/views/layout.rhtml +22 -0
- data/lib/ferret/browser/views/term-vector/index.rhtml +4 -0
- data/lib/ferret/browser/views/term/index.rhtml +199 -0
- data/lib/ferret/browser/views/term/termdocs.rhtml +1 -0
- data/lib/ferret/browser/webrick.rb +14 -0
- data/lib/ferret/document.rb +130 -0
- data/lib/ferret/field_infos.rb +44 -0
- data/lib/ferret/index.rb +786 -0
- data/lib/ferret/number_tools.rb +157 -0
- data/lib/ferret_version.rb +3 -0
- data/setup.rb +1555 -0
- data/test/test_all.rb +5 -0
- data/test/test_helper.rb +24 -0
- data/test/threading/number_to_spoken.rb +132 -0
- data/test/threading/thread_safety_index_test.rb +79 -0
- data/test/threading/thread_safety_read_write_test.rb +76 -0
- data/test/threading/thread_safety_test.rb +133 -0
- data/test/unit/analysis/tc_analyzer.rb +548 -0
- data/test/unit/analysis/tc_token_stream.rb +646 -0
- data/test/unit/index/tc_index.rb +762 -0
- data/test/unit/index/tc_index_reader.rb +699 -0
- data/test/unit/index/tc_index_writer.rb +437 -0
- data/test/unit/index/th_doc.rb +315 -0
- data/test/unit/largefile/tc_largefile.rb +46 -0
- data/test/unit/query_parser/tc_query_parser.rb +238 -0
- data/test/unit/search/tc_filter.rb +135 -0
- data/test/unit/search/tc_fuzzy_query.rb +147 -0
- data/test/unit/search/tc_index_searcher.rb +61 -0
- data/test/unit/search/tc_multi_searcher.rb +128 -0
- data/test/unit/search/tc_multiple_search_requests.rb +58 -0
- data/test/unit/search/tc_search_and_sort.rb +179 -0
- data/test/unit/search/tc_sort.rb +49 -0
- data/test/unit/search/tc_sort_field.rb +27 -0
- data/test/unit/search/tc_spans.rb +190 -0
- data/test/unit/search/tm_searcher.rb +384 -0
- data/test/unit/store/tc_fs_store.rb +77 -0
- data/test/unit/store/tc_ram_store.rb +35 -0
- data/test/unit/store/tm_store.rb +34 -0
- data/test/unit/store/tm_store_lock.rb +68 -0
- data/test/unit/tc_document.rb +81 -0
- data/test/unit/ts_analysis.rb +2 -0
- data/test/unit/ts_index.rb +2 -0
- data/test/unit/ts_largefile.rb +4 -0
- data/test/unit/ts_query_parser.rb +2 -0
- data/test/unit/ts_search.rb +2 -0
- data/test/unit/ts_store.rb +2 -0
- data/test/unit/ts_utils.rb +2 -0
- data/test/unit/utils/tc_bit_vector.rb +295 -0
- data/test/unit/utils/tc_number_tools.rb +117 -0
- data/test/unit/utils/tc_priority_queue.rb +106 -0
- metadata +285 -0
@@ -0,0 +1,77 @@
|
|
1
|
+
require File.dirname(__FILE__) + "/../../test_helper"
|
2
|
+
require File.dirname(__FILE__) + "/tm_store"
|
3
|
+
require File.dirname(__FILE__) + "/tm_store_lock"
|
4
|
+
|
5
|
+
# Tests for the file-system backed store. Besides the shared StoreTest and
# StoreLockTest suites (mixed in below) it checks that obtaining a lock creates
# a lock file under the directory path and that releasing it removes the file.
class FSStoreTest < Test::Unit::TestCase
  include Ferret::Store
  include StoreTest
  include StoreLockTest

  # Opens an FSDirectory on <this dir>/../../temp/fsdir before every test.
  # NOTE(review): the second argument (true) presumably asks Ferret to create
  # the directory -- confirm against FSDirectory.new.
  def setup
    @dpath = File.expand_path(File.join(File.dirname(__FILE__),
                                        '../../temp/fsdir'))
    @dir = FSDirectory.new(@dpath, true)
  end

  def teardown
    @dir.refresh()
    @dir.close()
  end

  # A lock object alone must not touch the disk; the lock file appears on
  # obtain and disappears on release.
  def test_fslock
    lock_name = "lfile"
    lock_file_path = make_lock_file_path(lock_name)
    # File.exist? is used throughout: File.exists? was removed in Ruby 3.2.
    assert(!File.exist?(lock_file_path), "There should be no lock file")
    lock = @dir.make_lock(lock_name)
    assert(!File.exist?(lock_file_path), "There should still be no lock file")
    assert(!lock.locked?, "lock shouldn't be locked yet")

    lock.obtain

    assert(lock.locked?, "lock should now be locked")

    assert(File.exist?(lock_file_path), "A lock file should have been created")

    assert(@dir.exists?(lfname(lock_name)), "The lock should exist")

    lock.release

    assert(!lock.locked?, "lock should be freed again")
    assert(!File.exist?(lock_file_path), "The lock file should have been deleted")
  end

  # The finalizer test below was already commented out in the original source;
  # it is kept for reference only.
  #
  #  def make_and_lose_lock
  #    lock = @dir.make_lock("finalizer_lock")
  #    lock.obtain
  #    lock = nil
  #  end
  #
  #  def test_fslock_finalizer
  #    lock_name = "finalizer_lock"
  #    lock_file_path = make_lock_file_path(lock_name)
  #    assert(! File.exist?(lock_file_path), "There should be no lock file")
  #
  #    make_and_lose_lock
  #
  #    #assert(File.exist?(lock_file_path), "There should now be a lock file")
  #
  #    lock = @dir.make_lock(lock_name)
  #    assert(lock.locked?, "lock should now be locked")
  #
  #    GC.start
  #
  #    assert(! lock.locked?, "lock should be freed again")
  #    assert(! File.exist?(lock_file_path), "The lock file should have been deleted")
  #  end

  # Returns the on-disk path of the lock file for +name+, deleting any file
  # left there so the caller starts from a clean state.
  def make_lock_file_path(name)
    lock_file_path = File.join(@dpath, lfname(name))
    File.delete(lock_file_path) if File.exist?(lock_file_path)
    lock_file_path
  end

  # File name these tests expect Ferret to use for the lock called +name+.
  def lfname(name)
    "ferret-#{name}.lck"
  end
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
require File.dirname(__FILE__) + "/../../test_helper"
|
2
|
+
require File.dirname(__FILE__) + "/tm_store"
|
3
|
+
require File.dirname(__FILE__) + "/tm_store_lock"
|
4
|
+
|
5
|
+
# Runs the shared store and lock suites against Ferret's in-memory directory
# and checks when the lock file is visible through the directory's exists?.
class RAMStoreTest < Test::Unit::TestCase
  include StoreTest
  include StoreLockTest

  # A fresh RAMDirectory for every test.
  def setup
    @dir = Ferret::Store::RAMDirectory.new
  end

  def teardown
    @dir.close
  end

  # The lock file must only exist between obtain and release.
  def test_ramlock
    lock_name = "lfile"
    lock_file = Ferret::Store::Directory::LOCK_PREFIX + lock_name + ".lck"
    lock_file_absent = lambda { !@dir.exists?(lock_file) }

    assert(lock_file_absent.call, "There should be no lock file")

    ram_lock = @dir.make_lock(lock_name)
    assert(lock_file_absent.call, "There should still be no lock file")
    assert(lock_file_absent.call,
           "The lock should be hidden by the FSDirectories directory scan")
    assert(!ram_lock.locked?, "lock shouldn't be locked yet")

    ram_lock.obtain
    assert(ram_lock.locked?, "lock should now be locked")
    assert(@dir.exists?(lock_file), "A lock file should have been created")

    ram_lock.release
    assert(!ram_lock.locked?, "lock should be freed again")
    assert(lock_file_absent.call, "The lock file should have been deleted")
  end
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
# Directory tests shared by every store implementation. The including test
# case must assign @dir in its setup; the object is only used through
# file_count, exists?, touch, delete and rename.
module StoreTest
  # Exposes the directory under test to the including class.
  attr_accessor :dir

  # Walks through the basic file manipulation calls:
  #   - exists?
  #   - touch
  #   - delete
  #   - file_count
  def test_basic_file_ops
    store = @dir

    assert_equal(0, store.file_count, "directory should be empty")
    assert(!store.exists?('filename'), "File should not exist")

    store.touch('tmpfile1')
    assert_equal(1, store.file_count, "directory should have one file")

    store.touch('tmpfile2')
    assert_equal(2, store.file_count, "directory should have two files")
    assert(store.exists?('tmpfile1'), "'tmpfile1' should exist")

    store.delete('tmpfile1')
    assert(!store.exists?('tmpfile1'), "'tmpfile1' should no longer exist")
    assert_equal(1, store.file_count, "directory should have one file")
  end

  # Renaming moves the entry without changing how many files there are.
  def test_rename
    store = @dir

    store.touch("from")
    assert(store.exists?('from'), "File should exist")
    assert(!store.exists?('to'), "File should not exist")

    files_before = store.file_count
    store.rename('from', 'to')
    files_after = store.file_count

    assert_equal(files_before, files_after, "the number of files shouldn't have changed")
    assert(store.exists?('to'), "File should now exist")
    assert(!store.exists?('from'), "File should no longer exist")
  end
end
|
@@ -0,0 +1,68 @@
|
|
1
|
+
# Locking tests shared by every store implementation. The including test case
# must assign @dir; it is only used through make_lock(name), and the returned
# locks through obtain(timeout), locked?, release and while_locked(timeout).
module StoreLockTest
  # Step counter used as a hand-shake between the test thread and the
  # background thread in test_locking. It is class-level state shared by every
  # test case that mixes this module in, so test_locking resets it before use.
  class Switch
    @@counter = 0
    def Switch.counter() return @@counter end
    def Switch.counter=(counter) @@counter = counter end
  end

  # Two lock handles with the same name must exclude each other, both within
  # one thread and across threads.
  def test_locking()
    lock_time_out = 0.001 # we want this test to run quickly
    lock1 = @dir.make_lock("l.lck")
    lock2 = @dir.make_lock("l.lck")

    assert(!lock2.locked?)
    assert(lock1.obtain(lock_time_out))
    assert(lock2.locked?)

    assert(!can_obtain_lock?(lock2))

    # Record what happened and assert afterwards. Asserting inside the block
    # would raise an error that the bare rescue below swallows and counts as
    # the expected failure.
    block_ran = false
    obtain_failed = false
    begin
      lock2.while_locked(lock_time_out) { block_ran = true }
    rescue
      obtain_failed = true
    end
    assert(!block_ran, "lock should not have been obtained")
    assert(obtain_failed)

    lock1.release()
    assert(lock2.obtain(lock_time_out))
    lock2.release()

    # Start from a known state: an earlier run in another test case leaves the
    # counter at 3, which would skip both wait loops.
    Switch.counter = 0
    holder = Thread.new() do
      lock1.while_locked(lock_time_out) do
        Switch.counter = 1
        # make sure lock2 obtain test was run
        Thread.pass while Switch.counter < 2
        Switch.counter = 3
      end
    end

    # make sure thread has started and lock been obtained
    wait_for_switch(1, holder)

    assert(!can_obtain_lock?(lock2))

    Switch.counter = 2
    # Joining guarantees while_locked has released the lock before lock2 asks
    # for it, and re-raises anything that went wrong in the thread.
    holder.join

    assert(lock2.obtain(lock_time_out))
    lock2.release()
  end

  # Returns true if +lock+ could be obtained within a very short timeout
  # (the lock is then left held), false if obtaining raised.
  def can_obtain_lock?(lock)
    lock_time_out = 0.001 # we want this test to run quickly
    begin
      lock.obtain(lock_time_out)
      return true
    rescue
    end
    return false
  end

  private

  # Yields the CPU until Switch.counter reaches +step+. If +thread+ dies
  # first, joins it so its exception surfaces instead of waiting forever.
  def wait_for_switch(step, thread)
    Thread.pass while Switch.counter < step && thread.alive?
    thread.join unless thread.alive?
  end
end
|
@@ -0,0 +1,81 @@
|
|
1
|
+
require File.dirname(__FILE__) + "/../test_helper"
|
2
|
+
|
3
|
+
# Unit tests for Ferret::Field and Ferret::Document: size, boost, equality and
# the text produced by to_s.
#
# NOTE(review): the listing this file was recovered from shows every line
# without leading whitespace, so the multi-line %( ... ) expectations below are
# reproduced exactly as listed. Confirm the indentation inside those literals
# against Document#to_s before relying on them.
class DocumentTest < Test::Unit::TestCase
  # A field grows with <<, compares by content and boost, and prints its
  # boost only once it has been changed.
  def test_field
    field = Ferret::Field.new
    assert_equal(0, field.size)
    assert_equal(1.0, field.boost)

    other = Ferret::Field.new
    assert_equal(field, other)

    field << "section0"
    assert_equal(1, field.size)
    assert_equal(1.0, field.boost)
    assert_equal("section0", field[0])
    assert_not_equal(field, other)

    field << "section1"
    assert_equal(2, field.size)
    assert_equal(1.0, field.boost)
    assert_equal("section0", field[0])
    assert_equal("section1", field[1])
    assert_equal('["section0", "section1"]', field.to_s)
    assert_not_equal(field, other)
    other += field
    assert_equal(field, other)

    field.boost = 4.0
    assert_not_equal(field, other)
    assert_equal('["section0", "section1"]^4.0', field.to_s)

    other.boost = 4.0
    assert_equal(field, other)

    prebuilt = Ferret::Field.new(["section0", "section1"], 4.0)
    assert_equal(field, prebuilt)
  end

  # A document maps field names to values, carries its own boost and equals
  # another document built with the same content.
  def test_document
    doc = Ferret::Document.new

    doc[:name] = Ferret::Field.new
    doc[:name] << "section0"
    doc[:name] << "section1"

    assert_equal(1, doc.size)
    assert_equal(1.0, doc.boost)
    assert_equal(%(
Document {
:name => ["section0", "section1"]
}).strip, doc.to_s)

    doc.boost = 123.0
    doc[:name] << "section2"
    doc[:name].boost = 321.0
    assert_equal(123.0, doc.boost)
    assert_equal(321.0, doc[:name].boost)
    assert_equal(%(
Document {
:name => ["section0", "section1", "section2"]^321.0
}^123.0).strip, doc.to_s)

    doc[:title] = "Shawshank Redemption"
    doc[:actors] = ["Tim Robbins", "Morgan Freeman"]

    assert_equal(3, doc.size)
    assert_equal(%(
Document {
:actors => ["Tim Robbins", "Morgan Freeman"]
:name => ["section0", "section1", "section2"]^321.0
:title => "Shawshank Redemption"
}^123.0).strip, doc.to_s)

    twin = Ferret::Document.new(123.0)
    twin[:name] = Ferret::Field.new(["section0", "section1", "section2"], 321.0)
    twin[:title] = "Shawshank Redemption"
    twin[:actors] = ["Tim Robbins", "Morgan Freeman"]
    assert_equal(doc, twin)
  end
end
|
@@ -0,0 +1,295 @@
|
|
1
|
+
require File.dirname(__FILE__) + "/../../test_helper"
|
2
|
+
|
3
|
+
|
4
|
+
# Unit tests for BitVector (from Ferret::Utils): single-bit access, counting,
# equality and hash, the boolean operators with their in-place variants, bit
# scanning and conversion to an array. The operator tests mirror each vector
# in a plain Integer bit mask and compare the two.
class BitVectorTest < Test::Unit::TestCase
  include Ferret::Utils

  # One bit, set and cleared through every accessor.
  def test_bv_get_set
    vector = BitVector.new
    assert_equal(0, vector.count)

    vector.set(10)
    assert(vector.get(10))
    assert(vector[10])
    assert_equal(1, vector.count)

    vector[10] = false
    assert(!vector[10])

    vector[10] = true
    assert(vector[10])

    vector[10] = nil
    assert(!vector[10])

    vector[10] = true
    assert(vector[10])

    vector.unset(10)
    assert(!vector[10])

    vector[10] = true
    assert(vector[10])
  end

  # count follows every set, unset and clear.
  def test_bv_count
    vector = BitVector.new
    vector.set(10)
    assert_equal(1, vector.count)

    vector.set(20)
    assert(vector.get(20))
    assert_equal(2, vector.count)

    vector.set(21)
    assert(vector.get(21))
    assert_equal(3, vector.count)

    vector.unset(21)
    assert(!vector.get(21))
    assert_equal(2, vector.count)

    vector[20] = nil
    assert(!vector.get(20))
    assert_equal(1, vector.count)

    50.upto(100) { |pos| vector.set(pos) }
    50.upto(100) { |pos| assert(vector[pos]) }
    assert(vector.get(10))
    assert_equal(52, vector.count)

    vector.clear
    assert_equal(0, vector.count)
    50.upto(100) { |pos| assert(!vector[pos]) }
    assert(!vector.get(10))
  end

  # Vectors with the same bits are equal and hash alike; otherwise neither.
  def test_bv_eql_hash
    left = BitVector.new
    right = BitVector.new
    assert_vectors_match(left, right)

    left.set(10)
    assert_vectors_differ(left, right)

    right.set(10)
    assert_vectors_match(left, right)

    (0...10).each { |step| left.set(step * 31) }
    assert_vectors_differ(left, right)

    (0...10).each { |step| right.set(step * 31) }
    assert_vectors_match(left, right)

    left.clear
    assert_vectors_differ(left, right)

    right.clear
    assert_vectors_match(left, right)
  end

  BV_COUNT = 500
  BV_SIZE = 1000

  # & and and! against an Integer mask of the bits present in both vectors.
  def test_bv_and
    first = BitVector.new
    second = BitVector.new
    first_mask = 0
    common_mask = 0
    common_bits = 0

    BV_COUNT.times do
      pos = rand(BV_SIZE)
      first.set(pos)
      first_mask |= (1 << pos)
    end

    BV_COUNT.times do
      pos = rand(BV_SIZE)
      second.set(pos)
      # count a position once, and only if the first vector has it too
      next unless first_mask[pos] == 1 && common_mask[pos] == 0
      common_mask |= (1 << pos)
      common_bits += 1
    end

    and_bv = first & second
    assert_equal(common_bits, and_bv.count)
    (0...BV_SIZE).each do |pos|
      assert_equal(common_mask[pos] == 1, and_bv[pos])
    end

    second.and!(first)
    assert_equal(second, and_bv)

    second = BitVector.new
    and_bv = first & second

    assert_equal(second, and_bv, "and_bv should be empty")
    assert_equal(0, and_bv.count)

    first = BitVector.new
    second = BitVector.new.not!
    [10, 11, 20].each { |pos| first.set(pos) }
    assert_equal(first, first & second, "bv anded with empty not bv should be same")
  end

  # | and or! against an Integer mask of every bit set in either vector.
  def test_bv_or
    first = BitVector.new
    second = BitVector.new
    union_mask = 0
    union_bits = 0

    [first, second].each do |target|
      BV_COUNT.times do
        pos = rand(BV_SIZE)
        target.set(pos)
        next unless union_mask[pos] == 0
        union_bits += 1
        union_mask |= (1 << pos)
      end
    end

    or_bv = first | second
    assert_equal(union_bits, or_bv.count)
    (0...BV_SIZE).each do |pos|
      assert_equal(union_mask[pos] == 1, or_bv[pos])
    end

    second.or!(first)
    assert_equal(second, or_bv)

    second = BitVector.new
    or_bv = first | second

    assert_equal(first, or_bv)
  end

  # ^ and xor! against the xor of the two Integer masks.
  def test_bv_xor
    first = BitVector.new
    second = BitVector.new
    first_mask = 0
    second_mask = 0

    BV_COUNT.times do
      pos = rand(BV_SIZE)
      first.set(pos)
      first_mask |= (1 << pos)
    end

    BV_COUNT.times do
      pos = rand(BV_SIZE)
      second.set(pos)
      second_mask |= (1 << pos)
    end

    expected_mask = first_mask ^ second_mask
    expected_bits = 0
    (0...BV_SIZE).each do |pos|
      expected_bits += 1 if expected_mask[pos] == 1
    end

    xor_bv = first ^ second
    (0...BV_SIZE).each do |pos|
      assert_equal(expected_mask[pos] == 1, xor_bv[pos])
    end
    assert_equal(expected_bits, xor_bv.count)

    second.xor!(first)
    assert_equal(second, xor_bv)

    second = BitVector.new
    xor_bv = first ^ second

    assert_equal(first, xor_bv)
  end

  # ~ flips every bit that is read back; not! undoes it.
  def test_bv_not
    vector = BitVector.new
    [1, 5, 25, 41, 97, 185].each { |pos| vector.set(pos) }
    inverse = ~vector
    assert_equal(vector.count, inverse.count)
    (0...200).each { |pos| assert(vector[pos] != inverse[pos]) }

    inverse.not!
    assert_equal(vector, inverse)
  end

  SCAN_SIZE = 200
  SCAN_INC = 97

  # next/next_from on a vector and next_unset/next_unset_from on its inverse
  # must walk the same positions and end with -1; each visits them in order.
  def test_scan
    vector = BitVector.new

    (0...SCAN_SIZE).each { |step| vector.set(step * SCAN_INC) }
    inverse = ~vector

    (0...SCAN_SIZE).each do |step|
      expected = step * SCAN_INC
      start = (step - 1) * SCAN_INC + 1
      assert_equal(expected, vector.next_from(start))
      assert_equal(expected, inverse.next_unset_from(start))
    end
    past_last = (SCAN_SIZE - 1) * SCAN_INC + 1
    assert_equal(-1, vector.next_from(past_last))
    assert_equal(-1, inverse.next_unset_from(past_last))

    cursor = 0
    vector.each do |pos|
      assert_equal(cursor, pos)
      cursor += SCAN_INC
    end
    assert_equal(cursor, SCAN_SIZE * SCAN_INC)

    cursor = 0
    inverse.each do |pos|
      assert_equal(cursor, pos)
      cursor += SCAN_INC
    end
    assert_equal(cursor, SCAN_SIZE * SCAN_INC)

    vector.reset_scan
    inverse.reset_scan
    (0...SCAN_SIZE).each do |step|
      assert_equal(step * SCAN_INC, vector.next)
      assert_equal(step * SCAN_INC, inverse.next_unset)
    end
    assert_equal(-1, vector.next)
    assert_equal(-1, inverse.next_unset)

    vector.clear
    (0...SCAN_SIZE).each { |pos| vector.set(pos) }
    inverse = ~vector

    (0...SCAN_SIZE).each do |pos|
      assert_equal(pos, vector.next)
      assert_equal(pos, inverse.next_unset)
    end
    assert_equal(-1, vector.next)
    assert_equal(-1, inverse.next_unset)

    cursor = 0
    vector.each do |pos|
      assert_equal(cursor, pos)
      cursor += 1
    end
    assert_equal(cursor, SCAN_SIZE)

    cursor = 0
    inverse.each do |pos|
      assert_equal(cursor, pos)
      cursor += 1
    end
    assert_equal(cursor, SCAN_SIZE)
  end

  # to_a lists the set positions in ascending order.
  def test_to_a
    vector = BitVector.new
    positions = (1..100).collect { rand(1000) }.sort.uniq
    positions.each { |pos| vector.set(pos) }
    assert_equal(positions, vector.to_a)
  end

  private

  # Asserts that both vectors are equal and produce the same hash.
  def assert_vectors_match(one, two)
    assert_equal(one, two)
    assert_equal(one.hash, two.hash)
  end

  # Asserts that the vectors are unequal and produce different hashes.
  def assert_vectors_differ(one, two)
    assert_not_equal(one, two)
    assert_not_equal(one.hash, two.hash)
  end
end
|