ferret 0.9.6 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (295)
  1. data/MIT-LICENSE +1 -1
  2. data/README +12 -24
  3. data/Rakefile +38 -54
  4. data/TODO +14 -17
  5. data/ext/analysis.c +982 -823
  6. data/ext/analysis.h +133 -76
  7. data/ext/array.c +96 -58
  8. data/ext/array.h +40 -13
  9. data/ext/bitvector.c +476 -118
  10. data/ext/bitvector.h +264 -22
  11. data/ext/compound_io.c +217 -229
  12. data/ext/defines.h +49 -0
  13. data/ext/document.c +107 -317
  14. data/ext/document.h +31 -65
  15. data/ext/except.c +81 -36
  16. data/ext/except.h +117 -55
  17. data/ext/extconf.rb +2 -9
  18. data/ext/ferret.c +211 -104
  19. data/ext/ferret.h +22 -11
  20. data/ext/filter.c +97 -82
  21. data/ext/fs_store.c +348 -367
  22. data/ext/global.c +226 -188
  23. data/ext/global.h +44 -26
  24. data/ext/hash.c +474 -391
  25. data/ext/hash.h +441 -68
  26. data/ext/hashset.c +124 -96
  27. data/ext/hashset.h +169 -20
  28. data/ext/helper.c +56 -5
  29. data/ext/helper.h +7 -0
  30. data/ext/inc/lang.h +29 -49
  31. data/ext/inc/threading.h +31 -0
  32. data/ext/ind.c +288 -278
  33. data/ext/ind.h +68 -0
  34. data/ext/index.c +5688 -0
  35. data/ext/index.h +663 -616
  36. data/ext/lang.h +29 -49
  37. data/ext/libstemmer.c +3 -3
  38. data/ext/mem_pool.c +84 -0
  39. data/ext/mem_pool.h +35 -0
  40. data/ext/posh.c +1006 -0
  41. data/ext/posh.h +1007 -0
  42. data/ext/priorityqueue.c +117 -194
  43. data/ext/priorityqueue.h +135 -39
  44. data/ext/q_boolean.c +1305 -1108
  45. data/ext/q_const_score.c +106 -93
  46. data/ext/q_filtered_query.c +138 -135
  47. data/ext/q_fuzzy.c +206 -242
  48. data/ext/q_match_all.c +94 -80
  49. data/ext/q_multi_term.c +663 -0
  50. data/ext/q_parser.c +667 -593
  51. data/ext/q_phrase.c +992 -555
  52. data/ext/q_prefix.c +72 -61
  53. data/ext/q_range.c +235 -210
  54. data/ext/q_span.c +1480 -1166
  55. data/ext/q_term.c +273 -246
  56. data/ext/q_wildcard.c +127 -114
  57. data/ext/r_analysis.c +1720 -711
  58. data/ext/r_index.c +3049 -0
  59. data/ext/r_qparser.c +433 -146
  60. data/ext/r_search.c +2934 -1993
  61. data/ext/r_store.c +372 -143
  62. data/ext/r_utils.c +941 -0
  63. data/ext/ram_store.c +330 -326
  64. data/ext/search.c +1291 -668
  65. data/ext/search.h +403 -702
  66. data/ext/similarity.c +91 -113
  67. data/ext/similarity.h +45 -30
  68. data/ext/sort.c +721 -484
  69. data/ext/stopwords.c +361 -273
  70. data/ext/store.c +556 -58
  71. data/ext/store.h +706 -126
  72. data/ext/tags +3578 -2780
  73. data/ext/term_vectors.c +352 -0
  74. data/ext/threading.h +31 -0
  75. data/ext/win32.h +54 -0
  76. data/lib/ferret.rb +5 -17
  77. data/lib/ferret/document.rb +130 -2
  78. data/lib/ferret/index.rb +577 -26
  79. data/lib/ferret/number_tools.rb +157 -0
  80. data/lib/ferret_version.rb +3 -0
  81. data/test/test_helper.rb +5 -13
  82. data/test/unit/analysis/tc_analyzer.rb +513 -1
  83. data/test/unit/analysis/{ctc_tokenstream.rb → tc_token_stream.rb} +23 -0
  84. data/test/unit/index/tc_index.rb +183 -240
  85. data/test/unit/index/tc_index_reader.rb +312 -479
  86. data/test/unit/index/tc_index_writer.rb +397 -13
  87. data/test/unit/index/th_doc.rb +269 -206
  88. data/test/unit/query_parser/tc_query_parser.rb +40 -33
  89. data/test/unit/search/tc_filter.rb +59 -71
  90. data/test/unit/search/tc_fuzzy_query.rb +24 -16
  91. data/test/unit/search/tc_index_searcher.rb +23 -201
  92. data/test/unit/search/tc_multi_searcher.rb +78 -226
  93. data/test/unit/search/tc_search_and_sort.rb +93 -81
  94. data/test/unit/search/tc_sort.rb +23 -23
  95. data/test/unit/search/tc_sort_field.rb +7 -7
  96. data/test/unit/search/tc_spans.rb +51 -47
  97. data/test/unit/search/tm_searcher.rb +339 -0
  98. data/test/unit/store/tc_fs_store.rb +1 -1
  99. data/test/unit/store/tm_store_lock.rb +3 -3
  100. data/test/unit/tc_document.rb +81 -0
  101. data/test/unit/ts_analysis.rb +1 -1
  102. data/test/unit/ts_utils.rb +1 -1
  103. data/test/unit/utils/tc_bit_vector.rb +288 -0
  104. data/test/unit/utils/tc_number_tools.rb +117 -0
  105. data/test/unit/utils/tc_priority_queue.rb +106 -0
  106. metadata +140 -301
  107. data/CHANGELOG +0 -9
  108. data/ext/dummy.exe +0 -0
  109. data/ext/field.c +0 -408
  110. data/ext/frtio.h +0 -13
  111. data/ext/inc/except.h +0 -90
  112. data/ext/index_io.c +0 -382
  113. data/ext/index_rw.c +0 -2658
  114. data/ext/lang.c +0 -41
  115. data/ext/nix_io.c +0 -134
  116. data/ext/q_multi_phrase.c +0 -380
  117. data/ext/r_doc.c +0 -582
  118. data/ext/r_index_io.c +0 -1021
  119. data/ext/r_term.c +0 -219
  120. data/ext/term.c +0 -820
  121. data/ext/termdocs.c +0 -611
  122. data/ext/vector.c +0 -637
  123. data/ext/w32_io.c +0 -150
  124. data/lib/ferret/analysis.rb +0 -11
  125. data/lib/ferret/analysis/analyzers.rb +0 -112
  126. data/lib/ferret/analysis/standard_tokenizer.rb +0 -71
  127. data/lib/ferret/analysis/token.rb +0 -100
  128. data/lib/ferret/analysis/token_filters.rb +0 -86
  129. data/lib/ferret/analysis/token_stream.rb +0 -26
  130. data/lib/ferret/analysis/tokenizers.rb +0 -112
  131. data/lib/ferret/analysis/word_list_loader.rb +0 -27
  132. data/lib/ferret/document/document.rb +0 -152
  133. data/lib/ferret/document/field.rb +0 -312
  134. data/lib/ferret/index/compound_file_io.rb +0 -338
  135. data/lib/ferret/index/document_writer.rb +0 -289
  136. data/lib/ferret/index/field_infos.rb +0 -279
  137. data/lib/ferret/index/fields_io.rb +0 -181
  138. data/lib/ferret/index/index.rb +0 -675
  139. data/lib/ferret/index/index_file_names.rb +0 -33
  140. data/lib/ferret/index/index_reader.rb +0 -503
  141. data/lib/ferret/index/index_writer.rb +0 -534
  142. data/lib/ferret/index/multi_reader.rb +0 -377
  143. data/lib/ferret/index/multiple_term_doc_pos_enum.rb +0 -98
  144. data/lib/ferret/index/segment_infos.rb +0 -130
  145. data/lib/ferret/index/segment_merge_info.rb +0 -49
  146. data/lib/ferret/index/segment_merge_queue.rb +0 -16
  147. data/lib/ferret/index/segment_merger.rb +0 -358
  148. data/lib/ferret/index/segment_reader.rb +0 -412
  149. data/lib/ferret/index/segment_term_enum.rb +0 -169
  150. data/lib/ferret/index/segment_term_vector.rb +0 -58
  151. data/lib/ferret/index/term.rb +0 -53
  152. data/lib/ferret/index/term_buffer.rb +0 -83
  153. data/lib/ferret/index/term_doc_enum.rb +0 -291
  154. data/lib/ferret/index/term_enum.rb +0 -52
  155. data/lib/ferret/index/term_info.rb +0 -37
  156. data/lib/ferret/index/term_infos_io.rb +0 -321
  157. data/lib/ferret/index/term_vector_offset_info.rb +0 -20
  158. data/lib/ferret/index/term_vectors_io.rb +0 -553
  159. data/lib/ferret/query_parser.rb +0 -312
  160. data/lib/ferret/query_parser/query_parser.tab.rb +0 -928
  161. data/lib/ferret/search.rb +0 -50
  162. data/lib/ferret/search/boolean_clause.rb +0 -100
  163. data/lib/ferret/search/boolean_query.rb +0 -299
  164. data/lib/ferret/search/boolean_scorer.rb +0 -294
  165. data/lib/ferret/search/caching_wrapper_filter.rb +0 -40
  166. data/lib/ferret/search/conjunction_scorer.rb +0 -99
  167. data/lib/ferret/search/disjunction_sum_scorer.rb +0 -205
  168. data/lib/ferret/search/exact_phrase_scorer.rb +0 -32
  169. data/lib/ferret/search/explanation.rb +0 -41
  170. data/lib/ferret/search/field_cache.rb +0 -215
  171. data/lib/ferret/search/field_doc.rb +0 -31
  172. data/lib/ferret/search/field_sorted_hit_queue.rb +0 -184
  173. data/lib/ferret/search/filter.rb +0 -11
  174. data/lib/ferret/search/filtered_query.rb +0 -130
  175. data/lib/ferret/search/filtered_term_enum.rb +0 -79
  176. data/lib/ferret/search/fuzzy_query.rb +0 -154
  177. data/lib/ferret/search/fuzzy_term_enum.rb +0 -247
  178. data/lib/ferret/search/hit_collector.rb +0 -34
  179. data/lib/ferret/search/hit_queue.rb +0 -11
  180. data/lib/ferret/search/index_searcher.rb +0 -200
  181. data/lib/ferret/search/match_all_query.rb +0 -104
  182. data/lib/ferret/search/multi_phrase_query.rb +0 -216
  183. data/lib/ferret/search/multi_searcher.rb +0 -261
  184. data/lib/ferret/search/multi_term_query.rb +0 -65
  185. data/lib/ferret/search/non_matching_scorer.rb +0 -22
  186. data/lib/ferret/search/phrase_positions.rb +0 -55
  187. data/lib/ferret/search/phrase_query.rb +0 -214
  188. data/lib/ferret/search/phrase_scorer.rb +0 -152
  189. data/lib/ferret/search/prefix_query.rb +0 -54
  190. data/lib/ferret/search/query.rb +0 -140
  191. data/lib/ferret/search/query_filter.rb +0 -51
  192. data/lib/ferret/search/range_filter.rb +0 -103
  193. data/lib/ferret/search/range_query.rb +0 -139
  194. data/lib/ferret/search/req_excl_scorer.rb +0 -125
  195. data/lib/ferret/search/req_opt_sum_scorer.rb +0 -70
  196. data/lib/ferret/search/score_doc.rb +0 -38
  197. data/lib/ferret/search/score_doc_comparator.rb +0 -114
  198. data/lib/ferret/search/scorer.rb +0 -91
  199. data/lib/ferret/search/similarity.rb +0 -278
  200. data/lib/ferret/search/sloppy_phrase_scorer.rb +0 -47
  201. data/lib/ferret/search/sort.rb +0 -112
  202. data/lib/ferret/search/sort_comparator.rb +0 -60
  203. data/lib/ferret/search/sort_field.rb +0 -91
  204. data/lib/ferret/search/spans.rb +0 -12
  205. data/lib/ferret/search/spans/near_spans_enum.rb +0 -304
  206. data/lib/ferret/search/spans/span_first_query.rb +0 -79
  207. data/lib/ferret/search/spans/span_near_query.rb +0 -108
  208. data/lib/ferret/search/spans/span_not_query.rb +0 -130
  209. data/lib/ferret/search/spans/span_or_query.rb +0 -176
  210. data/lib/ferret/search/spans/span_query.rb +0 -25
  211. data/lib/ferret/search/spans/span_scorer.rb +0 -74
  212. data/lib/ferret/search/spans/span_term_query.rb +0 -105
  213. data/lib/ferret/search/spans/span_weight.rb +0 -84
  214. data/lib/ferret/search/spans/spans_enum.rb +0 -44
  215. data/lib/ferret/search/term_query.rb +0 -128
  216. data/lib/ferret/search/term_scorer.rb +0 -183
  217. data/lib/ferret/search/top_docs.rb +0 -36
  218. data/lib/ferret/search/top_field_docs.rb +0 -17
  219. data/lib/ferret/search/weight.rb +0 -54
  220. data/lib/ferret/search/wildcard_query.rb +0 -26
  221. data/lib/ferret/search/wildcard_term_enum.rb +0 -61
  222. data/lib/ferret/stemmers.rb +0 -1
  223. data/lib/ferret/stemmers/porter_stemmer.rb +0 -218
  224. data/lib/ferret/store.rb +0 -5
  225. data/lib/ferret/store/buffered_index_io.rb +0 -190
  226. data/lib/ferret/store/directory.rb +0 -141
  227. data/lib/ferret/store/fs_store.rb +0 -381
  228. data/lib/ferret/store/index_io.rb +0 -245
  229. data/lib/ferret/store/ram_store.rb +0 -286
  230. data/lib/ferret/utils.rb +0 -8
  231. data/lib/ferret/utils/bit_vector.rb +0 -123
  232. data/lib/ferret/utils/date_tools.rb +0 -138
  233. data/lib/ferret/utils/number_tools.rb +0 -91
  234. data/lib/ferret/utils/parameter.rb +0 -41
  235. data/lib/ferret/utils/priority_queue.rb +0 -120
  236. data/lib/ferret/utils/string_helper.rb +0 -47
  237. data/lib/ferret/utils/thread_local.rb +0 -28
  238. data/lib/ferret/utils/weak_key_hash.rb +0 -60
  239. data/lib/rferret.rb +0 -37
  240. data/rake_utils/code_statistics.rb +0 -106
  241. data/test/benchmark/tb_ram_store.rb +0 -76
  242. data/test/benchmark/tb_rw_vint.rb +0 -26
  243. data/test/functional/thread_safety_index_test.rb +0 -81
  244. data/test/functional/thread_safety_test.rb +0 -137
  245. data/test/longrunning/tc_numbertools.rb +0 -60
  246. data/test/longrunning/tm_store.rb +0 -19
  247. data/test/unit/analysis/ctc_analyzer.rb +0 -532
  248. data/test/unit/analysis/data/wordfile +0 -6
  249. data/test/unit/analysis/rtc_letter_tokenizer.rb +0 -20
  250. data/test/unit/analysis/rtc_lower_case_filter.rb +0 -20
  251. data/test/unit/analysis/rtc_lower_case_tokenizer.rb +0 -27
  252. data/test/unit/analysis/rtc_per_field_analyzer_wrapper.rb +0 -39
  253. data/test/unit/analysis/rtc_porter_stem_filter.rb +0 -16
  254. data/test/unit/analysis/rtc_standard_analyzer.rb +0 -20
  255. data/test/unit/analysis/rtc_standard_tokenizer.rb +0 -20
  256. data/test/unit/analysis/rtc_stop_analyzer.rb +0 -20
  257. data/test/unit/analysis/rtc_stop_filter.rb +0 -14
  258. data/test/unit/analysis/rtc_white_space_analyzer.rb +0 -21
  259. data/test/unit/analysis/rtc_white_space_tokenizer.rb +0 -20
  260. data/test/unit/analysis/rtc_word_list_loader.rb +0 -32
  261. data/test/unit/analysis/tc_token.rb +0 -25
  262. data/test/unit/document/rtc_field.rb +0 -28
  263. data/test/unit/document/tc_document.rb +0 -47
  264. data/test/unit/document/tc_field.rb +0 -98
  265. data/test/unit/index/rtc_compound_file_io.rb +0 -107
  266. data/test/unit/index/rtc_field_infos.rb +0 -127
  267. data/test/unit/index/rtc_fields_io.rb +0 -167
  268. data/test/unit/index/rtc_multiple_term_doc_pos_enum.rb +0 -83
  269. data/test/unit/index/rtc_segment_infos.rb +0 -74
  270. data/test/unit/index/rtc_segment_term_docs.rb +0 -17
  271. data/test/unit/index/rtc_segment_term_enum.rb +0 -60
  272. data/test/unit/index/rtc_segment_term_vector.rb +0 -71
  273. data/test/unit/index/rtc_term_buffer.rb +0 -57
  274. data/test/unit/index/rtc_term_info.rb +0 -19
  275. data/test/unit/index/rtc_term_infos_io.rb +0 -192
  276. data/test/unit/index/rtc_term_vectors_io.rb +0 -108
  277. data/test/unit/index/tc_term.rb +0 -27
  278. data/test/unit/index/tc_term_voi.rb +0 -18
  279. data/test/unit/search/rtc_similarity.rb +0 -37
  280. data/test/unit/search/rtc_sort_field.rb +0 -14
  281. data/test/unit/search/tc_multi_searcher2.rb +0 -126
  282. data/test/unit/store/rtc_fs_store.rb +0 -62
  283. data/test/unit/store/rtc_ram_store.rb +0 -15
  284. data/test/unit/store/rtm_store.rb +0 -150
  285. data/test/unit/store/rtm_store_lock.rb +0 -2
  286. data/test/unit/ts_document.rb +0 -2
  287. data/test/unit/utils/rtc_bit_vector.rb +0 -73
  288. data/test/unit/utils/rtc_date_tools.rb +0 -50
  289. data/test/unit/utils/rtc_number_tools.rb +0 -59
  290. data/test/unit/utils/rtc_parameter.rb +0 -40
  291. data/test/unit/utils/rtc_priority_queue.rb +0 -62
  292. data/test/unit/utils/rtc_string_helper.rb +0 -21
  293. data/test/unit/utils/rtc_thread.rb +0 -61
  294. data/test/unit/utils/rtc_weak_key_hash.rb +0 -25
  295. data/test/utils/number_to_spoken.rb +0 -132
@@ -8,7 +8,7 @@ class FSStoreTest < Test::Unit::TestCase
8
8
  include StoreLockTest
9
9
  def setup
10
10
  @dpath = File.expand_path(File.join(File.dirname(__FILE__),
11
- '../../temp/fsdir'))
11
+ '../../temp/fsdir'))
12
12
  @dir = FSDirectory.new(@dpath, true)
13
13
  end
14
14
 
@@ -14,7 +14,7 @@ module StoreLockTest
14
14
  assert(lock1.obtain(lock_time_out))
15
15
  assert(lock2.locked?)
16
16
 
17
- assert(! obtain_lock_true_false(lock2))
17
+ assert(! can_obtain_lock?(lock2))
18
18
 
19
19
  exception_thrown = false
20
20
  begin
@@ -46,7 +46,7 @@ module StoreLockTest
46
46
  while Switch.counter < 1
47
47
  end
48
48
 
49
- assert(! obtain_lock_true_false(lock2))
49
+ assert(! can_obtain_lock?(lock2))
50
50
 
51
51
  Switch.counter = 2
52
52
  while Switch.counter < 3
@@ -56,7 +56,7 @@ module StoreLockTest
56
56
  lock2.release()
57
57
  end
58
58
 
59
- def obtain_lock_true_false(lock)
59
+ def can_obtain_lock?(lock)
60
60
  lock_time_out = 0.001 # we want this test to run quickly
61
61
  begin
62
62
  lock.obtain(lock_time_out)
@@ -0,0 +1,81 @@
1
+ require File.dirname(__FILE__) + "/../test_helper"
2
+
3
+ class DocumentTest < Test::Unit::TestCase
4
+ def test_field
5
+ f = Ferret::Field.new
6
+ assert_equal(0, f.size)
7
+ assert_equal(1.0, f.boost)
8
+
9
+ f2 = Ferret::Field.new
10
+ assert_equal(f, f2)
11
+
12
+ f << "section0"
13
+ assert_equal(1, f.size)
14
+ assert_equal(1.0, f.boost)
15
+ assert_equal("section0", f[0])
16
+ assert_not_equal(f, f2)
17
+
18
+ f << "section1"
19
+ assert_equal(2, f.size)
20
+ assert_equal(1.0, f.boost)
21
+ assert_equal("section0", f[0])
22
+ assert_equal("section1", f[1])
23
+ assert_equal('["section0", "section1"]', f.to_s)
24
+ assert_not_equal(f, f2)
25
+ f2 += f
26
+ assert_equal(f, f2)
27
+
28
+ f.boost = 4.0
29
+ assert_not_equal(f, f2)
30
+ assert_equal('["section0", "section1"]^4.0', f.to_s)
31
+
32
+ f2.boost = 4.0
33
+ assert_equal(f, f2)
34
+
35
+ f3 = Ferret::Field.new(["section0", "section1"], 4.0)
36
+ assert_equal(f, f3)
37
+ end
38
+
39
+ def test_document
40
+ d = Ferret::Document.new
41
+
42
+ d[:name] = Ferret::Field.new
43
+ d[:name] << "section0"
44
+ d[:name] << "section1"
45
+
46
+ assert_equal(1, d.size)
47
+ assert_equal(1.0, d.boost)
48
+ assert_equal(%(
49
+ Document {
50
+ :name => ["section0", "section1"]
51
+ }).strip, d.to_s)
52
+
53
+
54
+ d.boost = 123.0
55
+ d[:name] << "section2"
56
+ d[:name].boost = 321.0
57
+ assert_equal(123.0, d.boost)
58
+ assert_equal(321.0, d[:name].boost)
59
+ assert_equal(%(
60
+ Document {
61
+ :name => ["section0", "section1", "section2"]^321.0
62
+ }^123.0).strip, d.to_s)
63
+
64
+ d[:title] = "Shawshank Redemption"
65
+ d[:actors] = ["Tim Robbins", "Morgan Freeman"]
66
+
67
+ assert_equal(3, d.size)
68
+ assert_equal(%(
69
+ Document {
70
+ :actors => ["Tim Robbins", "Morgan Freeman"]
71
+ :name => ["section0", "section1", "section2"]^321.0
72
+ :title => "Shawshank Redemption"
73
+ }^123.0).strip, d.to_s)
74
+
75
+ d2 = Ferret::Document.new(123.0)
76
+ d2[:name] = Ferret::Field.new(["section0", "section1", "section2"], 321.0)
77
+ d2[:title] = "Shawshank Redemption"
78
+ d2[:actors] = ["Tim Robbins", "Morgan Freeman"]
79
+ assert_equal(d, d2)
80
+ end
81
+ end
@@ -1,2 +1,2 @@
1
1
  require File.join(File.dirname(__FILE__), "../test_helper.rb")
2
- load_test_dir('unit/analysis')
2
+ load_test_dir('unit/utils')
@@ -1,2 +1,2 @@
1
1
  require File.join(File.dirname(__FILE__), "../test_helper.rb")
2
- load_test_dir('unit/utils')
2
+ load_test_dir('unit/analysis')
@@ -0,0 +1,288 @@
1
+ require File.dirname(__FILE__) + "/../../test_helper"
2
+
3
+
4
+ class BitVectorTest < Test::Unit::TestCase
5
+ include Ferret::Utils
6
+
7
+ def test_bv_get_set()
8
+ bv = BitVector.new
9
+ assert_equal 0, bv.count
10
+
11
+ bv.set 10
12
+ assert bv.get(10)
13
+ assert bv[10]
14
+ assert_equal 1, bv.count
15
+
16
+ bv[10] = false
17
+ assert ! bv[10]
18
+
19
+ bv[10] = true
20
+ assert bv[10]
21
+
22
+ bv[10] = nil
23
+ assert ! bv[10]
24
+
25
+ bv[10] = true
26
+ assert bv[10]
27
+
28
+ bv.unset 10
29
+ assert ! bv[10]
30
+
31
+ bv[10] = true
32
+ assert bv[10]
33
+ end
34
+
35
+ def test_bv_count()
36
+ bv = BitVector.new
37
+ bv.set 10
38
+ assert_equal 1, bv.count
39
+
40
+ bv.set 20
41
+ assert bv.get(20)
42
+ assert_equal 2, bv.count
43
+
44
+ bv.set 21
45
+ assert bv.get(21)
46
+ assert_equal 3, bv.count
47
+
48
+ bv.unset 21
49
+ assert ! bv.get(21)
50
+ assert_equal 2, bv.count
51
+
52
+ bv[20] = nil
53
+ assert ! bv.get(20)
54
+ assert_equal 1, bv.count
55
+
56
+ (50..100).each {|i| bv.set i }
57
+ (50..100).each {|i| assert bv[i] }
58
+ assert bv.get(10)
59
+ assert_equal 52, bv.count
60
+
61
+ bv.clear
62
+ assert_equal 0, bv.count
63
+ (50..100).each {|i| assert ! bv[i] }
64
+ assert ! bv.get(10)
65
+ end
66
+
67
+ def test_bv_eql_hash
68
+ bv1 = BitVector.new
69
+ bv2 = BitVector.new
70
+ assert_equal(bv1, bv2)
71
+ assert_equal(bv1.hash, bv2.hash)
72
+
73
+ bv1.set(10)
74
+ assert_not_equal(bv1, bv2)
75
+ assert_not_equal(bv1.hash, bv2.hash)
76
+
77
+ bv2.set(10)
78
+ assert_equal(bv1, bv2)
79
+ assert_equal(bv1.hash, bv2.hash)
80
+
81
+ 10.times {|i| bv1.set(i * 31)}
82
+ assert_not_equal(bv1, bv2)
83
+ assert_not_equal(bv1.hash, bv2.hash)
84
+
85
+ 10.times {|i| bv2.set(i * 31)}
86
+ assert_equal(bv1, bv2)
87
+ assert_equal(bv1.hash, bv2.hash)
88
+
89
+ bv1.clear
90
+ assert_not_equal(bv1, bv2)
91
+ assert_not_equal(bv1.hash, bv2.hash)
92
+
93
+ bv2.clear
94
+ assert_equal(bv1, bv2)
95
+ assert_equal(bv1.hash, bv2.hash)
96
+ end
97
+
98
+ BV_COUNT = 500
99
+ BV_SIZE = 1000
100
+
101
+ def test_bv_and
102
+ bv1 = BitVector.new
103
+ bv2 = BitVector.new
104
+ set1 = set2 = count = 0
105
+
106
+ BV_COUNT.times do |i|
107
+ bit = rand(BV_SIZE)
108
+ bv1.set(bit)
109
+ set1 |= (1 << bit)
110
+ end
111
+
112
+ BV_COUNT.times do |i|
113
+ bit = rand(BV_SIZE)
114
+ bv2.set(bit)
115
+ bitmask = (1 << bit)
116
+ if ((set1 & bitmask) > 0) && ((set2 & bitmask) == 0)
117
+ set2 |= (1 << bit)
118
+ count += 1
119
+ end
120
+ end
121
+
122
+ and_bv = bv1 & bv2
123
+ assert_equal(count, and_bv.count)
124
+ BV_SIZE.times do |i|
125
+ assert_equal(((set2 & (1 << i)) > 0), and_bv[i])
126
+ end
127
+
128
+ bv2.and! bv1
129
+ assert_equal(bv2, and_bv)
130
+
131
+ bv2 = BitVector.new
132
+ and_bv = bv1 & bv2
133
+
134
+ assert_equal(bv2, and_bv, "and_bv should be empty")
135
+ assert_equal(0, and_bv.count)
136
+ end
137
+
138
+ def test_bv_or
139
+ bv1 = BitVector.new
140
+ bv2 = BitVector.new
141
+ set = count = 0
142
+
143
+ BV_COUNT.times do |i|
144
+ bit = rand(BV_SIZE)
145
+ bv1.set(bit)
146
+ bitmask = (1 << bit)
147
+ if (set & bitmask) == 0
148
+ count += 1
149
+ set |= bitmask
150
+ end
151
+ end
152
+
153
+ BV_COUNT.times do |i|
154
+ bit = rand(BV_SIZE)
155
+ bv2.set(bit)
156
+ bitmask = (1 << bit)
157
+ if (set & bitmask) == 0
158
+ count += 1
159
+ set |= bitmask
160
+ end
161
+ end
162
+
163
+ or_bv = bv1 | bv2
164
+ assert_equal(count, or_bv.count)
165
+ BV_SIZE.times do |i|
166
+ assert_equal(((set & (1 << i)) > 0), or_bv[i])
167
+ end
168
+
169
+ bv2.or! bv1
170
+ assert_equal(bv2, or_bv)
171
+
172
+ bv2 = BitVector.new
173
+ or_bv = bv1 | bv2
174
+
175
+ assert_equal(bv1, or_bv)
176
+ end
177
+
178
+ def test_bv_xor
179
+ bv1 = BitVector.new
180
+ bv2 = BitVector.new
181
+ set1 = set2 = count = 0
182
+
183
+ BV_COUNT.times do |i|
184
+ bit = rand(BV_SIZE)
185
+ bv1.set(bit)
186
+ set1 |= (1 << bit)
187
+ end
188
+
189
+ BV_COUNT.times do |i|
190
+ bit = rand(BV_SIZE)
191
+ bv2.set(bit)
192
+ set2 |= (1 << bit)
193
+ end
194
+
195
+ bitmask = 1
196
+ set1 ^= set2
197
+ BV_SIZE.times do |i|
198
+ count += 1 if (set1 & bitmask) > 0
199
+ bitmask <<= 1
200
+ end
201
+
202
+ xor_bv = bv1 ^ bv2
203
+ BV_SIZE.times do |i|
204
+ assert_equal(((set1 & (1 << i)) > 0), xor_bv[i])
205
+ end
206
+ assert_equal(count, xor_bv.count)
207
+
208
+ bv2.xor! bv1
209
+ assert_equal(bv2, xor_bv)
210
+
211
+ bv2 = BitVector.new
212
+ xor_bv = bv1 ^ bv2
213
+
214
+ assert_equal(bv1, xor_bv)
215
+ end
216
+
217
+ def test_bv_not
218
+ bv = BitVector.new
219
+ [1, 5, 25, 41, 97, 185].each {|i| bv.set(i)}
220
+ not_bv = ~bv
221
+ assert_equal(bv.count, not_bv.count)
222
+ 200.times {|i| assert(bv[i] != not_bv[i])}
223
+
224
+ not_bv.not!
225
+ assert_equal(bv, not_bv)
226
+ end
227
+
228
+
229
+ SCAN_SIZE = 200
230
+ SCAN_INC = 97
231
+
232
+ def test_scan
233
+ bv = BitVector.new
234
+
235
+ SCAN_SIZE.times {|i| bv.set(i * SCAN_INC)}
236
+ not_bv = ~bv
237
+
238
+ SCAN_SIZE.times do |i|
239
+ assert_equal(i * SCAN_INC, bv.next_from((i - 1) * SCAN_INC + 1))
240
+ assert_equal(i * SCAN_INC, not_bv.next_unset_from((i - 1) * SCAN_INC + 1))
241
+ end
242
+ assert_equal(-1, bv.next_from((SCAN_SIZE - 1) * SCAN_INC + 1))
243
+ assert_equal(-1, not_bv.next_unset_from((SCAN_SIZE - 1) * SCAN_INC + 1))
244
+
245
+ bit = 0
246
+ bv.each {|i| assert_equal(bit, i); bit += SCAN_INC }
247
+ assert_equal(bit, SCAN_SIZE * SCAN_INC)
248
+
249
+ bit = 0
250
+ not_bv.each {|i| assert_equal(bit, i); bit += SCAN_INC }
251
+ assert_equal(bit, SCAN_SIZE * SCAN_INC)
252
+
253
+ bv.reset_scan
254
+ not_bv.reset_scan
255
+ SCAN_SIZE.times do |i|
256
+ assert_equal(i * SCAN_INC, bv.next)
257
+ assert_equal(i * SCAN_INC, not_bv.next_unset)
258
+ end
259
+ assert_equal(-1, bv.next)
260
+ assert_equal(-1, not_bv.next_unset)
261
+
262
+ bv.clear
263
+ SCAN_SIZE.times {|i| bv.set(i)}
264
+ not_bv = ~bv
265
+
266
+ SCAN_SIZE.times do |i|
267
+ assert_equal(i, bv.next)
268
+ assert_equal(i, not_bv.next_unset)
269
+ end
270
+ assert_equal(-1, bv.next)
271
+ assert_equal(-1, not_bv.next_unset)
272
+
273
+ bit = 0
274
+ bv.each {|i| assert_equal(bit, i); bit += 1 }
275
+ assert_equal(bit, SCAN_SIZE)
276
+
277
+ bit = 0
278
+ not_bv.each {|i| assert_equal(bit, i); bit += 1 }
279
+ assert_equal(bit, SCAN_SIZE)
280
+ end
281
+
282
+ def test_to_a
283
+ bv = BitVector.new
284
+ ary = (1..100).collect { rand(1000) }.sort.uniq
285
+ ary.each {|i| bv.set(i)}
286
+ assert_equal(ary, bv.to_a)
287
+ end
288
+ end
@@ -0,0 +1,117 @@
1
+ require File.dirname(__FILE__) + "/../../test_helper"
2
+ require 'ferret/number_tools'
3
+
4
+
5
+ class NumberToolsTest < Test::Unit::TestCase
6
+ include Ferret::Utils
7
+
8
+ def test_to_i_lex_near_zero()
9
+ (-10..10).each do |num|
10
+ assert(num.to_s_lex > (num-1).to_s_lex,
11
+ "Strings should sort correctly but " +
12
+ "#{num.to_s_lex} <= #{(num-1).to_s_lex}")
13
+ assert_equal(num, num.to_s_lex.to_i_lex)
14
+ end
15
+ end
16
+
17
+ def test_to_i_pad_near_zero()
18
+ (1..10).each do |num|
19
+ assert(num.to_s_pad(3) > (num-1).to_s_pad(3),
20
+ "Strings should sort correctly but " +
21
+ "#{num.to_s_pad(3)} <= #{(num-1).to_s_pad(3)}")
22
+ assert_equal(num, num.to_s_pad(3).to_i)
23
+ end
24
+ end
25
+
26
+ def test_to_i_lex_larger_numbers
27
+ 100.times do
28
+ num1 = rand(10000000000000000000000000000000000)
29
+ num2 = rand(10000000000000000000000000000000000)
30
+ num1 *= -1 if rand(2) == 0
31
+ num2 *= -1 if rand(2) == 0
32
+
33
+ assert_equal(num1, num1.to_s_lex.to_i_lex)
34
+ assert_equal(num2, num2.to_s_lex.to_i_lex)
35
+ assert_equal(num1 < num2, num1.to_s_lex < num2.to_s_lex,
36
+ "Strings should sort correctly but " +
37
+ "#{num1} < #{num2} == #{num1 < num2} but " +
38
+ "#{num1.to_s_lex} < #{num2.to_s_lex} == " +
39
+ "#{num1.to_s_lex < num2.to_s_lex}")
40
+ end
41
+ end
42
+
43
+ def test_to_i_pad
44
+ 100.times do
45
+ num1 = rand(10000000000000000000000000000000000)
46
+ num2 = rand(10000000000000000000000000000000000)
47
+ assert_equal(num1, num1.to_s_pad(35).to_i)
48
+ assert_equal(num2, num2.to_s_pad(35).to_i)
49
+ assert_equal(num1 < num2, num1.to_s_pad(35) < num2.to_s_pad(35),
50
+ "Strings should sort correctly but " +
51
+ "#{num1} < #{num2} == #{num1 < num2} but " +
52
+ "#{num1.to_s_pad(35)} < #{num2.to_s_pad(35)} == " +
53
+ "#{num1.to_s_pad(35) < num2.to_s_pad(35)}")
54
+ end
55
+ end
56
+
57
+ def test_time_to_s_lex
58
+ t_num = Time.now.to_i
59
+
60
+ 10.times do
61
+ t1 = Time.now - rand(t_num)
62
+ t2 = Time.now - rand(t_num)
63
+ assert_equal(t1.to_s, t1.to_s_lex(:second).to_time_lex.to_s)
64
+ assert_equal(t2.to_s, t2.to_s_lex(:second).to_time_lex.to_s)
65
+ [:year, :month, :day, :hour, :minute, :second, :millisecond].each do |prec|
66
+ t1_x = t1.to_s_lex(prec).to_time_lex
67
+ t2_x = t2.to_s_lex(prec).to_time_lex
68
+ assert_equal(t1_x < t2_x, t1.to_s_lex(prec) < t2.to_s_lex(prec),
69
+ "Strings should sort correctly but " +
70
+ "#{t1_x} < #{t2_x} == #{t1_x < t2_x} but " +
71
+ "#{t1.to_s_lex(prec)} < #{t2.to_s_lex(prec)} == " +
72
+ "#{t1.to_s_lex(prec) < t2.to_s_lex(prec)}")
73
+ end
74
+ end
75
+ end
76
+
77
+ def test_date_to_s_lex
78
+ 10.times do
79
+ d1 = Date.civil(rand(2200), rand(12) + 1, rand(28) + 1)
80
+ d2 = Date.civil(rand(2200), rand(12) + 1, rand(28) + 1)
81
+ assert_equal(d1.to_s, d1.to_s_lex(:day).to_date_lex.to_s)
82
+ assert_equal(d2.to_s, d2.to_s_lex(:day).to_date_lex.to_s)
83
+ [:year, :month, :day].each do |prec|
84
+ d1_x = d1.to_s_lex(prec).to_date_lex
85
+ d2_x = d2.to_s_lex(prec).to_date_lex
86
+ assert_equal(d1_x < d2_x, d1.to_s_lex(prec) < d2.to_s_lex(prec),
87
+ "Strings should sort correctly but " +
88
+ "#{d1_x} < #{d2_x} == #{d1_x < d2_x} but " +
89
+ "#{d1.to_s_lex(prec)} < #{d2.to_s_lex(prec)} == " +
90
+ "#{d1.to_s_lex(prec) < d2.to_s_lex(prec)}")
91
+ end
92
+
93
+ end
94
+ end
95
+
96
+ def test_date_time_to_s_lex
97
+ 10.times do
98
+ d1 = DateTime.strptime("#{rand(2200)}-#{rand(12)+1}-#{rand(28)+1} "+
99
+ "#{rand(24)}:#{rand(60)}:#{rand(60)}",
100
+ "%Y-%m-%d %H:%M:%S")
101
+ d2 = DateTime.strptime("#{rand(2200)}-#{rand(12)+1}-#{rand(28)+1} "+
102
+ "#{rand(24)}:#{rand(60)}:#{rand(60)}",
103
+ "%Y-%m-%d %H:%M:%S")
104
+ assert_equal(d1.to_s, d1.to_s_lex(:second).to_date_time_lex.to_s)
105
+ assert_equal(d2.to_s, d2.to_s_lex(:second).to_date_time_lex.to_s)
106
+ [:year, :month, :day, :hour, :minute, :second].each do |prec|
107
+ d1_x = d1.to_s_lex(prec).to_date_lex
108
+ d2_x = d2.to_s_lex(prec).to_date_lex
109
+ assert_equal(d1_x < d2_x, d1.to_s_lex(prec) < d2.to_s_lex(prec),
110
+ "Strings should sort correctly but " +
111
+ "#{d1_x} < #{d2_x} == #{d1_x < d2_x} but " +
112
+ "#{d1.to_s_lex(prec)} < #{d2.to_s_lex(prec)} == " +
113
+ "#{d1.to_s_lex(prec) < d2.to_s_lex(prec)}")
114
+ end
115
+ end
116
+ end
117
+ end