ferret 0.9.6 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (295)
  1. data/MIT-LICENSE +1 -1
  2. data/README +12 -24
  3. data/Rakefile +38 -54
  4. data/TODO +14 -17
  5. data/ext/analysis.c +982 -823
  6. data/ext/analysis.h +133 -76
  7. data/ext/array.c +96 -58
  8. data/ext/array.h +40 -13
  9. data/ext/bitvector.c +476 -118
  10. data/ext/bitvector.h +264 -22
  11. data/ext/compound_io.c +217 -229
  12. data/ext/defines.h +49 -0
  13. data/ext/document.c +107 -317
  14. data/ext/document.h +31 -65
  15. data/ext/except.c +81 -36
  16. data/ext/except.h +117 -55
  17. data/ext/extconf.rb +2 -9
  18. data/ext/ferret.c +211 -104
  19. data/ext/ferret.h +22 -11
  20. data/ext/filter.c +97 -82
  21. data/ext/fs_store.c +348 -367
  22. data/ext/global.c +226 -188
  23. data/ext/global.h +44 -26
  24. data/ext/hash.c +474 -391
  25. data/ext/hash.h +441 -68
  26. data/ext/hashset.c +124 -96
  27. data/ext/hashset.h +169 -20
  28. data/ext/helper.c +56 -5
  29. data/ext/helper.h +7 -0
  30. data/ext/inc/lang.h +29 -49
  31. data/ext/inc/threading.h +31 -0
  32. data/ext/ind.c +288 -278
  33. data/ext/ind.h +68 -0
  34. data/ext/index.c +5688 -0
  35. data/ext/index.h +663 -616
  36. data/ext/lang.h +29 -49
  37. data/ext/libstemmer.c +3 -3
  38. data/ext/mem_pool.c +84 -0
  39. data/ext/mem_pool.h +35 -0
  40. data/ext/posh.c +1006 -0
  41. data/ext/posh.h +1007 -0
  42. data/ext/priorityqueue.c +117 -194
  43. data/ext/priorityqueue.h +135 -39
  44. data/ext/q_boolean.c +1305 -1108
  45. data/ext/q_const_score.c +106 -93
  46. data/ext/q_filtered_query.c +138 -135
  47. data/ext/q_fuzzy.c +206 -242
  48. data/ext/q_match_all.c +94 -80
  49. data/ext/q_multi_term.c +663 -0
  50. data/ext/q_parser.c +667 -593
  51. data/ext/q_phrase.c +992 -555
  52. data/ext/q_prefix.c +72 -61
  53. data/ext/q_range.c +235 -210
  54. data/ext/q_span.c +1480 -1166
  55. data/ext/q_term.c +273 -246
  56. data/ext/q_wildcard.c +127 -114
  57. data/ext/r_analysis.c +1720 -711
  58. data/ext/r_index.c +3049 -0
  59. data/ext/r_qparser.c +433 -146
  60. data/ext/r_search.c +2934 -1993
  61. data/ext/r_store.c +372 -143
  62. data/ext/r_utils.c +941 -0
  63. data/ext/ram_store.c +330 -326
  64. data/ext/search.c +1291 -668
  65. data/ext/search.h +403 -702
  66. data/ext/similarity.c +91 -113
  67. data/ext/similarity.h +45 -30
  68. data/ext/sort.c +721 -484
  69. data/ext/stopwords.c +361 -273
  70. data/ext/store.c +556 -58
  71. data/ext/store.h +706 -126
  72. data/ext/tags +3578 -2780
  73. data/ext/term_vectors.c +352 -0
  74. data/ext/threading.h +31 -0
  75. data/ext/win32.h +54 -0
  76. data/lib/ferret.rb +5 -17
  77. data/lib/ferret/document.rb +130 -2
  78. data/lib/ferret/index.rb +577 -26
  79. data/lib/ferret/number_tools.rb +157 -0
  80. data/lib/ferret_version.rb +3 -0
  81. data/test/test_helper.rb +5 -13
  82. data/test/unit/analysis/tc_analyzer.rb +513 -1
  83. data/test/unit/analysis/{ctc_tokenstream.rb → tc_token_stream.rb} +23 -0
  84. data/test/unit/index/tc_index.rb +183 -240
  85. data/test/unit/index/tc_index_reader.rb +312 -479
  86. data/test/unit/index/tc_index_writer.rb +397 -13
  87. data/test/unit/index/th_doc.rb +269 -206
  88. data/test/unit/query_parser/tc_query_parser.rb +40 -33
  89. data/test/unit/search/tc_filter.rb +59 -71
  90. data/test/unit/search/tc_fuzzy_query.rb +24 -16
  91. data/test/unit/search/tc_index_searcher.rb +23 -201
  92. data/test/unit/search/tc_multi_searcher.rb +78 -226
  93. data/test/unit/search/tc_search_and_sort.rb +93 -81
  94. data/test/unit/search/tc_sort.rb +23 -23
  95. data/test/unit/search/tc_sort_field.rb +7 -7
  96. data/test/unit/search/tc_spans.rb +51 -47
  97. data/test/unit/search/tm_searcher.rb +339 -0
  98. data/test/unit/store/tc_fs_store.rb +1 -1
  99. data/test/unit/store/tm_store_lock.rb +3 -3
  100. data/test/unit/tc_document.rb +81 -0
  101. data/test/unit/ts_analysis.rb +1 -1
  102. data/test/unit/ts_utils.rb +1 -1
  103. data/test/unit/utils/tc_bit_vector.rb +288 -0
  104. data/test/unit/utils/tc_number_tools.rb +117 -0
  105. data/test/unit/utils/tc_priority_queue.rb +106 -0
  106. metadata +140 -301
  107. data/CHANGELOG +0 -9
  108. data/ext/dummy.exe +0 -0
  109. data/ext/field.c +0 -408
  110. data/ext/frtio.h +0 -13
  111. data/ext/inc/except.h +0 -90
  112. data/ext/index_io.c +0 -382
  113. data/ext/index_rw.c +0 -2658
  114. data/ext/lang.c +0 -41
  115. data/ext/nix_io.c +0 -134
  116. data/ext/q_multi_phrase.c +0 -380
  117. data/ext/r_doc.c +0 -582
  118. data/ext/r_index_io.c +0 -1021
  119. data/ext/r_term.c +0 -219
  120. data/ext/term.c +0 -820
  121. data/ext/termdocs.c +0 -611
  122. data/ext/vector.c +0 -637
  123. data/ext/w32_io.c +0 -150
  124. data/lib/ferret/analysis.rb +0 -11
  125. data/lib/ferret/analysis/analyzers.rb +0 -112
  126. data/lib/ferret/analysis/standard_tokenizer.rb +0 -71
  127. data/lib/ferret/analysis/token.rb +0 -100
  128. data/lib/ferret/analysis/token_filters.rb +0 -86
  129. data/lib/ferret/analysis/token_stream.rb +0 -26
  130. data/lib/ferret/analysis/tokenizers.rb +0 -112
  131. data/lib/ferret/analysis/word_list_loader.rb +0 -27
  132. data/lib/ferret/document/document.rb +0 -152
  133. data/lib/ferret/document/field.rb +0 -312
  134. data/lib/ferret/index/compound_file_io.rb +0 -338
  135. data/lib/ferret/index/document_writer.rb +0 -289
  136. data/lib/ferret/index/field_infos.rb +0 -279
  137. data/lib/ferret/index/fields_io.rb +0 -181
  138. data/lib/ferret/index/index.rb +0 -675
  139. data/lib/ferret/index/index_file_names.rb +0 -33
  140. data/lib/ferret/index/index_reader.rb +0 -503
  141. data/lib/ferret/index/index_writer.rb +0 -534
  142. data/lib/ferret/index/multi_reader.rb +0 -377
  143. data/lib/ferret/index/multiple_term_doc_pos_enum.rb +0 -98
  144. data/lib/ferret/index/segment_infos.rb +0 -130
  145. data/lib/ferret/index/segment_merge_info.rb +0 -49
  146. data/lib/ferret/index/segment_merge_queue.rb +0 -16
  147. data/lib/ferret/index/segment_merger.rb +0 -358
  148. data/lib/ferret/index/segment_reader.rb +0 -412
  149. data/lib/ferret/index/segment_term_enum.rb +0 -169
  150. data/lib/ferret/index/segment_term_vector.rb +0 -58
  151. data/lib/ferret/index/term.rb +0 -53
  152. data/lib/ferret/index/term_buffer.rb +0 -83
  153. data/lib/ferret/index/term_doc_enum.rb +0 -291
  154. data/lib/ferret/index/term_enum.rb +0 -52
  155. data/lib/ferret/index/term_info.rb +0 -37
  156. data/lib/ferret/index/term_infos_io.rb +0 -321
  157. data/lib/ferret/index/term_vector_offset_info.rb +0 -20
  158. data/lib/ferret/index/term_vectors_io.rb +0 -553
  159. data/lib/ferret/query_parser.rb +0 -312
  160. data/lib/ferret/query_parser/query_parser.tab.rb +0 -928
  161. data/lib/ferret/search.rb +0 -50
  162. data/lib/ferret/search/boolean_clause.rb +0 -100
  163. data/lib/ferret/search/boolean_query.rb +0 -299
  164. data/lib/ferret/search/boolean_scorer.rb +0 -294
  165. data/lib/ferret/search/caching_wrapper_filter.rb +0 -40
  166. data/lib/ferret/search/conjunction_scorer.rb +0 -99
  167. data/lib/ferret/search/disjunction_sum_scorer.rb +0 -205
  168. data/lib/ferret/search/exact_phrase_scorer.rb +0 -32
  169. data/lib/ferret/search/explanation.rb +0 -41
  170. data/lib/ferret/search/field_cache.rb +0 -215
  171. data/lib/ferret/search/field_doc.rb +0 -31
  172. data/lib/ferret/search/field_sorted_hit_queue.rb +0 -184
  173. data/lib/ferret/search/filter.rb +0 -11
  174. data/lib/ferret/search/filtered_query.rb +0 -130
  175. data/lib/ferret/search/filtered_term_enum.rb +0 -79
  176. data/lib/ferret/search/fuzzy_query.rb +0 -154
  177. data/lib/ferret/search/fuzzy_term_enum.rb +0 -247
  178. data/lib/ferret/search/hit_collector.rb +0 -34
  179. data/lib/ferret/search/hit_queue.rb +0 -11
  180. data/lib/ferret/search/index_searcher.rb +0 -200
  181. data/lib/ferret/search/match_all_query.rb +0 -104
  182. data/lib/ferret/search/multi_phrase_query.rb +0 -216
  183. data/lib/ferret/search/multi_searcher.rb +0 -261
  184. data/lib/ferret/search/multi_term_query.rb +0 -65
  185. data/lib/ferret/search/non_matching_scorer.rb +0 -22
  186. data/lib/ferret/search/phrase_positions.rb +0 -55
  187. data/lib/ferret/search/phrase_query.rb +0 -214
  188. data/lib/ferret/search/phrase_scorer.rb +0 -152
  189. data/lib/ferret/search/prefix_query.rb +0 -54
  190. data/lib/ferret/search/query.rb +0 -140
  191. data/lib/ferret/search/query_filter.rb +0 -51
  192. data/lib/ferret/search/range_filter.rb +0 -103
  193. data/lib/ferret/search/range_query.rb +0 -139
  194. data/lib/ferret/search/req_excl_scorer.rb +0 -125
  195. data/lib/ferret/search/req_opt_sum_scorer.rb +0 -70
  196. data/lib/ferret/search/score_doc.rb +0 -38
  197. data/lib/ferret/search/score_doc_comparator.rb +0 -114
  198. data/lib/ferret/search/scorer.rb +0 -91
  199. data/lib/ferret/search/similarity.rb +0 -278
  200. data/lib/ferret/search/sloppy_phrase_scorer.rb +0 -47
  201. data/lib/ferret/search/sort.rb +0 -112
  202. data/lib/ferret/search/sort_comparator.rb +0 -60
  203. data/lib/ferret/search/sort_field.rb +0 -91
  204. data/lib/ferret/search/spans.rb +0 -12
  205. data/lib/ferret/search/spans/near_spans_enum.rb +0 -304
  206. data/lib/ferret/search/spans/span_first_query.rb +0 -79
  207. data/lib/ferret/search/spans/span_near_query.rb +0 -108
  208. data/lib/ferret/search/spans/span_not_query.rb +0 -130
  209. data/lib/ferret/search/spans/span_or_query.rb +0 -176
  210. data/lib/ferret/search/spans/span_query.rb +0 -25
  211. data/lib/ferret/search/spans/span_scorer.rb +0 -74
  212. data/lib/ferret/search/spans/span_term_query.rb +0 -105
  213. data/lib/ferret/search/spans/span_weight.rb +0 -84
  214. data/lib/ferret/search/spans/spans_enum.rb +0 -44
  215. data/lib/ferret/search/term_query.rb +0 -128
  216. data/lib/ferret/search/term_scorer.rb +0 -183
  217. data/lib/ferret/search/top_docs.rb +0 -36
  218. data/lib/ferret/search/top_field_docs.rb +0 -17
  219. data/lib/ferret/search/weight.rb +0 -54
  220. data/lib/ferret/search/wildcard_query.rb +0 -26
  221. data/lib/ferret/search/wildcard_term_enum.rb +0 -61
  222. data/lib/ferret/stemmers.rb +0 -1
  223. data/lib/ferret/stemmers/porter_stemmer.rb +0 -218
  224. data/lib/ferret/store.rb +0 -5
  225. data/lib/ferret/store/buffered_index_io.rb +0 -190
  226. data/lib/ferret/store/directory.rb +0 -141
  227. data/lib/ferret/store/fs_store.rb +0 -381
  228. data/lib/ferret/store/index_io.rb +0 -245
  229. data/lib/ferret/store/ram_store.rb +0 -286
  230. data/lib/ferret/utils.rb +0 -8
  231. data/lib/ferret/utils/bit_vector.rb +0 -123
  232. data/lib/ferret/utils/date_tools.rb +0 -138
  233. data/lib/ferret/utils/number_tools.rb +0 -91
  234. data/lib/ferret/utils/parameter.rb +0 -41
  235. data/lib/ferret/utils/priority_queue.rb +0 -120
  236. data/lib/ferret/utils/string_helper.rb +0 -47
  237. data/lib/ferret/utils/thread_local.rb +0 -28
  238. data/lib/ferret/utils/weak_key_hash.rb +0 -60
  239. data/lib/rferret.rb +0 -37
  240. data/rake_utils/code_statistics.rb +0 -106
  241. data/test/benchmark/tb_ram_store.rb +0 -76
  242. data/test/benchmark/tb_rw_vint.rb +0 -26
  243. data/test/functional/thread_safety_index_test.rb +0 -81
  244. data/test/functional/thread_safety_test.rb +0 -137
  245. data/test/longrunning/tc_numbertools.rb +0 -60
  246. data/test/longrunning/tm_store.rb +0 -19
  247. data/test/unit/analysis/ctc_analyzer.rb +0 -532
  248. data/test/unit/analysis/data/wordfile +0 -6
  249. data/test/unit/analysis/rtc_letter_tokenizer.rb +0 -20
  250. data/test/unit/analysis/rtc_lower_case_filter.rb +0 -20
  251. data/test/unit/analysis/rtc_lower_case_tokenizer.rb +0 -27
  252. data/test/unit/analysis/rtc_per_field_analyzer_wrapper.rb +0 -39
  253. data/test/unit/analysis/rtc_porter_stem_filter.rb +0 -16
  254. data/test/unit/analysis/rtc_standard_analyzer.rb +0 -20
  255. data/test/unit/analysis/rtc_standard_tokenizer.rb +0 -20
  256. data/test/unit/analysis/rtc_stop_analyzer.rb +0 -20
  257. data/test/unit/analysis/rtc_stop_filter.rb +0 -14
  258. data/test/unit/analysis/rtc_white_space_analyzer.rb +0 -21
  259. data/test/unit/analysis/rtc_white_space_tokenizer.rb +0 -20
  260. data/test/unit/analysis/rtc_word_list_loader.rb +0 -32
  261. data/test/unit/analysis/tc_token.rb +0 -25
  262. data/test/unit/document/rtc_field.rb +0 -28
  263. data/test/unit/document/tc_document.rb +0 -47
  264. data/test/unit/document/tc_field.rb +0 -98
  265. data/test/unit/index/rtc_compound_file_io.rb +0 -107
  266. data/test/unit/index/rtc_field_infos.rb +0 -127
  267. data/test/unit/index/rtc_fields_io.rb +0 -167
  268. data/test/unit/index/rtc_multiple_term_doc_pos_enum.rb +0 -83
  269. data/test/unit/index/rtc_segment_infos.rb +0 -74
  270. data/test/unit/index/rtc_segment_term_docs.rb +0 -17
  271. data/test/unit/index/rtc_segment_term_enum.rb +0 -60
  272. data/test/unit/index/rtc_segment_term_vector.rb +0 -71
  273. data/test/unit/index/rtc_term_buffer.rb +0 -57
  274. data/test/unit/index/rtc_term_info.rb +0 -19
  275. data/test/unit/index/rtc_term_infos_io.rb +0 -192
  276. data/test/unit/index/rtc_term_vectors_io.rb +0 -108
  277. data/test/unit/index/tc_term.rb +0 -27
  278. data/test/unit/index/tc_term_voi.rb +0 -18
  279. data/test/unit/search/rtc_similarity.rb +0 -37
  280. data/test/unit/search/rtc_sort_field.rb +0 -14
  281. data/test/unit/search/tc_multi_searcher2.rb +0 -126
  282. data/test/unit/store/rtc_fs_store.rb +0 -62
  283. data/test/unit/store/rtc_ram_store.rb +0 -15
  284. data/test/unit/store/rtm_store.rb +0 -150
  285. data/test/unit/store/rtm_store_lock.rb +0 -2
  286. data/test/unit/ts_document.rb +0 -2
  287. data/test/unit/utils/rtc_bit_vector.rb +0 -73
  288. data/test/unit/utils/rtc_date_tools.rb +0 -50
  289. data/test/unit/utils/rtc_number_tools.rb +0 -59
  290. data/test/unit/utils/rtc_parameter.rb +0 -40
  291. data/test/unit/utils/rtc_priority_queue.rb +0 -62
  292. data/test/unit/utils/rtc_string_helper.rb +0 -21
  293. data/test/unit/utils/rtc_thread.rb +0 -61
  294. data/test/unit/utils/rtc_weak_key_hash.rb +0 -25
  295. data/test/utils/number_to_spoken.rb +0 -132
@@ -1,2 +0,0 @@
1
- module StoreLockTest
2
- end
@@ -1,2 +0,0 @@
1
- require File.join(File.dirname(__FILE__), "../test_helper.rb")
2
- load_test_dir('unit/document')
@@ -1,73 +0,0 @@
1
- require File.dirname(__FILE__) + "/../../test_helper"
2
-
3
-
4
- class BitVectorTest < Test::Unit::TestCase
5
- include Ferret::Utils
6
-
7
- def test_bignum_conversion()
8
- j = 256
9
- 10.times do
10
- j *= j
11
- assert_equal(j, BitVector.string_to_bignum(BitVector.bignum_to_string(j)))
12
- end
13
- end
14
-
15
- def test_bv()
16
- bv = BitVector.new
17
- assert_equal(0, bv.count)
18
- bv.set(10)
19
- assert(bv.get(10))
20
- assert_equal(1, bv.count)
21
- bv.set(10)
22
- assert(bv.get(10))
23
- assert_equal(1, bv.count)
24
- bv.set(20)
25
- assert(bv.get(20))
26
- assert_equal(2, bv.count)
27
- bv.set(21)
28
- assert(bv.get(21))
29
- assert_equal(3, bv.count)
30
- bv.clear(21)
31
- assert(!bv.get(21))
32
- assert_equal(2, bv.count)
33
- bv.clear(20)
34
- assert(!bv.get(20))
35
- assert_equal(1, bv.count)
36
- assert(bv.get(10))
37
- end
38
-
39
- def test_bv_rw()
40
- dir = Ferret::Store::RAMDirectory.new
41
- bv = BitVector.new
42
- assert_equal(0, bv.count)
43
- bv.set(5)
44
- assert_equal(1, bv.count)
45
- bv.set(8)
46
- assert_equal(2, bv.count)
47
- bv.set(13)
48
- assert_equal(3, bv.count)
49
- bv.set(21)
50
- assert_equal(4, bv.count)
51
- bv.set(34)
52
- assert_equal(5, bv.count)
53
- bv.write(dir, "bv.test")
54
- bv = nil
55
- bv = BitVector.read(dir, "bv.test")
56
- assert(!bv.get(4))
57
- assert(bv.get(5))
58
- assert(!bv.get(6))
59
- assert(!bv.get(7))
60
- assert(bv.get(8))
61
- assert(!bv.get(9))
62
- assert(!bv.get(12))
63
- assert(bv.get(13))
64
- assert(!bv.get(14))
65
- assert(!bv.get(20))
66
- assert(bv.get(21))
67
- assert(!bv.get(22))
68
- assert(!bv.get(33))
69
- assert(bv.get(34))
70
- assert(!bv.get(35))
71
- assert_equal(5, bv.count)
72
- end
73
- end
@@ -1,50 +0,0 @@
1
- require File.dirname(__FILE__) + "/../../test_helper"
2
-
3
-
4
- class DateToolsTest < Test::Unit::TestCase
5
- include Ferret::Utils
6
-
7
- def test_serialization()
8
- # grab time to the nearest millisecond
9
- t = Time.at((Time.now().to_i*1000).floor()/1000)
10
-
11
- s = DateTools.serialize_time(t)
12
-
13
- t_after = DateTools.deserialize_time(s)
14
- assert_equal(t, t_after, "date changed after serialization")
15
- end
16
-
17
- def test_serialization_constants()
18
- # assert existance of these constants
19
- assert(DateTools::MAX_SERIALIZED_DATE_STRING)
20
- assert(DateTools::MIN_SERIALIZED_DATE_STRING)
21
- end
22
-
23
- def test_time_to_s()
24
- t = Time.mktime(2004, 9, 5, 22, 33, 44, 555000)
25
-
26
- assert_equal("2004", DateTools.time_to_s(t, DateTools::Resolution::YEAR))
27
- assert_equal("200409", DateTools.time_to_s(t, DateTools::Resolution::MONTH))
28
- assert_equal("20040905", DateTools.time_to_s(t, DateTools::Resolution::DAY))
29
- assert_equal("2004090522", DateTools.time_to_s(t, DateTools::Resolution::HOUR))
30
- assert_equal("200409052233", DateTools.time_to_s(t, DateTools::Resolution::MINUTE))
31
- assert_equal("20040905223344", DateTools.time_to_s(t, DateTools::Resolution::SECOND))
32
- assert_equal("20040905223344555", DateTools.time_to_s(t, DateTools::Resolution::MILLISECOND))
33
- end
34
-
35
- def test_s_to_time()
36
- assert_equal(Time.mktime(2004), DateTools.s_to_time("2004"))
37
- assert_equal(Time.mktime(2004, 9), DateTools.s_to_time("200409"))
38
- assert_equal(Time.mktime(2004, 9, 5), DateTools.s_to_time("20040905"))
39
- assert_equal(Time.mktime(2004, 9, 5, 22), DateTools.s_to_time("2004090522"))
40
- assert_equal(Time.mktime(2004, 9, 5, 22, 33), DateTools.s_to_time("200409052233"))
41
- assert_equal(Time.mktime(2004, 9, 5, 22, 33, 44), DateTools.s_to_time("20040905223344"))
42
- assert_equal(Time.mktime(2004, 9, 5, 22, 33, 44, 555000), DateTools.s_to_time("20040905223344555"))
43
- end
44
-
45
- def test_round()
46
- t = Time.mktime(2004, 9, 5, 22, 33, 44, 555000)
47
- assert_equal(Time.mktime(2004, 9, 5), DateTools.round(t, DateTools::Resolution::DAY))
48
- end
49
-
50
- end
@@ -1,59 +0,0 @@
1
- require File.dirname(__FILE__) + "/../../test_helper"
2
-
3
-
4
- class NumberToolsTest < Test::Unit::TestCase
5
- include Ferret::Utils
6
-
7
- def test_near_zero()
8
- 2.times() do |i|
9
- 2.times() { |j| subtest_two_longs(i, j) }
10
- end
11
- end
12
-
13
- def test_max()
14
- # make sure the constants convert to their equivelents
15
- assert_equal(NumberTools::LONG_MAX_VALUE, NumberTools.s_to_long(NumberTools::MAX_STRING_VALUE))
16
- assert_equal(NumberTools::MAX_STRING_VALUE, NumberTools.long_to_s(NumberTools::LONG_MAX_VALUE))
17
- # test near MAX, too
18
-
19
- l = NumberTools::LONG_MAX_VALUE
20
- subtest_two_longs(l, l - 1)
21
- end
22
-
23
- def test_min()
24
- # make sure the constants convert to their equivelents
25
- assert_equal(NumberTools::LONG_MIN_VALUE, NumberTools.s_to_long(NumberTools::MIN_STRING_VALUE))
26
- assert_equal(NumberTools::MIN_STRING_VALUE, NumberTools.long_to_s(NumberTools::LONG_MIN_VALUE))
27
-
28
- # test near MIN, too
29
- l = NumberTools::LONG_MIN_VALUE
30
- subtest_two_longs(l, l + 1)
31
- end
32
-
33
- def subtest_two_longs(i, j)
34
- # convert to strings
35
- a = NumberTools.long_to_s(i)
36
- b = NumberTools.long_to_s(j)
37
-
38
- # are they the right length?
39
- assert_equal(NumberTools::STR_SIZE, a.length())
40
- assert_equal(NumberTools::STR_SIZE, b.length())
41
-
42
- # are they the right order?
43
- if (i < j)
44
- assert(a < b)
45
- elsif (i > j)
46
- assert(a > b)
47
- else
48
- assert_equal(a, b)
49
- end
50
-
51
- # can we convert them back to longs?
52
- i2 = NumberTools.s_to_long(a)
53
- j2 = NumberTools.s_to_long(b)
54
-
55
- assert_equal(i, i2)
56
- assert_equal(j, j2)
57
- end
58
-
59
- end
@@ -1,40 +0,0 @@
1
- require File.dirname(__FILE__) + "/../../test_helper"
2
-
3
-
4
- class ParameterTest < Test::Unit::TestCase
5
- include Ferret::Utils
6
-
7
- class Param1 < Parameter
8
- VAL1 = Param1.new("VAL")
9
- end
10
-
11
- class Param2 < Parameter
12
- VAL1 = Param2.new("VAL")
13
- end
14
-
15
- def test_parameter_cmp()
16
- assert_raise(ArgumentError) do
17
- class <<Param1
18
- v = Param1.new("VAL")
19
- end
20
- end
21
- assert_raise(ArgumentError) do
22
- class <<Param1
23
- v = Param2.new("VAL")
24
- end
25
- end
26
-
27
- p1 = Param1::VAL1
28
- p2 = Param1::VAL1
29
- p3 = Param2::VAL1
30
- assert_equal(p1, p2)
31
- assert_not_equal(p1, p3)
32
- end
33
-
34
- def test_marshalling()
35
- p1 = Param1::VAL1
36
- data = Marshal.dump(p1)
37
- p2 = Marshal.load(data)
38
- assert_equal(p1, p2)
39
- end
40
- end
@@ -1,62 +0,0 @@
1
- require File.dirname(__FILE__) + "/../../test_helper"
2
-
3
-
4
- class PriorityQueueTest < Test::Unit::TestCase
5
- include Ferret::Utils
6
-
7
- PQ_STRESS_SIZE = 1000
8
-
9
- def test_pq()
10
- pq = PriorityQueue.new(4)
11
- assert_equal(0, pq.size)
12
- pq.push("bword")
13
- assert_equal(1, pq.size)
14
- assert_equal("bword", pq.top)
15
- pq.push("cword")
16
- assert_equal(2, pq.size)
17
- assert_equal("bword", pq.top)
18
- pq.push("aword")
19
- assert_equal(3, pq.size)
20
- assert_equal("aword", pq.top)
21
- pq.push("dword")
22
- assert_equal(4, pq.size)
23
- assert_equal("aword", pq.top)
24
- assert_equal("aword", pq.pop())
25
- assert_equal(3, pq.size)
26
- assert_equal("bword", pq.pop())
27
- assert_equal(2, pq.size)
28
- assert_equal("cword", pq.pop())
29
- assert_equal(1, pq.size)
30
- assert_equal("dword", pq.pop())
31
- assert_equal(0, pq.size)
32
- end
33
-
34
- def test_pq_clear()
35
- pq = PriorityQueue.new(3)
36
- pq.push("word1")
37
- pq.push("word2")
38
- pq.push("word3")
39
- assert_equal(3, pq.size)
40
- pq.clear()
41
- assert_equal(0, pq.size)
42
- end
43
-
44
-
45
- #define PQ_STRESS_SIZE 1000
46
- def test_stress_pq()
47
- pq = PriorityQueue.new(PQ_STRESS_SIZE)
48
- PQ_STRESS_SIZE.times do
49
- pq.push("<#{(rand * PQ_STRESS_SIZE).to_i}>")
50
- end
51
-
52
- prev = pq.pop()
53
- (PQ_STRESS_SIZE - 1).times do
54
- curr = pq.pop()
55
- if (prev > curr)
56
- assert(prev <= curr, "previous #{prev} should be less than current #{curr}")
57
- end
58
- prev = curr
59
- end
60
- pq.clear()
61
- end
62
- end
@@ -1,21 +0,0 @@
1
- require File.dirname(__FILE__) + "/../../test_helper"
2
-
3
-
4
- class StringHelperTest < Test::Unit::TestCase
5
- include Ferret::Utils
6
-
7
- def test_string_difference()
8
- assert_equal(3, StringHelper.string_difference("David", "Dave"))
9
- assert_equal(0, StringHelper.string_difference("David", "Erik"))
10
- assert_equal(4, StringHelper.string_difference("book", "bookworm"))
11
- end
12
-
13
- def test_string_reader
14
- sr = StringHelper::StringReader.new("TestString")
15
- assert_equal("T", sr.read(1))
16
- assert_equal("es", sr.read(2))
17
- assert_equal("tStr", sr.read(4))
18
- assert_equal("ing", sr.read(100))
19
- assert_nil(sr.read(100))
20
- end
21
- end
@@ -1,61 +0,0 @@
1
- require File.dirname(__FILE__) + "/../../test_helper"
2
- require 'thread'
3
-
4
-
5
- class ThreadTest < Test::Unit::TestCase
6
- include Ferret::Utils
7
-
8
- NUM_THREADS = 100
9
-
10
- def test_basic_get_and_set()
11
- Thread.current.clear_local
12
- b = "hello"
13
- Thread.current.set_local(b, "dave")
14
- assert_equal("dave", Thread.current.get_local(b))
15
- end
16
-
17
- def test_objects_die
18
- Thread.current.clear_local
19
- a = []
20
- 10.times {|i| a[i] = "#{i}"; Thread.current.set_local(a[i], i) }
21
- 10.times {|i| assert_equal(i, Thread.current.get_local(a[i])) }
22
- assert_equal(10, Thread.current.local_size)
23
- GC.start
24
- assert_equal(10, Thread.current.local_size)
25
- 10.times {|i| a[i] = nil; }
26
- #puts w
27
-
28
- # this is a hack to get the GC to collect the last ref created above
29
- x = WeakKeyHash.new()
30
- 10.times {|i| a[i] = "#{i}"; x[a[i]] = i }
31
-
32
- assert_equal(10, Thread.current.local_size)
33
- GC.start
34
- assert(0, Thread.current.local_size)
35
- end
36
-
37
- class ThreadTester
38
- def initialize(val)
39
- Thread.current.set_local(self, val)
40
- end
41
- def inc
42
- val = Thread.current.get_local(self) + 1
43
- Thread.current.set_local(self, val)
44
- return val
45
- end
46
- end
47
-
48
- def single_thread
49
- tt = ThreadTester.new(start = rand(10000000))
50
- ((start+1)..start+11).each {|i| assert_equal(i, tt.inc) }
51
- end
52
-
53
- def test_threads_dont_share
54
- threads = []
55
- NUM_THREADS.times do
56
- threads << Thread.new { single_thread }
57
- end
58
-
59
- threads.each {|t| t.join}
60
- end
61
- end
@@ -1,25 +0,0 @@
1
- require File.dirname(__FILE__) + "/../../test_helper"
2
-
3
-
4
- class WeakKeyHashTest < Test::Unit::TestCase
5
- include Ferret::Utils
6
-
7
- def test_objects_are_destroyed()
8
- w = WeakKeyHash.new()
9
- a = []
10
- 10.times {|i| a[i] = "#{i}"; w[a[i]] = i }
11
- 10.times {|i| assert_equal(i, w[a[i]]) }
12
- assert_equal(10, w.size)
13
- 10.times {|i| a[i] = nil; }
14
- #puts w
15
-
16
- # this is a hack to get the GC to collect the last ref created above
17
- x = WeakKeyHash.new()
18
- 10.times {|i| a[i] = "#{i}"; x[a[i]] = i }
19
-
20
- GC.start
21
- #puts w.size
22
- #puts w
23
- assert(0, w.size)
24
- end
25
- end
@@ -1,132 +0,0 @@
1
- # Author: Matthew D Moss
2
- #
3
- # Writtern for ruby quiz #25
4
- #
5
- class JapaneseTranslator
6
- # My knowledge of counting Japanese is limited, so this may not
7
- # be entirely correct; in particular, I don't know what rules
8
- # to follow after 'hyaku man' (1,000,000).
9
- # I also combine a digit with its group, such as 'gohyaku' rather
10
- # than 'go hyaku'; I just like reading it better that way.
11
-
12
- DIGITS = %w(zero ichi ni san yon go roku nana hachi kyu)
13
- GROUPS = %w(nothingtoseeheremovealong ju hyaku sen)
14
- MAN = 10000
15
-
16
- def to_spoken(val)
17
- case val <=> 0
18
- when -1
19
- '- ' + to_spoken(-val)
20
- when 0
21
- DIGITS[0]
22
- else
23
- group(val, 0)
24
- end
25
- end
26
-
27
- private
28
-
29
- def group(val, level)
30
- if val >= MAN
31
- group(val / MAN, 0) + 'man ' + group(val % MAN, 0)
32
- else
33
- case val
34
- when 0
35
- ''
36
- when 1
37
- level == 0 ? DIGITS[val] : GROUPS[level]
38
- when 2...10
39
- DIGITS[val] + (GROUPS[level] if level > 0).to_s
40
- else
41
- group(val / 10, level+1) + ' ' + group(val % 10, level)
42
- end
43
- end
44
- end
45
- end
46
-
47
-
48
- class USEnglishTranslator
49
- # Formal, US English. Optional 'and'. Will not produce things
50
- # such as 'twelve hundred' but rather 'one thousand two hundred'.
51
- # The use of 'and' is incomplete; it is sometimes missed.
52
-
53
- DIGITS = %w(zero one two three four five six seven eight nine)
54
- TEENS = %w(ten eleven twelve thirteen fourteen fifteen sixteen
55
- seventeen eighteen nineteen)
56
- TENS = %w(hello world twenty thirty forty fifty sixty seventy
57
- eighty ninety)
58
- GROUPS = %w(thousand million billion trillion quadrillion
59
- quintillion sextillion septillion octillion nonillion
60
- decillion)
61
- K = 1000
62
-
63
- def initialize(conjunction = true)
64
- @conjunction = conjunction
65
- end
66
-
67
- def to_spoken(val)
68
- case val <=> 0
69
- when -1
70
- 'negative ' + to_spoken(-val)
71
- when 0
72
- DIGITS[0]
73
- else
74
- group(val, 0).flatten.join(' ')
75
- end
76
- end
77
-
78
- private
79
-
80
- def group(val, level)
81
- x = group(val / K, level + 1) << GROUPS[level] if val >= K
82
- x.to_a << under_1000(val % K, level)
83
- end
84
-
85
- def under_1000(val, level)
86
- x = [DIGITS[val / 100]] << 'hundred' if val >= 100
87
- x.to_a << under_100(val % 100, (level == 0 and not x.nil?))
88
- end
89
-
90
- def under_100(val, junction)
91
- x = [('and' if @conjunction and junction)] # wyf?
92
- case val
93
- when 0
94
- []
95
- when 1...10
96
- x << DIGITS[val]
97
- when 10...20
98
- x << TEENS[val - 10]
99
- else
100
- d = val % 10
101
- x << (TENS[val / 10] + ('-' + DIGITS[d] if d != 0).to_s)
102
- end
103
- end
104
- end
105
-
106
-
107
- class Integer
108
- def to_spoken(translator = USEnglishTranslator.new)
109
- translator.to_spoken(self).squeeze(' ').strip
110
- end
111
- end
112
-
113
- if $0 == __FILE__
114
- SAMPLES = [ 0, 1, 2, 5, 10, 11, 14, 18, 20, 21, 29, 33, 42, 50, 87, 99,
115
- 100, 101, 110, 167, 199, 200, 201, 276, 300, 314, 500, 610,
116
- 1000, 1039, 1347, 2309, 3098, 23501, 32767, 70000, 5480283,
117
- 2435489238, 234100090000, -42, -2001 ]
118
-
119
- TRANSLATORS = { 'US English' => USEnglishTranslator.new,
120
- 'Japanese' => JapaneseTranslator.new }
121
-
122
-
123
- # main
124
- TRANSLATORS.each do |lang, translator|
125
- puts
126
- puts lang
127
- puts '-' * lang.length
128
- SAMPLES.each do |val|
129
- puts "%12d => %s" % [val, val.to_spoken(translator)]
130
- end
131
- end
132
- end