ferret 0.9.6 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (295) hide show
  1. data/MIT-LICENSE +1 -1
  2. data/README +12 -24
  3. data/Rakefile +38 -54
  4. data/TODO +14 -17
  5. data/ext/analysis.c +982 -823
  6. data/ext/analysis.h +133 -76
  7. data/ext/array.c +96 -58
  8. data/ext/array.h +40 -13
  9. data/ext/bitvector.c +476 -118
  10. data/ext/bitvector.h +264 -22
  11. data/ext/compound_io.c +217 -229
  12. data/ext/defines.h +49 -0
  13. data/ext/document.c +107 -317
  14. data/ext/document.h +31 -65
  15. data/ext/except.c +81 -36
  16. data/ext/except.h +117 -55
  17. data/ext/extconf.rb +2 -9
  18. data/ext/ferret.c +211 -104
  19. data/ext/ferret.h +22 -11
  20. data/ext/filter.c +97 -82
  21. data/ext/fs_store.c +348 -367
  22. data/ext/global.c +226 -188
  23. data/ext/global.h +44 -26
  24. data/ext/hash.c +474 -391
  25. data/ext/hash.h +441 -68
  26. data/ext/hashset.c +124 -96
  27. data/ext/hashset.h +169 -20
  28. data/ext/helper.c +56 -5
  29. data/ext/helper.h +7 -0
  30. data/ext/inc/lang.h +29 -49
  31. data/ext/inc/threading.h +31 -0
  32. data/ext/ind.c +288 -278
  33. data/ext/ind.h +68 -0
  34. data/ext/index.c +5688 -0
  35. data/ext/index.h +663 -616
  36. data/ext/lang.h +29 -49
  37. data/ext/libstemmer.c +3 -3
  38. data/ext/mem_pool.c +84 -0
  39. data/ext/mem_pool.h +35 -0
  40. data/ext/posh.c +1006 -0
  41. data/ext/posh.h +1007 -0
  42. data/ext/priorityqueue.c +117 -194
  43. data/ext/priorityqueue.h +135 -39
  44. data/ext/q_boolean.c +1305 -1108
  45. data/ext/q_const_score.c +106 -93
  46. data/ext/q_filtered_query.c +138 -135
  47. data/ext/q_fuzzy.c +206 -242
  48. data/ext/q_match_all.c +94 -80
  49. data/ext/q_multi_term.c +663 -0
  50. data/ext/q_parser.c +667 -593
  51. data/ext/q_phrase.c +992 -555
  52. data/ext/q_prefix.c +72 -61
  53. data/ext/q_range.c +235 -210
  54. data/ext/q_span.c +1480 -1166
  55. data/ext/q_term.c +273 -246
  56. data/ext/q_wildcard.c +127 -114
  57. data/ext/r_analysis.c +1720 -711
  58. data/ext/r_index.c +3049 -0
  59. data/ext/r_qparser.c +433 -146
  60. data/ext/r_search.c +2934 -1993
  61. data/ext/r_store.c +372 -143
  62. data/ext/r_utils.c +941 -0
  63. data/ext/ram_store.c +330 -326
  64. data/ext/search.c +1291 -668
  65. data/ext/search.h +403 -702
  66. data/ext/similarity.c +91 -113
  67. data/ext/similarity.h +45 -30
  68. data/ext/sort.c +721 -484
  69. data/ext/stopwords.c +361 -273
  70. data/ext/store.c +556 -58
  71. data/ext/store.h +706 -126
  72. data/ext/tags +3578 -2780
  73. data/ext/term_vectors.c +352 -0
  74. data/ext/threading.h +31 -0
  75. data/ext/win32.h +54 -0
  76. data/lib/ferret.rb +5 -17
  77. data/lib/ferret/document.rb +130 -2
  78. data/lib/ferret/index.rb +577 -26
  79. data/lib/ferret/number_tools.rb +157 -0
  80. data/lib/ferret_version.rb +3 -0
  81. data/test/test_helper.rb +5 -13
  82. data/test/unit/analysis/tc_analyzer.rb +513 -1
  83. data/test/unit/analysis/{ctc_tokenstream.rb → tc_token_stream.rb} +23 -0
  84. data/test/unit/index/tc_index.rb +183 -240
  85. data/test/unit/index/tc_index_reader.rb +312 -479
  86. data/test/unit/index/tc_index_writer.rb +397 -13
  87. data/test/unit/index/th_doc.rb +269 -206
  88. data/test/unit/query_parser/tc_query_parser.rb +40 -33
  89. data/test/unit/search/tc_filter.rb +59 -71
  90. data/test/unit/search/tc_fuzzy_query.rb +24 -16
  91. data/test/unit/search/tc_index_searcher.rb +23 -201
  92. data/test/unit/search/tc_multi_searcher.rb +78 -226
  93. data/test/unit/search/tc_search_and_sort.rb +93 -81
  94. data/test/unit/search/tc_sort.rb +23 -23
  95. data/test/unit/search/tc_sort_field.rb +7 -7
  96. data/test/unit/search/tc_spans.rb +51 -47
  97. data/test/unit/search/tm_searcher.rb +339 -0
  98. data/test/unit/store/tc_fs_store.rb +1 -1
  99. data/test/unit/store/tm_store_lock.rb +3 -3
  100. data/test/unit/tc_document.rb +81 -0
  101. data/test/unit/ts_analysis.rb +1 -1
  102. data/test/unit/ts_utils.rb +1 -1
  103. data/test/unit/utils/tc_bit_vector.rb +288 -0
  104. data/test/unit/utils/tc_number_tools.rb +117 -0
  105. data/test/unit/utils/tc_priority_queue.rb +106 -0
  106. metadata +140 -301
  107. data/CHANGELOG +0 -9
  108. data/ext/dummy.exe +0 -0
  109. data/ext/field.c +0 -408
  110. data/ext/frtio.h +0 -13
  111. data/ext/inc/except.h +0 -90
  112. data/ext/index_io.c +0 -382
  113. data/ext/index_rw.c +0 -2658
  114. data/ext/lang.c +0 -41
  115. data/ext/nix_io.c +0 -134
  116. data/ext/q_multi_phrase.c +0 -380
  117. data/ext/r_doc.c +0 -582
  118. data/ext/r_index_io.c +0 -1021
  119. data/ext/r_term.c +0 -219
  120. data/ext/term.c +0 -820
  121. data/ext/termdocs.c +0 -611
  122. data/ext/vector.c +0 -637
  123. data/ext/w32_io.c +0 -150
  124. data/lib/ferret/analysis.rb +0 -11
  125. data/lib/ferret/analysis/analyzers.rb +0 -112
  126. data/lib/ferret/analysis/standard_tokenizer.rb +0 -71
  127. data/lib/ferret/analysis/token.rb +0 -100
  128. data/lib/ferret/analysis/token_filters.rb +0 -86
  129. data/lib/ferret/analysis/token_stream.rb +0 -26
  130. data/lib/ferret/analysis/tokenizers.rb +0 -112
  131. data/lib/ferret/analysis/word_list_loader.rb +0 -27
  132. data/lib/ferret/document/document.rb +0 -152
  133. data/lib/ferret/document/field.rb +0 -312
  134. data/lib/ferret/index/compound_file_io.rb +0 -338
  135. data/lib/ferret/index/document_writer.rb +0 -289
  136. data/lib/ferret/index/field_infos.rb +0 -279
  137. data/lib/ferret/index/fields_io.rb +0 -181
  138. data/lib/ferret/index/index.rb +0 -675
  139. data/lib/ferret/index/index_file_names.rb +0 -33
  140. data/lib/ferret/index/index_reader.rb +0 -503
  141. data/lib/ferret/index/index_writer.rb +0 -534
  142. data/lib/ferret/index/multi_reader.rb +0 -377
  143. data/lib/ferret/index/multiple_term_doc_pos_enum.rb +0 -98
  144. data/lib/ferret/index/segment_infos.rb +0 -130
  145. data/lib/ferret/index/segment_merge_info.rb +0 -49
  146. data/lib/ferret/index/segment_merge_queue.rb +0 -16
  147. data/lib/ferret/index/segment_merger.rb +0 -358
  148. data/lib/ferret/index/segment_reader.rb +0 -412
  149. data/lib/ferret/index/segment_term_enum.rb +0 -169
  150. data/lib/ferret/index/segment_term_vector.rb +0 -58
  151. data/lib/ferret/index/term.rb +0 -53
  152. data/lib/ferret/index/term_buffer.rb +0 -83
  153. data/lib/ferret/index/term_doc_enum.rb +0 -291
  154. data/lib/ferret/index/term_enum.rb +0 -52
  155. data/lib/ferret/index/term_info.rb +0 -37
  156. data/lib/ferret/index/term_infos_io.rb +0 -321
  157. data/lib/ferret/index/term_vector_offset_info.rb +0 -20
  158. data/lib/ferret/index/term_vectors_io.rb +0 -553
  159. data/lib/ferret/query_parser.rb +0 -312
  160. data/lib/ferret/query_parser/query_parser.tab.rb +0 -928
  161. data/lib/ferret/search.rb +0 -50
  162. data/lib/ferret/search/boolean_clause.rb +0 -100
  163. data/lib/ferret/search/boolean_query.rb +0 -299
  164. data/lib/ferret/search/boolean_scorer.rb +0 -294
  165. data/lib/ferret/search/caching_wrapper_filter.rb +0 -40
  166. data/lib/ferret/search/conjunction_scorer.rb +0 -99
  167. data/lib/ferret/search/disjunction_sum_scorer.rb +0 -205
  168. data/lib/ferret/search/exact_phrase_scorer.rb +0 -32
  169. data/lib/ferret/search/explanation.rb +0 -41
  170. data/lib/ferret/search/field_cache.rb +0 -215
  171. data/lib/ferret/search/field_doc.rb +0 -31
  172. data/lib/ferret/search/field_sorted_hit_queue.rb +0 -184
  173. data/lib/ferret/search/filter.rb +0 -11
  174. data/lib/ferret/search/filtered_query.rb +0 -130
  175. data/lib/ferret/search/filtered_term_enum.rb +0 -79
  176. data/lib/ferret/search/fuzzy_query.rb +0 -154
  177. data/lib/ferret/search/fuzzy_term_enum.rb +0 -247
  178. data/lib/ferret/search/hit_collector.rb +0 -34
  179. data/lib/ferret/search/hit_queue.rb +0 -11
  180. data/lib/ferret/search/index_searcher.rb +0 -200
  181. data/lib/ferret/search/match_all_query.rb +0 -104
  182. data/lib/ferret/search/multi_phrase_query.rb +0 -216
  183. data/lib/ferret/search/multi_searcher.rb +0 -261
  184. data/lib/ferret/search/multi_term_query.rb +0 -65
  185. data/lib/ferret/search/non_matching_scorer.rb +0 -22
  186. data/lib/ferret/search/phrase_positions.rb +0 -55
  187. data/lib/ferret/search/phrase_query.rb +0 -214
  188. data/lib/ferret/search/phrase_scorer.rb +0 -152
  189. data/lib/ferret/search/prefix_query.rb +0 -54
  190. data/lib/ferret/search/query.rb +0 -140
  191. data/lib/ferret/search/query_filter.rb +0 -51
  192. data/lib/ferret/search/range_filter.rb +0 -103
  193. data/lib/ferret/search/range_query.rb +0 -139
  194. data/lib/ferret/search/req_excl_scorer.rb +0 -125
  195. data/lib/ferret/search/req_opt_sum_scorer.rb +0 -70
  196. data/lib/ferret/search/score_doc.rb +0 -38
  197. data/lib/ferret/search/score_doc_comparator.rb +0 -114
  198. data/lib/ferret/search/scorer.rb +0 -91
  199. data/lib/ferret/search/similarity.rb +0 -278
  200. data/lib/ferret/search/sloppy_phrase_scorer.rb +0 -47
  201. data/lib/ferret/search/sort.rb +0 -112
  202. data/lib/ferret/search/sort_comparator.rb +0 -60
  203. data/lib/ferret/search/sort_field.rb +0 -91
  204. data/lib/ferret/search/spans.rb +0 -12
  205. data/lib/ferret/search/spans/near_spans_enum.rb +0 -304
  206. data/lib/ferret/search/spans/span_first_query.rb +0 -79
  207. data/lib/ferret/search/spans/span_near_query.rb +0 -108
  208. data/lib/ferret/search/spans/span_not_query.rb +0 -130
  209. data/lib/ferret/search/spans/span_or_query.rb +0 -176
  210. data/lib/ferret/search/spans/span_query.rb +0 -25
  211. data/lib/ferret/search/spans/span_scorer.rb +0 -74
  212. data/lib/ferret/search/spans/span_term_query.rb +0 -105
  213. data/lib/ferret/search/spans/span_weight.rb +0 -84
  214. data/lib/ferret/search/spans/spans_enum.rb +0 -44
  215. data/lib/ferret/search/term_query.rb +0 -128
  216. data/lib/ferret/search/term_scorer.rb +0 -183
  217. data/lib/ferret/search/top_docs.rb +0 -36
  218. data/lib/ferret/search/top_field_docs.rb +0 -17
  219. data/lib/ferret/search/weight.rb +0 -54
  220. data/lib/ferret/search/wildcard_query.rb +0 -26
  221. data/lib/ferret/search/wildcard_term_enum.rb +0 -61
  222. data/lib/ferret/stemmers.rb +0 -1
  223. data/lib/ferret/stemmers/porter_stemmer.rb +0 -218
  224. data/lib/ferret/store.rb +0 -5
  225. data/lib/ferret/store/buffered_index_io.rb +0 -190
  226. data/lib/ferret/store/directory.rb +0 -141
  227. data/lib/ferret/store/fs_store.rb +0 -381
  228. data/lib/ferret/store/index_io.rb +0 -245
  229. data/lib/ferret/store/ram_store.rb +0 -286
  230. data/lib/ferret/utils.rb +0 -8
  231. data/lib/ferret/utils/bit_vector.rb +0 -123
  232. data/lib/ferret/utils/date_tools.rb +0 -138
  233. data/lib/ferret/utils/number_tools.rb +0 -91
  234. data/lib/ferret/utils/parameter.rb +0 -41
  235. data/lib/ferret/utils/priority_queue.rb +0 -120
  236. data/lib/ferret/utils/string_helper.rb +0 -47
  237. data/lib/ferret/utils/thread_local.rb +0 -28
  238. data/lib/ferret/utils/weak_key_hash.rb +0 -60
  239. data/lib/rferret.rb +0 -37
  240. data/rake_utils/code_statistics.rb +0 -106
  241. data/test/benchmark/tb_ram_store.rb +0 -76
  242. data/test/benchmark/tb_rw_vint.rb +0 -26
  243. data/test/functional/thread_safety_index_test.rb +0 -81
  244. data/test/functional/thread_safety_test.rb +0 -137
  245. data/test/longrunning/tc_numbertools.rb +0 -60
  246. data/test/longrunning/tm_store.rb +0 -19
  247. data/test/unit/analysis/ctc_analyzer.rb +0 -532
  248. data/test/unit/analysis/data/wordfile +0 -6
  249. data/test/unit/analysis/rtc_letter_tokenizer.rb +0 -20
  250. data/test/unit/analysis/rtc_lower_case_filter.rb +0 -20
  251. data/test/unit/analysis/rtc_lower_case_tokenizer.rb +0 -27
  252. data/test/unit/analysis/rtc_per_field_analyzer_wrapper.rb +0 -39
  253. data/test/unit/analysis/rtc_porter_stem_filter.rb +0 -16
  254. data/test/unit/analysis/rtc_standard_analyzer.rb +0 -20
  255. data/test/unit/analysis/rtc_standard_tokenizer.rb +0 -20
  256. data/test/unit/analysis/rtc_stop_analyzer.rb +0 -20
  257. data/test/unit/analysis/rtc_stop_filter.rb +0 -14
  258. data/test/unit/analysis/rtc_white_space_analyzer.rb +0 -21
  259. data/test/unit/analysis/rtc_white_space_tokenizer.rb +0 -20
  260. data/test/unit/analysis/rtc_word_list_loader.rb +0 -32
  261. data/test/unit/analysis/tc_token.rb +0 -25
  262. data/test/unit/document/rtc_field.rb +0 -28
  263. data/test/unit/document/tc_document.rb +0 -47
  264. data/test/unit/document/tc_field.rb +0 -98
  265. data/test/unit/index/rtc_compound_file_io.rb +0 -107
  266. data/test/unit/index/rtc_field_infos.rb +0 -127
  267. data/test/unit/index/rtc_fields_io.rb +0 -167
  268. data/test/unit/index/rtc_multiple_term_doc_pos_enum.rb +0 -83
  269. data/test/unit/index/rtc_segment_infos.rb +0 -74
  270. data/test/unit/index/rtc_segment_term_docs.rb +0 -17
  271. data/test/unit/index/rtc_segment_term_enum.rb +0 -60
  272. data/test/unit/index/rtc_segment_term_vector.rb +0 -71
  273. data/test/unit/index/rtc_term_buffer.rb +0 -57
  274. data/test/unit/index/rtc_term_info.rb +0 -19
  275. data/test/unit/index/rtc_term_infos_io.rb +0 -192
  276. data/test/unit/index/rtc_term_vectors_io.rb +0 -108
  277. data/test/unit/index/tc_term.rb +0 -27
  278. data/test/unit/index/tc_term_voi.rb +0 -18
  279. data/test/unit/search/rtc_similarity.rb +0 -37
  280. data/test/unit/search/rtc_sort_field.rb +0 -14
  281. data/test/unit/search/tc_multi_searcher2.rb +0 -126
  282. data/test/unit/store/rtc_fs_store.rb +0 -62
  283. data/test/unit/store/rtc_ram_store.rb +0 -15
  284. data/test/unit/store/rtm_store.rb +0 -150
  285. data/test/unit/store/rtm_store_lock.rb +0 -2
  286. data/test/unit/ts_document.rb +0 -2
  287. data/test/unit/utils/rtc_bit_vector.rb +0 -73
  288. data/test/unit/utils/rtc_date_tools.rb +0 -50
  289. data/test/unit/utils/rtc_number_tools.rb +0 -59
  290. data/test/unit/utils/rtc_parameter.rb +0 -40
  291. data/test/unit/utils/rtc_priority_queue.rb +0 -62
  292. data/test/unit/utils/rtc_string_helper.rb +0 -21
  293. data/test/unit/utils/rtc_thread.rb +0 -61
  294. data/test/unit/utils/rtc_weak_key_hash.rb +0 -25
  295. data/test/utils/number_to_spoken.rb +0 -132
@@ -1,47 +0,0 @@
1
- module Ferret::Utils
2
- module StringHelper
3
- # Methods for manipulating strings.
4
-
5
- class StringReader
6
- attr_reader :length
7
-
8
- def initialize(str)
9
- @str = str
10
- @pointer = 0
11
- @length = @str.length
12
- end
13
-
14
- def read(len = nil)
15
- return @str if len.nil?
16
-
17
- return nil if @pointer > @length
18
-
19
- res = @str[@pointer, len]
20
- @pointer += len
21
- return res
22
- end
23
-
24
- def reset() @pointer = 0 end
25
-
26
- def close() str = nil end
27
- end
28
-
29
- # Compares two strings, character by character, and returns the
30
- # first position where the two strings differ from one another.
31
- # eg.
32
- # string_difference('dustbin', 'dusty') # => 4
33
- # string_difference('dustbin', 'evening') # => 0
34
- # string_difference('eve', 'evening') # => 3
35
- #
36
- # s1:: The first string to compare
37
- # s2:: The second string to compare
38
- # returns:: The first position where the two strings differ.
39
- def StringHelper.string_difference(s1, s2)
40
- len = [s1.length, s2.length].min
41
- len.times do |i|
42
- return i if (s1[i] != s2[i])
43
- end
44
- return len
45
- end
46
- end
47
- end
@@ -1,28 +0,0 @@
1
- require 'thread'
2
- class Thread
3
- def make_deleter
4
- lambda{|id| @ferret_cache.delete(id)}
5
- end
6
-
7
- # Set the local value for the thread
8
- def set_local(key, value)
9
- @del ||= make_deleter
10
- @ferret_cache ||= {}
11
- ObjectSpace.define_finalizer(key, @del)
12
- @ferret_cache[key.object_id] = value
13
- end
14
-
15
- # Get the local value for the thread
16
- def get_local(key)
17
- return (@ferret_cache ||= {})[key.object_id]
18
- end
19
-
20
- # Returns the number of local variables stored. Useful for testing.
21
- def local_size
22
- return (@ferret_cache ||= {}).size
23
- end
24
-
25
- def clear_local
26
- (@ferret_cache ||= {}).clear
27
- end
28
- end
@@ -1,60 +0,0 @@
1
- module Ferret::Utils
2
-
3
- require 'weakref'
4
- require 'monitor'
5
-
6
- # This class implements a weak key hash. ie all keys that are stored in this
7
- # hash can still be garbage collected, and if they are garbage collected
8
- # then the key and it's corresponding value will be deleted from the hash.
9
- # eg.
10
- # name = "david"
11
- # last_names = WeakKeyHash.new()
12
- # last_names[name] = "balmain"
13
- # puts last_names["david"] #=>"balmain"
14
- # GC.start
15
- # puts last_names["david"] #=>"balmain"
16
- # name = nil
17
- # GC.start
18
- # # the name "david" will now have been garbage collected so it should
19
- # # have been removed from the hash
20
- # puts last_names["david"] #=>nil
21
- #
22
- # WeakKeyHash subclasses Monitor so it can be synchronized on.
23
- #
24
- # === NOTE
25
- # Unfortunately the ruby garbage collector is not always predictable so your
26
- # results may differ but each key should eventually be freed when all other
27
- # references have been removed and the garbage collector is ready.
28
- class WeakKeyHash < Monitor
29
- # Create a new WeakKeyHash.
30
- def initialize
31
- super()
32
- @hash = {}
33
- @deleter = lambda{|id| @hash.delete(id)}
34
- end
35
-
36
- # Set the value for the key just like a Hash
37
- def []=(key, value)
38
- ObjectSpace.define_finalizer(key, @deleter)
39
- @hash[key.object_id] = value
40
- end
41
-
42
- # Get the value for the key
43
- def [](key)
44
- return @hash[key.object_id]
45
- end
46
-
47
- # Return the number of elements in the Hash
48
- def size
49
- @hash.size
50
- end
51
-
52
- # Print a string representation the WeakKeyHash
53
- def to_s
54
- buffer = ""
55
- @hash.each_pair {|key, value| buffer << "<#{ObjectSpace._id2ref(key)}=>#{value}>"}
56
- return buffer
57
- end
58
-
59
- end
60
- end
data/lib/rferret.rb DELETED
@@ -1,37 +0,0 @@
1
- $: << File.dirname(__FILE__)
2
- #--
3
- # Copyright (c) 2005 David Balmain
4
- #
5
- # Permission is hereby granted, free of charge, to any person obtaining
6
- # a copy of this software and associated documentation files (the
7
- # "Software"), to deal in the Software without restriction, including
8
- # without limitation the rights to use, copy, modify, merge, publish,
9
- # distribute, sublicense, and/or sell copies of the Software, and to
10
- # permit persons to whom the Software is furnished to do so, subject to
11
- # the following conditions:
12
- #
13
- # The above copyright notice and this permission notice shall be
14
- # included in all copies or substantial portions of the Software.
15
- #
16
- # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
- # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
- # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
- # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
- # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
- # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
- # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23
- #++
24
- # :include: ../TUTORIAL
25
- module Ferret
26
- VERSION = '0.9.6'
27
- end
28
-
29
- $ferret_pure_ruby = true
30
- require 'ferret/utils'
31
- require 'ferret/document'
32
- require 'ferret/stemmers'
33
- require 'ferret/analysis'
34
- require 'ferret/store'
35
- require 'ferret/index'
36
- require 'ferret/search'
37
- require 'ferret/query_parser'
@@ -1,106 +0,0 @@
1
- # This code was taken from rails and is under the same license as ferret.
2
- # Thanks go to David Heinemeier Hansson
3
- class CodeStatistics
4
- def initialize(*pairs)
5
- @pairs = pairs
6
- @statistics = calculate_statistics
7
- @total = calculate_total if pairs.length > 1
8
- end
9
-
10
- def to_s
11
- print_header
12
- @pairs.each { |pair| print_line(pair.first, @statistics[pair.first]) }
13
- print_splitter
14
-
15
- if @total
16
- print_line("Total", @total)
17
- print_splitter
18
- end
19
-
20
- print_code_test_stats
21
- end
22
-
23
- private
24
- def calculate_statistics
25
- @pairs.inject({}) { |stats, pair| stats[pair.first] = calculate_directory_statistics(pair.last); stats }
26
- end
27
-
28
- def calculate_directory_statistics(directory, pattern = /.*\.rb$/)
29
- stats = { "lines" => 0, "codelines" => 0, "classes" => 0, "methods" => 0 }
30
-
31
- Dir.foreach(directory) do |file_name|
32
- if File.stat(directory + "/" + file_name).directory? and (/^\./ !~ file_name)
33
- newstats = calculate_directory_statistics(directory + "/" + file_name, pattern)
34
- stats.each { |k, v| stats[k] += newstats[k] }
35
- end
36
-
37
- next unless file_name =~ pattern
38
-
39
- f = File.open(directory + "/" + file_name)
40
-
41
- while line = f.gets
42
- stats["lines"] += 1
43
- stats["classes"] += 1 if line =~ /class [A-Z]/
44
- stats["methods"] += 1 if line =~ /def [a-z]/
45
- stats["codelines"] += 1 unless line =~ /^\s*$/ || line =~ /^\s*#/
46
- end
47
- end
48
-
49
- stats
50
- end
51
-
52
- def calculate_total
53
- total = { "lines" => 0, "codelines" => 0, "classes" => 0, "methods" => 0 }
54
- @statistics.each_value { |pair| pair.each { |k, v| total[k] += v } }
55
- total
56
- end
57
-
58
- def calculate_code
59
- code_loc = 0
60
- @statistics.each { |k, v| code_loc += v['codelines'] unless ['Units', 'Functionals'].include? k }
61
- code_loc
62
- end
63
-
64
- def calculate_tests
65
- test_loc = 0
66
- @statistics.each { |k, v| test_loc += v['codelines'] if ['Units', 'Functionals'].include? k }
67
- test_loc
68
- end
69
-
70
- def print_header
71
- print_splitter
72
- puts "| Name | Lines | LOC | Classes | Methods | M/C | LOC/M |"
73
- print_splitter
74
- end
75
-
76
- def print_splitter
77
- puts "+----------------------+-------+-------+---------+---------+-----+-------+"
78
- end
79
-
80
- def print_line(name, statistics)
81
- m_over_c = (statistics["methods"] / statistics["classes"]) rescue m_over_c = 0
82
- loc_over_m = (statistics["codelines"] / statistics["methods"]) - 2 rescue loc_over_m = 0
83
-
84
- start = if ['Units', 'Functionals'].include? name
85
- "| #{name.ljust(18)} "
86
- else
87
- "| #{name.ljust(20)} "
88
- end
89
-
90
- puts start +
91
- "| #{statistics["lines"].to_s.rjust(5)} " +
92
- "| #{statistics["codelines"].to_s.rjust(5)} " +
93
- "| #{statistics["classes"].to_s.rjust(7)} " +
94
- "| #{statistics["methods"].to_s.rjust(7)} " +
95
- "| #{m_over_c.to_s.rjust(3)} " +
96
- "| #{loc_over_m.to_s.rjust(5)} |"
97
- end
98
-
99
- def print_code_test_stats
100
- code = calculate_code
101
- tests = calculate_tests
102
-
103
- puts " Code LOC: #{code} Test LOC: #{tests} Code to Test Ratio: 1:#{sprintf("%.1f", tests.to_f/code)}"
104
- puts ""
105
- end
106
- end
@@ -1,76 +0,0 @@
1
- require File.dirname(__FILE__) + "/../test_helper"
2
- require 'benchmark'
3
-
4
- class RAMStoreTest < Test::Unit::TestCase
5
- def setup
6
- @dir = Ferret::Store::RAMDirectory.new
7
- end
8
-
9
- def teardown
10
- @dir.close()
11
- end
12
-
13
- def test_rw_bytes
14
- bytes = [0x34, 0x87, 0xF9, 0xEA, 0x00, 0xFF]
15
- rw_test(bytes, "byte")
16
- end
17
-
18
- def test_rw_ints
19
- ints = [-2147483648, 2147483647, -1, 0]
20
- rw_test(ints, "int")
21
- end
22
-
23
- def test_rw_longs
24
- longs = [-9223372036854775808, 9223372036854775807, -1, 0]
25
- rw_test(longs, "long")
26
- end
27
-
28
- def test_rw_uints
29
- uints = [0xffffffff, 100000, 0]
30
- rw_test(uints, "uint")
31
- end
32
-
33
- def test_rw_ulongs
34
- ulongs = [0xffffffffffffffff, 100000000000000, 0]
35
- rw_test(ulongs, "ulong")
36
- end
37
-
38
- def test_rw_vints
39
- vints = [ 0xF8DC843342FE3484234987FE98AB987C897D214D123D123458EFBE2E238BACDEB9878790ABCDEF123DEF23988B89C,
40
- 0x0000000000000000000000000000000000000000,
41
- 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF]
42
- rw_test(vints, "vint")
43
- end
44
-
45
- def test_rw_vlongs
46
- vlongs = [ 0xF8DC843342FE3484234987FE98AB987C897D214D123D123458EFBE2E238BACDEB9878790ABCDEF123DEF23988B89C,
47
- 0x0000000000000000000000000000000000000000,
48
- 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF]
49
- rw_test(vlongs, "vlong")
50
- end
51
-
52
- def test_rw_strings
53
- strings = ['This is a ruby ferret test string ~!@#$%^&*()`123456790-=\)_+|', 'This is another string. I\'ll make this one a little longer than the last one. But I guess we need a few shorter ones too.', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine', 'ten']
54
- rw_test(strings, "string")
55
- end
56
-
57
- # this test fills up the output stream so that the buffer will have to be
58
- # written a few times. It then uses seek to make sure that it works
59
- # correctly
60
-
61
- def rw_test(values, type)
62
- puts "\nrw_#{type} test"
63
- Benchmark.bmbm do |x|
64
- x.report("write") do
65
- ostream = @dir.create_output("rw_#{type}.test")
66
- 1000.times {values.each { |b| ostream.__send__("write_" + type, b) }}
67
- ostream.close
68
- end
69
- x.report("read") do
70
- istream = @dir.open_input("rw_#{type}.test")
71
- 1000.times {values.each { |b| assert_equal(b, istream.__send__("read_" + type), "#{type} should be equal") }}
72
- istream.close
73
- end
74
- end
75
- end
76
- end
@@ -1,26 +0,0 @@
1
- $:.unshift File.join(File.dirname(__FILE__), '../../lib')
2
-
3
- require 'ferret'
4
-
5
- vints = [ 9223372036854775807,
6
- 0x00,
7
- 0xFFFFFFFFFFFFFFFF]
8
- t = Time.new
9
- 10.times do
10
- dpath = File.join(File.dirname(__FILE__),
11
- 'fsdir')
12
- dir = Ferret::Store::FSDirectory.new(dpath, true)
13
-
14
- 100.times do
15
- ostream = dir.create_output("rw_vint.test")
16
- 300.times { |i| ostream.write_vint(vints[i%3]) }
17
- ostream.close
18
- istream = dir.open_input("rw_vint.test")
19
- 300.times { istream.read_vint }
20
- istream.close
21
- end
22
-
23
- dir.close
24
- end
25
-
26
- puts "took #{Time.new - t} seconds"
@@ -1,81 +0,0 @@
1
- require File.dirname(__FILE__) + "/../test_helper"
2
- require File.dirname(__FILE__) + "/../utils/number_to_spoken.rb"
3
- require 'thread'
4
-
5
- class IndexThreadSafetyTest < Test::Unit::TestCase
6
- include Ferret::Index
7
- include Ferret::Document
8
-
9
- INDEX_DIR = File.expand_path(File.join(File.dirname(__FILE__), "index"))
10
- ITERATIONS = 100
11
- NUM_THREADS = 10
12
- ANALYZER = Ferret::Analysis::Analyzer.new()
13
-
14
- def setup
15
- @index = Index.new(:path => 'index2',
16
- :create => true,
17
- :analyzer => ANALYZER,
18
- :default_field => 'contents')
19
- end
20
-
21
- def indexing_thread()
22
- ITERATIONS.times do
23
- choice = rand()
24
-
25
- if choice > 0.98
26
- do_optimize
27
- elsif choice > 0.9
28
- do_delete_doc
29
- elsif choice > 0.7
30
- do_search
31
- else
32
- do_add_doc
33
- end
34
- end
35
- rescue => e
36
- puts e
37
- puts e.backtrace
38
- @index = nil
39
- raise e
40
- end
41
-
42
- def do_optimize
43
- puts "Optimizing the index"
44
- @index.optimize
45
- end
46
-
47
- def do_delete_doc
48
- return if @index.size == 0
49
- doc_num = rand(@index.size)
50
- puts "Deleting #{doc_num} from index which has#{@index.has_deletions? ? "" : " no"} deletions"
51
- puts "document was already deleted" if (@index.deleted?(doc_num))
52
- @index.delete(doc_num)
53
- end
54
-
55
- def do_add_doc
56
- d = Document.new()
57
- n = rand(0xFFFFFFFF)
58
- d << Field.new("id", n.to_s, Field::Store::YES, Field::Index::UNTOKENIZED)
59
- d << Field.new("contents", n.to_spoken, Field::Store::NO, Field::Index::TOKENIZED)
60
- puts("Adding #{n}")
61
- @index << d
62
- end
63
-
64
- def do_search
65
- n = rand(0xFFFFFFFF)
66
- puts("Searching for #{n}")
67
- hits = @index.search_each(n.to_spoken, :num_docs => 3) do |d, s|
68
- puts "Hit for #{n}: #{@index[d]["id"]} - #{s}"
69
- end
70
- puts("Searched for #{n}: total = #{hits}")
71
- end
72
-
73
- def test_threading
74
- threads = []
75
- NUM_THREADS.times do
76
- threads << Thread.new { indexing_thread }
77
- end
78
-
79
- threads.each {|t| t.join}
80
- end
81
- end