ferret 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (202)
  1. data/MIT-LICENSE +20 -0
  2. data/README +109 -0
  3. data/Rakefile +275 -0
  4. data/TODO +9 -0
  5. data/TUTORIAL +197 -0
  6. data/ext/extconf.rb +3 -0
  7. data/ext/ferret.c +23 -0
  8. data/ext/ferret.h +85 -0
  9. data/ext/index_io.c +543 -0
  10. data/ext/priority_queue.c +227 -0
  11. data/ext/ram_directory.c +316 -0
  12. data/ext/segment_merge_queue.c +41 -0
  13. data/ext/string_helper.c +42 -0
  14. data/ext/tags +240 -0
  15. data/ext/term.c +261 -0
  16. data/ext/term_buffer.c +299 -0
  17. data/ext/util.c +12 -0
  18. data/lib/ferret.rb +41 -0
  19. data/lib/ferret/analysis.rb +11 -0
  20. data/lib/ferret/analysis/analyzers.rb +93 -0
  21. data/lib/ferret/analysis/standard_tokenizer.rb +65 -0
  22. data/lib/ferret/analysis/token.rb +79 -0
  23. data/lib/ferret/analysis/token_filters.rb +86 -0
  24. data/lib/ferret/analysis/token_stream.rb +26 -0
  25. data/lib/ferret/analysis/tokenizers.rb +107 -0
  26. data/lib/ferret/analysis/word_list_loader.rb +27 -0
  27. data/lib/ferret/document.rb +2 -0
  28. data/lib/ferret/document/document.rb +152 -0
  29. data/lib/ferret/document/field.rb +304 -0
  30. data/lib/ferret/index.rb +26 -0
  31. data/lib/ferret/index/compound_file_io.rb +343 -0
  32. data/lib/ferret/index/document_writer.rb +288 -0
  33. data/lib/ferret/index/field_infos.rb +259 -0
  34. data/lib/ferret/index/fields_io.rb +175 -0
  35. data/lib/ferret/index/index.rb +228 -0
  36. data/lib/ferret/index/index_file_names.rb +33 -0
  37. data/lib/ferret/index/index_reader.rb +462 -0
  38. data/lib/ferret/index/index_writer.rb +488 -0
  39. data/lib/ferret/index/multi_reader.rb +363 -0
  40. data/lib/ferret/index/multiple_term_doc_pos_enum.rb +105 -0
  41. data/lib/ferret/index/segment_infos.rb +130 -0
  42. data/lib/ferret/index/segment_merge_info.rb +47 -0
  43. data/lib/ferret/index/segment_merge_queue.rb +16 -0
  44. data/lib/ferret/index/segment_merger.rb +337 -0
  45. data/lib/ferret/index/segment_reader.rb +380 -0
  46. data/lib/ferret/index/segment_term_enum.rb +178 -0
  47. data/lib/ferret/index/segment_term_vector.rb +58 -0
  48. data/lib/ferret/index/term.rb +49 -0
  49. data/lib/ferret/index/term_buffer.rb +88 -0
  50. data/lib/ferret/index/term_doc_enum.rb +283 -0
  51. data/lib/ferret/index/term_enum.rb +52 -0
  52. data/lib/ferret/index/term_info.rb +41 -0
  53. data/lib/ferret/index/term_infos_io.rb +312 -0
  54. data/lib/ferret/index/term_vector_offset_info.rb +20 -0
  55. data/lib/ferret/index/term_vectors_io.rb +552 -0
  56. data/lib/ferret/query_parser.rb +274 -0
  57. data/lib/ferret/query_parser/query_parser.tab.rb +819 -0
  58. data/lib/ferret/search.rb +49 -0
  59. data/lib/ferret/search/boolean_clause.rb +100 -0
  60. data/lib/ferret/search/boolean_query.rb +303 -0
  61. data/lib/ferret/search/boolean_scorer.rb +294 -0
  62. data/lib/ferret/search/caching_wrapper_filter.rb +40 -0
  63. data/lib/ferret/search/conjunction_scorer.rb +99 -0
  64. data/lib/ferret/search/disjunction_sum_scorer.rb +203 -0
  65. data/lib/ferret/search/exact_phrase_scorer.rb +32 -0
  66. data/lib/ferret/search/explanation.rb +41 -0
  67. data/lib/ferret/search/field_cache.rb +216 -0
  68. data/lib/ferret/search/field_doc.rb +31 -0
  69. data/lib/ferret/search/field_sorted_hit_queue.rb +184 -0
  70. data/lib/ferret/search/filter.rb +11 -0
  71. data/lib/ferret/search/filtered_query.rb +130 -0
  72. data/lib/ferret/search/filtered_term_enum.rb +79 -0
  73. data/lib/ferret/search/fuzzy_query.rb +153 -0
  74. data/lib/ferret/search/fuzzy_term_enum.rb +244 -0
  75. data/lib/ferret/search/hit_collector.rb +34 -0
  76. data/lib/ferret/search/hit_queue.rb +11 -0
  77. data/lib/ferret/search/index_searcher.rb +173 -0
  78. data/lib/ferret/search/match_all_docs_query.rb +104 -0
  79. data/lib/ferret/search/multi_phrase_query.rb +204 -0
  80. data/lib/ferret/search/multi_term_query.rb +65 -0
  81. data/lib/ferret/search/non_matching_scorer.rb +22 -0
  82. data/lib/ferret/search/phrase_positions.rb +55 -0
  83. data/lib/ferret/search/phrase_query.rb +217 -0
  84. data/lib/ferret/search/phrase_scorer.rb +153 -0
  85. data/lib/ferret/search/prefix_query.rb +47 -0
  86. data/lib/ferret/search/query.rb +111 -0
  87. data/lib/ferret/search/query_filter.rb +51 -0
  88. data/lib/ferret/search/range_filter.rb +103 -0
  89. data/lib/ferret/search/range_query.rb +139 -0
  90. data/lib/ferret/search/req_excl_scorer.rb +125 -0
  91. data/lib/ferret/search/req_opt_sum_scorer.rb +70 -0
  92. data/lib/ferret/search/score_doc.rb +38 -0
  93. data/lib/ferret/search/score_doc_comparator.rb +114 -0
  94. data/lib/ferret/search/scorer.rb +91 -0
  95. data/lib/ferret/search/similarity.rb +278 -0
  96. data/lib/ferret/search/sloppy_phrase_scorer.rb +47 -0
  97. data/lib/ferret/search/sort.rb +105 -0
  98. data/lib/ferret/search/sort_comparator.rb +60 -0
  99. data/lib/ferret/search/sort_field.rb +87 -0
  100. data/lib/ferret/search/spans.rb +12 -0
  101. data/lib/ferret/search/spans/near_spans_enum.rb +304 -0
  102. data/lib/ferret/search/spans/span_first_query.rb +79 -0
  103. data/lib/ferret/search/spans/span_near_query.rb +108 -0
  104. data/lib/ferret/search/spans/span_not_query.rb +130 -0
  105. data/lib/ferret/search/spans/span_or_query.rb +176 -0
  106. data/lib/ferret/search/spans/span_query.rb +25 -0
  107. data/lib/ferret/search/spans/span_scorer.rb +74 -0
  108. data/lib/ferret/search/spans/span_term_query.rb +105 -0
  109. data/lib/ferret/search/spans/span_weight.rb +84 -0
  110. data/lib/ferret/search/spans/spans_enum.rb +44 -0
  111. data/lib/ferret/search/term_query.rb +128 -0
  112. data/lib/ferret/search/term_scorer.rb +181 -0
  113. data/lib/ferret/search/top_docs.rb +24 -0
  114. data/lib/ferret/search/top_field_docs.rb +17 -0
  115. data/lib/ferret/search/weight.rb +54 -0
  116. data/lib/ferret/search/wildcard_query.rb +26 -0
  117. data/lib/ferret/search/wildcard_term_enum.rb +61 -0
  118. data/lib/ferret/stemmers.rb +1 -0
  119. data/lib/ferret/stemmers/porter_stemmer.rb +218 -0
  120. data/lib/ferret/store.rb +5 -0
  121. data/lib/ferret/store/buffered_index_io.rb +191 -0
  122. data/lib/ferret/store/directory.rb +139 -0
  123. data/lib/ferret/store/fs_store.rb +338 -0
  124. data/lib/ferret/store/index_io.rb +259 -0
  125. data/lib/ferret/store/ram_store.rb +282 -0
  126. data/lib/ferret/utils.rb +7 -0
  127. data/lib/ferret/utils/bit_vector.rb +105 -0
  128. data/lib/ferret/utils/date_tools.rb +138 -0
  129. data/lib/ferret/utils/number_tools.rb +91 -0
  130. data/lib/ferret/utils/parameter.rb +41 -0
  131. data/lib/ferret/utils/priority_queue.rb +120 -0
  132. data/lib/ferret/utils/string_helper.rb +47 -0
  133. data/lib/ferret/utils/weak_key_hash.rb +51 -0
  134. data/rake_utils/code_statistics.rb +106 -0
  135. data/setup.rb +1551 -0
  136. data/test/benchmark/tb_ram_store.rb +76 -0
  137. data/test/benchmark/tb_rw_vint.rb +26 -0
  138. data/test/longrunning/tc_numbertools.rb +60 -0
  139. data/test/longrunning/tm_store.rb +19 -0
  140. data/test/test_all.rb +9 -0
  141. data/test/test_helper.rb +6 -0
  142. data/test/unit/analysis/tc_analyzer.rb +21 -0
  143. data/test/unit/analysis/tc_letter_tokenizer.rb +20 -0
  144. data/test/unit/analysis/tc_lower_case_filter.rb +20 -0
  145. data/test/unit/analysis/tc_lower_case_tokenizer.rb +27 -0
  146. data/test/unit/analysis/tc_per_field_analyzer_wrapper.rb +39 -0
  147. data/test/unit/analysis/tc_porter_stem_filter.rb +16 -0
  148. data/test/unit/analysis/tc_standard_analyzer.rb +20 -0
  149. data/test/unit/analysis/tc_standard_tokenizer.rb +20 -0
  150. data/test/unit/analysis/tc_stop_analyzer.rb +20 -0
  151. data/test/unit/analysis/tc_stop_filter.rb +14 -0
  152. data/test/unit/analysis/tc_white_space_analyzer.rb +21 -0
  153. data/test/unit/analysis/tc_white_space_tokenizer.rb +20 -0
  154. data/test/unit/analysis/tc_word_list_loader.rb +32 -0
  155. data/test/unit/document/tc_document.rb +47 -0
  156. data/test/unit/document/tc_field.rb +80 -0
  157. data/test/unit/index/tc_compound_file_io.rb +107 -0
  158. data/test/unit/index/tc_field_infos.rb +119 -0
  159. data/test/unit/index/tc_fields_io.rb +167 -0
  160. data/test/unit/index/tc_index.rb +140 -0
  161. data/test/unit/index/tc_index_reader.rb +622 -0
  162. data/test/unit/index/tc_index_writer.rb +57 -0
  163. data/test/unit/index/tc_multiple_term_doc_pos_enum.rb +80 -0
  164. data/test/unit/index/tc_segment_infos.rb +74 -0
  165. data/test/unit/index/tc_segment_term_docs.rb +17 -0
  166. data/test/unit/index/tc_segment_term_enum.rb +60 -0
  167. data/test/unit/index/tc_segment_term_vector.rb +71 -0
  168. data/test/unit/index/tc_term.rb +22 -0
  169. data/test/unit/index/tc_term_buffer.rb +57 -0
  170. data/test/unit/index/tc_term_info.rb +19 -0
  171. data/test/unit/index/tc_term_infos_io.rb +192 -0
  172. data/test/unit/index/tc_term_vector_offset_info.rb +18 -0
  173. data/test/unit/index/tc_term_vectors_io.rb +108 -0
  174. data/test/unit/index/th_doc.rb +244 -0
  175. data/test/unit/query_parser/tc_query_parser.rb +84 -0
  176. data/test/unit/search/tc_filter.rb +113 -0
  177. data/test/unit/search/tc_fuzzy_query.rb +136 -0
  178. data/test/unit/search/tc_index_searcher.rb +188 -0
  179. data/test/unit/search/tc_search_and_sort.rb +98 -0
  180. data/test/unit/search/tc_similarity.rb +37 -0
  181. data/test/unit/search/tc_sort.rb +48 -0
  182. data/test/unit/search/tc_sort_field.rb +27 -0
  183. data/test/unit/search/tc_spans.rb +153 -0
  184. data/test/unit/store/tc_fs_store.rb +84 -0
  185. data/test/unit/store/tc_ram_store.rb +35 -0
  186. data/test/unit/store/tm_store.rb +180 -0
  187. data/test/unit/store/tm_store_lock.rb +68 -0
  188. data/test/unit/ts_analysis.rb +16 -0
  189. data/test/unit/ts_document.rb +4 -0
  190. data/test/unit/ts_index.rb +18 -0
  191. data/test/unit/ts_query_parser.rb +3 -0
  192. data/test/unit/ts_search.rb +10 -0
  193. data/test/unit/ts_store.rb +6 -0
  194. data/test/unit/ts_utils.rb +10 -0
  195. data/test/unit/utils/tc_bit_vector.rb +65 -0
  196. data/test/unit/utils/tc_date_tools.rb +50 -0
  197. data/test/unit/utils/tc_number_tools.rb +59 -0
  198. data/test/unit/utils/tc_parameter.rb +40 -0
  199. data/test/unit/utils/tc_priority_queue.rb +62 -0
  200. data/test/unit/utils/tc_string_helper.rb +21 -0
  201. data/test/unit/utils/tc_weak_key_hash.rb +25 -0
  202. metadata +251 -0
@@ -0,0 +1,98 @@
1
+ require File.dirname(__FILE__) + "/../../test_helper"
2
+
3
# Exercises IndexSearcher result ordering (relevance, index order, typed
# field sorts, auto-detected sorts and custom sort types) against a small
# in-memory index of ten documents.
class SearchAndSortTest < Test::Unit::TestCase
  include Ferret::Document
  include Ferret::Search
  include Ferret::Store
  include Ferret::Analysis
  include Ferret::Index

  # Builds one document from +hash+ (field name => text) and appends it to
  # +writer+. Every pair becomes an unstored, untokenized field; the document
  # boost is taken from the "float" entry.
  def add_doc(hash, writer)
    doc = Document.new()
    hash.each_pair do |field, text|
      doc << Field.new(field, text, Field::Store::NO, Field::Index::UNTOKENIZED)
    end
    doc.boost = hash["float"].to_f
    writer << doc
  end

  # Creates a fresh RAMDirectory holding the ten fixture documents.
  # The trailing comment columns give the length of each "float" string and
  # that length modulo 4, which the custom sort tests rely on.
  def setup()
    @dir = RAMDirectory.new()
    iw = IndexWriter.new(@dir, :analyzer => WhiteSpaceAnalyzer.new(), :create => true)
    docs = [                                                               # len mod
      {"search"=>"findall","string"=>"a","int"=>"6","float"=>"0.01"},      #  4   0
      {"search"=>"findall","string"=>"c","int"=>"5","float"=>"0.1"},       #  3   3
      {"search"=>"findall","string"=>"e","int"=>"2","float"=>"0.001"},     #  5   1
      {"search"=>"findall","string"=>"g","int"=>"1","float"=>"1.0"},       #  3   3
      {"search"=>"findall","string"=>"i","int"=>"3","float"=>"0.0001"},    #  6   2
      {"search"=>"findall","string"=>"j","int"=>"4","float"=>"10.0"},      #  4   0
      {"search"=>"findall","string"=>"h","int"=>"5","float"=>"0.00001"},   #  7   3
      {"search"=>"findall","string"=>"f","int"=>"2","float"=>"100.0"},     #  5   1
      {"search"=>"findall","string"=>"d","int"=>"3","float"=>"1000.0"},    #  6   2
      {"search"=>"findall","string"=>"b","int"=>"4","float"=>"0.000001"}   #  8   0
    ]
    docs.each {|doc| add_doc(doc, iw)}
    iw.close
  end

  # Closes the directory after each test. Test::Unit only runs the hook named
  # +teardown+; under the previous +tear_down+ spelling this never executed.
  def teardown()
    @dir.close()
  end
  # Keep the old name callable for anything that referenced it explicitly.
  alias tear_down teardown

  # Runs +query+ on searcher +is+ (optionally with +sort+) and asserts that
  # the returned documents appear in exactly the +expected+ order.
  def do_test_top_docs(is, query, expected, sort = nil)
    top_docs = is.search(query, {:sort => sort})
    # Without this check a search returning no hits would pass vacuously,
    # because the loop below would simply not run.
    assert_equal(expected.size, top_docs.total_hits)
    top_docs.total_hits.times do |i|
      assert_equal(expected[i], top_docs.score_docs[i].doc)
    end
  end

  # Built-in orderings. Each document's boost comes from its "float" field,
  # so the expected relevance order follows descending "float" value.
  def test_sorts()
    is = IndexSearcher.new(@dir)
    q = TermQuery.new(Term.new("search", "findall"))
    do_test_top_docs(is, q, [8,7,5,3,1,0,2,4,6,9])
    do_test_top_docs(is, q, [8,7,5,3,1,0,2,4,6,9], Sort::RELEVANCE)
    do_test_top_docs(is, q, [8,7,5,3,1,0,2,4,6,9], [SortField::FIELD_SCORE])
    do_test_top_docs(is, q, [0,1,2,3,4,5,6,7,8,9], Sort::INDEX_ORDER)
    do_test_top_docs(is, q, [0,1,2,3,4,5,6,7,8,9], [SortField::FIELD_DOC])

    ## int
    sf_int = SortField.new("int", {:sort_type => SortField::SortType::INT})
    do_test_top_docs(is, q, [0,1,6,5,9,4,8,2,7,3], [sf_int])
    do_test_top_docs(is, q, [0,1,6,5,9,8,4,7,2,3], [sf_int, SortField::FIELD_SCORE])
    sf_int = SortField.new("int", {:sort_type => SortField::SortType::INT, :reverse => true})
    do_test_top_docs(is, q, [3,2,7,4,8,5,9,1,6,0], [sf_int])

    ## float
    sf_float = SortField.new("float", {:sort_type => SortField::SortType::FLOAT})
    do_test_top_docs(is, q, [8,7,5,3,1,0,2,4,6,9], Sort.new([sf_float, SortField::FIELD_SCORE]))
    sf_float = SortField.new("float", {:sort_type => SortField::SortType::FLOAT, :reverse => true})
    do_test_top_docs(is, q, [9,6,4,2,0,1,3,5,7,8], Sort.new([sf_float, SortField::FIELD_SCORE]))

    ## str
    sf_str = SortField.new("string", {:sort_type => SortField::SortType::STRING})
    do_test_top_docs(is, q, [0,9,1,8,2,7,3,6,4,5], [sf_str, SortField::FIELD_SCORE])

    ## auto
    do_test_top_docs(is, q, [0,9,1,8,2,7,3,6,4,5], Sort.new("string"))
    do_test_top_docs(is, q, [0,1,6,5,9,4,8,2,7,3], Sort.new(["int"]))
    do_test_top_docs(is, q, [8,7,5,3,1,0,2,4,6,9], Sort.new("float"))
    do_test_top_docs(is, q, [9,6,4,2,0,1,3,5,7,8], Sort.new("float", true))
    do_test_top_docs(is, q, [0,1,6,9,5,8,4,2,7,3], Sort.new(["int", "string"]))
    do_test_top_docs(is, q, [3,7,2,4,8,5,9,6,1,0], Sort.new(["int", "string"], true))
  end

  # Custom sort type whose sort key is the length of the field's string.
  LENGTH = SortField::SortType.new("length", lambda{|str| str.length})
  # Same key as LENGTH, but compared by key modulo 4.
  LENGTH_MODULO = SortField::SortType.new("length_mod", lambda{|str| str.length},
                                          lambda{|i, j| (i%4) <=> (j%4)})

  # User-defined sort types, plus a per-field :comparator supplied alongside
  # a sort type.
  def test_special_sorts
    is = IndexSearcher.new(@dir)
    q = TermQuery.new(Term.new("search", "findall"))
    sf = SortField.new("float", {:sort_type => LENGTH})
    do_test_top_docs(is, q, [9,6,4,8,2,7,0,5,1,3], [sf])
    sf = SortField.new("float", {:sort_type => LENGTH_MODULO})
    do_test_top_docs(is, q, [1,3,6,4,8,2,7,0,5,9], [sf])
    sf = SortField.new("float", {:sort_type => LENGTH,
                                 :comparator => lambda{|i,j| (j%4) <=> (i%4)}})
    do_test_top_docs(is, q, [0,5,9,2,7,4,8,1,3,6], [sf])
  end
end
@@ -0,0 +1,37 @@
1
+ require File.dirname(__FILE__) + "/../../test_helper"
2
+
3
+
4
# Checks the norm byte/float conversions on Similarity and the scoring
# formulas of DefaultSimilarity.
class SimilarityTest < Test::Unit::TestCase
  include Ferret::Search
  include Ferret::Index

  # Each byte value 0..255 must round-trip through the float encoding (both
  # via byte_to_float/float_to_byte and decode_norm/encode_norm), and
  # NORM_TABLE must hold the decoded float for every byte.
  def test_byte_float_conversion()
    0.upto(255) do |byte|
      decoded = Similarity.byte_to_float(byte)
      assert_equal(byte, Similarity.float_to_byte(decoded))
      assert_equal(Similarity.byte_to_float(byte), Similarity::NORM_TABLE[byte])
      norm = Similarity.decode_norm(byte)
      assert_equal(byte, Similarity.encode_norm(norm))
    end
  end

  # Spot-checks every DefaultSimilarity formula with inputs chosen to give
  # exact floating point results.
  def test_default_similarity
    sim = DefaultSimilarity.new()
    assert_equal(1.0/4, sim.length_norm("field", 16))
    assert_equal(1.0/4, sim.query_norm(16))
    assert_equal(3.0, sim.tf(9))
    assert_equal(1.0/10, sim.sloppy_freq(9))
    assert_equal(1.0, sim.idf(9, 10))
    assert_equal(4.0, sim.coord(12, 3))

    # Minimal stand-in searcher: answers only the two calls made on it here.
    stub_searcher = Object.new
    class << stub_searcher
      def doc_freq(term)
        9
      end

      def max_doc()
        10
      end
    end

    single_term = Term.new("field", "text")
    assert_equal(1.0, sim.idf_term(single_term, stub_searcher))

    phrase_terms = [
      ["field1", "text1"],
      ["field1", "text2"],
      ["field2", "text3"],
      ["field2", "text4"]
    ].map { |field, text| Term.new(field, text) }
    assert_equal(4.0, sim.idf_phrase(phrase_terms, stub_searcher))
  end
end
@@ -0,0 +1,48 @@
1
+ require File.dirname(__FILE__) + "/../../test_helper"
2
+
3
# Verifies how Sort objects are composed: the predefined constants,
# construction from field names, and construction from SortField lists.
class SortTest < Test::Unit::TestCase
  include Ferret::Search

  # RELEVANCE is score then doc; INDEX_ORDER is doc only.
  def test_basic()
    relevance = Sort::RELEVANCE
    assert_equal(2, relevance.fields.size)
    assert_equal(SortField::FIELD_SCORE, relevance.fields[0])
    assert_equal(SortField::FIELD_DOC, relevance.fields[1])

    index_order = Sort::INDEX_ORDER
    assert_equal(1, index_order.fields.size)
    assert_equal(SortField::FIELD_DOC, index_order.fields[0])
  end

  # A single name yields an AUTO field followed by FIELD_DOC; an array of
  # names yields one AUTO field per name.
  def test_string_init()
    single = Sort.new("field")
    assert_equal(2, single.fields.size)
    first = single.fields[0]
    assert_equal(SortField::SortType::AUTO, first.sort_type)
    assert_equal("field", first.name)
    assert_equal(SortField::FIELD_DOC, single.fields[1])

    multi = Sort.new(["field1", "field2", "field3"])
    assert_equal(3, multi.fields.size)
    %w(field1 field2 field3).each_with_index do |expected_name, pos|
      assert_equal(SortField::SortType::AUTO, multi.fields[pos].sort_type)
      assert_equal(expected_name, multi.fields[pos].name)
    end
  end

  # An explicit list of SortFields is kept in the order given.
  def test_multi_fields()
    reversed_int = SortField.new("field", {:sort_type => SortField::SortType::INT,
                                           :reverse => true})
    sort = Sort.new([reversed_int, SortField::FIELD_SCORE, SortField::FIELD_DOC])

    assert_equal(3, sort.fields.size)
    leading = sort.fields[0]
    assert_equal(SortField::SortType::INT, leading.sort_type)
    assert_equal("field", leading.name)
    assert(leading.reverse?)
    assert_equal(SortField::FIELD_SCORE, sort.fields[1])
    assert_equal(SortField::FIELD_DOC, sort.fields[2])
  end
end
@@ -0,0 +1,27 @@
1
+ require File.dirname(__FILE__) + "/../../test_helper"
2
+
3
# Verifies the predefined SortField constants and SortField argument
# validation.
class SortFieldTest < Test::Unit::TestCase
  include Ferret::Search

  # FIELD_SCORE: SCORE sort type, no field name, not reversed, no comparator.
  def test_field_score()
    fs = SortField::FIELD_SCORE
    assert_equal(SortField::SortType::SCORE, fs.sort_type)
    assert_nil(fs.name)
    assert(!fs.reverse?, "FIELD_SCORE should not be reverse")
    assert_nil(fs.comparator)
  end

  # FIELD_DOC: DOC sort type, no field name, not reversed, no comparator.
  def test_field_doc()
    fs = SortField::FIELD_DOC
    assert_equal(SortField::SortType::DOC, fs.sort_type)
    assert_nil(fs.name)
    assert(!fs.reverse?, "FIELD_DOC should not be reverse")
    assert_nil(fs.comparator)
  end

  # A nil field name combined with the INT sort type must be rejected.
  def test_error_raised()
    assert_raise(ArgumentError) {
      # Result deliberately discarded: only the raised exception matters, and
      # assigning it to a local produced an "unused variable" warning.
      SortField.new(nil, {:sort_type => SortField::SortType::INT})
    }
  end
end
@@ -0,0 +1,153 @@
1
+ require File.dirname(__FILE__) + "/../../test_helper"
2
+
3
+
4
# Exercises the span query family (term, near, not, first, or) against an
# in-memory index of 31 one-line documents in which the words "start" and
# "finish" move through otherwise fixed text.
class SpansBasicTest < Test::Unit::TestCase
  include Ferret::Document
  include Ferret::Search
  include Ferret::Store
  include Ferret::Index
  include Ferret::Search::Spans
  include Ferret::Analysis

  # Indexes the fixture lines (document number == array index) into a new
  # RAMDirectory and opens @is, the searcher used by every test.
  def setup()
    data = [
      "start finish one two three four five six seven",
      "start one finish two three four five six seven",
      "start one two finish three four five six seven",
      "start one two three finish four five six seven",
      "start one two three four finish five six seven",
      "start one two three four five finish six seven",
      "start one two three four five six finish seven eight",
      "start one two three four five six seven finish eight nine",
      "start one two three four five six finish seven eight",
      "start one two three four five finish six seven",
      "start one two three four finish five six seven",
      "start one two three finish four five six seven",
      "start one two finish three four five six seven",
      "start one finish two three four five six seven",
      "start finish one two three four five six seven",
      "start start one two three four five six seven",
      "finish start one two three four five six seven",
      "finish one start two three four five six seven",
      "finish one two start three four five six seven",
      "finish one two three start four five six seven",
      "finish one two three four start five six seven",
      "finish one two three four five start six seven",
      "finish one two three four five six start seven eight",
      "finish one two three four five six seven start eight nine",
      "finish one two three four five six start seven eight",
      "finish one two three four five start six seven",
      "finish one two three four start five six seven",
      "finish one two three start four five six seven",
      "finish one two start three four five six seven",
      "finish one start two three four five six seven",
      "finish start one two three four five six seven"
    ]
    @dir = RAMDirectory.new
    iw = IndexWriter.new(@dir, :analyzer => WhiteSpaceAnalyzer.new(), :create => true)
    data.each do |line|
      doc = Document.new()
      doc << Field.new("field", line, Field::Store::NO, Field::Index::TOKENIZED)
      iw << doc
    end

    iw.close()

    @is = IndexSearcher.new(@dir)
  end

  # Closes the directory after each test. Test::Unit only runs the hook named
  # +teardown+, so the previous +tear_down+ spelling never executed. It also
  # called @iw.close, but @iw is never assigned: the writer is a local in
  # setup and is already closed there.
  def teardown()
    @dir.close
  end
  # Keep the old name callable for anything that referenced it explicitly.
  alias tear_down teardown

  # Renders +i+ as space-separated "<n>" tokens split by decimal place,
  # e.g. 7 => "<7>", 42 => "<40> <2>", 345 => "<300> <40> <5>".
  def number_split(i)
    if (i < 10)
      return "<#{i}>"
    elsif (i < 100)
      return "<#{((i/10)*10)}> <#{i%10}>"
    else
      return "<#{((i/100)*100)}> <#{(((i%100)/10)*10)}> <#{i%10}>"
    end
  end

  # Runs +query+ and asserts that exactly the document numbers in +expected+
  # are returned (order is not checked).
  #
  # test_explain:: when true, also compare each hit's score with the value
  #                reported by @is.explain for that document
  # top::          when given, the document number expected in first place
  def check_hits(query, expected, test_explain = false, top=nil)
    top_docs = @is.search(query, {:num_docs => expected.length})
    assert_equal(expected.length, top_docs.score_docs.size)
    assert_equal(top, top_docs.score_docs[0].doc) if top
    assert_equal(expected.length, top_docs.total_hits)
    top_docs.score_docs.each do |score_doc|
      assert(expected.include?(score_doc.doc),
             "#{score_doc.doc} was found unexpectedly")
      if test_explain
        # NOTE(review): relies on =~ between two floats; presumably Ferret
        # defines Float#=~ as an approximate comparison -- confirm.
        assert(score_doc.score =~ @is.explain(query, score_doc.doc).value,
               "Scores(#{score_doc.score} != #{@is.explain(query, score_doc.doc).value})")
      end
    end
  end

  # Single-term span queries for words that occur in only a few documents.
  def test_span_term_query()
    tq = SpanTermQuery.new(Term.new("field", "nine"))
    check_hits(tq, [7,23], true)
    tq = SpanTermQuery.new(Term.new("field", "eight"))
    check_hits(tq, [6,7,8,22,23,24])
  end

  # "start" near "finish" with the second argument at 0, 1 and 4, each with
  # the third argument true and false; the expected sets show that false
  # additionally matches the "finish ... start" documents.
  def test_span_near_query()
    tq1 = SpanTermQuery.new(Term.new("field", "start"))
    tq2 = SpanTermQuery.new(Term.new("field", "finish"))
    q = SpanNearQuery.new([tq1, tq2], 0, true)
    check_hits(q, [0,14], true)
    q = SpanNearQuery.new([tq1, tq2], 0, false)
    check_hits(q, [0,14,16,30], true)
    q = SpanNearQuery.new([tq1, tq2], 1, true)
    check_hits(q, [0,1,13,14])
    q = SpanNearQuery.new([tq1, tq2], 1, false)
    check_hits(q, [0,1,13,14,16,17,29,30])
    q = SpanNearQuery.new([tq1, tq2], 4, true)
    check_hits(q, [0,1,2,3,4,10,11,12,13,14])
    q = SpanNearQuery.new([tq1, tq2], 4, false)
    check_hits(q, [0,1,2,3,4,10,11,12,13,14,16,17,18,19,20,26,27,28,29,30])
  end

  # SpanNotQuery built from two near queries: the first is the query to
  # match, the second is passed as the exclusion.
  def test_span_not_query()
    tq1 = SpanTermQuery.new(Term.new("field", "start"))
    tq2 = SpanTermQuery.new(Term.new("field", "finish"))
    tq3 = SpanTermQuery.new(Term.new("field", "two"))
    tq4 = SpanTermQuery.new(Term.new("field", "five"))
    nearq1 = SpanNearQuery.new([tq1, tq2], 4, true)
    nearq2 = SpanNearQuery.new([tq3, tq4], 4, true)
    q = SpanNotQuery.new(nearq1, nearq2)
    check_hits(q, [0,1,13,14], true)
    nearq1 = SpanNearQuery.new([tq1, tq2], 4, false)
    q = SpanNotQuery.new(nearq1, nearq2)
    check_hits(q, [0,1,13,14,16,17,29,30])
    nearq1 = SpanNearQuery.new([tq1, tq3], 4, true)
    nearq2 = SpanNearQuery.new([tq2, tq4], 8, false)
    q = SpanNotQuery.new(nearq1, nearq2)
    check_hits(q, [2,3,4,5,6,7,8,9,10,11,12,15])
  end

  # SpanFirstQuery on "finish" with end limits 1 and 5.
  def test_span_first_query()
    finish_first = [16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
    tq = SpanTermQuery.new(Term.new("field", "finish"))
    q = SpanFirstQuery.new(tq, 1)
    check_hits(q, finish_first, true)
    q = SpanFirstQuery.new(tq, 5)
    check_hits(q, [0,1,2,3,11,12,13,14]+finish_first, false)
  end

  # SpanOrQuery combining two near queries; a document matching either one
  # is expected in the result.
  def test_span_or_query_query()
    tq1 = SpanTermQuery.new(Term.new("field", "start"))
    tq2 = SpanTermQuery.new(Term.new("field", "finish"))
    tq3 = SpanTermQuery.new(Term.new("field", "five"))
    nearq1 = SpanNearQuery.new([tq1, tq2], 1, true)
    nearq2 = SpanNearQuery.new([tq2, tq3], 0, false)
    q = SpanOrQuery.new([nearq1, nearq2])
    check_hits(q, [0,1,4,5,9,10,13,14], false)
    nearq1 = SpanNearQuery.new([tq1, tq2], 0, false)
    nearq2 = SpanNearQuery.new([tq2, tq3], 1, false)
    q = SpanOrQuery.new([nearq1, nearq2])
    check_hits(q, [0,3,4,5,6,8,9,10,11,14,16,30], false)
  end
end
@@ -0,0 +1,84 @@
1
+ require File.dirname(__FILE__) + "/../../test_helper"
2
+ require File.dirname(__FILE__) + "/tm_store"
3
+ require File.dirname(__FILE__) + "/tm_store_lock"
4
+
5
module Ferret::Store

  # Reopens FSDirectory to expose internals that the tests in this file need
  # to inspect.
  class FSDirectory
    # Returns the @@Directories class variable; the tests index it by
    # directory path.
    def self.directory_cache
      @@Directories
    end

    # Read access to the @ref_count instance variable.
    attr_reader :ref_count

    # Public wrapper that forwards to lock_prefix.
    def get_lock_prefix
      lock_prefix()
    end
  end
end
21
+
22
# Runs the shared store and store-lock test modules against FSDirectory and
# adds tests specific to the file system store: directory caching with
# reference counting, and on-disk lock files.
class FSStoreTest < Test::Unit::TestCase
  include Ferret::Store
  include StoreTest
  include StoreLockTest

  # Opens the directory under test at temp/fsdir, relative to this file.
  def setup
    @dpath = File.join(File.dirname(__FILE__),
                       '../../temp/fsdir')
    @dir = FSDirectory.get_directory(@dpath, true)
  end

  # Refreshes and then closes the directory opened by setup.
  def teardown
    @dir.refresh()
    @dir.close()
  end

  # Requesting the same path twice must return the same cached object; the
  # cache entry is dropped only once every reference has been closed.
  def test_cache
    dir_path = File.join(File.dirname(__FILE__),
                         '/../../temp/cachetest')
    assert(! FSDirectory.directory_cache[dir_path],
           "this directory should not be cached yet")
    @dir1 = FSDirectory.get_directory(dir_path, true)
    assert(FSDirectory.directory_cache[dir_path],
           "this directory should now be cached")
    # assert_equal takes (expected, actual); the original had them swapped,
    # which makes failure messages report the values the wrong way round.
    assert_equal(1, @dir1.ref_count,
                 "There is one reference so the refcount should now be 1")
    @dir2 = FSDirectory.get_directory(dir_path, true)
    assert(@dir1 === @dir2,
           "The directory should be cached so the same directory object should have been returned")
    assert_equal(2, @dir1.ref_count,
                 "There are two references so the refcount should now be 2")
    @dir1.close
    assert(FSDirectory.directory_cache[dir_path],
           "this directory shouldn't have been removed yet")
    assert_equal(1, @dir2.ref_count,
                 "There is one reference so the refcount should now be 1")
    @dir2.close
    assert(! FSDirectory.directory_cache[dir_path],
           "this directory should have been removed from the cache")
  end

  # A lock file must appear on disk only while the lock is held, and must not
  # be reported by the directory's own exists? check.
  def test_fslock
    name = "lfile"
    lock_file_path = @dpath + "/" + @dir.get_lock_prefix() + name
    # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
    if File.exist?(lock_file_path) then
      File.delete(lock_file_path)
    end
    assert(! File.exist?(lock_file_path),
           "There should be no lock file")
    lock = @dir.make_lock(name)
    assert(! File.exist?(lock_file_path),
           "There should still be no lock file")
    # @dir.exists? is the directory's own method, not File's; left unchanged.
    assert(! @dir.exists?(lock_file_path),
           "The lock should be hidden by the FSDirectories directory scan")
    assert(! lock.locked?, "lock shouldn't be locked yet")
    lock.obtain
    assert(lock.locked?, "lock should now be locked")
    assert(File.exist?(lock_file_path),
           "A lock file should have been created")
    lock.release
    assert(! lock.locked?, "lock should be freed again")
    assert(! File.exist?(lock_file_path), "The lock file should have been deleted")
  end
end
84
+ end