ferret 0.3.2 → 0.9.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (141) hide show
  1. data/CHANGELOG +9 -0
  2. data/Rakefile +51 -25
  3. data/ext/analysis.c +553 -0
  4. data/ext/analysis.h +76 -0
  5. data/ext/array.c +83 -0
  6. data/ext/array.h +19 -0
  7. data/ext/bitvector.c +164 -0
  8. data/ext/bitvector.h +29 -0
  9. data/ext/compound_io.c +335 -0
  10. data/ext/document.c +336 -0
  11. data/ext/document.h +87 -0
  12. data/ext/ferret.c +88 -47
  13. data/ext/ferret.h +43 -109
  14. data/ext/field.c +395 -0
  15. data/ext/filter.c +103 -0
  16. data/ext/fs_store.c +352 -0
  17. data/ext/global.c +219 -0
  18. data/ext/global.h +73 -0
  19. data/ext/hash.c +446 -0
  20. data/ext/hash.h +80 -0
  21. data/ext/hashset.c +141 -0
  22. data/ext/hashset.h +37 -0
  23. data/ext/helper.c +11 -0
  24. data/ext/helper.h +5 -0
  25. data/ext/inc/lang.h +41 -0
  26. data/ext/ind.c +389 -0
  27. data/ext/index.h +884 -0
  28. data/ext/index_io.c +269 -415
  29. data/ext/index_rw.c +2543 -0
  30. data/ext/lang.c +31 -0
  31. data/ext/lang.h +41 -0
  32. data/ext/priorityqueue.c +228 -0
  33. data/ext/priorityqueue.h +44 -0
  34. data/ext/q_boolean.c +1331 -0
  35. data/ext/q_const_score.c +154 -0
  36. data/ext/q_fuzzy.c +287 -0
  37. data/ext/q_match_all.c +142 -0
  38. data/ext/q_multi_phrase.c +343 -0
  39. data/ext/q_parser.c +2180 -0
  40. data/ext/q_phrase.c +657 -0
  41. data/ext/q_prefix.c +75 -0
  42. data/ext/q_range.c +247 -0
  43. data/ext/q_span.c +1566 -0
  44. data/ext/q_term.c +308 -0
  45. data/ext/q_wildcard.c +146 -0
  46. data/ext/r_analysis.c +255 -0
  47. data/ext/r_doc.c +578 -0
  48. data/ext/r_index_io.c +996 -0
  49. data/ext/r_qparser.c +158 -0
  50. data/ext/r_search.c +2321 -0
  51. data/ext/r_store.c +263 -0
  52. data/ext/r_term.c +219 -0
  53. data/ext/ram_store.c +447 -0
  54. data/ext/search.c +524 -0
  55. data/ext/search.h +1065 -0
  56. data/ext/similarity.c +143 -39
  57. data/ext/sort.c +661 -0
  58. data/ext/store.c +35 -0
  59. data/ext/store.h +152 -0
  60. data/ext/term.c +704 -143
  61. data/ext/termdocs.c +599 -0
  62. data/ext/vector.c +594 -0
  63. data/lib/ferret.rb +9 -10
  64. data/lib/ferret/analysis/analyzers.rb +2 -2
  65. data/lib/ferret/analysis/standard_tokenizer.rb +1 -1
  66. data/lib/ferret/analysis/token.rb +14 -14
  67. data/lib/ferret/analysis/token_filters.rb +3 -3
  68. data/lib/ferret/document/field.rb +16 -17
  69. data/lib/ferret/index/document_writer.rb +4 -4
  70. data/lib/ferret/index/index.rb +39 -23
  71. data/lib/ferret/index/index_writer.rb +2 -2
  72. data/lib/ferret/index/multiple_term_doc_pos_enum.rb +1 -8
  73. data/lib/ferret/index/segment_term_vector.rb +4 -4
  74. data/lib/ferret/index/term.rb +5 -1
  75. data/lib/ferret/index/term_vector_offset_info.rb +6 -6
  76. data/lib/ferret/index/term_vectors_io.rb +5 -5
  77. data/lib/ferret/query_parser/query_parser.tab.rb +81 -77
  78. data/lib/ferret/search.rb +1 -1
  79. data/lib/ferret/search/boolean_query.rb +2 -1
  80. data/lib/ferret/search/field_sorted_hit_queue.rb +3 -3
  81. data/lib/ferret/search/fuzzy_query.rb +2 -1
  82. data/lib/ferret/search/index_searcher.rb +3 -0
  83. data/lib/ferret/search/{match_all_docs_query.rb → match_all_query.rb} +7 -7
  84. data/lib/ferret/search/multi_phrase_query.rb +6 -5
  85. data/lib/ferret/search/phrase_query.rb +3 -6
  86. data/lib/ferret/search/prefix_query.rb +4 -4
  87. data/lib/ferret/search/sort.rb +3 -1
  88. data/lib/ferret/search/sort_field.rb +9 -9
  89. data/lib/ferret/search/spans/near_spans_enum.rb +1 -1
  90. data/lib/ferret/search/spans/span_near_query.rb +1 -1
  91. data/lib/ferret/search/spans/span_weight.rb +1 -1
  92. data/lib/ferret/search/spans/spans_enum.rb +7 -7
  93. data/lib/ferret/store/fs_store.rb +10 -6
  94. data/lib/ferret/store/ram_store.rb +3 -3
  95. data/lib/rferret.rb +36 -0
  96. data/test/functional/thread_safety_index_test.rb +2 -2
  97. data/test/test_helper.rb +16 -2
  98. data/test/unit/analysis/c_token.rb +25 -0
  99. data/test/unit/analysis/tc_per_field_analyzer_wrapper.rb +1 -1
  100. data/test/unit/analysis/tc_standard_analyzer.rb +1 -1
  101. data/test/unit/document/{tc_document.rb → c_document.rb} +0 -0
  102. data/test/unit/document/c_field.rb +98 -0
  103. data/test/unit/document/tc_field.rb +0 -66
  104. data/test/unit/index/{tc_index.rb → c_index.rb} +62 -6
  105. data/test/unit/index/{tc_index_reader.rb → c_index_reader.rb} +51 -10
  106. data/test/unit/index/{tc_index_writer.rb → c_index_writer.rb} +0 -4
  107. data/test/unit/index/{tc_term.rb → c_term.rb} +1 -3
  108. data/test/unit/index/{tc_term_vector_offset_info.rb → c_term_voi.rb} +5 -5
  109. data/test/unit/index/tc_segment_term_vector.rb +2 -2
  110. data/test/unit/index/tc_term_vectors_io.rb +4 -4
  111. data/test/unit/query_parser/c_query_parser.rb +138 -0
  112. data/test/unit/search/{tc_filter.rb → c_filter.rb} +24 -24
  113. data/test/unit/search/{tc_fuzzy_query.rb → c_fuzzy_query.rb} +0 -0
  114. data/test/unit/search/{tc_index_searcher.rb → c_index_searcher.rb} +9 -26
  115. data/test/unit/search/{tc_search_and_sort.rb → c_search_and_sort.rb} +15 -15
  116. data/test/unit/search/{tc_sort.rb → c_sort.rb} +2 -1
  117. data/test/unit/search/c_sort_field.rb +27 -0
  118. data/test/unit/search/{tc_spans.rb → c_spans.rb} +0 -0
  119. data/test/unit/search/tc_sort_field.rb +7 -20
  120. data/test/unit/store/c_fs_store.rb +76 -0
  121. data/test/unit/store/c_ram_store.rb +35 -0
  122. data/test/unit/store/m_store.rb +34 -0
  123. data/test/unit/store/m_store_lock.rb +68 -0
  124. data/test/unit/store/tc_fs_store.rb +0 -53
  125. data/test/unit/store/tc_ram_store.rb +0 -20
  126. data/test/unit/store/tm_store.rb +0 -30
  127. data/test/unit/store/tm_store_lock.rb +0 -66
  128. metadata +84 -31
  129. data/ext/Makefile +0 -140
  130. data/ext/ferret_ext.so +0 -0
  131. data/ext/priority_queue.c +0 -232
  132. data/ext/ram_directory.c +0 -321
  133. data/ext/segment_merge_queue.c +0 -37
  134. data/ext/segment_term_enum.c +0 -326
  135. data/ext/string_helper.c +0 -42
  136. data/ext/tags +0 -344
  137. data/ext/term_buffer.c +0 -230
  138. data/ext/term_infos_reader.c +0 -54
  139. data/ext/terminfo.c +0 -160
  140. data/ext/token.c +0 -93
  141. data/ext/util.c +0 -12
@@ -25,70 +25,4 @@ class FieldTest < Test::Unit::TestCase
25
25
  assert_equal("WITH_OFFSETS", Field::TermVector::WITH_OFFSETS.to_s)
26
26
  assert_equal("WITH_POSITIONS_OFFSETS", Field::TermVector::WITH_POSITIONS_OFFSETS.to_s)
27
27
  end
28
-
29
- def test_standard_field()
30
- f = Field.new("name", "value", Field::Store::COMPRESS, Field::Index::TOKENIZED)
31
- assert_equal("name", f.name)
32
- assert_equal("value", f.data)
33
- assert_equal(true, f.stored?)
34
- assert_equal(true, f.compressed?)
35
- assert_equal(true, f.indexed?)
36
- assert_equal(true, f.tokenized?)
37
- assert_equal(false, f.store_term_vector?)
38
- assert_equal(false, f.store_offsets?)
39
- assert_equal(false, f.store_positions?)
40
- assert_equal(false, f.omit_norms?)
41
- assert_equal(false, f.binary?)
42
- assert_equal("stored/compressed,indexed,tokenized,<name:value>", f.to_s)
43
- end
44
-
45
- def test_set_store()
46
- f = Field.new("name", nil, Field::Store::COMPRESS, Field::Index::TOKENIZED)
47
- f.stored = Field::Store::NO
48
- assert_equal(false, f.stored?)
49
- assert_equal(false, f.compressed?)
50
- assert_equal("indexed,tokenized,<name:>", f.to_s)
51
- end
52
-
53
- def test_set_index()
54
- f = Field.new("name", "value", Field::Store::COMPRESS, Field::Index::TOKENIZED)
55
- f.index = Field::Index::NO
56
- assert_equal(false, f.indexed?)
57
- assert_equal(false, f.tokenized?)
58
- assert_equal(false, f.omit_norms?)
59
- assert_equal("stored/compressed,<name:value>", f.to_s)
60
- f.index = Field::Index::NO_NORMS
61
- assert_equal(true, f.indexed?)
62
- assert_equal(false, f.tokenized?)
63
- assert_equal(true, f.omit_norms?)
64
- assert_equal("stored/compressed,indexed,omit_norms,<name:value>", f.to_s)
65
- end
66
-
67
- def test_set_term_vector()
68
- f = Field.new("name", "value", Field::Store::COMPRESS, Field::Index::TOKENIZED)
69
- f.store_term_vector = Field::TermVector::WITH_POSITIONS_OFFSETS
70
- assert_equal(true, f.store_term_vector?)
71
- assert_equal(true, f.store_offsets?)
72
- assert_equal(true, f.store_positions?)
73
- assert_equal("stored/compressed,indexed,tokenized,store_term_vector,tv_offset,tv_position,<name:value>", f.to_s)
74
- end
75
-
76
- def test_new_binary_field()
77
- tmp = []
78
- 256.times {|i| tmp[i] = i}
79
- bin = tmp.pack("c*")
80
- f = Field.new_binary_field("name", bin, Field::Store::YES)
81
- assert_equal("name", f.name)
82
- assert_equal(bin, f.data)
83
- assert_equal(true, f.stored?)
84
- assert_equal(false, f.compressed?)
85
- assert_equal(false, f.indexed?)
86
- assert_equal(false, f.tokenized?)
87
- assert_equal(false, f.store_term_vector?)
88
- assert_equal(false, f.store_offsets?)
89
- assert_equal(false, f.store_positions?)
90
- assert_equal(false, f.omit_norms?)
91
- assert_equal(true, f.binary?)
92
- assert_equal("stored/uncompressed,binary,<name:#{bin}>", f.to_s)
93
- end
94
28
  end
@@ -16,7 +16,10 @@ class IndexTest < Test::Unit::TestCase
16
16
 
17
17
  def check_results(index, query, expected)
18
18
  cnt = 0
19
+ #puts "#{query} - #{expected.inspect}"
20
+ #puts index.size
19
21
  index.search_each(query) do |doc, score|
22
+ #puts "doc-#{doc} score=#{score}"
20
23
  assert(expected.index(doc))
21
24
  cnt += 1
22
25
  end
@@ -136,7 +139,7 @@ class IndexTest < Test::Unit::TestCase
136
139
  do_test_index_with_hash(index)
137
140
  index.close
138
141
 
139
- index = Index.new(:default_field => "def_field")
142
+ index = Index.new(:default_field => "def_field", :id_field => "id")
140
143
  do_test_index_with_doc_array(index)
141
144
  index.close
142
145
  end
@@ -144,7 +147,11 @@ class IndexTest < Test::Unit::TestCase
144
147
  def test_fs_index
145
148
  fs_path = File.expand_path(File.join(File.dirname(__FILE__), '../../temp/fsdir'))
146
149
  Dir[File.join(fs_path, "*")].each {|path| begin File.delete(path) rescue nil end}
147
- assert_raise(Errno::ENOENT) {Index.new(:path => fs_path, :create_if_missing => false, :default_field => "def_field")}
150
+ assert_raise(StandardError) do
151
+ Index.new(:path => fs_path,
152
+ :create_if_missing => false,
153
+ :default_field => "def_field")
154
+ end
148
155
  index = Index.new(:path => fs_path, :default_field => "def_field")
149
156
  do_test_index_with_array(index)
150
157
  index.close
@@ -155,7 +162,9 @@ class IndexTest < Test::Unit::TestCase
155
162
  index.close
156
163
 
157
164
  Dir[File.join(fs_path, "*")].each {|path| begin File.delete(path) rescue nil end}
158
- index = Index.new(:path => fs_path, :default_field => "def_field")
165
+ index = Index.new(:path => fs_path,
166
+ :default_field => "def_field",
167
+ :id_field => "id")
159
168
  do_test_index_with_doc_array(index)
160
169
  index.close
161
170
  end
@@ -317,12 +326,16 @@ class IndexTest < Test::Unit::TestCase
317
326
  index2 << "document 2"
318
327
  assert_equal(2, index2.size)
319
328
  assert_equal(2, index.size)
329
+ top_docs = index.search("content3")
330
+ assert_equal(0, top_docs.size)
320
331
 
321
332
  iw = IndexWriter.new(fs_path, :analyzer => WhiteSpaceAnalyzer.new())
322
333
  doc = Document.new
323
334
  doc << Field.new("f", "content3", Field::Store::YES, Field::Index::TOKENIZED)
324
335
  iw << doc
325
336
  iw.close()
337
+ top_docs = index.search("content3")
338
+ assert_equal(1, top_docs.size)
326
339
  assert_equal(3, index.size)
327
340
  assert_equal("content3", index[2]["f"])
328
341
  index.close
@@ -373,7 +386,8 @@ class IndexTest < Test::Unit::TestCase
373
386
  {:id => 9, :cat => "/cat2/subcat5", :content => "content9"},
374
387
  ]
375
388
  index = Index.new(:analyzer => WhiteSpaceAnalyzer.new,
376
- :default_field => :content)
389
+ :default_field => :content,
390
+ :id_field => :id)
377
391
  data.each { |doc| index << doc }
378
392
  assert_equal(10, index.size)
379
393
  assert_equal("content5", index["5"][:content])
@@ -449,6 +463,39 @@ class IndexTest < Test::Unit::TestCase
449
463
  index.close
450
464
  end
451
465
 
466
+ def test_index_multi_key_untokenized
467
+ data = [
468
+ {:id => 0, :table => "Product", :product => "tent"},
469
+ {:id => 0, :table => "location", :location => "first floor"},
470
+ {:id => 0, :table => "Product", :product => "super tent"},
471
+ {:id => 0, :table => "location", :location => "second floor"},
472
+ {:id => 1, :table => "Product", :product => "backback"},
473
+ {:id => 1, :table => "location", :location => "second floor"},
474
+ {:id => 1, :table => "location", :location => "first floor"},
475
+ {:id => 1, :table => "Product", :product => "rucksack"},
476
+ {:id => 1, :table => "Product", :product => "backpack"}
477
+ ]
478
+ index = Index.new(:analyzer => Analyzer.new,
479
+ :key => ["id", "table"])
480
+ data.each do |dat|
481
+ doc = Document.new
482
+ dat.each_pair do |key, value|
483
+ if ([:id, :table].include?(key))
484
+ doc << Field.new(key, value, Field::Store::YES, Field::Index::UNTOKENIZED)
485
+ else
486
+ doc << Field.new(key, value, Field::Store::YES, Field::Index::TOKENIZED)
487
+ end
488
+ end
489
+ index << doc
490
+ end
491
+ assert_equal(4, index.size)
492
+ assert_equal("super tent", index[0][:product])
493
+ assert_equal("second floor", index[1][:location])
494
+ assert_equal("backpack", index[3][:product])
495
+ assert_equal("first floor", index[2][:location])
496
+ index.close
497
+ end
498
+
452
499
  def test_sortby_date
453
500
  data = [
454
501
  {:content => "one", :date => "20051023"},
@@ -459,7 +506,7 @@ class IndexTest < Test::Unit::TestCase
459
506
  {:content => "three", :date => "19790531"},
460
507
  {:content => "one", :date => "19770725"},
461
508
  {:content => "two", :date => "19751226"},
462
- {:content => "three", :date => "19390912"}
509
+ {:content => "four", :date => "19390912"}
463
510
  ]
464
511
  index = Index.new(:analyzer => WhiteSpaceAnalyzer.new)
465
512
  data.each { |doc|
@@ -481,8 +528,17 @@ class IndexTest < Test::Unit::TestCase
481
528
  assert_equal("19390912", index[top_docs.score_docs[0].doc][:date])
482
529
  assert_equal("three four", index[top_docs.score_docs[0].doc][:content])
483
530
  assert_equal("19390912", index[top_docs.score_docs[1].doc][:date])
484
- assert_equal("three", index[top_docs.score_docs[1].doc][:content])
531
+ assert_equal("four", index[top_docs.score_docs[1].doc][:content])
485
532
  assert_equal("19530315", index[top_docs.score_docs[2].doc][:date])
533
+
534
+ top_docs = index.search("one two three four",
535
+ :sort => [:date, :content])
536
+ assert_equal("19390912", index[top_docs.score_docs[0].doc][:date])
537
+ assert_equal("four", index[top_docs.score_docs[0].doc][:content])
538
+ assert_equal("19390912", index[top_docs.score_docs[1].doc][:date])
539
+ assert_equal("three four", index[top_docs.score_docs[1].doc][:content])
540
+ assert_equal("19530315", index[top_docs.score_docs[2].doc][:date])
541
+
486
542
  index.close
487
543
  end
488
544
 
@@ -15,6 +15,47 @@ module IndexReaderCommon
15
15
 
16
16
  do_test_get_doc()
17
17
 
18
+ do_test_term_enum()
19
+ end
20
+
21
+ def do_test_term_enum()
22
+ te = @ir.terms
23
+
24
+ assert(te.next?)
25
+ assert_equal(Term.new("author", "Leo"), te.term)
26
+ assert_equal(1, te.doc_freq)
27
+ assert(te.next?)
28
+ assert_equal(Term.new("author", "Tolstoy"), te.term)
29
+ assert_equal(1, te.doc_freq)
30
+ assert(te.next?)
31
+ assert_equal(Term.new("body", "And"), te.term)
32
+ assert_equal(1, te.doc_freq)
33
+
34
+
35
+ assert(te.skip_to(Term.new("body", "Not")))
36
+ assert_equal(Term.new("body", "Not"), te.term)
37
+ assert_equal(1, te.doc_freq)
38
+ assert(te.next?)
39
+ assert_equal(Term.new("body", "Random"), te.term)
40
+ assert_equal(16, te.doc_freq)
41
+
42
+ assert(te.skip_to(Term.new("text", "which")))
43
+ assert(Term.new("text", "which"), te.term)
44
+ assert_equal(1, te.doc_freq)
45
+ assert(te.next?)
46
+ assert_equal(Term.new("title", "War And Peace"), te.term)
47
+ assert_equal(1, te.doc_freq)
48
+ assert(!te.next?)
49
+
50
+ te.close
51
+
52
+ te = @ir.terms_from(Term.new("body", "Not"))
53
+ assert_equal(Term.new("body", "Not"), te.term)
54
+ assert_equal(1, te.doc_freq)
55
+ assert(te.next?)
56
+ assert_equal(Term.new("body", "Random"), te.term)
57
+ assert_equal(16, te.doc_freq)
58
+ te.close
18
59
  end
19
60
 
20
61
  def do_test_term_doc_enum()
@@ -155,7 +196,7 @@ module IndexReaderCommon
155
196
 
156
197
  assert_equal("body", tv.field)
157
198
  assert_equal(["word1", "word2", "word3", "word4"], tv.terms)
158
- assert_equal([3, 1, 4, 2], tv.term_frequencies)
199
+ assert_equal([3, 1, 4, 2], tv.freqs)
159
200
  assert_equal([[2, 4, 7], [3], [0, 5, 8, 9], [1,6]], tv.positions)
160
201
  assert_equal([[t(12,17), t(24,29), t(42,47)],
161
202
  [t(18,23)],
@@ -489,15 +530,15 @@ class IndexReaderTest < Test::Unit::TestCase
489
530
  doc << Field.new("title", "this is the title DocField", Field::Store::YES, Field::Index::UNTOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
490
531
  doc << Field.new("author", "this is the author field", Field::Store::YES, Field::Index::UNTOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
491
532
 
492
- fis = FieldInfos.new()
493
- fis << doc
494
- assert_equal(4, fis.size)
533
+ #fis = FieldInfos.new()
534
+ #fis << doc
535
+ #assert_equal(4, fis.size)
495
536
 
496
- fi = fis["tag"]
497
- assert_equal(true, fi.indexed?)
498
- assert_equal(true, fi.store_term_vector?)
499
- assert_equal(true, fi.store_positions?)
500
- assert_equal(true, fi.store_offsets?)
537
+ #fi = fis["tag"]
538
+ #assert_equal(true, fi.indexed?)
539
+ #assert_equal(true, fi.store_term_vector?)
540
+ #assert_equal(true, fi.store_positions?)
541
+ #assert_equal(true, fi.store_offsets?)
501
542
 
502
543
  iw << doc
503
544
  iw.close()
@@ -549,7 +590,7 @@ class IndexReaderTest < Test::Unit::TestCase
549
590
 
550
591
  assert_equal("body", tv.field)
551
592
  assert_equal(["word1", "word2", "word3", "word4"], tv.terms)
552
- assert_equal([3, 1, 4, 2], tv.term_frequencies)
593
+ assert_equal([3, 1, 4, 2], tv.freqs)
553
594
  assert_equal([[2, 4, 7], [3], [0, 5, 8, 9], [1,6]], tv.positions)
554
595
  assert_equal([[t(12,17), t(24,29), t(42,47)],
555
596
  [t(18,23)],
@@ -30,8 +30,6 @@ class IndexWriterTest < Test::Unit::TestCase
30
30
  def test_add_document
31
31
  iw = IndexWriter.new(@dir, :analyzer => StandardAnalyzer.new(), :create => true)
32
32
  doc = IndexTestHelper.prepare_document()
33
- infos = FieldInfos.new
34
- infos << doc
35
33
  iw.add_document(doc)
36
34
  assert_equal(1, iw.doc_count)
37
35
  iw.close()
@@ -44,8 +42,6 @@ class IndexWriterTest < Test::Unit::TestCase
44
42
  iw.merge_factor = 3
45
43
  iw.min_merge_docs = 3
46
44
  docs = IndexTestHelper.prepare_book_list()
47
- infos = FieldInfos.new
48
- infos << docs[0]
49
45
  docs.each_with_index do |doc, i|
50
46
  #puts "Index doc " + i.to_s
51
47
  iw.add_document(doc)
@@ -1,6 +1,5 @@
1
1
  require File.dirname(__FILE__) + "/../../test_helper"
2
2
 
3
-
4
3
  class TermTest < Test::Unit::TestCase
5
4
  include Ferret::Index
6
5
  def test_term()
@@ -16,7 +15,6 @@ class TermTest < Test::Unit::TestCase
16
15
  assert(term1 == term4)
17
16
  assert(term1.eql?(term4))
18
17
  term4.set!("field3", "text3")
19
- assert(term1 != term4)
18
+ assert_not_equal(term1, term4)
20
19
  end
21
-
22
20
  end
@@ -5,14 +5,14 @@ class TermVectorOffsetInfoTest < Test::Unit::TestCase
5
5
  include Ferret::Index
6
6
  def test_tvoi()
7
7
  t1 = TermVectorOffsetInfo.new(1, 3)
8
- assert_equal(t1.start_offset, 1)
9
- assert_equal(t1.end_offset, 3)
8
+ assert_equal(t1.start, 1)
9
+ assert_equal(t1.end, 3)
10
10
  t2 = TermVectorOffsetInfo.new(1, 3)
11
11
  assert(t1 == t2)
12
- t2.start_offset = 2
12
+ t2.start = 2
13
13
  assert(t1 != t2)
14
- t2.start_offset = 1
15
- t2.end_offset = 1
14
+ t2.start = 1
15
+ t2.end = 1
16
16
  assert(t1 != t2)
17
17
  end
18
18
  end
@@ -17,7 +17,7 @@ class SegmentTermVectorTest < Test::Unit::TestCase
17
17
 
18
18
  def test_index_of()
19
19
  assert_equal(0, @stv.index_of("Apples"))
20
- assert_equal(4, @stv.term_frequencies[@stv.index_of("Apples")])
20
+ assert_equal(4, @stv.freqs[@stv.index_of("Apples")])
21
21
  end
22
22
 
23
23
  def test_indexes_of()
@@ -56,7 +56,7 @@ class SegmentTermVectorWithPosOffsetsTest < Test::Unit::TestCase
56
56
 
57
57
  def test_index_of()
58
58
  assert_equal(0, @stv.index_of("Apples"))
59
- assert_equal(4, @stv.term_frequencies[@stv.index_of("Apples")])
59
+ assert_equal(4, @stv.freqs[@stv.index_of("Apples")])
60
60
  end
61
61
 
62
62
  def test_indexes_of()
@@ -33,12 +33,12 @@ class TermVectorsIOTest < Test::Unit::TestCase
33
33
 
34
34
  assert_equal(2, tv.size)
35
35
  assert_equal("text1", tv.terms[0])
36
- assert_equal(1, tv.term_frequencies[0])
36
+ assert_equal(1, tv.freqs[0])
37
37
  assert_equal(1, tv.positions[0][0])
38
38
  assert_equal(t(0,4), tv.offsets[0][0])
39
39
 
40
40
  assert_equal("text2", tv.terms[1])
41
- assert_equal(2, tv.term_frequencies[1])
41
+ assert_equal(2, tv.freqs[1])
42
42
  assert_equal(3, tv.positions[1][0])
43
43
  assert_equal(t(5,10), tv.offsets[1][0])
44
44
  assert_equal(4, tv.positions[1][1])
@@ -77,7 +77,7 @@ class TermVectorsIOTest < Test::Unit::TestCase
77
77
 
78
78
  assert_equal(2, tv.size)
79
79
  assert_equal("word1", tv.terms[0])
80
- assert_equal(3, tv.term_frequencies[0])
80
+ assert_equal(3, tv.freqs[0])
81
81
  assert_equal(1, tv.positions[0][0])
82
82
  assert_equal(5, tv.positions[0][1])
83
83
  assert_equal(8, tv.positions[0][2])
@@ -86,7 +86,7 @@ class TermVectorsIOTest < Test::Unit::TestCase
86
86
  assert_equal(t(45,50), tv.offsets[0][2])
87
87
 
88
88
  assert_equal("word2", tv.terms[1])
89
- assert_equal(2, tv.term_frequencies[1])
89
+ assert_equal(2, tv.freqs[1])
90
90
  assert_equal(2, tv.positions[1][0])
91
91
  assert_equal(9, tv.positions[1][1])
92
92
  assert_equal(t(6,11), tv.offsets[1][0])
@@ -0,0 +1,138 @@
1
+ require File.dirname(__FILE__) + "/../../test_helper"
2
+
3
+ class QueryParserTest < Test::Unit::TestCase
4
+
5
+ def test_strings()
6
+ parser = Ferret::QueryParser.new("xxx", :fields => ["xxx", "field", "f1", "f2"])
7
+ pairs = [
8
+ ['', ''],
9
+ ['word', 'word'],
10
+ ['field:word', 'field:word'],
11
+ ['"word1 word2 word3"', '"word word word"'],
12
+ ['"word1 2342 word3"', '"word word"'],
13
+ ['field:"one two three"', 'field:"one two three"'],
14
+ ['field:"one 222 three"', 'field:"one three"'],
15
+ ['field:"one <> three"', 'field:"one <> three"'],
16
+ ['field:"one <> three <>"', 'field:"one <> three"'],
17
+ ['field:"one <> <> <> three <>"', 'field:"one <> <> <> three"'],
18
+ ['field:"one <> <> <> three|four|five <>"', 'field:"one <> <> <> three|four|five"'],
19
+ ['field:"one|two three|four|five six|seven"', 'field:"one|two three|four|five six|seven"'],
20
+ ['field:"testing|trucks"', 'field:testing field:trucks'],
21
+ ['[aaa bbb]', '[aaa bbb]'],
22
+ ['{aaa bbb]', '{aaa bbb]'],
23
+ ['field:[aaa bbb}', 'field:[aaa bbb}'],
24
+ ['{aaa bbb}', '{aaa bbb}'],
25
+ ['{aaa>', '{aaa>'],
26
+ ['[aaa>', '[aaa>'],
27
+ ['field:<aaa}', 'field:<aaa}'],
28
+ ['<aaa]', '<aaa]'],
29
+ ['>aaa', '{aaa>'],
30
+ ['>=aaa', '[aaa>'],
31
+ ['<aaa', '<aaa}'],
32
+ ['field:<=aaa', 'field:<aaa]'],
33
+ ['REQ one REQ two', '+one +two'],
34
+ ['REQ one two', '+one two'],
35
+ ['one REQ two', 'one +two'],
36
+ ['+one +two', '+one +two'],
37
+ ['+one two', '+one two'],
38
+ ['one +two', 'one +two'],
39
+ ['-one -two', '-one -two'],
40
+ ['-one two', '-one two'],
41
+ ['one -two', 'one -two'],
42
+ ['!one !two', '-one -two'],
43
+ ['!one two', '-one two'],
44
+ ['one !two', 'one -two'],
45
+ ['NOT one NOT two', '-one -two'],
46
+ ['NOT one two', '-one two'],
47
+ ['one NOT two', 'one -two'],
48
+ ['one two', 'one two'],
49
+ ['one OR two', 'one two'],
50
+ ['one AND two', '+one +two'],
51
+ ['one two AND three', 'one two +three'],
52
+ ['one two OR three', 'one two three'],
53
+ ['one (two AND three)', 'one (+two +three)'],
54
+ ['one AND (two OR three)', '+one +(two three)'],
55
+ ['field:(one AND (two OR three))', '+field:one +(field:two field:three)'],
56
+ ['one AND (two OR [aaa vvv})', '+one +(two [aaa vvv})'],
57
+ ['one AND (f1:two OR f2:three) AND four', '+one +(f1:two f2:three) +four'],
58
+ ['one^1.23', 'one^1.23'],
59
+ ['(one AND two)^100.23', '(+one +two)^100.23'],
60
+ ['field:(one AND two)^100.23', '(+field:one +field:two)^100.23'],
61
+ ['field:(one AND [aaa bbb]^23.3)^100.23', '(+field:one +field:[aaa bbb]^23.3)^100.23'],
62
+ ['(REQ field:"one two three")^23', 'field:"one two three"^23.0'],
63
+ ['asdf~0.2', 'asdf~0.2'],
64
+ ['field:asdf~0.2', 'field:asdf~0.2'],
65
+ ['asdf~0.2^100.0', 'asdf~0.2^100.0'],
66
+ ['field:asdf~0.2^0.1', 'field:asdf~0.2^0.1'],
67
+ ['field:"asdf <> asdf|asdf"~4', 'field:"asdf <> asdf|asdf"~4'],
68
+ ['"one two three four five"~5', '"one two three four five"~5'],
69
+ ['ab?de', 'ab?de'],
70
+ ['ab*de', 'ab*de'],
71
+ ['asdf?*?asd*dsf?asfd*asdf?', 'asdf?*?asd*dsf?asfd*asdf?'],
72
+ ['field:a* AND field:(b*)', '+field:a* +field:b*'],
73
+ ['field:abc~ AND field:(b*)', '+field:abc~ +field:b*'],
74
+ ['asdf?*?asd*dsf?asfd*asdf?^20.0', 'asdf?*?asd*dsf?asfd*asdf?^20.0'],
75
+
76
+ ['*:xxx', 'xxx field:xxx f1:xxx f2:xxx'],
77
+ ['f1|f2:xxx', 'f1:xxx f2:xxx'],
78
+
79
+ ['*:asd~0.2', 'asd~0.2 field:asd~0.2 f1:asd~0.2 f2:asd~0.2'],
80
+ ['f1|f2:asd~0.2', 'f1:asd~0.2 f2:asd~0.2'],
81
+
82
+ ['*:a?d*^20.0', '(a?d* field:a?d* f1:a?d* f2:a?d*)^20.0'],
83
+ ['f1|f2:a?d*^20.0', '(f1:a?d* f2:a?d*)^20.0'],
84
+
85
+ ['*:"asdf <> xxx|yyy"', '"asdf <> xxx|yyy" field:"asdf <> xxx|yyy" f1:"asdf <> xxx|yyy" f2:"asdf <> xxx|yyy"'],
86
+ ['f1|f2:"asdf <> xxx|yyy"', 'f1:"asdf <> xxx|yyy" f2:"asdf <> xxx|yyy"'],
87
+
88
+ ['*:[bbb xxx]', '[bbb xxx] field:[bbb xxx] f1:[bbb xxx] f2:[bbb xxx]'],
89
+ ['f1|f2:[bbb xxx]', 'f1:[bbb xxx] f2:[bbb xxx]'],
90
+
91
+ ['*:(xxx AND bbb)', '+(xxx field:xxx f1:xxx f2:xxx) +(bbb field:bbb f1:bbb f2:bbb)'],
92
+ ['f1|f2:(xxx AND bbb)', '+(f1:xxx f2:xxx) +(f1:bbb f2:bbb)'],
93
+ ['asdf?*?asd*dsf?asfd*asdf?^20.0', 'asdf?*?asd*dsf?asfd*asdf?^20.0'],
94
+ ['"onewordphrase"', 'onewordphrase']
95
+ ]
96
+
97
+ pairs.each do |query_str, expected|
98
+ assert_equal(expected, parser.parse(query_str).to_s("xxx"))
99
+ end
100
+ end
101
+
102
+ def test_qp_with_standard_analyzer()
103
+ parser = Ferret::QueryParser.new("xxx", :fields => ["xxx", "key"],
104
+ :analyzer => Ferret::Analysis::StandardAnalyzer.new)
105
+ pairs = [
106
+ ['key:1234', 'key:1234'],
107
+ ['key:(1234)', 'key:1234']
108
+ ]
109
+
110
+ pairs.each do |query_str, expected|
111
+ assert_equal(expected, parser.parse(query_str).to_s("xxx"))
112
+ end
113
+ end
114
+
115
+ def do_test_query_parse_exception_raised(str)
116
+ parser = Ferret::QueryParser.new("xxx", :fields => ["f1", "f2", "f3"])
117
+ assert_raise(Ferret::QueryParser::QueryParseException) do
118
+ parser.parse(str)
119
+ end
120
+ end
121
+
122
+
123
+ def test_bad_queries
124
+ parser = Ferret::QueryParser.new("xxx", :fields => ["f1", "f2"],
125
+ :handle_parse_errors => true)
126
+
127
+ pairs = [
128
+ ['::*word', 'word'],
129
+ ['()*&)(*^&*(', ''],
130
+ ['()*&one)(*two(*&"', '"one two"']
131
+ ]
132
+
133
+ pairs.each do |query_str, expected|
134
+ do_test_query_parse_exception_raised(query_str)
135
+ assert_equal(expected, parser.parse(query_str).to_s("xxx"))
136
+ end
137
+ end
138
+ end