ferret 0.3.2 → 0.9.0

This diff compares publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the versions as they appear in their public registries.
Files changed (141)
  1. data/CHANGELOG +9 -0
  2. data/Rakefile +51 -25
  3. data/ext/analysis.c +553 -0
  4. data/ext/analysis.h +76 -0
  5. data/ext/array.c +83 -0
  6. data/ext/array.h +19 -0
  7. data/ext/bitvector.c +164 -0
  8. data/ext/bitvector.h +29 -0
  9. data/ext/compound_io.c +335 -0
  10. data/ext/document.c +336 -0
  11. data/ext/document.h +87 -0
  12. data/ext/ferret.c +88 -47
  13. data/ext/ferret.h +43 -109
  14. data/ext/field.c +395 -0
  15. data/ext/filter.c +103 -0
  16. data/ext/fs_store.c +352 -0
  17. data/ext/global.c +219 -0
  18. data/ext/global.h +73 -0
  19. data/ext/hash.c +446 -0
  20. data/ext/hash.h +80 -0
  21. data/ext/hashset.c +141 -0
  22. data/ext/hashset.h +37 -0
  23. data/ext/helper.c +11 -0
  24. data/ext/helper.h +5 -0
  25. data/ext/inc/lang.h +41 -0
  26. data/ext/ind.c +389 -0
  27. data/ext/index.h +884 -0
  28. data/ext/index_io.c +269 -415
  29. data/ext/index_rw.c +2543 -0
  30. data/ext/lang.c +31 -0
  31. data/ext/lang.h +41 -0
  32. data/ext/priorityqueue.c +228 -0
  33. data/ext/priorityqueue.h +44 -0
  34. data/ext/q_boolean.c +1331 -0
  35. data/ext/q_const_score.c +154 -0
  36. data/ext/q_fuzzy.c +287 -0
  37. data/ext/q_match_all.c +142 -0
  38. data/ext/q_multi_phrase.c +343 -0
  39. data/ext/q_parser.c +2180 -0
  40. data/ext/q_phrase.c +657 -0
  41. data/ext/q_prefix.c +75 -0
  42. data/ext/q_range.c +247 -0
  43. data/ext/q_span.c +1566 -0
  44. data/ext/q_term.c +308 -0
  45. data/ext/q_wildcard.c +146 -0
  46. data/ext/r_analysis.c +255 -0
  47. data/ext/r_doc.c +578 -0
  48. data/ext/r_index_io.c +996 -0
  49. data/ext/r_qparser.c +158 -0
  50. data/ext/r_search.c +2321 -0
  51. data/ext/r_store.c +263 -0
  52. data/ext/r_term.c +219 -0
  53. data/ext/ram_store.c +447 -0
  54. data/ext/search.c +524 -0
  55. data/ext/search.h +1065 -0
  56. data/ext/similarity.c +143 -39
  57. data/ext/sort.c +661 -0
  58. data/ext/store.c +35 -0
  59. data/ext/store.h +152 -0
  60. data/ext/term.c +704 -143
  61. data/ext/termdocs.c +599 -0
  62. data/ext/vector.c +594 -0
  63. data/lib/ferret.rb +9 -10
  64. data/lib/ferret/analysis/analyzers.rb +2 -2
  65. data/lib/ferret/analysis/standard_tokenizer.rb +1 -1
  66. data/lib/ferret/analysis/token.rb +14 -14
  67. data/lib/ferret/analysis/token_filters.rb +3 -3
  68. data/lib/ferret/document/field.rb +16 -17
  69. data/lib/ferret/index/document_writer.rb +4 -4
  70. data/lib/ferret/index/index.rb +39 -23
  71. data/lib/ferret/index/index_writer.rb +2 -2
  72. data/lib/ferret/index/multiple_term_doc_pos_enum.rb +1 -8
  73. data/lib/ferret/index/segment_term_vector.rb +4 -4
  74. data/lib/ferret/index/term.rb +5 -1
  75. data/lib/ferret/index/term_vector_offset_info.rb +6 -6
  76. data/lib/ferret/index/term_vectors_io.rb +5 -5
  77. data/lib/ferret/query_parser/query_parser.tab.rb +81 -77
  78. data/lib/ferret/search.rb +1 -1
  79. data/lib/ferret/search/boolean_query.rb +2 -1
  80. data/lib/ferret/search/field_sorted_hit_queue.rb +3 -3
  81. data/lib/ferret/search/fuzzy_query.rb +2 -1
  82. data/lib/ferret/search/index_searcher.rb +3 -0
  83. data/lib/ferret/search/{match_all_docs_query.rb → match_all_query.rb} +7 -7
  84. data/lib/ferret/search/multi_phrase_query.rb +6 -5
  85. data/lib/ferret/search/phrase_query.rb +3 -6
  86. data/lib/ferret/search/prefix_query.rb +4 -4
  87. data/lib/ferret/search/sort.rb +3 -1
  88. data/lib/ferret/search/sort_field.rb +9 -9
  89. data/lib/ferret/search/spans/near_spans_enum.rb +1 -1
  90. data/lib/ferret/search/spans/span_near_query.rb +1 -1
  91. data/lib/ferret/search/spans/span_weight.rb +1 -1
  92. data/lib/ferret/search/spans/spans_enum.rb +7 -7
  93. data/lib/ferret/store/fs_store.rb +10 -6
  94. data/lib/ferret/store/ram_store.rb +3 -3
  95. data/lib/rferret.rb +36 -0
  96. data/test/functional/thread_safety_index_test.rb +2 -2
  97. data/test/test_helper.rb +16 -2
  98. data/test/unit/analysis/c_token.rb +25 -0
  99. data/test/unit/analysis/tc_per_field_analyzer_wrapper.rb +1 -1
  100. data/test/unit/analysis/tc_standard_analyzer.rb +1 -1
  101. data/test/unit/document/{tc_document.rb → c_document.rb} +0 -0
  102. data/test/unit/document/c_field.rb +98 -0
  103. data/test/unit/document/tc_field.rb +0 -66
  104. data/test/unit/index/{tc_index.rb → c_index.rb} +62 -6
  105. data/test/unit/index/{tc_index_reader.rb → c_index_reader.rb} +51 -10
  106. data/test/unit/index/{tc_index_writer.rb → c_index_writer.rb} +0 -4
  107. data/test/unit/index/{tc_term.rb → c_term.rb} +1 -3
  108. data/test/unit/index/{tc_term_vector_offset_info.rb → c_term_voi.rb} +5 -5
  109. data/test/unit/index/tc_segment_term_vector.rb +2 -2
  110. data/test/unit/index/tc_term_vectors_io.rb +4 -4
  111. data/test/unit/query_parser/c_query_parser.rb +138 -0
  112. data/test/unit/search/{tc_filter.rb → c_filter.rb} +24 -24
  113. data/test/unit/search/{tc_fuzzy_query.rb → c_fuzzy_query.rb} +0 -0
  114. data/test/unit/search/{tc_index_searcher.rb → c_index_searcher.rb} +9 -26
  115. data/test/unit/search/{tc_search_and_sort.rb → c_search_and_sort.rb} +15 -15
  116. data/test/unit/search/{tc_sort.rb → c_sort.rb} +2 -1
  117. data/test/unit/search/c_sort_field.rb +27 -0
  118. data/test/unit/search/{tc_spans.rb → c_spans.rb} +0 -0
  119. data/test/unit/search/tc_sort_field.rb +7 -20
  120. data/test/unit/store/c_fs_store.rb +76 -0
  121. data/test/unit/store/c_ram_store.rb +35 -0
  122. data/test/unit/store/m_store.rb +34 -0
  123. data/test/unit/store/m_store_lock.rb +68 -0
  124. data/test/unit/store/tc_fs_store.rb +0 -53
  125. data/test/unit/store/tc_ram_store.rb +0 -20
  126. data/test/unit/store/tm_store.rb +0 -30
  127. data/test/unit/store/tm_store_lock.rb +0 -66
  128. metadata +84 -31
  129. data/ext/Makefile +0 -140
  130. data/ext/ferret_ext.so +0 -0
  131. data/ext/priority_queue.c +0 -232
  132. data/ext/ram_directory.c +0 -321
  133. data/ext/segment_merge_queue.c +0 -37
  134. data/ext/segment_term_enum.c +0 -326
  135. data/ext/string_helper.c +0 -42
  136. data/ext/tags +0 -344
  137. data/ext/term_buffer.c +0 -230
  138. data/ext/term_infos_reader.c +0 -54
  139. data/ext/terminfo.c +0 -160
  140. data/ext/token.c +0 -93
  141. data/ext/util.c +0 -12
@@ -25,70 +25,4 @@ class FieldTest < Test::Unit::TestCase
  assert_equal("WITH_OFFSETS", Field::TermVector::WITH_OFFSETS.to_s)
  assert_equal("WITH_POSITIONS_OFFSETS", Field::TermVector::WITH_POSITIONS_OFFSETS.to_s)
  end
-
- def test_standard_field()
- f = Field.new("name", "value", Field::Store::COMPRESS, Field::Index::TOKENIZED)
- assert_equal("name", f.name)
- assert_equal("value", f.data)
- assert_equal(true, f.stored?)
- assert_equal(true, f.compressed?)
- assert_equal(true, f.indexed?)
- assert_equal(true, f.tokenized?)
- assert_equal(false, f.store_term_vector?)
- assert_equal(false, f.store_offsets?)
- assert_equal(false, f.store_positions?)
- assert_equal(false, f.omit_norms?)
- assert_equal(false, f.binary?)
- assert_equal("stored/compressed,indexed,tokenized,<name:value>", f.to_s)
- end
-
- def test_set_store()
- f = Field.new("name", nil, Field::Store::COMPRESS, Field::Index::TOKENIZED)
- f.stored = Field::Store::NO
- assert_equal(false, f.stored?)
- assert_equal(false, f.compressed?)
- assert_equal("indexed,tokenized,<name:>", f.to_s)
- end
-
- def test_set_index()
- f = Field.new("name", "value", Field::Store::COMPRESS, Field::Index::TOKENIZED)
- f.index = Field::Index::NO
- assert_equal(false, f.indexed?)
- assert_equal(false, f.tokenized?)
- assert_equal(false, f.omit_norms?)
- assert_equal("stored/compressed,<name:value>", f.to_s)
- f.index = Field::Index::NO_NORMS
- assert_equal(true, f.indexed?)
- assert_equal(false, f.tokenized?)
- assert_equal(true, f.omit_norms?)
- assert_equal("stored/compressed,indexed,omit_norms,<name:value>", f.to_s)
- end
-
- def test_set_term_vector()
- f = Field.new("name", "value", Field::Store::COMPRESS, Field::Index::TOKENIZED)
- f.store_term_vector = Field::TermVector::WITH_POSITIONS_OFFSETS
- assert_equal(true, f.store_term_vector?)
- assert_equal(true, f.store_offsets?)
- assert_equal(true, f.store_positions?)
- assert_equal("stored/compressed,indexed,tokenized,store_term_vector,tv_offset,tv_position,<name:value>", f.to_s)
- end
-
- def test_new_binary_field()
- tmp = []
- 256.times {|i| tmp[i] = i}
- bin = tmp.pack("c*")
- f = Field.new_binary_field("name", bin, Field::Store::YES)
- assert_equal("name", f.name)
- assert_equal(bin, f.data)
- assert_equal(true, f.stored?)
- assert_equal(false, f.compressed?)
- assert_equal(false, f.indexed?)
- assert_equal(false, f.tokenized?)
- assert_equal(false, f.store_term_vector?)
- assert_equal(false, f.store_offsets?)
- assert_equal(false, f.store_positions?)
- assert_equal(false, f.omit_norms?)
- assert_equal(true, f.binary?)
- assert_equal("stored/uncompressed,binary,<name:#{bin}>", f.to_s)
- end
  end
@@ -16,7 +16,10 @@ class IndexTest < Test::Unit::TestCase
 
  def check_results(index, query, expected)
  cnt = 0
+ #puts "#{query} - #{expected.inspect}"
+ #puts index.size
  index.search_each(query) do |doc, score|
+ #puts "doc-#{doc} score=#{score}"
  assert(expected.index(doc))
  cnt += 1
  end
@@ -136,7 +139,7 @@ class IndexTest < Test::Unit::TestCase
  do_test_index_with_hash(index)
  index.close
 
- index = Index.new(:default_field => "def_field")
+ index = Index.new(:default_field => "def_field", :id_field => "id")
  do_test_index_with_doc_array(index)
  index.close
  end
@@ -144,7 +147,11 @@ class IndexTest < Test::Unit::TestCase
  def test_fs_index
  fs_path = File.expand_path(File.join(File.dirname(__FILE__), '../../temp/fsdir'))
  Dir[File.join(fs_path, "*")].each {|path| begin File.delete(path) rescue nil end}
- assert_raise(Errno::ENOENT) {Index.new(:path => fs_path, :create_if_missing => false, :default_field => "def_field")}
+ assert_raise(StandardError) do
+ Index.new(:path => fs_path,
+ :create_if_missing => false,
+ :default_field => "def_field")
+ end
  index = Index.new(:path => fs_path, :default_field => "def_field")
  do_test_index_with_array(index)
  index.close
@@ -155,7 +162,9 @@ class IndexTest < Test::Unit::TestCase
  index.close
 
  Dir[File.join(fs_path, "*")].each {|path| begin File.delete(path) rescue nil end}
- index = Index.new(:path => fs_path, :default_field => "def_field")
+ index = Index.new(:path => fs_path,
+ :default_field => "def_field",
+ :id_field => "id")
  do_test_index_with_doc_array(index)
  index.close
  end
@@ -317,12 +326,16 @@ class IndexTest < Test::Unit::TestCase
  index2 << "document 2"
  assert_equal(2, index2.size)
  assert_equal(2, index.size)
+ top_docs = index.search("content3")
+ assert_equal(0, top_docs.size)
 
  iw = IndexWriter.new(fs_path, :analyzer => WhiteSpaceAnalyzer.new())
  doc = Document.new
  doc << Field.new("f", "content3", Field::Store::YES, Field::Index::TOKENIZED)
  iw << doc
  iw.close()
+ top_docs = index.search("content3")
+ assert_equal(1, top_docs.size)
  assert_equal(3, index.size)
  assert_equal("content3", index[2]["f"])
  index.close
@@ -373,7 +386,8 @@ class IndexTest < Test::Unit::TestCase
  {:id => 9, :cat => "/cat2/subcat5", :content => "content9"},
  ]
  index = Index.new(:analyzer => WhiteSpaceAnalyzer.new,
- :default_field => :content)
+ :default_field => :content,
+ :id_field => :id)
  data.each { |doc| index << doc }
  assert_equal(10, index.size)
  assert_equal("content5", index["5"][:content])
@@ -449,6 +463,39 @@ class IndexTest < Test::Unit::TestCase
  index.close
  end
 
+ def test_index_multi_key_untokenized
+ data = [
+ {:id => 0, :table => "Product", :product => "tent"},
+ {:id => 0, :table => "location", :location => "first floor"},
+ {:id => 0, :table => "Product", :product => "super tent"},
+ {:id => 0, :table => "location", :location => "second floor"},
+ {:id => 1, :table => "Product", :product => "backback"},
+ {:id => 1, :table => "location", :location => "second floor"},
+ {:id => 1, :table => "location", :location => "first floor"},
+ {:id => 1, :table => "Product", :product => "rucksack"},
+ {:id => 1, :table => "Product", :product => "backpack"}
+ ]
+ index = Index.new(:analyzer => Analyzer.new,
+ :key => ["id", "table"])
+ data.each do |dat|
+ doc = Document.new
+ dat.each_pair do |key, value|
+ if ([:id, :table].include?(key))
+ doc << Field.new(key, value, Field::Store::YES, Field::Index::UNTOKENIZED)
+ else
+ doc << Field.new(key, value, Field::Store::YES, Field::Index::TOKENIZED)
+ end
+ end
+ index << doc
+ end
+ assert_equal(4, index.size)
+ assert_equal("super tent", index[0][:product])
+ assert_equal("second floor", index[1][:location])
+ assert_equal("backpack", index[3][:product])
+ assert_equal("first floor", index[2][:location])
+ index.close
+ end
+
  def test_sortby_date
  data = [
  {:content => "one", :date => "20051023"},
@@ -459,7 +506,7 @@ class IndexTest < Test::Unit::TestCase
  {:content => "three", :date => "19790531"},
  {:content => "one", :date => "19770725"},
  {:content => "two", :date => "19751226"},
- {:content => "three", :date => "19390912"}
+ {:content => "four", :date => "19390912"}
  ]
  index = Index.new(:analyzer => WhiteSpaceAnalyzer.new)
  data.each { |doc|
@@ -481,8 +528,17 @@ class IndexTest < Test::Unit::TestCase
  assert_equal("19390912", index[top_docs.score_docs[0].doc][:date])
  assert_equal("three four", index[top_docs.score_docs[0].doc][:content])
  assert_equal("19390912", index[top_docs.score_docs[1].doc][:date])
- assert_equal("three", index[top_docs.score_docs[1].doc][:content])
+ assert_equal("four", index[top_docs.score_docs[1].doc][:content])
  assert_equal("19530315", index[top_docs.score_docs[2].doc][:date])
+
+ top_docs = index.search("one two three four",
+ :sort => [:date, :content])
+ assert_equal("19390912", index[top_docs.score_docs[0].doc][:date])
+ assert_equal("four", index[top_docs.score_docs[0].doc][:content])
+ assert_equal("19390912", index[top_docs.score_docs[1].doc][:date])
+ assert_equal("three four", index[top_docs.score_docs[1].doc][:content])
+ assert_equal("19530315", index[top_docs.score_docs[2].doc][:date])
+
  index.close
  end
 
@@ -15,6 +15,47 @@ module IndexReaderCommon
 
  do_test_get_doc()
 
+ do_test_term_enum()
+ end
+
+ def do_test_term_enum()
+ te = @ir.terms
+
+ assert(te.next?)
+ assert_equal(Term.new("author", "Leo"), te.term)
+ assert_equal(1, te.doc_freq)
+ assert(te.next?)
+ assert_equal(Term.new("author", "Tolstoy"), te.term)
+ assert_equal(1, te.doc_freq)
+ assert(te.next?)
+ assert_equal(Term.new("body", "And"), te.term)
+ assert_equal(1, te.doc_freq)
+
+
+ assert(te.skip_to(Term.new("body", "Not")))
+ assert_equal(Term.new("body", "Not"), te.term)
+ assert_equal(1, te.doc_freq)
+ assert(te.next?)
+ assert_equal(Term.new("body", "Random"), te.term)
+ assert_equal(16, te.doc_freq)
+
+ assert(te.skip_to(Term.new("text", "which")))
+ assert(Term.new("text", "which"), te.term)
+ assert_equal(1, te.doc_freq)
+ assert(te.next?)
+ assert_equal(Term.new("title", "War And Peace"), te.term)
+ assert_equal(1, te.doc_freq)
+ assert(!te.next?)
+
+ te.close
+
+ te = @ir.terms_from(Term.new("body", "Not"))
+ assert_equal(Term.new("body", "Not"), te.term)
+ assert_equal(1, te.doc_freq)
+ assert(te.next?)
+ assert_equal(Term.new("body", "Random"), te.term)
+ assert_equal(16, te.doc_freq)
+ te.close
  end
 
  def do_test_term_doc_enum()
@@ -155,7 +196,7 @@ module IndexReaderCommon
 
  assert_equal("body", tv.field)
  assert_equal(["word1", "word2", "word3", "word4"], tv.terms)
- assert_equal([3, 1, 4, 2], tv.term_frequencies)
+ assert_equal([3, 1, 4, 2], tv.freqs)
  assert_equal([[2, 4, 7], [3], [0, 5, 8, 9], [1,6]], tv.positions)
  assert_equal([[t(12,17), t(24,29), t(42,47)],
  [t(18,23)],
@@ -489,15 +530,15 @@ class IndexReaderTest < Test::Unit::TestCase
  doc << Field.new("title", "this is the title DocField", Field::Store::YES, Field::Index::UNTOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
  doc << Field.new("author", "this is the author field", Field::Store::YES, Field::Index::UNTOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
 
- fis = FieldInfos.new()
- fis << doc
- assert_equal(4, fis.size)
+ #fis = FieldInfos.new()
+ #fis << doc
+ #assert_equal(4, fis.size)
 
- fi = fis["tag"]
- assert_equal(true, fi.indexed?)
- assert_equal(true, fi.store_term_vector?)
- assert_equal(true, fi.store_positions?)
- assert_equal(true, fi.store_offsets?)
+ #fi = fis["tag"]
+ #assert_equal(true, fi.indexed?)
+ #assert_equal(true, fi.store_term_vector?)
+ #assert_equal(true, fi.store_positions?)
+ #assert_equal(true, fi.store_offsets?)
 
  iw << doc
  iw.close()
@@ -549,7 +590,7 @@ class IndexReaderTest < Test::Unit::TestCase
 
  assert_equal("body", tv.field)
  assert_equal(["word1", "word2", "word3", "word4"], tv.terms)
- assert_equal([3, 1, 4, 2], tv.term_frequencies)
+ assert_equal([3, 1, 4, 2], tv.freqs)
  assert_equal([[2, 4, 7], [3], [0, 5, 8, 9], [1,6]], tv.positions)
  assert_equal([[t(12,17), t(24,29), t(42,47)],
  [t(18,23)],
@@ -30,8 +30,6 @@ class IndexWriterTest < Test::Unit::TestCase
  def test_add_document
  iw = IndexWriter.new(@dir, :analyzer => StandardAnalyzer.new(), :create => true)
  doc = IndexTestHelper.prepare_document()
- infos = FieldInfos.new
- infos << doc
  iw.add_document(doc)
  assert_equal(1, iw.doc_count)
  iw.close()
@@ -44,8 +42,6 @@ class IndexWriterTest < Test::Unit::TestCase
  iw.merge_factor = 3
  iw.min_merge_docs = 3
  docs = IndexTestHelper.prepare_book_list()
- infos = FieldInfos.new
- infos << docs[0]
  docs.each_with_index do |doc, i|
  #puts "Index doc " + i.to_s
  iw.add_document(doc)
@@ -1,6 +1,5 @@
  require File.dirname(__FILE__) + "/../../test_helper"
 
-
  class TermTest < Test::Unit::TestCase
  include Ferret::Index
  def test_term()
@@ -16,7 +15,6 @@ class TermTest < Test::Unit::TestCase
  assert(term1 == term4)
  assert(term1.eql?(term4))
  term4.set!("field3", "text3")
- assert(term1 != term4)
+ assert_not_equal(term1, term4)
  end
-
  end
@@ -5,14 +5,14 @@ class TermVectorOffsetInfoTest < Test::Unit::TestCase
  include Ferret::Index
  def test_tvoi()
  t1 = TermVectorOffsetInfo.new(1, 3)
- assert_equal(t1.start_offset, 1)
- assert_equal(t1.end_offset, 3)
+ assert_equal(t1.start, 1)
+ assert_equal(t1.end, 3)
  t2 = TermVectorOffsetInfo.new(1, 3)
  assert(t1 == t2)
- t2.start_offset = 2
+ t2.start = 2
  assert(t1 != t2)
- t2.start_offset = 1
- t2.end_offset = 1
+ t2.start = 1
+ t2.end = 1
  assert(t1 != t2)
  end
  end
@@ -17,7 +17,7 @@ class SegmentTermVectorTest < Test::Unit::TestCase
 
  def test_index_of()
  assert_equal(0, @stv.index_of("Apples"))
- assert_equal(4, @stv.term_frequencies[@stv.index_of("Apples")])
+ assert_equal(4, @stv.freqs[@stv.index_of("Apples")])
  end
 
  def test_indexes_of()
@@ -56,7 +56,7 @@ class SegmentTermVectorWithPosOffsetsTest < Test::Unit::TestCase
 
  def test_index_of()
  assert_equal(0, @stv.index_of("Apples"))
- assert_equal(4, @stv.term_frequencies[@stv.index_of("Apples")])
+ assert_equal(4, @stv.freqs[@stv.index_of("Apples")])
  end
 
  def test_indexes_of()
@@ -33,12 +33,12 @@ class TermVectorsIOTest < Test::Unit::TestCase
 
  assert_equal(2, tv.size)
  assert_equal("text1", tv.terms[0])
- assert_equal(1, tv.term_frequencies[0])
+ assert_equal(1, tv.freqs[0])
  assert_equal(1, tv.positions[0][0])
  assert_equal(t(0,4), tv.offsets[0][0])
 
  assert_equal("text2", tv.terms[1])
- assert_equal(2, tv.term_frequencies[1])
+ assert_equal(2, tv.freqs[1])
  assert_equal(3, tv.positions[1][0])
  assert_equal(t(5,10), tv.offsets[1][0])
  assert_equal(4, tv.positions[1][1])
@@ -77,7 +77,7 @@ class TermVectorsIOTest < Test::Unit::TestCase
 
  assert_equal(2, tv.size)
  assert_equal("word1", tv.terms[0])
- assert_equal(3, tv.term_frequencies[0])
+ assert_equal(3, tv.freqs[0])
  assert_equal(1, tv.positions[0][0])
  assert_equal(5, tv.positions[0][1])
  assert_equal(8, tv.positions[0][2])
@@ -86,7 +86,7 @@ class TermVectorsIOTest < Test::Unit::TestCase
  assert_equal(t(45,50), tv.offsets[0][2])
 
  assert_equal("word2", tv.terms[1])
- assert_equal(2, tv.term_frequencies[1])
+ assert_equal(2, tv.freqs[1])
  assert_equal(2, tv.positions[1][0])
  assert_equal(9, tv.positions[1][1])
  assert_equal(t(6,11), tv.offsets[1][0])
@@ -0,0 +1,138 @@
+ require File.dirname(__FILE__) + "/../../test_helper"
+
+ class QueryParserTest < Test::Unit::TestCase
+
+ def test_strings()
+ parser = Ferret::QueryParser.new("xxx", :fields => ["xxx", "field", "f1", "f2"])
+ pairs = [
+ ['', ''],
+ ['word', 'word'],
+ ['field:word', 'field:word'],
+ ['"word1 word2 word3"', '"word word word"'],
+ ['"word1 2342 word3"', '"word word"'],
+ ['field:"one two three"', 'field:"one two three"'],
+ ['field:"one 222 three"', 'field:"one three"'],
+ ['field:"one <> three"', 'field:"one <> three"'],
+ ['field:"one <> three <>"', 'field:"one <> three"'],
+ ['field:"one <> <> <> three <>"', 'field:"one <> <> <> three"'],
+ ['field:"one <> <> <> three|four|five <>"', 'field:"one <> <> <> three|four|five"'],
+ ['field:"one|two three|four|five six|seven"', 'field:"one|two three|four|five six|seven"'],
+ ['field:"testing|trucks"', 'field:testing field:trucks'],
+ ['[aaa bbb]', '[aaa bbb]'],
+ ['{aaa bbb]', '{aaa bbb]'],
+ ['field:[aaa bbb}', 'field:[aaa bbb}'],
+ ['{aaa bbb}', '{aaa bbb}'],
+ ['{aaa>', '{aaa>'],
+ ['[aaa>', '[aaa>'],
+ ['field:<aaa}', 'field:<aaa}'],
+ ['<aaa]', '<aaa]'],
+ ['>aaa', '{aaa>'],
+ ['>=aaa', '[aaa>'],
+ ['<aaa', '<aaa}'],
+ ['field:<=aaa', 'field:<aaa]'],
+ ['REQ one REQ two', '+one +two'],
+ ['REQ one two', '+one two'],
+ ['one REQ two', 'one +two'],
+ ['+one +two', '+one +two'],
+ ['+one two', '+one two'],
+ ['one +two', 'one +two'],
+ ['-one -two', '-one -two'],
+ ['-one two', '-one two'],
+ ['one -two', 'one -two'],
+ ['!one !two', '-one -two'],
+ ['!one two', '-one two'],
+ ['one !two', 'one -two'],
+ ['NOT one NOT two', '-one -two'],
+ ['NOT one two', '-one two'],
+ ['one NOT two', 'one -two'],
+ ['one two', 'one two'],
+ ['one OR two', 'one two'],
+ ['one AND two', '+one +two'],
+ ['one two AND three', 'one two +three'],
+ ['one two OR three', 'one two three'],
+ ['one (two AND three)', 'one (+two +three)'],
+ ['one AND (two OR three)', '+one +(two three)'],
+ ['field:(one AND (two OR three))', '+field:one +(field:two field:three)'],
+ ['one AND (two OR [aaa vvv})', '+one +(two [aaa vvv})'],
+ ['one AND (f1:two OR f2:three) AND four', '+one +(f1:two f2:three) +four'],
+ ['one^1.23', 'one^1.23'],
+ ['(one AND two)^100.23', '(+one +two)^100.23'],
+ ['field:(one AND two)^100.23', '(+field:one +field:two)^100.23'],
+ ['field:(one AND [aaa bbb]^23.3)^100.23', '(+field:one +field:[aaa bbb]^23.3)^100.23'],
+ ['(REQ field:"one two three")^23', 'field:"one two three"^23.0'],
+ ['asdf~0.2', 'asdf~0.2'],
+ ['field:asdf~0.2', 'field:asdf~0.2'],
+ ['asdf~0.2^100.0', 'asdf~0.2^100.0'],
+ ['field:asdf~0.2^0.1', 'field:asdf~0.2^0.1'],
+ ['field:"asdf <> asdf|asdf"~4', 'field:"asdf <> asdf|asdf"~4'],
+ ['"one two three four five"~5', '"one two three four five"~5'],
+ ['ab?de', 'ab?de'],
+ ['ab*de', 'ab*de'],
+ ['asdf?*?asd*dsf?asfd*asdf?', 'asdf?*?asd*dsf?asfd*asdf?'],
+ ['field:a* AND field:(b*)', '+field:a* +field:b*'],
+ ['field:abc~ AND field:(b*)', '+field:abc~ +field:b*'],
+ ['asdf?*?asd*dsf?asfd*asdf?^20.0', 'asdf?*?asd*dsf?asfd*asdf?^20.0'],
+
+ ['*:xxx', 'xxx field:xxx f1:xxx f2:xxx'],
+ ['f1|f2:xxx', 'f1:xxx f2:xxx'],
+
+ ['*:asd~0.2', 'asd~0.2 field:asd~0.2 f1:asd~0.2 f2:asd~0.2'],
+ ['f1|f2:asd~0.2', 'f1:asd~0.2 f2:asd~0.2'],
+
+ ['*:a?d*^20.0', '(a?d* field:a?d* f1:a?d* f2:a?d*)^20.0'],
+ ['f1|f2:a?d*^20.0', '(f1:a?d* f2:a?d*)^20.0'],
+
+ ['*:"asdf <> xxx|yyy"', '"asdf <> xxx|yyy" field:"asdf <> xxx|yyy" f1:"asdf <> xxx|yyy" f2:"asdf <> xxx|yyy"'],
+ ['f1|f2:"asdf <> xxx|yyy"', 'f1:"asdf <> xxx|yyy" f2:"asdf <> xxx|yyy"'],
+
+ ['*:[bbb xxx]', '[bbb xxx] field:[bbb xxx] f1:[bbb xxx] f2:[bbb xxx]'],
+ ['f1|f2:[bbb xxx]', 'f1:[bbb xxx] f2:[bbb xxx]'],
+
+ ['*:(xxx AND bbb)', '+(xxx field:xxx f1:xxx f2:xxx) +(bbb field:bbb f1:bbb f2:bbb)'],
+ ['f1|f2:(xxx AND bbb)', '+(f1:xxx f2:xxx) +(f1:bbb f2:bbb)'],
+ ['asdf?*?asd*dsf?asfd*asdf?^20.0', 'asdf?*?asd*dsf?asfd*asdf?^20.0'],
+ ['"onewordphrase"', 'onewordphrase']
+ ]
+
+ pairs.each do |query_str, expected|
+ assert_equal(expected, parser.parse(query_str).to_s("xxx"))
+ end
+ end
+
+ def test_qp_with_standard_analyzer()
+ parser = Ferret::QueryParser.new("xxx", :fields => ["xxx", "key"],
+ :analyzer => Ferret::Analysis::StandardAnalyzer.new)
+ pairs = [
+ ['key:1234', 'key:1234'],
+ ['key:(1234)', 'key:1234']
+ ]
+
+ pairs.each do |query_str, expected|
+ assert_equal(expected, parser.parse(query_str).to_s("xxx"))
+ end
+ end
+
+ def do_test_query_parse_exception_raised(str)
+ parser = Ferret::QueryParser.new("xxx", :fields => ["f1", "f2", "f3"])
+ assert_raise(Ferret::QueryParser::QueryParseException) do
+ parser.parse(str)
+ end
+ end
+
+
+ def test_bad_queries
+ parser = Ferret::QueryParser.new("xxx", :fields => ["f1", "f2"],
+ :handle_parse_errors => true)
+
+ pairs = [
+ ['::*word', 'word'],
+ ['()*&)(*^&*(', ''],
+ ['()*&one)(*two(*&"', '"one two"']
+ ]
+
+ pairs.each do |query_str, expected|
+ do_test_query_parse_exception_raised(query_str)
+ assert_equal(expected, parser.parse(query_str).to_s("xxx"))
+ end
+ end
+ end