ferret 0.3.2 → 0.9.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (141)
  1. data/CHANGELOG +9 -0
  2. data/Rakefile +51 -25
  3. data/ext/analysis.c +553 -0
  4. data/ext/analysis.h +76 -0
  5. data/ext/array.c +83 -0
  6. data/ext/array.h +19 -0
  7. data/ext/bitvector.c +164 -0
  8. data/ext/bitvector.h +29 -0
  9. data/ext/compound_io.c +335 -0
  10. data/ext/document.c +336 -0
  11. data/ext/document.h +87 -0
  12. data/ext/ferret.c +88 -47
  13. data/ext/ferret.h +43 -109
  14. data/ext/field.c +395 -0
  15. data/ext/filter.c +103 -0
  16. data/ext/fs_store.c +352 -0
  17. data/ext/global.c +219 -0
  18. data/ext/global.h +73 -0
  19. data/ext/hash.c +446 -0
  20. data/ext/hash.h +80 -0
  21. data/ext/hashset.c +141 -0
  22. data/ext/hashset.h +37 -0
  23. data/ext/helper.c +11 -0
  24. data/ext/helper.h +5 -0
  25. data/ext/inc/lang.h +41 -0
  26. data/ext/ind.c +389 -0
  27. data/ext/index.h +884 -0
  28. data/ext/index_io.c +269 -415
  29. data/ext/index_rw.c +2543 -0
  30. data/ext/lang.c +31 -0
  31. data/ext/lang.h +41 -0
  32. data/ext/priorityqueue.c +228 -0
  33. data/ext/priorityqueue.h +44 -0
  34. data/ext/q_boolean.c +1331 -0
  35. data/ext/q_const_score.c +154 -0
  36. data/ext/q_fuzzy.c +287 -0
  37. data/ext/q_match_all.c +142 -0
  38. data/ext/q_multi_phrase.c +343 -0
  39. data/ext/q_parser.c +2180 -0
  40. data/ext/q_phrase.c +657 -0
  41. data/ext/q_prefix.c +75 -0
  42. data/ext/q_range.c +247 -0
  43. data/ext/q_span.c +1566 -0
  44. data/ext/q_term.c +308 -0
  45. data/ext/q_wildcard.c +146 -0
  46. data/ext/r_analysis.c +255 -0
  47. data/ext/r_doc.c +578 -0
  48. data/ext/r_index_io.c +996 -0
  49. data/ext/r_qparser.c +158 -0
  50. data/ext/r_search.c +2321 -0
  51. data/ext/r_store.c +263 -0
  52. data/ext/r_term.c +219 -0
  53. data/ext/ram_store.c +447 -0
  54. data/ext/search.c +524 -0
  55. data/ext/search.h +1065 -0
  56. data/ext/similarity.c +143 -39
  57. data/ext/sort.c +661 -0
  58. data/ext/store.c +35 -0
  59. data/ext/store.h +152 -0
  60. data/ext/term.c +704 -143
  61. data/ext/termdocs.c +599 -0
  62. data/ext/vector.c +594 -0
  63. data/lib/ferret.rb +9 -10
  64. data/lib/ferret/analysis/analyzers.rb +2 -2
  65. data/lib/ferret/analysis/standard_tokenizer.rb +1 -1
  66. data/lib/ferret/analysis/token.rb +14 -14
  67. data/lib/ferret/analysis/token_filters.rb +3 -3
  68. data/lib/ferret/document/field.rb +16 -17
  69. data/lib/ferret/index/document_writer.rb +4 -4
  70. data/lib/ferret/index/index.rb +39 -23
  71. data/lib/ferret/index/index_writer.rb +2 -2
  72. data/lib/ferret/index/multiple_term_doc_pos_enum.rb +1 -8
  73. data/lib/ferret/index/segment_term_vector.rb +4 -4
  74. data/lib/ferret/index/term.rb +5 -1
  75. data/lib/ferret/index/term_vector_offset_info.rb +6 -6
  76. data/lib/ferret/index/term_vectors_io.rb +5 -5
  77. data/lib/ferret/query_parser/query_parser.tab.rb +81 -77
  78. data/lib/ferret/search.rb +1 -1
  79. data/lib/ferret/search/boolean_query.rb +2 -1
  80. data/lib/ferret/search/field_sorted_hit_queue.rb +3 -3
  81. data/lib/ferret/search/fuzzy_query.rb +2 -1
  82. data/lib/ferret/search/index_searcher.rb +3 -0
  83. data/lib/ferret/search/{match_all_docs_query.rb → match_all_query.rb} +7 -7
  84. data/lib/ferret/search/multi_phrase_query.rb +6 -5
  85. data/lib/ferret/search/phrase_query.rb +3 -6
  86. data/lib/ferret/search/prefix_query.rb +4 -4
  87. data/lib/ferret/search/sort.rb +3 -1
  88. data/lib/ferret/search/sort_field.rb +9 -9
  89. data/lib/ferret/search/spans/near_spans_enum.rb +1 -1
  90. data/lib/ferret/search/spans/span_near_query.rb +1 -1
  91. data/lib/ferret/search/spans/span_weight.rb +1 -1
  92. data/lib/ferret/search/spans/spans_enum.rb +7 -7
  93. data/lib/ferret/store/fs_store.rb +10 -6
  94. data/lib/ferret/store/ram_store.rb +3 -3
  95. data/lib/rferret.rb +36 -0
  96. data/test/functional/thread_safety_index_test.rb +2 -2
  97. data/test/test_helper.rb +16 -2
  98. data/test/unit/analysis/c_token.rb +25 -0
  99. data/test/unit/analysis/tc_per_field_analyzer_wrapper.rb +1 -1
  100. data/test/unit/analysis/tc_standard_analyzer.rb +1 -1
  101. data/test/unit/document/{tc_document.rb → c_document.rb} +0 -0
  102. data/test/unit/document/c_field.rb +98 -0
  103. data/test/unit/document/tc_field.rb +0 -66
  104. data/test/unit/index/{tc_index.rb → c_index.rb} +62 -6
  105. data/test/unit/index/{tc_index_reader.rb → c_index_reader.rb} +51 -10
  106. data/test/unit/index/{tc_index_writer.rb → c_index_writer.rb} +0 -4
  107. data/test/unit/index/{tc_term.rb → c_term.rb} +1 -3
  108. data/test/unit/index/{tc_term_vector_offset_info.rb → c_term_voi.rb} +5 -5
  109. data/test/unit/index/tc_segment_term_vector.rb +2 -2
  110. data/test/unit/index/tc_term_vectors_io.rb +4 -4
  111. data/test/unit/query_parser/c_query_parser.rb +138 -0
  112. data/test/unit/search/{tc_filter.rb → c_filter.rb} +24 -24
  113. data/test/unit/search/{tc_fuzzy_query.rb → c_fuzzy_query.rb} +0 -0
  114. data/test/unit/search/{tc_index_searcher.rb → c_index_searcher.rb} +9 -26
  115. data/test/unit/search/{tc_search_and_sort.rb → c_search_and_sort.rb} +15 -15
  116. data/test/unit/search/{tc_sort.rb → c_sort.rb} +2 -1
  117. data/test/unit/search/c_sort_field.rb +27 -0
  118. data/test/unit/search/{tc_spans.rb → c_spans.rb} +0 -0
  119. data/test/unit/search/tc_sort_field.rb +7 -20
  120. data/test/unit/store/c_fs_store.rb +76 -0
  121. data/test/unit/store/c_ram_store.rb +35 -0
  122. data/test/unit/store/m_store.rb +34 -0
  123. data/test/unit/store/m_store_lock.rb +68 -0
  124. data/test/unit/store/tc_fs_store.rb +0 -53
  125. data/test/unit/store/tc_ram_store.rb +0 -20
  126. data/test/unit/store/tm_store.rb +0 -30
  127. data/test/unit/store/tm_store_lock.rb +0 -66
  128. metadata +84 -31
  129. data/ext/Makefile +0 -140
  130. data/ext/ferret_ext.so +0 -0
  131. data/ext/priority_queue.c +0 -232
  132. data/ext/ram_directory.c +0 -321
  133. data/ext/segment_merge_queue.c +0 -37
  134. data/ext/segment_term_enum.c +0 -326
  135. data/ext/string_helper.c +0 -42
  136. data/ext/tags +0 -344
  137. data/ext/term_buffer.c +0 -230
  138. data/ext/term_infos_reader.c +0 -54
  139. data/ext/terminfo.c +0 -160
  140. data/ext/token.c +0 -93
  141. data/ext/util.c +0 -12
@@ -123,16 +123,17 @@ module Ferret::Search
123
123
  query_expl = Explanation.new()
124
124
  query_expl.description = "query_weight(#{@query}), product of:"
125
125
 
126
- boost_expl = Explanation.new(@query.boost(), "boost")
127
- (query_expl << boost_expl) if (@query.boost() != 1.0)
128
-
126
+ boost = @query.boost()
127
+ if boost != 1.0
128
+ boost_expl = Explanation.new(boost, "boost")
129
+ query_expl << boost_expl
130
+ end
129
131
  query_expl << idf_expl
130
132
 
131
133
  query_norm_expl = Explanation.new(@query_norm,"query_norm")
132
134
  query_expl << query_norm_expl
133
135
 
134
- query_expl.value =
135
- boost_expl.value * idf_expl.value * query_norm_expl.value
136
+ query_expl.value = boost * @idf * @query_norm
136
137
 
137
138
  result << query_expl
138
139
 
@@ -127,7 +127,7 @@ module Ferret::Search
127
127
  query_norm_expl = Explanation.new(@query_norm, "query_norm")
128
128
  query_expl << query_norm_expl
129
129
 
130
- query_expl.value = boost * @idf * query_norm_expl.value
130
+ query_expl.value = boost * @idf * @query_norm
131
131
 
132
132
  result << query_expl
133
133
 
@@ -150,15 +150,12 @@ module Ferret::Search
150
150
  field_expl << field_norm_expl
151
151
 
152
152
  field_expl.value = tf_expl.value * @idf * field_norm
153
-
154
153
  result << field_expl
155
154
 
156
- # combine them
157
- result.value = query_expl.value * field_expl.value
158
-
159
- if query_expl.value == 1.0
155
+ if (query_expl.value == 1.0)
160
156
  return field_expl
161
157
  else
158
+ result.value = query_expl.value * field_expl.value
162
159
  return result
163
160
  end
164
161
  end
@@ -23,10 +23,10 @@ module Ferret::Search
23
23
  term.text[0,prefix_length] != prefix_text)
24
24
  break
25
25
  end
26
- tq = TermQuery.new(term) # found a match
27
- tq.boost = boost() # set the boost
28
- bq.add_query(tq, BooleanClause::Occur::SHOULD) # add to query
29
- #puts("added " + term)
26
+ tq = TermQuery.new(term) # found a match
27
+ tq.boost = boost() # set the boost
28
+ bq.add_query(tq, BooleanClause::Occur::SHOULD) # add to query
29
+ #puts("added " + term)
30
30
  end while (enumerator.next?)
31
31
  ensure
32
32
  enumerator.close()
@@ -87,8 +87,10 @@ module Ferret::Search
87
87
  SortField.new(field, {:sort_type => SortField::SortType::AUTO,
88
88
  :reverse => reverse})
89
89
  end
90
- @fields << SortField::FIELD_DOC if @fields.size == 1
91
90
  end
91
+ doc_sort_added = false
92
+ @fields.each {|f| doc_sort_added = true if f == SortField::FIELD_DOC }
93
+ @fields << SortField::FIELD_DOC if not doc_sort_added
92
94
  end
93
95
 
94
96
  # Represents sorting by computed relevance. Using this sort criteria returns
@@ -56,16 +56,16 @@ module Ferret::Search
56
56
  # name:: Name of field to sort by. Can be +nil+ if +sort_type+ is SCORE or
57
57
  # DOC.
58
58
  #
59
- # A hash with the followind values can also be supplied;
60
- # sort_type:: Type of values in the terms.
61
- # reverse:: True if natural order should be reversed.
62
- # comparator:: a proc used to compare two values from the index. You can
63
- # also give this value to the SortType object that you pass.
64
- def initialize(name = nil, args= {})
59
+ # An options hash with the followind values can also be supplied;
60
+ # sort_type:: Type of values in the terms.
61
+ # reverse:: True if natural order should be reversed.
62
+ # comparator:: A proc used to compare two values from the index. You can
63
+ # also give this value to the SortType object that you pass.
64
+ def initialize(name = nil, options= {})
65
65
  @name = name.to_s if name
66
- @sort_type = args[:sort_type]||SortType::AUTO
67
- @reverse = args[:reverse]||false
68
- @comparator = args[:comparator]||@sort_type.comparator
66
+ @sort_type = options[:sort_type]||SortType::AUTO
67
+ @reverse = options[:reverse]||false
68
+ @comparator = options[:comparator]||@sort_type.comparator
69
69
  if (@name == nil and @sort_type != SortType::DOC and
70
70
  @sort_type != SortType::SCORE)
71
71
  raise ArgumentError, "You must supply a field name for your sort field"
@@ -62,7 +62,7 @@ module Ferret::Search::Spans
62
62
  @length = finish() - start() # compute new length
63
63
  @parent.total_length += @length # add new length to total
64
64
 
65
- if (@parent.max == nil or doc() > @parent.max.doc() or # maintain max
65
+ if (@parent.max.nil? or doc() > @parent.max.doc() or # maintain max
66
66
  (doc() == @parent.max.doc and finish() > @parent.max.finish))
67
67
  @parent.max = self
68
68
  end
@@ -49,7 +49,7 @@ module Ferret::Search::Spans
49
49
  def to_s(field = nil)
50
50
  buffer = "span_near(["
51
51
  buffer << @clauses.map {|c| c.to_s(field)}.join(", ")
52
- buffer << "], #{@stop}, #{@in_order})"
52
+ buffer << "], #{@slop}, #{@in_order})"
53
53
  return buffer
54
54
  end
55
55
 
@@ -6,7 +6,7 @@ module Ferret::Search::Spans
6
6
  @query = query
7
7
  @terms = query.terms()
8
8
 
9
- @idf = @query.similarity(searcher).idf_phrase(@terms, searcher)
9
+ @idf = @similarity.idf_phrase(@terms, searcher)
10
10
  end
11
11
 
12
12
  attr_reader :query, :value
@@ -2,7 +2,7 @@ module Ferret::Search::Spans
2
2
  # Expert: an enumeration of span matches. Used to implement span searching.
3
3
  # Each span represents a range of term positions within a document. Matches
4
4
  # are enumerated in order, by increasing document number, within that by
5
- # increasing start position and ensure by increasing finish position.
5
+ # increasing start position and finally by increasing finish position.
6
6
  class SpansEnum
7
7
  # Move to the next match, returning true iff any such exists.
8
8
  def next?()
@@ -13,12 +13,12 @@ module Ferret::Search::Spans
13
13
  # greater than or equal to _target_. Returns true iff there is such a
14
14
  # match. Behaves as if written:
15
15
  #
16
- # def skip_to(target)
17
- # begin
18
- # return false if (!next?)
19
- # end while (target > doc)
20
- # return true
21
- # end
16
+ # def skip_to(target)
17
+ # begin
18
+ # return false if (!next?)
19
+ # end while (target > doc)
20
+ # return true
21
+ # end
22
22
  #
23
23
  # Most implementations are considerably more efficient than that.
24
24
  def skip_to(target)
@@ -38,7 +38,7 @@ module Ferret::Store
38
38
  super()
39
39
  if create then FileUtils.mkdir_p(path) end
40
40
  if not File.directory?(path) then
41
- raise "There is no directory: #{path}. Use create = true to create one"
41
+ raise IOError, "There is no directory: #{path}. Use create = true to create one"
42
42
  end
43
43
  @dir = Dir.new(path)
44
44
  # put the lock_dir here as well if no default exists.
@@ -182,7 +182,7 @@ module Ferret::Store
182
182
 
183
183
  # Construct a Lock.
184
184
  def make_lock(name)
185
- FSLock.new(@lock_dir.path + "/" + lock_prefix() + name)
185
+ FSLock.new(@lock_dir.path + "/" + lock_prefix() + name + ".lck")
186
186
  end
187
187
 
188
188
  # Closes the store.
@@ -285,7 +285,11 @@ module Ferret::Store
285
285
  attr_reader :length, :file
286
286
 
287
287
  def initialize(path)
288
- @file = File.open(path, "rb")
288
+ begin
289
+ @file = File.open(path, "rb")
290
+ rescue Errno::ENOENT => e
291
+ raise StandardError.new(e.message)
292
+ end
289
293
  @file.extend(MonitorMixin)
290
294
  #class <<@file
291
295
  # attr_accessor :ref_count
@@ -312,7 +316,7 @@ module Ferret::Store
312
316
  private
313
317
 
314
318
  def read_internal(b, offset, length)
315
- @file.synchronize do
319
+ #@file.synchronize do
316
320
  position = pos()
317
321
  if position != @file.pos
318
322
  @file.seek(position)
@@ -322,7 +326,7 @@ module Ferret::Store
322
326
  raise EOFError, "Read past EOF in #{@file.path}"
323
327
  end
324
328
  b[offset, bytes.length] = bytes
325
- end
329
+ #end
326
330
  end
327
331
 
328
332
  def seek_internal(pos)
@@ -340,7 +344,7 @@ module Ferret::Store
340
344
 
341
345
  # returns the lock prefix for this directory
342
346
  def lock_prefix
343
- LOCK_PREFIX + Digest::MD5.hexdigest(@dir.path)
347
+ LOCK_PREFIX
344
348
  end
345
349
 
346
350
  # Unfortunately, on Windows, Dir does not refresh when rewind is called
@@ -89,7 +89,7 @@ module Ferret::Store
89
89
 
90
90
  # Construct a Lock.
91
91
  def make_lock(name)
92
- RAMLock.new(LOCK_PREFIX + name, self)
92
+ RAMLock.new(LOCK_PREFIX + name + ".lck", self)
93
93
  end
94
94
 
95
95
 
@@ -252,14 +252,14 @@ module Ferret::Store
252
252
  # obtain the lock on the data source
253
253
  def obtain(lock_timeout = 1)
254
254
  MAX_ATTEMPTS.times do
255
- @dir.synchronize do
255
+ #@dir.synchronize do
256
256
  # create a file if none exists. If one already exists
257
257
  # then someone beat us to the lock so return false
258
258
  if (! locked?) then
259
259
  @dir.create_output(@lock_file)
260
260
  return true
261
261
  end
262
- end
262
+ #end
263
263
  # lock was not obtained so sleep for timeout then try again.
264
264
  sleep(lock_timeout)
265
265
  end
data/lib/rferret.rb ADDED
@@ -0,0 +1,36 @@
1
+ #--
2
+ # Copyright (c) 2005 David Balmain
3
+ #
4
+ # Permission is hereby granted, free of charge, to any person obtaining
5
+ # a copy of this software and associated documentation files (the
6
+ # "Software"), to deal in the Software without restriction, including
7
+ # without limitation the rights to use, copy, modify, merge, publish,
8
+ # distribute, sublicense, and/or sell copies of the Software, and to
9
+ # permit persons to whom the Software is furnished to do so, subject to
10
+ # the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be
13
+ # included in all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18
+ # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
19
+ # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
20
+ # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
21
+ # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22
+ #++
23
+ # :include: ../TUTORIAL
24
+ module Ferret
25
+ VERSION = '0.9.0'
26
+ end
27
+
28
+ $ferret_pure_ruby = true
29
+ require 'ferret/utils'
30
+ require 'ferret/document'
31
+ require 'ferret/stemmers'
32
+ require 'ferret/analysis'
33
+ require 'ferret/store'
34
+ require 'ferret/index'
35
+ require 'ferret/search'
36
+ require 'ferret/query_parser'
@@ -7,8 +7,8 @@ class IndexThreadSafetyTest < Test::Unit::TestCase
7
7
  include Ferret::Document
8
8
 
9
9
  INDEX_DIR = File.expand_path(File.join(File.dirname(__FILE__), "index"))
10
- ITERATIONS = 100000
11
- NUM_THREADS = 2
10
+ ITERATIONS = 100
11
+ NUM_THREADS = 10
12
12
  ANALYZER = Ferret::Analysis::Analyzer.new()
13
13
 
14
14
  def setup
data/test/test_helper.rb CHANGED
@@ -2,13 +2,27 @@ $:.unshift File.dirname(__FILE__)
2
2
  $:.unshift File.join(File.dirname(__FILE__), '../lib')
3
3
  $:.unshift File.join(File.dirname(__FILE__), '../ext')
4
4
 
5
+ class Float
6
+ def =~(o)
7
+ return (1 - self/o).abs < 0.00001
8
+ end
9
+ end
10
+
5
11
  require 'test/unit'
6
- require 'ferret'
7
12
  require 'unit/index/th_doc'
13
+ if $ferret_pure_ruby
14
+ require 'rferret'
15
+ else
16
+ require 'ferret'
17
+ end
8
18
 
9
19
  def load_test_dir(dir)
10
20
  dir = File.join(File.dirname(__FILE__), dir)
11
21
  Dir.foreach(dir) do |file|
12
- require File.join(dir, file) if file =~ /^t[mcs]_.*\.rb$/
22
+ if $ferret_pure_ruby
23
+ require File.join(dir, file) if file =~ /^t?[mcs]_.*\.rb$/
24
+ else
25
+ require File.join(dir, file) if file =~ /^[mcs]_.*\.rb$/
26
+ end
13
27
  end
14
28
  end
@@ -0,0 +1,25 @@
1
+ require File.dirname(__FILE__) + "/../../test_helper"
2
+
3
+ class TokenTest < Test::Unit::TestCase
4
+ include Ferret::Analysis
5
+
6
+ def test_token()
7
+ tk1 = Token.new("DBalmain", 1, 8, 5, "token")
8
+ assert_equal(tk1, Token.new("DBalmain", 1, 8))
9
+ assert_not_equal(tk1, Token.new("DBalmain", 0, 8))
10
+ assert_not_equal(tk1, Token.new("DBalmain", 1, 9))
11
+ assert_not_equal(tk1, Token.new("Dbalmain", 1, 8))
12
+ assert(tk1 < Token.new("CBalmain", 2, 7))
13
+ assert(tk1 > Token.new("EBalmain", 0, 9))
14
+ assert(tk1 < Token.new("CBalmain", 1, 9))
15
+ assert(tk1 > Token.new("EBalmain", 1, 7))
16
+ assert(tk1 < Token.new("EBalmain", 1, 8))
17
+ assert(tk1 > Token.new("CBalmain", 1, 8))
18
+ assert_equal("DBalmain", tk1.text)
19
+ tk1.text = "Hello"
20
+ assert_equal("Hello", tk1.text)
21
+ assert_equal(1, tk1.start_offset)
22
+ assert_equal(8, tk1.end_offset)
23
+ assert_equal(5, tk1.pos_inc)
24
+ end
25
+ end
@@ -25,7 +25,7 @@ class PerFieldAnalyzerWrapperTest < Test::Unit::TestCase
25
25
  assert_equal(Token.new('My', 22, 24), t.next())
26
26
  assert_equal(Token.new('e-mail', 25, 31), t.next())
27
27
  assert_equal(Token.new("ADDRESS", 32, 39), t.next())
28
- if ( token = t.next()): puts token.term_text end
28
+ if ( token = t.next()): puts token.text end
29
29
  assert(! t.next())
30
30
  input.reset()
31
31
  t = aw.token_stream("body", input)
@@ -4,7 +4,7 @@ class StandardAnalyzerTest < Test::Unit::TestCase
4
4
  include Ferret::Utils::StringHelper
5
5
  include Ferret::Analysis
6
6
 
7
- def test_lettertokenizer()
7
+ def test_standard_analyzer()
8
8
  input = StringReader.new('D.Ba_l-n@gma-l.com AB&Sons Toys\'r\'us you\'re she\'s, #$%^$%*& job@dot I.B.M. the an AnD THEIR')
9
9
  sa = StandardAnalyzer.new()
10
10
  t = sa.token_stream("field", input)
@@ -0,0 +1,98 @@
1
+ require File.dirname(__FILE__) + "/../../test_helper"
2
+
3
+
4
+ class FieldTest < Test::Unit::TestCase
5
+ include Ferret::Document
6
+ include Ferret::Utils
7
+
8
+ def test_store()
9
+ assert_not_nil(Field::Store::COMPRESS)
10
+ assert_not_nil(Field::Store::YES)
11
+ assert_not_nil(Field::Store::NO)
12
+ end
13
+
14
+ def test_index()
15
+ assert_not_nil(Field::Index::TOKENIZED)
16
+ assert_not_nil(Field::Index::UNTOKENIZED)
17
+ assert_not_nil(Field::Index::NO)
18
+ assert_not_nil(Field::Index::NO_NORMS)
19
+ end
20
+
21
+ def test_term_vector()
22
+ assert_not_nil(Field::TermVector::YES)
23
+ assert_not_nil(Field::TermVector::NO)
24
+ assert_not_nil(Field::TermVector::WITH_POSITIONS)
25
+ assert_not_nil(Field::TermVector::WITH_OFFSETS)
26
+ assert_not_nil(Field::TermVector::WITH_POSITIONS_OFFSETS)
27
+ end
28
+
29
+ def test_standard_field()
30
+ f = Field.new("name", "value", Field::Store::COMPRESS, Field::Index::TOKENIZED)
31
+ assert_equal("name", f.name)
32
+ assert_equal("value", f.data)
33
+ assert_equal(true, f.stored?)
34
+ assert_equal(true, f.compressed?)
35
+ assert_equal(true, f.indexed?)
36
+ assert_equal(true, f.tokenized?)
37
+ assert_equal(false, f.store_term_vector?)
38
+ assert_equal(false, f.store_offsets?)
39
+ assert_equal(false, f.store_positions?)
40
+ assert_equal(false, f.omit_norms?)
41
+ assert_equal(false, f.binary?)
42
+ assert_equal("stored/compressed,indexed,tokenized,<name:value>", f.to_s)
43
+ f.data = "183"
44
+ f.boost = 0.001
45
+ assert_equal("183", f.data)
46
+ assert(0.001 =~ f.boost)
47
+ end
48
+
49
+ def test_set_store()
50
+ f = Field.new("name", "", Field::Store::COMPRESS, Field::Index::TOKENIZED)
51
+ f.store = Field::Store::NO
52
+ assert_equal(false, f.stored?)
53
+ assert_equal(false, f.compressed?)
54
+ assert_equal("indexed,tokenized,<name:>", f.to_s)
55
+ end
56
+
57
+ def test_set_index()
58
+ f = Field.new("name", "value", Field::Store::COMPRESS, Field::Index::TOKENIZED)
59
+ f.index = Field::Index::NO
60
+ assert_equal(false, f.indexed?)
61
+ assert_equal(false, f.tokenized?)
62
+ assert_equal(false, f.omit_norms?)
63
+ assert_equal("stored/compressed,<name:value>", f.to_s)
64
+ f.index = Field::Index::NO_NORMS
65
+ assert_equal(true, f.indexed?)
66
+ assert_equal(false, f.tokenized?)
67
+ assert_equal(true, f.omit_norms?)
68
+ assert_equal("stored/compressed,indexed,omit_norms,<name:value>", f.to_s)
69
+ end
70
+
71
+ def test_set_term_vector()
72
+ f = Field.new("name", "value", Field::Store::COMPRESS, Field::Index::TOKENIZED)
73
+ f.term_vector = Field::TermVector::WITH_POSITIONS_OFFSETS
74
+ assert_equal(true, f.store_term_vector?)
75
+ assert_equal(true, f.store_offsets?)
76
+ assert_equal(true, f.store_positions?)
77
+ assert_equal("stored/compressed,indexed,tokenized,store_term_vector,store_offsets,store_positions,<name:value>", f.to_s)
78
+ end
79
+
80
+ def test_new_binary_field()
81
+ tmp = []
82
+ 256.times {|i| tmp[i] = i}
83
+ bin = tmp.pack("c*")
84
+ f = Field.new_binary_field("name", bin, Field::Store::YES)
85
+ assert_equal("name", f.name)
86
+ assert_equal(bin, f.data)
87
+ assert_equal(true, f.stored?)
88
+ assert_equal(false, f.compressed?)
89
+ assert_equal(false, f.indexed?)
90
+ assert_equal(false, f.tokenized?)
91
+ assert_equal(false, f.store_term_vector?)
92
+ assert_equal(false, f.store_offsets?)
93
+ assert_equal(false, f.store_positions?)
94
+ assert_equal(false, f.omit_norms?)
95
+ assert_equal(true, f.binary?)
96
+ assert_equal("stored/uncompressed,binary,<name:=bin_data=>", f.to_s)
97
+ end
98
+ end