ferret 0.3.2 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (141)
  1. data/CHANGELOG +9 -0
  2. data/Rakefile +51 -25
  3. data/ext/analysis.c +553 -0
  4. data/ext/analysis.h +76 -0
  5. data/ext/array.c +83 -0
  6. data/ext/array.h +19 -0
  7. data/ext/bitvector.c +164 -0
  8. data/ext/bitvector.h +29 -0
  9. data/ext/compound_io.c +335 -0
  10. data/ext/document.c +336 -0
  11. data/ext/document.h +87 -0
  12. data/ext/ferret.c +88 -47
  13. data/ext/ferret.h +43 -109
  14. data/ext/field.c +395 -0
  15. data/ext/filter.c +103 -0
  16. data/ext/fs_store.c +352 -0
  17. data/ext/global.c +219 -0
  18. data/ext/global.h +73 -0
  19. data/ext/hash.c +446 -0
  20. data/ext/hash.h +80 -0
  21. data/ext/hashset.c +141 -0
  22. data/ext/hashset.h +37 -0
  23. data/ext/helper.c +11 -0
  24. data/ext/helper.h +5 -0
  25. data/ext/inc/lang.h +41 -0
  26. data/ext/ind.c +389 -0
  27. data/ext/index.h +884 -0
  28. data/ext/index_io.c +269 -415
  29. data/ext/index_rw.c +2543 -0
  30. data/ext/lang.c +31 -0
  31. data/ext/lang.h +41 -0
  32. data/ext/priorityqueue.c +228 -0
  33. data/ext/priorityqueue.h +44 -0
  34. data/ext/q_boolean.c +1331 -0
  35. data/ext/q_const_score.c +154 -0
  36. data/ext/q_fuzzy.c +287 -0
  37. data/ext/q_match_all.c +142 -0
  38. data/ext/q_multi_phrase.c +343 -0
  39. data/ext/q_parser.c +2180 -0
  40. data/ext/q_phrase.c +657 -0
  41. data/ext/q_prefix.c +75 -0
  42. data/ext/q_range.c +247 -0
  43. data/ext/q_span.c +1566 -0
  44. data/ext/q_term.c +308 -0
  45. data/ext/q_wildcard.c +146 -0
  46. data/ext/r_analysis.c +255 -0
  47. data/ext/r_doc.c +578 -0
  48. data/ext/r_index_io.c +996 -0
  49. data/ext/r_qparser.c +158 -0
  50. data/ext/r_search.c +2321 -0
  51. data/ext/r_store.c +263 -0
  52. data/ext/r_term.c +219 -0
  53. data/ext/ram_store.c +447 -0
  54. data/ext/search.c +524 -0
  55. data/ext/search.h +1065 -0
  56. data/ext/similarity.c +143 -39
  57. data/ext/sort.c +661 -0
  58. data/ext/store.c +35 -0
  59. data/ext/store.h +152 -0
  60. data/ext/term.c +704 -143
  61. data/ext/termdocs.c +599 -0
  62. data/ext/vector.c +594 -0
  63. data/lib/ferret.rb +9 -10
  64. data/lib/ferret/analysis/analyzers.rb +2 -2
  65. data/lib/ferret/analysis/standard_tokenizer.rb +1 -1
  66. data/lib/ferret/analysis/token.rb +14 -14
  67. data/lib/ferret/analysis/token_filters.rb +3 -3
  68. data/lib/ferret/document/field.rb +16 -17
  69. data/lib/ferret/index/document_writer.rb +4 -4
  70. data/lib/ferret/index/index.rb +39 -23
  71. data/lib/ferret/index/index_writer.rb +2 -2
  72. data/lib/ferret/index/multiple_term_doc_pos_enum.rb +1 -8
  73. data/lib/ferret/index/segment_term_vector.rb +4 -4
  74. data/lib/ferret/index/term.rb +5 -1
  75. data/lib/ferret/index/term_vector_offset_info.rb +6 -6
  76. data/lib/ferret/index/term_vectors_io.rb +5 -5
  77. data/lib/ferret/query_parser/query_parser.tab.rb +81 -77
  78. data/lib/ferret/search.rb +1 -1
  79. data/lib/ferret/search/boolean_query.rb +2 -1
  80. data/lib/ferret/search/field_sorted_hit_queue.rb +3 -3
  81. data/lib/ferret/search/fuzzy_query.rb +2 -1
  82. data/lib/ferret/search/index_searcher.rb +3 -0
  83. data/lib/ferret/search/{match_all_docs_query.rb → match_all_query.rb} +7 -7
  84. data/lib/ferret/search/multi_phrase_query.rb +6 -5
  85. data/lib/ferret/search/phrase_query.rb +3 -6
  86. data/lib/ferret/search/prefix_query.rb +4 -4
  87. data/lib/ferret/search/sort.rb +3 -1
  88. data/lib/ferret/search/sort_field.rb +9 -9
  89. data/lib/ferret/search/spans/near_spans_enum.rb +1 -1
  90. data/lib/ferret/search/spans/span_near_query.rb +1 -1
  91. data/lib/ferret/search/spans/span_weight.rb +1 -1
  92. data/lib/ferret/search/spans/spans_enum.rb +7 -7
  93. data/lib/ferret/store/fs_store.rb +10 -6
  94. data/lib/ferret/store/ram_store.rb +3 -3
  95. data/lib/rferret.rb +36 -0
  96. data/test/functional/thread_safety_index_test.rb +2 -2
  97. data/test/test_helper.rb +16 -2
  98. data/test/unit/analysis/c_token.rb +25 -0
  99. data/test/unit/analysis/tc_per_field_analyzer_wrapper.rb +1 -1
  100. data/test/unit/analysis/tc_standard_analyzer.rb +1 -1
  101. data/test/unit/document/{tc_document.rb → c_document.rb} +0 -0
  102. data/test/unit/document/c_field.rb +98 -0
  103. data/test/unit/document/tc_field.rb +0 -66
  104. data/test/unit/index/{tc_index.rb → c_index.rb} +62 -6
  105. data/test/unit/index/{tc_index_reader.rb → c_index_reader.rb} +51 -10
  106. data/test/unit/index/{tc_index_writer.rb → c_index_writer.rb} +0 -4
  107. data/test/unit/index/{tc_term.rb → c_term.rb} +1 -3
  108. data/test/unit/index/{tc_term_vector_offset_info.rb → c_term_voi.rb} +5 -5
  109. data/test/unit/index/tc_segment_term_vector.rb +2 -2
  110. data/test/unit/index/tc_term_vectors_io.rb +4 -4
  111. data/test/unit/query_parser/c_query_parser.rb +138 -0
  112. data/test/unit/search/{tc_filter.rb → c_filter.rb} +24 -24
  113. data/test/unit/search/{tc_fuzzy_query.rb → c_fuzzy_query.rb} +0 -0
  114. data/test/unit/search/{tc_index_searcher.rb → c_index_searcher.rb} +9 -26
  115. data/test/unit/search/{tc_search_and_sort.rb → c_search_and_sort.rb} +15 -15
  116. data/test/unit/search/{tc_sort.rb → c_sort.rb} +2 -1
  117. data/test/unit/search/c_sort_field.rb +27 -0
  118. data/test/unit/search/{tc_spans.rb → c_spans.rb} +0 -0
  119. data/test/unit/search/tc_sort_field.rb +7 -20
  120. data/test/unit/store/c_fs_store.rb +76 -0
  121. data/test/unit/store/c_ram_store.rb +35 -0
  122. data/test/unit/store/m_store.rb +34 -0
  123. data/test/unit/store/m_store_lock.rb +68 -0
  124. data/test/unit/store/tc_fs_store.rb +0 -53
  125. data/test/unit/store/tc_ram_store.rb +0 -20
  126. data/test/unit/store/tm_store.rb +0 -30
  127. data/test/unit/store/tm_store_lock.rb +0 -66
  128. metadata +84 -31
  129. data/ext/Makefile +0 -140
  130. data/ext/ferret_ext.so +0 -0
  131. data/ext/priority_queue.c +0 -232
  132. data/ext/ram_directory.c +0 -321
  133. data/ext/segment_merge_queue.c +0 -37
  134. data/ext/segment_term_enum.c +0 -326
  135. data/ext/string_helper.c +0 -42
  136. data/ext/tags +0 -344
  137. data/ext/term_buffer.c +0 -230
  138. data/ext/term_infos_reader.c +0 -54
  139. data/ext/terminfo.c +0 -160
  140. data/ext/token.c +0 -93
  141. data/ext/util.c +0 -12
@@ -123,16 +123,17 @@ module Ferret::Search
123
123
  query_expl = Explanation.new()
124
124
  query_expl.description = "query_weight(#{@query}), product of:"
125
125
 
126
- boost_expl = Explanation.new(@query.boost(), "boost")
127
- (query_expl << boost_expl) if (@query.boost() != 1.0)
128
-
126
+ boost = @query.boost()
127
+ if boost != 1.0
128
+ boost_expl = Explanation.new(boost, "boost")
129
+ query_expl << boost_expl
130
+ end
129
131
  query_expl << idf_expl
130
132
 
131
133
  query_norm_expl = Explanation.new(@query_norm,"query_norm")
132
134
  query_expl << query_norm_expl
133
135
 
134
- query_expl.value =
135
- boost_expl.value * idf_expl.value * query_norm_expl.value
136
+ query_expl.value = boost * @idf * @query_norm
136
137
 
137
138
  result << query_expl
138
139
 
@@ -127,7 +127,7 @@ module Ferret::Search
127
127
  query_norm_expl = Explanation.new(@query_norm, "query_norm")
128
128
  query_expl << query_norm_expl
129
129
 
130
- query_expl.value = boost * @idf * query_norm_expl.value
130
+ query_expl.value = boost * @idf * @query_norm
131
131
 
132
132
  result << query_expl
133
133
 
@@ -150,15 +150,12 @@ module Ferret::Search
150
150
  field_expl << field_norm_expl
151
151
 
152
152
  field_expl.value = tf_expl.value * @idf * field_norm
153
-
154
153
  result << field_expl
155
154
 
156
- # combine them
157
- result.value = query_expl.value * field_expl.value
158
-
159
- if query_expl.value == 1.0
155
+ if (query_expl.value == 1.0)
160
156
  return field_expl
161
157
  else
158
+ result.value = query_expl.value * field_expl.value
162
159
  return result
163
160
  end
164
161
  end
@@ -23,10 +23,10 @@ module Ferret::Search
23
23
  term.text[0,prefix_length] != prefix_text)
24
24
  break
25
25
  end
26
- tq = TermQuery.new(term) # found a match
27
- tq.boost = boost() # set the boost
28
- bq.add_query(tq, BooleanClause::Occur::SHOULD) # add to query
29
- #puts("added " + term)
26
+ tq = TermQuery.new(term) # found a match
27
+ tq.boost = boost() # set the boost
28
+ bq.add_query(tq, BooleanClause::Occur::SHOULD) # add to query
29
+ #puts("added " + term)
30
30
  end while (enumerator.next?)
31
31
  ensure
32
32
  enumerator.close()
@@ -87,8 +87,10 @@ module Ferret::Search
87
87
  SortField.new(field, {:sort_type => SortField::SortType::AUTO,
88
88
  :reverse => reverse})
89
89
  end
90
- @fields << SortField::FIELD_DOC if @fields.size == 1
91
90
  end
91
+ doc_sort_added = false
92
+ @fields.each {|f| doc_sort_added = true if f == SortField::FIELD_DOC }
93
+ @fields << SortField::FIELD_DOC if not doc_sort_added
92
94
  end
93
95
 
94
96
  # Represents sorting by computed relevance. Using this sort criteria returns
@@ -56,16 +56,16 @@ module Ferret::Search
56
56
  # name:: Name of field to sort by. Can be +nil+ if +sort_type+ is SCORE or
57
57
  # DOC.
58
58
  #
59
- # A hash with the followind values can also be supplied;
60
- # sort_type:: Type of values in the terms.
61
- # reverse:: True if natural order should be reversed.
62
- # comparator:: a proc used to compare two values from the index. You can
63
- # also give this value to the SortType object that you pass.
64
- def initialize(name = nil, args= {})
59
+ # An options hash with the followind values can also be supplied;
60
+ # sort_type:: Type of values in the terms.
61
+ # reverse:: True if natural order should be reversed.
62
+ # comparator:: A proc used to compare two values from the index. You can
63
+ # also give this value to the SortType object that you pass.
64
+ def initialize(name = nil, options= {})
65
65
  @name = name.to_s if name
66
- @sort_type = args[:sort_type]||SortType::AUTO
67
- @reverse = args[:reverse]||false
68
- @comparator = args[:comparator]||@sort_type.comparator
66
+ @sort_type = options[:sort_type]||SortType::AUTO
67
+ @reverse = options[:reverse]||false
68
+ @comparator = options[:comparator]||@sort_type.comparator
69
69
  if (@name == nil and @sort_type != SortType::DOC and
70
70
  @sort_type != SortType::SCORE)
71
71
  raise ArgumentError, "You must supply a field name for your sort field"
@@ -62,7 +62,7 @@ module Ferret::Search::Spans
62
62
  @length = finish() - start() # compute new length
63
63
  @parent.total_length += @length # add new length to total
64
64
 
65
- if (@parent.max == nil or doc() > @parent.max.doc() or # maintain max
65
+ if (@parent.max.nil? or doc() > @parent.max.doc() or # maintain max
66
66
  (doc() == @parent.max.doc and finish() > @parent.max.finish))
67
67
  @parent.max = self
68
68
  end
@@ -49,7 +49,7 @@ module Ferret::Search::Spans
49
49
  def to_s(field = nil)
50
50
  buffer = "span_near(["
51
51
  buffer << @clauses.map {|c| c.to_s(field)}.join(", ")
52
- buffer << "], #{@stop}, #{@in_order})"
52
+ buffer << "], #{@slop}, #{@in_order})"
53
53
  return buffer
54
54
  end
55
55
 
@@ -6,7 +6,7 @@ module Ferret::Search::Spans
6
6
  @query = query
7
7
  @terms = query.terms()
8
8
 
9
- @idf = @query.similarity(searcher).idf_phrase(@terms, searcher)
9
+ @idf = @similarity.idf_phrase(@terms, searcher)
10
10
  end
11
11
 
12
12
  attr_reader :query, :value
@@ -2,7 +2,7 @@ module Ferret::Search::Spans
2
2
  # Expert: an enumeration of span matches. Used to implement span searching.
3
3
  # Each span represents a range of term positions within a document. Matches
4
4
  # are enumerated in order, by increasing document number, within that by
5
- # increasing start position and ensure by increasing finish position.
5
+ # increasing start position and finally by increasing finish position.
6
6
  class SpansEnum
7
7
  # Move to the next match, returning true iff any such exists.
8
8
  def next?()
@@ -13,12 +13,12 @@ module Ferret::Search::Spans
13
13
  # greater than or equal to _target_. Returns true iff there is such a
14
14
  # match. Behaves as if written:
15
15
  #
16
- # def skip_to(target)
17
- # begin
18
- # return false if (!next?)
19
- # end while (target > doc)
20
- # return true
21
- # end
16
+ # def skip_to(target)
17
+ # begin
18
+ # return false if (!next?)
19
+ # end while (target > doc)
20
+ # return true
21
+ # end
22
22
  #
23
23
  # Most implementations are considerably more efficient than that.
24
24
  def skip_to(target)
@@ -38,7 +38,7 @@ module Ferret::Store
38
38
  super()
39
39
  if create then FileUtils.mkdir_p(path) end
40
40
  if not File.directory?(path) then
41
- raise "There is no directory: #{path}. Use create = true to create one"
41
+ raise IOError, "There is no directory: #{path}. Use create = true to create one"
42
42
  end
43
43
  @dir = Dir.new(path)
44
44
  # put the lock_dir here as well if no default exists.
@@ -182,7 +182,7 @@ module Ferret::Store
182
182
 
183
183
  # Construct a Lock.
184
184
  def make_lock(name)
185
- FSLock.new(@lock_dir.path + "/" + lock_prefix() + name)
185
+ FSLock.new(@lock_dir.path + "/" + lock_prefix() + name + ".lck")
186
186
  end
187
187
 
188
188
  # Closes the store.
@@ -285,7 +285,11 @@ module Ferret::Store
285
285
  attr_reader :length, :file
286
286
 
287
287
  def initialize(path)
288
- @file = File.open(path, "rb")
288
+ begin
289
+ @file = File.open(path, "rb")
290
+ rescue Errno::ENOENT => e
291
+ raise StandardError.new(e.message)
292
+ end
289
293
  @file.extend(MonitorMixin)
290
294
  #class <<@file
291
295
  # attr_accessor :ref_count
@@ -312,7 +316,7 @@ module Ferret::Store
312
316
  private
313
317
 
314
318
  def read_internal(b, offset, length)
315
- @file.synchronize do
319
+ #@file.synchronize do
316
320
  position = pos()
317
321
  if position != @file.pos
318
322
  @file.seek(position)
@@ -322,7 +326,7 @@ module Ferret::Store
322
326
  raise EOFError, "Read past EOF in #{@file.path}"
323
327
  end
324
328
  b[offset, bytes.length] = bytes
325
- end
329
+ #end
326
330
  end
327
331
 
328
332
  def seek_internal(pos)
@@ -340,7 +344,7 @@ module Ferret::Store
340
344
 
341
345
  # returns the lock prefix for this directory
342
346
  def lock_prefix
343
- LOCK_PREFIX + Digest::MD5.hexdigest(@dir.path)
347
+ LOCK_PREFIX
344
348
  end
345
349
 
346
350
  # Unfortunately, on Windows, Dir does not refresh when rewind is called
@@ -89,7 +89,7 @@ module Ferret::Store
89
89
 
90
90
  # Construct a Lock.
91
91
  def make_lock(name)
92
- RAMLock.new(LOCK_PREFIX + name, self)
92
+ RAMLock.new(LOCK_PREFIX + name + ".lck", self)
93
93
  end
94
94
 
95
95
 
@@ -252,14 +252,14 @@ module Ferret::Store
252
252
  # obtain the lock on the data source
253
253
  def obtain(lock_timeout = 1)
254
254
  MAX_ATTEMPTS.times do
255
- @dir.synchronize do
255
+ #@dir.synchronize do
256
256
  # create a file if none exists. If one already exists
257
257
  # then someone beat us to the lock so return false
258
258
  if (! locked?) then
259
259
  @dir.create_output(@lock_file)
260
260
  return true
261
261
  end
262
- end
262
+ #end
263
263
  # lock was not obtained so sleep for timeout then try again.
264
264
  sleep(lock_timeout)
265
265
  end
data/lib/rferret.rb ADDED
@@ -0,0 +1,36 @@
1
+ #--
2
+ # Copyright (c) 2005 David Balmain
3
+ #
4
+ # Permission is hereby granted, free of charge, to any person obtaining
5
+ # a copy of this software and associated documentation files (the
6
+ # "Software"), to deal in the Software without restriction, including
7
+ # without limitation the rights to use, copy, modify, merge, publish,
8
+ # distribute, sublicense, and/or sell copies of the Software, and to
9
+ # permit persons to whom the Software is furnished to do so, subject to
10
+ # the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be
13
+ # included in all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18
+ # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
19
+ # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
20
+ # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
21
+ # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22
+ #++
23
+ # :include: ../TUTORIAL
24
+ module Ferret
25
+ VERSION = '0.9.0'
26
+ end
27
+
28
+ $ferret_pure_ruby = true
29
+ require 'ferret/utils'
30
+ require 'ferret/document'
31
+ require 'ferret/stemmers'
32
+ require 'ferret/analysis'
33
+ require 'ferret/store'
34
+ require 'ferret/index'
35
+ require 'ferret/search'
36
+ require 'ferret/query_parser'
@@ -7,8 +7,8 @@ class IndexThreadSafetyTest < Test::Unit::TestCase
7
7
  include Ferret::Document
8
8
 
9
9
  INDEX_DIR = File.expand_path(File.join(File.dirname(__FILE__), "index"))
10
- ITERATIONS = 100000
11
- NUM_THREADS = 2
10
+ ITERATIONS = 100
11
+ NUM_THREADS = 10
12
12
  ANALYZER = Ferret::Analysis::Analyzer.new()
13
13
 
14
14
  def setup
data/test/test_helper.rb CHANGED
@@ -2,13 +2,27 @@ $:.unshift File.dirname(__FILE__)
2
2
  $:.unshift File.join(File.dirname(__FILE__), '../lib')
3
3
  $:.unshift File.join(File.dirname(__FILE__), '../ext')
4
4
 
5
+ class Float
6
+ def =~(o)
7
+ return (1 - self/o).abs < 0.00001
8
+ end
9
+ end
10
+
5
11
  require 'test/unit'
6
- require 'ferret'
7
12
  require 'unit/index/th_doc'
13
+ if $ferret_pure_ruby
14
+ require 'rferret'
15
+ else
16
+ require 'ferret'
17
+ end
8
18
 
9
19
  def load_test_dir(dir)
10
20
  dir = File.join(File.dirname(__FILE__), dir)
11
21
  Dir.foreach(dir) do |file|
12
- require File.join(dir, file) if file =~ /^t[mcs]_.*\.rb$/
22
+ if $ferret_pure_ruby
23
+ require File.join(dir, file) if file =~ /^t?[mcs]_.*\.rb$/
24
+ else
25
+ require File.join(dir, file) if file =~ /^[mcs]_.*\.rb$/
26
+ end
13
27
  end
14
28
  end
@@ -0,0 +1,25 @@
1
+ require File.dirname(__FILE__) + "/../../test_helper"
2
+
3
+ class TokenTest < Test::Unit::TestCase
4
+ include Ferret::Analysis
5
+
6
+ def test_token()
7
+ tk1 = Token.new("DBalmain", 1, 8, 5, "token")
8
+ assert_equal(tk1, Token.new("DBalmain", 1, 8))
9
+ assert_not_equal(tk1, Token.new("DBalmain", 0, 8))
10
+ assert_not_equal(tk1, Token.new("DBalmain", 1, 9))
11
+ assert_not_equal(tk1, Token.new("Dbalmain", 1, 8))
12
+ assert(tk1 < Token.new("CBalmain", 2, 7))
13
+ assert(tk1 > Token.new("EBalmain", 0, 9))
14
+ assert(tk1 < Token.new("CBalmain", 1, 9))
15
+ assert(tk1 > Token.new("EBalmain", 1, 7))
16
+ assert(tk1 < Token.new("EBalmain", 1, 8))
17
+ assert(tk1 > Token.new("CBalmain", 1, 8))
18
+ assert_equal("DBalmain", tk1.text)
19
+ tk1.text = "Hello"
20
+ assert_equal("Hello", tk1.text)
21
+ assert_equal(1, tk1.start_offset)
22
+ assert_equal(8, tk1.end_offset)
23
+ assert_equal(5, tk1.pos_inc)
24
+ end
25
+ end
@@ -25,7 +25,7 @@ class PerFieldAnalyzerWrapperTest < Test::Unit::TestCase
25
25
  assert_equal(Token.new('My', 22, 24), t.next())
26
26
  assert_equal(Token.new('e-mail', 25, 31), t.next())
27
27
  assert_equal(Token.new("ADDRESS", 32, 39), t.next())
28
- if ( token = t.next()): puts token.term_text end
28
+ if ( token = t.next()): puts token.text end
29
29
  assert(! t.next())
30
30
  input.reset()
31
31
  t = aw.token_stream("body", input)
@@ -4,7 +4,7 @@ class StandardAnalyzerTest < Test::Unit::TestCase
4
4
  include Ferret::Utils::StringHelper
5
5
  include Ferret::Analysis
6
6
 
7
- def test_lettertokenizer()
7
+ def test_standard_analyzer()
8
8
  input = StringReader.new('D.Ba_l-n@gma-l.com AB&Sons Toys\'r\'us you\'re she\'s, #$%^$%*& job@dot I.B.M. the an AnD THEIR')
9
9
  sa = StandardAnalyzer.new()
10
10
  t = sa.token_stream("field", input)
@@ -0,0 +1,98 @@
1
+ require File.dirname(__FILE__) + "/../../test_helper"
2
+
3
+
4
+ class FieldTest < Test::Unit::TestCase
5
+ include Ferret::Document
6
+ include Ferret::Utils
7
+
8
+ def test_store()
9
+ assert_not_nil(Field::Store::COMPRESS)
10
+ assert_not_nil(Field::Store::YES)
11
+ assert_not_nil(Field::Store::NO)
12
+ end
13
+
14
+ def test_index()
15
+ assert_not_nil(Field::Index::TOKENIZED)
16
+ assert_not_nil(Field::Index::UNTOKENIZED)
17
+ assert_not_nil(Field::Index::NO)
18
+ assert_not_nil(Field::Index::NO_NORMS)
19
+ end
20
+
21
+ def test_term_vector()
22
+ assert_not_nil(Field::TermVector::YES)
23
+ assert_not_nil(Field::TermVector::NO)
24
+ assert_not_nil(Field::TermVector::WITH_POSITIONS)
25
+ assert_not_nil(Field::TermVector::WITH_OFFSETS)
26
+ assert_not_nil(Field::TermVector::WITH_POSITIONS_OFFSETS)
27
+ end
28
+
29
+ def test_standard_field()
30
+ f = Field.new("name", "value", Field::Store::COMPRESS, Field::Index::TOKENIZED)
31
+ assert_equal("name", f.name)
32
+ assert_equal("value", f.data)
33
+ assert_equal(true, f.stored?)
34
+ assert_equal(true, f.compressed?)
35
+ assert_equal(true, f.indexed?)
36
+ assert_equal(true, f.tokenized?)
37
+ assert_equal(false, f.store_term_vector?)
38
+ assert_equal(false, f.store_offsets?)
39
+ assert_equal(false, f.store_positions?)
40
+ assert_equal(false, f.omit_norms?)
41
+ assert_equal(false, f.binary?)
42
+ assert_equal("stored/compressed,indexed,tokenized,<name:value>", f.to_s)
43
+ f.data = "183"
44
+ f.boost = 0.001
45
+ assert_equal("183", f.data)
46
+ assert(0.001 =~ f.boost)
47
+ end
48
+
49
+ def test_set_store()
50
+ f = Field.new("name", "", Field::Store::COMPRESS, Field::Index::TOKENIZED)
51
+ f.store = Field::Store::NO
52
+ assert_equal(false, f.stored?)
53
+ assert_equal(false, f.compressed?)
54
+ assert_equal("indexed,tokenized,<name:>", f.to_s)
55
+ end
56
+
57
+ def test_set_index()
58
+ f = Field.new("name", "value", Field::Store::COMPRESS, Field::Index::TOKENIZED)
59
+ f.index = Field::Index::NO
60
+ assert_equal(false, f.indexed?)
61
+ assert_equal(false, f.tokenized?)
62
+ assert_equal(false, f.omit_norms?)
63
+ assert_equal("stored/compressed,<name:value>", f.to_s)
64
+ f.index = Field::Index::NO_NORMS
65
+ assert_equal(true, f.indexed?)
66
+ assert_equal(false, f.tokenized?)
67
+ assert_equal(true, f.omit_norms?)
68
+ assert_equal("stored/compressed,indexed,omit_norms,<name:value>", f.to_s)
69
+ end
70
+
71
+ def test_set_term_vector()
72
+ f = Field.new("name", "value", Field::Store::COMPRESS, Field::Index::TOKENIZED)
73
+ f.term_vector = Field::TermVector::WITH_POSITIONS_OFFSETS
74
+ assert_equal(true, f.store_term_vector?)
75
+ assert_equal(true, f.store_offsets?)
76
+ assert_equal(true, f.store_positions?)
77
+ assert_equal("stored/compressed,indexed,tokenized,store_term_vector,store_offsets,store_positions,<name:value>", f.to_s)
78
+ end
79
+
80
+ def test_new_binary_field()
81
+ tmp = []
82
+ 256.times {|i| tmp[i] = i}
83
+ bin = tmp.pack("c*")
84
+ f = Field.new_binary_field("name", bin, Field::Store::YES)
85
+ assert_equal("name", f.name)
86
+ assert_equal(bin, f.data)
87
+ assert_equal(true, f.stored?)
88
+ assert_equal(false, f.compressed?)
89
+ assert_equal(false, f.indexed?)
90
+ assert_equal(false, f.tokenized?)
91
+ assert_equal(false, f.store_term_vector?)
92
+ assert_equal(false, f.store_offsets?)
93
+ assert_equal(false, f.store_positions?)
94
+ assert_equal(false, f.omit_norms?)
95
+ assert_equal(true, f.binary?)
96
+ assert_equal("stored/uncompressed,binary,<name:=bin_data=>", f.to_s)
97
+ end
98
+ end