ferret 0.3.2 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +9 -0
- data/Rakefile +51 -25
- data/ext/analysis.c +553 -0
- data/ext/analysis.h +76 -0
- data/ext/array.c +83 -0
- data/ext/array.h +19 -0
- data/ext/bitvector.c +164 -0
- data/ext/bitvector.h +29 -0
- data/ext/compound_io.c +335 -0
- data/ext/document.c +336 -0
- data/ext/document.h +87 -0
- data/ext/ferret.c +88 -47
- data/ext/ferret.h +43 -109
- data/ext/field.c +395 -0
- data/ext/filter.c +103 -0
- data/ext/fs_store.c +352 -0
- data/ext/global.c +219 -0
- data/ext/global.h +73 -0
- data/ext/hash.c +446 -0
- data/ext/hash.h +80 -0
- data/ext/hashset.c +141 -0
- data/ext/hashset.h +37 -0
- data/ext/helper.c +11 -0
- data/ext/helper.h +5 -0
- data/ext/inc/lang.h +41 -0
- data/ext/ind.c +389 -0
- data/ext/index.h +884 -0
- data/ext/index_io.c +269 -415
- data/ext/index_rw.c +2543 -0
- data/ext/lang.c +31 -0
- data/ext/lang.h +41 -0
- data/ext/priorityqueue.c +228 -0
- data/ext/priorityqueue.h +44 -0
- data/ext/q_boolean.c +1331 -0
- data/ext/q_const_score.c +154 -0
- data/ext/q_fuzzy.c +287 -0
- data/ext/q_match_all.c +142 -0
- data/ext/q_multi_phrase.c +343 -0
- data/ext/q_parser.c +2180 -0
- data/ext/q_phrase.c +657 -0
- data/ext/q_prefix.c +75 -0
- data/ext/q_range.c +247 -0
- data/ext/q_span.c +1566 -0
- data/ext/q_term.c +308 -0
- data/ext/q_wildcard.c +146 -0
- data/ext/r_analysis.c +255 -0
- data/ext/r_doc.c +578 -0
- data/ext/r_index_io.c +996 -0
- data/ext/r_qparser.c +158 -0
- data/ext/r_search.c +2321 -0
- data/ext/r_store.c +263 -0
- data/ext/r_term.c +219 -0
- data/ext/ram_store.c +447 -0
- data/ext/search.c +524 -0
- data/ext/search.h +1065 -0
- data/ext/similarity.c +143 -39
- data/ext/sort.c +661 -0
- data/ext/store.c +35 -0
- data/ext/store.h +152 -0
- data/ext/term.c +704 -143
- data/ext/termdocs.c +599 -0
- data/ext/vector.c +594 -0
- data/lib/ferret.rb +9 -10
- data/lib/ferret/analysis/analyzers.rb +2 -2
- data/lib/ferret/analysis/standard_tokenizer.rb +1 -1
- data/lib/ferret/analysis/token.rb +14 -14
- data/lib/ferret/analysis/token_filters.rb +3 -3
- data/lib/ferret/document/field.rb +16 -17
- data/lib/ferret/index/document_writer.rb +4 -4
- data/lib/ferret/index/index.rb +39 -23
- data/lib/ferret/index/index_writer.rb +2 -2
- data/lib/ferret/index/multiple_term_doc_pos_enum.rb +1 -8
- data/lib/ferret/index/segment_term_vector.rb +4 -4
- data/lib/ferret/index/term.rb +5 -1
- data/lib/ferret/index/term_vector_offset_info.rb +6 -6
- data/lib/ferret/index/term_vectors_io.rb +5 -5
- data/lib/ferret/query_parser/query_parser.tab.rb +81 -77
- data/lib/ferret/search.rb +1 -1
- data/lib/ferret/search/boolean_query.rb +2 -1
- data/lib/ferret/search/field_sorted_hit_queue.rb +3 -3
- data/lib/ferret/search/fuzzy_query.rb +2 -1
- data/lib/ferret/search/index_searcher.rb +3 -0
- data/lib/ferret/search/{match_all_docs_query.rb → match_all_query.rb} +7 -7
- data/lib/ferret/search/multi_phrase_query.rb +6 -5
- data/lib/ferret/search/phrase_query.rb +3 -6
- data/lib/ferret/search/prefix_query.rb +4 -4
- data/lib/ferret/search/sort.rb +3 -1
- data/lib/ferret/search/sort_field.rb +9 -9
- data/lib/ferret/search/spans/near_spans_enum.rb +1 -1
- data/lib/ferret/search/spans/span_near_query.rb +1 -1
- data/lib/ferret/search/spans/span_weight.rb +1 -1
- data/lib/ferret/search/spans/spans_enum.rb +7 -7
- data/lib/ferret/store/fs_store.rb +10 -6
- data/lib/ferret/store/ram_store.rb +3 -3
- data/lib/rferret.rb +36 -0
- data/test/functional/thread_safety_index_test.rb +2 -2
- data/test/test_helper.rb +16 -2
- data/test/unit/analysis/c_token.rb +25 -0
- data/test/unit/analysis/tc_per_field_analyzer_wrapper.rb +1 -1
- data/test/unit/analysis/tc_standard_analyzer.rb +1 -1
- data/test/unit/document/{tc_document.rb → c_document.rb} +0 -0
- data/test/unit/document/c_field.rb +98 -0
- data/test/unit/document/tc_field.rb +0 -66
- data/test/unit/index/{tc_index.rb → c_index.rb} +62 -6
- data/test/unit/index/{tc_index_reader.rb → c_index_reader.rb} +51 -10
- data/test/unit/index/{tc_index_writer.rb → c_index_writer.rb} +0 -4
- data/test/unit/index/{tc_term.rb → c_term.rb} +1 -3
- data/test/unit/index/{tc_term_vector_offset_info.rb → c_term_voi.rb} +5 -5
- data/test/unit/index/tc_segment_term_vector.rb +2 -2
- data/test/unit/index/tc_term_vectors_io.rb +4 -4
- data/test/unit/query_parser/c_query_parser.rb +138 -0
- data/test/unit/search/{tc_filter.rb → c_filter.rb} +24 -24
- data/test/unit/search/{tc_fuzzy_query.rb → c_fuzzy_query.rb} +0 -0
- data/test/unit/search/{tc_index_searcher.rb → c_index_searcher.rb} +9 -26
- data/test/unit/search/{tc_search_and_sort.rb → c_search_and_sort.rb} +15 -15
- data/test/unit/search/{tc_sort.rb → c_sort.rb} +2 -1
- data/test/unit/search/c_sort_field.rb +27 -0
- data/test/unit/search/{tc_spans.rb → c_spans.rb} +0 -0
- data/test/unit/search/tc_sort_field.rb +7 -20
- data/test/unit/store/c_fs_store.rb +76 -0
- data/test/unit/store/c_ram_store.rb +35 -0
- data/test/unit/store/m_store.rb +34 -0
- data/test/unit/store/m_store_lock.rb +68 -0
- data/test/unit/store/tc_fs_store.rb +0 -53
- data/test/unit/store/tc_ram_store.rb +0 -20
- data/test/unit/store/tm_store.rb +0 -30
- data/test/unit/store/tm_store_lock.rb +0 -66
- metadata +84 -31
- data/ext/Makefile +0 -140
- data/ext/ferret_ext.so +0 -0
- data/ext/priority_queue.c +0 -232
- data/ext/ram_directory.c +0 -321
- data/ext/segment_merge_queue.c +0 -37
- data/ext/segment_term_enum.c +0 -326
- data/ext/string_helper.c +0 -42
- data/ext/tags +0 -344
- data/ext/term_buffer.c +0 -230
- data/ext/term_infos_reader.c +0 -54
- data/ext/terminfo.c +0 -160
- data/ext/token.c +0 -93
- data/ext/util.c +0 -12
@@ -123,16 +123,17 @@ module Ferret::Search
|
|
123
123
|
query_expl = Explanation.new()
|
124
124
|
query_expl.description = "query_weight(#{@query}), product of:"
|
125
125
|
|
126
|
-
|
127
|
-
|
128
|
-
|
126
|
+
boost = @query.boost()
|
127
|
+
if boost != 1.0
|
128
|
+
boost_expl = Explanation.new(boost, "boost")
|
129
|
+
query_expl << boost_expl
|
130
|
+
end
|
129
131
|
query_expl << idf_expl
|
130
132
|
|
131
133
|
query_norm_expl = Explanation.new(@query_norm,"query_norm")
|
132
134
|
query_expl << query_norm_expl
|
133
135
|
|
134
|
-
query_expl.value =
|
135
|
-
boost_expl.value * idf_expl.value * query_norm_expl.value
|
136
|
+
query_expl.value = boost * @idf * @query_norm
|
136
137
|
|
137
138
|
result << query_expl
|
138
139
|
|
@@ -127,7 +127,7 @@ module Ferret::Search
|
|
127
127
|
query_norm_expl = Explanation.new(@query_norm, "query_norm")
|
128
128
|
query_expl << query_norm_expl
|
129
129
|
|
130
|
-
query_expl.value = boost * @idf *
|
130
|
+
query_expl.value = boost * @idf * @query_norm
|
131
131
|
|
132
132
|
result << query_expl
|
133
133
|
|
@@ -150,15 +150,12 @@ module Ferret::Search
|
|
150
150
|
field_expl << field_norm_expl
|
151
151
|
|
152
152
|
field_expl.value = tf_expl.value * @idf * field_norm
|
153
|
-
|
154
153
|
result << field_expl
|
155
154
|
|
156
|
-
|
157
|
-
result.value = query_expl.value * field_expl.value
|
158
|
-
|
159
|
-
if query_expl.value == 1.0
|
155
|
+
if (query_expl.value == 1.0)
|
160
156
|
return field_expl
|
161
157
|
else
|
158
|
+
result.value = query_expl.value * field_expl.value
|
162
159
|
return result
|
163
160
|
end
|
164
161
|
end
|
@@ -23,10 +23,10 @@ module Ferret::Search
|
|
23
23
|
term.text[0,prefix_length] != prefix_text)
|
24
24
|
break
|
25
25
|
end
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
26
|
+
tq = TermQuery.new(term) # found a match
|
27
|
+
tq.boost = boost() # set the boost
|
28
|
+
bq.add_query(tq, BooleanClause::Occur::SHOULD) # add to query
|
29
|
+
#puts("added " + term)
|
30
30
|
end while (enumerator.next?)
|
31
31
|
ensure
|
32
32
|
enumerator.close()
|
data/lib/ferret/search/sort.rb
CHANGED
@@ -87,8 +87,10 @@ module Ferret::Search
|
|
87
87
|
SortField.new(field, {:sort_type => SortField::SortType::AUTO,
|
88
88
|
:reverse => reverse})
|
89
89
|
end
|
90
|
-
@fields << SortField::FIELD_DOC if @fields.size == 1
|
91
90
|
end
|
91
|
+
doc_sort_added = false
|
92
|
+
@fields.each {|f| doc_sort_added = true if f == SortField::FIELD_DOC }
|
93
|
+
@fields << SortField::FIELD_DOC if not doc_sort_added
|
92
94
|
end
|
93
95
|
|
94
96
|
# Represents sorting by computed relevance. Using this sort criteria returns
|
@@ -56,16 +56,16 @@ module Ferret::Search
|
|
56
56
|
# name:: Name of field to sort by. Can be +nil+ if +sort_type+ is SCORE or
|
57
57
|
# DOC.
|
58
58
|
#
|
59
|
-
#
|
60
|
-
# sort_type::
|
61
|
-
# reverse::
|
62
|
-
# comparator::
|
63
|
-
#
|
64
|
-
def initialize(name = nil,
|
59
|
+
# An options hash with the following values can also be supplied:
|
60
|
+
# sort_type:: Type of values in the terms.
|
61
|
+
# reverse:: True if natural order should be reversed.
|
62
|
+
# comparator:: A proc used to compare two values from the index. You can
|
63
|
+
# also give this value to the SortType object that you pass.
|
64
|
+
def initialize(name = nil, options= {})
|
65
65
|
@name = name.to_s if name
|
66
|
-
@sort_type =
|
67
|
-
@reverse =
|
68
|
-
@comparator =
|
66
|
+
@sort_type = options[:sort_type]||SortType::AUTO
|
67
|
+
@reverse = options[:reverse]||false
|
68
|
+
@comparator = options[:comparator]||@sort_type.comparator
|
69
69
|
if (@name == nil and @sort_type != SortType::DOC and
|
70
70
|
@sort_type != SortType::SCORE)
|
71
71
|
raise ArgumentError, "You must supply a field name for your sort field"
|
@@ -62,7 +62,7 @@ module Ferret::Search::Spans
|
|
62
62
|
@length = finish() - start() # compute new length
|
63
63
|
@parent.total_length += @length # add new length to total
|
64
64
|
|
65
|
-
if (@parent.max
|
65
|
+
if (@parent.max.nil? or doc() > @parent.max.doc() or # maintain max
|
66
66
|
(doc() == @parent.max.doc and finish() > @parent.max.finish))
|
67
67
|
@parent.max = self
|
68
68
|
end
|
@@ -2,7 +2,7 @@ module Ferret::Search::Spans
|
|
2
2
|
# Expert: an enumeration of span matches. Used to implement span searching.
|
3
3
|
# Each span represents a range of term positions within a document. Matches
|
4
4
|
# are enumerated in order, by increasing document number, within that by
|
5
|
-
# increasing start position and
|
5
|
+
# increasing start position and finally by increasing finish position.
|
6
6
|
class SpansEnum
|
7
7
|
# Move to the next match, returning true iff any such exists.
|
8
8
|
def next?()
|
@@ -13,12 +13,12 @@ module Ferret::Search::Spans
|
|
13
13
|
# greater than or equal to _target_. Returns true iff there is such a
|
14
14
|
# match. Behaves as if written:
|
15
15
|
#
|
16
|
-
#
|
17
|
-
#
|
18
|
-
#
|
19
|
-
#
|
20
|
-
#
|
21
|
-
#
|
16
|
+
# def skip_to(target)
|
17
|
+
# begin
|
18
|
+
# return false if (!next?)
|
19
|
+
# end while (target > doc)
|
20
|
+
# return true
|
21
|
+
# end
|
22
22
|
#
|
23
23
|
# Most implementations are considerably more efficient than that.
|
24
24
|
def skip_to(target)
|
@@ -38,7 +38,7 @@ module Ferret::Store
|
|
38
38
|
super()
|
39
39
|
if create then FileUtils.mkdir_p(path) end
|
40
40
|
if not File.directory?(path) then
|
41
|
-
raise "There is no directory: #{path}. Use create = true to create one"
|
41
|
+
raise IOError, "There is no directory: #{path}. Use create = true to create one"
|
42
42
|
end
|
43
43
|
@dir = Dir.new(path)
|
44
44
|
# put the lock_dir here as well if no default exists.
|
@@ -182,7 +182,7 @@ module Ferret::Store
|
|
182
182
|
|
183
183
|
# Construct a Lock.
|
184
184
|
def make_lock(name)
|
185
|
-
FSLock.new(@lock_dir.path + "/" + lock_prefix() + name)
|
185
|
+
FSLock.new(@lock_dir.path + "/" + lock_prefix() + name + ".lck")
|
186
186
|
end
|
187
187
|
|
188
188
|
# Closes the store.
|
@@ -285,7 +285,11 @@ module Ferret::Store
|
|
285
285
|
attr_reader :length, :file
|
286
286
|
|
287
287
|
def initialize(path)
|
288
|
-
|
288
|
+
begin
|
289
|
+
@file = File.open(path, "rb")
|
290
|
+
rescue Errno::ENOENT => e
|
291
|
+
raise StandardError.new(e.message)
|
292
|
+
end
|
289
293
|
@file.extend(MonitorMixin)
|
290
294
|
#class <<@file
|
291
295
|
# attr_accessor :ref_count
|
@@ -312,7 +316,7 @@ module Ferret::Store
|
|
312
316
|
private
|
313
317
|
|
314
318
|
def read_internal(b, offset, length)
|
315
|
-
|
319
|
+
#@file.synchronize do
|
316
320
|
position = pos()
|
317
321
|
if position != @file.pos
|
318
322
|
@file.seek(position)
|
@@ -322,7 +326,7 @@ module Ferret::Store
|
|
322
326
|
raise EOFError, "Read past EOF in #{@file.path}"
|
323
327
|
end
|
324
328
|
b[offset, bytes.length] = bytes
|
325
|
-
end
|
329
|
+
#end
|
326
330
|
end
|
327
331
|
|
328
332
|
def seek_internal(pos)
|
@@ -340,7 +344,7 @@ module Ferret::Store
|
|
340
344
|
|
341
345
|
# returns the lock prefix for this directory
|
342
346
|
def lock_prefix
|
343
|
-
LOCK_PREFIX
|
347
|
+
LOCK_PREFIX
|
344
348
|
end
|
345
349
|
|
346
350
|
# Unfortunately, on Windows, Dir does not refresh when rewind is called
|
@@ -89,7 +89,7 @@ module Ferret::Store
|
|
89
89
|
|
90
90
|
# Construct a Lock.
|
91
91
|
def make_lock(name)
|
92
|
-
RAMLock.new(LOCK_PREFIX + name, self)
|
92
|
+
RAMLock.new(LOCK_PREFIX + name + ".lck", self)
|
93
93
|
end
|
94
94
|
|
95
95
|
|
@@ -252,14 +252,14 @@ module Ferret::Store
|
|
252
252
|
# obtain the lock on the data source
|
253
253
|
def obtain(lock_timeout = 1)
|
254
254
|
MAX_ATTEMPTS.times do
|
255
|
-
|
255
|
+
#@dir.synchronize do
|
256
256
|
# create a file if none exists. If one already exists
|
257
257
|
# then someone beat us to the lock so return false
|
258
258
|
if (! locked?) then
|
259
259
|
@dir.create_output(@lock_file)
|
260
260
|
return true
|
261
261
|
end
|
262
|
-
end
|
262
|
+
#end
|
263
263
|
# lock was not obtained so sleep for timeout then try again.
|
264
264
|
sleep(lock_timeout)
|
265
265
|
end
|
data/lib/rferret.rb
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (c) 2005 David Balmain
|
3
|
+
#
|
4
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
5
|
+
# a copy of this software and associated documentation files (the
|
6
|
+
# "Software"), to deal in the Software without restriction, including
|
7
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
8
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
9
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
10
|
+
# the following conditions:
|
11
|
+
#
|
12
|
+
# The above copyright notice and this permission notice shall be
|
13
|
+
# included in all copies or substantial portions of the Software.
|
14
|
+
#
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
16
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
17
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
18
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
19
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
20
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
21
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
22
|
+
#++
|
23
|
+
# :include: ../TUTORIAL
|
24
|
+
module Ferret
|
25
|
+
VERSION = '0.9.0'
|
26
|
+
end
|
27
|
+
|
28
|
+
$ferret_pure_ruby = true
|
29
|
+
require 'ferret/utils'
|
30
|
+
require 'ferret/document'
|
31
|
+
require 'ferret/stemmers'
|
32
|
+
require 'ferret/analysis'
|
33
|
+
require 'ferret/store'
|
34
|
+
require 'ferret/index'
|
35
|
+
require 'ferret/search'
|
36
|
+
require 'ferret/query_parser'
|
@@ -7,8 +7,8 @@ class IndexThreadSafetyTest < Test::Unit::TestCase
|
|
7
7
|
include Ferret::Document
|
8
8
|
|
9
9
|
INDEX_DIR = File.expand_path(File.join(File.dirname(__FILE__), "index"))
|
10
|
-
ITERATIONS =
|
11
|
-
NUM_THREADS =
|
10
|
+
ITERATIONS = 100
|
11
|
+
NUM_THREADS = 10
|
12
12
|
ANALYZER = Ferret::Analysis::Analyzer.new()
|
13
13
|
|
14
14
|
def setup
|
data/test/test_helper.rb
CHANGED
@@ -2,13 +2,27 @@ $:.unshift File.dirname(__FILE__)
|
|
2
2
|
$:.unshift File.join(File.dirname(__FILE__), '../lib')
|
3
3
|
$:.unshift File.join(File.dirname(__FILE__), '../ext')
|
4
4
|
|
5
|
+
class Float
|
6
|
+
def =~(o)
|
7
|
+
return (1 - self/o).abs < 0.00001
|
8
|
+
end
|
9
|
+
end
|
10
|
+
|
5
11
|
require 'test/unit'
|
6
|
-
require 'ferret'
|
7
12
|
require 'unit/index/th_doc'
|
13
|
+
if $ferret_pure_ruby
|
14
|
+
require 'rferret'
|
15
|
+
else
|
16
|
+
require 'ferret'
|
17
|
+
end
|
8
18
|
|
9
19
|
def load_test_dir(dir)
|
10
20
|
dir = File.join(File.dirname(__FILE__), dir)
|
11
21
|
Dir.foreach(dir) do |file|
|
12
|
-
|
22
|
+
if $ferret_pure_ruby
|
23
|
+
require File.join(dir, file) if file =~ /^t?[mcs]_.*\.rb$/
|
24
|
+
else
|
25
|
+
require File.join(dir, file) if file =~ /^[mcs]_.*\.rb$/
|
26
|
+
end
|
13
27
|
end
|
14
28
|
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
require File.dirname(__FILE__) + "/../../test_helper"
|
2
|
+
|
3
|
+
class TokenTest < Test::Unit::TestCase
|
4
|
+
include Ferret::Analysis
|
5
|
+
|
6
|
+
def test_token()
|
7
|
+
tk1 = Token.new("DBalmain", 1, 8, 5, "token")
|
8
|
+
assert_equal(tk1, Token.new("DBalmain", 1, 8))
|
9
|
+
assert_not_equal(tk1, Token.new("DBalmain", 0, 8))
|
10
|
+
assert_not_equal(tk1, Token.new("DBalmain", 1, 9))
|
11
|
+
assert_not_equal(tk1, Token.new("Dbalmain", 1, 8))
|
12
|
+
assert(tk1 < Token.new("CBalmain", 2, 7))
|
13
|
+
assert(tk1 > Token.new("EBalmain", 0, 9))
|
14
|
+
assert(tk1 < Token.new("CBalmain", 1, 9))
|
15
|
+
assert(tk1 > Token.new("EBalmain", 1, 7))
|
16
|
+
assert(tk1 < Token.new("EBalmain", 1, 8))
|
17
|
+
assert(tk1 > Token.new("CBalmain", 1, 8))
|
18
|
+
assert_equal("DBalmain", tk1.text)
|
19
|
+
tk1.text = "Hello"
|
20
|
+
assert_equal("Hello", tk1.text)
|
21
|
+
assert_equal(1, tk1.start_offset)
|
22
|
+
assert_equal(8, tk1.end_offset)
|
23
|
+
assert_equal(5, tk1.pos_inc)
|
24
|
+
end
|
25
|
+
end
|
@@ -25,7 +25,7 @@ class PerFieldAnalyzerWrapperTest < Test::Unit::TestCase
|
|
25
25
|
assert_equal(Token.new('My', 22, 24), t.next())
|
26
26
|
assert_equal(Token.new('e-mail', 25, 31), t.next())
|
27
27
|
assert_equal(Token.new("ADDRESS", 32, 39), t.next())
|
28
|
-
if ( token = t.next()): puts token.
|
28
|
+
if ( token = t.next()): puts token.text end
|
29
29
|
assert(! t.next())
|
30
30
|
input.reset()
|
31
31
|
t = aw.token_stream("body", input)
|
@@ -4,7 +4,7 @@ class StandardAnalyzerTest < Test::Unit::TestCase
|
|
4
4
|
include Ferret::Utils::StringHelper
|
5
5
|
include Ferret::Analysis
|
6
6
|
|
7
|
-
def
|
7
|
+
def test_standard_analyzer()
|
8
8
|
input = StringReader.new('D.Ba_l-n@gma-l.com AB&Sons Toys\'r\'us you\'re she\'s, #$%^$%*& job@dot I.B.M. the an AnD THEIR')
|
9
9
|
sa = StandardAnalyzer.new()
|
10
10
|
t = sa.token_stream("field", input)
|
File without changes
|
@@ -0,0 +1,98 @@
|
|
1
|
+
require File.dirname(__FILE__) + "/../../test_helper"
|
2
|
+
|
3
|
+
|
4
|
+
class FieldTest < Test::Unit::TestCase
|
5
|
+
include Ferret::Document
|
6
|
+
include Ferret::Utils
|
7
|
+
|
8
|
+
def test_store()
|
9
|
+
assert_not_nil(Field::Store::COMPRESS)
|
10
|
+
assert_not_nil(Field::Store::YES)
|
11
|
+
assert_not_nil(Field::Store::NO)
|
12
|
+
end
|
13
|
+
|
14
|
+
def test_index()
|
15
|
+
assert_not_nil(Field::Index::TOKENIZED)
|
16
|
+
assert_not_nil(Field::Index::UNTOKENIZED)
|
17
|
+
assert_not_nil(Field::Index::NO)
|
18
|
+
assert_not_nil(Field::Index::NO_NORMS)
|
19
|
+
end
|
20
|
+
|
21
|
+
def test_term_vector()
|
22
|
+
assert_not_nil(Field::TermVector::YES)
|
23
|
+
assert_not_nil(Field::TermVector::NO)
|
24
|
+
assert_not_nil(Field::TermVector::WITH_POSITIONS)
|
25
|
+
assert_not_nil(Field::TermVector::WITH_OFFSETS)
|
26
|
+
assert_not_nil(Field::TermVector::WITH_POSITIONS_OFFSETS)
|
27
|
+
end
|
28
|
+
|
29
|
+
def test_standard_field()
|
30
|
+
f = Field.new("name", "value", Field::Store::COMPRESS, Field::Index::TOKENIZED)
|
31
|
+
assert_equal("name", f.name)
|
32
|
+
assert_equal("value", f.data)
|
33
|
+
assert_equal(true, f.stored?)
|
34
|
+
assert_equal(true, f.compressed?)
|
35
|
+
assert_equal(true, f.indexed?)
|
36
|
+
assert_equal(true, f.tokenized?)
|
37
|
+
assert_equal(false, f.store_term_vector?)
|
38
|
+
assert_equal(false, f.store_offsets?)
|
39
|
+
assert_equal(false, f.store_positions?)
|
40
|
+
assert_equal(false, f.omit_norms?)
|
41
|
+
assert_equal(false, f.binary?)
|
42
|
+
assert_equal("stored/compressed,indexed,tokenized,<name:value>", f.to_s)
|
43
|
+
f.data = "183"
|
44
|
+
f.boost = 0.001
|
45
|
+
assert_equal("183", f.data)
|
46
|
+
assert(0.001 =~ f.boost)
|
47
|
+
end
|
48
|
+
|
49
|
+
def test_set_store()
|
50
|
+
f = Field.new("name", "", Field::Store::COMPRESS, Field::Index::TOKENIZED)
|
51
|
+
f.store = Field::Store::NO
|
52
|
+
assert_equal(false, f.stored?)
|
53
|
+
assert_equal(false, f.compressed?)
|
54
|
+
assert_equal("indexed,tokenized,<name:>", f.to_s)
|
55
|
+
end
|
56
|
+
|
57
|
+
def test_set_index()
|
58
|
+
f = Field.new("name", "value", Field::Store::COMPRESS, Field::Index::TOKENIZED)
|
59
|
+
f.index = Field::Index::NO
|
60
|
+
assert_equal(false, f.indexed?)
|
61
|
+
assert_equal(false, f.tokenized?)
|
62
|
+
assert_equal(false, f.omit_norms?)
|
63
|
+
assert_equal("stored/compressed,<name:value>", f.to_s)
|
64
|
+
f.index = Field::Index::NO_NORMS
|
65
|
+
assert_equal(true, f.indexed?)
|
66
|
+
assert_equal(false, f.tokenized?)
|
67
|
+
assert_equal(true, f.omit_norms?)
|
68
|
+
assert_equal("stored/compressed,indexed,omit_norms,<name:value>", f.to_s)
|
69
|
+
end
|
70
|
+
|
71
|
+
def test_set_term_vector()
|
72
|
+
f = Field.new("name", "value", Field::Store::COMPRESS, Field::Index::TOKENIZED)
|
73
|
+
f.term_vector = Field::TermVector::WITH_POSITIONS_OFFSETS
|
74
|
+
assert_equal(true, f.store_term_vector?)
|
75
|
+
assert_equal(true, f.store_offsets?)
|
76
|
+
assert_equal(true, f.store_positions?)
|
77
|
+
assert_equal("stored/compressed,indexed,tokenized,store_term_vector,store_offsets,store_positions,<name:value>", f.to_s)
|
78
|
+
end
|
79
|
+
|
80
|
+
def test_new_binary_field()
|
81
|
+
tmp = []
|
82
|
+
256.times {|i| tmp[i] = i}
|
83
|
+
bin = tmp.pack("c*")
|
84
|
+
f = Field.new_binary_field("name", bin, Field::Store::YES)
|
85
|
+
assert_equal("name", f.name)
|
86
|
+
assert_equal(bin, f.data)
|
87
|
+
assert_equal(true, f.stored?)
|
88
|
+
assert_equal(false, f.compressed?)
|
89
|
+
assert_equal(false, f.indexed?)
|
90
|
+
assert_equal(false, f.tokenized?)
|
91
|
+
assert_equal(false, f.store_term_vector?)
|
92
|
+
assert_equal(false, f.store_offsets?)
|
93
|
+
assert_equal(false, f.store_positions?)
|
94
|
+
assert_equal(false, f.omit_norms?)
|
95
|
+
assert_equal(true, f.binary?)
|
96
|
+
assert_equal("stored/uncompressed,binary,<name:=bin_data=>", f.to_s)
|
97
|
+
end
|
98
|
+
end
|