ferret 0.3.2 → 0.9.0
- data/CHANGELOG +9 -0
- data/Rakefile +51 -25
- data/ext/analysis.c +553 -0
- data/ext/analysis.h +76 -0
- data/ext/array.c +83 -0
- data/ext/array.h +19 -0
- data/ext/bitvector.c +164 -0
- data/ext/bitvector.h +29 -0
- data/ext/compound_io.c +335 -0
- data/ext/document.c +336 -0
- data/ext/document.h +87 -0
- data/ext/ferret.c +88 -47
- data/ext/ferret.h +43 -109
- data/ext/field.c +395 -0
- data/ext/filter.c +103 -0
- data/ext/fs_store.c +352 -0
- data/ext/global.c +219 -0
- data/ext/global.h +73 -0
- data/ext/hash.c +446 -0
- data/ext/hash.h +80 -0
- data/ext/hashset.c +141 -0
- data/ext/hashset.h +37 -0
- data/ext/helper.c +11 -0
- data/ext/helper.h +5 -0
- data/ext/inc/lang.h +41 -0
- data/ext/ind.c +389 -0
- data/ext/index.h +884 -0
- data/ext/index_io.c +269 -415
- data/ext/index_rw.c +2543 -0
- data/ext/lang.c +31 -0
- data/ext/lang.h +41 -0
- data/ext/priorityqueue.c +228 -0
- data/ext/priorityqueue.h +44 -0
- data/ext/q_boolean.c +1331 -0
- data/ext/q_const_score.c +154 -0
- data/ext/q_fuzzy.c +287 -0
- data/ext/q_match_all.c +142 -0
- data/ext/q_multi_phrase.c +343 -0
- data/ext/q_parser.c +2180 -0
- data/ext/q_phrase.c +657 -0
- data/ext/q_prefix.c +75 -0
- data/ext/q_range.c +247 -0
- data/ext/q_span.c +1566 -0
- data/ext/q_term.c +308 -0
- data/ext/q_wildcard.c +146 -0
- data/ext/r_analysis.c +255 -0
- data/ext/r_doc.c +578 -0
- data/ext/r_index_io.c +996 -0
- data/ext/r_qparser.c +158 -0
- data/ext/r_search.c +2321 -0
- data/ext/r_store.c +263 -0
- data/ext/r_term.c +219 -0
- data/ext/ram_store.c +447 -0
- data/ext/search.c +524 -0
- data/ext/search.h +1065 -0
- data/ext/similarity.c +143 -39
- data/ext/sort.c +661 -0
- data/ext/store.c +35 -0
- data/ext/store.h +152 -0
- data/ext/term.c +704 -143
- data/ext/termdocs.c +599 -0
- data/ext/vector.c +594 -0
- data/lib/ferret.rb +9 -10
- data/lib/ferret/analysis/analyzers.rb +2 -2
- data/lib/ferret/analysis/standard_tokenizer.rb +1 -1
- data/lib/ferret/analysis/token.rb +14 -14
- data/lib/ferret/analysis/token_filters.rb +3 -3
- data/lib/ferret/document/field.rb +16 -17
- data/lib/ferret/index/document_writer.rb +4 -4
- data/lib/ferret/index/index.rb +39 -23
- data/lib/ferret/index/index_writer.rb +2 -2
- data/lib/ferret/index/multiple_term_doc_pos_enum.rb +1 -8
- data/lib/ferret/index/segment_term_vector.rb +4 -4
- data/lib/ferret/index/term.rb +5 -1
- data/lib/ferret/index/term_vector_offset_info.rb +6 -6
- data/lib/ferret/index/term_vectors_io.rb +5 -5
- data/lib/ferret/query_parser/query_parser.tab.rb +81 -77
- data/lib/ferret/search.rb +1 -1
- data/lib/ferret/search/boolean_query.rb +2 -1
- data/lib/ferret/search/field_sorted_hit_queue.rb +3 -3
- data/lib/ferret/search/fuzzy_query.rb +2 -1
- data/lib/ferret/search/index_searcher.rb +3 -0
- data/lib/ferret/search/{match_all_docs_query.rb → match_all_query.rb} +7 -7
- data/lib/ferret/search/multi_phrase_query.rb +6 -5
- data/lib/ferret/search/phrase_query.rb +3 -6
- data/lib/ferret/search/prefix_query.rb +4 -4
- data/lib/ferret/search/sort.rb +3 -1
- data/lib/ferret/search/sort_field.rb +9 -9
- data/lib/ferret/search/spans/near_spans_enum.rb +1 -1
- data/lib/ferret/search/spans/span_near_query.rb +1 -1
- data/lib/ferret/search/spans/span_weight.rb +1 -1
- data/lib/ferret/search/spans/spans_enum.rb +7 -7
- data/lib/ferret/store/fs_store.rb +10 -6
- data/lib/ferret/store/ram_store.rb +3 -3
- data/lib/rferret.rb +36 -0
- data/test/functional/thread_safety_index_test.rb +2 -2
- data/test/test_helper.rb +16 -2
- data/test/unit/analysis/c_token.rb +25 -0
- data/test/unit/analysis/tc_per_field_analyzer_wrapper.rb +1 -1
- data/test/unit/analysis/tc_standard_analyzer.rb +1 -1
- data/test/unit/document/{tc_document.rb → c_document.rb} +0 -0
- data/test/unit/document/c_field.rb +98 -0
- data/test/unit/document/tc_field.rb +0 -66
- data/test/unit/index/{tc_index.rb → c_index.rb} +62 -6
- data/test/unit/index/{tc_index_reader.rb → c_index_reader.rb} +51 -10
- data/test/unit/index/{tc_index_writer.rb → c_index_writer.rb} +0 -4
- data/test/unit/index/{tc_term.rb → c_term.rb} +1 -3
- data/test/unit/index/{tc_term_vector_offset_info.rb → c_term_voi.rb} +5 -5
- data/test/unit/index/tc_segment_term_vector.rb +2 -2
- data/test/unit/index/tc_term_vectors_io.rb +4 -4
- data/test/unit/query_parser/c_query_parser.rb +138 -0
- data/test/unit/search/{tc_filter.rb → c_filter.rb} +24 -24
- data/test/unit/search/{tc_fuzzy_query.rb → c_fuzzy_query.rb} +0 -0
- data/test/unit/search/{tc_index_searcher.rb → c_index_searcher.rb} +9 -26
- data/test/unit/search/{tc_search_and_sort.rb → c_search_and_sort.rb} +15 -15
- data/test/unit/search/{tc_sort.rb → c_sort.rb} +2 -1
- data/test/unit/search/c_sort_field.rb +27 -0
- data/test/unit/search/{tc_spans.rb → c_spans.rb} +0 -0
- data/test/unit/search/tc_sort_field.rb +7 -20
- data/test/unit/store/c_fs_store.rb +76 -0
- data/test/unit/store/c_ram_store.rb +35 -0
- data/test/unit/store/m_store.rb +34 -0
- data/test/unit/store/m_store_lock.rb +68 -0
- data/test/unit/store/tc_fs_store.rb +0 -53
- data/test/unit/store/tc_ram_store.rb +0 -20
- data/test/unit/store/tm_store.rb +0 -30
- data/test/unit/store/tm_store_lock.rb +0 -66
- metadata +84 -31
- data/ext/Makefile +0 -140
- data/ext/ferret_ext.so +0 -0
- data/ext/priority_queue.c +0 -232
- data/ext/ram_directory.c +0 -321
- data/ext/segment_merge_queue.c +0 -37
- data/ext/segment_term_enum.c +0 -326
- data/ext/string_helper.c +0 -42
- data/ext/tags +0 -344
- data/ext/term_buffer.c +0 -230
- data/ext/term_infos_reader.c +0 -54
- data/ext/terminfo.c +0 -160
- data/ext/token.c +0 -93
- data/ext/util.c +0 -12
@@ -123,16 +123,17 @@ module Ferret::Search
       query_expl = Explanation.new()
       query_expl.description = "query_weight(#{@query}), product of:"
 
-
-
-
+      boost = @query.boost()
+      if boost != 1.0
+        boost_expl = Explanation.new(boost, "boost")
+        query_expl << boost_expl
+      end
       query_expl << idf_expl
 
       query_norm_expl = Explanation.new(@query_norm,"query_norm")
       query_expl << query_norm_expl
 
-      query_expl.value =
-        boost_expl.value * idf_expl.value * query_norm_expl.value
+      query_expl.value = boost * @idf * @query_norm
 
       result << query_expl
 
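For orientation, the rewritten explanation above computes its value directly from the three factors. A quick worked example with illustrative numbers only (not taken from any test in this gem):

    boost      = 2.0    # @query.boost()
    idf        = 1.5    # @idf
    query_norm = 0.5    # @query_norm
    query_expl_value = boost * idf * query_norm   # => 1.5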
@@ -127,7 +127,7 @@ module Ferret::Search
       query_norm_expl = Explanation.new(@query_norm, "query_norm")
       query_expl << query_norm_expl
 
-      query_expl.value = boost * @idf *
+      query_expl.value = boost * @idf * @query_norm
 
       result << query_expl
 
@@ -150,15 +150,12 @@ module Ferret::Search
       field_expl << field_norm_expl
 
       field_expl.value = tf_expl.value * @idf * field_norm
-
       result << field_expl
 
-
-      result.value = query_expl.value * field_expl.value
-
-      if query_expl.value == 1.0
+      if (query_expl.value == 1.0)
         return field_expl
       else
+        result.value = query_expl.value * field_expl.value
         return result
       end
     end
@@ -23,10 +23,10 @@ module Ferret::Search
             term.text[0,prefix_length] != prefix_text)
           break
         end
-
-
-
-
+        tq = TermQuery.new(term)        # found a match
+        tq.boost = boost()              # set the boost
+        bq.add_query(tq, BooleanClause::Occur::SHOULD) # add to query
+        #puts("added " + term)
       end while (enumerator.next?)
     ensure
       enumerator.close()
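A minimal usage sketch of the PrefixQuery rewrite restored above: the query expands into a BooleanQuery of SHOULD TermQuery clauses, one per matching term, each carrying the prefix query's boost. The field name below is hypothetical, not taken from this diff.

    include Ferret::Search
    include Ferret::Index

    query = PrefixQuery.new(Term.new("category", "sport"))
    query.boost = 2.0   # copied onto every generated TermQuery by rewrite (see hunk above)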
data/lib/ferret/search/sort.rb
CHANGED
@@ -87,8 +87,10 @@ module Ferret::Search
           SortField.new(field, {:sort_type => SortField::SortType::AUTO,
                                 :reverse => reverse})
         end
-        @fields << SortField::FIELD_DOC if @fields.size == 1
       end
+      doc_sort_added = false
+      @fields.each {|f| doc_sort_added = true if f == SortField::FIELD_DOC }
+      @fields << SortField::FIELD_DOC if not doc_sort_added
     end
 
     # Represents sorting by computed relevance. Using this sort criteria returns
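A sketch of the effect of the Sort fix above: SortField::FIELD_DOC is now appended as a tie-breaker only when it is not already among the supplied fields. The field name is hypothetical, and passing a ready-made SortField alongside a field name is assumed to be accepted by the constructor.

    include Ferret::Search

    s1 = Sort.new(["title"])                        # fields => title, FIELD_DOC
    s2 = Sort.new(["title", SortField::FIELD_DOC])  # fields => title, FIELD_DOC (no duplicate)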
@@ -56,16 +56,16 @@ module Ferret::Search
     # name:: Name of field to sort by. Can be +nil+ if +sort_type+ is SCORE or
     #        DOC.
     #
-    #
-    # sort_type::
-    # reverse::
-    # comparator::
-    #
-    def initialize(name = nil,
+    # An options hash with the followind values can also be supplied;
+    # sort_type::  Type of values in the terms.
+    # reverse::    True if natural order should be reversed.
+    # comparator:: A proc used to compare two values from the index. You can
+    #              also give this value to the SortType object that you pass.
+    def initialize(name = nil, options= {})
       @name = name.to_s if name
-      @sort_type =
-      @reverse =
-      @comparator =
+      @sort_type = options[:sort_type]||SortType::AUTO
+      @reverse = options[:reverse]||false
+      @comparator = options[:comparator]||@sort_type.comparator
       if (@name == nil and @sort_type != SortType::DOC and
           @sort_type != SortType::SCORE)
         raise ArgumentError, "You must supply a field name for your sort field"
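A sketch of the options-hash form documented above (the "age" field and the comparator proc are hypothetical; SortField::SortType::AUTO is the default shown in the hunk):

    include Ferret::Search

    by_age = SortField.new("age",
                           :sort_type  => SortField::SortType::AUTO,
                           :reverse    => true,
                           :comparator => lambda {|a, b| a.to_i <=> b.to_i })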
@@ -62,7 +62,7 @@ module Ferret::Search::Spans
        @length = finish() - start()     # compute new length
        @parent.total_length += @length  # add new length to total
 
-        if (@parent.max
+        if (@parent.max.nil? or doc() > @parent.max.doc() or # maintain max
            (doc() == @parent.max.doc and finish() > @parent.max.finish))
          @parent.max = self
        end
@@ -2,7 +2,7 @@ module Ferret::Search::Spans
   # Expert: an enumeration of span matches. Used to implement span searching.
   # Each span represents a range of term positions within a document. Matches
   # are enumerated in order, by increasing document number, within that by
-  # increasing start position and
+  # increasing start position and finally by increasing finish position.
   class SpansEnum
     # Move to the next match, returning true iff any such exists.
     def next?()
@@ -13,12 +13,12 @@ module Ferret::Search::Spans
     # greater than or equal to _target_. Returns true iff there is such a
     # match. Behaves as if written:
     #
-    #
-    #
-    #
-    #
-    #
-    #
+    #   def skip_to(target)
+    #     begin
+    #       return false if (!next?)
+    #     end while (target > doc)
+    #     return true
+    #   end
     #
     # Most implementations are considerably more efficient than that.
     def skip_to(target)
@@ -38,7 +38,7 @@ module Ferret::Store
       super()
       if create then FileUtils.mkdir_p(path) end
       if not File.directory?(path) then
-        raise "There is no directory: #{path}. Use create = true to create one"
+        raise IOError, "There is no directory: #{path}. Use create = true to create one"
       end
       @dir = Dir.new(path)
       # put the lock_dir here as well if no default exists.
@@ -182,7 +182,7 @@ module Ferret::Store
 
     # Construct a Lock.
     def make_lock(name)
-      FSLock.new(@lock_dir.path + "/" + lock_prefix() + name)
+      FSLock.new(@lock_dir.path + "/" + lock_prefix() + name + ".lck")
     end
 
     # Closes the store.
@@ -285,7 +285,11 @@ module Ferret::Store
       attr_reader :length, :file
 
       def initialize(path)
-
+        begin
+          @file = File.open(path, "rb")
+        rescue Errno::ENOENT => e
+          raise StandardError.new(e.message)
+        end
         @file.extend(MonitorMixin)
         #class <<@file
         #  attr_accessor :ref_count
@@ -312,7 +316,7 @@ module Ferret::Store
       private
 
       def read_internal(b, offset, length)
-
+        #@file.synchronize do
           position = pos()
           if position != @file.pos
             @file.seek(position)
@@ -322,7 +326,7 @@ module Ferret::Store
             raise EOFError, "Read past EOF in #{@file.path}"
           end
           b[offset, bytes.length] = bytes
-        end
+        #end
       end
 
       def seek_internal(pos)
@@ -340,7 +344,7 @@ module Ferret::Store
 
       # returns the lock prefix for this directory
       def lock_prefix
-        LOCK_PREFIX
+        LOCK_PREFIX
       end
 
       # Unfortunately, on Windows, Dir does not refresh when rewind is called
@@ -89,7 +89,7 @@ module Ferret::Store
 
     # Construct a Lock.
     def make_lock(name)
-      RAMLock.new(LOCK_PREFIX + name, self)
+      RAMLock.new(LOCK_PREFIX + name + ".lck", self)
     end
 
 
@@ -252,14 +252,14 @@ module Ferret::Store
       # obtain the lock on the data source
       def obtain(lock_timeout = 1)
         MAX_ATTEMPTS.times do
-
+          #@dir.synchronize do
             # create a file if none exists. If one already exists
             # then someone beat us to the lock so return false
             if (! locked?) then
               @dir.create_output(@lock_file)
               return true
             end
-          end
+          #end
           # lock was not obtained so sleep for timeout then try again.
           sleep(lock_timeout)
         end
data/lib/rferret.rb
ADDED
@@ -0,0 +1,36 @@
+#--
+# Copyright (c) 2005 David Balmain
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#++
+# :include: ../TUTORIAL
+module Ferret
+  VERSION = '0.9.0'
+end
+
+$ferret_pure_ruby = true
+require 'ferret/utils'
+require 'ferret/document'
+require 'ferret/stemmers'
+require 'ferret/analysis'
+require 'ferret/store'
+require 'ferret/index'
+require 'ferret/search'
+require 'ferret/query_parser'
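rferret.rb gives the gem a second entry point: requiring it pulls in the pure-Ruby implementation and sets $ferret_pure_ruby, while requiring 'ferret' continues to load the C-extension build. A minimal sketch:

    require 'rferret'        # pure-Ruby implementation, sets $ferret_pure_ruby = true
    puts Ferret::VERSION     # => "0.9.0"

    # in a separate process, the extension-backed library would be loaded with:
    # require 'ferret'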
@@ -7,8 +7,8 @@ class IndexThreadSafetyTest < Test::Unit::TestCase
   include Ferret::Document
 
   INDEX_DIR = File.expand_path(File.join(File.dirname(__FILE__), "index"))
-  ITERATIONS =
-  NUM_THREADS =
+  ITERATIONS = 100
+  NUM_THREADS = 10
   ANALYZER = Ferret::Analysis::Analyzer.new()
 
   def setup
data/test/test_helper.rb
CHANGED
@@ -2,13 +2,27 @@ $:.unshift File.dirname(__FILE__)
 $:.unshift File.join(File.dirname(__FILE__), '../lib')
 $:.unshift File.join(File.dirname(__FILE__), '../ext')
 
+class Float
+  def =~(o)
+    return (1 - self/o).abs < 0.00001
+  end
+end
+
 require 'test/unit'
-require 'ferret'
 require 'unit/index/th_doc'
+if $ferret_pure_ruby
+  require 'rferret'
+else
+  require 'ferret'
+end
 
 def load_test_dir(dir)
   dir = File.join(File.dirname(__FILE__), dir)
   Dir.foreach(dir) do |file|
-
+    if $ferret_pure_ruby
+      require File.join(dir, file) if file =~ /^t?[mcs]_.*\.rb$/
+    else
+      require File.join(dir, file) if file =~ /^[mcs]_.*\.rb$/
+    end
   end
 end
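The Float#=~ helper added to test_helper.rb gives the test suite an approximate equality check (relative error below 0.00001); for illustration:

    1.000001 =~ 1.0   # => true,  |1 - 1.000001/1.0| = 1.0e-06 < 0.00001
    1.001    =~ 1.0   # => false, |1 - 1.001/1.0|    = 1.0e-03
    # used below in c_field.rb as: assert(0.001 =~ f.boost)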
@@ -0,0 +1,25 @@
+require File.dirname(__FILE__) + "/../../test_helper"
+
+class TokenTest < Test::Unit::TestCase
+  include Ferret::Analysis
+
+  def test_token()
+    tk1 = Token.new("DBalmain", 1, 8, 5, "token")
+    assert_equal(tk1, Token.new("DBalmain", 1, 8))
+    assert_not_equal(tk1, Token.new("DBalmain", 0, 8))
+    assert_not_equal(tk1, Token.new("DBalmain", 1, 9))
+    assert_not_equal(tk1, Token.new("Dbalmain", 1, 8))
+    assert(tk1 < Token.new("CBalmain", 2, 7))
+    assert(tk1 > Token.new("EBalmain", 0, 9))
+    assert(tk1 < Token.new("CBalmain", 1, 9))
+    assert(tk1 > Token.new("EBalmain", 1, 7))
+    assert(tk1 < Token.new("EBalmain", 1, 8))
+    assert(tk1 > Token.new("CBalmain", 1, 8))
+    assert_equal("DBalmain", tk1.text)
+    tk1.text = "Hello"
+    assert_equal("Hello", tk1.text)
+    assert_equal(1, tk1.start_offset)
+    assert_equal(8, tk1.end_offset)
+    assert_equal(5, tk1.pos_inc)
+  end
+end
@@ -25,7 +25,7 @@ class PerFieldAnalyzerWrapperTest < Test::Unit::TestCase
     assert_equal(Token.new('My', 22, 24), t.next())
     assert_equal(Token.new('e-mail', 25, 31), t.next())
     assert_equal(Token.new("ADDRESS", 32, 39), t.next())
-    if ( token = t.next()): puts token.
+    if ( token = t.next()): puts token.text end
     assert(! t.next())
     input.reset()
     t = aw.token_stream("body", input)
@@ -4,7 +4,7 @@ class StandardAnalyzerTest < Test::Unit::TestCase
   include Ferret::Utils::StringHelper
   include Ferret::Analysis
 
-  def
+  def test_standard_analyzer()
     input = StringReader.new('D.Ba_l-n@gma-l.com AB&Sons Toys\'r\'us you\'re she\'s, #$%^$%*& job@dot I.B.M. the an AnD THEIR')
     sa = StandardAnalyzer.new()
     t = sa.token_stream("field", input)
File without changes

@@ -0,0 +1,98 @@
+require File.dirname(__FILE__) + "/../../test_helper"
+
+
+class FieldTest < Test::Unit::TestCase
+  include Ferret::Document
+  include Ferret::Utils
+
+  def test_store()
+    assert_not_nil(Field::Store::COMPRESS)
+    assert_not_nil(Field::Store::YES)
+    assert_not_nil(Field::Store::NO)
+  end
+
+  def test_index()
+    assert_not_nil(Field::Index::TOKENIZED)
+    assert_not_nil(Field::Index::UNTOKENIZED)
+    assert_not_nil(Field::Index::NO)
+    assert_not_nil(Field::Index::NO_NORMS)
+  end
+
+  def test_term_vector()
+    assert_not_nil(Field::TermVector::YES)
+    assert_not_nil(Field::TermVector::NO)
+    assert_not_nil(Field::TermVector::WITH_POSITIONS)
+    assert_not_nil(Field::TermVector::WITH_OFFSETS)
+    assert_not_nil(Field::TermVector::WITH_POSITIONS_OFFSETS)
+  end
+
+  def test_standard_field()
+    f = Field.new("name", "value", Field::Store::COMPRESS, Field::Index::TOKENIZED)
+    assert_equal("name", f.name)
+    assert_equal("value", f.data)
+    assert_equal(true, f.stored?)
+    assert_equal(true, f.compressed?)
+    assert_equal(true, f.indexed?)
+    assert_equal(true, f.tokenized?)
+    assert_equal(false, f.store_term_vector?)
+    assert_equal(false, f.store_offsets?)
+    assert_equal(false, f.store_positions?)
+    assert_equal(false, f.omit_norms?)
+    assert_equal(false, f.binary?)
+    assert_equal("stored/compressed,indexed,tokenized,<name:value>", f.to_s)
+    f.data = "183"
+    f.boost = 0.001
+    assert_equal("183", f.data)
+    assert(0.001 =~ f.boost)
+  end
+
+  def test_set_store()
+    f = Field.new("name", "", Field::Store::COMPRESS, Field::Index::TOKENIZED)
+    f.store = Field::Store::NO
+    assert_equal(false, f.stored?)
+    assert_equal(false, f.compressed?)
+    assert_equal("indexed,tokenized,<name:>", f.to_s)
+  end
+
+  def test_set_index()
+    f = Field.new("name", "value", Field::Store::COMPRESS, Field::Index::TOKENIZED)
+    f.index = Field::Index::NO
+    assert_equal(false, f.indexed?)
+    assert_equal(false, f.tokenized?)
+    assert_equal(false, f.omit_norms?)
+    assert_equal("stored/compressed,<name:value>", f.to_s)
+    f.index = Field::Index::NO_NORMS
+    assert_equal(true, f.indexed?)
+    assert_equal(false, f.tokenized?)
+    assert_equal(true, f.omit_norms?)
+    assert_equal("stored/compressed,indexed,omit_norms,<name:value>", f.to_s)
+  end
+
+  def test_set_term_vector()
+    f = Field.new("name", "value", Field::Store::COMPRESS, Field::Index::TOKENIZED)
+    f.term_vector = Field::TermVector::WITH_POSITIONS_OFFSETS
+    assert_equal(true, f.store_term_vector?)
+    assert_equal(true, f.store_offsets?)
+    assert_equal(true, f.store_positions?)
+    assert_equal("stored/compressed,indexed,tokenized,store_term_vector,store_offsets,store_positions,<name:value>", f.to_s)
+  end
+
+  def test_new_binary_field()
+    tmp = []
+    256.times {|i| tmp[i] = i}
+    bin = tmp.pack("c*")
+    f = Field.new_binary_field("name", bin, Field::Store::YES)
+    assert_equal("name", f.name)
+    assert_equal(bin, f.data)
+    assert_equal(true, f.stored?)
+    assert_equal(false, f.compressed?)
+    assert_equal(false, f.indexed?)
+    assert_equal(false, f.tokenized?)
+    assert_equal(false, f.store_term_vector?)
+    assert_equal(false, f.store_offsets?)
+    assert_equal(false, f.store_positions?)
+    assert_equal(false, f.omit_norms?)
+    assert_equal(true, f.binary?)
+    assert_equal("stored/uncompressed,binary,<name:=bin_data=>", f.to_s)
+  end
+end