ferret 0.2.1 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/ferret.rb +1 -1
- data/lib/ferret/analysis/analyzers.rb +5 -4
- data/lib/ferret/document/field.rb +8 -21
- data/lib/ferret/index/compound_file_io.rb +14 -22
- data/lib/ferret/index/index.rb +20 -1
- data/lib/ferret/index/index_reader.rb +1 -1
- data/lib/ferret/index/index_writer.rb +6 -6
- data/lib/ferret/index/segment_reader.rb +13 -12
- data/lib/ferret/query_parser.rb +43 -13
- data/lib/ferret/query_parser/query_parser.tab.rb +42 -13
- data/lib/ferret/search/index_searcher.rb +16 -12
- data/lib/ferret/search/term_scorer.rb +0 -2
- data/test/unit/document/tc_field.rb +6 -1
- data/test/unit/query_parser/tc_query_parser.rb +29 -4
- data/test/unit/search/tc_index_searcher.rb +33 -1
- metadata +2 -2
data/lib/ferret.rb
CHANGED
@@ -32,9 +32,8 @@ module Ferret::Analysis
|
|
32
32
|
# An array containing some common English words that are not usually useful
|
33
33
|
# for searching.
|
34
34
|
ENGLISH_STOP_WORDS = [
|
35
|
-
"a", "an", "and", "are", "as", "at", "be", "but", "by",
|
36
|
-
"
|
37
|
-
"no", "not", "of", "on", "or", "s", "such",
|
35
|
+
"a", "an", "and", "are", "as", "at", "be", "but", "by", "for", "if",
|
36
|
+
"in", "into", "is", "it", "no", "not", "of", "on", "or", "s", "such",
|
38
37
|
"t", "that", "the", "their", "then", "there", "these",
|
39
38
|
"they", "this", "to", "was", "will", "with"
|
40
39
|
]
|
@@ -51,6 +50,8 @@ module Ferret::Analysis
|
|
51
50
|
end
|
52
51
|
|
53
52
|
# An Analyzer that filters LetterTokenizer with LowerCaseFilter.
|
53
|
+
# This analyzer subclasses the StopAnalyzer so you can add your own
|
54
|
+
# stoplist the same way. See StopAnalyzer.
|
54
55
|
class StandardAnalyzer < StopAnalyzer
|
55
56
|
def token_stream(field, string)
|
56
57
|
return StopFilter.new(LowerCaseFilter.new(StandardTokenizer.new(string)), @stop_words)
|
@@ -84,7 +85,7 @@ module Ferret::Analysis
|
|
84
85
|
def token_stream(field, string)
|
85
86
|
analyzer = @analyzers[field]
|
86
87
|
if (analyzer == nil)
|
87
|
-
analyzer = @default_analyzer
|
88
|
+
analyzer = @default_analyzer
|
88
89
|
end
|
89
90
|
|
90
91
|
return analyzer.token_stream(field, string)
|
@@ -277,28 +277,15 @@ module Ferret::Document
|
|
277
277
|
str = ""
|
278
278
|
if (@stored)
|
279
279
|
str << "stored"
|
280
|
-
|
280
|
+
str << (@compressed ? "/compressed," : "/uncompressed,")
|
281
281
|
end
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
if (@store_offset)
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
str << "term_vector_position,"
|
290
|
-
end
|
291
|
-
if (@binary) then str << "binary," end
|
292
|
-
|
293
|
-
str << '<'
|
294
|
-
str << @name
|
295
|
-
str << ':'
|
296
|
-
|
297
|
-
if (@data != null)
|
298
|
-
str << @data.to_s
|
299
|
-
end
|
300
|
-
|
301
|
-
str << '>'
|
282
|
+
str << "indexed," if (@indexed)
|
283
|
+
str << "tokenized," if (@tokenized)
|
284
|
+
str << "store_term_vector," if (@store_term_vector)
|
285
|
+
str << "tv_offset," if (@store_offset)
|
286
|
+
str << "tv_position," if (@store_position)
|
287
|
+
str << "binary," if (@binary)
|
288
|
+
str << "<#{@name}:#{data}>"
|
302
289
|
end
|
303
290
|
end
|
304
291
|
end
|
@@ -107,10 +107,10 @@ module Ferret::Index
|
|
107
107
|
end
|
108
108
|
|
109
109
|
# Not implemented
|
110
|
-
def
|
110
|
+
def remove(name) raise(NotImplementedError) end
|
111
111
|
|
112
112
|
# Not implemented
|
113
|
-
def rename(from, to) raise(
|
113
|
+
def rename(from, to) raise(NotImplementedError) end
|
114
114
|
|
115
115
|
# Returns the length of a file in the directory.
|
116
116
|
def length(name)
|
@@ -120,10 +120,10 @@ module Ferret::Index
|
|
120
120
|
end
|
121
121
|
|
122
122
|
# Not implemented
|
123
|
-
def create_output(name) raise(
|
123
|
+
def create_output(name) raise(NotImplementedError) end
|
124
124
|
|
125
125
|
# Not implemented
|
126
|
-
def make_lock(name) raise(
|
126
|
+
def make_lock(name) raise(NotImplementedError) end
|
127
127
|
|
128
128
|
# Implementation of an IndexInput that reads from a portion of the
|
129
129
|
# compound file.
|
@@ -206,8 +206,8 @@ module Ferret::Index
|
|
206
206
|
# Add a source stream. _file_name_ is the string by which the
|
207
207
|
# sub-stream will be known in the compound stream.
|
208
208
|
#
|
209
|
-
#
|
210
|
-
#
|
209
|
+
# Raises:: StateError if this writer is closed
|
210
|
+
# Raises:: ArgumentError if a file with the same name
|
211
211
|
# has been added already
|
212
212
|
def add_file(file_name)
|
213
213
|
if @merged
|
@@ -253,7 +253,7 @@ module Ferret::Index
|
|
253
253
|
# Remember the positions of directory entries so that we can
|
254
254
|
# adjust the offsets later
|
255
255
|
@file_entries.each do |fe|
|
256
|
-
fe.
|
256
|
+
fe.dir_offset = os.pos()
|
257
257
|
os.write_long(0) # for now
|
258
258
|
os.write_string(fe.file_name)
|
259
259
|
end
|
@@ -267,7 +267,7 @@ module Ferret::Index
|
|
267
267
|
|
268
268
|
# Write the data offsets into the directory of the compound stream
|
269
269
|
@file_entries.each do |fe|
|
270
|
-
os.seek(fe.
|
270
|
+
os.seek(fe.dir_offset)
|
271
271
|
os.write_long(fe.data_offset)
|
272
272
|
end
|
273
273
|
|
@@ -292,15 +292,7 @@ module Ferret::Index
|
|
292
292
|
private
|
293
293
|
|
294
294
|
# Internal class for holding a file
|
295
|
-
|
296
|
-
|
297
|
-
attr_accessor :file_name, :directory_offset, :data_offset
|
298
|
-
|
299
|
-
def initialize(file_name)
|
300
|
-
@file_name = file_name
|
301
|
-
end
|
302
|
-
|
303
|
-
end
|
295
|
+
FileEntry = Struct.new(:file_name, :dir_offset, :data_offset)
|
304
296
|
|
305
297
|
# Copy the contents of the file with specified extension into the
|
306
298
|
# provided output stream. Use a buffer for moving data
|
@@ -324,9 +316,9 @@ module Ferret::Index
|
|
324
316
|
# Verify that remainder is 0
|
325
317
|
if (remainder != 0)
|
326
318
|
raise(IOError,
|
327
|
-
"Non-zero remainder length after copying: " +
|
328
|
-
|
329
|
-
|
319
|
+
"Non-zero remainder length after copying: #{remainder} " +
|
320
|
+
"(id: #{source.file_name}, length: #{length}, buffer size: " +
|
321
|
+
" #{Ferret::Store::BUFFER_SIZE})")
|
330
322
|
end
|
331
323
|
|
332
324
|
# Verify that the output length diff is equal to original file
|
@@ -334,8 +326,8 @@ module Ferret::Index
|
|
334
326
|
diff = end_ptr - start_ptr
|
335
327
|
if (diff != length)
|
336
328
|
raise(IOError,
|
337
|
-
"Difference in the output file offsets " +
|
338
|
-
|
329
|
+
"Difference in the output file offsets #{diff}" +
|
330
|
+
" does not match the original file length #{length}")
|
339
331
|
end
|
340
332
|
|
341
333
|
ensure
|
data/lib/ferret/index/index.rb
CHANGED
@@ -76,6 +76,23 @@ module Ferret::Index
|
|
76
76
|
# be replaced by the new object. This will slow
|
77
77
|
# down indexing so it should not be used if
|
78
78
|
# performance is a concern.
|
79
|
+
# use_compound_file:: Uses a compound file to store the index. This
|
80
|
+
# prevents an error being raised for having too
|
81
|
+
# many files open at the same time. The default is
|
82
|
+
# true but performance is better if this is set to
|
83
|
+
# false.
|
84
|
+
# handle_parse_errors:: Set this to true if you want the QueryParser to
|
85
|
+
# degrade gracefully on errors. If the query parser
|
86
|
+
# fails to parse this query, it will try to parse
|
87
|
+
# it as a straight boolean query on the default
|
88
|
+
# field ignoring all query punctuation. If this
|
89
|
+
# fails, it will return an empty TermQuery. If you
|
90
|
+
# use this and you need to know why your query
|
91
|
+
# isn't working you can use the Query#to_s method
|
92
|
+
# on the query returned to see what is happening to
|
93
|
+
# your query. This defualts to true. If you set it
|
94
|
+
# to false a QueryParseException is raised on a
|
95
|
+
# query parse error.
|
79
96
|
#
|
80
97
|
# Some examples;
|
81
98
|
#
|
@@ -86,7 +103,8 @@ module Ferret::Index
|
|
86
103
|
#
|
87
104
|
# index = Index::Index.new(:dir => directory,
|
88
105
|
# :close_dir => false
|
89
|
-
# :default_slop => 2
|
106
|
+
# :default_slop => 2,
|
107
|
+
# :handle_parse_errors => false)
|
90
108
|
#
|
91
109
|
def initialize(options = {})
|
92
110
|
super()
|
@@ -117,6 +135,7 @@ module Ferret::Index
|
|
117
135
|
@default_search_field = (@options[:default_search_field] || \
|
118
136
|
@options[:default_field] || "*")
|
119
137
|
@default_field = @options[:default_field] || ""
|
138
|
+
@options[:handle_parse_errors] = true if @options[:handle_parse_errors].nil?
|
120
139
|
@open = true
|
121
140
|
@qp = nil
|
122
141
|
end
|
@@ -100,7 +100,7 @@ module Ferret::Index
|
|
100
100
|
if directory.nil?
|
101
101
|
directory = Ferret::Store::RAMDirectory.new
|
102
102
|
elsif directory.is_a?(String)
|
103
|
-
directory = Ferret::Store::FSDirectory.new(directory,
|
103
|
+
directory = Ferret::Store::FSDirectory.new(directory, false)
|
104
104
|
end
|
105
105
|
directory.synchronize do # in- & inter-process sync
|
106
106
|
commit_lock = directory.make_lock(IndexWriter::COMMIT_LOCK_NAME)
|
@@ -83,21 +83,21 @@ module Index
|
|
83
83
|
@close_dir = options[:close_dir] || false
|
84
84
|
@use_compound_file = (options[:use_compound_file] != false) # ie default true
|
85
85
|
@analyzer = options[:analyzer] || Ferret::Analysis::StandardAnalyzer.new
|
86
|
-
@merge_factor = DEFAULT_MERGE_FACTOR
|
87
|
-
@min_merge_docs = DEFAULT_MIN_MERGE_DOCS
|
88
|
-
@max_merge_docs = DEFAULT_MAX_MERGE_DOCS
|
89
|
-
@max_field_length = DEFAULT_MAX_FIELD_LENGTH
|
90
|
-
@term_index_interval = DEFAULT_TERM_INDEX_INTERVAL
|
86
|
+
@merge_factor = options[:merge_factor] || DEFAULT_MERGE_FACTOR
|
87
|
+
@min_merge_docs = options[:min_merge_docs] || DEFAULT_MIN_MERGE_DOCS
|
88
|
+
@max_merge_docs = options[:max_merge_docs] || DEFAULT_MAX_MERGE_DOCS
|
89
|
+
@max_field_length = options[:max_field_length] || DEFAULT_MAX_FIELD_LENGTH
|
90
|
+
@term_index_interval = options[:term_index_interval] || DEFAULT_TERM_INDEX_INTERVAL
|
91
91
|
|
92
92
|
@similarity = Search::Similarity.default
|
93
93
|
@segment_infos = SegmentInfos.new()
|
94
94
|
@ram_directory = Ferret::Store::RAMDirectory.new()
|
95
95
|
|
96
96
|
# Make sure that the lock is released when this object is destroyed
|
97
|
-
define_finalizer(self, proc { |id| @write_lock.release() if @write_lock})
|
98
97
|
|
99
98
|
@write_lock = @directory.make_lock(WRITE_LOCK_NAME)
|
100
99
|
@write_lock.obtain(WRITE_LOCK_TIMEOUT) # obtain write lock
|
100
|
+
define_finalizer(@write_lock, proc { |id| @write_lock.release() if @write_lock})
|
101
101
|
|
102
102
|
@directory.synchronize() do # in- & inter-process sync
|
103
103
|
@directory.make_lock(COMMIT_LOCK_NAME).while_locked(COMMIT_LOCK_TIMEOUT) do
|
@@ -16,16 +16,17 @@ module Ferret::Index
|
|
16
16
|
@segment = info.name
|
17
17
|
|
18
18
|
@cfs_reader = nil
|
19
|
-
|
20
|
-
if directory.exists?(@segment + '.cfs') then
|
19
|
+
dir = directory
|
20
|
+
#if directory.exists?(@segment + '.cfs') then
|
21
|
+
if SegmentReader.uses_compound_file?(info)
|
21
22
|
@cfs_reader = CompoundFileReader.new(directory, @segment + '.cfs')
|
22
|
-
|
23
|
+
dir = @cfs_reader
|
23
24
|
end
|
24
25
|
|
25
|
-
@field_infos = FieldInfos.new(
|
26
|
-
@fields_reader = FieldsReader.new(
|
26
|
+
@field_infos = FieldInfos.new(dir, @segment + '.fnm')
|
27
|
+
@fields_reader = FieldsReader.new(dir, @segment, @field_infos)
|
27
28
|
|
28
|
-
@term_infos = TermInfosReader.new(
|
29
|
+
@term_infos = TermInfosReader.new(dir, @segment, @field_infos)
|
29
30
|
@deleted_docs = nil
|
30
31
|
@deleted_docs_dirty = false
|
31
32
|
if SegmentReader.has_deletions?(info) then
|
@@ -33,16 +34,16 @@ module Ferret::Index
|
|
33
34
|
Ferret::Utils::BitVector.read(directory, @segment + '.del')
|
34
35
|
end
|
35
36
|
|
36
|
-
@freq_stream =
|
37
|
-
@prox_stream =
|
37
|
+
@freq_stream = dir.open_input(@segment + '.frq')
|
38
|
+
@prox_stream = dir.open_input(@segment + '.prx')
|
38
39
|
@norms = {}
|
39
40
|
@norms.extend(MonitorMixin)
|
40
41
|
@norms_dirty = false
|
41
|
-
open_norms(
|
42
|
+
open_norms(dir)
|
42
43
|
|
43
44
|
@tv_reader_orig = nil
|
44
45
|
if @field_infos.has_vectors? then
|
45
|
-
@tv_reader_orig = TermVectorsReader.new(
|
46
|
+
@tv_reader_orig = TermVectorsReader.new(dir, @segment, @field_infos)
|
46
47
|
end
|
47
48
|
end
|
48
49
|
|
@@ -128,9 +129,9 @@ module Ferret::Index
|
|
128
129
|
@field_infos.each_with_index do |fi, i|
|
129
130
|
if (fi.indexed?)
|
130
131
|
if @cfs_reader.nil?
|
131
|
-
name = @segment
|
132
|
+
name = "#{@segment}.f#{i}"
|
132
133
|
else
|
133
|
-
name = @segment
|
134
|
+
name = "#{@segment}.s#{i}"
|
134
135
|
end
|
135
136
|
if (@directory.exists?(name))
|
136
137
|
file_names << name
|
data/lib/ferret/query_parser.rb
CHANGED
@@ -242,17 +242,29 @@ module Ferret
|
|
242
242
|
#
|
243
243
|
# === Options
|
244
244
|
#
|
245
|
-
# analyzer::
|
246
|
-
#
|
247
|
-
#
|
248
|
-
# occur_default::
|
249
|
-
#
|
250
|
-
#
|
251
|
-
# wild_lower::
|
252
|
-
#
|
253
|
-
#
|
254
|
-
#
|
255
|
-
#
|
245
|
+
# analyzer:: The analyzer is used to break phrases up into
|
246
|
+
# terms and to turn terms in tokens recognized in
|
247
|
+
# the index. Analysis::Analyzer is the default
|
248
|
+
# occur_default:: Set to either BooleanClause::Occur::SHOULD
|
249
|
+
# (default) or BooleanClause::Occur::MUST to specify
|
250
|
+
# the default Occur operator.
|
251
|
+
# wild_lower:: Set to false if you don't want the terms in fuzzy
|
252
|
+
# and wild queries to be set to lower case. You
|
253
|
+
# should do this if your analyzer doesn't downcase.
|
254
|
+
# The default is true.
|
255
|
+
# default_slop:: Set the default slop for phrase queries. This
|
256
|
+
# defaults to 0.
|
257
|
+
# handle_parse_errors:: Set this to true if you want the QueryParser to
|
258
|
+
# degrade gracefully on errors. If the query parser
|
259
|
+
# fails to parse this query, it will try to parse it
|
260
|
+
# as a straight boolean query on the default field
|
261
|
+
# ignoring all query punctuation. If this fails, it
|
262
|
+
# will return an empty TermQuery. If you use this
|
263
|
+
# and you need to know why your query isn't working
|
264
|
+
# you can use the Query#to_s method on the query
|
265
|
+
# returned to see what is happening to your query.
|
266
|
+
# This defaults to false, in which case a
|
267
|
+
# QueryParseException is thrown.
|
256
268
|
def initialize(default_field = "", options = {})
|
257
269
|
end
|
258
270
|
|
@@ -263,10 +275,10 @@ module Ferret
|
|
263
275
|
|
264
276
|
# Set to false if you don't want the terms in fuzzy and wild queries to be
|
265
277
|
# set to lower case. You should do this if your analyzer doesn't downcase.
|
266
|
-
def wild_lower()
|
278
|
+
def wild_lower=()
|
267
279
|
end
|
268
280
|
|
269
|
-
# Returns the value of wild_lower. See #wild_lower
|
281
|
+
# Returns the value of wild_lower. See #wild_lower=.
|
270
282
|
def wild_lower?()
|
271
283
|
end
|
272
284
|
|
@@ -276,7 +288,25 @@ module Ferret
|
|
276
288
|
# if you'd like to do your own query string cleaning.
|
277
289
|
def clean_string(str)
|
278
290
|
end
|
291
|
+
|
292
|
+
# The exception thrown when there is an error parsing the query string.
|
293
|
+
# This also holds the Racc::ParseError that was thrown in case you want to
|
294
|
+
# investigate why a query won't parse.
|
295
|
+
class QueryParseException < Exception
|
296
|
+
attr_reader :parse_error
|
297
|
+
|
298
|
+
# Create a new QueryParseException
|
299
|
+
#
|
300
|
+
# error:: An error string describing the query that failed
|
301
|
+
# parse_error:: The actual parse error that was thrown by Racc. It is a
|
302
|
+
# Racc::ParseError object.
|
303
|
+
def initialize(error, parse_error)
|
304
|
+
super(error)
|
305
|
+
@parse_error = parse_error
|
306
|
+
end
|
307
|
+
end
|
279
308
|
end
|
309
|
+
|
280
310
|
end
|
281
311
|
|
282
312
|
require 'ferret/query_parser/query_parser.tab.rb'
|
@@ -11,15 +11,8 @@ module Ferret
|
|
11
11
|
|
12
12
|
class QueryParser < Racc::Parser
|
13
13
|
|
14
|
-
module_eval <<'..end lib/ferret/query_parser/query_parser.y modeval..
|
15
|
-
attr_accessor :default_field, :fields
|
16
|
-
|
17
|
-
# true if you want to downcase wild card queries. This is set to try by
|
18
|
-
# default.
|
19
|
-
attr_writer :wild_lower
|
20
|
-
|
21
|
-
def wild_lower?() @wild_lower end
|
22
|
-
|
14
|
+
module_eval <<'..end lib/ferret/query_parser/query_parser.y modeval..id81dbd43492', 'lib/ferret/query_parser/query_parser.y', 126
|
15
|
+
attr_accessor :default_field, :fields, :handle_parse_errors
|
23
16
|
|
24
17
|
def initialize(default_field = "*", options = {})
|
25
18
|
@yydebug = true
|
@@ -32,6 +25,7 @@ module_eval <<'..end lib/ferret/query_parser/query_parser.y modeval..id9e08d4407
|
|
32
25
|
@occur_default = options[:occur_default] || BooleanClause::Occur::SHOULD
|
33
26
|
@default_slop = options[:default_slop] || 0
|
34
27
|
@fields = options[:fields]||[]
|
28
|
+
@handle_parse_errors = options[:handle_parse_errors] || false
|
35
29
|
end
|
36
30
|
|
37
31
|
RESERVED = {
|
@@ -50,6 +44,7 @@ module_eval <<'..end lib/ferret/query_parser/query_parser.y modeval..id9e08d4407
|
|
50
44
|
EWCHR = %q,:()\[\]{}!+"~^\-\|<>\=,
|
51
45
|
|
52
46
|
def parse(str)
|
47
|
+
orig_str = str
|
53
48
|
str = clean_string(str)
|
54
49
|
str.strip!
|
55
50
|
@q = []
|
@@ -82,10 +77,24 @@ module_eval <<'..end lib/ferret/query_parser/query_parser.y modeval..id9e08d4407
|
|
82
77
|
end
|
83
78
|
str = $'
|
84
79
|
end
|
85
|
-
@q.
|
80
|
+
if @q.empty?
|
81
|
+
return TermQuery.new(Term.new(@default_field, ""))
|
82
|
+
end
|
83
|
+
|
84
|
+
@q.push([ false, '$' ])
|
86
85
|
#p @q
|
87
86
|
|
88
|
-
|
87
|
+
begin
|
88
|
+
query = do_parse
|
89
|
+
rescue Racc::ParseError => e
|
90
|
+
if @handle_parse_errors
|
91
|
+
@field = @default_field
|
92
|
+
query = _get_bad_query(orig_str)
|
93
|
+
else
|
94
|
+
raise QueryParseException.new("Could not parse #{str}", e)
|
95
|
+
end
|
96
|
+
end
|
97
|
+
return query
|
89
98
|
end
|
90
99
|
|
91
100
|
def next_token
|
@@ -160,6 +169,25 @@ module_eval <<'..end lib/ferret/query_parser/query_parser.y modeval..id9e08d4407
|
|
160
169
|
return new_str.pack("c*")
|
161
170
|
end
|
162
171
|
|
172
|
+
def get_bad_query(field, str)
|
173
|
+
tokens = []
|
174
|
+
stream = @analyzer.token_stream(field, str)
|
175
|
+
while token = stream.next
|
176
|
+
tokens << token
|
177
|
+
end
|
178
|
+
if tokens.length == 0
|
179
|
+
return TermQuery.new(Term.new(field, ""))
|
180
|
+
elsif tokens.length == 1
|
181
|
+
return TermQuery.new(Term.new(field, tokens[0].term_text))
|
182
|
+
else
|
183
|
+
bq = BooleanQuery.new()
|
184
|
+
tokens.each do |token|
|
185
|
+
bq << BooleanClause.new(TermQuery.new(Term.new(field, token.term_text)))
|
186
|
+
end
|
187
|
+
return bq
|
188
|
+
end
|
189
|
+
end
|
190
|
+
|
163
191
|
def get_range_query(field, start_word, end_word, inc_upper, inc_lower)
|
164
192
|
RangeQuery.new(field, start_word, end_word, inc_upper, inc_lower)
|
165
193
|
end
|
@@ -374,7 +402,7 @@ module_eval <<'..end lib/ferret/query_parser/query_parser.y modeval..id9e08d4407
|
|
374
402
|
return qp.parse(query)
|
375
403
|
end
|
376
404
|
|
377
|
-
..end lib/ferret/query_parser/query_parser.y modeval..
|
405
|
+
..end lib/ferret/query_parser/query_parser.y modeval..id81dbd43492
|
378
406
|
|
379
407
|
##### racc 1.4.4 generates ###
|
380
408
|
|
@@ -893,7 +921,8 @@ if __FILE__ == $0
|
|
893
921
|
|
894
922
|
parser = Ferret::QueryParser.new("default",
|
895
923
|
:fields => ["f1", "f2", "f3"],
|
896
|
-
:analyzer => Ferret::Analysis::StandardAnalyzer.new
|
924
|
+
:analyzer => Ferret::Analysis::StandardAnalyzer.new,
|
925
|
+
:handle_parse_errors => true)
|
897
926
|
|
898
927
|
$stdin.each do |line|
|
899
928
|
query = parser.parse(line)
|
@@ -90,12 +90,17 @@ module Ferret::Search
|
|
90
90
|
filter = options[:filter]
|
91
91
|
first_doc = options[:first_doc]||0
|
92
92
|
num_docs = options[:num_docs]||10
|
93
|
+
max_size = first_doc + num_docs
|
93
94
|
sort = options[:sort]
|
94
95
|
|
95
|
-
if (num_docs <= 0)
|
96
|
+
if (num_docs <= 0)
|
96
97
|
raise ArgumentError, "num_docs must be > 0 to run a search"
|
97
98
|
end
|
98
99
|
|
100
|
+
if (first_doc < 0)
|
101
|
+
raise ArgumentError, "first_doc must be >= 0 to run a search"
|
102
|
+
end
|
103
|
+
|
99
104
|
scorer = query.weight(self).scorer(@reader)
|
100
105
|
if (scorer == nil)
|
101
106
|
return TopDocs.new(0, [])
|
@@ -104,33 +109,32 @@ module Ferret::Search
|
|
104
109
|
bits = (filter.nil? ? nil : filter.bits(@reader))
|
105
110
|
if (sort)
|
106
111
|
fields = sort.is_a?(Array) ? sort : sort.fields
|
107
|
-
hq = FieldSortedHitQueue.new(@reader, fields,
|
112
|
+
hq = FieldSortedHitQueue.new(@reader, fields, max_size)
|
108
113
|
else
|
109
|
-
hq = HitQueue.new(
|
114
|
+
hq = HitQueue.new(max_size)
|
110
115
|
end
|
111
116
|
total_hits = 0
|
112
117
|
min_score = 0.0
|
113
118
|
scorer.each_hit() do |doc, score|
|
114
119
|
if score > 0.0 and (bits.nil? or bits.get(doc)) # skip docs not in bits
|
115
120
|
total_hits += 1
|
116
|
-
if hq.size <
|
121
|
+
if hq.size < max_size or score >= min_score
|
117
122
|
hq.insert(ScoreDoc.new(doc, score))
|
118
123
|
min_score = hq.top.score # maintain min_score
|
119
124
|
end
|
120
125
|
end
|
121
126
|
end
|
122
127
|
|
123
|
-
score_docs =
|
128
|
+
score_docs = []
|
124
129
|
if (hq.size > first_doc)
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
130
|
+
if (hq.size - first_doc) < num_docs
|
131
|
+
num_docs = hq.size - first_doc
|
132
|
+
end
|
133
|
+
num_docs.times do
|
134
|
+
score_docs.unshift(hq.pop)
|
129
135
|
end
|
130
|
-
else
|
131
|
-
score_docs = []
|
132
|
-
hq.clear
|
133
136
|
end
|
137
|
+
hq.clear
|
134
138
|
|
135
139
|
return TopDocs.new(total_hits, score_docs)
|
136
140
|
end
|
@@ -37,13 +37,15 @@ class FieldTest < Test::Unit::TestCase
|
|
37
37
|
assert_equal(false, f.store_offsets?)
|
38
38
|
assert_equal(false, f.store_positions?)
|
39
39
|
assert_equal(false, f.binary?)
|
40
|
+
assert_equal("stored/compressed,indexed,tokenized,<name:value>", f.to_s)
|
40
41
|
end
|
41
42
|
|
42
43
|
def test_set_store()
|
43
|
-
f = Field.new("name",
|
44
|
+
f = Field.new("name", nil, Field::Store::COMPRESS, Field::Index::TOKENIZED)
|
44
45
|
f.stored = Field::Store::NO
|
45
46
|
assert_equal(false, f.stored?)
|
46
47
|
assert_equal(false, f.compressed?)
|
48
|
+
assert_equal("indexed,tokenized,<name:>", f.to_s)
|
47
49
|
end
|
48
50
|
|
49
51
|
def test_set_index()
|
@@ -51,6 +53,7 @@ class FieldTest < Test::Unit::TestCase
|
|
51
53
|
f.index = Field::Index::NO
|
52
54
|
assert_equal(false, f.indexed?)
|
53
55
|
assert_equal(false, f.tokenized?)
|
56
|
+
assert_equal("stored/compressed,<name:value>", f.to_s)
|
54
57
|
end
|
55
58
|
|
56
59
|
def test_set_term_vector()
|
@@ -59,6 +62,7 @@ class FieldTest < Test::Unit::TestCase
|
|
59
62
|
assert_equal(true, f.store_term_vector?)
|
60
63
|
assert_equal(true, f.store_offsets?)
|
61
64
|
assert_equal(true, f.store_positions?)
|
65
|
+
assert_equal("stored/compressed,indexed,tokenized,store_term_vector,tv_offset,tv_position,<name:value>", f.to_s)
|
62
66
|
end
|
63
67
|
|
64
68
|
def test_new_binary_field()
|
@@ -76,5 +80,6 @@ class FieldTest < Test::Unit::TestCase
|
|
76
80
|
assert_equal(false, f.store_offsets?)
|
77
81
|
assert_equal(false, f.store_positions?)
|
78
82
|
assert_equal(true, f.binary?)
|
83
|
+
assert_equal("stored/uncompressed,binary,<name:#{bin}>", f.to_s)
|
79
84
|
end
|
80
85
|
end
|
@@ -5,6 +5,7 @@ class QueryParserTest < Test::Unit::TestCase
|
|
5
5
|
def test_strings()
|
6
6
|
parser = Ferret::QueryParser.new("xxx", :fields => ["f1", "f2", "f3"])
|
7
7
|
pairs = [
|
8
|
+
['', ''],
|
8
9
|
['word', 'word'],
|
9
10
|
['field:word', 'field:word'],
|
10
11
|
['"word1 word2 word3"', '"word word word"'],
|
@@ -92,8 +93,8 @@ class QueryParserTest < Test::Unit::TestCase
|
|
92
93
|
['"onewordphrase"', 'onewordphrase']
|
93
94
|
]
|
94
95
|
|
95
|
-
pairs.each do |
|
96
|
-
assert_equal(
|
96
|
+
pairs.each do |query_str, expected|
|
97
|
+
assert_equal(expected, parser.parse(query_str).to_s(parser.default_field))
|
97
98
|
end
|
98
99
|
end
|
99
100
|
|
@@ -105,8 +106,32 @@ class QueryParserTest < Test::Unit::TestCase
|
|
105
106
|
['key:(1234)', 'key:1234']
|
106
107
|
]
|
107
108
|
|
108
|
-
pairs.each do |
|
109
|
-
assert_equal(
|
109
|
+
pairs.each do |query_str, expected|
|
110
|
+
assert_equal(expected, parser.parse(query_str).to_s(parser.default_field))
|
111
|
+
end
|
112
|
+
end
|
113
|
+
|
114
|
+
def do_test_query_parse_exception_raised(str)
|
115
|
+
parser = Ferret::QueryParser.new("xxx", :fields => ["f1", "f2", "f3"])
|
116
|
+
assert_raise(Ferret::QueryParser::QueryParseException) do
|
117
|
+
parser.parse(str)
|
118
|
+
end
|
119
|
+
end
|
120
|
+
|
121
|
+
|
122
|
+
def test_bad_queries
|
123
|
+
parser = Ferret::QueryParser.new("xxx", :fields => ["f1", "f2"],
|
124
|
+
:handle_parse_errors => true)
|
125
|
+
|
126
|
+
pairs = [
|
127
|
+
['(*word', 'word'],
|
128
|
+
['()*&)(*^&*(', ''],
|
129
|
+
['()*&one)(*two(*&"', 'one two']
|
130
|
+
]
|
131
|
+
|
132
|
+
pairs.each do |query_str, expected|
|
133
|
+
do_test_query_parse_exception_raised(query_str)
|
134
|
+
assert_equal(expected, parser.parse(query_str).to_s(parser.default_field))
|
110
135
|
end
|
111
136
|
end
|
112
137
|
end
|
@@ -46,6 +46,15 @@ class IndexSearcherTest < Test::Unit::TestCase
|
|
46
46
|
end
|
47
47
|
end
|
48
48
|
|
49
|
+
def check_docs(query, options, expected=[])
|
50
|
+
top_docs = @is.search(query, options)
|
51
|
+
docs = top_docs.score_docs
|
52
|
+
assert_equal(expected.length, docs.length)
|
53
|
+
docs.length.times do |i|
|
54
|
+
assert_equal(expected[i], docs[i].doc)
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
49
58
|
def test_get_doc()
|
50
59
|
assert_equal(18, @is.max_doc)
|
51
60
|
assert_equal("20050930", @is.doc(0).values(:date))
|
@@ -57,15 +66,38 @@ class IndexSearcherTest < Test::Unit::TestCase
|
|
57
66
|
tq.boost = 100
|
58
67
|
check_hits(tq, [1,4,8])
|
59
68
|
|
69
|
+
tq = TermQuery.new(Term.new("field", ""));
|
70
|
+
check_hits(tq, [])
|
71
|
+
|
60
72
|
tq = TermQuery.new(Term.new("field", "word1"));
|
61
73
|
top_docs = @is.search(tq)
|
62
|
-
#puts top_docs.score_docs
|
63
74
|
assert_equal(@documents.size, top_docs.total_hits)
|
64
75
|
assert_equal(10, top_docs.score_docs.size)
|
65
76
|
top_docs = @is.search(tq, {:num_docs => 20})
|
66
77
|
assert_equal(@documents.size, top_docs.score_docs.size)
|
67
78
|
end
|
68
79
|
|
80
|
+
|
81
|
+
def test_first_doc
|
82
|
+
tq = TermQuery.new(Term.new("field", "word1"));
|
83
|
+
tq.boost = 100
|
84
|
+
top_docs = @is.search(tq, {:num_docs => 100})
|
85
|
+
expected = []
|
86
|
+
top_docs.score_docs.each do |score_doc|
|
87
|
+
expected << score_doc.doc
|
88
|
+
end
|
89
|
+
|
90
|
+
assert_raise(ArgumentError) { @is.search(tq, {:first_doc => -1}) }
|
91
|
+
assert_raise(ArgumentError) { @is.search(tq, {:num_docs => 0}) }
|
92
|
+
assert_raise(ArgumentError) { @is.search(tq, {:num_docs => -1}) }
|
93
|
+
|
94
|
+
check_docs(tq, {:num_docs => 8, :first_doc => 0}, expected[0,8])
|
95
|
+
check_docs(tq, {:num_docs => 3, :first_doc => 1}, expected[1,3])
|
96
|
+
check_docs(tq, {:num_docs => 6, :first_doc => 2}, expected[2,6])
|
97
|
+
check_docs(tq, {:num_docs => 2, :first_doc => expected.length}, [])
|
98
|
+
check_docs(tq, {:num_docs => 2, :first_doc => expected.length + 100}, [])
|
99
|
+
end
|
100
|
+
|
69
101
|
def test_boolean_query
|
70
102
|
bq = BooleanQuery.new()
|
71
103
|
tq1 = TermQuery.new(Term.new("field", "word1"))
|
metadata
CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.8.11
|
|
3
3
|
specification_version: 1
|
4
4
|
name: ferret
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.2.
|
7
|
-
date: 2005-11-
|
6
|
+
version: 0.2.2
|
7
|
+
date: 2005-11-22 00:00:00 +09:00
|
8
8
|
summary: Ruby indexing library.
|
9
9
|
require_paths:
|
10
10
|
- lib
|