ferret 0.2.1 → 0.2.2

@@ -22,7 +22,7 @@
 #++
 # :include: ../TUTORIAL
 module Ferret
-  VERSION = '0.2.1'
+  VERSION = '0.2.2'
 end
 
 require 'ferret/utils'
@@ -32,9 +32,8 @@ module Ferret::Analysis
   # An array containing some common English words that are not usually useful
   # for searching.
   ENGLISH_STOP_WORDS = [
-    "a", "an", "and", "are", "as", "at", "be", "but", "by",
-    "for", "if", "in", "into", "is", "it",
-    "no", "not", "of", "on", "or", "s", "such",
+    "a", "an", "and", "are", "as", "at", "be", "but", "by", "for", "if",
+    "in", "into", "is", "it", "no", "not", "of", "on", "or", "s", "such",
     "t", "that", "the", "their", "then", "there", "these",
     "they", "this", "to", "was", "will", "with"
   ]
@@ -51,6 +50,8 @@ module Ferret::Analysis
   end
 
   # An Analyzer that filters LetterTokenizer with LowerCaseFilter.
+  # This analyzer subclasses the StopAnalyzer so you can add your own
+  # stoplist the same way. See StopAnalyzer.
   class StandardAnalyzer < StopAnalyzer
     def token_stream(field, string)
      return StopFilter.new(LowerCaseFilter.new(StandardTokenizer.new(string)), @stop_words)
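
The new comment documents that StandardAnalyzer inherits StopAnalyzer's stop-list handling. A minimal sketch of what that enables, assuming StandardAnalyzer.new accepts a stop-word array the way StopAnalyzer.new does (the extra stop words here are made up):

    include Ferret::Analysis

    # Assumed constructor form; ENGLISH_STOP_WORDS is the constant
    # defined earlier in this module.
    my_stop_words = ENGLISH_STOP_WORDS + ["ruby", "gem"]
    analyzer = StandardAnalyzer.new(my_stop_words)
    stream = analyzer.token_stream("content", "A Ruby gem for searching")
    while token = stream.next
      puts token.term_text  # stop words filtered out, text downcased
    end
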
@@ -84,7 +85,7 @@ module Ferret::Analysis
     def token_stream(field, string)
       analyzer = @analyzers[field]
       if (analyzer == nil)
-        analyzer = @default_analyzer;
+        analyzer = @default_analyzer
       end
 
       return analyzer.token_stream(field, string)
@@ -277,28 +277,15 @@ module Ferret::Document
       str = ""
       if (@stored)
         str << "stored"
-        @str << @compressed ? "/compressed," : "/uncompressed,"
+        str << (@compressed ? "/compressed," : "/uncompressed,")
       end
-      if (@indexed) then str << "indexed," end
-      if (@tokenized) then str << "tokenized," end
-      if (@store_term_vector) then str << "store_term_vector," end
-      if (@store_offset)
-        str << "term_vector_offsets,"
-      end
-      if (@store_position)
-        str << "term_vector_position,"
-      end
-      if (@binary) then str << "binary," end
-
-      str << '<'
-      str << @name
-      str << ':'
-
-      if (@data != null)
-        str << @data.to_s
-      end
-
-      str << '>'
+      str << "indexed," if (@indexed)
+      str << "tokenized," if (@tokenized)
+      str << "store_term_vector," if (@store_term_vector)
+      str << "tv_offset," if (@store_offset)
+      str << "tv_position," if (@store_position)
+      str << "binary," if (@binary)
+      str << "<#{@name}:#{data}>"
     end
   end
 end
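
For reference, the rewritten to_s produces the compact one-line summaries asserted in the FieldTest changes further down, e.g.:

    include Ferret::Document

    f = Field.new("name", "value", Field::Store::COMPRESS,
                  Field::Index::TOKENIZED)
    f.to_s  #=> "stored/compressed,indexed,tokenized,<name:value>"
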
@@ -107,10 +107,10 @@ module Ferret::Index
     end
 
     # Not implemented
-    def delete(name) raise(UnsupportedOperationError) end
+    def remove(name) raise(NotImplementedError) end
 
     # Not implemented
-    def rename(from, to) raise(UnsupportedOperationError) end
+    def rename(from, to) raise(NotImplementedError) end
 
     # Returns the length of a file in the directory.
     def length(name)
@@ -120,10 +120,10 @@ module Ferret::Index
     end
 
     # Not implemented
-    def create_output(name) raise(UnsupportedOperationError) end
+    def create_output(name) raise(NotImplementedError) end
 
     # Not implemented
-    def make_lock(name) raise(UnsupportedOperationError) end
+    def make_lock(name) raise(NotImplementedError) end
 
   # Implementation of an IndexInput that reads from a portion of the
   # compound file.
@@ -206,8 +206,8 @@ module Ferret::Index
     # Add a source stream. _file_name_ is the string by which the
     # sub-stream will be known in the compound stream.
     #
-    # Throws:: StateError if this writer is closed
-    # Throws:: ArgumentError if a file with the same name
+    # Raises:: StateError if this writer is closed
+    # Raises:: ArgumentError if a file with the same name
     #          has been added already
     def add_file(file_name)
       if @merged
@@ -253,7 +253,7 @@ module Ferret::Index
       # Remember the positions of directory entries so that we can
       # adjust the offsets later
       @file_entries.each do |fe|
-        fe.directory_offset = os.pos()
+        fe.dir_offset = os.pos()
         os.write_long(0) # for now
         os.write_string(fe.file_name)
       end
@@ -267,7 +267,7 @@ module Ferret::Index
 
       # Write the data offsets into the directory of the compound stream
       @file_entries.each do |fe|
-        os.seek(fe.directory_offset)
+        os.seek(fe.dir_offset)
         os.write_long(fe.data_offset)
       end
 
@@ -292,15 +292,7 @@ module Ferret::Index
     private
 
     # Internal class for holding a file
-    class FileEntry
-
-      attr_accessor :file_name, :directory_offset, :data_offset
-
-      def initialize(file_name)
-        @file_name = file_name
-      end
-
-    end
+    FileEntry = Struct.new(:file_name, :dir_offset, :data_offset)
 
     # Copy the contents of the file with specified extension into the
     # provided output stream. Use a buffer for moving data
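
The Struct.new one-liner behaves the same as the hand-written class it replaces: positional members double as accessors, so the fe.dir_offset and fe.data_offset assignments in the surrounding code keep working. A quick illustration (the file name is made up):

    FileEntry = Struct.new(:file_name, :dir_offset, :data_offset)

    fe = FileEntry.new("_1.fnm")  # dir_offset and data_offset default to nil
    fe.dir_offset = 128
    fe.data_offset = 4096
    fe.file_name                  #=> "_1.fnm"
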
@@ -324,9 +316,9 @@ module Ferret::Index
       # Verify that remainder is 0
       if (remainder != 0)
         raise(IOError,
-          "Non-zero remainder length after copying: " + remainder.to_s +
-          " (id: " + source.file_name + ", length: " + length.to_s +
-          ", buffer size: " + Ferret::Store::BUFFER_SIZE.to_s + ")")
+          "Non-zero remainder length after copying: #{remainder} " +
+          "(id: #{source.file_name}, length: #{length}, buffer size: " +
+          " #{Ferret::Store::BUFFER_SIZE})")
       end
 
       # Verify that the output length diff is equal to original file
  # Verify that the output length diff is equal to original file
@@ -334,8 +326,8 @@ module Ferret::Index
334
326
  diff = end_ptr - start_ptr
335
327
  if (diff != length)
336
328
  raise(IOError,
337
- "Difference in the output file offsets " + diff.to_s +
338
- " does not match the original file length " + length.to_s)
329
+ "Difference in the output file offsets #{diff}" +
330
+ " does not match the original file length #{length}")
339
331
  end
340
332
 
341
333
  ensure
@@ -76,6 +76,23 @@ module Ferret::Index
   #                       be replaced by the new object. This will slow
   #                       down indexing so it should not be used if
   #                       performance is a concern.
+  # use_compound_file::   Uses a compound file to store the index. This
+  #                       prevents an error being raised for having too
+  #                       many files open at the same time. The default is
+  #                       true but performance is better if this is set to
+  #                       false.
+  # handle_parse_errors:: Set this to true if you want the QueryParser to
+  #                       degrade gracefully on errors. If the query parser
+  #                       fails to parse this query, it will try to parse
+  #                       it as a straight boolean query on the default
+  #                       field ignoring all query punctuation. If this
+  #                       fails, it will return an empty TermQuery. If you
+  #                       use this and you need to know why your query
+  #                       isn't working you can use the Query#to_s method
+  #                       on the query returned to see what is happening to
+  #                       your query. This defaults to true. If you set it
+  #                       to false a QueryParseException is raised on a
+  #                       query parse error.
   #
   # Some examples;
   #
@@ -86,7 +103,8 @@ module Ferret::Index
   #
   #   index = Index::Index.new(:dir => directory,
   #                            :close_dir => false
-  #                            :default_slop => 2)
+  #                            :default_slop => 2,
+  #                            :handle_parse_errors => false)
   #
   def initialize(options = {})
     super()
@@ -117,6 +135,7 @@ module Ferret::Index
     @default_search_field = (@options[:default_search_field] || \
                              @options[:default_field] || "*")
     @default_field = @options[:default_field] || ""
+    @options[:handle_parse_errors] = true if @options[:handle_parse_errors].nil?
     @open = true
     @qp = nil
   end
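
A sketch of the two new options in use, based on the documentation added above (the :path value and the query string are illustrative; both options default to true, per the docs and the nil-check just added):

    index = Ferret::Index::Index.new(:path => "/tmp/myindex",
                                     :use_compound_file => false,
                                     :handle_parse_errors => false)
    begin
      # 'field:(' is deliberately malformed; with :handle_parse_errors
      # disabled the parser raises instead of degrading gracefully.
      index.search_each('field:(') { |doc, score| puts "#{doc}: #{score}" }
    rescue Ferret::QueryParser::QueryParseException => e
      puts "unparseable query: #{e.message}"
    end
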
@@ -100,7 +100,7 @@ module Ferret::Index
     if directory.nil?
       directory = Ferret::Store::RAMDirectory.new
     elsif directory.is_a?(String)
-      directory = Ferret::Store::FSDirectory.new(directory, true)
+      directory = Ferret::Store::FSDirectory.new(directory, false)
     end
     directory.synchronize do # in- & inter-process sync
       commit_lock = directory.make_lock(IndexWriter::COMMIT_LOCK_NAME)
@@ -83,21 +83,21 @@ module Index
     @close_dir = options[:close_dir] || false
     @use_compound_file = (options[:use_compound_file] != false) # ie default true
     @analyzer = options[:analyzer] || Ferret::Analysis::StandardAnalyzer.new
-    @merge_factor = DEFAULT_MERGE_FACTOR
-    @min_merge_docs = DEFAULT_MIN_MERGE_DOCS
-    @max_merge_docs = DEFAULT_MAX_MERGE_DOCS
-    @max_field_length = DEFAULT_MAX_FIELD_LENGTH
-    @term_index_interval = DEFAULT_TERM_INDEX_INTERVAL
+    @merge_factor = options[:merge_factor] || DEFAULT_MERGE_FACTOR
+    @min_merge_docs = options[:min_merge_docs] || DEFAULT_MIN_MERGE_DOCS
+    @max_merge_docs = options[:max_merge_docs] || DEFAULT_MAX_MERGE_DOCS
+    @max_field_length = options[:max_field_length] || DEFAULT_MAX_FIELD_LENGTH
+    @term_index_interval = options[:term_index_interval] || DEFAULT_TERM_INDEX_INTERVAL
 
     @similarity = Search::Similarity.default
     @segment_infos = SegmentInfos.new()
     @ram_directory = Ferret::Store::RAMDirectory.new()
 
     # Make sure that the lock is released when this object is destroyed
-    define_finalizer(self, proc { |id| @write_lock.release() if @write_lock})
 
     @write_lock = @directory.make_lock(WRITE_LOCK_NAME)
     @write_lock.obtain(WRITE_LOCK_TIMEOUT) # obtain write lock
+    define_finalizer(@write_lock, proc { |id| @write_lock.release() if @write_lock})
 
     @directory.synchronize() do # in- & inter-process sync
       @directory.make_lock(COMMIT_LOCK_NAME).while_locked(COMMIT_LOCK_TIMEOUT) do
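
With this change the merge tuning values can be overridden per writer rather than always taking the compile-time defaults. A rough sketch, assuming the writer is constructed from a directory plus this options hash (the exact constructor form is an assumption):

    writer = Ferret::Index::IndexWriter.new(directory,
               :analyzer => Ferret::Analysis::StandardAnalyzer.new,
               :merge_factor => 100,     # merge fewer, larger batches
               :min_merge_docs => 1000)  # buffer more docs in RAM first
    # ... add documents ...
    writer.close
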
@@ -16,16 +16,17 @@ module Ferret::Index
       @segment = info.name
 
       @cfs_reader = nil
-      cfs = directory
-      if directory.exists?(@segment + '.cfs') then
+      dir = directory
+      #if directory.exists?(@segment + '.cfs') then
+      if SegmentReader.uses_compound_file?(info)
         @cfs_reader = CompoundFileReader.new(directory, @segment + '.cfs')
-        cfs = @cfs_reader
+        dir = @cfs_reader
       end
 
-      @field_infos = FieldInfos.new(cfs, @segment + '.fnm')
-      @fields_reader = FieldsReader.new(cfs, @segment, @field_infos)
+      @field_infos = FieldInfos.new(dir, @segment + '.fnm')
+      @fields_reader = FieldsReader.new(dir, @segment, @field_infos)
 
-      @term_infos = TermInfosReader.new(cfs, @segment, @field_infos)
+      @term_infos = TermInfosReader.new(dir, @segment, @field_infos)
       @deleted_docs = nil
       @deleted_docs_dirty = false
       if SegmentReader.has_deletions?(info) then
@@ -33,16 +34,16 @@ module Ferret::Index
           Ferret::Utils::BitVector.read(directory, @segment + '.del')
       end
 
-      @freq_stream = cfs.open_input(@segment + '.frq')
-      @prox_stream = cfs.open_input(@segment + '.prx')
+      @freq_stream = dir.open_input(@segment + '.frq')
+      @prox_stream = dir.open_input(@segment + '.prx')
       @norms = {}
       @norms.extend(MonitorMixin)
       @norms_dirty = false
-      open_norms(cfs)
+      open_norms(dir)
 
       @tv_reader_orig = nil
       if @field_infos.has_vectors? then
-        @tv_reader_orig = TermVectorsReader.new(cfs, @segment, @field_infos)
+        @tv_reader_orig = TermVectorsReader.new(dir, @segment, @field_infos)
       end
     end
 
@@ -128,9 +129,9 @@ module Ferret::Index
       @field_infos.each_with_index do |fi, i|
         if (fi.indexed?)
           if @cfs_reader.nil?
-            name = @segment + ".f" + i.to_s
+            name = "#{@segment}.f#{i}"
           else
-            name = @segment + ".s" + i.to_s
+            name = "#{@segment}.s#{i}"
           end
           if (@directory.exists?(name))
             file_names << name
@@ -242,17 +242,29 @@ module Ferret
     #
     # === Options
     #
-    # analyzer::      The analyzer is used to break phrases up into terms and
-    #                 to turn terms in tokens recognized in the index.
-    #                 Analysis::Analyzer is the default
-    # occur_default:: Set to either BooleanClause::Occur::SHOULD (default)
-    #                 or BooleanClause::Occur::MUST to specify the default
-    #                 Occur operator.
-    # wild_lower::    Set to false if you don't want the terms in fuzzy and
-    #                 wild queries to be set to lower case. You should do this
-    #                 if your analyzer doesn't downcase. The default is true.
-    # default_slop::  Set the default slop for phrase queries. This defaults
-    #                 to 0.
+    # analyzer::            The analyzer is used to break phrases up into
+    #                       terms and to turn terms in tokens recognized in
+    #                       the index. Analysis::Analyzer is the default
+    # occur_default::       Set to either BooleanClause::Occur::SHOULD
+    #                       (default) or BooleanClause::Occur::MUST to specify
+    #                       the default Occur operator.
+    # wild_lower::          Set to false if you don't want the terms in fuzzy
+    #                       and wild queries to be set to lower case. You
+    #                       should do this if your analyzer doesn't downcase.
+    #                       The default is true.
+    # default_slop::        Set the default slop for phrase queries. This
+    #                       defaults to 0.
+    # handle_parse_errors:: Set this to true if you want the QueryParser to
+    #                       degrade gracefully on errors. If the query parser
+    #                       fails to parse this query, it will try to parse it
+    #                       as a straight boolean query on the default field
+    #                       ignoring all query punctuation. If this fails, it
+    #                       will return an empty TermQuery. If you use this
+    #                       and you need to know why your query isn't working
+    #                       you can use the Query#to_s method on the query
+    #                       returned to see what is happening to your query.
+    #                       This defaults to false, in which case a
+    #                       QueryParseException is thrown.
     def initialize(default_field = "", options = {})
     end
 
@@ -263,10 +275,10 @@ module Ferret
 
     # Set to false if you don't want the terms in fuzzy and wild queries to be
     # set to lower case. You should do this if your analyzer doesn't downcase.
-    def wild_lower()
+    def wild_lower=()
     end
 
-    # Returns the value of wild_lower. See #wild_lower.
+    # Returns the value of wild_lower. See #wild_lower=.
     def wild_lower?()
     end
 
@@ -276,7 +288,25 @@ module Ferret
     # if you'd like to do your own query string cleaning.
     def clean_string(str)
     end
+
+    # The exception thrown when there is an error parsing the query string.
+    # This also holds the Racc::ParseError that was thrown in case you want to
+    # investigate why a query won't parse.
+    class QueryParseException < Exception
+      attr_reader :parse_error
+
+      # Create a new QueryParseException
+      #
+      # error::       An error string describing the query that failed
+      # parse_error:: The actual parse error that was thrown by Racc. It is a
+      #               Racc::ParseError object.
+      def initialize(error, parse_error)
+        super(error)
+        @parse_error = parse_error
+      end
+    end
   end
+
 end
 
 require 'ferret/query_parser/query_parser.tab.rb'
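
A sketch of catching the new exception and inspecting the underlying Racc error; the malformed query string is taken from the tests further down, and handle_parse_errors defaults to false for a bare QueryParser, so parse raises:

    parser = Ferret::QueryParser.new("xxx", :fields => ["f1", "f2"])
    begin
      parser.parse('()*&)(*^&*(')  # known-bad string from the test suite
    rescue Ferret::QueryParser::QueryParseException => e
      puts e.message   # "Could not parse ..."
      p e.parse_error  # the original Racc::ParseError
    end
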
@@ -11,15 +11,8 @@ module Ferret
 
   class QueryParser < Racc::Parser
 
-module_eval <<'..end lib/ferret/query_parser/query_parser.y modeval..id9e08d44076', 'lib/ferret/query_parser/query_parser.y', 126
-  attr_accessor :default_field, :fields
-
-  # true if you want to downcase wild card queries. This is set to try by
-  # default.
-  attr_writer :wild_lower
-
-  def wild_lower?() @wild_lower end
-
+module_eval <<'..end lib/ferret/query_parser/query_parser.y modeval..id81dbd43492', 'lib/ferret/query_parser/query_parser.y', 126
+  attr_accessor :default_field, :fields, :handle_parse_errors
 
   def initialize(default_field = "*", options = {})
     @yydebug = true
@@ -32,6 +25,7 @@ module_eval <<'..end lib/ferret/query_parser/query_parser.y modeval..id9e08d4407
     @occur_default = options[:occur_default] || BooleanClause::Occur::SHOULD
     @default_slop = options[:default_slop] || 0
     @fields = options[:fields]||[]
+    @handle_parse_errors = options[:handle_parse_errors] || false
   end
 
   RESERVED = {
@@ -50,6 +44,7 @@ module_eval <<'..end lib/ferret/query_parser/query_parser.y modeval..id9e08d4407
   EWCHR = %q,:()\[\]{}!+"~^\-\|<>\=,
 
   def parse(str)
+    orig_str = str
     str = clean_string(str)
     str.strip!
     @q = []
@@ -82,10 +77,24 @@ module_eval <<'..end lib/ferret/query_parser/query_parser.y modeval..id9e08d4407
       end
       str = $'
     end
-    @q.push [ false, '$' ]
+    if @q.empty?
+      return TermQuery.new(Term.new(@default_field, ""))
+    end
+
+    @q.push([ false, '$' ])
     #p @q
 
-    do_parse
+    begin
+      query = do_parse
+    rescue Racc::ParseError => e
+      if @handle_parse_errors
+        @field = @default_field
+        query = _get_bad_query(orig_str)
+      else
+        raise QueryParseException.new("Could not parse #{str}", e)
+      end
+    end
+    return query
   end
 
   def next_token
@@ -160,6 +169,25 @@ module_eval <<'..end lib/ferret/query_parser/query_parser.y modeval..id9e08d4407
     return new_str.pack("c*")
   end
 
+  def get_bad_query(field, str)
+    tokens = []
+    stream = @analyzer.token_stream(field, str)
+    while token = stream.next
+      tokens << token
+    end
+    if tokens.length == 0
+      return TermQuery.new(Term.new(field, ""))
+    elsif tokens.length == 1
+      return TermQuery.new(Term.new(field, tokens[0].term_text))
+    else
+      bq = BooleanQuery.new()
+      tokens.each do |token|
+        bq << BooleanClause.new(TermQuery.new(Term.new(field, token.term_text)))
+      end
+      return bq
+    end
+  end
+
   def get_range_query(field, start_word, end_word, inc_upper, inc_lower)
     RangeQuery.new(field, start_word, end_word, inc_upper, inc_lower)
   end
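
Tying the pieces together: with :handle_parse_errors enabled, an unparseable string is re-analyzed by get_bad_query into a plain boolean query over the default field. The expected outputs below come straight from the new test_bad_queries test:

    parser = Ferret::QueryParser.new("xxx", :fields => ["f1", "f2"],
                                     :handle_parse_errors => true)
    parser.parse('()*&one)(*two(*&"').to_s("xxx")  #=> "one two"
    parser.parse('()*&)(*^&*(').to_s("xxx")        #=> ""
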
@@ -374,7 +402,7 @@ module_eval <<'..end lib/ferret/query_parser/query_parser.y modeval..id9e08d4407
     return qp.parse(query)
   end
 
-..end lib/ferret/query_parser/query_parser.y modeval..id9e08d44076
+..end lib/ferret/query_parser/query_parser.y modeval..id81dbd43492
 
 ##### racc 1.4.4 generates ###
 
@@ -893,7 +921,8 @@ if __FILE__ == $0
 
   parser = Ferret::QueryParser.new("default",
                                    :fields => ["f1", "f2", "f3"],
-                                   :analyzer => Ferret::Analysis::StandardAnalyzer.new)
+                                   :analyzer => Ferret::Analysis::StandardAnalyzer.new,
+                                   :handle_parse_errors => true)
 
   $stdin.each do |line|
     query = parser.parse(line)
@@ -90,12 +90,17 @@ module Ferret::Search
     filter = options[:filter]
     first_doc = options[:first_doc]||0
     num_docs = options[:num_docs]||10
+    max_size = first_doc + num_docs
     sort = options[:sort]
 
-    if (num_docs <= 0) # nil might be returned from hq.top() below.
+    if (num_docs <= 0)
      raise ArgumentError, "num_docs must be > 0 to run a search"
     end
 
+    if (first_doc < 0)
+      raise ArgumentError, "first_doc must be >= 0 to run a search"
+    end
+
     scorer = query.weight(self).scorer(@reader)
     if (scorer == nil)
       return TopDocs.new(0, [])
@@ -104,33 +109,32 @@ module Ferret::Search
     bits = (filter.nil? ? nil : filter.bits(@reader))
     if (sort)
       fields = sort.is_a?(Array) ? sort : sort.fields
-      hq = FieldSortedHitQueue.new(@reader, fields, num_docs + first_doc)
+      hq = FieldSortedHitQueue.new(@reader, fields, max_size)
     else
-      hq = HitQueue.new(num_docs + first_doc)
+      hq = HitQueue.new(max_size)
     end
     total_hits = 0
     min_score = 0.0
     scorer.each_hit() do |doc, score|
       if score > 0.0 and (bits.nil? or bits.get(doc)) # skip docs not in bits
         total_hits += 1
-        if hq.size < num_docs or score >= min_score
+        if hq.size < max_size or score >= min_score
           hq.insert(ScoreDoc.new(doc, score))
           min_score = hq.top.score # maintain min_score
         end
       end
     end
 
-    score_docs = Array.new(hq.size)
+    score_docs = []
     if (hq.size > first_doc)
-      score_docs = Array.new(hq.size - first_doc)
-      first_doc.times { hq.pop }
-      (hq.size - 1).downto(0) do |i|
-        score_docs[i] = hq.pop
+      if (hq.size - first_doc) < num_docs
+        num_docs = hq.size - first_doc
+      end
+      num_docs.times do
+        score_docs.unshift(hq.pop)
       end
-    else
-      score_docs = []
-      hq.clear
     end
+    hq.clear
 
     return TopDocs.new(total_hits, score_docs)
   end
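
The rewritten tail means :first_doc and :num_docs now act as a proper offset/limit pair, clamped to the hits actually collected, as exercised by the new test_first_doc test. Usage sketch, with searcher and query standing in for objects set up elsewhere:

    top_docs = searcher.search(query, :first_doc => 10, :num_docs => 10)
    top_docs.score_docs.each do |score_doc|
      puts "doc #{score_doc.doc} scored #{score_doc.score}"
    end
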
@@ -25,8 +25,6 @@ module Ferret::Search
       @weight = weight
       @term_docs = td
       @norms = norms
-      #XXX
-      @norms_size = @norms.size
       @weight_value = weight.value
 
       SCORE_CACHE_SIZE.times do |i|
@@ -37,13 +37,15 @@ class FieldTest < Test::Unit::TestCase
     assert_equal(false, f.store_offsets?)
     assert_equal(false, f.store_positions?)
     assert_equal(false, f.binary?)
+    assert_equal("stored/compressed,indexed,tokenized,<name:value>", f.to_s)
   end
 
   def test_set_store()
-    f = Field.new("name", "value", Field::Store::COMPRESS, Field::Index::TOKENIZED)
+    f = Field.new("name", nil, Field::Store::COMPRESS, Field::Index::TOKENIZED)
     f.stored = Field::Store::NO
     assert_equal(false, f.stored?)
     assert_equal(false, f.compressed?)
+    assert_equal("indexed,tokenized,<name:>", f.to_s)
   end
 
   def test_set_index()
@@ -51,6 +53,7 @@ class FieldTest < Test::Unit::TestCase
     f.index = Field::Index::NO
     assert_equal(false, f.indexed?)
     assert_equal(false, f.tokenized?)
+    assert_equal("stored/compressed,<name:value>", f.to_s)
   end
 
   def test_set_term_vector()
@@ -59,6 +62,7 @@ class FieldTest < Test::Unit::TestCase
     assert_equal(true, f.store_term_vector?)
     assert_equal(true, f.store_offsets?)
     assert_equal(true, f.store_positions?)
+    assert_equal("stored/compressed,indexed,tokenized,store_term_vector,tv_offset,tv_position,<name:value>", f.to_s)
   end
 
   def test_new_binary_field()
@@ -76,5 +80,6 @@ class FieldTest < Test::Unit::TestCase
     assert_equal(false, f.store_offsets?)
     assert_equal(false, f.store_positions?)
     assert_equal(true, f.binary?)
+    assert_equal("stored/uncompressed,binary,<name:#{bin}>", f.to_s)
   end
 end
@@ -5,6 +5,7 @@ class QueryParserTest < Test::Unit::TestCase
   def test_strings()
     parser = Ferret::QueryParser.new("xxx", :fields => ["f1", "f2", "f3"])
     pairs = [
+      ['', ''],
       ['word', 'word'],
       ['field:word', 'field:word'],
       ['"word1 word2 word3"', '"word word word"'],
@@ -92,8 +93,8 @@ class QueryParserTest < Test::Unit::TestCase
       ['"onewordphrase"', 'onewordphrase']
     ]
 
-    pairs.each do |pair|
-      assert_equal(pair[1], parser.parse(pair[0]).to_s(parser.default_field))
+    pairs.each do |query_str, expected|
+      assert_equal(expected, parser.parse(query_str).to_s(parser.default_field))
     end
   end
 
@@ -105,8 +106,32 @@ class QueryParserTest < Test::Unit::TestCase
       ['key:(1234)', 'key:1234']
     ]
 
-    pairs.each do |pair|
-      assert_equal(pair[1], parser.parse(pair[0]).to_s(parser.default_field))
+    pairs.each do |query_str, expected|
+      assert_equal(expected, parser.parse(query_str).to_s(parser.default_field))
+    end
+  end
+
+  def do_test_query_parse_exception_raised(str)
+    parser = Ferret::QueryParser.new("xxx", :fields => ["f1", "f2", "f3"])
+    assert_raise(Ferret::QueryParser::QueryParseException) do
+      parser.parse(str)
+    end
+  end
+
+
+  def test_bad_queries
+    parser = Ferret::QueryParser.new("xxx", :fields => ["f1", "f2"],
+                                     :handle_parse_errors => true)
+
+    pairs = [
+      ['(*word', 'word'],
+      ['()*&)(*^&*(', ''],
+      ['()*&one)(*two(*&"', 'one two']
+    ]
+
+    pairs.each do |query_str, expected|
+      do_test_query_parse_exception_raised(query_str)
+      assert_equal(expected, parser.parse(query_str).to_s(parser.default_field))
     end
   end
 end
@@ -46,6 +46,15 @@ class IndexSearcherTest < Test::Unit::TestCase
     end
   end
 
+  def check_docs(query, options, expected=[])
+    top_docs = @is.search(query, options)
+    docs = top_docs.score_docs
+    assert_equal(expected.length, docs.length)
+    docs.length.times do |i|
+      assert_equal(expected[i], docs[i].doc)
+    end
+  end
+
   def test_get_doc()
     assert_equal(18, @is.max_doc)
     assert_equal("20050930", @is.doc(0).values(:date))
@@ -57,15 +66,38 @@ class IndexSearcherTest < Test::Unit::TestCase
     tq.boost = 100
     check_hits(tq, [1,4,8])
 
+    tq = TermQuery.new(Term.new("field", ""));
+    check_hits(tq, [])
+
     tq = TermQuery.new(Term.new("field", "word1"));
     top_docs = @is.search(tq)
-    #puts top_docs.score_docs
     assert_equal(@documents.size, top_docs.total_hits)
     assert_equal(10, top_docs.score_docs.size)
     top_docs = @is.search(tq, {:num_docs => 20})
     assert_equal(@documents.size, top_docs.score_docs.size)
   end
 
+
+  def test_first_doc
+    tq = TermQuery.new(Term.new("field", "word1"));
+    tq.boost = 100
+    top_docs = @is.search(tq, {:num_docs => 100})
+    expected = []
+    top_docs.score_docs.each do |score_doc|
+      expected << score_doc.doc
+    end
+
+    assert_raise(ArgumentError) { @is.search(tq, {:first_doc => -1}) }
+    assert_raise(ArgumentError) { @is.search(tq, {:num_docs => 0}) }
+    assert_raise(ArgumentError) { @is.search(tq, {:num_docs => -1}) }
+
+    check_docs(tq, {:num_docs => 8, :first_doc => 0}, expected[0,8])
+    check_docs(tq, {:num_docs => 3, :first_doc => 1}, expected[1,3])
+    check_docs(tq, {:num_docs => 6, :first_doc => 2}, expected[2,6])
+    check_docs(tq, {:num_docs => 2, :first_doc => expected.length}, [])
+    check_docs(tq, {:num_docs => 2, :first_doc => expected.length + 100}, [])
+  end
+
   def test_boolean_query
     bq = BooleanQuery.new()
     tq1 = TermQuery.new(Term.new("field", "word1"))
metadata CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.8.11
 specification_version: 1
 name: ferret
 version: !ruby/object:Gem::Version
-  version: 0.2.1
-date: 2005-11-14 00:00:00 +09:00
+  version: 0.2.2
+date: 2005-11-22 00:00:00 +09:00
 summary: Ruby indexing library.
 require_paths:
 - lib