RubyGems - ferret - Versions diffs - 0.1.3 → 0.1.4 - Mend

ferret 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (57) hide show

data/Rakefile +1 -1
data/TODO +3 -0
data/ext/dummy.exe +0 -0
data/lib/ferret.rb +1 -1
data/lib/ferret/analysis/token.rb +6 -0
data/lib/ferret/analysis/tokenizers.rb +5 -5
data/lib/ferret/document/document.rb +10 -13
data/lib/ferret/index/compound_file_io.rb +12 -9
data/lib/ferret/index/field_infos.rb +0 -6
data/lib/ferret/index/index.rb +220 -102
data/lib/ferret/index/index_reader.rb +22 -2
data/lib/ferret/index/index_writer.rb +55 -14
data/lib/ferret/index/multi_reader.rb +279 -279
data/lib/ferret/index/segment_infos.rb +3 -3
data/lib/ferret/index/segment_merger.rb +7 -6
data/lib/ferret/index/segment_reader.rb +23 -7
data/lib/ferret/index/segment_term_enum.rb +6 -7
data/lib/ferret/index/term_buffer.rb +3 -5
data/lib/ferret/index/term_doc_enum.rb +7 -2
data/lib/ferret/index/term_infos_io.rb +15 -8
data/lib/ferret/query_parser/query_parser.tab.rb +49 -45
data/lib/ferret/search/boolean_query.rb +3 -4
data/lib/ferret/search/boolean_scorer.rb +11 -11
data/lib/ferret/search/caching_wrapper_filter.rb +1 -1
data/lib/ferret/search/disjunction_sum_scorer.rb +9 -7
data/lib/ferret/search/field_cache.rb +1 -2
data/lib/ferret/search/field_sorted_hit_queue.rb +1 -1
data/lib/ferret/search/fuzzy_term_enum.rb +64 -58
data/lib/ferret/search/index_searcher.rb +16 -9
data/lib/ferret/search/prefix_query.rb +7 -0
data/lib/ferret/search/query_filter.rb +1 -1
data/lib/ferret/search/term_scorer.rb +5 -1
data/lib/ferret/search/top_docs.rb +12 -0
data/lib/ferret/store/buffered_index_io.rb +5 -6
data/lib/ferret/store/fs_store.rb +47 -33
data/lib/ferret/store/ram_store.rb +2 -2
data/lib/ferret/utils.rb +1 -0
data/lib/ferret/utils/bit_vector.rb +20 -2
data/lib/ferret/utils/thread_local.rb +28 -0
data/lib/ferret/utils/weak_key_hash.rb +11 -2
data/test/benchmark/tb_rw_vint.rb +1 -1
data/test/functional/thread_safety_index_test.rb +81 -0
data/test/functional/thread_safety_test.rb +137 -0
data/test/test_all.rb +3 -7
data/test/test_helper.rb +2 -1
data/test/unit/index/tc_compound_file_io.rb +2 -2
data/test/unit/index/tc_index.rb +128 -6
data/test/unit/index/tc_index_reader.rb +1 -1
data/test/unit/index/tc_segment_infos.rb +1 -1
data/test/unit/index/th_doc.rb +1 -1
data/test/unit/search/tc_index_searcher.rb +6 -0
data/test/unit/store/tc_fs_store.rb +3 -3
data/test/unit/utils/tc_bit_vector.rb +8 -0
data/test/unit/utils/tc_thread.rb +61 -0
data/test/unit/utils/tc_weak_key_hash.rb +2 -2
data/test/utils/number_to_spoken.rb +132 -0
metadata +7 -2

data/Rakefile CHANGED Viewed

@@ -196,7 +196,7 @@ end
 desc "Make a new release"
 task :prerelease => [:clobber, :all_tests, :parsers]
-#task :package => [:prerelease]
+task :package => [:prerelease]
 task :tag => [:prerelease]
 task :update_version => [:prerelease]
 task :release => [:tag, :update_version, :package] do

data/TODO CHANGED Viewed

@@ -5,8 +5,11 @@ Send suggestions for this list to mailto:dbalmain@gmail.com
 === To Do
 * Add the ability to persist an in memory index to Ferret::Index::Index
+* Make a dll for people on Windows
 === Done
 * Add UTF-8 support
 * Multi Field Query
+* Test threading
+* Compile a proper dummy executable

data/ext/dummy.exe CHANGED Viewed

Binary file

data/lib/ferret.rb CHANGED Viewed

@@ -22,7 +22,7 @@
 #++
 # :include: ../TUTORIAL
 module Ferret
-  VERSION = '0.1.3'
+  VERSION = '0.1.4'
 end
 require 'ferret/utils'

data/lib/ferret/analysis/token.rb CHANGED Viewed

@@ -35,6 +35,12 @@ module Ferret::Analysis
       @position_increment = pos_inc
     end
+    def eql?(o)
+      return (o.instance_of?(Token) and @start_offset == o.start_offset and
+              @end_offset == o.end_offset and @term_text = o.term_text)
+    end
+    alias :== :eql?
     # Tokens are sorted by the position in the text at which they occur, ie
     # the start_offset. If two tokens have the same start offset, (see
     # position_increment=) then, they are sorted by the end_offset and then

data/lib/ferret/analysis/tokenizers.rb CHANGED Viewed

@@ -26,7 +26,7 @@ module Ferret::Analysis
   #
   #   class LetterTokenizer < RegExpTokenizer
   #       def token_re()
-  #         /[a-zA-Z]+/
+  #         /[[:alpha:]]+/
   #       end
   #   end
   class RegExpTokenizer < Tokenizer
@@ -63,7 +63,7 @@ module Ferret::Analysis
     protected
       # returns the regular expression used to find the next token
       def token_re
-        /[a-zA-Z]+/
+        /[[:alpha:]]+/
       end
       # Called on each token to normalize it before it is added to the
@@ -75,13 +75,13 @@ module Ferret::Analysis
   # A LetterTokenizer is a tokenizer that divides text at non-letters.
   # That's to say, it defines tokens as maximal strings of adjacent letters,
-  # as defined by the regular expression _/[a-zA-Z]+/_.
+  # as defined by the regular expression _/[[:alpha:]]+/_.
   class LetterTokenizer < RegExpTokenizer
     protected
       # Collects only characters which satisfy the regular expression
-      # _/[a-zA-Z]+/_.
+      # _/[[:alpha:]]+/_.
       def token_re()
-        /[a-zA-Z]+/
+        /[[:alpha:]]+/
       end
   end

data/lib/ferret/document/document.rb CHANGED Viewed

@@ -69,13 +69,13 @@ module Ferret::Document
     # document has to be deleted from an index and a new changed version of
     # that document has to be added.
     def add_field(field)
-      (@fields[field.name] ||= []) << field
+      (@fields[field.name.to_s] ||= []) << field
     end
     alias :<< :add_field
     # Removes the first field of this name if it exists.
     def remove_field(name)
-      @fields[name].delete_at(0)
+      @fields[name.to_s].delete_at(0)
     end
     # Removes all fields with the given name from the document.
@@ -89,7 +89,7 @@ module Ferret::Document
     # this, a document has to be deleted from an index and a new changed
     # version of that document has to be added.
     def remove_fields(name)
-      @fields.delete(name)
+      @fields.delete(name.to_s)
     end
     # Returns the first field with the given name.
@@ -98,7 +98,7 @@ module Ferret::Document
     # name:: the name of the field
     # Return:: the first _Field_ with that name, or nil if there is none
     def field(name)
-      @fields[name] ? @fields[name][0] : nil
+      @fields[name.to_s] ? @fields[name.to_s][0] : nil
     end
     # Returns an array of all fields with the given name.
@@ -107,7 +107,7 @@ module Ferret::Document
     # name:: the name of the field
     # Return:: a _Field_ array
     def fields(name)
-      @fields[name]
+      @fields[name.to_s]
     end
     # Returns an array of values of the field specified as the method
@@ -116,8 +116,8 @@ module Ferret::Document
     # name:: the name of the field
     # Return:: a _String_ of field values
     def values(name)
-      return nil if @fields[name].nil?
-      @fields[name].map {|f| f.data if not f.binary? }.join(" ")
+      return nil if @fields[name.to_s].nil?
+      @fields[name.to_s].map {|f| f.data if not f.binary? }.join(" ")
     end
     alias :[] :values
@@ -125,7 +125,7 @@ module Ferret::Document
     # field of that name then it will set the data in the first field of that
     # name.
     def []=(field_name, data)
-      field = field(field_name)
+      field = field(field_name.to_s)
       raise ArgumentError, "Field does not exist" unless field
       field.data = data
     end
@@ -137,16 +137,13 @@ module Ferret::Document
     # Return:: an _Array_ holding the data of each binary field
     def binaries(name)
       binaries = []
-      @fields[name].each {|f| binaries << f.data if f.binary? }
+      @fields[name.to_s].each {|f| binaries << f.data if f.binary? }
       return binaries
     end
     # Returns a human-readable string listing the document's field names.
     def to_s()
-      field_str = ""
-      @fields.each_key { |name| field_str += name + " " }
-      field_str[-1] = ">"
-      return "Document<" + field_str
+      return "Document<#{@fields.keys.join(" ")}>"
     end
   end
 end

data/lib/ferret/index/compound_file_io.rb CHANGED Viewed

@@ -92,7 +92,7 @@ module Ferret::Index
     end
     # Returns true iff a file with the given name exists.
-    def file_exists(name)
+    def exists?(name)
       return @entries.key?(name)
     end
@@ -113,7 +113,7 @@ module Ferret::Index
     def rename(from, to) raise(UnsupportedOperationError) end
     # Returns the length of a file in the directory.
-    def file_length(name)
+    def length(name)
       e = @entries[name]
       if (e == nil): raise(IOError, "File " + name + " does not exist") end
       return e.length
@@ -188,6 +188,9 @@ module Ferret::Index
   # data section, and a UTF String with that file's extension.
   class CompoundFileWriter
+    class StateError < Exception
+    end
     attr_reader :directory, :file_name
     # Create the compound stream in the specified file. The file name is the
@@ -203,16 +206,16 @@ module Ferret::Index
     # Add a source stream. _file_name_ is the string by which the
     # sub-stream will be known in the compound stream.
     #
-    # Throws:: IllegalStateError if this writer is closed
-    # Throws:: IllegalArgumentError if a file with the same name
+    # Throws:: StateError if this writer is closed
+    # Throws:: ArgumentError if a file with the same name
     #          has been added already
     def add_file(file_name)
       if @merged
-        raise(IllegalStateError, "Can't add extensions after merge has been called")
+        raise(StateError, "Can't add extensions after merge has been called")
       end
       if not @ids.add?(file_name)
-        raise(IllegalArgumentError, "File " + file + " already added")
+        raise(ArgumentError, "File #{file_name} already added")
       end
       entry = FileEntry.new(file_name)
@@ -224,16 +227,16 @@ module Ferret::Index
     # compound stream. After successful merge, the source files
     # are deleted.
     #
-    # Throws:: IllegalStateException if close() had been called before or
+    # Throws:: StateException if close() had been called before or
     #          if no file has been added to this object
     def close()
       if @merged
-        raise(IllegalStateException, "Merge already performed")
+        raise(StateException, "Merge already performed")
       end
       if @file_entries.empty?
-        raise(IllegalStateException, "No entries to merge have been defined")
+        raise(StateException, "No entries to merge have been defined")
       end
       @merged = true

data/lib/ferret/index/field_infos.rb CHANGED Viewed

@@ -27,12 +27,6 @@ module Ferret
         end
       end
-      # Returns the number of fields that have been added to this field infos
-      # object.
-      def size
-        return @fi_array.size
-      end
       # Automatically adds all of the fields from the document if they haven't
       # been added already. Or it will update the values.
       def add_doc_fields(doc)

data/lib/ferret/index/index.rb CHANGED Viewed

@@ -1,7 +1,11 @@
+require 'monitor'
 module Ferret::Index
   # This is a simplified interface to the index. See the TUTORIAL for more
   # information on how to use this class.
   class Index
+    include MonitorMixin
     include Ferret::Store
     include Ferret::Search
     include Ferret::Document
@@ -77,9 +81,10 @@ module Ferret::Index
     #                            :default_slop => 2)
     #
     def initialize(options = {})
+      super()
+      options[:create_if_missing] = true if options[:create_if_missing].nil?
       if options[:path]
-        options[:create_if_missing] = true if options[:create_if_missing].nil?
-        @dir = FSDirectory.new(options[:path], true)
+        @dir = FSDirectory.new(options[:path], options[:create])
         options[:close_dir] = true
       elsif options[:dir]
         @dir = options[:dir]
@@ -88,29 +93,34 @@ module Ferret::Index
         @dir = RAMDirectory.new
       end
-      @options = options
-      @writer = IndexWriter.new(@dir, options)
-      options[:analyzer] = @analyzer = @writer.analyzer
-      @has_writes = false
-      @reader = nil
-      @options.delete(:create) # only want to create the first time if at all
-      @close_dir = @options.delete(:close_dir) || false # we'll hold this here
-      @default_search_field = (@options[:default_search_field] || \
-                               @options[:default_field] || "*")
-      @default_field = @options[:default_field] || ""
-      @open = true
+      @dir.synchronize do
+        @options = options
+        @writer = IndexWriter.new(@dir, options)
+        options[:analyzer] = @analyzer = @writer.analyzer
+        @has_writes = false
+        @reader = nil
+        @options.delete(:create) # only want to create the first time if at all
+        @close_dir = @options.delete(:close_dir) || false # we'll hold this here
+        @default_search_field = (@options[:default_search_field] || \
+                                 @options[:default_field] || "*")
+        @default_field = @options[:default_field] || ""
+        @open = true
+        @qp = nil
+      end
     end
     # Closes this index by closing its associated reader and writer objects.
     def close
-      if not @open
-        raise "tried to close an already closed directory"
-      end
-      @reader.close() if @reader
-      @writer.close() if @writer
-      @dir.close()
+      @dir.synchronize do
+        if not @open
+          raise "tried to close an already closed directory"
+        end
+        @reader.close() if @reader
+        @writer.close() if @writer
+        @dir.close()
-      @open = false
+        @open = false
+      end
     end
     # Get the reader for this index.
@@ -133,6 +143,7 @@ module Ferret::Index
       ensure_writer_open()
       return @writer
     end
+    protected :reader, :writer, :searcher
     # Adds a document to this index, using the provided analyzer instead of
     # the local analyzer if provided.  If the document contains more than
@@ -147,27 +158,28 @@ module Ferret::Index
     #   index << "This is a new document to be indexed"
     #   index << ["And here", "is another", "new document", "to be indexed"]
     #
-    # But these are pretty simple documents. If this is all you want to index you
-    # could probably just use SimpleSearch. So let's give our documents some fields;
+    # But these are pretty simple documents. If this is all you want to index
+    # you could probably just use SimpleSearch. So let's give our documents
+    # some fields;
     #
     #   index << {:title => "Programming Ruby", :content => "blah blah blah"}
     #   index << {:title => "Programming Ruby", :content => "yada yada yada"}
     #
-    # Or if you are indexing data stored in a database, you'll probably want to
-    # store the id;
+    # Or if you are indexing data stored in a database, you'll probably want
+    # to store the id;
     #
     #   index << {:id => row.id, :title => row.title, :date => row.date}
     #
-    # The methods above while store all of the input data as well tokenizing and
-    # indexing it. Sometimes we won't want to tokenize (divide the string into
-    # tokens) the data. For example, we might want to leave the title as a complete
-    # string and only allow searchs for that complete string. Sometimes we won't
-    # want to store the data as it's already stored in the database so it'll be a
-    # waste to store it in the index. Or perhaps we are doing without a database and
-    # using Ferret to store all of our data, in which case we might not want to
-    # index it. For example, if we are storing images in the index, we won't want to
-    # index them. All of this can be done using Ferret's Ferret::Document module.
-    # eg;
+    # The methods above will store all of the input data as well as
+    # tokenizing and indexing it. Sometimes we won't want to tokenize (divide
+    # the string into tokens) the data. For example, we might want to leave
+    # the title as a complete string and only allow searches for that complete
+    # string. Sometimes we won't want to store the data as it's already stored
+    # in the database so it'll be a waste to store it in the index. Or perhaps
+    # we are doing without a database and using Ferret to store all of our
+    # data, in which case we might not want to index it. For example, if we
+    # are storing images in the index, we won't want to index them. All of
+    # this can be done using Ferret's Ferret::Document module. eg;
     #
     #   include Ferret::Document
     #   doc = Document.new
@@ -177,35 +189,37 @@ module Ferret::Index
     #   doc << Field.new("image", row.image, Field::Store::YES, Field::Index::NO)
     #   index << doc
     #
-    # You can also compress the data that you are storing or store term vectors with
-    # the data. Read more about this in Ferret::Document::Field.
+    # You can also compress the data that you are storing or store term
+    # vectors with the data. Read more about this in Ferret::Document::Field.
     def add_document(doc, analyzer = nil)
-      ensure_writer_open()
-      fdoc = nil
-      if doc.is_a?(String)
-        fdoc = Document.new
-        fdoc << Field.new(@default_field, doc,
-                          Field::Store::YES, Field::Index::TOKENIZED)
-      elsif doc.is_a?(Array)
-        fdoc = Document.new
-        doc.each() do |field|
-          fdoc << Field.new(@default_field, field,
+      @dir.synchronize do
+        ensure_writer_open()
+        fdoc = nil
+        if doc.is_a?(String)
+          fdoc = Document.new
+          fdoc << Field.new(@default_field, doc,
                             Field::Store::YES, Field::Index::TOKENIZED)
+        elsif doc.is_a?(Array)
+          fdoc = Document.new
+          doc.each() do |field|
+            fdoc << Field.new(@default_field, field,
+                              Field::Store::YES, Field::Index::TOKENIZED)
+          end
+        elsif doc.is_a?(Hash)
+          fdoc = Document.new
+          doc.each_pair() do |field, text|
+            fdoc << Field.new(field.to_s, text.to_s,
+                              Field::Store::YES, Field::Index::TOKENIZED)
+          end
+        elsif doc.is_a?(Document)
+          fdoc = doc
+        else
+          raise ArgumentError, "Unknown document type #{doc.class}"
         end
-      elsif doc.is_a?(Hash)
-        fdoc = Document.new
-        doc.each_pair() do |field, text|
-          fdoc << Field.new(field.to_s, text.to_s,
-                            Field::Store::YES, Field::Index::TOKENIZED)
-        end
-      elsif doc.is_a?(Document)
-        fdoc = doc
-      else
-        raise ArgumentError, "Unknown document type #{doc.class}"
-      end
-      @has_writes = true
+        @has_writes = true
-      @writer.add_document(fdoc, analyzer || @writer.analyzer)
+        @writer.add_document(fdoc, analyzer || @writer.analyzer)
+      end
     end
     alias :<< :add_document
@@ -213,24 +227,16 @@ module Ferret::Index
     # pass to this method. You can also pass a hash with one or more of the
     # following; {filter, num_docs, first_doc, sort}
     #
-    # query::    the query to run on the index
-    # filter::   filters docs from the search result
-    # first_doc:: The index in the results of the first doc retrieved.
-    #   Default is 0
-    # num_docs:: The number of results returned. Default is 10
-    # sort::     an array of SortFields describing how to sort the results.
+    # query::      The query to run on the index
+    # filter::     Filters docs from the search result
+    # first_doc::  The index in the results of the first doc retrieved.
+    #              Default is 0
+    # num_docs::   The number of results returned. Default is 10
+    # sort::       An array of SortFields describing how to sort the results.
     def search(query, options = {})
-      ensure_searcher_open()
-      if query.is_a?(String)
-        if @qp.nil?
-          @qp = Ferret::QueryParser.new(@default_search_field, @options)
-        end
-        # we need to set this ever time, in case a new field has been added
-        @qp.fields = @reader.get_field_names.to_a
-        query = @qp.parse(query)
+      @dir.synchronize do
+        return do_search(query, options)
       end
-      return @searcher.search(query, options)
     end
     # See Index#search
@@ -241,9 +247,14 @@ module Ferret::Index
     #     puts "hit document number #{doc} with a score of #{score}"
     #   end
     #
+    # returns:: The total number of hits.
     def search_each(query, options = {}) # :yield: doc, score
-      search(query, options).score_docs.each do |score_doc|
-        yield score_doc.doc, score_doc.score
+      @dir.synchronize do
+        hits = do_search(query, options)
+        hits.score_docs.each do |score_doc|
+          yield score_doc.doc, score_doc.score
+        end
+        return hits.total_hits
       end
     end
@@ -253,14 +264,16 @@ module Ferret::Index
     # id:: The number of the document to retrieve, or the term used as the id
     #      for the document we wish to retrieve
     def doc(id)
-      ensure_reader_open()
-      if id.is_a?(String)
-        t = Term.new("id", id.to_s)
-        return @reader.get_document_with_term(t)
-      elsif id.is_a?(Term)
-        return @reader.get_document_with_term(id)
-      else
-        return @reader.get_document(id)
+      @dir.synchronize do
+        ensure_reader_open()
+        if id.is_a?(String)
+          t = Term.new("id", id.to_s)
+          return @reader.get_document_with_term(t)
+        elsif id.is_a?(Term)
+          return @reader.get_document_with_term(id)
+        else
+          return @reader.get_document(id)
+        end
       end
     end
     alias :[] :doc
@@ -271,28 +284,34 @@ module Ferret::Index
     #
     # id:: The number of the document to delete
     def delete(id)
-      ensure_reader_open()
-      if id.is_a?(String)
-        t = Term.new("id", id.to_s)
-        return @reader.delete_docs_with_term(t)
-      elsif id.is_a?(Term)
-        return @reader.delete_docs_with_term(id)
-      else
-        return @reader.delete(id)
+      @dir.synchronize do
+        ensure_reader_open()
+        if id.is_a?(String)
+          t = Term.new("id", id.to_s)
+          return @reader.delete_docs_with_term(t)
+        elsif id.is_a?(Term)
+          return @reader.delete_docs_with_term(id)
+        else
+          return @reader.delete(id)
+        end
       end
     end
     # Returns true if document +n+ has been deleted
     def deleted?(n)
-      ensure_reader_open()
-      return @reader.deleted?(n)
+      @dir.synchronize do
+        ensure_reader_open()
+        return @reader.deleted?(n)
+      end
     end
     # Returns true if any documents have been deleted since the index was last
     # flushed.
     def has_deletions?()
-      ensure_reader_open()
-      return @reader.has_deletions?
+      @dir.synchronize do
+        ensure_reader_open()
+        return @reader.has_deletions?
+      end
     end
     # Returns true if any documents have been added to the index since the
@@ -301,18 +320,102 @@ module Ferret::Index
       return @has_writes
     end
+    # Flushes all writes to the index. This will not optimize the index but it
+    # will make sure that all writes are written to it.
+    #
+    # NOTE: this is not necessary if you are only using this class. All writes
+    # will automatically flush when you perform an operation that reads the
+    # index.
+    def flush()
+      @dir.synchronize do
+        @reader.close if @reader
+        @writer.close if @writer
+        @reader = nil
+        @writer = nil
+        @searcher = nil
+      end
+    end
     # optimizes the index. This should only be called when the index will no
     # longer be updated very often, but will be read a lot.
     def optimize()
-      ensure_writer_open()
-      @writer.optimize()
-      @modified = true
+      @dir.synchronize do
+        ensure_writer_open()
+        @writer.optimize()
+        @modified = true
+      end
     end
     # returns the number of documents in the index
     def size()
-      ensure_reader_open()
-      return @reader.num_docs()
+      @dir.synchronize do
+        ensure_reader_open()
+        return @reader.num_docs()
+      end
+    end
+    # Merges all segments from an index or an array of indexes into this
+    # index. You can pass a single Index::Index, Index::Reader,
+    # Store::Directory or an array of any single one of these.
+    #
+    # This may be used to parallelize batch indexing. A large document
+    # collection can be broken into sub-collections. Each sub-collection can
+    # be indexed in parallel, on a different thread, process or machine and
+    # perhaps all in memory. The complete index can then be created by
+    # merging sub-collection indexes with this method.
+    #
+    # After this completes, the index is optimized.
+    def add_indexes(indexes)
+      @dir.synchronize do
+        indexes = [indexes].flatten   # make sure we have an array
+        return if indexes.size == 0 # nothing to do
+        if indexes[0].is_a?(Index)
+          readers = indexes.map {|index| index.reader }
+          indexes = readers
+        end
+        if indexes[0].is_a?(IndexReader)
+          ensure_reader_open
+          indexes.delete(@reader) # we don't want to merge with self
+          ensure_writer_open
+          @writer.add_indexes_readers(indexes)
+        elsif indexes[0].is_a?(Ferret::Store::Directory)
+          indexes.delete(@dir) # we don't want to merge with self
+          ensure_writer_open
+          @writer.add_indexes(indexes)
+        else
+          raise ArgumentError, "Unknown index type when trying to merge indexes"
+        end
+      end
+    end
+    # This is a simple utility method for saving an in memory or RAM index to
+    # the file system. The same thing can be achieved by using the
+    # Index::Index#add_indexes method and you will have more options when
+    # creating the new index, however this is a simple way to turn a RAM index
+    # into a file system index.
+    #
+    # directory:: This can either be a Store::Directory object or a string
+    #             representing the path to the directory where you would
+    #             like to store the the index.
+    #
+    # create::    True if you'd like to create the directory if it doesn't
+    #             exist or copy over an existing directory. False if you'd
+    #             like to merge with the existing directory. This defaults to
+    #             false.
+    def persist(directory, create = true)
+      synchronize do
+        flush
+        old_dir = @dir
+        if directory.is_a?(String)
+          @dir = FSDirectory.new(directory, create)
+          @options[:close_dir] = true
+        elsif directory.is_a?(Ferret::Store::Directory)
+          @dir = directory
+        end
+        ensure_writer_open
+        @writer.add_indexes([old_dir])
+      end
     end
     protected
@@ -343,5 +446,20 @@ module Ferret::Index
         ensure_reader_open()
         @searcher = IndexSearcher.new(@reader)
       end
+    private
+      def do_search(query, options)
+        ensure_searcher_open()
+        if query.is_a?(String)
+          if @qp.nil?
+            @qp = Ferret::QueryParser.new(@default_search_field, @options)
+          end
+          # we need to set this ever time, in case a new field has been added
+          @qp.fields = @reader.get_field_names.to_a
+          query = @qp.parse(query)
+        end
+        return @searcher.search(query, options)
+      end
   end
 end