ferret 0.9.6 → 0.10.0
Sign up to get free protection for your applications and to get access to all the features.
- data/MIT-LICENSE +1 -1
- data/README +12 -24
- data/Rakefile +38 -54
- data/TODO +14 -17
- data/ext/analysis.c +982 -823
- data/ext/analysis.h +133 -76
- data/ext/array.c +96 -58
- data/ext/array.h +40 -13
- data/ext/bitvector.c +476 -118
- data/ext/bitvector.h +264 -22
- data/ext/compound_io.c +217 -229
- data/ext/defines.h +49 -0
- data/ext/document.c +107 -317
- data/ext/document.h +31 -65
- data/ext/except.c +81 -36
- data/ext/except.h +117 -55
- data/ext/extconf.rb +2 -9
- data/ext/ferret.c +211 -104
- data/ext/ferret.h +22 -11
- data/ext/filter.c +97 -82
- data/ext/fs_store.c +348 -367
- data/ext/global.c +226 -188
- data/ext/global.h +44 -26
- data/ext/hash.c +474 -391
- data/ext/hash.h +441 -68
- data/ext/hashset.c +124 -96
- data/ext/hashset.h +169 -20
- data/ext/helper.c +56 -5
- data/ext/helper.h +7 -0
- data/ext/inc/lang.h +29 -49
- data/ext/inc/threading.h +31 -0
- data/ext/ind.c +288 -278
- data/ext/ind.h +68 -0
- data/ext/index.c +5688 -0
- data/ext/index.h +663 -616
- data/ext/lang.h +29 -49
- data/ext/libstemmer.c +3 -3
- data/ext/mem_pool.c +84 -0
- data/ext/mem_pool.h +35 -0
- data/ext/posh.c +1006 -0
- data/ext/posh.h +1007 -0
- data/ext/priorityqueue.c +117 -194
- data/ext/priorityqueue.h +135 -39
- data/ext/q_boolean.c +1305 -1108
- data/ext/q_const_score.c +106 -93
- data/ext/q_filtered_query.c +138 -135
- data/ext/q_fuzzy.c +206 -242
- data/ext/q_match_all.c +94 -80
- data/ext/q_multi_term.c +663 -0
- data/ext/q_parser.c +667 -593
- data/ext/q_phrase.c +992 -555
- data/ext/q_prefix.c +72 -61
- data/ext/q_range.c +235 -210
- data/ext/q_span.c +1480 -1166
- data/ext/q_term.c +273 -246
- data/ext/q_wildcard.c +127 -114
- data/ext/r_analysis.c +1720 -711
- data/ext/r_index.c +3049 -0
- data/ext/r_qparser.c +433 -146
- data/ext/r_search.c +2934 -1993
- data/ext/r_store.c +372 -143
- data/ext/r_utils.c +941 -0
- data/ext/ram_store.c +330 -326
- data/ext/search.c +1291 -668
- data/ext/search.h +403 -702
- data/ext/similarity.c +91 -113
- data/ext/similarity.h +45 -30
- data/ext/sort.c +721 -484
- data/ext/stopwords.c +361 -273
- data/ext/store.c +556 -58
- data/ext/store.h +706 -126
- data/ext/tags +3578 -2780
- data/ext/term_vectors.c +352 -0
- data/ext/threading.h +31 -0
- data/ext/win32.h +54 -0
- data/lib/ferret.rb +5 -17
- data/lib/ferret/document.rb +130 -2
- data/lib/ferret/index.rb +577 -26
- data/lib/ferret/number_tools.rb +157 -0
- data/lib/ferret_version.rb +3 -0
- data/test/test_helper.rb +5 -13
- data/test/unit/analysis/tc_analyzer.rb +513 -1
- data/test/unit/analysis/{ctc_tokenstream.rb → tc_token_stream.rb} +23 -0
- data/test/unit/index/tc_index.rb +183 -240
- data/test/unit/index/tc_index_reader.rb +312 -479
- data/test/unit/index/tc_index_writer.rb +397 -13
- data/test/unit/index/th_doc.rb +269 -206
- data/test/unit/query_parser/tc_query_parser.rb +40 -33
- data/test/unit/search/tc_filter.rb +59 -71
- data/test/unit/search/tc_fuzzy_query.rb +24 -16
- data/test/unit/search/tc_index_searcher.rb +23 -201
- data/test/unit/search/tc_multi_searcher.rb +78 -226
- data/test/unit/search/tc_search_and_sort.rb +93 -81
- data/test/unit/search/tc_sort.rb +23 -23
- data/test/unit/search/tc_sort_field.rb +7 -7
- data/test/unit/search/tc_spans.rb +51 -47
- data/test/unit/search/tm_searcher.rb +339 -0
- data/test/unit/store/tc_fs_store.rb +1 -1
- data/test/unit/store/tm_store_lock.rb +3 -3
- data/test/unit/tc_document.rb +81 -0
- data/test/unit/ts_analysis.rb +1 -1
- data/test/unit/ts_utils.rb +1 -1
- data/test/unit/utils/tc_bit_vector.rb +288 -0
- data/test/unit/utils/tc_number_tools.rb +117 -0
- data/test/unit/utils/tc_priority_queue.rb +106 -0
- metadata +140 -301
- data/CHANGELOG +0 -9
- data/ext/dummy.exe +0 -0
- data/ext/field.c +0 -408
- data/ext/frtio.h +0 -13
- data/ext/inc/except.h +0 -90
- data/ext/index_io.c +0 -382
- data/ext/index_rw.c +0 -2658
- data/ext/lang.c +0 -41
- data/ext/nix_io.c +0 -134
- data/ext/q_multi_phrase.c +0 -380
- data/ext/r_doc.c +0 -582
- data/ext/r_index_io.c +0 -1021
- data/ext/r_term.c +0 -219
- data/ext/term.c +0 -820
- data/ext/termdocs.c +0 -611
- data/ext/vector.c +0 -637
- data/ext/w32_io.c +0 -150
- data/lib/ferret/analysis.rb +0 -11
- data/lib/ferret/analysis/analyzers.rb +0 -112
- data/lib/ferret/analysis/standard_tokenizer.rb +0 -71
- data/lib/ferret/analysis/token.rb +0 -100
- data/lib/ferret/analysis/token_filters.rb +0 -86
- data/lib/ferret/analysis/token_stream.rb +0 -26
- data/lib/ferret/analysis/tokenizers.rb +0 -112
- data/lib/ferret/analysis/word_list_loader.rb +0 -27
- data/lib/ferret/document/document.rb +0 -152
- data/lib/ferret/document/field.rb +0 -312
- data/lib/ferret/index/compound_file_io.rb +0 -338
- data/lib/ferret/index/document_writer.rb +0 -289
- data/lib/ferret/index/field_infos.rb +0 -279
- data/lib/ferret/index/fields_io.rb +0 -181
- data/lib/ferret/index/index.rb +0 -675
- data/lib/ferret/index/index_file_names.rb +0 -33
- data/lib/ferret/index/index_reader.rb +0 -503
- data/lib/ferret/index/index_writer.rb +0 -534
- data/lib/ferret/index/multi_reader.rb +0 -377
- data/lib/ferret/index/multiple_term_doc_pos_enum.rb +0 -98
- data/lib/ferret/index/segment_infos.rb +0 -130
- data/lib/ferret/index/segment_merge_info.rb +0 -49
- data/lib/ferret/index/segment_merge_queue.rb +0 -16
- data/lib/ferret/index/segment_merger.rb +0 -358
- data/lib/ferret/index/segment_reader.rb +0 -412
- data/lib/ferret/index/segment_term_enum.rb +0 -169
- data/lib/ferret/index/segment_term_vector.rb +0 -58
- data/lib/ferret/index/term.rb +0 -53
- data/lib/ferret/index/term_buffer.rb +0 -83
- data/lib/ferret/index/term_doc_enum.rb +0 -291
- data/lib/ferret/index/term_enum.rb +0 -52
- data/lib/ferret/index/term_info.rb +0 -37
- data/lib/ferret/index/term_infos_io.rb +0 -321
- data/lib/ferret/index/term_vector_offset_info.rb +0 -20
- data/lib/ferret/index/term_vectors_io.rb +0 -553
- data/lib/ferret/query_parser.rb +0 -312
- data/lib/ferret/query_parser/query_parser.tab.rb +0 -928
- data/lib/ferret/search.rb +0 -50
- data/lib/ferret/search/boolean_clause.rb +0 -100
- data/lib/ferret/search/boolean_query.rb +0 -299
- data/lib/ferret/search/boolean_scorer.rb +0 -294
- data/lib/ferret/search/caching_wrapper_filter.rb +0 -40
- data/lib/ferret/search/conjunction_scorer.rb +0 -99
- data/lib/ferret/search/disjunction_sum_scorer.rb +0 -205
- data/lib/ferret/search/exact_phrase_scorer.rb +0 -32
- data/lib/ferret/search/explanation.rb +0 -41
- data/lib/ferret/search/field_cache.rb +0 -215
- data/lib/ferret/search/field_doc.rb +0 -31
- data/lib/ferret/search/field_sorted_hit_queue.rb +0 -184
- data/lib/ferret/search/filter.rb +0 -11
- data/lib/ferret/search/filtered_query.rb +0 -130
- data/lib/ferret/search/filtered_term_enum.rb +0 -79
- data/lib/ferret/search/fuzzy_query.rb +0 -154
- data/lib/ferret/search/fuzzy_term_enum.rb +0 -247
- data/lib/ferret/search/hit_collector.rb +0 -34
- data/lib/ferret/search/hit_queue.rb +0 -11
- data/lib/ferret/search/index_searcher.rb +0 -200
- data/lib/ferret/search/match_all_query.rb +0 -104
- data/lib/ferret/search/multi_phrase_query.rb +0 -216
- data/lib/ferret/search/multi_searcher.rb +0 -261
- data/lib/ferret/search/multi_term_query.rb +0 -65
- data/lib/ferret/search/non_matching_scorer.rb +0 -22
- data/lib/ferret/search/phrase_positions.rb +0 -55
- data/lib/ferret/search/phrase_query.rb +0 -214
- data/lib/ferret/search/phrase_scorer.rb +0 -152
- data/lib/ferret/search/prefix_query.rb +0 -54
- data/lib/ferret/search/query.rb +0 -140
- data/lib/ferret/search/query_filter.rb +0 -51
- data/lib/ferret/search/range_filter.rb +0 -103
- data/lib/ferret/search/range_query.rb +0 -139
- data/lib/ferret/search/req_excl_scorer.rb +0 -125
- data/lib/ferret/search/req_opt_sum_scorer.rb +0 -70
- data/lib/ferret/search/score_doc.rb +0 -38
- data/lib/ferret/search/score_doc_comparator.rb +0 -114
- data/lib/ferret/search/scorer.rb +0 -91
- data/lib/ferret/search/similarity.rb +0 -278
- data/lib/ferret/search/sloppy_phrase_scorer.rb +0 -47
- data/lib/ferret/search/sort.rb +0 -112
- data/lib/ferret/search/sort_comparator.rb +0 -60
- data/lib/ferret/search/sort_field.rb +0 -91
- data/lib/ferret/search/spans.rb +0 -12
- data/lib/ferret/search/spans/near_spans_enum.rb +0 -304
- data/lib/ferret/search/spans/span_first_query.rb +0 -79
- data/lib/ferret/search/spans/span_near_query.rb +0 -108
- data/lib/ferret/search/spans/span_not_query.rb +0 -130
- data/lib/ferret/search/spans/span_or_query.rb +0 -176
- data/lib/ferret/search/spans/span_query.rb +0 -25
- data/lib/ferret/search/spans/span_scorer.rb +0 -74
- data/lib/ferret/search/spans/span_term_query.rb +0 -105
- data/lib/ferret/search/spans/span_weight.rb +0 -84
- data/lib/ferret/search/spans/spans_enum.rb +0 -44
- data/lib/ferret/search/term_query.rb +0 -128
- data/lib/ferret/search/term_scorer.rb +0 -183
- data/lib/ferret/search/top_docs.rb +0 -36
- data/lib/ferret/search/top_field_docs.rb +0 -17
- data/lib/ferret/search/weight.rb +0 -54
- data/lib/ferret/search/wildcard_query.rb +0 -26
- data/lib/ferret/search/wildcard_term_enum.rb +0 -61
- data/lib/ferret/stemmers.rb +0 -1
- data/lib/ferret/stemmers/porter_stemmer.rb +0 -218
- data/lib/ferret/store.rb +0 -5
- data/lib/ferret/store/buffered_index_io.rb +0 -190
- data/lib/ferret/store/directory.rb +0 -141
- data/lib/ferret/store/fs_store.rb +0 -381
- data/lib/ferret/store/index_io.rb +0 -245
- data/lib/ferret/store/ram_store.rb +0 -286
- data/lib/ferret/utils.rb +0 -8
- data/lib/ferret/utils/bit_vector.rb +0 -123
- data/lib/ferret/utils/date_tools.rb +0 -138
- data/lib/ferret/utils/number_tools.rb +0 -91
- data/lib/ferret/utils/parameter.rb +0 -41
- data/lib/ferret/utils/priority_queue.rb +0 -120
- data/lib/ferret/utils/string_helper.rb +0 -47
- data/lib/ferret/utils/thread_local.rb +0 -28
- data/lib/ferret/utils/weak_key_hash.rb +0 -60
- data/lib/rferret.rb +0 -37
- data/rake_utils/code_statistics.rb +0 -106
- data/test/benchmark/tb_ram_store.rb +0 -76
- data/test/benchmark/tb_rw_vint.rb +0 -26
- data/test/functional/thread_safety_index_test.rb +0 -81
- data/test/functional/thread_safety_test.rb +0 -137
- data/test/longrunning/tc_numbertools.rb +0 -60
- data/test/longrunning/tm_store.rb +0 -19
- data/test/unit/analysis/ctc_analyzer.rb +0 -532
- data/test/unit/analysis/data/wordfile +0 -6
- data/test/unit/analysis/rtc_letter_tokenizer.rb +0 -20
- data/test/unit/analysis/rtc_lower_case_filter.rb +0 -20
- data/test/unit/analysis/rtc_lower_case_tokenizer.rb +0 -27
- data/test/unit/analysis/rtc_per_field_analyzer_wrapper.rb +0 -39
- data/test/unit/analysis/rtc_porter_stem_filter.rb +0 -16
- data/test/unit/analysis/rtc_standard_analyzer.rb +0 -20
- data/test/unit/analysis/rtc_standard_tokenizer.rb +0 -20
- data/test/unit/analysis/rtc_stop_analyzer.rb +0 -20
- data/test/unit/analysis/rtc_stop_filter.rb +0 -14
- data/test/unit/analysis/rtc_white_space_analyzer.rb +0 -21
- data/test/unit/analysis/rtc_white_space_tokenizer.rb +0 -20
- data/test/unit/analysis/rtc_word_list_loader.rb +0 -32
- data/test/unit/analysis/tc_token.rb +0 -25
- data/test/unit/document/rtc_field.rb +0 -28
- data/test/unit/document/tc_document.rb +0 -47
- data/test/unit/document/tc_field.rb +0 -98
- data/test/unit/index/rtc_compound_file_io.rb +0 -107
- data/test/unit/index/rtc_field_infos.rb +0 -127
- data/test/unit/index/rtc_fields_io.rb +0 -167
- data/test/unit/index/rtc_multiple_term_doc_pos_enum.rb +0 -83
- data/test/unit/index/rtc_segment_infos.rb +0 -74
- data/test/unit/index/rtc_segment_term_docs.rb +0 -17
- data/test/unit/index/rtc_segment_term_enum.rb +0 -60
- data/test/unit/index/rtc_segment_term_vector.rb +0 -71
- data/test/unit/index/rtc_term_buffer.rb +0 -57
- data/test/unit/index/rtc_term_info.rb +0 -19
- data/test/unit/index/rtc_term_infos_io.rb +0 -192
- data/test/unit/index/rtc_term_vectors_io.rb +0 -108
- data/test/unit/index/tc_term.rb +0 -27
- data/test/unit/index/tc_term_voi.rb +0 -18
- data/test/unit/search/rtc_similarity.rb +0 -37
- data/test/unit/search/rtc_sort_field.rb +0 -14
- data/test/unit/search/tc_multi_searcher2.rb +0 -126
- data/test/unit/store/rtc_fs_store.rb +0 -62
- data/test/unit/store/rtc_ram_store.rb +0 -15
- data/test/unit/store/rtm_store.rb +0 -150
- data/test/unit/store/rtm_store_lock.rb +0 -2
- data/test/unit/ts_document.rb +0 -2
- data/test/unit/utils/rtc_bit_vector.rb +0 -73
- data/test/unit/utils/rtc_date_tools.rb +0 -50
- data/test/unit/utils/rtc_number_tools.rb +0 -59
- data/test/unit/utils/rtc_parameter.rb +0 -40
- data/test/unit/utils/rtc_priority_queue.rb +0 -62
- data/test/unit/utils/rtc_string_helper.rb +0 -21
- data/test/unit/utils/rtc_thread.rb +0 -61
- data/test/unit/utils/rtc_weak_key_hash.rb +0 -25
- data/test/utils/number_to_spoken.rb +0 -132
data/lib/ferret/document.rb
CHANGED
@@ -1,2 +1,130 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
module Ferret
|
2
|
+
# Instead of using documents to add data to an index you can use Hashes and
|
3
|
+
# Arrays. The only real benefits of using a Document over a Hash are pretty
|
4
|
+
# printing and the boost attribute. You can add the boost attribute to
|
5
|
+
# Hashes and arrays using the BoostMixin. For example;
|
6
|
+
#
|
7
|
+
# class Hash
|
8
|
+
# include BoostMixin
|
9
|
+
# end
|
10
|
+
#
|
11
|
+
# class Array
|
12
|
+
# include BoostMixin
|
13
|
+
# end
|
14
|
+
#
|
15
|
+
# class String
|
16
|
+
# include BoostMixin
|
17
|
+
# end
|
18
|
+
module BoostMixin
|
19
|
+
attr_accessor :boost
|
20
|
+
end
|
21
|
+
|
22
|
+
# Documents are the unit of indexing and search.
|
23
|
+
#
|
24
|
+
# A Document is a set of fields. Each field has a name and an array of
|
25
|
+
# textual values. If you are coming from a Lucene background you should note
|
26
|
+
# that Fields don't have any properties except for the boost property. You
|
27
|
+
# should use the FieldInfos class to set field properties accross the whole
|
28
|
+
# index instead.
|
29
|
+
#
|
30
|
+
# === Boost
|
31
|
+
#
|
32
|
+
# The boost attribute makes a Document more important in the index. That is,
|
33
|
+
# you can increase the score of a match for queries that match a particular
|
34
|
+
# document, making it more likely to appear at the top of search results.
|
35
|
+
# You may, for example, want to boost products that have a higher user
|
36
|
+
# rating so that they are more likely to appear in search results.
|
37
|
+
#
|
38
|
+
# Note: that fields which are _not_ stored (see FieldInfos) are _not_
|
39
|
+
# available in documents retrieved from the index, e.g. Searcher#doc or
|
40
|
+
# IndexReader#doc.
|
41
|
+
#
|
42
|
+
# Note: that modifying a Document retrieved from the index will not modify
|
43
|
+
# the document contained within the index. You need to delete the old
|
44
|
+
# version of the document and add the new version of the document.
|
45
|
+
class Document < Hash
|
46
|
+
include BoostMixin
|
47
|
+
|
48
|
+
# Create a new Document object with a boost. The boost defaults to 1.0.
|
49
|
+
def initialize(boost = 1.0)
|
50
|
+
@boost = boost
|
51
|
+
end
|
52
|
+
|
53
|
+
# Return true if the documents are equal, ie they have the same fields
|
54
|
+
def eql?(o)
|
55
|
+
return (o.is_a? Document and (o.boost == @boost) and
|
56
|
+
(self.keys == o.keys) and (self.values == o.values))
|
57
|
+
end
|
58
|
+
alias :== :eql?
|
59
|
+
|
60
|
+
# Create a string represention of the document
|
61
|
+
def to_s
|
62
|
+
buf = ["Document {"]
|
63
|
+
self.keys.sort_by {|key| key.to_s}.each do |key|
|
64
|
+
val = self[key]
|
65
|
+
val_str = if val.instance_of? Array then %{["#{val.join('", "')}"]}
|
66
|
+
elsif val.is_a? Field then val.to_s
|
67
|
+
else %{"#{val.to_s}"}
|
68
|
+
end
|
69
|
+
buf << " :#{key} => #{val_str}"
|
70
|
+
end
|
71
|
+
buf << ["}#{@boost == 1.0 ? "" : "^" + @boost.to_s}"]
|
72
|
+
return buf.join("\n")
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
# A Field is a section of a Document. A Field is basically an array with a
|
77
|
+
# boost attribute. It also provides pretty printing of the field with the
|
78
|
+
# #to_s method.
|
79
|
+
#
|
80
|
+
# === Boost
|
81
|
+
#
|
82
|
+
# The boost attribute makes a field more important in the index. That is,
|
83
|
+
# you can increase the score of a match for queries that match terms in a
|
84
|
+
# boosted field. You may, for example, want to boost a title field so that
|
85
|
+
# matches that match in the :title field score more highly than matches that
|
86
|
+
# match in the :contents field.
|
87
|
+
#
|
88
|
+
# Note: If you'd like to use boosted fields without having to use
|
89
|
+
# the Field class you can just include the BoostMixin in the Array class.
|
90
|
+
# See BoostMixin.
|
91
|
+
class Field < Array
|
92
|
+
include BoostMixin
|
93
|
+
|
94
|
+
# Create a new Field object. You can pass data to the field as either a
|
95
|
+
# string;
|
96
|
+
#
|
97
|
+
# f = Field.new("This is the fields data")
|
98
|
+
#
|
99
|
+
# or as an array of strings;
|
100
|
+
#
|
101
|
+
# f = Field.new(["this", "is", "an", "array", "of", "field", "data"])
|
102
|
+
#
|
103
|
+
# Of course Fields can also be boosted;
|
104
|
+
#
|
105
|
+
# f = Field.new("field data", 1000.0)
|
106
|
+
def initialize(data = [], boost = 1.0)
|
107
|
+
@boost = boost
|
108
|
+
if data.is_a? Array
|
109
|
+
data.each {|v| self << v}
|
110
|
+
else
|
111
|
+
self << data.to_s
|
112
|
+
end
|
113
|
+
end
|
114
|
+
|
115
|
+
def eql?(o)
|
116
|
+
return (o.is_a? Field and (o.boost == @boost) and super(o))
|
117
|
+
end
|
118
|
+
alias :== :eql?
|
119
|
+
|
120
|
+
def +(o)
|
121
|
+
return Field.new(super(o), self.boost)
|
122
|
+
end
|
123
|
+
|
124
|
+
def to_s
|
125
|
+
buf = %{["#{self.join('", "')}"]}
|
126
|
+
buf << "^#@boost" if @boost != 1.0
|
127
|
+
return buf
|
128
|
+
end
|
129
|
+
end
|
130
|
+
end
|
data/lib/ferret/index.rb
CHANGED
@@ -1,26 +1,577 @@
|
|
1
|
-
require '
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
1
|
+
require 'monitor'
|
2
|
+
|
3
|
+
module Ferret::Index
|
4
|
+
# This is a simplified interface to the index. See the TUTORIAL for more
|
5
|
+
# information on how to use this class.
|
6
|
+
class Index
|
7
|
+
include MonitorMixin
|
8
|
+
|
9
|
+
include Ferret::Store
|
10
|
+
include Ferret::Search
|
11
|
+
|
12
|
+
attr_reader :options
|
13
|
+
# If you create an Index without any options, it'll simply create an index
|
14
|
+
# in memory. But this class is highly configurable and every option that
|
15
|
+
# you can supply to IndexWriter and QueryParser, you can also set here.
|
16
|
+
# Please look at the options for the constructors to these classes.
|
17
|
+
#
|
18
|
+
# === Options
|
19
|
+
#
|
20
|
+
# See;
|
21
|
+
#
|
22
|
+
# * QueryParser
|
23
|
+
# * IndexWriter
|
24
|
+
#
|
25
|
+
# default_input_field:: Default: "id". This specifies the default field
|
26
|
+
# that will be used when you add a simple string
|
27
|
+
# to the index using #add_document or <<.
|
28
|
+
# id_field: Default: "id". This field is as the field to
|
29
|
+
# search when doing searches on a term. For
|
30
|
+
# example, if you do a lookup by term "cat", ie
|
31
|
+
# index["cat"], this will be the field that is
|
32
|
+
# searched.
|
33
|
+
# key:: Default: nil. Expert: This should only be used
|
34
|
+
# if you really know what you are doing. Basically
|
35
|
+
# you can set a field or an array of fields to be
|
36
|
+
# the key for the index. So if you add a document
|
37
|
+
# with a same key as an existing document, the
|
38
|
+
# existing document will be replaced by the new
|
39
|
+
# object. Using a multiple field key will slow
|
40
|
+
# down indexing so it should not be done if
|
41
|
+
# performance is a concern. A single field key (or
|
42
|
+
# id) should be find however. Also, you must make
|
43
|
+
# sure that your key/keys are either untokenized
|
44
|
+
# or that they are not broken up by the analyzer.
|
45
|
+
# auto_flush:: Default: false. Set this option to true if you
|
46
|
+
# want the index automatically flushed every time
|
47
|
+
# you do a write (includes delete) to the index.
|
48
|
+
# This is useful if you have multiple processes
|
49
|
+
# accessing the index and you don't want lock
|
50
|
+
# errors. Setting :auto_flush to true has a huge
|
51
|
+
# performance impact so don't use it if you are
|
52
|
+
# concerned about performance. In that case you
|
53
|
+
# should think about setting up a DRb indexing
|
54
|
+
# service.
|
55
|
+
#
|
56
|
+
# Some examples;
|
57
|
+
#
|
58
|
+
# index = Index::Index.new(:analyzer => WhiteSpaceAnalyzer.new())
|
59
|
+
#
|
60
|
+
# index = Index::Index.new(:path => '/path/to/index',
|
61
|
+
# :create_if_missing => false,
|
62
|
+
# :auto_flush => true)
|
63
|
+
#
|
64
|
+
# index = Index::Index.new(:dir => directory,
|
65
|
+
# :default_slop => 2,
|
66
|
+
# :handle_parse_errors => false)
|
67
|
+
#
|
68
|
+
def initialize(options = {})
|
69
|
+
super()
|
70
|
+
|
71
|
+
if options[:key]
|
72
|
+
@key = options[:key]
|
73
|
+
if @key.is_a?(Array)
|
74
|
+
@key.flatten.map {|k| k.to_s.intern}
|
75
|
+
end
|
76
|
+
end
|
77
|
+
|
78
|
+
if options[:dir].is_a?(String)
|
79
|
+
options[:path] = options[:dir]
|
80
|
+
end
|
81
|
+
if options[:path]
|
82
|
+
begin
|
83
|
+
@dir = FSDirectory.new(options[:path], options[:create])
|
84
|
+
rescue IOError => io
|
85
|
+
@dir = FSDirectory.new(options[:path], options[:create_if_missing])
|
86
|
+
end
|
87
|
+
elsif options[:dir]
|
88
|
+
@dir = options[:dir]
|
89
|
+
else
|
90
|
+
options[:create] = true # this should always be true for a new RAMDir
|
91
|
+
@dir = RAMDirectory.new
|
92
|
+
end
|
93
|
+
|
94
|
+
options[:dir] = @dir
|
95
|
+
@dir.extend(MonitorMixin)
|
96
|
+
@dir.synchronize do
|
97
|
+
@options = options
|
98
|
+
@writer = IndexWriter.new(options) # create the index if need be
|
99
|
+
options[:analyzer] = @analyzer = @writer.analyzer
|
100
|
+
@writer.close
|
101
|
+
@writer = nil
|
102
|
+
@reader = nil
|
103
|
+
@options.delete(:create) # only want to create the first time if at all
|
104
|
+
@auto_flush = @options[:auto_flush] || false
|
105
|
+
if (@options[:id_field].nil? and
|
106
|
+
@key.is_a?(Symbol))
|
107
|
+
@id_field = @key
|
108
|
+
else
|
109
|
+
@id_field = @options[:id_field] || :id
|
110
|
+
end
|
111
|
+
@default_field = (@options[:default_field]||= :*)
|
112
|
+
@default_input_field = options[:default_input_field] || @id_field
|
113
|
+
|
114
|
+
if @default_input_field.respond_to?(:intern)
|
115
|
+
@default_input_field = @default_input_field.intern
|
116
|
+
end
|
117
|
+
@open = true
|
118
|
+
@qp = nil
|
119
|
+
end
|
120
|
+
end
|
121
|
+
|
122
|
+
# Closes this index by closing its associated reader and writer objects.
|
123
|
+
def close
|
124
|
+
@dir.synchronize do
|
125
|
+
if not @open
|
126
|
+
raise "tried to close an already closed directory"
|
127
|
+
end
|
128
|
+
@searcher.close() if @searcher
|
129
|
+
@reader.close() if @reader
|
130
|
+
@writer.close() if @writer
|
131
|
+
@dir.close()
|
132
|
+
|
133
|
+
@open = false
|
134
|
+
end
|
135
|
+
end
|
136
|
+
|
137
|
+
# Get the reader for this index.
|
138
|
+
# NOTE:: This will close the writer from this index.
|
139
|
+
def reader
|
140
|
+
ensure_reader_open()
|
141
|
+
return @reader
|
142
|
+
end
|
143
|
+
|
144
|
+
# Get the searcher for this index.
|
145
|
+
# NOTE:: This will close the writer from this index.
|
146
|
+
def searcher
|
147
|
+
ensure_searcher_open()
|
148
|
+
return @searcher
|
149
|
+
end
|
150
|
+
|
151
|
+
# Get the writer for this index.
|
152
|
+
# NOTE:: This will close the reader from this index.
|
153
|
+
def writer
|
154
|
+
ensure_writer_open()
|
155
|
+
return @writer
|
156
|
+
end
|
157
|
+
protected :reader, :writer, :searcher
|
158
|
+
|
159
|
+
# Adds a document to this index, using the provided analyzer instead of
|
160
|
+
# the local analyzer if provided. If the document contains more than
|
161
|
+
# IndexWriter::MAX_FIELD_LENGTH terms for a given field, the remainder are
|
162
|
+
# discarded.
|
163
|
+
#
|
164
|
+
# There are three ways to add a document to the index.
|
165
|
+
# To add a document you can simply add a string or an array of strings.
|
166
|
+
# This will store all the strings in the "" (ie empty string) field
|
167
|
+
# (unless you specify the default_field when you create the index).
|
168
|
+
#
|
169
|
+
# index << "This is a new document to be indexed"
|
170
|
+
# index << ["And here", "is another", "new document", "to be indexed"]
|
171
|
+
#
|
172
|
+
# But these are pretty simple documents. If this is all you want to index
|
173
|
+
# you could probably just use SimpleSearch. So let's give our documents
|
174
|
+
# some fields;
|
175
|
+
#
|
176
|
+
# index << {:title => "Programming Ruby", :content => "blah blah blah"}
|
177
|
+
# index << {:title => "Programming Ruby", :content => "yada yada yada"}
|
178
|
+
#
|
179
|
+
# Or if you are indexing data stored in a database, you'll probably want
|
180
|
+
# to store the id;
|
181
|
+
#
|
182
|
+
# index << {:id => row.id, :title => row.title, :date => row.date}
|
183
|
+
#
|
184
|
+
# See FieldInfos for more information on how to set field properties.
|
185
|
+
def add_document(doc, analyzer = nil)
|
186
|
+
@dir.synchronize do
|
187
|
+
if doc.is_a?(String) or doc.is_a?(Array)
|
188
|
+
doc = {@default_input_field => doc}
|
189
|
+
end
|
190
|
+
|
191
|
+
# delete existing documents with the same key
|
192
|
+
if @key
|
193
|
+
if @key.is_a?(Array)
|
194
|
+
query = @key.inject(BooleanQuery.new()) do |bq, field|
|
195
|
+
bq.add_query(TermQuery.new(field, doc[field].to_s), :must)
|
196
|
+
bq
|
197
|
+
end
|
198
|
+
query_delete(query)
|
199
|
+
else
|
200
|
+
id = doc[@key].to_s
|
201
|
+
if id
|
202
|
+
delete(id)
|
203
|
+
@writer.commit
|
204
|
+
end
|
205
|
+
end
|
206
|
+
end
|
207
|
+
ensure_writer_open()
|
208
|
+
|
209
|
+
old_analyzer = @writer.analyzer if analyzer
|
210
|
+
@writer.add_document(doc)
|
211
|
+
@writer.analyzer = old_analyzer if analyzer
|
212
|
+
|
213
|
+
flush() if @auto_flush
|
214
|
+
end
|
215
|
+
end
|
216
|
+
alias :<< :add_document
|
217
|
+
|
218
|
+
# The main search method for the index. You need to create a query to
|
219
|
+
# pass to this method. You can also pass a hash with one or more of the
|
220
|
+
# following; {filter, num_docs, first_doc, sort}
|
221
|
+
#
|
222
|
+
# query:: The query to run on the index
|
223
|
+
# filter:: Filters docs from the search result
|
224
|
+
# first_doc:: The index in the results of the first doc retrieved.
|
225
|
+
# Default is 0
|
226
|
+
# num_docs:: The number of results returned. Default is 10
|
227
|
+
# sort:: An array of SortFields describing how to sort the results.
|
228
|
+
# filter_proc:: A proc which takes |doc_id, score, searcher| as arguments
|
229
|
+
# and returns true if the document passes the filter.
|
230
|
+
def search(query, options = {})
|
231
|
+
@dir.synchronize do
|
232
|
+
return do_search(query, options)
|
233
|
+
end
|
234
|
+
end
|
235
|
+
|
236
|
+
# See Index#search
|
237
|
+
#
|
238
|
+
# This method yields the doc and score for each hit.
|
239
|
+
# eg.
|
240
|
+
# index.search_each() do |doc, score|
|
241
|
+
# puts "hit document number #{doc} with a score of #{score}"
|
242
|
+
# end
|
243
|
+
#
|
244
|
+
# returns:: The total number of hits.
|
245
|
+
def search_each(query, options = {}) # :yield: doc, score
|
246
|
+
@dir.synchronize do
|
247
|
+
ensure_searcher_open()
|
248
|
+
query = process_query(query)
|
249
|
+
|
250
|
+
@searcher.search_each(query) do |doc, score|
|
251
|
+
yield doc, score
|
252
|
+
end
|
253
|
+
end
|
254
|
+
end
|
255
|
+
|
256
|
+
# Retrieve the document referenced by the document number +id+, if id is
|
257
|
+
# an integer or the first document with term +id+ if +id+ is a term.
|
258
|
+
#
|
259
|
+
# id:: The number of the document to retrieve, or the term used as the :id
|
260
|
+
# for the document we wish to retrieve
|
261
|
+
def doc(id)
|
262
|
+
@dir.synchronize do
|
263
|
+
ensure_reader_open()
|
264
|
+
if id.kind_of?(String) or id.kind_of?(Symbol)
|
265
|
+
term_doc_enum = @reader.term_docs_for(@id_field, id.to_s)
|
266
|
+
id = term_doc_enum.next? ? term_doc_enum.doc : nil
|
267
|
+
end
|
268
|
+
return @reader[id] if id.is_a? Integer
|
269
|
+
if id
|
270
|
+
raise(ArgumentError, "key to Index to access a document must be " +
|
271
|
+
"an Integer or a String")
|
272
|
+
end
|
273
|
+
end
|
274
|
+
return nil
|
275
|
+
end
|
276
|
+
alias :[] :doc
|
277
|
+
|
278
|
+
# Delete the document referenced by the document number +id+ if +id+ is an
|
279
|
+
# integer or all of the documents which have the term +id+ if +id+ is a
|
280
|
+
# term..
|
281
|
+
#
|
282
|
+
# id:: The number of the document to delete
|
283
|
+
def delete(id)
|
284
|
+
@dir.synchronize do
|
285
|
+
if id.is_a?(String) or id.is_a?(Symbol)
|
286
|
+
ensure_writer_open()
|
287
|
+
@writer.delete(@id_field, id.to_s)
|
288
|
+
elsif id.is_a?(Integer)
|
289
|
+
ensure_reader_open()
|
290
|
+
cnt = @reader.delete(id)
|
291
|
+
else
|
292
|
+
raise ArgumentError, "Cannot delete for id of type #{id.class}"
|
293
|
+
end
|
294
|
+
flush() if @auto_flush
|
295
|
+
end
|
296
|
+
return self
|
297
|
+
end
|
298
|
+
|
299
|
+
# Delete all documents returned by the query.
|
300
|
+
#
|
301
|
+
# query:: The query to find documents you wish to delete. Can either be a
|
302
|
+
# string (in which case it is parsed by the standard query parser)
|
303
|
+
# or an actual query object.
|
304
|
+
def query_delete(query)
|
305
|
+
@dir.synchronize do
|
306
|
+
ensure_searcher_open()
|
307
|
+
query = process_query(query)
|
308
|
+
@searcher.search_each(query) do |doc, score|
|
309
|
+
@reader.delete(doc)
|
310
|
+
end
|
311
|
+
flush() if @auto_flush
|
312
|
+
end
|
313
|
+
end
|
314
|
+
|
315
|
+
# Returns true if document +n+ has been deleted
|
316
|
+
def deleted?(n)
|
317
|
+
@dir.synchronize do
|
318
|
+
ensure_reader_open()
|
319
|
+
return @reader.deleted?(n)
|
320
|
+
end
|
321
|
+
end
|
322
|
+
|
323
|
+
# Update the document referenced by the document number +id+ if +id+ is an
|
324
|
+
# integer or all of the documents which have the term +id+ if +id+ is a
|
325
|
+
# term..
|
326
|
+
#
|
327
|
+
# id:: The number of the document to update. Can also be a string
|
328
|
+
# representing the value in the +id+ field. Also consider using
|
329
|
+
# the :key attribute.
|
330
|
+
# new_doc:: The document to replace the old document with
|
331
|
+
def update(id, new_doc)
|
332
|
+
@dir.synchronize do
|
333
|
+
delete(id)
|
334
|
+
if id.is_a?(String) or id.is_a?(Symbol)
|
335
|
+
@writer.commit
|
336
|
+
else
|
337
|
+
ensure_writer_open()
|
338
|
+
end
|
339
|
+
@writer << new_doc
|
340
|
+
flush() if @auto_flush
|
341
|
+
end
|
342
|
+
end
|
343
|
+
|
344
|
+
# Update all the documents returned by the query.
#
# query::   The query to find documents you wish to update. Can either be
#           a string (in which case it is parsed by the standard query
#           parser) or an actual query object.
# new_val:: The values we are updating. This can be a string in which case
#           the default field is updated, or it can be a hash, in which
#           case, all fields in the hash are merged into the old hash.
#           That is, the old fields are replaced by values in the new hash
#           if they exist.
#
# === Example
#
#   index << {:id => "26", :title => "Babylon", :artist => "David Grey"}
#   index << {:id => "29", :title => "My Oh My", :artist => "David Grey"}
#
#   # correct
#   index.query_update('artist:"David Grey"', {:artist => "David Gray"})
#
#   index["26"]
#   #=> {:id => "26", :title => "Babylon", :artist => "David Gray"}
#   index["29"]
#   #=> {:id => "29", :title => "My Oh My", :artist => "David Gray"}
#
def query_update(query, new_val)
  @dir.synchronize do
    ensure_searcher_open()
    docs_to_add = []
    query = process_query(query)
    @searcher.search_each(query) do |id, score|
      document = @searcher[id].load
      if new_val.is_a?(Hash)
        document.merge!(new_val)
      else # new_val is a String or Symbol; update the default field
        document[@default_input_field] = new_val.to_s
      end
      # collect the updated copy, then delete the old document; the
      # replacements are re-added in one pass below
      docs_to_add << document
      @reader.delete(id)
    end
    ensure_writer_open()
    docs_to_add.each {|doc| @writer << doc }
    flush() if @auto_flush
  end
end
|
388
|
+
|
389
|
+
# Returns true if any documents have been deleted since the index was last
# flushed.
def has_deletions?()
  @dir.synchronize do
    ensure_reader_open()
    @reader.has_deletions?
  end
end
|
397
|
+
|
398
|
+
# Flushes all writes to the index. This will not optimize the index but it
# will make sure that all writes are written to it.
#
# NOTE: this is not necessary if you are only using this class. All writes
# will automatically flush when you perform an operation that reads the
# index.
def flush()
  @dir.synchronize do
    # close in searcher -> reader -> writer order, then drop all handles so
    # the next operation reopens fresh ones
    @searcher.close unless @searcher.nil?
    @reader.close unless @reader.nil?
    @writer.close unless @writer.nil?
    @searcher = @reader = @writer = nil
  end
end
|
414
|
+
|
415
|
+
# Optimizes the index. This should only be called when the index will no
# longer be updated very often, but will be read a lot.
def optimize()
  @dir.synchronize do
    ensure_writer_open()
    @writer.optimize
    # close the writer so the optimized segments become visible to readers
    @writer.close
    @writer = nil
  end
end
|
425
|
+
|
426
|
+
# Returns the number of (undeleted) documents in the index.
def size()
  @dir.synchronize do
    ensure_reader_open()
    @reader.num_docs
  end
end
|
433
|
+
|
434
|
+
# Merges all segments from an index or an array of indexes into this
# index. You can pass a single Index::Index, Index::Reader,
# Store::Directory or an array of any single one of these.
#
# This may be used to parallelize batch indexing. A large document
# collection can be broken into sub-collections. Each sub-collection can
# be indexed in parallel, on a different thread, process or machine and
# perhaps all in memory. The complete index can then be created by
# merging sub-collection indexes with this method.
#
# After this completes, the index is optimized.
def add_indexes(indexes)
  @dir.synchronize do
    indexes = [indexes].flatten   # coerce a single index into an array
    return if indexes.empty?      # nothing to merge
    # everything is normalized to IndexReaders before merging; the first
    # element decides how the whole array is treated
    case indexes.first
    when Index
      indexes.delete(self) # don't merge with self
      indexes = indexes.map {|index| index.reader }
    when Ferret::Store::Directory
      indexes.delete(@dir) # don't merge with self
      indexes = indexes.map {|dir| IndexReader.new(dir) }
    when IndexReader
      indexes.delete(@reader) # don't merge with self
    else
      raise ArgumentError, "Unknown index type when trying to merge indexes"
    end
    ensure_writer_open
    @writer.add_readers(indexes)
  end
end
|
464
|
+
|
465
|
+
# This is a simple utility method for saving an in memory or RAM index to
# the file system. The same thing can be achieved by using the
# Index::Index#add_indexes method and you will have more options when
# creating the new index, however this is a simple way to turn a RAM index
# into a file system index.
#
# directory:: This can either be a Store::Directory object or a String
#             representing the path to the directory where you would
#             like to store the the index.
#
# create::    True if you'd like to create the directory if it doesn't
#             exist or copy over an existing directory. False if you'd
#             like to merge with the existing directory. This defaults to
#             false.
def persist(directory, create = true)
  synchronize do
    flush()
    old_dir = @dir
    # swap in the target directory; an unrecognized type leaves @dir as-is
    case directory
    when String
      @dir = FSDirectory.new(directory, create)
    when Ferret::Store::Directory
      @dir = directory
    end
    @dir.extend(MonitorMixin)
    @options[:dir] = @dir
    @options[:create_if_missing] = true
    # merge the old (in-memory) directory into the new one
    add_indexes([old_dir])
  end
end
|
494
|
+
|
495
|
+
# Renders every live (undeleted) document in the index, one per line.
def to_s
  (0...size).each_with_object("".dup) do |i, buf|
    buf << "#{self[i]}\n" unless deleted?(i)
  end
end
|
502
|
+
|
503
|
+
# Returns an Explanation that describes how +doc+ scored against
# +query+.
#
# This is intended to be used in developing Similarity implementations,
# and, for good performance, should not be displayed with every hit.
# Computing an explanation is as expensive as executing the query over the
# entire index.
def explain(query, doc)
  synchronize do
    ensure_searcher_open()
    return @searcher.explain(process_query(query), doc)
  end
end
|
518
|
+
|
519
|
+
protected
  # Opens @writer lazily. A writer can't coexist with an open
  # reader/searcher, so those are shut down first.
  def ensure_writer_open()
    raise "tried to use a closed index" unless @open
    return if @writer
    if @reader
      @searcher.close if @searcher
      @reader.close
      @reader = @searcher = nil
    end
    @writer = IndexWriter.new(@options)
  end
|
531
|
+
|
532
|
+
# Returns the newly opened reader if one had to be (re)opened, false
# otherwise. Closes any open writer first so its changes are visible.
def ensure_reader_open()
  raise "tried to use a closed index" unless @open
  unless @reader
    if @writer
      @writer.close
      @writer = nil
    end
    return @reader = IndexReader.new(@dir)
  end
  # reopen a stale reader so it sees the latest commits
  return @reader = IndexReader.new(@dir) unless @reader.latest?
  false
end
|
548
|
+
|
549
|
+
# Opens @searcher lazily, rebuilding it whenever the reader was reopened.
def ensure_searcher_open()
  raise "tried to use a closed index" unless @open
  reader_reopened = ensure_reader_open()
  @searcher = Searcher.new(@reader) if reader_reopened or @searcher.nil?
end
|
555
|
+
|
556
|
+
private
  # Runs +query+ (string or query object) against the searcher with the
  # given options and returns the search result.
  def do_search(query, options)
    ensure_searcher_open()
    @searcher.search(process_query(query), options)
  end
|
563
|
+
|
564
|
+
# Normalizes +query+: strings are run through the query parser, query
# objects are passed through untouched.
def process_query(query)
  return query unless query.is_a?(String)
  @qp ||= Ferret::QueryParser.new(@options)
  # the field list must be refreshed every time, in case a new field has
  # been added to the index since the parser was created
  @qp.fields = @reader.field_names
  @qp.parse(query)
end
|
575
|
+
|
576
|
+
end
|
577
|
+
end
|