RubyGems - jk-ferret - Versions diffs - 0.11.8.2 - Mend

jk-ferret 0.11.8.2

Files changed (228) hide show

data/CHANGELOG +24 -0
data/MIT-LICENSE +20 -0
data/README +90 -0
data/RELEASE_CHANGES +137 -0
data/RELEASE_NOTES +60 -0
data/Rakefile +443 -0
data/TODO +109 -0
data/TUTORIAL +231 -0
data/bin/ferret-browser +79 -0
data/ext/BZLIB_blocksort.c +1094 -0
data/ext/BZLIB_bzlib.c +1578 -0
data/ext/BZLIB_compress.c +672 -0
data/ext/BZLIB_crctable.c +104 -0
data/ext/BZLIB_decompress.c +626 -0
data/ext/BZLIB_huffman.c +205 -0
data/ext/BZLIB_randtable.c +84 -0
data/ext/STEMMER_api.c +66 -0
data/ext/STEMMER_libstemmer.c +93 -0
data/ext/STEMMER_stem_ISO_8859_1_danish.c +337 -0
data/ext/STEMMER_stem_ISO_8859_1_dutch.c +624 -0
data/ext/STEMMER_stem_ISO_8859_1_english.c +1117 -0
data/ext/STEMMER_stem_ISO_8859_1_finnish.c +762 -0
data/ext/STEMMER_stem_ISO_8859_1_french.c +1246 -0
data/ext/STEMMER_stem_ISO_8859_1_german.c +503 -0
data/ext/STEMMER_stem_ISO_8859_1_hungarian.c +1230 -0
data/ext/STEMMER_stem_ISO_8859_1_italian.c +1065 -0
data/ext/STEMMER_stem_ISO_8859_1_norwegian.c +297 -0
data/ext/STEMMER_stem_ISO_8859_1_porter.c +749 -0
data/ext/STEMMER_stem_ISO_8859_1_portuguese.c +1017 -0
data/ext/STEMMER_stem_ISO_8859_1_spanish.c +1093 -0
data/ext/STEMMER_stem_ISO_8859_1_swedish.c +307 -0
data/ext/STEMMER_stem_ISO_8859_2_romanian.c +998 -0
data/ext/STEMMER_stem_KOI8_R_russian.c +700 -0
data/ext/STEMMER_stem_UTF_8_danish.c +339 -0
data/ext/STEMMER_stem_UTF_8_dutch.c +634 -0
data/ext/STEMMER_stem_UTF_8_english.c +1125 -0
data/ext/STEMMER_stem_UTF_8_finnish.c +768 -0
data/ext/STEMMER_stem_UTF_8_french.c +1256 -0
data/ext/STEMMER_stem_UTF_8_german.c +509 -0
data/ext/STEMMER_stem_UTF_8_hungarian.c +1234 -0
data/ext/STEMMER_stem_UTF_8_italian.c +1073 -0
data/ext/STEMMER_stem_UTF_8_norwegian.c +299 -0
data/ext/STEMMER_stem_UTF_8_porter.c +755 -0
data/ext/STEMMER_stem_UTF_8_portuguese.c +1023 -0
data/ext/STEMMER_stem_UTF_8_romanian.c +1004 -0
data/ext/STEMMER_stem_UTF_8_russian.c +694 -0
data/ext/STEMMER_stem_UTF_8_spanish.c +1097 -0
data/ext/STEMMER_stem_UTF_8_swedish.c +309 -0
data/ext/STEMMER_stem_UTF_8_turkish.c +2205 -0
data/ext/STEMMER_utilities.c +478 -0
data/ext/analysis.c +1710 -0
data/ext/analysis.h +266 -0
data/ext/api.h +26 -0
data/ext/array.c +125 -0
data/ext/array.h +62 -0
data/ext/bitvector.c +96 -0
data/ext/bitvector.h +594 -0
data/ext/bzlib.h +282 -0
data/ext/bzlib_private.h +503 -0
data/ext/compound_io.c +384 -0
data/ext/config.h +52 -0
data/ext/document.c +159 -0
data/ext/document.h +63 -0
data/ext/except.c +102 -0
data/ext/except.h +176 -0
data/ext/extconf.rb +15 -0
data/ext/ferret.c +416 -0
data/ext/ferret.h +94 -0
data/ext/field_index.c +262 -0
data/ext/field_index.h +52 -0
data/ext/filter.c +157 -0
data/ext/fs_store.c +493 -0
data/ext/global.c +458 -0
data/ext/global.h +302 -0
data/ext/hash.c +524 -0
data/ext/hash.h +515 -0
data/ext/hashset.c +192 -0
data/ext/hashset.h +215 -0
data/ext/header.h +58 -0
data/ext/helper.c +63 -0
data/ext/helper.h +21 -0
data/ext/index.c +6804 -0
data/ext/index.h +935 -0
data/ext/internal.h +1019 -0
data/ext/lang.c +10 -0
data/ext/lang.h +68 -0
data/ext/libstemmer.h +79 -0
data/ext/mempool.c +88 -0
data/ext/mempool.h +43 -0
data/ext/modules.h +190 -0
data/ext/multimapper.c +351 -0
data/ext/multimapper.h +60 -0
data/ext/posh.c +1006 -0
data/ext/posh.h +973 -0
data/ext/priorityqueue.c +149 -0
data/ext/priorityqueue.h +155 -0
data/ext/q_boolean.c +1621 -0
data/ext/q_const_score.c +162 -0
data/ext/q_filtered_query.c +212 -0
data/ext/q_fuzzy.c +280 -0
data/ext/q_match_all.c +149 -0
data/ext/q_multi_term.c +673 -0
data/ext/q_parser.c +3103 -0
data/ext/q_phrase.c +1206 -0
data/ext/q_prefix.c +98 -0
data/ext/q_range.c +682 -0
data/ext/q_span.c +2390 -0
data/ext/q_term.c +337 -0
data/ext/q_wildcard.c +167 -0
data/ext/r_analysis.c +2626 -0
data/ext/r_index.c +3468 -0
data/ext/r_qparser.c +635 -0
data/ext/r_search.c +4490 -0
data/ext/r_store.c +513 -0
data/ext/r_utils.c +1131 -0
data/ext/ram_store.c +476 -0
data/ext/scanner.c +895 -0
data/ext/scanner.h +36 -0
data/ext/scanner_mb.c +6701 -0
data/ext/scanner_utf8.c +4415 -0
data/ext/search.c +1864 -0
data/ext/search.h +953 -0
data/ext/similarity.c +151 -0
data/ext/similarity.h +89 -0
data/ext/sort.c +786 -0
data/ext/stem_ISO_8859_1_danish.h +16 -0
data/ext/stem_ISO_8859_1_dutch.h +16 -0
data/ext/stem_ISO_8859_1_english.h +16 -0
data/ext/stem_ISO_8859_1_finnish.h +16 -0
data/ext/stem_ISO_8859_1_french.h +16 -0
data/ext/stem_ISO_8859_1_german.h +16 -0
data/ext/stem_ISO_8859_1_hungarian.h +16 -0
data/ext/stem_ISO_8859_1_italian.h +16 -0
data/ext/stem_ISO_8859_1_norwegian.h +16 -0
data/ext/stem_ISO_8859_1_porter.h +16 -0
data/ext/stem_ISO_8859_1_portuguese.h +16 -0
data/ext/stem_ISO_8859_1_spanish.h +16 -0
data/ext/stem_ISO_8859_1_swedish.h +16 -0
data/ext/stem_ISO_8859_2_romanian.h +16 -0
data/ext/stem_KOI8_R_russian.h +16 -0
data/ext/stem_UTF_8_danish.h +16 -0
data/ext/stem_UTF_8_dutch.h +16 -0
data/ext/stem_UTF_8_english.h +16 -0
data/ext/stem_UTF_8_finnish.h +16 -0
data/ext/stem_UTF_8_french.h +16 -0
data/ext/stem_UTF_8_german.h +16 -0
data/ext/stem_UTF_8_hungarian.h +16 -0
data/ext/stem_UTF_8_italian.h +16 -0
data/ext/stem_UTF_8_norwegian.h +16 -0
data/ext/stem_UTF_8_porter.h +16 -0
data/ext/stem_UTF_8_portuguese.h +16 -0
data/ext/stem_UTF_8_romanian.h +16 -0
data/ext/stem_UTF_8_russian.h +16 -0
data/ext/stem_UTF_8_spanish.h +16 -0
data/ext/stem_UTF_8_swedish.h +16 -0
data/ext/stem_UTF_8_turkish.h +16 -0
data/ext/stopwords.c +410 -0
data/ext/store.c +698 -0
data/ext/store.h +799 -0
data/ext/symbol.c +10 -0
data/ext/symbol.h +23 -0
data/ext/term_vectors.c +73 -0
data/ext/threading.h +31 -0
data/ext/win32.h +62 -0
data/lib/ferret.rb +30 -0
data/lib/ferret/browser.rb +246 -0
data/lib/ferret/browser/s/global.js +192 -0
data/lib/ferret/browser/s/style.css +148 -0
data/lib/ferret/browser/views/document/list.rhtml +49 -0
data/lib/ferret/browser/views/document/show.rhtml +27 -0
data/lib/ferret/browser/views/error/index.rhtml +7 -0
data/lib/ferret/browser/views/help/index.rhtml +8 -0
data/lib/ferret/browser/views/home/index.rhtml +29 -0
data/lib/ferret/browser/views/layout.rhtml +22 -0
data/lib/ferret/browser/views/term-vector/index.rhtml +4 -0
data/lib/ferret/browser/views/term/index.rhtml +199 -0
data/lib/ferret/browser/views/term/termdocs.rhtml +1 -0
data/lib/ferret/browser/webrick.rb +14 -0
data/lib/ferret/document.rb +130 -0
data/lib/ferret/field_infos.rb +44 -0
data/lib/ferret/field_symbol.rb +87 -0
data/lib/ferret/index.rb +973 -0
data/lib/ferret/number_tools.rb +157 -0
data/lib/ferret/version.rb +3 -0
data/setup.rb +1555 -0
data/test/long_running/largefile/tc_largefile.rb +46 -0
data/test/test_all.rb +5 -0
data/test/test_helper.rb +29 -0
data/test/test_installed.rb +1 -0
data/test/threading/number_to_spoken.rb +132 -0
data/test/threading/thread_safety_index_test.rb +88 -0
data/test/threading/thread_safety_read_write_test.rb +73 -0
data/test/threading/thread_safety_test.rb +133 -0
data/test/unit/analysis/tc_analyzer.rb +550 -0
data/test/unit/analysis/tc_token_stream.rb +653 -0
data/test/unit/index/tc_index.rb +867 -0
data/test/unit/index/tc_index_reader.rb +699 -0
data/test/unit/index/tc_index_writer.rb +447 -0
data/test/unit/index/th_doc.rb +332 -0
data/test/unit/query_parser/tc_query_parser.rb +238 -0
data/test/unit/search/tc_filter.rb +156 -0
data/test/unit/search/tc_fuzzy_query.rb +147 -0
data/test/unit/search/tc_index_searcher.rb +67 -0
data/test/unit/search/tc_multi_searcher.rb +128 -0
data/test/unit/search/tc_multiple_search_requests.rb +58 -0
data/test/unit/search/tc_search_and_sort.rb +179 -0
data/test/unit/search/tc_sort.rb +49 -0
data/test/unit/search/tc_sort_field.rb +27 -0
data/test/unit/search/tc_spans.rb +190 -0
data/test/unit/search/tm_searcher.rb +436 -0
data/test/unit/store/tc_fs_store.rb +115 -0
data/test/unit/store/tc_ram_store.rb +35 -0
data/test/unit/store/tm_store.rb +34 -0
data/test/unit/store/tm_store_lock.rb +68 -0
data/test/unit/tc_document.rb +81 -0
data/test/unit/tc_field_symbol.rb +26 -0
data/test/unit/ts_analysis.rb +2 -0
data/test/unit/ts_index.rb +2 -0
data/test/unit/ts_largefile.rb +4 -0
data/test/unit/ts_query_parser.rb +2 -0
data/test/unit/ts_search.rb +2 -0
data/test/unit/ts_store.rb +2 -0
data/test/unit/ts_utils.rb +2 -0
data/test/unit/utils/tc_bit_vector.rb +295 -0
data/test/unit/utils/tc_number_tools.rb +117 -0
data/test/unit/utils/tc_priority_queue.rb +106 -0
data/test/utils/content_generator.rb +226 -0
metadata +319 -0

data/ext/r_index.c ADDED Viewed

@@ -0,0 +1,3468 @@
+#include "ferret.h"
+#include "index.h"
+#include <st.h>
+VALUE mIndex;
+VALUE cFieldInfo; /* class handed to Data_Wrap_Struct when wrapping a FieldInfo */
+VALUE cFieldInfos; /* class handed to Data_Wrap_Struct when wrapping a FieldInfos */
+VALUE cTVOffsets;
+VALUE cTVTerm;
+VALUE cTermVector;
+VALUE cTermEnum; /* class handed to Data_Wrap_Struct when wrapping a TermEnum */
+VALUE cTermDocEnum; /* class handed to Data_Wrap_Struct when wrapping a TermDocEnum */
+VALUE cLazyDoc;
+VALUE cLazyDocData;
+VALUE cIndexWriter;
+VALUE cIndexReader;
+VALUE sym_analyzer;
+static VALUE sym_close_dir;
+static VALUE sym_create;
+static VALUE sym_create_if_missing;
+static VALUE sym_chunk_size;
+static VALUE sym_max_buffer_memory;
+static VALUE sym_index_interval;
+static VALUE sym_skip_interval;
+static VALUE sym_merge_factor;
+static VALUE sym_max_buffered_docs;
+static VALUE sym_max_merge_docs;
+static VALUE sym_max_field_length;
+static VALUE sym_use_compound_file;
+static VALUE sym_boost; /* options-hash key read by frb_fi_get_params */
+static VALUE sym_field_infos;
+static VALUE sym_store; /* options-hash key read by frb_fi_get_params */
+static VALUE sym_index; /* options-hash key read by frb_fi_get_params */
+static VALUE sym_term_vector; /* options-hash key read by frb_fi_get_params */
+static VALUE sym_compress; /* :store value mapped to STORE_COMPRESS */
+static VALUE sym_compressed; /* :store value, also mapped to STORE_COMPRESS */
+static VALUE sym_untokenized; /* :index value mapped to INDEX_UNTOKENIZED */
+static VALUE sym_omit_norms; /* :index value mapped to INDEX_YES_OMIT_NORMS */
+static VALUE sym_untokenized_omit_norms; /* :index value mapped to INDEX_UNTOKENIZED_OMIT_NORMS */
+static VALUE sym_with_positions; /* :term_vector value mapped to TERM_VECTOR_WITH_POSITIONS */
+static VALUE sym_with_offsets; /* :term_vector value mapped to TERM_VECTOR_WITH_OFFSETS */
+static VALUE sym_with_positions_offsets; /* :term_vector value mapped to TERM_VECTOR_WITH_POSITIONS_OFFSETS */
+static Symbol fsym_content;
+static ID id_term; /* ivar on a TermEnum holding its current term string (or nil) */
+static ID id_fields;
+static ID id_fld_num_map; /* ivar holding the hash used to look a field number up by field */
+static ID id_field_num; /* ivar written by frb_te_set_field with the selected field number */
+static ID id_boost;
+extern void frb_set_term(VALUE rterm, Term *t);
+extern Analyzer *frb_get_cwrapped_analyzer(VALUE ranalyzer);
+extern VALUE frb_get_analyzer(Analyzer *a);
+/****************************************************************************
+ *
+ * FieldInfo Methods
+ *
+ ****************************************************************************/
+/*
+ *  Free callback registered for wrapped FieldInfo objects: unregisters the
+ *  pointer from the object registry and then drops a reference on it.
+ */
+static void
+frb_fi_free(void *p)
+{
+    FieldInfo *field_info = (FieldInfo *)p;
+    object_del(field_info);
+    fi_deref(field_info);
+}
+/*
+ *  Type-check one property value taken from an options hash. nil (key
+ *  absent), true and false are let through untouched; anything else must be
+ *  a Symbol, otherwise Check_Type raises a TypeError.
+ */
+static void
+frb_fi_check_option_type(VALUE v)
+{
+    if (Qnil != v && Qtrue != v && Qfalse != v) {
+        Check_Type(v, T_SYMBOL);
+    }
+}
+/*
+ *  Read the FieldInfo properties out of the options hash +roptions+.
+ *
+ *  roptions    - must be a Hash (TypeError otherwise).
+ *  store       - out: set from :store; left untouched when the key is nil.
+ *  index       - out: set from :index; left untouched when the key is nil.
+ *  term_vector - out: set from :term_vector; left untouched when the key is
+ *                nil.
+ *  boost       - out: always written; the value of :boost converted to
+ *                float, or 1.0f when the key is nil.
+ *
+ *  true and false are accepted as synonyms for :yes and :no. The Symbol
+ *  type check is skipped for them, otherwise the Qtrue/Qfalse comparisons
+ *  below could never be reached. Any other symbol that is not one of the
+ *  recognised values raises an ArgumentError.
+ */
+static void
+frb_fi_get_params(VALUE roptions,
+                  StoreValue *store,
+                  IndexValue *index,
+                  TermVectorValue *term_vector,
+                  float *boost)
+{
+    VALUE v;
+    Check_Type(roptions, T_HASH);
+    v = rb_hash_aref(roptions, sym_boost);
+    if (Qnil != v) {
+        *boost = (float)NUM2DBL(v);
+    } else {
+        *boost = 1.0f;
+    }
+    v = rb_hash_aref(roptions, sym_store);
+    frb_fi_check_option_type(v);
+    if (v == sym_no || v == sym_false || v == Qfalse) {
+        *store = STORE_NO;
+    } else if (v == sym_yes || v == sym_true || v == Qtrue) {
+        *store = STORE_YES;
+    } else if (v == sym_compress || v == sym_compressed) {
+        *store = STORE_COMPRESS;
+    } else if (v == Qnil) {
+        /* leave as default */
+    } else {
+        rb_raise(rb_eArgError, ":%s isn't a valid argument for :store."
+                 " Please choose from [:yes, :no, :compressed]",
+                 rb_id2name(SYM2ID(v)));
+    }
+    v = rb_hash_aref(roptions, sym_index);
+    frb_fi_check_option_type(v);
+    if (v == sym_no || v == sym_false || v == Qfalse) {
+        *index = INDEX_NO;
+    } else if (v == sym_yes || v == sym_true || v == Qtrue) {
+        *index = INDEX_YES;
+    } else if (v == sym_untokenized) {
+        *index = INDEX_UNTOKENIZED;
+    } else if (v == sym_omit_norms) {
+        *index = INDEX_YES_OMIT_NORMS;
+    } else if (v == sym_untokenized_omit_norms) {
+        *index = INDEX_UNTOKENIZED_OMIT_NORMS;
+    } else if (v == Qnil) {
+        /* leave as default */
+    } else {
+        rb_raise(rb_eArgError, ":%s isn't a valid argument for :index."
+                 " Please choose from [:no, :yes, :untokenized, "
+                 ":omit_norms, :untokenized_omit_norms]",
+                 rb_id2name(SYM2ID(v)));
+    }
+    v = rb_hash_aref(roptions, sym_term_vector);
+    frb_fi_check_option_type(v);
+    if (v == sym_no || v == sym_false || v == Qfalse) {
+        *term_vector = TERM_VECTOR_NO;
+    } else if (v == sym_yes || v == sym_true || v == Qtrue) {
+        *term_vector = TERM_VECTOR_YES;
+    } else if (v == sym_with_positions) {
+        *term_vector = TERM_VECTOR_WITH_POSITIONS;
+    } else if (v == sym_with_offsets) {
+        *term_vector = TERM_VECTOR_WITH_OFFSETS;
+    } else if (v == sym_with_positions_offsets) {
+        *term_vector = TERM_VECTOR_WITH_POSITIONS_OFFSETS;
+    } else if (v == Qnil) {
+        /* leave as default */
+    } else {
+        rb_raise(rb_eArgError, ":%s isn't a valid argument for "
+                 ":term_vector. Please choose from [:no, :yes, "
+                 ":with_positions, :with_offsets, "
+                 ":with_positions_offsets]",
+                 rb_id2name(SYM2ID(v)));
+    }
+}
+/*
+ *  Return the Ruby object for +fi+, or nil when +fi+ is NULL. The object
+ *  registry is consulted first; only when it has no entry is a new wrapper
+ *  created, referenced and registered.
+ */
+static VALUE
+frb_get_field_info(FieldInfo *fi)
+{
+    VALUE rfi;
+    if (!fi) {
+        return Qnil;
+    }
+    rfi = object_get(fi);
+    if (rfi != Qnil) {
+        /* already wrapped: hand back the existing Ruby object */
+        return rfi;
+    }
+    rfi = Data_Wrap_Struct(cFieldInfo, NULL, &frb_fi_free, fi);
+    REF(fi);
+    object_add(fi, rfi);
+    return rfi;
+}
+/*
+ *  call-seq:
+ *     FieldInfo.new(name, options = {}) -> field_info
+ *
+ *  Create a new FieldInfo object with the name +name+ and the properties
+ *  specified in +options+. The available options are [:store, :index,
+ *  :term_vector, :boost]. See the description of FieldInfo for more
+ *  information on these properties.
+ */
+static VALUE
+frb_fi_init(int argc, VALUE *argv, VALUE self)
+{
+    /* values used for any property the options hash does not mention */
+    StoreValue store = STORE_YES;
+    IndexValue index = INDEX_YES;
+    TermVectorValue term_vector = TERM_VECTOR_WITH_POSITIONS_OFFSETS;
+    float boost = 1.0f;
+    VALUE rname, roptions;
+    FieldInfo *field_info;
+    rb_scan_args(argc, argv, "11", &rname, &roptions);
+    if (argc >= 2) {
+        frb_fi_get_params(roptions, &store, &index, &term_vector, &boost);
+    }
+    field_info = fi_new(frb_field(rname), store, index, term_vector);
+    field_info->boost = boost;
+    /* attach the C struct to self and register it in the object registry */
+    Frt_Wrap_Struct(self, NULL, &frb_fi_free, field_info);
+    object_add(field_info, self);
+    return self;
+}
+/*
+ *  call-seq:
+ *     fi.name -> symbol
+ *
+ *  Return the name of the field
+ */
+static VALUE
+frb_fi_name(VALUE self)
+{
+    FieldInfo *field_info = (FieldInfo *)DATA_PTR(self);
+    VALUE rname = ID2SYM(field_info->name);
+    return rname;
+}
+/*
+ *  call-seq:
+ *     fi.stored? -> bool
+ *
+ *  Return true if the field is stored in the index.
+ */
+static VALUE
+frb_fi_is_stored(VALUE self)
+{
+    FieldInfo *field_info = (FieldInfo *)DATA_PTR(self);
+    if (fi_is_stored(field_info)) {
+        return Qtrue;
+    }
+    return Qfalse;
+}
+/*
+ *  call-seq:
+ *     fi.compressed? -> bool
+ *
+ *  Return true if the field is stored in the index in compressed format.
+ */
+static VALUE
+frb_fi_is_compressed(VALUE self)
+{
+    FieldInfo *field_info = (FieldInfo *)DATA_PTR(self);
+    if (fi_is_compressed(field_info)) {
+        return Qtrue;
+    }
+    return Qfalse;
+}
+/*
+ *  call-seq:
+ *     fi.indexed? -> bool
+ *
+ *  Return true if the field is indexed, ie searchable in the index.
+ */
+static VALUE
+frb_fi_is_indexed(VALUE self)
+{
+    FieldInfo *field_info = (FieldInfo *)DATA_PTR(self);
+    if (fi_is_indexed(field_info)) {
+        return Qtrue;
+    }
+    return Qfalse;
+}
+/*
+ *  call-seq:
+ *     fi.tokenized? -> bool
+ *
+ *  Return true if the field is tokenized. Tokenizing is the process of
+ *  breaking the field up into tokens. That is "the quick brown fox" becomes:
+ *
+ *    ["the", "quick", "brown", "fox"]
+ *
+ *  A field can only be tokenized if it is indexed.
+ */
+static VALUE
+frb_fi_is_tokenized(VALUE self)
+{
+    FieldInfo *field_info = (FieldInfo *)DATA_PTR(self);
+    if (fi_is_tokenized(field_info)) {
+        return Qtrue;
+    }
+    return Qfalse;
+}
+/*
+ *  call-seq:
+ *     fi.omit_norms? -> bool
+ *
+ *  Return true if the field omits the norm file. The norm file is the file
+ *  used to store the field boosts for an indexed field. If you do not boost
+ *  any fields, and you can live without scoring based on field length then
+ *  you can omit the norms file. This will give the index a slight performance
+ *  boost and it will use less memory, especially for indexes which have a
+ *  large number of documents.
+ */
+static VALUE
+frb_fi_omit_norms(VALUE self)
+{
+    FieldInfo *field_info = (FieldInfo *)DATA_PTR(self);
+    if (fi_omit_norms(field_info)) {
+        return Qtrue;
+    }
+    return Qfalse;
+}
+/*
+ *  call-seq:
+ *     fi.store_term_vector? -> bool
+ *
+ *  Return true if the term-vectors are stored for this field.
+ */
+static VALUE
+frb_fi_store_term_vector(VALUE self)
+{
+    FieldInfo *field_info = (FieldInfo *)DATA_PTR(self);
+    if (fi_store_term_vector(field_info)) {
+        return Qtrue;
+    }
+    return Qfalse;
+}
+/*
+ *  call-seq:
+ *     fi.store_positions? -> bool
+ *
+ *  Return true if positions are stored with the term-vectors for this field.
+ */
+static VALUE
+frb_fi_store_positions(VALUE self)
+{
+    FieldInfo *field_info = (FieldInfo *)DATA_PTR(self);
+    if (fi_store_positions(field_info)) {
+        return Qtrue;
+    }
+    return Qfalse;
+}
+/*
+ *  call-seq:
+ *     fi.store_offsets? -> bool
+ *
+ *  Return true if offsets are stored with the term-vectors for this field.
+ */
+static VALUE
+frb_fi_store_offsets(VALUE self)
+{
+    FieldInfo *field_info = (FieldInfo *)DATA_PTR(self);
+    if (fi_store_offsets(field_info)) {
+        return Qtrue;
+    }
+    return Qfalse;
+}
+/*
+ *  call-seq:
+ *     fi.has_norms? -> bool
+ *
+ *  Return true if this field has a norms file. This is the same as calling;
+ *
+ *    fi.indexed? and not fi.omit_norms?
+ */
+static VALUE
+frb_fi_has_norms(VALUE self)
+{
+    FieldInfo *field_info = (FieldInfo *)DATA_PTR(self);
+    if (fi_has_norms(field_info)) {
+        return Qtrue;
+    }
+    return Qfalse;
+}
+/*
+ *  call-seq:
+ *     fi.boost -> boost
+ *
+ *  Return the default boost for this field
+ */
+static VALUE
+frb_fi_boost(VALUE self)
+{
+    FieldInfo *field_info = (FieldInfo *)DATA_PTR(self);
+    /* the boost is kept as a float in C; widen it for the Ruby Float */
+    double boost = (double)field_info->boost;
+    return rb_float_new(boost);
+}
+/*
+ *  call-seq:
+ *     fi.to_s -> string
+ *
+ *  Return a string representation of the FieldInfo object.
+ */
+static VALUE
+frb_fi_to_s(VALUE self)
+{
+    /* copy the C string returned by fi_to_s into a Ruby String, then
+     * free the C buffer */
+    char *cstr = fi_to_s((FieldInfo *)DATA_PTR(self));
+    VALUE rstr = rb_str_new2(cstr);
+    free(cstr);
+    return rstr;
+}
+/****************************************************************************
+ *
+ * FieldInfos Methods
+ *
+ ****************************************************************************/
+/*
+ *  Free callback registered for wrapped FieldInfos objects: unregisters the
+ *  pointer from the object registry and then drops a reference on it.
+ */
+static void
+frb_fis_free(void *p)
+{
+    FieldInfos *field_infos = (FieldInfos *)p;
+    object_del(field_infos);
+    fis_deref(field_infos);
+}
+/*
+ *  Mark callback registered for wrapped FieldInfos objects: passes every
+ *  contained FieldInfo to frb_gc_mark.
+ */
+static void
+frb_fis_mark(void *p)
+{
+    FieldInfos *fis = (FieldInfos *)p;
+    int field_num = 0;
+    while (field_num < fis->size) {
+        frb_gc_mark(fis->fields[field_num]);
+        field_num++;
+    }
+}
+/*
+ *  Return the Ruby object for +fis+, or nil when +fis+ is NULL. The object
+ *  registry is consulted first; only when it has no entry is a new wrapper
+ *  created, referenced and registered.
+ */
+static VALUE
+frb_get_field_infos(FieldInfos *fis)
+{
+    VALUE rfis;
+    if (!fis) {
+        return Qnil;
+    }
+    rfis = object_get(fis);
+    if (rfis != Qnil) {
+        /* already wrapped: hand back the existing Ruby object */
+        return rfis;
+    }
+    rfis = Data_Wrap_Struct(cFieldInfos, &frb_fis_mark, &frb_fis_free,
+                            fis);
+    REF(fis);
+    object_add(fis, rfis);
+    return rfis;
+}
+/*
+ *  call-seq:
+ *     FieldInfos.new(defaults = {}) -> field_infos
+ *
+ *  Create a new FieldInfos object which uses the default values for fields
+ *  specified in the +defaults+ hash parameter. See FieldInfo for available
+ *  property values.
+ */
+static VALUE
+frb_fis_init(int argc, VALUE *argv, VALUE self)
+{
+    /* values used for any property the defaults hash does not mention */
+    StoreValue store = STORE_YES;
+    IndexValue index = INDEX_YES;
+    TermVectorValue term_vector = TERM_VECTOR_WITH_POSITIONS_OFFSETS;
+    float boost; /* written by frb_fi_get_params but not passed to fis_new */
+    VALUE roptions;
+    FieldInfos *field_infos;
+    rb_scan_args(argc, argv, "01", &roptions);
+    if (argc >= 1) {
+        frb_fi_get_params(roptions, &store, &index, &term_vector, &boost);
+    }
+    field_infos = fis_new(store, index, term_vector);
+    /* attach the C struct to self and register it in the object registry */
+    Frt_Wrap_Struct(self, &frb_fis_mark, &frb_fis_free, field_infos);
+    object_add(field_infos, self);
+    return self;
+}
+/*
+ *  call-seq:
+ *     fis.to_a -> array
+ *
+ *  Return an array of the FieldInfo objects contained by this FieldInfos
+ *  object.
+ */
+static VALUE
+frb_fis_to_a(VALUE self)
+{
+    FieldInfos *fis = (FieldInfos *)DATA_PTR(self);
+    VALUE result = rb_ary_new();
+    int field_num = 0;
+    /* fields are appended in field-number order */
+    while (field_num < fis->size) {
+        VALUE rfi = frb_get_field_info(fis->fields[field_num]);
+        rb_ary_push(result, rfi);
+        field_num++;
+    }
+    return result;
+}
+/*
+ *  call-seq:
+ *     fis[name] -> field_info
+ *     fis[number] -> field_info
+ *
+ *  Get the FieldInfo object. FieldInfo objects can be referenced by either
+ *  their field-number or the field-name (a symbol or a string). For
+ *  example;
+ *
+ *    fi = fis[:name]
+ *    fi = fis[2]
+ */
+static VALUE
+frb_fis_get(VALUE self, VALUE ridx)
+{
+    FieldInfos *fis = (FieldInfos *)DATA_PTR(self);
+    VALUE rfi = Qnil;
+    switch (TYPE(ridx)) {
+        case T_FIXNUM: {
+            const int requested = FIX2INT(ridx);
+            /* a negative index counts back from the last field */
+            const int index =
+                (requested < 0) ? requested + fis->size : requested;
+            if (index < 0 || index >= fis->size) {
+                /* report the index the caller passed in, not the adjusted
+                 * one, so the message matches what they wrote */
+                rb_raise(rb_eArgError, "index of %d is out of range (0..%d)\n",
+                         requested, fis->size - 1);
+            }
+            rfi = frb_get_field_info(fis->fields[index]);
+            break;
+        }
+        case T_SYMBOL:
+        case T_STRING:
+            /* look-up by name; frb_get_field_info maps a NULL result to nil */
+            rfi = frb_get_field_info(fis_get_field(fis, frb_field(ridx)));
+            break;
+        default:
+            rb_raise(rb_eArgError, "Can't index FieldInfos with %s",
+                     rs2s(rb_obj_as_string(ridx)));
+            break;
+    }
+    return rfi;
+}
+/*
+ *  call-seq:
+ *     fis << fi -> fis
+ *     fis.add(fi) -> fis
+ *
+ *  Add a FieldInfo object. Use the FieldInfos#add_field method where
+ *  possible.
+ */
+static VALUE
+frb_fis_add(VALUE self, VALUE rfi)
+{
+    FieldInfos *field_infos = (FieldInfos *)DATA_PTR(self);
+    FieldInfo *field_info = (FieldInfo *)frb_rb_data_ptr(rfi);
+    fis_add_field(field_infos, field_info);
+    /* the extra reference is taken only after the add has gone through */
+    REF(field_info);
+    return self;
+}
+/*
+ *  call-seq:
+ *     fis.add_field(name, properties = {}) -> fis
+ *
+ *  Add a new field to the FieldInfos object. See FieldInfo for a description
+ *  of the available properties.
+ */
+static VALUE
+frb_fis_add_field(int argc, VALUE *argv, VALUE self)
+{
+    FieldInfos *fis = (FieldInfos *)DATA_PTR(self);
+    /* start from the collection-wide defaults; the options hash, when
+     * given, overrides them */
+    StoreValue store = fis->store;
+    IndexValue index = fis->index;
+    TermVectorValue term_vector = fis->term_vector;
+    float boost = 1.0f;
+    VALUE rname, roptions;
+    FieldInfo *new_field;
+    rb_scan_args(argc, argv, "11", &rname, &roptions);
+    if (argc >= 2) {
+        frb_fi_get_params(roptions, &store, &index, &term_vector, &boost);
+    }
+    new_field = fi_new(frb_field(rname), store, index, term_vector);
+    new_field->boost = boost;
+    fis_add_field(fis, new_field);
+    return self;
+}
+/*
+ *  call-seq:
+ *     fis.each {|fi| do_something } -> fis
+ *
+ *  Iterate through the FieldInfo objects.
+ */
+static VALUE
+frb_fis_each(VALUE self)
+{
+    FieldInfos *fis = (FieldInfos *)DATA_PTR(self);
+    int field_num = 0;
+    /* the size is re-read on every pass, as the block runs in between */
+    while (field_num < fis->size) {
+        VALUE rfi = frb_get_field_info(fis->fields[field_num]);
+        rb_yield(rfi);
+        field_num++;
+    }
+    return self;
+}
+/*
+ *  call-seq:
+ *     fis.to_s -> string
+ *
+ *  Return a string representation of the FieldInfos object.
+ */
+static VALUE
+frb_fis_to_s(VALUE self)
+{
+    /* copy the C string returned by fis_to_s into a Ruby String, then
+     * free the C buffer */
+    char *cstr = fis_to_s((FieldInfos *)DATA_PTR(self));
+    VALUE rstr = rb_str_new2(cstr);
+    free(cstr);
+    return rstr;
+}
+/*
+ *  call-seq:
+ *     fis.size -> int
+ *
+ *  Return the number of fields in the FieldInfos object.
+ */
+static VALUE
+frb_fis_size(VALUE self)
+{
+    FieldInfos *field_infos = (FieldInfos *)DATA_PTR(self);
+    VALUE rsize = INT2FIX(field_infos->size);
+    return rsize;
+}
+/*
+ *  call-seq:
+ *     fis.create_index(dir) -> self
+ *
+ *  Create a new index in the directory specified. The directory +dir+ can
+ *  either be a string path representing a directory on the file-system or an
+ *  actual directory object. Care should be taken when using this method. Any
+ *  existing index (or other files for that matter) will be deleted from the
+ *  directory and overwritten by the new index.
+ */
+static VALUE
+frb_fis_create_index(VALUE self, VALUE rdir)
+{
+    FieldInfos *fis = (FieldInfos *)DATA_PTR(self);
+    Store *store;
+    if (TYPE(rdir) != T_DATA) {
+        /* anything that is not a wrapped object is treated as a path */
+        StringValue(rdir);
+        frb_create_dir(rdir);
+        store = open_fs_store(rs2s(rdir));
+    } else {
+        /* a wrapped store: take a reference to balance the deref below */
+        store = DATA_PTR(rdir);
+        REF(store);
+    }
+    index_create(store, fis);
+    store_deref(store);
+    return self;
+}
+/*
+ *  call-seq:
+ *     fis.fields -> symbol array
+ *     fis.field_names -> symbol array
+ *
+ *  Return a list of the field names (as symbols) of all the fields in the
+ *  index.
+ */
+static VALUE
+frb_fis_get_fields(VALUE self)
+{
+    FieldInfos *fis = (FieldInfos *)DATA_PTR(self);
+    VALUE rnames = rb_ary_new();
+    int field_num = 0;
+    while (field_num < fis->size) {
+        FieldInfo *field_info = fis->fields[field_num];
+        rb_ary_push(rnames, FSYM2SYM(field_info->name));
+        field_num++;
+    }
+    return rnames;
+}
+/*
+ *  call-seq:
+ *     fis.tokenized_fields -> symbol array
+ *
+ *  Return a list of the field names (as symbols) of all the tokenized fields
+ *  in the index.
+ */
+static VALUE
+frb_fis_get_tk_fields(VALUE self)
+{
+    FieldInfos *fis = (FieldInfos *)DATA_PTR(self);
+    VALUE rnames = rb_ary_new();
+    int field_num;
+    for (field_num = 0; field_num < fis->size; field_num++) {
+        /* only tokenized fields make it into the result */
+        if (fi_is_tokenized(fis->fields[field_num])) {
+            rb_ary_push(rnames, FSYM2SYM(fis->fields[field_num]->name));
+        }
+    }
+    return rnames;
+}
+/****************************************************************************
+ *
+ * TermEnum Methods
+ *
+ ****************************************************************************/
+/*
+ *  Free callback registered for wrapped TermEnum objects: invokes the
+ *  enumerator's own close function.
+ */
+static void
+frb_te_free(void *p)
+{
+    TermEnum *term_enum = (TermEnum *)p;
+    term_enum->close(term_enum);
+}
+/*
+ *  Store +term+ as the enum's current term and return it as a Ruby value.
+ *  A NULL +term+ is stored and returned as nil; otherwise a new String of
+ *  curr_term_len bytes is built from it.
+ */
+static VALUE
+frb_te_get_set_term(VALUE self, const char *term)
+{
+    VALUE rterm = Qnil;
+    if (term) {
+        TermEnum *te = (TermEnum *)DATA_PTR(self);
+        rterm = rb_str_new(term, te->curr_term_len);
+    }
+    rb_ivar_set(self, id_term, rterm);
+    return rterm;
+}
+/*
+ *  Wrap +te+ in a new TermEnum object, or return nil when +te+ is NULL. The
+ *  new object starts with the enum's current term and with a copy of the
+ *  field-number map ivar of +rir+.
+ */
+static VALUE
+frb_get_te(VALUE rir, TermEnum *te)
+{
+    VALUE self;
+    if (te == NULL) {
+        return Qnil;
+    }
+    self = Data_Wrap_Struct(cTermEnum, NULL, &frb_te_free, te);
+    frb_te_get_set_term(self, te->curr_term);
+    rb_ivar_set(self, id_fld_num_map, rb_ivar_get(rir, id_fld_num_map));
+    return self;
+}
+/*
+ *  call-seq:
+ *     term_enum.next -> term_string
+ *
+ *  Returns the next term in the enumeration, or nil when there are no more
+ *  terms.
+ */
+static VALUE
+frb_te_next(VALUE self)
+{
+    TermEnum *term_enum = (TermEnum *)DATA_PTR(self);
+    /* a NULL result is turned into nil by frb_te_get_set_term */
+    char *next_term = term_enum->next(term_enum);
+    return frb_te_get_set_term(self, next_term);
+}
+/*
+ *  call-seq:
+ *     term_enum.term -> term_string
+ *
+ *  Returns the current term pointed to by the enum. This method should only
+ *  be called after a successful call to TermEnum#next.
+ */
+static VALUE
+frb_te_term(VALUE self)
+{
+    /* the current term is cached in an ivar by frb_te_get_set_term */
+    VALUE rterm = rb_ivar_get(self, id_term);
+    return rterm;
+}
+/*
+ *  call-seq:
+ *     term_enum.doc_freq -> integer
+ *
+ *  Returns the document frequency of the current term pointed to by the enum.
+ *  That is the number of documents that this term appears in. The method
+ *  should only be called after a successful call to TermEnum#next.
+ */
+static VALUE
+frb_te_doc_freq(VALUE self)
+{
+    TermEnum *term_enum = (TermEnum *)DATA_PTR(self);
+    VALUE rdoc_freq = INT2FIX(term_enum->curr_ti.doc_freq);
+    return rdoc_freq;
+}
+/*
+ *  call-seq:
+ *     term_enum.skip_to(target) -> term
+ *
+ *  Skip to term +target+. This method can skip forwards or backwards. If you
+ *  want to skip back to the start, pass the empty string "". That is;
+ *
+ *    term_enum.skip_to("")
+ *
+ *  Returns the first term greater than or equal to +target+
+ */
+static VALUE
+frb_te_skip_to(VALUE self, VALUE rterm)
+{
+    TermEnum *te = (TermEnum *)DATA_PTR(self);
+    /* make sure +rterm+ really is a String (TypeError otherwise) before its
+     * buffer is handed to the C enumerator */
+    StringValue(rterm);
+    /* a NULL result is turned into nil by frb_te_get_set_term */
+    return frb_te_get_set_term(self, te->skip_to(te, rs2s(rterm)));
+}
+/*
+ *  call-seq:
+ *     term_enum.each {|term, doc_freq| do_something() } -> term_count
+ *
+ *  Iterates through all the terms in the field, yielding the term and the
+ *  document frequency.
+ */
+static VALUE
+frb_te_each(VALUE self)
+{
+    TermEnum *te = (TermEnum *)DATA_PTR(self);
+    char *term;
+    int term_cnt = 0;
+    /* a single two-slot array, [term, doc_freq], is reused for every yield */
+    VALUE vals = rb_ary_new2(2);
+    rb_ary_store(vals, 0, Qnil);
+    rb_ary_store(vals, 1, Qnil);
+    /* each is being called so there will be no current term */
+    rb_ivar_set(self, id_term, Qnil);
+    while (NULL != (term = te->next(te))) {
+        term_cnt++;
+        /* the block has had its hands on +vals+ and may have resized or
+         * replaced its contents, so write through rb_ary_store instead of
+         * writing directly into the array's buffer */
+        rb_ary_store(vals, 0, rb_str_new(term, te->curr_term_len));
+        rb_ary_store(vals, 1, INT2FIX(te->curr_ti.doc_freq));
+        rb_yield(vals);
+    }
+    return INT2FIX(term_cnt);
+}
+/*
+ *  call-seq:
+ *     term_enum.set_field(field) -> self
+ *
+ *  Set the field for the term_enum. The field value should be a symbol as
+ *  usual. For example, to scan all title terms you'd do this;
+ *
+ *    term_enum.set_field(:title).each do |term, doc_freq|
+ *      do_something()
+ *    end
+ */
+static VALUE
+frb_te_set_field(VALUE self, VALUE rfield)
+{
+    TermEnum *te = (TermEnum *)DATA_PTR(self);
+    VALUE rfnum = rb_hash_aref(rb_ivar_get(self, id_fld_num_map), rfield);
+    int field_num;
+    if (rfnum == Qnil) {
+        /* unknown field: a non-symbol gets a TypeError, a symbol gets the
+         * ArgumentError below */
+        Check_Type(rfield, T_SYMBOL);
+        rb_raise(rb_eArgError, "field %s doesn't exist in the index",
+                 frb_field(rfield));
+    }
+    field_num = FIX2INT(rfnum);
+    rb_ivar_set(self, id_field_num, rfnum);
+    te->set_field(te, field_num);
+    return self;
+}
+/*
+ *  Make sure the JSON buffer has room for one more entry describing a term
+ *  of +term_len+ bytes: three bytes per term byte plus 100 bytes of slack
+ *  for the punctuation, keys and frequency.
+ *
+ *  json - in/out: start of the buffer; updated if the buffer is reallocated.
+ *  jp   - current write position inside *json.
+ *  capa - in/out: buffer capacity; doubled until the entry fits.
+ *
+ *  Returns the write position to continue from. This must replace the
+ *  caller's +jp+, because reallocation may move the buffer.
+ */
+static char *
+frb_te_json_reserve(char **json, char *jp, int *capa, long term_len)
+{
+    char *buf = *json;
+    const long used = (long)(jp - buf);
+    const long needed = term_len * 3 + used + 100;
+    if (needed > *capa) {
+        int new_capa = *capa;
+        /* one doubling is not necessarily enough for a very long term */
+        while (needed > new_capa) {
+            new_capa <<= 1;
+        }
+        REALLOC_N(buf, char, new_capa);
+        *json = buf;
+        *capa = new_capa;
+    }
+    return buf + used;
+}
+/*
+ *  call-seq:
+ *     term_enum.to_json() -> string
+ *
+ *  Returns a JSON representation of the term enum. You can speed this up by
+ *  having the method return arrays instead of objects, simply by passing an
+ *  argument to the to_json method. For example;
+ *
+ *    term_enum.to_json() #=>
+ *    # [
+ *    #   {"term":"apple","frequency":12},
+ *    #   {"term":"banana","frequency":2},
+ *    #   {"term":"cantaloupe","frequency":12}
+ *    # ]
+ *
+ *    term_enum.to_json(:fast) #=>
+ *    # [
+ *    #   ["apple",12],
+ *    #   ["banana",2],
+ *    #   ["cantaloupe",12]
+ *    # ]
+ */
+static VALUE
+frb_te_to_json(int argc, VALUE *argv, VALUE self)
+{
+    TermEnum *te = (TermEnum *)DATA_PTR(self);
+    VALUE rjson;
+    char *json, *jp;
+    char *term;
+    int capa = 65536;
+    /* any argument at all selects the compact array-per-term form */
+    const int as_arrays = (argc > 0);
+    jp = json = ALLOC_N(char, capa);
+    *(jp++) = '[';
+    while (NULL != (term = te->next(te))) {
+        /* reserve room before anything is written for this entry, and pick
+         * up the (possibly relocated) write position */
+        jp = frb_te_json_reserve(&json, jp, &capa, (long)te->curr_term_len);
+        if (as_arrays) {
+            *(jp++) = '[';
+            jp = json_concat_string(jp, term);
+            *(jp++) = ',';
+            sprintf(jp, "%d", te->curr_ti.doc_freq);
+            jp += strlen(jp);
+            *(jp++) = ']';
+        } else {
+            *(jp++) = '{';
+            memcpy(jp, "\"term\":", 7);
+            jp += 7;
+            jp = json_concat_string(jp, term);
+            *(jp++) = ',';
+            memcpy(jp, "\"frequency\":", 12);
+            jp += 12;
+            sprintf(jp, "%d", te->curr_ti.doc_freq);
+            jp += strlen(jp);
+            *(jp++) = '}';
+        }
+        *(jp++) = ',';
+    }
+    /* drop the separator left behind by the last entry, if there was one */
+    if (*(jp-1) == ',') jp--;
+    *(jp++) = ']';
+    *jp = '\0';
+    rjson = rb_str_new2(json);
+    free(json);
+    return rjson;
+}
+/****************************************************************************
+ *
+ * TermDocEnum Methods
+ *
+ ****************************************************************************/
+/* Free function handed to Data_Wrap_Struct for TermDocEnum objects: closes
+ * the enum through its own close function pointer. */
+static void
+frb_tde_free(void *p)
+{
+    TermDocEnum *const doc_enum = (TermDocEnum *)p;
+    doc_enum->close(doc_enum);
+}
+/* Wrap +tde+ in a new cTermDocEnum object (released with frb_tde_free) and
+ * copy the id_fld_num_map instance variable across from +rir+. */
+static VALUE
+frb_get_tde(VALUE rir, TermDocEnum *tde)
+{
+    VALUE rtde = Data_Wrap_Struct(cTermDocEnum, NULL, &frb_tde_free, tde);
+    VALUE rfnum_map = rb_ivar_get(rir, id_fld_num_map);
+    rb_ivar_set(rtde, id_fld_num_map, rfnum_map);
+    return rtde;
+}
+/*
+ *  call-seq:
+ *     term_doc_enum.seek(field, term) -> self
+ *
+ *  Seek the term +term+ in the index for +field+. After you call this method
+ *  you can call next or each to skip through the documents and positions of
+ *  this particular term.
+ */
+static VALUE
+frb_tde_seek(VALUE self, VALUE rfield, VALUE rterm)
+{
+    TermDocEnum *tde = (TermDocEnum *)DATA_PTR(self);
+    /* the field number is looked up in the hash stored in the
+     * id_fld_num_map instance variable */
+    VALUE rfnum = rb_hash_aref(rb_ivar_get(self, id_fld_num_map), rfield);
+    char *term = StringValuePtr(rterm);
+    if (rfnum == Qnil) {
+        /* rb_raise does not return */
+        rb_raise(rb_eArgError, "field %s doesn't exist in the index",
+                 frb_field(rfield));
+    }
+    tde->seek(tde, FIX2INT(rfnum), term);
+    return self;
+}
+/*
+ *  call-seq:
+ *     term_doc_enum.seek_term_enum(term_enum) -> self
+ *
+ *  Seek the current term in +term_enum+. You could just use the standard seek
+ *  method like this;
+ *
+ *    term_doc_enum.seek(field, term_enum.term)
+ *
+ *  However the +seek_term_enum+ method saves an index lookup so should offer
+ *  a large performance improvement.
+ */
+static VALUE
+frb_tde_seek_te(VALUE self, VALUE rterm_enum)
+{
+    TermDocEnum *const doc_enum = (TermDocEnum *)DATA_PTR(self);
+    TermEnum *const term_enum = (TermEnum *)frb_rb_data_ptr(rterm_enum);
+    /* hand the TermEnum straight to the enum's seek_te function pointer */
+    doc_enum->seek_te(doc_enum, term_enum);
+    return self;
+}
+/*
+ *  call-seq:
+ *     term_doc_enum.doc -> doc_id
+ *
+ *  Returns the current document number pointed to by the +term_doc_enum+.
+ */
+static VALUE
+frb_tde_doc(VALUE self)
+{
+    TermDocEnum *const doc_enum = (TermDocEnum *)DATA_PTR(self);
+    /* doc_num is a function pointer on the enum; its result becomes a Fixnum */
+    return INT2FIX(doc_enum->doc_num(doc_enum));
+}
+/*
+ *  call-seq:
+ *     term_doc_enum.freq -> integer
+ *
+ *  Returns the frequency of the current document pointed to by the
+ *  +term_doc_enum+.
+ */
+static VALUE
+frb_tde_freq(VALUE self)
+{
+    TermDocEnum *const doc_enum = (TermDocEnum *)DATA_PTR(self);
+    /* freq is a function pointer on the enum; its result becomes a Fixnum */
+    return INT2FIX(doc_enum->freq(doc_enum));
+}
+/*
+ *  call-seq:
+ *     term_doc_enum.next -> bool
+ *
+ *  Move forward to the next document in the enumeration. Returns +true+ if
+ *  there is another document or +false+ otherwise.
+ */
+static VALUE
+frb_tde_next(VALUE self)
+{
+    TermDocEnum *const doc_enum = (TermDocEnum *)DATA_PTR(self);
+    /* map the C truth value returned by next onto Ruby true/false */
+    if (doc_enum->next(doc_enum)) {
+        return Qtrue;
+    }
+    return Qfalse;
+}
+/*
+ *  call-seq:
+ *     term_doc_enum.next_position -> position or nil
+ *
+ *  Move forward to the next position of the term in the current document.
+ *  Returns the position, or +nil+ if there are no more positions.
+ */
+static VALUE
+frb_tde_next_position(VALUE self)
+{
+    TermDocEnum *const doc_enum = (TermDocEnum *)DATA_PTR(self);
+    int position;
+    /* an enum without a next_position function cannot scan positions */
+    if (NULL == doc_enum->next_position) {
+        rb_raise(rb_eNotImpError, "to scan through positions you must create "
+                 "the TermDocEnum with Index#term_positions method rather "
+                 "than the Index#term_docs method");
+    }
+    position = doc_enum->next_position(doc_enum);
+    /* a negative result is reported to Ruby as nil */
+    if (position < 0) {
+        return Qnil;
+    }
+    return INT2FIX(position);
+}
+/*
+ *  call-seq:
+ *     term_doc_enum.each {|doc_id, freq| do_something() } -> doc_count
+ *
+ *  Iterate through the documents and document frequencies in the
+ *  +term_doc_enum+.
+ *
+ *  NOTE: this method can only be called once after each seek. If you need to
+ *  call +#each+ again then you should call +#seek+ again too.
+ */
+static VALUE
+frb_tde_each(VALUE self)
+{
+    TermDocEnum *const doc_enum = (TermDocEnum *)DATA_PTR(self);
+    int yielded;
+    /* one two-element array is allocated up front and refilled on every
+     * iteration before being yielded */
+    VALUE pair = rb_ary_new2(2);
+    rb_ary_store(pair, 0, Qnil);
+    rb_ary_store(pair, 1, Qnil);
+    for (yielded = 0; doc_enum->next(doc_enum); yielded++) {
+        RARRAY_PTR(pair)[0] = INT2FIX(doc_enum->doc_num(doc_enum));
+        RARRAY_PTR(pair)[1] = INT2FIX(doc_enum->freq(doc_enum));
+        rb_yield(pair);
+    }
+    return INT2FIX(yielded);
+}
+/*
+ *  call-seq:
+ *     term_doc_enum.to_json() -> string
+ *
+ *  Returns a json representation of the term doc enum. It will also add the
+ *  term positions if they are available. You can speed this up by having the
+ *  method return arrays instead of objects, simply by passing an argument to
+ *  the to_json method. For example;
+ *
+ *    term_doc_enum.to_json() #=>
+ *    # [
+ *    #   {"document":1,"frequency":12},
+ *    #   {"document":11,"frequency":1},
+ *    #   {"document":29,"frequency":120},
+ *    #   {"document":30,"frequency":3}
+ *    # ]
+ *
+ *    term_doc_enum.to_json(:fast) #=>
+ *    # [
+ *    #   [1,12],
+ *    #   [11,1],
+ *    #   [29,120],
+ *    #   [30,3]
+ *    # ]
+ */
+static VALUE
+frb_tde_to_json(int argc, VALUE *argv, VALUE self)
+{
+    TermDocEnum *tde = (TermDocEnum *)DATA_PTR(self);
+    VALUE rjson;
+    char *json, *jp;
+    int capa = 65536;
+    const char *format;
+    /* passing any argument selects the compact array output; the argument
+     * values themselves (argv) are never inspected */
+    const bool fast = (argc > 0);
+    const char close = fast ? ']' : '}';
+    /* positions are emitted only when the enum can supply them */
+    const bool do_positions = tde->next_position != NULL;
+    jp = json = ALLOC_N(char, capa);
+    *(jp++) = '[';
+    if (do_positions) {
+        /* the entry is left open here; the positions list and the closing
+         * bracket/brace are appended in the loop below */
+        format = fast
+            ? "[%d,%d,["
+            : "{\"document\":%d,\"frequency\":%d,\"positions\":[";
+    }
+    else {
+        format = fast
+            ? "[%d,%d],"
+            : "{\"document\":%d,\"frequency\":%d},";
+    }
+    while (tde->next(tde)) {
+        /* 100 chars should be enough room for an extra entry, plus 20 per
+         * occurrence for the positions list */
+        const long used = (long)(jp - json);
+        const long needed = used + 100 + (long)tde->freq(tde) * 20;
+        if (needed > capa) {
+            /* keep doubling: one doubling may not cover a large frequency */
+            while (needed > capa) {
+                capa <<= 1;
+            }
+            REALLOC_N(json, char, capa);
+            /* the buffer may have moved, so re-derive the write cursor */
+            jp = json + used;
+        }
+        jp += sprintf(jp, format, tde->doc_num(tde), tde->freq(tde));
+        if (do_positions) {
+            int pos;
+            while (0 <= (pos = tde->next_position(tde))) {
+                jp += sprintf(jp, "%d,", pos);
+            }
+            /* drop the separator after the last position, if any */
+            if (*(jp - 1) == ',') jp--;
+            *(jp++) = ']';
+            *(jp++) = close;
+            *(jp++) = ',';
+        }
+    }
+    /* drop the trailing separator, if any entry was written */
+    if (*(jp - 1) == ',') jp--;
+    *(jp++) = ']';
+    *jp = '\0';
+    rjson = rb_str_new2(json);
+    free(json);
+    return rjson;
+}
+/*
+ *  call-seq:
+ *     term_doc_enum.each_position {|pos| do_something } -> term_doc_enum
+ *
+ *  Iterate through each of the positions occupied by the current term in the
+ *  current document. This can only be called once per document. It can be
+ *  used within the each method. For example, to print the terms documents and
+ *  positions;
+ *
+ *    tde.each do |doc_id, freq|
+ *      puts "term appeared #{freq} times in document #{doc_id}:"
+ *      positions = []
+ *      tde.each_position {|pos| positions << pos}
+ *      puts "  #{positions.join(', ')}"
+ *    end
+ */
+static VALUE
+frb_tde_each_position(VALUE self)
+{
+    TermDocEnum *const doc_enum = (TermDocEnum *)DATA_PTR(self);
+    int position;
+    /* an enum without a next_position function cannot scan positions */
+    if (NULL == doc_enum->next_position) {
+        rb_raise(rb_eNotImpError, "to scan through positions you must create "
+                 "the TermDocEnum with Index#term_positions method rather "
+                 "than the Index#term_docs method");
+    }
+    /* yield until next_position reports a negative value */
+    for (position = doc_enum->next_position(doc_enum);
+         position >= 0;
+         position = doc_enum->next_position(doc_enum)) {
+        rb_yield(INT2FIX(position));
+    }
+    return self;
+}
+/*
+ *  call-seq:
+ *     term_doc_enum.skip_to(target) -> bool
+ *
+ *  Skip to the required document number +target+ and return true if there is
+ *  a document >= +target+.
+ */
+static VALUE
+frb_tde_skip_to(VALUE self, VALUE rtarget)
+{
+    TermDocEnum *const doc_enum = (TermDocEnum *)DATA_PTR(self);
+    /* map the C truth value returned by skip_to onto Ruby true/false */
+    if (doc_enum->skip_to(doc_enum, FIX2INT(rtarget))) {
+        return Qtrue;
+    }
+    return Qfalse;
+}
+/****************************************************************************
+ *
+ * TVOffsets Methods
+ *
+ ****************************************************************************/
+/* Build a cTVOffsets struct from the start and end members of +offset+. */
+static VALUE
+frb_get_tv_offsets(Offset *offset)
+{
+    VALUE rstart = ULL2NUM((u64)offset->start);
+    VALUE rend = ULL2NUM((u64)offset->end);
+    /* rb_struct_new takes a NULL-terminated argument list */
+    return rb_struct_new(cTVOffsets, rstart, rend, NULL);
+}
+/****************************************************************************
+ *
+ * TVTerm Methods
+ *
+ ****************************************************************************/
+/* Build a cTVTerm struct (text, frequency, positions) from +tv_term+. The
+ * positions member is nil when the TVTerm carries no positions array. */
+static VALUE
+frb_get_tv_term(TVTerm *tv_term)
+{
+    const int freq = tv_term->freq;
+    VALUE rtext = rb_str_new2(tv_term->text);
+    VALUE rpositions = Qnil;
+    if (tv_term->positions) {
+        int *positions = tv_term->positions;
+        int idx = 0;
+        /* one Fixnum per occurrence: freq entries are read */
+        rpositions = rb_ary_new2(freq);
+        while (idx < freq) {
+            rb_ary_store(rpositions, idx, INT2FIX(positions[idx]));
+            idx++;
+        }
+    }
+    return rb_struct_new(cTVTerm, rtext, INT2FIX(freq), rpositions, NULL);
+}
+/****************************************************************************
+ *
+ * TermVector Methods
+ *
+ ****************************************************************************/
+/* Build a cTermVector struct (field, terms, offsets) from +tv+. The offsets
+ * member is nil when the TermVector carries no offsets array. */
+static VALUE
+frb_get_tv(TermVector *tv)
+{
+    const int term_total = tv->term_cnt;
+    const int offset_total = tv->offset_cnt;
+    TVTerm *terms = tv->terms;
+    VALUE rfield = FSYM2SYM(tv->field);
+    VALUE rterms = rb_ary_new2(term_total);
+    VALUE roffsets = Qnil;
+    int idx;
+    idx = 0;
+    while (idx < term_total) {
+        rb_ary_store(rterms, idx, frb_get_tv_term(&terms[idx]));
+        idx++;
+    }
+    if (tv->offsets) {
+        Offset *offsets = tv->offsets;
+        roffsets = rb_ary_new2(offset_total);
+        idx = 0;
+        while (idx < offset_total) {
+            rb_ary_store(roffsets, idx, frb_get_tv_offsets(&offsets[idx]));
+            idx++;
+        }
+    }
+    return rb_struct_new(cTermVector, rfield, rterms, roffsets, NULL);
+}
+/****************************************************************************
+ *
+ * IndexWriter Methods
+ *
+ ****************************************************************************/
+/* Free function for wrapped IndexWriter objects: closes the writer. */
+void
+frb_iw_free(void *p)
+{
+    IndexWriter *const writer = (IndexWriter *)p;
+    iw_close(writer);
+}
+/* Mark function for wrapped IndexWriter objects: passes the writer's
+ * analyzer, store and field infos to frb_gc_mark. */
+void
+frb_iw_mark(void *p)
+{
+    IndexWriter *const writer = (IndexWriter *)p;
+    frb_gc_mark(writer->analyzer);
+    frb_gc_mark(writer->store);
+    frb_gc_mark(writer->fis);
+}
+/*
+ *  call-seq:
+ *     index_writer.close -> nil
+ *
+ *  Close the IndexWriter. This will close and free all resources used
+ *  exclusively by the index writer. The garbage collector will do this
+ *  automatically if not called explicitly.
+ */
+static VALUE
+frb_iw_close(VALUE self)
+{
+    /* fetch the writer before the Ruby object is unwrapped */
+    IndexWriter *const writer = (IndexWriter *)DATA_PTR(self);
+    Frt_Unwrap_Struct(self);
+    iw_close(writer);
+    return Qnil;
+}
+/* Copy the option sym_<attr> into config.<attr> when it is present and
+ * truthy in roptions. Relies on rval, roptions and config being in scope at
+ * the point of use. */
+#define SET_INT_ATTR(attr) \
+    do {\
+        rval = rb_hash_aref(roptions, sym_##attr);\
+        if (RTEST(rval)) {\
+            config.attr = FIX2INT(rval);\
+        }\
+    } while (0)
+/*
+ *  call-seq:
+ *     IndexWriter.new(options = {}) -> index_writer
+ *
+ *  Create a new IndexWriter. You should either pass a path or a directory to
+ *  this constructor. For example, here are three ways you can create an
+ *  IndexWriter;
+ *
+ *    dir = RAMDirectory.new()
+ *    iw = IndexWriter.new(:dir => dir)
+ *
+ *    dir = FSDirectory.new("/path/to/index")
+ *    iw = IndexWriter.new(:dir => dir)
+ *
+ *    iw = IndexWriter.new(:path => "/path/to/index")
+ *
+ * See IndexWriter for more options.
+ */
+static VALUE
+frb_iw_init(int argc, VALUE *argv, VALUE self)
+{
+    VALUE roptions, rval;
+    bool create = false;
+    bool create_if_missing = true;
+    Store *store = NULL;
+    Analyzer *analyzer = NULL;
+    IndexWriter *volatile iw = NULL;
+    Config config = default_config;
+    /* the options hash is optional; roptions is nil when it is omitted */
+    rb_scan_args(argc, argv, "01", &roptions);
+    if (argc > 0) {
+        Check_Type(roptions, T_HASH);
+        /* :dir takes precedence over :path */
+        if ((rval = rb_hash_aref(roptions, sym_dir)) != Qnil) {
+            Check_Type(rval, T_DATA);
+            store = DATA_PTR(rval);
+        } else if ((rval = rb_hash_aref(roptions, sym_path)) != Qnil) {
+            StringValue(rval);
+            frb_create_dir(rval);
+            store = open_fs_store(rs2s(rval));
+            DEREF(store);
+        }
+        /* The store is not closed explicitly here; that is left to Ruby's
+         * garbage collector. */
+        /* use_compound_file defaults to true: only an explicit false
+         * switches it off */
+        config.use_compound_file =
+            (rb_hash_aref(roptions, sym_use_compound_file) == Qfalse)
+            ? false
+            : true;
+        if ((rval = rb_hash_aref(roptions, sym_analyzer)) != Qnil) {
+            analyzer = frb_get_cwrapped_analyzer(rval);
+        }
+        create = RTEST(rb_hash_aref(roptions, sym_create));
+        if ((rval = rb_hash_aref(roptions, sym_create_if_missing)) != Qnil) {
+            create_if_missing = RTEST(rval);
+        }
+        SET_INT_ATTR(chunk_size);
+        SET_INT_ATTR(max_buffer_memory);
+        SET_INT_ATTR(index_interval);
+        SET_INT_ATTR(skip_interval);
+        SET_INT_ATTR(merge_factor);
+        SET_INT_ATTR(max_buffered_docs);
+        SET_INT_ATTR(max_merge_docs);
+        SET_INT_ATTR(max_field_length);
+    }
+    if (NULL == store) {
+        /* neither :dir nor :path was given: fall back to a RAM store */
+        store = open_ram_store();
+        DEREF(store);
+    }
+    /* create the index when asked to, or when it is allowed to be created
+     * and the store has no "segments" file yet */
+    if (!create && create_if_missing && !store->exists(store, "segments")) {
+        create = true;
+    }
+    if (create) {
+        FieldInfos *fis;
+        /* Only consult the options hash when one was actually passed: with
+         * no arguments roptions is nil and must not be handed to
+         * rb_hash_aref. */
+        if (argc > 0
+            && (rval = rb_hash_aref(roptions, sym_field_infos)) != Qnil) {
+            Data_Get_Struct(rval, FieldInfos, fis);
+            index_create(store, fis);
+        } else {
+            /* no :field_infos given: create the index with default field
+             * infos and drop our reference afterwards */
+            fis = fis_new(STORE_YES, INDEX_YES,
+                          TERM_VECTOR_WITH_POSITIONS_OFFSETS);
+            index_create(store, fis);
+            fis_deref(fis);
+        }
+    }
+    iw = iw_open(store, analyzer, &config);
+    Frt_Wrap_Struct(self, &frb_iw_mark, &frb_iw_free, iw);
+    if (rb_block_given_p()) {
+        /* block form: yield the writer, close it and return nil */
+        rb_yield(self);
+        frb_iw_close(self);
+        return Qnil;
+    } else {
+        return self;
+    }
+}
+/*
+ *  call-seq:
+ *     iw.doc_count -> number
+ *
+ *  Returns the number of documents in the Index. Note that deletions won't be
+ *  taken into account until the IndexWriter has been committed.
+ */
+static VALUE
+frb_iw_get_doc_count(VALUE self)
+{
+    IndexWriter *const writer = (IndexWriter *)DATA_PTR(self);
+    /* iw_doc_count supplies the number; it is returned as a Fixnum */
+    return INT2FIX(iw_doc_count(writer));
+}
+/* rb_hash_foreach callback (see frb_get_doc): adds one key/value pair of a
+ * Ruby hash to the Document passed through +arg+. Always returns
+ * ST_CONTINUE. */
+static int
+frb_hash_to_doc_i(VALUE key, VALUE value, VALUE arg)
+{
+    Document *doc = (Document *)arg;
+    Symbol field;
+    DocField *df;
+    VALUE rstr;
+    int i;
+    if (key == Qundef) {
+        return ST_CONTINUE;
+    }
+    field = frb_field(key);
+    /* reuse the document's field of that name when there is one */
+    df = doc_get_field(doc, field);
+    if (NULL == df) {
+        df = df_new(field);
+    }
+    if (rb_respond_to(value, id_boost)) {
+        df->boost = (float)NUM2DBL(rb_funcall(value, id_boost, 0));
+    }
+    /* A String passes rs2s(value) and leaves destroy_data untouched; every
+     * other case passes rstrdup copies and sets destroy_data. */
+    if (TYPE(value) == T_STRING) {
+        df_add_data_len(df, rs2s(value), RSTRING_LEN(value));
+    }
+    else if (TYPE(value) == T_ARRAY) {
+        df->destroy_data = true;
+        for (i = 0; i < RARRAY_LEN(value); i++) {
+            rstr = rb_obj_as_string(RARRAY_PTR(value)[i]);
+            df_add_data_len(df, rstrdup(rstr), RSTRING_LEN(rstr));
+        }
+    }
+    else {
+        rstr = rb_obj_as_string(value);
+        df->destroy_data = true;
+        df_add_data_len(df, rstrdup(rstr), RSTRING_LEN(rstr));
+    }
+    doc_add_field(doc, df);
+    return ST_CONTINUE;
+}
+/* Build a new Document from the Ruby object +rdoc+. A Hash contributes one
+ * field per key; any other object is stored in the fsym_content field (an
+ * Array adds one data item per element). The caller receives the Document
+ * created by doc_new. */
+static Document *
+frb_get_doc(VALUE rdoc)
+{
+    Document *doc = doc_new();
+    DocField *content;
+    VALUE rstr;
+    int i;
+    if (rb_respond_to(rdoc, id_boost)) {
+        doc->boost = (float)NUM2DBL(rb_funcall(rdoc, id_boost, 0));
+    }
+    switch (TYPE(rdoc)) {
+        case T_HASH:
+            rb_hash_foreach(rdoc, frb_hash_to_doc_i, (VALUE)doc);
+            return doc;
+        case T_STRING:
+            content = df_add_data_len(df_new(fsym_content), rs2s(rdoc),
+                                      RSTRING_LEN(rdoc));
+            break;
+        case T_SYMBOL:
+            /* TODO: clean up this ugly cast */
+            content = df_add_data(df_new(fsym_content),
+                                  (char *)rb_id2name(SYM2ID(rdoc)));
+            break;
+        case T_ARRAY:
+            content = df_new(fsym_content);
+            content->destroy_data = true;
+            for (i = 0; i < RARRAY_LEN(rdoc); i++) {
+                rstr = rb_obj_as_string(RARRAY_PTR(rdoc)[i]);
+                df_add_data_len(content, rstrdup(rstr), RSTRING_LEN(rstr));
+            }
+            break;
+        default:
+            rstr = rb_obj_as_string(rdoc);
+            content = df_add_data_len(df_new(fsym_content), rstrdup(rstr),
+                                      RSTRING_LEN(rstr));
+            content->destroy_data = true;
+            break;
+    }
+    /* every non-Hash case ends by attaching its single content field */
+    doc_add_field(doc, content);
+    return doc;
+}
+/*
+ *  call-seq:
+ *     iw << document -> iw
+ *     iw.add_document(document) -> iw
+ *
+ *  Add a document to the index. See Document. A document can also be a simple
+ *  hash object.
+ */
+static VALUE
+frb_iw_add_doc(VALUE self, VALUE rdoc)
+{
+    IndexWriter *const writer = (IndexWriter *)DATA_PTR(self);
+    /* the temporary Document is destroyed again once it has been added */
+    Document *const tmp_doc = frb_get_doc(rdoc);
+    iw_add_doc(writer, tmp_doc);
+    doc_destroy(tmp_doc);
+    return self;
+}
+/*
+ *  call-seq:
+ *     iw.optimize -> iw
+ *
+ *  Optimize the index for searching. This commits any unwritten data to the
+ *  index and optimizes the index into a single segment to improve search
+ *  performance. This is an expensive operation and should not be called too
+ *  often. The best time to call this is at the end of a long batch indexing
+ *  process. Note that calling the optimize method does not in any way affect
+ *  indexing speed (except for the time taken to complete the optimization
+ *  process).
+ */
+static VALUE
+frb_iw_optimize(VALUE self)
+{
+    IndexWriter *const writer = (IndexWriter *)DATA_PTR(self);
+    iw_optimize(writer);
+    /* return the receiver so calls can be chained */
+    return self;
+}
+/*
+ *  call-seq:
+ *     iw.commit -> iw
+ *
+ *  Explicitly commit any changes to the index that may be hanging around in
+ *  memory. You should call this method if you want to read the latest index
+ *  with an IndexWriter.
+ */
+static VALUE
+frb_iw_commit(VALUE self)
+{
+    IndexWriter *const writer = (IndexWriter *)DATA_PTR(self);
+    iw_commit(writer);
+    /* return the receiver so calls can be chained */
+    return self;
+}
+/*
+ *  call-seq:
+ *     iw.add_readers(reader_array) -> iw
+ *
+ *  Use this method to merge other indexes into the one being written by
+ *  IndexWriter. This is useful for parallel indexing. You can have several
+ *  indexing processes running in parallel, possibly even on different
+ *  machines. Then you can finish by merging all of the indexes into a single
+ *  index.
+ */
+static VALUE
+frb_iw_add_readers(VALUE self, VALUE rreaders)
+{
+    IndexWriter *iw = (IndexWriter *)DATA_PTR(self);
+    int i;
+    int reader_cnt;
+    IndexReader **irs;
+    Check_Type(rreaders, T_ARRAY);
+    reader_cnt = RARRAY_LEN(rreaders);
+    /* Validate every element before allocating: Check_Type raises on an
+     * object of the wrong type, and raising after ALLOC_N would leak the
+     * irs array. */
+    for (i = 0; i < reader_cnt; i++) {
+        Check_Type(RARRAY_PTR(rreaders)[i], T_DATA);
+    }
+    irs = ALLOC_N(IndexReader *, reader_cnt);
+    for (i = 0; i < reader_cnt; i++) {
+        Data_Get_Struct(RARRAY_PTR(rreaders)[i], IndexReader, irs[i]);
+    }
+    iw_add_readers(iw, irs, reader_cnt);
+    /* only the pointer array is released here, not the readers */
+    free(irs);
+    return self;
+}
+/*
+ *  call-seq:
+ *     iw.delete(field, term)  -> iw
+ *     iw.delete(field, terms) -> iw
+ *
+ *  Delete all documents in the index with the given +term+ or +terms+ in the
+ *  field +field+. You should usually have a unique document id which you use
+ *  with this method, rather than deleting all documents with the word "the"
+ *  in them. There are of course exceptions to this rule. For example, you may
+ *  want to delete all documents with the term "viagra" when deleting spam.
+ */
+static VALUE
+frb_iw_delete(VALUE self, VALUE rfield, VALUE rterm)
+{
+    IndexWriter *iw = (IndexWriter *)DATA_PTR(self);
+    if (TYPE(rterm) == T_ARRAY) {
+        const int term_cnt = RARRAY_LEN(rterm);
+        int i;
+        char **terms;
+        /* Convert every element to a String before allocating: the
+         * conversion raises for objects that cannot be converted, and
+         * raising after ALLOC_N would leak the terms array. */
+        for (i = 0; i < term_cnt; i++) {
+            StringValue(RARRAY_PTR(rterm)[i]);
+        }
+        terms = ALLOC_N(char *, term_cnt);
+        for (i = 0; i < term_cnt; i++) {
+            terms[i] = StringValuePtr(RARRAY_PTR(rterm)[i]);
+        }
+        iw_delete_terms(iw, frb_field(rfield), terms, term_cnt);
+        /* only the pointer array is released; the strings belong to Ruby */
+        free(terms);
+    } else {
+        /* a single term: anything that is not an Array is treated as one */
+        iw_delete_term(iw, frb_field(rfield), StringValuePtr(rterm));
+    }
+    return self;
+}
+/*
+ *  call-seq:
+ *     index_writer.field_infos -> FieldInfos
+ *
+ *  Get the FieldInfos object for this IndexWriter. This is useful if you need
+ *  to dynamically add new fields to the index with specific properties.
+ */
+static VALUE
+frb_iw_field_infos(VALUE self)
+{
+    IndexWriter *const writer = (IndexWriter *)DATA_PTR(self);
+    /* frb_get_field_infos produces the Ruby object for the writer's fis */
+    return frb_get_field_infos(writer->fis);
+}
+/*
+ *  call-seq:
+ *     index_writer.analyzer -> Analyzer
+ *
+ *  Get the Analyzer for this IndexWriter. This is useful if you need
+ *  to use the same analyzer in a QueryParser.
+ */
+static VALUE
+frb_iw_get_analyzer(VALUE self)
+{
+    IndexWriter *const writer = (IndexWriter *)DATA_PTR(self);
+    /* frb_get_analyzer produces the Ruby object for the writer's analyzer */
+    return frb_get_analyzer(writer->analyzer);
+}
+/*
+ *  call-seq:
+ *     index_writer.analyzer = analyzer -> analyzer
+ *
+ *  Set the Analyzer for this IndexWriter. This is useful if you need to
+ *  change the analyzer for a special document. It is risky though as the
+ *  same analyzer will be used for all documents during search.
+ */
+static VALUE
+frb_iw_set_analyzer(VALUE self, VALUE ranalyzer)
+{
+    IndexWriter *const writer = (IndexWriter *)DATA_PTR(self);
+    /* release the writer's current analyzer before installing the new one */
+    a_deref(writer->analyzer);
+    writer->analyzer = frb_get_cwrapped_analyzer(ranalyzer);
+    return ranalyzer;
+}
+/*
+ *  call-seq:
+ *     index_writer.version -> int
+ *
+ *  Returns the current version of the index writer.
+ */
+static VALUE
+frb_iw_version(VALUE self)
+{
+    IndexWriter *const writer = (IndexWriter *)DATA_PTR(self);
+    /* the version is read from the writer's sis member */
+    return ULL2NUM(writer->sis->version);
+}
+/*
+ *  call-seq:
+ *     iw.chunk_size -> number
+ *
+ *  Return the current value of chunk_size
+ */
+static VALUE
+frb_iw_get_chunk_size(VALUE self)
+{
+    IndexWriter *const writer = (IndexWriter *)DATA_PTR(self);
+    /* read straight from the writer's config */
+    return INT2FIX(writer->config.chunk_size);
+}
+/*
+ *  call-seq:
+ *     iw.chunk_size = chunk_size -> chunk_size
+ *
+ *  Set the chunk_size parameter
+ */
+static VALUE
+frb_iw_set_chunk_size(VALUE self, VALUE rval)
+{
+    IndexWriter *const writer = (IndexWriter *)DATA_PTR(self);
+    /* rval is converted with FIX2INT; the Ruby value is returned unchanged */
+    writer->config.chunk_size = FIX2INT(rval);
+    return rval;
+}
+/*
+ *  call-seq:
+ *     iw.max_buffer_memory -> number
+ *
+ *  Return the current value of max_buffer_memory
+ */
+static VALUE
+frb_iw_get_max_buffer_memory(VALUE self)
+{
+    IndexWriter *const writer = (IndexWriter *)DATA_PTR(self);
+    /* read straight from the writer's config */
+    return INT2FIX(writer->config.max_buffer_memory);
+}
+/*
+ *  call-seq:
+ *     iw.max_buffer_memory = max_buffer_memory -> max_buffer_memory
+ *
+ *  Set the max_buffer_memory parameter
+ */
+static VALUE
+frb_iw_set_max_buffer_memory(VALUE self, VALUE rval)
+{
+    IndexWriter *const writer = (IndexWriter *)DATA_PTR(self);
+    /* rval is converted with FIX2INT; the Ruby value is returned unchanged */
+    writer->config.max_buffer_memory = FIX2INT(rval);
+    return rval;
+}
+/*
+ *  call-seq:
+ *     iw.term_index_interval -> number
+ *
+ *  Return the current value of term_index_interval
+ */
+static VALUE
+frb_iw_get_index_interval(VALUE self)
+{
+    IndexWriter *const writer = (IndexWriter *)DATA_PTR(self);
+    /* read straight from the writer's config */
+    return INT2FIX(writer->config.index_interval);
+}
+/*
+ *  call-seq:
+ *     iw.term_index_interval = term_index_interval -> term_index_interval
+ *
+ *  Set the term_index_interval parameter
+ */
+static VALUE
+frb_iw_set_index_interval(VALUE self, VALUE rval)
+{
+    IndexWriter *const writer = (IndexWriter *)DATA_PTR(self);
+    /* rval is converted with FIX2INT; the Ruby value is returned unchanged */
+    writer->config.index_interval = FIX2INT(rval);
+    return rval;
+}
+/*
+ *  call-seq:
+ *     iw.doc_skip_interval -> number
+ *
+ *  Return the current value of doc_skip_interval
+ */
+static VALUE
+frb_iw_get_skip_interval(VALUE self)
+{
+    IndexWriter *const writer = (IndexWriter *)DATA_PTR(self);
+    /* read straight from the writer's config */
+    return INT2FIX(writer->config.skip_interval);
+}
+/*
+ *  call-seq:
+ *     iw.doc_skip_interval = doc_skip_interval -> doc_skip_interval
+ *
+ *  Set the doc_skip_interval parameter
+ */
+static VALUE
+frb_iw_set_skip_interval(VALUE self, VALUE rval)
+{
+    IndexWriter *const writer = (IndexWriter *)DATA_PTR(self);
+    /* rval is converted with FIX2INT; the Ruby value is returned unchanged */
+    writer->config.skip_interval = FIX2INT(rval);
+    return rval;
+}
+/*
+ *  call-seq:
+ *     iw.merge_factor -> number
+ *
+ *  Return the current value of merge_factor
+ */
+static VALUE
+frb_iw_get_merge_factor(VALUE self)
+{
+    IndexWriter *const writer = (IndexWriter *)DATA_PTR(self);
+    /* read straight from the writer's config */
+    return INT2FIX(writer->config.merge_factor);
+}
+/*
+ *  call-seq:
+ *     iw.merge_factor = merge_factor -> merge_factor
+ *
+ *  Set the merge_factor parameter
+ */
+static VALUE
+frb_iw_set_merge_factor(VALUE self, VALUE rval)
+{
+    IndexWriter *const writer = (IndexWriter *)DATA_PTR(self);
+    /* rval is converted with FIX2INT; the Ruby value is returned unchanged */
+    writer->config.merge_factor = FIX2INT(rval);
+    return rval;
+}
+/*
+ *  call-seq:
+ *     iw.max_buffered_docs -> number
+ *
+ *  Return the current value of max_buffered_docs
+ */
+static VALUE
+frb_iw_get_max_buffered_docs(VALUE self)
+{
+    IndexWriter *const writer = (IndexWriter *)DATA_PTR(self);
+    /* read straight from the writer's config */
+    return INT2FIX(writer->config.max_buffered_docs);
+}
+/*
+ *  call-seq:
+ *     iw.max_buffered_docs = max_buffered_docs -> max_buffered_docs
+ *
+ *  Set the max_buffered_docs parameter
+ */
+static VALUE
+frb_iw_set_max_buffered_docs(VALUE self, VALUE rval)
+{
+    IndexWriter *const writer = (IndexWriter *)DATA_PTR(self);
+    /* rval is converted with FIX2INT; the Ruby value is returned unchanged */
+    writer->config.max_buffered_docs = FIX2INT(rval);
+    return rval;
+}
+/*
+ *  call-seq:
+ *     iw.max_merge_docs -> number
+ *
+ *  Return the current value of max_merge_docs
+ */
+static VALUE
+frb_iw_get_max_merge_docs(VALUE self)
+{
+    IndexWriter *const writer = (IndexWriter *)DATA_PTR(self);
+    /* read straight from the writer's config */
+    return INT2FIX(writer->config.max_merge_docs);
+}
+/*
+ *  call-seq:
+ *     iw.max_merge_docs = max_merge_docs -> max_merge_docs
+ *
+ *  Set the max_merge_docs parameter
+ */
+static VALUE
+frb_iw_set_max_merge_docs(VALUE self, VALUE rval)
+{
+    IndexWriter *const writer = (IndexWriter *)DATA_PTR(self);
+    /* rval is converted with FIX2INT; the Ruby value is returned unchanged */
+    writer->config.max_merge_docs = FIX2INT(rval);
+    return rval;
+}
+/*
+ *  call-seq:
+ *     iw.max_field_length -> number
+ *
+ *  Return the current value of max_field_length
+ */
+static VALUE
+frb_iw_get_max_field_length(VALUE self)
+{
+    IndexWriter *const writer = (IndexWriter *)DATA_PTR(self);
+    /* read straight from the writer's config */
+    return INT2FIX(writer->config.max_field_length);
+}
+/*
+ *  call-seq:
+ *     iw.max_field_length = max_field_length -> max_field_length
+ *
+ *  Set the max_field_length parameter
+ */
+static VALUE
+frb_iw_set_max_field_length(VALUE self, VALUE rval)
+{
+    IndexWriter *const writer = (IndexWriter *)DATA_PTR(self);
+    /* rval is converted with FIX2INT; the Ruby value is returned unchanged */
+    writer->config.max_field_length = FIX2INT(rval);
+    return rval;
+}
+/*
+ *  call-seq:
+ *     iw.use_compound_file -> bool
+ *
+ *  Return the current value of use_compound_file
+ */
+static VALUE
+frb_iw_get_use_compound_file(VALUE self)
+{
+    IndexWriter *const writer = (IndexWriter *)DATA_PTR(self);
+    /* map the C flag onto Ruby true/false */
+    if (writer->config.use_compound_file) {
+        return Qtrue;
+    }
+    return Qfalse;
+}
+/*
+ *  call-seq:
+ *     iw.use_compound_file = use_compound_file -> use_compound_file
+ *
+ *  Set the use_compound_file parameter
+ */
+static VALUE
+frb_iw_set_use_compound_file(VALUE self, VALUE rval)
+{
+    IndexWriter *const writer = (IndexWriter *)DATA_PTR(self);
+    /* any truthy Ruby value switches the flag on; rval is returned as is */
+    writer->config.use_compound_file = RTEST(rval);
+    return rval;
+}
+/****************************************************************************
+ *
+ * LazyDoc Methods
+ *
+ ****************************************************************************/
+/* Free function for the object that wraps a LazyDoc (see
+ * frb_get_lazy_doc): closes the lazy document. */
+static void
+frb_lzd_date_free(void *p)
+{
+    LazyDoc *const lazy_doc = (LazyDoc *)p;
+    lazy_doc_close(lazy_doc);
+}
+/* Load the data of +lazy_df+ and store it in the hash +self+ under +rkey+.
+ * A field with exactly one data item is stored as a String, any other size
+ * as an Array of Strings. Returns the stored value, or nil (storing
+ * nothing) when +lazy_df+ is NULL. */
+static VALUE
+frb_lazy_df_load(VALUE self, VALUE rkey, LazyDocField *lazy_df)
+{
+    VALUE rloaded;
+    if (!lazy_df) {
+        return Qnil;
+    }
+    if (lazy_df->size != 1) {
+        int idx;
+        rloaded = rb_ary_new2(lazy_df->size);
+        for (idx = 0; idx < lazy_df->size; idx++) {
+            char *item = lazy_df_get_data(lazy_df, idx);
+            rb_ary_store(rloaded, idx,
+                         rb_str_new(item, lazy_df->data[idx].length));
+        }
+    } else {
+        char *item = lazy_df_get_data(lazy_df, 0);
+        rloaded = rb_str_new(item, lazy_df->len);
+    }
+    rb_hash_aset(self, rkey, rloaded);
+    return rloaded;
+}
+/*
+ *  call-seq:
+ *     lazy_doc.default(key) -> string
+ *
+ *  This method is used internally to lazily load fields. You should never
+ *  really need to call it yourself.
+ */
+static VALUE
+frb_lzd_default(VALUE self, VALUE rkey)
+{
+    /* the LazyDoc lives in the object stored in the id_data ivar */
+    LazyDoc *lazy_doc = (LazyDoc *)DATA_PTR(rb_ivar_get(self, id_data));
+    Symbol field = frb_field(rkey);
+    /* the value is stored under the FSYM2SYM form of the field, not rkey */
+    VALUE rfield = FSYM2SYM(field);
+    LazyDocField *lazy_df = lazy_doc_get(lazy_doc, field);
+    return frb_lazy_df_load(self, rfield, lazy_df);
+}
+/*
+ *  call-seq:
+ *     lazy_doc.fields -> array of available fields
+ *
+ *  Returns the list of fields stored for this particular document. If you try
+ *  to access any of these fields in the document the field will be loaded.
+ *  Try to access any other field and nil will be returned.
+ */
+static VALUE
+frb_lzd_fields(VALUE self)
+{
+    /* the field list is kept in the id_fields instance variable */
+    VALUE rfields = rb_ivar_get(self, id_fields);
+    return rfields;
+}
+/*
+ *  call-seq:
+ *     lazy_doc.load -> lazy_doc
+ *
+ *  Load all unloaded fields in the document from the index.
+ */
+static VALUE
+frb_lzd_load(VALUE self)
+{
+    LazyDoc *lazy_doc = (LazyDoc *)DATA_PTR(rb_ivar_get(self, id_data));
+    int idx = 0;
+    /* load each field of the lazy document under its own name */
+    while (idx < lazy_doc->size) {
+        LazyDocField *field = lazy_doc->fields[idx];
+        frb_lazy_df_load(self, FSYM2SYM(field->name), field);
+        idx++;
+    }
+    return self;
+}
+/* Create the Ruby object for +lazy_doc+: a hash set up as a cLazyDoc that
+ * carries the wrapped LazyDoc in its id_data ivar and an array of the field
+ * names in its id_fields ivar. The wrapper is freed with
+ * frb_lzd_date_free. */
+VALUE
+frb_get_lazy_doc(LazyDoc *lazy_doc)
+{
+    VALUE rfields = rb_ary_new2(lazy_doc->size);
+    VALUE rlazy_doc = rb_hash_new();
+    VALUE rwrapper;
+    int idx = 0;
+    OBJSETUP(rlazy_doc, cLazyDoc, T_HASH);
+    rwrapper = Data_Wrap_Struct(cLazyDocData, NULL, &frb_lzd_date_free,
+                                lazy_doc);
+    rb_ivar_set(rlazy_doc, id_data, rwrapper);
+    while (idx < lazy_doc->size) {
+        rb_ary_store(rfields, idx, FSYM2SYM(lazy_doc->fields[idx]->name));
+        idx++;
+    }
+    rb_ivar_set(rlazy_doc, id_fields, rfields);
+    return rlazy_doc;
+}
+/****************************************************************************
+ *
+ * IndexReader Methods
+ *
+ ****************************************************************************/
+/* Free function for wrapped IndexReader objects: passes the pointer to
+ * object_del and then closes the reader. */
+void
+frb_ir_free(void *p)
+{
+    IndexReader *const reader = (IndexReader *)p;
+    object_del(p);
+    ir_close(reader);
+}
+/* Mark function for wrapped IndexReader objects: passes the reader's store
+ * to frb_gc_mark. */
+void
+frb_ir_mark(void *p)
+{
+    IndexReader *const reader = (IndexReader *)p;
+    frb_gc_mark(reader->store);
+}
+static VALUE frb_ir_close(VALUE self);
+/* Mark function for wrapped MultiReader objects: passes each of the r_cnt
+ * sub-readers to frb_gc_mark. */
+void
+frb_mr_mark(void *p)
+{
+    MultiReader *const multi = (MultiReader *)p;
+    int idx = 0;
+    while (idx < multi->r_cnt) {
+        frb_gc_mark(multi->sub_readers[idx]);
+        idx++;
+    }
+}
+/*
+ *  call-seq:
+ *     IndexReader.new(dir) -> index_reader
+ *
+ *  Create a new IndexReader. You can either pass a string path to a
+ *  file-system directory or an actual Ferret::Store::Directory object. For
+ *  example;
+ *
+ *    dir = RAMDirectory.new()
+ *    iw = IndexReader.new(dir)
+ *
+ *    dir = FSDirectory.new("/path/to/index")
+ *    iw = IndexReader.new(dir)
+ *
+ *    iw = IndexReader.new("/path/to/index")
+ *
+ *  You can also create what used to be known as a MultiReader by passing an
+ *  array of IndexReader objects, Ferret::Store::Directory objects or
+ *  file-system paths;
+ *
+ *    iw = IndexReader.new([dir, dir2, dir3])
+ *
+ *    iw = IndexReader.new([reader1, reader2, reader3])
+ *
+ *    iw = IndexReader.new(["/path/to/index1", "/path/to/index2"])
+ */
+static VALUE
+frb_ir_init(VALUE self, VALUE rdir)
+{
+    Store *store = NULL;
+    IndexReader *ir;
+    int i;
+    FieldInfos *fis;
+    VALUE rfield_num_map = rb_hash_new();
+    if (TYPE(rdir) == T_ARRAY) {
+        /* Array argument: open (or reuse) one sub-reader per element and
+         * combine them with mr_open(). */
+        VALUE rdirs = rdir;
+        const int reader_cnt = (int)RARRAY_LEN(rdir);
+        IndexReader **sub_readers = ALLOC_N(IndexReader *, reader_cnt);
+        /* NOTE(review): if rb_raise fires inside this loop, sub_readers and
+         * any readers already opened/REF'd do not appear to be released --
+         * confirm and add cleanup if so. */
+        for (i = 0; i < reader_cnt; i++) {
+            rdir = RARRAY_PTR(rdirs)[i];
+            switch (TYPE(rdir)) {
+                case T_DATA:
+                    if (CLASS_OF(rdir) == cIndexReader) {
+                        /* existing reader: share it and REF() it */
+                        Data_Get_Struct(rdir, IndexReader, sub_readers[i]);
+                        REF(sub_readers[i]);
+                        continue;
+                    } else if (RTEST(rb_obj_is_kind_of(rdir, cDirectory))) {
+                        store = DATA_PTR(rdir);
+                    } else {
+                        rb_raise(rb_eArgError, "A Multi-IndexReader can only "
+                                 "be created from other IndexReaders, "
+                                 "Directory objects or file-system paths. "
+                                 "Not %s",
+                                 rs2s(rb_obj_as_string(rdir)));
+                    }
+                    break;
+                case T_STRING:
+                    frb_create_dir(rdir);
+                    store = open_fs_store(rs2s(rdir));
+                    DEREF(store);
+                    break;
+                default:
+                    rb_raise(rb_eArgError, "%s isn't a valid directory "
+                             "argument. You should use either a String or "
+                             "a Directory",
+                             rs2s(rb_obj_as_string(rdir)));
+                    break;
+            }
+            sub_readers[i] = ir_open(store);
+        }
+        ir = mr_open(sub_readers, reader_cnt);
+        Frt_Wrap_Struct(self, &frb_mr_mark, &frb_ir_free, ir);
+    } else {
+        switch (TYPE(rdir)) {
+            case T_DATA:
+                /* Only Directory objects wrap a Store. Any other T_DATA
+                 * object (an IndexReader, for instance) must be rejected
+                 * rather than having its data pointer treated as a Store.
+                 * This mirrors the check done in the array branch. */
+                if (!RTEST(rb_obj_is_kind_of(rdir, cDirectory))) {
+                    rb_raise(rb_eArgError, "%s isn't a valid directory "
+                             "argument. You should use either a String or "
+                             "a Directory",
+                             rs2s(rb_obj_as_string(rdir)));
+                }
+                store = DATA_PTR(rdir);
+                break;
+            case T_STRING:
+                frb_create_dir(rdir);
+                store = open_fs_store(rs2s(rdir));
+                DEREF(store);
+                break;
+            default:
+                rb_raise(rb_eArgError, "%s isn't a valid directory argument. "
+                         "You should use either a String or a Directory",
+                         rs2s(rb_obj_as_string(rdir)));
+                break;
+        }
+        ir = ir_open(store);
+        Frt_Wrap_Struct(self, &frb_ir_mark, &frb_ir_free, ir);
+    }
+    object_add(ir, self);
+    /* Cache a field-name => field-number hash on the Ruby object. */
+    fis = ir->fis;
+    for (i = 0; i < fis->size; i++) {
+        FieldInfo *fi = fis->fields[i];
+        rb_hash_aset(rfield_num_map,
+                     FSYM2SYM(fi->name),
+                     INT2FIX(fi->number));
+    }
+    rb_ivar_set(self, id_fld_num_map, rfield_num_map);
+    return self;
+}
+/*
+ *  call-seq:
+ *     index_reader.set_norm(doc_id, field, val)
+ *
+ *  Expert: change the boost value for a +field+ in document at +doc_id+.
+ *  +val+ should be an integer in the range 0..255 which corresponds to an
+ *  encoded float value.
+ */
+static VALUE
+frb_ir_set_norm(VALUE self, VALUE rdoc_id, VALUE rfield, VALUE rval)
+{
+    /* Unpack the Ruby arguments, then forward them to ir_set_norm(). */
+    IndexReader *reader = (IndexReader *)DATA_PTR(self);
+    const int doc_num = FIX2INT(rdoc_id);
+    const uchar norm = (uchar)NUM2CHR(rval);
+    ir_set_norm(reader, doc_num, frb_field(rfield), norm);
+    return self;
+}
+/*
+ *  call-seq:
+ *     index_reader.norms(field) -> string
+ *
+ *  Expert: Returns a string containing the norm values for a field. The
+ *  string length will be equal to the number of documents in the index and it
+ *  could have null bytes.
+ */
+static VALUE
+frb_ir_norms(VALUE self, VALUE rfield)
+{
+    IndexReader *reader = (IndexReader *)DATA_PTR(self);
+    uchar *norm_bytes = ir_get_norms(reader, frb_field(rfield));
+    /* no norms for this field => nil */
+    if (!norm_bytes) {
+        return Qnil;
+    }
+    /* one byte per document, copied into a new Ruby string */
+    return rb_str_new((char *)norm_bytes, reader->max_doc(reader));
+}
+/*
+ *  call-seq:
+ *     index_reader.get_norms_into(field, buffer, offset) -> buffer
+ *
+ *  Expert: Get the norm values into a string +buffer+ starting at +offset+.
+ */
+static VALUE
+frb_ir_get_norms_into(VALUE self, VALUE rfield, VALUE rnorms, VALUE roffset)
+{
+    IndexReader *ir = (IndexReader *)DATA_PTR(self);
+    int offset;
+    int max_doc;
+    offset = FIX2INT(roffset);
+    Check_Type(rnorms, T_STRING);
+    /* A negative offset would pass the length check below and then write
+     * before the start of the buffer. */
+    if (offset < 0) {
+        rb_raise(rb_eArgError, "supplied a negative offset:%d to "
+                 "IndexReader#get_norms_into", offset);
+    }
+    max_doc = ir->max_doc(ir);
+    /* widen before adding so offset + max_doc cannot overflow int */
+    if (RSTRING_LEN(rnorms) < (long)offset + max_doc) {
+        /* RSTRING_LEN is a long, so it needs %ld rather than %d */
+        rb_raise(rb_eArgError, "supplied a string of length:%ld to "
+                 "IndexReader#get_norms_into but needed a string of length "
+                 "offset:%d + maxdoc:%d",
+                 (long)RSTRING_LEN(rnorms), offset, max_doc);
+    }
+    /* The buffer is written in place: rb_str_modify raises for frozen
+     * strings and un-shares the buffer before the pointer is taken. */
+    rb_str_modify(rnorms);
+    ir_get_norms_into(ir, frb_field(rfield),
+                      (uchar *)rs2s(rnorms) + offset);
+    return rnorms;
+}
+/*
+ *  call-seq:
+ *     index_reader.commit -> index_reader
+ *
+ *  Commit any deletes made by this particular IndexReader to the index. This
+ *  will open a Commit lock.
+ */
+static VALUE
+frb_ir_commit(VALUE self)
+{
+    /* Hand the wrapped reader to ir_commit(); returns the receiver. */
+    IndexReader *reader = (IndexReader *)DATA_PTR(self);
+    ir_commit(reader);
+    return self;
+}
+/*
+ *  call-seq:
+ *     index_reader.close -> index_reader
+ *
+ *  Close the IndexReader. This method also commits any deletions made by this
+ *  IndexReader. This method will be called implicitly by the garbage
+ *  collector but you should call it explicitly to commit any changes as soon
+ *  as possible and to close any locks held by the object to prevent locking
+ *  errors.
+ */
+static VALUE
+frb_ir_close(VALUE self)
+{
+    /* Fetch the native reader first: the wrapper is unwrapped below. */
+    IndexReader *reader = (IndexReader *)DATA_PTR(self);
+    object_del(reader);
+    Frt_Unwrap_Struct(self);
+    /* close only after the Ruby object no longer wraps the reader */
+    ir_close(reader);
+    return self;
+}
+/*
+ *  call-seq:
+ *     index_reader.has_deletions? -> bool
+ *
+ *  Return true if the index has any deletions, either uncommitted by this
+ *  IndexReader or committed by any other IndexReader.
+ */
+static VALUE
+frb_ir_has_deletions(VALUE self)
+{
+    IndexReader *reader = (IndexReader *)DATA_PTR(self);
+    /* map the reader's C truth value onto Ruby true/false */
+    if (reader->has_deletions(reader)) {
+        return Qtrue;
+    }
+    return Qfalse;
+}
+/*
+ *  call-seq:
+ *     index_reader.delete(doc_id) -> index_reader
+ *
+ *  Delete document referenced internally by document id +doc_id+. The
+ *  document_id is the number used to reference documents in the index and is
+ *  returned by search methods.
+ */
+static VALUE
+frb_ir_delete(VALUE self, VALUE rdoc_id)
+{
+    IndexReader *reader = (IndexReader *)DATA_PTR(self);
+    const int doc_num = FIX2INT(rdoc_id);
+    /* forward to ir_delete_doc(); the receiver is returned for chaining */
+    ir_delete_doc(reader, doc_num);
+    return self;
+}
+/*
+ *  call-seq:
+ *     index_reader.deleted?(doc_id) -> bool
+ *
+ *  Returns true if the document at +doc_id+ has been deleted.
+ */
+static VALUE
+frb_ir_is_deleted(VALUE self, VALUE rdoc_id)
+{
+    IndexReader *reader = (IndexReader *)DATA_PTR(self);
+    const int doc_num = FIX2INT(rdoc_id);
+    /* map the reader's C truth value onto Ruby true/false */
+    if (reader->is_deleted(reader, doc_num)) {
+        return Qtrue;
+    }
+    return Qfalse;
+}
+/*
+ *  call-seq:
+ *     index_reader.max_doc -> number
+ *
+ *  Returns 1 + the maximum document id in the index. It is the
+ *  document_id that will be used by the next document added to the index. If
+ *  there are no deletions, this number also refers to the number of documents
+ *  in the index.
+ */
+static VALUE
+frb_ir_max_doc(VALUE self)
+{
+    /* Ask the reader for its max_doc and box it as a Fixnum. */
+    IndexReader *reader = (IndexReader *)DATA_PTR(self);
+    const long doc_limit = reader->max_doc(reader);
+    return INT2FIX(doc_limit);
+}
+/*
+ *  call-seq:
+ *     index_reader.num_docs -> number
+ *
+ *  Returns the number of accessible (not deleted) documents in the index.
+ *  This will be equal to IndexReader#max_doc if there have been no documents
+ *  deleted from the index.
+ */
+static VALUE
+frb_ir_num_docs(VALUE self)
+{
+    /* Ask the reader for its num_docs and box it as a Fixnum. */
+    IndexReader *reader = (IndexReader *)DATA_PTR(self);
+    const long doc_total = reader->num_docs(reader);
+    return INT2FIX(doc_total);
+}
+/*
+ *  call-seq:
+ *     index_reader.undelete_all -> index_reader
+ *
+ *  Undelete all deleted documents in the index. This is kind of like a
+ *  rollback feature. Note that once an index is committed or a merge happens
+ *  during indexing, deletions will be committed and undelete_all will have no
+ *  effect on these documents.
+ */
+static VALUE
+frb_ir_undelete_all(VALUE self)
+{
+    /* Forward to ir_undelete_all(); the receiver is returned. */
+    IndexReader *reader = (IndexReader *)DATA_PTR(self);
+    ir_undelete_all(reader);
+    return self;
+}
+/*
+ * Return a Ruby array holding the lazy documents pos .. pos+len-1, with the
+ * range clipped so it never reaches past +max+ (the reader's max_doc).
+ *
+ * Raises ArgumentError for a negative +pos+ instead of handing a negative
+ * document number to get_lazy_doc(). A negative resulting length (negative
+ * +len+, or +pos+ beyond +max+) is still rejected by rb_ary_new2().
+ */
+static VALUE
+frb_get_doc_range(IndexReader *ir, int pos, int len, int max)
+{
+    VALUE ary;
+    int i;
+    if (pos < 0) {
+        rb_raise(rb_eArgError, "index %d is out of range for "
+                 "IndexReader#[]", pos);
+    }
+    /* Same result as len = min(max, pos + len) - pos, but computed without
+     * forming pos + len, which could overflow int. */
+    if (len > max - pos) {
+        len = max - pos;
+    }
+    ary = rb_ary_new2(len);
+    for (i = 0; i < len; i++) {
+        rb_ary_store(ary, i, frb_get_lazy_doc(ir->get_lazy_doc(ir, i + pos)));
+    }
+    return ary;
+}
+/*
+ *  call-seq:
+ *     index_reader.get_document(doc_id) -> LazyDoc
+ *     index_reader[doc_id] -> LazyDoc
+ *
+ *  Retrieve a document from the index. See LazyDoc for more details on the
+ *  document returned. Documents are referenced internally by document ids
+ *  which are returned by the Searchers search methods.
+ */
+static VALUE
+frb_ir_get_doc(int argc, VALUE *argv, VALUE self)
+{
+    IndexReader *ir = (IndexReader *)DATA_PTR(self);
+    VALUE arg1, arg2;
+    long pos, len;
+    long max = ir->max_doc(ir);
+    rb_scan_args(argc, argv, "11", &arg1, &arg2);
+    if (argc == 1) {
+        if (FIXNUM_P(arg1)) {
+            /* single document; negative ids count back from max_doc */
+            const long requested = FIX2INT(arg1);
+            pos = (requested < 0) ? (max + requested) : requested;
+            if (pos < 0 || pos >= max) {
+                /* long arguments need %ld; the original format also had a
+                 * stray extra argument */
+                rb_raise(rb_eArgError, "index %ld is out of range [%ld..%ld] "
+                         "for IndexReader#[]", requested, 0L, max - 1);
+            }
+            return frb_get_lazy_doc(ir->get_lazy_doc(ir, pos));
+        }
+        /* check if idx is Range */
+        /* FIXME: test this with dodgy values */
+        switch (rb_range_beg_len(arg1, &pos, &len, max, 0)) {
+            case Qfalse:
+                /* arg1 is not necessarily a Symbol, so inspect it rather
+                 * than applying SYM2ID; a Symbol still prints as ":name".
+                 * rb_raise does not return, so there is no fall-through. */
+                rb_raise(rb_eArgError, "%s isn't a valid argument for "
+                         "IndexReader.get_document(index)",
+                         rs2s(rb_inspect(arg1)));
+            case Qnil:
+                return Qnil;
+            default:
+                return frb_get_doc_range(ir, pos, len, max);
+        }
+    }
+    else {
+        /* (start, length) form. NUM2LONG type-checks its argument, whereas
+         * FIX2LONG would silently decode a non-Fixnum VALUE as garbage. */
+        const long requested = NUM2LONG(arg1);
+        len = NUM2LONG(arg2);
+        /* negative start counts back from max_doc, like the one-arg form */
+        pos = (requested < 0) ? (max + requested) : requested;
+        if (pos < 0 || pos > max) {
+            rb_raise(rb_eArgError, "index %ld is out of range [%ld..%ld] "
+                     "for IndexReader#[]", requested, -max, max);
+        }
+        if (len < 0) {
+            rb_raise(rb_eArgError, "negative length %ld passed to "
+                     "IndexReader#[]", len);
+        }
+        /* clip here so the value survives the narrowing to int below */
+        if (len > max - pos) {
+            len = max - pos;
+        }
+        return frb_get_doc_range(ir, pos, len, max);
+    }
+}
+/*
+ *  call-seq:
+ *     index_reader.is_latest? -> bool
+ *
+ *  Return true if the index version referenced by this IndexReader is the
+ *  latest version of the index. If it isn't you should close and reopen the
+ *  index to search the latest documents added to the index.
+ */
+static VALUE
+frb_ir_is_latest(VALUE self)
+{
+    IndexReader *reader = (IndexReader *)DATA_PTR(self);
+    /* map the result of ir_is_latest() onto Ruby true/false */
+    if (ir_is_latest(reader)) {
+        return Qtrue;
+    }
+    return Qfalse;
+}
+/*
+ *  call-seq:
+ *     index_reader.term_vector(doc_id, field) -> TermVector
+ *
+ *  Return the TermVector for the field +field+ in the document at +doc_id+ in
+ *  the index. Return nil if no such term_vector exists. See TermVector.
+ */
+static VALUE
+frb_ir_term_vector(VALUE self, VALUE rdoc_id, VALUE rfield)
+{
+    IndexReader *reader = (IndexReader *)DATA_PTR(self);
+    VALUE rvector;
+    TermVector *vector = reader->term_vector(reader, FIX2INT(rdoc_id),
+                                             frb_field(rfield));
+    /* no term vector stored => nil */
+    if (!vector) {
+        return Qnil;
+    }
+    /* convert to a Ruby object first, then destroy the native vector */
+    rvector = frb_get_tv(vector);
+    tv_destroy(vector);
+    return rvector;
+}
+/*
+ * h_each() callback used by frb_ir_term_vectors: stores the Ruby form of
+ * the term vector +value+ in the Ruby hash passed through +rtvs+, under
+ * the symbol made from +key+.
+ */
+static void
+frb_add_each_tv(void *key, void *value, void *rtvs)
+{
+    VALUE rhash = (VALUE)rtvs;
+    VALUE rname = ID2SYM((ID)key);
+    rb_hash_aset(rhash, rname, frb_get_tv(value));
+}
+/*
+ *  call-seq:
+ *     index_reader.term_vectors(doc_id) -> hash of TermVector
+ *
+ *  Return the TermVectors for the document at +doc_id+ in the index. The
+ *  value returned is a hash of the TermVectors for each field in the document
+ *  and they are referenced by field names (as symbols).
+ */
+static VALUE
+frb_ir_term_vectors(VALUE self, VALUE rdoc_id)
+{
+    IndexReader *reader = (IndexReader *)DATA_PTR(self);
+    Hash *vectors;
+    VALUE rresult;
+    vectors = reader->term_vectors(reader, FIX2INT(rdoc_id));
+    rresult = rb_hash_new();
+    /* copy every entry into the Ruby hash, then drop the native hash */
+    h_each(vectors, &frb_add_each_tv, (void *)rresult);
+    h_destroy(vectors);
+    return rresult;
+}
+/*
+ *  call-seq:
+ *     index_reader.term_docs -> TermDocEnum
+ *
+ *  Builds a TermDocEnum (term-document enumerator) for the index. You can use
+ *  this object to iterate through the documents in which certain terms occur.
+ *  See TermDocEnum for more info.
+ */
+static VALUE
+frb_ir_term_docs(VALUE self)
+{
+    /* Wrap the reader's term_docs enumerator via frb_get_tde(). */
+    IndexReader *reader = (IndexReader *)DATA_PTR(self);
+    return frb_get_tde(self, reader->term_docs(reader));
+}
+/*
+ *  call-seq:
+ *     index_reader.term_docs_for(field, term) -> TermDocEnum
+ *
+ *  Builds a TermDocEnum to iterate through the documents that contain the
+ *  term +term+ in the field +field+. See TermDocEnum for more info.
+ */
+static VALUE
+frb_ir_term_docs_for(VALUE self, VALUE rfield, VALUE rterm)
+{
+    IndexReader *reader = (IndexReader *)DATA_PTR(self);
+    /* StringValuePtr converts rterm to a String if needed */
+    char *term = StringValuePtr(rterm);
+    return frb_get_tde(self,
+                       ir_term_docs_for(reader, frb_field(rfield), term));
+}
+/*
+ *  call-seq:
+ *     index_reader.term_positions -> TermDocEnum
+ *
+ *  Same as IndexReader#term_docs except the TermDocEnum will also allow you
+ *  to scan through the positions at which a term occurs. See TermDocEnum for
+ *  more info.
+ */
+static VALUE
+frb_ir_term_positions(VALUE self)
+{
+    /* Wrap the reader's term_positions enumerator via frb_get_tde(). */
+    IndexReader *reader = (IndexReader *)DATA_PTR(self);
+    return frb_get_tde(self, reader->term_positions(reader));
+}
+/*
+ *  call-seq:
+ *     index_reader.term_positions_for(field, term) -> TermDocEnum
+ *
+ *  Same as IndexReader#term_docs_for(field, term) except the TermDocEnum will
+ *  also allow you to scan through the positions at which a term occurs. See
+ *  TermDocEnum for more info.
+ */
+static VALUE
+frb_ir_t_pos_for(VALUE self, VALUE rfield, VALUE rterm)
+{
+    IndexReader *reader = (IndexReader *)DATA_PTR(self);
+    /* StringValuePtr converts rterm to a String if needed */
+    char *term = StringValuePtr(rterm);
+    return frb_get_tde(self,
+                       ir_term_positions_for(reader, frb_field(rfield), term));
+}
+/*
+ *  call-seq:
+ *     index_reader.doc_freq(field, term) -> integer
+ *
+ *  Return the number of documents in which the term +term+ appears in the
+ *  field +field+.
+ */
+static VALUE
+frb_ir_doc_freq(VALUE self, VALUE rfield, VALUE rterm)
+{
+    IndexReader *reader = (IndexReader *)DATA_PTR(self);
+    /* StringValuePtr converts rterm to a String if needed */
+    char *term = StringValuePtr(rterm);
+    return INT2FIX(ir_doc_freq(reader, frb_field(rfield), term));
+}
+/*
+ *  call-seq:
+ *     index_reader.terms(field) -> TermEnum
+ *
+ *  Returns a term enumerator which allows you to iterate through all the
+ *  terms in the field +field+ in the index.
+ */
+static VALUE
+frb_ir_terms(VALUE self, VALUE rfield)
+{
+    /* Wrap the enumerator returned by ir_terms() via frb_get_te(). */
+    IndexReader *reader = (IndexReader *)DATA_PTR(self);
+    return frb_get_te(self, ir_terms(reader, frb_field(rfield)));
+}
+/*
+ *  call-seq:
+ *     index_reader.terms_from(field, term) -> TermEnum
+ *
+ *  Same as IndexReader#terms(fields) except that it starts the enumerator off
+ *  at term +term+.
+ */
+static VALUE
+frb_ir_terms_from(VALUE self, VALUE rfield, VALUE rterm)
+{
+    IndexReader *reader = (IndexReader *)DATA_PTR(self);
+    /* StringValuePtr converts rterm to a String if needed */
+    char *term = StringValuePtr(rterm);
+    return frb_get_te(self,
+                      ir_terms_from(reader, frb_field(rfield), term));
+}
+/*
+ *  call-seq:
+ *     index_reader.term_count(field) -> int
+ *
+ *  Returns a count of the number of terms in the field +field+.
+ */
+static VALUE
+frb_ir_term_count(VALUE self, VALUE rfield)
+{
+    IndexReader *reader = (IndexReader *)DATA_PTR(self);
+    TermEnum *terms = ir_terms(reader, frb_field(rfield));
+    int total;
+    /* walk the enumerator to its end, counting each step */
+    for (total = 0; terms->next(terms); total++) {
+    }
+    terms->close(terms);
+    return INT2FIX(total);
+}
+/*
+ *  call-seq:
+ *     index_reader.fields -> array of field-names
+ *
+ *  Returns an array of field names in the index. This can be used to pass to
+ *  the QueryParser so that the QueryParser knows how to expand the "*"
+ *  wild-card to all fields in the index. A list of field names can also be
+ *  gathered from the FieldInfos object.
+ */
+static VALUE
+frb_ir_fields(VALUE self)
+{
+    IndexReader *reader = (IndexReader *)DATA_PTR(self);
+    FieldInfos *infos = reader->fis;
+    VALUE rnames = rb_ary_new();
+    int idx = 0;
+    /* append every field's name, as a symbol, in field order */
+    while (idx < infos->size) {
+        rb_ary_push(rnames, FSYM2SYM(infos->fields[idx]->name));
+        idx++;
+    }
+    return rnames;
+}
+/*
+ *  call-seq:
+ *     index_reader.field_infos -> FieldInfos
+ *
+ *  Get the FieldInfos object for this IndexReader.
+ */
+static VALUE
+frb_ir_field_infos(VALUE self)
+{
+    /* Convert the reader's FieldInfos with frb_get_field_infos(). */
+    IndexReader *reader = (IndexReader *)DATA_PTR(self);
+    return frb_get_field_infos(reader->fis);
+}
+/*
+ *  call-seq:
+ *     index_reader.tokenized_fields -> array of field-names
+ *
+ *  Returns an array of field names of all of the tokenized fields in the
+ *  index. This can be used to pass to the QueryParser so that the QueryParser
+ *  knows how to expand the "*" wild-card to all fields in the index. A list
+ *  of field names can also be gathered from the FieldInfos object.
+ */
+static VALUE
+frb_ir_tk_fields(VALUE self)
+{
+    IndexReader *reader = (IndexReader *)DATA_PTR(self);
+    FieldInfos *infos = reader->fis;
+    VALUE rnames = rb_ary_new();
+    int idx;
+    for (idx = 0; idx < infos->size; idx++) {
+        /* keep only the fields that fi_is_tokenized() accepts */
+        if (fi_is_tokenized(infos->fields[idx])) {
+            rb_ary_push(rnames, FSYM2SYM(infos->fields[idx]->name));
+        }
+    }
+    return rnames;
+}
+/*
+ *  call-seq:
+ *     index_reader.version -> int
+ *
+ *  Returns the current version of the index reader.
+ */
+static VALUE
+frb_ir_version(VALUE self)
+{
+    /* The version is read from the reader's sis member and boxed with
+     * ULL2NUM. */
+    IndexReader *reader = (IndexReader *)DATA_PTR(self);
+    return ULL2NUM(reader->sis->version);
+}
+/****************************************************************************
+ *
+ * Init Functions
+ *
+ ****************************************************************************/
+/*
+ *  Document-class: Ferret::Index::FieldInfo
+ *
+ *  == Summary
+ *
+ *  The FieldInfo class is the field descriptor for the index. It specifies
+ *  whether a field is compressed or not or whether it should be indexed and
+ *  tokenized. Every field has a name which must be a symbol. There are three
+ *  properties that you can set, +:store+, +:index+ and +:term_vector+. You
+ *  can also set the default +:boost+ for a field as well.
+ *
+ *  == Properties
+ *
+ *  === :store
+ *
+ *  The +:store+ property allows you to specify how a field is stored. You can
+ *  leave a field unstored (+:no+), store it in its original format (+:yes+)
+ *  or store it in compressed format (+:compressed+). By default the document
+ *  is stored in its original format. If the field is large and it is stored
+ *  elsewhere where it is easily accessible you might want to leave it
+ *  unstored. This will keep the index size a lot smaller and make the
+ *  indexing process a lot faster. For example, you should probably leave the
+ *  +:content+ field unstored when indexing all the documents in your
+ *  file-system.
+ *
+ *  === :index
+ *
+ *  The +:index+ property allows you to specify how a field is indexed. A
+ *  field must be indexed to be searchable. However, a field doesn't need to
+ *  be indexed to be stored in the Ferret index. You may want to use the index
+ *  as a simple database and store things like images or MP3s in the index. By
+ *  default each field is indexed and tokenized (split into tokens) (+:yes+).
+ *  If you don't want to index the field use +:no+. If you want the field
+ *  indexed but not tokenized, use +:untokenized+. Do this for the fields you
+ *  wish to sort by. There are two other values for +:index+; +:omit_norms+
+ *  and +:untokenized_omit_norms+. These values correspond to +:yes+ and
+ *  +:untokenized+ respectively and are useful if you are not boosting any
+ *  fields and you'd like to speed up the index. The norms file is the file
+ *  which contains the boost values for each document for a particular field.
+ *
+ *  === :term_vector
+ *
+ *  See TermVector for a description of term-vectors. You can specify whether
+ *  or not you would like to store term-vectors. The available options are
+ *  +:no+, +:yes+, +:with_positions+, +:with_offsets+ and
+ *  +:with_positions_offsets+. Note that you need to store the positions to
+ *  associate offsets with individual terms in the term_vector.
+ *
+ *  == Property Table
+ *
+ *    Property       Value                     Description
+ *    ------------------------------------------------------------------------
+ *     :store       | :no                     | Don't store field
+ *                  |                         |
+ *                  | :yes (default)          | Store field in its original
+ *                  |                         | format. Use this value if you
+ *                  |                         | want to highlight matches
+ *                  |                         | or print match excerpts a la
+ *                  |                         | Google search.
+ *                  |                         |
+ *                  | :compressed             | Store field in compressed
+ *                  |                         | format.
+ *     -------------|-------------------------|------------------------------
+ *     :index       | :no                     | Do not make this field
+ *                  |                         | searchable.
+ *                  |                         |
+ *                  | :yes (default)          | Make this field searchable and
+ *                  |                         | tokenize its contents.
+ *                  |                         |
+ *                  | :untokenized            | Make this field searchable but
+ *                  |                         | do not tokenize its contents.
+ *                  |                         | use this value for fields you
+ *                  |                         | wish to sort by.
+ *                  |                         |
+ *                  | :omit_norms             | Same as :yes except omit the
+ *                  |                         | norms file. The norms file can
+ *                  |                         | be omitted if you don't boost
+ *                  |                         | any fields and you don't need
+ *                  |                         | scoring based on field length.
+ *                  |                         |
+ *                  | :untokenized_omit_norms | Same as :untokenized except omit
+ *                  |                         | the norms file. Norms files can
+ *                  |                         | be omitted if you don't boost
+ *                  |                         | any fields and you don't need
+ *                  |                         | scoring based on field length.
+ *                  |                         |
+ *     -------------|-------------------------|------------------------------
+ *     :term_vector | :no                     | Don't store term-vectors
+ *                  |                         |
+ *                  | :yes                    | Store term-vectors without
+ *                  |                         | storing positions or offsets.
+ *                  |                         |
+ *                  | :with_positions         | Store term-vectors with
+ *                  |                         | positions.
+ *                  |                         |
+ *                  | :with_offsets           | Store term-vectors with
+ *                  |                         | offsets.
+ *                  |                         |
+ *                  | :with_positions_offsets | Store term-vectors with
+ *                  | (default)               | positions and offsets.
+ *     -------------|-------------------------|------------------------------
+ *     :boost       | Float                   | The boost property is used to
+ *                  |                         | set the default boost for a
+ *                  |                         | field. This boost value will
+ *                  |                         | be used for all instances of
+ *                  |                         | the field in the index unless
+ *                  |                         | otherwise specified when you
+ *                  |                         | create the field. All values
+ *                  |                         | should be positive.
+ *                  |                         |
+ *
+ *  == Examples
+ *
+ *    fi = FieldInfo.new(:title, :index => :untokenized, :term_vector => :no,
+ *                       :boost => 10.0)
+ *
+ *    fi = FieldInfo.new(:content)
+ *
+ *    fi = FieldInfo.new(:created_on, :index => :untokenized_omit_norms,
+ *                       :term_vector => :no)
+ *
+ *    fi = FieldInfo.new(:image, :store => :compressed, :index => :no,
+ *                       :term_vector => :no)
+ */
+static void
+Init_FieldInfo(void)
+{
+    /* Symbols for the option keys and option values that the FieldInfo
+     * documentation above describes (:store, :index, :term_vector, ...). */
+    sym_store = ID2SYM(rb_intern("store"));
+    sym_index = ID2SYM(rb_intern("index"));
+    sym_term_vector = ID2SYM(rb_intern("term_vector"));
+    sym_compress = ID2SYM(rb_intern("compress"));
+    sym_compressed = ID2SYM(rb_intern("compressed"));
+    sym_untokenized = ID2SYM(rb_intern("untokenized"));
+    sym_omit_norms = ID2SYM(rb_intern("omit_norms"));
+    sym_untokenized_omit_norms = ID2SYM(rb_intern("untokenized_omit_norms"));
+    sym_with_positions = ID2SYM(rb_intern("with_positions"));
+    sym_with_offsets = ID2SYM(rb_intern("with_offsets"));
+    sym_with_positions_offsets = ID2SYM(rb_intern("with_positions_offsets"));
+    /* Define the FieldInfo class under mIndex and register its allocator
+     * and instance methods. An arity of -1 means the C function receives
+     * its arguments as (argc, argv, self). */
+    cFieldInfo = rb_define_class_under(mIndex, "FieldInfo", rb_cObject);
+    rb_define_alloc_func(cFieldInfo, frb_data_alloc);
+    rb_define_method(cFieldInfo, "initialize",  frb_fi_init, -1);
+    rb_define_method(cFieldInfo, "name",        frb_fi_name, 0);
+    rb_define_method(cFieldInfo, "stored?",     frb_fi_is_stored, 0);
+    rb_define_method(cFieldInfo, "compressed?", frb_fi_is_compressed, 0);
+    rb_define_method(cFieldInfo, "indexed?",    frb_fi_is_indexed, 0);
+    rb_define_method(cFieldInfo, "tokenized?",  frb_fi_is_tokenized, 0);
+    rb_define_method(cFieldInfo, "omit_norms?", frb_fi_omit_norms, 0);
+    rb_define_method(cFieldInfo, "store_term_vector?",
+                                                frb_fi_store_term_vector, 0);
+    rb_define_method(cFieldInfo, "store_positions?",
+                                                frb_fi_store_positions, 0);
+    rb_define_method(cFieldInfo, "store_offsets?",
+                                                frb_fi_store_offsets, 0);
+    rb_define_method(cFieldInfo, "has_norms?",  frb_fi_has_norms, 0);
+    rb_define_method(cFieldInfo, "boost",       frb_fi_boost, 0);
+    rb_define_method(cFieldInfo, "to_s",        frb_fi_to_s, 0);
+}
+/*
+ *  Document-class: Ferret::Index::FieldInfos
+ *
+ *  == Summary
+ *
+ *  The FieldInfos class holds all the field descriptors for an index. It is
+ *  this class that is used to create a new index using the
+ *  FieldInfos#create_index method. If you are happy with the default
+ *  properties for FieldInfo then you don't need to worry about this class.
+ *  IndexWriter can create the index for you. Otherwise you should set up the
+ *  index like in the example;
+ *
+ *  == Example
+ *
+ *    field_infos = FieldInfos.new(:term_vector => :no)
+ *
+ *    field_infos.add_field(:title, :index => :untokenized, :term_vector => :no,
+ *                          :boost => 10.0)
+ *
+ *    field_infos.add_field(:content)
+ *
+ *    field_infos.add_field(:created_on, :index => :untokenized_omit_norms,
+ *                          :term_vector => :no)
+ *
+ *    field_infos.add_field(:image, :store => :compressed, :index => :no,
+ *                          :term_vector => :no)
+ *
+ *    field_infos.create_index("/path/to/index")
+ *
+ *  == Default Properties
+ *
+ *  See FieldInfo for the available field property values.
+ *
+ *  When you create the FieldInfos object you specify the default properties
+ *  for the fields. Often you'll specify all of the fields in the index before
+ *  you create the index so the default values won't come into play. However,
+ *  it is possible to continue to dynamically add fields as indexing goes
+ *  along. If you add a document to the index which has fields that the index
+ *  doesn't know about then the default properties are used for the new field.
+ */
+static void
+Init_FieldInfos(void)
+{
+    /* FieldInfo (symbols and class) is set up first. */
+    Init_FieldInfo();
+    /* Define the FieldInfos class under mIndex and register its allocator
+     * and instance methods. */
+    cFieldInfos = rb_define_class_under(mIndex, "FieldInfos", rb_cObject);
+    rb_define_alloc_func(cFieldInfos, frb_data_alloc);
+    rb_define_method(cFieldInfos, "initialize", frb_fis_init, -1);
+    rb_define_method(cFieldInfos, "to_a",       frb_fis_to_a, 0);
+    rb_define_method(cFieldInfos, "[]",         frb_fis_get, 1);
+    /* "add" and "<<" share one implementation */
+    rb_define_method(cFieldInfos, "add",        frb_fis_add, 1);
+    rb_define_method(cFieldInfos, "<<",         frb_fis_add, 1);
+    rb_define_method(cFieldInfos, "add_field",  frb_fis_add_field, -1);
+    rb_define_method(cFieldInfos, "each",       frb_fis_each, 0);
+    rb_define_method(cFieldInfos, "to_s",       frb_fis_to_s, 0);
+    rb_define_method(cFieldInfos, "size",       frb_fis_size, 0);
+    rb_define_method(cFieldInfos, "create_index",
+                                                frb_fis_create_index, 1);
+    rb_define_method(cFieldInfos, "fields",     frb_fis_get_fields, 0);
+    rb_define_method(cFieldInfos, "tokenized_fields", frb_fis_get_tk_fields, 0);
+}
+/*
+ *  Document-class: Ferret::Index::TermEnum
+ *
+ *  == Summary
+ *
+ *  The TermEnum object is used to iterate through the terms in a field. To
+ *  get a TermEnum you need to use the IndexReader#terms(field) method.
+ *
+ *  == Example
+ *
+ *    te = index_reader.terms(:content)
+ *
+ *    te.each {|term, doc_freq| puts "#{term} occurred #{doc_freq} times" }
+ *
+ *    # or you could do it like this;
+ *    te = index_reader.terms(:content)
+ *
+ *    while te.next?
+ *      puts "#{te.term} occurred in #{te.doc_freq} documents in the index"
+ *    end
+ */
+static void
+Init_TermEnum(void)
+{
+    /* ID of the "@term" instance variable */
+    id_term = rb_intern("@term");
+    /* Define the TermEnum class under mIndex and register its allocator
+     * and instance methods. */
+    cTermEnum = rb_define_class_under(mIndex, "TermEnum", rb_cObject);
+    rb_define_alloc_func(cTermEnum, frb_data_alloc);
+    rb_define_method(cTermEnum, "next?",    frb_te_next, 0);
+    rb_define_method(cTermEnum, "term",     frb_te_term, 0);
+    rb_define_method(cTermEnum, "doc_freq", frb_te_doc_freq, 0);
+    rb_define_method(cTermEnum, "skip_to",  frb_te_skip_to, 1);
+    rb_define_method(cTermEnum, "each",     frb_te_each, 0);
+    /* "field=" and "set_field" share one implementation */
+    rb_define_method(cTermEnum, "field=",   frb_te_set_field, 1);
+    rb_define_method(cTermEnum, "set_field",frb_te_set_field, 1);
+    rb_define_method(cTermEnum, "to_json",  frb_te_to_json, -1);
+}
+/*
+ *  Document-class: Ferret::Index::TermDocEnum
+ *
+ *  == Summary
+ *
+ *  Use a TermDocEnum to iterate through the documents that contain a
+ *  particular term. You can also iterate through the positions at which the
+ *  term occurs in a document.
+ *
+ *
+ *  == Example
+ *
+ *    tde = index_reader.term_docs_for(:content, "fox")
+ *
+ *    tde.each do |doc_id, freq|
+ *      puts "fox appeared #{freq} times in document #{doc_id}:"
+ *      positions = []
+ *      tde.each_position {|pos| positions << pos}
+ *      puts "  #{positions.join(', ')}"
+ *    end
+ *
+ *    # or you can do it like this;
+ *    tde.seek(:title, "red")
+ *    while tde.next?
+ *      puts "red appeared #{tde.freq} times in document #{tde.doc}:"
+ *      positions = []
+ *      while pos = tde.next_position
+ *        positions << pos
+ *      end
+ *      puts "  #{positions.join(', ')}"
+ *    end
+ */
+static void
+Init_TermDocEnum(void)
+{
+    /* IDs of the "@field_num_map" and "@field_num" instance variables;
+     * id_fld_num_map is the ivar that frb_ir_init sets on an IndexReader. */
+    id_fld_num_map = rb_intern("@field_num_map");
+    id_field_num = rb_intern("@field_num");
+    /* Define the TermDocEnum class under mIndex and register its allocator
+     * and instance methods. */
+    cTermDocEnum = rb_define_class_under(mIndex, "TermDocEnum", rb_cObject);
+    rb_define_alloc_func(cTermDocEnum, frb_data_alloc);
+    rb_define_method(cTermDocEnum, "seek",           frb_tde_seek, 2);
+    rb_define_method(cTermDocEnum, "seek_term_enum", frb_tde_seek_te, 1);
+    rb_define_method(cTermDocEnum, "doc",            frb_tde_doc, 0);
+    rb_define_method(cTermDocEnum, "freq",           frb_tde_freq, 0);
+    rb_define_method(cTermDocEnum, "next?",          frb_tde_next, 0);
+    rb_define_method(cTermDocEnum, "next_position",  frb_tde_next_position, 0);
+    rb_define_method(cTermDocEnum, "each",           frb_tde_each, 0);
+    rb_define_method(cTermDocEnum, "each_position",  frb_tde_each_position, 0);
+    rb_define_method(cTermDocEnum, "skip_to",        frb_tde_skip_to, 1);
+    rb_define_method(cTermDocEnum, "to_json",        frb_tde_to_json, -1);
+}
+/* rdochack
+cTermVector = rb_define_class_under(mIndex, "TermVector", rb_cObject);
+*/
+/*
+ *  Document-class: Ferret::Index::TermVector::TVOffsets
+ *
+ *  == Summary
+ *
+ *  Holds the start and end byte-offsets of a term in a field. For example, if
+ *  the field was "the quick brown fox" then the start and end offsets of:
+ *
+ *    ["the", "quick", "brown", "fox"]
+ *
+ *  Would be:
+ *
+ *    [(0,3), (4,9), (10,15), (16,19)]
+ *
+ *  See the Analysis module for more information on setting the offsets.
+ */
+static void
+Init_TVOffsets(void)
+{   /*
+     * Builds the TVOffsets struct class with the members "start" and "end"
+     * and stores it in cTVOffsets. Takes no arguments and returns nothing.
+     * It reads cTermVector, so that variable has to be assigned before this
+     * function runs.
+     */
+    const char *tv_offsets_class = "TVOffsets";
+    /* rdochack
+    cTVOffsets = rb_define_class_under(cTermVector, "TVOffsets", rb_cObject);
+    */
+    cTVOffsets = rb_struct_define(tv_offsets_class, "start", "end", NULL);
+    rb_set_class_path(cTVOffsets, cTermVector, tv_offsets_class); /* class path is set under cTermVector */
+    rb_const_set(mIndex, rb_intern(tv_offsets_class), cTVOffsets); /*
+     * NOTE(review): the constant is set on mIndex while the class path
+     * above is under cTermVector - confirm this difference is intended.
+     */
+}
+/*
+ *  Document-class: Ferret::Index::TermVector::TVTerm
+ *
+ *  == Summary
+ *
+ *  The TVTerm class holds the term information for each term in a TermVector.
+ *  That is it holds the term's text and its positions in the document. You
+ *  can use those positions to reference the offsets for the term.
+ *
+ *  == Example
+ *
+ *    tv = index_reader.term_vector(doc_id, :content)
+ *    tv_term = tv.terms.find {|tvt| tvt.text == "fox"}
+ *    offsets = tv_term.positions.collect {|pos| tv.offsets[pos]}
+ */
+static void
+Init_TVTerm(void)
+{   /*
+     * Builds the TVTerm struct class with the members "text", "freq" and
+     * "positions" and stores it in cTVTerm. Takes no arguments and returns
+     * nothing. It reads cTermVector, so that variable has to be assigned
+     * before this function runs.
+     */
+    const char *tv_term_class = "TVTerm";
+    /* rdochack
+    cTVTerm = rb_define_class_under(cTermVector, "TVTerm", rb_cObject);
+    */
+    cTVTerm = rb_struct_define(tv_term_class, "text", "freq", "positions", NULL);
+    rb_set_class_path(cTVTerm, cTermVector, tv_term_class); /* class path is set under cTermVector */
+    rb_const_set(mIndex, rb_intern(tv_term_class), cTVTerm); /*
+     * NOTE(review): the constant is set on mIndex while the class path
+     * above is under cTermVector - confirm this difference is intended.
+     */
+}
+/*
+ *  Document-class: Ferret::Index::TermVector
+ *
+ *  == Summary
+ *
+ *  TermVectors are most commonly used for creating search result excerpts and
+ *  highlighting search matches in results. This is all done internally so you
+ *  won't need to worry about the TermVector object. There are some other
+ *  reasons you may want to use the TermVectors object however. For example,
+ *  you may wish to see which terms are the most commonly occurring terms in a
+ *  document to implement a MoreLikeThis search.
+ *
+ *  == Example
+ *
+ *    tv = index_reader.term_vector(doc_id, :content)
+ *    tv_term = tv.terms.find {|tvt| tvt.text == "fox"}
+ *
+ *    # get the term frequency
+ *    term_freq = tv_term.positions.size
+ *
+ *    # get the offsets for a term
+ *    offsets = tv_term.positions.collect {|pos| tv.offsets[pos]}
+ *
+ *  == Note
+ *
+ *  +positions+ and +offsets+ can be +nil+ depending on what you set the
+ *  +:term_vector+ to when you set the FieldInfo object for the field. Note in
+ *  particular that you need to store both positions and offsets if you want
+ *  to associate offsets with particular terms.
+ */
+static void
+Init_TermVector(void)
+{   /*
+     * Builds the TermVector struct class with the members "field", "terms"
+     * and "offsets", stores it in cTermVector, registers it on mIndex and
+     * then runs the initialisers for the two helper struct classes. Takes
+     * no arguments and returns nothing.
+     */
+    const char *tv_class = "TermVector";
+    /* rdochack
+    cTermVector = rb_define_class_under(mIndex, "TermVector", rb_cObject);
+    */
+    cTermVector = rb_struct_define(tv_class,
+                                   "field", "terms", "offsets", NULL);
+    rb_set_class_path(cTermVector, mIndex, tv_class);
+    rb_const_set(mIndex, rb_intern(tv_class), cTermVector);
+    Init_TVOffsets(); /* run only after cTermVector is assigned: both helpers pass it to rb_set_class_path */
+    Init_TVTerm();
+}
+/*
+ *  Document-class: Ferret::Index::IndexWriter
+ *
+ *  == Summary
+ *
+ *  The IndexWriter is the class used to add documents to an index. You can
+ *  also delete documents from the index using this class. The indexing
+ *  process is highly customizable and the IndexWriter has the following
+ *  parameters;
+ *
+ *  dir::                 This is a Ferret::Store::Directory object. You
+ *                        should either pass a +:dir+ or a +:path+ when
+ *                        creating an index.
+ *  path::                A string representing the path to the index
+ *                        directory. If you are creating the index for the
+ *                        first time the directory will be created if it's
+ *                        missing. You should not choose a directory which
+ *                        contains other files as they could be over-written.
+ *                        To protect against this set +:create_if_missing+ to
+ *                        false.
+ *  create_if_missing::   Default: true. Create the index if no index is
+ *                        found in the specified directory. Otherwise, use
+ *                        the existing index.
+ *  create::              Default: false. Creates the index, even if one
+ *                        already exists.  That means any existing index will
+ *                        be deleted. It is probably better to use the
+ *                        create_if_missing option so that the index is only
+ *                        created the first time when it doesn't exist.
+ *  field_infos::         Default FieldInfos.new. The FieldInfos object to use
+ *                        when creating a new index if +:create_if_missing+ or
+ *                        +:create+ is set to true. If an existing index is
+ *                        opened then this parameter is ignored.
+ *  analyzer::            Default: Ferret::Analysis::StandardAnalyzer.
+ *                        Sets the default analyzer for the index. This is
+ *                        used by both the IndexWriter and the QueryParser
+ *                        to tokenize the input. The default is the
+ *                        StandardAnalyzer.
+ *  chunk_size::          Default: 0x100000 or 1Mb. Memory performance tuning
+ *                        parameter. Sets the default size of chunks of memory
+ *                        malloced for use during indexing. You can usually
+ *                        leave this parameter as is.
+ *  max_buffer_memory::   Default: 0x1000000 or 16Mb. Memory performance
+ *                        tuning parameter. Sets the amount of memory to be
+ *                        used by the indexing process. Set to a larger value
+ *                        to increase indexing speed. Note that this only
+ *                        includes memory used by the indexing process, not
+ *                        the rest of your ruby application.
+ *  term_index_interval:: Default: 128. The skip interval between terms in the
+ *                        term dictionary. A smaller value will possibly
+ *                        increase search performance while also increasing
+ *                        memory usage and negatively impacting
+ *                        indexing performance.
+ *  doc_skip_interval::   Default: 16. The skip interval for document numbers
+ *                        in the index. As with +:term_index_interval+ you
+ *                        have a trade-off. A smaller number may increase
+ *                        search performance while also increasing memory
+ *                        usage and negatively impacting indexing
+ *                        performance.
+ *  merge_factor::        Default: 10. This must never be less than 2.
+ *                        Specifies the number of segments of a certain size
+ *                        that must exist before they are merged. A larger
+ *                        value will improve indexing performance while
+ *                        slowing search performance.
+ *  max_buffered_docs::   Default: 10000. The maximum number of documents that
+ *                        may be stored in memory before being written to the
+ *                        index. If you have a lot of memory and are indexing
+ *                        a large number of small documents (like products in
+ *                        a product database for example) you may want to set
+ *                        this to a much higher number (like
+ *                        Ferret::FIX_INT_MAX). If you are worried about your
+ *                        application crashing in the middle of indexing you
+ *                        might set this to a smaller number so that the index
+ *                        is committed more often. This is like having an
+ *                        auto-save in a word processor application.
+ *  max_merge_docs::      Set this value to limit the number of documents that
+ *                        go into a single segment. Use this to avoid
+ *                        extremely long merge times during indexing which can
+ *                        make your application seem unresponsive. This is
+ *                        only necessary for very large indexes (millions of
+ *                        documents).
+ *  max_field_length::    Default: 10000. The maximum number of terms added to
+ *                        a single field.  This can be useful to protect the
+ *                        indexer when indexing documents from the web for
+ *                        example. Usually the most important terms will occur
+ *                        early on in a document so you can often safely
+ *                        ignore the terms in a field after a certain number
+ *                        of them. If you wanted to speed up indexing and save
+ *                        space in your index you may only want to index the
+ *                        first 1000 terms in a field. On the other hand, if
+ *                        you want to be more thorough and you are indexing
+ *                        documents from your file-system you may set this
+ *                        parameter to Ferret::FIX_INT_MAX.
+ *  use_compound_file::   Default: true. Uses a compound file to store the
+ *                        index. This prevents an error being raised for
+ *                        having too many files open at the same time. The
+ *                        default is true but performance is better if this is
+ *                        set to false.
+ *
+ *
+ *  === Deleting Documents
+ *
+ *  Both IndexReader and IndexWriter allow you to delete documents. You should
+ *  use the IndexReader to delete documents by document id and IndexWriter to
+ *  delete documents by term which we'll explain now. It is preferable to
+ *  delete documents from an index using IndexWriter for performance reasons.
+ *  To delete documents using the IndexWriter you should give each document in
+ *  the index a unique ID. If you are indexing documents from the file-system
+ *  this unique ID will be the full file path. If indexing documents from the
+ *  database you should use the primary key as the ID field. You can then
+ *  use the delete method to delete a file referenced by the ID. For example;
+ *
+ *    index_writer.delete(:id, "/path/to/indexed/file")
+ */
+void
+Init_IndexWriter(void)
+{   /*
+     * Registers the IndexWriter class under mIndex: interns the option-key
+     * symbols, publishes the lock and DEFAULT_* constants, and binds the
+     * Ruby methods to the frb_iw_* C functions. Takes no arguments and
+     * returns nothing; it assigns id_boost, the sym_* variables and
+     * cIndexWriter, which are declared outside this function.
+     */
+    id_boost = rb_intern("boost");
+    sym_create              = ID2SYM(rb_intern("create"));
+    sym_create_if_missing   = ID2SYM(rb_intern("create_if_missing"));
+    sym_field_infos         = ID2SYM(rb_intern("field_infos"));
+    sym_chunk_size          = ID2SYM(rb_intern("chunk_size"));
+    sym_max_buffer_memory   = ID2SYM(rb_intern("max_buffer_memory"));
+    sym_index_interval      = ID2SYM(rb_intern("term_index_interval"));
+    sym_skip_interval       = ID2SYM(rb_intern("doc_skip_interval"));
+    sym_merge_factor        = ID2SYM(rb_intern("merge_factor"));
+    sym_max_buffered_docs   = ID2SYM(rb_intern("max_buffered_docs"));
+    sym_max_merge_docs      = ID2SYM(rb_intern("max_merge_docs"));
+    sym_max_field_length    = ID2SYM(rb_intern("max_field_length"));
+    sym_use_compound_file   = ID2SYM(rb_intern("use_compound_file"));
+    cIndexWriter = rb_define_class_under(mIndex, "IndexWriter", rb_cObject);
+    rb_define_alloc_func(cIndexWriter, frb_data_alloc);
+    rb_define_const(cIndexWriter, "WRITE_LOCK_TIMEOUT", INT2FIX(1));
+    rb_define_const(cIndexWriter, "COMMIT_LOCK_TIMEOUT", INT2FIX(10));
+    rb_define_const(cIndexWriter, "WRITE_LOCK_NAME",
+                    rb_str_new2(WRITE_LOCK_NAME));
+    rb_define_const(cIndexWriter, "COMMIT_LOCK_NAME",
+                    rb_str_new2(COMMIT_LOCK_NAME)); /*
+     * The default_config values below are converted with LONG2NUM rather
+     * than INT2FIX. INT2FIX does no range check, so a value outside the
+     * Fixnum range (possible where Fixnum is narrower than the C integer,
+     * e.g. 32-bit builds) would be published as a wrong number. LONG2NUM
+     * returns the same Fixnum whenever the value fits.
+     * NOTE(review): default_config is defined elsewhere; confirm its
+     * numeric fields are integer types no wider than long.
+     */
+    rb_define_const(cIndexWriter, "DEFAULT_CHUNK_SIZE",
+                    LONG2NUM(default_config.chunk_size));
+    rb_define_const(cIndexWriter, "DEFAULT_MAX_BUFFER_MEMORY",
+                    LONG2NUM(default_config.max_buffer_memory));
+    rb_define_const(cIndexWriter, "DEFAULT_TERM_INDEX_INTERVAL",
+                    LONG2NUM(default_config.index_interval));
+    rb_define_const(cIndexWriter, "DEFAULT_DOC_SKIP_INTERVAL",
+                    LONG2NUM(default_config.skip_interval));
+    rb_define_const(cIndexWriter, "DEFAULT_MERGE_FACTOR",
+                    LONG2NUM(default_config.merge_factor));
+    rb_define_const(cIndexWriter, "DEFAULT_MAX_BUFFERED_DOCS",
+                    LONG2NUM(default_config.max_buffered_docs));
+    rb_define_const(cIndexWriter, "DEFAULT_MAX_MERGE_DOCS",
+                    LONG2NUM(default_config.max_merge_docs));
+    rb_define_const(cIndexWriter, "DEFAULT_MAX_FIELD_LENGTH",
+                    LONG2NUM(default_config.max_field_length));
+    rb_define_const(cIndexWriter, "DEFAULT_USE_COMPOUND_FILE",
+                    default_config.use_compound_file ? Qtrue : Qfalse);
+    rb_define_method(cIndexWriter, "initialize",    frb_iw_init, -1); /* -1: arguments arrive as argc/argv */
+    rb_define_method(cIndexWriter, "doc_count",     frb_iw_get_doc_count, 0);
+    rb_define_method(cIndexWriter, "close",         frb_iw_close, 0);
+    rb_define_method(cIndexWriter, "add_document",  frb_iw_add_doc, 1); /* same C function as "<<" */
+    rb_define_method(cIndexWriter, "<<",            frb_iw_add_doc, 1);
+    rb_define_method(cIndexWriter, "optimize",      frb_iw_optimize, 0);
+    rb_define_method(cIndexWriter, "commit",        frb_iw_commit, 0);
+    rb_define_method(cIndexWriter, "add_readers",   frb_iw_add_readers, 1);
+    rb_define_method(cIndexWriter, "delete",        frb_iw_delete, 2);
+    rb_define_method(cIndexWriter, "field_infos",   frb_iw_field_infos, 0);
+    rb_define_method(cIndexWriter, "analyzer",      frb_iw_get_analyzer, 0);
+    rb_define_method(cIndexWriter, "analyzer=",     frb_iw_set_analyzer, 1);
+    rb_define_method(cIndexWriter, "version",       frb_iw_version, 0); /*
+     * The remaining registrations are getter/setter pairs, one pair per
+     * tuning option.
+     */
+    rb_define_method(cIndexWriter, "chunk_size",
+                     frb_iw_get_chunk_size, 0);
+    rb_define_method(cIndexWriter, "chunk_size=",
+                     frb_iw_set_chunk_size, 1);
+    rb_define_method(cIndexWriter, "max_buffer_memory",
+                     frb_iw_get_max_buffer_memory, 0);
+    rb_define_method(cIndexWriter, "max_buffer_memory=",
+                     frb_iw_set_max_buffer_memory, 1);
+    rb_define_method(cIndexWriter, "term_index_interval",
+                     frb_iw_get_index_interval, 0);
+    rb_define_method(cIndexWriter, "term_index_interval=",
+                     frb_iw_set_index_interval, 1);
+    rb_define_method(cIndexWriter, "doc_skip_interval",
+                     frb_iw_get_skip_interval, 0);
+    rb_define_method(cIndexWriter, "doc_skip_interval=",
+                     frb_iw_set_skip_interval, 1);
+    rb_define_method(cIndexWriter, "merge_factor",
+                     frb_iw_get_merge_factor, 0);
+    rb_define_method(cIndexWriter, "merge_factor=",
+                     frb_iw_set_merge_factor, 1);
+    rb_define_method(cIndexWriter, "max_buffered_docs",
+                     frb_iw_get_max_buffered_docs, 0);
+    rb_define_method(cIndexWriter, "max_buffered_docs=",
+                     frb_iw_set_max_buffered_docs, 1);
+    rb_define_method(cIndexWriter, "max_merge_docs",
+                     frb_iw_get_max_merge_docs, 0);
+    rb_define_method(cIndexWriter, "max_merge_docs=",
+                     frb_iw_set_max_merge_docs, 1);
+    rb_define_method(cIndexWriter, "max_field_length",
+                     frb_iw_get_max_field_length, 0);
+    rb_define_method(cIndexWriter, "max_field_length=",
+                     frb_iw_set_max_field_length, 1);
+    rb_define_method(cIndexWriter, "use_compound_file",
+                     frb_iw_get_use_compound_file, 0);
+    rb_define_method(cIndexWriter, "use_compound_file=",
+                     frb_iw_set_use_compound_file, 1);
+}
+/*
+ *  Document-class: Ferret::Index::LazyDoc
+ *
+ *  == Summary
+ *
+ *  When a document is retrieved from the index a LazyDoc is returned.
+ *  Actually, LazyDoc is just a modified Hash object which lazily adds fields
+ *  to itself when they are accessed. You should note that the keys method
+ *  will return nothing until you actually access one of the fields. To see
+ *  what fields are available use LazyDoc#fields rather than LazyDoc#keys. To
+ *  load all fields use the LazyDoc#load method.
+ *
+ *  == Example
+ *
+ *    doc = index_reader[0]
+ *
+ *    doc.keys     #=> []
+ *    doc.values   #=> []
+ *    doc.fields   #=> [:title, :content]
+ *
+ *    title = doc[:title] #=> "the title"
+ *    doc.keys     #=> [:title]
+ *    doc.values   #=> ["the title"]
+ *    doc.fields   #=> [:title, :content]
+ *
+ *    doc.load
+ *    doc.keys     #=> [:title, :content]
+ *    doc.values   #=> ["the title", "the content"]
+ *    doc.fields   #=> [:title, :content]
+ */
+void
+Init_LazyDoc(void)
+{   /*
+     * Creates LazyDoc as a subclass of rb_cHash under mIndex, binds its
+     * three methods, and creates the nested LazyDocData class. Takes no
+     * arguments and returns nothing; it assigns id_fields, cLazyDoc and
+     * cLazyDocData, which are declared outside this function.
+     */
+    id_fields = rb_intern("@fields");   /* interned ID for the name "@fields" */
+    cLazyDoc = rb_define_class_under(mIndex, "LazyDoc", rb_cHash);
+    rb_define_method(cLazyDoc, "default",   frb_lzd_default, 1);
+    rb_define_method(cLazyDoc, "load",      frb_lzd_load, 0);
+    rb_define_method(cLazyDoc, "fields",    frb_lzd_fields, 0);
+    cLazyDocData = rb_define_class_under(cLazyDoc, "LazyDocData", rb_cObject); /* nested under cLazyDoc, not mIndex */
+    rb_define_alloc_func(cLazyDocData, frb_data_alloc); /* only LazyDocData gets frb_data_alloc here */
+}
+/*
+ *  Document-class: Ferret::Index::IndexReader
+ *
+ *  == Summary
+ *
+ *  IndexReader is used for reading data from the index. This class is usually
+ *  used directly for more advanced tasks like iterating through terms in an
+ *  index, accessing term-vectors or deleting documents by document id. It is
+ *  also used internally by IndexSearcher.
+ */
+void
+Init_IndexReader(void)
+{   /*
+     * Creates the IndexReader class under mIndex, installs frb_data_alloc
+     * as its allocator and binds its Ruby methods to the frb_ir_* C
+     * functions. Takes no arguments and returns nothing; it assigns
+     * cIndexReader, which is declared outside this function.
+     */
+    cIndexReader = rb_define_class_under(mIndex, "IndexReader", rb_cObject);
+    rb_define_alloc_func(cIndexReader, frb_data_alloc);
+    rb_define_method(cIndexReader, "initialize",    frb_ir_init, 1);
+    rb_define_method(cIndexReader, "set_norm",      frb_ir_set_norm, 3);
+    rb_define_method(cIndexReader, "norms",         frb_ir_norms, 1);
+    rb_define_method(cIndexReader, "get_norms_into",frb_ir_get_norms_into, 3);
+    rb_define_method(cIndexReader, "commit",        frb_ir_commit, 0);
+    rb_define_method(cIndexReader, "close",         frb_ir_close, 0);
+    rb_define_method(cIndexReader, "has_deletions?",frb_ir_has_deletions, 0);
+    rb_define_method(cIndexReader, "delete",        frb_ir_delete, 1);
+    rb_define_method(cIndexReader, "deleted?",      frb_ir_is_deleted, 1);
+    rb_define_method(cIndexReader, "max_doc",       frb_ir_max_doc, 0);
+    rb_define_method(cIndexReader, "num_docs",      frb_ir_num_docs, 0);
+    rb_define_method(cIndexReader, "undelete_all",  frb_ir_undelete_all, 0);
+    rb_define_method(cIndexReader, "latest?",       frb_ir_is_latest, 0);
+    rb_define_method(cIndexReader, "get_document",  frb_ir_get_doc, -1); /* same C function as "[]"; -1: argc/argv */
+    rb_define_method(cIndexReader, "[]",            frb_ir_get_doc, -1);
+    rb_define_method(cIndexReader, "term_vector",   frb_ir_term_vector, 2);
+    rb_define_method(cIndexReader, "term_vectors",  frb_ir_term_vectors, 1);
+    rb_define_method(cIndexReader, "term_docs",     frb_ir_term_docs, 0);
+    rb_define_method(cIndexReader, "term_positions",frb_ir_term_positions, 0);
+    rb_define_method(cIndexReader, "term_docs_for", frb_ir_term_docs_for, 2);
+    rb_define_method(cIndexReader, "term_positions_for", frb_ir_t_pos_for, 2);
+    rb_define_method(cIndexReader, "doc_freq",      frb_ir_doc_freq, 2);
+    rb_define_method(cIndexReader, "terms",         frb_ir_terms, 1);
+    rb_define_method(cIndexReader, "terms_from",    frb_ir_terms_from, 2);
+    rb_define_method(cIndexReader, "term_count",    frb_ir_term_count, 1);
+    rb_define_method(cIndexReader, "fields",        frb_ir_fields, 0); /* same C function as field_names */
+    rb_define_method(cIndexReader, "field_names",   frb_ir_fields, 0);
+    rb_define_method(cIndexReader, "field_infos",   frb_ir_field_infos, 0);
+    rb_define_method(cIndexReader, "tokenized_fields", frb_ir_tk_fields, 0);
+    rb_define_method(cIndexReader, "version",       frb_ir_version, 0);
+}
+/* rdoc hack
+extern VALUE mFerret = rb_define_module("Ferret");
+*/
+/*
+ *  Document-module: Ferret::Index
+ *
+ *  == Summary
+ *
+ *  The Index module contains all the classes used for adding to and
+ *  retrieving from the index. The important classes to know about are;
+ *
+ *  * FieldInfo
+ *  * FieldInfos
+ *  * IndexWriter
+ *  * IndexReader
+ *  * LazyDoc
+ *
+ *  The other classes in this module are useful for more advanced uses like
+ *  building tag clouds, creating more-like-this queries, custom highlighting
+ *  etc. They are also useful for index browsers.
+ */
+void
+Init_Index(void)
+{   /*
+     * Defines the Index module under mFerret, interns the symbols stored in
+     * sym_boost, sym_analyzer, sym_close_dir and fsym_content, and then runs
+     * each class initialiser in turn. Takes no arguments and returns
+     * nothing.
+     */
+    mIndex = rb_define_module_under(mFerret, "Index"); /* assigned first: initialisers such as Init_TermVector register under mIndex */
+    sym_boost     = ID2SYM(rb_intern("boost"));
+    sym_analyzer  = ID2SYM(rb_intern("analyzer"));
+    sym_close_dir = ID2SYM(rb_intern("close_dir"));
+    fsym_content  = I("content"); /* I() is defined elsewhere; presumably interns the name - TODO confirm */
+    Init_TermVector();
+    Init_TermEnum();
+    Init_TermDocEnum();
+    Init_FieldInfos();
+    Init_LazyDoc();
+    Init_IndexWriter();
+    Init_IndexReader();
+}