RubyGems - ferret - Versions diffs - 0.11.4 → 0.11.5 - Mend

ferret 0.11.4 → 0.11.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (50) hide show

data/Rakefile +1 -0
data/TUTORIAL +3 -3
data/ext/analysis.c +12 -9
data/ext/array.c +10 -10
data/ext/array.h +8 -1
data/ext/bitvector.c +2 -2
data/ext/except.c +1 -1
data/ext/ferret.c +2 -2
data/ext/ferret.h +1 -1
data/ext/fs_store.c +13 -2
data/ext/global.c +4 -4
data/ext/global.h +6 -0
data/ext/hash.c +1 -1
data/ext/helper.c +1 -1
data/ext/helper.h +1 -1
data/ext/index.c +48 -22
data/ext/index.h +17 -16
data/ext/mempool.c +4 -1
data/ext/mempool.h +1 -1
data/ext/multimapper.c +2 -2
data/ext/q_fuzzy.c +2 -2
data/ext/q_multi_term.c +2 -2
data/ext/q_parser.c +39 -8
data/ext/q_range.c +32 -1
data/ext/r_analysis.c +66 -28
data/ext/r_index.c +18 -19
data/ext/r_qparser.c +21 -6
data/ext/r_search.c +74 -49
data/ext/r_store.c +1 -1
data/ext/r_utils.c +17 -17
data/ext/search.c +10 -5
data/ext/search.h +3 -1
data/ext/sort.c +2 -2
data/ext/stopwords.c +23 -34
data/ext/store.c +9 -9
data/ext/store.h +5 -4
data/lib/ferret/document.rb +2 -2
data/lib/ferret/field_infos.rb +37 -35
data/lib/ferret/index.rb +16 -6
data/lib/ferret/number_tools.rb +2 -2
data/lib/ferret_version.rb +1 -1
data/test/unit/analysis/tc_token_stream.rb +40 -0
data/test/unit/index/tc_index.rb +64 -101
data/test/unit/index/tc_index_reader.rb +13 -0
data/test/unit/largefile/tc_largefile.rb +46 -0
data/test/unit/query_parser/tc_query_parser.rb +17 -1
data/test/unit/search/tc_multiple_search_requests.rb +58 -0
data/test/unit/search/tm_searcher.rb +27 -1
data/test/unit/ts_largefile.rb +4 -0
metadata +147 -144

data/ext/r_index.c CHANGED Viewed

@@ -274,7 +274,7 @@ frt_fi_is_tokenized(VALUE self)
  *  used to store the field boosts for an indexed field. If you do not boost
  *  any fields, and you can live without scoring based on field length then
  *  you can omit the norms file. This will give the index a slight performance
- *  boost and it will use less memory, escpecially for indexes which have a
+ *  boost and it will use less memory, especially for indexes which have a
  *  large number of documents.
  */
 static VALUE
@@ -623,7 +623,7 @@ frt_fis_create_index(VALUE self, VALUE rdir)
  *  call-seq:
  *     fis.fields -> symbol array
  *
- *  Return a list of the field names (as symbols) of all the fieldcs in the
+ *  Return a list of the field names (as symbols) of all the fields in the
  *  index.
  */
 static VALUE
@@ -1415,7 +1415,7 @@ frt_iw_init(int argc, VALUE *argv, VALUE self)
  *     iw.doc_count -> number
  *
  *  Returns the number of documents in the Index. Note that deletions won't be
- *  taken into account until the IndexWriter has been commited.
+ *  taken into account until the IndexWriter has been committed.
  */
 static VALUE
 frt_iw_get_doc_count(VALUE self)
@@ -1660,7 +1660,7 @@ frt_iw_get_analyzer(VALUE self)
  *
  *  Set the Analyzer for this IndexWriter. This is useful if you need to
  *  change the analyzer for a special document. It is risky though as the
- *  same anlyzer will be used for all documents during search.
+ *  same analyzer will be used for all documents during search.
  */
 static VALUE
 frt_iw_set_analyzer(VALUE self, VALUE ranalyzer)
@@ -2191,7 +2191,7 @@ frt_ir_init(VALUE self, VALUE rdir)
  *
  *  Expert: change the boost value for a +field+ in document at +doc_id+.
  *  +val+ should be an integer in the range 0..255 which corresponds to an
- *  encoced float value.
+ *  encoded float value.
  */
 static VALUE
 frt_ir_set_norm(VALUE self, VALUE rdoc_id, VALUE rfield, VALUE rval)
@@ -2267,7 +2267,7 @@ frt_ir_commit(VALUE self)
  *     index_reader.close -> index_reader
  *
  *  Close the IndexReader. This method also commits any deletions made by this
- *  IndexReader. Thise method will be called explicitly by the garbage
+ *  IndexReader. This method will be called implicitly by the garbage
  *  collector but you should call it explicitly to commit any changes as soon
  *  as possible and to close any locks held by the object to prevent locking
  *  errors.
@@ -2286,7 +2286,7 @@ frt_ir_close(VALUE self)
  *  call-seq:
  *     index_reader.has_deletions? -> bool
  *
- *  Return true if the index has any deletions, either uncommited by this
+ *  Return true if the index has any deletions, either uncommitted by this
  *  IndexReader or committed by any other IndexReader.
  */
 static VALUE
@@ -2329,7 +2329,7 @@ frt_ir_is_deleted(VALUE self, VALUE rdoc_id)
  *  call-seq:
  *     index_reader.max_doc -> number
  *
- *  Returns 1 + the maximum document id in the index. It is the the
+ *  Returns 1 + the maximum document id in the index. It is the
  *  document_id that will be used by the next document added to the index. If
  *  there are no deletions, this number also refers to the number of documents
  *  in the index.
@@ -2361,7 +2361,7 @@ frt_ir_num_docs(VALUE self)
  *     index_reader.undelete_all -> index_reader
  *
  *  Undelete all deleted documents in the index. This is kind of like a
- *  rollback feature. Not that once an index is commited or a merge happens
+ *  rollback feature. Note that once an index is committed or a merge happens
  *  during index, deletions will be committed and undelete_all will have no
  *  effect on these documents.
  */
@@ -2434,7 +2434,6 @@ frt_ir_get_doc(int argc, VALUE *argv, VALUE self)
         len = FIX2LONG(arg2);
         return frt_get_doc_range(ir, pos, len, max);
     }
-    return Qnil;
 }
 /*
@@ -2713,7 +2712,7 @@ frt_ir_version(VALUE self)
  *
  *  == Summary
  *
- *  The FieldInfo class is the field descripter for the index. It specifies
+ *  The FieldInfo class is the field descriptor for the index. It specifies
  *  whether a field is compressed or not or whether it should be indexed and
  *  tokenized. Every field has a name which must be a symbol. There are three
  *  properties that you can set, +:store+, +:index+ and +:term_vector+. You
@@ -2740,7 +2739,7 @@ frt_ir_version(VALUE self)
 *  be indexed to be stored in the Ferret index. You may want to use the index
  *  as a simple database and store things like images or MP3s in the index. By
  *  default each field is indexed and tokenized (split into tokens) (+:yes+).
- *  If you don't want to index the field use +:no+. If you wan the field
+ *  If you don't want to index the field use +:no+. If you want the field
  *  indexed but not tokenized, use +:untokenized+. Do this for the fields you
  *  wish to sort by. There are two other values for +:index+; +:omit_norms+
  *  and +:untokenized_omit_norms+. These values correspond to +:yes+ and
@@ -2754,7 +2753,7 @@ frt_ir_version(VALUE self)
  *  or not you would like to store term-vectors. The available options are
  *  +:no+, +:yes+, +:with_positions+, +:with_offsets+ and
  *  +:with_positions_offsets+. Note that you need to store the positions to
- *  asscociate offsets with individual terms in the term_vector.
+ *  associate offsets with individual terms in the term_vector.
  *
  *  == Property Table
  *
@@ -2946,7 +2945,7 @@ Init_FieldInfos(void)
  *
  *    te = index_reader.terms(:content)
  *
- *    te.each {|term, doc_freq| puts "#{term} occured #{doc_freq} times" }
+ *    te.each {|term, doc_freq| puts "#{term} occurred #{doc_freq} times" }
  *
  *    # or you could do it like this;
  *    te = index_reader.terms(:content)
@@ -3093,7 +3092,7 @@ Init_TVTerm(void)
  *  highlight search matches in results. This is all done internally so you
  *  won't need to worry about the TermVector object. There are some other
  *  reasons you may want to use the TermVectors object however. For example,
- *  you may wish to see which terms are the most commonly occuring terms in a
+ *  you may wish to see which terms are the most commonly occurring terms in a
  *  document to implement a MoreLikeThis search.
  *
  *  == Example
@@ -3112,7 +3111,7 @@ Init_TVTerm(void)
  *  +positions+ and +offsets+ can be +nil+ depending on what you set the
  *  +:term_vector+ to when you set the FieldInfo object for the field. Note in
  *  particular that you need to store both positions and offsets if you want
- *  to asscociate offsets with particular terms.
+ *  to associate offsets with particular terms.
  */
 static void
 Init_TermVector(void)
@@ -3136,7 +3135,7 @@ Init_TermVector(void)
  *  == Summary
  *
  *  The IndexWriter is the class used to add documents to an index. You can
- *  also delete docuements from the index using this class. The indexing
+ *  also delete documents from the index using this class. The indexing
  *  process is highly customizable and the IndexWriter has the following
  *  parameters;
  *
@@ -3212,7 +3211,7 @@ Init_TermVector(void)
  *                        documents).
  *  max_field_length::    Default: 10000. The maximum number of terms added to
  *                        a single field.  This can be useful to protect the
- *                        indexer when indexing documents fromt the web for
+ *                        indexer when indexing documents from the web for
  *                        example. Usually the most important terms will occur
  *                        early on in a document so you can often safely
  *                        ignore the terms in a field after a certain number
@@ -3221,7 +3220,7 @@ Init_TermVector(void)
  *                        first 1000 terms in a field. On the other hand, if
  *                        you want to be more thorough and you are indexing
  *                        documents from your file-system you may set this
- *                        paramter to Ferret::FIX_INT_MAX.
+ *                        parameter to Ferret::FIX_INT_MAX.
  *  use_compound_file::   Default: true. Uses a compound file to store the
  *                        index. This prevents an error being raised for
  *                        having too many files open at the same time. The

data/ext/r_qparser.c CHANGED Viewed

@@ -16,6 +16,7 @@ static VALUE sym_default_slop;
 static VALUE sym_handle_parse_errors;
 static VALUE sym_clean_string;
 static VALUE sym_max_clauses;
+static VALUE sym_use_keywords;
 extern VALUE frt_get_analyzer(Analyzer *a);
 extern VALUE frt_get_q(Query *q);
@@ -116,11 +117,20 @@ frt_get_fields(VALUE rfields)
  *                         of terms allowed in multi, prefix, wild-card or
  *                         fuzzy queries when those queries are generated by
  *                         rewriting other queries
+ *  :use_keywords:         Default: true. By default AND, OR, NOT and REQ are
+ *                         keywords used by the query parser. Sometimes this
+ *                         is undesirable. For example, if your application
+ *                         allows searching for US states by their
+ *                         abbreviation, then OR will be a common query
+ *                         string. By setting :use_keywords to false, OR will
+ *                         no longer be a keyword allowing searches for the
+ *                         state of Oregon. You will still be able to use
+ *                         boolean queries by using the + and - characters.
  */
 static VALUE
 frt_qp_init(int argc, VALUE *argv, VALUE self)
 {
-    VALUE roptions;
+    VALUE roptions = Qnil;
     VALUE rval;
     Analyzer *analyzer = NULL;
     bool has_options = false;
@@ -150,6 +160,7 @@ frt_qp_init(int argc, VALUE *argv, VALUE self)
             }
         } else {
             def_fields = frt_get_fields(roptions);
+            roptions = Qnil;
         }
     }
     if (all_fields == NULL) {
@@ -165,7 +176,7 @@ frt_qp_init(int argc, VALUE *argv, VALUE self)
     qp->clean_str = true;
     qp->handle_parse_errors = true;
     /* handle options */
-    if (argc > 0) {
+    if (roptions != Qnil) {
         if (Qnil != (rval = rb_hash_aref(roptions, sym_handle_parse_errors))) {
             qp->handle_parse_errors = RTEST(rval);
         }
@@ -187,6 +198,9 @@ frt_qp_init(int argc, VALUE *argv, VALUE self)
         if (Qnil != (rval = rb_hash_aref(roptions, sym_max_clauses))) {
             qp->max_clauses = FIX2INT(rval);
         }
+        if (Qnil != (rval = rb_hash_aref(roptions, sym_use_keywords))) {
+            qp->use_keywords = RTEST(rval);
+        }
     }
     Frt_Wrap_Struct(self, frt_qp_mark, frt_qp_free, qp);
     object_add(qp, self);
@@ -493,8 +507,8 @@ Init_QueryParseException(void)
  *  === WildQuery
  *
  *  A wild query is a query using the pattern matching characters * and ?. *
- *  matchs 0 or more characters while ? matchs a single character. This type
- *  of query can be really useful for matching heirarchical categories for
+ *  matches 0 or more characters while ? matches a single character. This type
+ *  of query can be really useful for matching hierarchical categories for
  *  example. Let's say we had this structure;
  *
  *    /sport/skiing
@@ -514,7 +528,7 @@ Init_QueryParseException(void)
  *  the wild characters at the beginning of the query as it'll have to iterate
  *  through every term in that field. Having said that, some fields like the
  *  category field above will only have a small number of distinct fields so
- *  this could be ok.
+ *  this could be okay.
  *
  *  === FuzzyQuery
  *
@@ -531,7 +545,7 @@ Init_QueryParseException(void)
  *    'content:Ostralya~0.4'
  *
  *  Note that this query can be quite expensive. If you'd like to use this
- *  query, you may want to set a mininum prefix length in the FuzzyQuery
+ *  query, you may want to set a minimum prefix length in the FuzzyQuery
  *  class. This can substantially reduce the number of terms that the query
  *  will iterate over.
  *
@@ -551,6 +565,7 @@ Init_QueryParser(void)
     sym_handle_parse_errors = ID2SYM(rb_intern("handle_parse_errors"));
     sym_clean_string = ID2SYM(rb_intern("clean_string"));
     sym_max_clauses = ID2SYM(rb_intern("max_clauses"));
+    sym_use_keywords = ID2SYM(rb_intern("use_keywords"));
     /* QueryParser */
     cQueryParser = rb_define_class_under(mFerret, "QueryParser", rb_cObject);

data/ext/r_search.c CHANGED Viewed

@@ -179,7 +179,7 @@ frt_get_td(TopDocs *td, VALUE rsearcher)
  *  call-seq:
  *     top_doc.to_s(field = :id) -> string
  *
- *  Returns a string represention of the top_doc in readable format.
+ *  Returns a string representation of the top_doc in readable format.
  */
 static VALUE
 frt_td_to_s(int argc, VALUE *argv, VALUE self)
@@ -197,7 +197,7 @@ frt_td_to_s(int argc, VALUE *argv, VALUE self)
         field = frt_field(argv[0]);
     }
-    sprintf(s, "TopDocs: total_hits = %d, max_score = %f [\n",
+    sprintf(s, "TopDocs: total_hits = %ld, max_score = %f [\n",
             FIX2INT(rb_funcall(self, id_total_hits, 0)),
             NUM2DBL(rb_funcall(self, id_max_score, 0)));
     s += strlen(s);
@@ -224,7 +224,7 @@ frt_td_to_s(int argc, VALUE *argv, VALUE self)
     return rstr;
 }
-__inline char *
+static INLINE char *
 frt_lzd_load_to_json(LazyDoc *lzd, char **str, char *s, int *slen)
 {
 	int i, j;
@@ -270,7 +270,7 @@ frt_lzd_load_to_json(LazyDoc *lzd, char **str, char *s, int *slen)
  *  call-seq:
  *     top_doc.to_json() -> string
  *
- *  Returns a json represention of the top_doc.
+ *  Returns a json representation of the top_doc.
  */
 static VALUE
 frt_td_to_json(VALUE self)
@@ -318,7 +318,7 @@ frt_td_to_json(VALUE self)
  *  call-seq:
  *     explanation.to_s -> string
  *
- *  Returns a string represention of the explantion in readable format.
+ *  Returns a string representation of the explanation in readable format.
  */
 static VALUE
 frt_expl_to_s(VALUE self)
@@ -334,7 +334,7 @@ frt_expl_to_s(VALUE self)
  *  call-seq:
  *     explanation.to_html -> string
  *
- *  Returns an html represention of the explantion in readable format.
+ *  Returns an html representation of the explanation in readable format.
  */
 static VALUE
 frt_expl_to_html(VALUE self)
@@ -403,7 +403,7 @@ frt_q_to_s(int argc, VALUE *argv, VALUE self)
  *  call-seq:
  *     query.boost
  *
- *  Returns the queries boost value. See the Query desription for more
+ *  Returns the query's boost value. See the Query description for more
  *  information on Query boosts.
  */
 static VALUE
@@ -417,7 +417,7 @@ frt_q_get_boost(VALUE self)
  *  call-seq:
  *     query.boost = boost -> boost
  *
- *  Set the boost for a query. See the Query desription for more information
+ *  Set the boost for a query. See the Query description for more information
  *  on Query boosts.
  */
 static VALUE
@@ -582,7 +582,7 @@ static VALUE
 frt_tq_init(VALUE self, VALUE rfield, VALUE rterm)
 {
     char *field = frt_field(rfield);
-    char *term = StringValuePtr(rterm);
+    char *term = rs2s(rb_obj_as_string(rterm));
     Query *q = tq_new(field, term);
     Frt_Wrap_Struct(self, NULL, &frt_q_free, q);
     object_add(q, self);
@@ -795,7 +795,7 @@ frt_bc_init(int argc, VALUE *argv, VALUE self)
  *  call-seq:
  *     clause.query -> query
  *
- *  Returnt the query object wrapped by this BooleanClause.
+ *  Return the query object wrapped by this BooleanClause.
  */
 static VALUE
 frt_bc_get_query(VALUE self)
@@ -921,7 +921,7 @@ frt_bq_mark(void *p)
  *     BooleanQuery.new(coord_disable = false)
  *
  *  Create a new BooleanQuery. If you don't care about the scores of the
- *  sub-queries added the the query (as would be the case for many
+ *  sub-queries added to the query (as would be the case for many
  *  automatically generated queries) you can disable the coord_factor of the
  *  score. This will slightly improve performance for the query. Usually you
  *  should leave this parameter as is.
@@ -1309,7 +1309,7 @@ frt_wcq_init(int argc, VALUE *argv, VALUE self)
  *                    distance is measured. This parameter is used to improve
  *                    performance.  With a +:prefix_length+ of 0, all terms in
  *                    the index must be checked which can be quite a
- *                    performance hit.  By setting theprefix length to a
+ *                    performance hit.  By setting the prefix length to a
  *                    larger number you minimize the number of terms that need
  *                    to be checked.  Even 1 will cut down the work by a
  *                    factor of about 26 depending on your character set and
@@ -1501,7 +1501,7 @@ frt_maq_init(VALUE self)
  *     ConstantScoreQuery.new(filter) -> query
  *
  *  Create a ConstantScoreQuery which uses +filter+ to match documents giving
- *  each document a consant score.
+ *  each document a constant score.
  */
 static VALUE
 frt_csq_init(VALUE self, VALUE rfilter)
@@ -1688,7 +1688,7 @@ frt_spannq_mark(void *p)
  *  :slop::     Default: 0. Works exactly like a PhraseQuery slop. It is the
  *              amount of slop allowed in the match (the term edit distance
  *              allowed in the match).
- *  :in_order:: Defualt: false. Specifies whether or not the matches have to
+ *  :in_order:: Default: false. Specifies whether or not the matches have to
  *              occur in the order they were added to the query. When slop is
  *              set to 0, this parameter will make no difference.
  */
@@ -1862,7 +1862,7 @@ frt_f_free(void *p)
  *  call-seq:
  *     filter.to_s -> string
  *
- *  Return a human readable string represting the Filter object that the
+ *  Return a human readable string representing the Filter object that the
  *  method was called on.
  */
 static VALUE
@@ -2415,7 +2415,7 @@ frt_sea_doc(VALUE self, VALUE rdoc_id)
  *  call-seq:
  *     searcher.max_doc -> number
  *
- *  Returns 1 + the maximum document id in the index. It is the the
+ *  Returns 1 + the maximum document id in the index. It is the
  *  document_id that will be used by the next document added to the index. If
  *  there are no deletions, this number also refers to the number of documents
  *  in the index.
@@ -2555,8 +2555,13 @@ frt_sea_search_internal(Query *query, VALUE roptions, Searcher *sea)
  *  :sort::         A Sort object or sort string describing how the field
  *                  should be sorted. A sort string is made up of field names
  *                  which cannot contain spaces and the word "DESC" if you
- *                  want the field reversed, all seperated by commas. For
- *                  example; "rating DESC, author, title"
+ *                  want the field reversed, all separated by commas. For
+ *                  example; "rating DESC, author, title". Note that Ferret
+ *                  will try to determine a field's type by looking at the
+ *                  first term in the index and seeing if it can be parsed as
+ *                  an integer or a float. Keep this in mind as you may need
+ *                  to specify a field's type to sort it correctly. For more
+ *                  on this, see the documentation for SortField.
  *  :filter::       a Filter object to filter the search results with
  *  :filter_proc::  a filter Proc is a Proc which takes the doc_id, the score
  *                  and the Searcher object as its parameters and returns a
@@ -2602,8 +2607,13 @@ frt_sea_search(int argc, VALUE *argv, VALUE self)
  *  :sort::         A Sort object or sort string describing how the field
  *                  should be sorted. A sort string is made up of field names
  *                  which cannot contain spaces and the word "DESC" if you
- *                  want the field reversed, all seperated by commas. For
- *                  example; "rating DESC, author, title"
+ *                  want the field reversed, all separated by commas. For
+ *                  example; "rating DESC, author, title". Note that Ferret
+ *                  will try to determine a field's type by looking at the
+ *                  first term in the index and seeing if it can be parsed as
+ *                  an integer or a float. Keep this in mind as you may need
+ *                  to specify a field's type to sort it correctly. For more
+ *                  on this, see the documentation for SortField.
  *  :filter::       a Filter object to filter the search results with
  *  :filter_proc::  a filter Proc is a Proc which takes the doc_id, the score
  *                  and the Searcher object as its parameters and returns a
@@ -2685,7 +2695,7 @@ frt_sea_explain(VALUE self, VALUE rquery, VALUE rdoc_id)
  *  :ellipsis::         Default: "...". This is the string that is appended at
  *                      the beginning and end of excerpts (unless the excerpt
 *                      hits the start or end of the field). You'll probably
- *                      want to change this so a Unicode elipsis character.
+ *                      want to change this to a Unicode ellipsis character.
  */
 static VALUE
 frt_sea_highlight(int argc, VALUE *argv, VALUE self)
@@ -2702,26 +2712,31 @@ frt_sea_highlight(int argc, VALUE *argv, VALUE self)
     rb_scan_args(argc, argv, "31", &rquery, &rdoc_id, &rfield, &roptions);
     Data_Get_Struct(rquery, Query, query);
-    if (Qnil != (v = rb_hash_aref(roptions, sym_num_excerpts))) {
-        num_excerpts =  FIX2INT(v);
-    }
-    if (Qnil != (v = rb_hash_aref(roptions, sym_excerpt_length))) {
-        if (v == sym_all) {
-            num_excerpts = 1;
-            excerpt_length = INT_MAX/2;
+    if (argc > 3) {
+        if (TYPE(roptions) != T_HASH) {
+           rb_raise(rb_eArgError, "The fourth argument to Searcher#highlight must be a hash");
         }
-        else {
-            excerpt_length = FIX2INT(v);
+        if (Qnil != (v = rb_hash_aref(roptions, sym_num_excerpts))) {
+            num_excerpts =  FIX2INT(v);
+        }
+        if (Qnil != (v = rb_hash_aref(roptions, sym_excerpt_length))) {
+            if (v == sym_all) {
+                num_excerpts = 1;
+                excerpt_length = INT_MAX/2;
+            }
+            else {
+                excerpt_length = FIX2INT(v);
+            }
+        }
+        if (Qnil != (v = rb_hash_aref(roptions, sym_pre_tag))) {
+            pre_tag = rs2s(rb_obj_as_string(v));
+        }
+        if (Qnil != (v = rb_hash_aref(roptions, sym_post_tag))) {
+            post_tag = rs2s(rb_obj_as_string(v));
+        }
+        if (Qnil != (v = rb_hash_aref(roptions, sym_ellipsis))) {
+            ellipsis = rs2s(rb_obj_as_string(v));
         }
-    }
-    if (Qnil != (v = rb_hash_aref(roptions, sym_pre_tag))) {
-        pre_tag = rs2s(rb_obj_as_string(v));
-    }
-    if (Qnil != (v = rb_hash_aref(roptions, sym_post_tag))) {
-        post_tag = rs2s(rb_obj_as_string(v));
-    }
-    if (Qnil != (v = rb_hash_aref(roptions, sym_ellipsis))) {
-        ellipsis = rs2s(rb_obj_as_string(v));
     }
     if ((excerpts = searcher_highlight(sea,
@@ -2771,7 +2786,7 @@ frt_sea_mark(void *p)
  *     Searcher.new(obj) -> Searcher
  *
  *  Create a new Searcher object. +dir+ can either be a string path to an
- *  index directory on the file-sytem, an actual Ferret::Store::Directory
+ *  index directory on the file-system, an actual Ferret::Store::Directory
  *  object or a Ferret::Index::IndexReader. You should use the IndexReader for
  *  searching multiple indexes. Just open the IndexReader on multiple
  *  directories.
@@ -2898,7 +2913,7 @@ cTopDocs = rb_define_class_under(mSearch, "TopDocs", rb_cObject);
  *  document id of the document that matches along with the score for the
  *  match. The score is a positive Float value. The score contained in a hit
  *  is not normalized so it can be greater than 1.0. To normalize scores to
- *  the range 0.0..1.0 devide the scores by TopDocs#max_score.
+ *  the range 0.0..1.0 divide the scores by TopDocs#max_score.
  */
 static void
 Init_Hit(void)
@@ -3546,7 +3561,7 @@ Init_SpanPrefixQuery(void)
  *
  *  == Summary
  *
- *  A SpanFirstQuery resticts a query to search in the first +end+ bytes of a
+ *  A SpanFirstQuery restricts a query to search in the first +end+ bytes of a
  *  field. This is useful since often the most important information in a
  *  document is at the start of the document.
  *
@@ -3577,7 +3592,7 @@ Init_SpanFirstQuery(void)
  *
  *  A SpanNearQuery is like a combination between a PhraseQuery and a
  *  BooleanQuery. It matches sub-SpanQueries which are added as clauses but
- *  those clauses must occur within a +slop+ edit distance of eachother. You
+ *  those clauses must occur within a +slop+ edit distance of each other. You
  *  can also specify that clauses must occur +in_order+.
  *
  *  == Example
@@ -3801,7 +3816,7 @@ Init_QueryFilter(void)
  *  A Filter is used to filter query results. It is usually passed to one of
  *  Searcher's search methods however it can also be used inside a
  *  ConstantScoreQuery or a FilteredQuery. To implement your own Filter you
- *  must implement the methoed #get_bitvector(index_reader) which returns a
+ *  must implement the method #get_bitvector(index_reader) which returns a
  *  BitVector with set bits corresponding to documents that are allowed by
  *  this Filter.
  *
@@ -3839,16 +3854,23 @@ Init_Filter(void)
  *  The type of the SortField is set by passing it as a parameter to the
  *  constructor. The +:auto+ type specifies that the SortField should detect
  *  the sort type by looking at the data in the field. This is the default
- *  type. Care should be taken however when using the :auto sort-type since
- *  numbers will occur before other strings in the index so if you are sorting
- *  a field with both numbers and strings (like a title field which might have
- *  "24" and "Prison Break") then the sort_field will think it is sorting
- *  integers when it really should sort by string.
+ *  :type value although it is recommended that you explicitly specify the
+ *  field's type.
  *
  *  == Example
  *
  *    title_sf = SortField.new(:title, :type => :string)
 *    rating_sf = SortField.new(:rating, :type => :float, :reverse => true)
+ *
+ *
+ *  Note 1: Care should be taken when using the :auto sort-type since numbers
+ *  will occur before other strings in the index so if you are sorting a field
+ *  with both numbers and strings (like a title field which might have "24"
+ *  and "Prison Break") then the sort_field will think it is sorting integers
+ *  when it really should be sorting strings.
+ *
+ *  Note 2: When sorting by integer, integers are only 4 bytes so anything
+ *  larger will cause strange sorting behaviour.
  */
 static void
 Init_SortField(void)
@@ -3923,6 +3945,9 @@ Init_SortField(void)
  *    sf_rating = SortField.new(:rating, :type => :float, :reverse => true)
  *    sf_title = SortField.new(:title, :type => :string)
  *    sort = Sort.new([sf_rating, sf_title])
+ *
+ *  Remember that the :type parameter for SortField is set to :auto by default
+ *  but I strongly recommend you specify a :type value.
  */
 static void
 Init_Sort(void)