RubyGems - ferret - Versions diffs - 0.11.6 → 0.11.8.4 - Mend

ferret 0.11.6 → 0.11.8.4

Files changed (185) hide show

data/README +10 -22
data/RELEASE_CHANGES +137 -0
data/RELEASE_NOTES +60 -0
data/Rakefile +379 -274
data/TODO +100 -8
data/bin/ferret-browser +0 -0
data/ext/BZLIB_blocksort.c +1094 -0
data/ext/BZLIB_bzlib.c +1578 -0
data/ext/BZLIB_compress.c +672 -0
data/ext/BZLIB_crctable.c +104 -0
data/ext/BZLIB_decompress.c +626 -0
data/ext/BZLIB_huffman.c +205 -0
data/ext/BZLIB_randtable.c +84 -0
data/ext/{api.c → STEMMER_api.c} +7 -10
data/ext/{libstemmer.c → STEMMER_libstemmer.c} +3 -2
data/ext/{stem_ISO_8859_1_danish.c → STEMMER_stem_ISO_8859_1_danish.c} +123 -124
data/ext/{stem_ISO_8859_1_dutch.c → STEMMER_stem_ISO_8859_1_dutch.c} +177 -188
data/ext/STEMMER_stem_ISO_8859_1_english.c +1117 -0
data/ext/{stem_ISO_8859_1_finnish.c → STEMMER_stem_ISO_8859_1_finnish.c} +276 -306
data/ext/STEMMER_stem_ISO_8859_1_french.c +1246 -0
data/ext/{stem_ISO_8859_1_german.c → STEMMER_stem_ISO_8859_1_german.c} +161 -170
data/ext/STEMMER_stem_ISO_8859_1_hungarian.c +1230 -0
data/ext/STEMMER_stem_ISO_8859_1_italian.c +1065 -0
data/ext/STEMMER_stem_ISO_8859_1_norwegian.c +297 -0
data/ext/{stem_ISO_8859_1_porter.c → STEMMER_stem_ISO_8859_1_porter.c} +263 -290
data/ext/{stem_ISO_8859_1_portuguese.c → STEMMER_stem_ISO_8859_1_portuguese.c} +362 -380
data/ext/STEMMER_stem_ISO_8859_1_spanish.c +1093 -0
data/ext/STEMMER_stem_ISO_8859_1_swedish.c +307 -0
data/ext/STEMMER_stem_ISO_8859_2_romanian.c +998 -0
data/ext/{stem_KOI8_R_russian.c → STEMMER_stem_KOI8_R_russian.c} +244 -245
data/ext/STEMMER_stem_UTF_8_danish.c +339 -0
data/ext/{stem_UTF_8_dutch.c → STEMMER_stem_UTF_8_dutch.c} +192 -211
data/ext/STEMMER_stem_UTF_8_english.c +1125 -0
data/ext/{stem_UTF_8_finnish.c → STEMMER_stem_UTF_8_finnish.c} +284 -324
data/ext/STEMMER_stem_UTF_8_french.c +1256 -0
data/ext/{stem_UTF_8_german.c → STEMMER_stem_UTF_8_german.c} +170 -187
data/ext/STEMMER_stem_UTF_8_hungarian.c +1234 -0
data/ext/STEMMER_stem_UTF_8_italian.c +1073 -0
data/ext/STEMMER_stem_UTF_8_norwegian.c +299 -0
data/ext/{stem_UTF_8_porter.c → STEMMER_stem_UTF_8_porter.c} +271 -310
data/ext/STEMMER_stem_UTF_8_portuguese.c +1023 -0
data/ext/STEMMER_stem_UTF_8_romanian.c +1004 -0
data/ext/STEMMER_stem_UTF_8_russian.c +694 -0
data/ext/STEMMER_stem_UTF_8_spanish.c +1097 -0
data/ext/STEMMER_stem_UTF_8_swedish.c +309 -0
data/ext/STEMMER_stem_UTF_8_turkish.c +2205 -0
data/ext/{utilities.c → STEMMER_utilities.c} +100 -68
data/ext/analysis.c +276 -121
data/ext/analysis.h +190 -143
data/ext/api.h +3 -4
data/ext/array.c +5 -3
data/ext/array.h +52 -43
data/ext/bitvector.c +38 -482
data/ext/bitvector.h +446 -124
data/ext/bzlib.h +282 -0
data/ext/bzlib_private.h +503 -0
data/ext/compound_io.c +23 -22
data/ext/config.h +21 -11
data/ext/document.c +43 -40
data/ext/document.h +31 -21
data/ext/except.c +20 -38
data/ext/except.h +89 -76
data/ext/extconf.rb +3 -2
data/ext/ferret.c +49 -35
data/ext/ferret.h +14 -11
data/ext/field_index.c +262 -0
data/ext/field_index.h +52 -0
data/ext/filter.c +11 -10
data/ext/fs_store.c +65 -47
data/ext/global.c +245 -165
data/ext/global.h +252 -54
data/ext/hash.c +200 -243
data/ext/hash.h +205 -163
data/ext/hashset.c +118 -96
data/ext/hashset.h +110 -82
data/ext/header.h +19 -19
data/ext/helper.c +11 -10
data/ext/helper.h +14 -6
data/ext/index.c +745 -366
data/ext/index.h +503 -529
data/ext/internal.h +1020 -0
data/ext/lang.c +10 -0
data/ext/lang.h +35 -15
data/ext/mempool.c +5 -4
data/ext/mempool.h +30 -22
data/ext/modules.h +35 -7
data/ext/multimapper.c +43 -2
data/ext/multimapper.h +32 -23
data/ext/posh.c +0 -0
data/ext/posh.h +4 -38
data/ext/priorityqueue.c +10 -12
data/ext/priorityqueue.h +33 -21
data/ext/q_boolean.c +22 -9
data/ext/q_const_score.c +3 -2
data/ext/q_filtered_query.c +15 -12
data/ext/q_fuzzy.c +147 -135
data/ext/q_match_all.c +3 -2
data/ext/q_multi_term.c +28 -32
data/ext/q_parser.c +451 -173
data/ext/q_phrase.c +158 -79
data/ext/q_prefix.c +16 -18
data/ext/q_range.c +363 -31
data/ext/q_span.c +130 -141
data/ext/q_term.c +21 -21
data/ext/q_wildcard.c +19 -23
data/ext/r_analysis.c +369 -242
data/ext/r_index.c +421 -434
data/ext/r_qparser.c +142 -92
data/ext/r_search.c +790 -407
data/ext/r_store.c +44 -44
data/ext/r_utils.c +264 -96
data/ext/ram_store.c +29 -23
data/ext/scanner.c +895 -0
data/ext/scanner.h +36 -0
data/ext/scanner_mb.c +6701 -0
data/ext/scanner_utf8.c +4415 -0
data/ext/search.c +210 -87
data/ext/search.h +556 -488
data/ext/similarity.c +17 -16
data/ext/similarity.h +51 -44
data/ext/sort.c +157 -354
data/ext/stem_ISO_8859_1_hungarian.h +16 -0
data/ext/stem_ISO_8859_2_romanian.h +16 -0
data/ext/stem_UTF_8_hungarian.h +16 -0
data/ext/stem_UTF_8_romanian.h +16 -0
data/ext/stem_UTF_8_turkish.h +16 -0
data/ext/stopwords.c +287 -278
data/ext/store.c +57 -51
data/ext/store.h +308 -286
data/ext/symbol.c +10 -0
data/ext/symbol.h +23 -0
data/ext/term_vectors.c +14 -293
data/ext/threading.h +22 -22
data/ext/win32.h +12 -4
data/lib/ferret.rb +2 -1
data/lib/ferret/browser.rb +1 -1
data/lib/ferret/field_symbol.rb +94 -0
data/lib/ferret/index.rb +221 -34
data/lib/ferret/number_tools.rb +6 -6
data/lib/ferret/version.rb +3 -0
data/test/{unit → long_running}/largefile/tc_largefile.rb +1 -1
data/test/test_helper.rb +7 -2
data/test/test_installed.rb +1 -0
data/test/threading/thread_safety_index_test.rb +10 -1
data/test/threading/thread_safety_read_write_test.rb +4 -7
data/test/threading/thread_safety_test.rb +0 -0
data/test/unit/analysis/tc_analyzer.rb +29 -27
data/test/unit/analysis/tc_token_stream.rb +23 -16
data/test/unit/index/tc_index.rb +116 -11
data/test/unit/index/tc_index_reader.rb +27 -27
data/test/unit/index/tc_index_writer.rb +10 -0
data/test/unit/index/th_doc.rb +38 -21
data/test/unit/search/tc_filter.rb +31 -10
data/test/unit/search/tc_index_searcher.rb +6 -0
data/test/unit/search/tm_searcher.rb +53 -1
data/test/unit/store/tc_fs_store.rb +40 -2
data/test/unit/store/tc_ram_store.rb +0 -0
data/test/unit/store/tm_store.rb +0 -0
data/test/unit/store/tm_store_lock.rb +7 -6
data/test/unit/tc_field_symbol.rb +26 -0
data/test/unit/ts_analysis.rb +0 -0
data/test/unit/ts_index.rb +0 -0
data/test/unit/ts_store.rb +0 -0
data/test/unit/ts_utils.rb +0 -0
data/test/unit/utils/tc_number_tools.rb +0 -0
data/test/utils/content_generator.rb +226 -0
metadata +262 -221
data/ext/inc/lang.h +0 -48
data/ext/inc/threading.h +0 -31
data/ext/stem_ISO_8859_1_english.c +0 -1156
data/ext/stem_ISO_8859_1_french.c +0 -1276
data/ext/stem_ISO_8859_1_italian.c +0 -1091
data/ext/stem_ISO_8859_1_norwegian.c +0 -296
data/ext/stem_ISO_8859_1_spanish.c +0 -1119
data/ext/stem_ISO_8859_1_swedish.c +0 -307
data/ext/stem_UTF_8_danish.c +0 -344
data/ext/stem_UTF_8_english.c +0 -1176
data/ext/stem_UTF_8_french.c +0 -1296
data/ext/stem_UTF_8_italian.c +0 -1113
data/ext/stem_UTF_8_norwegian.c +0 -302
data/ext/stem_UTF_8_portuguese.c +0 -1055
data/ext/stem_UTF_8_russian.c +0 -709
data/ext/stem_UTF_8_spanish.c +0 -1137
data/ext/stem_UTF_8_swedish.c +0 -313
data/lib/ferret_version.rb +0 -3

data/ext/q_parser.c CHANGED

@@ -58,6 +58,14 @@
 /* Using locations.  */
 #define YYLSP_NEEDED 0
+/* Substitute the variable and function names.  */
+#define yyparse frt_parse
+#define yylex   frt_lex
+#define yyerror frt_error
+#define yylval  frt_lval
+#define yychar  frt_char
+#define yydebug frt_debug
+#define yynerrs frt_nerrs
 /* Tokens.  */
@@ -90,14 +98,17 @@
 /* Copy the first part of user declarations.  */
-#line 1 "src/q_parser.y"
+#line 84 "src/q_parser.y"
 #include <string.h>
 #include <ctype.h>
 #include <wctype.h>
+#include <assert.h>
 #include "except.h"
 #include "search.h"
 #include "array.h"
+#include "symbol.h"
+#include "internal.h"
 typedef struct Phrase {
     int             size;
@@ -138,7 +149,7 @@ int qp_default_fuzzy_pre_len = 0;
 #if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
 typedef union YYSTYPE
-#line 27 "src/q_parser.y"
+#line 113 "src/q_parser.y"
 {
     Query *query;
     BooleanClause *bcls;
@@ -148,7 +159,7 @@ typedef union YYSTYPE
     char *str;
 }
 /* Line 187 of yacc.c.  */
-#line 152 "y.tab.c"
+#line 163 "src/q_parser.c"
 	YYSTYPE;
 # define yystype YYSTYPE /* obsolescent; will be withdrawn */
 # define YYSTYPE_IS_DECLARED 1
@@ -158,7 +169,7 @@ typedef union YYSTYPE
 /* Copy the second part of user declarations.  */
-#line 35 "src/q_parser.y"
+#line 121 "src/q_parser.y"
 static int yylex(YYSTYPE *lvalp, QParser *qp);
 static int yyerror(QParser *qp, char const *msg);
@@ -169,17 +180,19 @@ static Query *get_bool_q(BCArray *bca);
 static BCArray *first_cls(BooleanClause *boolean_clause);
 static BCArray *add_and_cls(BCArray *bca, BooleanClause *clause);
 static BCArray *add_or_cls(BCArray *bca, BooleanClause *clause);
-static BCArray *add_default_cls(QParser *qp, BCArray *bca, BooleanClause *clause);
+static BCArray *add_default_cls(QParser *qp, BCArray *bca,
+                                BooleanClause *clause);
 static void bca_destroy(BCArray *bca);
-static BooleanClause *get_bool_cls(Query *q, unsigned int occur);
+static BooleanClause *get_bool_cls(Query *q, BCType occur);
-static Query *get_term_q(QParser *qp, char *field, char *word);
-static Query *get_fuzzy_q(QParser *qp, char *field, char *word, char *slop);
-static Query *get_wild_q(QParser *qp, char *field, char *pattern);
+static Query *get_term_q(QParser *qp, Symbol field, char *word);
+static Query *get_fuzzy_q(QParser *qp, Symbol field, char *word,
+                          char *slop);
+static Query *get_wild_q(QParser *qp, Symbol field, char *pattern);
-static HashSet *first_field(QParser *qp, char *field);
-static HashSet *add_field(QParser *qp, char *field);
+static HashSet *first_field(QParser *qp, const char *field);
+static HashSet *add_field(QParser *qp, const char *field);
 static Query *get_phrase_q(QParser *qp, Phrase *phrase, char *slop);
@@ -188,22 +201,32 @@ static Phrase *ph_add_word(Phrase *self, char *word);
 static Phrase *ph_add_multi_word(Phrase *self, char *word);
 static void ph_destroy(Phrase *self);
-static Query *get_r_q(QParser *qp, char *field, char *from, char *to,
+static Query *get_r_q(QParser *qp, Symbol field, char *from, char *to,
                       bool inc_lower, bool inc_upper);
+static void qp_push_fields(QParser *self, HashSet *fields, bool destroy);
+static void qp_pop_fields(QParser *self);
+/**
+ * +FLDS+ calls +func+ for all fields on top of the field stack. +func+
+ * must return a query. If there is more than one field on top of FieldStack
+ * then +FLDS+ will combing all the queries returned by +func+ into a single
+ * BooleanQuery which it than assigns to +q+. If there is only one field, the
+ * return value of +func+ is assigned to +q+ directly.
+ */
 #define FLDS(q, func) do {\
     TRY {\
-        char *field;\
+        Symbol field;\
         if (qp->fields->size == 0) {\
             q = NULL;\
         } else if (qp->fields->size == 1) {\
-            field = (char *)qp->fields->elems[0];\
+            field = (Symbol)qp->fields->first->elem;\
             q = func;\
         } else {\
-            int i;Query *sq;\
+            Query *volatile sq; HashSetEntry *volatile hse;\
             q = bq_new_max(false, qp->max_clauses);\
-            for (i = 0; i < qp->fields->size; i++) {\
-                field = (char *)qp->fields->elems[i];\
+            for (hse = qp->fields->first; hse; hse = hse->next) {\
+                field = (Symbol)hse->elem;\
                 sq = func;\
                 TRY\
                   if (sq) bq_add_query_nr(q, sq, BC_SHOULD);\
@@ -234,7 +257,7 @@ static Query *get_r_q(QParser *qp, char *field, char *from, char *to,
 /* Line 216 of yacc.c.  */
-#line 238 "y.tab.c"
+#line 261 "src/q_parser.c"
 #ifdef short
 # undef short
@@ -535,14 +558,14 @@ static const yytype_int8 yyrhs[] =
 };
 /* YYRLINE[YYN] -- source line where rule number YYN was defined.  */
-static const yytype_uint8 yyrline[] =
+static const yytype_uint16 yyrline[] =
 {
-       0,   128,   128,   129,   131,   132,   133,   134,   136,   137,
-     138,   140,   141,   143,   144,   145,   146,   147,   148,   149,
-     151,   152,   153,   155,   157,   157,   159,   159,   159,   162,
-     163,   165,   166,   167,   168,   170,   171,   172,   173,   174,
-     176,   177,   178,   179,   180,   181,   182,   183,   184,   185,
-     186,   187
+       0,   226,   226,   227,   229,   230,   231,   232,   234,   235,
+     236,   238,   239,   241,   242,   243,   244,   245,   246,   247,
+     249,   250,   251,   253,   255,   255,   257,   257,   257,   260,
+     261,   263,   264,   265,   266,   268,   269,   270,   271,   272,
+     274,   275,   276,   277,   278,   279,   280,   281,   282,   283,
+     284,   285
 };
 #endif
@@ -1201,59 +1224,59 @@ yydestruct (yymsg, yytype, yyvaluep, qp)
   switch (yytype)
     {
       case 27: /* "bool_q" */
-#line 123 "src/q_parser.y"
+#line 221 "src/q_parser.y"
 	{ if ((yyvaluep->query) && qp->destruct) q_deref((yyvaluep->query)); };
-#line 1207 "y.tab.c"
+#line 1230 "src/q_parser.c"
 	break;
       case 28: /* "bool_clss" */
-#line 125 "src/q_parser.y"
+#line 223 "src/q_parser.y"
 	{ if ((yyvaluep->bclss) && qp->destruct) bca_destroy((yyvaluep->bclss)); };
-#line 1212 "y.tab.c"
+#line 1235 "src/q_parser.c"
 	break;
       case 29: /* "bool_cls" */
-#line 124 "src/q_parser.y"
+#line 222 "src/q_parser.y"
 	{ if ((yyvaluep->bcls) && qp->destruct) bc_deref((yyvaluep->bcls)); };
-#line 1217 "y.tab.c"
+#line 1240 "src/q_parser.c"
 	break;
       case 30: /* "boosted_q" */
-#line 123 "src/q_parser.y"
+#line 221 "src/q_parser.y"
 	{ if ((yyvaluep->query) && qp->destruct) q_deref((yyvaluep->query)); };
-#line 1222 "y.tab.c"
+#line 1245 "src/q_parser.c"
 	break;
       case 31: /* "q" */
-#line 123 "src/q_parser.y"
+#line 221 "src/q_parser.y"
 	{ if ((yyvaluep->query) && qp->destruct) q_deref((yyvaluep->query)); };
-#line 1227 "y.tab.c"
+#line 1250 "src/q_parser.c"
 	break;
       case 32: /* "term_q" */
-#line 123 "src/q_parser.y"
+#line 221 "src/q_parser.y"
 	{ if ((yyvaluep->query) && qp->destruct) q_deref((yyvaluep->query)); };
-#line 1232 "y.tab.c"
+#line 1255 "src/q_parser.c"
 	break;
       case 33: /* "wild_q" */
-#line 123 "src/q_parser.y"
+#line 221 "src/q_parser.y"
 	{ if ((yyvaluep->query) && qp->destruct) q_deref((yyvaluep->query)); };
-#line 1237 "y.tab.c"
+#line 1260 "src/q_parser.c"
 	break;
       case 34: /* "field_q" */
-#line 123 "src/q_parser.y"
+#line 221 "src/q_parser.y"
 	{ if ((yyvaluep->query) && qp->destruct) q_deref((yyvaluep->query)); };
-#line 1242 "y.tab.c"
+#line 1265 "src/q_parser.c"
 	break;
       case 39: /* "phrase_q" */
-#line 123 "src/q_parser.y"
+#line 221 "src/q_parser.y"
 	{ if ((yyvaluep->query) && qp->destruct) q_deref((yyvaluep->query)); };
-#line 1247 "y.tab.c"
+#line 1270 "src/q_parser.c"
 	break;
       case 40: /* "ph_words" */
-#line 126 "src/q_parser.y"
+#line 224 "src/q_parser.y"
 	{ if ((yyvaluep->phrase) && qp->destruct) ph_destroy((yyvaluep->phrase)); };
-#line 1252 "y.tab.c"
+#line 1275 "src/q_parser.c"
 	break;
       case 41: /* "range_q" */
-#line 123 "src/q_parser.y"
+#line 221 "src/q_parser.y"
 	{ if ((yyvaluep->query) && qp->destruct) q_deref((yyvaluep->query)); };
-#line 1257 "y.tab.c"
+#line 1280 "src/q_parser.c"
 	break;
       default:
@@ -1562,228 +1585,228 @@ yyreduce:
   switch (yyn)
     {
         case 2:
-#line 128 "src/q_parser.y"
+#line 226 "src/q_parser.y"
     {   qp->result = (yyval.query) = NULL; }
     break;
   case 3:
-#line 129 "src/q_parser.y"
+#line 227 "src/q_parser.y"
     { T qp->result = (yyval.query) = get_bool_q((yyvsp[(1) - (1)].bclss)); E }
     break;
   case 4:
-#line 131 "src/q_parser.y"
+#line 229 "src/q_parser.y"
     { T (yyval.bclss) = first_cls((yyvsp[(1) - (1)].bcls)); E }
     break;
   case 5:
-#line 132 "src/q_parser.y"
+#line 230 "src/q_parser.y"
     { T (yyval.bclss) = add_and_cls((yyvsp[(1) - (3)].bclss), (yyvsp[(3) - (3)].bcls)); E }
     break;
   case 6:
-#line 133 "src/q_parser.y"
+#line 231 "src/q_parser.y"
     { T (yyval.bclss) = add_or_cls((yyvsp[(1) - (3)].bclss), (yyvsp[(3) - (3)].bcls)); E }
     break;
   case 7:
-#line 134 "src/q_parser.y"
+#line 232 "src/q_parser.y"
     { T (yyval.bclss) = add_default_cls(qp, (yyvsp[(1) - (2)].bclss), (yyvsp[(2) - (2)].bcls)); E }
     break;
   case 8:
-#line 136 "src/q_parser.y"
+#line 234 "src/q_parser.y"
     { T (yyval.bcls) = get_bool_cls((yyvsp[(2) - (2)].query), BC_MUST); E }
     break;
   case 9:
-#line 137 "src/q_parser.y"
+#line 235 "src/q_parser.y"
     { T (yyval.bcls) = get_bool_cls((yyvsp[(2) - (2)].query), BC_MUST_NOT); E }
     break;
   case 10:
-#line 138 "src/q_parser.y"
+#line 236 "src/q_parser.y"
     { T (yyval.bcls) = get_bool_cls((yyvsp[(1) - (1)].query), BC_SHOULD); E }
     break;
   case 12:
-#line 141 "src/q_parser.y"
+#line 239 "src/q_parser.y"
     { T if ((yyvsp[(1) - (3)].query)) sscanf((yyvsp[(3) - (3)].str),"%f",&((yyvsp[(1) - (3)].query)->boost));  (yyval.query)=(yyvsp[(1) - (3)].query); E }
     break;
   case 14:
-#line 144 "src/q_parser.y"
+#line 242 "src/q_parser.y"
     { T (yyval.query) = bq_new_max(true, qp->max_clauses); E }
     break;
   case 15:
-#line 145 "src/q_parser.y"
+#line 243 "src/q_parser.y"
     { T (yyval.query) = get_bool_q((yyvsp[(2) - (3)].bclss)); E }
     break;
   case 20:
-#line 151 "src/q_parser.y"
+#line 249 "src/q_parser.y"
     { FLDS((yyval.query), get_term_q(qp, field, (yyvsp[(1) - (1)].str))); Y}
     break;
   case 21:
-#line 152 "src/q_parser.y"
+#line 250 "src/q_parser.y"
     { FLDS((yyval.query), get_fuzzy_q(qp, field, (yyvsp[(1) - (3)].str), (yyvsp[(3) - (3)].str))); Y}
     break;
   case 22:
-#line 153 "src/q_parser.y"
+#line 251 "src/q_parser.y"
     { FLDS((yyval.query), get_fuzzy_q(qp, field, (yyvsp[(1) - (2)].str), NULL)); Y}
     break;
   case 23:
-#line 155 "src/q_parser.y"
+#line 253 "src/q_parser.y"
     { FLDS((yyval.query), get_wild_q(qp, field, (yyvsp[(1) - (1)].str))); Y}
     break;
   case 24:
-#line 157 "src/q_parser.y"
-    { qp->fields = qp->def_fields; }
+#line 255 "src/q_parser.y"
+    { qp_pop_fields(qp); }
     break;
   case 25:
-#line 158 "src/q_parser.y"
+#line 256 "src/q_parser.y"
     { (yyval.query) = (yyvsp[(3) - (4)].query); }
     break;
   case 26:
-#line 159 "src/q_parser.y"
-    { qp->fields = qp->all_fields; }
+#line 257 "src/q_parser.y"
+    { qp_push_fields(qp, qp->all_fields, false); }
     break;
   case 27:
-#line 159 "src/q_parser.y"
-    {qp->fields = qp->def_fields;}
+#line 257 "src/q_parser.y"
+    { qp_pop_fields(qp); }
     break;
   case 28:
-#line 160 "src/q_parser.y"
+#line 258 "src/q_parser.y"
     { (yyval.query) = (yyvsp[(4) - (5)].query); }
     break;
   case 29:
-#line 162 "src/q_parser.y"
+#line 260 "src/q_parser.y"
     { (yyval.hashset) = first_field(qp, (yyvsp[(1) - (1)].str)); }
     break;
   case 30:
-#line 163 "src/q_parser.y"
+#line 261 "src/q_parser.y"
     { (yyval.hashset) = add_field(qp, (yyvsp[(3) - (3)].str));}
     break;
   case 31:
-#line 165 "src/q_parser.y"
+#line 263 "src/q_parser.y"
     { (yyval.query) = get_phrase_q(qp, (yyvsp[(2) - (3)].phrase), NULL); }
     break;
   case 32:
-#line 166 "src/q_parser.y"
+#line 264 "src/q_parser.y"
     { (yyval.query) = get_phrase_q(qp, (yyvsp[(2) - (5)].phrase), (yyvsp[(5) - (5)].str)); }
     break;
   case 33:
-#line 167 "src/q_parser.y"
+#line 265 "src/q_parser.y"
     { (yyval.query) = NULL; }
     break;
   case 34:
-#line 168 "src/q_parser.y"
+#line 266 "src/q_parser.y"
     { (yyval.query) = NULL; (void)(yyvsp[(4) - (4)].str);}
     break;
   case 35:
-#line 170 "src/q_parser.y"
+#line 268 "src/q_parser.y"
     { (yyval.phrase) = ph_first_word((yyvsp[(1) - (1)].str)); }
     break;
   case 36:
-#line 171 "src/q_parser.y"
+#line 269 "src/q_parser.y"
     { (yyval.phrase) = ph_first_word(NULL); }
     break;
   case 37:
-#line 172 "src/q_parser.y"
+#line 270 "src/q_parser.y"
     { (yyval.phrase) = ph_add_word((yyvsp[(1) - (2)].phrase), (yyvsp[(2) - (2)].str)); }
     break;
   case 38:
-#line 173 "src/q_parser.y"
+#line 271 "src/q_parser.y"
     { (yyval.phrase) = ph_add_word((yyvsp[(1) - (3)].phrase), NULL); }
     break;
   case 39:
-#line 174 "src/q_parser.y"
+#line 272 "src/q_parser.y"
     { (yyval.phrase) = ph_add_multi_word((yyvsp[(1) - (3)].phrase), (yyvsp[(3) - (3)].str));  }
     break;
   case 40:
-#line 176 "src/q_parser.y"
+#line 274 "src/q_parser.y"
     { FLDS((yyval.query), get_r_q(qp, field, (yyvsp[(2) - (4)].str),  (yyvsp[(3) - (4)].str),  true,  true)); Y}
     break;
   case 41:
-#line 177 "src/q_parser.y"
+#line 275 "src/q_parser.y"
     { FLDS((yyval.query), get_r_q(qp, field, (yyvsp[(2) - (4)].str),  (yyvsp[(3) - (4)].str),  true,  false)); Y}
     break;
   case 42:
-#line 178 "src/q_parser.y"
+#line 276 "src/q_parser.y"
     { FLDS((yyval.query), get_r_q(qp, field, (yyvsp[(2) - (4)].str),  (yyvsp[(3) - (4)].str),  false, true)); Y}
     break;
   case 43:
-#line 179 "src/q_parser.y"
+#line 277 "src/q_parser.y"
     { FLDS((yyval.query), get_r_q(qp, field, (yyvsp[(2) - (4)].str),  (yyvsp[(3) - (4)].str),  false, false)); Y}
     break;
   case 44:
-#line 180 "src/q_parser.y"
+#line 278 "src/q_parser.y"
     { FLDS((yyval.query), get_r_q(qp, field, NULL,(yyvsp[(2) - (3)].str),  false, false)); Y}
     break;
   case 45:
-#line 181 "src/q_parser.y"
+#line 279 "src/q_parser.y"
     { FLDS((yyval.query), get_r_q(qp, field, NULL,(yyvsp[(2) - (3)].str),  false, true)); Y}
     break;
   case 46:
-#line 182 "src/q_parser.y"
+#line 280 "src/q_parser.y"
     { FLDS((yyval.query), get_r_q(qp, field, (yyvsp[(2) - (3)].str),  NULL,true,  false)); Y}
     break;
   case 47:
-#line 183 "src/q_parser.y"
+#line 281 "src/q_parser.y"
     { FLDS((yyval.query), get_r_q(qp, field, (yyvsp[(2) - (3)].str),  NULL,false, false)); Y}
     break;
   case 48:
-#line 184 "src/q_parser.y"
+#line 282 "src/q_parser.y"
     { FLDS((yyval.query), get_r_q(qp, field, NULL,(yyvsp[(2) - (2)].str),  false, false)); Y}
     break;
   case 49:
-#line 185 "src/q_parser.y"
+#line 283 "src/q_parser.y"
     { FLDS((yyval.query), get_r_q(qp, field, NULL,(yyvsp[(3) - (3)].str),  false, true)); Y}
     break;
   case 50:
-#line 186 "src/q_parser.y"
+#line 284 "src/q_parser.y"
     { FLDS((yyval.query), get_r_q(qp, field, (yyvsp[(3) - (3)].str),  NULL,true,  false)); Y}
     break;
   case 51:
-#line 187 "src/q_parser.y"
+#line 285 "src/q_parser.y"
     { FLDS((yyval.query), get_r_q(qp, field, (yyvsp[(2) - (2)].str),  NULL,false, false)); Y}
     break;
 /* Line 1267 of yacc.c.  */
-#line 1787 "y.tab.c"
+#line 1810 "src/q_parser.c"
       default: break;
     }
   YY_SYMBOL_PRINT ("-> $$ =", yyr1[yyn], &yyval, &yyloc);
@@ -1997,12 +2020,22 @@ yyreturn:
 }
-#line 189 "src/q_parser.y"
+#line 287 "src/q_parser.y"
-const char *special_char = "&:()[]{}!\"~^|<>=*?+-";
-const char *not_word =   " \t()[]{}!\"~^|<>=";
+static const char *special_char = "&:()[]{}!\"~^|<>=*?+-";
+static const char *not_word =   " \t()[]{}!\"~^|<>=";
+/**
+ * +get_word+ gets the next query-word from the query string. A query-word is
+ * basically a string of non-special or escaped special characters. It is
+ * Analyzer agnostic. It is up to the get_*_q methods to tokenize the word and
+ * turn it into a +Query+. See the documentation for each get_*_q method to
+ * see how it handles tokenization.
+ *
+ * Note that +get_word+ is also responsible for returning field names and
+ * matching the special tokens 'AND', 'NOT', 'REQ' and 'OR'.
+ */
 static int get_word(YYSTYPE *lvalp, QParser *qp)
 {
     bool is_wild = false;
@@ -2046,8 +2079,10 @@ static int get_word(YYSTYPE *lvalp, QParser *qp)
             default:
                 *bufp++ = c;
         }
-        /* we've exceeded the static buffer. switch to the dynamic
-           one. */
+        /* we've exceeded the static buffer. switch to the dynamic one. The
+         * dynamic buffer is allocated enough space to hold the whole query
+         * string so it's capacity doesn't need to be checked again once
+         * allocated. */
         if (!qp->dynbuf && ((bufp - buf) == MAX_WORD_SIZE)) {
             qp->dynbuf = ALLOC_AND_ZERO_N(char, strlen(qp->qstr) + 1);
             strncpy(qp->dynbuf, buf, MAX_WORD_SIZE);
@@ -2057,8 +2092,8 @@ static int get_word(YYSTYPE *lvalp, QParser *qp)
     }
 get_word_done:
     qp->qstrp--;
-    /* check for keywords. There are only four so we have a bit of a hack which
-     * just checks for all of them. */
+    /* check for keywords. There are only four so we have a bit of a hack
+     * which just checks for all of them. */
     *bufp = '\0';
     len = (int)(bufp - buf);
     if (qp->use_keywords) {
@@ -2078,6 +2113,33 @@ get_word_done:
     return QWRD;
 }
+/**
+ * +yylex+ is the lexing method called by the QueryParser. It breaks the
+ * query up into special characters;
+ *
+ *     ( "&:()[]{}!\"~^|<>=*?+-" )
+ *
+ * and tokens;
+ *
+ *   - QWRD
+ *   - WILD_STR
+ *   - AND['AND', '&&']
+ *   - OR['OR', '||']
+ *   - REQ['REQ', '+']
+ *   - NOT['NOT', '-', '~']
+ *
+ * QWRD tokens are query word tokens which are made up of characters other
+ * than the special characters. They can also contain special characters when
+ * escaped with a backslash '\'. WILD_STR is the same as QWRD except that it
+ * may also contain '?' and '*' characters.
+ *
+ * If any of the special chars are seen they will usually be returned straight
+ * away. The exceptions are the wild chars '*' and '?', and '&' which will be
+ * treated as a plain old word character unless followed by another '&'.
+ *
+ * If no special characters or tokens are found then yylex delegates to
+ * +get_word+ which will fetch the next query-word.
+ */
 static int yylex(YYSTYPE *lvalp, QParser *qp)
 {
     char c, nc;
@@ -2116,6 +2178,11 @@ static int yylex(YYSTYPE *lvalp, QParser *qp)
     return get_word(lvalp, qp);
 }
+/**
+ * yyerror gets called if there is an parse error with the yacc parser.
+ * It is responsible for clearing any memory that was allocated during the
+ * parsing process.
+ */
 static int yyerror(QParser *qp, char const *msg)
 {
     qp->destruct = true;
@@ -2131,19 +2198,30 @@ static int yyerror(QParser *qp, char const *msg)
                  "couldn't parse query ``%s''. Error message "
                  " was %s", buf, (char *)msg);
     }
+    while (qp->fields_top->next != NULL) {
+        qp_pop_fields(qp);
+    }
     return 0;
 }
 #define BQ(query) ((BooleanQuery *)(query))
-static TokenStream *get_cached_ts(QParser *qp, char *field, char *text)
+/**
+ * The QueryParser caches a tokenizer for each field so that it doesn't need
+ * to allocate a new tokenizer for each term in the query. This would be quite
+ * expensive as tokenizers use quite a large hunk of memory.
+ *
+ * This method returns the query parser for a particular field and sets it up
+ * with the text to be tokenized.
+ */
+static TokenStream *get_cached_ts(QParser *qp, Symbol field, char *text)
 {
     TokenStream *ts;
-    if (!qp->tokenized_fields || hs_exists(qp->tokenized_fields, field)) {
-        ts = h_get(qp->ts_cache, field);
+    if (hs_exists(qp->tokenized_fields, field)) {
+        ts = (TokenStream *)h_get(qp->ts_cache, field);
         if (!ts) {
             ts = a_get_ts(qp->analyzer, field, text);
-            h_set(qp->ts_cache, estrdup(field), ts);
+            h_set(qp->ts_cache, field, ts);
         }
         else {
             ts->reset(ts, text);
@@ -2156,16 +2234,11 @@ static TokenStream *get_cached_ts(QParser *qp, char *field, char *text)
     return ts;
 }
-static char *get_cached_field(HashTable *field_cache, const char *field)
-{
-    char *cached_field = h_get(field_cache, field);
-    if (!cached_field) {
-        cached_field = estrdup(field);
-        h_set(field_cache, cached_field, cached_field);
-    }
-    return cached_field;
-}
+/**
+ * Turns a BooleanClause array into a BooleanQuery. It will optimize the query
+ * if 0 or 1 clauses are present to NULL or the actual query in the clause
+ * respectively.
+ */
 static Query *get_bool_q(BCArray *bca)
 {
     Query *q;
@@ -2201,6 +2274,10 @@ static Query *get_bool_q(BCArray *bca)
     return q;
 }
+/**
+ * Base method for appending BooleanClauses to a BooleanClause array. This
+ * method doesn't care about the type of clause (MUST, SHOULD, MUST_NOT).
+ */
 static void bca_add_clause(BCArray *bca, BooleanClause *clause)
 {
     if (bca->size >= bca->capa) {
@@ -2211,6 +2288,10 @@ static void bca_add_clause(BCArray *bca, BooleanClause *clause)
     bca->size++;
 }
+/**
+ * Add the first clause to a BooleanClause array. This method is also
+ * responsible for allocating a new BooleanClause array.
+ */
 static BCArray *first_cls(BooleanClause *clause)
 {
     BCArray *bca = ALLOC_AND_ZERO(BCArray);
@@ -2222,6 +2303,12 @@ static BCArray *first_cls(BooleanClause *clause)
     return bca;
 }
+/**
+ * Add AND clause to the BooleanClause array. The means that it will set the
+ * clause being added and the previously added clause from SHOULD clauses to
+ * MUST clauses. (If they are currently MUST_NOT clauses they stay as they
+ * are.)
+ */
 static BCArray *add_and_cls(BCArray *bca, BooleanClause *clause)
 {
     if (clause) {
@@ -2238,6 +2325,9 @@ static BCArray *add_and_cls(BCArray *bca, BooleanClause *clause)
     return bca;
 }
+/**
+ * Add SHOULD clause to the BooleanClause array.
+ */
 static BCArray *add_or_cls(BCArray *bca, BooleanClause *clause)
 {
     if (clause) {
@@ -2246,6 +2336,10 @@ static BCArray *add_or_cls(BCArray *bca, BooleanClause *clause)
     return bca;
 }
+/**
+ * Add AND or OR clause to the BooleanClause array, depending on the default
+ * clause type.
+ */
 static BCArray *add_default_cls(QParser *qp, BCArray *bca,
                                 BooleanClause *clause)
 {
@@ -2258,6 +2352,9 @@ static BCArray *add_default_cls(QParser *qp, BCArray *bca,
     return bca;
 }
+/**
+ * destroy array of BooleanClauses
+ */
 static void bca_destroy(BCArray *bca)
 {
     int i;
@@ -2268,7 +2365,10 @@ static void bca_destroy(BCArray *bca)
     free(bca);
 }
-static BooleanClause *get_bool_cls(Query *q, unsigned int occur)
+/**
+ * Turn a query into a BooleanClause for addition to a BooleanQuery.
+ */
+static BooleanClause *get_bool_cls(Query *q, BCType occur)
 {
     if (q) {
         return bc_new(q, occur);
@@ -2278,7 +2378,15 @@ static BooleanClause *get_bool_cls(Query *q, unsigned int occur)
     }
 }
-static Query *get_term_q(QParser *qp, char *field, char *word)
+/**
+ * Create a TermQuery. The word will be tokenized and if the tokenization
+ * produces more than one token, a PhraseQuery will be returned. For example,
+ * if the word is dbalmain@gmail.com and a LetterTokenizer is used then a
+ * PhraseQuery "dbalmain gmail com" will be returned which is actually exactly
+ * what we want as it will match any documents containing the same email
+ * address and tokenized with the same tokenizer.
+ */
+static Query *get_term_q(QParser *qp, Symbol field, char *word)
 {
     Query *q;
     Token *token;
@@ -2311,7 +2419,13 @@ static Query *get_term_q(QParser *qp, char *field, char *word)
     return q;
 }
-static Query *get_fuzzy_q(QParser *qp, char *field, char *word, char *slop_str)
+/**
+ * Create a FuzzyQuery. The word will be tokenized and only the first token
+ * will be used. If there are any more tokens after tokenization, they will be
+ * ignored.
+ */
+static Query *get_fuzzy_q(QParser *qp, Symbol field, char *word,
+                          char *slop_str)
 {
     Query *q;
     Token *token;
@@ -2332,6 +2446,10 @@ static Query *get_fuzzy_q(QParser *qp, char *field, char *word, char *slop_str)
     return q;
 }
+/**
+ * Downcase a string taking locale into account and works for multibyte
+ * character sets.
+ */
 static char *lower_str(char *str)
 {
     const int max_len = (int)strlen(str) + 1;
@@ -2357,7 +2475,16 @@ static char *lower_str(char *str)
     return str;
 }
-static Query *get_wild_q(QParser *qp, char *field, char *pattern)
+/**
+ * Create a WildCardQuery. No tokenization will be performed on the pattern
+ * but the pattern will be downcased if +qp->wild_lower+ is set to true and
+ * the field in question is a tokenized field.
+ *
+ * Note: this method will not always return a WildCardQuery. It could be
+ * optimized to a MatchAllQuery if the pattern is '*' or a PrefixQuery if the
+ * only wild char (*, ?) in the pattern is a '*' at the end of the pattern.
+ */
+static Query *get_wild_q(QParser *qp, Symbol field, char *pattern)
 {
     Query *q;
     bool is_prefix = false;
@@ -2398,22 +2525,32 @@ static Query *get_wild_q(QParser *qp, char *field, char *pattern)
     return q;
 }
-static HashSet *add_field(QParser *qp, char *field)
+/**
+ * Adds another field to the top of the FieldStack.
+ */
+static HashSet *add_field(QParser *qp, const char *field_name)
 {
+    Symbol field = intern(field_name);
     if (qp->allow_any_fields || hs_exists(qp->all_fields, field)) {
-        hs_add(qp->fields, get_cached_field(qp->field_cache, field));
+        hs_add(qp->fields, field);
     }
     return qp->fields;
 }
-static HashSet *first_field(QParser *qp, char *field)
+/**
+ * The method gets called when a field modifier ("field1|field2:") is seen. It
+ * will push a new FieldStack object onto the stack and add +field+ to its
+ * fields set.
+ */
+static HashSet *first_field(QParser *qp, const char *field)
 {
-    qp->fields = qp->fields_buf;
-    qp->fields->size = 0;
-    h_clear(qp->fields->ht);
+    qp_push_fields(qp, hs_new_ptr(NULL), true);
     return add_field(qp, field);
 }
+/**
+ * Destroy a phrase object freeing all allocated memory.
+ */
 static void ph_destroy(Phrase *self)
 {
     int i;
@@ -2425,6 +2562,9 @@ static void ph_destroy(Phrase *self)
 }
+/**
+ * Allocate a new Phrase object
+ */
 static Phrase *ph_new()
 {
   Phrase *self = ALLOC_AND_ZERO(Phrase);
@@ -2433,6 +2573,10 @@ static Phrase *ph_new()
   return self;
 }
+/**
+ * Add the first word to the phrase. This method is also in charge of
+ * allocating a new Phrase object.
+ */
 static Phrase *ph_first_word(char *word)
 {
     Phrase *self = ph_new();
@@ -2444,6 +2588,9 @@ static Phrase *ph_first_word(char *word)
     return self;
 }
+/**
+ * Add a new word to the Phrase
+ */
 static Phrase *ph_add_word(Phrase *self, char *word)
 {
     if (word) {
@@ -2466,6 +2613,10 @@ static Phrase *ph_add_word(Phrase *self, char *word)
     return self;
 }
+/**
+ * Adds a word to the Phrase object in the same position as the previous word
+ * added to the Phrase. This will later be turned into a multi-PhraseQuery.
+ */
 static Phrase *ph_add_multi_word(Phrase *self, char *word)
 {
     const int index = self->size - 1;
@@ -2477,7 +2628,35 @@ static Phrase *ph_add_multi_word(Phrase *self, char *word)
     return self;
 }
-static Query *get_phrase_query(QParser *qp, char *field,
+/**
+ * Build a phrase query for a single field. It might seem like a better idea
+ * to build the PhraseQuery once and duplicate it for each field but this
+ * would be buggy in the case of PerFieldAnalyzers in which case a different
+ * tokenizer could be used for each field.
+ *
+ * Note that the query object returned by this method is not always a
+ * PhraseQuery. If there is only one term in the query then the query is
+ * simplified to a TermQuery. If there are multiple terms but only a single
+ * position, then a MultiTermQuery is retured.
+ *
+ * Note that each word in the query gets tokenized. Unlike get_term_q, if the
+ * word gets tokenized into more than one token, the rest of the tokens are
+ * ignored. For example, if you have the phrase;
+ *
+ *      "email: dbalmain@gmail.com"
+ *
+ * the Phrase object will contain to positions with the words 'email:' and
+ * 'dbalmain@gmail.com'. Now, if you are using a LetterTokenizer then the
+ * second word will be tokenized into the tokens ['dbalmain', 'gmail', 'com']
+ * and only the first token will be used, so the resulting phrase query will
+ * actually look like this;
+ *
+ *      "email dbalmain"
+ *
+ * This problem can easily be solved by using the StandardTokenizer or any
+ * custom tokenizer which will leave dbalmain@gmail.com as a single token.
+ */
+static Query *get_phrase_query(QParser *qp, Symbol field,
                                Phrase *phrase, char *slop_str)
 {
     const int pos_cnt = phrase->size;
@@ -2497,13 +2676,14 @@ static Query *get_phrase_query(QParser *qp, char *field,
             for (i = 0; i < word_count; i++) {
                 token = ts_next(get_cached_ts(qp, field, words[i]));
-                free(words[i]);
                 if (token) {
+                    free(words[i]);
                     last_word = words[i] = estrdup(token->text);
                     ++term_cnt;
                 }
                 else {
-                    words[i] = estrdup("");
+                    /* empty words will later be ignored */
+                    words[i][0] = '\0';
                 }
             }
@@ -2517,6 +2697,7 @@ static Query *get_phrase_query(QParser *qp, char *field,
                 default:
                     q = multi_tq_new_conf(field, term_cnt, 0.0);
                     for (i = 0; i < word_count; i++) {
+                        /* ignore empty words */
                         if (words[i][0]) {
                             multi_tq_add_term(q, words[i]);
                         }
@@ -2582,19 +2763,31 @@ static Query *get_phrase_query(QParser *qp, char *field,
     return q;
 }
+/**
+ * Get a phrase query from the Phrase object. The Phrase object is built up by
+ * the query parser as the all PhraseQuery didn't work well for this. Once the
+ * PhraseQuery has been built the Phrase object needs to be destroyed.
+ */
 static Query *get_phrase_q(QParser *qp, Phrase *phrase, char *slop_str)
 {
-    Query *q = NULL;
+    Query *volatile q = NULL;
     FLDS(q, get_phrase_query(qp, field, phrase, slop_str));
     ph_destroy(phrase);
     return q;
 }
-static Query *get_r_q(QParser *qp, char *field, char *from, char *to,
+/**
+ * Gets a RangeQuery object.
+ *
+ * Just like with WildCardQuery, RangeQuery needs to downcase its terms if the
+ * tokenizer also downcased its terms.
+ */
+static Query *get_r_q(QParser *qp, Symbol field, char *from, char *to,
                       bool inc_lower, bool inc_upper)
 {
     Query *rq;
-    if (qp->wild_lower) {
+    if (qp->wild_lower
+        && (!qp->tokenized_fields || hs_exists(qp->tokenized_fields, field))) {
         if (from) {
             lower_str(from);
         }
@@ -2603,6 +2796,9 @@ static Query *get_r_q(QParser *qp, char *field, char *from, char *to,
         }
     }
 /*
+ * terms don't get tokenized as it doesn't really make sense to do so for
+ * range queries.
     if (from) {
         TokenStream *stream = get_cached_ts(qp, field, from);
         Token *token = ts_next(stream);
@@ -2615,34 +2811,81 @@ static Query *get_r_q(QParser *qp, char *field, char *from, char *to,
     }
 */
-    rq = rq_new(field, from, to, inc_lower, inc_upper);
+    rq = qp->use_typed_range_query ?
+        trq_new(field, from, to, inc_lower, inc_upper) :
+        rq_new(field, from, to, inc_lower, inc_upper);
     return rq;
 }
-void qp_destroy(QParser *self)
+/**
+ * Every time the query parser sees a new field modifier ("field1|field2:")
+ * it pushes a new FieldStack object onto the stack and sets its fields to the
+ * fields specified in the fields modifier. If the field modifier is '*',
+ * fs->fields is set to all_fields. fs->fields is set to +qp->def_field+ at
+ * the bottom of the stack (ie the very first set of fields pushed onto the
+ * stack).
+ */
+static void qp_push_fields(QParser *self, HashSet *fields, bool destroy)
 {
-    if (self->close_def_fields) {
-        hs_destroy(self->def_fields);
+    FieldStack *fs = ALLOC(FieldStack);
+    fs->next    = self->fields_top;
+    fs->fields  = fields;
+    fs->destroy = destroy;
+    self->fields_top = fs;
+    self->fields = fields;
+}
+/**
+ * Pops the top of the fields stack and frees any memory used by it. This will
+ * get called when query modified by a field modifier ("field1|field2:") has
+ * been fully parsed and the field specifier no longer applies.
+ */
+static void qp_pop_fields(QParser *self)
+{
+    FieldStack *fs = self->fields_top;
+    if (fs->destroy) {
+        hs_destroy(fs->fields);
+    }
+    self->fields_top = fs->next;
+    if (self->fields_top) {
+        self->fields = self->fields_top->fields;
     }
-    if (self->tokenized_fields) {
+    free(fs);
+}
+/**
+ * Free all memory allocated by the QueryParser.
+ */
+void qp_destroy(QParser *self)
+{
+    if (self->tokenized_fields != self->all_fields) {
         hs_destroy(self->tokenized_fields);
     }
-    if (self->dynbuf) {
-        free(self->dynbuf);
+    if (self->def_fields != self->all_fields) {
+        hs_destroy(self->def_fields);
     }
     hs_destroy(self->all_fields);
-    hs_destroy(self->fields_buf);
-    h_destroy(self->field_cache);
+    qp_pop_fields(self);
+    assert(NULL == self->fields_top);
     h_destroy(self->ts_cache);
     tk_destroy(self->non_tokenizer);
     a_deref(self->analyzer);
     free(self);
 }
-QParser *qp_new(HashSet *all_fields, HashSet *def_fields,
-                HashSet *tokenized_fields, Analyzer *analyzer)
+/**
+ * Creates a new QueryParser setting all boolean parameters to their defaults.
+ * If +def_fields+ is NULL then +all_fields+ is used in place of +def_fields+.
+ * Not also that this method ensures that all fields that exist in
+ * +def_fields+ must also exist in +all_fields+. This should make sense.
+ */
+QParser *qp_new(Analyzer *analyzer)
 {
-    int i;
     QParser *self = ALLOC(QParser);
     self->or_default = true;
     self->wild_lower = true;
@@ -2651,48 +2894,66 @@ QParser *qp_new(HashSet *all_fields, HashSet *def_fields,
     self->handle_parse_errors = false;
     self->allow_any_fields = false;
     self->use_keywords = true;
+    self->use_typed_range_query = false;
     self->def_slop = 0;
-    self->fields_buf = hs_new_str(NULL);
-    self->all_fields = all_fields;
-    self->tokenized_fields = tokenized_fields;
-    if (def_fields) {
-        self->def_fields = def_fields;
-        for (i = 0; i < self->def_fields->size; i++) {
-            if (!hs_exists(self->all_fields, self->def_fields->elems[i])) {
-                hs_add(self->all_fields, estrdup(self->def_fields->elems[i]));
-            }
-        }
-        self->close_def_fields = true;
-    }
-    else {
-        self->def_fields = all_fields;
-        self->close_def_fields = false;
-    }
-    self->field_cache = h_new_str((free_ft)NULL, &free);
-    for (i = 0; i < self->all_fields->size; i++) {
-        char *field = estrdup(self->all_fields->elems[i]);
-        h_set(self->field_cache, field, field);
-    }
-    self->fields = self->def_fields;
+    self->tokenized_fields = hs_new_ptr(NULL);
+    self->all_fields = hs_new_ptr(NULL);
+    self->def_fields = hs_new_ptr(NULL);
+    self->fields_top = NULL;
+    qp_push_fields(self, self->def_fields, false);
     /* make sure all_fields contains the default fields */
     self->analyzer = analyzer;
-    self->ts_cache = h_new_str(&free, (free_ft)&ts_deref);
+    self->ts_cache = h_new_ptr((free_ft)&ts_deref);
     self->buf_index = 0;
-    self->dynbuf = 0;
+    self->dynbuf = NULL;
     self->non_tokenizer = non_tokenizer_new();
     mutex_init(&self->mutex, NULL);
     return self;
 }
+void qp_add_field(QParser *self,
+                  Symbol field,
+                  bool is_default,
+                  bool is_tokenized)
+{
+    hs_add(self->all_fields, field);
+    if (is_default) {
+        hs_add(self->def_fields, field);
+    }
+    if (is_tokenized) {
+        hs_add(self->tokenized_fields, field);
+    }
+}
 /* these chars have meaning within phrases */
 static const char *PHRASE_CHARS = "<>|\"";
-static void str_insert(char *str, int len, char chr)
+/**
+ * +str_insert_char+ inserts a character at the beginning of a string by
+ * shifting the rest of the string right.
+ */
+static void str_insert_char(char *str, int len, char chr)
 {
     memmove(str+1, str, len*sizeof(char));
     *str = chr;
 }
+/**
+ * +qp_clean_str+ basically scans the query string and ensures that all open
+ * and close parentheses '()' and quotes '"' are balanced. It does this by
+ * inserting or appending extra parentheses or quotes to the string. This
+ * obviously won't necessarily be exactly what the user wanted but we are
+ * never going to know that anyway. The main job of this method is to help the
+ * query at least parse correctly.
+ *
+ * It also checks that all special characters within phrases (ie between
+ * quotes) are escaped correctly unless they have meaning within a phrase
+ * ( <>,|," ). Note that '<' and '>' will also be escaped unless the appear
+ * together like so; '<>'.
+ */
 char *qp_clean_str(char *str)
 {
     int b, pb = -1;
@@ -2711,8 +2972,8 @@ char *qp_clean_str(char *str)
                 *nsp++ = '\\'; /* this was left off the first time through */
             }
             *nsp++ = b;
-            /* \\ has escaped itself so has no power. Assign pb random char : */
-            pb = ((b == '\\') ? ':' : b);
+            /* \ has escaped itself so has no power. Assign pb random char 'r' */
+            pb = ((b == '\\') ? 'r' : b);
             continue;
         }
         switch (b) {
@@ -2737,7 +2998,7 @@ char *qp_clean_str(char *str)
             case ')':
                 if (!quote_open) {
                     if (br_cnt == 0) {
-                        str_insert(new_str, (int)(nsp - new_str), '(');
+                        str_insert_char(new_str, (int)(nsp - new_str), '(');
                         nsp++;
                     }
                     else {
@@ -2782,18 +3043,35 @@ char *qp_clean_str(char *str)
     return new_str;
 }
-Query *qp_get_bad_query(QParser *qp, char *str)
+/**
+ * Takes a string and finds whatever tokens it can using the QueryParser's
+ * analyzer. It then turns these tokens (if any) into a boolean query. If it
+ * fails to find any tokens, this method will return NULL.
+ */
+static Query *qp_get_bad_query(QParser *qp, char *str)
 {
     Query *volatile q = NULL;
     qp->recovering = true;
+    assert(qp->fields_top->next == NULL);
     FLDS(q, get_term_q(qp, field, str));
     return q;
 }
+/**
+ * +qp_parse+ takes a string and turns it into a Query object using Ferret's
+ * query language. It must either raise an error or return a query object. It
+ * must not return NULL. If the yacc parser fails it will use a very basic
+ * boolean query parser which takes whatever tokens it can find in the query
+ * and terns them into a boolean query on the default fields.
+ */
 Query *qp_parse(QParser *self, char *qstr)
 {
     Query *result = NULL;
     mutex_lock(&self->mutex);
+    /* if qp->fields_top->next is not NULL we have a left over field-stack
+     * object that was not popped during the last query parse */
+    assert(NULL == self->fields_top->next);
     self->recovering = self->destruct = false;
     if (self->clean_str) {
         self->qstrp = self->qstr = qp_clean_str(qstr);