RubyGems - ferret - Versions diffs - 0.11.6 → 0.11.8.4 - Mend

ferret 0.11.6 → 0.11.8.4

Files changed (185) hide show

data/README +10 -22
data/RELEASE_CHANGES +137 -0
data/RELEASE_NOTES +60 -0
data/Rakefile +379 -274
data/TODO +100 -8
data/bin/ferret-browser +0 -0
data/ext/BZLIB_blocksort.c +1094 -0
data/ext/BZLIB_bzlib.c +1578 -0
data/ext/BZLIB_compress.c +672 -0
data/ext/BZLIB_crctable.c +104 -0
data/ext/BZLIB_decompress.c +626 -0
data/ext/BZLIB_huffman.c +205 -0
data/ext/BZLIB_randtable.c +84 -0
data/ext/{api.c → STEMMER_api.c} +7 -10
data/ext/{libstemmer.c → STEMMER_libstemmer.c} +3 -2
data/ext/{stem_ISO_8859_1_danish.c → STEMMER_stem_ISO_8859_1_danish.c} +123 -124
data/ext/{stem_ISO_8859_1_dutch.c → STEMMER_stem_ISO_8859_1_dutch.c} +177 -188
data/ext/STEMMER_stem_ISO_8859_1_english.c +1117 -0
data/ext/{stem_ISO_8859_1_finnish.c → STEMMER_stem_ISO_8859_1_finnish.c} +276 -306
data/ext/STEMMER_stem_ISO_8859_1_french.c +1246 -0
data/ext/{stem_ISO_8859_1_german.c → STEMMER_stem_ISO_8859_1_german.c} +161 -170
data/ext/STEMMER_stem_ISO_8859_1_hungarian.c +1230 -0
data/ext/STEMMER_stem_ISO_8859_1_italian.c +1065 -0
data/ext/STEMMER_stem_ISO_8859_1_norwegian.c +297 -0
data/ext/{stem_ISO_8859_1_porter.c → STEMMER_stem_ISO_8859_1_porter.c} +263 -290
data/ext/{stem_ISO_8859_1_portuguese.c → STEMMER_stem_ISO_8859_1_portuguese.c} +362 -380
data/ext/STEMMER_stem_ISO_8859_1_spanish.c +1093 -0
data/ext/STEMMER_stem_ISO_8859_1_swedish.c +307 -0
data/ext/STEMMER_stem_ISO_8859_2_romanian.c +998 -0
data/ext/{stem_KOI8_R_russian.c → STEMMER_stem_KOI8_R_russian.c} +244 -245
data/ext/STEMMER_stem_UTF_8_danish.c +339 -0
data/ext/{stem_UTF_8_dutch.c → STEMMER_stem_UTF_8_dutch.c} +192 -211
data/ext/STEMMER_stem_UTF_8_english.c +1125 -0
data/ext/{stem_UTF_8_finnish.c → STEMMER_stem_UTF_8_finnish.c} +284 -324
data/ext/STEMMER_stem_UTF_8_french.c +1256 -0
data/ext/{stem_UTF_8_german.c → STEMMER_stem_UTF_8_german.c} +170 -187
data/ext/STEMMER_stem_UTF_8_hungarian.c +1234 -0
data/ext/STEMMER_stem_UTF_8_italian.c +1073 -0
data/ext/STEMMER_stem_UTF_8_norwegian.c +299 -0
data/ext/{stem_UTF_8_porter.c → STEMMER_stem_UTF_8_porter.c} +271 -310
data/ext/STEMMER_stem_UTF_8_portuguese.c +1023 -0
data/ext/STEMMER_stem_UTF_8_romanian.c +1004 -0
data/ext/STEMMER_stem_UTF_8_russian.c +694 -0
data/ext/STEMMER_stem_UTF_8_spanish.c +1097 -0
data/ext/STEMMER_stem_UTF_8_swedish.c +309 -0
data/ext/STEMMER_stem_UTF_8_turkish.c +2205 -0
data/ext/{utilities.c → STEMMER_utilities.c} +100 -68
data/ext/analysis.c +276 -121
data/ext/analysis.h +190 -143
data/ext/api.h +3 -4
data/ext/array.c +5 -3
data/ext/array.h +52 -43
data/ext/bitvector.c +38 -482
data/ext/bitvector.h +446 -124
data/ext/bzlib.h +282 -0
data/ext/bzlib_private.h +503 -0
data/ext/compound_io.c +23 -22
data/ext/config.h +21 -11
data/ext/document.c +43 -40
data/ext/document.h +31 -21
data/ext/except.c +20 -38
data/ext/except.h +89 -76
data/ext/extconf.rb +3 -2
data/ext/ferret.c +49 -35
data/ext/ferret.h +14 -11
data/ext/field_index.c +262 -0
data/ext/field_index.h +52 -0
data/ext/filter.c +11 -10
data/ext/fs_store.c +65 -47
data/ext/global.c +245 -165
data/ext/global.h +252 -54
data/ext/hash.c +200 -243
data/ext/hash.h +205 -163
data/ext/hashset.c +118 -96
data/ext/hashset.h +110 -82
data/ext/header.h +19 -19
data/ext/helper.c +11 -10
data/ext/helper.h +14 -6
data/ext/index.c +745 -366
data/ext/index.h +503 -529
data/ext/internal.h +1020 -0
data/ext/lang.c +10 -0
data/ext/lang.h +35 -15
data/ext/mempool.c +5 -4
data/ext/mempool.h +30 -22
data/ext/modules.h +35 -7
data/ext/multimapper.c +43 -2
data/ext/multimapper.h +32 -23
data/ext/posh.c +0 -0
data/ext/posh.h +4 -38
data/ext/priorityqueue.c +10 -12
data/ext/priorityqueue.h +33 -21
data/ext/q_boolean.c +22 -9
data/ext/q_const_score.c +3 -2
data/ext/q_filtered_query.c +15 -12
data/ext/q_fuzzy.c +147 -135
data/ext/q_match_all.c +3 -2
data/ext/q_multi_term.c +28 -32
data/ext/q_parser.c +451 -173
data/ext/q_phrase.c +158 -79
data/ext/q_prefix.c +16 -18
data/ext/q_range.c +363 -31
data/ext/q_span.c +130 -141
data/ext/q_term.c +21 -21
data/ext/q_wildcard.c +19 -23
data/ext/r_analysis.c +369 -242
data/ext/r_index.c +421 -434
data/ext/r_qparser.c +142 -92
data/ext/r_search.c +790 -407
data/ext/r_store.c +44 -44
data/ext/r_utils.c +264 -96
data/ext/ram_store.c +29 -23
data/ext/scanner.c +895 -0
data/ext/scanner.h +36 -0
data/ext/scanner_mb.c +6701 -0
data/ext/scanner_utf8.c +4415 -0
data/ext/search.c +210 -87
data/ext/search.h +556 -488
data/ext/similarity.c +17 -16
data/ext/similarity.h +51 -44
data/ext/sort.c +157 -354
data/ext/stem_ISO_8859_1_hungarian.h +16 -0
data/ext/stem_ISO_8859_2_romanian.h +16 -0
data/ext/stem_UTF_8_hungarian.h +16 -0
data/ext/stem_UTF_8_romanian.h +16 -0
data/ext/stem_UTF_8_turkish.h +16 -0
data/ext/stopwords.c +287 -278
data/ext/store.c +57 -51
data/ext/store.h +308 -286
data/ext/symbol.c +10 -0
data/ext/symbol.h +23 -0
data/ext/term_vectors.c +14 -293
data/ext/threading.h +22 -22
data/ext/win32.h +12 -4
data/lib/ferret.rb +2 -1
data/lib/ferret/browser.rb +1 -1
data/lib/ferret/field_symbol.rb +94 -0
data/lib/ferret/index.rb +221 -34
data/lib/ferret/number_tools.rb +6 -6
data/lib/ferret/version.rb +3 -0
data/test/{unit → long_running}/largefile/tc_largefile.rb +1 -1
data/test/test_helper.rb +7 -2
data/test/test_installed.rb +1 -0
data/test/threading/thread_safety_index_test.rb +10 -1
data/test/threading/thread_safety_read_write_test.rb +4 -7
data/test/threading/thread_safety_test.rb +0 -0
data/test/unit/analysis/tc_analyzer.rb +29 -27
data/test/unit/analysis/tc_token_stream.rb +23 -16
data/test/unit/index/tc_index.rb +116 -11
data/test/unit/index/tc_index_reader.rb +27 -27
data/test/unit/index/tc_index_writer.rb +10 -0
data/test/unit/index/th_doc.rb +38 -21
data/test/unit/search/tc_filter.rb +31 -10
data/test/unit/search/tc_index_searcher.rb +6 -0
data/test/unit/search/tm_searcher.rb +53 -1
data/test/unit/store/tc_fs_store.rb +40 -2
data/test/unit/store/tc_ram_store.rb +0 -0
data/test/unit/store/tm_store.rb +0 -0
data/test/unit/store/tm_store_lock.rb +7 -6
data/test/unit/tc_field_symbol.rb +26 -0
data/test/unit/ts_analysis.rb +0 -0
data/test/unit/ts_index.rb +0 -0
data/test/unit/ts_store.rb +0 -0
data/test/unit/ts_utils.rb +0 -0
data/test/unit/utils/tc_number_tools.rb +0 -0
data/test/utils/content_generator.rb +226 -0
metadata +262 -221
data/ext/inc/lang.h +0 -48
data/ext/inc/threading.h +0 -31
data/ext/stem_ISO_8859_1_english.c +0 -1156
data/ext/stem_ISO_8859_1_french.c +0 -1276
data/ext/stem_ISO_8859_1_italian.c +0 -1091
data/ext/stem_ISO_8859_1_norwegian.c +0 -296
data/ext/stem_ISO_8859_1_spanish.c +0 -1119
data/ext/stem_ISO_8859_1_swedish.c +0 -307
data/ext/stem_UTF_8_danish.c +0 -344
data/ext/stem_UTF_8_english.c +0 -1176
data/ext/stem_UTF_8_french.c +0 -1296
data/ext/stem_UTF_8_italian.c +0 -1113
data/ext/stem_UTF_8_norwegian.c +0 -302
data/ext/stem_UTF_8_portuguese.c +0 -1055
data/ext/stem_UTF_8_russian.c +0 -709
data/ext/stem_UTF_8_spanish.c +0 -1137
data/ext/stem_UTF_8_swedish.c +0 -313
data/lib/ferret_version.rb +0 -3

data/ext/q_phrase.c CHANGED

@@ -2,9 +2,18 @@
 #include <limits.h>
 #include "search.h"
 #include "array.h"
+#include "symbol.h"
+#include "internal.h"
 #define PhQ(query) ((PhraseQuery *)(query))
+/**
+ * Use to sort the phrase positions into positional order. For phrase
+ * positions matching at the same position (a very unusual case) we order by
+ * first terms. The only real reason for the sorting by first terms is to get
+ * consistent order of positions when testing. Functionally it makes no
+ * difference.
+ */
 static int phrase_pos_cmp(const void *p1, const void *p2)
 {
     int pos1 = ((PhrasePosition *)p1)->pos;
@@ -43,6 +52,8 @@ typedef struct PhPos
 static bool pp_next(PhPos *self)
 {
     TermDocEnum *tpe = self->tpe;
+    assert(tpe);
     if (!tpe->next(tpe)) {
         tpe->close(tpe);            /* close stream */
         self->tpe = NULL;
@@ -57,6 +68,8 @@ static bool pp_next(PhPos *self)
 static bool pp_skip_to(PhPos *self, int doc_num)
 {
     TermDocEnum *tpe = self->tpe;
+    assert(tpe);
     if (!tpe->skip_to(tpe, doc_num)) {
         tpe->close(tpe);            /* close stream */
         self->tpe = NULL;
@@ -114,19 +127,15 @@ static int pp_pos_cmp(const void *const p1, const void *const p2)
 static bool pp_less_than(const PhPos *pp1, const PhPos *pp2)
 {
-    /* docs will all be equal when this method is used */
-    return pp1->position < pp2->position;
-    /*
-    if (PP(p)->doc == PP(p)->doc) {
-        return PP(p)->position < PP(p)->position;
+    if (pp1->position == pp2->position) {
+        return pp1->offset < pp2->offset;
     }
     else {
-        return PP(p)->doc < PP(p)->doc;
+        return pp1->position < pp2->position;
     }
-    */
 }
-void pp_destroy(PhPos *pp)
+static void pp_destroy(PhPos *pp)
 {
     if (pp->tpe) {
         pp->tpe->close(pp->tpe);
@@ -134,7 +143,7 @@ void pp_destroy(PhPos *pp)
     free(pp);
 }
-PhPos *pp_new(TermDocEnum *tpe, int offset)
+static PhPos *pp_new(TermDocEnum *tpe, int offset)
 {
     PhPos *self = ALLOC(PhPos);
@@ -165,6 +174,7 @@ typedef struct PhraseScorer
     int     slop;
     bool    first_time : 1;
     bool    more : 1;
+    bool    check_repeats : 1;
 } PhraseScorer;
 static void phsc_init(PhraseScorer *phsc)
@@ -232,7 +242,7 @@ static float phsc_score(Scorer *self)
     /* normalize */
     return raw_score * sim_decode_norm(
         self->similarity,
-        phsc->norms[phsc->phrase_pos[phsc->pp_first_idx]->doc]);
+        phsc->norms[self->doc]);
 }
 static bool phsc_next(Scorer *self)
@@ -276,8 +286,8 @@ static Explanation *phsc_explain(Scorer *self, int doc_num)
     phsc_skip_to(self, doc_num);
-    phrase_freq = (self->doc == doc_num) ? phsc->freq : (float)0.0;
-    return expl_new(sim_tf(self->similarity, phrase_freq),
+    phrase_freq = (self->doc == doc_num) ? phsc->freq : 0.0f;
+    return expl_new(sim_tf(self->similarity, phrase_freq),
                     "tf(phrase_freq=%f)", phrase_freq);
 }
@@ -292,12 +302,17 @@ static void phsc_destroy(Scorer *self)
     scorer_destroy_i(self);
 }
-static Scorer *phsc_new(Weight *weight, TermDocEnum **term_pos_enum,
+static Scorer *phsc_new(Weight *weight,
+                        TermDocEnum **term_pos_enum,
                         PhrasePosition *positions, int pos_cnt,
-                        Similarity *similarity, uchar *norms)
+                        Similarity *similarity,
+                        uchar *norms,
+                        int slop)
 {
     int i;
     Scorer *self                = scorer_new(PhraseScorer, similarity);
+    HashSet *term_set           = NULL;
     PhSc(self)->weight          = weight;
     PhSc(self)->norms           = norms;
@@ -305,14 +320,34 @@ static Scorer *phsc_new(Weight *weight, TermDocEnum **term_pos_enum,
     PhSc(self)->phrase_pos      = ALLOC_N(PhPos *, pos_cnt);
     PhSc(self)->pp_first_idx    = 0;
     PhSc(self)->pp_cnt          = pos_cnt;
-    PhSc(self)->slop            = 0;
+    PhSc(self)->slop            = slop;
     PhSc(self)->first_time      = true;
     PhSc(self)->more            = true;
+    PhSc(self)->check_repeats   = false;
+    if (slop) {
+        term_set = hs_new_str((free_ft)NULL);
+    }
     for (i = 0; i < pos_cnt; i++) {
+        /* check for repeats */
+        if (slop && !PhSc(self)->check_repeats) {
+            char **terms = positions[i].terms;
+            const int t_cnt = ary_size(terms);
+            int j;
+            for (j = 0; j < t_cnt; j++) {
+                if (hs_add(term_set, terms[j])) {
+                    PhSc(self)->check_repeats = true;
+                    break;
+                }
+            }
+        }
         PhSc(self)->phrase_pos[i] = pp_new(term_pos_enum[i], positions[i].pos);
     }
+    if (slop) {
+        hs_destroy(term_set);
+    }
     self->score     = &phsc_score;
     self->next      = &phsc_next;
     self->skip_to   = &phsc_skip_to;
@@ -363,7 +398,7 @@ static float ephsc_phrase_freq(Scorer *self)
         freq += 1.0; /* all equal: a match */
     } while (pp_next_position(last));
-    /* maintain first position */
+    /* maintain first position */
     phsc->pp_first_idx = pp_first_idx;
     return freq;
 }
@@ -373,8 +408,13 @@ static Scorer *exact_phrase_scorer_new(Weight *weight,
                                        PhrasePosition *positions, int pp_cnt,
                                        Similarity *similarity, uchar *norms)
 {
-    Scorer *self =
-        phsc_new(weight, term_pos_enum, positions, pp_cnt, similarity, norms);
+    Scorer *self = phsc_new(weight,
+                            term_pos_enum,
+                            positions,
+                            pp_cnt,
+                            similarity,
+                            norms,
+                            0);
     PhSc(self)->phrase_freq = &ephsc_phrase_freq;
     return self;
@@ -384,6 +424,33 @@ static Scorer *exact_phrase_scorer_new(Weight *weight,
  * SloppyPhraseScorer
  ***************************************************************************/
+static bool sphsc_check_repeats(PhPos *pp,
+                                PhPos **positions,
+                                const int p_cnt)
+{
+    int j;
+    for (j = 0; j < p_cnt; j++) {
+        PhPos *ppj = positions[j];
+        /* If offsets are equal, either we are at the current PhPos +pp+ or
+         * +pp+ and +ppj+ are supposed to match in the same position in which
+         * case we don't need to check. */
+        if (ppj->offset == pp->offset) {
+            continue;
+        }
+        /* the two phrase positions are matching on the same term
+         * which we want to avoid */
+        if ((ppj->position + ppj->offset) == (pp->position + pp->offset)) {
+            if (!pp_next_position(pp)) {
+                /* We have no matches for this document */
+                return false;
+            }
+            /* we changed the position so we need to start checking again */
+            j = -1;
+        }
+    }
+    return true;
+}
 static float sphsc_phrase_freq(Scorer *self)
 {
     PhraseScorer *phsc = PhSc(self);
@@ -393,11 +460,21 @@ static float sphsc_phrase_freq(Scorer *self)
     int last_pos = 0, pos, next_pos, start, match_length, i;
     bool done = false;
+    bool check_repeats = phsc->check_repeats;
     float freq = 0.0;
     for (i = 0; i < pp_cnt; i++) {
+        bool res;
         pp = phsc->phrase_pos[i];
-        pp_first_position(pp);
+        /* we should always have at least one position or this function
+         * shouldn't have been called. */
+        res = pp_first_position(pp);
+        assert(res);(void)res;
+        if (check_repeats && i > 0) {
+            if (!sphsc_check_repeats(pp, phsc->phrase_pos, i - 1)) {
+                goto return_freq;
+            }
+        }
         if (pp->position > last_pos) {
             last_pos = pp->position;
         }
@@ -405,13 +482,15 @@ static float sphsc_phrase_freq(Scorer *self)
     }
     do {
-        pp = pq_pop(pq);
+        pp = (PhPos *)pq_pop(pq);
         pos = start = pp->position;
         next_pos = PP(pq_top(pq))->position;
         while (pos <= next_pos) {
             start = pos;        /* advance pp to min window */
-            if (!pp_next_position(pp)) {
-                done = true;    /* ran out of a positions for a term - done */
+            if (!pp_next_position(pp)
+                || (check_repeats
+                    && !sphsc_check_repeats(pp, phsc->phrase_pos, pp_cnt))) {
+                done = true;
                 break;
             }
             pos = pp->position;
@@ -429,6 +508,8 @@ static float sphsc_phrase_freq(Scorer *self)
         pq_push(pq, pp);        /* restore pq */
     } while (!done);
+return_freq:
     pq_destroy(pq);
     return freq;
 }
@@ -439,10 +520,14 @@ static Scorer *sloppy_phrase_scorer_new(Weight *weight,
                                         int pp_cnt, Similarity *similarity,
                                         int slop, uchar *norms)
 {
-    Scorer *self =
-        phsc_new(weight, term_pos_enum, positions, pp_cnt, similarity, norms);
+    Scorer *self = phsc_new(weight,
+                            term_pos_enum,
+                            positions,
+                            pp_cnt,
+                            similarity,
+                            norms,
+                            slop);
-    PhSc(self)->slop        = slop;
     PhSc(self)->phrase_freq = &sphsc_phrase_freq;
     return self;
 }
@@ -467,7 +552,7 @@ static Scorer *phw_scorer(Weight *self, IndexReader *ir)
     PhrasePosition *positions = phq->positions;
     const int pos_cnt = phq->pos_cnt;
     const int field_num = fis_get_field_num(ir->fis, phq->field);
     if (pos_cnt == 0 || field_num < 0) {
         return NULL;
     }
@@ -484,15 +569,8 @@ static Scorer *phw_scorer(Weight *self, IndexReader *ir)
         else {
             tps[i] = mtdpe_new(ir, field_num, terms, t_cnt);
         }
-        if (tps[i] == NULL) {
-            /* free everything we just created and return NULL */
-            int j;
-            for (j = 0; j < i; j++) {
-                tps[i]->close(tps[i]);
-            }
-            free(tps);
-            return NULL;
-        }
+        /* neither mtdpe_new nor ir->term_positions should return NULL */
+        assert(NULL != tps[i]);
     }
     if (phq->slop == 0) {       /* optimize exact (common) case */
@@ -509,7 +587,7 @@ static Scorer *phw_scorer(Weight *self, IndexReader *ir)
     return phsc;
 }
-Explanation *phw_explain(Weight *self, IndexReader *ir, int doc_num)
+static Explanation *phw_explain(Weight *self, IndexReader *ir, int doc_num)
 {
     Explanation *expl;
     Explanation *idf_expl1;
@@ -530,12 +608,13 @@ Explanation *phw_explain(Weight *self, IndexReader *ir, int doc_num)
     char *doc_freqs = NULL;
     size_t len = 0, pos = 0;
     const int field_num = fis_get_field_num(ir->fis, phq->field);
+    const char *field = S(phq->field);
     if (field_num < 0) {
-        return expl_new(0.0, "field \"%s\" does not exist in the index", phq->field);
+        return expl_new(0.0, "field \"%s\" does not exist in the index", field);
     }
-    query_str = self->query->to_s(self->query, "");
+    query_str = self->query->to_s(self->query, NULL);
     expl = expl_new(0.0, "weight(%s in %d), product of:", query_str, doc_num);
@@ -554,16 +633,15 @@ Explanation *phw_explain(Weight *self, IndexReader *ir, int doc_num)
         const int t_cnt = ary_size(terms);
         for (j = 0; j < t_cnt; j++) {
             char *term = terms[j];
-            sprintf(doc_freqs + pos, "%s=%d, ",
-                    term, ir->doc_freq(ir, field_num, term));
-            pos += strlen(doc_freqs + pos);
+            pos += sprintf(doc_freqs + pos, "%s=%d, ",
+                           term, ir->doc_freq(ir, field_num, term));
         }
     }
     pos -= 2; /* remove ", " from the end */
     doc_freqs[pos] = 0;
-    idf_expl1 = expl_new(self->idf, "idf(%s:<%s>)", phq->field, doc_freqs);
-    idf_expl2 = expl_new(self->idf, "idf(%s:<%s>)", phq->field, doc_freqs);
+    idf_expl1 = expl_new(self->idf, "idf(%s:<%s>)", field, doc_freqs);
+    idf_expl2 = expl_new(self->idf, "idf(%s:<%s>)", field, doc_freqs);
     free(doc_freqs);
     /* explain query weight */
@@ -597,7 +675,7 @@ Explanation *phw_explain(Weight *self, IndexReader *ir, int doc_num)
         ? sim_decode_norm(self->similarity, field_norms[doc_num])
         : (float)0.0;
     field_norm_expl = expl_new(field_norm, "field_norm(field=%s, doc=%d)",
-                               phq->field, doc_num);
+                               field, doc_num);
     expl_add_detail(field_expl, field_norm_expl);
@@ -644,7 +722,7 @@ typedef struct TVPosEnum
     int size;
     int offset;
     int pos;
-    int positions[];
+    int positions[1];
 } TVPosEnum;
 static bool tvpe_next(TVPosEnum *self)
@@ -684,8 +762,7 @@ static bool tvpe_lt(TVPosEnum *tvpe1, TVPosEnum *tvpe2)
 static TVPosEnum *tvpe_new(int *positions, int size, int offset)
 {
-    TVPosEnum *self = (TVPosEnum *)emalloc(sizeof(TVPosEnum)
-                                           + size * sizeof(int));
+    TVPosEnum *self = (TVPosEnum*)emalloc(sizeof(TVPosEnum) + size*sizeof(int));
     memcpy(self->positions, positions, size * sizeof(int));
     self->size = size;
     self->offset = offset;
@@ -705,13 +782,11 @@ static TVPosEnum *tvpe_new_merge(char **terms, int t_cnt, TermVector *tv,
         TVTerm *tv_term = tv_get_tv_term(tv, terms[i]);
         if (tv_term) {
             TVPosEnum *tvpe = tvpe_new(tv_term->positions, tv_term->freq, 0);
-            if (tvpe_next(tvpe)) {
-                pq_push(tvpe_pq, tvpe);
-                total_positions += tv_term->freq;
-            }
-            else {
-                free(tvpe);
-            }
+            /* got tv_term so tvpe_next should always return true once here */
+            bool res = tvpe_next(tvpe);
+            assert(res);(void)res;
+            pq_push(tvpe_pq, tvpe);
+            total_positions += tv_term->freq;
         }
     }
     if (tvpe_pq->size == 0) {
@@ -759,7 +834,7 @@ static TVPosEnum *get_tvpe(TermVector *tv, char **terms, int t_cnt, int offset)
 static MatchVector *phq_get_matchv_i(Query *self, MatchVector *mv,
                                      TermVector *tv)
 {
-    if (strcmp(tv->field, PhQ(self)->field) == 0) {
+    if (tv->field == PhQ(self)->field) {
         const int pos_cnt = PhQ(self)->pos_cnt;
         int i;
         int slop = PhQ(self)->slop;
@@ -785,7 +860,7 @@ static MatchVector *phq_get_matchv_i(Query *self, MatchVector *mv,
                 }
             }
             while (! done) {
-                TVPosEnum *tvpe = pq_pop(tvpe_pq);
+                TVPosEnum *tvpe = (TVPosEnum *)pq_pop(tvpe_pq);
                 int pos;
                 int start = pos = tvpe->pos;
                 int next_pos = ((TVPosEnum *)pq_top(tvpe_pq))->pos;
@@ -840,7 +915,7 @@ static MatchVector *phq_get_matchv_i(Query *self, MatchVector *mv,
             first = tvpe_a[0];
             last = tvpe_a[pos_cnt - 1];
             while (!done) {
                 while (first->pos < last->pos) {
                     if (tvpe_skip_to(first, last->pos)) {
@@ -855,7 +930,7 @@ static MatchVector *phq_get_matchv_i(Query *self, MatchVector *mv,
                 }
                 if (!done) {
                     matchv_add(mv, tvpe_a[0]->pos + tvpe_a[0]->offset,
-                               tvpe_a[pos_cnt-1]->pos + tvpe_a[pos_cnt-1]->offset);
+                               tvpe_a[pos_cnt-1]->pos + tvpe_a[pos_cnt-1]->offset);
                 }
                 if (!tvpe_next(last)) {
                     done = true;
@@ -887,19 +962,21 @@ static void phq_extract_terms(Query *self, HashSet *term_set)
     }
 }
-static char *phq_to_s(Query *self, const char *field)
+static char *phq_to_s(Query *self, Symbol default_field)
 {
     PhraseQuery *phq = PhQ(self);
     const int pos_cnt = phq->pos_cnt;
     PhrasePosition *positions = phq->positions;
+    const char *field = S(phq->field);
+    int flen = strlen(field);
     int i, j, buf_index = 0, pos, last_pos;
     size_t len = 0;
     char *buffer;
     if (phq->pos_cnt == 0) {
-        if (strcmp(field, phq->field) != 0) {
-            return strfmt("%s:\"\"", phq->field);
+        if (default_field != phq->field) {
+            return strfmt("%s:\"\"", field);
         }
         else {
             return estrdup("\"\"");
@@ -909,7 +986,7 @@ static char *phq_to_s(Query *self, const char *field)
     /* sort the phrase positions by position */
     qsort(positions, pos_cnt, sizeof(PhrasePosition), &phrase_pos_cmp);
-    len = strlen(phq->field) + 1;
+    len = flen + 1;
     for (i = 0; i < pos_cnt; i++) {
         char **terms = phq->positions[i].terms;
@@ -924,11 +1001,10 @@ static char *phq_to_s(Query *self, const char *field)
     buffer = ALLOC_N(char, len);
-    if (strcmp(field, phq->field) != 0) {
-        len = strlen(phq->field);
-        memcpy(buffer, phq->field, len);
-        buffer[len] = ':';
-        buf_index += len + 1;
+    if (default_field != phq->field) {
+        memcpy(buffer, field, flen);
+        buffer[flen] = ':';
+        buf_index += flen + 1;
     }
     buffer[buf_index++] = '"';
@@ -968,8 +1044,7 @@ static char *phq_to_s(Query *self, const char *field)
     buffer[buf_index] = 0;
     if (phq->slop != 0) {
-        sprintf(buffer + buf_index, "~%d", phq->slop);
-        buf_index += strlen(buffer + buf_index);
+        buf_index += sprintf(buffer + buf_index, "~%d", phq->slop);
     }
     if (self->boost != 1.0) {
@@ -984,7 +1059,6 @@ static void phq_destroy(Query *self)
 {
     PhraseQuery *phq = PhQ(self);
     int i;
-    free(phq->field);
     for (i = 0; i < phq->pos_cnt; i++) {
         ary_destroy(phq->positions[i].terms, &free);
     }
@@ -1024,12 +1098,12 @@ static unsigned long phq_hash(Query *self)
 {
     int i, j;
     PhraseQuery *phq = PhQ(self);
-    unsigned long hash = str_hash(phq->field);
+    unsigned long hash = sym_hash(phq->field);
     for (i = 0; i < phq->pos_cnt; i++) {
         char **terms = phq->positions[i].terms;
         for (j = ary_size(terms) - 1; j >= 0; j--) {
             hash = (hash << 1) ^ (str_hash(terms[j])
-                                  ^ phq->positions[i].pos);
+                               ^ phq->positions[i].pos);
         }
     }
     return (hash ^ phq->slop);
@@ -1041,7 +1115,7 @@ static int phq_eq(Query *self, Query *o)
     PhraseQuery *phq1 = PhQ(self);
     PhraseQuery *phq2 = PhQ(o);
     if (phq1->slop != phq2->slop
-        || strcmp(phq1->field, phq2->field) != 0
+        || phq1->field != phq2->field
         || phq1->pos_cnt != phq2->pos_cnt) {
         return false;
     }
@@ -1049,7 +1123,7 @@ static int phq_eq(Query *self, Query *o)
         char **terms1 = phq1->positions[i].terms;
         char **terms2 = phq2->positions[i].terms;
         const int t_cnt = ary_size(terms1);
-        if (t_cnt != ary_size(terms2)
+        if (t_cnt != ary_size(terms2)
             || phq1->positions[i].pos != phq2->positions[i].pos) {
             return false;
         }
@@ -1062,11 +1136,11 @@ static int phq_eq(Query *self, Query *o)
     return true;
 }
-Query *phq_new(const char *field)
+Query *phq_new(Symbol field)
 {
     Query *self = q_new(PhraseQuery);
-    PhQ(self)->field        = estrdup(field);
+    PhQ(self)->field        = field;
     PhQ(self)->pos_cnt      = 0;
     PhQ(self)->pos_capa     = PhQ_INIT_CAPA;
     PhQ(self)->positions    = ALLOC_N(PhrasePosition, PhQ_INIT_CAPA);
@@ -1105,7 +1179,7 @@ void phq_add_term(Query *self, const char *term, int pos_inc)
     int position;
     if (phq->pos_cnt == 0) {
         position = 0;
-    }
+    }
     else {
         position = phq->positions[phq->pos_cnt - 1].pos + pos_inc;
     }
@@ -1124,3 +1198,8 @@ void phq_append_multi_term(Query *self, const char *term)
         ary_push(phq->positions[index].terms, estrdup(term));
     }
 }
+void frt_phq_set_slop(FrtQuery *self, int slop)
+{
+    PhQ(self)->slop = slop;
+}