ferret 0.3.2 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +9 -0
 - data/Rakefile +51 -25
 - data/ext/analysis.c +553 -0
 - data/ext/analysis.h +76 -0
 - data/ext/array.c +83 -0
 - data/ext/array.h +19 -0
 - data/ext/bitvector.c +164 -0
 - data/ext/bitvector.h +29 -0
 - data/ext/compound_io.c +335 -0
 - data/ext/document.c +336 -0
 - data/ext/document.h +87 -0
 - data/ext/ferret.c +88 -47
 - data/ext/ferret.h +43 -109
 - data/ext/field.c +395 -0
 - data/ext/filter.c +103 -0
 - data/ext/fs_store.c +352 -0
 - data/ext/global.c +219 -0
 - data/ext/global.h +73 -0
 - data/ext/hash.c +446 -0
 - data/ext/hash.h +80 -0
 - data/ext/hashset.c +141 -0
 - data/ext/hashset.h +37 -0
 - data/ext/helper.c +11 -0
 - data/ext/helper.h +5 -0
 - data/ext/inc/lang.h +41 -0
 - data/ext/ind.c +389 -0
 - data/ext/index.h +884 -0
 - data/ext/index_io.c +269 -415
 - data/ext/index_rw.c +2543 -0
 - data/ext/lang.c +31 -0
 - data/ext/lang.h +41 -0
 - data/ext/priorityqueue.c +228 -0
 - data/ext/priorityqueue.h +44 -0
 - data/ext/q_boolean.c +1331 -0
 - data/ext/q_const_score.c +154 -0
 - data/ext/q_fuzzy.c +287 -0
 - data/ext/q_match_all.c +142 -0
 - data/ext/q_multi_phrase.c +343 -0
 - data/ext/q_parser.c +2180 -0
 - data/ext/q_phrase.c +657 -0
 - data/ext/q_prefix.c +75 -0
 - data/ext/q_range.c +247 -0
 - data/ext/q_span.c +1566 -0
 - data/ext/q_term.c +308 -0
 - data/ext/q_wildcard.c +146 -0
 - data/ext/r_analysis.c +255 -0
 - data/ext/r_doc.c +578 -0
 - data/ext/r_index_io.c +996 -0
 - data/ext/r_qparser.c +158 -0
 - data/ext/r_search.c +2321 -0
 - data/ext/r_store.c +263 -0
 - data/ext/r_term.c +219 -0
 - data/ext/ram_store.c +447 -0
 - data/ext/search.c +524 -0
 - data/ext/search.h +1065 -0
 - data/ext/similarity.c +143 -39
 - data/ext/sort.c +661 -0
 - data/ext/store.c +35 -0
 - data/ext/store.h +152 -0
 - data/ext/term.c +704 -143
 - data/ext/termdocs.c +599 -0
 - data/ext/vector.c +594 -0
 - data/lib/ferret.rb +9 -10
 - data/lib/ferret/analysis/analyzers.rb +2 -2
 - data/lib/ferret/analysis/standard_tokenizer.rb +1 -1
 - data/lib/ferret/analysis/token.rb +14 -14
 - data/lib/ferret/analysis/token_filters.rb +3 -3
 - data/lib/ferret/document/field.rb +16 -17
 - data/lib/ferret/index/document_writer.rb +4 -4
 - data/lib/ferret/index/index.rb +39 -23
 - data/lib/ferret/index/index_writer.rb +2 -2
 - data/lib/ferret/index/multiple_term_doc_pos_enum.rb +1 -8
 - data/lib/ferret/index/segment_term_vector.rb +4 -4
 - data/lib/ferret/index/term.rb +5 -1
 - data/lib/ferret/index/term_vector_offset_info.rb +6 -6
 - data/lib/ferret/index/term_vectors_io.rb +5 -5
 - data/lib/ferret/query_parser/query_parser.tab.rb +81 -77
 - data/lib/ferret/search.rb +1 -1
 - data/lib/ferret/search/boolean_query.rb +2 -1
 - data/lib/ferret/search/field_sorted_hit_queue.rb +3 -3
 - data/lib/ferret/search/fuzzy_query.rb +2 -1
 - data/lib/ferret/search/index_searcher.rb +3 -0
 - data/lib/ferret/search/{match_all_docs_query.rb → match_all_query.rb} +7 -7
 - data/lib/ferret/search/multi_phrase_query.rb +6 -5
 - data/lib/ferret/search/phrase_query.rb +3 -6
 - data/lib/ferret/search/prefix_query.rb +4 -4
 - data/lib/ferret/search/sort.rb +3 -1
 - data/lib/ferret/search/sort_field.rb +9 -9
 - data/lib/ferret/search/spans/near_spans_enum.rb +1 -1
 - data/lib/ferret/search/spans/span_near_query.rb +1 -1
 - data/lib/ferret/search/spans/span_weight.rb +1 -1
 - data/lib/ferret/search/spans/spans_enum.rb +7 -7
 - data/lib/ferret/store/fs_store.rb +10 -6
 - data/lib/ferret/store/ram_store.rb +3 -3
 - data/lib/rferret.rb +36 -0
 - data/test/functional/thread_safety_index_test.rb +2 -2
 - data/test/test_helper.rb +16 -2
 - data/test/unit/analysis/c_token.rb +25 -0
 - data/test/unit/analysis/tc_per_field_analyzer_wrapper.rb +1 -1
 - data/test/unit/analysis/tc_standard_analyzer.rb +1 -1
 - data/test/unit/document/{tc_document.rb → c_document.rb} +0 -0
 - data/test/unit/document/c_field.rb +98 -0
 - data/test/unit/document/tc_field.rb +0 -66
 - data/test/unit/index/{tc_index.rb → c_index.rb} +62 -6
 - data/test/unit/index/{tc_index_reader.rb → c_index_reader.rb} +51 -10
 - data/test/unit/index/{tc_index_writer.rb → c_index_writer.rb} +0 -4
 - data/test/unit/index/{tc_term.rb → c_term.rb} +1 -3
 - data/test/unit/index/{tc_term_vector_offset_info.rb → c_term_voi.rb} +5 -5
 - data/test/unit/index/tc_segment_term_vector.rb +2 -2
 - data/test/unit/index/tc_term_vectors_io.rb +4 -4
 - data/test/unit/query_parser/c_query_parser.rb +138 -0
 - data/test/unit/search/{tc_filter.rb → c_filter.rb} +24 -24
 - data/test/unit/search/{tc_fuzzy_query.rb → c_fuzzy_query.rb} +0 -0
 - data/test/unit/search/{tc_index_searcher.rb → c_index_searcher.rb} +9 -26
 - data/test/unit/search/{tc_search_and_sort.rb → c_search_and_sort.rb} +15 -15
 - data/test/unit/search/{tc_sort.rb → c_sort.rb} +2 -1
 - data/test/unit/search/c_sort_field.rb +27 -0
 - data/test/unit/search/{tc_spans.rb → c_spans.rb} +0 -0
 - data/test/unit/search/tc_sort_field.rb +7 -20
 - data/test/unit/store/c_fs_store.rb +76 -0
 - data/test/unit/store/c_ram_store.rb +35 -0
 - data/test/unit/store/m_store.rb +34 -0
 - data/test/unit/store/m_store_lock.rb +68 -0
 - data/test/unit/store/tc_fs_store.rb +0 -53
 - data/test/unit/store/tc_ram_store.rb +0 -20
 - data/test/unit/store/tm_store.rb +0 -30
 - data/test/unit/store/tm_store_lock.rb +0 -66
 - metadata +84 -31
 - data/ext/Makefile +0 -140
 - data/ext/ferret_ext.so +0 -0
 - data/ext/priority_queue.c +0 -232
 - data/ext/ram_directory.c +0 -321
 - data/ext/segment_merge_queue.c +0 -37
 - data/ext/segment_term_enum.c +0 -326
 - data/ext/string_helper.c +0 -42
 - data/ext/tags +0 -344
 - data/ext/term_buffer.c +0 -230
 - data/ext/term_infos_reader.c +0 -54
 - data/ext/terminfo.c +0 -160
 - data/ext/token.c +0 -93
 - data/ext/util.c +0 -12
 
    
        data/ext/index_rw.c
    ADDED
    
    | 
         @@ -0,0 +1,2543 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            #include <index.h>
         
     | 
| 
      
 2 
     | 
    
         
            +
            #include <stdlib.h>
         
     | 
| 
      
 3 
     | 
    
         
            +
            #include <string.h>
         
     | 
| 
      
 4 
     | 
    
         
            +
            #include <array.h>
         
     | 
| 
      
 5 
     | 
    
         
            +
             
     | 
| 
      
 6 
     | 
    
         
            +
            // Table of the 13 file-name extension strings listed below.
            // NOTE(review): presumably every extension an index file can carry -
            // confirm against the code that walks this table.
            const char *INDEX_EXTENSIONS[] = {
              "cfs",
              "fnm",
              "fdx", "fdt",
              "tii", "tis",
              "frq", "prx",
              "del",
              "tvx", "tvd", "tvf", "tvp"
            };
         
     | 
| 
      
 10 
     | 
    
         
            +
             
     | 
| 
      
 11 
     | 
    
         
            +
            // Table of 7 file-name extension strings.
            // NOTE(review): presumably the extensions packed into a compound file -
            // confirm against the code that reads this table.
            const char *COMPOUND_EXTENSIONS[] = {
              "fnm",
              "frq", "prx",
              "fdx", "fdt",
              "tii", "tis"
            };
         
     | 
| 
      
 14 
     | 
    
         
            +
                  
         
     | 
| 
      
 15 
     | 
    
         
            +
            // Table of 3 file-name extension strings.
            // NOTE(review): presumably the term-vector file extensions - confirm
            // against the code that reads this table.
            const char *VECTOR_EXTENSIONS[] = {
              "tvx",
              "tvd",
              "tvf"
            };
         
     | 
| 
      
 18 
     | 
    
         
            +
                  
         
     | 
| 
      
 19 
     | 
    
         
            +
            // Global default settings. The initializer is positional, so the values
            // must stay in the order the FerretConfig members are declared:
            //   merge_factor, min_merge_docs, max_merge_docs, max_field_length,
            //   term_index_interval
            FerretConfig config = {
              10,
              10,
              INT_MAX,
              10000,
              128
            };
         
     | 
| 
      
 26 
     | 
    
         
            +
             
     | 
| 
      
 27 
     | 
    
         
            +
            /***************************************************************************
         
     | 
| 
      
 28 
     | 
    
         
            +
             *
         
     | 
| 
      
 29 
     | 
    
         
            +
             * CacheObject
         
     | 
| 
      
 30 
     | 
    
         
            +
             *
         
     | 
| 
      
 31 
     | 
    
         
            +
             ***************************************************************************/
         
     | 
| 
      
 32 
     | 
    
         
            +
             
     | 
| 
      
 33 
     | 
    
         
            +
            // Hash callback for the CacheObject tables: the key is an opaque pointer
            // and its address is used directly as the hash value.
            //
            // The pointer is first converted to size_t and only then narrowed to
            // unsigned int, so the truncation on platforms with 64-bit pointers is an
            // explicit integer conversion instead of a pointer-to-narrower-integer
            // cast. The resulting value is the same low-order bits as before.
            unsigned int co_hash(const void *key)
            {
              return (unsigned int)(size_t)key;
            }
         
     | 
| 
      
 37 
     | 
    
         
            +
             
     | 
| 
      
 38 
     | 
    
         
            +
            // Equality callback for the CacheObject tables: two keys are equal only
            // when they are the very same pointer. Returns 1 on a match, 0 otherwise.
            int co_eq(const void *key1, const void *key2)
            {
              if (key1 != key2) {
                return 0;
              }
              return 1;
            }
         
     | 
| 
      
 42 
     | 
    
         
            +
             
     | 
| 
      
 43 
     | 
    
         
            +
            void co_destroy(void *p)
         
     | 
| 
      
 44 
     | 
    
         
            +
            {
         
     | 
| 
      
 45 
     | 
    
         
            +
              CacheObject *co = (CacheObject *)p;
         
     | 
| 
      
 46 
     | 
    
         
            +
              h_rem(co->ref_tab1, co->ref2, false);
         
     | 
| 
      
 47 
     | 
    
         
            +
              h_rem(co->ref_tab2, co->ref1, false);
         
     | 
| 
      
 48 
     | 
    
         
            +
              co->destroy(co->obj);
         
     | 
| 
      
 49 
     | 
    
         
            +
              free(co);
         
     | 
| 
      
 50 
     | 
    
         
            +
            }
         
     | 
| 
      
 51 
     | 
    
         
            +
             
     | 
| 
      
 52 
     | 
    
         
            +
            // Allocates a CacheObject wrapping `obj` and registers it in both tables:
            // in ref_tab1 under key ref2 and in ref_tab2 under key ref1. `destroy` is
            // the callback co_destroy later invokes on `obj`. Returns the new wrapper.
            CacheObject *co_create(HshTable *ref_tab1, HshTable *ref_tab2,
                void *ref1, void *ref2, void (*destroy)(void *p), void *obj)
            {
              CacheObject *entry = ALLOC(CacheObject);

              // payload and how to dispose of it
              entry->obj = obj;
              entry->destroy = destroy;

              // remember both keys and both tables so co_destroy can unlink the entry
              entry->ref1 = ref1;
              entry->ref2 = ref2;
              entry->ref_tab1 = ref_tab1;
              entry->ref_tab2 = ref_tab2;

              // cross registration: each table is keyed by the *other* reference
              h_set(ref_tab1, ref2, entry);
              h_set(ref_tab2, ref1, entry);

              return entry;
            }
         
     | 
| 
      
 66 
     | 
    
         
            +
             
     | 
| 
      
 67 
     | 
    
         
            +
            HshTable *co_hsh_create()
         
     | 
| 
      
 68 
     | 
    
         
            +
            {
         
     | 
| 
      
 69 
     | 
    
         
            +
              return h_new(&co_hash, &co_eq, NULL, &co_destroy);
         
     | 
| 
      
 70 
     | 
    
         
            +
            }
         
     | 
| 
      
 71 
     | 
    
         
            +
             
     | 
| 
      
 72 
     | 
    
         
            +
            /***************************************************************************
         
     | 
| 
      
 73 
     | 
    
         
            +
             *
         
     | 
| 
      
 74 
     | 
    
         
            +
             * Posting
         
     | 
| 
      
 75 
     | 
    
         
            +
             *
         
     | 
| 
      
 76 
     | 
    
         
            +
             ***************************************************************************/
         
     | 
| 
      
 77 
     | 
    
         
            +
             
     | 
| 
      
 78 
     | 
    
         
            +
            // Creates the Posting for the first occurrence of `term`. The positions
            // and offsets arrays start with room for exactly one entry each (size 1),
            // already filled with `position` and `offset`, so freq is 1 as well.
            // `term` and `offset` are stored as given, not copied.
            Posting *p_create(Term *term, int position, TVOffsetInfo *offset)
            {
              Posting *posting = ALLOC(Posting);
              int *first_position = ALLOC(int);
              TVOffsetInfo **first_offset = ALLOC(TVOffsetInfo *);

              first_position[0] = position;
              first_offset[0] = offset;

              posting->term = term;
              posting->positions = first_position;
              posting->offsets = first_offset;
              posting->size = 1;
              posting->freq = 1;

              return posting;
            }
         
     | 
| 
      
 90 
     | 
    
         
            +
             
     | 
| 
      
 91 
     | 
    
         
            +
            void p_destroy(void *p)
         
     | 
| 
      
 92 
     | 
    
         
            +
            {
         
     | 
| 
      
 93 
     | 
    
         
            +
              // the positions and offsets will be put in a TVTerm so no need to free
         
     | 
| 
      
 94 
     | 
    
         
            +
              int i;
         
     | 
| 
      
 95 
     | 
    
         
            +
              Posting *post = (Posting *)p;
         
     | 
| 
      
 96 
     | 
    
         
            +
              free(post->positions);
         
     | 
| 
      
 97 
     | 
    
         
            +
              for (i = 0; i < post->freq; i++)
         
     | 
| 
      
 98 
     | 
    
         
            +
                tvoi_destroy(post->offsets[i]);
         
     | 
| 
      
 99 
     | 
    
         
            +
              free(post->offsets);
         
     | 
| 
      
 100 
     | 
    
         
            +
              free(p);
         
     | 
| 
      
 101 
     | 
    
         
            +
            }
         
     | 
| 
      
 102 
     | 
    
         
            +
             
     | 
| 
      
 103 
     | 
    
         
            +
            // Appends one more occurrence (position + offset) to an existing Posting.
            // When both arrays are full their capacity is doubled before the new
            // entry is written at index `freq`; freq is then incremented.
            void p_add_occurance(Posting *p, int position, TVOffsetInfo *offset)
            {
              int slot = p->freq;

              if (slot >= p->size) {
                // out of room: grow both parallel arrays to twice the capacity
                p->size = p->size * 2;
                REALLOC_N(p->positions, int, p->size);
                REALLOC_N(p->offsets, TVOffsetInfo *, p->size);
              }

              p->positions[slot] = position;
              p->offsets[slot] = offset;
              p->freq = slot + 1;
            }
         
     | 
| 
      
 114 
     | 
    
         
            +
             
     | 
| 
      
 115 
     | 
    
         
            +
            inline int p_cmp(const void *const p1, const void *const p2)
         
     | 
| 
      
 116 
     | 
    
         
            +
            {
         
     | 
| 
      
 117 
     | 
    
         
            +
              Term *t1 = (*(Posting **)p1)->term;
         
     | 
| 
      
 118 
     | 
    
         
            +
              Term *t2 = (*(Posting **)p2)->term;
         
     | 
| 
      
 119 
     | 
    
         
            +
              int res = strcmp(t1->field, t2->field);
         
     | 
| 
      
 120 
     | 
    
         
            +
              if (res != 0) {
         
     | 
| 
      
 121 
     | 
    
         
            +
                return res;
         
     | 
| 
      
 122 
     | 
    
         
            +
              } else {
         
     | 
| 
      
 123 
     | 
    
         
            +
                return strcmp(t1->text, t2->text);
         
     | 
| 
      
 124 
     | 
    
         
            +
              }
         
     | 
| 
      
 125 
     | 
    
         
            +
            }
         
     | 
| 
      
 126 
     | 
    
         
            +
             
     | 
| 
      
 127 
     | 
    
         
            +
            DocumentWriter *dw_open(Store *store,
         
     | 
| 
      
 128 
     | 
    
         
            +
                                    Analyzer *analyzer,
         
     | 
| 
      
 129 
     | 
    
         
            +
                                    Similarity *similarity,
         
     | 
| 
      
 130 
     | 
    
         
            +
                                    int max_field_length,
         
     | 
| 
      
 131 
     | 
    
         
            +
                                    int term_index_interval)
         
     | 
| 
      
 132 
     | 
    
         
            +
            {
         
     | 
| 
      
 133 
     | 
    
         
            +
              DocumentWriter *dw = ALLOC(DocumentWriter);
         
     | 
| 
      
 134 
     | 
    
         
            +
              dw->store = store;
         
     | 
| 
      
 135 
     | 
    
         
            +
              dw->analyzer = analyzer;
         
     | 
| 
      
 136 
     | 
    
         
            +
              dw->similarity = similarity;
         
     | 
| 
      
 137 
     | 
    
         
            +
              dw->fis = NULL;
         
     | 
| 
      
 138 
     | 
    
         
            +
              dw->postingtable = h_new(&term_hash, &term_eq, &term_destroy, &p_destroy);
         
     | 
| 
      
 139 
     | 
    
         
            +
              dw->max_field_length = max_field_length;
         
     | 
| 
      
 140 
     | 
    
         
            +
              dw->term_index_interval = term_index_interval;
         
     | 
| 
      
 141 
     | 
    
         
            +
              return dw;
         
     | 
| 
      
 142 
     | 
    
         
            +
            }
         
     | 
| 
      
 143 
     | 
    
         
            +
             
     | 
| 
      
 144 
     | 
    
         
            +
            // Releases a DocumentWriter: destroys its field infos when present,
            // destroys the posting table, then frees the writer struct. The store,
            // analyzer and similarity are not released here.
            void dw_close(DocumentWriter *dw)
            {
              if (dw->fis != NULL) {
                fis_destroy(dw->fis);
              }

              h_destroy(dw->postingtable);

              free(dw);
            }
         
     | 
| 
      
 150 
     | 
    
         
            +
             
     | 
| 
      
 151 
     | 
    
         
            +
            // Records one occurrence of the term (field, text) at `position` in the
            // writer's posting table.
            //
            //   - If the term already has a Posting, the occurrence is appended to it
            //     via p_add_occurance(), which grows the arrays when needed. This
            //     replaces a hand-copied version of the same growth logic that used
            //     REALLOC_N differently from the helper.
            //   - Otherwise a new Term is created and a fresh Posting holding this
            //     single occurrence is stored under it.
            //
            // `offset` may be NULL (dw_invert_doc passes NULL when offsets are not
            // stored); it is stored in the Posting as given.
            void dw_add_position(DocumentWriter *dw, char *field, char *text,
                int position, TVOffsetInfo *offset)
            {
              // stack-allocated key used only for the lookup; nothing is copied
              Term termbuf = {field, text}, *term;
              Posting *p = (Posting *)h_get(dw->postingtable, &termbuf);

              if (p) { // word seen before
                p_add_occurance(p, position, offset);
              } else { // word not seen before
                term = term_create(field, text);
                h_set(dw->postingtable, term, p_create(term, position, offset));
              }
            }
         
     | 
| 
      
 172 
     | 
    
         
            +
             
     | 
| 
      
 173 
     | 
    
         
            +
            // Inverts a document: walks every field of `doc` and feeds the terms of
            // each indexed field into the writer's posting table through
            // dw_add_position().
            //
            // Per field number the writer keeps running totals (length, offset,
            // position, boost) which are loaded before and written back after the
            // field has been processed, so several DocFields sharing a field number
            // accumulate.
            //
            //   - Un-tokenized fields contribute their whole data string as one term.
            //   - Tokenized fields are run through the analyzer's token stream; at
            //     most max_field_length + 1 tokens are consumed before the loop stops.
            //
            // Offset objects are only created when the field info asks for stored
            // offsets; otherwise NULL is passed.
            //
            // After a tokenized field the running offset is advanced by the end
            // offset of the last token consumed, plus one. The end offset is
            // remembered inside the loop because `token` is NULL once the stream is
            // exhausted, so testing `token` after the loop would only ever advance
            // the offset when the loop was left early via `break`.
            void dw_invert_doc(DocumentWriter *dw, Document *doc)
            {
              int i;
              int dfcnt = doc->dfcnt;
              char *field_name, *text;
              int field_number, length, position, offset, slen;
              int have_token, last_end;
              TokenStream *stream;
              Token *token;
              FieldInfo *fi;
              TVOffsetInfo *tvoi;

              DocField **fields = doc->df_arr, *field;
              for (i = 0; i < dfcnt; i++) {
                field = fields[i];
                field_name = field->name;
                fi = ((FieldInfo *)ht_get(dw->fis->by_name, field_name));
                field_number = fi->number;

                // fields that are not indexed leave the running totals untouched
                if (!fi->is_indexed) {
                  continue;
                }

                length = dw->field_lengths[field_number];
                offset = dw->field_offsets[field_number];
                position = dw->field_positions[field_number];

                if (!field->is_tokenized) {// un-tokenized field
                  text = field->data;
                  slen = strlen(text);
                  tvoi = fi->store_offset ? tvoi_create(offset, offset + slen) : NULL;
                  // NOTE(review): position is not advanced for un-tokenized values;
                  // confirm this is intended when a field has several values.
                  dw_add_position(dw, field_name, text, position, tvoi);
                  offset += slen;
                  length++;
                } else {
                  // Tokenize field and add to posting_table
                  stream = a_get_ts(dw->analyzer, field_name, field->data);
                  have_token = 0;
                  last_end = 0;

                  while ((token = ts_next(stream)) != NULL) {
                    position += (token->pos_inc - 1);

                    tvoi = fi->store_offset
                      ? tvoi_create(offset + token->start, offset + token->end)
                      : NULL;
                    dw_add_position(dw, field_name, token->text, position, tvoi);
                    position++;

                    // remember where this token ended for the offset update below
                    last_end = token->end;
                    have_token = 1;

                    length++;
                    // stop if we reach the max field length
                    if (length > dw->max_field_length)
                      break;
                  }

                  if (have_token)
                    offset += last_end + 1;
                }

                dw->field_lengths[field_number] = length;
                dw->field_offsets[field_number] = offset;
                dw->field_positions[field_number] = position;
                dw->field_boosts[field_number] *= field->boost;
              }
            }
         
     | 
| 
      
 242 
     | 
    
         
            +
             
     | 
| 
      
 243 
     | 
    
         
            +
            /*
             * Collect every posting stored in dw->postingtable into a newly allocated
             * array and sort that array with p_cmp.
             *
             * dw->pcnt is set to the number of postings (the table's `used` count).
             * The returned array is heap allocated; the caller is expected to release
             * it (dw_add_doc does so with free()). The postings themselves remain owned
             * by the hash table.
             */
            Posting **dw_sort_posting_table(DocumentWriter *dw)
            {
              HshTable *table = dw->postingtable;
              HshEntry *entry;
              Posting **sorted;
              int remaining = table->used;

              dw->pcnt = remaining;
              sorted = ALLOC_N(Posting *, remaining);

              // Walk the raw slot array, skipping empty slots, and fill the result
              // from the back. NOTE(review): this assumes exactly `used` slots hold a
              // non-NULL value; otherwise the walk would run off the table.
              for (entry = table->table; remaining > 0; entry++) {
                if (entry->value == NULL)
                  continue;
                sorted[--remaining] = (Posting *)entry->value;
              }

              qsort(sorted, dw->pcnt, sizeof(Posting *), &p_cmp);
              return sorted;
            }
         
     | 
| 
      
 260 
     | 
    
         
            +
             
     | 
| 
      
 261 
     | 
    
         
            +
            /*
             * Write the sorted postings of a single document to the segment's
             * frequency (".frq") and proximity (".prx") files, add one dictionary
             * entry per term through a TermInfosWriter, and, for fields whose
             * FieldInfo has store_tv set, record term vectors.
             *
             * dw       - document writer; supplies the store, field infos, posting
             *            count (dw->pcnt) and term index interval
             * postings - array of dw->pcnt postings, as produced by
             *            dw_sort_posting_table
             * segment  - segment name used as the prefix of every file created here
             *
             * File names are formatted with snprintf so that an over-long segment
             * name is truncated to the buffer instead of overflowing it.
             */
            void dw_write_postings(DocumentWriter *dw, Posting **postings, char *segment)
            {
              OutStream *freq_out, *prox_out;
              TermInfosWriter *tiw;
              TermVectorsWriter *tvw = NULL;
              FieldInfo *fi;
              Store *store = dw->store;
              TermInfo *ti;
              Posting *posting;
              int i, j, posting_freq, position, last_position;
              char fname[SEGMENT_NAME_MAX_LENGTH], *curr_field = NULL, *term_field;

              // open files for inverse index storage
              snprintf(fname, sizeof(fname), "%s.frq", segment);
              freq_out = store->create_output(store, fname);
              snprintf(fname, sizeof(fname), "%s.prx", segment);
              prox_out = store->create_output(store, fname);
              tiw = tiw_open(store, segment, dw->fis, dw->term_index_interval);
              ti = ti_create(0, 0, 0, 0);

              for (i = 0; i < dw->pcnt; i++) {
                posting = postings[i];

                // add an entry to the dictionary with pointers to prox and freq_out files
                ti_set(ti, 1, os_pos(freq_out), os_pos(prox_out), -1);
                tiw_add(tiw, posting->term, ti);

                // add an entry to the freq_out file
                posting_freq = posting->freq;
                if (posting_freq == 1) {         // optimize freq=1
                  os_write_vint(freq_out, 1);            // set low bit of doc num.
                } else {
                  os_write_vint(freq_out, 0);            // the doc number
                  os_write_vint(freq_out, posting_freq); // frequency in doc
                }

                // write positions as deltas from the previous position
                last_position = 0;
                for (j = 0; j < posting_freq; j++) {
                  position = posting->positions[j];
                  os_write_vint(prox_out, position - last_position);
                  last_position = position;
                }

                // check to see if we switched to a new field
                // NOTE(review): this compares field-name pointers, not contents;
                // presumably all terms of a field share one name pointer - confirm.
                term_field = posting->term->field;
                if (curr_field != term_field)  {
                  // changing field - see if there is something to save
                  curr_field = term_field;
                  fi = (FieldInfo *)ht_get(dw->fis->by_name, curr_field);
                  if (fi->store_tv) {
                    if (tvw == NULL) {
                      // the term vectors writer is opened lazily, on the first
                      // field that stores term vectors
                      tvw = tvw_open(store, segment, dw->fis);
                      tvw_open_doc(tvw);
                    }
                    tvw_open_field(tvw, curr_field);

                  } else if (tvw != NULL) {
                    tvw_close_field(tvw);
                  }
                }
                // tvw->curr_field != NULL implies field is still open
                if (tvw != NULL && tvw->curr_field != NULL) {
                  tvw_add_term(tvw, posting->term->text, posting_freq, posting->positions, posting->offsets);
                }
              }
              if (tvw != NULL) {
                tvw_close_doc(tvw);
                tvw_close(tvw);
              }
              // close every stream and writer opened above and release the
              // scratch TermInfo
              os_close(freq_out);
              os_close(prox_out);
              tiw_close(tiw);
              ti_destroy(ti);
            }
         
     | 
| 
      
 339 
     | 
    
         
            +
             
     | 
| 
      
 340 
     | 
    
         
            +
            /*
             * Write one norm file per field that is indexed and does not omit norms.
             *
             * For each such field number i the norm is
             *   dw->field_boosts[i] * sim_length_norm(similarity, name, field_lengths[i])
             * and its encoded form (sim_encode_norm) is written as a single byte to the
             * file "<segment>.f<i>".
             *
             * The file name is formatted with snprintf so that an over-long segment
             * name is truncated to the buffer instead of overflowing it.
             */
            void dw_write_norms(DocumentWriter *dw, char *segment)
            {
              int i;
              float norm;
              OutStream *norms_out;
              char fname[SEGMENT_NAME_MAX_LENGTH];
              FieldInfos *fis = dw->fis;
              FieldInfo *fi;

              for (i = 0; i < fis->fcnt; i++) {
                fi = fis->by_number[i];

                if (fi->is_indexed && !fi->omit_norms) {
                  norm = dw->field_boosts[i] * sim_length_norm(dw->similarity, fi->name, dw->field_lengths[i]);
                  snprintf(fname, sizeof(fname), "%s.f%d", segment, i);
                  norms_out = dw->store->create_output(dw->store, fname);
                  os_write_byte(norms_out, sim_encode_norm(dw->similarity, norm));
                  os_close(norms_out);
                }
              }
            }
         
     | 
| 
      
 361 
     | 
    
         
            +
             
     | 
| 
      
 362 
     | 
    
         
            +
            /*
             * Index a single document into the named segment.
             *
             * In order: builds and writes the field infos (".fnm"), writes the stored
             * field values, inverts the document into the posting table, writes the
             * sorted postings, and finally writes the field norms. The per-field
             * scratch arrays (boosts, lengths, offsets, positions) exist only for the
             * duration of this call.
             */
            void dw_add_doc(DocumentWriter *dw, char *segment, Document *doc)
            {
              int field_num;
              FieldInfos *fis;
              FieldsWriter *fields_writer;
              Posting **sorted_postings;

              // field names
              fis = fis_create();
              dw->fis = fis;
              fis_add_doc(fis, doc);
              fis_write(fis, dw->store, segment, ".fnm");

              // stored field values
              fields_writer = fw_open(dw->store, segment, fis);
              fw_add_doc(fields_writer, doc);
              fw_close(fields_writer);

              // start from an empty posting table
              h_clear(dw->postingtable);

              // per-field scratch state; every boost starts at the document boost
              dw->field_boosts = ALLOC_N(float, fis->fcnt);
              dw->field_lengths = ALLOC_AND_ZERO_N(int, fis->fcnt);
              dw->field_offsets = ALLOC_AND_ZERO_N(int, fis->fcnt);
              dw->field_positions = ALLOC_AND_ZERO_N(int, fis->fcnt);

              field_num = 0;
              while (field_num < fis->fcnt) {
                dw->field_boosts[field_num] = doc->boost;
                field_num++;
              }

              // invert the document into the posting table
              dw_invert_doc(dw, doc);

              // flatten and sort the posting table, write it out, drop the array
              sorted_postings = dw_sort_posting_table(dw);
              dw_write_postings(dw, sorted_postings, segment);
              free(sorted_postings);

              // norms of indexed fields
              dw_write_norms(dw, segment);

              // release the per-field scratch state
              free(dw->field_boosts);
              free(dw->field_lengths);
              free(dw->field_offsets);
              free(dw->field_positions);
            }
         
     | 
| 
      
 403 
     | 
    
         
            +
             
     | 
| 
      
 404 
     | 
    
         
            +
            /****************************************************************************
         
     | 
| 
      
 405 
     | 
    
         
            +
             *
         
     | 
| 
      
 406 
     | 
    
         
            +
             * SegmentInfo
         
     | 
| 
      
 407 
     | 
    
         
            +
             *
         
     | 
| 
      
 408 
     | 
    
         
            +
             ****************************************************************************/
         
     | 
| 
      
 409 
     | 
    
         
            +
             
     | 
| 
      
 410 
     | 
    
         
            +
            /*
             * Allocate a SegmentInfo describing the segment `name` holding `doc_cnt`
             * documents in `store`.
             *
             * The name pointer is stored as-is (not copied); si_destroy later passes
             * it to free(), so the caller must hand over a heap-allocated string.
             */
            SegmentInfo *si_create(char *name, int doc_cnt, Store *store)
            {
              SegmentInfo *info = ALLOC(SegmentInfo);

              info->store = store;
              info->doc_cnt = doc_cnt;
              info->name = name;

              return info;
            }
         
     | 
| 
      
 418 
     | 
    
         
            +
             
     | 
| 
      
 419 
     | 
    
         
            +
            void si_destroy(void *p)
         
     | 
| 
      
 420 
     | 
    
         
            +
            {
         
     | 
| 
      
 421 
     | 
    
         
            +
              SegmentInfo *si = (SegmentInfo *)p;
         
     | 
| 
      
 422 
     | 
    
         
            +
              free(si->name);
         
     | 
| 
      
 423 
     | 
    
         
            +
              free(si);
         
     | 
| 
      
 424 
     | 
    
         
            +
            }
         
     | 
| 
      
 425 
     | 
    
         
            +
             
     | 
| 
      
 426 
     | 
    
         
            +
            /*
             * Return whether the segment has a deletions file, i.e. whether
             * "<name>.del" exists in the segment's store.
             *
             * The file name is formatted with snprintf so that an over-long segment
             * name is truncated to the buffer instead of overflowing it.
             */
            bool si_has_deletions(SegmentInfo *si)
            {
              char del_file_name[SEGMENT_NAME_MAX_LENGTH];
              snprintf(del_file_name, sizeof(del_file_name), "%s.del", si->name);
              return si->store->exists(si->store, del_file_name);
            }
         
     | 
| 
      
 432 
     | 
    
         
            +
             
     | 
| 
      
 433 
     | 
    
         
            +
            /*
             * Return whether the segment is stored as a compound file, i.e. whether
             * "<name>.cfs" exists in the segment's store.
             *
             * The file name is formatted with snprintf so that an over-long segment
             * name is truncated to the buffer instead of overflowing it.
             */
            bool si_uses_compound_file(SegmentInfo *si)
            {
              char compound_file_name[SEGMENT_NAME_MAX_LENGTH];
              snprintf(compound_file_name, sizeof(compound_file_name), "%s.cfs", si->name);
              return si->store->exists(si->store, compound_file_name);
            }
         
     | 
| 
      
 439 
     | 
    
         
            +
             
     | 
| 
      
 440 
     | 
    
         
            +
            /*
             * State shared between si_has_separate_norms and the is_norm_file callback.
             */
            struct NormTester {
              bool has_norm_file;   // set to true once a matching file name is seen
              char *segment_name;   // segment whose separate norm files are looked for
            };

            /*
             * Store-iteration callback: sets nt->has_norm_file when `fname` starts with
             * "<segment_name>.s". The flag is never reset here, so it stays true once
             * any file has matched.
             *
             * fname - file name to examine
             * arg   - pointer to the struct NormTester carrying the segment name and
             *         the result flag
             *
             * The prefix is checked in two steps directly against the segment name, so
             * no scratch buffer is needed and segment names of any length are safe.
             */
            void is_norm_file(char *fname, void *arg)
            {
              struct NormTester *nt = (struct NormTester *)arg;
              size_t seg_len = strlen(nt->segment_name);

              // The second comparison only runs when fname has at least seg_len
              // characters (they all matched), so fname + seg_len is within the
              // string or at its terminator.
              if (strncmp(fname, nt->segment_name, seg_len) == 0
                  && strncmp(fname + seg_len, ".s", 2) == 0) {
                nt->has_norm_file = true;
              }
            }
         
     | 
| 
      
 453 
     | 
    
         
            +
             
     | 
| 
      
 454 
     | 
    
         
            +
            /*
             * Return whether the store contains at least one file for which
             * is_norm_file sets the flag for this segment. Every file name in the
             * store is passed to is_norm_file through the store's each() iterator.
             */
            bool si_has_separate_norms(SegmentInfo *si)
            {
              struct NormTester tester;

              tester.has_norm_file = false;
              tester.segment_name = si->name;

              si->store->each(si->store, &is_norm_file, &tester);

              return tester.has_norm_file;
            }
         
     | 
| 
      
 463 
     | 
    
         
            +
             
     | 
| 
      
 464 
     | 
    
         
            +
             
     | 
| 
      
 465 
     | 
    
         
            +
            /****************************************************************************
         
     | 
| 
      
 466 
     | 
    
         
            +
             *
         
     | 
| 
      
 467 
     | 
    
         
            +
             * SegmentInfos
         
     | 
| 
      
 468 
     | 
    
         
            +
             *
         
     | 
| 
      
 469 
     | 
    
         
            +
             ****************************************************************************/
         
     | 
| 
      
 470 
     | 
    
         
            +
             
     | 
| 
      
 471 
     | 
    
         
            +
            #include <time.h>
         
     | 
| 
      
 472 
     | 
    
         
            +
            #define FORMAT -1
         
     | 
| 
      
 473 
     | 
    
         
            +
            #define SEGMENT_FILENAME "segments"
         
     | 
| 
      
 474 
     | 
    
         
            +
            #define TEMPORARY_SEGMENT_FILENAME "segments.new"
         
     | 
| 
      
 475 
     | 
    
         
            +
             
     | 
| 
      
 476 
     | 
    
         
            +
            SegmentInfos *sis_create()
         
     | 
| 
      
 477 
     | 
    
         
            +
            {
         
     | 
| 
      
 478 
     | 
    
         
            +
              SegmentInfos *sis = ALLOC(SegmentInfos);
         
     | 
| 
      
 479 
     | 
    
         
            +
              sis->format = FORMAT;
         
     | 
| 
      
 480 
     | 
    
         
            +
              sis->version = (unsigned int)time(NULL);
         
     | 
| 
      
 481 
     | 
    
         
            +
              sis->scnt = 0;
         
     | 
| 
      
 482 
     | 
    
         
            +
              sis->counter = 0;
         
     | 
| 
      
 483 
     | 
    
         
            +
              sis->size = 4;
         
     | 
| 
      
 484 
     | 
    
         
            +
              sis->segs = ALLOC_N(SegmentInfo *, sis->size);
         
     | 
| 
      
 485 
     | 
    
         
            +
              return sis;
         
     | 
| 
      
 486 
     | 
    
         
            +
            }
         
     | 
| 
      
 487 
     | 
    
         
            +
             
     | 
| 
      
 488 
     | 
    
         
            +
            void sis_destroy_not_infos(void *p)
         
     | 
| 
      
 489 
     | 
    
         
            +
            {
         
     | 
| 
      
 490 
     | 
    
         
            +
              SegmentInfos *sis = (SegmentInfos *)p;
         
     | 
| 
      
 491 
     | 
    
         
            +
              free(sis->segs);
         
     | 
| 
      
 492 
     | 
    
         
            +
              free(p);
         
     | 
| 
      
 493 
     | 
    
         
            +
            }
         
     | 
| 
      
 494 
     | 
    
         
            +
             
     | 
| 
      
 495 
     | 
    
         
            +
            void sis_destroy(void *p)
         
     | 
| 
      
 496 
     | 
    
         
            +
            {
         
     | 
| 
      
 497 
     | 
    
         
            +
              int i;
         
     | 
| 
      
 498 
     | 
    
         
            +
              SegmentInfos *sis = (SegmentInfos *)p;
         
     | 
| 
      
 499 
     | 
    
         
            +
              for (i = 0; i < sis->scnt; i++)
         
     | 
| 
      
 500 
     | 
    
         
            +
                si_destroy(sis->segs[i]);
         
     | 
| 
      
 501 
     | 
    
         
            +
              free(sis->segs);
         
     | 
| 
      
 502 
     | 
    
         
            +
              free(p);
         
     | 
| 
      
 503 
     | 
    
         
            +
            }
         
     | 
| 
      
 504 
     | 
    
         
            +
             
     | 
| 
      
 505 
     | 
    
         
            +
            /*
             * Append `si` to the list. When the pointer array is full its capacity is
             * first grown to twice the current segment count.
             */
            void sis_add_si(SegmentInfos *sis, SegmentInfo *si)
            {
              if (sis->size <= sis->scnt) {
                // out of room: double the capacity before storing
                sis->size = sis->scnt * 2;
                REALLOC_N(sis->segs, SegmentInfo *, sis->size);
              }

              sis->segs[sis->scnt++] = si;
            }
         
     | 
| 
      
 514 
     | 
    
         
            +
             
     | 
| 
      
 515 
     | 
    
         
            +
            /*
             * Destroy the SegmentInfo at index `at` and close the gap by shifting
             * every later entry down one slot. No bounds check is done on `at`.
             */
            void sis_del_at(SegmentInfos *sis, int at)
            {
              int idx = at;

              si_destroy(sis->segs[at]);
              sis->scnt -= 1;

              while (idx < sis->scnt) {
                sis->segs[idx] = sis->segs[idx + 1];
                idx++;
              }
            }
         
     | 
| 
      
 523 
     | 
    
         
            +
             
     | 
| 
      
 524 
     | 
    
         
            +
            /*
             * Destroy the SegmentInfos in the half-open index range [from, to) and
             * shift the entries that followed the range down to start at `from`.
             * No bounds check is done on the range.
             */
            void sis_del_from_to(SegmentInfos *sis, int from, int to)
            {
              int removed = to - from;
              int idx;

              // release the entries being removed
              idx = from;
              while (idx < to) {
                si_destroy(sis->segs[idx]);
                idx++;
              }

              sis->scnt -= removed;

              // move the survivors down over the freed slots
              for (idx = from; idx < sis->scnt; idx++) {
                sis->segs[idx] = sis->segs[idx + removed];
              }
            }
         
     | 
| 
      
 535 
     | 
    
         
            +
             
     | 
| 
      
 536 
     | 
    
         
            +
            /*
             * Destroy every SegmentInfo in the list and reset the count to zero. The
             * pointer array itself and its capacity are kept.
             */
            void sis_clear(SegmentInfos *sis)
            {
              int idx = 0;

              while (idx < sis->scnt) {
                si_destroy(sis->segs[idx]);
                idx++;
              }

              sis->scnt = 0;
            }
         
     | 
| 
      
 544 
     | 
    
         
            +
             
     | 
| 
      
 545 
     | 
    
         
            +
            /*
             * Load the segment list from SEGMENT_FILENAME in store into sis.
             *
             * The first int read is either a negative format marker (followed
             * by the version and the name counter) or, for the old layout, the
             * name counter itself. After that comes the segment count and one
             * (name, doc count) pair per segment, each appended via sis_add_si().
             * In the old layout the version, if present, trails the segment list.
             */
            void sis_read(SegmentInfos *sis, Store *store)
            {
              InStream *in = store->open_input(store, SEGMENT_FILENAME);
              int seg_total;
              int idx;

              sis->format = is_read_int(in);

              if (sis->format >= 0) {
                // old layout: the value just read is really the counter
                sis->counter = sis->format;
              } else {
                // explicit format info; make sure it is one we understand
                // NOTE(review): "%ld" assumes sis->format is a long -- confirm
                // against the SegmentInfos declaration.
                if (sis->format < FORMAT) {
                  eprintf(ERROR, "Unknown format version: %ld", sis->format);
                }
                sis->version = is_read_long(in);
                sis->counter = is_read_int(in);
              }

              seg_total = is_read_int(in);
              for (idx = 0; idx < seg_total; idx++) {
                char *seg_name = is_read_string(in);
                int seg_docs = is_read_int(in);
                sis_add_si(sis, si_create(seg_name, seg_docs, store));
              }

              if (sis->format >= 0) {
                // old layout: a version number may follow the segment list
                if (is_pos(in) < is_length(in)) {
                  sis->version = is_read_long(in);
                } else {
                  sis->version = 0; // nothing left to read, so no version stored
                }
              }

              is_close(in);
            }
         
     | 
| 
      
 579 
     | 
    
         
            +
             
     | 
| 
      
 580 
     | 
    
         
            +
            /*
             * Write sis to store.
             *
             * The data is written to TEMPORARY_SEGMENT_FILENAME and then
             * renamed to SEGMENT_FILENAME. Layout: FORMAT, version, counter,
             * segment count, then (name, doc count) for every segment.
             * sis->version is incremented as part of the write.
             */
            void sis_write(SegmentInfos *sis, Store *store)
            {
              OutStream *out = store->create_output(store, TEMPORARY_SEGMENT_FILENAME);
              int idx;

              // header
              os_write_int(out, FORMAT);
              os_write_long(out, ++sis->version); // each write bumps the version
              os_write_int(out, sis->counter);
              os_write_int(out, sis->scnt);

              // one record per segment
              for (idx = 0; idx < sis->scnt; idx++) {
                SegmentInfo *entry = sis->segs[idx];
                os_write_string(out, entry->name);
                os_write_int(out, entry->doc_cnt);
              }

              os_close(out);

              // put the freshly written file in place of the old one
              store->rename(store, TEMPORARY_SEGMENT_FILENAME, SEGMENT_FILENAME);
            }
         
     | 
| 
      
 600 
     | 
    
         
            +
             
     | 
| 
      
 601 
     | 
    
         
            +
            /*
             * Return the version stored in the segments file of store.
             *
             * Returns 0 when SEGMENT_FILENAME does not exist. When the file
             * starts with a negative format marker the version is read directly
             * after it; otherwise the whole file is parsed with sis_read() and
             * the version is taken from the resulting SegmentInfos.
             *
             * The version is read with is_read_long() but returned as int, so it
             * is narrowed here (made explicit with a cast; the return type is
             * part of the public interface and is left unchanged).
             */
            int sis_read_current_version(Store *store)
            {
              if (!store->exists(store, SEGMENT_FILENAME))
                return 0;
              InStream *is = store->open_input(store, SEGMENT_FILENAME);
              int format = 0;
              int version = 0;
              format = is_read_int(is);
              if (format < 0) {
                if (format < FORMAT)
                  // format is an int, so the conversion must be %d (was %ld)
                  eprintf(ERROR, "Unknown format version: %d", format);
                version = (int)is_read_long(is);
              }
              is_close(is);

              if (format < 0)
                return version;

              // We cannot be sure about the format of the file.
              // Therefore we have to read the whole file and cannot simply
              // seek to the version entry.

              SegmentInfos *sis = sis_create();
              sis_read(sis, store);
              version = sis->version;
              sis_destroy(sis);
              return version;
            }
         
     | 
| 
      
 629 
     | 
    
         
            +
             
     | 
| 
      
 630 
     | 
    
         
            +
            /****************************************************************************
         
     | 
| 
      
 631 
     | 
    
         
            +
             *
         
     | 
| 
      
 632 
     | 
    
         
            +
             * IndexWriter
         
     | 
| 
      
 633 
     | 
    
         
            +
             *
         
     | 
| 
      
 634 
     | 
    
         
            +
             ****************************************************************************/
         
     | 
| 
      
 635 
     | 
    
         
            +
             
     | 
| 
      
 636 
     | 
    
         
            +
            /*
             * Allocate and initialise an IndexWriter on store.
             *
             * store          - store the index lives in
             * analyzer       - analyzer recorded on the writer
             * create         - when true the store is cleared and an empty
             *                  segments file is written; otherwise the existing
             *                  segments file is read
             * close_store    - recorded on the writer as iw->close_store
             * close_analyzer - recorded on the writer as iw->close_analyzer
             *
             * The write lock is obtained here and kept on the writer. Lock
             * failures are reported through eprintf(STATE_ERROR, ...).
             */
            IndexWriter *iw_open(Store *store, Analyzer *analyzer,
                bool create, bool close_store, bool close_analyzer)
            {
              IndexWriter *writer = ALLOC(IndexWriter);

              if (create) {
                store->clear_all(store);
              }
              mutex_init(&writer->mutex, NULL);

              // tunables are seeded from the global config
              writer->merge_factor = config.merge_factor;
              writer->min_merge_docs = config.min_merge_docs;
              writer->max_merge_docs = config.max_merge_docs;
              writer->max_field_length = config.max_field_length;
              writer->term_index_interval = config.term_index_interval;
              writer->use_compound_file = true;

              // collaborators handed in by the caller
              writer->store = store;
              writer->close_store = close_store;
              writer->close_analyzer = close_analyzer;
              writer->analyzer = analyzer;

              // state created for this writer
              writer->sis = sis_create();
              writer->similarity = sim_create_default();
              writer->ram_store = open_ram_store();

              mutex_lock(&store->mutex);

              // the write lock stays with the writer for its whole lifetime
              writer->write_lock = store->open_lock(store, WRITE_LOCK_NAME);
              if (!writer->write_lock->obtain(writer->write_lock)) {
                eprintf(STATE_ERROR,
                    "Could not obtain write lock when trying to write index");
              }

              if (!create) {
                sis_read(writer->sis, store);
              } else {
                Lock *lock = store->open_lock(store, COMMIT_LOCK_NAME);
                if (!lock->obtain(lock)) {
                  eprintf(STATE_ERROR,
                      "Could not obtain commit lock when trying to commit index");
                }
                // commit the (empty) index while holding the commit lock
                store->clear(store);
                sis_write(writer->sis, store);
                lock->release(lock);
                store->close_lock(lock);
              }

              mutex_unlock(&store->mutex);
              return writer;
            }
         
     | 
| 
      
 683 
     | 
    
         
            +
             
     | 
| 
      
 684 
     | 
    
         
            +
            const char base36_digitmap[] = "0123456789abcdefghijklmnopqrstuvwxyz";
         
     | 
| 
      
 685 
     | 
    
         
            +
             
     | 
| 
      
 686 
     | 
    
         
            +
            char *new_segment_name(int counter) 
         
     | 
| 
      
 687 
     | 
    
         
            +
            {
         
     | 
| 
      
 688 
     | 
    
         
            +
              char buf[SEGMENT_NAME_MAX_LENGTH];
         
     | 
| 
      
 689 
     | 
    
         
            +
              buf[SEGMENT_NAME_MAX_LENGTH - 1] = '\0';
         
     | 
| 
      
 690 
     | 
    
         
            +
              int i;
         
     | 
| 
      
 691 
     | 
    
         
            +
              for (i = SEGMENT_NAME_MAX_LENGTH - 2; ; i--) {
         
     | 
| 
      
 692 
     | 
    
         
            +
                buf[i] = base36_digitmap[counter%36];
         
     | 
| 
      
 693 
     | 
    
         
            +
                counter /= 36;
         
     | 
| 
      
 694 
     | 
    
         
            +
                if (counter == 0) break;
         
     | 
| 
      
 695 
     | 
    
         
            +
              }
         
     | 
| 
      
 696 
     | 
    
         
            +
              i--;
         
     | 
| 
      
 697 
     | 
    
         
            +
              buf[i] = '_';
         
     | 
| 
      
 698 
     | 
    
         
            +
              return estrdup(&buf[i]);
         
     | 
| 
      
 699 
     | 
    
         
            +
            }
         
     | 
| 
      
 700 
     | 
    
         
            +
             
     | 
| 
      
 701 
     | 
    
         
            +
            /*
             * Return the sum of doc_cnt over all segments known to the writer.
             * The writer's mutex is held while the segment list is walked.
             */
            int iw_doc_count(IndexWriter *iw)
            {
              int total = 0;
              int idx = 0;

              mutex_lock(&iw->mutex);
              while (idx < iw->sis->scnt) {
                total += iw->sis->segs[idx]->doc_cnt;
                idx++;
              }
              mutex_unlock(&iw->mutex);

              return total;
            }
         
     | 
| 
      
 710 
     | 
    
         
            +
             
     | 
| 
      
 711 
     | 
    
         
            +
            /*
             * Remove every file named in file_names from store, then destroy
             * the array with ary_destroy(). The elements are treated as
             * C strings.
             */
            void delete_files(Array *file_names, Store *store)
            {
              int idx = 0;

              while (idx < file_names->size) {
                char *fname = (char *)file_names->elems[idx];
                store->remove(store, fname);
                idx++;
              }
              ary_destroy(file_names);
            }
         
     | 
| 
      
 719 
     | 
    
         
            +
             
     | 
| 
      
 720 
     | 
    
         
            +
             
     | 
| 
      
 721 
     | 
    
         
            +
            Array *sr_file_names(IndexReader *ir);
         
     | 
| 
      
 722 
     | 
    
         
            +
            /*
             * Delete the files belonging to the first del_cnt readers in
             * segment_readers, each from that reader's own store.
             *
             * iw is not used by the body.
             *
             * The java version keeps a record of files that it couldn't delete.
             * That is not done here; the original author hoped it would not be
             * a problem on linux.
             */
            void iw_delete_segments(IndexWriter *iw, IndexReader **segment_readers, int del_cnt)
            {
              int idx;

              for (idx = 0; idx < del_cnt; idx++) {
                IndexReader *reader = segment_readers[idx];
                Array *obsolete = sr_file_names(reader);
                // delete_files() also disposes of the name array
                delete_files(obsolete, reader->store);
              }
            }
         
     | 
| 
      
 733 
     | 
    
         
            +
             
     | 
| 
      
 734 
     | 
    
         
            +
            /*
             * Pack the files of segment merged_name into a compound file.
             *
             * The compound file is first built as "<merged_name>.tmp" by
             * sm_create_compound_file(), then, under the commit lock, renamed to
             * "<merged_name>.cfs"; the files returned by
             * sm_create_compound_file() are deleted afterwards. The store mutex
             * is held for the whole operation.
             *
             * The two file names are formatted with snprintf() bounded by the
             * buffer size, so a merged_name that leaves no room for the
             * 4-character suffix can no longer write past the end of the stack
             * buffers (the name is truncated instead).
             */
            void make_compound_file(IndexWriter *iw, char *merged_name, SegmentMerger *merger)
            {
              char merged_tmp[SEGMENT_NAME_MAX_LENGTH], merged_cfs[SEGMENT_NAME_MAX_LENGTH];

              mutex_lock(&iw->store->mutex);
              snprintf(merged_tmp, sizeof(merged_tmp), "%s.tmp", merged_name);
              snprintf(merged_cfs, sizeof(merged_cfs), "%s.cfs", merged_name);

              Array *files_to_delete = sm_create_compound_file(merger, merged_tmp);
              Lock *commit_lock = iw->store->open_lock(iw->store, COMMIT_LOCK_NAME);

              if (!commit_lock->obtain(commit_lock)) {
                eprintf(STATE_ERROR,
                    "Could not obtain commit lock when  trying to commit index");
              }

              // make compound file visible for SegmentReaders
              iw->store->rename(iw->store, merged_tmp, merged_cfs);
              // delete now unused files of segment
              delete_files(files_to_delete, iw->store);

              commit_lock->release(commit_lock);
              iw->store->close_lock(commit_lock);
              mutex_unlock(&iw->store->mutex);
            }
         
     | 
| 
      
 759 
     | 
    
         
            +
             
     | 
| 
      
 760 
     | 
    
         
            +
            /*
             * Merge the segments with indices [min_segment, max_segment) into
             * one new segment.
             *
             * A new segment name is taken from iw->sis->counter (which is
             * incremented), a reader is opened on every source segment and added
             * to a SegmentMerger, and the merge is run. The source entries are
             * then removed from iw->sis, the merged segment is appended, and the
             * new segment list is written under the commit lock. Readers whose
             * store is iw->store or iw->ram_store have their files deleted. If
             * iw->use_compound_file is set the merged segment is finally packed
             * into a compound file.
             *
             * The scratch array of readers is a VLA; its length is clamped to at
             * least 1 because a VLA whose size is not greater than zero is
             * undefined behaviour (this happens when max_segment <= min_segment).
             */
            void iw_merge_segments_with_max(IndexWriter *iw, int min_segment, int max_segment)
            {
              int i;
              int seg_span = max_segment - min_segment;
              IndexReader *segments_to_delete[seg_span > 0 ? seg_span : 1];
              int del_cnt = 0;

              char *merged_name = new_segment_name(iw->sis->counter++);

              SegmentMerger *merger = sm_create(iw->store, merged_name, iw->term_index_interval);
              IndexReader *reader;


              for (i = min_segment; i < max_segment; i++) {
                reader = sr_open(iw->sis, i, false, false);
                sm_add(merger, reader);
                if ((reader->store == iw->store) || // if we own the directory
                    (reader->store == iw->ram_store)) {
                  segments_to_delete[del_cnt++] = reader;   // queue segment for deletion
                }
              }

              int merged_doc_count = sm_merge(merger);

              sis_del_from_to(iw->sis, min_segment, max_segment);

              sis_add_si(iw->sis, si_create(merged_name, merged_doc_count, iw->store));

              // close readers before we attempt to delete now-obsolete segments
              // NOTE(review): no reader is closed in this function; presumably
              // sm_destroy() below takes care of it -- confirm.

              mutex_lock(&iw->store->mutex);
              Lock *commit_lock = iw->store->open_lock(iw->store, COMMIT_LOCK_NAME);
              if (!commit_lock->obtain(commit_lock)) {
                eprintf(STATE_ERROR,
                    "Could not obtain commit lock when trying to commit index");
              }
              // commit the index
              sis_write(iw->sis, iw->store);
              iw_delete_segments(iw, segments_to_delete, del_cnt);
              //
              commit_lock->release(commit_lock);
              iw->store->close_lock(commit_lock);
              mutex_unlock(&iw->store->mutex);

              if (iw->use_compound_file) {
                make_compound_file(iw, merged_name, merger);
              }

              sm_destroy(merger);
            }
         
     | 
| 
      
 809 
     | 
    
         
            +
             
     | 
| 
      
 810 
     | 
    
         
            +
            /*
             * Merge every segment from index min_segment up to the end of the
             * writer's current segment list.
             */
            void iw_merge_segments(IndexWriter *iw, int min_segment)
            {
              const int segment_total = iw->sis->scnt;

              iw_merge_segments_with_max(iw, min_segment, segment_total);
            }
         
     | 
| 
      
 814 
     | 
    
         
            +
             
     | 
| 
      
 815 
     | 
    
         
            +
            /*
             * Merge trailing small segments while enough of them have piled up.
             *
             * Starting with a target size of iw->min_merge_docs, the segment list
             * is scanned from the end for segments smaller than the target. If
             * their combined doc count reaches the target they are merged and the
             * target is multiplied by iw->merge_factor; otherwise the function
             * stops. It also stops once the target exceeds iw->max_merge_docs.
             *
             * The target and the running sum are kept in long long so that
             * multiplying by merge_factor, or summing doc counts, cannot overflow
             * a signed int when max_merge_docs is large.
             */
            void iw_maybe_merge_segments(IndexWriter *iw)
            {
              long long target_merge_docs = iw->min_merge_docs;
              long long merge_docs;
              int min_segment;
              SegmentInfo *si;

              while (target_merge_docs <= iw->max_merge_docs) {
                // find segments smaller than current target size
                min_segment = iw->sis->scnt - 1;
                merge_docs = 0;
                while (min_segment >= 0) {
                  si = iw->sis->segs[min_segment];
                  if (si->doc_cnt >= target_merge_docs)
                    break;
                  merge_docs += si->doc_cnt;
                  min_segment -= 1;
                }

                if (merge_docs >= target_merge_docs)      // found a merge to do
                  iw_merge_segments(iw, min_segment + 1);
                else
                  break;

                target_merge_docs *= iw->merge_factor; // increase target size
              }
            }
         
     | 
| 
      
 841 
     | 
    
         
            +
             
     | 
| 
      
 842 
     | 
    
         
            +
            /*
             * Merge the trailing segments that live in iw->ram_store.
             *
             * The segment list is scanned from the end while segments belong to the
             * RAM store, summing their document counts. The index where the scan
             * stopped is then bumped by one when any of the following holds:
             *   - the scan ran off the front of the list (index is -1);
             *   - adding the segment the scan stopped at would push the document
             *     total above iw->merge_factor, so that segment is left alone;
             *   - the last segment is not a RAM segment at all, in which case the
             *     bump moves the index to the segment count and nothing is merged.
             * If none holds, the segment the scan stopped at is merged along with
             * the RAM segments.
             */
            void iw_flush_ram_segments(IndexWriter *iw)
            {
              SegmentInfo **segs = iw->sis->segs;
              int last = iw->sis->scnt - 1;
              int ram_docs = 0;
              int first;

              for (first = last;
                   first >= 0 && segs[first]->store == iw->ram_store;
                   first--) {
                ram_docs += segs[first]->doc_cnt;
              }

              if (first < 0
                  || (ram_docs + segs[first]->doc_cnt) > iw->merge_factor
                  || segs[last]->store != iw->ram_store) {
                first++;
              }

              if (first < iw->sis->scnt) {
                iw_merge_segments(iw, first);
              }
            }
         
     | 
| 
      
 868 
     | 
    
         
            +
             
     | 
| 
      
 869 
     | 
    
         
            +
            /*
             * Add a single document to the index.
             *
             * While holding iw->mutex, the document is written through a
             * DocumentWriter opened on iw->ram_store under a freshly generated
             * segment name, a SegmentInfo with a document count of 1 is appended
             * to iw->sis, and iw_maybe_merge_segments() is called.
             */
            void iw_add_doc(IndexWriter *iw, Document *doc)
            {
              DocumentWriter *writer;
              SegmentInfo *new_si;
              char *seg_name;

              mutex_lock(&iw->mutex);

              writer = dw_open(iw->ram_store, iw->analyzer, iw->similarity,
                               iw->max_field_length, iw->term_index_interval);
              seg_name = new_segment_name(iw->sis->counter++);
              dw_add_doc(writer, seg_name, doc);
              dw_close(writer);

              new_si = si_create(seg_name, 1, iw->ram_store);
              sis_add_si(iw->sis, new_si);
              iw_maybe_merge_segments(iw);

              mutex_unlock(&iw->mutex);
            }
         
     | 
| 
      
 887 
     | 
    
         
            +
             
     | 
| 
      
 888 
     | 
    
         
            +
            /*
             * Flush the RAM segments, then keep merging until at most one segment
             * is left. A single remaining segment is merged again if it has
             * deletions, is not stored in iw->store, or (when compound files are
             * enabled) does not use a compound file or has separate norms.
             * Callers in this file take iw->mutex before calling.
             */
            static inline void iw_optimize_internal(IndexWriter *iw)
            {
              SegmentInfo *only;
              int seg_cnt;
              int from;

              iw_flush_ram_segments(iw);

              for (;;) {
                seg_cnt = iw->sis->scnt;
                if (seg_cnt <= 1) {
                  if (seg_cnt != 1) {
                    break;                      /* no segments left to look at */
                  }
                  only = iw->sis->segs[0];
                  if (!si_has_deletions(only)
                      && only->store == iw->store
                      && !(iw->use_compound_file
                           && (!si_uses_compound_file(only)
                               || si_has_separate_norms(only)))) {
                    break;                      /* the single segment needs no work */
                  }
                }

                /* merge the last merge_factor segments, clamped to the list start */
                from = iw->sis->scnt - iw->merge_factor;
                if (from < 0) {
                  from = 0;
                }
                iw_merge_segments(iw, from);
              }
            }
         
     | 
| 
      
 903 
     | 
    
         
            +
            /*
             * Public entry point for optimizing the index: runs
             * iw_optimize_internal() with iw->mutex held for the duration.
             */
            void iw_optimize(IndexWriter *iw)
            {
              mutex_lock(&(iw->mutex));
              {
                iw_optimize_internal(iw);
              }
              mutex_unlock(&(iw->mutex));
            }
         
     | 
| 
      
 909 
     | 
    
         
            +
             
     | 
| 
      
 910 
     | 
    
         
            +
            /*
             * Close the writer and free it.
             *
             * Flushes the RAM segments, closes the RAM store, destroys the segment
             * infos and the similarity, destroys the analyzer when
             * iw->close_analyzer is set, releases and closes the write lock, closes
             * iw->store when iw->close_store is set, and finally destroys the mutex
             * and frees the IndexWriter itself. iw must not be used afterwards.
             */
            void iw_close(IndexWriter *iw)
            {
              mutex_lock(&iw->mutex);
              iw_flush_ram_segments(iw);
              ram_close(iw->ram_store);
              sis_destroy(iw->sis);

              sim_destroy(iw->similarity);
              if (iw->close_analyzer) {
                a_destroy(iw->analyzer);
              }

              iw->write_lock->release(iw->write_lock);
              iw->store->close_lock(iw->write_lock);

              if (iw->close_store) {
                store_close(iw->store);
              }

              /* Release the mutex before destroying it: destroying a mutex that is
               * still locked is undefined for POSIX mutexes. */
              mutex_unlock(&iw->mutex);
              mutex_destroy(&iw->mutex);
              free(iw);
            }
         
     | 
| 
      
 928 
     | 
    
         
            +
             
     | 
| 
      
 929 
     | 
    
         
            +
            /*
             * Add the segments of cnt other stores to this writer's index.
             *
             * With iw->mutex held: the index is first optimized, then the segment
             * infos read from each store are appended to iw->sis, the appended
             * segments are merged in groups of up to iw->merge_factor, and the
             * index is optimized once more.
             */
            void iw_add_indexes(IndexWriter *iw, Store **stores, int cnt)
            {
              int store_idx, seg_idx, base, limit, first_new;

              mutex_lock(&iw->mutex);
              iw_optimize_internal(iw);         /* leaves zero or one segment */

              first_new = iw->sis->scnt;

              for (store_idx = 0; store_idx < cnt; store_idx++) {
                Store *src = stores[store_idx];
                SegmentInfos *src_infos = sis_create();

                sis_read(src_infos, src);       /* load the infos from that store */

                seg_idx = 0;
                while (seg_idx < src_infos->scnt) {
                  sis_add_si(iw->sis, src_infos->segs[seg_idx]);
                  seg_idx++;
                }
                /* the infos now belong to iw->sis; only drop the container */
                sis_destroy_not_infos(src_infos);
              }

              /* merge the newly added segments group by group */
              while (iw->sis->scnt > first_new + iw->merge_factor) {
                for (base = first_new + 1; base < iw->sis->scnt; base++) {
                  limit = MIN(iw->sis->scnt, base + iw->merge_factor);
                  if (limit - base > 1) {
                    iw_merge_segments_with_max(iw, base, limit);
                  }
                }
              }

              iw_optimize_internal(iw);         /* final cleanup */
              mutex_unlock(&iw->mutex);
            }
         
     | 
| 
      
 964 
     | 
    
         
            +
             
     | 
| 
      
 965 
     | 
    
         
            +
             
     | 
| 
      
 966 
     | 
    
         
            +
            /**
         
     | 
| 
      
 967 
     | 
    
         
            +
             * This adds an array of readers to the index leaving the added readers open.
         
     | 
| 
      
 968 
     | 
    
         
            +
             */
         
     | 
| 
      
 969 
     | 
    
         
            +
            void iw_add_readers(IndexWriter *iw, IndexReader **irs, int cnt)
         
     | 
| 
      
 970 
     | 
    
         
            +
            {
         
     | 
| 
      
 971 
     | 
    
         
            +
              IndexReader *ir = NULL;
         
     | 
| 
      
 972 
     | 
    
         
            +
              int i, del_cnt = 0;
         
     | 
| 
      
 973 
     | 
    
         
            +
             
     | 
| 
      
 974 
     | 
    
         
            +
              mutex_lock(&iw->mutex);
         
     | 
| 
      
 975 
     | 
    
         
            +
              iw_optimize_internal(iw); // start with zero or 1 seg
         
     | 
| 
      
 976 
     | 
    
         
            +
             
     | 
| 
      
 977 
     | 
    
         
            +
              char *merged_name = new_segment_name(iw->sis->counter++);
         
     | 
| 
      
 978 
     | 
    
         
            +
             
     | 
| 
      
 979 
     | 
    
         
            +
              SegmentMerger *merger = sm_create(iw->store, merged_name, iw->term_index_interval);
         
     | 
| 
      
 980 
     | 
    
         
            +
              merger->readers->free_elem = NULL; // don't close readers
         
     | 
| 
      
 981 
     | 
    
         
            +
             
     | 
| 
      
 982 
     | 
    
         
            +
              if (iw->sis->scnt == 1) {// add existing index, if any
         
     | 
| 
      
 983 
     | 
    
         
            +
                ir = sr_open_si(iw->sis->segs[0]);
         
     | 
| 
      
 984 
     | 
    
         
            +
                sm_add(merger, ir);
         
     | 
| 
      
 985 
     | 
    
         
            +
                del_cnt = 1;
         
     | 
| 
      
 986 
     | 
    
         
            +
              }
         
     | 
| 
      
 987 
     | 
    
         
            +
             
     | 
| 
      
 988 
     | 
    
         
            +
              for (i = 0; i < cnt; i++) {
         
     | 
| 
      
 989 
     | 
    
         
            +
                sm_add(merger, irs[i]);
         
     | 
| 
      
 990 
     | 
    
         
            +
              }
         
     | 
| 
      
 991 
     | 
    
         
            +
             
     | 
| 
      
 992 
     | 
    
         
            +
              int doc_count = sm_merge(merger); // merge 'em
         
     | 
| 
      
 993 
     | 
    
         
            +
             
     | 
| 
      
 994 
     | 
    
         
            +
              // pop old infos and add new ones.
         
     | 
| 
      
 995 
     | 
    
         
            +
              sis_clear(iw->sis);
         
     | 
| 
      
 996 
     | 
    
         
            +
              sis_add_si(iw->sis, si_create(merged_name, doc_count, iw->store));
         
     | 
| 
      
 997 
     | 
    
         
            +
             
     | 
| 
      
 998 
     | 
    
         
            +
             
     | 
| 
      
 999 
     | 
    
         
            +
              Lock *commit_lock = iw->store->open_lock(iw->store, COMMIT_LOCK_NAME);
         
     | 
| 
      
 1000 
     | 
    
         
            +
              if (!commit_lock->obtain(commit_lock)) // obtain write lock
         
     | 
| 
      
 1001 
     | 
    
         
            +
                eprintf(STATE_ERROR, "Index locked for commit: %s", COMMIT_LOCK_NAME);
         
     | 
| 
      
 1002 
     | 
    
         
            +
             
     | 
| 
      
 1003 
     | 
    
         
            +
              sis_write(iw->sis, iw->store); // commit changes
         
     | 
| 
      
 1004 
     | 
    
         
            +
              iw_delete_segments(iw, &ir, del_cnt);
         
     | 
| 
      
 1005 
     | 
    
         
            +
              if (ir) ir_close(ir);
         
     | 
| 
      
 1006 
     | 
    
         
            +
             
     | 
| 
      
 1007 
     | 
    
         
            +
              commit_lock->release(commit_lock);
         
     | 
| 
      
 1008 
     | 
    
         
            +
              iw->store->close_lock(commit_lock);
         
     | 
| 
      
 1009 
     | 
    
         
            +
             
     | 
| 
      
 1010 
     | 
    
         
            +
              if (iw->use_compound_file) {
         
     | 
| 
      
 1011 
     | 
    
         
            +
                make_compound_file(iw, merged_name, merger);
         
     | 
| 
      
 1012 
     | 
    
         
            +
              }
         
     | 
| 
      
 1013 
     | 
    
         
            +
             
     | 
| 
      
 1014 
     | 
    
         
            +
              iw_optimize_internal(iw);
         
     | 
| 
      
 1015 
     | 
    
         
            +
              sm_destroy(merger);
         
     | 
| 
      
 1016 
     | 
    
         
            +
             
     | 
| 
      
 1017 
     | 
    
         
            +
              mutex_unlock(&iw->mutex);
         
     | 
| 
      
 1018 
     | 
    
         
            +
            }
         
     | 
| 
      
 1019 
     | 
    
         
            +
             
     | 
| 
      
 1020 
     | 
    
         
            +
            /****************************************************************************
         
     | 
| 
      
 1021 
     | 
    
         
            +
             *
         
     | 
| 
      
 1022 
     | 
    
         
            +
             * Norm
         
     | 
| 
      
 1023 
     | 
    
         
            +
             *
         
     | 
| 
      
 1024 
     | 
    
         
            +
             ****************************************************************************/
         
     | 
| 
      
 1025 
     | 
    
         
            +
             
     | 
| 
      
 1026 
     | 
    
         
            +
            /*
             * Allocate a Norm for field number field_num backed by input stream is.
             * The new Norm has no byte buffer yet (bytes == NULL) and is not dirty.
             * norm_destroy() closes the stream that is stored here.
             */
            Norm *norm_create(InStream *is, int field_num)
            {
              Norm *created = ALLOC(Norm);

              created->field_num = field_num;
              created->is = is;
              created->is_dirty = false;
              created->bytes = NULL;

              return created;
            }
         
     | 
| 
      
 1035 
     | 
    
         
            +
             
     | 
| 
      
 1036 
     | 
    
         
            +
            void norm_destroy(void *p)
         
     | 
| 
      
 1037 
     | 
    
         
            +
            {
         
     | 
| 
      
 1038 
     | 
    
         
            +
              Norm *norm = (Norm *)p;
         
     | 
| 
      
 1039 
     | 
    
         
            +
              is_close(norm->is);
         
     | 
| 
      
 1040 
     | 
    
         
            +
              if (norm->bytes != NULL) free(norm->bytes);
         
     | 
| 
      
 1041 
     | 
    
         
            +
              free(norm);
         
     | 
| 
      
 1042 
     | 
    
         
            +
            }
         
     | 
| 
      
 1043 
     | 
    
         
            +
             
     | 
| 
      
 1044 
     | 
    
         
            +
            /*
             * Write a Norm's in-memory bytes back to the store.
             *
             * Does nothing when norm->bytes is NULL. Otherwise the first doc_count
             * bytes are written to "<segment>.tmp", which is then renamed to
             * "<segment>.s<field_num>" when cfs_store is non-NULL or
             * "<segment>.f<field_num>" when it is NULL, and the dirty flag is
             * cleared.
             *
             * File names are built with snprintf bounded by the buffer size, so an
             * over-long segment name is truncated instead of overrunning the stack
             * buffers.
             */
            void norm_rewrite(Norm *norm, Store *store, char *segment,
                int doc_count, Store *cfs_store)
            {
              if (norm->bytes == NULL)
                return; // These norms do not need to be rewritten

              char tmp_fname[SEGMENT_NAME_MAX_LENGTH];
              char norm_fname[SEGMENT_NAME_MAX_LENGTH];
              snprintf(tmp_fname, sizeof(tmp_fname), "%s.tmp", segment);
              OutStream *os = store->create_output(store, tmp_fname);
              os_write_bytes(os, norm->bytes, doc_count);
              os_close(os);
              if (cfs_store) {
                snprintf(norm_fname, sizeof(norm_fname), "%s.s%d",
                         segment, norm->field_num);
              } else {
                snprintf(norm_fname, sizeof(norm_fname), "%s.f%d",
                         segment, norm->field_num);
              }
              // write to a temporary name first, then rename over the final name
              store->rename(store, tmp_fname, norm_fname);
              norm->is_dirty = false;
            }
         
     | 
| 
      
 1064 
     | 
    
         
            +
             
     | 
| 
      
 1065 
     | 
    
         
            +
            /****************************************************************************
         
     | 
| 
      
 1066 
     | 
    
         
            +
             *
         
     | 
| 
      
 1067 
     | 
    
         
            +
             * SegmentReader
         
     | 
| 
      
 1068 
     | 
    
         
            +
             *
         
     | 
| 
      
 1069 
     | 
    
         
            +
             ****************************************************************************/
         
     | 
| 
      
 1070 
     | 
    
         
            +
             
     | 
| 
      
 1071 
     | 
    
         
            +
            /*
             * Declares a local `sr` pointing at the SegmentReader stored in
             * ir->data. Requires a variable named `ir` in scope. The expansion
             * already ends in a semicolon, so writing `GET_SR;` yields the
             * declaration followed by an empty statement.
             */
            #define GET_SR SegmentReader *sr = (SegmentReader *)ir->data;
         
     | 
| 
      
 1072 
     | 
    
         
            +
             
     | 
| 
      
 1073 
     | 
    
         
            +
            /*
             * Return the len field of the segment reader's fields reader (sr->fr),
             * which this reader reports as its max_doc value.
             */
            int sr_max_doc(IndexReader *ir)
            {
              SegmentReader *seg_reader = (SegmentReader *)ir->data;

              return seg_reader->fr->len;
            }
         
     | 
| 
      
 1077 
     | 
    
         
            +
             
     | 
| 
      
 1078 
     | 
    
         
            +
            /*
             * Release the segment reader's norms by destroying the sr->norms
             * hash table.
             */
            static inline void sr_close_norms(SegmentReader *sr)
            {
              h_destroy((sr->norms));
            }
         
     | 
| 
      
 1082 
     | 
    
         
            +
             
     | 
| 
      
 1083 
     | 
    
         
            +
            /*
             * Return the calling thread's TermVectorsReader for this segment.
             *
             * The reader is looked up under the sr->thread_tvr key. On the first
             * call from a thread, sr->orig_tvr is cloned, the clone is appended to
             * sr->tvr_bucket and stored under the key.
             *
             * Returns NULL if the clone could not be created; in that case nothing
             * is appended to the bucket or stored under the key.
             */
            static inline TermVectorsReader *sr_tvr(SegmentReader *sr)
            {
              TermVectorsReader *tvr;
              if ((tvr = thread_getspecific(sr->thread_tvr)) == NULL) {
                tvr = tvr_clone(sr->orig_tvr);
                if (tvr == NULL) {
                  /* do not cache a failed clone (and no stray debug output) */
                  return NULL;
                }
                ary_append(sr->tvr_bucket, tvr);
                thread_setspecific(sr->thread_tvr, tvr);
              }
              return tvr;
            }
         
     | 
| 
      
 1094 
     | 
    
         
            +
             
     | 
| 
      
 1095 
     | 
    
         
            +
            /*
             * Close a segment reader and free everything it holds: the fields
             * reader, the term infos reader, the freq/prox input streams (when
             * present), the field infos, the norms, the term vectors reader along
             * with its thread key and bucket (when present), the deleted-docs bit
             * vector, the compound file store, the fake norms buffer, the segment
             * name, and the SegmentReader struct itself.
             */
            void sr_close(IndexReader *ir)
            {
              SegmentReader *sr = (SegmentReader *)ir->data;

              fr_close(sr->fr);
              tir_close(sr->tir);

              if (sr->freq_in != NULL) {
                is_close(sr->freq_in);
              }
              if (sr->prox_in != NULL) {
                is_close(sr->prox_in);
              }
              fis_destroy(sr->fis);

              sr_close_norms(sr);

              /* the key and bucket are only torn down when orig_tvr is set */
              if (sr->orig_tvr != NULL) {
                tvr_close(sr->orig_tvr);
                thread_key_delete(sr->thread_tvr);
                ary_destroy(sr->tvr_bucket);
              }
              if (sr->deleted_docs != NULL) {
                bv_destroy(sr->deleted_docs);
              }
              if (sr->cfs_store != NULL) {
                sr->cfs_store->close(sr->cfs_store);
              }
              free(sr->fake_norms);             /* free(NULL) is a no-op */
              free(sr->segment);
              free(sr);
            }
         
     | 
| 
      
 1118 
     | 
    
         
            +
             
     | 
| 
      
 1119 
     | 
    
         
            +
            /*
             * Mark document doc_num as deleted in this segment reader. The
             * deleted-docs bit vector is created on first use; the reader is
             * flagged as having dirty deletions and undelete_all is cleared.
             */
            void sr_delete_doc(IndexReader *ir, int doc_num)
            {
              SegmentReader *sr = (SegmentReader *)ir->data;

              if (!sr->deleted_docs) {
                sr->deleted_docs = bv_create();
              }

              sr->deleted_docs_dirty = true;
              sr->undelete_all = false;
              bv_set(sr->deleted_docs, doc_num);
            }
         
     | 
| 
      
 1129 
     | 
    
         
            +
             
     | 
| 
      
 1130 
     | 
    
         
            +
            /*
             * Report whether doc_num is set in the deleted-docs bit vector.
             *
             * Returns false when no bit vector exists. This helper does not lock
             * ir->mutex itself; the callers visible in this file (sr_is_deleted,
             * sr_get_doc) hold the mutex around the call.
             */
            static inline bool sr_is_deleted_internal(IndexReader *ir, int doc_num)
            {
              GET_SR;

              if (sr->deleted_docs == NULL) {
                return false;
              }
              return bv_get(sr->deleted_docs, doc_num) ? true : false;
            }
         
     | 
| 
      
 1135 
     | 
    
         
            +
             
     | 
| 
      
 1136 
     | 
    
         
            +
            /*
             * Mutex-protected wrapper around sr_is_deleted_internal.
             *
             * Returns true when doc_num is flagged as deleted in this segment.
             */
            bool sr_is_deleted(IndexReader *ir, int doc_num)
            {
              bool deleted;

              mutex_lock(&ir->mutex);
              deleted = sr_is_deleted_internal(ir, doc_num);
              mutex_unlock(&ir->mutex);

              return deleted;
            }
         
     | 
| 
      
 1146 
     | 
    
         
            +
             
     | 
| 
      
 1147 
     | 
    
         
            +
            /*
             * Return whether the norms table of this segment has an entry keyed by
             * field. The lookup is performed while holding ir->mutex.
             */
            bool sr_has_norms(IndexReader *ir, char *field)
            {
              GET_SR;
              bool found;

              mutex_lock(&ir->mutex);
              found = h_has_key(sr->norms, field);
              mutex_unlock(&ir->mutex);

              return found;
            }
         
     | 
| 
      
 1157 
     | 
    
         
            +
             
     | 
| 
      
 1158 
     | 
    
         
            +
            /*
             * Return true when a deleted-docs bit vector exists for this segment.
             * Only the pointer is tested; the vector's contents are not inspected.
             */
            bool sr_has_deletions(IndexReader *ir)
            {
              GET_SR;
              return sr->deleted_docs ? true : false;
            }
         
     | 
| 
      
 1163 
     | 
    
         
            +
             
     | 
| 
      
 1164 
     | 
    
         
            +
            /*
             * Discard every recorded deletion in this segment reader.
             *
             * Destroys the deleted-docs bit vector (if any) and leaves the pointer
             * NULL, sets undelete_all to true and deleted_docs_dirty to false.
             */
            void sr_undelete_all(IndexReader *ir)
            {
              GET_SR;

              sr->deleted_docs_dirty = false;
              sr->undelete_all = true;

              if (sr->deleted_docs != NULL) {
                bv_destroy(sr->deleted_docs);
                sr->deleted_docs = NULL;
              }
            }
         
     | 
| 
      
 1172 
     | 
    
         
            +
             
     | 
| 
      
 1173 
     | 
    
         
            +
            /*
             * Return a clone of the segment's original term enumerator
             * (tir->orig_te), produced by that enumerator's own clone callback.
             */
            TermEnum *sr_terms(IndexReader *ir)
            {
              SegmentReader *seg = (SegmentReader *)ir->data;
              TermEnum *orig_te = seg->tir->orig_te;

              return orig_te->clone(orig_te);
            }
         
     | 
| 
      
 1178 
     | 
    
         
            +
             
     | 
| 
      
 1179 
     | 
    
         
            +
            /*
             * Clone the segment's original term enumerator and advance the clone
             * with te_skip_to(clone, term) before returning it.
             */
            TermEnum *sr_terms_from(IndexReader *ir, Term *term)
            {
              SegmentReader *seg = (SegmentReader *)ir->data;
              TermEnum *orig_te = seg->tir->orig_te;
              TermEnum *positioned = orig_te->clone(orig_te);

              te_skip_to(positioned, term);
              return positioned;
            }
         
     | 
| 
      
 1186 
     | 
    
         
            +
             
     | 
| 
      
 1187 
     | 
    
         
            +
            /*
             * Fetch document doc_num from this segment via fr_get_doc(sr->fr, ...).
             *
             * ir->mutex is held while the deletion bit is checked and the document
             * is read. If the document is flagged as deleted, the mutex is released
             * and a STATE_ERROR is reported through eprintf.
             *
             * Returns the document obtained from fr_get_doc, or NULL if the
             * document is deleted and eprintf returned control to this function.
             */
            Document *sr_get_doc(IndexReader *ir, int doc_num)
            {
              Document *doc;
              mutex_lock(&ir->mutex);
              if (sr_is_deleted_internal(ir, doc_num)) {
                mutex_unlock(&ir->mutex);
                /* doc_num is an int, so the conversion must be %d (was %ld) */
                eprintf(STATE_ERROR,
                    "Tried to get doc <%d> that has already been deleted", doc_num);
                /*
                 * NOTE(review): eprintf presumably does not return for
                 * STATE_ERROR -- confirm. If it does return, bail out here
                 * instead of reading the deleted document and unlocking the
                 * already-released mutex a second time.
                 */
                return NULL;
              }
              GET_SR;
              doc = fr_get_doc(sr->fr, doc_num);
              mutex_unlock(&ir->mutex);
              return doc;
            }
         
     | 
| 
      
 1201 
     | 
    
         
            +
             
     | 
| 
      
 1202 
     | 
    
         
            +
            static inline void
         
     | 
| 
      
 1203 
     | 
    
         
            +
            sr_get_norms_into_internal(IndexReader *ir, char *field, uchar *buf, int offset)
         
     | 
| 
      
 1204 
     | 
    
         
            +
            {
         
     | 
| 
      
 1205 
     | 
    
         
            +
              GET_SR;
         
     | 
| 
      
 1206 
     | 
    
         
            +
              Norm *norm = h_get(sr->norms, field);
         
     | 
| 
      
 1207 
     | 
    
         
            +
              if (norm == NULL) {
         
     | 
| 
      
 1208 
     | 
    
         
            +
                memset(buf + offset*sizeof(uchar), 0, sr_max_doc(ir)*sizeof(uchar));
         
     | 
| 
      
 1209 
     | 
    
         
            +
              } else if (norm->bytes != NULL) { // can copy from cache
         
     | 
| 
      
 1210 
     | 
    
         
            +
                memcpy(buf + offset*sizeof(uchar), norm->bytes, sr_max_doc(ir)*sizeof(uchar));
         
     | 
| 
      
 1211 
     | 
    
         
            +
              } else {
         
     | 
| 
      
 1212 
     | 
    
         
            +
                InStream *norm_in = is_clone(norm->is);
         
     | 
| 
      
 1213 
     | 
    
         
            +
                // read from disk
         
     | 
| 
      
 1214 
     | 
    
         
            +
                is_seek(norm_in, 0);
         
     | 
| 
      
 1215 
     | 
    
         
            +
                is_read_bytes(norm_in, buf, offset, sr_max_doc(ir));
         
     | 
| 
      
 1216 
     | 
    
         
            +
                is_close(norm_in);
         
     | 
| 
      
 1217 
     | 
    
         
            +
              }
         
     | 
| 
      
 1218 
     | 
    
         
            +
            }
         
     | 
| 
      
 1219 
     | 
    
         
            +
             
     | 
| 
      
 1220 
     | 
    
         
            +
            /*
             * Mutex-protected wrapper: runs sr_get_norms_into_internal with the
             * same arguments while holding ir->mutex.
             */
            void sr_get_norms_into(IndexReader *ir, char *field, uchar *buf, int offset)
            {
              mutex_lock(&ir->mutex);
              sr_get_norms_into_internal(ir, field, buf, offset);
              mutex_unlock(&ir->mutex);
            }
         
     | 
| 
      
 1226 
     | 
    
         
            +
             
     | 
| 
      
 1227 
     | 
    
         
            +
            /*
             * Return the norm bytes stored on the field's Norm entry, loading them
             * on first access.
             *
             * Returns NULL when the norms table has no entry for field. Otherwise,
             * if norm->bytes is still NULL, a buffer of ir->max_doc(ir) bytes is
             * allocated, filled via sr_get_norms_into_internal and stored on the
             * Norm entry so later calls return the same buffer.
             *
             * Does not lock ir->mutex itself.
             */
            static inline uchar *sr_get_norms_internal(IndexReader *ir, char *field)
            {
              GET_SR;
              Norm *norm = h_get(sr->norms, field);

              if (norm != NULL) {
                if (norm->bytes == NULL) {
                  /*
                   * norm->bytes must stay NULL until the buffer is filled:
                   * sr_get_norms_into_internal only reads from the stream
                   * while norm->bytes is NULL.
                   */
                  uchar *loaded = ALLOC_N(uchar, ir->max_doc(ir));
                  sr_get_norms_into_internal(ir, field, loaded, 0);
                  norm->bytes = loaded;
                }
                return norm->bytes;
              }

              return NULL;
            }
         
     | 
| 
      
 1241 
     | 
    
         
            +
             
     | 
| 
      
 1242 
     | 
    
         
            +
            /*
             * Mutex-protected wrapper around sr_get_norms_internal. Returns that
             * helper's result, which is NULL when field has no Norm entry.
             */
            uchar *sr_get_norms(IndexReader *ir, char *field)
            {
              uchar *result;

              mutex_lock(&ir->mutex);
              result = sr_get_norms_internal(ir, field);
              mutex_unlock(&ir->mutex);

              return result;
            }
         
     | 
| 
      
 1250 
     | 
    
         
            +
             
     | 
| 
      
 1251 
     | 
    
         
            +
            /*
             * Like sr_get_norms, but never returns NULL for a field without norms.
             *
             * When sr_get_norms_internal yields NULL, a zero-filled buffer of
             * ir->max_doc(ir) bytes is returned instead. That buffer is allocated
             * once, stored in sr->fake_norms and reused on later calls. The whole
             * operation runs under ir->mutex.
             */
            static inline uchar *sr_get_norms_always(IndexReader *ir, char *field)
            {
              GET_SR;
              uchar *norms;

              mutex_lock(&ir->mutex);

              norms = sr_get_norms_internal(ir, field);
              if (norms == NULL) {
                if (!sr->fake_norms) {
                  /* first request for a norm-less field: build the zero buffer */
                  int n = ir->max_doc(ir);
                  sr->fake_norms = ALLOC_N(uchar, n);
                  memset(sr->fake_norms, 0, n);
                }
                norms = sr->fake_norms;
              }

              mutex_unlock(&ir->mutex);
              return norms;
            }
         
     | 
| 
      
 1270 
     | 
    
         
            +
             
     | 
| 
      
 1271 
     | 
    
         
            +
            /*
             * Set the norm byte of document doc_num for field to val.
             *
             * Does nothing when the norms table has no entry for field. Otherwise
             * the Norm entry and the segment reader are both marked dirty and the
             * byte is written into the buffer returned by sr_get_norms_internal.
             * Does not lock ir->mutex.
             */
            void sr_set_norm(IndexReader *ir, int doc_num, char *field, uchar val)
            {
              GET_SR;
              Norm *norm = h_get(sr->norms, field);
              uchar *bytes;

              if (norm == NULL) {
                return;
              }

              norm->is_dirty = true;
              sr->norms_dirty = true;

              bytes = sr_get_norms_internal(ir, field);
              bytes[doc_num] = val;
            }
         
     | 
| 
      
 1284 
     | 
    
         
            +
             
     | 
| 
      
 1285 
     | 
    
         
            +
            /*
             * Return the doc_freq recorded in the TermInfo for term t, or 0 when
             * tir_get_ti finds no TermInfo. The TermInfo returned by tir_get_ti is
             * destroyed before returning.
             */
            int sr_doc_freq(IndexReader *ir, Term *t)
            {
              GET_SR;
              int freq = 0;
              TermInfo *info = tir_get_ti(sr->tir, t);

              if (info != NULL) {
                freq = info->doc_freq;
                ti_destroy(info);
              }
              return freq;
            }
         
     | 
| 
      
 1295 
     | 
    
         
            +
             
     | 
| 
      
 1296 
     | 
    
         
            +
            /*
             * Build an array of the file names belonging to this segment that
             * exist in ir->store.
             *
             * Two groups of candidates are probed with ir->store->exists():
             *   - "<segment>.<ext>" for every entry of INDEX_EXTENSIONS;
             *   - for every field that is indexed and does not omit norms, a norms
             *     file named "<segment>.s<i>" when a compound store is open, or
             *     "<segment>.f<i>" otherwise.
             *
             * Each name that exists is duplicated with estrdup and appended. The
             * array is created with &efree, presumably as its element destructor
             * -- confirm against ary_create.
             *
             * Names are formatted with snprintf so that an over-long segment name
             * is truncated instead of overflowing the fixed-size fname buffer.
             *
             * NOTE(review): the norms file suffix uses the loop index i here, while
             * sr_open_norms uses fi->number; these look interchangeable only if
             * by_number[i]->number == i -- confirm.
             */
            Array *sr_file_names(IndexReader *ir)
            {
              GET_SR;
              Array *file_names = ary_create(0, &efree);
              FieldInfo *fi;
              int i;
              char fname[SEGMENT_NAME_MAX_LENGTH];

              for (i = 0; i < NELEMS(INDEX_EXTENSIONS); i++) {
                snprintf(fname, sizeof(fname), "%s.%s", sr->segment, INDEX_EXTENSIONS[i]);
                if (ir->store->exists(ir->store, fname))
                  ary_append(file_names, estrdup(fname));
              }

              for (i = 0; i < sr->fis->fcnt; i++) {
                fi = sr->fis->by_number[i];
                if (fi->is_indexed && !fi->omit_norms) {
                  if (sr->cfs_store) {
                    snprintf(fname, sizeof(fname), "%s.s%d", sr->segment, i);
                  } else {
                    snprintf(fname, sizeof(fname), "%s.f%d", sr->segment, i);
                  }
                  if (ir->store->exists(ir->store, fname))
                    ary_append(file_names, estrdup(fname));
                }
              }
              return file_names;
            }
         
     | 
| 
      
 1324 
     | 
    
         
            +
             
     | 
| 
      
 1325 
     | 
    
         
            +
            /*
             * Collect the names of this segment's fields that match field_type.
             *
             * Every FieldInfo in sr->fis is tested against the selector:
             *   IR_ALL                              every field
             *   IR_UNINDEXED                        !is_indexed
             *   IR_INDEXED                          is_indexed
             *   IR_INDEXED_NO_TERM_VECTOR           is_indexed && !store_tv
             *   IR_TERM_VECTOR                      store_tv && !store_pos && !store_offset
             *   IR_INDEXED_WITH_TERM_VECTOR         is_indexed && store_tv
             *   IR_TERM_VECTOR_WITH_POSITION        store_pos && !store_offset
             *   IR_TERM_VECTOR_WITH_OFFSET          !store_pos && store_offset
             *   IR_TERM_VECTOR_WITH_POSITION_OFFSET store_pos && store_offset
             * Any other value is reported as an ARG_ERROR through eprintf.
             *
             * Returns the hash set created by hs_str_create(NULL); the names added
             * are the fi->name pointers themselves, not copies.
             */
            HashSet *sr_get_field_names(IndexReader *ir, int field_type)
            {
              int i;
              GET_SR;
              HashSet *field_set = hs_str_create(NULL);
              FieldInfo *fi;
              for (i = 0; i < sr->fis->fcnt; i++) {
                fi = sr->fis->by_number[i];
                switch(field_type) {
                  case IR_ALL:
                    hs_add(field_set, fi->name);
                    break;
                  case IR_UNINDEXED:
                    if (!fi->is_indexed) hs_add(field_set, fi->name);
                    break;
                  case IR_INDEXED:
                    if (fi->is_indexed) hs_add(field_set, fi->name);
                    break;
                  case IR_INDEXED_NO_TERM_VECTOR:
                    if (fi->is_indexed && !fi->store_tv) hs_add(field_set, fi->name);
                    break;
                  case IR_TERM_VECTOR:
                    if (fi->store_tv && !fi->store_pos && !fi->store_offset)
                      hs_add(field_set, fi->name);
                    break;
                  case IR_INDEXED_WITH_TERM_VECTOR:
                    if (fi->is_indexed && fi->store_tv) hs_add(field_set, fi->name);
                    break;
                  case IR_TERM_VECTOR_WITH_POSITION:
                    if (fi->store_pos && !fi->store_offset) hs_add(field_set, fi->name);
                    break;
                  case IR_TERM_VECTOR_WITH_OFFSET:
                    if (!fi->store_pos && fi->store_offset) hs_add(field_set, fi->name);
                    /* this break was missing: without it, fields storing both
                     * positions and offsets were also added for this selector */
                    break;
                  case IR_TERM_VECTOR_WITH_POSITION_OFFSET:
                    if (fi->store_pos && fi->store_offset) hs_add(field_set, fi->name);
                    break;
                  default:
                    /* field_type is an int, so the conversion must be %d (was %ld) */
                    eprintf(ARG_ERROR, "Invalid field_type <%d>.", field_type);
                    break;
                }
              }
              return field_set;
            }
         
     | 
| 
      
 1367 
     | 
    
         
            +
             
     | 
| 
      
 1368 
     | 
    
         
            +
            /*
             * Return sr_max_doc(ir) minus the count of the deleted-docs bit vector
             * (when one exists). Both values are read while holding ir->mutex.
             */
            int sr_num_docs(IndexReader *ir)
            {
              GET_SR;
              int live_docs;

              mutex_lock(&ir->mutex);
              live_docs = sr_max_doc(ir);
              if (sr->deleted_docs) {
                live_docs -= sr->deleted_docs->count;
              }
              mutex_unlock(&ir->mutex);

              return live_docs;
            }
         
     | 
| 
      
 1379 
     | 
    
         
            +
             
     | 
| 
      
 1380 
     | 
    
         
            +
            /*
             * Return the TermDocEnum built by stde_create for this reader.
             */
            TermDocEnum *sr_term_docs(IndexReader *ir)
            {
              TermDocEnum *tde = stde_create(ir);
              return tde;
            }
         
     | 
| 
      
 1384 
     | 
    
         
            +
             
     | 
| 
      
 1385 
     | 
    
         
            +
            /*
             * Return the TermDocEnum built by stpe_create for this reader.
             */
            TermDocEnum *sr_term_positions(IndexReader *ir)
            {
              TermDocEnum *tpe = stpe_create(ir);
              return tpe;
            }
         
     | 
| 
      
 1389 
     | 
    
         
            +
             
     | 
| 
      
 1390 
     | 
    
         
            +
            /*
             * Open a norms input stream for every field that is indexed and does
             * not omit norms, and register it in sr->norms under the field name.
             *
             * For each such field the file "<segment>.s<number>" is looked up in
             * ir->store first. If it does not exist there, "<segment>.f<number>" is
             * opened from cfs_store instead. The stream is wrapped with norm_create
             * together with the field number. norms_dirty is reset to false at the
             * end.
             *
             * File names are formatted with snprintf so that an over-long segment
             * name is truncated instead of overflowing the fixed-size fname buffer.
             *
             * NOTE(review): cfs_store is dereferenced whenever the ".s" file is
             * missing, so callers presumably always pass a non-NULL store --
             * confirm.
             */
            void sr_open_norms(IndexReader *ir, Store *cfs_store)
            {
              GET_SR;
              int i;
              FieldInfo *fi;
              Store *tmp_store;
              char fname[SEGMENT_NAME_MAX_LENGTH];
              for (i = 0; i < sr->fis->fcnt; i++) {
                tmp_store = ir->store;
                fi = sr->fis->by_number[i];
                if (fi->is_indexed && !fi->omit_norms) {
                  snprintf(fname, sizeof(fname), "%s.s%d", sr->segment, fi->number);
                  if (! tmp_store->exists(tmp_store, fname)) {
                    /* no ".s" file in ir->store: use the ".f" file from cfs_store */
                    snprintf(fname, sizeof(fname), "%s.f%d", sr->segment, fi->number);
                    tmp_store = cfs_store;
                  }
                  h_set(sr->norms, fi->name,
                      norm_create(tmp_store->open_input(tmp_store, fname), fi->number));
                }
              }
              sr->norms_dirty = false;
            }
         
     | 
| 
      
 1412 
     | 
    
         
            +
             
     | 
| 
      
 1413 
     | 
    
         
            +
            /*
             * Return the term vector of field for document doc_num, obtained via
             * tvr_get_field_tv on the reader returned by sr_tvr(sr).
             *
             * Returns NULL when the field is unknown, the field does not store term
             * vectors, the segment has no original term vectors reader, or sr_tvr
             * yields no reader.
             */
            TermVector *sr_get_term_vector(IndexReader *ir, int doc_num, char *field)
            {
              GET_SR;
              FieldInfo *fi = (FieldInfo *)ht_get(sr->fis->by_name, field);
              TermVectorsReader *tvr;

              if (fi == NULL || !fi->store_tv) {
                return NULL;
              }
              if (!sr->orig_tvr) {
                return NULL;
              }

              /* sr_tvr is only consulted once the cheaper checks have passed */
              tvr = sr_tvr(sr);
              if (!tvr) {
                return NULL;
              }

              return tvr_get_field_tv(tvr, doc_num, field);
            }
         
     | 
| 
      
 1424 
     | 
    
         
            +
             
     | 
| 
      
 1425 
     | 
    
         
            +
            /*
             * Fetch all term vectors of document +doc_num+ in this segment.
             *
             * Returns NULL when the segment has no term-vector reader (sr->orig_tvr is
             * NULL) or when sr_tvr() yields NULL; otherwise returns the result of
             * tvr_get_tv().
             */
            Array *sr_get_term_vectors(IndexReader *ir, int doc_num)
            {
              GET_SR;
              TermVectorsReader *tvr = NULL;

              /* Only ask for a reader when the segment actually has vectors. */
              if (sr->orig_tvr != NULL) {
                tvr = sr_tvr(sr);
              }
              if (tvr == NULL) {
                return NULL;
              }

              return tvr_get_tv(tvr, doc_num);
            }
         
     | 
| 
      
 1434 
     | 
    
         
            +
             
     | 
| 
      
 1435 
     | 
    
         
            +
            /*
             * Flush the pending changes of a segment reader to its store.
             *
             * Steps, in order:
             *   1. If the deleted-docs bit vector is dirty, write it to "<segment>.tmp"
             *      and rename that file to "<segment>.del".
             *   2. If an undelete-all was requested and "<segment>.del" exists, remove
             *      it.
             *   3. If norms are dirty, rewrite the norms of every indexed field that has
             *      a Norm registered in sr->norms.
             * Finally all three dirty/pending flags are cleared.
             */
            void sr_commit(IndexReader *ir)
            {
              GET_SR;
              char tmp_fname[SEGMENT_NAME_MAX_LENGTH];
              char del_fname[SEGMENT_NAME_MAX_LENGTH];

              /* Bounded formatting: never write past the fixed-size name buffers. */
              snprintf(del_fname, sizeof(del_fname), "%s.del", sr->segment);

              if (sr->deleted_docs_dirty) { /* re-write deleted */
                snprintf(tmp_fname, sizeof(tmp_fname), "%s.tmp", sr->segment);
                bv_write(sr->deleted_docs, ir->store, tmp_fname);
                /* write to a temp file first, then move it over the .del file */
                ir->store->rename(ir->store, tmp_fname, del_fname);
              }

              if (sr->undelete_all && ir->store->exists(ir->store, del_fname)) {
                ir->store->remove(ir->store, del_fname);
              }

              if (sr->norms_dirty) { /* re-write norms */
                int i;

                for (i = 0; i < sr->fis->fcnt; i++) {
                  FieldInfo *fi = sr->fis->by_number[i];
                  Norm *norm;

                  if (!fi->is_indexed) {
                    continue;
                  }

                  /*
                   * Norms are only loaded for fields that are indexed and do not omit
                   * norms, so an indexed field may have no entry here. Skip it rather
                   * than handing a NULL Norm to norm_rewrite().
                   */
                  norm = (Norm *)h_get(sr->norms, fi->name);
                  if (norm == NULL) {
                    continue;
                  }

                  norm_rewrite(norm, ir->store, sr->segment, sr_max_doc(ir),
                               sr->cfs_store);
                }
              }

              sr->deleted_docs_dirty = false;
              sr->norms_dirty = false;
              sr->undelete_all = false;
            }
         
     | 
| 
      
 1464 
     | 
    
         
            +
             
     | 
| 
      
 1465 
     | 
    
         
            +
            /*
             * Turn the generic reader +ir+ into a segment reader for segment +si+.
             *
             * Installs the sr_* implementations in ir's function-pointer slots,
             * allocates the SegmentReader state (stored in ir->data) and opens the
             * segment's files. If "<segment>.cfs" exists in si->store, every segment
             * file except the deletions file is read through the compound store opened
             * on it; the deletions file is always read from si->store.
             *
             * Returns +ir+.
             */
            IndexReader *sr_open_internal(IndexReader *ir, SegmentInfo *si)
            {
              Store *store = si->store;
              SegmentReader *sr = ALLOC(SegmentReader);
              char fname[SEGMENT_NAME_MAX_LENGTH];

              /* reader "virtual method" table */
              ir->get_term_vector  = sr_get_term_vector;
              ir->get_term_vectors = sr_get_term_vectors;
              ir->num_docs         = sr_num_docs;
              ir->max_doc          = sr_max_doc;
              ir->get_doc          = sr_get_doc;
              ir->get_norms_into   = sr_get_norms_into;
              ir->get_norms        = sr_get_norms;
              ir->get_norms_always = sr_get_norms_always;
              ir->do_set_norm      = sr_set_norm;
              ir->terms            = sr_terms;
              ir->terms_from       = sr_terms_from;
              ir->doc_freq         = sr_doc_freq;
              ir->term_docs        = sr_term_docs;
              ir->term_positions   = sr_term_positions;
              ir->do_delete_doc    = sr_delete_doc;
              ir->is_deleted       = sr_is_deleted;
              ir->has_norms        = sr_has_norms;
              ir->has_deletions    = sr_has_deletions;
              ir->do_undelete_all  = sr_undelete_all;
              ir->get_field_names  = sr_get_field_names;
              ir->do_commit        = sr_commit;
              ir->do_close         = sr_close;

              ir->data = sr;
              sr->segment = estrdup(si->name);
              sr->cfs_store = NULL;
              sr->fake_norms = NULL;

              /*
               * All file names are formatted with snprintf so an over-long segment
               * name can never overrun the fixed-size buffer.
               */
              snprintf(fname, sizeof(fname), "%s.cfs", sr->segment);
              if (store->exists(store, fname)) {
                /* compound segment: read the remaining files out of the .cfs */
                sr->cfs_store = open_cmpd_store(store, fname);
                store = sr->cfs_store;
              }

              snprintf(fname, sizeof(fname), "%s.fnm", sr->segment);
              sr->fis = fis_open(store, fname);
              sr->fr = fr_open(store, sr->segment, sr->fis);

              sr->tir = tir_open(store, sr->segment, sr->fis);

              sr->deleted_docs = NULL;
              sr->deleted_docs_dirty = false;
              sr->undelete_all = false;
              if (si_has_deletions(si)) {
                snprintf(fname, sizeof(fname), "%s.del", sr->segment);
                /* deletions are read from the segment's own store, not the .cfs */
                sr->deleted_docs = bv_read(si->store, fname);
              }

              snprintf(fname, sizeof(fname), "%s.frq", sr->segment);
              sr->freq_in = store->open_input(store, fname);
              snprintf(fname, sizeof(fname), "%s.prx", sr->segment);
              sr->prox_in = store->open_input(store, fname);

              sr->norms = h_new_str(NULL, &norm_destroy);
              sr_open_norms(ir, store);

              if (fis_has_vectors(sr->fis)) {
                sr->orig_tvr = tvr_open(store, sr->segment, sr->fis);
                thread_key_create(&sr->thread_tvr, NULL);
                sr->tvr_bucket = ary_create(1, (destroy_func_t)&tvr_close);
              } else {
                sr->orig_tvr = NULL;
              }

              return ir;
            }
         
     | 
| 
      
 1531 
     | 
    
         
            +
             
     | 
| 
      
 1532 
     | 
    
         
            +
            /*
             * Open a segment reader for +si+ directly. The underlying reader is created
             * on si->store with a NULL SegmentInfos and both trailing flags of
             * ir_create() set to false.
             */
            IndexReader *sr_open_si(SegmentInfo *si)
            {
              IndexReader *reader = ir_create(si->store, NULL, false, false);

              sr_open_internal(reader, si);   /* returns the reader it was given */
              return reader;
            }
         
     | 
| 
      
 1537 
     | 
    
         
            +
             
     | 
| 
      
 1538 
     | 
    
         
            +
            /*
             * Open a segment reader for entry +si_num+ of +sis+. The +is_owner+ and
             * +close_store+ flags are handed unchanged to ir_create(). No bounds check
             * is made on +si_num+.
             */
            IndexReader *sr_open(SegmentInfos *sis, int si_num, int is_owner, int close_store)
            {
              SegmentInfo *seg_info = sis->segs[si_num];
              IndexReader *reader =
                ir_create(seg_info->store, sis, is_owner, close_store);

              sr_open_internal(reader, seg_info);   /* returns the reader it was given */
              return reader;
            }
         
     | 
| 
      
 1544 
     | 
    
         
            +
            /****************************************************************************
         
     | 
| 
      
 1545 
     | 
    
         
            +
             *
         
     | 
| 
      
 1546 
     | 
    
         
            +
             * MultiReader
         
     | 
| 
      
 1547 
     | 
    
         
            +
             *
         
     | 
| 
      
 1548 
     | 
    
         
            +
             ****************************************************************************/
         
     | 
| 
      
 1549 
     | 
    
         
            +
             
     | 
| 
      
 1550 
     | 
    
         
            +
            /* Declares +mr+, the MultiReader stored in ir->data. Needs +ir+ in scope. */
            #define GET_MR MultiReader *mr = (MultiReader *)ir->data

            /*
             * Declares +mr+ (as GET_MR does), +i+ (index of the sub-reader that
             * mr_reader_index() picks for doc_num) and +reader+ (that sub-reader).
             * Expands to declarations, so it must appear where declarations are allowed.
             */
            #define GET_READER(doc_num) GET_MR;\
              int i = mr_reader_index(mr, (doc_num));\
              IndexReader *reader = mr->sub_readers[i];
         
     | 
| 
      
 1554 
     | 
    
         
            +
             
     | 
| 
      
 1555 
     | 
    
         
            +
             
     | 
| 
      
 1556 
     | 
    
         
            +
             
     | 
| 
      
 1557 
     | 
    
         
            +
            /*
             * Binary-search mr->starts for the sub-reader that holds +doc_num+.
             *
             * On an exact hit the index is advanced past any following entries with the
             * same start value, so the last of them is returned. Without an exact hit
             * the final upper bound of the search is returned; that is -1 when +doc_num+
             * is smaller than every start or when there are no sub-readers.
             */
            int mr_reader_index(MultiReader *mr, int doc_num)
            {
              int low = 0;
              int high = mr->rcnt - 1;

              while (low <= high) {
                int probe = (low + high) >> 1;
                int probe_start = mr->starts[probe];

                if (doc_num < probe_start) {
                  high = probe - 1;
                  continue;
                }
                if (doc_num > probe_start) {
                  low = probe + 1;
                  continue;
                }

                /* exact hit: step over empty segments sharing this start value */
                while ((probe + 1 < mr->rcnt) && (mr->starts[probe + 1] == probe_start)) {
                  probe++;
                }
                return probe;
              }

              return high;
            }
         
     | 
| 
      
 1579 
     | 
    
         
            +
             
     | 
| 
      
 1580 
     | 
    
         
            +
            /*
             * Fetch the term vector of +field+ for +doc_num+ by delegating to the
             * sub-reader selected for that document. The document number is rebased by
             * subtracting the sub-reader's entry in mr->starts.
             */
            TermVector *mr_get_term_vector(IndexReader *ir, int doc_num, char *field)
            {
              GET_READER(doc_num);
              int local_doc = doc_num - mr->starts[i];

              return reader->get_term_vector(reader, local_doc, field);
            }
         
     | 
| 
      
 1585 
     | 
    
         
            +
             
     | 
| 
      
 1586 
     | 
    
         
            +
            /*
             * Fetch all term vectors for +doc_num+ from the sub-reader selected for that
             * document, using the document number rebased against mr->starts.
             */
            Array *mr_get_term_vectors(IndexReader *ir, int doc_num)
            {
              GET_READER(doc_num);
              int local_doc = doc_num - mr->starts[i];

              return reader->get_term_vectors(reader, local_doc);
            }
         
     | 
| 
      
 1591 
     | 
    
         
            +
             
     | 
| 
      
 1592 
     | 
    
         
            +
            /*
             * Number of documents reported by all sub-readers together.
             *
             * The sum is kept in mr->num_docs_cache, where -1 marks it as not yet
             * computed. The check, recomputation and read all happen while holding
             * ir->mutex.
             */
            int mr_num_docs(IndexReader *ir)
            {
              GET_MR;
              int result;

              mutex_lock(&ir->mutex);
              if (mr->num_docs_cache == -1) {
                int r;

                /* cache is stale: rebuild it from the sub-readers */
                mr->num_docs_cache = 0;
                for (r = 0; r < mr->rcnt; r++) {
                  IndexReader *sub = mr->sub_readers[r];
                  mr->num_docs_cache += sub->num_docs(sub);
                }
              }
              result = mr->num_docs_cache;
              mutex_unlock(&ir->mutex);

              return result;
            }
         
     | 
| 
      
 1610 
     | 
    
         
            +
             
     | 
| 
      
 1611 
     | 
    
         
            +
            /* Returns the max_doc value stored in the MultiReader behind +ir+. */
            int mr_max_doc(IndexReader *ir)
            {
              MultiReader *multi = (MultiReader *)ir->data;

              return multi->max_doc;
            }
         
     | 
| 
      
 1616 
     | 
    
         
            +
             
     | 
| 
      
 1617 
     | 
    
         
            +
            /*
             * Fetch document +doc_num+ from the sub-reader selected for it, using the
             * document number rebased against mr->starts.
             */
            Document *mr_get_doc(IndexReader *ir, int doc_num)
            {
              GET_READER(doc_num);
              int local_doc = doc_num - mr->starts[i];

              return reader->get_doc(reader, local_doc);
            }
         
     | 
| 
      
 1622 
     | 
    
         
            +
             
     | 
| 
      
 1623 
     | 
    
         
            +
            /*
             * Copy the norms of +field+ for every document into +buf+, starting at
             * +offset+.
             *
             * If mr->norms_cache already holds norms for the field, mr->max_doc bytes
             * are copied from the cache. Otherwise each sub-reader writes its own norms
             * at offset + mr->starts[n]. The cache is not filled by this function.
             * The whole operation runs while holding ir->mutex.
             */
            void mr_get_norms_into(IndexReader *ir, char *field, uchar *buf, int offset)
            {
              GET_MR;
              uchar *cached;

              mutex_lock(&ir->mutex);
              cached = h_get(mr->norms_cache, field);
              if (cached == NULL) {
                int r;

                /* nothing cached: let every sub-reader fill in its own slice */
                for (r = 0; r < mr->rcnt; r++) {
                  IndexReader *sub = mr->sub_readers[r];
                  sub->get_norms_into(sub, field, buf, offset + mr->starts[r]);
                }
              } else {
                memcpy(buf + offset, cached, mr->max_doc);
              }
              mutex_unlock(&ir->mutex);
            }
         
     | 
| 
      
 1641 
     | 
    
         
            +
             
     | 
| 
      
 1642 
     | 
    
         
            +
            /*
             * Return the norms of +field+ for all documents of the multi reader.
             *
             * On a cache miss a buffer of mr->max_doc bytes is allocated, filled by the
             * sub-readers (each at its mr->starts offset) and stored in mr->norms_cache
             * under +field+. The returned pointer is the cached buffer itself. Lookup
             * and fill run while holding ir->mutex.
             */
            uchar *mr_get_norms(IndexReader *ir, char *field)
            {
              GET_MR;
              uchar *norms;

              mutex_lock(&ir->mutex);
              norms = h_get(mr->norms_cache, field);
              if (norms == NULL) {
                int r;

                norms = ALLOC_N(uchar, mr->max_doc);
                for (r = 0; r < mr->rcnt; r++) {
                  IndexReader *sub = mr->sub_readers[r];
                  sub->get_norms_into(sub, field, norms, mr->starts[r]);
                }
                h_set(mr->norms_cache, field, norms);   /* remember for next time */
              }
              mutex_unlock(&ir->mutex);

              return norms;
            }
         
     | 
| 
      
 1664 
     | 
    
         
            +
             
     | 
| 
      
 1665 
     | 
    
         
            +
            /*
             * Set the norm of +field+ for +doc_num+ to +val+.
             *
             * The field's entry in mr->norms_cache is removed first, then the change is
             * applied through ir_set_norm() on the sub-reader selected for the
             * document, using the rebased document number.
             */
            void mr_set_norm(IndexReader *ir, int doc_num, char *field, uchar val)
            {
              GET_READER(doc_num);
              int local_doc = doc_num - mr->starts[i];

              h_del(mr->norms_cache, field);   /* cached norms are now out of date */
              ir_set_norm(reader, local_doc, field, val);
            }
         
     | 
| 
      
 1671 
     | 
    
         
            +
             
     | 
| 
      
 1672 
     | 
    
         
            +
            /*
             * Create a term enumerator over all sub-readers via mte_create(), passing
             * NULL as the term argument.
             */
            TermEnum *mr_terms(IndexReader *ir)
            {
              MultiReader *multi = (MultiReader *)ir->data;

              return mte_create(multi->sub_readers, multi->starts, multi->rcnt, NULL);
            }
         
     | 
| 
      
 1677 
     | 
    
         
            +
             
     | 
| 
      
 1678 
     | 
    
         
            +
            /*
             * Create a term enumerator over all sub-readers via mte_create(), passing
             * +term+ as the term argument.
             */
            TermEnum *mr_terms_from(IndexReader *ir, Term *term)
            {
              MultiReader *multi = (MultiReader *)ir->data;

              return mte_create(multi->sub_readers, multi->starts, multi->rcnt, term);
            }
         
     | 
| 
      
 1683 
     | 
    
         
            +
             
     | 
| 
      
 1684 
     | 
    
         
            +
            /*
             * Document frequency of term +t+ across the multi reader: the sum of the
             * doc_freq() results of all sub-readers.
             */
            int mr_doc_freq(IndexReader *ir, Term *t)
            {
              GET_MR;
              int r;
              int freq_sum = 0;

              for (r = 0; r < mr->rcnt; r++) {
                IndexReader *sub = mr->sub_readers[r];
                freq_sum += sub->doc_freq(sub, t);
              }

              return freq_sum;
            }
         
     | 
| 
      
 1696 
     | 
    
         
            +
             
     | 
| 
      
 1697 
     | 
    
         
            +
            /* Create a term-doc enumerator over all sub-readers via mtde_create(). */
            TermDocEnum *mr_term_docs(IndexReader *ir)
            {
              MultiReader *multi = (MultiReader *)ir->data;

              return mtde_create(multi->sub_readers, multi->starts, multi->rcnt);
            }
         
     | 
| 
      
 1702 
     | 
    
         
            +
             
     | 
| 
      
 1703 
     | 
    
         
            +
            /* Create a term-position enumerator over all sub-readers via mtpe_create(). */
            TermDocEnum *mr_term_positions(IndexReader *ir)
            {
              MultiReader *multi = (MultiReader *)ir->data;

              return mtpe_create(multi->sub_readers, multi->starts, multi->rcnt);
            }
         
     | 
| 
      
 1708 
     | 
    
         
            +
             
     | 
| 
      
 1709 
     | 
    
         
            +
            /*
             * Delete document +doc_num+.
             *
             * Marks the cached document count as stale (-1), forwards the delete to the
             * sub-reader selected for the document using the rebased document number,
             * and then records on the multi reader that it has deletions.
             */
            void mr_delete_doc(IndexReader *ir, int doc_num)
            {
              GET_READER(doc_num);
              int local_doc = doc_num - mr->starts[i];

              mr->num_docs_cache = -1;                 /* force a recount */
              reader->do_delete_doc(reader, local_doc);
              mr->has_deletions = true;
            }
         
     | 
| 
      
 1717 
     | 
    
         
            +
             
     | 
| 
      
 1718 
     | 
    
         
            +
            /*
             * Reports whether document `doc_num` is deleted, as answered by the
             * sub-reader that owns it.
             *
             * GET_READER (macro defined outside this view) is expected to provide
             * `mr`, the sub-reader index `i` and the matching `reader`. The query is
             * forwarded with the document number rebased to that sub-reader.
             */
            bool mr_is_deleted(IndexReader *ir, int doc_num)
            {
              GET_READER(doc_num);
              int local_doc = doc_num - mr->starts[i];

              return reader->is_deleted(reader, local_doc);
            }
         
     | 
| 
      
 1723 
     | 
    
         
            +
             
     | 
| 
      
 1724 
     | 
    
         
            +
            /*
             * Returns true as soon as any sub-reader reports norms for `field`,
             * false when none of them does (including when there are no sub-readers).
             */
            bool mr_has_norms(IndexReader *ir, char *field)
            {
              int idx;
              GET_MR;
              IndexReader *sub;

              for (idx = 0; idx < mr->rcnt; idx++) {
                sub = mr->sub_readers[idx];
                if (sub->has_norms(sub, field)) {
                  return true;  // one hit is enough, skip the remaining readers
                }
              }

              return false;
            }
         
     | 
| 
      
 1741 
     | 
    
         
            +
             
     | 
| 
      
 1742 
     | 
    
         
            +
            /*
             * Returns the multi-reader's own `has_deletions` flag; the sub-readers
             * are not consulted here.
             */
            bool mr_has_deletions(IndexReader *ir)
            {
              GET_MR;
              bool any_deleted = mr->has_deletions;

              return any_deleted;
            }
         
     | 
| 
      
 1747 
     | 
    
         
            +
             
     | 
| 
      
 1748 
     | 
    
         
            +
            /*
             * Asks every sub-reader to undelete all of its documents, then clears
             * the multi-reader's `has_deletions` flag.
             */
            void mr_undelete_all(IndexReader *ir)
            {
              int idx;
              GET_MR;
              IndexReader *sub;

              // the cached document count is stale from here on
              mr->num_docs_cache = -1;

              for (idx = 0; idx < mr->rcnt; idx++) {
                sub = mr->sub_readers[idx];
                sub->do_undelete_all(sub);
              }

              mr->has_deletions = false;
            }
         
     | 
| 
      
 1760 
     | 
    
         
            +
             
     | 
| 
      
 1761 
     | 
    
         
            +
            /*
             * Collects the field names of type `field_type` from every sub-reader
             * into one newly created string hash set and returns that set.
             *
             * NOTE(review): whether hs_merge() takes ownership of the per-reader
             * set passed to it is not visible here -- confirm against hs_merge().
             */
            HashSet *mr_get_field_names(IndexReader *ir, int field_type)
            {
              int idx;
              GET_MR;
              HashSet *merged = hs_str_create(NULL);
              HashSet *sub_fields;
              IndexReader *sub;

              for (idx = 0; idx < mr->rcnt; idx++) {
                sub = mr->sub_readers[idx];
                sub_fields = sub->get_field_names(sub, field_type);
                hs_merge(merged, sub_fields);
              }

              return merged;
            }
         
     | 
| 
      
 1773 
     | 
    
         
            +
             
     | 
| 
      
 1774 
     | 
    
         
            +
            /*
             * Commits each sub-reader in index order by calling its do_commit hook.
             */
            void mr_commit(IndexReader *ir)
            {
              GET_MR;
              int idx;
              IndexReader *sub;

              for (idx = 0; idx < mr->rcnt; idx++) {
                sub = mr->sub_readers[idx];
                sub->do_commit(sub);
              }
            }
         
     | 
| 
      
 1784 
     | 
    
         
            +
             
     | 
| 
      
 1785 
     | 
    
         
            +
            /*
             * Tears down the multi-reader state attached to `ir`.
             *
             * Every sub-reader is closed with ir_close(); afterwards the sub-reader
             * array, the norms cache, the start-offset array and the MultiReader
             * struct itself are released, in that order.
             */
            void mr_close(IndexReader *ir)
            {
              GET_MR;
              int idx;

              for (idx = 0; idx < mr->rcnt; idx++) {
                ir_close(mr->sub_readers[idx]);
              }

              free(mr->sub_readers);
              h_destroy(mr->norms_cache);
              free(mr->starts);
              free(mr);
            }
         
     | 
| 
      
 1799 
     | 
    
         
            +
             
     | 
| 
      
 1800 
     | 
    
         
            +
            IndexReader *mr_open(Store *store,
         
     | 
| 
      
 1801 
     | 
    
         
            +
                SegmentInfos *sis,
         
     | 
| 
      
 1802 
     | 
    
         
            +
                IndexReader **sub_readers,
         
     | 
| 
      
 1803 
     | 
    
         
            +
                int rcnt,
         
     | 
| 
      
 1804 
     | 
    
         
            +
                int close_store)
         
     | 
| 
      
 1805 
     | 
    
         
            +
            {
         
     | 
| 
      
 1806 
     | 
    
         
            +
              int i;
         
     | 
| 
      
 1807 
     | 
    
         
            +
              MultiReader *mr = ALLOC(MultiReader);
         
     | 
| 
      
 1808 
     | 
    
         
            +
              IndexReader *sub_reader;
         
     | 
| 
      
 1809 
     | 
    
         
            +
              mr->sub_readers = sub_readers;
         
     | 
| 
      
 1810 
     | 
    
         
            +
              mr->rcnt = rcnt;
         
     | 
| 
      
 1811 
     | 
    
         
            +
             
     | 
| 
      
 1812 
     | 
    
         
            +
              mr->max_doc = 0;
         
     | 
| 
      
 1813 
     | 
    
         
            +
              mr->num_docs_cache = -1;
         
     | 
| 
      
 1814 
     | 
    
         
            +
              mr->has_deletions = false;
         
     | 
| 
      
 1815 
     | 
    
         
            +
             
     | 
| 
      
 1816 
     | 
    
         
            +
              mr->starts = ALLOC_N(int, (rcnt+1));
         
     | 
| 
      
 1817 
     | 
    
         
            +
              for (i = 0; i < rcnt; i++) {
         
     | 
| 
      
 1818 
     | 
    
         
            +
                sub_reader = sub_readers[i];
         
     | 
| 
      
 1819 
     | 
    
         
            +
                mr->starts[i] = mr->max_doc;
         
     | 
| 
      
 1820 
     | 
    
         
            +
                mr->max_doc += sub_reader->max_doc(sub_reader); // compute max_docs
         
     | 
| 
      
 1821 
     | 
    
         
            +
             
     | 
| 
      
 1822 
     | 
    
         
            +
                if (sub_reader->has_deletions(sub_reader))
         
     | 
| 
      
 1823 
     | 
    
         
            +
                  mr->has_deletions = true;
         
     | 
| 
      
 1824 
     | 
    
         
            +
              }
         
     | 
| 
      
 1825 
     | 
    
         
            +
              mr->starts[rcnt] = mr->max_doc;
         
     | 
| 
      
 1826 
     | 
    
         
            +
              mr->norms_cache = h_new_str(NULL, &efree);
         
     | 
| 
      
 1827 
     | 
    
         
            +
             
     | 
| 
      
 1828 
     | 
    
         
            +
              IndexReader *ir = ir_create(store, sis, true, close_store);
         
     | 
| 
      
 1829 
     | 
    
         
            +
              ir->get_term_vector = &mr_get_term_vector;
         
     | 
| 
      
 1830 
     | 
    
         
            +
              ir->get_term_vectors = &mr_get_term_vectors;
         
     | 
| 
      
 1831 
     | 
    
         
            +
              ir->num_docs = &mr_num_docs;
         
     | 
| 
      
 1832 
     | 
    
         
            +
              ir->max_doc = &mr_max_doc;
         
     | 
| 
      
 1833 
     | 
    
         
            +
              ir->get_doc = &mr_get_doc;
         
     | 
| 
      
 1834 
     | 
    
         
            +
              ir->get_norms_into = &mr_get_norms_into;
         
     | 
| 
      
 1835 
     | 
    
         
            +
              ir->get_norms = &mr_get_norms;
         
     | 
| 
      
 1836 
     | 
    
         
            +
              ir->get_norms_always = &mr_get_norms;
         
     | 
| 
      
 1837 
     | 
    
         
            +
              ir->do_set_norm = &mr_set_norm;
         
     | 
| 
      
 1838 
     | 
    
         
            +
              ir->terms = &mr_terms;
         
     | 
| 
      
 1839 
     | 
    
         
            +
              ir->terms_from = &mr_terms_from;
         
     | 
| 
      
 1840 
     | 
    
         
            +
              ir->doc_freq = &mr_doc_freq;
         
     | 
| 
      
 1841 
     | 
    
         
            +
              ir->term_docs = &mr_term_docs;
         
     | 
| 
      
 1842 
     | 
    
         
            +
              ir->term_positions = &mr_term_positions;
         
     | 
| 
      
 1843 
     | 
    
         
            +
              ir->do_delete_doc = &mr_delete_doc;
         
     | 
| 
      
 1844 
     | 
    
         
            +
              ir->is_deleted = &mr_is_deleted;
         
     | 
| 
      
 1845 
     | 
    
         
            +
              ir->has_norms = &mr_has_norms;
         
     | 
| 
      
 1846 
     | 
    
         
            +
              ir->has_deletions = &mr_has_deletions;
         
     | 
| 
      
 1847 
     | 
    
         
            +
              ir->do_undelete_all = &mr_undelete_all;
         
     | 
| 
      
 1848 
     | 
    
         
            +
              ir->get_field_names = &mr_get_field_names;
         
     | 
| 
      
 1849 
     | 
    
         
            +
              ir->do_commit = &mr_commit;
         
     | 
| 
      
 1850 
     | 
    
         
            +
              ir->do_close = &mr_close;
         
     | 
| 
      
 1851 
     | 
    
         
            +
              ir->data = mr;
         
     | 
| 
      
 1852 
     | 
    
         
            +
             
     | 
| 
      
 1853 
     | 
    
         
            +
              return ir;
         
     | 
| 
      
 1854 
     | 
    
         
            +
            }
         
     | 
| 
      
 1855 
     | 
    
         
            +
             
     | 
| 
      
 1856 
     | 
    
         
            +
            /****************************************************************************
         
     | 
| 
      
 1857 
     | 
    
         
            +
             *
         
     | 
| 
      
 1858 
     | 
    
         
            +
             * SegmentMergeInfo
         
     | 
| 
      
 1859 
     | 
    
         
            +
             *
         
     | 
| 
      
 1860 
     | 
    
         
            +
             ****************************************************************************/
         
     | 
| 
      
 1861 
     | 
    
         
            +
             
     | 
| 
      
 1862 
     | 
    
         
            +
            bool smi_lt(void *p1, void *p2)
         
     | 
| 
      
 1863 
     | 
    
         
            +
            {
         
     | 
| 
      
 1864 
     | 
    
         
            +
              SegmentMergeInfo *smi1 = (SegmentMergeInfo *)p1;
         
     | 
| 
      
 1865 
     | 
    
         
            +
              SegmentMergeInfo *smi2 = (SegmentMergeInfo *)p2;
         
     | 
| 
      
 1866 
     | 
    
         
            +
             
     | 
| 
      
 1867 
     | 
    
         
            +
              int cmpres = tb_cmp(smi1->tb, smi2->tb);
         
     | 
| 
      
 1868 
     | 
    
         
            +
              if (cmpres == 0) {
         
     | 
| 
      
 1869 
     | 
    
         
            +
                return smi1->base < smi2->base;
         
     | 
| 
      
 1870 
     | 
    
         
            +
              } else {
         
     | 
| 
      
 1871 
     | 
    
         
            +
                return cmpres < 0;
         
     | 
| 
      
 1872 
     | 
    
         
            +
              }
         
     | 
| 
      
 1873 
     | 
    
         
            +
            }
         
     | 
| 
      
 1874 
     | 
    
         
            +
             
     | 
| 
      
 1875 
     | 
    
         
            +
            /*
             * Returns the document map for `smi`, building it on first use.
             *
             * A map is only built when the reader reports deletions and no map has
             * been stored yet; otherwise the stored pointer (possibly NULL) is
             * returned as-is. In a freshly built map, deleted documents are mapped
             * to -1 and every other document gets the next consecutive number,
             * starting at 0.
             */
            int *smi_load_doc_map(SegmentMergeInfo *smi)
            {
              IndexReader *ir = smi->ir;
              int doc, next_live, max_doc;

              if (!ir->has_deletions(ir) || smi->doc_map != NULL) {
                return smi->doc_map;
              }

              max_doc = ir->max_doc(ir);
              smi->doc_map = ALLOC_N(int, max_doc);
              next_live = 0;
              for (doc = 0; doc < max_doc; doc++) {
                smi->doc_map[doc] = ir->is_deleted(ir, doc) ? -1 : next_live++;
              }

              return smi->doc_map;
            }
         
     | 
| 
      
 1892 
     | 
    
         
            +
             
     | 
| 
      
 1893 
     | 
    
         
            +
            /*
             * Allocates a SegmentMergeInfo for one reader taking part in a merge.
             *
             * base -- stored as-is in the new struct.
             * te   -- term enumerator; its current term buffer (te->tb_curr) becomes
             *         the initial `tb`.
             * ir   -- reader; a postings enumerator is obtained from
             *         ir->term_positions(ir).
             *
             * The document map starts out as NULL (see smi_load_doc_map()).
             */
            SegmentMergeInfo *smi_create(int base, TermEnum *te, IndexReader *ir)
            {
              SegmentMergeInfo *info = ALLOC(SegmentMergeInfo);

              info->doc_map = NULL;
              info->base = base;
              info->ir = ir;
              info->te = te;
              info->tb = te->tb_curr;
              info->postings = ir->term_positions(ir);

              return info;
            }
         
     | 
| 
      
 1904 
     | 
    
         
            +
             
     | 
| 
      
 1905 
     | 
    
         
            +
            void smi_destroy(void *p)
         
     | 
| 
      
 1906 
     | 
    
         
            +
            {
         
     | 
| 
      
 1907 
     | 
    
         
            +
              SegmentMergeInfo *smi = (SegmentMergeInfo *)p;
         
     | 
| 
      
 1908 
     | 
    
         
            +
              smi->postings->close(smi->postings);
         
     | 
| 
      
 1909 
     | 
    
         
            +
              smi->te->close(smi->te);
         
     | 
| 
      
 1910 
     | 
    
         
            +
              if (smi->doc_map != NULL)
         
     | 
| 
      
 1911 
     | 
    
         
            +
                free(smi->doc_map);
         
     | 
| 
      
 1912 
     | 
    
         
            +
              free(smi);
         
     | 
| 
      
 1913 
     | 
    
         
            +
            }
         
     | 
| 
      
 1914 
     | 
    
         
            +
             
     | 
| 
      
 1915 
     | 
    
         
            +
            /*
             * Advances the term enumerator of `smi`, stores whatever te->next()
             * returns as the current term buffer and returns that same value.
             */
            TermBuffer *smi_next(SegmentMergeInfo *smi)
            {
              TermEnum *terms = smi->te;

              smi->tb = terms->next(terms);
              return smi->tb;
            }
         
     | 
| 
      
 1919 
     | 
    
         
            +
             
     | 
| 
      
 1920 
     | 
    
         
            +
            /****************************************************************************
         
     | 
| 
      
 1921 
     | 
    
         
            +
             *
         
     | 
| 
      
 1922 
     | 
    
         
            +
             * SegmentMerger
         
     | 
| 
      
 1923 
     | 
    
         
            +
             *
         
     | 
| 
      
 1924 
     | 
    
         
            +
             ****************************************************************************/
         
     | 
| 
      
 1925 
     | 
    
         
            +
             
     | 
| 
      
 1926 
     | 
    
         
            +
            /*
             * Allocates a SegmentMerger writing to `store` under segment `name`.
             *
             * The name is duplicated with estrdup(). The reader list is created with
             * a capacity of config.merge_factor and ir_destroy as its element
             * destructor. Output streams, term-info writer, queue and field infos
             * start out as NULL, and skip_interval starts at -1.
             */
            SegmentMerger *sm_create(Store *store, char *name, int term_index_interval)
            {
              SegmentMerger *merger = ALLOC(SegmentMerger);

              // caller-supplied settings
              merger->store = store;
              merger->name = estrdup(name);
              merger->term_index_interval = term_index_interval;

              // owned helpers, allocated up front
              merger->readers = ary_create(config.merge_factor, &ir_destroy);
              merger->ti = ti_create(0, 0, 0, 0);
              merger->skip_buffer = ram_create_buffer();

              // nothing has been opened yet
              merger->fis = NULL;
              merger->freq_out = NULL;
              merger->prox_out = NULL;
              merger->tiw = NULL;
              merger->queue = NULL;
              merger->skip_interval = -1;

              return merger;
            }
         
     | 
| 
      
 1943 
     | 
    
         
            +
             
     | 
| 
      
 1944 
     | 
    
         
            +
            /*
             * Closes whatever merge-time resources are currently open on `sm` and
             * resets the corresponding pointers to NULL, so a second call finds
             * nothing to release.
             *
             * The terms buffer (each entry's text, then the array) is only released
             * together with the term-info writer, i.e. when sm->tiw is set.
             */
            void sm_close(SegmentMerger *sm)
            {
              int idx;

              if (sm->freq_out) {
                os_close(sm->freq_out);
              }
              if (sm->prox_out) {
                os_close(sm->prox_out);
              }
              if (sm->tiw) {
                for (idx = 0; idx < sm->terms_buf_size; idx++) {
                  free(sm->terms_buf[idx].text);
                }
                free(sm->terms_buf);
                tiw_close(sm->tiw);
              }
              if (sm->queue) {
                pq_destroy(sm->queue);
              }

              sm->freq_out = NULL;
              sm->prox_out = NULL;
              sm->tiw = NULL;
              sm->queue = NULL;
            }
         
     | 
| 
      
 1961 
     | 
    
         
            +
             
     | 
| 
      
 1962 
     | 
    
         
            +
            void sm_destroy(void *p)
         
     | 
| 
      
 1963 
     | 
    
         
            +
            {
         
     | 
| 
      
 1964 
     | 
    
         
            +
              SegmentMerger *sm = (SegmentMerger *)p;
         
     | 
| 
      
 1965 
     | 
    
         
            +
              if (sm->fis != NULL) fis_destroy(sm->fis);
         
     | 
| 
      
 1966 
     | 
    
         
            +
              ary_destroy(sm->readers);
         
     | 
| 
      
 1967 
     | 
    
         
            +
              sm_close(sm);
         
     | 
| 
      
 1968 
     | 
    
         
            +
              free(sm->name);
         
     | 
| 
      
 1969 
     | 
    
         
            +
              ti_destroy(sm->ti);
         
     | 
| 
      
 1970 
     | 
    
         
            +
              ram_destroy_buffer(sm->skip_buffer);
         
     | 
| 
      
 1971 
     | 
    
         
            +
              free(sm);
         
     | 
| 
      
 1972 
     | 
    
         
            +
            }
         
     | 
| 
      
 1973 
     | 
    
         
            +
             
     | 
| 
      
 1974 
     | 
    
         
            +
            /*
             * Appends reader `ir` to the merger's reader list. The list was created
             * in sm_create() with ir_destroy as its element destructor.
             */
            void sm_add(SegmentMerger *sm, IndexReader *ir)
            {
              IndexReader *reader = ir;

              ary_append(sm->readers, reader);
            }
         
     | 
| 
      
 1978 
     | 
    
         
            +
             
     | 
| 
      
 1979 
     | 
    
         
            +
            static inline void sm_add_indexed(IndexReader *ir,
         
     | 
| 
      
 1980 
     | 
    
         
            +
                FieldInfos *fis,
         
     | 
| 
      
 1981 
     | 
    
         
            +
                HashSet *fields,
         
     | 
| 
      
 1982 
     | 
    
         
            +
                bool store_tv,
         
     | 
| 
      
 1983 
     | 
    
         
            +
                bool store_pos,
         
     | 
| 
      
 1984 
     | 
    
         
            +
                bool store_offset)
         
     | 
| 
      
 1985 
     | 
    
         
            +
            {
         
     | 
| 
      
 1986 
     | 
    
         
            +
              int i;
         
     | 
| 
      
 1987 
     | 
    
         
            +
              char *field;
         
     | 
| 
      
 1988 
     | 
    
         
            +
              for (i = 0; i < fields->size; i++) {
         
     | 
| 
      
 1989 
     | 
    
         
            +
                field = (char *)fields->elems[i];
         
     | 
| 
      
 1990 
     | 
    
         
            +
                fis_add(fis, field, true, store_tv, store_pos, store_offset,
         
     | 
| 
      
 1991 
     | 
    
         
            +
                    !ir->has_norms(ir, field));
         
     | 
| 
      
 1992 
     | 
    
         
            +
              }
         
     | 
| 
      
 1993 
     | 
    
         
            +
              hs_destroy(fields);
         
     | 
| 
      
 1994 
     | 
    
         
            +
            }
         
     | 
| 
      
 1995 
     | 
    
         
            +
             
     | 
| 
      
 1996 
     | 
    
         
            +
            /**
             * Build the field table of the merged segment and copy the stored
             * fields of every live document into it.
             *
             * A new FieldInfos is created and stored in sm->fis. For each reader
             * the field names are collected once per field option (the four term
             * vector variants, plain indexed, then unindexed) and registered with
             * the matching flags. The table is then written with the ".fnm"
             * extension, after which every document that is not flagged deleted
             * is fetched from its reader and appended through a FieldsWriter.
             *
             * @param sm  the merger; sm->readers, sm->store and sm->name are read,
             *            sm->fis is overwritten
             * @return    the number of documents copied
             */
            int sm_merge_fields(SegmentMerger *sm)
            {
              int r, doc_num, reader_max_doc;
              int merged_docs = 0;
              IndexReader *reader;
              Document *stored;
              FieldsWriter *fw;
              FieldInfos *fis = fis_create();

              sm->fis = fis;

              // pass 1: gather field definitions from every reader
              for (r = 0; r < sm->readers->size; r++) {
                reader = sm->readers->elems[r];

                sm_add_indexed(reader, fis,
                    reader->get_field_names(reader, IR_TERM_VECTOR_WITH_POSITION_OFFSET),
                    true, true, true);
                sm_add_indexed(reader, fis,
                    reader->get_field_names(reader, IR_TERM_VECTOR_WITH_POSITION),
                    true, true, false);
                sm_add_indexed(reader, fis,
                    reader->get_field_names(reader, IR_TERM_VECTOR_WITH_OFFSET),
                    true, false, true);
                sm_add_indexed(reader, fis,
                    reader->get_field_names(reader, IR_TERM_VECTOR),
                    true, false, false);
                sm_add_indexed(reader, fis,
                    reader->get_field_names(reader, IR_INDEXED),
                    false, false, false);
                // NOTE(review): ownership of the set returned for IR_UNINDEXED is
                // not visible here -- confirm fis_add_fields releases it.
                fis_add_fields(fis,
                    reader->get_field_names(reader, IR_UNINDEXED),
                    false, false, false, false, false);
              }
              fis_write(fis, sm->store, sm->name, ".fnm");

              // pass 2: copy the stored field values of every live document
              fw = fw_open(sm->store, sm->name, fis);
              for (r = 0; r < sm->readers->size; r++) {
                reader = sm->readers->elems[r];
                reader_max_doc = reader->max_doc(reader);
                for (doc_num = 0; doc_num < reader_max_doc; doc_num++) {
                  if (reader->is_deleted(reader, doc_num)) {
                    continue;   // deleted docs are not carried over
                  }
                  stored = reader->get_doc(reader, doc_num);
                  fw_add_doc(fw, stored);
                  doc_destroy(stored);
                  merged_docs++;
                }
              }
              fw_close(fw);

              return merged_docs;
            }
         
     | 
| 
      
 2041 
     | 
    
         
            +
             
     | 
| 
      
 2042 
     | 
    
         
            +
            /**
             * Start a fresh run of skip entries.
             *
             * Resets the skip buffer, zeroes the last buffered doc number and
             * re-bases the remembered freq/prox positions on the current
             * positions of sm->freq_out and sm->prox_out.
             *
             * @param sm  the merger whose skip state is reset
             */
            void sm_reset_skip(SegmentMerger *sm)
            {
              ramo_reset(sm->skip_buffer);

              // subsequent skip deltas are measured from these values
              sm->last_skip_doc          = 0;
              sm->last_skip_freq_pointer = os_pos(sm->freq_out);
              sm->last_skip_prox_pointer = os_pos(sm->prox_out);
            }
         
     | 
| 
      
 2049 
     | 
    
         
            +
             
     | 
| 
      
 2050 
     | 
    
         
            +
            /**
             * Append one skip entry to sm->skip_buffer.
             *
             * The entry consists of three vints: the doc number, the freq_out
             * position and the prox_out position, each written as the difference
             * from the value remembered for the previous entry. The remembered
             * values are then replaced by the current ones.
             *
             * @param sm   the merger holding the skip buffer and output streams
             * @param doc  doc number recorded in the skip entry
             */
            inline void sm_buffer_skip(SegmentMerger *sm, int doc)
            {
              // sample both stream positions before anything is written
              int freq_pos = os_pos(sm->freq_out);
              int prox_pos = os_pos(sm->prox_out);

              os_write_vint(sm->skip_buffer, doc - sm->last_skip_doc);
              os_write_vint(sm->skip_buffer, freq_pos - sm->last_skip_freq_pointer);
              os_write_vint(sm->skip_buffer, prox_pos - sm->last_skip_prox_pointer);

              // the next entry is delta-encoded against this one
              sm->last_skip_doc          = doc;
              sm->last_skip_freq_pointer = freq_pos;
              sm->last_skip_prox_pointer = prox_pos;
            }
         
     | 
| 
      
 2063 
     | 
    
         
            +
             
     | 
| 
      
 2064 
     | 
    
         
            +
            /**
             * Write the merged postings of one term.
             *
             * For every SegmentMergeInfo in smis the postings enum is positioned
             * on that segment's current term and walked. Each doc number is
             * passed through the segment's doc map when one is returned by
             * smi_load_doc_map, shifted by the segment's base, and written to
             * sm->freq_out as a delta shifted left by one; the low bit is set when
             * the frequency is 1, otherwise the frequency follows as its own
             * vint. Position deltas are written to sm->prox_out. Every
             * sm->skip_interval docs a skip entry is buffered.
             *
             * @param sm    the merger providing the output streams and skip state
             * @param smis  the segments that all contain the term being merged
             * @param cnt   number of entries in smis
             * @return      number of documents written for the term
             */
            int sm_append_postings(SegmentMerger *sm, SegmentMergeInfo **smis, int cnt)
            {
              int i, j;
              int last_doc = 0, base, doc, doc_code, freq, last_position, position;
              int *doc_map = NULL;
              int df = 0;            // number of docs w/ term
              TermDocEnum *postings;
              SegmentMergeInfo *smi;

              sm_reset_skip(sm);
              for (i = 0; i < cnt; i++) {
                smi = smis[i];
                postings = smi->postings;
                base = smi->base;
                doc_map = smi_load_doc_map(smi);

                stde_seek_ti(postings, smi->te->ti_curr);
                while (postings->next(postings)) {
                  doc = postings->doc_num(postings);
                  if (doc_map != NULL) {
                    doc = doc_map[doc]; // work around deletions
                  }
                  doc += base;          // convert to merged space

                  if (doc < last_doc) {
                    // doc and last_doc are ints, so the conversions must be %d;
                    // the previous %ld did not match the argument type
                    eprintf(STATE_ERROR,
                        "docs out of order curent doc = %d and previous doc = %d",
                        doc, last_doc);
                  }

                  df++;

                  if ((df % sm->skip_interval) == 0) {
                    sm_buffer_skip(sm, last_doc);
                  }

                  doc_code = (doc - last_doc) << 1;    // use low bit to flag freq=1
                  last_doc = doc;

                  freq = postings->freq(postings);
                  if (freq == 1) {
                    os_write_vint(sm->freq_out, doc_code | 1); // write doc & freq=1
                  } else {
                    os_write_vint(sm->freq_out, doc_code); // write doc
                    os_write_vint(sm->freq_out, freq);     // write frequency in doc
                  }

                  last_position = 0;        // write position deltas
                  for (j = 0; j < freq; j++) {
                    position = postings->next_position(postings);
                    os_write_vint(sm->prox_out, position - last_position);
                    last_position = position;
                  }
                }
              }
              return df;
            }
         
     | 
| 
      
 2118 
     | 
    
         
            +
             
     | 
| 
      
 2119 
     | 
    
         
            +
            /**
             * Emit the buffered skip data for the current term.
             *
             * The current position of sm->freq_out is sampled first, then the
             * skip buffer is handed to ramo_write_to with sm->freq_out as the
             * destination.
             *
             * @param sm  the merger holding the skip buffer and freq stream
             * @return    the freq_out position sampled before the write
             */
            int sm_write_skip(SegmentMerger *sm)
            {
              int skip_start = os_pos(sm->freq_out);

              ramo_write_to(sm->skip_buffer, sm->freq_out);
              return skip_start;
            }
         
     | 
| 
      
 2125 
     | 
    
         
            +
             
     | 
| 
      
 2126 
     | 
    
         
            +
            /**
             * Copy a TermBuffer into the next slot of the merger's term ring.
             *
             * The slot is chosen as terms_buf_pointer modulo terms_buf_size and
             * the pointer is advanced, so slots are reused cyclically. The field
             * pointer is shared with the TermBuffer; the text is copied into the
             * slot's own text buffer.
             *
             * NOTE(review): the copy is an unbounded strcpy -- confirm tb->text
             * always fits the buffer allocated for each slot.
             *
             * @param sm  the merger owning terms_buf
             * @param tb  the term to copy
             * @return    pointer to the filled slot inside sm->terms_buf
             */
            Term *sm_tb_to_term(SegmentMerger *sm, TermBuffer *tb)
            {
              int slot_num = sm->terms_buf_pointer % sm->terms_buf_size;
              Term *slot = &(sm->terms_buf[slot_num]);

              sm->terms_buf_pointer++;

              slot->field = tb->field;
              strcpy(slot->text, tb->text);
              return slot;
            }
         
     | 
| 
      
 2134 
     | 
    
         
            +
             
     | 
| 
      
 2135 
     | 
    
         
            +
            /**
             * Merge one term: write its postings and skip data, then register the
             * term in the term dictionary.
             *
             * The freq_out and prox_out positions are sampled before the postings
             * are appended. The dictionary entry is only added when at least one
             * document was written; its skip offset is stored relative to the
             * sampled freq position.
             *
             * @param sm    the merger
             * @param smis  the segments positioned on the term; smis[0]->tb
             *              supplies the term that is written
             * @param cnt   number of entries in smis
             */
            void sm_merge_term_info(SegmentMerger *sm, SegmentMergeInfo **smis, int cnt)
            {
              int freq_start = os_pos(sm->freq_out);
              int prox_start = os_pos(sm->prox_out);
              int doc_freq   = sm_append_postings(sm, smis, cnt);  // append posting data
              int skip_start = sm_write_skip(sm);

              if (doc_freq <= 0) {
                return;   // no documents for this term, nothing to register
              }

              // add an entry to the dictionary with pointers to prox and freq files
              ti_set(sm->ti, doc_freq, freq_start, prox_start, (skip_start - freq_start));
              tiw_add(sm->tiw, sm_tb_to_term(sm, smis[0]->tb), sm->ti);
            }
         
     | 
| 
      
 2150 
     | 
    
         
            +
             
     | 
| 
      
 2151 
     | 
    
         
            +
            /**
             * Merge the term dictionaries of all readers.
             *
             * One SegmentMergeInfo is created per reader, with a base equal to
             * the sum of num_docs of the readers before it, and pushed onto
             * sm->queue if it has a first term. The queue is then drained: the
             * top entry and every following entry whose term compares equal are
             * popped together, merged as one term, and each popped entry is
             * advanced and either pushed back or destroyed when exhausted.
             *
             * @param sm  the merger; sm->queue must already exist
             */
            void sm_merge_term_infos(SegmentMerger *sm)
            {
              int doc_base = 0;
              int i, match_cnt;
              IndexReader *reader;
              SegmentMergeInfo *smi, *top;
              SegmentMergeInfo **match;
              TermBuffer *current;

              // seed the queue with one cursor per reader that has any terms
              for (i = 0; i < sm->readers->size; i++) {
                reader = sm->readers->elems[i];
                smi = smi_create(doc_base, reader->terms(reader), reader);
                doc_base += reader->num_docs(reader);
                if (smi_next(smi) == NULL) {
                  smi_destroy(smi);
                } else {
                  pq_push(sm->queue, smi);
                }
              }

              match = ALLOC_N(SegmentMergeInfo *, sm->readers->size);

              while (sm->queue->count > 0) {
                // pop the smallest term plus every cursor sitting on the same term
                match[0] = pq_pop(sm->queue);
                match_cnt = 1;
                current = match[0]->tb;
                for (top = pq_top(sm->queue);
                     (top != NULL) && (tb_cmp(current, top->tb) == 0);
                     top = pq_top(sm->queue)) {
                  match[match_cnt] = pq_pop(sm->queue);
                  match_cnt++;
                }

                sm_merge_term_info(sm, match, match_cnt);   // add new TermInfo

                // advance the popped cursors, last popped first
                for (i = match_cnt - 1; i >= 0; i--) {
                  smi = match[i];
                  if (smi_next(smi) == NULL) {
                    smi_destroy(smi);        // done with a segment
                  } else {
                    pq_push(sm->queue, smi); // restore queue
                  }
                }
              }

              free(match);
            }
         
     | 
| 
      
 2203 
     | 
    
         
            +
             
     | 
| 
      
 2204 
     | 
    
         
            +
            /**
             * Set up the term-merge state, merge all terms, then close the merger.
             *
             * Creates the "<name>.frq" and "<name>.prx" outputs, opens the
             * TermInfosWriter, allocates the ring of reusable Term slots and the
             * priority queue (one entry per reader), runs sm_merge_term_infos and
             * finally calls sm_close.
             *
             * The ring holds index_interval + 2 terms because the TermInfosWriter
             * needs to keep the last index_interval terms so that it can compare
             * the last term put in the index with the next one.
             *
             * NOTE(review): the file names are built with an unbounded sprintf
             * into a SEGMENT_NAME_MAX_LENGTH buffer -- confirm sm->name plus the
             * extension always fits.
             *
             * @param sm  the merger; sm->fis must already be populated
             */
            void sm_merge_terms(SegmentMerger *sm)
            {
              int slot;
              char fname[SEGMENT_NAME_MAX_LENGTH];

              // posting outputs
              sprintf(fname, "%s.frq", sm->name);
              sm->freq_out = sm->store->create_output(sm->store, fname);

              sprintf(fname, "%s.prx", sm->name);
              sm->prox_out = sm->store->create_output(sm->store, fname);

              // term dictionary writer
              sm->tiw = tiw_open(sm->store, sm->name, sm->fis, sm->term_index_interval);

              // ring of reusable terms handed to the writer
              sm->terms_buf_pointer = 0;
              sm->terms_buf_size = sm->tiw->index_interval + 2;
              sm->terms_buf = ALLOC_N(Term, sm->terms_buf_size);
              for (slot = 0; slot < sm->terms_buf_size; slot++) {
                sm->terms_buf[slot].field = NULL;
                sm->terms_buf[slot].text = ALLOC_N(char, MAX_WORD_SIZE);
              }

              sm->skip_interval = sm->tiw->skip_interval;
              sm->queue = pq_create(sm->readers->size, &smi_lt);

              sm_merge_term_infos(sm);

              sm_close(sm);
            }
         
     | 
| 
      
 2231 
     | 
    
         
            +
             
     | 
| 
      
 2232 
     | 
    
         
            +
            /**
             * Write one merged norms file per indexed field that keeps norms.
             *
             * For each such field a "<name>.f<field number>" output is created.
             * Every reader's norms for the field are loaded into a zero-filled
             * scratch buffer of max_doc bytes, and the bytes of the documents that
             * are not flagged deleted are appended to the output in reader order.
             *
             * @param sm  the merger; sm->fis, sm->readers, sm->store and sm->name
             *            are read
             */
            void sm_merge_norms(SegmentMerger *sm)
            {
              int field_num, r, doc_num, max_doc;
              uchar *norms;
              FieldInfo *fi;
              OutStream *norm_out;
              IndexReader *reader;
              char fname[SEGMENT_NAME_MAX_LENGTH];

              for (field_num = 0; field_num < sm->fis->fcnt; field_num++) {
                fi = sm->fis->by_number[field_num];
                if (!fi->is_indexed || fi->omit_norms) {
                  continue;   // this field has no norms file
                }

                sprintf(fname, "%s.f%d", sm->name, field_num);
                norm_out = sm->store->create_output(sm->store, fname);

                for (r = 0; r < sm->readers->size; r++) {
                  reader = sm->readers->elems[r];
                  max_doc = reader->max_doc(reader);

                  // zero-fill first, then let the reader fill in the norms
                  norms = ALLOC_N(uchar, max_doc);
                  memset(norms, 0, sizeof(uchar) * max_doc);
                  reader->get_norms_into(reader, fi->name, norms, 0);

                  for (doc_num = 0; doc_num < max_doc; doc_num++) {
                    if (reader->is_deleted(reader, doc_num)) {
                      continue;
                    }
                    os_write_byte(norm_out, norms[doc_num]);
                  }
                  free(norms);
                }

                os_close(norm_out);
              }
            }
         
     | 
| 
      
 2262 
     | 
    
         
            +
             
     | 
| 
      
 2263 
     | 
    
         
            +
            /**
             * Copy the term vectors of every live document into the merged
             * segment.
             *
             * A TermVectorsWriter is opened for the merged segment; for each
             * reader, every document that is not flagged deleted has its term
             * vectors fetched, added to the writer and destroyed. The writer is
             * closed at the end.
             *
             * @param sm  the merger; sm->fis, sm->readers, sm->store and sm->name
             *            are read
             */
            void sm_merge_vectors(SegmentMerger *sm)
            {
              int r, doc_num, max_doc;
              IndexReader *reader;
              Array *doc_vectors;
              TermVectorsWriter *tvw = tvw_open(sm->store, sm->name, sm->fis);

              for (r = 0; r < sm->readers->size; r++) {
                reader = sm->readers->elems[r];
                max_doc = reader->max_doc(reader);

                for (doc_num = 0; doc_num < max_doc; doc_num++) {
                  if (reader->is_deleted(reader, doc_num)) {
                    continue;   // skip deleted docs
                  }
                  doc_vectors = reader->get_term_vectors(reader, doc_num);
                  tvw_add_all_doc_vectors(tvw, doc_vectors);
                  ary_destroy(doc_vectors);
                }
              }

              tvw_close(tvw);
            }
         
     | 
| 
      
 2283 
     | 
    
         
            +
             
     | 
| 
      
 2284 
     | 
    
         
            +
            /**
             * Run the complete merge: fields, terms, norms and, when the merged
             * field table reports vectors, term vectors.
             *
             * @param sm  the merger to run
             * @return    the document count reported by sm_merge_fields
             */
            int sm_merge(SegmentMerger *sm)
            {
              int merged_docs;

              merged_docs = sm_merge_fields(sm);   // also populates sm->fis
              sm_merge_terms(sm);
              sm_merge_norms(sm);
              if (fis_has_vectors(sm->fis)) {
                sm_merge_vectors(sm);
              }

              return merged_docs;
            }
         
     | 
| 
      
 2293 
     | 
    
         
            +
             
     | 
| 
      
 2294 
     | 
    
         
            +
            /**
             * Collects the file names belonging to segment sm->name and adds them to
             * the compound file `file_name` opened on sm->store.
             *
             * The list is built in three passes:
             *   - "<segment>.<ext>" for every entry of COMPOUND_EXTENSIONS,
             *   - "<segment>.f<N>" for every field N that is indexed and does not
             *     omit norms,
             *   - "<segment>.<ext>" for every entry of VECTOR_EXTENSIONS, but only
             *     when fis_has_vectors() reports true for sm->fis.
             *
             * Every name is formatted with snprintf bounded by sizeof(fname), so an
             * unexpectedly long segment name is truncated instead of writing past
             * the end of the stack buffer.
             *
             * Returns the array of (estrdup'ed) file names that were added. The array
             * is created with &efree passed to ary_create (presumably the element
             * destructor - confirm against ary_create).
             **/
            Array *sm_create_compound_file(SegmentMerger *sm, char *file_name)
            {
              Array *files = ary_create(0, &efree);
              CompoundWriter *cw = open_cw(sm->store, file_name);
              FieldInfo *fi;
              char fname[SEGMENT_NAME_MAX_LENGTH];
              int i;

              // Files listed in COMPOUND_EXTENSIONS
              for (i = 0; i < NELEMS(COMPOUND_EXTENSIONS); i++) {
                snprintf(fname, sizeof(fname), "%s.%s", sm->name, COMPOUND_EXTENSIONS[i]);
                ary_append(files, estrdup(fname));
              }

              // Field norm files
              for (i = 0; i < sm->fis->fcnt; i++) {
                fi = sm->fis->by_number[i];
                if (fi->is_indexed && !fi->omit_norms) {
                  snprintf(fname, sizeof(fname), "%s.f%d", sm->name, i);
                  ary_append(files, estrdup(fname));
                }
              }

              // Vector files
              if (fis_has_vectors(sm->fis)) {
                for (i = 0; i < NELEMS(VECTOR_EXTENSIONS); i++) {
                  snprintf(fname, sizeof(fname), "%s.%s", sm->name, VECTOR_EXTENSIONS[i]);
                  ary_append(files, estrdup(fname));
                }
              }

              // Now merge all added files
              for (i = 0; i < files->size; i++) {
                cw_add_file(cw, (char *)files->elems[i]);
              }

              // Perform the merge
              cw_close(cw);

              return files;
            }
         
     | 
| 
      
 2334 
     | 
    
         
            +
             
     | 
| 
      
 2335 
     | 
    
         
            +
            /****************************************************************************
         
     | 
| 
      
 2336 
     | 
    
         
            +
             *
         
     | 
| 
      
 2337 
     | 
    
         
            +
             * IndexReader
         
     | 
| 
      
 2338 
     | 
    
         
            +
             *
         
     | 
| 
      
 2339 
     | 
    
         
            +
             ****************************************************************************/
         
     | 
| 
      
 2340 
     | 
    
         
            +
             
     | 
| 
      
 2341 
     | 
    
         
            +
            /**
             * No-op counterpart of ir_acquire_write_lock; ir_create installs it as
             * the acquire_write_lock hook when the reader is not an owner.
             **/
            void ir_acquire_not_necessary(IndexReader *ir)
            {
              (void)ir;  // intentionally unused
            }
         
     | 
| 
      
 2342 
     | 
    
         
            +
            /**
             * Makes sure the reader holds the index write lock before a modifying
             * operation (delete, undelete, set_norm).
             *
             * - A reader already flagged is_stale is rejected with STATE_ERROR.
             * - If no write lock is held yet, WRITE_LOCK_NAME is opened on the store
             *   and obtained. When obtaining fails the lock object is closed and
             *   ir->write_lock is reset to NULL before the error is reported, so a
             *   later call tries to acquire the lock again instead of treating the
             *   un-obtained lock as held.
             * - After obtaining the lock the on-disk version is compared with the
             *   version this reader was opened with; if the index is newer, the
             *   reader is marked stale, the lock is released and closed, and
             *   STATE_ERROR is reported.
             *
             * NOTE(review): eprintf is assumed not to return normally; the `return`
             * after the failed-obtain report only guards against the case where it
             * does - confirm against eprintf's definition.
             **/
            void ir_acquire_write_lock(IndexReader *ir)
            {
              if (ir->is_stale) {
                eprintf(STATE_ERROR, "IndexReader out of date and no longer valid for delete, undelete, or set_norm operations");
              }

              if (ir->write_lock == NULL) {
                ir->write_lock = ir->store->open_lock(ir->store, WRITE_LOCK_NAME);
                if (!ir->write_lock->obtain(ir->write_lock)) { // obtain write lock
                  // never leave an un-obtained lock recorded as the reader's write lock
                  ir->store->close_lock(ir->write_lock);
                  ir->write_lock = NULL;
                  eprintf(STATE_ERROR, "Index locked for write: %s", WRITE_LOCK_NAME);
                  return;
                }

                // we have to check whether index has changed since this reader was opened.
                // if so, this reader is no longer valid for deletion
                if (sis_read_current_version(ir->store) > ir->sis->version) {
                  ir->is_stale = true;
                  ir->write_lock->release(ir->write_lock);
                  ir->store->close_lock(ir->write_lock);
                  ir->write_lock = NULL;
                  eprintf(STATE_ERROR, "IndexReader out of date and no longer valid for delete, undelete, or set_norm operations");
                }
              }
            }
         
     | 
| 
      
 2363 
     | 
    
         
            +
             
     | 
| 
      
 2364 
     | 
    
         
            +
            /**
             * Allocates an IndexReader and fills in the members common to all reader
             * kinds.
             *
             * store, sis, is_owner and close_store are stored as given. The
             * acquire_write_lock hook is ir_acquire_write_lock for an owner and
             * ir_acquire_not_necessary otherwise. has_changes and is_stale start
             * false; write_lock, cache and sort_cache start NULL. No other member is
             * assigned here.
             **/
            IndexReader *ir_create(Store *store, SegmentInfos *sis, int is_owner, int close_store)
            {
              IndexReader *reader = ALLOC(IndexReader);

              mutex_init(&reader->mutex, NULL);

              // what the reader works on, and what it is responsible for
              reader->store = store;
              reader->sis = sis;
              reader->is_owner = is_owner;
              reader->close_store = close_store;

              // only an owning reader has to take the write lock itself
              reader->acquire_write_lock =
                is_owner ? &ir_acquire_write_lock : &ir_acquire_not_necessary;

              // initial state
              reader->has_changes = false;
              reader->is_stale = false;
              reader->write_lock = NULL;
              reader->cache = NULL;
              reader->sort_cache = NULL;

              return reader;
            }
         
     | 
| 
      
 2386 
     | 
    
         
            +
             
     | 
| 
      
 2387 
     | 
    
         
            +
            /**
             * Opens a reader over the index in `store`, holding store->mutex while
             * the segment infos are read and the reader(s) are opened.
             *
             * With exactly one segment the result of sr_open() for that segment is
             * returned directly (opened with the `true` flag and close_store).
             * Otherwise one sr_open() reader per segment is opened with both flags
             * false and the set is handed to mr_open() together with store, sis and
             * close_store.
             **/
            IndexReader *ir_open(Store *store, int close_store)
            {
              IndexReader *ir;
              SegmentInfos *sis;

              mutex_lock(&store->mutex);

              sis = sis_create();
              sis_read(sis, store);

              if (sis->scnt != 1) {
                int seg;
                IndexReader **sub_readers = ALLOC_N(IndexReader *, sis->scnt);

                for (seg = 0; seg < sis->scnt; seg++) {
                  sub_readers[seg] = sr_open(sis, seg, false, false);
                }
                ir = mr_open(store, sis, sub_readers, sis->scnt, close_store);
              } else {
                ir = sr_open(sis, 0, true, close_store);
              }

              mutex_unlock(&store->mutex);

              return ir;
            }
         
     | 
| 
      
 2408 
     | 
    
         
            +
             
     | 
| 
      
 2409 
     | 
    
         
            +
            /**
             * Reports whether the store contains a file named "segments", as answered
             * by the store's own exists() hook.
             **/
            bool ir_index_exists(Store *store)
            {
              return (*store->exists)(store, "segments");
            }
         
     | 
| 
      
 2413 
     | 
    
         
            +
             
     | 
| 
      
 2414 
     | 
    
         
            +
            /**
             * Sets the norm of `field` in document `doc_num` to `val`.
             *
             * Done under ir->mutex: the reader's acquire_write_lock hook runs first,
             * then do_set_norm, and finally the reader is flagged as having changes.
             **/
            void ir_set_norm(IndexReader *ir, int doc_num, char *field, uchar val)
            {
              mutex_lock(&ir->mutex);
              {
                (*ir->acquire_write_lock)(ir);
                (*ir->do_set_norm)(ir, doc_num, field, val);
                ir->has_changes = true;
              }
              mutex_unlock(&ir->mutex);
            }
         
     | 
| 
      
 2422 
     | 
    
         
            +
             
     | 
| 
      
 2423 
     | 
    
         
            +
            /**
             * Invokes the reader's do_undelete_all hook.
             *
             * Done under ir->mutex: the reader's acquire_write_lock hook runs first,
             * then do_undelete_all, and finally the reader is flagged as having
             * changes.
             **/
            void ir_undelete_all(IndexReader *ir)
            {
              mutex_lock(&ir->mutex);
              {
                (*ir->acquire_write_lock)(ir);
                (*ir->do_undelete_all)(ir);
                ir->has_changes = true;
              }
              mutex_unlock(&ir->mutex);
            }
         
     | 
| 
      
 2431 
     | 
    
         
            +
             
     | 
| 
      
 2432 
     | 
    
         
            +
            /**
             * Invokes the reader's do_delete_doc hook for document `doc_num`.
             *
             * Done under ir->mutex: the reader's acquire_write_lock hook runs first,
             * then do_delete_doc, and finally the reader is flagged as having
             * changes.
             **/
            void ir_delete_doc(IndexReader *ir, int doc_num)
            {
              mutex_lock(&ir->mutex);
              {
                (*ir->acquire_write_lock)(ir);
                (*ir->do_delete_doc)(ir, doc_num);
                ir->has_changes = true;
              }
              mutex_unlock(&ir->mutex);
            }
         
     | 
| 
      
 2440 
     | 
    
         
            +
             
     | 
| 
      
 2441 
     | 
    
         
            +
            /**
             * Fetches the first document the term enumeration yields for `term`.
             *
             * Returns NULL when ir_term_docs_for() gives no enumerator or when the
             * enumerator has no first entry; otherwise returns what ir->get_doc()
             * produces for that document number. The enumerator is closed before
             * returning.
             **/
            Document *ir_get_doc_with_term(IndexReader *ir, Term *term)
            {
              Document *found = NULL;
              TermDocEnum *matches = ir_term_docs_for(ir, term);

              if (matches != NULL) {
                if (matches->next(matches)) {
                  found = ir->get_doc(ir, matches->doc_num(matches));
                }
                matches->close(matches);
              }

              return found;
            }
         
     | 
| 
      
 2452 
     | 
    
         
            +
             
     | 
| 
      
 2453 
     | 
    
         
            +
            /**
             * Creates a term-docs enumerator via ir->term_docs() and seeks it to
             * `term`. The enumerator is returned to the caller, which closes it (see
             * ir_get_doc_with_term for an example).
             **/
            TermDocEnum *ir_term_docs_for(IndexReader *ir, Term *term)
            {
              TermDocEnum *docs_enum;

              docs_enum = ir->term_docs(ir);
              docs_enum->seek(docs_enum, term);

              return docs_enum;
            }
         
     | 
| 
      
 2459 
     | 
    
         
            +
             
     | 
| 
      
 2460 
     | 
    
         
            +
            /**
             * Creates a term-positions enumerator via ir->term_positions() and seeks
             * it to `term`. The enumerator is returned to the caller.
             **/
            TermDocEnum *ir_term_positions_for(IndexReader *ir, Term *term)
            {
              TermDocEnum *positions_enum;

              positions_enum = ir->term_positions(ir);
              positions_enum->seek(positions_enum, term);

              return positions_enum;
            }
         
     | 
| 
      
 2466 
     | 
    
         
            +
             
     | 
| 
      
 2467 
     | 
    
         
            +
            /**
             * Writes out pending changes of the reader, if there are any. The caller
             * is expected to hold ir->mutex (ir_commit and ir_close both do).
             *
             * For an owning reader the commit happens under store->mutex and the
             * COMMIT_LOCK_NAME lock: do_commit runs, the segment infos are written,
             * the commit lock is released and closed, and afterwards the write lock
             * (if one is held) is released and closed as well. A non-owning reader
             * only runs do_commit.
             *
             * If the commit lock cannot be obtained, the lock object is closed and
             * store->mutex is unlocked BEFORE the error is reported, so the error
             * path does not leave the store mutex held or the lock object open.
             *
             * NOTE(review): eprintf is assumed not to return normally; the `return`
             * after it only guards against the case where it does. has_changes is
             * left untouched on that path - confirm against eprintf's definition.
             **/
            void ir_commit_internal(IndexReader *ir)
            {
              if (!ir->has_changes) {
                return;
              }

              if (ir->is_owner) {
                Lock *commit_lock;

                mutex_lock(&ir->store->mutex);
                commit_lock = ir->store->open_lock(ir->store, COMMIT_LOCK_NAME);
                if (!commit_lock->obtain(commit_lock)) { // obtain commit lock
                  ir->store->close_lock(commit_lock);
                  mutex_unlock(&ir->store->mutex);
                  eprintf(STATE_ERROR, "Index locked for commit: %s", COMMIT_LOCK_NAME);
                  return;
                }

                ir->do_commit(ir);
                sis_write(ir->sis, ir->store);

                commit_lock->release(commit_lock);
                ir->store->close_lock(commit_lock);
                mutex_unlock(&ir->store->mutex);

                if (ir->write_lock != NULL) {
                  ir->write_lock->release(ir->write_lock);  // release write lock
                  ir->store->close_lock(ir->write_lock);
                  ir->write_lock = NULL;
                }
              } else {
                ir->do_commit(ir);
              }

              ir->has_changes = false;
            }
         
     | 
| 
      
 2495 
     | 
    
         
            +
             
     | 
| 
      
 2496 
     | 
    
         
            +
            /**
             * Locked entry point for committing: runs ir_commit_internal() while
             * holding ir->mutex.
             **/
            void ir_commit(IndexReader *ir)
            {
              mutex_lock(&ir->mutex);
              {
                ir_commit_internal(ir);
              }
              mutex_unlock(&ir->mutex);
            }
         
     | 
| 
      
 2502 
     | 
    
         
            +
             
     | 
| 
      
 2503 
     | 
    
         
            +
            /**
             * Commits pending changes and tears the reader down.
             *
             * Under ir->mutex, in this order: ir_commit_internal(), the reader's
             * do_close hook, closing the store when close_store is set, destroying
             * the segment infos when the reader is an owner, and destroying cache
             * and sort_cache when present. The mutex is then unlocked and destroyed
             * and the reader itself is freed; `ir` must not be used afterwards.
             *
             * The mutex is unlocked before mutex_destroy() because destroying a
             * mutex that is still locked is undefined for pthread mutexes
             * (NOTE(review): mutex_* are presumably pthread-backed - confirm).
             **/
            void ir_close(IndexReader *ir)
            {
              mutex_lock(&ir->mutex);
              ir_commit_internal(ir);
              ir->do_close(ir);
              if (ir->close_store) {
                ir->store->close(ir->store);
              }
              if (ir->is_owner) {
                sis_destroy(ir->sis);
              }
              if (ir->cache) {
                h_destroy(ir->cache);
              }
              if (ir->sort_cache) {
                h_destroy(ir->sort_cache);
              }

              // release our own hold on the mutex before it is destroyed
              mutex_unlock(&ir->mutex);
              mutex_destroy(&ir->mutex);
              free(ir);
            }
         
     | 
| 
      
 2524 
     | 
    
         
            +
             
     | 
| 
      
 2525 
     | 
    
         
            +
            void ir_destroy(void *p)
         
     | 
| 
      
 2526 
     | 
    
         
            +
            {
         
     | 
| 
      
 2527 
     | 
    
         
            +
              IndexReader *ir = (IndexReader *)p;
         
     | 
| 
      
 2528 
     | 
    
         
            +
              ir_close(ir);
         
     | 
| 
      
 2529 
     | 
    
         
            +
            }
         
     | 
| 
      
 2530 
     | 
    
         
            +
             
     | 
| 
      
 2531 
     | 
    
         
            +
            /**
         
     | 
| 
      
 2532 
     | 
    
         
            +
             * Don't call this method if the cache already exists
         
     | 
| 
      
 2533 
     | 
    
         
            +
             **/
         
     | 
| 
      
 2534 
     | 
    
         
            +
            void ir_add_cache(IndexReader *ir)
         
     | 
| 
      
 2535 
     | 
    
         
            +
            {
         
     | 
| 
      
 2536 
     | 
    
         
            +
              ir->cache = co_hsh_create();
         
     | 
| 
      
 2537 
     | 
    
         
            +
            }
         
     | 
| 
      
 2538 
     | 
    
         
            +
             
     | 
| 
      
 2539 
     | 
    
         
            +
            /**
             * Reports whether the version currently recorded in the store equals the
             * version held in this reader's segment infos.
             **/
            bool ir_is_latest(IndexReader *ir)
            {
              if (sis_read_current_version(ir->store) == ir->sis->version) {
                return true;
              }
              return false;
            }
         
     | 
| 
      
 2543 
     | 
    
         
            +
             
     |