RubyGems - ferret - Versions diffs - 0.9.1 → 0.9.2 - Mend

ferret 0.9.1 → 0.9.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (105) hide show

data/README +6 -5
data/Rakefile +34 -13
data/TODO +1 -0
data/TUTORIAL +1 -1
data/ext/analysis.c +87 -70
data/ext/analysis.h +18 -6
data/ext/array.c +1 -2
data/ext/array.h +1 -1
data/ext/bitvector.c +10 -6
data/ext/bitvector.h +2 -2
data/ext/compound_io.c +30 -27
data/ext/document.c +15 -15
data/ext/document.h +5 -5
data/ext/except.c +2 -0
data/ext/except.h +25 -23
data/ext/extconf.rb +1 -0
data/ext/ferret.c +10 -8
data/ext/ferret.h +9 -8
data/ext/field.c +29 -25
data/ext/filter.c +52 -14
data/ext/frtio.h +13 -0
data/ext/fs_store.c +115 -170
data/ext/global.c +9 -8
data/ext/global.h +17 -13
data/ext/hash.c +13 -19
data/ext/hash.h +11 -11
data/ext/hashset.c +5 -7
data/ext/hashset.h +9 -8
data/ext/helper.c +1 -1
data/ext/helper.h +2 -1
data/ext/inc/except.h +25 -23
data/ext/inc/lang.h +11 -1
data/ext/ind.c +33 -21
data/ext/index.h +44 -39
data/ext/index_io.c +61 -57
data/ext/index_rw.c +418 -361
data/ext/lang.c +10 -0
data/ext/lang.h +11 -1
data/ext/nix_io.c +135 -0
data/ext/priorityqueue.c +16 -16
data/ext/priorityqueue.h +9 -6
data/ext/q_boolean.c +128 -76
data/ext/q_const_score.c +20 -20
data/ext/q_filtered_query.c +20 -20
data/ext/q_fuzzy.c +37 -23
data/ext/q_match_all.c +15 -19
data/ext/q_multi_phrase.c +87 -46
data/ext/q_parser.c +247 -119
data/ext/q_phrase.c +86 -52
data/ext/q_prefix.c +25 -14
data/ext/q_range.c +59 -14
data/ext/q_span.c +263 -172
data/ext/q_term.c +62 -51
data/ext/q_wildcard.c +24 -13
data/ext/r_analysis.c +328 -80
data/ext/r_doc.c +11 -6
data/ext/r_index_io.c +40 -32
data/ext/r_qparser.c +15 -14
data/ext/r_search.c +270 -152
data/ext/r_store.c +32 -17
data/ext/ram_store.c +38 -22
data/ext/search.c +617 -87
data/ext/search.h +227 -163
data/ext/similarity.c +54 -45
data/ext/similarity.h +3 -3
data/ext/sort.c +132 -53
data/ext/store.c +21 -2
data/ext/store.h +14 -14
data/ext/tags +4322 -232
data/ext/term.c +140 -109
data/ext/termdocs.c +74 -60
data/ext/vector.c +181 -152
data/ext/w32_io.c +150 -0
data/lib/ferret.rb +1 -1
data/lib/ferret/analysis/standard_tokenizer.rb +4 -3
data/lib/ferret/document/field.rb +1 -1
data/lib/ferret/index/field_infos.rb +1 -1
data/lib/ferret/index/term.rb +1 -1
data/lib/ferret/query_parser/query_parser.tab.rb +8 -24
data/lib/ferret/search.rb +1 -0
data/lib/ferret/search/boolean_query.rb +0 -4
data/lib/ferret/search/index_searcher.rb +21 -8
data/lib/ferret/search/multi_phrase_query.rb +7 -0
data/lib/ferret/search/multi_searcher.rb +261 -0
data/lib/ferret/search/phrase_query.rb +1 -1
data/lib/ferret/search/query.rb +34 -5
data/lib/ferret/search/sort.rb +7 -3
data/lib/ferret/search/sort_field.rb +8 -4
data/lib/ferret/store/fs_store.rb +13 -6
data/lib/ferret/store/index_io.rb +0 -14
data/lib/ferret/store/ram_store.rb +3 -2
data/lib/rferret.rb +1 -1
data/test/unit/analysis/ctc_analyzer.rb +131 -0
data/test/unit/analysis/ctc_tokenstream.rb +98 -9
data/test/unit/index/tc_index.rb +40 -1
data/test/unit/index/tc_term.rb +7 -0
data/test/unit/index/th_doc.rb +8 -0
data/test/unit/query_parser/tc_query_parser.rb +6 -4
data/test/unit/search/rtc_sort_field.rb +6 -6
data/test/unit/search/tc_index_searcher.rb +8 -0
data/test/unit/search/tc_multi_searcher.rb +275 -0
data/test/unit/search/tc_multi_searcher2.rb +126 -0
data/test/unit/search/tc_search_and_sort.rb +66 -0
metadata +31 -26
data/test/unit/query_parser/rtc_query_parser.rb +0 -138

data/ext/w32_io.c ADDED Viewed

@@ -0,0 +1,150 @@
+#ifdef WIN32
+#include "global.h"
+#include "store.h"
+#include <stdio.h>
+#include <io.h>
+#include <errno.h>
+#include <string.h>
+/**
+ * Create a filepath for a file in the store using the Windows file
+ * separator ('\').
+ *
+ * The result is written with sprintf(), so no bounds checking is done:
+ * +buf+ must be large enough for +base+, the separator, +filename+ and the
+ * terminating NUL. The callers in this file pass a MAX_FILE_PATH buffer.
+ *
+ * @param buf caller-supplied buffer that receives the joined path
+ * @param base the directory part of the path
+ * @param filename the name of the file inside +base+
+ * @return +buf+, so the call can be used directly as an argument
+ */
+char *join_path(char *buf, const char *base, const char *filename)
+{
+  sprintf(buf, "%s\\%s", base, filename);
+  return buf;
+}
+/**
+ * Test whether a file exists by trying to open it.
+ *
+ * @param path the path of the file to probe
+ * @return true if the file could be opened, false if opening failed with
+ *   ENOENT
+ * @throws IO_ERROR if opening failed for any reason other than ENOENT
+ */
+bool exists(char *path)
+{
+  const int handle = _open(path, 0);
+  if (handle >= 0) {
+    /* the file is there; the descriptor was only needed for the probe */
+    _close(handle);
+    return true;
+  }
+  if (errno != ENOENT) {
+    RAISE(IO_ERROR, strerror(errno));
+  }
+  return false;
+}
+/**
+ * Count the entries of a directory whose names do not start with '.'.
+ *
+ * @param path the directory to scan
+ * @return the number of matching entries
+ * @throws IO_ERROR if the directory listing cannot be started
+ */
+int fcount(char *path)
+{
+  char pattern[MAX_FILE_PATH];
+  struct _finddata_t entry;
+  intptr_t handle;
+  int total = 0;
+  join_path(pattern, path, "*");
+  handle = _findfirst(pattern, &entry);
+  if (handle < 0) {
+    RAISE(IO_ERROR, strerror(errno));
+  }
+  /* entry already holds the first match, so test it before advancing */
+  for (;;) {
+    if (entry.name[0] != '.') {
+      total++;
+    }
+    if (_findnext(handle, &entry) != 0) {
+      break;
+    }
+  }
+  _findclose(handle);
+  return total;
+}
+/**
+ * Call +func+ once for every entry in a directory whose name does not start
+ * with '.' and which is not a lock file.
+ *
+ * @param path the directory to scan
+ * @param func callback invoked with the entry's file name (not the full
+ *   path) and +arg+
+ * @param arg opaque pointer handed through to +func+ unchanged
+ * @throws IO_ERROR if the directory listing cannot be started
+ */
+void dir_each(char *path, void (*func)(char *fname, void *arg), void *arg)
+{
+  char buf[MAX_FILE_PATH];
+  struct _finddata_t fd;
+  intptr_t d;
+  join_path(buf, path, "*");
+  /* _findfirst() reports failure by returning -1 */
+  if ((d = _findfirst(buf, &fd)) == -1) {
+    RAISE(IO_ERROR, strerror(errno));
+  }
+  /* _findfirst() has already stored the first match in fd, so it must be
+   * processed before _findnext() is called; otherwise the first entry of
+   * the directory would never be visited. */
+  do {
+    if (fd.name[0] != '.' && !file_is_lock(fd.name)) {
+      func(fd.name, arg);
+    }
+  } while (_findnext(d, &fd) == 0);
+  _findclose(d);
+}
+/**
+ * Clear all the locks in the store.
+ *
+ * Every entry of the store's directory for which file_is_lock() is true is
+ * passed to remove(). The result of remove() is not checked, so a lock file
+ * that cannot be deleted is silently left in place.
+ *
+ * @param store the store to clear the locks from
+ * @throws IO_ERROR if there is an error opening the directory
+ */
+void fs_clear_locks(Store *store)
+{
+  char buf[MAX_FILE_PATH];
+  struct _finddata_t fd;
+  intptr_t d;
+  join_path(buf, store->dir.path, "*");
+  /* _findfirst() reports failure by returning -1 */
+  if ((d = _findfirst(buf, &fd)) == -1) {
+    RAISE(IO_ERROR, strerror(errno));
+  }
+  /* fd already holds the first match after _findfirst(); examine it before
+   * advancing so that the first directory entry is not skipped. buf can be
+   * reused for the full path because the search state lives in d. */
+  do {
+    if (file_is_lock(fd.name)) {
+      remove(join_path(buf, store->dir.path, fd.name));
+    }
+  } while (_findnext(d, &fd) == 0);
+  _findclose(d);
+}
+/**
+ * Clear all files from the store except the lock files.
+ *
+ * Entries whose names start with '.' are left alone as well. The result of
+ * remove() is not checked, so a file that cannot be deleted is silently
+ * left in place.
+ *
+ * @param store the store to clear all the files from
+ * @throws IO_ERROR if there is an error opening the directory
+ */
+void fs_clear(Store *store)
+{
+  char buf[MAX_FILE_PATH];
+  struct _finddata_t fd;
+  intptr_t d;
+  join_path(buf, store->dir.path, "*");
+  /* _findfirst() reports failure by returning -1 */
+  if ((d = _findfirst(buf, &fd)) == -1) {
+    RAISE(IO_ERROR, strerror(errno));
+  }
+  /* fd already holds the first match after _findfirst(); examine it before
+   * advancing so that the first directory entry is not skipped. buf can be
+   * reused for the full path because the search state lives in d. */
+  do {
+    if (fd.name[0] != '.' && !file_is_lock(fd.name)) {
+      remove(join_path(buf, store->dir.path, fd.name));
+    }
+  } while (_findnext(d, &fd) == 0);
+  _findclose(d);
+}
+/**
+ * Clear all files from the store including the lock files.
+ *
+ * Only entries whose names start with '.' are left alone. The result of
+ * remove() is not checked, so a file that cannot be deleted is silently
+ * left in place.
+ *
+ * @param store the store to clear all the files from
+ * @throws IO_ERROR if there is an error opening the directory
+ */
+void fs_clear_all(Store *store)
+{
+  char buf[MAX_FILE_PATH];
+  struct _finddata_t fd;
+  intptr_t d;
+  join_path(buf, store->dir.path, "*");
+  /* _findfirst() reports failure by returning -1 */
+  if ((d = _findfirst(buf, &fd)) == -1) {
+    RAISE(IO_ERROR, strerror(errno));
+  }
+  /* fd already holds the first match after _findfirst(); examine it before
+   * advancing so that the first directory entry is not skipped. buf can be
+   * reused for the full path because the search state lives in d. */
+  do {
+    if (fd.name[0] != '.') {
+      remove(join_path(buf, store->dir.path, fd.name));
+    }
+  } while (_findnext(d, &fd) == 0);
+  _findclose(d);
+}
+#endif

data/lib/ferret.rb CHANGED Viewed

@@ -22,7 +22,7 @@
 #++
 # :include: ../TUTORIAL
 module Ferret
-  VERSION = '0.9.0'
+  VERSION = '0.9.2'
 end
 # try and load the C extension but it isn't necessary.

data/lib/ferret/analysis/standard_tokenizer.rb CHANGED Viewed

@@ -13,14 +13,15 @@ module Ferret::Analysis
   # addresses, phone numbers, etc.
   class StandardTokenizer < RegExpTokenizer
-    ALPHA      = /[[:alpha:]]+/
+    ALPHA      = /[[:alpha:]_-]+/
     APOSTROPHE = /#{ALPHA}('#{ALPHA})+/
     ACRONYM    = /#{ALPHA}\.(#{ALPHA}\.)+/
     P          = /[_\/.,-]/
     HASDIGIT   = /\w*\d\w*/
-    TOKEN_RE   = /[[:alpha:]]+(('[[:alpha:]]+)+
-                              |\.([[:alpha:]]\.)+
+    TOKEN_RE   = /#{ALPHA}+(('#{ALPHA}+)+
+                              |\.(#{ALPHA}\.)+
                               |(@|\&)\w+([-.]\w+)*
+                              |:\/\/\w+([-.\/]\w+)*
                               )
                  |\w+(([\-._]\w+)*\@\w+([-.]\w+)+
                      |#{P}#{HASDIGIT}(#{P}\w+#{P}#{HASDIGIT})*(#{P}\w+)?

data/lib/ferret/document/field.rb CHANGED Viewed

@@ -307,6 +307,6 @@ module Ferret::Document
       str << "omit_norms," if (@omit_norms)
       str << "binary," if (@binary)
       str << "<#{@name}:#{@binary ? '=bin_data=' : data}>"
-    end
+    end
   end
 end

data/lib/ferret/index/field_infos.rb CHANGED Viewed

@@ -104,7 +104,7 @@ module Ferret
       # Retrieve the field_info object by either field number or field name.
       def [](index)
         if index.is_a? Integer
-          if index == NOT_A_FIELD || index < 0 # < 0 is for C extensions
+          if index >= NOT_A_FIELD || index < 0 # < 0 is for C extensions
             return FieldInfo.new("", false, NOT_A_FIELD, false)
           end
           return @fi_array[index]

data/lib/ferret/index/term.rb CHANGED Viewed

@@ -18,7 +18,7 @@ module Ferret::Index
     # Constructs a Term with the given field and text
     def initialize(fld_name, txt)
-      @field = fld_name
+      @field = fld_name.to_s
       @text = txt.to_s
     end

data/lib/ferret/query_parser/query_parser.tab.rb CHANGED Viewed

@@ -11,7 +11,7 @@ module Ferret
   class QueryParser < Racc::Parser
-module_eval <<'..end lib/ferret/query_parser/query_parser.y modeval..id94697a9944', 'lib/ferret/query_parser/query_parser.y', 126
+module_eval <<'..end lib/ferret/query_parser/query_parser.y modeval..id155b60f3fb', 'lib/ferret/query_parser/query_parser.y', 126
   attr_accessor :default_field, :fields, :handle_parse_errors
   def initialize(default_field = "*", options = {})
@@ -20,7 +20,7 @@ module_eval <<'..end lib/ferret/query_parser/query_parser.y modeval..id94697a994
       default_field = default_field.split("|")
     end
     @field = @default_field = default_field
-    @analyzer = options[:analyzer] || Analysis::Analyzer.new
+    @analyzer = options[:analyzer] || Analysis::StandardAnalyzer.new
     @wild_lower = options[:wild_lower].nil? ? true : options[:wild_lower]
     @occur_default = options[:occur_default] || BooleanClause::Occur::SHOULD
     @default_slop = options[:default_slop] || 0
@@ -170,23 +170,7 @@ module_eval <<'..end lib/ferret/query_parser/query_parser.y modeval..id94697a994
   end
   def get_bad_query(field, str)
-    get_term_query(field, str)
-    #tokens = []
-    #stream = @analyzer.token_stream(field, str)
-    #while token = stream.next
-    #  tokens << token
-    #end
-    #if tokens.length == 0
-    #  return TermQuery.new(Term.new(field, ""))
-    #elsif tokens.length == 1
-    #  return TermQuery.new(Term.new(field, tokens[0].text))
-    #else
-    #  bq = BooleanQuery.new()
-    #  tokens.each do |token|
-    #    bq << BooleanClause.new(TermQuery.new(Term.new(field, token.text)))
-    #  end
-    #  return bq
-    #end
+    get_term_query(field, str) || BooleanQuery.new()
   end
   def get_range_query(field, start_word, end_word, inc_upper, inc_lower)
@@ -200,7 +184,7 @@ module_eval <<'..end lib/ferret/query_parser/query_parser.y modeval..id94697a994
       tokens << token
     end
     if tokens.length == 0
-      return TermQuery.new(Term.new(field, ""))
+      return nil
     elsif tokens.length == 1
       return TermQuery.new(Term.new(field, tokens[0].text))
     else
@@ -365,14 +349,14 @@ module_eval <<'..end lib/ferret/query_parser/query_parser.y modeval..id94697a994
   def get_boolean_query(clauses)
     # possible that we got all nil clauses so check
-    return nil if clauses.nil?
+    bq = BooleanQuery.new()
+    return bq if clauses.nil?
     clauses.compact!
-    return nil if clauses.size == 0
+    return bq if clauses.size == 0
     if clauses.size == 1 and not clauses[0].prohibited?
       return clauses[0].query
     end
-    bq = BooleanQuery.new()
     clauses.each {|clause| bq << clause }
     return bq
   end
@@ -414,7 +398,7 @@ module_eval <<'..end lib/ferret/query_parser/query_parser.y modeval..id94697a994
     return qp.parse(query)
   end
-..end lib/ferret/query_parser/query_parser.y modeval..id94697a9944
+..end lib/ferret/query_parser/query_parser.y modeval..id155b60f3fb
 ##### racc 1.4.4 generates ###

data/lib/ferret/search.rb CHANGED Viewed

@@ -47,3 +47,4 @@ require 'ferret/search/filtered_query.rb'
 require 'ferret/search/match_all_query.rb'
 require 'ferret/search/spans.rb'
 require 'ferret/search/index_searcher.rb'
+require 'ferret/search/multi_searcher.rb'

data/lib/ferret/search/boolean_query.rb CHANGED Viewed

@@ -248,10 +248,6 @@ module Ferret::Search
       end
     end
-    def combine(queries)
-      return Query.merge_boolean_queries(queries)
-    end
     def initialize_copy(o)
       super
       @clauses = o.clauses.clone

data/lib/ferret/search/index_searcher.rb CHANGED Viewed

@@ -104,7 +104,13 @@ module Ferret::Search
         raise ArgumentError, "first_doc must be >= 0 to run a search"
       end
-      scorer = query.weight(self).scorer(@reader)
+      # for MultiSearcher: the weight is computed across all searchers
+      if query.is_a? Weight
+        scorer = query.scorer(@reader)
+      else
+        scorer = query.weight(self).scorer(@reader)
+      end
       if (scorer == nil)
         return TopDocs.new(0, [])
       end
@@ -117,14 +123,10 @@ module Ferret::Search
         hq = HitQueue.new(max_size)
       end
       total_hits = 0
-      min_score = 0.0
       scorer.each_hit() do |doc, score|
         if score > 0.0 and (bits.nil? or bits.get(doc)) # skip docs not in bits
           total_hits += 1
-          if hq.size < max_size or score >= min_score
-            hq.insert(ScoreDoc.new(doc, score))
-            min_score = hq.top.score # maintain min_score
-          end
+          hq.insert(ScoreDoc.new(doc, score))
         end
       end
@@ -148,7 +150,12 @@ module Ferret::Search
     # usually want your hits sorted at least by score so you should use the
     # #search method.
     def search_each(query, filter = nil)
-      scorer = query.weight(self).scorer(@reader)
+      # for MultiSearcher: the weight is computed across all searchers
+      if query.is_a? Weight
+        scorer = query.scorer(@reader)
+      else
+        scorer = query.weight(self).scorer(@reader)
+      end
       return if scorer == nil
       bits = (filter.nil? ? nil : filter.bits(@reader))
       scorer.each_hit() do |doc, score|
@@ -175,13 +182,19 @@ module Ferret::Search
     # Returns an Explanation that describes how +doc+ scored against
     # +query+.
+    # A weight may be given as first parameter instead of the query, too.
     #
     # This is intended to be used in developing Similarity implementations,
     # and, for good performance, should not be displayed with every hit.
     # Computing an explanation is as expensive as executing the query over the
     # entire index.
     def explain(query, doc)
-      return query.weight(self).explain(@reader, doc)
+      if query.is_a? Weight
+        weight = query
+      else
+        weight = query.weight(self)
+      end
+      return weight.explain(@reader, doc)
     end
   end
 end

data/lib/ferret/search/multi_phrase_query.rb CHANGED Viewed

@@ -181,6 +181,13 @@ module Ferret::Search
       end
     end
+    # See Query#extract_terms()
+    def extract_terms(query_terms)
+      @term_arrays.each { |terms|
+        query_terms.merge(terms)
+      }
+    end
     def create_weight(searcher)
       return MultiPhraseWeight.new(self, searcher)
     end

data/lib/ferret/search/multi_searcher.rb ADDED Viewed

@@ -0,0 +1,261 @@
+module Ferret::Search
+  # Implements searching multiple IndexSearchers at once
+  #
+  # Applications usually only need to call the #search method, optionally
+  # passing a :filter option. For performance reasons it is recommended to
+  # open only one Searcher and use it for all of your searches.
+  class MultiSearcher
+    include Ferret::Index
+    attr_accessor :similarity, :searchers
+    # Creates a MultiSearcher searching across all the searchers
+    # in the provided array.
+    #
+    def initialize(args)
+      @searchers = Array.new(args)
+      @similarity = Similarity.default
+      # initialize reader lookup array
+      @max_doc = 0
+      @starts = Array.new(@searchers.size + 1)
+      @searchers.each_with_index { |searcher, i|
+        @starts[i] = @max_doc
+        @max_doc += searcher.max_doc
+      }
+      @starts[@searchers.size] = @max_doc
+    end
+    # closes all underlying Searchers
+    def close()
+      @searchers.each { |searcher| searcher.close() }
+    end
+    # Expert: Returns the number of documents containing +term+.
+    # Called by search code to compute term weights.
+    # See IndexReader#doc_freq
+    def doc_freq(term)
+      return @searchers.inject(0) { |df, searcher|
+        df + searcher.doc_freq(term)
+      }
+    end
+    # Expert: For each term in the terms array, calculates the number of
+    # documents containing +term+. Returns an array with these
+    # document frequencies. Used to minimize number of remote calls.
+    def doc_freqs(terms)
+      result = Array.new
+      terms.each {|term, i| result << doc_freq(term)}
+      return result
+    end
+    # Expert: Returns the stored fields of document +n+.
+    #
+    # See IndexReader#get_document
+    def doc(n)
+      i = sub_searcher(n)
+      return @searchers[i].doc(n - @starts[i])
+    end
+    # Returns index of the searcher for document <code>n</code> in the
+    # array used to construct this searcher.
+    #
+    # This is a binary search over @starts (the first global document number
+    # of each searcher). If +n+ equals a start value, the last searcher
+    # sharing that start value is returned; otherwise the result is the index
+    # of the last searcher whose start is less than +n+ (-1 if there is
+    # none).
+    def sub_searcher(n)
+      lo = 0  # lower bound of the range of @starts still being searched
+      hi = @searchers.size - 1  # upper bound; the final total in @starts
+      # is deliberately not part of the search range
+      while hi >= lo do
+        mid = (lo + hi) >> 1
+        midValue = @starts[mid]
+        if n < midValue
+          hi = mid - 1;
+        elsif n > midValue
+          lo = mid + 1;
+        else                   # n is exactly the first doc of a searcher
+          while mid+1 < @searchers.size && @starts[mid+1] == midValue do
+            mid += 1                # several searchers share this start; take the last
+          end
+          return mid
+        end
+      end
+      # no exact match: hi now indexes the last start that is less than n
+      return hi
+    end
+    # Returns the document number of document <code>n</code> within its
+    # sub-index.
+    def sub_doc(n)
+      return n - @starts[sub_searcher(n)]
+    end
+    # Expert: Returns one greater than the largest possible document number.
+    # Called by search code to compute term weights.
+    # See IndexReader#max_doc
+    def max_doc
+      return @max_doc
+    end
+    # Create weight in multiple index scenario.
+    #
+    # Distributed query processing is done in the following steps:
+    # 1. rewrite query
+    # 2. extract necessary terms
+    # 3. collect dfs for these terms from the Searchables
+    # 4. create query weight using aggregate dfs.
+    # 5. distribute that weight to Searchables
+    # 6. merge results
+    #
+    # Steps 1-4 are done here, 5+6 in the search() methods
+    def create_weight(query)
+      # step 1
+      rewritten_query = self.rewrite(query)
+      # step 2
+      terms = Set.new
+      rewritten_query.extract_terms(terms)
+      # step 3
+      aggregated_dfs = Array.new(terms.size, 0)
+      @searchers.each { |searcher|
+        dfs = searcher.doc_freqs(terms)
+        dfs.each_with_index { |df,i|
+          aggregated_dfs[i] += df
+        }
+      }
+      df_map = Hash.new
+      terms.each_with_index { |term,i|
+        df_map[term] = aggregated_dfs[i]
+      }
+      # step 4
+      cache_sim = CachedDfSource.new(df_map, self.max_doc, self.similarity)
+      return rewritten_query.weight(cache_sim)
+    end
+    # Runs +query+ against every underlying searcher and merges the hits
+    # into a single TopDocs whose document numbers are global to this
+    # MultiSearcher.
+    #
+    # options:
+    # filter::    passed through to each sub-searcher as its :filter option
+    # first_doc:: offset of the first hit to return (default 0, must be >= 0)
+    # num_docs::  maximum number of hits to return (default 10, must be > 0)
+    # sort::      not supported; any value raises NotImplementedError
+    #
+    # Raises ArgumentError if +num_docs+ or +first_doc+ is out of range.
+    def search(query, options = {})
+      filter = options[:filter]
+      first_doc = options[:first_doc]||0
+      num_docs = options[:num_docs]||10
+      # the merged queue must hold every hit up to the end of the requested page
+      max_size = first_doc + num_docs
+      sort = options[:sort]
+      if (num_docs <= 0)
+        raise ArgumentError, "num_docs must be > 0 to run a search"
+      end
+      if (first_doc < 0)
+        raise ArgumentError, "first_doc must be >= 0 to run a search"
+      end
+      if (sort)
+        raise NotImplementedError
+        #fields = sort.is_a?(Array) ? sort : sort.fields
+        #hq = FieldDocSortedHitQueue.new(fields, max_size)
+      else
+        hq = HitQueue.new(max_size)
+      end
+      total_hits = 0
+      # one weight, built from the aggregated document frequencies, is shared
+      # by all sub-searchers
+      weight = create_weight(query)
+      @searchers.each_with_index { |searcher,i|     # search each searcher
+        # each sub-searcher is asked for its best max_size hits from offset 0;
+        # paging is applied only after merging
+        docs = searcher.search(weight,
+                               :filter => filter,
+                               #:sort => sort,
+                               :num_docs => max_size,
+                               :first_doc => 0)
+        total_hits += docs.total_hits  # update total_hits
+        docs.score_docs.each { |score_doc|
+          score_doc.doc += @starts[i]   # convert doc
+          # NOTE(review): stopping early presumably relies on score_docs being
+          # ordered best-first and on insert returning false for a rejected
+          # hit -- confirm against HitQueue
+          break unless hq.insert(score_doc) # no more scores > min_score
+        }
+      }
+      score_docs = []
+      if (hq.size > first_doc)
+        # fewer hits than a full page remain after the offset
+        if (hq.size - first_doc) < num_docs
+          num_docs = hq.size - first_doc
+        end
+        # NOTE(review): unshift reverses the pop order; presumably the queue
+        # pops the weakest hit first so the page ends up best-first -- confirm
+        num_docs.times do
+          score_docs.unshift(hq.pop)
+        end
+      end
+      hq.clear
+      return TopDocs.new(total_hits, score_docs)
+    end
+    def search_each(query, filter = nil, &block)
+      weight = create_weight(query)
+      @searchers.each { |searcher|     # search each searcher
+        searcher.search_each(weight, filter, &block)
+      }
+    end
+    # rewrites the query into a query that can be processed by the search
+    # methods. For example, a Fuzzy query is turned into a massive boolean
+    # query.
+    #
+    # original:: The original query to be rewritten.
+    def rewrite(original)
+      #print "multi_searcher#rewrite: #{original}\n"
+      queries = []
+      @searchers.each { |searcher|
+        queries << searcher.rewrite(original)
+      }
+      return queries.first.combine(queries)
+    end
+    # Returns an Explanation that describes how +doc+ scored against
+    # +query+.
+    #
+    # This is intended to be used in developing Similarity implementations,
+    # and, for good performance, should not be displayed with every hit.
+    # Computing an explanation is as expensive as executing the query over the
+    # entire index.
+    def explain(query, doc)
+      i = sub_searcher(doc)
+      return @searchers[i].explain(create_weight(query), doc-@starts[i])
+    end
+  end
+  # Document Frequency cache acting as a Dummy-Searcher.
+  # This class is no full-fledged Searcher, but only supports
+  # the methods necessary to initialize Weights.
+  class CachedDfSource
+    attr_reader :max_doc, :similarity
+    def initialize(df_map, max_doc, similarity)
+      @df_map = df_map
+      @max_doc = max_doc
+      @similarity = similarity
+    end
+    def doc_freq(term)
+      return @df_map[term]
+    end
+    def doc_freqs(terms)
+      result = Array.new
+      terms.each { |term|
+        result << doc_freq(term)
+      }
+      return result
+    end
+    def rewrite(query)
+      # this is a bit of a hack. We know that a query which
+      # creates a Weight based on this Dummy-Searcher is
+      # always already rewritten (see preparedWeight()).
+      # Therefore we just return the unmodified query here
+      return query
+    end
+  end
+end