RubyGems - fastri - Versions diffs - 0.1.1.1 → 0.2.0.1 - Mend

fastri 0.1.1.1 → 0.2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17) hide show

data/CHANGES +12 -0
data/Rakefile +5 -4
data/bin/fastri-server +77 -35
data/bin/fri +145 -8
data/bin/ri-emacs +1 -1
data/lib/fastri/full_text_index.rb +245 -0
data/lib/fastri/full_text_indexer.rb +100 -0
data/lib/fastri/ri_index.rb +30 -0
data/lib/fastri/ri_service.rb +6 -0
data/lib/fastri/util.rb +83 -0
data/lib/fastri/version.rb +6 -1
data/test/test_full_text_index.rb +182 -0
data/test/test_full_text_indexer.rb +84 -0
data/test/test_integration_full_text_index.rb +43 -0
data/test/test_ri_index.rb +99 -1
data/test/test_util.rb +38 -0
metadata +14 -3

data/lib/fastri/full_text_indexer.rb ADDED Viewed

@@ -0,0 +1,100 @@
+# Copyright (C) 2006  Mauricio Fernandez <mfp@acm.org>
+#
+require 'fastri/version'
+module FastRI
+class FullTextIndexer
+  WORD_RE    = /[A-Za-z0-9_]+/
+  NONWORD_RE = /[^A-Za-z0-9_]+/
+  MAGIC      = "FastRI full-text index #{FASTRI_FT_INDEX_FORMAT}\0"
+  def initialize(max_querysize)
+    @documents = []
+    @doc_hash  = {}
+    @max_wordsize = max_querysize
+  end
+  def add_document(name, data, metadata = {})
+    @doc_hash[name] = [data, metadata]
+    @documents << name
+  end
+  def data(name)
+    @doc_hash[name][0]
+  end
+  def documents
+    @documents = @documents.uniq
+  end
+  def preprocess(str)
+    str.gsub(/\0/,"")
+  end
+  require 'strscan'
+  def find_suffixes(text, offset)
+    find_suffixes_simple(text, WORD_RE, NONWORD_RE, offset)
+  end
+  def find_suffixes_simple(string, word_re, nonword_re, offset)
+    suffixes = []
+    sc = StringScanner.new(string)
+    until sc.eos?
+      sc.skip(nonword_re)
+      len = string.size
+      loop do
+        break if sc.pos == len
+        suffixes << offset + sc.pos
+        skipped_word = sc.skip(word_re)
+        break unless skipped_word
+        loop do
+          skipped_nonword = sc.skip(nonword_re)
+          break unless skipped_nonword
+        end
+      end
+    end
+    suffixes
+  end
+  require 'enumerator'
+  def build_index(full_text_IO, suffix_array_IO)
+    fulltext = ""
+    io = StringIO.new(fulltext)
+    io.write MAGIC
+    full_text_IO.write MAGIC
+    documents.each do |doc|
+      data, metadata = @doc_hash[doc]
+      io.write(data)
+      full_text_IO.write(data)
+      meta_txt = Marshal.dump(metadata)
+      footer = "\0....#{doc}\0#{meta_txt}\0"
+      footer[1,4] = [footer.size - 5].pack("V")
+      io.write(footer)
+      full_text_IO.write(footer)
+    end
+    scanner = StringScanner.new(fulltext)
+    scanner.scan(Regexp.new(Regexp.escape(MAGIC)))
+    count = 0
+    suffixes = []
+    until scanner.eos?
+      count += 1
+      start = scanner.pos
+      text = scanner.scan_until(/\0/)
+      suffixes.concat find_suffixes(text[0..-2], start)
+      len = scanner.scan(/..../).unpack("V")[0]
+      #puts "LEN: #{len}  #{scanner.pos}  #{scanner.string.size}"
+      #puts "#{scanner.string[scanner.pos,20].inspect}"
+      scanner.pos += len
+      #scanner.terminate if !text
+    end
+    sorted = suffixes.sort_by{|x| fulltext[x, @max_wordsize]}
+    sorted.each_slice(10000){|x| suffix_array_IO.write x.pack("V*")}
+    nil
+  end
+end # class FullTextIndexer
+end # module FastRI

data/lib/fastri/ri_index.rb CHANGED Viewed

@@ -3,9 +3,37 @@
 require 'rdoc/ri/ri_cache'
 require 'rdoc/ri/ri_reader'
+require 'rdoc/ri/ri_descriptions'
 require 'fastri/version'
+# This is taken straight from 1.8.5's rdoc/ri/ri_descriptions.rb.
+# Older releases have a buggy #merge_in that crashes when old.comment is nil.
+if RUBY_RELEASE_DATE < "2006-06-15" # plain string comparison of the date; only interpreters released earlier get the replacement
+  module ::RI # :nodoc:
+    class ModuleDescription # :nodoc:
+      remove_method :merge_in # drop the existing definition before redefining it below
+      # Merge another class description (+old+) into this one: methods, attributes, constants, includes and the comment.
+      def merge_in(old)
+        merge(@class_methods, old.class_methods)
+        merge(@instance_methods, old.instance_methods)
+        merge(@attributes, old.attributes)
+        merge(@constants, old.constants)
+        merge(@includes, old.includes)
+        if @comment.nil? || @comment.empty?
+          @comment = old.comment # no comment of our own: adopt the other one (which may itself be nil)
+        else
+          unless old.comment.nil? or old.comment.empty? then # nil check covers the old.comment == nil case named in the header comment
+            @comment << SM::Flow::RULE.new # presumably a rule separating the two comments -- confirm against SM::Flow
+            @comment.concat old.comment
+          end
+        end
+      end
+    end
+  end
+end
 module FastRI
 # This class provides the same functionality as RiReader, with some
@@ -466,6 +494,8 @@ class RiIndex
       when /[#.]\S+/
         method_entry = get_entry(@method_array, entry_or_name, MethodEntry, nil)
         source_paths_for(method_entry)
+      when ""
+        []
       else
         class_entry = get_entry(@namespace_array, entry_or_name, ClassEntry, nil)
         source_paths_for(class_entry)

data/lib/fastri/ri_service.rb CHANGED Viewed

@@ -273,6 +273,12 @@ class RiService
       m.add_matcher(:partial_ci) do
         m.yield @ri_reader.methods_under_matching("", /#{sep_re}#{name}/i, true)
       end
+      m.add_matcher(:anywhere) do
+        m.yield @ri_reader.methods_under_matching("", /#{sep_re}.*#{name}/, true)
+      end
+      m.add_matcher(:anywhere_ci) do
+        m.yield @ri_reader.methods_under_matching("", /#{sep_re}.*#{name}/i, true)
+      end
     end
     matcher.get_matches(order)
   end

data/lib/fastri/util.rb ADDED Viewed

@@ -0,0 +1,83 @@
+# Copyright (C) 2006  Mauricio Fernandez <mfp@acm.org>
+require 'rdoc/ri/ri_paths'
+begin
+  require 'rubygems'
+rescue LoadError
+end
+require 'rdoc/ri/ri_writer'
+module FastRI
+module Util
+  # Return an array of <tt>[name, version, path]</tt> arrays corresponding to
+  # the last version of each installed gem. +path+ is the base path of the RI
+  # documentation from the gem. If the version cannot be determined, it will
+  # be +nil+, and the corresponding gem might be repeated in the output array
+  # (once per version).
+  def gem_directories_unique
+    return [] unless defined? Gem
+    gemdirs = Dir["#{Gem.path}/doc/*/ri"]
+    gems = Hash.new{|h,k| h[k] = []}
+    gemdirs.each do |path|
+      gemname, version = %r{/([^/]+)-(.*)/ri$}.match(path).captures
+      if gemname.nil? # doesn't follow any conventions :(
+        gems[path[%r{/([^/]+)/ri$}, 1]] << [nil, path]
+      else
+        gems[gemname] << [version, path]
+      end
+    end
+    gems.sort_by{|name, _| name}.map do |name, versions|
+      version, path = versions.sort.last
+      [name, version, File.expand_path(path)]
+    end
+  end
+  module_function :gem_directories_unique
+  # Return the <tt>[name, version, path]</tt> array for the gem owning the RI
+  # information stored in +path+, or +nil+.
+  def gem_info_for_path(path, gem_dir_info = FastRI::Util.gem_directories_unique)
+    path = File.expand_path(path)
+    matches = gem_dir_info.select{|name, version, gem_path| path.index(gem_path) == 0}
+    matches.sort_by{|name, version, gem_path| [gem_path.size, version, name]}.last
+  end
+  module_function :gem_info_for_path
+  # Return the +full_name+ (in ClassEntry or MethodEntry's sense) given a path
+  # to a .yaml file relative to a "base RI DB path".
+  def gem_relpath_to_full_name(relpath)
+    case relpath
+    when %r{^(.*)/cdesc-([^/]*)\.yaml$}
+      path, name = $~.captures
+      (path.split(%r{/})[0..-2] << name).join("::")
+    when %r{^(.*)/([^/]*)-(i|c)\.yaml$}
+      path, escaped_name, type = $~.captures
+      name = RI::RiWriter.external_to_internal(escaped_name)
+      sep = ( type == 'c' ) ? "." : "#"
+      path.gsub("/", "::") + sep + name
+    end
+  end
+  module_function :gem_relpath_to_full_name
+  # Returns the home directory (win32-aware).
+  def find_home
+    # stolen from RubyGems
+    ['HOME', 'USERPROFILE'].each do |homekey|
+      return ENV[homekey] if ENV[homekey]
+    end
+    if ENV['HOMEDRIVE'] && ENV['HOMEPATH']
+      return "#{ENV['HOMEDRIVE']}:#{ENV['HOMEPATH']}"
+    end
+    begin
+      File.expand_path("~")
+    rescue StandardError => ex
+      if File::ALT_SEPARATOR
+        "C:/"
+      else
+        "/"
+      end
+    end
+  end
+  module_function :find_home
+end # module Util
+end # module FastRI

data/lib/fastri/version.rb CHANGED Viewed

@@ -2,7 +2,12 @@
 #
 module FastRI
-  FASTRI_VERSION      = "0.1.1"
+  FASTRI_VERSION      = "0.2.0"
+  FASTRI_RELEASE_DATE = "2006-11-15"
   FASTRI_INDEX_FORMAT = "0.1.0"
+  FASTRI_FT_INDEX_FORMAT = "0.0.0"
+  FASTRI_FT_INDEX_FORMAT_MAJOR = "0"
+  FASTRI_FT_INDEX_FORMAT_MINOR = "0"
+  FASTRI_FT_INDEX_FORMAT_TEENY = "0"
 end
 # vi: set sw=2 expandtab:

data/test/test_full_text_index.rb ADDED Viewed

@@ -0,0 +1,182 @@
+require 'test/unit'
+$:.unshift File.join(File.dirname(__FILE__), "..", "lib")
+$:.unshift "lib"
+require 'fastri/full_text_index'
+class TestFullTextIndex < Test::Unit::TestCase # unit tests for FullTextIndex lookups over hand-built in-memory indexes
+  require 'stringio'
+  include FastRI
+  magic = FullTextIndexer::MAGIC
+  data = <<EOF
+#{magic}this is a test
+\r\000\000\000foo.txt\000\004\b{\000
+zzzz
+\r\000\000\000bar.txt\000\004\b{\000
+EOF
+  DATA = (data.split(/\n/) << "").join("\0") # heredoc lines rejoined with NUL separators, plus a trailing NUL
+  SUFFIXES = %w[a\ test is\ a test this zzzz].map{|w| [DATA.index(w)].pack("V")}.join("") # packed 32-bit little-endian offsets of each term within DATA
+  data = <<EOF
+#{magic}this is a test
+\r\000\000\000foo.txt\000\004\b{\000
+zzzz this
+\r\000\000\000bar.txt\000\004\b{\000
+EOF
+  DATA2 = (data.split(/\n/) << "").join("\0") # same layout as DATA, with "this" present in both documents
+  SUFFIXES2 = ["a test", "is a", "test", "this\0", "this", "zzzz"].map{|x| [DATA2.index(x)].pack("V")}.join("") # "this\0" locates the occurrence that ends the second document
+  data = <<EOF
+#{magic}this is a test
+SIZ1foo.txt\000#{Marshal.dump({:foo => :bar, :bar => 1})}
+zzzz this
+SIZ2bar.txt\000#{Marshal.dump({:foo => :baz, :bar => 42})}
+EOF
+  lines = data.split(/\n/)
+  len1 = lines[1].size - 4 + 1 # line size minus the 4-char SIZ1 placeholder, plus one (presumably the NUL added by the final join)
+  lines[1].sub!(/SIZ1/, [len1].pack("V")) # replace the placeholder with the packed length
+  len2 = lines[3].size - 4 + 1
+  lines[3].sub!(/SIZ2/, [len2].pack("V"))
+  DATA3 = (lines << "").join("\0") # like DATA2 but with non-empty marshaled metadata per document
+  SUFFIXES3 = ["a test", "is a", "test", "this\0", "this", "zzzz"].map{|x| [DATA3.index(x)].pack("V")}.join("")
+  def setup # builds one index per DATA*/SUFFIXES* fixture pair
+    @index = FullTextIndex.new_from_ios(StringIO.new(DATA), StringIO.new(SUFFIXES))
+    @index2 = FullTextIndex.new_from_ios(StringIO.new(DATA2), StringIO.new(SUFFIXES2))
+    @index3 = FullTextIndex.new_from_ios(StringIO.new(DATA3), StringIO.new(SUFFIXES3))
+  end
+  def test_new_from_ios # construction succeeds and max_query_size takes its default
+    a = nil
+    assert_nothing_raised { a = FullTextIndex.new_from_ios(StringIO.new(DATA), StringIO.new(SUFFIXES)) }
+    assert_equal(FullTextIndex::DEFAULT_OPTIONS[:max_query_size], a.max_query_size)
+  end
+  def test_lookup_basic # lookup returns a Result carrying the query, the owning path and the suffix-array index
+    %w[this is a test].each do |term|
+      result = @index.lookup(term)
+      assert_kind_of(FullTextIndex::Result, result)
+      assert_equal(term, result.query)
+      assert_equal("foo.txt", result.path)
+    end
+    assert_equal(0, @index.lookup("a").index)
+    assert_equal(2, @index.lookup("t").index)
+    assert_equal(3, @index.lookup("th").index)
+    assert_equal(4, @index.lookup("z").index)
+    assert_equal("bar.txt", @index.lookup("z").path)
+  end
+  def test_lookup_metadata # metadata is the unmarshaled hash stored in the document footer
+    assert_equal({}, @index.lookup("test").metadata)
+    assert_equal({}, @index.lookup("zzzz").metadata)
+    assert_equal({:foo => :bar, :bar => 1}, @index3.lookup("test").metadata)
+    assert_equal({:foo => :baz, :bar => 42}, @index3.lookup("zzz").metadata)
+  end
+  def test_Result_text # text(n) yields at most n characters from the match and stops at the end of the document
+    assert_equal("t", @index.lookup("this").text(1))
+    assert_equal("this", @index.lookup("this").text(4))
+    assert_equal("this is a ", @index.lookup("this").text(10))
+    assert_equal("this is a test ", @index.lookup("th").text(100))
+    assert_equal("test ", @index.lookup("t").text(10))
+    assert_equal("test ", @index.lookup("t").text(20))
+    assert_equal("z", @index.lookup("z").text(1))
+    assert_equal("zzzz", @index.lookup("z").text(10))
+  end
+  def test_Result_context # context(n) yields the match with up to n characters on each side, clipped to the document
+    assert_equal(" a ", @index.lookup("a").context(1))
+    assert_equal("s a t", @index.lookup("a").context(2))
+    assert_equal("is a te", @index.lookup("a").context(3))
+    assert_equal("s is a test", @index.lookup("a").context(5))
+    assert_equal("this is a test ", @index.lookup("a").context(10))
+  end
+  def test_Result_context_non_initial_entry # same clipping for a match inside the second document
+    assert_equal("zz", @index.lookup("z").context(1))
+    assert_equal("zzz", @index.lookup("z").context(2))
+    assert_equal("zzzz", @index.lookup("z").context(3))
+    assert_equal("zzzz", @index.lookup("z").context(4))
+    assert_equal("zzzz", @index.lookup("z").context(10))
+  end
+  def test_lookup_nonexistent # a term absent from the index yields nil
+    assert_nil(@index.lookup("bogus"))
+  end
+  def test_next_match_basic # next_match walks successive suffix-array entries matching the query, then returns nil
+    first = @index2.lookup("t")
+    assert_equal("foo.txt", first.path)
+    assert_equal(2, first.index)
+    assert_equal("test ", first.text(10))
+    second = @index2.next_match(first)
+    assert_equal("bar.txt", second.path)
+    assert_equal(3, second.index)
+    assert_equal("this", second.text(10))
+    third = @index2.next_match(second)
+    assert_kind_of(FullTextIndex::Result, third)
+    assert_equal(4, third.index)
+    assert_equal("this is a ", third.text(10))
+    assert_nil(@index2.next_match(third))
+  end
+  def test_next_match_restricted # a String second argument narrows the following matches to that text
+    first = @index2.lookup("t")
+    assert_equal("foo.txt", first.path)
+    assert_equal(2, first.index)
+    assert_equal("test ", first.text(10))
+    second = @index2.next_match(first, "this is")
+    assert_equal("foo.txt", second.path)
+    assert_equal(4, second.index)
+    assert_equal("this is a ", second.text(10))
+    assert_nil(@index2.next_match(first, "foo"))
+  end
+  def test_next_match_regexp # a Regexp second argument is accepted as the restriction too
+    first = @index2.lookup("t")
+    assert_equal("foo.txt", first.path)
+    assert_equal(2, first.index)
+    assert_equal("test ", first.text(10))
+    second = @index2.next_match(first, /.*test/)
+    assert_equal("foo.txt", second.path)
+    assert_equal(4, second.index)
+    assert_equal("this is a test ", second.text(20))
+  end
+  def test_next_matches # next_matches returns all remaining matches after the given result
+    first = @index2.lookup("t")
+    all = [first] + @index2.next_matches(first)
+    assert_equal([2, 3, 4], all.map{|x| x.index})
+    assert_equal(["foo.txt", "bar.txt", "foo.txt"], all.map{|x| x.path})
+    one, two, three = *all
+    assert_equal(["test ", "this", "this is a test "], all.map{|x| x.text(20)})
+  end
+  def test_next_matches_restricted # String restriction: empty array when nothing matches, otherwise only the matching entries
+    first = @index2.lookup("t")
+    assert_equal([], @index2.next_matches(first, "this is not"))
+    all = @index2.next_matches(first, "this is")
+    assert_equal(["foo.txt"], all.map{|x| x.path})
+    assert_equal([4], all.map{|x| x.index})
+    assert_equal(["this is a test "], all.map{|x| x.text(20)})
+  end
+  def test_next_matches_regexp # Regexp restriction applied to next_matches
+    first = @index2.lookup("t")
+    all = @index2.next_matches(first, /.*test/)
+    assert_equal(["foo.txt"], all.map{|x| x.path})
+    assert_equal([4], all.map{|x| x.index})
+    assert_equal(["this is a test "], all.map{|x| x.text(20)})
+  end
+end

data/test/test_full_text_indexer.rb ADDED Viewed

@@ -0,0 +1,84 @@
+require 'test/unit'
+$:.unshift File.join(File.dirname(__FILE__), "..", "lib")
+$:.unshift "lib"
+require 'fastri/full_text_indexer'
+class TestFullTextIndexer < Test::Unit::TestCase # unit tests for FullTextIndexer: document bookkeeping, suffix detection and index serialization
+  require 'stringio'
+  include FastRI
+  def setup # fresh indexer with a maximum query size of 20 for every test
+    @indexer = FullTextIndexer.new(20)
+  end
+  DATA1 =  "this is a test " * 1000
+  DATA2 =  "this is another test " * 1000
+  def test_add_document # re-adding a name replaces its data without duplicating it in #documents
+    @indexer.add_document("foo.txt", DATA1)
+    assert_equal(["foo.txt"], @indexer.documents)
+    assert_equal(DATA1, @indexer.data("foo.txt"))
+    @indexer.add_document("foo.txt", DATA2)
+    assert_equal(["foo.txt"], @indexer.documents)
+    assert_equal(DATA2, @indexer.data("foo.txt"))
+    @indexer.add_document("bar.txt", DATA2)
+    assert_equal(["foo.txt", "bar.txt"], @indexer.documents)
+    assert_equal(DATA2, @indexer.data("bar.txt"))
+  end
+  def test_preprocess # preprocess strips NUL bytes
+    data = "this is a \0foo bar\0 bla"
+    assert_equal("this is a foo bar bla", @indexer.preprocess(data))
+  end
+  def test_find_suffixes_simple # word-start offsets for several word/non-word regexp pairs and base offsets
+    data = <<EOF
+this is a simple test with these words: Aaaa01 0.1 _asdA1
+EOF
+    assert_equal([0, 5, 8, 10, 17, 22, 27, 33, 40, 47, 49, 51],
+                 @indexer.find_suffixes_simple(data, /[A-Za-z0-9_]+/, /[^A-Za-z0-9_]+/,0))
+    assert_equal([0, 5, 8, 10, 17, 22, 27, 33, 40, 52],
+                 @indexer.find_suffixes_simple(data, /[A-Za-z]+/, /[^A-Za-z]+/, 0))
+    assert_equal([0, 5, 8, 10, 17, 22, 27, 33, 40, 52].map{|x| x+10},
+                 @indexer.find_suffixes_simple(data, /[A-Za-z]+/, /[^A-Za-z]+/, 10))
+    assert_equal([0, 1, 2, 3, 5, 6, 8, 10, 11, 12, 13, 14, 15, 17, 18, 19, 20,
+                 22, 23, 24, 25, 27, 28, 29, 30, 31, 33, 34, 35, 36, 37,
+                 40, 41, 42, 43, 52, 53, 54, 55],
+                 @indexer.find_suffixes_simple(data, /[A-Za-z]/, /[^A-Za-z]+/, 0))
+    assert_equal([0, 5], @indexer.find_suffixes_simple("abcd\ndefg", /\S+/, /\s+/, 0))
+    assert_equal([1, 6], @indexer.find_suffixes_simple("abcd\ndefg", /\S+/, /\s+/, 1))
+  end
+  def test_build_index_trivial # checks the footer bytes written after the data and the suffix array size (4 bytes per word)
+    @indexer.add_document("foo.txt", DATA1)
+    fulltext    = StringIO.new("")
+    suffixarray = StringIO.new("")
+    @indexer.build_index(fulltext, suffixarray)
+    assert_equal(["\000\r\000\000\000foo.txt\000\004\b{\000\000"],
+                 fulltext.string[-200..-1].scan(/\0.*$/))
+    assert_equal(4000 * 4, suffixarray.string.size)
+  end
+  def build_index_test_helper(data, suffixes) # indexes +data+ as foo.txt and compares the written suffix array with +suffixes+ shifted by the MAGIC size and sorted
+    @indexer.add_document("foo.txt", data)
+    offset = FullTextIndexer::MAGIC.size
+    suffixes = suffixes.map{|x| x + offset}
+    sorted   = suffixes.sort_by{|i| data[i - offset]} # orders by the first character at each suffix
+    f_io  = StringIO.new("")
+    sa_io = StringIO.new("")
+    @indexer.build_index(f_io, sa_io)
+    assert_equal(sorted, sa_io.string.scan(/..../m).map{|x| x.unpack("V")[0]}) # /m so that "." also matches "\n" bytes inside packed offsets
+  end
+  def test_build_index_harder # suffix array ordering for already-sorted and shuffled word initials
+    data = <<EOF
+a bcd efghi jklmn opqrst
+EOF
+    suffixes = [0, 2, 6, 12, 18]
+    build_index_test_helper(data, suffixes)
+    data = <<EOF
+e xcd afghi zklmn bpqrst
+EOF
+    suffixes = [0, 2, 6, 12, 18]
+    build_index_test_helper(data, suffixes)
+  end
+end