RubyGems - gonzui - Versions diffs - 1.2-x86-mswin32-60 - Mend

gonzui 1.2-x86-mswin32-60

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (120) hide show

data/AUTHORS.txt +9 -0
data/History.txt +5539 -0
data/Manifest.txt +115 -0
data/PostInstall.txt +17 -0
data/README.rdoc +149 -0
data/Rakefile +28 -0
data/bin/gonzui-db +167 -0
data/bin/gonzui-import +177 -0
data/bin/gonzui-remove +58 -0
data/bin/gonzui-search +68 -0
data/bin/gonzui-server +176 -0
data/bin/gonzui-update +53 -0
data/data/gonzui/catalog/catalog.ja +80 -0
data/data/gonzui/doc/favicon.ico +0 -0
data/data/gonzui/doc/folder.png +0 -0
data/data/gonzui/doc/gonzui.css +279 -0
data/data/gonzui/doc/gonzui.js +111 -0
data/data/gonzui/doc/text.png +0 -0
data/data/gonzuirc.sample +29 -0
data/ext/autopack/autopack.c +88 -0
data/ext/autopack/extconf.rb +3 -0
data/ext/delta/delta.c +147 -0
data/ext/delta/extconf.rb +5 -0
data/ext/texttokenizer/extconf.rb +5 -0
data/ext/texttokenizer/texttokenizer.c +93 -0
data/ext/xmlformatter/extconf.rb +5 -0
data/ext/xmlformatter/xmlformatter.c +207 -0
data/lib/gonzui.rb +59 -0
data/lib/gonzui/apt.rb +193 -0
data/lib/gonzui/autopack.so +0 -0
data/lib/gonzui/bdbdbm.rb +118 -0
data/lib/gonzui/cmdapp.rb +14 -0
data/lib/gonzui/cmdapp/app.rb +175 -0
data/lib/gonzui/cmdapp/search.rb +134 -0
data/lib/gonzui/config.rb +117 -0
data/lib/gonzui/content.rb +19 -0
data/lib/gonzui/dbm.rb +673 -0
data/lib/gonzui/deindexer.rb +162 -0
data/lib/gonzui/delta.rb +49 -0
data/lib/gonzui/delta.so +0 -0
data/lib/gonzui/extractor.rb +347 -0
data/lib/gonzui/fetcher.rb +309 -0
data/lib/gonzui/gettext.rb +144 -0
data/lib/gonzui/importer.rb +84 -0
data/lib/gonzui/indexer.rb +316 -0
data/lib/gonzui/info.rb +80 -0
data/lib/gonzui/license.rb +100 -0
data/lib/gonzui/logger.rb +48 -0
data/lib/gonzui/monitor.rb +177 -0
data/lib/gonzui/progressbar.rb +235 -0
data/lib/gonzui/remover.rb +38 -0
data/lib/gonzui/searcher.rb +330 -0
data/lib/gonzui/searchquery.rb +235 -0
data/lib/gonzui/searchresult.rb +111 -0
data/lib/gonzui/texttokenizer.so +0 -0
data/lib/gonzui/updater.rb +254 -0
data/lib/gonzui/util.rb +415 -0
data/lib/gonzui/vcs.rb +128 -0
data/lib/gonzui/webapp.rb +25 -0
data/lib/gonzui/webapp/advsearch.rb +123 -0
data/lib/gonzui/webapp/filehandler.rb +24 -0
data/lib/gonzui/webapp/jsfeed.rb +61 -0
data/lib/gonzui/webapp/markup.rb +445 -0
data/lib/gonzui/webapp/search.rb +269 -0
data/lib/gonzui/webapp/servlet.rb +319 -0
data/lib/gonzui/webapp/snippet.rb +155 -0
data/lib/gonzui/webapp/source.rb +37 -0
data/lib/gonzui/webapp/stat.rb +137 -0
data/lib/gonzui/webapp/top.rb +63 -0
data/lib/gonzui/webapp/uri.rb +140 -0
data/lib/gonzui/webapp/webrick.rb +48 -0
data/lib/gonzui/webapp/xmlformatter.so +0 -0
data/script/console +10 -0
data/script/destroy +14 -0
data/script/generate +14 -0
data/script/makemanifest.rb +21 -0
data/tasks/extconf.rake +13 -0
data/tasks/extconf/autopack.rake +43 -0
data/tasks/extconf/delta.rake +43 -0
data/tasks/extconf/texttokenizer.rake +43 -0
data/tasks/extconf/xmlformatter.rake +43 -0
data/test/_external_tools.rb +13 -0
data/test/_test-util.rb +142 -0
data/test/foo/Makefile.foo +66 -0
data/test/foo/bar.c +5 -0
data/test/foo/bar.h +6 -0
data/test/foo/foo.c +25 -0
data/test/foo/foo.spec +33 -0
data/test/test_apt.rb +42 -0
data/test/test_autopack_extn.rb +7 -0
data/test/test_bdbdbm.rb +79 -0
data/test/test_cmdapp-app.rb +35 -0
data/test/test_cmdapp-search.rb +99 -0
data/test/test_config.rb +28 -0
data/test/test_content.rb +15 -0
data/test/test_dbm.rb +171 -0
data/test/test_deindexer.rb +50 -0
data/test/test_delta.rb +66 -0
data/test/test_extractor.rb +78 -0
data/test/test_fetcher.rb +75 -0
data/test/test_gettext.rb +50 -0
data/test/test_gonzui.rb +11 -0
data/test/test_helper.rb +10 -0
data/test/test_importer.rb +56 -0
data/test/test_indexer.rb +37 -0
data/test/test_info.rb +82 -0
data/test/test_license.rb +49 -0
data/test/test_logger.rb +60 -0
data/test/test_monitor.rb +23 -0
data/test/test_searcher.rb +37 -0
data/test/test_searchquery.rb +27 -0
data/test/test_searchresult.rb +43 -0
data/test/test_texttokenizer.rb +47 -0
data/test/test_updater.rb +95 -0
data/test/test_util.rb +149 -0
data/test/test_vcs.rb +61 -0
data/test/test_webapp-markup.rb +42 -0
data/test/test_webapp-util.rb +19 -0
data/test/test_webapp-xmlformatter.rb +19 -0
metadata +292 -0

data/lib/gonzui/importer.rb ADDED Viewed

@@ -0,0 +1,84 @@
+#
+# importer.rb - import contents to gonzui.db
+#
+# Copyright (C) 2004-2005 Satoru Takabayashi <satoru@namazu.org>
+#     All rights reserved.
+#     This is free software with ABSOLUTELY NO WARRANTY.
+#
+# You can redistribute it and/or modify it under the terms of
+# the GNU General Public License version 2.
+#
+require 'uri'
+module Gonzui
+  class ImporterError < GonzuiError; end
+  class Importer < AbstractUpdater
+    def initialize(config, options = {})
+      super(config, options)
+      # to be initialized
+      @last_package_name = nil
+    end
+    attr_reader :last_package_name
+    private
+    def import_package(fetcher, source_uri)
+      package_name = fetcher.package_name
+      raise ImporterError.new("#{package_name}: already exists") if
+        @dbm.has_package?(package_name)
+      relative_paths = fetcher.collect
+      pbar = make_progress_bar(package_name, relative_paths.length)
+      begin
+        relative_paths.each {|relative_path|
+          begin
+            normalized_path = File.join(package_name, relative_path)
+            content = nil
+            begin
+              content = fetcher.fetch(relative_path)
+            rescue => e
+              vprintf("fetch failed: %s: %s\n%s", relative_path, e.message)
+              next
+            end
+            index_content(source_uri, normalized_path, content)
+          ensure
+            pbar.inc
+          end
+        }
+      ensure
+        @dbm.flush_cache
+      end
+      pbar.finish
+      @npackages += 1
+      @last_package_name = package_name
+    end
+    def do_task_name
+      "imported"
+    end
+    public
+    def import(source_uri)
+      fetcher = Fetcher.new(@config, source_uri)
+      begin
+        import_package(fetcher, source_uri)
+      ensure
+        fetcher.finish
+      end
+    end
+    def summary
+      summary = super
+      if @config.verbose
+        stat = Indexer.statistics
+        summary += "\n" + stat unless stat.empty?
+      end
+      return summary
+    end
+    def finish
+      @dbm.close
+    end
+  end
+end

data/lib/gonzui/indexer.rb ADDED Viewed

@@ -0,0 +1,316 @@
+#
+# indexer.rb - indexer implementation
+#
+# Copyright (C) 2004-2005 Satoru Takabayashi <satoru@namazu.org>
+#     All rights reserved.
+#     This is free software with ABSOLUTELY NO WARRANTY.
+#
+# You can redistribute it and/or modify it under the terms of
+# the GNU General Public License version 2.
+#
+require 'ftools'
+require 'digest/md5'
+require 'langscan'
+module Gonzui
+  class IndexerError < GonzuiError; end
+  class Indexer
+    include Util
+    @@performance_monitor = PerformanceMonitor.new
+    # Returns a textual performance summary built from the class-wide
+    # performance monitor, or an empty string when the monitor reports
+    # itself empty.
+    #
+    # Two tables are formatted: the first is keyed on Indexer#index, the
+    # second on Indexer#index_content with one :scan entry per LangScan
+    # module plus add_fragment and flush_cache.
+    def self.statistics
+      return "" if @@performance_monitor.empty?
+      pm = @@performance_monitor
+      summary = "Performance statistics:\n"
+      summary << pm.heading
+      summary << pm.format([Indexer, :index],
+                           [Indexer, :read_content],
+                           [Indexer, :add_license],
+                           [Indexer, :index_content])
+      labels = LangScan.modules.map {|m|
+        [m, :scan]
+      }.push([Indexer, :add_fragment],
+             [Indexer, :flush_cache])
+      summary << pm.format([Indexer, :index_content], *labels)
+      return summary
+    end
+    # Prepares an indexer for a single content.
+    #
+    # config          - configuration object (verbose, utf8 and
+    #                   noindex_formats are read from it in this class).
+    # dbm             - database handle that receives all index data.
+    # source_uri      - URI the package came from; its #path and #to_s are
+    #                   used.
+    # normalized_path - "package/relative/path" style path; must contain at
+    #                   least one "/" (see get_package_name).
+    # content         - object responding to text, text=, length and mtime.
+    # options         - :noindex_formats overrides config.noindex_formats.
+    def initialize(config, dbm, source_uri, normalized_path, content,
+                   options = {})
+      @config = config
+      @dbm = dbm
+      @normalized_path = normalized_path
+      @source_uri = source_uri
+      @content = content
+      # The hash is taken from the text as given, i.e. before read_content
+      # normalizes it.
+      @content_hash = Digest::MD5.hexdigest(content.text)
+      @noindex_formats = (options[:noindex_formats] or @config.noindex_formats)
+      @package_name = get_package_name
+      @seqno      = 0
+      @word_cache = {}
+      @wordinfo_cache = {}
+      @digest_cache = []
+      # to be initialized
+      @format_id  = nil
+      @license_id = nil
+      @license_abbrev = nil
+      @encoding = nil
+      @nlines = nil
+      @package_id = nil
+      @path_id = nil
+      @bols = [] # positions of beginning of lines
+      @indexed_p = false
+      initialize_profilers_if_necessary
+    end
+    def initialize_profilers_if_necessary
+      # profiler
+      if @config.verbose
+        @@performance_monitor.profile(Indexer, :index)
+        @@performance_monitor.profile(Indexer, :read_content)
+        @@performance_monitor.profile(Indexer, :index_content)
+        @@performance_monitor.profile(Indexer, :add_fragment)
+        @@performance_monitor.profile(Indexer, :add_license)
+        @@performance_monitor.profile(Indexer, :flush_cache)
+      end
+    end
+    def read_content
+      content, @encoding = normalize_content(@content.text)
+      @content.text = content
+      @nlines = 0
+      pos = 0
+      @content.text.each_line {|line|
+        @bols.push(pos)
+        @nlines += 1
+        pos += line.length
+      }
+    end
+    # allow 0x09 (TAB), 0x0a (LF), 0x0c(^L), 0x0d (CR) 0x1b (ESC)
+    allowed = [0x09, 0x0a, 0x0c, 0x0d, 0x1b]
+    # Build a character class holding every control byte below 0x20 that is
+    # not in the allowed list; this runs once, while the class body is
+    # evaluated.
+    pattern = "["
+    pattern << (0...0x20).find_all {|x|
+      not allowed.include?(x)
+    }.map {|x| sprintf("\\x%02x", x) }.join
+    pattern << "]"
+    BinaryRegexp = Regexp.new(pattern)
+    # Returns a MatchData (truthy) when the content contains at least one
+    # disallowed control byte, nil otherwise.
+    def binary_content?(content)
+      BinaryRegexp.match(content)
+    end
+    def convert_to_utf8(content)
+      encoding = "ascii"
+      if binary_content?(content)
+        encoding = "binary"
+      else
+        if @config.utf8
+          content, encoding = UTF8.to_utf8(content)
+        end
+      end
+      return content, encoding
+    end
+    def normalize_content(content)
+      content, encoding = convert_to_utf8(content)
+      unless encoding == "binary"
+        content = content.untabify
+        content.gsub!(/\r\n?/, "\n")
+      end
+      return content, encoding
+    end
+    def get_package_name
+      parts = @normalized_path.split("/")
+      if parts.length < 2
+        raise IndexerError.new("normalized path should not be flat")
+      end
+      package_name = parts.first
+      if package_name.size == 0 || package_name == "." || package_name == ".."
+        package_name = File.basename(@source_uri.path)
+      end
+      return package_name
+    end
+    def add_text(fragment, type_id)
+      text = fragment.text
+      byteno = fragment.byteno
+      TextTokenizer.each_word(text) {|word, pos|
+        add_word(word, byteno + pos, type_id)
+      }
+    end
+    # Registers one scanned fragment.  Fragments of a splittable type are
+    # tokenized into words; any other fragment is registered as a single
+    # word.  In both cases a (byteno, length, type_id) triple is appended
+    # to the digest cache.
+    def add_fragment(fragment)
+      type_id = @dbm.get_type_id(fragment.type)
+      if LangScan::Type.splittable?(fragment.type)
+        add_text(fragment, type_id)
+      else
+        add_word(fragment.text, fragment.byteno, type_id)
+      end
+      @digest_cache.push(fragment.byteno, fragment.text.length, type_id)
+    end
+    # Writes everything cached for the current path to the database:
+    # the per-word info (keyed by the packed path id / word id pair), the
+    # sorted list of word ids, the digest triples and the line-start
+    # offsets.  Afterwards the word-info cache is cleared and the word id
+    # counter is flushed.
+    def flush_cache
+      # Word ids are sorted before dumping (presumably so that
+      # DeltaDumper.dump_ids can delta-encode them -- confirm).
+      all_word_ids = @wordinfo_cache.keys.sort!
+      all_word_ids.each {|word_id|
+        path_word_id = AutoPack.pack_id2(@path_id, word_id)
+        @dbm.pathwordid_info[path_word_id] =
+          DeltaDumper.dump_tuples(WordInfo, @wordinfo_cache[word_id])
+      }
+      @dbm.put_pathid_wordids(@package_id, @path_id, all_word_ids)
+      @dbm.pathid_wordids[@path_id] = DeltaDumper.dump_ids(all_word_ids)
+      @dbm.pathid_digest[@path_id] =
+        DeltaDumper.dump_tuples(DigestInfo, @digest_cache)
+      @dbm.pathid_bols[@path_id] = DeltaDumper.dump_fixnums(@bols)
+      @wordinfo_cache.clear
+      @dbm.word_id_counter.flush
+    end
+    # Shared helper for recording a per-package property (format, license).
+    #
+    # abbrev, name - identify the property value; passed to get_id2.
+    # counter      - name of the @dbm method returning the id counter.
+    # make_key     - name of the @dbm method that builds the statistics
+    #                counter key from abbrev.
+    # pkgid_ids    - name of the @dbm method returning the table that maps
+    #                a package id to property ids.
+    #
+    # Returns the id obtained for the property value.
+    def add_property(abbrev, name, counter, make_key, pkgid_ids)
+      id = @dbm.send(counter).get_id2(abbrev, name)
+      @dbm.send(pkgid_ids)[@package_id] = id
+      @dbm.increase_counter(@dbm.send(make_key, abbrev))
+      return id
+    end
+    # Records the content's format for the current package and remembers
+    # the resulting id in @format_id.
+    def add_format(format_abbrev, format_name)
+      @format_id = add_property(format_abbrev,
+                                format_name,
+                                :format_id_counter,
+                                :make_ncontents_by_format_key,
+                                :pkgid_fmtids)
+    end
+    # Runs LicenseDetector over the content text, records the detected
+    # license for the current package, and remembers its id and
+    # abbreviation in @license_id / @license_abbrev.
+    def add_license
+      detector = LicenseDetector.new(@content.text)
+      license = detector.detect
+      @license_id = add_property(license.abbrev,
+                                 license.name,
+                                 :license_id_counter,
+                                 :make_ncontents_by_license_key,
+                                 :pkgid_lcsids)
+      @license_abbrev = license.abbrev
+    end
+    # Allocates a new path id for the normalized path and registers the
+    # path <-> id mappings as well as the package -> path id entry.
+    # The path must not be registered yet; assert_equal enforces this.
+    def add_path
+      assert_equal(false, @dbm.path_pathid.include?(@normalized_path))
+      @path_id = @dbm.path_id_counter.make_new_id
+      @dbm.path_pathid[@normalized_path] = @path_id
+      @dbm.pathid_path[@path_id] = @normalized_path
+      @dbm.pkgid_pathids[@package_id] = @path_id
+    end
+    def get_fragments(scanner)
+      @@performance_monitor.profile(scanner, :scan) if @config.verbose
+      fragments = []
+      scanner.scan(@content.text) {|fragment|
+        fragments.push(fragment) if LangScan::Type.include?(fragment.type)
+      }
+      fragments = fragments.sort_by {|fragment| fragment.byteno }
+      return fragments
+    end
+    def add_word(word, byteno, type_id)
+      word_id = @dbm.word_id_counter.get_id(word)
+      array = (@wordinfo_cache[word_id] ||= [])
+      array.push(@seqno, byteno, type_id)
+      @seqno += 1
+    end
+    # Sets @package_id.  An already known package name reuses its existing
+    # id; otherwise a new id is allocated and the name <-> id mappings, the
+    # source URI and the package options are stored.
+    def add_package_if_necessary
+      if @dbm.has_package?(@package_name)
+        @package_id = @dbm.get_package_id(@package_name)
+      else
+        @package_id = @dbm.package_id_counter.make_new_id
+        @dbm.pkg_pkgid[@package_name] = @package_id
+        @dbm.pkgid_pkg[@package_id] = @package_name
+        @dbm.pkgid_src[@package_id] = @source_uri.to_s
+        @dbm.put_package_options(@package_id)
+      end
+    end
+    # Returns the packed ContentInfo record for the content: its length and
+    # mtime, the current time as indexing time, and the format id, license
+    # id, line count and indexed flag held by this indexer at call time.
+    def make_content_info
+      ContentInfo.dump(@content.length, @content.mtime.to_i,
+                       Time.now.to_i, @format_id, @license_id,
+                       @nlines, @indexed_p)
+    end
+    def index_content(scanner)
+      fragments = []
+      begin
+        fragments = get_fragments(scanner)
+      rescue
+        # fallback to the text scanner
+        unless scanner == LangScan::Text
+          vprintf("#{@normalized_path}: fallback to LangScan::Text")
+          scanner = LangScan::Text
+          retry
+        end
+      end
+      fragments.each {|fragment| add_fragment(fragment) }
+      flush_cache
+      @dbm.increase_counter(:ncontents_indexed)
+      @dbm.increase_counter(:nlines_indexed, @nlines)
+      @indexed_p = true
+    end
+    # Stores the data recorded for every content, indexed or not: format,
+    # license, owning package, the text itself, the packed ContentInfo and
+    # the content hash.  A verbose message names the format, path and
+    # license abbreviation.
+    def add_content_common(format_abbrev, format_name)
+      add_format(format_abbrev, format_name)
+      add_license
+      @dbm.pathid_pkgid[@path_id] = @package_id
+      @dbm.pathid_content[@path_id] = @content.text
+      # Must follow add_format / add_license: the info record embeds the
+      # ids they set.
+      @dbm.pathid_info[@path_id] = make_content_info
+      @dbm.pathid_hash[@path_id] = @content_hash
+      vprintf("added (%s): %s (%s)", format_abbrev,
+              @normalized_path, @license_abbrev)
+    end
+    # Stores binary content under the "binary" format without indexing it.
+    def add_binary_content
+      add_content_common("binary", "Binary")
+    end
+    def make_scanner
+      scanner = LangScan.choose(@normalized_path, @content.text)
+      scanner = LangScan::Text if scanner.nil?
+      return scanner
+    end
+    def indexable?(scanner)
+      not @noindex_formats.include?(scanner.abbrev)
+    end
+    # Stores text content.  The content is indexed first unless its format
+    # is excluded from indexing; the common content data is stored either
+    # way, using the scanner's abbreviation and name as the format.
+    def add_content_with_indexing
+      scanner = make_scanner
+      if indexable?(scanner)
+        # Runs before add_content_common so that the indexed flag it sets
+        # is already in place when the content info record is built.
+        index_content(scanner)
+      else
+        vprintf("skip indexing: %s", @normalized_path)
+      end
+      add_content_common(scanner.abbrev, scanner.name)
+    end
+    def add_content
+      if @encoding == "binary"
+        add_binary_content
+      else
+        add_content_with_indexing
+      end
+    end
+    public
+    # Entry point: normalizes the content, makes sure the package exists,
+    # registers the path, then stores (and, where applicable, indexes) the
+    # content.  The order matters: later steps use the encoding, package id
+    # and path id set by the earlier ones.
+    def index
+      read_content
+      add_package_if_necessary
+      add_path
+      add_content
+    end
+  end
+end

data/lib/gonzui/info.rb ADDED Viewed

@@ -0,0 +1,80 @@
+#
+# info.rb - information classes
+#
+# Copyright (C) 2004-2005 Satoru Takabayashi <satoru@namazu.org>
+#     All rights reserved.
+#     This is free software with ABSOLUTELY NO WARRANTY.
+#
+# You can redistribute it and/or modify it under the terms of
+# the GNU General Public License version 2.
+#
+module Gonzui
+  module BytenoMixin
+    def end_byteno
+      byteno + length
+    end
+    def range
+      byteno ... (byteno + length)
+    end
+  end
+  # One occurrence of a word in a content.
+  WordInfo = Struct.new(:word_id, :path_id,
+                        :seqno, :byteno, :type_id, :type, :lineno)
+  class WordInfo
+    include BytenoMixin
+    # dump info
+    # NOTE(review): presumably consumed by DeltaDumper.dump_tuples;
+    # UnitSize looks like the number of values stored per occurrence and
+    # DeltaSize the number of leading values that are delta-encoded --
+    # confirm against DeltaDumper.
+    DeltaSize = 2
+    UnitSize = 3
+    # True when target_type is :all or equals this occurrence's type.
+    def match?(target_type)
+      target_type == :all or target_type == self.type
+    end
+  end
+  # A typed span (byte offset, length, type) within a content.
+  DigestInfo = Struct.new(:byteno, :length, :type_id, :type)
+  class DigestInfo
+    include BytenoMixin
+    # dump info
+    # NOTE(review): presumably consumed by DeltaDumper.dump_tuples;
+    # UnitSize looks like the number of values stored per span and
+    # DeltaSize the number of leading values that are delta-encoded --
+    # confirm against DeltaDumper.
+    DeltaSize = 1
+    UnitSize = 3
+  end
+  ContentInfo = Struct.new(:size, :mtime, :itime,
+                           :format_id, :license_id,
+                           :nlines, :indexed_p)
+  class ContentInfo
+    extend Util
+    PACK_FORMAT = "w*"
+    def self.load(dump)
+      info = self.new(*dump.unpack(PACK_FORMAT))
+      info.indexed_p = if info.indexed_p == 1 then true else false end
+      return info
+    end
+    def self.dump(size, mtime, itime, format_id,
+                  license_id,  nlines, indexed_p)
+      indexed_p = if indexed_p then 1 else 0 end
+      # FIXME: It could happen for some cases.
+      if mtime < 0
+        vprintf("minus mtime found: %d", mtime)
+        mtime = Time.now.to_i
+      end
+      [size, mtime, itime, format_id,
+        license_id, nlines, indexed_p].pack(PACK_FORMAT)
+    end
+    def indexed?
+      self.indexed_p
+    end
+  end
+  # A located span: byte offset, line number and length.  BytenoMixin adds
+  # end_byteno and range.
+  Occurrence = Struct.new(:byteno, :lineno, :length)
+  class Occurrence
+    include BytenoMixin
+  end
+end