gonzui 1.2
Sign up to get free protection for your applications and to get access to all the features.
- data/AUTHORS.txt +9 -0
- data/History.txt +5539 -0
- data/Manifest.txt +115 -0
- data/PostInstall.txt +17 -0
- data/README.rdoc +149 -0
- data/Rakefile +28 -0
- data/bin/gonzui-db +167 -0
- data/bin/gonzui-import +177 -0
- data/bin/gonzui-remove +58 -0
- data/bin/gonzui-search +68 -0
- data/bin/gonzui-server +176 -0
- data/bin/gonzui-update +53 -0
- data/data/gonzui/catalog/catalog.ja +80 -0
- data/data/gonzui/doc/favicon.ico +0 -0
- data/data/gonzui/doc/folder.png +0 -0
- data/data/gonzui/doc/gonzui.css +279 -0
- data/data/gonzui/doc/gonzui.js +111 -0
- data/data/gonzui/doc/text.png +0 -0
- data/data/gonzuirc.sample +29 -0
- data/ext/autopack/autopack.c +88 -0
- data/ext/autopack/extconf.rb +3 -0
- data/ext/delta/delta.c +147 -0
- data/ext/delta/extconf.rb +5 -0
- data/ext/texttokenizer/extconf.rb +5 -0
- data/ext/texttokenizer/texttokenizer.c +93 -0
- data/ext/xmlformatter/extconf.rb +5 -0
- data/ext/xmlformatter/xmlformatter.c +207 -0
- data/lib/gonzui.rb +59 -0
- data/lib/gonzui/apt.rb +193 -0
- data/lib/gonzui/bdbdbm.rb +118 -0
- data/lib/gonzui/cmdapp.rb +14 -0
- data/lib/gonzui/cmdapp/app.rb +175 -0
- data/lib/gonzui/cmdapp/search.rb +134 -0
- data/lib/gonzui/config.rb +117 -0
- data/lib/gonzui/content.rb +19 -0
- data/lib/gonzui/dbm.rb +673 -0
- data/lib/gonzui/deindexer.rb +162 -0
- data/lib/gonzui/delta.rb +49 -0
- data/lib/gonzui/extractor.rb +347 -0
- data/lib/gonzui/fetcher.rb +309 -0
- data/lib/gonzui/gettext.rb +144 -0
- data/lib/gonzui/importer.rb +84 -0
- data/lib/gonzui/indexer.rb +316 -0
- data/lib/gonzui/info.rb +80 -0
- data/lib/gonzui/license.rb +100 -0
- data/lib/gonzui/logger.rb +48 -0
- data/lib/gonzui/monitor.rb +177 -0
- data/lib/gonzui/progressbar.rb +235 -0
- data/lib/gonzui/remover.rb +38 -0
- data/lib/gonzui/searcher.rb +330 -0
- data/lib/gonzui/searchquery.rb +235 -0
- data/lib/gonzui/searchresult.rb +111 -0
- data/lib/gonzui/updater.rb +254 -0
- data/lib/gonzui/util.rb +415 -0
- data/lib/gonzui/vcs.rb +128 -0
- data/lib/gonzui/webapp.rb +25 -0
- data/lib/gonzui/webapp/advsearch.rb +123 -0
- data/lib/gonzui/webapp/filehandler.rb +24 -0
- data/lib/gonzui/webapp/jsfeed.rb +61 -0
- data/lib/gonzui/webapp/markup.rb +445 -0
- data/lib/gonzui/webapp/search.rb +269 -0
- data/lib/gonzui/webapp/servlet.rb +319 -0
- data/lib/gonzui/webapp/snippet.rb +155 -0
- data/lib/gonzui/webapp/source.rb +37 -0
- data/lib/gonzui/webapp/stat.rb +137 -0
- data/lib/gonzui/webapp/top.rb +63 -0
- data/lib/gonzui/webapp/uri.rb +140 -0
- data/lib/gonzui/webapp/webrick.rb +48 -0
- data/script/console +10 -0
- data/script/destroy +14 -0
- data/script/generate +14 -0
- data/script/makemanifest.rb +21 -0
- data/tasks/extconf.rake +13 -0
- data/tasks/extconf/autopack.rake +43 -0
- data/tasks/extconf/delta.rake +43 -0
- data/tasks/extconf/texttokenizer.rake +43 -0
- data/tasks/extconf/xmlformatter.rake +43 -0
- data/test/_external_tools.rb +13 -0
- data/test/_test-util.rb +142 -0
- data/test/foo/Makefile.foo +66 -0
- data/test/foo/bar.c +5 -0
- data/test/foo/bar.h +6 -0
- data/test/foo/foo.c +25 -0
- data/test/foo/foo.spec +33 -0
- data/test/test_apt.rb +42 -0
- data/test/test_autopack_extn.rb +7 -0
- data/test/test_bdbdbm.rb +79 -0
- data/test/test_cmdapp-app.rb +35 -0
- data/test/test_cmdapp-search.rb +99 -0
- data/test/test_config.rb +28 -0
- data/test/test_content.rb +15 -0
- data/test/test_dbm.rb +171 -0
- data/test/test_deindexer.rb +50 -0
- data/test/test_delta.rb +66 -0
- data/test/test_extractor.rb +78 -0
- data/test/test_fetcher.rb +75 -0
- data/test/test_gettext.rb +50 -0
- data/test/test_gonzui.rb +11 -0
- data/test/test_helper.rb +10 -0
- data/test/test_importer.rb +56 -0
- data/test/test_indexer.rb +37 -0
- data/test/test_info.rb +82 -0
- data/test/test_license.rb +49 -0
- data/test/test_logger.rb +60 -0
- data/test/test_monitor.rb +23 -0
- data/test/test_searcher.rb +37 -0
- data/test/test_searchquery.rb +27 -0
- data/test/test_searchresult.rb +43 -0
- data/test/test_texttokenizer.rb +47 -0
- data/test/test_updater.rb +95 -0
- data/test/test_util.rb +149 -0
- data/test/test_vcs.rb +61 -0
- data/test/test_webapp-markup.rb +42 -0
- data/test/test_webapp-util.rb +19 -0
- data/test/test_webapp-xmlformatter.rb +19 -0
- metadata +291 -0
@@ -0,0 +1,134 @@
|
|
1
|
+
#
|
2
|
+
# search.rb - command line searcher
|
3
|
+
#
|
4
|
+
# Copyright (C) 2004-2005 Satoru Takabayashi <satoru@namazu.org>
|
5
|
+
# All rights reserved.
|
6
|
+
# This is free software with ABSOLUTELY NO WARRANTY.
|
7
|
+
#
|
8
|
+
# You can redistribute it and/or modify it under the terms of
|
9
|
+
# the GNU General Public License version 2.
|
10
|
+
#
|
11
|
+
|
12
|
+
module Gonzui
|
13
|
+
class CommandLineSearcher
|
14
|
+
# Build a searcher over a database opened read-only.
#
# config  - configuration object handed to DBM.open.
# options - hash of command line options keyed by option name. The keys
#           read here are 'out', 'line-number', 'context', 'count',
#           'package', 'prefix', 'regexp', 'color', 'no-filename' and
#           'type'.
def initialize(config, options)
  @config = config
  @dbm = DBM.open(@config, true)
  @out = options['out'] || STDOUT

  # Output formatting switches.
  @nlines = options['line-number']
  @use_color = options['color']
  @no_filename = options['no-filename']
  @ncontexts = options['context'].to_i

  # 'context' wins over 'count'; plain one-line output is the default.
  @show_method =
    if options['context'] then :show_context_lines
    elsif options['count'] then :show_count
    else :show_line
    end

  @package_name = options['package']

  # 'regexp' wins over 'prefix'; exact word lookup is the default.
  @use_regexp = options['regexp']
  @search_method =
    if options['regexp'] then :find_all_by_regexp
    elsif options['prefix'] then :find_all_by_prefix
    else :find_all
    end

  @target_type = :all
  requested = options['type']
  if requested
    type = requested.intern
    eprintf("unknown type: #{type}") unless LangScan::Type.include?(type)
    @target_type = type
  end
end
|
47
|
+
|
48
|
+
private
|
49
|
+
# Return +string+ wrapped between +start_tag+ and +end_tag+. The default
# tags are the ANSI escape sequences for bold red and for attribute reset.
def highlight(string, start_tag = "\x1b[01;31m", end_tag = "\x1b[00m")
  "#{start_tag}#{string}#{end_tag}"
end
|
52
|
+
|
53
|
+
# Print the single line of +content+ that contains the hit described by
# +info+, optionally prefixed with "path:" and "lineno:" and with the
# matched word highlighted. +regexp+ is accepted for signature parity with
# the other show_* methods and is not used here.
def show_line(content, path, regexp, info)
  line = content.line_range(info.byteno)
  filename = @no_filename ? "" : path + ":"
  linemark = @nlines ? info.lineno.to_s + ":" : ""

  word = @dbm.get_word(info.word_id)
  hit_start = info.byteno
  hit_end = hit_start + word.length

  before = content.substring(line.first...hit_start)
  after = content.substring(hit_end...line.last)
  matched = @use_color ? highlight(word) : word

  @out.printf("%s%s%s%s%s\n", filename, linemark, before, matched, after)
end
|
64
|
+
|
65
|
+
# Print the hit described by +info+ together with the surrounding lines
# yielded by content.each_line_range(byteno, @ncontexts). A "== path"
# header is printed first unless file names are suppressed. When line
# numbers are enabled the hit line is marked with ":" and every other line
# with "-". +regexp+ is not used here.
def show_context_lines(content, path, regexp, info)
  @out.printf("== %s\n", path) unless @no_filename
  hit_start = info.byteno

  content.each_line_range(hit_start, @ncontexts) do |lineno_offset, range|
    lineno = info.lineno + lineno_offset
    linemark = ""
    if @nlines
      linemark = lineno.to_s + (lineno == info.lineno ? ":" : "-")
    end

    # Context lines are printed verbatim.
    unless range.include?(hit_start)
      @out.printf("%s%s\n", linemark, content.substring(range))
      next
    end

    # The hit line is split around the matched word so it can be coloured.
    word = @dbm.get_word(info.word_id)
    before = content.substring(range.first...hit_start)
    after = content.substring((hit_start + word.length)...range.last)
    matched = @use_color ? highlight(word) : word
    @out.printf("%s%s%s%s\n", linemark, before, matched, after)
  end
end
|
88
|
+
|
89
|
+
# Look up the content and path for info.path_id and hand them, together
# with +regexp+ and +info+, to the display method selected in @show_method.
def show_result(regexp, info)
  path_id = info.path_id
  content = @dbm.get_content(path_id)
  path = @dbm.get_path(path_id)
  send(@show_method, content, path, regexp, info)
end
|
94
|
+
|
95
|
+
# True when the file identified by info.path_id belongs to the package
# whose id is +target_package_id+.
def package_match?(target_package_id, info)
  owner_id = @dbm.get_package_id_from_path_id(info.path_id)
  target_package_id == owner_id
end
|
99
|
+
|
100
|
+
public
|
101
|
+
# Search the database for +pattern+ using the method chosen at
# construction time and write the outcome to @out.
#
# In count mode only the number of raw results is printed. Otherwise the
# results are sorted by (path_id, byteno) and each source line is shown at
# most once, restricted to hits matching @target_type and, when a package
# name was given, to that package.
#
# Returns nil.
def search(pattern)
  regexp = Regexp.new(@use_regexp ? pattern : Regexp.quote(pattern))
  results = @dbm.send(@search_method, pattern)

  if @show_method == :show_count
    # Write to the configured stream, like every other output path does,
    # rather than unconditionally to STDOUT.
    @out.puts results.length
    return nil
  end

  target_package_id = @dbm.get_package_id(@package_name) if @package_name
  separator = ""
  shown_path_id = shown_lineno = nil

  results.sort_by {|x| [x.path_id, x.byteno] }.each do |info|
    # Skip further hits on a line that has already been displayed.
    next if info.path_id == shown_path_id && info.lineno == shown_lineno
    next unless info.match?(@target_type)
    next unless @package_name.nil? || package_match?(target_package_id, info)

    @out.print separator
    show_result(regexp, info)
    separator = "\n" if @show_method == :show_context_lines

    # Only remember lines that were really shown. Remembering filtered-out
    # hits would hide a later hit of the requested type on the same line.
    shown_path_id = info.path_id
    shown_lineno = info.lineno
  end
  nil
end
|
129
|
+
|
130
|
+
# Release the database handle opened by the constructor.
def finish
  database = @dbm
  database.close
end
|
133
|
+
end
|
134
|
+
end
|
@@ -0,0 +1,117 @@
|
|
1
|
+
#
|
2
|
+
# config.rb - a config library
|
3
|
+
#
|
4
|
+
# Copyright (C) 2004-2005 Satoru Takabayashi <satoru@namazu.org>
|
5
|
+
# All rights reserved.
|
6
|
+
# This is free software with ABSOLUTELY NO WARRANTY.
|
7
|
+
#
|
8
|
+
# You can redistribute it and/or modify it under the terms of
|
9
|
+
# the GNU General Public License version 2.
|
10
|
+
#
|
11
|
+
|
12
|
+
module Gonzui
|
13
|
+
class Config
|
14
|
+
include Util
|
15
|
+
|
16
|
+
# Populate every setting with its default value and expose each one
# through an attr_accessor named after the instance variable.
def initialize
  #
  # All paths should be expanded to absolute paths
  # because the current directory would be changed when
  # a process becomes a daemon.
  #
  @temporary_directory = ENV['tmp'] || "/tmp"
  @db_directory = File.expand_path("gonzui.db")
  @cache_directory = File.join(@db_directory, "cache")
  @gonzui_log_file = File.expand_path("gonzui.log")

  @db_cache_size = 5 * 1024 ** 2

  @quiet = false
  @verbose = false

  @utf8 = true
  @encoding_preference = UTF8::Preference

  @noindex_formats = []
  # FIXME: should be more flexible
  @exclude_pattern = /~$|\.bak$|CVS|\.svn|\.git/

  #
  # For gonzui-server
  #
  @pid_file = File.expand_path("gonzui.pid")
  @daemon = false
  @access_log_file = File.expand_path("access.log")
  @catalog_directory = choose_directory("catalog")
  @doc_directory = choose_directory("doc")
  @http_port = Gonzui::HTTP_PORT
  @bind_address = '*'
  @user = nil
  @group = nil
  @site_title = "gonzui"
  @base_mount_point = "/"

  @default_results_per_page = 10
  @max_results_per_page = 50
  @max_pages = 20
  @max_words = 10
  @max_packages_per_page = 100
  @nresults_candidates = [10, 20, 30, 50]

  set_user_and_group if unix?

  # instance_variables yields Strings on Ruby 1.8 and Symbols on 1.9+;
  # to_s makes the "@" stripping work on both.
  accessor_names = instance_variables.map {|name| name.to_s.delete("@") }
  self.class.class_eval { attr_accessor(*accessor_names) }
end
|
68
|
+
|
69
|
+
private
|
70
|
+
# Return the absolute path of the first existing directory among, in order:
# +base_name+ itself, <dir of $0>/../PKGDATADIR/base_name, and
# PKGDATADIR/base_name. When none exists the last candidate is returned.
def choose_directory(base_name)
  candidates = [
    base_name,
    File.join(File.dirname($0), "..", Gonzui::PKGDATADIR, base_name),
    File.join(Gonzui::PKGDATADIR, base_name),
  ]
  chosen = candidates.find {|candidate| File.directory?(candidate) }
  File.expand_path(chosen || candidates.last)
end
|
80
|
+
|
81
|
+
# Set @user and @group to the names of the user and group the current
# process runs as.
def set_user_and_group
  require 'etc'
  @user = Etc.getpwuid(Process.uid).name
  @group = Etc.getgrgid(Process.gid).name
end
|
88
|
+
|
89
|
+
# Names of all settings as symbols, derived from the instance variables.
def keys
  # instance_variables yields Strings on Ruby 1.8 and Symbols on 1.9+;
  # Symbol has no #delete, so normalise with to_s first.
  instance_variables.map {|name| name.to_s.delete("@").intern }
end
|
92
|
+
|
93
|
+
public
|
94
|
+
# Upper bound on the number of results across all pages:
# results per page times the number of pages.
def max_results_overall
  per_page = @max_results_per_page
  per_page * @max_pages
end
|
97
|
+
|
98
|
+
# Write all settings to +out+ as a hash literal, one "key => value," line
# per setting, sorted by key name with the keys padded to equal width.
def dump(out = STDOUT)
  names = keys
  width = names.map {|name| name.inspect.length }.max
  out.puts "{"
  names.sort_by {|name| name.to_s }.each do |name|
    out.printf(" %-#{width}s => %s,\n", name.inspect, send(name).inspect)
  end
  out.puts "}"
end
|
106
|
+
|
107
|
+
# Read +file_name+, evaluate its contents as Ruby and assign every
# key/value pair of the resulting hash through the matching "key=" writer.
# A file that evaluates to nil (for example an empty file) changes nothing.
#
# SECURITY: the file is executed with eval, so it must come from a trusted
# source; anything in it runs with the privileges of this process.
def load(file_name)
  # The block form closes the file even when reading raises.
  source = File.open(file_name) {|f| f.read }
  # Passing the file name makes syntax errors and backtraces point at the
  # configuration file instead of "(eval)".
  hash = eval(source, binding, file_name)
  return if hash.nil?
  hash.each {|key, value|
    send(key.to_s + "=", value)
  }
end
|
116
|
+
end
|
117
|
+
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
#
|
2
|
+
# content.rb - content implementation
|
3
|
+
#
|
4
|
+
# Copyright (C) 2004-2005 Satoru Takabayashi <satoru@namazu.org>
|
5
|
+
# All rights reserved.
|
6
|
+
# This is free software with ABSOLUTELY NO WARRANTY.
|
7
|
+
#
|
8
|
+
# You can redistribute it and/or modify it under the terms of
|
9
|
+
# the GNU General Public License version 2.
|
10
|
+
#
|
11
|
+
|
12
|
+
module Gonzui
|
13
|
+
# A piece of content: its text, modification time and path.
Content = Struct.new(:text, :mtime, :path) do
  # Length of the text, as reported by text.length.
  def length
    text.length
  end
end
|
19
|
+
end
|
data/lib/gonzui/dbm.rb
ADDED
@@ -0,0 +1,673 @@
|
|
1
|
+
#
|
2
|
+
# dbm.rb - gonzui DB library
|
3
|
+
#
|
4
|
+
# Copyright (C) 2004-2005 Satoru Takabayashi <satoru@namazu.org>
|
5
|
+
# All rights reserved.
|
6
|
+
# This is free software with ABSOLUTELY NO WARRANTY.
|
7
|
+
#
|
8
|
+
# You can redistribute it and/or modify it under the terms of
|
9
|
+
# the GNU General Public License version 2.
|
10
|
+
#
|
11
|
+
require 'zlib'
|
12
|
+
|
13
|
+
module Gonzui
|
14
|
+
class IncompatibleDBError < GonzuiError; end
|
15
|
+
DB_VERSION = "13"
|
16
|
+
|
17
|
+
# Entry point for opening the gonzui database.
module DBM
  module_function

  # Open the database described by +config+.
  #
  # Unless +read_only+ is set, the database directory is created first.
  # Without a block the opened database object is returned and the caller
  # is responsible for closing it. With a block the database is yielded,
  # closed when the block finishes (normally or by exception), and the
  # block's value is returned.
  def open(config, read_only = false)
    File.mkpath(config.db_directory) unless read_only

    backend = BDBDBM # to be pluggable
    database = backend.new(config, read_only)
    return database unless block_given?

    begin
      yield(database)
    ensure
      database.close
    end
  end
end
|
35
|
+
|
36
|
+
# Hands out sequential integer ids for texts and records them in a forward
# table (text -> id), a reverse table (id -> text) and, optionally, an
# alternative table (id -> alt). Lookups are cached in memory until #flush.
class IDCounter
  # dbm     - database object; must respond to #seq, #increase_counter and
  #           to the table names given below.
  # id_name - name used to build the "last_<id_name>" sequence key.
  # counter - statistics key increased by the number of new ids on flush.
  # db, rev_db, alt_db - method names on +dbm+ returning the forward,
  #           reverse and (possibly nil) alternative tables.
  def initialize(dbm, id_name, counter, db, rev_db, alt_db)
    @dbm = dbm
    @id_name = id_name
    @counter = counter
    @db = dbm.send(db)
    @rev_db = dbm.send(rev_db)
    @alt_db = alt_db ? dbm.send(alt_db) : nil

    @count = 0
    @cache = {}
    # -1 so that the first id handed out is 0.
    @last_id = @dbm.seq[make_last_key] || -1
  end

  # Persist the number of ids created since the last flush and the last id
  # handed out, then reset the in-memory cache. Does nothing when no id has
  # been created.
  def flush
    return unless @count > 0
    @dbm.increase_counter(@counter, @count)
    @dbm.seq[make_last_key] = @last_id if @last_id >= 0
    @count = 0
    @cache = {}
  end

  # Key under which the last id is stored in the sequence table.
  def make_last_key
    "last_" + @id_name.to_s
  end

  # Allocate and return the next id.
  def make_new_id
    @count += 1
    @last_id += 1
  end

  # Return the id of +text+, allocating and recording a new one if needed.
  def get_id(text)
    fetch_or_create(text) {|id| }
  end

  # Like #get_id, but a newly allocated id is additionally mapped to +alt+
  # in the alternative table.
  def get_id2(text, alt)
    fetch_or_create(text) {|id| @alt_db[id] = alt }
  end

  private

  # Shared lookup: cache first, then the forward table, then allocation.
  # The block is called with the new id only when one had to be allocated.
  def fetch_or_create(text)
    cached = @cache[text]
    return cached unless cached.nil?

    id = @db[text]
    if id.nil?
      id = make_new_id
      @db[text] = id
      @rev_db[id] = text
      yield(id)
    end
    @cache[text] = id
  end
end
|
98
|
+
|
99
|
+
# Adaptors describing how keys and values of each type are converted on
# their way into (+store+) and out of (+fetch+) a database table. A nil
# converter means the value is used as is.
module AutoPack
  Adaptor = Struct.new(:store, :fetch)

  # Ids, packed with pack_id / unpack_id.
  ID = Adaptor.new(lambda {|value| pack_id(value) },
                   lambda {|packed| unpack_id(packed) })

  # Integers, packed with pack_fixnum / unpack_fixnum.
  Fixnum = Adaptor.new(lambda {|value| pack_fixnum(value) },
                       lambda {|packed| unpack_fixnum(packed) })

  # Symbols, stored as their string form.
  Symbol = Adaptor.new(lambda {|symbol| symbol.to_s },
                       lambda {|text| text.intern })

  # Plain strings need no conversion.
  String = Adaptor.new(nil, nil)

  # Strings stored zlib-deflated.
  GZString = Adaptor.new(lambda {|plain| Zlib::Deflate.deflate(plain) },
                         lambda {|packed| Zlib::Inflate.inflate(packed) })
end
|
111
|
+
|
112
|
+
class DBMError < GonzuiError; end
|
113
|
+
# Storage-independent part of the gonzui database. It opens every table
# listed in DBTable through #do_open_db (supplied by a subclass), exposes
# each table through a reader named after it, and offers the lookup,
# statistics and bookkeeping methods used elsewhere in gonzui.
class AbstractDBM
  include Util

  ap = AutoPack # for short

  # [table name, key adaptor, value adaptor, duplicate keys allowed?]
  DBTable = [
    [:fmtid_fmt,         ap::ID,     ap::String,   false],
    [:fmtid_fabbr,       ap::ID,     ap::String,   false],
    [:fabbr_fmtid,       ap::String, ap::ID,       false],
    [:lcsid_lcs,         ap::ID,     ap::String,   false],
    [:lcsid_labbr,       ap::ID,     ap::String,   false],
    [:labbr_lcsid,       ap::String, ap::ID,       false],
    [:seq,               ap::String, ap::Fixnum,   false],
    [:stat,              ap::String, ap::Fixnum,   false],
    [:pkg_pkgid,         ap::String, ap::ID,       false],
    [:pkgid_pkg,         ap::ID,     ap::String,   false],
    [:pkgid_pathids,     ap::ID,     ap::ID,       true],
    [:pkgid_fmtids,      ap::ID,     ap::ID,       true],
    [:pkgid_lcsids,      ap::ID,     ap::ID,       true],
    [:pkgid_options,     ap::ID,     ap::String,   true],
    [:pkgid_src,         ap::ID,     ap::String,   false],
    [:path_pathid,       ap::String, ap::ID,       false],
    [:pathid_digest,     ap::ID,     ap::GZString, false],
    [:pathid_info,       ap::ID,     ap::String,   false],
    [:pathid_content,    ap::ID,     ap::GZString, false],
    [:pathid_bols,       ap::ID,     ap::GZString, false],
    [:pathid_hash,       ap::ID,     ap::String,   false],
    [:pathid_path,       ap::ID,     ap::String,   false],
    [:pathid_pkgid,      ap::ID,     ap::ID,       false],
    [:pathid_wordids,    ap::ID,     ap::GZString, false],
    [:type_typeid,       ap::Symbol, ap::ID,       false],
    [:typeid_type,       ap::ID,     ap::Symbol,   false],
    [:word_wordid,       ap::String, ap::ID,       false],
    [:wordid_pkgids,     ap::ID,     ap::ID,       true],
    [:wordid_word,       ap::ID,     ap::String,   false],
    [:pkgwordid_pathids, ap::String, ap::ID,       true],
    [:pathwordid_info,   ap::String, ap::String,   false],
    [:version,           ap::String, ap::String,   false],
  ]

  IDTable = [
    # id_name,    # of id,    text -> id,   id -> text,   id -> alt
    [:type_id,    :ntypes,    :type_typeid, :typeid_type, nil],
    [:word_id,    :nwords,    :word_wordid, :wordid_word, nil],
    [:path_id,    :ncontents, :path_pathid, :pathid_path, nil],
    [:package_id, :npackages, :pkg_pkgid,   :pkgid_pkg,   nil],
    [:format_id,  :nformats,  :fabbr_fmtid, :fmtid_fabbr, :fmtid_fmt],
    [:license_id, :nlicenses, :labbr_lcsid, :lcsid_labbr, :lcsid_lcs],
  ]

  # Open all tables of the database in config.db_directory.
  #
  # Raises RuntimeError when the directory does not exist and
  # IncompatibleDBError when an existing database has another DB_VERSION.
  # Unless +read_only+ is set, the current DB_VERSION is written.
  def initialize(config, read_only = false)
    raise "#{config.db_directory}: No such directory" unless
      File.directory?(config.db_directory)
    @config = config

    validate_db_version
    @db_opened = {}
    DBTable.each {|db_name, key_type, value_type, dupsort|
      open_db(db_name, key_type, value_type, dupsort)
    }
    put_db_version unless read_only
    init_id_counters

    @opened = true
    @current_package_id = nil
    @wordid_pathids_cache = {}
  end

  private

  # Create one IDCounter per IDTable row, store it in
  # @<id_name>_counter and expose it through a reader of the same name.
  def init_id_counters
    @id_counters = []
    IDTable.each {|id_name, counter, db, rev_db, alt_db|
      counter = IDCounter.new(self, id_name, counter, db, rev_db, alt_db)
      name = "@" + id_name.to_s + "_counter"
      instance_variable_set(name, counter)
      self.class.class_eval {
        attr_reader name.delete("@")
      }
      @id_counters << counter
    }
  end

  # Collect the word info of +word_id+ over every package and path that
  # contains it. A nil +word_id+ yields an empty list.
  def collect_all_results(word_id)
    results = []
    if word_id
      get_package_ids(word_id).each {|package_id|
        path_ids = get_path_ids_from_package_and_word_id(package_id, word_id)
        path_ids.each {|path_id|
          results.concat(get_all_word_info(path_id, word_id))
        }
      }
    end
    return results
  end

  # True when the database directory holds anything besides "__*" files.
  def db_exist?
    return false unless File.directory?(@config.db_directory)
    entries = Dir.entries_without_dots(@config.db_directory)
    # filter out file names like __db.001.
    entries = entries.find_all {|entry| not /^__/.match(entry) }
    return !entries.empty?
  end

  # Open a single table. Must be implemented by the concrete backend.
  def do_open_db(name, key_type, value_type, dupsort)
    raise NotImplementedError.new
  end

  # Yield (id, abbrev, name) for every entry of the table +id_name+,
  # ordered by name. +get_abbrev+ names the method mapping id -> abbrev.
  def each_property(id_name, get_abbrev, &block)
    properties = []
    self.send(id_name).each {|id, name|
      abbrev = self.send(get_abbrev, id)
      properties.push([id, abbrev, name])
    }
    properties.sort_by {|id, abbrev, name| name }.each {|id, abbrev, name|
      block.call(id, abbrev, name)
    }
  end

  # Decode the pathid_bols entry of +path_id+ into a list of integers
  # (used by #get_all_word_info as beginning-of-line offsets).
  def get_bols(path_id)
    DeltaDumper.undump_fixnums(@pathid_bols[path_id])
  end

  # Open the table +db_name+ once, store it in @<db_name> and define a
  # reader for it. Returns nil when the table is already open.
  def open_db(db_name, key_type, value_type, dupsort)
    return if @db_opened.include?(db_name)
    db = do_open_db(db_name, key_type, value_type, dupsort)
    @db_opened[db_name] = db

    name = "@" + db_name.to_s
    instance_variable_set(name, db)
    self.class.class_eval {
      attr_reader name.delete("@")
    }
    return db
  end

  # Record the current DB_VERSION in the version table.
  def put_db_version
    @version["version"] = DB_VERSION
  end

  # Raise IncompatibleDBError when an existing database was written with a
  # different DB_VERSION. A database that does not exist yet is accepted.
  def validate_db_version
    return unless db_exist?
    version = "unknown"
    begin
      db = do_open_db(:version, AutoPack::String, AutoPack::String, false)
      version = db["version"]
      db.close
    rescue BDB::Fatal
      # The version table could not be read; report it as "unknown" below.
    end
    if version != DB_VERSION
      m = sprintf("DB format is incompatible (version %s expected but %s)",
                  DB_VERSION, version)
      raise IncompatibleDBError.new(m)
    end
  end

  # Check that the statistics counters agree with the table sizes and that
  # the indexed line counter equals the sum over all indexed contents.
  def verify_stat_integrity
    assert_equal_all(get_nformats,
                     fmtid_fmt.length,
                     fmtid_fabbr.length,
                     fabbr_fmtid.length)
    assert_equal_all(get_npackages,
                     pkgid_pkg.length,
                     pkg_pkgid.length)
    assert_equal_all(get_ncontents,
                     path_pathid.length,
                     pathid_path.length,
                     pathid_content.length,
                     pathid_info.length)
    assert_equal_all(get_nwords,
                     word_wordid.length)
    nlines_indexed = 0
    @pathid_info.each_key {|path_id|
      info = get_content_info(path_id)
      nlines_indexed += info.nlines if info.indexed?
    }
    assert_equal(get_nlines_indexed, nlines_indexed)
  end

  # Check that no id in a reverse table exceeds the recorded last id.
  def verify_seq_integrity
    IDTable.each {|id_name, counter, db, rev_db, alt_db|
      id = (self.send(rev_db).get_last_key or 0)
      assert(id <= (@seq["last_" + id_name.to_s] or 0))
    }
  end

  public

  # True when the format abbreviation of +path_id+ is "binary".
  def binary_content?(path_id)
    format_id = get_format_id_from_path_id(path_id)
    get_format_abbrev(format_id) == "binary"
  end

  # Flush pending data and close every table.
  # Raises DBMError when the database is already closed; the check comes
  # first so that nothing is written to tables that are no longer open.
  def close
    raise DBMError.new("dbm is already closed") unless @opened
    flush_cache
    @db_opened.each {|name, db|
      db.close
    }
    @opened = false
  end

  # Run the integrity checks; returns true when none of them raised.
  def consistent?
    verify_stat_integrity
    verify_seq_integrity
    return true
  end

  # Decrease the statistics counter +key+ by +step+.
  # Raises DBMError when the result would be negative.
  def decrease_counter(key, step = 1)
    value = get_counter(key) - step
    raise DBMError.new("counter #{key} becomes minus") if value < 0
    @stat[key.to_s] = value
  end

  # Yield the name of every opened table as a string.
  def each_db_name
    @db_opened.each_key {|db_name| yield(db_name.to_s) }
  end

  # Yield (id, abbrev, name) for every format, ordered by name.
  def each_format(&block)
    each_property(:fmtid_fmt, :get_format_abbrev, &block)
  end

  # Yield (id, abbrev, name) for every license, ordered by name.
  def each_license(&block)
    each_property(:lcsid_lcs, :get_license_abbrev, &block)
  end

  # Yield every package name.
  def each_package_name
    @pkgid_pkg.each_value {|value| yield(value) }
  end

  # Yield every word stored in the word table.
  def each_word(&block)
    @word_wordid.each_key {|word| yield(word) }
  end

  # Word info for the word exactly equal to +pattern+.
  def find_all(pattern)
    word_id = @word_wordid[pattern]
    results = collect_all_results(word_id)
    return results
  end

  # Word info for all words starting with +pattern+ (backend specific).
  def find_all_by_prefix(pattern)
    raise NotImplementedError.new("should be implemented in a sub class")
  end

  # Word info for all words matching the regular expression +pattern+.
  # Every stored word is tested against the expression.
  def find_all_by_regexp(pattern)
    regexp = Regexp.new(pattern)
    results = []
    @word_wordid.each {|word, word_id|
      if regexp.match(word)
        results.concat(collect_all_results(word_id))
      end
    }
    return results
  end

  # Yield each word info of +word_id+ within +path_id+.
  def find_word_info(path_id, word_id)
    get_all_word_info(path_id, word_id).each {|info|
      yield(info)
    }
  end

  # Write the word -> path ids collected by #put_pathid_wordids to the
  # tables, attributing them to @current_package_id, then flush all id
  # counters.
  def flush_cache
    wordids = @wordid_pathids_cache.keys.sort!
    wordids.each {|word_id|
      package_word_id = AutoPack.pack_id2(@current_package_id, word_id)
      @wordid_pathids_cache[word_id].each {|path_id|
        @pkgwordid_pathids[package_word_id] = path_id
      }
    }
    wordids.each {|word_id|
      @wordid_pkgids[word_id] = @current_package_id
    }
    @wordid_pathids_cache.clear
    @id_counters.each {|counter| counter.flush}
  end

  # All occurrences of +word_id+ in +path_id+ as WordInfo objects, each
  # with its line number derived from the stored beginning-of-line offsets.
  def get_all_word_info(path_id, word_id)
    path_word_id = AutoPack.pack_id2(path_id, word_id)
    dump = @pathwordid_info[path_word_id]
    return [] if dump.nil?
    bols = get_bols(path_id)
    bol = bols.shift
    assert_equal(0, bol)

    lineno = 0
    DeltaDumper.undump_tuples(WordInfo, dump).map {|seqno, byteno, type_id|
      # Advance to the line containing byteno.
      while bol and bol <= byteno
        lineno += 1
        bol = bols.shift
      end
      type = get_type(type_id)
      WordInfo.new(word_id, path_id, seqno, byteno, type_id, type, lineno)
    }
  end

  # Entry of the pathid_hash table for +path_id+.
  def get_content_hash(path_id)
    @pathid_hash[path_id]
  end

  # Value of the statistics counter +key+; 0 when it has never been set.
  def get_counter(key)
    @stat[key.to_s] || 0
  end

  # Stored content of +path_id+.
  def get_content(path_id)
    @pathid_content[path_id]
  end

  # ContentInfo of +path_id+; asserts that an entry exists.
  def get_content_info(path_id)
    dump = @pathid_info[path_id]
    assert_non_nil(dump)
    return ContentInfo.load(dump)
  end

  # Digest of +path_id+ as DigestInfo objects; empty when none is stored.
  def get_digest(path_id)
    dump = @pathid_digest[path_id]
    return [] if dump.nil?
    DeltaDumper.undump_tuples(DigestInfo, dump).map {|data|
      # The last element is a type id; append the resolved type.
      data.push(get_type(data.last))
      DigestInfo.new(*data)
    }
  end

  def get_format_abbrev(format_id)
    @fmtid_fabbr[format_id]
  end

  def get_format_id(format_abbrev)
    @fabbr_fmtid[format_abbrev]
  end

  def get_format_id_from_path_id(path_id)
    get_content_info(path_id).format_id
  end

  def get_format_ids_from_package_id(package_id)
    @pkgid_fmtids.duplicates(package_id)
  end

  def get_format_name(format_id)
    @fmtid_fmt[format_id]
  end

  def get_license_abbrev(license_id)
    @lcsid_labbr[license_id]
  end

  def get_license_id(license_abbrev)
    @labbr_lcsid[license_abbrev]
  end

  def get_license_id_from_path_id(path_id)
    get_content_info(path_id).license_id
  end

  def get_license_ids_from_package_id(package_id)
    @pkgid_lcsids.duplicates(package_id)
  end

  def get_license_name(license_id)
    @lcsid_lcs[license_id]
  end

  def get_ncontents
    get_counter(:ncontents)
  end

  # Value of the per-format contents counter of +format_id+.
  def get_ncontents_by_format_id(format_id)
    format_abbrev = get_format_abbrev(format_id)
    key = make_ncontents_by_format_key(format_abbrev)
    return get_counter(key)
  end

  # Value of the per-license contents counter of +license_id+.
  def get_ncontents_by_license_id(license_id)
    license_abbrev = get_license_abbrev(license_id)
    key = make_ncontents_by_license_key(license_abbrev)
    return get_counter(key)
  end

  def get_ncontents_indexed
    get_counter(:ncontents_indexed)
  end

  # Number of path ids recorded for the package named +package_name+.
  def get_ncontents_in_package(package_name)
    package_id = get_package_id(package_name)
    @pkgid_pathids.duplicates(package_id).length
  end

  def get_nformats
    get_counter(:nformats)
  end

  def get_nlines_indexed
    get_counter(:nlines_indexed)
  end

  def get_npackages
    get_counter(:npackages)
  end

  def get_nwords
    get_counter(:nwords)
  end

  def get_package_id(package_name)
    @pkg_pkgid[package_name]
  end

  def get_package_id_from_path_id(path_id)
    @pathid_pkgid[path_id]
  end

  # Ids of the packages recorded for +word_id+.
  def get_package_ids(word_id)
    @wordid_pkgids.duplicates(word_id)
  end

  def get_package_name(package_id)
    @pkgid_pkg[package_id]
  end

  def get_package_names
    @pkgid_pkg.values
  end

  # Options stored by #put_package_options for +package_id+, as a hash
  # with :exclude_pattern (Regexp) and :noindex_formats (Array of String).
  # Raises DBMError on an unknown option name.
  def get_package_options(package_id)
    options = {}
    values = @pkgid_options.duplicates(package_id)
    values.each {|value|
      k, v = value.split(":", 2)
      k = k.intern
      case k
      when :exclude_pattern
        v = Regexp.new(v)
      when :noindex_formats
        v = v.split(",")
      else
        raise DBMError.new("#{k}: unknown option")
      end
      options[k] = v
    }
    assert(options[:exclude_pattern])
    assert(options[:noindex_formats])
    return options
  end

  def get_path(path_id)
    @pathid_path[path_id]
  end

  def get_path_id(path)
    @path_pathid[path]
  end

  # Ids of the paths recorded for +package_id+.
  def get_path_ids(package_id)
    @pkgid_pathids.duplicates(package_id)
  end

  # Ids of the paths recorded for the (package, word) pair.
  def get_path_ids_from_package_and_word_id(package_id, word_id)
    package_word_id = AutoPack.pack_id2(package_id, word_id)
    return @pkgwordid_pathids.duplicates(package_word_id)
  end

  def get_source_uri(package_id)
    @pkgid_src[package_id]
  end

  def get_type(type_id)
    @typeid_type[type_id]
  end

  # Id of +type+; a new id is allocated when the type is not known yet.
  def get_type_id(type)
    @type_id_counter.get_id(type)
  end

  def get_word(word_id)
    @wordid_word[word_id]
  end

  def get_word_id(word)
    @word_wordid[word]
  end

  # Word ids stored for +path_id+.
  def get_word_ids(path_id)
    DeltaDumper.undump_ids(@pathid_wordids[path_id])
  end

  def has_format_id?(format_id)
    @fmtid_fmt.has_key?(format_id)
  end

  def has_format_abbrev?(format_abbrev)
    @fabbr_fmtid.has_key?(format_abbrev)
  end

  def has_license_id?(license_id)
    @lcsid_lcs.has_key?(license_id)
  end

  def has_license_abbrev?(license_abbrev)
    @labbr_lcsid.has_key?(license_abbrev)
  end

  def has_package?(package_name)
    @pkg_pkgid.include?(package_name)
  end

  def has_path?(path)
    @path_pathid.include?(path)
  end

  def has_type?(type)
    @type_typeid.include?(type)
  end

  # True when +word+ has an id in the word table.
  def has_word?(word)
    @word_wordid[word] ? true : false
  end

  # Increase the statistics counter +key+ by +step+.
  def increase_counter(key, step = 1)
    @stat[key.to_s] = get_counter(key) + step
  end

  # Statistics key counting the contents of one format.
  def make_ncontents_by_format_key(format_abbrev)
    ("ncontents_format_" + format_abbrev).intern
  end

  # Statistics key counting the contents of one license (downcased).
  def make_ncontents_by_license_key(license_abbrev)
    ("ncontents_license_" + license_abbrev).downcase.intern
  end

  # FIXME: Ad hoc serialization. We avoid using Marshal
  # not to make the DB Ruby-dependent.
  def put_package_options(package_id)
    @pkgid_options[package_id] = sprintf("exclude_pattern:%s",
                                         @config.exclude_pattern.to_s)
    @pkgid_options[package_id] = sprintf("noindex_formats:%s",
                                         @config.noindex_formats.join(","))
  end

  # Remember in memory that +path_id+ contains each of +word_ids+; the
  # data reaches the tables on the next #flush_cache.
  #
  # NOTE(review): the cache is attributed to the most recent +package_id+
  # at flush time, so callers presumably flush before switching packages;
  # verify against the indexer.
  def put_pathid_wordids(package_id, path_id, word_ids)
    @current_package_id = package_id
    word_ids.each {|word_id|
      pathids = (@wordid_pathids_cache[word_id] ||= [])
      pathids << path_id
    }
  end
end
|
672
|
+
end
|
673
|
+
|