gonzui 1.2-x86-mswin32-60
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/AUTHORS.txt +9 -0
- data/History.txt +5539 -0
- data/Manifest.txt +115 -0
- data/PostInstall.txt +17 -0
- data/README.rdoc +149 -0
- data/Rakefile +28 -0
- data/bin/gonzui-db +167 -0
- data/bin/gonzui-import +177 -0
- data/bin/gonzui-remove +58 -0
- data/bin/gonzui-search +68 -0
- data/bin/gonzui-server +176 -0
- data/bin/gonzui-update +53 -0
- data/data/gonzui/catalog/catalog.ja +80 -0
- data/data/gonzui/doc/favicon.ico +0 -0
- data/data/gonzui/doc/folder.png +0 -0
- data/data/gonzui/doc/gonzui.css +279 -0
- data/data/gonzui/doc/gonzui.js +111 -0
- data/data/gonzui/doc/text.png +0 -0
- data/data/gonzuirc.sample +29 -0
- data/ext/autopack/autopack.c +88 -0
- data/ext/autopack/extconf.rb +3 -0
- data/ext/delta/delta.c +147 -0
- data/ext/delta/extconf.rb +5 -0
- data/ext/texttokenizer/extconf.rb +5 -0
- data/ext/texttokenizer/texttokenizer.c +93 -0
- data/ext/xmlformatter/extconf.rb +5 -0
- data/ext/xmlformatter/xmlformatter.c +207 -0
- data/lib/gonzui.rb +59 -0
- data/lib/gonzui/apt.rb +193 -0
- data/lib/gonzui/autopack.so +0 -0
- data/lib/gonzui/bdbdbm.rb +118 -0
- data/lib/gonzui/cmdapp.rb +14 -0
- data/lib/gonzui/cmdapp/app.rb +175 -0
- data/lib/gonzui/cmdapp/search.rb +134 -0
- data/lib/gonzui/config.rb +117 -0
- data/lib/gonzui/content.rb +19 -0
- data/lib/gonzui/dbm.rb +673 -0
- data/lib/gonzui/deindexer.rb +162 -0
- data/lib/gonzui/delta.rb +49 -0
- data/lib/gonzui/delta.so +0 -0
- data/lib/gonzui/extractor.rb +347 -0
- data/lib/gonzui/fetcher.rb +309 -0
- data/lib/gonzui/gettext.rb +144 -0
- data/lib/gonzui/importer.rb +84 -0
- data/lib/gonzui/indexer.rb +316 -0
- data/lib/gonzui/info.rb +80 -0
- data/lib/gonzui/license.rb +100 -0
- data/lib/gonzui/logger.rb +48 -0
- data/lib/gonzui/monitor.rb +177 -0
- data/lib/gonzui/progressbar.rb +235 -0
- data/lib/gonzui/remover.rb +38 -0
- data/lib/gonzui/searcher.rb +330 -0
- data/lib/gonzui/searchquery.rb +235 -0
- data/lib/gonzui/searchresult.rb +111 -0
- data/lib/gonzui/texttokenizer.so +0 -0
- data/lib/gonzui/updater.rb +254 -0
- data/lib/gonzui/util.rb +415 -0
- data/lib/gonzui/vcs.rb +128 -0
- data/lib/gonzui/webapp.rb +25 -0
- data/lib/gonzui/webapp/advsearch.rb +123 -0
- data/lib/gonzui/webapp/filehandler.rb +24 -0
- data/lib/gonzui/webapp/jsfeed.rb +61 -0
- data/lib/gonzui/webapp/markup.rb +445 -0
- data/lib/gonzui/webapp/search.rb +269 -0
- data/lib/gonzui/webapp/servlet.rb +319 -0
- data/lib/gonzui/webapp/snippet.rb +155 -0
- data/lib/gonzui/webapp/source.rb +37 -0
- data/lib/gonzui/webapp/stat.rb +137 -0
- data/lib/gonzui/webapp/top.rb +63 -0
- data/lib/gonzui/webapp/uri.rb +140 -0
- data/lib/gonzui/webapp/webrick.rb +48 -0
- data/lib/gonzui/webapp/xmlformatter.so +0 -0
- data/script/console +10 -0
- data/script/destroy +14 -0
- data/script/generate +14 -0
- data/script/makemanifest.rb +21 -0
- data/tasks/extconf.rake +13 -0
- data/tasks/extconf/autopack.rake +43 -0
- data/tasks/extconf/delta.rake +43 -0
- data/tasks/extconf/texttokenizer.rake +43 -0
- data/tasks/extconf/xmlformatter.rake +43 -0
- data/test/_external_tools.rb +13 -0
- data/test/_test-util.rb +142 -0
- data/test/foo/Makefile.foo +66 -0
- data/test/foo/bar.c +5 -0
- data/test/foo/bar.h +6 -0
- data/test/foo/foo.c +25 -0
- data/test/foo/foo.spec +33 -0
- data/test/test_apt.rb +42 -0
- data/test/test_autopack_extn.rb +7 -0
- data/test/test_bdbdbm.rb +79 -0
- data/test/test_cmdapp-app.rb +35 -0
- data/test/test_cmdapp-search.rb +99 -0
- data/test/test_config.rb +28 -0
- data/test/test_content.rb +15 -0
- data/test/test_dbm.rb +171 -0
- data/test/test_deindexer.rb +50 -0
- data/test/test_delta.rb +66 -0
- data/test/test_extractor.rb +78 -0
- data/test/test_fetcher.rb +75 -0
- data/test/test_gettext.rb +50 -0
- data/test/test_gonzui.rb +11 -0
- data/test/test_helper.rb +10 -0
- data/test/test_importer.rb +56 -0
- data/test/test_indexer.rb +37 -0
- data/test/test_info.rb +82 -0
- data/test/test_license.rb +49 -0
- data/test/test_logger.rb +60 -0
- data/test/test_monitor.rb +23 -0
- data/test/test_searcher.rb +37 -0
- data/test/test_searchquery.rb +27 -0
- data/test/test_searchresult.rb +43 -0
- data/test/test_texttokenizer.rb +47 -0
- data/test/test_updater.rb +95 -0
- data/test/test_util.rb +149 -0
- data/test/test_vcs.rb +61 -0
- data/test/test_webapp-markup.rb +42 -0
- data/test/test_webapp-util.rb +19 -0
- data/test/test_webapp-xmlformatter.rb +19 -0
- metadata +292 -0
@@ -0,0 +1,134 @@
|
|
1
|
+
#
|
2
|
+
# search.rb - command line searcher
|
3
|
+
#
|
4
|
+
# Copyright (C) 2004-2005 Satoru Takabayashi <satoru@namazu.org>
|
5
|
+
# All rights reserved.
|
6
|
+
# This is free software with ABSOLUTELY NO WARRANTY.
|
7
|
+
#
|
8
|
+
# You can redistribute it and/or modify it under the terms of
|
9
|
+
# the GNU General Public License version 2.
|
10
|
+
#
|
11
|
+
|
12
|
+
module Gonzui
|
13
|
+
# Command line front end for searching a gonzui database.
#
# Looks a pattern up through the DBM object returned by DBM.open and
# prints each hit to the configured output stream, either as a single
# line, as a block of context lines, or as a bare count.
class CommandLineSearcher
  # config  - configuration object passed to DBM.open (opened read-only).
  # options - Hash with String keys.  Keys read here: 'out',
  #           'line-number', 'context', 'count', 'package', 'prefix',
  #           'regexp', 'color', 'no-filename' and 'type'.
  def initialize(config, options)
    @config = config
    @dbm = DBM.open(@config, true)
    @out = options['out'] || STDOUT
    @nlines = options['line-number']

    # 'context' takes precedence over 'count'.
    @show_method = if options['context']
                     :show_context_lines
                   elsif options['count']
                     :show_count
                   else
                     :show_line
                   end

    @package_name = options['package']

    @ncontexts = options['context'].to_i

    # 'regexp' takes precedence over 'prefix'.
    @search_method = :find_all
    @search_method = :find_all_by_prefix if options['prefix']
    @search_method = :find_all_by_regexp if options['regexp']

    @use_regexp = options['regexp']
    @use_color = options['color']
    @no_filename = options['no-filename']

    @target_type = :all
    if options['type']
      type = options['type'].intern
      eprintf("unknown type: #{type}") unless LangScan::Type.include?(type)
      @target_type = type
    end
  end

  # Searches for +pattern+ with the configured search method and prints
  # the hits to the output stream.
  #
  # Hits are visited ordered by [path_id, byteno]; a hit on the same
  # line of the same path as the previous visited hit is skipped.
  # In count mode only the total number of raw results is printed.
  def search(pattern)
    separator = ""
    # Built even when unused so that an invalid pattern fails early.
    regexp = Regexp.new(@use_regexp ? pattern : Regexp.quote(pattern))
    results = @dbm.send(@search_method, pattern)
    prev_lineno = prev_path_id = nil
    target_package_id = @dbm.get_package_id(@package_name) if @package_name
    results.sort_by {|x| [x.path_id, x.byteno] }.each {|info|
      next if prev_lineno && prev_path_id &&
        info.path_id == prev_path_id &&
        info.lineno == prev_lineno
      if info.match?(@target_type) && @show_method != :show_count
        if @package_name.nil? || package_match?(target_package_id, info)
          @out.print separator
          show_result(regexp, info)
          separator = "\n" if @show_method == :show_context_lines
        end
      end
      prev_lineno = info.lineno
      prev_path_id = info.path_id
    }
    # Write the count to the configured stream, like every other mode.
    @out.puts results.length if @show_method == :show_count
  end

  # Closes the underlying database.
  def finish
    @dbm.close
  end

  private

  # Wraps +string+ in terminal escape sequences (bold red by default).
  def highlight(string, start_tag = "\x1b[01;31m", end_tag = "\x1b[00m")
    sprintf("%s%s%s", start_tag, string, end_tag)
  end

  # Returns the text of the line covered by +range+ with the matched
  # word re-inserted at info.byteno (highlighted when color is enabled).
  def render_match(content, range, info)
    word = @dbm.get_word(info.word_id)
    pre = content.substring(range.first...info.byteno)
    post = content.substring((info.byteno + word.length)...range.last)
    mid = @use_color ? highlight(word) : word
    pre + mid + post
  end

  # Prints one hit as "path:lineno:text"; the path and line number
  # parts are omitted according to the options.
  def show_line(content, path, regexp, info)
    range = content.line_range(info.byteno)
    filename = @no_filename ? "" : path + ":"
    linemark = @nlines ? info.lineno.to_s + ":" : ""
    @out.printf("%s%s%s\n", filename, linemark,
                render_match(content, range, info))
  end

  # Prints one hit together with its surrounding lines.  The matching
  # line is marked with ":" and context lines with "-" when line
  # numbers are enabled.
  def show_context_lines(content, path, regexp, info)
    @out.printf("== %s\n", path) unless @no_filename
    content.each_line_range(info.byteno, @ncontexts) {|lineno_offset, range|
      lineno = info.lineno + lineno_offset
      linemark = ""
      if @nlines
        mark = (lineno == info.lineno) ? ":" : "-"
        linemark = lineno.to_s + mark
      end
      text = if range.include?(info.byteno)
               render_match(content, range, info)
             else
               content.substring(range)
             end
      @out.printf("%s%s\n", linemark, text)
    }
  end

  # Fetches content and path for the hit and dispatches to the
  # configured show method.
  def show_result(regexp, info)
    content = @dbm.get_content(info.path_id)
    path = @dbm.get_path(info.path_id)
    send(@show_method, content, path, regexp, info)
  end

  # True when the hit's path belongs to the package +target_package_id+.
  def package_match?(target_package_id, info)
    target_package_id == @dbm.get_package_id_from_path_id(info.path_id)
  end
end
|
134
|
+
end
|
@@ -0,0 +1,117 @@
|
|
1
|
+
#
|
2
|
+
# config.rb - a config library
|
3
|
+
#
|
4
|
+
# Copyright (C) 2004-2005 Satoru Takabayashi <satoru@namazu.org>
|
5
|
+
# All rights reserved.
|
6
|
+
# This is free software with ABSOLUTELY NO WARRANTY.
|
7
|
+
#
|
8
|
+
# You can redistribute it and/or modify it under the terms of
|
9
|
+
# the GNU General Public License version 2.
|
10
|
+
#
|
11
|
+
|
12
|
+
module Gonzui
|
13
|
+
# Holds every gonzui setting as an instance variable and exposes each
# one through a generated accessor.  Settings can be overridden from a
# file with #load and printed with #dump.
class Config
  include Util

  # Fills in the defaults and generates an accessor for every setting.
  def initialize
    #
    # All paths should be expanded to absolute paths
    # because the current directory would be changed when
    # a process becomes a daemon.
    #
    @temporary_directory = ENV['tmp'] || "/tmp"
    @db_directory = File.expand_path("gonzui.db")
    @cache_directory = File.join(@db_directory, "cache")
    @gonzui_log_file = File.expand_path("gonzui.log")

    @db_cache_size = 5 * 1024 ** 2

    @quiet = false
    @verbose = false

    @utf8 = true
    @encoding_preference = UTF8::Preference

    @noindex_formats = []
    # FIXME: should be more flexible
    @exclude_pattern = /~$|\.bak$|CVS|\.svn|\.git/

    #
    # For gonzui-server
    #
    @pid_file = File.expand_path("gonzui.pid")
    @daemon = false
    @access_log_file = File.expand_path("access.log")
    @catalog_directory = choose_directory("catalog")
    @doc_directory = choose_directory("doc")
    @http_port = Gonzui::HTTP_PORT
    @bind_address = '*'
    @user = nil
    @group = nil
    @site_title = "gonzui"
    @base_mount_point = "/"

    @default_results_per_page = 10
    @max_results_per_page = 50
    @max_pages = 20
    @max_words = 10
    @max_packages_per_page = 100
    @nresults_candidates = [10, 20, 30, 50]

    set_user_and_group if unix?
    # instance_variables yields Strings on Ruby 1.8 and Symbols on
    # later versions; to_s makes the name handling work on both.
    instance_variables.each {|name|
      self.class.class_eval {
        attr_accessor name.to_s.delete("@")
      }
    }
  end

  # Upper bound on the number of results over all pages.
  def max_results_overall
    @max_results_per_page * @max_pages
  end

  # Writes all settings to +out+ as a Ruby hash literal, sorted by key,
  # with the "=>" column aligned.
  def dump(out = STDOUT)
    len = keys.map {|key| key.inspect.length }.max
    out.puts "{"
    keys.sort_by {|key| key.to_s }.each {|key|
      out.printf("  %-#{len}s => %s,\n", key.inspect, send(key).inspect)
    }
    out.puts "}"
  end

  # Reads +file_name+, evaluates it as Ruby and assigns every key/value
  # pair of the resulting hash through the matching "key=" writer.
  # Does nothing when the file evaluates to nil.
  #
  # NOTE: the file is executed with eval, so it must come from a
  # trusted source.
  def load(file_name)
    # File.read closes the handle even when reading fails, and no
    # handle is left open if eval raises.
    hash = eval(File.read(file_name))
    return if hash.nil?
    hash.each {|key, value|
      send(key.to_s + "=", value)
    }
  end

  private

  # Returns the absolute path of the first existing directory among:
  # +base_name+ itself, the package data directory relative to the
  # running script, and the package data directory as given.  Falls
  # back to the last candidate when none of them exists.
  def choose_directory(base_name)
    candidates = [
      base_name,
      File.join(File.dirname($0), "..", Gonzui::PKGDATADIR, base_name),
      File.join(Gonzui::PKGDATADIR, base_name),
    ]
    directory = candidates.find {|d| File.directory?(d) } || candidates.last
    File.expand_path(directory)
  end

  # Sets @user and @group to the names of the current process owner.
  def set_user_and_group
    require 'etc'
    @user = Etc::getpwuid(Process.uid).name
    @group = Etc::getgrgid(Process.gid).name
  end

  # Names of all settings as Symbols, without the leading "@".
  def keys
    instance_variables.map {|name| name.to_s.delete("@").intern }
  end
end
|
117
|
+
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
#
|
2
|
+
# content.rb - content implementation
|
3
|
+
#
|
4
|
+
# Copyright (C) 2004-2005 Satoru Takabayashi <satoru@namazu.org>
|
5
|
+
# All rights reserved.
|
6
|
+
# This is free software with ABSOLUTELY NO WARRANTY.
|
7
|
+
#
|
8
|
+
# You can redistribute it and/or modify it under the terms of
|
9
|
+
# the GNU General Public License version 2.
|
10
|
+
#
|
11
|
+
|
12
|
+
module Gonzui
|
13
|
+
# A piece of indexed content: its text, modification time and path.
Content = Struct.new(:text, :mtime, :path)
class Content
  # Length of the text, as reported by the text object itself.
  def length
    text.length
  end
end
|
19
|
+
end
|
data/lib/gonzui/dbm.rb
ADDED
@@ -0,0 +1,673 @@
|
|
1
|
+
#
|
2
|
+
# dbm.rb - gonzui DB library
|
3
|
+
#
|
4
|
+
# Copyright (C) 2004-2005 Satoru Takabayashi <satoru@namazu.org>
|
5
|
+
# All rights reserved.
|
6
|
+
# This is free software with ABSOLUTELY NO WARRANTY.
|
7
|
+
#
|
8
|
+
# You can redistribute it and/or modify it under the terms of
|
9
|
+
# the GNU General Public License version 2.
|
10
|
+
#
|
11
|
+
require 'zlib'
|
12
|
+
|
13
|
+
module Gonzui
|
14
|
+
class IncompatibleDBError < GonzuiError; end
|
15
|
+
DB_VERSION = "13"
|
16
|
+
|
17
|
+
# Entry point for opening the gonzui database.
module DBM
  module_function

  # Opens the database described by +config+.
  #
  # Unless +read_only+ is set, the database directory is created first.
  # Without a block the opened dbm object is returned and the caller is
  # responsible for closing it.  With a block the dbm is yielded, closed
  # afterwards (even if the block raises) and the block's value returned.
  def open(config, read_only = false)
    File.mkpath(config.db_directory) unless read_only

    backend = BDBDBM # to be pluggable
    handle = backend.new(config, read_only)
    return handle unless block_given?

    begin
      yield(handle)
    ensure
      handle.close
    end
  end
end
|
35
|
+
|
36
|
+
# Hands out sequential integer ids for texts and records the mapping
# in the databases of the owning dbm.
#
# Ids already stored in the text->id database are reused; lookups are
# cached in memory until #flush is called.
class IDCounter
  # dbm     - object that answers to the database names below, to #seq
  #           and to #increase_counter.
  # id_name - name used to build the "last_<id_name>" sequence key.
  # counter - key passed to dbm.increase_counter on flush.
  # db      - name of the text -> id database.
  # rev_db  - name of the id -> text database.
  # alt_db  - name of the id -> alternative value database, or nil.
  def initialize(dbm, id_name, counter, db, rev_db, alt_db)
    @dbm = dbm
    @id_name = id_name
    @counter = counter
    @db = dbm.send(db)
    @rev_db = dbm.send(rev_db)
    @alt_db = alt_db ? dbm.send(alt_db) : nil

    @count = 0
    @cache = {}
    # -1 means no id has been handed out yet; the first id is 0.
    @last_id = @dbm.seq[make_last_key] || -1
  end

  # Persists the number of ids created since the last flush and the
  # last id handed out, then resets the in-memory state.  Does nothing
  # when no id was created.
  def flush
    return unless @count > 0
    @dbm.increase_counter(@counter, @count)
    @dbm.seq[make_last_key] = @last_id if @last_id >= 0
    @count = 0
    @cache = {}
  end

  # Key under which the last id is stored in dbm.seq.
  def make_last_key
    "last_" + @id_name.to_s
  end

  # Allocates and returns the next id.
  def make_new_id
    @count += 1
    @last_id += 1
    return @last_id
  end

  # Returns the id for +text+, creating one if it is not known yet.
  def get_id(text)
    lookup_or_create(text)
  end

  # Like #get_id, but when a new id is created +alt+ is additionally
  # stored for it in the alternative database.
  def get_id2(text, alt)
    lookup_or_create(text) {|id| @alt_db[id] = alt }
  end

  private

  # Shared lookup: cache first, then the text -> id database, and
  # finally a freshly allocated id.  The optional block is called with
  # the id only when a new one was created.
  def lookup_or_create(text)
    id = @cache[text]
    return id unless id.nil?

    id = @db[text]
    if id.nil?
      id = make_new_id
      @db[text] = id
      @rev_db[id] = text
      yield(id) if block_given?
    end
    @cache[text] = id
  end
end
|
98
|
+
|
99
|
+
# Store/fetch converters used for database keys and values.
#
# Each Adaptor holds a +store+ callable applied before writing and a
# +fetch+ callable applied after reading; nil means the value is used
# as is.  The pack/unpack helpers called by ID and Fixnum are not
# defined in this block and must be provided by this module elsewhere.
module AutoPack
  Adaptor = Struct.new(:store, :fetch)

  ID = Adaptor.new(
    lambda {|value| pack_id(value) },
    lambda {|packed| unpack_id(packed) }
  )
  Fixnum = Adaptor.new(
    lambda {|value| pack_fixnum(value) },
    lambda {|packed| unpack_fixnum(packed) }
  )
  Symbol = Adaptor.new(
    lambda {|symbol| symbol.to_s },
    lambda {|text| text.intern }
  )
  String = Adaptor.new(nil, nil)
  GZString = Adaptor.new(
    lambda {|plain| Zlib::Deflate.deflate(plain) },
    lambda {|compressed| Zlib::Inflate.inflate(compressed) }
  )
end
|
111
|
+
|
112
|
+
class DBMError < GonzuiError; end
|
113
|
+
class AbstractDBM
|
114
|
+
include Util
|
115
|
+
|
116
|
+
ap = AutoPack # for short
|
117
|
+
DBTable = [
|
118
|
+
[:fmtid_fmt, ap::ID, ap::String, false],
|
119
|
+
[:fmtid_fabbr, ap::ID, ap::String, false],
|
120
|
+
[:fabbr_fmtid, ap::String, ap::ID, false],
|
121
|
+
[:lcsid_lcs, ap::ID, ap::String, false],
|
122
|
+
[:lcsid_labbr, ap::ID, ap::String, false],
|
123
|
+
[:labbr_lcsid, ap::String, ap::ID, false],
|
124
|
+
[:seq, ap::String, ap::Fixnum, false],
|
125
|
+
[:stat, ap::String, ap::Fixnum, false],
|
126
|
+
[:pkg_pkgid, ap::String, ap::ID, false],
|
127
|
+
[:pkgid_pkg, ap::ID, ap::String, false],
|
128
|
+
[:pkgid_pathids, ap::ID, ap::ID, true],
|
129
|
+
[:pkgid_fmtids, ap::ID, ap::ID, true],
|
130
|
+
[:pkgid_lcsids, ap::ID, ap::ID, true],
|
131
|
+
[:pkgid_options, ap::ID, ap::String, true],
|
132
|
+
[:pkgid_src, ap::ID, ap::String, false],
|
133
|
+
[:path_pathid, ap::String, ap::ID, false],
|
134
|
+
[:pathid_digest, ap::ID, ap::GZString, false],
|
135
|
+
[:pathid_info, ap::ID, ap::String, false],
|
136
|
+
[:pathid_content, ap::ID, ap::GZString, false],
|
137
|
+
[:pathid_bols, ap::ID, ap::GZString, false],
|
138
|
+
[:pathid_hash, ap::ID, ap::String, false],
|
139
|
+
[:pathid_path, ap::ID, ap::String, false],
|
140
|
+
[:pathid_pkgid, ap::ID, ap::ID, false],
|
141
|
+
[:pathid_wordids, ap::ID, ap::GZString, false],
|
142
|
+
[:type_typeid, ap::Symbol, ap::ID, false],
|
143
|
+
[:typeid_type, ap::ID, ap::Symbol, false],
|
144
|
+
[:word_wordid, ap::String, ap::ID, false],
|
145
|
+
[:wordid_pkgids, ap::ID, ap::ID, true],
|
146
|
+
[:wordid_word, ap::ID, ap::String, false],
|
147
|
+
[:pkgwordid_pathids, ap::String, ap::ID, true],
|
148
|
+
[:pathwordid_info, ap::String, ap::String, false],
|
149
|
+
[:version, ap::String, ap::String, false],
|
150
|
+
]
|
151
|
+
|
152
|
+
IDTable = [
|
153
|
+
# id_name, # of id text -> id id -> text id -> alt
|
154
|
+
[:type_id, :ntypes, :type_typeid, :typeid_type, nil],
|
155
|
+
[:word_id, :nwords, :word_wordid, :wordid_word, nil],
|
156
|
+
[:path_id, :ncontents, :path_pathid, :pathid_path, nil],
|
157
|
+
[:package_id, :npackages, :pkg_pkgid, :pkgid_pkg, nil],
|
158
|
+
[:format_id, :nformats, :fabbr_fmtid, :fmtid_fabbr, :fmtid_fmt],
|
159
|
+
[:license_id, :nlicenses, :labbr_lcsid, :lcsid_labbr, :lcsid_lcs],
|
160
|
+
]
|
161
|
+
|
162
|
+
def initialize(config, read_only = false)
|
163
|
+
raise "#{config.db_directory}: No such directory" unless
|
164
|
+
File.directory?(config.db_directory)
|
165
|
+
@config = config
|
166
|
+
|
167
|
+
validate_db_version
|
168
|
+
@db_opened = {}
|
169
|
+
DBTable.each {|db_name, key_type, value_type, dupsort|
|
170
|
+
open_db(db_name, key_type, value_type, dupsort)
|
171
|
+
}
|
172
|
+
put_db_version unless read_only
|
173
|
+
init_id_counters
|
174
|
+
|
175
|
+
@opened = true
|
176
|
+
@current_package_id = nil
|
177
|
+
@wordid_pathids_cache = {}
|
178
|
+
end
|
179
|
+
|
180
|
+
private
|
181
|
+
def init_id_counters
|
182
|
+
@id_counters = []
|
183
|
+
IDTable.each {|id_name, counter, db, rev_db, alt_db|
|
184
|
+
counter = IDCounter.new(self, id_name, counter, db, rev_db, alt_db)
|
185
|
+
name = "@" + id_name.to_s + "_counter"
|
186
|
+
instance_variable_set(name, counter)
|
187
|
+
self.class.class_eval {
|
188
|
+
attr_reader name.delete("@")
|
189
|
+
}
|
190
|
+
@id_counters << counter
|
191
|
+
}
|
192
|
+
end
|
193
|
+
|
194
|
+
# Collects the word info of +word_id+ over every package and path the
# word is recorded for.  Returns an empty array when +word_id+ is nil
# or false.
def collect_all_results(word_id)
  return [] unless word_id

  found = []
  get_package_ids(word_id).each {|package_id|
    get_path_ids_from_package_and_word_id(package_id, word_id).each {|path_id|
      found.concat(get_all_word_info(path_id, word_id))
    }
  }
  found
end
|
206
|
+
|
207
|
+
# True when the configured database directory exists and contains at
# least one entry whose name does not start with "__".
def db_exist?
  directory = @config.db_directory
  return false unless File.directory?(directory)

  # filter out file names like __db.001.
  relevant = Dir.entries_without_dots(directory).find_all {|entry|
    not /^__/.match(entry)
  }
  !relevant.empty?
end
|
218
|
+
|
219
|
+
def decrease_counter(key, step = 1)
|
220
|
+
value = get_counter(key) - step
|
221
|
+
raise DBMError.new("counter #{key} becomes minus") if value < 0
|
222
|
+
@stat[key.to_s] = value
|
223
|
+
end
|
224
|
+
|
225
|
+
|
226
|
+
def do_open_db(name, key_type, value_type, dupsort)
|
227
|
+
raise NotImplementedError.new
|
228
|
+
end
|
229
|
+
|
230
|
+
# Calls +block+ with (id, abbrev, name) for every entry of the database
# returned by the method +id_name+, ordered by name.  The abbreviation
# is obtained by calling the method +get_abbrev+ with the id.
def each_property(id_name, get_abbrev, &block)
  rows = []
  send(id_name).each {|id, name|
    rows << [id, send(get_abbrev, id), name]
  }
  rows.sort_by {|row| row[2] }.each {|id, abbrev, name|
    block.call(id, abbrev, name)
  }
end
|
240
|
+
|
241
|
+
def get_bols(path_id)
|
242
|
+
DeltaDumper.undump_fixnums(@pathid_bols[path_id])
|
243
|
+
end
|
244
|
+
|
245
|
+
def open_db(db_name, key_type, value_type, dupsort)
|
246
|
+
return if @db_opened.include?(db_name)
|
247
|
+
db = do_open_db(db_name, key_type, value_type, dupsort)
|
248
|
+
@db_opened[db_name] = db
|
249
|
+
|
250
|
+
name = "@" + db_name.to_s
|
251
|
+
instance_variable_set(name, db)
|
252
|
+
self.class.class_eval {
|
253
|
+
attr_reader name.delete("@")
|
254
|
+
}
|
255
|
+
return db
|
256
|
+
end
|
257
|
+
|
258
|
+
def put_db_version
|
259
|
+
@version["version"] = DB_VERSION
|
260
|
+
end
|
261
|
+
|
262
|
+
# Checks that an existing database was written with DB_VERSION.
#
# Does nothing when no database exists yet.  Otherwise the version
# database is opened and its "version" entry is read; a BDB::Fatal
# raised while doing so leaves the version as "unknown".
#
# Raises IncompatibleDBError when the version differs from DB_VERSION.
def validate_db_version
  return unless db_exist?
  version = "unknown"
  begin
    db = do_open_db(:version, AutoPack::String, AutoPack::String, false)
    begin
      version = db["version"]
    ensure
      # Release the handle even when reading the entry fails.
      db.close
    end
  rescue BDB::Fatal
    # An unreadable version database is reported as "unknown" below.
  end
  return if version == DB_VERSION

  m = sprintf("DB format is incompatible (version %s expected but %s)",
              DB_VERSION, version)
  raise IncompatibleDBError.new(m)
end
|
277
|
+
|
278
|
+
def verify_stat_integrity
|
279
|
+
assert_equal_all(get_nformats,
|
280
|
+
fmtid_fmt.length,
|
281
|
+
fmtid_fabbr.length,
|
282
|
+
fabbr_fmtid.length)
|
283
|
+
assert_equal_all(get_npackages,
|
284
|
+
pkgid_pkg.length,
|
285
|
+
pkg_pkgid.length)
|
286
|
+
assert_equal_all(get_ncontents,
|
287
|
+
path_pathid.length,
|
288
|
+
pathid_path.length,
|
289
|
+
pathid_content.length,
|
290
|
+
pathid_info.length)
|
291
|
+
assert_equal_all(get_nwords,
|
292
|
+
word_wordid.length)
|
293
|
+
nlines_indexed = 0
|
294
|
+
@pathid_info.each_key {|path_id|
|
295
|
+
info = get_content_info(path_id)
|
296
|
+
nlines_indexed += info.nlines if info.indexed?
|
297
|
+
}
|
298
|
+
assert_equal(get_nlines_indexed, nlines_indexed)
|
299
|
+
end
|
300
|
+
|
301
|
+
def verify_seq_integrity
|
302
|
+
IDTable.each {|id_name, counter, db, rev_db, alt_db|
|
303
|
+
id = (self.send(rev_db).get_last_key or 0)
|
304
|
+
assert(id <= (@seq["last_" + id_name.to_s] or 0))
|
305
|
+
}
|
306
|
+
end
|
307
|
+
|
308
|
+
public
|
309
|
+
def binary_content?(path_id)
|
310
|
+
format_id = get_format_id_from_path_id(path_id)
|
311
|
+
get_format_abbrev(format_id) == "binary"
|
312
|
+
end
|
313
|
+
|
314
|
+
# Flushes pending cached data and closes every opened database.
#
# Raises DBMError when the dbm has already been closed.  The check is
# made before flushing so that nothing is written to closed handles.
def close
  raise DBMError.new("dbm is already closed") unless @opened
  flush_cache
  @db_opened.each {|name, db|
    db.close
  }
  @opened = false
end
|
322
|
+
|
323
|
+
def consistent?
|
324
|
+
verify_stat_integrity
|
325
|
+
verify_seq_integrity
|
326
|
+
return true
|
327
|
+
end
|
328
|
+
|
329
|
+
def decrease_counter(key, step = 1)
|
330
|
+
value = get_counter(key) - step
|
331
|
+
raise DBMError.new("counter #{key} becomes minus") if value < 0
|
332
|
+
@stat[key.to_s] = value
|
333
|
+
end
|
334
|
+
|
335
|
+
def each_db_name
|
336
|
+
@db_opened.each_key {|db_name| yield(db_name.to_s) }
|
337
|
+
end
|
338
|
+
|
339
|
+
def each_format(&block)
|
340
|
+
each_property(:fmtid_fmt, :get_format_abbrev, &block)
|
341
|
+
end
|
342
|
+
|
343
|
+
def each_license(&block)
|
344
|
+
each_property(:lcsid_lcs, :get_license_abbrev, &block)
|
345
|
+
end
|
346
|
+
|
347
|
+
def each_package_name
|
348
|
+
@pkgid_pkg.each_value {|value| yield(value) }
|
349
|
+
end
|
350
|
+
|
351
|
+
def each_word(&block)
|
352
|
+
@word_wordid.each_key {|word| yield(word) }
|
353
|
+
end
|
354
|
+
|
355
|
+
def find_all(pattern)
|
356
|
+
word_id = @word_wordid[pattern]
|
357
|
+
results = collect_all_results(word_id)
|
358
|
+
return results
|
359
|
+
end
|
360
|
+
|
361
|
+
def find_all_by_prefix(pattern)
|
362
|
+
raise NotImplementedError.new("should be implemented in a sub class")
|
363
|
+
end
|
364
|
+
|
365
|
+
# Returns the results of every indexed word that matches the regular
# expression built from +pattern+.
def find_all_by_regexp(pattern)
  matcher = Regexp.new(pattern)
  hits = []
  @word_wordid.each {|word, word_id|
    next unless matcher.match(word)
    hits.concat(collect_all_results(word_id))
  }
  hits
end
|
375
|
+
|
376
|
+
def find_word_info(path_id, word_id)
|
377
|
+
get_all_word_info(path_id, word_id).each {|info|
|
378
|
+
yield(info)
|
379
|
+
}
|
380
|
+
end
|
381
|
+
|
382
|
+
def flush_cache
|
383
|
+
wordids = @wordid_pathids_cache.keys.sort!
|
384
|
+
wordids.each {|word_id|
|
385
|
+
package_word_id = AutoPack.pack_id2(@current_package_id, word_id)
|
386
|
+
@wordid_pathids_cache[word_id].each {|path_id|
|
387
|
+
@pkgwordid_pathids[package_word_id] = path_id
|
388
|
+
}
|
389
|
+
}
|
390
|
+
wordids.each {|word_id|
|
391
|
+
@wordid_pkgids[word_id] = @current_package_id
|
392
|
+
}
|
393
|
+
@wordid_pathids_cache.clear
|
394
|
+
@id_counters.each {|counter| counter.flush}
|
395
|
+
end
|
396
|
+
|
397
|
+
def get_all_word_info(path_id, word_id)
|
398
|
+
path_word_id = AutoPack.pack_id2(path_id, word_id)
|
399
|
+
dump = @pathwordid_info[path_word_id]
|
400
|
+
return [] if dump.nil?
|
401
|
+
bols = get_bols(path_id)
|
402
|
+
bol = bols.shift
|
403
|
+
assert_equal(0, bol)
|
404
|
+
|
405
|
+
lineno = 0
|
406
|
+
DeltaDumper.undump_tuples(WordInfo, dump).map {|seqno, byteno, type_id|
|
407
|
+
while bol and bol <= byteno
|
408
|
+
lineno += 1
|
409
|
+
bol = bols.shift
|
410
|
+
end
|
411
|
+
type = get_type(type_id)
|
412
|
+
WordInfo.new(word_id, path_id, seqno, byteno, type_id, type, lineno)
|
413
|
+
}
|
414
|
+
end
|
415
|
+
|
416
|
+
def get_content_hash(path_id)
|
417
|
+
@pathid_hash[path_id]
|
418
|
+
end
|
419
|
+
|
420
|
+
def get_counter(key)
|
421
|
+
@stat[key.to_s] or 0
|
422
|
+
end
|
423
|
+
|
424
|
+
def get_content(path_id)
|
425
|
+
@pathid_content[path_id]
|
426
|
+
end
|
427
|
+
|
428
|
+
def get_content_info(path_id)
|
429
|
+
dump = @pathid_info[path_id]
|
430
|
+
assert_non_nil(dump)
|
431
|
+
return ContentInfo.load(dump)
|
432
|
+
end
|
433
|
+
|
434
|
+
def get_digest(path_id)
|
435
|
+
dump = @pathid_digest[path_id]
|
436
|
+
return [] if dump.nil?
|
437
|
+
DeltaDumper.undump_tuples(DigestInfo, dump).map {|data|
|
438
|
+
data.push(get_type(data.last))
|
439
|
+
DigestInfo.new(*data)
|
440
|
+
}
|
441
|
+
end
|
442
|
+
|
443
|
+
def get_format_abbrev(format_id)
|
444
|
+
@fmtid_fabbr[format_id]
|
445
|
+
end
|
446
|
+
|
447
|
+
def get_format_id(format_abbrev)
|
448
|
+
@fabbr_fmtid[format_abbrev]
|
449
|
+
end
|
450
|
+
|
451
|
+
def get_format_id_from_path_id(path_id)
|
452
|
+
get_content_info(path_id).format_id
|
453
|
+
end
|
454
|
+
|
455
|
+
def get_format_ids_from_package_id(package_id)
|
456
|
+
@pkgid_fmtids.duplicates(package_id)
|
457
|
+
end
|
458
|
+
|
459
|
+
def get_format_name(format_id)
|
460
|
+
@fmtid_fmt[format_id]
|
461
|
+
end
|
462
|
+
|
463
|
+
def get_license_abbrev(license_id)
|
464
|
+
@lcsid_labbr[license_id]
|
465
|
+
end
|
466
|
+
|
467
|
+
def get_license_id(license_abbrev)
|
468
|
+
@labbr_lcsid[license_abbrev]
|
469
|
+
end
|
470
|
+
|
471
|
+
def get_license_id_from_path_id(path_id)
|
472
|
+
get_content_info(path_id).license_id
|
473
|
+
end
|
474
|
+
|
475
|
+
def get_license_ids_from_package_id(package_id)
|
476
|
+
@pkgid_lcsids.duplicates(package_id)
|
477
|
+
end
|
478
|
+
|
479
|
+
def get_license_name(license_id)
|
480
|
+
@lcsid_lcs[license_id]
|
481
|
+
end
|
482
|
+
|
483
|
+
def get_ncontents
|
484
|
+
get_counter(:ncontents)
|
485
|
+
end
|
486
|
+
|
487
|
+
def get_ncontents_by_format_id(format_id)
|
488
|
+
format_abbrev = get_format_abbrev(format_id)
|
489
|
+
key = make_ncontents_by_format_key(format_abbrev)
|
490
|
+
return get_counter(key)
|
491
|
+
end
|
492
|
+
|
493
|
+
def get_ncontents_by_license_id(license_id)
|
494
|
+
license_abbrev = get_license_abbrev(license_id)
|
495
|
+
key = make_ncontents_by_license_key(license_abbrev)
|
496
|
+
return get_counter(key)
|
497
|
+
end
|
498
|
+
|
499
|
+
def get_ncontents_indexed
|
500
|
+
get_counter(:ncontents_indexed)
|
501
|
+
end
|
502
|
+
|
503
|
+
def get_ncontents_in_package(package_name)
|
504
|
+
package_id = get_package_id(package_name)
|
505
|
+
@pkgid_pathids.duplicates(package_id).length
|
506
|
+
end
|
507
|
+
|
508
|
+
def get_nformats
|
509
|
+
get_counter(:nformats)
|
510
|
+
end
|
511
|
+
|
512
|
+
def get_nlines_indexed
|
513
|
+
get_counter(:nlines_indexed)
|
514
|
+
end
|
515
|
+
|
516
|
+
def get_npackages
|
517
|
+
get_counter(:npackages)
|
518
|
+
end
|
519
|
+
|
520
|
+
def get_nwords
|
521
|
+
get_counter(:nwords)
|
522
|
+
end
|
523
|
+
|
524
|
+
def get_package_id(package_name)
|
525
|
+
@pkg_pkgid[package_name]
|
526
|
+
end
|
527
|
+
|
528
|
+
def get_package_id_from_path_id(path_id)
|
529
|
+
@pathid_pkgid[path_id]
|
530
|
+
end
|
531
|
+
|
532
|
+
def get_package_ids(word_id)
|
533
|
+
@wordid_pkgids.duplicates(word_id)
|
534
|
+
end
|
535
|
+
|
536
|
+
def get_package_name(package_id)
|
537
|
+
@pkgid_pkg[package_id]
|
538
|
+
end
|
539
|
+
|
540
|
+
def get_package_names
|
541
|
+
@pkgid_pkg.values
|
542
|
+
end
|
543
|
+
|
544
|
+
# Reads the options stored for +package_id+ and returns them as a Hash
# with Symbol keys.
#
# Each stored value has the form "name:value".  :exclude_pattern is
# turned into a Regexp and :noindex_formats into an Array of Strings.
# Raises DBMError for any other option name.  Both options are
# asserted to be present.
def get_package_options(package_id)
  options = {}
  @pkgid_options.duplicates(package_id).each {|entry|
    # Split on the first colon only; the value may contain colons.
    name, raw = entry.split(":", 2)
    key = name.intern
    options[key] = case key
                   when :exclude_pattern
                     Regexp.new(raw)
                   when :noindex_formats
                     raw.split(",")
                   else
                     raise DBMError.new("#{key}: unknown option")
                   end
  }
  assert(options[:exclude_pattern])
  assert(options[:noindex_formats])
  return options
end
|
564
|
+
|
565
|
+
def get_path(path_id)
|
566
|
+
@pathid_path[path_id]
|
567
|
+
end
|
568
|
+
|
569
|
+
def get_path_id(path)
|
570
|
+
@path_pathid[path]
|
571
|
+
end
|
572
|
+
|
573
|
+
def get_path_ids(package_id)
|
574
|
+
@pkgid_pathids.duplicates(package_id)
|
575
|
+
end
|
576
|
+
|
577
|
+
def get_path_ids_from_package_and_word_id(package_id, word_id)
|
578
|
+
package_word_id = AutoPack.pack_id2(package_id, word_id)
|
579
|
+
return @pkgwordid_pathids.duplicates(package_word_id)
|
580
|
+
end
|
581
|
+
|
582
|
+
def get_source_uri(package_id)
|
583
|
+
@pkgid_src[package_id]
|
584
|
+
end
|
585
|
+
|
586
|
+
def get_type(type_id)
|
587
|
+
@typeid_type[type_id]
|
588
|
+
end
|
589
|
+
|
590
|
+
def get_type_id(type)
|
591
|
+
@type_id_counter.get_id(type)
|
592
|
+
end
|
593
|
+
|
594
|
+
def get_word(word_id)
|
595
|
+
@wordid_word[word_id]
|
596
|
+
end
|
597
|
+
|
598
|
+
def get_word_id(word)
|
599
|
+
@word_wordid[word]
|
600
|
+
end
|
601
|
+
|
602
|
+
def get_word_ids(path_id)
|
603
|
+
DeltaDumper.undump_ids(@pathid_wordids[path_id])
|
604
|
+
end
|
605
|
+
|
606
|
+
def has_format_id?(format_id)
|
607
|
+
@fmtid_fmt.has_key?(format_id)
|
608
|
+
end
|
609
|
+
|
610
|
+
def has_format_abbrev?(format_abbrev)
|
611
|
+
@fabbr_fmtid.has_key?(format_abbrev)
|
612
|
+
end
|
613
|
+
|
614
|
+
def has_license_id?(license_id)
|
615
|
+
@lcsid_lcs.has_key?(license_id)
|
616
|
+
end
|
617
|
+
|
618
|
+
def has_license_abbrev?(license_abbrev)
|
619
|
+
@labbr_lcsid.has_key?(license_abbrev)
|
620
|
+
end
|
621
|
+
|
622
|
+
def has_package?(package_name)
|
623
|
+
@pkg_pkgid.include?(package_name)
|
624
|
+
end
|
625
|
+
|
626
|
+
def has_path?(path)
|
627
|
+
@path_pathid.include?(path)
|
628
|
+
end
|
629
|
+
|
630
|
+
def has_type?(type)
|
631
|
+
@type_typeid.include?(type)
|
632
|
+
end
|
633
|
+
|
634
|
+
# True when +word+ has an entry in the word -> id database.
def has_word?(word)
  @word_wordid[word] ? true : false
end
|
642
|
+
|
643
|
+
def increase_counter(key, step = 1)
|
644
|
+
@stat[key.to_s] = get_counter(key) + step
|
645
|
+
end
|
646
|
+
|
647
|
+
def make_ncontents_by_format_key(format_abbrev)
|
648
|
+
("ncontents_format_" + format_abbrev).intern
|
649
|
+
end
|
650
|
+
|
651
|
+
def make_ncontents_by_license_key(license_abbrev)
|
652
|
+
("ncontents_license_" + license_abbrev).downcase.intern
|
653
|
+
end
|
654
|
+
|
655
|
+
# FIXME: Ad hoc serialization. We avoid using Marshal
|
656
|
+
# not to make the DB Ruby-dependent.
|
657
|
+
def put_package_options(package_id)
|
658
|
+
@pkgid_options[package_id] = sprintf("exclude_pattern:%s",
|
659
|
+
@config.exclude_pattern.to_s)
|
660
|
+
@pkgid_options[package_id] = sprintf("noindex_formats:%s",
|
661
|
+
@config.noindex_formats.join(","))
|
662
|
+
end
|
663
|
+
|
664
|
+
def put_pathid_wordids(package_id, path_id, word_ids)
|
665
|
+
@current_package_id = package_id
|
666
|
+
word_ids.each {|word_id|
|
667
|
+
pathids = (@wordid_pathids_cache[word_id] ||= [])
|
668
|
+
pathids << path_id
|
669
|
+
}
|
670
|
+
end
|
671
|
+
end
|
672
|
+
end
|
673
|
+
|