gonzui 1.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (116) hide show
  1. data/AUTHORS.txt +9 -0
  2. data/History.txt +5539 -0
  3. data/Manifest.txt +115 -0
  4. data/PostInstall.txt +17 -0
  5. data/README.rdoc +149 -0
  6. data/Rakefile +28 -0
  7. data/bin/gonzui-db +167 -0
  8. data/bin/gonzui-import +177 -0
  9. data/bin/gonzui-remove +58 -0
  10. data/bin/gonzui-search +68 -0
  11. data/bin/gonzui-server +176 -0
  12. data/bin/gonzui-update +53 -0
  13. data/data/gonzui/catalog/catalog.ja +80 -0
  14. data/data/gonzui/doc/favicon.ico +0 -0
  15. data/data/gonzui/doc/folder.png +0 -0
  16. data/data/gonzui/doc/gonzui.css +279 -0
  17. data/data/gonzui/doc/gonzui.js +111 -0
  18. data/data/gonzui/doc/text.png +0 -0
  19. data/data/gonzuirc.sample +29 -0
  20. data/ext/autopack/autopack.c +88 -0
  21. data/ext/autopack/extconf.rb +3 -0
  22. data/ext/delta/delta.c +147 -0
  23. data/ext/delta/extconf.rb +5 -0
  24. data/ext/texttokenizer/extconf.rb +5 -0
  25. data/ext/texttokenizer/texttokenizer.c +93 -0
  26. data/ext/xmlformatter/extconf.rb +5 -0
  27. data/ext/xmlformatter/xmlformatter.c +207 -0
  28. data/lib/gonzui.rb +59 -0
  29. data/lib/gonzui/apt.rb +193 -0
  30. data/lib/gonzui/bdbdbm.rb +118 -0
  31. data/lib/gonzui/cmdapp.rb +14 -0
  32. data/lib/gonzui/cmdapp/app.rb +175 -0
  33. data/lib/gonzui/cmdapp/search.rb +134 -0
  34. data/lib/gonzui/config.rb +117 -0
  35. data/lib/gonzui/content.rb +19 -0
  36. data/lib/gonzui/dbm.rb +673 -0
  37. data/lib/gonzui/deindexer.rb +162 -0
  38. data/lib/gonzui/delta.rb +49 -0
  39. data/lib/gonzui/extractor.rb +347 -0
  40. data/lib/gonzui/fetcher.rb +309 -0
  41. data/lib/gonzui/gettext.rb +144 -0
  42. data/lib/gonzui/importer.rb +84 -0
  43. data/lib/gonzui/indexer.rb +316 -0
  44. data/lib/gonzui/info.rb +80 -0
  45. data/lib/gonzui/license.rb +100 -0
  46. data/lib/gonzui/logger.rb +48 -0
  47. data/lib/gonzui/monitor.rb +177 -0
  48. data/lib/gonzui/progressbar.rb +235 -0
  49. data/lib/gonzui/remover.rb +38 -0
  50. data/lib/gonzui/searcher.rb +330 -0
  51. data/lib/gonzui/searchquery.rb +235 -0
  52. data/lib/gonzui/searchresult.rb +111 -0
  53. data/lib/gonzui/updater.rb +254 -0
  54. data/lib/gonzui/util.rb +415 -0
  55. data/lib/gonzui/vcs.rb +128 -0
  56. data/lib/gonzui/webapp.rb +25 -0
  57. data/lib/gonzui/webapp/advsearch.rb +123 -0
  58. data/lib/gonzui/webapp/filehandler.rb +24 -0
  59. data/lib/gonzui/webapp/jsfeed.rb +61 -0
  60. data/lib/gonzui/webapp/markup.rb +445 -0
  61. data/lib/gonzui/webapp/search.rb +269 -0
  62. data/lib/gonzui/webapp/servlet.rb +319 -0
  63. data/lib/gonzui/webapp/snippet.rb +155 -0
  64. data/lib/gonzui/webapp/source.rb +37 -0
  65. data/lib/gonzui/webapp/stat.rb +137 -0
  66. data/lib/gonzui/webapp/top.rb +63 -0
  67. data/lib/gonzui/webapp/uri.rb +140 -0
  68. data/lib/gonzui/webapp/webrick.rb +48 -0
  69. data/script/console +10 -0
  70. data/script/destroy +14 -0
  71. data/script/generate +14 -0
  72. data/script/makemanifest.rb +21 -0
  73. data/tasks/extconf.rake +13 -0
  74. data/tasks/extconf/autopack.rake +43 -0
  75. data/tasks/extconf/delta.rake +43 -0
  76. data/tasks/extconf/texttokenizer.rake +43 -0
  77. data/tasks/extconf/xmlformatter.rake +43 -0
  78. data/test/_external_tools.rb +13 -0
  79. data/test/_test-util.rb +142 -0
  80. data/test/foo/Makefile.foo +66 -0
  81. data/test/foo/bar.c +5 -0
  82. data/test/foo/bar.h +6 -0
  83. data/test/foo/foo.c +25 -0
  84. data/test/foo/foo.spec +33 -0
  85. data/test/test_apt.rb +42 -0
  86. data/test/test_autopack_extn.rb +7 -0
  87. data/test/test_bdbdbm.rb +79 -0
  88. data/test/test_cmdapp-app.rb +35 -0
  89. data/test/test_cmdapp-search.rb +99 -0
  90. data/test/test_config.rb +28 -0
  91. data/test/test_content.rb +15 -0
  92. data/test/test_dbm.rb +171 -0
  93. data/test/test_deindexer.rb +50 -0
  94. data/test/test_delta.rb +66 -0
  95. data/test/test_extractor.rb +78 -0
  96. data/test/test_fetcher.rb +75 -0
  97. data/test/test_gettext.rb +50 -0
  98. data/test/test_gonzui.rb +11 -0
  99. data/test/test_helper.rb +10 -0
  100. data/test/test_importer.rb +56 -0
  101. data/test/test_indexer.rb +37 -0
  102. data/test/test_info.rb +82 -0
  103. data/test/test_license.rb +49 -0
  104. data/test/test_logger.rb +60 -0
  105. data/test/test_monitor.rb +23 -0
  106. data/test/test_searcher.rb +37 -0
  107. data/test/test_searchquery.rb +27 -0
  108. data/test/test_searchresult.rb +43 -0
  109. data/test/test_texttokenizer.rb +47 -0
  110. data/test/test_updater.rb +95 -0
  111. data/test/test_util.rb +149 -0
  112. data/test/test_vcs.rb +61 -0
  113. data/test/test_webapp-markup.rb +42 -0
  114. data/test/test_webapp-util.rb +19 -0
  115. data/test/test_webapp-xmlformatter.rb +19 -0
  116. metadata +291 -0
@@ -0,0 +1,134 @@
1
+ #
2
+ # search.rb - command line searcher
3
+ #
4
+ # Copyright (C) 2004-2005 Satoru Takabayashi <satoru@namazu.org>
5
+ # All rights reserved.
6
+ # This is free software with ABSOLUTELY NO WARRANTY.
7
+ #
8
+ # You can redistribute it and/or modify it under the terms of
9
+ # the GNU General Public License version 2.
10
+ #
11
+
12
+ module Gonzui
13
# Command line searcher: runs a query against the gonzui DB and prints
# the hits to an output stream.
class CommandLineSearcher
  # config::  configuration object handed to DBM.open (opened read-only).
  # options:: hash keyed by option name. Keys read here: 'out',
  #           'line-number', 'context', 'count', 'package', 'prefix',
  #           'regexp', 'color', 'no-filename' and 'type'.
  def initialize(config, options)
    @config = config
    @dbm = DBM.open(@config, true)
    @out = (options['out'] or STDOUT)
    @nlines = options['line-number']

    # 'context' wins over 'count'; plain one-line output is the default.
    @show_method = if options['context']
                     :show_context_lines
                   elsif options['count']
                     :show_count
                   else
                     :show_line
                   end

    @package_name = options['package']

    # nil.to_i is 0, so no context lines unless the option is given.
    @ncontexts = options['context'].to_i

    # 'regexp' takes precedence over 'prefix' when both are given.
    @search_method = :find_all
    @search_method = :find_all_by_prefix if options['prefix']
    @search_method = :find_all_by_regexp if options['regexp']

    @use_regexp = options['regexp']
    @use_color = options['color']
    @no_filename = options['no-filename']

    @target_type = :all
    if options['type']
      type = options['type'].intern
      # NOTE(review): eprintf is defined outside this file; presumably it
      # reports the error and exits -- confirm.
      eprintf("unknown type: #{type}") unless LangScan::Type.include?(type)
      @target_type = type
    end
  end

  private
  # Wraps string in start_tag/end_tag (ANSI bold red by default).
  def highlight(string, start_tag = "\x1b[01;31m", end_tag = "\x1b[00m")
    sprintf("%s%s%s", start_tag, string, end_tag)
  end

  # Prints the single line containing the hit, optionally prefixed with
  # the file name and line number.
  def show_line(content, path, regexp, info)
    range = content.line_range(info.byteno)
    filename = if @no_filename then "" else path + ":" end
    linemark = if @nlines then info.lineno.to_s + ":" else "" end
    word = @dbm.get_word(info.word_id)
    # Split the line around the matched word so it can be highlighted.
    pre = content.substring(range.first...info.byteno)
    post = content.substring((info.byteno + word.length)...range.last)
    mid = word
    mid = highlight(mid) if @use_color
    @out.printf("%s%s%s%s%s\n", filename, linemark, pre, mid, post)
  end

  # Prints the hit together with @ncontexts surrounding lines. The line
  # holding the hit is marked with ":" and the others with "-" when line
  # numbers are enabled.
  def show_context_lines(content, path, regexp, info)
    @out.printf("== %s\n", path) unless @no_filename
    content.each_line_range(info.byteno, @ncontexts) {|lineno_offset, range|
      lineno = info.lineno + lineno_offset
      linemark = if @nlines
                   mark = if lineno == info.lineno then ":" else "-" end
                   lineno.to_s + mark
                 else
                   ""
                 end
      if range.include?(info.byteno)
        word = @dbm.get_word(info.word_id)
        pre = content.substring(range.first...info.byteno)
        post_range = (info.byteno + word.length)...range.last
        post = content.substring(post_range)
        mid = word
        mid = highlight(mid) if @use_color
        @out.printf("%s%s%s%s\n", linemark, pre, mid, post)
      else
        @out.printf("%s%s\n", linemark, content.substring(range))
      end
    }
  end

  # Fetches the content and path of the hit and dispatches to the
  # selected show method.
  def show_result(regexp, info)
    content = @dbm.get_content(info.path_id)
    path = @dbm.get_path(info.path_id)
    send(@show_method, content, path, regexp, info)
  end

  # True when the hit belongs to the package with the given id.
  def package_match?(target_package_id, info)
    package_id = @dbm.get_package_id_from_path_id(info.path_id)
    return target_package_id == package_id
  end

  public
  # Searches for pattern and writes the results to the output stream.
  # Hits are processed in (path_id, byteno) order and a hit on the same
  # line of the same file as the previous one is skipped. In count mode
  # only the number of raw results is written.
  def search(pattern)
    separator = ""
    regexp = if @use_regexp
               Regexp.new(pattern)
             else
               Regexp.new(Regexp.quote(pattern))
             end
    results = @dbm.send(@search_method, pattern)
    prev_lineno = prev_path_id = nil
    target_package_id = @dbm.get_package_id(@package_name) if @package_name
    results.sort_by {|x| [x.path_id, x.byteno] }.each {|info|
      next if prev_lineno and prev_path_id and
        info.path_id == prev_path_id and
        info.lineno == prev_lineno
      if info.match?(@target_type)
        unless @show_method == :show_count
          if @package_name.nil? or package_match?(target_package_id, info)
            @out.print separator
            show_result(regexp, info)
            # Context blocks are separated by a blank line.
            separator = "\n" if @show_method == :show_context_lines
          end
        end
      end
      prev_lineno = info.lineno
      prev_path_id = info.path_id
    }
    # Write the count to the configured stream like every other result,
    # not unconditionally to STDOUT.
    # NOTE(review): the count ignores the type and package filters; kept
    # as in the original -- confirm whether that is intended.
    @out.puts results.length if @show_method == :show_count
  end

  # Closes the underlying DB.
  def finish
    @dbm.close
  end
end
134
+ end
@@ -0,0 +1,117 @@
1
+ #
2
+ # config.rb - a config library
3
+ #
4
+ # Copyright (C) 2004-2005 Satoru Takabayashi <satoru@namazu.org>
5
+ # All rights reserved.
6
+ # This is free software with ABSOLUTELY NO WARRANTY.
7
+ #
8
+ # You can redistribute it and/or modify it under the terms of
9
+ # the GNU General Public License version 2.
10
+ #
11
+
12
+ module Gonzui
13
+ class Config
14
+ include Util
15
+
16
# Sets every configuration default and then defines an accessor pair for
# each instance variable assigned here.
def initialize
  #
  # All paths should be expanded to absolute paths
  # because the current directory would be changed when
  # a process becomes a daemon.
  #
  # The lowercase 'tmp' lookup is kept first for backward compatibility;
  # the conventional TMPDIR/TMP variables are consulted after it.
  @temporary_directory = ENV['tmp'] || ENV['TMPDIR'] || ENV['TMP'] || "/tmp"
  @db_directory = File.expand_path("gonzui.db")
  @cache_directory = File.join(@db_directory, "cache")
  @gonzui_log_file = File.expand_path("gonzui.log")

  @db_cache_size = 5 * 1024 ** 2

  @quiet = false
  @verbose = false

  @utf8 = true
  @encoding_preference = UTF8::Preference

  @noindex_formats = []
  # FIXME: should be more flexible
  @exclude_pattern = /~$|\.bak$|CVS|\.svn|\.git/

  #
  # For gonzui-server
  #
  @pid_file = File.expand_path("gonzui.pid")
  @daemon = false
  @access_log_file = File.expand_path("access.log")
  @catalog_directory = choose_directory("catalog")
  @doc_directory = choose_directory("doc")
  @http_port = Gonzui::HTTP_PORT
  @bind_address = '*'
  @user = nil
  @group = nil
  @site_title = "gonzui"
  @base_mount_point = "/"

  @default_results_per_page = 10
  @max_results_per_page = 50
  @max_pages = 20
  @max_words = 10
  @max_packages_per_page = 100
  @nresults_candidates = [10, 20, 30, 50]

  set_user_and_group if unix?

  # instance_variables yields Strings on Ruby 1.8 but Symbols on 1.9 and
  # later, and Symbol has no #delete; normalise with to_s first.
  accessor_names = instance_variables.map {|name| name.to_s.delete("@") }
  self.class.class_eval {
    attr_accessor(*accessor_names)
  }
end
68
+
69
+ private
70
# Returns the absolute path of the first existing candidate directory for
# base_name: base_name itself, then PKGDATADIR relative to the parent of
# the running script's directory, then PKGDATADIR itself. When none of
# them exists the last candidate is returned.
def choose_directory(base_name)
  candidates = [
    base_name,
    File.join(File.dirname($0), "..", Gonzui::PKGDATADIR, base_name),
    File.join(Gonzui::PKGDATADIR, base_name)
  ]
  chosen = candidates.find {|candidate| File.directory?(candidate) }
  chosen = candidates.last if chosen.nil?
  File.expand_path(chosen)
end
80
+
81
# Stores the names of the user and group the current process runs as
# in @user and @group.
def set_user_and_group
  require 'etc'
  passwd_entry = Etc.getpwuid(Process.uid)
  group_entry = Etc.getgrgid(Process.gid)
  @user, @group = passwd_entry.name, group_entry.name
end
88
+
89
# Returns the configuration keys: the name of every instance variable,
# without the leading "@", as a Symbol.
def keys
  # instance_variables yields Strings on Ruby 1.8 but Symbols on 1.9 and
  # later; to_s makes #delete available in both cases.
  instance_variables.map {|name| name.to_s.delete("@").intern }
end
92
+
93
+ public
94
# Upper bound on the number of results reachable through paging:
# results per page multiplied by the number of pages.
def max_results_overall
  per_page = @max_results_per_page
  return per_page * @max_pages
end
97
+
98
# Writes the configuration to out as a Ruby hash literal, one
# "key => value" line per key, sorted by key name, with the keys padded
# to a common width.
def dump(out = STDOUT)
  width = keys.map {|key| key.inspect.length }.max
  out.puts "{"
  sorted_keys = keys.sort_by {|key| key.to_s }
  sorted_keys.each do |key|
    label = key.inspect.ljust(width)
    out.print(sprintf(" %s => %s,\n", label, send(key).inspect))
  end
  out.puts "}"
end
106
+
107
# Reads file_name, evaluates its content as Ruby and assigns each
# key/value pair of the resulting hash through the matching "key="
# writer. Nothing is assigned when the file evaluates to nil.
#
# SECURITY: the file is executed with eval, so it must only ever be a
# trusted, user-owned configuration file.
def load(file_name)
  # File.read closes the file even when reading fails; previously the
  # handle leaked whenever eval raised. Passing file_name to eval makes
  # syntax errors point at the configuration file.
  source = File.read(file_name)
  settings = eval(source, binding, file_name)
  return if settings.nil?
  settings.each {|key, value|
    send(key.to_s + "=", value)
  }
end
116
+ end
117
+ end
@@ -0,0 +1,19 @@
1
+ #
2
+ # content.rb - content implementation
3
+ #
4
+ # Copyright (C) 2004-2005 Satoru Takabayashi <satoru@namazu.org>
5
+ # All rights reserved.
6
+ # This is free software with ABSOLUTELY NO WARRANTY.
7
+ #
8
+ # You can redistribute it and/or modify it under the terms of
9
+ # the GNU General Public License version 2.
10
+ #
11
+
12
+ module Gonzui
13
# A piece of content with three fields: text, mtime and path.
Content = Struct.new(:text, :mtime, :path) do
  # Length of the text (overrides Struct#length, which would return the
  # number of members).
  def length
    text.length
  end
end
19
+ end
@@ -0,0 +1,673 @@
1
+ #
2
+ # dbm.rb - gonzui DB library
3
+ #
4
+ # Copyright (C) 2004-2005 Satoru Takabayashi <satoru@namazu.org>
5
+ # All rights reserved.
6
+ # This is free software with ABSOLUTELY NO WARRANTY.
7
+ #
8
+ # You can redistribute it and/or modify it under the terms of
9
+ # the GNU General Public License version 2.
10
+ #
11
+ require 'zlib'
12
+
13
+ module Gonzui
14
+ class IncompatibleDBError < GonzuiError; end
15
+ DB_VERSION = "13"
16
+
17
# Entry point for opening the gonzui DB.
module DBM
  module_function

  # Opens the DB described by config. The DB directory is created first
  # unless read_only is set. Without a block the opened DB object is
  # returned; with a block the DB is yielded, closed afterwards even if
  # the block raises, and the block's value is returned.
  def open(config, read_only = false)
    File.mkpath(config.db_directory) unless read_only

    backend = BDBDBM # to be pluggable
    handle = backend.new(config, read_only)
    return handle unless block_given?

    begin
      yield(handle)
    ensure
      handle.close
    end
  end
end
35
+
36
# Hands out sequential integer ids for texts and records the mapping in
# a forward (text -> id) and a reverse (id -> text) DB, with an optional
# third DB (id -> alternative text).
class IDCounter
  # dbm::     DB object; must respond to seq, increase_counter and to the
  #           db, rev_db and alt_db accessor names.
  # id_name:: name used to build the "last_<id_name>" sequence key.
  # counter:: key passed to dbm.increase_counter on flush.
  # db, rev_db, alt_db:: accessor names on dbm; alt_db may be nil.
  def initialize(dbm, id_name, counter, db, rev_db, alt_db)
    @dbm = dbm
    @id_name = id_name
    @counter = counter
    @db = dbm.send(db)
    @rev_db = dbm.send(rev_db)
    @alt_db = if alt_db then dbm.send(alt_db) else nil end

    @count = 0    # ids created since the last flush
    @cache = {}   # text -> id
    # Resume after the last stored id; -1 makes the first new id 0.
    @last_id = (@dbm.seq[make_last_key] or -1)
  end

  # Writes the number of newly created ids and the last id back to the
  # DB and resets the in-memory state. Does nothing when no id has been
  # created since the previous flush.
  def flush
    if @count > 0
      @dbm.increase_counter(@counter, @count)
      @dbm.seq[make_last_key] = @last_id if @last_id >= 0
      @count = 0
      @cache = {}
    end
  end

  # Key under which the last id is stored in the seq DB.
  def make_last_key
    "last_" + @id_name.to_s
  end

  # Allocates and returns the next id.
  def make_new_id
    @count += 1
    @last_id += 1
    return @last_id
  end

  # Returns the id of text, creating and recording a new one when text
  # is unknown.
  def get_id(text)
    lookup_or_register(text)
  end

  # Like get_id, but additionally stores alt under a newly created id in
  # the alternative DB.
  def get_id2(text, alt)
    lookup_or_register(text) {|id| @alt_db[id] = alt }
  end

  private
  # Shared implementation of get_id and get_id2: consults the cache, then
  # the DB, and finally allocates a new id. The optional block is called
  # with the id only when a new id has been created.
  def lookup_or_register(text)
    id = @cache[text]
    return id unless id.nil?

    id = @db[text]
    if id.nil?
      id = make_new_id
      @db[text] = id
      @rev_db[id] = text
      yield(id) if block_given?
    end
    @cache[text] = id
    return id
  end
end
98
+
99
# Adaptors that convert keys and values on their way into (store) and
# out of (fetch) a DB. The constants below intentionally reuse names
# such as String and Symbol inside this namespace.
module AutoPack
  Adaptor = Struct.new(:store, :fetch)

  # Ids and fixnums go through the pack_*/unpack_* functions of this
  # module, which are defined outside this file.
  ID = Adaptor.new(
    lambda {|value| pack_id(value) },
    lambda {|packed| unpack_id(packed) }
  )
  Fixnum = Adaptor.new(
    lambda {|value| pack_fixnum(value) },
    lambda {|packed| unpack_fixnum(packed) }
  )

  # Symbols are stored as their string form.
  Symbol = Adaptor.new(
    lambda {|symbol| symbol.to_s },
    lambda {|text| text.intern }
  )

  # Strings carry no converters.
  String = Adaptor.new(nil, nil)

  # Strings stored deflate-compressed.
  GZString = Adaptor.new(
    lambda {|plain| Zlib::Deflate.deflate(plain) },
    lambda {|compressed| Zlib::Inflate.inflate(compressed) }
  )
end
111
+
112
+ class DBMError < GonzuiError; end
113
# Backend-independent part of the gonzui DB. A subclass supplies
# do_open_db (and find_all_by_prefix); this class opens every table of
# DBTable, exposes it through a reader of the same name and provides the
# lookup helpers built on top of the tables.
class AbstractDBM
  include Util

  ap = AutoPack # for short
  # Each row: [db name, key adaptor, value adaptor, dupsort flag].
  DBTable = [
    [:fmtid_fmt,         ap::ID,     ap::String,   false],
    [:fmtid_fabbr,       ap::ID,     ap::String,   false],
    [:fabbr_fmtid,       ap::String, ap::ID,       false],
    [:lcsid_lcs,         ap::ID,     ap::String,   false],
    [:lcsid_labbr,       ap::ID,     ap::String,   false],
    [:labbr_lcsid,       ap::String, ap::ID,       false],
    [:seq,               ap::String, ap::Fixnum,   false],
    [:stat,              ap::String, ap::Fixnum,   false],
    [:pkg_pkgid,         ap::String, ap::ID,       false],
    [:pkgid_pkg,         ap::ID,     ap::String,   false],
    [:pkgid_pathids,     ap::ID,     ap::ID,       true],
    [:pkgid_fmtids,      ap::ID,     ap::ID,       true],
    [:pkgid_lcsids,      ap::ID,     ap::ID,       true],
    [:pkgid_options,     ap::ID,     ap::String,   true],
    [:pkgid_src,         ap::ID,     ap::String,   false],
    [:path_pathid,       ap::String, ap::ID,       false],
    [:pathid_digest,     ap::ID,     ap::GZString, false],
    [:pathid_info,       ap::ID,     ap::String,   false],
    [:pathid_content,    ap::ID,     ap::GZString, false],
    [:pathid_bols,       ap::ID,     ap::GZString, false],
    [:pathid_hash,       ap::ID,     ap::String,   false],
    [:pathid_path,       ap::ID,     ap::String,   false],
    [:pathid_pkgid,      ap::ID,     ap::ID,       false],
    [:pathid_wordids,    ap::ID,     ap::GZString, false],
    [:type_typeid,       ap::Symbol, ap::ID,       false],
    [:typeid_type,       ap::ID,     ap::Symbol,   false],
    [:word_wordid,       ap::String, ap::ID,       false],
    [:wordid_pkgids,     ap::ID,     ap::ID,       true],
    [:wordid_word,       ap::ID,     ap::String,   false],
    [:pkgwordid_pathids, ap::String, ap::ID,       true],
    [:pathwordid_info,   ap::String, ap::String,   false],
    [:version,           ap::String, ap::String,   false],
  ]

  IDTable = [
    # id_name,    # of id,    text -> id,   id -> text,   id -> alt
    [:type_id,    :ntypes,    :type_typeid, :typeid_type, nil],
    [:word_id,    :nwords,    :word_wordid, :wordid_word, nil],
    [:path_id,    :ncontents, :path_pathid, :pathid_path, nil],
    [:package_id, :npackages, :pkg_pkgid,   :pkgid_pkg,   nil],
    [:format_id,  :nformats,  :fabbr_fmtid, :fmtid_fabbr, :fmtid_fmt],
    [:license_id, :nlicenses, :labbr_lcsid, :lcsid_labbr, :lcsid_lcs],
  ]

  # Opens every table of DBTable in config.db_directory. Raises when the
  # directory is missing or when an existing DB has a different version.
  # The DB version is written unless read_only is set.
  def initialize(config, read_only = false)
    raise "#{config.db_directory}: No such directory" unless
      File.directory?(config.db_directory)
    @config = config

    validate_db_version
    @db_opened = {}
    DBTable.each {|db_name, key_type, value_type, dupsort|
      open_db(db_name, key_type, value_type, dupsort)
    }
    put_db_version unless read_only
    init_id_counters

    @opened = true
    @current_package_id = nil
    @wordid_pathids_cache = {}
  end

  private
  # Creates one IDCounter per IDTable row, stored in @<id_name>_counter
  # with a reader of the same name.
  def init_id_counters
    @id_counters = []
    IDTable.each {|id_name, counter, db, rev_db, alt_db|
      counter = IDCounter.new(self, id_name, counter, db, rev_db, alt_db)
      name = "@" + id_name.to_s + "_counter"
      instance_variable_set(name, counter)
      self.class.class_eval {
        attr_reader name.delete("@")
      }
      @id_counters << counter
    }
  end

  # Collects the word info of every occurrence of word_id across all
  # packages and paths. Returns [] when word_id is nil.
  def collect_all_results(word_id)
    results = []
    if word_id
      get_package_ids(word_id).each {|package_id|
        path_ids = get_path_ids_from_package_and_word_id(package_id, word_id)
        path_ids.each {|path_id|
          results.concat(get_all_word_info(path_id, word_id))
        }
      }
    end
    return results
  end

  # True when the DB directory holds anything besides "__"-prefixed
  # entries.
  def db_exist?
    return false unless File.directory?(@config.db_directory)
    entries = Dir.entries_without_dots(@config.db_directory)
    # filter out file names like __db.001.
    entries = entries.find_all {|entry| not /^__/.match(entry) }
    return (not entries.empty?)
  end

  # Backend hook: opens and returns a single table.
  def do_open_db(name, key_type, value_type, dupsort)
    raise NotImplementedError.new
  end

  # Calls block with (id, abbrev, name) for every entry of the table
  # named id_name, ordered by name; get_abbrev names the method that maps
  # an id to its abbreviation.
  def each_property(id_name, get_abbrev, &block)
    properties = []
    self.send(id_name).each {|id, name|
      abbrev = self.send(get_abbrev, id)
      properties.push([id, abbrev, name])
    }
    properties.sort_by {|id, abbrev, name| name }.each {|id, abbrev, name|
      block.call(id, abbrev, name)
    }
  end

  # Decodes the stored pathid_bols entry of path_id (presumably the
  # beginning-of-line byte offsets -- confirm).
  def get_bols(path_id)
    DeltaDumper.undump_fixnums(@pathid_bols[path_id])
  end

  # Opens a table once, stores it in @<db_name> and defines a reader.
  # Returns nil when the table is already open.
  def open_db(db_name, key_type, value_type, dupsort)
    return if @db_opened.include?(db_name)
    db = do_open_db(db_name, key_type, value_type, dupsort)
    @db_opened[db_name] = db

    name = "@" + db_name.to_s
    instance_variable_set(name, db)
    self.class.class_eval {
      attr_reader name.delete("@")
    }
    return db
  end

  def put_db_version
    @version["version"] = DB_VERSION
  end

  # Raises IncompatibleDBError when an existing DB was not written with
  # DB_VERSION. A DB whose version table cannot be opened is reported as
  # version "unknown".
  def validate_db_version
    return unless db_exist?
    version = "unknown"
    begin
      db = do_open_db(:version, AutoPack::String, AutoPack::String, false)
      version = db["version"]
      db.close
    rescue BDB::Fatal
    end
    if version != DB_VERSION
      m = sprintf("DB format is incompatible (version %s expected but %s)",
                  DB_VERSION, version)
      raise IncompatibleDBError.new(m)
    end
  end

  # Checks that the stat counters agree with the table sizes.
  def verify_stat_integrity
    assert_equal_all(get_nformats,
                     fmtid_fmt.length,
                     fmtid_fabbr.length,
                     fabbr_fmtid.length)
    assert_equal_all(get_npackages,
                     pkgid_pkg.length,
                     pkg_pkgid.length)
    assert_equal_all(get_ncontents,
                     path_pathid.length,
                     pathid_path.length,
                     pathid_content.length,
                     pathid_info.length)
    assert_equal_all(get_nwords,
                     word_wordid.length)
    nlines_indexed = 0
    @pathid_info.each_key {|path_id|
      info = get_content_info(path_id)
      nlines_indexed += info.nlines if info.indexed?
    }
    assert_equal(get_nlines_indexed, nlines_indexed)
  end

  # Checks that no reverse table holds an id beyond the stored last id.
  def verify_seq_integrity
    IDTable.each {|id_name, counter, db, rev_db, alt_db|
      id = (self.send(rev_db).get_last_key or 0)
      assert(id <= (@seq["last_" + id_name.to_s] or 0))
    }
  end

  public
  def binary_content?(path_id)
    format_id = get_format_id_from_path_id(path_id)
    get_format_abbrev(format_id) == "binary"
  end

  # Flushes pending data and closes every table. Raises DBMError when
  # called on an already closed DB; the check comes first so that nothing
  # is written to tables that are closed.
  def close
    raise DBMError.new("dbm is already closed") unless @opened
    flush_cache
    @db_opened.each {|name, db|
      db.close
    }
    @opened = false
  end

  # Runs the integrity checks; returns true when none of them raises.
  def consistent?
    verify_stat_integrity
    verify_seq_integrity
    return true
  end

  # Decreases the stat counter key by step; raises DBMError rather than
  # letting it go negative. (Defined once here; an identical private copy
  # earlier in the class was shadowed by this definition.)
  def decrease_counter(key, step = 1)
    value = get_counter(key) - step
    raise DBMError.new("counter #{key} becomes minus") if value < 0
    @stat[key.to_s] = value
  end

  def each_db_name
    @db_opened.each_key {|db_name| yield(db_name.to_s) }
  end

  def each_format(&block)
    each_property(:fmtid_fmt, :get_format_abbrev, &block)
  end

  def each_license(&block)
    each_property(:lcsid_lcs, :get_license_abbrev, &block)
  end

  def each_package_name
    @pkgid_pkg.each_value {|value| yield(value) }
  end

  def each_word(&block)
    @word_wordid.each_key {|word| yield(word) }
  end

  # Returns the word info of every occurrence of the exact word pattern.
  def find_all(pattern)
    word_id = @word_wordid[pattern]
    results = collect_all_results(word_id)
    return results
  end

  def find_all_by_prefix(pattern)
    raise NotImplementedError.new("should be implemented in a sub class")
  end

  # Returns the word info of every word matching the regexp source
  # pattern; iterates over all indexed words.
  def find_all_by_regexp(pattern)
    regexp = Regexp.new(pattern)
    results = []
    @word_wordid.each {|word, word_id|
      if regexp.match(word)
        results.concat(collect_all_results(word_id))
      end
    }
    return results
  end

  def find_word_info(path_id, word_id)
    get_all_word_info(path_id, word_id).each {|info|
      yield(info)
    }
  end

  # Writes the word -> path ids collected by put_pathid_wordids for the
  # current package to the DB, then flushes every id counter.
  def flush_cache
    wordids = @wordid_pathids_cache.keys.sort!
    wordids.each {|word_id|
      package_word_id = AutoPack.pack_id2(@current_package_id, word_id)
      @wordid_pathids_cache[word_id].each {|path_id|
        @pkgwordid_pathids[package_word_id] = path_id
      }
    }
    wordids.each {|word_id|
      @wordid_pkgids[word_id] = @current_package_id
    }
    @wordid_pathids_cache.clear
    @id_counters.each {|counter| counter.flush }
  end

  # Returns a WordInfo for every occurrence of word_id in path_id, with
  # the line number derived from the stored bols offsets. Returns []
  # when nothing is stored for the pair.
  def get_all_word_info(path_id, word_id)
    path_word_id = AutoPack.pack_id2(path_id, word_id)
    dump = @pathwordid_info[path_word_id]
    return [] if dump.nil?
    bols = get_bols(path_id)
    bol = bols.shift
    assert_equal(0, bol)

    lineno = 0
    DeltaDumper.undump_tuples(WordInfo, dump).map {|seqno, byteno, type_id|
      # Advance past every offset at or before this occurrence.
      while bol and bol <= byteno
        lineno += 1
        bol = bols.shift
      end
      type = get_type(type_id)
      WordInfo.new(word_id, path_id, seqno, byteno, type_id, type, lineno)
    }
  end

  def get_content_hash(path_id)
    @pathid_hash[path_id]
  end

  # Value of the stat counter key, 0 when it has never been set.
  def get_counter(key)
    @stat[key.to_s] or 0
  end

  def get_content(path_id)
    @pathid_content[path_id]
  end

  def get_content_info(path_id)
    dump = @pathid_info[path_id]
    assert_non_nil(dump)
    return ContentInfo.load(dump)
  end

  # Returns the DigestInfo list stored for path_id, [] when there is
  # none.
  def get_digest(path_id)
    dump = @pathid_digest[path_id]
    return [] if dump.nil?
    DeltaDumper.undump_tuples(DigestInfo, dump).map {|data|
      # The last element is a type id; append the resolved type.
      data.push(get_type(data.last))
      DigestInfo.new(*data)
    }
  end

  def get_format_abbrev(format_id)
    @fmtid_fabbr[format_id]
  end

  def get_format_id(format_abbrev)
    @fabbr_fmtid[format_abbrev]
  end

  def get_format_id_from_path_id(path_id)
    get_content_info(path_id).format_id
  end

  def get_format_ids_from_package_id(package_id)
    @pkgid_fmtids.duplicates(package_id)
  end

  def get_format_name(format_id)
    @fmtid_fmt[format_id]
  end

  def get_license_abbrev(license_id)
    @lcsid_labbr[license_id]
  end

  def get_license_id(license_abbrev)
    @labbr_lcsid[license_abbrev]
  end

  def get_license_id_from_path_id(path_id)
    get_content_info(path_id).license_id
  end

  def get_license_ids_from_package_id(package_id)
    @pkgid_lcsids.duplicates(package_id)
  end

  def get_license_name(license_id)
    @lcsid_lcs[license_id]
  end

  def get_ncontents
    get_counter(:ncontents)
  end

  def get_ncontents_by_format_id(format_id)
    format_abbrev = get_format_abbrev(format_id)
    key = make_ncontents_by_format_key(format_abbrev)
    return get_counter(key)
  end

  def get_ncontents_by_license_id(license_id)
    license_abbrev = get_license_abbrev(license_id)
    key = make_ncontents_by_license_key(license_abbrev)
    return get_counter(key)
  end

  def get_ncontents_indexed
    get_counter(:ncontents_indexed)
  end

  def get_ncontents_in_package(package_name)
    package_id = get_package_id(package_name)
    @pkgid_pathids.duplicates(package_id).length
  end

  def get_nformats
    get_counter(:nformats)
  end

  def get_nlines_indexed
    get_counter(:nlines_indexed)
  end

  def get_npackages
    get_counter(:npackages)
  end

  def get_nwords
    get_counter(:nwords)
  end

  def get_package_id(package_name)
    @pkg_pkgid[package_name]
  end

  def get_package_id_from_path_id(path_id)
    @pathid_pkgid[path_id]
  end

  def get_package_ids(word_id)
    @wordid_pkgids.duplicates(word_id)
  end

  def get_package_name(package_id)
    @pkgid_pkg[package_id]
  end

  def get_package_names
    @pkgid_pkg.values
  end

  # Parses the "key:value" strings written by put_package_options back
  # into a hash with :exclude_pattern (Regexp) and :noindex_formats
  # (Array). Raises DBMError on an unknown key.
  def get_package_options(package_id)
    options = {}
    values = @pkgid_options.duplicates(package_id)
    values.each {|value|
      k, v = value.split(":", 2)
      k = k.intern
      case k
      when :exclude_pattern
        v = Regexp.new(v)
      when :noindex_formats
        v = v.split(",")
      else
        raise DBMError.new("#{k}: unknown option")
      end
      options[k] = v
    }
    assert(options[:exclude_pattern])
    assert(options[:noindex_formats])
    return options
  end

  def get_path(path_id)
    @pathid_path[path_id]
  end

  def get_path_id(path)
    @path_pathid[path]
  end

  def get_path_ids(package_id)
    @pkgid_pathids.duplicates(package_id)
  end

  def get_path_ids_from_package_and_word_id(package_id, word_id)
    package_word_id = AutoPack.pack_id2(package_id, word_id)
    return @pkgwordid_pathids.duplicates(package_word_id)
  end

  def get_source_uri(package_id)
    @pkgid_src[package_id]
  end

  def get_type(type_id)
    @typeid_type[type_id]
  end

  # Returns the id of type, allocating a new one when it is unknown.
  def get_type_id(type)
    @type_id_counter.get_id(type)
  end

  def get_word(word_id)
    @wordid_word[word_id]
  end

  def get_word_id(word)
    @word_wordid[word]
  end

  def get_word_ids(path_id)
    DeltaDumper.undump_ids(@pathid_wordids[path_id])
  end

  def has_format_id?(format_id)
    @fmtid_fmt.has_key?(format_id)
  end

  def has_format_abbrev?(format_abbrev)
    @fabbr_fmtid.has_key?(format_abbrev)
  end

  def has_license_id?(license_id)
    @lcsid_lcs.has_key?(license_id)
  end

  def has_license_abbrev?(license_abbrev)
    @labbr_lcsid.has_key?(license_abbrev)
  end

  def has_package?(package_name)
    @pkg_pkgid.include?(package_name)
  end

  def has_path?(path)
    @path_pathid.include?(path)
  end

  def has_type?(type)
    @type_typeid.include?(type)
  end

  # True when word has an id in the DB.
  def has_word?(word)
    @word_wordid[word] ? true : false
  end

  def increase_counter(key, step = 1)
    @stat[key.to_s] = get_counter(key) + step
  end

  def make_ncontents_by_format_key(format_abbrev)
    ("ncontents_format_" + format_abbrev).intern
  end

  def make_ncontents_by_license_key(license_abbrev)
    ("ncontents_license_" + license_abbrev).downcase.intern
  end

  # FIXME: Ad hoc serialization. We avoid using Marshal
  # not to make the DB Ruby-dependent.
  #
  # Both assignments target the same key; pkgid_options is opened with
  # dupsort set, and get_package_options reads the values back with
  # duplicates.
  def put_package_options(package_id)
    @pkgid_options[package_id] = sprintf("exclude_pattern:%s",
                                         @config.exclude_pattern.to_s)
    @pkgid_options[package_id] = sprintf("noindex_formats:%s",
                                         @config.noindex_formats.join(","))
  end

  # Remembers, in memory, that every word of word_ids occurs in path_id
  # of package_id; flush_cache writes the collected data to the DB.
  def put_pathid_wordids(package_id, path_id, word_ids)
    @current_package_id = package_id
    word_ids.each {|word_id|
      pathids = (@wordid_pathids_cache[word_id] ||= [])
      pathids << path_id
    }
  end
end
672
+ end
673
+