gonzui 1.2-x86-mswin32-60

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120) hide show
  1. data/AUTHORS.txt +9 -0
  2. data/History.txt +5539 -0
  3. data/Manifest.txt +115 -0
  4. data/PostInstall.txt +17 -0
  5. data/README.rdoc +149 -0
  6. data/Rakefile +28 -0
  7. data/bin/gonzui-db +167 -0
  8. data/bin/gonzui-import +177 -0
  9. data/bin/gonzui-remove +58 -0
  10. data/bin/gonzui-search +68 -0
  11. data/bin/gonzui-server +176 -0
  12. data/bin/gonzui-update +53 -0
  13. data/data/gonzui/catalog/catalog.ja +80 -0
  14. data/data/gonzui/doc/favicon.ico +0 -0
  15. data/data/gonzui/doc/folder.png +0 -0
  16. data/data/gonzui/doc/gonzui.css +279 -0
  17. data/data/gonzui/doc/gonzui.js +111 -0
  18. data/data/gonzui/doc/text.png +0 -0
  19. data/data/gonzuirc.sample +29 -0
  20. data/ext/autopack/autopack.c +88 -0
  21. data/ext/autopack/extconf.rb +3 -0
  22. data/ext/delta/delta.c +147 -0
  23. data/ext/delta/extconf.rb +5 -0
  24. data/ext/texttokenizer/extconf.rb +5 -0
  25. data/ext/texttokenizer/texttokenizer.c +93 -0
  26. data/ext/xmlformatter/extconf.rb +5 -0
  27. data/ext/xmlformatter/xmlformatter.c +207 -0
  28. data/lib/gonzui.rb +59 -0
  29. data/lib/gonzui/apt.rb +193 -0
  30. data/lib/gonzui/autopack.so +0 -0
  31. data/lib/gonzui/bdbdbm.rb +118 -0
  32. data/lib/gonzui/cmdapp.rb +14 -0
  33. data/lib/gonzui/cmdapp/app.rb +175 -0
  34. data/lib/gonzui/cmdapp/search.rb +134 -0
  35. data/lib/gonzui/config.rb +117 -0
  36. data/lib/gonzui/content.rb +19 -0
  37. data/lib/gonzui/dbm.rb +673 -0
  38. data/lib/gonzui/deindexer.rb +162 -0
  39. data/lib/gonzui/delta.rb +49 -0
  40. data/lib/gonzui/delta.so +0 -0
  41. data/lib/gonzui/extractor.rb +347 -0
  42. data/lib/gonzui/fetcher.rb +309 -0
  43. data/lib/gonzui/gettext.rb +144 -0
  44. data/lib/gonzui/importer.rb +84 -0
  45. data/lib/gonzui/indexer.rb +316 -0
  46. data/lib/gonzui/info.rb +80 -0
  47. data/lib/gonzui/license.rb +100 -0
  48. data/lib/gonzui/logger.rb +48 -0
  49. data/lib/gonzui/monitor.rb +177 -0
  50. data/lib/gonzui/progressbar.rb +235 -0
  51. data/lib/gonzui/remover.rb +38 -0
  52. data/lib/gonzui/searcher.rb +330 -0
  53. data/lib/gonzui/searchquery.rb +235 -0
  54. data/lib/gonzui/searchresult.rb +111 -0
  55. data/lib/gonzui/texttokenizer.so +0 -0
  56. data/lib/gonzui/updater.rb +254 -0
  57. data/lib/gonzui/util.rb +415 -0
  58. data/lib/gonzui/vcs.rb +128 -0
  59. data/lib/gonzui/webapp.rb +25 -0
  60. data/lib/gonzui/webapp/advsearch.rb +123 -0
  61. data/lib/gonzui/webapp/filehandler.rb +24 -0
  62. data/lib/gonzui/webapp/jsfeed.rb +61 -0
  63. data/lib/gonzui/webapp/markup.rb +445 -0
  64. data/lib/gonzui/webapp/search.rb +269 -0
  65. data/lib/gonzui/webapp/servlet.rb +319 -0
  66. data/lib/gonzui/webapp/snippet.rb +155 -0
  67. data/lib/gonzui/webapp/source.rb +37 -0
  68. data/lib/gonzui/webapp/stat.rb +137 -0
  69. data/lib/gonzui/webapp/top.rb +63 -0
  70. data/lib/gonzui/webapp/uri.rb +140 -0
  71. data/lib/gonzui/webapp/webrick.rb +48 -0
  72. data/lib/gonzui/webapp/xmlformatter.so +0 -0
  73. data/script/console +10 -0
  74. data/script/destroy +14 -0
  75. data/script/generate +14 -0
  76. data/script/makemanifest.rb +21 -0
  77. data/tasks/extconf.rake +13 -0
  78. data/tasks/extconf/autopack.rake +43 -0
  79. data/tasks/extconf/delta.rake +43 -0
  80. data/tasks/extconf/texttokenizer.rake +43 -0
  81. data/tasks/extconf/xmlformatter.rake +43 -0
  82. data/test/_external_tools.rb +13 -0
  83. data/test/_test-util.rb +142 -0
  84. data/test/foo/Makefile.foo +66 -0
  85. data/test/foo/bar.c +5 -0
  86. data/test/foo/bar.h +6 -0
  87. data/test/foo/foo.c +25 -0
  88. data/test/foo/foo.spec +33 -0
  89. data/test/test_apt.rb +42 -0
  90. data/test/test_autopack_extn.rb +7 -0
  91. data/test/test_bdbdbm.rb +79 -0
  92. data/test/test_cmdapp-app.rb +35 -0
  93. data/test/test_cmdapp-search.rb +99 -0
  94. data/test/test_config.rb +28 -0
  95. data/test/test_content.rb +15 -0
  96. data/test/test_dbm.rb +171 -0
  97. data/test/test_deindexer.rb +50 -0
  98. data/test/test_delta.rb +66 -0
  99. data/test/test_extractor.rb +78 -0
  100. data/test/test_fetcher.rb +75 -0
  101. data/test/test_gettext.rb +50 -0
  102. data/test/test_gonzui.rb +11 -0
  103. data/test/test_helper.rb +10 -0
  104. data/test/test_importer.rb +56 -0
  105. data/test/test_indexer.rb +37 -0
  106. data/test/test_info.rb +82 -0
  107. data/test/test_license.rb +49 -0
  108. data/test/test_logger.rb +60 -0
  109. data/test/test_monitor.rb +23 -0
  110. data/test/test_searcher.rb +37 -0
  111. data/test/test_searchquery.rb +27 -0
  112. data/test/test_searchresult.rb +43 -0
  113. data/test/test_texttokenizer.rb +47 -0
  114. data/test/test_updater.rb +95 -0
  115. data/test/test_util.rb +149 -0
  116. data/test/test_vcs.rb +61 -0
  117. data/test/test_webapp-markup.rb +42 -0
  118. data/test/test_webapp-util.rb +19 -0
  119. data/test/test_webapp-xmlformatter.rb +19 -0
  120. metadata +292 -0
@@ -0,0 +1,134 @@
1
+ #
2
+ # search.rb - command line searcher
3
+ #
4
+ # Copyright (C) 2004-2005 Satoru Takabayashi <satoru@namazu.org>
5
+ # All rights reserved.
6
+ # This is free software with ABSOLUTELY NO WARRANTY.
7
+ #
8
+ # You can redistribute it and/or modify it under the terms of
9
+ # the GNU General Public License version 2.
10
+ #
11
+
12
+ module Gonzui
13
# Command line front end for searching a gonzui database.
#
# The database is opened read-only in #initialize.  #search looks a pattern
# up and writes each hit to the configured output stream as a single line,
# as a line with surrounding context, or (in count mode) as a bare number.
class CommandLineSearcher
  # config::  configuration object handed to DBM.open.
  # options:: hash keyed by option name.  Keys read here: 'out',
  #           'line-number', 'context', 'count', 'package', 'prefix',
  #           'regexp', 'color', 'no-filename' and 'type'.
  def initialize(config, options)
    @config = config
    @dbm = DBM.open(@config, true) # second argument: read-only
    @out = (options['out'] or STDOUT)
    @nlines = options['line-number']

    # 'context' wins over 'count'; plain line output is the default.
    @show_method = if options['context']
                     :show_context_lines
                   elsif options['count']
                     :show_count
                   else
                     :show_line
                   end

    @package_name = options['package']

    # nil.to_i is 0, so no 'context' option means zero context lines.
    @ncontexts = options['context'].to_i

    # 'regexp' takes precedence over 'prefix' when both are given.
    @search_method = :find_all
    @search_method = :find_all_by_prefix if options['prefix']
    @search_method = :find_all_by_regexp if options['regexp']

    @use_regexp = options['regexp']
    @use_color = options['color']
    @no_filename = options['no-filename']

    @target_type = :all
    if options['type']
      type = options['type'].intern
      eprintf("unknown type: #{type}") unless LangScan::Type.include?(type)
      @target_type = type
    end
  end

  private
  # Wraps +string+ in the given start/end tags (ANSI bold red by default).
  def highlight(string, start_tag = "\x1b[01;31m", end_tag = "\x1b[00m")
    sprintf("%s%s%s", start_tag, string, end_tag)
  end

  # Prints the single line containing the hit, optionally prefixed with
  # "path:" and "lineno:".  The matched word is re-read from the database
  # so it can be highlighted separately from the text around it.
  def show_line(content, path, regexp, info)
    range = content.line_range(info.byteno)
    filename = if @no_filename then "" else path + ":" end
    linemark = if @nlines then info.lineno.to_s + ":" else "" end
    word = @dbm.get_word(info.word_id)
    pre = content.substring(range.first...info.byteno)
    post = content.substring((info.byteno + word.length)...range.last)
    mid = word
    mid = highlight(mid) if @use_color
    @out.printf("%s%s%s%s%s\n", filename, linemark, pre, mid, post)
  end

  # Prints the hit line together with @ncontexts lines of context.
  # With line numbers enabled, the hit line is marked "N:" and context
  # lines "N-".
  def show_context_lines(content, path, regexp, info)
    @out.printf("== %s\n", path) unless @no_filename
    content.each_line_range(info.byteno, @ncontexts) {|lineno_offset, range|
      lineno = info.lineno + lineno_offset
      linemark = if @nlines
                   mark = if lineno == info.lineno then ":" else "-" end
                   lineno.to_s + mark
                 else
                   ""
                 end
      if range.include?(info.byteno)
        # The line holding the hit: split it so the word can be highlighted.
        word = @dbm.get_word(info.word_id)
        pre = content.substring(range.first...info.byteno)
        post_range = (info.byteno + word.length)...range.last
        post = content.substring(post_range)
        mid = word
        mid = highlight(mid) if @use_color
        @out.printf("%s%s%s%s\n", linemark, pre, mid, post)
      else
        @out.printf("%s%s\n", linemark, content.substring(range))
      end
    }
  end

  # Fetches the content and path for +info+ and dispatches to the
  # configured show method.
  def show_result(regexp, info)
    content = @dbm.get_content(info.path_id)
    path = @dbm.get_path(info.path_id)
    send(@show_method, content, path, regexp, info)
  end

  # True when the file that +info+ points at belongs to +target_package_id+.
  def package_match?(target_package_id, info)
    package_id = @dbm.get_package_id_from_path_id(info.path_id)
    return target_package_id == package_id
  end

  public
  # Searches for +pattern+ and writes the results to the output stream.
  #
  # Hits are visited in (path_id, byteno) order.  A hit on the same line of
  # the same file as the previously visited hit is skipped.  In count mode
  # nothing is shown per hit; the total number of raw results (before the
  # type/package filters) is written instead.
  #
  # Raises RegexpError when regexp mode is on and +pattern+ is invalid.
  def search(pattern)
    separator = ""
    regexp = if @use_regexp
               Regexp.new(pattern)
             else
               Regexp.new(Regexp.quote(pattern))
             end
    results = @dbm.send(@search_method, pattern)
    prev_lineno = prev_path_id = nil
    target_package_id = @dbm.get_package_id(@package_name) if @package_name
    results.sort_by {|x| [x.path_id, x.byteno] }.each {|info|
      next if prev_lineno and prev_path_id and
        info.path_id == prev_path_id and
        info.lineno == prev_lineno
      if info.match?(@target_type)
        unless @show_method == :show_count
          if @package_name.nil? or package_match?(target_package_id, info)
            @out.print separator
            show_result(regexp, info)
            # Context blocks are separated by one blank line.
            separator = "\n" if @show_method == :show_context_lines
          end
        end
      end
      prev_lineno = info.lineno
      prev_path_id = info.path_id
    }
    # Write the count to the configured stream, not unconditionally to
    # $stdout, so that the 'out' option is honoured in count mode too.
    @out.puts results.length if @show_method == :show_count
  end

  # Closes the underlying database handle.
  def finish
    @dbm.close
  end
end
134
+ end
@@ -0,0 +1,117 @@
1
+ #
2
+ # config.rb - a config library
3
+ #
4
+ # Copyright (C) 2004-2005 Satoru Takabayashi <satoru@namazu.org>
5
+ # All rights reserved.
6
+ # This is free software with ABSOLUTELY NO WARRANTY.
7
+ #
8
+ # You can redistribute it and/or modify it under the terms of
9
+ # the GNU General Public License version 2.
10
+ #
11
+
12
+ module Gonzui
13
# Runtime configuration for the gonzui tools.
#
# Every setting is an instance variable initialised in #initialize; a
# read/write accessor of the same name is generated for each of them, so
# e.g. @db_directory is reachable as #db_directory / #db_directory=.
class Config
  include Util

  def initialize
    #
    # All paths should be expanded to absolute paths
    # because the current directory would be changed when
    # a process becomes a daemon.
    #
    @temporary_directory = ENV['tmp'] || "/tmp"
    @db_directory = File.expand_path("gonzui.db")
    @cache_directory = File.join(@db_directory, "cache")
    @gonzui_log_file = File.expand_path("gonzui.log")

    @db_cache_size = 5 * 1024 ** 2

    @quiet = false
    @verbose = false

    @utf8 = true
    @encoding_preference = UTF8::Preference

    @noindex_formats = []
    # FIXME: should be more flexible
    @exclude_pattern = /~$|\.bak$|CVS|\.svn|\.git/

    #
    # For gonzui-server
    #
    @pid_file = File.expand_path("gonzui.pid")
    @daemon = false
    @access_log_file = File.expand_path("access.log")
    @catalog_directory = choose_directory("catalog")
    @doc_directory = choose_directory("doc")
    @http_port = Gonzui::HTTP_PORT
    @bind_address = '*'
    @user = nil
    @group = nil
    @site_title = "gonzui"
    @base_mount_point = "/"

    @default_results_per_page = 10
    @max_results_per_page = 50
    @max_pages = 20
    @max_words = 10
    @max_packages_per_page = 100
    @nresults_candidates = [10, 20, 30, 50]

    set_user_and_group if unix?
    # instance_variables yields Strings on Ruby 1.8 but Symbols on 1.9 and
    # later; Symbol has no #delete, so normalise with to_s first.
    instance_variables.each {|name|
      self.class.class_eval {
        attr_accessor name.to_s.delete("@")
      }
    }
  end

  private
  # Returns the absolute path of the first existing directory among:
  # +base_name+ itself, <dir of $0>/../PKGDATADIR/base_name and
  # PKGDATADIR/base_name.  When none exists, the last candidate is
  # returned (expanded) anyway.
  def choose_directory(base_name)
    directory = nil
    [base_name,
     File.join(File.dirname($0), "..", Gonzui::PKGDATADIR, base_name),
     File.join(Gonzui::PKGDATADIR, base_name)].each do |d|
      directory = d
      break if File.directory?(directory)
    end
    return File.expand_path(directory)
  end

  # Sets @user and @group to the names of the current process uid/gid.
  def set_user_and_group
    require 'etc'
    u = Etc::getpwuid(Process.uid)
    g = Etc::getgrgid(Process.gid)
    @user = u.name
    @group = g.name
  end

  # Names of all settings, as symbols without the leading "@".
  def keys
    instance_variables.map {|name| name.to_s.delete("@").intern }
  end

  public
  # Upper bound on results reachable through paging.
  def max_results_overall
    @max_results_per_page * @max_pages
  end

  # Writes all settings to +out+ as a Ruby hash literal, sorted by key,
  # in the form that #load reads back.
  def dump(out = STDOUT)
    len = keys.map {|key| key.inspect.length }.max
    out.puts "{"
    keys.sort_by {|key| key.to_s }.each {|key|
      out.printf("  %-#{len}s => %s,\n", key.inspect, send(key).inspect)
    }
    out.puts "}"
  end

  # Reads +file_name+, evaluates it, and assigns every key/value pair of
  # the resulting hash through the matching "key=" writer.  Does nothing
  # when the file evaluates to nil.
  #
  # NOTE(review): the file is executed as Ruby code via eval, so it must
  # only ever be pointed at trusted configuration files.
  def load(file_name)
    # Block form closes the file even when eval raises.
    hash = File.open(file_name) {|f| eval(f.read) }
    return if hash.nil?
    hash.each {|key, value|
      send(key.to_s + "=", value)
    }
  end
end
117
+ end
@@ -0,0 +1,19 @@
1
+ #
2
+ # content.rb - content implementation
3
+ #
4
+ # Copyright (C) 2004-2005 Satoru Takabayashi <satoru@namazu.org>
5
+ # All rights reserved.
6
+ # This is free software with ABSOLUTELY NO WARRANTY.
7
+ #
8
+ # You can redistribute it and/or modify it under the terms of
9
+ # the GNU General Public License version 2.
10
+ #
11
+
12
+ module Gonzui
13
# A piece of content: its text, its modification time and its path.
Content = Struct.new(:text, :mtime, :path)

Content.class_eval do
  # Length of the text, as reported by the text object itself.
  def length
    text.length
  end
end
19
+ end
data/lib/gonzui/dbm.rb ADDED
@@ -0,0 +1,673 @@
1
+ #
2
+ # dbm.rb - gonzui DB library
3
+ #
4
+ # Copyright (C) 2004-2005 Satoru Takabayashi <satoru@namazu.org>
5
+ # All rights reserved.
6
+ # This is free software with ABSOLUTELY NO WARRANTY.
7
+ #
8
+ # You can redistribute it and/or modify it under the terms of
9
+ # the GNU General Public License version 2.
10
+ #
11
+ require 'zlib'
12
+
13
+ module Gonzui
14
# Raised when an existing database carries a version other than DB_VERSION.
class IncompatibleDBError < GonzuiError
end

# Format version written to, and checked against, the "version" table.
DB_VERSION = "13"
16
+
17
# Entry point for obtaining a database handle.
module DBM
  module_function

  # Opens the database described by +config+.
  #
  # Unless +read_only+ is true, config.db_directory is created first.
  # With a block, the handle is yielded and closed afterwards (also when
  # the block raises), and the block's value is returned.  Without a block
  # the open handle is returned and the caller must close it.
  def open(config, read_only = false)
    File.mkpath(config.db_directory) unless read_only

    backend = BDBDBM # to be pluggable
    handle = backend.new(config, read_only)
    return handle unless block_given?

    begin
      yield(handle)
    ensure
      handle.close
    end
  end
end
35
+
36
# Hands out sequential integer ids for texts and keeps the text -> id,
# id -> text and (optionally) id -> alternate-text tables in step.
#
# New ids are counted locally; #flush pushes the count and the last id
# used back to the owning dbm.
class IDCounter
  # dbm::     owning database object; must respond to +seq+,
  #           +increase_counter+ and the three table names below.
  # id_name:: name of the id kind; used to build the "last_<id_name>" key.
  # counter:: name of the statistics counter bumped on flush.
  # db::      name of the text -> id table.
  # rev_db::  name of the id -> text table.
  # alt_db::  name of the id -> alternate text table, or nil.
  def initialize(dbm, id_name, counter, db, rev_db, alt_db)
    @dbm = dbm
    @id_name = id_name
    @counter = counter
    @db = dbm.send(db)
    @rev_db = dbm.send(rev_db)
    @alt_db = if alt_db then dbm.send(alt_db) else nil end

    @count = 0
    @cache = {}
    # -1 so that the first id handed out is 0.
    @last_id = (@dbm.seq[make_last_key] or -1)
  end

  # Records the ids created since the last flush: bumps the counter by
  # their number, stores the last id used, and drops the lookup cache.
  # Does nothing when no id was created.
  def flush
    if @count > 0
      @dbm.increase_counter(@counter, @count)
      @dbm.seq[make_last_key] = @last_id if @last_id >= 0
      @count = 0
      @cache = {}
    end
  end

  # Key under which the last id of this kind is stored in dbm.seq.
  def make_last_key
    "last_" + @id_name.to_s
  end

  # Reserves and returns the next unused id.
  def make_new_id
    @count += 1
    @last_id += 1
    return @last_id
  end

  # Returns the id of +text+, creating one if it has none yet.
  def get_id(text)
    lookup_or_create(text)
  end

  # Like #get_id, but additionally stores +alt+ in the alternate table
  # when (and only when) a new id is created.
  def get_id2(text, alt)
    lookup_or_create(text) {|id| @alt_db[id] = alt }
  end

  private
  # Shared body of #get_id and #get_id2: consult the cache, then the
  # text -> id table, and create a new id as a last resort.  The optional
  # block is called with a freshly created id only.
  def lookup_or_create(text)
    id = @cache[text]
    if id.nil?
      id = @db[text]
      if id.nil?
        id = make_new_id
        @db[text] = id
        @rev_db[id] = text
        yield(id) if block_given?
      end
      @cache[text] = id
    end
    return id
  end
end
98
+
99
# Conversion adaptors referenced by the table definitions.
#
# Each Adaptor pairs a +store+ callable with a +fetch+ callable; a nil
# entry means "no conversion".  The pack_*/unpack_* methods called below
# are not defined in this file (NOTE(review): presumably supplied by the
# autopack extension -- confirm).
module AutoPack
  Adaptor = Struct.new(:store, :fetch)

  store_id = lambda {|value| pack_id(value) }
  fetch_id = lambda {|packed| unpack_id(packed) }
  ID = Adaptor.new(store_id, fetch_id)

  store_fixnum = lambda {|value| pack_fixnum(value) }
  fetch_fixnum = lambda {|packed| unpack_fixnum(packed) }
  Fixnum = Adaptor.new(store_fixnum, fetch_fixnum)

  # Symbols are stored as their string form and interned on the way back.
  store_symbol = lambda {|symbol| symbol.to_s }
  fetch_symbol = lambda {|text| text.intern }
  Symbol = Adaptor.new(store_symbol, fetch_symbol)

  # Plain strings need no conversion in either direction.
  String = Adaptor.new(nil, nil)

  # Strings that are deflated before storing and inflated after fetching.
  store_gz = lambda {|plain| Zlib::Deflate.deflate(plain) }
  fetch_gz = lambda {|compressed| Zlib::Inflate.inflate(compressed) }
  GZString = Adaptor.new(store_gz, fetch_gz)
end
111
+
112
# Error raised by the database layer itself (for example a counter that
# would go negative, a double close, or an unknown package option).
class DBMError < GonzuiError
end
113
# Backend-independent part of the gonzui database layer.
#
# On construction every table listed in DBTable is opened through
# #do_open_db (which a concrete subclass has to provide) and exposed both
# as an instance variable and as a reader of the same name.  One IDCounter
# per IDTable row is created and exposed as "<id_name>_counter".
class AbstractDBM
  include Util

  ap = AutoPack # for short
  # Each row: [table name, key adaptor, value adaptor, dupsort flag].
  DBTable = [
    [:fmtid_fmt,         ap::ID,     ap::String,   false],
    [:fmtid_fabbr,       ap::ID,     ap::String,   false],
    [:fabbr_fmtid,       ap::String, ap::ID,       false],
    [:lcsid_lcs,         ap::ID,     ap::String,   false],
    [:lcsid_labbr,       ap::ID,     ap::String,   false],
    [:labbr_lcsid,       ap::String, ap::ID,       false],
    [:seq,               ap::String, ap::Fixnum,   false],
    [:stat,              ap::String, ap::Fixnum,   false],
    [:pkg_pkgid,         ap::String, ap::ID,       false],
    [:pkgid_pkg,         ap::ID,     ap::String,   false],
    [:pkgid_pathids,     ap::ID,     ap::ID,       true],
    [:pkgid_fmtids,      ap::ID,     ap::ID,       true],
    [:pkgid_lcsids,      ap::ID,     ap::ID,       true],
    [:pkgid_options,     ap::ID,     ap::String,   true],
    [:pkgid_src,         ap::ID,     ap::String,   false],
    [:path_pathid,       ap::String, ap::ID,       false],
    [:pathid_digest,     ap::ID,     ap::GZString, false],
    [:pathid_info,       ap::ID,     ap::String,   false],
    [:pathid_content,    ap::ID,     ap::GZString, false],
    [:pathid_bols,       ap::ID,     ap::GZString, false],
    [:pathid_hash,       ap::ID,     ap::String,   false],
    [:pathid_path,       ap::ID,     ap::String,   false],
    [:pathid_pkgid,      ap::ID,     ap::ID,       false],
    [:pathid_wordids,    ap::ID,     ap::GZString, false],
    [:type_typeid,       ap::Symbol, ap::ID,       false],
    [:typeid_type,       ap::ID,     ap::Symbol,   false],
    [:word_wordid,       ap::String, ap::ID,       false],
    [:wordid_pkgids,     ap::ID,     ap::ID,       true],
    [:wordid_word,       ap::ID,     ap::String,   false],
    [:pkgwordid_pathids, ap::String, ap::ID,       true],
    [:pathwordid_info,   ap::String, ap::String,   false],
    [:version,           ap::String, ap::String,   false],
  ]

  IDTable = [
    # id_name,    # of id     text -> id    id -> text    id -> alt
    [:type_id,    :ntypes,    :type_typeid, :typeid_type, nil],
    [:word_id,    :nwords,    :word_wordid, :wordid_word, nil],
    [:path_id,    :ncontents, :path_pathid, :pathid_path, nil],
    [:package_id, :npackages, :pkg_pkgid,   :pkgid_pkg,   nil],
    [:format_id,  :nformats,  :fabbr_fmtid, :fmtid_fabbr, :fmtid_fmt],
    [:license_id, :nlicenses, :labbr_lcsid, :lcsid_labbr, :lcsid_lcs],
  ]

  # config::    configuration; config.db_directory must already exist.
  # read_only:: when true the stored DB version is left untouched.
  #
  # Raises RuntimeError when the directory is missing and
  # IncompatibleDBError when an existing DB has a different version.
  def initialize(config, read_only = false)
    raise "#{config.db_directory}: No such directory" unless
      File.directory?(config.db_directory)
    @config = config

    validate_db_version
    @db_opened = {}
    DBTable.each {|db_name, key_type, value_type, dupsort|
      open_db(db_name, key_type, value_type, dupsort)
    }
    put_db_version unless read_only
    init_id_counters

    @opened = true
    @current_package_id = nil
    @wordid_pathids_cache = {}
  end

  private
  # Creates one IDCounter per IDTable row, stores it in
  # @<id_name>_counter and defines a reader for it.
  def init_id_counters
    @id_counters = []
    IDTable.each {|id_name, counter_name, db, rev_db, alt_db|
      id_counter = IDCounter.new(self, id_name, counter_name,
                                 db, rev_db, alt_db)
      name = "@" + id_name.to_s + "_counter"
      instance_variable_set(name, id_counter)
      self.class.class_eval {
        attr_reader name.delete("@")
      }
      @id_counters << id_counter
    }
  end

  # All word occurrences of +word_id+ across every package and path that
  # contains it.  Returns [] when +word_id+ is nil/false.
  def collect_all_results(word_id)
    results = []
    if word_id
      get_package_ids(word_id).each {|package_id|
        path_ids = get_path_ids_from_package_and_word_id(package_id, word_id)
        path_ids.each {|path_id|
          results.concat(get_all_word_info(path_id, word_id))
        }
      }
    end
    return results
  end

  # True when the DB directory exists and holds at least one entry whose
  # name does not start with "__".
  def db_exist?
    return false unless File.directory?(@config.db_directory)
    entries = Dir.entries_without_dots(@config.db_directory)
    # filter out file names like __db.001.
    entries = entries.reject {|entry| /^__/.match(entry) }
    return (not entries.empty?)
  end

  # Backend hook: open one table and return its handle.  Must be
  # overridden by a subclass.
  def do_open_db(name, key_type, value_type, dupsort)
    raise NotImplementedError.new
  end

  # Calls +block+ with (id, abbrev, name) for every entry of the table
  # named +id_name+, ordered by name.  +get_abbrev+ names the method that
  # maps an id to its abbreviation.
  def each_property(id_name, get_abbrev, &block)
    properties = []
    self.send(id_name).each {|id, name|
      abbrev = self.send(get_abbrev, id)
      properties.push([id, abbrev, name])
    }
    properties.sort_by {|id, abbrev, name| name }.each {|id, abbrev, name|
      block.call(id, abbrev, name)
    }
  end

  # Decoded integer list stored under +path_id+ in pathid_bols.
  # NOTE(review): presumably the byte offsets of each beginning of line.
  def get_bols(path_id)
    DeltaDumper.undump_fixnums(@pathid_bols[path_id])
  end

  # Opens table +db_name+ once, remembers it in @db_opened, and exposes
  # it as @<db_name> with a reader.  Returns nil if it was open already.
  def open_db(db_name, key_type, value_type, dupsort)
    return if @db_opened.include?(db_name)
    db = do_open_db(db_name, key_type, value_type, dupsort)
    @db_opened[db_name] = db

    name = "@" + db_name.to_s
    instance_variable_set(name, db)
    self.class.class_eval {
      attr_reader name.delete("@")
    }
    return db
  end

  # Stamps the database with the current DB_VERSION.
  def put_db_version
    @version["version"] = DB_VERSION
  end

  # Raises IncompatibleDBError unless an existing database carries
  # DB_VERSION.  A database that does not exist yet passes.  A BDB::Fatal
  # while reading the version is swallowed and treated as a mismatch
  # (version "unknown").
  def validate_db_version
    return unless db_exist?
    version = "unknown"
    begin
      db = do_open_db(:version, AutoPack::String, AutoPack::String, false)
      version = db["version"]
      db.close
    rescue BDB::Fatal
    end
    if version != DB_VERSION
      m = sprintf("DB format is incompatible (version %s expected but %s)",
                  DB_VERSION, version)
      raise IncompatibleDBError.new(m)
    end
  end

  # Checks that the stored counters agree with the actual table sizes and
  # that the indexed line count equals the sum over all indexed contents.
  def verify_stat_integrity
    assert_equal_all(get_nformats,
                     fmtid_fmt.length,
                     fmtid_fabbr.length,
                     fabbr_fmtid.length)
    assert_equal_all(get_npackages,
                     pkgid_pkg.length,
                     pkg_pkgid.length)
    assert_equal_all(get_ncontents,
                     path_pathid.length,
                     pathid_path.length,
                     pathid_content.length,
                     pathid_info.length)
    assert_equal_all(get_nwords,
                     word_wordid.length)
    nlines_indexed = 0
    @pathid_info.each_key {|path_id|
      info = get_content_info(path_id)
      nlines_indexed += info.nlines if info.indexed?
    }
    assert_equal(get_nlines_indexed, nlines_indexed)
  end

  # Checks that no id -> text table holds a key beyond the recorded
  # "last_<id_name>" sequence value.
  def verify_seq_integrity
    IDTable.each {|id_name, counter, db, rev_db, alt_db|
      id = (self.send(rev_db).get_last_key or 0)
      assert(id <= (@seq["last_" + id_name.to_s] or 0))
    }
  end

  public
  # True when the content at +path_id+ has the format abbreviation "binary".
  def binary_content?(path_id)
    format_id = get_format_id_from_path_id(path_id)
    get_format_abbrev(format_id) == "binary"
  end

  # Flushes pending caches and closes every open table.
  # Raises DBMError when called on an already closed handle; the check
  # comes first so that nothing is written through closed tables.
  def close
    raise DBMError.new("dbm is already closed") unless @opened
    flush_cache
    @db_opened.each {|name, db|
      db.close
    }
    @opened = false
  end

  # Runs the integrity checks; returns true if none of them fails.
  def consistent?
    verify_stat_integrity
    verify_seq_integrity
    return true
  end

  # Lowers statistics counter +key+ by +step+.
  # Raises DBMError when the result would be negative.
  def decrease_counter(key, step = 1)
    value = get_counter(key) - step
    raise DBMError.new("counter #{key} becomes minus") if value < 0
    @stat[key.to_s] = value
  end

  # Yields the name of every opened table as a String.
  def each_db_name
    @db_opened.each_key {|db_name| yield(db_name.to_s) }
  end

  # Yields (id, abbrev, name) for every format, ordered by name.
  def each_format(&block)
    each_property(:fmtid_fmt, :get_format_abbrev, &block)
  end

  # Yields (id, abbrev, name) for every license, ordered by name.
  def each_license(&block)
    each_property(:lcsid_lcs, :get_license_abbrev, &block)
  end

  # Yields every package name.
  def each_package_name
    @pkgid_pkg.each_value {|value| yield(value) }
  end

  # Yields every indexed word.
  def each_word(&block)
    @word_wordid.each_key {|word| yield(word) }
  end

  # All occurrences of the word that equals +pattern+ exactly.
  def find_all(pattern)
    word_id = @word_wordid[pattern]
    return collect_all_results(word_id)
  end

  # Prefix search; left to the concrete backend.
  def find_all_by_prefix(pattern)
    raise NotImplementedError.new("should be implemented in a sub class")
  end

  # All occurrences of every word matching the regular expression
  # +pattern+.  Walks the whole word table.
  def find_all_by_regexp(pattern)
    regexp = Regexp.new(pattern)
    results = []
    @word_wordid.each {|word, word_id|
      if regexp.match(word)
        results.concat(collect_all_results(word_id))
      end
    }
    return results
  end

  # Yields each occurrence of +word_id+ within +path_id+.
  def find_word_info(path_id, word_id)
    get_all_word_info(path_id, word_id).each {|info|
      yield(info)
    }
  end

  # Writes the word -> path associations collected by
  # #put_pathid_wordids for the current package, then flushes every id
  # counter.  Word ids are processed in sorted order.
  def flush_cache
    wordids = @wordid_pathids_cache.keys.sort!
    wordids.each {|word_id|
      package_word_id = AutoPack.pack_id2(@current_package_id, word_id)
      @wordid_pathids_cache[word_id].each {|path_id|
        @pkgwordid_pathids[package_word_id] = path_id
      }
    }
    wordids.each {|word_id|
      @wordid_pkgids[word_id] = @current_package_id
    }
    @wordid_pathids_cache.clear
    @id_counters.each {|counter| counter.flush }
  end

  # Returns one WordInfo per stored occurrence of +word_id+ in +path_id+
  # ([] when there is none), each annotated with its line number.
  def get_all_word_info(path_id, word_id)
    path_word_id = AutoPack.pack_id2(path_id, word_id)
    dump = @pathwordid_info[path_word_id]
    return [] if dump.nil?
    bols = get_bols(path_id)
    bol = bols.shift
    assert_equal(0, bol)

    # lineno carries over from one tuple to the next: for each occurrence
    # it is advanced past every remaining bol that is <= the occurrence's
    # byte offset.  NOTE(review): this relies on the tuples coming back in
    # ascending byteno order -- confirm against DeltaDumper.
    lineno = 0
    DeltaDumper.undump_tuples(WordInfo, dump).map {|seqno, byteno, type_id|
      while bol and bol <= byteno
        lineno += 1
        bol = bols.shift
      end
      type = get_type(type_id)
      WordInfo.new(word_id, path_id, seqno, byteno, type_id, type, lineno)
    }
  end

  # Stored hash value for the content at +path_id+.
  def get_content_hash(path_id)
    @pathid_hash[path_id]
  end

  # Value of statistics counter +key+; 0 when it was never set.
  def get_counter(key)
    @stat[key.to_s] or 0
  end

  # Stored content for +path_id+.
  def get_content(path_id)
    @pathid_content[path_id]
  end

  # ContentInfo loaded from the record stored for +path_id+; asserts
  # that such a record exists.
  def get_content_info(path_id)
    dump = @pathid_info[path_id]
    assert_non_nil(dump)
    return ContentInfo.load(dump)
  end

  # DigestInfo list for +path_id+ ([] when none is stored).  The type
  # looked up from each tuple's last element is appended before the
  # DigestInfo is built.
  def get_digest(path_id)
    dump = @pathid_digest[path_id]
    return [] if dump.nil?
    DeltaDumper.undump_tuples(DigestInfo, dump).map {|data|
      data.push(get_type(data.last))
      DigestInfo.new(*data)
    }
  end

  # --- format lookups ---------------------------------------------------

  def get_format_abbrev(format_id)
    @fmtid_fabbr[format_id]
  end

  def get_format_id(format_abbrev)
    @fabbr_fmtid[format_abbrev]
  end

  def get_format_id_from_path_id(path_id)
    get_content_info(path_id).format_id
  end

  def get_format_ids_from_package_id(package_id)
    @pkgid_fmtids.duplicates(package_id)
  end

  def get_format_name(format_id)
    @fmtid_fmt[format_id]
  end

  # --- license lookups --------------------------------------------------

  def get_license_abbrev(license_id)
    @lcsid_labbr[license_id]
  end

  def get_license_id(license_abbrev)
    @labbr_lcsid[license_abbrev]
  end

  def get_license_id_from_path_id(path_id)
    get_content_info(path_id).license_id
  end

  def get_license_ids_from_package_id(package_id)
    @pkgid_lcsids.duplicates(package_id)
  end

  def get_license_name(license_id)
    @lcsid_lcs[license_id]
  end

  # --- statistics counters ----------------------------------------------

  def get_ncontents
    get_counter(:ncontents)
  end

  def get_ncontents_by_format_id(format_id)
    format_abbrev = get_format_abbrev(format_id)
    key = make_ncontents_by_format_key(format_abbrev)
    return get_counter(key)
  end

  def get_ncontents_by_license_id(license_id)
    license_abbrev = get_license_abbrev(license_id)
    key = make_ncontents_by_license_key(license_abbrev)
    return get_counter(key)
  end

  def get_ncontents_indexed
    get_counter(:ncontents_indexed)
  end

  # Number of paths recorded for the package named +package_name+.
  def get_ncontents_in_package(package_name)
    package_id = get_package_id(package_name)
    @pkgid_pathids.duplicates(package_id).length
  end

  def get_nformats
    get_counter(:nformats)
  end

  def get_nlines_indexed
    get_counter(:nlines_indexed)
  end

  def get_npackages
    get_counter(:npackages)
  end

  def get_nwords
    get_counter(:nwords)
  end

  # --- package lookups --------------------------------------------------

  def get_package_id(package_name)
    @pkg_pkgid[package_name]
  end

  def get_package_id_from_path_id(path_id)
    @pathid_pkgid[path_id]
  end

  # Ids of all packages recorded as containing +word_id+.
  def get_package_ids(word_id)
    @wordid_pkgids.duplicates(word_id)
  end

  def get_package_name(package_id)
    @pkgid_pkg[package_id]
  end

  def get_package_names
    @pkgid_pkg.values
  end

  # Decodes the "key:value" strings written by #put_package_options into
  # a hash with :exclude_pattern (Regexp) and :noindex_formats (Array of
  # Strings).  Raises DBMError on an unknown key; asserts that both
  # options are present.
  def get_package_options(package_id)
    options = {}
    values = @pkgid_options.duplicates(package_id)
    values.each {|value|
      k, v = value.split(":", 2)
      k = k.intern
      case k
      when :exclude_pattern
        v = Regexp.new(v)
      when :noindex_formats
        v = v.split(",")
      else
        raise DBMError.new("#{k}: unknown option")
      end
      options[k] = v
    }
    assert(options[:exclude_pattern])
    assert(options[:noindex_formats])
    return options
  end

  # --- path lookups -----------------------------------------------------

  def get_path(path_id)
    @pathid_path[path_id]
  end

  def get_path_id(path)
    @path_pathid[path]
  end

  def get_path_ids(package_id)
    @pkgid_pathids.duplicates(package_id)
  end

  # Ids of the paths recorded for the (package, word) pair.
  def get_path_ids_from_package_and_word_id(package_id, word_id)
    package_word_id = AutoPack.pack_id2(package_id, word_id)
    return @pkgwordid_pathids.duplicates(package_word_id)
  end

  def get_source_uri(package_id)
    @pkgid_src[package_id]
  end

  # --- type and word lookups --------------------------------------------

  def get_type(type_id)
    @typeid_type[type_id]
  end

  # Unlike the other get_*_id readers this goes through the id counter,
  # so an unknown +type+ is assigned a fresh id.
  def get_type_id(type)
    @type_id_counter.get_id(type)
  end

  def get_word(word_id)
    @wordid_word[word_id]
  end

  def get_word_id(word)
    @word_wordid[word]
  end

  # Decoded id list stored under +path_id+ in pathid_wordids.
  def get_word_ids(path_id)
    DeltaDumper.undump_ids(@pathid_wordids[path_id])
  end

  # --- existence checks -------------------------------------------------

  def has_format_id?(format_id)
    @fmtid_fmt.has_key?(format_id)
  end

  def has_format_abbrev?(format_abbrev)
    @fabbr_fmtid.has_key?(format_abbrev)
  end

  def has_license_id?(license_id)
    @lcsid_lcs.has_key?(license_id)
  end

  def has_license_abbrev?(license_abbrev)
    @labbr_lcsid.has_key?(license_abbrev)
  end

  def has_package?(package_name)
    @pkg_pkgid.include?(package_name)
  end

  def has_path?(path)
    @path_pathid.include?(path)
  end

  def has_type?(type)
    @type_typeid.include?(type)
  end

  # True when +word+ has an id in the word table.
  def has_word?(word)
    if @word_wordid[word] then true else false end
  end

  # Raises statistics counter +key+ by +step+.
  def increase_counter(key, step = 1)
    @stat[key.to_s] = get_counter(key) + step
  end

  # Counter key for the number of contents of one format.
  def make_ncontents_by_format_key(format_abbrev)
    ("ncontents_format_" + format_abbrev).intern
  end

  # Counter key for the number of contents under one license.  Note that,
  # unlike the format key, this one is downcased.
  def make_ncontents_by_license_key(license_abbrev)
    ("ncontents_license_" + license_abbrev).downcase.intern
  end

  # FIXME: Ad hoc serialization. We avoid using Marshal
  # not to make the DB Ruby-dependent.
  #
  # Stores the current exclude pattern and no-index format list for
  # +package_id+ as two "key:value" strings; pkgid_options is declared
  # with dupsort in DBTable.
  def put_package_options(package_id)
    @pkgid_options[package_id] = sprintf("exclude_pattern:%s",
                                         @config.exclude_pattern.to_s)
    @pkgid_options[package_id] = sprintf("noindex_formats:%s",
                                         @config.noindex_formats.join(","))
  end

  # Remembers that +path_id+ contains each of +word_ids+ and makes
  # +package_id+ the current package.  Nothing is written until
  # #flush_cache runs.
  def put_pathid_wordids(package_id, path_id, word_ids)
    @current_package_id = package_id
    word_ids.each {|word_id|
      pathids = (@wordid_pathids_cache[word_id] ||= [])
      pathids << path_id
    }
  end
end
672
+ end
673
+