gonzui 1.2
Sign up to get free protection for your applications and to get access to all the features.
- data/AUTHORS.txt +9 -0
- data/History.txt +5539 -0
- data/Manifest.txt +115 -0
- data/PostInstall.txt +17 -0
- data/README.rdoc +149 -0
- data/Rakefile +28 -0
- data/bin/gonzui-db +167 -0
- data/bin/gonzui-import +177 -0
- data/bin/gonzui-remove +58 -0
- data/bin/gonzui-search +68 -0
- data/bin/gonzui-server +176 -0
- data/bin/gonzui-update +53 -0
- data/data/gonzui/catalog/catalog.ja +80 -0
- data/data/gonzui/doc/favicon.ico +0 -0
- data/data/gonzui/doc/folder.png +0 -0
- data/data/gonzui/doc/gonzui.css +279 -0
- data/data/gonzui/doc/gonzui.js +111 -0
- data/data/gonzui/doc/text.png +0 -0
- data/data/gonzuirc.sample +29 -0
- data/ext/autopack/autopack.c +88 -0
- data/ext/autopack/extconf.rb +3 -0
- data/ext/delta/delta.c +147 -0
- data/ext/delta/extconf.rb +5 -0
- data/ext/texttokenizer/extconf.rb +5 -0
- data/ext/texttokenizer/texttokenizer.c +93 -0
- data/ext/xmlformatter/extconf.rb +5 -0
- data/ext/xmlformatter/xmlformatter.c +207 -0
- data/lib/gonzui.rb +59 -0
- data/lib/gonzui/apt.rb +193 -0
- data/lib/gonzui/bdbdbm.rb +118 -0
- data/lib/gonzui/cmdapp.rb +14 -0
- data/lib/gonzui/cmdapp/app.rb +175 -0
- data/lib/gonzui/cmdapp/search.rb +134 -0
- data/lib/gonzui/config.rb +117 -0
- data/lib/gonzui/content.rb +19 -0
- data/lib/gonzui/dbm.rb +673 -0
- data/lib/gonzui/deindexer.rb +162 -0
- data/lib/gonzui/delta.rb +49 -0
- data/lib/gonzui/extractor.rb +347 -0
- data/lib/gonzui/fetcher.rb +309 -0
- data/lib/gonzui/gettext.rb +144 -0
- data/lib/gonzui/importer.rb +84 -0
- data/lib/gonzui/indexer.rb +316 -0
- data/lib/gonzui/info.rb +80 -0
- data/lib/gonzui/license.rb +100 -0
- data/lib/gonzui/logger.rb +48 -0
- data/lib/gonzui/monitor.rb +177 -0
- data/lib/gonzui/progressbar.rb +235 -0
- data/lib/gonzui/remover.rb +38 -0
- data/lib/gonzui/searcher.rb +330 -0
- data/lib/gonzui/searchquery.rb +235 -0
- data/lib/gonzui/searchresult.rb +111 -0
- data/lib/gonzui/updater.rb +254 -0
- data/lib/gonzui/util.rb +415 -0
- data/lib/gonzui/vcs.rb +128 -0
- data/lib/gonzui/webapp.rb +25 -0
- data/lib/gonzui/webapp/advsearch.rb +123 -0
- data/lib/gonzui/webapp/filehandler.rb +24 -0
- data/lib/gonzui/webapp/jsfeed.rb +61 -0
- data/lib/gonzui/webapp/markup.rb +445 -0
- data/lib/gonzui/webapp/search.rb +269 -0
- data/lib/gonzui/webapp/servlet.rb +319 -0
- data/lib/gonzui/webapp/snippet.rb +155 -0
- data/lib/gonzui/webapp/source.rb +37 -0
- data/lib/gonzui/webapp/stat.rb +137 -0
- data/lib/gonzui/webapp/top.rb +63 -0
- data/lib/gonzui/webapp/uri.rb +140 -0
- data/lib/gonzui/webapp/webrick.rb +48 -0
- data/script/console +10 -0
- data/script/destroy +14 -0
- data/script/generate +14 -0
- data/script/makemanifest.rb +21 -0
- data/tasks/extconf.rake +13 -0
- data/tasks/extconf/autopack.rake +43 -0
- data/tasks/extconf/delta.rake +43 -0
- data/tasks/extconf/texttokenizer.rake +43 -0
- data/tasks/extconf/xmlformatter.rake +43 -0
- data/test/_external_tools.rb +13 -0
- data/test/_test-util.rb +142 -0
- data/test/foo/Makefile.foo +66 -0
- data/test/foo/bar.c +5 -0
- data/test/foo/bar.h +6 -0
- data/test/foo/foo.c +25 -0
- data/test/foo/foo.spec +33 -0
- data/test/test_apt.rb +42 -0
- data/test/test_autopack_extn.rb +7 -0
- data/test/test_bdbdbm.rb +79 -0
- data/test/test_cmdapp-app.rb +35 -0
- data/test/test_cmdapp-search.rb +99 -0
- data/test/test_config.rb +28 -0
- data/test/test_content.rb +15 -0
- data/test/test_dbm.rb +171 -0
- data/test/test_deindexer.rb +50 -0
- data/test/test_delta.rb +66 -0
- data/test/test_extractor.rb +78 -0
- data/test/test_fetcher.rb +75 -0
- data/test/test_gettext.rb +50 -0
- data/test/test_gonzui.rb +11 -0
- data/test/test_helper.rb +10 -0
- data/test/test_importer.rb +56 -0
- data/test/test_indexer.rb +37 -0
- data/test/test_info.rb +82 -0
- data/test/test_license.rb +49 -0
- data/test/test_logger.rb +60 -0
- data/test/test_monitor.rb +23 -0
- data/test/test_searcher.rb +37 -0
- data/test/test_searchquery.rb +27 -0
- data/test/test_searchresult.rb +43 -0
- data/test/test_texttokenizer.rb +47 -0
- data/test/test_updater.rb +95 -0
- data/test/test_util.rb +149 -0
- data/test/test_vcs.rb +61 -0
- data/test/test_webapp-markup.rb +42 -0
- data/test/test_webapp-util.rb +19 -0
- data/test/test_webapp-xmlformatter.rb +19 -0
- metadata +291 -0
@@ -0,0 +1,84 @@
|
|
1
|
+
#
|
2
|
+
# importer.rb - import contents to gonzui.db
|
3
|
+
#
|
4
|
+
# Copyright (C) 2004-2005 Satoru Takabayashi <satoru@namazu.org>
|
5
|
+
# All rights reserved.
|
6
|
+
# This is free software with ABSOLUTELY NO WARRANTY.
|
7
|
+
#
|
8
|
+
# You can redistribute it and/or modify it under the terms of
|
9
|
+
# the GNU General Public License version 2.
|
10
|
+
#
|
11
|
+
|
12
|
+
require 'uri'
|
13
|
+
|
14
|
+
module Gonzui
|
15
|
+
# Error raised by Importer, for instance when the package that is about
# to be imported already exists in the database.
class ImporterError < GonzuiError
end
|
16
|
+
|
17
|
+
# Imports everything a Fetcher can collect from one source URI into the
# database as a new package.
#
# NOTE(review): @dbm, @npackages, make_progress_bar, index_content and
# vprintf are not defined in this class; presumably they come from
# AbstractUpdater (or a module it includes) -- confirm there.
class Importer < AbstractUpdater
  # config and options are handed unchanged to AbstractUpdater#initialize.
  def initialize(config, options = {})
    super(config, options)
    # Filled in by import_package once a package has been imported.
    @last_package_name = nil
  end

  # Name of the package imported most recently, or nil if none was
  # imported yet.
  attr_reader :last_package_name

  private

  # Fetches and indexes every path reported by fetcher.collect.
  #
  # fetcher::    object providing package_name, collect and fetch(path)
  # source_uri:: passed through to index_content for every file
  #
  # Raises ImporterError if a package with the same name already exists.
  # A file whose fetch raises is reported through vprintf and skipped;
  # the remaining files are still processed.
  def import_package(fetcher, source_uri)
    package_name = fetcher.package_name
    if @dbm.has_package?(package_name)
      raise ImporterError.new("#{package_name}: already exists")
    end

    relative_paths = fetcher.collect
    pbar = make_progress_bar(package_name, relative_paths.length)
    begin
      relative_paths.each {|relative_path|
        begin
          normalized_path = File.join(package_name, relative_path)
          content = nil
          begin
            content = fetcher.fetch(relative_path)
          rescue => e
            # The format has three %s placeholders, so three values must
            # be supplied; with only two, formatting itself would raise
            # from inside this rescue and abort the whole import.
            # Array() guards against a nil backtrace.
            vprintf("fetch failed: %s: %s\n%s", relative_path, e.message,
                    Array(e.backtrace).join("\n"))
            next
          end
          index_content(source_uri, normalized_path, content)
        ensure
          # Advance the bar for skipped and failed files as well.
          pbar.inc
        end
      }
    ensure
      # Flush whatever has been indexed so far, even on error.
      @dbm.flush_cache
    end
    pbar.finish
    @npackages += 1
    @last_package_name = package_name
  end

  # Verb describing what this updater did.
  def do_task_name
    "imported"
  end

  public

  # Imports the package found at source_uri. The fetcher is always
  # finished, whether or not the import succeeds.
  def import(source_uri)
    fetcher = Fetcher.new(@config, source_uri)
    begin
      import_package(fetcher, source_uri)
    ensure
      fetcher.finish
    end
  end

  # Returns the superclass summary; in verbose mode the indexer's
  # performance statistics are appended when there are any.
  def summary
    text = super
    if @config.verbose
      stat = Indexer.statistics
      text += "\n" + stat unless stat.empty?
    end
    return text
  end

  # Closes the database handle.
  def finish
    @dbm.close
  end
end
|
84
|
+
end
|
@@ -0,0 +1,316 @@
|
|
1
|
+
#
|
2
|
+
# indexer.rb - indexer implementation
|
3
|
+
#
|
4
|
+
# Copyright (C) 2004-2005 Satoru Takabayashi <satoru@namazu.org>
|
5
|
+
# All rights reserved.
|
6
|
+
# This is free software with ABSOLUTELY NO WARRANTY.
|
7
|
+
#
|
8
|
+
# You can redistribute it and/or modify it under the terms of
|
9
|
+
# the GNU General Public License version 2.
|
10
|
+
#
|
11
|
+
|
12
|
+
require 'ftools'
|
13
|
+
require 'digest/md5'
|
14
|
+
require 'langscan'
|
15
|
+
|
16
|
+
module Gonzui
|
17
|
+
# Error raised by Indexer, for instance when a normalized path has no
# leading package component.
class IndexerError < GonzuiError
end
|
18
|
+
|
19
|
+
# Indexes a single piece of content (one file of a package) into the
# database: registers the package and path, stores the normalized text
# and its metadata, and, unless the format is excluded, records every
# word occurrence reported by a LangScan scanner.
#
# An instance handles exactly one content; call #index once.
class Indexer
  include Util

  # Shared by all instances. Methods are only registered for profiling
  # when @config.verbose is set (see initialize_profilers_if_necessary).
  @@performance_monitor = PerformanceMonitor.new

  # Returns a printable performance report, or "" when the monitor has
  # collected nothing.
  def self.statistics
    return "" if @@performance_monitor.empty?
    pm = @@performance_monitor
    summary = "Performance statistics:\n"
    summary << pm.heading
    summary << pm.format([Indexer, :index],
                         [Indexer, :read_content],
                         [Indexer, :add_license],
                         [Indexer, :index_content])
    labels = LangScan.modules.map {|m|
      [m, :scan]
    }.push([Indexer, :add_fragment],
           [Indexer, :flush_cache])
    summary << pm.format([Indexer, :index_content], *labels)
    return summary
  end

  # config::          configuration (verbose, utf8, noindex_formats are read)
  # dbm::             database handle that receives all index data
  # source_uri::      URI of the package source; its path is the fallback
  #                   for the package name
  # normalized_path:: "package/relative/path" of the content
  # content::         object responding to text, text=, length and mtime
  # options::         :noindex_formats overrides config.noindex_formats
  #
  # Raises IndexerError if normalized_path has fewer than two components.
  def initialize(config, dbm, source_uri, normalized_path, content,
                 options = {})
    @config = config
    @dbm = dbm
    @normalized_path = normalized_path
    @source_uri = source_uri
    @content = content
    # Hash of the text as given, i.e. before read_content normalizes it.
    @content_hash = Digest::MD5.hexdigest(content.text)
    @noindex_formats = (options[:noindex_formats] or @config.noindex_formats)

    @package_name = get_package_name
    @seqno = 0

    @word_cache = {}
    @wordinfo_cache = {}
    @digest_cache = []

    # to be initialized
    @format_id = nil
    @license_id = nil
    @license_abbrev = nil
    @encoding = nil
    @nlines = nil
    @package_id = nil
    @path_id = nil
    @bols = [] # positions of beginning of lines
    @indexed_p = false

    initialize_profilers_if_necessary
  end

  # Registers the indexing steps with the shared performance monitor,
  # but only in verbose mode.
  def initialize_profilers_if_necessary
    if @config.verbose
      @@performance_monitor.profile(Indexer, :index)
      @@performance_monitor.profile(Indexer, :read_content)
      @@performance_monitor.profile(Indexer, :index_content)
      @@performance_monitor.profile(Indexer, :add_fragment)
      @@performance_monitor.profile(Indexer, :add_license)
      @@performance_monitor.profile(Indexer, :flush_cache)
    end
  end

  # Normalizes the content text in place, then counts its lines and
  # records the offset at which each line starts in @bols.
  def read_content
    content, @encoding = normalize_content(@content.text)
    @content.text = content
    @nlines = 0
    pos = 0
    @content.text.each_line {|line|
      @bols.push(pos)
      @nlines += 1
      # NOTE(review): String#length counts bytes on Ruby 1.8 but
      # characters on 1.9+; confirm which unit the offsets need.
      pos += line.length
    }
  end

  # allow 0x09 (TAB), 0x0a (LF), 0x0c(^L), 0x0d (CR) 0x1b (ESC)
  allowed = [0x09, 0x0a, 0x0c, 0x0d, 0x1b]
  pattern = "["
  pattern << (0...0x20).find_all {|x|
    not allowed.include?(x)
  }.map {|x| sprintf("\\x%02x", x) }.join
  pattern << "]"
  # Matches any control character below 0x20 that is not allowed above.
  BinaryRegexp = Regexp.new(pattern)

  # Truthy (a MatchData) when content contains a control character that
  # is not expected in text, nil otherwise.
  def binary_content?(content)
    BinaryRegexp.match(content)
  end

  # Returns [content, encoding]. encoding is "binary" for binary content
  # (returned untouched); otherwise it is "ascii" unless config.utf8 is
  # set, in which case UTF8.to_utf8 supplies both values.
  def convert_to_utf8(content)
    encoding = "ascii"
    if binary_content?(content)
      encoding = "binary"
    else
      if @config.utf8
        content, encoding = UTF8.to_utf8(content)
      end
    end
    return content, encoding
  end

  # Returns [content, encoding]. Text content is untabified and its CR
  # and CRLF line endings become LF; binary content is left as is.
  def normalize_content(content)
    content, encoding = convert_to_utf8(content)
    unless encoding == "binary"
      content = content.untabify
      content.gsub!(/\r\n?/, "\n")
    end
    return content, encoding
  end

  # Derives the package name from the first component of the normalized
  # path. If that component is empty, "." or "..", the basename of the
  # source URI's path is used instead.
  #
  # Raises IndexerError when the path has fewer than two components.
  def get_package_name
    parts = @normalized_path.split("/")
    if parts.length < 2
      raise IndexerError.new("normalized path should not be flat")
    end
    package_name = parts.first
    if package_name.size == 0 || package_name == "." || package_name == ".."
      package_name = File.basename(@source_uri.path)
    end
    return package_name
  end

  # Splits the fragment's text into words and records each one at its
  # offset (fragment offset plus position inside the fragment).
  def add_text(fragment, type_id)
    text = fragment.text
    byteno = fragment.byteno
    TextTokenizer.each_word(text) {|word, pos|
      add_word(word, byteno + pos, type_id)
    }
  end

  # Records one scanner fragment: as separate words when its type is
  # splittable, as a single word otherwise. The fragment's extent and
  # type are also appended to the digest cache.
  def add_fragment(fragment)
    type_id = @dbm.get_type_id(fragment.type)
    if LangScan::Type.splittable?(fragment.type)
      add_text(fragment, type_id)
    else
      add_word(fragment.text, fragment.byteno, type_id)
    end

    @digest_cache.push(fragment.byteno, fragment.text.length, type_id)
  end

  # Writes the cached word occurrences, word id list, digest and line
  # offsets for the current path to the database, then clears the word
  # occurrence cache.
  def flush_cache
    all_word_ids = @wordinfo_cache.keys.sort!
    all_word_ids.each {|word_id|
      path_word_id = AutoPack.pack_id2(@path_id, word_id)
      @dbm.pathwordid_info[path_word_id] =
        DeltaDumper.dump_tuples(WordInfo, @wordinfo_cache[word_id])
    }
    @dbm.put_pathid_wordids(@package_id, @path_id, all_word_ids)
    @dbm.pathid_wordids[@path_id] = DeltaDumper.dump_ids(all_word_ids)
    @dbm.pathid_digest[@path_id] =
      DeltaDumper.dump_tuples(DigestInfo, @digest_cache)
    @dbm.pathid_bols[@path_id] = DeltaDumper.dump_fixnums(@bols)
    @wordinfo_cache.clear
    @dbm.word_id_counter.flush
  end

  # Shared implementation of add_format and add_license.
  #
  # abbrev, name:: identify the property value
  # counter::      name of the @dbm method returning the id counter
  # make_key::     name of the @dbm method building the statistics key
  # pkgid_ids::    name of the @dbm table mapping package id to this id
  #
  # Returns the id assigned to the property value.
  def add_property(abbrev, name, counter, make_key, pkgid_ids)
    id = @dbm.send(counter).get_id2(abbrev, name)
    @dbm.send(pkgid_ids)[@package_id] = id
    @dbm.increase_counter(@dbm.send(make_key, abbrev))
    return id
  end

  # Registers the content's format and remembers its id.
  def add_format(format_abbrev, format_name)
    @format_id = add_property(format_abbrev,
                              format_name,
                              :format_id_counter,
                              :make_ncontents_by_format_key,
                              :pkgid_fmtids)
  end

  # Detects the license from the content text, registers it and
  # remembers its id and abbreviation.
  def add_license
    detector = LicenseDetector.new(@content.text)
    license = detector.detect
    @license_id = add_property(license.abbrev,
                               license.name,
                               :license_id_counter,
                               :make_ncontents_by_license_key,
                               :pkgid_lcsids)
    @license_abbrev = license.abbrev
  end

  # Allocates a new path id and registers the path in both directions
  # and under its package. The path must not be registered already.
  def add_path
    assert_equal(false, @dbm.path_pathid.include?(@normalized_path))
    @path_id = @dbm.path_id_counter.make_new_id
    @dbm.path_pathid[@normalized_path] = @path_id
    @dbm.pathid_path[@path_id] = @normalized_path
    @dbm.pkgid_pathids[@package_id] = @path_id
  end

  # Runs the scanner over the content and returns the fragments whose
  # type LangScan::Type knows, sorted by byteno.
  def get_fragments(scanner)
    @@performance_monitor.profile(scanner, :scan) if @config.verbose
    fragments = []
    scanner.scan(@content.text) {|fragment|
      fragments.push(fragment) if LangScan::Type.include?(fragment.type)
    }
    fragments = fragments.sort_by {|fragment| fragment.byteno }
    return fragments
  end

  # Appends one occurrence (sequence number, offset, type id) of word to
  # the in-memory cache and advances the sequence number.
  def add_word(word, byteno, type_id)
    word_id = @dbm.word_id_counter.get_id(word)
    array = (@wordinfo_cache[word_id] ||= [])
    array.push(@seqno, byteno, type_id)
    @seqno += 1
  end

  # Looks up the package id, creating and registering the package first
  # when it is not in the database yet.
  def add_package_if_necessary
    if @dbm.has_package?(@package_name)
      @package_id = @dbm.get_package_id(@package_name)
    else
      @package_id = @dbm.package_id_counter.make_new_id
      @dbm.pkg_pkgid[@package_name] = @package_id
      @dbm.pkgid_pkg[@package_id] = @package_name
      @dbm.pkgid_src[@package_id] = @source_uri.to_s
      @dbm.put_package_options(@package_id)
    end
  end

  # Serialized ContentInfo record for the current content; the indexing
  # time is the current time.
  def make_content_info
    ContentInfo.dump(@content.length, @content.mtime.to_i,
                     Time.now.to_i, @format_id, @license_id,
                     @nlines, @indexed_p)
  end

  # Scans the content and indexes every fragment. When the given scanner
  # raises, scanning is retried once with LangScan::Text. When
  # LangScan::Text raises too, the error is swallowed: nothing is
  # indexed, but the content is still counted and marked as indexed.
  def index_content(scanner)
    fragments = []
    begin
      fragments = get_fragments(scanner)
    rescue
      # fallback to the text scanner
      unless scanner == LangScan::Text
        # Pass the path as an argument: it may contain '%', which must
        # not be interpreted as a format directive.
        vprintf("%s: fallback to LangScan::Text", @normalized_path)
        scanner = LangScan::Text
        retry
      end
    end
    fragments.each {|fragment| add_fragment(fragment) }
    flush_cache
    @dbm.increase_counter(:ncontents_indexed)
    @dbm.increase_counter(:nlines_indexed, @nlines)
    @indexed_p = true
  end

  # Stores everything that is kept for both indexed and unindexed
  # content: format, license, owning package, text, info record and hash.
  def add_content_common(format_abbrev, format_name)
    add_format(format_abbrev, format_name)
    add_license
    @dbm.pathid_pkgid[@path_id] = @package_id
    @dbm.pathid_content[@path_id] = @content.text
    @dbm.pathid_info[@path_id] = make_content_info
    @dbm.pathid_hash[@path_id] = @content_hash
    vprintf("added (%s): %s (%s)", format_abbrev,
            @normalized_path, @license_abbrev)
  end

  # Stores binary content without indexing it.
  def add_binary_content
    add_content_common("binary", "Binary")
  end

  # Chooses a scanner for the content; LangScan::Text when LangScan
  # finds none.
  def make_scanner
    scanner = LangScan.choose(@normalized_path, @content.text)
    scanner = LangScan::Text if scanner.nil?
    return scanner
  end

  # False when the scanner's format is listed in the no-index formats.
  def indexable?(scanner)
    not @noindex_formats.include?(scanner.abbrev)
  end

  # Indexes text content unless its format is excluded, then stores it.
  # Indexing runs first so that the stored info record carries the
  # resulting indexed flag.
  def add_content_with_indexing
    scanner = make_scanner
    if indexable?(scanner)
      index_content(scanner)
    else
      vprintf("skip indexing: %s", @normalized_path)
    end
    add_content_common(scanner.abbrev, scanner.name)
  end

  # Stores the content, choosing the binary or the text path according
  # to the encoding determined by read_content.
  def add_content
    if @encoding == "binary"
      add_binary_content
    else
      add_content_with_indexing
    end
  end

  # No `private` precedes this marker, so it is a no-op: every method
  # above is public as well.
  public

  # Entry point: reads and normalizes the content, registers package and
  # path, then stores (and normally indexes) the content.
  def index
    read_content
    add_package_if_necessary
    add_path
    add_content
  end
end
|
316
|
+
end
|
data/lib/gonzui/info.rb
ADDED
@@ -0,0 +1,80 @@
|
|
1
|
+
#
|
2
|
+
# info.rb - information classes
|
3
|
+
#
|
4
|
+
# Copyright (C) 2004-2005 Satoru Takabayashi <satoru@namazu.org>
|
5
|
+
# All rights reserved.
|
6
|
+
# This is free software with ABSOLUTELY NO WARRANTY.
|
7
|
+
#
|
8
|
+
# You can redistribute it and/or modify it under the terms of
|
9
|
+
# the GNU General Public License version 2.
|
10
|
+
#
|
11
|
+
|
12
|
+
module Gonzui
|
13
|
+
# Helpers for objects that respond to byteno and length.
module BytenoMixin
  # Offset just past the end of the span (byteno + length).
  def end_byteno
    start = byteno
    start + length
  end

  # Half-open range byteno ... byteno + length covering the span.
  def range
    Range.new(byteno, byteno + length, true)
  end
end
|
22
|
+
|
23
|
+
# One occurrence of a word in a path.
WordInfo = Struct.new(:word_id,
                      :path_id,
                      :seqno,
                      :byteno,
                      :type_id,
                      :type,
                      :lineno)
class WordInfo
  include BytenoMixin

  # dump info
  UnitSize = 3
  DeltaSize = 2

  # True when target_type is :all or equals this occurrence's type.
  def match?(target_type)
    return true if target_type == :all
    target_type == self.type
  end
end
|
36
|
+
|
37
|
+
# Extent (byteno, length) and type of one fragment of a content.
DigestInfo = Struct.new(:byteno,
                        :length,
                        :type_id,
                        :type)
class DigestInfo
  include BytenoMixin

  # dump info
  UnitSize = 3
  DeltaSize = 1
end
|
45
|
+
|
46
|
+
# Metadata record of one content, stored as a packed string.
ContentInfo = Struct.new(:size,
                         :mtime,
                         :itime,
                         :format_id,
                         :license_id,
                         :nlines,
                         :indexed_p)
class ContentInfo
  extend Util

  # Every field is packed as a BER-compressed integer.
  PACK_FORMAT = "w*"

  class << self
    # Rebuilds a ContentInfo from a string produced by dump; the stored
    # 1/0 flag is turned back into true/false.
    def load(dump)
      info = new(*dump.unpack(PACK_FORMAT))
      info.indexed_p = (info.indexed_p == 1)
      return info
    end

    # Packs the given fields into a string. indexed_p is stored as 1 or
    # 0; a negative mtime is reported and replaced by the current time.
    def dump(size, mtime, itime, format_id,
             license_id, nlines, indexed_p)
      flag = indexed_p ? 1 : 0
      stamp = mtime
      # FIXME: It could happen for some cases.
      if stamp < 0
        vprintf("minus mtime found: %d", stamp)
        stamp = Time.now.to_i
      end
      fields = [size, stamp, itime, format_id, license_id, nlines, flag]
      fields.pack(PACK_FORMAT)
    end
  end

  # Whether the content was indexed.
  def indexed?
    indexed_p
  end
end
|
75
|
+
|
76
|
+
# A span (byteno, lineno, length) with the BytenoMixin helpers.
Occurrence = Struct.new(:byteno, :lineno, :length)
Occurrence.class_eval do
  include BytenoMixin
end
|
80
|
+
end
|