fastri 0.1.1.1 → 0.2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGES +12 -0
- data/Rakefile +5 -4
- data/bin/fastri-server +77 -35
- data/bin/fri +145 -8
- data/bin/ri-emacs +1 -1
- data/lib/fastri/full_text_index.rb +245 -0
- data/lib/fastri/full_text_indexer.rb +100 -0
- data/lib/fastri/ri_index.rb +30 -0
- data/lib/fastri/ri_service.rb +6 -0
- data/lib/fastri/util.rb +83 -0
- data/lib/fastri/version.rb +6 -1
- data/test/test_full_text_index.rb +182 -0
- data/test/test_full_text_indexer.rb +84 -0
- data/test/test_integration_full_text_index.rb +43 -0
- data/test/test_ri_index.rb +99 -1
- data/test/test_util.rb +38 -0
- metadata +14 -3
data/CHANGES
CHANGED
@@ -1,6 +1,18 @@
|
|
1
1
|
|
2
2
|
User-visible changes in FastRI
|
3
3
|
|
4
|
+
Since version 0.1.1 (2006-11-10)
|
5
|
+
================================
|
6
|
+
Features
|
7
|
+
--------
|
8
|
+
* fri can do full-text search (-S, --full-text); try fri -S byte order
|
9
|
+
* fri can now determine where a method actually came from for core classes
|
10
|
+
e.g. fri File.inject -> docs for Enumerable#inject
|
11
|
+
* you can specify which ports the DRb services must bind to:
|
12
|
+
fastri-server -s 192.168.1.2:54321
|
13
|
+
fri -s 192.168.1.2:12345
|
14
|
+
* new search methods: "anywhere" (a) and "anywhere, case-indep." (A)
|
15
|
+
|
4
16
|
Since version 0.1.0 (2006-11-08)
|
5
17
|
================================
|
6
18
|
Features
|
data/Rakefile
CHANGED
@@ -42,10 +42,11 @@ Spec = Gem::Specification.new do |s|
|
|
42
42
|
s.summary = "RI docs across machines, faster and smarter than ri."
|
43
43
|
s.description = <<EOF
|
44
44
|
FastRI is an alternative to the ri command-line tool. It is *much* faster, and
|
45
|
-
also allows you to offer RI lookup services over DRb. FastRI is
|
46
|
-
|
47
|
-
"full path".
|
48
|
-
to a core class were added by a
|
45
|
+
also allows you to offer RI lookup services over DRb. FastRI is smarter than
|
46
|
+
ri, and can find classes anywhere in the hierarchy without specifying the
|
47
|
+
"full path". FastRI can perform fast full-text searches. It also knows about
|
48
|
+
gems, and can tell you e.g. which extensions to a core class were added by a
|
49
|
+
specific gem.
|
49
50
|
EOF
|
50
51
|
s.files = PKG_FILES.to_a
|
51
52
|
s.require_path = 'lib'
|
data/bin/fastri-server
CHANGED
@@ -1,10 +1,12 @@
|
|
1
|
-
|
1
|
+
#!/usr/bin/env ruby
|
2
2
|
# fastri-server: serve RI documentation over DRb
|
3
3
|
# Copyright (C) 2006 Mauricio Fernandez <mfp@acm.org>
|
4
4
|
|
5
5
|
require 'fastri/version'
|
6
6
|
require 'fastri/ri_index'
|
7
7
|
require 'fastri/ri_service'
|
8
|
+
require 'fastri/util'
|
9
|
+
require 'fastri/full_text_indexer'
|
8
10
|
require 'enumerator'
|
9
11
|
|
10
12
|
FASTRI_SERVER_VERSION = "0.0.1"
|
@@ -18,25 +20,10 @@ def make_index(index_file)
|
|
18
20
|
paths = [ RI::Paths::SYSDIR, RI::Paths::SITEDIR, RI::Paths::HOMEDIR ].find_all do |p|
|
19
21
|
p && File.directory?(p)
|
20
22
|
end
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
gemdirs.each do |path|
|
26
|
-
gemname, version = %r{/([^/]+)-(.*)/ri$}.match(path).captures
|
27
|
-
if gemname.nil? # doesn't follow any conventions :(
|
28
|
-
gems[path[%r{/([^/]+)/ri$}, 1]] << ["unknown", path]
|
29
|
-
else
|
30
|
-
gems[gemname] << [version, path]
|
31
|
-
end
|
32
|
-
end
|
33
|
-
gems.sort_by{|name, _| name}.each do |name, versions|
|
34
|
-
version, path = versions.sort.last
|
35
|
-
puts "Indexing RI docs for #{name} version #{version}."
|
36
|
-
paths << path
|
37
|
-
end
|
38
|
-
rescue LoadError
|
39
|
-
end
|
23
|
+
FastRI::Util.gem_directories_unique.each do |name, version, path|
|
24
|
+
paths << path
|
25
|
+
puts "Indexing RI docs for #{name} version #{version || "unknown"}."
|
26
|
+
end
|
40
27
|
|
41
28
|
puts "Building index."
|
42
29
|
t0 = Time.new
|
@@ -52,21 +39,59 @@ EOF
|
|
52
39
|
ri_reader
|
53
40
|
end
|
54
41
|
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
42
|
+
# Flattens one node of a parsed RI comment tree into plain text.
#
# comment:: a Hash node; the keys consulted are "body", "contents" and "text".
#
# Lookup order:
# 1. a String under "body" is returned as is;
# 2. an Array under "contents" is flattened recursively and joined with "\n";
# 3. otherwise the value under "text" (if any) is returned.
#
# Returns nil when the node carries none of the above, or is not a Hash.
def linearize(comment)
  # nil would raise NoMethodError and a bare String would turn the key
  # lookups below into substring searches (String#[]), so bail out early.
  return nil unless Hash === comment

  body = comment["body"]
  return body if String === body

  children = comment["contents"]
  if Array === children
    # Children without text contribute an empty line (nil joins as "").
    children.map { |child| linearize(child) }.join("\n")
  else
    comment["text"] || nil
  end
end
|
55
|
+
|
56
|
+
# Builds the full-text index files ("full_text.dat" and "suffixes.dat")
# inside +dir+, creating the directory when it does not exist yet.
#
# Every *.yaml file found below the system/site/home RI directories and the
# gem RI directories reported by FastRI::Util.gem_directories_unique is
# loaded, its 'comment' entries are flattened with linearize, markup is
# stripped, and the resulting text is handed to a FastRI::FullTextIndexer.
#
# dir:: directory that receives the two index files.
def make_full_text_index(dir)
  paths = [ RI::Paths::SYSDIR, RI::Paths::SITEDIR, RI::Paths::HOMEDIR ].find_all do |p|
    p && File.directory?(p)
  end
  FastRI::Util.gem_directories_unique.each do |name, version, path|
    paths << path
    puts "Indexing RI docs for #{name} version #{version || "unknown"}."
  end
  Dir.mkdir(dir) unless File.exist?(dir)

  indexer = FastRI::FullTextIndexer.new(40)
  bad = 0
  paths.each do |path|
    Dir["#{path}/**/*.yaml"].each do |yamlfile|
      yaml = File.read(yamlfile)
      begin
        # Type tags (" !ruby/object:..." etc.) are removed so that the
        # document loads as plain hashes and arrays.
        data = YAML.load(yaml.gsub(/ \!.*/, ''))
      rescue StandardError
        # Best effort: skip unparseable files, but do not swallow
        # Interrupt/SystemExit the way "rescue Exception" would.
        bad += 1
        next
      end
      # An empty or scalar-only document has no 'comment' to index.
      next unless Hash === data

      desc = (data['comment'] || []).map { |x| linearize(x) }.join("\n")
      desc.gsub!(/<\/?(em|b|tt|ul|ol|table)>/, "")
      # Decode the HTML entities used in RI comments. &amp; goes last so that
      # e.g. "&amp;lt;" becomes "&lt;" and is not decoded a second time.
      # NOTE(review): literal double quotes are normalised to single quotes
      # as well, matching the pattern visible in the reviewed text.
      desc.gsub!(/&quot;|"/, "'")
      desc.gsub!(/&lt;/, "<")
      desc.gsub!(/&gt;/, ">")
      desc.gsub!(/&amp;/, "&")
      indexer.add_document(yamlfile, desc) unless desc.empty?
    end
  end
  puts "Skipped #{bad} RI files that could not be parsed." if bad > 0

  File.open(File.join(dir, "full_text.dat"), "wb") do |fulltextIO|
    File.open(File.join(dir, "suffixes.dat"), "wb") do |suffixesIO|
      indexer.build_index(fulltextIO, suffixesIO)
    end
  end
end
|
@@ -75,8 +100,12 @@ end
|
|
75
100
|
|
76
101
|
require 'optparse'
|
77
102
|
|
103
|
+
home = FastRI::Util.find_home
|
78
104
|
options = {:allowed_hosts => ["127.0.0.1"], :addr => "127.0.0.1",
|
79
|
-
:index_file => File.join(
|
105
|
+
:index_file => File.join(home, ".fastri-index"),
|
106
|
+
:do_full_text => false,
|
107
|
+
:full_text_dir => File.join(home, ".fastri-fulltext"),
|
108
|
+
}
|
80
109
|
OptionParser.new do |opts|
|
81
110
|
opts.banner = "Usage: fastri-server.rb [options]"
|
82
111
|
|
@@ -100,6 +129,17 @@ OptionParser.new do |opts|
|
|
100
129
|
exit 0
|
101
130
|
end
|
102
131
|
|
132
|
+
opts.on("-F", "--full-text-dir DIR", "Place full-text index in DIR",
|
133
|
+
"(default: #{options[:full_text_dir]})") do |dir|
|
134
|
+
options[:full_text_dir] = dir if dir
|
135
|
+
options[:do_full_text] = true
|
136
|
+
end
|
137
|
+
|
138
|
+
opts.on("-B", "--rebuild-full-text", "Rebuild full-text index.") do
|
139
|
+
make_full_text_index(options[:full_text_dir])
|
140
|
+
exit 0
|
141
|
+
end
|
142
|
+
|
103
143
|
opts.on("-h", "--help", "Show this help message") do
|
104
144
|
puts opts
|
105
145
|
exit
|
@@ -129,7 +169,9 @@ options[:allowed_hosts].each{|host| acl_opt.concat ["allow", host.strip]}
|
|
129
169
|
acl = ACL.new(acl_opt)
|
130
170
|
DRb.install_acl(acl)
|
131
171
|
|
132
|
-
|
172
|
+
ip = options[:addr][/^[^:]+/] || "127.0.0.1"
|
173
|
+
port = options[:addr][/:(\d+)/, 1] || 0
|
174
|
+
drb_addr = "druby://#{ip}:#{port}"
|
133
175
|
DRb.start_service(drb_addr)
|
134
176
|
|
135
177
|
$SAFE = 1
|
data/bin/fri
CHANGED
@@ -1,10 +1,12 @@
|
|
1
|
-
|
1
|
+
#!/usr/bin/env ruby
|
2
2
|
# fri: access RI documentation through DRb
|
3
3
|
# Copyright (C) 2006 Mauricio Fernandez <mfp@acm.org>
|
4
4
|
#
|
5
5
|
|
6
6
|
require 'rinda/ring'
|
7
7
|
require 'optparse'
|
8
|
+
require 'fastri/util'
|
9
|
+
require 'fastri/full_text_index'
|
8
10
|
|
9
11
|
# we bind to 127.0.0.1 by default, because otherwise Ruby will try with
|
10
12
|
# 0.0.0.0, which results in a DNS request, adding way too much latency
|
@@ -27,6 +29,8 @@ options = {
|
|
27
29
|
:nested_partial, :nested_partial_ci,
|
28
30
|
],
|
29
31
|
:show_matches => false,
|
32
|
+
:do_full_text => false,
|
33
|
+
:full_text_dir => File.join(FastRI::Util.find_home, ".fastri-fulltext"),
|
30
34
|
}
|
31
35
|
override_addr_env = false
|
32
36
|
optparser = OptionParser.new do |opts|
|
@@ -41,17 +45,28 @@ optparser = OptionParser.new do |opts|
|
|
41
45
|
order_mapping = {
|
42
46
|
'e' => :exact, 'E' => :exact_ci, 'n' => :nested, 'N' => :nested_ci,
|
43
47
|
'p' => :partial, 'P' => :partial_ci, 'x' => :nested_partial,
|
44
|
-
'X' => :nested_partial_ci
|
48
|
+
'X' => :nested_partial_ci, 'a' => :anywhere, 'A' => :anywhere_ci,
|
45
49
|
}
|
46
50
|
opts.on("-O", "--order ORDER", "Specify lookup order.",
|
47
51
|
"(default: eEnNpPxX)", "Uppercase: case-indep.",
|
48
|
-
"e:
|
49
|
-
"x:
|
52
|
+
"e:exact n:nested p:partial (completion)",
|
53
|
+
"x:nested and partial",
|
54
|
+
"a:match method name anywhere") do |order|
|
50
55
|
options[:lookup_order] = order.split(//).map{|x| order_mapping[x]}.compact
|
51
56
|
end
|
52
57
|
|
53
58
|
opts.on("--show-matches", "Only show matching entries."){ options[:show_matches] = true }
|
54
59
|
|
60
|
+
opts.on("-S", "--full-text", "Perform full-text search.") do
|
61
|
+
options[:do_full_text] = true
|
62
|
+
end
|
63
|
+
|
64
|
+
opts.on("-F", "--full-text-dir DIR", "Use full-text index in DIR",
|
65
|
+
"(default: #{options[:full_text_dir]})") do |dir|
|
66
|
+
options[:full_text_dir] = dir if dir
|
67
|
+
options[:do_full_text] = true
|
68
|
+
end
|
69
|
+
|
55
70
|
opts.on("-f", "--format FMT", "Format to use when displaying output:",
|
56
71
|
" ansi, plain (default: #{options[:format]})") do |format|
|
57
72
|
options[:format] = format
|
@@ -74,12 +89,53 @@ if ARGV.empty?
|
|
74
89
|
exit
|
75
90
|
end
|
76
91
|
|
92
|
+
# {{{ try to find where the method comes from exactly
|
93
|
+
# Returns the documentation name of the place where the method object +m+
# (a Method or UnboundMethod) is actually defined: "Owner#name" for instance
# methods and "Owner::name" for singleton (class/module-level) methods.
#
# Raises RuntimeError when the fallback parser cannot understand m.inspect.
def help_method_extract(m) # :nodoc:
  if m.respond_to?(:owner) && m.respond_to?(:name)
    # Ask the method object directly. The output of #inspect is not stable
    # across Ruby versions (newer ones append parameter lists and source
    # locations), which defeats the textual parsing used below.
    owner = m.owner
    # The singleton class of a named class/module inspects as "#<Class:Name>".
    # Anonymous classes inspect as "#<Class:0x...>" and are left alone.
    attached = owner.inspect[/\A#<Class:([A-Z].*)>\z/m, 1]
    return attached ? "#{attached}::#{m.name}" : "#{owner}##{m.name}"
  end

  # Fallback for method objects without #owner / #name: parse #inspect.
  unless m.inspect =~ %r[\A#<(?:Unbound)?Method: (.*?)>\Z]
    raise "Cannot parse result of #{m.class}#inspect: #{m.inspect}"
  end
  $1.sub(/\A.*?\((.*?)\)(.*)\Z/){ "#{$1}#{$2}" }.sub(/\./, "::").sub(/#<Class:(.*?)>#/) { "#{$1}::" }
end

# Rewrites a query of the form "Const#meth", "Const::meth" or "Const.meth"
# so that it names the class or module that really defines the method
# (e.g. a method inherited from a mixin resolves to "Mixin#meth").
#
# query:: the query string given on the command line (may be nil).
#
# Returns the rewritten query, or +query+ itself when it has no separator,
# the constant or method cannot be resolved, or anything else goes wrong;
# this is a best-effort refinement and never raises.
def magic_help(query)
  return query unless query =~ /\A(.*?)(#|::|\.)(.*)\Z/
  const_name, separator, method_name = $1, $2, $3

  begin
    klass = Object.const_get(const_name)
    meth = case separator
           when "#"  then klass.instance_method(method_name)
           when "::" then klass.method(method_name)
           when "."
             # "." is ambiguous: prefer a class-level method and fall back
             # to an instance method of the same name.
             begin
               klass.method(method_name)
             rescue NameError
               klass.instance_method(method_name)
             end
           end
    help_method_extract(meth)
  rescue StandardError, ScriptError
    # ScriptError covers LoadError raised by autoloaded constants.
    # Interrupt and SystemExit are deliberately not swallowed.
    query
  end
end
|
125
|
+
help_query = magic_help(ARGV[0])
|
126
|
+
|
127
|
+
#{{{ determine the address to bind to
|
77
128
|
if override_addr_env
|
78
|
-
|
129
|
+
addr_spec = options[:addr]
|
79
130
|
else
|
80
|
-
|
131
|
+
addr_spec = ENV["FASTRI_ADDR"] || options[:addr]
|
81
132
|
end
|
82
133
|
|
134
|
+
ip = addr_spec[/^[^:]+/] || "127.0.0.1"
|
135
|
+
port = addr_spec[/:(\d+)/, 1] || 0
|
136
|
+
addr = "druby://#{ip}:#{port}"
|
137
|
+
|
138
|
+
#{{{ start DRb and perform request
|
83
139
|
begin
|
84
140
|
DRb.start_service(addr)
|
85
141
|
ring_server = Rinda::RingFinger.primary
|
@@ -103,9 +159,90 @@ info_options = {
|
|
103
159
|
:width => options[:width],
|
104
160
|
:lookup_order => options[:lookup_order],
|
105
161
|
}
|
162
|
+
|
163
|
+
MAX_CONTEXT_LINES = 20
|
164
|
+
# Wraps +text+ into lines of at most width-4 characters and prefixes the
# result with "... ".
#
# Lines are broken at runs of spaces (which are dropped) or at line ends;
# a word longer than a line is cut hard. The trailing newline is removed.
#
# text::  the String to wrap.
# width:: total display width; 4 columns are reserved for the "... " prefix.
def context_wrap(text, width)
  # A width of 4 or less would yield an empty or negative repeat bound in
  # the regexp below, so always allow at least one character per line.
  line_width = [width - 4, 1].max
  wrapped = text.gsub(/(.{1,#{line_width}})( +|$\n?)|(.{1,#{line_width}})/, "\\1\\3\n")
  "... " + wrapped.chomp
end
|
168
|
+
|
169
|
+
# Prints the full-text hits found in a single RI file.
#
# results::      non-empty list of result objects that all refer to the same
#                file; each must respond to #path, #query and #context(size).
# gem_dir_info:: gem directory information as returned by
#                FastRI::Util.gem_directories_unique.
# width::        display width used when wrapping the context lines.
#
# A "Found in ..." header names the gem (or the system RI directory, or the
# raw path) the file belongs to, followed by at most MAX_CONTEXT_LINES
# distinct context excerpts. Nothing is printed for an empty result list.
def display_fulltext_search_results(results, gem_dir_info = FastRI::Util.gem_directories_unique,
                                    width = 78)
  return if results.empty?

  path = File.expand_path(results[0].path)
  gem_name, version, gem_path = FastRI::Util.gem_info_for_path(path, gem_dir_info)
  if gem_name
    rel_path = path[/#{Regexp.escape(gem_path)}\/(.*)/, 1]
    entry_name = FastRI::Util.gem_relpath_to_full_name(rel_path) if rel_path
    puts "Found in #{gem_name} #{version} #{entry_name}"
  else
    rdoc_system_path = File.expand_path(RI::Paths::SYSDIR)
    rel_path = nil
    if path.index(rdoc_system_path)
      rel_path = path[/#{Regexp.escape(rdoc_system_path)}\/(.*)/, 1]
    end
    if rel_path
      puts "Found in system #{FastRI::Util.gem_relpath_to_full_name(rel_path)}"
    else
      # Either outside the system directory or not strictly below it; a nil
      # relative path must not be handed to gem_relpath_to_full_name.
      puts "Found in #{path}:"
    end
  end

  text = results.map do |result|
    context = result.context(120)
    hit = context.index(result.query)
    if hit
      # Keep only the line of the context that contains the query.
      from = (context.rindex("\n", hit) || -1) + 1
      to   = (context.index("\n", hit) || 0) - 1
      context_wrap(context[from..to], width)
    else
      # The query is not inside the fetched window: show the window as is
      # instead of failing on a nil position.
      context_wrap(context, width)
    end
  end
  puts
  puts text.uniq[0...MAX_CONTEXT_LINES]
  puts
end
|
199
|
+
|
200
|
+
# Full-text search mode (-S / -F): answer the query from the local index
# files and exit without contacting the DRb service.
if options[:do_full_text]
  fulltext = File.join(options[:full_text_dir], "full_text.dat")
  suffixes = File.join(options[:full_text_dir], "suffixes.dat")
  begin
    index = FastRI::FullTextIndex.new_from_filenames(fulltext, suffixes)
  rescue StandardError
    # Missing files and unsupported index formats end up here; Interrupt and
    # SystemExit are no longer swallowed.
    puts <<EOF
Couldn't open the full-text index:
#{fulltext}
#{suffixes}

The index needs to be rebuilt with
fastri-server -B
EOF
    exit(-1)
  end

  gem_dir_info = FastRI::Util.gem_directories_unique
  # One list of hits per search term.
  match_sets = ARGV.map do |query|
    result = index.lookup(query)
    result ? index.next_matches(result) + [result] : []
  end

  # All hits, grouped by the file they were found in.
  path_map = Hash.new { |h, k| h[k] = [] }
  match_sets.each { |matches| matches.each { |m| path_map[m.path] << m } }

  # Only files that contain every search term are reported.
  paths_per_term = match_sets.map { |matches| matches.map { |m| m.path }.uniq }
  paths = paths_per_term.inject { |common, term_paths| common & term_paths } || []

  if paths.empty?
    puts "nil"
  else
    puts "#{paths.size} hits"
    # Files with the most hits first.
    paths.sort_by { |path| -path_map[path].size }.each do |path|
      puts "=" * options[:width]
      display_fulltext_search_results(path_map[path], gem_dir_info, options[:width])
    end
  end

  exit 0
end
|
242
|
+
|
106
243
|
if options[:show_matches]
|
107
|
-
puts service.matches(
|
244
|
+
puts service.matches(help_query, info_options).sort
|
108
245
|
else
|
109
|
-
puts service.info(
|
246
|
+
puts service.info(help_query, info_options)
|
110
247
|
end
|
111
248
|
# vi: set sw=2 expandtab:
|
data/bin/ri-emacs
CHANGED
(hunk for data/bin/ri-emacs not shown)
data/lib/fastri/full_text_index.rb
ADDED
@@ -0,0 +1,245 @@
|
|
1
|
+
# Copyright (C) 2006 Mauricio Fernandez <mfp@acm.org>
|
2
|
+
#
|
3
|
+
|
4
|
+
require 'fastri/full_text_indexer'
|
5
|
+
require 'stringio'
|
6
|
+
|
7
|
+
module FastRI

# Read-only access to a full-text index consisting of two streams:
#
# * the full text: a magic header followed by the indexed documents. As read
#   by find_metadata, each document's text is terminated by "\0", a 4-byte
#   little-endian length and a footer of the form "<path>\0<marshalled
#   metadata>";
# * the suffix array: consecutive 4-byte little-endian offsets into the full
#   text (see index_to_offset), searched with a binary search.
#
# Instances are created with new_from_ios or new_from_filenames; +new+ is
# private.
class FullTextIndex
  MAX_QUERY_SIZE = 20
  MAX_REGEXP_MATCH_SIZE = 255

  # A single hit: the suffix array position (+index+) at which +query+ was
  # found, plus the path and metadata of the document containing it.
  class Result
    attr_reader :path, :query, :index, :metadata

    def initialize(searcher, query, index, path, metadata)
      @searcher = searcher
      @index    = index
      @query    = query
      @path     = path
      @metadata = metadata
    end

    # Text surrounding the hit: up to +size+ characters on either side,
    # limited to the document the hit belongs to.
    def context(size)
      @searcher.fetch_data(@index, 2*size+1, -size)
    end

    # Up to +size+ characters of document text starting at the hit.
    def text(size)
      @searcher.fetch_data(@index, size, 0)
    end
  end

  class << self; private :new end

  DEFAULT_OPTIONS = {
    :max_query_size => MAX_QUERY_SIZE,
  }

  # Creates an index that reads from two already opened IO objects.
  def self.new_from_ios(fulltext_IO, suffix_arrray_IO, options = {})
    new(:io, fulltext_IO, suffix_arrray_IO, options)
  end

  # Creates an index that (re)opens the two named files for each operation.
  def self.new_from_filenames(fulltext_fname, suffix_arrray_fname, options = {})
    new(:filenames, fulltext_fname, suffix_arrray_fname, options)
  end

  attr_reader :max_query_size

  # type::     :io or :filenames
  # fulltext:: IO or file name of the full text
  # sarray::   IO or file name of the suffix array
  # options::  Hash; :max_query_size overrides MAX_QUERY_SIZE
  #
  # Raises RuntimeError for an unknown +type+ or an unsupported index format.
  def initialize(type, fulltext, sarray, options)
    options = DEFAULT_OPTIONS.merge(options)
    case type
    when :io
      @fulltext_IO = fulltext
      @sarray_IO   = sarray
    when :filenames
      @fulltext_fname = fulltext
      @sarray_fname   = sarray
    else raise "Unknown type"
    end
    @type = type
    @max_query_size = options[:max_query_size]
    check_magic
  end

  # Finds the first suffix (in suffix array order) that starts with +term+.
  # Only the first max_query_size characters of a suffix are compared.
  #
  # Returns a Result, or nil when nothing matches, the index is empty, or
  # the document footer cannot be read.
  def lookup(term)
    get_fulltext_IO do |fulltextIO|
      get_sarray_IO do |sarrayIO|
        last_index = suffix_count(sarrayIO) - 1
        # An empty suffix array would otherwise lead to a negative seek.
        return nil if last_index < 0

        index, offset = binary_search(sarrayIO, fulltextIO, term, 0, last_index)
        return nil unless offset

        fulltextIO.pos = offset
        path, metadata = find_metadata(fulltextIO)
        path ? Result.new(self, term, index, path, metadata) : nil
      end
    end
  end

  # Returns the next hit after +result+ for the same query whose text also
  # matches +term_or_regexp+ (String or Regexp), or nil if there is none.
  def next_match(result, term_or_regexp = "")
    each_following_match(result, term_or_regexp) { |match| return match }
    nil
  end

  # Returns all hits after +result+ for the same query whose text also
  # matches +term_or_regexp+ (String or Regexp), in suffix array order.
  def next_matches(result, term_or_regexp = "")
    ret = []
    each_following_match(result, term_or_regexp) { |match| ret << match }
    ret
  end

  # Reads up to +size+ characters of text around the suffix at +index+,
  # starting +offset+ (<= 0) characters before it. The returned String is
  # cut at the "\0" separators surrounding the suffix position.
  #
  # Raises RuntimeError ("Bad offset") for a positive +offset+.
  def fetch_data(index, size, offset = 0)
    raise "Bad offset" unless offset <= 0
    get_fulltext_IO do |fulltextIO|
      get_sarray_IO do |sarrayIO|
        base = index_to_offset(sarrayIO, index)
        actual_offset = offset
        newsize = size
        if base + offset < 0 # at the beginning
          excess = (base + offset).abs # remember offset is < 0
          newsize = size - excess
          actual_offset = offset + excess
        end
        str = get_string(sarrayIO, fulltextIO, index, newsize, offset)
        # -actual_offset is the position of the suffix inside str.
        from = (str.rindex("\0", -actual_offset) || -1) + 1
        to   = (str.index("\0", -actual_offset) || 0) - 1
        str[from..to]
      end
    end
  end

  private

  # Verifies that the full text starts with a header carrying a supported
  # "major.minor.teeny" format version.
  def check_magic
    get_fulltext_IO do |io|
      io.rewind
      header = io.read(FullTextIndexer::MAGIC.size)
      raise "Unsupported index format." unless header
      version = header[/\d+\.\d+\.\d+/]
      raise "Unsupported index format." unless version
      major, minor, teeny = version.scan(/\d+/)
      # NOTE(review): major/minor are Strings here, so the FASTRI_FT_INDEX_*
      # constants are presumably Strings too -- confirm where they are defined.
      if major != FASTRI_FT_INDEX_FORMAT_MAJOR ||
         minor > FASTRI_FT_INDEX_FORMAT_MINOR
        raise "Unsupported index format"
      end
    end
  end

  # Yields the full-text IO, opening (and closing) the file if needed.
  def get_fulltext_IO
    case @type
    when :io; yield @fulltext_IO
    when :filenames
      File.open(@fulltext_fname, "rb"){|f| yield f}
    end
  end

  # Yields the suffix array IO, opening (and closing) the file if needed.
  def get_sarray_IO
    case @type
    when :io; yield @sarray_IO
    when :filenames
      File.open(@sarray_fname, "rb"){|f| yield f}
    end
  end

  # Number of 4-byte entries in the suffix array.
  def suffix_count(sarrayIO)
    bytes = case sarrayIO
            when StringIO
              data = sarrayIO.string
              data.respond_to?(:bytesize) ? data.bytesize : data.size
            else
              sarrayIO.stat.size
            end
    bytes / 4
  end

  # Offset into the full text stored at suffix array position +index+.
  def index_to_offset(sarrayIO, index)
    sarrayIO.pos = index * 4
    sarrayIO.read(4).unpack("V")[0]
  end

  # How many characters of each suffix must be read to test a candidate
  # against the original query and the additional term/regexp.
  def match_window_size(result, term_or_regexp)
    case term_or_regexp
    when String then [result.query.size, term_or_regexp.size].max
    when Regexp then MAX_REGEXP_MATCH_SIZE
    end
  end

  # Walks the suffix array entries following +result+ for as long as they
  # start with result.query, yielding a Result for each one that also matches
  # +term_or_regexp+ and whose document footer can be read.
  def each_following_match(result, term_or_regexp)
    size = match_window_size(result, term_or_regexp)
    get_fulltext_IO do |fulltextIO|
      get_sarray_IO do |sarrayIO|
        last_index = suffix_count(sarrayIO) - 1
        idx = result.index
        # Stop at the end of the suffix array; reading past it yields nil.
        while (idx += 1) <= last_index
          str = get_string(sarrayIO, fulltextIO, idx, size)
          upto = str.index("\0")
          str = str[0, upto] if upto
          break unless str.index(result.query) == 0
          next unless str[term_or_regexp]

          fulltextIO.pos = index_to_offset(sarrayIO, idx)
          path, metadata = find_metadata(fulltextIO)
          yield Result.new(self, result.query, idx, path, metadata) if path
        end
      end
    end
  end

  # Scans forward from the current position of +fulltextIO+ to the next "\0"
  # and decodes the document footer found there.
  #
  # Returns [path, metadata], or nil when no complete footer follows.
  def find_metadata(fulltextIO)
    loop do
      text = fulltextIO.read(4096)
      break unless text
      idx = text.index("\0")
      next unless idx

      # Make sure the 4-byte length field is in the buffer; IO#read returns
      # nil at EOF, which must not be passed to String#concat.
      text.concat(fulltextIO.read(4096) || "") if idx + 4 >= text.size
      len_field = text[idx + 1, 4]
      break unless len_field && len_field.size == 4 # truncated index
      len = len_field.unpack("V")[0]

      missing = idx + 5 + len - text.size
      text.concat(fulltextIO.read(missing) || "") if missing > 0

      footer = text[idx + 5, len - 1]
      parts = footer && /(.*?)\0(.*)/m.match(footer)
      break unless parts
      path, metadata = parts.captures
      # NOTE(review): Marshal.load is only safe on index files we built
      # ourselves; never point this at untrusted data.
      return [path, Marshal.load(metadata)]
    end
    nil
  end

  # Reads +size+ characters of full text starting +off+ characters away from
  # the suffix at position +index+ (never before the start of the text).
  # Returns "" rather than nil when there is nothing left to read.
  def get_string(sarrayIO, fulltextIO, index, size, off = 0)
    offset = index_to_offset(sarrayIO, index)
    fulltextIO.pos = [offset + off, 0].max
    fulltextIO.read(size) || ""
  end

  # Lower-bound binary search over suffix array positions from..to
  # (inclusive).
  #
  # Returns [position, full-text offset] of the first suffix that starts with
  # +term+, or nil when there is none.
  def binary_search(sarrayIO, fulltextIO, term, from, to)
    while from < to
      middle = (from + to) / 2
      pivot = get_string(sarrayIO, fulltextIO, middle, @max_query_size)
      if term <= pivot
        to = middle
      else
        from = middle + 1
      end
    end
    pivot = get_string(sarrayIO, fulltextIO, from, @max_query_size)
    return nil unless pivot.index(term) == 0
    [from, index_to_offset(sarrayIO, from)]
  end
end # class FullTextIndex

end # module FastRI
|