dakrone-fastri 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGES +61 -0
- data/COPYING +340 -0
- data/LEGAL +4 -0
- data/LICENSE +56 -0
- data/README.en +102 -0
- data/Rakefile +26 -0
- data/THANKS +36 -0
- data/bin/fastri-server +251 -0
- data/bin/fri +353 -0
- data/bin/ri-emacs +202 -0
- data/fastri.gemspec +64 -0
- data/indexer.rb +135 -0
- data/lib/fastri/full_text_index.rb +245 -0
- data/lib/fastri/full_text_indexer.rb +100 -0
- data/lib/fastri/name_descriptor.rb +71 -0
- data/lib/fastri/ri_index.rb +601 -0
- data/lib/fastri/ri_service.rb +430 -0
- data/lib/fastri/util.rb +183 -0
- data/lib/fastri/version.rb +13 -0
- data/lookup.rb +197 -0
- data/pre-install.rb +11 -0
- data/setup.rb +1585 -0
- data/test/test_full_text_index.rb +182 -0
- data/test/test_full_text_indexer.rb +84 -0
- data/test/test_functional_ri_service.rb +60 -0
- data/test/test_integration_full_text_index.rb +43 -0
- data/test/test_name_descriptor.rb +35 -0
- data/test/test_ri_index.rb +389 -0
- data/test/test_util.rb +91 -0
- metadata +84 -0
data/bin/ri-emacs
ADDED
@@ -0,0 +1,202 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
## drop-in replacement for the ri-emacs helper script for use
|
3
|
+
# with ri-ruby.el, using the FastRI service via DRb
|
4
|
+
#
|
5
|
+
# Based on ri-emacs.rb by Kristof Bastiaensen <kristof@vleeuwen.org>
|
6
|
+
#
|
7
|
+
# Copyright (C) 2004,2006 Kristof Bastiaensen
|
8
|
+
# 2006 Mauricio Fernandez <mfp@acm.org>
|
9
|
+
#
|
10
|
+
# This program is free software; you can redistribute it and/or modify
|
11
|
+
# it under the terms of the GNU General Public License as published by
|
12
|
+
# the Free Software Foundation; either version 2 of the License, or
|
13
|
+
# (at your option) any later version.
|
14
|
+
#
|
15
|
+
# This program is distributed in the hope that it will be useful,
|
16
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
17
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
18
|
+
# GNU General Public License for more details.
|
19
|
+
#
|
20
|
+
# You should have received a copy of the GNU General Public License
|
21
|
+
# along with this program; if not, write to the Free Software
|
22
|
+
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
23
|
+
#----------------------------------------------------------------------
|
24
|
+
|
25
|
+
require 'rinda/ring'
|
26
|
+
require 'optparse'
|
27
|
+
require 'fastri/util'
|
28
|
+
|
29
|
+
# {{{ cmdline parsing and service discovery
# we bind to 127.0.0.1 by default, because otherwise Ruby will try with
# 0.0.0.0, which results in a DNS request, adding way too much latency
#
# options[:width] defaults to RI_EMACS_COLUMNS (converted with to_i) or 72.
options = {:addr => "127.0.0.1", :width => ENV['RI_EMACS_COLUMNS'] ? ENV['RI_EMACS_COLUMNS'].to_i : 72}
# Set to true when --bind is given, so that the command line takes
# precedence over the FASTRI_ADDR environment variable.
override_addr_env = false
optparser = OptionParser.new do |opts|
  opts.banner = "Usage: ri-emacs [options] <query>"

  opts.on("-s", "--bind [ADDR]", "Bind to ADDR for incoming DRb connections.",
          "(default: 127.0.0.1)") do |addr|
    options[:addr] = addr
    override_addr_env = true
  end

  opts.on("-w", "--width WIDTH", "Set the width of the output.") do |width|
    # OptionParser hands us a String; convert it so :width is an Integer
    # no matter whether it came from RI_EMACS_COLUMNS or from -w.
    options[:width] = width.to_i
  end

  opts.on("-h", "--help", "Show this help message") do
    puts opts
    exit
  end
end
optparser.parse!

# Port 0 is used in the DRb URI for incoming requests.
if override_addr_env
  addr = "druby://#{options[:addr]}:0"
else
  addr = "druby://#{ENV["FASTRI_ADDR"]||options[:addr]}:0"
end

begin
  DRb.start_service(addr)
  ring_server = Rinda::RingFinger.primary
rescue Exception
  puts <<EOF
Couldn't initialize DRb and locate the Ring server.

Please make sure that:
 * the fastri-server is running, the server is bound to the correct interface,
   and the ACL setup allows connections from this host
 * fri is using the correct interface for incoming DRb requests:
   either set the FASTRI_ADDR environment variable, or use --bind ADDR, e.g
      export FASTRI_ADDR="192.168.1.12"
      fri Array
EOF
  exit(-1) # '
end
# The third field of the [:name, :FastRI, nil, nil] tuple is the service object.
service = ring_server.read([:name, :FastRI, nil, nil])[2]
|
78
|
+
|
79
|
+
# Line-oriented command loop for the ri-ruby.el helper protocol.
#
# After printing "READY", each line read from stdin is split into a command
# word and an argument; the command is dispatched to the matching method and
# its answer is written to stdout.  All documentation lookups are delegated
# to the FastRI service object passed to the constructor.
class EventLoop
  include FastRI::Util::MagicHelp

  # Separators between the components of a name: "::", "#" and ".".
  # The capture group makes String#split keep "::" in its result.
  NAME_SEPARATORS = /(::)|#|\./

  # ri::      the FastRI service object (reached via DRb by the caller).
  # options:: option hash; only :width is read here.
  def initialize(ri, options)
    @ri = ri
    @opts = options
  end

  # Reads commands from stdin until EOF.  Lines that contain no command
  # word are ignored and unknown commands produce no output.
  def run
    puts "READY"
    loop do
      line = $stdin.gets
      break unless line               # EOF: gets returned nil
      md = /(\w+)(.*)$/.match(line)
      next unless md                  # blank line, nothing to dispatch
      cmd, arg = md.captures
      arg.strip!
      case cmd
      when "TRY_COMPLETION";       puts complete_try(arg)
      when "COMPLETE_ALL";         puts complete_all(arg)
      when "LAMBDA";               puts complete_lambda(arg)
      when "CLASS_LIST";           puts class_list(arg)
      when "CLASS_LIST_WITH_FLAG"; puts class_list_with_flag(arg)
      when "DISPLAY_ARGS";         display_args(arg)
      when "DISPLAY_INFO";         display_info(arg)
      end
    end
  end

  # Returns "nil" when there are no completions, "t" when keyw is the only
  # completion and already complete, and otherwise the longest common
  # prefix of all completions as an inspected (quoted) string.
  def complete_try(keyw)
    list = @ri.completion_list(keyw)
    # An empty list is treated like nil: there is nothing to complete.
    return "nil" if list.nil? || list.empty?
    if list.size == 1 &&
       list[0].split(NAME_SEPARATORS) == keyw.split(NAME_SEPARATORS)
      return "t"
    end

    first = list.shift
    # "other" is "first" with its last "::"/"#" separator swapped for the
    # opposite one, so that Foo::bar and Foo#bar count as the same prefix.
    if first =~ /(.*)((?:::)|(?:#))(.*)/
      other = $1 + ($2 == "::" ? "#" : "::") + $3
    end

    len = first.size
    match_both = false
    list.each do |w|
      # Shrink the candidate prefix until this completion shares it.
      while w[0, len] != first[0, len]
        if other and w[0, len] == other[0, len]
          match_both = true
          break
        end
        len -= 1
      end
    end

    if match_both
      # Both separator kinds matched: answer with the neutral "." form.
      return other.sub(/(.*)((?:::)|(?:#))/) { $1 + "." }[0, len].inspect
    else
      return first[0, len].inspect
    end
  end

  # Returns every completion of keyw as a parenthesised list of quoted
  # strings, or "nil" when the service returns nil.
  def complete_all(keyw)
    list = @ri.completion_list(keyw)
    return "nil" if list.nil?
    "(" + list.map { |w| w.inspect }.join(" ") + ")"
  end

  # Returns "t" when keyw itself is among its completions (compared after
  # splitting on the name separators), "nil" otherwise.
  def complete_lambda(keyw)
    list = @ri.completion_list(keyw)
    return "nil" if list.nil?
    wanted = keyw.split(NAME_SEPARATORS)
    list.find { |n| n.split(NAME_SEPARATORS) == wanted } ? "t" : "nil"
  end

  # Returns the service's class list for keyw with each entry wrapped in
  # its own parentheses, or "nil" when the service returns nothing.
  def class_list(keyw)
    parenthesized_entries(@ri.class_list(keyw))
  end

  # Same output format as class_list, built from class_list_with_flag.
  def class_list_with_flag(keyw)
    parenthesized_entries(@ri.class_list_with_flag(keyw))
  end

  # Prints the result of the service method +what+ for keyw, followed by
  # the RI_EMACS_END_OF_INFO marker.  When the first lookup yields nothing
  # and FastRI::Util.change_query_method_type gives a different query,
  # that query is tried once as a fallback.
  def display_(what, keyw)
    data = @ri.__send__(what, magic_help(keyw), :width => @opts[:width])
    if data
      puts data
    elsif (new_keyw = FastRI::Util.change_query_method_type(keyw)) != keyw
      puts @ri.__send__(what, new_keyw, :width => @opts[:width])
    end
    puts "RI_EMACS_END_OF_INFO"
  end

  # Prints the service's :args answer for keyw.
  def display_args(keyw)
    display_ :args, keyw
  end

  # Prints the service's :info answer for keyw.
  def display_info(keyw)
    display_ :info, keyw
  end

  private

  # Formats list as "((a) (b) ...)" using #inspect on each entry;
  # a nil/false list becomes "nil".
  def parenthesized_entries(list)
    return "nil" unless list
    "(" + list.map { |x| "(#{x.inspect})" }.join(" ") + ")"
  end
end
|
196
|
+
|
197
|
+
|
198
|
+
#{{{ event loop
# $stdout.sync is intentionally left at its default: enabling it
# (i.e. $stdout.sync = true) causes problems with emacs.
event_loop = EventLoop.new(service, options)
event_loop.run
|
201
|
+
|
202
|
+
# vi: set sw=2 expandtab:
|
data/fastri.gemspec
ADDED
@@ -0,0 +1,64 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
|
3
|
+
# Gem specification for fastri 0.3.1.
Gem::Specification.new do |s|
  s.name = %q{fastri}
  s.version = "0.3.1"

  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  s.authors = ["Mauricio Fernandez", "Lee Hinman"]
  s.date = %q{2009-07-17}
  # Only set default_executable where the RubyGems in use still has it.
  s.default_executable = %q{fri} if s.respond_to? :default_executable=
  s.description = %q{Fastri is RI, fast.}
  s.email = %q{lee@writequit.org}
  s.executables = ["fastri-server", "fri", "ri-emacs"]
  s.extra_rdoc_files = [
    "README.en"
  ]
  # Each packaged file is listed exactly once ("indexer.rb" used to appear twice).
  s.files = [
    "fastri.gemspec",
    "Rakefile",
    "README.en",
    "THANKS",
    "CHANGES",
    "COPYING",
    "LEGAL",
    "LICENSE",
    "bin/fastri-server",
    "bin/fri",
    "bin/ri-emacs",
    "lib/fastri/full_text_index.rb",
    "lib/fastri/full_text_indexer.rb",
    "lib/fastri/name_descriptor.rb",
    "lib/fastri/ri_index.rb",
    "lib/fastri/ri_service.rb",
    "lib/fastri/util.rb",
    "lib/fastri/version.rb",
    "indexer.rb",
    "lookup.rb",
    "pre-install.rb",
    "setup.rb",
    "test/test_full_text_index.rb",
    "test/test_full_text_indexer.rb",
    "test/test_functional_ri_service.rb",
    "test/test_integration_full_text_index.rb",
    "test/test_name_descriptor.rb",
    "test/test_ri_index.rb",
    "test/test_util.rb"
  ]
  s.homepage = %q{http://github.com/dakrone/fastri}
  s.rdoc_options = ["--charset=UTF-8"]
  s.require_paths = ["lib"]
  s.rubygems_version = %q{0.3.1}
  s.summary = %q{Fastri is RI, fast.}

  # The nested RubyGems-version conditional that used to follow had two
  # empty branches (the gem declares no dependencies), so only the
  # specification version assignment remains.
  if s.respond_to? :specification_version then
    s.specification_version = 3
  end
end
|
data/indexer.rb
ADDED
@@ -0,0 +1,135 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# Copyright (C) 2006 Mauricio Fernandez <mfp@acm.org>
|
3
|
+
#
|
4
|
+
# Full-text indexing of the RI documentation.
|
5
|
+
# This is the proof of concept that evolved into FastRI's full-text searching
|
6
|
+
# subsystem.
|
7
|
+
|
8
|
+
# Proof-of-concept builder for a full-text file plus a sorted suffix array.
#
# Documents are appended to one in-memory string, each followed by
# "\0<name>\0".  #finish writes that string to the full-text file and writes
# the offsets of all word starts, sorted by the text found at each offset,
# to the index file as 32-bit little-endian integers.
class IndexBuilder
  require 'strscan'
  require 'enumerator'

  # Number of characters of each suffix used as the sort key.
  MAXWORD_SIZE = 20

  # fulltext_file:: path the concatenated documents are written to.
  # index_file::    path the packed suffix array is written to.
  def initialize(fulltext_file, index_file)
    @fulltext_file = fulltext_file
    @index_file = index_file
    @fulltext = ""
  end

  # Appends contents (with NUL bytes removed) followed by the
  # "\0name\0" terminator.
  def add_document(name, contents)
    @fulltext << preprocess(contents)
    @fulltext << "\0#{name}\0"
  end

  # Writes the full-text file, the index file and a "suffixes" debugging
  # dump in the current directory, printing progress and timing to stdout.
  def finish
    File.open(@fulltext_file, "w"){|f| f.puts @fulltext }
    scanner = StringScanner.new(@fulltext)

    count = 0
    suffixes = []
    until scanner.eos?
      count += 1
      if count == 100
        print "%3d%%\r" % (100 * scanner.pos / @fulltext.size)
        $stdout.flush
        count = 0
      end
      start = scanner.pos
      text = scanner.scan_until(/\0.*?\0/)
      # scan_until returns nil (without advancing) when no complete
      # "\0name\0" terminator is left; stop instead of calling sub on nil.
      break unless text
      text = text.sub(/\0.*?\0$/,"")
      suffixes.concat find_suffixes(text, start)
    end
    puts "Suffixes: #{suffixes.size}"
    t0 = Time.new
    sorted = suffixes.sort_by{|x| @fulltext[x,MAXWORD_SIZE]}
    # Binary mode: the packed integers must not go through newline translation.
    File.open(@index_file, "wb") do |f|
      sorted.each_slice(10000){|x| f.write x.pack("V*")}
    end
    File.open("suffixes", "w"){|f| sorted.each{|i| f.puts @fulltext[i,MAXWORD_SIZE].inspect}}
    puts "Processed in #{Time.new - t0} seconds"
  end

  # Returns offset + position for every word start in string.  The first
  # word must begin with a letter or underscore; subsequent words begin at
  # the first [A-Za-z0-9_] character after a run of other characters.
  def find_suffixes(string, offset)
    suffixes = []
    sc = StringScanner.new(string)
    until sc.eos?
      sc.skip(/([^A-Za-z_]|\n)*/)
      loop do
        # eos? instead of comparing the scanner's byte position with
        # String#size, which counts characters.
        break if sc.eos?
        suffixes << offset + sc.pos
        break unless sc.skip(/[A-Za-z0-9_]+([^A-Za-z0-9_]|\n)*/)
      end
    end
    suffixes
  end

  private

  # Strips NUL bytes, which are reserved as record terminators.
  def preprocess(str)
    str.gsub(/\0/,"")
  end
end
|
73
|
+
|
74
|
+
# Flattens one comment node into plain text.
#
# Returns the node's "body" when it is a String; otherwise the newline-joined
# linearization of its "contents" when that is an Array; otherwise its "text"
# entry when truthy; nil when none of these apply.
def linearize(comment)
  body = comment["body"]
  return body if body.is_a?(String)

  children = comment["contents"]
  if children.is_a?(Array)
    return children.map { |child| linearize(child) }.join("\n")
  end

  comment["text"] || nil
end
|
87
|
+
|
88
|
+
require 'rdoc/ri/ri_paths'
|
89
|
+
require 'yaml'
|
90
|
+
$:.unshift "lib"
|
91
|
+
require 'fastri/util'
|
92
|
+
|
93
|
+
#paths = RI::Paths::PATH
# Collect the RI directories to scan: those of SYSDIR, SITEDIR and HOMEDIR
# that exist, plus the third element of every entry returned by
# FastRI::Util.gem_directories_unique.
gem_paths = FastRI::Util.gem_directories_unique.map{|_,_,path| path}
paths = [ RI::Paths::SYSDIR, RI::Paths::SITEDIR, RI::Paths::HOMEDIR ].find_all do |p|
  p && File.directory?(p)
end
paths.concat gem_paths
indexer = IndexBuilder.new("test_FULLTEXT", "test_INDEX")
bad = 0
paths.each do |path|
  Dir["#{path}/**/*.yaml"].each do |yamlfile|
    yaml = File.read(yamlfile)
    begin
      # Everything from " !" to the end of each line is removed before parsing.
      data = YAML.load(yaml.gsub(/ \!.*/, ''))
    rescue StandardError, ScriptError
      # Parse failures are counted and skipped; Interrupt and SystemExit are
      # no longer swallowed, so the run can be aborted.
      bad += 1
      puts "Couldn't load #{yamlfile}"
      #puts "=" * 80
      #puts yaml
      next
    end

    # Files that do not load to a Hash (e.g. empty files) have no comment.
    comments = data.is_a?(Hash) ? data['comment'] : nil
    desc = (comments||[]).map{|x| linearize(x)}.join("\n")
    desc.gsub!(/<\/?(em|b|tt|ul|ol|table)>/, "")
    # Decode HTML entities; "&amp;" goes last so that "&amp;lt;" becomes
    # "&lt;" rather than "<".
    desc.gsub!(/&quot;/, "'")
    desc.gsub!(/&lt;/, "<")
    desc.gsub!(/&gt;/, ">")
    desc.gsub!(/&amp;/, "&")
    # Interactive debugging aid, disabled:
    #   puts "=" * 80
    #   puts yamlfile
    #   puts "-" * 80
    #   puts yaml
    #   puts "-" * 80
    #   puts desc
    #   $stdin.gets
    unless desc.empty?
      indexer.add_document(yamlfile, desc)
    end
  end
end
puts "BAD files: #{bad}"
indexer.finish
|
@@ -0,0 +1,245 @@
|
|
1
|
+
# Copyright (C) 2006 Mauricio Fernandez <mfp@acm.org>
|
2
|
+
#
|
3
|
+
|
4
|
+
require 'fastri/full_text_indexer'
|
5
|
+
require 'stringio'
|
6
|
+
|
7
|
+
module FastRI

# Read-only searcher over a full-text file and its suffix array.
#
# The suffix array is a sequence of 32-bit little-endian offsets into the
# full-text data.  In the full-text data every document is followed by
# "\0", a 32-bit little-endian length and a footer of the form
# "<path>\0<marshalled metadata>".  Instances are created with
# new_from_ios or new_from_filenames; FullTextIndex.new is private.
class FullTextIndex
  MAX_QUERY_SIZE = 20
  MAX_REGEXP_MATCH_SIZE = 255

  # One hit: the suffix-array index it was found at, the query that
  # produced it, and the path and metadata of the enclosing document.
  class Result
    attr_reader :path, :query, :index, :metadata

    def initialize(searcher, query, index, path, metadata)
      @searcher = searcher
      @index = index
      @query = query
      @path = path
      @metadata = metadata
    end

    # Text surrounding the hit: up to size characters on each side,
    # cut at the boundaries of the enclosing record.
    def context(size)
      @searcher.fetch_data(@index, 2*size+1, -size)
    end

    # Up to size characters starting at the hit, cut at the record boundary.
    def text(size)
      @searcher.fetch_data(@index, size, 0)
    end
  end

  class << self; private :new end

  DEFAULT_OPTIONS = {
    :max_query_size => MAX_QUERY_SIZE,
  }

  # Builds an index over two already-open IO objects (e.g. StringIO).
  def self.new_from_ios(fulltext_IO, suffix_arrray_IO, options = {})
    new(:io, fulltext_IO, suffix_arrray_IO, options)
  end

  # Builds an index that reopens the given files for every operation.
  def self.new_from_filenames(fulltext_fname, suffix_arrray_fname, options = {})
    new(:filenames, fulltext_fname, suffix_arrray_fname, options)
  end

  attr_reader :max_query_size

  # Raises RuntimeError for an unknown type or an unsupported index header.
  def initialize(type, fulltext, sarray, options)
    options = DEFAULT_OPTIONS.merge(options)
    case type
    when :io
      @fulltext_IO = fulltext
      @sarray_IO = sarray
    when :filenames
      @fulltext_fname = fulltext
      @sarray_fname = sarray
    else raise "Unknown type"
    end
    @type = type
    @max_query_size = options[:max_query_size]
    check_magic
  end

  # Returns a Result for the first suffix (in suffix-array order) that
  # starts with term, or nil when there is none or the index is empty.
  def lookup(term)
    get_fulltext_IO do |fulltextIO|
      get_sarray_IO do |sarrayIO|
        last = last_suffix_index(sarrayIO)
        # An empty suffix array used to produce a negative seek position.
        return nil if last < 0

        index, offset = binary_search(sarrayIO, fulltextIO, term, 0, last)
        return nil unless offset
        fulltextIO.pos = offset
        path, metadata = find_metadata(fulltextIO)
        return path ? Result.new(self, term, index, path, metadata) : nil
      end
    end
  end

  # Returns the next Result after result whose suffix still starts with
  # result.query and also matches term_or_regexp (String or Regexp), or
  # nil when the run of matching suffixes or the suffix array ends.
  def next_match(result, term_or_regexp = "")
    each_next_match(result, term_or_regexp) { |match| return match }
    nil
  end

  # Like next_match, but collects every remaining match into an Array.
  def next_matches(result, term_or_regexp = "")
    ret = []
    each_next_match(result, term_or_regexp) { |match| ret << match }
    ret
  end

  # Returns the text of the record containing suffix number index,
  # restricted to a window of size characters that starts offset
  # characters (offset <= 0) before the suffix.
  # Raises RuntimeError for a positive offset or an index with no entry.
  def fetch_data(index, size, offset = 0)
    raise "Bad offset" unless offset <= 0
    get_fulltext_IO do |fulltextIO|
      get_sarray_IO do |sarrayIO|
        base = index_to_offset(sarrayIO, index)
        raise "Bad index" unless base
        actual_offset = offset
        newsize = size
        if base + offset < 0    # at the beginning
          excess = (base + offset).abs  # remember offset is < 0
          newsize = size - excess
          actual_offset = offset + excess
        end
        str = get_string(sarrayIO, fulltextIO, index, newsize, offset)
        # -actual_offset is where the suffix starts inside str; cut the
        # window at the nearest "\0" on either side of that position.
        from = (str.rindex("\0", -actual_offset) || -1) + 1
        to = (str.index("\0", -actual_offset) || 0) - 1
        str[from..to]
      end
    end
  end

  private

  # Verifies the header of the full-text data against the supported
  # index format version.
  def check_magic
    get_fulltext_IO do |io|
      io.rewind
      header = io.read(FullTextIndexer::MAGIC.size)
      raise "Unsupported index format." unless header
      version = header[/\d+\.\d+\.\d+/]
      raise "Unsupported index format." unless version
      major, minor, = version.scan(/\d+/)
      # Compare numerically: as strings, "10" > "9" is false.
      # NOTE(review): assumes the FASTRI_FT_INDEX_FORMAT_* constants hold
      # numeric strings or integers -- confirm against fastri/version.
      if major.to_i != FASTRI_FT_INDEX_FORMAT_MAJOR.to_i ||
         minor.to_i > FASTRI_FT_INDEX_FORMAT_MINOR.to_i
        raise "Unsupported index format"
      end
    end
  end

  # Yields the full-text IO; in :filenames mode the file is opened for
  # the duration of the block.
  def get_fulltext_IO
    case @type
    when :io; yield @fulltext_IO
    when :filenames
      File.open(@fulltext_fname, "rb"){|f| yield f}
    end
  end

  # Yields the suffix-array IO; in :filenames mode the file is opened
  # for the duration of the block.
  def get_sarray_IO
    case @type
    when :io; yield @sarray_IO
    when :filenames
      File.open(@sarray_fname, "rb"){|f| yield f}
    end
  end

  # Index of the last entry of the suffix array (-1 when it is empty).
  def last_suffix_index(sarrayIO)
    bytes = case sarrayIO
            when StringIO then sarrayIO.string.size
            else sarrayIO.stat.size
            end
    bytes / 4 - 1
  end

  # Yields a Result for every suffix after result.index that still starts
  # with result.query, matches term_or_regexp and has readable metadata.
  # Stops at the first suffix that no longer starts with the query or at
  # the end of the suffix array.
  def each_next_match(result, term_or_regexp)
    size = case term_or_regexp
           when String then [result.query.size, term_or_regexp.size].max
           when Regexp then MAX_REGEXP_MATCH_SIZE
           end
    get_fulltext_IO do |fulltextIO|
      get_sarray_IO do |sarrayIO|
        last = last_suffix_index(sarrayIO)
        idx = result.index
        while (idx += 1) <= last
          str = get_string(sarrayIO, fulltextIO, idx, size)
          break unless str
          upto = str.index("\0")
          str = str[0, upto] if upto
          break unless str.index(result.query) == 0
          next unless str[term_or_regexp]
          fulltextIO.pos = index_to_offset(sarrayIO, idx)
          path, metadata = find_metadata(fulltextIO)
          yield Result.new(self, result.query, idx, path, metadata) if path
        end
      end
    end
  end

  # Full-text offset stored in entry number index of the suffix array,
  # or nil when there is no complete entry at that index.
  def index_to_offset(sarrayIO, index)
    sarrayIO.pos = index * 4
    raw = sarrayIO.read(4)
    raw && raw.size == 4 ? raw.unpack("V")[0] : nil
  end

  # Starting at the current position of fulltextIO, finds the next "\0"
  # and decodes the footer that follows it.  Returns [path, metadata],
  # or nil when no complete footer can be read.
  def find_metadata(fulltextIO)
    loop do
      text = fulltextIO.read(4096)
      break unless text
      idx = text.index("\0")
      next unless idx

      # Make sure the 4 length bytes after the "\0" are available.
      text.concat(fulltextIO.read(4096).to_s) if idx + 4 >= text.size
      raw_len = text[idx + 1, 4]
      return nil unless raw_len && raw_len.size == 4
      len = raw_len.unpack("V")[0]
      missing = idx + 5 + len - text.size
      text.concat(fulltextIO.read(missing).to_s) if missing > 0
      footer = text[idx + 5, len - 1]
      md = footer && /(.*?)\0(.*)/m.match(footer)
      return nil unless md
      path, metadata = md.captures
      # NOTE(review): Marshal.load executes on whatever the index file
      # contains; only open index files from a trusted source.
      return [path, Marshal.load(metadata)]
    end
    nil
  end

  # Reads size characters of full text starting off characters from the
  # offset stored at suffix-array entry index (never before position 0).
  # Returns nil when the entry does not exist or the read hits EOF.
  def get_string(sarrayIO, fulltextIO, index, size, off = 0)
    offset = index_to_offset(sarrayIO, index)
    return nil unless offset
    fulltextIO.pos = [offset + off, 0].max
    fulltextIO.read(size)
  end

  # Searches suffix-array entries from..to (inclusive) for the first
  # suffix that starts with term, comparing at most @max_query_size
  # characters.  Returns [index, fulltext_offset] or nil.
  def binary_search(sarrayIO, fulltextIO, term, from, to)
    return nil if from > to
    while from < to
      middle = (from + to) / 2
      pivot = get_string(sarrayIO, fulltextIO, middle, @max_query_size) || ""
      if term <= pivot
        to = middle
      else
        from = middle + 1
      end
    end
    pivot = get_string(sarrayIO, fulltextIO, from, @max_query_size)
    return nil unless pivot && pivot.index(term) == 0
    [from, index_to_offset(sarrayIO, from)]
  end
end # class FullTextIndex

end # module FastRI
|