dakrone-fastri 0.3.1
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGES +61 -0
- data/COPYING +340 -0
- data/LEGAL +4 -0
- data/LICENSE +56 -0
- data/README.en +102 -0
- data/Rakefile +26 -0
- data/THANKS +36 -0
- data/bin/fastri-server +251 -0
- data/bin/fri +353 -0
- data/bin/ri-emacs +202 -0
- data/fastri.gemspec +64 -0
- data/indexer.rb +135 -0
- data/lib/fastri/full_text_index.rb +245 -0
- data/lib/fastri/full_text_indexer.rb +100 -0
- data/lib/fastri/name_descriptor.rb +71 -0
- data/lib/fastri/ri_index.rb +601 -0
- data/lib/fastri/ri_service.rb +430 -0
- data/lib/fastri/util.rb +183 -0
- data/lib/fastri/version.rb +13 -0
- data/lookup.rb +197 -0
- data/pre-install.rb +11 -0
- data/setup.rb +1585 -0
- data/test/test_full_text_index.rb +182 -0
- data/test/test_full_text_indexer.rb +84 -0
- data/test/test_functional_ri_service.rb +60 -0
- data/test/test_integration_full_text_index.rb +43 -0
- data/test/test_name_descriptor.rb +35 -0
- data/test/test_ri_index.rb +389 -0
- data/test/test_util.rb +91 -0
- metadata +84 -0
data/bin/ri-emacs
ADDED
@@ -0,0 +1,202 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
## drop-in replacement for the ri-emacs helper script for use
|
3
|
+
# with ri-ruby.el, using the FastRI service via DRb
|
4
|
+
#
|
5
|
+
# Based on ri-emacs.rb by Kristof Bastiaensen <kristof@vleeuwen.org>
|
6
|
+
#
|
7
|
+
# Copyright (C) 2004,2006 Kristof Bastiaensen
|
8
|
+
# 2006 Mauricio Fernandez <mfp@acm.org>
|
9
|
+
#
|
10
|
+
# This program is free software; you can redistribute it and/or modify
|
11
|
+
# it under the terms of the GNU General Public License as published by
|
12
|
+
# the Free Software Foundation; either version 2 of the License, or
|
13
|
+
# (at your option) any later version.
|
14
|
+
#
|
15
|
+
# This program is distributed in the hope that it will be useful,
|
16
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
17
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
18
|
+
# GNU General Public License for more details.
|
19
|
+
#
|
20
|
+
# You should have received a copy of the GNU General Public License
|
21
|
+
# along with this program; if not, write to the Free Software
|
22
|
+
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
23
|
+
#----------------------------------------------------------------------
|
24
|
+
|
25
|
+
require 'rinda/ring'
|
26
|
+
require 'optparse'
|
27
|
+
require 'fastri/util'
|
28
|
+
|
29
|
+
# {{{ cmdline parsing and service discovery
|
30
|
+
# we bind to 127.0.0.1 by default, because otherwise Ruby will try with
|
31
|
+
# 0.0.0.0, which results in a DNS request, adding way too much latency
|
32
|
+
# Defaults: bind DRb to the loopback address and take the output width from
# RI_EMACS_COLUMNS when it is set, 72 columns otherwise.
options = {
  :addr  => "127.0.0.1",
  :width => ENV['RI_EMACS_COLUMNS'] ? ENV['RI_EMACS_COLUMNS'].to_i : 72
}
# Becomes true once --bind is given, so that the command line takes precedence
# over the FASTRI_ADDR environment variable.
override_addr_env = false
optparser = OptionParser.new do |opts|
  opts.banner = "Usage: ri-emacs [options] <query>"

  opts.on("-s", "--bind [ADDR]", "Bind to ADDR for incoming DRb connections.",
          "(default: 127.0.0.1)") do |addr|
    options[:addr] = addr
    override_addr_env = true
  end

  opts.on("-w", "--width WIDTH", "Set the width of the output.") do |width|
    # OptionParser hands over a String; convert it so that :width is an
    # Integer no matter whether it came from the environment or from -w.
    options[:width] = width.to_i
  end

  opts.on("-h", "--help", "Show this help message") do
    puts opts
    exit
  end
end
optparser.parse!

if override_addr_env
  addr = "druby://#{options[:addr]}:0"
else
  addr = "druby://#{ENV["FASTRI_ADDR"]||options[:addr]}:0"
end

begin
  DRb.start_service(addr)
  ring_server = Rinda::RingFinger.primary
rescue StandardError
  # Only ordinary errors are turned into the help text below; Interrupt and
  # SystemExit are left alone so that Ctrl-C and exit behave normally.
  puts <<EOF
Couldn't initialize DRb and locate the Ring server.

Please make sure that:
* the fastri-server is running, the server is bound to the correct interface,
and the ACL setup allows connections from this host
* fri is using the correct interface for incoming DRb requests:
either set the FASTRI_ADDR environment variable, or use --bind ADDR, e.g
export FASTRI_ADDR="192.168.1.12"
fri Array
EOF
  exit(-1)
end
# The third field of the [:name, :FastRI, nil, nil] tuple is used as the
# service object handed to the event loop.
service = ring_server.read([:name, :FastRI, nil, nil])[2]
|
78
|
+
|
79
|
+
# Line-oriented command loop spoken by ri-ruby.el: every request is one line
# on stdin of the form "COMMAND argument", and every answer is printed to
# stdout (completion answers are printed as Lisp-readable values).
class EventLoop
  include FastRI::Util::MagicHelp

  # ri::      the service object that answers completion_list, class_list,
  #           class_list_with_flag, args and info.
  # options:: Hash; only :width is read (passed along to args/info).
  def initialize(ri, options)
    @ri = ri
    @opts = options
  end

  # Prints "READY" and then serves commands until stdin reaches EOF.
  # Lines that contain no command word are ignored; unknown commands are
  # silently skipped.
  def run
    puts "READY"
    loop do
      line = $stdin.gets
      break unless line                 # EOF: nothing more will ever arrive
      parsed = /(\w+)(.*)$/.match(line)
      next unless parsed                # blank line / no command word
      cmd, arg = parsed[1..2]
      arg.strip!
      case cmd
      when "TRY_COMPLETION";       puts complete_try(arg)
      when "COMPLETE_ALL";         puts complete_all(arg)
      when "LAMBDA";               puts complete_lambda(arg)
      when "CLASS_LIST";           puts class_list(arg)
      when "CLASS_LIST_WITH_FLAG"; puts class_list_with_flag(arg)
      when "DISPLAY_ARGS";         display_args(arg)
      when "DISPLAY_INFO";         display_info(arg)
      end
    end
  end

  # Returns "nil" when there is no completion, "t" when +keyw+ is already the
  # single complete name, and otherwise the inspected longest common prefix of
  # all candidates.
  def complete_try(keyw)
    list = @ri.completion_list(keyw)
    # An empty list is treated like "no completions"; it used to crash below
    # because there was no first candidate.
    return "nil" if list.nil? || list.empty?
    return "t" if list.size == 1 && same_name?(list[0], keyw)

    first = list.shift
    # The same name with the last "::"/"#" separator swapped, so candidates
    # that differ from +first+ only in that separator are still recognised.
    other = nil
    if first =~ /(.*)((?:::)|(?:#))(.*)/
      other = $1 + ($2 == "::" ? "#" : "::") + $3
    end

    len = first.size
    match_both = false
    list.each do |w|
      # Shrink the prefix until this candidate agrees with +first+ ...
      while w[0, len] != first[0, len]
        # ... unless it agrees with the swapped-separator variant instead.
        if other && w[0, len] == other[0, len]
          match_both = true
          break
        end
        len -= 1
      end
    end

    if match_both
      # Both separator flavours occur: answer with "." as the separator.
      other.sub(/(.*)((?:::)|(?:#))/) { $1 + "." }[0, len].inspect
    else
      first[0, len].inspect
    end
  end

  # Returns all completions as a Lisp list of strings, or "nil" when the
  # service returns nil.
  def complete_all(keyw)
    list = @ri.completion_list(keyw)
    if list.nil?
      "nil"
    else
      "(" + list.map { |w| w.inspect }.join(" ") + ")"
    end
  end

  # Returns "t" when one of the completions names exactly +keyw+ (ignoring
  # which separator is used), "nil" otherwise.
  def complete_lambda(keyw)
    list = @ri.completion_list(keyw)
    if list.nil?
      "nil"
    elsif list.find { |n| same_name?(n, keyw) }
      "t"
    else
      "nil"
    end
  end

  # Returns the service's class list as a Lisp list of one-element lists,
  # or "nil" when the service returns nothing.
  def class_list(keyw)
    list = @ri.class_list(keyw)
    if list
      "(" + list.map{|x| "(#{x.inspect})"}.join(" ") + ")"
    else
      "nil"
    end
  end

  # Same output format as class_list, built from class_list_with_flag.
  def class_list_with_flag(keyw)
    list = @ri.class_list_with_flag(keyw)
    if list
      "(" + list.map{|x| "(#{x.inspect})"}.join(" ") + ")"
    else
      "nil"
    end
  end

  # Prints the result of calling +what+ (:args or :info) on the service for
  # +keyw+, retrying once with the query rewritten by
  # FastRI::Util.change_query_method_type when the first call returns nothing.
  # The output is always terminated by the RI_EMACS_END_OF_INFO marker line.
  def display_(what, keyw)
    data = @ri.__send__(what, magic_help(keyw), :width => @opts[:width])
    if data
      puts data
    elsif (new_keyw = FastRI::Util.change_query_method_type(keyw)) != keyw
      puts @ri.__send__(what, new_keyw, :width => @opts[:width])
    end
    puts "RI_EMACS_END_OF_INFO"
  end

  # Handles DISPLAY_ARGS.
  def display_args(keyw)
    display_ :args, keyw
  end

  # Handles DISPLAY_INFO.
  def display_info(keyw)
    display_ :info, keyw
  end

  private

  # True when both names split into the same components on "::", "#" or ".".
  def same_name?(a, b)
    a.split(/(::)|#|\./) == b.split(/(::)|#|\./)
  end
end
|
196
|
+
|
197
|
+
|
198
|
+
#{{{ event loop
|
199
|
+
#$stdout.sync = true # better not set sync=true, causes problems with emacs
|
200
|
+
# Serve ri-ruby.el requests read from stdin, using the service found above.
EventLoop.new(service, options).run
|
201
|
+
|
202
|
+
# vi: set sw=2 expandtab:
|
data/fastri.gemspec
ADDED
@@ -0,0 +1,64 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
|
3
|
+
# Gem specification for fastri 0.3.1.
Gem::Specification.new do |s|
  s.name = %q{fastri}
  s.version = "0.3.1"

  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  s.authors = ["Mauricio Fernandez", "Lee Hinman"]
  s.date = %q{2009-07-17}
  # Guarded like the other optional setters in this file, so the spec still
  # loads when the installed RubyGems does not provide this attribute.
  s.default_executable = %q{fri} if s.respond_to? :default_executable=
  s.description = %q{Fastri is RI, fast.}
  s.email = %q{lee@writequit.org}
  s.executables = ["fastri-server", "fri", "ri-emacs"]
  s.extra_rdoc_files = [
    "README.en"
  ]
  # Every file is listed exactly once ("indexer.rb" used to appear twice).
  s.files = [
    "fastri.gemspec",
    "Rakefile",
    "README.en",
    "THANKS",
    "CHANGES",
    "COPYING",
    "LEGAL",
    "LICENSE",
    "bin/fastri-server",
    "bin/fri",
    "bin/ri-emacs",
    "lib/fastri/full_text_index.rb",
    "lib/fastri/full_text_indexer.rb",
    "lib/fastri/name_descriptor.rb",
    "lib/fastri/ri_index.rb",
    "lib/fastri/ri_service.rb",
    "lib/fastri/util.rb",
    "lib/fastri/version.rb",
    "indexer.rb",
    "lookup.rb",
    "pre-install.rb",
    "setup.rb",
    "test/test_full_text_index.rb",
    "test/test_full_text_indexer.rb",
    "test/test_functional_ri_service.rb",
    "test/test_integration_full_text_index.rb",
    "test/test_name_descriptor.rb",
    "test/test_ri_index.rb",
    "test/test_util.rb"
  ]
  s.homepage = %q{http://github.com/dakrone/fastri}
  s.rdoc_options = ["--charset=UTF-8"]
  s.require_paths = ["lib"]
  # rubygems_version is intentionally not assigned: the previous value was the
  # gem's own version number, not a RubyGems version.
  s.summary = %q{Fastri is RI, fast.}

  # The former RubyGems-version conditional only had empty branches (no
  # dependencies are declared), so just the specification version remains.
  s.specification_version = 3 if s.respond_to? :specification_version
end
|
data/indexer.rb
ADDED
@@ -0,0 +1,135 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# Copyright (C) 2006 Mauricio Fernandez <mfp@acm.org>
|
3
|
+
#
|
4
|
+
# Full-text indexing of the RI documentation.
|
5
|
+
# This is the proof of concept that evolved into FastRI's full-text searching
|
6
|
+
# subsystem.
|
7
|
+
|
8
|
+
# Proof-of-concept full-text index builder.
#
# Documents are appended to one in-memory string, each followed by a
# "\0name\0" footer. #finish writes that string to +fulltext_file+ and writes
# to +index_file+ the start offsets of all words (packed as 32-bit
# little-endian integers), sorted by the MAXWORD_SIZE characters found at
# each offset.
class IndexBuilder
  require 'strscan'
  require 'enumerator'

  # Number of characters of each suffix used as the sort key.
  MAXWORD_SIZE = 20

  # fulltext_file:: path the concatenated text is written to by #finish.
  # index_file::    path the packed, sorted offsets are written to by #finish.
  def initialize(fulltext_file, index_file)
    @fulltext_file = fulltext_file
    @index_file = index_file
    @fulltext = "".dup
  end

  # Appends +contents+ (with NUL characters removed) followed by the
  # "\0name\0" footer that marks the end of the document.
  def add_document(name, contents)
    @fulltext << preprocess(contents)
    @fulltext << "\0#{name}\0"
  end

  # Writes the full-text file, the index file and a human-readable dump of
  # the sorted keys (file "suffixes" in the current directory), printing
  # progress and timing information to stdout.
  def finish
    # Binary mode: the recorded offsets must match the file byte for byte.
    File.open(@fulltext_file, "wb"){|f| f.puts @fulltext }
    scanner = StringScanner.new(@fulltext)

    count = 0
    suffixes = []
    until scanner.eos?
      count += 1
      if count == 100
        print "%3d%%\r" % (100 * scanner.pos / @fulltext.size)
        $stdout.flush
        count = 0
      end
      start = scanner.pos
      text = scanner.scan_until(/\0.*?\0/)
      unless text
        # No further "\0name\0" footer: stop scanning. This check has to come
        # before text is used, otherwise a missing footer raises on nil.
        scanner.terminate
        break
      end
      text = text.sub(/\0.*?\0$/,"")
      suffixes.concat find_suffixes(text, start)
    end
    puts "Suffixes: #{suffixes.size}"
    t0 = Time.new
    sorted = suffixes.sort_by{|x| @fulltext[x,MAXWORD_SIZE]}
    # Packed integers are binary data, hence "wb".
    File.open(@index_file, "wb") do |f|
      sorted.each_slice(10000){|x| f.write x.pack("V*")}
    end
    File.open("suffixes", "w"){|f| sorted.each{|i| f.puts @fulltext[i,MAXWORD_SIZE].inspect}}
    puts "Processed in #{Time.new - t0} seconds"
  end

  # Returns the positions (shifted by +offset+) at which a word starts in
  # +string+. Leading characters that are neither letters nor "_" are skipped;
  # after that a word is a run of letters, digits and "_".
  def find_suffixes(string, offset)
    suffixes = []
    sc = StringScanner.new(string)
    len = string.size
    until sc.eos?
      sc.skip(/([^A-Za-z_]|\n)*/)
      loop do
        break if sc.pos == len
        suffixes << offset + sc.pos
        # Consume the word and the separators after it; stop if none matches.
        break unless sc.skip(/[A-Za-z0-9_]+([^A-Za-z0-9_]|\n)*/)
      end
    end
    suffixes
  end

  private
  # Removes NUL characters, which are reserved for the document footers.
  def preprocess(str)
    str.gsub(/\0/,"")
  end
end
|
73
|
+
|
74
|
+
# Flattens one comment node into plain text.
#
# A String under "body" wins; otherwise an Array under "contents" is
# linearized element by element and joined with newlines; otherwise the value
# under "text" is returned. Yields nil when none of these is present.
def linearize(comment)
  body = comment["body"]
  return body if String === body

  children = comment["contents"]
  if Array === children
    return children.map { |child| linearize(child) }.join("\n")
  end

  comment["text"] || nil
end
|
87
|
+
|
88
|
+
require 'rdoc/ri/ri_paths'
|
89
|
+
require 'yaml'
|
90
|
+
$:.unshift "lib"
|
91
|
+
require 'fastri/util'
|
92
|
+
|
93
|
+
#paths = RI::Paths::PATH
|
94
|
+
# Collect the RI documentation directories: the system, site and home
# directories that exist, plus the per-gem directories reported by FastRI.
gem_paths = FastRI::Util.gem_directories_unique.map{|_,_,path| path}
paths = [ RI::Paths::SYSDIR, RI::Paths::SITEDIR, RI::Paths::HOMEDIR ].find_all do |p|
  p && File.directory?(p)
end
paths.concat gem_paths
indexer = IndexBuilder.new("test_FULLTEXT", "test_INDEX")
# Replacement text for the character entities found in the descriptions;
# note that the quot entity becomes an apostrophe.
entities = { "quot" => "'", "lt" => "<", "gt" => ">", "amp" => "&" }
bad = 0
paths.each do |path|
  Dir["#{path}/**/*.yaml"].each do |yamlfile|
    yaml = File.read(yamlfile)
    begin
      # Everything from " !" to the end of a line is stripped before parsing
      # (this drops the YAML type tags).
      # NOTE(review): YAML.load is applied to files found on disk; confirm
      # these directories are trusted.
      data = YAML.load(yaml.gsub(/ \!.*/, ''))
    rescue StandardError, SyntaxError
      # Unparsable files are counted and skipped; Interrupt and SystemExit
      # are no longer swallowed.
      bad += 1
      puts "Couldn't load #{yamlfile}"
      #puts "=" * 80
      #puts yaml
      next
    end

    desc = (data['comment']||[]).map{|x| linearize(x)}.join("\n")
    desc.gsub!(/<\/?(em|b|tt|ul|ol|table)>/, "")
    # Decode the quot, lt, gt and amp entities in a single pass. The previous
    # patterns matched the bare characters, which turned every double quote
    # into an apostrophe and left the other three replacements as no-ops.
    desc.gsub!(/&(quot|lt|gt|amp);/) { entities[$1] }
    # Debugging aid, disabled:
    #   puts "=" * 80
    #   puts yamlfile
    #   puts "-" * 80
    #   puts yaml
    #   puts "-" * 80
    #   puts desc
    #   $stdin.gets
    unless desc.empty?
      indexer.add_document(yamlfile, desc)
    end
  end
end
puts "BAD files: #{bad}"
indexer.finish
|
@@ -0,0 +1,245 @@
|
|
1
|
+
# Copyright (C) 2006 Mauricio Fernandez <mfp@acm.org>
|
2
|
+
#
|
3
|
+
|
4
|
+
require 'fastri/full_text_indexer'
|
5
|
+
require 'stringio'
|
6
|
+
|
7
|
+
module FastRI

# Read-only searcher over a full-text index made of two parts:
#
# * the "fulltext": a header (checked by check_magic), then for each document
#   its text followed by a footer of the form
#   NUL, 4-byte little-endian length, path, NUL, marshalled metadata
#   (this is the layout find_metadata parses);
# * the suffix array: 32-bit little-endian offsets into the fulltext,
#   expected to be sorted by the text found at each offset (binary_search
#   relies on that order).
#
# Instances are created with new_from_ios or new_from_filenames.
class FullTextIndex
  MAX_QUERY_SIZE = 20
  MAX_REGEXP_MATCH_SIZE = 255

  # One hit: the suffix-array +index+ that matched +query+, plus the +path+
  # and +metadata+ of the document containing it.
  class Result
    attr_reader :path, :query, :index, :metadata

    def initialize(searcher, query, index, path, metadata)
      @searcher = searcher
      @index = index
      @query = query
      @path = path
      @metadata = metadata
    end

    # Text around the hit: up to +size+ characters on each side.
    def context(size)
      @searcher.fetch_data(@index, 2*size+1, -size)
    end

    # Up to +size+ characters of text starting at the hit.
    def text(size)
      @searcher.fetch_data(@index, size, 0)
    end
  end

  class << self; private :new end

  DEFAULT_OPTIONS = {
    :max_query_size => MAX_QUERY_SIZE,
  }

  # Builds a searcher on two already opened IO objects.
  def self.new_from_ios(fulltext_IO, suffix_arrray_IO, options = {})
    new(:io, fulltext_IO, suffix_arrray_IO, options)
  end

  # Builds a searcher that reopens the two files for every operation.
  def self.new_from_filenames(fulltext_fname, suffix_arrray_fname, options = {})
    new(:filenames, fulltext_fname, suffix_arrray_fname, options)
  end

  attr_reader :max_query_size

  # type is :io or :filenames; anything else raises. The header of the
  # fulltext is verified immediately (see check_magic).
  def initialize(type, fulltext, sarray, options)
    options = DEFAULT_OPTIONS.merge(options)
    case type
    when :io
      @fulltext_IO = fulltext
      @sarray_IO = sarray
    when :filenames
      @fulltext_fname = fulltext
      @sarray_fname = sarray
    else raise "Unknown type"
    end
    @type = type
    @max_query_size = options[:max_query_size]
    check_magic
  end

  # Returns a Result for the first suffix (in suffix-array order) that starts
  # with +term+, or nil when there is none, when the index is empty, or when
  # no document footer follows the hit.
  def lookup(term)
    get_fulltext_IO do |fulltextIO|
      get_sarray_IO do |sarrayIO|
        last = suffix_count(sarrayIO) - 1
        return nil if last < 0        # empty suffix array: nothing to search

        index, offset = binary_search(sarrayIO, fulltextIO, term, 0, last)
        return nil unless offset

        fulltextIO.pos = offset
        path, metadata = find_metadata(fulltextIO)
        return Result.new(self, term, index, path, metadata) if path
        nil
      end
    end
  end

  # Returns the next Result after +result+ whose suffix still starts with
  # result.query and also matches +term_or_regexp+, or nil when the run of
  # matching suffixes (or the suffix array itself) ends first.
  def next_match(result, term_or_regexp = "")
    each_following_match(result, term_or_regexp) { |match| return match }
    nil
  end

  # Like next_match, but collects every following match into an Array.
  def next_matches(result, term_or_regexp = "")
    ret = []
    each_following_match(result, term_or_regexp) { |match| ret << match }
    ret
  end

  # Returns up to +size+ characters of the document text around suffix
  # +index+, starting +offset+ (<= 0) characters before it and cut at the
  # document boundaries (NUL characters). Returns nil when +index+ is outside
  # the suffix array.
  def fetch_data(index, size, offset = 0)
    raise "Bad offset" unless offset <= 0
    get_fulltext_IO do |fulltextIO|
      get_sarray_IO do |sarrayIO|
        base = index_to_offset(sarrayIO, index)
        return nil unless base
        actual_offset = offset
        newsize = size
        if base + offset < 0    # at the beginning
          excess = (base + offset).abs  # remember offset is < 0
          newsize = size - excess
          actual_offset = offset + excess
        end
        str = get_string(sarrayIO, fulltextIO, index, newsize, offset)
        return nil unless str
        # -actual_offset is where the hit sits inside str; keep only the text
        # between the NULs surrounding it.
        from = (str.rindex("\0", -actual_offset) || -1) + 1
        to = (str.index("\0", -actual_offset) || 0) - 1
        str[from..to]
      end
    end
  end

  private

  # Verifies the fulltext header: it must contain an x.y.z version whose
  # major part equals FASTRI_FT_INDEX_FORMAT_MAJOR and whose minor part is
  # not greater than FASTRI_FT_INDEX_FORMAT_MINOR.
  def check_magic
    get_fulltext_IO do |io|
      io.rewind
      header = io.read(FullTextIndexer::MAGIC.size)
      raise "Unsupported index format." unless header
      version = header[/\d+\.\d+\.\d+/]
      raise "Unsupported index format." unless version
      # NOTE(review): major and minor are Strings here, so the comparison
      # below is presumably String against String (lexicographic for ">") —
      # confirm against the definition of the constants.
      major, minor = version.scan(/\d+/)
      if major != FASTRI_FT_INDEX_FORMAT_MAJOR or
         minor > FASTRI_FT_INDEX_FORMAT_MINOR
        raise "Unsupported index format"
      end
    end
  end

  # Yields the fulltext IO (the stored one, or a freshly opened file).
  def get_fulltext_IO
    case @type
    when :io; yield @fulltext_IO
    when :filenames
      File.open(@fulltext_fname, "rb"){|f| yield f}
    end
  end

  # Yields the suffix-array IO (the stored one, or a freshly opened file).
  def get_sarray_IO
    case @type
    when :io; yield @sarray_IO
    when :filenames
      File.open(@sarray_fname, "rb"){|f| yield f}
    end
  end

  # Number of 4-byte entries in the suffix array.
  def suffix_count(sarrayIO)
    bytes = case sarrayIO
            when StringIO then sarrayIO.string.size
            else sarrayIO.stat.size
            end
    bytes / 4
  end

  # Number of characters to read per suffix while scanning for further
  # matches. nil (for anything but a String or Regexp) means "read to the
  # end", as before.
  def match_size(result, term_or_regexp)
    case term_or_regexp
    when String; [result.query.size, term_or_regexp.size].max
    when Regexp; MAX_REGEXP_MATCH_SIZE
    end
  end

  # Walks the suffix array from the entry after result.index and yields a
  # Result for each suffix that starts with result.query, matches
  # +term_or_regexp+ and is followed by a document footer. Stops at the first
  # suffix that no longer starts with result.query, or at the end of the
  # suffix array.
  def each_following_match(result, term_or_regexp)
    size = match_size(result, term_or_regexp)
    get_fulltext_IO do |fulltextIO|
      get_sarray_IO do |sarrayIO|
        idx = result.index
        loop do
          idx += 1
          str = get_string(sarrayIO, fulltextIO, idx, size)
          break unless str                  # ran past the last suffix
          upto = str.index("\0")
          str = str[0, upto] if upto        # never look into the footer
          break unless str.index(result.query) == 0
          next unless str[term_or_regexp]
          fulltextIO.pos = index_to_offset(sarrayIO, idx)
          path, metadata = find_metadata(fulltextIO)
          yield Result.new(self, result.query, idx, path, metadata) if path
        end
      end
    end
    nil
  end

  # Returns the fulltext offset stored at suffix-array position +index+, or
  # nil when +index+ lies outside the suffix array.
  def index_to_offset(sarrayIO, index)
    return nil if index < 0
    sarrayIO.pos = index * 4
    raw = sarrayIO.read(4)
    return nil unless raw && raw.size == 4
    raw.unpack("V")[0]
  end

  # Reads forward from the current fulltext position to the next document
  # footer and returns [path, metadata], or nil when no complete footer
  # follows.
  def find_metadata(fulltextIO)
    loop do
      text = fulltextIO.read(4096)
      break unless text
      idx = text.index("\0")
      next unless idx
      # Make sure the 4 length bytes after the NUL are in the buffer.
      text.concat(fulltextIO.read(4096).to_s) if idx + 4 >= text.size
      len_field = text[idx + 1, 4]
      break unless len_field && len_field.size == 4   # truncated footer
      len = len_field.unpack("V")[0]
      missing = idx + 5 + len - text.size
      text.concat(fulltextIO.read(missing).to_s) if missing > 0
      # The last of the len bytes is excluded from the footer text.
      footer = text[idx + 5, len - 1]
      parts = /(.*?)\0(.*)/m.match(footer.to_s)
      break unless parts
      path, metadata = parts.captures
      # NOTE(review): Marshal.load runs on data taken from the index file;
      # the index must come from a trusted source.
      return [path, Marshal.load(metadata)]
    end
    nil
  end

  # Returns up to +size+ characters of fulltext starting +off+ characters
  # from the offset stored at suffix +index+ (never before position 0), or
  # nil when +index+ is outside the suffix array.
  def get_string(sarrayIO, fulltextIO, index, size, off = 0)
    offset = index_to_offset(sarrayIO, index)
    return nil unless offset
    fulltextIO.pos = [offset + off, 0].max
    fulltextIO.read(size)
  end

  # Finds the leftmost suffix in from..to that is >= +term+ and returns
  # [suffix_index, fulltext_offset] when that suffix starts with +term+,
  # nil otherwise. Only the first @max_query_size characters of each suffix
  # are compared.
  def binary_search(sarrayIO, fulltextIO, term, from, to)
    middle = (from + to) / 2
    pivot = get_string(sarrayIO, fulltextIO, middle, @max_query_size)
    return nil unless pivot     # entry points outside the fulltext
    if from == to
      return nil unless pivot.index(term) == 0
      [middle, index_to_offset(sarrayIO, middle)]
    elsif term <= pivot
      binary_search(sarrayIO, fulltextIO, term, from, middle)
    else
      binary_search(sarrayIO, fulltextIO, term, middle+1, to)
    end
  end
end # class FullTextIndex

end # module FastRI
|