dakrone-fastri 0.3.1

Sign up to get free protection for your applications and to get access to all the features.
data/bin/ri-emacs ADDED
@@ -0,0 +1,202 @@
1
+ #!/usr/bin/env ruby
2
+ ## drop-in replacement for the ri-emacs helper script for use
3
+ # with ri-ruby.el, using the FastRI service via DRb
4
+ #
5
+ # Based on ri-emacs.rb by Kristof Bastiaensen <kristof@vleeuwen.org>
6
+ #
7
+ # Copyright (C) 2004,2006 Kristof Bastiaensen
8
+ # 2006 Mauricio Fernandez <mfp@acm.org>
9
+ #
10
+ # This program is free software; you can redistribute it and/or modify
11
+ # it under the terms of the GNU General Public License as published by
12
+ # the Free Software Foundation; either version 2 of the License, or
13
+ # (at your option) any later version.
14
+ #
15
+ # This program is distributed in the hope that it will be useful,
16
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
17
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
+ # GNU General Public License for more details.
19
+ #
20
+ # You should have received a copy of the GNU General Public License
21
+ # along with this program; if not, write to the Free Software
22
+ # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23
+ #----------------------------------------------------------------------
24
+
25
+ require 'rinda/ring'
26
+ require 'optparse'
27
+ require 'fastri/util'
28
+
29
+ # {{{ cmdline parsing and service discovery
30
+ # we bind to 127.0.0.1 by default, because otherwise Ruby will try with
31
+ # 0.0.0.0, which results in a DNS request, adding way too much latency
32
# Default settings: bind address for incoming DRb connections and output
# width (taken from RI_EMACS_COLUMNS when set, 72 columns otherwise).
options = {:addr => "127.0.0.1", :width => ENV['RI_EMACS_COLUMNS'] ? ENV['RI_EMACS_COLUMNS'].to_i : 72}
# Set once --bind is given, so the command line wins over FASTRI_ADDR.
override_addr_env = false
optparser = OptionParser.new do |opts|
  opts.banner = "Usage: ri-emacs [options] <query>"

  opts.on("-s", "--bind [ADDR]", "Bind to ADDR for incoming DRb connections.",
          "(default: 127.0.0.1)") do |addr|
    options[:addr] = addr
    override_addr_env = true
  end

  opts.on("-w", "--width WIDTH", "Set the width of the output.") do |width|
    # Store an Integer, like the RI_EMACS_COLUMNS/default path above does;
    # OptionParser hands the raw argument over as a String.
    options[:width] = width.to_i
  end

  opts.on("-h", "--help", "Show this help message") do
    puts opts
    exit
  end
end
optparser.parse!

# Port 0 lets DRb pick any free port on the chosen interface.
if override_addr_env
  addr = "druby://#{options[:addr]}:0"
else
  addr = "druby://#{ENV["FASTRI_ADDR"]||options[:addr]}:0"
end

begin
  DRb.start_service(addr)
  ring_server = Rinda::RingFinger.primary
rescue Exception
  # Any failure here is reported with the same hint text before exiting.
  puts <<EOF
Couldn't initialize DRb and locate the Ring server.

Please make sure that:
 * the fastri-server is running, the server is bound to the correct interface,
   and the ACL setup allows connections from this host
 * fri is using the correct interface for incoming DRb requests:
   either set the FASTRI_ADDR environment variable, or use --bind ADDR, e.g
      export FASTRI_ADDR="192.168.1.12"
      fri Array
EOF
  exit(-1)        # '
end
# Third element of the matching tuple is the service object handed to EventLoop.
service = ring_server.read([:name, :FastRI, nil, nil])[2]
78
+
79
# Line-oriented command loop spoken over stdin/stdout: each input line is
# "<COMMAND> <argument>", and the reply is printed to stdout. Completion and
# class-list replies are printed as Lisp-readable values ("nil", "t",
# quoted strings, parenthesised lists).
class EventLoop
  include FastRI::Util::MagicHelp

  # Separators between the components of a name (namespace, instance method
  # and "either kind" method separators).
  NAME_SEPARATOR = /(::)|#|\./

  # ri::      object answering completion_list, class_list,
  #           class_list_with_flag, args and info.
  # options:: Hash; only :width is read here.
  def initialize(ri, options)
    @ri = ri
    @opts = options
  end

  # Announces readiness, then serves commands until stdin reaches EOF.
  # Lines that do not contain a command word, and unknown commands, are
  # ignored.
  def run
    puts "READY"
    loop do
      line = $stdin.gets
      break if line.nil? # EOF: the peer closed the pipe

      parsed = /(\w+)(.*)$/.match(line)
      next unless parsed # blank or non-command line

      cmd, arg = parsed[1..2]
      arg.strip!
      case cmd
      when "TRY_COMPLETION";       puts complete_try(arg)
      when "COMPLETE_ALL";         puts complete_all(arg)
      when "LAMBDA";               puts complete_lambda(arg)
      when "CLASS_LIST";           puts class_list(arg)
      when "CLASS_LIST_WITH_FLAG"; puts class_list_with_flag(arg)
      when "DISPLAY_ARGS";         display_args(arg)
      when "DISPLAY_INFO";         display_info(arg)
      end
    end
  end

  # Returns "nil" when there are no completions, "t" when the only
  # completion equals +keyw+ (ignoring which separator was typed), and
  # otherwise the longest common prefix of all completions, quoted.
  def complete_try(keyw)
    list = @ri.completion_list(keyw)
    return "nil" if list.nil? || list.empty?
    if list.size == 1 &&
       list[0].split(NAME_SEPARATOR) == keyw.split(NAME_SEPARATOR)
      return "t"
    end

    first = list.shift
    # The same name with its last "::"/"#" separator flipped, so that a
    # class method and an instance method of the same name count as sharing
    # a prefix.
    if first =~ /(.*)((?:::)|(?:#))(.*)/
      other = $1 + ($2 == "::" ? "#" : "::") + $3
    end

    len = first.size
    match_both = false
    list.each do |w|
      # Shrink the prefix until this candidate agrees with it.
      while w[0, len] != first[0, len]
        if other && w[0, len] == other[0, len]
          match_both = true
          break
        end
        len -= 1
      end
    end

    if match_both
      # Both separator kinds matched: answer with the neutral "." separator.
      other.sub(/(.*)((?:::)|(?:#))/) { $1 + "." }[0, len].inspect
    else
      first[0, len].inspect
    end
  end

  # Returns every completion of +keyw+ as a list of quoted strings, or "nil".
  def complete_all(keyw)
    list = @ri.completion_list(keyw)
    return "nil" if list.nil?
    "(" + list.map { |w| w.inspect }.join(" ") + ")"
  end

  # Returns "t" when +keyw+ exactly names one of its completions (ignoring
  # which separator was typed), "nil" otherwise.
  def complete_lambda(keyw)
    list = @ri.completion_list(keyw)
    return "nil" if list.nil?
    wanted = keyw.split(NAME_SEPARATOR)
    list.find { |n| n.split(NAME_SEPARATOR) == wanted } ? "t" : "nil"
  end

  # Returns the service's class list for +keyw+ as a list of one-element
  # lists, or "nil".
  def class_list(keyw)
    nested_list(@ri.class_list(keyw))
  end

  # Same output shape as class_list, built from the service's
  # class_list_with_flag.
  def class_list_with_flag(keyw)
    nested_list(@ri.class_list_with_flag(keyw))
  end

  # Prints the result of calling +what+ (:args or :info) on the service for
  # +keyw+, retrying with the method type changed when nothing was found,
  # and always finishes with the RI_EMACS_END_OF_INFO marker line.
  def display_(what, keyw)
    data = @ri.__send__(what, magic_help(keyw), :width => @opts[:width])
    if data
      puts data
    elsif (new_keyw = FastRI::Util.change_query_method_type(keyw)) != keyw
      puts @ri.__send__(what, new_keyw, :width => @opts[:width])
    end
    puts "RI_EMACS_END_OF_INFO"
  end

  def display_args(keyw)
    display_ :args, keyw
  end

  def display_info(keyw)
    display_ :info, keyw
  end

  private

  # Formats +list+ as "((a) (b) ...)" with each element inspected, or "nil"
  # when the service returned nothing.
  def nested_list(list)
    return "nil" unless list
    "(" + list.map { |x| "(#{x.inspect})" }.join(" ") + ")"
  end
end
196
+
197
+
198
+ #{{{ event loop
199
+ #$stdout.sync = true # better not set sync=true, causes problems with emacs
200
# Serve commands on stdin/stdout using the discovered FastRI service.
event_loop = EventLoop.new(service, options)
event_loop.run
201
+
202
+ # vi: set sw=2 expandtab:
data/fastri.gemspec ADDED
@@ -0,0 +1,64 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
# Gem specification for fastri 0.3.1.
Gem::Specification.new do |s|
  s.name = %q{fastri}
  s.version = "0.3.1"

  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  s.authors = ["Mauricio Fernandez", "Lee Hinman"]
  s.date = %q{2009-07-17}
  # Guarded like the setter above so the file still loads when the running
  # RubyGems does not provide this attribute.
  s.default_executable = %q{fri} if s.respond_to? :default_executable=
  s.description = %q{Fastri is RI, fast.}
  s.email = %q{lee@writequit.org}
  s.executables = ["fastri-server", "fri", "ri-emacs"]
  s.extra_rdoc_files = [
    "README.en"
  ]
  # Each file is listed exactly once ("indexer.rb" used to appear twice).
  s.files = [
    "fastri.gemspec",
    "Rakefile",
    "README.en",
    "THANKS",
    "CHANGES",
    "COPYING",
    "LEGAL",
    "LICENSE",
    "bin/fastri-server",
    "bin/fri",
    "bin/ri-emacs",
    "lib/fastri/full_text_index.rb",
    "lib/fastri/full_text_indexer.rb",
    "lib/fastri/name_descriptor.rb",
    "lib/fastri/ri_index.rb",
    "lib/fastri/ri_service.rb",
    "lib/fastri/util.rb",
    "lib/fastri/version.rb",
    "indexer.rb",
    "lookup.rb",
    "pre-install.rb",
    "setup.rb",
    "test/test_full_text_index.rb",
    "test/test_full_text_indexer.rb",
    "test/test_functional_ri_service.rb",
    "test/test_integration_full_text_index.rb",
    "test/test_name_descriptor.rb",
    "test/test_ri_index.rb",
    "test/test_util.rb"
  ]
  s.homepage = %q{http://github.com/dakrone/fastri}
  s.rdoc_options = ["--charset=UTF-8"]
  s.require_paths = ["lib"]
  # NOTE(review): this equals the gem's own version string rather than a
  # RubyGems release number -- confirm whether that is intended.
  s.rubygems_version = %q{0.3.1}
  s.summary = %q{Fastri is RI, fast.}

  # The version-dependent branches that used to follow were empty (the gem
  # declares no dependencies), so only the specification version is set.
  s.specification_version = 3 if s.respond_to? :specification_version
end
data/indexer.rb ADDED
@@ -0,0 +1,135 @@
1
+ #!/usr/bin/env ruby
2
+ # Copyright (C) 2006 Mauricio Fernandez <mfp@acm.org>
3
+ #
4
+ # Full-text indexing of the RI documentation.
5
+ # This is the proof of concept that evolved into FastRI's full-text searching
6
+ # subsystem.
7
+
8
# Accumulates documents into one big string and writes two files:
# the full text itself and a suffix array (32-bit little-endian byte
# offsets of word starts, sorted by the text that follows each offset).
#
# In the full text every document body is followed by "\0<name>\0".
class IndexBuilder
  require 'strscan'
  require 'enumerator'

  # Number of bytes of text compared when sorting suffixes.
  MAXWORD_SIZE = 20

  # fulltext_file:: path the concatenated text is written to.
  # index_file::    path the packed suffix array is written to.
  def initialize(fulltext_file, index_file)
    @fulltext_file = fulltext_file
    @index_file = index_file
    @fulltext = ""
  end

  # Appends +contents+ (with NUL bytes removed) followed by the
  # "\0<name>\0" terminator.
  def add_document(name, contents)
    @fulltext << preprocess(contents)
    @fulltext << "\0#{name}\0"
  end

  # Writes the full-text file, the suffix-array file and a "suffixes" debug
  # dump in the current directory, printing progress and timing to stdout.
  def finish
    # Work on a byte-oriented view: StringScanner#pos and the offsets stored
    # in the index are byte positions, so slicing and sizing must be too.
    data = @fulltext.dup
    data.force_encoding("BINARY") if data.respond_to?(:force_encoding)

    # Binary mode keeps the written bytes identical to the offsets computed
    # below on every platform.
    File.open(@fulltext_file, "wb") { |f| f.puts data }
    scanner = StringScanner.new(data)

    count = 0
    suffixes = []
    until scanner.eos?
      count += 1
      if count == 100
        print "%3d%%\r" % (100 * scanner.pos / data.size)
        $stdout.flush
        count = 0
      end
      start = scanner.pos
      text = scanner.scan_until(/\0.*?\0/)
      unless text
        # No further document terminator: nothing left to index.
        scanner.terminate
        break
      end
      text = text.sub(/\0.*?\0$/, "")
      suffixes.concat find_suffixes(text, start)
    end
    puts "Suffixes: #{suffixes.size}"
    t0 = Time.new
    sorted = suffixes.sort_by { |x| data[x, MAXWORD_SIZE] }
    File.open(@index_file, "wb") do |f|
      sorted.each_slice(10000) { |x| f.write x.pack("V*") }
    end
    File.open("suffixes", "w") { |f| sorted.each { |i| f.puts data[i, MAXWORD_SIZE].inspect } }
    puts "Processed in #{Time.new - t0} seconds"
  end

  # Returns the byte positions (shifted by +offset+) at which words start in
  # +string+. The first word after leading junk must start with a letter or
  # underscore; later words may also start with a digit.
  def find_suffixes(string, offset)
    suffixes = []
    sc = StringScanner.new(string)
    until sc.eos?
      sc.skip(/([^A-Za-z_]|\n)*/)
      # eos? is used instead of comparing pos with string.size: pos counts
      # bytes while size counts characters, and they differ on multibyte text.
      until sc.eos?
        suffixes << offset + sc.pos
        break unless sc.skip(/[A-Za-z0-9_]+([^A-Za-z0-9_]|\n)*/)
      end
    end
    suffixes
  end

  private

  # Strips NUL bytes, which are reserved as document terminators.
  def preprocess(str)
    str.gsub(/\0/, "")
  end
end
73
+
74
# Flattens one comment node (a Hash) into plain text.
#
# Preference order: a String under "body"; otherwise an Array under
# "contents", whose elements are linearized recursively and joined with
# newlines; otherwise whatever is stored under "text". Returns nil when
# none of these is present.
def linearize(comment)
  body = comment["body"]
  return body if String === body

  children = comment["contents"]
  return children.map { |child| linearize(child) }.join("\n") if Array === children

  comment["text"] || nil
end
87
+
88
+ require 'rdoc/ri/ri_paths'
89
+ require 'yaml'
90
+ $:.unshift "lib"
91
+ require 'fastri/util'
92
+
93
+ #paths = RI::Paths::PATH
94
# Collect the documentation directories: the system/site/home RI
# directories that exist, followed by the per-gem directories.
gem_paths = FastRI::Util.gem_directories_unique.map { |_name, _version, dir| dir }
standard_dirs = [ RI::Paths::SYSDIR, RI::Paths::SITEDIR, RI::Paths::HOMEDIR ]
paths = standard_dirs.select { |dir| dir && File.directory?(dir) }
paths.concat gem_paths

indexer = IndexBuilder.new("test_FULLTEXT", "test_INDEX")
bad = 0

# Markup tags to drop and HTML entities to decode, applied in this order
# (the ampersand entity is handled last).
cleanups = [
  [/<\/?(em|b|tt|ul|ol|table)>/, ""],
  [/&quot;/, "'"],
  [/&lt;/, "<"],
  [/&gt;/, ">"],
  [/&amp;/, "&"]
]

paths.each do |path|
  Dir["#{path}/**/*.yaml"].each do |yamlfile|
    yaml = File.read(yamlfile)
    begin
      # Everything from " !" to the end of each line is removed before parsing.
      data = YAML.load(yaml.gsub(/ \!.*/, ''))
    rescue Exception
      bad += 1
      puts "Couldn't load #{yamlfile}"
      #puts "=" * 80
      #puts yaml
      next
    end

    comment_nodes = data['comment'] || []
    desc = comment_nodes.map { |node| linearize(node) }.join("\n")
    cleanups.each { |pattern, replacement| desc.gsub!(pattern, replacement) }

    # Debugging aid (disabled): dump the file name, raw YAML and extracted
    # text, then wait for a key press.
    #   puts "=" * 80
    #   puts yamlfile
    #   puts "-" * 80
    #   puts yaml
    #   puts "-" * 80
    #   puts desc
    #   $stdin.gets

    indexer.add_document(yamlfile, desc) unless desc.empty?
  end
end

puts "BAD files: #{bad}"
indexer.finish
@@ -0,0 +1,245 @@
1
+ # Copyright (C) 2006 Mauricio Fernandez <mfp@acm.org>
2
+ #
3
+
4
+ require 'fastri/full_text_indexer'
5
+ require 'stringio'
6
+
7
module FastRI

# Read-only searcher over a full-text file and its suffix array.
#
# The suffix array is a sequence of 32-bit little-endian byte offsets into
# the full text. In the full text each document is followed by a footer:
# a NUL byte, a 32-bit little-endian length, then "<path>\0<marshaled
# metadata>" (this is what find_metadata parses).
#
# Instances are created with new_from_ios or new_from_filenames; +new+ is
# private.
class FullTextIndex
  MAX_QUERY_SIZE = 20
  MAX_REGEXP_MATCH_SIZE = 255

  # One hit: the suffix-array position (+index+), the query that produced
  # it, and the path/metadata of the document containing it.
  class Result
    attr_reader :path, :query, :index, :metadata

    def initialize(searcher, query, index, path, metadata)
      @searcher = searcher
      @index = index
      @query = query
      @path = path
      @metadata = metadata
    end

    # Text around the hit: up to +size+ bytes before it and +size+ after.
    def context(size)
      @searcher.fetch_data(@index, 2*size+1, -size)
    end

    # Up to +size+ bytes of text starting at the hit.
    def text(size)
      @searcher.fetch_data(@index, size, 0)
    end
  end

  class << self; private :new end

  DEFAULT_OPTIONS = {
    :max_query_size => MAX_QUERY_SIZE,
  }

  # Builds a searcher over two already-open IO objects (e.g. StringIO).
  def self.new_from_ios(fulltext_IO, suffix_arrray_IO, options = {})
    new(:io, fulltext_IO, suffix_arrray_IO, options)
  end

  # Builds a searcher that reopens the two named files for every operation.
  def self.new_from_filenames(fulltext_fname, suffix_arrray_fname, options = {})
    new(:filenames, fulltext_fname, suffix_arrray_fname, options)
  end

  attr_reader :max_query_size

  # Raises RuntimeError for an unknown +type+ or an unsupported index header.
  def initialize(type, fulltext, sarray, options)
    options = DEFAULT_OPTIONS.merge(options)
    case type
    when :io
      @fulltext_IO = fulltext
      @sarray_IO = sarray
    when :filenames
      @fulltext_fname = fulltext
      @sarray_fname = sarray
    else raise "Unknown type"
    end
    @type = type
    @max_query_size = options[:max_query_size]
    check_magic
  end

  # Returns a Result for the first suffix starting with +term+, or nil when
  # there is none (including when the suffix array is empty).
  def lookup(term)
    get_fulltext_IO do |fulltextIO|
      get_sarray_IO do |sarrayIO|
        case sarrayIO
        when StringIO
          num_suffixes = sarrayIO.string.size / 4 - 1
        else
          num_suffixes = sarrayIO.stat.size / 4 - 1
        end
        # Empty index: there is no entry to bisect on.
        return nil if num_suffixes < 0

        index, offset = binary_search(sarrayIO, fulltextIO, term, 0, num_suffixes)
        if offset
          fulltextIO.pos = offset
          path, metadata = find_metadata(fulltextIO)
          return Result.new(self, term, index, path, metadata) if path
        end
        nil
      end
    end
  end

  # Returns the next Result after +result+ whose text still starts with
  # result.query and also matches +term_or_regexp+, or nil when the run of
  # matching suffixes (or the suffix array itself) ends.
  def next_match(result, term_or_regexp = "")
    each_following_match(result, term_or_regexp) { |match| return match }
    nil
  end

  # Like next_match, but returns every such Result as an Array.
  def next_matches(result, term_or_regexp = "")
    ret = []
    each_following_match(result, term_or_regexp) { |match| ret << match }
    ret
  end

  # Returns up to +size+ bytes of text around suffix +index+, starting
  # +offset+ (<= 0) bytes before it and cut at the surrounding NUL bytes.
  # Raises RuntimeError for a positive offset or an index past the end of
  # the suffix array.
  def fetch_data(index, size, offset = 0)
    raise "Bad offset" unless offset <= 0
    get_fulltext_IO do |fulltextIO|
      get_sarray_IO do |sarrayIO|
        base = index_to_offset(sarrayIO, index)
        raise "Bad index" unless base
        actual_offset = offset
        newsize = size
        if base + offset < 0    # at the beginning
          excess = (base + offset).abs  # remember offset is < 0
          newsize = size - excess
          actual_offset = offset + excess
        end
        str = get_string(sarrayIO, fulltextIO, index, newsize, offset)
        from = (str.rindex("\0", -actual_offset) || -1) + 1
        to = (str.index("\0", -actual_offset) || 0) - 1
        str[from..to]
      end
    end
  end

  private

  # Verifies that the full text starts with a header carrying a supported
  # "major.minor.teeny" format version; raises RuntimeError otherwise.
  def check_magic
    get_fulltext_IO do |io|
      io.rewind
      header = io.read(FullTextIndexer::MAGIC.size)
      raise "Unsupported index format." unless header
      version = header[/\d+\.\d+\.\d+/]
      raise "Unsupported index format." unless version
      # NOTE(review): these are Strings; this assumes the FASTRI_FT_INDEX_FORMAT_*
      # constants are Strings too -- confirm against their definition.
      major, minor, teeny = version.scan(/\d+/)
      if major != FASTRI_FT_INDEX_FORMAT_MAJOR ||
         minor > FASTRI_FT_INDEX_FORMAT_MINOR
        raise "Unsupported index format"
      end
    end
  end

  # Yields the full-text IO (reopened per call in :filenames mode).
  def get_fulltext_IO
    case @type
    when :io; yield @fulltext_IO
    when :filenames
      File.open(@fulltext_fname, "rb"){|f| yield f}
    end
  end

  # Yields the suffix-array IO (reopened per call in :filenames mode).
  def get_sarray_IO
    case @type
    when :io; yield @sarray_IO
    when :filenames
      File.open(@sarray_fname, "rb"){|f| yield f}
    end
  end

  # Walks the suffix array forward from result.index, yielding a Result for
  # each entry that starts with result.query and matches +term_or_regexp+.
  # Stops at the first entry that no longer starts with the query, or at
  # the end of the suffix array.
  def each_following_match(result, term_or_regexp)
    case term_or_regexp
    when String;  size = [result.query.size, term_or_regexp.size].max
    when Regexp;  size = MAX_REGEXP_MATCH_SIZE
    end
    get_fulltext_IO do |fulltextIO|
      get_sarray_IO do |sarrayIO|
        idx = result.index
        loop do
          idx += 1
          str = get_string(sarrayIO, fulltextIO, idx, size)
          break unless str            # ran past the last suffix
          upto = str.index("\0")
          str = str[0, upto] if upto  # never look beyond the document end
          break unless str.index(result.query) == 0
          next unless str[term_or_regexp]
          fulltextIO.pos = index_to_offset(sarrayIO, idx)
          path, metadata = find_metadata(fulltextIO)
          yield Result.new(self, result.query, idx, path, metadata) if path
        end
      end
    end
    nil
  end

  # Returns the full-text byte offset stored at suffix-array position
  # +index+, or nil when +index+ lies past the end of the array.
  def index_to_offset(sarrayIO, index)
    sarrayIO.pos = index * 4
    packed = sarrayIO.read(4)
    return nil unless packed && packed.size == 4
    packed.unpack("V")[0]
  end

  # Reads forward from the current position to the next document footer and
  # returns [path, metadata], or nil when no footer follows.
  def find_metadata(fulltextIO)
    loop do
      text = fulltextIO.read(4096)
      break unless text
      if idx = text.index("\0")
        # Make sure the 4 length bytes after the NUL are in the buffer.
        if idx + 4 >= text.size
          text.concat(fulltextIO.read(4096))
        end
        len = text[idx+1, 4].unpack("V")[0]
        missing = idx + 5 + len - text.size
        if missing > 0
          text.concat(fulltextIO.read(missing))
        end
        footer = text[idx + 5, len - 1]
        path, metadata = /(.*?)\0(.*)/m.match(footer).captures
        # NOTE(review): Marshal.load must only ever see trusted index files.
        return [path, Marshal.load(metadata)]
      end
    end
    nil
  end

  # Returns up to +size+ bytes of full text starting +off+ bytes away from
  # the suffix at +index+ (never before the start of the text), or nil when
  # +index+ lies past the end of the suffix array.
  def get_string(sarrayIO, fulltextIO, index, size, off = 0)
    offset = index_to_offset(sarrayIO, index)
    return nil unless offset
    fulltextIO.pos = [offset + off, 0].max
    fulltextIO.read(size)
  end

  # Bisects suffix-array positions from..to for the first suffix that is
  # >= +term+ and returns [position, full-text offset] when that suffix
  # starts with +term+, nil otherwise. Only the first max_query_size bytes
  # of each suffix are compared.
  def binary_search(sarrayIO, fulltextIO, term, from, to)
    middle = (from + to) / 2
    pivot = get_string(sarrayIO, fulltextIO, middle, @max_query_size)
    if from == to
      if pivot.index(term) == 0
        [middle, index_to_offset(sarrayIO, middle)]
      else
        nil
      end
    elsif term <= pivot
      binary_search(sarrayIO, fulltextIO, term, from, middle)
    else
      binary_search(sarrayIO, fulltextIO, term, middle+1, to)
    end
  end
end # class FullTextIndex

end # module FastRI