dakrone-fastri 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/bin/ri-emacs ADDED
@@ -0,0 +1,202 @@
1
+ #!/usr/bin/env ruby
2
+ ## drop-in replacement for the ri-emacs helper script for use
3
+ # with ri-ruby.el, using the FastRI service via DRb
4
+ #
5
+ # Based on ri-emacs.rb by Kristof Bastiaensen <kristof@vleeuwen.org>
6
+ #
7
+ # Copyright (C) 2004,2006 Kristof Bastiaensen
8
+ # 2006 Mauricio Fernandez <mfp@acm.org>
9
+ #
10
+ # This program is free software; you can redistribute it and/or modify
11
+ # it under the terms of the GNU General Public License as published by
12
+ # the Free Software Foundation; either version 2 of the License, or
13
+ # (at your option) any later version.
14
+ #
15
+ # This program is distributed in the hope that it will be useful,
16
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
17
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
+ # GNU General Public License for more details.
19
+ #
20
+ # You should have received a copy of the GNU General Public License
21
+ # along with this program; if not, write to the Free Software
22
+ # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23
+ #----------------------------------------------------------------------
24
+
25
+ require 'rinda/ring'
26
+ require 'optparse'
27
+ require 'fastri/util'
28
+
29
# {{{ cmdline parsing and service discovery
# we bind to 127.0.0.1 by default, because otherwise Ruby will try with
# 0.0.0.0, which results in a DNS request, adding way too much latency
options = {:addr => "127.0.0.1", :width => ENV['RI_EMACS_COLUMNS'] ? ENV['RI_EMACS_COLUMNS'].to_i : 72}
# Set when --bind is given, so that the command line wins over FASTRI_ADDR.
override_addr_env = false
optparser = OptionParser.new do |opts|
  opts.banner = "Usage: ri-emacs [options] <query>"

  opts.on("-s", "--bind [ADDR]", "Bind to ADDR for incoming DRb connections.",
          "(default: 127.0.0.1)") do |addr|
    options[:addr] = addr
    override_addr_env = true
  end

  opts.on("-w", "--width WIDTH", "Set the width of the output.") do |width|
    # OptionParser hands us a String; convert it the same way as
    # RI_EMACS_COLUMNS so :width is always an Integer.
    options[:width] = width.to_i
  end

  opts.on("-h", "--help", "Show this help message") do
    puts opts
    exit
  end
end
optparser.parse!

# Precedence for the local DRb address: --bind, then FASTRI_ADDR, then the
# built-in default. Port 0 is used in every case.
if override_addr_env
  addr = "druby://#{options[:addr]}:0"
else
  addr = "druby://#{ENV["FASTRI_ADDR"]||options[:addr]}:0"
end

begin
  DRb.start_service(addr)
  ring_server = Rinda::RingFinger.primary
rescue StandardError
  # Only ordinary errors are reported here; Interrupt and SystemExit are
  # allowed to propagate instead of being shown as a discovery failure.
  puts <<EOF
Couldn't initialize DRb and locate the Ring server.

Please make sure that:
* the fastri-server is running, the server is bound to the correct interface,
and the ACL setup allows connections from this host
* fri is using the correct interface for incoming DRb requests:
either set the FASTRI_ADDR environment variable, or use --bind ADDR, e.g
export FASTRI_ADDR="192.168.1.12"
fri Array
EOF
  exit(-1) # '
end
# The third element of the tuple registered under [:name, :FastRI] is the
# remote service object used for all queries below.
service = ring_server.read([:name, :FastRI, nil, nil])[2]
78
+
79
# Line-oriented command loop: reads one command per line from $stdin,
# queries the remote service object (+ri+) and prints the reply on $stdout.
# Replies to the completion/listing commands are printed as Lisp-style
# literals ("nil", "t", quoted strings, parenthesised lists).
class EventLoop
  include FastRI::Util::MagicHelp

  # Separators between the components of a (possibly qualified) name.
  NAME_SEPARATOR = /(::)|#|\./

  # ri::      object answering completion_list, class_list,
  #           class_list_with_flag, args and info.
  # options:: Hash; only :width is read here.
  def initialize(ri, options)
    @ri = ri
    @opts = options
  end

  # Announces readiness with "READY", then serves commands until $stdin
  # reaches EOF. Lines that do not start with a command word are skipped.
  def run
    puts "READY"
    loop do
      line = $stdin.gets
      break unless line                  # EOF: the peer closed the pipe
      parsed = /(\w+)(.*)$/.match(line)
      next unless parsed                 # blank or unparseable line
      cmd, arg = parsed[1..2]
      arg.strip!
      case cmd
      when "TRY_COMPLETION";       puts complete_try(arg)
      when "COMPLETE_ALL";         puts complete_all(arg)
      when "LAMBDA";               puts complete_lambda(arg)
      when "CLASS_LIST";           puts class_list(arg)
      when "CLASS_LIST_WITH_FLAG"; puts class_list_with_flag(arg)
      when "DISPLAY_ARGS";         display_args(arg)
      when "DISPLAY_INFO";         display_info(arg)
      end
    end
  end

  # Returns "nil" when there are no completions, "t" when +keyw+ is the only
  # completion, and otherwise the longest prefix shared by all completions,
  # as a quoted string.
  def complete_try(keyw)
    list = @ri.completion_list(keyw)
    # An empty list is treated like a missing one instead of crashing below.
    return "nil" if list.nil? || list.empty?
    if list.size == 1 &&
       list[0].split(NAME_SEPARATOR) == keyw.split(NAME_SEPARATOR)
      return "t"
    end

    first, *rest = list
    # +other+ is +first+ with its last "::"/"#" separator swapped, so that a
    # class method and an instance method with a common stem still match.
    if first =~ /(.*)((?:::)|(?:#))(.*)/
      other = $1 + ($2 == "::" ? "#" : "::") + $3
    end

    len = first.size
    match_both = false
    rest.each do |w|
      # Shrink the common prefix until +w+ agrees with +first+ (or +other+).
      while w[0, len] != first[0, len]
        if other && w[0, len] == other[0, len]
          match_both = true
          break
        end
        len -= 1
      end
    end

    if match_both
      # Both separator kinds occur: report the neutral "." form.
      return other.sub(/(.*)((?:::)|(?:#))/) { $1 + "." }[0, len].inspect
    else
      return first[0, len].inspect
    end
  end

  # Returns every completion of +keyw+ as a parenthesised list of quoted
  # strings, or "nil" when the service returns no list.
  def complete_all(keyw)
    list = @ri.completion_list(keyw)
    if list.nil?
      "nil"
    else
      "(" + list.map { |w| w.inspect }.join(" ") + ")"
    end
  end

  # Returns "t" if +keyw+ itself is among its completions, "nil" otherwise.
  def complete_lambda(keyw)
    list = @ri.completion_list(keyw)
    return "nil" if list.nil?
    wanted = keyw.split(NAME_SEPARATOR)
    list.find { |n| n.split(NAME_SEPARATOR) == wanted } ? "t" : "nil"
  end

  # Returns the service's class list for +keyw+ as a list of one-element
  # lists, or "nil" when the service returns nothing.
  def class_list(keyw)
    nested_list(@ri.class_list(keyw))
  end

  # Same output format as #class_list, built from class_list_with_flag.
  def class_list_with_flag(keyw)
    nested_list(@ri.class_list_with_flag(keyw))
  end

  # Prints the result of calling +what+ (:args or :info) on the service for
  # +keyw+, retrying with the alternate method type when nothing is found.
  # Output is always terminated by the RI_EMACS_END_OF_INFO marker.
  def display_(what, keyw)
    data = @ri.__send__(what, magic_help(keyw), :width => @opts[:width])
    if data
      puts data
    elsif (new_keyw = FastRI::Util.change_query_method_type(keyw)) != keyw
      puts @ri.__send__(what, new_keyw, :width => @opts[:width])
    end
    puts "RI_EMACS_END_OF_INFO"
  end

  def display_args(keyw)
    display_ :args, keyw
  end

  def display_info(keyw)
    display_ :info, keyw
  end

  private

  # Formats +list+ as "((a) (b) ...)" using each element's #inspect form;
  # a nil/false +list+ becomes "nil".
  def nested_list(list)
    return "nil" unless list
    "(" + list.map { |x| "(#{x.inspect})" }.join(" ") + ")"
  end
end
196
+
197
+
198
#{{{ event loop
# $stdout is intentionally left unsynced here:
#$stdout.sync = true # better not set sync=true, causes problems with emacs
event_loop = EventLoop.new(service, options)
event_loop.run
201
+
202
+ # vi: set sw=2 expandtab:
data/fastri.gemspec ADDED
@@ -0,0 +1,64 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
# Gem specification for fastri 0.3.1.
Gem::Specification.new do |s|
  s.name = %q{fastri}
  s.version = "0.3.1"

  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  s.authors = ["Mauricio Fernandez", "Lee Hinman"]
  s.date = %q{2009-07-17}
  # Guarded like the other optional setters in this file, so the spec still
  # loads when the running RubyGems does not provide this attribute.
  s.default_executable = %q{fri} if s.respond_to? :default_executable=
  s.description = %q{Fastri is RI, fast.}
  s.email = %q{lee@writequit.org}
  s.executables = ["fastri-server", "fri", "ri-emacs"]
  s.extra_rdoc_files = [
    "README.en"
  ]
  # Each packaged file is listed exactly once.
  s.files = [
    "fastri.gemspec",
    "Rakefile",
    "README.en",
    "THANKS",
    "CHANGES",
    "COPYING",
    "LEGAL",
    "LICENSE",
    "bin/fastri-server",
    "bin/fri",
    "bin/ri-emacs",
    "lib/fastri/full_text_index.rb",
    "lib/fastri/full_text_indexer.rb",
    "lib/fastri/name_descriptor.rb",
    "lib/fastri/ri_index.rb",
    "lib/fastri/ri_service.rb",
    "lib/fastri/util.rb",
    "lib/fastri/version.rb",
    "indexer.rb",
    "lookup.rb",
    "pre-install.rb",
    "setup.rb",
    "test/test_full_text_index.rb",
    "test/test_full_text_indexer.rb",
    "test/test_functional_ri_service.rb",
    "test/test_integration_full_text_index.rb",
    "test/test_name_descriptor.rb",
    "test/test_ri_index.rb",
    "test/test_util.rb"
  ]
  s.homepage = %q{http://github.com/dakrone/fastri}
  s.rdoc_options = ["--charset=UTF-8"]
  s.require_paths = ["lib"]
  s.rubygems_version = %q{0.3.1}
  s.summary = %q{Fastri is RI, fast.}

  # No dependencies are declared, so nothing needs to branch on the RubyGems
  # version; only the specification version is set when supported.
  s.specification_version = 3 if s.respond_to? :specification_version
end
data/indexer.rb ADDED
@@ -0,0 +1,135 @@
1
+ #!/usr/bin/env ruby
2
+ # Copyright (C) 2006 Mauricio Fernandez <mfp@acm.org>
3
+ #
4
+ # Full-text indexing of the RI documentation.
5
+ # This is the proof of concept that evolved into FastRI's full-text searching
6
+ # subsystem.
7
+
8
# Builds a full-text file plus a suffix-array index for it.
#
# Documents are appended to one in-memory string, each followed by
# "\0<name>\0". #finish writes that string to +fulltext_file+ and writes the
# sorted word-start offsets to +index_file+ as 32-bit little-endian integers.
# All offsets are byte offsets into the full text.
class IndexBuilder
  require 'strscan'
  require 'enumerator'

  # Number of bytes of each suffix used as its sort key.
  MAXWORD_SIZE = 20

  def initialize(fulltext_file, index_file)
    @fulltext_file = fulltext_file
    @index_file = index_file
    @fulltext = ""
  end

  # Appends +contents+ (with NUL bytes removed) followed by the
  # NUL-delimited document +name+.
  def add_document(name, contents)
    @fulltext << preprocess(contents)
    @fulltext << "\0#{name}\0"
  end

  # Writes the full text and the sorted suffix array, plus a "suffixes" debug
  # dump in the current directory. Progress and timing go to $stdout.
  def finish
    # Binary mode: the index stores byte offsets into this file, so no
    # newline translation may happen on write.
    File.open(@fulltext_file, "wb"){|f| f.puts @fulltext }
    scanner = StringScanner.new(@fulltext)

    count = 0
    suffixes = []
    until scanner.eos?
      count += 1
      if count == 100
        print "%3d%%\r" % (100 * scanner.pos / @fulltext.size)
        $stdout.flush
        count = 0
      end
      start = scanner.pos
      text = scanner.scan_until(/\0.*?\0/)
      unless text
        # Trailing data without a "\0name\0" terminator: nothing more to index.
        scanner.terminate
        break
      end
      text = text.sub(/\0.*?\0$/,"")
      suffixes.concat find_suffixes(text, start)
    end
    puts "Suffixes: #{suffixes.size}"
    t0 = Time.new
    sorted = suffixes.sort_by{|pos| suffix_key(pos)}
    File.open(@index_file, "wb") do |f|
      sorted.each_slice(10000){|x| f.write x.pack("V*")}
    end
    File.open("suffixes", "w"){|f| sorted.each{|i| f.puts suffix_key(i).inspect}}
    puts "Processed in #{Time.new - t0} seconds"
  end

  # Returns the byte offsets (shifted by +offset+) at which an indexable word
  # starts in +string+. The first word must start with a letter or "_";
  # later words may also start with a digit.
  def find_suffixes(string, offset)
    suffixes = []
    sc = StringScanner.new(string)
    until sc.eos?
      sc.skip(/([^A-Za-z_]|\n)*/)
      loop do
        # eos? is used rather than comparing sc.pos (bytes) with string.size
        # (characters), which never agree once the text is multibyte.
        break if sc.eos?
        suffixes << offset + sc.pos
        break unless sc.skip(/[A-Za-z0-9_]+([^A-Za-z0-9_]|\n)*/)
      end
    end
    suffixes
  end

  private
  def preprocess(str)
    str.gsub(/\0/,"")
  end

  # Sort key for the suffix starting at byte offset +pos+. byteslice is used
  # when available because the offsets are byte-based; interpreters without
  # it fall back to String#[].
  def suffix_key(pos)
    if @fulltext.respond_to?(:byteslice)
      @fulltext.byteslice(pos, MAXWORD_SIZE)
    else
      @fulltext[pos, MAXWORD_SIZE]
    end
  end
end
73
+
74
# Flattens one comment node into plain text.
#
# A String under "body" is returned as is. Otherwise an Array under
# "contents" is linearized element by element and joined with newlines.
# Failing both, the value under "text" is returned, or nil if there is none.
def linearize(comment)
  body = comment["body"]
  return body if String === body

  children = comment["contents"]
  if Array === children
    return children.map { |child| linearize(child) }.join("\n")
  end

  comment["text"] || nil
end
87
+
88
require 'rdoc/ri/ri_paths'
require 'yaml'
$:.unshift "lib"
require 'fastri/util'

# Driver: walks every RI documentation directory (system, site, home and
# gems), extracts the comment text of each YAML description and feeds it to
# an IndexBuilder writing "test_FULLTEXT" / "test_INDEX".

#paths = RI::Paths::PATH
gem_paths = FastRI::Util.gem_directories_unique.map{|_,_,path| path}
paths = [ RI::Paths::SYSDIR, RI::Paths::SITEDIR, RI::Paths::HOMEDIR ].find_all do |p|
  p && File.directory?(p)
end
paths.concat gem_paths
indexer = IndexBuilder.new("test_FULLTEXT", "test_INDEX")
bad = 0
paths.each do |path|
  Dir["#{path}/**/*.yaml"].each do |yamlfile|
    yaml = File.read(yamlfile)
    begin
      # Type tags (" !...") are stripped before loading, so the document is
      # read as plain data.
      data = YAML.load(yaml.gsub(/ \!.*/, ''))
    rescue StandardError
      # Only ordinary load errors count as a bad file; Interrupt (Ctrl-C)
      # and SystemExit are no longer swallowed here.
      bad += 1
      puts "Couldn't load #{yamlfile}"
      #puts "=" * 80
      #puts yaml
      next
    end
    # An empty or scalar-only file does not load as a Hash; there is nothing
    # to index in that case.
    next unless Hash === data

    desc = (data['comment']||[]).map{|x| linearize(x)}.join("\n")
    desc.gsub!(/<\/?(em|b|tt|ul|ol|table)>/, "")
    desc.gsub!(/&quot;/, "'")
    desc.gsub!(/&lt;/, "<")
    desc.gsub!(/&gt;/, ">")
    desc.gsub!(/&amp;/, "&")
    # Interactive debugging aid, disabled:
    #   puts "=" * 80
    #   puts yamlfile
    #   puts "-" * 80
    #   puts yaml
    #   puts "-" * 80
    #   puts desc
    #   $stdin.gets
    unless desc.empty?
      indexer.add_document(yamlfile, desc)
    end
  end
end
puts "BAD files: #{bad}"
indexer.finish
@@ -0,0 +1,245 @@
1
+ # Copyright (C) 2006 Mauricio Fernandez <mfp@acm.org>
2
+ #
3
+
4
+ require 'fastri/full_text_indexer'
5
+ require 'stringio'
6
+
7
module FastRI

# Read-only searcher over a full-text index made of two streams:
#
# * the "fulltext" stream, which starts with a magic header and holds the
#   indexed text; each document is followed by a NUL byte, a 4-byte
#   little-endian length and a footer of the form "<path>\0<marshaled data>";
# * the suffix array stream, a sequence of 4-byte little-endian offsets into
#   the fulltext stream.
#
# Instances are created with FullTextIndex.new_from_ios or
# FullTextIndex.new_from_filenames; +new+ itself is private.
class FullTextIndex
  MAX_QUERY_SIZE = 20
  MAX_REGEXP_MATCH_SIZE = 255

  # One hit: the suffix array position (+index+), the query that produced it,
  # and the path and metadata read from the footer of the matching document.
  class Result
    attr_reader :path, :query, :index, :metadata

    def initialize(searcher, query, index, path, metadata)
      @searcher = searcher
      @index = index
      @query = query
      @path = path
      @metadata = metadata
    end

    # Up to +size+ bytes on each side of the match start, plus the byte at
    # the match start itself.
    def context(size)
      @searcher.fetch_data(@index, 2*size+1, -size)
    end

    # Up to +size+ bytes of text starting at the match.
    def text(size)
      @searcher.fetch_data(@index, size, 0)
    end
  end

  class << self; private :new end

  DEFAULT_OPTIONS = {
    :max_query_size => MAX_QUERY_SIZE,
  }

  # Builds an index over two already-open IO-like objects.
  def self.new_from_ios(fulltext_IO, suffix_arrray_IO, options = {})
    new(:io, fulltext_IO, suffix_arrray_IO, options)
  end

  # Builds an index over two files, reopened for every operation.
  def self.new_from_filenames(fulltext_fname, suffix_arrray_fname, options = {})
    new(:filenames, fulltext_fname, suffix_arrray_fname, options)
  end

  attr_reader :max_query_size

  # type::    :io or :filenames; anything else raises "Unknown type".
  # options:: merged over DEFAULT_OPTIONS; only :max_query_size is read.
  # Raises if the fulltext header is not a supported format.
  def initialize(type, fulltext, sarray, options)
    options = DEFAULT_OPTIONS.merge(options)
    case type
    when :io
      @fulltext_IO = fulltext
      @sarray_IO = sarray
    when :filenames
      @fulltext_fname = fulltext
      @sarray_fname = sarray
    else raise "Unknown type"
    end
    @type = type
    @max_query_size = options[:max_query_size]
    check_magic
  end

  # Returns a Result for a suffix starting with +term+, or nil when there is
  # none (including when the suffix array is empty).
  def lookup(term)
    get_fulltext_IO do |fulltextIO|
      get_sarray_IO do |sarrayIO|
        last = sarray_bytes(sarrayIO) / 4 - 1
        # Empty suffix array: searching would seek to a negative position.
        return nil if last < 0

        index, offset = binary_search(sarrayIO, fulltextIO, term, 0, last)
        return nil unless offset
        fulltextIO.pos = offset
        path, metadata = find_metadata(fulltextIO)
        return Result.new(self, term, index, path, metadata) if path
        nil
      end
    end
  end

  # Returns the first Result after +result+ in suffix array order that still
  # starts with result.query and also contains +term_or_regexp+, or nil when
  # the run of matching suffixes (or the suffix array) ends first.
  def next_match(result, term_or_regexp = "")
    each_following_match(result, term_or_regexp) { |match| return match }
    nil
  end

  # Like #next_match, but collects every such Result into an Array.
  def next_matches(result, term_or_regexp = "")
    ret = []
    each_following_match(result, term_or_regexp) { |match| ret << match }
    ret
  end

  # Returns up to +size+ bytes of text around the suffix at position +index+,
  # starting +offset+ bytes (<= 0) before it and cut at the nearest NUL
  # bytes on both sides. Raises on a positive +offset+ or on an +index+
  # outside the suffix array.
  def fetch_data(index, size, offset = 0)
    raise "Bad offset" unless offset <= 0
    get_fulltext_IO do |fulltextIO|
      get_sarray_IO do |sarrayIO|
        base = index_to_offset(sarrayIO, index)
        raise "Bad index" unless base
        actual_offset = offset
        newsize = size
        if base + offset < 0    # at the beginning
          excess = (base + offset).abs  # remember offset is < 0
          newsize = size - excess
          actual_offset = offset + excess
        end
        str = get_string(sarrayIO, fulltextIO, index, newsize, offset)
        # -actual_offset is where the suffix itself starts inside +str+.
        from = (str.rindex("\0", -actual_offset) || -1) + 1
        to = (str.index("\0", -actual_offset) || 0) - 1
        str[from..to]
      end
    end
  end

  private
  # Verifies the header of the fulltext stream against the supported format.
  def check_magic
    get_fulltext_IO do |io|
      io.rewind
      header = io.read(FullTextIndexer::MAGIC.size)
      raise "Unsupported index format." unless header
      version = header[/\d+\.\d+\.\d+/]
      raise "Unsupported index format." unless version
      # NOTE(review): major and minor are Strings here; this presumes the
      # FASTRI_FT_INDEX_FORMAT_* constants are Strings as well -- confirm.
      major, minor = version.scan(/\d+/)
      if major != FASTRI_FT_INDEX_FORMAT_MAJOR or
         minor > FASTRI_FT_INDEX_FORMAT_MINOR
        raise "Unsupported index format"
      end
    end
  end

  # Yields the fulltext stream (the stored IO, or the file opened in binary
  # mode and closed when the block returns).
  def get_fulltext_IO
    case @type
    when :io; yield @fulltext_IO
    when :filenames
      File.open(@fulltext_fname, "rb"){|f| yield f}
    end
  end

  # Yields the suffix array stream, same conventions as #get_fulltext_IO.
  def get_sarray_IO
    case @type
    when :io; yield @sarray_IO
    when :filenames
      File.open(@sarray_fname, "rb"){|f| yield f}
    end
  end

  # Size of the suffix array stream in bytes. StringIO#size is used for
  # in-memory streams so that the result does not depend on the encoding of
  # the underlying string.
  def sarray_bytes(sarrayIO)
    case sarrayIO
    when StringIO; sarrayIO.size
    else sarrayIO.stat.size
    end
  end

  # Shared scan behind #next_match and #next_matches: yields a Result for
  # every later suffix that starts with result.query and contains
  # +term_or_regexp+, stopping at the first suffix that does not start with
  # the query or at the end of the suffix array.
  def each_following_match(result, term_or_regexp)
    case term_or_regexp
    when String; size = [result.query.size, term_or_regexp.size].max
    when Regexp; size = MAX_REGEXP_MATCH_SIZE
    end
    get_fulltext_IO do |fulltextIO|
      get_sarray_IO do |sarrayIO|
        idx = result.index
        loop do
          idx += 1
          str = get_string(sarrayIO, fulltextIO, idx, size)
          break unless str                 # ran past the last suffix
          upto = str.index("\0")
          str = str[0, upto] if upto
          break unless str.index(result.query) == 0
          if str[term_or_regexp]
            fulltextIO.pos = index_to_offset(sarrayIO, idx)
            path, metadata = find_metadata(fulltextIO)
            yield Result.new(self, result.query, idx, path, metadata) if path
          end
        end
      end
    end
  end

  # Returns the fulltext offset stored at suffix array position +index+, or
  # nil when +index+ is past the end of the stream.
  def index_to_offset(sarrayIO, index)
    sarrayIO.pos = index * 4
    raw = sarrayIO.read(4)
    raw && raw.unpack("V")[0]
  end

  # Reads forward from the current position to the next NUL byte and decodes
  # the footer that follows it. Returns [path, metadata], or nil when no
  # complete footer is found before the end of the stream.
  def find_metadata(fulltextIO)
    loop do
      text = fulltextIO.read(4096)
      break unless text
      if idx = text.index("\0")
        if idx + 4 >= text.size
          more = fulltextIO.read(4096)
          text.concat(more) if more
        end
        len = (text[idx+1, 4] || "").unpack("V")[0]
        return nil unless len              # truncated length field
        missing = idx + 5 + len - text.size
        if missing > 0
          more = fulltextIO.read(missing)
          text.concat(more) if more
        end
        footer = text[idx + 5, len - 1]
        parsed = /(.*?)\0(.*)/m.match(footer)
        return nil unless parsed           # truncated or malformed footer
        path, metadata = parsed.captures
        # NOTE: Marshal.load executes on whatever the index file contains;
        # only open index files from a trusted source.
        return [path, Marshal.load(metadata)]
      end
    end
    nil
  end

  # Reads +size+ bytes of fulltext starting +off+ bytes from the suffix at
  # position +index+ (clamped to the start of the stream). Returns nil when
  # +index+ is past the end of the suffix array.
  def get_string(sarrayIO, fulltextIO, index, size, off = 0)
    offset = index_to_offset(sarrayIO, index)
    return nil unless offset
    fulltextIO.pos = [offset + off, 0].max
    fulltextIO.read(size)
  end

  # Binary search over suffix array positions +from+..+to+ (inclusive).
  # Returns [position, fulltext offset] of a suffix starting with +term+, or
  # nil when there is none. Only the first max_query_size bytes of each
  # suffix are compared.
  def binary_search(sarrayIO, fulltextIO, term, from, to)
    middle = (from + to) / 2
    pivot = get_string(sarrayIO, fulltextIO, middle, @max_query_size)
    if from == to
      if pivot.index(term) == 0
        [middle, index_to_offset(sarrayIO, middle)]
      else
        nil
      end
    elsif term <= pivot
      binary_search(sarrayIO, fulltextIO, term, from, middle)
    elsif term > pivot
      binary_search(sarrayIO, fulltextIO, term, middle+1, to)
    end
  end
end # class FullTextIndex

end # module FastRI