parser 0.9.alpha

Sign up to get free protection for your applications and to get access to all the features.
data/Manifest.txt ADDED
@@ -0,0 +1,18 @@
1
+ .autotest
2
+ History.txt
3
+ Manifest.txt
4
+ README.txt
5
+ Rakefile
6
+ bin/ruby_parse
7
+ bin/ruby_parse_extract_error
8
+ lib/gauntlet_rubyparser.rb
9
+ lib/ruby18_parser.rb
10
+ lib/ruby18_parser.y
11
+ lib/ruby19_parser.rb
12
+ lib/ruby19_parser.y
13
+ lib/ruby_lexer.rb
14
+ lib/ruby_parser.rb
15
+ lib/ruby_parser_extras.rb
16
+ test/test_ruby_lexer.rb
17
+ test/test_ruby_parser.rb
18
+ test/test_ruby_parser_extras.rb
data/README.txt ADDED
@@ -0,0 +1,87 @@
1
+ = ruby_parser
2
+
3
+ home :: https://github.com/seattlerb/ruby_parser
4
+ bugs :: https://github.com/seattlerb/ruby_parser/issues
5
+ rdoc :: http://docs.seattlerb.org/ruby_parser
6
+
7
+ == DESCRIPTION:
8
+
9
+ ruby_parser (RP) is a ruby parser written in pure ruby (utilizing
10
+ racc--which does by default use a C extension). RP's output is
11
+ the same as ParseTree's output: s-expressions using ruby's arrays and
12
+ base types.
13
+
14
+ As an example:
15
+
16
+ def conditional1 arg1
17
+ return 1 if arg1 == 0
18
+ return 0
19
+ end
20
+
21
+ becomes:
22
+
23
+ s(:defn, :conditional1, s(:args, :arg1),
24
+ s(:if,
25
+ s(:call, s(:lvar, :arg1), :==, s(:lit, 0)),
26
+ s(:return, s(:lit, 1)),
27
+ nil),
28
+ s(:return, s(:lit, 0)))
29
+
30
+ == FEATURES/PROBLEMS:
31
+
32
+ * Pure ruby, no compiles.
33
+ * Includes preceding comment data for defn/defs/class/module nodes!
34
+ * Incredibly simple interface.
35
+ * Output is 100% equivalent to ParseTree.
36
+ * Can utilize PT's SexpProcessor and UnifiedRuby for language processing.
37
+ * Known Issue: Speed is now pretty good, but can always improve:
38
+ * RP parses a corpus of 3702 files in 125s (avg 108 Kb/s)
39
+ * MRI+PT parsed the same in 67.38s (avg 200.89 Kb/s)
40
+ * Known Issue: Code is much better, but still has a long way to go.
41
+ * Known Issue: Totally awesome.
42
+ * Known Issue: line number values can be slightly off. Parsing LR sucks.
43
+
44
+ == SYNOPSIS:
45
+
46
+ RubyParser.new.parse "1+1"
47
+ # => s(:call, s(:lit, 1), :+, s(:lit, 1))
48
+
49
+ You can also use Ruby19Parser, Ruby18Parser, or RubyParser.for_current_ruby:
50
+
51
+ RubyParser.for_current_ruby.parse "1+1"
52
+ # => s(:call, s(:lit, 1), :+, s(:lit, 1))
53
+
54
+ == REQUIREMENTS:
55
+
56
+ * ruby. woot.
57
+ * sexp_processor for Sexp and SexpProcessor classes, and testing.
58
+ * racc full package for parser development (compiling .y to .rb).
59
+
60
+ == INSTALL:
61
+
62
+ * sudo gem install ruby_parser
63
+
64
+ == LICENSE:
65
+
66
+ (The MIT License)
67
+
68
+ Copyright (c) Ryan Davis, seattle.rb
69
+
70
+ Permission is hereby granted, free of charge, to any person obtaining
71
+ a copy of this software and associated documentation files (the
72
+ 'Software'), to deal in the Software without restriction, including
73
+ without limitation the rights to use, copy, modify, merge, publish,
74
+ distribute, sublicense, and/or sell copies of the Software, and to
75
+ permit persons to whom the Software is furnished to do so, subject to
76
+ the following conditions:
77
+
78
+ The above copyright notice and this permission notice shall be
79
+ included in all copies or substantial portions of the Software.
80
+
81
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
82
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
83
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
84
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
85
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
86
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
87
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/Rakefile ADDED
@@ -0,0 +1,192 @@
1
+ # -*- ruby -*-
2
+
3
+ require 'rubygems'
4
+ require 'hoe'
5
+
6
+ Hoe.plugin :seattlerb
7
+ Hoe.plugin :racc
8
+ Hoe.plugin :isolate
9
+
10
+ Hoe.add_include_dirs "../../sexp_processor/dev/lib"
11
+
12
+ Hoe.spec 'parser' do
13
+ developer 'Peter Zotov', 'whitequark@whitequark.org'
14
+
15
+ dependency 'sexp_processor', '~> 4.1'
16
+
17
+ self.racc_flags << " -t" if plugin?(:racc) && ENV["DEBUG"]
18
+ end
19
+
20
+ file "lib/ruby18_parser.rb" => "lib/ruby18_parser.y"
21
+ file "lib/ruby19_parser.rb" => "lib/ruby19_parser.y"
22
+
23
+ file "lib/ruby_lexer.rb" => "lib/ruby_lexer.rl" do |t|
24
+ sh "ragel -R #{t.prerequisites.first} -o #{t.name}"
25
+ end
26
+
27
+ task :clean do
28
+ rm_rf(Dir["**/*~"] +
29
+ Dir["**/*.diff"] +
30
+ Dir["coverage.info"] +
31
+ Dir["coverage"] +
32
+ Dir["lib/*.output"])
33
+ end
34
+
35
+ def next_num(glob)
36
+ num = Dir[glob].max[/\d+/].to_i + 1
37
+ end
38
+
39
+ desc "Compares PT to RP and deletes all files that match"
40
+ task :compare do
41
+ files = Dir["unit/**/*.rb"]
42
+ puts "Parsing #{files.size} files"
43
+ files.each do |file|
44
+ puts file
45
+ system "./cmp.rb -q #{file} && rm #{file}"
46
+ end
47
+ system 'find -d unit -type d -empty -exec rmdir {} \;'
48
+ end
49
+
50
+ desc "Compares PT to RP and stops on first failure"
51
+ task :find_bug do
52
+ files = Dir["unit/**/*.rb"]
53
+ puts "Parsing #{files.size} files"
54
+ files.each do |file|
55
+ puts file
56
+ sh "./cmp.rb -q #{file}"
57
+ end
58
+ end
59
+
60
+ task :sort do
61
+ sh 'grepsort "^ +def" lib/ruby_lexer.rb'
62
+ sh 'grepsort "^ +def (test|util)" test/test_ruby_lexer.rb'
63
+ end
64
+
65
+ task :loc do
66
+ loc1 = `wc -l ../1.0.0/lib/ruby_lexer.rb`[/\d+/]
67
+ flog1 = `flog -s ../1.0.0/lib/ruby_lexer.rb`[/\d+\.\d+/]
68
+ loc2 = `cat lib/ruby_lexer.rb lib/ruby_parser_extras.rb | wc -l`[/\d+/]
69
+ flog2 = `flog -s lib/ruby_lexer.rb lib/ruby_parser_extras.rb`[/\d+\.\d+/]
70
+
71
+ loc1, loc2, flog1, flog2 = loc1.to_i, loc2.to_i, flog1.to_f, flog2.to_f
72
+
73
+ puts "1.0.0: loc = #{loc1} flog = #{flog1}"
74
+ puts "dev : loc = #{loc2} flog = #{flog2}"
75
+ puts "delta: loc = #{loc2-loc1} flog = #{flog2-flog1}"
76
+ end
77
+
78
+ desc "Validate against all normal files in unit dir"
79
+ task :validate do
80
+ sh "./cmp.rb unit/*.rb"
81
+ end
82
+
83
+ def run_and_log cmd, prefix
84
+ files = ENV['FILES'] || 'unit/*.rb'
85
+ p, x = prefix, "txt"
86
+ n = Dir["#{p}.*.#{x}"].map { |s| s[/\d+/].to_i }.max + 1 rescue 1
87
+ f = "#{p}.#{n}.#{x}"
88
+
89
+ sh "#{cmd} #{Hoe::RUBY_FLAGS} bin/ruby_parse -q -g #{files} &> #{f}"
90
+
91
+ puts File.read(f)
92
+ end
93
+
94
+ desc "Benchmark against all normal files in unit dir"
95
+ task :benchmark do
96
+ run_and_log "ruby", "benchmark"
97
+ end
98
+
99
+ desc "Profile against all normal files in unit dir"
100
+ task :profile do
101
+ run_and_log "zenprofile", "profile"
102
+ end
103
+
104
+ desc "what was that command again?"
105
+ task :huh? do
106
+ puts "ruby #{Hoe::RUBY_FLAGS} bin/ruby_parse -q -g ..."
107
+ end
108
+
109
+ task :irb => [:isolate] do
110
+ sh "GEM_HOME=#{Gem.path.first} irb -rubygems -Ilib -rruby_parser;"
111
+ end
112
+
113
+ def (task(:phony)).timestamp
114
+ Time.at 0
115
+ end
116
+
117
+ task :isolate => :phony
118
+
119
+ file "lib/ruby18_parser.rb" => :isolate
120
+ file "lib/ruby19_parser.rb" => :isolate
121
+
122
+ task :compare18 do
123
+ sh "./yack.rb lib/ruby18_parser.output > racc18.txt"
124
+ sh "./yack.rb parse18.output > yacc18.txt"
125
+ sh "diff -du racc18.txt yacc18.txt || true"
126
+ puts
127
+ sh "diff -du racc18.txt yacc18.txt | wc -l"
128
+ end
129
+
130
+ task :compare19 do
131
+ sh "./yack.rb lib/ruby19_parser.output > racc19.txt"
132
+ sh "./yack.rb parse19.output > yacc19.txt"
133
+ sh "diff -du racc19.txt yacc19.txt || true"
134
+ puts
135
+ sh "diff -du racc19.txt yacc19.txt | wc -l"
136
+ end
137
+
138
+ task :debug => :isolate do
139
+ ENV["V"] ||= "19"
140
+ Rake.application[:parser].invoke # this way we can have DEBUG set
141
+
142
+ $: << "lib"
143
+ require 'ruby_parser'
144
+ require 'pp'
145
+
146
+ parser = if ENV["V"] == "18" then
147
+ Ruby18Parser.new
148
+ else
149
+ Ruby19Parser.new
150
+ end
151
+
152
+ time = (ENV["RP_TIMEOUT"] || 10).to_i
153
+
154
+ file = ENV["F"] || ENV["FILE"]
155
+
156
+ ruby = if file then
157
+ File.read(file)
158
+ else
159
+ file = "env"
160
+ ENV["R"] || ENV["RUBY"]
161
+ end
162
+
163
+ begin
164
+ pp parser.process(ruby, file, time)
165
+ rescue Racc::ParseError => e
166
+ p e
167
+ ss = parser.lexer.src
168
+ src = ss.string
169
+ lines = src[0..ss.pos].split(/\n/)
170
+ abort "on #{file}:#{lines.size}"
171
+ end
172
+ end
173
+
174
+ task :debug_ruby do
175
+ file = ENV["F"] || ENV["FILE"]
176
+ sh "ruby19 -cwy #{file} 2>&1 | ./yuck.rb"
177
+ end
178
+
179
+ task :extract => :isolate do
180
+ ENV["V"] ||= "19"
181
+ Rake.application[:parser].invoke # this way we can have DEBUG set
182
+
183
+ file = ENV["F"] || ENV["FILE"]
184
+
185
+ ruby "-Ilib", "bin/ruby_parse_extract_error", file
186
+ end
187
+
188
+ task :bugs do
189
+ sh "for f in bug*.rb ; do rake19 debug F=$f && rm $f ; done"
190
+ end
191
+
192
+ # vim: syntax=Ruby
data/bin/ruby_parse ADDED
@@ -0,0 +1,96 @@
1
+ #!/usr/bin/ruby -s
2
+
3
+ $q ||= false
4
+ $g ||= false
5
+
6
+ require 'rubygems'
7
+ require 'ruby_parser'
8
+ require 'pp'
9
+
10
+ good = bad = 0
11
+
12
+ multi = ARGV.size != 1
13
+ total_time = 0
14
+ total_loc = 0
15
+ total_kbytes = 0
16
+ times = {}
17
+ locs = {}
18
+ kbytes = {}
19
+
20
+ class File
21
+ RUBY19 = "<3".respond_to? :encoding
22
+
23
+ class << self
24
+ alias :binread :read unless RUBY19
25
+ end
26
+ end
27
+
28
# Parse every file named on the command line, recording per-file line
# counts, sizes and parse times, and tallying successes and failures.
# Ctrl-C stops the loop early but still lets the report below run.
begin
  ARGV.each do |file|
    rp = RubyParser.new

    # Gather the statistics in-process rather than shelling out to wc:
    # interpolating the raw file name into a shell command breaks on names
    # containing spaces or shell metacharacters.
    data = begin
             File.binread(file)
           rescue SystemCallError
             "" # missing/unreadable: report zeros; the parse below records the failure
           end
    loc  = data.count("\n")     # same thing "wc -l" counts
    size = data.size / 1024.0   # binary read, so size is the byte count

    locs[file]    = loc
    kbytes[file]  = size
    total_loc    += loc
    total_kbytes += size

    if $q then
      $stderr.print "."
    else
      warn "# file = #{file} loc = #{loc}"
    end

    GC.start if $g

    # The timed region deliberately includes reading the file, as before.
    t = Time.now
    begin
      rp.reset
      r = rp.parse(File.binread(file), file)
      pp r unless $q
      good += 1
    rescue SyntaxError => e
      warn "SyntaxError for #{file}: #{e.message}"
      bad += 1
    rescue => e
      warn "#{e.backtrace.first} #{e.inspect.gsub(/\n/, ' ')} for #{file}"
      warn " #{e.backtrace.join("\n ")}"
      bad += 1
    end

    t = Time.now - t
    times[file] = t
    total_time += t
  end
rescue Interrupt
  # do nothing
end
68
+
69
warn "done"

puts
puts "good = #{good} bad = #{bad}" if multi
puts

# Columns: seconds, lines/s, Kb/s, Kb, lines of code, label.
format = "%5.2fs:%9.2f l/s:%8.2f Kb/s:%5d Kb:%5d loc:%s"

# One row per file, slowest first. Files that took under 5ms are skipped.
times.sort_by { |f, t| -t }.each do |f, t|
  next if t < 0.005
  loc = locs[f]
  size = kbytes[f]
  puts format % [t, loc / t, size / t, size, loc, f]
end

puts

# Grand total row; skipped when no time was recorded, which would
# otherwise divide by zero.
puts format % [total_time,
               total_loc / total_time,
               total_kbytes / total_time,
               total_kbytes,
               total_loc,
               "TOTAL"] unless total_time == 0
data/bin/ruby_parse_extract_error ADDED
@@ -0,0 +1,130 @@
1
+ #!/usr/bin/ruby -ws
2
+
3
+ $d ||= false
4
+ $d ||= ENV["DELETE"]
5
+ $t ||= false
6
+ $t ||= ENV["DELETE_TIMEOUT"]
7
+ $m ||= false
8
+ $m ||= ENV["MOVE_TIMEOUT"]
9
+ $q ||= false
10
+ $q ||= ENV["QUIET"]
11
+
12
+ require 'rubygems'
13
+ require 'ruby_parser'
14
+ require 'fileutils'
15
+
16
+ ARGV.push "-" if ARGV.empty?
17
+
18
class Racc::Parser
  # Collects the method-like chunks ("def ..." / "it ..." through the
  # matching "end") from the start of the source up to the end of the chunk
  # in which the lexer stopped.
  #
  # Relies on a +lexer+ method whose +src+ responds to +string+ and +pos+
  # (presumably supplied by the concrete parser class -- not defined here).
  #
  # Returns the String#scan result: an array of [chunk, indent] pairs.
  # Raises RuntimeError when the source is unavailable, or when no enclosing
  # def/it start or matching end can be located.
  def extract_defs
    ss = lexer.src

    raise "can't access source. possible encoding issue" unless ss

    src       = ss.string
    pre_error = src[0...ss.pos]

    # String is not Enumerable from Ruby 1.9 on, so String#grep no longer
    # exists; filter line by line through #lines instead.
    defs = pre_error.lines.grep(/^ *(?:def|it)/)

    raise "can't figure out where the bad code starts" unless defs.last

    # The closing "end" must sit at the same indentation as the last
    # def/it seen before the error position.
    last_def_indent = defs.last[/^ */]

    post_error = src[ss.pos..-1]
    idx = post_error =~ /^#{last_def_indent}end.*/

    raise "can't figure out where the bad code ends" unless idx

    # Keep everything through the matched "end" line ($& is that match).
    src = pre_error + post_error[0..idx+$&.length]

    src.scan(/^(( *)(?:def|it) .*?^\2end)/m)
  end

  # Parses +defs+ (joined with blank lines) using a fresh parser of the same
  # class as the receiver.
  #
  # Returns whatever the parse returns, or nil when it raises SyntaxError or
  # any StandardError -- so a nil result means "the error reproduces".
  def retest_for_errors defs
    parser = self.class.new

    parser.process(defs.join("\n\n"))
  rescue SyntaxError, StandardError
    nil
  end
end
51
+
52
# Turns a command-line path into a list of file names.
#
# A directory is walked recursively and every regular file beneath it is
# returned, sorted. Anything else is treated as a glob pattern and the
# matches are returned as Dir.glob produces them.
def expand path
  return Dir.glob(path) unless File.directory?(path)

  require 'find'

  found = []
  Find.find(*Dir[path]) { |entry| found << entry if File.file?(entry) }
  found.sort
end
67
+
68
# Tries to shrink a parse failure down to the smallest set of extracted
# chunks that still fails, and prints that set.
#
# +parser+ must respond to extract_defs and retest_for_errors. If the full
# set of chunks parses cleanly the failure cannot be reproduced this way and
# a warning is issued. Otherwise combinations are tried in increasing size
# and the first one that still fails is written to stdout.
#
# RuntimeError and Racc::ParseError raised along the way are reported as a
# warning rather than propagated.
def process_error parser
  defs = parser.extract_defs

  if parser.retest_for_errors(defs)
    warn "Can't reproduce error with just methods, punting..."
    return
  end

  catch :extract_done do
    (1..defs.size).each do |count|
      defs.combination(count) do |subset|
        # A truthy result means this subset parsed fine; keep looking.
        next if parser.retest_for_errors(subset)

        puts subset.join("\n")
        throw :extract_done
      end
    end
  end
rescue RuntimeError, Racc::ParseError => err
  warn "# process error: #{err.message.strip}"
end
89
+
90
# Validates a single file by parsing it with Ruby19Parser.
#
# file :: path to read, or "-" to read the source from stdin.
#
# The parse timeout in seconds comes from ENV["RP_TIMEOUT"] (default 10).
# Globals set at the top of the script steer the outcome:
#   $d :: delete the file when it parses cleanly
#   $m :: on timeout, move the file under this directory
#   $t :: on timeout (and no $m), delete the file
#   $q :: on error, skip the message and the error extraction
# Any timeout or error sets $exit to 1.
def process file
  ruby = file == "-" ? $stdin.read : File.read(file)
  time = (ENV["RP_TIMEOUT"] || 10).to_i

  $stderr.print "# Validating #{file}: "
  parser = Ruby19Parser.new
  parser.process(ruby, file, time)
  warn "good"
  File.unlink file if $d
rescue Timeout::Error
  $exit = 1
  warn "TIMEOUT parsing #{file}. Skipping."

  if $m then
    dir = File.join $m, File.dirname(file)
    FileUtils.mkdir_p dir
    FileUtils.move file, dir
  elsif $t then
    File.unlink file
  end
rescue StandardError, SyntaxError, Racc::ParseError => e
  $exit = 1
  warn ""
  warn "# error: #{e.message.strip}" unless $q
  warn ""
  return if $q

  # parser is still nil when the failure happened before it was created
  # (e.g. reading the file raised); there is nothing to extract from then,
  # and calling into process_error would only raise NoMethodError on nil.
  process_error parser if parser
end
119
+
120
+ $exit = 0
121
+ $stdout.sync = true
122
+
123
+ ARGV.each do |path|
124
+ expand(path).each do |file|
125
+ next unless File.file? file # omg... why would you name a dir support.rb?
126
+ process file
127
+ end
128
+ end
129
+
130
+ exit $exit