parser 0.9.alpha

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Manifest.txt ADDED
@@ -0,0 +1,18 @@
1
+ .autotest
2
+ History.txt
3
+ Manifest.txt
4
+ README.txt
5
+ Rakefile
6
+ bin/ruby_parse
7
+ bin/ruby_parse_extract_error
8
+ lib/gauntlet_rubyparser.rb
9
+ lib/ruby18_parser.rb
10
+ lib/ruby18_parser.y
11
+ lib/ruby19_parser.rb
12
+ lib/ruby19_parser.y
13
+ lib/ruby_lexer.rb
14
+ lib/ruby_parser.rb
15
+ lib/ruby_parser_extras.rb
16
+ test/test_ruby_lexer.rb
17
+ test/test_ruby_parser.rb
18
+ test/test_ruby_parser_extras.rb
data/README.txt ADDED
@@ -0,0 +1,87 @@
1
+ = ruby_parser
2
+
3
+ home :: https://github.com/seattlerb/ruby_parser
4
+ bugs :: https://github.com/seattlerb/ruby_parser/issues
5
+ rdoc :: http://docs.seattlerb.org/ruby_parser
6
+
7
+ == DESCRIPTION:
8
+
9
+ ruby_parser (RP) is a ruby parser written in pure ruby (utilizing
10
+ racc--which does by default use a C extension). RP's output is
11
+ the same as ParseTree's output: s-expressions using ruby's arrays and
12
+ base types.
13
+
14
+ As an example:
15
+
16
+ def conditional1 arg1
17
+ return 1 if arg1 == 0
18
+ return 0
19
+ end
20
+
21
+ becomes:
22
+
23
+ s(:defn, :conditional1, s(:args, :arg1),
24
+ s(:if,
25
+ s(:call, s(:lvar, :arg1), :==, s(:lit, 0)),
26
+ s(:return, s(:lit, 1)),
27
+ nil),
28
+ s(:return, s(:lit, 0)))
29
+
30
+ == FEATURES/PROBLEMS:
31
+
32
+ * Pure ruby, no compiles.
33
+ * Includes preceding comment data for defn/defs/class/module nodes!
34
+ * Incredibly simple interface.
35
+ * Output is 100% equivalent to ParseTree.
36
+ * Can utilize PT's SexpProcessor and UnifiedRuby for language processing.
37
+ * Known Issue: Speed is now pretty good, but can always improve:
38
+ * RP parses a corpus of 3702 files in 125s (avg 108 Kb/s)
39
+ * MRI+PT parsed the same in 67.38s (avg 200.89 Kb/s)
40
+ * Known Issue: Code is much better, but still has a long way to go.
41
+ * Known Issue: Totally awesome.
42
+ * Known Issue: line number values can be slightly off. Parsing LR sucks.
43
+
44
+ == SYNOPSIS:
45
+
46
+ RubyParser.new.parse "1+1"
47
+ # => s(:call, s(:lit, 1), :+, s(:lit, 1))
48
+
49
+ You can also use Ruby19Parser, Ruby18Parser, or RubyParser.for_current_ruby:
50
+
51
+ RubyParser.for_current_ruby.parse "1+1"
52
+ # => s(:call, s(:lit, 1), :+, s(:lit, 1))
53
+
54
+ == REQUIREMENTS:
55
+
56
+ * ruby. woot.
57
+ * sexp_processor for Sexp and SexpProcessor classes, and testing.
58
+ * racc full package for parser development (compiling .y to .rb).
59
+
60
+ == INSTALL:
61
+
62
+ * sudo gem install ruby_parser
63
+
64
+ == LICENSE:
65
+
66
+ (The MIT License)
67
+
68
+ Copyright (c) Ryan Davis, seattle.rb
69
+
70
+ Permission is hereby granted, free of charge, to any person obtaining
71
+ a copy of this software and associated documentation files (the
72
+ 'Software'), to deal in the Software without restriction, including
73
+ without limitation the rights to use, copy, modify, merge, publish,
74
+ distribute, sublicense, and/or sell copies of the Software, and to
75
+ permit persons to whom the Software is furnished to do so, subject to
76
+ the following conditions:
77
+
78
+ The above copyright notice and this permission notice shall be
79
+ included in all copies or substantial portions of the Software.
80
+
81
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
82
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
83
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
84
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
85
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
86
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
87
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/Rakefile ADDED
@@ -0,0 +1,192 @@
1
+ # -*- ruby -*-
2
+
3
+ require 'rubygems'
4
+ require 'hoe'
5
+
6
+ Hoe.plugin :seattlerb
7
+ Hoe.plugin :racc
8
+ Hoe.plugin :isolate
9
+
10
+ Hoe.add_include_dirs "../../sexp_processor/dev/lib"
11
+
12
# Gem specification for the 'parser' package.
Hoe.spec 'parser' do
  developer 'Peter Zotov', 'whitequark@whitequark.org'
  dependency 'sexp_processor', '~> 4.1'

  # Append " -t" to the racc flags only when the racc plugin is active and
  # the DEBUG environment variable is set.
  if plugin?(:racc) && ENV["DEBUG"]
    self.racc_flags << " -t"
  end
end
19
+
20
# Each generated parser depends on the grammar file of the same version.
%w[18 19].each do |version|
  file "lib/ruby#{version}_parser.rb" => "lib/ruby#{version}_parser.y"
end

# The lexer is produced by running ragel over its .rl prerequisite.
file "lib/ruby_lexer.rb" => "lib/ruby_lexer.rl" do |t|
  source = t.prerequisites.first
  sh "ragel -R #{source} -o #{t.name}"
end
26
+
27
# Removes editor backups, diffs, coverage artifacts and racc .output files.
task :clean do
  leftovers = ["**/*~", "**/*.diff", "coverage.info", "coverage", "lib/*.output"]
  rm_rf Dir[*leftovers]
end
34
+
35
# Returns one more than the largest number found in the paths matching
# +glob+ (the first run of digits in each path is used), or 1 when nothing
# matches.
#
# Numbers are compared as integers; comparing the path strings themselves
# would rank "bug9.rb" above "bug10.rb".
def next_num(glob)
  # nil.to_i is 0, so an empty match list yields 1.
  Dir[glob].map { |path| path[/\d+/].to_i }.max.to_i + 1
end
38
+
39
desc "Compares PT to RP and deletes all files that match"
task :compare do
  files = Dir["unit/**/*.rb"]
  puts "Parsing #{files.size} files"
  files.each do |file|
    puts file
    # The argument-list form of system bypasses the shell, so paths with
    # spaces or shell metacharacters reach cmp.rb and rm unchanged. The file
    # is removed only when cmp.rb exits successfully.
    system("./cmp.rb", "-q", file) && system("rm", file)
  end
  # Afterwards, remove any directories under unit that are now empty.
  system 'find -d unit -type d -empty -exec rmdir {} \;'
end
49
+
50
desc "Compares PT to RP and stops on first failure"
task :find_bug do
  files = Dir["unit/**/*.rb"]
  puts "Parsing #{files.size} files"
  files.each do |file|
    puts file
    # Pass the path as its own argument so it is never re-parsed by a shell;
    # sh still aborts the task when cmp.rb fails.
    sh "./cmp.rb", "-q", file
  end
end
59
+
60
# Runs grepsort over the lexer and its test file with their def patterns.
task :sort do
  [
    ["^ +def",             "lib/ruby_lexer.rb"],
    ["^ +def (test|util)", "test/test_ruby_lexer.rb"],
  ].each do |pattern, path|
    sh %(grepsort "#{pattern}" #{path})
  end
end
64
+
65
# Prints line counts and flog scores for the 1.0.0 lexer versus the current
# lexer plus extras, followed by the difference between the two.
task :loc do
  released = "../1.0.0/lib/ruby_lexer.rb"
  current  = "lib/ruby_lexer.rb lib/ruby_parser_extras.rb"

  # Take the first integer / decimal in each tool's output; a missing
  # number becomes 0 / 0.0 through nil.to_i / nil.to_f.
  loc1  = `wc -l #{released}`[/\d+/].to_i
  flog1 = `flog -s #{released}`[/\d+\.\d+/].to_f
  loc2  = `cat #{current} | wc -l`[/\d+/].to_i
  flog2 = `flog -s #{current}`[/\d+\.\d+/].to_f

  puts "1.0.0: loc = #{loc1} flog = #{flog1}"
  puts "dev : loc = #{loc2} flog = #{flog2}"
  puts "delta: loc = #{loc2-loc1} flog = #{flog2-flog1}"
end
77
+
78
desc "Validate against all normal files in unit dir"
task(:validate) { sh "./cmp.rb unit/*.rb" }
82
+
83
# Runs bin/ruby_parse (quiet, with GC between files) under +cmd+ over the
# files named by ENV['FILES'] (default 'unit/*.rb'), capturing stdout and
# stderr in "<prefix>.<n>.txt", then prints that log.
#
# <n> is one more than the highest number among existing logs with the same
# prefix, or 1 when there are none.
def run_and_log cmd, prefix
  files = ENV['FILES'] || 'unit/*.rb'
  ext   = "txt"

  # nil.to_i is 0, so no blanket rescue is needed for the "no logs yet" case.
  last = Dir["#{prefix}.*.#{ext}"].map { |s| s[/\d+/].to_i }.max
  log  = "#{prefix}.#{last.to_i + 1}.#{ext}"

  # "> file 2>&1" is the portable spelling; "&>" is a bash extension that
  # other /bin/sh implementations parse as "run in background, then truncate".
  sh "#{cmd} #{Hoe::RUBY_FLAGS} bin/ruby_parse -q -g #{files} > #{log} 2>&1"

  puts File.read(log)
end
93
+
94
# :benchmark and :profile differ only in the command handed to run_and_log
# and in the log prefix, which equals the task name.
[["Benchmark", "ruby"], ["Profile", "zenprofile"]].each do |label, command|
  prefix = label.downcase

  desc "#{label} against all normal files in unit dir"
  task(prefix.to_sym) { run_and_log command, prefix }
end

desc "what was that command again?"
task(:huh?) { puts "ruby #{Hoe::RUBY_FLAGS} bin/ruby_parse -q -g ..." }
108
+
109
# Starts irb with lib/ on the load path and ruby_parser required, using the
# first gem path as GEM_HOME.
task :irb => [:isolate] do
  gem_home = Gem.path.first
  sh "GEM_HOME=#{gem_home} irb -rubygems -Ilib -rruby_parser;"
end

# The :phony task always reports the epoch as its timestamp.
task(:phony).define_singleton_method(:timestamp) { Time.at 0 }

task :isolate => :phony

# Both generated parsers additionally depend on :isolate.
%w[18 19].each do |version|
  file "lib/ruby#{version}_parser.rb" => :isolate
end
121
+
122
# compare18 / compare19: run yack.rb over the racc .output file and the
# parseNN.output file, show their unified diff, then print the diff's line
# count.
%w[18 19].each do |version|
  task "compare#{version}".to_sym do
    racc_txt = "racc#{version}.txt"
    yacc_txt = "yacc#{version}.txt"

    sh "./yack.rb lib/ruby#{version}_parser.output > #{racc_txt}"
    sh "./yack.rb parse#{version}.output > #{yacc_txt}"
    sh "diff -du #{racc_txt} #{yacc_txt} || true"
    puts
    sh "diff -du #{racc_txt} #{yacc_txt} | wc -l"
  end
end
137
+
138
# Parses one piece of ruby and pretty-prints the result.
#
# Environment:
#   V           "18" selects Ruby18Parser; anything else Ruby19Parser (default "19")
#   F / FILE    path of the file to parse
#   R / RUBY    source text, used when no file is given
#   RP_TIMEOUT  passed to the parser as the third argument (default 10)
task :debug => :isolate do
  ENV["V"] ||= "19"
  Rake.application[:parser].invoke # this way we can have DEBUG set

  $: << "lib"
  require 'ruby_parser'
  require 'pp'

  parser  = (ENV["V"] == "18" ? Ruby18Parser : Ruby19Parser).new
  timeout = (ENV["RP_TIMEOUT"] || 10).to_i
  path    = ENV["F"] || ENV["FILE"]

  if path then
    source = File.read(path)
  else
    path   = "env"
    source = ENV["R"] || ENV["RUBY"]
  end

  begin
    pp parser.process(source, path, timeout)
  rescue Racc::ParseError => err
    p err
    # Count the lines up to the lexer's current position to report where
    # parsing stopped.
    scanner  = parser.lexer.src
    consumed = scanner.string[0..scanner.pos]
    abort "on #{path}:#{consumed.split(/\n/).size}"
  end
end
173
+
174
# Pipes the combined output of `ruby19 -cwy` for F/FILE through yuck.rb.
task :debug_ruby do
  target = ENV["F"] || ENV["FILE"]
  sh "ruby19 -cwy #{target} 2>&1 | ./yuck.rb"
end

# Runs bin/ruby_parse_extract_error on F/FILE after invoking :parser.
task :extract => :isolate do
  ENV["V"] ||= "19"
  Rake.application[:parser].invoke # this way we can have DEBUG set

  target = ENV["F"] || ENV["FILE"]

  ruby "-Ilib", "bin/ruby_parse_extract_error", target
end

# Runs the debug task on every bug*.rb and removes each file that succeeds.
task(:bugs) { sh "for f in bug*.rb ; do rake19 debug F=$f && rm $f ; done" }
191
+
192
+ # vim: syntax=Ruby
data/bin/ruby_parse ADDED
@@ -0,0 +1,96 @@
1
#!/usr/bin/ruby -s

# Parses every file named on the command line with RubyParser and prints
# per-file and total throughput figures.
#
#   -q  quiet: print a dot per file instead of the parsed result
#   -g  call GC.start before each file is parsed

$q ||= false
$g ||= false

require 'rubygems'
require 'ruby_parser'
require 'pp'

good = bad = 0

multi = ARGV.size != 1
total_time = 0
total_loc = 0
total_kbytes = 0
times = {}
locs = {}
kbytes = {}

class File
  RUBY19 = "<3".respond_to? :encoding

  class << self
    # Fall back to plain read where strings have no encodings.
    alias :binread :read unless RUBY19
  end
end

begin
  ARGV.each do |file|
    rp = RubyParser.new

    # Read the file once and derive the statistics from its contents rather
    # than interpolating the name into `wc` shell commands, which broke on
    # names with spaces or shell metacharacters. A read failure is kept and
    # re-raised below so it is still reported and counted as bad.
    source = read_error = nil
    begin
      source = File.binread(file)
    rescue SystemCallError => e
      read_error = e
    end

    loc  = source ? source.count("\n") : 0        # same measure as wc -l
    size = source ? source.bytesize / 1024.0 : 0.0
    locs[file] = loc
    kbytes[file] = size
    total_loc += loc
    total_kbytes += size
    if $q then
      $stderr.print "."
    else
      warn "# file = #{file} loc = #{loc}"
    end
    GC.start if $g

    # NOTE: the timed section covers parsing (and printing) only; the file
    # has already been read above.
    t = Time.now
    begin
      raise read_error if read_error
      rp.reset
      r = rp.parse(source, file)
      pp r unless $q
      good += 1
    rescue SyntaxError => e
      warn "SyntaxError for #{file}: #{e.message}"
      bad += 1
    rescue => e
      warn "#{e.backtrace.first} #{e.inspect.gsub(/\n/, ' ')} for #{file}"
      warn " #{e.backtrace.join("\n ")}"
      bad += 1
    end

    t = Time.now - t
    times[file] = t
    total_time += t
  end
rescue Interrupt
  # do nothing -- fall through and report what was gathered so far
end

warn "done"

puts
puts "good = #{good} bad = #{bad}" if multi
puts

format = "%5.2fs:%9.2f l/s:%8.2f Kb/s:%5d Kb:%5d loc:%s"

# Slowest files first; files that took under 5ms are not listed.
times.sort_by { |f, t| -t }.each do |f, t|
  next if t < 0.005
  loc = locs[f]
  size = kbytes[f]
  puts format % [t, loc / t, size / t, size, loc, f]
end

puts

puts format % [total_time,
               total_loc / total_time,
               total_kbytes / total_time,
               total_kbytes,
               total_loc,
               "TOTAL"] unless total_time == 0
data/bin/ruby_parse_extract_error ADDED
@@ -0,0 +1,130 @@
1
+ #!/usr/bin/ruby -ws
2
+
3
+ $d ||= false
4
+ $d ||= ENV["DELETE"]
5
+ $t ||= false
6
+ $t ||= ENV["DELETE_TIMEOUT"]
7
+ $m ||= false
8
+ $m ||= ENV["MOVE_TIMEOUT"]
9
+ $q ||= false
10
+ $q ||= ENV["QUIET"]
11
+
12
+ require 'rubygems'
13
+ require 'ruby_parser'
14
+ require 'fileutils'
15
+
16
+ ARGV.push "-" if ARGV.empty?
17
+
18
class Racc::Parser
  ##
  # Returns the def/it ... end chunks found in the source from its start up
  # to the first matching "end" after the lexer's current position. Each
  # entry is a [text, indent] pair as produced by String#scan.
  #
  # Raises RuntimeError when the source is unavailable, when no def/it line
  # precedes the current position, or when no "end" at that line's
  # indentation follows it.

  def extract_defs
    ss = lexer.src

    raise "can't access source. possible encoding issue" unless ss

    src = ss.string
    pre_error = src[0...ss.pos]

    # String is not Enumerable on ruby 1.9+, so String#grep is unavailable
    # there; grep over the individual lines instead.
    defs = pre_error.lines.grep(/^ *(?:def|it)/)

    raise "can't figure out where the bad code starts" unless defs.last

    last_def_indent = defs.last[/^ */]

    post_error = src[ss.pos..-1]
    idx = post_error =~ /^#{last_def_indent}end.*/

    raise "can't figure out where the bad code ends" unless idx

    # Keep everything through the matched "end" line (plus the character
    # that follows it).
    src = pre_error + post_error[0..idx+$&.length]

    src.scan(/^(( *)(?:def|it) .*?^\2end)/m)
  end

  ##
  # Re-parses +defs+ (joined by blank lines) with a fresh parser of the same
  # class. Returns the parse result, or nil when parsing raises a
  # SyntaxError or any StandardError.

  def retest_for_errors defs
    parser = self.class.new

    parser.process(defs.join("\n\n"))
  rescue SyntaxError, StandardError
    nil
  end
end
51
+
52
# Expands +path+ into a list of candidate paths:
#
# * a directory  -> every regular file beneath it, sorted
# * an existing file -> that file
# * anything else -> whatever the glob pattern matches
#
# Existing directories and files are taken literally, so names containing
# glob metacharacters such as "[" are no longer lost by being globbed.
def expand path
  if File.directory? path then
    require 'find'

    files = []

    Find.find(path) do |f|
      files << f if File.file? f
    end

    files.sort
  elsif File.file? path then
    [path]
  else
    Dir.glob path
  end
end
67
+
68
# Tries to narrow a parse failure down to the smallest group of extracted
# defs that still fails to re-parse, and prints that group.
#
# Gives up with a warning when the full set of defs re-parses cleanly.
# RuntimeError and Racc::ParseError raised along the way are reported with
# warn rather than propagated.
def process_error parser
  defs = parser.extract_defs

  if parser.retest_for_errors defs then
    warn "Can't reproduce error with just methods, punting..."
    return
  end

  # Try ever larger combinations; stop at the first one that fails.
  1.upto(defs.size) do |count|
    culprit = defs.combination(count).find do |subset|
      !parser.retest_for_errors(subset)
    end

    next unless culprit

    puts culprit.join "\n"
    break
  end
rescue RuntimeError, Racc::ParseError => e
  warn "# process error: #{e.message.strip}"
end
89
+
90
# Parses +file+ ("-" means standard input) with Ruby19Parser, passing the
# RP_TIMEOUT value (default 10) as the third argument.
#
# * success: warns "good" and deletes the file when $d is set
# * Timeout::Error: sets $exit to 1, then moves the file under $m or deletes
#   it when $t is set
# * any other error: sets $exit to 1, reports it unless $q is set, and hands
#   the parser to process_error
def process file
  on_disk = file != "-" # stdin has nothing to delete or move
  parser  = nil

  ruby = on_disk ? File.read(file) : $stdin.read
  time = (ENV["RP_TIMEOUT"] || 10).to_i

  $stderr.print "# Validating #{file}: "
  parser = Ruby19Parser.new
  parser.process(ruby, file, time)
  warn "good"
  File.unlink file if $d && on_disk
rescue Timeout::Error
  $exit = 1
  warn "TIMEOUT parsing #{file}. Skipping."

  return unless on_disk

  if $m then
    dir = File.join $m, File.dirname(file)
    FileUtils.mkdir_p dir
    FileUtils.move file, dir
  elsif $t then
    File.unlink file
  end
rescue StandardError, SyntaxError, Racc::ParseError => e
  $exit = 1
  warn ""
  warn "# error: #{e.message.strip}" unless $q
  warn ""
  return if $q

  # The failure may have happened before a parser existed (e.g. the file
  # could not be read); there is nothing to extract from in that case.
  process_error parser if parser
end
119
+
120
$exit = 0
$stdout.sync = true

ARGV.each do |path|
  # "-" stands for standard input (it is pushed onto ARGV when no arguments
  # are given). It is neither a glob nor a file on disk, so hand it straight
  # to process instead of letting expand / File.file? discard it.
  if path == "-" then
    process path
    next
  end

  expand(path).each do |file|
    next unless File.file? file # omg... why would you name a dir support.rb?
    process file
  end
end

# Exit status is 1 if any file timed out or failed to parse, else 0.
exit $exit