ruby_parser 1.0.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ruby_parser might be problematic. Click here for more details.
- data/.autotest +26 -3
- data/History.txt +108 -0
- data/Manifest.txt +3 -0
- data/README.txt +1 -1
- data/Rakefile +126 -28
- data/bin/ruby_parse +89 -0
- data/lib/ruby_lexer.rb +1117 -2536
- data/lib/ruby_parser.rb +5407 -5849
- data/lib/ruby_parser.y +1763 -1621
- data/lib/ruby_parser_extras.rb +1051 -0
- data/test/test_ruby_lexer.rb +1607 -267
- data/test/test_ruby_parser.rb +317 -175
- data/test/test_ruby_parser_extras.rb +177 -0
- metadata +27 -10
data/.autotest
CHANGED
@@ -1,9 +1,14 @@
|
|
1
1
|
# -*- ruby -*-
|
2
2
|
|
3
|
+
require 'autotest/restart'
|
4
|
+
|
3
5
|
Autotest.add_hook :initialize do |at|
|
4
6
|
at.extra_files << "../../ParseTree/dev/test/pt_testcase.rb"
|
5
|
-
at.libs << ":../../ParseTree/dev/lib:../../ParseTree/dev/test"
|
6
|
-
at.
|
7
|
+
at.libs << ":../../ParseTree/dev/lib:../../ParseTree/dev/test:../../sexp_processor/dev/lib"
|
8
|
+
at.add_exception 'unit'
|
9
|
+
at.add_exception 'coverage'
|
10
|
+
at.add_exception '.diff'
|
11
|
+
at.add_exception 'coverage.info'
|
7
12
|
|
8
13
|
at.unit_diff = "unit_diff -u -b"
|
9
14
|
|
@@ -12,10 +17,28 @@ Autotest.add_hook :initialize do |at|
|
|
12
17
|
end
|
13
18
|
|
14
19
|
at.add_mapping(/pt_testcase.rb/) do |f, _|
|
15
|
-
at.files_matching(
|
20
|
+
at.files_matching(/test_.*rb$/)
|
21
|
+
end
|
22
|
+
|
23
|
+
%w(TestEnvironment TestStackState).each do |klass|
|
24
|
+
at.extra_class_map[klass] = "test/test_ruby_parser_extras.rb"
|
25
|
+
end
|
26
|
+
|
27
|
+
%w(TestRubyParser TestParseTree).each do |klass| # HACK
|
28
|
+
at.extra_class_map[klass] = "test/test_ruby_parser.rb"
|
16
29
|
end
|
17
30
|
end
|
18
31
|
|
19
32
|
Autotest.add_hook :run_command do |at|
|
20
33
|
system "rake parser"
|
21
34
|
end
|
35
|
+
|
36
|
+
class Autotest
|
37
|
+
def ruby
|
38
|
+
File.expand_path "~/.multiruby/install/1.9.0-0/bin/ruby"
|
39
|
+
end
|
40
|
+
end if ENV['ONENINE']
|
41
|
+
|
42
|
+
# require 'autotest/rcov'
|
43
|
+
# Autotest::RCov.command = 'rcov_info'
|
44
|
+
# Autotest::RCov.pattern = 'test/test_ruby_lexer.rb'
|
data/History.txt
CHANGED
@@ -1,3 +1,111 @@
|
|
1
|
+
== 2.0.0 / 2008-10-22
|
2
|
+
|
3
|
+
* 1 major enhancement
|
4
|
+
|
5
|
+
* Brought on the AWESOME! 4x faster! no known lexing/parsing bugs!
|
6
|
+
|
7
|
+
* 71 minor enhancements
|
8
|
+
|
9
|
+
* 1.9: Added Fixnum#ord.
|
10
|
+
* 1.9: Added missing Regexp constants and did it so it'd work on 1.9.
|
11
|
+
* Added #store_comment and #comments
|
12
|
+
* Added StringScanner #begin_of_line?
|
13
|
+
* Added a bunch of tests for regexp escape chars, #parse_string, #read_escape, ? numbers, ? whitespace.
|
14
|
+
* Added a hack for rubinius' r2l eval bug.
|
15
|
+
* Added a new token type tSTRING that bypasses tSTRING_BEG/END entirely. Only does non-interpolated strings and then falls back to the old way. MUCH cleaner tho.
|
16
|
+
* Added bin/ruby_parse
|
17
|
+
* Added compare rule to Rakefile.
|
18
|
+
* Added coverage files/dirs to clean rule.
|
19
|
+
* Added file and line numbers to all sexp nodes. Column/ranges to come.
|
20
|
+
* Added lex_state change for lvars at the end of yylex.
|
21
|
+
* Added lexed comments to defn/defs/class/module nodes.
|
22
|
+
* Added stats gathering for yylex. Reordered yylex for avg data
|
23
|
+
* Added tSYMBOL token type and parser rule to speed up symbol lexing.
|
24
|
+
* Added tally output for getch, unread, and unread_many.
|
25
|
+
* Added tests for ambiguous uminus/uplus, backtick in cmdarg, square and curly brackets, numeric gvars, eos edge cases, string quoting %<> and %%%.
|
26
|
+
* All cases throughout yylex now return directly if they match, no passthroughs.
|
27
|
+
* All lexer cases now slurp entire token in one swoop.
|
28
|
+
* All zarrays are now just empty arrays.
|
29
|
+
* Changed s(:block_arg, :blah) to :"&blah" in args sexp.
|
30
|
+
* Cleaned up lexer error handling. Now just raises all over.
|
31
|
+
* Cleaned up read_escape and regx_options
|
32
|
+
* Cleaned up tokadd_string (for some definition of cleaned).
|
33
|
+
* Converted single quoted strings to new tSTRING token type.
|
34
|
+
* Coverage is currently 94.4% on lexer.
|
35
|
+
* Done what I can to clean up heredoc lexing... still sucks.
|
36
|
+
* Flattened resbodies in rescue node. Fixed .autotest file.
|
37
|
+
* Folded lex_keywords back in now that it screams.
|
38
|
+
* Found very last instanceof ILiteralNode in the code. haha!
|
39
|
+
* Got the tests subclassing PTTC and cleaned up a lot. YAY
|
40
|
+
* Handle yield(*ary) properly
|
41
|
+
* MASSIVELY cleaned out =begin/=end comment processor.
|
42
|
+
* Massive overhaul on Keyword class. All hail the mighty Hash!
|
43
|
+
* Massively cleaned up ident= edge cases and fixed a stupid bug from jruby.
|
44
|
+
* Merged @/@@ scanner together, going to try to do the same everywhere.
|
45
|
+
* Refactored fix_arg_lex_state, common across the lexer.
|
46
|
+
* Refactored new_fcall into new_call.
|
47
|
+
* Refactored some code to get better profile numbers.
|
48
|
+
* Refactored some more #fix_arg_lex_state.
|
49
|
+
* Refactored tail of yylex into its own method.
|
50
|
+
* Removed Module#kill
|
51
|
+
* Removed Token, replaced with Sexp.
|
52
|
+
* Removed all parse_number and parse_quote tests.
|
53
|
+
* Removed argspush, argscat. YAY!
|
54
|
+
* Removed as many token_buffer.split(//)'s as possible. 1 to go.
|
55
|
+
* Removed begins from compstmts
|
56
|
+
* Removed buffer arg for tokadd_string.
|
57
|
+
* Removed crufty (?) solo '@' token... wtf was that anyhow?
|
58
|
+
* Removed most jruby/stringio cruft from StringScanner.
|
59
|
+
* Removed one unread_many... 2 to go. They're harder.
|
60
|
+
* Removed store_comment, now done directly.
|
61
|
+
* Removed token_buffer. Now I just use token ivar.
|
62
|
+
* Removed use of s() from lexer. Changed the way line numbers are gathered.
|
63
|
+
* Renamed *qwords to *awords.
|
64
|
+
* Renamed StringScanner to RPStringScanner (a subclass) to fix namespace trashing.
|
65
|
+
* Renamed parse to process and aliased to parse.
|
66
|
+
* Renamed token_buffer to string_buffer since that arcane shit still needs it.
|
67
|
+
* Resolved the rest of the lexing issues I brought up w/ ruby-core.
|
68
|
+
* Revamped tokadd_escape.
|
69
|
+
* Rewrote Keyword and KWtable.
|
70
|
+
* Rewrote RubyLexer using StringScanner.
|
71
|
+
* Rewrote tokadd_escape. 79 lines down to 21.
|
72
|
+
* Split out lib/ruby_parser_extras.rb so lexer is standalone.
|
73
|
+
* Started to clean up the parser and make it as skinny as possible
|
74
|
+
* Stripped out as much code as possible.
|
75
|
+
* Stripped yylex of some dead code.
|
76
|
+
* Switched from StringIO to StringScanner.
|
77
|
+
* Updated rakefile for new hoe.
|
78
|
+
* Uses pure ruby racc if ENV['PURE_RUBY'], otherwise use c.
|
79
|
+
* Wrote a ton of lexer tests. Coverage is as close to 100% as possible.
|
80
|
+
* Wrote args to clean up the big nasty args processing grammar section.
|
81
|
+
* lex_strterm is now a plain array, removed RubyLexer#s(...).
|
82
|
+
* yield and super now flatten args.
|
83
|
+
|
84
|
+
* 21+ bug fixes:
|
85
|
+
|
86
|
+
* I'm sure this list is missing a lot:
|
87
|
+
* Fixed 2 bugs both involving attrasgn (and ilk) esp when lhs is an array.
|
88
|
+
* Fixed a bug in the lexer for strings with single digit hex escapes.
|
89
|
+
* Fixed a bug parsing: a (args) { expr }... the space caused a different route to be followed and all hell broke loose.
|
90
|
+
* Fixed a bug with x\n=beginvar not putting begin back.
|
91
|
+
* Fixed attrasgn to have arglists, not arrays.
|
92
|
+
* Fixed bug in defn/defs with block fixing.
|
93
|
+
* Fixed class/module's name slot if colon2/3.
|
94
|
+
* Fixed dstr with empty interpolation body.
|
95
|
+
* Fixed for 1.9 string/char changes.
|
96
|
+
* Fixed lexer BS wrt determining token type of words.
|
97
|
+
* Fixed lexer BS wrt pass through values and lexing words. SO STUPID.
|
98
|
+
* Fixed lexing of floats.
|
99
|
+
* Fixed lexing of identifiers followed by equals. I hope.
|
100
|
+
* Fixed masgn with splat on lhs
|
101
|
+
* Fixed new_super to deal with block_pass correctly.
|
102
|
+
* Fixed parser's treatment of :colon2 and :colon3.
|
103
|
+
* Fixed regexp scanning of escaped numbers, ANY number is valid, not just octs.
|
104
|
+
* Fixed string scanning of escaped octs, allowing 1-3 chars.
|
105
|
+
* Fixed unescape for \n
|
106
|
+
* Fixed: omg this is stupid. '()' was returning bare nil
|
107
|
+
* Fixed: remove_begin now goes to the end, not sure why it didn't before.
|
108
|
+
|
1
109
|
== 1.0.0 / 2007-12-20
|
2
110
|
|
3
111
|
* 1 major enhancement
|
data/Manifest.txt
CHANGED
data/README.txt
CHANGED
@@ -12,6 +12,7 @@ base types.
|
|
12
12
|
== FEATURES/PROBLEMS:
|
13
13
|
|
14
14
|
* Pure ruby, no compiles.
|
15
|
+
* Includes preceding comment data for defn/defs/class/module nodes!
|
15
16
|
* Incredibly simple interface.
|
16
17
|
* Output is 100% equivalent to ParseTree.
|
17
18
|
* Can utilize PT's SexpProcessor and UnifiedRuby for language processing.
|
@@ -20,7 +21,6 @@ base types.
|
|
20
21
|
* Known Issue: I don't currently support newline nodes.
|
21
22
|
* Known Issue: Totally awesome.
|
22
23
|
* Known Issue: dasgn_curr decls can be out of order from ParseTree's.
|
23
|
-
* TODO: Add comment nodes.
|
24
24
|
|
25
25
|
== SYNOPSIS:
|
26
26
|
|
data/Rakefile
CHANGED
@@ -2,31 +2,27 @@
|
|
2
2
|
|
3
3
|
require 'rubygems'
|
4
4
|
require 'hoe'
|
5
|
-
require './lib/ruby_lexer.rb'
|
6
5
|
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
p.summary = p.paragraphs_of('README.txt', 2).join("\n\n")
|
12
|
-
p.description = p.paragraphs_of('README.txt', 2..6).join("\n\n")
|
13
|
-
p.url = p.paragraphs_of('README.txt', 0).first.split(/\n/)[-1]
|
14
|
-
p.changes = p.paragraphs_of('History.txt', 0..1).join("\n\n")
|
15
|
-
p.extra_deps << 'ParseTree'
|
16
|
-
end
|
6
|
+
Hoe.add_include_dirs("../../ParseTree/dev/lib",
|
7
|
+
"../../ParseTree/dev/test",
|
8
|
+
"../../RubyInline/dev/lib",
|
9
|
+
"../../sexp_processor/dev/lib")
|
17
10
|
|
18
|
-
|
11
|
+
require './lib/ruby_parser_extras.rb'
|
19
12
|
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
13
|
+
hoe = Hoe.new('ruby_parser', RubyParser::VERSION) do |parser|
|
14
|
+
parser.rubyforge_name = 'parsetree'
|
15
|
+
parser.developer('Ryan Davis', 'ryand-ruby@zenspider.com')
|
16
|
+
parser.extra_deps << 'ParseTree'
|
17
|
+
|
18
|
+
parser.extra_deps << ['sexp_processor', '>= 3.0.0']
|
24
19
|
end
|
25
20
|
|
26
|
-
|
21
|
+
hoe.spec.files += ['lib/ruby_parser.rb'] # jim.... cmon man
|
27
22
|
|
28
|
-
|
29
|
-
task
|
23
|
+
[:default, :multi, :test].each do |t|
|
24
|
+
task t => :parser
|
25
|
+
end
|
30
26
|
|
31
27
|
path = "pkg/ruby_parser-#{RubyParser::VERSION}"
|
32
28
|
task path => :parser do
|
@@ -35,22 +31,124 @@ task path => :parser do
|
|
35
31
|
end
|
36
32
|
end
|
37
33
|
|
34
|
+
desc "build the parser"
|
38
35
|
task :parser => ["lib/ruby_parser.rb"]
|
39
36
|
|
40
37
|
rule '.rb' => '.y' do |t|
|
41
|
-
|
38
|
+
# -v = verbose
|
39
|
+
# -t = debugging parser ~4% reduction in speed -- keep for now
|
40
|
+
# -l = no-line-convert
|
41
|
+
sh "racc -v -t -l -o #{t.name} #{t.source}"
|
42
42
|
end
|
43
43
|
|
44
44
|
task :clean do
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
45
|
+
rm_rf(Dir["**/*~"] +
|
46
|
+
Dir["**/*.diff"] +
|
47
|
+
Dir["coverage.info"] +
|
48
|
+
Dir["coverage"] +
|
49
|
+
Dir["lib/ruby_parser.rb"] +
|
50
|
+
Dir["lib/*.output"])
|
51
|
+
end
|
52
|
+
|
53
|
+
def next_num(glob)
|
54
|
+
num = Dir[glob].max[/\d+/].to_i + 1
|
55
|
+
end
|
56
|
+
|
57
|
+
begin
|
58
|
+
require 'rcov/rcovtask'
|
59
|
+
Rcov::RcovTask.new do |t|
|
60
|
+
pattern = ENV['PATTERN'] || 'test/test_ruby_*.rb'
|
61
|
+
|
62
|
+
t.test_files = FileList[pattern]
|
63
|
+
t.verbose = true
|
64
|
+
t.rcov_opts << "--threshold 80"
|
65
|
+
t.rcov_opts << "--no-color"
|
66
|
+
end
|
67
|
+
rescue LoadError
|
68
|
+
# skip
|
69
|
+
end
|
70
|
+
|
71
|
+
desc "Compares PT to RP and deletes all files that match"
|
72
|
+
task :compare do
|
73
|
+
files = Dir["unit/**/*.rb"]
|
74
|
+
puts "Parsing #{files.size} files"
|
75
|
+
files.each do |file|
|
76
|
+
puts file
|
77
|
+
system "./cmp.rb -q #{file} && rm #{file}"
|
78
|
+
end
|
79
|
+
system 'find -d unit -type d -empty -exec rmdir {} \;'
|
80
|
+
end
|
81
|
+
|
82
|
+
desc "Compares PT to RP and stops on first failure"
|
83
|
+
task :find_bug do
|
84
|
+
files = Dir["unit/**/*.rb"]
|
85
|
+
puts "Parsing #{files.size} files"
|
86
|
+
files.each do |file|
|
87
|
+
puts file
|
88
|
+
sh "./cmp.rb -q #{file}"
|
89
|
+
end
|
90
|
+
end
|
91
|
+
|
92
|
+
task :sort do
|
93
|
+
sh 'grepsort "^ +def" lib/ruby_lexer.rb'
|
94
|
+
sh 'grepsort "^ +def (test|util)" test/test_ruby_lexer.rb'
|
49
95
|
end
|
50
96
|
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
97
|
+
task :rcov_info => :parser do
|
98
|
+
pattern = ENV['PATTERN'] || "test/test_*.rb"
|
99
|
+
ruby "-Ilib -S rcov --text-report --save coverage.info #{pattern}"
|
100
|
+
end
|
101
|
+
|
102
|
+
task :rcov_overlay do
|
103
|
+
rcov, eol = Marshal.load(File.read("coverage.info")).last[ENV["FILE"]], 1
|
104
|
+
puts rcov[:lines].zip(rcov[:coverage]).map { |line, coverage|
|
105
|
+
bol, eol = eol, eol + line.length
|
106
|
+
[bol, eol, "#ffcccc"] unless coverage
|
107
|
+
}.compact.inspect
|
108
|
+
end
|
109
|
+
|
110
|
+
task :loc do
|
111
|
+
loc1 = `wc -l ../1.0.0/lib/ruby_lexer.rb`[/\d+/]
|
112
|
+
flog1 = `flog -s ../1.0.0/lib/ruby_lexer.rb`[/\d+\.\d+/]
|
113
|
+
loc2 = `cat lib/ruby_lexer.rb lib/ruby_parser_extras.rb | wc -l`[/\d+/]
|
114
|
+
flog2 = `flog -s lib/ruby_lexer.rb lib/ruby_parser_extras.rb`[/\d+\.\d+/]
|
115
|
+
|
116
|
+
loc1, loc2, flog1, flog2 = loc1.to_i, loc2.to_i, flog1.to_f, flog2.to_f
|
117
|
+
|
118
|
+
puts "1.0.0: loc = #{loc1} flog = #{flog1}"
|
119
|
+
puts "dev : loc = #{loc2} flog = #{flog2}"
|
120
|
+
puts "delta: loc = #{loc2-loc1} flog = #{flog2-flog1}"
|
121
|
+
end
|
122
|
+
|
123
|
+
desc "Validate against all normal files in unit dir"
|
124
|
+
task :validate do
|
125
|
+
sh "./cmp.rb unit/*.rb"
|
126
|
+
end
|
127
|
+
|
128
|
+
def run_and_log cmd, prefix
|
129
|
+
files = ENV['FILES'] || 'unit/*.rb'
|
130
|
+
p, x = prefix, "txt"
|
131
|
+
n = Dir["#{p}.*.#{x}"].map { |s| s[/\d+/].to_i }.max + 1 rescue 1
|
132
|
+
f = "#{p}.#{n}.#{x}"
|
133
|
+
|
134
|
+
sh "#{cmd} #{Hoe::RUBY_FLAGS} bin/ruby_parse -q -g #{files} &> #{f}"
|
135
|
+
|
136
|
+
puts File.read(f)
|
137
|
+
end
|
138
|
+
|
139
|
+
desc "Benchmark against all normal files in unit dir"
|
140
|
+
task :benchmark do
|
141
|
+
run_and_log "ruby", "benchmark"
|
142
|
+
end
|
143
|
+
|
144
|
+
desc "Profile against all normal files in unit dir"
|
145
|
+
task :profile do
|
146
|
+
run_and_log "zenprofile", "profile"
|
147
|
+
end
|
148
|
+
|
149
|
+
desc "what was that command again?"
|
150
|
+
task :huh? do
|
151
|
+
puts "ruby #{Hoe::RUBY_FLAGS} bin/ruby_parse -q -g ..."
|
152
|
+
end
|
55
153
|
|
56
154
|
# vim: syntax=Ruby
|
data/bin/ruby_parse
ADDED
@@ -0,0 +1,89 @@
|
|
1
|
+
#!/usr/bin/ruby -s
|
2
|
+
|
3
|
+
$q ||= false
|
4
|
+
$g ||= false
|
5
|
+
|
6
|
+
require 'rubygems'
|
7
|
+
require 'ruby_parser'
|
8
|
+
require 'pp'
|
9
|
+
|
10
|
+
good = bad = 0
|
11
|
+
|
12
|
+
multi = ARGV.size != 1
|
13
|
+
total_time = 0
|
14
|
+
total_loc = 0
|
15
|
+
total_kbytes = 0
|
16
|
+
times = {}
|
17
|
+
locs = {}
|
18
|
+
kbytes = {}
|
19
|
+
|
20
|
+
begin
|
21
|
+
ARGV.each do |file|
|
22
|
+
rp = RubyParser.new
|
23
|
+
loc = `wc -l #{file}`.strip.to_i
|
24
|
+
size = `wc -c #{file}`.strip.to_i / 1024.0
|
25
|
+
locs[file] = loc
|
26
|
+
kbytes[file] = size
|
27
|
+
total_loc += loc
|
28
|
+
total_kbytes += size
|
29
|
+
if $q then
|
30
|
+
$stderr.print "."
|
31
|
+
else
|
32
|
+
warn "# file = #{file} loc = #{loc}"
|
33
|
+
end
|
34
|
+
GC.start if $g
|
35
|
+
|
36
|
+
t = Time.now
|
37
|
+
begin
|
38
|
+
begin
|
39
|
+
rp.reset
|
40
|
+
r = rp.parse(File.read(file), file)
|
41
|
+
pp r unless $q
|
42
|
+
good += 1
|
43
|
+
rescue SyntaxError => e
|
44
|
+
warn "SyntaxError for #{file}: #{e.message}"
|
45
|
+
bad += 1
|
46
|
+
end
|
47
|
+
rescue => e
|
48
|
+
warn "#{e.backtrace.first} #{e.inspect.gsub(/\n/, ' ')} for #{file}"
|
49
|
+
warn " #{e.backtrace.join("\n ")}"
|
50
|
+
bad += 1
|
51
|
+
end
|
52
|
+
|
53
|
+
t = Time.now - t
|
54
|
+
times[file] = t
|
55
|
+
total_time += t
|
56
|
+
end
|
57
|
+
rescue Interrupt
|
58
|
+
# do nothing
|
59
|
+
end
|
60
|
+
|
61
|
+
warn "done"
|
62
|
+
|
63
|
+
total = 0
|
64
|
+
times.values.each do |t|
|
65
|
+
total += t
|
66
|
+
end
|
67
|
+
|
68
|
+
puts
|
69
|
+
puts "good = #{good} bad = #{bad}" if multi
|
70
|
+
puts
|
71
|
+
|
72
|
+
format = "%5.2fs:%9.2f l/s:%8.2f Kb/s:%5d Kb:%5d loc:%s"
|
73
|
+
|
74
|
+
times.sort_by { |f, t| -t }.each do |f, t|
|
75
|
+
next if t < 0.005
|
76
|
+
loc = locs[f]
|
77
|
+
size = kbytes[f]
|
78
|
+
puts format % [t, loc / t, size / t, size, loc, f]
|
79
|
+
end
|
80
|
+
|
81
|
+
puts
|
82
|
+
|
83
|
+
puts format % [total_time,
|
84
|
+
total_loc / total_time,
|
85
|
+
total_kbytes / total_time,
|
86
|
+
total_kbytes,
|
87
|
+
total_loc,
|
88
|
+
"TOTAL"]
|
89
|
+
|