rubylexer 0.6.2 → 0.7.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (76) hide show
  1. data/History.txt +55 -0
  2. data/Manifest.txt +67 -0
  3. data/README.txt +103 -0
  4. data/Rakefile +24 -0
  5. data/howtouse.txt +9 -6
  6. data/{assert.rb → lib/assert.rb} +11 -11
  7. data/{rubylexer.rb → lib/rubylexer.rb} +645 -342
  8. data/lib/rubylexer/0.6.2.rb +39 -0
  9. data/lib/rubylexer/0.6.rb +5 -0
  10. data/lib/rubylexer/0.7.0.rb +2 -0
  11. data/{charhandler.rb → lib/rubylexer/charhandler.rb} +4 -2
  12. data/{charset.rb → lib/rubylexer/charset.rb} +4 -3
  13. data/{context.rb → lib/rubylexer/context.rb} +48 -18
  14. data/{rubycode.rb → lib/rubylexer/rubycode.rb} +5 -3
  15. data/{rulexer.rb → lib/rubylexer/rulexer.rb} +180 -102
  16. data/{symboltable.rb → lib/rubylexer/symboltable.rb} +10 -1
  17. data/{token.rb → lib/rubylexer/token.rb} +72 -20
  18. data/{tokenprinter.rb → lib/rubylexer/tokenprinter.rb} +39 -16
  19. data/lib/rubylexer/version.rb +3 -0
  20. data/{testcode → test/code}/deletewarns.rb +0 -0
  21. data/test/code/dl_all_gems.rb +43 -0
  22. data/{testcode → test/code}/dumptokens.rb +12 -9
  23. data/test/code/locatetest +30 -0
  24. data/test/code/locatetest.rb +49 -0
  25. data/test/code/rubylexervsruby.rb +173 -0
  26. data/{testcode → test/code}/tokentest.rb +62 -51
  27. data/{testcode → test/code}/torment +8 -8
  28. data/test/code/unpack_all_gems.rb +15 -0
  29. data/{testdata → test/data}/1.rb.broken +0 -0
  30. data/{testdata → test/data}/23.rb +0 -0
  31. data/test/data/__end__.rb +2 -0
  32. data/test/data/__end__2.rb +3 -0
  33. data/test/data/and.rb +5 -0
  34. data/test/data/blockassigntest.rb +23 -0
  35. data/test/data/chunky.plain.rb +75 -0
  36. data/test/data/chunky_bacon.rb +112 -0
  37. data/test/data/chunky_bacon2.rb +112 -0
  38. data/test/data/chunky_bacon3.rb +112 -0
  39. data/test/data/chunky_bacon4.rb +112 -0
  40. data/test/data/for.rb +45 -0
  41. data/test/data/format.rb +6 -0
  42. data/{testdata → test/data}/g.rb +0 -0
  43. data/test/data/gemlist.txt +280 -0
  44. data/test/data/heart.rb +7 -0
  45. data/test/data/if.rb +6 -0
  46. data/test/data/jarh.rb +369 -0
  47. data/test/data/lbrace.rb +4 -0
  48. data/test/data/lbrack.rb +4 -0
  49. data/{testdata → test/data}/newsyntax.rb +0 -0
  50. data/{testdata → test/data}/noeolatend.rb +0 -0
  51. data/test/data/p-op.rb +8 -0
  52. data/{testdata → test/data}/p.rb +671 -79
  53. data/{testdata → test/data}/pleac.rb.broken +0 -0
  54. data/{testdata → test/data}/pre.rb +0 -0
  55. data/{testdata → test/data}/pre.unix.rb +0 -0
  56. data/{testdata → test/data}/regtest.rb +0 -0
  57. data/test/data/rescue.rb +35 -0
  58. data/test/data/s.rb +186 -0
  59. data/test/data/strinc.rb +2 -0
  60. data/{testdata → test/data}/tokentest.assert.rb.can +0 -0
  61. data/test/data/untermed_here.rb.broken +2 -0
  62. data/test/data/untermed_string.rb.broken +1 -0
  63. data/{testdata → test/data}/untitled1.rb +0 -0
  64. data/{testdata → test/data}/w.rb +0 -0
  65. data/{testdata → test/data}/wsdlDriver.rb +0 -0
  66. data/testing.txt +6 -4
  67. metadata +163 -59
  68. data/README +0 -134
  69. data/Rantfile +0 -37
  70. data/io.each_til_charset.rb +0 -247
  71. data/require.rb +0 -103
  72. data/rlold.rb +0 -12
  73. data/testcode/locatetest +0 -12
  74. data/testcode/rubylexervsruby.rb +0 -104
  75. data/testcode/rubylexervsruby.sh +0 -51
  76. data/testresults/placeholder +0 -0
data/History.txt ADDED
@@ -0,0 +1,55 @@
1
+ === 0.7.0/2-15-2008
2
+ * implicit tokens are now emitted at the right times (need more test code)
3
+ * local variables are now temporarily hidden by class, module, and def
4
+ * line numbers should always be correct now (=begin...=end caused this) (??)
5
+ * fixed meth/var-name confusion in idents after 'def' but before params
6
+ * FileAndLineToken now emitted on all new lines (0.8)
7
+ * offset of __END__ now works(??)
8
+ * put files into lib/
9
+ * improvements in parsing unary * and & (??)
10
+ * input can now come from a string
11
+ * local vars (defs and uses) are recognized in string inclusions
12
+ * explicitly decimal numbers, eg: 0d123456789
13
+ * eof in unusual cases is better handled
14
+ * __END__ is not treated as a keyword
15
+ * '|' as goalpost is now better handled
16
+ * a number of things have been renamed internally
17
+ * no more implicit parens for setter method calls
18
+ * '{' after return, break, and next is now the start of a hash.
19
+ * ambiguous cases of '::','(',':',and '?' are now better handled.
20
+ * more start/end hint tokens (for 'when', 'rescue', and 'for')
21
+ * bugfixes in rhs hint tokens
22
+ * parsing of def headers for singleton methods is improved
23
+ * rescue as operator is now handled
24
+ * block param list lookahead is simplified
25
+ * unary ops (including * and &) can be easily distinguished in output
26
+ * here document bodies better handled, esp after escaped newline
27
+ * symbols like %s{symbol} now actually work
28
+ * implicit parens around => better handled...
29
+ * different cases of '{' can now be easily distinguished
30
+ * ImplicitParamList Start and End are now Keyword, not Ignore tokens.
31
+
32
+ === 0.6.2
33
+ * testcode/dumptokens.rb charhandler.rb doesn't work... but does after unix2dos (not reproducible)
34
+ * files are opened in binmode to avoid all possible eol translation
35
+ * (x.+?x) now works
36
+ * methname/varname mixups fixed in some cases
37
+ * performance improvements, in most important cases
38
+ * error handling tokens should be emitted on error input... ErrorToken mixin module
39
+ * but old error handling interface should be preserved and made available
40
+ * moved readahead and friends into IOext
41
+ * made optimized readahead et al for fakefile
42
+ * dos newlines (and newlines generally) can be fancy string delimiters
43
+ * do,if,until, etc, have a way to tell if an end is associated
44
+ * broke readme into pieces
45
+
46
+ === 0.6.0
47
+ * partly fixed the implicit tokens at the wrong times. (or not at the
48
+ * right times) (partly fixed)
49
+ * : operator might be a synonym for 'then'
50
+ * variables other than the last are now recognized in multiple assignment
51
+ * variables created by for and rescue are now recognized
52
+ * token following :: should not be BareSymbolToken if begins with A-Z (unless obviously a func)
53
+ * read code to be lexed from a string. (irb wants this)
54
+ * fancy symbols weren't supported at all. (like this: %s{abcdefg})
55
+
data/Manifest.txt ADDED
@@ -0,0 +1,67 @@
1
+ COPYING
2
+ README.txt
3
+ Manifest.txt
4
+ Rakefile
5
+ howtouse.txt
6
+ History.txt
7
+ testing.txt
8
+ lib/rubylexer/rubycode.rb
9
+ lib/rubylexer/context.rb
10
+ lib/rubylexer/token.rb
11
+ lib/rubylexer/0.6.rb
12
+ lib/rubylexer/0.6.2.rb
13
+ lib/rubylexer/0.7.0.rb
14
+ lib/rubylexer/version.rb
15
+ lib/rubylexer/rulexer.rb
16
+ lib/rubylexer/tokenprinter.rb
17
+ lib/rubylexer/charset.rb
18
+ lib/rubylexer/symboltable.rb
19
+ lib/rubylexer/charhandler.rb
20
+ lib/assert.rb
21
+ lib/rubylexer.rb
22
+ test/data/gemlist.txt
23
+ test/data/blockassigntest.rb
24
+ test/data/for.rb
25
+ test/data/chunky_bacon.rb
26
+ test/data/and.rb
27
+ test/data/pre.unix.rb
28
+ test/data/untermed_string.rb.broken
29
+ test/data/__end__2.rb
30
+ test/data/w.rb
31
+ test/data/if.rb
32
+ test/data/pre.rb
33
+ test/data/jarh.rb
34
+ test/data/regtest.rb
35
+ test/data/chunky_bacon4.rb
36
+ test/data/__end__.rb
37
+ test/data/strinc.rb
38
+ test/data/lbrace.rb
39
+ test/data/p.rb
40
+ test/data/chunky.plain.rb
41
+ test/data/noeolatend.rb
42
+ test/data/g.rb
43
+ test/data/23.rb
44
+ test/data/lbrack.rb
45
+ test/data/untitled1.rb
46
+ test/data/rescue.rb
47
+ test/data/tokentest.assert.rb.can
48
+ test/data/pleac.rb.broken
49
+ test/data/heart.rb
50
+ test/data/s.rb
51
+ test/data/wsdlDriver.rb
52
+ test/data/p-op.rb
53
+ test/data/1.rb.broken
54
+ test/data/untermed_here.rb.broken
55
+ test/data/newsyntax.rb
56
+ test/data/chunky_bacon3.rb
57
+ test/data/chunky_bacon2.rb
58
+ test/data/format.rb
59
+ test/code/locatetest.rb
60
+ test/code/rubylexervsruby.rb
61
+ test/code/dl_all_gems.rb
62
+ test/code/unpack_all_gems.rb
63
+ test/code/tokentest.rb
64
+ test/code/dumptokens.rb
65
+ test/code/torment
66
+ test/code/locatetest
67
+ test/code/deletewarns.rb
data/README.txt ADDED
@@ -0,0 +1,103 @@
1
+ = RubyLexer
2
+
3
+ *
4
+ *
5
+ *
6
+
7
+ === DESCRIPTION:
8
+
9
+ RubyLexer is a lexer library for Ruby, written in Ruby. Rubylexer is meant
10
+ as a lexer for Ruby that's complete and correct; all legal Ruby
11
+ code should be lexed correctly by RubyLexer as well. Just enough parsing
12
+ capability is included to give RubyLexer enough context to tokenize correctly
13
+ in all cases. (This turned out to be more parsing than I had thought or
14
+ wanted to take on at first.) RubyLexer handles the hard things like
15
+ complicated strings, the ambiguous nature of some punctuation characters and
16
+ keywords in ruby, and distinguishing methods and local variables.
17
+
18
+ RubyLexer is not particularly clean code. As I progressed in writing this,
19
+ I've learned a little about how these things are supposed to be done; the
20
+ lexer is not supposed to have any state of its own, instead it gets whatever
21
+ it needs to know from the parser. As a stand-alone lexer, Rubylexer maintains
22
+ quite a lot of state. Every instance variable in the RubyLexer class is some
23
+ sort of lexer state. Most of the complication and ugly code in RubyLexer is
24
+ in maintaining or using this state.
25
+
26
+ For information about using RubyLexer in your program, please see howtouse.txt.
27
+
28
+ For my notes on the testing of RubyLexer, see testing.txt.
29
+
30
+ If you have any questions, comments, problems, new feature requests, or just
31
+ want to figure out how to make it work for what you need to do, contact me:
32
+ rubylexer _at_ inforadical _dot_ net
33
+
34
+ RubyLexer is a RubyForge project. RubyForge is another good place to send your
35
+ bug reports or whatever: http://rubyforge.org/projects/rubylexer/
36
+
37
+ (There aren't any bugs filed against RubyLexer there yet, but don't be afraid
38
+ that your report will get lonely.)
39
+
40
+ ==SYNOPSIS:
41
+ require "rubylexer.rb"
42
+ #then later
43
+ lexer=RubyLexer.new(a_file_name, opened_File_or_String)
44
+ until EoiToken===(token=lexer.get1token)
45
+ #...do stuff w/ token...
46
+ end
47
+
48
+ == Status
49
+ RubyLexer can correctly lex all legal Ruby 1.8 code that I've been able to
50
+ find on my Debian system. It can also handle (most of) my catalog of nasty
51
+ test cases (in testdata/p.rb) (see below for known problems). At this point,
52
+ new bugs are almost exclusively found by my home-grown test code, rather
53
+ than ruby code gathered 'from the wild'. There are a number of issues I know
54
+ about and plan to fix, but it seems that Ruby coders don't write code complex
55
+ enough to trigger them very often. Although incomplete, RubyLexer can
56
+ correctly distinguish these ambiguous uses of the following operator and
57
+ keywords, depending on context:
58
+ % can be modulus operator or start of fancy string
59
+ / can be division operator or start of regex
60
+ * & + - :: can be unary or binary operator
61
+ [] can be for array literal or [] method (or []=)
62
+ << can be here document or left shift operator (or in class<<obj expr)
63
+ : can be start of symbol, substitute for then, or part of ternary op
64
+ (there are other uses too, but they're not supported yet.)
65
+ ? can be start of character constant or ternary operator
66
+ ` can be method name or start of exec string
67
+ any overrideable operator and most keywords can also be method names
68
+
69
+ == todo
70
+ test w/ more code (rubygems, rpa, obfuscated ruby contest, rubicon, others?)
71
+ these 5 should be my standard test suite: p.rb, (matz') test.rb, tk.rb, obfuscated ruby contest, rubicon
72
+ test more ways: cvt source to dos or mac fmt before testing
73
+ test more ways: run unit tests after passing thru rubylexer (0.7)
74
+ test more ways: test require'd, load'd, or eval'd code as well (0.7)
75
+ lex code a line (or chunk) at a time and save state for next line (irb wants this) (0.8)
76
+ incremental lexing (ides want this (for performance))
77
+ put everything in a namespace
78
+ integrate w/ other tools...
79
+ html colorized output?
80
+ move more state onto @parsestack (ongoing)
81
+ the new cases in p.rb now compile, but won't run
82
+ expand on test documentation
83
+ use want_op_name more
84
+ return result as a half-parsed tree (with parentheses and the like matched)
85
+ emit advisory tokens when see beginword, then (or equivalent), or end... what else does florian want?
86
+ strings are still slow
87
+ emit advisory tokens when local var defined/goes out of scope (or hidden/unhidden?)
88
+ token pruning in dumptokens...
89
+
90
+ == known issues: (and planned fix release)
91
+ context not really preserved when entering or leaving string inclusions. this causes
92
+ a number of problems. local variables are ok now, but here document headers started
93
+ in a string inclusion with the body outside will be a problem. (0.8)
94
+ string tokenization sometimes a little different from ruby around newlines
95
+ (htree/template.rb) (0.8)
96
+ string contents might not be correctly translated in a few cases (0.8?)
97
+ symbols which contain string interpolations are flattened into one token. eg :"foo#{bar}" (0.8)
98
+ '\r' whitespace sometimes seen in dos-formatted output.. shouldn't be (eg pre.rb) (0.7)
99
+ windows or mac newline in source are likely to cause problems in obscure cases (need test case)
100
+ unterminated =begin is not an error (0.8)
101
+ ruby 1.9 completely unsupported (0.9)
102
+ character sets other than ascii are not supported at all (1.0)
103
+
data/Rakefile ADDED
@@ -0,0 +1,24 @@
1
+ # Copyright (C) 2008 Caleb Clausen
2
+ # Distributed under the terms of Ruby's license.
3
+ require 'rubygems'
4
+ require 'hoe'
5
+ require 'lib/rubylexer/version.rb'
6
+
7
+
8
+ readme=open("README.txt")
9
+ readme.readline("\n=== DESCRIPTION:")
10
+ readme.readline("\n\n")
11
+ desc=readme.readline("\n\n")
12
+
13
+ hoe=Hoe.new("rubylexer", RubyLexer::VERSION) do |_|
14
+ _.author = "Caleb Clausen"
15
+ _.email = "rubylexer-owner @at@ inforadical .dot. net"
16
+ _.url = "http://rubylexer.rubyforge.org/"
17
+ _.extra_deps = ["sequence"]
18
+ _.test_globs=["test/{code/*,data/*rb*,results/}"]
19
+ _.description=desc
20
+ _.summary=desc[/\A[^.]+\./]
21
+ _.spec_extras={:bindir=>''}
22
+ end
23
+
24
+
data/howtouse.txt CHANGED
@@ -1,13 +1,13 @@
1
1
 
2
2
  Using rubylexer:
3
3
  require "rubylexer.rb"
4
- ,then
4
+ #then later
5
5
  lexer=RubyLexer.new(a_file_name, opened_File_or_String)
6
- until EoiToken===(tok=lexer.get1token)
7
- ...do stuff w/ toks...
6
+ until EoiToken===(token=lexer.get1token)
7
+ #...do stuff w/ token...
8
8
  end
9
9
 
10
- For a slightly expanded version of this example, see testcode/dumptokens.rb.
10
+ For a slightly expanded version of this example, see test/code/dumptokens.rb.
11
11
 
12
12
  tok will be a subclass of Token. there are many token classes (see token.rb)
13
13
  however, all tokens have some common methods:
@@ -23,7 +23,8 @@ WToken #(mostly useless?) abstract superclass for KeywordToken,
23
23
  #OperatorToken, VarNameToken, and HerePlaceholderToken
24
24
  #but not (confusingly) MethNameToken (perhaps that'll change)
25
25
  KeywordToken #a ruby keyword or non-overridable punctuation char(s)
26
- OperatorToken #overrideable operators
26
+ OperatorToken #overrideable operators.
27
+ #use #unary? and #binary? to find out how many arguments it takes.
27
28
  VarNameToken #a name that represents a variable
28
29
  HerePlaceholderToken #represents the header of a here string. subclass of WToken
29
30
  MethNameToken #the name of a method: the uncoloned
@@ -120,7 +121,8 @@ time to adapt to changes. That promise goes for all the changes described below.
120
121
 
121
122
  In cases where the 2 are incompatible, (inspired by rubygems) I've come up with this:
122
123
 
123
- RubyLexer.version(0.6).new(...args...) #request the 0.6 api
124
+ require 'rubylexer/0.6'
125
+ rl=RubyLexer.new(...args...) #request the 0.6 api
124
126
 
125
127
  This actually works currently; it enables the old api where errors cause an exception instead
126
128
  of generating ErrorTokens. The default will always be to use the new api.
@@ -133,4 +135,5 @@ be a big deal; old clients can just include the namespace module.
133
135
  Token#ident may be taken away or change without notice.
134
136
  MethNameToken may become a WToken
135
137
  HereBodyToken should really be a string subclass...
138
+ Newline,EscNl,BareSymbolToken may get renamed
136
139
 
@@ -1,5 +1,4 @@
1
1
  =begin copyright
2
- rubylexer - a ruby lexer written in ruby
3
2
  Copyright (C) 2004,2005 Caleb Clausen
4
3
 
5
4
  This library is free software; you can redistribute it and/or
@@ -16,16 +15,17 @@
16
15
  License along with this library; if not, write to the Free Software
17
16
  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
17
  =end
19
- require 'set'
20
18
 
19
+ module Kernel
20
+ def assert(expr,msg="assertion failed")
21
+ defined? $Debug and $Debug and (expr or raise msg)
22
+ end
21
23
 
22
- def assert(expr,msg="assertion failed")
23
- $DEBUG and (expr or raise msg)
24
- end
25
-
26
- @@printed=Set.new
27
- def fixme(s)
28
- @@printed.include?( s) and return
29
- $DEBUG and STDERR.print "FIXME: #{s}\n"
30
- @@printed.add s
24
+ @@printed={}
25
+ def fixme(s)
26
+ unless @@printed[s]
27
+ @@printed[s]=1
28
+ defined? $Debug and $Debug and $stderr.print "FIXME: #{s}\n"
29
+ end
30
+ end
31
31
  end
@@ -19,15 +19,18 @@
19
19
 
20
20
 
21
21
 
22
- require "rulexer"
23
- require "symboltable"
24
- require "io.each_til_charset"
25
- require "context.rb"
26
-
22
+ require 'rubylexer/rulexer' #must be 1st!!!
23
+ require 'rubylexer/version'
24
+ require 'rubylexer/token'
25
+ require 'rubylexer/charhandler'
26
+ require 'rubylexer/symboltable'
27
+ #require "io.each_til_charset"
28
+ require 'rubylexer/context'
29
+ require 'rubylexer/tokenprinter'
27
30
 
28
31
 
29
32
  #-----------------------------------
30
- class RubyLexer < RuLexer
33
+ class RubyLexer
31
34
  include NestedContexts
32
35
 
33
36
  RUBYSYMOPERATORREX=
@@ -39,7 +42,7 @@ class RubyLexer < RuLexer
39
42
  #or .. ... ?:
40
43
  #for that use:
41
44
  RUBYNONSYMOPERATORREX=
42
- %r{^([%^~/\-+]=|(\|\|?|&&?)=?|(<<|>>|\*\*?)=|\.{1,3}|[?:,;]|=>?|![=~]?)$}
45
+ %r{^([%^~/\-+|&]=|(\|\||&&)=?|(<<|>>|\*\*?)=|\.{1,3}|[?:,;]|=>?|![=~]?)$}
43
46
  RUBYOPERATORREX=/#{RUBYSYMOPERATORREX}|#{RUBYNONSYMOPERATORREX}/o
44
47
  UNSYMOPS=/^[~!]$/ #always unary
45
48
  UBSYMOPS=/^([*&+-]|::)$/ #ops that could be unary or binary
@@ -50,16 +53,18 @@ class RubyLexer < RuLexer
50
53
  VARLIKE_KEYWORDS=/^(__FILE__|__LINE__|false|nil|self|true)$/
51
54
  INNERBOUNDINGWORDS="(else|elsif|ensure|in|then|rescue|when)"
52
55
  BINOPWORDS="(and|or)"
53
- NEVERSTARTPARAMLISTWORDS=/^(#{OPORBEGINWORDS}|#{INNERBOUNDINGWORDS}|#{BINOPWORDS}|end)[^a-zA-Z0-9_!?=]?/o
54
- NEVERSTARTPARAMLISTFIRST=CharSet[%[aoeitrwu]] #char set that begins NEVERSTARTPARAMLIST
55
- NEVERSTARTPARAMLISTMAXLEN=7 #max len of a NEVERSTARTPARAMLIST
56
+ NEVERSTARTPARAMLISTWORDS=/^(#{OPORBEGINWORDS}|#{INNERBOUNDINGWORDS}|#{BINOPWORDS}|end)([^a-zA-Z0-9_!?=]|\Z)/o
57
+ NEVERSTARTPARAMLISTFIRST=CharSet['aoeitrwu'] #chars that begin NEVERSTARTPARAMLIST
58
+ NEVERSTARTPARAMLISTMAXLEN=7 #max len of a NEVERSTARTPARAMLIST
56
59
 
57
60
  RUBYKEYWORDS=%r{
58
- ^(alias|#{BINOPWORDS}|not|undef|__END__|end|
61
+ ^(alias|#{BINOPWORDS}|not|undef|end|
59
62
  #{VARLIKE_KEYWORDS}|#{FUNCLIKE_KEYWORDS}|
60
63
  #{INNERBOUNDINGWORDS}|#{BEGINWORDS}
61
64
  )$
62
65
  }xo
66
+ #__END__ should not be in this set... its handled in start_of_line_directives
67
+
63
68
  CHARMAPPINGS = {
64
69
  ?$ => :dollar_identifier,
65
70
  ?@ => :at_identifier,
@@ -67,8 +72,7 @@ class RubyLexer < RuLexer
67
72
  ?A..?Z => :identifier,
68
73
  ?_ => :identifier,
69
74
  ?0..?9 => :number,
70
- ?" => :double_quote,
71
- ?' => :single_quote,
75
+ %{"'} => :double_quote,
72
76
  ?` => :back_quote,
73
77
 
74
78
  WHSP => :whitespace, #includes \r
@@ -83,7 +87,8 @@ class RubyLexer < RuLexer
83
87
 
84
88
  #these ones could signal either an op or a term
85
89
  ?/ => :regex_or_div,
86
- "|>" => :quadriop,
90
+ "|" => :conjunction_or_goalpost,
91
+ ">" => :quadriop,
87
92
  "*&" => :star_or_amp, #could be unary
88
93
  "+-" => :plusminus, #could be unary
89
94
  ?< => :lessthan,
@@ -103,22 +108,27 @@ class RubyLexer < RuLexer
103
108
  ?# => :comment
104
109
  }
105
110
 
106
- attr :incomplete_here_tokens
111
+ attr_reader :incomplete_here_tokens, :parsestack
107
112
 
108
113
 
109
114
  #-----------------------------------
110
115
  def initialize(filename,file,linenum=1)
111
116
  super(filename,file, linenum)
112
117
  @start_linenum=linenum
113
- @bracestack=[TopLevelContext.new]
118
+ @parsestack=[TopLevelContext.new]
114
119
  @incomplete_here_tokens=[]
115
- @localvars=SymbolTable.new
120
+ @localvars_stack=[SymbolTable.new]
116
121
  @defining_lvar=nil
122
+ @in_def_name=false
117
123
 
118
124
  @toptable=CharHandler.new(self, :illegal_char, CHARMAPPINGS)
119
125
 
120
126
  start_of_line_directives
121
127
  end
128
+
129
+ def localvars;
130
+ @localvars_stack.last
131
+ end
122
132
 
123
133
  #-----------------------------------
124
134
  def get1token
@@ -129,25 +139,23 @@ class RubyLexer < RuLexer
129
139
 
130
140
  #check for bizarre token types
131
141
  case result
132
- when IgnoreToken#,nil
133
- return result
142
+ when StillIgnoreToken#,nil
143
+ result
134
144
  when Token#,String
145
+ @last_operative_token=result
146
+ assert !(IgnoreToken===@last_operative_token)
147
+ result
135
148
  else
136
- raise "#{@filename}:#{linenum}:token is a #{result.class}, last is #{@last_operative_token}"
149
+ raise "#{@filename}:#{linenum}:token is a #{result.class}, last is #{@last_operative_token}"
137
150
  end
138
-
139
- @last_operative_token=result
140
-
141
- return result
142
151
  end
143
-
144
152
 
145
153
 
146
154
  #-----------------------------------
147
155
  def balanced_braces?
148
156
 
149
- #@bracestack.empty?
150
- @bracestack.size==1 and TopLevelContext===@bracestack.first
157
+ #@parsestack.empty?
158
+ @parsestack.size==1 and TopLevelContext===@parsestack.first
151
159
  end
152
160
 
153
161
  #-----------------------------------
@@ -182,7 +190,7 @@ private
182
190
 
183
191
  #-----------------------------------
184
192
  def expect_do_or_end_or_nl!(st)
185
- @bracestack.push ExpectDoOrNlContext.new(st,/(do|;|:|\n)/,@linenum)
193
+ @parsestack.push ExpectDoOrNlContext.new(st,/(do|;|:|\n)/,@linenum)
186
194
  end
187
195
 
188
196
  #-----------------------------------
@@ -199,31 +207,38 @@ private
199
207
  end
200
208
 
201
209
  #-----------------------------------
202
- WSCHARSET=CharSet["#\\\n\s\t\v\r\f"]
203
- def ignored_tokens(allow_eof=false)
210
+ WSCHARSET=/[#\\\n\s\t\v\r\f]/
211
+ def ignored_tokens(allow_eof=false,allow_eol=true)
204
212
  result=[]
205
- result<<@moretokens.shift while IgnoreToken===@moretokens.first
213
+ result<<@moretokens.shift while StillIgnoreToken===@moretokens.first
206
214
  @moretokens.empty? or return result
207
- if true
208
215
  loop do
209
216
  unless @moretokens.empty?
210
- IgnoreToken===@moretokens.first or NewlineToken===@moretokens.first or
211
- break
217
+ case @moretokens.first
218
+ when StillIgnoreToken
219
+ when NewlineToken: allow_eol or break
220
+ else break
221
+ end
212
222
  else
213
- WSCHARSET===nextchar or break
223
+
224
+ break unless ch=nextchar
225
+ ch=ch.chr
226
+ break unless WSCHARSET===ch
227
+ break if ch[/[\r\n]/] and !allow_eol
214
228
  end
229
+
215
230
 
216
231
  tok=get1token
217
- result<<tok
232
+ result << tok
218
233
  case tok
219
- when NewlineToken : block_given? and yield tok
220
- when EoiToken : allow_eof or lexerror tok,"end of file not expected here(2)"
221
- when IgnoreToken
222
- else raise "impossible"
234
+ when NewlineToken; assert allow_eol; block_given? and yield tok
235
+ when EoiToken; allow_eof or lexerror tok,"end of file not expected here(2)"
236
+ when StillIgnoreToken
237
+ else raise "impossible token: #{tok.inspect}"
223
238
  end
224
239
  end
225
240
 
226
- else
241
+ =begin
227
242
  @whsphandler||=CharHandler.new(self, :==,
228
243
  "#" => :comment,
229
244
  "\n" => :newline,
@@ -235,18 +250,18 @@ else
235
250
  block_given? and NewlineToken===tok and yield tok
236
251
  result << tok
237
252
  end
238
- end
253
+ =end
239
254
  return result
240
255
  end
241
256
 
242
257
  #-----------------------------------
243
258
  def safe_recurse
244
259
  old_moretokens=@moretokens
245
- #old_bracestack=@bracestack.dup
260
+ #old_parsestack=@parsestack.dup
246
261
  @moretokens=[]
247
262
  result= yield @moretokens
248
263
  #assert @incomplete_here_tokens.empty?
249
- #assert @bracestack==old_bracestack
264
+ #assert @parsestack==old_parsestack
250
265
  @moretokens= old_moretokens.concat @moretokens
251
266
  return result
252
267
  #need to do something with @last_operative_token?
@@ -258,7 +273,7 @@ end
258
273
  result = ((
259
274
  #order matters here, but it shouldn't
260
275
  #(but til_charset must be last)
261
- eat_next_if(/^[!@&+`'=~\/\\,.;<>*"$?:]$/) or
276
+ eat_next_if(/[!@&+`'=~\/\\,.;<>*"$?:]/) or
262
277
  (eat_next_if('-') and ("-"+getchar)) or
263
278
  (?0..?9)===nextchar ? til_charset(/[^\d]/) : nil
264
279
  ))
@@ -266,7 +281,7 @@ end
266
281
 
267
282
  #-----------------------------------
268
283
  def identifier(context=nil)
269
- oldpos=@file.pos
284
+ oldpos= input_position
270
285
  str=identifier_as_string(context)
271
286
 
272
287
  #skip keyword processing if 'escaped' as it were, by def, . or ::
@@ -279,8 +294,8 @@ end
279
294
  @moretokens.unshift(*parse_keywords(str,oldpos) do
280
295
  #if not a keyword,
281
296
  case str
282
- when FUNCLIKE_KEYWORDS: #do nothing
283
- when VARLIKE_KEYWORDS,RUBYKEYWORDS: raise "shouldnt see keywords here, now"
297
+ when FUNCLIKE_KEYWORDS; #do nothing
298
+ when VARLIKE_KEYWORDS,RUBYKEYWORDS; raise "shouldnt see keywords here, now"
284
299
  end
285
300
  safe_recurse { |a| var_or_meth_name(str,@last_operative_token,oldpos) }
286
301
  end)
@@ -290,7 +305,7 @@ end
290
305
  #-----------------------------------
291
306
  def identifier_as_string(context)
292
307
  #must begin w/ letter or underscore
293
- str=eat_next_if(/^[_a-z]$/i) or return nil
308
+ str=eat_next_if(/[_a-z]/i) or return nil
294
309
 
295
310
  #equals, question mark, and exclamation mark
296
311
  #might be allowed at the end in some contexts.
@@ -305,18 +320,20 @@ end
305
320
  when ?: then [?=, ??, ?!]
306
321
  else [nil,??, ?!]
307
322
  end
323
+
324
+ @in_def_name and maybe_eq= ?=
308
325
 
309
326
  str<<til_charset(/[^a-z0-9_]/i)
310
327
 
311
328
  #look for ?, !, or =, if allowed
312
- case b=@file.getc
329
+ case b=getc
313
330
  when nil #means we're at eof
314
331
  #handling nil here prevents b from ever matching
315
332
  #a nil value of maybe_qm, maybe_ex or maybe_eq
316
333
  when maybe_qm
317
334
  str << b
318
335
  when maybe_ex
319
- nc=(nextchar unless @file.eof?)
336
+ nc=(nextchar unless eof?)
320
337
  #does ex appear to be part of a larger operator?
321
338
  if nc==?= #or nc==?~
322
339
  back1char
@@ -324,7 +341,7 @@ end
324
341
  str << b
325
342
  end
326
343
  when maybe_eq
327
- nc=(nextchar unless @file.eof?)
344
+ nc=(nextchar unless eof?)
328
345
  #does eq appear to be part of a larger operator?
329
346
  if nc==?= or nc==?~ or nc==?>
330
347
  back1char
@@ -342,34 +359,37 @@ end
342
359
  #-----------------------------------
343
360
  #contexts in which comma may appear in ruby:
344
361
  #multiple lhs (terminated by assign op)
345
- #multiple rhs (in implicit context) (tbd)
362
+ #multiple rhs (in implicit context)
346
363
  #method actual param list (in ( or implicit context)
347
364
  #method formal param list (in ( or implicit context)
348
- #block formal param list (in | context) (tbd)
365
+ #block formal param list (in | context)
366
+ #nested multiple rhs
367
+ #nested multiple lhs
368
+ #nested block formal list
369
+ #element reference/assignment (in [] or []= method actual parameter context)
349
370
  #hash immediate (in imm{ context)
350
371
  #array immediate (in imm[ context)
351
- #element reference/assignment (in [] or []= method actual parameter context)
352
- #list after for
372
+ #list between 'for' and 'in'
353
373
  #list after rescue
354
374
  #list after when
355
375
  #list after undef
356
376
 
357
- #note: comma in parens not around a param list is illegal
377
+ #note: comma in parens not around a param list or lhs or rhs is illegal
358
378
 
359
379
  #-----------------------------------
360
380
  #a comma has been seen. are we in an
361
381
  #lvalue list or some other construct that uses commas?
362
382
  def comma_in_lvalue_list?
363
- not ListContext===@bracestack.last
383
+ @parsestack.last.lhs= (not ListContext===@parsestack.last)
364
384
  end
365
385
 
366
386
  #-----------------------------------
367
387
  def in_lvar_define_state
368
388
  #@defining_lvar is a hack
369
- @defining_lvar or case ctx=@bracestack.last
370
- when ForSMContext: ctx.state==:for
371
- when RescueSMContext: ctx.state==:arrow
372
- when BlockParamListContext: true
389
+ @defining_lvar or case ctx=@parsestack.last
390
+ when ForSMContext; ctx.state==:for
391
+ when RescueSMContext; ctx.state==:arrow
392
+ #when BlockParamListLhsContext; true
373
393
  end
374
394
  end
375
395
 
@@ -391,66 +411,102 @@ end
391
411
  #look for and ignore local variable names
392
412
 
393
413
  assert String===name
394
-
395
- #fixme: keywords shouldn't be treated specially after :: and .
396
414
 
397
415
  #maybe_local really means 'maybe local or constant'
398
416
  maybe_local=case name
399
- when /[^a-z_0-9]$/i: #do nothing
400
- when /^[a-z_]/: (@localvars===name or VARLIKE_KEYWORDS===name or in_lvar_define_state) and not lasttok===/^(\.|::)$/
401
- when /^[A-Z]/: is_const=true;not lasttok==='.' #this is the right algorithm for constants...
417
+ when /[^a-z_0-9]$/i; #do nothing
418
+ when /^[a-z_]/; (localvars===name or VARLIKE_KEYWORDS===name or in_lvar_define_state) and not lasttok===/^(\.|::)$/
419
+ when /^[A-Z]/; is_const=true;not lasttok==='.' #this is the right algorithm for constants...
402
420
  end
403
421
 
404
422
  assert(@moretokens.empty?)
423
+
424
+ oldlast=@last_operative_token
405
425
 
406
426
  tok=@last_operative_token=VarNameToken.new(name,pos)
407
427
 
408
- oldpos=@file.pos
428
+ oldpos= input_position
409
429
  sawnl=false
410
430
  result=ws_toks=ignored_tokens(true) {|nl| sawnl=true }
411
- sawnl || @file.eof? and return result.unshift(
412
- *if maybe_local : [tok]
413
- else [MethNameToken.new(name,pos), #insert implicit parens right after tok
414
- ImplicitParamListStartToken.new( oldpos),
415
- ImplicitParamListEndToken.new( oldpos) ]
431
+ if sawnl || eof?
432
+ if maybe_local then
433
+ if in_lvar_define_state
434
+ if /^[a-z_][a-zA-Z_0-9]*$/===name
435
+ assert !(lasttok===/^(\.|::)$/)
436
+ localvars[name]=true
437
+ else
438
+ lexerror tok,"not a valid variable name: #{name}"
439
+ end
440
+ return result.unshift(tok)
441
+ end
442
+ return result.unshift(tok) #if is_const
443
+ else
444
+ return result.unshift(
445
+ MethNameToken.new(name,pos), #insert implicit parens right after tok
446
+ ImplicitParamListStartToken.new( oldpos),
447
+ ImplicitParamListEndToken.new( oldpos)
448
+ )
416
449
  end
417
- )
450
+ end
418
451
 
419
452
  #if next op is assignment (or comma in lvalue list)
420
453
  #then omit implicit parens
421
454
  assignment_coming=case nc=nextchar
422
- when ?=: not /^=[=~]$/===readahead(2)
423
- when ?,: comma_in_lvalue_list?
424
- when ?>,?<: /^([<>])\1=$/===readahead(3)
425
- when ?*,?|,?&: /^([*|&])\1?=/===readahead(3)
426
- when ?%,?/,?-,?+,?^: readahead(2)[1..1]=='='
455
+ when ?=; not /^=[>=~]$/===readahead(2)
456
+ when ?,; comma_in_lvalue_list?
457
+ when ?); last_context_not_implicit.lhs
458
+ when ?>,?<; /^(.)\1=$/===readahead(3)
459
+ when ?*,?&; /^(.)\1?=/===readahead(3)
460
+ when ?|; /^\|\|?=/===readahead(3) or
461
+ #is it a goalpost?
462
+ BlockParamListLhsContext===last_context_not_implicit &&
463
+ readahead(2)[1] != ?|
464
+ when ?%,?/,?-,?+,?^; readahead(2)[1]== ?=
427
465
  end
428
- if (assignment_coming or in_lvar_define_state)
466
+ if (assignment_coming && !(lasttok===/^(\.|::)$/) or in_lvar_define_state)
429
467
  tok=VarNameToken.new(name,pos)
430
468
  if /[^a-z_0-9]$/i===name
431
469
  lexerror tok,"not a valid variable name: #{name}"
432
470
  elsif /^[a-z_]/===name and !(lasttok===/^(\.|::)$/)
433
- @localvars[name]=true
471
+ localvars[name]=true
434
472
  end
435
473
  return result.unshift(tok)
436
474
  end
437
-
438
- implicit_parens_to_emit=case nc
439
- when ?!: readahead(2)=='!=' ? 2 : 1
475
+
476
+ implicit_parens_to_emit=
477
+ if assignment_coming
478
+ @parsestack.push AssignmentContext.new(nil) if nc==?% or nc==?/
479
+ 0
480
+ else
481
+ case nc
482
+ when nil: 2
483
+ when ?!; readahead(2)=='!=' ? 2 : 1
440
484
  when NEVERSTARTPARAMLISTFIRST
441
485
  (NEVERSTARTPARAMLISTWORDS===readahead(NEVERSTARTPARAMLISTMAXLEN)) ? 2 : 1
442
- when ?",?',?`,?a..?z,?A..?Z,?0..?9,?_,?@,?$,?~: 1
443
- when ?{: maybe_local=false; 2
444
- when ?(: maybe_local=false; 0
445
- when ?},?],?),?;,?^, ?|, ?>, ?,, ?., ?=: 2
446
- when ?+, ?-, ?*, ?&, ?%, ?/, ?:, ??: (ws_toks.empty? || readahead(2)[/^.[#{WHSPLF}]/]) ? 2 : 3
447
- when ?<: (ws_toks.empty? || readahead(3)[/^<<[^"'`a-zA-Z_0-9-]/]) ? 2 : 3
448
- when ?[: ws_toks.empty? ? 2 : 3
449
- when ?\\, ?\s, ?\t, ?\n, ?\r, ?\v, ?#: raise 'failure'
450
- else raise "unknown char after ident: #{nextchar.chr}"
486
+ when ?",?',?`,?a..?z,?A..?Z,?0..?9,?_,?@,?$,?~; 1
487
+ when ?{
488
+ maybe_local=false
489
+ x=2
490
+ x-=1 if /\A(return|break|next)\Z/===name and
491
+ !(KeywordToken===oldlast and oldlast===/\A(\.|::)\Z/)
492
+ x
493
+ when ?(;
494
+ maybe_local=false; !(ws_toks.empty? or lasttok===/^(\.|::)$/)? 1 : 0
495
+ when ?},?],?),?;,?^, ?|, ?>, ?,, ?., ?=; 2
496
+ when ?+, ?-, ?*, ?&, ?%, ?/; (ws_toks.empty? || readahead(2)[/^.[#{WHSPLF}]/]) ? 2 : 3
497
+ when ?:,??; next2=readahead(2);
498
+ WHSPLF[next2[1].chr] || next2=='::' ? 2 : 3
499
+ # when ?:,??; (readahead(2)[/^.[#{WHSPLF}]/]) ? 2 : 3
500
+ when ?<; (ws_toks.empty? || readahead(3)[/^<<["'`a-zA-Z_0-9-]/]) ? 3 : 2
501
+ when ?[; ws_toks.empty? ? 2 : 3
502
+ when ?\\, ?\s, ?\t, ?\n, ?\r, ?\v, ?#; raise 'failure'
503
+ else raise "unknown char after ident: #{nc=nextchar ? nc.chr : "<<EOF>>"}"
504
+ end
505
+ end
506
+
507
+ if is_const and implicit_parens_to_emit==3 then
508
+ implicit_parens_to_emit=1
451
509
  end
452
-
453
- implicit_parens_to_emit==3 and is_const and implicit_parens_to_emit=1
454
510
 
455
511
  tok=if maybe_local and implicit_parens_to_emit>=2
456
512
  implicit_parens_to_emit=0
@@ -459,15 +515,18 @@ end
459
515
  MethNameToken
460
516
  end.new(name,pos)
461
517
 
462
-
463
518
  case implicit_parens_to_emit
464
- when 2:
519
+ when 2;
465
520
  result.unshift ImplicitParamListStartToken.new(oldpos),
466
521
  ImplicitParamListEndToken.new(oldpos)
467
- when 1,3:
468
- result.unshift ImplicitParamListStartToken.new(oldpos)
469
- @bracestack.push ParamListContextNoParen.new(@linenum)
470
- when 0: #do nothing
522
+ when 1,3;
523
+ arr,pass=*param_list_coming_with_2_or_more_params?
524
+ result.push( *arr )
525
+ unless pass
526
+ result.unshift ImplicitParamListStartToken.new(oldpos)
527
+ @parsestack.push ParamListContextNoParen.new(@linenum)
528
+ end
529
+ when 0; #do nothing
471
530
  else raise 'invalid value of implicit_parens_to_emit'
472
531
  end
473
532
  return result.unshift(tok)
@@ -476,22 +535,43 @@ end
476
535
  # '\n (unescaped) and or'
477
536
  # 'then else elsif rescue ensure (illegal in value context)'
478
537
 
479
- # 'need to pop noparen from bracestack on these tokens: (in operator context)'
538
+ # 'need to pop noparen from parsestack on these tokens: (in operator context)'
480
539
  # 'not ok:'
481
540
  # 'not (but should it be?)'
482
541
  end
483
542
 
543
+ #-----------------------------------
544
+ def param_list_coming_with_2_or_more_params?
545
+ WHSPCHARS[prevchar] && (?(==nextchar) or return [[],false]
546
+ basesize=@parsestack.size
547
+ result=[get1token]
548
+ pass=loop{
549
+ tok=get1token
550
+ result<<tok
551
+ if @parsestack.size==basesize
552
+ break false
553
+ elsif ','==tok.to_s and @parsestack.size==basesize+1
554
+ break true
555
+ elsif EoiToken===tok
556
+ lexerror tok, "unexpected eof in parameter list"
557
+ end
558
+ }
559
+ return [result,pass]
560
+ end
561
+
484
562
  #-----------------------------------
485
563
  CONTEXT2ENDTOK={AssignmentRhsContext=>AssignmentRhsListEndToken,
486
564
  ParamListContextNoParen=>ImplicitParamListEndToken,
487
- KwParamListContext=>KwParamListEndToken
565
+ WhenParamListContext=>KwParamListEndToken,
566
+ RescueSMContext=>KwParamListEndToken
488
567
  }
489
568
  def abort_noparens!(str='')
490
569
  #assert @moretokens.empty?
491
570
  result=[]
492
- while klass=CONTEXT2ENDTOK[@bracestack.last.class]
493
- result << klass.new(@file.pos-str.length)
494
- @bracestack.pop
571
+ while klass=CONTEXT2ENDTOK[@parsestack.last.class]
572
+ result << klass.new(input_position-str.length)
573
+ break if RescueSMContext===@parsestack.last
574
+ @parsestack.pop
495
575
  end
496
576
  return result
497
577
  end
@@ -501,13 +581,13 @@ if false #no longer used
501
581
  def abort_1_noparen!(offs=0)
502
582
  assert @moretokens.empty?
503
583
  result=[]
504
- while AssignmentRhsContext===@bracestack.last
505
- @bracestack.pop
506
- result << AssignmentRhsListEndToken.new(@file.pos-offs)
584
+ while AssignmentRhsContext===@parsestack.last
585
+ @parsestack.pop
586
+ result << AssignmentRhsListEndToken.new(input_position-offs)
507
587
  end
508
- ParamListContextNoParen===@bracestack.last or lexerror huh,'{} with no matching callsite'
509
- @bracestack.pop
510
- result << ImplicitParamListEndToken.new(@file.pos-offs)
588
+ ParamListContextNoParen===@parsestack.last or lexerror huh,'{} with no matching callsite'
589
+ @parsestack.pop
590
+ result << ImplicitParamListEndToken.new(input_position-offs)
511
591
  return result
512
592
  end
513
593
  end
@@ -523,30 +603,31 @@ end
523
603
  case str
524
604
  when "end"
525
605
  result.unshift(*abort_noparens!(str))
526
- @bracestack.last.see @bracestack,:semi #sorta hacky... should make an :end event instead?
606
+ @parsestack.last.see self,:semi #sorta hacky... should make an :end event instead?
527
607
 
528
608
  =begin not needed?
529
- if ExpectDoOrNlContext===@bracestack.last
530
- @bracestack.pop
531
- assert @bracestack.last.starter[/^(while|until|for)$/]
609
+ if ExpectDoOrNlContext===@parsestack.last
610
+ @parsestack.pop
611
+ assert @parsestack.last.starter[/^(while|until|for)$/]
532
612
  end
533
613
  =end
534
614
 
535
- WantsEndContext===@bracestack.last or lexerror result.last, 'unbalanced end'
536
- ctx=@bracestack.pop
615
+ WantsEndContext===@parsestack.last or lexerror result.last, 'unbalanced end'
616
+ ctx=@parsestack.pop
537
617
  start,line=ctx.starter,ctx.linenum
538
618
  BEGINWORDS===start or lexerror result.last, "end does not match #{start or "nil"}"
539
- /^(class|module|def|do)$/===start and @localvars.end_block
619
+ /^(do)$/===start and localvars.end_block
620
+ /^(class|module|def)$/===start and @localvars_stack.pop
540
621
 
541
622
  when "class","module"
542
623
  result.first.has_end!
543
- @bracestack.push WantsEndContext.new(str,@linenum)
544
- @localvars.start_block
545
-
624
+ @parsestack.push WantsEndContext.new(str,@linenum)
625
+ @localvars_stack.push SymbolTable.new
626
+
546
627
  when "if","unless" #could be infix form without end
547
628
  if after_nonid_op?{false} #prefix form
548
629
  result.first.has_end!
549
- @bracestack.push WantsEndContext.new(str,@linenum)
630
+ @parsestack.push WantsEndContext.new(str,@linenum)
550
631
 
551
632
 
552
633
  else #infix form
@@ -554,11 +635,11 @@ end
554
635
  end
555
636
  when "begin","case"
556
637
  result.first.has_end!
557
- @bracestack.push WantsEndContext.new(str,@linenum)
638
+ @parsestack.push WantsEndContext.new(str,@linenum)
558
639
  when "while","until" #could be infix form without end
559
640
  if after_nonid_op?{false} #prefix form
560
641
  result.first.has_end!
561
- @bracestack.push WantsEndContext.new(str,@linenum)
642
+ @parsestack.push WantsEndContext.new(str,@linenum)
562
643
  expect_do_or_end_or_nl! str
563
644
 
564
645
  else #infix form
@@ -566,24 +647,26 @@ end
566
647
  end
567
648
  when "for"
568
649
  result.first.has_end!
569
- @bracestack.push WantsEndContext.new(str,@linenum)
650
+ result.push KwParamListStartToken.new(offset+str.length)
651
+ # corresponding EndToken emitted leaving ForContext ("in" branch, below)
652
+ @parsestack.push WantsEndContext.new(str,@linenum)
570
653
  #expect_do_or_end_or_nl! str #handled by ForSMContext now
571
- @bracestack.push ForSMContext.new(@linenum)
654
+ @parsestack.push ForSMContext.new(@linenum)
572
655
  when "do"
573
656
  result.unshift(*abort_noparens!(str))
574
- if ExpectDoOrNlContext===@bracestack.last
575
- @bracestack.pop
576
- assert WantsEndContext===@bracestack.last
657
+ if ExpectDoOrNlContext===@parsestack.last
658
+ @parsestack.pop
659
+ assert WantsEndContext===@parsestack.last
577
660
  else
578
661
  result.last.has_end!
579
- @bracestack.push WantsEndContext.new(str,@linenum)
580
- @localvars.start_block
662
+ @parsestack.push WantsEndContext.new(str,@linenum)
663
+ localvars.start_block
581
664
  block_param_list_lookahead
582
665
  end
583
666
  when "def"
584
667
  result.first.has_end!
585
- @bracestack.push WantsEndContext.new("def",@linenum)
586
- @localvars.start_block
668
+ @parsestack.push WantsEndContext.new("def",@linenum)
669
+ @localvars_stack.push SymbolTable.new
587
670
  safe_recurse { |aa|
588
671
  @last_operative_token=KeywordToken.new "def" #hack
589
672
  result.concat ignored_tokens
@@ -591,7 +674,7 @@ end
591
674
  #read an expr like a.b.c or a::b::c
592
675
  #or (expr).b.c
593
676
  if nextchar==?( #look for optional parenthesised head
594
- old_size=@bracestack.size
677
+ old_size=@parsestack.size
595
678
  parencount=0
596
679
  begin
597
680
  tok=get1token
@@ -601,22 +684,58 @@ end
601
684
  end
602
685
  EoiToken===tok and lexerror tok, "eof in def header"
603
686
  result<<tok
604
- end until parencount==0 #@bracestack.size==old_size
687
+ end until parencount==0 #@parsestack.size==old_size
605
688
  else #no parentheses, all tail
606
689
  @last_operative_token=KeywordToken.new "." #hack hack
607
- result << symbol(false,false)
608
- #this isn't quite right.... if a.b.c.d is seen, a, b, and c
690
+ tokindex=result.size
691
+ result << tok=symbol(false,false)
692
+ name=tok.to_s
693
+ assert !in_lvar_define_state
694
+
695
+ #maybe_local really means 'maybe local or constant'
696
+ maybe_local=case name
697
+ when /[^a-z_0-9]$/i; #do nothing
698
+ when /^[@$]/; true
699
+ when VARLIKE_KEYWORDS,FUNCLIKE_KEYWORDS; ty=KeywordToken
700
+ when /^[a-z_]/; localvars===name
701
+ when /^[A-Z]/; is_const=true #this is the right algorithm for constants...
702
+ end
703
+ if !ty and maybe_local
704
+ result.push( *ignored_tokens(false,false) )
705
+ nc=nextchar
706
+ if nc==?: || nc==?.
707
+ ty=VarNameToken
708
+ end
709
+ end
710
+ unless ty
711
+ ty=MethNameToken
712
+ endofs=tok.offset+tok.to_s.length
713
+ result[tokindex+1...tokindex+1]=
714
+ [ImplicitParamListStartToken.new(endofs),ImplicitParamListEndToken.new(endofs)]
715
+ end
716
+
717
+ assert result[tokindex].equal?(tok)
718
+ result[tokindex]=ty.new(tok.to_s,tok.offset)
719
+
720
+
721
+ #if a.b.c.d is seen, a, b, and c
609
722
  #should be considered maybe varname instead of methnames.
610
723
  #the last (d in the example) is always considered a methname;
611
724
  #it's what's being defined.
725
+ #b and c should be considered varnames only if
726
+ #they are capitalized and preceded by :: .
727
+ #a could even be a keyword (eg self or block_given?).
612
728
  end
613
729
  #read tail: .b.c.d etc
614
- @last_operative_token=result.last
730
+ result.reverse_each{|res| break @last_operative_token=res unless StillIgnoreToken===res}
731
+ ###@last_operative_token=result.last #naive
732
+ assert !(IgnoreToken===@last_operative_token)
615
733
  state=:expect_op
734
+ @in_def_name=true
616
735
  loop do
617
736
 
618
737
  #look for start of parameter list
619
- nc=(@moretokens.first or nextchar.chr)
738
+ nc=(@moretokens.empty? ? nextchar.chr : @moretokens.first.to_s[0,1])
620
739
  if state==:expect_op and /^[a-z_(&*]/i===nc
621
740
  result.concat def_param_list
622
741
  break
@@ -627,8 +746,8 @@ end
627
746
  case tok
628
747
  when EoiToken
629
748
  lexerror tok,'unexpected eof in def header'
630
- when IgnoreToken
631
- when MethNameToken #,VarNameToken # /^[a-z_]/i.token_pat
749
+ when StillIgnoreToken
750
+ when MethNameToken ,VarNameToken # /^[a-z_]/i.token_pat
632
751
  lexerror tok,'expected . or ::' unless state==:expect_name
633
752
  state=:expect_op
634
753
  when /^(\.|::)$/.token_pat
@@ -642,6 +761,7 @@ end
642
761
  "#{tok}:#{tok.class}")
643
762
  end
644
763
  end
764
+ @in_def_name=false
645
765
  }
646
766
  when "alias"
647
767
  safe_recurse { |a|
@@ -663,6 +783,7 @@ end
663
783
  tok or lexerror(result.first,"bad symbol in undef")
664
784
  result<< tok
665
785
  @last_operative_token=tok
786
+ assert !(IgnoreToken===@last_operative_token)
666
787
 
667
788
  sawnl=false
668
789
  result.concat ignored_tokens(true){|nl| sawnl=true}
@@ -674,26 +795,47 @@ end
674
795
  }
675
796
 
676
797
  # when "defined?"
677
- # huh
678
798
  #defined? might have a baresymbol following it
679
799
  #does it need to be handled specially?
800
+ #it would seem not.....
680
801
 
681
802
  when "when"
803
+ #abort_noparens! emits EndToken on leaving context
682
804
  result.unshift(*abort_noparens!(str))
683
- @bracestack.push KwParamListContext.new(str,@linenum)
805
+ result.push KwParamListStartToken.new( offset+str.length)
806
+ @parsestack.push WhenParamListContext.new(str,@linenum)
684
807
 
685
808
  when "rescue"
686
- result.unshift(*abort_noparens!(str))
687
- @bracestack.push RescueSMContext.new(@linenum)
809
+ unless after_nonid_op? {false}
810
+ #rescue needs to be treated differently when in operator context...
811
+ #i think no RescueSMContext should be pushed on the stack...
812
+ #plus, the rescue token should be marked as infix
813
+ result.first.set_infix!
814
+ else
815
+ result.push KwParamListStartToken.new(offset+str.length)
816
+ #corresponding EndToken emitted by abort_noparens! on leaving rescue context
817
+ result.unshift(*abort_noparens!(str))
818
+ @parsestack.push RescueSMContext.new(@linenum)
819
+ end
688
820
 
689
- when "then","in"
821
+ when "then"
822
+ result.unshift(*abort_noparens!(str))
823
+ @parsestack.last.see self,:then
824
+
825
+ when "in"
826
+ result.unshift KwParamListEndToken.new( offset)
690
827
  result.unshift(*abort_noparens!(str))
691
- @bracestack.last.see @bracestack,str.to_sym
828
+ @parsestack.last.see self,:in
692
829
 
693
- when /^(#{BINOPWORDS}|#{INNERBOUNDINGWORDS})$/o
830
+ when /\A(#{BINOPWORDS}|#{INNERBOUNDINGWORDS})\Z/o
694
831
  result.unshift(*abort_noparens!(str))
695
832
 
696
- when FUNCLIKE_KEYWORDS: result=yield
833
+ when /\A(return|break|next)\Z/
834
+ result=yield
835
+ result.first.has_no_block! unless KeywordToken===@last_operative_token and @last_operative_token===/\A(\.|::)\Z/
836
+
837
+ when FUNCLIKE_KEYWORDS
838
+ result=yield
697
839
 
698
840
  when RUBYKEYWORDS
699
841
  #do nothing
@@ -706,6 +848,36 @@ end
706
848
  end
707
849
 
708
850
 
851
+ #-----------------------------------
852
+ def parsestack_lastnonassign_is?(obj)
853
+ @parsestack.reverse_each{|ctx|
854
+ case ctx
855
+ # when klass: return true
856
+ when AssignmentRhsContext
857
+ else return ctx.object_id==obj.object_id
858
+ end
859
+ }
860
+ end
861
+
862
+ #-----------------------------------
863
+ #what's inside goalposts (the block formal parameter list)
864
+ #is considered the left hand side of an assignment.
865
+ #inside goalposts, a local variable is declared if
866
+ #it has one of the following tokens on both sides:
867
+ # , (if directly inside goalposts or nested lhs)
868
+ # | (as a goalpost)
869
+ # * or & (unary only)
870
+ # ( or ) (if they form a nested left hand side)
871
+ #parens form a nested lhs if they're not part of an actual
872
+ #parameter list and have a comma directly in them somewhere
873
+ #a nested lhs _must_ have a comma in it somewhere. this is
874
+ #not legal:
875
+ # (foo)=[1]
876
+ #whereas this is:
877
+ # (foo,)=[1]
878
+
879
+
880
+
709
881
  #-----------------------------------
710
882
  def block_param_list_lookahead
711
883
  safe_recurse{ |la|
@@ -713,27 +885,45 @@ end
713
885
  a=ignored_tokens
714
886
 
715
887
  if eat_next_if(?|)
716
- a<<KeywordToken.new("|",@file.pos-1)
888
+ a<<KeywordToken.new("|", input_position-1)
889
+ if true
890
+ @parsestack.push mycontext=BlockParamListLhsContext.new(@linenum)
891
+ nextchar==?| and a.push NoWsToken.new(input_position)
892
+ else
717
893
  if eat_next_if(?|)
718
- a.concat [NoWsToken.new(@file.pos-1),
719
- KeywordToken.new('|',@file.pos-1)]
894
+ a.concat [NoWsToken.new(input_position-1),
895
+ KeywordToken.new('|', input_position-1)]
720
896
  else
721
897
  assert !@defining_lvar
722
898
  @defining_lvar=true
723
899
  assert((@last_operative_token===';' or NewlineToken===@last_operative_token))
724
- @bracestack.push BlockParamListContext.new(@linenum)
725
- #block param initializers are not supported here, because ruby doesn't allow them!
726
- begin
900
+ @parsestack.push mycontext=BlockParamListLhsContext.new(@linenum)
901
+ #block param initializers ARE supported here, even tho ruby doesn't allow them!
902
+ tok=nil
903
+ loop do
727
904
  tok=get1token
728
- EoiToken===tok and lexerror tok,"eof in block parameter list"
905
+ case tok
906
+ when EoiToken; lexerror tok,"eof in block parameter list"
907
+ when AssignmentRhsListStartToken; @defining_lvar=false
908
+ when AssignmentRhsListEndToken; parsestack_lastnonassign_is?(mycontext) and @defining_lvar=true
909
+ end
910
+
911
+ tok==='|' and parsestack_lastnonassign_is?(mycontext) and break
729
912
  a<<tok
730
- end until tok==='|'
731
- assert@defining_lvar
913
+ end
914
+ assert@defining_lvar || AssignmentRhsContext===@parsestack.last
732
915
  @defining_lvar=false
733
- BlockParamListContext===@bracestack.last or raise 'expected BlockParamListContext atop @bracestack'
734
- @bracestack.pop
916
+ while AssignmentRhsContext===@parsestack.last
917
+ a.push( *abort_noparens!('|') )
918
+ end
919
+
920
+ @parsestack.last.object_id==mycontext.object_id or raise 'expected my BlockParamListLhsContext atop @parsestack'
921
+ @parsestack.pop
922
+
923
+ a<<KeywordToken.new('|',tok.offset)
735
924
  @moretokens.empty? or
736
925
  fixme %#moretokens might be set from get1token call above...might be bad#
926
+ end
737
927
  end
738
928
  end
739
929
 
@@ -755,8 +945,9 @@ end
755
945
  #then match the following tokens until
756
946
  #the matching endbrace is found
757
947
  def def_param_list
948
+ @in_def_name=false
758
949
  result=[]
759
- normal_comma_level=old_bracestack_size=@bracestack.size
950
+ normal_comma_level=old_parsestack_size=@parsestack.size
760
951
  safe_recurse { |a|
761
952
  assert(@moretokens.empty?)
762
953
  assert((not IgnoreToken===@moretokens[0]))
@@ -770,9 +961,9 @@ end
770
961
  assert(tok==='(')
771
962
 
772
963
 
773
- #bracestack was changed by get1token above...
964
+ #parsestack was changed by get1token above...
774
965
  normal_comma_level+=1
775
- assert(normal_comma_level==@bracestack.size)
966
+ assert(normal_comma_level==@parsestack.size)
776
967
  endingblock=proc{|tok| tok===')' }
777
968
  else
778
969
  endingblock=proc{|tok| tok===';' or NewlineToken===tok}
@@ -785,36 +976,48 @@ end
785
976
  #read local parameter names
786
977
  loop do
787
978
  expect_name=(@last_operative_token===',' and
788
- normal_comma_level==@bracestack.size)
979
+ normal_comma_level==@parsestack.size)
789
980
  expect_name and @defining_lvar||=true
790
981
  result << tok=get1token
791
982
  lexerror tok, "unexpected eof in def header" if EoiToken===tok
792
983
 
793
984
  #break if at end of param list
794
985
  endingblock===tok and
795
- old_bracestack_size>=@bracestack.size and break
986
+ old_parsestack_size>=@parsestack.size and break
796
987
 
797
988
  #next token is a local var name
798
989
  #(or the one after that if unary ops present)
799
990
  #result.concat ignored_tokens
800
- expect_name and case tok
801
- when IgnoreToken#, /^[A-Z]/ #do nothing
802
- when VarNameToken
991
+ if expect_name
992
+ case tok
993
+ when IgnoreToken #, /^[A-Z]/ #do nothing
994
+ when /^,$/.token_pat #hack
995
+
996
+
997
+ when VarNameToken
803
998
  assert@defining_lvar
804
999
  @defining_lvar=false
805
1000
  assert((not @last_operative_token===','))
806
- when /^[&*]$/.token_pat #unary form...
1001
+ when /^[&*]$/.token_pat #unary form...
807
1002
  #a NoWsToken is also expected... read it now
808
1003
  result.concat maybe_no_ws_token #not needed?
809
1004
  @last_operative_token=KeywordToken.new ','
810
- else lexerror tok,"unfamiliar var name '#{tok}'"
1005
+ else
1006
+ lexerror tok,"unfamiliar var name '#{tok}'"
1007
+ end
1008
+ elsif /^,$/.token_pat===tok and
1009
+ normal_comma_level+1==@parsestack.size and
1010
+ AssignmentRhsContext===@parsestack.last
1011
+ #seeing comma here should end implicit rhs started within the param list
1012
+ result[-1,0]=AssignmentRhsListEndToken.new(tok.offset)
1013
+ @parsestack.pop
811
1014
  end
812
1015
  end
813
1016
 
814
1017
  @defining_lvar=false
815
1018
 
816
1019
 
817
- assert(@bracestack.size <= old_bracestack_size)
1020
+ assert(@parsestack.size <= old_parsestack_size)
818
1021
  assert(endingblock[tok])
819
1022
 
820
1023
  #hack: force next token to look like start of a
@@ -846,19 +1049,19 @@ end
846
1049
  end
847
1050
 
848
1051
  #-----------------------------------
849
- #handle * in ruby code. is unary or binary operator?
1052
+ #handle * & in ruby code. is unary or binary operator?
850
1053
  def star_or_amp(ch)
851
1054
  assert('*&'[ch])
852
- if unary_op_expected? ch
1055
+ want_unary=unary_op_expected? ch
1056
+ result=(quadriop ch)
1057
+ if want_unary
853
1058
  #readahead(2)[1..1][/[\s\v#\\]/] or #not needed?
854
- result=operator_or_methname_token getchar
1059
+ assert OperatorToken===result
1060
+ result.unary=true #result should distinguish unary+binary *&
855
1061
  WHSPLF[nextchar.chr] or
856
- @moretokens << NoWsToken.new(@file.pos)
857
- return result
858
- else
859
- return(quadriop ch)
1062
+ @moretokens << NoWsToken.new(input_position)
860
1063
  end
861
- #result should distinguish unary+binary *&
1064
+ result
862
1065
  end
863
1066
 
864
1067
  #-----------------------------------
@@ -868,7 +1071,7 @@ end
868
1071
  getchar
869
1072
  NumberToken.new getchar_maybe_escape
870
1073
  else
871
- @bracestack.push TernaryContext.new(@linenum)
1074
+ @parsestack.push TernaryContext.new(@linenum)
872
1075
  KeywordToken.new getchar #operator
873
1076
  end
874
1077
  end
@@ -888,18 +1091,19 @@ end
888
1091
  end
889
1092
 
890
1093
  #-----------------------------------
891
- #return true if tok corresponds to a variable or constant, false if its for a method, nil for something else
892
- #we assume tok is a valid token with a correctly formed name.
1094
+ #return true if last tok corresponds to a variable or constant,
1095
+ #false if its for a method, nil for something else
1096
+ #we assume it is a valid token with a correctly formed name.
893
1097
  #...should really be called was_var_name
894
1098
  def is_var_name?
895
1099
  (tok=@last_operative_token)
896
1100
 
897
1101
  s=tok.to_s
898
1102
  case s
899
- when /[^a-z_0-9]$/i: false
900
- when /^[a-z_]/: @localvars===s or VARLIKE_KEYWORDS===s
901
- when /^[A-Z]/: VarNameToken===tok
902
- when /^[@$<]/: true
1103
+ when /[^a-z_0-9]$/i; false
1104
+ when /^[a-z_]/; localvars===s or VARLIKE_KEYWORDS===s
1105
+ when /^[A-Z]/; VarNameToken===tok
1106
+ when /^[@$<]/; true
903
1107
  else raise "not var or method name: #{s}"
904
1108
  end
905
1109
  end
@@ -907,21 +1111,26 @@ end
907
1111
  #-----------------------------------
908
1112
  def colon_quote_expected?(ch) #yukko hack
909
1113
  assert ':?'[ch]
910
- readahead(2)[/^(\?[^#{WHSPLF}]|:[$@a-zA-Z_'"`\[*~+\-\/%<=>&|^])$/o] or return false
1114
+ readahead(2)[/^(\?[^#{WHSPLF}]|:[^\s\r\n\t\f\v :])$/o] or return false
911
1115
 
912
1116
  after_nonid_op? {
913
1117
  #possible func-call as operator
914
1118
 
915
- !is_var_name?
1119
+ not is_var_name? and
1120
+ if ch==':'
1121
+ not TernaryContext===@parsestack.last
1122
+ else
1123
+ !readahead(3)[/^\?[a-z0-9_]{2}/i]
1124
+ end
916
1125
  }
917
1126
  end
918
1127
 
919
1128
  #-----------------------------------
920
1129
  def symbol_or_op(ch)
921
- startpos=@file.pos
1130
+ startpos= input_position
922
1131
  qe= colon_quote_expected?(ch)
923
1132
  lastchar=prevchar
924
- eat_next_if(ch) or raise "needed: "+ch
1133
+ eat_next_if(ch[0]) or raise "needed: "+ch
925
1134
 
926
1135
  #handle quoted symbols like :"foobar", :"[]"
927
1136
  qe and return symbol(':')
@@ -932,13 +1141,13 @@ end
932
1141
  @moretokens.push(*abort_noparens!(':'))
933
1142
 
934
1143
  #end ternary context, if any
935
- @bracestack.last.see @bracestack,:colon
1144
+ @parsestack.last.see self,:colon
936
1145
 
937
- TernaryContext===@bracestack.last and @bracestack.pop #should be in the context's see handler
1146
+ TernaryContext===@parsestack.last and @parsestack.pop #should be in the context's see handler
938
1147
 
939
- if ExpectDoOrNlContext===@bracestack.last #should be in the context's see handler
940
- @bracestack.pop
941
- assert @bracestack.last.starter[/^(while|until|for)$/]
1148
+ if ExpectDoOrNlContext===@parsestack.last #should be in the context's see handler
1149
+ @parsestack.pop
1150
+ assert @parsestack.last.starter[/^(while|until|for)$/]
942
1151
  end
943
1152
 
944
1153
  @moretokens.push KeywordToken.new(':',startpos)
@@ -965,17 +1174,17 @@ end
965
1174
  #-----------------------------------
966
1175
  def symbol(notbare,couldbecallsite=!notbare)
967
1176
  assert !couldbecallsite
968
- start=@file.pos
1177
+ start= input_position
969
1178
  notbare and start-=1
970
1179
  klass=(notbare ? SymbolToken : MethNameToken)
971
1180
 
972
1181
  #look for operators
973
1182
  opmatches=readahead(3)[RUBYSYMOPERATORREX]
974
- result= opmatches ? @file.read(opmatches.size) :
1183
+ result= opmatches ? read(opmatches.size) :
975
1184
  case nc=nextchar
976
1185
  when ?" then assert notbare;double_quote('"')
977
1186
  when ?' then assert notbare;double_quote("'")
978
- when ?` then @file.read(1)
1187
+ when ?` then read(1)
979
1188
  when ?@ then at_identifier.to_s
980
1189
  when ?$ then dollar_identifier.to_s
981
1190
  when ?_,?a..?z then identifier_as_string(?:)
@@ -991,19 +1200,24 @@ end
991
1200
  return lexerror(klass.new(result,start),error)
992
1201
  end
993
1202
 
1203
+ def merge_assignment_op_in_setter_callsites?
1204
+ false
1205
+ end
994
1206
  #-----------------------------------
995
1207
  def callsite_symbol(tok_to_errify)
996
- start=@file.pos
1208
+ start= input_position
997
1209
 
998
1210
  #look for operators
999
1211
  opmatches=readahead(3)[RUBYSYMOPERATORREX]
1000
- return [opmatches ? @file.read(opmatches.size) :
1212
+ return [opmatches ? read(opmatches.size) :
1001
1213
  case nc=nextchar
1002
- when ?` then @file.read(1)
1003
- when ?_,?a..?z,?A..?Z then identifier_as_string(?:)
1214
+ when ?` then read(1)
1215
+ when ?_,?a..?z,?A..?Z then
1216
+ context=merge_assignment_op_in_setter_callsites? ? ?: : nc
1217
+ identifier_as_string(context)
1004
1218
  else
1005
1219
  @last_operative_token=KeywordToken.new(';')
1006
- lexerror(tok_to_errify,"unexpected char starting symbol: #{nc.chr}")
1220
+ lexerror(tok_to_errify,"unexpected char starting callsite symbol: #{nc.chr}, tok=#{tok_to_errify.inspect}")
1007
1221
  nil
1008
1222
  end, start
1009
1223
  ]
@@ -1011,10 +1225,10 @@ end
1011
1225
 
1012
1226
  #-----------------------------------
1013
1227
  def here_header
1014
- @file.read(2)=='<<' or raise "parser insanity"
1228
+ read(2)=='<<' or raise "parser insanity"
1015
1229
 
1016
1230
  dash=eat_next_if(?-)
1017
- quote=eat_next_if( /^['"`]$/)
1231
+ quote=eat_next_if( /['"`]/)
1018
1232
  if quote
1019
1233
  ender=til_charset(/[#{quote}]/)
1020
1234
  (quote==getchar) or
@@ -1042,8 +1256,8 @@ end
1042
1256
  #handle case of here header in a string inclusion, but
1043
1257
  #here body outside it.
1044
1258
  cnt=0
1045
- 1.upto @bracestack.size do |i|
1046
- case @bracestack[-i]
1259
+ 1.upto @parsestack.size do |i|
1260
+ case @parsestack[-i]
1047
1261
  when AssignmentRhsContext,ParamListContextNoParen,TopLevelContext
1048
1262
  else cnt+=1
1049
1263
  end
@@ -1054,11 +1268,11 @@ end
1054
1268
  end
1055
1269
 
1056
1270
  tok=get1token
1057
- assert(a.object_id==@moretokens.object_id)
1271
+ assert(a.equal?( @moretokens))
1058
1272
  toks<<tok
1059
1273
  EoiToken===tok and lexerror tok, "here body expected before eof"
1060
1274
  end while res.unsafe_to_use
1061
- assert(a.object_id==@moretokens.object_id)
1275
+ assert(a.equal?( @moretokens))
1062
1276
  a[0,0]= toks #same as a=toks+a, but keeps a's id
1063
1277
  }
1064
1278
 
@@ -1076,9 +1290,9 @@ end
1076
1290
  if quote_expected?(ch) #and @last_operative_token!='class' #not needed?
1077
1291
  here_header
1078
1292
  else
1079
- operator_or_methname_token @file.read(2)
1293
+ operator_or_methname_token read(2)
1080
1294
  end
1081
- when "<=>" then operator_or_methname_token @file.read(3)
1295
+ when "<=>" then operator_or_methname_token read(3)
1082
1296
  else quadriop(ch)
1083
1297
  end
1084
1298
  end
@@ -1087,115 +1301,152 @@ end
1087
1301
  def escnewline(ch)
1088
1302
  assert ch == '\\'
1089
1303
 
1090
- pos=@file.pos
1304
+ pos= input_position
1091
1305
  result=getchar
1092
1306
  if nl=readnl
1093
1307
  result+=nl
1094
1308
  else
1095
1309
  error='illegal escape sequence'
1096
1310
  end
1097
- lexerror EscNlToken.new(@filename,@linenum,result,pos), error
1098
- end
1311
+
1312
+ @moretokens.unshift FileAndLineToken.new(@filename,ln=@linenum,input_position)
1313
+ optional_here_bodies
1099
1314
 
1315
+ lexerror EscNlToken.new(@filename,ln-1,result,pos), error
1316
+ end
1317
+
1100
1318
  #-----------------------------------
1101
- def newline(ch)
1102
- assert("\r\n"[nextchar.chr])
1319
+ def optional_here_bodies
1103
1320
 
1104
1321
  #handle here bodies queued up by previous line
1105
1322
  #(we should be more compatible with dos/mac style newlines...)
1106
- if tofill=@incomplete_here_tokens.shift
1107
- tofill.string.offset=@file.pos
1323
+ while tofill=@incomplete_here_tokens.shift
1324
+ tofill.string.offset= input_position
1108
1325
  loop {
1109
- assert("\r\n"[nextchar.chr])
1110
-
1111
- #retr evrything til next nl
1112
- line=all_quote(/^[\r\n]$/, tofill.quote, /^[\r\n]$/, :regex_esc_seq)
1113
- #(you didn't know all_quote could take a regex, did you?)
1326
+ assert("\r\n"[prevchar])
1114
1327
 
1115
- #get rid of fals that otherwise appear to be in the middle of
1116
- #a string (and are emitted out of order)
1117
- fal=@moretokens.pop
1118
- assert FileAndLineToken===fal || fal.nil?
1119
-
1120
- back1char
1121
- assert("\r\n"[nextchar.chr])
1122
-
1123
- #matches terminating reg expr?
1124
- break if line.elems.size==1 and
1125
- line.elems[0][tofill.termex]
1126
-
1127
- tofill.append_token line
1128
- tofill.append readnl
1129
- back1char
1328
+ #here body terminator?
1329
+ oldpos= input_position
1330
+ if tofill.dash
1331
+ til_charset(/[^#{WHSP}]/o)
1332
+ end
1333
+ break if eof?
1334
+ break if read(tofill.ender.size)==tofill.ender and readnl
1335
+ input_position_set oldpos
1336
+
1337
+ if tofill.quote=="'"
1338
+ line=til_charset(/[\r\n]/)+readnl
1339
+ line.gsub! "\\\\", "\\"
1340
+ tofill.append line
1341
+ assert(line[-1..-1][/[\r\n]/])
1342
+ else
1343
+
1344
+ back1char #-1 to make newline char the next to read
1345
+ @linenum-=1
1346
+
1347
+ #retr evrything til next nl
1348
+ line=all_quote(INET_NL_REX, tofill.quote, INET_NL_REX)
1349
+ #(you didn't know all_quote could take a regex, did you?)
1350
+
1351
+ #get rid of fals that otherwise appear to be in the middle of
1352
+ #a string (and are emitted out of order)
1353
+ fal=@moretokens.pop
1354
+ assert FileAndLineToken===fal || fal.nil?
1355
+
1356
+ back1char
1357
+ @linenum-=1
1358
+ assert("\r\n"[nextchar.chr])
1359
+ tofill.append_token line
1360
+ tofill.append readnl
1361
+ end
1130
1362
  }
1131
1363
 
1132
- assert("\r\n"[nextchar.chr])
1364
+ assert(eof? || "\r\n"[prevchar])
1133
1365
  tofill.unsafe_to_use=false
1366
+ tofill.line=@linenum-1
1134
1367
 
1135
- return tofill.bodyclass.new(tofill)
1368
+ @moretokens.push \
1369
+ tofill.bodyclass.new(tofill),
1370
+ FileAndLineToken.new(@filename,@linenum,input_position)
1136
1371
  end
1372
+
1373
+ end
1374
+
1375
+ #-----------------------------------
1376
+ def newline(ch)
1377
+ assert("\r\n"[nextchar.chr])
1378
+
1379
+
1137
1380
 
1138
1381
  #ordinary newline handling (possibly implicitly escaped)
1139
1382
  assert("\r\n"[nextchar.chr])
1383
+ assert !@parsestack.empty?
1140
1384
  assert @moretokens.empty?
1141
1385
  result=if NewlineToken===@last_operative_token or #hack
1142
1386
  @last_operative_token===/^(;|begin|do|#{INNERBOUNDINGWORDS})$/ or #hack
1143
1387
  !after_nonid_op?{false}
1144
1388
  then #hack-o-rama: probly cases left out above
1145
1389
  a= abort_noparens!
1146
- ExpectDoOrNlContext===@bracestack.last and @bracestack.pop
1147
- @bracestack.last.see @bracestack,:semi
1390
+ ExpectDoOrNlContext===@parsestack.last and @parsestack.pop
1391
+ assert !@parsestack.empty?
1392
+ @parsestack.last.see self,:semi
1148
1393
 
1149
1394
  a << super(ch)
1150
1395
  @moretokens.replace a+@moretokens
1151
1396
  @moretokens.shift
1152
1397
  else
1153
- offset=@file.pos
1154
- #@moretokens <<
1155
- EscNlToken.new(@filename,@linenum,readnl,offset)
1398
+ offset= input_position
1399
+ nl=readnl
1400
+ @moretokens << FileAndLineToken.new(@filename,@linenum,input_position)
1401
+ EscNlToken.new(@filename,@linenum-1,nl,offset)
1156
1402
  #WsToken.new ' ' #why? #should be "\\\n" ?
1157
1403
  end
1158
1404
 
1405
+ optional_here_bodies
1406
+
1159
1407
  start_of_line_directives
1160
1408
 
1161
1409
  return result
1162
1410
  end
1163
1411
 
1164
1412
  #-----------------------------------
1165
- EQBEGIN=%r/^=begin[^a-zA-Z_0-9]$/
1413
+ EQBEGIN=%r/^=begin[ \t\v\r\n\f]$/
1166
1414
  EQBEGINLENGTH=7
1167
1415
  EQEND='=end'
1168
- ENDMARKER=/^__END__[\r\n]$/
1416
+ EQENDLENGTH=4
1417
+ ENDMARKER=/^__END__[\r\n]?\Z/
1169
1418
  ENDMARKERLENGTH=8
1170
1419
  def start_of_line_directives
1171
1420
  #handle =begin...=end (at start of a line)
1172
1421
  while EQBEGIN===readahead(EQBEGINLENGTH)
1173
- startpos=@file.pos
1174
- more=@file.read(EQBEGINLENGTH-1) #get =begin
1175
-
1176
- #keep reading til /\n=end.*\n/
1177
- @file.each(EQEND) {|cblock|
1178
- more << cblock
1179
- #must be at start of line
1180
- break if /^[\r\n]#{EQEND}/o===readback(EQEND.length+1)
1181
- }
1422
+ startpos= input_position
1423
+ more= read(EQBEGINLENGTH-1) #get =begin
1424
+
1425
+ begin
1426
+ eof? and raise "eof before =end"
1427
+ more<<til_charset(/[\r\n]/)
1428
+ more<<readnl
1429
+ end until readahead(EQENDLENGTH)==EQEND
1430
+
1182
1431
  #read rest of line after =end
1183
- more << @file.til_charset(/[\r\n]/)
1432
+ more << til_charset(/[\r\n]/)
1184
1433
  assert((?\r===nextchar or ?\n===nextchar))
1185
1434
  assert !(/[\r\n]/===more[-1,1])
1435
+ more<< readnl
1186
1436
 
1187
- newls= more.scan(/\r\n?|\n\r?/)
1188
- @linenum+= newls.size
1437
+ # newls= more.scan(/\r\n?|\n\r?/)
1438
+ # @linenum+= newls.size
1189
1439
 
1190
1440
  #inject the fresh comment into future token results
1191
- @moretokens.push IgnoreToken.new(more,startpos)
1441
+ @moretokens.push IgnoreToken.new(more,startpos),
1442
+ FileAndLineToken.new(@filename,@linenum,input_position)
1192
1443
  end
1193
1444
 
1194
1445
  #handle __END__
1195
1446
  if ENDMARKER===readahead(ENDMARKERLENGTH)
1196
- assert !(ImplicitContext===@bracestack.last)
1197
- @moretokens.unshift endoffile_detected(@file.read(6))
1198
- @file.pos=@file.stat.size
1447
+ assert !(ImplicitContext===@parsestack.last)
1448
+ @moretokens.unshift endoffile_detected(read(7))
1449
+ # input_position_set @file.size
1199
1450
  end
1200
1451
  end
1201
1452
 
@@ -1221,11 +1472,15 @@ end
1221
1472
  #used to resolve the ambiguity of
1222
1473
  # <<, %, ? in ruby
1223
1474
  #returns whether current token is to be the start of a literal
1224
- #/ is not handled right here if whitespace immediately follows the /
1225
1475
  def quote_expected?(ch) #yukko hack
1476
+ if AssignmentContext===@parsestack.last
1477
+ @parsestack.pop
1478
+ return false
1479
+ end
1480
+
1226
1481
  case ch[0]
1227
1482
  when ?? then readahead(2)[/^\?[#{WHSPLF}]$/o] #not needed?
1228
- when ?% then readahead(3)[/^%([a-ps-vyzA-PR-VX-Z]|[QqrwWx][a-zA-Z0-9])/]
1483
+ when ?% then readahead(3)[/^%([a-pt-vyzA-PR-VX-Z]|[QqrswWx][a-zA-Z0-9])/]
1229
1484
  when ?< then !readahead(4)[/^<<-?['"`a-z0-9_]/i]
1230
1485
  else raise 'unexpected ch (#{ch}) in quote_expected?'
1231
1486
  # when ?+,?-,?&,?*,?~,?! then '*&='[readahead(2)[1..1]]
@@ -1240,22 +1495,29 @@ end
1240
1495
  end
1241
1496
 
1242
1497
  #-----------------------------------
1498
+ #returns false if last token was an value, true if it was an operator.
1499
+ #returns what block yields if last token was a method name.
1243
1500
  #used to resolve the ambiguity of
1244
- # <<, %, /, ?, :, and newline in ruby
1501
+ # <<, %, /, ?, :, and newline (among others) in ruby
1245
1502
  def after_nonid_op?
1246
1503
  case @last_operative_token
1247
- when MethNameToken,VarNameToken, FUNCLIKE_KEYWORDS.token_pat
1504
+ when MethNameToken, FUNCLIKE_KEYWORDS.token_pat ,VarNameToken
1505
+ #VarNameToken should really be left out of this case...
1506
+ #should be in next branch instread
1507
+ #callers all check for last token being not a variable if they pass anything
1508
+ #but {false} in the block
1248
1509
  return yield
1249
1510
  when StringToken, SymbolToken, NumberToken, HerePlaceholderToken,
1250
- %r{^(class|module|do|end|self|true|false|nil|
1251
- __FILE__|__LINE__|[\})\]]|alias|(un)?def|for
1511
+ %r{^(
1512
+ class|module|end|self|true|false|nil|
1513
+ __FILE__|__LINE__|[\})\]]|alias|(un)?def|for
1252
1514
  )$}x.token_pat
1253
- #do shouldn't be in above list... dunno about def/undef
1254
- #maybe class/module shouldn't either?
1515
+ #dunno about def/undef
1516
+ #maybe class/module shouldn't he here either?
1255
1517
  #for is also in NewlineToken branch, below.
1256
1518
  #what about rescue?
1257
1519
  return false
1258
- when /^(#{RUBYOPERATORREX}|#{INNERBOUNDINGWORDS})$/o.token_pat
1520
+ when /^(#{RUBYOPERATORREX}|#{INNERBOUNDINGWORDS}|do)$/o.token_pat
1259
1521
  #regexs above must match whole string
1260
1522
  #assert(@last_operative_token==$&) #disabled 'cause $& is now always nil :(
1261
1523
  return true
@@ -1273,19 +1535,46 @@ end
1273
1535
  end
1274
1536
  end
1275
1537
 
1538
+
1539
+
1540
+
1541
+ #-----------------------------------
1542
+ #returns the last context on @parsestack which isn't an ImplicitContext
1543
+ def last_context_not_implicit
1544
+ @parsestack.reverse_each{|ctx|
1545
+ return ctx unless ImplicitContext===ctx
1546
+ }
1547
+ fail
1548
+ end
1549
+
1550
+ #-----------------------------------
1551
+ #a | has been seen. is it an operator? or a goalpost?
1552
+ #(goalpost == delimiter of block param list)
1553
+ #if it is a goalpost, end the BlockParamListLhsContext on
1554
+ #the context stack, as well as any implicit contexts on top of it.
1555
+ def conjunction_or_goalpost(ch)
1556
+ result=quadriop(ch)
1557
+ if result===/^|$/ and BlockParamListLhsContext===last_context_not_implicit
1558
+ @moretokens.push( *abort_noparens!("|"))
1559
+ assert(BlockParamListLhsContext===@parsestack.last)
1560
+ @parsestack.pop
1561
+ @moretokens.push KeywordToken.new("|", input_position-1)
1562
+ result=@moretokens.shift
1563
+ end
1564
+ result
1565
+ end
1566
+
1276
1567
  #-----------------------------------
1277
1568
  def quadriop(ch) #match /&&?=?/ (&, &&, &=, or &&=)
1278
1569
  assert(%w[& * | < >].include?(ch))
1279
- # '&*'[ch] and qe=quote_expected?(ch) #not needed?
1280
1570
  result=getchar + (eat_next_if(ch)or'')
1281
1571
  if eat_next_if(?=)
1282
1572
  result << ?=
1283
- # elsif qe and result[/^[&*]$/] #not needed?
1284
- # @moretokens<<NoWsToken.new(@file.pos) #not needed?
1285
1573
  end
1286
1574
  return operator_or_methname_token(result)
1287
1575
  end
1288
1576
 
1577
+
1289
1578
  #-----------------------------------
1290
1579
  def biop(ch) #match /%=?/ (% or %=)
1291
1580
  assert(ch[/^[%^~]$/])
@@ -1295,18 +1584,18 @@ end
1295
1584
  end
1296
1585
  return operator_or_methname_token( result)
1297
1586
  end
1298
-
1299
1587
  #-----------------------------------
1300
- def tilde(ch) #match /~=?/ (~ or ~=)
1588
+ def tilde(ch) #match ~
1301
1589
  assert(ch=='~')
1302
1590
  result=getchar
1303
- # eat_next_if(?=) ?
1591
+ # eat_next_if(?=) ? #ack, spppft, I'm always getting this backwards
1304
1592
  # result <<?= :
1305
1593
  WHSPLF[nextchar.chr] ||
1306
- @moretokens << NoWsToken.new(@file.pos)
1594
+ @moretokens << NoWsToken.new(input_position)
1307
1595
  #why is the NoWsToken necessary at this point?
1308
- return operator_or_methname_token( result)
1309
- #result should distinguish unary ~
1596
+ result=operator_or_methname_token result
1597
+ result.unary=true #result should distinguish unary ~
1598
+ result
1310
1599
  end
1311
1600
 
1312
1601
  #-----------------------------------
@@ -1327,8 +1616,9 @@ end
1327
1616
  else #unary operator
1328
1617
  result=getchar
1329
1618
  WHSPLF[nextchar.chr] or
1330
- @moretokens << NoWsToken.new(@file.pos)
1331
- return(operator_or_methname_token result)
1619
+ @moretokens << NoWsToken.new(input_position)
1620
+ result=(operator_or_methname_token result)
1621
+ result.unary=true
1332
1622
  #todo: result should distinguish unary+binary +-
1333
1623
  end
1334
1624
  else #binary operator
@@ -1337,45 +1627,54 @@ end
1337
1627
  if eat_next_if(?=)
1338
1628
  result << ?=
1339
1629
  end
1340
- return(operator_or_methname_token result)
1630
+ result=(operator_or_methname_token result)
1341
1631
  #todo: result should distinguish unary+binary +-
1342
1632
  end
1633
+ result
1343
1634
  end
1344
1635
 
1345
1636
  #-----------------------------------
1346
1637
  def equals(ch) #match /=(>|~|==?)?/ (= or == or =~ or === or =>)
1347
- offset=@file.pos
1638
+ offset= input_position
1348
1639
  str=getchar
1349
1640
  assert str=='='
1350
- c=(eat_next_if(/^[~=>]$/)or'')
1641
+ c=(eat_next_if(/[~=>]/)or'')
1351
1642
  str << c
1643
+ result= operator_or_methname_token( str,offset)
1352
1644
  case c
1353
1645
  when '=': str<< (eat_next_if(?=)or'')
1354
1646
 
1355
- when '>': @bracestack.last.see @bracestack,:arrow
1647
+ when '>':
1648
+ unless ParamListContextNoParen===@parsestack.last
1649
+ @moretokens.unshift result
1650
+ @moretokens.unshift( *abort_noparens!("=>"))
1651
+ result=@moretokens.shift
1652
+ end
1653
+ @parsestack.last.see self,:arrow
1356
1654
  when '': #record local variable definitions
1357
1655
 
1358
- @bracestack.push AssignmentRhsContext.new(@linenum)
1656
+ @parsestack.push AssignmentRhsContext.new(@linenum)
1359
1657
  @moretokens.unshift AssignmentRhsListStartToken.new( offset+1)
1360
1658
  end
1361
- return operator_or_methname_token( str,offset)
1659
+ return result
1362
1660
  end
1363
1661
 
1364
1662
  #-----------------------------------
1365
1663
  def exclam(ch) #match /![~=]?/ (! or != or !~)
1366
1664
  assert nextchar==?!
1367
1665
  result=getchar
1368
- k=eat_next_if(/^[~=]$/)
1666
+ k=eat_next_if(/[~=]/)
1369
1667
  if k
1370
1668
  result+=k
1371
1669
  else
1372
1670
  WHSPLF[nextchar.chr] or
1373
- @moretokens << NoWsToken.new(@file.pos)
1671
+ @moretokens << NoWsToken.new(input_position)
1374
1672
  end
1375
- return KeywordToken.new(result)
1673
+ return KeywordToken.new(result, input_position-result.size)
1376
1674
  #result should distinguish unary !
1377
1675
  end
1378
1676
 
1677
+
1379
1678
  #-----------------------------------
1380
1679
  def dot(ch)
1381
1680
  str=''
@@ -1391,7 +1690,6 @@ end
1391
1690
  dot_rhs(result)
1392
1691
  return result
1393
1692
  end
1394
-
1395
1693
  #-----------------------------------
1396
1694
  def dot_rhs(prevtok)
1397
1695
  safe_recurse { |a|
@@ -1403,20 +1701,17 @@ end
1403
1701
  }
1404
1702
  end
1405
1703
 
1406
- #-----------------------------------
1407
- def single_quote(ch=nil)
1408
- double_quote(ch)
1409
- end
1410
-
1411
1704
  #-----------------------------------
1412
1705
  def back_quote(ch=nil)
1413
- oldpos=@file.pos
1414
- @last_operative_token===/^(def|::|\.)$/ and return MethNameToken.new(
1415
- (eat_next_if(?`) or raise "insanity"), oldpos
1416
- )
1417
- double_quote(ch)
1706
+ if @last_operative_token===/^(def|::|\.)$/
1707
+ oldpos= input_position
1708
+ MethNameToken.new(eat_next_if(?`), oldpos)
1709
+ else
1710
+ double_quote(ch)
1711
+ end
1418
1712
  end
1419
1713
 
1714
+ if false
1420
1715
  #-----------------------------------
1421
1716
  def comment(str)
1422
1717
  result=""
@@ -1441,27 +1736,30 @@ end
1441
1736
 
1442
1737
  return IgnoreToken.new(result)
1443
1738
  end
1444
-
1739
+ end
1445
1740
  #-----------------------------------
1446
1741
  def open_brace(ch)
1447
1742
  assert((ch!='[' or !want_op_name))
1448
1743
  assert(@moretokens.empty?)
1449
1744
  lastchar=prevchar
1450
- ch=eat_next_if(/^[({\[]$/)or raise "lexer confusion"
1451
- tokch=KeywordToken.new(ch,@file.pos-1)
1745
+ ch=eat_next_if(/[({\[]/)or raise "lexer confusion"
1746
+ tokch=KeywordToken.new(ch, input_position-1)
1747
+
1452
1748
 
1453
1749
  #maybe emitting of NoWsToken can be moved into var_or_meth_name ??
1454
1750
  case tokch.ident
1455
1751
  when '['
1456
- #fixme: in contexts expecting an (operator) method name, we
1457
- # should match [] or []= at this point
1458
- @bracestack.push ListImmedContext.new(ch,@linenum)
1752
+ # in contexts expecting an (operator) method name, we
1753
+ # would want to match [] or []= at this point
1754
+ #but control never comes this way in those cases... goes
1755
+ #to custom parsers for alias, undef, and def in #parse_keywords
1756
+ tokch.set_infix! unless after_nonid_op?{WHSPLF[lastchar]}
1757
+ @parsestack.push ListImmedContext.new(ch,@linenum)
1459
1758
  lasttok=last_operative_token
1460
1759
  #could be: lasttok===/^[a-z_]/i
1461
- if (VarNameToken===lasttok or MethNameToken===lasttok or
1462
- lasttok===FUNCLIKE_KEYWORDS) and !WHSPCHARS[lastchar]
1760
+ if (VarNameToken===lasttok or ImplicitParamListEndToken===lasttok or MethNameToken===lasttok) and !WHSPCHARS[lastchar]
1463
1761
  @moretokens << (tokch)
1464
- tokch= NoWsToken.new(@file.pos-1)
1762
+ tokch= NoWsToken.new(input_position-1)
1465
1763
  end
1466
1764
  when '('
1467
1765
  lasttok=last_operative_token
@@ -1470,19 +1768,20 @@ end
1470
1768
  lasttok===FUNCLIKE_KEYWORDS)
1471
1769
  unless WHSPCHARS[lastchar]
1472
1770
  @moretokens << tokch
1473
- tokch= NoWsToken.new(@file.pos-1)
1771
+ tokch= NoWsToken.new(input_position-1)
1474
1772
  end
1475
- @bracestack.push ParamListContext.new(@linenum)
1773
+ @parsestack.push ParamListContext.new(@linenum)
1476
1774
  else
1477
- @bracestack.push ParenContext.new(@linenum)
1775
+ @parsestack.push ParenContext.new(@linenum)
1478
1776
  end
1479
1777
 
1480
1778
  when '{'
1481
1779
  #check if we are in a hash literal or string inclusion (#{}),
1482
1780
  #in which case below would be bad.
1483
- if after_nonid_op?{false}
1484
- @bracestack.push ListImmedContext.new(ch,@linenum) #that is, a hash
1781
+ if after_nonid_op?{false} or @last_operative_token.has_no_block?
1782
+ @parsestack.push ListImmedContext.new(ch,@linenum) #that is, a hash
1485
1783
  else
1784
+ tokch.set_infix!
1486
1785
  =begin not needed now, i think
1487
1786
  # 'need to find matching callsite context and end it if implicit'
1488
1787
  lasttok=last_operative_token
@@ -1492,8 +1791,8 @@ end
1492
1791
  end
1493
1792
  =end
1494
1793
 
1495
- @localvars.start_block
1496
- @bracestack.push BlockContext.new(@linenum)
1794
+ localvars.start_block
1795
+ @parsestack.push BlockContext.new(@linenum)
1497
1796
  block_param_list_lookahead
1498
1797
  end
1499
1798
  end
@@ -1504,18 +1803,18 @@ end
1504
1803
  def close_brace(ch)
1505
1804
  ch==eat_next_if(/[)}\]]/) or raise "lexer confusion"
1506
1805
  @moretokens.concat abort_noparens!(ch)
1507
- @moretokens<< kw=KeywordToken.new( ch,@file.pos-1)
1508
- @bracestack.last.see @bracestack,:semi #hack
1509
- if @bracestack.empty?
1806
+ @parsestack.last.see self,:semi #hack
1807
+ @moretokens<< kw=KeywordToken.new( ch, input_position-1)
1808
+ if @parsestack.empty?
1510
1809
  lexerror kw,"unmatched brace: #{ch}"
1511
1810
  return @moretokens.shift
1512
1811
  end
1513
- ctx=@bracestack.pop
1812
+ ctx=@parsestack.pop
1514
1813
  origch,line=ctx.starter,ctx.linenum
1515
1814
  ch==PAIRS[origch] or
1516
1815
  lexerror kw,"mismatched braces: #{origch}#{ch}\n" +
1517
1816
  "matching brace location", @filename, line
1518
- BlockContext===ctx and @localvars.end_block
1817
+ BlockContext===ctx and localvars.end_block
1519
1818
  if ParamListContext==ctx.class
1520
1819
  assert ch==')'
1521
1820
  #kw.set_callsite! #not needed?
@@ -1525,29 +1824,29 @@ end
1525
1824
 
1526
1825
  #-----------------------------------
1527
1826
  def eof(ch=nil)
1528
- #this must be the very last character...
1529
- oldpos=@file.pos
1530
- assert(?\0==@file.getc)
1827
+ #this must be the very last character...
1828
+ oldpos= input_position
1829
+ assert(?\0==getc)
1531
1830
 
1532
- result= "\0#{ignored_tokens(true).delete_if{|t|FileAndLineToken===t}}"
1831
+ result= "\0#{ignored_tokens(true).delete_if{|t|FileAndLineToken===t}}"
1533
1832
 
1534
- @file.pos==@file.stat.size or
1535
- lexerror result,'nul character is not at the end of file'
1536
- @file.pos=@file.stat.size
1537
- return(endoffile_detected result)
1833
+ eof? or
1834
+ lexerror result,'nul character is not at the end of file'
1835
+ input_position_set @file.size
1836
+ return(endoffile_detected result)
1538
1837
  end
1539
1838
 
1540
1839
  #-----------------------------------
1541
1840
  def endoffile_detected(s='')
1542
1841
  @moretokens.push( *(abort_noparens!.push super(s)))
1543
1842
  result= @moretokens.shift
1544
- balanced_braces? or (lexerror result,"unbalanced braces at eof. bracestack=#{@bracestack.inspect}")
1843
+ balanced_braces? or (lexerror result,"unbalanced braces at eof. parsestack=#{@parsestack.inspect}")
1545
1844
  result
1546
1845
  end
1547
1846
 
1548
1847
  #-----------------------------------
1549
1848
  def single_char_token(ch)
1550
- KeywordToken.new super(ch), @file.pos-1
1849
+ KeywordToken.new super(ch), input_position-1
1551
1850
  end
1552
1851
 
1553
1852
  #-----------------------------------
@@ -1557,13 +1856,13 @@ end
1557
1856
 
1558
1857
  #-----------------------------------
1559
1858
  def semicolon(ch)
1560
- assert @moretokens.empty?
1859
+ assert @moretokens.empty?
1561
1860
  @moretokens.push(*abort_noparens!)
1562
- @bracestack.last.see @bracestack,:semi
1563
- if ExpectDoOrNlContext===@bracestack.last #should be in context's see:semi handler
1564
- @bracestack.pop
1565
- assert @bracestack.last.starter[/^(while|until|for)$/]
1566
- end
1861
+ @parsestack.last.see self,:semi
1862
+ if ExpectDoOrNlContext===@parsestack.last #should be in context's see:semi handler
1863
+ @parsestack.pop
1864
+ assert @parsestack.last.starter[/^(while|until|for)$/]
1865
+ end
1567
1866
  @moretokens.push single_char_token(ch)
1568
1867
  return @moretokens.shift
1569
1868
  end
@@ -1582,7 +1881,11 @@ end
1582
1881
 
1583
1882
  #-----------------------------------
1584
1883
  #tokenify_results_of :identifier
1585
- save_offsets_in(*CHARMAPPINGS.values.uniq-[:symbol_or_op,:open_brace,:whitespace])
1884
+ save_offsets_in(*CHARMAPPINGS.values.uniq-[
1885
+ :symbol_or_op,:open_brace,:whitespace,:exclam,:backquote
1886
+
1887
+
1888
+ ])
1586
1889
  #save_offsets_in :symbol
1587
1890
 
1588
1891
  end