rubylexer 0.6.2 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. data/History.txt +55 -0
  2. data/Manifest.txt +67 -0
  3. data/README.txt +103 -0
  4. data/Rakefile +24 -0
  5. data/howtouse.txt +9 -6
  6. data/{assert.rb → lib/assert.rb} +11 -11
  7. data/{rubylexer.rb → lib/rubylexer.rb} +645 -342
  8. data/lib/rubylexer/0.6.2.rb +39 -0
  9. data/lib/rubylexer/0.6.rb +5 -0
  10. data/lib/rubylexer/0.7.0.rb +2 -0
  11. data/{charhandler.rb → lib/rubylexer/charhandler.rb} +4 -2
  12. data/{charset.rb → lib/rubylexer/charset.rb} +4 -3
  13. data/{context.rb → lib/rubylexer/context.rb} +48 -18
  14. data/{rubycode.rb → lib/rubylexer/rubycode.rb} +5 -3
  15. data/{rulexer.rb → lib/rubylexer/rulexer.rb} +180 -102
  16. data/{symboltable.rb → lib/rubylexer/symboltable.rb} +10 -1
  17. data/{token.rb → lib/rubylexer/token.rb} +72 -20
  18. data/{tokenprinter.rb → lib/rubylexer/tokenprinter.rb} +39 -16
  19. data/lib/rubylexer/version.rb +3 -0
  20. data/{testcode → test/code}/deletewarns.rb +0 -0
  21. data/test/code/dl_all_gems.rb +43 -0
  22. data/{testcode → test/code}/dumptokens.rb +12 -9
  23. data/test/code/locatetest +30 -0
  24. data/test/code/locatetest.rb +49 -0
  25. data/test/code/rubylexervsruby.rb +173 -0
  26. data/{testcode → test/code}/tokentest.rb +62 -51
  27. data/{testcode → test/code}/torment +8 -8
  28. data/test/code/unpack_all_gems.rb +15 -0
  29. data/{testdata → test/data}/1.rb.broken +0 -0
  30. data/{testdata → test/data}/23.rb +0 -0
  31. data/test/data/__end__.rb +2 -0
  32. data/test/data/__end__2.rb +3 -0
  33. data/test/data/and.rb +5 -0
  34. data/test/data/blockassigntest.rb +23 -0
  35. data/test/data/chunky.plain.rb +75 -0
  36. data/test/data/chunky_bacon.rb +112 -0
  37. data/test/data/chunky_bacon2.rb +112 -0
  38. data/test/data/chunky_bacon3.rb +112 -0
  39. data/test/data/chunky_bacon4.rb +112 -0
  40. data/test/data/for.rb +45 -0
  41. data/test/data/format.rb +6 -0
  42. data/{testdata → test/data}/g.rb +0 -0
  43. data/test/data/gemlist.txt +280 -0
  44. data/test/data/heart.rb +7 -0
  45. data/test/data/if.rb +6 -0
  46. data/test/data/jarh.rb +369 -0
  47. data/test/data/lbrace.rb +4 -0
  48. data/test/data/lbrack.rb +4 -0
  49. data/{testdata → test/data}/newsyntax.rb +0 -0
  50. data/{testdata → test/data}/noeolatend.rb +0 -0
  51. data/test/data/p-op.rb +8 -0
  52. data/{testdata → test/data}/p.rb +671 -79
  53. data/{testdata → test/data}/pleac.rb.broken +0 -0
  54. data/{testdata → test/data}/pre.rb +0 -0
  55. data/{testdata → test/data}/pre.unix.rb +0 -0
  56. data/{testdata → test/data}/regtest.rb +0 -0
  57. data/test/data/rescue.rb +35 -0
  58. data/test/data/s.rb +186 -0
  59. data/test/data/strinc.rb +2 -0
  60. data/{testdata → test/data}/tokentest.assert.rb.can +0 -0
  61. data/test/data/untermed_here.rb.broken +2 -0
  62. data/test/data/untermed_string.rb.broken +1 -0
  63. data/{testdata → test/data}/untitled1.rb +0 -0
  64. data/{testdata → test/data}/w.rb +0 -0
  65. data/{testdata → test/data}/wsdlDriver.rb +0 -0
  66. data/testing.txt +6 -4
  67. metadata +163 -59
  68. data/README +0 -134
  69. data/Rantfile +0 -37
  70. data/io.each_til_charset.rb +0 -247
  71. data/require.rb +0 -103
  72. data/rlold.rb +0 -12
  73. data/testcode/locatetest +0 -12
  74. data/testcode/rubylexervsruby.rb +0 -104
  75. data/testcode/rubylexervsruby.sh +0 -51
  76. data/testresults/placeholder +0 -0
data/History.txt ADDED
@@ -0,0 +1,55 @@
1
+ === 0.7.0/2-15-2008
2
+ * implicit tokens are now emitted at the right times (need more test code)
3
+ * local variables are now temporarily hidden by class, module, and def
4
+ * line numbers should always be correct now (=begin...=end caused this) (??)
5
+ * fixed meth/var-name confusion in idents after 'def' but before params
6
+ * FileAndLineToken now emitted on all new lines (0.8)
7
+ * offset of __END__ now works(??)
8
+ * put files into lib/
9
+ * improvements in parsing unary * and & (??)
10
+ * input can now come from a string
11
+ * local vars (defs and uses) are recognized in string inclusions
12
+ * explicitly decimal numbers, eg: 0d123456789
13
+ * eof in unusual cases is better handled
14
+ * __END__ is not treated as a keyword
15
+ * '|' as goalpost is now better handled
16
+ * a number of things have been renamed internally
17
+ * no more implicit parens for setter method calls
18
+ * '{' after return, break, and next is now the start of a hash.
19
+ * ambiguous cases of '::','(',':',and '?' are now better handled.
20
+ * more start/end hint tokens (for 'when', 'rescue', and 'for')
21
+ * bugfixes in rhs hint tokens
22
+ * parsing of def headers for singleton methods is improved
23
+ * rescue as operator is now handled
24
+ * block param list lookahead is simplified
25
+ * unary ops (including * and &) can be easily distinguished in output
26
+ * here document bodies better handled, esp after escaped newline
27
+ * symbols like %s{symbol} now actually work
28
+ * implicit parens around => better handled...
29
+ * different cases of '{' can now be easily distinguished
30
+ * ImplicitParamList Start and End are now Keyword, not Ignore tokens.
31
+
32
+ === 0.6.2
33
+ * testcode/dumptokens.rb charhandler.rb doesn't work... but does after unix2dos (not reproducible)
34
+ * files are opened in binmode to avoid all possible eol translation
35
+ * (x.+?x) now works
36
+ * methname/varname mixups fixed in some cases
37
+ * performance improvements, in most important cases
38
+ * error handling tokens should be emitted on error input... ErrorToken mixin module
39
+ * but old error handling interface should be preserved and made available
40
+ * moved readahead and friends into IOext
41
+ * made optimized readahead et al for fakefile
42
+ * dos newlines (and newlines generally) can be fancy string delimiters
43
+ * do,if,until, etc, have a way to tell if an end is associated
44
+ * broke readme into pieces
45
+
46
+ === 0.6.0
47
+ * partly fixed the implicit tokens at the wrong times. (or not at the
48
+ * right times) (partly fixed)
49
+ * : operator might be a synonym for 'then'
50
+ * variables other than the last are now recognized in multiple assignment
51
+ * variables created by for and rescue are now recognized
52
+ * token following :: should not be BareSymbolToken if begins with A-Z (unless obviously a func)
53
+ * read code to be lexed from a string. (irb wants this)
54
+ * fancy symbols weren't supported at all. (like this: %s{abcdefg})
55
+
data/Manifest.txt ADDED
@@ -0,0 +1,67 @@
1
+ COPYING
2
+ README.txt
3
+ Manifest.txt
4
+ Rakefile
5
+ howtouse.txt
6
+ History.txt
7
+ testing.txt
8
+ lib/rubylexer/rubycode.rb
9
+ lib/rubylexer/context.rb
10
+ lib/rubylexer/token.rb
11
+ lib/rubylexer/0.6.rb
12
+ lib/rubylexer/0.6.2.rb
13
+ lib/rubylexer/0.7.0.rb
14
+ lib/rubylexer/version.rb
15
+ lib/rubylexer/rulexer.rb
16
+ lib/rubylexer/tokenprinter.rb
17
+ lib/rubylexer/charset.rb
18
+ lib/rubylexer/symboltable.rb
19
+ lib/rubylexer/charhandler.rb
20
+ lib/assert.rb
21
+ lib/rubylexer.rb
22
+ test/data/gemlist.txt
23
+ test/data/blockassigntest.rb
24
+ test/data/for.rb
25
+ test/data/chunky_bacon.rb
26
+ test/data/and.rb
27
+ test/data/pre.unix.rb
28
+ test/data/untermed_string.rb.broken
29
+ test/data/__end__2.rb
30
+ test/data/w.rb
31
+ test/data/if.rb
32
+ test/data/pre.rb
33
+ test/data/jarh.rb
34
+ test/data/regtest.rb
35
+ test/data/chunky_bacon4.rb
36
+ test/data/__end__.rb
37
+ test/data/strinc.rb
38
+ test/data/lbrace.rb
39
+ test/data/p.rb
40
+ test/data/chunky.plain.rb
41
+ test/data/noeolatend.rb
42
+ test/data/g.rb
43
+ test/data/23.rb
44
+ test/data/lbrack.rb
45
+ test/data/untitled1.rb
46
+ test/data/rescue.rb
47
+ test/data/tokentest.assert.rb.can
48
+ test/data/pleac.rb.broken
49
+ test/data/heart.rb
50
+ test/data/s.rb
51
+ test/data/wsdlDriver.rb
52
+ test/data/p-op.rb
53
+ test/data/1.rb.broken
54
+ test/data/untermed_here.rb.broken
55
+ test/data/newsyntax.rb
56
+ test/data/chunky_bacon3.rb
57
+ test/data/chunky_bacon2.rb
58
+ test/data/format.rb
59
+ test/code/locatetest.rb
60
+ test/code/rubylexervsruby.rb
61
+ test/code/dl_all_gems.rb
62
+ test/code/unpack_all_gems.rb
63
+ test/code/tokentest.rb
64
+ test/code/dumptokens.rb
65
+ test/code/torment
66
+ test/code/locatetest
67
+ test/code/deletewarns.rb
data/README.txt ADDED
@@ -0,0 +1,103 @@
1
+ = RubyLexer
2
+
3
+ *
4
+ *
5
+ *
6
+
7
+ === DESCRIPTION:
8
+
9
+ RubyLexer is a lexer library for Ruby, written in Ruby. Rubylexer is meant
10
+ as a lexer for Ruby that's complete and correct; all legal Ruby
11
+ code should be lexed correctly by RubyLexer as well. Just enough parsing
12
+ capability is included to give RubyLexer enough context to tokenize correctly
13
+ in all cases. (This turned out to be more parsing than I had thought or
14
+ wanted to take on at first.) RubyLexer handles the hard things like
15
+ complicated strings, the ambiguous nature of some punctuation characters and
16
+ keywords in ruby, and distinguishing methods and local variables.
17
+
18
+ RubyLexer is not particularly clean code. As I progressed in writing this,
19
+ I've learned a little about how these things are supposed to be done; the
20
+ lexer is not supposed to have any state of its own, instead it gets whatever
21
+ it needs to know from the parser. As a stand-alone lexer, Rubylexer maintains
22
+ quite a lot of state. Every instance variable in the RubyLexer class is some
23
+ sort of lexer state. Most of the complication and ugly code in RubyLexer is
24
+ in maintaining or using this state.
25
+
26
+ For information about using RubyLexer in your program, please see howtouse.txt.
27
+
28
+ For my notes on the testing of RubyLexer, see testing.txt.
29
+
30
+ If you have any questions, comments, problems, new feature requests, or just
31
+ want to figure out how to make it work for what you need to do, contact me:
32
+ rubylexer _at_ inforadical _dot_ net
33
+
34
+ RubyLexer is a RubyForge project. RubyForge is another good place to send your
35
+ bug reports or whatever: http://rubyforge.org/projects/rubylexer/
36
+
37
+ (There aren't any bugs filed against RubyLexer there yet, but don't be afraid
38
+ that your report will get lonely.)
39
+
40
+ ==SYNOPSIS:
41
+ require "rubylexer.rb"
42
+ #then later
43
+ lexer=RubyLexer.new(a_file_name, opened_File_or_String)
44
+ until EoiToken===(token=lexer.get1token)
45
+ #...do stuff w/ token...
46
+ end
47
+
48
+ == Status
49
+ RubyLexer can correctly lex all legal Ruby 1.8 code that I've been able to
50
+ find on my Debian system. It can also handle (most of) my catalog of nasty
51
+ test cases (in testdata/p.rb) (see below for known problems). At this point,
52
+ new bugs are almost exclusively found by my home-grown test code, rather
53
+ than ruby code gathered 'from the wild'. There are a number of issues I know
54
+ about and plan to fix, but it seems that Ruby coders don't write code complex
55
+ enough to trigger them very often. Although incomplete, RubyLexer can
56
+ correctly distinguish these ambiguous uses of the following operator and
57
+ keywords, depending on context:
58
+ % can be modulus operator or start of fancy string
59
+ / can be division operator or start of regex
60
+ * & + - :: can be unary or binary operator
61
+ [] can be for array literal or [] method (or []=)
62
+ << can be here document or left shift operator (or in class<<obj expr)
63
+ : can be start of symbol, substitute for then, or part of ternary op
64
+ (there are other uses too, but they're not supported yet.)
65
+ ? can be start of character constant or ternary operator
66
+ ` can be method name or start of exec string
67
+ any overrideable operator and most keywords can also be method names
68
+
69
+ == todo
70
+ test w/ more code (rubygems, rpa, obfuscated ruby contest, rubicon, others?)
71
+ these 5 should be my standard test suite: p.rb, (matz') test.rb, tk.rb, obfuscated ruby contest, rubicon
72
+ test more ways: cvt source to dos or mac fmt before testing
73
+ test more ways: run unit tests after passing thru rubylexer (0.7)
74
+ test more ways: test require'd, load'd, or eval'd code as well (0.7)
75
+ lex code a line (or chunk) at a time and save state for next line (irb wants this) (0.8)
76
+ incremental lexing (ides want this (for performance))
77
+ put everything in a namespace
78
+ integrate w/ other tools...
79
+ html colorized output?
80
+ move more state onto @parsestack (ongoing)
81
+ the new cases in p.rb now compile, but won't run
82
+ expand on test documentation
83
+ use want_op_name more
84
+ return result as a half-parsed tree (with parentheses and the like matched)
85
+ emit advisory tokens when see beginword, then (or equivalent), or end... what else does florian want?
86
+ strings are still slow
87
+ emit advisory tokens when local var defined/goes out of scope (or hidden/unhidden?)
88
+ token pruning in dumptokens...
89
+
90
+ == known issues: (and planned fix release)
91
+ context not really preserved when entering or leaving string inclusions. this causes
92
+ a number or problems. local variables are ok now, but here document headers started
93
+ in a string inclusion with the body outside will be a problem. (0.8)
94
+ string tokenization sometimes a little different from ruby around newlines
95
+ (htree/template.rb) (0.8)
96
+ string contents might not be correctly translated in a few cases (0.8?)
97
+ symbols which contain string interpolations are flattened into one token. eg :"foo#{bar}" (0.8)
98
+ '\r' whitespace sometimes seen in dos-formatted output.. shouldn't be (eg pre.rb) (0.7)
99
+ windows or mac newline in source are likely to cause problems in obscure cases (need test case)
100
+ unterminated =begin is not an error (0.8)
101
+ ruby 1.9 completely unsupported (0.9)
102
+ character sets other than ascii are not supported at all (1.0)
103
+
data/Rakefile ADDED
@@ -0,0 +1,24 @@
1
+ # Copyright (C) 2008 Caleb Clausen
2
+ # Distributed under the terms of Ruby's license.
3
+ require 'rubygems'
4
+ require 'hoe'
5
+ require 'lib/rubylexer/version.rb'
6
+
7
+
8
+ readme=open("README.txt")
9
+ readme.readline("\n=== DESCRIPTION:")
10
+ readme.readline("\n\n")
11
+ desc=readme.readline("\n\n")
12
+
13
+ hoe=Hoe.new("rubylexer", RubyLexer::VERSION) do |_|
14
+ _.author = "Caleb Clausen"
15
+ _.email = "rubylexer-owner @at@ inforadical .dot. net"
16
+ _.url = "http://rubylexer.rubyforge.org/"
17
+ _.extra_deps = ["sequence"]
18
+ _.test_globs=["test/{code/*,data/*rb*,results/}"]
19
+ _.description=desc
20
+ _.summary=desc[/\A[^.]+\./]
21
+ _.spec_extras={:bindir=>''}
22
+ end
23
+
24
+
data/howtouse.txt CHANGED
@@ -1,13 +1,13 @@
1
1
 
2
2
  Using rubylexer:
3
3
  require "rubylexer.rb"
4
- ,then
4
+ #then later
5
5
  lexer=RubyLexer.new(a_file_name, opened_File_or_String)
6
- until EoiToken===(tok=lexer.get1token)
7
- ...do stuff w/ toks...
6
+ until EoiToken===(token=lexer.get1token)
7
+ #...do stuff w/ token...
8
8
  end
9
9
 
10
- For a slightly expanded version of this example, see testcode/dumptokens.rb.
10
+ For a slightly expanded version of this example, see test/code/dumptokens.rb.
11
11
 
12
12
  tok will be a subclass of Token. there are many token classes (see token.rb)
13
13
  however, all tokens have some common methods:
@@ -23,7 +23,8 @@ WToken #(mostly useless?) abstract superclass for KeywordToken,
23
23
  #OperatorToken, VarNameToken, and HerePlaceholderToken
24
24
  #but not (confusingly) MethNameToken (perhaps that'll change)
25
25
  KeywordToken #a ruby keyword or non-overridable punctuation char(s)
26
- OperatorToken #overrideable operators
26
+ OperatorToken #overrideable operators.
27
+ #use #unary? and #binary? to find out how many arguments it takes.
27
28
  VarNameToken #a name that represents a variable
28
29
  HerePlaceholderToken #represents the header of a here string. subclass of WToken
29
30
  MethNameToken #the name of a method: the uncoloned
@@ -120,7 +121,8 @@ time to adapt to changes. That promise goes for all the changes described below.
120
121
 
121
122
  In cases where the 2 are incompatible, (inspired by rubygems) I've come up with this:
122
123
 
123
- RubyLexer.version(0.6).new(...args...) #request the 0.6 api
124
+ require 'rubylexer/0.6'
125
+ rl=RubyLexer.new(...args...) #request the 0.6 api
124
126
 
125
127
  This actually works currently; it enables the old api where errors cause an exception instead
126
128
  of generating ErrorTokens. The default will always be to use the new api.
@@ -133,4 +135,5 @@ be a big deal; old clients can just include the namespace module.
133
135
  Token#ident may be taken away or change without notice.
134
136
  MethNameToken may become a WToken
135
137
  HereBodyToken should really be a string subclass...
138
+ Newline,EscNl,BareSymbolToken may get renamed
136
139
 
@@ -1,5 +1,4 @@
1
1
  =begin copyright
2
- rubylexer - a ruby lexer written in ruby
3
2
  Copyright (C) 2004,2005 Caleb Clausen
4
3
 
5
4
  This library is free software; you can redistribute it and/or
@@ -16,16 +15,17 @@
16
15
  License along with this library; if not, write to the Free Software
17
16
  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
17
  =end
19
- require 'set'
20
18
 
19
+ module Kernel
20
+ def assert(expr,msg="assertion failed")
21
+ defined? $Debug and $Debug and (expr or raise msg)
22
+ end
21
23
 
22
- def assert(expr,msg="assertion failed")
23
- $DEBUG and (expr or raise msg)
24
- end
25
-
26
- @@printed=Set.new
27
- def fixme(s)
28
- @@printed.include?( s) and return
29
- $DEBUG and STDERR.print "FIXME: #{s}\n"
30
- @@printed.add s
24
+ @@printed={}
25
+ def fixme(s)
26
+ unless @@printed[s]
27
+ @@printed[s]=1
28
+ defined? $Debug and $Debug and $stderr.print "FIXME: #{s}\n"
29
+ end
30
+ end
31
31
  end
@@ -19,15 +19,18 @@
19
19
 
20
20
 
21
21
 
22
- require "rulexer"
23
- require "symboltable"
24
- require "io.each_til_charset"
25
- require "context.rb"
26
-
22
+ require 'rubylexer/rulexer' #must be 1st!!!
23
+ require 'rubylexer/version'
24
+ require 'rubylexer/token'
25
+ require 'rubylexer/charhandler'
26
+ require 'rubylexer/symboltable'
27
+ #require "io.each_til_charset"
28
+ require 'rubylexer/context'
29
+ require 'rubylexer/tokenprinter'
27
30
 
28
31
 
29
32
  #-----------------------------------
30
- class RubyLexer < RuLexer
33
+ class RubyLexer
31
34
  include NestedContexts
32
35
 
33
36
  RUBYSYMOPERATORREX=
@@ -39,7 +42,7 @@ class RubyLexer < RuLexer
39
42
  #or .. ... ?:
40
43
  #for that use:
41
44
  RUBYNONSYMOPERATORREX=
42
- %r{^([%^~/\-+]=|(\|\|?|&&?)=?|(<<|>>|\*\*?)=|\.{1,3}|[?:,;]|=>?|![=~]?)$}
45
+ %r{^([%^~/\-+|&]=|(\|\||&&)=?|(<<|>>|\*\*?)=|\.{1,3}|[?:,;]|=>?|![=~]?)$}
43
46
  RUBYOPERATORREX=/#{RUBYSYMOPERATORREX}|#{RUBYNONSYMOPERATORREX}/o
44
47
  UNSYMOPS=/^[~!]$/ #always unary
45
48
  UBSYMOPS=/^([*&+-]|::)$/ #ops that could be unary or binary
@@ -50,16 +53,18 @@ class RubyLexer < RuLexer
50
53
  VARLIKE_KEYWORDS=/^(__FILE__|__LINE__|false|nil|self|true)$/
51
54
  INNERBOUNDINGWORDS="(else|elsif|ensure|in|then|rescue|when)"
52
55
  BINOPWORDS="(and|or)"
53
- NEVERSTARTPARAMLISTWORDS=/^(#{OPORBEGINWORDS}|#{INNERBOUNDINGWORDS}|#{BINOPWORDS}|end)[^a-zA-Z0-9_!?=]?/o
54
- NEVERSTARTPARAMLISTFIRST=CharSet[%[aoeitrwu]] #char set that begins NEVERSTARTPARAMLIST
55
- NEVERSTARTPARAMLISTMAXLEN=7 #max len of a NEVERSTARTPARAMLIST
56
+ NEVERSTARTPARAMLISTWORDS=/^(#{OPORBEGINWORDS}|#{INNERBOUNDINGWORDS}|#{BINOPWORDS}|end)([^a-zA-Z0-9_!?=]|\Z)/o
57
+ NEVERSTARTPARAMLISTFIRST=CharSet['aoeitrwu'] #chars that begin NEVERSTARTPARAMLIST
58
+ NEVERSTARTPARAMLISTMAXLEN=7 #max len of a NEVERSTARTPARAMLIST
56
59
 
57
60
  RUBYKEYWORDS=%r{
58
- ^(alias|#{BINOPWORDS}|not|undef|__END__|end|
61
+ ^(alias|#{BINOPWORDS}|not|undef|end|
59
62
  #{VARLIKE_KEYWORDS}|#{FUNCLIKE_KEYWORDS}|
60
63
  #{INNERBOUNDINGWORDS}|#{BEGINWORDS}
61
64
  )$
62
65
  }xo
66
+ #__END__ should not be in this set... its handled in start_of_line_directives
67
+
63
68
  CHARMAPPINGS = {
64
69
  ?$ => :dollar_identifier,
65
70
  ?@ => :at_identifier,
@@ -67,8 +72,7 @@ class RubyLexer < RuLexer
67
72
  ?A..?Z => :identifier,
68
73
  ?_ => :identifier,
69
74
  ?0..?9 => :number,
70
- ?" => :double_quote,
71
- ?' => :single_quote,
75
+ %{"'} => :double_quote,
72
76
  ?` => :back_quote,
73
77
 
74
78
  WHSP => :whitespace, #includes \r
@@ -83,7 +87,8 @@ class RubyLexer < RuLexer
83
87
 
84
88
  #these ones could signal either an op or a term
85
89
  ?/ => :regex_or_div,
86
- "|>" => :quadriop,
90
+ "|" => :conjunction_or_goalpost,
91
+ ">" => :quadriop,
87
92
  "*&" => :star_or_amp, #could be unary
88
93
  "+-" => :plusminus, #could be unary
89
94
  ?< => :lessthan,
@@ -103,22 +108,27 @@ class RubyLexer < RuLexer
103
108
  ?# => :comment
104
109
  }
105
110
 
106
- attr :incomplete_here_tokens
111
+ attr_reader :incomplete_here_tokens, :parsestack
107
112
 
108
113
 
109
114
  #-----------------------------------
110
115
  def initialize(filename,file,linenum=1)
111
116
  super(filename,file, linenum)
112
117
  @start_linenum=linenum
113
- @bracestack=[TopLevelContext.new]
118
+ @parsestack=[TopLevelContext.new]
114
119
  @incomplete_here_tokens=[]
115
- @localvars=SymbolTable.new
120
+ @localvars_stack=[SymbolTable.new]
116
121
  @defining_lvar=nil
122
+ @in_def_name=false
117
123
 
118
124
  @toptable=CharHandler.new(self, :illegal_char, CHARMAPPINGS)
119
125
 
120
126
  start_of_line_directives
121
127
  end
128
+
129
+ def localvars;
130
+ @localvars_stack.last
131
+ end
122
132
 
123
133
  #-----------------------------------
124
134
  def get1token
@@ -129,25 +139,23 @@ class RubyLexer < RuLexer
129
139
 
130
140
  #check for bizarre token types
131
141
  case result
132
- when IgnoreToken#,nil
133
- return result
142
+ when StillIgnoreToken#,nil
143
+ result
134
144
  when Token#,String
145
+ @last_operative_token=result
146
+ assert !(IgnoreToken===@last_operative_token)
147
+ result
135
148
  else
136
- raise "#{@filename}:#{linenum}:token is a #{result.class}, last is #{@last_operative_token}"
149
+ raise "#{@filename}:#{linenum}:token is a #{result.class}, last is #{@last_operative_token}"
137
150
  end
138
-
139
- @last_operative_token=result
140
-
141
- return result
142
151
  end
143
-
144
152
 
145
153
 
146
154
  #-----------------------------------
147
155
  def balanced_braces?
148
156
 
149
- #@bracestack.empty?
150
- @bracestack.size==1 and TopLevelContext===@bracestack.first
157
+ #@parsestack.empty?
158
+ @parsestack.size==1 and TopLevelContext===@parsestack.first
151
159
  end
152
160
 
153
161
  #-----------------------------------
@@ -182,7 +190,7 @@ private
182
190
 
183
191
  #-----------------------------------
184
192
  def expect_do_or_end_or_nl!(st)
185
- @bracestack.push ExpectDoOrNlContext.new(st,/(do|;|:|\n)/,@linenum)
193
+ @parsestack.push ExpectDoOrNlContext.new(st,/(do|;|:|\n)/,@linenum)
186
194
  end
187
195
 
188
196
  #-----------------------------------
@@ -199,31 +207,38 @@ private
199
207
  end
200
208
 
201
209
  #-----------------------------------
202
- WSCHARSET=CharSet["#\\\n\s\t\v\r\f"]
203
- def ignored_tokens(allow_eof=false)
210
+ WSCHARSET=/[#\\\n\s\t\v\r\f]/
211
+ def ignored_tokens(allow_eof=false,allow_eol=true)
204
212
  result=[]
205
- result<<@moretokens.shift while IgnoreToken===@moretokens.first
213
+ result<<@moretokens.shift while StillIgnoreToken===@moretokens.first
206
214
  @moretokens.empty? or return result
207
- if true
208
215
  loop do
209
216
  unless @moretokens.empty?
210
- IgnoreToken===@moretokens.first or NewlineToken===@moretokens.first or
211
- break
217
+ case @moretokens.first
218
+ when StillIgnoreToken
219
+ when NewlineToken: allow_eol or break
220
+ else break
221
+ end
212
222
  else
213
- WSCHARSET===nextchar or break
223
+
224
+ break unless ch=nextchar
225
+ ch=ch.chr
226
+ break unless WSCHARSET===ch
227
+ break if ch[/[\r\n]/] and !allow_eol
214
228
  end
229
+
215
230
 
216
231
  tok=get1token
217
- result<<tok
232
+ result << tok
218
233
  case tok
219
- when NewlineToken : block_given? and yield tok
220
- when EoiToken : allow_eof or lexerror tok,"end of file not expected here(2)"
221
- when IgnoreToken
222
- else raise "impossible"
234
+ when NewlineToken; assert allow_eol; block_given? and yield tok
235
+ when EoiToken; allow_eof or lexerror tok,"end of file not expected here(2)"
236
+ when StillIgnoreToken
237
+ else raise "impossible token: #{tok.inspect}"
223
238
  end
224
239
  end
225
240
 
226
- else
241
+ =begin
227
242
  @whsphandler||=CharHandler.new(self, :==,
228
243
  "#" => :comment,
229
244
  "\n" => :newline,
@@ -235,18 +250,18 @@ else
235
250
  block_given? and NewlineToken===tok and yield tok
236
251
  result << tok
237
252
  end
238
- end
253
+ =end
239
254
  return result
240
255
  end
241
256
 
242
257
  #-----------------------------------
243
258
  def safe_recurse
244
259
  old_moretokens=@moretokens
245
- #old_bracestack=@bracestack.dup
260
+ #old_parsestack=@parsestack.dup
246
261
  @moretokens=[]
247
262
  result= yield @moretokens
248
263
  #assert @incomplete_here_tokens.empty?
249
- #assert @bracestack==old_bracestack
264
+ #assert @parsestack==old_parsestack
250
265
  @moretokens= old_moretokens.concat @moretokens
251
266
  return result
252
267
  #need to do something with @last_operative_token?
@@ -258,7 +273,7 @@ end
258
273
  result = ((
259
274
  #order matters here, but it shouldn't
260
275
  #(but til_charset must be last)
261
- eat_next_if(/^[!@&+`'=~\/\\,.;<>*"$?:]$/) or
276
+ eat_next_if(/[!@&+`'=~\/\\,.;<>*"$?:]/) or
262
277
  (eat_next_if('-') and ("-"+getchar)) or
263
278
  (?0..?9)===nextchar ? til_charset(/[^\d]/) : nil
264
279
  ))
@@ -266,7 +281,7 @@ end
266
281
 
267
282
  #-----------------------------------
268
283
  def identifier(context=nil)
269
- oldpos=@file.pos
284
+ oldpos= input_position
270
285
  str=identifier_as_string(context)
271
286
 
272
287
  #skip keyword processing if 'escaped' as it were, by def, . or ::
@@ -279,8 +294,8 @@ end
279
294
  @moretokens.unshift(*parse_keywords(str,oldpos) do
280
295
  #if not a keyword,
281
296
  case str
282
- when FUNCLIKE_KEYWORDS: #do nothing
283
- when VARLIKE_KEYWORDS,RUBYKEYWORDS: raise "shouldnt see keywords here, now"
297
+ when FUNCLIKE_KEYWORDS; #do nothing
298
+ when VARLIKE_KEYWORDS,RUBYKEYWORDS; raise "shouldnt see keywords here, now"
284
299
  end
285
300
  safe_recurse { |a| var_or_meth_name(str,@last_operative_token,oldpos) }
286
301
  end)
@@ -290,7 +305,7 @@ end
290
305
  #-----------------------------------
291
306
  def identifier_as_string(context)
292
307
  #must begin w/ letter or underscore
293
- str=eat_next_if(/^[_a-z]$/i) or return nil
308
+ str=eat_next_if(/[_a-z]/i) or return nil
294
309
 
295
310
  #equals, question mark, and exclamation mark
296
311
  #might be allowed at the end in some contexts.
@@ -305,18 +320,20 @@ end
305
320
  when ?: then [?=, ??, ?!]
306
321
  else [nil,??, ?!]
307
322
  end
323
+
324
+ @in_def_name and maybe_eq= ?=
308
325
 
309
326
  str<<til_charset(/[^a-z0-9_]/i)
310
327
 
311
328
  #look for ?, !, or =, if allowed
312
- case b=@file.getc
329
+ case b=getc
313
330
  when nil #means we're at eof
314
331
  #handling nil here prevents b from ever matching
315
332
  #a nil value of maybe_qm, maybe_ex or maybe_eq
316
333
  when maybe_qm
317
334
  str << b
318
335
  when maybe_ex
319
- nc=(nextchar unless @file.eof?)
336
+ nc=(nextchar unless eof?)
320
337
  #does ex appear to be part of a larger operator?
321
338
  if nc==?= #or nc==?~
322
339
  back1char
@@ -324,7 +341,7 @@ end
324
341
  str << b
325
342
  end
326
343
  when maybe_eq
327
- nc=(nextchar unless @file.eof?)
344
+ nc=(nextchar unless eof?)
328
345
  #does eq appear to be part of a larger operator?
329
346
  if nc==?= or nc==?~ or nc==?>
330
347
  back1char
@@ -342,34 +359,37 @@ end
342
359
  #-----------------------------------
343
360
  #contexts in which comma may appear in ruby:
344
361
  #multiple lhs (terminated by assign op)
345
- #multiple rhs (in implicit context) (tbd)
362
+ #multiple rhs (in implicit context)
346
363
  #method actual param list (in ( or implicit context)
347
364
  #method formal param list (in ( or implicit context)
348
- #block formal param list (in | context) (tbd)
365
+ #block formal param list (in | context)
366
+ #nested multiple rhs
367
+ #nested multiple lhs
368
+ #nested block formal list
369
+ #element reference/assignment (in [] or []= method actual parameter context)
349
370
  #hash immediate (in imm{ context)
350
371
  #array immediate (in imm[ context)
351
- #element reference/assignment (in [] or []= method actual parameter context)
352
- #list after for
372
+ #list between 'for' and 'in'
353
373
  #list after rescue
354
374
  #list after when
355
375
  #list after undef
356
376
 
357
- #note: comma in parens not around a param list is illegal
377
+ #note: comma in parens not around a param list or lhs or rhs is illegal
358
378
 
359
379
  #-----------------------------------
360
380
  #a comma has been seen. are we in an
361
381
  #lvalue list or some other construct that uses commas?
362
382
  def comma_in_lvalue_list?
363
- not ListContext===@bracestack.last
383
+ @parsestack.last.lhs= (not ListContext===@parsestack.last)
364
384
  end
365
385
 
366
386
  #-----------------------------------
367
387
  def in_lvar_define_state
368
388
  #@defining_lvar is a hack
369
- @defining_lvar or case ctx=@bracestack.last
370
- when ForSMContext: ctx.state==:for
371
- when RescueSMContext: ctx.state==:arrow
372
- when BlockParamListContext: true
389
+ @defining_lvar or case ctx=@parsestack.last
390
+ when ForSMContext; ctx.state==:for
391
+ when RescueSMContext; ctx.state==:arrow
392
+ #when BlockParamListLhsContext; true
373
393
  end
374
394
  end
375
395
 
@@ -391,66 +411,102 @@ end
391
411
  #look for and ignore local variable names
392
412
 
393
413
  assert String===name
394
-
395
- #fixme: keywords shouldn't be treated specially after :: and .
396
414
 
397
415
  #maybe_local really means 'maybe local or constant'
398
416
  maybe_local=case name
399
- when /[^a-z_0-9]$/i: #do nothing
400
- when /^[a-z_]/: (@localvars===name or VARLIKE_KEYWORDS===name or in_lvar_define_state) and not lasttok===/^(\.|::)$/
401
- when /^[A-Z]/: is_const=true;not lasttok==='.' #this is the right algorithm for constants...
417
+ when /[^a-z_0-9]$/i; #do nothing
418
+ when /^[a-z_]/; (localvars===name or VARLIKE_KEYWORDS===name or in_lvar_define_state) and not lasttok===/^(\.|::)$/
419
+ when /^[A-Z]/; is_const=true;not lasttok==='.' #this is the right algorithm for constants...
402
420
  end
403
421
 
404
422
  assert(@moretokens.empty?)
423
+
424
+ oldlast=@last_operative_token
405
425
 
406
426
  tok=@last_operative_token=VarNameToken.new(name,pos)
407
427
 
408
- oldpos=@file.pos
428
+ oldpos= input_position
409
429
  sawnl=false
410
430
  result=ws_toks=ignored_tokens(true) {|nl| sawnl=true }
411
- sawnl || @file.eof? and return result.unshift(
412
- *if maybe_local : [tok]
413
- else [MethNameToken.new(name,pos), #insert implicit parens right after tok
414
- ImplicitParamListStartToken.new( oldpos),
415
- ImplicitParamListEndToken.new( oldpos) ]
431
+ if sawnl || eof?
432
+ if maybe_local then
433
+ if in_lvar_define_state
434
+ if /^[a-z_][a-zA-Z_0-9]*$/===name
435
+ assert !(lasttok===/^(\.|::)$/)
436
+ localvars[name]=true
437
+ else
438
+ lexerror tok,"not a valid variable name: #{name}"
439
+ end
440
+ return result.unshift(tok)
441
+ end
442
+ return result.unshift(tok) #if is_const
443
+ else
444
+ return result.unshift(
445
+ MethNameToken.new(name,pos), #insert implicit parens right after tok
446
+ ImplicitParamListStartToken.new( oldpos),
447
+ ImplicitParamListEndToken.new( oldpos)
448
+ )
416
449
  end
417
- )
450
+ end
418
451
 
419
452
  #if next op is assignment (or comma in lvalue list)
420
453
  #then omit implicit parens
421
454
  assignment_coming=case nc=nextchar
422
- when ?=: not /^=[=~]$/===readahead(2)
423
- when ?,: comma_in_lvalue_list?
424
- when ?>,?<: /^([<>])\1=$/===readahead(3)
425
- when ?*,?|,?&: /^([*|&])\1?=/===readahead(3)
426
- when ?%,?/,?-,?+,?^: readahead(2)[1..1]=='='
455
+ when ?=; not /^=[>=~]$/===readahead(2)
456
+ when ?,; comma_in_lvalue_list?
457
+ when ?); last_context_not_implicit.lhs
458
+ when ?>,?<; /^(.)\1=$/===readahead(3)
459
+ when ?*,?&; /^(.)\1?=/===readahead(3)
460
+ when ?|; /^\|\|?=/===readahead(3) or
461
+ #is it a goalpost?
462
+ BlockParamListLhsContext===last_context_not_implicit &&
463
+ readahead(2)[1] != ?|
464
+ when ?%,?/,?-,?+,?^; readahead(2)[1]== ?=
427
465
  end
428
- if (assignment_coming or in_lvar_define_state)
466
+ if (assignment_coming && !(lasttok===/^(\.|::)$/) or in_lvar_define_state)
429
467
  tok=VarNameToken.new(name,pos)
430
468
  if /[^a-z_0-9]$/i===name
431
469
  lexerror tok,"not a valid variable name: #{name}"
432
470
  elsif /^[a-z_]/===name and !(lasttok===/^(\.|::)$/)
433
- @localvars[name]=true
471
+ localvars[name]=true
434
472
  end
435
473
  return result.unshift(tok)
436
474
  end
437
-
438
- implicit_parens_to_emit=case nc
439
- when ?!: readahead(2)=='!=' ? 2 : 1
475
+
476
+ implicit_parens_to_emit=
477
+ if assignment_coming
478
+ @parsestack.push AssignmentContext.new(nil) if nc==?% or nc==?/
479
+ 0
480
+ else
481
+ case nc
482
+ when nil: 2
483
+ when ?!; readahead(2)=='!=' ? 2 : 1
440
484
  when NEVERSTARTPARAMLISTFIRST
441
485
  (NEVERSTARTPARAMLISTWORDS===readahead(NEVERSTARTPARAMLISTMAXLEN)) ? 2 : 1
442
- when ?",?',?`,?a..?z,?A..?Z,?0..?9,?_,?@,?$,?~: 1
443
- when ?{: maybe_local=false; 2
444
- when ?(: maybe_local=false; 0
445
- when ?},?],?),?;,?^, ?|, ?>, ?,, ?., ?=: 2
446
- when ?+, ?-, ?*, ?&, ?%, ?/, ?:, ??: (ws_toks.empty? || readahead(2)[/^.[#{WHSPLF}]/]) ? 2 : 3
447
- when ?<: (ws_toks.empty? || readahead(3)[/^<<[^"'`a-zA-Z_0-9-]/]) ? 2 : 3
448
- when ?[: ws_toks.empty? ? 2 : 3
449
- when ?\\, ?\s, ?\t, ?\n, ?\r, ?\v, ?#: raise 'failure'
450
- else raise "unknown char after ident: #{nextchar.chr}"
486
+ when ?",?',?`,?a..?z,?A..?Z,?0..?9,?_,?@,?$,?~; 1
487
+ when ?{
488
+ maybe_local=false
489
+ x=2
490
+ x-=1 if /\A(return|break|next)\Z/===name and
491
+ !(KeywordToken===oldlast and oldlast===/\A(\.|::)\Z/)
492
+ x
493
+ when ?(;
494
+ maybe_local=false; !(ws_toks.empty? or lasttok===/^(\.|::)$/)? 1 : 0
495
+ when ?},?],?),?;,?^, ?|, ?>, ?,, ?., ?=; 2
496
+ when ?+, ?-, ?*, ?&, ?%, ?/; (ws_toks.empty? || readahead(2)[/^.[#{WHSPLF}]/]) ? 2 : 3
497
+ when ?:,??; next2=readahead(2);
498
+ WHSPLF[next2[1].chr] || next2=='::' ? 2 : 3
499
+ # when ?:,??; (readahead(2)[/^.[#{WHSPLF}]/]) ? 2 : 3
500
+ when ?<; (ws_toks.empty? || readahead(3)[/^<<["'`a-zA-Z_0-9-]/]) ? 3 : 2
501
+ when ?[; ws_toks.empty? ? 2 : 3
502
+ when ?\\, ?\s, ?\t, ?\n, ?\r, ?\v, ?#; raise 'failure'
503
+ else raise "unknown char after ident: #{nc=nextchar ? nc.chr : "<<EOF>>"}"
504
+ end
505
+ end
506
+
507
+ if is_const and implicit_parens_to_emit==3 then
508
+ implicit_parens_to_emit=1
451
509
  end
452
-
453
- implicit_parens_to_emit==3 and is_const and implicit_parens_to_emit=1
454
510
 
455
511
  tok=if maybe_local and implicit_parens_to_emit>=2
456
512
  implicit_parens_to_emit=0
@@ -459,15 +515,18 @@ end
459
515
  MethNameToken
460
516
  end.new(name,pos)
461
517
 
462
-
463
518
  case implicit_parens_to_emit
464
- when 2:
519
+ when 2;
465
520
  result.unshift ImplicitParamListStartToken.new(oldpos),
466
521
  ImplicitParamListEndToken.new(oldpos)
467
- when 1,3:
468
- result.unshift ImplicitParamListStartToken.new(oldpos)
469
- @bracestack.push ParamListContextNoParen.new(@linenum)
470
- when 0: #do nothing
522
+ when 1,3;
523
+ arr,pass=*param_list_coming_with_2_or_more_params?
524
+ result.push( *arr )
525
+ unless pass
526
+ result.unshift ImplicitParamListStartToken.new(oldpos)
527
+ @parsestack.push ParamListContextNoParen.new(@linenum)
528
+ end
529
+ when 0; #do nothing
471
530
  else raise 'invalid value of implicit_parens_to_emit'
472
531
  end
473
532
  return result.unshift(tok)
@@ -476,22 +535,43 @@ end
476
535
  # '\n (unescaped) and or'
477
536
  # 'then else elsif rescue ensure (illegal in value context)'
478
537
 
479
- # 'need to pop noparen from bracestack on these tokens: (in operator context)'
538
+ # 'need to pop noparen from parsestack on these tokens: (in operator context)'
480
539
  # 'not ok:'
481
540
  # 'not (but should it be?)'
482
541
  end
483
542
 
543
+ #-----------------------------------
544
+ def param_list_coming_with_2_or_more_params?
545
+ WHSPCHARS[prevchar] && (?(==nextchar) or return [[],false]
546
+ basesize=@parsestack.size
547
+ result=[get1token]
548
+ pass=loop{
549
+ tok=get1token
550
+ result<<tok
551
+ if @parsestack.size==basesize
552
+ break false
553
+ elsif ','==tok.to_s and @parsestack.size==basesize+1
554
+ break true
555
+ elsif EoiToken===tok
556
+ lexerror tok, "unexpected eof in parameter list"
557
+ end
558
+ }
559
+ return [result,pass]
560
+ end
561
+
484
562
  #-----------------------------------
485
563
  CONTEXT2ENDTOK={AssignmentRhsContext=>AssignmentRhsListEndToken,
486
564
  ParamListContextNoParen=>ImplicitParamListEndToken,
487
- KwParamListContext=>KwParamListEndToken
565
+ WhenParamListContext=>KwParamListEndToken,
566
+ RescueSMContext=>KwParamListEndToken
488
567
  }
489
568
  def abort_noparens!(str='')
490
569
  #assert @moretokens.empty?
491
570
  result=[]
492
- while klass=CONTEXT2ENDTOK[@bracestack.last.class]
493
- result << klass.new(@file.pos-str.length)
494
- @bracestack.pop
571
+ while klass=CONTEXT2ENDTOK[@parsestack.last.class]
572
+ result << klass.new(input_position-str.length)
573
+ break if RescueSMContext===@parsestack.last
574
+ @parsestack.pop
495
575
  end
496
576
  return result
497
577
  end
@@ -501,13 +581,13 @@ if false #no longer used
501
581
  def abort_1_noparen!(offs=0)
502
582
  assert @moretokens.empty?
503
583
  result=[]
504
- while AssignmentRhsContext===@bracestack.last
505
- @bracestack.pop
506
- result << AssignmentRhsListEndToken.new(@file.pos-offs)
584
+ while AssignmentRhsContext===@parsestack.last
585
+ @parsestack.pop
586
+ result << AssignmentRhsListEndToken.new(input_position-offs)
507
587
  end
508
- ParamListContextNoParen===@bracestack.last or lexerror huh,'{} with no matching callsite'
509
- @bracestack.pop
510
- result << ImplicitParamListEndToken.new(@file.pos-offs)
588
+ ParamListContextNoParen===@parsestack.last or lexerror huh,'{} with no matching callsite'
589
+ @parsestack.pop
590
+ result << ImplicitParamListEndToken.new(input_position-offs)
511
591
  return result
512
592
  end
513
593
  end
@@ -523,30 +603,31 @@ end
523
603
  case str
524
604
  when "end"
525
605
  result.unshift(*abort_noparens!(str))
526
- @bracestack.last.see @bracestack,:semi #sorta hacky... should make an :end event instead?
606
+ @parsestack.last.see self,:semi #sorta hacky... should make an :end event instead?
527
607
 
528
608
  =begin not needed?
529
- if ExpectDoOrNlContext===@bracestack.last
530
- @bracestack.pop
531
- assert @bracestack.last.starter[/^(while|until|for)$/]
609
+ if ExpectDoOrNlContext===@parsestack.last
610
+ @parsestack.pop
611
+ assert @parsestack.last.starter[/^(while|until|for)$/]
532
612
  end
533
613
  =end
534
614
 
535
- WantsEndContext===@bracestack.last or lexerror result.last, 'unbalanced end'
536
- ctx=@bracestack.pop
615
+ WantsEndContext===@parsestack.last or lexerror result.last, 'unbalanced end'
616
+ ctx=@parsestack.pop
537
617
  start,line=ctx.starter,ctx.linenum
538
618
  BEGINWORDS===start or lexerror result.last, "end does not match #{start or "nil"}"
539
- /^(class|module|def|do)$/===start and @localvars.end_block
619
+ /^(do)$/===start and localvars.end_block
620
+ /^(class|module|def)$/===start and @localvars_stack.pop
540
621
 
541
622
  when "class","module"
542
623
  result.first.has_end!
543
- @bracestack.push WantsEndContext.new(str,@linenum)
544
- @localvars.start_block
545
-
624
+ @parsestack.push WantsEndContext.new(str,@linenum)
625
+ @localvars_stack.push SymbolTable.new
626
+
546
627
  when "if","unless" #could be infix form without end
547
628
  if after_nonid_op?{false} #prefix form
548
629
  result.first.has_end!
549
- @bracestack.push WantsEndContext.new(str,@linenum)
630
+ @parsestack.push WantsEndContext.new(str,@linenum)
550
631
 
551
632
 
552
633
  else #infix form
@@ -554,11 +635,11 @@ end
554
635
  end
555
636
  when "begin","case"
556
637
  result.first.has_end!
557
- @bracestack.push WantsEndContext.new(str,@linenum)
638
+ @parsestack.push WantsEndContext.new(str,@linenum)
558
639
  when "while","until" #could be infix form without end
559
640
  if after_nonid_op?{false} #prefix form
560
641
  result.first.has_end!
561
- @bracestack.push WantsEndContext.new(str,@linenum)
642
+ @parsestack.push WantsEndContext.new(str,@linenum)
562
643
  expect_do_or_end_or_nl! str
563
644
 
564
645
  else #infix form
@@ -566,24 +647,26 @@ end
566
647
  end
567
648
  when "for"
568
649
  result.first.has_end!
569
- @bracestack.push WantsEndContext.new(str,@linenum)
650
+ result.push KwParamListStartToken.new(offset+str.length)
651
+ # corresponding EndToken emitted leaving ForContext ("in" branch, below)
652
+ @parsestack.push WantsEndContext.new(str,@linenum)
570
653
  #expect_do_or_end_or_nl! str #handled by ForSMContext now
571
- @bracestack.push ForSMContext.new(@linenum)
654
+ @parsestack.push ForSMContext.new(@linenum)
572
655
  when "do"
573
656
  result.unshift(*abort_noparens!(str))
574
- if ExpectDoOrNlContext===@bracestack.last
575
- @bracestack.pop
576
- assert WantsEndContext===@bracestack.last
657
+ if ExpectDoOrNlContext===@parsestack.last
658
+ @parsestack.pop
659
+ assert WantsEndContext===@parsestack.last
577
660
  else
578
661
  result.last.has_end!
579
- @bracestack.push WantsEndContext.new(str,@linenum)
580
- @localvars.start_block
662
+ @parsestack.push WantsEndContext.new(str,@linenum)
663
+ localvars.start_block
581
664
  block_param_list_lookahead
582
665
  end
583
666
  when "def"
584
667
  result.first.has_end!
585
- @bracestack.push WantsEndContext.new("def",@linenum)
586
- @localvars.start_block
668
+ @parsestack.push WantsEndContext.new("def",@linenum)
669
+ @localvars_stack.push SymbolTable.new
587
670
  safe_recurse { |aa|
588
671
  @last_operative_token=KeywordToken.new "def" #hack
589
672
  result.concat ignored_tokens
@@ -591,7 +674,7 @@ end
591
674
  #read an expr like a.b.c or a::b::c
592
675
  #or (expr).b.c
593
676
  if nextchar==?( #look for optional parenthesised head
594
- old_size=@bracestack.size
677
+ old_size=@parsestack.size
595
678
  parencount=0
596
679
  begin
597
680
  tok=get1token
@@ -601,22 +684,58 @@ end
601
684
  end
602
685
  EoiToken===tok and lexerror tok, "eof in def header"
603
686
  result<<tok
604
- end until parencount==0 #@bracestack.size==old_size
687
+ end until parencount==0 #@parsestack.size==old_size
605
688
  else #no parentheses, all tail
606
689
  @last_operative_token=KeywordToken.new "." #hack hack
607
- result << symbol(false,false)
608
- #this isn't quite right.... if a.b.c.d is seen, a, b, and c
690
+ tokindex=result.size
691
+ result << tok=symbol(false,false)
692
+ name=tok.to_s
693
+ assert !in_lvar_define_state
694
+
695
+ #maybe_local really means 'maybe local or constant'
696
+ maybe_local=case name
697
+ when /[^a-z_0-9]$/i; #do nothing
698
+ when /^[@$]/; true
699
+ when VARLIKE_KEYWORDS,FUNCLIKE_KEYWORDS; ty=KeywordToken
700
+ when /^[a-z_]/; localvars===name
701
+ when /^[A-Z]/; is_const=true #this is the right algorithm for constants...
702
+ end
703
+ if !ty and maybe_local
704
+ result.push( *ignored_tokens(false,false) )
705
+ nc=nextchar
706
+ if nc==?: || nc==?.
707
+ ty=VarNameToken
708
+ end
709
+ end
710
+ unless ty
711
+ ty=MethNameToken
712
+ endofs=tok.offset+tok.to_s.length
713
+ result[tokindex+1...tokindex+1]=
714
+ [ImplicitParamListStartToken.new(endofs),ImplicitParamListEndToken.new(endofs)]
715
+ end
716
+
717
+ assert result[tokindex].equal?(tok)
718
+ result[tokindex]=ty.new(tok.to_s,tok.offset)
719
+
720
+
721
+ #if a.b.c.d is seen, a, b, and c
609
722
  #should be considered maybe varname instead of methnames.
610
723
  #the last (d in the example) is always considered a methname;
611
724
  #it's what's being defined.
725
+ #b and c should be considered varnames only if
726
+ #they are capitalized and preceded by :: .
727
+ #a could even be a keyword (eg self or block_given?).
612
728
  end
613
729
  #read tail: .b.c.d etc
614
- @last_operative_token=result.last
730
+ result.reverse_each{|res| break @last_operative_token=res unless StillIgnoreToken===res}
731
+ ###@last_operative_token=result.last #naive
732
+ assert !(IgnoreToken===@last_operative_token)
615
733
  state=:expect_op
734
+ @in_def_name=true
616
735
  loop do
617
736
 
618
737
  #look for start of parameter list
619
- nc=(@moretokens.first or nextchar.chr)
738
+ nc=(@moretokens.empty? ? nextchar.chr : @moretokens.first.to_s[0,1])
620
739
  if state==:expect_op and /^[a-z_(&*]/i===nc
621
740
  result.concat def_param_list
622
741
  break
@@ -627,8 +746,8 @@ end
627
746
  case tok
628
747
  when EoiToken
629
748
  lexerror tok,'unexpected eof in def header'
630
- when IgnoreToken
631
- when MethNameToken #,VarNameToken # /^[a-z_]/i.token_pat
749
+ when StillIgnoreToken
750
+ when MethNameToken ,VarNameToken # /^[a-z_]/i.token_pat
632
751
  lexerror tok,'expected . or ::' unless state==:expect_name
633
752
  state=:expect_op
634
753
  when /^(\.|::)$/.token_pat
@@ -642,6 +761,7 @@ end
642
761
  "#{tok}:#{tok.class}")
643
762
  end
644
763
  end
764
+ @in_def_name=false
645
765
  }
646
766
  when "alias"
647
767
  safe_recurse { |a|
@@ -663,6 +783,7 @@ end
663
783
  tok or lexerror(result.first,"bad symbol in undef")
664
784
  result<< tok
665
785
  @last_operative_token=tok
786
+ assert !(IgnoreToken===@last_operative_token)
666
787
 
667
788
  sawnl=false
668
789
  result.concat ignored_tokens(true){|nl| sawnl=true}
@@ -674,26 +795,47 @@ end
674
795
  }
675
796
 
676
797
  # when "defined?"
677
- # huh
678
798
  #defined? might have a baresymbol following it
679
799
  #does it need to be handled specially?
800
+ #it would seem not.....
680
801
 
681
802
  when "when"
803
+ #abort_noparens! emits EndToken on leaving context
682
804
  result.unshift(*abort_noparens!(str))
683
- @bracestack.push KwParamListContext.new(str,@linenum)
805
+ result.push KwParamListStartToken.new( offset+str.length)
806
+ @parsestack.push WhenParamListContext.new(str,@linenum)
684
807
 
685
808
  when "rescue"
686
- result.unshift(*abort_noparens!(str))
687
- @bracestack.push RescueSMContext.new(@linenum)
809
+ unless after_nonid_op? {false}
810
+ #rescue needs to be treated differently when in operator context...
811
+ #i think no RescueSMContext should be pushed on the stack...
812
+ #plus, the rescue token should be marked as infix
813
+ result.first.set_infix!
814
+ else
815
+ result.push KwParamListStartToken.new(offset+str.length)
816
+ #corresponding EndToken emitted by abort_noparens! on leaving rescue context
817
+ result.unshift(*abort_noparens!(str))
818
+ @parsestack.push RescueSMContext.new(@linenum)
819
+ end
688
820
 
689
- when "then","in"
821
+ when "then"
822
+ result.unshift(*abort_noparens!(str))
823
+ @parsestack.last.see self,:then
824
+
825
+ when "in"
826
+ result.unshift KwParamListEndToken.new( offset)
690
827
  result.unshift(*abort_noparens!(str))
691
- @bracestack.last.see @bracestack,str.to_sym
828
+ @parsestack.last.see self,:in
692
829
 
693
- when /^(#{BINOPWORDS}|#{INNERBOUNDINGWORDS})$/o
830
+ when /\A(#{BINOPWORDS}|#{INNERBOUNDINGWORDS})\Z/o
694
831
  result.unshift(*abort_noparens!(str))
695
832
 
696
- when FUNCLIKE_KEYWORDS: result=yield
833
+ when /\A(return|break|next)\Z/
834
+ result=yield
835
+ result.first.has_no_block! unless KeywordToken===@last_operative_token and @last_operative_token===/\A(\.|::)\Z/
836
+
837
+ when FUNCLIKE_KEYWORDS
838
+ result=yield
697
839
 
698
840
  when RUBYKEYWORDS
699
841
  #do nothing
@@ -706,6 +848,36 @@ end
706
848
  end
707
849
 
708
850
 
851
+ #-----------------------------------
852
+ def parsestack_lastnonassign_is?(obj)
853
+ @parsestack.reverse_each{|ctx|
854
+ case ctx
855
+ # when klass: return true
856
+ when AssignmentRhsContext
857
+ else return ctx.object_id==obj.object_id
858
+ end
859
+ }
860
+ end
861
+
862
+ #-----------------------------------
863
+ #what's inside goalposts (the block formal parameter list)
864
+ #is considered the left hand side of an assignment.
865
+ #inside goalposts, a local variable is declared if
866
+ #it has one of the following tokens on both sides:
867
+ # , (if directly inside goalposts or nested lhs)
868
+ # | (as a goalpost)
869
+ # * or & (unary only)
870
+ # ( or ) (if they form a nested left hand side)
871
+ #parens form a nested lhs if they're not part of an actual
872
+ #parameter list and have a comma directly in them somewhere
873
+ #a nested lhs _must_ have a comma in it somewhere. this is
874
+ #not legal:
875
+ # (foo)=[1]
876
+ #whereas this is:
877
+ # (foo,)=[1]
878
+
879
+
880
+
709
881
  #-----------------------------------
710
882
  def block_param_list_lookahead
711
883
  safe_recurse{ |la|
@@ -713,27 +885,45 @@ end
713
885
  a=ignored_tokens
714
886
 
715
887
  if eat_next_if(?|)
716
- a<<KeywordToken.new("|",@file.pos-1)
888
+ a<<KeywordToken.new("|", input_position-1)
889
+ if true
890
+ @parsestack.push mycontext=BlockParamListLhsContext.new(@linenum)
891
+ nextchar==?| and a.push NoWsToken.new(input_position)
892
+ else
717
893
  if eat_next_if(?|)
718
- a.concat [NoWsToken.new(@file.pos-1),
719
- KeywordToken.new('|',@file.pos-1)]
894
+ a.concat [NoWsToken.new(input_position-1),
895
+ KeywordToken.new('|', input_position-1)]
720
896
  else
721
897
  assert !@defining_lvar
722
898
  @defining_lvar=true
723
899
  assert((@last_operative_token===';' or NewlineToken===@last_operative_token))
724
- @bracestack.push BlockParamListContext.new(@linenum)
725
- #block param initializers are not supported here, because ruby doesn't allow them!
726
- begin
900
+ @parsestack.push mycontext=BlockParamListLhsContext.new(@linenum)
901
+ #block param initializers ARE supported here, even tho ruby doesn't allow them!
902
+ tok=nil
903
+ loop do
727
904
  tok=get1token
728
- EoiToken===tok and lexerror tok,"eof in block parameter list"
905
+ case tok
906
+ when EoiToken; lexerror tok,"eof in block parameter list"
907
+ when AssignmentRhsListStartToken; @defining_lvar=false
908
+ when AssignmentRhsListEndToken; parsestack_lastnonassign_is?(mycontext) and @defining_lvar=true
909
+ end
910
+
911
+ tok==='|' and parsestack_lastnonassign_is?(mycontext) and break
729
912
  a<<tok
730
- end until tok==='|'
731
- assert@defining_lvar
913
+ end
914
+ assert@defining_lvar || AssignmentRhsContext===@parsestack.last
732
915
  @defining_lvar=false
733
- BlockParamListContext===@bracestack.last or raise 'expected BlockParamListContext atop @bracestack'
734
- @bracestack.pop
916
+ while AssignmentRhsContext===@parsestack.last
917
+ a.push( *abort_noparens!('|') )
918
+ end
919
+
920
+ @parsestack.last.object_id==mycontext.object_id or raise 'expected my BlockParamListLhsContext atop @parsestack'
921
+ @parsestack.pop
922
+
923
+ a<<KeywordToken.new('|',tok.offset)
735
924
  @moretokens.empty? or
736
925
  fixme %#moretokens might be set from get1token call above...might be bad#
926
+ end
737
927
  end
738
928
  end
739
929
 
@@ -755,8 +945,9 @@ end
755
945
  #then match the following tokens until
756
946
  #the matching endbrace is found
757
947
  def def_param_list
948
+ @in_def_name=false
758
949
  result=[]
759
- normal_comma_level=old_bracestack_size=@bracestack.size
950
+ normal_comma_level=old_parsestack_size=@parsestack.size
760
951
  safe_recurse { |a|
761
952
  assert(@moretokens.empty?)
762
953
  assert((not IgnoreToken===@moretokens[0]))
@@ -770,9 +961,9 @@ end
770
961
  assert(tok==='(')
771
962
 
772
963
 
773
- #bracestack was changed by get1token above...
964
+ #parsestack was changed by get1token above...
774
965
  normal_comma_level+=1
775
- assert(normal_comma_level==@bracestack.size)
966
+ assert(normal_comma_level==@parsestack.size)
776
967
  endingblock=proc{|tok| tok===')' }
777
968
  else
778
969
  endingblock=proc{|tok| tok===';' or NewlineToken===tok}
@@ -785,36 +976,48 @@ end
785
976
  #read local parameter names
786
977
  loop do
787
978
  expect_name=(@last_operative_token===',' and
788
- normal_comma_level==@bracestack.size)
979
+ normal_comma_level==@parsestack.size)
789
980
  expect_name and @defining_lvar||=true
790
981
  result << tok=get1token
791
982
  lexerror tok, "unexpected eof in def header" if EoiToken===tok
792
983
 
793
984
  #break if at end of param list
794
985
  endingblock===tok and
795
- old_bracestack_size>=@bracestack.size and break
986
+ old_parsestack_size>=@parsestack.size and break
796
987
 
797
988
  #next token is a local var name
798
989
  #(or the one after that if unary ops present)
799
990
  #result.concat ignored_tokens
800
- expect_name and case tok
801
- when IgnoreToken#, /^[A-Z]/ #do nothing
802
- when VarNameToken
991
+ if expect_name
992
+ case tok
993
+ when IgnoreToken #, /^[A-Z]/ #do nothing
994
+ when /^,$/.token_pat #hack
995
+
996
+
997
+ when VarNameToken
803
998
  assert@defining_lvar
804
999
  @defining_lvar=false
805
1000
  assert((not @last_operative_token===','))
806
- when /^[&*]$/.token_pat #unary form...
1001
+ when /^[&*]$/.token_pat #unary form...
807
1002
  #a NoWsToken is also expected... read it now
808
1003
  result.concat maybe_no_ws_token #not needed?
809
1004
  @last_operative_token=KeywordToken.new ','
810
- else lexerror tok,"unfamiliar var name '#{tok}'"
1005
+ else
1006
+ lexerror tok,"unfamiliar var name '#{tok}'"
1007
+ end
1008
+ elsif /^,$/.token_pat===tok and
1009
+ normal_comma_level+1==@parsestack.size and
1010
+ AssignmentRhsContext===@parsestack.last
1011
+ #seeing comma here should end implicit rhs started within the param list
1012
+ result[-1,0]=AssignmentRhsListEndToken.new(tok.offset)
1013
+ @parsestack.pop
811
1014
  end
812
1015
  end
813
1016
 
814
1017
  @defining_lvar=false
815
1018
 
816
1019
 
817
- assert(@bracestack.size <= old_bracestack_size)
1020
+ assert(@parsestack.size <= old_parsestack_size)
818
1021
  assert(endingblock[tok])
819
1022
 
820
1023
  #hack: force next token to look like start of a
@@ -846,19 +1049,19 @@ end
846
1049
  end
847
1050
 
848
1051
  #-----------------------------------
849
- #handle * in ruby code. is unary or binary operator?
1052
+ #handle * & in ruby code. is unary or binary operator?
850
1053
  def star_or_amp(ch)
851
1054
  assert('*&'[ch])
852
- if unary_op_expected? ch
1055
+ want_unary=unary_op_expected? ch
1056
+ result=(quadriop ch)
1057
+ if want_unary
853
1058
  #readahead(2)[1..1][/[\s\v#\\]/] or #not needed?
854
- result=operator_or_methname_token getchar
1059
+ assert OperatorToken===result
1060
+ result.unary=true #result should distinguish unary+binary *&
855
1061
  WHSPLF[nextchar.chr] or
856
- @moretokens << NoWsToken.new(@file.pos)
857
- return result
858
- else
859
- return(quadriop ch)
1062
+ @moretokens << NoWsToken.new(input_position)
860
1063
  end
861
- #result should distinguish unary+binary *&
1064
+ result
862
1065
  end
863
1066
 
864
1067
  #-----------------------------------
@@ -868,7 +1071,7 @@ end
868
1071
  getchar
869
1072
  NumberToken.new getchar_maybe_escape
870
1073
  else
871
- @bracestack.push TernaryContext.new(@linenum)
1074
+ @parsestack.push TernaryContext.new(@linenum)
872
1075
  KeywordToken.new getchar #operator
873
1076
  end
874
1077
  end
@@ -888,18 +1091,19 @@ end
888
1091
  end
889
1092
 
890
1093
  #-----------------------------------
891
- #return true if tok corresponds to a variable or constant, false if its for a method, nil for something else
892
- #we assume tok is a valid token with a correctly formed name.
1094
+ #return true if last tok corresponds to a variable or constant,
1095
+ #false if its for a method, nil for something else
1096
+ #we assume it is a valid token with a correctly formed name.
893
1097
  #...should really be called was_var_name
894
1098
  def is_var_name?
895
1099
  (tok=@last_operative_token)
896
1100
 
897
1101
  s=tok.to_s
898
1102
  case s
899
- when /[^a-z_0-9]$/i: false
900
- when /^[a-z_]/: @localvars===s or VARLIKE_KEYWORDS===s
901
- when /^[A-Z]/: VarNameToken===tok
902
- when /^[@$<]/: true
1103
+ when /[^a-z_0-9]$/i; false
1104
+ when /^[a-z_]/; localvars===s or VARLIKE_KEYWORDS===s
1105
+ when /^[A-Z]/; VarNameToken===tok
1106
+ when /^[@$<]/; true
903
1107
  else raise "not var or method name: #{s}"
904
1108
  end
905
1109
  end
@@ -907,21 +1111,26 @@ end
907
1111
  #-----------------------------------
908
1112
  def colon_quote_expected?(ch) #yukko hack
909
1113
  assert ':?'[ch]
910
- readahead(2)[/^(\?[^#{WHSPLF}]|:[$@a-zA-Z_'"`\[*~+\-\/%<=>&|^])$/o] or return false
1114
+ readahead(2)[/^(\?[^#{WHSPLF}]|:[^\s\r\n\t\f\v :])$/o] or return false
911
1115
 
912
1116
  after_nonid_op? {
913
1117
  #possible func-call as operator
914
1118
 
915
- !is_var_name?
1119
+ not is_var_name? and
1120
+ if ch==':'
1121
+ not TernaryContext===@parsestack.last
1122
+ else
1123
+ !readahead(3)[/^\?[a-z0-9_]{2}/i]
1124
+ end
916
1125
  }
917
1126
  end
918
1127
 
919
1128
  #-----------------------------------
920
1129
  def symbol_or_op(ch)
921
- startpos=@file.pos
1130
+ startpos= input_position
922
1131
  qe= colon_quote_expected?(ch)
923
1132
  lastchar=prevchar
924
- eat_next_if(ch) or raise "needed: "+ch
1133
+ eat_next_if(ch[0]) or raise "needed: "+ch
925
1134
 
926
1135
  #handle quoted symbols like :"foobar", :"[]"
927
1136
  qe and return symbol(':')
@@ -932,13 +1141,13 @@ end
932
1141
  @moretokens.push(*abort_noparens!(':'))
933
1142
 
934
1143
  #end ternary context, if any
935
- @bracestack.last.see @bracestack,:colon
1144
+ @parsestack.last.see self,:colon
936
1145
 
937
- TernaryContext===@bracestack.last and @bracestack.pop #should be in the context's see handler
1146
+ TernaryContext===@parsestack.last and @parsestack.pop #should be in the context's see handler
938
1147
 
939
- if ExpectDoOrNlContext===@bracestack.last #should be in the context's see handler
940
- @bracestack.pop
941
- assert @bracestack.last.starter[/^(while|until|for)$/]
1148
+ if ExpectDoOrNlContext===@parsestack.last #should be in the context's see handler
1149
+ @parsestack.pop
1150
+ assert @parsestack.last.starter[/^(while|until|for)$/]
942
1151
  end
943
1152
 
944
1153
  @moretokens.push KeywordToken.new(':',startpos)
@@ -965,17 +1174,17 @@ end
965
1174
  #-----------------------------------
966
1175
  def symbol(notbare,couldbecallsite=!notbare)
967
1176
  assert !couldbecallsite
968
- start=@file.pos
1177
+ start= input_position
969
1178
  notbare and start-=1
970
1179
  klass=(notbare ? SymbolToken : MethNameToken)
971
1180
 
972
1181
  #look for operators
973
1182
  opmatches=readahead(3)[RUBYSYMOPERATORREX]
974
- result= opmatches ? @file.read(opmatches.size) :
1183
+ result= opmatches ? read(opmatches.size) :
975
1184
  case nc=nextchar
976
1185
  when ?" then assert notbare;double_quote('"')
977
1186
  when ?' then assert notbare;double_quote("'")
978
- when ?` then @file.read(1)
1187
+ when ?` then read(1)
979
1188
  when ?@ then at_identifier.to_s
980
1189
  when ?$ then dollar_identifier.to_s
981
1190
  when ?_,?a..?z then identifier_as_string(?:)
@@ -991,19 +1200,24 @@ end
991
1200
  return lexerror(klass.new(result,start),error)
992
1201
  end
993
1202
 
1203
+ def merge_assignment_op_in_setter_callsites?
1204
+ false
1205
+ end
994
1206
  #-----------------------------------
995
1207
  def callsite_symbol(tok_to_errify)
996
- start=@file.pos
1208
+ start= input_position
997
1209
 
998
1210
  #look for operators
999
1211
  opmatches=readahead(3)[RUBYSYMOPERATORREX]
1000
- return [opmatches ? @file.read(opmatches.size) :
1212
+ return [opmatches ? read(opmatches.size) :
1001
1213
  case nc=nextchar
1002
- when ?` then @file.read(1)
1003
- when ?_,?a..?z,?A..?Z then identifier_as_string(?:)
1214
+ when ?` then read(1)
1215
+ when ?_,?a..?z,?A..?Z then
1216
+ context=merge_assignment_op_in_setter_callsites? ? ?: : nc
1217
+ identifier_as_string(context)
1004
1218
  else
1005
1219
  @last_operative_token=KeywordToken.new(';')
1006
- lexerror(tok_to_errify,"unexpected char starting symbol: #{nc.chr}")
1220
+ lexerror(tok_to_errify,"unexpected char starting callsite symbol: #{nc.chr}, tok=#{tok_to_errify.inspect}")
1007
1221
  nil
1008
1222
  end, start
1009
1223
  ]
@@ -1011,10 +1225,10 @@ end
1011
1225
 
1012
1226
  #-----------------------------------
1013
1227
  def here_header
1014
- @file.read(2)=='<<' or raise "parser insanity"
1228
+ read(2)=='<<' or raise "parser insanity"
1015
1229
 
1016
1230
  dash=eat_next_if(?-)
1017
- quote=eat_next_if( /^['"`]$/)
1231
+ quote=eat_next_if( /['"`]/)
1018
1232
  if quote
1019
1233
  ender=til_charset(/[#{quote}]/)
1020
1234
  (quote==getchar) or
@@ -1042,8 +1256,8 @@ end
1042
1256
  #handle case of here header in a string inclusion, but
1043
1257
  #here body outside it.
1044
1258
  cnt=0
1045
- 1.upto @bracestack.size do |i|
1046
- case @bracestack[-i]
1259
+ 1.upto @parsestack.size do |i|
1260
+ case @parsestack[-i]
1047
1261
  when AssignmentRhsContext,ParamListContextNoParen,TopLevelContext
1048
1262
  else cnt+=1
1049
1263
  end
@@ -1054,11 +1268,11 @@ end
1054
1268
  end
1055
1269
 
1056
1270
  tok=get1token
1057
- assert(a.object_id==@moretokens.object_id)
1271
+ assert(a.equal?( @moretokens))
1058
1272
  toks<<tok
1059
1273
  EoiToken===tok and lexerror tok, "here body expected before eof"
1060
1274
  end while res.unsafe_to_use
1061
- assert(a.object_id==@moretokens.object_id)
1275
+ assert(a.equal?( @moretokens))
1062
1276
  a[0,0]= toks #same as a=toks+a, but keeps a's id
1063
1277
  }
1064
1278
 
@@ -1076,9 +1290,9 @@ end
1076
1290
  if quote_expected?(ch) #and @last_operative_token!='class' #not needed?
1077
1291
  here_header
1078
1292
  else
1079
- operator_or_methname_token @file.read(2)
1293
+ operator_or_methname_token read(2)
1080
1294
  end
1081
- when "<=>" then operator_or_methname_token @file.read(3)
1295
+ when "<=>" then operator_or_methname_token read(3)
1082
1296
  else quadriop(ch)
1083
1297
  end
1084
1298
  end
@@ -1087,115 +1301,152 @@ end
1087
1301
  def escnewline(ch)
1088
1302
  assert ch == '\\'
1089
1303
 
1090
- pos=@file.pos
1304
+ pos= input_position
1091
1305
  result=getchar
1092
1306
  if nl=readnl
1093
1307
  result+=nl
1094
1308
  else
1095
1309
  error='illegal escape sequence'
1096
1310
  end
1097
- lexerror EscNlToken.new(@filename,@linenum,result,pos), error
1098
- end
1311
+
1312
+ @moretokens.unshift FileAndLineToken.new(@filename,ln=@linenum,input_position)
1313
+ optional_here_bodies
1099
1314
 
1315
+ lexerror EscNlToken.new(@filename,ln-1,result,pos), error
1316
+ end
1317
+
1100
1318
  #-----------------------------------
1101
- def newline(ch)
1102
- assert("\r\n"[nextchar.chr])
1319
+ def optional_here_bodies
1103
1320
 
1104
1321
  #handle here bodies queued up by previous line
1105
1322
  #(we should be more compatible with dos/mac style newlines...)
1106
- if tofill=@incomplete_here_tokens.shift
1107
- tofill.string.offset=@file.pos
1323
+ while tofill=@incomplete_here_tokens.shift
1324
+ tofill.string.offset= input_position
1108
1325
  loop {
1109
- assert("\r\n"[nextchar.chr])
1110
-
1111
- #retr evrything til next nl
1112
- line=all_quote(/^[\r\n]$/, tofill.quote, /^[\r\n]$/, :regex_esc_seq)
1113
- #(you didn't know all_quote could take a regex, did you?)
1326
+ assert("\r\n"[prevchar])
1114
1327
 
1115
- #get rid of fals that otherwise appear to be in the middle of
1116
- #a string (and are emitted out of order)
1117
- fal=@moretokens.pop
1118
- assert FileAndLineToken===fal || fal.nil?
1119
-
1120
- back1char
1121
- assert("\r\n"[nextchar.chr])
1122
-
1123
- #matches terminating reg expr?
1124
- break if line.elems.size==1 and
1125
- line.elems[0][tofill.termex]
1126
-
1127
- tofill.append_token line
1128
- tofill.append readnl
1129
- back1char
1328
+ #here body terminator?
1329
+ oldpos= input_position
1330
+ if tofill.dash
1331
+ til_charset(/[^#{WHSP}]/o)
1332
+ end
1333
+ break if eof?
1334
+ break if read(tofill.ender.size)==tofill.ender and readnl
1335
+ input_position_set oldpos
1336
+
1337
+ if tofill.quote=="'"
1338
+ line=til_charset(/[\r\n]/)+readnl
1339
+ line.gsub! "\\\\", "\\"
1340
+ tofill.append line
1341
+ assert(line[-1..-1][/[\r\n]/])
1342
+ else
1343
+
1344
+ back1char #-1 to make newline char the next to read
1345
+ @linenum-=1
1346
+
1347
+ #retr evrything til next nl
1348
+ line=all_quote(INET_NL_REX, tofill.quote, INET_NL_REX)
1349
+ #(you didn't know all_quote could take a regex, did you?)
1350
+
1351
+ #get rid of fals that otherwise appear to be in the middle of
1352
+ #a string (and are emitted out of order)
1353
+ fal=@moretokens.pop
1354
+ assert FileAndLineToken===fal || fal.nil?
1355
+
1356
+ back1char
1357
+ @linenum-=1
1358
+ assert("\r\n"[nextchar.chr])
1359
+ tofill.append_token line
1360
+ tofill.append readnl
1361
+ end
1130
1362
  }
1131
1363
 
1132
- assert("\r\n"[nextchar.chr])
1364
+ assert(eof? || "\r\n"[prevchar])
1133
1365
  tofill.unsafe_to_use=false
1366
+ tofill.line=@linenum-1
1134
1367
 
1135
- return tofill.bodyclass.new(tofill)
1368
+ @moretokens.push \
1369
+ tofill.bodyclass.new(tofill),
1370
+ FileAndLineToken.new(@filename,@linenum,input_position)
1136
1371
  end
1372
+
1373
+ end
1374
+
1375
+ #-----------------------------------
1376
+ def newline(ch)
1377
+ assert("\r\n"[nextchar.chr])
1378
+
1379
+
1137
1380
 
1138
1381
  #ordinary newline handling (possibly implicitly escaped)
1139
1382
  assert("\r\n"[nextchar.chr])
1383
+ assert !@parsestack.empty?
1140
1384
  assert @moretokens.empty?
1141
1385
  result=if NewlineToken===@last_operative_token or #hack
1142
1386
  @last_operative_token===/^(;|begin|do|#{INNERBOUNDINGWORDS})$/ or #hack
1143
1387
  !after_nonid_op?{false}
1144
1388
  then #hack-o-rama: probly cases left out above
1145
1389
  a= abort_noparens!
1146
- ExpectDoOrNlContext===@bracestack.last and @bracestack.pop
1147
- @bracestack.last.see @bracestack,:semi
1390
+ ExpectDoOrNlContext===@parsestack.last and @parsestack.pop
1391
+ assert !@parsestack.empty?
1392
+ @parsestack.last.see self,:semi
1148
1393
 
1149
1394
  a << super(ch)
1150
1395
  @moretokens.replace a+@moretokens
1151
1396
  @moretokens.shift
1152
1397
  else
1153
- offset=@file.pos
1154
- #@moretokens <<
1155
- EscNlToken.new(@filename,@linenum,readnl,offset)
1398
+ offset= input_position
1399
+ nl=readnl
1400
+ @moretokens << FileAndLineToken.new(@filename,@linenum,input_position)
1401
+ EscNlToken.new(@filename,@linenum-1,nl,offset)
1156
1402
  #WsToken.new ' ' #why? #should be "\\\n" ?
1157
1403
  end
1158
1404
 
1405
+ optional_here_bodies
1406
+
1159
1407
  start_of_line_directives
1160
1408
 
1161
1409
  return result
1162
1410
  end
1163
1411
 
1164
1412
  #-----------------------------------
1165
- EQBEGIN=%r/^=begin[^a-zA-Z_0-9]$/
1413
+ EQBEGIN=%r/^=begin[ \t\v\r\n\f]$/
1166
1414
  EQBEGINLENGTH=7
1167
1415
  EQEND='=end'
1168
- ENDMARKER=/^__END__[\r\n]$/
1416
+ EQENDLENGTH=4
1417
+ ENDMARKER=/^__END__[\r\n]?\Z/
1169
1418
  ENDMARKERLENGTH=8
1170
1419
  def start_of_line_directives
1171
1420
  #handle =begin...=end (at start of a line)
1172
1421
  while EQBEGIN===readahead(EQBEGINLENGTH)
1173
- startpos=@file.pos
1174
- more=@file.read(EQBEGINLENGTH-1) #get =begin
1175
-
1176
- #keep reading til /\n=end.*\n/
1177
- @file.each(EQEND) {|cblock|
1178
- more << cblock
1179
- #must be at start of line
1180
- break if /^[\r\n]#{EQEND}/o===readback(EQEND.length+1)
1181
- }
1422
+ startpos= input_position
1423
+ more= read(EQBEGINLENGTH-1) #get =begin
1424
+
1425
+ begin
1426
+ eof? and raise "eof before =end"
1427
+ more<<til_charset(/[\r\n]/)
1428
+ more<<readnl
1429
+ end until readahead(EQENDLENGTH)==EQEND
1430
+
1182
1431
  #read rest of line after =end
1183
- more << @file.til_charset(/[\r\n]/)
1432
+ more << til_charset(/[\r\n]/)
1184
1433
  assert((?\r===nextchar or ?\n===nextchar))
1185
1434
  assert !(/[\r\n]/===more[-1,1])
1435
+ more<< readnl
1186
1436
 
1187
- newls= more.scan(/\r\n?|\n\r?/)
1188
- @linenum+= newls.size
1437
+ # newls= more.scan(/\r\n?|\n\r?/)
1438
+ # @linenum+= newls.size
1189
1439
 
1190
1440
  #inject the fresh comment into future token results
1191
- @moretokens.push IgnoreToken.new(more,startpos)
1441
+ @moretokens.push IgnoreToken.new(more,startpos),
1442
+ FileAndLineToken.new(@filename,@linenum,input_position)
1192
1443
  end
1193
1444
 
1194
1445
  #handle __END__
1195
1446
  if ENDMARKER===readahead(ENDMARKERLENGTH)
1196
- assert !(ImplicitContext===@bracestack.last)
1197
- @moretokens.unshift endoffile_detected(@file.read(6))
1198
- @file.pos=@file.stat.size
1447
+ assert !(ImplicitContext===@parsestack.last)
1448
+ @moretokens.unshift endoffile_detected(read(7))
1449
+ # input_position_set @file.size
1199
1450
  end
1200
1451
  end
1201
1452
 
@@ -1221,11 +1472,15 @@ end
1221
1472
  #used to resolve the ambiguity of
1222
1473
  # <<, %, ? in ruby
1223
1474
  #returns whether current token is to be the start of a literal
1224
- #/ is not handled right here if whitespace immediately follows the /
1225
1475
  def quote_expected?(ch) #yukko hack
1476
+ if AssignmentContext===@parsestack.last
1477
+ @parsestack.pop
1478
+ return false
1479
+ end
1480
+
1226
1481
  case ch[0]
1227
1482
  when ?? then readahead(2)[/^\?[#{WHSPLF}]$/o] #not needed?
1228
- when ?% then readahead(3)[/^%([a-ps-vyzA-PR-VX-Z]|[QqrwWx][a-zA-Z0-9])/]
1483
+ when ?% then readahead(3)[/^%([a-pt-vyzA-PR-VX-Z]|[QqrswWx][a-zA-Z0-9])/]
1229
1484
  when ?< then !readahead(4)[/^<<-?['"`a-z0-9_]/i]
1230
1485
  else raise 'unexpected ch (#{ch}) in quote_expected?'
1231
1486
  # when ?+,?-,?&,?*,?~,?! then '*&='[readahead(2)[1..1]]
@@ -1240,22 +1495,29 @@ end
1240
1495
  end
1241
1496
 
1242
1497
  #-----------------------------------
1498
+ #returns false if last token was an value, true if it was an operator.
1499
+ #returns what block yields if last token was a method name.
1243
1500
  #used to resolve the ambiguity of
1244
- # <<, %, /, ?, :, and newline in ruby
1501
+ # <<, %, /, ?, :, and newline (among others) in ruby
1245
1502
  def after_nonid_op?
1246
1503
  case @last_operative_token
1247
- when MethNameToken,VarNameToken, FUNCLIKE_KEYWORDS.token_pat
1504
+ when MethNameToken, FUNCLIKE_KEYWORDS.token_pat ,VarNameToken
1505
+ #VarNameToken should really be left out of this case...
1506
+ #should be in next branch instread
1507
+ #callers all check for last token being not a variable if they pass anything
1508
+ #but {false} in the block
1248
1509
  return yield
1249
1510
  when StringToken, SymbolToken, NumberToken, HerePlaceholderToken,
1250
- %r{^(class|module|do|end|self|true|false|nil|
1251
- __FILE__|__LINE__|[\})\]]|alias|(un)?def|for
1511
+ %r{^(
1512
+ class|module|end|self|true|false|nil|
1513
+ __FILE__|__LINE__|[\})\]]|alias|(un)?def|for
1252
1514
  )$}x.token_pat
1253
- #do shouldn't be in above list... dunno about def/undef
1254
- #maybe class/module shouldn't either?
1515
+ #dunno about def/undef
1516
+ #maybe class/module shouldn't he here either?
1255
1517
  #for is also in NewlineToken branch, below.
1256
1518
  #what about rescue?
1257
1519
  return false
1258
- when /^(#{RUBYOPERATORREX}|#{INNERBOUNDINGWORDS})$/o.token_pat
1520
+ when /^(#{RUBYOPERATORREX}|#{INNERBOUNDINGWORDS}|do)$/o.token_pat
1259
1521
  #regexs above must match whole string
1260
1522
  #assert(@last_operative_token==$&) #disabled 'cause $& is now always nil :(
1261
1523
  return true
@@ -1273,19 +1535,46 @@ end
1273
1535
  end
1274
1536
  end
1275
1537
 
1538
+
1539
+
1540
+
1541
+ #-----------------------------------
1542
+ #returns the last context on @parsestack which isn't an ImplicitContext
1543
+ def last_context_not_implicit
1544
+ @parsestack.reverse_each{|ctx|
1545
+ return ctx unless ImplicitContext===ctx
1546
+ }
1547
+ fail
1548
+ end
1549
+
1550
+ #-----------------------------------
1551
+ #a | has been seen. is it an operator? or a goalpost?
1552
+ #(goalpost == delimiter of block param list)
1553
+ #if it is a goalpost, end the BlockParamListLhsContext on
1554
+ #the context stack, as well as any implicit contexts on top of it.
1555
+ def conjunction_or_goalpost(ch)
1556
+ result=quadriop(ch)
1557
+ if result===/^|$/ and BlockParamListLhsContext===last_context_not_implicit
1558
+ @moretokens.push( *abort_noparens!("|"))
1559
+ assert(BlockParamListLhsContext===@parsestack.last)
1560
+ @parsestack.pop
1561
+ @moretokens.push KeywordToken.new("|", input_position-1)
1562
+ result=@moretokens.shift
1563
+ end
1564
+ result
1565
+ end
1566
+
1276
1567
  #-----------------------------------
1277
1568
  def quadriop(ch) #match /&&?=?/ (&, &&, &=, or &&=)
1278
1569
  assert(%w[& * | < >].include?(ch))
1279
- # '&*'[ch] and qe=quote_expected?(ch) #not needed?
1280
1570
  result=getchar + (eat_next_if(ch)or'')
1281
1571
  if eat_next_if(?=)
1282
1572
  result << ?=
1283
- # elsif qe and result[/^[&*]$/] #not needed?
1284
- # @moretokens<<NoWsToken.new(@file.pos) #not needed?
1285
1573
  end
1286
1574
  return operator_or_methname_token(result)
1287
1575
  end
1288
1576
 
1577
+
1289
1578
  #-----------------------------------
1290
1579
  def biop(ch) #match /%=?/ (% or %=)
1291
1580
  assert(ch[/^[%^~]$/])
@@ -1295,18 +1584,18 @@ end
1295
1584
  end
1296
1585
  return operator_or_methname_token( result)
1297
1586
  end
1298
-
1299
1587
  #-----------------------------------
1300
- def tilde(ch) #match /~=?/ (~ or ~=)
1588
+ def tilde(ch) #match ~
1301
1589
  assert(ch=='~')
1302
1590
  result=getchar
1303
- # eat_next_if(?=) ?
1591
+ # eat_next_if(?=) ? #ack, spppft, I'm always getting this backwards
1304
1592
  # result <<?= :
1305
1593
  WHSPLF[nextchar.chr] ||
1306
- @moretokens << NoWsToken.new(@file.pos)
1594
+ @moretokens << NoWsToken.new(input_position)
1307
1595
  #why is the NoWsToken necessary at this point?
1308
- return operator_or_methname_token( result)
1309
- #result should distinguish unary ~
1596
+ result=operator_or_methname_token result
1597
+ result.unary=true #result should distinguish unary ~
1598
+ result
1310
1599
  end
1311
1600
 
1312
1601
  #-----------------------------------
@@ -1327,8 +1616,9 @@ end
1327
1616
  else #unary operator
1328
1617
  result=getchar
1329
1618
  WHSPLF[nextchar.chr] or
1330
- @moretokens << NoWsToken.new(@file.pos)
1331
- return(operator_or_methname_token result)
1619
+ @moretokens << NoWsToken.new(input_position)
1620
+ result=(operator_or_methname_token result)
1621
+ result.unary=true
1332
1622
  #todo: result should distinguish unary+binary +-
1333
1623
  end
1334
1624
  else #binary operator
@@ -1337,45 +1627,54 @@ end
1337
1627
  if eat_next_if(?=)
1338
1628
  result << ?=
1339
1629
  end
1340
- return(operator_or_methname_token result)
1630
+ result=(operator_or_methname_token result)
1341
1631
  #todo: result should distinguish unary+binary +-
1342
1632
  end
1633
+ result
1343
1634
  end
1344
1635
 
1345
1636
  #-----------------------------------
1346
1637
  def equals(ch) #match /=(>|~|==?)?/ (= or == or =~ or === or =>)
1347
- offset=@file.pos
1638
+ offset= input_position
1348
1639
  str=getchar
1349
1640
  assert str=='='
1350
- c=(eat_next_if(/^[~=>]$/)or'')
1641
+ c=(eat_next_if(/[~=>]/)or'')
1351
1642
  str << c
1643
+ result= operator_or_methname_token( str,offset)
1352
1644
  case c
1353
1645
  when '=': str<< (eat_next_if(?=)or'')
1354
1646
 
1355
- when '>': @bracestack.last.see @bracestack,:arrow
1647
+ when '>':
1648
+ unless ParamListContextNoParen===@parsestack.last
1649
+ @moretokens.unshift result
1650
+ @moretokens.unshift( *abort_noparens!("=>"))
1651
+ result=@moretokens.shift
1652
+ end
1653
+ @parsestack.last.see self,:arrow
1356
1654
  when '': #record local variable definitions
1357
1655
 
1358
- @bracestack.push AssignmentRhsContext.new(@linenum)
1656
+ @parsestack.push AssignmentRhsContext.new(@linenum)
1359
1657
  @moretokens.unshift AssignmentRhsListStartToken.new( offset+1)
1360
1658
  end
1361
- return operator_or_methname_token( str,offset)
1659
+ return result
1362
1660
  end
1363
1661
 
1364
1662
  #-----------------------------------
1365
1663
  def exclam(ch) #match /![~=]?/ (! or != or !~)
1366
1664
  assert nextchar==?!
1367
1665
  result=getchar
1368
- k=eat_next_if(/^[~=]$/)
1666
+ k=eat_next_if(/[~=]/)
1369
1667
  if k
1370
1668
  result+=k
1371
1669
  else
1372
1670
  WHSPLF[nextchar.chr] or
1373
- @moretokens << NoWsToken.new(@file.pos)
1671
+ @moretokens << NoWsToken.new(input_position)
1374
1672
  end
1375
- return KeywordToken.new(result)
1673
+ return KeywordToken.new(result, input_position-result.size)
1376
1674
  #result should distinguish unary !
1377
1675
  end
1378
1676
 
1677
+
1379
1678
  #-----------------------------------
1380
1679
  def dot(ch)
1381
1680
  str=''
@@ -1391,7 +1690,6 @@ end
1391
1690
  dot_rhs(result)
1392
1691
  return result
1393
1692
  end
1394
-
1395
1693
  #-----------------------------------
1396
1694
  def dot_rhs(prevtok)
1397
1695
  safe_recurse { |a|
@@ -1403,20 +1701,17 @@ end
1403
1701
  }
1404
1702
  end
1405
1703
 
1406
- #-----------------------------------
1407
- def single_quote(ch=nil)
1408
- double_quote(ch)
1409
- end
1410
-
1411
1704
  #-----------------------------------
1412
1705
  def back_quote(ch=nil)
1413
- oldpos=@file.pos
1414
- @last_operative_token===/^(def|::|\.)$/ and return MethNameToken.new(
1415
- (eat_next_if(?`) or raise "insanity"), oldpos
1416
- )
1417
- double_quote(ch)
1706
+ if @last_operative_token===/^(def|::|\.)$/
1707
+ oldpos= input_position
1708
+ MethNameToken.new(eat_next_if(?`), oldpos)
1709
+ else
1710
+ double_quote(ch)
1711
+ end
1418
1712
  end
1419
1713
 
1714
+ if false
1420
1715
  #-----------------------------------
1421
1716
  def comment(str)
1422
1717
  result=""
@@ -1441,27 +1736,30 @@ end
1441
1736
 
1442
1737
  return IgnoreToken.new(result)
1443
1738
  end
1444
-
1739
+ end
1445
1740
  #-----------------------------------
1446
1741
  def open_brace(ch)
1447
1742
  assert((ch!='[' or !want_op_name))
1448
1743
  assert(@moretokens.empty?)
1449
1744
  lastchar=prevchar
1450
- ch=eat_next_if(/^[({\[]$/)or raise "lexer confusion"
1451
- tokch=KeywordToken.new(ch,@file.pos-1)
1745
+ ch=eat_next_if(/[({\[]/)or raise "lexer confusion"
1746
+ tokch=KeywordToken.new(ch, input_position-1)
1747
+
1452
1748
 
1453
1749
  #maybe emitting of NoWsToken can be moved into var_or_meth_name ??
1454
1750
  case tokch.ident
1455
1751
  when '['
1456
- #fixme: in contexts expecting an (operator) method name, we
1457
- # should match [] or []= at this point
1458
- @bracestack.push ListImmedContext.new(ch,@linenum)
1752
+ # in contexts expecting an (operator) method name, we
1753
+ # would want to match [] or []= at this point
1754
+ #but control never comes this way in those cases... goes
1755
+ #to custom parsers for alias, undef, and def in #parse_keywords
1756
+ tokch.set_infix! unless after_nonid_op?{WHSPLF[lastchar]}
1757
+ @parsestack.push ListImmedContext.new(ch,@linenum)
1459
1758
  lasttok=last_operative_token
1460
1759
  #could be: lasttok===/^[a-z_]/i
1461
- if (VarNameToken===lasttok or MethNameToken===lasttok or
1462
- lasttok===FUNCLIKE_KEYWORDS) and !WHSPCHARS[lastchar]
1760
+ if (VarNameToken===lasttok or ImplicitParamListEndToken===lasttok or MethNameToken===lasttok) and !WHSPCHARS[lastchar]
1463
1761
  @moretokens << (tokch)
1464
- tokch= NoWsToken.new(@file.pos-1)
1762
+ tokch= NoWsToken.new(input_position-1)
1465
1763
  end
1466
1764
  when '('
1467
1765
  lasttok=last_operative_token
@@ -1470,19 +1768,20 @@ end
1470
1768
  lasttok===FUNCLIKE_KEYWORDS)
1471
1769
  unless WHSPCHARS[lastchar]
1472
1770
  @moretokens << tokch
1473
- tokch= NoWsToken.new(@file.pos-1)
1771
+ tokch= NoWsToken.new(input_position-1)
1474
1772
  end
1475
- @bracestack.push ParamListContext.new(@linenum)
1773
+ @parsestack.push ParamListContext.new(@linenum)
1476
1774
  else
1477
- @bracestack.push ParenContext.new(@linenum)
1775
+ @parsestack.push ParenContext.new(@linenum)
1478
1776
  end
1479
1777
 
1480
1778
  when '{'
1481
1779
  #check if we are in a hash literal or string inclusion (#{}),
1482
1780
  #in which case below would be bad.
1483
- if after_nonid_op?{false}
1484
- @bracestack.push ListImmedContext.new(ch,@linenum) #that is, a hash
1781
+ if after_nonid_op?{false} or @last_operative_token.has_no_block?
1782
+ @parsestack.push ListImmedContext.new(ch,@linenum) #that is, a hash
1485
1783
  else
1784
+ tokch.set_infix!
1486
1785
  =begin not needed now, i think
1487
1786
  # 'need to find matching callsite context and end it if implicit'
1488
1787
  lasttok=last_operative_token
@@ -1492,8 +1791,8 @@ end
1492
1791
  end
1493
1792
  =end
1494
1793
 
1495
- @localvars.start_block
1496
- @bracestack.push BlockContext.new(@linenum)
1794
+ localvars.start_block
1795
+ @parsestack.push BlockContext.new(@linenum)
1497
1796
  block_param_list_lookahead
1498
1797
  end
1499
1798
  end
@@ -1504,18 +1803,18 @@ end
1504
1803
  def close_brace(ch)
1505
1804
  ch==eat_next_if(/[)}\]]/) or raise "lexer confusion"
1506
1805
  @moretokens.concat abort_noparens!(ch)
1507
- @moretokens<< kw=KeywordToken.new( ch,@file.pos-1)
1508
- @bracestack.last.see @bracestack,:semi #hack
1509
- if @bracestack.empty?
1806
+ @parsestack.last.see self,:semi #hack
1807
+ @moretokens<< kw=KeywordToken.new( ch, input_position-1)
1808
+ if @parsestack.empty?
1510
1809
  lexerror kw,"unmatched brace: #{ch}"
1511
1810
  return @moretokens.shift
1512
1811
  end
1513
- ctx=@bracestack.pop
1812
+ ctx=@parsestack.pop
1514
1813
  origch,line=ctx.starter,ctx.linenum
1515
1814
  ch==PAIRS[origch] or
1516
1815
  lexerror kw,"mismatched braces: #{origch}#{ch}\n" +
1517
1816
  "matching brace location", @filename, line
1518
- BlockContext===ctx and @localvars.end_block
1817
+ BlockContext===ctx and localvars.end_block
1519
1818
  if ParamListContext==ctx.class
1520
1819
  assert ch==')'
1521
1820
  #kw.set_callsite! #not needed?
@@ -1525,29 +1824,29 @@ end
1525
1824
 
1526
1825
  #-----------------------------------
1527
1826
  def eof(ch=nil)
1528
- #this must be the very last character...
1529
- oldpos=@file.pos
1530
- assert(?\0==@file.getc)
1827
+ #this must be the very last character...
1828
+ oldpos= input_position
1829
+ assert(?\0==getc)
1531
1830
 
1532
- result= "\0#{ignored_tokens(true).delete_if{|t|FileAndLineToken===t}}"
1831
+ result= "\0#{ignored_tokens(true).delete_if{|t|FileAndLineToken===t}}"
1533
1832
 
1534
- @file.pos==@file.stat.size or
1535
- lexerror result,'nul character is not at the end of file'
1536
- @file.pos=@file.stat.size
1537
- return(endoffile_detected result)
1833
+ eof? or
1834
+ lexerror result,'nul character is not at the end of file'
1835
+ input_position_set @file.size
1836
+ return(endoffile_detected result)
1538
1837
  end
1539
1838
 
1540
1839
  #-----------------------------------
1541
1840
  def endoffile_detected(s='')
1542
1841
  @moretokens.push( *(abort_noparens!.push super(s)))
1543
1842
  result= @moretokens.shift
1544
- balanced_braces? or (lexerror result,"unbalanced braces at eof. bracestack=#{@bracestack.inspect}")
1843
+ balanced_braces? or (lexerror result,"unbalanced braces at eof. parsestack=#{@parsestack.inspect}")
1545
1844
  result
1546
1845
  end
1547
1846
 
1548
1847
  #-----------------------------------
1549
1848
  def single_char_token(ch)
1550
- KeywordToken.new super(ch), @file.pos-1
1849
+ KeywordToken.new super(ch), input_position-1
1551
1850
  end
1552
1851
 
1553
1852
  #-----------------------------------
@@ -1557,13 +1856,13 @@ end
1557
1856
 
1558
1857
  #-----------------------------------
1559
1858
  def semicolon(ch)
1560
- assert @moretokens.empty?
1859
+ assert @moretokens.empty?
1561
1860
  @moretokens.push(*abort_noparens!)
1562
- @bracestack.last.see @bracestack,:semi
1563
- if ExpectDoOrNlContext===@bracestack.last #should be in context's see:semi handler
1564
- @bracestack.pop
1565
- assert @bracestack.last.starter[/^(while|until|for)$/]
1566
- end
1861
+ @parsestack.last.see self,:semi
1862
+ if ExpectDoOrNlContext===@parsestack.last #should be in context's see:semi handler
1863
+ @parsestack.pop
1864
+ assert @parsestack.last.starter[/^(while|until|for)$/]
1865
+ end
1567
1866
  @moretokens.push single_char_token(ch)
1568
1867
  return @moretokens.shift
1569
1868
  end
@@ -1582,7 +1881,11 @@ end
1582
1881
 
1583
1882
  #-----------------------------------
1584
1883
  #tokenify_results_of :identifier
1585
- save_offsets_in(*CHARMAPPINGS.values.uniq-[:symbol_or_op,:open_brace,:whitespace])
1884
+ save_offsets_in(*CHARMAPPINGS.values.uniq-[
1885
+ :symbol_or_op,:open_brace,:whitespace,:exclam,:backquote
1886
+
1887
+
1888
+ ])
1586
1889
  #save_offsets_in :symbol
1587
1890
 
1588
1891
  end