parser 0.9.alpha → 0.9.alpha1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,175 @@
1
module Parser

  # Tracks the state of a single string-like literal (string, symbol,
  # word list, regexp, xstring or heredoc) while the lexer scans it,
  # and emits the corresponding token stream through the lexer.
  class LexerLiteral
    # Closing delimiter for each paired opening delimiter; any other
    # delimiter closes with the same character it opens with.
    DELIMITERS = { '(' => ')', '[' => ']', '{' => '}', '<' => '>' }

    # Begin tokens that may be glued together with their content and
    # terminator into a single token, and the token they glue into.
    MONOLITHIC = { :tSTRING_BEG => :tSTRING, :tSYMBEG => :tSYMBOL }

    # Literal prefix => [ start token, interpolation allowed? ]
    TYPES = {
      "'"  => [ :tSTRING_BEG,  false ],
      '%q' => [ :tSTRING_BEG,  false ],
      '"'  => [ :tSTRING_BEG,  true  ],
      '%'  => [ :tSTRING_BEG,  true  ],
      '%Q' => [ :tSTRING_BEG,  true  ],

      '%w' => [ :tQWORDS_BEG,  false ],
      '%W' => [ :tWORDS_BEG,   true  ],

      ":'" => [ :tSYMBEG,      false ],
      '%s' => [ :tSYMBEG,      false ],
      ':"' => [ :tSYMBEG,      true  ],

      '/'  => [ :tREGEXP_BEG,  true  ],
      '%r' => [ :tREGEXP_BEG,  true  ],

      '%x' => [ :tXSTRING_BEG, true  ],
      '`'  => [ :tXSTRING_BEG, true  ],
    }

    attr_reader   :heredoc_e
    attr_accessor :saved_herebody_s

    # lexer     — owner; receives emit/error calls.
    # str_type  — literal prefix, e.g. :' for :'foo'.
    # delimiter — opening delimiter character(s).
    # str_s     — source position where the prefix starts.
    # heredoc_e — end position of a heredoc marker, or nil.
    # indent    — whether the terminator may be indented (<<-).
    def initialize(lexer, str_type, delimiter, str_s, heredoc_e = nil, indent = false)
      @lexer   = lexer
      @nesting = 1

      # NOTE(review): if the prefix is unknown, the lexer is told to
      # error but execution continues here; the attributes below are
      # then derived from a nil TYPES entry. Presumably #error raises —
      # confirm against the lexer implementation.
      unless TYPES.key?(str_type)
        lexer.send(:error, "#{str_type}: unknown type of %string")
      end

      @str_type = str_type   # literal prefix
      @str_s    = str_s      # start of the type specifier

      @buffer   = ""         # accumulated content chunk
      @buffer_s = nil        # start of the current chunk

      @start_tok, @interpolate = TYPES[str_type]

      # Only paired delimiters can nest; an unpaired delimiter has no
      # distinct opening form, so @start_delim stays nil.
      @start_delim = DELIMITERS.key?(delimiter) ? delimiter : nil
      @end_delim   = DELIMITERS.fetch(delimiter, delimiter)

      @heredoc_e = heredoc_e
      @indent    = indent

      @interp_braces = 0

      # Monolithic strings are glued into a single token, e.g.
      # tSTRING_BEG tSTRING_CONTENT tSTRING_END -> tSTRING.
      @monolithic = [:tSTRING_BEG, :tSYMBEG].include?(type) && !heredoc?

      emit_start_tok unless @monolithic
    end

    def interpolate?
      @interpolate
    end

    def words?
      [:tWORDS_BEG, :tQWORDS_BEG].include?(type)
    end

    def regexp?
      type == :tREGEXP_BEG
    end

    def heredoc?
      !@heredoc_e.nil?
    end

    def type
      @start_tok
    end

    # Should a backslash-escaped +character+ be reduced to the bare
    # character inside this literal?
    def munge_escape?(character)
      return true if words? && character =~ /[ \t\v\r\f\n]/

      ['\\', @start_delim, @end_delim].include?(character)
    end

    # Does +delimiter+ terminate this literal? Indented heredocs allow
    # leading whitespace before the terminator.
    def delimiter?(delimiter)
      compared = @indent ? delimiter.lstrip : delimiter
      @end_delim == compared
    end

    # Account for one occurrence of a delimiter: opening delimiters
    # deepen the nesting, closing ones unwind it. When the outermost
    # delimiter closes, the literal is finalized and emitted.
    def nest_and_try_closing(delimiter, ts, te)
      if @start_delim && @start_delim == delimiter
        @nesting += 1
      elsif delimiter?(delimiter)
        @nesting -= 1
      end

      return unless @nesting == 0

      if @monolithic
        # Emit the string as a single glued token.
        @lexer.emit(MONOLITHIC[@start_tok], @buffer, @str_s, te)
      else
        # If this is a heredoc, @buffer contains the sentinel now.
        # Just throw it out. Lexer flushes the heredoc after each
        # non-heredoc-terminating \n anyway, so no data will be lost.
        flush_string unless heredoc?

        @lexer.emit(:tSTRING_END, @end_delim, ts, te)
      end
    end

    def start_interp_brace
      @interp_braces += 1
    end

    # Returns true when this closing brace ends the interpolation the
    # literal was suspended for.
    def end_interp_brace_and_try_closing
      @interp_braces -= 1
      @interp_braces.zero?
    end

    # Append a chunk of content, remembering the extent of the buffered
    # data in the source.
    def extend_string(string, ts, te)
      @buffer_s ||= ts
      @buffer_e   = te
      @buffer << string
    end

    # Emit the buffered content as tSTRING_CONTENT (plus tSPACE for
    # word lists) and reset the buffer.
    def flush_string
      if @monolithic
        # Flushing mid-literal means it can no longer be one token.
        emit_start_tok
        @monolithic = false
      end

      return if @buffer.empty?

      @lexer.emit(:tSTRING_CONTENT, @buffer, @buffer_s, @buffer_e)
      @lexer.emit(:tSPACE, nil, @buffer_e, @buffer_e + 1) if words?

      @buffer   = ""
      @buffer_s = nil
      @buffer_e = nil
    end

    protected

    # Emit the literal's begin token; for heredocs the token extends to
    # the end of the heredoc marker.
    def emit_start_tok
      str_e = @heredoc_e || @str_s + @str_type.length
      @lexer.emit(@start_tok, @str_type, @str_s, str_e)
    end
  end

end
@@ -0,0 +1,38 @@
1
require 'set'

module Parser

  # Records which local variable names are statically known to be
  # declared, maintaining a stack of scopes so that entering and
  # leaving nested scopes can hide or inherit outer declarations.
  class StaticEnvironment
    def initialize
      @variables = Set.new
      @stack     = []
    end

    # Enter a scope that hides all outer declarations.
    def extend_static
      @stack.push(@variables)
      @variables = Set.new

      self
    end

    # Enter a scope that inherits (a copy of) the outer declarations.
    def extend_dynamic
      @stack.push(@variables)
      @variables = @variables.dup

      self
    end

    # Restore the scope active before the last extend_static/dynamic.
    def unextend
      @variables = @stack.pop
    end

    # Mark +name+ as declared in the current scope.
    def declare(name)
      @variables.add(name)
    end

    # Is +name+ declared in the current scope?
    def declared?(name)
      @variables.include?(name)
    end
  end

end
@@ -0,0 +1,3 @@
1
module Parser
  # Raised when the input cannot be lexed or parsed as valid Ruby.
  class SyntaxError < StandardError
  end
end
@@ -0,0 +1,25 @@
1
# coding: utf-8

Gem::Specification.new do |spec|
  spec.name    = "parser"
  spec.version = "0.9.alpha1"
  spec.authors = ["Peter Zotov"]
  spec.email   = ["whitequark@whitequark.org"]

  spec.description = "A Ruby parser."
  spec.summary     = spec.description
  spec.homepage    = "http://github.com/whitequark/parser"
  spec.license     = "MIT"

  # Package everything git tracks, plus lib/parser/lexer.rb which is
  # added explicitly (presumably generated rather than checked in —
  # verify against the build setup).
  spec.files = `git ls-files`.split($/) + %w(
    lib/parser/lexer.rb
  )
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.required_ruby_version = '>= 1.9'

  spec.add_development_dependency "bundler", "~> 1.3"
  spec.add_development_dependency "rake",    "~> 10.0"
  spec.add_development_dependency "racc"
end
@@ -1,19 +1,65 @@
1
- #!/usr/bin/env ruby
2
1
  # encoding: ascii-8bit
3
2
 
4
3
  require 'minitest/autorun'
5
- require 'ruby_lexer'
6
- require 'ruby_parser_extras'
4
+ require 'parser/lexer'
7
5
 
8
- class TestRubyLexer < MiniTest::Unit::TestCase
9
- alias :deny :refute
6
+ class TestLexer < MiniTest::Unit::TestCase
7
+ def setup_lexer version
8
+ @lex = Parser::Lexer.new(version)
9
+ end
10
10
 
11
11
  def setup
12
12
  setup_lexer 18
13
13
  end
14
14
 
15
- def setup_lexer version
16
- @lex = RubyLexer.new(version)
15
+ def util_bad_token s, *args
16
+ assert_raises Parser::SyntaxError do
17
+ util_lex_token s, *args
18
+ end
19
+ end
20
+
21
+ def util_escape expected, input
22
+ @lex.reset
23
+ @lex.source = "%Q[\\#{input}]"
24
+
25
+ lex_token, lex_value = @lex.advance
26
+
27
+ if lex_value.respond_to?(:force_encoding)
28
+ lex_value.force_encoding('ASCII-8BIT')
29
+ end
30
+
31
+ assert_equal [:tSTRING, expected],
32
+ [lex_token, lex_value],
33
+ @lex.source
34
+ end
35
+
36
+ def util_escape_bad input
37
+ assert_raises Parser::SyntaxError do
38
+ @lex.state = :expr_beg
39
+ util_lex_token "%Q[\\#{input}]"
40
+ end
41
+ end
42
+
43
+ def util_lex_fname name, type, end_state = :expr_end
44
+ util_lex_token("def #{name} ", :kDEF, "def", type, name)
45
+
46
+ assert_equal end_state, @lex.state
47
+ end
48
+
49
+ def util_lex_token input, *args
50
+ @lex.reset(false)
51
+ @lex.source = input
52
+
53
+ until args.empty? do
54
+ token, value = args.shift(2)
55
+
56
+ lex_token, lex_value = @lex.advance
57
+ assert lex_token, "no more tokens"
58
+ assert_equal [token, value], [lex_token, lex_value], input
59
+ end
60
+
61
+ lex_token, lex_value = @lex.advance
62
+ refute lex_token, "must be empty, but had #{[lex_token, lex_value].inspect}"
17
63
  end
18
64
 
19
65
  def test_advance
@@ -23,7 +69,7 @@ class TestRubyLexer < MiniTest::Unit::TestCase
23
69
  assert token # blah
24
70
 
25
71
  token, = @lex.advance
26
- deny token # nada
72
+ refute token # nada
27
73
  end
28
74
 
29
75
  def test_read_escape
@@ -367,9 +413,7 @@ class TestRubyLexer < MiniTest::Unit::TestCase
367
413
  end
368
414
 
369
415
  def test_yylex_cvar_bad
370
- assert_raises RubyParser::SyntaxError do
371
- util_lex_token "@@1"
372
- end
416
+ util_bad_token "@@1"
373
417
  end
374
418
 
375
419
  def test_yylex_div
@@ -403,15 +447,16 @@ class TestRubyLexer < MiniTest::Unit::TestCase
403
447
  :kEND, "end")
404
448
  end
405
449
 
406
- def test_yylex_do_cond
407
- @lex.cond.push true
450
+ # TODO
451
+ # def test_yylex_do_cond
452
+ # @lex.cond.push true
408
453
 
409
- util_lex_token("x do 42 end",
410
- :tIDENTIFIER, "x",
411
- :kDO_COND, "do",
412
- :tINTEGER, 42,
413
- :kEND, "end")
414
- end
454
+ # util_lex_token("x do 42 end",
455
+ # :tIDENTIFIER, "x",
456
+ # :kDO_COND, "do",
457
+ # :tINTEGER, 42,
458
+ # :kEND, "end")
459
+ # end
415
460
 
416
461
  def test_yylex_dot
417
462
  util_lex_token ".", :tDOT, "."
@@ -868,25 +913,25 @@ class TestRubyLexer < MiniTest::Unit::TestCase
868
913
  end
869
914
 
870
915
  def test_yylex_question_eh_a__18
871
- @lex = RubyLexer.new 18
916
+ setup_lexer 18
872
917
 
873
918
  util_lex_token "?a", :tINTEGER, 97
874
919
  end
875
920
 
876
921
  def test_yylex_question_eh_a__19
877
- @lex = RubyLexer.new 19
922
+ setup_lexer 19
878
923
 
879
924
  util_lex_token '?a', :tSTRING, "a"
880
925
  end
881
926
 
882
927
  def test_yylex_question_eh_escape_M_escape_C__18
883
- @lex = RubyLexer.new 18
928
+ setup_lexer 18
884
929
 
885
930
  util_lex_token '?\M-\C-a', :tINTEGER, 129
886
931
  end
887
932
 
888
933
  def test_yylex_question_eh_escape_M_escape_C__19
889
- @lex = RubyLexer.new 19
934
+ setup_lexer 19
890
935
 
891
936
  util_lex_token '?\M-\C-a', :tSTRING, "\M-\C-a"
892
937
  end
@@ -1214,13 +1259,13 @@ class TestRubyLexer < MiniTest::Unit::TestCase
1214
1259
  end
1215
1260
 
1216
1261
  def test_yylex_question__18
1217
- @lex = RubyLexer.new 18
1262
+ setup_lexer 18
1218
1263
 
1219
1264
  util_lex_token "?*", :tINTEGER, 42
1220
1265
  end
1221
1266
 
1222
1267
  def test_yylex_question__19
1223
- @lex = RubyLexer.new 19
1268
+ setup_lexer 19
1224
1269
 
1225
1270
  util_lex_token "?*", :tSTRING, "*"
1226
1271
  end
@@ -1239,7 +1284,7 @@ class TestRubyLexer < MiniTest::Unit::TestCase
1239
1284
  end
1240
1285
 
1241
1286
  def test_yylex_question_ws_backslashed__18
1242
- @lex = RubyLexer.new 18
1287
+ setup_lexer 18
1243
1288
 
1244
1289
  @lex.state = :expr_beg
1245
1290
  util_lex_token "?\\ ", :tINTEGER, 32
@@ -1256,7 +1301,7 @@ class TestRubyLexer < MiniTest::Unit::TestCase
1256
1301
  end
1257
1302
 
1258
1303
  def test_yylex_question_ws_backslashed__19
1259
- @lex = RubyLexer.new 19
1304
+ setup_lexer 19
1260
1305
 
1261
1306
  @lex.state = :expr_beg
1262
1307
  util_lex_token "?\\ ", :tSTRING, " "
@@ -1887,8 +1932,9 @@ class TestRubyLexer < MiniTest::Unit::TestCase
1887
1932
 
1888
1933
  def test_yylex_underscore_end
1889
1934
  @lex.source = "__END__\n"
1935
+
1890
1936
  tok, = @lex.advance
1891
- deny tok
1937
+ refute tok
1892
1938
  end
1893
1939
 
1894
1940
  def test_yylex_uplus
@@ -1916,8 +1962,9 @@ class TestRubyLexer < MiniTest::Unit::TestCase
1916
1962
  end
1917
1963
 
1918
1964
  def test_yylex_static_env
1919
- env = RubyParserStuff::Environment.new
1920
- env[:a] = :lvar
1965
+ env = Parser::StaticEnvironment.new
1966
+ env.declare :a
1967
+
1921
1968
  @lex.static_env = env
1922
1969
 
1923
1970
  util_lex_token("a [42]",
@@ -1926,103 +1973,4 @@ class TestRubyLexer < MiniTest::Unit::TestCase
1926
1973
  :tINTEGER, 42,
1927
1974
  :tRBRACK, "]")
1928
1975
  end
1929
-
1930
- def test_zbug_float_in_decl
1931
- util_lex_token("def initialize(u = ",
1932
- :kDEF, "def",
1933
- :tIDENTIFIER, "initialize",
1934
- :tLPAREN2, "(",
1935
- :tIDENTIFIER, "u",
1936
- :tEQL, "=")
1937
-
1938
- assert_equal :expr_beg, @lex.state
1939
-
1940
- util_lex_token("0.0, s = 0.0",
1941
- :tFLOAT, 0.0,
1942
- :tCOMMA, ',',
1943
- :tIDENTIFIER, "s",
1944
- :tEQL, "=",
1945
- :tFLOAT, 0.0)
1946
- end
1947
-
1948
- def test_zbug_id_equals
1949
- util_lex_token("a =",
1950
- :tIDENTIFIER, "a",
1951
- :tEQL, "=")
1952
-
1953
- assert_equal :expr_beg, @lex.state
1954
-
1955
- util_lex_token("0.0",
1956
- :tFLOAT, 0.0)
1957
- end
1958
-
1959
- def test_zbug_no_spaces_in_decl
1960
- util_lex_token("def initialize(u=",
1961
- :kDEF, "def",
1962
- :tIDENTIFIER, "initialize",
1963
- :tLPAREN2, "(",
1964
- :tIDENTIFIER, "u",
1965
- :tEQL, "=")
1966
-
1967
- assert_equal :expr_beg, @lex.state
1968
-
1969
- util_lex_token("0.0,s=0.0",
1970
- :tFLOAT, 0.0,
1971
- :tCOMMA, ",",
1972
- :tIDENTIFIER, "s",
1973
- :tEQL, "=",
1974
- :tFLOAT, 0.0)
1975
- end
1976
-
1977
- ############################################################
1978
-
1979
- def util_bad_token s, *args
1980
- assert_raises RubyParser::SyntaxError do
1981
- util_lex_token s, *args
1982
- end
1983
- end
1984
-
1985
- def util_escape expected, input
1986
- @lex.reset
1987
- @lex.source = "%Q[\\#{input}]"
1988
-
1989
- lex_token, lex_value = @lex.advance
1990
-
1991
- if lex_value.respond_to?(:force_encoding)
1992
- lex_value.force_encoding('ASCII-8BIT')
1993
- end
1994
-
1995
- assert_equal [:tSTRING, expected],
1996
- [lex_token, lex_value],
1997
- @lex.source
1998
- end
1999
-
2000
- def util_escape_bad input
2001
- assert_raises RubyParser::SyntaxError do
2002
- @lex.state = :expr_beg
2003
- util_lex_token "%Q[\\#{input}]"
2004
- end
2005
- end
2006
-
2007
- def util_lex_fname name, type, end_state = :expr_end
2008
- util_lex_token("def #{name} ", :kDEF, "def", type, name)
2009
-
2010
- assert_equal end_state, @lex.state
2011
- end
2012
-
2013
- def util_lex_token input, *args
2014
- @lex.reset(false)
2015
- @lex.source = input
2016
-
2017
- until args.empty? do
2018
- token, value = args.shift(2)
2019
-
2020
- lex_token, lex_value = @lex.advance
2021
- assert lex_token, "no more tokens"
2022
- assert_equal [token, value], [lex_token, lex_value], input
2023
- end
2024
-
2025
- lex_token, lex_value = @lex.advance
2026
- deny lex_token, "must be empty, but had #{[lex_token, lex_value].inspect}"
2027
- end
2028
1976
  end