parser 0.9.alpha → 0.9.alpha1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,175 @@
1
+ module Parser
2
+
3
# Tracks the state of one string-like literal (string, symbol, regexp,
# xstring, word list or heredoc) while its body is being scanned, and emits
# the corresponding tokens through the owning lexer's +emit+ method.
class LexerLiteral
  # Opening bracket => matching closing bracket. Only these delimiters nest.
  # Frozen because the table is shared by every literal.
  DELIMITERS = { '(' => ')', '[' => ']', '{' => '}', '<' => '>' }.freeze

  # Start token => the single token emitted in its place when the whole
  # literal can be glued together (see @monolithic in #initialize).
  MONOLITHIC = { :tSTRING_BEG => :tSTRING, :tSYMBEG => :tSYMBOL }.freeze

  TYPES = {
  # type      start token      interpolate?
    "'"  => [ :tSTRING_BEG,  false ],
    '%q' => [ :tSTRING_BEG,  false ],
    '"'  => [ :tSTRING_BEG,  true  ],
    '%'  => [ :tSTRING_BEG,  true  ],
    '%Q' => [ :tSTRING_BEG,  true  ],

    '%w' => [ :tQWORDS_BEG,  false ],
    '%W' => [ :tWORDS_BEG,   true  ],

    ":'" => [ :tSYMBEG,      false ],
    '%s' => [ :tSYMBEG,      false ],
    ':"' => [ :tSYMBEG,      true  ],

    '/'  => [ :tREGEXP_BEG,  true  ],
    '%r' => [ :tREGEXP_BEG,  true  ],

    '%x' => [ :tXSTRING_BEG, true  ],
    '`'  => [ :tXSTRING_BEG, true  ],
  }.freeze

  # When set, marks the literal as a heredoc and is used as the end position
  # of the start token.
  attr_reader   :heredoc_e

  # Storage slot for the lexer; never read inside this class.
  attr_accessor :saved_herebody_s

  # lexer     - object receiving tokens via #emit(type, value, start, end);
  #             its #error method is invoked (through #send) for an unknown
  #             +str_type+.
  # str_type  - literal prefix; must be a key of TYPES.
  # delimiter - opening delimiter. If it is a key of DELIMITERS the literal
  #             nests and closes on the paired bracket; otherwise the same
  #             string closes it.
  # str_s     - start position of the type specifier.
  # heredoc_e - see #heredoc_e; nil for non-heredocs.
  # indent    - when true, leading whitespace is ignored while matching the
  #             closing delimiter (presumably for `<<-` heredocs).
  #
  # Emits the start token immediately unless the literal is monolithic.
  def initialize(lexer, str_type, delimiter, str_s, heredoc_e = nil, indent = false)
    @lexer       = lexer
    @nesting     = 1

    unless TYPES.include? str_type
      lexer.send :error, "#{str_type}: unknown type of %string"
    end

    # String type. For :'foo', it is :'
    @str_type    = str_type
    # Start of the string type specifier.
    @str_s       = str_s

    # Data buffer. It is appended to with <<, so it must be an unfrozen
    # string even if string literals are frozen in this file.
    @buffer      = ''.dup
    # Start and end of the current chunk in data buffer.
    @buffer_s    = nil
    @buffer_e    = nil

    @start_tok, @interpolate = TYPES[str_type]
    @start_delim = DELIMITERS.include?(delimiter) ? delimiter : nil
    @end_delim   = DELIMITERS.fetch(delimiter, delimiter)

    @heredoc_e   = heredoc_e
    @indent      = indent

    @interp_braces = 0

    # Monolithic strings are glued into a single token, e.g.
    # tSTRING_BEG tSTRING_CONTENT tSTRING_END -> tSTRING.
    @monolithic  = (
      [:tSTRING_BEG, :tSYMBEG].include?(type) &&
      !heredoc?
    )

    emit_start_tok unless @monolithic
  end

  # Whether this literal type is flagged as interpolating in TYPES.
  def interpolate?
    @interpolate
  end

  # True for %w and %W literals.
  def words?
    type == :tWORDS_BEG || type == :tQWORDS_BEG
  end

  # True for / and %r literals.
  def regexp?
    type == :tREGEXP_BEG
  end

  # True when a heredoc end position was supplied to the constructor.
  def heredoc?
    !!@heredoc_e
  end

  # The start token taken from TYPES (e.g. :tSTRING_BEG).
  def type
    @start_tok
  end

  # Returns true if +character+ is one of: a backslash, this literal's opening
  # or closing delimiter, or (in word lists only) a whitespace character.
  def munge_escape?(character)
    if words? && character =~ /[ \t\v\r\f\n]/
      true
    else
      ['\\', @start_delim, @end_delim].include? character
    end
  end

  # Whether +delimiter+ closes this literal. Leading whitespace of the
  # candidate is ignored when the literal was created with +indent+.
  def delimiter?(delimiter)
    if @indent
      @end_delim == delimiter.lstrip
    else
      @end_delim == delimiter
    end
  end

  # Adjusts the nesting depth for +delimiter+ (found at ts...te) and, once the
  # depth reaches zero, emits the closing token(s): a single glued token for
  # a still-monolithic literal, otherwise any pending content followed by
  # tSTRING_END.
  def nest_and_try_closing(delimiter, ts, te)
    if @start_delim && @start_delim == delimiter
      @nesting += 1
    elsif delimiter?(delimiter)
      @nesting -= 1
    end

    # Finalize if last matching delimiter is closed.
    if @nesting == 0
      # Emit the string as a single token if it's applicable.
      if @monolithic
        @lexer.emit MONOLITHIC[@start_tok], @buffer, @str_s, te
      else
        # If this is a heredoc, @buffer contains the sentinel now.
        # Just throw it out. Lexer flushes the heredoc after each
        # non-heredoc-terminating \n anyway, so no data will be lost.
        flush_string unless heredoc?

        @lexer.emit :tSTRING_END, @end_delim, ts, te
      end
    end
  end

  # Records one more open brace inside an interpolation.
  def start_interp_brace
    @interp_braces += 1
  end

  # Records a closed brace; returns true when no braces remain open.
  def end_interp_brace_and_try_closing
    @interp_braces -= 1

    (@interp_braces == 0)
  end

  # Appends +string+ (found at ts...te) to the pending content. The start of
  # the chunk is remembered only for the first piece; the end always advances.
  def extend_string(string, ts, te)
    if @buffer_s.nil?
      @buffer_s = ts
    end

    @buffer_e = te

    @buffer << string
  end

  # Emits the pending content, if any, as tSTRING_CONTENT (followed by tSPACE
  # in word lists) and starts a fresh buffer. A monolithic literal first emits
  # its delayed start token and stops being monolithic, since it can no longer
  # be glued into one token.
  def flush_string
    if @monolithic
      emit_start_tok
      @monolithic = false
    end

    unless @buffer.empty?
      @lexer.emit :tSTRING_CONTENT, @buffer, @buffer_s, @buffer_e

      if words?
        @lexer.emit :tSPACE, nil, @buffer_e, @buffer_e + 1
      end

      # A new object is assigned instead of clearing in place, because the
      # old buffer has just been handed to the lexer as a token value.
      @buffer   = ''.dup
      @buffer_s = nil
      @buffer_e = nil
    end
  end

  protected

  # Emits the start token. It ends at the heredoc end position when there is
  # one, otherwise right after the type specifier.
  def emit_start_tok
    str_e = @heredoc_e || @str_s + @str_type.length
    @lexer.emit @start_tok, @str_type, @str_s, str_e
  end
end
174
+
175
+ end
@@ -0,0 +1,38 @@
1
+ require 'set'
2
+
3
+ module Parser
4
+
5
# Keeps track of which names are declared in the current scope, with a stack
# of enclosing scopes that can be entered and left.
class StaticEnvironment
  def initialize
    @variables = Set[]
    @stack     = []
  end

  # Enters a new scope that starts empty: names declared in the enclosing
  # scope are not visible until #unextend. Returns self.
  def extend_static
    @stack.push @variables
    @variables = Set[]

    self
  end

  # Enters a new scope that starts as a copy of the current one: enclosing
  # names stay visible, and names declared from now on are dropped again by
  # #unextend. Returns self.
  def extend_dynamic
    @stack.push @variables
    @variables = @variables.dup

    self
  end

  # Leaves the current scope and restores the enclosing one.
  #
  # When there is no enclosing scope the call is a no-op. Popping an empty
  # stack would otherwise replace the variable set with nil and make every
  # later #declare / #declared? fail with NoMethodError.
  #
  # Returns the restored Set, or nil if nothing was restored.
  def unextend
    @variables = @stack.pop unless @stack.empty?
  end

  # Declares +name+ in the current scope. Returns the scope's Set.
  def declare(name)
    @variables.add name
  end

  # Whether +name+ is visible in the current scope.
  def declared?(name)
    @variables.include? name
  end
end
37
+
38
+ end
@@ -0,0 +1,3 @@
1
module Parser
  # Error class for this library. It lives inside the Parser namespace and
  # derives from StandardError, so it is distinct from Ruby's built-in
  # ::SyntaxError (a ScriptError) and is caught by a plain `rescue`.
  class SyntaxError < StandardError
  end
end
@@ -0,0 +1,25 @@
1
+ # coding: utf-8
2
+
3
# Gem specification for the `parser` package.
Gem::Specification.new do |gem|
  # Identity and metadata.
  gem.name        = "parser"
  gem.version     = "0.9.alpha1"
  gem.authors     = ["Peter Zotov"]
  gem.email       = ["whitequark@whitequark.org"]
  gem.description = "A Ruby parser."
  gem.summary     = gem.description
  gem.homepage    = "http://github.com/whitequark/parser"
  gem.license     = "MIT"

  # Packaged files: everything tracked by git, plus lib/parser/lexer.rb,
  # which is listed explicitly (presumably because it is generated at build
  # time rather than committed -- confirm against the build tasks).
  tracked_files = `git ls-files`.split($/)
  extra_files   = %w(lib/parser/lexer.rb)
  gem.files     = tracked_files + extra_files

  bin_paths         = gem.files.grep(%r{^bin/})
  gem.executables   = bin_paths.map { |path| File.basename(path) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  gem.required_ruby_version = '>= 1.9'

  # Development-only dependencies.
  gem.add_development_dependency "bundler", "~> 1.3"
  gem.add_development_dependency "rake",    "~> 10.0"
  gem.add_development_dependency "racc"
end
@@ -1,19 +1,65 @@
1
- #!/usr/bin/env ruby
2
1
  # encoding: ascii-8bit
3
2
 
4
3
  require 'minitest/autorun'
5
- require 'ruby_lexer'
6
- require 'ruby_parser_extras'
4
+ require 'parser/lexer'
7
5
 
8
- class TestRubyLexer < MiniTest::Unit::TestCase
9
- alias :deny :refute
6
+ class TestLexer < MiniTest::Unit::TestCase
7
# Replaces @lex with a fresh Parser::Lexer built for +version+ and returns it.
# The tests in this file pass 18 or 19 (presumably Ruby 1.8 / 1.9 syntax --
# confirm against Parser::Lexer).
def setup_lexer(version)
  lexer = Parser::Lexer.new(version)
  @lex  = lexer
end
10
10
 
11
11
  def setup
12
12
  setup_lexer 18
13
13
  end
14
14
 
15
- def setup_lexer version
16
- @lex = RubyLexer.new(version)
15
# Asserts that lexing +s+ (with the expected token/value pairs in +args+,
# forwarded unchanged to util_lex_token) raises Parser::SyntaxError.
def util_bad_token(s, *args)
  assert_raises(Parser::SyntaxError) { util_lex_token(s, *args) }
end
20
+
21
# Lexes a %Q[] string whose only content is a backslash followed by +input+
# and asserts that the first token is a :tSTRING with value +expected+.
# The lexed value is re-tagged as ASCII-8BIT (in place) before comparing.
def util_escape(expected, input)
  @lex.reset
  @lex.source = "%Q[\\#{input}]"

  got_token, got_value = @lex.advance
  got_value.force_encoding('ASCII-8BIT') if got_value.respond_to?(:force_encoding)

  # The source text doubles as the failure message.
  assert_equal([:tSTRING, expected], [got_token, got_value], @lex.source)
end
35
+
36
# Asserts that lexing a %Q[] string containing a backslash followed by
# +input+ raises Parser::SyntaxError. The lexer state is set to :expr_beg
# inside the asserted block, before the source is lexed.
def util_escape_bad(input)
  assert_raises(Parser::SyntaxError) do
    @lex.state = :expr_beg
    source = "%Q[\\#{input}]"
    util_lex_token(source)
  end
end
42
+
43
# Lexes "def <name> " expecting a :kDEF token followed by a +type+ token
# whose value is +name+, then asserts that the lexer finished in +end_state+
# (:expr_end unless given).
def util_lex_fname(name, type, end_state = :expr_end)
  source = "def #{name} "
  util_lex_token(source, :kDEF, "def", type, name)

  assert_equal(end_state, @lex.state)
end
48
+
49
# Lexes +input+ and checks the produced tokens against +args+, a flat list of
# alternating expected token types and values. After the listed tokens have
# been matched, asserts that the lexer has nothing more to return.
def util_lex_token(input, *args)
  @lex.reset(false)
  @lex.source = input

  args.each_slice(2) do |token, value|
    got_token, got_value = @lex.advance

    assert got_token, "no more tokens"
    assert_equal [token, value], [got_token, got_value], input
  end

  extra_token, extra_value = @lex.advance
  refute extra_token, "must be empty, but had #{[extra_token, extra_value].inspect}"
end
18
64
 
19
65
  def test_advance
@@ -23,7 +69,7 @@ class TestRubyLexer < MiniTest::Unit::TestCase
23
69
  assert token # blah
24
70
 
25
71
  token, = @lex.advance
26
- deny token # nada
72
+ refute token # nada
27
73
  end
28
74
 
29
75
  def test_read_escape
@@ -367,9 +413,7 @@ class TestRubyLexer < MiniTest::Unit::TestCase
367
413
  end
368
414
 
369
415
  def test_yylex_cvar_bad
370
- assert_raises RubyParser::SyntaxError do
371
- util_lex_token "@@1"
372
- end
416
+ util_bad_token "@@1"
373
417
  end
374
418
 
375
419
  def test_yylex_div
@@ -403,15 +447,16 @@ class TestRubyLexer < MiniTest::Unit::TestCase
403
447
  :kEND, "end")
404
448
  end
405
449
 
406
- def test_yylex_do_cond
407
- @lex.cond.push true
450
+ # TODO
451
+ # def test_yylex_do_cond
452
+ # @lex.cond.push true
408
453
 
409
- util_lex_token("x do 42 end",
410
- :tIDENTIFIER, "x",
411
- :kDO_COND, "do",
412
- :tINTEGER, 42,
413
- :kEND, "end")
414
- end
454
+ # util_lex_token("x do 42 end",
455
+ # :tIDENTIFIER, "x",
456
+ # :kDO_COND, "do",
457
+ # :tINTEGER, 42,
458
+ # :kEND, "end")
459
+ # end
415
460
 
416
461
  def test_yylex_dot
417
462
  util_lex_token ".", :tDOT, "."
@@ -868,25 +913,25 @@ class TestRubyLexer < MiniTest::Unit::TestCase
868
913
  end
869
914
 
870
915
  def test_yylex_question_eh_a__18
871
- @lex = RubyLexer.new 18
916
+ setup_lexer 18
872
917
 
873
918
  util_lex_token "?a", :tINTEGER, 97
874
919
  end
875
920
 
876
921
  def test_yylex_question_eh_a__19
877
- @lex = RubyLexer.new 19
922
+ setup_lexer 19
878
923
 
879
924
  util_lex_token '?a', :tSTRING, "a"
880
925
  end
881
926
 
882
927
  def test_yylex_question_eh_escape_M_escape_C__18
883
- @lex = RubyLexer.new 18
928
+ setup_lexer 18
884
929
 
885
930
  util_lex_token '?\M-\C-a', :tINTEGER, 129
886
931
  end
887
932
 
888
933
  def test_yylex_question_eh_escape_M_escape_C__19
889
- @lex = RubyLexer.new 19
934
+ setup_lexer 19
890
935
 
891
936
  util_lex_token '?\M-\C-a', :tSTRING, "\M-\C-a"
892
937
  end
@@ -1214,13 +1259,13 @@ class TestRubyLexer < MiniTest::Unit::TestCase
1214
1259
  end
1215
1260
 
1216
1261
  def test_yylex_question__18
1217
- @lex = RubyLexer.new 18
1262
+ setup_lexer 18
1218
1263
 
1219
1264
  util_lex_token "?*", :tINTEGER, 42
1220
1265
  end
1221
1266
 
1222
1267
  def test_yylex_question__19
1223
- @lex = RubyLexer.new 19
1268
+ setup_lexer 19
1224
1269
 
1225
1270
  util_lex_token "?*", :tSTRING, "*"
1226
1271
  end
@@ -1239,7 +1284,7 @@ class TestRubyLexer < MiniTest::Unit::TestCase
1239
1284
  end
1240
1285
 
1241
1286
  def test_yylex_question_ws_backslashed__18
1242
- @lex = RubyLexer.new 18
1287
+ setup_lexer 18
1243
1288
 
1244
1289
  @lex.state = :expr_beg
1245
1290
  util_lex_token "?\\ ", :tINTEGER, 32
@@ -1256,7 +1301,7 @@ class TestRubyLexer < MiniTest::Unit::TestCase
1256
1301
  end
1257
1302
 
1258
1303
  def test_yylex_question_ws_backslashed__19
1259
- @lex = RubyLexer.new 19
1304
+ setup_lexer 19
1260
1305
 
1261
1306
  @lex.state = :expr_beg
1262
1307
  util_lex_token "?\\ ", :tSTRING, " "
@@ -1887,8 +1932,9 @@ class TestRubyLexer < MiniTest::Unit::TestCase
1887
1932
 
1888
1933
  def test_yylex_underscore_end
1889
1934
  @lex.source = "__END__\n"
1935
+
1890
1936
  tok, = @lex.advance
1891
- deny tok
1937
+ refute tok
1892
1938
  end
1893
1939
 
1894
1940
  def test_yylex_uplus
@@ -1916,8 +1962,9 @@ class TestRubyLexer < MiniTest::Unit::TestCase
1916
1962
  end
1917
1963
 
1918
1964
  def test_yylex_static_env
1919
- env = RubyParserStuff::Environment.new
1920
- env[:a] = :lvar
1965
+ env = Parser::StaticEnvironment.new
1966
+ env.declare :a
1967
+
1921
1968
  @lex.static_env = env
1922
1969
 
1923
1970
  util_lex_token("a [42]",
@@ -1926,103 +1973,4 @@ class TestRubyLexer < MiniTest::Unit::TestCase
1926
1973
  :tINTEGER, 42,
1927
1974
  :tRBRACK, "]")
1928
1975
  end
1929
-
1930
- def test_zbug_float_in_decl
1931
- util_lex_token("def initialize(u = ",
1932
- :kDEF, "def",
1933
- :tIDENTIFIER, "initialize",
1934
- :tLPAREN2, "(",
1935
- :tIDENTIFIER, "u",
1936
- :tEQL, "=")
1937
-
1938
- assert_equal :expr_beg, @lex.state
1939
-
1940
- util_lex_token("0.0, s = 0.0",
1941
- :tFLOAT, 0.0,
1942
- :tCOMMA, ',',
1943
- :tIDENTIFIER, "s",
1944
- :tEQL, "=",
1945
- :tFLOAT, 0.0)
1946
- end
1947
-
1948
- def test_zbug_id_equals
1949
- util_lex_token("a =",
1950
- :tIDENTIFIER, "a",
1951
- :tEQL, "=")
1952
-
1953
- assert_equal :expr_beg, @lex.state
1954
-
1955
- util_lex_token("0.0",
1956
- :tFLOAT, 0.0)
1957
- end
1958
-
1959
- def test_zbug_no_spaces_in_decl
1960
- util_lex_token("def initialize(u=",
1961
- :kDEF, "def",
1962
- :tIDENTIFIER, "initialize",
1963
- :tLPAREN2, "(",
1964
- :tIDENTIFIER, "u",
1965
- :tEQL, "=")
1966
-
1967
- assert_equal :expr_beg, @lex.state
1968
-
1969
- util_lex_token("0.0,s=0.0",
1970
- :tFLOAT, 0.0,
1971
- :tCOMMA, ",",
1972
- :tIDENTIFIER, "s",
1973
- :tEQL, "=",
1974
- :tFLOAT, 0.0)
1975
- end
1976
-
1977
- ############################################################
1978
-
1979
- def util_bad_token s, *args
1980
- assert_raises RubyParser::SyntaxError do
1981
- util_lex_token s, *args
1982
- end
1983
- end
1984
-
1985
- def util_escape expected, input
1986
- @lex.reset
1987
- @lex.source = "%Q[\\#{input}]"
1988
-
1989
- lex_token, lex_value = @lex.advance
1990
-
1991
- if lex_value.respond_to?(:force_encoding)
1992
- lex_value.force_encoding('ASCII-8BIT')
1993
- end
1994
-
1995
- assert_equal [:tSTRING, expected],
1996
- [lex_token, lex_value],
1997
- @lex.source
1998
- end
1999
-
2000
- def util_escape_bad input
2001
- assert_raises RubyParser::SyntaxError do
2002
- @lex.state = :expr_beg
2003
- util_lex_token "%Q[\\#{input}]"
2004
- end
2005
- end
2006
-
2007
- def util_lex_fname name, type, end_state = :expr_end
2008
- util_lex_token("def #{name} ", :kDEF, "def", type, name)
2009
-
2010
- assert_equal end_state, @lex.state
2011
- end
2012
-
2013
- def util_lex_token input, *args
2014
- @lex.reset(false)
2015
- @lex.source = input
2016
-
2017
- until args.empty? do
2018
- token, value = args.shift(2)
2019
-
2020
- lex_token, lex_value = @lex.advance
2021
- assert lex_token, "no more tokens"
2022
- assert_equal [token, value], [lex_token, lex_value], input
2023
- end
2024
-
2025
- lex_token, lex_value = @lex.advance
2026
- deny lex_token, "must be empty, but had #{[lex_token, lex_value].inspect}"
2027
- end
2028
1976
  end