ruby_parser 3.3.0 → 3.4.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,189 @@
1
+ # encoding: UTF-8
2
+ # TODO: this needs to be put on the first line
3
+ #
4
+ # new_ruby_parser.rex
5
+ # lexical scanner definition for ruby
6
+
7
# Lexer grammar for RubyLexer, written in the oedipus_lex rule-table format.
# oedipus_lex compiles this into the generated RubyLexer#next_token method.
#
# Sections:
#   macro - named regexps; each one is emitted as a constant on the class
#   start - statements emitted at the top of next_token, before any scanning
#   rule  - "[predicate] /pattern/ action" entries, tried in the order listed;
#           the first pattern that matches at the scan position wins
+ class RubyLexer
8
+
9
# Named regexps. Later macros and rules interpolate them with #{NAME}.
+ macro
10
+
11
+ IDENT /^#{IDENT_CHAR}+/o
12
+
13
+ ESC /\\((?>[0-7]{1,3}|x[0-9a-fA-F]{1,2}|M-[^\\]|(C-|c)[^\\]|u[0-9a-fA-F]+|u\{[0-9a-fA-F]+\}|[^0-7xMCc]))/
14
+ SIMPLE_STRING /(#{ESC}|\#(#{ESC}|[^\{\#\@\$\"\\])|[^\"\\\#])*/o
15
+ SSTRING /(\\.|[^\'])*/
16
+
17
+ INT_DEC /[+]?(?:(?:[1-9][\d_]*|0)(?!\.\d)\b|0d[0-9_]+)/i
18
+ INT_HEX /[+]?0x[a-f0-9_]+/i
19
+ INT_BIN /[+]?0b[01_]+/i
20
+ INT_OCT /[+]?0o?[0-7_]+|0o/i
21
+ FLOAT /[+]?\d[\d_]*\.[\d_]+(e[+-]?[\d_]+)?\b|[+]?[\d_]+e[+-]?[\d_]+\b/i
22
+ INT_DEC2 /[+]?\d[0-9_]*(?![e])/i
23
+
# Malformed numeric forms; the rules below turn each into a compile error.
24
+ NUM_BAD /[+]?0[xbd]\b/i
25
+ INT_OCT_BAD /[+]?0o?[0-7_]*[89]/i
26
+ FLOAT_BAD /[+]?\d[\d_]*_(e|\.)/i
27
+
28
# Runs once per next_token call, before the rules are tried.
+ start
29
+
30
+ return process_string if lex_strterm
31
+
32
+ self.command_state = self.command_start
33
+ self.command_start = false
34
+ self.space_seen = false
35
+ self.last_state = lex_state
36
+
37
# An action is either a method name (called with the matched text) or a
# { block } (run via `action`). A leading `name?` is a predicate that must be
# true before the pattern is even attempted.
+ rule
38
+
39
+ # [:state] pattern [actions]
40
+
41
+ # \s - \n + \v
42
+ /[\ \t\r\f\v]/ { self.space_seen = true; next }
43
+
44
+ /\n|\#/ process_newline_or_comment
45
+
46
+ /[\]\)\}]/ process_bracing
47
+ /\!/ process_bang
48
+
# NOTE(review): /\!/ above already matches any input starting with "!", so the
# `![=~]?` alternative below looks unreachable - confirm before relying on it.
49
+ /\.\.\.?|,|![=~]?/ { result :expr_beg, TOKENS[text], text }
50
+
51
+ /\.\d/ { rb_compile_error "no .<digit> floating literal anymore put 0 before dot" }
52
+
53
+ /\./ { result :expr_dot, :tDOT, "." }
54
+
55
+ /\(/ process_paren
56
+
57
+ /\=\=\=|\=\=|\=~|\=>|\=(?!begin\b)/ { result arg_state, TOKENS[text], text }
58
+
59
+ bol? /\=begin(?=\s)/ process_begin
60
+ /\=(?=begin\b)/ { result arg_state, TOKENS[text], text }
61
+
62
+ /\"(#{SIMPLE_STRING})\"/o { result :expr_end, :tSTRING, text[1..-2].gsub(ESC) { unescape $1 } }
63
+ /\"/ { string STR_DQUOTE; result nil, :tSTRING_BEG, text }
64
+
65
+ /\@\@?\d/ { rb_compile_error "`#{text}` is not allowed as a variable name" }
66
+ /\@\@?#{IDENT_CHAR}+/o process_ivar
67
+
68
+ # /\:\:/ : happy? { result :expr_beg, :tCOLON3, text }
69
+ # | { result :expr_beg, :tCOLON3, text }
70
+ # /\:/ : trinary? { result :expr_beg, :tCOLON, text }
71
+ # | /\'/ { string STR_SSYM; result :expr_fname, :tSYMBEG, text }
72
+ # | /\"/ { string STR_DSYM; result :expr_fname, :tSYMBEG, text }
73
+
74
+ not_end? /:([a-zA-Z_]#{IDENT_CHAR}*(?:[?]|[!](?!=)|=(?==>)|=(?![=>]))?)/o process_symbol
75
+ not_end? /\:\"(#{SIMPLE_STRING})\"/o process_symbol
76
+ not_end? /\:\'(#{SSTRING})\'/o process_symbol
77
+
78
+ /\:\:/ process_colon2
79
+ /\:/ process_colon1
80
+
81
+ # numbers:
82
+
83
+ # : /\d/
84
+ # | /#{NUM_BAD}/o { rb_compile_error "Invalid numeric format" }
85
+ # | /#{INT_DEC}/o { int_with_base 10 }
86
+ # | /#{INT_HEX}/o { int_with_base 16 }
87
+ # | /#{INT_BIN}/o { int_with_base 2 }
88
+ # | /#{INT_OCT_BAD}/o { rb_compile_error "Illegal octal digit." }
89
+ # | /#{INT_OCT}/o { int_with_base 8 }
90
+ # | /#{FLOAT_BAD}/o { rb_compile_error "Trailing '_' in number." }
91
+ # | /#{FLOAT}/o process_float
92
+ # | /#{INT_DEC2}/o { int_with_base 10 }
93
+
94
+ /->/ { result :expr_endfn, :tLAMBDA, nil }
95
+
96
+ /[+-]/ process_plus_minus
97
+
# Numeric literals. Order matters: the *_BAD patterns are listed ahead of the
# valid forms they overlap with, and /[0-9]/ is the catch-all error.
98
+ /#{NUM_BAD}/o { rb_compile_error "Invalid numeric format" }
99
+ /#{INT_DEC}/o { int_with_base 10 }
100
+ /#{INT_HEX}/o { int_with_base 16 }
101
+ /#{INT_BIN}/o { int_with_base 2 }
102
+ /#{INT_OCT_BAD}/o { rb_compile_error "Illegal octal digit." }
103
+ /#{INT_OCT}/o { int_with_base 8 }
104
+ /#{FLOAT_BAD}/o { rb_compile_error "Trailing '_' in number." }
105
+ /#{FLOAT}/o process_float
106
+ /#{INT_DEC2}/o { int_with_base 10 }
107
+ /[0-9]/ { rb_compile_error "Bad number format" }
108
+
109
+ /\[/ process_square_bracket
110
+
111
+ /\'#{SSTRING}\'/o { result :expr_end, :tSTRING, matched[1..-2].gsub(/\\\\/, "\\").gsub(/\\'/, "'") } # " stupid emacs
112
+
113
+ /\|\|\=/ { result :expr_beg, :tOP_ASGN, "||" }
114
+ /\|\|/ { result :expr_beg, :tOROP, "||" }
115
+ /\|\=/ { result :expr_beg, :tOP_ASGN, "|" }
# NOTE(review): this rule and several below pass the symbol :arg_state, while
# the "=" rules call arg_state directly; presumably `result` resolves the
# symbol itself - confirm against the definition of result.
116
+ /\|/ { result :arg_state, :tPIPE, "|" }
117
+
118
+ /\{/ process_curly_brace
119
+
120
+ /\*\*=/ { result :expr_beg, :tOP_ASGN, "**" }
121
+ /\*\*/ { result(:arg_state, space_vs_beginning(:tDSTAR, :tDSTAR, :tPOW), "**") }
122
+ /\*\=/ { result(:expr_beg, :tOP_ASGN, "*") }
123
+ /\*/ { result(:arg_state, space_vs_beginning(:tSTAR, :tSTAR, :tSTAR2), "*") }
124
+
125
+ /\<\=\>/ { result :arg_state, :tCMP, "<=>" }
126
+ /\<\=/ { result :arg_state, :tLEQ, "<=" }
127
+ /\<\<\=/ { result :arg_state, :tOP_ASGN, "<<" }
128
+ /\<\</ process_lchevron
129
+ /\</ { result :arg_state, :tLT, "<" }
130
+
131
+ # : /\>/
132
+ # | /\>\=/ { result :arg_state, :tGEQ, ">=" }
133
+ # | /\>\>=/ { result :arg_state, :tOP_ASGN, ">>" }
134
+ # | /\>\>/ { result :arg_state, :tRSHFT, ">>" }
135
+ # | /\>/ { result :arg_state, :tGT, ">" }
136
+
137
+ /\>\=/ { result :arg_state, :tGEQ, ">=" }
138
+ /\>\>=/ { result :arg_state, :tOP_ASGN, ">>" }
139
+ /\>\>/ { result :arg_state, :tRSHFT, ">>" }
140
+ /\>/ { result :arg_state, :tGT, ">" }
141
+
142
+ /\`/ process_backtick
143
+
144
+ # /\`/ : expr_fname? { result(:expr_end, :tBACK_REF2, "`") }
145
+ # | expr_dot? { result((command_state ? :expr_cmdarg : :expr_arg), :tBACK_REF2, "`")
146
+ # | { string STR_XQUOTE, '`'; result(nil, :tXSTRING_BEG, "`") }
147
+
148
+ /\?/ process_questionmark
149
+
150
+ /\&\&\=/ { result(:expr_beg, :tOP_ASGN, "&&") }
151
+ /\&\&/ { result(:expr_beg, :tANDOP, "&&") }
152
+ /\&\=/ { result(:expr_beg, :tOP_ASGN, "&" ) }
153
+ /\&/ process_amper
154
+
155
+ /\// process_slash
156
+
157
+ /\^=/ { result(:expr_beg, :tOP_ASGN, "^") }
158
+ /\^/ { result(:arg_state, :tCARET, "^") }
159
+
160
+ /\;/ { self.command_start = true; result(:expr_beg, :tSEMI, ";") }
161
+
162
+ in_arg_state? /\~@/ { result(:arg_state, :tTILDE, "~") }
163
+ /\~/ { result(:arg_state, :tTILDE, "~") }
164
+
165
+ /\\\r?\n/ { self.lineno += 1; self.space_seen = true; next }
166
+ /\\/ { rb_compile_error "bare backslash only allowed before newline" }
167
+
168
+ /\%/ process_percent
169
+
# Global variables. The in_fname? variants are tried first, so the same text is
# handed to process_gvar instead of process_backref / process_nthref when the
# predicate holds.
170
+ /\$_\w+/ process_gvar
171
+ /\$_/ process_gvar
172
+ /\$[~*$?!@\/\\;,.=:<>\"]|\$-\w?/ process_gvar
173
+ in_fname? /\$([\&\`\'\+])/ process_gvar
174
+ /\$([\&\`\'\+])/ process_backref
175
+ in_fname? /\$([1-9]\d*)/ process_gvar
176
+ /\$([1-9]\d*)/ process_nthref
177
+ /\$0/ process_gvar
178
+ /\$\W|\$\z/ process_gvar_oddity
179
+ /\$\w+/ process_gvar
180
+
181
+ /\_/ process_underscore
182
+
183
+ /#{IDENT}/o process_token
184
+
185
+ /\004|\032|\000|\Z/ { [RubyLexer::EOF, RubyLexer::EOF] }
186
+
# Catch-all: any single character no earlier rule accepted is an error.
187
+ /./ { rb_compile_error "Invalid char #{text.inspect} in expression" }
188
+
189
+ end
@@ -0,0 +1,263 @@
1
+ #--
2
+ # This file is automatically generated. Do not modify it.
3
+ # Generated by: oedipus_lex version 2.1.0.
4
+ # Source: lib/ruby_lexer.rex
5
+ #++
6
+
7
+ # encoding: UTF-8
8
+ # TODO: this needs to be put on the first line
9
+ #
10
+ # new_ruby_parser.rex
11
+ # lexical scanner definition for ruby
12
+
13
+ class RubyLexer
14
+ require 'strscan'
15
+
16
# Regexp constants emitted from the macro section of lib/ruby_lexer.rex.
# IDENT_CHAR is not defined in this file; presumably it comes from the
# hand-written part of RubyLexer - confirm.
+ IDENT = /^#{IDENT_CHAR}+/o
17
+ ESC = /\\((?>[0-7]{1,3}|x[0-9a-fA-F]{1,2}|M-[^\\]|(C-|c)[^\\]|u[0-9a-fA-F]+|u\{[0-9a-fA-F]+\}|[^0-7xMCc]))/
18
+ SIMPLE_STRING = /(#{ESC}|\#(#{ESC}|[^\{\#\@\$\"\\])|[^\"\\\#])*/o
19
+ SSTRING = /(\\.|[^\'])*/
20
+ INT_DEC = /[+]?(?:(?:[1-9][\d_]*|0)(?!\.\d)\b|0d[0-9_]+)/i
21
+ INT_HEX = /[+]?0x[a-f0-9_]+/i
22
+ INT_BIN = /[+]?0b[01_]+/i
23
+ INT_OCT = /[+]?0o?[0-7_]+|0o/i
24
+ FLOAT = /[+]?\d[\d_]*\.[\d_]+(e[+-]?[\d_]+)?\b|[+]?[\d_]+e[+-]?[\d_]+\b/i
25
+ INT_DEC2 = /[+]?\d[0-9_]*(?![e])/i
# Malformed numeric forms; next_token reports each one via rb_compile_error.
26
+ NUM_BAD = /[+]?0[xbd]\b/i
27
+ INT_OCT_BAD = /[+]?0o?[0-7_]*[89]/i
28
+ FLOAT_BAD = /[+]?\d[\d_]*_(e|\.)/i
29
+
30
# Raised by next_token when no rule matches the input or the lexer state is
# not one it knows.
+ class ScanError < StandardError ; end
31
+
32
# Current line number; parse resets it to 1.
+ attr_accessor :lineno
33
# Path being lexed; set by parse_file.
+ attr_accessor :filename
34
# The scanner instance over the input; created by parse.
+ attr_accessor :ss
35
# Lexer state that selects the rule group in next_token (only nil is handled).
+ attr_accessor :state
36
+
37
# `match` is a second name for the `ss` reader.
+ alias :match :ss
38
+
39
# Returns capture groups 1 through 9 of the scanner's last match as an array,
# with trailing unset (nil) groups dropped. Unset groups in the middle are
# kept as nil; the result is empty when no group is set.
def matches
  groups = (1..9).map { |n| ss[n] }
  groups.pop while !groups.empty? && !groups.last
  groups
end
44
+
45
# Runs the block attached to a lexer rule and returns the block's value.
# A `next` inside the block therefore makes this return nil.
def action
  yield
end
48
+
49
# Supplies the scanner class that #parse instantiates. The definition is
# skipped when this class already declares its own scanner_class, so an
# existing definition is left in place.
unless instance_methods(false).map(&:to_s).include?("scanner_class")
  def scanner_class
    StringScanner
  end
end
52
+
53
# Lexes +str+.
#
# Wraps the string in a fresh scanner (see #scanner_class), resets the line
# counter to 1, normalises a falsy state to nil while leaving any other
# state untouched, and returns whatever do_parse returns.
def parse(str)
  self.ss = scanner_class.new(str)
  self.lineno = 1
  self.state = nil unless state

  do_parse
end
60
+
61
# Lexes the contents of the file at +path+.
#
# Records +path+ in #filename, reads the whole file, and passes the text to
# #parse, whose result is returned. Errors from opening the file (for example
# Errno::ENOENT) propagate to the caller.
#
# File.open is used instead of Kernel#open so that a path beginning with "|"
# is treated as a file name and never run as a subprocess.
def parse_file path
  self.filename = path

  File.open path do |f|
    parse f.read
  end
end
67
+
68
# Returns the next token from the scanner.
#
# When lex_strterm is set, the call is handed straight to process_string.
# Otherwise the per-token flags are reset and the rules below are tried, in
# order, at the current scan position until one of them produces a non-nil
# value or the scanner reaches end of input. A rule whose action evaluates to
# nil (for example the whitespace rule, which ends with `next`) consumes its
# text and the loop tries again.
#
# Returns the value produced by the matching rule, or nil when the scanner
# reaches end of input without producing one. If the value's first element is
# :state, its last element becomes the new lexer state.
#
# Raises ScanError when no rule matches or when state is anything other than
# nil, and RuntimeError when a rule returns something that is neither nil nor
# an Array of at least two elements.
+ def next_token
69
+ return process_string if lex_strterm
70
+ self.command_state = self.command_start
71
+ self.command_start = false
72
+ self.space_seen = false
73
+ self.last_state = lex_state
74
+
75
+ token = nil
76
+
77
+ until ss.eos? or token do
78
+ token =
79
+ case state
80
+ when nil then
81
+ case
82
+ when text = ss.scan(/[\ \t\r\f\v]/) then
83
+ action { self.space_seen = true; next }
84
+ when text = ss.scan(/\n|\#/) then
85
+ process_newline_or_comment text
86
+ when text = ss.scan(/[\]\)\}]/) then
87
+ process_bracing text
88
+ when text = ss.scan(/\!/) then
89
+ process_bang text
90
+ when text = ss.scan(/\.\.\.?|,|![=~]?/) then
91
+ action { result :expr_beg, TOKENS[text], text }
92
+ when text = ss.scan(/\.\d/) then
93
+ action { rb_compile_error "no .<digit> floating literal anymore put 0 before dot" }
94
+ when text = ss.scan(/\./) then
95
+ action { result :expr_dot, :tDOT, "." }
96
+ when text = ss.scan(/\(/) then
97
+ process_paren text
98
+ when text = ss.scan(/\=\=\=|\=\=|\=~|\=>|\=(?!begin\b)/) then
99
+ action { result arg_state, TOKENS[text], text }
100
+ when bol? && (text = ss.scan(/\=begin(?=\s)/)) then
101
+ process_begin text
102
+ when text = ss.scan(/\=(?=begin\b)/) then
103
+ action { result arg_state, TOKENS[text], text }
104
+ when text = ss.scan(/\"(#{SIMPLE_STRING})\"/o) then
105
+ action { result :expr_end, :tSTRING, text[1..-2].gsub(ESC) { unescape $1 } }
106
+ when text = ss.scan(/\"/) then
107
+ action { string STR_DQUOTE; result nil, :tSTRING_BEG, text }
108
+ when text = ss.scan(/\@\@?\d/) then
109
+ action { rb_compile_error "`#{text}` is not allowed as a variable name" }
110
+ when text = ss.scan(/\@\@?#{IDENT_CHAR}+/o) then
111
+ process_ivar text
112
+ when not_end? && (text = ss.scan(/:([a-zA-Z_]#{IDENT_CHAR}*(?:[?]|[!](?!=)|=(?==>)|=(?![=>]))?)/o)) then
113
+ process_symbol text
114
+ when not_end? && (text = ss.scan(/\:\"(#{SIMPLE_STRING})\"/o)) then
115
+ process_symbol text
116
+ when not_end? && (text = ss.scan(/\:\'(#{SSTRING})\'/o)) then
117
+ process_symbol text
118
+ when text = ss.scan(/\:\:/) then
119
+ process_colon2 text
120
+ when text = ss.scan(/\:/) then
121
+ process_colon1 text
122
+ when text = ss.scan(/->/) then
123
+ action { result :expr_endfn, :tLAMBDA, nil }
124
+ when text = ss.scan(/[+-]/) then
125
+ process_plus_minus text
126
+ when text = ss.scan(/#{NUM_BAD}/o) then
127
+ action { rb_compile_error "Invalid numeric format" }
128
+ when text = ss.scan(/#{INT_DEC}/o) then
129
+ action { int_with_base 10 }
130
+ when text = ss.scan(/#{INT_HEX}/o) then
131
+ action { int_with_base 16 }
132
+ when text = ss.scan(/#{INT_BIN}/o) then
133
+ action { int_with_base 2 }
134
+ when text = ss.scan(/#{INT_OCT_BAD}/o) then
135
+ action { rb_compile_error "Illegal octal digit." }
136
+ when text = ss.scan(/#{INT_OCT}/o) then
137
+ action { int_with_base 8 }
138
+ when text = ss.scan(/#{FLOAT_BAD}/o) then
139
+ action { rb_compile_error "Trailing '_' in number." }
140
+ when text = ss.scan(/#{FLOAT}/o) then
141
+ process_float text
142
+ when text = ss.scan(/#{INT_DEC2}/o) then
143
+ action { int_with_base 10 }
144
+ when text = ss.scan(/[0-9]/) then
145
+ action { rb_compile_error "Bad number format" }
146
+ when text = ss.scan(/\[/) then
147
+ process_square_bracket text
148
+ when text = ss.scan(/\'#{SSTRING}\'/o) then
149
+ action { result :expr_end, :tSTRING, matched[1..-2].gsub(/\\\\/, "\\").gsub(/\\'/, "'") } # " stupid emacs
150
+ when text = ss.scan(/\|\|\=/) then
151
+ action { result :expr_beg, :tOP_ASGN, "||" }
152
+ when text = ss.scan(/\|\|/) then
153
+ action { result :expr_beg, :tOROP, "||" }
154
+ when text = ss.scan(/\|\=/) then
155
+ action { result :expr_beg, :tOP_ASGN, "|" }
156
+ when text = ss.scan(/\|/) then
157
+ action { result :arg_state, :tPIPE, "|" }
158
+ when text = ss.scan(/\{/) then
159
+ process_curly_brace text
160
+ when text = ss.scan(/\*\*=/) then
161
+ action { result :expr_beg, :tOP_ASGN, "**" }
162
+ when text = ss.scan(/\*\*/) then
163
+ action { result(:arg_state, space_vs_beginning(:tDSTAR, :tDSTAR, :tPOW), "**") }
164
+ when text = ss.scan(/\*\=/) then
165
+ action { result(:expr_beg, :tOP_ASGN, "*") }
166
+ when text = ss.scan(/\*/) then
167
+ action { result(:arg_state, space_vs_beginning(:tSTAR, :tSTAR, :tSTAR2), "*") }
168
+ when text = ss.scan(/\<\=\>/) then
169
+ action { result :arg_state, :tCMP, "<=>" }
170
+ when text = ss.scan(/\<\=/) then
171
+ action { result :arg_state, :tLEQ, "<=" }
172
+ when text = ss.scan(/\<\<\=/) then
173
+ action { result :arg_state, :tOP_ASGN, "<<" }
174
+ when text = ss.scan(/\<\</) then
175
+ process_lchevron text
176
+ when text = ss.scan(/\</) then
177
+ action { result :arg_state, :tLT, "<" }
178
+ when text = ss.scan(/\>\=/) then
179
+ action { result :arg_state, :tGEQ, ">=" }
180
+ when text = ss.scan(/\>\>=/) then
181
+ action { result :arg_state, :tOP_ASGN, ">>" }
182
+ when text = ss.scan(/\>\>/) then
183
+ action { result :arg_state, :tRSHFT, ">>" }
184
+ when text = ss.scan(/\>/) then
185
+ action { result :arg_state, :tGT, ">" }
186
+ when text = ss.scan(/\`/) then
187
+ process_backtick text
188
+ when text = ss.scan(/\?/) then
189
+ process_questionmark text
190
+ when text = ss.scan(/\&\&\=/) then
191
+ action { result(:expr_beg, :tOP_ASGN, "&&") }
192
+ when text = ss.scan(/\&\&/) then
193
+ action { result(:expr_beg, :tANDOP, "&&") }
194
+ when text = ss.scan(/\&\=/) then
195
+ action { result(:expr_beg, :tOP_ASGN, "&" ) }
196
+ when text = ss.scan(/\&/) then
197
+ process_amper text
198
+ when text = ss.scan(/\//) then
199
+ process_slash text
200
+ when text = ss.scan(/\^=/) then
201
+ action { result(:expr_beg, :tOP_ASGN, "^") }
202
+ when text = ss.scan(/\^/) then
203
+ action { result(:arg_state, :tCARET, "^") }
204
+ when text = ss.scan(/\;/) then
205
+ action { self.command_start = true; result(:expr_beg, :tSEMI, ";") }
206
+ when in_arg_state? && (text = ss.scan(/\~@/)) then
207
+ action { result(:arg_state, :tTILDE, "~") }
208
+ when text = ss.scan(/\~/) then
209
+ action { result(:arg_state, :tTILDE, "~") }
210
+ when text = ss.scan(/\\\r?\n/) then
211
+ action { self.lineno += 1; self.space_seen = true; next }
212
+ when text = ss.scan(/\\/) then
213
+ action { rb_compile_error "bare backslash only allowed before newline" }
214
+ when text = ss.scan(/\%/) then
215
+ process_percent text
216
+ when text = ss.scan(/\$_\w+/) then
217
+ process_gvar text
218
+ when text = ss.scan(/\$_/) then
219
+ process_gvar text
220
+ when text = ss.scan(/\$[~*$?!@\/\\;,.=:<>\"]|\$-\w?/) then
221
+ process_gvar text
222
+ when in_fname? && (text = ss.scan(/\$([\&\`\'\+])/)) then
223
+ process_gvar text
224
+ when text = ss.scan(/\$([\&\`\'\+])/) then
225
+ process_backref text
226
+ when in_fname? && (text = ss.scan(/\$([1-9]\d*)/)) then
227
+ process_gvar text
228
+ when text = ss.scan(/\$([1-9]\d*)/) then
229
+ process_nthref text
230
+ when text = ss.scan(/\$0/) then
231
+ process_gvar text
232
+ when text = ss.scan(/\$\W|\$\z/) then
233
+ process_gvar_oddity text
234
+ when text = ss.scan(/\$\w+/) then
235
+ process_gvar text
236
+ when text = ss.scan(/\_/) then
237
+ process_underscore text
238
+ when text = ss.scan(/#{IDENT}/o) then
239
+ process_token text
240
+ when text = ss.scan(/\004|\032|\000|\Z/) then
241
+ action { [RubyLexer::EOF, RubyLexer::EOF] }
242
+ when text = ss.scan(/./) then
243
+ action { rb_compile_error "Invalid char #{text.inspect} in expression" }
244
+ else
245
+ text = ss.string[ss.pos .. -1]
246
+ raise ScanError, "can not match (#{state.inspect}): '#{text}'"
247
+ end
248
+ else
249
+ raise ScanError, "undefined state: '#{state}'"
250
+ end # token = case state
251
+
252
+ next unless token # allow functions to trigger redo w/ nil
253
+ end # until
254
+
255
+ raise "bad lexical result: #{token.inspect}" unless
256
+ token.nil? || (Array === token && token.size >= 2)
257
+
258
+ # auto-switch state
259
+ self.state = token.last if token && token.first == :state
260
+
261
+ token
262
+ end # def next_token
263
+ end # class