ruby_parser 3.3.0 → 3.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,189 @@
1
+ # encoding: UTF-8
2
+ # TODO: this needs to be put on the first line
3
+ #
4
+ # ruby_lexer.rex (formerly new_ruby_parser.rex)
5
+ # lexical scanner definition for ruby
6
+
7
+ class RubyLexer
8
+
9
+ macro
10
+
11
+ IDENT /^#{IDENT_CHAR}+/o
12
+
13
+ ESC /\\((?>[0-7]{1,3}|x[0-9a-fA-F]{1,2}|M-[^\\]|(C-|c)[^\\]|u[0-9a-fA-F]+|u\{[0-9a-fA-F]+\}|[^0-7xMCc]))/
14
+ SIMPLE_STRING /(#{ESC}|\#(#{ESC}|[^\{\#\@\$\"\\])|[^\"\\\#])*/o
15
+ SSTRING /(\\.|[^\'])*/
16
+
17
+ INT_DEC /[+]?(?:(?:[1-9][\d_]*|0)(?!\.\d)\b|0d[0-9_]+)/i
18
+ INT_HEX /[+]?0x[a-f0-9_]+/i
19
+ INT_BIN /[+]?0b[01_]+/i
20
+ INT_OCT /[+]?0o?[0-7_]+|0o/i
21
+ FLOAT /[+]?\d[\d_]*\.[\d_]+(e[+-]?[\d_]+)?\b|[+]?[\d_]+e[+-]?[\d_]+\b/i
22
+ INT_DEC2 /[+]?\d[0-9_]*(?![e])/i
23
+
24
+ NUM_BAD /[+]?0[xbd]\b/i
25
+ INT_OCT_BAD /[+]?0o?[0-7_]*[89]/i
26
+ FLOAT_BAD /[+]?\d[\d_]*_(e|\.)/i
27
+
28
+ start
29
+
30
+ return process_string if lex_strterm
31
+
32
+ self.command_state = self.command_start
33
+ self.command_start = false
34
+ self.space_seen = false
35
+ self.last_state = lex_state
36
+
37
+ rule
38
+
39
+ # Rule layout: [:state or predicate?] /pattern/ [action: a handler method name or a { block }]; rules are tried top to bottom
40
+
41
+ # inline whitespace: \s minus \n, plus \v (newlines are matched by the next rule)
42
+ /[\ \t\r\f\v]/ { self.space_seen = true; next }
43
+
44
+ /\n|\#/ process_newline_or_comment
45
+
46
+ /[\]\)\}]/ process_bracing
47
+ /\!/ process_bang
48
+
49
+ /\.\.\.?|,|![=~]?/ { result :expr_beg, TOKENS[text], text }
50
+
51
+ /\.\d/ { rb_compile_error "no .<digit> floating literal anymore put 0 before dot" }
52
+
53
+ /\./ { result :expr_dot, :tDOT, "." }
54
+
55
+ /\(/ process_paren
56
+
57
+ /\=\=\=|\=\=|\=~|\=>|\=(?!begin\b)/ { result arg_state, TOKENS[text], text }
58
+
59
+ bol? /\=begin(?=\s)/ process_begin
60
+ /\=(?=begin\b)/ { result arg_state, TOKENS[text], text }
61
+
62
+ /\"(#{SIMPLE_STRING})\"/o { result :expr_end, :tSTRING, text[1..-2].gsub(ESC) { unescape $1 } }
63
+ /\"/ { string STR_DQUOTE; result nil, :tSTRING_BEG, text }
64
+
65
+ /\@\@?\d/ { rb_compile_error "`#{text}` is not allowed as a variable name" }
66
+ /\@\@?#{IDENT_CHAR}+/o process_ivar
67
+
68
+ # /\:\:/ : happy? { result :expr_beg, :tCOLON3, text }
69
+ # | { result :expr_beg, :tCOLON3, text }
70
+ # /\:/ : trinary? { result :expr_beg, :tCOLON, text }
71
+ # | /\'/ { string STR_SSYM; result :expr_fname, :tSYMBEG, text }
72
+ # | /\"/ { string STR_DSYM; result :expr_fname, :tSYMBEG, text }
73
+
74
+ not_end? /:([a-zA-Z_]#{IDENT_CHAR}*(?:[?]|[!](?!=)|=(?==>)|=(?![=>]))?)/o process_symbol
75
+ not_end? /\:\"(#{SIMPLE_STRING})\"/o process_symbol
76
+ not_end? /\:\'(#{SSTRING})\'/o process_symbol
77
+
78
+ /\:\:/ process_colon2
79
+ /\:/ process_colon1
80
+
81
+ # numbers: the grouped form below is commented out; the same patterns appear as flat rules after /[+-]/
82
+
83
+ # : /\d/
84
+ # | /#{NUM_BAD}/o { rb_compile_error "Invalid numeric format" }
85
+ # | /#{INT_DEC}/o { int_with_base 10 }
86
+ # | /#{INT_HEX}/o { int_with_base 16 }
87
+ # | /#{INT_BIN}/o { int_with_base 2 }
88
+ # | /#{INT_OCT_BAD}/o { rb_compile_error "Illegal octal digit." }
89
+ # | /#{INT_OCT}/o { int_with_base 8 }
90
+ # | /#{FLOAT_BAD}/o { rb_compile_error "Trailing '_' in number." }
91
+ # | /#{FLOAT}/o process_float
92
+ # | /#{INT_DEC2}/o { int_with_base 10 }
93
+
94
+ /->/ { result :expr_endfn, :tLAMBDA, nil }
95
+
96
+ /[+-]/ process_plus_minus
97
+
98
+ /#{NUM_BAD}/o { rb_compile_error "Invalid numeric format" }
99
+ /#{INT_DEC}/o { int_with_base 10 }
100
+ /#{INT_HEX}/o { int_with_base 16 }
101
+ /#{INT_BIN}/o { int_with_base 2 }
102
+ /#{INT_OCT_BAD}/o { rb_compile_error "Illegal octal digit." }
103
+ /#{INT_OCT}/o { int_with_base 8 }
104
+ /#{FLOAT_BAD}/o { rb_compile_error "Trailing '_' in number." }
105
+ /#{FLOAT}/o process_float
106
+ /#{INT_DEC2}/o { int_with_base 10 }
107
+ /[0-9]/ { rb_compile_error "Bad number format" }
108
+
109
+ /\[/ process_square_bracket
110
+
111
+ /\'#{SSTRING}\'/o { result :expr_end, :tSTRING, matched[1..-2].gsub(/\\\\/, "\\").gsub(/\\'/, "'") } # " stupid emacs
112
+
113
+ /\|\|\=/ { result :expr_beg, :tOP_ASGN, "||" }
114
+ /\|\|/ { result :expr_beg, :tOROP, "||" }
115
+ /\|\=/ { result :expr_beg, :tOP_ASGN, "|" }
116
+ /\|/ { result :arg_state, :tPIPE, "|" }
117
+
118
+ /\{/ process_curly_brace
119
+
120
+ /\*\*=/ { result :expr_beg, :tOP_ASGN, "**" }
121
+ /\*\*/ { result(:arg_state, space_vs_beginning(:tDSTAR, :tDSTAR, :tPOW), "**") }
122
+ /\*\=/ { result(:expr_beg, :tOP_ASGN, "*") }
123
+ /\*/ { result(:arg_state, space_vs_beginning(:tSTAR, :tSTAR, :tSTAR2), "*") }
124
+
125
+ /\<\=\>/ { result :arg_state, :tCMP, "<=>" }
126
+ /\<\=/ { result :arg_state, :tLEQ, "<=" }
127
+ /\<\<\=/ { result :arg_state, :tOP_ASGN, "<<" }
128
+ /\<\</ process_lchevron
129
+ /\</ { result :arg_state, :tLT, "<" }
130
+
131
+ # : /\>/
132
+ # | /\>\=/ { result :arg_state, :tGEQ, ">=" }
133
+ # | /\>\>=/ { result :arg_state, :tOP_ASGN, ">>" }
134
+ # | /\>\>/ { result :arg_state, :tRSHFT, ">>" }
135
+ # | /\>/ { result :arg_state, :tGT, ">" }
136
+
137
+ /\>\=/ { result :arg_state, :tGEQ, ">=" }
138
+ /\>\>=/ { result :arg_state, :tOP_ASGN, ">>" }
139
+ /\>\>/ { result :arg_state, :tRSHFT, ">>" }
140
+ /\>/ { result :arg_state, :tGT, ">" }
141
+
142
+ /\`/ process_backtick
143
+
144
+ # /\`/ : expr_fname? { result(:expr_end, :tBACK_REF2, "`") }
145
+ # | expr_dot? { result((command_state ? :expr_cmdarg : :expr_arg), :tBACK_REF2, "`")
146
+ # | { string STR_XQUOTE, '`'; result(nil, :tXSTRING_BEG, "`") }
147
+
148
+ /\?/ process_questionmark
149
+
150
+ /\&\&\=/ { result(:expr_beg, :tOP_ASGN, "&&") }
151
+ /\&\&/ { result(:expr_beg, :tANDOP, "&&") }
152
+ /\&\=/ { result(:expr_beg, :tOP_ASGN, "&" ) }
153
+ /\&/ process_amper
154
+
155
+ /\// process_slash
156
+
157
+ /\^=/ { result(:expr_beg, :tOP_ASGN, "^") }
158
+ /\^/ { result(:arg_state, :tCARET, "^") }
159
+
160
+ /\;/ { self.command_start = true; result(:expr_beg, :tSEMI, ";") }
161
+
162
+ in_arg_state? /\~@/ { result(:arg_state, :tTILDE, "~") }
163
+ /\~/ { result(:arg_state, :tTILDE, "~") }
164
+
165
+ /\\\r?\n/ { self.lineno += 1; self.space_seen = true; next }
166
+ /\\/ { rb_compile_error "bare backslash only allowed before newline" }
167
+
168
+ /\%/ process_percent
169
+
170
+ /\$_\w+/ process_gvar
171
+ /\$_/ process_gvar
172
+ /\$[~*$?!@\/\\;,.=:<>\"]|\$-\w?/ process_gvar
173
+ in_fname? /\$([\&\`\'\+])/ process_gvar
174
+ /\$([\&\`\'\+])/ process_backref
175
+ in_fname? /\$([1-9]\d*)/ process_gvar
176
+ /\$([1-9]\d*)/ process_nthref
177
+ /\$0/ process_gvar
178
+ /\$\W|\$\z/ process_gvar_oddity
179
+ /\$\w+/ process_gvar
180
+
181
+ /\_/ process_underscore
182
+
183
+ /#{IDENT}/o process_token
184
+
185
+ /\004|\032|\000|\Z/ { [RubyLexer::EOF, RubyLexer::EOF] }
186
+
187
+ /./ { rb_compile_error "Invalid char #{text.inspect} in expression" }
188
+
189
+ end
@@ -0,0 +1,263 @@
1
+ #--
2
+ # This file is automatically generated. Do not modify it.
3
+ # Generated by: oedipus_lex version 2.1.0.
4
+ # Source: lib/ruby_lexer.rex
5
+ #++
6
+
7
+ # encoding: UTF-8
8
+ # TODO: this needs to be put on the first line
9
+ #
10
+ # new_ruby_parser.rex
11
+ # lexical scanner definition for ruby
12
+
13
+ class RubyLexer
14
+ require 'strscan'
15
+
16
+ IDENT = /^#{IDENT_CHAR}+/o # IDENT_CHAR is referenced here but defined outside the code shown
17
+ ESC = /\\((?>[0-7]{1,3}|x[0-9a-fA-F]{1,2}|M-[^\\]|(C-|c)[^\\]|u[0-9a-fA-F]+|u\{[0-9a-fA-F]+\}|[^0-7xMCc]))/
18
+ SIMPLE_STRING = /(#{ESC}|\#(#{ESC}|[^\{\#\@\$\"\\])|[^\"\\\#])*/o
19
+ SSTRING = /(\\.|[^\'])*/
20
+ INT_DEC = /[+]?(?:(?:[1-9][\d_]*|0)(?!\.\d)\b|0d[0-9_]+)/i
21
+ INT_HEX = /[+]?0x[a-f0-9_]+/i
22
+ INT_BIN = /[+]?0b[01_]+/i
23
+ INT_OCT = /[+]?0o?[0-7_]+|0o/i
24
+ FLOAT = /[+]?\d[\d_]*\.[\d_]+(e[+-]?[\d_]+)?\b|[+]?[\d_]+e[+-]?[\d_]+\b/i
25
+ INT_DEC2 = /[+]?\d[0-9_]*(?![e])/i
26
+ NUM_BAD = /[+]?0[xbd]\b/i # 0x / 0b / 0d prefix with no digits after it
27
+ INT_OCT_BAD = /[+]?0o?[0-7_]*[89]/i # octal-looking literal that contains an 8 or 9
28
+ FLOAT_BAD = /[+]?\d[\d_]*_(e|\.)/i # underscore directly before the exponent marker or the decimal point
29
+
30
+ class ScanError < StandardError ; end # raised by next_token for unmatched input or an unknown state
31
+
32
+ attr_accessor :lineno # reset to 1 by #parse
33
+ attr_accessor :filename # set by #parse_file
34
+ attr_accessor :ss # scanner instance created by #parse
35
+ attr_accessor :state # selects the rule group in next_token (only nil has rules)
36
+
37
+ alias :match :ss # #match is another name for the #ss reader
38
+
39
+ def matches # Returns capture groups 1..9 of the scanner's last match, with trailing nils removed.
40
+ m = (1..9).map { |i| ss[i] }
41
+ m.pop until m[-1] or m.empty? # drop trailing nil entries; stops at the last non-nil group
42
+ m
43
+ end
44
+
45
+ def action # Runs the given block and returns its value; next_token wraps inline rule bodies in it.
46
+ yield
47
+ end
48
+
49
+ def scanner_class # Class that #parse instantiates as the scanner; StringScanner by default.
50
+ StringScanner
51
+ end unless instance_methods(false).map(&:to_s).include?("scanner_class") # skip this definition if the class already defines scanner_class itself
52
+
53
+ def parse str # Creates a scanner over +str+, resets lineno to 1, then runs do_parse.
54
+ self.ss = scanner_class.new str
55
+ self.lineno = 1
56
+ self.state ||= nil # keeps an already-set state; otherwise it stays nil
57
+
58
+ do_parse # defined outside the code shown here
59
+ end
60
+
61
+ def parse_file path
62
+ self.filename = path
63
+ open path do |f|
64
+ parse f.read
65
+ end
66
+ end
67
+
68
+ def next_token # Returns the next token (an Array of at least 2 elements), or nil if input ends before a rule yields one.
69
+ return process_string if lex_strterm # lex_strterm set: delegate to process_string and skip the rules below
70
+ self.command_state = self.command_start # snapshot command_start before it is cleared on the next line
71
+ self.command_start = false
72
+ self.space_seen = false
73
+ self.last_state = lex_state
74
+
75
+ token = nil
76
+
77
+ until ss.eos? or token do # keep scanning until a rule yields a token or the input runs out
78
+ token =
79
+ case state
80
+ when nil then # the only state with rules; any other state raises ScanError below
81
+ case
82
+ when text = ss.scan(/[\ \t\r\f\v]/) then
83
+ action { self.space_seen = true; next } # next leaves the block with nil, so the loop scans again
84
+ when text = ss.scan(/\n|\#/) then
85
+ process_newline_or_comment text
86
+ when text = ss.scan(/[\]\)\}]/) then
87
+ process_bracing text
88
+ when text = ss.scan(/\!/) then
89
+ process_bang text
90
+ when text = ss.scan(/\.\.\.?|,|![=~]?/) then # NOTE(review): the ![=~]? alternative looks unreachable; the /\!/ branch above matches first
91
+ action { result :expr_beg, TOKENS[text], text }
92
+ when text = ss.scan(/\.\d/) then
93
+ action { rb_compile_error "no .<digit> floating literal anymore put 0 before dot" }
94
+ when text = ss.scan(/\./) then
95
+ action { result :expr_dot, :tDOT, "." }
96
+ when text = ss.scan(/\(/) then
97
+ process_paren text
98
+ when text = ss.scan(/\=\=\=|\=\=|\=~|\=>|\=(?!begin\b)/) then
99
+ action { result arg_state, TOKENS[text], text }
100
+ when bol? && (text = ss.scan(/\=begin(?=\s)/)) then
101
+ process_begin text
102
+ when text = ss.scan(/\=(?=begin\b)/) then
103
+ action { result arg_state, TOKENS[text], text }
104
+ when text = ss.scan(/\"(#{SIMPLE_STRING})\"/o) then
105
+ action { result :expr_end, :tSTRING, text[1..-2].gsub(ESC) { unescape $1 } }
106
+ when text = ss.scan(/\"/) then
107
+ action { string STR_DQUOTE; result nil, :tSTRING_BEG, text }
108
+ when text = ss.scan(/\@\@?\d/) then
109
+ action { rb_compile_error "`#{text}` is not allowed as a variable name" }
110
+ when text = ss.scan(/\@\@?#{IDENT_CHAR}+/o) then
111
+ process_ivar text
112
+ when not_end? && (text = ss.scan(/:([a-zA-Z_]#{IDENT_CHAR}*(?:[?]|[!](?!=)|=(?==>)|=(?![=>]))?)/o)) then
113
+ process_symbol text
114
+ when not_end? && (text = ss.scan(/\:\"(#{SIMPLE_STRING})\"/o)) then
115
+ process_symbol text
116
+ when not_end? && (text = ss.scan(/\:\'(#{SSTRING})\'/o)) then
117
+ process_symbol text
118
+ when text = ss.scan(/\:\:/) then
119
+ process_colon2 text
120
+ when text = ss.scan(/\:/) then
121
+ process_colon1 text
122
+ when text = ss.scan(/->/) then
123
+ action { result :expr_endfn, :tLAMBDA, nil }
124
+ when text = ss.scan(/[+-]/) then
125
+ process_plus_minus text
126
+ when text = ss.scan(/#{NUM_BAD}/o) then
127
+ action { rb_compile_error "Invalid numeric format" }
128
+ when text = ss.scan(/#{INT_DEC}/o) then
129
+ action { int_with_base 10 }
130
+ when text = ss.scan(/#{INT_HEX}/o) then
131
+ action { int_with_base 16 }
132
+ when text = ss.scan(/#{INT_BIN}/o) then
133
+ action { int_with_base 2 }
134
+ when text = ss.scan(/#{INT_OCT_BAD}/o) then
135
+ action { rb_compile_error "Illegal octal digit." }
136
+ when text = ss.scan(/#{INT_OCT}/o) then
137
+ action { int_with_base 8 }
138
+ when text = ss.scan(/#{FLOAT_BAD}/o) then
139
+ action { rb_compile_error "Trailing '_' in number." }
140
+ when text = ss.scan(/#{FLOAT}/o) then
141
+ process_float text
142
+ when text = ss.scan(/#{INT_DEC2}/o) then
143
+ action { int_with_base 10 }
144
+ when text = ss.scan(/[0-9]/) then
145
+ action { rb_compile_error "Bad number format" }
146
+ when text = ss.scan(/\[/) then
147
+ process_square_bracket text
148
+ when text = ss.scan(/\'#{SSTRING}\'/o) then
149
+ action { result :expr_end, :tSTRING, matched[1..-2].gsub(/\\\\/, "\\").gsub(/\\'/, "'") } # " stupid emacs
150
+ when text = ss.scan(/\|\|\=/) then
151
+ action { result :expr_beg, :tOP_ASGN, "||" }
152
+ when text = ss.scan(/\|\|/) then
153
+ action { result :expr_beg, :tOROP, "||" }
154
+ when text = ss.scan(/\|\=/) then
155
+ action { result :expr_beg, :tOP_ASGN, "|" }
156
+ when text = ss.scan(/\|/) then
157
+ action { result :arg_state, :tPIPE, "|" }
158
+ when text = ss.scan(/\{/) then
159
+ process_curly_brace text
160
+ when text = ss.scan(/\*\*=/) then
161
+ action { result :expr_beg, :tOP_ASGN, "**" }
162
+ when text = ss.scan(/\*\*/) then
163
+ action { result(:arg_state, space_vs_beginning(:tDSTAR, :tDSTAR, :tPOW), "**") }
164
+ when text = ss.scan(/\*\=/) then
165
+ action { result(:expr_beg, :tOP_ASGN, "*") }
166
+ when text = ss.scan(/\*/) then
167
+ action { result(:arg_state, space_vs_beginning(:tSTAR, :tSTAR, :tSTAR2), "*") }
168
+ when text = ss.scan(/\<\=\>/) then
169
+ action { result :arg_state, :tCMP, "<=>" }
170
+ when text = ss.scan(/\<\=/) then
171
+ action { result :arg_state, :tLEQ, "<=" }
172
+ when text = ss.scan(/\<\<\=/) then
173
+ action { result :arg_state, :tOP_ASGN, "<<" }
174
+ when text = ss.scan(/\<\</) then
175
+ process_lchevron text
176
+ when text = ss.scan(/\</) then
177
+ action { result :arg_state, :tLT, "<" }
178
+ when text = ss.scan(/\>\=/) then
179
+ action { result :arg_state, :tGEQ, ">=" }
180
+ when text = ss.scan(/\>\>=/) then
181
+ action { result :arg_state, :tOP_ASGN, ">>" }
182
+ when text = ss.scan(/\>\>/) then
183
+ action { result :arg_state, :tRSHFT, ">>" }
184
+ when text = ss.scan(/\>/) then
185
+ action { result :arg_state, :tGT, ">" }
186
+ when text = ss.scan(/\`/) then
187
+ process_backtick text
188
+ when text = ss.scan(/\?/) then
189
+ process_questionmark text
190
+ when text = ss.scan(/\&\&\=/) then
191
+ action { result(:expr_beg, :tOP_ASGN, "&&") }
192
+ when text = ss.scan(/\&\&/) then
193
+ action { result(:expr_beg, :tANDOP, "&&") }
194
+ when text = ss.scan(/\&\=/) then
195
+ action { result(:expr_beg, :tOP_ASGN, "&" ) }
196
+ when text = ss.scan(/\&/) then
197
+ process_amper text
198
+ when text = ss.scan(/\//) then
199
+ process_slash text
200
+ when text = ss.scan(/\^=/) then
201
+ action { result(:expr_beg, :tOP_ASGN, "^") }
202
+ when text = ss.scan(/\^/) then
203
+ action { result(:arg_state, :tCARET, "^") }
204
+ when text = ss.scan(/\;/) then
205
+ action { self.command_start = true; result(:expr_beg, :tSEMI, ";") }
206
+ when in_arg_state? && (text = ss.scan(/\~@/)) then
207
+ action { result(:arg_state, :tTILDE, "~") }
208
+ when text = ss.scan(/\~/) then
209
+ action { result(:arg_state, :tTILDE, "~") }
210
+ when text = ss.scan(/\\\r?\n/) then
211
+ action { self.lineno += 1; self.space_seen = true; next }
212
+ when text = ss.scan(/\\/) then
213
+ action { rb_compile_error "bare backslash only allowed before newline" }
214
+ when text = ss.scan(/\%/) then
215
+ process_percent text
216
+ when text = ss.scan(/\$_\w+/) then
217
+ process_gvar text
218
+ when text = ss.scan(/\$_/) then
219
+ process_gvar text
220
+ when text = ss.scan(/\$[~*$?!@\/\\;,.=:<>\"]|\$-\w?/) then
221
+ process_gvar text
222
+ when in_fname? && (text = ss.scan(/\$([\&\`\'\+])/)) then
223
+ process_gvar text
224
+ when text = ss.scan(/\$([\&\`\'\+])/) then
225
+ process_backref text
226
+ when in_fname? && (text = ss.scan(/\$([1-9]\d*)/)) then
227
+ process_gvar text
228
+ when text = ss.scan(/\$([1-9]\d*)/) then
229
+ process_nthref text
230
+ when text = ss.scan(/\$0/) then
231
+ process_gvar text
232
+ when text = ss.scan(/\$\W|\$\z/) then
233
+ process_gvar_oddity text
234
+ when text = ss.scan(/\$\w+/) then
235
+ process_gvar text
236
+ when text = ss.scan(/\_/) then
237
+ process_underscore text
238
+ when text = ss.scan(/#{IDENT}/o) then
239
+ process_token text
240
+ when text = ss.scan(/\004|\032|\000|\Z/) then
241
+ action { [RubyLexer::EOF, RubyLexer::EOF] }
242
+ when text = ss.scan(/./) then
243
+ action { rb_compile_error "Invalid char #{text.inspect} in expression" }
244
+ else
245
+ text = ss.string[ss.pos .. -1]
246
+ raise ScanError, "can not match (#{state.inspect}): '#{text}'"
247
+ end
248
+ else
249
+ raise ScanError, "undefined state: '#{state}'"
250
+ end # token = case state
251
+
252
+ next unless token # allow functions to trigger redo w/ nil
253
+ end # until
254
+
255
+ raise "bad lexical result: #{token.inspect}" unless
256
+ token.nil? || (Array === token && token.size >= 2)
257
+
258
+ # auto-switch state
259
+ self.state = token.last if token && token.first == :state # a token whose first element is :state carries the new state as its last element
260
+
261
+ token
262
+ end # def next_token
263
+ end # class