prism 0.19.0 → 0.24.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (69) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +102 -1
  3. data/Makefile +5 -0
  4. data/README.md +9 -6
  5. data/config.yml +236 -38
  6. data/docs/build_system.md +19 -2
  7. data/docs/cruby_compilation.md +27 -0
  8. data/docs/parser_translation.md +34 -0
  9. data/docs/parsing_rules.md +19 -0
  10. data/docs/releasing.md +84 -16
  11. data/docs/ruby_api.md +1 -1
  12. data/docs/ruby_parser_translation.md +19 -0
  13. data/docs/serialization.md +19 -5
  14. data/ext/prism/api_node.c +1989 -1525
  15. data/ext/prism/extension.c +130 -30
  16. data/ext/prism/extension.h +2 -2
  17. data/include/prism/ast.h +1700 -505
  18. data/include/prism/defines.h +8 -0
  19. data/include/prism/diagnostic.h +49 -7
  20. data/include/prism/encoding.h +17 -0
  21. data/include/prism/options.h +40 -14
  22. data/include/prism/parser.h +34 -18
  23. data/include/prism/util/pm_buffer.h +9 -0
  24. data/include/prism/util/pm_constant_pool.h +18 -0
  25. data/include/prism/util/pm_newline_list.h +4 -14
  26. data/include/prism/util/pm_strpbrk.h +4 -1
  27. data/include/prism/version.h +2 -2
  28. data/include/prism.h +19 -2
  29. data/lib/prism/debug.rb +11 -5
  30. data/lib/prism/desugar_compiler.rb +225 -80
  31. data/lib/prism/dot_visitor.rb +36 -14
  32. data/lib/prism/dsl.rb +302 -299
  33. data/lib/prism/ffi.rb +107 -76
  34. data/lib/prism/lex_compat.rb +17 -1
  35. data/lib/prism/node.rb +4580 -2607
  36. data/lib/prism/node_ext.rb +27 -4
  37. data/lib/prism/parse_result.rb +75 -29
  38. data/lib/prism/serialize.rb +633 -305
  39. data/lib/prism/translation/parser/compiler.rb +1838 -0
  40. data/lib/prism/translation/parser/lexer.rb +335 -0
  41. data/lib/prism/translation/parser/rubocop.rb +45 -0
  42. data/lib/prism/translation/parser.rb +190 -0
  43. data/lib/prism/translation/parser33.rb +12 -0
  44. data/lib/prism/translation/parser34.rb +12 -0
  45. data/lib/prism/translation/ripper.rb +696 -0
  46. data/lib/prism/translation/ruby_parser.rb +1521 -0
  47. data/lib/prism/translation.rb +11 -0
  48. data/lib/prism.rb +1 -1
  49. data/prism.gemspec +18 -7
  50. data/rbi/prism.rbi +150 -88
  51. data/rbi/prism_static.rbi +15 -3
  52. data/sig/prism.rbs +996 -961
  53. data/sig/prism_static.rbs +123 -46
  54. data/src/diagnostic.c +264 -219
  55. data/src/encoding.c +21 -26
  56. data/src/node.c +2 -6
  57. data/src/options.c +29 -5
  58. data/src/prettyprint.c +176 -44
  59. data/src/prism.c +1499 -564
  60. data/src/serialize.c +35 -21
  61. data/src/token_type.c +353 -4
  62. data/src/util/pm_buffer.c +11 -0
  63. data/src/util/pm_constant_pool.c +37 -11
  64. data/src/util/pm_newline_list.c +6 -15
  65. data/src/util/pm_string.c +0 -7
  66. data/src/util/pm_strpbrk.c +122 -14
  67. metadata +16 -5
  68. data/docs/building.md +0 -29
  69. data/lib/prism/ripper_compat.rb +0 -207
@@ -0,0 +1,335 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Prism
4
+ module Translation
5
+ class Parser
6
+ # Accepts a list of prism tokens and converts them into the expected
7
+ # format for the parser gem.
8
+ class Lexer
9
# The direct translation of token types between the two lexers. Keys are
# prism token types and values are the corresponding parser gem token
# types. A nil value marks a prism token that has no parser gem equivalent.
TYPES = {
  # These tokens should never appear in the output of the lexer.
  EOF: nil,
  MISSING: nil,
  NOT_PROVIDED: nil,
  IGNORED_NEWLINE: nil,
  EMBDOC_END: nil,
  EMBDOC_LINE: nil,
  __END__: nil,

  # These tokens have more or less direct mappings.
  AMPERSAND: :tAMPER2,
  AMPERSAND_AMPERSAND: :tANDOP,
  AMPERSAND_AMPERSAND_EQUAL: :tOP_ASGN,
  AMPERSAND_DOT: :tANDDOT,
  AMPERSAND_EQUAL: :tOP_ASGN,
  BACK_REFERENCE: :tBACK_REF,
  BACKTICK: :tXSTRING_BEG,
  BANG: :tBANG,
  BANG_EQUAL: :tNEQ,
  BANG_TILDE: :tNMATCH,
  BRACE_LEFT: :tLCURLY,
  BRACE_RIGHT: :tRCURLY,
  BRACKET_LEFT: :tLBRACK2,
  BRACKET_LEFT_ARRAY: :tLBRACK,
  BRACKET_LEFT_RIGHT: :tAREF,
  BRACKET_LEFT_RIGHT_EQUAL: :tASET,
  BRACKET_RIGHT: :tRBRACK,
  CARET: :tCARET,
  CARET_EQUAL: :tOP_ASGN,
  CHARACTER_LITERAL: :tCHARACTER,
  CLASS_VARIABLE: :tCVAR,
  COLON: :tCOLON,
  COLON_COLON: :tCOLON2,
  COMMA: :tCOMMA,
  COMMENT: :tCOMMENT,
  CONSTANT: :tCONSTANT,
  DOT: :tDOT,
  DOT_DOT: :tDOT2,
  DOT_DOT_DOT: :tDOT3,
  EMBDOC_BEGIN: :tCOMMENT,
  EMBEXPR_BEGIN: :tSTRING_DBEG,
  EMBEXPR_END: :tSTRING_DEND,
  EMBVAR: :tSTRING_DVAR,
  EQUAL: :tEQL,
  EQUAL_EQUAL: :tEQ,
  EQUAL_EQUAL_EQUAL: :tEQQ,
  EQUAL_GREATER: :tASSOC,
  EQUAL_TILDE: :tMATCH,
  FLOAT: :tFLOAT,
  FLOAT_IMAGINARY: :tIMAGINARY,
  FLOAT_RATIONAL: :tRATIONAL,
  FLOAT_RATIONAL_IMAGINARY: :tIMAGINARY,
  GLOBAL_VARIABLE: :tGVAR,
  GREATER: :tGT,
  GREATER_EQUAL: :tGEQ,
  GREATER_GREATER: :tRSHFT,
  GREATER_GREATER_EQUAL: :tOP_ASGN,
  HEREDOC_START: :tSTRING_BEG,
  HEREDOC_END: :tSTRING_END,
  IDENTIFIER: :tIDENTIFIER,
  INSTANCE_VARIABLE: :tIVAR,
  INTEGER: :tINTEGER,
  INTEGER_IMAGINARY: :tIMAGINARY,
  INTEGER_RATIONAL: :tRATIONAL,
  INTEGER_RATIONAL_IMAGINARY: :tIMAGINARY,
  KEYWORD_ALIAS: :kALIAS,
  KEYWORD_AND: :kAND,
  KEYWORD_BEGIN: :kBEGIN,
  KEYWORD_BEGIN_UPCASE: :klBEGIN,
  KEYWORD_BREAK: :kBREAK,
  KEYWORD_CASE: :kCASE,
  KEYWORD_CLASS: :kCLASS,
  KEYWORD_DEF: :kDEF,
  KEYWORD_DEFINED: :kDEFINED,
  KEYWORD_DO: :kDO,
  KEYWORD_DO_LOOP: :kDO_COND,
  KEYWORD_END: :kEND,
  KEYWORD_END_UPCASE: :klEND,
  KEYWORD_ENSURE: :kENSURE,
  KEYWORD_ELSE: :kELSE,
  KEYWORD_ELSIF: :kELSIF,
  KEYWORD_FALSE: :kFALSE,
  KEYWORD_FOR: :kFOR,
  KEYWORD_IF: :kIF,
  KEYWORD_IF_MODIFIER: :kIF_MOD,
  KEYWORD_IN: :kIN,
  KEYWORD_MODULE: :kMODULE,
  KEYWORD_NEXT: :kNEXT,
  KEYWORD_NIL: :kNIL,
  KEYWORD_NOT: :kNOT,
  KEYWORD_OR: :kOR,
  KEYWORD_REDO: :kREDO,
  KEYWORD_RESCUE: :kRESCUE,
  KEYWORD_RESCUE_MODIFIER: :kRESCUE_MOD,
  KEYWORD_RETRY: :kRETRY,
  KEYWORD_RETURN: :kRETURN,
  KEYWORD_SELF: :kSELF,
  KEYWORD_SUPER: :kSUPER,
  KEYWORD_THEN: :kTHEN,
  KEYWORD_TRUE: :kTRUE,
  KEYWORD_UNDEF: :kUNDEF,
  KEYWORD_UNLESS: :kUNLESS,
  KEYWORD_UNLESS_MODIFIER: :kUNLESS_MOD,
  KEYWORD_UNTIL: :kUNTIL,
  KEYWORD_UNTIL_MODIFIER: :kUNTIL_MOD,
  KEYWORD_WHEN: :kWHEN,
  KEYWORD_WHILE: :kWHILE,
  KEYWORD_WHILE_MODIFIER: :kWHILE_MOD,
  KEYWORD_YIELD: :kYIELD,
  KEYWORD___ENCODING__: :k__ENCODING__,
  KEYWORD___FILE__: :k__FILE__,
  KEYWORD___LINE__: :k__LINE__,
  LABEL: :tLABEL,
  LABEL_END: :tLABEL_END,
  LAMBDA_BEGIN: :tLAMBEG,
  LESS: :tLT,
  LESS_EQUAL: :tLEQ,
  LESS_EQUAL_GREATER: :tCMP,
  LESS_LESS: :tLSHFT,
  LESS_LESS_EQUAL: :tOP_ASGN,
  METHOD_NAME: :tFID,
  MINUS: :tMINUS,
  MINUS_EQUAL: :tOP_ASGN,
  MINUS_GREATER: :tLAMBDA,
  NEWLINE: :tNL,
  NUMBERED_REFERENCE: :tNTH_REF,
  PARENTHESIS_LEFT: :tLPAREN,
  PARENTHESIS_LEFT_PARENTHESES: :tLPAREN_ARG,
  PARENTHESIS_RIGHT: :tRPAREN,
  PERCENT: :tPERCENT,
  PERCENT_EQUAL: :tOP_ASGN,
  PERCENT_LOWER_I: :tQSYMBOLS_BEG,
  PERCENT_LOWER_W: :tQWORDS_BEG,
  PERCENT_UPPER_I: :tSYMBOLS_BEG,
  PERCENT_UPPER_W: :tWORDS_BEG,
  PERCENT_LOWER_X: :tXSTRING_BEG,
  PLUS: :tPLUS,
  PLUS_EQUAL: :tOP_ASGN,
  PIPE_EQUAL: :tOP_ASGN,
  PIPE: :tPIPE,
  PIPE_PIPE: :tOROP,
  PIPE_PIPE_EQUAL: :tOP_ASGN,
  QUESTION_MARK: :tEH,
  REGEXP_BEGIN: :tREGEXP_BEG,
  REGEXP_END: :tSTRING_END,
  SEMICOLON: :tSEMI,
  SLASH: :tDIVIDE,
  SLASH_EQUAL: :tOP_ASGN,
  STAR: :tSTAR2,
  STAR_EQUAL: :tOP_ASGN,
  STAR_STAR: :tPOW,
  STAR_STAR_EQUAL: :tOP_ASGN,
  STRING_BEGIN: :tSTRING_BEG,
  STRING_CONTENT: :tSTRING_CONTENT,
  STRING_END: :tSTRING_END,
  SYMBOL_BEGIN: :tSYMBEG,
  TILDE: :tTILDE,

  # The U-prefixed prism tokens are the unary/prefix forms of an operator.
  # Each one maps to the parser gem's dedicated prefix token rather than to
  # the token used for the binary operator with the same spelling.
  UAMPERSAND: :tAMPER,
  UCOLON_COLON: :tCOLON3,
  UDOT_DOT: :tBDOT2,
  UDOT_DOT_DOT: :tBDOT3,
  UMINUS: :tUMINUS,
  UMINUS_NUM: :tUNARY_NUM,
  UPLUS: :tUPLUS,
  USTAR: :tSTAR,
  USTAR_STAR: :tDSTAR,
  WORDS_SEP: :tSPACE
}.freeze
179
+
180
+ private_constant :TYPES
181
+
182
+ # The Parser::Source::Buffer that the tokens were lexed from.
183
+ attr_reader :source_buffer
184
+
185
+ # An array of prism tokens that we lexed.
186
+ attr_reader :lexed
187
+
188
+ # A hash that maps offsets in bytes to offsets in characters.
189
+ attr_reader :offset_cache
190
+
191
# Build a lexer translator.
#
# source_buffer - the buffer that ranges in the output are attached to.
# lexed         - the prism tokens to translate.
# offset_cache  - an object indexed by byte offset that answers with the
#                 matching character offset.
def initialize(source_buffer, lexed, offset_cache)
  @source_buffer, @lexed, @offset_cache = source_buffer, lexed, offset_cache
end
198
+
199
+ Range = ::Parser::Source::Range # :nodoc:
200
+ private_constant :Range
201
+
202
# Convert the prism tokens into the expected format for the parser gem.
#
# Returns an array of `[type, [value, range]]` entries. The prism tokens held
# in `lexed` are never mutated, so calling this method more than once yields
# the same result each time.
def to_a
  tokens = []
  index = 0

  # Translates a pair of prism byte offsets into a parser gem range.
  range = ->(start_offset, end_offset) {
    Range.new(source_buffer, offset_cache[start_offset], offset_cache[end_offset])
  }

  while index < lexed.length
    token = lexed[index]
    index += 1
    next if token.type == :IGNORED_NEWLINE || token.type == :EOF

    type = TYPES.fetch(token.type)
    value = token.value
    start_offset = token.location.start_offset
    end_offset = token.location.end_offset
    location = range.call(start_offset, end_offset)

    case type
    when :tCHARACTER
      value = value.delete_prefix("?")
    when :tCOMMENT
      if token.type == :EMBDOC_BEGIN
        # Fold every following token up to and including EMBDOC_END into
        # this single comment token. Stop at the end of the token list if
        # the terminator is missing instead of dereferencing nil.
        while (next_token = lexed[index])
          value += next_token.value
          end_offset = next_token.location.end_offset
          index += 1
          break if next_token.type == :EMBDOC_END
        end

        location = range.call(start_offset, end_offset)
      else
        # Drop the line terminator from both the value and the range. Only
        # shrink the range by what was actually removed, so a comment that
        # ends the file without a newline keeps its final character.
        stripped = value.chomp
        location = range.call(start_offset, end_offset - (value.bytesize - stripped.bytesize))
        value = stripped
      end
    when :tNL, :tSPACE, :tSTRING_DVAR
      value = nil
    when :tFLOAT
      value = Float(value)
    when :tIMAGINARY
      # Hand over a copy so the helper cannot alter the token's own string.
      value = parse_complex(value.dup)
    when :tINTEGER
      if value.start_with?("+")
        # A leading plus sign is emitted as its own token ahead of the
        # integer, and the integer's range then starts after the sign.
        tokens << [:tUNARY_NUM, ["+", range.call(start_offset, start_offset + 1)]]
        location = range.call(start_offset + 1, end_offset)
      end

      value = Integer(value)
    when :tLABEL, :tLABEL_END
      value = value.chomp(":")
    when :tNTH_REF
      value = Integer(value.delete_prefix("$"))
    when :tOP_ASGN
      value = value.chomp("=")
    when :tRATIONAL
      # Hand over a copy so the helper cannot alter the token's own string.
      value = parse_rational(value.dup)
    when :tSTRING_BEG
      next_token = lexed[index]
      next_next_token = lexed[index + 1]
      plain_quote = value == "\"" || value == "'"

      if plain_quote && next_token && next_token.type == :STRING_END
        # An empty plain string collapses into a single tSTRING token.
        next_location = token.location.join(next_token.location)
        type = :tSTRING
        value = ""
        location = range.call(next_location.start_offset, next_location.end_offset)
        index += 1
      elsif plain_quote && next_token && next_token.type == :STRING_CONTENT && next_next_token && next_next_token.type == :STRING_END
        # A plain string with one content token also collapses into a
        # single tSTRING token carrying that content.
        next_location = token.location.join(next_next_token.location)
        type = :tSTRING
        value = next_token.value
        location = range.call(next_location.start_offset, next_location.end_offset)
        index += 2
      elsif value.start_with?("<<")
        # Heredoc openers are reduced to "<<" plus their quote character,
        # with a double quote used when none was written.
        quote = value[2] == "-" || value[2] == "~" ? value[3] : value[2]
        value = "<<#{quote == "'" || quote == "\"" ? quote : "\""}"
      end
    when :tSTRING_END
      if token.type == :REGEXP_END
        # Only the closing delimiter belongs to this token; any trailing
        # characters are emitted separately below as tREGEXP_OPT.
        value = value[0]
        location = range.call(start_offset, start_offset + 1)
      end
    when :tSYMBEG
      if (next_token = lexed[index]) && next_token.type != :STRING_CONTENT && next_token.type != :EMBEXPR_BEGIN && next_token.type != :EMBVAR
        next_location = token.location.join(next_token.location)
        type = :tSYMBOL
        value = next_token.value
        value = { "~@" => "~", "!@" => "!" }.fetch(value, value)
        location = range.call(next_location.start_offset, next_location.end_offset)
        index += 1
      end
    when :tFID
      # The previous token may not exist when this is the first one emitted.
      type = :tIDENTIFIER if !tokens.empty? && tokens.dig(-1, 0) == :kDEF
    end

    tokens << [type, [value, location]]

    if token.type == :REGEXP_END
      tokens << [:tREGEXP_OPT, [token.value[1..], range.call(start_offset + 1, end_offset)]]
    end
  end

  tokens
end
306
+
307
+ private
308
+
309
# Parse a complex from the string representation.
#
# value - the source text of an imaginary literal, such as "1i", "0x10i",
#         "1.5i" or "3ri". The string is not modified.
#
# Returns a Complex whose real part is zero.
def parse_complex(value)
  digits = value.chomp("i")

  if digits.end_with?("r")
    Complex(0, parse_rational(digits))
  elsif (integer = Integer(digits, exception: false))
    # Integer() accepts radix prefixes (0b, 0o, 0d, 0x), underscores, and a
    # bare leading zero for octal. Complex() would read "010" as decimal 10.
    Complex(0, integer)
  else
    Complex(0, digits)
  end
end
321
+
322
# Parse a rational from the string representation.
#
# value - the source text of a rational literal, such as "3r", "0x10r" or
#         "1.5r". The string is not modified.
#
# Returns a Rational.
def parse_rational(value)
  digits = value.chomp("r")

  if (integer = Integer(digits, exception: false))
    # Integer() accepts radix prefixes (0b, 0o, 0d, 0x), underscores, and a
    # bare leading zero for octal. Rational() would read "010" as decimal 10.
    Rational(integer)
  else
    Rational(digits)
  end
end
332
+ end
333
+ end
334
+ end
335
+ end
@@ -0,0 +1,45 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "parser"
4
+ require "rubocop"
5
+
6
+ require "prism"
7
+ require "prism/translation/parser"
8
+
9
+ module Prism
10
+ module Translation
11
+ class Parser
12
+ # These are the special version numbers that should be used in RuboCop
13
+ # configuration files to trigger using prism.
14
+
15
+ # For Ruby 3.3
16
+ VERSION_3_3 = 80_82_73_83_77.33
17
+
18
+ # For Ruby 3.4
19
+ VERSION_3_4 = 80_82_73_83_77.34
20
+
21
# Hook module that is prepended onto RuboCop::AST::ProcessedSource.
module ProcessedSource
  # Pick the parser class for the requested Ruby version. The two special
  # prism version numbers load and return the matching prism translation
  # parser; every other version is left to the original implementation.
  def parser_class(ruby_version)
    if ruby_version == Prism::Translation::Parser::VERSION_3_3
      require "prism/translation/parser33"
      return Prism::Translation::Parser33
    end

    if ruby_version == Prism::Translation::Parser::VERSION_3_4
      require "prism/translation/parser34"
      return Prism::Translation::Parser34
    end

    super
  end
end
37
+ end
38
+ end
39
+ end
40
+
41
# :stopdoc:
RuboCop::AST::ProcessedSource.prepend(Prism::Translation::Parser::ProcessedSource)

# Replace the list of known target versions with one that also contains the
# special prism version numbers. Both versions handled by parser_class are
# added here so that each of them can be selected.
known_rubies = RuboCop::TargetRuby.const_get(:KNOWN_RUBIES)
RuboCop::TargetRuby.send(:remove_const, :KNOWN_RUBIES)
RuboCop::TargetRuby::KNOWN_RUBIES = [
  *known_rubies,
  Prism::Translation::Parser::VERSION_3_3,
  Prism::Translation::Parser::VERSION_3_4
].freeze
@@ -0,0 +1,190 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "parser"
4
+
5
+ module Prism
6
+ module Translation
7
+ # This class is the entry-point for converting a prism syntax tree into the
8
+ # whitequark/parser gem's syntax tree. It inherits from the base parser for
9
+ # the parser gem, and overrides the parse* methods to parse with prism and
10
+ # then translate.
11
+ class Parser < ::Parser::Base
12
# Diagnostics in the parser gem take their text from a hard-coded set of
# error messages. This subclass carries the message produced by prism
# instead.
class Diagnostic < ::Parser::Diagnostic
  # Initialize a new diagnostic with the given message and location.
  def initialize(message, location)
    # The message is stored before the parent initializer runs; keep this
    # order. NOTE(review): presumably the parent freezes the instance, which
    # would make a later assignment fail — confirm against the parser gem.
    @message = message
    super(:error, :prism_error, {}, location, [])
  end

  # The message generated by prism.
  attr_reader :message
end
25
+
26
+ Racc_debug_parser = false # :nodoc:
27
+
28
# The Ruby syntax version this parser targets, as the integer that
# convert_for_prism translates into a prism version string (34 here).
def version # :nodoc:
  34
end
31
+
32
# The default encoding for Ruby files is UTF-8.
#
# Returns Encoding::UTF_8.
def default_encoding
  Encoding::UTF_8
end
36
+
37
# Takes no arguments and does nothing.
# NOTE(review): presumably this overrides an error hook inherited from
# ::Parser::Base, since errors are reported through unwrap — confirm.
def yyerror # :nodoc:
end
39
+
40
# Parse the given source buffer with prism and return the translated AST.
# The buffer is only held on the instance for the duration of the call.
def parse(source_buffer)
  @source_buffer = source_buffer
  code = source_buffer.source
  cache = build_offset_cache(code)

  parsed = Prism.parse(code, filepath: source_buffer.name, version: convert_for_prism(version))
  build_ast(unwrap(parsed, cache).value, cache)
ensure
  @source_buffer = nil
end
52
+
53
# Parse the given source buffer with prism and return a two-element array
# holding the translated AST and the translated source code comments. The
# buffer is only held on the instance for the duration of the call.
def parse_with_comments(source_buffer)
  @source_buffer = source_buffer
  code = source_buffer.source
  cache = build_offset_cache(code)

  parsed = Prism.parse(code, filepath: source_buffer.name, version: convert_for_prism(version))
  result = unwrap(parsed, cache)

  ast = build_ast(result.value, cache)
  comments = build_comments(result.comments, cache)
  [ast, comments]
ensure
  @source_buffer = nil
end
68
+
69
# Parses a source buffer and returns the AST, the source code comments,
# and the tokens emitted by the lexer.
#
# source_buffer - the buffer to parse.
# recover       - when true, a ::Parser::SyntaxError raised while reporting
#                 prism's errors is swallowed and the comments and tokens
#                 are still returned; when false it is re-raised.
#
# Returns `[ast, comments, tokens]`. The AST is nil when the parse was not
# successful.
def tokenize(source_buffer, recover = false)
  @source_buffer = source_buffer
  source = source_buffer.source

  offset_cache = build_offset_cache(source)

  # Keep the parse result outside of the rescue below. Only the reporting of
  # errors can raise the syntax error, and the result is still needed
  # afterwards when recovering.
  result = Prism.parse_lex(source, filepath: source_buffer.name, version: convert_for_prism(version))

  begin
    unwrap(result, offset_cache)
  rescue ::Parser::SyntaxError
    raise unless recover
  end

  program, tokens = result.value
  ast = build_ast(program, offset_cache) if result.success?

  [
    ast,
    build_comments(result.comments, offset_cache),
    build_tokens(tokens, offset_cache)
  ]
ensure
  @source_buffer = nil
end
94
+
95
# Prism already resolves numbered parameters, so no declaration logic is
# needed here. This only reports whether the node's first child is spelled
# like a numbered parameter (_1 through _9).
def try_declare_numparam(node)
  name = node.children[0]
  name.match?(/\A_[1-9]\z/)
end
100
+
101
+ private
102
+
103
# This is a hook to allow consumers to disable some errors if they don't
# want them to block creating the syntax tree.
#
# unwrap skips every error for which this returns false. This
# implementation ignores its argument and always returns true.
def valid_error?(error)
  true
end
108
+
109
# Report every error prism produced during the parse to the diagnostics
# engine, skipping those rejected by valid_error?. Reporting may raise a
# syntax error; otherwise the result is handed back unchanged.
def unwrap(result, offset_cache)
  result.errors.each do |error|
    if valid_error?(error)
      range = build_range(error.location, offset_cache)
      diagnostics.process(Diagnostic.new(error.message, range))
    end
  end

  result
end
121
+
122
# Prism reports offsets in bytes, but the parser gem expects offsets in
# characters, so every offset has to be converted before it is used.
#
# When the source has as many bytes as characters the two kinds of offset
# are the same, and an identity lambda is returned. Otherwise an array is
# returned whose index is a byte offset and whose value is the character
# offset, with one final entry for the position just past the last byte.
def build_offset_cache(source)
  return -> (offset) { offset } if source.bytesize == source.length

  cache = []

  source.each_char.with_index do |char, char_offset|
    cache.concat([char_offset] * char.bytesize)
  end

  cache << source.length
end
144
+
145
# Translate the prism AST into a parser gem AST by walking it with a
# compiler bound to this parser and the given offset cache.
def build_ast(program, offset_cache)
  compiler = Compiler.new(self, offset_cache)
  program.accept(compiler)
end
149
+
150
# Translate the prism comments into parser gem comments, one per input
# comment, each wrapping the range built from the comment's location.
def build_comments(comments, offset_cache)
  comments.map do |comment|
    range = build_range(comment.location, offset_cache)
    ::Parser::Source::Comment.new(range)
  end
end
156
+
157
# Translate the prism tokens into parser gem tokens. Only the first member
# of each entry in the given list is passed on to the lexer translator.
def build_tokens(tokens, offset_cache)
  prism_tokens = tokens.map(&:first)
  Lexer.new(source_buffer, prism_tokens, offset_cache).to_a
end
161
+
162
# Turn a prism location into a parser gem range on the current source
# buffer, converting both ends through the offset cache.
def build_range(location, offset_cache)
  begin_pos = offset_cache[location.start_offset]
  end_pos = offset_cache[location.end_offset]

  ::Parser::Source::Range.new(source_buffer, begin_pos, end_pos)
end
170
+
171
# Map a parser gem version number onto the version string that prism
# understands. Anything other than 33 or 34 maps to "latest".
def convert_for_prism(version)
  return "3.3.0" if version == 33
  return "3.4.0" if version == 34

  "latest"
end
182
+
183
+ require_relative "parser/compiler"
184
+ require_relative "parser/lexer"
185
+
186
+ private_constant :Compiler
187
+ private_constant :Lexer
188
+ end
189
+ end
190
+ end
@@ -0,0 +1,12 @@
1
# frozen_string_literal: true

require_relative "parser"

module Prism
  module Translation
    # This class is the entry-point for Ruby 3.3 of `Prism::Translation::Parser`.
    # It differs from the base parser only in the version it reports.
    class Parser33 < Parser
      def version # :nodoc:
        33
      end
    end
  end
end
@@ -0,0 +1,12 @@
1
# frozen_string_literal: true

require_relative "parser"

module Prism
  module Translation
    # This class is the entry-point for Ruby 3.4 of `Prism::Translation::Parser`.
    # It reports version 34 explicitly rather than relying on the base class.
    class Parser34 < Parser
      def version # :nodoc:
        34
      end
    end
  end
end