jruby-prism-parser 0.23.0.pre.SNAPSHOT-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +401 -0
  3. data/CODE_OF_CONDUCT.md +76 -0
  4. data/CONTRIBUTING.md +62 -0
  5. data/LICENSE.md +7 -0
  6. data/Makefile +101 -0
  7. data/README.md +98 -0
  8. data/config.yml +2902 -0
  9. data/docs/build_system.md +91 -0
  10. data/docs/configuration.md +64 -0
  11. data/docs/cruby_compilation.md +27 -0
  12. data/docs/design.md +53 -0
  13. data/docs/encoding.md +121 -0
  14. data/docs/fuzzing.md +88 -0
  15. data/docs/heredocs.md +36 -0
  16. data/docs/javascript.md +118 -0
  17. data/docs/local_variable_depth.md +229 -0
  18. data/docs/mapping.md +117 -0
  19. data/docs/parser_translation.md +34 -0
  20. data/docs/parsing_rules.md +19 -0
  21. data/docs/releasing.md +98 -0
  22. data/docs/ripper.md +36 -0
  23. data/docs/ruby_api.md +43 -0
  24. data/docs/ruby_parser_translation.md +19 -0
  25. data/docs/serialization.md +209 -0
  26. data/docs/testing.md +55 -0
  27. data/ext/prism/api_node.c +5098 -0
  28. data/ext/prism/api_pack.c +267 -0
  29. data/ext/prism/extconf.rb +110 -0
  30. data/ext/prism/extension.c +1155 -0
  31. data/ext/prism/extension.h +18 -0
  32. data/include/prism/ast.h +5807 -0
  33. data/include/prism/defines.h +102 -0
  34. data/include/prism/diagnostic.h +339 -0
  35. data/include/prism/encoding.h +265 -0
  36. data/include/prism/node.h +57 -0
  37. data/include/prism/options.h +230 -0
  38. data/include/prism/pack.h +152 -0
  39. data/include/prism/parser.h +732 -0
  40. data/include/prism/prettyprint.h +26 -0
  41. data/include/prism/regexp.h +33 -0
  42. data/include/prism/util/pm_buffer.h +155 -0
  43. data/include/prism/util/pm_char.h +205 -0
  44. data/include/prism/util/pm_constant_pool.h +209 -0
  45. data/include/prism/util/pm_list.h +97 -0
  46. data/include/prism/util/pm_memchr.h +29 -0
  47. data/include/prism/util/pm_newline_list.h +93 -0
  48. data/include/prism/util/pm_state_stack.h +42 -0
  49. data/include/prism/util/pm_string.h +150 -0
  50. data/include/prism/util/pm_string_list.h +44 -0
  51. data/include/prism/util/pm_strncasecmp.h +32 -0
  52. data/include/prism/util/pm_strpbrk.h +46 -0
  53. data/include/prism/version.h +29 -0
  54. data/include/prism.h +289 -0
  55. data/jruby-prism.jar +0 -0
  56. data/lib/prism/compiler.rb +486 -0
  57. data/lib/prism/debug.rb +206 -0
  58. data/lib/prism/desugar_compiler.rb +207 -0
  59. data/lib/prism/dispatcher.rb +2150 -0
  60. data/lib/prism/dot_visitor.rb +4634 -0
  61. data/lib/prism/dsl.rb +785 -0
  62. data/lib/prism/ffi.rb +346 -0
  63. data/lib/prism/lex_compat.rb +908 -0
  64. data/lib/prism/mutation_compiler.rb +753 -0
  65. data/lib/prism/node.rb +17864 -0
  66. data/lib/prism/node_ext.rb +212 -0
  67. data/lib/prism/node_inspector.rb +68 -0
  68. data/lib/prism/pack.rb +224 -0
  69. data/lib/prism/parse_result/comments.rb +177 -0
  70. data/lib/prism/parse_result/newlines.rb +64 -0
  71. data/lib/prism/parse_result.rb +498 -0
  72. data/lib/prism/pattern.rb +250 -0
  73. data/lib/prism/serialize.rb +1354 -0
  74. data/lib/prism/translation/parser/compiler.rb +1838 -0
  75. data/lib/prism/translation/parser/lexer.rb +335 -0
  76. data/lib/prism/translation/parser/rubocop.rb +37 -0
  77. data/lib/prism/translation/parser.rb +178 -0
  78. data/lib/prism/translation/ripper.rb +577 -0
  79. data/lib/prism/translation/ruby_parser.rb +1521 -0
  80. data/lib/prism/translation.rb +11 -0
  81. data/lib/prism/version.rb +3 -0
  82. data/lib/prism/visitor.rb +495 -0
  83. data/lib/prism.rb +99 -0
  84. data/prism.gemspec +135 -0
  85. data/rbi/prism.rbi +7767 -0
  86. data/rbi/prism_static.rbi +207 -0
  87. data/sig/prism.rbs +4773 -0
  88. data/sig/prism_static.rbs +201 -0
  89. data/src/diagnostic.c +400 -0
  90. data/src/encoding.c +5132 -0
  91. data/src/node.c +2786 -0
  92. data/src/options.c +213 -0
  93. data/src/pack.c +493 -0
  94. data/src/prettyprint.c +8881 -0
  95. data/src/prism.c +18406 -0
  96. data/src/regexp.c +638 -0
  97. data/src/serialize.c +1554 -0
  98. data/src/token_type.c +700 -0
  99. data/src/util/pm_buffer.c +190 -0
  100. data/src/util/pm_char.c +318 -0
  101. data/src/util/pm_constant_pool.c +322 -0
  102. data/src/util/pm_list.c +49 -0
  103. data/src/util/pm_memchr.c +35 -0
  104. data/src/util/pm_newline_list.c +84 -0
  105. data/src/util/pm_state_stack.c +25 -0
  106. data/src/util/pm_string.c +203 -0
  107. data/src/util/pm_string_list.c +28 -0
  108. data/src/util/pm_strncasecmp.c +24 -0
  109. data/src/util/pm_strpbrk.c +180 -0
  110. metadata +156 -0
@@ -0,0 +1,335 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Prism
4
+ module Translation
5
+ class Parser
6
+ # Accepts a list of prism tokens and converts them into the expected
7
+ # format for the parser gem.
8
+ class Lexer
9
# Direct translation table from prism token types to the token types used
# by the parser gem's lexer. Looked up with `fetch`, so every prism token
# type that can reach the lexer needs an entry here.
TYPES = {
  # These tokens should never appear in the output of the lexer.
  EOF: nil,
  MISSING: nil,
  NOT_PROVIDED: nil,
  IGNORED_NEWLINE: nil,
  EMBDOC_END: nil,
  EMBDOC_LINE: nil,
  __END__: nil,

  # These tokens have more or less direct mappings.
  AMPERSAND: :tAMPER2,
  AMPERSAND_AMPERSAND: :tANDOP,
  AMPERSAND_AMPERSAND_EQUAL: :tOP_ASGN,
  AMPERSAND_DOT: :tANDDOT,
  AMPERSAND_EQUAL: :tOP_ASGN,
  BACK_REFERENCE: :tBACK_REF,
  BACKTICK: :tXSTRING_BEG,
  BANG: :tBANG,
  BANG_EQUAL: :tNEQ,
  BANG_TILDE: :tNMATCH,
  BRACE_LEFT: :tLCURLY,
  BRACE_RIGHT: :tRCURLY,
  BRACKET_LEFT: :tLBRACK2,
  BRACKET_LEFT_ARRAY: :tLBRACK,
  BRACKET_LEFT_RIGHT: :tAREF,
  BRACKET_LEFT_RIGHT_EQUAL: :tASET,
  BRACKET_RIGHT: :tRBRACK,
  CARET: :tCARET,
  CARET_EQUAL: :tOP_ASGN,
  CHARACTER_LITERAL: :tCHARACTER,
  CLASS_VARIABLE: :tCVAR,
  COLON: :tCOLON,
  COLON_COLON: :tCOLON2,
  COMMA: :tCOMMA,
  COMMENT: :tCOMMENT,
  CONSTANT: :tCONSTANT,
  DOT: :tDOT,
  DOT_DOT: :tDOT2,
  DOT_DOT_DOT: :tDOT3,
  EMBDOC_BEGIN: :tCOMMENT,
  EMBEXPR_BEGIN: :tSTRING_DBEG,
  EMBEXPR_END: :tSTRING_DEND,
  EMBVAR: :tSTRING_DVAR,
  EQUAL: :tEQL,
  EQUAL_EQUAL: :tEQ,
  EQUAL_EQUAL_EQUAL: :tEQQ,
  EQUAL_GREATER: :tASSOC,
  EQUAL_TILDE: :tMATCH,
  FLOAT: :tFLOAT,
  FLOAT_IMAGINARY: :tIMAGINARY,
  FLOAT_RATIONAL: :tRATIONAL,
  FLOAT_RATIONAL_IMAGINARY: :tIMAGINARY,
  GLOBAL_VARIABLE: :tGVAR,
  GREATER: :tGT,
  GREATER_EQUAL: :tGEQ,
  GREATER_GREATER: :tRSHFT,
  GREATER_GREATER_EQUAL: :tOP_ASGN,
  HEREDOC_START: :tSTRING_BEG,
  HEREDOC_END: :tSTRING_END,
  IDENTIFIER: :tIDENTIFIER,
  INSTANCE_VARIABLE: :tIVAR,
  INTEGER: :tINTEGER,
  INTEGER_IMAGINARY: :tIMAGINARY,
  INTEGER_RATIONAL: :tRATIONAL,
  INTEGER_RATIONAL_IMAGINARY: :tIMAGINARY,
  KEYWORD_ALIAS: :kALIAS,
  KEYWORD_AND: :kAND,
  KEYWORD_BEGIN: :kBEGIN,
  KEYWORD_BEGIN_UPCASE: :klBEGIN,
  KEYWORD_BREAK: :kBREAK,
  KEYWORD_CASE: :kCASE,
  KEYWORD_CLASS: :kCLASS,
  KEYWORD_DEF: :kDEF,
  KEYWORD_DEFINED: :kDEFINED,
  KEYWORD_DO: :kDO,
  KEYWORD_DO_LOOP: :kDO_COND,
  KEYWORD_END: :kEND,
  KEYWORD_END_UPCASE: :klEND,
  KEYWORD_ENSURE: :kENSURE,
  KEYWORD_ELSE: :kELSE,
  KEYWORD_ELSIF: :kELSIF,
  KEYWORD_FALSE: :kFALSE,
  KEYWORD_FOR: :kFOR,
  KEYWORD_IF: :kIF,
  KEYWORD_IF_MODIFIER: :kIF_MOD,
  KEYWORD_IN: :kIN,
  KEYWORD_MODULE: :kMODULE,
  KEYWORD_NEXT: :kNEXT,
  KEYWORD_NIL: :kNIL,
  KEYWORD_NOT: :kNOT,
  KEYWORD_OR: :kOR,
  KEYWORD_REDO: :kREDO,
  KEYWORD_RESCUE: :kRESCUE,
  KEYWORD_RESCUE_MODIFIER: :kRESCUE_MOD,
  KEYWORD_RETRY: :kRETRY,
  KEYWORD_RETURN: :kRETURN,
  KEYWORD_SELF: :kSELF,
  KEYWORD_SUPER: :kSUPER,
  KEYWORD_THEN: :kTHEN,
  KEYWORD_TRUE: :kTRUE,
  KEYWORD_UNDEF: :kUNDEF,
  KEYWORD_UNLESS: :kUNLESS,
  KEYWORD_UNLESS_MODIFIER: :kUNLESS_MOD,
  KEYWORD_UNTIL: :kUNTIL,
  KEYWORD_UNTIL_MODIFIER: :kUNTIL_MOD,
  KEYWORD_WHEN: :kWHEN,
  KEYWORD_WHILE: :kWHILE,
  KEYWORD_WHILE_MODIFIER: :kWHILE_MOD,
  KEYWORD_YIELD: :kYIELD,
  KEYWORD___ENCODING__: :k__ENCODING__,
  KEYWORD___FILE__: :k__FILE__,
  KEYWORD___LINE__: :k__LINE__,
  LABEL: :tLABEL,
  LABEL_END: :tLABEL_END,
  LAMBDA_BEGIN: :tLAMBEG,
  LESS: :tLT,
  LESS_EQUAL: :tLEQ,
  LESS_EQUAL_GREATER: :tCMP,
  LESS_LESS: :tLSHFT,
  LESS_LESS_EQUAL: :tOP_ASGN,
  METHOD_NAME: :tFID,
  MINUS: :tMINUS,
  MINUS_EQUAL: :tOP_ASGN,
  MINUS_GREATER: :tLAMBDA,
  NEWLINE: :tNL,
  NUMBERED_REFERENCE: :tNTH_REF,
  PARENTHESIS_LEFT: :tLPAREN,
  PARENTHESIS_LEFT_PARENTHESES: :tLPAREN_ARG,
  PARENTHESIS_RIGHT: :tRPAREN,
  PERCENT: :tPERCENT,
  PERCENT_EQUAL: :tOP_ASGN,
  PERCENT_LOWER_I: :tQSYMBOLS_BEG,
  PERCENT_LOWER_W: :tQWORDS_BEG,
  PERCENT_UPPER_I: :tSYMBOLS_BEG,
  PERCENT_UPPER_W: :tWORDS_BEG,
  PERCENT_LOWER_X: :tXSTRING_BEG,
  PLUS: :tPLUS,
  PLUS_EQUAL: :tOP_ASGN,
  PIPE_EQUAL: :tOP_ASGN,
  PIPE: :tPIPE,
  PIPE_PIPE: :tOROP,
  PIPE_PIPE_EQUAL: :tOP_ASGN,
  QUESTION_MARK: :tEH,
  REGEXP_BEGIN: :tREGEXP_BEG,
  REGEXP_END: :tSTRING_END,
  SEMICOLON: :tSEMI,
  SLASH: :tDIVIDE,
  SLASH_EQUAL: :tOP_ASGN,
  STAR: :tSTAR2,
  STAR_EQUAL: :tOP_ASGN,
  STAR_STAR: :tPOW,
  STAR_STAR_EQUAL: :tOP_ASGN,
  STRING_BEGIN: :tSTRING_BEG,
  STRING_CONTENT: :tSTRING_CONTENT,
  STRING_END: :tSTRING_END,
  SYMBOL_BEGIN: :tSYMBEG,
  TILDE: :tTILDE,
  UAMPERSAND: :tAMPER,
  UCOLON_COLON: :tCOLON3,
  # The unary (beginless) range operators use the "B" token variants, so
  # `..` gets :tBDOT2 just as `...` gets :tBDOT3.
  UDOT_DOT: :tBDOT2,
  UDOT_DOT_DOT: :tBDOT3,
  UMINUS: :tUMINUS,
  UMINUS_NUM: :tUNARY_NUM,
  UPLUS: :tUPLUS,
  USTAR: :tSTAR,
  # A unary `**` is a double splat, not the binary power operator.
  USTAR_STAR: :tDSTAR,
  WORDS_SEP: :tSPACE
}.freeze
179
+
180
+ private_constant :TYPES
181
+
182
+ # The Parser::Source::Buffer that the tokens were lexed from.
183
+ attr_reader :source_buffer
184
+
185
+ # An array of prism tokens that we lexed.
186
+ attr_reader :lexed
187
+
188
+ # A hash that maps offsets in bytes to offsets in characters.
189
+ attr_reader :offset_cache
190
+
191
# Build a lexer over tokens that prism has already produced.
#
# source_buffer:: stored as-is; #to_a passes it to every Range it builds.
# lexed:: the prism tokens that #to_a walks.
# offset_cache:: indexed with `[]` by #to_a to translate each prism offset
#                before it is placed in a Range.
def initialize(source_buffer, lexed, offset_cache)
  @source_buffer, @lexed, @offset_cache = source_buffer, lexed, offset_cache
end
198
+
199
+ Range = ::Parser::Source::Range # :nodoc:
200
+ private_constant :Range
201
+
202
# Convert the prism tokens into the expected format for the parser gem.
#
# Returns an array of `[type, [value, range]]` entries. Most prism tokens
# map one-to-one through TYPES; a few are merged on the way (embedded
# documents, empty and single-segment quoted strings, simple symbols) and a
# few are split (a "+"-prefixed integer, a regexp terminator and its
# options). IGNORED_NEWLINE and EOF tokens are dropped.
def to_a
  tokens = []
  index = 0

  # Builds a Range in the source buffer from two prism offsets, translating
  # each one through the offset cache.
  range = lambda do |start_offset, end_offset|
    Range.new(source_buffer, offset_cache[start_offset], offset_cache[end_offset])
  end

  while index < lexed.length
    token, = lexed[index]
    index += 1
    next if token.type == :IGNORED_NEWLINE || token.type == :EOF

    type = TYPES.fetch(token.type)
    value = token.value
    location = range.call(token.location.start_offset, token.location.end_offset)

    case type
    when :tCHARACTER
      value.delete_prefix!("?")
    when :tCOMMENT
      if token.type == :EMBDOC_BEGIN
        # Fold the whole embedded document into a single comment token. Stop
        # after EMBDOC_END, or at the end of the token list if the
        # terminator never shows up (previously a NoMethodError on nil).
        last_token = token

        while (next_token = lexed[index])
          value += next_token.value
          last_token = next_token
          index += 1
          break if next_token.type == :EMBDOC_END
        end

        location = range.call(token.location.start_offset, last_token.location.end_offset)
      elsif value.chomp!
        # chomp! returns nil when there was no line terminator to strip (a
        # comment on the last line of a file with no trailing newline); only
        # shrink the range when something was actually removed.
        location = range.call(token.location.start_offset, token.location.end_offset - 1)
      end
    when :tNL, :tSPACE, :tSTRING_DVAR
      value = nil
    when :tFLOAT
      value = Float(value)
    when :tIMAGINARY
      value = parse_complex(value)
    when :tINTEGER
      if value.start_with?("+")
        # Emit the leading "+" as its own token and start the integer's
        # range one offset later.
        tokens << [:tUNARY_NUM, ["+", range.call(token.location.start_offset, token.location.start_offset + 1)]]
        location = range.call(token.location.start_offset + 1, token.location.end_offset)
      end

      value = Integer(value)
    when :tLABEL, :tLABEL_END
      value.chomp!(":")
    when :tNTH_REF
      value = Integer(value.delete_prefix("$"))
    when :tOP_ASGN
      value.chomp!("=")
    when :tRATIONAL
      value = parse_rational(value)
    when :tSTRING_BEG
      plain_quote = value == "\"" || value == "'"
      next_token = lexed[index]
      next_next_token = lexed[index + 1]

      if plain_quote && next_token && next_token.type == :STRING_END
        # An empty quoted string collapses into one tSTRING token.
        next_location = token.location.join(next_token.location)
        type = :tSTRING
        value = ""
        location = range.call(next_location.start_offset, next_location.end_offset)
        index += 1
      elsif plain_quote && next_token && next_token.type == :STRING_CONTENT && next_next_token && next_next_token.type == :STRING_END
        # A quoted string with a single content segment also collapses into
        # one tSTRING token carrying that content.
        next_location = token.location.join(next_next_token.location)
        type = :tSTRING
        value = next_token.value
        location = range.call(next_location.start_offset, next_location.end_offset)
        index += 2
      elsif value.start_with?("<<")
        # Heredoc opener: keep only "<<" plus the quote character, using a
        # double quote when the opener has no explicit quote.
        quote = value[2] == "-" || value[2] == "~" ? value[3] : value[2]
        value = "<<#{quote == "'" || quote == "\"" ? quote : "\""}"
      end
    when :tSTRING_END
      if token.type == :REGEXP_END
        # Only the closing delimiter belongs to this token; the options are
        # emitted separately below.
        value = value[0]
        location = range.call(token.location.start_offset, token.location.start_offset + 1)
      end
    when :tSYMBEG
      if (next_token = lexed[index]) && next_token.type != :STRING_CONTENT && next_token.type != :EMBEXPR_BEGIN && next_token.type != :EMBVAR
        next_location = token.location.join(next_token.location)
        type = :tSYMBOL
        value = next_token.value
        value = { "~@" => "~", "!@" => "!" }.fetch(value, value)
        location = range.call(next_location.start_offset, next_location.end_offset)
        index += 1
      end
    when :tFID
      # Guard against a METHOD_NAME being the very first emitted token, in
      # which case there is no previous token to inspect.
      if !tokens.empty? && tokens.last.first == :kDEF
        type = :tIDENTIFIER
      end
    end

    tokens << [type, [value, location]]

    if token.type == :REGEXP_END
      tokens << [:tREGEXP_OPT, [token.value[1..], range.call(token.location.start_offset + 1, token.location.end_offset)]]
    end
  end

  tokens
end
306
+
307
+ private
308
+
309
# Parse the source text of an imaginary literal (for example "2i", "3ri"
# or "0x10i") into a Complex whose real part is 0.
#
# The argument is left untouched: the trailing "i" is stripped from a copy,
# so frozen strings are accepted and the caller's string is not altered.
def parse_complex(value)
  imaginary = value.chomp("i")

  if imaginary.end_with?("r")
    Complex(0, parse_rational(imaginary))
  elsif imaginary.start_with?(/0[BbOoDdXx]/)
    # Radix-prefixed digits are converted through Integer() first.
    Complex(0, Integer(imaginary))
  else
    Complex(0, imaginary)
  end
end
321
+
322
# Parse the source text of a rational literal (for example "3r", "1.5r" or
# "0b101r") into a Rational.
#
# The argument is left untouched: the trailing "r" is stripped from a copy,
# so frozen strings are accepted and the caller's string is not altered.
def parse_rational(value)
  digits = value.chomp("r")

  if digits.start_with?(/0[BbOoDdXx]/)
    # Radix-prefixed digits are converted through Integer() first.
    Rational(Integer(digits))
  else
    Rational(digits)
  end
end
332
+ end
333
+ end
334
+ end
335
+ end
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "parser"
4
+ require "rubocop"
5
+
6
+ require "prism"
7
+ require "prism/translation/parser"
8
+
9
module Prism
  module Translation
    class Parser
      # Sentinel "Ruby version" to put in a rubocop configuration file in
      # order to select prism. The digit groups 80, 82, 73, 83, 77 are the
      # ASCII codes for "PRISM".
      VERSION_3_3 = 80_82_73_83_77.33

      # Prepended into RuboCop::AST::ProcessedSource so that the sentinel
      # version resolves to the prism-backed parser.
      module ProcessedSource
        # Returns Prism::Translation::Parser for the sentinel version and
        # defers to the original implementation for every other version.
        def parser_class(ruby_version)
          return super unless ruby_version == Prism::Translation::Parser::VERSION_3_3

          require "prism/translation/parser"
          Prism::Translation::Parser
        end
      end
    end
  end
end
32
+
33
+ # :stopdoc:
34
# Route parser selection in RuboCop through the prism-aware override.
RuboCop::AST::ProcessedSource.prepend(Prism::Translation::Parser::ProcessedSource)

# Swap KNOWN_RUBIES for a copy that also lists the sentinel version. The
# constant is removed first and then defined again; remove_const hands back
# the old value.
previous_rubies = RuboCop::TargetRuby.send(:remove_const, :KNOWN_RUBIES)
RuboCop::TargetRuby.const_set(:KNOWN_RUBIES, [*previous_rubies, Prism::Translation::Parser::VERSION_3_3].freeze)
@@ -0,0 +1,178 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "parser"
4
+
5
+ module Prism
6
+ module Translation
7
+ # This class is the entry-point for converting a prism syntax tree into the
8
+ # whitequark/parser gem's syntax tree. It inherits from the base parser for
9
+ # the parser gem, and overrides the parse* methods to parse with prism and
10
+ # then translate.
11
+ class Parser < ::Parser::Base
12
# A diagnostic whose text comes straight from prism. The parser gem's own
# diagnostics use a hard-coded set of error messages, so this subclass
# carries the message itself and exposes it through its own reader.
class Diagnostic < ::Parser::Diagnostic
  # The message generated by prism.
  attr_reader :message

  # Create an :error diagnostic with reason :prism_error at +location+.
  def initialize(message, location)
    # Assigned before calling super.
    # NOTE(review): presumably the parent initializer freezes the
    # instance, which would make this order mandatory; confirm.
    @message = message
    super(:error, :prism_error, {}, location, [])
  end
end
25
+
26
Racc_debug_parser = false # :nodoc:

# Version number reported by this parser.
# NOTE(review): 34 presumably follows the parser gem's convention of
# naming grammar versions (3.4); confirm.
def version # :nodoc:
  34
end

# Ruby source files are treated as UTF-8 unless stated otherwise.
def default_encoding
  Encoding::UTF_8
end

# Intentionally empty: takes no arguments and returns nil.
def yyerror # :nodoc:
end
39
+
40
# Parse +source_buffer+ with prism and return the translated AST.
#
# Errors reported by prism are passed through #unwrap before the tree is
# built. @source_buffer is set for the duration of the call and always
# reset to nil afterwards.
def parse(source_buffer)
  @source_buffer = source_buffer
  source = source_buffer.source
  offset_cache = build_offset_cache(source)

  prism_result = Prism.parse(source, filepath: source_buffer.name)
  program = unwrap(prism_result, offset_cache).value

  build_ast(program, offset_cache)
ensure
  @source_buffer = nil
end
52
+
53
# Parse +source_buffer+ with prism and return a two-element array: the
# translated AST followed by the translated source code comments.
#
# @source_buffer is set for the duration of the call and always reset to
# nil afterwards.
def parse_with_comments(source_buffer)
  @source_buffer = source_buffer
  source = source_buffer.source
  offset_cache = build_offset_cache(source)

  checked = unwrap(Prism.parse(source, filepath: source_buffer.name), offset_cache)
  ast = build_ast(checked.value, offset_cache)
  comments = build_comments(checked.comments, offset_cache)

  [ast, comments]
ensure
  @source_buffer = nil
end
68
+
69
# Parses a source buffer and returns the AST, the source code comments,
# and the tokens emitted by the lexer, as a three-element array.
#
# When +recover+ is false (the default), a ::Parser::SyntaxError raised
# while reporting prism's errors propagates to the caller. When +recover+ is
# true the error is swallowed and the comments and tokens are still
# returned; the AST slot is nil whenever the parse was not successful.
def tokenize(source_buffer, recover = false)
  @source_buffer = source_buffer
  source = source_buffer.source

  offset_cache = build_offset_cache(source)

  # Hold on to the parse result itself. Previously the result was taken
  # from the return value of the rescued expression, which is nil once the
  # error has been swallowed, so recovery always ended in a NoMethodError.
  result = Prism.parse_lex(source, filepath: source_buffer.name)

  begin
    unwrap(result, offset_cache)
  rescue ::Parser::SyntaxError
    raise unless recover
  end

  program, tokens = result.value
  ast = build_ast(program, offset_cache) if result.success?

  [
    ast,
    build_comments(result.comments, offset_cache),
    build_tokens(tokens, offset_cache)
  ]
ensure
  @source_buffer = nil
end
94
+
95
# Prism already resolves numbered parameters, so nothing is declared here.
# This only reports whether the node's first child looks like a numbered
# parameter name, i.e. an underscore followed by a single digit 1-9.
def try_declare_numparam(node)
  name = node.children[0]
  name.match?(/\A_[1-9]\z/)
end
100
+
101
+ private
102
+
103
# Extension point consulted by #unwrap for each prism error: errors for
# which this returns false are not reported, so consumers can override it to
# keep selected errors from blocking creation of the syntax tree. The
# default accepts every error.
def valid_error?(error)
  true
end
108
+
109
# Report every error in +result+ that passes #valid_error? to the
# diagnostics engine as a Diagnostic located at the error's range, then
# return +result+ unchanged. Processing a diagnostic may raise a
# ::Parser::SyntaxError, which is what #tokenize rescues around this call.
def unwrap(result, offset_cache)
  result.errors.each do |error|
    if valid_error?(error)
      range = build_range(error.location, offset_cache)
      diagnostics.process(Diagnostic.new(error.message, range))
    end
  end

  result
end
121
+
122
# Prism reports offsets in bytes while the parser gem works in characters,
# so this builds an object that converts one into the other when indexed
# with `[]`.
#
# When the source has as many bytes as characters the conversion is the
# identity and a lambda is returned. Otherwise the result is an array whose
# index is a byte offset and whose value is the matching character offset,
# with one extra trailing entry for the end of the source.
def build_offset_cache(source)
  return ->(offset) { offset } if source.bytesize == source.length

  cache = []

  source.each_char.with_index do |char, char_offset|
    # Every byte of a multibyte character points at the same character.
    char.bytesize.times { cache << char_offset }
  end

  # The end-of-source offset is one past the last character's offset.
  cache << cache.last + 1
end
144
+
145
# Translate the prism AST rooted at +program+ into a parser gem AST by
# having it accept a Compiler created for this parser and offset cache.
def build_ast(program, offset_cache)
  compiler = Compiler.new(self, offset_cache)
  program.accept(compiler)
end
149
+
150
# Translate prism comments into parser gem comments, one
# ::Parser::Source::Comment per prism comment, each built from the range of
# the comment's location.
def build_comments(comments, offset_cache)
  comments.map do |comment|
    range = build_range(comment.location, offset_cache)
    ::Parser::Source::Comment.new(range)
  end
end
156
+
157
# Translate prism tokens into parser gem tokens. Only the first element of
# each entry in +tokens+ is handed to the Lexer, which does the conversion.
def build_tokens(tokens, offset_cache)
  prism_tokens = tokens.map { |entry| entry.first }
  Lexer.new(source_buffer, prism_tokens, offset_cache).to_a
end
161
+
162
# Build a ::Parser::Source::Range in the current source buffer from a prism
# location, translating both of its offsets through +offset_cache+.
def build_range(location, offset_cache)
  begin_pos = offset_cache[location.start_offset]
  end_pos = offset_cache[location.end_offset]

  ::Parser::Source::Range.new(source_buffer, begin_pos, end_pos)
end
170
+
171
+ require_relative "parser/compiler"
172
+ require_relative "parser/lexer"
173
+
174
+ private_constant :Compiler
175
+ private_constant :Lexer
176
+ end
177
+ end
178
+ end