jruby-prism-parser 0.23.0.pre.SNAPSHOT-java

Sign up to get free protection for your applications and to get access to all the features.
Files changed (110) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +401 -0
  3. data/CODE_OF_CONDUCT.md +76 -0
  4. data/CONTRIBUTING.md +62 -0
  5. data/LICENSE.md +7 -0
  6. data/Makefile +101 -0
  7. data/README.md +98 -0
  8. data/config.yml +2902 -0
  9. data/docs/build_system.md +91 -0
  10. data/docs/configuration.md +64 -0
  11. data/docs/cruby_compilation.md +27 -0
  12. data/docs/design.md +53 -0
  13. data/docs/encoding.md +121 -0
  14. data/docs/fuzzing.md +88 -0
  15. data/docs/heredocs.md +36 -0
  16. data/docs/javascript.md +118 -0
  17. data/docs/local_variable_depth.md +229 -0
  18. data/docs/mapping.md +117 -0
  19. data/docs/parser_translation.md +34 -0
  20. data/docs/parsing_rules.md +19 -0
  21. data/docs/releasing.md +98 -0
  22. data/docs/ripper.md +36 -0
  23. data/docs/ruby_api.md +43 -0
  24. data/docs/ruby_parser_translation.md +19 -0
  25. data/docs/serialization.md +209 -0
  26. data/docs/testing.md +55 -0
  27. data/ext/prism/api_node.c +5098 -0
  28. data/ext/prism/api_pack.c +267 -0
  29. data/ext/prism/extconf.rb +110 -0
  30. data/ext/prism/extension.c +1155 -0
  31. data/ext/prism/extension.h +18 -0
  32. data/include/prism/ast.h +5807 -0
  33. data/include/prism/defines.h +102 -0
  34. data/include/prism/diagnostic.h +339 -0
  35. data/include/prism/encoding.h +265 -0
  36. data/include/prism/node.h +57 -0
  37. data/include/prism/options.h +230 -0
  38. data/include/prism/pack.h +152 -0
  39. data/include/prism/parser.h +732 -0
  40. data/include/prism/prettyprint.h +26 -0
  41. data/include/prism/regexp.h +33 -0
  42. data/include/prism/util/pm_buffer.h +155 -0
  43. data/include/prism/util/pm_char.h +205 -0
  44. data/include/prism/util/pm_constant_pool.h +209 -0
  45. data/include/prism/util/pm_list.h +97 -0
  46. data/include/prism/util/pm_memchr.h +29 -0
  47. data/include/prism/util/pm_newline_list.h +93 -0
  48. data/include/prism/util/pm_state_stack.h +42 -0
  49. data/include/prism/util/pm_string.h +150 -0
  50. data/include/prism/util/pm_string_list.h +44 -0
  51. data/include/prism/util/pm_strncasecmp.h +32 -0
  52. data/include/prism/util/pm_strpbrk.h +46 -0
  53. data/include/prism/version.h +29 -0
  54. data/include/prism.h +289 -0
  55. data/jruby-prism.jar +0 -0
  56. data/lib/prism/compiler.rb +486 -0
  57. data/lib/prism/debug.rb +206 -0
  58. data/lib/prism/desugar_compiler.rb +207 -0
  59. data/lib/prism/dispatcher.rb +2150 -0
  60. data/lib/prism/dot_visitor.rb +4634 -0
  61. data/lib/prism/dsl.rb +785 -0
  62. data/lib/prism/ffi.rb +346 -0
  63. data/lib/prism/lex_compat.rb +908 -0
  64. data/lib/prism/mutation_compiler.rb +753 -0
  65. data/lib/prism/node.rb +17864 -0
  66. data/lib/prism/node_ext.rb +212 -0
  67. data/lib/prism/node_inspector.rb +68 -0
  68. data/lib/prism/pack.rb +224 -0
  69. data/lib/prism/parse_result/comments.rb +177 -0
  70. data/lib/prism/parse_result/newlines.rb +64 -0
  71. data/lib/prism/parse_result.rb +498 -0
  72. data/lib/prism/pattern.rb +250 -0
  73. data/lib/prism/serialize.rb +1354 -0
  74. data/lib/prism/translation/parser/compiler.rb +1838 -0
  75. data/lib/prism/translation/parser/lexer.rb +335 -0
  76. data/lib/prism/translation/parser/rubocop.rb +37 -0
  77. data/lib/prism/translation/parser.rb +178 -0
  78. data/lib/prism/translation/ripper.rb +577 -0
  79. data/lib/prism/translation/ruby_parser.rb +1521 -0
  80. data/lib/prism/translation.rb +11 -0
  81. data/lib/prism/version.rb +3 -0
  82. data/lib/prism/visitor.rb +495 -0
  83. data/lib/prism.rb +99 -0
  84. data/prism.gemspec +135 -0
  85. data/rbi/prism.rbi +7767 -0
  86. data/rbi/prism_static.rbi +207 -0
  87. data/sig/prism.rbs +4773 -0
  88. data/sig/prism_static.rbs +201 -0
  89. data/src/diagnostic.c +400 -0
  90. data/src/encoding.c +5132 -0
  91. data/src/node.c +2786 -0
  92. data/src/options.c +213 -0
  93. data/src/pack.c +493 -0
  94. data/src/prettyprint.c +8881 -0
  95. data/src/prism.c +18406 -0
  96. data/src/regexp.c +638 -0
  97. data/src/serialize.c +1554 -0
  98. data/src/token_type.c +700 -0
  99. data/src/util/pm_buffer.c +190 -0
  100. data/src/util/pm_char.c +318 -0
  101. data/src/util/pm_constant_pool.c +322 -0
  102. data/src/util/pm_list.c +49 -0
  103. data/src/util/pm_memchr.c +35 -0
  104. data/src/util/pm_newline_list.c +84 -0
  105. data/src/util/pm_state_stack.c +25 -0
  106. data/src/util/pm_string.c +203 -0
  107. data/src/util/pm_string_list.c +28 -0
  108. data/src/util/pm_strncasecmp.c +24 -0
  109. data/src/util/pm_strpbrk.c +180 -0
  110. metadata +156 -0
@@ -0,0 +1,335 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Prism
4
+ module Translation
5
+ class Parser
6
+ # Accepts a list of prism tokens and converts them into the expected
7
+ # format for the parser gem.
8
+ class Lexer
9
# Lookup table from prism token types (keys) to the token types emitted by
# the parser gem's lexer (values). The table is frozen so that it cannot be
# altered at runtime.
#
# The unary forms of ".." and "**" get their own parser gem types (tBDOT2
# and tDSTAR), in the same way that the unary "..." maps to tBDOT3 and the
# unary "*" maps to tSTAR.
TYPES = {
  # These tokens should never appear in the output of the lexer.
  EOF: nil,
  MISSING: nil,
  NOT_PROVIDED: nil,
  IGNORED_NEWLINE: nil,
  EMBDOC_END: nil,
  EMBDOC_LINE: nil,
  __END__: nil,

  # These tokens have more or less direct mappings.
  AMPERSAND: :tAMPER2,
  AMPERSAND_AMPERSAND: :tANDOP,
  AMPERSAND_AMPERSAND_EQUAL: :tOP_ASGN,
  AMPERSAND_DOT: :tANDDOT,
  AMPERSAND_EQUAL: :tOP_ASGN,
  BACK_REFERENCE: :tBACK_REF,
  BACKTICK: :tXSTRING_BEG,
  BANG: :tBANG,
  BANG_EQUAL: :tNEQ,
  BANG_TILDE: :tNMATCH,
  BRACE_LEFT: :tLCURLY,
  BRACE_RIGHT: :tRCURLY,
  BRACKET_LEFT: :tLBRACK2,
  BRACKET_LEFT_ARRAY: :tLBRACK,
  BRACKET_LEFT_RIGHT: :tAREF,
  BRACKET_LEFT_RIGHT_EQUAL: :tASET,
  BRACKET_RIGHT: :tRBRACK,
  CARET: :tCARET,
  CARET_EQUAL: :tOP_ASGN,
  CHARACTER_LITERAL: :tCHARACTER,
  CLASS_VARIABLE: :tCVAR,
  COLON: :tCOLON,
  COLON_COLON: :tCOLON2,
  COMMA: :tCOMMA,
  COMMENT: :tCOMMENT,
  CONSTANT: :tCONSTANT,
  DOT: :tDOT,
  DOT_DOT: :tDOT2,
  DOT_DOT_DOT: :tDOT3,
  EMBDOC_BEGIN: :tCOMMENT,
  EMBEXPR_BEGIN: :tSTRING_DBEG,
  EMBEXPR_END: :tSTRING_DEND,
  EMBVAR: :tSTRING_DVAR,
  EQUAL: :tEQL,
  EQUAL_EQUAL: :tEQ,
  EQUAL_EQUAL_EQUAL: :tEQQ,
  EQUAL_GREATER: :tASSOC,
  EQUAL_TILDE: :tMATCH,
  FLOAT: :tFLOAT,
  FLOAT_IMAGINARY: :tIMAGINARY,
  FLOAT_RATIONAL: :tRATIONAL,
  FLOAT_RATIONAL_IMAGINARY: :tIMAGINARY,
  GLOBAL_VARIABLE: :tGVAR,
  GREATER: :tGT,
  GREATER_EQUAL: :tGEQ,
  GREATER_GREATER: :tRSHFT,
  GREATER_GREATER_EQUAL: :tOP_ASGN,
  HEREDOC_START: :tSTRING_BEG,
  HEREDOC_END: :tSTRING_END,
  IDENTIFIER: :tIDENTIFIER,
  INSTANCE_VARIABLE: :tIVAR,
  INTEGER: :tINTEGER,
  INTEGER_IMAGINARY: :tIMAGINARY,
  INTEGER_RATIONAL: :tRATIONAL,
  INTEGER_RATIONAL_IMAGINARY: :tIMAGINARY,
  KEYWORD_ALIAS: :kALIAS,
  KEYWORD_AND: :kAND,
  KEYWORD_BEGIN: :kBEGIN,
  KEYWORD_BEGIN_UPCASE: :klBEGIN,
  KEYWORD_BREAK: :kBREAK,
  KEYWORD_CASE: :kCASE,
  KEYWORD_CLASS: :kCLASS,
  KEYWORD_DEF: :kDEF,
  KEYWORD_DEFINED: :kDEFINED,
  KEYWORD_DO: :kDO,
  KEYWORD_DO_LOOP: :kDO_COND,
  KEYWORD_END: :kEND,
  KEYWORD_END_UPCASE: :klEND,
  KEYWORD_ENSURE: :kENSURE,
  KEYWORD_ELSE: :kELSE,
  KEYWORD_ELSIF: :kELSIF,
  KEYWORD_FALSE: :kFALSE,
  KEYWORD_FOR: :kFOR,
  KEYWORD_IF: :kIF,
  KEYWORD_IF_MODIFIER: :kIF_MOD,
  KEYWORD_IN: :kIN,
  KEYWORD_MODULE: :kMODULE,
  KEYWORD_NEXT: :kNEXT,
  KEYWORD_NIL: :kNIL,
  KEYWORD_NOT: :kNOT,
  KEYWORD_OR: :kOR,
  KEYWORD_REDO: :kREDO,
  KEYWORD_RESCUE: :kRESCUE,
  KEYWORD_RESCUE_MODIFIER: :kRESCUE_MOD,
  KEYWORD_RETRY: :kRETRY,
  KEYWORD_RETURN: :kRETURN,
  KEYWORD_SELF: :kSELF,
  KEYWORD_SUPER: :kSUPER,
  KEYWORD_THEN: :kTHEN,
  KEYWORD_TRUE: :kTRUE,
  KEYWORD_UNDEF: :kUNDEF,
  KEYWORD_UNLESS: :kUNLESS,
  KEYWORD_UNLESS_MODIFIER: :kUNLESS_MOD,
  KEYWORD_UNTIL: :kUNTIL,
  KEYWORD_UNTIL_MODIFIER: :kUNTIL_MOD,
  KEYWORD_WHEN: :kWHEN,
  KEYWORD_WHILE: :kWHILE,
  KEYWORD_WHILE_MODIFIER: :kWHILE_MOD,
  KEYWORD_YIELD: :kYIELD,
  KEYWORD___ENCODING__: :k__ENCODING__,
  KEYWORD___FILE__: :k__FILE__,
  KEYWORD___LINE__: :k__LINE__,
  LABEL: :tLABEL,
  LABEL_END: :tLABEL_END,
  LAMBDA_BEGIN: :tLAMBEG,
  LESS: :tLT,
  LESS_EQUAL: :tLEQ,
  LESS_EQUAL_GREATER: :tCMP,
  LESS_LESS: :tLSHFT,
  LESS_LESS_EQUAL: :tOP_ASGN,
  METHOD_NAME: :tFID,
  MINUS: :tMINUS,
  MINUS_EQUAL: :tOP_ASGN,
  MINUS_GREATER: :tLAMBDA,
  NEWLINE: :tNL,
  NUMBERED_REFERENCE: :tNTH_REF,
  PARENTHESIS_LEFT: :tLPAREN,
  PARENTHESIS_LEFT_PARENTHESES: :tLPAREN_ARG,
  PARENTHESIS_RIGHT: :tRPAREN,
  PERCENT: :tPERCENT,
  PERCENT_EQUAL: :tOP_ASGN,
  PERCENT_LOWER_I: :tQSYMBOLS_BEG,
  PERCENT_LOWER_W: :tQWORDS_BEG,
  PERCENT_UPPER_I: :tSYMBOLS_BEG,
  PERCENT_UPPER_W: :tWORDS_BEG,
  PERCENT_LOWER_X: :tXSTRING_BEG,
  PLUS: :tPLUS,
  PLUS_EQUAL: :tOP_ASGN,
  PIPE_EQUAL: :tOP_ASGN,
  PIPE: :tPIPE,
  PIPE_PIPE: :tOROP,
  PIPE_PIPE_EQUAL: :tOP_ASGN,
  QUESTION_MARK: :tEH,
  REGEXP_BEGIN: :tREGEXP_BEG,
  REGEXP_END: :tSTRING_END,
  SEMICOLON: :tSEMI,
  SLASH: :tDIVIDE,
  SLASH_EQUAL: :tOP_ASGN,
  STAR: :tSTAR2,
  STAR_EQUAL: :tOP_ASGN,
  STAR_STAR: :tPOW,
  STAR_STAR_EQUAL: :tOP_ASGN,
  STRING_BEGIN: :tSTRING_BEG,
  STRING_CONTENT: :tSTRING_CONTENT,
  STRING_END: :tSTRING_END,
  SYMBOL_BEGIN: :tSYMBEG,
  TILDE: :tTILDE,
  UAMPERSAND: :tAMPER,
  UCOLON_COLON: :tCOLON3,
  UDOT_DOT: :tBDOT2,
  UDOT_DOT_DOT: :tBDOT3,
  UMINUS: :tUMINUS,
  UMINUS_NUM: :tUNARY_NUM,
  UPLUS: :tUPLUS,
  USTAR: :tSTAR,
  USTAR_STAR: :tDSTAR,
  WORDS_SEP: :tSPACE
}.freeze
179
+
180
+ private_constant :TYPES
181
+
182
+ # The Parser::Source::Buffer that the tokens were lexed from.
183
+ attr_reader :source_buffer
184
+
185
+ # An array of prism tokens that we lexed.
186
+ attr_reader :lexed
187
+
188
+ # A hash that maps offsets in bytes to offsets in characters.
189
+ attr_reader :offset_cache
190
+
191
# Set up the translator with everything it needs to convert tokens:
# the source buffer the tokens were lexed from, the list of prism tokens,
# and the cache used to turn byte offsets into character offsets.
def initialize(source_buffer, lexed, offset_cache)
  @source_buffer, @lexed, @offset_cache = source_buffer, lexed, offset_cache
end
198
+
199
+ Range = ::Parser::Source::Range # :nodoc:
200
+ private_constant :Range
201
+
202
# Convert the prism tokens into the expected format for the parser gem.
#
# Returns an array of [type, [value, range]] entries, where type comes from
# the TYPES table (possibly refined below), value is the translated token
# value, and range is built from the token's offsets run through the offset
# cache.
#
# The prism tokens held in +lexed+ are never modified: every adjustment is
# made on a copy of the token's value, so calling this method repeatedly
# yields the same result.
def to_a
  tokens = []
  index = 0

  while index < lexed.length
    token, = lexed[index]
    index += 1
    next if token.type == :IGNORED_NEWLINE || token.type == :EOF

    type = TYPES.fetch(token.type)
    value = token.value
    start_offset = token.location.start_offset
    end_offset = token.location.end_offset
    location = Range.new(source_buffer, offset_cache[start_offset], offset_cache[end_offset])

    case type
    when :tCHARACTER
      value = value.delete_prefix("?")
    when :tCOMMENT
      if token.type == :EMBDOC_BEGIN
        # Fold the whole embedded document into a single comment token. The
        # loop stops after the EMBDOC_END token, or at the end of the token
        # list when the terminator is missing.
        value = value.dup
        last_token = token

        while (next_token = lexed[index])
          value << next_token.value
          last_token = next_token
          index += 1
          break if next_token.type == :EMBDOC_END
        end

        location = Range.new(source_buffer, offset_cache[start_offset], offset_cache[last_token.location.end_offset])
      else
        # Only shorten the range when a line terminator was actually
        # stripped; a comment with no trailing newline keeps its full range.
        chomped = value.chomp

        if chomped.length < value.length
          location = Range.new(source_buffer, offset_cache[start_offset], offset_cache[end_offset - 1])
        end

        value = chomped
      end
    when :tNL, :tSPACE, :tSTRING_DVAR
      value = nil
    when :tFLOAT
      value = Float(value)
    when :tIMAGINARY
      # Hand over a copy so the token's own value is left intact.
      value = parse_complex(value.dup)
    when :tINTEGER
      if value.start_with?("+")
        # A leading "+" is emitted as its own token ahead of the integer.
        tokens << [:tUNARY_NUM, ["+", Range.new(source_buffer, offset_cache[start_offset], offset_cache[start_offset + 1])]]
        location = Range.new(source_buffer, offset_cache[start_offset + 1], offset_cache[end_offset])
      end

      value = Integer(value)
    when :tLABEL, :tLABEL_END
      value = value.chomp(":")
    when :tNTH_REF
      value = Integer(value.delete_prefix("$"))
    when :tOP_ASGN
      value = value.chomp("=")
    when :tRATIONAL
      # Hand over a copy so the token's own value is left intact.
      value = parse_rational(value.dup)
    when :tSTRING_BEG
      next_token = lexed[index]
      simple_quote = ["\"", "'"].include?(value)

      if simple_quote && next_token&.type == :STRING_END
        # An empty quoted string collapses into a single tSTRING token.
        next_location = token.location.join(next_token.location)
        type = :tSTRING
        value = ""
        location = Range.new(source_buffer, offset_cache[next_location.start_offset], offset_cache[next_location.end_offset])
        index += 1
      elsif simple_quote && next_token&.type == :STRING_CONTENT && lexed[index + 1]&.type == :STRING_END
        # A quoted string with exactly one content token also collapses
        # into a single tSTRING token.
        next_location = token.location.join(lexed[index + 1].location)
        type = :tSTRING
        value = next_token.value
        location = Range.new(source_buffer, offset_cache[next_location.start_offset], offset_cache[next_location.end_offset])
        index += 2
      elsif value.start_with?("<<")
        # Heredoc openers are reduced to "<<" followed by the quote
        # character, defaulting to a double quote.
        quote = value[2] == "-" || value[2] == "~" ? value[3] : value[2]
        value = "<<#{quote == "'" || quote == "\"" ? quote : "\""}"
      end
    when :tSTRING_END
      if token.type == :REGEXP_END
        # Only the closing delimiter belongs to this token; the flags are
        # emitted separately after it (see below).
        value = value[0]
        location = Range.new(source_buffer, offset_cache[start_offset], offset_cache[start_offset + 1])
      end
    when :tSYMBEG
      next_token = lexed[index]

      if next_token && ![:STRING_CONTENT, :EMBEXPR_BEGIN, :EMBVAR].include?(next_token.type)
        next_location = token.location.join(next_token.location)
        type = :tSYMBOL
        value = next_token.value
        value = { "~@" => "~", "!@" => "!" }.fetch(value, value)
        location = Range.new(source_buffer, offset_cache[next_location.start_offset], offset_cache[next_location.end_offset])
        index += 1
      end
    when :tFID
      # dig returns nil instead of raising when no token has been emitted
      # yet (a method name as the very first token).
      type = :tIDENTIFIER if tokens.dig(-1, 0) == :kDEF
    end

    tokens << [type, [value, location]]

    if token.type == :REGEXP_END
      tokens << [:tREGEXP_OPT, [token.value[1..], Range.new(source_buffer, offset_cache[start_offset + 1], offset_cache[end_offset])]]
    end
  end

  tokens
end
306
+
307
+ private
308
+
309
# Parse a complex from the string representation.
#
# A trailing "i" is dropped and the remaining text becomes the imaginary
# part: text ending in "r" goes through parse_rational, text starting with
# a base prefix (0b, 0o, 0d, 0x in either case) is converted with Integer,
# and anything else is handed to Complex as a string.
#
# The argument is not modified, so frozen strings are accepted.
def parse_complex(value)
  imaginary = value.chomp("i")

  if imaginary.end_with?("r")
    Complex(0, parse_rational(imaginary))
  elsif imaginary.start_with?(/0[BbOoDdXx]/)
    Complex(0, Integer(imaginary))
  else
    Complex(0, imaginary)
  end
end
321
+
322
# Parse a rational from the string representation.
#
# A trailing "r" is dropped. Text starting with a base prefix (0b, 0o, 0d,
# 0x in either case) is converted with Integer first; anything else is
# handed to Rational as a string.
#
# The argument is not modified, so frozen strings are accepted.
def parse_rational(value)
  digits = value.chomp("r")

  if digits.start_with?(/0[BbOoDdXx]/)
    Rational(Integer(digits))
  else
    Rational(digits)
  end
end
332
+ end
333
+ end
334
+ end
335
+ end
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "parser"
4
+ require "rubocop"
5
+
6
+ require "prism"
7
+ require "prism/translation/parser"
8
+
9
+ module Prism
10
+ module Translation
11
+ class Parser
12
+ # This is the special version number that should be used in rubocop
13
+ # configuration files to trigger using prism.
14
+ VERSION_3_3 = 80_82_73_83_77.33
15
+
16
# This module gets prepended into RuboCop::AST::ProcessedSource.
module ProcessedSource
  # Redefine parser_class so that we can inject the prism parser into the
  # list of known parsers.
  #
  # Returns Prism::Translation::Parser when ruby_version is the special
  # VERSION_3_3 value, and defers to the original implementation for every
  # other version.
  #
  # No require is needed here: this file already loads
  # "prism/translation/parser" at the top, before this module is defined.
  def parser_class(ruby_version)
    return Prism::Translation::Parser if ruby_version == Prism::Translation::Parser::VERSION_3_3

    super
  end
end
29
+ end
30
+ end
31
+ end
32
+
33
# :stopdoc:
# Make RuboCop consult the prism-aware parser_class defined above.
RuboCop::AST::ProcessedSource.prepend(Prism::Translation::Parser::ProcessedSource)

# Swap KNOWN_RUBIES for a frozen copy that also lists the special prism
# version. The constant is read and removed reflectively before being
# defined again.
target_ruby = RuboCop::TargetRuby
extended_rubies = [*target_ruby.const_get(:KNOWN_RUBIES), Prism::Translation::Parser::VERSION_3_3].freeze
target_ruby.send(:remove_const, :KNOWN_RUBIES)
target_ruby.const_set(:KNOWN_RUBIES, extended_rubies)
@@ -0,0 +1,178 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "parser"
4
+
5
+ module Prism
6
+ module Translation
7
+ # This class is the entry-point for converting a prism syntax tree into the
8
+ # whitequark/parser gem's syntax tree. It inherits from the base parser for
9
+ # the parser gem, and overrides the parse* methods to parse with prism and
10
+ # then translate.
11
+ class Parser < ::Parser::Base
12
# The parser gem's diagnostics come with a hard-coded set of error messages.
# This subclass carries the message text produced by prism instead.
class Diagnostic < ::Parser::Diagnostic
  # The message generated by prism.
  attr_reader :message

  # Build an error-level diagnostic for the given prism message, attached
  # to the given location.
  def initialize(message, location)
    # NOTE(review): the message is stored before calling super, presumably
    # because the parent initializer may freeze the instance. Confirm
    # against the parser gem before reordering.
    @message = message

    super(:error, :prism_error, {}, location, [])
  end
end
25
+
26
Racc_debug_parser = false # :nodoc:

# NOTE(review): 34 presumably stands for Ruby 3.4 in the parser gem's
# numbering of versions. Confirm against the parser gem.
def version # :nodoc:
  34
end

# Ruby source files are treated as UTF-8 unless stated otherwise.
def default_encoding
  Encoding::UTF_8
end

# Intentionally empty.
def yyerror # :nodoc:
end
39
+
40
# Parse the given source buffer with prism and return the translated AST.
#
# The buffer is kept in @source_buffer while the work is in progress and
# cleared again afterwards, whether or not an exception was raised.
def parse(source_buffer)
  @source_buffer = source_buffer
  code = source_buffer.source
  cache = build_offset_cache(code)
  parsed = Prism.parse(code, filepath: source_buffer.name)

  build_ast(unwrap(parsed, cache).value, cache)
ensure
  @source_buffer = nil
end
52
+
53
# Parse the given source buffer with prism and return a two-element array:
# the translated AST followed by the translated source code comments.
#
# The buffer is kept in @source_buffer while the work is in progress and
# cleared again afterwards, whether or not an exception was raised.
def parse_with_comments(source_buffer)
  @source_buffer = source_buffer
  code = source_buffer.source
  cache = build_offset_cache(code)
  result = unwrap(Prism.parse(code, filepath: source_buffer.name), cache)

  ast = build_ast(result.value, cache)
  comments = build_comments(result.comments, cache)
  [ast, comments]
ensure
  @source_buffer = nil
end
68
+
69
# Parses a source buffer and returns the AST, the source code comments,
# and the tokens emitted by the lexer.
#
# When reporting the parse errors raises ::Parser::SyntaxError, the error
# is re-raised unless +recover+ is true. With +recover+ set, the comments
# and tokens of the failed parse are still returned and the AST entry is
# nil.
#
# The buffer is kept in @source_buffer while the work is in progress and
# cleared again afterwards, whether or not an exception was raised.
def tokenize(source_buffer, recover = false)
  @source_buffer = source_buffer
  source = source_buffer.source

  offset_cache = build_offset_cache(source)

  # Keep hold of the raw parse result so that it is still available when
  # reporting its errors raises and the caller asked to recover.
  parsed = Prism.parse_lex(source, filepath: source_buffer.name)
  result =
    begin
      unwrap(parsed, offset_cache)
    rescue ::Parser::SyntaxError
      raise unless recover

      parsed
    end

  program, tokens = result.value
  ast = build_ast(program, offset_cache) if result.success?

  [
    ast,
    build_comments(result.comments, offset_cache),
    build_tokens(tokens, offset_cache)
  ]
ensure
  @source_buffer = nil
end
94
+
95
# Prism already resolves numbered parameters, so nothing is declared here.
# This only reports whether the node's first child is named like a numbered
# parameter (_1 through _9).
def try_declare_numparam(node)
  name = node.children.first
  name.match?(/\A_[1-9]\z/)
end
100
+
101
+ private
102
+
103
# Hook for consumers: decide whether the given prism error should be
# reported. Every error is accepted here; consumers can change this when
# some errors should not block creating the syntax tree.
def valid_error?(error)
  true
end
108
+
109
# Report every error of the parse result that valid_error? accepts to the
# diagnostics engine, then hand the result back unchanged. Whether a
# reported error raises is decided by the diagnostics engine.
def unwrap(result, offset_cache)
  result.errors.each do |error|
    if valid_error?(error)
      range = build_range(error.location, offset_cache)
      diagnostics.process(Diagnostic.new(error.message, range))
    end
  end

  result
end
121
+
122
# Prism reports offsets in bytes, whereas the parser gem works with offsets
# in characters, so a translation between the two is needed to build the
# parser gem AST.
#
# When the source has as many bytes as characters, the two kinds of offset
# coincide and an identity lambda is returned. Otherwise an array is
# returned in which each byte offset (the index) holds the offset of the
# character that byte belongs to, with one extra trailing entry for the
# end of the source.
def build_offset_cache(source)
  return -> (offset) { offset } if source.bytesize == source.length

  cache = []
  char_offset = 0

  source.each_char do |char|
    # Append one entry per byte of this character.
    cache.fill(char_offset, cache.length, char.bytesize)
    char_offset += 1
  end

  cache << char_offset
end
144
+
145
# Translate the prism AST into the parser gem AST by having the program
# node accept a Compiler created for this parser and offset cache.
def build_ast(program, offset_cache)
  compiler = Compiler.new(self, offset_cache)
  program.accept(compiler)
end
149
+
150
# Translate each prism comment into a parser gem comment covering the same
# location.
def build_comments(comments, offset_cache)
  comments.map do |comment|
    range = build_range(comment.location, offset_cache)
    ::Parser::Source::Comment.new(range)
  end
end
156
+
157
# Translate the prism tokens into parser gem tokens. Only the first element
# of each entry in +tokens+ is handed to the Lexer.
def build_tokens(tokens, offset_cache)
  prism_tokens = tokens.map(&:first)
  Lexer.new(source_buffer, prism_tokens, offset_cache).to_a
end
161
+
162
# Turn a prism location into a parser gem range over the current source
# buffer, translating both offsets through the offset cache.
def build_range(location, offset_cache)
  begin_pos = offset_cache[location.start_offset]
  end_pos = offset_cache[location.end_offset]

  ::Parser::Source::Range.new(source_buffer, begin_pos, end_pos)
end
170
+
171
+ require_relative "parser/compiler"
172
+ require_relative "parser/lexer"
173
+
174
+ private_constant :Compiler
175
+ private_constant :Lexer
176
+ end
177
+ end
178
+ end