prism 0.19.0 → 0.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +29 -1
  3. data/Makefile +5 -0
  4. data/README.md +8 -6
  5. data/config.yml +236 -38
  6. data/docs/build_system.md +19 -2
  7. data/docs/cruby_compilation.md +27 -0
  8. data/docs/parser_translation.md +34 -0
  9. data/docs/parsing_rules.md +19 -0
  10. data/docs/releasing.md +3 -3
  11. data/docs/ruby_api.md +1 -1
  12. data/docs/serialization.md +17 -5
  13. data/ext/prism/api_node.c +101 -81
  14. data/ext/prism/extension.c +74 -11
  15. data/ext/prism/extension.h +1 -1
  16. data/include/prism/ast.h +1699 -504
  17. data/include/prism/defines.h +8 -0
  18. data/include/prism/diagnostic.h +39 -2
  19. data/include/prism/encoding.h +10 -0
  20. data/include/prism/options.h +40 -14
  21. data/include/prism/parser.h +33 -17
  22. data/include/prism/util/pm_buffer.h +9 -0
  23. data/include/prism/util/pm_constant_pool.h +7 -0
  24. data/include/prism/util/pm_newline_list.h +0 -11
  25. data/include/prism/version.h +2 -2
  26. data/include/prism.h +19 -2
  27. data/lib/prism/debug.rb +11 -5
  28. data/lib/prism/dot_visitor.rb +36 -14
  29. data/lib/prism/dsl.rb +22 -22
  30. data/lib/prism/ffi.rb +2 -2
  31. data/lib/prism/node.rb +1020 -737
  32. data/lib/prism/node_ext.rb +2 -2
  33. data/lib/prism/parse_result.rb +17 -9
  34. data/lib/prism/serialize.rb +53 -29
  35. data/lib/prism/translation/parser/compiler.rb +1831 -0
  36. data/lib/prism/translation/parser/lexer.rb +335 -0
  37. data/lib/prism/translation/parser/rubocop.rb +37 -0
  38. data/lib/prism/translation/parser.rb +163 -0
  39. data/lib/prism/translation.rb +11 -0
  40. data/lib/prism.rb +1 -0
  41. data/prism.gemspec +12 -5
  42. data/rbi/prism.rbi +150 -88
  43. data/rbi/prism_static.rbi +15 -3
  44. data/sig/prism.rbs +996 -961
  45. data/sig/prism_static.rbs +123 -46
  46. data/src/diagnostic.c +259 -219
  47. data/src/encoding.c +4 -8
  48. data/src/node.c +2 -6
  49. data/src/options.c +24 -5
  50. data/src/prettyprint.c +174 -42
  51. data/src/prism.c +1136 -328
  52. data/src/serialize.c +12 -9
  53. data/src/token_type.c +353 -4
  54. data/src/util/pm_buffer.c +11 -0
  55. data/src/util/pm_constant_pool.c +12 -11
  56. data/src/util/pm_newline_list.c +2 -14
  57. metadata +10 -3
  58. data/docs/building.md +0 -29
@@ -0,0 +1,335 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Prism
4
+ module Translation
5
+ class Parser
6
+ # Accepts a list of prism tokens and converts them into the expected
7
+ # format for the parser gem.
8
+ class Lexer
9
# The direct translation of token types between the two lexers. Keys are
# prism token types, values are the parser gem token types. A nil value
# marks a prism token that has no counterpart of its own in the output.
TYPES = {
  # These tokens should never appear in the output of the lexer.
  EOF: nil,
  MISSING: nil,
  NOT_PROVIDED: nil,
  IGNORED_NEWLINE: nil,
  EMBDOC_END: nil,
  EMBDOC_LINE: nil,
  __END__: nil,

  # These tokens have more or less direct mappings.
  AMPERSAND: :tAMPER2,
  AMPERSAND_AMPERSAND: :tANDOP,
  AMPERSAND_AMPERSAND_EQUAL: :tOP_ASGN,
  AMPERSAND_DOT: :tANDDOT,
  AMPERSAND_EQUAL: :tOP_ASGN,
  BACK_REFERENCE: :tBACK_REF,
  BACKTICK: :tXSTRING_BEG,
  BANG: :tBANG,
  BANG_EQUAL: :tNEQ,
  BANG_TILDE: :tNMATCH,
  BRACE_LEFT: :tLCURLY,
  BRACE_RIGHT: :tRCURLY,
  BRACKET_LEFT: :tLBRACK2,
  BRACKET_LEFT_ARRAY: :tLBRACK,
  BRACKET_LEFT_RIGHT: :tAREF,
  BRACKET_LEFT_RIGHT_EQUAL: :tASET,
  BRACKET_RIGHT: :tRBRACK,
  CARET: :tCARET,
  CARET_EQUAL: :tOP_ASGN,
  CHARACTER_LITERAL: :tCHARACTER,
  CLASS_VARIABLE: :tCVAR,
  COLON: :tCOLON,
  COLON_COLON: :tCOLON2,
  COMMA: :tCOMMA,
  COMMENT: :tCOMMENT,
  CONSTANT: :tCONSTANT,
  DOT: :tDOT,
  DOT_DOT: :tDOT2,
  DOT_DOT_DOT: :tDOT3,
  EMBDOC_BEGIN: :tCOMMENT,
  EMBEXPR_BEGIN: :tSTRING_DBEG,
  EMBEXPR_END: :tSTRING_DEND,
  EMBVAR: :tSTRING_DVAR,
  EQUAL: :tEQL,
  EQUAL_EQUAL: :tEQ,
  EQUAL_EQUAL_EQUAL: :tEQQ,
  EQUAL_GREATER: :tASSOC,
  EQUAL_TILDE: :tMATCH,
  FLOAT: :tFLOAT,
  FLOAT_IMAGINARY: :tIMAGINARY,
  FLOAT_RATIONAL: :tRATIONAL,
  FLOAT_RATIONAL_IMAGINARY: :tIMAGINARY,
  GLOBAL_VARIABLE: :tGVAR,
  GREATER: :tGT,
  GREATER_EQUAL: :tGEQ,
  GREATER_GREATER: :tRSHFT,
  GREATER_GREATER_EQUAL: :tOP_ASGN,
  HEREDOC_START: :tSTRING_BEG,
  HEREDOC_END: :tSTRING_END,
  IDENTIFIER: :tIDENTIFIER,
  INSTANCE_VARIABLE: :tIVAR,
  INTEGER: :tINTEGER,
  INTEGER_IMAGINARY: :tIMAGINARY,
  INTEGER_RATIONAL: :tRATIONAL,
  INTEGER_RATIONAL_IMAGINARY: :tIMAGINARY,
  KEYWORD_ALIAS: :kALIAS,
  KEYWORD_AND: :kAND,
  KEYWORD_BEGIN: :kBEGIN,
  KEYWORD_BEGIN_UPCASE: :klBEGIN,
  KEYWORD_BREAK: :kBREAK,
  KEYWORD_CASE: :kCASE,
  KEYWORD_CLASS: :kCLASS,
  KEYWORD_DEF: :kDEF,
  KEYWORD_DEFINED: :kDEFINED,
  KEYWORD_DO: :kDO,
  KEYWORD_DO_LOOP: :kDO_COND,
  KEYWORD_END: :kEND,
  KEYWORD_END_UPCASE: :klEND,
  KEYWORD_ENSURE: :kENSURE,
  KEYWORD_ELSE: :kELSE,
  KEYWORD_ELSIF: :kELSIF,
  KEYWORD_FALSE: :kFALSE,
  KEYWORD_FOR: :kFOR,
  KEYWORD_IF: :kIF,
  KEYWORD_IF_MODIFIER: :kIF_MOD,
  KEYWORD_IN: :kIN,
  KEYWORD_MODULE: :kMODULE,
  KEYWORD_NEXT: :kNEXT,
  KEYWORD_NIL: :kNIL,
  KEYWORD_NOT: :kNOT,
  KEYWORD_OR: :kOR,
  KEYWORD_REDO: :kREDO,
  KEYWORD_RESCUE: :kRESCUE,
  KEYWORD_RESCUE_MODIFIER: :kRESCUE_MOD,
  KEYWORD_RETRY: :kRETRY,
  KEYWORD_RETURN: :kRETURN,
  KEYWORD_SELF: :kSELF,
  KEYWORD_SUPER: :kSUPER,
  KEYWORD_THEN: :kTHEN,
  KEYWORD_TRUE: :kTRUE,
  KEYWORD_UNDEF: :kUNDEF,
  KEYWORD_UNLESS: :kUNLESS,
  KEYWORD_UNLESS_MODIFIER: :kUNLESS_MOD,
  KEYWORD_UNTIL: :kUNTIL,
  KEYWORD_UNTIL_MODIFIER: :kUNTIL_MOD,
  KEYWORD_WHEN: :kWHEN,
  KEYWORD_WHILE: :kWHILE,
  KEYWORD_WHILE_MODIFIER: :kWHILE_MOD,
  KEYWORD_YIELD: :kYIELD,
  KEYWORD___ENCODING__: :k__ENCODING__,
  KEYWORD___FILE__: :k__FILE__,
  KEYWORD___LINE__: :k__LINE__,
  LABEL: :tLABEL,
  LABEL_END: :tLABEL_END,
  LAMBDA_BEGIN: :tLAMBEG,
  LESS: :tLT,
  LESS_EQUAL: :tLEQ,
  LESS_EQUAL_GREATER: :tCMP,
  LESS_LESS: :tLSHFT,
  LESS_LESS_EQUAL: :tOP_ASGN,
  METHOD_NAME: :tFID,
  MINUS: :tMINUS,
  MINUS_EQUAL: :tOP_ASGN,
  MINUS_GREATER: :tLAMBDA,
  NEWLINE: :tNL,
  NUMBERED_REFERENCE: :tNTH_REF,
  PARENTHESIS_LEFT: :tLPAREN,
  PARENTHESIS_LEFT_PARENTHESES: :tLPAREN_ARG,
  PARENTHESIS_RIGHT: :tRPAREN,
  PERCENT: :tPERCENT,
  PERCENT_EQUAL: :tOP_ASGN,
  PERCENT_LOWER_I: :tQSYMBOLS_BEG,
  PERCENT_LOWER_W: :tQWORDS_BEG,
  PERCENT_UPPER_I: :tSYMBOLS_BEG,
  PERCENT_UPPER_W: :tWORDS_BEG,
  PERCENT_LOWER_X: :tXSTRING_BEG,
  PLUS: :tPLUS,
  PLUS_EQUAL: :tOP_ASGN,
  PIPE_EQUAL: :tOP_ASGN,
  PIPE: :tPIPE,
  PIPE_PIPE: :tOROP,
  PIPE_PIPE_EQUAL: :tOP_ASGN,
  QUESTION_MARK: :tEH,
  REGEXP_BEGIN: :tREGEXP_BEG,
  REGEXP_END: :tSTRING_END,
  SEMICOLON: :tSEMI,
  SLASH: :tDIVIDE,
  SLASH_EQUAL: :tOP_ASGN,
  STAR: :tSTAR2,
  STAR_EQUAL: :tOP_ASGN,
  STAR_STAR: :tPOW,
  STAR_STAR_EQUAL: :tOP_ASGN,
  STRING_BEGIN: :tSTRING_BEG,
  STRING_CONTENT: :tSTRING_CONTENT,
  STRING_END: :tSTRING_END,
  SYMBOL_BEGIN: :tSYMBEG,
  TILDE: :tTILDE,

  # The U-prefixed prism tokens are the unary/prefix forms of an operator,
  # which the parser gem reports with dedicated token types.
  UAMPERSAND: :tAMPER,
  UCOLON_COLON: :tCOLON3,
  UDOT_DOT: :tBDOT2,
  UDOT_DOT_DOT: :tBDOT3,
  UMINUS: :tUMINUS,
  UMINUS_NUM: :tUNARY_NUM,
  UPLUS: :tUPLUS,
  USTAR: :tSTAR,
  USTAR_STAR: :tDSTAR,
  WORDS_SEP: :tSPACE
}.freeze
179
+
180
+ private_constant :TYPES
181
+
182
+ # The Parser::Source::Buffer that the tokens were lexed from.
183
+ attr_reader :source_buffer
184
+
185
+ # An array of prism tokens that we lexed.
186
+ attr_reader :lexed
187
+
188
+ # A hash that maps offsets in bytes to offsets in characters.
189
+ attr_reader :offset_cache
190
+
191
# Build a lexer translator from its three collaborators: the buffer the
# source was read from, the list of prism tokens, and the lookup used to
# turn byte offsets into character offsets.
def initialize(source_buffer, lexed, offset_cache)
  @source_buffer, @lexed, @offset_cache = source_buffer, lexed, offset_cache
end
198
+
199
+ Range = ::Parser::Source::Range # :nodoc:
200
+ private_constant :Range
201
+
202
# Convert the prism tokens into the expected format for the parser gem.
#
# Returns an array of `[type, [value, range]]` entries. IGNORED_NEWLINE and
# EOF tokens are dropped, and some prism tokens are merged (simple string
# literals, plain symbols, embedded documents) or split (a leading "+" on
# an integer, regexp closing options) along the way.
#
# The token values held in `lexed` are never modified, so the method can be
# called repeatedly and works when those strings are frozen.
#
# Raises KeyError if a token type is missing from TYPES.
def to_a
  tokens = []
  index = 0

  # Translate a pair of prism byte offsets into a parser gem range.
  range = lambda do |start_offset, end_offset|
    Range.new(source_buffer, offset_cache[start_offset], offset_cache[end_offset])
  end

  while index < lexed.length
    token, = lexed[index]
    index += 1
    next if token.type == :IGNORED_NEWLINE || token.type == :EOF

    type = TYPES.fetch(token.type)
    value = token.value
    start_offset = token.location.start_offset
    end_offset = token.location.end_offset
    location = range.call(start_offset, end_offset)

    case type
    when :tCHARACTER
      value = value.delete_prefix("?")
    when :tCOMMENT
      if token.type == :EMBDOC_BEGIN
        # Fold every following token up to and including EMBDOC_END into
        # this one comment. Stop quietly if the token list ends first.
        last_token = token

        while (next_token = lexed[index])
          value += next_token.value
          last_token = next_token
          index += 1
          break if next_token.type == :EMBDOC_END
        end

        location = range.call(start_offset, last_token.location.end_offset)
      else
        # A line comment normally carries its trailing newline, which is
        # excluded from both value and range. A comment that ends the file
        # without a newline must keep its final character.
        stripped = value.chomp
        comment_end = stripped.length == value.length ? end_offset : end_offset - 1
        value = stripped
        location = range.call(start_offset, comment_end)
      end
    when :tNL
      value = nil
    when :tFLOAT
      value = Float(value)
    when :tIMAGINARY
      value = parse_complex(value)
    when :tINTEGER
      if value.start_with?("+")
        # The sign is reported as its own token ahead of the number.
        tokens << [:tUNARY_NUM, ["+", range.call(start_offset, start_offset + 1)]]
        location = range.call(start_offset + 1, end_offset)
      end

      value = Integer(value)
    when :tLABEL, :tLABEL_END
      value = value.chomp(":")
    when :tNTH_REF
      value = Integer(value.delete_prefix("$"))
    when :tOP_ASGN
      value = value.chomp("=")
    when :tRATIONAL
      value = parse_rational(value)
    when :tSPACE
      value = nil
    when :tSTRING_BEG
      simple_quote = value == "\"" || value == "'"
      next_token = lexed[index]
      next_next_token = lexed[index + 1]

      if simple_quote && next_token&.type == :STRING_END
        # An empty quoted string collapses into a single tSTRING.
        next_location = token.location.join(next_token.location)
        type = :tSTRING
        value = ""
        location = range.call(next_location.start_offset, next_location.end_offset)
        index += 1
      elsif simple_quote && next_token&.type == :STRING_CONTENT && next_next_token&.type == :STRING_END
        # A quoted string with a single content token also collapses.
        next_location = token.location.join(next_next_token.location)
        type = :tSTRING
        value = next_token.value
        location = range.call(next_location.start_offset, next_location.end_offset)
        index += 2
      elsif value.start_with?("<<")
        # Heredoc openers are reduced to "<<" plus the quote character,
        # using a double quote when none was written.
        quote = value[2] == "-" || value[2] == "~" ? value[3] : value[2]
        value = "<<#{quote == "'" || quote == "\"" ? quote : "\""}"
      end
    when :tSTRING_DVAR
      value = nil
    when :tSTRING_END
      if token.type == :REGEXP_END
        # Only the closing delimiter belongs to this token. The options
        # that follow it are emitted separately below.
        value = value[0]
        location = range.call(start_offset, start_offset + 1)
      end
    when :tSYMBEG
      next_token = lexed[index]

      if next_token && next_token.type != :STRING_CONTENT && next_token.type != :EMBEXPR_BEGIN && next_token.type != :EMBVAR
        next_location = token.location.join(next_token.location)
        type = :tSYMBOL
        value = next_token.value
        value = { "~@" => "~", "!@" => "!" }.fetch(value, value)
        location = range.call(next_location.start_offset, next_location.end_offset)
        index += 1
      end
    when :tFID
      # `dig` copes with an empty token list, e.g. when a method name such
      # as `foo?` is the very first token.
      type = :tIDENTIFIER if tokens.dig(-1, 0) == :kDEF
    end

    tokens << [type, [value, location]]

    if token.type == :REGEXP_END
      tokens << [:tREGEXP_OPT, [token.value[1..], range.call(start_offset + 1, end_offset)]]
    end
  end

  tokens
end
306
+
307
+ private
308
+
309
# Parse a complex from the string representation of an imaginary literal
# (for example "2i", "0x10i" or "3ri"). The result always has a zero real
# part. The given string is left untouched, so frozen strings are accepted.
def parse_complex(value)
  imaginary = value.chomp("i")

  if imaginary.end_with?("r")
    Complex(0, parse_rational(imaginary))
  elsif imaginary.start_with?(/0[BbOoDdXx]/)
    # Radix-prefixed literals go through Integer() first.
    Complex(0, Integer(imaginary))
  else
    Complex(0, imaginary)
  end
end
321
+
322
# Parse a rational from the string representation of a rational literal
# (for example "3r", "1.5r" or "0b11r"). The given string is left
# untouched, so frozen strings are accepted.
def parse_rational(value)
  digits = value.chomp("r")

  if digits.start_with?(/0[BbOoDdXx]/)
    # Radix-prefixed literals go through Integer() first.
    Rational(Integer(digits))
  else
    Rational(digits)
  end
end
332
+ end
333
+ end
334
+ end
335
+ end
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "parser"
4
+ require "rubocop"
5
+
6
+ require "prism"
7
+ require "prism/translation/parser"
8
+
9
module Prism
  module Translation
    class Parser
      # This is the special version number that should be used in rubocop
      # configuration files to trigger using prism. The digit groups before
      # the decimal point are the ASCII codes of the letters P, R, I, S, M.
      VERSION_3_3 = 80_82_73_83_77.33

      # This module gets prepended into RuboCop::AST::ProcessedSource.
      module ProcessedSource
        # Redefine parser_class so that we can inject the prism parser into
        # the list of known parsers. Any version other than VERSION_3_3 is
        # handed to the original implementation.
        #
        # The translation parser is already loaded by the require at the top
        # of this file, so no require is needed here.
        def parser_class(ruby_version)
          if ruby_version == Prism::Translation::Parser::VERSION_3_3
            Prism::Translation::Parser
          else
            super
          end
        end
      end
    end
  end
end
32
+
33
# :stopdoc:
RuboCop::AST::ProcessedSource.prepend(Prism::Translation::Parser::ProcessedSource)

# Swap RuboCop's list of known ruby versions for a copy that also contains
# the prism sentinel version. The old constant is removed first so the new
# one can be defined without a redefinition warning.
target_ruby = RuboCop::TargetRuby
extended_rubies = [*target_ruby.const_get(:KNOWN_RUBIES), Prism::Translation::Parser::VERSION_3_3].freeze
target_ruby.send(:remove_const, :KNOWN_RUBIES)
target_ruby.const_set(:KNOWN_RUBIES, extended_rubies)
@@ -0,0 +1,163 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "parser"
4
+
5
+ module Prism
6
+ module Translation
7
+ # This class is the entry-point for converting a prism syntax tree into the
8
+ # whitequark/parser gem's syntax tree. It inherits from the base parser for
9
+ # the parser gem, and overrides the parse* methods to parse with prism and
10
+ # then translate.
11
+ class Parser < ::Parser::Base
12
# The parser gem builds diagnostic messages from a hard-coded table of
# reasons. This subclass carries the message produced by prism instead and
# reports it through #message.
class Diagnostic < ::Parser::Diagnostic
  # Initialize a new diagnostic with the given message and location. Every
  # diagnostic is reported at the :error level with the :prism_error
  # reason, no arguments and no highlights.
  def initialize(message, location)
    # Stored before calling super; keep this order.
    # NOTE(review): presumably the parent initializer freezes the object - confirm.
    @message = message
    super(:error, :prism_error, {}, location, [])
  end

  # The message generated by prism.
  def message
    @message
  end
end
25
+
26
# Debug flag looked up by the generated-parser machinery; always off here.
# NOTE(review): presumably read by ::Parser::Base / racc - confirm.
Racc_debug_parser = false # :nodoc:

# The version number reported by this parser.
# NOTE(review): 33 presumably stands for Ruby 3.3 - confirm.
def version # :nodoc:
  33
end

# The default encoding for Ruby files is UTF-8.
def default_encoding
  Encoding::UTF_8
end

# Intentionally empty: returns nil and does nothing.
def yyerror # :nodoc:
  nil
end
39
+
40
# Parse the given source buffer with prism and return the translated AST.
# Raises ::Parser::SyntaxError (through unwrap) when prism reports errors.
# The buffer is only remembered for the duration of the call.
def parse(source_buffer)
  @source_buffer = source_buffer
  code = source_buffer.source
  parsed = unwrap(Prism.parse(code, filepath: source_buffer.name))

  build_ast(parsed.value, build_offset_cache(code))
ensure
  @source_buffer = nil
end
51
+
52
# Parse the given source buffer with prism and return a two-element array:
# the translated AST followed by the translated source code comments.
# Raises ::Parser::SyntaxError (through unwrap) when prism reports errors.
def parse_with_comments(source_buffer)
  @source_buffer = source_buffer
  code = source_buffer.source

  cache = build_offset_cache(code)
  parsed = unwrap(Prism.parse(code, filepath: source_buffer.name))

  ast = build_ast(parsed.value, cache)
  comments = build_comments(parsed.comments, cache)
  [ast, comments]
ensure
  @source_buffer = nil
end
67
+
68
# Parse and lex the given source buffer with prism and return a
# three-element array: the translated AST, the translated comments, and the
# translated tokens. The second parameter is accepted but not used.
# Raises ::Parser::SyntaxError (through unwrap) when prism reports errors.
def tokenize(source_buffer, _recover = false)
  @source_buffer = source_buffer
  code = source_buffer.source

  cache = build_offset_cache(code)
  lexed = unwrap(Prism.parse_lex(code, filepath: source_buffer.name))
  ast_root, raw_tokens = lexed.value

  ast = build_ast(ast_root, cache)
  comments = build_comments(lexed.comments, cache)
  [ast, comments, build_tokens(raw_tokens, cache)]
ensure
  @source_buffer = nil
end
87
+
88
# Prism already resolves numbered parameters, so nothing is declared here.
# The method only reports whether the node's first child looks like a
# numbered parameter name (_1 through _9).
def try_declare_numparam(node)
  name = node.children.first
  name.match?(/\A_[1-9]\z/)
end
93
+
94
+ private
95
+
96
# If there was an error generated during the parse, raise a
# ::Parser::SyntaxError built from the first error. Otherwise hand the
# result back unchanged.
def unwrap(result)
  unless result.success?
    first_error = result.errors.first
    cache = build_offset_cache(source_buffer.source)
    error_range = build_range(first_error.location, cache)

    raise ::Parser::SyntaxError, Diagnostic.new(first_error.message, error_range)
  end

  result
end
107
+
108
# Prism deals with offsets in bytes, while the parser gem deals with
# offsets in characters. We need to handle this conversion in order to
# build the parser gem AST.
#
# The returned object answers `[]` with the character offset for a given
# byte offset. If the bytesize of the source is the same as the length,
# the byte offset is returned directly. Otherwise a lookup table is filled
# in a single pass over the source, so each lookup no longer re-slices the
# source from its beginning.
#
# A byte offset that falls inside a multi-byte character maps to the index
# of that character. Offsets past the end of the source fall back to
# counting the characters of the corresponding byte slice.
def build_offset_cache(source)
  return ->(offset) { offset } if source.bytesize == source.length

  # table[byte_offset] is the index of the character containing that byte.
  table = []
  source.each_char.with_index do |char, char_index|
    char.bytesize.times { table << char_index }
  end

  # The end-of-source offset is a valid end position for ranges.
  table << source.length

  ->(offset) { table.fetch(offset) { source.byteslice(0, offset).length } }
end
129
+
130
# Translate the prism AST into the parser gem AST by walking it with a
# Compiler that is given this parser and the offset cache.
def build_ast(program, offset_cache)
  compiler = Compiler.new(self, offset_cache)
  program.accept(compiler)
end
134
+
135
# Translate each prism comment into a parser gem comment covering the same
# range of the source.
def build_comments(comments, offset_cache)
  comments.map do |comment|
    comment_range = build_range(comment.location, offset_cache)
    ::Parser::Source::Comment.new(comment_range)
  end
end
141
+
142
# Translate the prism tokens into parser gem tokens. Only the first element
# of each entry is handed to the Lexer.
# NOTE(review): presumably each entry is a [token, lexer state] pair - confirm.
def build_tokens(tokens, offset_cache)
  prism_tokens = tokens.map(&:first)
  Lexer.new(source_buffer, prism_tokens, offset_cache).to_a
end
146
+
147
# Convert a prism location into a parser gem range on the current source
# buffer, translating both byte offsets through the offset cache.
def build_range(location, offset_cache)
  range_start = offset_cache[location.start_offset]
  range_end = offset_cache[location.end_offset]

  ::Parser::Source::Range.new(source_buffer, range_start, range_end)
end
155
+
156
+ require_relative "parser/compiler"
157
+ require_relative "parser/lexer"
158
+
159
+ private_constant :Compiler
160
+ private_constant :Lexer
161
+ end
162
+ end
163
+ end
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+
3
module Prism
  # Namespace for translators that turn the prism syntax tree into the
  # syntax trees of other libraries. Only the whitequark/parser gem is
  # supported for now; support for the seattlerb/ruby_parser gem is planned.
  module Translation
    # Loaded lazily, on first reference to Prism::Translation::Parser.
    autoload(:Parser, "prism/translation/parser")
  end
end
data/lib/prism.rb CHANGED
@@ -26,6 +26,7 @@ module Prism
26
26
  autoload :Pack, "prism/pack"
27
27
  autoload :Pattern, "prism/pattern"
28
28
  autoload :Serialize, "prism/serialize"
29
+ autoload :Translation, "prism/translation"
29
30
  autoload :Visitor, "prism/visitor"
30
31
 
31
32
  # Some of these constants are not meant to be exposed, so marking them as
data/prism.gemspec CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |spec|
4
4
  spec.name = "prism"
5
- spec.version = "0.19.0"
5
+ spec.version = "0.20.0"
6
6
  spec.authors = ["Shopify"]
7
7
  spec.email = ["ruby@shopify.com"]
8
8
 
@@ -22,8 +22,8 @@ Gem::Specification.new do |spec|
22
22
  "README.md",
23
23
  "config.yml",
24
24
  "docs/build_system.md",
25
- "docs/building.md",
26
25
  "docs/configuration.md",
26
+ "docs/cruby_compilation.md",
27
27
  "docs/design.md",
28
28
  "docs/encoding.md",
29
29
  "docs/fuzzing.md",
@@ -31,6 +31,8 @@ Gem::Specification.new do |spec|
31
31
  "docs/javascript.md",
32
32
  "docs/local_variable_depth.md",
33
33
  "docs/mapping.md",
34
+ "docs/parser_translation.md",
35
+ "docs/parsing_rules.md",
34
36
  "docs/releasing.md",
35
37
  "docs/ripper.md",
36
38
  "docs/ruby_api.md",
@@ -73,16 +75,21 @@ Gem::Specification.new do |spec|
73
75
  "lib/prism/ffi.rb",
74
76
  "lib/prism/lex_compat.rb",
75
77
  "lib/prism/mutation_compiler.rb",
76
- "lib/prism/node.rb",
77
78
  "lib/prism/node_ext.rb",
78
79
  "lib/prism/node_inspector.rb",
80
+ "lib/prism/node.rb",
79
81
  "lib/prism/pack.rb",
80
82
  "lib/prism/parse_result.rb",
83
+ "lib/prism/parse_result/comments.rb",
84
+ "lib/prism/parse_result/newlines.rb",
81
85
  "lib/prism/pattern.rb",
82
86
  "lib/prism/ripper_compat.rb",
83
87
  "lib/prism/serialize.rb",
84
- "lib/prism/parse_result/comments.rb",
85
- "lib/prism/parse_result/newlines.rb",
88
+ "lib/prism/translation.rb",
89
+ "lib/prism/translation/parser.rb",
90
+ "lib/prism/translation/parser/compiler.rb",
91
+ "lib/prism/translation/parser/lexer.rb",
92
+ "lib/prism/translation/parser/rubocop.rb",
86
93
  "lib/prism/visitor.rb",
87
94
  "src/diagnostic.c",
88
95
  "src/encoding.c",