prism 0.19.0 → 0.20.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (58) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +29 -1
  3. data/Makefile +5 -0
  4. data/README.md +8 -6
  5. data/config.yml +236 -38
  6. data/docs/build_system.md +19 -2
  7. data/docs/cruby_compilation.md +27 -0
  8. data/docs/parser_translation.md +34 -0
  9. data/docs/parsing_rules.md +19 -0
  10. data/docs/releasing.md +3 -3
  11. data/docs/ruby_api.md +1 -1
  12. data/docs/serialization.md +17 -5
  13. data/ext/prism/api_node.c +101 -81
  14. data/ext/prism/extension.c +74 -11
  15. data/ext/prism/extension.h +1 -1
  16. data/include/prism/ast.h +1699 -504
  17. data/include/prism/defines.h +8 -0
  18. data/include/prism/diagnostic.h +39 -2
  19. data/include/prism/encoding.h +10 -0
  20. data/include/prism/options.h +40 -14
  21. data/include/prism/parser.h +33 -17
  22. data/include/prism/util/pm_buffer.h +9 -0
  23. data/include/prism/util/pm_constant_pool.h +7 -0
  24. data/include/prism/util/pm_newline_list.h +0 -11
  25. data/include/prism/version.h +2 -2
  26. data/include/prism.h +19 -2
  27. data/lib/prism/debug.rb +11 -5
  28. data/lib/prism/dot_visitor.rb +36 -14
  29. data/lib/prism/dsl.rb +22 -22
  30. data/lib/prism/ffi.rb +2 -2
  31. data/lib/prism/node.rb +1020 -737
  32. data/lib/prism/node_ext.rb +2 -2
  33. data/lib/prism/parse_result.rb +17 -9
  34. data/lib/prism/serialize.rb +53 -29
  35. data/lib/prism/translation/parser/compiler.rb +1831 -0
  36. data/lib/prism/translation/parser/lexer.rb +335 -0
  37. data/lib/prism/translation/parser/rubocop.rb +37 -0
  38. data/lib/prism/translation/parser.rb +163 -0
  39. data/lib/prism/translation.rb +11 -0
  40. data/lib/prism.rb +1 -0
  41. data/prism.gemspec +12 -5
  42. data/rbi/prism.rbi +150 -88
  43. data/rbi/prism_static.rbi +15 -3
  44. data/sig/prism.rbs +996 -961
  45. data/sig/prism_static.rbs +123 -46
  46. data/src/diagnostic.c +259 -219
  47. data/src/encoding.c +4 -8
  48. data/src/node.c +2 -6
  49. data/src/options.c +24 -5
  50. data/src/prettyprint.c +174 -42
  51. data/src/prism.c +1136 -328
  52. data/src/serialize.c +12 -9
  53. data/src/token_type.c +353 -4
  54. data/src/util/pm_buffer.c +11 -0
  55. data/src/util/pm_constant_pool.c +12 -11
  56. data/src/util/pm_newline_list.c +2 -14
  57. metadata +10 -3
  58. data/docs/building.md +0 -29
@@ -0,0 +1,335 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Prism
4
+ module Translation
5
+ class Parser
6
+ # Accepts a list of prism tokens and converts them into the expected
7
+ # format for the parser gem.
8
+ class Lexer
9
# The direct translation of prism token types into the token types used by
# the parser gem's lexer. Types that map to nil have no direct equivalent.
# The table is frozen because it is only ever read.
TYPES = {
  # These tokens should never appear in the output of the lexer.
  EOF: nil,
  MISSING: nil,
  NOT_PROVIDED: nil,
  IGNORED_NEWLINE: nil,
  EMBDOC_END: nil,
  EMBDOC_LINE: nil,
  __END__: nil,

  # These tokens have more or less direct mappings.
  AMPERSAND: :tAMPER2,
  AMPERSAND_AMPERSAND: :tANDOP,
  AMPERSAND_AMPERSAND_EQUAL: :tOP_ASGN,
  AMPERSAND_DOT: :tANDDOT,
  AMPERSAND_EQUAL: :tOP_ASGN,
  BACK_REFERENCE: :tBACK_REF,
  BACKTICK: :tXSTRING_BEG,
  BANG: :tBANG,
  BANG_EQUAL: :tNEQ,
  BANG_TILDE: :tNMATCH,
  BRACE_LEFT: :tLCURLY,
  BRACE_RIGHT: :tRCURLY,
  BRACKET_LEFT: :tLBRACK2,
  BRACKET_LEFT_ARRAY: :tLBRACK,
  BRACKET_LEFT_RIGHT: :tAREF,
  BRACKET_LEFT_RIGHT_EQUAL: :tASET,
  BRACKET_RIGHT: :tRBRACK,
  CARET: :tCARET,
  CARET_EQUAL: :tOP_ASGN,
  CHARACTER_LITERAL: :tCHARACTER,
  CLASS_VARIABLE: :tCVAR,
  COLON: :tCOLON,
  COLON_COLON: :tCOLON2,
  COMMA: :tCOMMA,
  COMMENT: :tCOMMENT,
  CONSTANT: :tCONSTANT,
  DOT: :tDOT,
  DOT_DOT: :tDOT2,
  DOT_DOT_DOT: :tDOT3,
  EMBDOC_BEGIN: :tCOMMENT,
  EMBEXPR_BEGIN: :tSTRING_DBEG,
  EMBEXPR_END: :tSTRING_DEND,
  EMBVAR: :tSTRING_DVAR,
  EQUAL: :tEQL,
  EQUAL_EQUAL: :tEQ,
  EQUAL_EQUAL_EQUAL: :tEQQ,
  EQUAL_GREATER: :tASSOC,
  EQUAL_TILDE: :tMATCH,
  FLOAT: :tFLOAT,
  FLOAT_IMAGINARY: :tIMAGINARY,
  FLOAT_RATIONAL: :tRATIONAL,
  FLOAT_RATIONAL_IMAGINARY: :tIMAGINARY,
  GLOBAL_VARIABLE: :tGVAR,
  GREATER: :tGT,
  GREATER_EQUAL: :tGEQ,
  GREATER_GREATER: :tRSHFT,
  GREATER_GREATER_EQUAL: :tOP_ASGN,
  HEREDOC_START: :tSTRING_BEG,
  HEREDOC_END: :tSTRING_END,
  IDENTIFIER: :tIDENTIFIER,
  INSTANCE_VARIABLE: :tIVAR,
  INTEGER: :tINTEGER,
  INTEGER_IMAGINARY: :tIMAGINARY,
  INTEGER_RATIONAL: :tRATIONAL,
  INTEGER_RATIONAL_IMAGINARY: :tIMAGINARY,
  KEYWORD_ALIAS: :kALIAS,
  KEYWORD_AND: :kAND,
  KEYWORD_BEGIN: :kBEGIN,
  KEYWORD_BEGIN_UPCASE: :klBEGIN,
  KEYWORD_BREAK: :kBREAK,
  KEYWORD_CASE: :kCASE,
  KEYWORD_CLASS: :kCLASS,
  KEYWORD_DEF: :kDEF,
  KEYWORD_DEFINED: :kDEFINED,
  KEYWORD_DO: :kDO,
  KEYWORD_DO_LOOP: :kDO_COND,
  KEYWORD_END: :kEND,
  KEYWORD_END_UPCASE: :klEND,
  KEYWORD_ENSURE: :kENSURE,
  KEYWORD_ELSE: :kELSE,
  KEYWORD_ELSIF: :kELSIF,
  KEYWORD_FALSE: :kFALSE,
  KEYWORD_FOR: :kFOR,
  KEYWORD_IF: :kIF,
  KEYWORD_IF_MODIFIER: :kIF_MOD,
  KEYWORD_IN: :kIN,
  KEYWORD_MODULE: :kMODULE,
  KEYWORD_NEXT: :kNEXT,
  KEYWORD_NIL: :kNIL,
  KEYWORD_NOT: :kNOT,
  KEYWORD_OR: :kOR,
  KEYWORD_REDO: :kREDO,
  KEYWORD_RESCUE: :kRESCUE,
  KEYWORD_RESCUE_MODIFIER: :kRESCUE_MOD,
  KEYWORD_RETRY: :kRETRY,
  KEYWORD_RETURN: :kRETURN,
  KEYWORD_SELF: :kSELF,
  KEYWORD_SUPER: :kSUPER,
  KEYWORD_THEN: :kTHEN,
  KEYWORD_TRUE: :kTRUE,
  KEYWORD_UNDEF: :kUNDEF,
  KEYWORD_UNLESS: :kUNLESS,
  KEYWORD_UNLESS_MODIFIER: :kUNLESS_MOD,
  KEYWORD_UNTIL: :kUNTIL,
  KEYWORD_UNTIL_MODIFIER: :kUNTIL_MOD,
  KEYWORD_WHEN: :kWHEN,
  KEYWORD_WHILE: :kWHILE,
  KEYWORD_WHILE_MODIFIER: :kWHILE_MOD,
  KEYWORD_YIELD: :kYIELD,
  KEYWORD___ENCODING__: :k__ENCODING__,
  KEYWORD___FILE__: :k__FILE__,
  KEYWORD___LINE__: :k__LINE__,
  LABEL: :tLABEL,
  LABEL_END: :tLABEL_END,
  LAMBDA_BEGIN: :tLAMBEG,
  LESS: :tLT,
  LESS_EQUAL: :tLEQ,
  LESS_EQUAL_GREATER: :tCMP,
  LESS_LESS: :tLSHFT,
  LESS_LESS_EQUAL: :tOP_ASGN,
  METHOD_NAME: :tFID,
  MINUS: :tMINUS,
  MINUS_EQUAL: :tOP_ASGN,
  MINUS_GREATER: :tLAMBDA,
  NEWLINE: :tNL,
  NUMBERED_REFERENCE: :tNTH_REF,
  PARENTHESIS_LEFT: :tLPAREN,
  PARENTHESIS_LEFT_PARENTHESES: :tLPAREN_ARG,
  PARENTHESIS_RIGHT: :tRPAREN,
  PERCENT: :tPERCENT,
  PERCENT_EQUAL: :tOP_ASGN,
  PERCENT_LOWER_I: :tQSYMBOLS_BEG,
  PERCENT_LOWER_W: :tQWORDS_BEG,
  PERCENT_UPPER_I: :tSYMBOLS_BEG,
  PERCENT_UPPER_W: :tWORDS_BEG,
  PERCENT_LOWER_X: :tXSTRING_BEG,
  PLUS: :tPLUS,
  PLUS_EQUAL: :tOP_ASGN,
  PIPE_EQUAL: :tOP_ASGN,
  PIPE: :tPIPE,
  PIPE_PIPE: :tOROP,
  PIPE_PIPE_EQUAL: :tOP_ASGN,
  QUESTION_MARK: :tEH,
  REGEXP_BEGIN: :tREGEXP_BEG,
  REGEXP_END: :tSTRING_END,
  SEMICOLON: :tSEMI,
  SLASH: :tDIVIDE,
  SLASH_EQUAL: :tOP_ASGN,
  STAR: :tSTAR2,
  STAR_EQUAL: :tOP_ASGN,
  STAR_STAR: :tPOW,
  STAR_STAR_EQUAL: :tOP_ASGN,
  STRING_BEGIN: :tSTRING_BEG,
  STRING_CONTENT: :tSTRING_CONTENT,
  STRING_END: :tSTRING_END,
  SYMBOL_BEGIN: :tSYMBEG,
  TILDE: :tTILDE,
  UAMPERSAND: :tAMPER,
  UCOLON_COLON: :tCOLON3,
  # The unary ("beginless") range operators get the tBDOT* types, keeping
  # them distinct from the binary DOT_DOT/DOT_DOT_DOT operators above.
  UDOT_DOT: :tBDOT2,
  UDOT_DOT_DOT: :tBDOT3,
  UMINUS: :tUMINUS,
  UMINUS_NUM: :tUNARY_NUM,
  UPLUS: :tUPLUS,
  USTAR: :tSTAR,
  # The unary double star (double splat) is tDSTAR; tPOW is reserved for the
  # binary STAR_STAR operator.
  USTAR_STAR: :tDSTAR,
  WORDS_SEP: :tSPACE
}.freeze
179
+
180
+ private_constant :TYPES
181
+
182
+ # The Parser::Source::Buffer that the tokens were lexed from.
183
+ attr_reader :source_buffer
184
+
185
+ # An array of prism tokens that we lexed.
186
+ attr_reader :lexed
187
+
188
+ # A hash that maps offsets in bytes to offsets in characters.
189
+ attr_reader :offset_cache
190
+
191
# Build a lexer translator from the parser gem source buffer, the list of
# prism tokens lexed from it, and the cache used to turn byte offsets into
# character offsets. The three values are stored as-is.
def initialize(source_buffer, lexed, offset_cache)
  @source_buffer, @lexed, @offset_cache = source_buffer, lexed, offset_cache
end
198
+
199
+ Range = ::Parser::Source::Range # :nodoc:
200
+ private_constant :Range
201
+
202
# Convert the prism tokens into the expected format for the parser gem.
#
# Returns an array of `[type, [value, range]]` entries, where type comes from
# the TYPES table (possibly rewritten below), value is the token's text or a
# converted Ruby object, and range is built from the source buffer and the
# character offsets looked up in the offset cache.
#
# Token values are copied before being trimmed, so the prism tokens that
# were handed to this lexer are left untouched.
def to_a
  # Builds a range in the source buffer from a pair of prism byte offsets.
  range = lambda do |start_offset, end_offset|
    Range.new(source_buffer, offset_cache[start_offset], offset_cache[end_offset])
  end

  tokens = []
  index = 0

  # A manual index is used because several branches look ahead and consume
  # the tokens that follow the current one.
  # NOTE(review): the lookahead reads lexed[index] directly, so entries are
  # assumed to be bare tokens rather than [token, state] pairs.
  while index < lexed.length
    token, = lexed[index]
    index += 1
    next if token.type == :IGNORED_NEWLINE || token.type == :EOF

    type = TYPES.fetch(token.type)
    value = token.value
    start_offset = token.location.start_offset
    end_offset = token.location.end_offset
    location = range.call(start_offset, end_offset)

    case type
    when :tCHARACTER
      value = value.delete_prefix("?")
    when :tCOMMENT
      if token.type == :EMBDOC_BEGIN
        # Fold the following tokens, up to and including EMBDOC_END, into a
        # single comment. If the stream ends first we stop there instead of
        # dereferencing a missing token.
        value = value.dup
        last_token = token

        while (next_token = lexed[index])
          value << next_token.value
          last_token = next_token
          index += 1
          break if next_token.type == :EMBDOC_END
        end

        location = range.call(start_offset, last_token.location.end_offset)
      else
        # Only shorten the range when a line terminator was actually removed;
        # a comment on the last line of a file may have none.
        stripped = value.chomp
        location = range.call(start_offset, end_offset - 1) if stripped.bytesize != value.bytesize
        value = stripped
      end
    when :tNL, :tSPACE, :tSTRING_DVAR
      value = nil
    when :tFLOAT
      value = Float(value)
    when :tIMAGINARY
      value = parse_complex(value)
    when :tINTEGER
      if value.start_with?("+")
        # A leading "+" is emitted as its own token ahead of the integer.
        tokens << [:tUNARY_NUM, ["+", range.call(start_offset, start_offset + 1)]]
        location = range.call(start_offset + 1, end_offset)
      end

      value = Integer(value)
    when :tLABEL, :tLABEL_END
      value = value.chomp(":")
    when :tNTH_REF
      value = Integer(value.delete_prefix("$"))
    when :tOP_ASGN
      value = value.chomp("=")
    when :tRATIONAL
      value = parse_rational(value)
    when :tSTRING_BEG
      plain_quote = value == "\"" || value == "'"
      next_token = lexed[index]
      next_next_token = lexed[index + 1]

      if plain_quote && next_token && next_token.type == :STRING_END
        # An empty plain string collapses into one tSTRING token.
        next_location = token.location.join(next_token.location)
        type = :tSTRING
        value = ""
        location = range.call(next_location.start_offset, next_location.end_offset)
        index += 1
      elsif plain_quote && next_token && next_token.type == :STRING_CONTENT && next_next_token && next_next_token.type == :STRING_END
        # A plain string with a single content token also collapses into one
        # tSTRING token.
        next_location = token.location.join(next_next_token.location)
        type = :tSTRING
        value = next_token.value
        location = range.call(next_location.start_offset, next_location.end_offset)
        index += 2
      elsif value.start_with?("<<")
        # Heredoc openers are reduced to "<<" plus the quote character, with
        # a double quote used when the opener has no explicit quote.
        quote = value[2] == "-" || value[2] == "~" ? value[3] : value[2]
        value = "<<#{quote == "'" || quote == "\"" ? quote : "\""}"
      end
    when :tSTRING_END
      if token.type == :REGEXP_END
        # Only the closing delimiter belongs to this token; the remaining
        # characters are emitted as tREGEXP_OPT after it.
        value = value[0]
        location = range.call(start_offset, start_offset + 1)
      end
    when :tSYMBEG
      if (next_token = lexed[index]) && next_token.type != :STRING_CONTENT && next_token.type != :EMBEXPR_BEGIN && next_token.type != :EMBVAR
        next_location = token.location.join(next_token.location)
        type = :tSYMBOL
        value = next_token.value
        value = { "~@" => "~", "!@" => "!" }.fetch(value, value)
        location = range.call(next_location.start_offset, next_location.end_offset)
        index += 1
      end
    when :tFID
      # dig tolerates an empty token list, which happens when a method name
      # is the very first token that gets emitted.
      type = :tIDENTIFIER if tokens.dig(-1, 0) == :kDEF
    end

    tokens << [type, [value, location]]

    if token.type == :REGEXP_END
      tokens << [:tREGEXP_OPT, [token.value[1..], range.call(start_offset + 1, end_offset)]]
    end
  end

  tokens
end
306
+
307
+ private
308
+
309
# Parse a complex from the string representation.
#
# value is the source text of an imaginary literal, for example "2i", "3ri"
# or "0x10i". Returns a Complex whose real part is 0. The argument is not
# modified, so frozen strings are accepted.
def parse_complex(value)
  imaginary = value.chomp("i")

  if imaginary.end_with?("r")
    Complex(0, parse_rational(imaginary))
  elsif imaginary.start_with?(/0[BbOoDdXx]/)
    # Prefixed literals are converted with Integer() first.
    Complex(0, Integer(imaginary))
  else
    Complex(0, imaginary)
  end
end

# Parse a rational from the string representation.
#
# value is the source text of a rational literal, for example "1.5r" or
# "0x10r". Returns a Rational. The argument is not modified, so frozen
# strings are accepted.
def parse_rational(value)
  digits = value.chomp("r")

  if digits.start_with?(/0[BbOoDdXx]/)
    # Prefixed literals are converted with Integer() first.
    Rational(Integer(digits))
  else
    Rational(digits)
  end
end
332
+ end
333
+ end
334
+ end
335
+ end
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "parser"
4
+ require "rubocop"
5
+
6
+ require "prism"
7
+ require "prism/translation/parser"
8
+
9
+ module Prism
10
+ module Translation
11
+ class Parser
12
+ # This is the special version number that should be used in rubocop
13
+ # configuration files to trigger using prism.
14
+ VERSION_3_3 = 80_82_73_83_77.33
15
+
16
+ # This module gets prepended into RuboCop::AST::ProcessedSource.
17
+ module ProcessedSource
18
# Hook into parser selection: when the requested ruby version is the special
# prism version number, load and return the prism translation parser.
# Every other version is passed on to the original implementation.
def parser_class(ruby_version)
  return super unless ruby_version == Prism::Translation::Parser::VERSION_3_3

  require "prism/translation/parser"
  Prism::Translation::Parser
end
28
+ end
29
+ end
30
+ end
31
+ end
32
+
33
# :stopdoc:
RuboCop::AST::ProcessedSource.prepend(Prism::Translation::Parser::ProcessedSource)

# Swap KNOWN_RUBIES for a frozen copy that also lists the prism version
# number. The old constant is removed first so it can be defined again.
RuboCop::TargetRuby.instance_exec do
  existing_rubies = const_get(:KNOWN_RUBIES)
  remove_const(:KNOWN_RUBIES)
  const_set(:KNOWN_RUBIES, [*existing_rubies, Prism::Translation::Parser::VERSION_3_3].freeze)
end
@@ -0,0 +1,163 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "parser"
4
+
5
+ module Prism
6
+ module Translation
7
+ # This class is the entry-point for converting a prism syntax tree into the
8
+ # whitequark/parser gem's syntax tree. It inherits from the base parser for
9
+ # the parser gem, and overrides the parse* methods to parse with prism and
10
+ # then translate.
11
+ class Parser < ::Parser::Base
12
# A diagnostic that carries prism's own error message. The parser gem builds
# its messages from a hard-coded list, so this subclass stores the message
# itself and exposes it through its own reader.
class Diagnostic < ::Parser::Diagnostic
  # The message generated by prism.
  attr_reader :message

  # Create an error-level diagnostic with the :prism_error reason for the
  # given message and location.
  def initialize(message, location)
    # NOTE(review): assigned before calling super; presumably the parent
    # initializer freezes the instance - confirm before reordering.
    @message = message
    super(:error, :prism_error, {}, location, [])
  end
end
25
+
26
Racc_debug_parser = false # :nodoc:

def version # :nodoc:
  33
end

# Ruby source files are treated as UTF-8 unless stated otherwise.
def default_encoding
  Encoding::UTF_8
end

def yyerror # :nodoc:
  # Intentionally empty.
end
39
+
40
# Parse the given source buffer with prism and return the translated AST.
# The buffer is held in @source_buffer for the duration of the call and is
# cleared again afterwards, even when an error is raised.
def parse(source_buffer)
  @source_buffer = source_buffer
  code = source_buffer.source

  parsed = unwrap(Prism.parse(code, filepath: source_buffer.name))
  program = parsed.value

  build_ast(program, build_offset_cache(code))
ensure
  @source_buffer = nil
end
51
+
52
# Parse the given source buffer with prism and return a two-element array
# holding the translated AST and the translated source code comments.
# The buffer is held in @source_buffer for the duration of the call and is
# cleared again afterwards, even when an error is raised.
def parse_with_comments(source_buffer)
  @source_buffer = source_buffer
  code = source_buffer.source

  offsets = build_offset_cache(code)
  parsed = unwrap(Prism.parse(code, filepath: source_buffer.name))

  ast = build_ast(parsed.value, offsets)
  comments = build_comments(parsed.comments, offsets)

  [ast, comments]
ensure
  @source_buffer = nil
end
67
+
68
# Parse and lex the given source buffer with prism and return a
# three-element array holding the translated AST, the translated source code
# comments, and the translated tokens. The second parameter is accepted but
# not used. The buffer is held in @source_buffer for the duration of the
# call and is cleared again afterwards, even when an error is raised.
def tokenize(source_buffer, _recover = false)
  @source_buffer = source_buffer
  code = source_buffer.source

  offsets = build_offset_cache(code)
  parsed = unwrap(Prism.parse_lex(code, filepath: source_buffer.name))

  program, lexed_tokens = parsed.value

  ast = build_ast(program, offsets)
  comments = build_comments(parsed.comments, offsets)
  tokens = build_tokens(lexed_tokens, offsets)

  [ast, comments, tokens]
ensure
  @source_buffer = nil
end
87
+
88
# Prism already resolves numbered parameters, so nothing is declared here.
# This only reports whether the node's first child looks like a numbered
# parameter name (_1 through _9).
def try_declare_numparam(node)
  name = node.children.first
  name.match?(/\A_[1-9]\z/)
end
93
+
94
+ private
95
+
96
# If an error was generated during the parse, raise a parser gem syntax
# error built from the first prism error. Otherwise return the result
# unchanged.
def unwrap(result)
  unless result.success?
    first_error = result.errors.first
    offset_cache = build_offset_cache(source_buffer.source)

    message = first_error.message
    range = build_range(first_error.location, offset_cache)
    raise ::Parser::SyntaxError, Diagnostic.new(message, range)
  end

  result
end
107
+
108
# Prism deals with offsets in bytes, while the parser gem deals with
# offsets in characters. We need to handle this conversion in order to
# build the parser gem AST.
#
# If the bytesize of the source is the same as the length, then every
# character is one byte wide and we can just use the offset directly.
# Otherwise we walk the source once and build an array indexed by byte
# offset whose entries are character offsets. Every byte of a multi-byte
# character maps to that character's offset, and one final entry maps the
# total bytesize to the total length so that end offsets resolve as well.
#
# Building the table is a single pass over the source and every lookup is
# an array index, so sources with multi-byte characters no longer pay for a
# byteslice on each new offset.
#
# The returned object is only meant to be used through #[].
def build_offset_cache(source)
  return -> (offset) { offset } if source.bytesize == source.length

  table = []

  source.each_char.with_index do |char, char_offset|
    char.bytesize.times { table << char_offset }
  end

  table << source.length
end
129
+
130
# Translate the prism program node into the parser gem AST by visiting it
# with a compiler bound to this parser and the given offset cache.
def build_ast(program, offset_cache)
  compiler = Compiler.new(self, offset_cache)
  program.accept(compiler)
end
134
+
135
# Translate each prism comment into a parser gem comment covering the same
# range of the source.
def build_comments(comments, offset_cache)
  comments.map do |comment|
    range = build_range(comment.location, offset_cache)
    ::Parser::Source::Comment.new(range)
  end
end
141
+
142
# Translate the prism tokens into parser gem tokens. Only the first element
# of each lexed entry is handed to the lexer translator.
def build_tokens(tokens, offset_cache)
  bare_tokens = tokens.map { |entry| entry.first }
  Lexer.new(source_buffer, bare_tokens, offset_cache).to_a
end
146
+
147
# Turn a prism location into a parser gem range over the current source
# buffer, converting both ends through the offset cache.
def build_range(location, offset_cache)
  begin_pos = offset_cache[location.start_offset]
  end_pos = offset_cache[location.end_offset]

  ::Parser::Source::Range.new(source_buffer, begin_pos, end_pos)
end
155
+
156
+ require_relative "parser/compiler"
157
+ require_relative "parser/lexer"
158
+
159
+ private_constant :Compiler
160
+ private_constant :Lexer
161
+ end
162
+ end
163
+ end
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+
3
module Prism
  # Namespace for translators that turn the prism syntax tree into the syntax
  # trees of other libraries. Right now the only target is the
  # whitequark/parser gem; a translator for the seattlerb/ruby_parser gem is
  # planned as well.
  module Translation
    # Loaded on first use so the parser gem is only required when needed.
    autoload :Parser, "prism/translation/parser"
  end
end
data/lib/prism.rb CHANGED
@@ -26,6 +26,7 @@ module Prism
26
26
  autoload :Pack, "prism/pack"
27
27
  autoload :Pattern, "prism/pattern"
28
28
  autoload :Serialize, "prism/serialize"
29
+ autoload :Translation, "prism/translation"
29
30
  autoload :Visitor, "prism/visitor"
30
31
 
31
32
  # Some of these constants are not meant to be exposed, so marking them as
data/prism.gemspec CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |spec|
4
4
  spec.name = "prism"
5
- spec.version = "0.19.0"
5
+ spec.version = "0.20.0"
6
6
  spec.authors = ["Shopify"]
7
7
  spec.email = ["ruby@shopify.com"]
8
8
 
@@ -22,8 +22,8 @@ Gem::Specification.new do |spec|
22
22
  "README.md",
23
23
  "config.yml",
24
24
  "docs/build_system.md",
25
- "docs/building.md",
26
25
  "docs/configuration.md",
26
+ "docs/cruby_compilation.md",
27
27
  "docs/design.md",
28
28
  "docs/encoding.md",
29
29
  "docs/fuzzing.md",
@@ -31,6 +31,8 @@ Gem::Specification.new do |spec|
31
31
  "docs/javascript.md",
32
32
  "docs/local_variable_depth.md",
33
33
  "docs/mapping.md",
34
+ "docs/parser_translation.md",
35
+ "docs/parsing_rules.md",
34
36
  "docs/releasing.md",
35
37
  "docs/ripper.md",
36
38
  "docs/ruby_api.md",
@@ -73,16 +75,21 @@ Gem::Specification.new do |spec|
73
75
  "lib/prism/ffi.rb",
74
76
  "lib/prism/lex_compat.rb",
75
77
  "lib/prism/mutation_compiler.rb",
76
- "lib/prism/node.rb",
77
78
  "lib/prism/node_ext.rb",
78
79
  "lib/prism/node_inspector.rb",
80
+ "lib/prism/node.rb",
79
81
  "lib/prism/pack.rb",
80
82
  "lib/prism/parse_result.rb",
83
+ "lib/prism/parse_result/comments.rb",
84
+ "lib/prism/parse_result/newlines.rb",
81
85
  "lib/prism/pattern.rb",
82
86
  "lib/prism/ripper_compat.rb",
83
87
  "lib/prism/serialize.rb",
84
- "lib/prism/parse_result/comments.rb",
85
- "lib/prism/parse_result/newlines.rb",
88
+ "lib/prism/translation.rb",
89
+ "lib/prism/translation/parser.rb",
90
+ "lib/prism/translation/parser/compiler.rb",
91
+ "lib/prism/translation/parser/lexer.rb",
92
+ "lib/prism/translation/parser/rubocop.rb",
86
93
  "lib/prism/visitor.rb",
87
94
  "src/diagnostic.c",
88
95
  "src/encoding.c",