prism 1.4.0 → 1.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +73 -1
- data/Makefile +7 -5
- data/README.md +3 -1
- data/config.yml +294 -41
- data/docs/build_system.md +2 -2
- data/docs/cruby_compilation.md +1 -1
- data/docs/design.md +2 -2
- data/docs/parser_translation.md +8 -23
- data/docs/releasing.md +6 -25
- data/docs/ripper_translation.md +1 -1
- data/ext/prism/api_node.c +9 -3
- data/ext/prism/extconf.rb +1 -1
- data/ext/prism/extension.c +24 -3
- data/ext/prism/extension.h +1 -1
- data/include/prism/ast.h +360 -70
- data/include/prism/diagnostic.h +7 -0
- data/include/prism/options.h +49 -3
- data/include/prism/parser.h +3 -0
- data/include/prism/regexp.h +2 -2
- data/include/prism/util/pm_buffer.h +8 -0
- data/include/prism/util/pm_integer.h +4 -0
- data/include/prism/util/pm_list.h +6 -0
- data/include/prism/util/pm_string.h +12 -2
- data/include/prism/version.h +2 -2
- data/include/prism.h +40 -15
- data/lib/prism/compiler.rb +456 -151
- data/lib/prism/desugar_compiler.rb +1 -0
- data/lib/prism/dispatcher.rb +16 -0
- data/lib/prism/dot_visitor.rb +10 -1
- data/lib/prism/dsl.rb +5 -2
- data/lib/prism/ffi.rb +28 -10
- data/lib/prism/inspect_visitor.rb +4 -0
- data/lib/prism/lex_compat.rb +1 -0
- data/lib/prism/mutation_compiler.rb +3 -0
- data/lib/prism/node.rb +559 -349
- data/lib/prism/node_ext.rb +4 -1
- data/lib/prism/pack.rb +2 -0
- data/lib/prism/parse_result/comments.rb +1 -0
- data/lib/prism/parse_result/errors.rb +1 -0
- data/lib/prism/parse_result/newlines.rb +1 -0
- data/lib/prism/parse_result.rb +3 -15
- data/lib/prism/pattern.rb +1 -0
- data/lib/prism/polyfill/scan_byte.rb +14 -0
- data/lib/prism/polyfill/warn.rb +36 -0
- data/lib/prism/reflection.rb +4 -1
- data/lib/prism/relocation.rb +1 -0
- data/lib/prism/serialize.rb +30 -22
- data/lib/prism/string_query.rb +1 -0
- data/lib/prism/translation/parser/builder.rb +1 -0
- data/lib/prism/translation/parser/compiler.rb +63 -41
- data/lib/prism/translation/parser/lexer.rb +29 -21
- data/lib/prism/translation/parser.rb +25 -4
- data/lib/prism/translation/parser33.rb +1 -0
- data/lib/prism/translation/parser34.rb +1 -0
- data/lib/prism/translation/parser35.rb +2 -6
- data/lib/prism/translation/parser40.rb +13 -0
- data/lib/prism/translation/parser41.rb +13 -0
- data/lib/prism/translation/parser_current.rb +26 -0
- data/lib/prism/translation/ripper/sexp.rb +1 -0
- data/lib/prism/translation/ripper.rb +19 -3
- data/lib/prism/translation/ruby_parser.rb +340 -22
- data/lib/prism/translation.rb +4 -0
- data/lib/prism/visitor.rb +457 -152
- data/lib/prism.rb +22 -0
- data/prism.gemspec +9 -1
- data/rbi/prism/dsl.rbi +6 -6
- data/rbi/prism/node.rbi +42 -17
- data/rbi/prism/translation/parser35.rbi +0 -2
- data/rbi/prism/translation/parser40.rbi +6 -0
- data/rbi/prism/translation/parser41.rbi +6 -0
- data/sig/prism/dispatcher.rbs +3 -0
- data/sig/prism/dsl.rbs +5 -5
- data/sig/prism/node.rbs +462 -38
- data/sig/prism/node_ext.rbs +84 -17
- data/sig/prism/parse_result/comments.rbs +38 -0
- data/sig/prism/parse_result.rbs +4 -0
- data/sig/prism/reflection.rbs +1 -1
- data/sig/prism.rbs +4 -0
- data/src/diagnostic.c +13 -1
- data/src/encoding.c +172 -67
- data/src/node.c +11 -0
- data/src/options.c +17 -7
- data/src/prettyprint.c +18 -0
- data/src/prism.c +1495 -2021
- data/src/serialize.c +9 -1
- data/src/token_type.c +38 -36
- data/src/util/pm_constant_pool.c +1 -1
- data/src/util/pm_string.c +6 -8
- metadata +11 -3
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
|
+
# :markup: markdown
|
|
2
3
|
|
|
3
4
|
require "strscan"
|
|
4
5
|
require_relative "../../polyfill/append_as_bytes"
|
|
6
|
+
require_relative "../../polyfill/scan_byte"
|
|
5
7
|
|
|
6
8
|
module Prism
|
|
7
9
|
module Translation
|
|
@@ -200,8 +202,8 @@ module Prism
|
|
|
200
202
|
# The `PARENTHESIS_LEFT` token in Prism is classified as either `tLPAREN` or `tLPAREN2` in the Parser gem.
|
|
201
203
|
# The following token types are listed as those classified as `tLPAREN`.
|
|
202
204
|
LPAREN_CONVERSION_TOKEN_TYPES = Set.new([
|
|
203
|
-
:kBREAK, :kCASE, :tDIVIDE, :kFOR, :kIF, :kNEXT, :kRETURN, :kUNTIL, :kWHILE, :tAMPER, :tANDOP, :tBANG, :tCOMMA, :tDOT2, :tDOT3,
|
|
204
|
-
:tEQL, :tLPAREN, :tLPAREN2, :tLPAREN_ARG, :tLSHFT, :tNL, :tOP_ASGN, :tOROP, :tPIPE, :tSEMI, :tSTRING_DBEG, :tUMINUS, :tUPLUS
|
|
205
|
+
:kBREAK, :tCARET, :kCASE, :tDIVIDE, :kFOR, :kIF, :kNEXT, :kRETURN, :kUNTIL, :kWHILE, :tAMPER, :tANDOP, :tBANG, :tCOMMA, :tDOT2, :tDOT3,
|
|
206
|
+
:tEQL, :tLPAREN, :tLPAREN2, :tLPAREN_ARG, :tLSHFT, :tNL, :tOP_ASGN, :tOROP, :tPIPE, :tSEMI, :tSTRING_DBEG, :tUMINUS, :tUPLUS, :tLCURLY
|
|
205
207
|
])
|
|
206
208
|
|
|
207
209
|
# Types of tokens that are allowed to continue a method call with comments in-between.
|
|
@@ -275,20 +277,20 @@ module Prism
|
|
|
275
277
|
when :tCOMMENT
|
|
276
278
|
if token.type == :EMBDOC_BEGIN
|
|
277
279
|
|
|
278
|
-
while !((next_token = lexed[index]
|
|
280
|
+
while !((next_token = lexed[index]&.first) && next_token.type == :EMBDOC_END) && (index < length - 1)
|
|
279
281
|
value += next_token.value
|
|
280
282
|
index += 1
|
|
281
283
|
end
|
|
282
284
|
|
|
283
285
|
value += next_token.value
|
|
284
|
-
location = range(token.location.start_offset,
|
|
286
|
+
location = range(token.location.start_offset, next_token.location.end_offset)
|
|
285
287
|
index += 1
|
|
286
288
|
else
|
|
287
289
|
is_at_eol = value.chomp!.nil?
|
|
288
290
|
location = range(token.location.start_offset, token.location.end_offset + (is_at_eol ? 0 : -1))
|
|
289
291
|
|
|
290
|
-
prev_token = lexed[index - 2]
|
|
291
|
-
next_token = lexed[index]
|
|
292
|
+
prev_token, _ = lexed[index - 2] if index - 2 >= 0
|
|
293
|
+
next_token, _ = lexed[index]
|
|
292
294
|
|
|
293
295
|
is_inline_comment = prev_token&.location&.start_line == token.location.start_line
|
|
294
296
|
if is_inline_comment && !is_at_eol && !COMMENT_CONTINUATION_TYPES.include?(next_token&.type)
|
|
@@ -307,7 +309,7 @@ module Prism
|
|
|
307
309
|
end
|
|
308
310
|
end
|
|
309
311
|
when :tNL
|
|
310
|
-
next_token
|
|
312
|
+
next_token, _ = lexed[index]
|
|
311
313
|
# Newlines after comments are emitted out of order.
|
|
312
314
|
if next_token&.type == :COMMENT
|
|
313
315
|
comment_newline_location = location
|
|
@@ -344,8 +346,8 @@ module Prism
|
|
|
344
346
|
location = range(token.location.start_offset, token.location.start_offset + percent_array_leading_whitespace(value))
|
|
345
347
|
value = nil
|
|
346
348
|
when :tSTRING_BEG
|
|
347
|
-
next_token = lexed[index]
|
|
348
|
-
next_next_token = lexed[index + 1]
|
|
349
|
+
next_token, _ = lexed[index]
|
|
350
|
+
next_next_token, _ = lexed[index + 1]
|
|
349
351
|
basic_quotes = value == '"' || value == "'"
|
|
350
352
|
|
|
351
353
|
if basic_quotes && next_token&.type == :STRING_END
|
|
@@ -413,7 +415,8 @@ module Prism
|
|
|
413
415
|
while token.type == :STRING_CONTENT
|
|
414
416
|
current_length += token.value.bytesize
|
|
415
417
|
# Heredoc interpolation can have multiple STRING_CONTENT nodes on the same line.
|
|
416
|
-
|
|
418
|
+
prev_token, _ = lexed[index - 2] if index - 2 >= 0
|
|
419
|
+
is_first_token_on_line = prev_token && token.location.start_line != prev_token.location.start_line
|
|
417
420
|
# The parser gem only removes indentation when the heredoc is not nested
|
|
418
421
|
not_nested = heredoc_stack.size == 1
|
|
419
422
|
if is_percent_array
|
|
@@ -423,11 +426,16 @@ module Prism
|
|
|
423
426
|
end
|
|
424
427
|
|
|
425
428
|
current_string << unescape_string(value, quote_stack.last)
|
|
426
|
-
|
|
429
|
+
relevant_backslash_count = if quote_stack.last.start_with?("%W", "%I")
|
|
430
|
+
0 # the last backslash escapes the newline
|
|
431
|
+
else
|
|
432
|
+
token.value[/(\\{1,})\n/, 1]&.length || 0
|
|
433
|
+
end
|
|
434
|
+
if relevant_backslash_count.even? || !interpolation?(quote_stack.last)
|
|
427
435
|
tokens << [:tSTRING_CONTENT, [current_string, range(start_offset, start_offset + current_length)]]
|
|
428
436
|
break
|
|
429
437
|
end
|
|
430
|
-
token = lexed[index]
|
|
438
|
+
token, _ = lexed[index]
|
|
431
439
|
index += 1
|
|
432
440
|
end
|
|
433
441
|
else
|
|
@@ -482,7 +490,7 @@ module Prism
|
|
|
482
490
|
end
|
|
483
491
|
|
|
484
492
|
if percent_array?(quote_stack.pop)
|
|
485
|
-
prev_token = lexed[index - 2]
|
|
493
|
+
prev_token, _ = lexed[index - 2] if index - 2 >= 0
|
|
486
494
|
empty = %i[PERCENT_LOWER_I PERCENT_LOWER_W PERCENT_UPPER_I PERCENT_UPPER_W].include?(prev_token&.type)
|
|
487
495
|
ends_with_whitespace = prev_token&.type == :WORDS_SEP
|
|
488
496
|
# parser always emits a space token after content in a percent array, even if no actual whitespace is present.
|
|
@@ -491,7 +499,7 @@ module Prism
|
|
|
491
499
|
end
|
|
492
500
|
end
|
|
493
501
|
when :tSYMBEG
|
|
494
|
-
if (next_token = lexed[index]
|
|
502
|
+
if (next_token = lexed[index]&.first) && next_token.type != :STRING_CONTENT && next_token.type != :EMBEXPR_BEGIN && next_token.type != :EMBVAR && next_token.type != :STRING_END
|
|
495
503
|
next_location = token.location.join(next_token.location)
|
|
496
504
|
type = :tSYMBOL
|
|
497
505
|
value = next_token.value
|
|
@@ -506,13 +514,13 @@ module Prism
|
|
|
506
514
|
type = :tIDENTIFIER
|
|
507
515
|
end
|
|
508
516
|
when :tXSTRING_BEG
|
|
509
|
-
if (next_token = lexed[index]
|
|
517
|
+
if (next_token = lexed[index]&.first) && !%i[STRING_CONTENT STRING_END EMBEXPR_BEGIN].include?(next_token.type)
|
|
510
518
|
# self.`()
|
|
511
519
|
type = :tBACK_REF2
|
|
512
520
|
end
|
|
513
521
|
quote_stack.push(value)
|
|
514
522
|
when :tSYMBOLS_BEG, :tQSYMBOLS_BEG, :tWORDS_BEG, :tQWORDS_BEG
|
|
515
|
-
if (next_token = lexed[index]
|
|
523
|
+
if (next_token = lexed[index]&.first) && next_token.type == :WORDS_SEP
|
|
516
524
|
index += 1
|
|
517
525
|
end
|
|
518
526
|
|
|
@@ -588,9 +596,9 @@ module Prism
|
|
|
588
596
|
previous_line = -1
|
|
589
597
|
result = Float::MAX
|
|
590
598
|
|
|
591
|
-
while (
|
|
599
|
+
while (next_token = lexed[next_token_index]&.first)
|
|
592
600
|
next_token_index += 1
|
|
593
|
-
next_next_token = lexed[next_token_index]
|
|
601
|
+
next_next_token, _ = lexed[next_token_index]
|
|
594
602
|
first_token_on_line = next_token.location.start_column == 0
|
|
595
603
|
|
|
596
604
|
# String content inside nested heredocs and interpolation is ignored
|
|
@@ -761,12 +769,12 @@ module Prism
|
|
|
761
769
|
elsif (value = scanner.scan(/M-\\?(?=[[:print:]])/))
|
|
762
770
|
# \M-x where x is an ASCII printable character
|
|
763
771
|
escape_read(result, scanner, control, true)
|
|
764
|
-
elsif (byte = scanner.
|
|
772
|
+
elsif (byte = scanner.scan_byte)
|
|
765
773
|
# Something else after an escape.
|
|
766
|
-
if control && byte ==
|
|
774
|
+
if control && byte == 0x3f # ASCII '?'
|
|
767
775
|
result.append_as_bytes(escape_build(0x7f, false, meta))
|
|
768
776
|
else
|
|
769
|
-
result.append_as_bytes(escape_build(byte
|
|
777
|
+
result.append_as_bytes(escape_build(byte, control, meta))
|
|
770
778
|
end
|
|
771
779
|
end
|
|
772
780
|
end
|
|
@@ -1,9 +1,15 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
|
+
# :markup: markdown
|
|
2
3
|
|
|
3
4
|
begin
|
|
5
|
+
required_version = ">= 3.3.7.2"
|
|
6
|
+
gem "parser", required_version
|
|
4
7
|
require "parser"
|
|
5
8
|
rescue LoadError
|
|
6
|
-
warn(
|
|
9
|
+
warn(<<~MSG)
|
|
10
|
+
Error: Unable to load parser #{required_version}. \
|
|
11
|
+
Add `gem "parser"` to your Gemfile or run `bundle update parser`.
|
|
12
|
+
MSG
|
|
7
13
|
exit(1)
|
|
8
14
|
end
|
|
9
15
|
|
|
@@ -13,6 +19,13 @@ module Prism
|
|
|
13
19
|
# whitequark/parser gem's syntax tree. It inherits from the base parser for
|
|
14
20
|
# the parser gem, and overrides the parse* methods to parse with prism and
|
|
15
21
|
# then translate.
|
|
22
|
+
#
|
|
23
|
+
# Note that this version of the parser always parses using the latest
|
|
24
|
+
# version of Ruby syntax supported by Prism. If you want specific version
|
|
25
|
+
# support, use one of the version-specific subclasses, such as
|
|
26
|
+
# `Prism::Translation::Parser34`. If you want to parse using the same
|
|
27
|
+
# version of Ruby syntax as the currently running version of Ruby, use
|
|
28
|
+
# `Prism::Translation::ParserCurrent`.
|
|
16
29
|
class Parser < ::Parser::Base
|
|
17
30
|
Diagnostic = ::Parser::Diagnostic # :nodoc:
|
|
18
31
|
private_constant :Diagnostic
|
|
@@ -59,13 +72,19 @@ module Prism
|
|
|
59
72
|
# should be implemented as needed.
|
|
60
73
|
#
|
|
61
74
|
def initialize(builder = Prism::Translation::Parser::Builder.new, parser: Prism)
|
|
75
|
+
if !builder.is_a?(Prism::Translation::Parser::Builder)
|
|
76
|
+
warn(<<~MSG, uplevel: 1, category: :deprecated)
|
|
77
|
+
[deprecation]: The builder passed to `Prism::Translation::Parser.new` is not a \
|
|
78
|
+
`Prism::Translation::Parser::Builder` subclass. This will raise in the next major version.
|
|
79
|
+
MSG
|
|
80
|
+
end
|
|
62
81
|
@parser = parser
|
|
63
82
|
|
|
64
83
|
super(builder)
|
|
65
84
|
end
|
|
66
85
|
|
|
67
86
|
def version # :nodoc:
|
|
68
|
-
|
|
87
|
+
41
|
|
69
88
|
end
|
|
70
89
|
|
|
71
90
|
# The default encoding for Ruby files is UTF-8.
|
|
@@ -337,8 +356,10 @@ module Prism
|
|
|
337
356
|
"3.3.1"
|
|
338
357
|
when 34
|
|
339
358
|
"3.4.0"
|
|
340
|
-
when 35
|
|
341
|
-
"
|
|
359
|
+
when 35, 40
|
|
360
|
+
"4.0.0"
|
|
361
|
+
when 41
|
|
362
|
+
"4.1.0"
|
|
342
363
|
else
|
|
343
364
|
"latest"
|
|
344
365
|
end
|
|
@@ -1,12 +1,8 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
|
+
# :markup: markdown
|
|
2
3
|
|
|
3
4
|
module Prism
|
|
4
5
|
module Translation
|
|
5
|
-
|
|
6
|
-
class Parser35 < Parser
|
|
7
|
-
def version # :nodoc:
|
|
8
|
-
35
|
|
9
|
-
end
|
|
10
|
-
end
|
|
6
|
+
Parser35 = Parser40 # :nodoc:
|
|
11
7
|
end
|
|
12
8
|
end
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
# :markup: markdown
|
|
3
|
+
# typed: ignore
|
|
4
|
+
|
|
5
|
+
#
|
|
6
|
+
module Prism
|
|
7
|
+
module Translation
|
|
8
|
+
case RUBY_VERSION
|
|
9
|
+
when /^3\.3\./
|
|
10
|
+
ParserCurrent = Parser33
|
|
11
|
+
when /^3\.4\./
|
|
12
|
+
ParserCurrent = Parser34
|
|
13
|
+
when /^3\.5\./, /^4\.0\./
|
|
14
|
+
ParserCurrent = Parser40
|
|
15
|
+
when /^4\.1\./
|
|
16
|
+
ParserCurrent = Parser41
|
|
17
|
+
else
|
|
18
|
+
# Keep this in sync with released Ruby.
|
|
19
|
+
parser = Parser34
|
|
20
|
+
major, minor, _patch = Gem::Version.new(RUBY_VERSION).segments
|
|
21
|
+
warn "warning: `Prism::Translation::Current` is loading #{parser.name}, " \
|
|
22
|
+
"but you are running #{major}.#{minor}."
|
|
23
|
+
ParserCurrent = parser
|
|
24
|
+
end
|
|
25
|
+
end
|
|
26
|
+
end
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
|
+
# :markup: markdown
|
|
2
3
|
|
|
3
4
|
require "ripper"
|
|
4
5
|
|
|
@@ -70,7 +71,7 @@ module Prism
|
|
|
70
71
|
# [[1, 13], :on_kw, "end", END ]]
|
|
71
72
|
#
|
|
72
73
|
def self.lex(src, filename = "-", lineno = 1, raise_errors: false)
|
|
73
|
-
result = Prism.lex_compat(src, filepath: filename, line: lineno)
|
|
74
|
+
result = Prism.lex_compat(src, filepath: filename, line: lineno, version: "current")
|
|
74
75
|
|
|
75
76
|
if result.failure? && raise_errors
|
|
76
77
|
raise SyntaxError, result.errors.first.message
|
|
@@ -1615,8 +1616,23 @@ module Prism
|
|
|
1615
1616
|
# defined?(a)
|
|
1616
1617
|
# ^^^^^^^^^^^
|
|
1617
1618
|
def visit_defined_node(node)
|
|
1619
|
+
expression = visit(node.value)
|
|
1620
|
+
|
|
1621
|
+
# Very weird circumstances here where something like:
|
|
1622
|
+
#
|
|
1623
|
+
# defined?
|
|
1624
|
+
# (1)
|
|
1625
|
+
#
|
|
1626
|
+
# gets parsed in Ruby as having only the `1` expression but in Ripper it
|
|
1627
|
+
# gets parsed as having a parentheses node. In this case we need to
|
|
1628
|
+
# synthesize that node to match Ripper's behavior.
|
|
1629
|
+
if node.lparen_loc && node.keyword_loc.join(node.lparen_loc).slice.include?("\n")
|
|
1630
|
+
bounds(node.lparen_loc.join(node.rparen_loc))
|
|
1631
|
+
expression = on_paren(on_stmts_add(on_stmts_new, expression))
|
|
1632
|
+
end
|
|
1633
|
+
|
|
1618
1634
|
bounds(node.location)
|
|
1619
|
-
on_defined(
|
|
1635
|
+
on_defined(expression)
|
|
1620
1636
|
end
|
|
1621
1637
|
|
|
1622
1638
|
# if foo then bar else baz end
|
|
@@ -3279,7 +3295,7 @@ module Prism
|
|
|
3279
3295
|
|
|
3280
3296
|
# Lazily initialize the parse result.
|
|
3281
3297
|
def result
|
|
3282
|
-
@result ||= Prism.parse(source, partial_script: true)
|
|
3298
|
+
@result ||= Prism.parse(source, partial_script: true, version: "current")
|
|
3283
3299
|
end
|
|
3284
3300
|
|
|
3285
3301
|
##########################################################################
|