prism 1.4.0 → 1.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90):
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +73 -1
  3. data/Makefile +7 -5
  4. data/README.md +3 -1
  5. data/config.yml +294 -41
  6. data/docs/build_system.md +2 -2
  7. data/docs/cruby_compilation.md +1 -1
  8. data/docs/design.md +2 -2
  9. data/docs/parser_translation.md +8 -23
  10. data/docs/releasing.md +6 -25
  11. data/docs/ripper_translation.md +1 -1
  12. data/ext/prism/api_node.c +9 -3
  13. data/ext/prism/extconf.rb +1 -1
  14. data/ext/prism/extension.c +24 -3
  15. data/ext/prism/extension.h +1 -1
  16. data/include/prism/ast.h +360 -70
  17. data/include/prism/diagnostic.h +7 -0
  18. data/include/prism/options.h +49 -3
  19. data/include/prism/parser.h +3 -0
  20. data/include/prism/regexp.h +2 -2
  21. data/include/prism/util/pm_buffer.h +8 -0
  22. data/include/prism/util/pm_integer.h +4 -0
  23. data/include/prism/util/pm_list.h +6 -0
  24. data/include/prism/util/pm_string.h +12 -2
  25. data/include/prism/version.h +2 -2
  26. data/include/prism.h +40 -15
  27. data/lib/prism/compiler.rb +456 -151
  28. data/lib/prism/desugar_compiler.rb +1 -0
  29. data/lib/prism/dispatcher.rb +16 -0
  30. data/lib/prism/dot_visitor.rb +10 -1
  31. data/lib/prism/dsl.rb +5 -2
  32. data/lib/prism/ffi.rb +28 -10
  33. data/lib/prism/inspect_visitor.rb +4 -0
  34. data/lib/prism/lex_compat.rb +1 -0
  35. data/lib/prism/mutation_compiler.rb +3 -0
  36. data/lib/prism/node.rb +559 -349
  37. data/lib/prism/node_ext.rb +4 -1
  38. data/lib/prism/pack.rb +2 -0
  39. data/lib/prism/parse_result/comments.rb +1 -0
  40. data/lib/prism/parse_result/errors.rb +1 -0
  41. data/lib/prism/parse_result/newlines.rb +1 -0
  42. data/lib/prism/parse_result.rb +3 -15
  43. data/lib/prism/pattern.rb +1 -0
  44. data/lib/prism/polyfill/scan_byte.rb +14 -0
  45. data/lib/prism/polyfill/warn.rb +36 -0
  46. data/lib/prism/reflection.rb +4 -1
  47. data/lib/prism/relocation.rb +1 -0
  48. data/lib/prism/serialize.rb +30 -22
  49. data/lib/prism/string_query.rb +1 -0
  50. data/lib/prism/translation/parser/builder.rb +1 -0
  51. data/lib/prism/translation/parser/compiler.rb +63 -41
  52. data/lib/prism/translation/parser/lexer.rb +29 -21
  53. data/lib/prism/translation/parser.rb +25 -4
  54. data/lib/prism/translation/parser33.rb +1 -0
  55. data/lib/prism/translation/parser34.rb +1 -0
  56. data/lib/prism/translation/parser35.rb +2 -6
  57. data/lib/prism/translation/parser40.rb +13 -0
  58. data/lib/prism/translation/parser41.rb +13 -0
  59. data/lib/prism/translation/parser_current.rb +26 -0
  60. data/lib/prism/translation/ripper/sexp.rb +1 -0
  61. data/lib/prism/translation/ripper.rb +19 -3
  62. data/lib/prism/translation/ruby_parser.rb +340 -22
  63. data/lib/prism/translation.rb +4 -0
  64. data/lib/prism/visitor.rb +457 -152
  65. data/lib/prism.rb +22 -0
  66. data/prism.gemspec +9 -1
  67. data/rbi/prism/dsl.rbi +6 -6
  68. data/rbi/prism/node.rbi +42 -17
  69. data/rbi/prism/translation/parser35.rbi +0 -2
  70. data/rbi/prism/translation/parser40.rbi +6 -0
  71. data/rbi/prism/translation/parser41.rbi +6 -0
  72. data/sig/prism/dispatcher.rbs +3 -0
  73. data/sig/prism/dsl.rbs +5 -5
  74. data/sig/prism/node.rbs +462 -38
  75. data/sig/prism/node_ext.rbs +84 -17
  76. data/sig/prism/parse_result/comments.rbs +38 -0
  77. data/sig/prism/parse_result.rbs +4 -0
  78. data/sig/prism/reflection.rbs +1 -1
  79. data/sig/prism.rbs +4 -0
  80. data/src/diagnostic.c +13 -1
  81. data/src/encoding.c +172 -67
  82. data/src/node.c +11 -0
  83. data/src/options.c +17 -7
  84. data/src/prettyprint.c +18 -0
  85. data/src/prism.c +1495 -2021
  86. data/src/serialize.c +9 -1
  87. data/src/token_type.c +38 -36
  88. data/src/util/pm_constant_pool.c +1 -1
  89. data/src/util/pm_string.c +6 -8
  90. metadata +11 -3
@@ -1,7 +1,9 @@
1
1
  # frozen_string_literal: true
2
+ # :markup: markdown
2
3
 
3
4
  require "strscan"
4
5
  require_relative "../../polyfill/append_as_bytes"
6
+ require_relative "../../polyfill/scan_byte"
5
7
 
6
8
  module Prism
7
9
  module Translation
@@ -200,8 +202,8 @@ module Prism
200
202
  # The `PARENTHESIS_LEFT` token in Prism is classified as either `tLPAREN` or `tLPAREN2` in the Parser gem.
201
203
  # The following token types are listed as those classified as `tLPAREN`.
202
204
  LPAREN_CONVERSION_TOKEN_TYPES = Set.new([
203
- :kBREAK, :kCASE, :tDIVIDE, :kFOR, :kIF, :kNEXT, :kRETURN, :kUNTIL, :kWHILE, :tAMPER, :tANDOP, :tBANG, :tCOMMA, :tDOT2, :tDOT3,
204
- :tEQL, :tLPAREN, :tLPAREN2, :tLPAREN_ARG, :tLSHFT, :tNL, :tOP_ASGN, :tOROP, :tPIPE, :tSEMI, :tSTRING_DBEG, :tUMINUS, :tUPLUS
205
+ :kBREAK, :tCARET, :kCASE, :tDIVIDE, :kFOR, :kIF, :kNEXT, :kRETURN, :kUNTIL, :kWHILE, :tAMPER, :tANDOP, :tBANG, :tCOMMA, :tDOT2, :tDOT3,
206
+ :tEQL, :tLPAREN, :tLPAREN2, :tLPAREN_ARG, :tLSHFT, :tNL, :tOP_ASGN, :tOROP, :tPIPE, :tSEMI, :tSTRING_DBEG, :tUMINUS, :tUPLUS, :tLCURLY
205
207
  ])
206
208
 
207
209
  # Types of tokens that are allowed to continue a method call with comments in-between.
@@ -275,20 +277,20 @@ module Prism
275
277
  when :tCOMMENT
276
278
  if token.type == :EMBDOC_BEGIN
277
279
 
278
- while !((next_token = lexed[index][0]) && next_token.type == :EMBDOC_END) && (index < length - 1)
280
+ while !((next_token = lexed[index]&.first) && next_token.type == :EMBDOC_END) && (index < length - 1)
279
281
  value += next_token.value
280
282
  index += 1
281
283
  end
282
284
 
283
285
  value += next_token.value
284
- location = range(token.location.start_offset, lexed[index][0].location.end_offset)
286
+ location = range(token.location.start_offset, next_token.location.end_offset)
285
287
  index += 1
286
288
  else
287
289
  is_at_eol = value.chomp!.nil?
288
290
  location = range(token.location.start_offset, token.location.end_offset + (is_at_eol ? 0 : -1))
289
291
 
290
- prev_token = lexed[index - 2][0] if index - 2 >= 0
291
- next_token = lexed[index][0]
292
+ prev_token, _ = lexed[index - 2] if index - 2 >= 0
293
+ next_token, _ = lexed[index]
292
294
 
293
295
  is_inline_comment = prev_token&.location&.start_line == token.location.start_line
294
296
  if is_inline_comment && !is_at_eol && !COMMENT_CONTINUATION_TYPES.include?(next_token&.type)
@@ -307,7 +309,7 @@ module Prism
307
309
  end
308
310
  end
309
311
  when :tNL
310
- next_token = next_token = lexed[index][0]
312
+ next_token, _ = lexed[index]
311
313
  # Newlines after comments are emitted out of order.
312
314
  if next_token&.type == :COMMENT
313
315
  comment_newline_location = location
@@ -344,8 +346,8 @@ module Prism
344
346
  location = range(token.location.start_offset, token.location.start_offset + percent_array_leading_whitespace(value))
345
347
  value = nil
346
348
  when :tSTRING_BEG
347
- next_token = lexed[index][0]
348
- next_next_token = lexed[index + 1][0]
349
+ next_token, _ = lexed[index]
350
+ next_next_token, _ = lexed[index + 1]
349
351
  basic_quotes = value == '"' || value == "'"
350
352
 
351
353
  if basic_quotes && next_token&.type == :STRING_END
@@ -413,7 +415,8 @@ module Prism
413
415
  while token.type == :STRING_CONTENT
414
416
  current_length += token.value.bytesize
415
417
  # Heredoc interpolation can have multiple STRING_CONTENT nodes on the same line.
416
- is_first_token_on_line = lexed[index - 1] && token.location.start_line != lexed[index - 2][0].location&.start_line
418
+ prev_token, _ = lexed[index - 2] if index - 2 >= 0
419
+ is_first_token_on_line = prev_token && token.location.start_line != prev_token.location.start_line
417
420
  # The parser gem only removes indentation when the heredoc is not nested
418
421
  not_nested = heredoc_stack.size == 1
419
422
  if is_percent_array
@@ -423,11 +426,16 @@ module Prism
423
426
  end
424
427
 
425
428
  current_string << unescape_string(value, quote_stack.last)
426
- if (backslash_count = token.value[/(\\{1,})\n/, 1]&.length).nil? || backslash_count.even? || !interpolation?(quote_stack.last)
429
+ relevant_backslash_count = if quote_stack.last.start_with?("%W", "%I")
430
+ 0 # the last backslash escapes the newline
431
+ else
432
+ token.value[/(\\{1,})\n/, 1]&.length || 0
433
+ end
434
+ if relevant_backslash_count.even? || !interpolation?(quote_stack.last)
427
435
  tokens << [:tSTRING_CONTENT, [current_string, range(start_offset, start_offset + current_length)]]
428
436
  break
429
437
  end
430
- token = lexed[index][0]
438
+ token, _ = lexed[index]
431
439
  index += 1
432
440
  end
433
441
  else
@@ -482,7 +490,7 @@ module Prism
482
490
  end
483
491
 
484
492
  if percent_array?(quote_stack.pop)
485
- prev_token = lexed[index - 2][0] if index - 2 >= 0
493
+ prev_token, _ = lexed[index - 2] if index - 2 >= 0
486
494
  empty = %i[PERCENT_LOWER_I PERCENT_LOWER_W PERCENT_UPPER_I PERCENT_UPPER_W].include?(prev_token&.type)
487
495
  ends_with_whitespace = prev_token&.type == :WORDS_SEP
488
496
  # parser always emits a space token after content in a percent array, even if no actual whitespace is present.
@@ -491,7 +499,7 @@ module Prism
491
499
  end
492
500
  end
493
501
  when :tSYMBEG
494
- if (next_token = lexed[index][0]) && next_token.type != :STRING_CONTENT && next_token.type != :EMBEXPR_BEGIN && next_token.type != :EMBVAR && next_token.type != :STRING_END
502
+ if (next_token = lexed[index]&.first) && next_token.type != :STRING_CONTENT && next_token.type != :EMBEXPR_BEGIN && next_token.type != :EMBVAR && next_token.type != :STRING_END
495
503
  next_location = token.location.join(next_token.location)
496
504
  type = :tSYMBOL
497
505
  value = next_token.value
@@ -506,13 +514,13 @@ module Prism
506
514
  type = :tIDENTIFIER
507
515
  end
508
516
  when :tXSTRING_BEG
509
- if (next_token = lexed[index][0]) && !%i[STRING_CONTENT STRING_END EMBEXPR_BEGIN].include?(next_token.type)
517
+ if (next_token = lexed[index]&.first) && !%i[STRING_CONTENT STRING_END EMBEXPR_BEGIN].include?(next_token.type)
510
518
  # self.`()
511
519
  type = :tBACK_REF2
512
520
  end
513
521
  quote_stack.push(value)
514
522
  when :tSYMBOLS_BEG, :tQSYMBOLS_BEG, :tWORDS_BEG, :tQWORDS_BEG
515
- if (next_token = lexed[index][0]) && next_token.type == :WORDS_SEP
523
+ if (next_token = lexed[index]&.first) && next_token.type == :WORDS_SEP
516
524
  index += 1
517
525
  end
518
526
 
@@ -588,9 +596,9 @@ module Prism
588
596
  previous_line = -1
589
597
  result = Float::MAX
590
598
 
591
- while (lexed[next_token_index] && next_token = lexed[next_token_index][0])
599
+ while (next_token = lexed[next_token_index]&.first)
592
600
  next_token_index += 1
593
- next_next_token = lexed[next_token_index] && lexed[next_token_index][0]
601
+ next_next_token, _ = lexed[next_token_index]
594
602
  first_token_on_line = next_token.location.start_column == 0
595
603
 
596
604
  # String content inside nested heredocs and interpolation is ignored
@@ -761,12 +769,12 @@ module Prism
761
769
  elsif (value = scanner.scan(/M-\\?(?=[[:print:]])/))
762
770
  # \M-x where x is an ASCII printable character
763
771
  escape_read(result, scanner, control, true)
764
- elsif (byte = scanner.get_byte)
772
+ elsif (byte = scanner.scan_byte)
765
773
  # Something else after an escape.
766
- if control && byte == "?"
774
+ if control && byte == 0x3f # ASCII '?'
767
775
  result.append_as_bytes(escape_build(0x7f, false, meta))
768
776
  else
769
- result.append_as_bytes(escape_build(byte.ord, control, meta))
777
+ result.append_as_bytes(escape_build(byte, control, meta))
770
778
  end
771
779
  end
772
780
  end
@@ -1,9 +1,15 @@
1
1
  # frozen_string_literal: true
2
+ # :markup: markdown
2
3
 
3
4
  begin
5
+ required_version = ">= 3.3.7.2"
6
+ gem "parser", required_version
4
7
  require "parser"
5
8
  rescue LoadError
6
- warn(%q{Error: Unable to load parser. Add `gem "parser"` to your Gemfile.})
9
+ warn(<<~MSG)
10
+ Error: Unable to load parser #{required_version}. \
11
+ Add `gem "parser"` to your Gemfile or run `bundle update parser`.
12
+ MSG
7
13
  exit(1)
8
14
  end
9
15
 
@@ -13,6 +19,13 @@ module Prism
13
19
  # whitequark/parser gem's syntax tree. It inherits from the base parser for
14
20
  # the parser gem, and overrides the parse* methods to parse with prism and
15
21
  # then translate.
22
+ #
23
+ # Note that this version of the parser always parses using the latest
24
+ # version of Ruby syntax supported by Prism. If you want specific version
25
+ # support, use one of the version-specific subclasses, such as
26
+ # `Prism::Translation::Parser34`. If you want to parse using the same
27
+ # version of Ruby syntax as the currently running version of Ruby, use
28
+ # `Prism::Translation::ParserCurrent`.
16
29
  class Parser < ::Parser::Base
17
30
  Diagnostic = ::Parser::Diagnostic # :nodoc:
18
31
  private_constant :Diagnostic
@@ -59,13 +72,19 @@ module Prism
59
72
  # should be implemented as needed.
60
73
  #
61
74
  def initialize(builder = Prism::Translation::Parser::Builder.new, parser: Prism)
75
+ if !builder.is_a?(Prism::Translation::Parser::Builder)
76
+ warn(<<~MSG, uplevel: 1, category: :deprecated)
77
+ [deprecation]: The builder passed to `Prism::Translation::Parser.new` is not a \
78
+ `Prism::Translation::Parser::Builder` subclass. This will raise in the next major version.
79
+ MSG
80
+ end
62
81
  @parser = parser
63
82
 
64
83
  super(builder)
65
84
  end
66
85
 
67
86
  def version # :nodoc:
68
- 34
87
+ 41
69
88
  end
70
89
 
71
90
  # The default encoding for Ruby files is UTF-8.
@@ -337,8 +356,10 @@ module Prism
337
356
  "3.3.1"
338
357
  when 34
339
358
  "3.4.0"
340
- when 35
341
- "3.5.0"
359
+ when 35, 40
360
+ "4.0.0"
361
+ when 41
362
+ "4.1.0"
342
363
  else
343
364
  "latest"
344
365
  end
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+ # :markup: markdown
2
3
 
3
4
  module Prism
4
5
  module Translation
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+ # :markup: markdown
2
3
 
3
4
  module Prism
4
5
  module Translation
@@ -1,12 +1,8 @@
1
1
  # frozen_string_literal: true
2
+ # :markup: markdown
2
3
 
3
4
  module Prism
4
5
  module Translation
5
- # This class is the entry-point for Ruby 3.5 of `Prism::Translation::Parser`.
6
- class Parser35 < Parser
7
- def version # :nodoc:
8
- 35
9
- end
10
- end
6
+ Parser35 = Parser40 # :nodoc:
11
7
  end
12
8
  end
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+ # :markup: markdown
3
+
4
+ module Prism
5
+ module Translation
6
+ # This class is the entry-point for Ruby 4.0 of `Prism::Translation::Parser`.
7
+ class Parser40 < Parser
8
+ def version # :nodoc:
9
+ 40
10
+ end
11
+ end
12
+ end
13
+ end
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+ # :markup: markdown
3
+
4
+ module Prism
5
+ module Translation
6
+ # This class is the entry-point for Ruby 4.1 of `Prism::Translation::Parser`.
7
+ class Parser41 < Parser
8
+ def version # :nodoc:
9
+ 41
10
+ end
11
+ end
12
+ end
13
+ end
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+ # :markup: markdown
3
+ # typed: ignore
4
+
5
+ #
6
+ module Prism
7
+ module Translation
8
+ case RUBY_VERSION
9
+ when /^3\.3\./
10
+ ParserCurrent = Parser33
11
+ when /^3\.4\./
12
+ ParserCurrent = Parser34
13
+ when /^3\.5\./, /^4\.0\./
14
+ ParserCurrent = Parser40
15
+ when /^4\.1\./
16
+ ParserCurrent = Parser41
17
+ else
18
+ # Keep this in sync with released Ruby.
19
+ parser = Parser34
20
+ major, minor, _patch = Gem::Version.new(RUBY_VERSION).segments
21
+ warn "warning: `Prism::Translation::Current` is loading #{parser.name}, " \
22
+ "but you are running #{major}.#{minor}."
23
+ ParserCurrent = parser
24
+ end
25
+ end
26
+ end
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+ # :markup: markdown
2
3
 
3
4
  require_relative "../ripper"
4
5
 
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+ # :markup: markdown
2
3
 
3
4
  require "ripper"
4
5
 
@@ -70,7 +71,7 @@ module Prism
70
71
  # [[1, 13], :on_kw, "end", END ]]
71
72
  #
72
73
  def self.lex(src, filename = "-", lineno = 1, raise_errors: false)
73
- result = Prism.lex_compat(src, filepath: filename, line: lineno)
74
+ result = Prism.lex_compat(src, filepath: filename, line: lineno, version: "current")
74
75
 
75
76
  if result.failure? && raise_errors
76
77
  raise SyntaxError, result.errors.first.message
@@ -1615,8 +1616,23 @@ module Prism
1615
1616
  # defined?(a)
1616
1617
  # ^^^^^^^^^^^
1617
1618
  def visit_defined_node(node)
1619
+ expression = visit(node.value)
1620
+
1621
+ # Very weird circumstances here where something like:
1622
+ #
1623
+ # defined?
1624
+ # (1)
1625
+ #
1626
+ # gets parsed in Ruby as having only the `1` expression but in Ripper it
1627
+ # gets parsed as having a parentheses node. In this case we need to
1628
+ # synthesize that node to match Ripper's behavior.
1629
+ if node.lparen_loc && node.keyword_loc.join(node.lparen_loc).slice.include?("\n")
1630
+ bounds(node.lparen_loc.join(node.rparen_loc))
1631
+ expression = on_paren(on_stmts_add(on_stmts_new, expression))
1632
+ end
1633
+
1618
1634
  bounds(node.location)
1619
- on_defined(visit(node.value))
1635
+ on_defined(expression)
1620
1636
  end
1621
1637
 
1622
1638
  # if foo then bar else baz end
@@ -3279,7 +3295,7 @@ module Prism
3279
3295
 
3280
3296
  # Lazily initialize the parse result.
3281
3297
  def result
3282
- @result ||= Prism.parse(source, partial_script: true)
3298
+ @result ||= Prism.parse(source, partial_script: true, version: "current")
3283
3299
  end
3284
3300
 
3285
3301
  ##########################################################################