prism 0.24.0 → 0.26.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (125) hide show
  1. checksums.yaml +4 -4
  2. data/BSDmakefile +58 -0
  3. data/CHANGELOG.md +69 -1
  4. data/Makefile +22 -16
  5. data/README.md +45 -6
  6. data/config.yml +510 -4
  7. data/docs/build_system.md +31 -0
  8. data/docs/configuration.md +3 -0
  9. data/docs/cruby_compilation.md +1 -1
  10. data/docs/parser_translation.md +14 -9
  11. data/docs/releasing.md +7 -9
  12. data/docs/ripper_translation.md +50 -0
  13. data/docs/ruby_api.md +1 -0
  14. data/docs/serialization.md +26 -5
  15. data/ext/prism/api_node.c +911 -815
  16. data/ext/prism/api_pack.c +9 -0
  17. data/ext/prism/extconf.rb +34 -13
  18. data/ext/prism/extension.c +341 -68
  19. data/ext/prism/extension.h +5 -4
  20. data/include/prism/ast.h +213 -64
  21. data/include/prism/defines.h +106 -2
  22. data/include/prism/diagnostic.h +146 -72
  23. data/include/prism/encoding.h +22 -4
  24. data/include/prism/node.h +93 -0
  25. data/include/prism/options.h +82 -7
  26. data/include/prism/pack.h +11 -0
  27. data/include/prism/parser.h +203 -54
  28. data/include/prism/prettyprint.h +8 -0
  29. data/include/prism/static_literals.h +118 -0
  30. data/include/prism/util/pm_buffer.h +65 -2
  31. data/include/prism/util/pm_constant_pool.h +18 -1
  32. data/include/prism/util/pm_integer.h +119 -0
  33. data/include/prism/util/pm_list.h +1 -1
  34. data/include/prism/util/pm_newline_list.h +8 -0
  35. data/include/prism/util/pm_string.h +26 -2
  36. data/include/prism/version.h +2 -2
  37. data/include/prism.h +59 -1
  38. data/lib/prism/compiler.rb +8 -1
  39. data/lib/prism/debug.rb +46 -3
  40. data/lib/prism/desugar_compiler.rb +4 -2
  41. data/lib/prism/dispatcher.rb +29 -0
  42. data/lib/prism/dot_visitor.rb +87 -16
  43. data/lib/prism/dsl.rb +24 -12
  44. data/lib/prism/ffi.rb +77 -12
  45. data/lib/prism/lex_compat.rb +17 -15
  46. data/lib/prism/mutation_compiler.rb +11 -0
  47. data/lib/prism/node.rb +2112 -2499
  48. data/lib/prism/node_ext.rb +77 -29
  49. data/lib/prism/pack.rb +4 -0
  50. data/lib/prism/parse_result/comments.rb +34 -17
  51. data/lib/prism/parse_result/newlines.rb +3 -1
  52. data/lib/prism/parse_result.rb +83 -32
  53. data/lib/prism/pattern.rb +16 -4
  54. data/lib/prism/polyfill/string.rb +12 -0
  55. data/lib/prism/reflection.rb +421 -0
  56. data/lib/prism/serialize.rb +450 -102
  57. data/lib/prism/translation/parser/compiler.rb +189 -50
  58. data/lib/prism/translation/parser/lexer.rb +103 -22
  59. data/lib/prism/translation/parser/rubocop.rb +41 -13
  60. data/lib/prism/translation/parser.rb +119 -7
  61. data/lib/prism/translation/parser33.rb +1 -1
  62. data/lib/prism/translation/parser34.rb +1 -1
  63. data/lib/prism/translation/ripper/sexp.rb +125 -0
  64. data/lib/prism/translation/ripper/shim.rb +5 -0
  65. data/lib/prism/translation/ripper.rb +3212 -462
  66. data/lib/prism/translation/ruby_parser.rb +35 -18
  67. data/lib/prism/translation.rb +3 -1
  68. data/lib/prism/visitor.rb +10 -0
  69. data/lib/prism.rb +9 -18
  70. data/prism.gemspec +39 -6
  71. data/rbi/prism/compiler.rbi +14 -0
  72. data/rbi/prism/desugar_compiler.rbi +5 -0
  73. data/rbi/prism/mutation_compiler.rbi +5 -0
  74. data/rbi/prism/node.rbi +8674 -0
  75. data/rbi/prism/node_ext.rbi +102 -0
  76. data/rbi/prism/parse_result.rbi +307 -0
  77. data/rbi/prism/reflection.rbi +64 -0
  78. data/rbi/prism/translation/parser/compiler.rbi +13 -0
  79. data/rbi/prism/translation/parser.rbi +11 -0
  80. data/rbi/prism/translation/parser33.rbi +6 -0
  81. data/rbi/prism/translation/parser34.rbi +6 -0
  82. data/rbi/prism/translation/ripper/ripper_compiler.rbi +5 -0
  83. data/rbi/prism/translation/ripper.rbi +25 -0
  84. data/rbi/prism/translation/ruby_parser.rbi +11 -0
  85. data/rbi/prism/visitor.rbi +470 -0
  86. data/rbi/prism.rbi +38 -7748
  87. data/sig/prism/compiler.rbs +9 -0
  88. data/sig/prism/dispatcher.rbs +16 -0
  89. data/sig/prism/dot_visitor.rbs +6 -0
  90. data/sig/prism/dsl.rbs +462 -0
  91. data/sig/prism/mutation_compiler.rbs +158 -0
  92. data/sig/prism/node.rbs +3538 -0
  93. data/sig/prism/node_ext.rbs +78 -0
  94. data/sig/prism/pack.rbs +43 -0
  95. data/sig/prism/parse_result.rbs +128 -0
  96. data/sig/prism/pattern.rbs +13 -0
  97. data/sig/prism/reflection.rbs +56 -0
  98. data/sig/prism/serialize.rbs +7 -0
  99. data/sig/prism/visitor.rbs +168 -0
  100. data/sig/prism.rbs +188 -4767
  101. data/src/diagnostic.c +597 -230
  102. data/src/encoding.c +211 -108
  103. data/src/node.c +7526 -447
  104. data/src/options.c +66 -31
  105. data/src/pack.c +33 -17
  106. data/src/prettyprint.c +1294 -1385
  107. data/src/prism.c +4015 -1149
  108. data/src/regexp.c +17 -2
  109. data/src/serialize.c +47 -28
  110. data/src/static_literals.c +552 -0
  111. data/src/token_type.c +4 -3
  112. data/src/util/pm_buffer.c +147 -20
  113. data/src/util/pm_char.c +4 -4
  114. data/src/util/pm_constant_pool.c +35 -11
  115. data/src/util/pm_integer.c +635 -0
  116. data/src/util/pm_list.c +1 -1
  117. data/src/util/pm_newline_list.c +14 -5
  118. data/src/util/pm_string.c +134 -5
  119. data/src/util/pm_string_list.c +2 -2
  120. metadata +41 -8
  121. data/docs/ripper.md +0 -36
  122. data/include/prism/util/pm_state_stack.h +0 -42
  123. data/rbi/prism_static.rbi +0 -207
  124. data/sig/prism_static.rbs +0 -201
  125. data/src/util/pm_state_stack.c +0 -25
@@ -116,7 +116,14 @@ module Prism
116
116
  builder.pair_keyword([node.key.unescaped, srange(node.key.location)], visit(node.value))
117
117
  end
118
118
  elsif node.value.is_a?(ImplicitNode)
119
- builder.pair_label([node.key.unescaped, srange(node.key.location)])
119
+ if (value = node.value.value).is_a?(LocalVariableReadNode)
120
+ builder.pair_keyword(
121
+ [node.key.unescaped, srange(node.key)],
122
+ builder.ident([value.name, srange(node.key.value_loc)]).updated(:lvar)
123
+ )
124
+ else
125
+ builder.pair_label([node.key.unescaped, srange(node.key.location)])
126
+ end
120
127
  elsif node.operator_loc
121
128
  builder.pair(visit(node.key), token(node.operator_loc), visit(node.value))
122
129
  elsif node.key.is_a?(SymbolNode) && node.key.opening_loc.nil?
@@ -247,18 +254,30 @@ module Prism
247
254
 
248
255
  if node.call_operator_loc.nil?
249
256
  case name
257
+ when :-@
258
+ case (receiver = node.receiver).type
259
+ when :integer_node, :float_node, :rational_node, :imaginary_node
260
+ return visit(numeric_negate(node.message_loc, receiver))
261
+ end
250
262
  when :!
251
263
  return visit_block(builder.not_op(token(node.message_loc), token(node.opening_loc), visit(node.receiver), token(node.closing_loc)), block)
264
+ when :=~
265
+ if (receiver = node.receiver).is_a?(RegularExpressionNode)
266
+ return builder.match_op(visit(receiver), token(node.message_loc), visit(node.arguments.arguments.first))
267
+ end
252
268
  when :[]
253
269
  return visit_block(builder.index(visit(node.receiver), token(node.opening_loc), visit_all(arguments), token(node.closing_loc)), block)
254
270
  when :[]=
255
271
  if node.message != "[]=" && node.arguments && block.nil? && !node.safe_navigation?
272
+ arguments = node.arguments.arguments[...-1]
273
+ arguments << node.block if node.block
274
+
256
275
  return visit_block(
257
276
  builder.assign(
258
277
  builder.index_asgn(
259
278
  visit(node.receiver),
260
279
  token(node.opening_loc),
261
- visit_all(node.arguments.arguments[...-1]),
280
+ visit_all(arguments),
262
281
  token(node.closing_loc),
263
282
  ),
264
283
  srange_find(node.message_loc.end_offset, node.arguments.arguments.last.location.start_offset, ["="]),
@@ -387,9 +406,6 @@ module Prism
387
406
 
388
407
  # @@foo = 1
389
408
  # ^^^^^^^^^
390
- #
391
- # @@foo, @@bar = 1
392
- # ^^^^^ ^^^^^
393
409
  def visit_class_variable_write_node(node)
394
410
  builder.assign(
395
411
  builder.assignable(builder.cvar(token(node.name_loc))),
@@ -682,9 +698,6 @@ module Prism
682
698
 
683
699
  # $foo = 1
684
700
  # ^^^^^^^^
685
- #
686
- # $foo, $bar = 1
687
- # ^^^^ ^^^^
688
701
  def visit_global_variable_write_node(node)
689
702
  builder.assign(
690
703
  builder.assignable(builder.gvar(token(node.name_loc))),
@@ -788,8 +801,9 @@ module Prism
788
801
  end
789
802
 
790
803
  # 1i
804
+ # ^^
791
805
  def visit_imaginary_node(node)
792
- visit_numeric(node, builder.complex([node.value, srange(node.location)]))
806
+ visit_numeric(node, builder.complex([imaginary_value(node), srange(node.location)]))
793
807
  end
794
808
 
795
809
  # { foo: }
@@ -875,9 +889,6 @@ module Prism
875
889
 
876
890
  # @foo = 1
877
891
  # ^^^^^^^^
878
- #
879
- # @foo, @bar = 1
880
- # ^^^^ ^^^^
881
892
  def visit_instance_variable_write_node(node)
882
893
  builder.assign(
883
894
  builder.assignable(builder.ivar(token(node.name_loc))),
@@ -934,16 +945,64 @@ module Prism
934
945
  # "foo #{bar}"
935
946
  # ^^^^^^^^^^^^
936
947
  def visit_interpolated_string_node(node)
937
- if node.opening&.start_with?("<<")
948
+ if node.heredoc?
938
949
  children, closing = visit_heredoc(node)
939
- builder.string_compose(token(node.opening_loc), children, closing)
950
+ opening = token(node.opening_loc)
951
+
952
+ start_offset = node.opening_loc.end_offset + 1
953
+ end_offset = node.parts.first.location.start_offset
954
+
955
+ # In the below case, the offsets should be the same:
956
+ #
957
+ # <<~HEREDOC
958
+ # a #{b}
959
+ # HEREDOC
960
+ #
961
+ # But in this case, the end_offset would be greater than the start_offset:
962
+ #
963
+ # <<~HEREDOC
964
+ # #{b}
965
+ # HEREDOC
966
+ #
967
+ # So we need to make sure the result node's heredoc range is correct, without updating the children
968
+ result = if start_offset < end_offset
969
+ # We need to add a padding string to ensure that the heredoc has correct range for its body
970
+ padding_string_node = builder.string_internal(["", srange_offsets(start_offset, end_offset)])
971
+ node_with_correct_location = builder.string_compose(opening, [padding_string_node, *children], closing)
972
+ # But the padding string should not be included in the final AST, so we need to update the result's children
973
+ node_with_correct_location.updated(:dstr, children)
974
+ else
975
+ builder.string_compose(opening, children, closing)
976
+ end
977
+
978
+ return result
979
+ end
980
+
981
+ parts = if node.parts.one? { |part| part.type == :string_node }
982
+ node.parts.flat_map do |node|
983
+ if node.type == :string_node && node.unescaped.lines.count >= 2
984
+ start_offset = node.content_loc.start_offset
985
+
986
+ node.unescaped.lines.map do |line|
987
+ end_offset = start_offset + line.length
988
+ offsets = srange_offsets(start_offset, end_offset)
989
+ start_offset = end_offset
990
+
991
+ builder.string_internal([line, offsets])
992
+ end
993
+ else
994
+ visit(node)
995
+ end
996
+ end
940
997
  else
941
- builder.string_compose(
942
- token(node.opening_loc),
943
- visit_all(node.parts),
944
- token(node.closing_loc)
945
- )
998
+ visit_all(node.parts)
946
999
  end
1000
+
1001
+ builder.string_compose(
1002
+ token(node.opening_loc),
1003
+ parts,
1004
+ token(node.closing_loc)
1005
+ )
947
1006
  end
948
1007
 
949
1008
  # :"foo #{bar}"
@@ -959,7 +1018,7 @@ module Prism
959
1018
  # `foo #{bar}`
960
1019
  # ^^^^^^^^^^^^
961
1020
  def visit_interpolated_x_string_node(node)
962
- if node.opening.start_with?("<<")
1021
+ if node.heredoc?
963
1022
  children, closing = visit_heredoc(node)
964
1023
  builder.xstring_compose(token(node.opening_loc), children, closing)
965
1024
  else
@@ -990,6 +1049,7 @@ module Prism
990
1049
  end
991
1050
 
992
1051
  # -> {}
1052
+ # ^^^^^
993
1053
  def visit_lambda_node(node)
994
1054
  parameters = node.parameters
995
1055
 
@@ -1021,9 +1081,6 @@ module Prism
1021
1081
 
1022
1082
  # foo = 1
1023
1083
  # ^^^^^^^
1024
- #
1025
- # foo, bar = 1
1026
- # ^^^ ^^^
1027
1084
  def visit_local_variable_write_node(node)
1028
1085
  builder.assign(
1029
1086
  builder.assignable(builder.ident(token(node.name_loc))),
@@ -1062,22 +1119,12 @@ module Prism
1062
1119
 
1063
1120
  # foo in bar
1064
1121
  # ^^^^^^^^^^
1065
- if RUBY_VERSION >= "3.0"
1066
- def visit_match_predicate_node(node)
1067
- builder.match_pattern_p(
1068
- visit(node.value),
1069
- token(node.operator_loc),
1070
- within_pattern { |compiler| node.pattern.accept(compiler) }
1071
- )
1072
- end
1073
- else
1074
- def visit_match_predicate_node(node)
1075
- builder.match_pattern(
1076
- visit(node.value),
1077
- token(node.operator_loc),
1078
- within_pattern { |compiler| node.pattern.accept(compiler) }
1079
- )
1080
- end
1122
+ def visit_match_predicate_node(node)
1123
+ builder.match_pattern_p(
1124
+ visit(node.value),
1125
+ token(node.operator_loc),
1126
+ within_pattern { |compiler| node.pattern.accept(compiler) }
1127
+ )
1081
1128
  end
1082
1129
 
1083
1130
  # foo => bar
@@ -1263,7 +1310,8 @@ module Prism
1263
1310
  # foo => ^(bar)
1264
1311
  # ^^^^^^
1265
1312
  def visit_pinned_expression_node(node)
1266
- builder.pin(token(node.operator_loc), visit(node.expression))
1313
+ expression = builder.begin(token(node.lparen_loc), visit(node.expression), token(node.rparen_loc))
1314
+ builder.pin(token(node.operator_loc), expression)
1267
1315
  end
1268
1316
 
1269
1317
  # foo = 1 and bar => ^foo
@@ -1322,7 +1370,7 @@ module Prism
1322
1370
  # 1r
1323
1371
  # ^^
1324
1372
  def visit_rational_node(node)
1325
- visit_numeric(node, builder.rational([node.value, srange(node.location)]))
1373
+ visit_numeric(node, builder.rational([rational_value(node), srange(node.location)]))
1326
1374
  end
1327
1375
 
1328
1376
  # redo
@@ -1418,6 +1466,11 @@ module Prism
1418
1466
  builder.self(token(node.location))
1419
1467
  end
1420
1468
 
1469
+ # A shareable constant.
1470
+ def visit_shareable_constant_node(node)
1471
+ visit(node.write)
1472
+ end
1473
+
1421
1474
  # class << self; end
1422
1475
  # ^^^^^^^^^^^^^^^^^^
1423
1476
  def visit_singleton_class_node(node)
@@ -1476,15 +1529,39 @@ module Prism
1476
1529
  # "foo"
1477
1530
  # ^^^^^
1478
1531
  def visit_string_node(node)
1479
- if node.opening&.start_with?("<<")
1480
- children, closing = visit_heredoc(InterpolatedStringNode.new(node.send(:source), node.opening_loc, [node.copy(opening_loc: nil, closing_loc: nil, location: node.content_loc)], node.closing_loc, node.location))
1532
+ if node.heredoc?
1533
+ children, closing = visit_heredoc(node.to_interpolated)
1481
1534
  builder.string_compose(token(node.opening_loc), children, closing)
1482
1535
  elsif node.opening == "?"
1483
1536
  builder.character([node.unescaped, srange(node.location)])
1484
1537
  else
1538
+ content_lines = node.content.lines
1539
+ unescaped_lines = node.unescaped.lines
1540
+
1541
+ parts =
1542
+ if content_lines.length <= 1 || unescaped_lines.length <= 1
1543
+ [builder.string_internal([node.unescaped, srange(node.content_loc)])]
1544
+ elsif content_lines.length != unescaped_lines.length
1545
+ # This occurs when we have line continuations in the string. We
1546
+ # need to come back and fix this, but for now this stops the
1547
+ # code from breaking when we encounter it because of trying to
1548
+ # transpose arrays of different lengths.
1549
+ [builder.string_internal([node.unescaped, srange(node.content_loc)])]
1550
+ else
1551
+ start_offset = node.content_loc.start_offset
1552
+
1553
+ [content_lines, unescaped_lines].transpose.map do |content_line, unescaped_line|
1554
+ end_offset = start_offset + content_line.length
1555
+ offsets = srange_offsets(start_offset, end_offset)
1556
+ start_offset = end_offset
1557
+
1558
+ builder.string_internal([unescaped_line, offsets])
1559
+ end
1560
+ end
1561
+
1485
1562
  builder.string_compose(
1486
1563
  token(node.opening_loc),
1487
- [builder.string_internal([node.unescaped, srange(node.content_loc)])],
1564
+ parts,
1488
1565
  token(node.closing_loc)
1489
1566
  )
1490
1567
  end
@@ -1523,9 +1600,23 @@ module Prism
1523
1600
  builder.symbol([node.unescaped, srange(node.location)])
1524
1601
  end
1525
1602
  else
1603
+ parts = if node.value.lines.one?
1604
+ [builder.string_internal([node.unescaped, srange(node.value_loc)])]
1605
+ else
1606
+ start_offset = node.value_loc.start_offset
1607
+
1608
+ node.value.lines.map do |line|
1609
+ end_offset = start_offset + line.length
1610
+ offsets = srange_offsets(start_offset, end_offset)
1611
+ start_offset = end_offset
1612
+
1613
+ builder.string_internal([line, offsets])
1614
+ end
1615
+ end
1616
+
1526
1617
  builder.symbol_compose(
1527
1618
  token(node.opening_loc),
1528
- [builder.string_internal([node.unescaped, srange(node.value_loc)])],
1619
+ parts,
1529
1620
  token(node.closing_loc)
1530
1621
  )
1531
1622
  end
@@ -1604,7 +1695,11 @@ module Prism
1604
1695
  builder.when(
1605
1696
  token(node.keyword_loc),
1606
1697
  visit_all(node.conditions),
1607
- srange_find(node.conditions.last.location.end_offset, node.statements&.location&.start_offset || (node.conditions.last.location.end_offset + 1), [";", "then"]),
1698
+ if node.then_keyword_loc
1699
+ token(node.then_keyword_loc)
1700
+ else
1701
+ srange_find(node.conditions.last.location.end_offset, node.statements&.location&.start_offset || (node.conditions.last.location.end_offset + 1), [";"])
1702
+ end,
1608
1703
  visit(node.statements)
1609
1704
  )
1610
1705
  end
@@ -1637,13 +1732,27 @@ module Prism
1637
1732
  # `foo`
1638
1733
  # ^^^^^
1639
1734
  def visit_x_string_node(node)
1640
- if node.opening&.start_with?("<<")
1641
- children, closing = visit_heredoc(InterpolatedXStringNode.new(node.opening_loc, [StringNode.new(0, nil, node.content_loc, nil, node.unescaped, node.content_loc)], node.closing_loc, node.location))
1735
+ if node.heredoc?
1736
+ children, closing = visit_heredoc(node.to_interpolated)
1642
1737
  builder.xstring_compose(token(node.opening_loc), children, closing)
1643
1738
  else
1739
+ parts = if node.unescaped.lines.one?
1740
+ [builder.string_internal([node.unescaped, srange(node.content_loc)])]
1741
+ else
1742
+ start_offset = node.content_loc.start_offset
1743
+
1744
+ node.unescaped.lines.map do |line|
1745
+ end_offset = start_offset + line.length
1746
+ offsets = srange_offsets(start_offset, end_offset)
1747
+ start_offset = end_offset
1748
+
1749
+ builder.string_internal([line, offsets])
1750
+ end
1751
+ end
1752
+
1644
1753
  builder.xstring_compose(
1645
1754
  token(node.opening_loc),
1646
- [builder.string_internal([node.unescaped, srange(node.content_loc)])],
1755
+ parts,
1647
1756
  token(node.closing_loc)
1648
1757
  )
1649
1758
  end
@@ -1687,6 +1796,26 @@ module Prism
1687
1796
  forwarding
1688
1797
  end
1689
1798
 
1799
+ # Because we have mutated the AST to allow for newlines in the middle of
1800
+ # a rational, we need to manually handle the value here.
1801
+ def imaginary_value(node)
1802
+ Complex(0, node.numeric.is_a?(RationalNode) ? rational_value(node.numeric) : node.numeric.value)
1803
+ end
1804
+
1805
+ # Negate the value of a numeric node. This is a special case where you
1806
+ # have a negative sign on one line and then a number on the next line.
1807
+ # In normal Ruby, this will always be a method call. The parser gem,
1808
+ # however, marks this as a numeric literal. We have to massage the tree
1809
+ # here to get it into the correct form.
1810
+ def numeric_negate(message_loc, receiver)
1811
+ case receiver.type
1812
+ when :integer_node, :float_node
1813
+ receiver.copy(value: -receiver.value, location: message_loc.join(receiver.location))
1814
+ when :rational_node, :imaginary_node
1815
+ receiver.copy(numeric: numeric_negate(message_loc, receiver.numeric), location: message_loc.join(receiver.location))
1816
+ end
1817
+ end
1818
+
1690
1819
  # Blocks can have a special set of parameters that automatically expand
1691
1820
  # when given arrays if they have a single required parameter and no
1692
1821
  # other parameters.
@@ -1701,6 +1830,16 @@ module Prism
1701
1830
  parameters.block.nil?
1702
1831
  end
1703
1832
 
1833
+ # Because we have mutated the AST to allow for newlines in the middle of
1834
+ # a rational, we need to manually handle the value here.
1835
+ def rational_value(node)
1836
+ if node.numeric.is_a?(IntegerNode)
1837
+ Rational(node.numeric.value)
1838
+ else
1839
+ Rational(node.slice.gsub(/\s/, "").chomp("r"))
1840
+ end
1841
+ end
1842
+
1704
1843
  # Locations in the parser gem AST are generated using this class. We
1705
1844
  # store a reference to its constant to make it slightly faster to look
1706
1845
  # up.
@@ -1767,7 +1906,7 @@ module Prism
1767
1906
 
1768
1907
  # Visit a heredoc that can be either a string or an xstring.
1769
1908
  def visit_heredoc(node)
1770
- children = []
1909
+ children = Array.new
1771
1910
  node.parts.each do |part|
1772
1911
  pushing =
1773
1912
  if part.is_a?(StringNode) && part.unescaped.include?("\n")
@@ -167,7 +167,7 @@ module Prism
167
167
  TILDE: :tTILDE,
168
168
  UAMPERSAND: :tAMPER,
169
169
  UCOLON_COLON: :tCOLON3,
170
- UDOT_DOT: :tDOT2,
170
+ UDOT_DOT: :tBDOT2,
171
171
  UDOT_DOT_DOT: :tBDOT3,
172
172
  UMINUS: :tUMINUS,
173
173
  UMINUS_NUM: :tUNARY_NUM,
@@ -177,12 +177,23 @@ module Prism
177
177
  WORDS_SEP: :tSPACE
178
178
  }
179
179
 
180
- private_constant :TYPES
180
+ # These constants represent flags in our lex state. We really, really
181
+ # don't want to be using them and we really, really don't want to be
182
+ # exposing them as part of our public API. Unfortunately, we don't have
183
+ # another way of matching the exact tokens that the parser gem expects
184
+ # without them. We should find another way to do this, but in the
185
+ # meantime we'll hide them from the documentation and mark them as
186
+ # private constants.
187
+ EXPR_BEG = 0x1 # :nodoc:
188
+ EXPR_LABEL = 0x400 # :nodoc:
189
+
190
+ private_constant :TYPES, :EXPR_BEG, :EXPR_LABEL
181
191
 
182
192
  # The Parser::Source::Buffer that the tokens were lexed from.
183
193
  attr_reader :source_buffer
184
194
 
185
- # An array of prism tokens that we lexed.
195
+ # An array of tuples that contain prism tokens and their associated lex
196
+ # state when they were lexed.
186
197
  attr_reader :lexed
187
198
 
188
199
  # A hash that maps offsets in bytes to offsets in characters.
@@ -202,12 +213,16 @@ module Prism
202
213
  # Convert the prism tokens into the expected format for the parser gem.
203
214
  def to_a
204
215
  tokens = []
216
+
205
217
  index = 0
218
+ length = lexed.length
219
+
220
+ heredoc_identifier_stack = []
206
221
 
207
- while index < lexed.length
208
- token, = lexed[index]
222
+ while index < length
223
+ token, state = lexed[index]
209
224
  index += 1
210
- next if token.type == :IGNORED_NEWLINE || token.type == :EOF
225
+ next if %i[IGNORED_NEWLINE __END__ EOF].include?(token.type)
211
226
 
212
227
  type = TYPES.fetch(token.type)
213
228
  value = token.value
@@ -218,14 +233,18 @@ module Prism
218
233
  value.delete_prefix!("?")
219
234
  when :tCOMMENT
220
235
  if token.type == :EMBDOC_BEGIN
221
- until (next_token = lexed[index]) && next_token.type == :EMBDOC_END
236
+ start_index = index
237
+
238
+ while !((next_token = lexed[index][0]) && next_token.type == :EMBDOC_END) && (index < length - 1)
222
239
  value += next_token.value
223
240
  index += 1
224
241
  end
225
242
 
226
- value += next_token.value
227
- location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[lexed[index].location.end_offset])
228
- index += 1
243
+ if start_index != index
244
+ value += next_token.value
245
+ location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[lexed[index][0].location.end_offset])
246
+ index += 1
247
+ end
229
248
  else
230
249
  value.chomp!
231
250
  location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[token.location.end_offset - 1])
@@ -233,7 +252,7 @@ module Prism
233
252
  when :tNL
234
253
  value = nil
235
254
  when :tFLOAT
236
- value = Float(value)
255
+ value = parse_float(value)
237
256
  when :tIMAGINARY
238
257
  value = parse_complex(value)
239
258
  when :tINTEGER
@@ -242,13 +261,15 @@ module Prism
242
261
  location = Range.new(source_buffer, offset_cache[token.location.start_offset + 1], offset_cache[token.location.end_offset])
243
262
  end
244
263
 
245
- value = Integer(value)
264
+ value = parse_integer(value)
246
265
  when :tLABEL
247
266
  value.chomp!(":")
248
267
  when :tLABEL_END
249
268
  value.chomp!(":")
269
+ when :tLCURLY
270
+ type = :tLBRACE if state == EXPR_BEG | EXPR_LABEL
250
271
  when :tNTH_REF
251
- value = Integer(value.delete_prefix("$"))
272
+ value = parse_integer(value.delete_prefix("$"))
252
273
  when :tOP_ASGN
253
274
  value.chomp!("=")
254
275
  when :tRATIONAL
@@ -256,31 +277,69 @@ module Prism
256
277
  when :tSPACE
257
278
  value = nil
258
279
  when :tSTRING_BEG
259
- if ["\"", "'"].include?(value) && (next_token = lexed[index]) && next_token.type == :STRING_END
280
+ if token.type == :HEREDOC_START
281
+ heredoc_identifier_stack.push(value.match(/<<[-~]?["'`]?(?<heredoc_identifier>.*?)["'`]?\z/)[:heredoc_identifier])
282
+ end
283
+ if ["\"", "'"].include?(value) && (next_token = lexed[index][0]) && next_token.type == :STRING_END
260
284
  next_location = token.location.join(next_token.location)
261
285
  type = :tSTRING
262
286
  value = ""
263
287
  location = Range.new(source_buffer, offset_cache[next_location.start_offset], offset_cache[next_location.end_offset])
264
288
  index += 1
265
- elsif ["\"", "'"].include?(value) && (next_token = lexed[index]) && next_token.type == :STRING_CONTENT && (next_next_token = lexed[index + 1]) && next_next_token.type == :STRING_END
289
+ elsif ["\"", "'"].include?(value) && (next_token = lexed[index][0]) && next_token.type == :STRING_CONTENT && next_token.value.lines.count <= 1 && (next_next_token = lexed[index + 1][0]) && next_next_token.type == :STRING_END
266
290
  next_location = token.location.join(next_next_token.location)
267
291
  type = :tSTRING
268
- value = next_token.value
292
+ value = next_token.value.gsub("\\\\", "\\")
269
293
  location = Range.new(source_buffer, offset_cache[next_location.start_offset], offset_cache[next_location.end_offset])
270
294
  index += 2
271
295
  elsif value.start_with?("<<")
272
296
  quote = value[2] == "-" || value[2] == "~" ? value[3] : value[2]
273
- value = "<<#{quote == "'" || quote == "\"" ? quote : "\""}"
297
+ if quote == "`"
298
+ type = :tXSTRING_BEG
299
+ value = "<<`"
300
+ else
301
+ value = "<<#{quote == "'" || quote == "\"" ? quote : "\""}"
302
+ end
303
+ end
304
+ when :tSTRING_CONTENT
305
+ unless (lines = token.value.lines).one?
306
+ start_offset = offset_cache[token.location.start_offset]
307
+ lines.map do |line|
308
+ newline = line.end_with?("\r\n") ? "\r\n" : "\n"
309
+ chomped_line = line.chomp
310
+ if match = chomped_line.match(/(?<backslashes>\\+)\z/)
311
+ adjustment = match[:backslashes].size / 2
312
+ adjusted_line = chomped_line.delete_suffix("\\" * adjustment)
313
+ if match[:backslashes].size.odd?
314
+ adjusted_line.delete_suffix!("\\")
315
+ adjustment += 2
316
+ else
317
+ adjusted_line << newline
318
+ end
319
+ else
320
+ adjusted_line = line
321
+ adjustment = 0
322
+ end
323
+
324
+ end_offset = start_offset + adjusted_line.length + adjustment
325
+ tokens << [:tSTRING_CONTENT, [adjusted_line, Range.new(source_buffer, offset_cache[start_offset], offset_cache[end_offset])]]
326
+ start_offset = end_offset
327
+ end
328
+ next
274
329
  end
275
330
  when :tSTRING_DVAR
276
331
  value = nil
277
332
  when :tSTRING_END
278
- if token.type == :REGEXP_END
333
+ if token.type == :HEREDOC_END && value.end_with?("\n")
334
+ newline_length = value.end_with?("\r\n") ? 2 : 1
335
+ value = heredoc_identifier_stack.pop
336
+ location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[token.location.end_offset - newline_length])
337
+ elsif token.type == :REGEXP_END
279
338
  value = value[0]
280
339
  location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[token.location.start_offset + 1])
281
340
  end
282
341
  when :tSYMBEG
283
- if (next_token = lexed[index]) && next_token.type != :STRING_CONTENT && next_token.type != :EMBEXPR_BEGIN && next_token.type != :EMBVAR
342
+ if (next_token = lexed[index][0]) && next_token.type != :STRING_CONTENT && next_token.type != :EMBEXPR_BEGIN && next_token.type != :EMBVAR
284
343
  next_location = token.location.join(next_token.location)
285
344
  type = :tSYMBOL
286
345
  value = next_token.value
@@ -289,9 +348,13 @@ module Prism
289
348
  index += 1
290
349
  end
291
350
  when :tFID
292
- if tokens[-1][0] == :kDEF
351
+ if !tokens.empty? && tokens.dig(-1, 0) == :kDEF
293
352
  type = :tIDENTIFIER
294
353
  end
354
+ when :tXSTRING_BEG
355
+ if (next_token = lexed[index][0]) && next_token.type != :STRING_CONTENT && next_token.type != :STRING_END
356
+ type = :tBACK_REF2
357
+ end
295
358
  end
296
359
 
297
360
  tokens << [type, [value, location]]
@@ -306,6 +369,20 @@ module Prism
306
369
 
307
370
  private
308
371
 
372
+ # Parse an integer from the string representation.
373
+ def parse_integer(value)
374
+ Integer(value)
375
+ rescue ArgumentError
376
+ 0
377
+ end
378
+
379
+ # Parse a float from the string representation.
380
+ def parse_float(value)
381
+ Float(value)
382
+ rescue ArgumentError
383
+ 0.0
384
+ end
385
+
309
386
  # Parse a complex from the string representation.
310
387
  def parse_complex(value)
311
388
  value.chomp!("i")
@@ -313,10 +390,12 @@ module Prism
313
390
  if value.end_with?("r")
314
391
  Complex(0, parse_rational(value))
315
392
  elsif value.start_with?(/0[BbOoDdXx]/)
316
- Complex(0, Integer(value))
393
+ Complex(0, parse_integer(value))
317
394
  else
318
395
  Complex(0, value)
319
396
  end
397
+ rescue ArgumentError
398
+ 0i
320
399
  end
321
400
 
322
401
  # Parse a rational from the string representation.
@@ -324,10 +403,12 @@ module Prism
324
403
  value.chomp!("r")
325
404
 
326
405
  if value.start_with?(/0[BbOoDdXx]/)
327
- Rational(Integer(value))
406
+ Rational(parse_integer(value))
328
407
  else
329
408
  Rational(value)
330
409
  end
410
+ rescue ArgumentError
411
+ 0r
331
412
  end
332
413
  end
333
414
  end