prism 0.23.0 → 0.25.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (117)
  1. checksums.yaml +4 -4
  2. data/BSDmakefile +58 -0
  3. data/CHANGELOG.md +65 -1
  4. data/Makefile +5 -2
  5. data/README.md +45 -6
  6. data/config.yml +499 -4
  7. data/docs/build_system.md +31 -0
  8. data/docs/configuration.md +2 -0
  9. data/docs/cruby_compilation.md +1 -1
  10. data/docs/parser_translation.md +14 -9
  11. data/docs/releasing.md +3 -3
  12. data/docs/ripper_translation.md +50 -0
  13. data/docs/ruby_api.md +1 -0
  14. data/docs/serialization.md +26 -5
  15. data/ext/prism/api_node.c +2342 -1801
  16. data/ext/prism/api_pack.c +9 -0
  17. data/ext/prism/extconf.rb +27 -11
  18. data/ext/prism/extension.c +313 -66
  19. data/ext/prism/extension.h +5 -4
  20. data/include/prism/ast.h +213 -64
  21. data/include/prism/defines.h +106 -2
  22. data/include/prism/diagnostic.h +134 -71
  23. data/include/prism/encoding.h +22 -4
  24. data/include/prism/node.h +93 -0
  25. data/include/prism/options.h +82 -7
  26. data/include/prism/pack.h +11 -0
  27. data/include/prism/parser.h +198 -53
  28. data/include/prism/prettyprint.h +8 -0
  29. data/include/prism/static_literals.h +118 -0
  30. data/include/prism/util/pm_buffer.h +65 -2
  31. data/include/prism/util/pm_constant_pool.h +18 -1
  32. data/include/prism/util/pm_integer.h +119 -0
  33. data/include/prism/util/pm_list.h +1 -1
  34. data/include/prism/util/pm_newline_list.h +12 -3
  35. data/include/prism/util/pm_string.h +26 -2
  36. data/include/prism/version.h +2 -2
  37. data/include/prism.h +59 -1
  38. data/lib/prism/compiler.rb +8 -1
  39. data/lib/prism/debug.rb +46 -3
  40. data/lib/prism/desugar_compiler.rb +225 -80
  41. data/lib/prism/dispatcher.rb +29 -0
  42. data/lib/prism/dot_visitor.rb +87 -16
  43. data/lib/prism/dsl.rb +315 -300
  44. data/lib/prism/ffi.rb +165 -84
  45. data/lib/prism/lex_compat.rb +17 -15
  46. data/lib/prism/mutation_compiler.rb +11 -0
  47. data/lib/prism/node.rb +4857 -3750
  48. data/lib/prism/node_ext.rb +77 -29
  49. data/lib/prism/pack.rb +4 -0
  50. data/lib/prism/parse_result/comments.rb +34 -17
  51. data/lib/prism/parse_result/newlines.rb +3 -1
  52. data/lib/prism/parse_result.rb +88 -34
  53. data/lib/prism/pattern.rb +16 -4
  54. data/lib/prism/polyfill/string.rb +12 -0
  55. data/lib/prism/serialize.rb +960 -327
  56. data/lib/prism/translation/parser/compiler.rb +152 -50
  57. data/lib/prism/translation/parser/lexer.rb +103 -22
  58. data/lib/prism/translation/parser/rubocop.rb +47 -11
  59. data/lib/prism/translation/parser.rb +134 -10
  60. data/lib/prism/translation/parser33.rb +12 -0
  61. data/lib/prism/translation/parser34.rb +12 -0
  62. data/lib/prism/translation/ripper/sexp.rb +125 -0
  63. data/lib/prism/translation/ripper/shim.rb +5 -0
  64. data/lib/prism/translation/ripper.rb +3248 -379
  65. data/lib/prism/translation/ruby_parser.rb +35 -18
  66. data/lib/prism/translation.rb +3 -1
  67. data/lib/prism/visitor.rb +10 -0
  68. data/lib/prism.rb +8 -2
  69. data/prism.gemspec +35 -4
  70. data/rbi/prism/compiler.rbi +14 -0
  71. data/rbi/prism/desugar_compiler.rbi +5 -0
  72. data/rbi/prism/mutation_compiler.rbi +5 -0
  73. data/rbi/prism/node.rbi +8221 -0
  74. data/rbi/prism/node_ext.rbi +102 -0
  75. data/rbi/prism/parse_result.rbi +304 -0
  76. data/rbi/prism/translation/parser/compiler.rbi +13 -0
  77. data/rbi/prism/translation/ripper/ripper_compiler.rbi +5 -0
  78. data/rbi/prism/translation/ripper.rbi +25 -0
  79. data/rbi/prism/translation/ruby_parser.rbi +11 -0
  80. data/rbi/prism/visitor.rbi +470 -0
  81. data/rbi/prism.rbi +39 -7749
  82. data/sig/prism/compiler.rbs +9 -0
  83. data/sig/prism/dispatcher.rbs +16 -0
  84. data/sig/prism/dot_visitor.rbs +6 -0
  85. data/sig/prism/dsl.rbs +462 -0
  86. data/sig/prism/mutation_compiler.rbs +158 -0
  87. data/sig/prism/node.rbs +3529 -0
  88. data/sig/prism/node_ext.rbs +78 -0
  89. data/sig/prism/pack.rbs +43 -0
  90. data/sig/prism/parse_result.rbs +127 -0
  91. data/sig/prism/pattern.rbs +13 -0
  92. data/sig/prism/serialize.rbs +7 -0
  93. data/sig/prism/visitor.rbs +168 -0
  94. data/sig/prism.rbs +188 -4767
  95. data/src/diagnostic.c +575 -230
  96. data/src/encoding.c +211 -108
  97. data/src/node.c +7526 -447
  98. data/src/options.c +36 -12
  99. data/src/pack.c +33 -17
  100. data/src/prettyprint.c +1297 -1388
  101. data/src/prism.c +3665 -1121
  102. data/src/regexp.c +17 -2
  103. data/src/serialize.c +47 -28
  104. data/src/static_literals.c +552 -0
  105. data/src/token_type.c +1 -0
  106. data/src/util/pm_buffer.c +147 -20
  107. data/src/util/pm_char.c +4 -4
  108. data/src/util/pm_constant_pool.c +35 -11
  109. data/src/util/pm_integer.c +629 -0
  110. data/src/util/pm_list.c +1 -1
  111. data/src/util/pm_newline_list.c +20 -8
  112. data/src/util/pm_string.c +134 -5
  113. data/src/util/pm_string_list.c +2 -2
  114. metadata +37 -6
  115. data/docs/ripper.md +0 -36
  116. data/rbi/prism_static.rbi +0 -207
  117. data/sig/prism_static.rbs +0 -201
@@ -116,7 +116,14 @@ module Prism
116
116
  builder.pair_keyword([node.key.unescaped, srange(node.key.location)], visit(node.value))
117
117
  end
118
118
  elsif node.value.is_a?(ImplicitNode)
119
- builder.pair_label([node.key.unescaped, srange(node.key.location)])
119
+ if (value = node.value.value).is_a?(LocalVariableReadNode)
120
+ builder.pair_keyword(
121
+ [node.key.unescaped, srange(node.key)],
122
+ builder.ident([value.name, srange(node.key.value_loc)]).updated(:lvar)
123
+ )
124
+ else
125
+ builder.pair_label([node.key.unescaped, srange(node.key.location)])
126
+ end
120
127
  elsif node.operator_loc
121
128
  builder.pair(visit(node.key), token(node.operator_loc), visit(node.value))
122
129
  elsif node.key.is_a?(SymbolNode) && node.key.opening_loc.nil?
@@ -247,18 +254,30 @@ module Prism
247
254
 
248
255
  if node.call_operator_loc.nil?
249
256
  case name
257
+ when :-@
258
+ case (receiver = node.receiver).type
259
+ when :integer_node, :float_node, :rational_node, :imaginary_node
260
+ return visit(numeric_negate(node.message_loc, receiver))
261
+ end
250
262
  when :!
251
263
  return visit_block(builder.not_op(token(node.message_loc), token(node.opening_loc), visit(node.receiver), token(node.closing_loc)), block)
264
+ when :=~
265
+ if (receiver = node.receiver).is_a?(RegularExpressionNode)
266
+ return builder.match_op(visit(receiver), token(node.message_loc), visit(node.arguments.arguments.first))
267
+ end
252
268
  when :[]
253
269
  return visit_block(builder.index(visit(node.receiver), token(node.opening_loc), visit_all(arguments), token(node.closing_loc)), block)
254
270
  when :[]=
255
271
  if node.message != "[]=" && node.arguments && block.nil? && !node.safe_navigation?
272
+ arguments = node.arguments.arguments[...-1]
273
+ arguments << node.block if node.block
274
+
256
275
  return visit_block(
257
276
  builder.assign(
258
277
  builder.index_asgn(
259
278
  visit(node.receiver),
260
279
  token(node.opening_loc),
261
- visit_all(node.arguments.arguments[...-1]),
280
+ visit_all(arguments),
262
281
  token(node.closing_loc),
263
282
  ),
264
283
  srange_find(node.message_loc.end_offset, node.arguments.arguments.last.location.start_offset, ["="]),
@@ -387,9 +406,6 @@ module Prism
387
406
 
388
407
  # @@foo = 1
389
408
  # ^^^^^^^^^
390
- #
391
- # @@foo, @@bar = 1
392
- # ^^^^^ ^^^^^
393
409
  def visit_class_variable_write_node(node)
394
410
  builder.assign(
395
411
  builder.assignable(builder.cvar(token(node.name_loc))),
@@ -682,9 +698,6 @@ module Prism
682
698
 
683
699
  # $foo = 1
684
700
  # ^^^^^^^^
685
- #
686
- # $foo, $bar = 1
687
- # ^^^^ ^^^^
688
701
  def visit_global_variable_write_node(node)
689
702
  builder.assign(
690
703
  builder.assignable(builder.gvar(token(node.name_loc))),
@@ -788,8 +801,9 @@ module Prism
788
801
  end
789
802
 
790
803
  # 1i
804
+ # ^^
791
805
  def visit_imaginary_node(node)
792
- visit_numeric(node, builder.complex([node.value, srange(node.location)]))
806
+ visit_numeric(node, builder.complex([imaginary_value(node), srange(node.location)]))
793
807
  end
794
808
 
795
809
  # { foo: }
@@ -875,9 +889,6 @@ module Prism
875
889
 
876
890
  # @foo = 1
877
891
  # ^^^^^^^^
878
- #
879
- # @foo, @bar = 1
880
- # ^^^^ ^^^^
881
892
  def visit_instance_variable_write_node(node)
882
893
  builder.assign(
883
894
  builder.assignable(builder.ivar(token(node.name_loc))),
@@ -934,16 +945,37 @@ module Prism
934
945
  # "foo #{bar}"
935
946
  # ^^^^^^^^^^^^
936
947
  def visit_interpolated_string_node(node)
937
- if node.opening&.start_with?("<<")
948
+ if node.heredoc?
938
949
  children, closing = visit_heredoc(node)
939
- builder.string_compose(token(node.opening_loc), children, closing)
950
+
951
+ return builder.string_compose(token(node.opening_loc), children, closing)
952
+ end
953
+
954
+ parts = if node.parts.one? { |part| part.type == :string_node }
955
+ node.parts.flat_map do |node|
956
+ if node.type == :string_node && node.unescaped.lines.count >= 2
957
+ start_offset = node.content_loc.start_offset
958
+
959
+ node.unescaped.lines.map do |line|
960
+ end_offset = start_offset + line.length
961
+ offsets = srange_offsets(start_offset, end_offset)
962
+ start_offset = end_offset
963
+
964
+ builder.string_internal([line, offsets])
965
+ end
966
+ else
967
+ visit(node)
968
+ end
969
+ end
940
970
  else
941
- builder.string_compose(
942
- token(node.opening_loc),
943
- visit_all(node.parts),
944
- token(node.closing_loc)
945
- )
971
+ visit_all(node.parts)
946
972
  end
973
+
974
+ builder.string_compose(
975
+ token(node.opening_loc),
976
+ parts,
977
+ token(node.closing_loc)
978
+ )
947
979
  end
948
980
 
949
981
  # :"foo #{bar}"
@@ -959,7 +991,7 @@ module Prism
959
991
  # `foo #{bar}`
960
992
  # ^^^^^^^^^^^^
961
993
  def visit_interpolated_x_string_node(node)
962
- if node.opening.start_with?("<<")
994
+ if node.heredoc?
963
995
  children, closing = visit_heredoc(node)
964
996
  builder.xstring_compose(token(node.opening_loc), children, closing)
965
997
  else
@@ -990,6 +1022,7 @@ module Prism
990
1022
  end
991
1023
 
992
1024
  # -> {}
1025
+ # ^^^^^
993
1026
  def visit_lambda_node(node)
994
1027
  parameters = node.parameters
995
1028
 
@@ -1021,9 +1054,6 @@ module Prism
1021
1054
 
1022
1055
  # foo = 1
1023
1056
  # ^^^^^^^
1024
- #
1025
- # foo, bar = 1
1026
- # ^^^ ^^^
1027
1057
  def visit_local_variable_write_node(node)
1028
1058
  builder.assign(
1029
1059
  builder.assignable(builder.ident(token(node.name_loc))),
@@ -1062,22 +1092,12 @@ module Prism
1062
1092
 
1063
1093
  # foo in bar
1064
1094
  # ^^^^^^^^^^
1065
- if RUBY_VERSION >= "3.0"
1066
- def visit_match_predicate_node(node)
1067
- builder.match_pattern_p(
1068
- visit(node.value),
1069
- token(node.operator_loc),
1070
- within_pattern { |compiler| node.pattern.accept(compiler) }
1071
- )
1072
- end
1073
- else
1074
- def visit_match_predicate_node(node)
1075
- builder.match_pattern(
1076
- visit(node.value),
1077
- token(node.operator_loc),
1078
- within_pattern { |compiler| node.pattern.accept(compiler) }
1079
- )
1080
- end
1095
+ def visit_match_predicate_node(node)
1096
+ builder.match_pattern_p(
1097
+ visit(node.value),
1098
+ token(node.operator_loc),
1099
+ within_pattern { |compiler| node.pattern.accept(compiler) }
1100
+ )
1081
1101
  end
1082
1102
 
1083
1103
  # foo => bar
@@ -1263,7 +1283,8 @@ module Prism
1263
1283
  # foo => ^(bar)
1264
1284
  # ^^^^^^
1265
1285
  def visit_pinned_expression_node(node)
1266
- builder.pin(token(node.operator_loc), visit(node.expression))
1286
+ expression = builder.begin(token(node.lparen_loc), visit(node.expression), token(node.rparen_loc))
1287
+ builder.pin(token(node.operator_loc), expression)
1267
1288
  end
1268
1289
 
1269
1290
  # foo = 1 and bar => ^foo
@@ -1322,7 +1343,7 @@ module Prism
1322
1343
  # 1r
1323
1344
  # ^^
1324
1345
  def visit_rational_node(node)
1325
- visit_numeric(node, builder.rational([node.value, srange(node.location)]))
1346
+ visit_numeric(node, builder.rational([rational_value(node), srange(node.location)]))
1326
1347
  end
1327
1348
 
1328
1349
  # redo
@@ -1418,6 +1439,11 @@ module Prism
1418
1439
  builder.self(token(node.location))
1419
1440
  end
1420
1441
 
1442
+ # A shareable constant.
1443
+ def visit_shareable_constant_node(node)
1444
+ visit(node.write)
1445
+ end
1446
+
1421
1447
  # class << self; end
1422
1448
  # ^^^^^^^^^^^^^^^^^^
1423
1449
  def visit_singleton_class_node(node)
@@ -1476,15 +1502,29 @@ module Prism
1476
1502
  # "foo"
1477
1503
  # ^^^^^
1478
1504
  def visit_string_node(node)
1479
- if node.opening&.start_with?("<<")
1480
- children, closing = visit_heredoc(InterpolatedStringNode.new(node.opening_loc, [node.copy(opening_loc: nil, closing_loc: nil, location: node.content_loc)], node.closing_loc, node.location))
1505
+ if node.heredoc?
1506
+ children, closing = visit_heredoc(node.to_interpolated)
1481
1507
  builder.string_compose(token(node.opening_loc), children, closing)
1482
1508
  elsif node.opening == "?"
1483
1509
  builder.character([node.unescaped, srange(node.location)])
1484
1510
  else
1511
+ parts = if node.content.lines.count <= 1 || node.unescaped.lines.count <= 1
1512
+ [builder.string_internal([node.unescaped, srange(node.content_loc)])]
1513
+ else
1514
+ start_offset = node.content_loc.start_offset
1515
+
1516
+ [node.content.lines, node.unescaped.lines].transpose.map do |content_line, unescaped_line|
1517
+ end_offset = start_offset + content_line.length
1518
+ offsets = srange_offsets(start_offset, end_offset)
1519
+ start_offset = end_offset
1520
+
1521
+ builder.string_internal([unescaped_line, offsets])
1522
+ end
1523
+ end
1524
+
1485
1525
  builder.string_compose(
1486
1526
  token(node.opening_loc),
1487
- [builder.string_internal([node.unescaped, srange(node.content_loc)])],
1527
+ parts,
1488
1528
  token(node.closing_loc)
1489
1529
  )
1490
1530
  end
@@ -1523,9 +1563,23 @@ module Prism
1523
1563
  builder.symbol([node.unescaped, srange(node.location)])
1524
1564
  end
1525
1565
  else
1566
+ parts = if node.value.lines.one?
1567
+ [builder.string_internal([node.unescaped, srange(node.value_loc)])]
1568
+ else
1569
+ start_offset = node.value_loc.start_offset
1570
+
1571
+ node.value.lines.map do |line|
1572
+ end_offset = start_offset + line.length
1573
+ offsets = srange_offsets(start_offset, end_offset)
1574
+ start_offset = end_offset
1575
+
1576
+ builder.string_internal([line, offsets])
1577
+ end
1578
+ end
1579
+
1526
1580
  builder.symbol_compose(
1527
1581
  token(node.opening_loc),
1528
- [builder.string_internal([node.unescaped, srange(node.value_loc)])],
1582
+ parts,
1529
1583
  token(node.closing_loc)
1530
1584
  )
1531
1585
  end
@@ -1604,7 +1658,11 @@ module Prism
1604
1658
  builder.when(
1605
1659
  token(node.keyword_loc),
1606
1660
  visit_all(node.conditions),
1607
- srange_find(node.conditions.last.location.end_offset, node.statements&.location&.start_offset || (node.conditions.last.location.end_offset + 1), [";", "then"]),
1661
+ if node.then_keyword_loc
1662
+ token(node.then_keyword_loc)
1663
+ else
1664
+ srange_find(node.conditions.last.location.end_offset, node.statements&.location&.start_offset || (node.conditions.last.location.end_offset + 1), [";"])
1665
+ end,
1608
1666
  visit(node.statements)
1609
1667
  )
1610
1668
  end
@@ -1637,13 +1695,27 @@ module Prism
1637
1695
  # `foo`
1638
1696
  # ^^^^^
1639
1697
  def visit_x_string_node(node)
1640
- if node.opening&.start_with?("<<")
1641
- children, closing = visit_heredoc(InterpolatedXStringNode.new(node.opening_loc, [StringNode.new(0, nil, node.content_loc, nil, node.unescaped, node.content_loc)], node.closing_loc, node.location))
1698
+ if node.heredoc?
1699
+ children, closing = visit_heredoc(node.to_interpolated)
1642
1700
  builder.xstring_compose(token(node.opening_loc), children, closing)
1643
1701
  else
1702
+ parts = if node.unescaped.lines.one?
1703
+ [builder.string_internal([node.unescaped, srange(node.content_loc)])]
1704
+ else
1705
+ start_offset = node.content_loc.start_offset
1706
+
1707
+ node.unescaped.lines.map do |line|
1708
+ end_offset = start_offset + line.length
1709
+ offsets = srange_offsets(start_offset, end_offset)
1710
+ start_offset = end_offset
1711
+
1712
+ builder.string_internal([line, offsets])
1713
+ end
1714
+ end
1715
+
1644
1716
  builder.xstring_compose(
1645
1717
  token(node.opening_loc),
1646
- [builder.string_internal([node.unescaped, srange(node.content_loc)])],
1718
+ parts,
1647
1719
  token(node.closing_loc)
1648
1720
  )
1649
1721
  end
@@ -1687,6 +1759,26 @@ module Prism
1687
1759
  forwarding
1688
1760
  end
1689
1761
 
1762
+ # Because we have mutated the AST to allow for newlines in the middle of
1763
+ # a rational, we need to manually handle the value here.
1764
+ def imaginary_value(node)
1765
+ Complex(0, node.numeric.is_a?(RationalNode) ? rational_value(node.numeric) : node.numeric.value)
1766
+ end
1767
+
1768
+ # Negate the value of a numeric node. This is a special case where you
1769
+ # have a negative sign on one line and then a number on the next line.
1770
+ # In normal Ruby, this will always be a method call. The parser gem,
1771
+ # however, marks this as a numeric literal. We have to massage the tree
1772
+ # here to get it into the correct form.
1773
+ def numeric_negate(message_loc, receiver)
1774
+ case receiver.type
1775
+ when :integer_node, :float_node
1776
+ receiver.copy(value: -receiver.value, location: message_loc.join(receiver.location))
1777
+ when :rational_node, :imaginary_node
1778
+ receiver.copy(numeric: numeric_negate(message_loc, receiver.numeric), location: message_loc.join(receiver.location))
1779
+ end
1780
+ end
1781
+
1690
1782
  # Blocks can have a special set of parameters that automatically expand
1691
1783
  # when given arrays if they have a single required parameter and no
1692
1784
  # other parameters.
@@ -1701,6 +1793,16 @@ module Prism
1701
1793
  parameters.block.nil?
1702
1794
  end
1703
1795
 
1796
+ # Because we have mutated the AST to allow for newlines in the middle of
1797
+ # a rational, we need to manually handle the value here.
1798
+ def rational_value(node)
1799
+ if node.numeric.is_a?(IntegerNode)
1800
+ Rational(node.numeric.value)
1801
+ else
1802
+ Rational(node.slice.gsub(/\s/, "").chomp("r"))
1803
+ end
1804
+ end
1805
+
1704
1806
  # Locations in the parser gem AST are generated using this class. We
1705
1807
  # store a reference to its constant to make it slightly faster to look
1706
1808
  # up.
@@ -1767,7 +1869,7 @@ module Prism
1767
1869
 
1768
1870
  # Visit a heredoc that can be either a string or an xstring.
1769
1871
  def visit_heredoc(node)
1770
- children = []
1872
+ children = Array.new
1771
1873
  node.parts.each do |part|
1772
1874
  pushing =
1773
1875
  if part.is_a?(StringNode) && part.unescaped.include?("\n")
@@ -167,7 +167,7 @@ module Prism
167
167
  TILDE: :tTILDE,
168
168
  UAMPERSAND: :tAMPER,
169
169
  UCOLON_COLON: :tCOLON3,
170
- UDOT_DOT: :tDOT2,
170
+ UDOT_DOT: :tBDOT2,
171
171
  UDOT_DOT_DOT: :tBDOT3,
172
172
  UMINUS: :tUMINUS,
173
173
  UMINUS_NUM: :tUNARY_NUM,
@@ -177,12 +177,23 @@ module Prism
177
177
  WORDS_SEP: :tSPACE
178
178
  }
179
179
 
180
- private_constant :TYPES
180
+ # These constants represent flags in our lex state. We really, really
181
+ # don't want to be using them and we really, really don't want to be
182
+ # exposing them as part of our public API. Unfortunately, we don't have
183
+ # another way of matching the exact tokens that the parser gem expects
184
+ # without them. We should find another way to do this, but in the
185
+ # meantime we'll hide them from the documentation and mark them as
186
+ # private constants.
187
+ EXPR_BEG = 0x1 # :nodoc:
188
+ EXPR_LABEL = 0x400 # :nodoc:
189
+
190
+ private_constant :TYPES, :EXPR_BEG, :EXPR_LABEL
181
191
 
182
192
  # The Parser::Source::Buffer that the tokens were lexed from.
183
193
  attr_reader :source_buffer
184
194
 
185
- # An array of prism tokens that we lexed.
195
+ # An array of tuples that contain prism tokens and their associated lex
196
+ # state when they were lexed.
186
197
  attr_reader :lexed
187
198
 
188
199
  # A hash that maps offsets in bytes to offsets in characters.
@@ -202,12 +213,16 @@ module Prism
202
213
  # Convert the prism tokens into the expected format for the parser gem.
203
214
  def to_a
204
215
  tokens = []
216
+
205
217
  index = 0
218
+ length = lexed.length
219
+
220
+ heredoc_identifier_stack = []
206
221
 
207
- while index < lexed.length
208
- token, = lexed[index]
222
+ while index < length
223
+ token, state = lexed[index]
209
224
  index += 1
210
- next if token.type == :IGNORED_NEWLINE || token.type == :EOF
225
+ next if %i[IGNORED_NEWLINE __END__ EOF].include?(token.type)
211
226
 
212
227
  type = TYPES.fetch(token.type)
213
228
  value = token.value
@@ -218,14 +233,18 @@ module Prism
218
233
  value.delete_prefix!("?")
219
234
  when :tCOMMENT
220
235
  if token.type == :EMBDOC_BEGIN
221
- until (next_token = lexed[index]) && next_token.type == :EMBDOC_END
236
+ start_index = index
237
+
238
+ while !((next_token = lexed[index][0]) && next_token.type == :EMBDOC_END) && (index < length - 1)
222
239
  value += next_token.value
223
240
  index += 1
224
241
  end
225
242
 
226
- value += next_token.value
227
- location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[lexed[index].location.end_offset])
228
- index += 1
243
+ if start_index != index
244
+ value += next_token.value
245
+ location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[lexed[index][0].location.end_offset])
246
+ index += 1
247
+ end
229
248
  else
230
249
  value.chomp!
231
250
  location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[token.location.end_offset - 1])
@@ -233,7 +252,7 @@ module Prism
233
252
  when :tNL
234
253
  value = nil
235
254
  when :tFLOAT
236
- value = Float(value)
255
+ value = parse_float(value)
237
256
  when :tIMAGINARY
238
257
  value = parse_complex(value)
239
258
  when :tINTEGER
@@ -242,13 +261,15 @@ module Prism
242
261
  location = Range.new(source_buffer, offset_cache[token.location.start_offset + 1], offset_cache[token.location.end_offset])
243
262
  end
244
263
 
245
- value = Integer(value)
264
+ value = parse_integer(value)
246
265
  when :tLABEL
247
266
  value.chomp!(":")
248
267
  when :tLABEL_END
249
268
  value.chomp!(":")
269
+ when :tLCURLY
270
+ type = :tLBRACE if state == EXPR_BEG | EXPR_LABEL
250
271
  when :tNTH_REF
251
- value = Integer(value.delete_prefix("$"))
272
+ value = parse_integer(value.delete_prefix("$"))
252
273
  when :tOP_ASGN
253
274
  value.chomp!("=")
254
275
  when :tRATIONAL
@@ -256,31 +277,69 @@ module Prism
256
277
  when :tSPACE
257
278
  value = nil
258
279
  when :tSTRING_BEG
259
- if ["\"", "'"].include?(value) && (next_token = lexed[index]) && next_token.type == :STRING_END
280
+ if token.type == :HEREDOC_START
281
+ heredoc_identifier_stack.push(value.match(/<<[-~]?["'`]?(?<heredoc_identifier>.*?)["'`]?\z/)[:heredoc_identifier])
282
+ end
283
+ if ["\"", "'"].include?(value) && (next_token = lexed[index][0]) && next_token.type == :STRING_END
260
284
  next_location = token.location.join(next_token.location)
261
285
  type = :tSTRING
262
286
  value = ""
263
287
  location = Range.new(source_buffer, offset_cache[next_location.start_offset], offset_cache[next_location.end_offset])
264
288
  index += 1
265
- elsif ["\"", "'"].include?(value) && (next_token = lexed[index]) && next_token.type == :STRING_CONTENT && (next_next_token = lexed[index + 1]) && next_next_token.type == :STRING_END
289
+ elsif ["\"", "'"].include?(value) && (next_token = lexed[index][0]) && next_token.type == :STRING_CONTENT && next_token.value.lines.count <= 1 && (next_next_token = lexed[index + 1][0]) && next_next_token.type == :STRING_END
266
290
  next_location = token.location.join(next_next_token.location)
267
291
  type = :tSTRING
268
- value = next_token.value
292
+ value = next_token.value.gsub("\\\\", "\\")
269
293
  location = Range.new(source_buffer, offset_cache[next_location.start_offset], offset_cache[next_location.end_offset])
270
294
  index += 2
271
295
  elsif value.start_with?("<<")
272
296
  quote = value[2] == "-" || value[2] == "~" ? value[3] : value[2]
273
- value = "<<#{quote == "'" || quote == "\"" ? quote : "\""}"
297
+ if quote == "`"
298
+ type = :tXSTRING_BEG
299
+ value = "<<`"
300
+ else
301
+ value = "<<#{quote == "'" || quote == "\"" ? quote : "\""}"
302
+ end
303
+ end
304
+ when :tSTRING_CONTENT
305
+ unless (lines = token.value.lines).one?
306
+ start_offset = offset_cache[token.location.start_offset]
307
+ lines.map do |line|
308
+ newline = line.end_with?("\r\n") ? "\r\n" : "\n"
309
+ chomped_line = line.chomp
310
+ if match = chomped_line.match(/(?<backslashes>\\+)\z/)
311
+ adjustment = match[:backslashes].size / 2
312
+ adjusted_line = chomped_line.delete_suffix("\\" * adjustment)
313
+ if match[:backslashes].size.odd?
314
+ adjusted_line.delete_suffix!("\\")
315
+ adjustment += 2
316
+ else
317
+ adjusted_line << newline
318
+ end
319
+ else
320
+ adjusted_line = line
321
+ adjustment = 0
322
+ end
323
+
324
+ end_offset = start_offset + adjusted_line.length + adjustment
325
+ tokens << [:tSTRING_CONTENT, [adjusted_line, Range.new(source_buffer, offset_cache[start_offset], offset_cache[end_offset])]]
326
+ start_offset = end_offset
327
+ end
328
+ next
274
329
  end
275
330
  when :tSTRING_DVAR
276
331
  value = nil
277
332
  when :tSTRING_END
278
- if token.type == :REGEXP_END
333
+ if token.type == :HEREDOC_END && value.end_with?("\n")
334
+ newline_length = value.end_with?("\r\n") ? 2 : 1
335
+ value = heredoc_identifier_stack.pop
336
+ location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[token.location.end_offset - newline_length])
337
+ elsif token.type == :REGEXP_END
279
338
  value = value[0]
280
339
  location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[token.location.start_offset + 1])
281
340
  end
282
341
  when :tSYMBEG
283
- if (next_token = lexed[index]) && next_token.type != :STRING_CONTENT && next_token.type != :EMBEXPR_BEGIN && next_token.type != :EMBVAR
342
+ if (next_token = lexed[index][0]) && next_token.type != :STRING_CONTENT && next_token.type != :EMBEXPR_BEGIN && next_token.type != :EMBVAR
284
343
  next_location = token.location.join(next_token.location)
285
344
  type = :tSYMBOL
286
345
  value = next_token.value
@@ -289,9 +348,13 @@ module Prism
289
348
  index += 1
290
349
  end
291
350
  when :tFID
292
- if tokens[-1][0] == :kDEF
351
+ if !tokens.empty? && tokens.dig(-1, 0) == :kDEF
293
352
  type = :tIDENTIFIER
294
353
  end
354
+ when :tXSTRING_BEG
355
+ if (next_token = lexed[index][0]) && next_token.type != :STRING_CONTENT && next_token.type != :STRING_END
356
+ type = :tBACK_REF2
357
+ end
295
358
  end
296
359
 
297
360
  tokens << [type, [value, location]]
@@ -306,6 +369,20 @@ module Prism
306
369
 
307
370
  private
308
371
 
372
+ # Parse an integer from the string representation.
373
+ def parse_integer(value)
374
+ Integer(value)
375
+ rescue ArgumentError
376
+ 0
377
+ end
378
+
379
+ # Parse a float from the string representation.
380
+ def parse_float(value)
381
+ Float(value)
382
+ rescue ArgumentError
383
+ 0.0
384
+ end
385
+
309
386
  # Parse a complex from the string representation.
310
387
  def parse_complex(value)
311
388
  value.chomp!("i")
@@ -313,10 +390,12 @@ module Prism
313
390
  if value.end_with?("r")
314
391
  Complex(0, parse_rational(value))
315
392
  elsif value.start_with?(/0[BbOoDdXx]/)
316
- Complex(0, Integer(value))
393
+ Complex(0, parse_integer(value))
317
394
  else
318
395
  Complex(0, value)
319
396
  end
397
+ rescue ArgumentError
398
+ 0i
320
399
  end
321
400
 
322
401
  # Parse a rational from the string representation.
@@ -324,10 +403,12 @@ module Prism
324
403
  value.chomp!("r")
325
404
 
326
405
  if value.start_with?(/0[BbOoDdXx]/)
327
- Rational(Integer(value))
406
+ Rational(parse_integer(value))
328
407
  else
329
408
  Rational(value)
330
409
  end
410
+ rescue ArgumentError
411
+ 0r
331
412
  end
332
413
  end
333
414
  end