syntax_tree 5.3.0 → 6.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (53) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +12 -1
  3. data/CHANGELOG.md +78 -1
  4. data/Gemfile.lock +7 -7
  5. data/README.md +33 -9
  6. data/Rakefile +12 -8
  7. data/bin/console +1 -0
  8. data/bin/whitequark +79 -0
  9. data/doc/changing_structure.md +16 -0
  10. data/lib/syntax_tree/basic_visitor.rb +44 -5
  11. data/lib/syntax_tree/cli.rb +2 -2
  12. data/lib/syntax_tree/dsl.rb +23 -11
  13. data/lib/syntax_tree/{visitor/field_visitor.rb → field_visitor.rb} +54 -55
  14. data/lib/syntax_tree/formatter.rb +1 -1
  15. data/lib/syntax_tree/index.rb +158 -59
  16. data/lib/syntax_tree/json_visitor.rb +55 -0
  17. data/lib/syntax_tree/language_server.rb +157 -2
  18. data/lib/syntax_tree/match_visitor.rb +120 -0
  19. data/lib/syntax_tree/mermaid.rb +177 -0
  20. data/lib/syntax_tree/mermaid_visitor.rb +69 -0
  21. data/lib/syntax_tree/{visitor/mutation_visitor.rb → mutation_visitor.rb} +27 -27
  22. data/lib/syntax_tree/node.rb +245 -123
  23. data/lib/syntax_tree/parser.rb +332 -119
  24. data/lib/syntax_tree/pretty_print_visitor.rb +83 -0
  25. data/lib/syntax_tree/reflection.rb +241 -0
  26. data/lib/syntax_tree/translation/parser.rb +3107 -0
  27. data/lib/syntax_tree/translation/rubocop_ast.rb +21 -0
  28. data/lib/syntax_tree/translation.rb +28 -0
  29. data/lib/syntax_tree/version.rb +1 -1
  30. data/lib/syntax_tree/with_scope.rb +244 -0
  31. data/lib/syntax_tree/yarv/basic_block.rb +53 -0
  32. data/lib/syntax_tree/yarv/calldata.rb +91 -0
  33. data/lib/syntax_tree/yarv/compiler.rb +110 -100
  34. data/lib/syntax_tree/yarv/control_flow_graph.rb +257 -0
  35. data/lib/syntax_tree/yarv/data_flow_graph.rb +338 -0
  36. data/lib/syntax_tree/yarv/decompiler.rb +1 -1
  37. data/lib/syntax_tree/yarv/disassembler.rb +104 -80
  38. data/lib/syntax_tree/yarv/instruction_sequence.rb +43 -18
  39. data/lib/syntax_tree/yarv/instructions.rb +203 -649
  40. data/lib/syntax_tree/yarv/legacy.rb +12 -24
  41. data/lib/syntax_tree/yarv/sea_of_nodes.rb +534 -0
  42. data/lib/syntax_tree/yarv.rb +18 -0
  43. data/lib/syntax_tree.rb +88 -56
  44. data/tasks/sorbet.rake +277 -0
  45. data/tasks/whitequark.rake +87 -0
  46. metadata +23 -11
  47. data/.gitmodules +0 -9
  48. data/lib/syntax_tree/language_server/inlay_hints.rb +0 -159
  49. data/lib/syntax_tree/visitor/environment.rb +0 -84
  50. data/lib/syntax_tree/visitor/json_visitor.rb +0 -55
  51. data/lib/syntax_tree/visitor/match_visitor.rb +0 -122
  52. data/lib/syntax_tree/visitor/pretty_print_visitor.rb +0 -85
  53. data/lib/syntax_tree/visitor/with_environment.rb +0 -140
@@ -256,11 +256,37 @@ module SyntaxTree
256
256
  tokens[index] if index
257
257
  end
258
258
 
259
+ def find_token_between(type, left, right)
260
+ bounds = left.location.end_char...right.location.start_char
261
+ index =
262
+ tokens.rindex do |token|
263
+ char = token.location.start_char
264
+ break if char < bounds.begin
265
+
266
+ token.is_a?(type) && bounds.cover?(char)
267
+ end
268
+
269
+ tokens[index] if index
270
+ end
271
+
259
272
  def find_keyword(name)
260
273
  index = tokens.rindex { |token| token.is_a?(Kw) && (token.name == name) }
261
274
  tokens[index] if index
262
275
  end
263
276
 
277
+ def find_keyword_between(name, left, right)
278
+ bounds = left.end_char...right.start_char
279
+ index =
280
+ tokens.rindex do |token|
281
+ char = token.location.start_char
282
+ break if char < bounds.begin
283
+
284
+ token.is_a?(Kw) && (token.name == name) && bounds.cover?(char)
285
+ end
286
+
287
+ tokens[index] if index
288
+ end
289
+
264
290
  def find_operator(name)
265
291
  index = tokens.rindex { |token| token.is_a?(Op) && (token.name == name) }
266
292
  tokens[index] if index
@@ -348,6 +374,7 @@ module SyntaxTree
348
374
 
349
375
  start_char = find_next_statement_start(lbrace.location.end_char)
350
376
  statements.bind(
377
+ self,
351
378
  start_char,
352
379
  start_char - line_counts[lbrace.location.start_line - 1].start,
353
380
  rbrace.location.start_char,
@@ -386,6 +413,7 @@ module SyntaxTree
386
413
 
387
414
  start_char = find_next_statement_start(lbrace.location.end_char)
388
415
  statements.bind(
416
+ self,
389
417
  start_char,
390
418
  start_char - line_counts[lbrace.location.start_line - 1].start,
391
419
  rbrace.location.start_char,
@@ -640,13 +668,14 @@ module SyntaxTree
640
668
  stack.pop
641
669
  end
642
670
 
643
- def visit_var_ref(node)
644
- pins.shift
645
- node.pin(stack[-2])
671
+ visit_methods do
672
+ def visit_var_ref(node)
673
+ node.pin(stack[-2], pins.shift)
674
+ end
646
675
  end
647
676
 
648
677
  def self.visit(node, tokens)
649
- start_char = node.location.start_char
678
+ start_char = node.start_char
650
679
  allocated = []
651
680
 
652
681
  tokens.reverse_each do |token|
@@ -670,18 +699,22 @@ module SyntaxTree
670
699
  # (nil | Array[untyped]) posts
671
700
  # ) -> AryPtn
672
701
  def on_aryptn(constant, requireds, rest, posts)
673
- parts = [constant, *requireds, rest, *posts].compact
702
+ lbracket = find_token(LBracket)
703
+ lbracket ||= find_token(LParen) if constant
674
704
 
675
- # If there aren't any parts (no constant, no positional arguments), then
676
- # we're matching an empty array. In this case, we're going to look for the
677
- # left and right brackets explicitly. Otherwise, we'll just use the bounds
678
- # of the various parts.
679
- location =
680
- if parts.empty?
681
- consume_token(LBracket).location.to(consume_token(RBracket).location)
682
- else
683
- parts[0].location.to(parts[-1].location)
684
- end
705
+ rbracket = find_token(RBracket)
706
+ rbracket ||= find_token(RParen) if constant
707
+
708
+ parts = [constant, lbracket, *requireds, rest, *posts, rbracket].compact
709
+
710
+ # The location is going to be determined by the first part to the last
711
+ # part. This includes potential brackets.
712
+ location = parts[0].location.to(parts[-1].location)
713
+
714
+ # Now that we have the location calculated, we can remove the brackets
715
+ # from the list of tokens.
716
+ tokens.delete(lbracket) if lbracket
717
+ tokens.delete(rbracket) if rbracket
685
718
 
686
719
  # If there is a plain *, then we're going to fix up the location of it
687
720
  # here because it currently doesn't have anything to use for its precise
@@ -820,6 +853,7 @@ module SyntaxTree
820
853
  end
821
854
 
822
855
  bodystmt.bind(
856
+ self,
823
857
  find_next_statement_start(keyword.location.end_char),
824
858
  keyword.location.end_column,
825
859
  end_location.end_char,
@@ -871,13 +905,34 @@ module SyntaxTree
871
905
  # on_block_var: (Params params, (nil | Array[Ident]) locals) -> BlockVar
872
906
  def on_block_var(params, locals)
873
907
  index =
874
- tokens.rindex do |node|
875
- node.is_a?(Op) && %w[| ||].include?(node.value) &&
876
- node.location.start_char < params.location.start_char
877
- end
908
+ tokens.rindex { |node| node.is_a?(Op) && %w[| ||].include?(node.value) }
909
+
910
+ ending = tokens.delete_at(index)
911
+ beginning = ending.value == "||" ? ending : consume_operator(:|)
912
+
913
+ # If there are no parameters, then we didn't have anything to base the
914
+ # location information off of. Now that we have an opening of the
915
+ # block, we can correct this.
916
+ if params.empty?
917
+ start_line = params.location.start_line
918
+ start_char =
919
+ (
920
+ if beginning.value == "||"
921
+ beginning.location.start_char
922
+ else
923
+ find_next_statement_start(beginning.location.end_char)
924
+ end
925
+ )
878
926
 
879
- beginning = tokens[index]
880
- ending = tokens[-1]
927
+ location =
928
+ Location.fixed(
929
+ line: start_line,
930
+ char: start_char,
931
+ column: start_char - line_counts[start_line - 1].start
932
+ )
933
+
934
+ params = params.copy(location: location)
935
+ end
881
936
 
882
937
  BlockVar.new(
883
938
  params: params,
@@ -905,6 +960,14 @@ module SyntaxTree
905
960
  # (nil | Ensure) ensure_clause
906
961
  # ) -> BodyStmt
907
962
  def on_bodystmt(statements, rescue_clause, else_clause, ensure_clause)
963
+ # In certain versions of Ruby, the `statements` argument can be any node
964
+ # in the case that we're inside of an endless method definition. In this
965
+ # case we'll wrap it in a Statements node to be consistent.
966
+ unless statements.is_a?(Statements)
967
+ statements =
968
+ Statements.new(body: [statements], location: statements.location)
969
+ end
970
+
908
971
  parts = [statements, rescue_clause, else_clause, ensure_clause].compact
909
972
 
910
973
  BodyStmt.new(
@@ -929,6 +992,7 @@ module SyntaxTree
929
992
 
930
993
  start_char = find_next_statement_start(location.end_char)
931
994
  statements.bind(
995
+ self,
932
996
  start_char,
933
997
  start_char - line_counts[location.start_line - 1].start,
934
998
  rbrace.location.start_char,
@@ -1036,6 +1100,7 @@ module SyntaxTree
1036
1100
  start_char = find_next_statement_start(location.end_char)
1037
1101
 
1038
1102
  bodystmt.bind(
1103
+ self,
1039
1104
  start_char,
1040
1105
  start_char - line_counts[location.start_line - 1].start,
1041
1106
  ending.location.start_char,
@@ -1154,13 +1219,23 @@ module SyntaxTree
1154
1219
  end
1155
1220
 
1156
1221
  # :call-seq:
1157
- # on_const_path_field: (untyped parent, Const constant) -> ConstPathField
1222
+ # on_const_path_field: (untyped parent, Const constant) ->
1223
+ # ConstPathField | Field
1158
1224
  def on_const_path_field(parent, constant)
1159
- ConstPathField.new(
1160
- parent: parent,
1161
- constant: constant,
1162
- location: parent.location.to(constant.location)
1163
- )
1225
+ if constant.is_a?(Const)
1226
+ ConstPathField.new(
1227
+ parent: parent,
1228
+ constant: constant,
1229
+ location: parent.location.to(constant.location)
1230
+ )
1231
+ else
1232
+ Field.new(
1233
+ parent: parent,
1234
+ operator: consume_operator(:"::"),
1235
+ name: constant,
1236
+ location: parent.location.to(constant.location)
1237
+ )
1238
+ end
1164
1239
  end
1165
1240
 
1166
1241
  # :call-seq:
@@ -1235,6 +1310,7 @@ module SyntaxTree
1235
1310
  start_char = find_next_statement_start(params.location.end_char)
1236
1311
 
1237
1312
  bodystmt.bind(
1313
+ self,
1238
1314
  start_char,
1239
1315
  start_char - line_counts[params.location.start_line - 1].start,
1240
1316
  ending.location.start_char,
@@ -1323,6 +1399,7 @@ module SyntaxTree
1323
1399
  start_char = find_next_statement_start(params.location.end_char)
1324
1400
 
1325
1401
  bodystmt.bind(
1402
+ self,
1326
1403
  start_char,
1327
1404
  start_char - line_counts[params.location.start_line - 1].start,
1328
1405
  ending.location.start_char,
@@ -1362,6 +1439,7 @@ module SyntaxTree
1362
1439
  start_char = find_next_statement_start(location.end_char)
1363
1440
 
1364
1441
  bodystmt.bind(
1442
+ self,
1365
1443
  start_char,
1366
1444
  start_char - line_counts[location.start_line - 1].start,
1367
1445
  ending.location.start_char,
@@ -1457,6 +1535,7 @@ module SyntaxTree
1457
1535
 
1458
1536
  start_char = find_next_statement_start(keyword.location.end_char)
1459
1537
  statements.bind(
1538
+ self,
1460
1539
  start_char,
1461
1540
  start_char - line_counts[keyword.location.start_line - 1].start,
1462
1541
  ending.location.start_char,
@@ -1480,8 +1559,16 @@ module SyntaxTree
1480
1559
  beginning = consume_keyword(:elsif)
1481
1560
  ending = consequent || consume_keyword(:end)
1482
1561
 
1483
- start_char = find_next_statement_start(predicate.location.end_char)
1562
+ delimiter =
1563
+ find_keyword_between(:then, predicate, statements) ||
1564
+ find_token_between(Semicolon, predicate, statements)
1565
+
1566
+ tokens.delete(delimiter) if delimiter
1567
+ start_char =
1568
+ find_next_statement_start((delimiter || predicate).location.end_char)
1569
+
1484
1570
  statements.bind(
1571
+ self,
1485
1572
  start_char,
1486
1573
  start_char - line_counts[predicate.location.start_line - 1].start,
1487
1574
  ending.location.start_char,
@@ -1605,6 +1692,7 @@ module SyntaxTree
1605
1692
  ending = find_keyword(:end)
1606
1693
  start_char = find_next_statement_start(keyword.location.end_char)
1607
1694
  statements.bind(
1695
+ self,
1608
1696
  start_char,
1609
1697
  start_char - line_counts[keyword.location.start_line - 1].start,
1610
1698
  ending.location.start_char,
@@ -1679,6 +1767,22 @@ module SyntaxTree
1679
1767
  # VarField right
1680
1768
  # ) -> FndPtn
1681
1769
  def on_fndptn(constant, left, values, right)
1770
+ # The left and right of a find pattern are always going to be splats, so
1771
+ # we're going to consume the * operators and use their location
1772
+ # information to extend the location of the splats.
1773
+ right, left =
1774
+ [right, left].map do |node|
1775
+ operator = consume_operator(:*)
1776
+ location =
1777
+ if node.value
1778
+ operator.location.to(node.location)
1779
+ else
1780
+ operator.location
1781
+ end
1782
+
1783
+ node.copy(location: location)
1784
+ end
1785
+
1682
1786
  # The opening of this find pattern is either going to be a left bracket, a
1683
1787
  # left parenthesis, or the left splat. We're going to use this to
1684
1788
  # determine how to find the closing of the pattern, as well as determining
@@ -1719,21 +1823,20 @@ module SyntaxTree
1719
1823
  in_keyword = consume_keyword(:in)
1720
1824
  ending = consume_keyword(:end)
1721
1825
 
1722
- # Consume the do keyword if it exists so that it doesn't get confused for
1723
- # some other block
1724
- keyword = find_keyword(:do)
1725
- if keyword &&
1726
- keyword.location.start_char > collection.location.end_char &&
1727
- keyword.location.end_char < ending.location.start_char
1728
- tokens.delete(keyword)
1729
- end
1826
+ delimiter =
1827
+ find_keyword_between(:do, collection, ending) ||
1828
+ find_token_between(Semicolon, collection, ending)
1829
+
1830
+ tokens.delete(delimiter) if delimiter
1730
1831
 
1731
1832
  start_char =
1732
- find_next_statement_start((keyword || collection).location.end_char)
1833
+ find_next_statement_start((delimiter || collection).location.end_char)
1834
+
1733
1835
  statements.bind(
1836
+ self,
1734
1837
  start_char,
1735
1838
  start_char -
1736
- line_counts[(keyword || collection).location.end_line - 1].start,
1839
+ line_counts[(delimiter || collection).location.end_line - 1].start,
1737
1840
  ending.location.start_char,
1738
1841
  ending.location.start_column
1739
1842
  )
@@ -1787,7 +1890,7 @@ module SyntaxTree
1787
1890
  line: lineno,
1788
1891
  char: char_pos,
1789
1892
  column: current_column,
1790
- size: value.size + 1
1893
+ size: value.size
1791
1894
  )
1792
1895
 
1793
1896
  # Here we're going to artificially create an extra node type so that if
@@ -1822,7 +1925,7 @@ module SyntaxTree
1822
1925
  line: lineno,
1823
1926
  char: char_pos,
1824
1927
  column: current_column,
1825
- size: value.size + 1
1928
+ size: value.size
1826
1929
  )
1827
1930
 
1828
1931
  heredoc_end = HeredocEnd.new(value: value.chomp, location: location)
@@ -1837,9 +1940,9 @@ module SyntaxTree
1837
1940
  start_line: heredoc.location.start_line,
1838
1941
  start_char: heredoc.location.start_char,
1839
1942
  start_column: heredoc.location.start_column,
1840
- end_line: lineno,
1841
- end_char: char_pos,
1842
- end_column: current_column
1943
+ end_line: location.end_line,
1944
+ end_char: location.end_char,
1945
+ end_column: location.end_column
1843
1946
  )
1844
1947
  )
1845
1948
  end
@@ -1847,10 +1950,42 @@ module SyntaxTree
1847
1950
  # :call-seq:
1848
1951
  # on_hshptn: (
1849
1952
  # (nil | untyped) constant,
1850
- # Array[[Label, untyped]] keywords,
1953
+ # Array[[Label | StringContent, untyped]] keywords,
1851
1954
  # (nil | VarField) keyword_rest
1852
1955
  # ) -> HshPtn
1853
1956
  def on_hshptn(constant, keywords, keyword_rest)
1957
+ keywords =
1958
+ (keywords || []).map do |(label, value)|
1959
+ if label.is_a?(Label)
1960
+ [label, value]
1961
+ else
1962
+ tstring_beg_index =
1963
+ tokens.rindex do |token|
1964
+ token.is_a?(TStringBeg) &&
1965
+ token.location.start_char < label.location.start_char
1966
+ end
1967
+
1968
+ tstring_beg = tokens.delete_at(tstring_beg_index)
1969
+
1970
+ label_end_index =
1971
+ tokens.rindex do |token|
1972
+ token.is_a?(LabelEnd) &&
1973
+ token.location.start_char == label.location.end_char
1974
+ end
1975
+
1976
+ label_end = tokens.delete_at(label_end_index)
1977
+
1978
+ [
1979
+ DynaSymbol.new(
1980
+ parts: label.parts,
1981
+ quote: label_end.value[0],
1982
+ location: tstring_beg.location.to(label_end.location)
1983
+ ),
1984
+ value
1985
+ ]
1986
+ end
1987
+ end
1988
+
1854
1989
  if keyword_rest
1855
1990
  # We're doing this to delete the token from the list so that it doesn't
1856
1991
  # confuse future patterns by thinking they have an extra ** on the end.
@@ -1863,7 +1998,7 @@ module SyntaxTree
1863
1998
  keyword_rest = VarField.new(value: nil, location: token.location)
1864
1999
  end
1865
2000
 
1866
- parts = [constant, *keywords&.flatten(1), keyword_rest].compact
2001
+ parts = [constant, *keywords.flatten(1), keyword_rest].compact
1867
2002
 
1868
2003
  # If there's no constant, there may be braces, so we're going to look for
1869
2004
  # those to get our bounds.
@@ -1880,7 +2015,7 @@ module SyntaxTree
1880
2015
 
1881
2016
  HshPtn.new(
1882
2017
  constant: constant,
1883
- keywords: keywords || [],
2018
+ keywords: keywords,
1884
2019
  keyword_rest: keyword_rest,
1885
2020
  location: parts[0].location.to(parts[-1].location)
1886
2021
  )
@@ -1911,8 +2046,15 @@ module SyntaxTree
1911
2046
  beginning = consume_keyword(:if)
1912
2047
  ending = consequent || consume_keyword(:end)
1913
2048
 
1914
- start_char = find_next_statement_start(predicate.location.end_char)
2049
+ if (keyword = find_keyword_between(:then, predicate, ending))
2050
+ tokens.delete(keyword)
2051
+ end
2052
+
2053
+ start_char =
2054
+ find_next_statement_start((keyword || predicate).location.end_char)
2055
+
1915
2056
  statements.bind(
2057
+ self,
1916
2058
  start_char,
1917
2059
  start_char - line_counts[predicate.location.end_line - 1].start,
1918
2060
  ending.location.start_char,
@@ -1946,7 +2088,7 @@ module SyntaxTree
1946
2088
  IfNode.new(
1947
2089
  predicate: predicate,
1948
2090
  statements:
1949
- Statements.new(self, body: [statement], location: statement.location),
2091
+ Statements.new(body: [statement], location: statement.location),
1950
2092
  consequent: nil,
1951
2093
  location: statement.location.to(predicate.location)
1952
2094
  )
@@ -1995,8 +2137,10 @@ module SyntaxTree
1995
2137
  statements_start = token
1996
2138
  end
1997
2139
 
1998
- start_char = find_next_statement_start(statements_start.location.end_char)
2140
+ start_char =
2141
+ find_next_statement_start((token || statements_start).location.end_char)
1999
2142
  statements.bind(
2143
+ self,
2000
2144
  start_char,
2001
2145
  start_char -
2002
2146
  line_counts[statements_start.location.start_line - 1].start,
@@ -2121,12 +2265,19 @@ module SyntaxTree
2121
2265
  token.location.start_char > beginning.location.start_char
2122
2266
  end
2123
2267
 
2268
+ if braces
2269
+ opening = consume_token(TLamBeg)
2270
+ closing = consume_token(RBrace)
2271
+ else
2272
+ opening = consume_keyword(:do)
2273
+ closing = consume_keyword(:end)
2274
+ end
2275
+
2124
2276
  # We need to do some special mapping here. Since ripper doesn't support
2125
- # capturing lambda var until 3.2, we need to normalize all of that here.
2277
+ # capturing lambda vars, we need to normalize all of that here.
2126
2278
  params =
2127
- case params
2128
- when Paren
2129
- # In this case we've gotten to the <3.2 parentheses wrapping a set of
2279
+ if params.is_a?(Paren)
2280
+ # In this case we've gotten to the parentheses wrapping a set of
2130
2281
  # parameters case. Here we need to manually scan for lambda locals.
2131
2282
  range = (params.location.start_char + 1)...params.location.end_char
2132
2283
  locals = lambda_locals(source[range])
@@ -2148,27 +2299,31 @@ module SyntaxTree
2148
2299
 
2149
2300
  node.comments.concat(params.comments)
2150
2301
  node
2151
- when Params
2152
- # In this case we've gotten to the <3.2 plain set of parameters. In
2153
- # this case there cannot be lambda locals, so we will wrap the
2154
- # parameters into a lambda var that has no locals.
2302
+ else
2303
+ # If there are no parameters, then we didn't have anything to base the
2304
+ # location information off of. Now that we have an opening of the
2305
+ # block, we can correct this.
2306
+ if params.empty?
2307
+ opening_location = opening.location
2308
+ location =
2309
+ Location.fixed(
2310
+ line: opening_location.start_line,
2311
+ char: opening_location.start_char,
2312
+ column: opening_location.start_column
2313
+ )
2314
+
2315
+ params = params.copy(location: location)
2316
+ end
2317
+
2318
+ # In this case we've gotten to the plain set of parameters. In this
2319
+ # case there cannot be lambda locals, so we will wrap the parameters
2320
+ # into a lambda var that has no locals.
2155
2321
  LambdaVar.new(params: params, locals: [], location: params.location)
2156
- when LambdaVar
2157
- # In this case we've gotten to 3.2+ lambda var. In this case we don't
2158
- # need to do anything and can just the value as given.
2159
- params
2160
2322
  end
2161
2323
 
2162
- if braces
2163
- opening = consume_token(TLamBeg)
2164
- closing = consume_token(RBrace)
2165
- else
2166
- opening = consume_keyword(:do)
2167
- closing = consume_keyword(:end)
2168
- end
2169
-
2170
2324
  start_char = find_next_statement_start(opening.location.end_char)
2171
2325
  statements.bind(
2326
+ self,
2172
2327
  start_char,
2173
2328
  start_char - line_counts[opening.location.end_line - 1].start,
2174
2329
  closing.location.start_char,
@@ -2353,23 +2508,30 @@ module SyntaxTree
2353
2508
 
2354
2509
  # :call-seq:
2355
2510
  # on_method_add_block: (
2356
- # (Call | Command | CommandCall) call,
2511
+ # (Break | Call | Command | CommandCall | Next | ReturnNode) call,
2357
2512
  # Block block
2358
- # ) -> MethodAddBlock
2513
+ # ) -> Break | Command | CommandCall | MethodAddBlock | Next | ReturnNode
2359
2514
  def on_method_add_block(call, block)
2360
2515
  location = call.location.to(block.location)
2361
2516
 
2362
2517
  case call
2518
+ when Break, Next, ReturnNode
2519
+ parts = call.arguments.parts
2520
+
2521
+ node = parts.pop
2522
+ copied =
2523
+ node.copy(block: block, location: node.location.to(block.location))
2524
+
2525
+ copied.comments.concat(call.comments)
2526
+ parts << copied
2527
+
2528
+ call.copy(location: location)
2363
2529
  when Command, CommandCall
2364
2530
  node = call.copy(block: block, location: location)
2365
2531
  node.comments.concat(call.comments)
2366
2532
  node
2367
2533
  else
2368
- MethodAddBlock.new(
2369
- call: call,
2370
- block: block,
2371
- location: call.location.to(block.location)
2372
- )
2534
+ MethodAddBlock.new(call: call, block: block, location: location)
2373
2535
  end
2374
2536
  end
2375
2537
 
@@ -2446,6 +2608,7 @@ module SyntaxTree
2446
2608
  start_char = find_next_statement_start(constant.location.end_char)
2447
2609
 
2448
2610
  bodystmt.bind(
2611
+ self,
2449
2612
  start_char,
2450
2613
  start_char - line_counts[constant.location.start_line - 1].start,
2451
2614
  ending.location.start_char,
@@ -2592,19 +2755,40 @@ module SyntaxTree
2592
2755
  # have a `nil` for the value instead of a `false`.
2593
2756
  keywords&.map! { |(key, value)| [key, value || nil] }
2594
2757
 
2595
- parts = [
2596
- *requireds,
2597
- *optionals&.flatten(1),
2598
- rest,
2599
- *posts,
2600
- *keywords&.flatten(1),
2601
- (keyword_rest if keyword_rest != :nil),
2602
- (block if block != :&)
2603
- ].compact
2758
+ # Here we're going to build up a list of all of the params so that we can
2759
+ # determine our location information.
2760
+ parts = []
2761
+
2762
+ requireds&.each { |required| parts << required.location }
2763
+ optionals&.each do |(key, value)|
2764
+ parts << key.location
2765
+ parts << value.location if value
2766
+ end
2767
+
2768
+ parts << rest.location if rest
2769
+ posts&.each { |post| parts << post.location }
2770
+
2771
+ keywords&.each do |(key, value)|
2772
+ parts << key.location
2773
+ parts << value.location if value
2774
+ end
2775
+
2776
+ if keyword_rest == :nil
2777
+ # When we get a :nil here, it means that we have **nil syntax, which
2778
+ # means this set of parameters accepts no more keyword arguments. In
2779
+ # this case we need to go and find the location of these two tokens.
2780
+ operator = consume_operator(:**)
2781
+ parts << operator.location.to(consume_keyword(:nil).location)
2782
+ elsif keyword_rest
2783
+ parts << keyword_rest.location
2784
+ end
2785
+
2786
+ parts << block.location if block && block != :&
2787
+ parts = parts.compact
2604
2788
 
2605
2789
  location =
2606
2790
  if parts.any?
2607
- parts[0].location.to(parts[-1].location)
2791
+ parts[0].to(parts[-1])
2608
2792
  else
2609
2793
  Location.fixed(line: lineno, char: char_pos, column: current_column)
2610
2794
  end
@@ -2701,7 +2885,7 @@ module SyntaxTree
2701
2885
  )
2702
2886
 
2703
2887
  statements.body << @__end__ if @__end__
2704
- statements.bind(0, 0, source.length, last_column)
2888
+ statements.bind(self, 0, 0, source.length, last_column)
2705
2889
 
2706
2890
  program = Program.new(statements: statements, location: location)
2707
2891
  attach_comments(program, @comments)
@@ -3033,8 +3217,9 @@ module SyntaxTree
3033
3217
  exceptions = exceptions[0] if exceptions.is_a?(Array)
3034
3218
 
3035
3219
  last_node = variable || exceptions || keyword
3036
- start_char = find_next_statement_start(last_node.location.end_char)
3220
+ start_char = find_next_statement_start(last_node.end_char)
3037
3221
  statements.bind(
3222
+ self,
3038
3223
  start_char,
3039
3224
  start_char - line_counts[last_node.location.start_line - 1].start,
3040
3225
  char_pos,
@@ -3055,7 +3240,7 @@ module SyntaxTree
3055
3240
  start_char: keyword.location.end_char + 1,
3056
3241
  start_column: keyword.location.end_column + 1,
3057
3242
  end_line: last_node.location.end_line,
3058
- end_char: last_node.location.end_char,
3243
+ end_char: last_node.end_char,
3059
3244
  end_column: last_node.location.end_column
3060
3245
  )
3061
3246
  )
@@ -3153,6 +3338,7 @@ module SyntaxTree
3153
3338
  start_char = find_next_statement_start(target.location.end_char)
3154
3339
 
3155
3340
  bodystmt.bind(
3341
+ self,
3156
3342
  start_char,
3157
3343
  start_char - line_counts[target.location.start_line - 1].start,
3158
3344
  ending.location.start_char,
@@ -3166,9 +3352,29 @@ module SyntaxTree
3166
3352
  )
3167
3353
  end
3168
3354
 
3169
- # def on_semicolon(value)
3170
- # value
3171
- # end
3355
+ # Semicolons are tokens that get added to the token list but never get
3356
+ # attached to the AST. Because of this they only need to track their
3357
+ # associated location so they can be used for computing bounds.
3358
+ class Semicolon
3359
+ attr_reader :location
3360
+
3361
+ def initialize(location)
3362
+ @location = location
3363
+ end
3364
+ end
3365
+
3366
+ # :call-seq:
3367
+ # on_semicolon: (String value) -> Semicolon
3368
+ def on_semicolon(value)
3369
+ tokens << Semicolon.new(
3370
+ Location.token(
3371
+ line: lineno,
3372
+ char: char_pos,
3373
+ column: current_column,
3374
+ size: value.size
3375
+ )
3376
+ )
3377
+ end
3172
3378
 
3173
3379
  # def on_sp(value)
3174
3380
  # value
@@ -3186,18 +3392,13 @@ module SyntaxTree
3186
3392
  statements.location.to(statement.location)
3187
3393
  end
3188
3394
 
3189
- Statements.new(
3190
- self,
3191
- body: statements.body << statement,
3192
- location: location
3193
- )
3395
+ Statements.new(body: statements.body << statement, location: location)
3194
3396
  end
3195
3397
 
3196
3398
  # :call-seq:
3197
3399
  # on_stmts_new: () -> Statements
3198
3400
  def on_stmts_new
3199
3401
  Statements.new(
3200
- self,
3201
3402
  body: [],
3202
3403
  location:
3203
3404
  Location.fixed(line: lineno, char: char_pos, column: current_column)
@@ -3262,6 +3463,7 @@ module SyntaxTree
3262
3463
  embexpr_end = consume_token(EmbExprEnd)
3263
3464
 
3264
3465
  statements.bind(
3466
+ self,
3265
3467
  embexpr_beg.location.end_char,
3266
3468
  embexpr_beg.location.end_column,
3267
3469
  embexpr_end.location.start_char,
@@ -3605,8 +3807,15 @@ module SyntaxTree
3605
3807
  beginning = consume_keyword(:unless)
3606
3808
  ending = consequent || consume_keyword(:end)
3607
3809
 
3608
- start_char = find_next_statement_start(predicate.location.end_char)
3810
+ if (keyword = find_keyword_between(:then, predicate, ending))
3811
+ tokens.delete(keyword)
3812
+ end
3813
+
3814
+ start_char =
3815
+ find_next_statement_start((keyword || predicate).location.end_char)
3816
+
3609
3817
  statements.bind(
3818
+ self,
3610
3819
  start_char,
3611
3820
  start_char - line_counts[predicate.location.end_line - 1].start,
3612
3821
  ending.location.start_char,
@@ -3629,7 +3838,7 @@ module SyntaxTree
3629
3838
  UnlessNode.new(
3630
3839
  predicate: predicate,
3631
3840
  statements:
3632
- Statements.new(self, body: [statement], location: statement.location),
3841
+ Statements.new(body: [statement], location: statement.location),
3633
3842
  consequent: nil,
3634
3843
  location: statement.location.to(predicate.location)
3635
3844
  )
@@ -3641,17 +3850,18 @@ module SyntaxTree
3641
3850
  beginning = consume_keyword(:until)
3642
3851
  ending = consume_keyword(:end)
3643
3852
 
3644
- # Consume the do keyword if it exists so that it doesn't get confused for
3645
- # some other block
3646
- keyword = find_keyword(:do)
3647
- if keyword && keyword.location.start_char > predicate.location.end_char &&
3648
- keyword.location.end_char < ending.location.start_char
3649
- tokens.delete(keyword)
3650
- end
3853
+ delimiter =
3854
+ find_keyword_between(:do, predicate, statements) ||
3855
+ find_token_between(Semicolon, predicate, statements)
3856
+
3857
+ tokens.delete(delimiter) if delimiter
3651
3858
 
3652
3859
  # Update the Statements location information
3653
- start_char = find_next_statement_start(predicate.location.end_char)
3860
+ start_char =
3861
+ find_next_statement_start((delimiter || predicate).location.end_char)
3862
+
3654
3863
  statements.bind(
3864
+ self,
3655
3865
  start_char,
3656
3866
  start_char - line_counts[predicate.location.end_line - 1].start,
3657
3867
  ending.location.start_char,
@@ -3673,7 +3883,7 @@ module SyntaxTree
3673
3883
  UntilNode.new(
3674
3884
  predicate: predicate,
3675
3885
  statements:
3676
- Statements.new(self, body: [statement], location: statement.location),
3886
+ Statements.new(body: [statement], location: statement.location),
3677
3887
  location: statement.location.to(predicate.location)
3678
3888
  )
3679
3889
  end
@@ -3744,9 +3954,11 @@ module SyntaxTree
3744
3954
  statements_start = token
3745
3955
  end
3746
3956
 
3747
- start_char = find_next_statement_start(statements_start.location.end_char)
3957
+ start_char =
3958
+ find_next_statement_start((token || statements_start).location.end_char)
3748
3959
 
3749
3960
  statements.bind(
3961
+ self,
3750
3962
  start_char,
3751
3963
  start_char -
3752
3964
  line_counts[statements_start.location.start_line - 1].start,
@@ -3768,17 +3980,18 @@ module SyntaxTree
3768
3980
  beginning = consume_keyword(:while)
3769
3981
  ending = consume_keyword(:end)
3770
3982
 
3771
- # Consume the do keyword if it exists so that it doesn't get confused for
3772
- # some other block
3773
- keyword = find_keyword(:do)
3774
- if keyword && keyword.location.start_char > predicate.location.end_char &&
3775
- keyword.location.end_char < ending.location.start_char
3776
- tokens.delete(keyword)
3777
- end
3983
+ delimiter =
3984
+ find_keyword_between(:do, predicate, statements) ||
3985
+ find_token_between(Semicolon, predicate, statements)
3986
+
3987
+ tokens.delete(delimiter) if delimiter
3778
3988
 
3779
3989
  # Update the Statements location information
3780
- start_char = find_next_statement_start(predicate.location.end_char)
3990
+ start_char =
3991
+ find_next_statement_start((delimiter || predicate).location.end_char)
3992
+
3781
3993
  statements.bind(
3994
+ self,
3782
3995
  start_char,
3783
3996
  start_char - line_counts[predicate.location.end_line - 1].start,
3784
3997
  ending.location.start_char,
@@ -3800,7 +4013,7 @@ module SyntaxTree
3800
4013
  WhileNode.new(
3801
4014
  predicate: predicate,
3802
4015
  statements:
3803
- Statements.new(self, body: [statement], location: statement.location),
4016
+ Statements.new(body: [statement], location: statement.location),
3804
4017
  location: statement.location.to(predicate.location)
3805
4018
  )
3806
4019
  end