prism 0.29.0 → 0.30.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (67) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +22 -1
  3. data/CONTRIBUTING.md +0 -4
  4. data/README.md +1 -0
  5. data/config.yml +66 -9
  6. data/docs/fuzzing.md +1 -1
  7. data/docs/ripper_translation.md +22 -0
  8. data/ext/prism/api_node.c +30 -12
  9. data/ext/prism/extension.c +107 -372
  10. data/ext/prism/extension.h +1 -1
  11. data/include/prism/ast.h +138 -70
  12. data/include/prism/diagnostic.h +7 -2
  13. data/include/prism/node.h +0 -21
  14. data/include/prism/parser.h +23 -25
  15. data/include/prism/regexp.h +17 -8
  16. data/include/prism/static_literals.h +3 -2
  17. data/include/prism/util/pm_char.h +1 -2
  18. data/include/prism/util/pm_constant_pool.h +0 -8
  19. data/include/prism/util/pm_integer.h +16 -9
  20. data/include/prism/util/pm_string.h +0 -8
  21. data/include/prism/version.h +2 -2
  22. data/include/prism.h +0 -11
  23. data/lib/prism/compiler.rb +3 -0
  24. data/lib/prism/dispatcher.rb +14 -0
  25. data/lib/prism/dot_visitor.rb +22 -3
  26. data/lib/prism/dsl.rb +7 -2
  27. data/lib/prism/ffi.rb +24 -3
  28. data/lib/prism/inspect_visitor.rb +10 -8
  29. data/lib/prism/mutation_compiler.rb +6 -1
  30. data/lib/prism/node.rb +166 -241
  31. data/lib/prism/node_ext.rb +21 -5
  32. data/lib/prism/parse_result/comments.rb +0 -7
  33. data/lib/prism/parse_result/newlines.rb +101 -11
  34. data/lib/prism/parse_result.rb +17 -0
  35. data/lib/prism/reflection.rb +3 -1
  36. data/lib/prism/serialize.rb +80 -67
  37. data/lib/prism/translation/parser/compiler.rb +134 -114
  38. data/lib/prism/translation/parser.rb +6 -1
  39. data/lib/prism/translation/ripper.rb +8 -6
  40. data/lib/prism/translation/ruby_parser.rb +23 -5
  41. data/lib/prism/visitor.rb +3 -0
  42. data/lib/prism.rb +0 -4
  43. data/prism.gemspec +1 -4
  44. data/rbi/prism/node.rbi +63 -6
  45. data/rbi/prism/visitor.rbi +3 -0
  46. data/rbi/prism.rbi +6 -0
  47. data/sig/prism/dsl.rbs +4 -1
  48. data/sig/prism/mutation_compiler.rbs +1 -0
  49. data/sig/prism/node.rbs +28 -4
  50. data/sig/prism/visitor.rbs +1 -0
  51. data/sig/prism.rbs +21 -0
  52. data/src/diagnostic.c +27 -17
  53. data/src/node.c +408 -1666
  54. data/src/prettyprint.c +49 -6
  55. data/src/prism.c +958 -991
  56. data/src/regexp.c +133 -68
  57. data/src/serialize.c +6 -1
  58. data/src/static_literals.c +63 -84
  59. data/src/token_type.c +2 -2
  60. data/src/util/pm_constant_pool.c +0 -8
  61. data/src/util/pm_integer.c +39 -11
  62. data/src/util/pm_string.c +0 -12
  63. data/src/util/pm_strpbrk.c +32 -6
  64. metadata +2 -5
  65. data/include/prism/util/pm_string_list.h +0 -44
  66. data/lib/prism/debug.rb +0 -249
  67. data/src/util/pm_string_list.c +0 -28
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4ac8167684c0f975fdba449e77206d97756f6c39f22ce871f79a79a61260503f
4
- data.tar.gz: 6a2dd5c0a47df7c8c575ad2e5b344c789d548e93cfe82e5fa29974d46a52bb3c
3
+ metadata.gz: 46cabe6b76e675b905cb9b2941faf00d35c0c5043e480c094952a89404e87587
4
+ data.tar.gz: 50a764fb701657e5936b764a7438f49ab074f45d5bee3e435bf70c9b891067be
5
5
  SHA512:
6
- metadata.gz: dfa7fe63285b85cb45aa0be681916d6891ee5e5adf2162ab5c18423417f7938afc99f00eca8fd6187fbd0b7168af088d932fbda3c1361d1c73953f39329bed70
7
- data.tar.gz: 79bc51db60600d74a6bfbeae5eba3a8ec505e9809d0f7ff0b05563f1f7a8cd284cc293233397323a443217a4a5271f90b4c40516168ae3a8be0ddc2d7376cef9
6
+ metadata.gz: 58535a5049cf8d6b8ceb97721135f58ca2fa4e7311cce8c336b6d4d5674de38861a07728651859ffe926bf84db8c14454b543d5d26732174de4c67a2be7cb46b
7
+ data.tar.gz: 16ae75556351bd4c96a54dbd930e466c40695419c29ba9cf5b048dc1173fc397094974bea313258e7615a27f67c9b55b00d72dcee33ddacc1613169005b0e184
data/CHANGELOG.md CHANGED
@@ -6,6 +6,26 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) a
6
6
 
7
7
  ## [Unreleased]
8
8
 
9
+ ## [0.30.0] - 2024-06-07
10
+
11
+ ### Added
12
+
13
+ - More correctly raise mixed encoding errors.
14
+ - Implement ambiguous binary operator warning.
15
+ - Fix up regexp escapes with control and meta characters.
16
+ - Fix up support for the `it` implicit local variable.
17
+ - Heredoc identifiers now properly disallow CRLF.
18
+ - Errors added for void value expressions in begin clauses.
19
+ - Many updates to more closely match the `parser` gem in parser translation.
20
+ - Many errors added for invalid regular expressions.
21
+
22
+ ### Changed
23
+
24
+ - Handle parser translation missing the `parser` gem.
25
+ - Handle ruby_parser translation missing the `ruby_parser` gem.
26
+ - Various error messages have been updated to more closely match CRuby.
27
+ - `RationalNode` now has a `numerator` and `denominator` field instead of a `numeric` field. For the Ruby API we provide a `RationalNode#numeric` method for backwards-compatibility.
28
+
9
29
  ## [0.29.0] - 2024-05-10
10
30
 
11
31
  ### Added
@@ -518,7 +538,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) a
518
538
 
519
539
  - 🎉 Initial release! 🎉
520
540
 
521
- [unreleased]: https://github.com/ruby/prism/compare/v0.29.0...HEAD
541
+ [unreleased]: https://github.com/ruby/prism/compare/v0.30.0...HEAD
542
+ [0.30.0]: https://github.com/ruby/prism/compare/v0.29.0...v0.30.0
522
543
  [0.29.0]: https://github.com/ruby/prism/compare/v0.28.0...v0.29.0
523
544
  [0.28.0]: https://github.com/ruby/prism/compare/v0.27.0...v0.28.0
524
545
  [0.27.0]: https://github.com/ruby/prism/compare/v0.26.0...v0.27.0
data/CONTRIBUTING.md CHANGED
@@ -10,10 +10,6 @@ The discussions page on the GitHub repository are open. If you have a question o
10
10
 
11
11
  If you want to contribute code, please first open or contribute to a discussion. A lot of the project is in flux, and we want to make sure that you are contributing to the right place. Once you have a discussion going, you can open a pull request with your changes. We will review your code and get it merged in.
12
12
 
13
- ### Ruby Features
14
-
15
- Pattern matching and endless method definitions should be avoided as long as the latest TruffleRuby release does not support it.
16
-
17
13
  ## Tests
18
14
 
19
15
  We could always use more tests! One of the biggest challenges of this project is building up a big test suite. If you want to contribute tests, feel free to open a pull request. These will get merged in as soon as possible.
data/README.md CHANGED
@@ -40,6 +40,7 @@ The repository contains the infrastructure for both a shared library (libprism)
40
40
  ├── rust
41
41
  │   ├── ruby-prism Rustified crate for the shared library
42
42
  │   └── ruby-prism-sys FFI binding for Rust
43
+ ├── sample Sample code that uses the Ruby API for documentation
43
44
  ├── sig RBS type signatures for the Ruby library
44
45
  ├── src
45
46
  │   ├── util various utility files
data/config.yml CHANGED
@@ -114,6 +114,7 @@ errors:
114
114
  - EXPRESSION_NOT_WRITABLE_FILE
115
115
  - EXPRESSION_NOT_WRITABLE_LINE
116
116
  - EXPRESSION_NOT_WRITABLE_NIL
117
+ - EXPRESSION_NOT_WRITABLE_NUMBERED
117
118
  - EXPRESSION_NOT_WRITABLE_SELF
118
119
  - EXPRESSION_NOT_WRITABLE_TRUE
119
120
  - FLOAT_PARSE
@@ -153,6 +154,7 @@ errors:
153
154
  - INVALID_NUMBER_UNDERSCORE_INNER
154
155
  - INVALID_NUMBER_UNDERSCORE_TRAILING
155
156
  - INVALID_PERCENT
157
+ - INVALID_PERCENT_EOF
156
158
  - INVALID_PRINTABLE_CHARACTER
157
159
  - INVALID_RETRY_AFTER_ELSE
158
160
  - INVALID_RETRY_AFTER_ENSURE
@@ -184,9 +186,10 @@ errors:
184
186
  - NO_LOCAL_VARIABLE
185
187
  - NOT_EXPRESSION
186
188
  - NUMBER_LITERAL_UNDERSCORE
189
+ - NUMBERED_PARAMETER_INNER_BLOCK
187
190
  - NUMBERED_PARAMETER_IT
188
191
  - NUMBERED_PARAMETER_ORDINARY
189
- - NUMBERED_PARAMETER_OUTER_SCOPE
192
+ - NUMBERED_PARAMETER_OUTER_BLOCK
190
193
  - OPERATOR_MULTI_ASSIGN
191
194
  - OPERATOR_WRITE_ARGUMENTS
192
195
  - OPERATOR_WRITE_BLOCK
@@ -203,8 +206,8 @@ errors:
203
206
  - PARAMETER_SPLAT_MULTI
204
207
  - PARAMETER_STAR
205
208
  - PARAMETER_UNEXPECTED_FWD
206
- - PARAMETER_WILD_LOOSE_COMMA
207
209
  - PARAMETER_UNEXPECTED_NO_KW
210
+ - PARAMETER_WILD_LOOSE_COMMA
208
211
  - PATTERN_CAPTURE_DUPLICATE
209
212
  - PATTERN_EXPRESSION_AFTER_BRACKET
210
213
  - PATTERN_EXPRESSION_AFTER_COMMA
@@ -233,6 +236,7 @@ errors:
233
236
  - REGEXP_INCOMPAT_CHAR_ENCODING
234
237
  - REGEXP_INVALID_UNICODE_RANGE
235
238
  - REGEXP_NON_ESCAPED_MBC
239
+ - REGEXP_PARSE_ERROR
236
240
  - REGEXP_TERM
237
241
  - REGEXP_UNKNOWN_OPTIONS
238
242
  - REGEXP_UTF8_CHAR_NON_UTF8_REGEXP
@@ -273,6 +277,7 @@ errors:
273
277
  - WRITE_TARGET_UNEXPECTED
274
278
  - XSTRING_TERM
275
279
  warnings:
280
+ - AMBIGUOUS_BINARY_OPERATOR
276
281
  - AMBIGUOUS_FIRST_ARGUMENT_MINUS
277
282
  - AMBIGUOUS_FIRST_ARGUMENT_PLUS
278
283
  - AMBIGUOUS_PREFIX_AMPERSAND
@@ -1876,19 +1881,56 @@ nodes:
1876
1881
  fields:
1877
1882
  - name: index
1878
1883
  type: node
1884
+ comment: |
1885
+ The index expression for `for` loops.
1886
+
1887
+ for i in a end
1888
+ ^
1879
1889
  - name: collection
1880
1890
  type: node
1891
+ comment: |
1892
+ The collection to iterate over.
1893
+
1894
+ for i in a end
1895
+ ^
1881
1896
  - name: statements
1882
1897
  type: node?
1883
1898
  kind: StatementsNode
1899
+ comment: |
1900
+ Represents the body of statements to execute for each iteration of the loop.
1901
+
1902
+ for i in a
1903
+ foo(i)
1904
+ ^^^^^^
1905
+ end
1884
1906
  - name: for_keyword_loc
1885
1907
  type: location
1908
+ comment: |
1909
+ The location of the `for` keyword.
1910
+
1911
+ for i in a end
1912
+ ^^^
1886
1913
  - name: in_keyword_loc
1887
1914
  type: location
1915
+ comment: |
1916
+ The location of the `in` keyword.
1917
+
1918
+ for i in a end
1919
+ ^^
1888
1920
  - name: do_keyword_loc
1889
1921
  type: location?
1922
+ comment: |
1923
+ The location of the `do` keyword, if present.
1924
+
1925
+ for i in a do end
1926
+ ^^
1890
1927
  - name: end_keyword_loc
1891
1928
  type: location
1929
+ comment: |
1930
+ The location of the `end` keyword.
1931
+
1932
+ for i in a end
1933
+ ^^^
1892
1934
  comment: |
1893
1935
  Represents the use of the `for` keyword.
1894
1936
 
@@ -2566,6 +2608,12 @@ nodes:
2566
2608
 
2567
2609
  `foo #{bar} baz`
2568
2610
  ^^^^^^^^^^^^^^^^
2611
+ - name: ItLocalVariableReadNode
2612
+ comment: |
2613
+ Represents reading from the implicit `it` local variable.
2614
+
2615
+ -> { it }
2616
+ ^^
2569
2617
  - name: ItParametersNode
2570
2618
  comment: |
2571
2619
  Represents an implicit set of parameters through the use of the `it` keyword within a block or lambda.
@@ -2691,10 +2739,6 @@ nodes:
2691
2739
 
2692
2740
  _1 # name `:_1`
2693
2741
 
2694
- Finally, for the default `it` block parameter, the name is `0it`. This is to distinguish it from an `it` local variable that is explicitly declared.
2695
-
2696
- it # name `:0it`
2697
-
2698
2742
  - name: depth
2699
2743
  type: uint32
2700
2744
  comment: |
@@ -3227,8 +3271,21 @@ nodes:
3227
3271
  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
3228
3272
  - name: RationalNode
3229
3273
  fields:
3230
- - name: numeric
3231
- type: node
3274
+ - name: flags
3275
+ type: flags
3276
+ kind: IntegerBaseFlags
3277
+ - name: numerator
3278
+ type: integer
3279
+ comment: |
3280
+ The numerator of the rational number.
3281
+
3282
+ 1.5r # numerator 3
3283
+ - name: denominator
3284
+ type: integer
3285
+ comment: |
3286
+ The denominator of the rational number.
3287
+
3288
+ 1.5r # denominator 2
3232
3289
  comment: |
3233
3290
  Represents a rational number literal.
3234
3291
 
@@ -3567,7 +3624,7 @@ nodes:
3567
3624
  ^^^^
3568
3625
  - name: then_keyword_loc
3569
3626
  type: location?
3570
- comment:
3627
+ comment: |
3571
3628
  The location of the `then` keyword, if present.
3572
3629
 
3573
3630
  unless cond then bar end
data/docs/fuzzing.md CHANGED
@@ -26,7 +26,7 @@ fuzz
26
26
  There are currently three fuzzing targets
27
27
 
28
28
  - `pm_serialize_parse` (parse)
29
- - `pm_regexp_named_capture_group_names` (regexp)
29
+ - `pm_regexp_parse` (regexp)
30
30
 
31
31
  Respectively, fuzzing can be performed with
32
32
 
@@ -48,3 +48,25 @@ ArithmeticRipper.new("1 + 2 - 3").parse # => [0]
48
48
  ```
49
49
 
50
50
  The exact names of the `on_*` methods are listed in the `Ripper` source.
51
+
52
+ ## Background
53
+
54
+ It is helpful to understand the differences between the `Ripper` library and the `Prism` library. Both libraries perform parsing and provide you with APIs to manipulate and understand the resulting syntax tree. However, there are a few key differences.
55
+
56
+ ### Design
57
+
58
+ `Ripper` is a streaming parser. This means as it is parsing Ruby code, it dispatches events back to the consumer. This allows quite a bit of flexibility. You can use it to build your own syntax tree or to find specific patterns in the code. `Prism`, on the other hand, returns to you the completed syntax tree _before_ it allows you to manipulate it. This means the tree that you get back is the only representation that can be generated by the parser _at parse time_ (but of course can be manipulated later).
59
+
60
+ ### Fields
61
+
62
+ We use the term "field" to mean a piece of information on a syntax tree node. `Ripper` provides the minimal number of fields to accurately represent the syntax tree for the purposes of compilation/interpretation. For example, in the callbacks for nodes that are based on keywords (`class`, `module`, `for`, `while`, etc.) you are not given the keyword itself, you need to attach it on your own. In other cases, tokens are not necessarily dispatched at all, meaning you need to find them yourself. `Prism` provides the opposite: the maximum number of fields on nodes is provided. As a tradeoff, this requires more memory, but this is chosen to make it easier on consumers.
63
+
64
+ ### Maintainability
65
+
66
+ The `Ripper` interface is not guaranteed in any way, and tends to change between patch versions of CRuby. This is largely due to the fact that `Ripper` is a by-product of the generated parser, as opposed to its own parser. As an example, in the expression `foo::bar = baz`, there are three different representations possible for the call operator, including:
67
+
68
+ * `:"::"` - Ruby 1.9 to Ruby 3.1.4
69
+ * `73` - Ruby 3.1.5 to Ruby 3.1.6
70
+ * `[:@op, "::", [lineno, column]]` - Ruby 3.2.0 and later
71
+
72
+ The `Prism` interface is guaranteed going forward to be consistent, and it is the official Ruby syntax tree interface. This means you can rely on this interface without having to worry about individual changes between Ruby versions. It also is a gem, which means it is versioned based on the gem version, as opposed to being versioned based on the Ruby version. Finally, you can use `Prism` to parse multiple versions of Ruby, whereas `Ripper` is tied to the Ruby version it is running on.
data/ext/prism/api_node.c CHANGED
@@ -102,6 +102,7 @@ static VALUE rb_cPrismInterpolatedRegularExpressionNode;
102
102
  static VALUE rb_cPrismInterpolatedStringNode;
103
103
  static VALUE rb_cPrismInterpolatedSymbolNode;
104
104
  static VALUE rb_cPrismInterpolatedXStringNode;
105
+ static VALUE rb_cPrismItLocalVariableReadNode;
105
106
  static VALUE rb_cPrismItParametersNode;
106
107
  static VALUE rb_cPrismKeywordHashNode;
107
108
  static VALUE rb_cPrismKeywordRestParameterNode;
@@ -980,12 +981,6 @@ pm_ast_new(const pm_parser_t *parser, const pm_node_t *node, rb_encoding *encodi
980
981
  pm_node_stack_push(&node_stack, (pm_node_t *) cast->right);
981
982
  break;
982
983
  }
983
- #line 146 "api_node.c.erb"
984
- case PM_RATIONAL_NODE: {
985
- pm_rational_node_t *cast = (pm_rational_node_t *) node;
986
- pm_node_stack_push(&node_stack, (pm_node_t *) cast->numeric);
987
- break;
988
- }
989
984
  #line 146 "api_node.c.erb"
990
985
  case PM_RESCUE_MODIFIER_NODE: {
991
986
  pm_rescue_modifier_node_t *cast = (pm_rescue_modifier_node_t *) node;
@@ -3733,6 +3728,19 @@ pm_ast_new(const pm_parser_t *parser, const pm_node_t *node, rb_encoding *encodi
3733
3728
  rb_ary_push(value_stack, rb_class_new_instance(5, argv, rb_cPrismInterpolatedXStringNode));
3734
3729
  break;
3735
3730
  }
3731
+ #line 172 "api_node.c.erb"
3732
+ case PM_IT_LOCAL_VARIABLE_READ_NODE: {
3733
+ VALUE argv[2];
3734
+
3735
+ // source
3736
+ argv[0] = source;
3737
+
3738
+ // location
3739
+ argv[1] = pm_location_new(parser, node->location.start, node->location.end);
3740
+
3741
+ rb_ary_push(value_stack, rb_class_new_instance(2, argv, rb_cPrismItLocalVariableReadNode));
3742
+ break;
3743
+ }
3736
3744
  #line 172 "api_node.c.erb"
3737
3745
  case PM_IT_PARAMETERS_NODE: {
3738
3746
  VALUE argv[2];
@@ -4723,19 +4731,28 @@ pm_ast_new(const pm_parser_t *parser, const pm_node_t *node, rb_encoding *encodi
4723
4731
  }
4724
4732
  #line 172 "api_node.c.erb"
4725
4733
  case PM_RATIONAL_NODE: {
4726
- VALUE argv[3];
4734
+ pm_rational_node_t *cast = (pm_rational_node_t *) node;
4735
+ VALUE argv[5];
4727
4736
 
4728
4737
  // source
4729
4738
  argv[0] = source;
4730
4739
 
4731
- // numeric
4732
- #line 186 "api_node.c.erb"
4733
- argv[1] = rb_ary_pop(value_stack);
4740
+ // flags
4741
+ #line 223 "api_node.c.erb"
4742
+ argv[1] = ULONG2NUM(node->flags & ~PM_NODE_FLAG_COMMON_MASK);
4743
+
4744
+ // numerator
4745
+ #line 226 "api_node.c.erb"
4746
+ argv[2] = pm_integer_new(&cast->numerator);
4747
+
4748
+ // denominator
4749
+ #line 226 "api_node.c.erb"
4750
+ argv[3] = pm_integer_new(&cast->denominator);
4734
4751
 
4735
4752
  // location
4736
- argv[2] = pm_location_new(parser, node->location.start, node->location.end);
4753
+ argv[4] = pm_location_new(parser, node->location.start, node->location.end);
4737
4754
 
4738
- rb_ary_push(value_stack, rb_class_new_instance(3, argv, rb_cPrismRationalNode));
4755
+ rb_ary_push(value_stack, rb_class_new_instance(5, argv, rb_cPrismRationalNode));
4739
4756
  break;
4740
4757
  }
4741
4758
  #line 172 "api_node.c.erb"
@@ -5578,6 +5595,7 @@ Init_prism_api_node(void) {
5578
5595
  rb_cPrismInterpolatedStringNode = rb_define_class_under(rb_cPrism, "InterpolatedStringNode", rb_cPrismNode);
5579
5596
  rb_cPrismInterpolatedSymbolNode = rb_define_class_under(rb_cPrism, "InterpolatedSymbolNode", rb_cPrismNode);
5580
5597
  rb_cPrismInterpolatedXStringNode = rb_define_class_under(rb_cPrism, "InterpolatedXStringNode", rb_cPrismNode);
5598
+ rb_cPrismItLocalVariableReadNode = rb_define_class_under(rb_cPrism, "ItLocalVariableReadNode", rb_cPrismNode);
5581
5599
  rb_cPrismItParametersNode = rb_define_class_under(rb_cPrism, "ItParametersNode", rb_cPrismNode);
5582
5600
  rb_cPrismKeywordHashNode = rb_define_class_under(rb_cPrism, "KeywordHashNode", rb_cPrismNode);
5583
5601
  rb_cPrismKeywordRestParameterNode = rb_define_class_under(rb_cPrism, "KeywordRestParameterNode", rb_cPrismNode);