prism 0.24.0 → 0.29.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (126) hide show
  1. checksums.yaml +4 -4
  2. data/BSDmakefile +58 -0
  3. data/CHANGELOG.md +132 -1
  4. data/Makefile +25 -18
  5. data/README.md +45 -6
  6. data/config.yml +828 -25
  7. data/docs/build_system.md +31 -0
  8. data/docs/configuration.md +4 -0
  9. data/docs/cruby_compilation.md +1 -1
  10. data/docs/parser_translation.md +14 -9
  11. data/docs/releasing.md +7 -9
  12. data/docs/ripper_translation.md +50 -0
  13. data/docs/ruby_api.md +1 -0
  14. data/docs/serialization.md +26 -5
  15. data/ext/prism/api_node.c +1037 -936
  16. data/ext/prism/api_pack.c +9 -0
  17. data/ext/prism/extconf.rb +62 -18
  18. data/ext/prism/extension.c +351 -71
  19. data/ext/prism/extension.h +5 -4
  20. data/include/prism/ast.h +539 -101
  21. data/include/prism/defines.h +106 -2
  22. data/include/prism/diagnostic.h +168 -74
  23. data/include/prism/encoding.h +22 -4
  24. data/include/prism/node.h +93 -0
  25. data/include/prism/options.h +84 -9
  26. data/include/prism/pack.h +11 -0
  27. data/include/prism/parser.h +213 -54
  28. data/include/prism/prettyprint.h +8 -0
  29. data/include/prism/static_literals.h +120 -0
  30. data/include/prism/util/pm_buffer.h +65 -2
  31. data/include/prism/util/pm_constant_pool.h +18 -1
  32. data/include/prism/util/pm_integer.h +119 -0
  33. data/include/prism/util/pm_list.h +1 -1
  34. data/include/prism/util/pm_newline_list.h +8 -0
  35. data/include/prism/util/pm_string.h +26 -2
  36. data/include/prism/version.h +2 -2
  37. data/include/prism.h +59 -1
  38. data/lib/prism/compiler.rb +8 -1
  39. data/lib/prism/debug.rb +46 -3
  40. data/lib/prism/desugar_compiler.rb +5 -3
  41. data/lib/prism/dispatcher.rb +29 -0
  42. data/lib/prism/dot_visitor.rb +141 -54
  43. data/lib/prism/dsl.rb +48 -36
  44. data/lib/prism/ffi.rb +82 -17
  45. data/lib/prism/inspect_visitor.rb +2156 -0
  46. data/lib/prism/lex_compat.rb +34 -15
  47. data/lib/prism/mutation_compiler.rb +13 -2
  48. data/lib/prism/node.rb +4453 -4459
  49. data/lib/prism/node_ext.rb +249 -30
  50. data/lib/prism/pack.rb +4 -0
  51. data/lib/prism/parse_result/comments.rb +35 -18
  52. data/lib/prism/parse_result/newlines.rb +2 -2
  53. data/lib/prism/parse_result.rb +218 -43
  54. data/lib/prism/pattern.rb +28 -10
  55. data/lib/prism/polyfill/byteindex.rb +13 -0
  56. data/lib/prism/polyfill/unpack1.rb +14 -0
  57. data/lib/prism/reflection.rb +411 -0
  58. data/lib/prism/serialize.rb +480 -112
  59. data/lib/prism/translation/parser/compiler.rb +376 -88
  60. data/lib/prism/translation/parser/lexer.rb +103 -22
  61. data/lib/prism/translation/parser/rubocop.rb +41 -13
  62. data/lib/prism/translation/parser.rb +123 -11
  63. data/lib/prism/translation/parser33.rb +1 -1
  64. data/lib/prism/translation/parser34.rb +1 -1
  65. data/lib/prism/translation/ripper/sexp.rb +125 -0
  66. data/lib/prism/translation/ripper/shim.rb +5 -0
  67. data/lib/prism/translation/ripper.rb +3216 -462
  68. data/lib/prism/translation/ruby_parser.rb +111 -56
  69. data/lib/prism/translation.rb +3 -1
  70. data/lib/prism/visitor.rb +10 -0
  71. data/lib/prism.rb +12 -20
  72. data/prism.gemspec +46 -14
  73. data/rbi/prism/compiler.rbi +12 -0
  74. data/rbi/prism/inspect_visitor.rbi +12 -0
  75. data/rbi/prism/node.rbi +8712 -0
  76. data/rbi/prism/node_ext.rbi +107 -0
  77. data/rbi/prism/parse_result.rbi +358 -0
  78. data/rbi/prism/reflection.rbi +58 -0
  79. data/rbi/prism/translation/parser.rbi +11 -0
  80. data/rbi/prism/translation/parser33.rbi +6 -0
  81. data/rbi/prism/translation/parser34.rbi +6 -0
  82. data/rbi/prism/translation/ripper.rbi +15 -0
  83. data/rbi/prism/visitor.rbi +470 -0
  84. data/rbi/prism.rbi +38 -7748
  85. data/sig/prism/compiler.rbs +9 -0
  86. data/sig/prism/dispatcher.rbs +16 -0
  87. data/sig/prism/dot_visitor.rbs +6 -0
  88. data/sig/prism/dsl.rbs +462 -0
  89. data/sig/prism/inspect_visitor.rbs +22 -0
  90. data/sig/prism/lex_compat.rbs +10 -0
  91. data/sig/prism/mutation_compiler.rbs +158 -0
  92. data/sig/prism/node.rbs +3558 -0
  93. data/sig/prism/node_ext.rbs +82 -0
  94. data/sig/prism/pack.rbs +43 -0
  95. data/sig/prism/parse_result.rbs +160 -0
  96. data/sig/prism/pattern.rbs +13 -0
  97. data/sig/prism/reflection.rbs +50 -0
  98. data/sig/prism/serialize.rbs +6 -0
  99. data/sig/prism/visitor.rbs +168 -0
  100. data/sig/prism.rbs +188 -4767
  101. data/src/diagnostic.c +636 -230
  102. data/src/encoding.c +211 -108
  103. data/src/node.c +7555 -451
  104. data/src/options.c +66 -31
  105. data/src/pack.c +33 -17
  106. data/src/prettyprint.c +1383 -1431
  107. data/src/prism.c +4734 -1310
  108. data/src/regexp.c +17 -2
  109. data/src/serialize.c +68 -46
  110. data/src/static_literals.c +638 -0
  111. data/src/token_type.c +10 -9
  112. data/src/util/pm_buffer.c +147 -20
  113. data/src/util/pm_char.c +4 -4
  114. data/src/util/pm_constant_pool.c +35 -11
  115. data/src/util/pm_integer.c +642 -0
  116. data/src/util/pm_list.c +1 -1
  117. data/src/util/pm_newline_list.c +14 -5
  118. data/src/util/pm_string.c +134 -5
  119. data/src/util/pm_string_list.c +2 -2
  120. metadata +41 -9
  121. data/docs/ripper.md +0 -36
  122. data/include/prism/util/pm_state_stack.h +0 -42
  123. data/lib/prism/node_inspector.rb +0 -68
  124. data/rbi/prism_static.rbi +0 -207
  125. data/sig/prism_static.rbs +0 -201
  126. data/src/util/pm_state_stack.c +0 -25
data/docs/build_system.md CHANGED
@@ -73,6 +73,25 @@ and links to `libprism.a` (to avoid exporting symbols, so no conflict when insta
73
73
 
74
74
  TODO, similar to TruffleRuby.
75
75
 
76
+ ### Building prism for embedded system
77
+
78
+ For instance, you can build a static library `libprism.a` targeting the Arm Cortex-M0+ embedded system with the commands below:
79
+
80
+ * `templates/template.rb`
81
+ * `CFLAGS="-mcpu=cortex-m0plus" make static CC=arm-none-eabi-gcc`
82
+
83
+ The build process internally checks the `_POSIX_MAPPED_FILES` and `_WIN32` macros to determine whether memory-mapping functions are available on the target platform.
84
+
85
+ ### Building prism with custom memory allocator
86
+
87
+ If you need to use memory allocation functions implemented outside of the standard library, follow these steps:
88
+
89
+ * Add `-D PRISM_XALLOCATOR` to the build options
90
+ * Additionally, include `-I [path/to/custom_allocator]` where your `prism_xallocator.h` is located
91
+ * Link the implementation of `prism_xallocator.c` that contains functions declared in `prism_xallocator.h`
92
+
93
+ For further clarity, refer to `include/prism/defines.h`.
94
+
76
95
  ### Building prism from source as a C library
77
96
 
78
97
  All of the source files match `src/**/*.c` and all of the headers match `include/**/*.h`.
@@ -89,3 +108,15 @@ If you want to build prism as a shared library and link against it, you should c
89
108
  ```
90
109
  MAKEFLAGS="-j10" bundle exec rake compile
91
110
  ```
111
+
112
+ ## Build options
113
+
114
+ * `PRISM_BUILD_DEBUG` - Will cause all file reading to copy into its own allocation to allow easier tracking of reading off the end of the buffer. By default this is off.
115
+ * `PRISM_BUILD_MINIMAL` - Define all of the `PRISM_EXCLUDE_*` flags at once.
116
+ * `PRISM_ENCODING_EXCLUDE_FULL` - Will cause the library to exclude the full encoding API, and only include the minimal number of encodings to support parsing Ruby code without encoding comments. By default this is off.
117
+ * `PRISM_EXPORT_SYMBOLS` - Will cause the shared library to export symbols. By default this is off.
118
+ * `PRISM_EXCLUDE_JSON` - Will cause the library to exclude the JSON API. By default this is off.
119
+ * `PRISM_EXCLUDE_PACK` - Will cause the library to exclude the pack API. By default this is off.
120
+ * `PRISM_EXCLUDE_PRETTYPRINT` - Will cause the library to exclude the prettyprint API. By default this is off.
121
+ * `PRISM_EXCLUDE_SERIALIZATION` - Will cause the library to exclude the serialization API. By default this is off.
122
+ * `PRISM_XALLOCATOR` - Will cause the library to use the custom memory allocator. By default this is off.
@@ -4,6 +4,7 @@ A lot of code in prism's repository is templated from a single configuration fil
4
4
 
5
5
  * `ext/prism/api_node.c` - for defining how to build Ruby objects for the nodes out of C structs
6
6
  * `include/prism/ast.h` - for defining the C structs that represent the nodes
7
+ * `include/prism/diagnostic.h` - for defining the diagnostics
7
8
  * `javascript/src/deserialize.js` - for defining how to deserialize the nodes in JavaScript
8
9
  * `javascript/src/nodes.js` - for defining the nodes in JavaScript
9
10
  * `java/org/prism/AbstractNodeVisitor.java` - for defining the visitor interface for the nodes in Java
@@ -13,10 +14,13 @@ A lot of code in prism's repository is templated from a single configuration fil
13
14
  * `lib/prism/dispatcher.rb` - for defining the dispatch visitors for the nodes in Ruby
14
15
  * `lib/prism/dot_visitor.rb` - for defining the dot visitor for the nodes in Ruby
15
16
  * `lib/prism/dsl.rb` - for defining the DSL for the nodes in Ruby
17
+ * `lib/prism/inspect_visitor.rb` - for defining the `#inspect` methods on nodes in Ruby
16
18
  * `lib/prism/mutation_compiler.rb` - for defining the mutation compiler for the nodes in Ruby
17
19
  * `lib/prism/node.rb` - for defining the nodes in Ruby
20
+ * `lib/prism/reflection.rb` - for defining the reflection API in Ruby
18
21
  * `lib/prism/serialize.rb` - for defining how to deserialize the nodes in Ruby
19
22
  * `lib/prism/visitor.rb` - for defining the visitor interface for the nodes in Ruby
23
+ * `src/diagnostic.c` - for defining how to build diagnostics
20
24
  * `src/node.c` - for defining how to free the nodes in C and calculate the size in memory in C
21
25
  * `src/prettyprint.c` - for defining how to prettyprint the nodes in C
22
26
  * `src/serialize.c` - for defining how to serialize the nodes in C
@@ -10,7 +10,7 @@ ruby/ruby uses the Prism code to generate an AST from which it can generate inst
10
10
 
11
11
  1. Compute an AST
12
12
 
13
- Syncing over the Prism code allows ruby/ruby to compute the AST using Prism. It currently does this within [`iseq.c`](https://github.com/ruby/ruby/blob/master/iseq.c) using the `pm_parser_init` fuction.
13
+ Syncing over the Prism code allows ruby/ruby to compute the AST using Prism. It currently does this within [`iseq.c`](https://github.com/ruby/ruby/blob/master/iseq.c) using the `pm_parser_init` function.
14
14
 
15
15
  2. Run a first pass of compilation
16
16
 
@@ -16,19 +16,24 @@ Prism::Translation::Parser.parse_file("path/to/file.rb")
16
16
 
17
17
  ### RuboCop
18
18
 
19
- To run RuboCop using the `prism` gem as the parser, you will need to require the `prism/translation/parser/rubocop` file. This file injects `prism` into the known options for both `rubocop` and `rubocop-ast`, such that you can specify it in your `.rubocop.yml` file. Unfortunately `rubocop` doesn't support any direct way to do this, so we have to get a bit hacky.
19
+ Prism as a parser engine is directly supported since RuboCop 1.62. The class used for parsing is `Prism::Translation::Parser`.
20
20
 
21
- First, set the `TargetRubyVersion` in your RuboCop configuration file to `80_82_73_83_77.33`. This is the version of Ruby that `prism` reports itself as. (The leading numbers are the ASCII values for `PRISM`.)
21
+ First, specify `prism` in your Gemfile:
22
22
 
23
- ```yaml
24
- AllCops:
25
- TargetRubyVersion: 80_82_73_83_77.33
23
+ ```ruby
24
+ gem "prism"
26
25
  ```
27
26
 
28
- Now when you run `rubocop` you will need to require the `prism/translation/parser/rubocop` file before executing so that it can inject the `prism` parser into the known options.
27
+ To use Prism with RuboCop, specify `ParserEngine` and `TargetRubyVersion` in your RuboCop configuration file:
29
28
 
29
+ ```yaml
30
+ AllCops:
31
+ ParserEngine: parser_prism
32
+ TargetRubyVersion: 3.3
30
33
  ```
31
- bundle exec ruby -rprism/translation/parser/rubocop $(bundle exec which rubocop)
32
- ```
33
34
 
34
- This should run RuboCop using the `prism` parser.
35
+ The default value for `ParserEngine` is `parser_whitequark`, which indicates the Parser gem. You need to explicitly switch it to `parser_prism` to indicate Prism. Additionally, the value for `TargetRubyVersion` must be specified as `3.3` or higher, as Prism supports parsing Ruby versions 3.3 and higher.
36
+ The parser class is determined by the combination of values for `ParserEngine` and `TargetRubyVersion`. For example, if `TargetRubyVersion: 3.3`, parsing is performed by `Prism::Translation::Parser33`, and for `TargetRubyVersion: 3.4`, parsing is performed by `Prism::Translation::Parser34`.
37
+
38
+ For further information, please refer to the RuboCop documentation:
39
+ https://docs.rubocop.org/rubocop/configuration.html#setting-the-parser-engine
data/docs/releasing.md CHANGED
@@ -47,15 +47,7 @@ bundle install
47
47
  * Update the version-specific lockfiles:
48
48
 
49
49
  ```sh
50
- chruby ruby-2.7.8 && BUNDLE_GEMFILE=gemfiles/2.7/Gemfile bundle install
51
- chruby ruby-3.0.6 && BUNDLE_GEMFILE=gemfiles/3.0/Gemfile bundle install
52
- chruby ruby-3.1.4 && BUNDLE_GEMFILE=gemfiles/3.1/Gemfile bundle install
53
- chruby ruby-3.2.3 && BUNDLE_GEMFILE=gemfiles/3.2/Gemfile bundle install
54
- chruby ruby-3.3.0 && BUNDLE_GEMFILE=gemfiles/3.3/Gemfile bundle install
55
- chruby ruby-3.4.0-dev && BUNDLE_GEMFILE=gemfiles/3.4/Gemfile bundle install
56
- chruby jruby-9.4.5.0 && BUNDLE_GEMFILE=gemfiles/jruby/Gemfile bundle install
57
- chruby truffleruby-23.1.2 && BUNDLE_GEMFILE=gemfiles/truffleruby/Gemfile bundle install
58
- chruby ruby-3.4.0-dev
50
+ bin/prism bundle install
59
51
  ```
60
52
 
61
53
  * Update the cargo lockfiles:
@@ -70,6 +62,12 @@ bundle exec rake cargo:build
70
62
  git commit -am "Bump to v$PRISM_VERSION"
71
63
  ```
72
64
 
65
+ * Push up the changes:
66
+
67
+ ```sh
68
+ git push
69
+ ```
70
+
73
71
  ## Publishing
74
72
 
75
73
  * Update the GitHub release page with a copy of the latest entry in the `CHANGELOG.md` file.
@@ -0,0 +1,50 @@
1
+ # Ripper translation
2
+
3
+ Prism provides the ability to mirror the `Ripper` standard library. You can do this by:
4
+
5
+ ```ruby
6
+ require "prism/translation/ripper/shim"
7
+ ```
8
+
9
+ This provides APIs like:
10
+
11
+ ```ruby
12
+ Ripper.lex
13
+ Ripper.parse
14
+ Ripper.sexp_raw
15
+ Ripper.sexp
16
+
17
+ Ripper::SexpBuilder
18
+ Ripper::SexpBuilderPP
19
+ ```
20
+
21
+ Briefly, `Ripper` is a streaming parser that allows you to construct your own syntax tree. As an example:
22
+
23
+ ```ruby
24
+ class ArithmeticRipper < Prism::Translation::Ripper
25
+ def on_binary(left, operator, right)
26
+ left.public_send(operator, right)
27
+ end
28
+
29
+ def on_int(value)
30
+ value.to_i
31
+ end
32
+
33
+ def on_program(stmts)
34
+ stmts
35
+ end
36
+
37
+ def on_stmts_new
38
+ []
39
+ end
40
+
41
+ def on_stmts_add(stmts, stmt)
42
+ stmts << stmt
43
+ stmts
44
+ end
45
+ end
46
+
47
+ ArithmeticRipper.new("1 + 2 - 3").parse # => [0]
48
+ ```
49
+
50
+ The exact names of the `on_*` methods are listed in the `Ripper` source.
data/docs/ruby_api.md CHANGED
@@ -20,6 +20,7 @@ The full API is documented below.
20
20
  * `Prism.lex_file(filepath)` - parse the tokens corresponding to the given source file and return them as an array within a parse result
21
21
  * `Prism.parse(source)` - parse the syntax tree corresponding to the given source string and return it within a parse result
22
22
  * `Prism.parse_file(filepath)` - parse the syntax tree corresponding to the given source file and return it within a parse result
23
+ * `Prism.parse_stream(io)` - parse the syntax tree corresponding to the source that is read out of the given IO object using the `#gets` method and return it within a parse result
23
24
  * `Prism.parse_lex(source)` - parse the syntax tree corresponding to the given source string and return it within a parse result, along with the tokens
24
25
  * `Prism.parse_lex_file(filepath)` - parse the syntax tree corresponding to the given source file and return it within a parse result, along with the tokens
25
26
  * `Prism.load(source, serialized)` - load the serialized syntax tree using the source as a reference into a syntax tree
@@ -55,18 +55,28 @@ The comment type is one of:
55
55
 
56
56
  | # bytes | field |
57
57
  | --- | --- |
58
+ | varuint | type |
58
59
  | string | error message (ASCII-only characters) |
59
60
  | location | the location in the source this error applies to |
60
- | `1` | the level of the error: `0` for `fatal` |
61
+ | `1` | the level of the error: `0` for `fatal`, `1` for `argument`, `2` for `load` |
61
62
 
62
- ## warning
63
+ ### warning
63
64
 
64
65
  | # bytes | field |
65
66
  | --- | --- |
67
+ | varuint | type |
66
68
  | string | warning message (ASCII-only characters) |
67
69
  | location | the location in the source this warning applies to |
68
70
  | `1` | the level of the warning: `0` for `default` and `1` for `verbose` |
69
71
 
72
+ ### integer
73
+
74
+ | # bytes | field |
75
+ | --- | --- |
76
+ | `1` | `1` if the integer is negative, `0` if the integer is non-negative |
77
+ | varuint | the number of words in this integer |
78
+ | varuint+ | the words of the integer, least-significant to most-significant |
79
+
70
80
  ## Structure
71
81
 
72
82
  The serialized string representing the syntax tree is composed of three parts: the header, the body, and the constant pool.
@@ -110,12 +120,14 @@ Each node is structured like the following table:
110
120
 
111
121
  Every field on the node is then appended to the serialized string. The fields can be determined by referencing `config.yml`. Depending on the type of field, it could take a couple of different forms, described below:
112
122
 
123
+ * `double` - A field that is a `double`. This is structured as a sequence of 8 bytes in native endian order.
113
124
  * `node` - A field that is a node. This is structured just like the parent node.
114
125
  * `node?` - A field that is a node that is optionally present. If the node is not present, then a single `0` byte will be written in its place. If it is present, then it will be structured just like the parent node.
115
126
  * `node[]` - A field that is an array of nodes. This is structured as a variable-length integer length, followed by the child nodes themselves.
116
127
  * `string` - A field that is a string. For example, this is used as the name of the method in a call node, since it cannot directly reference the source string (as in `@-` or `foo=`). This is structured as a variable-length integer byte length, followed by the string itself (_without_ a trailing null byte).
117
128
  * `constant` - A variable-length integer that represents an index in the constant pool.
118
129
  * `constant?` - An optional variable-length integer that represents an index in the constant pool. If it's not present, then a single `0` byte will be written in its place.
130
+ * `integer` - A field that represents an arbitrary-sized integer. The structure is listed above.
119
131
  * `location` - A field that is a location. This is structured as a variable-length integer start followed by a variable-length integer length.
120
132
  * `location?` - A field that is a location that is optionally present. If the location is not present, then a single `0` byte will be written in its place. If it is present, then it will be structured just like the `location` child node.
121
133
  * `uint8` - A field that is an 8-bit unsigned integer. This is structured as a single byte.
@@ -185,21 +197,30 @@ The final argument to `pm_serialize_parse` is an optional string that controls t
185
197
  | `4` | the length of the encoding |
186
198
  | ... | the encoding bytes |
187
199
  | `1` | frozen string literal |
188
- | `1` | suppress warnings |
200
+ | `1` | command line flags |
189
201
  | `1` | syntax version, see [pm_options_version_t](https://github.com/ruby/prism/blob/main/include/prism/options.h) for valid values |
190
202
  | `4` | the number of scopes |
191
203
  | ... | the scopes |
192
204
 
205
+ Command line flags are a bitset. By default every flag is `0`. The bitset includes the following values:
206
+
207
+ * `0x1` - the `-a` option
208
+ * `0x2` - the `-e` option
209
+ * `0x4` - the `-l` option
210
+ * `0x8` - the `-n` option
211
+ * `0x10` - the `-p` option
212
+ * `0x20` - the `-x` option
213
+
193
214
  Scopes are ordered from the outermost scope to the innermost one.
194
215
 
195
- Each scope is layed out as follows:
216
+ Each scope is laid out as follows:
196
217
 
197
218
  | # bytes | field |
198
219
  | ------- | -------------------------- |
199
220
  | `4` | the number of locals |
200
221
  | ... | the locals |
201
222
 
202
- Each local is layed out as follows:
223
+ Each local is laid out as follows:
203
224
 
204
225
  | # bytes | field |
205
226
  | ------- | -------------------------- |