jruby-prism-parser 0.24.0-java → 1.4.0-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/BSDmakefile +58 -0
- data/CHANGELOG.md +269 -1
- data/CONTRIBUTING.md +0 -4
- data/Makefile +25 -18
- data/README.md +57 -6
- data/config.yml +1724 -140
- data/docs/build_system.md +39 -11
- data/docs/configuration.md +4 -0
- data/docs/cruby_compilation.md +1 -1
- data/docs/fuzzing.md +1 -1
- data/docs/parser_translation.md +14 -9
- data/docs/parsing_rules.md +4 -1
- data/docs/releasing.md +8 -10
- data/docs/relocation.md +34 -0
- data/docs/ripper_translation.md +72 -0
- data/docs/ruby_api.md +2 -1
- data/docs/serialization.md +29 -5
- data/ext/prism/api_node.c +3395 -1999
- data/ext/prism/api_pack.c +9 -0
- data/ext/prism/extconf.rb +55 -34
- data/ext/prism/extension.c +597 -346
- data/ext/prism/extension.h +6 -5
- data/include/prism/ast.h +2612 -455
- data/include/prism/defines.h +160 -2
- data/include/prism/diagnostic.h +188 -76
- data/include/prism/encoding.h +22 -4
- data/include/prism/node.h +89 -17
- data/include/prism/options.h +224 -12
- data/include/prism/pack.h +11 -0
- data/include/prism/parser.h +267 -66
- data/include/prism/prettyprint.h +8 -0
- data/include/prism/regexp.h +18 -8
- data/include/prism/static_literals.h +121 -0
- data/include/prism/util/pm_buffer.h +75 -2
- data/include/prism/util/pm_char.h +1 -2
- data/include/prism/util/pm_constant_pool.h +18 -9
- data/include/prism/util/pm_integer.h +126 -0
- data/include/prism/util/pm_list.h +1 -1
- data/include/prism/util/pm_newline_list.h +19 -0
- data/include/prism/util/pm_string.h +48 -8
- data/include/prism/version.h +3 -3
- data/include/prism.h +99 -5
- data/jruby-prism.jar +0 -0
- data/lib/prism/compiler.rb +11 -1
- data/lib/prism/desugar_compiler.rb +113 -74
- data/lib/prism/dispatcher.rb +45 -1
- data/lib/prism/dot_visitor.rb +201 -77
- data/lib/prism/dsl.rb +673 -461
- data/lib/prism/ffi.rb +233 -45
- data/lib/prism/inspect_visitor.rb +2389 -0
- data/lib/prism/lex_compat.rb +35 -16
- data/lib/prism/mutation_compiler.rb +24 -8
- data/lib/prism/node.rb +7731 -8460
- data/lib/prism/node_ext.rb +328 -32
- data/lib/prism/pack.rb +4 -0
- data/lib/prism/parse_result/comments.rb +34 -24
- data/lib/prism/parse_result/errors.rb +65 -0
- data/lib/prism/parse_result/newlines.rb +102 -12
- data/lib/prism/parse_result.rb +448 -44
- data/lib/prism/pattern.rb +28 -10
- data/lib/prism/polyfill/append_as_bytes.rb +15 -0
- data/lib/prism/polyfill/byteindex.rb +13 -0
- data/lib/prism/polyfill/unpack1.rb +14 -0
- data/lib/prism/reflection.rb +413 -0
- data/lib/prism/relocation.rb +504 -0
- data/lib/prism/serialize.rb +1940 -1198
- data/lib/prism/string_query.rb +30 -0
- data/lib/prism/translation/parser/builder.rb +61 -0
- data/lib/prism/translation/parser/compiler.rb +569 -195
- data/lib/prism/translation/parser/lexer.rb +516 -39
- data/lib/prism/translation/parser.rb +177 -12
- data/lib/prism/translation/parser33.rb +1 -1
- data/lib/prism/translation/parser34.rb +1 -1
- data/lib/prism/translation/parser35.rb +12 -0
- data/lib/prism/translation/ripper/sexp.rb +125 -0
- data/lib/prism/translation/ripper/shim.rb +5 -0
- data/lib/prism/translation/ripper.rb +3224 -462
- data/lib/prism/translation/ruby_parser.rb +194 -69
- data/lib/prism/translation.rb +4 -1
- data/lib/prism/version.rb +1 -1
- data/lib/prism/visitor.rb +13 -0
- data/lib/prism.rb +17 -27
- data/prism.gemspec +57 -17
- data/rbi/prism/compiler.rbi +12 -0
- data/rbi/prism/dsl.rbi +524 -0
- data/rbi/prism/inspect_visitor.rbi +12 -0
- data/rbi/prism/node.rbi +8722 -0
- data/rbi/prism/node_ext.rbi +107 -0
- data/rbi/prism/parse_result.rbi +404 -0
- data/rbi/prism/reflection.rbi +58 -0
- data/rbi/prism/string_query.rbi +12 -0
- data/rbi/prism/translation/parser.rbi +11 -0
- data/rbi/prism/translation/parser33.rbi +6 -0
- data/rbi/prism/translation/parser34.rbi +6 -0
- data/rbi/prism/translation/parser35.rbi +6 -0
- data/rbi/prism/translation/ripper.rbi +15 -0
- data/rbi/prism/visitor.rbi +473 -0
- data/rbi/prism.rbi +44 -7745
- data/sig/prism/compiler.rbs +9 -0
- data/sig/prism/dispatcher.rbs +16 -0
- data/sig/prism/dot_visitor.rbs +6 -0
- data/sig/prism/dsl.rbs +351 -0
- data/sig/prism/inspect_visitor.rbs +22 -0
- data/sig/prism/lex_compat.rbs +10 -0
- data/sig/prism/mutation_compiler.rbs +159 -0
- data/sig/prism/node.rbs +3614 -0
- data/sig/prism/node_ext.rbs +82 -0
- data/sig/prism/pack.rbs +43 -0
- data/sig/prism/parse_result.rbs +192 -0
- data/sig/prism/pattern.rbs +13 -0
- data/sig/prism/reflection.rbs +50 -0
- data/sig/prism/relocation.rbs +185 -0
- data/sig/prism/serialize.rbs +8 -0
- data/sig/prism/string_query.rbs +11 -0
- data/sig/prism/visitor.rbs +169 -0
- data/sig/prism.rbs +248 -4767
- data/src/diagnostic.c +672 -230
- data/src/encoding.c +211 -108
- data/src/node.c +7541 -1653
- data/src/options.c +135 -20
- data/src/pack.c +33 -17
- data/src/prettyprint.c +1543 -1485
- data/src/prism.c +7813 -3050
- data/src/regexp.c +225 -73
- data/src/serialize.c +101 -77
- data/src/static_literals.c +617 -0
- data/src/token_type.c +14 -13
- data/src/util/pm_buffer.c +187 -20
- data/src/util/pm_char.c +5 -5
- data/src/util/pm_constant_pool.c +39 -19
- data/src/util/pm_integer.c +670 -0
- data/src/util/pm_list.c +1 -1
- data/src/util/pm_newline_list.c +43 -5
- data/src/util/pm_string.c +213 -33
- data/src/util/pm_strncasecmp.c +13 -1
- data/src/util/pm_strpbrk.c +32 -6
- metadata +55 -19
- data/docs/ripper.md +0 -36
- data/include/prism/util/pm_state_stack.h +0 -42
- data/include/prism/util/pm_string_list.h +0 -44
- data/lib/prism/debug.rb +0 -206
- data/lib/prism/node_inspector.rb +0 -68
- data/lib/prism/translation/parser/rubocop.rb +0 -45
- data/rbi/prism_static.rbi +0 -207
- data/sig/prism_static.rbs +0 -201
- data/src/util/pm_state_stack.c +0 -25
- data/src/util/pm_string_list.c +0 -28
data/docs/build_system.md
CHANGED
@@ -16,9 +16,9 @@ The main solution for the second point seems a Makefile, otherwise many of the u
|
|
16
16
|
## General Design
|
17
17
|
|
18
18
|
1. Templates are generated by `templates/template.rb`
|
19
|
-
|
20
|
-
|
21
|
-
|
19
|
+
2. The `Makefile` compiles both `libprism.a` and `libprism.{so,dylib,dll}` from the `src/**/*.c` and `include/**/*.h` files
|
20
|
+
3. The `Rakefile` `:compile` task ensures the above prerequisites are done, then calls `make`,
|
21
|
+
and uses `Rake::ExtensionTask` to compile the C extension (using its `extconf.rb`)
|
22
22
|
|
23
23
|
This way there is minimal duplication, and each layer builds on the previous one and has its own responsibilities.
|
24
24
|
|
@@ -35,14 +35,11 @@ loaded per process (i.e., at most one version of the prism *gem* loaded in a pro
|
|
35
35
|
### Building the prism gem by `gem install/bundle install`
|
36
36
|
|
37
37
|
The gem contains the pre-generated templates.
|
38
|
-
When installing the gem, `extconf.rb` is used and that:
|
39
|
-
* runs `make build/libprism.a`
|
40
|
-
* compiles the C extension with mkmf
|
41
38
|
|
42
|
-
When installing the gem on
|
43
|
-
|
44
|
-
|
45
|
-
(JRuby does not support C extensions, serialization is faster on TruffleRuby than the C extension).
|
39
|
+
When installing the gem on CRuby, `extconf.rb` is used and that compiles the C extension with mkmf, including both the extension files and the sources of prism itself.
|
40
|
+
|
41
|
+
When installing the gem on JRuby and TruffleRuby, no C extension is built, so instead the `extconf.rb` runs `make build/libprism.{so,dylib,dll}`.
|
42
|
+
There is Ruby code using FFI which uses `libprism.{so,dylib,dll}` to implement the same methods as the C extension, but using serialization instead of many native calls/accesses (JRuby does not support C extensions, serialization is faster on TruffleRuby than the C extension).
|
46
43
|
|
47
44
|
### Building the prism gem from git, e.g. `gem "prism", github: "ruby/prism"`
|
48
45
|
|
@@ -66,13 +63,32 @@ The script generates the templates when importing.
|
|
66
63
|
|
67
64
|
Then when `mx build` builds TruffleRuby and the `prism` mx project inside, it runs `make`.
|
68
65
|
|
69
|
-
Then the `prism bindings` mx project is built, which contains the [bindings](https://github.com/oracle/truffleruby/blob/
|
66
|
+
Then the `prism bindings` mx project is built, which contains the [bindings](https://github.com/oracle/truffleruby/blob/vm-24.1.1/src/main/c/yarp_bindings/src/yarp_bindings.c)
|
70
67
|
and links to `libprism.a` (to avoid exporting symbols, so no conflict when installing the prism gem).
|
71
68
|
|
72
69
|
### Building prism as part of JRuby
|
73
70
|
|
74
71
|
TODO, similar to TruffleRuby.
|
75
72
|
|
73
|
+
### Building prism for embedded systems
|
74
|
+
|
75
|
+
For instance, you can build a static library `libprism.a` targeting the Arm Cortex-M0+ embedded system by the commands below:
|
76
|
+
|
77
|
+
* `templates/template.rb`
|
78
|
+
* `CFLAGS="-mcpu=cortex-m0plus" make static CC=arm-none-eabi-gcc`
|
79
|
+
|
80
|
+
The build process internally looks up the `_POSIX_MAPPED_FILES` and `_WIN32` macros to determine whether memory-mapping functions are available on the target platform.
|
81
|
+
|
82
|
+
### Building prism with custom memory allocator
|
83
|
+
|
84
|
+
If you need to use memory allocation functions implemented outside of the standard library, follow these steps:
|
85
|
+
|
86
|
+
* Add `-D PRISM_XALLOCATOR` to the build options
|
87
|
+
* Additionally, include `-I [path/to/custom_allocator]` where your `prism_xallocator.h` is located
|
88
|
+
* Link the implementation of `prism_xallocator.c` that contains functions declared in `prism_xallocator.h`
|
89
|
+
|
90
|
+
For further clarity, refer to `include/prism/defines.h`.
|
91
|
+
|
76
92
|
### Building prism from source as a C library
|
77
93
|
|
78
94
|
All of the source files match `src/**/*.c` and all of the headers match `include/**/*.h`.
|
@@ -89,3 +105,15 @@ If you want to build prism as a shared library and link against it, you should c
|
|
89
105
|
```
|
90
106
|
MAKEFLAGS="-j10" bundle exec rake compile
|
91
107
|
```
|
108
|
+
|
109
|
+
## Build options
|
110
|
+
|
111
|
+
* `PRISM_BUILD_DEBUG` - Will cause all file reading to copy into its own allocation to allow easier tracking of reading off the end of the buffer. By default this is off.
|
112
|
+
* `PRISM_BUILD_MINIMAL` - Define all of the `PRISM_EXCLUDE_*` flags at once.
|
113
|
+
* `PRISM_ENCODING_EXCLUDE_FULL` - Will cause the library to exclude the full encoding API, and only include the minimal number of encodings to support parsing Ruby code without encoding comments. By default this is off.
|
114
|
+
* `PRISM_EXPORT_SYMBOLS` - Will cause the shared library to export symbols. By default this is off.
|
115
|
+
* `PRISM_EXCLUDE_JSON` - Will cause the library to exclude the JSON API. By default this is off.
|
116
|
+
* `PRISM_EXCLUDE_PACK` - Will cause the library to exclude the pack API. By default this is off.
|
117
|
+
* `PRISM_EXCLUDE_PRETTYPRINT` - Will cause the library to exclude the prettyprint API. By default this is off.
|
118
|
+
* `PRISM_EXCLUDE_SERIALIZATION` - Will cause the library to exclude the serialization API. By default this is off.
|
119
|
+
* `PRISM_XALLOCATOR` - Will cause the library to use the custom memory allocator. By default this is off.
|
data/docs/configuration.md
CHANGED
@@ -4,6 +4,7 @@ A lot of code in prism's repository is templated from a single configuration fil
|
|
4
4
|
|
5
5
|
* `ext/prism/api_node.c` - for defining how to build Ruby objects for the nodes out of C structs
|
6
6
|
* `include/prism/ast.h` - for defining the C structs that represent the nodes
|
7
|
+
* `include/prism/diagnostic.h` - for defining the diagnostics
|
7
8
|
* `javascript/src/deserialize.js` - for defining how to deserialize the nodes in JavaScript
|
8
9
|
* `javascript/src/nodes.js` - for defining the nodes in JavaScript
|
9
10
|
* `java/org/prism/AbstractNodeVisitor.java` - for defining the visitor interface for the nodes in Java
|
@@ -13,10 +14,13 @@ A lot of code in prism's repository is templated from a single configuration fil
|
|
13
14
|
* `lib/prism/dispatcher.rb` - for defining the dispatch visitors for the nodes in Ruby
|
14
15
|
* `lib/prism/dot_visitor.rb` - for defining the dot visitor for the nodes in Ruby
|
15
16
|
* `lib/prism/dsl.rb` - for defining the DSL for the nodes in Ruby
|
17
|
+
* `lib/prism/inspect_visitor.rb` - for defining the `#inspect` methods on nodes in Ruby
|
16
18
|
* `lib/prism/mutation_compiler.rb` - for defining the mutation compiler for the nodes in Ruby
|
17
19
|
* `lib/prism/node.rb` - for defining the nodes in Ruby
|
20
|
+
* `lib/prism/reflection.rb` - for defining the reflection API in Ruby
|
18
21
|
* `lib/prism/serialize.rb` - for defining how to deserialize the nodes in Ruby
|
19
22
|
* `lib/prism/visitor.rb` - for defining the visitor interface for the nodes in Ruby
|
23
|
+
* `src/diagnostic.c` - for defining how to build diagnostics
|
20
24
|
* `src/node.c` - for defining how to free the nodes in C and calculate the size in memory in C
|
21
25
|
* `src/prettyprint.c` - for defining how to prettyprint the nodes in C
|
22
26
|
* `src/serialize.c` - for defining how to serialize the nodes in C
|
data/docs/cruby_compilation.md
CHANGED
@@ -10,7 +10,7 @@ ruby/ruby uses the Prism code to generate an AST from which it can generate inst
|
|
10
10
|
|
11
11
|
1. Compute an AST
|
12
12
|
|
13
|
-
Syncing over the Prism code allows ruby/ruby to compute the AST using Prism. It currently does this within [`iseq.c`](https://github.com/ruby/ruby/blob/master/iseq.c) using the `pm_parser_init`
|
13
|
+
Syncing over the Prism code allows ruby/ruby to compute the AST using Prism. It currently does this within [`iseq.c`](https://github.com/ruby/ruby/blob/master/iseq.c) using the `pm_parser_init` function.
|
14
14
|
|
15
15
|
2. Run a first pass of compilation
|
16
16
|
|
data/docs/fuzzing.md
CHANGED
data/docs/parser_translation.md
CHANGED
@@ -16,19 +16,24 @@ Prism::Translation::Parser.parse_file("path/to/file.rb")
|
|
16
16
|
|
17
17
|
### RuboCop
|
18
18
|
|
19
|
-
|
19
|
+
Prism as a parser engine is directly supported since RuboCop 1.62. The class used for parsing is `Prism::Translation::Parser`.
|
20
20
|
|
21
|
-
First,
|
21
|
+
First, specify `prism` in your Gemfile:
|
22
22
|
|
23
|
-
```
|
24
|
-
|
25
|
-
TargetRubyVersion: 80_82_73_83_77.33
|
23
|
+
```ruby
|
24
|
+
gem "prism"
|
26
25
|
```
|
27
26
|
|
28
|
-
|
27
|
+
To use Prism with RuboCop, specify `ParserEngine` and `TargetRubyVersion` in your RuboCop configuration file:
|
29
28
|
|
29
|
+
```yaml
|
30
|
+
AllCops:
|
31
|
+
ParserEngine: parser_prism
|
32
|
+
TargetRubyVersion: 3.3
|
30
33
|
```
|
31
|
-
bundle exec ruby -rprism/translation/parser/rubocop $(bundle exec which rubocop)
|
32
|
-
```
|
33
34
|
|
34
|
-
|
35
|
+
The default value for `ParserEngine` is `parser_whitequark`, which indicates the Parser gem. You need to explicitly switch it to `parser_prism` to indicate Prism. Additionally, the value for `TargetRubyVersion` must be specified as `3.3` or higher, as Prism supports parsing versions of Ruby 3.3 and higher.
|
36
|
+
The parser class is determined by the combination of values for `ParserEngine` and `TargetRubyVersion`. For example, if `TargetRubyVersion: 3.3`, parsing is performed by `Prism::Translation::Parser33`, and for `TargetRubyVersion: 3.4`, parsing is performed by `Prism::Translation::Parser34`.
|
37
|
+
|
38
|
+
For further information, please refer to the RuboCop documentation:
|
39
|
+
https://docs.rubocop.org/rubocop/configuration.html#setting-the-parser-engine
|
data/docs/parsing_rules.md
CHANGED
@@ -12,7 +12,10 @@ Constants in Ruby begin with an upper-case letter. This is followed by any numbe
|
|
12
12
|
|
13
13
|
Most expressions in CRuby are non-void. This means the expression they represent resolves to a value. For example, `1 + 2` is a non-void expression, because it resolves to a method call. Even something like `class Foo; end` is a non-void expression, because it returns the last evaluated expression in the body of the class (or `nil`).
|
14
14
|
|
15
|
-
Certain nodes, however, are void expressions, and cannot be combined to form larger expressions.
|
15
|
+
Certain nodes, however, are void expressions, and cannot be combined to form larger expressions.
|
16
|
+
* `BEGIN {}`, `END {}`, `alias foo bar`, and `undef foo` can only be at a statement position.
|
17
|
+
* The "jumps": `return`, `break`, `next`, `redo`, `retry` are void expressions.
|
18
|
+
* `value => pattern` is also considered a void expression.
|
16
19
|
|
17
20
|
## Identifiers
|
18
21
|
|
data/docs/releasing.md
CHANGED
@@ -40,22 +40,14 @@ ruby -pi -e 'gsub(/^ruby-prism-sys = \{ version = ".+?"/, %Q{ruby-prism-sys = \{
|
|
40
40
|
* Update the `Gemfile.lock` file:
|
41
41
|
|
42
42
|
```sh
|
43
|
-
chruby ruby-3.
|
43
|
+
chruby ruby-3.5.0-dev
|
44
44
|
bundle install
|
45
45
|
```
|
46
46
|
|
47
47
|
* Update the version-specific lockfiles:
|
48
48
|
|
49
49
|
```sh
|
50
|
-
|
51
|
-
chruby ruby-3.0.6 && BUNDLE_GEMFILE=gemfiles/3.0/Gemfile bundle install
|
52
|
-
chruby ruby-3.1.4 && BUNDLE_GEMFILE=gemfiles/3.1/Gemfile bundle install
|
53
|
-
chruby ruby-3.2.3 && BUNDLE_GEMFILE=gemfiles/3.2/Gemfile bundle install
|
54
|
-
chruby ruby-3.3.0 && BUNDLE_GEMFILE=gemfiles/3.3/Gemfile bundle install
|
55
|
-
chruby ruby-3.4.0-dev && BUNDLE_GEMFILE=gemfiles/3.4/Gemfile bundle install
|
56
|
-
chruby jruby-9.4.5.0 && BUNDLE_GEMFILE=gemfiles/jruby/Gemfile bundle install
|
57
|
-
chruby truffleruby-23.1.2 && BUNDLE_GEMFILE=gemfiles/truffleruby/Gemfile bundle install
|
58
|
-
chruby ruby-3.4.0-dev
|
50
|
+
bin/prism bundle install
|
59
51
|
```
|
60
52
|
|
61
53
|
* Update the cargo lockfiles:
|
@@ -70,6 +62,12 @@ bundle exec rake cargo:build
|
|
70
62
|
git commit -am "Bump to v$PRISM_VERSION"
|
71
63
|
```
|
72
64
|
|
65
|
+
* Push up the changes:
|
66
|
+
|
67
|
+
```sh
|
68
|
+
git push
|
69
|
+
```
|
70
|
+
|
73
71
|
## Publishing
|
74
72
|
|
75
73
|
* Update the GitHub release page with a copy of the latest entry in the `CHANGELOG.md` file.
|
data/docs/relocation.md
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
# Relocation
|
2
|
+
|
3
|
+
Prism parses deterministically for the same input. This provides a nice property that is exposed through the `#node_id` API on nodes. Effectively this means that for the same input, these values will remain consistent every time the source is parsed. This means we can reparse the same source with a `#node_id` value and find the exact same node again.
|
4
|
+
|
5
|
+
The `Relocation` module provides an API around this property. It allows you to "save" nodes and locations using a minimal amount of memory (just the node_id and a field identifier) and then reify them later. This minimizes the amount of memory you need to allocate to store this information because it does not keep around a pointer to the source string.
|
6
|
+
|
7
|
+
## Getting started
|
8
|
+
|
9
|
+
To get started with the `Relocation` module, you would first instantiate a `Repository` object. You do this through a DSL that chains method calls for configuration. For example, if for every entry in the repository you want to store the start and end lines, the start and end code unit columns in UTF-16, and the leading comments, you would write:
|
10
|
+
|
11
|
+
```ruby
|
12
|
+
repository = Prism::Relocation.filepath("path/to/file").lines.code_unit_columns(Encoding::UTF_16).leading_comments
|
13
|
+
```
|
14
|
+
|
15
|
+
Now that you have the repository, you can pass it into any of the `save*` APIs on nodes or locations to create entries in the repository that will be lazily reified.
|
16
|
+
|
17
|
+
```ruby
|
18
|
+
# assume that node is a Prism::ClassNode object
|
19
|
+
entry = node.constant_path.save(repository)
|
20
|
+
```
|
21
|
+
|
22
|
+
Now that you have the entry object, you do not need to keep around a reference to the repository; it will be cleaned up on its own when the last entry is reified. Now, whenever you need to, you may call the associated field methods on the entry object, as in:
|
23
|
+
|
24
|
+
```ruby
|
25
|
+
entry.start_line
|
26
|
+
entry.end_line
|
27
|
+
|
28
|
+
entry.start_code_units_column
|
29
|
+
entry.end_code_units_column
|
30
|
+
|
31
|
+
entry.leading_comments
|
32
|
+
```
|
33
|
+
|
34
|
+
Note that if you had configured other fields to be saved, you would be able to access them as well. The first time one of these fields is accessed, the repository will reify every entry it knows about and then clean itself up. In this way, you can effectively treat them as if you had kept around lightweight versions of `Prism::Node` or `Prism::Location` objects.
|
@@ -0,0 +1,72 @@
|
|
1
|
+
# Ripper translation
|
2
|
+
|
3
|
+
Prism provides the ability to mirror the `Ripper` standard library. You can do this by:
|
4
|
+
|
5
|
+
```ruby
|
6
|
+
require "prism/translation/ripper/shim"
|
7
|
+
```
|
8
|
+
|
9
|
+
This provides the APIs like:
|
10
|
+
|
11
|
+
```ruby
|
12
|
+
Ripper.lex
|
13
|
+
Ripper.parse
|
14
|
+
Ripper.sexp_raw
|
15
|
+
Ripper.sexp
|
16
|
+
|
17
|
+
Ripper::SexpBuilder
|
18
|
+
Ripper::SexpBuilderPP
|
19
|
+
```
|
20
|
+
|
21
|
+
Briefly, `Ripper` is a streaming parser that allows you to construct your own syntax tree. As an example:
|
22
|
+
|
23
|
+
```ruby
|
24
|
+
class ArithmeticRipper < Prism::Translation::Ripper
|
25
|
+
def on_binary(left, operator, right)
|
26
|
+
left.public_send(operator, right)
|
27
|
+
end
|
28
|
+
|
29
|
+
def on_int(value)
|
30
|
+
value.to_i
|
31
|
+
end
|
32
|
+
|
33
|
+
def on_program(stmts)
|
34
|
+
stmts
|
35
|
+
end
|
36
|
+
|
37
|
+
def on_stmts_new
|
38
|
+
[]
|
39
|
+
end
|
40
|
+
|
41
|
+
def on_stmts_add(stmts, stmt)
|
42
|
+
stmts << stmt
|
43
|
+
stmts
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
ArithmeticRipper.new("1 + 2 - 3").parse # => [0]
|
48
|
+
```
|
49
|
+
|
50
|
+
The exact names of the `on_*` methods are listed in the `Ripper` source.
|
51
|
+
|
52
|
+
## Background
|
53
|
+
|
54
|
+
It is helpful to understand the differences between the `Ripper` library and the `Prism` library. Both libraries perform parsing and provide you with APIs to manipulate and understand the resulting syntax tree. However, there are a few key differences.
|
55
|
+
|
56
|
+
### Design
|
57
|
+
|
58
|
+
`Ripper` is a streaming parser. This means as it is parsing Ruby code, it dispatches events back to the consumer. This allows quite a bit of flexibility. You can use it to build your own syntax tree or to find specific patterns in the code. `Prism` on the other hand returns to you the completed syntax tree _before_ it allows you to manipulate it. This means the tree that you get back is the only representation that can be generated by the parser _at parse time_ (but of course can be manipulated later).
|
59
|
+
|
60
|
+
### Fields
|
61
|
+
|
62
|
+
We use the term "field" to mean a piece of information on a syntax tree node. `Ripper` provides the minimal number of fields to accurately represent the syntax tree for the purposes of compilation/interpretation. For example, in the callbacks for nodes that are based on keywords (`class`, `module`, `for`, `while`, etc.) you are not given the keyword itself, you need to attach it on your own. In other cases, tokens are not necessarily dispatched at all, meaning you need to find them yourself. `Prism` provides the opposite: the maximum number of fields on nodes is provided. As a tradeoff, this requires more memory, but this is chosen to make it easier on consumers.
|
63
|
+
|
64
|
+
### Maintainability
|
65
|
+
|
66
|
+
The `Ripper` interface is not guaranteed in any way, and tends to change between patch versions of CRuby. This is largely due to the fact that `Ripper` is a by-product of the generated parser, as opposed to its own parser. As an example, in the expression `foo::bar = baz`, there are three different representations possible for the call operator, including:
|
67
|
+
|
68
|
+
* `:"::"` - Ruby 1.9 to Ruby 3.1.4
|
69
|
+
* `73` - Ruby 3.1.5 to Ruby 3.1.6
|
70
|
+
* `[:@op, "::", [lineno, column]]` - Ruby 3.2.0 and later
|
71
|
+
|
72
|
+
The `Prism` interface is guaranteed going forward to be consistent, and is the official Ruby syntax tree interface. This means you can rely on this interface without having to worry about individual changes between Ruby versions. It also is a gem, which means it is versioned based on the gem version, as opposed to being versioned based on the Ruby version. Finally, you can use `Prism` to parse multiple versions of Ruby, whereas `Ripper` is tied to the Ruby version it is running on.
|
data/docs/ruby_api.md
CHANGED
@@ -20,9 +20,10 @@ The full API is documented below.
|
|
20
20
|
* `Prism.lex_file(filepath)` - parse the tokens corresponding to the given source file and return them as an array within a parse result
|
21
21
|
* `Prism.parse(source)` - parse the syntax tree corresponding to the given source string and return it within a parse result
|
22
22
|
* `Prism.parse_file(filepath)` - parse the syntax tree corresponding to the given source file and return it within a parse result
|
23
|
+
* `Prism.parse_stream(io)` - parse the syntax tree corresponding to the source that is read out of the given IO object using the `#gets` method and return it within a parse result
|
23
24
|
* `Prism.parse_lex(source)` - parse the syntax tree corresponding to the given source string and return it within a parse result, along with the tokens
|
24
25
|
* `Prism.parse_lex_file(filepath)` - parse the syntax tree corresponding to the given source file and return it within a parse result, along with the tokens
|
25
|
-
* `Prism.load(source, serialized)` - load the serialized syntax tree using the source as a reference into a syntax tree
|
26
|
+
* `Prism.load(source, serialized, freeze = false)` - load the serialized syntax tree using the source as a reference into a syntax tree
|
26
27
|
* `Prism.parse_comments(source)` - parse the comments corresponding to the given source string and return them
|
27
28
|
* `Prism.parse_file_comments(filepath)` - parse the comments corresponding to the given source file and return them
|
28
29
|
* `Prism.parse_success?(source)` - parse the syntax tree corresponding to the given source string and return true if it was parsed without errors
|
data/docs/serialization.md
CHANGED
@@ -55,18 +55,28 @@ The comment type is one of:
|
|
55
55
|
|
56
56
|
| # bytes | field |
|
57
57
|
| --- | --- |
|
58
|
+
| varuint | type |
|
58
59
|
| string | error message (ASCII-only characters) |
|
59
60
|
| location | the location in the source this error applies to |
|
60
|
-
| `1` | the level of the error: `0` for `fatal` |
|
61
|
+
| `1` | the level of the error: `0` for `fatal`, `1` for `argument`, `2` for `load` |
|
61
62
|
|
62
|
-
|
63
|
+
### warning
|
63
64
|
|
64
65
|
| # bytes | field |
|
65
66
|
| --- | --- |
|
67
|
+
| varuint | type |
|
66
68
|
| string | warning message (ASCII-only characters) |
|
67
69
|
| location | the location in the source this warning applies to |
|
68
70
|
| `1` | the level of the warning: `0` for `default` and `1` for `verbose` |
|
69
71
|
|
72
|
+
### integer
|
73
|
+
|
74
|
+
| # bytes | field |
|
75
|
+
| --- | --- |
|
76
|
+
| `1` | `1` if the integer is negative, `0` if the integer is non-negative |
|
77
|
+
| varuint | the number of words in this integer |
|
78
|
+
| varuint+ | the words of the integer, least-significant to most-significant |
|
79
|
+
|
70
80
|
## Structure
|
71
81
|
|
72
82
|
The serialized string representing the syntax tree is composed of three parts: the header, the body, and the constant pool.
|
@@ -106,16 +116,20 @@ Each node is structured like the following table:
|
|
106
116
|
| # bytes | field |
|
107
117
|
| --- | --- |
|
108
118
|
| `1` | node type |
|
119
|
+
| varuint | node identifier |
|
109
120
|
| location | node location |
|
121
|
+
| varuint | node flags |
|
110
122
|
|
111
123
|
Every field on the node is then appended to the serialized string. The fields can be determined by referencing `config.yml`. Depending on the type of field, it could take a couple of different forms, described below:
|
112
124
|
|
125
|
+
* `double` - A field that is a `double`. This is structured as a sequence of 8 bytes in native endian order.
|
113
126
|
* `node` - A field that is a node. This is structured just like the parent node.
|
114
127
|
* `node?` - A field that is a node that is optionally present. If the node is not present, then a single `0` byte will be written in its place. If it is present, then it will be structured just like the parent node.
|
115
128
|
* `node[]` - A field that is an array of nodes. This is structured as a variable-length integer length, followed by the child nodes themselves.
|
116
129
|
* `string` - A field that is a string. For example, this is used as the name of the method in a call node, since it cannot directly reference the source string (as in `@-` or `foo=`). This is structured as a variable-length integer byte length, followed by the string itself (_without_ a trailing null byte).
|
117
130
|
* `constant` - A variable-length integer that represents an index in the constant pool.
|
118
131
|
* `constant?` - An optional variable-length integer that represents an index in the constant pool. If it's not present, then a single `0` byte will be written in its place.
|
132
|
+
* `integer` - A field that represents an arbitrary-sized integer. The structure is listed above.
|
119
133
|
* `location` - A field that is a location. This is structured as a variable-length integer start followed by a variable-length integer length.
|
120
134
|
* `location?` - A field that is a location that is optionally present. If the location is not present, then a single `0` byte will be written in its place. If it is present, then it will be structured just like the `location` child node.
|
121
135
|
* `uint8` - A field that is an 8-bit unsigned integer. This is structured as a single byte.
|
@@ -185,21 +199,31 @@ The final argument to `pm_serialize_parse` is an optional string that controls t
|
|
185
199
|
| `4` | the length of the encoding |
|
186
200
|
| ... | the encoding bytes |
|
187
201
|
| `1` | frozen string literal |
|
188
|
-
| `1` |
|
202
|
+
| `1` | command line flags |
|
189
203
|
| `1` | syntax version, see [pm_options_version_t](https://github.com/ruby/prism/blob/main/include/prism/options.h) for valid values |
|
204
|
+
| `1` | whether or not the encoding is locked (should almost always be false) |
|
190
205
|
| `4` | the number of scopes |
|
191
206
|
| ... | the scopes |
|
192
207
|
|
208
|
+
Command line flags are a bitset. By default every flag is `0`. It includes the following values:
|
209
|
+
|
210
|
+
* `0x1` - the `-a` option
|
211
|
+
* `0x2` - the `-e` option
|
212
|
+
* `0x4` - the `-l` option
|
213
|
+
* `0x8` - the `-n` option
|
214
|
+
* `0x10` - the `-p` option
|
215
|
+
* `0x20` - the `-x` option
|
216
|
+
|
193
217
|
Scopes are ordered from the outermost scope to the innermost one.
|
194
218
|
|
195
|
-
Each scope is
|
219
|
+
Each scope is laid out as follows:
|
196
220
|
|
197
221
|
| # bytes | field |
|
198
222
|
| ------- | -------------------------- |
|
199
223
|
| `4` | the number of locals |
|
200
224
|
| ... | the locals |
|
201
225
|
|
202
|
-
Each local is
|
226
|
+
Each local is laid out as follows:
|
203
227
|
|
204
228
|
| # bytes | field |
|
205
229
|
| ------- | -------------------------- |
|