jruby-prism-parser 0.24.0-java → 1.4.0-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (148) hide show
  1. checksums.yaml +4 -4
  2. data/BSDmakefile +58 -0
  3. data/CHANGELOG.md +269 -1
  4. data/CONTRIBUTING.md +0 -4
  5. data/Makefile +25 -18
  6. data/README.md +57 -6
  7. data/config.yml +1724 -140
  8. data/docs/build_system.md +39 -11
  9. data/docs/configuration.md +4 -0
  10. data/docs/cruby_compilation.md +1 -1
  11. data/docs/fuzzing.md +1 -1
  12. data/docs/parser_translation.md +14 -9
  13. data/docs/parsing_rules.md +4 -1
  14. data/docs/releasing.md +8 -10
  15. data/docs/relocation.md +34 -0
  16. data/docs/ripper_translation.md +72 -0
  17. data/docs/ruby_api.md +2 -1
  18. data/docs/serialization.md +29 -5
  19. data/ext/prism/api_node.c +3395 -1999
  20. data/ext/prism/api_pack.c +9 -0
  21. data/ext/prism/extconf.rb +55 -34
  22. data/ext/prism/extension.c +597 -346
  23. data/ext/prism/extension.h +6 -5
  24. data/include/prism/ast.h +2612 -455
  25. data/include/prism/defines.h +160 -2
  26. data/include/prism/diagnostic.h +188 -76
  27. data/include/prism/encoding.h +22 -4
  28. data/include/prism/node.h +89 -17
  29. data/include/prism/options.h +224 -12
  30. data/include/prism/pack.h +11 -0
  31. data/include/prism/parser.h +267 -66
  32. data/include/prism/prettyprint.h +8 -0
  33. data/include/prism/regexp.h +18 -8
  34. data/include/prism/static_literals.h +121 -0
  35. data/include/prism/util/pm_buffer.h +75 -2
  36. data/include/prism/util/pm_char.h +1 -2
  37. data/include/prism/util/pm_constant_pool.h +18 -9
  38. data/include/prism/util/pm_integer.h +126 -0
  39. data/include/prism/util/pm_list.h +1 -1
  40. data/include/prism/util/pm_newline_list.h +19 -0
  41. data/include/prism/util/pm_string.h +48 -8
  42. data/include/prism/version.h +3 -3
  43. data/include/prism.h +99 -5
  44. data/jruby-prism.jar +0 -0
  45. data/lib/prism/compiler.rb +11 -1
  46. data/lib/prism/desugar_compiler.rb +113 -74
  47. data/lib/prism/dispatcher.rb +45 -1
  48. data/lib/prism/dot_visitor.rb +201 -77
  49. data/lib/prism/dsl.rb +673 -461
  50. data/lib/prism/ffi.rb +233 -45
  51. data/lib/prism/inspect_visitor.rb +2389 -0
  52. data/lib/prism/lex_compat.rb +35 -16
  53. data/lib/prism/mutation_compiler.rb +24 -8
  54. data/lib/prism/node.rb +7731 -8460
  55. data/lib/prism/node_ext.rb +328 -32
  56. data/lib/prism/pack.rb +4 -0
  57. data/lib/prism/parse_result/comments.rb +34 -24
  58. data/lib/prism/parse_result/errors.rb +65 -0
  59. data/lib/prism/parse_result/newlines.rb +102 -12
  60. data/lib/prism/parse_result.rb +448 -44
  61. data/lib/prism/pattern.rb +28 -10
  62. data/lib/prism/polyfill/append_as_bytes.rb +15 -0
  63. data/lib/prism/polyfill/byteindex.rb +13 -0
  64. data/lib/prism/polyfill/unpack1.rb +14 -0
  65. data/lib/prism/reflection.rb +413 -0
  66. data/lib/prism/relocation.rb +504 -0
  67. data/lib/prism/serialize.rb +1940 -1198
  68. data/lib/prism/string_query.rb +30 -0
  69. data/lib/prism/translation/parser/builder.rb +61 -0
  70. data/lib/prism/translation/parser/compiler.rb +569 -195
  71. data/lib/prism/translation/parser/lexer.rb +516 -39
  72. data/lib/prism/translation/parser.rb +177 -12
  73. data/lib/prism/translation/parser33.rb +1 -1
  74. data/lib/prism/translation/parser34.rb +1 -1
  75. data/lib/prism/translation/parser35.rb +12 -0
  76. data/lib/prism/translation/ripper/sexp.rb +125 -0
  77. data/lib/prism/translation/ripper/shim.rb +5 -0
  78. data/lib/prism/translation/ripper.rb +3224 -462
  79. data/lib/prism/translation/ruby_parser.rb +194 -69
  80. data/lib/prism/translation.rb +4 -1
  81. data/lib/prism/version.rb +1 -1
  82. data/lib/prism/visitor.rb +13 -0
  83. data/lib/prism.rb +17 -27
  84. data/prism.gemspec +57 -17
  85. data/rbi/prism/compiler.rbi +12 -0
  86. data/rbi/prism/dsl.rbi +524 -0
  87. data/rbi/prism/inspect_visitor.rbi +12 -0
  88. data/rbi/prism/node.rbi +8722 -0
  89. data/rbi/prism/node_ext.rbi +107 -0
  90. data/rbi/prism/parse_result.rbi +404 -0
  91. data/rbi/prism/reflection.rbi +58 -0
  92. data/rbi/prism/string_query.rbi +12 -0
  93. data/rbi/prism/translation/parser.rbi +11 -0
  94. data/rbi/prism/translation/parser33.rbi +6 -0
  95. data/rbi/prism/translation/parser34.rbi +6 -0
  96. data/rbi/prism/translation/parser35.rbi +6 -0
  97. data/rbi/prism/translation/ripper.rbi +15 -0
  98. data/rbi/prism/visitor.rbi +473 -0
  99. data/rbi/prism.rbi +44 -7745
  100. data/sig/prism/compiler.rbs +9 -0
  101. data/sig/prism/dispatcher.rbs +16 -0
  102. data/sig/prism/dot_visitor.rbs +6 -0
  103. data/sig/prism/dsl.rbs +351 -0
  104. data/sig/prism/inspect_visitor.rbs +22 -0
  105. data/sig/prism/lex_compat.rbs +10 -0
  106. data/sig/prism/mutation_compiler.rbs +159 -0
  107. data/sig/prism/node.rbs +3614 -0
  108. data/sig/prism/node_ext.rbs +82 -0
  109. data/sig/prism/pack.rbs +43 -0
  110. data/sig/prism/parse_result.rbs +192 -0
  111. data/sig/prism/pattern.rbs +13 -0
  112. data/sig/prism/reflection.rbs +50 -0
  113. data/sig/prism/relocation.rbs +185 -0
  114. data/sig/prism/serialize.rbs +8 -0
  115. data/sig/prism/string_query.rbs +11 -0
  116. data/sig/prism/visitor.rbs +169 -0
  117. data/sig/prism.rbs +248 -4767
  118. data/src/diagnostic.c +672 -230
  119. data/src/encoding.c +211 -108
  120. data/src/node.c +7541 -1653
  121. data/src/options.c +135 -20
  122. data/src/pack.c +33 -17
  123. data/src/prettyprint.c +1543 -1485
  124. data/src/prism.c +7813 -3050
  125. data/src/regexp.c +225 -73
  126. data/src/serialize.c +101 -77
  127. data/src/static_literals.c +617 -0
  128. data/src/token_type.c +14 -13
  129. data/src/util/pm_buffer.c +187 -20
  130. data/src/util/pm_char.c +5 -5
  131. data/src/util/pm_constant_pool.c +39 -19
  132. data/src/util/pm_integer.c +670 -0
  133. data/src/util/pm_list.c +1 -1
  134. data/src/util/pm_newline_list.c +43 -5
  135. data/src/util/pm_string.c +213 -33
  136. data/src/util/pm_strncasecmp.c +13 -1
  137. data/src/util/pm_strpbrk.c +32 -6
  138. metadata +55 -19
  139. data/docs/ripper.md +0 -36
  140. data/include/prism/util/pm_state_stack.h +0 -42
  141. data/include/prism/util/pm_string_list.h +0 -44
  142. data/lib/prism/debug.rb +0 -206
  143. data/lib/prism/node_inspector.rb +0 -68
  144. data/lib/prism/translation/parser/rubocop.rb +0 -45
  145. data/rbi/prism_static.rbi +0 -207
  146. data/sig/prism_static.rbs +0 -201
  147. data/src/util/pm_state_stack.c +0 -25
  148. data/src/util/pm_string_list.c +0 -28
data/docs/build_system.md CHANGED
@@ -16,9 +16,9 @@ The main solution for the second point seems a Makefile, otherwise many of the u
16
16
  ## General Design
17
17
 
18
18
  1. Templates are generated by `templates/template.rb`
19
- 4. The `Makefile` compiles both `libprism.a` and `libprism.{so,dylib,dll}` from the `src/**/*.c` and `include/**/*.h` files
20
- 5. The `Rakefile` `:compile` task ensures the above prerequisites are done, then calls `make`,
21
- and uses `Rake::ExtensionTask` to compile the C extension (using its `extconf.rb`), which uses `libprism.a`
19
+ 2. The `Makefile` compiles both `libprism.a` and `libprism.{so,dylib,dll}` from the `src/**/*.c` and `include/**/*.h` files
20
+ 3. The `Rakefile` `:compile` task ensures the above prerequisites are done, then calls `make`,
21
+ and uses `Rake::ExtensionTask` to compile the C extension (using its `extconf.rb`)
22
22
 
23
23
  This way there is minimal duplication, and each layer builds on the previous one and has its own responsibilities.
24
24
 
@@ -35,14 +35,11 @@ loaded per process (i.e., at most one version of the prism *gem* loaded in a pro
35
35
  ### Building the prism gem by `gem install/bundle install`
36
36
 
37
37
  The gem contains the pre-generated templates.
38
- When installing the gem, `extconf.rb` is used and that:
39
- * runs `make build/libprism.a`
40
- * compiles the C extension with mkmf
41
38
 
42
- When installing the gem on JRuby and TruffleRuby, no C extension is built, so instead of the last step,
43
- there is Ruby code using FFI which uses `libprism.{so,dylib,dll}`
44
- to implement the same methods as the C extension, but using serialization instead of many native calls/accesses
45
- (JRuby does not support C extensions, serialization is faster on TruffleRuby than the C extension).
39
+ When installing the gem on CRuby, `extconf.rb` is used and that compiles the C extension with mkmf, including both the extension files and the sources of prism itself.
40
+
41
+ When installing the gem on JRuby and TruffleRuby, no C extension is built, so instead the `extconf.rb` runs `make build/libprism.{so,dylib,dll}`.
42
+ There is Ruby code using FFI which uses `libprism.{so,dylib,dll}` to implement the same methods as the C extension, but using serialization instead of many native calls/accesses (JRuby does not support C extensions, serialization is faster on TruffleRuby than the C extension).
46
43
 
47
44
  ### Building the prism gem from git, e.g. `gem "prism", github: "ruby/prism"`
48
45
 
@@ -66,13 +63,32 @@ The script generates the templates when importing.
66
63
 
67
64
  Then when `mx build` builds TruffleRuby and the `prism` mx project inside, it runs `make`.
68
65
 
69
- Then the `prism bindings` mx project is built, which contains the [bindings](https://github.com/oracle/truffleruby/blob/master/src/main/c/prism_bindings/src/prism_bindings.c)
66
+ Then the `prism bindings` mx project is built, which contains the [bindings](https://github.com/oracle/truffleruby/blob/vm-24.1.1/src/main/c/yarp_bindings/src/yarp_bindings.c)
70
67
  and links to `libprism.a` (to avoid exporting symbols, so no conflict when installing the prism gem).
71
68
 
72
69
  ### Building prism as part of JRuby
73
70
 
74
71
  TODO, similar to TruffleRuby.
75
72
 
73
+ ### Building prism for embedded system
74
+
75
+ For instance, you can build a static library `libprism.a` targeting the Arm Cortex-M0+ embedded system by the commands below:
76
+
77
+ * `templates/template.rb`
78
+ * `CFLAGS="-mcpu=cortex-m0plus" make static CC=arm-none-eabi-gcc`
79
+
80
+ The build process internally looks up `_POSIX_MAPPED_FILES` and `_WIN32` macros to determine whether the functions of the memory map are available on the target platform.
81
+
82
+ ### Building prism with custom memory allocator
83
+
84
+ If you need to use memory allocation functions implemented outside of the standard library, follow these steps:
85
+
86
+ * Add `-D PRISM_XALLOCATOR` to the build options
87
+ * Additionally, include `-I [path/to/custom_allocator]` where your `prism_xallocator.h` is located
88
+ * Link the implementation of `prism_xallocator.c` that contains functions declared in `prism_xallocator.h`
89
+
90
+ For further clarity, refer to `include/prism/defines.h`.
91
+
76
92
  ### Building prism from source as a C library
77
93
 
78
94
  All of the source files match `src/**/*.c` and all of the headers match `include/**/*.h`.
@@ -89,3 +105,15 @@ If you want to build prism as a shared library and link against it, you should c
89
105
  ```
90
106
  MAKEFLAGS="-j10" bundle exec rake compile
91
107
  ```
108
+
109
+ ## Build options
110
+
111
+ * `PRISM_BUILD_DEBUG` - Will cause all file reading to copy into its own allocation to allow easier tracking of reading off the end of the buffer. By default this is off.
112
+ * `PRISM_BUILD_MINIMAL` - Define all of the `PRISM_EXCLUDE_*` flags at once.
113
+ * `PRISM_ENCODING_EXCLUDE_FULL` - Will cause the library to exclude the full encoding API, and only include the minimal number of encodings to support parsing Ruby code without encoding comments. By default this is off.
114
+ * `PRISM_EXPORT_SYMBOLS` - Will cause the shared library to export symbols. By default this is off.
115
+ * `PRISM_EXCLUDE_JSON` - Will cause the library to exclude the JSON API. By default this is off.
116
+ * `PRISM_EXCLUDE_PACK` - Will cause the library to exclude the pack API. By default this is off.
117
+ * `PRISM_EXCLUDE_PRETTYPRINT` - Will cause the library to exclude the prettyprint API. By default this is off.
118
+ * `PRISM_EXCLUDE_SERIALIZATION` - Will cause the library to exclude the serialization API. By default this is off.
119
+ * `PRISM_XALLOCATOR` - Will cause the library to use the custom memory allocator. By default this is off.
@@ -4,6 +4,7 @@ A lot of code in prism's repository is templated from a single configuration fil
4
4
 
5
5
  * `ext/prism/api_node.c` - for defining how to build Ruby objects for the nodes out of C structs
6
6
  * `include/prism/ast.h` - for defining the C structs that represent the nodes
7
+ * `include/prism/diagnostic.h` - for defining the diagnostics
7
8
  * `javascript/src/deserialize.js` - for defining how to deserialize the nodes in JavaScript
8
9
  * `javascript/src/nodes.js` - for defining the nodes in JavaScript
9
10
  * `java/org/prism/AbstractNodeVisitor.java` - for defining the visitor interface for the nodes in Java
@@ -13,10 +14,13 @@ A lot of code in prism's repository is templated from a single configuration fil
13
14
  * `lib/prism/dispatcher.rb` - for defining the dispatch visitors for the nodes in Ruby
14
15
  * `lib/prism/dot_visitor.rb` - for defining the dot visitor for the nodes in Ruby
15
16
  * `lib/prism/dsl.rb` - for defining the DSL for the nodes in Ruby
17
+ * `lib/prism/inspect_visitor.rb` - for defining the `#inspect` methods on nodes in Ruby
16
18
  * `lib/prism/mutation_compiler.rb` - for defining the mutation compiler for the nodes in Ruby
17
19
  * `lib/prism/node.rb` - for defining the nodes in Ruby
20
+ * `lib/prism/reflection.rb` - for defining the reflection API in Ruby
18
21
  * `lib/prism/serialize.rb` - for defining how to deserialize the nodes in Ruby
19
22
  * `lib/prism/visitor.rb` - for defining the visitor interface for the nodes in Ruby
23
+ * `src/diagnostic.c` - for defining how to build diagnostics
20
24
  * `src/node.c` - for defining how to free the nodes in C and calculate the size in memory in C
21
25
  * `src/prettyprint.c` - for defining how to prettyprint the nodes in C
22
26
  * `src/serialize.c` - for defining how to serialize the nodes in C
@@ -10,7 +10,7 @@ ruby/ruby uses the Prism code to generate an AST from which it can generate inst
10
10
 
11
11
  1. Compute an AST
12
12
 
13
- Syncing over the Prism code allows ruby/ruby to compute the AST using Prism. It currently does this within [`iseq.c`](https://github.com/ruby/ruby/blob/master/iseq.c) using the `pm_parser_init` fuction.
13
+ Syncing over the Prism code allows ruby/ruby to compute the AST using Prism. It currently does this within [`iseq.c`](https://github.com/ruby/ruby/blob/master/iseq.c) using the `pm_parser_init` function.
14
14
 
15
15
  2. Run a first pass of compilation
16
16
 
data/docs/fuzzing.md CHANGED
@@ -26,7 +26,7 @@ fuzz
26
26
  There are currently three fuzzing targets
27
27
 
28
28
  - `pm_serialize_parse` (parse)
29
- - `pm_regexp_named_capture_group_names` (regexp)
29
+ - `pm_regexp_parse` (regexp)
30
30
 
31
31
  Respectively, fuzzing can be performed with
32
32
 
@@ -16,19 +16,24 @@ Prism::Translation::Parser.parse_file("path/to/file.rb")
16
16
 
17
17
  ### RuboCop
18
18
 
19
- To run RuboCop using the `prism` gem as the parser, you will need to require the `prism/translation/parser/rubocop` file. This file injects `prism` into the known options for both `rubocop` and `rubocop-ast`, such that you can specify it in your `.rubocop.yml` file. Unfortunately `rubocop` doesn't support any direct way to do this, so we have to get a bit hacky.
19
+ Prism as a parser engine is directly supported since RuboCop 1.62. The class used for parsing is `Prism::Translation::Parser`.
20
20
 
21
- First, set the `TargetRubyVersion` in your RuboCop configuration file to `80_82_73_83_77.33`. This is the version of Ruby that `prism` reports itself as. (The leading numbers are the ASCII values for `PRISM`.)
21
+ First, specify `prism` in your Gemfile:
22
22
 
23
- ```yaml
24
- AllCops:
25
- TargetRubyVersion: 80_82_73_83_77.33
23
+ ```ruby
24
+ gem "prism"
26
25
  ```
27
26
 
28
- Now when you run `rubocop` you will need to require the `prism/translation/parser/rubocop` file before executing so that it can inject the `prism` parser into the known options.
27
+ To use Prism with RuboCop, specify `ParserEngine` and `TargetRubyVersion` in your RuboCop configuration file:
29
28
 
29
+ ```yaml
30
+ AllCops:
31
+ ParserEngine: parser_prism
32
+ TargetRubyVersion: 3.3
30
33
  ```
31
- bundle exec ruby -rprism/translation/parser/rubocop $(bundle exec which rubocop)
32
- ```
33
34
 
34
- This should run RuboCop using the `prism` parser.
35
+ The default value for `ParserEngine` is `parser_whitequark`, which indicates the Parser gem. You need to explicitly switch it to `parser_prism` to indicate Prism. Additionally, the value for `TargetRubyVersion` must be specified as `3.3` or higher, as Prism supports parsing versions of Ruby 3.3 and higher.
36
+ The parser class is determined by the combination of values for `ParserEngine` and `TargetRubyVersion`. For example, if `TargetRubyVersion: 3.3`, parsing is performed by `Prism::Translation::Parser33`, and for `TargetRubyVersion: 3.4`, parsing is performed by `Prism::Translation::Parser34`.
37
+
38
+ For further information, please refer to the RuboCop documentation:
39
+ https://docs.rubocop.org/rubocop/configuration.html#setting-the-parser-engine
@@ -12,7 +12,10 @@ Constants in Ruby begin with an upper-case letter. This is followed by any numbe
12
12
 
13
13
  Most expressions in CRuby are non-void. This means the expression they represent resolves to a value. For example, `1 + 2` is a non-void expression, because it resolves to a method call. Even something like `class Foo; end` is a non-void expression, because it returns the last evaluated expression in the body of the class (or `nil`).
14
14
 
15
- Certain nodes, however, are void expressions, and cannot be combined to form larger expressions. For example, `BEGIN {}`, `END {}`, `alias foo bar`, and `undef foo`.
15
+ Certain nodes, however, are void expressions, and cannot be combined to form larger expressions.
16
+ * `BEGIN {}`, `END {}`, `alias foo bar`, and `undef foo` can only be at a statement position.
17
+ * The "jumps": `return`, `break`, `next`, `redo`, `retry` are void expressions.
18
+ * `value => pattern` is also considered a void expression.
16
19
 
17
20
  ## Identifiers
18
21
 
data/docs/releasing.md CHANGED
@@ -40,22 +40,14 @@ ruby -pi -e 'gsub(/^ruby-prism-sys = \{ version = ".+?"/, %Q{ruby-prism-sys = \{
40
40
  * Update the `Gemfile.lock` file:
41
41
 
42
42
  ```sh
43
- chruby ruby-3.4.0-dev
43
+ chruby ruby-3.5.0-dev
44
44
  bundle install
45
45
  ```
46
46
 
47
47
  * Update the version-specific lockfiles:
48
48
 
49
49
  ```sh
50
- chruby ruby-2.7.8 && BUNDLE_GEMFILE=gemfiles/2.7/Gemfile bundle install
51
- chruby ruby-3.0.6 && BUNDLE_GEMFILE=gemfiles/3.0/Gemfile bundle install
52
- chruby ruby-3.1.4 && BUNDLE_GEMFILE=gemfiles/3.1/Gemfile bundle install
53
- chruby ruby-3.2.3 && BUNDLE_GEMFILE=gemfiles/3.2/Gemfile bundle install
54
- chruby ruby-3.3.0 && BUNDLE_GEMFILE=gemfiles/3.3/Gemfile bundle install
55
- chruby ruby-3.4.0-dev && BUNDLE_GEMFILE=gemfiles/3.4/Gemfile bundle install
56
- chruby jruby-9.4.5.0 && BUNDLE_GEMFILE=gemfiles/jruby/Gemfile bundle install
57
- chruby truffleruby-23.1.2 && BUNDLE_GEMFILE=gemfiles/truffleruby/Gemfile bundle install
58
- chruby ruby-3.4.0-dev
50
+ bin/prism bundle install
59
51
  ```
60
52
 
61
53
  * Update the cargo lockfiles:
@@ -70,6 +62,12 @@ bundle exec rake cargo:build
70
62
  git commit -am "Bump to v$PRISM_VERSION"
71
63
  ```
72
64
 
65
+ * Push up the changes:
66
+
67
+ ```sh
68
+ git push
69
+ ```
70
+
73
71
  ## Publishing
74
72
 
75
73
  * Update the GitHub release page with a copy of the latest entry in the `CHANGELOG.md` file.
@@ -0,0 +1,34 @@
1
+ # Relocation
2
+
3
+ Prism parses deterministically for the same input. This provides a nice property that is exposed through the `#node_id` API on nodes. Effectively this means that for the same input, these values will remain consistent every time the source is parsed. This means we can reparse the same source with a `#node_id` value and find the exact same node again.
4
+
5
+ The `Relocation` module provides an API around this property. It allows you to "save" nodes and locations using a minimal amount of memory (just the node_id and a field identifier) and then reify them later. This minimizes the amount of memory you need to allocate to store this information because it does not keep around a pointer to the source string.
6
+
7
+ ## Getting started
8
+
9
+ To get started with the `Relocation` module, you would first instantiate a `Repository` object. You do this through a DSL that chains method calls for configuration. For example, if for every entry in the repository you want to store the start and end lines, the start and end code unit columns in UTF-16, and the leading comments, you would:
10
+
11
+ ```ruby
12
+ repository = Prism::Relocation.filepath("path/to/file").lines.code_unit_columns(Encoding::UTF_16).leading_comments
13
+ ```
14
+
15
+ Now that you have the repository, you can pass it into any of the `save*` APIs on nodes or locations to create entries in the repository that will be lazily reified.
16
+
17
+ ```ruby
18
+ # assume that node is a Prism::ClassNode object
19
+ entry = node.constant_path.save(repository)
20
+ ```
21
+
22
+ Now that you have the entry object, you do not need to keep around a reference to the repository; it will be cleaned up on its own when the last entry is reified. Now, whenever you need to, you may call the associated field methods on the entry object, as in:
23
+
24
+ ```ruby
25
+ entry.start_line
26
+ entry.end_line
27
+
28
+ entry.start_code_units_column
29
+ entry.end_code_units_column
30
+
31
+ entry.leading_comments
32
+ ```
33
+
34
+ Note that if you had configured other fields to be saved, you would be able to access them as well. The first time one of these fields is accessed, the repository will reify every entry it knows about and then clean itself up. In this way, you can effectively treat them as if you had kept around lightweight versions of `Prism::Node` or `Prism::Location` objects.
@@ -0,0 +1,72 @@
1
+ # Ripper translation
2
+
3
+ Prism provides the ability to mirror the `Ripper` standard library. You can do this by:
4
+
5
+ ```ruby
6
+ require "prism/translation/ripper/shim"
7
+ ```
8
+
9
+ This provides the APIs like:
10
+
11
+ ```ruby
12
+ Ripper.lex
13
+ Ripper.parse
14
+ Ripper.sexp_raw
15
+ Ripper.sexp
16
+
17
+ Ripper::SexpBuilder
18
+ Ripper::SexpBuilderPP
19
+ ```
20
+
21
+ Briefly, `Ripper` is a streaming parser that allows you to construct your own syntax tree. As an example:
22
+
23
+ ```ruby
24
+ class ArithmeticRipper < Prism::Translation::Ripper
25
+ def on_binary(left, operator, right)
26
+ left.public_send(operator, right)
27
+ end
28
+
29
+ def on_int(value)
30
+ value.to_i
31
+ end
32
+
33
+ def on_program(stmts)
34
+ stmts
35
+ end
36
+
37
+ def on_stmts_new
38
+ []
39
+ end
40
+
41
+ def on_stmts_add(stmts, stmt)
42
+ stmts << stmt
43
+ stmts
44
+ end
45
+ end
46
+
47
+ ArithmeticRipper.new("1 + 2 - 3").parse # => [0]
48
+ ```
49
+
50
+ The exact names of the `on_*` methods are listed in the `Ripper` source.
51
+
52
+ ## Background
53
+
54
+ It is helpful to understand the differences between the `Ripper` library and the `Prism` library. Both libraries perform parsing and provide you with APIs to manipulate and understand the resulting syntax tree. However, there are a few key differences.
55
+
56
+ ### Design
57
+
58
+ `Ripper` is a streaming parser. This means as it is parsing Ruby code, it dispatches events back to the consumer. This allows quite a bit of flexibility. You can use it to build your own syntax tree or to find specific patterns in the code. `Prism` on the other hand returns to you the completed syntax tree _before_ it allows you to manipulate it. This means the tree that you get back is the only representation that can be generated by the parser _at parse time_ (but of course can be manipulated later).
59
+
60
+ ### Fields
61
+
62
+ We use the term "field" to mean a piece of information on a syntax tree node. `Ripper` provides the minimal number of fields to accurately represent the syntax tree for the purposes of compilation/interpretation. For example, in the callbacks for nodes that are based on keywords (`class`, `module`, `for`, `while`, etc.) you are not given the keyword itself, you need to attach it on your own. In other cases, tokens are not necessarily dispatched at all, meaning you need to find them yourself. `Prism` provides the opposite: the maximum number of fields on nodes is provided. As a tradeoff, this requires more memory, but this is chosen to make it easier on consumers.
63
+
64
+ ### Maintainability
65
+
66
+ The `Ripper` interface is not guaranteed in any way, and tends to change between patch versions of CRuby. This is largely due to the fact that `Ripper` is a by-product of the generated parser, as opposed to its own parser. As an example, in the expression `foo::bar = baz`, there are three different representations possible for the call operator, including:
67
+
68
+ * `:"::"` - Ruby 1.9 to Ruby 3.1.4
69
+ * `73` - Ruby 3.1.5 to Ruby 3.1.6
70
+ * `[:@op, "::", [lineno, column]]` - Ruby 3.2.0 and later
71
+
72
+ The `Prism` interface is guaranteed going forward to be consistent, and is the official Ruby syntax tree interface. This means you can rely on this interface without having to worry about individual changes between Ruby versions. It also is a gem, which means it is versioned based on the gem version, as opposed to being versioned based on the Ruby version. Finally, you can use `Prism` to parse multiple versions of Ruby, whereas `Ripper` is tied to the Ruby version it is running on.
data/docs/ruby_api.md CHANGED
@@ -20,9 +20,10 @@ The full API is documented below.
20
20
  * `Prism.lex_file(filepath)` - parse the tokens corresponding to the given source file and return them as an array within a parse result
21
21
  * `Prism.parse(source)` - parse the syntax tree corresponding to the given source string and return it within a parse result
22
22
  * `Prism.parse_file(filepath)` - parse the syntax tree corresponding to the given source file and return it within a parse result
23
+ * `Prism.parse_stream(io)` - parse the syntax tree corresponding to the source that is read out of the given IO object using the `#gets` method and return it within a parse result
23
24
  * `Prism.parse_lex(source)` - parse the syntax tree corresponding to the given source string and return it within a parse result, along with the tokens
24
25
  * `Prism.parse_lex_file(filepath)` - parse the syntax tree corresponding to the given source file and return it within a parse result, along with the tokens
25
- * `Prism.load(source, serialized)` - load the serialized syntax tree using the source as a reference into a syntax tree
26
+ * `Prism.load(source, serialized, freeze = false)` - load the serialized syntax tree using the source as a reference into a syntax tree
26
27
  * `Prism.parse_comments(source)` - parse the comments corresponding to the given source string and return them
27
28
  * `Prism.parse_file_comments(source)` - parse the comments corresponding to the given source file and return them
28
29
  * `Prism.parse_success?(source)` - parse the syntax tree corresponding to the given source string and return true if it was parsed without errors
@@ -55,18 +55,28 @@ The comment type is one of:
55
55
 
56
56
  | # bytes | field |
57
57
  | --- | --- |
58
+ | varuint | type |
58
59
  | string | error message (ASCII-only characters) |
59
60
  | location | the location in the source this error applies to |
60
- | `1` | the level of the error: `0` for `fatal` |
61
+ | `1` | the level of the error: `0` for `fatal`, `1` for `argument`, `2` for `load` |
61
62
 
62
- ## warning
63
+ ### warning
63
64
 
64
65
  | # bytes | field |
65
66
  | --- | --- |
67
+ | varuint | type |
66
68
  | string | warning message (ASCII-only characters) |
67
69
  | location | the location in the source this warning applies to |
68
70
  | `1` | the level of the warning: `0` for `default` and `1` for `verbose` |
69
71
 
72
+ ### integer
73
+
74
+ | # bytes | field |
75
+ | --- | --- |
76
+ | `1` | `1` if the integer is negative, `0` if the integer is positive |
77
+ | varuint | the number of words in this integer |
78
+ | varuint+ | the words of the integer, least-significant to most-significant |
79
+
70
80
  ## Structure
71
81
 
72
82
  The serialized string representing the syntax tree is composed of three parts: the header, the body, and the constant pool.
@@ -106,16 +116,20 @@ Each node is structured like the following table:
106
116
  | # bytes | field |
107
117
  | --- | --- |
108
118
  | `1` | node type |
119
+ | varuint | node identifier |
109
120
  | location | node location |
121
+ | varuint | node flags |
110
122
 
111
123
  Every field on the node is then appended to the serialized string. The fields can be determined by referencing `config.yml`. Depending on the type of field, it could take a couple of different forms, described below:
112
124
 
125
+ * `double` - A field that is a `double`. This is structured as a sequence of 8 bytes in native endian order.
113
126
  * `node` - A field that is a node. This is structured just like the parent node.
114
127
  * `node?` - A field that is a node that is optionally present. If the node is not present, then a single `0` byte will be written in its place. If it is present, then it will be structured just like the parent node.
115
128
  * `node[]` - A field that is an array of nodes. This is structured as a variable-length integer length, followed by the child nodes themselves.
116
129
  * `string` - A field that is a string. For example, this is used as the name of the method in a call node, since it cannot directly reference the source string (as in `@-` or `foo=`). This is structured as a variable-length integer byte length, followed by the string itself (_without_ a trailing null byte).
117
130
  * `constant` - A variable-length integer that represents an index in the constant pool.
118
131
  * `constant?` - An optional variable-length integer that represents an index in the constant pool. If it's not present, then a single `0` byte will be written in its place.
132
+ * `integer` - A field that represents an arbitrary-sized integer. The structure is listed above.
119
133
  * `location` - A field that is a location. This is structured as a variable-length integer start followed by a variable-length integer length.
120
134
  * `location?` - A field that is a location that is optionally present. If the location is not present, then a single `0` byte will be written in its place. If it is present, then it will be structured just like the `location` child node.
121
135
  * `uint8` - A field that is an 8-bit unsigned integer. This is structured as a single byte.
@@ -185,21 +199,31 @@ The final argument to `pm_serialize_parse` is an optional string that controls t
185
199
  | `4` | the length the encoding |
186
200
  | ... | the encoding bytes |
187
201
  | `1` | frozen string literal |
188
- | `1` | suppress warnings |
202
+ | `1` | command line flags |
189
203
  | `1` | syntax version, see [pm_options_version_t](https://github.com/ruby/prism/blob/main/include/prism/options.h) for valid values |
204
+ | `1` | whether or not the encoding is locked (should almost always be false) |
190
205
  | `4` | the number of scopes |
191
206
  | ... | the scopes |
192
207
 
208
+ Command line flags are a bitset. By default every flag is `0`. It includes the following values:
209
+
210
+ * `0x1` - the `-a` option
211
+ * `0x2` - the `-e` option
212
+ * `0x4` - the `-l` option
213
+ * `0x8` - the `-n` option
214
+ * `0x10` - the `-p` option
215
+ * `0x20` - the `-x` option
216
+
193
217
  Scopes are ordered from the outermost scope to the innermost one.
194
218
 
195
- Each scope is layed out as follows:
219
+ Each scope is laid out as follows:
196
220
 
197
221
  | # bytes | field |
198
222
  | ------- | -------------------------- |
199
223
  | `4` | the number of locals |
200
224
  | ... | the locals |
201
225
 
202
- Each local is layed out as follows:
226
+ Each local is laid out as follows:
203
227
 
204
228
  | # bytes | field |
205
229
  | ------- | -------------------------- |