yarp 0.11.0 → 0.13.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (118) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +53 -7
  3. data/CONTRIBUTING.md +2 -2
  4. data/Makefile +5 -5
  5. data/README.md +11 -12
  6. data/config.yml +111 -8
  7. data/docs/build_system.md +21 -21
  8. data/docs/building.md +4 -4
  9. data/docs/configuration.md +25 -21
  10. data/docs/design.md +2 -2
  11. data/docs/encoding.md +17 -17
  12. data/docs/fuzzing.md +4 -4
  13. data/docs/heredocs.md +3 -3
  14. data/docs/mapping.md +94 -94
  15. data/docs/ripper.md +4 -4
  16. data/docs/ruby_api.md +11 -11
  17. data/docs/serialization.md +17 -16
  18. data/docs/testing.md +6 -6
  19. data/ext/prism/api_node.c +4725 -0
  20. data/ext/{yarp → prism}/api_pack.c +82 -82
  21. data/ext/{yarp → prism}/extconf.rb +13 -13
  22. data/ext/{yarp → prism}/extension.c +180 -166
  23. data/ext/prism/extension.h +18 -0
  24. data/include/prism/ast.h +1932 -0
  25. data/include/prism/defines.h +45 -0
  26. data/include/prism/diagnostic.h +231 -0
  27. data/include/{yarp/enc/yp_encoding.h → prism/enc/pm_encoding.h} +40 -40
  28. data/include/prism/node.h +41 -0
  29. data/include/prism/pack.h +141 -0
  30. data/include/{yarp → prism}/parser.h +184 -155
  31. data/include/prism/regexp.h +19 -0
  32. data/include/prism/unescape.h +48 -0
  33. data/include/prism/util/pm_buffer.h +51 -0
  34. data/include/{yarp/util/yp_char.h → prism/util/pm_char.h} +37 -21
  35. data/include/{yarp/util/yp_constant_pool.h → prism/util/pm_constant_pool.h} +26 -22
  36. data/include/{yarp/util/yp_list.h → prism/util/pm_list.h} +21 -21
  37. data/include/prism/util/pm_memchr.h +14 -0
  38. data/include/{yarp/util/yp_newline_list.h → prism/util/pm_newline_list.h} +11 -11
  39. data/include/prism/util/pm_state_stack.h +24 -0
  40. data/include/{yarp/util/yp_string.h → prism/util/pm_string.h} +20 -20
  41. data/include/prism/util/pm_string_list.h +25 -0
  42. data/include/{yarp/util/yp_strpbrk.h → prism/util/pm_strpbrk.h} +7 -7
  43. data/include/prism/version.h +4 -0
  44. data/include/prism.h +82 -0
  45. data/lib/prism/compiler.rb +465 -0
  46. data/lib/prism/debug.rb +157 -0
  47. data/lib/{yarp/desugar_visitor.rb → prism/desugar_compiler.rb} +4 -2
  48. data/lib/prism/dispatcher.rb +2051 -0
  49. data/lib/prism/dsl.rb +750 -0
  50. data/lib/{yarp → prism}/ffi.rb +66 -67
  51. data/lib/{yarp → prism}/lex_compat.rb +40 -43
  52. data/lib/{yarp/mutation_visitor.rb → prism/mutation_compiler.rb} +31 -6
  53. data/lib/{yarp → prism}/node.rb +4042 -1201
  54. data/lib/prism/node_ext.rb +55 -0
  55. data/lib/prism/node_inspector.rb +68 -0
  56. data/lib/{yarp → prism}/pack.rb +1 -1
  57. data/lib/{yarp → prism}/parse_result/comments.rb +1 -1
  58. data/lib/{yarp → prism}/parse_result/newlines.rb +1 -1
  59. data/lib/prism/parse_result.rb +266 -0
  60. data/lib/{yarp → prism}/pattern.rb +14 -14
  61. data/lib/{yarp → prism}/ripper_compat.rb +5 -5
  62. data/lib/{yarp → prism}/serialize.rb +157 -142
  63. data/lib/prism/visitor.rb +470 -0
  64. data/lib/prism.rb +64 -0
  65. data/lib/yarp.rb +2 -599
  66. data/src/diagnostic.c +213 -203
  67. data/src/enc/pm_big5.c +52 -0
  68. data/src/enc/pm_euc_jp.c +58 -0
  69. data/src/enc/{yp_gbk.c → pm_gbk.c} +16 -16
  70. data/src/enc/pm_shift_jis.c +56 -0
  71. data/src/enc/{yp_tables.c → pm_tables.c} +69 -69
  72. data/src/enc/{yp_unicode.c → pm_unicode.c} +40 -40
  73. data/src/enc/pm_windows_31j.c +56 -0
  74. data/src/node.c +1328 -1189
  75. data/src/pack.c +247 -247
  76. data/src/prettyprint.c +1504 -1440
  77. data/src/prism.c +14587 -0
  78. data/src/regexp.c +132 -132
  79. data/src/serialize.c +1137 -1097
  80. data/src/token_type.c +169 -167
  81. data/src/unescape.c +106 -87
  82. data/src/util/pm_buffer.c +103 -0
  83. data/src/util/pm_char.c +272 -0
  84. data/src/util/pm_constant_pool.c +252 -0
  85. data/src/util/{yp_list.c → pm_list.c} +10 -10
  86. data/src/util/{yp_memchr.c → pm_memchr.c} +6 -4
  87. data/src/util/{yp_newline_list.c → pm_newline_list.c} +21 -21
  88. data/src/util/{yp_state_stack.c → pm_state_stack.c} +4 -4
  89. data/src/util/{yp_string.c → pm_string.c} +38 -38
  90. data/src/util/pm_string_list.c +29 -0
  91. data/src/util/{yp_strncasecmp.c → pm_strncasecmp.c} +1 -1
  92. data/src/util/{yp_strpbrk.c → pm_strpbrk.c} +8 -8
  93. data/yarp.gemspec +68 -59
  94. metadata +71 -62
  95. data/ext/yarp/api_node.c +0 -4562
  96. data/ext/yarp/extension.h +0 -18
  97. data/include/yarp/ast.h +0 -1976
  98. data/include/yarp/defines.h +0 -45
  99. data/include/yarp/diagnostic.h +0 -221
  100. data/include/yarp/node.h +0 -42
  101. data/include/yarp/pack.h +0 -141
  102. data/include/yarp/regexp.h +0 -19
  103. data/include/yarp/unescape.h +0 -44
  104. data/include/yarp/util/yp_buffer.h +0 -51
  105. data/include/yarp/util/yp_memchr.h +0 -14
  106. data/include/yarp/util/yp_state_stack.h +0 -24
  107. data/include/yarp/util/yp_string_list.h +0 -25
  108. data/include/yarp/version.h +0 -4
  109. data/include/yarp.h +0 -82
  110. data/src/enc/yp_big5.c +0 -52
  111. data/src/enc/yp_euc_jp.c +0 -58
  112. data/src/enc/yp_shift_jis.c +0 -56
  113. data/src/enc/yp_windows_31j.c +0 -56
  114. data/src/util/yp_buffer.c +0 -101
  115. data/src/util/yp_char.c +0 -224
  116. data/src/util/yp_constant_pool.c +0 -180
  117. data/src/util/yp_string_list.c +0 -29
  118. data/src/yarp.c +0 -13955
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9d096d8bdda05ec360a58c8c6d98d1aeb6680b95a983a7d10d916d4a941be1c8
4
- data.tar.gz: 7833c692ae2547f6628d15972b539484e06804179277d8f73a33cf448687f48b
3
+ metadata.gz: da8f3b5f2cdae92e669cfe7865566c2014b57746644fd550e23dfe27a7f4a5cd
4
+ data.tar.gz: 994f5db733b1261c76920a6426820dc941a72ba2619f71de0b7ac731da68d57d
5
5
  SHA512:
6
- metadata.gz: 6c42795d64e15210922249c5b81a7bd8a4f04f6589f8266aaf73341afbd72bde016aeafa879eddb12dd0ae75b6e2b91bba7c28599495f31030031117d12be9cf
7
- data.tar.gz: 6534173b9aa41bf180358750198c16749279723575045fffb819040509f6ca2e4f264653ad7857af152f64f7bdb16e95cbdedaab474bb62d86f9f920c5e44d83
6
+ metadata.gz: bc2bc26648b224d5195a1649f906a7d66d6772626b20481b17ccff76a224946b614919aa2d84afb1756ed6bae4e80e460715e25e0a1f0af41b5f5c02d217c99d
7
+ data.tar.gz: db5a41e5abd08dbd4184e1fb6284c4c414f9377894ef4cca689f15b10c388427d2d9e465067159abce7cb8e545bc32c3996983feb5f1ffbd340c84d48796c04b
data/CHANGELOG.md CHANGED
@@ -6,6 +6,50 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) a
6
6
 
7
7
  ## [Unreleased]
8
8
 
9
+ ## [0.13.0] - 2023-09-29
10
+
11
+ ### Added
12
+
13
+ - `BEGIN {}` blocks are only allowed at the top-level, and will now provide a syntax error if they are not.
14
+ - Numbered parameters are not allowed in block parameters, and will now provide a syntax error if they are.
15
+ - Many more Ruby modules and classes are now documented. Also, many have been moved into their own files and autoloaded so that initial boot time of the gem is much faster.
16
+ - `PM_TOKEN_METHOD_NAME` is introduced, used to indicate an identifier that is definitely a method name because it has an `!` or `?` at the end.
17
+ - In the C API, arrays, assocs, and hashes now can have the `PM_NODE_FLAG_STATIC_LITERAL` flag attached if they can be compiled statically. This is used in CRuby, for example, to determine if a `duphash`/`duparray` instruction can be used as opposed to a `newhash`/`newarray`.
18
+ - `Node#type` is introduced, which returns a symbol representing the type of the node. This is useful for case comparisons when you have to compare against multiple types.
19
+
20
+ ### Changed
21
+
22
+ - **BREAKING**: Everything has been renamed to `prism` instead of `yarp`. The `yp_`/`YP_` prefix in the C API has been changed to `pm_`/`PM_`. For the most part, everything should be find/replaceable.
23
+ - **BREAKING**: `BlockArgumentNode` nodes now go into the `block` field on `CallNode` nodes, in addition to the `BlockNode` nodes that used to be there. Hopefully this makes it more consistent to compile/deal with in general, but it does mean it can be a surprising breaking change.
24
+ - Escaped whitespace in `%w` lists is now properly unescaped.
25
+ - `Node#pretty_print` now respects pretty print indentation.
26
+ - `Dispatcher` was previously firing `_leave` events in the incorrect order. This has now been fixed.
27
+ - **BREAKING**: `Visitor` has now been split into `Visitor` and `Compiler`. The visitor visits nodes but doesn't return anything from the visit methods. It is suitable for taking action based on the tree, but not manipulating the tree itself. The `Compiler` visits nodes and returns the computed value up the tree. It is suitable for compiling the tree into another format. As such, `MutationVisitor` has been renamed to `MutationCompiler`.
28
+
29
+ ## [0.12.0] - 2023-09-15
30
+
31
+ ### Added
32
+
33
+ - `RegularExpressionNode#options` and `InterpolatedRegularExpressionNode#options` are now provided. These return integers that match up to the `Regexp#options` API.
34
+ - Greatly improved `Node#inspect` and `Node#pretty_print` APIs.
35
+ - `MatchLastLineNode` and `InterpolatedMatchLastLineNode` are introduced to represent using a regular expression as the predicate of an `if` or `unless` statement.
36
+ - `IntegerNode` now has a base flag on it.
37
+ - Heredocs that were previously `InterpolatedStringNode` and `InterpolatedXStringNode` nodes without any actual interpolation are now `StringNode` and `XStringNode`, respectively.
38
+ - `StringNode` now has a `frozen?` flag on it, which respects the `frozen_string_literal` magic comment.
39
+ - Numbered parameters are now supported, and are properly represented using `LocalVariableReadNode` nodes.
40
+ - `ImplicitNode` is introduced, which wraps implicit calls, local variable reads, or constant reads in omitted hash values.
41
+ - `YARP::Dispatcher` is introduced, which provides a way for multiple objects to listen for certain events on the AST while it is being walked. This is effectively a way to implement a more efficient visitor pattern when you have many different uses for the AST.
42
+
43
+ ### Changed
44
+
45
+ - **BREAKING**: Flags fields are now marked as private, to ensure we can change their implementation under the hood. Actually querying should be through the accessor methods.
46
+ - **BREAKING**: `AliasNode` is now split into `AliasMethodNode` and `AliasGlobalVariableNode`.
47
+ - Method definitions on local variables are now correctly handled.
48
+ - Unary minus precedence has been fixed.
49
+ - Concatenating character literals with string literals is now fixed.
50
+ - Many more invalid syntaxes are now properly rejected.
51
+ - **BREAKING**: Comments now no longer include their trailing newline.
52
+
9
53
  ## [0.11.0] - 2023-09-08
10
54
 
11
55
  ### Added
@@ -117,10 +161,12 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) a
117
161
 
118
162
  - 🎉 Initial release! 🎉
119
163
 
120
- [unreleased]: https://github.com/ruby/yarp/compare/v0.11.0...HEAD
121
- [0.11.0]: https://github.com/ruby/yarp/compare/v0.10.0...v0.11.0
122
- [0.10.0]: https://github.com/ruby/yarp/compare/v0.9.0...v0.10.0
123
- [0.9.0]: https://github.com/ruby/yarp/compare/v0.8.0...v0.9.0
124
- [0.8.0]: https://github.com/ruby/yarp/compare/v0.7.0...v0.8.0
125
- [0.7.0]: https://github.com/ruby/yarp/compare/v0.6.0...v0.7.0
126
- [0.6.0]: https://github.com/ruby/yarp/compare/d60531...v0.6.0
164
+ [unreleased]: https://github.com/ruby/prism/compare/v0.13.0...HEAD
165
+ [0.13.0]: https://github.com/ruby/prism/compare/v0.12.0...v0.13.0
166
+ [0.12.0]: https://github.com/ruby/prism/compare/v0.11.0...v0.12.0
167
+ [0.11.0]: https://github.com/ruby/prism/compare/v0.10.0...v0.11.0
168
+ [0.10.0]: https://github.com/ruby/prism/compare/v0.9.0...v0.10.0
169
+ [0.9.0]: https://github.com/ruby/prism/compare/v0.8.0...v0.9.0
170
+ [0.8.0]: https://github.com/ruby/prism/compare/v0.7.0...v0.8.0
171
+ [0.7.0]: https://github.com/ruby/prism/compare/v0.6.0...v0.7.0
172
+ [0.6.0]: https://github.com/ruby/prism/compare/d60531...v0.6.0
data/CONTRIBUTING.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # Contributing
2
2
 
3
- Thank you for your interest in contributing to YARP! Below are a couple of ways that you can help out.
3
+ Thank you for your interest in contributing to prism! Below are a couple of ways that you can help out.
4
4
 
5
5
  ## Discussions
6
6
 
@@ -29,7 +29,7 @@ or explicitly running the `compile` task:
29
29
  ``` sh
30
30
  bundle exec rake compile test
31
31
  # or to just compile the C extension ...
32
- bundle exec rake compile:yarp test
32
+ bundle exec rake compile:prism test
33
33
  ```
34
34
 
35
35
  To test the rust bindings (with caveats about setting up your Rust environment properly first):
data/Makefile CHANGED
@@ -35,7 +35,7 @@ build/librubyparser.a: $(STATIC_OBJECTS)
35
35
  build/shared/%.o: src/%.c Makefile $(HEADERS)
36
36
  $(ECHO) "compiling $@"
37
37
  $(Q) mkdir -p $(@D)
38
- $(Q) $(CC) $(DEBUG_FLAGS) -DYP_EXPORT_SYMBOLS $(CPPFLAGS) $(CFLAGS) -c -o $@ $<
38
+ $(Q) $(CC) $(DEBUG_FLAGS) -DPRISM_EXPORT_SYMBOLS $(CPPFLAGS) $(CFLAGS) -c -o $@ $<
39
39
 
40
40
  build/static/%.o: src/%.c Makefile $(HEADERS)
41
41
  $(ECHO) "compiling $@"
@@ -55,20 +55,20 @@ build/fuzz.heisenbug.%: $(SOURCES) fuzz/%.c fuzz/heisenbug.c
55
55
 
56
56
  fuzz-debug:
57
57
  $(ECHO) "entering debug shell"
58
- $(Q) docker run -it --rm -e HISTFILE=/yarp/fuzz/output/.bash_history -v $(shell pwd):/yarp -v $(FUZZ_OUTPUT_DIR):/fuzz_output yarp/fuzz
58
+ $(Q) docker run -it --rm -e HISTFILE=/prism/fuzz/output/.bash_history -v $(shell pwd):/prism -v $(FUZZ_OUTPUT_DIR):/fuzz_output prism/fuzz
59
59
 
60
60
  fuzz-docker-build: fuzz/docker/Dockerfile
61
61
  $(ECHO) "building docker image"
62
- $(Q) docker build -t yarp/fuzz fuzz/docker/
62
+ $(Q) docker build -t prism/fuzz fuzz/docker/
63
63
 
64
64
  fuzz-run-%: FORCE fuzz-docker-build
65
65
  $(ECHO) "generating templates"
66
66
  $(Q) bundle exec rake templates
67
67
  $(ECHO) "running $* fuzzer"
68
- $(Q) docker run --rm -v $(shell pwd):/yarp yarp/fuzz /bin/bash -c "FUZZ_FLAGS=\"$(FUZZ_FLAGS)\" make build/fuzz.$*"
68
+ $(Q) docker run --rm -v $(shell pwd):/prism prism/fuzz /bin/bash -c "FUZZ_FLAGS=\"$(FUZZ_FLAGS)\" make build/fuzz.$*"
69
69
  $(ECHO) "starting AFL++ run"
70
70
  $(Q) mkdir -p $(FUZZ_OUTPUT_DIR)/$*
71
- $(Q) docker run -it --rm -v $(shell pwd):/yarp -v $(FUZZ_OUTPUT_DIR):/fuzz_output yarp/fuzz /bin/bash -c "./fuzz/$*.sh /fuzz_output/$*"
71
+ $(Q) docker run -it --rm -v $(shell pwd):/prism -v $(FUZZ_OUTPUT_DIR):/fuzz_output prism/fuzz /bin/bash -c "./fuzz/$*.sh /fuzz_output/$*"
72
72
  FORCE:
73
73
 
74
74
  fuzz-clean:
data/README.md CHANGED
@@ -1,4 +1,4 @@
1
- # Yet Another Ruby Parser
1
+ # Prism Ruby parser
2
2
 
3
3
  This is a parser for the Ruby programming language. It is designed to be portable, error tolerant, and maintainable. It is written in C99 and has no dependencies. It is currently being integrated into [CRuby](https://github.com/ruby/ruby), [JRuby](https://github.com/jruby/jruby), [TruffleRuby](https://github.com/oracle/truffleruby), [Sorbet](https://github.com/sorbet/sorbet), and [Syntax Tree](https://github.com/ruby-syntax-tree/syntax_tree).
4
4
 
@@ -16,29 +16,29 @@ The repository contains the infrastructure for both a shared library (librubypar
16
16
  ├── config.yml specification for tokens and nodes in the tree
17
17
  ├── docs documentation about the project
18
18
  ├── ext
19
- │   └── yarp
19
+ │   └── prism
20
20
  │   ├── extconf.rb configuration to generate the Makefile for the native extension
21
21
  │   └── extension.c the native extension that interacts with librubyparser
22
22
  ├── fuzz files related to fuzz testing
23
23
  ├── include
24
- │   ├── yarp header files for the shared library
25
- │   └── yarp.h main header file for the shared library
24
+ │   ├── prism header files for the shared library
25
+ │   └── prism.h main header file for the shared library
26
26
  ├── java Java bindings for the shared library
27
27
  ├── lib
28
- │   ├── yarp Ruby library files
29
- │   └── yarp.rb main entrypoint for the Ruby library
28
+ │   ├── prism Ruby library files
29
+ │   └── prism.rb main entrypoint for the Ruby library
30
30
  ├── rakelib various Rake tasks for the project
31
31
  ├── rust
32
- │   ├── yarp Rustified crate for the shared library
33
- │   └── yarp-sys FFI binding for Rust
32
+ │   ├── prism Rustified crate for the shared library
33
+ │   └── prism-sys FFI binding for Rust
34
34
  ├── src
35
35
  │   ├── enc various encoding files
36
36
  │   ├── util various utility files
37
- │   └── yarp.c main entrypoint for the shared library
37
+ │   └── prism.c main entrypoint for the shared library
38
38
  ├── templates contains ERB templates generated by templates/template.rb
39
39
  │   └── template.rb generates code from the nodes and tokens configured by config.yml
40
40
  └── test
41
- └── yarp
41
+ └── prism
42
42
  ├── fixtures Ruby code used for testing
43
43
  └── snapshots snapshots of generated syntax trees corresponding to fixtures
44
44
  ```
@@ -48,7 +48,7 @@ The repository contains the infrastructure for both a shared library (librubypar
48
48
  To compile the shared library, you will need:
49
49
 
50
50
  * A C99 compiler
51
- * autotools autoconf, automake, libtool)
51
+ * autotools (autoconf, automake, libtool)
52
52
  * make
53
53
  * Ruby 3.3.0-preview1 or later
54
54
 
@@ -87,4 +87,3 @@ See the [CONTRIBUTING.md](CONTRIBUTING.md) file for more information. We additio
87
87
  * [Ruby API](docs/ruby_api.md)
88
88
  * [Serialization](docs/serialization.md)
89
89
  * [Testing](docs/testing.md)
90
-
data/config.yml CHANGED
@@ -232,6 +232,8 @@ tokens:
232
232
  comment: "<<"
233
233
  - name: LESS_LESS_EQUAL
234
234
  comment: "<<="
235
+ - name: METHOD_NAME
236
+ comment: "a method name"
235
237
  - name: MINUS
236
238
  comment: "-"
237
239
  - name: MINUS_EQUAL
@@ -333,6 +335,16 @@ flags:
333
335
  comment: "&. operator"
334
336
  - name: VARIABLE_CALL
335
337
  comment: "a call that could have been a local variable"
338
+ - name: IntegerBaseFlags
339
+ values:
340
+ - name: BINARY
341
+ comment: "0b prefix"
342
+ - name: OCTAL
343
+ comment: "0o or 0 prefix"
344
+ - name: DECIMAL
345
+ comment: "0d or no prefix"
346
+ - name: HEXADECIMAL
347
+ comment: "0x prefix"
336
348
  - name: LoopFlags
337
349
  values:
338
350
  - name: BEGIN_MODIFIER
@@ -345,10 +357,10 @@ flags:
345
357
  values:
346
358
  - name: IGNORE_CASE
347
359
  comment: "i - ignores the case of characters when matching"
348
- - name: MULTI_LINE
349
- comment: "m - allows $ to match the end of lines within strings"
350
360
  - name: EXTENDED
351
361
  comment: "x - ignores whitespace and allows comments in regular expressions"
362
+ - name: MULTI_LINE
363
+ comment: "m - allows $ to match the end of lines within strings"
352
364
  - name: EUC_JP
353
365
  comment: "e - forces the EUC-JP encoding"
354
366
  - name: ASCII_8BIT
@@ -359,8 +371,12 @@ flags:
359
371
  comment: "u - forces the UTF-8 encoding"
360
372
  - name: ONCE
361
373
  comment: "o - only interpolates values into the regular expression once"
374
+ - name: StringFlags
375
+ values:
376
+ - name: FROZEN
377
+ comment: "frozen by virtue of a frozen_string_literal comment"
362
378
  nodes:
363
- - name: AliasNode
379
+ - name: AliasGlobalVariableNode
364
380
  fields:
365
381
  - name: new_name
366
382
  type: node
@@ -369,7 +385,20 @@ nodes:
369
385
  - name: keyword_loc
370
386
  type: location
371
387
  comment: |
372
- Represents the use of the `alias` keyword.
388
+ Represents the use of the `alias` keyword to alias a global variable.
389
+
390
+ alias $foo $bar
391
+ ^^^^^^^^^^^^^^^
392
+ - name: AliasMethodNode
393
+ fields:
394
+ - name: new_name
395
+ type: node
396
+ - name: old_name
397
+ type: node
398
+ - name: keyword_loc
399
+ type: location
400
+ comment: |
401
+ Represents the use of the `alias` keyword to alias a method.
373
402
 
374
403
  alias foo bar
375
404
  ^^^^^^^^^^^^^
@@ -641,7 +670,6 @@ nodes:
641
670
  type: location?
642
671
  - name: block
643
672
  type: node?
644
- kind: BlockNode
645
673
  - name: flags
646
674
  type: flags
647
675
  kind: CallNodeFlags
@@ -1386,6 +1414,19 @@ nodes:
1386
1414
 
1387
1415
  1.0i
1388
1416
  ^^^^
1417
+ - name: ImplicitNode
1418
+ fields:
1419
+ - name: value
1420
+ type: node
1421
+ comment: |
1422
+ Represents a node that is implicitly being added to the tree but doesn't
1423
+ correspond directly to a node in the source.
1424
+
1425
+ { foo: }
1426
+ ^^^^
1427
+
1428
+ { Foo: }
1429
+ ^^^^
1389
1430
  - name: InNode
1390
1431
  fields:
1391
1432
  - name: pattern
@@ -1483,11 +1524,34 @@ nodes:
1483
1524
  @foo = 1
1484
1525
  ^^^^^^^^
1485
1526
  - name: IntegerNode
1527
+ fields:
1528
+ - name: flags
1529
+ type: flags
1530
+ kind: IntegerBaseFlags
1486
1531
  comment: |
1487
1532
  Represents an integer number literal.
1488
1533
 
1489
1534
  1
1490
1535
  ^
1536
+ - name: InterpolatedMatchLastLineNode
1537
+ fields:
1538
+ - name: opening_loc
1539
+ type: location
1540
+ - name: parts
1541
+ type: node[]
1542
+ - name: closing_loc
1543
+ type: location
1544
+ - name: flags
1545
+ type: flags
1546
+ kind: RegularExpressionFlags
1547
+ newline: parts
1548
+ comment: |
1549
+ Represents a regular expression literal that contains interpolation that
1550
+ is being used in the predicate of a conditional to implicitly match
1551
+ against the last line read by an IO object.
1552
+
1553
+ if /foo #{bar} baz/ then end
1554
+ ^^^^^^^^^^^^^^^^
1491
1555
  - name: InterpolatedRegularExpressionNode
1492
1556
  fields:
1493
1557
  - name: opening_loc
@@ -1702,6 +1766,27 @@ nodes:
1702
1766
 
1703
1767
  foo = 1
1704
1768
  ^^^^^^^
1769
+ - name: MatchLastLineNode
1770
+ fields:
1771
+ - name: opening_loc
1772
+ type: location
1773
+ - name: content_loc
1774
+ type: location
1775
+ semantic_field: true # https://github.com/ruby/prism/issues/1452
1776
+ - name: closing_loc
1777
+ type: location
1778
+ - name: unescaped
1779
+ type: string
1780
+ - name: flags
1781
+ type: flags
1782
+ kind: RegularExpressionFlags
1783
+ comment: |
1784
+ Represents a regular expression literal used in the predicate of a
1785
+ conditional to implicitly match against the last line read by an IO
1786
+ object.
1787
+
1788
+ if /foo/i then end
1789
+ ^^^^^^
1705
1790
  - name: MatchPredicateNode
1706
1791
  fields:
1707
1792
  - name: value
@@ -1728,6 +1813,19 @@ nodes:
1728
1813
 
1729
1814
  foo => bar
1730
1815
  ^^^^^^^^^^
1816
+ - name: MatchWriteNode
1817
+ fields:
1818
+ - name: call
1819
+ type: node
1820
+ kind: CallNode
1821
+ - name: locals
1822
+ type: constant[]
1823
+ comment: |
1824
+ Represents writing local variables using a regular expression match with
1825
+ named capture groups.
1826
+
1827
+ /(?<foo>bar)/ =~ baz
1828
+ ^^^^^^^^^^^^^^^^^^^^
1731
1829
  - name: MissingNode
1732
1830
  comment: |
1733
1831
  Represents a node that is missing from the source and results in a syntax
@@ -1855,11 +1953,11 @@ nodes:
1855
1953
  type: node[]
1856
1954
  - name: optionals
1857
1955
  type: node[]
1858
- - name: posts
1859
- type: node[]
1860
1956
  - name: rest
1861
1957
  type: node?
1862
1958
  kind: RestParameterNode
1959
+ - name: posts
1960
+ type: node[]
1863
1961
  - name: keywords
1864
1962
  type: node[]
1865
1963
  - name: keyword_rest
@@ -1995,6 +2093,7 @@ nodes:
1995
2093
  type: location
1996
2094
  - name: content_loc
1997
2095
  type: location
2096
+ semantic_field: true # https://github.com/ruby/prism/issues/1452
1998
2097
  - name: closing_loc
1999
2098
  type: location
2000
2099
  - name: unescaped
@@ -2183,10 +2282,15 @@ nodes:
2183
2282
  ^^^^^^^^^^^
2184
2283
  - name: StringNode
2185
2284
  fields:
2285
+ - name: flags
2286
+ type: flags
2287
+ kind: StringFlags
2186
2288
  - name: opening_loc
2187
2289
  type: location?
2290
+ semantic_field: true # https://github.com/ruby/prism/issues/1452
2188
2291
  - name: content_loc
2189
2292
  type: location
2293
+ semantic_field: true # https://github.com/ruby/prism/issues/1452
2190
2294
  - name: closing_loc
2191
2295
  type: location?
2192
2296
  - name: unescaped
@@ -2216,7 +2320,6 @@ nodes:
2216
2320
  type: location?
2217
2321
  - name: block
2218
2322
  type: node?
2219
- kind: BlockNode
2220
2323
  comment: |
2221
2324
  Represents the use of the `super` keyword with parentheses or arguments.
2222
2325
 
data/docs/build_system.md CHANGED
@@ -1,17 +1,17 @@
1
1
  # Build System
2
2
 
3
- There are many ways to build YARP, which means the build system is a bit more complicated than usual.
3
+ There are many ways to build prism, which means the build system is a bit more complicated than usual.
4
4
 
5
5
  ## Requirements
6
6
 
7
- * It must work to build YARP for all 6 uses-cases below.
8
- * It must be possible to build YARP without needing ruby/rake/etc.
9
- Because once YARP is the single parser in TruffleRuby, JRuby or CRuby there won't be another Ruby parser around to parse such Ruby code.
7
+ * It must work to build prism for all 6 use-cases below.
8
+ * It must be possible to build prism without needing ruby/rake/etc.
9
+ Because once prism is the single parser in TruffleRuby, JRuby or CRuby there won't be another Ruby parser around to parse such Ruby code.
10
10
  Most/every Ruby implementations want to avoid depending on another Ruby during the build process as that is very brittle.
11
- * It is desirable to compile YARP with the same or very similar compiler flags for all use-cases (e.g. optimization level, warning flags, etc).
12
- Otherwise, there is the risk YARP does not work correctly with those different compiler flags.
11
+ * It is desirable to compile prism with the same or very similar compiler flags for all use-cases (e.g. optimization level, warning flags, etc).
12
+ Otherwise, there is the risk prism does not work correctly with those different compiler flags.
13
13
 
14
- The main solution for the second point seems a Makefile, otherwise many of the usages would have to duplicate the logic to build YARP.
14
+ The main solution for the second point seems to be a Makefile; otherwise many of the usages would have to duplicate the logic to build prism.
15
15
 
16
16
  ## General Design
17
17
 
@@ -24,15 +24,15 @@ This way there is minimal duplication, and each layer builds on the previous one
24
24
 
25
25
  The static library exports no symbols, to avoid any conflict.
26
26
  The shared library exports some symbols, and this is fine since there should only be one librubyparser shared library
27
- loaded per process (i.e., at most one version of the yarp *gem* loaded in a process, only the gem uses the shared library).
27
+ loaded per process (i.e., at most one version of the prism *gem* loaded in a process, only the gem uses the shared library).
28
28
 
29
- ## The various ways to build YARP
29
+ ## The various ways to build prism
30
30
 
31
- ### Building from ruby/yarp repository with `bundle exec rake`
31
+ ### Building from ruby/prism repository with `bundle exec rake`
32
32
 
33
33
  `rake` calls `make` and then uses `Rake::ExtensionTask` to compile the C extension (see above).
34
34
 
35
- ### Building the yarp gem by `gem install/bundle install`
35
+ ### Building the prism gem by `gem install/bundle install`
36
36
 
37
37
  The gem contains the pre-generated templates.
38
38
  When installing the gem, `extconf.rb` is used and that:
@@ -44,31 +44,31 @@ there is Ruby code using FFI which uses `librubyparser.{so,dylib,dll}`
44
44
  to implement the same methods as the C extension, but using serialization instead of many native calls/accesses
45
45
  (JRuby does not support C extensions, serialization is faster on TruffleRuby than the C extension).
46
46
 
47
- ### Building the yarp gem from git, e.g. `gem 'yarp', github: 'ruby/yarp'`
47
+ ### Building the prism gem from git, e.g. `gem "prism", github: "ruby/prism"`
48
48
 
49
49
  The same as above, except the `extconf.rb` additionally runs first:
50
50
  * `templates/template.rb` to generate the templates
51
51
 
52
52
  Because of course those files are not part of the git repository.
53
53
 
54
- ### Building YARP as part of CRuby
54
+ ### Building prism as part of CRuby
55
55
 
56
- [This script](https://github.com/ruby/ruby/blob/32e828bb4a6c65a392b2300f3bdf93008c7b6f25/tool/sync_default_gems.rb#L399-L426) imports YARP sources in CRuby.
56
+ [This script](https://github.com/ruby/ruby/blob/32e828bb4a6c65a392b2300f3bdf93008c7b6f25/tool/sync_default_gems.rb#L399-L426) imports prism sources in CRuby.
57
57
 
58
58
  The script generates the templates when importing.
59
59
 
60
- YARP's `Makefile` is not used at all in CRuby. Instead, CRuby's `Makefile` is used.
60
+ prism's `Makefile` is not used at all in CRuby. Instead, CRuby's `Makefile` is used.
61
61
 
62
- ### Building YARP as part of TruffleRuby
62
+ ### Building prism as part of TruffleRuby
63
63
 
64
- [This script](https://github.com/oracle/truffleruby/blob/master/tool/import-yarp.sh) imports YARP sources in TruffleRuby.
64
+ [This script](https://github.com/oracle/truffleruby/blob/master/tool/import-prism.sh) imports prism sources in TruffleRuby.
65
65
  The script generates the templates when importing.
66
66
 
67
- Then when `mx build` builds TruffleRuby and the `yarp` mx project inside, it runs `make`.
67
+ Then when `mx build` builds TruffleRuby and the `prism` mx project inside, it runs `make`.
68
68
 
69
- Then the `yarp bindings` mx project is built, which contains the [bindings](https://github.com/oracle/truffleruby/blob/master/src/main/c/yarp_bindings/src/yarp_bindings.c)
70
- and links to `librubyparser.a` (to avoid exporting symbols, so no conflict when installing the yarp gem).
69
+ Then the `prism bindings` mx project is built, which contains the [bindings](https://github.com/oracle/truffleruby/blob/master/src/main/c/prism_bindings/src/prism_bindings.c)
70
+ and links to `librubyparser.a` (to avoid exporting symbols, so no conflict when installing the prism gem).
71
71
 
72
- ### Building YARP as part of JRuby
72
+ ### Building prism as part of JRuby
73
73
 
74
74
  TODO, probably similar to TruffleRuby.
data/docs/building.md CHANGED
@@ -1,13 +1,13 @@
1
1
  # Building
2
2
 
3
- The following describes how to build YARP from source.
3
+ The following describes how to build prism from source.
4
4
  This comes directly from the [Makefile](../Makefile).
5
5
 
6
6
  ## Common
7
7
 
8
8
  All of the source files match `src/**/*.c` and all of the headers match `include/**/*.h`.
9
9
 
10
- The following flags should be used to compile YARP:
10
+ The following flags should be used to compile prism:
11
11
 
12
12
  * `-std=c99` - Use the C99 standard
13
13
  * `-Wall -Wconversion -Wextra -Wpedantic -Wundef` - Enable the warnings we care about
@@ -16,7 +16,7 @@ The following flags should be used to compile YARP:
16
16
 
17
17
  ## Shared
18
18
 
19
- If you want to build YARP as a shared library and link against it, you should compile with:
19
+ If you want to build prism as a shared library and link against it, you should compile with:
20
20
 
21
21
  * `-fPIC -shared` - Compile as a shared library
22
- * `-DYP_EXPORT_SYMBOLS` - Export the symbols (by default nothing is exported)
22
+ * `-DPRISM_EXPORT_SYMBOLS` - Export the symbols (by default nothing is exported)
@@ -1,15 +1,19 @@
1
1
  # Configuration
2
2
 
3
- A lot of code in YARP's repository is templated from a single configuration file, [config.yml](../config.yml). This file is used to generate the following files:
4
-
5
- * `ext/yarp/api_node.c` - for defining how to build Ruby objects for the nodes out of C structs
6
- * `include/yarp/ast.h` - for defining the C structs that represent the nodes
7
- * `java/org/yarp/AbstractNodeVisitor.java` - for defining the visitor interface for the nodes in Java
8
- * `java/org/yarp/Loader.java` - for defining how to deserialize the nodes in Java
9
- * `java/org/yarp/Nodes.java` - for defining the nodes in Java
10
- * `lib/yarp/mutation_visitor.rb` - for defining the mutation visitor for the nodes in Ruby
11
- * `lib/yarp/node.rb` - for defining the nodes in Ruby
12
- * `lib/yarp/serialize.rb` - for defining how to deserialize the nodes in Ruby
3
+ A lot of code in prism's repository is templated from a single configuration file, [config.yml](../config.yml). This file is used to generate the following files:
4
+
5
+ * `ext/prism/api_node.c` - for defining how to build Ruby objects for the nodes out of C structs
6
+ * `include/prism/ast.h` - for defining the C structs that represent the nodes
7
+ * `java/org/prism/AbstractNodeVisitor.java` - for defining the visitor interface for the nodes in Java
8
+ * `java/org/prism/Loader.java` - for defining how to deserialize the nodes in Java
9
+ * `java/org/prism/Nodes.java` - for defining the nodes in Java
10
+ * `lib/prism/compiler.rb` - for defining the compiler for the nodes in Ruby
11
+ * `lib/prism/dispatcher.rb` - for defining the dispatch visitors for the nodes in Ruby
12
+ * `lib/prism/dsl.rb` - for defining the DSL for the nodes in Ruby
13
+ * `lib/prism/mutation_compiler.rb` - for defining the mutation compiler for the nodes in Ruby
14
+ * `lib/prism/node.rb` - for defining the nodes in Ruby
15
+ * `lib/prism/serialize.rb` - for defining how to deserialize the nodes in Ruby
16
+ * `lib/prism/visitor.rb` - for defining the visitor interface for the nodes in Ruby
13
17
  * `src/node.c` - for defining how to free the nodes in C and calculate the size in memory in C
14
18
  * `src/prettyprint.c` - for defining how to prettyprint the nodes in C
15
19
  * `src/serialize.c` - for defining how to serialize the nodes in C
@@ -25,7 +29,7 @@ This is a list of tokens to be used by the lexer. It is shared here so that it c
25
29
 
26
30
  Each token is expected to have a `name` key and a `comment` key (both as strings). Optionally they can have a `value` key (an integer) which is used to represent the value in the enum.
27
31
 
28
- In C these tokens will be templated out with the prefix `YP_TOKEN_`. For example, if you have a `name` key with the value `PERCENT`, you can access this in C through `YP_TOKEN_PERCENT`.
32
+ In C these tokens will be templated out with the prefix `PM_TOKEN_`. For example, if you have a `name` key with the value `PERCENT`, you can access this in C through `PM_TOKEN_PERCENT`.
29
33
 
30
34
  ## `flags`
31
35
 
@@ -33,7 +37,7 @@ Sometimes we need to communicate more information in the tree than can be repres
33
37
 
34
38
  Each flag is expected to have a `name` key (a string) and a `values` key (an array). Each value in the `values` key should be an object that contains both a `name` key (a string) that represents the name of the flag and a `comment` key (a string) that represents the comment for the flag.
35
39
 
36
- In C these flags will get templated out with a `YP_` prefix, then a snake-case version of the flag name, then the flag itself. For example, if you have a flag with the name `RegularExpressionFlags` and a value with the name `IGNORE_CASE`, you can access this in C through `YP_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE`.
40
+ In C these flags will get templated out with a `PM_` prefix, then a snake-case version of the flag name, then the flag itself. For example, if you have a flag with the name `RegularExpressionFlags` and a value with the name `IGNORE_CASE`, you can access this in C through `PM_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE`.
37
41
 
38
42
  ## `nodes`
39
43
 
@@ -43,14 +47,14 @@ Optionally, every node can define a `child_nodes` key that is an array. This arr
43
47
 
44
48
  The available values for `type` are:
45
49
 
46
- * `node` - A child node that is a node itself. This is a `yp_node_t *` in C.
47
- * `node?` - A child node that is optionally present. This is also a `yp_node_t *` in C, but can be `NULL`.
48
- * `node[]` - A child node that is an array of nodes. This is a `yp_node_list_t` in C.
49
- * `string` - A child node that is a string. For example, this is used as the name of the method in a call node, since it cannot directly reference the source string (as in `@-` or `foo=`). This is a `yp_string_t` in C.
50
- * `constant` - A variable-length integer that represents an index in the constant pool. This is a `yp_constant_id_t` in C.
51
- * `constant[]` - A child node that is an array of constants. This is a `yp_constant_id_list_t` in C.
52
- * `location` - A child node that is a location. This is a `yp_location_t` in C.
53
- * `location?` - A child node that is a location that is optionally present. This is a `yp_location_t` in C, but if the value is not present then the `start` and `end` fields will be `NULL`.
50
+ * `node` - A child node that is a node itself. This is a `pm_node_t *` in C.
51
+ * `node?` - A child node that is optionally present. This is also a `pm_node_t *` in C, but can be `NULL`.
52
+ * `node[]` - A child node that is an array of nodes. This is a `pm_node_list_t` in C.
53
+ * `string` - A child node that is a string. For example, this is used as the name of the method in a call node, since it cannot directly reference the source string (as in `@-` or `foo=`). This is a `pm_string_t` in C.
54
+ * `constant` - A variable-length integer that represents an index in the constant pool. This is a `pm_constant_id_t` in C.
55
+ * `constant[]` - A child node that is an array of constants. This is a `pm_constant_id_list_t` in C.
56
+ * `location` - A child node that is a location. This is a `pm_location_t` in C.
57
+ * `location?` - A child node that is a location that is optionally present. This is a `pm_location_t` in C, but if the value is not present then the `start` and `end` fields will be `NULL`.
54
58
  * `uint32` - A child node that is a 32-bit unsigned integer. This is a `uint32_t` in C.
55
59
 
56
- If the type is `node` or `node?` then the value also accepts an optional `kind` key (a string). This key is expected to match to the name of another node type within `config.yml`. This changes a couple of places where code is templated out to use the more specific struct name instead of the generic `yp_node_t`. For example, with `kind: StatementsNode` the `yp_node_t *` in C becomes a `yp_statements_node_t *`.
60
+ If the type is `node` or `node?` then the value also accepts an optional `kind` key (a string). This key is expected to match the name of another node type within `config.yml`. This changes a couple of places where code is templated out to use the more specific struct name instead of the generic `pm_node_t`. For example, with `kind: StatementsNode` the `pm_node_t *` in C becomes a `pm_statements_node_t *`.
data/docs/design.md CHANGED
@@ -12,7 +12,7 @@ The design of the parser is based around these main goals.
12
12
 
13
13
  The first piece to understand about the parser is the design of its syntax tree. This is documented in `config.yml`. Every token and node is defined in that file, along with comments about where they are found in what kinds of syntax. This file is used to template out a lot of different files, all found in the `templates` directory. The `templates/template.rb` script performs the templating and outputs all files matching the directory structure found in the templates directory.
14
14
 
15
- The templated files contain all of the code required to allocate and initialize nodes, pretty print nodes, and serialize nodes. This means for the most part, you will only need to then hook up the parser to call the templated functions to create the nodes in the correct position. That means editing the parser itself, which is housed in `yarp.c`.
15
+ The templated files contain all of the code required to allocate and initialize nodes, pretty print nodes, and serialize nodes. This means that, for the most part, you only need to hook up the parser to call the templated functions to create the nodes in the correct position. That means editing the parser itself, which is housed in `prism.c`.
16
16
 
17
17
  ## Pratt parsing
18
18
 
@@ -24,7 +24,7 @@ In order to provide the best possible error tolerance, the parser is hand-writte
24
24
  * https://matklad.github.io/2020/04/13/simple-but-powerful-pratt-parsing.html
25
25
  * https://chidiwilliams.com/post/on-recursive-descent-and-pratt-parsing/
26
26
 
27
- You can find most of the functions that correspond to constructs in the Pratt parsing algorithm in `yarp.c`. As a couple of examples:
27
+ You can find most of the functions that correspond to constructs in the Pratt parsing algorithm in `prism.c`. As a couple of examples:
28
28
 
29
29
  * `parse` corresponds to the `parse_expression` function
30
30
  * `nud` (null denotation) corresponds to the `parse_expression_prefix` function