yarp 0.12.0 → 0.13.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (115) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +29 -8
  3. data/CONTRIBUTING.md +2 -2
  4. data/Makefile +5 -5
  5. data/README.md +11 -12
  6. data/config.yml +6 -2
  7. data/docs/build_system.md +21 -21
  8. data/docs/building.md +4 -4
  9. data/docs/configuration.md +25 -21
  10. data/docs/design.md +2 -2
  11. data/docs/encoding.md +17 -17
  12. data/docs/fuzzing.md +4 -4
  13. data/docs/heredocs.md +3 -3
  14. data/docs/mapping.md +94 -94
  15. data/docs/ripper.md +4 -4
  16. data/docs/ruby_api.md +11 -11
  17. data/docs/serialization.md +17 -16
  18. data/docs/testing.md +6 -6
  19. data/ext/prism/api_node.c +4725 -0
  20. data/ext/{yarp → prism}/api_pack.c +82 -82
  21. data/ext/{yarp → prism}/extconf.rb +13 -13
  22. data/ext/{yarp → prism}/extension.c +175 -168
  23. data/ext/prism/extension.h +18 -0
  24. data/include/prism/ast.h +1932 -0
  25. data/include/prism/defines.h +45 -0
  26. data/include/prism/diagnostic.h +231 -0
  27. data/include/{yarp/enc/yp_encoding.h → prism/enc/pm_encoding.h} +40 -40
  28. data/include/prism/node.h +41 -0
  29. data/include/prism/pack.h +141 -0
  30. data/include/{yarp → prism}/parser.h +143 -142
  31. data/include/prism/regexp.h +19 -0
  32. data/include/prism/unescape.h +48 -0
  33. data/include/prism/util/pm_buffer.h +51 -0
  34. data/include/{yarp/util/yp_char.h → prism/util/pm_char.h} +20 -20
  35. data/include/{yarp/util/yp_constant_pool.h → prism/util/pm_constant_pool.h} +26 -22
  36. data/include/{yarp/util/yp_list.h → prism/util/pm_list.h} +21 -21
  37. data/include/prism/util/pm_memchr.h +14 -0
  38. data/include/{yarp/util/yp_newline_list.h → prism/util/pm_newline_list.h} +11 -11
  39. data/include/prism/util/pm_state_stack.h +24 -0
  40. data/include/{yarp/util/yp_string.h → prism/util/pm_string.h} +20 -20
  41. data/include/prism/util/pm_string_list.h +25 -0
  42. data/include/{yarp/util/yp_strpbrk.h → prism/util/pm_strpbrk.h} +7 -7
  43. data/include/prism/version.h +4 -0
  44. data/include/prism.h +82 -0
  45. data/lib/prism/compiler.rb +465 -0
  46. data/lib/prism/debug.rb +157 -0
  47. data/lib/{yarp/desugar_visitor.rb → prism/desugar_compiler.rb} +4 -2
  48. data/lib/prism/dispatcher.rb +2051 -0
  49. data/lib/prism/dsl.rb +750 -0
  50. data/lib/{yarp → prism}/ffi.rb +66 -67
  51. data/lib/{yarp → prism}/lex_compat.rb +40 -43
  52. data/lib/{yarp/mutation_visitor.rb → prism/mutation_compiler.rb} +3 -3
  53. data/lib/{yarp → prism}/node.rb +2012 -2593
  54. data/lib/prism/node_ext.rb +55 -0
  55. data/lib/prism/node_inspector.rb +68 -0
  56. data/lib/{yarp → prism}/pack.rb +1 -1
  57. data/lib/{yarp → prism}/parse_result/comments.rb +1 -1
  58. data/lib/{yarp → prism}/parse_result/newlines.rb +1 -1
  59. data/lib/prism/parse_result.rb +266 -0
  60. data/lib/{yarp → prism}/pattern.rb +14 -14
  61. data/lib/{yarp → prism}/ripper_compat.rb +5 -5
  62. data/lib/{yarp → prism}/serialize.rb +12 -7
  63. data/lib/prism/visitor.rb +470 -0
  64. data/lib/prism.rb +64 -0
  65. data/lib/yarp.rb +2 -614
  66. data/src/diagnostic.c +213 -208
  67. data/src/enc/pm_big5.c +52 -0
  68. data/src/enc/pm_euc_jp.c +58 -0
  69. data/src/enc/{yp_gbk.c → pm_gbk.c} +16 -16
  70. data/src/enc/pm_shift_jis.c +56 -0
  71. data/src/enc/{yp_tables.c → pm_tables.c} +69 -69
  72. data/src/enc/{yp_unicode.c → pm_unicode.c} +40 -40
  73. data/src/enc/pm_windows_31j.c +56 -0
  74. data/src/node.c +1293 -1233
  75. data/src/pack.c +247 -247
  76. data/src/prettyprint.c +1479 -1479
  77. data/src/{yarp.c → prism.c} +5205 -5083
  78. data/src/regexp.c +132 -132
  79. data/src/serialize.c +1121 -1121
  80. data/src/token_type.c +169 -167
  81. data/src/unescape.c +106 -87
  82. data/src/util/pm_buffer.c +103 -0
  83. data/src/util/{yp_char.c → pm_char.c} +72 -72
  84. data/src/util/{yp_constant_pool.c → pm_constant_pool.c} +85 -64
  85. data/src/util/{yp_list.c → pm_list.c} +10 -10
  86. data/src/util/{yp_memchr.c → pm_memchr.c} +6 -4
  87. data/src/util/{yp_newline_list.c → pm_newline_list.c} +21 -21
  88. data/src/util/{yp_state_stack.c → pm_state_stack.c} +4 -4
  89. data/src/util/{yp_string.c → pm_string.c} +38 -38
  90. data/src/util/pm_string_list.c +29 -0
  91. data/src/util/{yp_strncasecmp.c → pm_strncasecmp.c} +1 -1
  92. data/src/util/{yp_strpbrk.c → pm_strpbrk.c} +8 -8
  93. data/yarp.gemspec +68 -59
  94. metadata +70 -61
  95. data/ext/yarp/api_node.c +0 -4728
  96. data/ext/yarp/extension.h +0 -18
  97. data/include/yarp/ast.h +0 -1929
  98. data/include/yarp/defines.h +0 -45
  99. data/include/yarp/diagnostic.h +0 -226
  100. data/include/yarp/node.h +0 -42
  101. data/include/yarp/pack.h +0 -141
  102. data/include/yarp/regexp.h +0 -19
  103. data/include/yarp/unescape.h +0 -44
  104. data/include/yarp/util/yp_buffer.h +0 -51
  105. data/include/yarp/util/yp_memchr.h +0 -14
  106. data/include/yarp/util/yp_state_stack.h +0 -24
  107. data/include/yarp/util/yp_string_list.h +0 -25
  108. data/include/yarp/version.h +0 -4
  109. data/include/yarp.h +0 -82
  110. data/src/enc/yp_big5.c +0 -52
  111. data/src/enc/yp_euc_jp.c +0 -58
  112. data/src/enc/yp_shift_jis.c +0 -56
  113. data/src/enc/yp_windows_31j.c +0 -56
  114. data/src/util/yp_buffer.c +0 -101
  115. data/src/util/yp_string_list.c +0 -29
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9afbc3b5f4c070c404f0421f1814d5a1200cb849ef9bf38b2ae23f50ba738fdf
4
- data.tar.gz: bed121fb1ac414cf5918a2bcf7df2a9b1318df2f46dff9863cc5b4427ce79409
3
+ metadata.gz: da8f3b5f2cdae92e669cfe7865566c2014b57746644fd550e23dfe27a7f4a5cd
4
+ data.tar.gz: 994f5db733b1261c76920a6426820dc941a72ba2619f71de0b7ac731da68d57d
5
5
  SHA512:
6
- metadata.gz: 3fd5831ab86ca1ca299e86ca5f2ca184164fa56de353cfa0d9b51d2ece522c2f5bebfadd1896222e4b7e1e984604be414ee8900b1a5bbc0b8eb3b6ee7bb738dc
7
- data.tar.gz: 6fa02c77777391a1c4b5dc47445d905fee835213c7c02687cac432a4b566bbae8ead26467609066e83bf030ce19e09f46cf553fc0acdd8f7ef977074568b7061
6
+ metadata.gz: bc2bc26648b224d5195a1649f906a7d66d6772626b20481b17ccff76a224946b614919aa2d84afb1756ed6bae4e80e460715e25e0a1f0af41b5f5c02d217c99d
7
+ data.tar.gz: db5a41e5abd08dbd4184e1fb6284c4c414f9377894ef4cca689f15b10c388427d2d9e465067159abce7cb8e545bc32c3996983feb5f1ffbd340c84d48796c04b
data/CHANGELOG.md CHANGED
@@ -6,6 +6,26 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) a
6
6
 
7
7
  ## [Unreleased]
8
8
 
9
+ ## [0.13.0] - 2023-09-29
10
+
11
+ ### Added
12
+
13
+ - `BEGIN {}` blocks are only allowed at the top-level, and will now provide a syntax error if they are not.
14
+ - Numbered parameters are not allowed in block parameters, and will now provide a syntax error if they are.
15
+ - Many more Ruby modules and classes are now documented. Also, many have been moved into their own files and autoloaded so that initial boot time of the gem is much faster.
16
+ - `PM_TOKEN_METHOD_NAME` is introduced, used to indicate an identifier that is definitely a method name because it has an `!` or `?` at the end.
17
+ - In the C API, arrays, assocs, and hashes now can have the `PM_NODE_FLAG_STATIC_LITERAL` flag attached if they can be compiled statically. This is used in CRuby, for example, to determine if a `duphash`/`duparray` instruction can be used as opposed to a `newhash`/`newarray`.
18
+ - `Node#type` is introduced, which returns a symbol representing the type of the node. This is useful for case comparisons when you have to compare against multiple types.
19
+
20
+ ### Changed
21
+
22
+ - **BREAKING**: Everything has been renamed to `prism` instead of `yarp`. The `yp_`/`YP_` prefix in the C API has been changed to `pm_`/`PM_`. For the most part, everything should be find/replaceable.
23
+ - **BREAKING**: `BlockArgumentNode` nodes now go into the `block` field on `CallNode` nodes, in addition to the `BlockNode` nodes that used to be there. Hopefully this makes it more consistent to compile/deal with in general, but it does mean it can be a surprising breaking change.
24
+ - Escaped whitespace in `%w` lists is now properly unescaped.
25
+ - `Node#pretty_print` now respects pretty print indentation.
26
+ - `Dispatcher` was previously firing `_leave` events in the incorrect order. This has now been fixed.
27
+ - **BREAKING**: `Visitor` has now been split into `Visitor` and `Compiler`. The visitor visits nodes but doesn't return anything from the visit methods. It is suitable for taking action based on the tree, but not manipulating the tree itself. The `Compiler` visits nodes and returns the computed value up the tree. It is suitable for compiling the tree into another format. As such, `MutationVisitor` has been renamed to `MutationCompiler`.
28
+
9
29
  ## [0.12.0] - 2023-09-15
10
30
 
11
31
  ### Added
@@ -141,11 +161,12 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) a
141
161
 
142
162
  - 🎉 Initial release! 🎉
143
163
 
144
- [unreleased]: https://github.com/ruby/yarp/compare/v0.12.0...HEAD
145
- [0.12.0]: https://github.com/ruby/yarp/compare/v0.11.0...v0.12.0
146
- [0.11.0]: https://github.com/ruby/yarp/compare/v0.10.0...v0.11.0
147
- [0.10.0]: https://github.com/ruby/yarp/compare/v0.9.0...v0.10.0
148
- [0.9.0]: https://github.com/ruby/yarp/compare/v0.8.0...v0.9.0
149
- [0.8.0]: https://github.com/ruby/yarp/compare/v0.7.0...v0.8.0
150
- [0.7.0]: https://github.com/ruby/yarp/compare/v0.6.0...v0.7.0
151
- [0.6.0]: https://github.com/ruby/yarp/compare/d60531...v0.6.0
164
+ [unreleased]: https://github.com/ruby/prism/compare/v0.13.0...HEAD
165
+ [0.13.0]: https://github.com/ruby/prism/compare/v0.12.0...v0.13.0
166
+ [0.12.0]: https://github.com/ruby/prism/compare/v0.11.0...v0.12.0
167
+ [0.11.0]: https://github.com/ruby/prism/compare/v0.10.0...v0.11.0
168
+ [0.10.0]: https://github.com/ruby/prism/compare/v0.9.0...v0.10.0
169
+ [0.9.0]: https://github.com/ruby/prism/compare/v0.8.0...v0.9.0
170
+ [0.8.0]: https://github.com/ruby/prism/compare/v0.7.0...v0.8.0
171
+ [0.7.0]: https://github.com/ruby/prism/compare/v0.6.0...v0.7.0
172
+ [0.6.0]: https://github.com/ruby/prism/compare/d60531...v0.6.0
data/CONTRIBUTING.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # Contributing
2
2
 
3
- Thank you for your interest in contributing to YARP! Below are a couple of ways that you can help out.
3
+ Thank you for your interest in contributing to prism! Below are a couple of ways that you can help out.
4
4
 
5
5
  ## Discussions
6
6
 
@@ -29,7 +29,7 @@ or explicitly running the `compile` task:
29
29
  ``` sh
30
30
  bundle exec rake compile test
31
31
  # or to just compile the C extension ...
32
- bundle exec rake compile:yarp test
32
+ bundle exec rake compile:prism test
33
33
  ```
34
34
 
35
35
  To test the rust bindings (with caveats about setting up your Rust environment properly first):
data/Makefile CHANGED
@@ -35,7 +35,7 @@ build/librubyparser.a: $(STATIC_OBJECTS)
35
35
  build/shared/%.o: src/%.c Makefile $(HEADERS)
36
36
  $(ECHO) "compiling $@"
37
37
  $(Q) mkdir -p $(@D)
38
- $(Q) $(CC) $(DEBUG_FLAGS) -DYP_EXPORT_SYMBOLS $(CPPFLAGS) $(CFLAGS) -c -o $@ $<
38
+ $(Q) $(CC) $(DEBUG_FLAGS) -DPRISM_EXPORT_SYMBOLS $(CPPFLAGS) $(CFLAGS) -c -o $@ $<
39
39
 
40
40
  build/static/%.o: src/%.c Makefile $(HEADERS)
41
41
  $(ECHO) "compiling $@"
@@ -55,20 +55,20 @@ build/fuzz.heisenbug.%: $(SOURCES) fuzz/%.c fuzz/heisenbug.c
55
55
 
56
56
  fuzz-debug:
57
57
  $(ECHO) "entering debug shell"
58
- $(Q) docker run -it --rm -e HISTFILE=/yarp/fuzz/output/.bash_history -v $(shell pwd):/yarp -v $(FUZZ_OUTPUT_DIR):/fuzz_output yarp/fuzz
58
+ $(Q) docker run -it --rm -e HISTFILE=/prism/fuzz/output/.bash_history -v $(shell pwd):/prism -v $(FUZZ_OUTPUT_DIR):/fuzz_output prism/fuzz
59
59
 
60
60
  fuzz-docker-build: fuzz/docker/Dockerfile
61
61
  $(ECHO) "building docker image"
62
- $(Q) docker build -t yarp/fuzz fuzz/docker/
62
+ $(Q) docker build -t prism/fuzz fuzz/docker/
63
63
 
64
64
  fuzz-run-%: FORCE fuzz-docker-build
65
65
  $(ECHO) "generating templates"
66
66
  $(Q) bundle exec rake templates
67
67
  $(ECHO) "running $* fuzzer"
68
- $(Q) docker run --rm -v $(shell pwd):/yarp yarp/fuzz /bin/bash -c "FUZZ_FLAGS=\"$(FUZZ_FLAGS)\" make build/fuzz.$*"
68
+ $(Q) docker run --rm -v $(shell pwd):/prism prism/fuzz /bin/bash -c "FUZZ_FLAGS=\"$(FUZZ_FLAGS)\" make build/fuzz.$*"
69
69
  $(ECHO) "starting AFL++ run"
70
70
  $(Q) mkdir -p $(FUZZ_OUTPUT_DIR)/$*
71
- $(Q) docker run -it --rm -v $(shell pwd):/yarp -v $(FUZZ_OUTPUT_DIR):/fuzz_output yarp/fuzz /bin/bash -c "./fuzz/$*.sh /fuzz_output/$*"
71
+ $(Q) docker run -it --rm -v $(shell pwd):/prism -v $(FUZZ_OUTPUT_DIR):/fuzz_output prism/fuzz /bin/bash -c "./fuzz/$*.sh /fuzz_output/$*"
72
72
  FORCE:
73
73
 
74
74
  fuzz-clean:
data/README.md CHANGED
@@ -1,4 +1,4 @@
1
- # Yet Another Ruby Parser
1
+ # Prism Ruby parser
2
2
 
3
3
  This is a parser for the Ruby programming language. It is designed to be portable, error tolerant, and maintainable. It is written in C99 and has no dependencies. It is currently being integrated into [CRuby](https://github.com/ruby/ruby), [JRuby](https://github.com/jruby/jruby), [TruffleRuby](https://github.com/oracle/truffleruby), [Sorbet](https://github.com/sorbet/sorbet), and [Syntax Tree](https://github.com/ruby-syntax-tree/syntax_tree).
4
4
 
@@ -16,29 +16,29 @@ The repository contains the infrastructure for both a shared library (librubypar
16
16
  ├── config.yml specification for tokens and nodes in the tree
17
17
  ├── docs documentation about the project
18
18
  ├── ext
19
- │   └── yarp
19
+ │   └── prism
20
20
  │   ├── extconf.rb configuration to generate the Makefile for the native extension
21
21
  │   └── extension.c the native extension that interacts with librubyparser
22
22
  ├── fuzz files related to fuzz testing
23
23
  ├── include
24
- │   ├── yarp header files for the shared library
25
- │   └── yarp.h main header file for the shared library
24
+ │   ├── prism header files for the shared library
25
+ │   └── prism.h main header file for the shared library
26
26
  ├── java Java bindings for the shared library
27
27
  ├── lib
28
- │   ├── yarp Ruby library files
29
- │   └── yarp.rb main entrypoint for the Ruby library
28
+ │   ├── prism Ruby library files
29
+ │   └── prism.rb main entrypoint for the Ruby library
30
30
  ├── rakelib various Rake tasks for the project
31
31
  ├── rust
32
- │   ├── yarp Rustified crate for the shared library
33
- │   └── yarp-sys FFI binding for Rust
32
+ │   ├── prism Rustified crate for the shared library
33
+ │   └── prism-sys FFI binding for Rust
34
34
  ├── src
35
35
  │   ├── enc various encoding files
36
36
  │   ├── util various utility files
37
- │   └── yarp.c main entrypoint for the shared library
37
+ │   └── prism.c main entrypoint for the shared library
38
38
  ├── templates contains ERB templates generated by templates/template.rb
39
39
  │   └── template.rb generates code from the nodes and tokens configured by config.yml
40
40
  └── test
41
- └── yarp
41
+ └── prism
42
42
  ├── fixtures Ruby code used for testing
43
43
  └── snapshots snapshots of generated syntax trees corresponding to fixtures
44
44
  ```
@@ -48,7 +48,7 @@ The repository contains the infrastructure for both a shared library (librubypar
48
48
  To compile the shared library, you will need:
49
49
 
50
50
  * A C99 compiler
51
- * autotools autoconf, automake, libtool)
51
+ * autotools (autoconf, automake, libtool)
52
52
  * make
53
53
  * Ruby 3.3.0-preview1 or later
54
54
 
@@ -87,4 +87,3 @@ See the [CONTRIBUTING.md](CONTRIBUTING.md) file for more information. We additio
87
87
  * [Ruby API](docs/ruby_api.md)
88
88
  * [Serialization](docs/serialization.md)
89
89
  * [Testing](docs/testing.md)
90
-
data/config.yml CHANGED
@@ -232,6 +232,8 @@ tokens:
232
232
  comment: "<<"
233
233
  - name: LESS_LESS_EQUAL
234
234
  comment: "<<="
235
+ - name: METHOD_NAME
236
+ comment: "a method name"
235
237
  - name: MINUS
236
238
  comment: "-"
237
239
  - name: MINUS_EQUAL
@@ -668,7 +670,6 @@ nodes:
668
670
  type: location?
669
671
  - name: block
670
672
  type: node?
671
- kind: BlockNode
672
673
  - name: flags
673
674
  type: flags
674
675
  kind: CallNodeFlags
@@ -1771,6 +1772,7 @@ nodes:
1771
1772
  type: location
1772
1773
  - name: content_loc
1773
1774
  type: location
1775
+ semantic_field: true # https://github.com/ruby/prism/issues/1452
1774
1776
  - name: closing_loc
1775
1777
  type: location
1776
1778
  - name: unescaped
@@ -2091,6 +2093,7 @@ nodes:
2091
2093
  type: location
2092
2094
  - name: content_loc
2093
2095
  type: location
2096
+ semantic_field: true # https://github.com/ruby/prism/issues/1452
2094
2097
  - name: closing_loc
2095
2098
  type: location
2096
2099
  - name: unescaped
@@ -2284,8 +2287,10 @@ nodes:
2284
2287
  kind: StringFlags
2285
2288
  - name: opening_loc
2286
2289
  type: location?
2290
+ semantic_field: true # https://github.com/ruby/prism/issues/1452
2287
2291
  - name: content_loc
2288
2292
  type: location
2293
+ semantic_field: true # https://github.com/ruby/prism/issues/1452
2289
2294
  - name: closing_loc
2290
2295
  type: location?
2291
2296
  - name: unescaped
@@ -2315,7 +2320,6 @@ nodes:
2315
2320
  type: location?
2316
2321
  - name: block
2317
2322
  type: node?
2318
- kind: BlockNode
2319
2323
  comment: |
2320
2324
  Represents the use of the `super` keyword with parentheses or arguments.
2321
2325
 
data/docs/build_system.md CHANGED
@@ -1,17 +1,17 @@
1
1
  # Build System
2
2
 
3
- There are many ways to build YARP, which means the build system is a bit more complicated than usual.
3
+ There are many ways to build prism, which means the build system is a bit more complicated than usual.
4
4
 
5
5
  ## Requirements
6
6
 
7
- * It must work to build YARP for all 6 uses-cases below.
8
- * It must be possible to build YARP without needing ruby/rake/etc.
9
- Because once YARP is the single parser in TruffleRuby, JRuby or CRuby there won't be another Ruby parser around to parse such Ruby code.
7
+ * It must work to build prism for all 6 use-cases below.
8
+ * It must be possible to build prism without needing ruby/rake/etc.
9
+ Because once prism is the single parser in TruffleRuby, JRuby or CRuby there won't be another Ruby parser around to parse such Ruby code.
10
10
  Most/every Ruby implementations want to avoid depending on another Ruby during the build process as that is very brittle.
11
- * It is desirable to compile YARP with the same or very similar compiler flags for all use-cases (e.g. optimization level, warning flags, etc).
12
- Otherwise, there is the risk YARP does not work correctly with those different compiler flags.
11
+ * It is desirable to compile prism with the same or very similar compiler flags for all use-cases (e.g. optimization level, warning flags, etc).
12
+ Otherwise, there is the risk prism does not work correctly with those different compiler flags.
13
13
 
14
- The main solution for the second point seems a Makefile, otherwise many of the usages would have to duplicate the logic to build YARP.
14
+ The main solution for the second point seems to be a Makefile, otherwise many of the usages would have to duplicate the logic to build prism.
15
15
 
16
16
  ## General Design
17
17
 
@@ -24,15 +24,15 @@ This way there is minimal duplication, and each layer builds on the previous one
24
24
 
25
25
  The static library exports no symbols, to avoid any conflict.
26
26
  The shared library exports some symbols, and this is fine since there should only be one librubyparser shared library
27
- loaded per process (i.e., at most one version of the yarp *gem* loaded in a process, only the gem uses the shared library).
27
+ loaded per process (i.e., at most one version of the prism *gem* loaded in a process, only the gem uses the shared library).
28
28
 
29
- ## The various ways to build YARP
29
+ ## The various ways to build prism
30
30
 
31
- ### Building from ruby/yarp repository with `bundle exec rake`
31
+ ### Building from ruby/prism repository with `bundle exec rake`
32
32
 
33
33
  `rake` calls `make` and then uses `Rake::ExtensionTask` to compile the C extension (see above).
34
34
 
35
- ### Building the yarp gem by `gem install/bundle install`
35
+ ### Building the prism gem by `gem install/bundle install`
36
36
 
37
37
  The gem contains the pre-generated templates.
38
38
  When installing the gem, `extconf.rb` is used and that:
@@ -44,31 +44,31 @@ there is Ruby code using FFI which uses `librubyparser.{so,dylib,dll}`
44
44
  to implement the same methods as the C extension, but using serialization instead of many native calls/accesses
45
45
  (JRuby does not support C extensions, serialization is faster on TruffleRuby than the C extension).
46
46
 
47
- ### Building the yarp gem from git, e.g. `gem 'yarp', github: 'ruby/yarp'`
47
+ ### Building the prism gem from git, e.g. `gem "prism", github: "ruby/prism"`
48
48
 
49
49
  The same as above, except the `extconf.rb` additionally runs first:
50
50
  * `templates/template.rb` to generate the templates
51
51
 
52
52
  Because of course those files are not part of the git repository.
53
53
 
54
- ### Building YARP as part of CRuby
54
+ ### Building prism as part of CRuby
55
55
 
56
- [This script](https://github.com/ruby/ruby/blob/32e828bb4a6c65a392b2300f3bdf93008c7b6f25/tool/sync_default_gems.rb#L399-L426) imports YARP sources in CRuby.
56
+ [This script](https://github.com/ruby/ruby/blob/32e828bb4a6c65a392b2300f3bdf93008c7b6f25/tool/sync_default_gems.rb#L399-L426) imports prism sources in CRuby.
57
57
 
58
58
  The script generates the templates when importing.
59
59
 
60
- YARP's `Makefile` is not used at all in CRuby. Instead, CRuby's `Makefile` is used.
60
+ prism's `Makefile` is not used at all in CRuby. Instead, CRuby's `Makefile` is used.
61
61
 
62
- ### Building YARP as part of TruffleRuby
62
+ ### Building prism as part of TruffleRuby
63
63
 
64
- [This script](https://github.com/oracle/truffleruby/blob/master/tool/import-yarp.sh) imports YARP sources in TruffleRuby.
64
+ [This script](https://github.com/oracle/truffleruby/blob/master/tool/import-prism.sh) imports prism sources in TruffleRuby.
65
65
  The script generates the templates when importing.
66
66
 
67
- Then when `mx build` builds TruffleRuby and the `yarp` mx project inside, it runs `make`.
67
+ Then when `mx build` builds TruffleRuby and the `prism` mx project inside, it runs `make`.
68
68
 
69
- Then the `yarp bindings` mx project is built, which contains the [bindings](https://github.com/oracle/truffleruby/blob/master/src/main/c/yarp_bindings/src/yarp_bindings.c)
70
- and links to `librubyparser.a` (to avoid exporting symbols, so no conflict when installing the yarp gem).
69
+ Then the `prism bindings` mx project is built, which contains the [bindings](https://github.com/oracle/truffleruby/blob/master/src/main/c/prism_bindings/src/prism_bindings.c)
70
+ and links to `librubyparser.a` (to avoid exporting symbols, so no conflict when installing the prism gem).
71
71
 
72
- ### Building YARP as part of JRuby
72
+ ### Building prism as part of JRuby
73
73
 
74
74
  TODO, probably similar to TruffleRuby.
data/docs/building.md CHANGED
@@ -1,13 +1,13 @@
1
1
  # Building
2
2
 
3
- The following describes how to build YARP from source.
3
+ The following describes how to build prism from source.
4
4
  This comes directly from the [Makefile](../Makefile).
5
5
 
6
6
  ## Common
7
7
 
8
8
  All of the source files match `src/**/*.c` and all of the headers match `include/**/*.h`.
9
9
 
10
- The following flags should be used to compile YARP:
10
+ The following flags should be used to compile prism:
11
11
 
12
12
  * `-std=c99` - Use the C99 standard
13
13
  * `-Wall -Wconversion -Wextra -Wpedantic -Wundef` - Enable the warnings we care about
@@ -16,7 +16,7 @@ The following flags should be used to compile YARP:
16
16
 
17
17
  ## Shared
18
18
 
19
- If you want to build YARP as a shared library and link against it, you should compile with:
19
+ If you want to build prism as a shared library and link against it, you should compile with:
20
20
 
21
21
  * `-fPIC -shared` - Compile as a shared library
22
- * `-DYP_EXPORT_SYMBOLS` - Export the symbols (by default nothing is exported)
22
+ * `-DPRISM_EXPORT_SYMBOLS` - Export the symbols (by default nothing is exported)
data/docs/configuration.md CHANGED
@@ -1,15 +1,19 @@
1
1
  # Configuration
2
2
 
3
- A lot of code in YARP's repository is templated from a single configuration file, [config.yml](../config.yml). This file is used to generate the following files:
4
-
5
- * `ext/yarp/api_node.c` - for defining how to build Ruby objects for the nodes out of C structs
6
- * `include/yarp/ast.h` - for defining the C structs that represent the nodes
7
- * `java/org/yarp/AbstractNodeVisitor.java` - for defining the visitor interface for the nodes in Java
8
- * `java/org/yarp/Loader.java` - for defining how to deserialize the nodes in Java
9
- * `java/org/yarp/Nodes.java` - for defining the nodes in Java
10
- * `lib/yarp/mutation_visitor.rb` - for defining the mutation visitor for the nodes in Ruby
11
- * `lib/yarp/node.rb` - for defining the nodes in Ruby
12
- * `lib/yarp/serialize.rb` - for defining how to deserialize the nodes in Ruby
3
+ A lot of code in prism's repository is templated from a single configuration file, [config.yml](../config.yml). This file is used to generate the following files:
4
+
5
+ * `ext/prism/api_node.c` - for defining how to build Ruby objects for the nodes out of C structs
6
+ * `include/prism/ast.h` - for defining the C structs that represent the nodes
7
+ * `java/org/prism/AbstractNodeVisitor.java` - for defining the visitor interface for the nodes in Java
8
+ * `java/org/prism/Loader.java` - for defining how to deserialize the nodes in Java
9
+ * `java/org/prism/Nodes.java` - for defining the nodes in Java
10
+ * `lib/prism/compiler.rb` - for defining the compiler for the nodes in Ruby
11
+ * `lib/prism/dispatcher.rb` - for defining the dispatch visitors for the nodes in Ruby
12
+ * `lib/prism/dsl.rb` - for defining the DSL for the nodes in Ruby
13
+ * `lib/prism/mutation_compiler.rb` - for defining the mutation compiler for the nodes in Ruby
14
+ * `lib/prism/node.rb` - for defining the nodes in Ruby
15
+ * `lib/prism/serialize.rb` - for defining how to deserialize the nodes in Ruby
16
+ * `lib/prism/visitor.rb` - for defining the visitor interface for the nodes in Ruby
13
17
  * `src/node.c` - for defining how to free the nodes in C and calculate the size in memory in C
14
18
  * `src/prettyprint.c` - for defining how to prettyprint the nodes in C
15
19
  * `src/serialize.c` - for defining how to serialize the nodes in C
@@ -25,7 +29,7 @@ This is a list of tokens to be used by the lexer. It is shared here so that it c
25
29
 
26
30
  Each token is expected to have a `name` key and a `comment` key (both as strings). Optionally they can have a `value` key (an integer) which is used to represent the value in the enum.
27
31
 
28
- In C these tokens will be templated out with the prefix `YP_TOKEN_`. For example, if you have a `name` key with the value `PERCENT`, you can access this in C through `YP_TOKEN_PERCENT`.
32
+ In C these tokens will be templated out with the prefix `PM_TOKEN_`. For example, if you have a `name` key with the value `PERCENT`, you can access this in C through `PM_TOKEN_PERCENT`.
29
33
 
30
34
  ## `flags`
31
35
 
@@ -33,7 +37,7 @@ Sometimes we need to communicate more information in the tree than can be repres
33
37
 
34
38
  Each flag is expected to have a `name` key (a string) and a `values` key (an array). Each value in the `values` key should be an object that contains both a `name` key (a string) that represents the name of the flag and a `comment` key (a string) that represents the comment for the flag.
35
39
 
36
- In C these flags will get templated out with a `YP_` prefix, then a snake-case version of the flag name, then the flag itself. For example, if you have a flag with the name `RegularExpressionFlags` and a value with the name `IGNORE_CASE`, you can access this in C through `YP_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE`.
40
+ In C these flags will get templated out with a `PM_` prefix, then a snake-case version of the flag name, then the flag itself. For example, if you have a flag with the name `RegularExpressionFlags` and a value with the name `IGNORE_CASE`, you can access this in C through `PM_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE`.
37
41
 
38
42
  ## `nodes`
39
43
 
@@ -43,14 +47,14 @@ Optionally, every node can define a `child_nodes` key that is an array. This arr
43
47
 
44
48
  The available values for `type` are:
45
49
 
46
- * `node` - A child node that is a node itself. This is a `yp_node_t *` in C.
47
- * `node?` - A child node that is optionally present. This is also a `yp_node_t *` in C, but can be `NULL`.
48
- * `node[]` - A child node that is an array of nodes. This is a `yp_node_list_t` in C.
49
- * `string` - A child node that is a string. For example, this is used as the name of the method in a call node, since it cannot directly reference the source string (as in `@-` or `foo=`). This is a `yp_string_t` in C.
50
- * `constant` - A variable-length integer that represents an index in the constant pool. This is a `yp_constant_id_t` in C.
51
- * `constant[]` - A child node that is an array of constants. This is a `yp_constant_id_list_t` in C.
52
- * `location` - A child node that is a location. This is a `yp_location_t` in C.
53
- * `location?` - A child node that is a location that is optionally present. This is a `yp_location_t` in C, but if the value is not present then the `start` and `end` fields will be `NULL`.
50
+ * `node` - A child node that is a node itself. This is a `pm_node_t *` in C.
51
+ * `node?` - A child node that is optionally present. This is also a `pm_node_t *` in C, but can be `NULL`.
52
+ * `node[]` - A child node that is an array of nodes. This is a `pm_node_list_t` in C.
53
+ * `string` - A child node that is a string. For example, this is used as the name of the method in a call node, since it cannot directly reference the source string (as in `@-` or `foo=`). This is a `pm_string_t` in C.
54
+ * `constant` - A variable-length integer that represents an index in the constant pool. This is a `pm_constant_id_t` in C.
55
+ * `constant[]` - A child node that is an array of constants. This is a `pm_constant_id_list_t` in C.
56
+ * `location` - A child node that is a location. This is a `pm_location_t` in C.
57
+ * `location?` - A child node that is a location that is optionally present. This is a `pm_location_t` in C, but if the value is not present then the `start` and `end` fields will be `NULL`.
54
58
  * `uint32` - A child node that is a 32-bit unsigned integer. This is a `uint32_t` in C.
55
59
 
56
- If the type is `node` or `node?` then the value also accepts an optional `kind` key (a string). This key is expected to match to the name of another node type within `config.yml`. This changes a couple of places where code is templated out to use the more specific struct name instead of the generic `yp_node_t`. For example, with `kind: StatementsNode` the `yp_node_t *` in C becomes a `yp_statements_node_t *`.
60
+ If the type is `node` or `node?` then the value also accepts an optional `kind` key (a string). This key is expected to match the name of another node type within `config.yml`. This changes a couple of places where code is templated out to use the more specific struct name instead of the generic `pm_node_t`. For example, with `kind: StatementsNode` the `pm_node_t *` in C becomes a `pm_statements_node_t *`.
data/docs/design.md CHANGED
@@ -12,7 +12,7 @@ The design of the parser is based around these main goals.
12
12
 
13
13
  The first piece to understand about the parser is the design of its syntax tree. This is documented in `config.yml`. Every token and node is defined in that file, along with comments about where they are found in what kinds of syntax. This file is used to template out a lot of different files, all found in the `templates` directory. The `templates/template.rb` script performs the templating and outputs all files matching the directory structure found in the templates directory.
14
14
 
15
- The templated files contain all of the code required to allocate and initialize nodes, pretty print nodes, and serialize nodes. This means for the most part, you will only need to then hook up the parser to call the templated functions to create the nodes in the correct position. That means editing the parser itself, which is housed in `yarp.c`.
15
+ The templated files contain all of the code required to allocate and initialize nodes, pretty print nodes, and serialize nodes. This means for the most part, you will only need to then hook up the parser to call the templated functions to create the nodes in the correct position. That means editing the parser itself, which is housed in `prism.c`.
16
16
 
17
17
  ## Pratt parsing
18
18
 
@@ -24,7 +24,7 @@ In order to provide the best possible error tolerance, the parser is hand-writte
24
24
  * https://matklad.github.io/2020/04/13/simple-but-powerful-pratt-parsing.html
25
25
  * https://chidiwilliams.com/post/on-recursive-descent-and-pratt-parsing/
26
26
 
27
- You can find most of the functions that correspond to constructs in the Pratt parsing algorithm in `yarp.c`. As a couple of examples:
27
+ You can find most of the functions that correspond to constructs in the Pratt parsing algorithm in `prism.c`. As a couple of examples:
28
28
 
29
29
  * `parse` corresponds to the `parse_expression` function
30
30
  * `nud` (null denotation) corresponds to the `parse_expression_prefix` function
data/docs/encoding.md CHANGED
@@ -10,7 +10,7 @@ If the file is not encoded in UTF-8, the user must specify the encoding in a "ma
10
10
  # encoding: iso-8859-9
11
11
  ```
12
12
 
13
- The key of the comment can be either "encoding" or "coding". The value of the comment must be a string that is a valid encoding name. The encodings that YARP supports by default are:
13
+ The key of the comment can be either "encoding" or "coding". The value of the comment must be a string that is a valid encoding name. The encodings that prism supports by default are:
14
14
 
15
15
  * `ascii`
16
16
  * `ascii-8bit`
@@ -44,11 +44,11 @@ The key of the comment can be either "encoding" or "coding". The value of the co
44
44
  * `windows-1251`
45
45
  * `windows-1252`
46
46
 
47
- For each of these encodings, YARP provides a function for checking if the subsequent bytes form an alphabetic or alphanumeric character.
47
+ For each of these encodings, prism provides a function for checking if the subsequent bytes form an alphabetic or alphanumeric character.
48
48
 
49
49
  ## Support for other encodings
50
50
 
51
- If an encoding is encountered that is not supported by YARP, YARP will call a user-provided callback function with the name of the encoding if one is provided. That function can be registered with `yp_parser_register_encoding_decode_callback`. The user-provided callback function can then provide a pointer to an encoding struct that contains the requisite functions that YARP will use those to parse identifiers going forward.
51
+ If an encoding is encountered that is not supported by prism, prism will call a user-provided callback function with the name of the encoding if one is provided. That function can be registered with `pm_parser_register_encoding_decode_callback`. The user-provided callback function can then provide a pointer to an encoding struct that contains the requisite functions that prism will use to parse identifiers going forward.
52
52
 
53
53
  If the user-provided callback function returns `NULL` (the value also provided by the default implementation in case a callback was not registered), an error will be added to the parser's error list and parsing will continue on using the default UTF-8 encoding.
54
54
 
@@ -84,34 +84,34 @@ typedef struct {
84
84
 
85
85
  // Return true if the encoding is a multibyte encoding.
86
86
  bool multibyte;
87
- } yp_encoding_t;
87
+ } pm_encoding_t;
88
88
 
89
- // When an encoding is encountered that isn't understood by YARP, we provide
89
+ // When an encoding is encountered that isn't understood by prism, we provide
90
90
  // the ability here to call out to a user-defined function to get an encoding
91
91
  // struct. If the function returns something that isn't NULL, we set that to
92
92
  // our encoding and use it to parse identifiers.
93
- typedef yp_encoding_t *(*yp_encoding_decode_callback_t)(yp_parser_t *parser, const uint8_t *name, size_t width);
93
+ typedef pm_encoding_t *(*pm_encoding_decode_callback_t)(pm_parser_t *parser, const uint8_t *name, size_t width);
94
94
 
95
- // Register a callback that will be called when YARP encounters a magic comment
95
+ // Register a callback that will be called when prism encounters a magic comment
96
96
  // with an encoding referenced that it doesn't understand. The callback should
97
97
  // return NULL if it also doesn't understand the encoding or it should return a
98
- // pointer to a yp_encoding_t struct that contains the functions necessary to
98
+ // pointer to a pm_encoding_t struct that contains the functions necessary to
99
99
  // parse identifiers.
100
- YP_EXPORTED_FUNCTION void
101
- yp_parser_register_encoding_decode_callback(yp_parser_t *parser, yp_encoding_decode_callback_t callback);
100
+ PRISM_EXPORTED_FUNCTION void
101
+ pm_parser_register_encoding_decode_callback(pm_parser_t *parser, pm_encoding_decode_callback_t callback);
102
102
  ```
103
103
 
104
104
  ## Getting notified when the encoding changes
105
105
 
106
- You may want to get notified when the encoding changes based on the result of parsing an encoding comment. We use this internally for our `lex` function in order to provide the correct encodings for the tokens that are returned. For that you can register a callback with `yp_parser_register_encoding_changed_callback`. The callback will be called with a pointer to the parser. The encoding can be accessed through `parser->encoding`.
106
+ You may want to get notified when the encoding changes based on the result of parsing an encoding comment. We use this internally for our `lex` function in order to provide the correct encodings for the tokens that are returned. For that you can register a callback with `pm_parser_register_encoding_changed_callback`. The callback will be called with a pointer to the parser. The encoding can be accessed through `parser->encoding`.
107
107
 
108
108
  ```c
109
- // When the encoding that is being used to parse the source is changed by YARP,
109
+ // When the encoding that is being used to parse the source is changed by prism,
110
110
  // we provide the ability here to call out to a user-defined function.
111
- typedef void (*yp_encoding_changed_callback_t)(yp_parser_t *parser);
111
+ typedef void (*pm_encoding_changed_callback_t)(pm_parser_t *parser);
112
112
 
113
- // Register a callback that will be called whenever YARP changes the encoding it
114
- // is using to parse based on the magic comment.
115
- YP_EXPORTED_FUNCTION void
116
- yp_parser_register_encoding_changed_callback(yp_parser_t *parser, yp_encoding_changed_callback_t callback);
113
+ // Register a callback that will be called whenever prism changes the encoding
114
+ // it is using to parse based on the magic comment.
115
+ PRISM_EXPORTED_FUNCTION void
116
+ pm_parser_register_encoding_changed_callback(pm_parser_t *parser, pm_encoding_changed_callback_t callback);
117
117
  ```
data/docs/fuzzing.md CHANGED
@@ -28,9 +28,9 @@ fuzz
28
28
 
29
29
  There are currently three fuzzing targets
30
30
 
31
- - `yp_parse_serialize` (parse)
32
- - `yp_regexp_named_capture_group_names` (regexp)
33
- - `yp_unescape_manipulate_string` (unescape)
31
+ - `pm_parse_serialize` (parse)
32
+ - `pm_regexp_named_capture_group_names` (regexp)
33
+ - `pm_unescape_manipulate_string` (unescape)
34
34
 
35
35
  Respectively, fuzzing can be performed with
36
36
 
@@ -48,7 +48,7 @@ make fuzz-debug
48
48
 
49
49
  # Out-of-bounds reads
50
50
 
51
- Currently, encoding functionality implementing the `yp_encoding_t` interface can read outside of inputs. For the time being, ASAN instrumentation is disabled for functions from src/enc. See `fuzz/asan.ignore`.
51
+ Currently, encoding functionality implementing the `pm_encoding_t` interface can read outside of inputs. For the time being, ASAN instrumentation is disabled for functions from src/enc. See `fuzz/asan.ignore`.
52
52
 
53
53
  To disable ASAN read instrumentation globally, use the `FUZZ_FLAGS` environment variable e.g.
54
54
 
data/docs/heredocs.md CHANGED
@@ -4,7 +4,7 @@ Heredocs are one of the most complicated pieces of this parser. There are many d
4
4
 
5
5
  ## 1. Lexing the identifier
6
6
 
7
- When a heredoc identifier is encountered in the regular process of lexing, we push the `YP_LEX_HEREDOC` mode onto the stack with the following metadata:
7
+ When a heredoc identifier is encountered in the regular process of lexing, we push the `PM_LEX_HEREDOC` mode onto the stack with the following metadata:
8
8
 
9
9
  * `ident_start`: A pointer to the start of the identifier for the heredoc. We need this to match against the end of the heredoc.
10
10
  * `ident_length`: The length of the identifier for the heredoc. We also need this to match.
@@ -16,7 +16,7 @@ Note that if the `parser.heredoc_end` field is already set, then it means we hav
16
16
 
17
17
  ## 2. Lexing the body
18
18
 
19
- The next time the lexer is asked for a token, it will be in the `YP_LEX_HEREDOC` mode. In this mode we are lexing the body of the heredoc. It will start by checking if the `next_start` field is set. If it is, then this is the first token within the body of the heredoc so we'll start lexing from there. Otherwise we'll start lexing from the end of the previous token.
19
+ The next time the lexer is asked for a token, it will be in the `PM_LEX_HEREDOC` mode. In this mode we are lexing the body of the heredoc. It will start by checking if the `next_start` field is set. If it is, then this is the first token within the body of the heredoc so we'll start lexing from there. Otherwise we'll start lexing from the end of the previous token.
20
20
 
21
21
  Lexing these fields is extremely similar to lexing an interpolated string. The only difference is that we also do an additional check at the beginning of each line to check if we have hit the terminator.
22
22
 
@@ -33,4 +33,4 @@ Once the heredoc has been lexed, the lexer will resume lexing from the `next_sta
33
33
 
34
34
  ## Compatibility with Ripper
35
35
 
36
- The order in which tokens are emitted is different from that of Ripper. Ripper emits each token in the file in the order in which it appears. YARP instead will emit the tokens that makes the most sense for the lexer, using the process described above. Therefore to line things up, `YARP.lex_compat` will shuffle the tokens around to match Ripper's output.
36
+ The order in which tokens are emitted is different from that of Ripper. Ripper emits each token in the file in the order in which it appears. prism instead will emit the tokens that make the most sense for the lexer, using the process described above. Therefore to line things up, `Prism.lex_compat` will shuffle the tokens around to match Ripper's output.