yarp 0.6.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +36 -0
- data/CONTRIBUTING.md +4 -0
- data/{Makefile.in → Makefile} +3 -4
- data/README.md +1 -1
- data/config.yml +29 -7
- data/docs/build_system.md +4 -15
- data/docs/building.md +1 -5
- data/docs/encoding.md +1 -0
- data/docs/{extension.md → ruby_api.md} +6 -3
- data/docs/serialization.md +71 -24
- data/ext/yarp/api_node.c +38 -6
- data/ext/yarp/extconf.rb +15 -10
- data/ext/yarp/extension.c +2 -0
- data/ext/yarp/extension.h +1 -1
- data/include/yarp/ast.h +108 -104
- data/include/yarp/defines.h +0 -15
- data/include/yarp/enc/yp_encoding.h +1 -0
- data/include/yarp/util/yp_buffer.h +1 -0
- data/include/yarp/util/yp_string.h +5 -1
- data/include/yarp/version.h +2 -3
- data/include/yarp.h +4 -2
- data/lib/yarp/ffi.rb +211 -0
- data/lib/yarp/lex_compat.rb +16 -2
- data/lib/yarp/node.rb +169 -117
- data/lib/yarp/ripper_compat.rb +3 -3
- data/lib/yarp/serialize.rb +285 -92
- data/lib/yarp.rb +167 -2
- data/src/enc/yp_unicode.c +9 -0
- data/src/node.c +22 -0
- data/src/prettyprint.c +49 -30
- data/src/serialize.c +90 -17
- data/src/util/yp_string.c +8 -17
- data/src/yarp.c +181 -49
- data/yarp.gemspec +5 -5
- metadata +6 -6
- data/config.h.in +0 -25
- data/configure +0 -4487
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 81d6e0266ee0d13563faafa47a0c0d09c5feabe870e2324f32564a7f68299dff
|
4
|
+
data.tar.gz: 771e9fd99caaf99cf7493d897cf9fdd5f93824e90bf09c22a1e5b5ec0e1a4bcf
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 16525d9ed87d00d9fd94af5cab5f99b8278ac4a81c7117195e80d3cd3cc6b12358fd0f2255cbe7df15ef1adef609719371bc3a210d8a2273d6e34fbdcb4b1a58
|
7
|
+
data.tar.gz: efbf02ccd9a7cb6a13b6ba56dc8fc3fc6e425d7d779583614fc9c24c74216fe3d89eadc83999f197a7227bfd90ca8ba9446c1b71c128e4874609f44f0869eb72
|
data/CHANGELOG.md
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
# Changelog
|
2
|
+
|
3
|
+
All notable changes to this project will be documented in this file.
|
4
|
+
|
5
|
+
The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
|
6
|
+
|
7
|
+
## [Unreleased]
|
8
|
+
|
9
|
+
## [0.7.0] - 2023-08-14
|
10
|
+
|
11
|
+
### Added
|
12
|
+
|
13
|
+
- We now have an explicit `FlipFlopNode`. It has the same flags as `RangeNode`.
|
14
|
+
- We now have a syntax error when implicit and explicit blocks are passed to a method call.
|
15
|
+
- `Node#slice` is now implemented, for retrieving the slice of the source code corresponding to a node.
|
16
|
+
- We now support the `utf8-mac` encoding.
|
17
|
+
- Predicate methods have been added for nodes that have flags. For example `CallNode#safe_navigation?` and `RangeNode#exclude_end?`.
|
18
|
+
- The gem now functions on JRuby and TruffleRuby, thanks to a new FFI backend.
|
19
|
+
- Comments are now part of the serialization API.
|
20
|
+
|
21
|
+
### Changed
|
22
|
+
|
23
|
+
- Autotools has been removed from the build system, so when the gem is installed it will no longer need to go through a configure step.
|
24
|
+
- The AST for `foo = *bar` has changed to have an explicit array on the right hand side, rather than a splat node. This is more consistent with how other parsers handle this.
|
25
|
+
- `RangeNodeFlags` has been renamed to `RangeFlags`.
|
26
|
+
- Unary minus on number literals is now parsed as part of the literal, rather than a call to a unary operator. This is more consistent with how other parsers handle this.
|
27
|
+
|
28
|
+
## [0.6.0] - 2023-08-09
|
29
|
+
|
30
|
+
### Added
|
31
|
+
|
32
|
+
- 🎉 Initial release! 🎉
|
33
|
+
|
34
|
+
[unreleased]: https://github.com/ruby/yarp/compare/v0.7.0...HEAD
|
35
|
+
[0.7.0]: https://github.com/ruby/yarp/compare/v0.6.0...v0.7.0
|
36
|
+
[0.6.0]: https://github.com/ruby/yarp/compare/d60531...v0.6.0
|
data/CONTRIBUTING.md
CHANGED
@@ -10,6 +10,10 @@ The discussions page on the GitHub repository are open. If you have a question o
|
|
10
10
|
|
11
11
|
If you want to contribute code, please first open or contribute to a discussion. A lot of the project is in flux, and we want to make sure that you are contributing to the right place. Once you have a discussion going, you can open a pull request with your changes. We will review your code and get it merged in.
|
12
12
|
|
13
|
+
### Ruby Features
|
14
|
+
|
15
|
+
Pattern matching and endless method definitions should be avoided as long as the latest TruffleRuby release does not support them.
|
16
|
+
|
13
17
|
## Tests
|
14
18
|
|
15
19
|
We could always use more tests! One of the biggest challenges of this project is building up a big test suite. If you want to contribute tests, feel free to open a pull request. These will get merged in as soon as possible.
|
data/{Makefile.in → Makefile}
RENAMED
@@ -10,10 +10,9 @@ FUZZ_OUTPUT_DIR = $(shell pwd)/fuzz/output
|
|
10
10
|
|
11
11
|
SOEXT := $(shell ruby -e 'puts RbConfig::CONFIG["SOEXT"]')
|
12
12
|
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
CC := @CC@
|
13
|
+
CPPFLAGS := -Iinclude
|
14
|
+
CFLAGS := -g -O2 -std=c99 -Wall -Werror -Wextra -Wpedantic -Wundef -Wconversion -fPIC -fvisibility=hidden
|
15
|
+
CC := cc
|
17
16
|
|
18
17
|
HEADERS := $(shell find include -name '*.h')
|
19
18
|
SOURCES := $(shell find src -name '*.c')
|
data/README.md
CHANGED
@@ -8,7 +8,7 @@ The repository contains the infrastructure for both a shared library (librubypar
|
|
8
8
|
|
9
9
|
```
|
10
10
|
.
|
11
|
-
├── Makefile
|
11
|
+
├── Makefile configuration to compile the shared library and native tests
|
12
12
|
├── Rakefile configuration to compile the native extension and run the Ruby tests
|
13
13
|
├── bin
|
14
14
|
│ ├── lex runs the lexer on a file or string, prints the tokens, and compares to ripper
|
data/config.yml
CHANGED
@@ -337,7 +337,7 @@ flags:
|
|
337
337
|
values:
|
338
338
|
- name: BEGIN_MODIFIER
|
339
339
|
comment: "a loop after a begin statement, so the body is executed first before the condition"
|
340
|
-
- name:
|
340
|
+
- name: RangeFlags
|
341
341
|
values:
|
342
342
|
- name: EXCLUDE_END
|
343
343
|
comment: "... operator"
|
@@ -601,7 +601,8 @@ nodes:
|
|
601
601
|
type: node?
|
602
602
|
kind: BlockNode
|
603
603
|
- name: flags
|
604
|
-
type:
|
604
|
+
type: flags
|
605
|
+
kind: CallNodeFlags
|
605
606
|
- name: name
|
606
607
|
type: string
|
607
608
|
comment: |
|
@@ -1053,6 +1054,22 @@ nodes:
|
|
1053
1054
|
|
1054
1055
|
foo in Foo(*bar, baz, *qux)
|
1055
1056
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
1057
|
+
- name: FlipFlopNode
|
1058
|
+
child_nodes:
|
1059
|
+
- name: left
|
1060
|
+
type: node?
|
1061
|
+
- name: right
|
1062
|
+
type: node?
|
1063
|
+
- name: operator_loc
|
1064
|
+
type: location
|
1065
|
+
- name: flags
|
1066
|
+
type: flags
|
1067
|
+
kind: RangeFlags
|
1068
|
+
comment: |
|
1069
|
+
Represents the use of the `..` or `...` operators to create flip flops.
|
1070
|
+
|
1071
|
+
baz if foo .. bar
|
1072
|
+
^^^^^^^^^^
|
1056
1073
|
- name: FloatNode
|
1057
1074
|
comment: |
|
1058
1075
|
Represents a floating point number literal.
|
@@ -1321,7 +1338,8 @@ nodes:
|
|
1321
1338
|
- name: closing_loc
|
1322
1339
|
type: location
|
1323
1340
|
- name: flags
|
1324
|
-
type:
|
1341
|
+
type: flags
|
1342
|
+
kind: RegularExpressionFlags
|
1325
1343
|
newline: parts
|
1326
1344
|
comment: |
|
1327
1345
|
Represents a regular expression literal that contains interpolation.
|
@@ -1744,7 +1762,8 @@ nodes:
|
|
1744
1762
|
- name: operator_loc
|
1745
1763
|
type: location
|
1746
1764
|
- name: flags
|
1747
|
-
type:
|
1765
|
+
type: flags
|
1766
|
+
kind: RangeFlags
|
1748
1767
|
comment: |
|
1749
1768
|
Represents the use of the `..` or `...` operators.
|
1750
1769
|
|
@@ -1779,7 +1798,8 @@ nodes:
|
|
1779
1798
|
- name: unescaped
|
1780
1799
|
type: string
|
1781
1800
|
- name: flags
|
1782
|
-
type:
|
1801
|
+
type: flags
|
1802
|
+
kind: RegularExpressionFlags
|
1783
1803
|
comment: |
|
1784
1804
|
Represents a regular expression literal with no interpolation.
|
1785
1805
|
|
@@ -2070,7 +2090,8 @@ nodes:
|
|
2070
2090
|
type: node?
|
2071
2091
|
kind: StatementsNode
|
2072
2092
|
- name: flags
|
2073
|
-
type:
|
2093
|
+
type: flags
|
2094
|
+
kind: LoopFlags
|
2074
2095
|
newline: predicate
|
2075
2096
|
comment: |
|
2076
2097
|
Represents the use of the `until` keyword, either in the block form or the modifier form.
|
@@ -2104,7 +2125,8 @@ nodes:
|
|
2104
2125
|
type: node?
|
2105
2126
|
kind: StatementsNode
|
2106
2127
|
- name: flags
|
2107
|
-
type:
|
2128
|
+
type: flags
|
2129
|
+
kind: LoopFlags
|
2108
2130
|
newline: predicate
|
2109
2131
|
comment: |
|
2110
2132
|
Represents the use of the `while` keyword, either in the block form or the modifier form.
|
data/docs/build_system.md
CHANGED
@@ -16,8 +16,6 @@ The main solution for the second point seems a Makefile, otherwise many of the u
|
|
16
16
|
## General Design
|
17
17
|
|
18
18
|
1. Templates are generated by `templates/template.rb`
|
19
|
-
2. `autoconf` creates `./configure` and `autoheader` creates `config.h.in` (both files are platform-independent)
|
20
|
-
3. `./configure` creates `include/yarp/config.h` (which contains `HAVE_*` macros, platform-specific) and the `Makefile`
|
21
19
|
4. The `Makefile` compiles both `librubyparser.a` and `librubyparser.{so,dylib,dll}` from the `src/**/*.c` and `include/**/*.h` files
|
22
20
|
5. The `Rakefile` `:compile` task ensures the above prerequisites are done, then calls `make`,
|
23
21
|
and uses `Rake::ExtensionTask` to compile the C extension (using its `extconf.rb`), which uses `librubyparser.a`
|
@@ -36,14 +34,13 @@ loaded per process (i.e., at most one version of the yarp *gem* loaded in a proc
|
|
36
34
|
|
37
35
|
### Building the yarp gem by `gem install/bundle install`
|
38
36
|
|
39
|
-
The gem contains the pre-generated templates
|
37
|
+
The gem contains the pre-generated templates.
|
40
38
|
When installing the gem, `extconf.rb` is used and that:
|
41
|
-
* runs `./configure` which creates the `Makefile` and `include/yarp/config.h`
|
42
39
|
* runs `make build/librubyparser.a`
|
43
40
|
* compiles the C extension with mkmf
|
44
41
|
|
45
42
|
When installing the gem on JRuby and TruffleRuby, no C extension is built, so instead of the last step,
|
46
|
-
there is Ruby code using
|
43
|
+
there is Ruby code using FFI which uses `librubyparser.{so,dylib,dll}`
|
47
44
|
to implement the same methods as the C extension, but using serialization instead of many native calls/accesses
|
48
45
|
(JRuby does not support C extensions, serialization is faster on TruffleRuby than the C extension).
|
49
46
|
|
@@ -51,7 +48,6 @@ to implement the same methods as the C extension, but using serialization instea
|
|
51
48
|
|
52
49
|
The same as above, except the `extconf.rb` additionally runs first:
|
53
50
|
* `templates/template.rb` to generate the templates
|
54
|
-
* `autoconf` and `autoheader` to generate `configure` and `config.h.in`
|
55
51
|
|
56
52
|
Because of course those files are not part of the git repository.
|
57
53
|
|
@@ -61,21 +57,14 @@ Because of course those files are not part of the git repository.
|
|
61
57
|
|
62
58
|
The script generates the templates when importing.
|
63
59
|
|
64
|
-
`
|
65
|
-
It is assumed that CRuby's `./configure` is a superset of YARP's configure checks.
|
66
|
-
|
67
|
-
YARP's `autotools` is not used at all in CRuby and in fact YARP's `Makefile` is not used either.
|
68
|
-
Instead, CRuby's `autotools` setup is used, and `CRuby`'s Makefiles are used.
|
60
|
+
YARP's `Makefile` is not used at all in CRuby. Instead, CRuby's `Makefile` is used.
|
69
61
|
|
70
62
|
### Building YARP as part of TruffleRuby
|
71
63
|
|
72
64
|
[This script](https://github.com/oracle/truffleruby/blob/master/tool/import-yarp.sh) imports YARP sources in TruffleRuby.
|
73
65
|
The script generates the templates when importing.
|
74
|
-
It also generates `configure` and `config.h.in` (to avoid needing `autotools` on every machine building TruffleRuby).
|
75
66
|
|
76
|
-
Then when `mx build` builds TruffleRuby and the `yarp` mx project inside, it
|
77
|
-
* runs `./configure`
|
78
|
-
* runs `make`
|
67
|
+
Then when `mx build` builds TruffleRuby and the `yarp` mx project inside, it runs `make`.
|
79
68
|
|
80
69
|
Then the `yarp bindings` mx project is built, which contains the [bindings](https://github.com/oracle/truffleruby/blob/master/src/main/c/yarp_bindings/src/yarp_bindings.c)
|
81
70
|
and links to `librubyparser.a` (to avoid exporting symbols, so no conflict when installing the yarp gem).
|
data/docs/building.md
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
# Building
|
2
2
|
|
3
3
|
The following describes how to build YARP from source.
|
4
|
+
This comes directly from the [Makefile](../Makefile).
|
4
5
|
|
5
6
|
## Common
|
6
7
|
|
@@ -13,11 +14,6 @@ The following flags should be used to compile YARP:
|
|
13
14
|
* `-Werror` - Treat warnings as errors
|
14
15
|
* `-fvisibility=hidden` - Hide all symbols by default
|
15
16
|
|
16
|
-
The following flags can be used to compile YARP:
|
17
|
-
|
18
|
-
* `-DHAVE_MMAP` - Should be passed if the system has the `mmap` function
|
19
|
-
* `-DHAVE_SNPRINTF` - Should be passed if the system has the `snprintf` function
|
20
|
-
|
21
17
|
## Shared
|
22
18
|
|
23
19
|
If you want to build YARP as a shared library and link against it, you should compile with:
|
data/docs/encoding.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1
|
-
#
|
1
|
+
# Ruby API
|
2
2
|
|
3
|
-
|
3
|
+
The `yarp` gem provides a Ruby API for accessing the syntax tree.
|
4
4
|
|
5
5
|
For the most part, the API for accessing the tree mirrors that found in the [Syntax Tree](https://github.com/ruby-syntax-tree/syntax_tree) project. This means:
|
6
6
|
|
@@ -8,7 +8,9 @@ For the most part, the API for accessing the tree mirrors that found in the [Syn
|
|
8
8
|
* Nodes in the tree respond to named methods for accessing their children as well as `#child_nodes`
|
9
9
|
* Nodes respond to the pattern matching interfaces `#deconstruct` and `#deconstruct_keys`
|
10
10
|
|
11
|
-
Every entry in `config.yml` will generate a Ruby class as well as the code that builds the nodes themselves.
|
11
|
+
Every entry in `config.yml` will generate a Ruby class as well as the code that builds the nodes themselves.
|
12
|
+
Creating a syntax tree involves calling one of the class methods on the `YARP` module.
|
13
|
+
The full API is documented below.
|
12
14
|
|
13
15
|
## API
|
14
16
|
|
@@ -18,3 +20,4 @@ Every entry in `config.yml` will generate a Ruby class as well as the code that
|
|
18
20
|
* `YARP.lex_file(filepath)` - parse the tokens corresponding to the given source file and return them as an array within a parse result
|
19
21
|
* `YARP.parse(source)` - parse the syntax tree corresponding to the given source string and return it within a parse result
|
20
22
|
* `YARP.parse_file(filepath)` - parse the syntax tree corresponding to the given source file and return it within a parse result
|
23
|
+
* `YARP.load(source, serialized)` - deserialize the given serialized syntax tree into a syntax tree, using the source as a reference
|
data/docs/serialization.md
CHANGED
@@ -1,10 +1,58 @@
|
|
1
1
|
# Serialization
|
2
2
|
|
3
|
-
YARP ships with the ability to serialize a syntax tree to a single string.
|
3
|
+
YARP ships with the ability to serialize a syntax tree to a single string.
|
4
|
+
The string can then be deserialized back into a syntax tree using a language other than C.
|
5
|
+
This is useful for using the parsing logic in other tools without having to write a parser in that language.
|
6
|
+
The syntax tree still requires a copy of the original source, as for the most part it just contains byte offsets into the source string.
|
7
|
+
|
8
|
+
## Types
|
9
|
+
|
10
|
+
Let us define some simple types for readability.
|
11
|
+
|
12
|
+
### varint
|
13
|
+
|
14
|
+
A variable-length integer with the value fitting in `uint32_t` using between 1 and 5 bytes, using the [LEB128](https://en.wikipedia.org/wiki/LEB128) encoding.
|
15
|
+
This drastically cuts down on the size of the serialized string, especially when the source file is large.
|
16
|
+
|
17
|
+
### string
|
18
|
+
|
19
|
+
| # bytes | field |
|
20
|
+
| --- | --- |
|
21
|
+
| varint | the length of the string in bytes |
|
22
|
+
| ... | the string bytes |
|
23
|
+
|
24
|
+
### location
|
25
|
+
|
26
|
+
| # bytes | field |
|
27
|
+
| --- | --- |
|
28
|
+
| varint | byte offset into the source string where this location begins |
|
29
|
+
| varint | length of the location in bytes in the source string |
|
30
|
+
|
31
|
+
### comment
|
32
|
+
|
33
|
+
The comment type is one of:
|
34
|
+
* 0=`INLINE` (`# comment`)
|
35
|
+
* 1=`EMBEDDED_DOCUMENT` (`=begin`/`=end`)
|
36
|
+
* 2=`__END__` (after `__END__`)
|
37
|
+
|
38
|
+
| # bytes | field |
|
39
|
+
| --- | --- |
|
40
|
+
| `1` | comment type |
|
41
|
+
| location | the location in the source of this comment |
|
42
|
+
|
43
|
+
### diagnostic
|
44
|
+
|
45
|
+
| # bytes | field |
|
46
|
+
| --- | --- |
|
47
|
+
| string | diagnostic message (ASCII-only characters) |
|
48
|
+
| location | the location in the source this diagnostic applies to |
|
4
49
|
|
5
50
|
## Structure
|
6
51
|
|
7
|
-
The serialized string representing the syntax tree is composed of three parts: the header, the body, and the constant pool.
|
52
|
+
The serialized string representing the syntax tree is composed of three parts: the header, the body, and the constant pool.
|
53
|
+
The header contains information like the version of YARP that serialized the tree.
|
54
|
+
The body contains the actual nodes in the tree.
|
55
|
+
The constant pool contains constants that were interned while parsing.
|
8
56
|
|
9
57
|
The header is structured like the following table:
|
10
58
|
|
@@ -14,32 +62,28 @@ The header is structured like the following table:
|
|
14
62
|
| `1` | major version number |
|
15
63
|
| `1` | minor version number |
|
16
64
|
| `1` | patch version number |
|
17
|
-
| varint | the length of the encoding name |
|
18
65
|
| string | the encoding name |
|
66
|
+
| varint | number of comments |
|
67
|
+
| comment* | comments |
|
19
68
|
| varint | number of errors |
|
20
|
-
|
|
21
|
-
| string | error string, as byte[] in source encoding |
|
22
|
-
| varint | location in the source code - start |
|
23
|
-
| varint | location in the source code - length |
|
24
|
-
| ... | more errors |
|
69
|
+
| diagnostic* | errors |
|
25
70
|
| varint | number of warnings |
|
26
|
-
|
|
27
|
-
| string | warning string, as byte[] in source encoding |
|
28
|
-
| varint | location in the source code - start |
|
29
|
-
| varint | location in the source code - length |
|
30
|
-
| ... | more warnings |
|
71
|
+
| diagnostic* | warnings |
|
31
72
|
| `4` | content pool offset |
|
32
73
|
| varint | content pool size |
|
33
74
|
|
34
|
-
After the header comes the body of the serialized string.
|
75
|
+
After the header comes the body of the serialized string.
|
76
|
+
The body consists of a sequence of nodes that is built using a prefix traversal order of the syntax tree.
|
77
|
+
Each node is structured like the following table:
|
35
78
|
|
36
79
|
| # bytes | field |
|
37
80
|
| --- | --- |
|
38
81
|
| `1` | node type |
|
39
|
-
|
|
40
|
-
| varint | length of the node in bytes in the source string |
|
82
|
+
| location | node location |
|
41
83
|
|
42
|
-
Each node's child is then appended to the serialized string.
|
84
|
+
Each node's child is then appended to the serialized string.
|
85
|
+
The child node types can be determined by referencing `config.yml`.
|
86
|
+
Depending on the type of child node, it could take a couple of different forms, described below:
|
43
87
|
|
44
88
|
* `node` - A child node that is a node itself. This is structured just like the parent node.
|
45
89
|
* `node?` - A child node that is optionally present. If the node is not present, then a single `0` byte will be written in its place. If it is present, then it will be structured just like the parent node.
|
@@ -52,7 +96,10 @@ Each node's child is then appended to the serialized string. The child node type
|
|
52
96
|
* `location[]` - A child node that is an array of locations. This is structured as a `4` byte length, followed by the locations themselves.
|
53
97
|
* `uint32` - A child node that is a 32-bit unsigned integer. This is structured as a variable-length integer.
|
54
98
|
|
55
|
-
After the syntax tree, the content pool is serialized.
|
99
|
+
After the syntax tree, the content pool is serialized.
|
100
|
+
This is a list of constants that were referenced from within the tree.
|
101
|
+
The content pool begins at the offset specified in the header.
|
102
|
+
Each constant is structured as:
|
56
103
|
|
57
104
|
| # bytes | field |
|
58
105
|
| --- | --- |
|
@@ -61,10 +108,6 @@ After the syntax tree, the content pool is serialized. This is a list of constan
|
|
61
108
|
|
62
109
|
At the end of the serialization, the buffer is null terminated.
|
63
110
|
|
64
|
-
## Variable-length integers
|
65
|
-
|
66
|
-
Variable-length integers are used throughout the serialized format, using the [LEB128](https://en.wikipedia.org/wiki/LEB128) encoding. This drastically cuts down on the size of the serialized string, especially when the source file is large.
|
67
|
-
|
68
111
|
## APIs
|
69
112
|
|
70
113
|
The relevant APIs and struct definitions are listed below:
|
@@ -105,7 +148,10 @@ serialize(const char *source, size_t length) {
|
|
105
148
|
}
|
106
149
|
```
|
107
150
|
|
108
|
-
The final argument to `yp_parse_serialize` controls the metadata of the source.
|
151
|
+
The final argument to `yp_parse_serialize` controls the metadata of the source.
|
152
|
+
This includes the filepath that the source is associated with, and any nested local variable scopes that are necessary to properly parse the file (in the case of parsing an `eval`).
|
153
|
+
Note that no `varint`s are used here, both to make it easier for the caller to produce the metadata and because the serialized size is less important here.
|
154
|
+
The metadata is a serialized format itself, and is structured as follows:
|
109
155
|
|
110
156
|
| # bytes | field |
|
111
157
|
| --- | --- |
|
@@ -127,4 +173,5 @@ Each local variable within each scope is encoded as:
|
|
127
173
|
| `4` | the size of the local variable name |
|
128
174
|
| | the local variable name |
|
129
175
|
|
130
|
-
The metadata can be `NULL` (as seen in the example above).
|
176
|
+
The metadata can be `NULL` (as seen in the example above).
|
177
|
+
If it is not null, then a minimal metadata string would be `"\0\0\0\0\0\0\0\0"` which would use 4 bytes to indicate an empty filepath string and 4 bytes to indicate that there were no local variable scopes.
|
data/ext/yarp/api_node.c
CHANGED
@@ -59,6 +59,7 @@ static VALUE rb_cYARPEmbeddedVariableNode;
|
|
59
59
|
static VALUE rb_cYARPEnsureNode;
|
60
60
|
static VALUE rb_cYARPFalseNode;
|
61
61
|
static VALUE rb_cYARPFindPatternNode;
|
62
|
+
static VALUE rb_cYARPFlipFlopNode;
|
62
63
|
static VALUE rb_cYARPFloatNode;
|
63
64
|
static VALUE rb_cYARPForNode;
|
64
65
|
static VALUE rb_cYARPForwardingArgumentsNode;
|
@@ -519,6 +520,13 @@ yp_ast_new(yp_parser_t *parser, yp_node_t *node, rb_encoding *encoding) {
|
|
519
520
|
yp_node_stack_push(&node_stack, (yp_node_t *) cast->right);
|
520
521
|
break;
|
521
522
|
}
|
523
|
+
#line 111 "api_node.c.erb"
|
524
|
+
case YP_NODE_FLIP_FLOP_NODE: {
|
525
|
+
yp_flip_flop_node_t *cast = (yp_flip_flop_node_t *) node;
|
526
|
+
yp_node_stack_push(&node_stack, (yp_node_t *) cast->left);
|
527
|
+
yp_node_stack_push(&node_stack, (yp_node_t *) cast->right);
|
528
|
+
break;
|
529
|
+
}
|
522
530
|
#line 111 "api_node.c.erb"
|
523
531
|
case YP_NODE_FOR_NODE: {
|
524
532
|
yp_for_node_t *cast = (yp_for_node_t *) node;
|
@@ -1280,7 +1288,7 @@ yp_ast_new(yp_parser_t *parser, yp_node_t *node, rb_encoding *encoding) {
|
|
1280
1288
|
argv[6] = rb_ary_pop(value_stack);
|
1281
1289
|
|
1282
1290
|
// flags
|
1283
|
-
argv[7] = ULONG2NUM(
|
1291
|
+
argv[7] = ULONG2NUM(node->flags >> 1);
|
1284
1292
|
|
1285
1293
|
// name
|
1286
1294
|
argv[8] = yp_string_new(&cast->name, encoding);
|
@@ -1916,6 +1924,29 @@ yp_ast_new(yp_parser_t *parser, yp_node_t *node, rb_encoding *encoding) {
|
|
1916
1924
|
rb_ary_push(value_stack, rb_class_new_instance(7, argv, rb_cYARPFindPatternNode));
|
1917
1925
|
break;
|
1918
1926
|
}
|
1927
|
+
#line 137 "api_node.c.erb"
|
1928
|
+
case YP_NODE_FLIP_FLOP_NODE: {
|
1929
|
+
yp_flip_flop_node_t *cast = (yp_flip_flop_node_t *) node;
|
1930
|
+
VALUE argv[5];
|
1931
|
+
|
1932
|
+
// left
|
1933
|
+
argv[0] = rb_ary_pop(value_stack);
|
1934
|
+
|
1935
|
+
// right
|
1936
|
+
argv[1] = rb_ary_pop(value_stack);
|
1937
|
+
|
1938
|
+
// operator_loc
|
1939
|
+
argv[2] = yp_location_new(parser, cast->operator_loc.start, cast->operator_loc.end, source);
|
1940
|
+
|
1941
|
+
// flags
|
1942
|
+
argv[3] = ULONG2NUM(node->flags >> 1);
|
1943
|
+
|
1944
|
+
// location
|
1945
|
+
argv[4] = yp_location_new(parser, node->location.start, node->location.end, source);
|
1946
|
+
|
1947
|
+
rb_ary_push(value_stack, rb_class_new_instance(5, argv, rb_cYARPFlipFlopNode));
|
1948
|
+
break;
|
1949
|
+
}
|
1919
1950
|
#line 137 "api_node.c.erb"
|
1920
1951
|
case YP_NODE_FLOAT_NODE: {
|
1921
1952
|
VALUE argv[1];
|
@@ -2319,7 +2350,7 @@ yp_ast_new(yp_parser_t *parser, yp_node_t *node, rb_encoding *encoding) {
|
|
2319
2350
|
argv[2] = yp_location_new(parser, cast->closing_loc.start, cast->closing_loc.end, source);
|
2320
2351
|
|
2321
2352
|
// flags
|
2322
|
-
argv[3] = ULONG2NUM(
|
2353
|
+
argv[3] = ULONG2NUM(node->flags >> 1);
|
2323
2354
|
|
2324
2355
|
// location
|
2325
2356
|
argv[4] = yp_location_new(parser, node->location.start, node->location.end, source);
|
@@ -2978,7 +3009,7 @@ yp_ast_new(yp_parser_t *parser, yp_node_t *node, rb_encoding *encoding) {
|
|
2978
3009
|
argv[2] = yp_location_new(parser, cast->operator_loc.start, cast->operator_loc.end, source);
|
2979
3010
|
|
2980
3011
|
// flags
|
2981
|
-
argv[3] = ULONG2NUM(
|
3012
|
+
argv[3] = ULONG2NUM(node->flags >> 1);
|
2982
3013
|
|
2983
3014
|
// location
|
2984
3015
|
argv[4] = yp_location_new(parser, node->location.start, node->location.end, source);
|
@@ -3027,7 +3058,7 @@ yp_ast_new(yp_parser_t *parser, yp_node_t *node, rb_encoding *encoding) {
|
|
3027
3058
|
argv[3] = yp_string_new(&cast->unescaped, encoding);
|
3028
3059
|
|
3029
3060
|
// flags
|
3030
|
-
argv[4] = ULONG2NUM(
|
3061
|
+
argv[4] = ULONG2NUM(node->flags >> 1);
|
3031
3062
|
|
3032
3063
|
// location
|
3033
3064
|
argv[5] = yp_location_new(parser, node->location.start, node->location.end, source);
|
@@ -3437,7 +3468,7 @@ yp_ast_new(yp_parser_t *parser, yp_node_t *node, rb_encoding *encoding) {
|
|
3437
3468
|
argv[2] = rb_ary_pop(value_stack);
|
3438
3469
|
|
3439
3470
|
// flags
|
3440
|
-
argv[3] = ULONG2NUM(
|
3471
|
+
argv[3] = ULONG2NUM(node->flags >> 1);
|
3441
3472
|
|
3442
3473
|
// location
|
3443
3474
|
argv[4] = yp_location_new(parser, node->location.start, node->location.end, source);
|
@@ -3483,7 +3514,7 @@ yp_ast_new(yp_parser_t *parser, yp_node_t *node, rb_encoding *encoding) {
|
|
3483
3514
|
argv[2] = rb_ary_pop(value_stack);
|
3484
3515
|
|
3485
3516
|
// flags
|
3486
|
-
argv[3] = ULONG2NUM(
|
3517
|
+
argv[3] = ULONG2NUM(node->flags >> 1);
|
3487
3518
|
|
3488
3519
|
// location
|
3489
3520
|
argv[4] = yp_location_new(parser, node->location.start, node->location.end, source);
|
@@ -3595,6 +3626,7 @@ Init_yarp_api_node(void) {
|
|
3595
3626
|
rb_cYARPEnsureNode = rb_define_class_under(rb_cYARP, "EnsureNode", rb_cYARPNode);
|
3596
3627
|
rb_cYARPFalseNode = rb_define_class_under(rb_cYARP, "FalseNode", rb_cYARPNode);
|
3597
3628
|
rb_cYARPFindPatternNode = rb_define_class_under(rb_cYARP, "FindPatternNode", rb_cYARPNode);
|
3629
|
+
rb_cYARPFlipFlopNode = rb_define_class_under(rb_cYARP, "FlipFlopNode", rb_cYARPNode);
|
3598
3630
|
rb_cYARPFloatNode = rb_define_class_under(rb_cYARP, "FloatNode", rb_cYARPNode);
|
3599
3631
|
rb_cYARPForNode = rb_define_class_under(rb_cYARP, "ForNode", rb_cYARPNode);
|
3600
3632
|
rb_cYARPForwardingArgumentsNode = rb_define_class_under(rb_cYARP, "ForwardingArgumentsNode", rb_cYARPNode);
|
data/ext/yarp/extconf.rb
CHANGED
@@ -1,13 +1,21 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require "mkmf"
|
4
3
|
require "rbconfig"
|
5
|
-
require "rake"
|
6
4
|
|
7
5
|
module Yarp
|
8
6
|
module ExtConf
|
9
7
|
class << self
|
10
8
|
def configure
|
9
|
+
unless RUBY_ENGINE == "ruby"
|
10
|
+
# On non-CRuby we only need the shared library, so build only that and not the C extension.
|
11
|
+
# We also avoid `require "mkmf"` as that prepends the LLVM toolchain to PATH on TruffleRuby,
|
12
|
+
# but we want to use the native toolchain here since librubyparser is run natively.
|
13
|
+
build_shared_rubyparser
|
14
|
+
File.write("Makefile", "all install clean:\n\t@#{RbConfig::CONFIG["NULLCMD"]}\n")
|
15
|
+
return
|
16
|
+
end
|
17
|
+
|
18
|
+
require "mkmf"
|
11
19
|
configure_c_extension
|
12
20
|
configure_rubyparser
|
13
21
|
|
@@ -35,7 +43,7 @@ module Yarp
|
|
35
43
|
end
|
36
44
|
$LOCAL_LIBS << " #{static_archive_path}"
|
37
45
|
else
|
38
|
-
shared_library_path = File.join(build_dir, "librubyparser.#{RbConfig::CONFIG["
|
46
|
+
shared_library_path = File.join(build_dir, "librubyparser.#{RbConfig::CONFIG["SOEXT"]}")
|
39
47
|
unless File.exist?(shared_library_path)
|
40
48
|
build_shared_rubyparser
|
41
49
|
end
|
@@ -62,15 +70,12 @@ module Yarp
|
|
62
70
|
|
63
71
|
def build_target_rubyparser(target)
|
64
72
|
Dir.chdir(root_dir) do
|
65
|
-
if !File.exist?("
|
73
|
+
if !File.exist?("include/yarp/ast.h") && Dir.exist?(".git")
|
66
74
|
# this block only exists to support building the gem from a "git" source,
|
67
75
|
# normally we package up the configure and other files in the gem itself
|
68
|
-
|
69
|
-
Rake.sh("autoheader")
|
70
|
-
Rake.sh("templates/template.rb")
|
76
|
+
system("templates/template.rb", exception: true)
|
71
77
|
end
|
72
|
-
|
73
|
-
Rake.sh("make", target)
|
78
|
+
system("make", target, exception: true)
|
74
79
|
end
|
75
80
|
end
|
76
81
|
|
@@ -123,7 +128,7 @@ module Yarp
|
|
123
128
|
end
|
124
129
|
end
|
125
130
|
|
126
|
-
if
|
131
|
+
if ARGV.delete("--help")
|
127
132
|
Yarp::ExtConf.print_help
|
128
133
|
exit!(0)
|
129
134
|
end
|
data/ext/yarp/extension.c
CHANGED
@@ -522,6 +522,8 @@ Init_yarp(void) {
|
|
522
522
|
// in yarp.h.
|
523
523
|
rb_define_const(rb_cYARP, "VERSION", rb_str_new2(EXPECTED_YARP_VERSION));
|
524
524
|
|
525
|
+
rb_define_const(rb_cYARP, "BACKEND", ID2SYM(rb_intern("CExtension")));
|
526
|
+
|
525
527
|
// First, the functions that have to do with lexing and parsing.
|
526
528
|
rb_define_singleton_method(rb_cYARP, "dump", dump, -1);
|
527
529
|
rb_define_singleton_method(rb_cYARP, "dump_file", dump_file, 1);
|
data/ext/yarp/extension.h
CHANGED
@@ -5,7 +5,7 @@
|
|
5
5
|
#include <ruby/encoding.h>
|
6
6
|
#include "yarp.h"
|
7
7
|
|
8
|
-
#define EXPECTED_YARP_VERSION "0.
|
8
|
+
#define EXPECTED_YARP_VERSION "0.7.0"
|
9
9
|
|
10
10
|
VALUE yp_source_new(yp_parser_t *parser);
|
11
11
|
VALUE yp_token_new(yp_parser_t *parser, yp_token_t *token, rb_encoding *encoding, VALUE source);
|