yarp 0.6.0 → 0.7.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +36 -0
- data/CONTRIBUTING.md +4 -0
- data/{Makefile.in → Makefile} +3 -4
- data/README.md +1 -1
- data/config.yml +29 -7
- data/docs/build_system.md +4 -15
- data/docs/building.md +1 -5
- data/docs/encoding.md +1 -0
- data/docs/{extension.md → ruby_api.md} +6 -3
- data/docs/serialization.md +71 -24
- data/ext/yarp/api_node.c +38 -6
- data/ext/yarp/extconf.rb +15 -10
- data/ext/yarp/extension.c +2 -0
- data/ext/yarp/extension.h +1 -1
- data/include/yarp/ast.h +108 -104
- data/include/yarp/defines.h +0 -15
- data/include/yarp/enc/yp_encoding.h +1 -0
- data/include/yarp/util/yp_buffer.h +1 -0
- data/include/yarp/util/yp_string.h +5 -1
- data/include/yarp/version.h +2 -3
- data/include/yarp.h +4 -2
- data/lib/yarp/ffi.rb +211 -0
- data/lib/yarp/lex_compat.rb +16 -2
- data/lib/yarp/node.rb +169 -117
- data/lib/yarp/ripper_compat.rb +3 -3
- data/lib/yarp/serialize.rb +285 -92
- data/lib/yarp.rb +167 -2
- data/src/enc/yp_unicode.c +9 -0
- data/src/node.c +22 -0
- data/src/prettyprint.c +49 -30
- data/src/serialize.c +90 -17
- data/src/util/yp_string.c +8 -17
- data/src/yarp.c +181 -49
- data/yarp.gemspec +5 -5
- metadata +6 -6
- data/config.h.in +0 -25
- data/configure +0 -4487
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 81d6e0266ee0d13563faafa47a0c0d09c5feabe870e2324f32564a7f68299dff
|
4
|
+
data.tar.gz: 771e9fd99caaf99cf7493d897cf9fdd5f93824e90bf09c22a1e5b5ec0e1a4bcf
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 16525d9ed87d00d9fd94af5cab5f99b8278ac4a81c7117195e80d3cd3cc6b12358fd0f2255cbe7df15ef1adef609719371bc3a210d8a2273d6e34fbdcb4b1a58
|
7
|
+
data.tar.gz: efbf02ccd9a7cb6a13b6ba56dc8fc3fc6e425d7d779583614fc9c24c74216fe3d89eadc83999f197a7227bfd90ca8ba9446c1b71c128e4874609f44f0869eb72
|
data/CHANGELOG.md
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
# Changelog
|
2
|
+
|
3
|
+
All notable changes to this project will be documented in this file.
|
4
|
+
|
5
|
+
The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
|
6
|
+
|
7
|
+
## [Unreleased]
|
8
|
+
|
9
|
+
## [0.7.0] - 2023-08-14
|
10
|
+
|
11
|
+
### Added
|
12
|
+
|
13
|
+
- We now have an explicit `FlipFlopNode`. It has the same flags as `RangeNode`.
|
14
|
+
- We now have a syntax error when implicit and explicit blocks are passed to a method call.
|
15
|
+
- `Node#slice` is now implemented, for retrieving the slice of the source code corresponding to a node.
|
16
|
+
- We now support the `utf8-mac` encoding.
|
17
|
+
- Predicate methods have been added for nodes that have flags. For example `CallNode#safe_navigation?` and `RangeNode#exclude_end?`.
|
18
|
+
- The gem now functions on JRuby and TruffleRuby, thanks to a new FFI backend.
|
19
|
+
- Comments are now part of the serialization API.
|
20
|
+
|
21
|
+
### Changed
|
22
|
+
|
23
|
+
- Autotools has been removed from the build system, so when the gem is installed it will no longer need to go through a configure step.
|
24
|
+
- The AST for `foo = *bar` has changed to have an explicit array on the right hand side, rather than a splat node. This is more consistent with how other parsers handle this.
|
25
|
+
- `RangeNodeFlags` has been renamed to `RangeFlags`.
|
26
|
+
- Unary minus on number literals is now parsed as part of the literal, rather than a call to a unary operator. This is more consistent with how other parsers handle this.
|
27
|
+
|
28
|
+
## [0.6.0] - 2023-08-09
|
29
|
+
|
30
|
+
### Added
|
31
|
+
|
32
|
+
- 🎉 Initial release! 🎉
|
33
|
+
|
34
|
+
[unreleased]: https://github.com/ruby/yarp/compare/v0.7.0...HEAD
|
35
|
+
[0.7.0]: https://github.com/ruby/yarp/compare/v0.6.0...v0.7.0
|
36
|
+
[0.6.0]: https://github.com/ruby/yarp/compare/d60531...v0.6.0
|
data/CONTRIBUTING.md
CHANGED
@@ -10,6 +10,10 @@ The discussions page on the GitHub repository are open. If you have a question o
|
|
10
10
|
|
11
11
|
If you want to contribute code, please first open or contribute to a discussion. A lot of the project is in flux, and we want to make sure that you are contributing to the right place. Once you have a discussion going, you can open a pull request with your changes. We will review your code and get it merged in.
|
12
12
|
|
13
|
+
### Ruby Features
|
14
|
+
|
15
|
+
Pattern matching and endless method definitions should be avoided as long as the latest TruffleRuby release does not support them.
|
16
|
+
|
13
17
|
## Tests
|
14
18
|
|
15
19
|
We could always use more tests! One of the biggest challenges of this project is building up a big test suite. If you want to contribute tests, feel free to open a pull request. These will get merged in as soon as possible.
|
data/{Makefile.in → Makefile}
RENAMED
@@ -10,10 +10,9 @@ FUZZ_OUTPUT_DIR = $(shell pwd)/fuzz/output
|
|
10
10
|
|
11
11
|
SOEXT := $(shell ruby -e 'puts RbConfig::CONFIG["SOEXT"]')
|
12
12
|
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
CC := @CC@
|
13
|
+
CPPFLAGS := -Iinclude
|
14
|
+
CFLAGS := -g -O2 -std=c99 -Wall -Werror -Wextra -Wpedantic -Wundef -Wconversion -fPIC -fvisibility=hidden
|
15
|
+
CC := cc
|
17
16
|
|
18
17
|
HEADERS := $(shell find include -name '*.h')
|
19
18
|
SOURCES := $(shell find src -name '*.c')
|
data/README.md
CHANGED
@@ -8,7 +8,7 @@ The repository contains the infrastructure for both a shared library (librubypar
|
|
8
8
|
|
9
9
|
```
|
10
10
|
.
|
11
|
-
├── Makefile
|
11
|
+
├── Makefile configuration to compile the shared library and native tests
|
12
12
|
├── Rakefile configuration to compile the native extension and run the Ruby tests
|
13
13
|
├── bin
|
14
14
|
│ ├── lex runs the lexer on a file or string, prints the tokens, and compares to ripper
|
data/config.yml
CHANGED
@@ -337,7 +337,7 @@ flags:
|
|
337
337
|
values:
|
338
338
|
- name: BEGIN_MODIFIER
|
339
339
|
comment: "a loop after a begin statement, so the body is executed first before the condition"
|
340
|
-
- name:
|
340
|
+
- name: RangeFlags
|
341
341
|
values:
|
342
342
|
- name: EXCLUDE_END
|
343
343
|
comment: "... operator"
|
@@ -601,7 +601,8 @@ nodes:
|
|
601
601
|
type: node?
|
602
602
|
kind: BlockNode
|
603
603
|
- name: flags
|
604
|
-
type:
|
604
|
+
type: flags
|
605
|
+
kind: CallNodeFlags
|
605
606
|
- name: name
|
606
607
|
type: string
|
607
608
|
comment: |
|
@@ -1053,6 +1054,22 @@ nodes:
|
|
1053
1054
|
|
1054
1055
|
foo in Foo(*bar, baz, *qux)
|
1055
1056
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
1057
|
+
- name: FlipFlopNode
|
1058
|
+
child_nodes:
|
1059
|
+
- name: left
|
1060
|
+
type: node?
|
1061
|
+
- name: right
|
1062
|
+
type: node?
|
1063
|
+
- name: operator_loc
|
1064
|
+
type: location
|
1065
|
+
- name: flags
|
1066
|
+
type: flags
|
1067
|
+
kind: RangeFlags
|
1068
|
+
comment: |
|
1069
|
+
Represents the use of the `..` or `...` operators to create flip flops.
|
1070
|
+
|
1071
|
+
baz if foo .. bar
|
1072
|
+
^^^^^^^^^^
|
1056
1073
|
- name: FloatNode
|
1057
1074
|
comment: |
|
1058
1075
|
Represents a floating point number literal.
|
@@ -1321,7 +1338,8 @@ nodes:
|
|
1321
1338
|
- name: closing_loc
|
1322
1339
|
type: location
|
1323
1340
|
- name: flags
|
1324
|
-
type:
|
1341
|
+
type: flags
|
1342
|
+
kind: RegularExpressionFlags
|
1325
1343
|
newline: parts
|
1326
1344
|
comment: |
|
1327
1345
|
Represents a regular expression literal that contains interpolation.
|
@@ -1744,7 +1762,8 @@ nodes:
|
|
1744
1762
|
- name: operator_loc
|
1745
1763
|
type: location
|
1746
1764
|
- name: flags
|
1747
|
-
type:
|
1765
|
+
type: flags
|
1766
|
+
kind: RangeFlags
|
1748
1767
|
comment: |
|
1749
1768
|
Represents the use of the `..` or `...` operators.
|
1750
1769
|
|
@@ -1779,7 +1798,8 @@ nodes:
|
|
1779
1798
|
- name: unescaped
|
1780
1799
|
type: string
|
1781
1800
|
- name: flags
|
1782
|
-
type:
|
1801
|
+
type: flags
|
1802
|
+
kind: RegularExpressionFlags
|
1783
1803
|
comment: |
|
1784
1804
|
Represents a regular expression literal with no interpolation.
|
1785
1805
|
|
@@ -2070,7 +2090,8 @@ nodes:
|
|
2070
2090
|
type: node?
|
2071
2091
|
kind: StatementsNode
|
2072
2092
|
- name: flags
|
2073
|
-
type:
|
2093
|
+
type: flags
|
2094
|
+
kind: LoopFlags
|
2074
2095
|
newline: predicate
|
2075
2096
|
comment: |
|
2076
2097
|
Represents the use of the `until` keyword, either in the block form or the modifier form.
|
@@ -2104,7 +2125,8 @@ nodes:
|
|
2104
2125
|
type: node?
|
2105
2126
|
kind: StatementsNode
|
2106
2127
|
- name: flags
|
2107
|
-
type:
|
2128
|
+
type: flags
|
2129
|
+
kind: LoopFlags
|
2108
2130
|
newline: predicate
|
2109
2131
|
comment: |
|
2110
2132
|
Represents the use of the `while` keyword, either in the block form or the modifier form.
|
data/docs/build_system.md
CHANGED
@@ -16,8 +16,6 @@ The main solution for the second point seems a Makefile, otherwise many of the u
|
|
16
16
|
## General Design
|
17
17
|
|
18
18
|
1. Templates are generated by `templates/template.rb`
|
19
|
-
2. `autoconf` creates `./configure` and `autoheader` creates `config.h.in` (both files are platform-independent)
|
20
|
-
3. `./configure` creates `include/yarp/config.h` (which contains `HAVE_*` macros, platform-specific) and the `Makefile`
|
21
19
|
4. The `Makefile` compiles both `librubyparser.a` and `librubyparser.{so,dylib,dll}` from the `src/**/*.c` and `include/**/*.h` files
|
22
20
|
5. The `Rakefile` `:compile` task ensures the above prerequisites are done, then calls `make`,
|
23
21
|
and uses `Rake::ExtensionTask` to compile the C extension (using its `extconf.rb`), which uses `librubyparser.a`
|
@@ -36,14 +34,13 @@ loaded per process (i.e., at most one version of the yarp *gem* loaded in a proc
|
|
36
34
|
|
37
35
|
### Building the yarp gem by `gem install/bundle install`
|
38
36
|
|
39
|
-
The gem contains the pre-generated templates
|
37
|
+
The gem contains the pre-generated templates.
|
40
38
|
When installing the gem, `extconf.rb` is used and that:
|
41
|
-
* runs `./configure` which creates the `Makefile` and `include/yarp/config.h`
|
42
39
|
* runs `make build/librubyparser.a`
|
43
40
|
* compiles the C extension with mkmf
|
44
41
|
|
45
42
|
When installing the gem on JRuby and TruffleRuby, no C extension is built, so instead of the last step,
|
46
|
-
there is Ruby code using
|
43
|
+
there is Ruby code using FFI which uses `librubyparser.{so,dylib,dll}`
|
47
44
|
to implement the same methods as the C extension, but using serialization instead of many native calls/accesses
|
48
45
|
(JRuby does not support C extensions, serialization is faster on TruffleRuby than the C extension).
|
49
46
|
|
@@ -51,7 +48,6 @@ to implement the same methods as the C extension, but using serialization instea
|
|
51
48
|
|
52
49
|
The same as above, except the `extconf.rb` additionally runs first:
|
53
50
|
* `templates/template.rb` to generate the templates
|
54
|
-
* `autoconf` and `autoheader` to generate `configure` and `config.h.in`
|
55
51
|
|
56
52
|
Because of course those files are not part of the git repository.
|
57
53
|
|
@@ -61,21 +57,14 @@ Because of course those files are not part of the git repository.
|
|
61
57
|
|
62
58
|
The script generates the templates when importing.
|
63
59
|
|
64
|
-
`
|
65
|
-
It is assumed that CRuby's `./configure` is a superset of YARP's configure checks.
|
66
|
-
|
67
|
-
YARP's `autotools` is not used at all in CRuby and in fact YARP's `Makefile` is not used either.
|
68
|
-
Instead, CRuby's `autotools` setup is used, and `CRuby`'s Makefiles are used.
|
60
|
+
YARP's `Makefile` is not used at all in CRuby. Instead, CRuby's `Makefile` is used.
|
69
61
|
|
70
62
|
### Building YARP as part of TruffleRuby
|
71
63
|
|
72
64
|
[This script](https://github.com/oracle/truffleruby/blob/master/tool/import-yarp.sh) imports YARP sources in TruffleRuby.
|
73
65
|
The script generates the templates when importing.
|
74
|
-
It also generates `configure` and `config.h.in` (to avoid needing `autotools` on every machine building TruffleRuby).
|
75
66
|
|
76
|
-
Then when `mx build` builds TruffleRuby and the `yarp` mx project inside, it
|
77
|
-
* runs `./configure`
|
78
|
-
* runs `make`
|
67
|
+
Then when `mx build` builds TruffleRuby and the `yarp` mx project inside, it runs `make`.
|
79
68
|
|
80
69
|
Then the `yarp bindings` mx project is built, which contains the [bindings](https://github.com/oracle/truffleruby/blob/master/src/main/c/yarp_bindings/src/yarp_bindings.c)
|
81
70
|
and links to `librubyparser.a` (to avoid exporting symbols, so no conflict when installing the yarp gem).
|
data/docs/building.md
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
# Building
|
2
2
|
|
3
3
|
The following describes how to build YARP from source.
|
4
|
+
This comes directly from the [Makefile](../Makefile).
|
4
5
|
|
5
6
|
## Common
|
6
7
|
|
@@ -13,11 +14,6 @@ The following flags should be used to compile YARP:
|
|
13
14
|
* `-Werror` - Treat warnings as errors
|
14
15
|
* `-fvisibility=hidden` - Hide all symbols by default
|
15
16
|
|
16
|
-
The following flags can be used to compile YARP:
|
17
|
-
|
18
|
-
* `-DHAVE_MMAP` - Should be passed if the system has the `mmap` function
|
19
|
-
* `-DHAVE_SNPRINTF` - Should be passed if the system has the `snprintf` function
|
20
|
-
|
21
17
|
## Shared
|
22
18
|
|
23
19
|
If you want to build YARP as a shared library and link against it, you should compile with:
|
data/docs/encoding.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1
|
-
#
|
1
|
+
# Ruby API
|
2
2
|
|
3
|
-
|
3
|
+
The `yarp` gem provides a Ruby API for accessing the syntax tree.
|
4
4
|
|
5
5
|
For the most part, the API for accessing the tree mirrors that found in the [Syntax Tree](https://github.com/ruby-syntax-tree/syntax_tree) project. This means:
|
6
6
|
|
@@ -8,7 +8,9 @@ For the most part, the API for accessing the tree mirrors that found in the [Syn
|
|
8
8
|
* Nodes in the tree respond to named methods for accessing their children as well as `#child_nodes`
|
9
9
|
* Nodes respond to the pattern matching interfaces `#deconstruct` and `#deconstruct_keys`
|
10
10
|
|
11
|
-
Every entry in `config.yml` will generate a Ruby class as well as the code that builds the nodes themselves.
|
11
|
+
Every entry in `config.yml` will generate a Ruby class as well as the code that builds the nodes themselves.
|
12
|
+
Creating a syntax tree involves calling one of the class methods on the `YARP` module.
|
13
|
+
The full API is documented below.
|
12
14
|
|
13
15
|
## API
|
14
16
|
|
@@ -18,3 +20,4 @@ Every entry in `config.yml` will generate a Ruby class as well as the code that
|
|
18
20
|
* `YARP.lex_file(filepath)` - parse the tokens corresponding to the given source file and return them as an array within a parse result
|
19
21
|
* `YARP.parse(source)` - parse the syntax tree corresponding to the given source string and return it within a parse result
|
20
22
|
* `YARP.parse_file(filepath)` - parse the syntax tree corresponding to the given source file and return it within a parse result
|
23
|
+
* `YARP.load(source, serialized)` - load the serialized syntax tree using the source as a reference into a syntax tree
|
data/docs/serialization.md
CHANGED
@@ -1,10 +1,58 @@
|
|
1
1
|
# Serialization
|
2
2
|
|
3
|
-
YARP ships with the ability to serialize a syntax tree to a single string.
|
3
|
+
YARP ships with the ability to serialize a syntax tree to a single string.
|
4
|
+
The string can then be deserialized back into a syntax tree using a language other than C.
|
5
|
+
This is useful for using the parsing logic in other tools without having to write a parser in that language.
|
6
|
+
The syntax tree still requires a copy of the original source, as for the most part it just contains byte offsets into the source string.
|
7
|
+
|
8
|
+
## Types
|
9
|
+
|
10
|
+
Let us define some simple types for readability.
|
11
|
+
|
12
|
+
### varint
|
13
|
+
|
14
|
+
A variable-length integer with the value fitting in `uint32_t` using between 1 and 5 bytes, using the [LEB128](https://en.wikipedia.org/wiki/LEB128) encoding.
|
15
|
+
This drastically cuts down on the size of the serialized string, especially when the source file is large.
|
16
|
+
|
17
|
+
### string
|
18
|
+
|
19
|
+
| # bytes | field |
|
20
|
+
| --- | --- |
|
21
|
+
| varint | the length of the string in bytes |
|
22
|
+
| ... | the string bytes |
|
23
|
+
|
24
|
+
### location
|
25
|
+
|
26
|
+
| # bytes | field |
|
27
|
+
| --- | --- |
|
28
|
+
| varint | byte offset into the source string where this location begins |
|
29
|
+
| varint | length of the location in bytes in the source string |
|
30
|
+
|
31
|
+
### comment
|
32
|
+
|
33
|
+
The comment type is one of:
|
34
|
+
* 0=`INLINE` (`# comment`)
|
35
|
+
* 1=`EMBEDDED_DOCUMENT` (`=begin`/`=end`)
|
36
|
+
* 2=`__END__` (after `__END__`)
|
37
|
+
|
38
|
+
| # bytes | field |
|
39
|
+
| --- | --- |
|
40
|
+
| `1` | comment type |
|
41
|
+
| location | the location in the source of this comment |
|
42
|
+
|
43
|
+
### diagnostic
|
44
|
+
|
45
|
+
| # bytes | field |
|
46
|
+
| --- | --- |
|
47
|
+
| string | diagnostic message (ASCII-only characters) |
|
48
|
+
| location | the location in the source this diagnostic applies to |
|
4
49
|
|
5
50
|
## Structure
|
6
51
|
|
7
|
-
The serialized string representing the syntax tree is composed of three parts: the header, the body, and the constant pool.
|
52
|
+
The serialized string representing the syntax tree is composed of three parts: the header, the body, and the constant pool.
|
53
|
+
The header contains information like the version of YARP that serialized the tree.
|
54
|
+
The body contains the actual nodes in the tree.
|
55
|
+
The constant pool contains constants that were interned while parsing.
|
8
56
|
|
9
57
|
The header is structured like the following table:
|
10
58
|
|
@@ -14,32 +62,28 @@ The header is structured like the following table:
|
|
14
62
|
| `1` | major version number |
|
15
63
|
| `1` | minor version number |
|
16
64
|
| `1` | patch version number |
|
17
|
-
| varint | the length of the encoding name |
|
18
65
|
| string | the encoding name |
|
66
|
+
| varint | number of comments |
|
67
|
+
| comment* | comments |
|
19
68
|
| varint | number of errors |
|
20
|
-
|
|
21
|
-
| string | error string, as byte[] in source encoding |
|
22
|
-
| varint | location in the source code - start |
|
23
|
-
| varint | location in the source code - length |
|
24
|
-
| ... | more errors |
|
69
|
+
| diagnostic* | errors |
|
25
70
|
| varint | number of warnings |
|
26
|
-
|
|
27
|
-
| string | warning string, as byte[] in source encoding |
|
28
|
-
| varint | location in the source code - start |
|
29
|
-
| varint | location in the source code - length |
|
30
|
-
| ... | more warnings |
|
71
|
+
| diagnostic* | warnings |
|
31
72
|
| `4` | content pool offset |
|
32
73
|
| varint | content pool size |
|
33
74
|
|
34
|
-
After the header comes the body of the serialized string.
|
75
|
+
After the header comes the body of the serialized string.
|
76
|
+
The body consists of a sequence of nodes that is built using a prefix traversal order of the syntax tree.
|
77
|
+
Each node is structured like the following table:
|
35
78
|
|
36
79
|
| # bytes | field |
|
37
80
|
| --- | --- |
|
38
81
|
| `1` | node type |
|
39
|
-
|
|
40
|
-
| varint | length of the node in bytes in the source string |
|
82
|
+
| location | node location |
|
41
83
|
|
42
|
-
Each node's child is then appended to the serialized string.
|
84
|
+
Each node's child is then appended to the serialized string.
|
85
|
+
The child node types can be determined by referencing `config.yml`.
|
86
|
+
Depending on the type of child node, it could take a couple of different forms, described below:
|
43
87
|
|
44
88
|
* `node` - A child node that is a node itself. This is structured just like the parent node.
|
45
89
|
* `node?` - A child node that is optionally present. If the node is not present, then a single `0` byte will be written in its place. If it is present, then it will be structured just like the parent node.
|
@@ -52,7 +96,10 @@ Each node's child is then appended to the serialized string. The child node type
|
|
52
96
|
* `location[]` - A child node that is an array of locations. This is structured as a `4` byte length, followed by the locations themselves.
|
53
97
|
* `uint32` - A child node that is a 32-bit unsigned integer. This is structured as a variable-length integer.
|
54
98
|
|
55
|
-
After the syntax tree, the content pool is serialized.
|
99
|
+
After the syntax tree, the content pool is serialized.
|
100
|
+
This is a list of constants that were referenced from within the tree.
|
101
|
+
The content pool begins at the offset specified in the header.
|
102
|
+
Each constant is structured as:
|
56
103
|
|
57
104
|
| # bytes | field |
|
58
105
|
| --- | --- |
|
@@ -61,10 +108,6 @@ After the syntax tree, the content pool is serialized. This is a list of constan
|
|
61
108
|
|
62
109
|
At the end of the serialization, the buffer is null terminated.
|
63
110
|
|
64
|
-
## Variable-length integers
|
65
|
-
|
66
|
-
Variable-length integers are used throughout the serialized format, using the [LEB128](https://en.wikipedia.org/wiki/LEB128) encoding. This drastically cuts down on the size of the serialized string, especially when the source file is large.
|
67
|
-
|
68
111
|
## APIs
|
69
112
|
|
70
113
|
The relevant APIs and struct definitions are listed below:
|
@@ -105,7 +148,10 @@ serialize(const char *source, size_t length) {
|
|
105
148
|
}
|
106
149
|
```
|
107
150
|
|
108
|
-
The final argument to `yp_parse_serialize` controls the metadata of the source.
|
151
|
+
The final argument to `yp_parse_serialize` controls the metadata of the source.
|
152
|
+
This includes the filepath that the source is associated with, and any nested local variables scopes that are necessary to properly parse the file (in the case of parsing an `eval`).
|
153
|
+
Note that no `varint` values are used here, which makes it easier for the caller to produce the metadata; serialized size is also less important here.
|
154
|
+
The metadata is a serialized format itself, and is structured as follows:
|
109
155
|
|
110
156
|
| # bytes | field |
|
111
157
|
| --- | --- |
|
@@ -127,4 +173,5 @@ Each local variable within each scope is encoded as:
|
|
127
173
|
| `4` | the size of the local variable name |
|
128
174
|
| | the local variable name |
|
129
175
|
|
130
|
-
The metadata can be `NULL` (as seen in the example above).
|
176
|
+
The metadata can be `NULL` (as seen in the example above).
|
177
|
+
If it is not null, then a minimal metadata string would be `"\0\0\0\0\0\0\0\0"` which would use 4 bytes to indicate an empty filepath string and 4 bytes to indicate that there were no local variable scopes.
|
data/ext/yarp/api_node.c
CHANGED
@@ -59,6 +59,7 @@ static VALUE rb_cYARPEmbeddedVariableNode;
|
|
59
59
|
static VALUE rb_cYARPEnsureNode;
|
60
60
|
static VALUE rb_cYARPFalseNode;
|
61
61
|
static VALUE rb_cYARPFindPatternNode;
|
62
|
+
static VALUE rb_cYARPFlipFlopNode;
|
62
63
|
static VALUE rb_cYARPFloatNode;
|
63
64
|
static VALUE rb_cYARPForNode;
|
64
65
|
static VALUE rb_cYARPForwardingArgumentsNode;
|
@@ -519,6 +520,13 @@ yp_ast_new(yp_parser_t *parser, yp_node_t *node, rb_encoding *encoding) {
|
|
519
520
|
yp_node_stack_push(&node_stack, (yp_node_t *) cast->right);
|
520
521
|
break;
|
521
522
|
}
|
523
|
+
#line 111 "api_node.c.erb"
|
524
|
+
case YP_NODE_FLIP_FLOP_NODE: {
|
525
|
+
yp_flip_flop_node_t *cast = (yp_flip_flop_node_t *) node;
|
526
|
+
yp_node_stack_push(&node_stack, (yp_node_t *) cast->left);
|
527
|
+
yp_node_stack_push(&node_stack, (yp_node_t *) cast->right);
|
528
|
+
break;
|
529
|
+
}
|
522
530
|
#line 111 "api_node.c.erb"
|
523
531
|
case YP_NODE_FOR_NODE: {
|
524
532
|
yp_for_node_t *cast = (yp_for_node_t *) node;
|
@@ -1280,7 +1288,7 @@ yp_ast_new(yp_parser_t *parser, yp_node_t *node, rb_encoding *encoding) {
|
|
1280
1288
|
argv[6] = rb_ary_pop(value_stack);
|
1281
1289
|
|
1282
1290
|
// flags
|
1283
|
-
argv[7] = ULONG2NUM(
|
1291
|
+
argv[7] = ULONG2NUM(node->flags >> 1);
|
1284
1292
|
|
1285
1293
|
// name
|
1286
1294
|
argv[8] = yp_string_new(&cast->name, encoding);
|
@@ -1916,6 +1924,29 @@ yp_ast_new(yp_parser_t *parser, yp_node_t *node, rb_encoding *encoding) {
|
|
1916
1924
|
rb_ary_push(value_stack, rb_class_new_instance(7, argv, rb_cYARPFindPatternNode));
|
1917
1925
|
break;
|
1918
1926
|
}
|
1927
|
+
#line 137 "api_node.c.erb"
|
1928
|
+
case YP_NODE_FLIP_FLOP_NODE: {
|
1929
|
+
yp_flip_flop_node_t *cast = (yp_flip_flop_node_t *) node;
|
1930
|
+
VALUE argv[5];
|
1931
|
+
|
1932
|
+
// left
|
1933
|
+
argv[0] = rb_ary_pop(value_stack);
|
1934
|
+
|
1935
|
+
// right
|
1936
|
+
argv[1] = rb_ary_pop(value_stack);
|
1937
|
+
|
1938
|
+
// operator_loc
|
1939
|
+
argv[2] = yp_location_new(parser, cast->operator_loc.start, cast->operator_loc.end, source);
|
1940
|
+
|
1941
|
+
// flags
|
1942
|
+
argv[3] = ULONG2NUM(node->flags >> 1);
|
1943
|
+
|
1944
|
+
// location
|
1945
|
+
argv[4] = yp_location_new(parser, node->location.start, node->location.end, source);
|
1946
|
+
|
1947
|
+
rb_ary_push(value_stack, rb_class_new_instance(5, argv, rb_cYARPFlipFlopNode));
|
1948
|
+
break;
|
1949
|
+
}
|
1919
1950
|
#line 137 "api_node.c.erb"
|
1920
1951
|
case YP_NODE_FLOAT_NODE: {
|
1921
1952
|
VALUE argv[1];
|
@@ -2319,7 +2350,7 @@ yp_ast_new(yp_parser_t *parser, yp_node_t *node, rb_encoding *encoding) {
|
|
2319
2350
|
argv[2] = yp_location_new(parser, cast->closing_loc.start, cast->closing_loc.end, source);
|
2320
2351
|
|
2321
2352
|
// flags
|
2322
|
-
argv[3] = ULONG2NUM(
|
2353
|
+
argv[3] = ULONG2NUM(node->flags >> 1);
|
2323
2354
|
|
2324
2355
|
// location
|
2325
2356
|
argv[4] = yp_location_new(parser, node->location.start, node->location.end, source);
|
@@ -2978,7 +3009,7 @@ yp_ast_new(yp_parser_t *parser, yp_node_t *node, rb_encoding *encoding) {
|
|
2978
3009
|
argv[2] = yp_location_new(parser, cast->operator_loc.start, cast->operator_loc.end, source);
|
2979
3010
|
|
2980
3011
|
// flags
|
2981
|
-
argv[3] = ULONG2NUM(
|
3012
|
+
argv[3] = ULONG2NUM(node->flags >> 1);
|
2982
3013
|
|
2983
3014
|
// location
|
2984
3015
|
argv[4] = yp_location_new(parser, node->location.start, node->location.end, source);
|
@@ -3027,7 +3058,7 @@ yp_ast_new(yp_parser_t *parser, yp_node_t *node, rb_encoding *encoding) {
|
|
3027
3058
|
argv[3] = yp_string_new(&cast->unescaped, encoding);
|
3028
3059
|
|
3029
3060
|
// flags
|
3030
|
-
argv[4] = ULONG2NUM(
|
3061
|
+
argv[4] = ULONG2NUM(node->flags >> 1);
|
3031
3062
|
|
3032
3063
|
// location
|
3033
3064
|
argv[5] = yp_location_new(parser, node->location.start, node->location.end, source);
|
@@ -3437,7 +3468,7 @@ yp_ast_new(yp_parser_t *parser, yp_node_t *node, rb_encoding *encoding) {
|
|
3437
3468
|
argv[2] = rb_ary_pop(value_stack);
|
3438
3469
|
|
3439
3470
|
// flags
|
3440
|
-
argv[3] = ULONG2NUM(
|
3471
|
+
argv[3] = ULONG2NUM(node->flags >> 1);
|
3441
3472
|
|
3442
3473
|
// location
|
3443
3474
|
argv[4] = yp_location_new(parser, node->location.start, node->location.end, source);
|
@@ -3483,7 +3514,7 @@ yp_ast_new(yp_parser_t *parser, yp_node_t *node, rb_encoding *encoding) {
|
|
3483
3514
|
argv[2] = rb_ary_pop(value_stack);
|
3484
3515
|
|
3485
3516
|
// flags
|
3486
|
-
argv[3] = ULONG2NUM(
|
3517
|
+
argv[3] = ULONG2NUM(node->flags >> 1);
|
3487
3518
|
|
3488
3519
|
// location
|
3489
3520
|
argv[4] = yp_location_new(parser, node->location.start, node->location.end, source);
|
@@ -3595,6 +3626,7 @@ Init_yarp_api_node(void) {
|
|
3595
3626
|
rb_cYARPEnsureNode = rb_define_class_under(rb_cYARP, "EnsureNode", rb_cYARPNode);
|
3596
3627
|
rb_cYARPFalseNode = rb_define_class_under(rb_cYARP, "FalseNode", rb_cYARPNode);
|
3597
3628
|
rb_cYARPFindPatternNode = rb_define_class_under(rb_cYARP, "FindPatternNode", rb_cYARPNode);
|
3629
|
+
rb_cYARPFlipFlopNode = rb_define_class_under(rb_cYARP, "FlipFlopNode", rb_cYARPNode);
|
3598
3630
|
rb_cYARPFloatNode = rb_define_class_under(rb_cYARP, "FloatNode", rb_cYARPNode);
|
3599
3631
|
rb_cYARPForNode = rb_define_class_under(rb_cYARP, "ForNode", rb_cYARPNode);
|
3600
3632
|
rb_cYARPForwardingArgumentsNode = rb_define_class_under(rb_cYARP, "ForwardingArgumentsNode", rb_cYARPNode);
|
data/ext/yarp/extconf.rb
CHANGED
@@ -1,13 +1,21 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require "mkmf"
|
4
3
|
require "rbconfig"
|
5
|
-
require "rake"
|
6
4
|
|
7
5
|
module Yarp
|
8
6
|
module ExtConf
|
9
7
|
class << self
|
10
8
|
def configure
|
9
|
+
unless RUBY_ENGINE == "ruby"
|
10
|
+
# On non-CRuby we only need the shared library, so build only that and not the C extension.
|
11
|
+
# We also avoid `require "mkmf"` as that prepends the LLVM toolchain to PATH on TruffleRuby,
|
12
|
+
# but we want to use the native toolchain here since librubyparser is run natively.
|
13
|
+
build_shared_rubyparser
|
14
|
+
File.write("Makefile", "all install clean:\n\t@#{RbConfig::CONFIG["NULLCMD"]}\n")
|
15
|
+
return
|
16
|
+
end
|
17
|
+
|
18
|
+
require "mkmf"
|
11
19
|
configure_c_extension
|
12
20
|
configure_rubyparser
|
13
21
|
|
@@ -35,7 +43,7 @@ module Yarp
|
|
35
43
|
end
|
36
44
|
$LOCAL_LIBS << " #{static_archive_path}"
|
37
45
|
else
|
38
|
-
shared_library_path = File.join(build_dir, "librubyparser.#{RbConfig::CONFIG["
|
46
|
+
shared_library_path = File.join(build_dir, "librubyparser.#{RbConfig::CONFIG["SOEXT"]}")
|
39
47
|
unless File.exist?(shared_library_path)
|
40
48
|
build_shared_rubyparser
|
41
49
|
end
|
@@ -62,15 +70,12 @@ module Yarp
|
|
62
70
|
|
63
71
|
def build_target_rubyparser(target)
|
64
72
|
Dir.chdir(root_dir) do
|
65
|
-
if !File.exist?("
|
73
|
+
if !File.exist?("include/yarp/ast.h") && Dir.exist?(".git")
|
66
74
|
# this block only exists to support building the gem from a "git" source,
|
67
75
|
# normally we package up the configure and other files in the gem itself
|
68
|
-
|
69
|
-
Rake.sh("autoheader")
|
70
|
-
Rake.sh("templates/template.rb")
|
76
|
+
system("templates/template.rb", exception: true)
|
71
77
|
end
|
72
|
-
|
73
|
-
Rake.sh("make", target)
|
78
|
+
system("make", target, exception: true)
|
74
79
|
end
|
75
80
|
end
|
76
81
|
|
@@ -123,7 +128,7 @@ module Yarp
|
|
123
128
|
end
|
124
129
|
end
|
125
130
|
|
126
|
-
if
|
131
|
+
if ARGV.delete("--help")
|
127
132
|
Yarp::ExtConf.print_help
|
128
133
|
exit!(0)
|
129
134
|
end
|
data/ext/yarp/extension.c
CHANGED
@@ -522,6 +522,8 @@ Init_yarp(void) {
|
|
522
522
|
// in yarp.h.
|
523
523
|
rb_define_const(rb_cYARP, "VERSION", rb_str_new2(EXPECTED_YARP_VERSION));
|
524
524
|
|
525
|
+
rb_define_const(rb_cYARP, "BACKEND", ID2SYM(rb_intern("CExtension")));
|
526
|
+
|
525
527
|
// First, the functions that have to do with lexing and parsing.
|
526
528
|
rb_define_singleton_method(rb_cYARP, "dump", dump, -1);
|
527
529
|
rb_define_singleton_method(rb_cYARP, "dump_file", dump_file, 1);
|
data/ext/yarp/extension.h
CHANGED
@@ -5,7 +5,7 @@
|
|
5
5
|
#include <ruby/encoding.h>
|
6
6
|
#include "yarp.h"
|
7
7
|
|
8
|
-
#define EXPECTED_YARP_VERSION "0.
|
8
|
+
#define EXPECTED_YARP_VERSION "0.7.0"
|
9
9
|
|
10
10
|
VALUE yp_source_new(yp_parser_t *parser);
|
11
11
|
VALUE yp_token_new(yp_parser_t *parser, yp_token_t *token, rb_encoding *encoding, VALUE source);
|