yarp 0.8.0 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +48 -1
- data/Makefile +5 -1
- data/README.md +4 -3
- data/config.yml +461 -150
- data/docs/configuration.md +1 -0
- data/docs/encoding.md +5 -5
- data/docs/ruby_api.md +2 -0
- data/docs/serialization.md +3 -3
- data/docs/testing.md +2 -2
- data/ext/yarp/api_node.c +810 -199
- data/ext/yarp/extension.c +94 -31
- data/ext/yarp/extension.h +2 -2
- data/include/yarp/ast.h +653 -150
- data/include/yarp/defines.h +2 -1
- data/include/yarp/diagnostic.h +3 -3
- data/include/yarp/enc/yp_encoding.h +10 -10
- data/include/yarp/node.h +10 -0
- data/include/yarp/parser.h +19 -19
- data/include/yarp/regexp.h +1 -1
- data/include/yarp/unescape.h +7 -5
- data/include/yarp/util/yp_buffer.h +3 -0
- data/include/yarp/util/yp_char.h +16 -16
- data/include/yarp/util/yp_constant_pool.h +2 -2
- data/include/yarp/util/yp_newline_list.h +7 -4
- data/include/yarp/util/yp_string.h +4 -4
- data/include/yarp/util/yp_string_list.h +0 -3
- data/include/yarp/util/yp_strpbrk.h +1 -1
- data/include/yarp/version.h +2 -2
- data/include/yarp.h +14 -3
- data/lib/yarp/desugar_visitor.rb +204 -0
- data/lib/yarp/ffi.rb +27 -1
- data/lib/yarp/lex_compat.rb +93 -25
- data/lib/yarp/mutation_visitor.rb +683 -0
- data/lib/yarp/node.rb +3121 -597
- data/lib/yarp/serialize.rb +198 -126
- data/lib/yarp.rb +53 -7
- data/src/diagnostic.c +1 -1
- data/src/enc/yp_big5.c +15 -42
- data/src/enc/yp_euc_jp.c +16 -43
- data/src/enc/yp_gbk.c +19 -46
- data/src/enc/yp_shift_jis.c +16 -43
- data/src/enc/yp_tables.c +36 -38
- data/src/enc/yp_unicode.c +20 -25
- data/src/enc/yp_windows_31j.c +16 -43
- data/src/node.c +1444 -836
- data/src/prettyprint.c +324 -103
- data/src/regexp.c +21 -21
- data/src/serialize.c +429 -276
- data/src/token_type.c +2 -2
- data/src/unescape.c +184 -136
- data/src/util/yp_buffer.c +7 -2
- data/src/util/yp_char.c +34 -34
- data/src/util/yp_constant_pool.c +4 -4
- data/src/util/yp_memchr.c +1 -1
- data/src/util/yp_newline_list.c +14 -3
- data/src/util/yp_string.c +22 -20
- data/src/util/yp_string_list.c +0 -6
- data/src/util/yp_strncasecmp.c +3 -6
- data/src/util/yp_strpbrk.c +8 -8
- data/src/yarp.c +1504 -615
- data/yarp.gemspec +3 -1
- metadata +4 -2
data/docs/configuration.md
CHANGED
@@ -7,6 +7,7 @@ A lot of code in YARP's repository is templated from a single configuration file
|
|
7
7
|
* `java/org/yarp/AbstractNodeVisitor.java` - for defining the visitor interface for the nodes in Java
|
8
8
|
* `java/org/yarp/Loader.java` - for defining how to deserialize the nodes in Java
|
9
9
|
* `java/org/yarp/Nodes.java` - for defining the nodes in Java
|
10
|
+
* `lib/yarp/mutation_visitor.rb` - for defining the mutation visitor for the nodes in Ruby
|
10
11
|
* `lib/yarp/node.rb` - for defining the nodes in Ruby
|
11
12
|
* `lib/yarp/serialize.rb` - for defining how to deserialize the nodes in Ruby
|
12
13
|
* `src/node.c` - for defining how to free the nodes in C and calculate the size in memory in C
|
data/docs/encoding.md
CHANGED
@@ -61,22 +61,22 @@ typedef struct {
|
|
61
61
|
// Return the number of bytes that the next character takes if it is valid
|
62
62
|
// in the encoding. Does not read more than n bytes. It is assumed that n is
|
63
63
|
// at least 1.
|
64
|
-
size_t (*char_width)(const … [rest of removed line truncated in extraction]
|
64
|
+
size_t (*char_width)(const uint8_t *b, ptrdiff_t n);
|
65
65
|
|
66
66
|
// Return the number of bytes that the next character takes if it is valid
|
67
67
|
// in the encoding and is alphabetical. Does not read more than n bytes. It
|
68
68
|
// is assumed that n is at least 1.
|
69
|
-
size_t (*alpha_char)(const … [rest of removed line truncated in extraction]
|
69
|
+
size_t (*alpha_char)(const uint8_t *b, ptrdiff_t n);
|
70
70
|
|
71
71
|
// Return the number of bytes that the next character takes if it is valid
|
72
72
|
// in the encoding and is alphanumeric. Does not read more than n bytes. It
|
73
73
|
// is assumed that n is at least 1.
|
74
|
-
size_t (*alnum_char)(const … [rest of removed line truncated in extraction]
|
74
|
+
size_t (*alnum_char)(const uint8_t *b, ptrdiff_t n);
|
75
75
|
|
76
76
|
// Return true if the next character is valid in the encoding and is an
|
77
77
|
// uppercase character. Does not read more than n bytes. It is assumed that
|
78
78
|
// n is at least 1.
|
79
|
-
bool (*isupper_char)(const … [rest of removed line truncated in extraction]
|
79
|
+
bool (*isupper_char)(const uint8_t *b, ptrdiff_t n);
|
80
80
|
|
81
81
|
// The name of the encoding. This should correspond to a value that can be
|
82
82
|
// passed to Encoding.find in Ruby.
|
@@ -90,7 +90,7 @@ typedef struct {
|
|
90
90
|
// the ability here to call out to a user-defined function to get an encoding
|
91
91
|
// struct. If the function returns something that isn't NULL, we set that to
|
92
92
|
// our encoding and use it to parse identifiers.
|
93
|
-
typedef yp_encoding_t *(*yp_encoding_decode_callback_t)(yp_parser_t *parser, const … [rest of removed line truncated in extraction]
|
93
|
+
typedef yp_encoding_t *(*yp_encoding_decode_callback_t)(yp_parser_t *parser, const uint8_t *name, size_t width);
|
94
94
|
|
95
95
|
// Register a callback that will be called when YARP encounters a magic comment
|
96
96
|
// with an encoding referenced that it doesn't understand. The callback should
|
data/docs/ruby_api.md
CHANGED
@@ -20,4 +20,6 @@ The full API is documented below.
|
|
20
20
|
* `YARP.lex_file(filepath)` - parse the tokens corresponding to the given source file and return them as an array within a parse result
|
21
21
|
* `YARP.parse(source)` - parse the syntax tree corresponding to the given source string and return it within a parse result
|
22
22
|
* `YARP.parse_file(filepath)` - parse the syntax tree corresponding to the given source file and return it within a parse result
|
23
|
+
* `YARP.parse_lex(source)` - parse the syntax tree corresponding to the given source string and return it within a parse result, along with the tokens
|
24
|
+
* `YARP.parse_lex_file(filepath)` - parse the syntax tree corresponding to the given source file and return it within a parse result, along with the tokens
|
23
25
|
* `YARP.load(source, serialized)` - load the serialized syntax tree using the source as a reference into a syntax tree
|
data/docs/serialization.md
CHANGED
@@ -73,7 +73,7 @@ The header is structured like the following table:
|
|
73
73
|
| varint | content pool size |
|
74
74
|
|
75
75
|
After the header comes the body of the serialized string.
|
76
|
-
The body … [rest of removed line truncated in extraction]
|
76
|
+
The body consists of a sequence of nodes that is built using a prefix traversal order of the syntax tree.
|
77
77
|
Each node is structured like the following table:
|
78
78
|
|
79
79
|
| # bytes | field |
|
@@ -130,14 +130,14 @@ void yp_buffer_free(yp_buffer_t *);
|
|
130
130
|
|
131
131
|
// Parse and serialize the AST represented by the given source to the given
|
132
132
|
// buffer.
|
133
|
-
void yp_parse_serialize(const … [rest of removed line truncated in extraction]
|
133
|
+
void yp_parse_serialize(const uint8_t *source, size_t length, yp_buffer_t *buffer, const char *metadata);
|
134
134
|
```
|
135
135
|
|
136
136
|
Typically you would use a stack-allocated `yp_buffer_t` and call `yp_parse_serialize`, as in:
|
137
137
|
|
138
138
|
```c
|
139
139
|
void
|
140
|
-
serialize(const … [rest of removed line truncated in extraction]
|
140
|
+
serialize(const uint8_t *source, size_t length) {
|
141
141
|
yp_buffer_t buffer;
|
142
142
|
if (!yp_buffer_init(&buffer)) return;
|
143
143
|
|
data/docs/testing.md
CHANGED
@@ -12,9 +12,9 @@ These test specific YARP implementation details like comments, errors, and regul
|
|
12
12
|
|
13
13
|
### Snapshot tests
|
14
14
|
|
15
|
-
Snapshot tests ensure that parsed output is equivalent to previous parsed output. There are many categorized examples of valid syntax within the `test/fixtures/` directory. When the test suite runs, it will parse all of this syntax, and compare it against corresponding files in the `test/snapshots/` directory. For example, `test/fixtures/strings.txt` has a corresponding `test/snapshots/strings.txt`.
|
15
|
+
Snapshot tests ensure that parsed output is equivalent to previous parsed output. There are many categorized examples of valid syntax within the `test/yarp/fixtures/` directory. When the test suite runs, it will parse all of this syntax, and compare it against corresponding files in the `test/yarp/snapshots/` directory. For example, `test/yarp/fixtures/strings.txt` has a corresponding `test/yarp/snapshots/strings.txt`.
|
16
16
|
|
17
|
-
If the parsed files do not match, it will raise an error. If there is not a corresponding file in the `test/snapshots/` directory, one will be created so that it exists for the next test run.
|
17
|
+
If the parsed files do not match, it will raise an error. If there is not a corresponding file in the `test/yarp/snapshots/` directory, one will be created so that it exists for the next test run.
|
18
18
|
|
19
19
|
### Testing against repositories
|
20
20
|
|