prism 0.13.0 → 0.14.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +20 -1
- data/README.md +4 -1
- data/config.yml +10 -14
- data/docs/fuzzing.md +5 -10
- data/docs/prism.png +0 -0
- data/docs/serialization.md +10 -0
- data/ext/prism/api_node.c +35 -28
- data/ext/prism/extension.c +35 -48
- data/ext/prism/extension.h +1 -1
- data/include/prism/ast.h +38 -36
- data/include/prism/node.h +1 -0
- data/include/prism/parser.h +26 -0
- data/include/prism/util/pm_buffer.h +3 -0
- data/include/prism/util/pm_constant_pool.h +5 -0
- data/include/prism/util/pm_string.h +2 -1
- data/include/prism/version.h +2 -2
- data/include/prism.h +0 -1
- data/lib/prism/compiler.rb +141 -141
- data/lib/prism/ffi.rb +2 -2
- data/lib/prism/lex_compat.rb +42 -8
- data/lib/prism/node.rb +1456 -46
- data/lib/prism/node_ext.rb +44 -0
- data/lib/prism/parse_result.rb +32 -5
- data/lib/prism/pattern.rb +1 -1
- data/lib/prism/serialize.rb +16 -14
- data/prism.gemspec +2 -3
- data/src/diagnostic.c +1 -1
- data/src/node.c +0 -14
- data/src/prettyprint.c +35 -35
- data/src/prism.c +1728 -811
- data/src/serialize.c +45 -22
- data/src/util/pm_buffer.c +9 -7
- metadata +3 -4
- data/include/prism/unescape.h +0 -48
- data/src/unescape.c +0 -637
data/src/serialize.c
CHANGED
@@ -329,9 +329,9 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
|
|
329
329
|
pm_buffer_append_u8(buffer, 1);
|
330
330
|
pm_serialize_location(parser, &((pm_call_and_write_node_t *)node)->closing_loc, buffer);
|
331
331
|
}
|
332
|
-
pm_buffer_append_u32(buffer, node->flags
|
333
|
-
|
334
|
-
|
332
|
+
pm_buffer_append_u32(buffer, (uint32_t)(node->flags & ~PM_NODE_FLAG_COMMON_MASK));
|
333
|
+
pm_buffer_append_u32(buffer, pm_sizet_to_u32(((pm_call_and_write_node_t *)node)->read_name));
|
334
|
+
pm_buffer_append_u32(buffer, pm_sizet_to_u32(((pm_call_and_write_node_t *)node)->write_name));
|
335
335
|
pm_serialize_location(parser, &((pm_call_and_write_node_t *)node)->operator_loc, buffer);
|
336
336
|
pm_serialize_node(parser, (pm_node_t *)((pm_call_and_write_node_t *)node)->value, buffer);
|
337
337
|
break;
|
@@ -376,8 +376,8 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
|
|
376
376
|
} else {
|
377
377
|
pm_serialize_node(parser, (pm_node_t *)((pm_call_node_t *)node)->block, buffer);
|
378
378
|
}
|
379
|
-
pm_buffer_append_u32(buffer, node->flags
|
380
|
-
|
379
|
+
pm_buffer_append_u32(buffer, (uint32_t)(node->flags & ~PM_NODE_FLAG_COMMON_MASK));
|
380
|
+
pm_buffer_append_u32(buffer, pm_sizet_to_u32(((pm_call_node_t *)node)->name));
|
381
381
|
break;
|
382
382
|
}
|
383
383
|
case PM_CALL_OPERATOR_WRITE_NODE: {
|
@@ -415,9 +415,9 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
|
|
415
415
|
pm_buffer_append_u8(buffer, 1);
|
416
416
|
pm_serialize_location(parser, &((pm_call_operator_write_node_t *)node)->closing_loc, buffer);
|
417
417
|
}
|
418
|
-
pm_buffer_append_u32(buffer, node->flags
|
419
|
-
|
420
|
-
|
418
|
+
pm_buffer_append_u32(buffer, (uint32_t)(node->flags & ~PM_NODE_FLAG_COMMON_MASK));
|
419
|
+
pm_buffer_append_u32(buffer, pm_sizet_to_u32(((pm_call_operator_write_node_t *)node)->read_name));
|
420
|
+
pm_buffer_append_u32(buffer, pm_sizet_to_u32(((pm_call_operator_write_node_t *)node)->write_name));
|
421
421
|
pm_buffer_append_u32(buffer, pm_sizet_to_u32(((pm_call_operator_write_node_t *)node)->operator));
|
422
422
|
pm_serialize_location(parser, &((pm_call_operator_write_node_t *)node)->operator_loc, buffer);
|
423
423
|
pm_serialize_node(parser, (pm_node_t *)((pm_call_operator_write_node_t *)node)->value, buffer);
|
@@ -458,9 +458,9 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
|
|
458
458
|
pm_buffer_append_u8(buffer, 1);
|
459
459
|
pm_serialize_location(parser, &((pm_call_or_write_node_t *)node)->closing_loc, buffer);
|
460
460
|
}
|
461
|
-
pm_buffer_append_u32(buffer, node->flags
|
462
|
-
|
463
|
-
|
461
|
+
pm_buffer_append_u32(buffer, (uint32_t)(node->flags & ~PM_NODE_FLAG_COMMON_MASK));
|
462
|
+
pm_buffer_append_u32(buffer, pm_sizet_to_u32(((pm_call_or_write_node_t *)node)->read_name));
|
463
|
+
pm_buffer_append_u32(buffer, pm_sizet_to_u32(((pm_call_or_write_node_t *)node)->write_name));
|
464
464
|
pm_serialize_location(parser, &((pm_call_or_write_node_t *)node)->operator_loc, buffer);
|
465
465
|
pm_serialize_node(parser, (pm_node_t *)((pm_call_or_write_node_t *)node)->value, buffer);
|
466
466
|
break;
|
@@ -805,7 +805,7 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
|
|
805
805
|
pm_serialize_node(parser, (pm_node_t *)((pm_flip_flop_node_t *)node)->right, buffer);
|
806
806
|
}
|
807
807
|
pm_serialize_location(parser, &((pm_flip_flop_node_t *)node)->operator_loc, buffer);
|
808
|
-
pm_buffer_append_u32(buffer, node->flags
|
808
|
+
pm_buffer_append_u32(buffer, (uint32_t)(node->flags & ~PM_NODE_FLAG_COMMON_MASK));
|
809
809
|
break;
|
810
810
|
}
|
811
811
|
case PM_FLOAT_NODE: {
|
@@ -1009,7 +1009,7 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
|
|
1009
1009
|
break;
|
1010
1010
|
}
|
1011
1011
|
case PM_INTEGER_NODE: {
|
1012
|
-
pm_buffer_append_u32(buffer, node->flags
|
1012
|
+
pm_buffer_append_u32(buffer, (uint32_t)(node->flags & ~PM_NODE_FLAG_COMMON_MASK));
|
1013
1013
|
break;
|
1014
1014
|
}
|
1015
1015
|
case PM_INTERPOLATED_MATCH_LAST_LINE_NODE: {
|
@@ -1020,7 +1020,7 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
|
|
1020
1020
|
pm_serialize_node(parser, (pm_node_t *) ((pm_interpolated_match_last_line_node_t *)node)->parts.nodes[index], buffer);
|
1021
1021
|
}
|
1022
1022
|
pm_serialize_location(parser, &((pm_interpolated_match_last_line_node_t *)node)->closing_loc, buffer);
|
1023
|
-
pm_buffer_append_u32(buffer, node->flags
|
1023
|
+
pm_buffer_append_u32(buffer, (uint32_t)(node->flags & ~PM_NODE_FLAG_COMMON_MASK));
|
1024
1024
|
break;
|
1025
1025
|
}
|
1026
1026
|
case PM_INTERPOLATED_REGULAR_EXPRESSION_NODE: {
|
@@ -1031,7 +1031,7 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
|
|
1031
1031
|
pm_serialize_node(parser, (pm_node_t *) ((pm_interpolated_regular_expression_node_t *)node)->parts.nodes[index], buffer);
|
1032
1032
|
}
|
1033
1033
|
pm_serialize_location(parser, &((pm_interpolated_regular_expression_node_t *)node)->closing_loc, buffer);
|
1034
|
-
pm_buffer_append_u32(buffer, node->flags
|
1034
|
+
pm_buffer_append_u32(buffer, (uint32_t)(node->flags & ~PM_NODE_FLAG_COMMON_MASK));
|
1035
1035
|
break;
|
1036
1036
|
}
|
1037
1037
|
case PM_INTERPOLATED_STRING_NODE: {
|
@@ -1182,7 +1182,7 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
|
|
1182
1182
|
pm_serialize_location(parser, &((pm_match_last_line_node_t *)node)->content_loc, buffer);
|
1183
1183
|
pm_serialize_location(parser, &((pm_match_last_line_node_t *)node)->closing_loc, buffer);
|
1184
1184
|
pm_serialize_string(parser, &((pm_match_last_line_node_t *)node)->unescaped, buffer);
|
1185
|
-
pm_buffer_append_u32(buffer, node->flags
|
1185
|
+
pm_buffer_append_u32(buffer, (uint32_t)(node->flags & ~PM_NODE_FLAG_COMMON_MASK));
|
1186
1186
|
break;
|
1187
1187
|
}
|
1188
1188
|
case PM_MATCH_PREDICATE_NODE: {
|
@@ -1405,7 +1405,7 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
|
|
1405
1405
|
pm_serialize_node(parser, (pm_node_t *)((pm_range_node_t *)node)->right, buffer);
|
1406
1406
|
}
|
1407
1407
|
pm_serialize_location(parser, &((pm_range_node_t *)node)->operator_loc, buffer);
|
1408
|
-
pm_buffer_append_u32(buffer, node->flags
|
1408
|
+
pm_buffer_append_u32(buffer, (uint32_t)(node->flags & ~PM_NODE_FLAG_COMMON_MASK));
|
1409
1409
|
break;
|
1410
1410
|
}
|
1411
1411
|
case PM_RATIONAL_NODE: {
|
@@ -1420,7 +1420,7 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
|
|
1420
1420
|
pm_serialize_location(parser, &((pm_regular_expression_node_t *)node)->content_loc, buffer);
|
1421
1421
|
pm_serialize_location(parser, &((pm_regular_expression_node_t *)node)->closing_loc, buffer);
|
1422
1422
|
pm_serialize_string(parser, &((pm_regular_expression_node_t *)node)->unescaped, buffer);
|
1423
|
-
pm_buffer_append_u32(buffer, node->flags
|
1423
|
+
pm_buffer_append_u32(buffer, (uint32_t)(node->flags & ~PM_NODE_FLAG_COMMON_MASK));
|
1424
1424
|
break;
|
1425
1425
|
}
|
1426
1426
|
case PM_REQUIRED_DESTRUCTURED_PARAMETER_NODE: {
|
@@ -1549,7 +1549,7 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
|
|
1549
1549
|
break;
|
1550
1550
|
}
|
1551
1551
|
case PM_STRING_NODE: {
|
1552
|
-
pm_buffer_append_u32(buffer, node->flags
|
1552
|
+
pm_buffer_append_u32(buffer, (uint32_t)(node->flags & ~PM_NODE_FLAG_COMMON_MASK));
|
1553
1553
|
if (((pm_string_node_t *)node)->opening_loc.start == NULL) {
|
1554
1554
|
pm_buffer_append_u8(buffer, 0);
|
1555
1555
|
} else {
|
@@ -1661,7 +1661,7 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
|
|
1661
1661
|
} else {
|
1662
1662
|
pm_serialize_node(parser, (pm_node_t *)((pm_until_node_t *)node)->statements, buffer);
|
1663
1663
|
}
|
1664
|
-
pm_buffer_append_u32(buffer, node->flags
|
1664
|
+
pm_buffer_append_u32(buffer, (uint32_t)(node->flags & ~PM_NODE_FLAG_COMMON_MASK));
|
1665
1665
|
break;
|
1666
1666
|
}
|
1667
1667
|
case PM_WHEN_NODE: {
|
@@ -1692,7 +1692,7 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
|
|
1692
1692
|
} else {
|
1693
1693
|
pm_serialize_node(parser, (pm_node_t *)((pm_while_node_t *)node)->statements, buffer);
|
1694
1694
|
}
|
1695
|
-
pm_buffer_append_u32(buffer, node->flags
|
1695
|
+
pm_buffer_append_u32(buffer, (uint32_t)(node->flags & ~PM_NODE_FLAG_COMMON_MASK));
|
1696
1696
|
break;
|
1697
1697
|
}
|
1698
1698
|
case PM_X_STRING_NODE: {
|
@@ -1746,6 +1746,27 @@ pm_serialize_comment_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_t *buf
|
|
1746
1746
|
}
|
1747
1747
|
}
|
1748
1748
|
|
1749
|
+
static void
|
1750
|
+
pm_serialize_magic_comment(pm_parser_t *parser, pm_magic_comment_t *magic_comment, pm_buffer_t *buffer) {
|
1751
|
+
// serialize key location
|
1752
|
+
pm_buffer_append_u32(buffer, pm_ptrdifft_to_u32(magic_comment->key_start - parser->start));
|
1753
|
+
pm_buffer_append_u32(buffer, pm_ptrdifft_to_u32(magic_comment->key_length));
|
1754
|
+
|
1755
|
+
// serialize value location
|
1756
|
+
pm_buffer_append_u32(buffer, pm_ptrdifft_to_u32(magic_comment->value_start - parser->start));
|
1757
|
+
pm_buffer_append_u32(buffer, pm_ptrdifft_to_u32(magic_comment->value_length));
|
1758
|
+
}
|
1759
|
+
|
1760
|
+
static void
|
1761
|
+
pm_serialize_magic_comment_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_t *buffer) {
|
1762
|
+
pm_buffer_append_u32(buffer, pm_sizet_to_u32(pm_list_size(list)));
|
1763
|
+
|
1764
|
+
pm_magic_comment_t *magic_comment;
|
1765
|
+
for (magic_comment = (pm_magic_comment_t *) list->head; magic_comment != NULL; magic_comment = (pm_magic_comment_t *) magic_comment->node.next) {
|
1766
|
+
pm_serialize_magic_comment(parser, magic_comment, buffer);
|
1767
|
+
}
|
1768
|
+
}
|
1769
|
+
|
1749
1770
|
static void
|
1750
1771
|
pm_serialize_diagnostic(pm_parser_t *parser, pm_diagnostic_t *diagnostic, pm_buffer_t *buffer) {
|
1751
1772
|
// serialize message
|
@@ -1775,11 +1796,12 @@ pm_serialize_encoding(pm_encoding_t *encoding, pm_buffer_t *buffer) {
|
|
1775
1796
|
pm_buffer_append_str(buffer, encoding->name, encoding_length);
|
1776
1797
|
}
|
1777
1798
|
|
1778
|
-
#line
|
1799
|
+
#line 200 "serialize.c.erb"
|
1779
1800
|
void
|
1780
1801
|
pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
|
1781
1802
|
pm_serialize_encoding(&parser->encoding, buffer);
|
1782
1803
|
pm_serialize_comment_list(parser, &parser->comment_list, buffer);
|
1804
|
+
pm_serialize_magic_comment_list(parser, &parser->magic_comment_list, buffer);
|
1783
1805
|
pm_serialize_diagnostic_list(parser, &parser->error_list, buffer);
|
1784
1806
|
pm_serialize_diagnostic_list(parser, &parser->warning_list, buffer);
|
1785
1807
|
|
@@ -1868,6 +1890,7 @@ pm_lex_serialize(const uint8_t *source, size_t size, const char *filepath, pm_bu
|
|
1868
1890
|
|
1869
1891
|
pm_serialize_encoding(&parser.encoding, buffer);
|
1870
1892
|
pm_serialize_comment_list(&parser, &parser.comment_list, buffer);
|
1893
|
+
pm_serialize_magic_comment_list(&parser, &parser.magic_comment_list, buffer);
|
1871
1894
|
pm_serialize_diagnostic_list(&parser, &parser.error_list, buffer);
|
1872
1895
|
pm_serialize_diagnostic_list(&parser, &parser.warning_list, buffer);
|
1873
1896
|
|
data/src/util/pm_buffer.c
CHANGED
@@ -1,24 +1,26 @@
|
|
1
1
|
#include "prism/util/pm_buffer.h"
|
2
2
|
|
3
|
-
#define PRISM_BUFFER_INITIAL_SIZE 1024
|
4
|
-
|
5
3
|
// Return the size of the pm_buffer_t struct.
|
6
4
|
size_t
|
7
5
|
pm_buffer_sizeof(void) {
|
8
6
|
return sizeof(pm_buffer_t);
|
9
7
|
}
|
10
8
|
|
11
|
-
// Initialize a pm_buffer_t with its default values.
|
9
|
+
// Initialize a pm_buffer_t with the given capacity.
|
12
10
|
bool
|
13
|
-
|
11
|
+
pm_buffer_init_capacity(pm_buffer_t *buffer, size_t capacity) {
|
14
12
|
buffer->length = 0;
|
15
|
-
buffer->capacity = PRISM_BUFFER_INITIAL_SIZE;
|
13
|
+
buffer->capacity = capacity;
|
16
14
|
|
17
|
-
buffer->value = (char *) malloc(PRISM_BUFFER_INITIAL_SIZE);
|
15
|
+
buffer->value = (char *) malloc(capacity);
|
18
16
|
return buffer->value != NULL;
|
19
17
|
}
|
20
18
|
|
21
|
-
|
19
|
+
// Initialize a pm_buffer_t with its default values.
|
20
|
+
bool
|
21
|
+
pm_buffer_init(pm_buffer_t *buffer) {
|
22
|
+
return pm_buffer_init_capacity(buffer, 1024);
|
23
|
+
}
|
22
24
|
|
23
25
|
// Return the value of the buffer.
|
24
26
|
char *
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: prism
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.13.0
|
4
|
+
version: 0.14.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Shopify
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-10-13 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description:
|
14
14
|
email:
|
@@ -33,6 +33,7 @@ files:
|
|
33
33
|
- docs/fuzzing.md
|
34
34
|
- docs/heredocs.md
|
35
35
|
- docs/mapping.md
|
36
|
+
- docs/prism.png
|
36
37
|
- docs/ripper.md
|
37
38
|
- docs/ruby_api.md
|
38
39
|
- docs/serialization.md
|
@@ -51,7 +52,6 @@ files:
|
|
51
52
|
- include/prism/pack.h
|
52
53
|
- include/prism/parser.h
|
53
54
|
- include/prism/regexp.h
|
54
|
-
- include/prism/unescape.h
|
55
55
|
- include/prism/util/pm_buffer.h
|
56
56
|
- include/prism/util/pm_char.h
|
57
57
|
- include/prism/util/pm_constant_pool.h
|
@@ -99,7 +99,6 @@ files:
|
|
99
99
|
- src/regexp.c
|
100
100
|
- src/serialize.c
|
101
101
|
- src/token_type.c
|
102
|
-
- src/unescape.c
|
103
102
|
- src/util/pm_buffer.c
|
104
103
|
- src/util/pm_char.c
|
105
104
|
- src/util/pm_constant_pool.c
|
data/include/prism/unescape.h
DELETED
@@ -1,48 +0,0 @@
|
|
1
|
-
#ifndef PRISM_UNESCAPE_H
|
2
|
-
#define PRISM_UNESCAPE_H
|
3
|
-
|
4
|
-
#include "prism/defines.h"
|
5
|
-
#include "prism/diagnostic.h"
|
6
|
-
#include "prism/parser.h"
|
7
|
-
#include "prism/util/pm_char.h"
|
8
|
-
#include "prism/util/pm_list.h"
|
9
|
-
#include "prism/util/pm_memchr.h"
|
10
|
-
#include "prism/util/pm_string.h"
|
11
|
-
|
12
|
-
#include <assert.h>
|
13
|
-
#include <stdbool.h>
|
14
|
-
#include <stdint.h>
|
15
|
-
#include <string.h>
|
16
|
-
|
17
|
-
// The type of unescape we are performing.
|
18
|
-
typedef enum {
|
19
|
-
// When we're creating a string inside of a list literal like %w, we
|
20
|
-
// shouldn't escape anything.
|
21
|
-
PM_UNESCAPE_NONE,
|
22
|
-
|
23
|
-
// When we're unescaping a single-quoted string, we only need to unescape
|
24
|
-
// single quotes and backslashes.
|
25
|
-
PM_UNESCAPE_MINIMAL,
|
26
|
-
|
27
|
-
// When we're unescaping a string list, in addition to MINIMAL, we need to
|
28
|
-
// unescape whitespace.
|
29
|
-
PM_UNESCAPE_WHITESPACE,
|
30
|
-
|
31
|
-
// When we're unescaping a double-quoted string, we need to unescape all
|
32
|
-
// escapes.
|
33
|
-
PM_UNESCAPE_ALL,
|
34
|
-
} pm_unescape_type_t;
|
35
|
-
|
36
|
-
// Unescape the contents of the given token into the given string using the given unescape mode.
|
37
|
-
PRISM_EXPORTED_FUNCTION void pm_unescape_manipulate_string(pm_parser_t *parser, pm_string_t *string, pm_unescape_type_t unescape_type);
|
38
|
-
void pm_unescape_manipulate_char_literal(pm_parser_t *parser, pm_string_t *string, pm_unescape_type_t unescape_type);
|
39
|
-
|
40
|
-
// Accepts a source string and a type of unescaping and returns the unescaped version.
|
41
|
-
// The caller must pm_string_free(result); after calling this function.
|
42
|
-
PRISM_EXPORTED_FUNCTION bool pm_unescape_string(const uint8_t *start, size_t length, pm_unescape_type_t unescape_type, pm_string_t *result);
|
43
|
-
|
44
|
-
// Returns the number of bytes that encompass the first escape sequence in the
|
45
|
-
// given string.
|
46
|
-
size_t pm_unescape_calculate_difference(pm_parser_t *parser, const uint8_t *value, pm_unescape_type_t unescape_type, bool expect_single_codepoint);
|
47
|
-
|
48
|
-
#endif
|