prism 0.13.0 → 0.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +20 -1
- data/README.md +4 -1
- data/config.yml +10 -14
- data/docs/fuzzing.md +5 -10
- data/docs/prism.png +0 -0
- data/docs/serialization.md +10 -0
- data/ext/prism/api_node.c +35 -28
- data/ext/prism/extension.c +35 -48
- data/ext/prism/extension.h +1 -1
- data/include/prism/ast.h +38 -36
- data/include/prism/node.h +1 -0
- data/include/prism/parser.h +26 -0
- data/include/prism/util/pm_buffer.h +3 -0
- data/include/prism/util/pm_constant_pool.h +5 -0
- data/include/prism/util/pm_string.h +2 -1
- data/include/prism/version.h +2 -2
- data/include/prism.h +0 -1
- data/lib/prism/compiler.rb +141 -141
- data/lib/prism/ffi.rb +2 -2
- data/lib/prism/lex_compat.rb +42 -8
- data/lib/prism/node.rb +1456 -46
- data/lib/prism/node_ext.rb +44 -0
- data/lib/prism/parse_result.rb +32 -5
- data/lib/prism/pattern.rb +1 -1
- data/lib/prism/serialize.rb +16 -14
- data/prism.gemspec +2 -3
- data/src/diagnostic.c +1 -1
- data/src/node.c +0 -14
- data/src/prettyprint.c +35 -35
- data/src/prism.c +1728 -811
- data/src/serialize.c +45 -22
- data/src/util/pm_buffer.c +9 -7
- metadata +3 -4
- data/include/prism/unescape.h +0 -48
- data/src/unescape.c +0 -637
data/src/serialize.c
CHANGED
@@ -329,9 +329,9 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
|
|
329
329
|
pm_buffer_append_u8(buffer, 1);
|
330
330
|
pm_serialize_location(parser, &((pm_call_and_write_node_t *)node)->closing_loc, buffer);
|
331
331
|
}
|
332
|
-
pm_buffer_append_u32(buffer, node->flags
|
333
|
-
|
334
|
-
|
332
|
+
pm_buffer_append_u32(buffer, (uint32_t)(node->flags & ~PM_NODE_FLAG_COMMON_MASK));
|
333
|
+
pm_buffer_append_u32(buffer, pm_sizet_to_u32(((pm_call_and_write_node_t *)node)->read_name));
|
334
|
+
pm_buffer_append_u32(buffer, pm_sizet_to_u32(((pm_call_and_write_node_t *)node)->write_name));
|
335
335
|
pm_serialize_location(parser, &((pm_call_and_write_node_t *)node)->operator_loc, buffer);
|
336
336
|
pm_serialize_node(parser, (pm_node_t *)((pm_call_and_write_node_t *)node)->value, buffer);
|
337
337
|
break;
|
@@ -376,8 +376,8 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
|
|
376
376
|
} else {
|
377
377
|
pm_serialize_node(parser, (pm_node_t *)((pm_call_node_t *)node)->block, buffer);
|
378
378
|
}
|
379
|
-
pm_buffer_append_u32(buffer, node->flags
|
380
|
-
|
379
|
+
pm_buffer_append_u32(buffer, (uint32_t)(node->flags & ~PM_NODE_FLAG_COMMON_MASK));
|
380
|
+
pm_buffer_append_u32(buffer, pm_sizet_to_u32(((pm_call_node_t *)node)->name));
|
381
381
|
break;
|
382
382
|
}
|
383
383
|
case PM_CALL_OPERATOR_WRITE_NODE: {
|
@@ -415,9 +415,9 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
|
|
415
415
|
pm_buffer_append_u8(buffer, 1);
|
416
416
|
pm_serialize_location(parser, &((pm_call_operator_write_node_t *)node)->closing_loc, buffer);
|
417
417
|
}
|
418
|
-
pm_buffer_append_u32(buffer, node->flags
|
419
|
-
|
420
|
-
|
418
|
+
pm_buffer_append_u32(buffer, (uint32_t)(node->flags & ~PM_NODE_FLAG_COMMON_MASK));
|
419
|
+
pm_buffer_append_u32(buffer, pm_sizet_to_u32(((pm_call_operator_write_node_t *)node)->read_name));
|
420
|
+
pm_buffer_append_u32(buffer, pm_sizet_to_u32(((pm_call_operator_write_node_t *)node)->write_name));
|
421
421
|
pm_buffer_append_u32(buffer, pm_sizet_to_u32(((pm_call_operator_write_node_t *)node)->operator));
|
422
422
|
pm_serialize_location(parser, &((pm_call_operator_write_node_t *)node)->operator_loc, buffer);
|
423
423
|
pm_serialize_node(parser, (pm_node_t *)((pm_call_operator_write_node_t *)node)->value, buffer);
|
@@ -458,9 +458,9 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
|
|
458
458
|
pm_buffer_append_u8(buffer, 1);
|
459
459
|
pm_serialize_location(parser, &((pm_call_or_write_node_t *)node)->closing_loc, buffer);
|
460
460
|
}
|
461
|
-
pm_buffer_append_u32(buffer, node->flags
|
462
|
-
|
463
|
-
|
461
|
+
pm_buffer_append_u32(buffer, (uint32_t)(node->flags & ~PM_NODE_FLAG_COMMON_MASK));
|
462
|
+
pm_buffer_append_u32(buffer, pm_sizet_to_u32(((pm_call_or_write_node_t *)node)->read_name));
|
463
|
+
pm_buffer_append_u32(buffer, pm_sizet_to_u32(((pm_call_or_write_node_t *)node)->write_name));
|
464
464
|
pm_serialize_location(parser, &((pm_call_or_write_node_t *)node)->operator_loc, buffer);
|
465
465
|
pm_serialize_node(parser, (pm_node_t *)((pm_call_or_write_node_t *)node)->value, buffer);
|
466
466
|
break;
|
@@ -805,7 +805,7 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
|
|
805
805
|
pm_serialize_node(parser, (pm_node_t *)((pm_flip_flop_node_t *)node)->right, buffer);
|
806
806
|
}
|
807
807
|
pm_serialize_location(parser, &((pm_flip_flop_node_t *)node)->operator_loc, buffer);
|
808
|
-
pm_buffer_append_u32(buffer, node->flags
|
808
|
+
pm_buffer_append_u32(buffer, (uint32_t)(node->flags & ~PM_NODE_FLAG_COMMON_MASK));
|
809
809
|
break;
|
810
810
|
}
|
811
811
|
case PM_FLOAT_NODE: {
|
@@ -1009,7 +1009,7 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
|
|
1009
1009
|
break;
|
1010
1010
|
}
|
1011
1011
|
case PM_INTEGER_NODE: {
|
1012
|
-
pm_buffer_append_u32(buffer, node->flags
|
1012
|
+
pm_buffer_append_u32(buffer, (uint32_t)(node->flags & ~PM_NODE_FLAG_COMMON_MASK));
|
1013
1013
|
break;
|
1014
1014
|
}
|
1015
1015
|
case PM_INTERPOLATED_MATCH_LAST_LINE_NODE: {
|
@@ -1020,7 +1020,7 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
|
|
1020
1020
|
pm_serialize_node(parser, (pm_node_t *) ((pm_interpolated_match_last_line_node_t *)node)->parts.nodes[index], buffer);
|
1021
1021
|
}
|
1022
1022
|
pm_serialize_location(parser, &((pm_interpolated_match_last_line_node_t *)node)->closing_loc, buffer);
|
1023
|
-
pm_buffer_append_u32(buffer, node->flags
|
1023
|
+
pm_buffer_append_u32(buffer, (uint32_t)(node->flags & ~PM_NODE_FLAG_COMMON_MASK));
|
1024
1024
|
break;
|
1025
1025
|
}
|
1026
1026
|
case PM_INTERPOLATED_REGULAR_EXPRESSION_NODE: {
|
@@ -1031,7 +1031,7 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
|
|
1031
1031
|
pm_serialize_node(parser, (pm_node_t *) ((pm_interpolated_regular_expression_node_t *)node)->parts.nodes[index], buffer);
|
1032
1032
|
}
|
1033
1033
|
pm_serialize_location(parser, &((pm_interpolated_regular_expression_node_t *)node)->closing_loc, buffer);
|
1034
|
-
pm_buffer_append_u32(buffer, node->flags
|
1034
|
+
pm_buffer_append_u32(buffer, (uint32_t)(node->flags & ~PM_NODE_FLAG_COMMON_MASK));
|
1035
1035
|
break;
|
1036
1036
|
}
|
1037
1037
|
case PM_INTERPOLATED_STRING_NODE: {
|
@@ -1182,7 +1182,7 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
|
|
1182
1182
|
pm_serialize_location(parser, &((pm_match_last_line_node_t *)node)->content_loc, buffer);
|
1183
1183
|
pm_serialize_location(parser, &((pm_match_last_line_node_t *)node)->closing_loc, buffer);
|
1184
1184
|
pm_serialize_string(parser, &((pm_match_last_line_node_t *)node)->unescaped, buffer);
|
1185
|
-
pm_buffer_append_u32(buffer, node->flags
|
1185
|
+
pm_buffer_append_u32(buffer, (uint32_t)(node->flags & ~PM_NODE_FLAG_COMMON_MASK));
|
1186
1186
|
break;
|
1187
1187
|
}
|
1188
1188
|
case PM_MATCH_PREDICATE_NODE: {
|
@@ -1405,7 +1405,7 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
|
|
1405
1405
|
pm_serialize_node(parser, (pm_node_t *)((pm_range_node_t *)node)->right, buffer);
|
1406
1406
|
}
|
1407
1407
|
pm_serialize_location(parser, &((pm_range_node_t *)node)->operator_loc, buffer);
|
1408
|
-
pm_buffer_append_u32(buffer, node->flags
|
1408
|
+
pm_buffer_append_u32(buffer, (uint32_t)(node->flags & ~PM_NODE_FLAG_COMMON_MASK));
|
1409
1409
|
break;
|
1410
1410
|
}
|
1411
1411
|
case PM_RATIONAL_NODE: {
|
@@ -1420,7 +1420,7 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
|
|
1420
1420
|
pm_serialize_location(parser, &((pm_regular_expression_node_t *)node)->content_loc, buffer);
|
1421
1421
|
pm_serialize_location(parser, &((pm_regular_expression_node_t *)node)->closing_loc, buffer);
|
1422
1422
|
pm_serialize_string(parser, &((pm_regular_expression_node_t *)node)->unescaped, buffer);
|
1423
|
-
pm_buffer_append_u32(buffer, node->flags
|
1423
|
+
pm_buffer_append_u32(buffer, (uint32_t)(node->flags & ~PM_NODE_FLAG_COMMON_MASK));
|
1424
1424
|
break;
|
1425
1425
|
}
|
1426
1426
|
case PM_REQUIRED_DESTRUCTURED_PARAMETER_NODE: {
|
@@ -1549,7 +1549,7 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
|
|
1549
1549
|
break;
|
1550
1550
|
}
|
1551
1551
|
case PM_STRING_NODE: {
|
1552
|
-
pm_buffer_append_u32(buffer, node->flags
|
1552
|
+
pm_buffer_append_u32(buffer, (uint32_t)(node->flags & ~PM_NODE_FLAG_COMMON_MASK));
|
1553
1553
|
if (((pm_string_node_t *)node)->opening_loc.start == NULL) {
|
1554
1554
|
pm_buffer_append_u8(buffer, 0);
|
1555
1555
|
} else {
|
@@ -1661,7 +1661,7 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
|
|
1661
1661
|
} else {
|
1662
1662
|
pm_serialize_node(parser, (pm_node_t *)((pm_until_node_t *)node)->statements, buffer);
|
1663
1663
|
}
|
1664
|
-
pm_buffer_append_u32(buffer, node->flags
|
1664
|
+
pm_buffer_append_u32(buffer, (uint32_t)(node->flags & ~PM_NODE_FLAG_COMMON_MASK));
|
1665
1665
|
break;
|
1666
1666
|
}
|
1667
1667
|
case PM_WHEN_NODE: {
|
@@ -1692,7 +1692,7 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
|
|
1692
1692
|
} else {
|
1693
1693
|
pm_serialize_node(parser, (pm_node_t *)((pm_while_node_t *)node)->statements, buffer);
|
1694
1694
|
}
|
1695
|
-
pm_buffer_append_u32(buffer, node->flags
|
1695
|
+
pm_buffer_append_u32(buffer, (uint32_t)(node->flags & ~PM_NODE_FLAG_COMMON_MASK));
|
1696
1696
|
break;
|
1697
1697
|
}
|
1698
1698
|
case PM_X_STRING_NODE: {
|
@@ -1746,6 +1746,27 @@ pm_serialize_comment_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_t *buf
|
|
1746
1746
|
}
|
1747
1747
|
}
|
1748
1748
|
|
1749
|
+
static void
|
1750
|
+
pm_serialize_magic_comment(pm_parser_t *parser, pm_magic_comment_t *magic_comment, pm_buffer_t *buffer) {
|
1751
|
+
// serialize key location
|
1752
|
+
pm_buffer_append_u32(buffer, pm_ptrdifft_to_u32(magic_comment->key_start - parser->start));
|
1753
|
+
pm_buffer_append_u32(buffer, pm_ptrdifft_to_u32(magic_comment->key_length));
|
1754
|
+
|
1755
|
+
// serialize value location
|
1756
|
+
pm_buffer_append_u32(buffer, pm_ptrdifft_to_u32(magic_comment->value_start - parser->start));
|
1757
|
+
pm_buffer_append_u32(buffer, pm_ptrdifft_to_u32(magic_comment->value_length));
|
1758
|
+
}
|
1759
|
+
|
1760
|
+
static void
|
1761
|
+
pm_serialize_magic_comment_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_t *buffer) {
|
1762
|
+
pm_buffer_append_u32(buffer, pm_sizet_to_u32(pm_list_size(list)));
|
1763
|
+
|
1764
|
+
pm_magic_comment_t *magic_comment;
|
1765
|
+
for (magic_comment = (pm_magic_comment_t *) list->head; magic_comment != NULL; magic_comment = (pm_magic_comment_t *) magic_comment->node.next) {
|
1766
|
+
pm_serialize_magic_comment(parser, magic_comment, buffer);
|
1767
|
+
}
|
1768
|
+
}
|
1769
|
+
|
1749
1770
|
static void
|
1750
1771
|
pm_serialize_diagnostic(pm_parser_t *parser, pm_diagnostic_t *diagnostic, pm_buffer_t *buffer) {
|
1751
1772
|
// serialize message
|
@@ -1775,11 +1796,12 @@ pm_serialize_encoding(pm_encoding_t *encoding, pm_buffer_t *buffer) {
|
|
1775
1796
|
pm_buffer_append_str(buffer, encoding->name, encoding_length);
|
1776
1797
|
}
|
1777
1798
|
|
1778
|
-
#line
|
1799
|
+
#line 200 "serialize.c.erb"
|
1779
1800
|
void
|
1780
1801
|
pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
|
1781
1802
|
pm_serialize_encoding(&parser->encoding, buffer);
|
1782
1803
|
pm_serialize_comment_list(parser, &parser->comment_list, buffer);
|
1804
|
+
pm_serialize_magic_comment_list(parser, &parser->magic_comment_list, buffer);
|
1783
1805
|
pm_serialize_diagnostic_list(parser, &parser->error_list, buffer);
|
1784
1806
|
pm_serialize_diagnostic_list(parser, &parser->warning_list, buffer);
|
1785
1807
|
|
@@ -1868,6 +1890,7 @@ pm_lex_serialize(const uint8_t *source, size_t size, const char *filepath, pm_bu
|
|
1868
1890
|
|
1869
1891
|
pm_serialize_encoding(&parser.encoding, buffer);
|
1870
1892
|
pm_serialize_comment_list(&parser, &parser.comment_list, buffer);
|
1893
|
+
pm_serialize_magic_comment_list(&parser, &parser.magic_comment_list, buffer);
|
1871
1894
|
pm_serialize_diagnostic_list(&parser, &parser.error_list, buffer);
|
1872
1895
|
pm_serialize_diagnostic_list(&parser, &parser.warning_list, buffer);
|
1873
1896
|
|
data/src/util/pm_buffer.c
CHANGED
@@ -1,24 +1,26 @@
|
|
1
1
|
#include "prism/util/pm_buffer.h"
|
2
2
|
|
3
|
-
#define PRISM_BUFFER_INITIAL_SIZE 1024
|
4
|
-
|
5
3
|
// Return the size of the pm_buffer_t struct.
|
6
4
|
size_t
|
7
5
|
pm_buffer_sizeof(void) {
|
8
6
|
return sizeof(pm_buffer_t);
|
9
7
|
}
|
10
8
|
|
11
|
-
// Initialize a pm_buffer_t with its default values.
|
9
|
+
// Initialize a pm_buffer_t with the given capacity.
|
12
10
|
bool
|
13
|
-
|
11
|
+
pm_buffer_init_capacity(pm_buffer_t *buffer, size_t capacity) {
|
14
12
|
buffer->length = 0;
|
15
|
-
buffer->capacity = PRISM_BUFFER_INITIAL_SIZE;
|
13
|
+
buffer->capacity = capacity;
|
16
14
|
|
17
|
-
buffer->value = (char *) malloc(PRISM_BUFFER_INITIAL_SIZE);
|
15
|
+
buffer->value = (char *) malloc(capacity);
|
18
16
|
return buffer->value != NULL;
|
19
17
|
}
|
20
18
|
|
21
|
-
|
19
|
+
// Initialize a pm_buffer_t with its default values.
|
20
|
+
bool
|
21
|
+
pm_buffer_init(pm_buffer_t *buffer) {
|
22
|
+
return pm_buffer_init_capacity(buffer, 1024);
|
23
|
+
}
|
22
24
|
|
23
25
|
// Return the value of the buffer.
|
24
26
|
char *
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: prism
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.13.0
|
4
|
+
version: 0.14.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Shopify
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-10-13 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description:
|
14
14
|
email:
|
@@ -33,6 +33,7 @@ files:
|
|
33
33
|
- docs/fuzzing.md
|
34
34
|
- docs/heredocs.md
|
35
35
|
- docs/mapping.md
|
36
|
+
- docs/prism.png
|
36
37
|
- docs/ripper.md
|
37
38
|
- docs/ruby_api.md
|
38
39
|
- docs/serialization.md
|
@@ -51,7 +52,6 @@ files:
|
|
51
52
|
- include/prism/pack.h
|
52
53
|
- include/prism/parser.h
|
53
54
|
- include/prism/regexp.h
|
54
|
-
- include/prism/unescape.h
|
55
55
|
- include/prism/util/pm_buffer.h
|
56
56
|
- include/prism/util/pm_char.h
|
57
57
|
- include/prism/util/pm_constant_pool.h
|
@@ -99,7 +99,6 @@ files:
|
|
99
99
|
- src/regexp.c
|
100
100
|
- src/serialize.c
|
101
101
|
- src/token_type.c
|
102
|
-
- src/unescape.c
|
103
102
|
- src/util/pm_buffer.c
|
104
103
|
- src/util/pm_char.c
|
105
104
|
- src/util/pm_constant_pool.c
|
data/include/prism/unescape.h
DELETED
@@ -1,48 +0,0 @@
|
|
1
|
-
#ifndef PRISM_UNESCAPE_H
|
2
|
-
#define PRISM_UNESCAPE_H
|
3
|
-
|
4
|
-
#include "prism/defines.h"
|
5
|
-
#include "prism/diagnostic.h"
|
6
|
-
#include "prism/parser.h"
|
7
|
-
#include "prism/util/pm_char.h"
|
8
|
-
#include "prism/util/pm_list.h"
|
9
|
-
#include "prism/util/pm_memchr.h"
|
10
|
-
#include "prism/util/pm_string.h"
|
11
|
-
|
12
|
-
#include <assert.h>
|
13
|
-
#include <stdbool.h>
|
14
|
-
#include <stdint.h>
|
15
|
-
#include <string.h>
|
16
|
-
|
17
|
-
// The type of unescape we are performing.
|
18
|
-
typedef enum {
|
19
|
-
// When we're creating a string inside of a list literal like %w, we
|
20
|
-
// shouldn't escape anything.
|
21
|
-
PM_UNESCAPE_NONE,
|
22
|
-
|
23
|
-
// When we're unescaping a single-quoted string, we only need to unescape
|
24
|
-
// single quotes and backslashes.
|
25
|
-
PM_UNESCAPE_MINIMAL,
|
26
|
-
|
27
|
-
// When we're unescaping a string list, in addition to MINIMAL, we need to
|
28
|
-
// unescape whitespace.
|
29
|
-
PM_UNESCAPE_WHITESPACE,
|
30
|
-
|
31
|
-
// When we're unescaping a double-quoted string, we need to unescape all
|
32
|
-
// escapes.
|
33
|
-
PM_UNESCAPE_ALL,
|
34
|
-
} pm_unescape_type_t;
|
35
|
-
|
36
|
-
// Unescape the contents of the given token into the given string using the given unescape mode.
|
37
|
-
PRISM_EXPORTED_FUNCTION void pm_unescape_manipulate_string(pm_parser_t *parser, pm_string_t *string, pm_unescape_type_t unescape_type);
|
38
|
-
void pm_unescape_manipulate_char_literal(pm_parser_t *parser, pm_string_t *string, pm_unescape_type_t unescape_type);
|
39
|
-
|
40
|
-
// Accepts a source string and a type of unescaping and returns the unescaped version.
|
41
|
-
// The caller must pm_string_free(result); after calling this function.
|
42
|
-
PRISM_EXPORTED_FUNCTION bool pm_unescape_string(const uint8_t *start, size_t length, pm_unescape_type_t unescape_type, pm_string_t *result);
|
43
|
-
|
44
|
-
// Returns the number of bytes that encompass the first escape sequence in the
|
45
|
-
// given string.
|
46
|
-
size_t pm_unescape_calculate_difference(pm_parser_t *parser, const uint8_t *value, pm_unescape_type_t unescape_type, bool expect_single_codepoint);
|
47
|
-
|
48
|
-
#endif
|