prism 0.13.0 → 0.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +20 -1
- data/README.md +4 -1
- data/config.yml +10 -14
- data/docs/fuzzing.md +5 -10
- data/docs/prism.png +0 -0
- data/docs/serialization.md +10 -0
- data/ext/prism/api_node.c +35 -28
- data/ext/prism/extension.c +35 -48
- data/ext/prism/extension.h +1 -1
- data/include/prism/ast.h +38 -36
- data/include/prism/node.h +1 -0
- data/include/prism/parser.h +26 -0
- data/include/prism/util/pm_buffer.h +3 -0
- data/include/prism/util/pm_constant_pool.h +5 -0
- data/include/prism/util/pm_string.h +2 -1
- data/include/prism/version.h +2 -2
- data/include/prism.h +0 -1
- data/lib/prism/compiler.rb +141 -141
- data/lib/prism/ffi.rb +2 -2
- data/lib/prism/lex_compat.rb +42 -8
- data/lib/prism/node.rb +1456 -46
- data/lib/prism/node_ext.rb +44 -0
- data/lib/prism/parse_result.rb +32 -5
- data/lib/prism/pattern.rb +1 -1
- data/lib/prism/serialize.rb +16 -14
- data/prism.gemspec +2 -3
- data/src/diagnostic.c +1 -1
- data/src/node.c +0 -14
- data/src/prettyprint.c +35 -35
- data/src/prism.c +1728 -811
- data/src/serialize.c +45 -22
- data/src/util/pm_buffer.c +9 -7
- metadata +3 -4
- data/include/prism/unescape.h +0 -48
- data/src/unescape.c +0 -637
data/include/prism/ast.h
CHANGED
@@ -358,8 +358,10 @@ typedef uint16_t pm_node_flags_t;
|
|
358
358
|
|
359
359
|
// We store the flags enum in every node in the tree. Some flags are common to
|
360
360
|
// all nodes (the ones listed below). Others are specific to certain node types.
|
361
|
-
|
362
|
-
static const pm_node_flags_t
|
361
|
+
#define PM_NODE_FLAG_BITS (sizeof(pm_node_flags_t) * 8)
|
362
|
+
static const pm_node_flags_t PM_NODE_FLAG_NEWLINE = (1 << (PM_NODE_FLAG_BITS - 1));
|
363
|
+
static const pm_node_flags_t PM_NODE_FLAG_STATIC_LITERAL = (1 << (PM_NODE_FLAG_BITS - 2));
|
364
|
+
static const pm_node_flags_t PM_NODE_FLAG_COMMON_MASK = PM_NODE_FLAG_NEWLINE | PM_NODE_FLAG_STATIC_LITERAL;
|
363
365
|
|
364
366
|
// For easy access, we define some macros to check node type
|
365
367
|
#define PM_NODE_TYPE(node) ((enum pm_node_type)node->type)
|
@@ -562,8 +564,8 @@ typedef struct pm_call_and_write_node {
|
|
562
564
|
pm_location_t opening_loc;
|
563
565
|
struct pm_arguments_node *arguments;
|
564
566
|
pm_location_t closing_loc;
|
565
|
-
|
566
|
-
|
567
|
+
pm_constant_id_t read_name;
|
568
|
+
pm_constant_id_t write_name;
|
567
569
|
pm_location_t operator_loc;
|
568
570
|
struct pm_node *value;
|
569
571
|
} pm_call_and_write_node_t;
|
@@ -583,7 +585,7 @@ typedef struct pm_call_node {
|
|
583
585
|
struct pm_arguments_node *arguments;
|
584
586
|
pm_location_t closing_loc;
|
585
587
|
struct pm_node *block;
|
586
|
-
|
588
|
+
pm_constant_id_t name;
|
587
589
|
} pm_call_node_t;
|
588
590
|
|
589
591
|
// CallOperatorWriteNode
|
@@ -600,8 +602,8 @@ typedef struct pm_call_operator_write_node {
|
|
600
602
|
pm_location_t opening_loc;
|
601
603
|
struct pm_arguments_node *arguments;
|
602
604
|
pm_location_t closing_loc;
|
603
|
-
|
604
|
-
|
605
|
+
pm_constant_id_t read_name;
|
606
|
+
pm_constant_id_t write_name;
|
605
607
|
pm_constant_id_t operator;
|
606
608
|
pm_location_t operator_loc;
|
607
609
|
struct pm_node *value;
|
@@ -621,8 +623,8 @@ typedef struct pm_call_or_write_node {
|
|
621
623
|
pm_location_t opening_loc;
|
622
624
|
struct pm_arguments_node *arguments;
|
623
625
|
pm_location_t closing_loc;
|
624
|
-
|
625
|
-
|
626
|
+
pm_constant_id_t read_name;
|
627
|
+
pm_constant_id_t write_name;
|
626
628
|
pm_location_t operator_loc;
|
627
629
|
struct pm_node *value;
|
628
630
|
} pm_call_or_write_node_t;
|
@@ -1193,11 +1195,11 @@ typedef struct pm_integer_node {
|
|
1193
1195
|
// PM_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE
|
1194
1196
|
// PM_REGULAR_EXPRESSION_FLAGS_EXTENDED
|
1195
1197
|
// PM_REGULAR_EXPRESSION_FLAGS_MULTI_LINE
|
1198
|
+
// PM_REGULAR_EXPRESSION_FLAGS_ONCE
|
1196
1199
|
// PM_REGULAR_EXPRESSION_FLAGS_EUC_JP
|
1197
1200
|
// PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT
|
1198
1201
|
// PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J
|
1199
1202
|
// PM_REGULAR_EXPRESSION_FLAGS_UTF_8
|
1200
|
-
// PM_REGULAR_EXPRESSION_FLAGS_ONCE
|
1201
1203
|
typedef struct pm_interpolated_match_last_line_node {
|
1202
1204
|
pm_node_t base;
|
1203
1205
|
pm_location_t opening_loc;
|
@@ -1212,11 +1214,11 @@ typedef struct pm_interpolated_match_last_line_node {
|
|
1212
1214
|
// PM_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE
|
1213
1215
|
// PM_REGULAR_EXPRESSION_FLAGS_EXTENDED
|
1214
1216
|
// PM_REGULAR_EXPRESSION_FLAGS_MULTI_LINE
|
1217
|
+
// PM_REGULAR_EXPRESSION_FLAGS_ONCE
|
1215
1218
|
// PM_REGULAR_EXPRESSION_FLAGS_EUC_JP
|
1216
1219
|
// PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT
|
1217
1220
|
// PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J
|
1218
1221
|
// PM_REGULAR_EXPRESSION_FLAGS_UTF_8
|
1219
|
-
// PM_REGULAR_EXPRESSION_FLAGS_ONCE
|
1220
1222
|
typedef struct pm_interpolated_regular_expression_node {
|
1221
1223
|
pm_node_t base;
|
1222
1224
|
pm_location_t opening_loc;
|
@@ -1369,11 +1371,11 @@ typedef struct pm_local_variable_write_node {
|
|
1369
1371
|
// PM_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE
|
1370
1372
|
// PM_REGULAR_EXPRESSION_FLAGS_EXTENDED
|
1371
1373
|
// PM_REGULAR_EXPRESSION_FLAGS_MULTI_LINE
|
1374
|
+
// PM_REGULAR_EXPRESSION_FLAGS_ONCE
|
1372
1375
|
// PM_REGULAR_EXPRESSION_FLAGS_EUC_JP
|
1373
1376
|
// PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT
|
1374
1377
|
// PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J
|
1375
1378
|
// PM_REGULAR_EXPRESSION_FLAGS_UTF_8
|
1376
|
-
// PM_REGULAR_EXPRESSION_FLAGS_ONCE
|
1377
1379
|
typedef struct pm_match_last_line_node {
|
1378
1380
|
pm_node_t base;
|
1379
1381
|
pm_location_t opening_loc;
|
@@ -1616,11 +1618,11 @@ typedef struct pm_redo_node {
|
|
1616
1618
|
// PM_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE
|
1617
1619
|
// PM_REGULAR_EXPRESSION_FLAGS_EXTENDED
|
1618
1620
|
// PM_REGULAR_EXPRESSION_FLAGS_MULTI_LINE
|
1621
|
+
// PM_REGULAR_EXPRESSION_FLAGS_ONCE
|
1619
1622
|
// PM_REGULAR_EXPRESSION_FLAGS_EUC_JP
|
1620
1623
|
// PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT
|
1621
1624
|
// PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J
|
1622
1625
|
// PM_REGULAR_EXPRESSION_FLAGS_UTF_8
|
1623
|
-
// PM_REGULAR_EXPRESSION_FLAGS_ONCE
|
1624
1626
|
typedef struct pm_regular_expression_node {
|
1625
1627
|
pm_node_t base;
|
1626
1628
|
pm_location_t opening_loc;
|
@@ -1887,44 +1889,44 @@ typedef struct pm_yield_node {
|
|
1887
1889
|
} pm_yield_node_t;
|
1888
1890
|
|
1889
1891
|
// CallNodeFlags
|
1890
|
-
typedef enum {
|
1891
|
-
PM_CALL_NODE_FLAGS_SAFE_NAVIGATION = 1 <<
|
1892
|
-
PM_CALL_NODE_FLAGS_VARIABLE_CALL = 1 <<
|
1892
|
+
typedef enum pm_call_node_flags {
|
1893
|
+
PM_CALL_NODE_FLAGS_SAFE_NAVIGATION = 1 << 0,
|
1894
|
+
PM_CALL_NODE_FLAGS_VARIABLE_CALL = 1 << 1,
|
1893
1895
|
} pm_call_node_flags_t;
|
1894
1896
|
|
1895
1897
|
// IntegerBaseFlags
|
1896
|
-
typedef enum {
|
1897
|
-
PM_INTEGER_BASE_FLAGS_BINARY = 1 <<
|
1898
|
-
PM_INTEGER_BASE_FLAGS_OCTAL = 1 <<
|
1899
|
-
PM_INTEGER_BASE_FLAGS_DECIMAL = 1 <<
|
1900
|
-
PM_INTEGER_BASE_FLAGS_HEXADECIMAL = 1 <<
|
1898
|
+
typedef enum pm_integer_base_flags {
|
1899
|
+
PM_INTEGER_BASE_FLAGS_BINARY = 1 << 0,
|
1900
|
+
PM_INTEGER_BASE_FLAGS_OCTAL = 1 << 1,
|
1901
|
+
PM_INTEGER_BASE_FLAGS_DECIMAL = 1 << 2,
|
1902
|
+
PM_INTEGER_BASE_FLAGS_HEXADECIMAL = 1 << 3,
|
1901
1903
|
} pm_integer_base_flags_t;
|
1902
1904
|
|
1903
1905
|
// LoopFlags
|
1904
|
-
typedef enum {
|
1905
|
-
PM_LOOP_FLAGS_BEGIN_MODIFIER = 1 <<
|
1906
|
+
typedef enum pm_loop_flags {
|
1907
|
+
PM_LOOP_FLAGS_BEGIN_MODIFIER = 1 << 0,
|
1906
1908
|
} pm_loop_flags_t;
|
1907
1909
|
|
1908
1910
|
// RangeFlags
|
1909
|
-
typedef enum {
|
1910
|
-
PM_RANGE_FLAGS_EXCLUDE_END = 1 <<
|
1911
|
+
typedef enum pm_range_flags {
|
1912
|
+
PM_RANGE_FLAGS_EXCLUDE_END = 1 << 0,
|
1911
1913
|
} pm_range_flags_t;
|
1912
1914
|
|
1913
1915
|
// RegularExpressionFlags
|
1914
|
-
typedef enum {
|
1915
|
-
PM_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE = 1 <<
|
1916
|
-
PM_REGULAR_EXPRESSION_FLAGS_EXTENDED = 1 <<
|
1917
|
-
PM_REGULAR_EXPRESSION_FLAGS_MULTI_LINE = 1 <<
|
1918
|
-
|
1919
|
-
|
1920
|
-
|
1921
|
-
|
1922
|
-
|
1916
|
+
typedef enum pm_regular_expression_flags {
|
1917
|
+
PM_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE = 1 << 0,
|
1918
|
+
PM_REGULAR_EXPRESSION_FLAGS_EXTENDED = 1 << 1,
|
1919
|
+
PM_REGULAR_EXPRESSION_FLAGS_MULTI_LINE = 1 << 2,
|
1920
|
+
PM_REGULAR_EXPRESSION_FLAGS_ONCE = 1 << 3,
|
1921
|
+
PM_REGULAR_EXPRESSION_FLAGS_EUC_JP = 1 << 4,
|
1922
|
+
PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT = 1 << 5,
|
1923
|
+
PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J = 1 << 6,
|
1924
|
+
PM_REGULAR_EXPRESSION_FLAGS_UTF_8 = 1 << 7,
|
1923
1925
|
} pm_regular_expression_flags_t;
|
1924
1926
|
|
1925
1927
|
// StringFlags
|
1926
|
-
typedef enum {
|
1927
|
-
PM_STRING_FLAGS_FROZEN = 1 <<
|
1928
|
+
typedef enum pm_string_flags {
|
1929
|
+
PM_STRING_FLAGS_FROZEN = 1 << 0,
|
1928
1930
|
} pm_string_flags_t;
|
1929
1931
|
|
1930
1932
|
#define PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS false
|
data/include/prism/node.h
CHANGED
@@ -33,6 +33,7 @@ PRISM_EXPORTED_FUNCTION const char * pm_node_type_to_str(pm_node_type_t node_typ
|
|
33
33
|
// declare them here to avoid generating them.
|
34
34
|
typedef struct pm_scope_node {
|
35
35
|
pm_node_t base;
|
36
|
+
pm_node_t *ast_node;
|
36
37
|
struct pm_parameters_node *parameters;
|
37
38
|
pm_node_t *body;
|
38
39
|
pm_constant_id_list_t locals;
|
data/include/prism/parser.h
CHANGED
@@ -8,6 +8,7 @@
|
|
8
8
|
#include "prism/util/pm_list.h"
|
9
9
|
#include "prism/util/pm_newline_list.h"
|
10
10
|
#include "prism/util/pm_state_stack.h"
|
11
|
+
#include "prism/util/pm_string.h"
|
11
12
|
|
12
13
|
#include <stdbool.h>
|
13
14
|
|
@@ -172,6 +173,11 @@ typedef struct pm_lex_mode {
|
|
172
173
|
// This is the pointer to the character where lexing should resume
|
173
174
|
// once the heredoc has been completely processed.
|
174
175
|
const uint8_t *next_start;
|
176
|
+
|
177
|
+
// This is used to track the amount of common whitespace on each
|
178
|
+
// line so that we know how much to dedent each line in the case of
|
179
|
+
// a tilde heredoc.
|
180
|
+
size_t common_whitespace;
|
175
181
|
} heredoc;
|
176
182
|
} as;
|
177
183
|
|
@@ -244,6 +250,16 @@ typedef struct pm_comment {
|
|
244
250
|
pm_comment_type_t type;
|
245
251
|
} pm_comment_t;
|
246
252
|
|
253
|
+
// This is a node in the linked list of magic comments that we've found while
|
254
|
+
// parsing.
|
255
|
+
typedef struct {
|
256
|
+
pm_list_node_t node;
|
257
|
+
const uint8_t *key_start;
|
258
|
+
const uint8_t *value_start;
|
259
|
+
uint32_t key_length;
|
260
|
+
uint32_t value_length;
|
261
|
+
} pm_magic_comment_t;
|
262
|
+
|
247
263
|
// When the encoding that is being used to parse the source is changed by prism,
|
248
264
|
// we provide the ability here to call out to a user-defined function.
|
249
265
|
typedef void (*pm_encoding_changed_callback_t)(pm_parser_t *parser);
|
@@ -293,6 +309,11 @@ typedef struct pm_scope {
|
|
293
309
|
// This is necessary to determine if child blocks are allowed to use
|
294
310
|
// numbered parameters.
|
295
311
|
bool numbered_params;
|
312
|
+
|
313
|
+
// A transparent scope is a scope that cannot have locals set on itself.
|
314
|
+
// When a local is set on this scope, it will instead be set on the parent
|
315
|
+
// scope's local table.
|
316
|
+
bool transparent;
|
296
317
|
} pm_scope_t;
|
297
318
|
|
298
319
|
// This struct represents the overall parser. It contains a reference to the
|
@@ -342,6 +363,7 @@ struct pm_parser {
|
|
342
363
|
const uint8_t *heredoc_end;
|
343
364
|
|
344
365
|
pm_list_t comment_list; // the list of comments that have been found while parsing
|
366
|
+
pm_list_t magic_comment_list; // the list of magic comments that have been found while parsing.
|
345
367
|
pm_list_t warning_list; // the list of warnings that have been found while parsing
|
346
368
|
pm_list_t error_list; // the list of errors that have been found while parsing
|
347
369
|
pm_scope_t *current_scope; // the current local scope
|
@@ -388,6 +410,10 @@ struct pm_parser {
|
|
388
410
|
// when we find tokens that we need it for.
|
389
411
|
pm_node_flags_t integer_base;
|
390
412
|
|
413
|
+
// This string is used to pass information from the lexer to the parser. It
|
414
|
+
// is particularly necessary because of escape sequences.
|
415
|
+
pm_string_t current_string;
|
416
|
+
|
391
417
|
// Whether or not we're at the beginning of a command
|
392
418
|
bool command_start;
|
393
419
|
|
data/include/prism/util/pm_buffer.h
CHANGED
@@ -21,6 +21,9 @@ typedef struct {
|
|
21
21
|
// Return the size of the pm_buffer_t struct.
|
22
22
|
PRISM_EXPORTED_FUNCTION size_t pm_buffer_sizeof(void);
|
23
23
|
|
24
|
+
// Initialize a pm_buffer_t with the given capacity.
|
25
|
+
bool pm_buffer_init_capacity(pm_buffer_t *buffer, size_t capacity);
|
26
|
+
|
24
27
|
// Initialize a pm_buffer_t with its default values.
|
25
28
|
PRISM_EXPORTED_FUNCTION bool pm_buffer_init(pm_buffer_t *buffer);
|
26
29
|
|
data/include/prism/util/pm_constant_pool.h
CHANGED
@@ -63,6 +63,11 @@ typedef struct {
|
|
63
63
|
// Initialize a new constant pool with a given capacity.
|
64
64
|
bool pm_constant_pool_init(pm_constant_pool_t *pool, uint32_t capacity);
|
65
65
|
|
66
|
+
static inline pm_constant_t* pm_constant_pool_id_to_constant(pm_constant_pool_t *pool, pm_constant_id_t constant_id) {
|
67
|
+
assert(constant_id > 0 && constant_id <= pool->size);
|
68
|
+
return &pool->constants[constant_id - 1];
|
69
|
+
}
|
70
|
+
|
66
71
|
// Insert a constant into a constant pool that is a slice of a source string.
|
67
72
|
// Returns the id of the constant, or 0 if any potential calls to resize fail.
|
68
73
|
pm_constant_id_t pm_constant_pool_insert_shared(pm_constant_pool_t *pool, const uint8_t *start, size_t length);
|
data/include/prism/util/pm_string.h
CHANGED
@@ -11,9 +11,10 @@
|
|
11
11
|
|
12
12
|
// This struct represents a string value.
|
13
13
|
typedef struct {
|
14
|
-
enum { PM_STRING_SHARED, PM_STRING_OWNED, PM_STRING_CONSTANT, PM_STRING_MAPPED } type;
|
15
14
|
const uint8_t *source;
|
16
15
|
size_t length;
|
16
|
+
// This field is deliberately not the first one, because otherwise things like .pm_string_t_field = 123 (or a pm_constant_id_t value) do not warn or error
|
17
|
+
enum { PM_STRING_SHARED, PM_STRING_OWNED, PM_STRING_CONSTANT, PM_STRING_MAPPED } type;
|
17
18
|
} pm_string_t;
|
18
19
|
|
19
20
|
#define PM_EMPTY_STRING ((pm_string_t) { .type = PM_STRING_CONSTANT, .source = NULL, .length = 0 })
|
data/include/prism/version.h
CHANGED