prism 0.13.0 → 0.14.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +20 -1
- data/README.md +4 -1
- data/config.yml +10 -14
- data/docs/fuzzing.md +5 -10
- data/docs/prism.png +0 -0
- data/docs/serialization.md +10 -0
- data/ext/prism/api_node.c +35 -28
- data/ext/prism/extension.c +35 -48
- data/ext/prism/extension.h +1 -1
- data/include/prism/ast.h +38 -36
- data/include/prism/node.h +1 -0
- data/include/prism/parser.h +26 -0
- data/include/prism/util/pm_buffer.h +3 -0
- data/include/prism/util/pm_constant_pool.h +5 -0
- data/include/prism/util/pm_string.h +2 -1
- data/include/prism/version.h +2 -2
- data/include/prism.h +0 -1
- data/lib/prism/compiler.rb +141 -141
- data/lib/prism/ffi.rb +2 -2
- data/lib/prism/lex_compat.rb +42 -8
- data/lib/prism/node.rb +1456 -46
- data/lib/prism/node_ext.rb +44 -0
- data/lib/prism/parse_result.rb +32 -5
- data/lib/prism/pattern.rb +1 -1
- data/lib/prism/serialize.rb +16 -14
- data/prism.gemspec +2 -3
- data/src/diagnostic.c +1 -1
- data/src/node.c +0 -14
- data/src/prettyprint.c +35 -35
- data/src/prism.c +1728 -811
- data/src/serialize.c +45 -22
- data/src/util/pm_buffer.c +9 -7
- metadata +3 -4
- data/include/prism/unescape.h +0 -48
- data/src/unescape.c +0 -637
data/include/prism/ast.h
CHANGED
@@ -358,8 +358,10 @@ typedef uint16_t pm_node_flags_t;
|
|
358
358
|
|
359
359
|
// We store the flags enum in every node in the tree. Some flags are common to
|
360
360
|
// all nodes (the ones listed below). Others are specific to certain node types.
|
361
|
-
|
362
|
-
static const pm_node_flags_t
|
361
|
+
#define PM_NODE_FLAG_BITS (sizeof(pm_node_flags_t) * 8)
|
362
|
+
static const pm_node_flags_t PM_NODE_FLAG_NEWLINE = (1 << (PM_NODE_FLAG_BITS - 1));
|
363
|
+
static const pm_node_flags_t PM_NODE_FLAG_STATIC_LITERAL = (1 << (PM_NODE_FLAG_BITS - 2));
|
364
|
+
static const pm_node_flags_t PM_NODE_FLAG_COMMON_MASK = PM_NODE_FLAG_NEWLINE | PM_NODE_FLAG_STATIC_LITERAL;
|
363
365
|
|
364
366
|
// For easy access, we define some macros to check node type
|
365
367
|
#define PM_NODE_TYPE(node) ((enum pm_node_type)node->type)
|
@@ -562,8 +564,8 @@ typedef struct pm_call_and_write_node {
|
|
562
564
|
pm_location_t opening_loc;
|
563
565
|
struct pm_arguments_node *arguments;
|
564
566
|
pm_location_t closing_loc;
|
565
|
-
|
566
|
-
|
567
|
+
pm_constant_id_t read_name;
|
568
|
+
pm_constant_id_t write_name;
|
567
569
|
pm_location_t operator_loc;
|
568
570
|
struct pm_node *value;
|
569
571
|
} pm_call_and_write_node_t;
|
@@ -583,7 +585,7 @@ typedef struct pm_call_node {
|
|
583
585
|
struct pm_arguments_node *arguments;
|
584
586
|
pm_location_t closing_loc;
|
585
587
|
struct pm_node *block;
|
586
|
-
|
588
|
+
pm_constant_id_t name;
|
587
589
|
} pm_call_node_t;
|
588
590
|
|
589
591
|
// CallOperatorWriteNode
|
@@ -600,8 +602,8 @@ typedef struct pm_call_operator_write_node {
|
|
600
602
|
pm_location_t opening_loc;
|
601
603
|
struct pm_arguments_node *arguments;
|
602
604
|
pm_location_t closing_loc;
|
603
|
-
|
604
|
-
|
605
|
+
pm_constant_id_t read_name;
|
606
|
+
pm_constant_id_t write_name;
|
605
607
|
pm_constant_id_t operator;
|
606
608
|
pm_location_t operator_loc;
|
607
609
|
struct pm_node *value;
|
@@ -621,8 +623,8 @@ typedef struct pm_call_or_write_node {
|
|
621
623
|
pm_location_t opening_loc;
|
622
624
|
struct pm_arguments_node *arguments;
|
623
625
|
pm_location_t closing_loc;
|
624
|
-
|
625
|
-
|
626
|
+
pm_constant_id_t read_name;
|
627
|
+
pm_constant_id_t write_name;
|
626
628
|
pm_location_t operator_loc;
|
627
629
|
struct pm_node *value;
|
628
630
|
} pm_call_or_write_node_t;
|
@@ -1193,11 +1195,11 @@ typedef struct pm_integer_node {
|
|
1193
1195
|
// PM_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE
|
1194
1196
|
// PM_REGULAR_EXPRESSION_FLAGS_EXTENDED
|
1195
1197
|
// PM_REGULAR_EXPRESSION_FLAGS_MULTI_LINE
|
1198
|
+
// PM_REGULAR_EXPRESSION_FLAGS_ONCE
|
1196
1199
|
// PM_REGULAR_EXPRESSION_FLAGS_EUC_JP
|
1197
1200
|
// PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT
|
1198
1201
|
// PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J
|
1199
1202
|
// PM_REGULAR_EXPRESSION_FLAGS_UTF_8
|
1200
|
-
// PM_REGULAR_EXPRESSION_FLAGS_ONCE
|
1201
1203
|
typedef struct pm_interpolated_match_last_line_node {
|
1202
1204
|
pm_node_t base;
|
1203
1205
|
pm_location_t opening_loc;
|
@@ -1212,11 +1214,11 @@ typedef struct pm_interpolated_match_last_line_node {
|
|
1212
1214
|
// PM_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE
|
1213
1215
|
// PM_REGULAR_EXPRESSION_FLAGS_EXTENDED
|
1214
1216
|
// PM_REGULAR_EXPRESSION_FLAGS_MULTI_LINE
|
1217
|
+
// PM_REGULAR_EXPRESSION_FLAGS_ONCE
|
1215
1218
|
// PM_REGULAR_EXPRESSION_FLAGS_EUC_JP
|
1216
1219
|
// PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT
|
1217
1220
|
// PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J
|
1218
1221
|
// PM_REGULAR_EXPRESSION_FLAGS_UTF_8
|
1219
|
-
// PM_REGULAR_EXPRESSION_FLAGS_ONCE
|
1220
1222
|
typedef struct pm_interpolated_regular_expression_node {
|
1221
1223
|
pm_node_t base;
|
1222
1224
|
pm_location_t opening_loc;
|
@@ -1369,11 +1371,11 @@ typedef struct pm_local_variable_write_node {
|
|
1369
1371
|
// PM_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE
|
1370
1372
|
// PM_REGULAR_EXPRESSION_FLAGS_EXTENDED
|
1371
1373
|
// PM_REGULAR_EXPRESSION_FLAGS_MULTI_LINE
|
1374
|
+
// PM_REGULAR_EXPRESSION_FLAGS_ONCE
|
1372
1375
|
// PM_REGULAR_EXPRESSION_FLAGS_EUC_JP
|
1373
1376
|
// PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT
|
1374
1377
|
// PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J
|
1375
1378
|
// PM_REGULAR_EXPRESSION_FLAGS_UTF_8
|
1376
|
-
// PM_REGULAR_EXPRESSION_FLAGS_ONCE
|
1377
1379
|
typedef struct pm_match_last_line_node {
|
1378
1380
|
pm_node_t base;
|
1379
1381
|
pm_location_t opening_loc;
|
@@ -1616,11 +1618,11 @@ typedef struct pm_redo_node {
|
|
1616
1618
|
// PM_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE
|
1617
1619
|
// PM_REGULAR_EXPRESSION_FLAGS_EXTENDED
|
1618
1620
|
// PM_REGULAR_EXPRESSION_FLAGS_MULTI_LINE
|
1621
|
+
// PM_REGULAR_EXPRESSION_FLAGS_ONCE
|
1619
1622
|
// PM_REGULAR_EXPRESSION_FLAGS_EUC_JP
|
1620
1623
|
// PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT
|
1621
1624
|
// PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J
|
1622
1625
|
// PM_REGULAR_EXPRESSION_FLAGS_UTF_8
|
1623
|
-
// PM_REGULAR_EXPRESSION_FLAGS_ONCE
|
1624
1626
|
typedef struct pm_regular_expression_node {
|
1625
1627
|
pm_node_t base;
|
1626
1628
|
pm_location_t opening_loc;
|
@@ -1887,44 +1889,44 @@ typedef struct pm_yield_node {
|
|
1887
1889
|
} pm_yield_node_t;
|
1888
1890
|
|
1889
1891
|
// CallNodeFlags
|
1890
|
-
typedef enum {
|
1891
|
-
PM_CALL_NODE_FLAGS_SAFE_NAVIGATION = 1 <<
|
1892
|
-
PM_CALL_NODE_FLAGS_VARIABLE_CALL = 1 <<
|
1892
|
+
typedef enum pm_call_node_flags {
|
1893
|
+
PM_CALL_NODE_FLAGS_SAFE_NAVIGATION = 1 << 0,
|
1894
|
+
PM_CALL_NODE_FLAGS_VARIABLE_CALL = 1 << 1,
|
1893
1895
|
} pm_call_node_flags_t;
|
1894
1896
|
|
1895
1897
|
// IntegerBaseFlags
|
1896
|
-
typedef enum {
|
1897
|
-
PM_INTEGER_BASE_FLAGS_BINARY = 1 <<
|
1898
|
-
PM_INTEGER_BASE_FLAGS_OCTAL = 1 <<
|
1899
|
-
PM_INTEGER_BASE_FLAGS_DECIMAL = 1 <<
|
1900
|
-
PM_INTEGER_BASE_FLAGS_HEXADECIMAL = 1 <<
|
1898
|
+
typedef enum pm_integer_base_flags {
|
1899
|
+
PM_INTEGER_BASE_FLAGS_BINARY = 1 << 0,
|
1900
|
+
PM_INTEGER_BASE_FLAGS_OCTAL = 1 << 1,
|
1901
|
+
PM_INTEGER_BASE_FLAGS_DECIMAL = 1 << 2,
|
1902
|
+
PM_INTEGER_BASE_FLAGS_HEXADECIMAL = 1 << 3,
|
1901
1903
|
} pm_integer_base_flags_t;
|
1902
1904
|
|
1903
1905
|
// LoopFlags
|
1904
|
-
typedef enum {
|
1905
|
-
PM_LOOP_FLAGS_BEGIN_MODIFIER = 1 <<
|
1906
|
+
typedef enum pm_loop_flags {
|
1907
|
+
PM_LOOP_FLAGS_BEGIN_MODIFIER = 1 << 0,
|
1906
1908
|
} pm_loop_flags_t;
|
1907
1909
|
|
1908
1910
|
// RangeFlags
|
1909
|
-
typedef enum {
|
1910
|
-
PM_RANGE_FLAGS_EXCLUDE_END = 1 <<
|
1911
|
+
typedef enum pm_range_flags {
|
1912
|
+
PM_RANGE_FLAGS_EXCLUDE_END = 1 << 0,
|
1911
1913
|
} pm_range_flags_t;
|
1912
1914
|
|
1913
1915
|
// RegularExpressionFlags
|
1914
|
-
typedef enum {
|
1915
|
-
PM_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE = 1 <<
|
1916
|
-
PM_REGULAR_EXPRESSION_FLAGS_EXTENDED = 1 <<
|
1917
|
-
PM_REGULAR_EXPRESSION_FLAGS_MULTI_LINE = 1 <<
|
1918
|
-
|
1919
|
-
|
1920
|
-
|
1921
|
-
|
1922
|
-
|
1916
|
+
typedef enum pm_regular_expression_flags {
|
1917
|
+
PM_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE = 1 << 0,
|
1918
|
+
PM_REGULAR_EXPRESSION_FLAGS_EXTENDED = 1 << 1,
|
1919
|
+
PM_REGULAR_EXPRESSION_FLAGS_MULTI_LINE = 1 << 2,
|
1920
|
+
PM_REGULAR_EXPRESSION_FLAGS_ONCE = 1 << 3,
|
1921
|
+
PM_REGULAR_EXPRESSION_FLAGS_EUC_JP = 1 << 4,
|
1922
|
+
PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT = 1 << 5,
|
1923
|
+
PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J = 1 << 6,
|
1924
|
+
PM_REGULAR_EXPRESSION_FLAGS_UTF_8 = 1 << 7,
|
1923
1925
|
} pm_regular_expression_flags_t;
|
1924
1926
|
|
1925
1927
|
// StringFlags
|
1926
|
-
typedef enum {
|
1927
|
-
PM_STRING_FLAGS_FROZEN = 1 <<
|
1928
|
+
typedef enum pm_string_flags {
|
1929
|
+
PM_STRING_FLAGS_FROZEN = 1 << 0,
|
1928
1930
|
} pm_string_flags_t;
|
1929
1931
|
|
1930
1932
|
#define PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS false
|
data/include/prism/node.h
CHANGED
@@ -33,6 +33,7 @@ PRISM_EXPORTED_FUNCTION const char * pm_node_type_to_str(pm_node_type_t node_typ
|
|
33
33
|
// declare them here to avoid generating them.
|
34
34
|
typedef struct pm_scope_node {
|
35
35
|
pm_node_t base;
|
36
|
+
pm_node_t *ast_node;
|
36
37
|
struct pm_parameters_node *parameters;
|
37
38
|
pm_node_t *body;
|
38
39
|
pm_constant_id_list_t locals;
|
data/include/prism/parser.h
CHANGED
@@ -8,6 +8,7 @@
|
|
8
8
|
#include "prism/util/pm_list.h"
|
9
9
|
#include "prism/util/pm_newline_list.h"
|
10
10
|
#include "prism/util/pm_state_stack.h"
|
11
|
+
#include "prism/util/pm_string.h"
|
11
12
|
|
12
13
|
#include <stdbool.h>
|
13
14
|
|
@@ -172,6 +173,11 @@ typedef struct pm_lex_mode {
|
|
172
173
|
// This is the pointer to the character where lexing should resume
|
173
174
|
// once the heredoc has been completely processed.
|
174
175
|
const uint8_t *next_start;
|
176
|
+
|
177
|
+
// This is used to track the amount of common whitespace on each
|
178
|
+
// line so that we know how much to dedent each line in the case of
|
179
|
+
// a tilde heredoc.
|
180
|
+
size_t common_whitespace;
|
175
181
|
} heredoc;
|
176
182
|
} as;
|
177
183
|
|
@@ -244,6 +250,16 @@ typedef struct pm_comment {
|
|
244
250
|
pm_comment_type_t type;
|
245
251
|
} pm_comment_t;
|
246
252
|
|
253
|
+
// This is a node in the linked list of magic comments that we've found while
|
254
|
+
// parsing.
|
255
|
+
typedef struct {
|
256
|
+
pm_list_node_t node;
|
257
|
+
const uint8_t *key_start;
|
258
|
+
const uint8_t *value_start;
|
259
|
+
uint32_t key_length;
|
260
|
+
uint32_t value_length;
|
261
|
+
} pm_magic_comment_t;
|
262
|
+
|
247
263
|
// When the encoding that is being used to parse the source is changed by prism,
|
248
264
|
// we provide the ability here to call out to a user-defined function.
|
249
265
|
typedef void (*pm_encoding_changed_callback_t)(pm_parser_t *parser);
|
@@ -293,6 +309,11 @@ typedef struct pm_scope {
|
|
293
309
|
// This is necessary to determine if child blocks are allowed to use
|
294
310
|
// numbered parameters.
|
295
311
|
bool numbered_params;
|
312
|
+
|
313
|
+
// A transparent scope is a scope that cannot have locals set on itself.
|
314
|
+
// When a local is set on this scope, it will instead be set on the parent
|
315
|
+
// scope's local table.
|
316
|
+
bool transparent;
|
296
317
|
} pm_scope_t;
|
297
318
|
|
298
319
|
// This struct represents the overall parser. It contains a reference to the
|
@@ -342,6 +363,7 @@ struct pm_parser {
|
|
342
363
|
const uint8_t *heredoc_end;
|
343
364
|
|
344
365
|
pm_list_t comment_list; // the list of comments that have been found while parsing
|
366
|
+
pm_list_t magic_comment_list; // the list of magic comments that have been found while parsing.
|
345
367
|
pm_list_t warning_list; // the list of warnings that have been found while parsing
|
346
368
|
pm_list_t error_list; // the list of errors that have been found while parsing
|
347
369
|
pm_scope_t *current_scope; // the current local scope
|
@@ -388,6 +410,10 @@ struct pm_parser {
|
|
388
410
|
// when we find tokens that we need it for.
|
389
411
|
pm_node_flags_t integer_base;
|
390
412
|
|
413
|
+
// This string is used to pass information from the lexer to the parser. It
|
414
|
+
// is particularly necessary because of escape sequences.
|
415
|
+
pm_string_t current_string;
|
416
|
+
|
391
417
|
// Whether or not we're at the beginning of a command
|
392
418
|
bool command_start;
|
393
419
|
|
@@ -21,6 +21,9 @@ typedef struct {
|
|
21
21
|
// Return the size of the pm_buffer_t struct.
|
22
22
|
PRISM_EXPORTED_FUNCTION size_t pm_buffer_sizeof(void);
|
23
23
|
|
24
|
+
// Initialize a pm_buffer_t with the given capacity.
|
25
|
+
bool pm_buffer_init_capacity(pm_buffer_t *buffer, size_t capacity);
|
26
|
+
|
24
27
|
// Initialize a pm_buffer_t with its default values.
|
25
28
|
PRISM_EXPORTED_FUNCTION bool pm_buffer_init(pm_buffer_t *buffer);
|
26
29
|
|
@@ -63,6 +63,11 @@ typedef struct {
|
|
63
63
|
// Initialize a new constant pool with a given capacity.
|
64
64
|
bool pm_constant_pool_init(pm_constant_pool_t *pool, uint32_t capacity);
|
65
65
|
|
66
|
+
static inline pm_constant_t* pm_constant_pool_id_to_constant(pm_constant_pool_t *pool, pm_constant_id_t constant_id) {
|
67
|
+
assert(constant_id > 0 && constant_id <= pool->size);
|
68
|
+
return &pool->constants[constant_id - 1];
|
69
|
+
}
|
70
|
+
|
66
71
|
// Insert a constant into a constant pool that is a slice of a source string.
|
67
72
|
// Returns the id of the constant, or 0 if any potential calls to resize fail.
|
68
73
|
pm_constant_id_t pm_constant_pool_insert_shared(pm_constant_pool_t *pool, const uint8_t *start, size_t length);
|
@@ -11,9 +11,10 @@
|
|
11
11
|
|
12
12
|
// This struct represents a string value.
|
13
13
|
typedef struct {
|
14
|
-
enum { PM_STRING_SHARED, PM_STRING_OWNED, PM_STRING_CONSTANT, PM_STRING_MAPPED } type;
|
15
14
|
const uint8_t *source;
|
16
15
|
size_t length;
|
16
|
+
// The type field is deliberately not first: if it were, initializing a pm_string_t field with an integer (e.g. `.pm_string_t_field = 123`, or a pm_constant_id_t value) would neither warn nor error.
|
17
|
+
enum { PM_STRING_SHARED, PM_STRING_OWNED, PM_STRING_CONSTANT, PM_STRING_MAPPED } type;
|
17
18
|
} pm_string_t;
|
18
19
|
|
19
20
|
#define PM_EMPTY_STRING ((pm_string_t) { .type = PM_STRING_CONSTANT, .source = NULL, .length = 0 })
|
data/include/prism/version.h
CHANGED