prism 0.13.0 → 0.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +37 -1
- data/README.md +4 -1
- data/config.yml +96 -35
- data/docs/fuzzing.md +5 -10
- data/docs/prism.png +0 -0
- data/docs/serialization.md +10 -0
- data/ext/prism/api_node.c +239 -86
- data/ext/prism/extension.c +35 -48
- data/ext/prism/extension.h +1 -1
- data/include/prism/ast.h +170 -118
- data/include/prism/diagnostic.h +1 -0
- data/include/prism/node.h +8 -0
- data/include/prism/parser.h +26 -0
- data/include/prism/util/pm_buffer.h +3 -0
- data/include/prism/util/pm_constant_pool.h +21 -2
- data/include/prism/util/pm_string.h +2 -1
- data/include/prism/version.h +2 -2
- data/include/prism.h +1 -2
- data/lib/prism/compiler.rb +150 -141
- data/lib/prism/debug.rb +30 -26
- data/lib/prism/dispatcher.rb +42 -0
- data/lib/prism/dsl.rb +23 -8
- data/lib/prism/ffi.rb +4 -4
- data/lib/prism/lex_compat.rb +42 -8
- data/lib/prism/mutation_compiler.rb +18 -3
- data/lib/prism/node.rb +2061 -191
- data/lib/prism/node_ext.rb +44 -0
- data/lib/prism/parse_result.rb +32 -5
- data/lib/prism/pattern.rb +1 -1
- data/lib/prism/serialize.rb +95 -87
- data/lib/prism/visitor.rb +9 -0
- data/prism.gemspec +2 -3
- data/src/diagnostic.c +2 -1
- data/src/node.c +99 -32
- data/src/prettyprint.c +137 -80
- data/src/prism.c +1960 -843
- data/src/serialize.c +140 -79
- data/src/util/pm_buffer.c +9 -7
- data/src/util/pm_constant_pool.c +25 -11
- metadata +3 -4
- data/include/prism/unescape.h +0 -48
- data/src/unescape.c +0 -637
data/ext/prism/extension.c
CHANGED
@@ -10,6 +10,7 @@ VALUE rb_cPrismToken;
|
|
10
10
|
VALUE rb_cPrismLocation;
|
11
11
|
|
12
12
|
VALUE rb_cPrismComment;
|
13
|
+
VALUE rb_cPrismMagicComment;
|
13
14
|
VALUE rb_cPrismParseError;
|
14
15
|
VALUE rb_cPrismParseWarning;
|
15
16
|
VALUE rb_cPrismParseResult;
|
@@ -153,6 +154,35 @@ parser_comments(pm_parser_t *parser, VALUE source) {
|
|
153
154
|
return comments;
|
154
155
|
}
|
155
156
|
|
157
|
+
// Extract the magic comments out of the parser into an array.
|
158
|
+
static VALUE
|
159
|
+
parser_magic_comments(pm_parser_t *parser, VALUE source) {
|
160
|
+
VALUE magic_comments = rb_ary_new();
|
161
|
+
|
162
|
+
for (pm_magic_comment_t *magic_comment = (pm_magic_comment_t *) parser->magic_comment_list.head; magic_comment != NULL; magic_comment = (pm_magic_comment_t *) magic_comment->node.next) {
|
163
|
+
VALUE key_loc_argv[] = {
|
164
|
+
source,
|
165
|
+
LONG2FIX(magic_comment->key_start - parser->start),
|
166
|
+
LONG2FIX(magic_comment->key_length)
|
167
|
+
};
|
168
|
+
|
169
|
+
VALUE value_loc_argv[] = {
|
170
|
+
source,
|
171
|
+
LONG2FIX(magic_comment->value_start - parser->start),
|
172
|
+
LONG2FIX(magic_comment->value_length)
|
173
|
+
};
|
174
|
+
|
175
|
+
VALUE magic_comment_argv[] = {
|
176
|
+
rb_class_new_instance(3, key_loc_argv, rb_cPrismLocation),
|
177
|
+
rb_class_new_instance(3, value_loc_argv, rb_cPrismLocation)
|
178
|
+
};
|
179
|
+
|
180
|
+
rb_ary_push(magic_comments, rb_class_new_instance(2, magic_comment_argv, rb_cPrismMagicComment));
|
181
|
+
}
|
182
|
+
|
183
|
+
return magic_comments;
|
184
|
+
}
|
185
|
+
|
156
186
|
// Extract the errors out of the parser into an array.
|
157
187
|
static VALUE
|
158
188
|
parser_errors(pm_parser_t *parser, rb_encoding *encoding, VALUE source) {
|
@@ -297,6 +327,7 @@ parse_lex_input(pm_string_t *input, const char *filepath, bool return_nodes) {
|
|
297
327
|
VALUE result_argv[] = {
|
298
328
|
value,
|
299
329
|
parser_comments(&parser, source),
|
330
|
+
parser_magic_comments(&parser, source),
|
300
331
|
parser_errors(&parser, parse_lex_data.encoding, source),
|
301
332
|
parser_warnings(&parser, parse_lex_data.encoding, source),
|
302
333
|
source
|
@@ -304,7 +335,7 @@ parse_lex_input(pm_string_t *input, const char *filepath, bool return_nodes) {
|
|
304
335
|
|
305
336
|
pm_node_destroy(&parser, node);
|
306
337
|
pm_parser_free(&parser);
|
307
|
-
return rb_class_new_instance(5, result_argv, rb_cPrismParseResult);
|
338
|
+
return rb_class_new_instance(6, result_argv, rb_cPrismParseResult);
|
308
339
|
}
|
309
340
|
|
310
341
|
// Return an array of tokens corresponding to the given string.
|
@@ -351,12 +382,13 @@ parse_input(pm_string_t *input, const char *filepath) {
|
|
351
382
|
VALUE result_argv[] = {
|
352
383
|
pm_ast_new(&parser, node, encoding),
|
353
384
|
parser_comments(&parser, source),
|
385
|
+
parser_magic_comments(&parser, source),
|
354
386
|
parser_errors(&parser, encoding, source),
|
355
387
|
parser_warnings(&parser, encoding, source),
|
356
388
|
source
|
357
389
|
};
|
358
390
|
|
359
|
-
VALUE result = rb_class_new_instance(5, result_argv, rb_cPrismParseResult);
|
391
|
+
VALUE result = rb_class_new_instance(6, result_argv, rb_cPrismParseResult);
|
360
392
|
|
361
393
|
pm_node_destroy(&parser, node);
|
362
394
|
pm_parser_free(&parser);
|
@@ -461,48 +493,6 @@ named_captures(VALUE self, VALUE source) {
|
|
461
493
|
return names;
|
462
494
|
}
|
463
495
|
|
464
|
-
// Accepts a source string and a type of unescaping and returns the unescaped
|
465
|
-
// version.
|
466
|
-
static VALUE
|
467
|
-
unescape(VALUE source, pm_unescape_type_t unescape_type) {
|
468
|
-
pm_string_t result;
|
469
|
-
|
470
|
-
if (pm_unescape_string((const uint8_t *) RSTRING_PTR(source), RSTRING_LEN(source), unescape_type, &result)) {
|
471
|
-
VALUE str = rb_str_new((const char *) pm_string_source(&result), pm_string_length(&result));
|
472
|
-
pm_string_free(&result);
|
473
|
-
return str;
|
474
|
-
} else {
|
475
|
-
pm_string_free(&result);
|
476
|
-
return Qnil;
|
477
|
-
}
|
478
|
-
}
|
479
|
-
|
480
|
-
// Do not unescape anything in the given string. This is here to provide a
|
481
|
-
// consistent API.
|
482
|
-
static VALUE
|
483
|
-
unescape_none(VALUE self, VALUE source) {
|
484
|
-
return unescape(source, PM_UNESCAPE_NONE);
|
485
|
-
}
|
486
|
-
|
487
|
-
// Minimally unescape the given string. This means effectively unescaping just
|
488
|
-
// the quotes of a string. Returns the unescaped string.
|
489
|
-
static VALUE
|
490
|
-
unescape_minimal(VALUE self, VALUE source) {
|
491
|
-
return unescape(source, PM_UNESCAPE_MINIMAL);
|
492
|
-
}
|
493
|
-
|
494
|
-
// Escape the given string minimally plus whitespace. Returns the unescaped string.
|
495
|
-
static VALUE
|
496
|
-
unescape_whitespace(VALUE self, VALUE source) {
|
497
|
-
return unescape(source, PM_UNESCAPE_WHITESPACE);
|
498
|
-
}
|
499
|
-
|
500
|
-
// Unescape everything in the given string. Return the unescaped string.
|
501
|
-
static VALUE
|
502
|
-
unescape_all(VALUE self, VALUE source) {
|
503
|
-
return unescape(source, PM_UNESCAPE_ALL);
|
504
|
-
}
|
505
|
-
|
506
496
|
// Return a hash of information about the given source string's memory usage.
|
507
497
|
static VALUE
|
508
498
|
memsize(VALUE self, VALUE string) {
|
@@ -589,6 +579,7 @@ Init_prism(void) {
|
|
589
579
|
rb_cPrismToken = rb_define_class_under(rb_cPrism, "Token", rb_cObject);
|
590
580
|
rb_cPrismLocation = rb_define_class_under(rb_cPrism, "Location", rb_cObject);
|
591
581
|
rb_cPrismComment = rb_define_class_under(rb_cPrism, "Comment", rb_cObject);
|
582
|
+
rb_cPrismMagicComment = rb_define_class_under(rb_cPrism, "MagicComment", rb_cObject);
|
592
583
|
rb_cPrismParseError = rb_define_class_under(rb_cPrism, "ParseError", rb_cObject);
|
593
584
|
rb_cPrismParseWarning = rb_define_class_under(rb_cPrism, "ParseWarning", rb_cObject);
|
594
585
|
rb_cPrismParseResult = rb_define_class_under(rb_cPrism, "ParseResult", rb_cObject);
|
@@ -612,10 +603,6 @@ Init_prism(void) {
|
|
612
603
|
// internal tasks. We expose these to make them easier to test.
|
613
604
|
VALUE rb_cPrismDebug = rb_define_module_under(rb_cPrism, "Debug");
|
614
605
|
rb_define_singleton_method(rb_cPrismDebug, "named_captures", named_captures, 1);
|
615
|
-
rb_define_singleton_method(rb_cPrismDebug, "unescape_none", unescape_none, 1);
|
616
|
-
rb_define_singleton_method(rb_cPrismDebug, "unescape_minimal", unescape_minimal, 1);
|
617
|
-
rb_define_singleton_method(rb_cPrismDebug, "unescape_whitespace", unescape_whitespace, 1);
|
618
|
-
rb_define_singleton_method(rb_cPrismDebug, "unescape_all", unescape_all, 1);
|
619
606
|
rb_define_singleton_method(rb_cPrismDebug, "memsize", memsize, 1);
|
620
607
|
rb_define_singleton_method(rb_cPrismDebug, "profile_file", profile_file, 1);
|
621
608
|
rb_define_singleton_method(rb_cPrismDebug, "parse_serialize_file_metadata", parse_serialize_file_metadata, 2);
|
data/ext/prism/extension.h
CHANGED
data/include/prism/ast.h
CHANGED
@@ -277,79 +277,82 @@ enum pm_node_type {
|
|
277
277
|
PM_IMAGINARY_NODE = 66,
|
278
278
|
PM_IMPLICIT_NODE = 67,
|
279
279
|
PM_IN_NODE = 68,
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
|
342
|
-
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
|
352
|
-
|
280
|
+
PM_INDEX_AND_WRITE_NODE = 69,
|
281
|
+
PM_INDEX_OPERATOR_WRITE_NODE = 70,
|
282
|
+
PM_INDEX_OR_WRITE_NODE = 71,
|
283
|
+
PM_INSTANCE_VARIABLE_AND_WRITE_NODE = 72,
|
284
|
+
PM_INSTANCE_VARIABLE_OPERATOR_WRITE_NODE = 73,
|
285
|
+
PM_INSTANCE_VARIABLE_OR_WRITE_NODE = 74,
|
286
|
+
PM_INSTANCE_VARIABLE_READ_NODE = 75,
|
287
|
+
PM_INSTANCE_VARIABLE_TARGET_NODE = 76,
|
288
|
+
PM_INSTANCE_VARIABLE_WRITE_NODE = 77,
|
289
|
+
PM_INTEGER_NODE = 78,
|
290
|
+
PM_INTERPOLATED_MATCH_LAST_LINE_NODE = 79,
|
291
|
+
PM_INTERPOLATED_REGULAR_EXPRESSION_NODE = 80,
|
292
|
+
PM_INTERPOLATED_STRING_NODE = 81,
|
293
|
+
PM_INTERPOLATED_SYMBOL_NODE = 82,
|
294
|
+
PM_INTERPOLATED_X_STRING_NODE = 83,
|
295
|
+
PM_KEYWORD_HASH_NODE = 84,
|
296
|
+
PM_KEYWORD_PARAMETER_NODE = 85,
|
297
|
+
PM_KEYWORD_REST_PARAMETER_NODE = 86,
|
298
|
+
PM_LAMBDA_NODE = 87,
|
299
|
+
PM_LOCAL_VARIABLE_AND_WRITE_NODE = 88,
|
300
|
+
PM_LOCAL_VARIABLE_OPERATOR_WRITE_NODE = 89,
|
301
|
+
PM_LOCAL_VARIABLE_OR_WRITE_NODE = 90,
|
302
|
+
PM_LOCAL_VARIABLE_READ_NODE = 91,
|
303
|
+
PM_LOCAL_VARIABLE_TARGET_NODE = 92,
|
304
|
+
PM_LOCAL_VARIABLE_WRITE_NODE = 93,
|
305
|
+
PM_MATCH_LAST_LINE_NODE = 94,
|
306
|
+
PM_MATCH_PREDICATE_NODE = 95,
|
307
|
+
PM_MATCH_REQUIRED_NODE = 96,
|
308
|
+
PM_MATCH_WRITE_NODE = 97,
|
309
|
+
PM_MISSING_NODE = 98,
|
310
|
+
PM_MODULE_NODE = 99,
|
311
|
+
PM_MULTI_TARGET_NODE = 100,
|
312
|
+
PM_MULTI_WRITE_NODE = 101,
|
313
|
+
PM_NEXT_NODE = 102,
|
314
|
+
PM_NIL_NODE = 103,
|
315
|
+
PM_NO_KEYWORDS_PARAMETER_NODE = 104,
|
316
|
+
PM_NUMBERED_REFERENCE_READ_NODE = 105,
|
317
|
+
PM_OPTIONAL_PARAMETER_NODE = 106,
|
318
|
+
PM_OR_NODE = 107,
|
319
|
+
PM_PARAMETERS_NODE = 108,
|
320
|
+
PM_PARENTHESES_NODE = 109,
|
321
|
+
PM_PINNED_EXPRESSION_NODE = 110,
|
322
|
+
PM_PINNED_VARIABLE_NODE = 111,
|
323
|
+
PM_POST_EXECUTION_NODE = 112,
|
324
|
+
PM_PRE_EXECUTION_NODE = 113,
|
325
|
+
PM_PROGRAM_NODE = 114,
|
326
|
+
PM_RANGE_NODE = 115,
|
327
|
+
PM_RATIONAL_NODE = 116,
|
328
|
+
PM_REDO_NODE = 117,
|
329
|
+
PM_REGULAR_EXPRESSION_NODE = 118,
|
330
|
+
PM_REQUIRED_DESTRUCTURED_PARAMETER_NODE = 119,
|
331
|
+
PM_REQUIRED_PARAMETER_NODE = 120,
|
332
|
+
PM_RESCUE_MODIFIER_NODE = 121,
|
333
|
+
PM_RESCUE_NODE = 122,
|
334
|
+
PM_REST_PARAMETER_NODE = 123,
|
335
|
+
PM_RETRY_NODE = 124,
|
336
|
+
PM_RETURN_NODE = 125,
|
337
|
+
PM_SELF_NODE = 126,
|
338
|
+
PM_SINGLETON_CLASS_NODE = 127,
|
339
|
+
PM_SOURCE_ENCODING_NODE = 128,
|
340
|
+
PM_SOURCE_FILE_NODE = 129,
|
341
|
+
PM_SOURCE_LINE_NODE = 130,
|
342
|
+
PM_SPLAT_NODE = 131,
|
343
|
+
PM_STATEMENTS_NODE = 132,
|
344
|
+
PM_STRING_CONCAT_NODE = 133,
|
345
|
+
PM_STRING_NODE = 134,
|
346
|
+
PM_SUPER_NODE = 135,
|
347
|
+
PM_SYMBOL_NODE = 136,
|
348
|
+
PM_TRUE_NODE = 137,
|
349
|
+
PM_UNDEF_NODE = 138,
|
350
|
+
PM_UNLESS_NODE = 139,
|
351
|
+
PM_UNTIL_NODE = 140,
|
352
|
+
PM_WHEN_NODE = 141,
|
353
|
+
PM_WHILE_NODE = 142,
|
354
|
+
PM_X_STRING_NODE = 143,
|
355
|
+
PM_YIELD_NODE = 144,
|
353
356
|
PM_SCOPE_NODE
|
354
357
|
};
|
355
358
|
|
@@ -358,8 +361,10 @@ typedef uint16_t pm_node_flags_t;
|
|
358
361
|
|
359
362
|
// We store the flags enum in every node in the tree. Some flags are common to
|
360
363
|
// all nodes (the ones listed below). Others are specific to certain node types.
|
361
|
-
|
362
|
-
static const pm_node_flags_t
|
364
|
+
#define PM_NODE_FLAG_BITS (sizeof(pm_node_flags_t) * 8)
|
365
|
+
static const pm_node_flags_t PM_NODE_FLAG_NEWLINE = (1 << (PM_NODE_FLAG_BITS - 1));
|
366
|
+
static const pm_node_flags_t PM_NODE_FLAG_STATIC_LITERAL = (1 << (PM_NODE_FLAG_BITS - 2));
|
367
|
+
static const pm_node_flags_t PM_NODE_FLAG_COMMON_MASK = (1 << (PM_NODE_FLAG_BITS - 1)) | (1 << (PM_NODE_FLAG_BITS - 2));
|
363
368
|
|
364
369
|
// For easy access, we define some macros to check node type
|
365
370
|
#define PM_NODE_TYPE(node) ((enum pm_node_type)node->type)
|
@@ -474,6 +479,7 @@ typedef struct pm_assoc_splat_node {
|
|
474
479
|
// Type: PM_BACK_REFERENCE_READ_NODE
|
475
480
|
typedef struct pm_back_reference_read_node {
|
476
481
|
pm_node_t base;
|
482
|
+
pm_constant_id_t name;
|
477
483
|
} pm_back_reference_read_node_t;
|
478
484
|
|
479
485
|
// BeginNode
|
@@ -559,11 +565,8 @@ typedef struct pm_call_and_write_node {
|
|
559
565
|
struct pm_node *receiver;
|
560
566
|
pm_location_t call_operator_loc;
|
561
567
|
pm_location_t message_loc;
|
562
|
-
|
563
|
-
|
564
|
-
pm_location_t closing_loc;
|
565
|
-
pm_string_t read_name;
|
566
|
-
pm_string_t write_name;
|
568
|
+
pm_constant_id_t read_name;
|
569
|
+
pm_constant_id_t write_name;
|
567
570
|
pm_location_t operator_loc;
|
568
571
|
struct pm_node *value;
|
569
572
|
} pm_call_and_write_node_t;
|
@@ -583,7 +586,7 @@ typedef struct pm_call_node {
|
|
583
586
|
struct pm_arguments_node *arguments;
|
584
587
|
pm_location_t closing_loc;
|
585
588
|
struct pm_node *block;
|
586
|
-
|
589
|
+
pm_constant_id_t name;
|
587
590
|
} pm_call_node_t;
|
588
591
|
|
589
592
|
// CallOperatorWriteNode
|
@@ -597,11 +600,8 @@ typedef struct pm_call_operator_write_node {
|
|
597
600
|
struct pm_node *receiver;
|
598
601
|
pm_location_t call_operator_loc;
|
599
602
|
pm_location_t message_loc;
|
600
|
-
|
601
|
-
|
602
|
-
pm_location_t closing_loc;
|
603
|
-
pm_string_t read_name;
|
604
|
-
pm_string_t write_name;
|
603
|
+
pm_constant_id_t read_name;
|
604
|
+
pm_constant_id_t write_name;
|
605
605
|
pm_constant_id_t operator;
|
606
606
|
pm_location_t operator_loc;
|
607
607
|
struct pm_node *value;
|
@@ -618,11 +618,8 @@ typedef struct pm_call_or_write_node {
|
|
618
618
|
struct pm_node *receiver;
|
619
619
|
pm_location_t call_operator_loc;
|
620
620
|
pm_location_t message_loc;
|
621
|
-
|
622
|
-
|
623
|
-
pm_location_t closing_loc;
|
624
|
-
pm_string_t read_name;
|
625
|
-
pm_string_t write_name;
|
621
|
+
pm_constant_id_t read_name;
|
622
|
+
pm_constant_id_t write_name;
|
626
623
|
pm_location_t operator_loc;
|
627
624
|
struct pm_node *value;
|
628
625
|
} pm_call_or_write_node_t;
|
@@ -1113,6 +1110,61 @@ typedef struct pm_in_node {
|
|
1113
1110
|
pm_location_t then_loc;
|
1114
1111
|
} pm_in_node_t;
|
1115
1112
|
|
1113
|
+
// IndexAndWriteNode
|
1114
|
+
//
|
1115
|
+
// Type: PM_INDEX_AND_WRITE_NODE
|
1116
|
+
// Flags:
|
1117
|
+
// PM_CALL_NODE_FLAGS_SAFE_NAVIGATION
|
1118
|
+
// PM_CALL_NODE_FLAGS_VARIABLE_CALL
|
1119
|
+
typedef struct pm_index_and_write_node {
|
1120
|
+
pm_node_t base;
|
1121
|
+
struct pm_node *receiver;
|
1122
|
+
pm_location_t call_operator_loc;
|
1123
|
+
pm_location_t opening_loc;
|
1124
|
+
struct pm_arguments_node *arguments;
|
1125
|
+
pm_location_t closing_loc;
|
1126
|
+
struct pm_node *block;
|
1127
|
+
pm_location_t operator_loc;
|
1128
|
+
struct pm_node *value;
|
1129
|
+
} pm_index_and_write_node_t;
|
1130
|
+
|
1131
|
+
// IndexOperatorWriteNode
|
1132
|
+
//
|
1133
|
+
// Type: PM_INDEX_OPERATOR_WRITE_NODE
|
1134
|
+
// Flags:
|
1135
|
+
// PM_CALL_NODE_FLAGS_SAFE_NAVIGATION
|
1136
|
+
// PM_CALL_NODE_FLAGS_VARIABLE_CALL
|
1137
|
+
typedef struct pm_index_operator_write_node {
|
1138
|
+
pm_node_t base;
|
1139
|
+
struct pm_node *receiver;
|
1140
|
+
pm_location_t call_operator_loc;
|
1141
|
+
pm_location_t opening_loc;
|
1142
|
+
struct pm_arguments_node *arguments;
|
1143
|
+
pm_location_t closing_loc;
|
1144
|
+
struct pm_node *block;
|
1145
|
+
pm_constant_id_t operator;
|
1146
|
+
pm_location_t operator_loc;
|
1147
|
+
struct pm_node *value;
|
1148
|
+
} pm_index_operator_write_node_t;
|
1149
|
+
|
1150
|
+
// IndexOrWriteNode
|
1151
|
+
//
|
1152
|
+
// Type: PM_INDEX_OR_WRITE_NODE
|
1153
|
+
// Flags:
|
1154
|
+
// PM_CALL_NODE_FLAGS_SAFE_NAVIGATION
|
1155
|
+
// PM_CALL_NODE_FLAGS_VARIABLE_CALL
|
1156
|
+
typedef struct pm_index_or_write_node {
|
1157
|
+
pm_node_t base;
|
1158
|
+
struct pm_node *receiver;
|
1159
|
+
pm_location_t call_operator_loc;
|
1160
|
+
pm_location_t opening_loc;
|
1161
|
+
struct pm_arguments_node *arguments;
|
1162
|
+
pm_location_t closing_loc;
|
1163
|
+
struct pm_node *block;
|
1164
|
+
pm_location_t operator_loc;
|
1165
|
+
struct pm_node *value;
|
1166
|
+
} pm_index_or_write_node_t;
|
1167
|
+
|
1116
1168
|
// InstanceVariableAndWriteNode
|
1117
1169
|
//
|
1118
1170
|
// Type: PM_INSTANCE_VARIABLE_AND_WRITE_NODE
|
@@ -1193,11 +1245,11 @@ typedef struct pm_integer_node {
|
|
1193
1245
|
// PM_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE
|
1194
1246
|
// PM_REGULAR_EXPRESSION_FLAGS_EXTENDED
|
1195
1247
|
// PM_REGULAR_EXPRESSION_FLAGS_MULTI_LINE
|
1248
|
+
// PM_REGULAR_EXPRESSION_FLAGS_ONCE
|
1196
1249
|
// PM_REGULAR_EXPRESSION_FLAGS_EUC_JP
|
1197
1250
|
// PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT
|
1198
1251
|
// PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J
|
1199
1252
|
// PM_REGULAR_EXPRESSION_FLAGS_UTF_8
|
1200
|
-
// PM_REGULAR_EXPRESSION_FLAGS_ONCE
|
1201
1253
|
typedef struct pm_interpolated_match_last_line_node {
|
1202
1254
|
pm_node_t base;
|
1203
1255
|
pm_location_t opening_loc;
|
@@ -1212,11 +1264,11 @@ typedef struct pm_interpolated_match_last_line_node {
|
|
1212
1264
|
// PM_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE
|
1213
1265
|
// PM_REGULAR_EXPRESSION_FLAGS_EXTENDED
|
1214
1266
|
// PM_REGULAR_EXPRESSION_FLAGS_MULTI_LINE
|
1267
|
+
// PM_REGULAR_EXPRESSION_FLAGS_ONCE
|
1215
1268
|
// PM_REGULAR_EXPRESSION_FLAGS_EUC_JP
|
1216
1269
|
// PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT
|
1217
1270
|
// PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J
|
1218
1271
|
// PM_REGULAR_EXPRESSION_FLAGS_UTF_8
|
1219
|
-
// PM_REGULAR_EXPRESSION_FLAGS_ONCE
|
1220
1272
|
typedef struct pm_interpolated_regular_expression_node {
|
1221
1273
|
pm_node_t base;
|
1222
1274
|
pm_location_t opening_loc;
|
@@ -1369,11 +1421,11 @@ typedef struct pm_local_variable_write_node {
|
|
1369
1421
|
// PM_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE
|
1370
1422
|
// PM_REGULAR_EXPRESSION_FLAGS_EXTENDED
|
1371
1423
|
// PM_REGULAR_EXPRESSION_FLAGS_MULTI_LINE
|
1424
|
+
// PM_REGULAR_EXPRESSION_FLAGS_ONCE
|
1372
1425
|
// PM_REGULAR_EXPRESSION_FLAGS_EUC_JP
|
1373
1426
|
// PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT
|
1374
1427
|
// PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J
|
1375
1428
|
// PM_REGULAR_EXPRESSION_FLAGS_UTF_8
|
1376
|
-
// PM_REGULAR_EXPRESSION_FLAGS_ONCE
|
1377
1429
|
typedef struct pm_match_last_line_node {
|
1378
1430
|
pm_node_t base;
|
1379
1431
|
pm_location_t opening_loc;
|
@@ -1616,11 +1668,11 @@ typedef struct pm_redo_node {
|
|
1616
1668
|
// PM_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE
|
1617
1669
|
// PM_REGULAR_EXPRESSION_FLAGS_EXTENDED
|
1618
1670
|
// PM_REGULAR_EXPRESSION_FLAGS_MULTI_LINE
|
1671
|
+
// PM_REGULAR_EXPRESSION_FLAGS_ONCE
|
1619
1672
|
// PM_REGULAR_EXPRESSION_FLAGS_EUC_JP
|
1620
1673
|
// PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT
|
1621
1674
|
// PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J
|
1622
1675
|
// PM_REGULAR_EXPRESSION_FLAGS_UTF_8
|
1623
|
-
// PM_REGULAR_EXPRESSION_FLAGS_ONCE
|
1624
1676
|
typedef struct pm_regular_expression_node {
|
1625
1677
|
pm_node_t base;
|
1626
1678
|
pm_location_t opening_loc;
|
@@ -1887,44 +1939,44 @@ typedef struct pm_yield_node {
|
|
1887
1939
|
} pm_yield_node_t;
|
1888
1940
|
|
1889
1941
|
// CallNodeFlags
|
1890
|
-
typedef enum {
|
1891
|
-
PM_CALL_NODE_FLAGS_SAFE_NAVIGATION = 1 <<
|
1892
|
-
PM_CALL_NODE_FLAGS_VARIABLE_CALL = 1 <<
|
1942
|
+
typedef enum pm_call_node_flags {
|
1943
|
+
PM_CALL_NODE_FLAGS_SAFE_NAVIGATION = 1 << 0,
|
1944
|
+
PM_CALL_NODE_FLAGS_VARIABLE_CALL = 1 << 1,
|
1893
1945
|
} pm_call_node_flags_t;
|
1894
1946
|
|
1895
1947
|
// IntegerBaseFlags
|
1896
|
-
typedef enum {
|
1897
|
-
PM_INTEGER_BASE_FLAGS_BINARY = 1 <<
|
1898
|
-
PM_INTEGER_BASE_FLAGS_OCTAL = 1 <<
|
1899
|
-
PM_INTEGER_BASE_FLAGS_DECIMAL = 1 <<
|
1900
|
-
PM_INTEGER_BASE_FLAGS_HEXADECIMAL = 1 <<
|
1948
|
+
typedef enum pm_integer_base_flags {
|
1949
|
+
PM_INTEGER_BASE_FLAGS_BINARY = 1 << 0,
|
1950
|
+
PM_INTEGER_BASE_FLAGS_OCTAL = 1 << 1,
|
1951
|
+
PM_INTEGER_BASE_FLAGS_DECIMAL = 1 << 2,
|
1952
|
+
PM_INTEGER_BASE_FLAGS_HEXADECIMAL = 1 << 3,
|
1901
1953
|
} pm_integer_base_flags_t;
|
1902
1954
|
|
1903
1955
|
// LoopFlags
|
1904
|
-
typedef enum {
|
1905
|
-
PM_LOOP_FLAGS_BEGIN_MODIFIER = 1 <<
|
1956
|
+
typedef enum pm_loop_flags {
|
1957
|
+
PM_LOOP_FLAGS_BEGIN_MODIFIER = 1 << 0,
|
1906
1958
|
} pm_loop_flags_t;
|
1907
1959
|
|
1908
1960
|
// RangeFlags
|
1909
|
-
typedef enum {
|
1910
|
-
PM_RANGE_FLAGS_EXCLUDE_END = 1 <<
|
1961
|
+
typedef enum pm_range_flags {
|
1962
|
+
PM_RANGE_FLAGS_EXCLUDE_END = 1 << 0,
|
1911
1963
|
} pm_range_flags_t;
|
1912
1964
|
|
1913
1965
|
// RegularExpressionFlags
|
1914
|
-
typedef enum {
|
1915
|
-
PM_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE = 1 <<
|
1916
|
-
PM_REGULAR_EXPRESSION_FLAGS_EXTENDED = 1 <<
|
1917
|
-
PM_REGULAR_EXPRESSION_FLAGS_MULTI_LINE = 1 <<
|
1918
|
-
|
1919
|
-
|
1920
|
-
|
1921
|
-
|
1922
|
-
|
1966
|
+
typedef enum pm_regular_expression_flags {
|
1967
|
+
PM_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE = 1 << 0,
|
1968
|
+
PM_REGULAR_EXPRESSION_FLAGS_EXTENDED = 1 << 1,
|
1969
|
+
PM_REGULAR_EXPRESSION_FLAGS_MULTI_LINE = 1 << 2,
|
1970
|
+
PM_REGULAR_EXPRESSION_FLAGS_ONCE = 1 << 3,
|
1971
|
+
PM_REGULAR_EXPRESSION_FLAGS_EUC_JP = 1 << 4,
|
1972
|
+
PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT = 1 << 5,
|
1973
|
+
PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J = 1 << 6,
|
1974
|
+
PM_REGULAR_EXPRESSION_FLAGS_UTF_8 = 1 << 7,
|
1923
1975
|
} pm_regular_expression_flags_t;
|
1924
1976
|
|
1925
1977
|
// StringFlags
|
1926
|
-
typedef enum {
|
1927
|
-
PM_STRING_FLAGS_FROZEN = 1 <<
|
1978
|
+
typedef enum pm_string_flags {
|
1979
|
+
PM_STRING_FLAGS_FROZEN = 1 << 0,
|
1928
1980
|
} pm_string_flags_t;
|
1929
1981
|
|
1930
1982
|
#define PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS false
|
data/include/prism/diagnostic.h
CHANGED
@@ -158,6 +158,7 @@ typedef enum {
|
|
158
158
|
PM_ERR_NUMBERED_PARAMETER_NOT_ALLOWED,
|
159
159
|
PM_ERR_NUMBERED_PARAMETER_OUTER_SCOPE,
|
160
160
|
PM_ERR_OPERATOR_MULTI_ASSIGN,
|
161
|
+
PM_ERR_OPERATOR_WRITE_ARGUMENTS,
|
161
162
|
PM_ERR_OPERATOR_WRITE_BLOCK,
|
162
163
|
PM_ERR_PARAMETER_ASSOC_SPLAT_MULTI,
|
163
164
|
PM_ERR_PARAMETER_BLOCK_MULTI,
|
data/include/prism/node.h
CHANGED
@@ -33,9 +33,17 @@ PRISM_EXPORTED_FUNCTION const char * pm_node_type_to_str(pm_node_type_t node_typ
|
|
33
33
|
// declare them here to avoid generating them.
|
34
34
|
typedef struct pm_scope_node {
|
35
35
|
pm_node_t base;
|
36
|
+
struct pm_scope_node *previous;
|
37
|
+
pm_node_t *ast_node;
|
36
38
|
struct pm_parameters_node *parameters;
|
37
39
|
pm_node_t *body;
|
38
40
|
pm_constant_id_list_t locals;
|
41
|
+
pm_parser_t *parser;
|
42
|
+
|
43
|
+
// We don't have the CRuby types ID and st_table within Prism
|
44
|
+
// so we use void *
|
45
|
+
void *constants; // ID *constants
|
46
|
+
void *index_lookup_table; // st_table *index_lookup_table
|
39
47
|
} pm_scope_node_t;
|
40
48
|
|
41
49
|
#endif // PRISM_NODE_H
|
data/include/prism/parser.h
CHANGED
@@ -8,6 +8,7 @@
|
|
8
8
|
#include "prism/util/pm_list.h"
|
9
9
|
#include "prism/util/pm_newline_list.h"
|
10
10
|
#include "prism/util/pm_state_stack.h"
|
11
|
+
#include "prism/util/pm_string.h"
|
11
12
|
|
12
13
|
#include <stdbool.h>
|
13
14
|
|
@@ -172,6 +173,11 @@ typedef struct pm_lex_mode {
|
|
172
173
|
// This is the pointer to the character where lexing should resume
|
173
174
|
// once the heredoc has been completely processed.
|
174
175
|
const uint8_t *next_start;
|
176
|
+
|
177
|
+
// This is used to track the amount of common whitespace on each
|
178
|
+
// line so that we know how much to dedent each line in the case of
|
179
|
+
// a tilde heredoc.
|
180
|
+
size_t common_whitespace;
|
175
181
|
} heredoc;
|
176
182
|
} as;
|
177
183
|
|
@@ -244,6 +250,16 @@ typedef struct pm_comment {
|
|
244
250
|
pm_comment_type_t type;
|
245
251
|
} pm_comment_t;
|
246
252
|
|
253
|
+
// This is a node in the linked list of magic comments that we've found while
|
254
|
+
// parsing.
|
255
|
+
typedef struct {
|
256
|
+
pm_list_node_t node;
|
257
|
+
const uint8_t *key_start;
|
258
|
+
const uint8_t *value_start;
|
259
|
+
uint32_t key_length;
|
260
|
+
uint32_t value_length;
|
261
|
+
} pm_magic_comment_t;
|
262
|
+
|
247
263
|
// When the encoding that is being used to parse the source is changed by prism,
|
248
264
|
// we provide the ability here to call out to a user-defined function.
|
249
265
|
typedef void (*pm_encoding_changed_callback_t)(pm_parser_t *parser);
|
@@ -293,6 +309,11 @@ typedef struct pm_scope {
|
|
293
309
|
// This is necessary to determine if child blocks are allowed to use
|
294
310
|
// numbered parameters.
|
295
311
|
bool numbered_params;
|
312
|
+
|
313
|
+
// A transparent scope is a scope that cannot have locals set on itself.
|
314
|
+
// When a local is set on this scope, it will instead be set on the parent
|
315
|
+
// scope's local table.
|
316
|
+
bool transparent;
|
296
317
|
} pm_scope_t;
|
297
318
|
|
298
319
|
// This struct represents the overall parser. It contains a reference to the
|
@@ -342,6 +363,7 @@ struct pm_parser {
|
|
342
363
|
const uint8_t *heredoc_end;
|
343
364
|
|
344
365
|
pm_list_t comment_list; // the list of comments that have been found while parsing
|
366
|
+
pm_list_t magic_comment_list; // the list of magic comments that have been found while parsing.
|
345
367
|
pm_list_t warning_list; // the list of warnings that have been found while parsing
|
346
368
|
pm_list_t error_list; // the list of errors that have been found while parsing
|
347
369
|
pm_scope_t *current_scope; // the current local scope
|
@@ -388,6 +410,10 @@ struct pm_parser {
|
|
388
410
|
// when we find tokens that we need it for.
|
389
411
|
pm_node_flags_t integer_base;
|
390
412
|
|
413
|
+
// This string is used to pass information from the lexer to the parser. It
|
414
|
+
// is particularly necessary because of escape sequences.
|
415
|
+
pm_string_t current_string;
|
416
|
+
|
391
417
|
// Whether or not we're at the beginning of a command
|
392
418
|
bool command_start;
|
393
419
|
|
data/include/prism/util/pm_buffer.h
CHANGED
@@ -21,6 +21,9 @@ typedef struct {
|
|
21
21
|
// Return the size of the pm_buffer_t struct.
|
22
22
|
PRISM_EXPORTED_FUNCTION size_t pm_buffer_sizeof(void);
|
23
23
|
|
24
|
+
// Initialize a pm_buffer_t with the given capacity.
|
25
|
+
bool pm_buffer_init_capacity(pm_buffer_t *buffer, size_t capacity);
|
26
|
+
|
24
27
|
// Initialize a pm_buffer_t with its default values.
|
25
28
|
PRISM_EXPORTED_FUNCTION bool pm_buffer_init(pm_buffer_t *buffer);
|
26
29
|
|