prism 0.13.0 → 0.15.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +37 -1
- data/README.md +4 -1
- data/config.yml +96 -35
- data/docs/fuzzing.md +5 -10
- data/docs/prism.png +0 -0
- data/docs/serialization.md +10 -0
- data/ext/prism/api_node.c +239 -86
- data/ext/prism/extension.c +35 -48
- data/ext/prism/extension.h +1 -1
- data/include/prism/ast.h +170 -118
- data/include/prism/diagnostic.h +1 -0
- data/include/prism/node.h +8 -0
- data/include/prism/parser.h +26 -0
- data/include/prism/util/pm_buffer.h +3 -0
- data/include/prism/util/pm_constant_pool.h +21 -2
- data/include/prism/util/pm_string.h +2 -1
- data/include/prism/version.h +2 -2
- data/include/prism.h +1 -2
- data/lib/prism/compiler.rb +150 -141
- data/lib/prism/debug.rb +30 -26
- data/lib/prism/dispatcher.rb +42 -0
- data/lib/prism/dsl.rb +23 -8
- data/lib/prism/ffi.rb +4 -4
- data/lib/prism/lex_compat.rb +42 -8
- data/lib/prism/mutation_compiler.rb +18 -3
- data/lib/prism/node.rb +2061 -191
- data/lib/prism/node_ext.rb +44 -0
- data/lib/prism/parse_result.rb +32 -5
- data/lib/prism/pattern.rb +1 -1
- data/lib/prism/serialize.rb +95 -87
- data/lib/prism/visitor.rb +9 -0
- data/prism.gemspec +2 -3
- data/src/diagnostic.c +2 -1
- data/src/node.c +99 -32
- data/src/prettyprint.c +137 -80
- data/src/prism.c +1960 -843
- data/src/serialize.c +140 -79
- data/src/util/pm_buffer.c +9 -7
- data/src/util/pm_constant_pool.c +25 -11
- metadata +3 -4
- data/include/prism/unescape.h +0 -48
- data/src/unescape.c +0 -637
data/ext/prism/extension.c
CHANGED
@@ -10,6 +10,7 @@ VALUE rb_cPrismToken;
|
|
10
10
|
VALUE rb_cPrismLocation;
|
11
11
|
|
12
12
|
VALUE rb_cPrismComment;
|
13
|
+
VALUE rb_cPrismMagicComment;
|
13
14
|
VALUE rb_cPrismParseError;
|
14
15
|
VALUE rb_cPrismParseWarning;
|
15
16
|
VALUE rb_cPrismParseResult;
|
@@ -153,6 +154,35 @@ parser_comments(pm_parser_t *parser, VALUE source) {
|
|
153
154
|
return comments;
|
154
155
|
}
|
155
156
|
|
157
|
+
// Extract the magic comments out of the parser into an array.
|
158
|
+
static VALUE
|
159
|
+
parser_magic_comments(pm_parser_t *parser, VALUE source) {
|
160
|
+
VALUE magic_comments = rb_ary_new();
|
161
|
+
|
162
|
+
for (pm_magic_comment_t *magic_comment = (pm_magic_comment_t *) parser->magic_comment_list.head; magic_comment != NULL; magic_comment = (pm_magic_comment_t *) magic_comment->node.next) {
|
163
|
+
VALUE key_loc_argv[] = {
|
164
|
+
source,
|
165
|
+
LONG2FIX(magic_comment->key_start - parser->start),
|
166
|
+
LONG2FIX(magic_comment->key_length)
|
167
|
+
};
|
168
|
+
|
169
|
+
VALUE value_loc_argv[] = {
|
170
|
+
source,
|
171
|
+
LONG2FIX(magic_comment->value_start - parser->start),
|
172
|
+
LONG2FIX(magic_comment->value_length)
|
173
|
+
};
|
174
|
+
|
175
|
+
VALUE magic_comment_argv[] = {
|
176
|
+
rb_class_new_instance(3, key_loc_argv, rb_cPrismLocation),
|
177
|
+
rb_class_new_instance(3, value_loc_argv, rb_cPrismLocation)
|
178
|
+
};
|
179
|
+
|
180
|
+
rb_ary_push(magic_comments, rb_class_new_instance(2, magic_comment_argv, rb_cPrismMagicComment));
|
181
|
+
}
|
182
|
+
|
183
|
+
return magic_comments;
|
184
|
+
}
|
185
|
+
|
156
186
|
// Extract the errors out of the parser into an array.
|
157
187
|
static VALUE
|
158
188
|
parser_errors(pm_parser_t *parser, rb_encoding *encoding, VALUE source) {
|
@@ -297,6 +327,7 @@ parse_lex_input(pm_string_t *input, const char *filepath, bool return_nodes) {
|
|
297
327
|
VALUE result_argv[] = {
|
298
328
|
value,
|
299
329
|
parser_comments(&parser, source),
|
330
|
+
parser_magic_comments(&parser, source),
|
300
331
|
parser_errors(&parser, parse_lex_data.encoding, source),
|
301
332
|
parser_warnings(&parser, parse_lex_data.encoding, source),
|
302
333
|
source
|
@@ -304,7 +335,7 @@ parse_lex_input(pm_string_t *input, const char *filepath, bool return_nodes) {
|
|
304
335
|
|
305
336
|
pm_node_destroy(&parser, node);
|
306
337
|
pm_parser_free(&parser);
|
307
|
-
return rb_class_new_instance(
|
338
|
+
return rb_class_new_instance(6, result_argv, rb_cPrismParseResult);
|
308
339
|
}
|
309
340
|
|
310
341
|
// Return an array of tokens corresponding to the given string.
|
@@ -351,12 +382,13 @@ parse_input(pm_string_t *input, const char *filepath) {
|
|
351
382
|
VALUE result_argv[] = {
|
352
383
|
pm_ast_new(&parser, node, encoding),
|
353
384
|
parser_comments(&parser, source),
|
385
|
+
parser_magic_comments(&parser, source),
|
354
386
|
parser_errors(&parser, encoding, source),
|
355
387
|
parser_warnings(&parser, encoding, source),
|
356
388
|
source
|
357
389
|
};
|
358
390
|
|
359
|
-
VALUE result = rb_class_new_instance(
|
391
|
+
VALUE result = rb_class_new_instance(6, result_argv, rb_cPrismParseResult);
|
360
392
|
|
361
393
|
pm_node_destroy(&parser, node);
|
362
394
|
pm_parser_free(&parser);
|
@@ -461,48 +493,6 @@ named_captures(VALUE self, VALUE source) {
|
|
461
493
|
return names;
|
462
494
|
}
|
463
495
|
|
464
|
-
// Accepts a source string and a type of unescaping and returns the unescaped
|
465
|
-
// version.
|
466
|
-
static VALUE
|
467
|
-
unescape(VALUE source, pm_unescape_type_t unescape_type) {
|
468
|
-
pm_string_t result;
|
469
|
-
|
470
|
-
if (pm_unescape_string((const uint8_t *) RSTRING_PTR(source), RSTRING_LEN(source), unescape_type, &result)) {
|
471
|
-
VALUE str = rb_str_new((const char *) pm_string_source(&result), pm_string_length(&result));
|
472
|
-
pm_string_free(&result);
|
473
|
-
return str;
|
474
|
-
} else {
|
475
|
-
pm_string_free(&result);
|
476
|
-
return Qnil;
|
477
|
-
}
|
478
|
-
}
|
479
|
-
|
480
|
-
// Do not unescape anything in the given string. This is here to provide a
|
481
|
-
// consistent API.
|
482
|
-
static VALUE
|
483
|
-
unescape_none(VALUE self, VALUE source) {
|
484
|
-
return unescape(source, PM_UNESCAPE_NONE);
|
485
|
-
}
|
486
|
-
|
487
|
-
// Minimally unescape the given string. This means effectively unescaping just
|
488
|
-
// the quotes of a string. Returns the unescaped string.
|
489
|
-
static VALUE
|
490
|
-
unescape_minimal(VALUE self, VALUE source) {
|
491
|
-
return unescape(source, PM_UNESCAPE_MINIMAL);
|
492
|
-
}
|
493
|
-
|
494
|
-
// Escape the given string minimally plus whitespace. Returns the unescaped string.
|
495
|
-
static VALUE
|
496
|
-
unescape_whitespace(VALUE self, VALUE source) {
|
497
|
-
return unescape(source, PM_UNESCAPE_WHITESPACE);
|
498
|
-
}
|
499
|
-
|
500
|
-
// Unescape everything in the given string. Return the unescaped string.
|
501
|
-
static VALUE
|
502
|
-
unescape_all(VALUE self, VALUE source) {
|
503
|
-
return unescape(source, PM_UNESCAPE_ALL);
|
504
|
-
}
|
505
|
-
|
506
496
|
// Return a hash of information about the given source string's memory usage.
|
507
497
|
static VALUE
|
508
498
|
memsize(VALUE self, VALUE string) {
|
@@ -589,6 +579,7 @@ Init_prism(void) {
|
|
589
579
|
rb_cPrismToken = rb_define_class_under(rb_cPrism, "Token", rb_cObject);
|
590
580
|
rb_cPrismLocation = rb_define_class_under(rb_cPrism, "Location", rb_cObject);
|
591
581
|
rb_cPrismComment = rb_define_class_under(rb_cPrism, "Comment", rb_cObject);
|
582
|
+
rb_cPrismMagicComment = rb_define_class_under(rb_cPrism, "MagicComment", rb_cObject);
|
592
583
|
rb_cPrismParseError = rb_define_class_under(rb_cPrism, "ParseError", rb_cObject);
|
593
584
|
rb_cPrismParseWarning = rb_define_class_under(rb_cPrism, "ParseWarning", rb_cObject);
|
594
585
|
rb_cPrismParseResult = rb_define_class_under(rb_cPrism, "ParseResult", rb_cObject);
|
@@ -612,10 +603,6 @@ Init_prism(void) {
|
|
612
603
|
// internal tasks. We expose these to make them easier to test.
|
613
604
|
VALUE rb_cPrismDebug = rb_define_module_under(rb_cPrism, "Debug");
|
614
605
|
rb_define_singleton_method(rb_cPrismDebug, "named_captures", named_captures, 1);
|
615
|
-
rb_define_singleton_method(rb_cPrismDebug, "unescape_none", unescape_none, 1);
|
616
|
-
rb_define_singleton_method(rb_cPrismDebug, "unescape_minimal", unescape_minimal, 1);
|
617
|
-
rb_define_singleton_method(rb_cPrismDebug, "unescape_whitespace", unescape_whitespace, 1);
|
618
|
-
rb_define_singleton_method(rb_cPrismDebug, "unescape_all", unescape_all, 1);
|
619
606
|
rb_define_singleton_method(rb_cPrismDebug, "memsize", memsize, 1);
|
620
607
|
rb_define_singleton_method(rb_cPrismDebug, "profile_file", profile_file, 1);
|
621
608
|
rb_define_singleton_method(rb_cPrismDebug, "parse_serialize_file_metadata", parse_serialize_file_metadata, 2);
|
data/ext/prism/extension.h
CHANGED
data/include/prism/ast.h
CHANGED
@@ -277,79 +277,82 @@ enum pm_node_type {
|
|
277
277
|
PM_IMAGINARY_NODE = 66,
|
278
278
|
PM_IMPLICIT_NODE = 67,
|
279
279
|
PM_IN_NODE = 68,
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
|
342
|
-
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
|
352
|
-
|
280
|
+
PM_INDEX_AND_WRITE_NODE = 69,
|
281
|
+
PM_INDEX_OPERATOR_WRITE_NODE = 70,
|
282
|
+
PM_INDEX_OR_WRITE_NODE = 71,
|
283
|
+
PM_INSTANCE_VARIABLE_AND_WRITE_NODE = 72,
|
284
|
+
PM_INSTANCE_VARIABLE_OPERATOR_WRITE_NODE = 73,
|
285
|
+
PM_INSTANCE_VARIABLE_OR_WRITE_NODE = 74,
|
286
|
+
PM_INSTANCE_VARIABLE_READ_NODE = 75,
|
287
|
+
PM_INSTANCE_VARIABLE_TARGET_NODE = 76,
|
288
|
+
PM_INSTANCE_VARIABLE_WRITE_NODE = 77,
|
289
|
+
PM_INTEGER_NODE = 78,
|
290
|
+
PM_INTERPOLATED_MATCH_LAST_LINE_NODE = 79,
|
291
|
+
PM_INTERPOLATED_REGULAR_EXPRESSION_NODE = 80,
|
292
|
+
PM_INTERPOLATED_STRING_NODE = 81,
|
293
|
+
PM_INTERPOLATED_SYMBOL_NODE = 82,
|
294
|
+
PM_INTERPOLATED_X_STRING_NODE = 83,
|
295
|
+
PM_KEYWORD_HASH_NODE = 84,
|
296
|
+
PM_KEYWORD_PARAMETER_NODE = 85,
|
297
|
+
PM_KEYWORD_REST_PARAMETER_NODE = 86,
|
298
|
+
PM_LAMBDA_NODE = 87,
|
299
|
+
PM_LOCAL_VARIABLE_AND_WRITE_NODE = 88,
|
300
|
+
PM_LOCAL_VARIABLE_OPERATOR_WRITE_NODE = 89,
|
301
|
+
PM_LOCAL_VARIABLE_OR_WRITE_NODE = 90,
|
302
|
+
PM_LOCAL_VARIABLE_READ_NODE = 91,
|
303
|
+
PM_LOCAL_VARIABLE_TARGET_NODE = 92,
|
304
|
+
PM_LOCAL_VARIABLE_WRITE_NODE = 93,
|
305
|
+
PM_MATCH_LAST_LINE_NODE = 94,
|
306
|
+
PM_MATCH_PREDICATE_NODE = 95,
|
307
|
+
PM_MATCH_REQUIRED_NODE = 96,
|
308
|
+
PM_MATCH_WRITE_NODE = 97,
|
309
|
+
PM_MISSING_NODE = 98,
|
310
|
+
PM_MODULE_NODE = 99,
|
311
|
+
PM_MULTI_TARGET_NODE = 100,
|
312
|
+
PM_MULTI_WRITE_NODE = 101,
|
313
|
+
PM_NEXT_NODE = 102,
|
314
|
+
PM_NIL_NODE = 103,
|
315
|
+
PM_NO_KEYWORDS_PARAMETER_NODE = 104,
|
316
|
+
PM_NUMBERED_REFERENCE_READ_NODE = 105,
|
317
|
+
PM_OPTIONAL_PARAMETER_NODE = 106,
|
318
|
+
PM_OR_NODE = 107,
|
319
|
+
PM_PARAMETERS_NODE = 108,
|
320
|
+
PM_PARENTHESES_NODE = 109,
|
321
|
+
PM_PINNED_EXPRESSION_NODE = 110,
|
322
|
+
PM_PINNED_VARIABLE_NODE = 111,
|
323
|
+
PM_POST_EXECUTION_NODE = 112,
|
324
|
+
PM_PRE_EXECUTION_NODE = 113,
|
325
|
+
PM_PROGRAM_NODE = 114,
|
326
|
+
PM_RANGE_NODE = 115,
|
327
|
+
PM_RATIONAL_NODE = 116,
|
328
|
+
PM_REDO_NODE = 117,
|
329
|
+
PM_REGULAR_EXPRESSION_NODE = 118,
|
330
|
+
PM_REQUIRED_DESTRUCTURED_PARAMETER_NODE = 119,
|
331
|
+
PM_REQUIRED_PARAMETER_NODE = 120,
|
332
|
+
PM_RESCUE_MODIFIER_NODE = 121,
|
333
|
+
PM_RESCUE_NODE = 122,
|
334
|
+
PM_REST_PARAMETER_NODE = 123,
|
335
|
+
PM_RETRY_NODE = 124,
|
336
|
+
PM_RETURN_NODE = 125,
|
337
|
+
PM_SELF_NODE = 126,
|
338
|
+
PM_SINGLETON_CLASS_NODE = 127,
|
339
|
+
PM_SOURCE_ENCODING_NODE = 128,
|
340
|
+
PM_SOURCE_FILE_NODE = 129,
|
341
|
+
PM_SOURCE_LINE_NODE = 130,
|
342
|
+
PM_SPLAT_NODE = 131,
|
343
|
+
PM_STATEMENTS_NODE = 132,
|
344
|
+
PM_STRING_CONCAT_NODE = 133,
|
345
|
+
PM_STRING_NODE = 134,
|
346
|
+
PM_SUPER_NODE = 135,
|
347
|
+
PM_SYMBOL_NODE = 136,
|
348
|
+
PM_TRUE_NODE = 137,
|
349
|
+
PM_UNDEF_NODE = 138,
|
350
|
+
PM_UNLESS_NODE = 139,
|
351
|
+
PM_UNTIL_NODE = 140,
|
352
|
+
PM_WHEN_NODE = 141,
|
353
|
+
PM_WHILE_NODE = 142,
|
354
|
+
PM_X_STRING_NODE = 143,
|
355
|
+
PM_YIELD_NODE = 144,
|
353
356
|
PM_SCOPE_NODE
|
354
357
|
};
|
355
358
|
|
@@ -358,8 +361,10 @@ typedef uint16_t pm_node_flags_t;
|
|
358
361
|
|
359
362
|
// We store the flags enum in every node in the tree. Some flags are common to
|
360
363
|
// all nodes (the ones listed below). Others are specific to certain node types.
|
361
|
-
|
362
|
-
static const pm_node_flags_t
|
364
|
+
#define PM_NODE_FLAG_BITS (sizeof(pm_node_flags_t) * 8)
|
365
|
+
static const pm_node_flags_t PM_NODE_FLAG_NEWLINE = (1 << (PM_NODE_FLAG_BITS - 1));
|
366
|
+
static const pm_node_flags_t PM_NODE_FLAG_STATIC_LITERAL = (1 << (PM_NODE_FLAG_BITS - 2));
|
367
|
+
static const pm_node_flags_t PM_NODE_FLAG_COMMON_MASK = (1 << (PM_NODE_FLAG_BITS - 1)) | (1 << (PM_NODE_FLAG_BITS - 2));
|
363
368
|
|
364
369
|
// For easy access, we define some macros to check node type
|
365
370
|
#define PM_NODE_TYPE(node) ((enum pm_node_type)node->type)
|
@@ -474,6 +479,7 @@ typedef struct pm_assoc_splat_node {
|
|
474
479
|
// Type: PM_BACK_REFERENCE_READ_NODE
|
475
480
|
typedef struct pm_back_reference_read_node {
|
476
481
|
pm_node_t base;
|
482
|
+
pm_constant_id_t name;
|
477
483
|
} pm_back_reference_read_node_t;
|
478
484
|
|
479
485
|
// BeginNode
|
@@ -559,11 +565,8 @@ typedef struct pm_call_and_write_node {
|
|
559
565
|
struct pm_node *receiver;
|
560
566
|
pm_location_t call_operator_loc;
|
561
567
|
pm_location_t message_loc;
|
562
|
-
|
563
|
-
|
564
|
-
pm_location_t closing_loc;
|
565
|
-
pm_string_t read_name;
|
566
|
-
pm_string_t write_name;
|
568
|
+
pm_constant_id_t read_name;
|
569
|
+
pm_constant_id_t write_name;
|
567
570
|
pm_location_t operator_loc;
|
568
571
|
struct pm_node *value;
|
569
572
|
} pm_call_and_write_node_t;
|
@@ -583,7 +586,7 @@ typedef struct pm_call_node {
|
|
583
586
|
struct pm_arguments_node *arguments;
|
584
587
|
pm_location_t closing_loc;
|
585
588
|
struct pm_node *block;
|
586
|
-
|
589
|
+
pm_constant_id_t name;
|
587
590
|
} pm_call_node_t;
|
588
591
|
|
589
592
|
// CallOperatorWriteNode
|
@@ -597,11 +600,8 @@ typedef struct pm_call_operator_write_node {
|
|
597
600
|
struct pm_node *receiver;
|
598
601
|
pm_location_t call_operator_loc;
|
599
602
|
pm_location_t message_loc;
|
600
|
-
|
601
|
-
|
602
|
-
pm_location_t closing_loc;
|
603
|
-
pm_string_t read_name;
|
604
|
-
pm_string_t write_name;
|
603
|
+
pm_constant_id_t read_name;
|
604
|
+
pm_constant_id_t write_name;
|
605
605
|
pm_constant_id_t operator;
|
606
606
|
pm_location_t operator_loc;
|
607
607
|
struct pm_node *value;
|
@@ -618,11 +618,8 @@ typedef struct pm_call_or_write_node {
|
|
618
618
|
struct pm_node *receiver;
|
619
619
|
pm_location_t call_operator_loc;
|
620
620
|
pm_location_t message_loc;
|
621
|
-
|
622
|
-
|
623
|
-
pm_location_t closing_loc;
|
624
|
-
pm_string_t read_name;
|
625
|
-
pm_string_t write_name;
|
621
|
+
pm_constant_id_t read_name;
|
622
|
+
pm_constant_id_t write_name;
|
626
623
|
pm_location_t operator_loc;
|
627
624
|
struct pm_node *value;
|
628
625
|
} pm_call_or_write_node_t;
|
@@ -1113,6 +1110,61 @@ typedef struct pm_in_node {
|
|
1113
1110
|
pm_location_t then_loc;
|
1114
1111
|
} pm_in_node_t;
|
1115
1112
|
|
1113
|
+
// IndexAndWriteNode
|
1114
|
+
//
|
1115
|
+
// Type: PM_INDEX_AND_WRITE_NODE
|
1116
|
+
// Flags:
|
1117
|
+
// PM_CALL_NODE_FLAGS_SAFE_NAVIGATION
|
1118
|
+
// PM_CALL_NODE_FLAGS_VARIABLE_CALL
|
1119
|
+
typedef struct pm_index_and_write_node {
|
1120
|
+
pm_node_t base;
|
1121
|
+
struct pm_node *receiver;
|
1122
|
+
pm_location_t call_operator_loc;
|
1123
|
+
pm_location_t opening_loc;
|
1124
|
+
struct pm_arguments_node *arguments;
|
1125
|
+
pm_location_t closing_loc;
|
1126
|
+
struct pm_node *block;
|
1127
|
+
pm_location_t operator_loc;
|
1128
|
+
struct pm_node *value;
|
1129
|
+
} pm_index_and_write_node_t;
|
1130
|
+
|
1131
|
+
// IndexOperatorWriteNode
|
1132
|
+
//
|
1133
|
+
// Type: PM_INDEX_OPERATOR_WRITE_NODE
|
1134
|
+
// Flags:
|
1135
|
+
// PM_CALL_NODE_FLAGS_SAFE_NAVIGATION
|
1136
|
+
// PM_CALL_NODE_FLAGS_VARIABLE_CALL
|
1137
|
+
typedef struct pm_index_operator_write_node {
|
1138
|
+
pm_node_t base;
|
1139
|
+
struct pm_node *receiver;
|
1140
|
+
pm_location_t call_operator_loc;
|
1141
|
+
pm_location_t opening_loc;
|
1142
|
+
struct pm_arguments_node *arguments;
|
1143
|
+
pm_location_t closing_loc;
|
1144
|
+
struct pm_node *block;
|
1145
|
+
pm_constant_id_t operator;
|
1146
|
+
pm_location_t operator_loc;
|
1147
|
+
struct pm_node *value;
|
1148
|
+
} pm_index_operator_write_node_t;
|
1149
|
+
|
1150
|
+
// IndexOrWriteNode
|
1151
|
+
//
|
1152
|
+
// Type: PM_INDEX_OR_WRITE_NODE
|
1153
|
+
// Flags:
|
1154
|
+
// PM_CALL_NODE_FLAGS_SAFE_NAVIGATION
|
1155
|
+
// PM_CALL_NODE_FLAGS_VARIABLE_CALL
|
1156
|
+
typedef struct pm_index_or_write_node {
|
1157
|
+
pm_node_t base;
|
1158
|
+
struct pm_node *receiver;
|
1159
|
+
pm_location_t call_operator_loc;
|
1160
|
+
pm_location_t opening_loc;
|
1161
|
+
struct pm_arguments_node *arguments;
|
1162
|
+
pm_location_t closing_loc;
|
1163
|
+
struct pm_node *block;
|
1164
|
+
pm_location_t operator_loc;
|
1165
|
+
struct pm_node *value;
|
1166
|
+
} pm_index_or_write_node_t;
|
1167
|
+
|
1116
1168
|
// InstanceVariableAndWriteNode
|
1117
1169
|
//
|
1118
1170
|
// Type: PM_INSTANCE_VARIABLE_AND_WRITE_NODE
|
@@ -1193,11 +1245,11 @@ typedef struct pm_integer_node {
|
|
1193
1245
|
// PM_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE
|
1194
1246
|
// PM_REGULAR_EXPRESSION_FLAGS_EXTENDED
|
1195
1247
|
// PM_REGULAR_EXPRESSION_FLAGS_MULTI_LINE
|
1248
|
+
// PM_REGULAR_EXPRESSION_FLAGS_ONCE
|
1196
1249
|
// PM_REGULAR_EXPRESSION_FLAGS_EUC_JP
|
1197
1250
|
// PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT
|
1198
1251
|
// PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J
|
1199
1252
|
// PM_REGULAR_EXPRESSION_FLAGS_UTF_8
|
1200
|
-
// PM_REGULAR_EXPRESSION_FLAGS_ONCE
|
1201
1253
|
typedef struct pm_interpolated_match_last_line_node {
|
1202
1254
|
pm_node_t base;
|
1203
1255
|
pm_location_t opening_loc;
|
@@ -1212,11 +1264,11 @@ typedef struct pm_interpolated_match_last_line_node {
|
|
1212
1264
|
// PM_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE
|
1213
1265
|
// PM_REGULAR_EXPRESSION_FLAGS_EXTENDED
|
1214
1266
|
// PM_REGULAR_EXPRESSION_FLAGS_MULTI_LINE
|
1267
|
+
// PM_REGULAR_EXPRESSION_FLAGS_ONCE
|
1215
1268
|
// PM_REGULAR_EXPRESSION_FLAGS_EUC_JP
|
1216
1269
|
// PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT
|
1217
1270
|
// PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J
|
1218
1271
|
// PM_REGULAR_EXPRESSION_FLAGS_UTF_8
|
1219
|
-
// PM_REGULAR_EXPRESSION_FLAGS_ONCE
|
1220
1272
|
typedef struct pm_interpolated_regular_expression_node {
|
1221
1273
|
pm_node_t base;
|
1222
1274
|
pm_location_t opening_loc;
|
@@ -1369,11 +1421,11 @@ typedef struct pm_local_variable_write_node {
|
|
1369
1421
|
// PM_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE
|
1370
1422
|
// PM_REGULAR_EXPRESSION_FLAGS_EXTENDED
|
1371
1423
|
// PM_REGULAR_EXPRESSION_FLAGS_MULTI_LINE
|
1424
|
+
// PM_REGULAR_EXPRESSION_FLAGS_ONCE
|
1372
1425
|
// PM_REGULAR_EXPRESSION_FLAGS_EUC_JP
|
1373
1426
|
// PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT
|
1374
1427
|
// PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J
|
1375
1428
|
// PM_REGULAR_EXPRESSION_FLAGS_UTF_8
|
1376
|
-
// PM_REGULAR_EXPRESSION_FLAGS_ONCE
|
1377
1429
|
typedef struct pm_match_last_line_node {
|
1378
1430
|
pm_node_t base;
|
1379
1431
|
pm_location_t opening_loc;
|
@@ -1616,11 +1668,11 @@ typedef struct pm_redo_node {
|
|
1616
1668
|
// PM_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE
|
1617
1669
|
// PM_REGULAR_EXPRESSION_FLAGS_EXTENDED
|
1618
1670
|
// PM_REGULAR_EXPRESSION_FLAGS_MULTI_LINE
|
1671
|
+
// PM_REGULAR_EXPRESSION_FLAGS_ONCE
|
1619
1672
|
// PM_REGULAR_EXPRESSION_FLAGS_EUC_JP
|
1620
1673
|
// PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT
|
1621
1674
|
// PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J
|
1622
1675
|
// PM_REGULAR_EXPRESSION_FLAGS_UTF_8
|
1623
|
-
// PM_REGULAR_EXPRESSION_FLAGS_ONCE
|
1624
1676
|
typedef struct pm_regular_expression_node {
|
1625
1677
|
pm_node_t base;
|
1626
1678
|
pm_location_t opening_loc;
|
@@ -1887,44 +1939,44 @@ typedef struct pm_yield_node {
|
|
1887
1939
|
} pm_yield_node_t;
|
1888
1940
|
|
1889
1941
|
// CallNodeFlags
|
1890
|
-
typedef enum {
|
1891
|
-
PM_CALL_NODE_FLAGS_SAFE_NAVIGATION = 1 <<
|
1892
|
-
PM_CALL_NODE_FLAGS_VARIABLE_CALL = 1 <<
|
1942
|
+
typedef enum pm_call_node_flags {
|
1943
|
+
PM_CALL_NODE_FLAGS_SAFE_NAVIGATION = 1 << 0,
|
1944
|
+
PM_CALL_NODE_FLAGS_VARIABLE_CALL = 1 << 1,
|
1893
1945
|
} pm_call_node_flags_t;
|
1894
1946
|
|
1895
1947
|
// IntegerBaseFlags
|
1896
|
-
typedef enum {
|
1897
|
-
PM_INTEGER_BASE_FLAGS_BINARY = 1 <<
|
1898
|
-
PM_INTEGER_BASE_FLAGS_OCTAL = 1 <<
|
1899
|
-
PM_INTEGER_BASE_FLAGS_DECIMAL = 1 <<
|
1900
|
-
PM_INTEGER_BASE_FLAGS_HEXADECIMAL = 1 <<
|
1948
|
+
typedef enum pm_integer_base_flags {
|
1949
|
+
PM_INTEGER_BASE_FLAGS_BINARY = 1 << 0,
|
1950
|
+
PM_INTEGER_BASE_FLAGS_OCTAL = 1 << 1,
|
1951
|
+
PM_INTEGER_BASE_FLAGS_DECIMAL = 1 << 2,
|
1952
|
+
PM_INTEGER_BASE_FLAGS_HEXADECIMAL = 1 << 3,
|
1901
1953
|
} pm_integer_base_flags_t;
|
1902
1954
|
|
1903
1955
|
// LoopFlags
|
1904
|
-
typedef enum {
|
1905
|
-
PM_LOOP_FLAGS_BEGIN_MODIFIER = 1 <<
|
1956
|
+
typedef enum pm_loop_flags {
|
1957
|
+
PM_LOOP_FLAGS_BEGIN_MODIFIER = 1 << 0,
|
1906
1958
|
} pm_loop_flags_t;
|
1907
1959
|
|
1908
1960
|
// RangeFlags
|
1909
|
-
typedef enum {
|
1910
|
-
PM_RANGE_FLAGS_EXCLUDE_END = 1 <<
|
1961
|
+
typedef enum pm_range_flags {
|
1962
|
+
PM_RANGE_FLAGS_EXCLUDE_END = 1 << 0,
|
1911
1963
|
} pm_range_flags_t;
|
1912
1964
|
|
1913
1965
|
// RegularExpressionFlags
|
1914
|
-
typedef enum {
|
1915
|
-
PM_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE = 1 <<
|
1916
|
-
PM_REGULAR_EXPRESSION_FLAGS_EXTENDED = 1 <<
|
1917
|
-
PM_REGULAR_EXPRESSION_FLAGS_MULTI_LINE = 1 <<
|
1918
|
-
|
1919
|
-
|
1920
|
-
|
1921
|
-
|
1922
|
-
|
1966
|
+
typedef enum pm_regular_expression_flags {
|
1967
|
+
PM_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE = 1 << 0,
|
1968
|
+
PM_REGULAR_EXPRESSION_FLAGS_EXTENDED = 1 << 1,
|
1969
|
+
PM_REGULAR_EXPRESSION_FLAGS_MULTI_LINE = 1 << 2,
|
1970
|
+
PM_REGULAR_EXPRESSION_FLAGS_ONCE = 1 << 3,
|
1971
|
+
PM_REGULAR_EXPRESSION_FLAGS_EUC_JP = 1 << 4,
|
1972
|
+
PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT = 1 << 5,
|
1973
|
+
PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J = 1 << 6,
|
1974
|
+
PM_REGULAR_EXPRESSION_FLAGS_UTF_8 = 1 << 7,
|
1923
1975
|
} pm_regular_expression_flags_t;
|
1924
1976
|
|
1925
1977
|
// StringFlags
|
1926
|
-
typedef enum {
|
1927
|
-
PM_STRING_FLAGS_FROZEN = 1 <<
|
1978
|
+
typedef enum pm_string_flags {
|
1979
|
+
PM_STRING_FLAGS_FROZEN = 1 << 0,
|
1928
1980
|
} pm_string_flags_t;
|
1929
1981
|
|
1930
1982
|
#define PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS false
|
data/include/prism/diagnostic.h
CHANGED
@@ -158,6 +158,7 @@ typedef enum {
|
|
158
158
|
PM_ERR_NUMBERED_PARAMETER_NOT_ALLOWED,
|
159
159
|
PM_ERR_NUMBERED_PARAMETER_OUTER_SCOPE,
|
160
160
|
PM_ERR_OPERATOR_MULTI_ASSIGN,
|
161
|
+
PM_ERR_OPERATOR_WRITE_ARGUMENTS,
|
161
162
|
PM_ERR_OPERATOR_WRITE_BLOCK,
|
162
163
|
PM_ERR_PARAMETER_ASSOC_SPLAT_MULTI,
|
163
164
|
PM_ERR_PARAMETER_BLOCK_MULTI,
|
data/include/prism/node.h
CHANGED
@@ -33,9 +33,17 @@ PRISM_EXPORTED_FUNCTION const char * pm_node_type_to_str(pm_node_type_t node_typ
|
|
33
33
|
// declare them here to avoid generating them.
|
34
34
|
typedef struct pm_scope_node {
|
35
35
|
pm_node_t base;
|
36
|
+
struct pm_scope_node *previous;
|
37
|
+
pm_node_t *ast_node;
|
36
38
|
struct pm_parameters_node *parameters;
|
37
39
|
pm_node_t *body;
|
38
40
|
pm_constant_id_list_t locals;
|
41
|
+
pm_parser_t *parser;
|
42
|
+
|
43
|
+
// We don't have the CRuby types ID and st_table within Prism
|
44
|
+
// so we use void *
|
45
|
+
void *constants; // ID *constants
|
46
|
+
void *index_lookup_table; // st_table *index_lookup_table
|
39
47
|
} pm_scope_node_t;
|
40
48
|
|
41
49
|
#endif // PRISM_NODE_H
|
data/include/prism/parser.h
CHANGED
@@ -8,6 +8,7 @@
|
|
8
8
|
#include "prism/util/pm_list.h"
|
9
9
|
#include "prism/util/pm_newline_list.h"
|
10
10
|
#include "prism/util/pm_state_stack.h"
|
11
|
+
#include "prism/util/pm_string.h"
|
11
12
|
|
12
13
|
#include <stdbool.h>
|
13
14
|
|
@@ -172,6 +173,11 @@ typedef struct pm_lex_mode {
|
|
172
173
|
// This is the pointer to the character where lexing should resume
|
173
174
|
// once the heredoc has been completely processed.
|
174
175
|
const uint8_t *next_start;
|
176
|
+
|
177
|
+
// This is used to track the amount of common whitespace on each
|
178
|
+
// line so that we know how much to dedent each line in the case of
|
179
|
+
// a tilde heredoc.
|
180
|
+
size_t common_whitespace;
|
175
181
|
} heredoc;
|
176
182
|
} as;
|
177
183
|
|
@@ -244,6 +250,16 @@ typedef struct pm_comment {
|
|
244
250
|
pm_comment_type_t type;
|
245
251
|
} pm_comment_t;
|
246
252
|
|
253
|
+
// This is a node in the linked list of magic comments that we've found while
|
254
|
+
// parsing.
|
255
|
+
typedef struct {
|
256
|
+
pm_list_node_t node;
|
257
|
+
const uint8_t *key_start;
|
258
|
+
const uint8_t *value_start;
|
259
|
+
uint32_t key_length;
|
260
|
+
uint32_t value_length;
|
261
|
+
} pm_magic_comment_t;
|
262
|
+
|
247
263
|
// When the encoding that is being used to parse the source is changed by prism,
|
248
264
|
// we provide the ability here to call out to a user-defined function.
|
249
265
|
typedef void (*pm_encoding_changed_callback_t)(pm_parser_t *parser);
|
@@ -293,6 +309,11 @@ typedef struct pm_scope {
|
|
293
309
|
// This is necessary to determine if child blocks are allowed to use
|
294
310
|
// numbered parameters.
|
295
311
|
bool numbered_params;
|
312
|
+
|
313
|
+
// A transparent scope is a scope that cannot have locals set on itself.
|
314
|
+
// When a local is set on this scope, it will instead be set on the parent
|
315
|
+
// scope's local table.
|
316
|
+
bool transparent;
|
296
317
|
} pm_scope_t;
|
297
318
|
|
298
319
|
// This struct represents the overall parser. It contains a reference to the
|
@@ -342,6 +363,7 @@ struct pm_parser {
|
|
342
363
|
const uint8_t *heredoc_end;
|
343
364
|
|
344
365
|
pm_list_t comment_list; // the list of comments that have been found while parsing
|
366
|
+
pm_list_t magic_comment_list; // the list of magic comments that have been found while parsing.
|
345
367
|
pm_list_t warning_list; // the list of warnings that have been found while parsing
|
346
368
|
pm_list_t error_list; // the list of errors that have been found while parsing
|
347
369
|
pm_scope_t *current_scope; // the current local scope
|
@@ -388,6 +410,10 @@ struct pm_parser {
|
|
388
410
|
// when we find tokens that we need it for.
|
389
411
|
pm_node_flags_t integer_base;
|
390
412
|
|
413
|
+
// This string is used to pass information from the lexer to the parser. It
|
414
|
+
// is particularly necessary because of escape sequences.
|
415
|
+
pm_string_t current_string;
|
416
|
+
|
391
417
|
// Whether or not we're at the beginning of a command
|
392
418
|
bool command_start;
|
393
419
|
|
@@ -21,6 +21,9 @@ typedef struct {
|
|
21
21
|
// Return the size of the pm_buffer_t struct.
|
22
22
|
PRISM_EXPORTED_FUNCTION size_t pm_buffer_sizeof(void);
|
23
23
|
|
24
|
+
// Initialize a pm_buffer_t with the given capacity.
|
25
|
+
bool pm_buffer_init_capacity(pm_buffer_t *buffer, size_t capacity);
|
26
|
+
|
24
27
|
// Initialize a pm_buffer_t with its default values.
|
25
28
|
PRISM_EXPORTED_FUNCTION bool pm_buffer_init(pm_buffer_t *buffer);
|
26
29
|
|