prism 0.19.0 → 0.24.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +102 -1
- data/Makefile +5 -0
- data/README.md +9 -6
- data/config.yml +236 -38
- data/docs/build_system.md +19 -2
- data/docs/cruby_compilation.md +27 -0
- data/docs/parser_translation.md +34 -0
- data/docs/parsing_rules.md +19 -0
- data/docs/releasing.md +84 -16
- data/docs/ruby_api.md +1 -1
- data/docs/ruby_parser_translation.md +19 -0
- data/docs/serialization.md +19 -5
- data/ext/prism/api_node.c +1989 -1525
- data/ext/prism/extension.c +130 -30
- data/ext/prism/extension.h +2 -2
- data/include/prism/ast.h +1700 -505
- data/include/prism/defines.h +8 -0
- data/include/prism/diagnostic.h +49 -7
- data/include/prism/encoding.h +17 -0
- data/include/prism/options.h +40 -14
- data/include/prism/parser.h +34 -18
- data/include/prism/util/pm_buffer.h +9 -0
- data/include/prism/util/pm_constant_pool.h +18 -0
- data/include/prism/util/pm_newline_list.h +4 -14
- data/include/prism/util/pm_strpbrk.h +4 -1
- data/include/prism/version.h +2 -2
- data/include/prism.h +19 -2
- data/lib/prism/debug.rb +11 -5
- data/lib/prism/desugar_compiler.rb +225 -80
- data/lib/prism/dot_visitor.rb +36 -14
- data/lib/prism/dsl.rb +302 -299
- data/lib/prism/ffi.rb +107 -76
- data/lib/prism/lex_compat.rb +17 -1
- data/lib/prism/node.rb +4580 -2607
- data/lib/prism/node_ext.rb +27 -4
- data/lib/prism/parse_result.rb +75 -29
- data/lib/prism/serialize.rb +633 -305
- data/lib/prism/translation/parser/compiler.rb +1838 -0
- data/lib/prism/translation/parser/lexer.rb +335 -0
- data/lib/prism/translation/parser/rubocop.rb +45 -0
- data/lib/prism/translation/parser.rb +190 -0
- data/lib/prism/translation/parser33.rb +12 -0
- data/lib/prism/translation/parser34.rb +12 -0
- data/lib/prism/translation/ripper.rb +696 -0
- data/lib/prism/translation/ruby_parser.rb +1521 -0
- data/lib/prism/translation.rb +11 -0
- data/lib/prism.rb +1 -1
- data/prism.gemspec +18 -7
- data/rbi/prism.rbi +150 -88
- data/rbi/prism_static.rbi +15 -3
- data/sig/prism.rbs +996 -961
- data/sig/prism_static.rbs +123 -46
- data/src/diagnostic.c +264 -219
- data/src/encoding.c +21 -26
- data/src/node.c +2 -6
- data/src/options.c +29 -5
- data/src/prettyprint.c +176 -44
- data/src/prism.c +1499 -564
- data/src/serialize.c +35 -21
- data/src/token_type.c +353 -4
- data/src/util/pm_buffer.c +11 -0
- data/src/util/pm_constant_pool.c +37 -11
- data/src/util/pm_newline_list.c +6 -15
- data/src/util/pm_string.c +0 -7
- data/src/util/pm_strpbrk.c +122 -14
- metadata +16 -5
- data/docs/building.md +0 -29
- data/lib/prism/ripper_compat.rb +0 -207
data/src/serialize.c
CHANGED
@@ -158,11 +158,7 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
|
|
158
158
|
}
|
159
159
|
case PM_ASSOC_NODE: {
|
160
160
|
pm_serialize_node(parser, (pm_node_t *)((pm_assoc_node_t *)node)->key, buffer);
|
161
|
-
if (((pm_assoc_node_t *)node)->value == NULL) {
|
162
|
-
pm_buffer_append_byte(buffer, 0);
|
163
|
-
} else {
|
164
|
-
pm_serialize_node(parser, (pm_node_t *)((pm_assoc_node_t *)node)->value, buffer);
|
165
|
-
}
|
161
|
+
pm_serialize_node(parser, (pm_node_t *)((pm_assoc_node_t *)node)->value, buffer);
|
166
162
|
if (((pm_assoc_node_t *)node)->operator_loc.start == NULL) {
|
167
163
|
pm_buffer_append_byte(buffer, 0);
|
168
164
|
} else {
|
@@ -229,6 +225,7 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
|
|
229
225
|
break;
|
230
226
|
}
|
231
227
|
case PM_BLOCK_LOCAL_VARIABLE_NODE: {
|
228
|
+
pm_buffer_append_varuint(buffer, (uint32_t)(node->flags & ~PM_NODE_FLAG_COMMON_MASK));
|
232
229
|
pm_buffer_append_varuint(buffer, pm_sizet_to_u32(((pm_block_local_variable_node_t *)node)->name));
|
233
230
|
break;
|
234
231
|
}
|
@@ -238,7 +235,6 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
|
|
238
235
|
for (uint32_t index = 0; index < locals_size; index++) {
|
239
236
|
pm_buffer_append_varuint(buffer, pm_sizet_to_u32(((pm_block_node_t *)node)->locals.ids[index]));
|
240
237
|
}
|
241
|
-
pm_buffer_append_varuint(buffer, ((pm_block_node_t *)node)->locals_body_index);
|
242
238
|
if (((pm_block_node_t *)node)->parameters == NULL) {
|
243
239
|
pm_buffer_append_byte(buffer, 0);
|
244
240
|
} else {
|
@@ -254,6 +250,7 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
|
|
254
250
|
break;
|
255
251
|
}
|
256
252
|
case PM_BLOCK_PARAMETER_NODE: {
|
253
|
+
pm_buffer_append_varuint(buffer, (uint32_t)(node->flags & ~PM_NODE_FLAG_COMMON_MASK));
|
257
254
|
pm_buffer_append_varuint(buffer, pm_sizet_to_u32(((pm_block_parameter_node_t *)node)->name));
|
258
255
|
if (((pm_block_parameter_node_t *)node)->name_loc.start == NULL) {
|
259
256
|
pm_buffer_append_byte(buffer, 0);
|
@@ -651,7 +648,6 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
|
|
651
648
|
for (uint32_t index = 0; index < locals_size; index++) {
|
652
649
|
pm_buffer_append_varuint(buffer, pm_sizet_to_u32(((pm_def_node_t *)node)->locals.ids[index]));
|
653
650
|
}
|
654
|
-
pm_buffer_append_varuint(buffer, ((pm_def_node_t *)node)->locals_body_index);
|
655
651
|
pm_serialize_location(parser, &((pm_def_node_t *)node)->def_keyword_loc, buffer);
|
656
652
|
if (((pm_def_node_t *)node)->operator_loc.start == NULL) {
|
657
653
|
pm_buffer_append_byte(buffer, 0);
|
@@ -1190,6 +1186,7 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
|
|
1190
1186
|
break;
|
1191
1187
|
}
|
1192
1188
|
case PM_KEYWORD_REST_PARAMETER_NODE: {
|
1189
|
+
pm_buffer_append_varuint(buffer, (uint32_t)(node->flags & ~PM_NODE_FLAG_COMMON_MASK));
|
1193
1190
|
pm_buffer_append_varuint(buffer, pm_sizet_to_u32(((pm_keyword_rest_parameter_node_t *)node)->name));
|
1194
1191
|
if (((pm_keyword_rest_parameter_node_t *)node)->name_loc.start == NULL) {
|
1195
1192
|
pm_buffer_append_byte(buffer, 0);
|
@@ -1206,7 +1203,6 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
|
|
1206
1203
|
for (uint32_t index = 0; index < locals_size; index++) {
|
1207
1204
|
pm_buffer_append_varuint(buffer, pm_sizet_to_u32(((pm_lambda_node_t *)node)->locals.ids[index]));
|
1208
1205
|
}
|
1209
|
-
pm_buffer_append_varuint(buffer, ((pm_lambda_node_t *)node)->locals_body_index);
|
1210
1206
|
pm_serialize_location(parser, &((pm_lambda_node_t *)node)->operator_loc, buffer);
|
1211
1207
|
pm_serialize_location(parser, &((pm_lambda_node_t *)node)->opening_loc, buffer);
|
1212
1208
|
pm_serialize_location(parser, &((pm_lambda_node_t *)node)->closing_loc, buffer);
|
@@ -1402,12 +1398,14 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
|
|
1402
1398
|
break;
|
1403
1399
|
}
|
1404
1400
|
case PM_OPTIONAL_KEYWORD_PARAMETER_NODE: {
|
1401
|
+
pm_buffer_append_varuint(buffer, (uint32_t)(node->flags & ~PM_NODE_FLAG_COMMON_MASK));
|
1405
1402
|
pm_buffer_append_varuint(buffer, pm_sizet_to_u32(((pm_optional_keyword_parameter_node_t *)node)->name));
|
1406
1403
|
pm_serialize_location(parser, &((pm_optional_keyword_parameter_node_t *)node)->name_loc, buffer);
|
1407
1404
|
pm_serialize_node(parser, (pm_node_t *)((pm_optional_keyword_parameter_node_t *)node)->value, buffer);
|
1408
1405
|
break;
|
1409
1406
|
}
|
1410
1407
|
case PM_OPTIONAL_PARAMETER_NODE: {
|
1408
|
+
pm_buffer_append_varuint(buffer, (uint32_t)(node->flags & ~PM_NODE_FLAG_COMMON_MASK));
|
1411
1409
|
pm_buffer_append_varuint(buffer, pm_sizet_to_u32(((pm_optional_parameter_node_t *)node)->name));
|
1412
1410
|
pm_serialize_location(parser, &((pm_optional_parameter_node_t *)node)->name_loc, buffer);
|
1413
1411
|
pm_serialize_location(parser, &((pm_optional_parameter_node_t *)node)->operator_loc, buffer);
|
@@ -1542,11 +1540,13 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
|
|
1542
1540
|
break;
|
1543
1541
|
}
|
1544
1542
|
case PM_REQUIRED_KEYWORD_PARAMETER_NODE: {
|
1543
|
+
pm_buffer_append_varuint(buffer, (uint32_t)(node->flags & ~PM_NODE_FLAG_COMMON_MASK));
|
1545
1544
|
pm_buffer_append_varuint(buffer, pm_sizet_to_u32(((pm_required_keyword_parameter_node_t *)node)->name));
|
1546
1545
|
pm_serialize_location(parser, &((pm_required_keyword_parameter_node_t *)node)->name_loc, buffer);
|
1547
1546
|
break;
|
1548
1547
|
}
|
1549
1548
|
case PM_REQUIRED_PARAMETER_NODE: {
|
1549
|
+
pm_buffer_append_varuint(buffer, (uint32_t)(node->flags & ~PM_NODE_FLAG_COMMON_MASK));
|
1550
1550
|
pm_buffer_append_varuint(buffer, pm_sizet_to_u32(((pm_required_parameter_node_t *)node)->name));
|
1551
1551
|
break;
|
1552
1552
|
}
|
@@ -1587,6 +1587,7 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
|
|
1587
1587
|
break;
|
1588
1588
|
}
|
1589
1589
|
case PM_REST_PARAMETER_NODE: {
|
1590
|
+
pm_buffer_append_varuint(buffer, (uint32_t)(node->flags & ~PM_NODE_FLAG_COMMON_MASK));
|
1590
1591
|
pm_buffer_append_varuint(buffer, pm_sizet_to_u32(((pm_rest_parameter_node_t *)node)->name));
|
1591
1592
|
if (((pm_rest_parameter_node_t *)node)->name_loc.start == NULL) {
|
1592
1593
|
pm_buffer_append_byte(buffer, 0);
|
@@ -1842,6 +1843,17 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
|
|
1842
1843
|
}
|
1843
1844
|
}
|
1844
1845
|
|
1846
|
+
static void
|
1847
|
+
pm_serialize_newline_list(pm_newline_list_t *list, pm_buffer_t *buffer) {
|
1848
|
+
uint32_t size = pm_sizet_to_u32(list->size);
|
1849
|
+
pm_buffer_append_varuint(buffer, size);
|
1850
|
+
|
1851
|
+
for (uint32_t i = 0; i < size; i++) {
|
1852
|
+
uint32_t offset = pm_sizet_to_u32(list->offsets[i]);
|
1853
|
+
pm_buffer_append_varuint(buffer, offset);
|
1854
|
+
}
|
1855
|
+
}
|
1856
|
+
|
1845
1857
|
static void
|
1846
1858
|
pm_serialize_comment(pm_parser_t *parser, pm_comment_t *comment, pm_buffer_t *buffer) {
|
1847
1859
|
// serialize type
|
@@ -1904,6 +1916,8 @@ pm_serialize_diagnostic(pm_parser_t *parser, pm_diagnostic_t *diagnostic, pm_buf
|
|
1904
1916
|
|
1905
1917
|
// serialize location
|
1906
1918
|
pm_serialize_location(parser, &diagnostic->location, buffer);
|
1919
|
+
|
1920
|
+
pm_buffer_append_byte(buffer, diagnostic->level);
|
1907
1921
|
}
|
1908
1922
|
|
1909
1923
|
static void
|
@@ -1926,19 +1940,25 @@ pm_serialize_encoding(const pm_encoding_t *encoding, pm_buffer_t *buffer) {
|
|
1926
1940
|
pm_buffer_append_string(buffer, encoding->name, encoding_length);
|
1927
1941
|
}
|
1928
1942
|
|
1929
|
-
|
1930
|
-
|
1931
|
-
* Serialize the encoding, metadata, nodes, and constant pool.
|
1932
|
-
*/
|
1933
|
-
void
|
1934
|
-
pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
|
1943
|
+
static void
|
1944
|
+
pm_serialize_metadata(pm_parser_t *parser, pm_buffer_t *buffer) {
|
1935
1945
|
pm_serialize_encoding(parser->encoding, buffer);
|
1936
1946
|
pm_buffer_append_varsint(buffer, parser->start_line);
|
1947
|
+
pm_serialize_newline_list(&parser->newline_list, buffer);
|
1937
1948
|
pm_serialize_comment_list(parser, &parser->comment_list, buffer);
|
1938
1949
|
pm_serialize_magic_comment_list(parser, &parser->magic_comment_list, buffer);
|
1939
1950
|
pm_serialize_data_loc(parser, buffer);
|
1940
1951
|
pm_serialize_diagnostic_list(parser, &parser->error_list, buffer);
|
1941
1952
|
pm_serialize_diagnostic_list(parser, &parser->warning_list, buffer);
|
1953
|
+
}
|
1954
|
+
|
1955
|
+
#line 243 "serialize.c.erb"
|
1956
|
+
/**
|
1957
|
+
* Serialize the metadata, nodes, and constant pool.
|
1958
|
+
*/
|
1959
|
+
void
|
1960
|
+
pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
|
1961
|
+
pm_serialize_metadata(parser, buffer);
|
1942
1962
|
|
1943
1963
|
// Here we're going to leave space for the offset of the constant pool in
|
1944
1964
|
// the buffer.
|
@@ -2029,13 +2049,7 @@ pm_serialize_lex(pm_buffer_t *buffer, const uint8_t *source, size_t size, const
|
|
2029
2049
|
// Append 0 to mark end of tokens.
|
2030
2050
|
pm_buffer_append_byte(buffer, 0);
|
2031
2051
|
|
2032
|
-
|
2033
|
-
pm_buffer_append_varsint(buffer, parser.start_line);
|
2034
|
-
pm_serialize_comment_list(&parser, &parser.comment_list, buffer);
|
2035
|
-
pm_serialize_magic_comment_list(&parser, &parser.magic_comment_list, buffer);
|
2036
|
-
pm_serialize_data_loc(&parser, buffer);
|
2037
|
-
pm_serialize_diagnostic_list(&parser, &parser.error_list, buffer);
|
2038
|
-
pm_serialize_diagnostic_list(&parser, &parser.warning_list, buffer);
|
2052
|
+
pm_serialize_metadata(&parser, buffer);
|
2039
2053
|
|
2040
2054
|
pm_node_destroy(&parser, node);
|
2041
2055
|
pm_parser_free(&parser);
|
data/src/token_type.c
CHANGED
@@ -13,8 +13,7 @@
|
|
13
13
|
* Returns a string representation of the given token type.
|
14
14
|
*/
|
15
15
|
PRISM_EXPORTED_FUNCTION const char *
|
16
|
-
pm_token_type_to_str(pm_token_type_t token_type)
|
17
|
-
{
|
16
|
+
pm_token_type_name(pm_token_type_t token_type) {
|
18
17
|
switch (token_type) {
|
19
18
|
case PM_TOKEN_EOF:
|
20
19
|
return "EOF";
|
@@ -345,7 +344,357 @@ pm_token_type_to_str(pm_token_type_t token_type)
|
|
345
344
|
case PM_TOKEN___END__:
|
346
345
|
return "__END__";
|
347
346
|
case PM_TOKEN_MAXIMUM:
|
348
|
-
|
347
|
+
assert(false && "unreachable");
|
348
|
+
return "";
|
349
349
|
}
|
350
|
-
|
350
|
+
|
351
|
+
// Provide a default, because some compilers can't determine that the above
|
352
|
+
// switch is exhaustive.
|
353
|
+
assert(false && "unreachable");
|
354
|
+
return "";
|
355
|
+
}
|
356
|
+
|
357
|
+
/**
|
358
|
+
* Returns the human name of the given token type.
|
359
|
+
*/
|
360
|
+
const char *
|
361
|
+
pm_token_type_human(pm_token_type_t token_type) {
|
362
|
+
switch (token_type) {
|
363
|
+
case PM_TOKEN_EOF:
|
364
|
+
return "end of file";
|
365
|
+
case PM_TOKEN_MISSING:
|
366
|
+
return "missing token";
|
367
|
+
case PM_TOKEN_NOT_PROVIDED:
|
368
|
+
return "not provided token";
|
369
|
+
case PM_TOKEN_AMPERSAND:
|
370
|
+
return "'&'";
|
371
|
+
case PM_TOKEN_AMPERSAND_AMPERSAND:
|
372
|
+
return "'&&'";
|
373
|
+
case PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL:
|
374
|
+
return "'&&='";
|
375
|
+
case PM_TOKEN_AMPERSAND_DOT:
|
376
|
+
return "'&.'";
|
377
|
+
case PM_TOKEN_AMPERSAND_EQUAL:
|
378
|
+
return "'&='";
|
379
|
+
case PM_TOKEN_BACKTICK:
|
380
|
+
return "'`'";
|
381
|
+
case PM_TOKEN_BACK_REFERENCE:
|
382
|
+
return "back reference";
|
383
|
+
case PM_TOKEN_BANG:
|
384
|
+
return "'!'";
|
385
|
+
case PM_TOKEN_BANG_EQUAL:
|
386
|
+
return "'!='";
|
387
|
+
case PM_TOKEN_BANG_TILDE:
|
388
|
+
return "'!~'";
|
389
|
+
case PM_TOKEN_BRACE_LEFT:
|
390
|
+
return "'{'";
|
391
|
+
case PM_TOKEN_BRACE_RIGHT:
|
392
|
+
return "'}'";
|
393
|
+
case PM_TOKEN_BRACKET_LEFT:
|
394
|
+
return "'['";
|
395
|
+
case PM_TOKEN_BRACKET_LEFT_ARRAY:
|
396
|
+
return "'['";
|
397
|
+
case PM_TOKEN_BRACKET_LEFT_RIGHT:
|
398
|
+
return "'[]'";
|
399
|
+
case PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL:
|
400
|
+
return "'[]='";
|
401
|
+
case PM_TOKEN_BRACKET_RIGHT:
|
402
|
+
return "']'";
|
403
|
+
case PM_TOKEN_CARET:
|
404
|
+
return "'^'";
|
405
|
+
case PM_TOKEN_CARET_EQUAL:
|
406
|
+
return "'^='";
|
407
|
+
case PM_TOKEN_CHARACTER_LITERAL:
|
408
|
+
return "character literal";
|
409
|
+
case PM_TOKEN_CLASS_VARIABLE:
|
410
|
+
return "class variable";
|
411
|
+
case PM_TOKEN_COLON:
|
412
|
+
return "':'";
|
413
|
+
case PM_TOKEN_COLON_COLON:
|
414
|
+
return "'::'";
|
415
|
+
case PM_TOKEN_COMMA:
|
416
|
+
return "','";
|
417
|
+
case PM_TOKEN_COMMENT:
|
418
|
+
return "comment";
|
419
|
+
case PM_TOKEN_CONSTANT:
|
420
|
+
return "constant";
|
421
|
+
case PM_TOKEN_DOT:
|
422
|
+
return "'.'";
|
423
|
+
case PM_TOKEN_DOT_DOT:
|
424
|
+
return "'..'";
|
425
|
+
case PM_TOKEN_DOT_DOT_DOT:
|
426
|
+
return "'...'";
|
427
|
+
case PM_TOKEN_EMBDOC_BEGIN:
|
428
|
+
return "'=begin'";
|
429
|
+
case PM_TOKEN_EMBDOC_END:
|
430
|
+
return "'=end'";
|
431
|
+
case PM_TOKEN_EMBDOC_LINE:
|
432
|
+
return "embedded documentation line";
|
433
|
+
case PM_TOKEN_EMBEXPR_BEGIN:
|
434
|
+
return "'#{'";
|
435
|
+
case PM_TOKEN_EMBEXPR_END:
|
436
|
+
return "'}'";
|
437
|
+
case PM_TOKEN_EMBVAR:
|
438
|
+
return "'#'";
|
439
|
+
case PM_TOKEN_EQUAL:
|
440
|
+
return "'='";
|
441
|
+
case PM_TOKEN_EQUAL_EQUAL:
|
442
|
+
return "'=='";
|
443
|
+
case PM_TOKEN_EQUAL_EQUAL_EQUAL:
|
444
|
+
return "'==='";
|
445
|
+
case PM_TOKEN_EQUAL_GREATER:
|
446
|
+
return "'=>'";
|
447
|
+
case PM_TOKEN_EQUAL_TILDE:
|
448
|
+
return "'=~'";
|
449
|
+
case PM_TOKEN_FLOAT:
|
450
|
+
return "float";
|
451
|
+
case PM_TOKEN_FLOAT_IMAGINARY:
|
452
|
+
return "imaginary";
|
453
|
+
case PM_TOKEN_FLOAT_RATIONAL:
|
454
|
+
return "rational";
|
455
|
+
case PM_TOKEN_FLOAT_RATIONAL_IMAGINARY:
|
456
|
+
return "imaginary";
|
457
|
+
case PM_TOKEN_GLOBAL_VARIABLE:
|
458
|
+
return "global variable";
|
459
|
+
case PM_TOKEN_GREATER:
|
460
|
+
return "'>'";
|
461
|
+
case PM_TOKEN_GREATER_EQUAL:
|
462
|
+
return "'>='";
|
463
|
+
case PM_TOKEN_GREATER_GREATER:
|
464
|
+
return "'>>'";
|
465
|
+
case PM_TOKEN_GREATER_GREATER_EQUAL:
|
466
|
+
return "'>>='";
|
467
|
+
case PM_TOKEN_HEREDOC_END:
|
468
|
+
return "heredoc ending";
|
469
|
+
case PM_TOKEN_HEREDOC_START:
|
470
|
+
return "heredoc beginning";
|
471
|
+
case PM_TOKEN_IDENTIFIER:
|
472
|
+
return "local variable or method";
|
473
|
+
case PM_TOKEN_IGNORED_NEWLINE:
|
474
|
+
return "ignored newline";
|
475
|
+
case PM_TOKEN_INSTANCE_VARIABLE:
|
476
|
+
return "instance variable";
|
477
|
+
case PM_TOKEN_INTEGER:
|
478
|
+
return "integer";
|
479
|
+
case PM_TOKEN_INTEGER_IMAGINARY:
|
480
|
+
return "imaginary";
|
481
|
+
case PM_TOKEN_INTEGER_RATIONAL:
|
482
|
+
return "rational";
|
483
|
+
case PM_TOKEN_INTEGER_RATIONAL_IMAGINARY:
|
484
|
+
return "imaginary";
|
485
|
+
case PM_TOKEN_KEYWORD_ALIAS:
|
486
|
+
return "'alias'";
|
487
|
+
case PM_TOKEN_KEYWORD_AND:
|
488
|
+
return "'and'";
|
489
|
+
case PM_TOKEN_KEYWORD_BEGIN:
|
490
|
+
return "'begin'";
|
491
|
+
case PM_TOKEN_KEYWORD_BEGIN_UPCASE:
|
492
|
+
return "'BEGIN'";
|
493
|
+
case PM_TOKEN_KEYWORD_BREAK:
|
494
|
+
return "'break'";
|
495
|
+
case PM_TOKEN_KEYWORD_CASE:
|
496
|
+
return "'case'";
|
497
|
+
case PM_TOKEN_KEYWORD_CLASS:
|
498
|
+
return "'class'";
|
499
|
+
case PM_TOKEN_KEYWORD_DEF:
|
500
|
+
return "'def'";
|
501
|
+
case PM_TOKEN_KEYWORD_DEFINED:
|
502
|
+
return "'defined?'";
|
503
|
+
case PM_TOKEN_KEYWORD_DO:
|
504
|
+
return "'do'";
|
505
|
+
case PM_TOKEN_KEYWORD_DO_LOOP:
|
506
|
+
return "'do'";
|
507
|
+
case PM_TOKEN_KEYWORD_ELSE:
|
508
|
+
return "'else'";
|
509
|
+
case PM_TOKEN_KEYWORD_ELSIF:
|
510
|
+
return "'elsif'";
|
511
|
+
case PM_TOKEN_KEYWORD_END:
|
512
|
+
return "'end'";
|
513
|
+
case PM_TOKEN_KEYWORD_END_UPCASE:
|
514
|
+
return "'END'";
|
515
|
+
case PM_TOKEN_KEYWORD_ENSURE:
|
516
|
+
return "'ensure'";
|
517
|
+
case PM_TOKEN_KEYWORD_FALSE:
|
518
|
+
return "'false'";
|
519
|
+
case PM_TOKEN_KEYWORD_FOR:
|
520
|
+
return "'for'";
|
521
|
+
case PM_TOKEN_KEYWORD_IF:
|
522
|
+
return "'if'";
|
523
|
+
case PM_TOKEN_KEYWORD_IF_MODIFIER:
|
524
|
+
return "'if'";
|
525
|
+
case PM_TOKEN_KEYWORD_IN:
|
526
|
+
return "'in'";
|
527
|
+
case PM_TOKEN_KEYWORD_MODULE:
|
528
|
+
return "'module'";
|
529
|
+
case PM_TOKEN_KEYWORD_NEXT:
|
530
|
+
return "'next'";
|
531
|
+
case PM_TOKEN_KEYWORD_NIL:
|
532
|
+
return "'nil'";
|
533
|
+
case PM_TOKEN_KEYWORD_NOT:
|
534
|
+
return "'not'";
|
535
|
+
case PM_TOKEN_KEYWORD_OR:
|
536
|
+
return "'or'";
|
537
|
+
case PM_TOKEN_KEYWORD_REDO:
|
538
|
+
return "'redo'";
|
539
|
+
case PM_TOKEN_KEYWORD_RESCUE:
|
540
|
+
return "'rescue'";
|
541
|
+
case PM_TOKEN_KEYWORD_RESCUE_MODIFIER:
|
542
|
+
return "'rescue'";
|
543
|
+
case PM_TOKEN_KEYWORD_RETRY:
|
544
|
+
return "'retry'";
|
545
|
+
case PM_TOKEN_KEYWORD_RETURN:
|
546
|
+
return "'return'";
|
547
|
+
case PM_TOKEN_KEYWORD_SELF:
|
548
|
+
return "'self'";
|
549
|
+
case PM_TOKEN_KEYWORD_SUPER:
|
550
|
+
return "'super'";
|
551
|
+
case PM_TOKEN_KEYWORD_THEN:
|
552
|
+
return "'then'";
|
553
|
+
case PM_TOKEN_KEYWORD_TRUE:
|
554
|
+
return "'true'";
|
555
|
+
case PM_TOKEN_KEYWORD_UNDEF:
|
556
|
+
return "'undef'";
|
557
|
+
case PM_TOKEN_KEYWORD_UNLESS:
|
558
|
+
return "'unless'";
|
559
|
+
case PM_TOKEN_KEYWORD_UNLESS_MODIFIER:
|
560
|
+
return "'unless'";
|
561
|
+
case PM_TOKEN_KEYWORD_UNTIL:
|
562
|
+
return "'until'";
|
563
|
+
case PM_TOKEN_KEYWORD_UNTIL_MODIFIER:
|
564
|
+
return "'until'";
|
565
|
+
case PM_TOKEN_KEYWORD_WHEN:
|
566
|
+
return "'when'";
|
567
|
+
case PM_TOKEN_KEYWORD_WHILE:
|
568
|
+
return "'while'";
|
569
|
+
case PM_TOKEN_KEYWORD_WHILE_MODIFIER:
|
570
|
+
return "'while'";
|
571
|
+
case PM_TOKEN_KEYWORD_YIELD:
|
572
|
+
return "'yield'";
|
573
|
+
case PM_TOKEN_KEYWORD___ENCODING__:
|
574
|
+
return "'__ENCODING__'";
|
575
|
+
case PM_TOKEN_KEYWORD___FILE__:
|
576
|
+
return "'__FILE__'";
|
577
|
+
case PM_TOKEN_KEYWORD___LINE__:
|
578
|
+
return "'__LINE__'";
|
579
|
+
case PM_TOKEN_LABEL:
|
580
|
+
return "label";
|
581
|
+
case PM_TOKEN_LABEL_END:
|
582
|
+
return "label terminator";
|
583
|
+
case PM_TOKEN_LAMBDA_BEGIN:
|
584
|
+
return "'{'";
|
585
|
+
case PM_TOKEN_LESS:
|
586
|
+
return "'<'";
|
587
|
+
case PM_TOKEN_LESS_EQUAL:
|
588
|
+
return "'<='";
|
589
|
+
case PM_TOKEN_LESS_EQUAL_GREATER:
|
590
|
+
return "'<=>'";
|
591
|
+
case PM_TOKEN_LESS_LESS:
|
592
|
+
return "'<<'";
|
593
|
+
case PM_TOKEN_LESS_LESS_EQUAL:
|
594
|
+
return "'<<='";
|
595
|
+
case PM_TOKEN_METHOD_NAME:
|
596
|
+
return "method name";
|
597
|
+
case PM_TOKEN_MINUS:
|
598
|
+
return "'-'";
|
599
|
+
case PM_TOKEN_MINUS_EQUAL:
|
600
|
+
return "'-='";
|
601
|
+
case PM_TOKEN_MINUS_GREATER:
|
602
|
+
return "'->'";
|
603
|
+
case PM_TOKEN_NEWLINE:
|
604
|
+
return "newline";
|
605
|
+
case PM_TOKEN_NUMBERED_REFERENCE:
|
606
|
+
return "numbered reference";
|
607
|
+
case PM_TOKEN_PARENTHESIS_LEFT:
|
608
|
+
return "'('";
|
609
|
+
case PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES:
|
610
|
+
return "'('";
|
611
|
+
case PM_TOKEN_PARENTHESIS_RIGHT:
|
612
|
+
return "')'";
|
613
|
+
case PM_TOKEN_PERCENT:
|
614
|
+
return "'%'";
|
615
|
+
case PM_TOKEN_PERCENT_EQUAL:
|
616
|
+
return "'%='";
|
617
|
+
case PM_TOKEN_PERCENT_LOWER_I:
|
618
|
+
return "'%i'";
|
619
|
+
case PM_TOKEN_PERCENT_LOWER_W:
|
620
|
+
return "'%w'";
|
621
|
+
case PM_TOKEN_PERCENT_LOWER_X:
|
622
|
+
return "'%x'";
|
623
|
+
case PM_TOKEN_PERCENT_UPPER_I:
|
624
|
+
return "'%I'";
|
625
|
+
case PM_TOKEN_PERCENT_UPPER_W:
|
626
|
+
return "'%W'";
|
627
|
+
case PM_TOKEN_PIPE:
|
628
|
+
return "'|'";
|
629
|
+
case PM_TOKEN_PIPE_EQUAL:
|
630
|
+
return "'|='";
|
631
|
+
case PM_TOKEN_PIPE_PIPE:
|
632
|
+
return "'||'";
|
633
|
+
case PM_TOKEN_PIPE_PIPE_EQUAL:
|
634
|
+
return "'||='";
|
635
|
+
case PM_TOKEN_PLUS:
|
636
|
+
return "'+'";
|
637
|
+
case PM_TOKEN_PLUS_EQUAL:
|
638
|
+
return "'+='";
|
639
|
+
case PM_TOKEN_QUESTION_MARK:
|
640
|
+
return "'?'";
|
641
|
+
case PM_TOKEN_REGEXP_BEGIN:
|
642
|
+
return "regular expression beginning";
|
643
|
+
case PM_TOKEN_REGEXP_END:
|
644
|
+
return "regular expression ending";
|
645
|
+
case PM_TOKEN_SEMICOLON:
|
646
|
+
return "';'";
|
647
|
+
case PM_TOKEN_SLASH:
|
648
|
+
return "'/'";
|
649
|
+
case PM_TOKEN_SLASH_EQUAL:
|
650
|
+
return "'/='";
|
651
|
+
case PM_TOKEN_STAR:
|
652
|
+
return "'*'";
|
653
|
+
case PM_TOKEN_STAR_EQUAL:
|
654
|
+
return "'*='";
|
655
|
+
case PM_TOKEN_STAR_STAR:
|
656
|
+
return "'**'";
|
657
|
+
case PM_TOKEN_STAR_STAR_EQUAL:
|
658
|
+
return "'**='";
|
659
|
+
case PM_TOKEN_STRING_BEGIN:
|
660
|
+
return "string beginning";
|
661
|
+
case PM_TOKEN_STRING_CONTENT:
|
662
|
+
return "string content";
|
663
|
+
case PM_TOKEN_STRING_END:
|
664
|
+
return "string ending";
|
665
|
+
case PM_TOKEN_SYMBOL_BEGIN:
|
666
|
+
return "symbol beginning";
|
667
|
+
case PM_TOKEN_TILDE:
|
668
|
+
return "'~'";
|
669
|
+
case PM_TOKEN_UAMPERSAND:
|
670
|
+
return "'&'";
|
671
|
+
case PM_TOKEN_UCOLON_COLON:
|
672
|
+
return "'::'";
|
673
|
+
case PM_TOKEN_UDOT_DOT:
|
674
|
+
return "'..'";
|
675
|
+
case PM_TOKEN_UDOT_DOT_DOT:
|
676
|
+
return "'...'";
|
677
|
+
case PM_TOKEN_UMINUS:
|
678
|
+
return "'-'";
|
679
|
+
case PM_TOKEN_UMINUS_NUM:
|
680
|
+
return "'-'";
|
681
|
+
case PM_TOKEN_UPLUS:
|
682
|
+
return "'+'";
|
683
|
+
case PM_TOKEN_USTAR:
|
684
|
+
return "*";
|
685
|
+
case PM_TOKEN_USTAR_STAR:
|
686
|
+
return "'**'";
|
687
|
+
case PM_TOKEN_WORDS_SEP:
|
688
|
+
return "string separator";
|
689
|
+
case PM_TOKEN___END__:
|
690
|
+
return "'__END__'";
|
691
|
+
case PM_TOKEN_MAXIMUM:
|
692
|
+
assert(false && "unreachable");
|
693
|
+
return "";
|
694
|
+
}
|
695
|
+
|
696
|
+
// Provide a default, because some compilers can't determine that the above
|
697
|
+
// switch is exhaustive.
|
698
|
+
assert(false && "unreachable");
|
699
|
+
return "";
|
351
700
|
}
|
data/src/util/pm_buffer.c
CHANGED
@@ -160,6 +160,17 @@ pm_buffer_append_varsint(pm_buffer_t *buffer, int32_t value) {
|
|
160
160
|
pm_buffer_append_varuint(buffer, unsigned_int);
|
161
161
|
}
|
162
162
|
|
163
|
+
/**
|
164
|
+
* Prepend the given string to the buffer.
|
165
|
+
*/
|
166
|
+
void
|
167
|
+
pm_buffer_prepend_string(pm_buffer_t *buffer, const char *value, size_t length) {
|
168
|
+
size_t cursor = buffer->length;
|
169
|
+
pm_buffer_append_length(buffer, length);
|
170
|
+
memmove(buffer->value + length, buffer->value, cursor);
|
171
|
+
memcpy(buffer->value, value, length);
|
172
|
+
}
|
173
|
+
|
163
174
|
/**
|
164
175
|
* Concatenate one buffer onto another.
|
165
176
|
*/
|
data/src/util/pm_constant_pool.c
CHANGED
@@ -124,13 +124,13 @@ pm_constant_pool_resize(pm_constant_pool_t *pool) {
|
|
124
124
|
|
125
125
|
// If an id is set on this constant, then we know we have content here.
|
126
126
|
// In this case we need to insert it into the next constant pool.
|
127
|
-
if (bucket->id != 0) {
|
127
|
+
if (bucket->id != PM_CONSTANT_ID_UNSET) {
|
128
128
|
uint32_t next_index = bucket->hash & mask;
|
129
129
|
|
130
130
|
// This implements linear scanning to find the next available slot
|
131
131
|
// in case this index is already taken. We don't need to bother
|
132
132
|
// comparing the values since we know that the hash is unique.
|
133
|
-
while (next_buckets[next_index].id != 0) {
|
133
|
+
while (next_buckets[next_index].id != PM_CONSTANT_ID_UNSET) {
|
134
134
|
next_index = (next_index + 1) & mask;
|
135
135
|
}
|
136
136
|
|
@@ -177,17 +177,42 @@ pm_constant_pool_init(pm_constant_pool_t *pool, uint32_t capacity) {
|
|
177
177
|
*/
|
178
178
|
pm_constant_t *
|
179
179
|
pm_constant_pool_id_to_constant(const pm_constant_pool_t *pool, pm_constant_id_t constant_id) {
|
180
|
-
assert(constant_id > 0 && constant_id <= pool->size);
|
180
|
+
assert(constant_id != PM_CONSTANT_ID_UNSET && constant_id <= pool->size);
|
181
181
|
return &pool->constants[constant_id - 1];
|
182
182
|
}
|
183
183
|
|
184
|
+
/**
|
185
|
+
* Find a constant in a constant pool. Returns the id of the constant, or 0 if
|
186
|
+
* the constant is not found.
|
187
|
+
*/
|
188
|
+
pm_constant_id_t
|
189
|
+
pm_constant_pool_find(const pm_constant_pool_t *pool, const uint8_t *start, size_t length) {
|
190
|
+
assert(is_power_of_two(pool->capacity));
|
191
|
+
const uint32_t mask = pool->capacity - 1;
|
192
|
+
|
193
|
+
uint32_t hash = pm_constant_pool_hash(start, length);
|
194
|
+
uint32_t index = hash & mask;
|
195
|
+
pm_constant_pool_bucket_t *bucket;
|
196
|
+
|
197
|
+
while (bucket = &pool->buckets[index], bucket->id != PM_CONSTANT_ID_UNSET) {
|
198
|
+
pm_constant_t *constant = &pool->constants[bucket->id - 1];
|
199
|
+
if ((constant->length == length) && memcmp(constant->start, start, length) == 0) {
|
200
|
+
return bucket->id;
|
201
|
+
}
|
202
|
+
|
203
|
+
index = (index + 1) & mask;
|
204
|
+
}
|
205
|
+
|
206
|
+
return PM_CONSTANT_ID_UNSET;
|
207
|
+
}
|
208
|
+
|
184
209
|
/**
|
185
210
|
* Insert a constant into a constant pool and return its index in the pool.
|
186
211
|
*/
|
187
212
|
static inline pm_constant_id_t
|
188
213
|
pm_constant_pool_insert(pm_constant_pool_t *pool, const uint8_t *start, size_t length, pm_constant_pool_bucket_type_t type) {
|
189
214
|
if (pool->size >= (pool->capacity / 4 * 3)) {
|
190
|
-
if (!pm_constant_pool_resize(pool)) return 0;
|
215
|
+
if (!pm_constant_pool_resize(pool)) return PM_CONSTANT_ID_UNSET;
|
191
216
|
}
|
192
217
|
|
193
218
|
assert(is_power_of_two(pool->capacity));
|
@@ -197,7 +222,7 @@ pm_constant_pool_insert(pm_constant_pool_t *pool, const uint8_t *start, size_t l
|
|
197
222
|
uint32_t index = hash & mask;
|
198
223
|
pm_constant_pool_bucket_t *bucket;
|
199
224
|
|
200
|
-
while (bucket = &pool->buckets[index], bucket->id != 0) {
|
225
|
+
while (bucket = &pool->buckets[index], bucket->id != PM_CONSTANT_ID_UNSET) {
|
201
226
|
// If there is a collision, then we need to check if the content is the
|
202
227
|
// same as the content we are trying to insert. If it is, then we can
|
203
228
|
// return the id of the existing constant.
|
@@ -248,8 +273,8 @@ pm_constant_pool_insert(pm_constant_pool_t *pool, const uint8_t *start, size_t l
|
|
248
273
|
}
|
249
274
|
|
250
275
|
/**
|
251
|
-
* Insert a constant into a constant pool. Returns the id of the constant, or 0
|
252
|
-
* if any potential calls to resize fail.
|
276
|
+
* Insert a constant into a constant pool. Returns the id of the constant, or
|
277
|
+
* PM_CONSTANT_ID_UNSET if any potential calls to resize fail.
|
253
278
|
*/
|
254
279
|
pm_constant_id_t
|
255
280
|
pm_constant_pool_insert_shared(pm_constant_pool_t *pool, const uint8_t *start, size_t length) {
|
@@ -258,8 +283,8 @@ pm_constant_pool_insert_shared(pm_constant_pool_t *pool, const uint8_t *start, s
|
|
258
283
|
|
259
284
|
/**
|
260
285
|
* Insert a constant into a constant pool from memory that is now owned by the
|
261
|
-
* constant pool. Returns the id of the constant, or 0 if any potential calls to
|
262
|
-
* resize fail.
|
286
|
+
* constant pool. Returns the id of the constant, or PM_CONSTANT_ID_UNSET if any
|
287
|
+
* potential calls to resize fail.
|
263
288
|
*/
|
264
289
|
pm_constant_id_t
|
265
290
|
pm_constant_pool_insert_owned(pm_constant_pool_t *pool, const uint8_t *start, size_t length) {
|
@@ -268,7 +293,8 @@ pm_constant_pool_insert_owned(pm_constant_pool_t *pool, const uint8_t *start, si
|
|
268
293
|
|
269
294
|
/**
|
270
295
|
* Insert a constant into a constant pool from memory that is constant. Returns
|
271
|
-
* the id of the constant, or 0 if any potential calls to resize fail.
|
296
|
+
* the id of the constant, or PM_CONSTANT_ID_UNSET if any potential calls to
|
297
|
+
* resize fail.
|
272
298
|
*/
|
273
299
|
pm_constant_id_t
|
274
300
|
pm_constant_pool_insert_constant(pm_constant_pool_t *pool, const uint8_t *start, size_t length) {
|
@@ -286,7 +312,7 @@ pm_constant_pool_free(pm_constant_pool_t *pool) {
|
|
286
312
|
pm_constant_pool_bucket_t *bucket = &pool->buckets[index];
|
287
313
|
|
288
314
|
// If an id is set on this constant, then we know we have content here.
|
289
|
-
if (bucket->id != 0 && bucket->type == PM_CONSTANT_POOL_BUCKET_OWNED) {
|
315
|
+
if (bucket->id != PM_CONSTANT_ID_UNSET && bucket->type == PM_CONSTANT_POOL_BUCKET_OWNED) {
|
290
316
|
pm_constant_t *constant = &pool->constants[bucket->id - 1];
|
291
317
|
free((void *) constant->start);
|
292
318
|
}
|