prism 0.19.0 → 0.24.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +102 -1
- data/Makefile +5 -0
- data/README.md +9 -6
- data/config.yml +236 -38
- data/docs/build_system.md +19 -2
- data/docs/cruby_compilation.md +27 -0
- data/docs/parser_translation.md +34 -0
- data/docs/parsing_rules.md +19 -0
- data/docs/releasing.md +84 -16
- data/docs/ruby_api.md +1 -1
- data/docs/ruby_parser_translation.md +19 -0
- data/docs/serialization.md +19 -5
- data/ext/prism/api_node.c +1989 -1525
- data/ext/prism/extension.c +130 -30
- data/ext/prism/extension.h +2 -2
- data/include/prism/ast.h +1700 -505
- data/include/prism/defines.h +8 -0
- data/include/prism/diagnostic.h +49 -7
- data/include/prism/encoding.h +17 -0
- data/include/prism/options.h +40 -14
- data/include/prism/parser.h +34 -18
- data/include/prism/util/pm_buffer.h +9 -0
- data/include/prism/util/pm_constant_pool.h +18 -0
- data/include/prism/util/pm_newline_list.h +4 -14
- data/include/prism/util/pm_strpbrk.h +4 -1
- data/include/prism/version.h +2 -2
- data/include/prism.h +19 -2
- data/lib/prism/debug.rb +11 -5
- data/lib/prism/desugar_compiler.rb +225 -80
- data/lib/prism/dot_visitor.rb +36 -14
- data/lib/prism/dsl.rb +302 -299
- data/lib/prism/ffi.rb +107 -76
- data/lib/prism/lex_compat.rb +17 -1
- data/lib/prism/node.rb +4580 -2607
- data/lib/prism/node_ext.rb +27 -4
- data/lib/prism/parse_result.rb +75 -29
- data/lib/prism/serialize.rb +633 -305
- data/lib/prism/translation/parser/compiler.rb +1838 -0
- data/lib/prism/translation/parser/lexer.rb +335 -0
- data/lib/prism/translation/parser/rubocop.rb +45 -0
- data/lib/prism/translation/parser.rb +190 -0
- data/lib/prism/translation/parser33.rb +12 -0
- data/lib/prism/translation/parser34.rb +12 -0
- data/lib/prism/translation/ripper.rb +696 -0
- data/lib/prism/translation/ruby_parser.rb +1521 -0
- data/lib/prism/translation.rb +11 -0
- data/lib/prism.rb +1 -1
- data/prism.gemspec +18 -7
- data/rbi/prism.rbi +150 -88
- data/rbi/prism_static.rbi +15 -3
- data/sig/prism.rbs +996 -961
- data/sig/prism_static.rbs +123 -46
- data/src/diagnostic.c +264 -219
- data/src/encoding.c +21 -26
- data/src/node.c +2 -6
- data/src/options.c +29 -5
- data/src/prettyprint.c +176 -44
- data/src/prism.c +1499 -564
- data/src/serialize.c +35 -21
- data/src/token_type.c +353 -4
- data/src/util/pm_buffer.c +11 -0
- data/src/util/pm_constant_pool.c +37 -11
- data/src/util/pm_newline_list.c +6 -15
- data/src/util/pm_string.c +0 -7
- data/src/util/pm_strpbrk.c +122 -14
- metadata +16 -5
- data/docs/building.md +0 -29
- data/lib/prism/ripper_compat.rb +0 -207
data/src/serialize.c
CHANGED
@@ -158,11 +158,7 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
|
|
158
158
|
}
|
159
159
|
case PM_ASSOC_NODE: {
|
160
160
|
pm_serialize_node(parser, (pm_node_t *)((pm_assoc_node_t *)node)->key, buffer);
|
161
|
-
|
162
|
-
pm_buffer_append_byte(buffer, 0);
|
163
|
-
} else {
|
164
|
-
pm_serialize_node(parser, (pm_node_t *)((pm_assoc_node_t *)node)->value, buffer);
|
165
|
-
}
|
161
|
+
pm_serialize_node(parser, (pm_node_t *)((pm_assoc_node_t *)node)->value, buffer);
|
166
162
|
if (((pm_assoc_node_t *)node)->operator_loc.start == NULL) {
|
167
163
|
pm_buffer_append_byte(buffer, 0);
|
168
164
|
} else {
|
@@ -229,6 +225,7 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
|
|
229
225
|
break;
|
230
226
|
}
|
231
227
|
case PM_BLOCK_LOCAL_VARIABLE_NODE: {
|
228
|
+
pm_buffer_append_varuint(buffer, (uint32_t)(node->flags & ~PM_NODE_FLAG_COMMON_MASK));
|
232
229
|
pm_buffer_append_varuint(buffer, pm_sizet_to_u32(((pm_block_local_variable_node_t *)node)->name));
|
233
230
|
break;
|
234
231
|
}
|
@@ -238,7 +235,6 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
|
|
238
235
|
for (uint32_t index = 0; index < locals_size; index++) {
|
239
236
|
pm_buffer_append_varuint(buffer, pm_sizet_to_u32(((pm_block_node_t *)node)->locals.ids[index]));
|
240
237
|
}
|
241
|
-
pm_buffer_append_varuint(buffer, ((pm_block_node_t *)node)->locals_body_index);
|
242
238
|
if (((pm_block_node_t *)node)->parameters == NULL) {
|
243
239
|
pm_buffer_append_byte(buffer, 0);
|
244
240
|
} else {
|
@@ -254,6 +250,7 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
|
|
254
250
|
break;
|
255
251
|
}
|
256
252
|
case PM_BLOCK_PARAMETER_NODE: {
|
253
|
+
pm_buffer_append_varuint(buffer, (uint32_t)(node->flags & ~PM_NODE_FLAG_COMMON_MASK));
|
257
254
|
pm_buffer_append_varuint(buffer, pm_sizet_to_u32(((pm_block_parameter_node_t *)node)->name));
|
258
255
|
if (((pm_block_parameter_node_t *)node)->name_loc.start == NULL) {
|
259
256
|
pm_buffer_append_byte(buffer, 0);
|
@@ -651,7 +648,6 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
|
|
651
648
|
for (uint32_t index = 0; index < locals_size; index++) {
|
652
649
|
pm_buffer_append_varuint(buffer, pm_sizet_to_u32(((pm_def_node_t *)node)->locals.ids[index]));
|
653
650
|
}
|
654
|
-
pm_buffer_append_varuint(buffer, ((pm_def_node_t *)node)->locals_body_index);
|
655
651
|
pm_serialize_location(parser, &((pm_def_node_t *)node)->def_keyword_loc, buffer);
|
656
652
|
if (((pm_def_node_t *)node)->operator_loc.start == NULL) {
|
657
653
|
pm_buffer_append_byte(buffer, 0);
|
@@ -1190,6 +1186,7 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
|
|
1190
1186
|
break;
|
1191
1187
|
}
|
1192
1188
|
case PM_KEYWORD_REST_PARAMETER_NODE: {
|
1189
|
+
pm_buffer_append_varuint(buffer, (uint32_t)(node->flags & ~PM_NODE_FLAG_COMMON_MASK));
|
1193
1190
|
pm_buffer_append_varuint(buffer, pm_sizet_to_u32(((pm_keyword_rest_parameter_node_t *)node)->name));
|
1194
1191
|
if (((pm_keyword_rest_parameter_node_t *)node)->name_loc.start == NULL) {
|
1195
1192
|
pm_buffer_append_byte(buffer, 0);
|
@@ -1206,7 +1203,6 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
|
|
1206
1203
|
for (uint32_t index = 0; index < locals_size; index++) {
|
1207
1204
|
pm_buffer_append_varuint(buffer, pm_sizet_to_u32(((pm_lambda_node_t *)node)->locals.ids[index]));
|
1208
1205
|
}
|
1209
|
-
pm_buffer_append_varuint(buffer, ((pm_lambda_node_t *)node)->locals_body_index);
|
1210
1206
|
pm_serialize_location(parser, &((pm_lambda_node_t *)node)->operator_loc, buffer);
|
1211
1207
|
pm_serialize_location(parser, &((pm_lambda_node_t *)node)->opening_loc, buffer);
|
1212
1208
|
pm_serialize_location(parser, &((pm_lambda_node_t *)node)->closing_loc, buffer);
|
@@ -1402,12 +1398,14 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
|
|
1402
1398
|
break;
|
1403
1399
|
}
|
1404
1400
|
case PM_OPTIONAL_KEYWORD_PARAMETER_NODE: {
|
1401
|
+
pm_buffer_append_varuint(buffer, (uint32_t)(node->flags & ~PM_NODE_FLAG_COMMON_MASK));
|
1405
1402
|
pm_buffer_append_varuint(buffer, pm_sizet_to_u32(((pm_optional_keyword_parameter_node_t *)node)->name));
|
1406
1403
|
pm_serialize_location(parser, &((pm_optional_keyword_parameter_node_t *)node)->name_loc, buffer);
|
1407
1404
|
pm_serialize_node(parser, (pm_node_t *)((pm_optional_keyword_parameter_node_t *)node)->value, buffer);
|
1408
1405
|
break;
|
1409
1406
|
}
|
1410
1407
|
case PM_OPTIONAL_PARAMETER_NODE: {
|
1408
|
+
pm_buffer_append_varuint(buffer, (uint32_t)(node->flags & ~PM_NODE_FLAG_COMMON_MASK));
|
1411
1409
|
pm_buffer_append_varuint(buffer, pm_sizet_to_u32(((pm_optional_parameter_node_t *)node)->name));
|
1412
1410
|
pm_serialize_location(parser, &((pm_optional_parameter_node_t *)node)->name_loc, buffer);
|
1413
1411
|
pm_serialize_location(parser, &((pm_optional_parameter_node_t *)node)->operator_loc, buffer);
|
@@ -1542,11 +1540,13 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
|
|
1542
1540
|
break;
|
1543
1541
|
}
|
1544
1542
|
case PM_REQUIRED_KEYWORD_PARAMETER_NODE: {
|
1543
|
+
pm_buffer_append_varuint(buffer, (uint32_t)(node->flags & ~PM_NODE_FLAG_COMMON_MASK));
|
1545
1544
|
pm_buffer_append_varuint(buffer, pm_sizet_to_u32(((pm_required_keyword_parameter_node_t *)node)->name));
|
1546
1545
|
pm_serialize_location(parser, &((pm_required_keyword_parameter_node_t *)node)->name_loc, buffer);
|
1547
1546
|
break;
|
1548
1547
|
}
|
1549
1548
|
case PM_REQUIRED_PARAMETER_NODE: {
|
1549
|
+
pm_buffer_append_varuint(buffer, (uint32_t)(node->flags & ~PM_NODE_FLAG_COMMON_MASK));
|
1550
1550
|
pm_buffer_append_varuint(buffer, pm_sizet_to_u32(((pm_required_parameter_node_t *)node)->name));
|
1551
1551
|
break;
|
1552
1552
|
}
|
@@ -1587,6 +1587,7 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
|
|
1587
1587
|
break;
|
1588
1588
|
}
|
1589
1589
|
case PM_REST_PARAMETER_NODE: {
|
1590
|
+
pm_buffer_append_varuint(buffer, (uint32_t)(node->flags & ~PM_NODE_FLAG_COMMON_MASK));
|
1590
1591
|
pm_buffer_append_varuint(buffer, pm_sizet_to_u32(((pm_rest_parameter_node_t *)node)->name));
|
1591
1592
|
if (((pm_rest_parameter_node_t *)node)->name_loc.start == NULL) {
|
1592
1593
|
pm_buffer_append_byte(buffer, 0);
|
@@ -1842,6 +1843,17 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
|
|
1842
1843
|
}
|
1843
1844
|
}
|
1844
1845
|
|
1846
|
+
static void
|
1847
|
+
pm_serialize_newline_list(pm_newline_list_t *list, pm_buffer_t *buffer) {
|
1848
|
+
uint32_t size = pm_sizet_to_u32(list->size);
|
1849
|
+
pm_buffer_append_varuint(buffer, size);
|
1850
|
+
|
1851
|
+
for (uint32_t i = 0; i < size; i++) {
|
1852
|
+
uint32_t offset = pm_sizet_to_u32(list->offsets[i]);
|
1853
|
+
pm_buffer_append_varuint(buffer, offset);
|
1854
|
+
}
|
1855
|
+
}
|
1856
|
+
|
1845
1857
|
static void
|
1846
1858
|
pm_serialize_comment(pm_parser_t *parser, pm_comment_t *comment, pm_buffer_t *buffer) {
|
1847
1859
|
// serialize type
|
@@ -1904,6 +1916,8 @@ pm_serialize_diagnostic(pm_parser_t *parser, pm_diagnostic_t *diagnostic, pm_buf
|
|
1904
1916
|
|
1905
1917
|
// serialize location
|
1906
1918
|
pm_serialize_location(parser, &diagnostic->location, buffer);
|
1919
|
+
|
1920
|
+
pm_buffer_append_byte(buffer, diagnostic->level);
|
1907
1921
|
}
|
1908
1922
|
|
1909
1923
|
static void
|
@@ -1926,19 +1940,25 @@ pm_serialize_encoding(const pm_encoding_t *encoding, pm_buffer_t *buffer) {
|
|
1926
1940
|
pm_buffer_append_string(buffer, encoding->name, encoding_length);
|
1927
1941
|
}
|
1928
1942
|
|
1929
|
-
|
1930
|
-
|
1931
|
-
* Serialize the encoding, metadata, nodes, and constant pool.
|
1932
|
-
*/
|
1933
|
-
void
|
1934
|
-
pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
|
1943
|
+
static void
|
1944
|
+
pm_serialize_metadata(pm_parser_t *parser, pm_buffer_t *buffer) {
|
1935
1945
|
pm_serialize_encoding(parser->encoding, buffer);
|
1936
1946
|
pm_buffer_append_varsint(buffer, parser->start_line);
|
1947
|
+
pm_serialize_newline_list(&parser->newline_list, buffer);
|
1937
1948
|
pm_serialize_comment_list(parser, &parser->comment_list, buffer);
|
1938
1949
|
pm_serialize_magic_comment_list(parser, &parser->magic_comment_list, buffer);
|
1939
1950
|
pm_serialize_data_loc(parser, buffer);
|
1940
1951
|
pm_serialize_diagnostic_list(parser, &parser->error_list, buffer);
|
1941
1952
|
pm_serialize_diagnostic_list(parser, &parser->warning_list, buffer);
|
1953
|
+
}
|
1954
|
+
|
1955
|
+
#line 243 "serialize.c.erb"
|
1956
|
+
/**
|
1957
|
+
* Serialize the metadata, nodes, and constant pool.
|
1958
|
+
*/
|
1959
|
+
void
|
1960
|
+
pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
|
1961
|
+
pm_serialize_metadata(parser, buffer);
|
1942
1962
|
|
1943
1963
|
// Here we're going to leave space for the offset of the constant pool in
|
1944
1964
|
// the buffer.
|
@@ -2029,13 +2049,7 @@ pm_serialize_lex(pm_buffer_t *buffer, const uint8_t *source, size_t size, const
|
|
2029
2049
|
// Append 0 to mark end of tokens.
|
2030
2050
|
pm_buffer_append_byte(buffer, 0);
|
2031
2051
|
|
2032
|
-
|
2033
|
-
pm_buffer_append_varsint(buffer, parser.start_line);
|
2034
|
-
pm_serialize_comment_list(&parser, &parser.comment_list, buffer);
|
2035
|
-
pm_serialize_magic_comment_list(&parser, &parser.magic_comment_list, buffer);
|
2036
|
-
pm_serialize_data_loc(&parser, buffer);
|
2037
|
-
pm_serialize_diagnostic_list(&parser, &parser.error_list, buffer);
|
2038
|
-
pm_serialize_diagnostic_list(&parser, &parser.warning_list, buffer);
|
2052
|
+
pm_serialize_metadata(&parser, buffer);
|
2039
2053
|
|
2040
2054
|
pm_node_destroy(&parser, node);
|
2041
2055
|
pm_parser_free(&parser);
|
data/src/token_type.c
CHANGED
@@ -13,8 +13,7 @@
|
|
13
13
|
* Returns a string representation of the given token type.
|
14
14
|
*/
|
15
15
|
PRISM_EXPORTED_FUNCTION const char *
|
16
|
-
|
17
|
-
{
|
16
|
+
pm_token_type_name(pm_token_type_t token_type) {
|
18
17
|
switch (token_type) {
|
19
18
|
case PM_TOKEN_EOF:
|
20
19
|
return "EOF";
|
@@ -345,7 +344,357 @@ pm_token_type_to_str(pm_token_type_t token_type)
|
|
345
344
|
case PM_TOKEN___END__:
|
346
345
|
return "__END__";
|
347
346
|
case PM_TOKEN_MAXIMUM:
|
348
|
-
|
347
|
+
assert(false && "unreachable");
|
348
|
+
return "";
|
349
349
|
}
|
350
|
-
|
350
|
+
|
351
|
+
// Provide a default, because some compilers can't determine that the above
|
352
|
+
// switch is exhaustive.
|
353
|
+
assert(false && "unreachable");
|
354
|
+
return "";
|
355
|
+
}
|
356
|
+
|
357
|
+
/**
|
358
|
+
* Returns the human name of the given token type.
|
359
|
+
*/
|
360
|
+
const char *
|
361
|
+
pm_token_type_human(pm_token_type_t token_type) {
|
362
|
+
switch (token_type) {
|
363
|
+
case PM_TOKEN_EOF:
|
364
|
+
return "end of file";
|
365
|
+
case PM_TOKEN_MISSING:
|
366
|
+
return "missing token";
|
367
|
+
case PM_TOKEN_NOT_PROVIDED:
|
368
|
+
return "not provided token";
|
369
|
+
case PM_TOKEN_AMPERSAND:
|
370
|
+
return "'&'";
|
371
|
+
case PM_TOKEN_AMPERSAND_AMPERSAND:
|
372
|
+
return "'&&'";
|
373
|
+
case PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL:
|
374
|
+
return "'&&='";
|
375
|
+
case PM_TOKEN_AMPERSAND_DOT:
|
376
|
+
return "'&.'";
|
377
|
+
case PM_TOKEN_AMPERSAND_EQUAL:
|
378
|
+
return "'&='";
|
379
|
+
case PM_TOKEN_BACKTICK:
|
380
|
+
return "'`'";
|
381
|
+
case PM_TOKEN_BACK_REFERENCE:
|
382
|
+
return "back reference";
|
383
|
+
case PM_TOKEN_BANG:
|
384
|
+
return "'!'";
|
385
|
+
case PM_TOKEN_BANG_EQUAL:
|
386
|
+
return "'!='";
|
387
|
+
case PM_TOKEN_BANG_TILDE:
|
388
|
+
return "'!~'";
|
389
|
+
case PM_TOKEN_BRACE_LEFT:
|
390
|
+
return "'{'";
|
391
|
+
case PM_TOKEN_BRACE_RIGHT:
|
392
|
+
return "'}'";
|
393
|
+
case PM_TOKEN_BRACKET_LEFT:
|
394
|
+
return "'['";
|
395
|
+
case PM_TOKEN_BRACKET_LEFT_ARRAY:
|
396
|
+
return "'['";
|
397
|
+
case PM_TOKEN_BRACKET_LEFT_RIGHT:
|
398
|
+
return "'[]'";
|
399
|
+
case PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL:
|
400
|
+
return "'[]='";
|
401
|
+
case PM_TOKEN_BRACKET_RIGHT:
|
402
|
+
return "']'";
|
403
|
+
case PM_TOKEN_CARET:
|
404
|
+
return "'^'";
|
405
|
+
case PM_TOKEN_CARET_EQUAL:
|
406
|
+
return "'^='";
|
407
|
+
case PM_TOKEN_CHARACTER_LITERAL:
|
408
|
+
return "character literal";
|
409
|
+
case PM_TOKEN_CLASS_VARIABLE:
|
410
|
+
return "class variable";
|
411
|
+
case PM_TOKEN_COLON:
|
412
|
+
return "':'";
|
413
|
+
case PM_TOKEN_COLON_COLON:
|
414
|
+
return "'::'";
|
415
|
+
case PM_TOKEN_COMMA:
|
416
|
+
return "','";
|
417
|
+
case PM_TOKEN_COMMENT:
|
418
|
+
return "comment";
|
419
|
+
case PM_TOKEN_CONSTANT:
|
420
|
+
return "constant";
|
421
|
+
case PM_TOKEN_DOT:
|
422
|
+
return "'.'";
|
423
|
+
case PM_TOKEN_DOT_DOT:
|
424
|
+
return "'..'";
|
425
|
+
case PM_TOKEN_DOT_DOT_DOT:
|
426
|
+
return "'...'";
|
427
|
+
case PM_TOKEN_EMBDOC_BEGIN:
|
428
|
+
return "'=begin'";
|
429
|
+
case PM_TOKEN_EMBDOC_END:
|
430
|
+
return "'=end'";
|
431
|
+
case PM_TOKEN_EMBDOC_LINE:
|
432
|
+
return "embedded documentation line";
|
433
|
+
case PM_TOKEN_EMBEXPR_BEGIN:
|
434
|
+
return "'#{'";
|
435
|
+
case PM_TOKEN_EMBEXPR_END:
|
436
|
+
return "'}'";
|
437
|
+
case PM_TOKEN_EMBVAR:
|
438
|
+
return "'#'";
|
439
|
+
case PM_TOKEN_EQUAL:
|
440
|
+
return "'='";
|
441
|
+
case PM_TOKEN_EQUAL_EQUAL:
|
442
|
+
return "'=='";
|
443
|
+
case PM_TOKEN_EQUAL_EQUAL_EQUAL:
|
444
|
+
return "'==='";
|
445
|
+
case PM_TOKEN_EQUAL_GREATER:
|
446
|
+
return "'=>'";
|
447
|
+
case PM_TOKEN_EQUAL_TILDE:
|
448
|
+
return "'=~'";
|
449
|
+
case PM_TOKEN_FLOAT:
|
450
|
+
return "float";
|
451
|
+
case PM_TOKEN_FLOAT_IMAGINARY:
|
452
|
+
return "imaginary";
|
453
|
+
case PM_TOKEN_FLOAT_RATIONAL:
|
454
|
+
return "rational";
|
455
|
+
case PM_TOKEN_FLOAT_RATIONAL_IMAGINARY:
|
456
|
+
return "imaginary";
|
457
|
+
case PM_TOKEN_GLOBAL_VARIABLE:
|
458
|
+
return "global variable";
|
459
|
+
case PM_TOKEN_GREATER:
|
460
|
+
return "'>'";
|
461
|
+
case PM_TOKEN_GREATER_EQUAL:
|
462
|
+
return "'>='";
|
463
|
+
case PM_TOKEN_GREATER_GREATER:
|
464
|
+
return "'>>'";
|
465
|
+
case PM_TOKEN_GREATER_GREATER_EQUAL:
|
466
|
+
return "'>>='";
|
467
|
+
case PM_TOKEN_HEREDOC_END:
|
468
|
+
return "heredoc ending";
|
469
|
+
case PM_TOKEN_HEREDOC_START:
|
470
|
+
return "heredoc beginning";
|
471
|
+
case PM_TOKEN_IDENTIFIER:
|
472
|
+
return "local variable or method";
|
473
|
+
case PM_TOKEN_IGNORED_NEWLINE:
|
474
|
+
return "ignored newline";
|
475
|
+
case PM_TOKEN_INSTANCE_VARIABLE:
|
476
|
+
return "instance variable";
|
477
|
+
case PM_TOKEN_INTEGER:
|
478
|
+
return "integer";
|
479
|
+
case PM_TOKEN_INTEGER_IMAGINARY:
|
480
|
+
return "imaginary";
|
481
|
+
case PM_TOKEN_INTEGER_RATIONAL:
|
482
|
+
return "rational";
|
483
|
+
case PM_TOKEN_INTEGER_RATIONAL_IMAGINARY:
|
484
|
+
return "imaginary";
|
485
|
+
case PM_TOKEN_KEYWORD_ALIAS:
|
486
|
+
return "'alias'";
|
487
|
+
case PM_TOKEN_KEYWORD_AND:
|
488
|
+
return "'and'";
|
489
|
+
case PM_TOKEN_KEYWORD_BEGIN:
|
490
|
+
return "'begin'";
|
491
|
+
case PM_TOKEN_KEYWORD_BEGIN_UPCASE:
|
492
|
+
return "'BEGIN'";
|
493
|
+
case PM_TOKEN_KEYWORD_BREAK:
|
494
|
+
return "'break'";
|
495
|
+
case PM_TOKEN_KEYWORD_CASE:
|
496
|
+
return "'case'";
|
497
|
+
case PM_TOKEN_KEYWORD_CLASS:
|
498
|
+
return "'class'";
|
499
|
+
case PM_TOKEN_KEYWORD_DEF:
|
500
|
+
return "'def'";
|
501
|
+
case PM_TOKEN_KEYWORD_DEFINED:
|
502
|
+
return "'defined?'";
|
503
|
+
case PM_TOKEN_KEYWORD_DO:
|
504
|
+
return "'do'";
|
505
|
+
case PM_TOKEN_KEYWORD_DO_LOOP:
|
506
|
+
return "'do'";
|
507
|
+
case PM_TOKEN_KEYWORD_ELSE:
|
508
|
+
return "'else'";
|
509
|
+
case PM_TOKEN_KEYWORD_ELSIF:
|
510
|
+
return "'elsif'";
|
511
|
+
case PM_TOKEN_KEYWORD_END:
|
512
|
+
return "'end'";
|
513
|
+
case PM_TOKEN_KEYWORD_END_UPCASE:
|
514
|
+
return "'END'";
|
515
|
+
case PM_TOKEN_KEYWORD_ENSURE:
|
516
|
+
return "'ensure'";
|
517
|
+
case PM_TOKEN_KEYWORD_FALSE:
|
518
|
+
return "'false'";
|
519
|
+
case PM_TOKEN_KEYWORD_FOR:
|
520
|
+
return "'for'";
|
521
|
+
case PM_TOKEN_KEYWORD_IF:
|
522
|
+
return "'if'";
|
523
|
+
case PM_TOKEN_KEYWORD_IF_MODIFIER:
|
524
|
+
return "'if'";
|
525
|
+
case PM_TOKEN_KEYWORD_IN:
|
526
|
+
return "'in'";
|
527
|
+
case PM_TOKEN_KEYWORD_MODULE:
|
528
|
+
return "'module'";
|
529
|
+
case PM_TOKEN_KEYWORD_NEXT:
|
530
|
+
return "'next'";
|
531
|
+
case PM_TOKEN_KEYWORD_NIL:
|
532
|
+
return "'nil'";
|
533
|
+
case PM_TOKEN_KEYWORD_NOT:
|
534
|
+
return "'not'";
|
535
|
+
case PM_TOKEN_KEYWORD_OR:
|
536
|
+
return "'or'";
|
537
|
+
case PM_TOKEN_KEYWORD_REDO:
|
538
|
+
return "'redo'";
|
539
|
+
case PM_TOKEN_KEYWORD_RESCUE:
|
540
|
+
return "'rescue'";
|
541
|
+
case PM_TOKEN_KEYWORD_RESCUE_MODIFIER:
|
542
|
+
return "'rescue'";
|
543
|
+
case PM_TOKEN_KEYWORD_RETRY:
|
544
|
+
return "'retry'";
|
545
|
+
case PM_TOKEN_KEYWORD_RETURN:
|
546
|
+
return "'return'";
|
547
|
+
case PM_TOKEN_KEYWORD_SELF:
|
548
|
+
return "'self'";
|
549
|
+
case PM_TOKEN_KEYWORD_SUPER:
|
550
|
+
return "'super'";
|
551
|
+
case PM_TOKEN_KEYWORD_THEN:
|
552
|
+
return "'then'";
|
553
|
+
case PM_TOKEN_KEYWORD_TRUE:
|
554
|
+
return "'true'";
|
555
|
+
case PM_TOKEN_KEYWORD_UNDEF:
|
556
|
+
return "'undef'";
|
557
|
+
case PM_TOKEN_KEYWORD_UNLESS:
|
558
|
+
return "'unless'";
|
559
|
+
case PM_TOKEN_KEYWORD_UNLESS_MODIFIER:
|
560
|
+
return "'unless'";
|
561
|
+
case PM_TOKEN_KEYWORD_UNTIL:
|
562
|
+
return "'until'";
|
563
|
+
case PM_TOKEN_KEYWORD_UNTIL_MODIFIER:
|
564
|
+
return "'until'";
|
565
|
+
case PM_TOKEN_KEYWORD_WHEN:
|
566
|
+
return "'when'";
|
567
|
+
case PM_TOKEN_KEYWORD_WHILE:
|
568
|
+
return "'while'";
|
569
|
+
case PM_TOKEN_KEYWORD_WHILE_MODIFIER:
|
570
|
+
return "'while'";
|
571
|
+
case PM_TOKEN_KEYWORD_YIELD:
|
572
|
+
return "'yield'";
|
573
|
+
case PM_TOKEN_KEYWORD___ENCODING__:
|
574
|
+
return "'__ENCODING__'";
|
575
|
+
case PM_TOKEN_KEYWORD___FILE__:
|
576
|
+
return "'__FILE__'";
|
577
|
+
case PM_TOKEN_KEYWORD___LINE__:
|
578
|
+
return "'__LINE__'";
|
579
|
+
case PM_TOKEN_LABEL:
|
580
|
+
return "label";
|
581
|
+
case PM_TOKEN_LABEL_END:
|
582
|
+
return "label terminator";
|
583
|
+
case PM_TOKEN_LAMBDA_BEGIN:
|
584
|
+
return "'{'";
|
585
|
+
case PM_TOKEN_LESS:
|
586
|
+
return "'<'";
|
587
|
+
case PM_TOKEN_LESS_EQUAL:
|
588
|
+
return "'<='";
|
589
|
+
case PM_TOKEN_LESS_EQUAL_GREATER:
|
590
|
+
return "'<=>'";
|
591
|
+
case PM_TOKEN_LESS_LESS:
|
592
|
+
return "'<<'";
|
593
|
+
case PM_TOKEN_LESS_LESS_EQUAL:
|
594
|
+
return "'<<='";
|
595
|
+
case PM_TOKEN_METHOD_NAME:
|
596
|
+
return "method name";
|
597
|
+
case PM_TOKEN_MINUS:
|
598
|
+
return "'-'";
|
599
|
+
case PM_TOKEN_MINUS_EQUAL:
|
600
|
+
return "'-='";
|
601
|
+
case PM_TOKEN_MINUS_GREATER:
|
602
|
+
return "'->'";
|
603
|
+
case PM_TOKEN_NEWLINE:
|
604
|
+
return "newline";
|
605
|
+
case PM_TOKEN_NUMBERED_REFERENCE:
|
606
|
+
return "numbered reference";
|
607
|
+
case PM_TOKEN_PARENTHESIS_LEFT:
|
608
|
+
return "'('";
|
609
|
+
case PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES:
|
610
|
+
return "'('";
|
611
|
+
case PM_TOKEN_PARENTHESIS_RIGHT:
|
612
|
+
return "')'";
|
613
|
+
case PM_TOKEN_PERCENT:
|
614
|
+
return "'%'";
|
615
|
+
case PM_TOKEN_PERCENT_EQUAL:
|
616
|
+
return "'%='";
|
617
|
+
case PM_TOKEN_PERCENT_LOWER_I:
|
618
|
+
return "'%i'";
|
619
|
+
case PM_TOKEN_PERCENT_LOWER_W:
|
620
|
+
return "'%w'";
|
621
|
+
case PM_TOKEN_PERCENT_LOWER_X:
|
622
|
+
return "'%x'";
|
623
|
+
case PM_TOKEN_PERCENT_UPPER_I:
|
624
|
+
return "'%I'";
|
625
|
+
case PM_TOKEN_PERCENT_UPPER_W:
|
626
|
+
return "'%W'";
|
627
|
+
case PM_TOKEN_PIPE:
|
628
|
+
return "'|'";
|
629
|
+
case PM_TOKEN_PIPE_EQUAL:
|
630
|
+
return "'|='";
|
631
|
+
case PM_TOKEN_PIPE_PIPE:
|
632
|
+
return "'||'";
|
633
|
+
case PM_TOKEN_PIPE_PIPE_EQUAL:
|
634
|
+
return "'||='";
|
635
|
+
case PM_TOKEN_PLUS:
|
636
|
+
return "'+'";
|
637
|
+
case PM_TOKEN_PLUS_EQUAL:
|
638
|
+
return "'+='";
|
639
|
+
case PM_TOKEN_QUESTION_MARK:
|
640
|
+
return "'?'";
|
641
|
+
case PM_TOKEN_REGEXP_BEGIN:
|
642
|
+
return "regular expression beginning";
|
643
|
+
case PM_TOKEN_REGEXP_END:
|
644
|
+
return "regular expression ending";
|
645
|
+
case PM_TOKEN_SEMICOLON:
|
646
|
+
return "';'";
|
647
|
+
case PM_TOKEN_SLASH:
|
648
|
+
return "'/'";
|
649
|
+
case PM_TOKEN_SLASH_EQUAL:
|
650
|
+
return "'/='";
|
651
|
+
case PM_TOKEN_STAR:
|
652
|
+
return "'*'";
|
653
|
+
case PM_TOKEN_STAR_EQUAL:
|
654
|
+
return "'*='";
|
655
|
+
case PM_TOKEN_STAR_STAR:
|
656
|
+
return "'**'";
|
657
|
+
case PM_TOKEN_STAR_STAR_EQUAL:
|
658
|
+
return "'**='";
|
659
|
+
case PM_TOKEN_STRING_BEGIN:
|
660
|
+
return "string beginning";
|
661
|
+
case PM_TOKEN_STRING_CONTENT:
|
662
|
+
return "string content";
|
663
|
+
case PM_TOKEN_STRING_END:
|
664
|
+
return "string ending";
|
665
|
+
case PM_TOKEN_SYMBOL_BEGIN:
|
666
|
+
return "symbol beginning";
|
667
|
+
case PM_TOKEN_TILDE:
|
668
|
+
return "'~'";
|
669
|
+
case PM_TOKEN_UAMPERSAND:
|
670
|
+
return "'&'";
|
671
|
+
case PM_TOKEN_UCOLON_COLON:
|
672
|
+
return "'::'";
|
673
|
+
case PM_TOKEN_UDOT_DOT:
|
674
|
+
return "'..'";
|
675
|
+
case PM_TOKEN_UDOT_DOT_DOT:
|
676
|
+
return "'...'";
|
677
|
+
case PM_TOKEN_UMINUS:
|
678
|
+
return "'-'";
|
679
|
+
case PM_TOKEN_UMINUS_NUM:
|
680
|
+
return "'-'";
|
681
|
+
case PM_TOKEN_UPLUS:
|
682
|
+
return "'+'";
|
683
|
+
case PM_TOKEN_USTAR:
|
684
|
+
return "*";
|
685
|
+
case PM_TOKEN_USTAR_STAR:
|
686
|
+
return "'**'";
|
687
|
+
case PM_TOKEN_WORDS_SEP:
|
688
|
+
return "string separator";
|
689
|
+
case PM_TOKEN___END__:
|
690
|
+
return "'__END__'";
|
691
|
+
case PM_TOKEN_MAXIMUM:
|
692
|
+
assert(false && "unreachable");
|
693
|
+
return "";
|
694
|
+
}
|
695
|
+
|
696
|
+
// Provide a default, because some compilers can't determine that the above
|
697
|
+
// switch is exhaustive.
|
698
|
+
assert(false && "unreachable");
|
699
|
+
return "";
|
351
700
|
}
|
data/src/util/pm_buffer.c
CHANGED
@@ -160,6 +160,17 @@ pm_buffer_append_varsint(pm_buffer_t *buffer, int32_t value) {
|
|
160
160
|
pm_buffer_append_varuint(buffer, unsigned_int);
|
161
161
|
}
|
162
162
|
|
163
|
+
/**
|
164
|
+
* Prepend the given string to the buffer.
|
165
|
+
*/
|
166
|
+
void
|
167
|
+
pm_buffer_prepend_string(pm_buffer_t *buffer, const char *value, size_t length) {
|
168
|
+
size_t cursor = buffer->length;
|
169
|
+
pm_buffer_append_length(buffer, length);
|
170
|
+
memmove(buffer->value + length, buffer->value, cursor);
|
171
|
+
memcpy(buffer->value, value, length);
|
172
|
+
}
|
173
|
+
|
163
174
|
/**
|
164
175
|
* Concatenate one buffer onto another.
|
165
176
|
*/
|
data/src/util/pm_constant_pool.c
CHANGED
@@ -124,13 +124,13 @@ pm_constant_pool_resize(pm_constant_pool_t *pool) {
|
|
124
124
|
|
125
125
|
// If an id is set on this constant, then we know we have content here.
|
126
126
|
// In this case we need to insert it into the next constant pool.
|
127
|
-
if (bucket->id !=
|
127
|
+
if (bucket->id != PM_CONSTANT_ID_UNSET) {
|
128
128
|
uint32_t next_index = bucket->hash & mask;
|
129
129
|
|
130
130
|
// This implements linear scanning to find the next available slot
|
131
131
|
// in case this index is already taken. We don't need to bother
|
132
132
|
// comparing the values since we know that the hash is unique.
|
133
|
-
while (next_buckets[next_index].id !=
|
133
|
+
while (next_buckets[next_index].id != PM_CONSTANT_ID_UNSET) {
|
134
134
|
next_index = (next_index + 1) & mask;
|
135
135
|
}
|
136
136
|
|
@@ -177,17 +177,42 @@ pm_constant_pool_init(pm_constant_pool_t *pool, uint32_t capacity) {
|
|
177
177
|
*/
|
178
178
|
pm_constant_t *
|
179
179
|
pm_constant_pool_id_to_constant(const pm_constant_pool_t *pool, pm_constant_id_t constant_id) {
|
180
|
-
assert(constant_id
|
180
|
+
assert(constant_id != PM_CONSTANT_ID_UNSET && constant_id <= pool->size);
|
181
181
|
return &pool->constants[constant_id - 1];
|
182
182
|
}
|
183
183
|
|
184
|
+
/**
|
185
|
+
* Find a constant in a constant pool. Returns the id of the constant, or 0 if
|
186
|
+
* the constant is not found.
|
187
|
+
*/
|
188
|
+
pm_constant_id_t
|
189
|
+
pm_constant_pool_find(const pm_constant_pool_t *pool, const uint8_t *start, size_t length) {
|
190
|
+
assert(is_power_of_two(pool->capacity));
|
191
|
+
const uint32_t mask = pool->capacity - 1;
|
192
|
+
|
193
|
+
uint32_t hash = pm_constant_pool_hash(start, length);
|
194
|
+
uint32_t index = hash & mask;
|
195
|
+
pm_constant_pool_bucket_t *bucket;
|
196
|
+
|
197
|
+
while (bucket = &pool->buckets[index], bucket->id != PM_CONSTANT_ID_UNSET) {
|
198
|
+
pm_constant_t *constant = &pool->constants[bucket->id - 1];
|
199
|
+
if ((constant->length == length) && memcmp(constant->start, start, length) == 0) {
|
200
|
+
return bucket->id;
|
201
|
+
}
|
202
|
+
|
203
|
+
index = (index + 1) & mask;
|
204
|
+
}
|
205
|
+
|
206
|
+
return PM_CONSTANT_ID_UNSET;
|
207
|
+
}
|
208
|
+
|
184
209
|
/**
|
185
210
|
* Insert a constant into a constant pool and return its index in the pool.
|
186
211
|
*/
|
187
212
|
static inline pm_constant_id_t
|
188
213
|
pm_constant_pool_insert(pm_constant_pool_t *pool, const uint8_t *start, size_t length, pm_constant_pool_bucket_type_t type) {
|
189
214
|
if (pool->size >= (pool->capacity / 4 * 3)) {
|
190
|
-
if (!pm_constant_pool_resize(pool)) return
|
215
|
+
if (!pm_constant_pool_resize(pool)) return PM_CONSTANT_ID_UNSET;
|
191
216
|
}
|
192
217
|
|
193
218
|
assert(is_power_of_two(pool->capacity));
|
@@ -197,7 +222,7 @@ pm_constant_pool_insert(pm_constant_pool_t *pool, const uint8_t *start, size_t l
|
|
197
222
|
uint32_t index = hash & mask;
|
198
223
|
pm_constant_pool_bucket_t *bucket;
|
199
224
|
|
200
|
-
while (bucket = &pool->buckets[index], bucket->id !=
|
225
|
+
while (bucket = &pool->buckets[index], bucket->id != PM_CONSTANT_ID_UNSET) {
|
201
226
|
// If there is a collision, then we need to check if the content is the
|
202
227
|
// same as the content we are trying to insert. If it is, then we can
|
203
228
|
// return the id of the existing constant.
|
@@ -248,8 +273,8 @@ pm_constant_pool_insert(pm_constant_pool_t *pool, const uint8_t *start, size_t l
|
|
248
273
|
}
|
249
274
|
|
250
275
|
/**
|
251
|
-
* Insert a constant into a constant pool. Returns the id of the constant, or
|
252
|
-
* if any potential calls to resize fail.
|
276
|
+
* Insert a constant into a constant pool. Returns the id of the constant, or
|
277
|
+
* PM_CONSTANT_ID_UNSET if any potential calls to resize fail.
|
253
278
|
*/
|
254
279
|
pm_constant_id_t
|
255
280
|
pm_constant_pool_insert_shared(pm_constant_pool_t *pool, const uint8_t *start, size_t length) {
|
@@ -258,8 +283,8 @@ pm_constant_pool_insert_shared(pm_constant_pool_t *pool, const uint8_t *start, s
|
|
258
283
|
|
259
284
|
/**
|
260
285
|
* Insert a constant into a constant pool from memory that is now owned by the
|
261
|
-
* constant pool. Returns the id of the constant, or
|
262
|
-
* resize fail.
|
286
|
+
* constant pool. Returns the id of the constant, or PM_CONSTANT_ID_UNSET if any
|
287
|
+
* potential calls to resize fail.
|
263
288
|
*/
|
264
289
|
pm_constant_id_t
|
265
290
|
pm_constant_pool_insert_owned(pm_constant_pool_t *pool, const uint8_t *start, size_t length) {
|
@@ -268,7 +293,8 @@ pm_constant_pool_insert_owned(pm_constant_pool_t *pool, const uint8_t *start, si
|
|
268
293
|
|
269
294
|
/**
|
270
295
|
* Insert a constant into a constant pool from memory that is constant. Returns
|
271
|
-
* the id of the constant, or
|
296
|
+
* the id of the constant, or PM_CONSTANT_ID_UNSET if any potential calls to
|
297
|
+
* resize fail.
|
272
298
|
*/
|
273
299
|
pm_constant_id_t
|
274
300
|
pm_constant_pool_insert_constant(pm_constant_pool_t *pool, const uint8_t *start, size_t length) {
|
@@ -286,7 +312,7 @@ pm_constant_pool_free(pm_constant_pool_t *pool) {
|
|
286
312
|
pm_constant_pool_bucket_t *bucket = &pool->buckets[index];
|
287
313
|
|
288
314
|
// If an id is set on this constant, then we know we have content here.
|
289
|
-
if (bucket->id !=
|
315
|
+
if (bucket->id != PM_CONSTANT_ID_UNSET && bucket->type == PM_CONSTANT_POOL_BUCKET_OWNED) {
|
290
316
|
pm_constant_t *constant = &pool->constants[bucket->id - 1];
|
291
317
|
free((void *) constant->start);
|
292
318
|
}
|