jruby-prism-parser 0.24.0-java → 1.4.0-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/BSDmakefile +58 -0
- data/CHANGELOG.md +269 -1
- data/CONTRIBUTING.md +0 -4
- data/Makefile +25 -18
- data/README.md +57 -6
- data/config.yml +1724 -140
- data/docs/build_system.md +39 -11
- data/docs/configuration.md +4 -0
- data/docs/cruby_compilation.md +1 -1
- data/docs/fuzzing.md +1 -1
- data/docs/parser_translation.md +14 -9
- data/docs/parsing_rules.md +4 -1
- data/docs/releasing.md +8 -10
- data/docs/relocation.md +34 -0
- data/docs/ripper_translation.md +72 -0
- data/docs/ruby_api.md +2 -1
- data/docs/serialization.md +29 -5
- data/ext/prism/api_node.c +3395 -1999
- data/ext/prism/api_pack.c +9 -0
- data/ext/prism/extconf.rb +55 -34
- data/ext/prism/extension.c +597 -346
- data/ext/prism/extension.h +6 -5
- data/include/prism/ast.h +2612 -455
- data/include/prism/defines.h +160 -2
- data/include/prism/diagnostic.h +188 -76
- data/include/prism/encoding.h +22 -4
- data/include/prism/node.h +89 -17
- data/include/prism/options.h +224 -12
- data/include/prism/pack.h +11 -0
- data/include/prism/parser.h +267 -66
- data/include/prism/prettyprint.h +8 -0
- data/include/prism/regexp.h +18 -8
- data/include/prism/static_literals.h +121 -0
- data/include/prism/util/pm_buffer.h +75 -2
- data/include/prism/util/pm_char.h +1 -2
- data/include/prism/util/pm_constant_pool.h +18 -9
- data/include/prism/util/pm_integer.h +126 -0
- data/include/prism/util/pm_list.h +1 -1
- data/include/prism/util/pm_newline_list.h +19 -0
- data/include/prism/util/pm_string.h +48 -8
- data/include/prism/version.h +3 -3
- data/include/prism.h +99 -5
- data/jruby-prism.jar +0 -0
- data/lib/prism/compiler.rb +11 -1
- data/lib/prism/desugar_compiler.rb +113 -74
- data/lib/prism/dispatcher.rb +45 -1
- data/lib/prism/dot_visitor.rb +201 -77
- data/lib/prism/dsl.rb +673 -461
- data/lib/prism/ffi.rb +233 -45
- data/lib/prism/inspect_visitor.rb +2389 -0
- data/lib/prism/lex_compat.rb +35 -16
- data/lib/prism/mutation_compiler.rb +24 -8
- data/lib/prism/node.rb +7731 -8460
- data/lib/prism/node_ext.rb +328 -32
- data/lib/prism/pack.rb +4 -0
- data/lib/prism/parse_result/comments.rb +34 -24
- data/lib/prism/parse_result/errors.rb +65 -0
- data/lib/prism/parse_result/newlines.rb +102 -12
- data/lib/prism/parse_result.rb +448 -44
- data/lib/prism/pattern.rb +28 -10
- data/lib/prism/polyfill/append_as_bytes.rb +15 -0
- data/lib/prism/polyfill/byteindex.rb +13 -0
- data/lib/prism/polyfill/unpack1.rb +14 -0
- data/lib/prism/reflection.rb +413 -0
- data/lib/prism/relocation.rb +504 -0
- data/lib/prism/serialize.rb +1940 -1198
- data/lib/prism/string_query.rb +30 -0
- data/lib/prism/translation/parser/builder.rb +61 -0
- data/lib/prism/translation/parser/compiler.rb +569 -195
- data/lib/prism/translation/parser/lexer.rb +516 -39
- data/lib/prism/translation/parser.rb +177 -12
- data/lib/prism/translation/parser33.rb +1 -1
- data/lib/prism/translation/parser34.rb +1 -1
- data/lib/prism/translation/parser35.rb +12 -0
- data/lib/prism/translation/ripper/sexp.rb +125 -0
- data/lib/prism/translation/ripper/shim.rb +5 -0
- data/lib/prism/translation/ripper.rb +3224 -462
- data/lib/prism/translation/ruby_parser.rb +194 -69
- data/lib/prism/translation.rb +4 -1
- data/lib/prism/version.rb +1 -1
- data/lib/prism/visitor.rb +13 -0
- data/lib/prism.rb +17 -27
- data/prism.gemspec +57 -17
- data/rbi/prism/compiler.rbi +12 -0
- data/rbi/prism/dsl.rbi +524 -0
- data/rbi/prism/inspect_visitor.rbi +12 -0
- data/rbi/prism/node.rbi +8722 -0
- data/rbi/prism/node_ext.rbi +107 -0
- data/rbi/prism/parse_result.rbi +404 -0
- data/rbi/prism/reflection.rbi +58 -0
- data/rbi/prism/string_query.rbi +12 -0
- data/rbi/prism/translation/parser.rbi +11 -0
- data/rbi/prism/translation/parser33.rbi +6 -0
- data/rbi/prism/translation/parser34.rbi +6 -0
- data/rbi/prism/translation/parser35.rbi +6 -0
- data/rbi/prism/translation/ripper.rbi +15 -0
- data/rbi/prism/visitor.rbi +473 -0
- data/rbi/prism.rbi +44 -7745
- data/sig/prism/compiler.rbs +9 -0
- data/sig/prism/dispatcher.rbs +16 -0
- data/sig/prism/dot_visitor.rbs +6 -0
- data/sig/prism/dsl.rbs +351 -0
- data/sig/prism/inspect_visitor.rbs +22 -0
- data/sig/prism/lex_compat.rbs +10 -0
- data/sig/prism/mutation_compiler.rbs +159 -0
- data/sig/prism/node.rbs +3614 -0
- data/sig/prism/node_ext.rbs +82 -0
- data/sig/prism/pack.rbs +43 -0
- data/sig/prism/parse_result.rbs +192 -0
- data/sig/prism/pattern.rbs +13 -0
- data/sig/prism/reflection.rbs +50 -0
- data/sig/prism/relocation.rbs +185 -0
- data/sig/prism/serialize.rbs +8 -0
- data/sig/prism/string_query.rbs +11 -0
- data/sig/prism/visitor.rbs +169 -0
- data/sig/prism.rbs +248 -4767
- data/src/diagnostic.c +672 -230
- data/src/encoding.c +211 -108
- data/src/node.c +7541 -1653
- data/src/options.c +135 -20
- data/src/pack.c +33 -17
- data/src/prettyprint.c +1543 -1485
- data/src/prism.c +7813 -3050
- data/src/regexp.c +225 -73
- data/src/serialize.c +101 -77
- data/src/static_literals.c +617 -0
- data/src/token_type.c +14 -13
- data/src/util/pm_buffer.c +187 -20
- data/src/util/pm_char.c +5 -5
- data/src/util/pm_constant_pool.c +39 -19
- data/src/util/pm_integer.c +670 -0
- data/src/util/pm_list.c +1 -1
- data/src/util/pm_newline_list.c +43 -5
- data/src/util/pm_string.c +213 -33
- data/src/util/pm_strncasecmp.c +13 -1
- data/src/util/pm_strpbrk.c +32 -6
- metadata +55 -19
- data/docs/ripper.md +0 -36
- data/include/prism/util/pm_state_stack.h +0 -42
- data/include/prism/util/pm_string_list.h +0 -44
- data/lib/prism/debug.rb +0 -206
- data/lib/prism/node_inspector.rb +0 -68
- data/lib/prism/translation/parser/rubocop.rb +0 -45
- data/rbi/prism_static.rbi +0 -207
- data/sig/prism_static.rbs +0 -201
- data/src/util/pm_state_stack.c +0 -25
- data/src/util/pm_string_list.c +0 -28
data/include/prism/parser.h
CHANGED
@@ -6,14 +6,14 @@
|
|
6
6
|
#ifndef PRISM_PARSER_H
|
7
7
|
#define PRISM_PARSER_H
|
8
8
|
|
9
|
-
#include "prism/ast.h"
|
10
9
|
#include "prism/defines.h"
|
10
|
+
#include "prism/ast.h"
|
11
11
|
#include "prism/encoding.h"
|
12
12
|
#include "prism/options.h"
|
13
|
+
#include "prism/static_literals.h"
|
13
14
|
#include "prism/util/pm_constant_pool.h"
|
14
15
|
#include "prism/util/pm_list.h"
|
15
16
|
#include "prism/util/pm_newline_list.h"
|
16
|
-
#include "prism/util/pm_state_stack.h"
|
17
17
|
#include "prism/util/pm_string.h"
|
18
18
|
|
19
19
|
#include <stdbool.h>
|
@@ -82,6 +82,23 @@ typedef enum {
|
|
82
82
|
PM_HEREDOC_INDENT_TILDE,
|
83
83
|
} pm_heredoc_indent_t;
|
84
84
|
|
85
|
+
/**
|
86
|
+
* All of the information that must be stored in order to lex a heredoc.
|
87
|
+
*/
|
88
|
+
typedef struct {
|
89
|
+
/** A pointer to the start of the heredoc identifier. */
|
90
|
+
const uint8_t *ident_start;
|
91
|
+
|
92
|
+
/** The length of the heredoc identifier. */
|
93
|
+
size_t ident_length;
|
94
|
+
|
95
|
+
/** The type of quote that the heredoc uses. */
|
96
|
+
pm_heredoc_quote_t quote;
|
97
|
+
|
98
|
+
/** The type of indentation that the heredoc uses. */
|
99
|
+
pm_heredoc_indent_t indent;
|
100
|
+
} pm_heredoc_lex_mode_t;
|
101
|
+
|
85
102
|
/**
|
86
103
|
* When lexing Ruby source, the lexer has a small amount of state to tell which
|
87
104
|
* kind of token it is currently lexing. For example, when we find the start of
|
@@ -173,7 +190,7 @@ typedef struct pm_lex_mode {
|
|
173
190
|
* This is the character set that should be used to delimit the
|
174
191
|
* tokens within the regular expression.
|
175
192
|
*/
|
176
|
-
uint8_t breakpoints[
|
193
|
+
uint8_t breakpoints[7];
|
177
194
|
} regexp;
|
178
195
|
|
179
196
|
struct {
|
@@ -206,21 +223,14 @@ typedef struct pm_lex_mode {
|
|
206
223
|
* This is the character set that should be used to delimit the
|
207
224
|
* tokens within the string.
|
208
225
|
*/
|
209
|
-
uint8_t breakpoints[
|
226
|
+
uint8_t breakpoints[7];
|
210
227
|
} string;
|
211
228
|
|
212
229
|
struct {
|
213
|
-
/**
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
size_t ident_length;
|
218
|
-
|
219
|
-
/** The type of quote that the heredoc uses. */
|
220
|
-
pm_heredoc_quote_t quote;
|
221
|
-
|
222
|
-
/** The type of indentation that the heredoc uses. */
|
223
|
-
pm_heredoc_indent_t indent;
|
230
|
+
/**
|
231
|
+
* All of the data necessary to lex a heredoc.
|
232
|
+
*/
|
233
|
+
pm_heredoc_lex_mode_t base;
|
224
234
|
|
225
235
|
/**
|
226
236
|
* This is the pointer to the character where lexing should resume
|
@@ -233,7 +243,10 @@ typedef struct pm_lex_mode {
|
|
233
243
|
* line so that we know how much to dedent each line in the case of
|
234
244
|
* a tilde heredoc.
|
235
245
|
*/
|
236
|
-
size_t common_whitespace;
|
246
|
+
size_t *common_whitespace;
|
247
|
+
|
248
|
+
/** True if the previous token ended with a line continuation. */
|
249
|
+
bool line_continuation;
|
237
250
|
} heredoc;
|
238
251
|
} as;
|
239
252
|
|
@@ -265,12 +278,30 @@ typedef enum {
|
|
265
278
|
/** a begin statement */
|
266
279
|
PM_CONTEXT_BEGIN,
|
267
280
|
|
281
|
+
/** an ensure statement with an explicit begin */
|
282
|
+
PM_CONTEXT_BEGIN_ENSURE,
|
283
|
+
|
284
|
+
/** a rescue else statement with an explicit begin */
|
285
|
+
PM_CONTEXT_BEGIN_ELSE,
|
286
|
+
|
287
|
+
/** a rescue statement with an explicit begin */
|
288
|
+
PM_CONTEXT_BEGIN_RESCUE,
|
289
|
+
|
268
290
|
/** expressions in block arguments using braces */
|
269
291
|
PM_CONTEXT_BLOCK_BRACES,
|
270
292
|
|
271
293
|
/** expressions in block arguments using do..end */
|
272
294
|
PM_CONTEXT_BLOCK_KEYWORDS,
|
273
295
|
|
296
|
+
/** an ensure statement within a do..end block */
|
297
|
+
PM_CONTEXT_BLOCK_ENSURE,
|
298
|
+
|
299
|
+
/** a rescue else statement within a do..end block */
|
300
|
+
PM_CONTEXT_BLOCK_ELSE,
|
301
|
+
|
302
|
+
/** a rescue statement within a do..end block */
|
303
|
+
PM_CONTEXT_BLOCK_RESCUE,
|
304
|
+
|
274
305
|
/** a case when statement */
|
275
306
|
PM_CONTEXT_CASE_WHEN,
|
276
307
|
|
@@ -280,12 +311,33 @@ typedef enum {
|
|
280
311
|
/** a class declaration */
|
281
312
|
PM_CONTEXT_CLASS,
|
282
313
|
|
314
|
+
/** an ensure statement within a class statement */
|
315
|
+
PM_CONTEXT_CLASS_ENSURE,
|
316
|
+
|
317
|
+
/** a rescue else statement within a class statement */
|
318
|
+
PM_CONTEXT_CLASS_ELSE,
|
319
|
+
|
320
|
+
/** a rescue statement within a class statement */
|
321
|
+
PM_CONTEXT_CLASS_RESCUE,
|
322
|
+
|
283
323
|
/** a method definition */
|
284
324
|
PM_CONTEXT_DEF,
|
285
325
|
|
326
|
+
/** an ensure statement within a method definition */
|
327
|
+
PM_CONTEXT_DEF_ENSURE,
|
328
|
+
|
329
|
+
/** a rescue else statement within a method definition */
|
330
|
+
PM_CONTEXT_DEF_ELSE,
|
331
|
+
|
332
|
+
/** a rescue statement within a method definition */
|
333
|
+
PM_CONTEXT_DEF_RESCUE,
|
334
|
+
|
286
335
|
/** a method definition's parameters */
|
287
336
|
PM_CONTEXT_DEF_PARAMS,
|
288
337
|
|
338
|
+
/** a defined? expression */
|
339
|
+
PM_CONTEXT_DEFINED,
|
340
|
+
|
289
341
|
/** a method definition's default parameter */
|
290
342
|
PM_CONTEXT_DEFAULT_PARAMS,
|
291
343
|
|
@@ -298,12 +350,6 @@ typedef enum {
|
|
298
350
|
/** an interpolated expression */
|
299
351
|
PM_CONTEXT_EMBEXPR,
|
300
352
|
|
301
|
-
/** an ensure statement */
|
302
|
-
PM_CONTEXT_ENSURE,
|
303
|
-
|
304
|
-
/** an ensure statement within a method definition */
|
305
|
-
PM_CONTEXT_ENSURE_DEF,
|
306
|
-
|
307
353
|
/** a for loop */
|
308
354
|
PM_CONTEXT_FOR,
|
309
355
|
|
@@ -319,12 +365,36 @@ typedef enum {
|
|
319
365
|
/** a lambda expression with do..end */
|
320
366
|
PM_CONTEXT_LAMBDA_DO_END,
|
321
367
|
|
368
|
+
/** an ensure statement within a lambda expression */
|
369
|
+
PM_CONTEXT_LAMBDA_ENSURE,
|
370
|
+
|
371
|
+
/** a rescue else statement within a lambda expression */
|
372
|
+
PM_CONTEXT_LAMBDA_ELSE,
|
373
|
+
|
374
|
+
/** a rescue statement within a lambda expression */
|
375
|
+
PM_CONTEXT_LAMBDA_RESCUE,
|
376
|
+
|
377
|
+
/** the predicate clause of a loop statement */
|
378
|
+
PM_CONTEXT_LOOP_PREDICATE,
|
379
|
+
|
322
380
|
/** the top level context */
|
323
381
|
PM_CONTEXT_MAIN,
|
324
382
|
|
325
383
|
/** a module declaration */
|
326
384
|
PM_CONTEXT_MODULE,
|
327
385
|
|
386
|
+
/** an ensure statement within a module statement */
|
387
|
+
PM_CONTEXT_MODULE_ENSURE,
|
388
|
+
|
389
|
+
/** a rescue else statement within a module statement */
|
390
|
+
PM_CONTEXT_MODULE_ELSE,
|
391
|
+
|
392
|
+
/** a rescue statement within a module statement */
|
393
|
+
PM_CONTEXT_MODULE_RESCUE,
|
394
|
+
|
395
|
+
/** a multiple target expression */
|
396
|
+
PM_CONTEXT_MULTI_TARGET,
|
397
|
+
|
328
398
|
/** a parenthesized expression */
|
329
399
|
PM_CONTEXT_PARENS,
|
330
400
|
|
@@ -337,20 +407,23 @@ typedef enum {
|
|
337
407
|
/** a BEGIN block */
|
338
408
|
PM_CONTEXT_PREEXE,
|
339
409
|
|
340
|
-
/** a rescue
|
341
|
-
|
410
|
+
/** a modifier rescue clause */
|
411
|
+
PM_CONTEXT_RESCUE_MODIFIER,
|
342
412
|
|
343
|
-
/** a
|
344
|
-
|
413
|
+
/** a singleton class definition */
|
414
|
+
PM_CONTEXT_SCLASS,
|
345
415
|
|
346
|
-
/**
|
347
|
-
|
416
|
+
/** an ensure statement within a singleton class */
|
417
|
+
PM_CONTEXT_SCLASS_ENSURE,
|
348
418
|
|
349
|
-
/** a rescue statement
|
350
|
-
|
419
|
+
/** a rescue else statement within a singleton class */
|
420
|
+
PM_CONTEXT_SCLASS_ELSE,
|
351
421
|
|
352
|
-
/** a singleton class
|
353
|
-
|
422
|
+
/** a rescue statement within a singleton class */
|
423
|
+
PM_CONTEXT_SCLASS_RESCUE,
|
424
|
+
|
425
|
+
/** a ternary expression */
|
426
|
+
PM_CONTEXT_TERNARY,
|
354
427
|
|
355
428
|
/** an unless statement */
|
356
429
|
PM_CONTEXT_UNLESS,
|
@@ -445,56 +518,118 @@ typedef struct {
|
|
445
518
|
void (*callback)(void *data, pm_parser_t *parser, pm_token_t *token);
|
446
519
|
} pm_lex_callback_t;
|
447
520
|
|
521
|
+
/** The type of shareable constant value that can be set. */
|
522
|
+
typedef uint8_t pm_shareable_constant_value_t;
|
523
|
+
static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_NONE = 0x0;
|
524
|
+
static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_LITERAL = PM_SHAREABLE_CONSTANT_NODE_FLAGS_LITERAL;
|
525
|
+
static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_EVERYTHING = PM_SHAREABLE_CONSTANT_NODE_FLAGS_EXPERIMENTAL_EVERYTHING;
|
526
|
+
static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_COPY = PM_SHAREABLE_CONSTANT_NODE_FLAGS_EXPERIMENTAL_COPY;
|
527
|
+
|
528
|
+
/**
|
529
|
+
* This tracks an individual local variable in a certain lexical context, as
|
530
|
+
* well as the number of times it is read.
|
531
|
+
*/
|
532
|
+
typedef struct {
|
533
|
+
/** The name of the local variable. */
|
534
|
+
pm_constant_id_t name;
|
535
|
+
|
536
|
+
/** The location of the local variable in the source. */
|
537
|
+
pm_location_t location;
|
538
|
+
|
539
|
+
/** The index of the local variable in the local table. */
|
540
|
+
uint32_t index;
|
541
|
+
|
542
|
+
/** The number of times the local variable is read. */
|
543
|
+
uint32_t reads;
|
544
|
+
|
545
|
+
/** The hash of the local variable. */
|
546
|
+
uint32_t hash;
|
547
|
+
} pm_local_t;
|
548
|
+
|
549
|
+
/**
|
550
|
+
* This is a set of local variables in a certain lexical context (method, class,
|
551
|
+
* module, etc.). We need to track how many times these variables are read in
|
552
|
+
* order to warn if they only get written.
|
553
|
+
*/
|
554
|
+
typedef struct pm_locals {
|
555
|
+
/** The number of local variables in the set. */
|
556
|
+
uint32_t size;
|
557
|
+
|
558
|
+
/** The capacity of the local variables set. */
|
559
|
+
uint32_t capacity;
|
560
|
+
|
561
|
+
/** The nullable allocated memory for the local variables in the set. */
|
562
|
+
pm_local_t *locals;
|
563
|
+
} pm_locals_t;
|
564
|
+
|
565
|
+
/** The flags about scope parameters that can be set. */
|
566
|
+
typedef uint8_t pm_scope_parameters_t;
|
567
|
+
static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_NONE = 0x0;
|
568
|
+
static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS = 0x1;
|
569
|
+
static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS = 0x2;
|
570
|
+
static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_FORWARDING_BLOCK = 0x4;
|
571
|
+
static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_FORWARDING_ALL = 0x8;
|
572
|
+
static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED = 0x10;
|
573
|
+
static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_NUMBERED_INNER = 0x20;
|
574
|
+
static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_NUMBERED_FOUND = 0x40;
|
575
|
+
|
448
576
|
/**
|
449
577
|
* This struct represents a node in a linked list of scopes. Some scopes can see
|
450
578
|
* into their parent scopes, while others cannot.
|
451
579
|
*/
|
452
580
|
typedef struct pm_scope {
|
453
|
-
/** The IDs of the locals in the given scope. */
|
454
|
-
pm_constant_id_list_t locals;
|
455
|
-
|
456
581
|
/** A pointer to the previous scope in the linked list. */
|
457
582
|
struct pm_scope *previous;
|
458
583
|
|
459
|
-
/**
|
460
|
-
|
461
|
-
* If closed is true, then the scope cannot see into its parent.
|
462
|
-
*/
|
463
|
-
bool closed;
|
584
|
+
/** The IDs of the locals in the given scope. */
|
585
|
+
pm_locals_t locals;
|
464
586
|
|
465
587
|
/**
|
466
|
-
*
|
467
|
-
*
|
468
|
-
*
|
588
|
+
* This is a list of the implicit parameters contained within the block.
|
589
|
+
* These will be processed after the block is parsed to determine the kind
|
590
|
+
* of parameters node that should be used and to check if any errors need to
|
591
|
+
* be added.
|
469
592
|
*/
|
470
|
-
|
593
|
+
pm_node_list_t implicit_parameters;
|
471
594
|
|
472
595
|
/**
|
473
|
-
*
|
474
|
-
*
|
596
|
+
* This is a bitfield that indicates the parameters that are being used in
|
597
|
+
* this scope. It is a combination of the PM_SCOPE_PARAMETERS_* constants.
|
598
|
+
* There are three different kinds of parameters that can be used in a
|
599
|
+
* scope:
|
600
|
+
*
|
601
|
+
* - Ordinary parameters (e.g., def foo(bar); end)
|
602
|
+
* - Numbered parameters (e.g., def foo; _1; end)
|
603
|
+
* - The it parameter (e.g., def foo; it; end)
|
475
604
|
*
|
476
|
-
*
|
477
|
-
*
|
478
|
-
*
|
479
|
-
*
|
480
|
-
*
|
605
|
+
* If ordinary parameters are being used, then certain parameters can be
|
606
|
+
* forwarded to another method/structure. Those are indicated by four
|
607
|
+
* additional bits in the params field. For example, some combinations of:
|
608
|
+
*
|
609
|
+
* - def foo(*); end
|
610
|
+
* - def foo(**); end
|
611
|
+
* - def foo(&); end
|
612
|
+
* - def foo(...); end
|
481
613
|
*/
|
614
|
+
pm_scope_parameters_t parameters;
|
482
615
|
|
483
|
-
|
616
|
+
/**
|
617
|
+
* The current state of constant shareability for this scope. This is
|
618
|
+
* changed by magic shareable_constant_value comments.
|
619
|
+
*/
|
620
|
+
pm_shareable_constant_value_t shareable_constant;
|
484
621
|
|
485
622
|
/**
|
486
|
-
*
|
487
|
-
*
|
488
|
-
* numbered parameters, and to pass information to consumers of the AST
|
489
|
-
* about how many numbered parameters exist.
|
623
|
+
* A boolean indicating whether or not this scope can see into its parent.
|
624
|
+
* If closed is true, then the scope cannot see into its parent.
|
490
625
|
*/
|
491
|
-
|
626
|
+
bool closed;
|
492
627
|
} pm_scope_t;
|
493
628
|
|
494
|
-
|
495
|
-
|
496
|
-
|
497
|
-
|
629
|
+
/**
|
630
|
+
* An integer type that represents a stack of boolean values.
|
631
|
+
*/
|
632
|
+
typedef uint32_t pm_state_stack_t;
|
498
633
|
|
499
634
|
/**
|
500
635
|
* This struct represents the overall parser. It contains a reference to the
|
@@ -503,6 +638,13 @@ static const uint8_t PM_FORWARDING_ALL = 0x8;
|
|
503
638
|
* it's considering.
|
504
639
|
*/
|
505
640
|
struct pm_parser {
|
641
|
+
/**
|
642
|
+
* The next node identifier that will be assigned. This is a unique
|
643
|
+
* identifier used to track nodes such that the syntax tree can be dropped
|
644
|
+
* but the node can be found through another parse.
|
645
|
+
*/
|
646
|
+
uint32_t node_id;
|
647
|
+
|
506
648
|
/** The current state of the lexer. */
|
507
649
|
pm_lex_state_t lex_state;
|
508
650
|
|
@@ -597,6 +739,15 @@ struct pm_parser {
|
|
597
739
|
/** The current parsing context. */
|
598
740
|
pm_context_node_t *current_context;
|
599
741
|
|
742
|
+
/**
|
743
|
+
* The hash keys for the hash that is currently being parsed. This is not
|
744
|
+
* usually necessary because they can be passed down the various call chains,
|
745
|
+
* but in the event that you're parsing a hash that is being directly
|
746
|
+
* pushed into another hash with **, we need to share the hash keys so that
|
747
|
+
* we can warn for the nested hash as well.
|
748
|
+
*/
|
749
|
+
pm_static_literals_t *current_hash_keys;
|
750
|
+
|
600
751
|
/**
|
601
752
|
* The encoding functions for the current file are attached to the parser as
|
602
753
|
* it's parsing so that it can change with a magic comment.
|
@@ -688,18 +839,62 @@ struct pm_parser {
|
|
688
839
|
*/
|
689
840
|
const pm_encoding_t *explicit_encoding;
|
690
841
|
|
691
|
-
/**
|
692
|
-
|
842
|
+
/**
|
843
|
+
* When parsing block exits (e.g., break, next, redo), we need to validate
|
844
|
+
* that they are in correct contexts. For the most part we can do this by
|
845
|
+
* looking at our parent contexts. However, modifier while and until
|
846
|
+
* expressions can change that context to make block exits valid. In these
|
847
|
+
* cases, we need to keep track of the block exits and then validate them
|
848
|
+
* after the expression has been parsed.
|
849
|
+
*
|
850
|
+
* We use a pointer here because we don't want to keep a whole list attached
|
851
|
+
* since this will only be used in the context of begin/end expressions.
|
852
|
+
*/
|
853
|
+
pm_node_list_t *current_block_exits;
|
693
854
|
|
694
855
|
/** The version of prism that we should use to parse. */
|
695
856
|
pm_options_version_t version;
|
696
857
|
|
858
|
+
/** The command line flags given from the options. */
|
859
|
+
uint8_t command_line;
|
860
|
+
|
861
|
+
/**
|
862
|
+
* Whether or not we have found a frozen_string_literal magic comment with
|
863
|
+
* a true or false value.
|
864
|
+
* May be:
|
865
|
+
* - PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED
|
866
|
+
* - PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED
|
867
|
+
* - PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET
|
868
|
+
*/
|
869
|
+
int8_t frozen_string_literal;
|
870
|
+
|
871
|
+
/**
|
872
|
+
* Whether or not we are parsing an eval string. This impacts whether or not
|
873
|
+
* we should evaluate if block exits/yields are valid.
|
874
|
+
*/
|
875
|
+
bool parsing_eval;
|
876
|
+
|
877
|
+
/**
|
878
|
+
* Whether or not we are parsing a "partial" script, which is a script that
|
879
|
+
* will be evaluated in the context of another script, so we should not
|
880
|
+
* check jumps (next/break/etc.) for validity.
|
881
|
+
*/
|
882
|
+
bool partial_script;
|
883
|
+
|
697
884
|
/** Whether or not we're at the beginning of a command. */
|
698
885
|
bool command_start;
|
699
886
|
|
700
887
|
/** Whether or not we're currently recovering from a syntax error. */
|
701
888
|
bool recovering;
|
702
889
|
|
890
|
+
/**
|
891
|
+
* This is very specialized behavior for when you want to parse in a context
|
892
|
+
* that does not respect encoding comments. Its main use case is translating
|
893
|
+
* into the whitequark/parser AST which re-encodes source files in UTF-8
|
894
|
+
* before they are parsed and ignores encoding comments.
|
895
|
+
*/
|
896
|
+
bool encoding_locked;
|
897
|
+
|
703
898
|
/**
|
704
899
|
* Whether or not the encoding has been changed by a magic comment. We use
|
705
900
|
* this to provide a fast path for the lexer instead of going through the
|
@@ -723,10 +918,16 @@ struct pm_parser {
|
|
723
918
|
bool semantic_token_seen;
|
724
919
|
|
725
920
|
/**
|
726
|
-
*
|
727
|
-
*
|
921
|
+
* True if the current regular expression being lexed contains only ASCII
|
922
|
+
* characters.
|
923
|
+
*/
|
924
|
+
bool current_regular_expression_ascii_only;
|
925
|
+
|
926
|
+
/**
|
927
|
+
* By default, Ruby always warns about mismatched indentation. This can be
|
928
|
+
* toggled with a magic comment.
|
728
929
|
*/
|
729
|
-
bool
|
930
|
+
bool warn_mismatched_indentation;
|
730
931
|
};
|
731
932
|
|
732
933
|
#endif
|
data/include/prism/prettyprint.h
CHANGED
@@ -8,6 +8,12 @@
|
|
8
8
|
|
9
9
|
#include "prism/defines.h"
|
10
10
|
|
11
|
+
#ifdef PRISM_EXCLUDE_PRETTYPRINT
|
12
|
+
|
13
|
+
void pm_prettyprint(void);
|
14
|
+
|
15
|
+
#else
|
16
|
+
|
11
17
|
#include <stdio.h>
|
12
18
|
|
13
19
|
#include "prism/ast.h"
|
@@ -24,3 +30,5 @@
|
|
24
30
|
PRISM_EXPORTED_FUNCTION void pm_prettyprint(pm_buffer_t *output_buffer, const pm_parser_t *parser, const pm_node_t *node);
|
25
31
|
|
26
32
|
#endif
|
33
|
+
|
34
|
+
#endif
|
data/include/prism/regexp.h
CHANGED
@@ -10,7 +10,6 @@
|
|
10
10
|
#include "prism/parser.h"
|
11
11
|
#include "prism/encoding.h"
|
12
12
|
#include "prism/util/pm_memchr.h"
|
13
|
-
#include "prism/util/pm_string_list.h"
|
14
13
|
#include "prism/util/pm_string.h"
|
15
14
|
|
16
15
|
#include <stdbool.h>
|
@@ -18,16 +17,27 @@
|
|
18
17
|
#include <string.h>
|
19
18
|
|
20
19
|
/**
|
21
|
-
*
|
22
|
-
|
20
|
+
* This callback is called when a named capture group is found.
|
21
|
+
*/
|
22
|
+
typedef void (*pm_regexp_name_callback_t)(const pm_string_t *name, void *data);
|
23
|
+
|
24
|
+
/**
|
25
|
+
* This callback is called when a parse error is found.
|
26
|
+
*/
|
27
|
+
typedef void (*pm_regexp_error_callback_t)(const uint8_t *start, const uint8_t *end, const char *message, void *data);
|
28
|
+
|
29
|
+
/**
|
30
|
+
* Parse a regular expression.
|
23
31
|
*
|
32
|
+
* @param parser The parser that is currently being used.
|
24
33
|
* @param source The source code to parse.
|
25
34
|
* @param size The size of the source code.
|
26
|
-
* @param
|
27
|
-
* @param
|
28
|
-
* @param
|
29
|
-
* @
|
35
|
+
* @param extended_mode Whether to parse the regular expression in extended mode.
|
36
|
+
* @param name_callback The optional callback to call when a named capture group is found.
|
37
|
+
* @param name_data The optional data to pass to the name callback.
|
38
|
+
* @param error_callback The callback to call when a parse error is found.
|
39
|
+
* @param error_data The data to pass to the error callback.
|
30
40
|
*/
|
31
|
-
PRISM_EXPORTED_FUNCTION
|
41
|
+
PRISM_EXPORTED_FUNCTION void pm_regexp_parse(pm_parser_t *parser, const uint8_t *source, size_t size, bool extended_mode, pm_regexp_name_callback_t name_callback, void *name_data, pm_regexp_error_callback_t error_callback, void *error_data);
|
32
42
|
|
33
43
|
#endif
|
@@ -0,0 +1,121 @@
|
|
1
|
+
/**
|
2
|
+
* @file static_literals.h
|
3
|
+
*
|
4
|
+
* A set of static literal nodes that can be checked for duplicates.
|
5
|
+
*/
|
6
|
+
#ifndef PRISM_STATIC_LITERALS_H
|
7
|
+
#define PRISM_STATIC_LITERALS_H
|
8
|
+
|
9
|
+
#include "prism/defines.h"
|
10
|
+
#include "prism/ast.h"
|
11
|
+
#include "prism/util/pm_newline_list.h"
|
12
|
+
|
13
|
+
#include <assert.h>
|
14
|
+
#include <stdbool.h>
|
15
|
+
|
16
|
+
/**
|
17
|
+
* An internal hash table for a set of nodes.
|
18
|
+
*/
|
19
|
+
typedef struct {
|
20
|
+
/** The array of nodes in the hash table. */
|
21
|
+
pm_node_t **nodes;
|
22
|
+
|
23
|
+
/** The size of the hash table. */
|
24
|
+
uint32_t size;
|
25
|
+
|
26
|
+
/** The space that has been allocated in the hash table. */
|
27
|
+
uint32_t capacity;
|
28
|
+
} pm_node_hash_t;
|
29
|
+
|
30
|
+
/**
|
31
|
+
* Certain sets of nodes (hash keys and when clauses) check for duplicate nodes
|
32
|
+
* to alert the user of potential issues. To do this, we keep a set of the nodes
|
33
|
+
* that have been seen so far, and compare whenever we find a new node.
|
34
|
+
*
|
35
|
+
* We bucket the nodes based on their type to minimize the number of comparisons
|
36
|
+
* that need to be performed.
|
37
|
+
*/
|
38
|
+
typedef struct {
|
39
|
+
/**
|
40
|
+
* This is the set of IntegerNode and SourceLineNode instances.
|
41
|
+
*/
|
42
|
+
pm_node_hash_t integer_nodes;
|
43
|
+
|
44
|
+
/**
|
45
|
+
* This is the set of FloatNode instances.
|
46
|
+
*/
|
47
|
+
pm_node_hash_t float_nodes;
|
48
|
+
|
49
|
+
/**
|
50
|
+
* This is the set of RationalNode and ImaginaryNode instances.
|
51
|
+
*/
|
52
|
+
pm_node_hash_t number_nodes;
|
53
|
+
|
54
|
+
/**
|
55
|
+
* This is the set of StringNode and SourceFileNode instances.
|
56
|
+
*/
|
57
|
+
pm_node_hash_t string_nodes;
|
58
|
+
|
59
|
+
/**
|
60
|
+
* This is the set of RegularExpressionNode instances.
|
61
|
+
*/
|
62
|
+
pm_node_hash_t regexp_nodes;
|
63
|
+
|
64
|
+
/**
|
65
|
+
* This is the set of SymbolNode instances.
|
66
|
+
*/
|
67
|
+
pm_node_hash_t symbol_nodes;
|
68
|
+
|
69
|
+
/**
|
70
|
+
* A pointer to the last TrueNode instance that was inserted, or NULL.
|
71
|
+
*/
|
72
|
+
pm_node_t *true_node;
|
73
|
+
|
74
|
+
/**
|
75
|
+
* A pointer to the last FalseNode instance that was inserted, or NULL.
|
76
|
+
*/
|
77
|
+
pm_node_t *false_node;
|
78
|
+
|
79
|
+
/**
|
80
|
+
* A pointer to the last NilNode instance that was inserted, or NULL.
|
81
|
+
*/
|
82
|
+
pm_node_t *nil_node;
|
83
|
+
|
84
|
+
/**
|
85
|
+
* A pointer to the last SourceEncodingNode instance that was inserted, or
|
86
|
+
* NULL.
|
87
|
+
*/
|
88
|
+
pm_node_t *source_encoding_node;
|
89
|
+
} pm_static_literals_t;
|
90
|
+
|
91
|
+
/**
|
92
|
+
* Add a node to the set of static literals.
|
93
|
+
*
|
94
|
+
* @param newline_list The list of newline offsets to use to calculate lines.
|
95
|
+
* @param start_line The line number that the parser starts on.
|
96
|
+
* @param literals The set of static literals to add the node to.
|
97
|
+
* @param node The node to add to the set.
|
98
|
+
* @param replace Whether to replace the previous node if one already exists.
|
99
|
+
* @return A pointer to the node that is being overwritten, if there is one.
|
100
|
+
*/
|
101
|
+
pm_node_t * pm_static_literals_add(const pm_newline_list_t *newline_list, int32_t start_line, pm_static_literals_t *literals, pm_node_t *node, bool replace);
|
102
|
+
|
103
|
+
/**
|
104
|
+
* Free the internal memory associated with the given static literals set.
|
105
|
+
*
|
106
|
+
* @param literals The set of static literals to free.
|
107
|
+
*/
|
108
|
+
void pm_static_literals_free(pm_static_literals_t *literals);
|
109
|
+
|
110
|
+
/**
|
111
|
+
* Create a string-based representation of the given static literal.
|
112
|
+
*
|
113
|
+
* @param buffer The buffer to write the string to.
|
114
|
+
* @param newline_list The list of newline offsets to use to calculate lines.
|
115
|
+
* @param start_line The line number that the parser starts on.
|
116
|
+
* @param encoding_name The name of the encoding of the source being parsed.
|
117
|
+
* @param node The node to create a string representation of.
|
118
|
+
*/
|
119
|
+
void pm_static_literal_inspect(pm_buffer_t *buffer, const pm_newline_list_t *newline_list, int32_t start_line, const char *encoding_name, const pm_node_t *node);
|
120
|
+
|
121
|
+
#endif
|