debase-ruby_core_source 3.3.0 → 3.3.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (29) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/Rakefile +2 -1
  4. data/lib/debase/ruby_core_source/ruby-3.3.0-p0/prism/ast.h +4612 -0
  5. data/lib/debase/ruby_core_source/ruby-3.3.0-p0/prism/defines.h +94 -0
  6. data/lib/debase/ruby_core_source/ruby-3.3.0-p0/prism/diagnostic.h +297 -0
  7. data/lib/debase/ruby_core_source/ruby-3.3.0-p0/prism/encoding.h +248 -0
  8. data/lib/debase/ruby_core_source/ruby-3.3.0-p0/prism/extension.h +18 -0
  9. data/lib/debase/ruby_core_source/ruby-3.3.0-p0/prism/node.h +57 -0
  10. data/lib/debase/ruby_core_source/ruby-3.3.0-p0/prism/options.h +204 -0
  11. data/lib/debase/ruby_core_source/ruby-3.3.0-p0/prism/pack.h +152 -0
  12. data/lib/debase/ruby_core_source/ruby-3.3.0-p0/prism/parser.h +716 -0
  13. data/lib/debase/ruby_core_source/ruby-3.3.0-p0/prism/prettyprint.h +26 -0
  14. data/lib/debase/ruby_core_source/ruby-3.3.0-p0/prism/prism.h +272 -0
  15. data/lib/debase/ruby_core_source/ruby-3.3.0-p0/prism/regexp.h +33 -0
  16. data/lib/debase/ruby_core_source/ruby-3.3.0-p0/prism/util/pm_buffer.h +146 -0
  17. data/lib/debase/ruby_core_source/ruby-3.3.0-p0/prism/util/pm_char.h +205 -0
  18. data/lib/debase/ruby_core_source/ruby-3.3.0-p0/prism/util/pm_constant_pool.h +191 -0
  19. data/lib/debase/ruby_core_source/ruby-3.3.0-p0/prism/util/pm_list.h +97 -0
  20. data/lib/debase/ruby_core_source/ruby-3.3.0-p0/prism/util/pm_memchr.h +29 -0
  21. data/lib/debase/ruby_core_source/ruby-3.3.0-p0/prism/util/pm_newline_list.h +104 -0
  22. data/lib/debase/ruby_core_source/ruby-3.3.0-p0/prism/util/pm_state_stack.h +42 -0
  23. data/lib/debase/ruby_core_source/ruby-3.3.0-p0/prism/util/pm_string.h +150 -0
  24. data/lib/debase/ruby_core_source/ruby-3.3.0-p0/prism/util/pm_string_list.h +44 -0
  25. data/lib/debase/ruby_core_source/ruby-3.3.0-p0/prism/util/pm_strncasecmp.h +32 -0
  26. data/lib/debase/ruby_core_source/ruby-3.3.0-p0/prism/util/pm_strpbrk.h +43 -0
  27. data/lib/debase/ruby_core_source/ruby-3.3.0-p0/prism/version.h +29 -0
  28. data/lib/debase/ruby_core_source/version.rb +1 -1
  29. metadata +30 -6
@@ -0,0 +1,716 @@
1
+ /**
2
+ * @file parser.h
3
+ *
4
+ * The parser used to parse Ruby source.
5
+ */
6
+ #ifndef PRISM_PARSER_H
7
+ #define PRISM_PARSER_H
8
+
9
+ #include "prism/ast.h"
10
+ #include "prism/defines.h"
11
+ #include "prism/encoding.h"
12
+ #include "prism/util/pm_constant_pool.h"
13
+ #include "prism/util/pm_list.h"
14
+ #include "prism/util/pm_newline_list.h"
15
+ #include "prism/util/pm_state_stack.h"
16
+ #include "prism/util/pm_string.h"
17
+
18
+ #include <stdbool.h>
19
+
20
+ // TODO: remove this by renaming the original flag
21
+ /**
22
+ * Temporary alias for the PM_KEYWORD_HASH_NODE_FLAGS_STATIC_KEYS flag.
23
+ */
24
+ #define PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS PM_KEYWORD_HASH_NODE_FLAGS_STATIC_KEYS
25
+
26
+ /**
27
+ * This enum provides various bits that represent different kinds of states that
28
+ * the lexer can track. This is used to determine which kind of token to return
29
+ * based on the context of the parser.
30
+ */
31
+ typedef enum {
32
+ PM_LEX_STATE_BIT_BEG,
33
+ PM_LEX_STATE_BIT_END,
34
+ PM_LEX_STATE_BIT_ENDARG,
35
+ PM_LEX_STATE_BIT_ENDFN,
36
+ PM_LEX_STATE_BIT_ARG,
37
+ PM_LEX_STATE_BIT_CMDARG,
38
+ PM_LEX_STATE_BIT_MID,
39
+ PM_LEX_STATE_BIT_FNAME,
40
+ PM_LEX_STATE_BIT_DOT,
41
+ PM_LEX_STATE_BIT_CLASS,
42
+ PM_LEX_STATE_BIT_LABEL,
43
+ PM_LEX_STATE_BIT_LABELED,
44
+ PM_LEX_STATE_BIT_FITEM
45
+ } pm_lex_state_bit_t;
46
+
47
+ /**
48
+ * This enum combines the various bits from the above enum into individual
49
+ * values that represent the various states of the lexer.
50
+ */
51
+ typedef enum {
52
+ PM_LEX_STATE_NONE = 0,
53
+ PM_LEX_STATE_BEG = (1 << PM_LEX_STATE_BIT_BEG),
54
+ PM_LEX_STATE_END = (1 << PM_LEX_STATE_BIT_END),
55
+ PM_LEX_STATE_ENDARG = (1 << PM_LEX_STATE_BIT_ENDARG),
56
+ PM_LEX_STATE_ENDFN = (1 << PM_LEX_STATE_BIT_ENDFN),
57
+ PM_LEX_STATE_ARG = (1 << PM_LEX_STATE_BIT_ARG),
58
+ PM_LEX_STATE_CMDARG = (1 << PM_LEX_STATE_BIT_CMDARG),
59
+ PM_LEX_STATE_MID = (1 << PM_LEX_STATE_BIT_MID),
60
+ PM_LEX_STATE_FNAME = (1 << PM_LEX_STATE_BIT_FNAME),
61
+ PM_LEX_STATE_DOT = (1 << PM_LEX_STATE_BIT_DOT),
62
+ PM_LEX_STATE_CLASS = (1 << PM_LEX_STATE_BIT_CLASS),
63
+ PM_LEX_STATE_LABEL = (1 << PM_LEX_STATE_BIT_LABEL),
64
+ PM_LEX_STATE_LABELED = (1 << PM_LEX_STATE_BIT_LABELED),
65
+ PM_LEX_STATE_FITEM = (1 << PM_LEX_STATE_BIT_FITEM),
66
+ PM_LEX_STATE_BEG_ANY = PM_LEX_STATE_BEG | PM_LEX_STATE_MID | PM_LEX_STATE_CLASS,
67
+ PM_LEX_STATE_ARG_ANY = PM_LEX_STATE_ARG | PM_LEX_STATE_CMDARG,
68
+ PM_LEX_STATE_END_ANY = PM_LEX_STATE_END | PM_LEX_STATE_ENDARG | PM_LEX_STATE_ENDFN
69
+ } pm_lex_state_t;
70
+
71
+ /**
72
+ * The type of quote that a heredoc uses.
73
+ */
74
+ typedef enum {
75
+ PM_HEREDOC_QUOTE_NONE,
76
+ PM_HEREDOC_QUOTE_SINGLE = '\'',
77
+ PM_HEREDOC_QUOTE_DOUBLE = '"',
78
+ PM_HEREDOC_QUOTE_BACKTICK = '`',
79
+ } pm_heredoc_quote_t;
80
+
81
+ /**
82
+ * The type of indentation that a heredoc uses.
83
+ */
84
+ typedef enum {
85
+ PM_HEREDOC_INDENT_NONE,
86
+ PM_HEREDOC_INDENT_DASH,
87
+ PM_HEREDOC_INDENT_TILDE,
88
+ } pm_heredoc_indent_t;
89
+
90
+ /**
91
+ * When lexing Ruby source, the lexer has a small amount of state to tell which
92
+ * kind of token it is currently lexing. For example, when we find the start of
93
+ * a string, the first token that we return is a TOKEN_STRING_BEGIN token. After
94
+ * that the lexer is now in the PM_LEX_STRING mode, and will return tokens that
95
+ * are found as part of a string.
96
+ */
97
+ typedef struct pm_lex_mode {
98
+ /** The type of this lex mode. */
99
+ enum {
100
+ /** This state is used when any given token is being lexed. */
101
+ PM_LEX_DEFAULT,
102
+
103
+ /**
104
+ * This state is used when we're lexing as normal but inside an embedded
105
+ * expression of a string.
106
+ */
107
+ PM_LEX_EMBEXPR,
108
+
109
+ /**
110
+ * This state is used when we're lexing a variable that is embedded
111
+ * directly inside of a string with the # shorthand.
112
+ */
113
+ PM_LEX_EMBVAR,
114
+
115
+ /** This state is used when you are inside the content of a heredoc. */
116
+ PM_LEX_HEREDOC,
117
+
118
+ /**
119
+ * This state is used when we are lexing a list of tokens, as in a %w
120
+ * word list literal or a %i symbol list literal.
121
+ */
122
+ PM_LEX_LIST,
123
+
124
+ /**
125
+ * This state is used when a regular expression has been begun and we
126
+ * are looking for the terminator.
127
+ */
128
+ PM_LEX_REGEXP,
129
+
130
+ /**
131
+ * This state is used when we are lexing a string or a string-like
132
+ * token, as in string content with either quote or an xstring.
133
+ */
134
+ PM_LEX_STRING
135
+ } mode;
136
+
137
+ /** The data associated with this type of lex mode. */
138
+ union {
139
+ struct {
140
+ /** This keeps track of the nesting level of the list. */
141
+ size_t nesting;
142
+
143
+ /** Whether or not interpolation is allowed in this list. */
144
+ bool interpolation;
145
+
146
+ /**
147
+ * When lexing a list, it takes into account balancing the
148
+ * terminator if the terminator is one of (), [], {}, or <>.
149
+ */
150
+ uint8_t incrementor;
151
+
152
+ /** This is the terminator of the list literal. */
153
+ uint8_t terminator;
154
+
155
+ /**
156
+ * This is the character set that should be used to delimit the
157
+ * tokens within the list.
158
+ */
159
+ uint8_t breakpoints[11];
160
+ } list;
161
+
162
+ struct {
163
+ /**
164
+ * This keeps track of the nesting level of the regular expression.
165
+ */
166
+ size_t nesting;
167
+
168
+ /**
169
+ * When lexing a regular expression, it takes into account balancing
170
+ * the terminator if the terminator is one of (), [], {}, or <>.
171
+ */
172
+ uint8_t incrementor;
173
+
174
+ /** This is the terminator of the regular expression. */
175
+ uint8_t terminator;
176
+
177
+ /**
178
+ * This is the character set that should be used to delimit the
179
+ * tokens within the regular expression.
180
+ */
181
+ uint8_t breakpoints[6];
182
+ } regexp;
183
+
184
+ struct {
185
+ /** This keeps track of the nesting level of the string. */
186
+ size_t nesting;
187
+
188
+ /** Whether or not interpolation is allowed in this string. */
189
+ bool interpolation;
190
+
191
+ /**
192
+ * Whether or not at the end of the string we should allow a :,
193
+ * which would indicate this was a dynamic symbol instead of a
194
+ * string.
195
+ */
196
+ bool label_allowed;
197
+
198
+ /**
199
+ * When lexing a string, it takes into account balancing the
200
+ * terminator if the terminator is one of (), [], {}, or <>.
201
+ */
202
+ uint8_t incrementor;
203
+
204
+ /**
205
+ * This is the terminator of the string. It is typically either a
206
+ * single or double quote.
207
+ */
208
+ uint8_t terminator;
209
+
210
+ /**
211
+ * This is the character set that should be used to delimit the
212
+ * tokens within the string.
213
+ */
214
+ uint8_t breakpoints[6];
215
+ } string;
216
+
217
+ struct {
218
+ /** A pointer to the start of the heredoc identifier. */
219
+ const uint8_t *ident_start;
220
+
221
+ /** The length of the heredoc identifier. */
222
+ size_t ident_length;
223
+
224
+ /** The type of quote that the heredoc uses. */
225
+ pm_heredoc_quote_t quote;
226
+
227
+ /** The type of indentation that the heredoc uses. */
228
+ pm_heredoc_indent_t indent;
229
+
230
+ /**
231
+ * This is the pointer to the character where lexing should resume
232
+ * once the heredoc has been completely processed.
233
+ */
234
+ const uint8_t *next_start;
235
+
236
+ /**
237
+ * This is used to track the amount of common whitespace on each
238
+ * line so that we know how much to dedent each line in the case of
239
+ * a tilde heredoc.
240
+ */
241
+ size_t common_whitespace;
242
+ } heredoc;
243
+ } as;
244
+
245
+ /** The previous lex mode so that it knows how to pop. */
246
+ struct pm_lex_mode *prev;
247
+ } pm_lex_mode_t;
248
+
249
+ /**
250
+ * We pre-allocate a certain number of lex modes in order to avoid having to
251
+ * call malloc too many times while parsing. You really shouldn't need more than
252
+ * this because you only really nest deeply when doing string interpolation.
253
+ */
254
+ #define PM_LEX_STACK_SIZE 4
255
+
256
+ /**
257
+ * The parser used to parse Ruby source.
258
+ */
259
+ typedef struct pm_parser pm_parser_t;
260
+
261
+ /**
262
+ * While parsing, we keep track of a stack of contexts. This is helpful for
263
+ * error recovery so that we can pop back to a previous context when we hit a
264
+ * token that is understood by a parent context but not by the current context.
265
+ */
266
+ typedef enum {
267
+ /** a begin statement */
268
+ PM_CONTEXT_BEGIN,
269
+
270
+ /** expressions in block arguments using braces */
271
+ PM_CONTEXT_BLOCK_BRACES,
272
+
273
+ /** expressions in block arguments using do..end */
274
+ PM_CONTEXT_BLOCK_KEYWORDS,
275
+
276
+ /** a case when statement */
277
+ PM_CONTEXT_CASE_WHEN,
278
+
279
+ /** a case in statement */
280
+ PM_CONTEXT_CASE_IN,
281
+
282
+ /** a class declaration */
283
+ PM_CONTEXT_CLASS,
284
+
285
+ /** a method definition */
286
+ PM_CONTEXT_DEF,
287
+
288
+ /** a method definition's parameters */
289
+ PM_CONTEXT_DEF_PARAMS,
290
+
291
+ /** a method definition's default parameter */
292
+ PM_CONTEXT_DEFAULT_PARAMS,
293
+
294
+ /** an else clause */
295
+ PM_CONTEXT_ELSE,
296
+
297
+ /** an elsif clause */
298
+ PM_CONTEXT_ELSIF,
299
+
300
+ /** an interpolated expression */
301
+ PM_CONTEXT_EMBEXPR,
302
+
303
+ /** an ensure statement */
304
+ PM_CONTEXT_ENSURE,
305
+
306
+ /** an ensure statement within a method definition */
307
+ PM_CONTEXT_ENSURE_DEF,
308
+
309
+ /** a for loop */
310
+ PM_CONTEXT_FOR,
311
+
312
+ /** a for loop's index */
313
+ PM_CONTEXT_FOR_INDEX,
314
+
315
+ /** an if statement */
316
+ PM_CONTEXT_IF,
317
+
318
+ /** a lambda expression with braces */
319
+ PM_CONTEXT_LAMBDA_BRACES,
320
+
321
+ /** a lambda expression with do..end */
322
+ PM_CONTEXT_LAMBDA_DO_END,
323
+
324
+ /** the top level context */
325
+ PM_CONTEXT_MAIN,
326
+
327
+ /** a module declaration */
328
+ PM_CONTEXT_MODULE,
329
+
330
+ /** a parenthesized expression */
331
+ PM_CONTEXT_PARENS,
332
+
333
+ /** an END block */
334
+ PM_CONTEXT_POSTEXE,
335
+
336
+ /** a predicate inside an if/elsif/unless statement */
337
+ PM_CONTEXT_PREDICATE,
338
+
339
+ /** a BEGIN block */
340
+ PM_CONTEXT_PREEXE,
341
+
342
+ /** a rescue else statement */
343
+ PM_CONTEXT_RESCUE_ELSE,
344
+
345
+ /** a rescue else statement within a method definition */
346
+ PM_CONTEXT_RESCUE_ELSE_DEF,
347
+
348
+ /** a rescue statement */
349
+ PM_CONTEXT_RESCUE,
350
+
351
+ /** a rescue statement within a method definition */
352
+ PM_CONTEXT_RESCUE_DEF,
353
+
354
+ /** a singleton class definition */
355
+ PM_CONTEXT_SCLASS,
356
+
357
+ /** an unless statement */
358
+ PM_CONTEXT_UNLESS,
359
+
360
+ /** an until statement */
361
+ PM_CONTEXT_UNTIL,
362
+
363
+ /** a while statement */
364
+ PM_CONTEXT_WHILE,
365
+ } pm_context_t;
366
+
367
+ /** This is a node in a linked list of contexts. */
368
+ typedef struct pm_context_node {
369
+ /** The context that this node represents. */
370
+ pm_context_t context;
371
+
372
+ /** A pointer to the previous context in the linked list. */
373
+ struct pm_context_node *prev;
374
+ } pm_context_node_t;
375
+
376
+ /** This is the type of a comment that we've found while parsing. */
377
+ typedef enum {
378
+ PM_COMMENT_INLINE,
379
+ PM_COMMENT_EMBDOC
380
+ } pm_comment_type_t;
381
+
382
+ /**
383
+ * This is a node in the linked list of comments that we've found while parsing.
384
+ *
385
+ * @extends pm_list_node_t
386
+ */
387
+ typedef struct pm_comment {
388
+ /** The embedded base node. */
389
+ pm_list_node_t node;
390
+
391
+ /** The location of the comment in the source. */
392
+ pm_location_t location;
393
+
394
+ /** The type of comment that we've found. */
395
+ pm_comment_type_t type;
396
+ } pm_comment_t;
397
+
398
+ /**
399
+ * This is a node in the linked list of magic comments that we've found while
400
+ * parsing.
401
+ *
402
+ * @extends pm_list_node_t
403
+ */
404
+ typedef struct {
405
+ /** The embedded base node. */
406
+ pm_list_node_t node;
407
+
408
+ /** A pointer to the start of the key in the source. */
409
+ const uint8_t *key_start;
410
+
411
+ /** A pointer to the start of the value in the source. */
412
+ const uint8_t *value_start;
413
+
414
+ /** The length of the key in the source. */
415
+ uint32_t key_length;
416
+
417
+ /** The length of the value in the source. */
418
+ uint32_t value_length;
419
+ } pm_magic_comment_t;
420
+
421
+ /**
422
+ * When the encoding that is being used to parse the source is changed by prism,
423
+ * we provide the ability here to call out to a user-defined function.
424
+ */
425
+ typedef void (*pm_encoding_changed_callback_t)(pm_parser_t *parser);
426
+
427
+ /**
428
+ * When you are lexing through a file, the lexer needs all of the information
429
+ * that the parser additionally provides (for example, the local table). So if
430
+ * you want to properly lex Ruby, you need to actually lex it in the context of
431
+ * the parser. In order to provide this functionality, we optionally allow a
432
+ * struct to be attached to the parser that calls back out to a user-provided
433
+ * callback when each token is lexed.
434
+ */
435
+ typedef struct {
436
+ /**
437
+ * This opaque pointer is used to provide whatever information the user
438
+ * deemed necessary to the callback. In our case we use it to pass the array
439
+ * that the tokens get appended into.
440
+ */
441
+ void *data;
442
+
443
+ /**
444
+ * This is the callback that is called when a token is lexed. It is passed
445
+ * the opaque data pointer, the parser, and the token that was lexed.
446
+ */
447
+ void (*callback)(void *data, pm_parser_t *parser, pm_token_t *token);
448
+ } pm_lex_callback_t;
449
+
450
+ /**
451
+ * This struct represents a node in a linked list of scopes. Some scopes can see
452
+ * into their parent scopes, while others cannot.
453
+ */
454
+ typedef struct pm_scope {
455
+ /** The IDs of the locals in the given scope. */
456
+ pm_constant_id_list_t locals;
457
+
458
+ /** A pointer to the previous scope in the linked list. */
459
+ struct pm_scope *previous;
460
+
461
+ /**
462
+ * A boolean indicating whether or not this scope can see into its parent.
463
+ * If closed is true, then the scope cannot see into its parent.
464
+ */
465
+ bool closed;
466
+
467
+ /**
468
+ * A boolean indicating whether or not this scope has explicit parameters.
469
+ * This is necessary to determine whether or not numbered parameters are
470
+ * allowed.
471
+ */
472
+ bool explicit_params;
473
+
474
+ /**
475
+ * An integer indicating the number of numbered parameters on this scope.
476
+ * This is necessary to determine if child blocks are allowed to use
477
+ * numbered parameters, and to pass information to consumers of the AST
478
+ * about how many numbered parameters exist.
479
+ */
480
+ uint8_t numbered_parameters;
481
+ } pm_scope_t;
482
+
483
+ /**
484
+ * This struct represents the overall parser. It contains a reference to the
485
+ * source file, as well as pointers that indicate where in the source it's
486
+ * currently parsing. It also contains the most recent and current token that
487
+ * it's considering.
488
+ */
489
+ struct pm_parser {
490
+ /** The current state of the lexer. */
491
+ pm_lex_state_t lex_state;
492
+
493
+ /** Tracks the current nesting of (), [], and {}. */
494
+ int enclosure_nesting;
495
+
496
+ /**
497
+ * Used to temporarily track the nesting of enclosures to determine if a {
498
+ * is the beginning of a lambda following the parameters of a lambda.
499
+ */
500
+ int lambda_enclosure_nesting;
501
+
502
+ /**
503
+ * Used to track the nesting of braces to ensure we get the correct value
504
+ * when we are interpolating blocks with braces.
505
+ */
506
+ int brace_nesting;
507
+
508
+ /**
509
+ * The stack used to determine if a do keyword belongs to the predicate of a
510
+ * while, until, or for loop.
511
+ */
512
+ pm_state_stack_t do_loop_stack;
513
+
514
+ /**
515
+ * The stack used to determine if a do keyword belongs to the beginning of a
516
+ * block.
517
+ */
518
+ pm_state_stack_t accepts_block_stack;
519
+
520
+ /** A stack of lex modes. */
521
+ struct {
522
+ /** The current mode of the lexer. */
523
+ pm_lex_mode_t *current;
524
+
525
+ /** The stack of lexer modes. */
526
+ pm_lex_mode_t stack[PM_LEX_STACK_SIZE];
527
+
528
+ /** The current index into the lexer mode stack. */
529
+ size_t index;
530
+ } lex_modes;
531
+
532
+ /** The pointer to the start of the source. */
533
+ const uint8_t *start;
534
+
535
+ /** The pointer to the end of the source. */
536
+ const uint8_t *end;
537
+
538
+ /** The previous token we were considering. */
539
+ pm_token_t previous;
540
+
541
+ /** The current token we're considering. */
542
+ pm_token_t current;
543
+
544
+ /**
545
+ * This is a special field set on the parser when we need the parser to jump
546
+ * to a specific location when lexing the next token, as opposed to just
547
+ * using the end of the previous token. Normally this is NULL.
548
+ */
549
+ const uint8_t *next_start;
550
+
551
+ /**
552
+ * This field indicates the end of a heredoc whose identifier was found on
553
+ * the current line. If another heredoc is found on the same line, then this
554
+ * will be moved forward to the end of that heredoc. If no heredocs are
555
+ * found on a line then this is NULL.
556
+ */
557
+ const uint8_t *heredoc_end;
558
+
559
+ /** The list of comments that have been found while parsing. */
560
+ pm_list_t comment_list;
561
+
562
+ /** The list of magic comments that have been found while parsing. */
563
+ pm_list_t magic_comment_list;
564
+
565
+ /** The optional location of the __END__ keyword and its contents. */
566
+ pm_location_t data_loc;
567
+
568
+ /** The list of warnings that have been found while parsing. */
569
+ pm_list_t warning_list;
570
+
571
+ /** The list of errors that have been found while parsing. */
572
+ pm_list_t error_list;
573
+
574
+ /** The current local scope. */
575
+ pm_scope_t *current_scope;
576
+
577
+ /** The current parsing context. */
578
+ pm_context_node_t *current_context;
579
+
580
+ /**
581
+ * The encoding functions for the current file are attached to the parser as
582
+ * it's parsing so that it can change with a magic comment.
583
+ */
584
+ const pm_encoding_t *encoding;
585
+
586
+ /**
587
+ * When the encoding that is being used to parse the source is changed by
588
+ * prism, we provide the ability here to call out to a user-defined
589
+ * function.
590
+ */
591
+ pm_encoding_changed_callback_t encoding_changed_callback;
592
+
593
+ /**
594
+ * This pointer indicates where a comment must start if it is to be
595
+ * considered an encoding comment.
596
+ */
597
+ const uint8_t *encoding_comment_start;
598
+
599
+ /**
600
+ * This is an optional callback that can be attached to the parser that will
601
+ * be called whenever a new token is lexed by the parser.
602
+ */
603
+ pm_lex_callback_t *lex_callback;
604
+
605
+ /**
606
+ * This is the path of the file being parsed. We use the filepath when
607
+ * constructing SourceFileNodes.
608
+ */
609
+ pm_string_t filepath_string;
610
+
611
+ /**
612
+ * This constant pool keeps all of the constants defined throughout the file
613
+ * so that we can reference them later.
614
+ */
615
+ pm_constant_pool_t constant_pool;
616
+
617
+ /** This is the list of newline offsets in the source file. */
618
+ pm_newline_list_t newline_list;
619
+
620
+ /**
621
+ * We want to add a flag to integer nodes that indicates their base. We only
622
+ * want to parse these once, but we don't have space on the token itself to
623
+ * communicate this information. So we store it here and pass it through
624
+ * when we find tokens that we need it for.
625
+ */
626
+ pm_node_flags_t integer_base;
627
+
628
+ /**
629
+ * This string is used to pass information from the lexer to the parser. It
630
+ * is particularly necessary because of escape sequences.
631
+ */
632
+ pm_string_t current_string;
633
+
634
+ /**
635
+ * The line number at the start of the parse. This will be used to offset
636
+ * the line numbers of all of the locations.
637
+ */
638
+ int32_t start_line;
639
+
640
+ /**
641
+ * When a string-like expression is being lexed, any byte or escape sequence
642
+ * that resolves to a value whose top bit is set (i.e., >= 0x80) will
643
+ * explicitly set the encoding to the same encoding as the source.
644
+ * Alternatively, if a unicode escape sequence is used (e.g., \\u{80}) that
645
+ * resolves to a value whose top bit is set, then the encoding will be
646
+ * explicitly set to UTF-8.
647
+ *
648
+ * The _next_ time this happens, if the encoding that is about to become the
649
+ * explicitly set encoding does not match the previously set explicit
650
+ * encoding, a mixed encoding error will be emitted.
651
+ *
652
+ * When the expression is finished being lexed, the explicit encoding
653
+ * controls the encoding of the expression. For the most part this means
654
+ * that the expression will either be encoded in the source encoding or
655
+ * UTF-8. This holds for all encodings except US-ASCII. If the source is
656
+ * US-ASCII and an explicit encoding was set that was _not_ UTF-8, then the
657
+ * expression will be encoded as ASCII-8BIT.
658
+ *
659
+ * Note that if the expression is a list, different elements within the same
660
+ * list can have different encodings, so this will get reset between each
661
+ * element. Furthermore all of this only applies to lists that support
662
+ * interpolation, because otherwise escapes that could change the encoding
663
+ * are ignored.
664
+ *
665
+ * At first glance, it may make more sense for this to live on the lexer
666
+ * mode, but we need it here to communicate back to the parser for character
667
+ * literals that do not push a new lexer mode.
668
+ */
669
+ const pm_encoding_t *explicit_encoding;
670
+
671
+ /** Whether or not we're at the beginning of a command. */
672
+ bool command_start;
673
+
674
+ /** Whether or not we're currently recovering from a syntax error. */
675
+ bool recovering;
676
+
677
+ /**
678
+ * Whether or not the encoding has been changed by a magic comment. We use
679
+ * this to provide a fast path for the lexer instead of going through the
680
+ * function pointer.
681
+ */
682
+ bool encoding_changed;
683
+
684
+ /**
685
+ * This flag indicates that we are currently parsing a pattern matching
686
+ * expression and impacts the calculation of newlines.
687
+ */
688
+ bool pattern_matching_newlines;
689
+
690
+ /** This flag indicates that we are currently parsing a keyword argument. */
691
+ bool in_keyword_arg;
692
+
693
+ /** The name id of the parameter whose default value is currently being parsed. */
694
+ pm_constant_id_t current_param_name;
695
+
696
+ /**
697
+ * Whether or not the parser has seen a token that has semantic meaning
698
+ * (i.e., a token that is not a comment or whitespace).
699
+ */
700
+ bool semantic_token_seen;
701
+
702
+ /**
703
+ * Whether or not we have found a frozen_string_literal magic comment with
704
+ * a true value.
705
+ */
706
+ bool frozen_string_literal;
707
+
708
+ /**
709
+ * Whether or not we should suppress warnings. This will be set to true if the
710
+ * consumer of the library specified it, usually because they are parsing
711
+ * when $VERBOSE is nil.
712
+ */
713
+ bool suppress_warnings;
714
+ };
715
+
716
+ #endif