jruby-prism-parser 0.24.0-java → 1.4.0-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (148) hide show
  1. checksums.yaml +4 -4
  2. data/BSDmakefile +58 -0
  3. data/CHANGELOG.md +269 -1
  4. data/CONTRIBUTING.md +0 -4
  5. data/Makefile +25 -18
  6. data/README.md +57 -6
  7. data/config.yml +1724 -140
  8. data/docs/build_system.md +39 -11
  9. data/docs/configuration.md +4 -0
  10. data/docs/cruby_compilation.md +1 -1
  11. data/docs/fuzzing.md +1 -1
  12. data/docs/parser_translation.md +14 -9
  13. data/docs/parsing_rules.md +4 -1
  14. data/docs/releasing.md +8 -10
  15. data/docs/relocation.md +34 -0
  16. data/docs/ripper_translation.md +72 -0
  17. data/docs/ruby_api.md +2 -1
  18. data/docs/serialization.md +29 -5
  19. data/ext/prism/api_node.c +3395 -1999
  20. data/ext/prism/api_pack.c +9 -0
  21. data/ext/prism/extconf.rb +55 -34
  22. data/ext/prism/extension.c +597 -346
  23. data/ext/prism/extension.h +6 -5
  24. data/include/prism/ast.h +2612 -455
  25. data/include/prism/defines.h +160 -2
  26. data/include/prism/diagnostic.h +188 -76
  27. data/include/prism/encoding.h +22 -4
  28. data/include/prism/node.h +89 -17
  29. data/include/prism/options.h +224 -12
  30. data/include/prism/pack.h +11 -0
  31. data/include/prism/parser.h +267 -66
  32. data/include/prism/prettyprint.h +8 -0
  33. data/include/prism/regexp.h +18 -8
  34. data/include/prism/static_literals.h +121 -0
  35. data/include/prism/util/pm_buffer.h +75 -2
  36. data/include/prism/util/pm_char.h +1 -2
  37. data/include/prism/util/pm_constant_pool.h +18 -9
  38. data/include/prism/util/pm_integer.h +126 -0
  39. data/include/prism/util/pm_list.h +1 -1
  40. data/include/prism/util/pm_newline_list.h +19 -0
  41. data/include/prism/util/pm_string.h +48 -8
  42. data/include/prism/version.h +3 -3
  43. data/include/prism.h +99 -5
  44. data/jruby-prism.jar +0 -0
  45. data/lib/prism/compiler.rb +11 -1
  46. data/lib/prism/desugar_compiler.rb +113 -74
  47. data/lib/prism/dispatcher.rb +45 -1
  48. data/lib/prism/dot_visitor.rb +201 -77
  49. data/lib/prism/dsl.rb +673 -461
  50. data/lib/prism/ffi.rb +233 -45
  51. data/lib/prism/inspect_visitor.rb +2389 -0
  52. data/lib/prism/lex_compat.rb +35 -16
  53. data/lib/prism/mutation_compiler.rb +24 -8
  54. data/lib/prism/node.rb +7731 -8460
  55. data/lib/prism/node_ext.rb +328 -32
  56. data/lib/prism/pack.rb +4 -0
  57. data/lib/prism/parse_result/comments.rb +34 -24
  58. data/lib/prism/parse_result/errors.rb +65 -0
  59. data/lib/prism/parse_result/newlines.rb +102 -12
  60. data/lib/prism/parse_result.rb +448 -44
  61. data/lib/prism/pattern.rb +28 -10
  62. data/lib/prism/polyfill/append_as_bytes.rb +15 -0
  63. data/lib/prism/polyfill/byteindex.rb +13 -0
  64. data/lib/prism/polyfill/unpack1.rb +14 -0
  65. data/lib/prism/reflection.rb +413 -0
  66. data/lib/prism/relocation.rb +504 -0
  67. data/lib/prism/serialize.rb +1940 -1198
  68. data/lib/prism/string_query.rb +30 -0
  69. data/lib/prism/translation/parser/builder.rb +61 -0
  70. data/lib/prism/translation/parser/compiler.rb +569 -195
  71. data/lib/prism/translation/parser/lexer.rb +516 -39
  72. data/lib/prism/translation/parser.rb +177 -12
  73. data/lib/prism/translation/parser33.rb +1 -1
  74. data/lib/prism/translation/parser34.rb +1 -1
  75. data/lib/prism/translation/parser35.rb +12 -0
  76. data/lib/prism/translation/ripper/sexp.rb +125 -0
  77. data/lib/prism/translation/ripper/shim.rb +5 -0
  78. data/lib/prism/translation/ripper.rb +3224 -462
  79. data/lib/prism/translation/ruby_parser.rb +194 -69
  80. data/lib/prism/translation.rb +4 -1
  81. data/lib/prism/version.rb +1 -1
  82. data/lib/prism/visitor.rb +13 -0
  83. data/lib/prism.rb +17 -27
  84. data/prism.gemspec +57 -17
  85. data/rbi/prism/compiler.rbi +12 -0
  86. data/rbi/prism/dsl.rbi +524 -0
  87. data/rbi/prism/inspect_visitor.rbi +12 -0
  88. data/rbi/prism/node.rbi +8722 -0
  89. data/rbi/prism/node_ext.rbi +107 -0
  90. data/rbi/prism/parse_result.rbi +404 -0
  91. data/rbi/prism/reflection.rbi +58 -0
  92. data/rbi/prism/string_query.rbi +12 -0
  93. data/rbi/prism/translation/parser.rbi +11 -0
  94. data/rbi/prism/translation/parser33.rbi +6 -0
  95. data/rbi/prism/translation/parser34.rbi +6 -0
  96. data/rbi/prism/translation/parser35.rbi +6 -0
  97. data/rbi/prism/translation/ripper.rbi +15 -0
  98. data/rbi/prism/visitor.rbi +473 -0
  99. data/rbi/prism.rbi +44 -7745
  100. data/sig/prism/compiler.rbs +9 -0
  101. data/sig/prism/dispatcher.rbs +16 -0
  102. data/sig/prism/dot_visitor.rbs +6 -0
  103. data/sig/prism/dsl.rbs +351 -0
  104. data/sig/prism/inspect_visitor.rbs +22 -0
  105. data/sig/prism/lex_compat.rbs +10 -0
  106. data/sig/prism/mutation_compiler.rbs +159 -0
  107. data/sig/prism/node.rbs +3614 -0
  108. data/sig/prism/node_ext.rbs +82 -0
  109. data/sig/prism/pack.rbs +43 -0
  110. data/sig/prism/parse_result.rbs +192 -0
  111. data/sig/prism/pattern.rbs +13 -0
  112. data/sig/prism/reflection.rbs +50 -0
  113. data/sig/prism/relocation.rbs +185 -0
  114. data/sig/prism/serialize.rbs +8 -0
  115. data/sig/prism/string_query.rbs +11 -0
  116. data/sig/prism/visitor.rbs +169 -0
  117. data/sig/prism.rbs +248 -4767
  118. data/src/diagnostic.c +672 -230
  119. data/src/encoding.c +211 -108
  120. data/src/node.c +7541 -1653
  121. data/src/options.c +135 -20
  122. data/src/pack.c +33 -17
  123. data/src/prettyprint.c +1543 -1485
  124. data/src/prism.c +7813 -3050
  125. data/src/regexp.c +225 -73
  126. data/src/serialize.c +101 -77
  127. data/src/static_literals.c +617 -0
  128. data/src/token_type.c +14 -13
  129. data/src/util/pm_buffer.c +187 -20
  130. data/src/util/pm_char.c +5 -5
  131. data/src/util/pm_constant_pool.c +39 -19
  132. data/src/util/pm_integer.c +670 -0
  133. data/src/util/pm_list.c +1 -1
  134. data/src/util/pm_newline_list.c +43 -5
  135. data/src/util/pm_string.c +213 -33
  136. data/src/util/pm_strncasecmp.c +13 -1
  137. data/src/util/pm_strpbrk.c +32 -6
  138. metadata +55 -19
  139. data/docs/ripper.md +0 -36
  140. data/include/prism/util/pm_state_stack.h +0 -42
  141. data/include/prism/util/pm_string_list.h +0 -44
  142. data/lib/prism/debug.rb +0 -206
  143. data/lib/prism/node_inspector.rb +0 -68
  144. data/lib/prism/translation/parser/rubocop.rb +0 -45
  145. data/rbi/prism_static.rbi +0 -207
  146. data/sig/prism_static.rbs +0 -201
  147. data/src/util/pm_state_stack.c +0 -25
  148. data/src/util/pm_string_list.c +0 -28
@@ -6,14 +6,14 @@
6
6
  #ifndef PRISM_PARSER_H
7
7
  #define PRISM_PARSER_H
8
8
 
9
- #include "prism/ast.h"
10
9
  #include "prism/defines.h"
10
+ #include "prism/ast.h"
11
11
  #include "prism/encoding.h"
12
12
  #include "prism/options.h"
13
+ #include "prism/static_literals.h"
13
14
  #include "prism/util/pm_constant_pool.h"
14
15
  #include "prism/util/pm_list.h"
15
16
  #include "prism/util/pm_newline_list.h"
16
- #include "prism/util/pm_state_stack.h"
17
17
  #include "prism/util/pm_string.h"
18
18
 
19
19
  #include <stdbool.h>
@@ -82,6 +82,23 @@ typedef enum {
82
82
  PM_HEREDOC_INDENT_TILDE,
83
83
  } pm_heredoc_indent_t;
84
84
 
85
+ /**
86
+ * All of the information that needs to be stored in order to lex a heredoc.
87
+ */
88
+ typedef struct {
89
+ /** A pointer to the start of the heredoc identifier. */
90
+ const uint8_t *ident_start;
91
+
92
+ /** The length of the heredoc identifier. */
93
+ size_t ident_length;
94
+
95
+ /** The type of quote that the heredoc uses. */
96
+ pm_heredoc_quote_t quote;
97
+
98
+ /** The type of indentation that the heredoc uses. */
99
+ pm_heredoc_indent_t indent;
100
+ } pm_heredoc_lex_mode_t;
101
+
85
102
  /**
86
103
  * When lexing Ruby source, the lexer has a small amount of state to tell which
87
104
  * kind of token it is currently lexing. For example, when we find the start of
@@ -173,7 +190,7 @@ typedef struct pm_lex_mode {
173
190
  * This is the character set that should be used to delimit the
174
191
  * tokens within the regular expression.
175
192
  */
176
- uint8_t breakpoints[6];
193
+ uint8_t breakpoints[7];
177
194
  } regexp;
178
195
 
179
196
  struct {
@@ -206,21 +223,14 @@ typedef struct pm_lex_mode {
206
223
  * This is the character set that should be used to delimit the
207
224
  * tokens within the string.
208
225
  */
209
- uint8_t breakpoints[6];
226
+ uint8_t breakpoints[7];
210
227
  } string;
211
228
 
212
229
  struct {
213
- /** A pointer to the start of the heredoc identifier. */
214
- const uint8_t *ident_start;
215
-
216
- /** The length of the heredoc identifier. */
217
- size_t ident_length;
218
-
219
- /** The type of quote that the heredoc uses. */
220
- pm_heredoc_quote_t quote;
221
-
222
- /** The type of indentation that the heredoc uses. */
223
- pm_heredoc_indent_t indent;
230
+ /**
231
+ * All of the data necessary to lex a heredoc.
232
+ */
233
+ pm_heredoc_lex_mode_t base;
224
234
 
225
235
  /**
226
236
  * This is the pointer to the character where lexing should resume
@@ -233,7 +243,10 @@ typedef struct pm_lex_mode {
233
243
  * line so that we know how much to dedent each line in the case of
234
244
  * a tilde heredoc.
235
245
  */
236
- size_t common_whitespace;
246
+ size_t *common_whitespace;
247
+
248
+ /** True if the previous token ended with a line continuation. */
249
+ bool line_continuation;
237
250
  } heredoc;
238
251
  } as;
239
252
 
@@ -265,12 +278,30 @@ typedef enum {
265
278
  /** a begin statement */
266
279
  PM_CONTEXT_BEGIN,
267
280
 
281
+ /** an ensure statement with an explicit begin */
282
+ PM_CONTEXT_BEGIN_ENSURE,
283
+
284
+ /** a rescue else statement with an explicit begin */
285
+ PM_CONTEXT_BEGIN_ELSE,
286
+
287
+ /** a rescue statement with an explicit begin */
288
+ PM_CONTEXT_BEGIN_RESCUE,
289
+
268
290
  /** expressions in block arguments using braces */
269
291
  PM_CONTEXT_BLOCK_BRACES,
270
292
 
271
293
  /** expressions in block arguments using do..end */
272
294
  PM_CONTEXT_BLOCK_KEYWORDS,
273
295
 
296
+ /** an ensure statement within a do..end block */
297
+ PM_CONTEXT_BLOCK_ENSURE,
298
+
299
+ /** a rescue else statement within a do..end block */
300
+ PM_CONTEXT_BLOCK_ELSE,
301
+
302
+ /** a rescue statement within a do..end block */
303
+ PM_CONTEXT_BLOCK_RESCUE,
304
+
274
305
  /** a case when statement */
275
306
  PM_CONTEXT_CASE_WHEN,
276
307
 
@@ -280,12 +311,33 @@ typedef enum {
280
311
  /** a class declaration */
281
312
  PM_CONTEXT_CLASS,
282
313
 
314
+ /** an ensure statement within a class statement */
315
+ PM_CONTEXT_CLASS_ENSURE,
316
+
317
+ /** a rescue else statement within a class statement */
318
+ PM_CONTEXT_CLASS_ELSE,
319
+
320
+ /** a rescue statement within a class statement */
321
+ PM_CONTEXT_CLASS_RESCUE,
322
+
283
323
  /** a method definition */
284
324
  PM_CONTEXT_DEF,
285
325
 
326
+ /** an ensure statement within a method definition */
327
+ PM_CONTEXT_DEF_ENSURE,
328
+
329
+ /** a rescue else statement within a method definition */
330
+ PM_CONTEXT_DEF_ELSE,
331
+
332
+ /** a rescue statement within a method definition */
333
+ PM_CONTEXT_DEF_RESCUE,
334
+
286
335
  /** a method definition's parameters */
287
336
  PM_CONTEXT_DEF_PARAMS,
288
337
 
338
+ /** a defined? expression */
339
+ PM_CONTEXT_DEFINED,
340
+
289
341
  /** a method definition's default parameter */
290
342
  PM_CONTEXT_DEFAULT_PARAMS,
291
343
 
@@ -298,12 +350,6 @@ typedef enum {
298
350
  /** an interpolated expression */
299
351
  PM_CONTEXT_EMBEXPR,
300
352
 
301
- /** an ensure statement */
302
- PM_CONTEXT_ENSURE,
303
-
304
- /** an ensure statement within a method definition */
305
- PM_CONTEXT_ENSURE_DEF,
306
-
307
353
  /** a for loop */
308
354
  PM_CONTEXT_FOR,
309
355
 
@@ -319,12 +365,36 @@ typedef enum {
319
365
  /** a lambda expression with do..end */
320
366
  PM_CONTEXT_LAMBDA_DO_END,
321
367
 
368
+ /** an ensure statement within a lambda expression */
369
+ PM_CONTEXT_LAMBDA_ENSURE,
370
+
371
+ /** a rescue else statement within a lambda expression */
372
+ PM_CONTEXT_LAMBDA_ELSE,
373
+
374
+ /** a rescue statement within a lambda expression */
375
+ PM_CONTEXT_LAMBDA_RESCUE,
376
+
377
+ /** the predicate clause of a loop statement */
378
+ PM_CONTEXT_LOOP_PREDICATE,
379
+
322
380
  /** the top level context */
323
381
  PM_CONTEXT_MAIN,
324
382
 
325
383
  /** a module declaration */
326
384
  PM_CONTEXT_MODULE,
327
385
 
386
+ /** an ensure statement within a module statement */
387
+ PM_CONTEXT_MODULE_ENSURE,
388
+
389
+ /** a rescue else statement within a module statement */
390
+ PM_CONTEXT_MODULE_ELSE,
391
+
392
+ /** a rescue statement within a module statement */
393
+ PM_CONTEXT_MODULE_RESCUE,
394
+
395
+ /** a multiple target expression */
396
+ PM_CONTEXT_MULTI_TARGET,
397
+
328
398
  /** a parenthesized expression */
329
399
  PM_CONTEXT_PARENS,
330
400
 
@@ -337,20 +407,23 @@ typedef enum {
337
407
  /** a BEGIN block */
338
408
  PM_CONTEXT_PREEXE,
339
409
 
340
- /** a rescue else statement */
341
- PM_CONTEXT_RESCUE_ELSE,
410
+ /** a modifier rescue clause */
411
+ PM_CONTEXT_RESCUE_MODIFIER,
342
412
 
343
- /** a rescue else statement within a method definition */
344
- PM_CONTEXT_RESCUE_ELSE_DEF,
413
+ /** a singleton class definition */
414
+ PM_CONTEXT_SCLASS,
345
415
 
346
- /** a rescue statement */
347
- PM_CONTEXT_RESCUE,
416
+ /** an ensure statement within a singleton class */
417
+ PM_CONTEXT_SCLASS_ENSURE,
348
418
 
349
- /** a rescue statement within a method definition */
350
- PM_CONTEXT_RESCUE_DEF,
419
+ /** a rescue else statement within a singleton class */
420
+ PM_CONTEXT_SCLASS_ELSE,
351
421
 
352
- /** a singleton class definition */
353
- PM_CONTEXT_SCLASS,
422
+ /** a rescue statement within a singleton class */
423
+ PM_CONTEXT_SCLASS_RESCUE,
424
+
425
+ /** a ternary expression */
426
+ PM_CONTEXT_TERNARY,
354
427
 
355
428
  /** an unless statement */
356
429
  PM_CONTEXT_UNLESS,
@@ -445,56 +518,118 @@ typedef struct {
445
518
  void (*callback)(void *data, pm_parser_t *parser, pm_token_t *token);
446
519
  } pm_lex_callback_t;
447
520
 
521
+ /** The type of shareable constant value that can be set. */
522
+ typedef uint8_t pm_shareable_constant_value_t;
523
+ static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_NONE = 0x0;
524
+ static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_LITERAL = PM_SHAREABLE_CONSTANT_NODE_FLAGS_LITERAL;
525
+ static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_EVERYTHING = PM_SHAREABLE_CONSTANT_NODE_FLAGS_EXPERIMENTAL_EVERYTHING;
526
+ static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_COPY = PM_SHAREABLE_CONSTANT_NODE_FLAGS_EXPERIMENTAL_COPY;
527
+
528
+ /**
529
+ * This tracks an individual local variable in a certain lexical context, as
530
+ * well as the number of times it is read.
531
+ */
532
+ typedef struct {
533
+ /** The name of the local variable. */
534
+ pm_constant_id_t name;
535
+
536
+ /** The location of the local variable in the source. */
537
+ pm_location_t location;
538
+
539
+ /** The index of the local variable in the local table. */
540
+ uint32_t index;
541
+
542
+ /** The number of times the local variable is read. */
543
+ uint32_t reads;
544
+
545
+ /** The hash of the local variable. */
546
+ uint32_t hash;
547
+ } pm_local_t;
548
+
549
+ /**
550
+ * This is a set of local variables in a certain lexical context (method, class,
551
+ * module, etc.). We need to track how many times these variables are read in
552
+ * order to warn if they only get written.
553
+ */
554
+ typedef struct pm_locals {
555
+ /** The number of local variables in the set. */
556
+ uint32_t size;
557
+
558
+ /** The capacity of the local variables set. */
559
+ uint32_t capacity;
560
+
561
+ /** The nullable allocated memory for the local variables in the set. */
562
+ pm_local_t *locals;
563
+ } pm_locals_t;
564
+
565
+ /** The flags about scope parameters that can be set. */
566
+ typedef uint8_t pm_scope_parameters_t;
567
+ static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_NONE = 0x0;
568
+ static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS = 0x1;
569
+ static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS = 0x2;
570
+ static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_FORWARDING_BLOCK = 0x4;
571
+ static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_FORWARDING_ALL = 0x8;
572
+ static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED = 0x10;
573
+ static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_NUMBERED_INNER = 0x20;
574
+ static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_NUMBERED_FOUND = 0x40;
575
+
448
576
  /**
449
577
  * This struct represents a node in a linked list of scopes. Some scopes can see
450
578
  * into their parent scopes, while others cannot.
451
579
  */
452
580
  typedef struct pm_scope {
453
- /** The IDs of the locals in the given scope. */
454
- pm_constant_id_list_t locals;
455
-
456
581
  /** A pointer to the previous scope in the linked list. */
457
582
  struct pm_scope *previous;
458
583
 
459
- /**
460
- * A boolean indicating whether or not this scope can see into its parent.
461
- * If closed is true, then the scope cannot see into its parent.
462
- */
463
- bool closed;
584
+ /** The IDs of the locals in the given scope. */
585
+ pm_locals_t locals;
464
586
 
465
587
  /**
466
- * A boolean indicating whether or not this scope has explicit parameters.
467
- * This is necessary to determine whether or not numbered parameters are
468
- * allowed.
588
+ * This is a list of the implicit parameters contained within the block.
589
+ * These will be processed after the block is parsed to determine the kind
590
+ * of parameters node that should be used and to check if any errors need to
591
+ * be added.
469
592
  */
470
- bool explicit_params;
593
+ pm_node_list_t implicit_parameters;
471
594
 
472
595
  /**
473
- * Booleans indicating whether the parameters for this scope have declared
474
- * forwarding parameters.
596
+ * This is a bitfield that indicates the parameters that are being used in
597
+ * this scope. It is a combination of the PM_SCOPE_PARAMETERS_* constants.
598
+ * There are three different kinds of parameters that can be used in a
599
+ * scope:
600
+ *
601
+ * - Ordinary parameters (e.g., def foo(bar); end)
602
+ * - Numbered parameters (e.g., def foo; _1; end)
603
+ * - The it parameter (e.g., def foo; it; end)
475
604
  *
476
- * For example, some combinations of:
477
- * def foo(*); end
478
- * def foo(**); end
479
- * def foo(&); end
480
- * def foo(...); end
605
+ * If ordinary parameters are being used, then certain parameters can be
606
+ * forwarded to another method/structure. Those are indicated by four
607
+ * additional bits in the parameters field. For example, some combinations of:
608
+ *
609
+ * - def foo(*); end
610
+ * - def foo(**); end
611
+ * - def foo(&); end
612
+ * - def foo(...); end
481
613
  */
614
+ pm_scope_parameters_t parameters;
482
615
 
483
- uint8_t forwarding_params;
616
+ /**
617
+ * The current state of constant shareability for this scope. This is
618
+ * changed by magic shareable_constant_value comments.
619
+ */
620
+ pm_shareable_constant_value_t shareable_constant;
484
621
 
485
622
  /**
486
- * An integer indicating the number of numbered parameters on this scope.
487
- * This is necessary to determine if child blocks are allowed to use
488
- * numbered parameters, and to pass information to consumers of the AST
489
- * about how many numbered parameters exist.
623
+ * A boolean indicating whether or not this scope can see into its parent.
624
+ * If closed is true, then the scope cannot see into its parent.
490
625
  */
491
- uint8_t numbered_parameters;
626
+ bool closed;
492
627
  } pm_scope_t;
493
628
 
494
- static const uint8_t PM_FORWARDING_POSITIONALS = 0x1;
495
- static const uint8_t PM_FORWARDING_KEYWORDS = 0x2;
496
- static const uint8_t PM_FORWARDING_BLOCK = 0x4;
497
- static const uint8_t PM_FORWARDING_ALL = 0x8;
629
+ /**
630
+ * A type that represents a stack of boolean values.
631
+ */
632
+ typedef uint32_t pm_state_stack_t;
498
633
 
499
634
  /**
500
635
  * This struct represents the overall parser. It contains a reference to the
@@ -503,6 +638,13 @@ static const uint8_t PM_FORWARDING_ALL = 0x8;
503
638
  * it's considering.
504
639
  */
505
640
  struct pm_parser {
641
+ /**
642
+ * The next node identifier that will be assigned. This is a unique
643
+ * identifier used to track nodes such that the syntax tree can be dropped
644
+ * but the node can be found through another parse.
645
+ */
646
+ uint32_t node_id;
647
+
506
648
  /** The current state of the lexer. */
507
649
  pm_lex_state_t lex_state;
508
650
 
@@ -597,6 +739,15 @@ struct pm_parser {
597
739
  /** The current parsing context. */
598
740
  pm_context_node_t *current_context;
599
741
 
742
+ /**
743
+ * The hash keys for the hash that is currently being parsed. This is not
744
+ * usually necessary because they can be passed down the various call chains,
745
+ * but in the event that you're parsing a hash that is being directly
746
+ * pushed into another hash with **, we need to share the hash keys so that
747
+ * we can warn for the nested hash as well.
748
+ */
749
+ pm_static_literals_t *current_hash_keys;
750
+
600
751
  /**
601
752
  * The encoding functions for the current file is attached to the parser as
602
753
  * it's parsing so that it can change with a magic comment.
@@ -688,18 +839,62 @@ struct pm_parser {
688
839
  */
689
840
  const pm_encoding_t *explicit_encoding;
690
841
 
691
- /** The current parameter name id on parsing its default value. */
692
- pm_constant_id_t current_param_name;
842
+ /**
843
+ * When parsing block exits (e.g., break, next, redo), we need to validate
844
+ * that they are in correct contexts. For the most part we can do this by
845
+ * looking at our parent contexts. However, modifier while and until
846
+ * expressions can change that context to make block exits valid. In these
847
+ * cases, we need to keep track of the block exits and then validate them
848
+ * after the expression has been parsed.
849
+ *
850
+ * We use a pointer here because we don't want to keep a whole list attached
851
+ * since this will only be used in the context of begin/end expressions.
852
+ */
853
+ pm_node_list_t *current_block_exits;
693
854
 
694
855
  /** The version of prism that we should use to parse. */
695
856
  pm_options_version_t version;
696
857
 
858
+ /** The command line flags given from the options. */
859
+ uint8_t command_line;
860
+
861
+ /**
862
+ * Whether or not we have found a frozen_string_literal magic comment with
863
+ * a true or false value.
864
+ * May be:
865
+ * - PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED
866
+ * - PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED
867
+ * - PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET
868
+ */
869
+ int8_t frozen_string_literal;
870
+
871
+ /**
872
+ * Whether or not we are parsing an eval string. This impacts whether or not
873
+ * we should evaluate if block exits/yields are valid.
874
+ */
875
+ bool parsing_eval;
876
+
877
+ /**
878
+ * Whether or not we are parsing a "partial" script, which is a script that
879
+ * will be evaluated in the context of another script, so we should not
880
+ * check jumps (next/break/etc.) for validity.
881
+ */
882
+ bool partial_script;
883
+
697
884
  /** Whether or not we're at the beginning of a command. */
698
885
  bool command_start;
699
886
 
700
887
  /** Whether or not we're currently recovering from a syntax error. */
701
888
  bool recovering;
702
889
 
890
+ /**
891
+ * This is very specialized behavior for when you want to parse in a context
892
+ * that does not respect encoding comments. Its main use case is translating
893
+ * into the whitequark/parser AST which re-encodes source files in UTF-8
894
+ * before they are parsed and ignores encoding comments.
895
+ */
896
+ bool encoding_locked;
897
+
703
898
  /**
704
899
  * Whether or not the encoding has been changed by a magic comment. We use
705
900
  * this to provide a fast path for the lexer instead of going through the
@@ -723,10 +918,16 @@ struct pm_parser {
723
918
  bool semantic_token_seen;
724
919
 
725
920
  /**
726
- * Whether or not we have found a frozen_string_literal magic comment with
727
- * a true value.
921
+ * True if the current regular expression being lexed contains only ASCII
922
+ * characters.
923
+ */
924
+ bool current_regular_expression_ascii_only;
925
+
926
+ /**
927
+ * By default, Ruby always warns about mismatched indentation. This can be
928
+ * toggled with a magic comment.
728
929
  */
729
- bool frozen_string_literal;
930
+ bool warn_mismatched_indentation;
730
931
  };
731
932
 
732
933
  #endif
@@ -8,6 +8,12 @@
8
8
 
9
9
  #include "prism/defines.h"
10
10
 
11
+ #ifdef PRISM_EXCLUDE_PRETTYPRINT
12
+
13
+ void pm_prettyprint(void);
14
+
15
+ #else
16
+
11
17
  #include <stdio.h>
12
18
 
13
19
  #include "prism/ast.h"
@@ -24,3 +30,5 @@
24
30
  PRISM_EXPORTED_FUNCTION void pm_prettyprint(pm_buffer_t *output_buffer, const pm_parser_t *parser, const pm_node_t *node);
25
31
 
26
32
  #endif
33
+
34
+ #endif
@@ -10,7 +10,6 @@
10
10
  #include "prism/parser.h"
11
11
  #include "prism/encoding.h"
12
12
  #include "prism/util/pm_memchr.h"
13
- #include "prism/util/pm_string_list.h"
14
13
  #include "prism/util/pm_string.h"
15
14
 
16
15
  #include <stdbool.h>
@@ -18,16 +17,27 @@
18
17
  #include <string.h>
19
18
 
20
19
  /**
21
- * Parse a regular expression and extract the names of all of the named capture
22
- * groups.
20
+ * This callback is called when a named capture group is found.
21
+ */
22
+ typedef void (*pm_regexp_name_callback_t)(const pm_string_t *name, void *data);
23
+
24
+ /**
25
+ * This callback is called when a parse error is found.
26
+ */
27
+ typedef void (*pm_regexp_error_callback_t)(const uint8_t *start, const uint8_t *end, const char *message, void *data);
28
+
29
+ /**
30
+ * Parse a regular expression.
23
31
  *
32
+ * @param parser The parser that is currently being used.
24
33
  * @param source The source code to parse.
25
34
  * @param size The size of the source code.
26
- * @param named_captures The list to add the names of the named capture groups.
27
- * @param encoding_changed Whether or not the encoding changed from the default.
28
- * @param encoding The encoding of the source code.
29
- * @return Whether or not the parsing was successful.
35
+ * @param extended_mode Whether to parse the regular expression in extended mode.
36
+ * @param name_callback The optional callback to call when a named capture group is found.
37
+ * @param name_data The optional data to pass to the name callback.
38
+ * @param error_callback The callback to call when a parse error is found.
39
+ * @param error_data The data to pass to the error callback.
30
40
  */
31
- PRISM_EXPORTED_FUNCTION bool pm_regexp_named_capture_group_names(const uint8_t *source, size_t size, pm_string_list_t *named_captures, bool encoding_changed, const pm_encoding_t *encoding);
41
+ PRISM_EXPORTED_FUNCTION void pm_regexp_parse(pm_parser_t *parser, const uint8_t *source, size_t size, bool extended_mode, pm_regexp_name_callback_t name_callback, void *name_data, pm_regexp_error_callback_t error_callback, void *error_data);
32
42
 
33
43
  #endif
@@ -0,0 +1,121 @@
1
+ /**
2
+ * @file static_literals.h
3
+ *
4
+ * A set of static literal nodes that can be checked for duplicates.
5
+ */
6
+ #ifndef PRISM_STATIC_LITERALS_H
7
+ #define PRISM_STATIC_LITERALS_H
8
+
9
+ #include "prism/defines.h"
10
+ #include "prism/ast.h"
11
+ #include "prism/util/pm_newline_list.h"
12
+
13
+ #include <assert.h>
14
+ #include <stdbool.h>
15
+
16
+ /**
17
+ * An internal hash table for a set of nodes.
18
+ */
19
+ typedef struct {
20
+ /** The array of nodes in the hash table. */
21
+ pm_node_t **nodes;
22
+
23
+ /** The size of the hash table. */
24
+ uint32_t size;
25
+
26
+ /** The space that has been allocated in the hash table. */
27
+ uint32_t capacity;
28
+ } pm_node_hash_t;
29
+
30
+ /**
31
+ * Certain sets of nodes (hash keys and when clauses) check for duplicate nodes
32
+ * to alert the user of potential issues. To do this, we keep a set of the nodes
33
+ * that have been seen so far, and compare whenever we find a new node.
34
+ *
35
+ * We bucket the nodes based on their type to minimize the number of comparisons
36
+ * that need to be performed.
37
+ */
38
+ typedef struct {
39
+ /**
40
+ * This is the set of IntegerNode and SourceLineNode instances.
41
+ */
42
+ pm_node_hash_t integer_nodes;
43
+
44
+ /**
45
+ * This is the set of FloatNode instances.
46
+ */
47
+ pm_node_hash_t float_nodes;
48
+
49
+ /**
50
+ * This is the set of RationalNode and ImaginaryNode instances.
51
+ */
52
+ pm_node_hash_t number_nodes;
53
+
54
+ /**
55
+ * This is the set of StringNode and SourceFileNode instances.
56
+ */
57
+ pm_node_hash_t string_nodes;
58
+
59
+ /**
60
+ * This is the set of RegularExpressionNode instances.
61
+ */
62
+ pm_node_hash_t regexp_nodes;
63
+
64
+ /**
65
+ * This is the set of SymbolNode instances.
66
+ */
67
+ pm_node_hash_t symbol_nodes;
68
+
69
+ /**
70
+ * A pointer to the last TrueNode instance that was inserted, or NULL.
71
+ */
72
+ pm_node_t *true_node;
73
+
74
+ /**
75
+ * A pointer to the last FalseNode instance that was inserted, or NULL.
76
+ */
77
+ pm_node_t *false_node;
78
+
79
+ /**
80
+ * A pointer to the last NilNode instance that was inserted, or NULL.
81
+ */
82
+ pm_node_t *nil_node;
83
+
84
+ /**
85
+ * A pointer to the last SourceEncodingNode instance that was inserted, or
86
+ * NULL.
87
+ */
88
+ pm_node_t *source_encoding_node;
89
+ } pm_static_literals_t;
90
+
91
+ /**
92
+ * Add a node to the set of static literals.
93
+ *
94
+ * @param newline_list The list of newline offsets to use to calculate lines.
95
+ * @param start_line The line number that the parser starts on.
96
+ * @param literals The set of static literals to add the node to.
97
+ * @param node The node to add to the set.
98
+ * @param replace Whether to replace the previous node if one already exists.
99
+ * @return A pointer to the node that is being overwritten, if there is one.
100
+ */
101
+ pm_node_t * pm_static_literals_add(const pm_newline_list_t *newline_list, int32_t start_line, pm_static_literals_t *literals, pm_node_t *node, bool replace);
102
+
103
+ /**
104
+ * Free the internal memory associated with the given static literals set.
105
+ *
106
+ * @param literals The set of static literals to free.
107
+ */
108
+ void pm_static_literals_free(pm_static_literals_t *literals);
109
+
110
+ /**
111
+ * Create a string-based representation of the given static literal.
112
+ *
113
+ * @param buffer The buffer to write the string to.
114
+ * @param newline_list The list of newline offsets to use to calculate lines.
115
+ * @param start_line The line number that the parser starts on.
116
+ * @param encoding_name The name of the encoding of the source being parsed.
117
+ * @param node The node to create a string representation of.
118
+ */
119
+ void pm_static_literal_inspect(pm_buffer_t *buffer, const pm_newline_list_t *newline_list, int32_t start_line, const char *encoding_name, const pm_node_t *node);
120
+
121
+ #endif