prism 0.16.0 → 0.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +16 -1
  3. data/Makefile +6 -0
  4. data/README.md +1 -1
  5. data/config.yml +50 -35
  6. data/docs/fuzzing.md +1 -1
  7. data/docs/serialization.md +28 -29
  8. data/ext/prism/api_node.c +802 -770
  9. data/ext/prism/api_pack.c +20 -9
  10. data/ext/prism/extension.c +464 -162
  11. data/ext/prism/extension.h +1 -1
  12. data/include/prism/ast.h +3173 -763
  13. data/include/prism/defines.h +32 -9
  14. data/include/prism/diagnostic.h +36 -3
  15. data/include/prism/enc/pm_encoding.h +118 -28
  16. data/include/prism/node.h +38 -13
  17. data/include/prism/options.h +204 -0
  18. data/include/prism/pack.h +44 -33
  19. data/include/prism/parser.h +445 -200
  20. data/include/prism/prettyprint.h +12 -1
  21. data/include/prism/regexp.h +16 -2
  22. data/include/prism/util/pm_buffer.h +94 -16
  23. data/include/prism/util/pm_char.h +162 -48
  24. data/include/prism/util/pm_constant_pool.h +126 -32
  25. data/include/prism/util/pm_list.h +68 -38
  26. data/include/prism/util/pm_memchr.h +18 -3
  27. data/include/prism/util/pm_newline_list.h +70 -27
  28. data/include/prism/util/pm_state_stack.h +25 -7
  29. data/include/prism/util/pm_string.h +115 -27
  30. data/include/prism/util/pm_string_list.h +25 -6
  31. data/include/prism/util/pm_strncasecmp.h +32 -0
  32. data/include/prism/util/pm_strpbrk.h +31 -17
  33. data/include/prism/version.h +27 -2
  34. data/include/prism.h +224 -31
  35. data/lib/prism/compiler.rb +6 -3
  36. data/lib/prism/debug.rb +23 -7
  37. data/lib/prism/dispatcher.rb +33 -18
  38. data/lib/prism/dsl.rb +10 -5
  39. data/lib/prism/ffi.rb +132 -80
  40. data/lib/prism/lex_compat.rb +25 -15
  41. data/lib/prism/mutation_compiler.rb +10 -5
  42. data/lib/prism/node.rb +370 -135
  43. data/lib/prism/node_ext.rb +1 -1
  44. data/lib/prism/node_inspector.rb +1 -1
  45. data/lib/prism/pack.rb +79 -40
  46. data/lib/prism/parse_result/comments.rb +7 -2
  47. data/lib/prism/parse_result/newlines.rb +4 -0
  48. data/lib/prism/parse_result.rb +150 -30
  49. data/lib/prism/pattern.rb +11 -0
  50. data/lib/prism/ripper_compat.rb +28 -10
  51. data/lib/prism/serialize.rb +86 -54
  52. data/lib/prism/visitor.rb +10 -3
  53. data/lib/prism.rb +20 -2
  54. data/prism.gemspec +4 -2
  55. data/rbi/prism.rbi +104 -60
  56. data/rbi/prism_static.rbi +16 -2
  57. data/sig/prism.rbs +72 -43
  58. data/sig/prism_static.rbs +14 -1
  59. data/src/diagnostic.c +56 -53
  60. data/src/enc/pm_big5.c +1 -0
  61. data/src/enc/pm_euc_jp.c +1 -0
  62. data/src/enc/pm_gbk.c +1 -0
  63. data/src/enc/pm_shift_jis.c +1 -0
  64. data/src/enc/pm_tables.c +316 -80
  65. data/src/enc/pm_unicode.c +53 -8
  66. data/src/enc/pm_windows_31j.c +1 -0
  67. data/src/node.c +334 -321
  68. data/src/options.c +170 -0
  69. data/src/prettyprint.c +74 -47
  70. data/src/prism.c +1642 -856
  71. data/src/regexp.c +151 -95
  72. data/src/serialize.c +44 -20
  73. data/src/token_type.c +3 -1
  74. data/src/util/pm_buffer.c +45 -15
  75. data/src/util/pm_char.c +103 -57
  76. data/src/util/pm_constant_pool.c +51 -21
  77. data/src/util/pm_list.c +12 -4
  78. data/src/util/pm_memchr.c +5 -3
  79. data/src/util/pm_newline_list.c +20 -12
  80. data/src/util/pm_state_stack.c +9 -3
  81. data/src/util/pm_string.c +95 -85
  82. data/src/util/pm_string_list.c +14 -15
  83. data/src/util/pm_strncasecmp.c +10 -3
  84. data/src/util/pm_strpbrk.c +25 -19
  85. metadata +5 -3
  86. data/docs/prism.png +0 -0
data/src/diagnostic.c CHANGED
@@ -1,56 +1,55 @@
1
1
  #include "prism/diagnostic.h"
2
2
 
3
- /*
4
- ## Message composition
5
-
6
- When composing an error message, use sentence fragments.
7
-
8
- Try describing the property of the code that caused the error, rather than the rule that is being
9
- violated. It may help to use a fragment that completes a sentence beginning, "The parser
10
- encountered (a) ...". If appropriate, add a description of the rule violation (or other helpful
11
- context) after a semicolon.
12
-
13
- For example:, instead of "Control escape sequence cannot be doubled", prefer:
14
-
15
- > "Invalid control escape sequence; control cannot be repeated"
16
-
17
- In some cases, where the failure is more general or syntax expectations are violated, it may make
18
- more sense to use a fragment that completes a sentence beginning, "The parser ...".
19
-
20
- For example:
21
-
22
- > "Expected an expression after `(`"
23
- > "Cannot parse the expression"
24
-
25
-
26
- ## Message style guide
27
-
28
- - Use articles like "a", "an", and "the" when appropriate.
29
- - e.g., prefer "Cannot parse the expression" to "Cannot parse expression".
30
- - Use the common name for tokens and nodes.
31
- - e.g., prefer "keyword splat" to "assoc splat"
32
- - e.g., prefer "embedded document" to "embdoc"
33
- - Capitalize the initial word of the message.
34
- - Use back ticks around token literals
35
- - e.g., "Expected a `=>` between the hash key and value"
36
- - Do not use `.` or other punctuation at the end of the message.
37
- - Do not use contractions like "can't". Prefer "cannot" to "can not".
38
- - For tokens that can have multiple meanings, reference the token and its meaning.
39
- - e.g., "`*` splat argument" is clearer and more complete than "splat argument" or "`*` argument"
40
-
41
-
42
- ## Error names (PM_ERR_*)
43
-
44
- - When appropriate, prefer node name to token name.
45
- - e.g., prefer "SPLAT" to "STAR" in the context of argument parsing.
46
- - Prefer token name to common name.
47
- - e.g., prefer "STAR" to "ASTERISK".
48
- - Try to order the words in the name from more general to more specific,
49
- - e.g., "INVALID_NUMBER_DECIMAL" is better than "DECIMAL_INVALID_NUMBER".
50
- - When in doubt, look for similar patterns and name them so that they are grouped when lexically
51
- sorted. See PM_ERR_ARGUMENT_NO_FORWARDING_* for an example.
52
- */
53
-
3
+ /**
4
+ * ## Message composition
5
+ *
6
+ * When composing an error message, use sentence fragments.
7
+ *
8
+ * Try describing the property of the code that caused the error, rather than the rule that is being
9
+ * violated. It may help to use a fragment that completes a sentence beginning, "The parser
10
+ * encountered (a) ...". If appropriate, add a description of the rule violation (or other helpful
11
+ * context) after a semicolon.
12
+ *
13
+ * For example, instead of "Control escape sequence cannot be doubled", prefer:
14
+ *
15
+ * > "Invalid control escape sequence; control cannot be repeated"
16
+ *
17
+ * In some cases, where the failure is more general or syntax expectations are violated, it may make
18
+ * more sense to use a fragment that completes a sentence beginning, "The parser ...".
19
+ *
20
+ * For example:
21
+ *
22
+ * > "Expected an expression after `(`"
23
+ * > "Cannot parse the expression"
24
+ *
25
+ *
26
+ * ## Message style guide
27
+ *
28
+ * - Use articles like "a", "an", and "the" when appropriate.
29
+ * - e.g., prefer "Cannot parse the expression" to "Cannot parse expression".
30
+ * - Use the common name for tokens and nodes.
31
+ * - e.g., prefer "keyword splat" to "assoc splat"
32
+ * - e.g., prefer "embedded document" to "embdoc"
33
+ * - Capitalize the initial word of the message.
34
+ * - Use back ticks around token literals
35
+ * - e.g., "Expected a `=>` between the hash key and value"
36
+ * - Do not use `.` or other punctuation at the end of the message.
37
+ * - Do not use contractions like "can't". Prefer "cannot" to "can not".
38
+ * - For tokens that can have multiple meanings, reference the token and its meaning.
39
+ * - e.g., "`*` splat argument" is clearer and more complete than "splat argument" or "`*` argument"
40
+ *
41
+ *
42
+ * ## Error names (PM_ERR_*)
43
+ *
44
+ * - When appropriate, prefer node name to token name.
45
+ * - e.g., prefer "SPLAT" to "STAR" in the context of argument parsing.
46
+ * - Prefer token name to common name.
47
+ * - e.g., prefer "STAR" to "ASTERISK".
48
+ * - Try to order the words in the name from more general to more specific,
49
+ * - e.g., "INVALID_NUMBER_DECIMAL" is better than "DECIMAL_INVALID_NUMBER".
50
+ * - When in doubt, look for similar patterns and name them so that they are grouped when lexically
51
+ * sorted. See PM_ERR_ARGUMENT_NO_FORWARDING_* for an example.
52
+ */
54
53
  static const char* const diagnostic_messages[PM_DIAGNOSTIC_ID_LEN] = {
55
54
  [PM_ERR_ALIAS_ARGUMENT] = "Invalid argument being passed to `alias`; expected a bare word, symbol, constant, or global variable",
56
55
  [PM_ERR_AMPAMPEQ_MULTI_ASSIGN] = "Unexpected `&&=` in a multiple assignment",
@@ -263,7 +262,9 @@ pm_diagnostic_message(pm_diagnostic_id_t diag_id) {
263
262
  return message;
264
263
  }
265
264
 
266
- // Append an error to the given list of diagnostic.
265
+ /**
266
+ * Append an error to the given list of diagnostics.
267
+ */
267
268
  bool
268
269
  pm_diagnostic_list_append(pm_list_t *list, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id) {
269
270
  pm_diagnostic_t *diagnostic = (pm_diagnostic_t *) calloc(sizeof(pm_diagnostic_t), 1);
@@ -274,7 +275,9 @@ pm_diagnostic_list_append(pm_list_t *list, const uint8_t *start, const uint8_t *
274
275
  return true;
275
276
  }
276
277
 
277
- // Deallocate the internal state of the given diagnostic list.
278
+ /**
279
+ * Deallocate the internal state of the given diagnostic list.
280
+ */
278
281
  void
279
282
  pm_diagnostic_list_free(pm_list_t *list) {
280
283
  pm_list_node_t *node, *next;
data/src/enc/pm_big5.c CHANGED
@@ -42,6 +42,7 @@ pm_encoding_big5_isupper_char(const uint8_t *b, ptrdiff_t n) {
42
42
  }
43
43
  }
44
44
 
45
+ /** Big5 encoding */
45
46
  pm_encoding_t pm_encoding_big5 = {
46
47
  .name = "big5",
47
48
  .char_width = pm_encoding_big5_char_width,
data/src/enc/pm_euc_jp.c CHANGED
@@ -48,6 +48,7 @@ pm_encoding_euc_jp_isupper_char(const uint8_t *b, ptrdiff_t n) {
48
48
  }
49
49
  }
50
50
 
51
+ /** EUC-JP encoding */
51
52
  pm_encoding_t pm_encoding_euc_jp = {
52
53
  .name = "euc-jp",
53
54
  .char_width = pm_encoding_euc_jp_char_width,
data/src/enc/pm_gbk.c CHANGED
@@ -51,6 +51,7 @@ pm_encoding_gbk_isupper_char(const uint8_t *b, ptrdiff_t n) {
51
51
  }
52
52
  }
53
53
 
54
+ /** GBK encoding */
54
55
  pm_encoding_t pm_encoding_gbk = {
55
56
  .name = "gbk",
56
57
  .char_width = pm_encoding_gbk_char_width,
data/src/enc/pm_shift_jis.c CHANGED
@@ -46,6 +46,7 @@ pm_encoding_shift_jis_isupper_char(const uint8_t *b, ptrdiff_t n) {
46
46
  }
47
47
  }
48
48
 
49
+ /** Shift_JIS encoding */
49
50
  pm_encoding_t pm_encoding_shift_jis = {
50
51
  .name = "shift_jis",
51
52
  .char_width = pm_encoding_shift_jis_char_width,