prism 0.16.0 → 0.17.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (86) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +16 -1
  3. data/Makefile +6 -0
  4. data/README.md +1 -1
  5. data/config.yml +50 -35
  6. data/docs/fuzzing.md +1 -1
  7. data/docs/serialization.md +28 -29
  8. data/ext/prism/api_node.c +802 -770
  9. data/ext/prism/api_pack.c +20 -9
  10. data/ext/prism/extension.c +464 -162
  11. data/ext/prism/extension.h +1 -1
  12. data/include/prism/ast.h +3173 -763
  13. data/include/prism/defines.h +32 -9
  14. data/include/prism/diagnostic.h +36 -3
  15. data/include/prism/enc/pm_encoding.h +118 -28
  16. data/include/prism/node.h +38 -13
  17. data/include/prism/options.h +204 -0
  18. data/include/prism/pack.h +44 -33
  19. data/include/prism/parser.h +445 -200
  20. data/include/prism/prettyprint.h +12 -1
  21. data/include/prism/regexp.h +16 -2
  22. data/include/prism/util/pm_buffer.h +94 -16
  23. data/include/prism/util/pm_char.h +162 -48
  24. data/include/prism/util/pm_constant_pool.h +126 -32
  25. data/include/prism/util/pm_list.h +68 -38
  26. data/include/prism/util/pm_memchr.h +18 -3
  27. data/include/prism/util/pm_newline_list.h +70 -27
  28. data/include/prism/util/pm_state_stack.h +25 -7
  29. data/include/prism/util/pm_string.h +115 -27
  30. data/include/prism/util/pm_string_list.h +25 -6
  31. data/include/prism/util/pm_strncasecmp.h +32 -0
  32. data/include/prism/util/pm_strpbrk.h +31 -17
  33. data/include/prism/version.h +27 -2
  34. data/include/prism.h +224 -31
  35. data/lib/prism/compiler.rb +6 -3
  36. data/lib/prism/debug.rb +23 -7
  37. data/lib/prism/dispatcher.rb +33 -18
  38. data/lib/prism/dsl.rb +10 -5
  39. data/lib/prism/ffi.rb +132 -80
  40. data/lib/prism/lex_compat.rb +25 -15
  41. data/lib/prism/mutation_compiler.rb +10 -5
  42. data/lib/prism/node.rb +370 -135
  43. data/lib/prism/node_ext.rb +1 -1
  44. data/lib/prism/node_inspector.rb +1 -1
  45. data/lib/prism/pack.rb +79 -40
  46. data/lib/prism/parse_result/comments.rb +7 -2
  47. data/lib/prism/parse_result/newlines.rb +4 -0
  48. data/lib/prism/parse_result.rb +150 -30
  49. data/lib/prism/pattern.rb +11 -0
  50. data/lib/prism/ripper_compat.rb +28 -10
  51. data/lib/prism/serialize.rb +86 -54
  52. data/lib/prism/visitor.rb +10 -3
  53. data/lib/prism.rb +20 -2
  54. data/prism.gemspec +4 -2
  55. data/rbi/prism.rbi +104 -60
  56. data/rbi/prism_static.rbi +16 -2
  57. data/sig/prism.rbs +72 -43
  58. data/sig/prism_static.rbs +14 -1
  59. data/src/diagnostic.c +56 -53
  60. data/src/enc/pm_big5.c +1 -0
  61. data/src/enc/pm_euc_jp.c +1 -0
  62. data/src/enc/pm_gbk.c +1 -0
  63. data/src/enc/pm_shift_jis.c +1 -0
  64. data/src/enc/pm_tables.c +316 -80
  65. data/src/enc/pm_unicode.c +53 -8
  66. data/src/enc/pm_windows_31j.c +1 -0
  67. data/src/node.c +334 -321
  68. data/src/options.c +170 -0
  69. data/src/prettyprint.c +74 -47
  70. data/src/prism.c +1642 -856
  71. data/src/regexp.c +151 -95
  72. data/src/serialize.c +44 -20
  73. data/src/token_type.c +3 -1
  74. data/src/util/pm_buffer.c +45 -15
  75. data/src/util/pm_char.c +103 -57
  76. data/src/util/pm_constant_pool.c +51 -21
  77. data/src/util/pm_list.c +12 -4
  78. data/src/util/pm_memchr.c +5 -3
  79. data/src/util/pm_newline_list.c +20 -12
  80. data/src/util/pm_state_stack.c +9 -3
  81. data/src/util/pm_string.c +95 -85
  82. data/src/util/pm_string_list.c +14 -15
  83. data/src/util/pm_strncasecmp.c +10 -3
  84. data/src/util/pm_strpbrk.c +25 -19
  85. metadata +5 -3
  86. data/docs/prism.png +0 -0
data/src/diagnostic.c CHANGED
@@ -1,56 +1,55 @@
1
1
  #include "prism/diagnostic.h"
2
2
 
3
- /*
4
- ## Message composition
5
-
6
- When composing an error message, use sentence fragments.
7
-
8
- Try describing the property of the code that caused the error, rather than the rule that is being
9
- violated. It may help to use a fragment that completes a sentence beginning, "The parser
10
- encountered (a) ...". If appropriate, add a description of the rule violation (or other helpful
11
- context) after a semicolon.
12
-
13
- For example:, instead of "Control escape sequence cannot be doubled", prefer:
14
-
15
- > "Invalid control escape sequence; control cannot be repeated"
16
-
17
- In some cases, where the failure is more general or syntax expectations are violated, it may make
18
- more sense to use a fragment that completes a sentence beginning, "The parser ...".
19
-
20
- For example:
21
-
22
- > "Expected an expression after `(`"
23
- > "Cannot parse the expression"
24
-
25
-
26
- ## Message style guide
27
-
28
- - Use articles like "a", "an", and "the" when appropriate.
29
- - e.g., prefer "Cannot parse the expression" to "Cannot parse expression".
30
- - Use the common name for tokens and nodes.
31
- - e.g., prefer "keyword splat" to "assoc splat"
32
- - e.g., prefer "embedded document" to "embdoc"
33
- - Capitalize the initial word of the message.
34
- - Use back ticks around token literals
35
- - e.g., "Expected a `=>` between the hash key and value"
36
- - Do not use `.` or other punctuation at the end of the message.
37
- - Do not use contractions like "can't". Prefer "cannot" to "can not".
38
- - For tokens that can have multiple meanings, reference the token and its meaning.
39
- - e.g., "`*` splat argument" is clearer and more complete than "splat argument" or "`*` argument"
40
-
41
-
42
- ## Error names (PM_ERR_*)
43
-
44
- - When appropriate, prefer node name to token name.
45
- - e.g., prefer "SPLAT" to "STAR" in the context of argument parsing.
46
- - Prefer token name to common name.
47
- - e.g., prefer "STAR" to "ASTERISK".
48
- - Try to order the words in the name from more general to more specific,
49
- - e.g., "INVALID_NUMBER_DECIMAL" is better than "DECIMAL_INVALID_NUMBER".
50
- - When in doubt, look for similar patterns and name them so that they are grouped when lexically
51
- sorted. See PM_ERR_ARGUMENT_NO_FORWARDING_* for an example.
52
- */
53
-
3
+ /**
4
+ * ## Message composition
5
+ *
6
+ * When composing an error message, use sentence fragments.
7
+ *
8
+ * Try describing the property of the code that caused the error, rather than the rule that is being
9
+ * violated. It may help to use a fragment that completes a sentence beginning, "The parser
10
+ * encountered (a) ...". If appropriate, add a description of the rule violation (or other helpful
11
+ * context) after a semicolon.
12
+ *
13
+ * For example, instead of "Control escape sequence cannot be doubled", prefer:
14
+ *
15
+ * > "Invalid control escape sequence; control cannot be repeated"
16
+ *
17
+ * In some cases, where the failure is more general or syntax expectations are violated, it may make
18
+ * more sense to use a fragment that completes a sentence beginning, "The parser ...".
19
+ *
20
+ * For example:
21
+ *
22
+ * > "Expected an expression after `(`"
23
+ * > "Cannot parse the expression"
24
+ *
25
+ *
26
+ * ## Message style guide
27
+ *
28
+ * - Use articles like "a", "an", and "the" when appropriate.
29
+ * - e.g., prefer "Cannot parse the expression" to "Cannot parse expression".
30
+ * - Use the common name for tokens and nodes.
31
+ * - e.g., prefer "keyword splat" to "assoc splat"
32
+ * - e.g., prefer "embedded document" to "embdoc"
33
+ * - Capitalize the initial word of the message.
34
+ * - Use back ticks around token literals
35
+ * - e.g., "Expected a `=>` between the hash key and value"
36
+ * - Do not use `.` or other punctuation at the end of the message.
37
+ * - Do not use contractions like "can't". Prefer "cannot" to "can not".
38
+ * - For tokens that can have multiple meanings, reference the token and its meaning.
39
+ * - e.g., "`*` splat argument" is clearer and more complete than "splat argument" or "`*` argument"
40
+ *
41
+ *
42
+ * ## Error names (PM_ERR_*)
43
+ *
44
+ * - When appropriate, prefer node name to token name.
45
+ * - e.g., prefer "SPLAT" to "STAR" in the context of argument parsing.
46
+ * - Prefer token name to common name.
47
+ * - e.g., prefer "STAR" to "ASTERISK".
48
+ * - Try to order the words in the name from more general to more specific,
49
+ * - e.g., "INVALID_NUMBER_DECIMAL" is better than "DECIMAL_INVALID_NUMBER".
50
+ * - When in doubt, look for similar patterns and name them so that they are grouped when lexically
51
+ * sorted. See PM_ERR_ARGUMENT_NO_FORWARDING_* for an example.
52
+ */
54
53
  static const char* const diagnostic_messages[PM_DIAGNOSTIC_ID_LEN] = {
55
54
  [PM_ERR_ALIAS_ARGUMENT] = "Invalid argument being passed to `alias`; expected a bare word, symbol, constant, or global variable",
56
55
  [PM_ERR_AMPAMPEQ_MULTI_ASSIGN] = "Unexpected `&&=` in a multiple assignment",
@@ -263,7 +262,9 @@ pm_diagnostic_message(pm_diagnostic_id_t diag_id) {
263
262
  return message;
264
263
  }
265
264
 
266
- // Append an error to the given list of diagnostic.
265
+ /**
266
+ * Append an error to the given list of diagnostics.
267
+ */
267
268
  bool
268
269
  pm_diagnostic_list_append(pm_list_t *list, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id) {
269
270
  pm_diagnostic_t *diagnostic = (pm_diagnostic_t *) calloc(sizeof(pm_diagnostic_t), 1);
@@ -274,7 +275,9 @@ pm_diagnostic_list_append(pm_list_t *list, const uint8_t *start, const uint8_t *
274
275
  return true;
275
276
  }
276
277
 
277
- // Deallocate the internal state of the given diagnostic list.
278
+ /**
279
+ * Deallocate the internal state of the given diagnostic list.
280
+ */
278
281
  void
279
282
  pm_diagnostic_list_free(pm_list_t *list) {
280
283
  pm_list_node_t *node, *next;
data/src/enc/pm_big5.c CHANGED
@@ -42,6 +42,7 @@ pm_encoding_big5_isupper_char(const uint8_t *b, ptrdiff_t n) {
42
42
  }
43
43
  }
44
44
 
45
+ /** Big5 encoding */
45
46
  pm_encoding_t pm_encoding_big5 = {
46
47
  .name = "big5",
47
48
  .char_width = pm_encoding_big5_char_width,
data/src/enc/pm_euc_jp.c CHANGED
@@ -48,6 +48,7 @@ pm_encoding_euc_jp_isupper_char(const uint8_t *b, ptrdiff_t n) {
48
48
  }
49
49
  }
50
50
 
51
+ /** EUC-JP encoding */
51
52
  pm_encoding_t pm_encoding_euc_jp = {
52
53
  .name = "euc-jp",
53
54
  .char_width = pm_encoding_euc_jp_char_width,
data/src/enc/pm_gbk.c CHANGED
@@ -51,6 +51,7 @@ pm_encoding_gbk_isupper_char(const uint8_t *b, ptrdiff_t n) {
51
51
  }
52
52
  }
53
53
 
54
+ /** GBK encoding */
54
55
  pm_encoding_t pm_encoding_gbk = {
55
56
  .name = "gbk",
56
57
  .char_width = pm_encoding_gbk_char_width,
data/src/enc/pm_shift_jis.c CHANGED
@@ -46,6 +46,7 @@ pm_encoding_shift_jis_isupper_char(const uint8_t *b, ptrdiff_t n) {
46
46
  }
47
47
  }
48
48
 
49
+ /** Shift_JIS encoding */
49
50
  pm_encoding_t pm_encoding_shift_jis = {
50
51
  .name = "shift_jis",
51
52
  .char_width = pm_encoding_shift_jis_char_width,