prism 0.15.1 → 0.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +35 -1
  3. data/Makefile +12 -0
  4. data/README.md +3 -1
  5. data/config.yml +66 -50
  6. data/docs/configuration.md +2 -0
  7. data/docs/fuzzing.md +1 -1
  8. data/docs/javascript.md +90 -0
  9. data/docs/releasing.md +27 -0
  10. data/docs/ruby_api.md +2 -0
  11. data/docs/serialization.md +28 -29
  12. data/ext/prism/api_node.c +856 -826
  13. data/ext/prism/api_pack.c +20 -9
  14. data/ext/prism/extension.c +494 -119
  15. data/ext/prism/extension.h +1 -1
  16. data/include/prism/ast.h +3157 -747
  17. data/include/prism/defines.h +40 -8
  18. data/include/prism/diagnostic.h +36 -3
  19. data/include/prism/enc/pm_encoding.h +119 -28
  20. data/include/prism/node.h +38 -30
  21. data/include/prism/options.h +204 -0
  22. data/include/prism/pack.h +44 -33
  23. data/include/prism/parser.h +445 -199
  24. data/include/prism/prettyprint.h +26 -0
  25. data/include/prism/regexp.h +16 -2
  26. data/include/prism/util/pm_buffer.h +102 -18
  27. data/include/prism/util/pm_char.h +162 -48
  28. data/include/prism/util/pm_constant_pool.h +128 -34
  29. data/include/prism/util/pm_list.h +68 -38
  30. data/include/prism/util/pm_memchr.h +18 -3
  31. data/include/prism/util/pm_newline_list.h +71 -28
  32. data/include/prism/util/pm_state_stack.h +25 -7
  33. data/include/prism/util/pm_string.h +115 -27
  34. data/include/prism/util/pm_string_list.h +25 -6
  35. data/include/prism/util/pm_strncasecmp.h +32 -0
  36. data/include/prism/util/pm_strpbrk.h +31 -17
  37. data/include/prism/version.h +28 -3
  38. data/include/prism.h +229 -36
  39. data/lib/prism/compiler.rb +5 -5
  40. data/lib/prism/debug.rb +43 -13
  41. data/lib/prism/desugar_compiler.rb +1 -1
  42. data/lib/prism/dispatcher.rb +27 -26
  43. data/lib/prism/dsl.rb +16 -16
  44. data/lib/prism/ffi.rb +138 -61
  45. data/lib/prism/lex_compat.rb +26 -16
  46. data/lib/prism/mutation_compiler.rb +11 -11
  47. data/lib/prism/node.rb +426 -227
  48. data/lib/prism/node_ext.rb +23 -16
  49. data/lib/prism/node_inspector.rb +1 -1
  50. data/lib/prism/pack.rb +79 -40
  51. data/lib/prism/parse_result/comments.rb +7 -2
  52. data/lib/prism/parse_result/newlines.rb +4 -0
  53. data/lib/prism/parse_result.rb +157 -21
  54. data/lib/prism/pattern.rb +14 -3
  55. data/lib/prism/ripper_compat.rb +28 -10
  56. data/lib/prism/serialize.rb +935 -307
  57. data/lib/prism/visitor.rb +9 -5
  58. data/lib/prism.rb +20 -2
  59. data/prism.gemspec +11 -2
  60. data/rbi/prism.rbi +7305 -0
  61. data/rbi/prism_static.rbi +196 -0
  62. data/sig/prism.rbs +4468 -0
  63. data/sig/prism_static.rbs +123 -0
  64. data/src/diagnostic.c +56 -53
  65. data/src/enc/pm_big5.c +1 -0
  66. data/src/enc/pm_euc_jp.c +1 -0
  67. data/src/enc/pm_gbk.c +1 -0
  68. data/src/enc/pm_shift_jis.c +1 -0
  69. data/src/enc/pm_tables.c +316 -80
  70. data/src/enc/pm_unicode.c +54 -9
  71. data/src/enc/pm_windows_31j.c +1 -0
  72. data/src/node.c +357 -345
  73. data/src/options.c +170 -0
  74. data/src/prettyprint.c +7697 -1643
  75. data/src/prism.c +1964 -1125
  76. data/src/regexp.c +153 -95
  77. data/src/serialize.c +432 -397
  78. data/src/token_type.c +3 -1
  79. data/src/util/pm_buffer.c +88 -23
  80. data/src/util/pm_char.c +103 -57
  81. data/src/util/pm_constant_pool.c +52 -22
  82. data/src/util/pm_list.c +12 -4
  83. data/src/util/pm_memchr.c +5 -3
  84. data/src/util/pm_newline_list.c +25 -63
  85. data/src/util/pm_state_stack.c +9 -3
  86. data/src/util/pm_string.c +95 -85
  87. data/src/util/pm_string_list.c +14 -15
  88. data/src/util/pm_strncasecmp.c +10 -3
  89. data/src/util/pm_strpbrk.c +25 -19
  90. metadata +12 -3
  91. data/docs/prism.png +0 -0
@@ -0,0 +1,123 @@
1
+ module Prism
2
+ class ParseResult
3
+ def value: () -> ProgramNode
4
+ def comments: () -> Array[Comment]
5
+ def errors: () -> Array[ParseError]
6
+ def warnings: () -> Array[ParseWarning]
7
+ def source: () -> Source
8
+ end
9
+
10
+ class ParseError
11
+ def message: () -> String
12
+ def location: () -> Location
13
+ end
14
+
15
+ class ParseWarning
16
+ def message: () -> String
17
+ def location: () -> Location
18
+ end
19
+
20
+ class Node
21
+ def child_nodes: () -> Array[Node?]
22
+ def location: () -> Location
23
+ def slice: () -> String
24
+ end
25
+
26
+ class Comment
27
+ def location: () -> Location
28
+ def trailing?: () -> bool
29
+ end
30
+
31
+ class InlineComment < Comment
32
+ def trailing?: () -> bool
33
+ end
34
+
35
+ class EmbDocComment < Comment
36
+ end
37
+
38
+ class DATAComment < Comment
39
+ end
40
+
41
+ class Location
42
+ def initialize: (source: Source, start_offset: Integer, length: Integer) -> void
43
+ def slice: () -> String
44
+ def comments: () -> Array[Comment]
45
+ def copy: (**untyped) -> Location
46
+ def start_offset: () -> Integer
47
+ def end_offset: () -> Integer
48
+ def start_line: () -> Integer
49
+ def end_line: () -> Integer
50
+ def start_column: () -> Integer
51
+ def end_column: () -> Integer
52
+ end
53
+
54
+ class Source
55
+ attr_reader source: String
56
+ attr_reader start_line: Integer
57
+ attr_reader offsets: Array[Integer]
58
+
59
+ @source: String
60
+ @start_line: Integer
61
+ @offsets: Array[Integer]
62
+
63
+ def initialize: (source: String, start_line: Integer, offsets: Array[Integer]) -> void
64
+ def slice: (offset: Integer, length: Integer) -> String
65
+ def line: (value: Integer) -> Integer
66
+ def line_offset: (value: Integer) -> Integer
67
+ def column: (value: Integer) -> Integer
68
+ end
69
+
70
+ class Token
71
+ attr_reader type: untyped
72
+ attr_reader value: String
73
+ attr_reader location: Location
74
+
75
+ @type: untyped
76
+ @value: String
77
+ @location: Location
78
+
79
+ def initialize: (type: untyped, value: String, location: Location) -> void
80
+ def deconstruct_keys: (keys: untyped) -> untyped
81
+ def pretty_print: (q: untyped) -> untyped
82
+ def ==: (other: untyped) -> bool
83
+ end
84
+
85
+ class NodeInspector
86
+ attr_reader prefix: String
87
+ attr_reader output: String
88
+
89
+ @prefix: String
90
+ @output: String
91
+
92
+ def initialize: (prefix: String) -> void
93
+
94
+ # Appends a line to the output with the current prefix.
95
+ def <<: (line: String) -> void
96
+
97
+ # This generates a string that is used as the header of the inspect output
98
+ # for any given node.
99
+ def header: (node: Node) -> String
100
+
101
+ # Generates a string that represents a list of nodes. It handles properly
102
+ # using the box drawing characters to make the output look nice.
103
+ def list: (prefix: String, nodes: Array[Node]) -> String
104
+
105
+ # Generates a string that represents a location field on a node.
106
+ def location: (value: Location) -> String
107
+
108
+ # Generates a string that represents a child node.
109
+ def child_node: (node: Node, append: String) -> String
110
+
111
+ # Returns a new inspector that can be used to inspect a child node.
112
+ def child_inspector: (append: String) -> NodeInspector
113
+
114
+ # Returns the output as a string.
115
+ def to_str: () -> String
116
+ end
117
+
118
+ class BasicVisitor
119
+ def visit: (node: Node?) -> void
120
+ def visit_all: (nodes: Array[Node?]) -> void
121
+ def visit_child_nodes: (node: Node) -> void
122
+ end
123
+ end
data/src/diagnostic.c CHANGED
@@ -1,56 +1,55 @@
1
1
  #include "prism/diagnostic.h"
2
2
 
3
- /*
4
- ## Message composition
5
-
6
- When composing an error message, use sentence fragments.
7
-
8
- Try describing the property of the code that caused the error, rather than the rule that is being
9
- violated. It may help to use a fragment that completes a sentence beginning, "The parser
10
- encountered (a) ...". If appropriate, add a description of the rule violation (or other helpful
11
- context) after a semicolon.
12
-
13
- For example:, instead of "Control escape sequence cannot be doubled", prefer:
14
-
15
- > "Invalid control escape sequence; control cannot be repeated"
16
-
17
- In some cases, where the failure is more general or syntax expectations are violated, it may make
18
- more sense to use a fragment that completes a sentence beginning, "The parser ...".
19
-
20
- For example:
21
-
22
- > "Expected an expression after `(`"
23
- > "Cannot parse the expression"
24
-
25
-
26
- ## Message style guide
27
-
28
- - Use articles like "a", "an", and "the" when appropriate.
29
- - e.g., prefer "Cannot parse the expression" to "Cannot parse expression".
30
- - Use the common name for tokens and nodes.
31
- - e.g., prefer "keyword splat" to "assoc splat"
32
- - e.g., prefer "embedded document" to "embdoc"
33
- - Capitalize the initial word of the message.
34
- - Use back ticks around token literals
35
- - e.g., "Expected a `=>` between the hash key and value"
36
- - Do not use `.` or other punctuation at the end of the message.
37
- - Do not use contractions like "can't". Prefer "cannot" to "can not".
38
- - For tokens that can have multiple meanings, reference the token and its meaning.
39
- - e.g., "`*` splat argument" is clearer and more complete than "splat argument" or "`*` argument"
40
-
41
-
42
- ## Error names (PM_ERR_*)
43
-
44
- - When appropriate, prefer node name to token name.
45
- - e.g., prefer "SPLAT" to "STAR" in the context of argument parsing.
46
- - Prefer token name to common name.
47
- - e.g., prefer "STAR" to "ASTERISK".
48
- - Try to order the words in the name from more general to more specific,
49
- - e.g., "INVALID_NUMBER_DECIMAL" is better than "DECIMAL_INVALID_NUMBER".
50
- - When in doubt, look for similar patterns and name them so that they are grouped when lexically
51
- sorted. See PM_ERR_ARGUMENT_NO_FORWARDING_* for an example.
52
- */
53
-
3
+ /**
4
+ * ## Message composition
5
+ *
6
+ * When composing an error message, use sentence fragments.
7
+ *
8
+ * Try describing the property of the code that caused the error, rather than the rule that is being
9
+ * violated. It may help to use a fragment that completes a sentence beginning, "The parser
10
+ * encountered (a) ...". If appropriate, add a description of the rule violation (or other helpful
11
+ * context) after a semicolon.
12
+ *
13
+ * For example:, instead of "Control escape sequence cannot be doubled", prefer:
14
+ *
15
+ * > "Invalid control escape sequence; control cannot be repeated"
16
+ *
17
+ * In some cases, where the failure is more general or syntax expectations are violated, it may make
18
+ * more sense to use a fragment that completes a sentence beginning, "The parser ...".
19
+ *
20
+ * For example:
21
+ *
22
+ * > "Expected an expression after `(`"
23
+ * > "Cannot parse the expression"
24
+ *
25
+ *
26
+ * ## Message style guide
27
+ *
28
+ * - Use articles like "a", "an", and "the" when appropriate.
29
+ * - e.g., prefer "Cannot parse the expression" to "Cannot parse expression".
30
+ * - Use the common name for tokens and nodes.
31
+ * - e.g., prefer "keyword splat" to "assoc splat"
32
+ * - e.g., prefer "embedded document" to "embdoc"
33
+ * - Capitalize the initial word of the message.
34
+ * - Use back ticks around token literals
35
+ * - e.g., "Expected a `=>` between the hash key and value"
36
+ * - Do not use `.` or other punctuation at the end of the message.
37
+ * - Do not use contractions like "can't". Prefer "cannot" to "can not".
38
+ * - For tokens that can have multiple meanings, reference the token and its meaning.
39
+ * - e.g., "`*` splat argument" is clearer and more complete than "splat argument" or "`*` argument"
40
+ *
41
+ *
42
+ * ## Error names (PM_ERR_*)
43
+ *
44
+ * - When appropriate, prefer node name to token name.
45
+ * - e.g., prefer "SPLAT" to "STAR" in the context of argument parsing.
46
+ * - Prefer token name to common name.
47
+ * - e.g., prefer "STAR" to "ASTERISK".
48
+ * - Try to order the words in the name from more general to more specific,
49
+ * - e.g., "INVALID_NUMBER_DECIMAL" is better than "DECIMAL_INVALID_NUMBER".
50
+ * - When in doubt, look for similar patterns and name them so that they are grouped when lexically
51
+ * sorted. See PM_ERR_ARGUMENT_NO_FORWARDING_* for an example.
52
+ */
54
53
  static const char* const diagnostic_messages[PM_DIAGNOSTIC_ID_LEN] = {
55
54
  [PM_ERR_ALIAS_ARGUMENT] = "Invalid argument being passed to `alias`; expected a bare word, symbol, constant, or global variable",
56
55
  [PM_ERR_AMPAMPEQ_MULTI_ASSIGN] = "Unexpected `&&=` in a multiple assignment",
@@ -263,7 +262,9 @@ pm_diagnostic_message(pm_diagnostic_id_t diag_id) {
263
262
  return message;
264
263
  }
265
264
 
266
- // Append an error to the given list of diagnostic.
265
+ /**
266
+ * Append an error to the given list of diagnostic.
267
+ */
267
268
  bool
268
269
  pm_diagnostic_list_append(pm_list_t *list, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id) {
269
270
  pm_diagnostic_t *diagnostic = (pm_diagnostic_t *) calloc(sizeof(pm_diagnostic_t), 1);
@@ -274,7 +275,9 @@ pm_diagnostic_list_append(pm_list_t *list, const uint8_t *start, const uint8_t *
274
275
  return true;
275
276
  }
276
277
 
277
- // Deallocate the internal state of the given diagnostic list.
278
+ /**
279
+ * Deallocate the internal state of the given diagnostic list.
280
+ */
278
281
  void
279
282
  pm_diagnostic_list_free(pm_list_t *list) {
280
283
  pm_list_node_t *node, *next;
data/src/enc/pm_big5.c CHANGED
@@ -42,6 +42,7 @@ pm_encoding_big5_isupper_char(const uint8_t *b, ptrdiff_t n) {
42
42
  }
43
43
  }
44
44
 
45
+ /** Big5 encoding */
45
46
  pm_encoding_t pm_encoding_big5 = {
46
47
  .name = "big5",
47
48
  .char_width = pm_encoding_big5_char_width,
data/src/enc/pm_euc_jp.c CHANGED
@@ -48,6 +48,7 @@ pm_encoding_euc_jp_isupper_char(const uint8_t *b, ptrdiff_t n) {
48
48
  }
49
49
  }
50
50
 
51
+ /** EUC-JP encoding */
51
52
  pm_encoding_t pm_encoding_euc_jp = {
52
53
  .name = "euc-jp",
53
54
  .char_width = pm_encoding_euc_jp_char_width,
data/src/enc/pm_gbk.c CHANGED
@@ -51,6 +51,7 @@ pm_encoding_gbk_isupper_char(const uint8_t *b, ptrdiff_t n) {
51
51
  }
52
52
  }
53
53
 
54
+ /** GBK encoding */
54
55
  pm_encoding_t pm_encoding_gbk = {
55
56
  .name = "gbk",
56
57
  .char_width = pm_encoding_gbk_char_width,
data/src/enc/pm_shift_jis.c CHANGED
@@ -46,6 +46,7 @@ pm_encoding_shift_jis_isupper_char(const uint8_t *b, ptrdiff_t n) {
46
46
  }
47
47
  }
48
48
 
49
+ /** Shift_JIS encoding */
49
50
  pm_encoding_t pm_encoding_shift_jis = {
50
51
  .name = "shift_jis",
51
52
  .char_width = pm_encoding_shift_jis_char_width,