prism 0.15.1 → 0.17.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (91) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +35 -1
  3. data/Makefile +12 -0
  4. data/README.md +3 -1
  5. data/config.yml +66 -50
  6. data/docs/configuration.md +2 -0
  7. data/docs/fuzzing.md +1 -1
  8. data/docs/javascript.md +90 -0
  9. data/docs/releasing.md +27 -0
  10. data/docs/ruby_api.md +2 -0
  11. data/docs/serialization.md +28 -29
  12. data/ext/prism/api_node.c +856 -826
  13. data/ext/prism/api_pack.c +20 -9
  14. data/ext/prism/extension.c +494 -119
  15. data/ext/prism/extension.h +1 -1
  16. data/include/prism/ast.h +3157 -747
  17. data/include/prism/defines.h +40 -8
  18. data/include/prism/diagnostic.h +36 -3
  19. data/include/prism/enc/pm_encoding.h +119 -28
  20. data/include/prism/node.h +38 -30
  21. data/include/prism/options.h +204 -0
  22. data/include/prism/pack.h +44 -33
  23. data/include/prism/parser.h +445 -199
  24. data/include/prism/prettyprint.h +26 -0
  25. data/include/prism/regexp.h +16 -2
  26. data/include/prism/util/pm_buffer.h +102 -18
  27. data/include/prism/util/pm_char.h +162 -48
  28. data/include/prism/util/pm_constant_pool.h +128 -34
  29. data/include/prism/util/pm_list.h +68 -38
  30. data/include/prism/util/pm_memchr.h +18 -3
  31. data/include/prism/util/pm_newline_list.h +71 -28
  32. data/include/prism/util/pm_state_stack.h +25 -7
  33. data/include/prism/util/pm_string.h +115 -27
  34. data/include/prism/util/pm_string_list.h +25 -6
  35. data/include/prism/util/pm_strncasecmp.h +32 -0
  36. data/include/prism/util/pm_strpbrk.h +31 -17
  37. data/include/prism/version.h +28 -3
  38. data/include/prism.h +229 -36
  39. data/lib/prism/compiler.rb +5 -5
  40. data/lib/prism/debug.rb +43 -13
  41. data/lib/prism/desugar_compiler.rb +1 -1
  42. data/lib/prism/dispatcher.rb +27 -26
  43. data/lib/prism/dsl.rb +16 -16
  44. data/lib/prism/ffi.rb +138 -61
  45. data/lib/prism/lex_compat.rb +26 -16
  46. data/lib/prism/mutation_compiler.rb +11 -11
  47. data/lib/prism/node.rb +426 -227
  48. data/lib/prism/node_ext.rb +23 -16
  49. data/lib/prism/node_inspector.rb +1 -1
  50. data/lib/prism/pack.rb +79 -40
  51. data/lib/prism/parse_result/comments.rb +7 -2
  52. data/lib/prism/parse_result/newlines.rb +4 -0
  53. data/lib/prism/parse_result.rb +157 -21
  54. data/lib/prism/pattern.rb +14 -3
  55. data/lib/prism/ripper_compat.rb +28 -10
  56. data/lib/prism/serialize.rb +935 -307
  57. data/lib/prism/visitor.rb +9 -5
  58. data/lib/prism.rb +20 -2
  59. data/prism.gemspec +11 -2
  60. data/rbi/prism.rbi +7305 -0
  61. data/rbi/prism_static.rbi +196 -0
  62. data/sig/prism.rbs +4468 -0
  63. data/sig/prism_static.rbs +123 -0
  64. data/src/diagnostic.c +56 -53
  65. data/src/enc/pm_big5.c +1 -0
  66. data/src/enc/pm_euc_jp.c +1 -0
  67. data/src/enc/pm_gbk.c +1 -0
  68. data/src/enc/pm_shift_jis.c +1 -0
  69. data/src/enc/pm_tables.c +316 -80
  70. data/src/enc/pm_unicode.c +54 -9
  71. data/src/enc/pm_windows_31j.c +1 -0
  72. data/src/node.c +357 -345
  73. data/src/options.c +170 -0
  74. data/src/prettyprint.c +7697 -1643
  75. data/src/prism.c +1964 -1125
  76. data/src/regexp.c +153 -95
  77. data/src/serialize.c +432 -397
  78. data/src/token_type.c +3 -1
  79. data/src/util/pm_buffer.c +88 -23
  80. data/src/util/pm_char.c +103 -57
  81. data/src/util/pm_constant_pool.c +52 -22
  82. data/src/util/pm_list.c +12 -4
  83. data/src/util/pm_memchr.c +5 -3
  84. data/src/util/pm_newline_list.c +25 -63
  85. data/src/util/pm_state_stack.c +9 -3
  86. data/src/util/pm_string.c +95 -85
  87. data/src/util/pm_string_list.c +14 -15
  88. data/src/util/pm_strncasecmp.c +10 -3
  89. data/src/util/pm_strpbrk.c +25 -19
  90. metadata +12 -3
  91. data/docs/prism.png +0 -0
@@ -0,0 +1,123 @@
1
+ module Prism
2
+ class ParseResult
3
+ def value: () -> ProgramNode
4
+ def comments: () -> Array[Comment]
5
+ def errors: () -> Array[ParseError]
6
+ def warnings: () -> Array[ParseWarning]
7
+ def source: () -> Source
8
+ end
9
+
10
+ class ParseError
11
+ def message: () -> String
12
+ def location: () -> Location
13
+ end
14
+
15
+ class ParseWarning
16
+ def message: () -> String
17
+ def location: () -> Location
18
+ end
19
+
20
+ class Node
21
+ def child_nodes: () -> Array[Node?]
22
+ def location: () -> Location
23
+ def slice: () -> String
24
+ end
25
+
26
+ class Comment
27
+ def location: () -> Location
28
+ def trailing?: () -> bool
29
+ end
30
+
31
+ class InlineComment < Comment
32
+ def trailing?: () -> bool
33
+ end
34
+
35
+ class EmbDocComment < Comment
36
+ end
37
+
38
+ class DATAComment < Comment
39
+ end
40
+
41
+ class Location
42
+ def initialize: (source: Source, start_offset: Integer, length: Integer) -> void
43
+ def slice: () -> String
44
+ def comments: () -> Array[Comment]
45
+ def copy: (**untyped) -> Location
46
+ def start_offset: () -> Integer
47
+ def end_offset: () -> Integer
48
+ def start_line: () -> Integer
49
+ def end_line: () -> Integer
50
+ def start_column: () -> Integer
51
+ def end_column: () -> Integer
52
+ end
53
+
54
+ class Source
55
+ attr_reader source: String
56
+ attr_reader start_line: Integer
57
+ attr_reader offsets: Array[Integer]
58
+
59
+ @source: String
60
+ @start_line: Integer
61
+ @offsets: Array[Integer]
62
+
63
+ def initialize: (source: String, start_line: Integer, offsets: Array[Integer]) -> void
64
+ def slice: (offset: Integer, length: Integer) -> String
65
+ def line: (value: Integer) -> Integer
66
+ def line_offset: (value: Integer) -> Integer
67
+ def column: (value: Integer) -> Integer
68
+ end
69
+
70
+ class Token
71
+ attr_reader type: untyped
72
+ attr_reader value: String
73
+ attr_reader location: Location
74
+
75
+ @type: untyped
76
+ @value: String
77
+ @location: Location
78
+
79
+ def initialize: (type: untyped, value: String, location: Location) -> void
80
+ def deconstruct_keys: (keys: untyped) -> untyped
81
+ def pretty_print: (q: untyped) -> untyped
82
+ def ==: (other: untyped) -> bool
83
+ end
84
+
85
+ class NodeInspector
86
+ attr_reader prefix: String
87
+ attr_reader output: String
88
+
89
+ @prefix: String
90
+ @output: String
91
+
92
+ def initialize: (prefix: String) -> void
93
+
94
+ # Appends a line to the output with the current prefix.
95
+ def <<: (line: String) -> void
96
+
97
+ # This generates a string that is used as the header of the inspect output
98
+ # for any given node.
99
+ def header: (node: Node) -> String
100
+
101
+ # Generates a string that represents a list of nodes. It handles properly
102
+ # using the box drawing characters to make the output look nice.
103
+ def list: (prefix: String, nodes: Array[Node]) -> String
104
+
105
+ # Generates a string that represents a location field on a node.
106
+ def location: (value: Location) -> String
107
+
108
+ # Generates a string that represents a child node.
109
+ def child_node: (node: Node, append: String) -> String
110
+
111
+ # Returns a new inspector that can be used to inspect a child node.
112
+ def child_inspector: (append: String) -> NodeInspector
113
+
114
+ # Returns the output as a string.
115
+ def to_str: () -> String
116
+ end
117
+
118
+ class BasicVisitor
119
+ def visit: (node: Node?) -> void
120
+ def visit_all: (nodes: Array[Node?]) -> void
121
+ def visit_child_nodes: (node: Node) -> void
122
+ end
123
+ end
data/src/diagnostic.c CHANGED
@@ -1,56 +1,55 @@
1
1
  #include "prism/diagnostic.h"
2
2
 
3
- /*
4
- ## Message composition
5
-
6
- When composing an error message, use sentence fragments.
7
-
8
- Try describing the property of the code that caused the error, rather than the rule that is being
9
- violated. It may help to use a fragment that completes a sentence beginning, "The parser
10
- encountered (a) ...". If appropriate, add a description of the rule violation (or other helpful
11
- context) after a semicolon.
12
-
13
- For example:, instead of "Control escape sequence cannot be doubled", prefer:
14
-
15
- > "Invalid control escape sequence; control cannot be repeated"
16
-
17
- In some cases, where the failure is more general or syntax expectations are violated, it may make
18
- more sense to use a fragment that completes a sentence beginning, "The parser ...".
19
-
20
- For example:
21
-
22
- > "Expected an expression after `(`"
23
- > "Cannot parse the expression"
24
-
25
-
26
- ## Message style guide
27
-
28
- - Use articles like "a", "an", and "the" when appropriate.
29
- - e.g., prefer "Cannot parse the expression" to "Cannot parse expression".
30
- - Use the common name for tokens and nodes.
31
- - e.g., prefer "keyword splat" to "assoc splat"
32
- - e.g., prefer "embedded document" to "embdoc"
33
- - Capitalize the initial word of the message.
34
- - Use back ticks around token literals
35
- - e.g., "Expected a `=>` between the hash key and value"
36
- - Do not use `.` or other punctuation at the end of the message.
37
- - Do not use contractions like "can't". Prefer "cannot" to "can not".
38
- - For tokens that can have multiple meanings, reference the token and its meaning.
39
- - e.g., "`*` splat argument" is clearer and more complete than "splat argument" or "`*` argument"
40
-
41
-
42
- ## Error names (PM_ERR_*)
43
-
44
- - When appropriate, prefer node name to token name.
45
- - e.g., prefer "SPLAT" to "STAR" in the context of argument parsing.
46
- - Prefer token name to common name.
47
- - e.g., prefer "STAR" to "ASTERISK".
48
- - Try to order the words in the name from more general to more specific,
49
- - e.g., "INVALID_NUMBER_DECIMAL" is better than "DECIMAL_INVALID_NUMBER".
50
- - When in doubt, look for similar patterns and name them so that they are grouped when lexically
51
- sorted. See PM_ERR_ARGUMENT_NO_FORWARDING_* for an example.
52
- */
53
-
3
+ /**
4
+ * ## Message composition
5
+ *
6
+ * When composing an error message, use sentence fragments.
7
+ *
8
+ * Try describing the property of the code that caused the error, rather than the rule that is being
9
+ * violated. It may help to use a fragment that completes a sentence beginning, "The parser
10
+ * encountered (a) ...". If appropriate, add a description of the rule violation (or other helpful
11
+ * context) after a semicolon.
12
+ *
13
+ * For example, instead of "Control escape sequence cannot be doubled", prefer:
14
+ *
15
+ * > "Invalid control escape sequence; control cannot be repeated"
16
+ *
17
+ * In some cases, where the failure is more general or syntax expectations are violated, it may make
18
+ * more sense to use a fragment that completes a sentence beginning, "The parser ...".
19
+ *
20
+ * For example:
21
+ *
22
+ * > "Expected an expression after `(`"
23
+ * > "Cannot parse the expression"
24
+ *
25
+ *
26
+ * ## Message style guide
27
+ *
28
+ * - Use articles like "a", "an", and "the" when appropriate.
29
+ * - e.g., prefer "Cannot parse the expression" to "Cannot parse expression".
30
+ * - Use the common name for tokens and nodes.
31
+ * - e.g., prefer "keyword splat" to "assoc splat"
32
+ * - e.g., prefer "embedded document" to "embdoc"
33
+ * - Capitalize the initial word of the message.
34
+ * - Use backticks around token literals.
35
+ * - e.g., "Expected a `=>` between the hash key and value"
36
+ * - Do not use `.` or other punctuation at the end of the message.
37
+ * - Do not use contractions like "can't". Prefer "cannot" to "can not".
38
+ * - For tokens that can have multiple meanings, reference the token and its meaning.
39
+ * - e.g., "`*` splat argument" is clearer and more complete than "splat argument" or "`*` argument"
40
+ *
41
+ *
42
+ * ## Error names (PM_ERR_*)
43
+ *
44
+ * - When appropriate, prefer node name to token name.
45
+ * - e.g., prefer "SPLAT" to "STAR" in the context of argument parsing.
46
+ * - Prefer token name to common name.
47
+ * - e.g., prefer "STAR" to "ASTERISK".
48
+ * - Try to order the words in the name from more general to more specific,
49
+ * - e.g., "INVALID_NUMBER_DECIMAL" is better than "DECIMAL_INVALID_NUMBER".
50
+ * - When in doubt, look for similar patterns and name them so that they are grouped when lexically
51
+ * sorted. See PM_ERR_ARGUMENT_NO_FORWARDING_* for an example.
52
+ */
54
53
  static const char* const diagnostic_messages[PM_DIAGNOSTIC_ID_LEN] = {
55
54
  [PM_ERR_ALIAS_ARGUMENT] = "Invalid argument being passed to `alias`; expected a bare word, symbol, constant, or global variable",
56
55
  [PM_ERR_AMPAMPEQ_MULTI_ASSIGN] = "Unexpected `&&=` in a multiple assignment",
@@ -263,7 +262,9 @@ pm_diagnostic_message(pm_diagnostic_id_t diag_id) {
263
262
  return message;
264
263
  }
265
264
 
266
- // Append an error to the given list of diagnostic.
265
+ /**
266
+ * Append an error to the given list of diagnostics.
267
+ */
267
268
  bool
268
269
  pm_diagnostic_list_append(pm_list_t *list, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id) {
269
270
  pm_diagnostic_t *diagnostic = (pm_diagnostic_t *) calloc(sizeof(pm_diagnostic_t), 1);
@@ -274,7 +275,9 @@ pm_diagnostic_list_append(pm_list_t *list, const uint8_t *start, const uint8_t *
274
275
  return true;
275
276
  }
276
277
 
277
- // Deallocate the internal state of the given diagnostic list.
278
+ /**
279
+ * Deallocate the internal state of the given diagnostic list.
280
+ */
278
281
  void
279
282
  pm_diagnostic_list_free(pm_list_t *list) {
280
283
  pm_list_node_t *node, *next;
data/src/enc/pm_big5.c CHANGED
@@ -42,6 +42,7 @@ pm_encoding_big5_isupper_char(const uint8_t *b, ptrdiff_t n) {
42
42
  }
43
43
  }
44
44
 
45
+ /** Big5 encoding */
45
46
  pm_encoding_t pm_encoding_big5 = {
46
47
  .name = "big5",
47
48
  .char_width = pm_encoding_big5_char_width,
data/src/enc/pm_euc_jp.c CHANGED
@@ -48,6 +48,7 @@ pm_encoding_euc_jp_isupper_char(const uint8_t *b, ptrdiff_t n) {
48
48
  }
49
49
  }
50
50
 
51
+ /** EUC-JP encoding */
51
52
  pm_encoding_t pm_encoding_euc_jp = {
52
53
  .name = "euc-jp",
53
54
  .char_width = pm_encoding_euc_jp_char_width,
data/src/enc/pm_gbk.c CHANGED
@@ -51,6 +51,7 @@ pm_encoding_gbk_isupper_char(const uint8_t *b, ptrdiff_t n) {
51
51
  }
52
52
  }
53
53
 
54
+ /** GBK encoding */
54
55
  pm_encoding_t pm_encoding_gbk = {
55
56
  .name = "gbk",
56
57
  .char_width = pm_encoding_gbk_char_width,
data/src/enc/pm_shift_jis.c CHANGED
@@ -46,6 +46,7 @@ pm_encoding_shift_jis_isupper_char(const uint8_t *b, ptrdiff_t n) {
46
46
  }
47
47
  }
48
48
 
49
+ /** Shift_JIS encoding */
49
50
  pm_encoding_t pm_encoding_shift_jis = {
50
51
  .name = "shift_jis",
51
52
  .char_width = pm_encoding_shift_jis_char_width,