prism 1.3.0 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +46 -1
  3. data/Makefile +2 -1
  4. data/README.md +1 -0
  5. data/config.yml +273 -37
  6. data/docs/parser_translation.md +8 -23
  7. data/docs/releasing.md +1 -1
  8. data/docs/ripper_translation.md +1 -1
  9. data/docs/ruby_api.md +1 -1
  10. data/ext/prism/api_node.c +1816 -1303
  11. data/ext/prism/extension.c +244 -110
  12. data/ext/prism/extension.h +4 -4
  13. data/include/prism/ast.h +291 -49
  14. data/include/prism/defines.h +4 -1
  15. data/include/prism/diagnostic.h +4 -0
  16. data/include/prism/options.h +89 -3
  17. data/include/prism/regexp.h +2 -2
  18. data/include/prism/util/pm_buffer.h +18 -0
  19. data/include/prism/util/pm_integer.h +4 -0
  20. data/include/prism/util/pm_list.h +6 -0
  21. data/include/prism/util/pm_string.h +12 -2
  22. data/include/prism/version.h +2 -2
  23. data/include/prism.h +41 -16
  24. data/lib/prism/compiler.rb +456 -151
  25. data/lib/prism/desugar_compiler.rb +1 -0
  26. data/lib/prism/dispatcher.rb +16 -0
  27. data/lib/prism/dot_visitor.rb +21 -1
  28. data/lib/prism/dsl.rb +13 -2
  29. data/lib/prism/ffi.rb +62 -34
  30. data/lib/prism/inspect_visitor.rb +5 -1
  31. data/lib/prism/lex_compat.rb +1 -0
  32. data/lib/prism/mutation_compiler.rb +3 -0
  33. data/lib/prism/node.rb +554 -345
  34. data/lib/prism/node_ext.rb +4 -1
  35. data/lib/prism/pack.rb +2 -0
  36. data/lib/prism/parse_result/comments.rb +1 -0
  37. data/lib/prism/parse_result/errors.rb +1 -0
  38. data/lib/prism/parse_result/newlines.rb +2 -1
  39. data/lib/prism/parse_result.rb +53 -0
  40. data/lib/prism/pattern.rb +1 -0
  41. data/lib/prism/polyfill/append_as_bytes.rb +15 -0
  42. data/lib/prism/polyfill/scan_byte.rb +14 -0
  43. data/lib/prism/polyfill/warn.rb +42 -0
  44. data/lib/prism/reflection.rb +5 -2
  45. data/lib/prism/relocation.rb +1 -0
  46. data/lib/prism/serialize.rb +1275 -783
  47. data/lib/prism/string_query.rb +1 -0
  48. data/lib/prism/translation/parser/builder.rb +62 -0
  49. data/lib/prism/translation/parser/compiler.rb +230 -152
  50. data/lib/prism/translation/parser/lexer.rb +446 -64
  51. data/lib/prism/translation/parser.rb +64 -4
  52. data/lib/prism/translation/parser33.rb +1 -0
  53. data/lib/prism/translation/parser34.rb +1 -0
  54. data/lib/prism/translation/parser35.rb +13 -0
  55. data/lib/prism/translation/parser_current.rb +24 -0
  56. data/lib/prism/translation/ripper/sexp.rb +1 -0
  57. data/lib/prism/translation/ripper.rb +30 -4
  58. data/lib/prism/translation/ruby_parser.rb +291 -7
  59. data/lib/prism/translation.rb +3 -0
  60. data/lib/prism/visitor.rb +457 -152
  61. data/lib/prism.rb +5 -3
  62. data/prism.gemspec +9 -1
  63. data/rbi/prism/dsl.rbi +9 -6
  64. data/rbi/prism/node.rbi +43 -16
  65. data/rbi/prism/parse_result.rbi +17 -0
  66. data/rbi/prism/translation/parser35.rbi +6 -0
  67. data/rbi/prism.rbi +39 -36
  68. data/sig/prism/dispatcher.rbs +3 -0
  69. data/sig/prism/dsl.rbs +7 -5
  70. data/sig/prism/node.rbs +461 -37
  71. data/sig/prism/node_ext.rbs +84 -17
  72. data/sig/prism/parse_result/comments.rbs +38 -0
  73. data/sig/prism/parse_result.rbs +14 -0
  74. data/sig/prism/reflection.rbs +1 -1
  75. data/sig/prism/serialize.rbs +4 -2
  76. data/sig/prism.rbs +22 -1
  77. data/src/diagnostic.c +9 -3
  78. data/src/node.c +23 -0
  79. data/src/options.c +33 -2
  80. data/src/prettyprint.c +32 -0
  81. data/src/prism.c +620 -242
  82. data/src/serialize.c +8 -0
  83. data/src/token_type.c +36 -34
  84. data/src/util/pm_buffer.c +40 -0
  85. data/src/util/pm_constant_pool.c +6 -2
  86. data/src/util/pm_strncasecmp.c +13 -1
  87. metadata +11 -7
data/src/serialize.c CHANGED
@@ -1,3 +1,5 @@
1
+ /* :markup: markdown */
2
+
1
3
  /*----------------------------------------------------------------------------*/
2
4
  /* This file is generated by the templates/template.rb script and should not */
3
5
  /* be modified manually. See */
@@ -1706,6 +1708,12 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
1706
1708
  } else {
1707
1709
  pm_serialize_node(parser, (pm_node_t *)((pm_rescue_node_t *)node)->reference, buffer);
1708
1710
  }
1711
+ if (((pm_rescue_node_t *)node)->then_keyword_loc.start == NULL) {
1712
+ pm_buffer_append_byte(buffer, 0);
1713
+ } else {
1714
+ pm_buffer_append_byte(buffer, 1);
1715
+ pm_serialize_location(parser, &((pm_rescue_node_t *)node)->then_keyword_loc, buffer);
1716
+ }
1709
1717
  if (((pm_rescue_node_t *)node)->statements == NULL) {
1710
1718
  pm_buffer_append_byte(buffer, 0);
1711
1719
  } else {
data/src/token_type.c CHANGED
@@ -1,3 +1,5 @@
1
+ /* :markup: markdown */
2
+
1
3
  /*----------------------------------------------------------------------------*/
2
4
  /* This file is generated by the templates/template.rb script and should not */
3
5
  /* be modified manually. See */
@@ -18,10 +20,36 @@ pm_token_type_name(pm_token_type_t token_type) {
18
20
  switch (token_type) {
19
21
  case PM_TOKEN_EOF:
20
22
  return "EOF";
21
- case PM_TOKEN_MISSING:
22
- return "MISSING";
23
- case PM_TOKEN_NOT_PROVIDED:
24
- return "NOT_PROVIDED";
23
+ case PM_TOKEN_BRACE_RIGHT:
24
+ return "BRACE_RIGHT";
25
+ case PM_TOKEN_COMMA:
26
+ return "COMMA";
27
+ case PM_TOKEN_EMBEXPR_END:
28
+ return "EMBEXPR_END";
29
+ case PM_TOKEN_KEYWORD_DO:
30
+ return "KEYWORD_DO";
31
+ case PM_TOKEN_KEYWORD_ELSE:
32
+ return "KEYWORD_ELSE";
33
+ case PM_TOKEN_KEYWORD_ELSIF:
34
+ return "KEYWORD_ELSIF";
35
+ case PM_TOKEN_KEYWORD_END:
36
+ return "KEYWORD_END";
37
+ case PM_TOKEN_KEYWORD_ENSURE:
38
+ return "KEYWORD_ENSURE";
39
+ case PM_TOKEN_KEYWORD_IN:
40
+ return "KEYWORD_IN";
41
+ case PM_TOKEN_KEYWORD_RESCUE:
42
+ return "KEYWORD_RESCUE";
43
+ case PM_TOKEN_KEYWORD_THEN:
44
+ return "KEYWORD_THEN";
45
+ case PM_TOKEN_KEYWORD_WHEN:
46
+ return "KEYWORD_WHEN";
47
+ case PM_TOKEN_NEWLINE:
48
+ return "NEWLINE";
49
+ case PM_TOKEN_PARENTHESIS_RIGHT:
50
+ return "PARENTHESIS_RIGHT";
51
+ case PM_TOKEN_SEMICOLON:
52
+ return "SEMICOLON";
25
53
  case PM_TOKEN_AMPERSAND:
26
54
  return "AMPERSAND";
27
55
  case PM_TOKEN_AMPERSAND_AMPERSAND:
@@ -44,8 +72,6 @@ pm_token_type_name(pm_token_type_t token_type) {
44
72
  return "BANG_TILDE";
45
73
  case PM_TOKEN_BRACE_LEFT:
46
74
  return "BRACE_LEFT";
47
- case PM_TOKEN_BRACE_RIGHT:
48
- return "BRACE_RIGHT";
49
75
  case PM_TOKEN_BRACKET_LEFT:
50
76
  return "BRACKET_LEFT";
51
77
  case PM_TOKEN_BRACKET_LEFT_ARRAY:
@@ -68,8 +94,6 @@ pm_token_type_name(pm_token_type_t token_type) {
68
94
  return "COLON";
69
95
  case PM_TOKEN_COLON_COLON:
70
96
  return "COLON_COLON";
71
- case PM_TOKEN_COMMA:
72
- return "COMMA";
73
97
  case PM_TOKEN_COMMENT:
74
98
  return "COMMENT";
75
99
  case PM_TOKEN_CONSTANT:
@@ -88,8 +112,6 @@ pm_token_type_name(pm_token_type_t token_type) {
88
112
  return "EMBDOC_LINE";
89
113
  case PM_TOKEN_EMBEXPR_BEGIN:
90
114
  return "EMBEXPR_BEGIN";
91
- case PM_TOKEN_EMBEXPR_END:
92
- return "EMBEXPR_END";
93
115
  case PM_TOKEN_EMBVAR:
94
116
  return "EMBVAR";
95
117
  case PM_TOKEN_EQUAL:
@@ -156,20 +178,10 @@ pm_token_type_name(pm_token_type_t token_type) {
156
178
  return "KEYWORD_DEF";
157
179
  case PM_TOKEN_KEYWORD_DEFINED:
158
180
  return "KEYWORD_DEFINED";
159
- case PM_TOKEN_KEYWORD_DO:
160
- return "KEYWORD_DO";
161
181
  case PM_TOKEN_KEYWORD_DO_LOOP:
162
182
  return "KEYWORD_DO_LOOP";
163
- case PM_TOKEN_KEYWORD_ELSE:
164
- return "KEYWORD_ELSE";
165
- case PM_TOKEN_KEYWORD_ELSIF:
166
- return "KEYWORD_ELSIF";
167
- case PM_TOKEN_KEYWORD_END:
168
- return "KEYWORD_END";
169
183
  case PM_TOKEN_KEYWORD_END_UPCASE:
170
184
  return "KEYWORD_END_UPCASE";
171
- case PM_TOKEN_KEYWORD_ENSURE:
172
- return "KEYWORD_ENSURE";
173
185
  case PM_TOKEN_KEYWORD_FALSE:
174
186
  return "KEYWORD_FALSE";
175
187
  case PM_TOKEN_KEYWORD_FOR:
@@ -178,8 +190,6 @@ pm_token_type_name(pm_token_type_t token_type) {
178
190
  return "KEYWORD_IF";
179
191
  case PM_TOKEN_KEYWORD_IF_MODIFIER:
180
192
  return "KEYWORD_IF_MODIFIER";
181
- case PM_TOKEN_KEYWORD_IN:
182
- return "KEYWORD_IN";
183
193
  case PM_TOKEN_KEYWORD_MODULE:
184
194
  return "KEYWORD_MODULE";
185
195
  case PM_TOKEN_KEYWORD_NEXT:
@@ -192,8 +202,6 @@ pm_token_type_name(pm_token_type_t token_type) {
192
202
  return "KEYWORD_OR";
193
203
  case PM_TOKEN_KEYWORD_REDO:
194
204
  return "KEYWORD_REDO";
195
- case PM_TOKEN_KEYWORD_RESCUE:
196
- return "KEYWORD_RESCUE";
197
205
  case PM_TOKEN_KEYWORD_RESCUE_MODIFIER:
198
206
  return "KEYWORD_RESCUE_MODIFIER";
199
207
  case PM_TOKEN_KEYWORD_RETRY:
@@ -204,8 +212,6 @@ pm_token_type_name(pm_token_type_t token_type) {
204
212
  return "KEYWORD_SELF";
205
213
  case PM_TOKEN_KEYWORD_SUPER:
206
214
  return "KEYWORD_SUPER";
207
- case PM_TOKEN_KEYWORD_THEN:
208
- return "KEYWORD_THEN";
209
215
  case PM_TOKEN_KEYWORD_TRUE:
210
216
  return "KEYWORD_TRUE";
211
217
  case PM_TOKEN_KEYWORD_UNDEF:
@@ -218,8 +224,6 @@ pm_token_type_name(pm_token_type_t token_type) {
218
224
  return "KEYWORD_UNTIL";
219
225
  case PM_TOKEN_KEYWORD_UNTIL_MODIFIER:
220
226
  return "KEYWORD_UNTIL_MODIFIER";
221
- case PM_TOKEN_KEYWORD_WHEN:
222
- return "KEYWORD_WHEN";
223
227
  case PM_TOKEN_KEYWORD_WHILE:
224
228
  return "KEYWORD_WHILE";
225
229
  case PM_TOKEN_KEYWORD_WHILE_MODIFIER:
@@ -256,16 +260,12 @@ pm_token_type_name(pm_token_type_t token_type) {
256
260
  return "MINUS_EQUAL";
257
261
  case PM_TOKEN_MINUS_GREATER:
258
262
  return "MINUS_GREATER";
259
- case PM_TOKEN_NEWLINE:
260
- return "NEWLINE";
261
263
  case PM_TOKEN_NUMBERED_REFERENCE:
262
264
  return "NUMBERED_REFERENCE";
263
265
  case PM_TOKEN_PARENTHESIS_LEFT:
264
266
  return "PARENTHESIS_LEFT";
265
267
  case PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES:
266
268
  return "PARENTHESIS_LEFT_PARENTHESES";
267
- case PM_TOKEN_PARENTHESIS_RIGHT:
268
- return "PARENTHESIS_RIGHT";
269
269
  case PM_TOKEN_PERCENT:
270
270
  return "PERCENT";
271
271
  case PM_TOKEN_PERCENT_EQUAL:
@@ -298,8 +298,6 @@ pm_token_type_name(pm_token_type_t token_type) {
298
298
  return "REGEXP_BEGIN";
299
299
  case PM_TOKEN_REGEXP_END:
300
300
  return "REGEXP_END";
301
- case PM_TOKEN_SEMICOLON:
302
- return "SEMICOLON";
303
301
  case PM_TOKEN_SLASH:
304
302
  return "SLASH";
305
303
  case PM_TOKEN_SLASH_EQUAL:
@@ -344,6 +342,10 @@ pm_token_type_name(pm_token_type_t token_type) {
344
342
  return "WORDS_SEP";
345
343
  case PM_TOKEN___END__:
346
344
  return "__END__";
345
+ case PM_TOKEN_MISSING:
346
+ return "MISSING";
347
+ case PM_TOKEN_NOT_PROVIDED:
348
+ return "NOT_PROVIDED";
347
349
  case PM_TOKEN_MAXIMUM:
348
350
  assert(false && "unreachable");
349
351
  return "";
data/src/util/pm_buffer.c CHANGED
@@ -172,6 +172,46 @@ pm_buffer_append_double(pm_buffer_t *buffer, double value) {
172
172
  pm_buffer_append(buffer, source, sizeof(double));
173
173
  }
174
174
 
175
+ /**
176
+ * Append a unicode codepoint to the buffer.
177
+ */
178
+ bool
179
+ pm_buffer_append_unicode_codepoint(pm_buffer_t *buffer, uint32_t value) {
180
+ if (value <= 0x7F) {
181
+ pm_buffer_append_byte(buffer, (uint8_t) value); // 0xxxxxxx
182
+ return true;
183
+ } else if (value <= 0x7FF) {
184
+ uint8_t bytes[] = {
185
+ (uint8_t) (0xC0 | ((value >> 6) & 0x3F)), // 110xxxxx
186
+ (uint8_t) (0x80 | (value & 0x3F)) // 10xxxxxx
187
+ };
188
+
189
+ pm_buffer_append_bytes(buffer, bytes, 2);
190
+ return true;
191
+ } else if (value <= 0xFFFF) {
192
+ uint8_t bytes[] = {
193
+ (uint8_t) (0xE0 | ((value >> 12) & 0x3F)), // 1110xxxx
194
+ (uint8_t) (0x80 | ((value >> 6) & 0x3F)), // 10xxxxxx
195
+ (uint8_t) (0x80 | (value & 0x3F)) // 10xxxxxx
196
+ };
197
+
198
+ pm_buffer_append_bytes(buffer, bytes, 3);
199
+ return true;
200
+ } else if (value <= 0x10FFFF) {
201
+ uint8_t bytes[] = {
202
+ (uint8_t) (0xF0 | ((value >> 18) & 0x3F)), // 11110xxx
203
+ (uint8_t) (0x80 | ((value >> 12) & 0x3F)), // 10xxxxxx
204
+ (uint8_t) (0x80 | ((value >> 6) & 0x3F)), // 10xxxxxx
205
+ (uint8_t) (0x80 | (value & 0x3F)) // 10xxxxxx
206
+ };
207
+
208
+ pm_buffer_append_bytes(buffer, bytes, 4);
209
+ return true;
210
+ } else {
211
+ return false;
212
+ }
213
+ }
214
+
175
215
  /**
176
216
  * Append a slice of source code to the buffer.
177
217
  */
data/src/util/pm_constant_pool.c CHANGED
@@ -15,8 +15,12 @@ pm_constant_id_list_init(pm_constant_id_list_t *list) {
15
15
  */
16
16
  void
17
17
  pm_constant_id_list_init_capacity(pm_constant_id_list_t *list, size_t capacity) {
18
- list->ids = xcalloc(capacity, sizeof(pm_constant_id_t));
19
- if (list->ids == NULL) abort();
18
+ if (capacity) {
19
+ list->ids = xcalloc(capacity, sizeof(pm_constant_id_t));
20
+ if (list->ids == NULL) abort();
21
+ } else {
22
+ list->ids = NULL;
23
+ }
20
24
 
21
25
  list->size = 0;
22
26
  list->capacity = capacity;
data/src/util/pm_strncasecmp.c CHANGED
@@ -1,5 +1,17 @@
1
1
  #include "prism/util/pm_strncasecmp.h"
2
2
 
3
+ /**
4
+ * A locale-insensitive version of `tolower(3)`
5
+ */
6
+ static inline int
7
+ pm_tolower(int c)
8
+ {
9
+ if ('A' <= c && c <= 'Z') {
10
+ return c | 0x20;
11
+ }
12
+ return c;
13
+ }
14
+
3
15
  /**
4
16
  * Compare two strings, ignoring case, up to the given length. Returns 0 if the
5
17
  * strings are equal, a negative number if string1 is less than string2, or a
@@ -16,7 +28,7 @@ pm_strncasecmp(const uint8_t *string1, const uint8_t *string2, size_t length) {
16
28
 
17
29
  while (offset < length && string1[offset] != '\0') {
18
30
  if (string2[offset] == '\0') return string1[offset];
19
- if ((difference = tolower(string1[offset]) - tolower(string2[offset])) != 0) return difference;
31
+ if ((difference = pm_tolower(string1[offset]) - pm_tolower(string2[offset])) != 0) return difference;
20
32
  offset++;
21
33
  }
22
34
 
metadata CHANGED
@@ -1,16 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: prism
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.0
4
+ version: 1.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Shopify
8
- autorequire:
9
8
  bindir: bin
10
9
  cert_chain: []
11
- date: 2024-12-21 00:00:00.000000000 Z
10
+ date: 2025-09-12 00:00:00.000000000 Z
12
11
  dependencies: []
13
- description:
14
12
  email:
15
13
  - ruby@shopify.com
16
14
  executables: []
@@ -91,18 +89,24 @@ files:
91
89
  - lib/prism/parse_result/errors.rb
92
90
  - lib/prism/parse_result/newlines.rb
93
91
  - lib/prism/pattern.rb
92
+ - lib/prism/polyfill/append_as_bytes.rb
94
93
  - lib/prism/polyfill/byteindex.rb
94
+ - lib/prism/polyfill/scan_byte.rb
95
95
  - lib/prism/polyfill/unpack1.rb
96
+ - lib/prism/polyfill/warn.rb
96
97
  - lib/prism/reflection.rb
97
98
  - lib/prism/relocation.rb
98
99
  - lib/prism/serialize.rb
99
100
  - lib/prism/string_query.rb
100
101
  - lib/prism/translation.rb
101
102
  - lib/prism/translation/parser.rb
103
+ - lib/prism/translation/parser/builder.rb
102
104
  - lib/prism/translation/parser/compiler.rb
103
105
  - lib/prism/translation/parser/lexer.rb
104
106
  - lib/prism/translation/parser33.rb
105
107
  - lib/prism/translation/parser34.rb
108
+ - lib/prism/translation/parser35.rb
109
+ - lib/prism/translation/parser_current.rb
106
110
  - lib/prism/translation/ripper.rb
107
111
  - lib/prism/translation/ripper/sexp.rb
108
112
  - lib/prism/translation/ripper/shim.rb
@@ -121,6 +125,7 @@ files:
121
125
  - rbi/prism/translation/parser.rbi
122
126
  - rbi/prism/translation/parser33.rbi
123
127
  - rbi/prism/translation/parser34.rbi
128
+ - rbi/prism/translation/parser35.rbi
124
129
  - rbi/prism/translation/ripper.rbi
125
130
  - rbi/prism/visitor.rbi
126
131
  - sig/prism.rbs
@@ -135,6 +140,7 @@ files:
135
140
  - sig/prism/node_ext.rbs
136
141
  - sig/prism/pack.rbs
137
142
  - sig/prism/parse_result.rbs
143
+ - sig/prism/parse_result/comments.rbs
138
144
  - sig/prism/pattern.rbs
139
145
  - sig/prism/reflection.rbs
140
146
  - sig/prism/relocation.rbs
@@ -169,7 +175,6 @@ metadata:
169
175
  allowed_push_host: https://rubygems.org
170
176
  source_code_uri: https://github.com/ruby/prism
171
177
  changelog_uri: https://github.com/ruby/prism/blob/main/CHANGELOG.md
172
- post_install_message:
173
178
  rdoc_options: []
174
179
  require_paths:
175
180
  - lib
@@ -184,8 +189,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
184
189
  - !ruby/object:Gem::Version
185
190
  version: '0'
186
191
  requirements: []
187
- rubygems_version: 3.5.16
188
- signing_key:
192
+ rubygems_version: 3.6.2
189
193
  specification_version: 4
190
194
  summary: Prism Ruby parser
191
195
  test_files: []