prism 1.4.0 → 1.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +73 -1
  3. data/Makefile +7 -5
  4. data/README.md +3 -1
  5. data/config.yml +294 -41
  6. data/docs/build_system.md +2 -2
  7. data/docs/cruby_compilation.md +1 -1
  8. data/docs/design.md +2 -2
  9. data/docs/parser_translation.md +8 -23
  10. data/docs/releasing.md +6 -25
  11. data/docs/ripper_translation.md +1 -1
  12. data/ext/prism/api_node.c +9 -3
  13. data/ext/prism/extconf.rb +1 -1
  14. data/ext/prism/extension.c +24 -3
  15. data/ext/prism/extension.h +1 -1
  16. data/include/prism/ast.h +360 -70
  17. data/include/prism/diagnostic.h +7 -0
  18. data/include/prism/options.h +49 -3
  19. data/include/prism/parser.h +3 -0
  20. data/include/prism/regexp.h +2 -2
  21. data/include/prism/util/pm_buffer.h +8 -0
  22. data/include/prism/util/pm_integer.h +4 -0
  23. data/include/prism/util/pm_list.h +6 -0
  24. data/include/prism/util/pm_string.h +12 -2
  25. data/include/prism/version.h +2 -2
  26. data/include/prism.h +40 -15
  27. data/lib/prism/compiler.rb +456 -151
  28. data/lib/prism/desugar_compiler.rb +1 -0
  29. data/lib/prism/dispatcher.rb +16 -0
  30. data/lib/prism/dot_visitor.rb +10 -1
  31. data/lib/prism/dsl.rb +5 -2
  32. data/lib/prism/ffi.rb +28 -10
  33. data/lib/prism/inspect_visitor.rb +4 -0
  34. data/lib/prism/lex_compat.rb +1 -0
  35. data/lib/prism/mutation_compiler.rb +3 -0
  36. data/lib/prism/node.rb +559 -349
  37. data/lib/prism/node_ext.rb +4 -1
  38. data/lib/prism/pack.rb +2 -0
  39. data/lib/prism/parse_result/comments.rb +1 -0
  40. data/lib/prism/parse_result/errors.rb +1 -0
  41. data/lib/prism/parse_result/newlines.rb +1 -0
  42. data/lib/prism/parse_result.rb +3 -15
  43. data/lib/prism/pattern.rb +1 -0
  44. data/lib/prism/polyfill/scan_byte.rb +14 -0
  45. data/lib/prism/polyfill/warn.rb +36 -0
  46. data/lib/prism/reflection.rb +4 -1
  47. data/lib/prism/relocation.rb +1 -0
  48. data/lib/prism/serialize.rb +30 -22
  49. data/lib/prism/string_query.rb +1 -0
  50. data/lib/prism/translation/parser/builder.rb +1 -0
  51. data/lib/prism/translation/parser/compiler.rb +63 -41
  52. data/lib/prism/translation/parser/lexer.rb +29 -21
  53. data/lib/prism/translation/parser.rb +25 -4
  54. data/lib/prism/translation/parser33.rb +1 -0
  55. data/lib/prism/translation/parser34.rb +1 -0
  56. data/lib/prism/translation/parser35.rb +2 -6
  57. data/lib/prism/translation/parser40.rb +13 -0
  58. data/lib/prism/translation/parser41.rb +13 -0
  59. data/lib/prism/translation/parser_current.rb +26 -0
  60. data/lib/prism/translation/ripper/sexp.rb +1 -0
  61. data/lib/prism/translation/ripper.rb +19 -3
  62. data/lib/prism/translation/ruby_parser.rb +340 -22
  63. data/lib/prism/translation.rb +4 -0
  64. data/lib/prism/visitor.rb +457 -152
  65. data/lib/prism.rb +22 -0
  66. data/prism.gemspec +9 -1
  67. data/rbi/prism/dsl.rbi +6 -6
  68. data/rbi/prism/node.rbi +42 -17
  69. data/rbi/prism/translation/parser35.rbi +0 -2
  70. data/rbi/prism/translation/parser40.rbi +6 -0
  71. data/rbi/prism/translation/parser41.rbi +6 -0
  72. data/sig/prism/dispatcher.rbs +3 -0
  73. data/sig/prism/dsl.rbs +5 -5
  74. data/sig/prism/node.rbs +462 -38
  75. data/sig/prism/node_ext.rbs +84 -17
  76. data/sig/prism/parse_result/comments.rbs +38 -0
  77. data/sig/prism/parse_result.rbs +4 -0
  78. data/sig/prism/reflection.rbs +1 -1
  79. data/sig/prism.rbs +4 -0
  80. data/src/diagnostic.c +13 -1
  81. data/src/encoding.c +172 -67
  82. data/src/node.c +11 -0
  83. data/src/options.c +17 -7
  84. data/src/prettyprint.c +18 -0
  85. data/src/prism.c +1495 -2021
  86. data/src/serialize.c +9 -1
  87. data/src/token_type.c +38 -36
  88. data/src/util/pm_constant_pool.c +1 -1
  89. data/src/util/pm_string.c +6 -8
  90. metadata +11 -3
data/src/serialize.c CHANGED
@@ -1,3 +1,5 @@
1
+ /* :markup: markdown */
2
+
1
3
  /*----------------------------------------------------------------------------*/
2
4
  /* This file is generated by the templates/template.rb script and should not */
3
5
  /* be modified manually. See */
@@ -393,6 +395,12 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
393
395
  pm_buffer_append_byte(buffer, 1);
394
396
  pm_serialize_location(parser, &((pm_call_node_t *)node)->closing_loc, buffer);
395
397
  }
398
+ if (((pm_call_node_t *)node)->equal_loc.start == NULL) {
399
+ pm_buffer_append_byte(buffer, 0);
400
+ } else {
401
+ pm_buffer_append_byte(buffer, 1);
402
+ pm_serialize_location(parser, &((pm_call_node_t *)node)->equal_loc, buffer);
403
+ }
396
404
  if (((pm_call_node_t *)node)->block == NULL) {
397
405
  pm_buffer_append_byte(buffer, 0);
398
406
  } else {
@@ -2175,7 +2183,7 @@ pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer)
2175
2183
  // buffer offset. We will add a leading 1 to indicate that this
2176
2184
  // is a buffer offset.
2177
2185
  uint32_t content_offset = pm_sizet_to_u32(buffer->length);
2178
- uint32_t owned_mask = (uint32_t) (1 << 31);
2186
+ uint32_t owned_mask = 1U << 31;
2179
2187
 
2180
2188
  assert(content_offset < owned_mask);
2181
2189
  content_offset |= owned_mask;
data/src/token_type.c CHANGED
@@ -1,3 +1,5 @@
1
+ /* :markup: markdown */
2
+
1
3
  /*----------------------------------------------------------------------------*/
2
4
  /* This file is generated by the templates/template.rb script and should not */
3
5
  /* be modified manually. See */
@@ -18,10 +20,38 @@ pm_token_type_name(pm_token_type_t token_type) {
18
20
  switch (token_type) {
19
21
  case PM_TOKEN_EOF:
20
22
  return "EOF";
21
- case PM_TOKEN_MISSING:
22
- return "MISSING";
23
- case PM_TOKEN_NOT_PROVIDED:
24
- return "NOT_PROVIDED";
23
+ case PM_TOKEN_BRACE_RIGHT:
24
+ return "BRACE_RIGHT";
25
+ case PM_TOKEN_COMMA:
26
+ return "COMMA";
27
+ case PM_TOKEN_EMBEXPR_END:
28
+ return "EMBEXPR_END";
29
+ case PM_TOKEN_KEYWORD_DO:
30
+ return "KEYWORD_DO";
31
+ case PM_TOKEN_KEYWORD_ELSE:
32
+ return "KEYWORD_ELSE";
33
+ case PM_TOKEN_KEYWORD_ELSIF:
34
+ return "KEYWORD_ELSIF";
35
+ case PM_TOKEN_KEYWORD_END:
36
+ return "KEYWORD_END";
37
+ case PM_TOKEN_KEYWORD_ENSURE:
38
+ return "KEYWORD_ENSURE";
39
+ case PM_TOKEN_KEYWORD_IN:
40
+ return "KEYWORD_IN";
41
+ case PM_TOKEN_KEYWORD_RESCUE:
42
+ return "KEYWORD_RESCUE";
43
+ case PM_TOKEN_KEYWORD_THEN:
44
+ return "KEYWORD_THEN";
45
+ case PM_TOKEN_KEYWORD_WHEN:
46
+ return "KEYWORD_WHEN";
47
+ case PM_TOKEN_NEWLINE:
48
+ return "NEWLINE";
49
+ case PM_TOKEN_PARENTHESIS_RIGHT:
50
+ return "PARENTHESIS_RIGHT";
51
+ case PM_TOKEN_PIPE:
52
+ return "PIPE";
53
+ case PM_TOKEN_SEMICOLON:
54
+ return "SEMICOLON";
25
55
  case PM_TOKEN_AMPERSAND:
26
56
  return "AMPERSAND";
27
57
  case PM_TOKEN_AMPERSAND_AMPERSAND:
@@ -44,8 +74,6 @@ pm_token_type_name(pm_token_type_t token_type) {
44
74
  return "BANG_TILDE";
45
75
  case PM_TOKEN_BRACE_LEFT:
46
76
  return "BRACE_LEFT";
47
- case PM_TOKEN_BRACE_RIGHT:
48
- return "BRACE_RIGHT";
49
77
  case PM_TOKEN_BRACKET_LEFT:
50
78
  return "BRACKET_LEFT";
51
79
  case PM_TOKEN_BRACKET_LEFT_ARRAY:
@@ -68,8 +96,6 @@ pm_token_type_name(pm_token_type_t token_type) {
68
96
  return "COLON";
69
97
  case PM_TOKEN_COLON_COLON:
70
98
  return "COLON_COLON";
71
- case PM_TOKEN_COMMA:
72
- return "COMMA";
73
99
  case PM_TOKEN_COMMENT:
74
100
  return "COMMENT";
75
101
  case PM_TOKEN_CONSTANT:
@@ -88,8 +114,6 @@ pm_token_type_name(pm_token_type_t token_type) {
88
114
  return "EMBDOC_LINE";
89
115
  case PM_TOKEN_EMBEXPR_BEGIN:
90
116
  return "EMBEXPR_BEGIN";
91
- case PM_TOKEN_EMBEXPR_END:
92
- return "EMBEXPR_END";
93
117
  case PM_TOKEN_EMBVAR:
94
118
  return "EMBVAR";
95
119
  case PM_TOKEN_EQUAL:
@@ -156,20 +180,10 @@ pm_token_type_name(pm_token_type_t token_type) {
156
180
  return "KEYWORD_DEF";
157
181
  case PM_TOKEN_KEYWORD_DEFINED:
158
182
  return "KEYWORD_DEFINED";
159
- case PM_TOKEN_KEYWORD_DO:
160
- return "KEYWORD_DO";
161
183
  case PM_TOKEN_KEYWORD_DO_LOOP:
162
184
  return "KEYWORD_DO_LOOP";
163
- case PM_TOKEN_KEYWORD_ELSE:
164
- return "KEYWORD_ELSE";
165
- case PM_TOKEN_KEYWORD_ELSIF:
166
- return "KEYWORD_ELSIF";
167
- case PM_TOKEN_KEYWORD_END:
168
- return "KEYWORD_END";
169
185
  case PM_TOKEN_KEYWORD_END_UPCASE:
170
186
  return "KEYWORD_END_UPCASE";
171
- case PM_TOKEN_KEYWORD_ENSURE:
172
- return "KEYWORD_ENSURE";
173
187
  case PM_TOKEN_KEYWORD_FALSE:
174
188
  return "KEYWORD_FALSE";
175
189
  case PM_TOKEN_KEYWORD_FOR:
@@ -178,8 +192,6 @@ pm_token_type_name(pm_token_type_t token_type) {
178
192
  return "KEYWORD_IF";
179
193
  case PM_TOKEN_KEYWORD_IF_MODIFIER:
180
194
  return "KEYWORD_IF_MODIFIER";
181
- case PM_TOKEN_KEYWORD_IN:
182
- return "KEYWORD_IN";
183
195
  case PM_TOKEN_KEYWORD_MODULE:
184
196
  return "KEYWORD_MODULE";
185
197
  case PM_TOKEN_KEYWORD_NEXT:
@@ -192,8 +204,6 @@ pm_token_type_name(pm_token_type_t token_type) {
192
204
  return "KEYWORD_OR";
193
205
  case PM_TOKEN_KEYWORD_REDO:
194
206
  return "KEYWORD_REDO";
195
- case PM_TOKEN_KEYWORD_RESCUE:
196
- return "KEYWORD_RESCUE";
197
207
  case PM_TOKEN_KEYWORD_RESCUE_MODIFIER:
198
208
  return "KEYWORD_RESCUE_MODIFIER";
199
209
  case PM_TOKEN_KEYWORD_RETRY:
@@ -204,8 +214,6 @@ pm_token_type_name(pm_token_type_t token_type) {
204
214
  return "KEYWORD_SELF";
205
215
  case PM_TOKEN_KEYWORD_SUPER:
206
216
  return "KEYWORD_SUPER";
207
- case PM_TOKEN_KEYWORD_THEN:
208
- return "KEYWORD_THEN";
209
217
  case PM_TOKEN_KEYWORD_TRUE:
210
218
  return "KEYWORD_TRUE";
211
219
  case PM_TOKEN_KEYWORD_UNDEF:
@@ -218,8 +226,6 @@ pm_token_type_name(pm_token_type_t token_type) {
218
226
  return "KEYWORD_UNTIL";
219
227
  case PM_TOKEN_KEYWORD_UNTIL_MODIFIER:
220
228
  return "KEYWORD_UNTIL_MODIFIER";
221
- case PM_TOKEN_KEYWORD_WHEN:
222
- return "KEYWORD_WHEN";
223
229
  case PM_TOKEN_KEYWORD_WHILE:
224
230
  return "KEYWORD_WHILE";
225
231
  case PM_TOKEN_KEYWORD_WHILE_MODIFIER:
@@ -256,16 +262,12 @@ pm_token_type_name(pm_token_type_t token_type) {
256
262
  return "MINUS_EQUAL";
257
263
  case PM_TOKEN_MINUS_GREATER:
258
264
  return "MINUS_GREATER";
259
- case PM_TOKEN_NEWLINE:
260
- return "NEWLINE";
261
265
  case PM_TOKEN_NUMBERED_REFERENCE:
262
266
  return "NUMBERED_REFERENCE";
263
267
  case PM_TOKEN_PARENTHESIS_LEFT:
264
268
  return "PARENTHESIS_LEFT";
265
269
  case PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES:
266
270
  return "PARENTHESIS_LEFT_PARENTHESES";
267
- case PM_TOKEN_PARENTHESIS_RIGHT:
268
- return "PARENTHESIS_RIGHT";
269
271
  case PM_TOKEN_PERCENT:
270
272
  return "PERCENT";
271
273
  case PM_TOKEN_PERCENT_EQUAL:
@@ -280,8 +282,6 @@ pm_token_type_name(pm_token_type_t token_type) {
280
282
  return "PERCENT_UPPER_I";
281
283
  case PM_TOKEN_PERCENT_UPPER_W:
282
284
  return "PERCENT_UPPER_W";
283
- case PM_TOKEN_PIPE:
284
- return "PIPE";
285
285
  case PM_TOKEN_PIPE_EQUAL:
286
286
  return "PIPE_EQUAL";
287
287
  case PM_TOKEN_PIPE_PIPE:
@@ -298,8 +298,6 @@ pm_token_type_name(pm_token_type_t token_type) {
298
298
  return "REGEXP_BEGIN";
299
299
  case PM_TOKEN_REGEXP_END:
300
300
  return "REGEXP_END";
301
- case PM_TOKEN_SEMICOLON:
302
- return "SEMICOLON";
303
301
  case PM_TOKEN_SLASH:
304
302
  return "SLASH";
305
303
  case PM_TOKEN_SLASH_EQUAL:
@@ -344,6 +342,10 @@ pm_token_type_name(pm_token_type_t token_type) {
344
342
  return "WORDS_SEP";
345
343
  case PM_TOKEN___END__:
346
344
  return "__END__";
345
+ case PM_TOKEN_MISSING:
346
+ return "MISSING";
347
+ case PM_TOKEN_NOT_PROVIDED:
348
+ return "NOT_PROVIDED";
347
349
  case PM_TOKEN_MAXIMUM:
348
350
  assert(false && "unreachable");
349
351
  return "";
data/src/util/pm_constant_pool.c CHANGED
@@ -264,7 +264,7 @@ pm_constant_pool_insert(pm_constant_pool_t *pool, const uint8_t *start, size_t l
264
264
  // constant and replace it with the shared constant.
265
265
  xfree((void *) constant->start);
266
266
  constant->start = start;
267
- bucket->type = (unsigned int) (PM_CONSTANT_POOL_BUCKET_DEFAULT & 0x3);
267
+ bucket->type = (unsigned int) (type & 0x3);
268
268
  }
269
269
 
270
270
  return bucket->id;
data/src/util/pm_string.c CHANGED
@@ -1,5 +1,7 @@
1
1
  #include "prism/util/pm_string.h"
2
2
 
3
+ static const uint8_t empty_source[] = "";
4
+
3
5
  /**
4
6
  * Returns the size of the pm_string_t struct. This is necessary to allocate the
5
7
  * correct amount of memory in the FFI backend.
@@ -133,8 +135,7 @@ pm_string_mapped_init(pm_string_t *string, const char *filepath) {
133
135
  // the source to a constant empty string and return.
134
136
  if (file_size == 0) {
135
137
  pm_string_file_handle_close(&handle);
136
- const uint8_t source[] = "";
137
- *string = (pm_string_t) { .type = PM_STRING_CONSTANT, .source = source, .length = 0 };
138
+ *string = (pm_string_t) { .type = PM_STRING_CONSTANT, .source = empty_source, .length = 0 };
138
139
  return PM_STRING_INIT_SUCCESS;
139
140
  }
140
141
 
@@ -182,8 +183,7 @@ pm_string_mapped_init(pm_string_t *string, const char *filepath) {
182
183
 
183
184
  if (size == 0) {
184
185
  close(fd);
185
- const uint8_t source[] = "";
186
- *string = (pm_string_t) { .type = PM_STRING_CONSTANT, .source = source, .length = 0 };
186
+ *string = (pm_string_t) { .type = PM_STRING_CONSTANT, .source = empty_source, .length = 0 };
187
187
  return PM_STRING_INIT_SUCCESS;
188
188
  }
189
189
 
@@ -225,8 +225,7 @@ pm_string_file_init(pm_string_t *string, const char *filepath) {
225
225
  // the source to a constant empty string and return.
226
226
  if (file_size == 0) {
227
227
  pm_string_file_handle_close(&handle);
228
- const uint8_t source[] = "";
229
- *string = (pm_string_t) { .type = PM_STRING_CONSTANT, .source = source, .length = 0 };
228
+ *string = (pm_string_t) { .type = PM_STRING_CONSTANT, .source = empty_source, .length = 0 };
230
229
  return PM_STRING_INIT_SUCCESS;
231
230
  }
232
231
 
@@ -278,8 +277,7 @@ pm_string_file_init(pm_string_t *string, const char *filepath) {
278
277
  size_t size = (size_t) sb.st_size;
279
278
  if (size == 0) {
280
279
  close(fd);
281
- const uint8_t source[] = "";
282
- *string = (pm_string_t) { .type = PM_STRING_CONSTANT, .source = source, .length = 0 };
280
+ *string = (pm_string_t) { .type = PM_STRING_CONSTANT, .source = empty_source, .length = 0 };
283
281
  return PM_STRING_INIT_SUCCESS;
284
282
  }
285
283
 
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: prism
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.4.0
4
+ version: 1.7.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Shopify
8
8
  bindir: bin
9
9
  cert_chain: []
10
- date: 2025-03-18 00:00:00.000000000 Z
10
+ date: 1980-01-02 00:00:00.000000000 Z
11
11
  dependencies: []
12
12
  email:
13
13
  - ruby@shopify.com
@@ -91,7 +91,9 @@ files:
91
91
  - lib/prism/pattern.rb
92
92
  - lib/prism/polyfill/append_as_bytes.rb
93
93
  - lib/prism/polyfill/byteindex.rb
94
+ - lib/prism/polyfill/scan_byte.rb
94
95
  - lib/prism/polyfill/unpack1.rb
96
+ - lib/prism/polyfill/warn.rb
95
97
  - lib/prism/reflection.rb
96
98
  - lib/prism/relocation.rb
97
99
  - lib/prism/serialize.rb
@@ -104,6 +106,9 @@ files:
104
106
  - lib/prism/translation/parser33.rb
105
107
  - lib/prism/translation/parser34.rb
106
108
  - lib/prism/translation/parser35.rb
109
+ - lib/prism/translation/parser40.rb
110
+ - lib/prism/translation/parser41.rb
111
+ - lib/prism/translation/parser_current.rb
107
112
  - lib/prism/translation/ripper.rb
108
113
  - lib/prism/translation/ripper/sexp.rb
109
114
  - lib/prism/translation/ripper/shim.rb
@@ -123,6 +128,8 @@ files:
123
128
  - rbi/prism/translation/parser33.rbi
124
129
  - rbi/prism/translation/parser34.rbi
125
130
  - rbi/prism/translation/parser35.rbi
131
+ - rbi/prism/translation/parser40.rbi
132
+ - rbi/prism/translation/parser41.rbi
126
133
  - rbi/prism/translation/ripper.rbi
127
134
  - rbi/prism/visitor.rbi
128
135
  - sig/prism.rbs
@@ -137,6 +144,7 @@ files:
137
144
  - sig/prism/node_ext.rbs
138
145
  - sig/prism/pack.rbs
139
146
  - sig/prism/parse_result.rbs
147
+ - sig/prism/parse_result/comments.rbs
140
148
  - sig/prism/pattern.rbs
141
149
  - sig/prism/reflection.rbs
142
150
  - sig/prism/relocation.rbs
@@ -185,7 +193,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
185
193
  - !ruby/object:Gem::Version
186
194
  version: '0'
187
195
  requirements: []
188
- rubygems_version: 3.6.2
196
+ rubygems_version: 3.6.9
189
197
  specification_version: 4
190
198
  summary: Prism Ruby parser
191
199
  test_files: []