prism 1.2.0 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +46 -1
  3. data/Makefile +1 -1
  4. data/config.yml +429 -2
  5. data/docs/build_system.md +8 -11
  6. data/docs/releasing.md +1 -1
  7. data/docs/relocation.md +34 -0
  8. data/docs/ruby_api.md +1 -1
  9. data/ext/prism/api_node.c +1824 -1305
  10. data/ext/prism/extconf.rb +13 -36
  11. data/ext/prism/extension.c +298 -109
  12. data/ext/prism/extension.h +4 -4
  13. data/include/prism/ast.h +442 -2
  14. data/include/prism/defines.h +26 -8
  15. data/include/prism/options.h +47 -1
  16. data/include/prism/util/pm_buffer.h +10 -0
  17. data/include/prism/version.h +2 -2
  18. data/include/prism.h +51 -4
  19. data/lib/prism/dot_visitor.rb +26 -0
  20. data/lib/prism/dsl.rb +14 -6
  21. data/lib/prism/ffi.rb +93 -28
  22. data/lib/prism/inspect_visitor.rb +4 -1
  23. data/lib/prism/node.rb +1886 -105
  24. data/lib/prism/parse_result/errors.rb +1 -1
  25. data/lib/prism/parse_result/newlines.rb +1 -1
  26. data/lib/prism/parse_result.rb +54 -2
  27. data/lib/prism/polyfill/append_as_bytes.rb +15 -0
  28. data/lib/prism/reflection.rb +4 -4
  29. data/lib/prism/relocation.rb +504 -0
  30. data/lib/prism/serialize.rb +1252 -765
  31. data/lib/prism/string_query.rb +30 -0
  32. data/lib/prism/translation/parser/builder.rb +61 -0
  33. data/lib/prism/translation/parser/compiler.rb +228 -162
  34. data/lib/prism/translation/parser/lexer.rb +435 -61
  35. data/lib/prism/translation/parser.rb +51 -3
  36. data/lib/prism/translation/parser35.rb +12 -0
  37. data/lib/prism/translation/ripper.rb +13 -3
  38. data/lib/prism/translation/ruby_parser.rb +17 -7
  39. data/lib/prism/translation.rb +1 -0
  40. data/lib/prism.rb +9 -7
  41. data/prism.gemspec +11 -1
  42. data/rbi/prism/dsl.rbi +10 -7
  43. data/rbi/prism/node.rbi +44 -17
  44. data/rbi/prism/parse_result.rbi +17 -0
  45. data/rbi/prism/string_query.rbi +12 -0
  46. data/rbi/prism/translation/parser35.rbi +6 -0
  47. data/rbi/prism.rbi +39 -36
  48. data/sig/prism/dsl.rbs +6 -4
  49. data/sig/prism/node.rbs +29 -15
  50. data/sig/prism/parse_result.rbs +10 -0
  51. data/sig/prism/relocation.rbs +185 -0
  52. data/sig/prism/serialize.rbs +4 -2
  53. data/sig/prism/string_query.rbs +11 -0
  54. data/sig/prism.rbs +22 -1
  55. data/src/diagnostic.c +2 -2
  56. data/src/node.c +39 -0
  57. data/src/options.c +31 -0
  58. data/src/prettyprint.c +62 -0
  59. data/src/prism.c +738 -199
  60. data/src/regexp.c +7 -3
  61. data/src/serialize.c +18 -0
  62. data/src/static_literals.c +1 -1
  63. data/src/util/pm_buffer.c +40 -0
  64. data/src/util/pm_char.c +1 -1
  65. data/src/util/pm_constant_pool.c +6 -2
  66. data/src/util/pm_string.c +1 -0
  67. data/src/util/pm_strncasecmp.c +13 -1
  68. metadata +13 -7
data/src/regexp.c CHANGED
@@ -158,6 +158,11 @@ pm_regexp_parse_range_quantifier(pm_regexp_parser_t *parser) {
158
158
  } state = PM_REGEXP_RANGE_QUANTIFIER_STATE_START;
159
159
 
160
160
  while (1) {
161
+ if (parser->cursor >= parser->end) {
162
+ parser->cursor = savepoint;
163
+ return true;
164
+ }
165
+
161
166
  switch (state) {
162
167
  case PM_REGEXP_RANGE_QUANTIFIER_STATE_START:
163
168
  switch (*parser->cursor) {
@@ -594,8 +599,7 @@ pm_regexp_parse_group(pm_regexp_parser_t *parser, uint16_t depth) {
594
599
  // If we hit a -, then we're done parsing options.
595
600
  if (*parser->cursor != '-') break;
596
601
 
597
- // Otherwise, fallthrough to the - case.
598
- /* fallthrough */
602
+ PRISM_FALLTHROUGH
599
603
  case '-':
600
604
  parser->cursor++;
601
605
  while (!pm_regexp_char_is_eof(parser) && *parser->cursor != ':' && *parser->cursor != ')') {
@@ -707,7 +711,7 @@ pm_regexp_parse_item(pm_regexp_parser_t *parser, uint16_t depth) {
707
711
  if (!pm_regexp_char_find(parser, '\n')) parser->cursor = parser->end;
708
712
  return true;
709
713
  }
710
- /* fallthrough */
714
+ PRISM_FALLTHROUGH
711
715
  default: {
712
716
  size_t width;
713
717
  if (!parser->encoding_changed) {
data/src/serialize.c CHANGED
@@ -1706,6 +1706,12 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
1706
1706
  } else {
1707
1707
  pm_serialize_node(parser, (pm_node_t *)((pm_rescue_node_t *)node)->reference, buffer);
1708
1708
  }
1709
+ if (((pm_rescue_node_t *)node)->then_keyword_loc.start == NULL) {
1710
+ pm_buffer_append_byte(buffer, 0);
1711
+ } else {
1712
+ pm_buffer_append_byte(buffer, 1);
1713
+ pm_serialize_location(parser, &((pm_rescue_node_t *)node)->then_keyword_loc, buffer);
1714
+ }
1709
1715
  if (((pm_rescue_node_t *)node)->statements == NULL) {
1710
1716
  pm_buffer_append_byte(buffer, 0);
1711
1717
  } else {
@@ -1916,6 +1922,12 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
1916
1922
  case PM_UNTIL_NODE: {
1917
1923
  pm_buffer_append_varuint(buffer, (uint32_t) node->flags);
1918
1924
  pm_serialize_location(parser, &((pm_until_node_t *)node)->keyword_loc, buffer);
1925
+ if (((pm_until_node_t *)node)->do_keyword_loc.start == NULL) {
1926
+ pm_buffer_append_byte(buffer, 0);
1927
+ } else {
1928
+ pm_buffer_append_byte(buffer, 1);
1929
+ pm_serialize_location(parser, &((pm_until_node_t *)node)->do_keyword_loc, buffer);
1930
+ }
1919
1931
  if (((pm_until_node_t *)node)->closing_loc.start == NULL) {
1920
1932
  pm_buffer_append_byte(buffer, 0);
1921
1933
  } else {
@@ -1954,6 +1966,12 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
1954
1966
  case PM_WHILE_NODE: {
1955
1967
  pm_buffer_append_varuint(buffer, (uint32_t) node->flags);
1956
1968
  pm_serialize_location(parser, &((pm_while_node_t *)node)->keyword_loc, buffer);
1969
+ if (((pm_while_node_t *)node)->do_keyword_loc.start == NULL) {
1970
+ pm_buffer_append_byte(buffer, 0);
1971
+ } else {
1972
+ pm_buffer_append_byte(buffer, 1);
1973
+ pm_serialize_location(parser, &((pm_while_node_t *)node)->do_keyword_loc, buffer);
1974
+ }
1957
1975
  if (((pm_while_node_t *)node)->closing_loc.start == NULL) {
1958
1976
  pm_buffer_append_byte(buffer, 0);
1959
1977
  } else {
data/src/static_literals.c CHANGED
@@ -501,7 +501,7 @@ pm_static_literal_inspect_node(pm_buffer_t *buffer, const pm_static_literals_met
501
501
  case PM_FLOAT_NODE: {
502
502
  const double value = ((const pm_float_node_t *) node)->value;
503
503
 
504
- if (isinf(value)) {
504
+ if (PRISM_ISINF(value)) {
505
505
  if (*node->location.start == '-') {
506
506
  pm_buffer_append_byte(buffer, '-');
507
507
  }
data/src/util/pm_buffer.c CHANGED
@@ -172,6 +172,46 @@ pm_buffer_append_double(pm_buffer_t *buffer, double value) {
172
172
  pm_buffer_append(buffer, source, sizeof(double));
173
173
  }
174
174
 
175
+ /**
176
+ * Append a unicode codepoint to the buffer.
177
+ */
178
+ bool
179
+ pm_buffer_append_unicode_codepoint(pm_buffer_t *buffer, uint32_t value) {
180
+ if (value <= 0x7F) {
181
+ pm_buffer_append_byte(buffer, (uint8_t) value); // 0xxxxxxx
182
+ return true;
183
+ } else if (value <= 0x7FF) {
184
+ uint8_t bytes[] = {
185
+ (uint8_t) (0xC0 | ((value >> 6) & 0x3F)), // 110xxxxx
186
+ (uint8_t) (0x80 | (value & 0x3F)) // 10xxxxxx
187
+ };
188
+
189
+ pm_buffer_append_bytes(buffer, bytes, 2);
190
+ return true;
191
+ } else if (value <= 0xFFFF) {
192
+ uint8_t bytes[] = {
193
+ (uint8_t) (0xE0 | ((value >> 12) & 0x3F)), // 1110xxxx
194
+ (uint8_t) (0x80 | ((value >> 6) & 0x3F)), // 10xxxxxx
195
+ (uint8_t) (0x80 | (value & 0x3F)) // 10xxxxxx
196
+ };
197
+
198
+ pm_buffer_append_bytes(buffer, bytes, 3);
199
+ return true;
200
+ } else if (value <= 0x10FFFF) {
201
+ uint8_t bytes[] = {
202
+ (uint8_t) (0xF0 | ((value >> 18) & 0x3F)), // 11110xxx
203
+ (uint8_t) (0x80 | ((value >> 12) & 0x3F)), // 10xxxxxx
204
+ (uint8_t) (0x80 | ((value >> 6) & 0x3F)), // 10xxxxxx
205
+ (uint8_t) (0x80 | (value & 0x3F)) // 10xxxxxx
206
+ };
207
+
208
+ pm_buffer_append_bytes(buffer, bytes, 4);
209
+ return true;
210
+ } else {
211
+ return false;
212
+ }
213
+ }
214
+
175
215
  /**
176
216
  * Append a slice of source code to the buffer.
177
217
  */
data/src/util/pm_char.c CHANGED
@@ -185,7 +185,7 @@ pm_strspn_number_kind_underscores(const uint8_t *string, ptrdiff_t length, const
185
185
  size++;
186
186
  }
187
187
 
188
- if (string[size - 1] == '_') *invalid = string + size - 1;
188
+ if (size > 0 && string[size - 1] == '_') *invalid = string + size - 1;
189
189
  return size;
190
190
  }
191
191
 
data/src/util/pm_constant_pool.c CHANGED
@@ -15,8 +15,12 @@ pm_constant_id_list_init(pm_constant_id_list_t *list) {
15
15
  */
16
16
  void
17
17
  pm_constant_id_list_init_capacity(pm_constant_id_list_t *list, size_t capacity) {
18
- list->ids = xcalloc(capacity, sizeof(pm_constant_id_t));
19
- if (list->ids == NULL) abort();
18
+ if (capacity) {
19
+ list->ids = xcalloc(capacity, sizeof(pm_constant_id_t));
20
+ if (list->ids == NULL) abort();
21
+ } else {
22
+ list->ids = NULL;
23
+ }
20
24
 
21
25
  list->size = 0;
22
26
  list->capacity = capacity;
data/src/util/pm_string.c CHANGED
@@ -189,6 +189,7 @@ pm_string_mapped_init(pm_string_t *string, const char *filepath) {
189
189
 
190
190
  source = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
191
191
  if (source == MAP_FAILED) {
192
+ close(fd);
192
193
  return PM_STRING_INIT_ERROR_GENERIC;
193
194
  }
194
195
 
data/src/util/pm_strncasecmp.c CHANGED
@@ -1,5 +1,17 @@
1
1
  #include "prism/util/pm_strncasecmp.h"
2
2
 
3
+ /**
4
+ * A locale-insensitive version of `tolower(3)`
5
+ */
6
+ static inline int
7
+ pm_tolower(int c)
8
+ {
9
+ if ('A' <= c && c <= 'Z') {
10
+ return c | 0x20;
11
+ }
12
+ return c;
13
+ }
14
+
3
15
  /**
4
16
  * Compare two strings, ignoring case, up to the given length. Returns 0 if the
5
17
  * strings are equal, a negative number if string1 is less than string2, or a
@@ -16,7 +28,7 @@ pm_strncasecmp(const uint8_t *string1, const uint8_t *string2, size_t length) {
16
28
 
17
29
  while (offset < length && string1[offset] != '\0') {
18
30
  if (string2[offset] == '\0') return string1[offset];
19
- if ((difference = tolower(string1[offset]) - tolower(string2[offset])) != 0) return difference;
31
+ if ((difference = pm_tolower(string1[offset]) - pm_tolower(string2[offset])) != 0) return difference;
20
32
  offset++;
21
33
  }
22
34
 
metadata CHANGED
@@ -1,16 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: prism
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.0
4
+ version: 1.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Shopify
8
- autorequire:
9
8
  bindir: bin
10
9
  cert_chain: []
11
- date: 2024-10-10 00:00:00.000000000 Z
10
+ date: 2025-03-18 00:00:00.000000000 Z
12
11
  dependencies: []
13
- description:
14
12
  email:
15
13
  - ruby@shopify.com
16
14
  executables: []
@@ -39,6 +37,7 @@ files:
39
37
  - docs/parser_translation.md
40
38
  - docs/parsing_rules.md
41
39
  - docs/releasing.md
40
+ - docs/relocation.md
42
41
  - docs/ripper_translation.md
43
42
  - docs/ruby_api.md
44
43
  - docs/ruby_parser_translation.md
@@ -90,16 +89,21 @@ files:
90
89
  - lib/prism/parse_result/errors.rb
91
90
  - lib/prism/parse_result/newlines.rb
92
91
  - lib/prism/pattern.rb
92
+ - lib/prism/polyfill/append_as_bytes.rb
93
93
  - lib/prism/polyfill/byteindex.rb
94
94
  - lib/prism/polyfill/unpack1.rb
95
95
  - lib/prism/reflection.rb
96
+ - lib/prism/relocation.rb
96
97
  - lib/prism/serialize.rb
98
+ - lib/prism/string_query.rb
97
99
  - lib/prism/translation.rb
98
100
  - lib/prism/translation/parser.rb
101
+ - lib/prism/translation/parser/builder.rb
99
102
  - lib/prism/translation/parser/compiler.rb
100
103
  - lib/prism/translation/parser/lexer.rb
101
104
  - lib/prism/translation/parser33.rb
102
105
  - lib/prism/translation/parser34.rb
106
+ - lib/prism/translation/parser35.rb
103
107
  - lib/prism/translation/ripper.rb
104
108
  - lib/prism/translation/ripper/sexp.rb
105
109
  - lib/prism/translation/ripper/shim.rb
@@ -114,9 +118,11 @@ files:
114
118
  - rbi/prism/node_ext.rbi
115
119
  - rbi/prism/parse_result.rbi
116
120
  - rbi/prism/reflection.rbi
121
+ - rbi/prism/string_query.rbi
117
122
  - rbi/prism/translation/parser.rbi
118
123
  - rbi/prism/translation/parser33.rbi
119
124
  - rbi/prism/translation/parser34.rbi
125
+ - rbi/prism/translation/parser35.rbi
120
126
  - rbi/prism/translation/ripper.rbi
121
127
  - rbi/prism/visitor.rbi
122
128
  - sig/prism.rbs
@@ -133,7 +139,9 @@ files:
133
139
  - sig/prism/parse_result.rbs
134
140
  - sig/prism/pattern.rbs
135
141
  - sig/prism/reflection.rbs
142
+ - sig/prism/relocation.rbs
136
143
  - sig/prism/serialize.rbs
144
+ - sig/prism/string_query.rbs
137
145
  - sig/prism/visitor.rbs
138
146
  - src/diagnostic.c
139
147
  - src/encoding.c
@@ -163,7 +171,6 @@ metadata:
163
171
  allowed_push_host: https://rubygems.org
164
172
  source_code_uri: https://github.com/ruby/prism
165
173
  changelog_uri: https://github.com/ruby/prism/blob/main/CHANGELOG.md
166
- post_install_message:
167
174
  rdoc_options: []
168
175
  require_paths:
169
176
  - lib
@@ -178,8 +185,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
178
185
  - !ruby/object:Gem::Version
179
186
  version: '0'
180
187
  requirements: []
181
- rubygems_version: 3.6.0.dev
182
- signing_key:
188
+ rubygems_version: 3.6.2
183
189
  specification_version: 4
184
190
  summary: Prism Ruby parser
185
191
  test_files: []