prism 0.17.1 → 0.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +60 -1
  3. data/Makefile +5 -5
  4. data/README.md +4 -3
  5. data/config.yml +214 -68
  6. data/docs/build_system.md +6 -6
  7. data/docs/building.md +10 -3
  8. data/docs/configuration.md +11 -9
  9. data/docs/encoding.md +92 -88
  10. data/docs/heredocs.md +1 -1
  11. data/docs/javascript.md +29 -1
  12. data/docs/local_variable_depth.md +229 -0
  13. data/docs/ruby_api.md +16 -0
  14. data/docs/serialization.md +18 -13
  15. data/ext/prism/api_node.c +411 -240
  16. data/ext/prism/extconf.rb +97 -127
  17. data/ext/prism/extension.c +97 -33
  18. data/ext/prism/extension.h +1 -1
  19. data/include/prism/ast.h +377 -159
  20. data/include/prism/defines.h +17 -0
  21. data/include/prism/diagnostic.h +38 -6
  22. data/include/prism/{enc/pm_encoding.h → encoding.h} +126 -64
  23. data/include/prism/options.h +2 -2
  24. data/include/prism/parser.h +62 -36
  25. data/include/prism/regexp.h +2 -2
  26. data/include/prism/util/pm_buffer.h +9 -1
  27. data/include/prism/util/pm_memchr.h +2 -2
  28. data/include/prism/util/pm_strpbrk.h +3 -3
  29. data/include/prism/version.h +3 -3
  30. data/include/prism.h +13 -15
  31. data/lib/prism/compiler.rb +15 -3
  32. data/lib/prism/debug.rb +13 -4
  33. data/lib/prism/desugar_compiler.rb +4 -3
  34. data/lib/prism/dispatcher.rb +70 -14
  35. data/lib/prism/dot_visitor.rb +4612 -0
  36. data/lib/prism/dsl.rb +77 -57
  37. data/lib/prism/ffi.rb +19 -6
  38. data/lib/prism/lex_compat.rb +19 -9
  39. data/lib/prism/mutation_compiler.rb +26 -6
  40. data/lib/prism/node.rb +1314 -522
  41. data/lib/prism/node_ext.rb +102 -19
  42. data/lib/prism/parse_result.rb +58 -27
  43. data/lib/prism/ripper_compat.rb +49 -34
  44. data/lib/prism/serialize.rb +251 -227
  45. data/lib/prism/visitor.rb +15 -3
  46. data/lib/prism.rb +21 -4
  47. data/prism.gemspec +7 -9
  48. data/rbi/prism.rbi +688 -284
  49. data/rbi/prism_static.rbi +3 -0
  50. data/sig/prism.rbs +426 -156
  51. data/sig/prism_static.rbs +1 -0
  52. data/src/diagnostic.c +280 -216
  53. data/src/encoding.c +5137 -0
  54. data/src/node.c +99 -21
  55. data/src/options.c +21 -2
  56. data/src/prettyprint.c +1743 -1241
  57. data/src/prism.c +1774 -831
  58. data/src/regexp.c +15 -15
  59. data/src/serialize.c +261 -164
  60. data/src/util/pm_buffer.c +10 -1
  61. data/src/util/pm_memchr.c +1 -1
  62. data/src/util/pm_strpbrk.c +4 -4
  63. metadata +8 -10
  64. data/src/enc/pm_big5.c +0 -53
  65. data/src/enc/pm_euc_jp.c +0 -59
  66. data/src/enc/pm_gbk.c +0 -62
  67. data/src/enc/pm_shift_jis.c +0 -57
  68. data/src/enc/pm_tables.c +0 -743
  69. data/src/enc/pm_unicode.c +0 -2369
  70. data/src/enc/pm_windows_31j.c +0 -57
data/src/util/pm_buffer.c CHANGED
@@ -138,7 +138,7 @@ pm_buffer_append_byte(pm_buffer_t *buffer, uint8_t value) {
138
138
  * Append a 32-bit unsigned integer to the buffer as a variable-length integer.
139
139
  */
140
140
  void
141
- pm_buffer_append_varint(pm_buffer_t *buffer, uint32_t value) {
141
+ pm_buffer_append_varuint(pm_buffer_t *buffer, uint32_t value) {
142
142
  if (value < 128) {
143
143
  pm_buffer_append_byte(buffer, (uint8_t) value);
144
144
  } else {
@@ -151,6 +151,15 @@ pm_buffer_append_varint(pm_buffer_t *buffer, uint32_t value) {
151
151
  }
152
152
  }
153
153
 
154
+ /**
155
+ * Append a 32-bit signed integer to the buffer as a variable-length integer.
156
+ */
157
+ void
158
+ pm_buffer_append_varsint(pm_buffer_t *buffer, int32_t value) {
159
+ uint32_t unsigned_int = ((uint32_t)(value) << 1) ^ ((uint32_t)(value >> 31));
160
+ pm_buffer_append_varuint(buffer, unsigned_int);
161
+ }
162
+
154
163
  /**
155
164
  * Concatenate one buffer onto another.
156
165
  */
data/src/util/pm_memchr.c CHANGED
@@ -8,7 +8,7 @@
8
8
  * of a multibyte character.
9
9
  */
10
10
  void *
11
- pm_memchr(const void *memory, int character, size_t number, bool encoding_changed, pm_encoding_t *encoding) {
11
+ pm_memchr(const void *memory, int character, size_t number, bool encoding_changed, const pm_encoding_t *encoding) {
12
12
  if (encoding_changed && encoding->multibyte && character >= PRISM_MEMCHR_TRAILING_BYTE_MINIMUM) {
13
13
  const uint8_t *source = (const uint8_t *) memory;
14
14
  size_t index = 0;
data/src/util/pm_strpbrk.c CHANGED
@@ -4,7 +4,7 @@
4
4
  * This is the slow path that does care about the encoding.
5
5
  */
6
6
  static inline const uint8_t *
7
- pm_strpbrk_multi_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum) {
7
+ pm_strpbrk_multi_byte(const pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum) {
8
8
  size_t index = 0;
9
9
 
10
10
  while (index < maximum) {
@@ -12,7 +12,7 @@ pm_strpbrk_multi_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t
12
12
  return source + index;
13
13
  }
14
14
 
15
- size_t width = parser->encoding.char_width(source + index, (ptrdiff_t) (maximum - index));
15
+ size_t width = parser->encoding->char_width(source + index, (ptrdiff_t) (maximum - index));
16
16
  if (width == 0) {
17
17
  return NULL;
18
18
  }
@@ -61,10 +61,10 @@ pm_strpbrk_single_byte(const uint8_t *source, const uint8_t *charset, size_t max
61
61
  * need to take a slower path and iterate one multi-byte character at a time.
62
62
  */
63
63
  const uint8_t *
64
- pm_strpbrk(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, ptrdiff_t length) {
64
+ pm_strpbrk(const pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, ptrdiff_t length) {
65
65
  if (length <= 0) {
66
66
  return NULL;
67
- } else if (parser->encoding_changed && parser->encoding.multibyte) {
67
+ } else if (parser->encoding_changed && parser->encoding->multibyte) {
68
68
  return pm_strpbrk_multi_byte(parser, source, charset, (size_t) length);
69
69
  } else {
70
70
  return pm_strpbrk_single_byte(source, charset, (size_t) length);
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: prism
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.17.1
4
+ version: 0.19.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Shopify
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-11-03 00:00:00.000000000 Z
11
+ date: 2023-12-14 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description:
14
14
  email:
@@ -33,6 +33,7 @@ files:
33
33
  - docs/fuzzing.md
34
34
  - docs/heredocs.md
35
35
  - docs/javascript.md
36
+ - docs/local_variable_depth.md
36
37
  - docs/mapping.md
37
38
  - docs/releasing.md
38
39
  - docs/ripper.md
@@ -48,7 +49,7 @@ files:
48
49
  - include/prism/ast.h
49
50
  - include/prism/defines.h
50
51
  - include/prism/diagnostic.h
51
- - include/prism/enc/pm_encoding.h
52
+ - include/prism/encoding.h
52
53
  - include/prism/node.h
53
54
  - include/prism/options.h
54
55
  - include/prism/pack.h
@@ -72,6 +73,7 @@ files:
72
73
  - lib/prism/debug.rb
73
74
  - lib/prism/desugar_compiler.rb
74
75
  - lib/prism/dispatcher.rb
76
+ - lib/prism/dot_visitor.rb
75
77
  - lib/prism/dsl.rb
76
78
  - lib/prism/ffi.rb
77
79
  - lib/prism/lex_compat.rb
@@ -93,13 +95,7 @@ files:
93
95
  - sig/prism.rbs
94
96
  - sig/prism_static.rbs
95
97
  - src/diagnostic.c
96
- - src/enc/pm_big5.c
97
- - src/enc/pm_euc_jp.c
98
- - src/enc/pm_gbk.c
99
- - src/enc/pm_shift_jis.c
100
- - src/enc/pm_tables.c
101
- - src/enc/pm_unicode.c
102
- - src/enc/pm_windows_31j.c
98
+ - src/encoding.c
103
99
  - src/node.c
104
100
  - src/options.c
105
101
  - src/pack.c
@@ -124,6 +120,8 @@ licenses:
124
120
  - MIT
125
121
  metadata:
126
122
  allowed_push_host: https://rubygems.org
123
+ source_code_uri: https://github.com/ruby/prism
124
+ changelog_uri: https://github.com/ruby/prism/blob/main/CHANGELOG.md
127
125
  post_install_message:
128
126
  rdoc_options: []
129
127
  require_paths:
data/src/enc/pm_big5.c DELETED
@@ -1,53 +0,0 @@
1
- #include "prism/enc/pm_encoding.h"
2
-
3
- static size_t
4
- pm_encoding_big5_char_width(const uint8_t *b, ptrdiff_t n) {
5
- // These are the single byte characters.
6
- if (*b < 0x80) {
7
- return 1;
8
- }
9
-
10
- // These are the double byte characters.
11
- if ((n > 1) && (b[0] >= 0xA1 && b[0] <= 0xFE) && (b[1] >= 0x40 && b[1] <= 0xFE)) {
12
- return 2;
13
- }
14
-
15
- return 0;
16
- }
17
-
18
- static size_t
19
- pm_encoding_big5_alpha_char(const uint8_t *b, ptrdiff_t n) {
20
- if (pm_encoding_big5_char_width(b, n) == 1) {
21
- return pm_encoding_ascii_alpha_char(b, n);
22
- } else {
23
- return 0;
24
- }
25
- }
26
-
27
- static size_t
28
- pm_encoding_big5_alnum_char(const uint8_t *b, ptrdiff_t n) {
29
- if (pm_encoding_big5_char_width(b, n) == 1) {
30
- return pm_encoding_ascii_alnum_char(b, n);
31
- } else {
32
- return 0;
33
- }
34
- }
35
-
36
- static bool
37
- pm_encoding_big5_isupper_char(const uint8_t *b, ptrdiff_t n) {
38
- if (pm_encoding_big5_char_width(b, n) == 1) {
39
- return pm_encoding_ascii_isupper_char(b, n);
40
- } else {
41
- return false;
42
- }
43
- }
44
-
45
- /** Big5 encoding */
46
- pm_encoding_t pm_encoding_big5 = {
47
- .name = "big5",
48
- .char_width = pm_encoding_big5_char_width,
49
- .alnum_char = pm_encoding_big5_alnum_char,
50
- .alpha_char = pm_encoding_big5_alpha_char,
51
- .isupper_char = pm_encoding_big5_isupper_char,
52
- .multibyte = true
53
- };
data/src/enc/pm_euc_jp.c DELETED
@@ -1,59 +0,0 @@
1
- #include "prism/enc/pm_encoding.h"
2
-
3
- static size_t
4
- pm_encoding_euc_jp_char_width(const uint8_t *b, ptrdiff_t n) {
5
- // These are the single byte characters.
6
- if (*b < 0x80) {
7
- return 1;
8
- }
9
-
10
- // These are the double byte characters.
11
- if (
12
- (n > 1) &&
13
- (
14
- ((b[0] == 0x8E) && (b[1] >= 0xA1 && b[1] <= 0xFE)) ||
15
- ((b[0] >= 0xA1 && b[0] <= 0xFE) && (b[1] >= 0xA1 && b[1] <= 0xFE))
16
- )
17
- ) {
18
- return 2;
19
- }
20
-
21
- return 0;
22
- }
23
-
24
- static size_t
25
- pm_encoding_euc_jp_alpha_char(const uint8_t *b, ptrdiff_t n) {
26
- if (pm_encoding_euc_jp_char_width(b, n) == 1) {
27
- return pm_encoding_ascii_alpha_char(b, n);
28
- } else {
29
- return 0;
30
- }
31
- }
32
-
33
- static size_t
34
- pm_encoding_euc_jp_alnum_char(const uint8_t *b, ptrdiff_t n) {
35
- if (pm_encoding_euc_jp_char_width(b, n) == 1) {
36
- return pm_encoding_ascii_alnum_char(b, n);
37
- } else {
38
- return 0;
39
- }
40
- }
41
-
42
- static bool
43
- pm_encoding_euc_jp_isupper_char(const uint8_t *b, ptrdiff_t n) {
44
- if (pm_encoding_euc_jp_char_width(b, n) == 1) {
45
- return pm_encoding_ascii_isupper_char(b, n);
46
- } else {
47
- return 0;
48
- }
49
- }
50
-
51
- /** EUC-JP encoding */
52
- pm_encoding_t pm_encoding_euc_jp = {
53
- .name = "euc-jp",
54
- .char_width = pm_encoding_euc_jp_char_width,
55
- .alnum_char = pm_encoding_euc_jp_alnum_char,
56
- .alpha_char = pm_encoding_euc_jp_alpha_char,
57
- .isupper_char = pm_encoding_euc_jp_isupper_char,
58
- .multibyte = true
59
- };
data/src/enc/pm_gbk.c DELETED
@@ -1,62 +0,0 @@
1
- #include "prism/enc/pm_encoding.h"
2
-
3
- static size_t
4
- pm_encoding_gbk_char_width(const uint8_t *b, ptrdiff_t n) {
5
- // These are the single byte characters.
6
- if (*b < 0x80) {
7
- return 1;
8
- }
9
-
10
- // These are the double byte characters.
11
- if (
12
- (n > 1) &&
13
- (
14
- ((b[0] >= 0xA1 && b[0] <= 0xA9) && (b[1] >= 0xA1 && b[1] <= 0xFE)) || // GBK/1
15
- ((b[0] >= 0xB0 && b[0] <= 0xF7) && (b[1] >= 0xA1 && b[1] <= 0xFE)) || // GBK/2
16
- ((b[0] >= 0x81 && b[0] <= 0xA0) && (b[1] >= 0x40 && b[1] <= 0xFE) && (b[1] != 0x7F)) || // GBK/3
17
- ((b[0] >= 0xAA && b[0] <= 0xFE) && (b[1] >= 0x40 && b[1] <= 0xA0) && (b[1] != 0x7F)) || // GBK/4
18
- ((b[0] >= 0xA8 && b[0] <= 0xA9) && (b[1] >= 0x40 && b[1] <= 0xA0) && (b[1] != 0x7F)) // GBK/5
19
- )
20
- ) {
21
- return 2;
22
- }
23
-
24
- return 0;
25
- }
26
-
27
- static size_t
28
- pm_encoding_gbk_alpha_char(const uint8_t *b, ptrdiff_t n) {
29
- if (pm_encoding_gbk_char_width(b, n) == 1) {
30
- return pm_encoding_ascii_alpha_char(b, n);
31
- } else {
32
- return 0;
33
- }
34
- }
35
-
36
- static size_t
37
- pm_encoding_gbk_alnum_char(const uint8_t *b, ptrdiff_t n) {
38
- if (pm_encoding_gbk_char_width(b, n) == 1) {
39
- return pm_encoding_ascii_alnum_char(b, n);
40
- } else {
41
- return 0;
42
- }
43
- }
44
-
45
- static bool
46
- pm_encoding_gbk_isupper_char(const uint8_t *b, ptrdiff_t n) {
47
- if (pm_encoding_gbk_char_width(b, n) == 1) {
48
- return pm_encoding_ascii_isupper_char(b, n);
49
- } else {
50
- return false;
51
- }
52
- }
53
-
54
- /** GBK encoding */
55
- pm_encoding_t pm_encoding_gbk = {
56
- .name = "gbk",
57
- .char_width = pm_encoding_gbk_char_width,
58
- .alnum_char = pm_encoding_gbk_alnum_char,
59
- .alpha_char = pm_encoding_gbk_alpha_char,
60
- .isupper_char = pm_encoding_gbk_isupper_char,
61
- .multibyte = true
62
- };
data/src/enc/pm_shift_jis.c DELETED
@@ -1,57 +0,0 @@
1
- #include "prism/enc/pm_encoding.h"
2
-
3
- static size_t
4
- pm_encoding_shift_jis_char_width(const uint8_t *b, ptrdiff_t n) {
5
- // These are the single byte characters.
6
- if (*b < 0x80 || (*b >= 0xA1 && *b <= 0xDF)) {
7
- return 1;
8
- }
9
-
10
- // These are the double byte characters.
11
- if (
12
- (n > 1) &&
13
- ((b[0] >= 0x81 && b[0] <= 0x9F) || (b[0] >= 0xE0 && b[0] <= 0xFC)) &&
14
- (b[1] >= 0x40 && b[1] <= 0xFC)
15
- ) {
16
- return 2;
17
- }
18
-
19
- return 0;
20
- }
21
-
22
- static size_t
23
- pm_encoding_shift_jis_alpha_char(const uint8_t *b, ptrdiff_t n) {
24
- if (pm_encoding_shift_jis_char_width(b, n) == 1) {
25
- return pm_encoding_ascii_alpha_char(b, n);
26
- } else {
27
- return 0;
28
- }
29
- }
30
-
31
- static size_t
32
- pm_encoding_shift_jis_alnum_char(const uint8_t *b, ptrdiff_t n) {
33
- if (pm_encoding_shift_jis_char_width(b, n) == 1) {
34
- return pm_encoding_ascii_alnum_char(b, n);
35
- } else {
36
- return 0;
37
- }
38
- }
39
-
40
- static bool
41
- pm_encoding_shift_jis_isupper_char(const uint8_t *b, ptrdiff_t n) {
42
- if (pm_encoding_shift_jis_char_width(b, n) == 1) {
43
- return pm_encoding_ascii_isupper_char(b, n);
44
- } else {
45
- return 0;
46
- }
47
- }
48
-
49
- /** Shift_JIS encoding */
50
- pm_encoding_t pm_encoding_shift_jis = {
51
- .name = "shift_jis",
52
- .char_width = pm_encoding_shift_jis_char_width,
53
- .alnum_char = pm_encoding_shift_jis_alnum_char,
54
- .alpha_char = pm_encoding_shift_jis_alpha_char,
55
- .isupper_char = pm_encoding_shift_jis_isupper_char,
56
- .multibyte = true
57
- };