prism 0.17.1 → 0.19.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (70) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +60 -1
  3. data/Makefile +5 -5
  4. data/README.md +4 -3
  5. data/config.yml +214 -68
  6. data/docs/build_system.md +6 -6
  7. data/docs/building.md +10 -3
  8. data/docs/configuration.md +11 -9
  9. data/docs/encoding.md +92 -88
  10. data/docs/heredocs.md +1 -1
  11. data/docs/javascript.md +29 -1
  12. data/docs/local_variable_depth.md +229 -0
  13. data/docs/ruby_api.md +16 -0
  14. data/docs/serialization.md +18 -13
  15. data/ext/prism/api_node.c +411 -240
  16. data/ext/prism/extconf.rb +97 -127
  17. data/ext/prism/extension.c +97 -33
  18. data/ext/prism/extension.h +1 -1
  19. data/include/prism/ast.h +377 -159
  20. data/include/prism/defines.h +17 -0
  21. data/include/prism/diagnostic.h +38 -6
  22. data/include/prism/{enc/pm_encoding.h → encoding.h} +126 -64
  23. data/include/prism/options.h +2 -2
  24. data/include/prism/parser.h +62 -36
  25. data/include/prism/regexp.h +2 -2
  26. data/include/prism/util/pm_buffer.h +9 -1
  27. data/include/prism/util/pm_memchr.h +2 -2
  28. data/include/prism/util/pm_strpbrk.h +3 -3
  29. data/include/prism/version.h +3 -3
  30. data/include/prism.h +13 -15
  31. data/lib/prism/compiler.rb +15 -3
  32. data/lib/prism/debug.rb +13 -4
  33. data/lib/prism/desugar_compiler.rb +4 -3
  34. data/lib/prism/dispatcher.rb +70 -14
  35. data/lib/prism/dot_visitor.rb +4612 -0
  36. data/lib/prism/dsl.rb +77 -57
  37. data/lib/prism/ffi.rb +19 -6
  38. data/lib/prism/lex_compat.rb +19 -9
  39. data/lib/prism/mutation_compiler.rb +26 -6
  40. data/lib/prism/node.rb +1314 -522
  41. data/lib/prism/node_ext.rb +102 -19
  42. data/lib/prism/parse_result.rb +58 -27
  43. data/lib/prism/ripper_compat.rb +49 -34
  44. data/lib/prism/serialize.rb +251 -227
  45. data/lib/prism/visitor.rb +15 -3
  46. data/lib/prism.rb +21 -4
  47. data/prism.gemspec +7 -9
  48. data/rbi/prism.rbi +688 -284
  49. data/rbi/prism_static.rbi +3 -0
  50. data/sig/prism.rbs +426 -156
  51. data/sig/prism_static.rbs +1 -0
  52. data/src/diagnostic.c +280 -216
  53. data/src/encoding.c +5137 -0
  54. data/src/node.c +99 -21
  55. data/src/options.c +21 -2
  56. data/src/prettyprint.c +1743 -1241
  57. data/src/prism.c +1774 -831
  58. data/src/regexp.c +15 -15
  59. data/src/serialize.c +261 -164
  60. data/src/util/pm_buffer.c +10 -1
  61. data/src/util/pm_memchr.c +1 -1
  62. data/src/util/pm_strpbrk.c +4 -4
  63. metadata +8 -10
  64. data/src/enc/pm_big5.c +0 -53
  65. data/src/enc/pm_euc_jp.c +0 -59
  66. data/src/enc/pm_gbk.c +0 -62
  67. data/src/enc/pm_shift_jis.c +0 -57
  68. data/src/enc/pm_tables.c +0 -743
  69. data/src/enc/pm_unicode.c +0 -2369
  70. data/src/enc/pm_windows_31j.c +0 -57
data/src/util/pm_buffer.c CHANGED
@@ -138,7 +138,7 @@ pm_buffer_append_byte(pm_buffer_t *buffer, uint8_t value) {
138
138
  * Append a 32-bit unsigned integer to the buffer as a variable-length integer.
139
139
  */
140
140
  void
141
- pm_buffer_append_varint(pm_buffer_t *buffer, uint32_t value) {
141
+ pm_buffer_append_varuint(pm_buffer_t *buffer, uint32_t value) {
142
142
  if (value < 128) {
143
143
  pm_buffer_append_byte(buffer, (uint8_t) value);
144
144
  } else {
@@ -151,6 +151,15 @@ pm_buffer_append_varint(pm_buffer_t *buffer, uint32_t value) {
151
151
  }
152
152
  }
153
153
 
154
+ /**
155
+ * Append a 32-bit signed integer to the buffer as a variable-length integer.
156
+ */
157
+ void
158
+ pm_buffer_append_varsint(pm_buffer_t *buffer, int32_t value) {
159
+ uint32_t unsigned_int = ((uint32_t)(value) << 1) ^ ((uint32_t)(value >> 31));
160
+ pm_buffer_append_varuint(buffer, unsigned_int);
161
+ }
162
+
154
163
  /**
155
164
  * Concatenate one buffer onto another.
156
165
  */
data/src/util/pm_memchr.c CHANGED
@@ -8,7 +8,7 @@
8
8
  * of a multibyte character.
9
9
  */
10
10
  void *
11
- pm_memchr(const void *memory, int character, size_t number, bool encoding_changed, pm_encoding_t *encoding) {
11
+ pm_memchr(const void *memory, int character, size_t number, bool encoding_changed, const pm_encoding_t *encoding) {
12
12
  if (encoding_changed && encoding->multibyte && character >= PRISM_MEMCHR_TRAILING_BYTE_MINIMUM) {
13
13
  const uint8_t *source = (const uint8_t *) memory;
14
14
  size_t index = 0;
data/src/util/pm_strpbrk.c CHANGED
@@ -4,7 +4,7 @@
4
4
  * This is the slow path that does care about the encoding.
5
5
  */
6
6
  static inline const uint8_t *
7
- pm_strpbrk_multi_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum) {
7
+ pm_strpbrk_multi_byte(const pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum) {
8
8
  size_t index = 0;
9
9
 
10
10
  while (index < maximum) {
@@ -12,7 +12,7 @@ pm_strpbrk_multi_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t
12
12
  return source + index;
13
13
  }
14
14
 
15
- size_t width = parser->encoding.char_width(source + index, (ptrdiff_t) (maximum - index));
15
+ size_t width = parser->encoding->char_width(source + index, (ptrdiff_t) (maximum - index));
16
16
  if (width == 0) {
17
17
  return NULL;
18
18
  }
@@ -61,10 +61,10 @@ pm_strpbrk_single_byte(const uint8_t *source, const uint8_t *charset, size_t max
61
61
  * need to take a slower path and iterate one multi-byte character at a time.
62
62
  */
63
63
  const uint8_t *
64
- pm_strpbrk(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, ptrdiff_t length) {
64
+ pm_strpbrk(const pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, ptrdiff_t length) {
65
65
  if (length <= 0) {
66
66
  return NULL;
67
- } else if (parser->encoding_changed && parser->encoding.multibyte) {
67
+ } else if (parser->encoding_changed && parser->encoding->multibyte) {
68
68
  return pm_strpbrk_multi_byte(parser, source, charset, (size_t) length);
69
69
  } else {
70
70
  return pm_strpbrk_single_byte(source, charset, (size_t) length);
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: prism
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.17.1
4
+ version: 0.19.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Shopify
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-11-03 00:00:00.000000000 Z
11
+ date: 2023-12-14 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description:
14
14
  email:
@@ -33,6 +33,7 @@ files:
33
33
  - docs/fuzzing.md
34
34
  - docs/heredocs.md
35
35
  - docs/javascript.md
36
+ - docs/local_variable_depth.md
36
37
  - docs/mapping.md
37
38
  - docs/releasing.md
38
39
  - docs/ripper.md
@@ -48,7 +49,7 @@ files:
48
49
  - include/prism/ast.h
49
50
  - include/prism/defines.h
50
51
  - include/prism/diagnostic.h
51
- - include/prism/enc/pm_encoding.h
52
+ - include/prism/encoding.h
52
53
  - include/prism/node.h
53
54
  - include/prism/options.h
54
55
  - include/prism/pack.h
@@ -72,6 +73,7 @@ files:
72
73
  - lib/prism/debug.rb
73
74
  - lib/prism/desugar_compiler.rb
74
75
  - lib/prism/dispatcher.rb
76
+ - lib/prism/dot_visitor.rb
75
77
  - lib/prism/dsl.rb
76
78
  - lib/prism/ffi.rb
77
79
  - lib/prism/lex_compat.rb
@@ -93,13 +95,7 @@ files:
93
95
  - sig/prism.rbs
94
96
  - sig/prism_static.rbs
95
97
  - src/diagnostic.c
96
- - src/enc/pm_big5.c
97
- - src/enc/pm_euc_jp.c
98
- - src/enc/pm_gbk.c
99
- - src/enc/pm_shift_jis.c
100
- - src/enc/pm_tables.c
101
- - src/enc/pm_unicode.c
102
- - src/enc/pm_windows_31j.c
98
+ - src/encoding.c
103
99
  - src/node.c
104
100
  - src/options.c
105
101
  - src/pack.c
@@ -124,6 +120,8 @@ licenses:
124
120
  - MIT
125
121
  metadata:
126
122
  allowed_push_host: https://rubygems.org
123
+ source_code_uri: https://github.com/ruby/prism
124
+ changelog_uri: https://github.com/ruby/prism/blob/main/CHANGELOG.md
127
125
  post_install_message:
128
126
  rdoc_options: []
129
127
  require_paths:
data/src/enc/pm_big5.c DELETED
@@ -1,53 +0,0 @@
1
- #include "prism/enc/pm_encoding.h"
2
-
3
- static size_t
4
- pm_encoding_big5_char_width(const uint8_t *b, ptrdiff_t n) {
5
- // These are the single byte characters.
6
- if (*b < 0x80) {
7
- return 1;
8
- }
9
-
10
- // These are the double byte characters.
11
- if ((n > 1) && (b[0] >= 0xA1 && b[0] <= 0xFE) && (b[1] >= 0x40 && b[1] <= 0xFE)) {
12
- return 2;
13
- }
14
-
15
- return 0;
16
- }
17
-
18
- static size_t
19
- pm_encoding_big5_alpha_char(const uint8_t *b, ptrdiff_t n) {
20
- if (pm_encoding_big5_char_width(b, n) == 1) {
21
- return pm_encoding_ascii_alpha_char(b, n);
22
- } else {
23
- return 0;
24
- }
25
- }
26
-
27
- static size_t
28
- pm_encoding_big5_alnum_char(const uint8_t *b, ptrdiff_t n) {
29
- if (pm_encoding_big5_char_width(b, n) == 1) {
30
- return pm_encoding_ascii_alnum_char(b, n);
31
- } else {
32
- return 0;
33
- }
34
- }
35
-
36
- static bool
37
- pm_encoding_big5_isupper_char(const uint8_t *b, ptrdiff_t n) {
38
- if (pm_encoding_big5_char_width(b, n) == 1) {
39
- return pm_encoding_ascii_isupper_char(b, n);
40
- } else {
41
- return false;
42
- }
43
- }
44
-
45
- /** Big5 encoding */
46
- pm_encoding_t pm_encoding_big5 = {
47
- .name = "big5",
48
- .char_width = pm_encoding_big5_char_width,
49
- .alnum_char = pm_encoding_big5_alnum_char,
50
- .alpha_char = pm_encoding_big5_alpha_char,
51
- .isupper_char = pm_encoding_big5_isupper_char,
52
- .multibyte = true
53
- };
data/src/enc/pm_euc_jp.c DELETED
@@ -1,59 +0,0 @@
1
- #include "prism/enc/pm_encoding.h"
2
-
3
- static size_t
4
- pm_encoding_euc_jp_char_width(const uint8_t *b, ptrdiff_t n) {
5
- // These are the single byte characters.
6
- if (*b < 0x80) {
7
- return 1;
8
- }
9
-
10
- // These are the double byte characters.
11
- if (
12
- (n > 1) &&
13
- (
14
- ((b[0] == 0x8E) && (b[1] >= 0xA1 && b[1] <= 0xFE)) ||
15
- ((b[0] >= 0xA1 && b[0] <= 0xFE) && (b[1] >= 0xA1 && b[1] <= 0xFE))
16
- )
17
- ) {
18
- return 2;
19
- }
20
-
21
- return 0;
22
- }
23
-
24
- static size_t
25
- pm_encoding_euc_jp_alpha_char(const uint8_t *b, ptrdiff_t n) {
26
- if (pm_encoding_euc_jp_char_width(b, n) == 1) {
27
- return pm_encoding_ascii_alpha_char(b, n);
28
- } else {
29
- return 0;
30
- }
31
- }
32
-
33
- static size_t
34
- pm_encoding_euc_jp_alnum_char(const uint8_t *b, ptrdiff_t n) {
35
- if (pm_encoding_euc_jp_char_width(b, n) == 1) {
36
- return pm_encoding_ascii_alnum_char(b, n);
37
- } else {
38
- return 0;
39
- }
40
- }
41
-
42
- static bool
43
- pm_encoding_euc_jp_isupper_char(const uint8_t *b, ptrdiff_t n) {
44
- if (pm_encoding_euc_jp_char_width(b, n) == 1) {
45
- return pm_encoding_ascii_isupper_char(b, n);
46
- } else {
47
- return 0;
48
- }
49
- }
50
-
51
- /** EUC-JP encoding */
52
- pm_encoding_t pm_encoding_euc_jp = {
53
- .name = "euc-jp",
54
- .char_width = pm_encoding_euc_jp_char_width,
55
- .alnum_char = pm_encoding_euc_jp_alnum_char,
56
- .alpha_char = pm_encoding_euc_jp_alpha_char,
57
- .isupper_char = pm_encoding_euc_jp_isupper_char,
58
- .multibyte = true
59
- };
data/src/enc/pm_gbk.c DELETED
@@ -1,62 +0,0 @@
1
- #include "prism/enc/pm_encoding.h"
2
-
3
- static size_t
4
- pm_encoding_gbk_char_width(const uint8_t *b, ptrdiff_t n) {
5
- // These are the single byte characters.
6
- if (*b < 0x80) {
7
- return 1;
8
- }
9
-
10
- // These are the double byte characters.
11
- if (
12
- (n > 1) &&
13
- (
14
- ((b[0] >= 0xA1 && b[0] <= 0xA9) && (b[1] >= 0xA1 && b[1] <= 0xFE)) || // GBK/1
15
- ((b[0] >= 0xB0 && b[0] <= 0xF7) && (b[1] >= 0xA1 && b[1] <= 0xFE)) || // GBK/2
16
- ((b[0] >= 0x81 && b[0] <= 0xA0) && (b[1] >= 0x40 && b[1] <= 0xFE) && (b[1] != 0x7F)) || // GBK/3
17
- ((b[0] >= 0xAA && b[0] <= 0xFE) && (b[1] >= 0x40 && b[1] <= 0xA0) && (b[1] != 0x7F)) || // GBK/4
18
- ((b[0] >= 0xA8 && b[0] <= 0xA9) && (b[1] >= 0x40 && b[1] <= 0xA0) && (b[1] != 0x7F)) // GBK/5
19
- )
20
- ) {
21
- return 2;
22
- }
23
-
24
- return 0;
25
- }
26
-
27
- static size_t
28
- pm_encoding_gbk_alpha_char(const uint8_t *b, ptrdiff_t n) {
29
- if (pm_encoding_gbk_char_width(b, n) == 1) {
30
- return pm_encoding_ascii_alpha_char(b, n);
31
- } else {
32
- return 0;
33
- }
34
- }
35
-
36
- static size_t
37
- pm_encoding_gbk_alnum_char(const uint8_t *b, ptrdiff_t n) {
38
- if (pm_encoding_gbk_char_width(b, n) == 1) {
39
- return pm_encoding_ascii_alnum_char(b, n);
40
- } else {
41
- return 0;
42
- }
43
- }
44
-
45
- static bool
46
- pm_encoding_gbk_isupper_char(const uint8_t *b, ptrdiff_t n) {
47
- if (pm_encoding_gbk_char_width(b, n) == 1) {
48
- return pm_encoding_ascii_isupper_char(b, n);
49
- } else {
50
- return false;
51
- }
52
- }
53
-
54
- /** GBK encoding */
55
- pm_encoding_t pm_encoding_gbk = {
56
- .name = "gbk",
57
- .char_width = pm_encoding_gbk_char_width,
58
- .alnum_char = pm_encoding_gbk_alnum_char,
59
- .alpha_char = pm_encoding_gbk_alpha_char,
60
- .isupper_char = pm_encoding_gbk_isupper_char,
61
- .multibyte = true
62
- };
data/src/enc/pm_shift_jis.c DELETED
@@ -1,57 +0,0 @@
1
- #include "prism/enc/pm_encoding.h"
2
-
3
- static size_t
4
- pm_encoding_shift_jis_char_width(const uint8_t *b, ptrdiff_t n) {
5
- // These are the single byte characters.
6
- if (*b < 0x80 || (*b >= 0xA1 && *b <= 0xDF)) {
7
- return 1;
8
- }
9
-
10
- // These are the double byte characters.
11
- if (
12
- (n > 1) &&
13
- ((b[0] >= 0x81 && b[0] <= 0x9F) || (b[0] >= 0xE0 && b[0] <= 0xFC)) &&
14
- (b[1] >= 0x40 && b[1] <= 0xFC)
15
- ) {
16
- return 2;
17
- }
18
-
19
- return 0;
20
- }
21
-
22
- static size_t
23
- pm_encoding_shift_jis_alpha_char(const uint8_t *b, ptrdiff_t n) {
24
- if (pm_encoding_shift_jis_char_width(b, n) == 1) {
25
- return pm_encoding_ascii_alpha_char(b, n);
26
- } else {
27
- return 0;
28
- }
29
- }
30
-
31
- static size_t
32
- pm_encoding_shift_jis_alnum_char(const uint8_t *b, ptrdiff_t n) {
33
- if (pm_encoding_shift_jis_char_width(b, n) == 1) {
34
- return pm_encoding_ascii_alnum_char(b, n);
35
- } else {
36
- return 0;
37
- }
38
- }
39
-
40
- static bool
41
- pm_encoding_shift_jis_isupper_char(const uint8_t *b, ptrdiff_t n) {
42
- if (pm_encoding_shift_jis_char_width(b, n) == 1) {
43
- return pm_encoding_ascii_isupper_char(b, n);
44
- } else {
45
- return 0;
46
- }
47
- }
48
-
49
- /** Shift_JIS encoding */
50
- pm_encoding_t pm_encoding_shift_jis = {
51
- .name = "shift_jis",
52
- .char_width = pm_encoding_shift_jis_char_width,
53
- .alnum_char = pm_encoding_shift_jis_alnum_char,
54
- .alpha_char = pm_encoding_shift_jis_alpha_char,
55
- .isupper_char = pm_encoding_shift_jis_isupper_char,
56
- .multibyte = true
57
- };