prism 0.17.0 → 0.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +37 -1
  3. data/Makefile +5 -5
  4. data/README.md +2 -2
  5. data/config.yml +26 -13
  6. data/docs/build_system.md +6 -6
  7. data/docs/building.md +1 -1
  8. data/docs/configuration.md +1 -0
  9. data/docs/encoding.md +68 -32
  10. data/docs/heredocs.md +1 -1
  11. data/docs/javascript.md +29 -1
  12. data/docs/releasing.md +4 -1
  13. data/docs/ruby_api.md +14 -0
  14. data/ext/prism/api_node.c +74 -45
  15. data/ext/prism/extconf.rb +91 -127
  16. data/ext/prism/extension.c +4 -1
  17. data/ext/prism/extension.h +1 -1
  18. data/include/prism/ast.h +148 -133
  19. data/include/prism/diagnostic.h +27 -1
  20. data/include/prism/enc/pm_encoding.h +42 -1
  21. data/include/prism/parser.h +6 -0
  22. data/include/prism/version.h +2 -2
  23. data/lib/prism/compiler.rb +3 -3
  24. data/lib/prism/debug.rb +4 -0
  25. data/lib/prism/desugar_compiler.rb +1 -0
  26. data/lib/prism/dispatcher.rb +14 -14
  27. data/lib/prism/dot_visitor.rb +4334 -0
  28. data/lib/prism/dsl.rb +11 -11
  29. data/lib/prism/ffi.rb +3 -3
  30. data/lib/prism/mutation_compiler.rb +6 -6
  31. data/lib/prism/node.rb +182 -113
  32. data/lib/prism/node_ext.rb +61 -3
  33. data/lib/prism/parse_result.rb +46 -12
  34. data/lib/prism/serialize.rb +124 -130
  35. data/lib/prism/visitor.rb +3 -3
  36. data/lib/prism.rb +1 -0
  37. data/prism.gemspec +5 -1
  38. data/rbi/prism.rbi +5565 -5540
  39. data/rbi/prism_static.rbi +138 -142
  40. data/sig/prism.rbs +47 -32
  41. data/src/diagnostic.c +61 -3
  42. data/src/enc/pm_big5.c +63 -0
  43. data/src/enc/pm_cp51932.c +57 -0
  44. data/src/enc/pm_euc_jp.c +10 -0
  45. data/src/enc/pm_gbk.c +5 -2
  46. data/src/enc/pm_tables.c +1478 -148
  47. data/src/node.c +33 -21
  48. data/src/prettyprint.c +1027 -925
  49. data/src/prism.c +925 -374
  50. data/src/regexp.c +12 -12
  51. data/src/serialize.c +36 -9
  52. metadata +6 -2
data/src/enc/pm_big5.c CHANGED
@@ -15,6 +15,22 @@ pm_encoding_big5_char_width(const uint8_t *b, ptrdiff_t n) {
15
15
  return 0;
16
16
  }
17
17
 
18
+ static size_t
19
+ pm_encoding_big5_star_char_width(const uint8_t *b, ptrdiff_t n) {
20
+ // These are the single byte characters.
21
+ if (*b < 0x80) {
22
+ return 1;
23
+ }
24
+
25
+ // These are the double byte characters.
26
+ if ((n > 1) && (b[0] >= 0x87 && b[0] <= 0xFE) &&
27
+ ((b[1] >= 0x40 && b[1] <= 0x7E) || (b[1] >= 0xA1 && b[1] <= 0xFE))) {
28
+ return 2;
29
+ }
30
+
31
+ return 0;
32
+ }
33
+
18
34
  static size_t
19
35
  pm_encoding_big5_alpha_char(const uint8_t *b, ptrdiff_t n) {
20
36
  if (pm_encoding_big5_char_width(b, n) == 1) {
@@ -24,6 +40,15 @@ pm_encoding_big5_alpha_char(const uint8_t *b, ptrdiff_t n) {
24
40
  }
25
41
  }
26
42
 
43
+ static size_t
44
+ pm_encoding_big5_star_alpha_char(const uint8_t *b, ptrdiff_t n) {
45
+ if (pm_encoding_big5_star_char_width(b, n) == 1) {
46
+ return pm_encoding_ascii_alpha_char(b, n);
47
+ } else {
48
+ return 0;
49
+ }
50
+ }
51
+
27
52
  static size_t
28
53
  pm_encoding_big5_alnum_char(const uint8_t *b, ptrdiff_t n) {
29
54
  if (pm_encoding_big5_char_width(b, n) == 1) {
@@ -33,6 +58,15 @@ pm_encoding_big5_alnum_char(const uint8_t *b, ptrdiff_t n) {
33
58
  }
34
59
  }
35
60
 
61
+ static size_t
62
+ pm_encoding_big5_star_alnum_char(const uint8_t *b, ptrdiff_t n) {
63
+ if (pm_encoding_big5_star_char_width(b, n) == 1) {
64
+ return pm_encoding_ascii_alnum_char(b, n);
65
+ } else {
66
+ return 0;
67
+ }
68
+ }
69
+
36
70
  static bool
37
71
  pm_encoding_big5_isupper_char(const uint8_t *b, ptrdiff_t n) {
38
72
  if (pm_encoding_big5_char_width(b, n) == 1) {
@@ -42,6 +76,15 @@ pm_encoding_big5_isupper_char(const uint8_t *b, ptrdiff_t n) {
42
76
  }
43
77
  }
44
78
 
79
+ static bool
80
+ pm_encoding_big5_star_isupper_char(const uint8_t *b, ptrdiff_t n) {
81
+ if (pm_encoding_big5_star_char_width(b, n) == 1) {
82
+ return pm_encoding_ascii_isupper_char(b, n);
83
+ } else {
84
+ return false;
85
+ }
86
+ }
87
+
45
88
  /** Big5 encoding */
46
89
  pm_encoding_t pm_encoding_big5 = {
47
90
  .name = "big5",
@@ -51,3 +94,23 @@ pm_encoding_t pm_encoding_big5 = {
51
94
  .isupper_char = pm_encoding_big5_isupper_char,
52
95
  .multibyte = true
53
96
  };
97
+
98
+ /** Big5-HKSCS encoding */
99
+ pm_encoding_t pm_encoding_big5_hkscs = {
100
+ .name = "big5-hkscs",
101
+ .char_width = pm_encoding_big5_star_char_width,
102
+ .alnum_char = pm_encoding_big5_star_alnum_char,
103
+ .alpha_char = pm_encoding_big5_star_alpha_char,
104
+ .isupper_char = pm_encoding_big5_star_isupper_char,
105
+ .multibyte = true
106
+ };
107
+
108
+ /** Big5-UAO encoding */
109
+ pm_encoding_t pm_encoding_big5_uao = {
110
+ .name = "big5-uao",
111
+ .char_width = pm_encoding_big5_star_char_width,
112
+ .alnum_char = pm_encoding_big5_star_alnum_char,
113
+ .alpha_char = pm_encoding_big5_star_alpha_char,
114
+ .isupper_char = pm_encoding_big5_star_isupper_char,
115
+ .multibyte = true
116
+ };
data/src/enc/pm_cp51932.c ADDED
@@ -0,0 +1,57 @@
1
+ #include "prism/enc/pm_encoding.h"
2
+
3
+ static size_t
4
+ pm_encoding_cp51932_char_width(const uint8_t *b, ptrdiff_t n) {
5
+ // These are the single byte characters.
6
+ if (*b < 0x80) {
7
+ return 1;
8
+ }
9
+
10
+ // These are the double byte characters.
11
+ if (
12
+ (n > 1) &&
13
+ ((b[0] >= 0xa1 && b[0] <= 0xfe) || (b[0] == 0x8e)) &&
14
+ (b[1] >= 0xa1 && b[1] <= 0xfe)
15
+ ) {
16
+ return 2;
17
+ }
18
+
19
+ return 0;
20
+ }
21
+
22
+ static size_t
23
+ pm_encoding_cp51932_alpha_char(const uint8_t *b, ptrdiff_t n) {
24
+ if (pm_encoding_cp51932_char_width(b, n) == 1) {
25
+ return pm_encoding_ascii_alpha_char(b, n);
26
+ } else {
27
+ return 0;
28
+ }
29
+ }
30
+
31
+ static size_t
32
+ pm_encoding_cp51932_alnum_char(const uint8_t *b, ptrdiff_t n) {
33
+ if (pm_encoding_cp51932_char_width(b, n) == 1) {
34
+ return pm_encoding_ascii_alnum_char(b, n);
35
+ } else {
36
+ return 0;
37
+ }
38
+ }
39
+
40
+ static bool
41
+ pm_encoding_cp51932_isupper_char(const uint8_t *b, ptrdiff_t n) {
42
+ if (pm_encoding_cp51932_char_width(b, n) == 1) {
43
+ return pm_encoding_ascii_isupper_char(b, n);
44
+ } else {
45
+ return 0;
46
+ }
47
+ }
48
+
49
+ /** cp51932 encoding */
50
+ pm_encoding_t pm_encoding_cp51932 = {
51
+ .name = "cp51932",
52
+ .char_width = pm_encoding_cp51932_char_width,
53
+ .alnum_char = pm_encoding_cp51932_alnum_char,
54
+ .alpha_char = pm_encoding_cp51932_alpha_char,
55
+ .isupper_char = pm_encoding_cp51932_isupper_char,
56
+ .multibyte = true
57
+ };
data/src/enc/pm_euc_jp.c CHANGED
@@ -18,6 +18,16 @@ pm_encoding_euc_jp_char_width(const uint8_t *b, ptrdiff_t n) {
18
18
  return 2;
19
19
  }
20
20
 
21
+ // These are the triple byte characters.
22
+ if (
23
+ (n > 2) &&
24
+ (b[0] == 0x8F) &&
25
+ (b[1] >= 0xA1 && b[2] <= 0xFE) &&
26
+ (b[2] >= 0xA1 && b[2] <= 0xFE)
27
+ ) {
28
+ return 3;
29
+ }
30
+
21
31
  return 0;
22
32
  }
23
33
 
data/src/enc/pm_gbk.c CHANGED
@@ -3,7 +3,7 @@
3
3
  static size_t
4
4
  pm_encoding_gbk_char_width(const uint8_t *b, ptrdiff_t n) {
5
5
  // These are the single byte characters.
6
- if (*b < 0x80) {
6
+ if (*b <= 0x80) {
7
7
  return 1;
8
8
  }
9
9
 
@@ -15,7 +15,10 @@ pm_encoding_gbk_char_width(const uint8_t *b, ptrdiff_t n) {
15
15
  ((b[0] >= 0xB0 && b[0] <= 0xF7) && (b[1] >= 0xA1 && b[1] <= 0xFE)) || // GBK/2
16
16
  ((b[0] >= 0x81 && b[0] <= 0xA0) && (b[1] >= 0x40 && b[1] <= 0xFE) && (b[1] != 0x7F)) || // GBK/3
17
17
  ((b[0] >= 0xAA && b[0] <= 0xFE) && (b[1] >= 0x40 && b[1] <= 0xA0) && (b[1] != 0x7F)) || // GBK/4
18
- ((b[0] >= 0xA8 && b[0] <= 0xA9) && (b[1] >= 0x40 && b[1] <= 0xA0) && (b[1] != 0x7F)) // GBK/5
18
+ ((b[0] >= 0xA8 && b[0] <= 0xA9) && (b[1] >= 0x40 && b[1] <= 0xA0) && (b[1] != 0x7F)) || // GBK/5
19
+ ((b[0] >= 0xAA && b[0] <= 0xAF) && (b[1] >= 0xA1 && b[1] <= 0xFE)) || // user-defined 1
20
+ ((b[0] >= 0xF8 && b[0] <= 0xFE) && (b[1] >= 0xA1 && b[1] <= 0xFE)) || // user-defined 2
21
+ ((b[0] >= 0xA1 && b[0] <= 0xA7) && (b[1] >= 0x40 && b[1] <= 0xA0) && (b[1] != 0x7F)) // user-defined 3
19
22
  )
20
23
  ) {
21
24
  return 2;