prism 0.17.0 → 0.18.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (52) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +37 -1
  3. data/Makefile +5 -5
  4. data/README.md +2 -2
  5. data/config.yml +26 -13
  6. data/docs/build_system.md +6 -6
  7. data/docs/building.md +1 -1
  8. data/docs/configuration.md +1 -0
  9. data/docs/encoding.md +68 -32
  10. data/docs/heredocs.md +1 -1
  11. data/docs/javascript.md +29 -1
  12. data/docs/releasing.md +4 -1
  13. data/docs/ruby_api.md +14 -0
  14. data/ext/prism/api_node.c +74 -45
  15. data/ext/prism/extconf.rb +91 -127
  16. data/ext/prism/extension.c +4 -1
  17. data/ext/prism/extension.h +1 -1
  18. data/include/prism/ast.h +148 -133
  19. data/include/prism/diagnostic.h +27 -1
  20. data/include/prism/enc/pm_encoding.h +42 -1
  21. data/include/prism/parser.h +6 -0
  22. data/include/prism/version.h +2 -2
  23. data/lib/prism/compiler.rb +3 -3
  24. data/lib/prism/debug.rb +4 -0
  25. data/lib/prism/desugar_compiler.rb +1 -0
  26. data/lib/prism/dispatcher.rb +14 -14
  27. data/lib/prism/dot_visitor.rb +4334 -0
  28. data/lib/prism/dsl.rb +11 -11
  29. data/lib/prism/ffi.rb +3 -3
  30. data/lib/prism/mutation_compiler.rb +6 -6
  31. data/lib/prism/node.rb +182 -113
  32. data/lib/prism/node_ext.rb +61 -3
  33. data/lib/prism/parse_result.rb +46 -12
  34. data/lib/prism/serialize.rb +124 -130
  35. data/lib/prism/visitor.rb +3 -3
  36. data/lib/prism.rb +1 -0
  37. data/prism.gemspec +5 -1
  38. data/rbi/prism.rbi +5565 -5540
  39. data/rbi/prism_static.rbi +138 -142
  40. data/sig/prism.rbs +47 -32
  41. data/src/diagnostic.c +61 -3
  42. data/src/enc/pm_big5.c +63 -0
  43. data/src/enc/pm_cp51932.c +57 -0
  44. data/src/enc/pm_euc_jp.c +10 -0
  45. data/src/enc/pm_gbk.c +5 -2
  46. data/src/enc/pm_tables.c +1478 -148
  47. data/src/node.c +33 -21
  48. data/src/prettyprint.c +1027 -925
  49. data/src/prism.c +925 -374
  50. data/src/regexp.c +12 -12
  51. data/src/serialize.c +36 -9
  52. metadata +6 -2
data/src/enc/pm_big5.c CHANGED
@@ -15,6 +15,22 @@ pm_encoding_big5_char_width(const uint8_t *b, ptrdiff_t n) {
15
15
  return 0;
16
16
  }
17
17
 
18
/**
 * Character width callback shared by the Big5-HKSCS and Big5-UAO encoding
 * tables.
 *
 * @param b Pointer to the first byte of the character to measure.
 * @param n Number of bytes available starting at b.
 * @return 1 when the first byte is below 0x80, 2 when a lead byte in
 *     0x87-0xFE is followed by a trail byte in 0x40-0x7E or 0xA1-0xFE,
 *     and 0 for anything else.
 */
static size_t
pm_encoding_big5_star_char_width(const uint8_t *b, ptrdiff_t n) {
    const uint8_t lead = b[0];

    // Anything below 0x80 stands on its own as a single byte character.
    if (lead < 0x80) {
        return 1;
    }

    // A double byte character needs a second byte to be available and a
    // lead byte inside 0x87-0xFE.
    if (n <= 1 || lead < 0x87 || lead > 0xFE) {
        return 0;
    }

    // The trail byte has to land in one of the two permitted ranges.
    const uint8_t trail = b[1];
    if (trail >= 0x40 && trail <= 0x7E) {
        return 2;
    }
    if (trail >= 0xA1 && trail <= 0xFE) {
        return 2;
    }

    return 0;
}
33
+
18
34
  static size_t
19
35
  pm_encoding_big5_alpha_char(const uint8_t *b, ptrdiff_t n) {
20
36
  if (pm_encoding_big5_char_width(b, n) == 1) {
@@ -24,6 +40,15 @@ pm_encoding_big5_alpha_char(const uint8_t *b, ptrdiff_t n) {
24
40
  }
25
41
  }
26
42
 
43
/**
 * Alphabetic-character callback for the Big5-HKSCS / Big5-UAO tables.
 *
 * Only characters that pm_encoding_big5_star_char_width measures as one byte
 * wide are handed to the ASCII helper; everything else yields 0.
 *
 * @param b Pointer to the first byte of the character to inspect.
 * @param n Number of bytes available starting at b.
 * @return The result of pm_encoding_ascii_alpha_char for single byte
 *     characters, otherwise 0.
 */
static size_t
pm_encoding_big5_star_alpha_char(const uint8_t *b, ptrdiff_t n) {
    // Multibyte or invalid sequences are never reported as alphabetic.
    if (pm_encoding_big5_star_char_width(b, n) != 1) {
        return 0;
    }

    return pm_encoding_ascii_alpha_char(b, n);
}
51
+
27
52
  static size_t
28
53
  pm_encoding_big5_alnum_char(const uint8_t *b, ptrdiff_t n) {
29
54
  if (pm_encoding_big5_char_width(b, n) == 1) {
@@ -33,6 +58,15 @@ pm_encoding_big5_alnum_char(const uint8_t *b, ptrdiff_t n) {
33
58
  }
34
59
  }
35
60
 
61
/**
 * Alphanumeric-character callback for the Big5-HKSCS / Big5-UAO tables.
 *
 * Only characters that pm_encoding_big5_star_char_width measures as one byte
 * wide are handed to the ASCII helper; everything else yields 0.
 *
 * @param b Pointer to the first byte of the character to inspect.
 * @param n Number of bytes available starting at b.
 * @return The result of pm_encoding_ascii_alnum_char for single byte
 *     characters, otherwise 0.
 */
static size_t
pm_encoding_big5_star_alnum_char(const uint8_t *b, ptrdiff_t n) {
    // Multibyte or invalid sequences are never reported as alphanumeric.
    if (pm_encoding_big5_star_char_width(b, n) != 1) {
        return 0;
    }

    return pm_encoding_ascii_alnum_char(b, n);
}
69
+
36
70
  static bool
37
71
  pm_encoding_big5_isupper_char(const uint8_t *b, ptrdiff_t n) {
38
72
  if (pm_encoding_big5_char_width(b, n) == 1) {
@@ -42,6 +76,15 @@ pm_encoding_big5_isupper_char(const uint8_t *b, ptrdiff_t n) {
42
76
  }
43
77
  }
44
78
 
79
/**
 * Uppercase-character callback for the Big5-HKSCS / Big5-UAO tables.
 *
 * Only characters that pm_encoding_big5_star_char_width measures as one byte
 * wide are handed to the ASCII helper; everything else yields false.
 *
 * @param b Pointer to the first byte of the character to inspect.
 * @param n Number of bytes available starting at b.
 * @return The result of pm_encoding_ascii_isupper_char for single byte
 *     characters, otherwise false.
 */
static bool
pm_encoding_big5_star_isupper_char(const uint8_t *b, ptrdiff_t n) {
    // Multibyte or invalid sequences are never reported as uppercase.
    if (pm_encoding_big5_star_char_width(b, n) != 1) {
        return false;
    }

    return pm_encoding_ascii_isupper_char(b, n);
}
87
+
45
88
  /** Big5 encoding */
46
89
  pm_encoding_t pm_encoding_big5 = {
47
90
  .name = "big5",
@@ -51,3 +94,23 @@ pm_encoding_t pm_encoding_big5 = {
51
94
  .isupper_char = pm_encoding_big5_isupper_char,
52
95
  .multibyte = true
53
96
  };
97
+
98
+ /** Big5-HKSCS encoding */
99
+ pm_encoding_t pm_encoding_big5_hkscs = {
100
+ .name = "big5-hkscs",
101
+ .char_width = pm_encoding_big5_star_char_width,
102
+ .alnum_char = pm_encoding_big5_star_alnum_char,
103
+ .alpha_char = pm_encoding_big5_star_alpha_char,
104
+ .isupper_char = pm_encoding_big5_star_isupper_char,
105
+ .multibyte = true
106
+ };
107
+
108
+ /** Big5-UAO encoding */
109
+ pm_encoding_t pm_encoding_big5_uao = {
110
+ .name = "big5-uao",
111
+ .char_width = pm_encoding_big5_star_char_width,
112
+ .alnum_char = pm_encoding_big5_star_alnum_char,
113
+ .alpha_char = pm_encoding_big5_star_alpha_char,
114
+ .isupper_char = pm_encoding_big5_star_isupper_char,
115
+ .multibyte = true
116
+ };
data/src/enc/pm_cp51932.c ADDED
@@ -0,0 +1,57 @@
1
+ #include "prism/enc/pm_encoding.h"
2
+
3
/**
 * Character width callback for the cp51932 encoding table.
 *
 * @param b Pointer to the first byte of the character to measure.
 * @param n Number of bytes available starting at b.
 * @return 1 when the first byte is below 0x80, 2 when a lead byte of 0x8e or
 *     in 0xa1-0xfe is followed by a trail byte in 0xa1-0xfe, and 0 for
 *     anything else.
 */
static size_t
pm_encoding_cp51932_char_width(const uint8_t *b, ptrdiff_t n) {
    const uint8_t lead = b[0];

    // Anything below 0x80 stands on its own as a single byte character.
    if (lead < 0x80) {
        return 1;
    }

    // Without a second byte there is nothing more that can match.
    if (n <= 1) {
        return 0;
    }

    // Accepted lead bytes are 0x8e and the 0xa1-0xfe range.
    if (lead != 0x8e && (lead < 0xa1 || lead > 0xfe)) {
        return 0;
    }

    // The trail byte must fall inside 0xa1-0xfe.
    const uint8_t trail = b[1];
    return (trail >= 0xa1 && trail <= 0xfe) ? 2 : 0;
}
21
+
22
/**
 * Alphabetic-character callback for the cp51932 encoding table.
 *
 * Only characters that pm_encoding_cp51932_char_width measures as one byte
 * wide are handed to the ASCII helper; everything else yields 0.
 *
 * @param b Pointer to the first byte of the character to inspect.
 * @param n Number of bytes available starting at b.
 * @return The result of pm_encoding_ascii_alpha_char for single byte
 *     characters, otherwise 0.
 */
static size_t
pm_encoding_cp51932_alpha_char(const uint8_t *b, ptrdiff_t n) {
    // Multibyte or invalid sequences are never reported as alphabetic.
    if (pm_encoding_cp51932_char_width(b, n) != 1) {
        return 0;
    }

    return pm_encoding_ascii_alpha_char(b, n);
}
30
+
31
/**
 * Alphanumeric-character callback for the cp51932 encoding table.
 *
 * Only characters that pm_encoding_cp51932_char_width measures as one byte
 * wide are handed to the ASCII helper; everything else yields 0.
 *
 * @param b Pointer to the first byte of the character to inspect.
 * @param n Number of bytes available starting at b.
 * @return The result of pm_encoding_ascii_alnum_char for single byte
 *     characters, otherwise 0.
 */
static size_t
pm_encoding_cp51932_alnum_char(const uint8_t *b, ptrdiff_t n) {
    // Multibyte or invalid sequences are never reported as alphanumeric.
    if (pm_encoding_cp51932_char_width(b, n) != 1) {
        return 0;
    }

    return pm_encoding_ascii_alnum_char(b, n);
}
39
+
40
/**
 * Uppercase-character callback for the cp51932 encoding table.
 *
 * Only characters that pm_encoding_cp51932_char_width measures as one byte
 * wide are handed to the ASCII helper; everything else yields false.
 *
 * @param b Pointer to the first byte of the character to inspect.
 * @param n Number of bytes available starting at b.
 * @return The result of pm_encoding_ascii_isupper_char for single byte
 *     characters, otherwise false.
 */
static bool
pm_encoding_cp51932_isupper_char(const uint8_t *b, ptrdiff_t n) {
    if (pm_encoding_cp51932_char_width(b, n) == 1) {
        return pm_encoding_ascii_isupper_char(b, n);
    } else {
        // The function returns bool, so use the boolean literal here, as the
        // Big5 isupper callbacks do, rather than the integer 0.
        return false;
    }
}
48
+
49
+ /** cp51932 encoding */
50
+ pm_encoding_t pm_encoding_cp51932 = {
51
+ .name = "cp51932",
52
+ .char_width = pm_encoding_cp51932_char_width,
53
+ .alnum_char = pm_encoding_cp51932_alnum_char,
54
+ .alpha_char = pm_encoding_cp51932_alpha_char,
55
+ .isupper_char = pm_encoding_cp51932_isupper_char,
56
+ .multibyte = true
57
+ };
data/src/enc/pm_euc_jp.c CHANGED
@@ -18,6 +18,16 @@ pm_encoding_euc_jp_char_width(const uint8_t *b, ptrdiff_t n) {
18
18
  return 2;
19
19
  }
20
20
 
21
+ // These are the triple byte characters.
22
+ if (
23
+ (n > 2) &&
24
+ (b[0] == 0x8F) &&
25
+ (b[1] >= 0xA1 && b[2] <= 0xFE) &&
26
+ (b[2] >= 0xA1 && b[2] <= 0xFE)
27
+ ) {
28
+ return 3;
29
+ }
30
+
21
31
  return 0;
22
32
  }
23
33
 
data/src/enc/pm_gbk.c CHANGED
@@ -3,7 +3,7 @@
3
3
  static size_t
4
4
  pm_encoding_gbk_char_width(const uint8_t *b, ptrdiff_t n) {
5
5
  // These are the single byte characters.
6
- if (*b < 0x80) {
6
+ if (*b <= 0x80) {
7
7
  return 1;
8
8
  }
9
9
 
@@ -15,7 +15,10 @@ pm_encoding_gbk_char_width(const uint8_t *b, ptrdiff_t n) {
15
15
  ((b[0] >= 0xB0 && b[0] <= 0xF7) && (b[1] >= 0xA1 && b[1] <= 0xFE)) || // GBK/2
16
16
  ((b[0] >= 0x81 && b[0] <= 0xA0) && (b[1] >= 0x40 && b[1] <= 0xFE) && (b[1] != 0x7F)) || // GBK/3
17
17
  ((b[0] >= 0xAA && b[0] <= 0xFE) && (b[1] >= 0x40 && b[1] <= 0xA0) && (b[1] != 0x7F)) || // GBK/4
18
- ((b[0] >= 0xA8 && b[0] <= 0xA9) && (b[1] >= 0x40 && b[1] <= 0xA0) && (b[1] != 0x7F)) // GBK/5
18
+ ((b[0] >= 0xA8 && b[0] <= 0xA9) && (b[1] >= 0x40 && b[1] <= 0xA0) && (b[1] != 0x7F)) || // GBK/5
19
+ ((b[0] >= 0xAA && b[0] <= 0xAF) && (b[1] >= 0xA1 && b[1] <= 0xFE)) || // user-defined 1
20
+ ((b[0] >= 0xF8 && b[0] <= 0xFE) && (b[1] >= 0xA1 && b[1] <= 0xFE)) || // user-defined 2
21
+ ((b[0] >= 0xA1 && b[0] <= 0xA7) && (b[1] >= 0x40 && b[1] <= 0xA0) && (b[1] != 0x7F)) // user-defined 3
19
22
  )
20
23
  ) {
21
24
  return 2;