prism 0.17.1 → 0.18.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (50) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +30 -1
  3. data/Makefile +5 -5
  4. data/README.md +2 -2
  5. data/config.yml +26 -13
  6. data/docs/build_system.md +6 -6
  7. data/docs/building.md +1 -1
  8. data/docs/configuration.md +1 -0
  9. data/docs/encoding.md +68 -32
  10. data/docs/heredocs.md +1 -1
  11. data/docs/javascript.md +29 -1
  12. data/docs/ruby_api.md +14 -0
  13. data/ext/prism/api_node.c +74 -45
  14. data/ext/prism/extconf.rb +91 -127
  15. data/ext/prism/extension.c +1 -1
  16. data/ext/prism/extension.h +1 -1
  17. data/include/prism/ast.h +148 -133
  18. data/include/prism/diagnostic.h +27 -1
  19. data/include/prism/enc/pm_encoding.h +42 -1
  20. data/include/prism/parser.h +6 -0
  21. data/include/prism/version.h +3 -3
  22. data/lib/prism/compiler.rb +3 -3
  23. data/lib/prism/debug.rb +4 -0
  24. data/lib/prism/desugar_compiler.rb +1 -0
  25. data/lib/prism/dispatcher.rb +14 -14
  26. data/lib/prism/dot_visitor.rb +4334 -0
  27. data/lib/prism/dsl.rb +11 -11
  28. data/lib/prism/ffi.rb +3 -3
  29. data/lib/prism/mutation_compiler.rb +6 -6
  30. data/lib/prism/node.rb +182 -113
  31. data/lib/prism/node_ext.rb +61 -3
  32. data/lib/prism/parse_result.rb +46 -12
  33. data/lib/prism/serialize.rb +125 -131
  34. data/lib/prism/visitor.rb +3 -3
  35. data/lib/prism.rb +1 -0
  36. data/prism.gemspec +5 -1
  37. data/rbi/prism.rbi +83 -54
  38. data/sig/prism.rbs +47 -32
  39. data/src/diagnostic.c +61 -3
  40. data/src/enc/pm_big5.c +63 -0
  41. data/src/enc/pm_cp51932.c +57 -0
  42. data/src/enc/pm_euc_jp.c +10 -0
  43. data/src/enc/pm_gbk.c +5 -2
  44. data/src/enc/pm_tables.c +1478 -148
  45. data/src/node.c +33 -21
  46. data/src/prettyprint.c +1027 -925
  47. data/src/prism.c +925 -374
  48. data/src/regexp.c +12 -12
  49. data/src/serialize.c +36 -9
  50. metadata +6 -2
@@ -0,0 +1,57 @@
1
+ #include "prism/enc/pm_encoding.h"
2
+
3
/**
 * Report how many bytes the cp51932 character starting at b occupies.
 *
 * @param b Pointer to the first byte of the candidate character.
 * @param n Number of bytes available starting at b.
 * @return 1 for a byte below 0x80, 2 for a valid double-byte sequence, and 0
 *     when the bytes do not form a character recognized by this function.
 */
static size_t
pm_encoding_cp51932_char_width(const uint8_t *b, ptrdiff_t n) {
    const uint8_t lead = b[0];

    // Bytes below 0x80 are single byte characters.
    if (lead <= 0x7f) {
        return 1;
    }

    // Everything else recognized here is two bytes wide, so a second byte has
    // to be available before it can be inspected.
    if (n <= 1) {
        return 0;
    }

    // The lead byte must be 0x8e or fall in the range 0xa1-0xfe.
    if (lead != 0x8e && (lead < 0xa1 || lead > 0xfe)) {
        return 0;
    }

    // The trail byte must fall in the range 0xa1-0xfe.
    const uint8_t trail = b[1];
    if (trail < 0xa1 || trail > 0xfe) {
        return 0;
    }

    return 2;
}
21
+
22
/**
 * Alphabetic check for cp51932. The decision is handed to
 * pm_encoding_ascii_alpha_char only when the character at b is one byte wide;
 * for any other width this returns 0.
 *
 * @param b Pointer to the first byte of the candidate character.
 * @param n Number of bytes available starting at b.
 * @return The result of pm_encoding_ascii_alpha_char for single byte
 *     characters, otherwise 0.
 */
static size_t
pm_encoding_cp51932_alpha_char(const uint8_t *b, ptrdiff_t n) {
    // Double-byte and unrecognized sequences are reported as 0.
    if (pm_encoding_cp51932_char_width(b, n) != 1) {
        return 0;
    }

    return pm_encoding_ascii_alpha_char(b, n);
}
30
+
31
/**
 * Alphanumeric check for cp51932. The decision is handed to
 * pm_encoding_ascii_alnum_char only when the character at b is one byte wide;
 * for any other width this returns 0.
 *
 * @param b Pointer to the first byte of the candidate character.
 * @param n Number of bytes available starting at b.
 * @return The result of pm_encoding_ascii_alnum_char for single byte
 *     characters, otherwise 0.
 */
static size_t
pm_encoding_cp51932_alnum_char(const uint8_t *b, ptrdiff_t n) {
    // Double-byte and unrecognized sequences are reported as 0.
    if (pm_encoding_cp51932_char_width(b, n) != 1) {
        return 0;
    }

    return pm_encoding_ascii_alnum_char(b, n);
}
39
+
40
/**
 * Uppercase check for cp51932. The decision is handed to
 * pm_encoding_ascii_isupper_char only when the character at b is one byte
 * wide; for any other width this returns false.
 *
 * @param b Pointer to the first byte of the candidate character.
 * @param n Number of bytes available starting at b.
 * @return The result of pm_encoding_ascii_isupper_char for single byte
 *     characters, otherwise false.
 */
static bool
pm_encoding_cp51932_isupper_char(const uint8_t *b, ptrdiff_t n) {
    // Double-byte and unrecognized sequences are reported as not uppercase.
    if (pm_encoding_cp51932_char_width(b, n) != 1) {
        return false;
    }

    return pm_encoding_ascii_isupper_char(b, n);
}
48
+
49
+ /** cp51932 encoding */
50
+ pm_encoding_t pm_encoding_cp51932 = {
51
+ .name = "cp51932",
52
+ .char_width = pm_encoding_cp51932_char_width,
53
+ .alnum_char = pm_encoding_cp51932_alnum_char,
54
+ .alpha_char = pm_encoding_cp51932_alpha_char,
55
+ .isupper_char = pm_encoding_cp51932_isupper_char,
56
+ .multibyte = true
57
+ };
data/src/enc/pm_euc_jp.c CHANGED
@@ -18,6 +18,16 @@ pm_encoding_euc_jp_char_width(const uint8_t *b, ptrdiff_t n) {
18
18
  return 2;
19
19
  }
20
20
 
21
+ // These are the triple byte characters.
22
+ if (
23
+ (n > 2) &&
24
+ (b[0] == 0x8F) &&
25
+ (b[1] >= 0xA1 && b[2] <= 0xFE) &&
26
+ (b[2] >= 0xA1 && b[2] <= 0xFE)
27
+ ) {
28
+ return 3;
29
+ }
30
+
21
31
  return 0;
22
32
  }
23
33
 
data/src/enc/pm_gbk.c CHANGED
@@ -3,7 +3,7 @@
3
3
  static size_t
4
4
  pm_encoding_gbk_char_width(const uint8_t *b, ptrdiff_t n) {
5
5
  // These are the single byte characters.
6
- if (*b < 0x80) {
6
+ if (*b <= 0x80) {
7
7
  return 1;
8
8
  }
9
9
 
@@ -15,7 +15,10 @@ pm_encoding_gbk_char_width(const uint8_t *b, ptrdiff_t n) {
15
15
  ((b[0] >= 0xB0 && b[0] <= 0xF7) && (b[1] >= 0xA1 && b[1] <= 0xFE)) || // GBK/2
16
16
  ((b[0] >= 0x81 && b[0] <= 0xA0) && (b[1] >= 0x40 && b[1] <= 0xFE) && (b[1] != 0x7F)) || // GBK/3
17
17
  ((b[0] >= 0xAA && b[0] <= 0xFE) && (b[1] >= 0x40 && b[1] <= 0xA0) && (b[1] != 0x7F)) || // GBK/4
18
- ((b[0] >= 0xA8 && b[0] <= 0xA9) && (b[1] >= 0x40 && b[1] <= 0xA0) && (b[1] != 0x7F)) // GBK/5
18
+ ((b[0] >= 0xA8 && b[0] <= 0xA9) && (b[1] >= 0x40 && b[1] <= 0xA0) && (b[1] != 0x7F)) || // GBK/5
19
+ ((b[0] >= 0xAA && b[0] <= 0xAF) && (b[1] >= 0xA1 && b[1] <= 0xFE)) || // user-defined 1
20
+ ((b[0] >= 0xF8 && b[0] <= 0xFE) && (b[1] >= 0xA1 && b[1] <= 0xFE)) || // user-defined 2
21
+ ((b[0] >= 0xA1 && b[0] <= 0xA7) && (b[1] >= 0x40 && b[1] <= 0xA0) && (b[1] != 0x7F)) // user-defined 3
19
22
  )
20
23
  ) {
21
24
  return 2;