prism 1.6.0 → 1.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +23 -1
  3. data/Makefile +3 -3
  4. data/README.md +1 -1
  5. data/config.yml +28 -3
  6. data/docs/build_system.md +2 -2
  7. data/docs/cruby_compilation.md +1 -1
  8. data/docs/releasing.md +2 -2
  9. data/ext/prism/api_node.c +7 -3
  10. data/ext/prism/extconf.rb +1 -1
  11. data/ext/prism/extension.c +2 -3
  12. data/ext/prism/extension.h +1 -1
  13. data/include/prism/ast.h +54 -20
  14. data/include/prism/diagnostic.h +2 -0
  15. data/include/prism/options.h +8 -2
  16. data/include/prism/parser.h +3 -0
  17. data/include/prism/version.h +2 -2
  18. data/include/prism.h +1 -1
  19. data/lib/prism/dot_visitor.rb +5 -0
  20. data/lib/prism/dsl.rb +2 -2
  21. data/lib/prism/ffi.rb +3 -1
  22. data/lib/prism/inspect_visitor.rb +1 -0
  23. data/lib/prism/node.rb +52 -13
  24. data/lib/prism/parse_result.rb +2 -15
  25. data/lib/prism/polyfill/scan_byte.rb +1 -1
  26. data/lib/prism/reflection.rb +1 -1
  27. data/lib/prism/serialize.rb +6 -4
  28. data/lib/prism/translation/parser/compiler.rb +16 -16
  29. data/lib/prism/translation/parser.rb +5 -3
  30. data/lib/prism/translation/parser35.rb +1 -6
  31. data/lib/prism/translation/parser40.rb +13 -0
  32. data/lib/prism/translation/parser41.rb +13 -0
  33. data/lib/prism/translation/parser_current.rb +4 -2
  34. data/lib/prism/translation/ripper.rb +2 -2
  35. data/lib/prism/translation/ruby_parser.rb +53 -18
  36. data/lib/prism/translation.rb +2 -0
  37. data/lib/prism.rb +4 -5
  38. data/prism.gemspec +5 -1
  39. data/rbi/prism/dsl.rbi +3 -3
  40. data/rbi/prism/node.rbi +21 -8
  41. data/rbi/prism/translation/parser35.rbi +0 -2
  42. data/rbi/prism/translation/parser40.rbi +6 -0
  43. data/rbi/prism/translation/parser41.rbi +6 -0
  44. data/sig/prism/dsl.rbs +2 -2
  45. data/sig/prism/node.rbs +18 -8
  46. data/src/diagnostic.c +5 -1
  47. data/src/encoding.c +172 -67
  48. data/src/node.c +9 -0
  49. data/src/options.c +17 -7
  50. data/src/prettyprint.c +16 -0
  51. data/src/prism.c +1192 -1895
  52. data/src/serialize.c +7 -1
  53. data/src/token_type.c +2 -2
  54. data/src/util/pm_constant_pool.c +1 -1
  55. metadata +5 -1
data/src/encoding.c CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  typedef uint32_t pm_unicode_codepoint_t;
4
4
 
5
- #define UNICODE_ALPHA_CODEPOINTS_LENGTH 1450
5
+ #define UNICODE_ALPHA_CODEPOINTS_LENGTH 1508
6
6
  static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEPOINTS_LENGTH] = {
7
7
  0x100, 0x2C1,
8
8
  0x2C6, 0x2D1,
@@ -10,7 +10,7 @@ static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP
10
10
  0x2EC, 0x2EC,
11
11
  0x2EE, 0x2EE,
12
12
  0x345, 0x345,
13
- 0x370, 0x374,
13
+ 0x363, 0x374,
14
14
  0x376, 0x377,
15
15
  0x37A, 0x37D,
16
16
  0x37F, 0x37F,
@@ -50,7 +50,8 @@ static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP
50
50
  0x840, 0x858,
51
51
  0x860, 0x86A,
52
52
  0x870, 0x887,
53
- 0x889, 0x88E,
53
+ 0x889, 0x88F,
54
+ 0x897, 0x897,
54
55
  0x8A0, 0x8C9,
55
56
  0x8D4, 0x8DF,
56
57
  0x8E3, 0x8E9,
@@ -140,7 +141,7 @@ static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP
140
141
  0xC4A, 0xC4C,
141
142
  0xC55, 0xC56,
142
143
  0xC58, 0xC5A,
143
- 0xC5D, 0xC5D,
144
+ 0xC5C, 0xC5D,
144
145
  0xC60, 0xC63,
145
146
  0xC80, 0xC83,
146
147
  0xC85, 0xC8C,
@@ -152,7 +153,7 @@ static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP
152
153
  0xCC6, 0xCC8,
153
154
  0xCCA, 0xCCC,
154
155
  0xCD5, 0xCD6,
155
- 0xCDD, 0xCDE,
156
+ 0xCDC, 0xCDE,
156
157
  0xCE0, 0xCE3,
157
158
  0xCF1, 0xCF3,
158
159
  0xD00, 0xD0C,
@@ -264,7 +265,7 @@ static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP
264
265
  0x1C00, 0x1C36,
265
266
  0x1C4D, 0x1C4F,
266
267
  0x1C5A, 0x1C7D,
267
- 0x1C80, 0x1C88,
268
+ 0x1C80, 0x1C8A,
268
269
  0x1C90, 0x1CBA,
269
270
  0x1CBD, 0x1CBF,
270
271
  0x1CE9, 0x1CEC,
@@ -272,7 +273,7 @@ static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP
272
273
  0x1CF5, 0x1CF6,
273
274
  0x1CFA, 0x1CFA,
274
275
  0x1D00, 0x1DBF,
275
- 0x1DE7, 0x1DF4,
276
+ 0x1DD3, 0x1DF4,
276
277
  0x1E00, 0x1F15,
277
278
  0x1F18, 0x1F1D,
278
279
  0x1F20, 0x1F45,
@@ -352,11 +353,8 @@ static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP
352
353
  0xA67F, 0xA6EF,
353
354
  0xA717, 0xA71F,
354
355
  0xA722, 0xA788,
355
- 0xA78B, 0xA7CA,
356
- 0xA7D0, 0xA7D1,
357
- 0xA7D3, 0xA7D3,
358
- 0xA7D5, 0xA7D9,
359
- 0xA7F2, 0xA805,
356
+ 0xA78B, 0xA7DC,
357
+ 0xA7F1, 0xA805,
360
358
  0xA807, 0xA827,
361
359
  0xA840, 0xA873,
362
360
  0xA880, 0xA8C3,
@@ -446,6 +444,7 @@ static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP
446
444
  0x105A3, 0x105B1,
447
445
  0x105B3, 0x105B9,
448
446
  0x105BB, 0x105BC,
447
+ 0x105C0, 0x105F3,
449
448
  0x10600, 0x10736,
450
449
  0x10740, 0x10755,
451
450
  0x10760, 0x10767,
@@ -464,6 +463,7 @@ static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP
464
463
  0x108F4, 0x108F5,
465
464
  0x10900, 0x10915,
466
465
  0x10920, 0x10939,
466
+ 0x10940, 0x10959,
467
467
  0x10980, 0x109B7,
468
468
  0x109BE, 0x109BF,
469
469
  0x10A00, 0x10A03,
@@ -483,9 +483,14 @@ static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP
483
483
  0x10C80, 0x10CB2,
484
484
  0x10CC0, 0x10CF2,
485
485
  0x10D00, 0x10D27,
486
+ 0x10D4A, 0x10D65,
487
+ 0x10D69, 0x10D69,
488
+ 0x10D6F, 0x10D85,
486
489
  0x10E80, 0x10EA9,
487
490
  0x10EAB, 0x10EAC,
488
491
  0x10EB0, 0x10EB1,
492
+ 0x10EC2, 0x10EC7,
493
+ 0x10EFA, 0x10EFC,
489
494
  0x10F00, 0x10F1C,
490
495
  0x10F27, 0x10F27,
491
496
  0x10F30, 0x10F45,
@@ -529,6 +534,17 @@ static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP
529
534
  0x11350, 0x11350,
530
535
  0x11357, 0x11357,
531
536
  0x1135D, 0x11363,
537
+ 0x11380, 0x11389,
538
+ 0x1138B, 0x1138B,
539
+ 0x1138E, 0x1138E,
540
+ 0x11390, 0x113B5,
541
+ 0x113B7, 0x113C0,
542
+ 0x113C2, 0x113C2,
543
+ 0x113C5, 0x113C5,
544
+ 0x113C7, 0x113CA,
545
+ 0x113CC, 0x113CD,
546
+ 0x113D1, 0x113D1,
547
+ 0x113D3, 0x113D3,
532
548
  0x11400, 0x11441,
533
549
  0x11443, 0x11445,
534
550
  0x11447, 0x1144A,
@@ -567,6 +583,8 @@ static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP
567
583
  0x11A50, 0x11A97,
568
584
  0x11A9D, 0x11A9D,
569
585
  0x11AB0, 0x11AF8,
586
+ 0x11B60, 0x11B67,
587
+ 0x11BC0, 0x11BE0,
570
588
  0x11C00, 0x11C08,
571
589
  0x11C0A, 0x11C36,
572
590
  0x11C38, 0x11C3E,
@@ -588,6 +606,7 @@ static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP
588
606
  0x11D90, 0x11D91,
589
607
  0x11D93, 0x11D96,
590
608
  0x11D98, 0x11D98,
609
+ 0x11DB0, 0x11DDB,
591
610
  0x11EE0, 0x11EF6,
592
611
  0x11F00, 0x11F10,
593
612
  0x11F12, 0x11F3A,
@@ -599,7 +618,9 @@ static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP
599
618
  0x12F90, 0x12FF0,
600
619
  0x13000, 0x1342F,
601
620
  0x13441, 0x13446,
621
+ 0x13460, 0x143FA,
602
622
  0x14400, 0x14646,
623
+ 0x16100, 0x1612E,
603
624
  0x16800, 0x16A38,
604
625
  0x16A40, 0x16A5E,
605
626
  0x16A70, 0x16ABE,
@@ -608,16 +629,19 @@ static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP
608
629
  0x16B40, 0x16B43,
609
630
  0x16B63, 0x16B77,
610
631
  0x16B7D, 0x16B8F,
632
+ 0x16D40, 0x16D6C,
611
633
  0x16E40, 0x16E7F,
634
+ 0x16EA0, 0x16EB8,
635
+ 0x16EBB, 0x16ED3,
612
636
  0x16F00, 0x16F4A,
613
637
  0x16F4F, 0x16F87,
614
638
  0x16F8F, 0x16F9F,
615
639
  0x16FE0, 0x16FE1,
616
640
  0x16FE3, 0x16FE3,
617
- 0x16FF0, 0x16FF1,
618
- 0x17000, 0x187F7,
619
- 0x18800, 0x18CD5,
620
- 0x18D00, 0x18D08,
641
+ 0x16FF0, 0x16FF6,
642
+ 0x17000, 0x18CD5,
643
+ 0x18CFF, 0x18D1E,
644
+ 0x18D80, 0x18DF2,
621
645
  0x1AFF0, 0x1AFF3,
622
646
  0x1AFF5, 0x1AFFB,
623
647
  0x1AFFD, 0x1AFFE,
@@ -677,6 +701,11 @@ static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP
677
701
  0x1E290, 0x1E2AD,
678
702
  0x1E2C0, 0x1E2EB,
679
703
  0x1E4D0, 0x1E4EB,
704
+ 0x1E5D0, 0x1E5ED,
705
+ 0x1E5F0, 0x1E5F0,
706
+ 0x1E6C0, 0x1E6DE,
707
+ 0x1E6E0, 0x1E6F5,
708
+ 0x1E6FE, 0x1E6FF,
680
709
  0x1E7E0, 0x1E7E6,
681
710
  0x1E7E8, 0x1E7EB,
682
711
  0x1E7ED, 0x1E7EE,
@@ -722,16 +751,16 @@ static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP
722
751
  0x1F150, 0x1F169,
723
752
  0x1F170, 0x1F189,
724
753
  0x20000, 0x2A6DF,
725
- 0x2A700, 0x2B739,
726
- 0x2B740, 0x2B81D,
727
- 0x2B820, 0x2CEA1,
754
+ 0x2A700, 0x2B81D,
755
+ 0x2B820, 0x2CEAD,
728
756
  0x2CEB0, 0x2EBE0,
757
+ 0x2EBF0, 0x2EE5D,
729
758
  0x2F800, 0x2FA1D,
730
759
  0x30000, 0x3134A,
731
- 0x31350, 0x323AF,
760
+ 0x31350, 0x33479,
732
761
  };
733
762
 
734
- #define UNICODE_ALNUM_CODEPOINTS_LENGTH 1528
763
+ #define UNICODE_ALNUM_CODEPOINTS_LENGTH 1598
735
764
  static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEPOINTS_LENGTH] = {
736
765
  0x100, 0x2C1,
737
766
  0x2C6, 0x2D1,
@@ -739,7 +768,7 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP
739
768
  0x2EC, 0x2EC,
740
769
  0x2EE, 0x2EE,
741
770
  0x345, 0x345,
742
- 0x370, 0x374,
771
+ 0x363, 0x374,
743
772
  0x376, 0x377,
744
773
  0x37A, 0x37D,
745
774
  0x37F, 0x37F,
@@ -778,7 +807,8 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP
778
807
  0x840, 0x858,
779
808
  0x860, 0x86A,
780
809
  0x870, 0x887,
781
- 0x889, 0x88E,
810
+ 0x889, 0x88F,
811
+ 0x897, 0x897,
782
812
  0x8A0, 0x8C9,
783
813
  0x8D4, 0x8DF,
784
814
  0x8E3, 0x8E9,
@@ -872,7 +902,7 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP
872
902
  0xC4A, 0xC4C,
873
903
  0xC55, 0xC56,
874
904
  0xC58, 0xC5A,
875
- 0xC5D, 0xC5D,
905
+ 0xC5C, 0xC5D,
876
906
  0xC60, 0xC63,
877
907
  0xC66, 0xC6F,
878
908
  0xC80, 0xC83,
@@ -885,7 +915,7 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP
885
915
  0xCC6, 0xCC8,
886
916
  0xCCA, 0xCCC,
887
917
  0xCD5, 0xCD6,
888
- 0xCDD, 0xCDE,
918
+ 0xCDC, 0xCDE,
889
919
  0xCE0, 0xCE3,
890
920
  0xCE6, 0xCEF,
891
921
  0xCF1, 0xCF3,
@@ -1007,7 +1037,7 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP
1007
1037
  0x1C00, 0x1C36,
1008
1038
  0x1C40, 0x1C49,
1009
1039
  0x1C4D, 0x1C7D,
1010
- 0x1C80, 0x1C88,
1040
+ 0x1C80, 0x1C8A,
1011
1041
  0x1C90, 0x1CBA,
1012
1042
  0x1CBD, 0x1CBF,
1013
1043
  0x1CE9, 0x1CEC,
@@ -1015,7 +1045,7 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP
1015
1045
  0x1CF5, 0x1CF6,
1016
1046
  0x1CFA, 0x1CFA,
1017
1047
  0x1D00, 0x1DBF,
1018
- 0x1DE7, 0x1DF4,
1048
+ 0x1DD3, 0x1DF4,
1019
1049
  0x1E00, 0x1F15,
1020
1050
  0x1F18, 0x1F1D,
1021
1051
  0x1F20, 0x1F45,
@@ -1094,11 +1124,8 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP
1094
1124
  0xA67F, 0xA6EF,
1095
1125
  0xA717, 0xA71F,
1096
1126
  0xA722, 0xA788,
1097
- 0xA78B, 0xA7CA,
1098
- 0xA7D0, 0xA7D1,
1099
- 0xA7D3, 0xA7D3,
1100
- 0xA7D5, 0xA7D9,
1101
- 0xA7F2, 0xA805,
1127
+ 0xA78B, 0xA7DC,
1128
+ 0xA7F1, 0xA805,
1102
1129
  0xA807, 0xA827,
1103
1130
  0xA840, 0xA873,
1104
1131
  0xA880, 0xA8C3,
@@ -1191,6 +1218,7 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP
1191
1218
  0x105A3, 0x105B1,
1192
1219
  0x105B3, 0x105B9,
1193
1220
  0x105BB, 0x105BC,
1221
+ 0x105C0, 0x105F3,
1194
1222
  0x10600, 0x10736,
1195
1223
  0x10740, 0x10755,
1196
1224
  0x10760, 0x10767,
@@ -1209,6 +1237,7 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP
1209
1237
  0x108F4, 0x108F5,
1210
1238
  0x10900, 0x10915,
1211
1239
  0x10920, 0x10939,
1240
+ 0x10940, 0x10959,
1212
1241
  0x10980, 0x109B7,
1213
1242
  0x109BE, 0x109BF,
1214
1243
  0x10A00, 0x10A03,
@@ -1229,9 +1258,14 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP
1229
1258
  0x10CC0, 0x10CF2,
1230
1259
  0x10D00, 0x10D27,
1231
1260
  0x10D30, 0x10D39,
1261
+ 0x10D40, 0x10D65,
1262
+ 0x10D69, 0x10D69,
1263
+ 0x10D6F, 0x10D85,
1232
1264
  0x10E80, 0x10EA9,
1233
1265
  0x10EAB, 0x10EAC,
1234
1266
  0x10EB0, 0x10EB1,
1267
+ 0x10EC2, 0x10EC7,
1268
+ 0x10EFA, 0x10EFC,
1235
1269
  0x10F00, 0x10F1C,
1236
1270
  0x10F27, 0x10F27,
1237
1271
  0x10F30, 0x10F45,
@@ -1278,6 +1312,17 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP
1278
1312
  0x11350, 0x11350,
1279
1313
  0x11357, 0x11357,
1280
1314
  0x1135D, 0x11363,
1315
+ 0x11380, 0x11389,
1316
+ 0x1138B, 0x1138B,
1317
+ 0x1138E, 0x1138E,
1318
+ 0x11390, 0x113B5,
1319
+ 0x113B7, 0x113C0,
1320
+ 0x113C2, 0x113C2,
1321
+ 0x113C5, 0x113C5,
1322
+ 0x113C7, 0x113CA,
1323
+ 0x113CC, 0x113CD,
1324
+ 0x113D1, 0x113D1,
1325
+ 0x113D3, 0x113D3,
1281
1326
  0x11400, 0x11441,
1282
1327
  0x11443, 0x11445,
1283
1328
  0x11447, 0x1144A,
@@ -1297,6 +1342,7 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP
1297
1342
  0x11680, 0x116B5,
1298
1343
  0x116B8, 0x116B8,
1299
1344
  0x116C0, 0x116C9,
1345
+ 0x116D0, 0x116E3,
1300
1346
  0x11700, 0x1171A,
1301
1347
  0x1171D, 0x1172A,
1302
1348
  0x11730, 0x11739,
@@ -1322,6 +1368,9 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP
1322
1368
  0x11A50, 0x11A97,
1323
1369
  0x11A9D, 0x11A9D,
1324
1370
  0x11AB0, 0x11AF8,
1371
+ 0x11B60, 0x11B67,
1372
+ 0x11BC0, 0x11BE0,
1373
+ 0x11BF0, 0x11BF9,
1325
1374
  0x11C00, 0x11C08,
1326
1375
  0x11C0A, 0x11C36,
1327
1376
  0x11C38, 0x11C3E,
@@ -1346,6 +1395,8 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP
1346
1395
  0x11D93, 0x11D96,
1347
1396
  0x11D98, 0x11D98,
1348
1397
  0x11DA0, 0x11DA9,
1398
+ 0x11DB0, 0x11DDB,
1399
+ 0x11DE0, 0x11DE9,
1349
1400
  0x11EE0, 0x11EF6,
1350
1401
  0x11F00, 0x11F10,
1351
1402
  0x11F12, 0x11F3A,
@@ -1358,7 +1409,10 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP
1358
1409
  0x12F90, 0x12FF0,
1359
1410
  0x13000, 0x1342F,
1360
1411
  0x13441, 0x13446,
1412
+ 0x13460, 0x143FA,
1361
1413
  0x14400, 0x14646,
1414
+ 0x16100, 0x1612E,
1415
+ 0x16130, 0x16139,
1362
1416
  0x16800, 0x16A38,
1363
1417
  0x16A40, 0x16A5E,
1364
1418
  0x16A60, 0x16A69,
@@ -1370,16 +1424,20 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP
1370
1424
  0x16B50, 0x16B59,
1371
1425
  0x16B63, 0x16B77,
1372
1426
  0x16B7D, 0x16B8F,
1427
+ 0x16D40, 0x16D6C,
1428
+ 0x16D70, 0x16D79,
1373
1429
  0x16E40, 0x16E7F,
1430
+ 0x16EA0, 0x16EB8,
1431
+ 0x16EBB, 0x16ED3,
1374
1432
  0x16F00, 0x16F4A,
1375
1433
  0x16F4F, 0x16F87,
1376
1434
  0x16F8F, 0x16F9F,
1377
1435
  0x16FE0, 0x16FE1,
1378
1436
  0x16FE3, 0x16FE3,
1379
- 0x16FF0, 0x16FF1,
1380
- 0x17000, 0x187F7,
1381
- 0x18800, 0x18CD5,
1382
- 0x18D00, 0x18D08,
1437
+ 0x16FF0, 0x16FF6,
1438
+ 0x17000, 0x18CD5,
1439
+ 0x18CFF, 0x18D1E,
1440
+ 0x18D80, 0x18DF2,
1383
1441
  0x1AFF0, 0x1AFF3,
1384
1442
  0x1AFF5, 0x1AFFB,
1385
1443
  0x1AFFD, 0x1AFFE,
@@ -1394,6 +1452,7 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP
1394
1452
  0x1BC80, 0x1BC88,
1395
1453
  0x1BC90, 0x1BC99,
1396
1454
  0x1BC9E, 0x1BC9E,
1455
+ 0x1CCF0, 0x1CCF9,
1397
1456
  0x1D400, 0x1D454,
1398
1457
  0x1D456, 0x1D49C,
1399
1458
  0x1D49E, 0x1D49F,
@@ -1443,6 +1502,11 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP
1443
1502
  0x1E2F0, 0x1E2F9,
1444
1503
  0x1E4D0, 0x1E4EB,
1445
1504
  0x1E4F0, 0x1E4F9,
1505
+ 0x1E5D0, 0x1E5ED,
1506
+ 0x1E5F0, 0x1E5FA,
1507
+ 0x1E6C0, 0x1E6DE,
1508
+ 0x1E6E0, 0x1E6F5,
1509
+ 0x1E6FE, 0x1E6FF,
1446
1510
  0x1E7E0, 0x1E7E6,
1447
1511
  0x1E7E8, 0x1E7EB,
1448
1512
  0x1E7ED, 0x1E7EE,
@@ -1490,16 +1554,16 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP
1490
1554
  0x1F170, 0x1F189,
1491
1555
  0x1FBF0, 0x1FBF9,
1492
1556
  0x20000, 0x2A6DF,
1493
- 0x2A700, 0x2B739,
1494
- 0x2B740, 0x2B81D,
1495
- 0x2B820, 0x2CEA1,
1557
+ 0x2A700, 0x2B81D,
1558
+ 0x2B820, 0x2CEAD,
1496
1559
  0x2CEB0, 0x2EBE0,
1560
+ 0x2EBF0, 0x2EE5D,
1497
1561
  0x2F800, 0x2FA1D,
1498
1562
  0x30000, 0x3134A,
1499
- 0x31350, 0x323AF,
1563
+ 0x31350, 0x33479,
1500
1564
  };
1501
1565
 
1502
- #define UNICODE_ISUPPER_CODEPOINTS_LENGTH 1302
1566
+ #define UNICODE_ISUPPER_CODEPOINTS_LENGTH 1320
1503
1567
  static const pm_unicode_codepoint_t unicode_isupper_codepoints[UNICODE_ISUPPER_CODEPOINTS_LENGTH] = {
1504
1568
  0x100, 0x100,
1505
1569
  0x102, 0x102,
@@ -1774,6 +1838,7 @@ static const pm_unicode_codepoint_t unicode_isupper_codepoints[UNICODE_ISUPPER_C
1774
1838
  0x10C7, 0x10C7,
1775
1839
  0x10CD, 0x10CD,
1776
1840
  0x13A0, 0x13F5,
1841
+ 0x1C89, 0x1C89,
1777
1842
  0x1C90, 0x1CBA,
1778
1843
  0x1CBD, 0x1CBF,
1779
1844
  0x1E00, 0x1E00,
@@ -2103,9 +2168,15 @@ static const pm_unicode_codepoint_t unicode_isupper_codepoints[UNICODE_ISUPPER_C
2103
2168
  0xA7C2, 0xA7C2,
2104
2169
  0xA7C4, 0xA7C7,
2105
2170
  0xA7C9, 0xA7C9,
2171
+ 0xA7CB, 0xA7CC,
2172
+ 0xA7CE, 0xA7CE,
2106
2173
  0xA7D0, 0xA7D0,
2174
+ 0xA7D2, 0xA7D2,
2175
+ 0xA7D4, 0xA7D4,
2107
2176
  0xA7D6, 0xA7D6,
2108
2177
  0xA7D8, 0xA7D8,
2178
+ 0xA7DA, 0xA7DA,
2179
+ 0xA7DC, 0xA7DC,
2109
2180
  0xA7F5, 0xA7F5,
2110
2181
  0xFF21, 0xFF3A,
2111
2182
  0x10400, 0x10427,
@@ -2115,8 +2186,10 @@ static const pm_unicode_codepoint_t unicode_isupper_codepoints[UNICODE_ISUPPER_C
2115
2186
  0x1058C, 0x10592,
2116
2187
  0x10594, 0x10595,
2117
2188
  0x10C80, 0x10CB2,
2189
+ 0x10D50, 0x10D65,
2118
2190
  0x118A0, 0x118BF,
2119
2191
  0x16E40, 0x16E5F,
2192
+ 0x16EA0, 0x16EB8,
2120
2193
  0x1D400, 0x1D419,
2121
2194
  0x1D434, 0x1D44D,
2122
2195
  0x1D468, 0x1D481,
@@ -2304,6 +2377,10 @@ pm_encoding_utf_8_char_width(const uint8_t *b, ptrdiff_t n) {
2304
2377
  */
2305
2378
  size_t
2306
2379
  pm_encoding_utf_8_alpha_char(const uint8_t *b, ptrdiff_t n) {
2380
+ if (n == 0) {
2381
+ return 0;
2382
+ }
2383
+
2307
2384
  if (*b < 0x80) {
2308
2385
  return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT) ? 1 : 0;
2309
2386
  }
@@ -2324,6 +2401,10 @@ pm_encoding_utf_8_alpha_char(const uint8_t *b, ptrdiff_t n) {
2324
2401
  */
2325
2402
  size_t
2326
2403
  pm_encoding_utf_8_alnum_char(const uint8_t *b, ptrdiff_t n) {
2404
+ if (n == 0) {
2405
+ return 0;
2406
+ }
2407
+
2327
2408
  if (*b < 0x80) {
2328
2409
  return (pm_encoding_unicode_table[*b] & (PRISM_ENCODING_ALPHANUMERIC_BIT)) ? 1 : 0;
2329
2410
  }
@@ -2344,6 +2425,10 @@ pm_encoding_utf_8_alnum_char(const uint8_t *b, ptrdiff_t n) {
2344
2425
  */
2345
2426
  bool
2346
2427
  pm_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n) {
2428
+ if (n == 0) {
2429
+ return 0;
2430
+ }
2431
+
2347
2432
  if (*b < 0x80) {
2348
2433
  return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_UPPERCASE_BIT) ? true : false;
2349
2434
  }
@@ -2362,7 +2447,8 @@ pm_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n) {
2362
2447
 
2363
2448
  static pm_unicode_codepoint_t
2364
2449
  pm_cesu_8_codepoint(const uint8_t *b, ptrdiff_t n, size_t *width) {
2365
- if (b[0] < 0x80) {
2450
+
2451
+ if ((n > 0) && (b[0] < 0x80)) {
2366
2452
  *width = 1;
2367
2453
  return (pm_unicode_codepoint_t) b[0];
2368
2454
  }
@@ -2401,6 +2487,10 @@ pm_cesu_8_codepoint(const uint8_t *b, ptrdiff_t n, size_t *width) {
2401
2487
 
2402
2488
  static size_t
2403
2489
  pm_encoding_cesu_8_char_width(const uint8_t *b, ptrdiff_t n) {
2490
+ if (n == 0) {
2491
+ return 0;
2492
+ }
2493
+
2404
2494
  size_t width;
2405
2495
  pm_cesu_8_codepoint(b, n, &width);
2406
2496
  return width;
@@ -2408,6 +2498,10 @@ pm_encoding_cesu_8_char_width(const uint8_t *b, ptrdiff_t n) {
2408
2498
 
2409
2499
  static size_t
2410
2500
  pm_encoding_cesu_8_alpha_char(const uint8_t *b, ptrdiff_t n) {
2501
+ if (n == 0) {
2502
+ return 0;
2503
+ }
2504
+
2411
2505
  if (*b < 0x80) {
2412
2506
  return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT) ? 1 : 0;
2413
2507
  }
@@ -2424,6 +2518,10 @@ pm_encoding_cesu_8_alpha_char(const uint8_t *b, ptrdiff_t n) {
2424
2518
 
2425
2519
  static size_t
2426
2520
  pm_encoding_cesu_8_alnum_char(const uint8_t *b, ptrdiff_t n) {
2521
+ if (n == 0) {
2522
+ return 0;
2523
+ }
2524
+
2427
2525
  if (*b < 0x80) {
2428
2526
  return (pm_encoding_unicode_table[*b] & (PRISM_ENCODING_ALPHANUMERIC_BIT)) ? 1 : 0;
2429
2527
  }
@@ -2440,6 +2538,10 @@ pm_encoding_cesu_8_alnum_char(const uint8_t *b, ptrdiff_t n) {
2440
2538
 
2441
2539
  static bool
2442
2540
  pm_encoding_cesu_8_isupper_char(const uint8_t *b, ptrdiff_t n) {
2541
+ if (n == 0) {
2542
+ return 0;
2543
+ }
2544
+
2443
2545
  if (*b < 0x80) {
2444
2546
  return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_UPPERCASE_BIT) ? true : false;
2445
2547
  }
@@ -3855,14 +3957,14 @@ static const uint8_t pm_encoding_windows_874_table[256] = {
3855
3957
  };
3856
3958
 
3857
3959
  #define PRISM_ENCODING_TABLE(name) \
3858
- static size_t pm_encoding_ ##name ## _alpha_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) { \
3859
- return (pm_encoding_ ##name ## _table[*b] & PRISM_ENCODING_ALPHABETIC_BIT); \
3960
+ static size_t pm_encoding_ ##name ## _alpha_char(const uint8_t *b, ptrdiff_t n) { \
3961
+ return ((n > 0) && (pm_encoding_ ##name ## _table[*b] & PRISM_ENCODING_ALPHABETIC_BIT)); \
3860
3962
  } \
3861
- static size_t pm_encoding_ ##name ## _alnum_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) { \
3862
- return (pm_encoding_ ##name ## _table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT) ? 1 : 0; \
3963
+ static size_t pm_encoding_ ##name ## _alnum_char(const uint8_t *b, ptrdiff_t n) { \
3964
+ return ((n > 0) && (pm_encoding_ ##name ## _table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT)) ? 1 : 0; \
3863
3965
  } \
3864
- static bool pm_encoding_ ##name ## _isupper_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) { \
3865
- return (pm_encoding_ ##name ## _table[*b] & PRISM_ENCODING_UPPERCASE_BIT); \
3966
+ static bool pm_encoding_ ##name ## _isupper_char(const uint8_t *b, ptrdiff_t n) { \
3967
+ return ((n > 0) && (pm_encoding_ ##name ## _table[*b] & PRISM_ENCODING_UPPERCASE_BIT)); \
3866
3968
  }
3867
3969
 
3868
3970
  PRISM_ENCODING_TABLE(cp850)
@@ -3931,8 +4033,8 @@ PRISM_ENCODING_TABLE(windows_874)
3931
4033
  * means that if the top bit is not set, the character is 1 byte long.
3932
4034
  */
3933
4035
  static size_t
3934
- pm_encoding_ascii_char_width(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) {
3935
- return *b < 0x80 ? 1 : 0;
4036
+ pm_encoding_ascii_char_width(const uint8_t *b, ptrdiff_t n) {
4037
+ return ((n > 0) && (*b < 0x80)) ? 1 : 0;
3936
4038
  }
3937
4039
 
3938
4040
  /**
@@ -3940,8 +4042,8 @@ pm_encoding_ascii_char_width(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t
3940
4042
  * alphabetical character.
3941
4043
  */
3942
4044
  static size_t
3943
- pm_encoding_ascii_alpha_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) {
3944
- return (pm_encoding_ascii_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT);
4045
+ pm_encoding_ascii_alpha_char(const uint8_t *b, ptrdiff_t n) {
4046
+ return (n > 0) ? (pm_encoding_ascii_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT) : 0;
3945
4047
  }
3946
4048
 
3947
4049
  /**
@@ -3951,7 +4053,7 @@ pm_encoding_ascii_alpha_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t
3951
4053
  */
3952
4054
  static size_t
3953
4055
  pm_encoding_ascii_alpha_char_7bit(const uint8_t *b, ptrdiff_t n) {
3954
- return (*b < 0x80) ? pm_encoding_ascii_alpha_char(b, n) : 0;
4056
+ return ((n > 0) && (*b < 0x80)) ? pm_encoding_ascii_alpha_char(b, n) : 0;
3955
4057
  }
3956
4058
 
3957
4059
  /**
@@ -3959,8 +4061,8 @@ pm_encoding_ascii_alpha_char_7bit(const uint8_t *b, ptrdiff_t n) {
3959
4061
  * alphanumeric character.
3960
4062
  */
3961
4063
  static size_t
3962
- pm_encoding_ascii_alnum_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) {
3963
- return (pm_encoding_ascii_table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT) ? 1 : 0;
4064
+ pm_encoding_ascii_alnum_char(const uint8_t *b, ptrdiff_t n) {
4065
+ return ((n > 0) && (pm_encoding_ascii_table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT)) ? 1 : 0;
3964
4066
  }
3965
4067
 
3966
4068
  /**
@@ -3970,7 +4072,7 @@ pm_encoding_ascii_alnum_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t
3970
4072
  */
3971
4073
  static size_t
3972
4074
  pm_encoding_ascii_alnum_char_7bit(const uint8_t *b, ptrdiff_t n) {
3973
- return (*b < 0x80) ? pm_encoding_ascii_alnum_char(b, n) : 0;
4075
+ return ((n > 0) && (*b < 0x80)) ? pm_encoding_ascii_alnum_char(b, n) : 0;
3974
4076
  }
3975
4077
 
3976
4078
  /**
@@ -3978,8 +4080,8 @@ pm_encoding_ascii_alnum_char_7bit(const uint8_t *b, ptrdiff_t n) {
3978
4080
  * character.
3979
4081
  */
3980
4082
  static bool
3981
- pm_encoding_ascii_isupper_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) {
3982
- return (pm_encoding_ascii_table[*b] & PRISM_ENCODING_UPPERCASE_BIT);
4083
+ pm_encoding_ascii_isupper_char(const uint8_t *b, ptrdiff_t n) {
4084
+ return (n > 0) && (pm_encoding_ascii_table[*b] & PRISM_ENCODING_UPPERCASE_BIT);
3983
4085
  }
3984
4086
 
3985
4087
  /**
@@ -3998,7 +4100,7 @@ pm_encoding_single_char_width(PRISM_ATTRIBUTE_UNUSED const uint8_t *b, PRISM_ATT
3998
4100
  static size_t
3999
4101
  pm_encoding_euc_jp_char_width(const uint8_t *b, ptrdiff_t n) {
4000
4102
  // These are the single byte characters.
4001
- if (*b < 0x80) {
4103
+ if ((n > 0) && (*b < 0x80)) {
4002
4104
  return 1;
4003
4105
  }
4004
4106
 
@@ -4042,6 +4144,9 @@ pm_encoding_euc_jp_isupper_char(const uint8_t *b, ptrdiff_t n) {
4042
4144
  */
4043
4145
  static size_t
4044
4146
  pm_encoding_shift_jis_char_width(const uint8_t *b, ptrdiff_t n) {
4147
+ if (n == 0) {
4148
+ return 0;
4149
+ }
4045
4150
  // These are the single byte characters.
4046
4151
  if (b[0] < 0x80 || (b[0] >= 0xA1 && b[0] <= 0xDF)) {
4047
4152
  return 1;
@@ -4105,7 +4210,7 @@ pm_encoding_shift_jis_isupper_char(const uint8_t *b, ptrdiff_t n) {
4105
4210
  */
4106
4211
  static bool
4107
4212
  pm_encoding_ascii_isupper_char_7bit(const uint8_t *b, ptrdiff_t n) {
4108
- return (*b < 0x80) && pm_encoding_ascii_isupper_char(b, n);
4213
+ return (n > 0) && (*b < 0x80) && pm_encoding_ascii_isupper_char(b, n);
4109
4214
  }
4110
4215
 
4111
4216
  /**
@@ -4115,7 +4220,7 @@ pm_encoding_ascii_isupper_char_7bit(const uint8_t *b, ptrdiff_t n) {
4115
4220
  static size_t
4116
4221
  pm_encoding_big5_char_width(const uint8_t *b, ptrdiff_t n) {
4117
4222
  // These are the single byte characters.
4118
- if (*b < 0x80) {
4223
+ if ((n > 0) && (*b < 0x80)) {
4119
4224
  return 1;
4120
4225
  }
4121
4226
 
@@ -4134,7 +4239,7 @@ pm_encoding_big5_char_width(const uint8_t *b, ptrdiff_t n) {
4134
4239
  static size_t
4135
4240
  pm_encoding_cp949_char_width(const uint8_t *b, ptrdiff_t n) {
4136
4241
  // These are the single byte characters
4137
- if (*b <= 0x80) {
4242
+ if ((n > 0) && (*b <= 0x80)) {
4138
4243
  return 1;
4139
4244
  }
4140
4245
 
@@ -4153,7 +4258,7 @@ pm_encoding_cp949_char_width(const uint8_t *b, ptrdiff_t n) {
4153
4258
  static size_t
4154
4259
  pm_encoding_emacs_mule_char_width(const uint8_t *b, ptrdiff_t n) {
4155
4260
  // These are the 1 byte characters.
4156
- if (*b < 0x80) {
4261
+ if ((n > 0) && (*b < 0x80)) {
4157
4262
  return 1;
4158
4263
  }
4159
4264
 
@@ -4196,7 +4301,7 @@ pm_encoding_emacs_mule_char_width(const uint8_t *b, ptrdiff_t n) {
4196
4301
  static size_t
4197
4302
  pm_encoding_euc_kr_char_width(const uint8_t *b, ptrdiff_t n) {
4198
4303
  // These are the single byte characters.
4199
- if (*b < 0x80) {
4304
+ if ((n > 0) && (*b < 0x80)) {
4200
4305
  return 1;
4201
4306
  }
4202
4307
 
@@ -4215,7 +4320,7 @@ pm_encoding_euc_kr_char_width(const uint8_t *b, ptrdiff_t n) {
4215
4320
  static size_t
4216
4321
  pm_encoding_euc_tw_char_width(const uint8_t *b, ptrdiff_t n) {
4217
4322
  // These are the single byte characters.
4218
- if (*b < 0x80) {
4323
+ if ((n > 0) && (*b < 0x80)) {
4219
4324
  return 1;
4220
4325
  }
4221
4326
 
@@ -4239,7 +4344,7 @@ pm_encoding_euc_tw_char_width(const uint8_t *b, ptrdiff_t n) {
4239
4344
  static size_t
4240
4345
  pm_encoding_gb18030_char_width(const uint8_t *b, ptrdiff_t n) {
4241
4346
  // These are the 1 byte characters.
4242
- if (*b < 0x80) {
4347
+ if ((n > 0) && (*b < 0x80)) {
4243
4348
  return 1;
4244
4349
  }
4245
4350
 
@@ -4263,7 +4368,7 @@ pm_encoding_gb18030_char_width(const uint8_t *b, ptrdiff_t n) {
4263
4368
  static size_t
4264
4369
  pm_encoding_gbk_char_width(const uint8_t *b, ptrdiff_t n) {
4265
4370
  // These are the single byte characters.
4266
- if (*b <= 0x80) {
4371
+ if ((n > 0) && (*b <= 0x80)) {
4267
4372
  return 1;
4268
4373
  }
4269
4374