jruby-prism-parser 0.24.0-java → 1.4.0-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/BSDmakefile +58 -0
- data/CHANGELOG.md +269 -1
- data/CONTRIBUTING.md +0 -4
- data/Makefile +25 -18
- data/README.md +57 -6
- data/config.yml +1724 -140
- data/docs/build_system.md +39 -11
- data/docs/configuration.md +4 -0
- data/docs/cruby_compilation.md +1 -1
- data/docs/fuzzing.md +1 -1
- data/docs/parser_translation.md +14 -9
- data/docs/parsing_rules.md +4 -1
- data/docs/releasing.md +8 -10
- data/docs/relocation.md +34 -0
- data/docs/ripper_translation.md +72 -0
- data/docs/ruby_api.md +2 -1
- data/docs/serialization.md +29 -5
- data/ext/prism/api_node.c +3395 -1999
- data/ext/prism/api_pack.c +9 -0
- data/ext/prism/extconf.rb +55 -34
- data/ext/prism/extension.c +597 -346
- data/ext/prism/extension.h +6 -5
- data/include/prism/ast.h +2612 -455
- data/include/prism/defines.h +160 -2
- data/include/prism/diagnostic.h +188 -76
- data/include/prism/encoding.h +22 -4
- data/include/prism/node.h +89 -17
- data/include/prism/options.h +224 -12
- data/include/prism/pack.h +11 -0
- data/include/prism/parser.h +267 -66
- data/include/prism/prettyprint.h +8 -0
- data/include/prism/regexp.h +18 -8
- data/include/prism/static_literals.h +121 -0
- data/include/prism/util/pm_buffer.h +75 -2
- data/include/prism/util/pm_char.h +1 -2
- data/include/prism/util/pm_constant_pool.h +18 -9
- data/include/prism/util/pm_integer.h +126 -0
- data/include/prism/util/pm_list.h +1 -1
- data/include/prism/util/pm_newline_list.h +19 -0
- data/include/prism/util/pm_string.h +48 -8
- data/include/prism/version.h +3 -3
- data/include/prism.h +99 -5
- data/jruby-prism.jar +0 -0
- data/lib/prism/compiler.rb +11 -1
- data/lib/prism/desugar_compiler.rb +113 -74
- data/lib/prism/dispatcher.rb +45 -1
- data/lib/prism/dot_visitor.rb +201 -77
- data/lib/prism/dsl.rb +673 -461
- data/lib/prism/ffi.rb +233 -45
- data/lib/prism/inspect_visitor.rb +2389 -0
- data/lib/prism/lex_compat.rb +35 -16
- data/lib/prism/mutation_compiler.rb +24 -8
- data/lib/prism/node.rb +7731 -8460
- data/lib/prism/node_ext.rb +328 -32
- data/lib/prism/pack.rb +4 -0
- data/lib/prism/parse_result/comments.rb +34 -24
- data/lib/prism/parse_result/errors.rb +65 -0
- data/lib/prism/parse_result/newlines.rb +102 -12
- data/lib/prism/parse_result.rb +448 -44
- data/lib/prism/pattern.rb +28 -10
- data/lib/prism/polyfill/append_as_bytes.rb +15 -0
- data/lib/prism/polyfill/byteindex.rb +13 -0
- data/lib/prism/polyfill/unpack1.rb +14 -0
- data/lib/prism/reflection.rb +413 -0
- data/lib/prism/relocation.rb +504 -0
- data/lib/prism/serialize.rb +1940 -1198
- data/lib/prism/string_query.rb +30 -0
- data/lib/prism/translation/parser/builder.rb +61 -0
- data/lib/prism/translation/parser/compiler.rb +569 -195
- data/lib/prism/translation/parser/lexer.rb +516 -39
- data/lib/prism/translation/parser.rb +177 -12
- data/lib/prism/translation/parser33.rb +1 -1
- data/lib/prism/translation/parser34.rb +1 -1
- data/lib/prism/translation/parser35.rb +12 -0
- data/lib/prism/translation/ripper/sexp.rb +125 -0
- data/lib/prism/translation/ripper/shim.rb +5 -0
- data/lib/prism/translation/ripper.rb +3224 -462
- data/lib/prism/translation/ruby_parser.rb +194 -69
- data/lib/prism/translation.rb +4 -1
- data/lib/prism/version.rb +1 -1
- data/lib/prism/visitor.rb +13 -0
- data/lib/prism.rb +17 -27
- data/prism.gemspec +57 -17
- data/rbi/prism/compiler.rbi +12 -0
- data/rbi/prism/dsl.rbi +524 -0
- data/rbi/prism/inspect_visitor.rbi +12 -0
- data/rbi/prism/node.rbi +8722 -0
- data/rbi/prism/node_ext.rbi +107 -0
- data/rbi/prism/parse_result.rbi +404 -0
- data/rbi/prism/reflection.rbi +58 -0
- data/rbi/prism/string_query.rbi +12 -0
- data/rbi/prism/translation/parser.rbi +11 -0
- data/rbi/prism/translation/parser33.rbi +6 -0
- data/rbi/prism/translation/parser34.rbi +6 -0
- data/rbi/prism/translation/parser35.rbi +6 -0
- data/rbi/prism/translation/ripper.rbi +15 -0
- data/rbi/prism/visitor.rbi +473 -0
- data/rbi/prism.rbi +44 -7745
- data/sig/prism/compiler.rbs +9 -0
- data/sig/prism/dispatcher.rbs +16 -0
- data/sig/prism/dot_visitor.rbs +6 -0
- data/sig/prism/dsl.rbs +351 -0
- data/sig/prism/inspect_visitor.rbs +22 -0
- data/sig/prism/lex_compat.rbs +10 -0
- data/sig/prism/mutation_compiler.rbs +159 -0
- data/sig/prism/node.rbs +3614 -0
- data/sig/prism/node_ext.rbs +82 -0
- data/sig/prism/pack.rbs +43 -0
- data/sig/prism/parse_result.rbs +192 -0
- data/sig/prism/pattern.rbs +13 -0
- data/sig/prism/reflection.rbs +50 -0
- data/sig/prism/relocation.rbs +185 -0
- data/sig/prism/serialize.rbs +8 -0
- data/sig/prism/string_query.rbs +11 -0
- data/sig/prism/visitor.rbs +169 -0
- data/sig/prism.rbs +248 -4767
- data/src/diagnostic.c +672 -230
- data/src/encoding.c +211 -108
- data/src/node.c +7541 -1653
- data/src/options.c +135 -20
- data/src/pack.c +33 -17
- data/src/prettyprint.c +1543 -1485
- data/src/prism.c +7813 -3050
- data/src/regexp.c +225 -73
- data/src/serialize.c +101 -77
- data/src/static_literals.c +617 -0
- data/src/token_type.c +14 -13
- data/src/util/pm_buffer.c +187 -20
- data/src/util/pm_char.c +5 -5
- data/src/util/pm_constant_pool.c +39 -19
- data/src/util/pm_integer.c +670 -0
- data/src/util/pm_list.c +1 -1
- data/src/util/pm_newline_list.c +43 -5
- data/src/util/pm_string.c +213 -33
- data/src/util/pm_strncasecmp.c +13 -1
- data/src/util/pm_strpbrk.c +32 -6
- metadata +55 -19
- data/docs/ripper.md +0 -36
- data/include/prism/util/pm_state_stack.h +0 -42
- data/include/prism/util/pm_string_list.h +0 -44
- data/lib/prism/debug.rb +0 -206
- data/lib/prism/node_inspector.rb +0 -68
- data/lib/prism/translation/parser/rubocop.rb +0 -45
- data/rbi/prism_static.rbi +0 -207
- data/sig/prism_static.rbs +0 -201
- data/src/util/pm_state_stack.c +0 -25
- data/src/util/pm_string_list.c +0 -28
data/src/encoding.c
CHANGED
@@ -1499,7 +1499,7 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP
|
|
1499
1499
|
0x31350, 0x323AF,
|
1500
1500
|
};
|
1501
1501
|
|
1502
|
-
#define UNICODE_ISUPPER_CODEPOINTS_LENGTH
|
1502
|
+
#define UNICODE_ISUPPER_CODEPOINTS_LENGTH 1302
|
1503
1503
|
static const pm_unicode_codepoint_t unicode_isupper_codepoints[UNICODE_ISUPPER_CODEPOINTS_LENGTH] = {
|
1504
1504
|
0x100, 0x100,
|
1505
1505
|
0x102, 0x102,
|
@@ -1582,9 +1582,9 @@ static const pm_unicode_codepoint_t unicode_isupper_codepoints[UNICODE_ISUPPER_C
|
|
1582
1582
|
0x1B5, 0x1B5,
|
1583
1583
|
0x1B7, 0x1B8,
|
1584
1584
|
0x1BC, 0x1BC,
|
1585
|
-
0x1C4,
|
1586
|
-
0x1C7,
|
1587
|
-
0x1CA,
|
1585
|
+
0x1C4, 0x1C5,
|
1586
|
+
0x1C7, 0x1C8,
|
1587
|
+
0x1CA, 0x1CB,
|
1588
1588
|
0x1CD, 0x1CD,
|
1589
1589
|
0x1CF, 0x1CF,
|
1590
1590
|
0x1D1, 0x1D1,
|
@@ -1602,7 +1602,7 @@ static const pm_unicode_codepoint_t unicode_isupper_codepoints[UNICODE_ISUPPER_C
|
|
1602
1602
|
0x1EA, 0x1EA,
|
1603
1603
|
0x1EC, 0x1EC,
|
1604
1604
|
0x1EE, 0x1EE,
|
1605
|
-
0x1F1,
|
1605
|
+
0x1F1, 0x1F2,
|
1606
1606
|
0x1F4, 0x1F4,
|
1607
1607
|
0x1F6, 0x1F8,
|
1608
1608
|
0x1FA, 0x1FA,
|
@@ -1910,11 +1910,14 @@ static const pm_unicode_codepoint_t unicode_isupper_codepoints[UNICODE_ISUPPER_C
|
|
1910
1910
|
0x1F5D, 0x1F5D,
|
1911
1911
|
0x1F5F, 0x1F5F,
|
1912
1912
|
0x1F68, 0x1F6F,
|
1913
|
-
|
1914
|
-
|
1913
|
+
0x1F88, 0x1F8F,
|
1914
|
+
0x1F98, 0x1F9F,
|
1915
|
+
0x1FA8, 0x1FAF,
|
1916
|
+
0x1FB8, 0x1FBC,
|
1917
|
+
0x1FC8, 0x1FCC,
|
1915
1918
|
0x1FD8, 0x1FDB,
|
1916
1919
|
0x1FE8, 0x1FEC,
|
1917
|
-
0x1FF8,
|
1920
|
+
0x1FF8, 0x1FFC,
|
1918
1921
|
0x2102, 0x2102,
|
1919
1922
|
0x2107, 0x2107,
|
1920
1923
|
0x210B, 0x210D,
|
@@ -2355,6 +2358,8 @@ pm_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n) {
|
|
2355
2358
|
}
|
2356
2359
|
}
|
2357
2360
|
|
2361
|
+
#ifndef PRISM_ENCODING_EXCLUDE_FULL
|
2362
|
+
|
2358
2363
|
static pm_unicode_codepoint_t
|
2359
2364
|
pm_cesu_8_codepoint(const uint8_t *b, ptrdiff_t n, size_t *width) {
|
2360
2365
|
if (b[0] < 0x80) {
|
@@ -2449,13 +2454,15 @@ pm_encoding_cesu_8_isupper_char(const uint8_t *b, ptrdiff_t n) {
|
|
2449
2454
|
}
|
2450
2455
|
}
|
2451
2456
|
|
2457
|
+
#endif
|
2458
|
+
|
2452
2459
|
#undef UNICODE_ALPHA_CODEPOINTS_LENGTH
|
2453
2460
|
#undef UNICODE_ALNUM_CODEPOINTS_LENGTH
|
2454
2461
|
#undef UNICODE_ISUPPER_CODEPOINTS_LENGTH
|
2455
2462
|
|
2456
2463
|
/**
|
2457
2464
|
* Each element of the following table contains a bitfield that indicates a
|
2458
|
-
* piece of information about the corresponding ASCII character.
|
2465
|
+
* piece of information about the corresponding US-ASCII character.
|
2459
2466
|
*/
|
2460
2467
|
static const uint8_t pm_encoding_ascii_table[256] = {
|
2461
2468
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
@@ -2477,6 +2484,8 @@ static const uint8_t pm_encoding_ascii_table[256] = {
|
|
2477
2484
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx
|
2478
2485
|
};
|
2479
2486
|
|
2487
|
+
#ifndef PRISM_ENCODING_EXCLUDE_FULL
|
2488
|
+
|
2480
2489
|
/**
|
2481
2490
|
* Each element of the following table contains a bitfield that indicates a
|
2482
2491
|
* piece of information about the corresponding CP850 character.
|
@@ -3624,7 +3633,7 @@ static const uint8_t pm_encoding_windows_1250_table[256] = {
|
|
3624
3633
|
0, 0, 0, 7, 0, 7, 0, 0, 0, 0, 7, 0, 0, 0, 0, 7, // Ax
|
3625
3634
|
0, 0, 0, 3, 0, 3, 0, 0, 0, 3, 3, 0, 7, 0, 3, 3, // Bx
|
3626
3635
|
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // Cx
|
3627
|
-
7, 7, 7, 7, 7, 7, 7,
|
3636
|
+
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 3, // Dx
|
3628
3637
|
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // Ex
|
3629
3638
|
3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3, 3, 0, // Fx
|
3630
3639
|
};
|
@@ -3672,7 +3681,7 @@ static const uint8_t pm_encoding_windows_1252_table[256] = {
|
|
3672
3681
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, // Ax
|
3673
3682
|
0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, // Bx
|
3674
3683
|
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // Cx
|
3675
|
-
7, 7, 7, 7, 7, 7, 7,
|
3684
|
+
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 3, // Dx
|
3676
3685
|
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // Ex
|
3677
3686
|
3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3, 3, 3, // Fx
|
3678
3687
|
};
|
@@ -3915,6 +3924,7 @@ PRISM_ENCODING_TABLE(windows_1258)
|
|
3915
3924
|
PRISM_ENCODING_TABLE(windows_874)
|
3916
3925
|
|
3917
3926
|
#undef PRISM_ENCODING_TABLE
|
3927
|
+
#endif
|
3918
3928
|
|
3919
3929
|
/**
|
3920
3930
|
* Returns the size of the next character in the ASCII encoding. This basically
|
@@ -3973,22 +3983,129 @@ pm_encoding_ascii_isupper_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_
|
|
3973
3983
|
}
|
3974
3984
|
|
3975
3985
|
/**
|
3976
|
-
*
|
3977
|
-
*
|
3978
|
-
|
3986
|
+
* For a lot of encodings the default is that they are a single byte long no
|
3987
|
+
* matter what the codepoint, so this function is shared between them.
|
3988
|
+
*/
|
3989
|
+
static size_t
|
3990
|
+
pm_encoding_single_char_width(PRISM_ATTRIBUTE_UNUSED const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) {
|
3991
|
+
return 1;
|
3992
|
+
}
|
3993
|
+
|
3994
|
+
/**
|
3995
|
+
* Returns the size of the next character in the EUC-JP encoding, or 0 if a
|
3996
|
+
* character cannot be decoded from the given bytes.
|
3997
|
+
*/
|
3998
|
+
static size_t
|
3999
|
+
pm_encoding_euc_jp_char_width(const uint8_t *b, ptrdiff_t n) {
|
4000
|
+
// These are the single byte characters.
|
4001
|
+
if (*b < 0x80) {
|
4002
|
+
return 1;
|
4003
|
+
}
|
4004
|
+
|
4005
|
+
// These are the double byte characters.
|
4006
|
+
if ((n > 1) && ((b[0] == 0x8E) || (b[0] >= 0xA1 && b[0] <= 0xFE)) && (b[1] >= 0xA1 && b[1] <= 0xFE)) {
|
4007
|
+
return 2;
|
4008
|
+
}
|
4009
|
+
|
4010
|
+
// These are the triple byte characters.
|
4011
|
+
if ((n > 2) && (b[0] == 0x8F) && (b[1] >= 0xA1 && b[2] <= 0xFE) && (b[2] >= 0xA1 && b[2] <= 0xFE)) {
|
4012
|
+
return 3;
|
4013
|
+
}
|
4014
|
+
|
4015
|
+
return 0;
|
4016
|
+
}
|
4017
|
+
|
4018
|
+
/**
|
4019
|
+
* Returns the size of the next character in the EUC-JP encoding if it is an
|
4020
|
+
* uppercase character.
|
3979
4021
|
*/
|
3980
4022
|
static bool
|
3981
|
-
|
3982
|
-
|
4023
|
+
pm_encoding_euc_jp_isupper_char(const uint8_t *b, ptrdiff_t n) {
|
4024
|
+
size_t width = pm_encoding_euc_jp_char_width(b, n);
|
4025
|
+
|
4026
|
+
if (width == 1) {
|
4027
|
+
return pm_encoding_ascii_isupper_char(b, n);
|
4028
|
+
} else if (width == 2) {
|
4029
|
+
return (
|
4030
|
+
(b[0] == 0xA3 && b[1] >= 0xC1 && b[1] <= 0xDA) ||
|
4031
|
+
(b[0] == 0xA6 && b[1] >= 0xA1 && b[1] <= 0xB8) ||
|
4032
|
+
(b[0] == 0xA7 && b[1] >= 0xA1 && b[1] <= 0xC1)
|
4033
|
+
);
|
4034
|
+
} else {
|
4035
|
+
return false;
|
4036
|
+
}
|
3983
4037
|
}
|
3984
4038
|
|
3985
4039
|
/**
|
3986
|
-
*
|
3987
|
-
*
|
4040
|
+
* Returns the size of the next character in the Shift_JIS encoding, or 0 if a
|
4041
|
+
* character cannot be decoded from the given bytes.
|
3988
4042
|
*/
|
3989
4043
|
static size_t
|
3990
|
-
|
3991
|
-
|
4044
|
+
pm_encoding_shift_jis_char_width(const uint8_t *b, ptrdiff_t n) {
|
4045
|
+
// These are the single byte characters.
|
4046
|
+
if (b[0] < 0x80 || (b[0] >= 0xA1 && b[0] <= 0xDF)) {
|
4047
|
+
return 1;
|
4048
|
+
}
|
4049
|
+
|
4050
|
+
// These are the double byte characters.
|
4051
|
+
if ((n > 1) && ((b[0] >= 0x81 && b[0] <= 0x9F) || (b[0] >= 0xE0 && b[0] <= 0xFC)) && (b[1] >= 0x40 && b[1] <= 0xFC && b[1] != 0x7F)) {
|
4052
|
+
return 2;
|
4053
|
+
}
|
4054
|
+
|
4055
|
+
return 0;
|
4056
|
+
}
|
4057
|
+
|
4058
|
+
/**
|
4059
|
+
* Returns the size of the next character in the Shift_JIS encoding if it is an
|
4060
|
+
* alphanumeric character.
|
4061
|
+
*/
|
4062
|
+
static size_t
|
4063
|
+
pm_encoding_shift_jis_alnum_char(const uint8_t *b, ptrdiff_t n) {
|
4064
|
+
size_t width = pm_encoding_shift_jis_char_width(b, n);
|
4065
|
+
return width == 1 ? ((b[0] >= 0x80) || pm_encoding_ascii_alnum_char(b, n)) : width;
|
4066
|
+
}
|
4067
|
+
|
4068
|
+
/**
|
4069
|
+
* Returns the size of the next character in the Shift_JIS encoding if it is an
|
4070
|
+
* alphabetical character.
|
4071
|
+
*/
|
4072
|
+
static size_t
|
4073
|
+
pm_encoding_shift_jis_alpha_char(const uint8_t *b, ptrdiff_t n) {
|
4074
|
+
size_t width = pm_encoding_shift_jis_char_width(b, n);
|
4075
|
+
return width == 1 ? ((b[0] >= 0x80) || pm_encoding_ascii_alpha_char(b, n)) : width;
|
4076
|
+
}
|
4077
|
+
|
4078
|
+
/**
|
4079
|
+
* Returns the size of the next character in the Shift_JIS encoding if it is an
|
4080
|
+
* uppercase character.
|
4081
|
+
*/
|
4082
|
+
static bool
|
4083
|
+
pm_encoding_shift_jis_isupper_char(const uint8_t *b, ptrdiff_t n) {
|
4084
|
+
size_t width = pm_encoding_shift_jis_char_width(b, n);
|
4085
|
+
|
4086
|
+
if (width == 1) {
|
4087
|
+
return pm_encoding_ascii_isupper_char(b, n);
|
4088
|
+
} else if (width == 2) {
|
4089
|
+
return (
|
4090
|
+
((b[0] == 0x82) && (b[1] >= 0x60 && b[1] <= 0x79)) ||
|
4091
|
+
((b[0] == 0x83) && (b[1] >= 0x9F && b[1] <= 0xB6)) ||
|
4092
|
+
((b[0] == 0x84) && (b[1] >= 0x40 && b[1] <= 0x60))
|
4093
|
+
);
|
4094
|
+
} else {
|
4095
|
+
return width;
|
4096
|
+
}
|
4097
|
+
}
|
4098
|
+
|
4099
|
+
#ifndef PRISM_ENCODING_EXCLUDE_FULL
|
4100
|
+
|
4101
|
+
/**
|
4102
|
+
* Certain encodings are equivalent to ASCII below 0x80, so it works for our
|
4103
|
+
* purposes to have a function here that first checks the bounds and then falls
|
4104
|
+
* back to checking the ASCII lookup table.
|
4105
|
+
*/
|
4106
|
+
static bool
|
4107
|
+
pm_encoding_ascii_isupper_char_7bit(const uint8_t *b, ptrdiff_t n) {
|
4108
|
+
return (*b < 0x80) && pm_encoding_ascii_isupper_char(b, n);
|
3992
4109
|
}
|
3993
4110
|
|
3994
4111
|
/**
|
@@ -4022,7 +4139,7 @@ pm_encoding_cp949_char_width(const uint8_t *b, ptrdiff_t n) {
|
|
4022
4139
|
}
|
4023
4140
|
|
4024
4141
|
// These are the double byte characters
|
4025
|
-
if ((n > 1) && (b[0] >= 0x81 && b[0] <=
|
4142
|
+
if ((n > 1) && (b[0] >= 0x81 && b[0] <= 0xFE) && ((b[1] >= 0x41 && b[1] <= 0x5A) || (b[1] >= 0x61 && b[1] <= 0x7A) || (b[1] >= 0x81 && b[1] <= 0xFE))) {
|
4026
4143
|
return 2;
|
4027
4144
|
}
|
4028
4145
|
|
@@ -4072,30 +4189,6 @@ pm_encoding_emacs_mule_char_width(const uint8_t *b, ptrdiff_t n) {
|
|
4072
4189
|
return 0;
|
4073
4190
|
}
|
4074
4191
|
|
4075
|
-
/**
|
4076
|
-
* Returns the size of the next character in the EUC-JP encoding, or 0 if a
|
4077
|
-
* character cannot be decoded from the given bytes.
|
4078
|
-
*/
|
4079
|
-
static size_t
|
4080
|
-
pm_encoding_euc_jp_char_width(const uint8_t *b, ptrdiff_t n) {
|
4081
|
-
// These are the single byte characters.
|
4082
|
-
if (*b < 0x80) {
|
4083
|
-
return 1;
|
4084
|
-
}
|
4085
|
-
|
4086
|
-
// These are the double byte characters.
|
4087
|
-
if ((n > 1) && ((b[0] == 0x8E) || (b[0] >= 0xA1 && b[0] <= 0xFE)) && (b[1] >= 0xA1 && b[1] <= 0xFE)) {
|
4088
|
-
return 2;
|
4089
|
-
}
|
4090
|
-
|
4091
|
-
// These are the triple byte characters.
|
4092
|
-
if ((n > 2) && (b[0] == 0x8F) && (b[1] >= 0xA1 && b[2] <= 0xFE) && (b[2] >= 0xA1 && b[2] <= 0xFE)) {
|
4093
|
-
return 3;
|
4094
|
-
}
|
4095
|
-
|
4096
|
-
return 0;
|
4097
|
-
}
|
4098
|
-
|
4099
4192
|
/**
|
4100
4193
|
* Returns the size of the next character in the EUC-KR encoding, or 0 if a
|
4101
4194
|
* character cannot be decoded from the given bytes.
|
@@ -4194,24 +4287,7 @@ pm_encoding_gbk_char_width(const uint8_t *b, ptrdiff_t n) {
|
|
4194
4287
|
return 0;
|
4195
4288
|
}
|
4196
4289
|
|
4197
|
-
|
4198
|
-
* Returns the size of the next character in the Shift_JIS encoding, or 0 if a
|
4199
|
-
* character cannot be decoded from the given bytes.
|
4200
|
-
*/
|
4201
|
-
static size_t
|
4202
|
-
pm_encoding_shift_jis_char_width(const uint8_t *b, ptrdiff_t n) {
|
4203
|
-
// These are the single byte characters.
|
4204
|
-
if (*b < 0x80 || (*b >= 0xA1 && *b <= 0xDF)) {
|
4205
|
-
return 1;
|
4206
|
-
}
|
4207
|
-
|
4208
|
-
// These are the double byte characters.
|
4209
|
-
if ((n > 1) && ((b[0] >= 0x81 && b[0] <= 0x9F) || (b[0] >= 0xE0 && b[0] <= 0xFC)) && (b[1] >= 0x40 && b[1] <= 0xFC)) {
|
4210
|
-
return 2;
|
4211
|
-
}
|
4212
|
-
|
4213
|
-
return 0;
|
4214
|
-
}
|
4290
|
+
#endif
|
4215
4291
|
|
4216
4292
|
/**
|
4217
4293
|
* This is the table of all of the encodings that prism supports.
|
@@ -4225,6 +4301,14 @@ const pm_encoding_t pm_encodings[] = {
|
|
4225
4301
|
.isupper_char = pm_encoding_utf_8_isupper_char,
|
4226
4302
|
.multibyte = true
|
4227
4303
|
},
|
4304
|
+
[PM_ENCODING_US_ASCII] = {
|
4305
|
+
.name = "US-ASCII",
|
4306
|
+
.char_width = pm_encoding_ascii_char_width,
|
4307
|
+
.alnum_char = pm_encoding_ascii_alnum_char,
|
4308
|
+
.alpha_char = pm_encoding_ascii_alpha_char,
|
4309
|
+
.isupper_char = pm_encoding_ascii_isupper_char,
|
4310
|
+
.multibyte = false
|
4311
|
+
},
|
4228
4312
|
[PM_ENCODING_ASCII_8BIT] = {
|
4229
4313
|
.name = "ASCII-8BIT",
|
4230
4314
|
.char_width = pm_encoding_single_char_width,
|
@@ -4233,6 +4317,24 @@ const pm_encoding_t pm_encodings[] = {
|
|
4233
4317
|
.isupper_char = pm_encoding_ascii_isupper_char,
|
4234
4318
|
.multibyte = false
|
4235
4319
|
},
|
4320
|
+
[PM_ENCODING_EUC_JP] = {
|
4321
|
+
.name = "EUC-JP",
|
4322
|
+
.char_width = pm_encoding_euc_jp_char_width,
|
4323
|
+
.alnum_char = pm_encoding_ascii_alnum_char_7bit,
|
4324
|
+
.alpha_char = pm_encoding_ascii_alpha_char_7bit,
|
4325
|
+
.isupper_char = pm_encoding_euc_jp_isupper_char,
|
4326
|
+
.multibyte = true
|
4327
|
+
},
|
4328
|
+
[PM_ENCODING_WINDOWS_31J] = {
|
4329
|
+
.name = "Windows-31J",
|
4330
|
+
.char_width = pm_encoding_shift_jis_char_width,
|
4331
|
+
.alnum_char = pm_encoding_shift_jis_alnum_char,
|
4332
|
+
.alpha_char = pm_encoding_shift_jis_alpha_char,
|
4333
|
+
.isupper_char = pm_encoding_shift_jis_isupper_char,
|
4334
|
+
.multibyte = true
|
4335
|
+
},
|
4336
|
+
|
4337
|
+
#ifndef PRISM_ENCODING_EXCLUDE_FULL
|
4236
4338
|
[PM_ENCODING_BIG5] = {
|
4237
4339
|
.name = "Big5",
|
4238
4340
|
.char_width = pm_encoding_big5_char_width,
|
@@ -4270,7 +4372,7 @@ const pm_encoding_t pm_encodings[] = {
|
|
4270
4372
|
.char_width = pm_encoding_euc_jp_char_width,
|
4271
4373
|
.alnum_char = pm_encoding_ascii_alnum_char_7bit,
|
4272
4374
|
.alpha_char = pm_encoding_ascii_alpha_char_7bit,
|
4273
|
-
.isupper_char =
|
4375
|
+
.isupper_char = pm_encoding_euc_jp_isupper_char,
|
4274
4376
|
.multibyte = true
|
4275
4377
|
},
|
4276
4378
|
[PM_ENCODING_CP850] = {
|
@@ -4329,20 +4431,12 @@ const pm_encoding_t pm_encodings[] = {
|
|
4329
4431
|
.isupper_char = pm_encoding_ascii_isupper_char_7bit,
|
4330
4432
|
.multibyte = true
|
4331
4433
|
},
|
4332
|
-
[PM_ENCODING_EUC_JP] = {
|
4333
|
-
.name = "EUC-JP",
|
4334
|
-
.char_width = pm_encoding_euc_jp_char_width,
|
4335
|
-
.alnum_char = pm_encoding_ascii_alnum_char_7bit,
|
4336
|
-
.alpha_char = pm_encoding_ascii_alpha_char_7bit,
|
4337
|
-
.isupper_char = pm_encoding_ascii_isupper_char_7bit,
|
4338
|
-
.multibyte = true
|
4339
|
-
},
|
4340
4434
|
[PM_ENCODING_EUC_JP_MS] = {
|
4341
4435
|
.name = "eucJP-ms",
|
4342
4436
|
.char_width = pm_encoding_euc_jp_char_width,
|
4343
4437
|
.alnum_char = pm_encoding_ascii_alnum_char_7bit,
|
4344
4438
|
.alpha_char = pm_encoding_ascii_alpha_char_7bit,
|
4345
|
-
.isupper_char =
|
4439
|
+
.isupper_char = pm_encoding_euc_jp_isupper_char,
|
4346
4440
|
.multibyte = true
|
4347
4441
|
},
|
4348
4442
|
[PM_ENCODING_EUC_JIS_2004] = {
|
@@ -4350,7 +4444,7 @@ const pm_encoding_t pm_encodings[] = {
|
|
4350
4444
|
.char_width = pm_encoding_euc_jp_char_width,
|
4351
4445
|
.alnum_char = pm_encoding_ascii_alnum_char_7bit,
|
4352
4446
|
.alpha_char = pm_encoding_ascii_alpha_char_7bit,
|
4353
|
-
.isupper_char =
|
4447
|
+
.isupper_char = pm_encoding_euc_jp_isupper_char,
|
4354
4448
|
.multibyte = true
|
4355
4449
|
},
|
4356
4450
|
[PM_ENCODING_EUC_KR] = {
|
@@ -4708,9 +4802,9 @@ const pm_encoding_t pm_encodings[] = {
|
|
4708
4802
|
[PM_ENCODING_MAC_JAPANESE] = {
|
4709
4803
|
.name = "MacJapanese",
|
4710
4804
|
.char_width = pm_encoding_shift_jis_char_width,
|
4711
|
-
.alnum_char =
|
4712
|
-
.alpha_char =
|
4713
|
-
.isupper_char =
|
4805
|
+
.alnum_char = pm_encoding_shift_jis_alnum_char,
|
4806
|
+
.alpha_char = pm_encoding_shift_jis_alpha_char,
|
4807
|
+
.isupper_char = pm_encoding_shift_jis_isupper_char,
|
4714
4808
|
.multibyte = true
|
4715
4809
|
},
|
4716
4810
|
[PM_ENCODING_MAC_ROMAN] = {
|
@@ -4756,33 +4850,33 @@ const pm_encoding_t pm_encodings[] = {
|
|
4756
4850
|
[PM_ENCODING_SHIFT_JIS] = {
|
4757
4851
|
.name = "Shift_JIS",
|
4758
4852
|
.char_width = pm_encoding_shift_jis_char_width,
|
4759
|
-
.alnum_char =
|
4760
|
-
.alpha_char =
|
4761
|
-
.isupper_char =
|
4853
|
+
.alnum_char = pm_encoding_shift_jis_alnum_char,
|
4854
|
+
.alpha_char = pm_encoding_shift_jis_alpha_char,
|
4855
|
+
.isupper_char = pm_encoding_shift_jis_isupper_char,
|
4762
4856
|
.multibyte = true
|
4763
4857
|
},
|
4764
4858
|
[PM_ENCODING_SJIS_DOCOMO] = {
|
4765
4859
|
.name = "SJIS-DoCoMo",
|
4766
4860
|
.char_width = pm_encoding_shift_jis_char_width,
|
4767
|
-
.alnum_char =
|
4768
|
-
.alpha_char =
|
4769
|
-
.isupper_char =
|
4861
|
+
.alnum_char = pm_encoding_shift_jis_alnum_char,
|
4862
|
+
.alpha_char = pm_encoding_shift_jis_alpha_char,
|
4863
|
+
.isupper_char = pm_encoding_shift_jis_isupper_char,
|
4770
4864
|
.multibyte = true
|
4771
4865
|
},
|
4772
4866
|
[PM_ENCODING_SJIS_KDDI] = {
|
4773
4867
|
.name = "SJIS-KDDI",
|
4774
4868
|
.char_width = pm_encoding_shift_jis_char_width,
|
4775
|
-
.alnum_char =
|
4776
|
-
.alpha_char =
|
4777
|
-
.isupper_char =
|
4869
|
+
.alnum_char = pm_encoding_shift_jis_alnum_char,
|
4870
|
+
.alpha_char = pm_encoding_shift_jis_alpha_char,
|
4871
|
+
.isupper_char = pm_encoding_shift_jis_isupper_char,
|
4778
4872
|
.multibyte = true
|
4779
4873
|
},
|
4780
4874
|
[PM_ENCODING_SJIS_SOFTBANK] = {
|
4781
4875
|
.name = "SJIS-SoftBank",
|
4782
4876
|
.char_width = pm_encoding_shift_jis_char_width,
|
4783
|
-
.alnum_char =
|
4784
|
-
.alpha_char =
|
4785
|
-
.isupper_char =
|
4877
|
+
.alnum_char = pm_encoding_shift_jis_alnum_char,
|
4878
|
+
.alpha_char = pm_encoding_shift_jis_alpha_char,
|
4879
|
+
.isupper_char = pm_encoding_shift_jis_isupper_char,
|
4786
4880
|
.multibyte = true
|
4787
4881
|
},
|
4788
4882
|
[PM_ENCODING_STATELESS_ISO_2022_JP] = {
|
@@ -4809,14 +4903,6 @@ const pm_encoding_t pm_encodings[] = {
|
|
4809
4903
|
.isupper_char = pm_encoding_tis_620_isupper_char,
|
4810
4904
|
.multibyte = false
|
4811
4905
|
},
|
4812
|
-
[PM_ENCODING_US_ASCII] = {
|
4813
|
-
.name = "US-ASCII",
|
4814
|
-
.char_width = pm_encoding_ascii_char_width,
|
4815
|
-
.alnum_char = pm_encoding_ascii_alnum_char,
|
4816
|
-
.alpha_char = pm_encoding_ascii_alpha_char,
|
4817
|
-
.isupper_char = pm_encoding_ascii_isupper_char,
|
4818
|
-
.multibyte = false
|
4819
|
-
},
|
4820
4906
|
[PM_ENCODING_UTF8_MAC] = {
|
4821
4907
|
.name = "UTF8-MAC",
|
4822
4908
|
.char_width = pm_encoding_utf_8_char_width,
|
@@ -4921,14 +5007,6 @@ const pm_encoding_t pm_encodings[] = {
|
|
4921
5007
|
.isupper_char = pm_encoding_windows_1258_isupper_char,
|
4922
5008
|
.multibyte = false
|
4923
5009
|
},
|
4924
|
-
[PM_ENCODING_WINDOWS_31J] = {
|
4925
|
-
.name = "Windows-31J",
|
4926
|
-
.char_width = pm_encoding_shift_jis_char_width,
|
4927
|
-
.alnum_char = pm_encoding_ascii_alnum_char_7bit,
|
4928
|
-
.alpha_char = pm_encoding_ascii_alpha_char_7bit,
|
4929
|
-
.isupper_char = pm_encoding_ascii_isupper_char_7bit,
|
4930
|
-
.multibyte = true
|
4931
|
-
},
|
4932
5010
|
[PM_ENCODING_WINDOWS_874] = {
|
4933
5011
|
.name = "Windows-874",
|
4934
5012
|
.char_width = pm_encoding_single_char_width,
|
@@ -4937,6 +5015,7 @@ const pm_encoding_t pm_encodings[] = {
|
|
4937
5015
|
.isupper_char = pm_encoding_windows_874_isupper_char,
|
4938
5016
|
.multibyte = false
|
4939
5017
|
}
|
5018
|
+
#endif
|
4940
5019
|
};
|
4941
5020
|
|
4942
5021
|
/**
|
@@ -4951,11 +5030,13 @@ pm_encoding_find(const uint8_t *start, const uint8_t *end) {
|
|
4951
5030
|
// UTF-8 can contain extra information at the end about the platform it is
|
4952
5031
|
// encoded on, such as UTF-8-MAC or UTF-8-UNIX. We'll ignore those suffixes.
|
4953
5032
|
if ((start + 5 <= end) && (pm_strncasecmp(start, (const uint8_t *) "UTF-8", 5) == 0)) {
|
5033
|
+
#ifndef PRISM_ENCODING_EXCLUDE_FULL
|
4954
5034
|
// We need to explicitly handle UTF-8-HFS, as that one needs to switch
|
4955
5035
|
// over to being UTF8-MAC.
|
4956
5036
|
if (width == 9 && (pm_strncasecmp(start + 5, (const uint8_t *) "-HFS", 4) == 0)) {
|
4957
5037
|
return &pm_encodings[PM_ENCODING_UTF8_MAC];
|
4958
5038
|
}
|
5039
|
+
#endif
|
4959
5040
|
|
4960
5041
|
// Otherwise we'll return the default UTF-8 encoding.
|
4961
5042
|
return PM_ENCODING_UTF_8_ENTRY;
|
@@ -4975,11 +5056,16 @@ pm_encoding_find(const uint8_t *start, const uint8_t *end) {
|
|
4975
5056
|
break;
|
4976
5057
|
case 'B': case 'b':
|
4977
5058
|
ENCODING1("BINARY", PM_ENCODING_ASCII_8BIT);
|
5059
|
+
#ifndef PRISM_ENCODING_EXCLUDE_FULL
|
4978
5060
|
ENCODING1("Big5", PM_ENCODING_BIG5);
|
4979
5061
|
ENCODING2("Big5-HKSCS", "Big5-HKSCS:2008", PM_ENCODING_BIG5_HKSCS);
|
4980
5062
|
ENCODING1("Big5-UAO", PM_ENCODING_BIG5_UAO);
|
5063
|
+
#endif
|
4981
5064
|
break;
|
4982
5065
|
case 'C': case 'c':
|
5066
|
+
ENCODING1("CP65001", PM_ENCODING_UTF_8);
|
5067
|
+
ENCODING2("CP932", "csWindows31J", PM_ENCODING_WINDOWS_31J);
|
5068
|
+
#ifndef PRISM_ENCODING_EXCLUDE_FULL
|
4983
5069
|
ENCODING1("CESU-8", PM_ENCODING_CESU_8);
|
4984
5070
|
ENCODING1("CP437", PM_ENCODING_IBM437);
|
4985
5071
|
ENCODING1("CP720", PM_ENCODING_IBM720);
|
@@ -4999,7 +5085,6 @@ pm_encoding_find(const uint8_t *start, const uint8_t *end) {
|
|
4999
5085
|
ENCODING1("CP874", PM_ENCODING_WINDOWS_874);
|
5000
5086
|
ENCODING1("CP878", PM_ENCODING_KOI8_R);
|
5001
5087
|
ENCODING1("CP863", PM_ENCODING_IBM863);
|
5002
|
-
ENCODING2("CP932", "csWindows31J", PM_ENCODING_WINDOWS_31J);
|
5003
5088
|
ENCODING1("CP936", PM_ENCODING_GBK);
|
5004
5089
|
ENCODING1("CP949", PM_ENCODING_CP949);
|
5005
5090
|
ENCODING1("CP950", PM_ENCODING_CP950);
|
@@ -5014,25 +5099,30 @@ pm_encoding_find(const uint8_t *start, const uint8_t *end) {
|
|
5014
5099
|
ENCODING1("CP1257", PM_ENCODING_WINDOWS_1257);
|
5015
5100
|
ENCODING1("CP1258", PM_ENCODING_WINDOWS_1258);
|
5016
5101
|
ENCODING1("CP51932", PM_ENCODING_CP51932);
|
5017
|
-
|
5102
|
+
#endif
|
5018
5103
|
break;
|
5019
5104
|
case 'E': case 'e':
|
5020
5105
|
ENCODING2("EUC-JP", "eucJP", PM_ENCODING_EUC_JP);
|
5106
|
+
#ifndef PRISM_ENCODING_EXCLUDE_FULL
|
5021
5107
|
ENCODING2("eucJP-ms", "euc-jp-ms", PM_ENCODING_EUC_JP_MS);
|
5022
5108
|
ENCODING2("EUC-JIS-2004", "EUC-JISX0213", PM_ENCODING_EUC_JIS_2004);
|
5023
5109
|
ENCODING2("EUC-KR", "eucKR", PM_ENCODING_EUC_KR);
|
5024
5110
|
ENCODING2("EUC-CN", "eucCN", PM_ENCODING_GB2312);
|
5025
5111
|
ENCODING2("EUC-TW", "eucTW", PM_ENCODING_EUC_TW);
|
5026
5112
|
ENCODING1("Emacs-Mule", PM_ENCODING_EMACS_MULE);
|
5113
|
+
#endif
|
5027
5114
|
break;
|
5028
5115
|
case 'G': case 'g':
|
5116
|
+
#ifndef PRISM_ENCODING_EXCLUDE_FULL
|
5029
5117
|
ENCODING1("GBK", PM_ENCODING_GBK);
|
5030
5118
|
ENCODING1("GB12345", PM_ENCODING_GB12345);
|
5031
5119
|
ENCODING1("GB18030", PM_ENCODING_GB18030);
|
5032
5120
|
ENCODING1("GB1988", PM_ENCODING_GB1988);
|
5033
5121
|
ENCODING1("GB2312", PM_ENCODING_GB2312);
|
5122
|
+
#endif
|
5034
5123
|
break;
|
5035
5124
|
case 'I': case 'i':
|
5125
|
+
#ifndef PRISM_ENCODING_EXCLUDE_FULL
|
5036
5126
|
ENCODING1("IBM437", PM_ENCODING_IBM437);
|
5037
5127
|
ENCODING1("IBM720", PM_ENCODING_IBM720);
|
5038
5128
|
ENCODING1("IBM737", PM_ENCODING_IBM737);
|
@@ -5064,12 +5154,16 @@ pm_encoding_find(const uint8_t *start, const uint8_t *end) {
|
|
5064
5154
|
ENCODING2("ISO-8859-14", "ISO8859-14", PM_ENCODING_ISO_8859_14);
|
5065
5155
|
ENCODING2("ISO-8859-15", "ISO8859-15", PM_ENCODING_ISO_8859_15);
|
5066
5156
|
ENCODING2("ISO-8859-16", "ISO8859-16", PM_ENCODING_ISO_8859_16);
|
5157
|
+
#endif
|
5067
5158
|
break;
|
5068
5159
|
case 'K': case 'k':
|
5160
|
+
#ifndef PRISM_ENCODING_EXCLUDE_FULL
|
5069
5161
|
ENCODING1("KOI8-R", PM_ENCODING_KOI8_R);
|
5070
5162
|
ENCODING1("KOI8-U", PM_ENCODING_KOI8_U);
|
5163
|
+
#endif
|
5071
5164
|
break;
|
5072
5165
|
case 'M': case 'm':
|
5166
|
+
#ifndef PRISM_ENCODING_EXCLUDE_FULL
|
5073
5167
|
ENCODING1("macCentEuro", PM_ENCODING_MAC_CENT_EURO);
|
5074
5168
|
ENCODING1("macCroatian", PM_ENCODING_MAC_CROATIAN);
|
5075
5169
|
ENCODING1("macCyrillic", PM_ENCODING_MAC_CYRILLIC);
|
@@ -5082,31 +5176,39 @@ pm_encoding_find(const uint8_t *start, const uint8_t *end) {
|
|
5082
5176
|
ENCODING1("macThai", PM_ENCODING_MAC_THAI);
|
5083
5177
|
ENCODING1("macTurkish", PM_ENCODING_MAC_TURKISH);
|
5084
5178
|
ENCODING1("macUkraine", PM_ENCODING_MAC_UKRAINE);
|
5179
|
+
#endif
|
5085
5180
|
break;
|
5086
5181
|
case 'P': case 'p':
|
5087
5182
|
ENCODING1("PCK", PM_ENCODING_WINDOWS_31J);
|
5088
5183
|
break;
|
5089
5184
|
case 'S': case 's':
|
5090
|
-
ENCODING1("Shift_JIS", PM_ENCODING_SHIFT_JIS);
|
5091
5185
|
ENCODING1("SJIS", PM_ENCODING_WINDOWS_31J);
|
5186
|
+
#ifndef PRISM_ENCODING_EXCLUDE_FULL
|
5187
|
+
ENCODING1("Shift_JIS", PM_ENCODING_SHIFT_JIS);
|
5092
5188
|
ENCODING1("SJIS-DoCoMo", PM_ENCODING_SJIS_DOCOMO);
|
5093
5189
|
ENCODING1("SJIS-KDDI", PM_ENCODING_SJIS_KDDI);
|
5094
5190
|
ENCODING1("SJIS-SoftBank", PM_ENCODING_SJIS_SOFTBANK);
|
5095
5191
|
ENCODING1("stateless-ISO-2022-JP", PM_ENCODING_STATELESS_ISO_2022_JP);
|
5096
5192
|
ENCODING1("stateless-ISO-2022-JP-KDDI", PM_ENCODING_STATELESS_ISO_2022_JP_KDDI);
|
5193
|
+
#endif
|
5097
5194
|
break;
|
5098
5195
|
case 'T': case 't':
|
5196
|
+
#ifndef PRISM_ENCODING_EXCLUDE_FULL
|
5099
5197
|
ENCODING1("TIS-620", PM_ENCODING_TIS_620);
|
5198
|
+
#endif
|
5100
5199
|
break;
|
5101
5200
|
case 'U': case 'u':
|
5102
5201
|
ENCODING1("US-ASCII", PM_ENCODING_US_ASCII);
|
5202
|
+
#ifndef PRISM_ENCODING_EXCLUDE_FULL
|
5103
5203
|
ENCODING2("UTF8-MAC", "UTF-8-HFS", PM_ENCODING_UTF8_MAC);
|
5104
5204
|
ENCODING1("UTF8-DoCoMo", PM_ENCODING_UTF8_DOCOMO);
|
5105
5205
|
ENCODING1("UTF8-KDDI", PM_ENCODING_UTF8_KDDI);
|
5106
5206
|
ENCODING1("UTF8-SoftBank", PM_ENCODING_UTF8_SOFTBANK);
|
5207
|
+
#endif
|
5107
5208
|
break;
|
5108
5209
|
case 'W': case 'w':
|
5109
5210
|
ENCODING1("Windows-31J", PM_ENCODING_WINDOWS_31J);
|
5211
|
+
#ifndef PRISM_ENCODING_EXCLUDE_FULL
|
5110
5212
|
ENCODING1("Windows-874", PM_ENCODING_WINDOWS_874);
|
5111
5213
|
ENCODING1("Windows-1250", PM_ENCODING_WINDOWS_1250);
|
5112
5214
|
ENCODING1("Windows-1251", PM_ENCODING_WINDOWS_1251);
|
@@ -5117,6 +5219,7 @@ pm_encoding_find(const uint8_t *start, const uint8_t *end) {
|
|
5117
5219
|
ENCODING1("Windows-1256", PM_ENCODING_WINDOWS_1256);
|
5118
5220
|
ENCODING1("Windows-1257", PM_ENCODING_WINDOWS_1257);
|
5119
5221
|
ENCODING1("Windows-1258", PM_ENCODING_WINDOWS_1258);
|
5222
|
+
#endif
|
5120
5223
|
break;
|
5121
5224
|
case '6':
|
5122
5225
|
ENCODING1("646", PM_ENCODING_US_ASCII);
|