prism 0.16.0 → 0.17.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -1
- data/Makefile +6 -0
- data/README.md +1 -1
- data/config.yml +50 -35
- data/docs/fuzzing.md +1 -1
- data/docs/serialization.md +28 -29
- data/ext/prism/api_node.c +802 -770
- data/ext/prism/api_pack.c +20 -9
- data/ext/prism/extension.c +464 -162
- data/ext/prism/extension.h +1 -1
- data/include/prism/ast.h +3173 -763
- data/include/prism/defines.h +32 -9
- data/include/prism/diagnostic.h +36 -3
- data/include/prism/enc/pm_encoding.h +118 -28
- data/include/prism/node.h +38 -13
- data/include/prism/options.h +204 -0
- data/include/prism/pack.h +44 -33
- data/include/prism/parser.h +445 -200
- data/include/prism/prettyprint.h +12 -1
- data/include/prism/regexp.h +16 -2
- data/include/prism/util/pm_buffer.h +94 -16
- data/include/prism/util/pm_char.h +162 -48
- data/include/prism/util/pm_constant_pool.h +126 -32
- data/include/prism/util/pm_list.h +68 -38
- data/include/prism/util/pm_memchr.h +18 -3
- data/include/prism/util/pm_newline_list.h +70 -27
- data/include/prism/util/pm_state_stack.h +25 -7
- data/include/prism/util/pm_string.h +115 -27
- data/include/prism/util/pm_string_list.h +25 -6
- data/include/prism/util/pm_strncasecmp.h +32 -0
- data/include/prism/util/pm_strpbrk.h +31 -17
- data/include/prism/version.h +27 -2
- data/include/prism.h +224 -31
- data/lib/prism/compiler.rb +6 -3
- data/lib/prism/debug.rb +23 -7
- data/lib/prism/dispatcher.rb +33 -18
- data/lib/prism/dsl.rb +10 -5
- data/lib/prism/ffi.rb +132 -80
- data/lib/prism/lex_compat.rb +25 -15
- data/lib/prism/mutation_compiler.rb +10 -5
- data/lib/prism/node.rb +370 -135
- data/lib/prism/node_ext.rb +1 -1
- data/lib/prism/node_inspector.rb +1 -1
- data/lib/prism/pack.rb +79 -40
- data/lib/prism/parse_result/comments.rb +7 -2
- data/lib/prism/parse_result/newlines.rb +4 -0
- data/lib/prism/parse_result.rb +150 -30
- data/lib/prism/pattern.rb +11 -0
- data/lib/prism/ripper_compat.rb +28 -10
- data/lib/prism/serialize.rb +86 -54
- data/lib/prism/visitor.rb +10 -3
- data/lib/prism.rb +20 -2
- data/prism.gemspec +4 -2
- data/rbi/prism.rbi +104 -60
- data/rbi/prism_static.rbi +16 -2
- data/sig/prism.rbs +72 -43
- data/sig/prism_static.rbs +14 -1
- data/src/diagnostic.c +56 -53
- data/src/enc/pm_big5.c +1 -0
- data/src/enc/pm_euc_jp.c +1 -0
- data/src/enc/pm_gbk.c +1 -0
- data/src/enc/pm_shift_jis.c +1 -0
- data/src/enc/pm_tables.c +316 -80
- data/src/enc/pm_unicode.c +53 -8
- data/src/enc/pm_windows_31j.c +1 -0
- data/src/node.c +334 -321
- data/src/options.c +170 -0
- data/src/prettyprint.c +74 -47
- data/src/prism.c +1642 -856
- data/src/regexp.c +151 -95
- data/src/serialize.c +44 -20
- data/src/token_type.c +3 -1
- data/src/util/pm_buffer.c +45 -15
- data/src/util/pm_char.c +103 -57
- data/src/util/pm_constant_pool.c +51 -21
- data/src/util/pm_list.c +12 -4
- data/src/util/pm_memchr.c +5 -3
- data/src/util/pm_newline_list.c +20 -12
- data/src/util/pm_state_stack.c +9 -3
- data/src/util/pm_string.c +95 -85
- data/src/util/pm_string_list.c +14 -15
- data/src/util/pm_strncasecmp.c +10 -3
- data/src/util/pm_strpbrk.c +25 -19
- metadata +5 -3
- data/docs/prism.png +0 -0
data/src/enc/pm_tables.c
CHANGED
@@ -1,7 +1,9 @@
|
|
1
1
|
#include "prism/enc/pm_encoding.h"
|
2
2
|
|
3
|
-
|
4
|
-
|
3
|
+
/**
|
4
|
+
* Each element of the following table contains a bitfield that indicates a
|
5
|
+
* piece of information about the corresponding ASCII character.
|
6
|
+
*/
|
5
7
|
static uint8_t pm_encoding_ascii_table[256] = {
|
6
8
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
7
9
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
@@ -22,8 +24,10 @@ static uint8_t pm_encoding_ascii_table[256] = {
|
|
22
24
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx
|
23
25
|
};
|
24
26
|
|
25
|
-
|
26
|
-
|
27
|
+
/**
|
28
|
+
* Each element of the following table contains a bitfield that indicates a
|
29
|
+
* piece of information about the corresponding ISO-8859-1 character.
|
30
|
+
*/
|
27
31
|
static uint8_t pm_encoding_iso_8859_1_table[256] = {
|
28
32
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
29
33
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
@@ -44,8 +48,10 @@ static uint8_t pm_encoding_iso_8859_1_table[256] = {
|
|
44
48
|
3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3, 3, 3, // Fx
|
45
49
|
};
|
46
50
|
|
47
|
-
|
48
|
-
|
51
|
+
/**
|
52
|
+
* Each element of the following table contains a bitfield that indicates a
|
53
|
+
* piece of information about the corresponding ISO-8859-2 character.
|
54
|
+
*/
|
49
55
|
static uint8_t pm_encoding_iso_8859_2_table[256] = {
|
50
56
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
51
57
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
@@ -66,8 +72,10 @@ static uint8_t pm_encoding_iso_8859_2_table[256] = {
|
|
66
72
|
3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3, 3, 0, // Fx
|
67
73
|
};
|
68
74
|
|
69
|
-
|
70
|
-
|
75
|
+
/**
|
76
|
+
* Each element of the following table contains a bitfield that indicates a
|
77
|
+
* piece of information about the corresponding ISO-8859-3 character.
|
78
|
+
*/
|
71
79
|
static uint8_t pm_encoding_iso_8859_3_table[256] = {
|
72
80
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
73
81
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
@@ -88,8 +96,10 @@ static uint8_t pm_encoding_iso_8859_3_table[256] = {
|
|
88
96
|
0, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3, 3, 0, // Fx
|
89
97
|
};
|
90
98
|
|
91
|
-
|
92
|
-
|
99
|
+
/**
|
100
|
+
* Each element of the following table contains a bitfield that indicates a
|
101
|
+
* piece of information about the corresponding ISO-8859-4 character.
|
102
|
+
*/
|
93
103
|
static uint8_t pm_encoding_iso_8859_4_table[256] = {
|
94
104
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
95
105
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
@@ -110,8 +120,10 @@ static uint8_t pm_encoding_iso_8859_4_table[256] = {
|
|
110
120
|
3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3, 3, 0, // Fx
|
111
121
|
};
|
112
122
|
|
113
|
-
|
114
|
-
|
123
|
+
/**
|
124
|
+
* Each element of the following table contains a bitfield that indicates a
|
125
|
+
* piece of information about the corresponding ISO-8859-5 character.
|
126
|
+
*/
|
115
127
|
static uint8_t pm_encoding_iso_8859_5_table[256] = {
|
116
128
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
117
129
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
@@ -132,8 +144,10 @@ static uint8_t pm_encoding_iso_8859_5_table[256] = {
|
|
132
144
|
0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 3, 3, // Fx
|
133
145
|
};
|
134
146
|
|
135
|
-
|
136
|
-
|
147
|
+
/**
|
148
|
+
* Each element of the following table contains a bitfield that indicates a
|
149
|
+
* piece of information about the corresponding ISO-8859-6 character.
|
150
|
+
*/
|
137
151
|
static uint8_t pm_encoding_iso_8859_6_table[256] = {
|
138
152
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
139
153
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
@@ -154,8 +168,10 @@ static uint8_t pm_encoding_iso_8859_6_table[256] = {
|
|
154
168
|
3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx
|
155
169
|
};
|
156
170
|
|
157
|
-
|
158
|
-
|
171
|
+
/**
|
172
|
+
* Each element of the following table contains a bitfield that indicates a
|
173
|
+
* piece of information about the corresponding ISO-8859-7 character.
|
174
|
+
*/
|
159
175
|
static uint8_t pm_encoding_iso_8859_7_table[256] = {
|
160
176
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
161
177
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
@@ -176,8 +192,10 @@ static uint8_t pm_encoding_iso_8859_7_table[256] = {
|
|
176
192
|
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, // Fx
|
177
193
|
};
|
178
194
|
|
179
|
-
|
180
|
-
|
195
|
+
/**
|
196
|
+
* Each element of the following table contains a bitfield that indicates a
|
197
|
+
* piece of information about the corresponding ISO-8859-8 character.
|
198
|
+
*/
|
181
199
|
static uint8_t pm_encoding_iso_8859_8_table[256] = {
|
182
200
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
183
201
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
@@ -198,8 +216,10 @@ static uint8_t pm_encoding_iso_8859_8_table[256] = {
|
|
198
216
|
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // Fx
|
199
217
|
};
|
200
218
|
|
201
|
-
|
202
|
-
|
219
|
+
/**
|
220
|
+
* Each element of the following table contains a bitfield that indicates a
|
221
|
+
* piece of information about the corresponding ISO-8859-9 character.
|
222
|
+
*/
|
203
223
|
static uint8_t pm_encoding_iso_8859_9_table[256] = {
|
204
224
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
205
225
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
@@ -220,8 +240,10 @@ static uint8_t pm_encoding_iso_8859_9_table[256] = {
|
|
220
240
|
3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3, 3, 3, // Fx
|
221
241
|
};
|
222
242
|
|
223
|
-
|
224
|
-
|
243
|
+
/**
|
244
|
+
* Each element of the following table contains a bitfield that indicates a
|
245
|
+
* piece of information about the corresponding ISO-8859-10 character.
|
246
|
+
*/
|
225
247
|
static uint8_t pm_encoding_iso_8859_10_table[256] = {
|
226
248
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
227
249
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
@@ -242,8 +264,10 @@ static uint8_t pm_encoding_iso_8859_10_table[256] = {
|
|
242
264
|
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // Fx
|
243
265
|
};
|
244
266
|
|
245
|
-
|
246
|
-
|
267
|
+
/**
|
268
|
+
* Each element of the following table contains a bitfield that indicates a
|
269
|
+
* piece of information about the corresponding ISO-8859-11 character.
|
270
|
+
*/
|
247
271
|
static uint8_t pm_encoding_iso_8859_11_table[256] = {
|
248
272
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
249
273
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
@@ -264,8 +288,10 @@ static uint8_t pm_encoding_iso_8859_11_table[256] = {
|
|
264
288
|
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, // Fx
|
265
289
|
};
|
266
290
|
|
267
|
-
|
268
|
-
|
291
|
+
/**
|
292
|
+
* Each element of the following table contains a bitfield that indicates a
|
293
|
+
* piece of information about the corresponding ISO-8859-13 character.
|
294
|
+
*/
|
269
295
|
static uint8_t pm_encoding_iso_8859_13_table[256] = {
|
270
296
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
271
297
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
@@ -286,8 +312,10 @@ static uint8_t pm_encoding_iso_8859_13_table[256] = {
|
|
286
312
|
3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3, 3, 0, // Fx
|
287
313
|
};
|
288
314
|
|
289
|
-
|
290
|
-
|
315
|
+
/**
|
316
|
+
* Each element of the following table contains a bitfield that indicates a
|
317
|
+
* piece of information about the corresponding ISO-8859-14 character.
|
318
|
+
*/
|
291
319
|
static uint8_t pm_encoding_iso_8859_14_table[256] = {
|
292
320
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
293
321
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
@@ -308,8 +336,10 @@ static uint8_t pm_encoding_iso_8859_14_table[256] = {
|
|
308
336
|
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // Fx
|
309
337
|
};
|
310
338
|
|
311
|
-
|
312
|
-
|
339
|
+
/**
|
340
|
+
* Each element of the following table contains a bitfield that indicates a
|
341
|
+
* piece of information about the corresponding ISO-8859-15 character.
|
342
|
+
*/
|
313
343
|
static uint8_t pm_encoding_iso_8859_15_table[256] = {
|
314
344
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
315
345
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
@@ -330,8 +360,10 @@ static uint8_t pm_encoding_iso_8859_15_table[256] = {
|
|
330
360
|
3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3, 3, 3, // Fx
|
331
361
|
};
|
332
362
|
|
333
|
-
|
334
|
-
|
363
|
+
/**
|
364
|
+
* Each element of the following table contains a bitfield that indicates a
|
365
|
+
* piece of information about the corresponding ISO-8859-16 character.
|
366
|
+
*/
|
335
367
|
static uint8_t pm_encoding_iso_8859_16_table[256] = {
|
336
368
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
337
369
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
@@ -352,8 +384,10 @@ static uint8_t pm_encoding_iso_8859_16_table[256] = {
|
|
352
384
|
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // Fx
|
353
385
|
};
|
354
386
|
|
355
|
-
|
356
|
-
|
387
|
+
/**
|
388
|
+
* Each element of the following table contains a bitfield that indicates a
|
389
|
+
* piece of information about the corresponding KOI8-R character.
|
390
|
+
*/
|
357
391
|
static uint8_t pm_encoding_koi8_r_table[256] = {
|
358
392
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
359
393
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
@@ -374,8 +408,10 @@ static uint8_t pm_encoding_koi8_r_table[256] = {
|
|
374
408
|
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // Fx
|
375
409
|
};
|
376
410
|
|
377
|
-
|
378
|
-
|
411
|
+
/**
|
412
|
+
* Each element of the following table contains a bitfield that indicates a
|
413
|
+
* piece of information about the corresponding windows-1251 character.
|
414
|
+
*/
|
379
415
|
static uint8_t pm_encoding_windows_1251_table[256] = {
|
380
416
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
381
417
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
@@ -396,8 +432,10 @@ static uint8_t pm_encoding_windows_1251_table[256] = {
|
|
396
432
|
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // Fx
|
397
433
|
};
|
398
434
|
|
399
|
-
|
400
|
-
|
435
|
+
/**
|
436
|
+
* Each element of the following table contains a bitfield that indicates a
|
437
|
+
* piece of information about the corresponding windows-1252 character.
|
438
|
+
*/
|
401
439
|
static uint8_t pm_encoding_windows_1252_table[256] = {
|
402
440
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
403
441
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
@@ -418,36 +456,93 @@ static uint8_t pm_encoding_windows_1252_table[256] = {
|
|
418
456
|
3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3, 3, 3, // Fx
|
419
457
|
};
|
420
458
|
|
459
|
+
/**
|
460
|
+
* Returns the size of the next character in the ASCII encoding. This basically
|
461
|
+
* means that if the top bit is not set, the character is 1 byte long.
|
462
|
+
*/
|
421
463
|
static size_t
|
422
464
|
pm_encoding_ascii_char_width(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) {
|
423
465
|
return *b < 0x80 ? 1 : 0;
|
424
466
|
}
|
425
467
|
|
468
|
+
/**
|
469
|
+
* Return the size of the next character in the ASCII encoding if it is an
|
470
|
+
* alphabetical character.
|
471
|
+
*/
|
426
472
|
size_t
|
427
473
|
pm_encoding_ascii_alpha_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) {
|
428
474
|
return (pm_encoding_ascii_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT);
|
429
475
|
}
|
430
476
|
|
477
|
+
/**
|
478
|
+
* Return the size of the next character in the ASCII encoding if it is an
|
479
|
+
* alphanumeric character.
|
480
|
+
*/
|
431
481
|
size_t
|
432
482
|
pm_encoding_ascii_alnum_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) {
|
433
483
|
return (pm_encoding_ascii_table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT) ? 1 : 0;
|
434
484
|
}
|
435
485
|
|
486
|
+
/**
|
487
|
+
* Return true if the next character in the ASCII encoding is an uppercase
|
488
|
+
* character.
|
489
|
+
*/
|
436
490
|
bool
|
437
491
|
pm_encoding_ascii_isupper_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) {
|
438
492
|
return (pm_encoding_ascii_table[*b] & PRISM_ENCODING_UPPERCASE_BIT);
|
439
493
|
}
|
440
494
|
|
495
|
+
/**
|
496
|
+
* For a lot of encodings the default is that they are a single byte long no
|
497
|
+
* matter what the codepoint, so this function is shared between them.
|
498
|
+
*/
|
441
499
|
static size_t
|
442
|
-
|
443
|
-
return
|
500
|
+
pm_encoding_single_char_width(PRISM_ATTRIBUTE_UNUSED const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) {
|
501
|
+
return 1;
|
444
502
|
}
|
445
503
|
|
504
|
+
/**
|
505
|
+
* Returns the size of the next character in the KOI8-R encoding. This means
|
506
|
+
* checking if it's a valid codepoint in KOI8-R and, if it is, returning 1.
|
507
|
+
*/
|
446
508
|
static size_t
|
447
|
-
|
448
|
-
return 1;
|
509
|
+
pm_encoding_koi8_r_char_width(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) {
|
510
|
+
return ((*b >= 0x20 && *b <= 0x7E) || (*b >= 0x80)) ? 1 : 0;
|
449
511
|
}
|
450
512
|
|
513
|
+
#define PRISM_ENCODING_TABLE(name) \
|
514
|
+
static size_t pm_encoding_ ##name ## _alpha_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) { \
|
515
|
+
return (pm_encoding_ ##name ## _table[*b] & PRISM_ENCODING_ALPHABETIC_BIT); \
|
516
|
+
} \
|
517
|
+
static size_t pm_encoding_ ##name ## _alnum_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) { \
|
518
|
+
return (pm_encoding_ ##name ## _table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT) ? 1 : 0; \
|
519
|
+
} \
|
520
|
+
static bool pm_encoding_ ##name ## _isupper_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) { \
|
521
|
+
return (pm_encoding_ ##name ## _table[*b] & PRISM_ENCODING_UPPERCASE_BIT); \
|
522
|
+
}
|
523
|
+
|
524
|
+
PRISM_ENCODING_TABLE(iso_8859_1)
|
525
|
+
PRISM_ENCODING_TABLE(iso_8859_2)
|
526
|
+
PRISM_ENCODING_TABLE(iso_8859_3)
|
527
|
+
PRISM_ENCODING_TABLE(iso_8859_4)
|
528
|
+
PRISM_ENCODING_TABLE(iso_8859_5)
|
529
|
+
PRISM_ENCODING_TABLE(iso_8859_6)
|
530
|
+
PRISM_ENCODING_TABLE(iso_8859_7)
|
531
|
+
PRISM_ENCODING_TABLE(iso_8859_8)
|
532
|
+
PRISM_ENCODING_TABLE(iso_8859_9)
|
533
|
+
PRISM_ENCODING_TABLE(iso_8859_10)
|
534
|
+
PRISM_ENCODING_TABLE(iso_8859_11)
|
535
|
+
PRISM_ENCODING_TABLE(iso_8859_13)
|
536
|
+
PRISM_ENCODING_TABLE(iso_8859_14)
|
537
|
+
PRISM_ENCODING_TABLE(iso_8859_15)
|
538
|
+
PRISM_ENCODING_TABLE(iso_8859_16)
|
539
|
+
PRISM_ENCODING_TABLE(koi8_r)
|
540
|
+
PRISM_ENCODING_TABLE(windows_1251)
|
541
|
+
PRISM_ENCODING_TABLE(windows_1252)
|
542
|
+
|
543
|
+
#undef PRISM_ENCODING_TABLE
|
544
|
+
|
545
|
+
/** ASCII encoding */
|
451
546
|
pm_encoding_t pm_encoding_ascii = {
|
452
547
|
.name = "ascii",
|
453
548
|
.char_width = pm_encoding_ascii_char_width,
|
@@ -457,6 +552,7 @@ pm_encoding_t pm_encoding_ascii = {
|
|
457
552
|
.multibyte = false
|
458
553
|
};
|
459
554
|
|
555
|
+
/** ASCII-8BIT encoding */
|
460
556
|
pm_encoding_t pm_encoding_ascii_8bit = {
|
461
557
|
.name = "ascii-8bit",
|
462
558
|
.char_width = pm_encoding_single_char_width,
|
@@ -466,42 +562,182 @@ pm_encoding_t pm_encoding_ascii_8bit = {
|
|
466
562
|
.multibyte = false
|
467
563
|
};
|
468
564
|
|
469
|
-
|
470
|
-
|
471
|
-
|
472
|
-
|
473
|
-
|
474
|
-
|
475
|
-
|
476
|
-
|
477
|
-
|
478
|
-
} \
|
479
|
-
pm_encoding_t pm_encoding_ ##i = { \
|
480
|
-
.name = s, \
|
481
|
-
.char_width = w, \
|
482
|
-
.alnum_char = pm_encoding_ ##i ## _alnum_char, \
|
483
|
-
.alpha_char = pm_encoding_ ##i ## _alpha_char, \
|
484
|
-
.isupper_char = pm_encoding_ ##i ## _isupper_char, \
|
485
|
-
.multibyte = false, \
|
486
|
-
};
|
487
|
-
|
488
|
-
PRISM_ENCODING_TABLE("iso-8859-1", iso_8859_1, pm_encoding_single_char_width)
|
489
|
-
PRISM_ENCODING_TABLE("iso-8859-2", iso_8859_2, pm_encoding_single_char_width)
|
490
|
-
PRISM_ENCODING_TABLE("iso-8859-3", iso_8859_3, pm_encoding_single_char_width)
|
491
|
-
PRISM_ENCODING_TABLE("iso-8859-4", iso_8859_4, pm_encoding_single_char_width)
|
492
|
-
PRISM_ENCODING_TABLE("iso-8859-5", iso_8859_5, pm_encoding_single_char_width)
|
493
|
-
PRISM_ENCODING_TABLE("iso-8859-6", iso_8859_6, pm_encoding_single_char_width)
|
494
|
-
PRISM_ENCODING_TABLE("iso-8859-7", iso_8859_7, pm_encoding_single_char_width)
|
495
|
-
PRISM_ENCODING_TABLE("iso-8859-8", iso_8859_8, pm_encoding_single_char_width)
|
496
|
-
PRISM_ENCODING_TABLE("iso-8859-9", iso_8859_9, pm_encoding_single_char_width)
|
497
|
-
PRISM_ENCODING_TABLE("iso-8859-10", iso_8859_10, pm_encoding_single_char_width)
|
498
|
-
PRISM_ENCODING_TABLE("iso-8859-11", iso_8859_11, pm_encoding_single_char_width)
|
499
|
-
PRISM_ENCODING_TABLE("iso-8859-13", iso_8859_13, pm_encoding_single_char_width)
|
500
|
-
PRISM_ENCODING_TABLE("iso-8859-14", iso_8859_14, pm_encoding_single_char_width)
|
501
|
-
PRISM_ENCODING_TABLE("iso-8859-15", iso_8859_15, pm_encoding_single_char_width)
|
502
|
-
PRISM_ENCODING_TABLE("iso-8859-16", iso_8859_16, pm_encoding_single_char_width)
|
503
|
-
PRISM_ENCODING_TABLE("koi8-r", koi8_r, pm_encoding_koi8_r_char_width)
|
504
|
-
PRISM_ENCODING_TABLE("windows-1251", windows_1251, pm_encoding_single_char_width)
|
505
|
-
PRISM_ENCODING_TABLE("windows-1252", windows_1252, pm_encoding_single_char_width)
|
565
|
+
/** ISO-8859-1 */
|
566
|
+
pm_encoding_t pm_encoding_iso_8859_1 = {
|
567
|
+
.name = "iso-8859-1",
|
568
|
+
.char_width = pm_encoding_single_char_width,
|
569
|
+
.alnum_char = pm_encoding_iso_8859_1_alnum_char,
|
570
|
+
.alpha_char = pm_encoding_iso_8859_1_alpha_char,
|
571
|
+
.isupper_char = pm_encoding_iso_8859_1_isupper_char,
|
572
|
+
.multibyte = false
|
573
|
+
};
|
506
574
|
|
507
|
-
|
575
|
+
/** ISO-8859-2 */
|
576
|
+
pm_encoding_t pm_encoding_iso_8859_2 = {
|
577
|
+
.name = "iso-8859-2",
|
578
|
+
.char_width = pm_encoding_single_char_width,
|
579
|
+
.alnum_char = pm_encoding_iso_8859_2_alnum_char,
|
580
|
+
.alpha_char = pm_encoding_iso_8859_2_alpha_char,
|
581
|
+
.isupper_char = pm_encoding_iso_8859_2_isupper_char,
|
582
|
+
.multibyte = false
|
583
|
+
};
|
584
|
+
|
585
|
+
/** ISO-8859-3 */
|
586
|
+
pm_encoding_t pm_encoding_iso_8859_3 = {
|
587
|
+
.name = "iso-8859-3",
|
588
|
+
.char_width = pm_encoding_single_char_width,
|
589
|
+
.alnum_char = pm_encoding_iso_8859_3_alnum_char,
|
590
|
+
.alpha_char = pm_encoding_iso_8859_3_alpha_char,
|
591
|
+
.isupper_char = pm_encoding_iso_8859_3_isupper_char,
|
592
|
+
.multibyte = false
|
593
|
+
};
|
594
|
+
|
595
|
+
/** ISO-8859-4 */
|
596
|
+
pm_encoding_t pm_encoding_iso_8859_4 = {
|
597
|
+
.name = "iso-8859-4",
|
598
|
+
.char_width = pm_encoding_single_char_width,
|
599
|
+
.alnum_char = pm_encoding_iso_8859_4_alnum_char,
|
600
|
+
.alpha_char = pm_encoding_iso_8859_4_alpha_char,
|
601
|
+
.isupper_char = pm_encoding_iso_8859_4_isupper_char,
|
602
|
+
.multibyte = false
|
603
|
+
};
|
604
|
+
|
605
|
+
/** ISO-8859-5 */
|
606
|
+
pm_encoding_t pm_encoding_iso_8859_5 = {
|
607
|
+
.name = "iso-8859-5",
|
608
|
+
.char_width = pm_encoding_single_char_width,
|
609
|
+
.alnum_char = pm_encoding_iso_8859_5_alnum_char,
|
610
|
+
.alpha_char = pm_encoding_iso_8859_5_alpha_char,
|
611
|
+
.isupper_char = pm_encoding_iso_8859_5_isupper_char,
|
612
|
+
.multibyte = false
|
613
|
+
};
|
614
|
+
|
615
|
+
/** ISO-8859-6 */
|
616
|
+
pm_encoding_t pm_encoding_iso_8859_6 = {
|
617
|
+
.name = "iso-8859-6",
|
618
|
+
.char_width = pm_encoding_single_char_width,
|
619
|
+
.alnum_char = pm_encoding_iso_8859_6_alnum_char,
|
620
|
+
.alpha_char = pm_encoding_iso_8859_6_alpha_char,
|
621
|
+
.isupper_char = pm_encoding_iso_8859_6_isupper_char,
|
622
|
+
.multibyte = false
|
623
|
+
};
|
624
|
+
|
625
|
+
/** ISO-8859-7 */
|
626
|
+
pm_encoding_t pm_encoding_iso_8859_7 = {
|
627
|
+
.name = "iso-8859-7",
|
628
|
+
.char_width = pm_encoding_single_char_width,
|
629
|
+
.alnum_char = pm_encoding_iso_8859_7_alnum_char,
|
630
|
+
.alpha_char = pm_encoding_iso_8859_7_alpha_char,
|
631
|
+
.isupper_char = pm_encoding_iso_8859_7_isupper_char,
|
632
|
+
.multibyte = false
|
633
|
+
};
|
634
|
+
|
635
|
+
/** ISO-8859-8 */
|
636
|
+
pm_encoding_t pm_encoding_iso_8859_8 = {
|
637
|
+
.name = "iso-8859-8",
|
638
|
+
.char_width = pm_encoding_single_char_width,
|
639
|
+
.alnum_char = pm_encoding_iso_8859_8_alnum_char,
|
640
|
+
.alpha_char = pm_encoding_iso_8859_8_alpha_char,
|
641
|
+
.isupper_char = pm_encoding_iso_8859_8_isupper_char,
|
642
|
+
.multibyte = false
|
643
|
+
};
|
644
|
+
|
645
|
+
/** ISO-8859-9 */
|
646
|
+
pm_encoding_t pm_encoding_iso_8859_9 = {
|
647
|
+
.name = "iso-8859-9",
|
648
|
+
.char_width = pm_encoding_single_char_width,
|
649
|
+
.alnum_char = pm_encoding_iso_8859_9_alnum_char,
|
650
|
+
.alpha_char = pm_encoding_iso_8859_9_alpha_char,
|
651
|
+
.isupper_char = pm_encoding_iso_8859_9_isupper_char,
|
652
|
+
.multibyte = false
|
653
|
+
};
|
654
|
+
|
655
|
+
/** ISO-8859-10 */
|
656
|
+
pm_encoding_t pm_encoding_iso_8859_10 = {
|
657
|
+
.name = "iso-8859-10",
|
658
|
+
.char_width = pm_encoding_single_char_width,
|
659
|
+
.alnum_char = pm_encoding_iso_8859_10_alnum_char,
|
660
|
+
.alpha_char = pm_encoding_iso_8859_10_alpha_char,
|
661
|
+
.isupper_char = pm_encoding_iso_8859_10_isupper_char,
|
662
|
+
.multibyte = false
|
663
|
+
};
|
664
|
+
|
665
|
+
/** ISO-8859-11 */
|
666
|
+
pm_encoding_t pm_encoding_iso_8859_11 = {
|
667
|
+
.name = "iso-8859-11",
|
668
|
+
.char_width = pm_encoding_single_char_width,
|
669
|
+
.alnum_char = pm_encoding_iso_8859_11_alnum_char,
|
670
|
+
.alpha_char = pm_encoding_iso_8859_11_alpha_char,
|
671
|
+
.isupper_char = pm_encoding_iso_8859_11_isupper_char,
|
672
|
+
.multibyte = false
|
673
|
+
};
|
674
|
+
|
675
|
+
/** ISO-8859-13 */
|
676
|
+
pm_encoding_t pm_encoding_iso_8859_13 = {
|
677
|
+
.name = "iso-8859-13",
|
678
|
+
.char_width = pm_encoding_single_char_width,
|
679
|
+
.alnum_char = pm_encoding_iso_8859_13_alnum_char,
|
680
|
+
.alpha_char = pm_encoding_iso_8859_13_alpha_char,
|
681
|
+
.isupper_char = pm_encoding_iso_8859_13_isupper_char,
|
682
|
+
.multibyte = false
|
683
|
+
};
|
684
|
+
|
685
|
+
/** ISO-8859-14 */
|
686
|
+
pm_encoding_t pm_encoding_iso_8859_14 = {
|
687
|
+
.name = "iso-8859-14",
|
688
|
+
.char_width = pm_encoding_single_char_width,
|
689
|
+
.alnum_char = pm_encoding_iso_8859_14_alnum_char,
|
690
|
+
.alpha_char = pm_encoding_iso_8859_14_alpha_char,
|
691
|
+
.isupper_char = pm_encoding_iso_8859_14_isupper_char,
|
692
|
+
.multibyte = false
|
693
|
+
};
|
694
|
+
|
695
|
+
/** ISO-8859-15 */
|
696
|
+
pm_encoding_t pm_encoding_iso_8859_15 = {
|
697
|
+
.name = "iso-8859-15",
|
698
|
+
.char_width = pm_encoding_single_char_width,
|
699
|
+
.alnum_char = pm_encoding_iso_8859_15_alnum_char,
|
700
|
+
.alpha_char = pm_encoding_iso_8859_15_alpha_char,
|
701
|
+
.isupper_char = pm_encoding_iso_8859_15_isupper_char,
|
702
|
+
.multibyte = false
|
703
|
+
};
|
704
|
+
|
705
|
+
/** ISO-8859-16 */
|
706
|
+
pm_encoding_t pm_encoding_iso_8859_16 = {
|
707
|
+
.name = "iso-8859-16",
|
708
|
+
.char_width = pm_encoding_single_char_width,
|
709
|
+
.alnum_char = pm_encoding_iso_8859_16_alnum_char,
|
710
|
+
.alpha_char = pm_encoding_iso_8859_16_alpha_char,
|
711
|
+
.isupper_char = pm_encoding_iso_8859_16_isupper_char,
|
712
|
+
.multibyte = false
|
713
|
+
};
|
714
|
+
|
715
|
+
/** KOI8-R */
|
716
|
+
pm_encoding_t pm_encoding_koi8_r = {
|
717
|
+
.name = "koi8-r",
|
718
|
+
.char_width = pm_encoding_koi8_r_char_width,
|
719
|
+
.alnum_char = pm_encoding_koi8_r_alnum_char,
|
720
|
+
.alpha_char = pm_encoding_koi8_r_alpha_char,
|
721
|
+
.isupper_char = pm_encoding_koi8_r_isupper_char,
|
722
|
+
.multibyte = false
|
723
|
+
};
|
724
|
+
|
725
|
+
/** Windows-1251 */
|
726
|
+
pm_encoding_t pm_encoding_windows_1251 = {
|
727
|
+
.name = "windows-1251",
|
728
|
+
.char_width = pm_encoding_single_char_width,
|
729
|
+
.alnum_char = pm_encoding_windows_1251_alnum_char,
|
730
|
+
.alpha_char = pm_encoding_windows_1251_alpha_char,
|
731
|
+
.isupper_char = pm_encoding_windows_1251_isupper_char,
|
732
|
+
.multibyte = false
|
733
|
+
};
|
734
|
+
|
735
|
+
/** Windows-1252 */
|
736
|
+
pm_encoding_t pm_encoding_windows_1252 = {
|
737
|
+
.name = "windows-1252",
|
738
|
+
.char_width = pm_encoding_single_char_width,
|
739
|
+
.alnum_char = pm_encoding_windows_1252_alnum_char,
|
740
|
+
.alpha_char = pm_encoding_windows_1252_alpha_char,
|
741
|
+
.isupper_char = pm_encoding_windows_1252_isupper_char,
|
742
|
+
.multibyte = false
|
743
|
+
};
|