prism 0.16.0 → 0.17.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -1
- data/Makefile +6 -0
- data/README.md +1 -1
- data/config.yml +50 -35
- data/docs/fuzzing.md +1 -1
- data/docs/serialization.md +28 -29
- data/ext/prism/api_node.c +802 -770
- data/ext/prism/api_pack.c +20 -9
- data/ext/prism/extension.c +464 -162
- data/ext/prism/extension.h +1 -1
- data/include/prism/ast.h +3173 -763
- data/include/prism/defines.h +32 -9
- data/include/prism/diagnostic.h +36 -3
- data/include/prism/enc/pm_encoding.h +118 -28
- data/include/prism/node.h +38 -13
- data/include/prism/options.h +204 -0
- data/include/prism/pack.h +44 -33
- data/include/prism/parser.h +445 -200
- data/include/prism/prettyprint.h +12 -1
- data/include/prism/regexp.h +16 -2
- data/include/prism/util/pm_buffer.h +94 -16
- data/include/prism/util/pm_char.h +162 -48
- data/include/prism/util/pm_constant_pool.h +126 -32
- data/include/prism/util/pm_list.h +68 -38
- data/include/prism/util/pm_memchr.h +18 -3
- data/include/prism/util/pm_newline_list.h +70 -27
- data/include/prism/util/pm_state_stack.h +25 -7
- data/include/prism/util/pm_string.h +115 -27
- data/include/prism/util/pm_string_list.h +25 -6
- data/include/prism/util/pm_strncasecmp.h +32 -0
- data/include/prism/util/pm_strpbrk.h +31 -17
- data/include/prism/version.h +27 -2
- data/include/prism.h +224 -31
- data/lib/prism/compiler.rb +6 -3
- data/lib/prism/debug.rb +23 -7
- data/lib/prism/dispatcher.rb +33 -18
- data/lib/prism/dsl.rb +10 -5
- data/lib/prism/ffi.rb +132 -80
- data/lib/prism/lex_compat.rb +25 -15
- data/lib/prism/mutation_compiler.rb +10 -5
- data/lib/prism/node.rb +370 -135
- data/lib/prism/node_ext.rb +1 -1
- data/lib/prism/node_inspector.rb +1 -1
- data/lib/prism/pack.rb +79 -40
- data/lib/prism/parse_result/comments.rb +7 -2
- data/lib/prism/parse_result/newlines.rb +4 -0
- data/lib/prism/parse_result.rb +150 -30
- data/lib/prism/pattern.rb +11 -0
- data/lib/prism/ripper_compat.rb +28 -10
- data/lib/prism/serialize.rb +86 -54
- data/lib/prism/visitor.rb +10 -3
- data/lib/prism.rb +20 -2
- data/prism.gemspec +4 -2
- data/rbi/prism.rbi +104 -60
- data/rbi/prism_static.rbi +16 -2
- data/sig/prism.rbs +72 -43
- data/sig/prism_static.rbs +14 -1
- data/src/diagnostic.c +56 -53
- data/src/enc/pm_big5.c +1 -0
- data/src/enc/pm_euc_jp.c +1 -0
- data/src/enc/pm_gbk.c +1 -0
- data/src/enc/pm_shift_jis.c +1 -0
- data/src/enc/pm_tables.c +316 -80
- data/src/enc/pm_unicode.c +53 -8
- data/src/enc/pm_windows_31j.c +1 -0
- data/src/node.c +334 -321
- data/src/options.c +170 -0
- data/src/prettyprint.c +74 -47
- data/src/prism.c +1642 -856
- data/src/regexp.c +151 -95
- data/src/serialize.c +44 -20
- data/src/token_type.c +3 -1
- data/src/util/pm_buffer.c +45 -15
- data/src/util/pm_char.c +103 -57
- data/src/util/pm_constant_pool.c +51 -21
- data/src/util/pm_list.c +12 -4
- data/src/util/pm_memchr.c +5 -3
- data/src/util/pm_newline_list.c +20 -12
- data/src/util/pm_state_stack.c +9 -3
- data/src/util/pm_string.c +95 -85
- data/src/util/pm_string_list.c +14 -15
- data/src/util/pm_strncasecmp.c +10 -3
- data/src/util/pm_strpbrk.c +25 -19
- metadata +5 -3
- data/docs/prism.png +0 -0
data/src/enc/pm_tables.c
CHANGED
@@ -1,7 +1,9 @@
|
|
1
1
|
#include "prism/enc/pm_encoding.h"
|
2
2
|
|
3
|
-
|
4
|
-
|
3
|
+
/**
|
4
|
+
* Each element of the following table contains a bitfield that indicates a
|
5
|
+
* piece of information about the corresponding ASCII character.
|
6
|
+
*/
|
5
7
|
static uint8_t pm_encoding_ascii_table[256] = {
|
6
8
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
7
9
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
@@ -22,8 +24,10 @@ static uint8_t pm_encoding_ascii_table[256] = {
|
|
22
24
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx
|
23
25
|
};
|
24
26
|
|
25
|
-
|
26
|
-
|
27
|
+
/**
|
28
|
+
* Each element of the following table contains a bitfield that indicates a
|
29
|
+
* piece of information about the corresponding ISO-8859-1 character.
|
30
|
+
*/
|
27
31
|
static uint8_t pm_encoding_iso_8859_1_table[256] = {
|
28
32
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
29
33
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
@@ -44,8 +48,10 @@ static uint8_t pm_encoding_iso_8859_1_table[256] = {
|
|
44
48
|
3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3, 3, 3, // Fx
|
45
49
|
};
|
46
50
|
|
47
|
-
|
48
|
-
|
51
|
+
/**
|
52
|
+
* Each element of the following table contains a bitfield that indicates a
|
53
|
+
* piece of information about the corresponding ISO-8859-2 character.
|
54
|
+
*/
|
49
55
|
static uint8_t pm_encoding_iso_8859_2_table[256] = {
|
50
56
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
51
57
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
@@ -66,8 +72,10 @@ static uint8_t pm_encoding_iso_8859_2_table[256] = {
|
|
66
72
|
3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3, 3, 0, // Fx
|
67
73
|
};
|
68
74
|
|
69
|
-
|
70
|
-
|
75
|
+
/**
|
76
|
+
* Each element of the following table contains a bitfield that indicates a
|
77
|
+
* piece of information about the corresponding ISO-8859-3 character.
|
78
|
+
*/
|
71
79
|
static uint8_t pm_encoding_iso_8859_3_table[256] = {
|
72
80
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
73
81
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
@@ -88,8 +96,10 @@ static uint8_t pm_encoding_iso_8859_3_table[256] = {
|
|
88
96
|
0, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3, 3, 0, // Fx
|
89
97
|
};
|
90
98
|
|
91
|
-
|
92
|
-
|
99
|
+
/**
|
100
|
+
* Each element of the following table contains a bitfield that indicates a
|
101
|
+
* piece of information about the corresponding ISO-8859-4 character.
|
102
|
+
*/
|
93
103
|
static uint8_t pm_encoding_iso_8859_4_table[256] = {
|
94
104
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
95
105
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
@@ -110,8 +120,10 @@ static uint8_t pm_encoding_iso_8859_4_table[256] = {
|
|
110
120
|
3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3, 3, 0, // Fx
|
111
121
|
};
|
112
122
|
|
113
|
-
|
114
|
-
|
123
|
+
/**
|
124
|
+
* Each element of the following table contains a bitfield that indicates a
|
125
|
+
* piece of information about the corresponding ISO-8859-5 character.
|
126
|
+
*/
|
115
127
|
static uint8_t pm_encoding_iso_8859_5_table[256] = {
|
116
128
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
117
129
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
@@ -132,8 +144,10 @@ static uint8_t pm_encoding_iso_8859_5_table[256] = {
|
|
132
144
|
0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 3, 3, // Fx
|
133
145
|
};
|
134
146
|
|
135
|
-
|
136
|
-
|
147
|
+
/**
|
148
|
+
* Each element of the following table contains a bitfield that indicates a
|
149
|
+
* piece of information about the corresponding ISO-8859-6 character.
|
150
|
+
*/
|
137
151
|
static uint8_t pm_encoding_iso_8859_6_table[256] = {
|
138
152
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
139
153
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
@@ -154,8 +168,10 @@ static uint8_t pm_encoding_iso_8859_6_table[256] = {
|
|
154
168
|
3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx
|
155
169
|
};
|
156
170
|
|
157
|
-
|
158
|
-
|
171
|
+
/**
|
172
|
+
* Each element of the following table contains a bitfield that indicates a
|
173
|
+
* piece of information about the corresponding ISO-8859-7 character.
|
174
|
+
*/
|
159
175
|
static uint8_t pm_encoding_iso_8859_7_table[256] = {
|
160
176
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
161
177
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
@@ -176,8 +192,10 @@ static uint8_t pm_encoding_iso_8859_7_table[256] = {
|
|
176
192
|
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, // Fx
|
177
193
|
};
|
178
194
|
|
179
|
-
|
180
|
-
|
195
|
+
/**
|
196
|
+
* Each element of the following table contains a bitfield that indicates a
|
197
|
+
* piece of information about the corresponding ISO-8859-8 character.
|
198
|
+
*/
|
181
199
|
static uint8_t pm_encoding_iso_8859_8_table[256] = {
|
182
200
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
183
201
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
@@ -198,8 +216,10 @@ static uint8_t pm_encoding_iso_8859_8_table[256] = {
|
|
198
216
|
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // Fx
|
199
217
|
};
|
200
218
|
|
201
|
-
|
202
|
-
|
219
|
+
/**
|
220
|
+
* Each element of the following table contains a bitfield that indicates a
|
221
|
+
* piece of information about the corresponding ISO-8859-9 character.
|
222
|
+
*/
|
203
223
|
static uint8_t pm_encoding_iso_8859_9_table[256] = {
|
204
224
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
205
225
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
@@ -220,8 +240,10 @@ static uint8_t pm_encoding_iso_8859_9_table[256] = {
|
|
220
240
|
3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3, 3, 3, // Fx
|
221
241
|
};
|
222
242
|
|
223
|
-
|
224
|
-
|
243
|
+
/**
|
244
|
+
* Each element of the following table contains a bitfield that indicates a
|
245
|
+
* piece of information about the corresponding ISO-8859-10 character.
|
246
|
+
*/
|
225
247
|
static uint8_t pm_encoding_iso_8859_10_table[256] = {
|
226
248
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
227
249
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
@@ -242,8 +264,10 @@ static uint8_t pm_encoding_iso_8859_10_table[256] = {
|
|
242
264
|
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // Fx
|
243
265
|
};
|
244
266
|
|
245
|
-
|
246
|
-
|
267
|
+
/**
|
268
|
+
* Each element of the following table contains a bitfield that indicates a
|
269
|
+
* piece of information about the corresponding ISO-8859-11 character.
|
270
|
+
*/
|
247
271
|
static uint8_t pm_encoding_iso_8859_11_table[256] = {
|
248
272
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
249
273
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
@@ -264,8 +288,10 @@ static uint8_t pm_encoding_iso_8859_11_table[256] = {
|
|
264
288
|
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, // Fx
|
265
289
|
};
|
266
290
|
|
267
|
-
|
268
|
-
|
291
|
+
/**
|
292
|
+
* Each element of the following table contains a bitfield that indicates a
|
293
|
+
* piece of information about the corresponding ISO-8859-13 character.
|
294
|
+
*/
|
269
295
|
static uint8_t pm_encoding_iso_8859_13_table[256] = {
|
270
296
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
271
297
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
@@ -286,8 +312,10 @@ static uint8_t pm_encoding_iso_8859_13_table[256] = {
|
|
286
312
|
3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3, 3, 0, // Fx
|
287
313
|
};
|
288
314
|
|
289
|
-
|
290
|
-
|
315
|
+
/**
|
316
|
+
* Each element of the following table contains a bitfield that indicates a
|
317
|
+
* piece of information about the corresponding ISO-8859-14 character.
|
318
|
+
*/
|
291
319
|
static uint8_t pm_encoding_iso_8859_14_table[256] = {
|
292
320
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
293
321
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
@@ -308,8 +336,10 @@ static uint8_t pm_encoding_iso_8859_14_table[256] = {
|
|
308
336
|
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // Fx
|
309
337
|
};
|
310
338
|
|
311
|
-
|
312
|
-
|
339
|
+
/**
|
340
|
+
* Each element of the following table contains a bitfield that indicates a
|
341
|
+
* piece of information about the corresponding ISO-8859-15 character.
|
342
|
+
*/
|
313
343
|
static uint8_t pm_encoding_iso_8859_15_table[256] = {
|
314
344
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
315
345
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
@@ -330,8 +360,10 @@ static uint8_t pm_encoding_iso_8859_15_table[256] = {
|
|
330
360
|
3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3, 3, 3, // Fx
|
331
361
|
};
|
332
362
|
|
333
|
-
|
334
|
-
|
363
|
+
/**
|
364
|
+
* Each element of the following table contains a bitfield that indicates a
|
365
|
+
* piece of information about the corresponding ISO-8859-16 character.
|
366
|
+
*/
|
335
367
|
static uint8_t pm_encoding_iso_8859_16_table[256] = {
|
336
368
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
337
369
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
@@ -352,8 +384,10 @@ static uint8_t pm_encoding_iso_8859_16_table[256] = {
|
|
352
384
|
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // Fx
|
353
385
|
};
|
354
386
|
|
355
|
-
|
356
|
-
|
387
|
+
/**
|
388
|
+
* Each element of the following table contains a bitfield that indicates a
|
389
|
+
* piece of information about the corresponding KOI8-R character.
|
390
|
+
*/
|
357
391
|
static uint8_t pm_encoding_koi8_r_table[256] = {
|
358
392
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
359
393
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
@@ -374,8 +408,10 @@ static uint8_t pm_encoding_koi8_r_table[256] = {
|
|
374
408
|
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // Fx
|
375
409
|
};
|
376
410
|
|
377
|
-
|
378
|
-
|
411
|
+
/**
|
412
|
+
* Each element of the following table contains a bitfield that indicates a
|
413
|
+
* piece of information about the corresponding windows-1251 character.
|
414
|
+
*/
|
379
415
|
static uint8_t pm_encoding_windows_1251_table[256] = {
|
380
416
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
381
417
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
@@ -396,8 +432,10 @@ static uint8_t pm_encoding_windows_1251_table[256] = {
|
|
396
432
|
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // Fx
|
397
433
|
};
|
398
434
|
|
399
|
-
|
400
|
-
|
435
|
+
/**
|
436
|
+
* Each element of the following table contains a bitfield that indicates a
|
437
|
+
* piece of information about the corresponding windows-1252 character.
|
438
|
+
*/
|
401
439
|
static uint8_t pm_encoding_windows_1252_table[256] = {
|
402
440
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
403
441
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
@@ -418,36 +456,93 @@ static uint8_t pm_encoding_windows_1252_table[256] = {
|
|
418
456
|
3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3, 3, 3, // Fx
|
419
457
|
};
|
420
458
|
|
459
|
+
/**
|
460
|
+
* Returns the size of the next character in the ASCII encoding. This basically
|
461
|
+
* means that if the top bit is not set, the character is 1 byte long.
|
462
|
+
*/
|
421
463
|
static size_t
|
422
464
|
pm_encoding_ascii_char_width(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) {
|
423
465
|
return *b < 0x80 ? 1 : 0;
|
424
466
|
}
|
425
467
|
|
468
|
+
/**
|
469
|
+
* Return the size of the next character in the ASCII encoding if it is an
|
470
|
+
* alphabetical character.
|
471
|
+
*/
|
426
472
|
size_t
|
427
473
|
pm_encoding_ascii_alpha_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) {
|
428
474
|
return (pm_encoding_ascii_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT);
|
429
475
|
}
|
430
476
|
|
477
|
+
/**
|
478
|
+
* Return the size of the next character in the ASCII encoding if it is an
|
479
|
+
* alphanumeric character.
|
480
|
+
*/
|
431
481
|
size_t
|
432
482
|
pm_encoding_ascii_alnum_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) {
|
433
483
|
return (pm_encoding_ascii_table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT) ? 1 : 0;
|
434
484
|
}
|
435
485
|
|
486
|
+
/**
|
487
|
+
* Return true if the next character in the ASCII encoding is an uppercase
|
488
|
+
* character.
|
489
|
+
*/
|
436
490
|
bool
|
437
491
|
pm_encoding_ascii_isupper_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) {
|
438
492
|
return (pm_encoding_ascii_table[*b] & PRISM_ENCODING_UPPERCASE_BIT);
|
439
493
|
}
|
440
494
|
|
495
|
+
/**
|
496
|
+
* For a lot of encodings the default is that they are a single byte long no
|
497
|
+
* matter what the codepoint, so this function is shared between them.
|
498
|
+
*/
|
441
499
|
static size_t
|
442
|
-
|
443
|
-
return
|
500
|
+
pm_encoding_single_char_width(PRISM_ATTRIBUTE_UNUSED const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) {
|
501
|
+
return 1;
|
444
502
|
}
|
445
503
|
|
504
|
+
/**
|
505
|
+
* Returns the size of the next character in the KOI8-R encoding. This means
|
506
|
+
* checking if it's a valid codepoint in KOI8-R and, if it is, returning 1.
|
507
|
+
*/
|
446
508
|
static size_t
|
447
|
-
|
448
|
-
return 1;
|
509
|
+
pm_encoding_koi8_r_char_width(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) {
|
510
|
+
return ((*b >= 0x20 && *b <= 0x7E) || (*b >= 0x80)) ? 1 : 0;
|
449
511
|
}
|
450
512
|
|
513
|
+
#define PRISM_ENCODING_TABLE(name) \
|
514
|
+
static size_t pm_encoding_ ##name ## _alpha_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) { \
|
515
|
+
return (pm_encoding_ ##name ## _table[*b] & PRISM_ENCODING_ALPHABETIC_BIT); \
|
516
|
+
} \
|
517
|
+
static size_t pm_encoding_ ##name ## _alnum_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) { \
|
518
|
+
return (pm_encoding_ ##name ## _table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT) ? 1 : 0; \
|
519
|
+
} \
|
520
|
+
static bool pm_encoding_ ##name ## _isupper_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) { \
|
521
|
+
return (pm_encoding_ ##name ## _table[*b] & PRISM_ENCODING_UPPERCASE_BIT); \
|
522
|
+
}
|
523
|
+
|
524
|
+
PRISM_ENCODING_TABLE(iso_8859_1)
|
525
|
+
PRISM_ENCODING_TABLE(iso_8859_2)
|
526
|
+
PRISM_ENCODING_TABLE(iso_8859_3)
|
527
|
+
PRISM_ENCODING_TABLE(iso_8859_4)
|
528
|
+
PRISM_ENCODING_TABLE(iso_8859_5)
|
529
|
+
PRISM_ENCODING_TABLE(iso_8859_6)
|
530
|
+
PRISM_ENCODING_TABLE(iso_8859_7)
|
531
|
+
PRISM_ENCODING_TABLE(iso_8859_8)
|
532
|
+
PRISM_ENCODING_TABLE(iso_8859_9)
|
533
|
+
PRISM_ENCODING_TABLE(iso_8859_10)
|
534
|
+
PRISM_ENCODING_TABLE(iso_8859_11)
|
535
|
+
PRISM_ENCODING_TABLE(iso_8859_13)
|
536
|
+
PRISM_ENCODING_TABLE(iso_8859_14)
|
537
|
+
PRISM_ENCODING_TABLE(iso_8859_15)
|
538
|
+
PRISM_ENCODING_TABLE(iso_8859_16)
|
539
|
+
PRISM_ENCODING_TABLE(koi8_r)
|
540
|
+
PRISM_ENCODING_TABLE(windows_1251)
|
541
|
+
PRISM_ENCODING_TABLE(windows_1252)
|
542
|
+
|
543
|
+
#undef PRISM_ENCODING_TABLE
|
544
|
+
|
545
|
+
/** ASCII encoding */
|
451
546
|
pm_encoding_t pm_encoding_ascii = {
|
452
547
|
.name = "ascii",
|
453
548
|
.char_width = pm_encoding_ascii_char_width,
|
@@ -457,6 +552,7 @@ pm_encoding_t pm_encoding_ascii = {
|
|
457
552
|
.multibyte = false
|
458
553
|
};
|
459
554
|
|
555
|
+
/** ASCII-8BIT encoding */
|
460
556
|
pm_encoding_t pm_encoding_ascii_8bit = {
|
461
557
|
.name = "ascii-8bit",
|
462
558
|
.char_width = pm_encoding_single_char_width,
|
@@ -466,42 +562,182 @@ pm_encoding_t pm_encoding_ascii_8bit = {
|
|
466
562
|
.multibyte = false
|
467
563
|
};
|
468
564
|
|
469
|
-
|
470
|
-
|
471
|
-
|
472
|
-
|
473
|
-
|
474
|
-
|
475
|
-
|
476
|
-
|
477
|
-
|
478
|
-
} \
|
479
|
-
pm_encoding_t pm_encoding_ ##i = { \
|
480
|
-
.name = s, \
|
481
|
-
.char_width = w, \
|
482
|
-
.alnum_char = pm_encoding_ ##i ## _alnum_char, \
|
483
|
-
.alpha_char = pm_encoding_ ##i ## _alpha_char, \
|
484
|
-
.isupper_char = pm_encoding_ ##i ## _isupper_char, \
|
485
|
-
.multibyte = false, \
|
486
|
-
};
|
487
|
-
|
488
|
-
PRISM_ENCODING_TABLE("iso-8859-1", iso_8859_1, pm_encoding_single_char_width)
|
489
|
-
PRISM_ENCODING_TABLE("iso-8859-2", iso_8859_2, pm_encoding_single_char_width)
|
490
|
-
PRISM_ENCODING_TABLE("iso-8859-3", iso_8859_3, pm_encoding_single_char_width)
|
491
|
-
PRISM_ENCODING_TABLE("iso-8859-4", iso_8859_4, pm_encoding_single_char_width)
|
492
|
-
PRISM_ENCODING_TABLE("iso-8859-5", iso_8859_5, pm_encoding_single_char_width)
|
493
|
-
PRISM_ENCODING_TABLE("iso-8859-6", iso_8859_6, pm_encoding_single_char_width)
|
494
|
-
PRISM_ENCODING_TABLE("iso-8859-7", iso_8859_7, pm_encoding_single_char_width)
|
495
|
-
PRISM_ENCODING_TABLE("iso-8859-8", iso_8859_8, pm_encoding_single_char_width)
|
496
|
-
PRISM_ENCODING_TABLE("iso-8859-9", iso_8859_9, pm_encoding_single_char_width)
|
497
|
-
PRISM_ENCODING_TABLE("iso-8859-10", iso_8859_10, pm_encoding_single_char_width)
|
498
|
-
PRISM_ENCODING_TABLE("iso-8859-11", iso_8859_11, pm_encoding_single_char_width)
|
499
|
-
PRISM_ENCODING_TABLE("iso-8859-13", iso_8859_13, pm_encoding_single_char_width)
|
500
|
-
PRISM_ENCODING_TABLE("iso-8859-14", iso_8859_14, pm_encoding_single_char_width)
|
501
|
-
PRISM_ENCODING_TABLE("iso-8859-15", iso_8859_15, pm_encoding_single_char_width)
|
502
|
-
PRISM_ENCODING_TABLE("iso-8859-16", iso_8859_16, pm_encoding_single_char_width)
|
503
|
-
PRISM_ENCODING_TABLE("koi8-r", koi8_r, pm_encoding_koi8_r_char_width)
|
504
|
-
PRISM_ENCODING_TABLE("windows-1251", windows_1251, pm_encoding_single_char_width)
|
505
|
-
PRISM_ENCODING_TABLE("windows-1252", windows_1252, pm_encoding_single_char_width)
|
565
|
+
/** ISO-8859-1 */
|
566
|
+
pm_encoding_t pm_encoding_iso_8859_1 = {
|
567
|
+
.name = "iso-8859-1",
|
568
|
+
.char_width = pm_encoding_single_char_width,
|
569
|
+
.alnum_char = pm_encoding_iso_8859_1_alnum_char,
|
570
|
+
.alpha_char = pm_encoding_iso_8859_1_alpha_char,
|
571
|
+
.isupper_char = pm_encoding_iso_8859_1_isupper_char,
|
572
|
+
.multibyte = false
|
573
|
+
};
|
506
574
|
|
507
|
-
|
575
|
+
/** ISO-8859-2 */
|
576
|
+
pm_encoding_t pm_encoding_iso_8859_2 = {
|
577
|
+
.name = "iso-8859-2",
|
578
|
+
.char_width = pm_encoding_single_char_width,
|
579
|
+
.alnum_char = pm_encoding_iso_8859_2_alnum_char,
|
580
|
+
.alpha_char = pm_encoding_iso_8859_2_alpha_char,
|
581
|
+
.isupper_char = pm_encoding_iso_8859_2_isupper_char,
|
582
|
+
.multibyte = false
|
583
|
+
};
|
584
|
+
|
585
|
+
/** ISO-8859-3 */
|
586
|
+
pm_encoding_t pm_encoding_iso_8859_3 = {
|
587
|
+
.name = "iso-8859-3",
|
588
|
+
.char_width = pm_encoding_single_char_width,
|
589
|
+
.alnum_char = pm_encoding_iso_8859_3_alnum_char,
|
590
|
+
.alpha_char = pm_encoding_iso_8859_3_alpha_char,
|
591
|
+
.isupper_char = pm_encoding_iso_8859_3_isupper_char,
|
592
|
+
.multibyte = false
|
593
|
+
};
|
594
|
+
|
595
|
+
/** ISO-8859-4 */
|
596
|
+
pm_encoding_t pm_encoding_iso_8859_4 = {
|
597
|
+
.name = "iso-8859-4",
|
598
|
+
.char_width = pm_encoding_single_char_width,
|
599
|
+
.alnum_char = pm_encoding_iso_8859_4_alnum_char,
|
600
|
+
.alpha_char = pm_encoding_iso_8859_4_alpha_char,
|
601
|
+
.isupper_char = pm_encoding_iso_8859_4_isupper_char,
|
602
|
+
.multibyte = false
|
603
|
+
};
|
604
|
+
|
605
|
+
/** ISO-8859-5 */
|
606
|
+
pm_encoding_t pm_encoding_iso_8859_5 = {
|
607
|
+
.name = "iso-8859-5",
|
608
|
+
.char_width = pm_encoding_single_char_width,
|
609
|
+
.alnum_char = pm_encoding_iso_8859_5_alnum_char,
|
610
|
+
.alpha_char = pm_encoding_iso_8859_5_alpha_char,
|
611
|
+
.isupper_char = pm_encoding_iso_8859_5_isupper_char,
|
612
|
+
.multibyte = false
|
613
|
+
};
|
614
|
+
|
615
|
+
/** ISO-8859-6 */
|
616
|
+
pm_encoding_t pm_encoding_iso_8859_6 = {
|
617
|
+
.name = "iso-8859-6",
|
618
|
+
.char_width = pm_encoding_single_char_width,
|
619
|
+
.alnum_char = pm_encoding_iso_8859_6_alnum_char,
|
620
|
+
.alpha_char = pm_encoding_iso_8859_6_alpha_char,
|
621
|
+
.isupper_char = pm_encoding_iso_8859_6_isupper_char,
|
622
|
+
.multibyte = false
|
623
|
+
};
|
624
|
+
|
625
|
+
/** ISO-8859-7 */
|
626
|
+
pm_encoding_t pm_encoding_iso_8859_7 = {
|
627
|
+
.name = "iso-8859-7",
|
628
|
+
.char_width = pm_encoding_single_char_width,
|
629
|
+
.alnum_char = pm_encoding_iso_8859_7_alnum_char,
|
630
|
+
.alpha_char = pm_encoding_iso_8859_7_alpha_char,
|
631
|
+
.isupper_char = pm_encoding_iso_8859_7_isupper_char,
|
632
|
+
.multibyte = false
|
633
|
+
};
|
634
|
+
|
635
|
+
/** ISO-8859-8 */
|
636
|
+
pm_encoding_t pm_encoding_iso_8859_8 = {
|
637
|
+
.name = "iso-8859-8",
|
638
|
+
.char_width = pm_encoding_single_char_width,
|
639
|
+
.alnum_char = pm_encoding_iso_8859_8_alnum_char,
|
640
|
+
.alpha_char = pm_encoding_iso_8859_8_alpha_char,
|
641
|
+
.isupper_char = pm_encoding_iso_8859_8_isupper_char,
|
642
|
+
.multibyte = false
|
643
|
+
};
|
644
|
+
|
645
|
+
/** ISO-8859-9 */
|
646
|
+
pm_encoding_t pm_encoding_iso_8859_9 = {
|
647
|
+
.name = "iso-8859-9",
|
648
|
+
.char_width = pm_encoding_single_char_width,
|
649
|
+
.alnum_char = pm_encoding_iso_8859_9_alnum_char,
|
650
|
+
.alpha_char = pm_encoding_iso_8859_9_alpha_char,
|
651
|
+
.isupper_char = pm_encoding_iso_8859_9_isupper_char,
|
652
|
+
.multibyte = false
|
653
|
+
};
|
654
|
+
|
655
|
+
/** ISO-8859-10 */
|
656
|
+
pm_encoding_t pm_encoding_iso_8859_10 = {
|
657
|
+
.name = "iso-8859-10",
|
658
|
+
.char_width = pm_encoding_single_char_width,
|
659
|
+
.alnum_char = pm_encoding_iso_8859_10_alnum_char,
|
660
|
+
.alpha_char = pm_encoding_iso_8859_10_alpha_char,
|
661
|
+
.isupper_char = pm_encoding_iso_8859_10_isupper_char,
|
662
|
+
.multibyte = false
|
663
|
+
};
|
664
|
+
|
665
|
+
/** ISO-8859-11 */
|
666
|
+
pm_encoding_t pm_encoding_iso_8859_11 = {
|
667
|
+
.name = "iso-8859-11",
|
668
|
+
.char_width = pm_encoding_single_char_width,
|
669
|
+
.alnum_char = pm_encoding_iso_8859_11_alnum_char,
|
670
|
+
.alpha_char = pm_encoding_iso_8859_11_alpha_char,
|
671
|
+
.isupper_char = pm_encoding_iso_8859_11_isupper_char,
|
672
|
+
.multibyte = false
|
673
|
+
};
|
674
|
+
|
675
|
+
/** ISO-8859-13 */
|
676
|
+
pm_encoding_t pm_encoding_iso_8859_13 = {
|
677
|
+
.name = "iso-8859-13",
|
678
|
+
.char_width = pm_encoding_single_char_width,
|
679
|
+
.alnum_char = pm_encoding_iso_8859_13_alnum_char,
|
680
|
+
.alpha_char = pm_encoding_iso_8859_13_alpha_char,
|
681
|
+
.isupper_char = pm_encoding_iso_8859_13_isupper_char,
|
682
|
+
.multibyte = false
|
683
|
+
};
|
684
|
+
|
685
|
+
/** ISO-8859-14 */
|
686
|
+
pm_encoding_t pm_encoding_iso_8859_14 = {
|
687
|
+
.name = "iso-8859-14",
|
688
|
+
.char_width = pm_encoding_single_char_width,
|
689
|
+
.alnum_char = pm_encoding_iso_8859_14_alnum_char,
|
690
|
+
.alpha_char = pm_encoding_iso_8859_14_alpha_char,
|
691
|
+
.isupper_char = pm_encoding_iso_8859_14_isupper_char,
|
692
|
+
.multibyte = false
|
693
|
+
};
|
694
|
+
|
695
|
+
/** ISO-8859-15 */
|
696
|
+
pm_encoding_t pm_encoding_iso_8859_15 = {
|
697
|
+
.name = "iso-8859-15",
|
698
|
+
.char_width = pm_encoding_single_char_width,
|
699
|
+
.alnum_char = pm_encoding_iso_8859_15_alnum_char,
|
700
|
+
.alpha_char = pm_encoding_iso_8859_15_alpha_char,
|
701
|
+
.isupper_char = pm_encoding_iso_8859_15_isupper_char,
|
702
|
+
.multibyte = false
|
703
|
+
};
|
704
|
+
|
705
|
+
/** ISO-8859-16 */
|
706
|
+
pm_encoding_t pm_encoding_iso_8859_16 = {
|
707
|
+
.name = "iso-8859-16",
|
708
|
+
.char_width = pm_encoding_single_char_width,
|
709
|
+
.alnum_char = pm_encoding_iso_8859_16_alnum_char,
|
710
|
+
.alpha_char = pm_encoding_iso_8859_16_alpha_char,
|
711
|
+
.isupper_char = pm_encoding_iso_8859_16_isupper_char,
|
712
|
+
.multibyte = false
|
713
|
+
};
|
714
|
+
|
715
|
+
/** KOI8-R */
|
716
|
+
pm_encoding_t pm_encoding_koi8_r = {
|
717
|
+
.name = "koi8-r",
|
718
|
+
.char_width = pm_encoding_koi8_r_char_width,
|
719
|
+
.alnum_char = pm_encoding_koi8_r_alnum_char,
|
720
|
+
.alpha_char = pm_encoding_koi8_r_alpha_char,
|
721
|
+
.isupper_char = pm_encoding_koi8_r_isupper_char,
|
722
|
+
.multibyte = false
|
723
|
+
};
|
724
|
+
|
725
|
+
/** Windows-1251 */
|
726
|
+
pm_encoding_t pm_encoding_windows_1251 = {
|
727
|
+
.name = "windows-1251",
|
728
|
+
.char_width = pm_encoding_single_char_width,
|
729
|
+
.alnum_char = pm_encoding_windows_1251_alnum_char,
|
730
|
+
.alpha_char = pm_encoding_windows_1251_alpha_char,
|
731
|
+
.isupper_char = pm_encoding_windows_1251_isupper_char,
|
732
|
+
.multibyte = false
|
733
|
+
};
|
734
|
+
|
735
|
+
/** Windows-1252 */
|
736
|
+
pm_encoding_t pm_encoding_windows_1252 = {
|
737
|
+
.name = "windows-1252",
|
738
|
+
.char_width = pm_encoding_single_char_width,
|
739
|
+
.alnum_char = pm_encoding_windows_1252_alnum_char,
|
740
|
+
.alpha_char = pm_encoding_windows_1252_alpha_char,
|
741
|
+
.isupper_char = pm_encoding_windows_1252_isupper_char,
|
742
|
+
.multibyte = false
|
743
|
+
};
|