prism 0.16.0 → 0.17.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (86) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +16 -1
  3. data/Makefile +6 -0
  4. data/README.md +1 -1
  5. data/config.yml +50 -35
  6. data/docs/fuzzing.md +1 -1
  7. data/docs/serialization.md +28 -29
  8. data/ext/prism/api_node.c +802 -770
  9. data/ext/prism/api_pack.c +20 -9
  10. data/ext/prism/extension.c +464 -162
  11. data/ext/prism/extension.h +1 -1
  12. data/include/prism/ast.h +3173 -763
  13. data/include/prism/defines.h +32 -9
  14. data/include/prism/diagnostic.h +36 -3
  15. data/include/prism/enc/pm_encoding.h +118 -28
  16. data/include/prism/node.h +38 -13
  17. data/include/prism/options.h +204 -0
  18. data/include/prism/pack.h +44 -33
  19. data/include/prism/parser.h +445 -200
  20. data/include/prism/prettyprint.h +12 -1
  21. data/include/prism/regexp.h +16 -2
  22. data/include/prism/util/pm_buffer.h +94 -16
  23. data/include/prism/util/pm_char.h +162 -48
  24. data/include/prism/util/pm_constant_pool.h +126 -32
  25. data/include/prism/util/pm_list.h +68 -38
  26. data/include/prism/util/pm_memchr.h +18 -3
  27. data/include/prism/util/pm_newline_list.h +70 -27
  28. data/include/prism/util/pm_state_stack.h +25 -7
  29. data/include/prism/util/pm_string.h +115 -27
  30. data/include/prism/util/pm_string_list.h +25 -6
  31. data/include/prism/util/pm_strncasecmp.h +32 -0
  32. data/include/prism/util/pm_strpbrk.h +31 -17
  33. data/include/prism/version.h +27 -2
  34. data/include/prism.h +224 -31
  35. data/lib/prism/compiler.rb +6 -3
  36. data/lib/prism/debug.rb +23 -7
  37. data/lib/prism/dispatcher.rb +33 -18
  38. data/lib/prism/dsl.rb +10 -5
  39. data/lib/prism/ffi.rb +132 -80
  40. data/lib/prism/lex_compat.rb +25 -15
  41. data/lib/prism/mutation_compiler.rb +10 -5
  42. data/lib/prism/node.rb +370 -135
  43. data/lib/prism/node_ext.rb +1 -1
  44. data/lib/prism/node_inspector.rb +1 -1
  45. data/lib/prism/pack.rb +79 -40
  46. data/lib/prism/parse_result/comments.rb +7 -2
  47. data/lib/prism/parse_result/newlines.rb +4 -0
  48. data/lib/prism/parse_result.rb +150 -30
  49. data/lib/prism/pattern.rb +11 -0
  50. data/lib/prism/ripper_compat.rb +28 -10
  51. data/lib/prism/serialize.rb +86 -54
  52. data/lib/prism/visitor.rb +10 -3
  53. data/lib/prism.rb +20 -2
  54. data/prism.gemspec +4 -2
  55. data/rbi/prism.rbi +104 -60
  56. data/rbi/prism_static.rbi +16 -2
  57. data/sig/prism.rbs +72 -43
  58. data/sig/prism_static.rbs +14 -1
  59. data/src/diagnostic.c +56 -53
  60. data/src/enc/pm_big5.c +1 -0
  61. data/src/enc/pm_euc_jp.c +1 -0
  62. data/src/enc/pm_gbk.c +1 -0
  63. data/src/enc/pm_shift_jis.c +1 -0
  64. data/src/enc/pm_tables.c +316 -80
  65. data/src/enc/pm_unicode.c +53 -8
  66. data/src/enc/pm_windows_31j.c +1 -0
  67. data/src/node.c +334 -321
  68. data/src/options.c +170 -0
  69. data/src/prettyprint.c +74 -47
  70. data/src/prism.c +1642 -856
  71. data/src/regexp.c +151 -95
  72. data/src/serialize.c +44 -20
  73. data/src/token_type.c +3 -1
  74. data/src/util/pm_buffer.c +45 -15
  75. data/src/util/pm_char.c +103 -57
  76. data/src/util/pm_constant_pool.c +51 -21
  77. data/src/util/pm_list.c +12 -4
  78. data/src/util/pm_memchr.c +5 -3
  79. data/src/util/pm_newline_list.c +20 -12
  80. data/src/util/pm_state_stack.c +9 -3
  81. data/src/util/pm_string.c +95 -85
  82. data/src/util/pm_string_list.c +14 -15
  83. data/src/util/pm_strncasecmp.c +10 -3
  84. data/src/util/pm_strpbrk.c +25 -19
  85. metadata +5 -3
  86. data/docs/prism.png +0 -0
data/src/enc/pm_tables.c CHANGED
@@ -1,7 +1,9 @@
1
1
  #include "prism/enc/pm_encoding.h"
2
2
 
3
- // Each element of the following table contains a bitfield that indicates a
4
- // piece of information about the corresponding ASCII character.
3
+ /**
4
+ * Each element of the following table contains a bitfield that indicates a
5
+ * piece of information about the corresponding ASCII character.
6
+ */
5
7
  static uint8_t pm_encoding_ascii_table[256] = {
6
8
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
7
9
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
@@ -22,8 +24,10 @@ static uint8_t pm_encoding_ascii_table[256] = {
22
24
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx
23
25
  };
24
26
 
25
- // Each element of the following table contains a bitfield that indicates a
26
- // piece of information about the corresponding ISO-8859-1 character.
27
+ /**
28
+ * Each element of the following table contains a bitfield that indicates a
29
+ * piece of information about the corresponding ISO-8859-1 character.
30
+ */
27
31
  static uint8_t pm_encoding_iso_8859_1_table[256] = {
28
32
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
29
33
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
@@ -44,8 +48,10 @@ static uint8_t pm_encoding_iso_8859_1_table[256] = {
44
48
  3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3, 3, 3, // Fx
45
49
  };
46
50
 
47
- // Each element of the following table contains a bitfield that indicates a
48
- // piece of information about the corresponding ISO-8859-2 character.
51
+ /**
52
+ * Each element of the following table contains a bitfield that indicates a
53
+ * piece of information about the corresponding ISO-8859-2 character.
54
+ */
49
55
  static uint8_t pm_encoding_iso_8859_2_table[256] = {
50
56
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
51
57
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
@@ -66,8 +72,10 @@ static uint8_t pm_encoding_iso_8859_2_table[256] = {
66
72
  3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3, 3, 0, // Fx
67
73
  };
68
74
 
69
- // Each element of the following table contains a bitfield that indicates a
70
- // piece of information about the corresponding ISO-8859-3 character.
75
+ /**
76
+ * Each element of the following table contains a bitfield that indicates a
77
+ * piece of information about the corresponding ISO-8859-3 character.
78
+ */
71
79
  static uint8_t pm_encoding_iso_8859_3_table[256] = {
72
80
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
73
81
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
@@ -88,8 +96,10 @@ static uint8_t pm_encoding_iso_8859_3_table[256] = {
88
96
  0, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3, 3, 0, // Fx
89
97
  };
90
98
 
91
- // Each element of the following table contains a bitfield that indicates a
92
- // piece of information about the corresponding ISO-8859-4 character.
99
+ /**
100
+ * Each element of the following table contains a bitfield that indicates a
101
+ * piece of information about the corresponding ISO-8859-4 character.
102
+ */
93
103
  static uint8_t pm_encoding_iso_8859_4_table[256] = {
94
104
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
95
105
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
@@ -110,8 +120,10 @@ static uint8_t pm_encoding_iso_8859_4_table[256] = {
110
120
  3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3, 3, 0, // Fx
111
121
  };
112
122
 
113
- // Each element of the following table contains a bitfield that indicates a
114
- // piece of information about the corresponding ISO-8859-5 character.
123
+ /**
124
+ * Each element of the following table contains a bitfield that indicates a
125
+ * piece of information about the corresponding ISO-8859-5 character.
126
+ */
115
127
  static uint8_t pm_encoding_iso_8859_5_table[256] = {
116
128
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
117
129
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
@@ -132,8 +144,10 @@ static uint8_t pm_encoding_iso_8859_5_table[256] = {
132
144
  0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 3, 3, // Fx
133
145
  };
134
146
 
135
- // Each element of the following table contains a bitfield that indicates a
136
- // piece of information about the corresponding ISO-8859-6 character.
147
+ /**
148
+ * Each element of the following table contains a bitfield that indicates a
149
+ * piece of information about the corresponding ISO-8859-6 character.
150
+ */
137
151
  static uint8_t pm_encoding_iso_8859_6_table[256] = {
138
152
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
139
153
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
@@ -154,8 +168,10 @@ static uint8_t pm_encoding_iso_8859_6_table[256] = {
154
168
  3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx
155
169
  };
156
170
 
157
- // Each element of the following table contains a bitfield that indicates a
158
- // piece of information about the corresponding ISO-8859-7 character.
171
+ /**
172
+ * Each element of the following table contains a bitfield that indicates a
173
+ * piece of information about the corresponding ISO-8859-7 character.
174
+ */
159
175
  static uint8_t pm_encoding_iso_8859_7_table[256] = {
160
176
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
161
177
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
@@ -176,8 +192,10 @@ static uint8_t pm_encoding_iso_8859_7_table[256] = {
176
192
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, // Fx
177
193
  };
178
194
 
179
- // Each element of the following table contains a bitfield that indicates a
180
- // piece of information about the corresponding ISO-8859-8 character.
195
+ /**
196
+ * Each element of the following table contains a bitfield that indicates a
197
+ * piece of information about the corresponding ISO-8859-8 character.
198
+ */
181
199
  static uint8_t pm_encoding_iso_8859_8_table[256] = {
182
200
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
183
201
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
@@ -198,8 +216,10 @@ static uint8_t pm_encoding_iso_8859_8_table[256] = {
198
216
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // Fx
199
217
  };
200
218
 
201
- // Each element of the following table contains a bitfield that indicates a
202
- // piece of information about the corresponding ISO-8859-9 character.
219
+ /**
220
+ * Each element of the following table contains a bitfield that indicates a
221
+ * piece of information about the corresponding ISO-8859-9 character.
222
+ */
203
223
  static uint8_t pm_encoding_iso_8859_9_table[256] = {
204
224
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
205
225
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
@@ -220,8 +240,10 @@ static uint8_t pm_encoding_iso_8859_9_table[256] = {
220
240
  3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3, 3, 3, // Fx
221
241
  };
222
242
 
223
- // Each element of the following table contains a bitfield that indicates a
224
- // piece of information about the corresponding ISO-8859-10 character.
243
+ /**
244
+ * Each element of the following table contains a bitfield that indicates a
245
+ * piece of information about the corresponding ISO-8859-10 character.
246
+ */
225
247
  static uint8_t pm_encoding_iso_8859_10_table[256] = {
226
248
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
227
249
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
@@ -242,8 +264,10 @@ static uint8_t pm_encoding_iso_8859_10_table[256] = {
242
264
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // Fx
243
265
  };
244
266
 
245
- // Each element of the following table contains a bitfield that indicates a
246
- // piece of information about the corresponding ISO-8859-11 character.
267
+ /**
268
+ * Each element of the following table contains a bitfield that indicates a
269
+ * piece of information about the corresponding ISO-8859-11 character.
270
+ */
247
271
  static uint8_t pm_encoding_iso_8859_11_table[256] = {
248
272
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
249
273
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
@@ -264,8 +288,10 @@ static uint8_t pm_encoding_iso_8859_11_table[256] = {
264
288
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, // Fx
265
289
  };
266
290
 
267
- // Each element of the following table contains a bitfield that indicates a
268
- // piece of information about the corresponding ISO-8859-13 character.
291
+ /**
292
+ * Each element of the following table contains a bitfield that indicates a
293
+ * piece of information about the corresponding ISO-8859-13 character.
294
+ */
269
295
  static uint8_t pm_encoding_iso_8859_13_table[256] = {
270
296
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
271
297
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
@@ -286,8 +312,10 @@ static uint8_t pm_encoding_iso_8859_13_table[256] = {
286
312
  3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3, 3, 0, // Fx
287
313
  };
288
314
 
289
- // Each element of the following table contains a bitfield that indicates a
290
- // piece of information about the corresponding ISO-8859-14 character.
315
+ /**
316
+ * Each element of the following table contains a bitfield that indicates a
317
+ * piece of information about the corresponding ISO-8859-14 character.
318
+ */
291
319
  static uint8_t pm_encoding_iso_8859_14_table[256] = {
292
320
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
293
321
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
@@ -308,8 +336,10 @@ static uint8_t pm_encoding_iso_8859_14_table[256] = {
308
336
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // Fx
309
337
  };
310
338
 
311
- // Each element of the following table contains a bitfield that indicates a
312
- // piece of information about the corresponding ISO-8859-15 character.
339
+ /**
340
+ * Each element of the following table contains a bitfield that indicates a
341
+ * piece of information about the corresponding ISO-8859-15 character.
342
+ */
313
343
  static uint8_t pm_encoding_iso_8859_15_table[256] = {
314
344
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
315
345
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
@@ -330,8 +360,10 @@ static uint8_t pm_encoding_iso_8859_15_table[256] = {
330
360
  3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3, 3, 3, // Fx
331
361
  };
332
362
 
333
- // Each element of the following table contains a bitfield that indicates a
334
- // piece of information about the corresponding ISO-8859-16 character.
363
+ /**
364
+ * Each element of the following table contains a bitfield that indicates a
365
+ * piece of information about the corresponding ISO-8859-16 character.
366
+ */
335
367
  static uint8_t pm_encoding_iso_8859_16_table[256] = {
336
368
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
337
369
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
@@ -352,8 +384,10 @@ static uint8_t pm_encoding_iso_8859_16_table[256] = {
352
384
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // Fx
353
385
  };
354
386
 
355
- // Each element of the following table contains a bitfield that indicates a
356
- // piece of information about the corresponding KOI8-R character.
387
+ /**
388
+ * Each element of the following table contains a bitfield that indicates a
389
+ * piece of information about the corresponding KOI8-R character.
390
+ */
357
391
  static uint8_t pm_encoding_koi8_r_table[256] = {
358
392
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
359
393
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
@@ -374,8 +408,10 @@ static uint8_t pm_encoding_koi8_r_table[256] = {
374
408
  7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // Fx
375
409
  };
376
410
 
377
- // Each element of the following table contains a bitfield that indicates a
378
- // piece of information about the corresponding windows-1251 character.
411
+ /**
412
+ * Each element of the following table contains a bitfield that indicates a
413
+ * piece of information about the corresponding windows-1251 character.
414
+ */
379
415
  static uint8_t pm_encoding_windows_1251_table[256] = {
380
416
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
381
417
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
@@ -396,8 +432,10 @@ static uint8_t pm_encoding_windows_1251_table[256] = {
396
432
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // Fx
397
433
  };
398
434
 
399
- // Each element of the following table contains a bitfield that indicates a
400
- // piece of information about the corresponding windows-1252 character.
435
+ /**
436
+ * Each element of the following table contains a bitfield that indicates a
437
+ * piece of information about the corresponding windows-1252 character.
438
+ */
401
439
  static uint8_t pm_encoding_windows_1252_table[256] = {
402
440
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
403
441
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
@@ -418,36 +456,93 @@ static uint8_t pm_encoding_windows_1252_table[256] = {
418
456
  3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3, 3, 3, // Fx
419
457
  };
420
458
 
459
+ /**
460
+ * Returns the size of the next character in the ASCII encoding. This basically
461
+ * means that if the top bit is not set, the character is 1 byte long.
462
+ */
421
463
  static size_t
422
464
  pm_encoding_ascii_char_width(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) {
423
465
  return *b < 0x80 ? 1 : 0;
424
466
  }
425
467
 
468
+ /**
469
+ * Return the size of the next character in the ASCII encoding if it is an
470
+ * alphabetical character.
471
+ */
426
472
  size_t
427
473
  pm_encoding_ascii_alpha_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) {
428
474
  return (pm_encoding_ascii_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT);
429
475
  }
430
476
 
477
+ /**
478
+ * Return the size of the next character in the ASCII encoding if it is an
479
+ * alphanumeric character.
480
+ */
431
481
  size_t
432
482
  pm_encoding_ascii_alnum_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) {
433
483
  return (pm_encoding_ascii_table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT) ? 1 : 0;
434
484
  }
435
485
 
486
+ /**
487
+ * Return true if the next character in the ASCII encoding is an uppercase
488
+ * character.
489
+ */
436
490
  bool
437
491
  pm_encoding_ascii_isupper_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) {
438
492
  return (pm_encoding_ascii_table[*b] & PRISM_ENCODING_UPPERCASE_BIT);
439
493
  }
440
494
 
495
+ /**
496
+ * For a lot of encodings the default is that they are a single byte long no
497
+ * matter what the codepoint, so this function is shared between them.
498
+ */
441
499
  static size_t
442
- pm_encoding_koi8_r_char_width(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) {
443
- return ((*b >= 0x20 && *b <= 0x7E) || (*b >= 0x80)) ? 1 : 0;
500
+ pm_encoding_single_char_width(PRISM_ATTRIBUTE_UNUSED const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) {
501
+ return 1;
444
502
  }
445
503
 
504
+ /**
505
+ * Returns the size of the next character in the KOI8-R encoding. This means
506
+ * checking if it's a valid codepoint in KOI8-R and, if it is, returning 1.
507
+ */
446
508
  static size_t
447
- pm_encoding_single_char_width(PRISM_ATTRIBUTE_UNUSED const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) {
448
- return 1;
509
+ pm_encoding_koi8_r_char_width(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) {
510
+ return ((*b >= 0x20 && *b <= 0x7E) || (*b >= 0x80)) ? 1 : 0;
449
511
  }
450
512
 
513
+ #define PRISM_ENCODING_TABLE(name) \
514
+ static size_t pm_encoding_ ##name ## _alpha_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) { \
515
+ return (pm_encoding_ ##name ## _table[*b] & PRISM_ENCODING_ALPHABETIC_BIT); \
516
+ } \
517
+ static size_t pm_encoding_ ##name ## _alnum_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) { \
518
+ return (pm_encoding_ ##name ## _table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT) ? 1 : 0; \
519
+ } \
520
+ static bool pm_encoding_ ##name ## _isupper_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) { \
521
+ return (pm_encoding_ ##name ## _table[*b] & PRISM_ENCODING_UPPERCASE_BIT); \
522
+ }
523
+
524
+ PRISM_ENCODING_TABLE(iso_8859_1)
525
+ PRISM_ENCODING_TABLE(iso_8859_2)
526
+ PRISM_ENCODING_TABLE(iso_8859_3)
527
+ PRISM_ENCODING_TABLE(iso_8859_4)
528
+ PRISM_ENCODING_TABLE(iso_8859_5)
529
+ PRISM_ENCODING_TABLE(iso_8859_6)
530
+ PRISM_ENCODING_TABLE(iso_8859_7)
531
+ PRISM_ENCODING_TABLE(iso_8859_8)
532
+ PRISM_ENCODING_TABLE(iso_8859_9)
533
+ PRISM_ENCODING_TABLE(iso_8859_10)
534
+ PRISM_ENCODING_TABLE(iso_8859_11)
535
+ PRISM_ENCODING_TABLE(iso_8859_13)
536
+ PRISM_ENCODING_TABLE(iso_8859_14)
537
+ PRISM_ENCODING_TABLE(iso_8859_15)
538
+ PRISM_ENCODING_TABLE(iso_8859_16)
539
+ PRISM_ENCODING_TABLE(koi8_r)
540
+ PRISM_ENCODING_TABLE(windows_1251)
541
+ PRISM_ENCODING_TABLE(windows_1252)
542
+
543
+ #undef PRISM_ENCODING_TABLE
544
+
545
+ /** ASCII encoding */
451
546
  pm_encoding_t pm_encoding_ascii = {
452
547
  .name = "ascii",
453
548
  .char_width = pm_encoding_ascii_char_width,
@@ -457,6 +552,7 @@ pm_encoding_t pm_encoding_ascii = {
457
552
  .multibyte = false
458
553
  };
459
554
 
555
+ /** ASCII-8BIT encoding */
460
556
  pm_encoding_t pm_encoding_ascii_8bit = {
461
557
  .name = "ascii-8bit",
462
558
  .char_width = pm_encoding_single_char_width,
@@ -466,42 +562,182 @@ pm_encoding_t pm_encoding_ascii_8bit = {
466
562
  .multibyte = false
467
563
  };
468
564
 
469
- #define PRISM_ENCODING_TABLE(s, i, w) \
470
- static size_t pm_encoding_ ##i ## _alpha_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) { \
471
- return (pm_encoding_ ##i ## _table[*b] & PRISM_ENCODING_ALPHABETIC_BIT); \
472
- } \
473
- static size_t pm_encoding_ ##i ## _alnum_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) { \
474
- return (pm_encoding_ ##i ## _table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT) ? 1 : 0; \
475
- } \
476
- static bool pm_encoding_ ##i ## _isupper_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) { \
477
- return (pm_encoding_ ##i ## _table[*b] & PRISM_ENCODING_UPPERCASE_BIT); \
478
- } \
479
- pm_encoding_t pm_encoding_ ##i = { \
480
- .name = s, \
481
- .char_width = w, \
482
- .alnum_char = pm_encoding_ ##i ## _alnum_char, \
483
- .alpha_char = pm_encoding_ ##i ## _alpha_char, \
484
- .isupper_char = pm_encoding_ ##i ## _isupper_char, \
485
- .multibyte = false, \
486
- };
487
-
488
- PRISM_ENCODING_TABLE("iso-8859-1", iso_8859_1, pm_encoding_single_char_width)
489
- PRISM_ENCODING_TABLE("iso-8859-2", iso_8859_2, pm_encoding_single_char_width)
490
- PRISM_ENCODING_TABLE("iso-8859-3", iso_8859_3, pm_encoding_single_char_width)
491
- PRISM_ENCODING_TABLE("iso-8859-4", iso_8859_4, pm_encoding_single_char_width)
492
- PRISM_ENCODING_TABLE("iso-8859-5", iso_8859_5, pm_encoding_single_char_width)
493
- PRISM_ENCODING_TABLE("iso-8859-6", iso_8859_6, pm_encoding_single_char_width)
494
- PRISM_ENCODING_TABLE("iso-8859-7", iso_8859_7, pm_encoding_single_char_width)
495
- PRISM_ENCODING_TABLE("iso-8859-8", iso_8859_8, pm_encoding_single_char_width)
496
- PRISM_ENCODING_TABLE("iso-8859-9", iso_8859_9, pm_encoding_single_char_width)
497
- PRISM_ENCODING_TABLE("iso-8859-10", iso_8859_10, pm_encoding_single_char_width)
498
- PRISM_ENCODING_TABLE("iso-8859-11", iso_8859_11, pm_encoding_single_char_width)
499
- PRISM_ENCODING_TABLE("iso-8859-13", iso_8859_13, pm_encoding_single_char_width)
500
- PRISM_ENCODING_TABLE("iso-8859-14", iso_8859_14, pm_encoding_single_char_width)
501
- PRISM_ENCODING_TABLE("iso-8859-15", iso_8859_15, pm_encoding_single_char_width)
502
- PRISM_ENCODING_TABLE("iso-8859-16", iso_8859_16, pm_encoding_single_char_width)
503
- PRISM_ENCODING_TABLE("koi8-r", koi8_r, pm_encoding_koi8_r_char_width)
504
- PRISM_ENCODING_TABLE("windows-1251", windows_1251, pm_encoding_single_char_width)
505
- PRISM_ENCODING_TABLE("windows-1252", windows_1252, pm_encoding_single_char_width)
565
+ /** ISO-8859-1 */
566
+ pm_encoding_t pm_encoding_iso_8859_1 = {
567
+ .name = "iso-8859-1",
568
+ .char_width = pm_encoding_single_char_width,
569
+ .alnum_char = pm_encoding_iso_8859_1_alnum_char,
570
+ .alpha_char = pm_encoding_iso_8859_1_alpha_char,
571
+ .isupper_char = pm_encoding_iso_8859_1_isupper_char,
572
+ .multibyte = false
573
+ };
506
574
 
507
- #undef PRISM_ENCODING_TABLE
575
+ /** ISO-8859-2 */
576
+ pm_encoding_t pm_encoding_iso_8859_2 = {
577
+ .name = "iso-8859-2",
578
+ .char_width = pm_encoding_single_char_width,
579
+ .alnum_char = pm_encoding_iso_8859_2_alnum_char,
580
+ .alpha_char = pm_encoding_iso_8859_2_alpha_char,
581
+ .isupper_char = pm_encoding_iso_8859_2_isupper_char,
582
+ .multibyte = false
583
+ };
584
+
585
+ /** ISO-8859-3 */
586
+ pm_encoding_t pm_encoding_iso_8859_3 = {
587
+ .name = "iso-8859-3",
588
+ .char_width = pm_encoding_single_char_width,
589
+ .alnum_char = pm_encoding_iso_8859_3_alnum_char,
590
+ .alpha_char = pm_encoding_iso_8859_3_alpha_char,
591
+ .isupper_char = pm_encoding_iso_8859_3_isupper_char,
592
+ .multibyte = false
593
+ };
594
+
595
+ /** ISO-8859-4 */
596
+ pm_encoding_t pm_encoding_iso_8859_4 = {
597
+ .name = "iso-8859-4",
598
+ .char_width = pm_encoding_single_char_width,
599
+ .alnum_char = pm_encoding_iso_8859_4_alnum_char,
600
+ .alpha_char = pm_encoding_iso_8859_4_alpha_char,
601
+ .isupper_char = pm_encoding_iso_8859_4_isupper_char,
602
+ .multibyte = false
603
+ };
604
+
605
+ /** ISO-8859-5 */
606
+ pm_encoding_t pm_encoding_iso_8859_5 = {
607
+ .name = "iso-8859-5",
608
+ .char_width = pm_encoding_single_char_width,
609
+ .alnum_char = pm_encoding_iso_8859_5_alnum_char,
610
+ .alpha_char = pm_encoding_iso_8859_5_alpha_char,
611
+ .isupper_char = pm_encoding_iso_8859_5_isupper_char,
612
+ .multibyte = false
613
+ };
614
+
615
+ /** ISO-8859-6 */
616
+ pm_encoding_t pm_encoding_iso_8859_6 = {
617
+ .name = "iso-8859-6",
618
+ .char_width = pm_encoding_single_char_width,
619
+ .alnum_char = pm_encoding_iso_8859_6_alnum_char,
620
+ .alpha_char = pm_encoding_iso_8859_6_alpha_char,
621
+ .isupper_char = pm_encoding_iso_8859_6_isupper_char,
622
+ .multibyte = false
623
+ };
624
+
625
+ /** ISO-8859-7 */
626
+ pm_encoding_t pm_encoding_iso_8859_7 = {
627
+ .name = "iso-8859-7",
628
+ .char_width = pm_encoding_single_char_width,
629
+ .alnum_char = pm_encoding_iso_8859_7_alnum_char,
630
+ .alpha_char = pm_encoding_iso_8859_7_alpha_char,
631
+ .isupper_char = pm_encoding_iso_8859_7_isupper_char,
632
+ .multibyte = false
633
+ };
634
+
635
+ /** ISO-8859-8 */
636
+ pm_encoding_t pm_encoding_iso_8859_8 = {
637
+ .name = "iso-8859-8",
638
+ .char_width = pm_encoding_single_char_width,
639
+ .alnum_char = pm_encoding_iso_8859_8_alnum_char,
640
+ .alpha_char = pm_encoding_iso_8859_8_alpha_char,
641
+ .isupper_char = pm_encoding_iso_8859_8_isupper_char,
642
+ .multibyte = false
643
+ };
644
+
645
+ /** ISO-8859-9 */
646
+ pm_encoding_t pm_encoding_iso_8859_9 = {
647
+ .name = "iso-8859-9",
648
+ .char_width = pm_encoding_single_char_width,
649
+ .alnum_char = pm_encoding_iso_8859_9_alnum_char,
650
+ .alpha_char = pm_encoding_iso_8859_9_alpha_char,
651
+ .isupper_char = pm_encoding_iso_8859_9_isupper_char,
652
+ .multibyte = false
653
+ };
654
+
655
+ /** ISO-8859-10 */
656
+ pm_encoding_t pm_encoding_iso_8859_10 = {
657
+ .name = "iso-8859-10",
658
+ .char_width = pm_encoding_single_char_width,
659
+ .alnum_char = pm_encoding_iso_8859_10_alnum_char,
660
+ .alpha_char = pm_encoding_iso_8859_10_alpha_char,
661
+ .isupper_char = pm_encoding_iso_8859_10_isupper_char,
662
+ .multibyte = false
663
+ };
664
+
665
+ /** ISO-8859-11 */
666
+ pm_encoding_t pm_encoding_iso_8859_11 = {
667
+ .name = "iso-8859-11",
668
+ .char_width = pm_encoding_single_char_width,
669
+ .alnum_char = pm_encoding_iso_8859_11_alnum_char,
670
+ .alpha_char = pm_encoding_iso_8859_11_alpha_char,
671
+ .isupper_char = pm_encoding_iso_8859_11_isupper_char,
672
+ .multibyte = false
673
+ };
674
+
675
+ /** ISO-8859-13 */
676
+ pm_encoding_t pm_encoding_iso_8859_13 = {
677
+ .name = "iso-8859-13",
678
+ .char_width = pm_encoding_single_char_width,
679
+ .alnum_char = pm_encoding_iso_8859_13_alnum_char,
680
+ .alpha_char = pm_encoding_iso_8859_13_alpha_char,
681
+ .isupper_char = pm_encoding_iso_8859_13_isupper_char,
682
+ .multibyte = false
683
+ };
684
+
685
+ /** ISO-8859-14 */
686
+ pm_encoding_t pm_encoding_iso_8859_14 = {
687
+ .name = "iso-8859-14",
688
+ .char_width = pm_encoding_single_char_width,
689
+ .alnum_char = pm_encoding_iso_8859_14_alnum_char,
690
+ .alpha_char = pm_encoding_iso_8859_14_alpha_char,
691
+ .isupper_char = pm_encoding_iso_8859_14_isupper_char,
692
+ .multibyte = false
693
+ };
694
+
695
+ /** ISO-8859-15 */
696
+ pm_encoding_t pm_encoding_iso_8859_15 = {
697
+ .name = "iso-8859-15",
698
+ .char_width = pm_encoding_single_char_width,
699
+ .alnum_char = pm_encoding_iso_8859_15_alnum_char,
700
+ .alpha_char = pm_encoding_iso_8859_15_alpha_char,
701
+ .isupper_char = pm_encoding_iso_8859_15_isupper_char,
702
+ .multibyte = false
703
+ };
704
+
705
+ /** ISO-8859-16 */
706
+ pm_encoding_t pm_encoding_iso_8859_16 = {
707
+ .name = "iso-8859-16",
708
+ .char_width = pm_encoding_single_char_width,
709
+ .alnum_char = pm_encoding_iso_8859_16_alnum_char,
710
+ .alpha_char = pm_encoding_iso_8859_16_alpha_char,
711
+ .isupper_char = pm_encoding_iso_8859_16_isupper_char,
712
+ .multibyte = false
713
+ };
714
+
715
+ /** KOI8-R */
716
+ pm_encoding_t pm_encoding_koi8_r = {
717
+ .name = "koi8-r",
718
+ .char_width = pm_encoding_koi8_r_char_width,
719
+ .alnum_char = pm_encoding_koi8_r_alnum_char,
720
+ .alpha_char = pm_encoding_koi8_r_alpha_char,
721
+ .isupper_char = pm_encoding_koi8_r_isupper_char,
722
+ .multibyte = false
723
+ };
724
+
725
+ /** Windows-1251 */
726
+ pm_encoding_t pm_encoding_windows_1251 = {
727
+ .name = "windows-1251",
728
+ .char_width = pm_encoding_single_char_width,
729
+ .alnum_char = pm_encoding_windows_1251_alnum_char,
730
+ .alpha_char = pm_encoding_windows_1251_alpha_char,
731
+ .isupper_char = pm_encoding_windows_1251_isupper_char,
732
+ .multibyte = false
733
+ };
734
+
735
+ /** Windows-1252 */
736
+ pm_encoding_t pm_encoding_windows_1252 = {
737
+ .name = "windows-1252",
738
+ .char_width = pm_encoding_single_char_width,
739
+ .alnum_char = pm_encoding_windows_1252_alnum_char,
740
+ .alpha_char = pm_encoding_windows_1252_alpha_char,
741
+ .isupper_char = pm_encoding_windows_1252_isupper_char,
742
+ .multibyte = false
743
+ };