prism 0.16.0 → 0.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +16 -1
  3. data/Makefile +6 -0
  4. data/README.md +1 -1
  5. data/config.yml +50 -35
  6. data/docs/fuzzing.md +1 -1
  7. data/docs/serialization.md +28 -29
  8. data/ext/prism/api_node.c +802 -770
  9. data/ext/prism/api_pack.c +20 -9
  10. data/ext/prism/extension.c +464 -162
  11. data/ext/prism/extension.h +1 -1
  12. data/include/prism/ast.h +3173 -763
  13. data/include/prism/defines.h +32 -9
  14. data/include/prism/diagnostic.h +36 -3
  15. data/include/prism/enc/pm_encoding.h +118 -28
  16. data/include/prism/node.h +38 -13
  17. data/include/prism/options.h +204 -0
  18. data/include/prism/pack.h +44 -33
  19. data/include/prism/parser.h +445 -200
  20. data/include/prism/prettyprint.h +12 -1
  21. data/include/prism/regexp.h +16 -2
  22. data/include/prism/util/pm_buffer.h +94 -16
  23. data/include/prism/util/pm_char.h +162 -48
  24. data/include/prism/util/pm_constant_pool.h +126 -32
  25. data/include/prism/util/pm_list.h +68 -38
  26. data/include/prism/util/pm_memchr.h +18 -3
  27. data/include/prism/util/pm_newline_list.h +70 -27
  28. data/include/prism/util/pm_state_stack.h +25 -7
  29. data/include/prism/util/pm_string.h +115 -27
  30. data/include/prism/util/pm_string_list.h +25 -6
  31. data/include/prism/util/pm_strncasecmp.h +32 -0
  32. data/include/prism/util/pm_strpbrk.h +31 -17
  33. data/include/prism/version.h +27 -2
  34. data/include/prism.h +224 -31
  35. data/lib/prism/compiler.rb +6 -3
  36. data/lib/prism/debug.rb +23 -7
  37. data/lib/prism/dispatcher.rb +33 -18
  38. data/lib/prism/dsl.rb +10 -5
  39. data/lib/prism/ffi.rb +132 -80
  40. data/lib/prism/lex_compat.rb +25 -15
  41. data/lib/prism/mutation_compiler.rb +10 -5
  42. data/lib/prism/node.rb +370 -135
  43. data/lib/prism/node_ext.rb +1 -1
  44. data/lib/prism/node_inspector.rb +1 -1
  45. data/lib/prism/pack.rb +79 -40
  46. data/lib/prism/parse_result/comments.rb +7 -2
  47. data/lib/prism/parse_result/newlines.rb +4 -0
  48. data/lib/prism/parse_result.rb +150 -30
  49. data/lib/prism/pattern.rb +11 -0
  50. data/lib/prism/ripper_compat.rb +28 -10
  51. data/lib/prism/serialize.rb +86 -54
  52. data/lib/prism/visitor.rb +10 -3
  53. data/lib/prism.rb +20 -2
  54. data/prism.gemspec +4 -2
  55. data/rbi/prism.rbi +104 -60
  56. data/rbi/prism_static.rbi +16 -2
  57. data/sig/prism.rbs +72 -43
  58. data/sig/prism_static.rbs +14 -1
  59. data/src/diagnostic.c +56 -53
  60. data/src/enc/pm_big5.c +1 -0
  61. data/src/enc/pm_euc_jp.c +1 -0
  62. data/src/enc/pm_gbk.c +1 -0
  63. data/src/enc/pm_shift_jis.c +1 -0
  64. data/src/enc/pm_tables.c +316 -80
  65. data/src/enc/pm_unicode.c +53 -8
  66. data/src/enc/pm_windows_31j.c +1 -0
  67. data/src/node.c +334 -321
  68. data/src/options.c +170 -0
  69. data/src/prettyprint.c +74 -47
  70. data/src/prism.c +1642 -856
  71. data/src/regexp.c +151 -95
  72. data/src/serialize.c +44 -20
  73. data/src/token_type.c +3 -1
  74. data/src/util/pm_buffer.c +45 -15
  75. data/src/util/pm_char.c +103 -57
  76. data/src/util/pm_constant_pool.c +51 -21
  77. data/src/util/pm_list.c +12 -4
  78. data/src/util/pm_memchr.c +5 -3
  79. data/src/util/pm_newline_list.c +20 -12
  80. data/src/util/pm_state_stack.c +9 -3
  81. data/src/util/pm_string.c +95 -85
  82. data/src/util/pm_string_list.c +14 -15
  83. data/src/util/pm_strncasecmp.c +10 -3
  84. data/src/util/pm_strpbrk.c +25 -19
  85. metadata +5 -3
  86. data/docs/prism.png +0 -0
data/src/enc/pm_tables.c CHANGED
@@ -1,7 +1,9 @@
1
1
  #include "prism/enc/pm_encoding.h"
2
2
 
3
- // Each element of the following table contains a bitfield that indicates a
4
- // piece of information about the corresponding ASCII character.
3
+ /**
4
+ * Each element of the following table contains a bitfield that indicates a
5
+ * piece of information about the corresponding ASCII character.
6
+ */
5
7
  static uint8_t pm_encoding_ascii_table[256] = {
6
8
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
7
9
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
@@ -22,8 +24,10 @@ static uint8_t pm_encoding_ascii_table[256] = {
22
24
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx
23
25
  };
24
26
 
25
- // Each element of the following table contains a bitfield that indicates a
26
- // piece of information about the corresponding ISO-8859-1 character.
27
+ /**
28
+ * Each element of the following table contains a bitfield that indicates a
29
+ * piece of information about the corresponding ISO-8859-1 character.
30
+ */
27
31
  static uint8_t pm_encoding_iso_8859_1_table[256] = {
28
32
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
29
33
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
@@ -44,8 +48,10 @@ static uint8_t pm_encoding_iso_8859_1_table[256] = {
44
48
  3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3, 3, 3, // Fx
45
49
  };
46
50
 
47
- // Each element of the following table contains a bitfield that indicates a
48
- // piece of information about the corresponding ISO-8859-2 character.
51
+ /**
52
+ * Each element of the following table contains a bitfield that indicates a
53
+ * piece of information about the corresponding ISO-8859-2 character.
54
+ */
49
55
  static uint8_t pm_encoding_iso_8859_2_table[256] = {
50
56
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
51
57
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
@@ -66,8 +72,10 @@ static uint8_t pm_encoding_iso_8859_2_table[256] = {
66
72
  3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3, 3, 0, // Fx
67
73
  };
68
74
 
69
- // Each element of the following table contains a bitfield that indicates a
70
- // piece of information about the corresponding ISO-8859-3 character.
75
+ /**
76
+ * Each element of the following table contains a bitfield that indicates a
77
+ * piece of information about the corresponding ISO-8859-3 character.
78
+ */
71
79
  static uint8_t pm_encoding_iso_8859_3_table[256] = {
72
80
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
73
81
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
@@ -88,8 +96,10 @@ static uint8_t pm_encoding_iso_8859_3_table[256] = {
88
96
  0, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3, 3, 0, // Fx
89
97
  };
90
98
 
91
- // Each element of the following table contains a bitfield that indicates a
92
- // piece of information about the corresponding ISO-8859-4 character.
99
+ /**
100
+ * Each element of the following table contains a bitfield that indicates a
101
+ * piece of information about the corresponding ISO-8859-4 character.
102
+ */
93
103
  static uint8_t pm_encoding_iso_8859_4_table[256] = {
94
104
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
95
105
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
@@ -110,8 +120,10 @@ static uint8_t pm_encoding_iso_8859_4_table[256] = {
110
120
  3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3, 3, 0, // Fx
111
121
  };
112
122
 
113
- // Each element of the following table contains a bitfield that indicates a
114
- // piece of information about the corresponding ISO-8859-5 character.
123
+ /**
124
+ * Each element of the following table contains a bitfield that indicates a
125
+ * piece of information about the corresponding ISO-8859-5 character.
126
+ */
115
127
  static uint8_t pm_encoding_iso_8859_5_table[256] = {
116
128
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
117
129
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
@@ -132,8 +144,10 @@ static uint8_t pm_encoding_iso_8859_5_table[256] = {
132
144
  0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 3, 3, // Fx
133
145
  };
134
146
 
135
- // Each element of the following table contains a bitfield that indicates a
136
- // piece of information about the corresponding ISO-8859-6 character.
147
+ /**
148
+ * Each element of the following table contains a bitfield that indicates a
149
+ * piece of information about the corresponding ISO-8859-6 character.
150
+ */
137
151
  static uint8_t pm_encoding_iso_8859_6_table[256] = {
138
152
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
139
153
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
@@ -154,8 +168,10 @@ static uint8_t pm_encoding_iso_8859_6_table[256] = {
154
168
  3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx
155
169
  };
156
170
 
157
- // Each element of the following table contains a bitfield that indicates a
158
- // piece of information about the corresponding ISO-8859-7 character.
171
+ /**
172
+ * Each element of the following table contains a bitfield that indicates a
173
+ * piece of information about the corresponding ISO-8859-7 character.
174
+ */
159
175
  static uint8_t pm_encoding_iso_8859_7_table[256] = {
160
176
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
161
177
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
@@ -176,8 +192,10 @@ static uint8_t pm_encoding_iso_8859_7_table[256] = {
176
192
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, // Fx
177
193
  };
178
194
 
179
- // Each element of the following table contains a bitfield that indicates a
180
- // piece of information about the corresponding ISO-8859-8 character.
195
+ /**
196
+ * Each element of the following table contains a bitfield that indicates a
197
+ * piece of information about the corresponding ISO-8859-8 character.
198
+ */
181
199
  static uint8_t pm_encoding_iso_8859_8_table[256] = {
182
200
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
183
201
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
@@ -198,8 +216,10 @@ static uint8_t pm_encoding_iso_8859_8_table[256] = {
198
216
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // Fx
199
217
  };
200
218
 
201
- // Each element of the following table contains a bitfield that indicates a
202
- // piece of information about the corresponding ISO-8859-9 character.
219
+ /**
220
+ * Each element of the following table contains a bitfield that indicates a
221
+ * piece of information about the corresponding ISO-8859-9 character.
222
+ */
203
223
  static uint8_t pm_encoding_iso_8859_9_table[256] = {
204
224
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
205
225
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
@@ -220,8 +240,10 @@ static uint8_t pm_encoding_iso_8859_9_table[256] = {
220
240
  3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3, 3, 3, // Fx
221
241
  };
222
242
 
223
- // Each element of the following table contains a bitfield that indicates a
224
- // piece of information about the corresponding ISO-8859-10 character.
243
+ /**
244
+ * Each element of the following table contains a bitfield that indicates a
245
+ * piece of information about the corresponding ISO-8859-10 character.
246
+ */
225
247
  static uint8_t pm_encoding_iso_8859_10_table[256] = {
226
248
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
227
249
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
@@ -242,8 +264,10 @@ static uint8_t pm_encoding_iso_8859_10_table[256] = {
242
264
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // Fx
243
265
  };
244
266
 
245
- // Each element of the following table contains a bitfield that indicates a
246
- // piece of information about the corresponding ISO-8859-11 character.
267
+ /**
268
+ * Each element of the following table contains a bitfield that indicates a
269
+ * piece of information about the corresponding ISO-8859-11 character.
270
+ */
247
271
  static uint8_t pm_encoding_iso_8859_11_table[256] = {
248
272
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
249
273
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
@@ -264,8 +288,10 @@ static uint8_t pm_encoding_iso_8859_11_table[256] = {
264
288
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, // Fx
265
289
  };
266
290
 
267
- // Each element of the following table contains a bitfield that indicates a
268
- // piece of information about the corresponding ISO-8859-13 character.
291
+ /**
292
+ * Each element of the following table contains a bitfield that indicates a
293
+ * piece of information about the corresponding ISO-8859-13 character.
294
+ */
269
295
  static uint8_t pm_encoding_iso_8859_13_table[256] = {
270
296
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
271
297
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
@@ -286,8 +312,10 @@ static uint8_t pm_encoding_iso_8859_13_table[256] = {
286
312
  3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3, 3, 0, // Fx
287
313
  };
288
314
 
289
- // Each element of the following table contains a bitfield that indicates a
290
- // piece of information about the corresponding ISO-8859-14 character.
315
+ /**
316
+ * Each element of the following table contains a bitfield that indicates a
317
+ * piece of information about the corresponding ISO-8859-14 character.
318
+ */
291
319
  static uint8_t pm_encoding_iso_8859_14_table[256] = {
292
320
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
293
321
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
@@ -308,8 +336,10 @@ static uint8_t pm_encoding_iso_8859_14_table[256] = {
308
336
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // Fx
309
337
  };
310
338
 
311
- // Each element of the following table contains a bitfield that indicates a
312
- // piece of information about the corresponding ISO-8859-15 character.
339
+ /**
340
+ * Each element of the following table contains a bitfield that indicates a
341
+ * piece of information about the corresponding ISO-8859-15 character.
342
+ */
313
343
  static uint8_t pm_encoding_iso_8859_15_table[256] = {
314
344
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
315
345
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
@@ -330,8 +360,10 @@ static uint8_t pm_encoding_iso_8859_15_table[256] = {
330
360
  3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3, 3, 3, // Fx
331
361
  };
332
362
 
333
- // Each element of the following table contains a bitfield that indicates a
334
- // piece of information about the corresponding ISO-8859-16 character.
363
+ /**
364
+ * Each element of the following table contains a bitfield that indicates a
365
+ * piece of information about the corresponding ISO-8859-16 character.
366
+ */
335
367
  static uint8_t pm_encoding_iso_8859_16_table[256] = {
336
368
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
337
369
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
@@ -352,8 +384,10 @@ static uint8_t pm_encoding_iso_8859_16_table[256] = {
352
384
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // Fx
353
385
  };
354
386
 
355
- // Each element of the following table contains a bitfield that indicates a
356
- // piece of information about the corresponding KOI8-R character.
387
+ /**
388
+ * Each element of the following table contains a bitfield that indicates a
389
+ * piece of information about the corresponding KOI8-R character.
390
+ */
357
391
  static uint8_t pm_encoding_koi8_r_table[256] = {
358
392
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
359
393
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
@@ -374,8 +408,10 @@ static uint8_t pm_encoding_koi8_r_table[256] = {
374
408
  7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // Fx
375
409
  };
376
410
 
377
- // Each element of the following table contains a bitfield that indicates a
378
- // piece of information about the corresponding windows-1251 character.
411
+ /**
412
+ * Each element of the following table contains a bitfield that indicates a
413
+ * piece of information about the corresponding windows-1251 character.
414
+ */
379
415
  static uint8_t pm_encoding_windows_1251_table[256] = {
380
416
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
381
417
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
@@ -396,8 +432,10 @@ static uint8_t pm_encoding_windows_1251_table[256] = {
396
432
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // Fx
397
433
  };
398
434
 
399
- // Each element of the following table contains a bitfield that indicates a
400
- // piece of information about the corresponding windows-1252 character.
435
+ /**
436
+ * Each element of the following table contains a bitfield that indicates a
437
+ * piece of information about the corresponding windows-1252 character.
438
+ */
401
439
  static uint8_t pm_encoding_windows_1252_table[256] = {
402
440
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
403
441
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
@@ -418,36 +456,93 @@ static uint8_t pm_encoding_windows_1252_table[256] = {
418
456
  3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3, 3, 3, // Fx
419
457
  };
420
458
 
459
+ /**
460
+ * Returns the size of the next character in the ASCII encoding. This basically
461
+ * means that if the top bit is not set, the character is 1 byte long.
462
+ */
421
463
  static size_t
422
464
  pm_encoding_ascii_char_width(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) {
423
465
  return *b < 0x80 ? 1 : 0;
424
466
  }
425
467
 
468
+ /**
469
+ * Return the size of the next character in the ASCII encoding if it is an
470
+ * alphabetical character.
471
+ */
426
472
  size_t
427
473
  pm_encoding_ascii_alpha_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) {
428
474
  return (pm_encoding_ascii_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT);
429
475
  }
430
476
 
477
+ /**
478
+ * Return the size of the next character in the ASCII encoding if it is an
479
+ * alphanumeric character.
480
+ */
431
481
  size_t
432
482
  pm_encoding_ascii_alnum_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) {
433
483
  return (pm_encoding_ascii_table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT) ? 1 : 0;
434
484
  }
435
485
 
486
+ /**
487
+ * Return true if the next character in the ASCII encoding is an uppercase
488
+ * character.
489
+ */
436
490
  bool
437
491
  pm_encoding_ascii_isupper_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) {
438
492
  return (pm_encoding_ascii_table[*b] & PRISM_ENCODING_UPPERCASE_BIT);
439
493
  }
440
494
 
495
+ /**
496
+ * For a lot of encodings the default is that they are a single byte long no
497
+ * matter what the codepoint, so this function is shared between them.
498
+ */
441
499
  static size_t
442
- pm_encoding_koi8_r_char_width(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) {
443
- return ((*b >= 0x20 && *b <= 0x7E) || (*b >= 0x80)) ? 1 : 0;
500
+ pm_encoding_single_char_width(PRISM_ATTRIBUTE_UNUSED const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) {
501
+ return 1;
444
502
  }
445
503
 
504
+ /**
505
+ * Returns the size of the next character in the KOI8-R encoding. This means
506
+ * checking if it's a valid codepoint in KOI8-R and, if it is, returning 1.
507
+ */
446
508
  static size_t
447
- pm_encoding_single_char_width(PRISM_ATTRIBUTE_UNUSED const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) {
448
- return 1;
509
+ pm_encoding_koi8_r_char_width(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) {
510
+ return ((*b >= 0x20 && *b <= 0x7E) || (*b >= 0x80)) ? 1 : 0;
449
511
  }
450
512
 
513
+ #define PRISM_ENCODING_TABLE(name) \
514
+ static size_t pm_encoding_ ##name ## _alpha_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) { \
515
+ return (pm_encoding_ ##name ## _table[*b] & PRISM_ENCODING_ALPHABETIC_BIT); \
516
+ } \
517
+ static size_t pm_encoding_ ##name ## _alnum_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) { \
518
+ return (pm_encoding_ ##name ## _table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT) ? 1 : 0; \
519
+ } \
520
+ static bool pm_encoding_ ##name ## _isupper_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) { \
521
+ return (pm_encoding_ ##name ## _table[*b] & PRISM_ENCODING_UPPERCASE_BIT); \
522
+ }
523
+
524
+ PRISM_ENCODING_TABLE(iso_8859_1)
525
+ PRISM_ENCODING_TABLE(iso_8859_2)
526
+ PRISM_ENCODING_TABLE(iso_8859_3)
527
+ PRISM_ENCODING_TABLE(iso_8859_4)
528
+ PRISM_ENCODING_TABLE(iso_8859_5)
529
+ PRISM_ENCODING_TABLE(iso_8859_6)
530
+ PRISM_ENCODING_TABLE(iso_8859_7)
531
+ PRISM_ENCODING_TABLE(iso_8859_8)
532
+ PRISM_ENCODING_TABLE(iso_8859_9)
533
+ PRISM_ENCODING_TABLE(iso_8859_10)
534
+ PRISM_ENCODING_TABLE(iso_8859_11)
535
+ PRISM_ENCODING_TABLE(iso_8859_13)
536
+ PRISM_ENCODING_TABLE(iso_8859_14)
537
+ PRISM_ENCODING_TABLE(iso_8859_15)
538
+ PRISM_ENCODING_TABLE(iso_8859_16)
539
+ PRISM_ENCODING_TABLE(koi8_r)
540
+ PRISM_ENCODING_TABLE(windows_1251)
541
+ PRISM_ENCODING_TABLE(windows_1252)
542
+
543
+ #undef PRISM_ENCODING_TABLE
544
+
545
+ /** ASCII encoding */
451
546
  pm_encoding_t pm_encoding_ascii = {
452
547
  .name = "ascii",
453
548
  .char_width = pm_encoding_ascii_char_width,
@@ -457,6 +552,7 @@ pm_encoding_t pm_encoding_ascii = {
457
552
  .multibyte = false
458
553
  };
459
554
 
555
+ /** ASCII-8BIT encoding */
460
556
  pm_encoding_t pm_encoding_ascii_8bit = {
461
557
  .name = "ascii-8bit",
462
558
  .char_width = pm_encoding_single_char_width,
@@ -466,42 +562,182 @@ pm_encoding_t pm_encoding_ascii_8bit = {
466
562
  .multibyte = false
467
563
  };
468
564
 
469
- #define PRISM_ENCODING_TABLE(s, i, w) \
470
- static size_t pm_encoding_ ##i ## _alpha_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) { \
471
- return (pm_encoding_ ##i ## _table[*b] & PRISM_ENCODING_ALPHABETIC_BIT); \
472
- } \
473
- static size_t pm_encoding_ ##i ## _alnum_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) { \
474
- return (pm_encoding_ ##i ## _table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT) ? 1 : 0; \
475
- } \
476
- static bool pm_encoding_ ##i ## _isupper_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) { \
477
- return (pm_encoding_ ##i ## _table[*b] & PRISM_ENCODING_UPPERCASE_BIT); \
478
- } \
479
- pm_encoding_t pm_encoding_ ##i = { \
480
- .name = s, \
481
- .char_width = w, \
482
- .alnum_char = pm_encoding_ ##i ## _alnum_char, \
483
- .alpha_char = pm_encoding_ ##i ## _alpha_char, \
484
- .isupper_char = pm_encoding_ ##i ## _isupper_char, \
485
- .multibyte = false, \
486
- };
487
-
488
- PRISM_ENCODING_TABLE("iso-8859-1", iso_8859_1, pm_encoding_single_char_width)
489
- PRISM_ENCODING_TABLE("iso-8859-2", iso_8859_2, pm_encoding_single_char_width)
490
- PRISM_ENCODING_TABLE("iso-8859-3", iso_8859_3, pm_encoding_single_char_width)
491
- PRISM_ENCODING_TABLE("iso-8859-4", iso_8859_4, pm_encoding_single_char_width)
492
- PRISM_ENCODING_TABLE("iso-8859-5", iso_8859_5, pm_encoding_single_char_width)
493
- PRISM_ENCODING_TABLE("iso-8859-6", iso_8859_6, pm_encoding_single_char_width)
494
- PRISM_ENCODING_TABLE("iso-8859-7", iso_8859_7, pm_encoding_single_char_width)
495
- PRISM_ENCODING_TABLE("iso-8859-8", iso_8859_8, pm_encoding_single_char_width)
496
- PRISM_ENCODING_TABLE("iso-8859-9", iso_8859_9, pm_encoding_single_char_width)
497
- PRISM_ENCODING_TABLE("iso-8859-10", iso_8859_10, pm_encoding_single_char_width)
498
- PRISM_ENCODING_TABLE("iso-8859-11", iso_8859_11, pm_encoding_single_char_width)
499
- PRISM_ENCODING_TABLE("iso-8859-13", iso_8859_13, pm_encoding_single_char_width)
500
- PRISM_ENCODING_TABLE("iso-8859-14", iso_8859_14, pm_encoding_single_char_width)
501
- PRISM_ENCODING_TABLE("iso-8859-15", iso_8859_15, pm_encoding_single_char_width)
502
- PRISM_ENCODING_TABLE("iso-8859-16", iso_8859_16, pm_encoding_single_char_width)
503
- PRISM_ENCODING_TABLE("koi8-r", koi8_r, pm_encoding_koi8_r_char_width)
504
- PRISM_ENCODING_TABLE("windows-1251", windows_1251, pm_encoding_single_char_width)
505
- PRISM_ENCODING_TABLE("windows-1252", windows_1252, pm_encoding_single_char_width)
565
+ /** ISO-8859-1 */
566
+ pm_encoding_t pm_encoding_iso_8859_1 = {
567
+ .name = "iso-8859-1",
568
+ .char_width = pm_encoding_single_char_width,
569
+ .alnum_char = pm_encoding_iso_8859_1_alnum_char,
570
+ .alpha_char = pm_encoding_iso_8859_1_alpha_char,
571
+ .isupper_char = pm_encoding_iso_8859_1_isupper_char,
572
+ .multibyte = false
573
+ };
506
574
 
507
- #undef PRISM_ENCODING_TABLE
575
+ /** ISO-8859-2 */
576
+ pm_encoding_t pm_encoding_iso_8859_2 = {
577
+ .name = "iso-8859-2",
578
+ .char_width = pm_encoding_single_char_width,
579
+ .alnum_char = pm_encoding_iso_8859_2_alnum_char,
580
+ .alpha_char = pm_encoding_iso_8859_2_alpha_char,
581
+ .isupper_char = pm_encoding_iso_8859_2_isupper_char,
582
+ .multibyte = false
583
+ };
584
+
585
+ /** ISO-8859-3 */
586
+ pm_encoding_t pm_encoding_iso_8859_3 = {
587
+ .name = "iso-8859-3",
588
+ .char_width = pm_encoding_single_char_width,
589
+ .alnum_char = pm_encoding_iso_8859_3_alnum_char,
590
+ .alpha_char = pm_encoding_iso_8859_3_alpha_char,
591
+ .isupper_char = pm_encoding_iso_8859_3_isupper_char,
592
+ .multibyte = false
593
+ };
594
+
595
+ /** ISO-8859-4 */
596
+ pm_encoding_t pm_encoding_iso_8859_4 = {
597
+ .name = "iso-8859-4",
598
+ .char_width = pm_encoding_single_char_width,
599
+ .alnum_char = pm_encoding_iso_8859_4_alnum_char,
600
+ .alpha_char = pm_encoding_iso_8859_4_alpha_char,
601
+ .isupper_char = pm_encoding_iso_8859_4_isupper_char,
602
+ .multibyte = false
603
+ };
604
+
605
+ /** ISO-8859-5 */
606
+ pm_encoding_t pm_encoding_iso_8859_5 = {
607
+ .name = "iso-8859-5",
608
+ .char_width = pm_encoding_single_char_width,
609
+ .alnum_char = pm_encoding_iso_8859_5_alnum_char,
610
+ .alpha_char = pm_encoding_iso_8859_5_alpha_char,
611
+ .isupper_char = pm_encoding_iso_8859_5_isupper_char,
612
+ .multibyte = false
613
+ };
614
+
615
+ /** ISO-8859-6 */
616
+ pm_encoding_t pm_encoding_iso_8859_6 = {
617
+ .name = "iso-8859-6",
618
+ .char_width = pm_encoding_single_char_width,
619
+ .alnum_char = pm_encoding_iso_8859_6_alnum_char,
620
+ .alpha_char = pm_encoding_iso_8859_6_alpha_char,
621
+ .isupper_char = pm_encoding_iso_8859_6_isupper_char,
622
+ .multibyte = false
623
+ };
624
+
625
+ /** ISO-8859-7 */
626
+ pm_encoding_t pm_encoding_iso_8859_7 = {
627
+ .name = "iso-8859-7",
628
+ .char_width = pm_encoding_single_char_width,
629
+ .alnum_char = pm_encoding_iso_8859_7_alnum_char,
630
+ .alpha_char = pm_encoding_iso_8859_7_alpha_char,
631
+ .isupper_char = pm_encoding_iso_8859_7_isupper_char,
632
+ .multibyte = false
633
+ };
634
+
635
+ /** ISO-8859-8 */
636
+ pm_encoding_t pm_encoding_iso_8859_8 = {
637
+ .name = "iso-8859-8",
638
+ .char_width = pm_encoding_single_char_width,
639
+ .alnum_char = pm_encoding_iso_8859_8_alnum_char,
640
+ .alpha_char = pm_encoding_iso_8859_8_alpha_char,
641
+ .isupper_char = pm_encoding_iso_8859_8_isupper_char,
642
+ .multibyte = false
643
+ };
644
+
645
+ /** ISO-8859-9 */
646
+ pm_encoding_t pm_encoding_iso_8859_9 = {
647
+ .name = "iso-8859-9",
648
+ .char_width = pm_encoding_single_char_width,
649
+ .alnum_char = pm_encoding_iso_8859_9_alnum_char,
650
+ .alpha_char = pm_encoding_iso_8859_9_alpha_char,
651
+ .isupper_char = pm_encoding_iso_8859_9_isupper_char,
652
+ .multibyte = false
653
+ };
654
+
655
+ /** ISO-8859-10 */
656
+ pm_encoding_t pm_encoding_iso_8859_10 = {
657
+ .name = "iso-8859-10",
658
+ .char_width = pm_encoding_single_char_width,
659
+ .alnum_char = pm_encoding_iso_8859_10_alnum_char,
660
+ .alpha_char = pm_encoding_iso_8859_10_alpha_char,
661
+ .isupper_char = pm_encoding_iso_8859_10_isupper_char,
662
+ .multibyte = false
663
+ };
664
+
665
+ /** ISO-8859-11 */
666
+ pm_encoding_t pm_encoding_iso_8859_11 = {
667
+ .name = "iso-8859-11",
668
+ .char_width = pm_encoding_single_char_width,
669
+ .alnum_char = pm_encoding_iso_8859_11_alnum_char,
670
+ .alpha_char = pm_encoding_iso_8859_11_alpha_char,
671
+ .isupper_char = pm_encoding_iso_8859_11_isupper_char,
672
+ .multibyte = false
673
+ };
674
+
675
+ /** ISO-8859-13 */
676
+ pm_encoding_t pm_encoding_iso_8859_13 = {
677
+ .name = "iso-8859-13",
678
+ .char_width = pm_encoding_single_char_width,
679
+ .alnum_char = pm_encoding_iso_8859_13_alnum_char,
680
+ .alpha_char = pm_encoding_iso_8859_13_alpha_char,
681
+ .isupper_char = pm_encoding_iso_8859_13_isupper_char,
682
+ .multibyte = false
683
+ };
684
+
685
+ /** ISO-8859-14 */
686
+ pm_encoding_t pm_encoding_iso_8859_14 = {
687
+ .name = "iso-8859-14",
688
+ .char_width = pm_encoding_single_char_width,
689
+ .alnum_char = pm_encoding_iso_8859_14_alnum_char,
690
+ .alpha_char = pm_encoding_iso_8859_14_alpha_char,
691
+ .isupper_char = pm_encoding_iso_8859_14_isupper_char,
692
+ .multibyte = false
693
+ };
694
+
695
+ /** ISO-8859-15 */
696
+ pm_encoding_t pm_encoding_iso_8859_15 = {
697
+ .name = "iso-8859-15",
698
+ .char_width = pm_encoding_single_char_width,
699
+ .alnum_char = pm_encoding_iso_8859_15_alnum_char,
700
+ .alpha_char = pm_encoding_iso_8859_15_alpha_char,
701
+ .isupper_char = pm_encoding_iso_8859_15_isupper_char,
702
+ .multibyte = false
703
+ };
704
+
705
+ /** ISO-8859-16 */
706
+ pm_encoding_t pm_encoding_iso_8859_16 = {
707
+ .name = "iso-8859-16",
708
+ .char_width = pm_encoding_single_char_width,
709
+ .alnum_char = pm_encoding_iso_8859_16_alnum_char,
710
+ .alpha_char = pm_encoding_iso_8859_16_alpha_char,
711
+ .isupper_char = pm_encoding_iso_8859_16_isupper_char,
712
+ .multibyte = false
713
+ };
714
+
715
+ /** KOI8-R */
716
+ pm_encoding_t pm_encoding_koi8_r = {
717
+ .name = "koi8-r",
718
+ .char_width = pm_encoding_koi8_r_char_width,
719
+ .alnum_char = pm_encoding_koi8_r_alnum_char,
720
+ .alpha_char = pm_encoding_koi8_r_alpha_char,
721
+ .isupper_char = pm_encoding_koi8_r_isupper_char,
722
+ .multibyte = false
723
+ };
724
+
725
+ /** Windows-1251 */
726
+ pm_encoding_t pm_encoding_windows_1251 = {
727
+ .name = "windows-1251",
728
+ .char_width = pm_encoding_single_char_width,
729
+ .alnum_char = pm_encoding_windows_1251_alnum_char,
730
+ .alpha_char = pm_encoding_windows_1251_alpha_char,
731
+ .isupper_char = pm_encoding_windows_1251_isupper_char,
732
+ .multibyte = false
733
+ };
734
+
735
+ /** Windows-1252 */
736
+ pm_encoding_t pm_encoding_windows_1252 = {
737
+ .name = "windows-1252",
738
+ .char_width = pm_encoding_single_char_width,
739
+ .alnum_char = pm_encoding_windows_1252_alnum_char,
740
+ .alpha_char = pm_encoding_windows_1252_alpha_char,
741
+ .isupper_char = pm_encoding_windows_1252_isupper_char,
742
+ .multibyte = false
743
+ };