ansi-art-convert 0.1.0__py3-none-any.whl → 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1135 @@
1
+ #!/usr/bin/env python3
2
+
3
+ from __future__ import annotations
4
+ from argparse import ArgumentParser
5
+ from collections import Counter
6
+ from dataclasses import dataclass, field
7
+ from enum import Enum
8
+ from itertools import chain, batched
9
+ import os
10
+ import pprint
11
+ import sys
12
+ from typing import Iterator, NamedTuple, Tuple, ClassVar
13
+
14
+ from laser_prynter import pp
15
+
16
+ from ansi_art_convert.font_data import FONT_DATA, FILE_DATA_TYPES
17
+
18
DEBUG = False


def dprint(*args, **kwargs):
    """Forward ``*args``/``**kwargs`` to ``print()`` on stderr, but only when
    the module-level DEBUG flag is on; a silent no-op otherwise."""
    if not DEBUG:
        return
    print(*args, file=sys.stderr, **kwargs)
22
+
23
@dataclass
class ANSIToken:
    """Base class for all lexer tokens.

    ``value`` holds the (possibly transformed) token text, ``original_value``
    preserves the raw input, and ``value_name`` is a human-readable name
    looked up in ``value_map`` (subclasses supply their own maps).
    """
    value: str
    value_name: str = field(default='')
    value_map: dict = field(repr=False, default_factory=dict)
    original_value: str = field(init=False)

    def __post_init__(self):
        # Capture the raw text before subclasses rewrite self.value.
        self.original_value = self.value
        self.value_name = self.value_map.get(self.value, '')

    def repr(self):
        # Debug dump with a white class label.  NOTE: deliberately a plain
        # method named 'repr', distinct from the dataclass-generated __repr__.
        return '\n'.join([
            f'\x1b[37m{self.__class__.__name__:<20}\x1b[0m'
            + ' {title:<s} {value!r:<4}'.format(title='value:', value=self.value)
            + ' {title:<10s} {value!r:<8}'.format(title='value_name:', value=self.value_name)
        ])

    def __str__(self):
        return self.value


    @staticmethod
    def get_glyph_offset(font_name: str) -> int:
        """Map a SAUCE font name to the Private Use Area base offset of the
        bank (U+E000..U+E5FF) where that font's glyphs are mapped.

        Raises ValueError for unrecognised font names.
        """
        if 'topaz' in font_name.lower():
            if '1+' in font_name:
                offset = 0xE000
            elif '2+' in font_name:
                offset = 0xE100
            else:
                raise ValueError(f'Unknown Topaz font_name {font_name!r}')
        elif 'mosoul' in font_name.lower():
            offset = 0xE200
        elif 'microknight' in font_name.lower():
            offset = 0xE300
        elif 'noodle' in font_name.lower():
            offset = 0xE400
        elif 'ibm' in font_name.lower():
            offset = 0xE500
        else:
            raise ValueError(f'Unknown font_name: {font_name!r}')
        dprint(f'font_name: {font_name!r} -> offset: {hex(offset)}')
        return offset
66
+
67
+
68
@dataclass
class TextToken(ANSIToken):
    """A run of printable text, remapped into a Private Use Area font bank.

    Every byte-range character (ord <= 255) is shifted by ``offset`` so it
    renders with the custom bitmap font; anything else passes through.
    """
    offset: int = 0xE100  # PUA bank base for the active font (see get_glyph_offset)
    hex_values: list[str] = field(default_factory=list, repr=False)  # DEBUG-only trace of input code points

    def __post_init__(self):
        super().__post_init__()
        new_values = []
        for v in self.value:
            if DEBUG:
                self.hex_values.append(str(hex(ord(v))))
            if ord(v) <= 255:  # and not (0x21 <= ord(v) <= 0x7e):
                new_values.append(chr(ord(v)+self.offset))
            else:
                new_values.append(v)
        self.value = ''.join(new_values)

    def repr(self):
        # Multi-line debug dump with a green class label.
        return '\n'.join([
            f'\x1b[32m{self.__class__.__name__:<20}\x1b[0m',
            '  {title:<17s} {value!r}'.format(title='original:', value=self.original_value),
            '  {title:<17s} {value!r}'.format(title='value:', value=self.value),
            '  {title:<17s} {value!r}'.format(title='hex_values:', value=self.hex_values),
            '  {title:<17s} {value!r}'.format(title='len:', value=len(self.value)),
        ])
93
+
94
# Names of the C0 control characters 0x00-0x1F, keyed by code point.
C0_TOKEN_NAMES = dict(enumerate(
    'NUL SOH STX ETX EOT ENQ ACK BEL BS HT LF VT FF CR SO SI '
    'DLE DC1 DC2 DC3 DC4 NAK SYN ETB CAN EM SUB ESC FS GS RS US'.split()
))
100
+
101
@dataclass
class C0Token(TextToken):
    """A single C0 control character (0x00-0x1F), glyph-shifted like text.

    CR is swallowed (renders as '') since line endings are handled by
    NewLineToken elsewhere in the pipeline.
    """
    value_map: dict = field(repr=False, default_factory=lambda: C0_TOKEN_NAMES)

    def __post_init__(self):
        super().__post_init__()
        # C0_TOKEN_NAMES is keyed by int code point, so look up via ord()
        # of the raw (pre-shift) character.
        self.value_name = self.value_map.get(ord(self.original_value), '')
        if self.value_name == 'CR':
            self.value = ''

    def repr(self):
        # Single-line debug dump with a yellow class label.
        return '\n'.join([
            f'\x1b[33m{self.__class__.__name__:<20}\x1b[0m'
            + '{title:<s} {value!r:<6}'.format(title='value:', value=self.value)
            + '{title:<10s} {value!r:<8}'.format(title='value_name:', value=self.value_name)
            + '{title:<10s} {value!r:<6}'.format(title='original:', value=self.original_value)
            + '{title:<4s} {value!r}'.format(title='len:', value=len(self.value))
        ])
119
+
120
# Glyphs CP437 displays for the control-code range 0x01-0x1F, in order.
_CP437_CONTROL_GLYPHS = '☺☻♥♦♣♠•◘○◙♂♀♪♫☼►◄↕‼¶§▬↨↑↓→←∟↔▲▼'
CP_437_MAP = {code: glyph for code, glyph in enumerate(_CP437_CONTROL_GLYPHS, start=1)}
126
+
127
# Unicode code point -> CP437 byte value, derived from the standard-library
# cp437 codec (the canonical unicode.org mapping).  Covers exactly the high
# half 0x80-0xFF; the ASCII half maps to itself and needs no entry, which is
# also why every entry here has codepoint != CP437 value.
UNICODE_TO_CP437 = {
    ord(bytes([byte_value]).decode('cp437')): byte_value
    for byte_value in range(0x80, 0x100)
}
259
+
260
@dataclass
class CP437Token(ANSIToken):
    """Text decoded from a CP437 source: each char is mapped back to its
    CP437 byte value (via UNICODE_TO_CP437) and shifted into the font's
    Private Use Area bank."""
    offset: int = 0xE100  # PUA bank base for the active font
    hex_values: list[str] = field(default_factory=list, repr=False)  # DEBUG-only trace

    def _translate_char(self, ch: str) -> str:
        # Unicode -> CP437 byte where a mapping exists, otherwise the raw
        # code point; only byte-range results get the PUA shift.
        n = UNICODE_TO_CP437.get(ord(ch), ord(ch))
        if n <= 255:
            return chr(n + self.offset)
        else:
            return ch

    def __post_init__(self):
        super().__post_init__()
        if DEBUG:
            for v in self.original_value:
                self.hex_values.append(str(hex(ord(v))))
        self.value = ''.join([self._translate_char(v) for v in self.original_value])
        self.value_name = self.value_map.get(self.original_value, '')

    def repr(self):
        # Multi-line debug dump with a green class label.
        return '\n'.join([
            f'\x1b[32m{self.__class__.__name__:<20}\x1b[0m',
            '  {title:<17s} {value!r}'.format(title='original:', value=self.original_value),
            '  {title:<17s} {value!r}'.format(title='value:', value=self.value),
            '  {title:<17s} {value!r}'.format(title='hex_values:', value=self.hex_values),
            '  {title:<17s} {value!r}'.format(title='len:', value=len(self.value)),
        ])
288
+
289
+
290
# CSI final byte -> control-function name ('H' and 'f' are both CUP).
ANSI_CONTROL_CODES = dict(
    A='CursorUp', B='CursorDown', C='CursorForward', D='CursorBackward',
    E='CursorNextLine', F='CursorPrevLine', G='CursorHorizontalAbsolute',
    H='CursorPosition', J='EraseInDisplay', K='EraseInLine',
    S='ScrollUp', T='ScrollDown',
    f='CursorPosition', s='SaveCursorPosition', u='RestoreCursorPosition',
)
307
+
308
@dataclass
class ControlToken(ANSIToken):
    """CSI control-sequence token (cursor movement, erase, scroll, ...).

    ``value`` is the sequence body without the ESC[ prefix (e.g. '12C');
    ``subtype`` is its final letter, which selects the control function.
    """
    value_map: dict = field(repr=False, default_factory=lambda: ANSI_CONTROL_CODES)
    subtype: str = field(init=False)

    def __post_init__(self):
        # Run the base initialiser FIRST: it records original_value and sets
        # value_name from the full sequence, which never matches the
        # single-letter keys in ANSI_CONTROL_CODES.  We then overwrite
        # value_name from the final byte.  (Previously the base call came
        # last and clobbered value_name back to ''.)
        super().__post_init__()
        self.subtype = self.value[-1]
        self.value_name = self.value_map.get(self.subtype, '')

    def repr(self):
        lines = (
            f'\x1b[35m{self.__class__.__name__:<20}\x1b[0m'
            + '{title:<s} {value!r:<6}'.format(title='value:', value=self.value)
            + '{title:<10s} {value!r:<8}'.format(title='value_name:', value=self.value_name)
            + '{title:<10s} {value!r}'.format(title='subtype:', value=self.subtype)
        )
        if self.subtype == 'C':
            # A bare 'C' has an implicit count of 1 — guard int('') just as
            # __str__ does (previously this crashed with ValueError).
            lines += '  {title:<20s} {value!r}'.format(title='spaces:', value=' '*int(self.value[:-1] or '1'))
        return lines

    def __str__(self):
        # Only cursor-forward (spaces) and cursor-position (newline) render
        # to text; every other control is dropped from the output.
        if self.subtype == 'C':
            return ' '*int(self.value[:-1] or '1')
        elif self.subtype == 'H':
            return '\n'
        else:
            return ''
336
+
337
class ColourType(Enum):
    """Distinguishes foreground from background colour tokens."""
    FG = 'fg'  # foreground
    BG = 'bg'  # background
340
+
341
@dataclass
class ColorFGToken(ANSIToken):
    # Marker base class for all foreground-colour tokens (used in
    # isinstance checks by Color8Token.generate_tokens).
    pass
344
+
345
@dataclass
class ColorBGToken(ANSIToken):
    # Marker base class for all background-colour tokens (used in
    # isinstance checks by Color8Token.generate_tokens).
    pass
348
+
349
@dataclass
class TrueColorFGToken(ColorFGToken):
    """24-bit (truecolor) foreground; ``value`` is an 'r,g,b' string."""
    colour_type: ColourType = field(repr=False, default=ColourType.FG)

    def __str__(self):
        # SGR 38;2;r;g;b selects an RGB foreground.
        red, green, blue = self.value.split(',')
        return f'\x1b[38;2;{red};{green};{blue}m'

    def repr(self):
        header = f'\x1b[94m{type(self).__name__:<20}\x1b[0m'
        detail = [
            f'  {"value:":<20s} {self.value!r}',
            f'  {"colour_type:":<20s} {self.colour_type.value!r}',
        ]
        return '\n'.join([header] + detail)
361
+
362
@dataclass
class TrueColorBGToken(ColorBGToken):
    """24-bit (truecolor) background; ``value`` is an 'r,g,b' string."""
    colour_type: ColourType = field(repr=False, default=ColourType.BG)

    def __str__(self):
        # SGR 48;2;r;g;b selects an RGB background.
        red, green, blue = self.value.split(',')
        return f'\x1b[48;2;{red};{green};{blue}m'

    def repr(self):
        header = f'\x1b[96m{type(self).__name__:<20}\x1b[0m'
        detail = [
            f'  {"value:":<20s} {self.value!r}',
            f'  {"colour_type:":<20s} {self.colour_type.value!r}',
        ]
        return '\n'.join([header] + detail)
374
+
375
@dataclass
class Color256FGToken(ColorFGToken):
    """256-colour (8-bit palette) foreground; ``value`` is the palette index."""
    colour_type: ColourType = field(repr=False, default=ColourType.FG)

    def __str__(self):
        # SGR 38;5;n selects a palette foreground.
        return '\x1b[38;5;%sm' % self.value

    def repr(self):
        rows = [f'\x1b[34m{type(self).__name__:<20}\x1b[0m']
        rows.append(f'  {"value:":<20s} {self.value!r}')
        rows.append(f'  {"colour_type:":<20s} {self.colour_type.value!r}')
        return '\n'.join(rows)
387
+
388
@dataclass
class Color256BGToken(ColorBGToken):
    """256-colour (8-bit palette) background; ``value`` is the palette index."""
    colour_type: ColourType = field(repr=False, default=ColourType.BG)

    def __str__(self):
        # SGR 48;5;n selects a palette background.
        return '\x1b[48;5;%sm' % self.value

    def repr(self):
        rows = [f'\x1b[36m{type(self).__name__:<20}\x1b[0m']
        rows.append(f'  {"value:":<20s} {self.value!r}')
        rows.append(f'  {"colour_type:":<20s} {self.colour_type.value!r}')
        return '\n'.join(rows)
400
+
401
# Classic 8/16-colour SGR tables: code string -> colour name.
# FG normal 30-37, FG bright 90-97, BG normal 40-47, BG bright 100-107.
_BASE_COLOUR_NAMES = ('black', 'red', 'green', 'yellow', 'blue', 'magenta', 'cyan', 'white')

COLOUR_8_FG_VALUES = {str(30 + i): name for i, name in enumerate(_BASE_COLOUR_NAMES)}
COLOUR_8_FG_BRIGHT_VALUES = {str(90 + i): f'bright_{name}' for i, name in enumerate(_BASE_COLOUR_NAMES)}
COLOUR_8_BG_VALUES = {str(40 + i): name for i, name in enumerate(_BASE_COLOUR_NAMES)}
COLOUR_8_BG_BRIGHT_VALUES = {str(100 + i): f'bright_{name}' for i, name in enumerate(_BASE_COLOUR_NAMES)}

# Fold the bright variants into the lookup tables the lexer actually uses.
COLOUR_8_FG_VALUES = COLOUR_8_FG_VALUES | COLOUR_8_FG_BRIGHT_VALUES
COLOUR_8_BG_VALUES = COLOUR_8_BG_VALUES | COLOUR_8_BG_BRIGHT_VALUES
COLOUR_8_VALUES = COLOUR_8_FG_VALUES | COLOUR_8_BG_VALUES
420
+
421
@dataclass
class Color8Token(ANSIToken):
    """Composite token for a classic 8/16-colour SGR sequence.

    Splits a parameter list like ['0', '1', '33', '44'] into plain SGR
    tokens plus at most one foreground and one background colour token,
    handling the iCE-colours convention where blink (param '5') selects a
    bright background instead of blinking.
    """
    params: list[str] = field(default_factory=list)       # raw numeric SGR parameters
    ice_colours: bool = field(repr=False, default=False)  # file-level non-blink mode
    bright_bg: bool = field(init=False, default=False)    # set when '5' seen under iCE colours
    bright_fg: bool = field(init=False, default=False)    # set when bold ('1') seen
    sgr_tokens: list[SGRToken] = field(init=False, default_factory=list)
    fg_token: Color8FGToken | None = field(init=False, default=None)
    bg_token: Color8BGToken | None = field(init=False, default=None)
    tokens: list[ANSIToken] = field(init=False, default_factory=list)

    def __post_init__(self):
        for param in self.params:
            if param in SGR_CODES:
                if self.ice_colours and param == '5':
                    # In iCE-colours mode blink means "bright background";
                    # swallow the parameter instead of emitting an SGR token.
                    self.bright_bg = True
                    continue
                elif param == '1':
                    self.bright_fg = True
                t = SGRToken(value=param)
                self.sgr_tokens.append(t)
                self.tokens.append(t)
            elif param in COLOUR_8_FG_VALUES:
                self.fg_token = Color8FGToken(value=param, bright=self.bright_fg)
                self.tokens.append(self.fg_token)
            elif param in COLOUR_8_BG_VALUES:
                ice_colours = self.ice_colours and self.bright_bg
                self.bg_token = Color8BGToken(value=param, ice_colours=ice_colours)
                self.tokens.append(self.bg_token)

    def generate_tokens(self, curr_fg: ColorFGToken | None, curr_bg: ColorBGToken | None) -> Iterator[ANSIToken]:
        """Yield the concrete SGR/colour tokens this sequence expands to.

        ``curr_fg``/``curr_bg`` are the colours in effect before this token;
        when this sequence does not set its own colour they are re-emitted
        with brightness recomputed, so colour state survives SGR resets.
        """
        if self.sgr_tokens:
            if SGRToken(value='0') in self.sgr_tokens:
                # Reset: fall back to the default white-on-black pair.
                curr_fg = Color8FGToken(value='37', bright=self.bright_fg)
                curr_bg = Color8BGToken(value='40', ice_colours=self.bright_bg)
            yield from self.sgr_tokens
        if self.fg_token:
            yield self.fg_token
        else:
            if curr_fg is None:
                yield Color8FGToken(value='37', bright=self.bright_fg)
            elif isinstance(curr_fg, Color8FGToken):
                # Rebuild from the original (un-brightened) code so the
                # current bright flag is applied afresh.
                yield Color8FGToken(value=curr_fg.original_value, bright=self.bright_fg)

        # Background brightness is sticky: the new token, the current
        # background, or this sequence's own flag can each keep it on.
        bright_bg = False
        if self.bg_token and isinstance(self.bg_token, Color8BGToken) and self.bg_token.ice_colours:
            bright_bg = True
        if curr_bg and isinstance(curr_bg, Color8BGToken) and curr_bg.ice_colours:
            bright_bg = True
        if self.bright_bg:
            bright_bg = True

        if self.bg_token:
            yield Color8BGToken(value=self.bg_token.original_value, ice_colours=bright_bg)
        else:
            if curr_bg is None:
                yield Color8BGToken(value='40', ice_colours=bright_bg)
            elif isinstance(curr_bg, Color8BGToken):
                yield Color8BGToken(value=curr_bg.original_value, ice_colours=bright_bg)

    def __str__(self):
        return f'\x1b[{self.value}m'

    def repr(self):
        # Debug dump (yellow label) including nested dumps of child tokens.
        lines = [
            f'\x1b[93m{self.__class__.__name__:<20}\x1b[0m',
            '  {title:<20s} {value!r}'.format(title='value:', value=self.value),
            '  {title:<20s} {value!r}'.format(title='params:', value=self.params),
            '  {title:<20s} {value!r}'.format(title='ice_colours:', value=self.ice_colours),
        ]
        for t in self.tokens:
            lines.append('\n'.join(['  ' + line for line in t.repr().split('\n')]))
        return '\n'.join(lines)
494
+
495
@dataclass
class Color8FGToken(ColorFGToken):
    """Classic foreground colour (codes 30-37 / bright 90-97)."""
    value_map: dict = field(repr=False, default_factory=lambda: COLOUR_8_FG_VALUES)
    colour_type: ColourType = field(repr=False, default=ColourType.FG)
    bright: bool = False

    def __post_init__(self):
        super().__post_init__()
        # Promote a normal code (30-37) to its bright variant (90-97);
        # already-bright codes are left untouched.
        if self.bright and int(self.value) < 90:
            self.value = str(int(self.value) + 60)

    def repr(self):
        header = f'\x1b[96m{type(self).__name__:<20}\x1b[0m'
        return (
            header
            + f'value: {self.value!r:<6}'
            + f'value_name: {self.value_name!r:<8}'
            + f'{"original:":<10s} {self.original_value!r:<6}'
        )

    def __str__(self):
        return '\x1b[%sm' % self.value
518
+
519
@dataclass
class Color8BGToken(ColorBGToken):
    """Classic background colour (codes 40-47); with iCE colours the code
    is promoted by 60 into the bright range (100-107)."""
    value_map: dict = field(repr=False, default_factory=lambda: COLOUR_8_BG_VALUES)
    colour_type: ColourType = field(repr=False, default=ColourType.BG)
    ice_colours: bool = field(default=False)

    def __post_init__(self):
        super().__post_init__()
        self.original_value = self.value  # redundant: base __post_init__ already set this
        if self.ice_colours:
            # 40-47 -> 100-107: bright background instead of blinking.
            self.value = str(int(self.value) + 60)

    def repr(self):
        # Single-line debug dump with a blue class label.
        return '\n'.join([
            f'\x1b[94m{self.__class__.__name__:<20}\x1b[0m'
            + '{title:<s} {value!r:<6}'.format(title='value:', value=self.value)
            + '{title:<10s} {value!r:<8}'.format(title='value_name:', value=self.value_name)
            + '{title:<10s} {value!r:<6}'.format(title='original:', value=self.original_value)
            + '{title:<12s} {value!r}'.format(title='ice_colours:', value=self.ice_colours)
        ])

    def __str__(self):
        return f'\x1b[{self.value}m'
542
+
543
# SGR attribute codes 0-9, keyed by code string in numeric order.
_SGR_NAMES = ('Reset', 'Bold', 'Dim', 'Italic', 'Underline', 'BlinkSlow',
              'BlinkRapid', 'ReverseVideo', 'Conceal', 'CrossedOut')
SGR_CODES = {str(code): name for code, name in enumerate(_SGR_NAMES)}
547
+
548
@dataclass
class SGRToken(ANSIToken):
    """Select Graphic Rendition attribute token (codes 0-9)."""
    value_map: dict = field(repr=False, default_factory=lambda: SGR_CODES)

    def __str__(self):
        return '\x1b[' + self.value + 'm'

    def repr(self):
        header = f'\x1b[95m{type(self).__name__:<20}\x1b[0m'
        return header + f'value: {self.value!r:<6}' + f'value_name: {self.value_name!r:<8}'
559
+
560
@dataclass
class NewLineToken(ANSIToken):
    """Line break in the art; always renders as a single LF."""

    def __str__(self):
        return '\n'

    def repr(self):
        return f'\x1b[93m{type(self).__name__:<20}\x1b[0m' + f'value: {self.value!r}'
570
+
571
@dataclass
class EOFToken(ANSIToken):
    """End-of-input marker; renders as nothing."""

    def __str__(self):
        return ''

    def repr(self):
        label = f'\x1b[90m{type(self).__name__:<20}\x1b[0m'
        return label + f'  {"value:":<20s} {self.value!r}'
580
+
581
@dataclass
class UnknownToken(ANSIToken):
    """Fallback for input the lexer could not classify; value passes through
    unchanged via the base __str__."""

    def repr(self):
        label = f'\x1b[91m{type(self).__name__:<20}\x1b[0m'
        return label + f'  {"value:":<20s} {self.value!r}'
588
+
589
+
590
@dataclass
class SauceRecordExtended:
    """Extended SAUCE record: the raw record plus interpreted/expanded
    comments, font metadata, flag and tinfo field descriptions."""
    fpath: str                    # path of the source art file
    encoding: SupportedEncoding   # encoding the file body was decoded with
    sauce: SauceRecord            # parsed raw 128-byte record
    comments_data: list[str]      # expanded COMNT lines (may be empty)
    font: dict                    # font metadata looked up from FONT_DATA
    tinfo: dict                   # interpreted tinfo1..tinfo4 fields
    # NOTE(review): the next two init=False fields are never assigned in
    # this module; reading them raises AttributeError — confirm whether
    # they are still needed.
    aspect_ratio: dict = field(init=False, repr=False)
    letter_spacing: dict = field(init=False, repr=False)
    flags: dict = field(repr=False, default_factory=dict)
    ice_colours: bool = field(default=False)  # non-blink mode: bright backgrounds

    # Human-readable meanings of the two-bit AR/LS groups of ANSiFlags.
    aspect_ratio_map: ClassVar[dict] = {
        (0, 0): 'Legacy value. No preference.',
        (0, 1): 'Image was created for a legacy device. When displayed on a device with square pixels, either the font or the image needs to be stretched.',
        (1, 0): 'Image was created for a modern device with square pixels. No stretching is desired on a device with square pixels.',
        (1, 1): 'Not currently a valid value.'
    }
    letter_spacing_map: ClassVar[dict] = {
        (0, 0): 'Legacy value. No preference.',
        (0, 1): 'Select 8 pixel font.',
        (1, 0): 'Select 9 pixel font.',
        (1, 1): 'Not currently a valid value.'
    }
    tinfo_names: ClassVar[list[str]] = ['tinfo1', 'tinfo2', 'tinfo3', 'tinfo4']
    font_map: ClassVar[dict] = FONT_DATA
    tinfo_map: ClassVar[dict] = FILE_DATA_TYPES

    @staticmethod
    def parse_comments(comment_block: str, n_comments: int) -> list[str]:
        """Split a COMNT block into its fixed-width 64-char comment lines.

        Raises ValueError when the block does not have the advertised size
        or does not carry the 'COMNT' id.
        """
        dprint(f'Parsing {n_comments} comments from comment block of size {len(comment_block)}')
        if len(comment_block) != (n_comments * 64) + 5:
            raise ValueError(f'Invalid comment block size: expected {n_comments * 64 + 5}, got {len(comment_block)}')
        if not comment_block.startswith('COMNT'):
            # Guard against slicing drift: a well-formed block starts with its 5-char id.
            raise ValueError(f'Invalid comment block id: {comment_block[:5]!r}')
        dprint(f'Comment block raw data: {comment_block=!r}')

        comments_data = []
        # Skip the 5-char 'COMNT' id, then take fixed 64-char lines.
        for c in map(''.join, batched(comment_block[5:], 64)):
            comments_data.append(c.rstrip('\x00'))

        return comments_data

    @staticmethod
    def parse_flags(raw_n: int) -> dict:
        """Decode the ANSiFlags byte: bit 0 = non-blink (iCE colours),
        bits 1-2 = letter spacing (LS), bits 3-4 = aspect ratio (AR)."""
        f = list(map(int, f'{raw_n:08b}'))
        dprint(f'Parsing flags from raw value {raw_n}: bits={f}')
        _bit1, _bit2, _bit3, ar1, ar2, ls1, ls2, b = f

        return {
            'aspect_ratio': SauceRecordExtended.aspect_ratio_map.get((ar1, ar2), 'Unknown'),
            'letter_spacing': SauceRecordExtended.letter_spacing_map.get((ls1, ls2), 'Unknown'),
            'non_blink_mode': bool(b),
        }

    @staticmethod
    def parse_font(font_name: str) -> dict:
        """Look up font metadata; empty dict for unknown font names."""
        dprint(f'Parsing font data for font name: {font_name!r}')
        return SauceRecordExtended.font_map.get(font_name, {})

    @staticmethod
    def parse_tinfo_field(tinfo_key: str, sauce: SauceRecord) -> dict:
        """Resolve one tinfoN field to its (data_type, file_type)-specific name."""
        if sauce.data_type == 5:
            # BinaryText encodes its width in file_type itself, so the
            # generic lookup below does not apply.
            # ('BinaryText', 'Variable'): {'tinfo1': '0', 'tinfo2': '0', 'tinfo3': '0', 'tinfo4': '0' }``
            raise NotImplementedError('SAUCE tinfo parsing for data_type 5 (BinaryText) is not implemented.')
        return {
            'name': SauceRecordExtended.tinfo_map.get((sauce.data_type, sauce.file_type), {}).get(tinfo_key, '0'),
            'value': getattr(sauce, tinfo_key),
        }

    @staticmethod
    def parse_tinfo(sauce: SauceRecord) -> dict:
        """Collect only the tinfo fields that are meaningful for this file type."""
        info = {}
        for name in SauceRecordExtended.tinfo_names:
            field_info = SauceRecordExtended.parse_tinfo_field(name, sauce)
            if field_info['name'] != '0':
                info[name] = field_info
        return info

    @staticmethod
    def parse(sauce: SauceRecord, file_data: str, fpath: str, encoding: SupportedEncoding) -> Tuple[SauceRecordExtended, str]:
        """Build the extended record and strip any COMNT block off file_data.

        Returns (record, remaining_file_data); on a comment-parsing error the
        original file_data is returned untouched.
        """
        flags = SauceRecordExtended.parse_flags(sauce.flags)
        font = SauceRecordExtended.parse_font(sauce.tinfo_s.strip())
        tinfo = SauceRecordExtended.parse_tinfo(sauce)
        ice_colours = flags.get('non_blink_mode', False)

        kwargs = {
            'fpath': fpath,
            'encoding': encoding,
            'sauce': sauce,
            'comments_data': [],
            'flags': flags,
            'font': font,
            'tinfo': tinfo,
            'ice_colours': ice_colours,
        }
        if sauce.comments == 0:
            dprint('No comments present in SAUCE record.')
            return SauceRecordExtended(**kwargs), file_data

        # The COMNT block ('COMNT' id + comments*64 chars) sits at the very
        # end of file_data (the 128-byte SAUCE record is already stripped),
        # so it starts (comments*64 + 5) characters from the end.
        # BUGFIX: the 5-char id offset was previously *added* instead of
        # subtracted, cutting the slice 10 chars short so parse_comments
        # always raised and comments were never extracted.
        block_idx = len(file_data) - (sauce.comments * 64) - 5
        data, comment_block = file_data[:block_idx], file_data[block_idx:]

        dprint(f'comment block: {comment_block!r}')

        try:
            comments_data = SauceRecordExtended.parse_comments(comment_block, sauce.comments)

            return SauceRecordExtended(**(kwargs | {'comments_data': comments_data})), data
        except ValueError as ve:
            dprint(f'Error parsing comments: {ve}')
            return SauceRecordExtended(**kwargs), file_data

    def asdict(self) -> dict:
        """Nested dict of the raw record plus the interpreted fields, for
        display/serialisation."""
        return {
            'sauce': self.sauce._asdict(),
            'extended': {
                'file_name': os.path.basename(self.fpath),
                'encoding': self.encoding.value,
                'comments': self.comments_data,
                'tinfo': self.tinfo,
                'flags': self.flags,
                'font': self.font,
                'ice_colours': self.ice_colours,
            }
        }
716
+
717
+
718
class SauceRecord(NamedTuple):
    """Raw SAUCE v00 metadata record (fixed 128-byte trailer layout).

    The per-field comments give the field width and the running byte total;
    they match the slices returned by offsets().
    """
    ID: str = '' # 5b
    version: str = '' # + 2b = 7b
    title: str = '' # + 35b = 42b
    author: str = '' # + 20b = 62b
    group: str = '' # + 20b = 82b
    date: str = '' # + 8b = 90b
    filesize: int = 0 # + 4b = 94b
    data_type: int = 0 # + 1b = 95b
    file_type: int = 0 # + 1b = 96b
    tinfo1: int = 0 # + 2b = 98b
    tinfo2: int = 0 # + 2b = 100b
    tinfo3: int = 0 # + 2b = 102b
    tinfo4: int = 0 # + 2b = 104b
    comments: int = 0 # + 1b = 105b
    flags: int = 0 # + 1b = 106b
    tinfo_s: str = '' # + 22b = 128b

    @staticmethod
    def offsets():
        """(start, end) byte slices of each field within the 128-byte record."""
        return {
            'ID': (0, 5),
            'version': (5, 7),
            'title': (7, 42),
            'author': (42, 62),
            'group': (62, 82),
            'date': (82, 90),
            'filesize': (90, 94),
            'data_type': (94, 95),
            'file_type': (95, 96),
            'tinfo1': (96, 98),
            'tinfo2': (98, 100),
            'tinfo3': (100, 102),
            'tinfo4': (102, 104),
            'comments': (104, 105),
            'flags': (105, 106),
            'tinfo_s': (106, 128),
        }

    def is_empty(self) -> bool:
        # A record without the 'SAUCE' id is the default/empty placeholder
        # returned when the file carried no SAUCE trailer.
        return self.ID != 'SAUCE'

    @staticmethod
    def parse_field(key: str, raw_value: bytes, encoding: str):
        """Decode one raw field: little-endian unsigned int for the numeric
        fields, NUL-stripped trimmed text (in ``encoding``) for the rest."""
        if key in {'data_type', 'file_type', 'comments', 'filesize', 'tinfo1', 'tinfo2', 'tinfo3', 'tinfo4', 'flags'}:
            dprint(f'Parsing {key} field with raw value: {raw_value!r}')
            # rstrip of trailing NULs is safe little-endian: those are the
            # high-order zero bytes.
            return int.from_bytes(raw_value.rstrip(b'\x00'), byteorder='little', signed=False)
        else:
            # NOTE: backslashes inside f-string expressions need Python 3.12+
            # (the module already requires 3.12 for itertools.batched).
            dprint(f'Parsing {key} field with raw value: {raw_value.replace(b"\x00", b"").strip()!r}')
            return raw_value.replace(b'\x00', b'').strip().decode(encoding)

    @staticmethod
    def parse_record(file_path: str, encoding) -> Tuple[SauceRecord, str]:
        """Read ``file_path`` and split off a trailing SAUCE record.

        Returns (record, text): an empty record plus the whole decoded file
        when no trailer is found, otherwise the parsed record plus the file
        body with the trailing 128 bytes removed.
        """
        with open(file_path, 'rb') as f:
            file_data = f.read()

        data, sauce_data = file_data[:-128], file_data[-128:]

        if not (sauce_data and sauce_data.startswith(b'SAUCE')):
            dprint(f'No SAUCE record found: {sauce_data[:5]!r}')
            return SauceRecord(), file_data.decode(encoding)

        values = {}
        # offsets() iterates in field-declaration order, so *values.values()
        # lines up positionally with the NamedTuple fields.
        for key, (start, end) in SauceRecord.offsets().items():
            values[key] = SauceRecord.parse_field(key, sauce_data[start:end], encoding)

        return SauceRecord(*values.values()), data.decode(encoding)
785
+
786
class SupportedEncoding(Enum):
    """Text encodings the converter knows how to read."""
    CP437 = 'cp437'
    ISO_8859_1 = 'iso-8859-1'
    ASCII = 'ascii'
    UTF_8 = 'utf-8'

    @staticmethod
    def from_value(value: str) -> SupportedEncoding:
        """Return the member whose value equals ``value``.

        Raises ValueError for anything not in the enum.
        """
        match = next((member for member in SupportedEncoding if member.value == value), None)
        if match is None:
            raise ValueError(f'Unsupported encoding: {value}')
        return match
798
+
799
# blockChars = [][]byte{[]byte("░"), []byte("▒"), []byte("█"), []byte("▄"), []byte("▐"), []byte("▀")}
# Heuristic tables used by detect_encoding(): byte values whose presence in a
# raw file suggests CP437 (shaded/box-drawing art) versus ISO-8859-1
# (ASCII-style line drawing).  Keys are raw byte values; values are the
# glyphs those bytes represent in the respective encoding.
CP437_BLOCK_MAP = {
    0xB0: '░',
    0xB1: '▒',
    0xDB: '█',
    0xDC: '▄',
    0xDD: '▐',
    0xDF: '▀',
}
CP437_BOX_MAP = {
    0xC0: '└',
    0xD9: '┘',
    0xC3: '├',
    0xC2: '┬',
    0xC1: '┐',
    0xB4: '┤',
}
ISO_8859_1_BOX_MAP = {
    0x7c: '|',
    0x5c: '\\',
    0x2f: '/',
    0xaf: '¯',
    0x5f: '_',
}
# Characters common in scene art whose byte value differs per encoding;
# finding one of these bytes scores a point for that encoding.
POPULAR_CHAR_MAP = {
    'Ñ': {SupportedEncoding.CP437: 0xA5, SupportedEncoding.ISO_8859_1: 0xD1},
}
# Byte values that only make sense in one encoding when seen alongside
# "regular" bytes that decode identically in both candidates.
ODD_ONES_OUT = [
    {
        'points': 1,
        'points_for': SupportedEncoding.ISO_8859_1,
        'char': {0xAF: '¯'}, # in CP437 this char is: ['»' hex=0xaf]
        'regulars': {0x2D: '-', 0x3A: ':', 0x7C: '|'}, # these decode identically in ISO-8859-1 and CP437
    }
]
834
+
835
def detect_encoding(fpath: str) -> SupportedEncoding:
    """Detect file encoding based on presence of CP437 block characters.

    Runs several byte-frequency heuristics over the raw file and awards
    points to each SupportedEncoding; the highest scorer wins. Every
    encoding starts with one baseline point so most_common() always has
    a deterministic winner even when no heuristic fires.
    """
    with open(fpath, 'rb') as f:
        data = f.read()
    points = Counter(list(SupportedEncoding.__members__.values()))

    # Pass 1: bytes for "popular" characters whose value differs per encoding.
    for char, version in POPULAR_CHAR_MAP.items():
        for encoding, byt in version.items():
            count = data.count(byt)
            if count == 0:
                continue
            dprint(f'> [{encoding.value} +1] Detected popular character in file: {(char, encoding.value, count)}')
            points[encoding] += 1

    # Pass 2: "odd one out" rules — a distinguishing byte seen alongside
    # bytes that decode the same in both encodings.
    for odd_char in ODD_ONES_OUT:
        # BUG FIX: previously a bare `break` fell through to the scoring
        # below, so the rule could fire even when its distinguishing byte
        # never occurred. Skip the rule unless every 'char' byte is present.
        if any(data.count(byt) == 0 for byt in odd_char['char']):
            continue

        counts = [
            (replacement, data.count(byt))
            for byt, replacement in odd_char['regulars'].items()
            if data.count(byt) > 0
        ]
        if len(counts) > 1:
            dprint(f'> [{odd_char["points_for"].value} +{odd_char["points"]}] Detected odd-one-out characters in file: {counts}')
            # BUG FIX: points were credited to the loop variable `encoding`
            # leaked from pass 1, not to this rule's declared target
            # (the dprint line above already named points_for).
            points[odd_char['points_for']] += odd_char['points']

    # Pass 3: compare box/block-drawing byte volume between the two candidates.
    iso_box_counts = Counter({b: data.count(b) for b in ISO_8859_1_BOX_MAP})

    cp437_counts = Counter()
    for b in (CP437_BOX_MAP | CP437_BLOCK_MAP):
        n = data.count(b)  # hoisted: count once instead of twice per byte
        if n > 0:
            cp437_counts[b] = n

    if len(cp437_counts) > 1:
        if cp437_counts.total() < iso_box_counts.total():
            dprint(f'> [ISO +1] Detected more ISO-8859-1 box characters in file than CP437: {iso_box_counts.total()} vs {cp437_counts.total()}')
            points[SupportedEncoding.ISO_8859_1] += 1
        else:
            dprint(f'> [CP437 +1] Detected CP437 characters in file: {cp437_counts}')
            points[SupportedEncoding.CP437] += 1

    if DEBUG:
        pp.ppd({'points': {k.name: v for k, v in points.items()}}, indent=2)
    return points.most_common(1)[0][0]
886
+
887
+
888
@dataclass
class Tokeniser:
    """Turn decoded ANSI-art text into a stream of ANSIToken objects.

    Configuration (glyph offset, width, ICE colours) is resolved in
    __post_init__: explicit constructor arguments win, then the file's
    extended SAUCE record, then hard defaults.
    """
    fpath: str  # path of the source file (kept for reference/debugging)
    sauce: SauceRecordExtended  # parsed SAUCE metadata used for fallback configuration
    data: str  # file contents, already decoded to str
    encoding: SupportedEncoding = SupportedEncoding.CP437
    tokens: list[ANSIToken] = field(default_factory=list, init=False)
    glyph_offset: int = field(init=False, default=0xE000)  # Unicode PUA base for font glyph remapping
    ice_colours: bool = field(default=False)  # non-blinking backgrounds (iCE colours)
    font_name: str = field(default='')  # explicit font override; takes priority over SAUCE font
    width: int = field(default=0)  # output columns; 0 means "derive from SAUCE"
    counts: Counter[tuple[str, str]] = field(default_factory=Counter, init=False)  # DEBUG-only char frequencies
    _textTokenType: type = field(init=False, repr=False, default=TextToken)  # token class for plain-text runs

    def __post_init__(self):
        # Glyph offset: an explicit font name beats the SAUCE-declared font.
        if self.font_name:
            self.glyph_offset = get_glyph_offset(self.font_name)
        elif 'name' in self.sauce.font:
            self.glyph_offset = get_glyph_offset(self.sauce.font['name'])

        # Width: SAUCE tinfo1 carries the column count; 80 when zero/unset.
        if not self.width:
            self.width = int(self.sauce.sauce.tinfo1) or 80

        # ICE colours: honour the SAUCE non-blink flag unless forced on.
        if not self.ice_colours:
            self.ice_colours = self.sauce.flags['non_blink_mode']

        # CP437 input gets a dedicated token type so characters can be
        # remapped into the font's glyph range.
        if self.encoding == SupportedEncoding.CP437:
            self._textTokenType = CP437Token
        else:
            self._textTokenType = TextToken

        dprint(f'Using extended sauce: {self.sauce!r}')
        dprint(f'Width: {self.width}, Glyph offset: {hex(self.glyph_offset)}, Ice colours: {self.ice_colours}')

    def create_tokens(self, code_chars: list[str]) -> list[ANSIToken]:
        'Create a token from a complete ANSI escape sequence.'
        # Anything shorter than ESC [ X cannot be a valid CSI sequence.
        if len(code_chars) < 3:
            return [UnknownToken(value=''.join(code_chars))]

        # Handle custom true color format: \x1b[0;R;G;Bt (BG) or \x1b[1;R;G;Bt (FG)
        # — mode '0' builds a background token, '1' a foreground token below
        # (presumably the PabloDraw-style 24-bit extension; confirm against spec).
        if code_chars[0:2] == ['\x1b', '['] and code_chars[-1] == 't':
            params = ''.join(code_chars[2:-1]).split(';')
            if len(params) == 4 and params[0] in ['0', '1']:
                mode, r, g, b = params
                rgb_value = f'{int(r)},{int(g)},{int(b)}'
                if mode == '0':
                    return [TrueColorBGToken(value=rgb_value)]
                elif mode == '1':
                    return [TrueColorFGToken(value=rgb_value)]

        # SGR colour/attribute sequence: ESC [ params m
        if code_chars[0:2] == ['\x1b', '['] and code_chars[-1] == 'm':
            params = ''.join(code_chars[2:-1]).split(';')
            return [Color8Token(value=';'.join(params), params=params, ice_colours=self.ice_colours)]

        # Cursor movement / erase / other recognised CSI final bytes.
        elif code_chars[-1] in ANSI_CONTROL_CODES:
            return [ControlToken(value=''.join(code_chars[2:]))]

        return [UnknownToken(value=''.join(code_chars))]

    def tokenise(self) -> Iterator[ANSIToken]:
        'Tokenise ANSI escape sequences and text.'
        # Simple state machine: isCode is True while inside an escape
        # sequence; currCode/currText accumulate the sequence / text run.
        isCode, currCode, currText = False, [], []
        for ch in self.data:
            if ch == '\x1b':
                # ESC starts a sequence; flush any pending text run first.
                isCode = True
                currCode.append(ch)
                if currText:
                    yield self._textTokenType(value=''.join(currText), offset=self.glyph_offset)
                    currText = []

            elif isCode:
                currCode.append(ch)
                # A letter terminates the CSI sequence (e.g. 'm', 'H', 'C').
                if ch.isalpha():
                    isCode = False
                    yield from self.create_tokens(currCode)
                    currCode = []
            else:
                if DEBUG:
                    self.counts[(ch, hex(ord(ch)))] += 1
                if ch == '\n':
                    # Newlines become their own token, after flushing text.
                    if currText:
                        yield self._textTokenType(value=''.join(currText), offset=self.glyph_offset)
                        currText = []
                    yield NewLineToken(value=ch)
                elif ord(ch) in C0_TOKEN_NAMES:
                    # Other C0 control characters get a dedicated token.
                    if currText:
                        yield self._textTokenType(value=''.join(currText), offset=self.glyph_offset)
                        currText = []
                    yield C0Token(value=ch, offset=self.glyph_offset)
                else:
                    currText.append(ch)
        # Flush any trailing text at end of input.
        if currText:
            yield self._textTokenType(value=''.join(currText), offset=self.glyph_offset)
981
+
982
@dataclass
class Renderer:
    """Consume the Tokeniser's stream and emit text wrapped at `width`.

    Keeps track of the active SGR attribute and FG/BG colour tokens so
    they can be re-applied at the start of every wrapped line.
    """
    fpath: str
    tokeniser: Tokeniser = field(repr=False)
    _currLine: list[ANSIToken] = field(default_factory=list, repr=False)  # tokens of the line being built
    _currLength: int = field(default=0, repr=False)  # printable length accumulated so far
    _currFG: ColorFGToken | None = field(default=None, repr=False)  # last foreground colour token
    _currBG: ColorBGToken | None = field(default=None, repr=False)  # last background colour token
    _currSGR: ANSIToken | None = field(default=None, repr=False)  # last non-reset SGR attribute
    width: int = field(init=False)  # wrap width, copied from the tokeniser

    def __post_init__(self):
        self.width = self.tokeniser.width

    def split_text_token(self, s: str, remainder: int) -> Iterator[TextToken]:
        # First chunk fills the current line's remaining space; the rest are
        # full-width slices. NOTE(review): remainder == 0 yields an empty
        # leading TextToken — appears harmless downstream, but confirm.
        for chunk in [s[:remainder]] + list(map(''.join, batched(s[remainder:], self.width))):
            yield TextToken(value=chunk)

    def _add_current_colors(self):
        'Re-add current FG/BG colors to the current line.'
        if self._currSGR: self._currLine.append(self._currSGR)
        if self._currFG: self._currLine.append(self._currFG)
        if self._currBG: self._currLine.append(self._currBG)

    def gen_lines(self) -> Iterator[list[ANSIToken]]:
        'Split tokens into lines at width, or each newline char'

        newLine = [NewLineToken(value='\n')]

        for t in self.tokeniser.tokenise():
            # Cursor-home ('H') / save-cursor ('s') implies absolute
            # positioning, so stop appending our own trailing newlines.
            # NOTE(review): this switch is permanent once triggered.
            if isinstance(t, ControlToken) and t.subtype in ('H', 's'):
                newLine = []

            if isinstance(t, Color8Token):

                # Expand the aggregate SGR sequence into individual tokens
                # and record the resulting colour/attribute state.
                tokens = list(t.generate_tokens(self._currFG, self._currBG))
                self._currLine.extend(tokens)

                for tok in tokens:
                    if isinstance(tok, SGRToken):
                        if tok.value_name == 'Reset':
                            self._currFG, self._currBG, self._currSGR = None, None, None
                        else:
                            self._currSGR = tok
                    elif isinstance(tok, Color8FGToken):
                        self._currFG = tok
                    elif isinstance(tok, Color8BGToken):
                        self._currBG = tok

            elif isinstance(t, TrueColorFGToken):
                self._currLine.append(t)
                self._currFG = t

            elif isinstance(t, TrueColorBGToken):
                self._currLine.append(t)
                self._currBG = t

            elif isinstance(t, (TextToken, CP437Token, ControlToken)):
                # Exact fit: close the line with a reset and start fresh,
                # re-applying the active colours on the new line.
                if self._currLength + len(str(t)) == self.width:
                    yield self._currLine + [t, SGRToken(value='0')] + newLine
                    self._currLine, self._currLength = [], 0
                    self._add_current_colors()
                    continue

                # Fits with room to spare: just accumulate.
                if self._currLength + len(str(t)) < self.width:
                    self._currLine.append(t)
                    self._currLength += len(str(t))
                    continue

                # Overflow: split the text across the remaining space and
                # subsequent full-width lines.
                for chunk in self.split_text_token(str(t), self.width - self._currLength):
                    self._currLine.append(chunk)
                    self._currLength += len(str(chunk))

                    if self._currLength == self.width:
                        yield self._currLine + [SGRToken(value='0')] + newLine

                        self._currLine, self._currLength = [], 0
                        self._add_current_colors()

                    elif self._currLength > self.width:
                        raise ValueError(f'Logic error in line splitting, {self._currLength} > {self.width}')

            elif isinstance(t, NewLineToken):
                yield self._currLine + [SGRToken(value='0')] + newLine
                self._currLine, self._currLength = [], 0
                self._add_current_colors()

            else:
                # Zero-width tokens (SGR etc.): append and track state.
                self._currLine.append(t)
                if isinstance(t, SGRToken):
                    if t.value_name == 'Reset':
                        self._currFG, self._currBG, self._currSGR = None, None, None
                    else:
                        self._currSGR = t
        # Flush the final partial line, reset attributes, mark end of file.
        if self._currLine:
            yield self._currLine + [SGRToken(value='0'), EOFToken(value='')]

    def iter_lines(self) -> Iterator[str]:
        # Joins each token line into a string; in DEBUG mode also dumps a
        # per-token breakdown of every line to stdout.
        for i, line in enumerate(self.gen_lines()):
            if DEBUG:
                print(f'\n\x1b[30;103m[{i}]:\x1b[0m\n{"\n".join([el.repr() for el in line])}')
            yield ''.join(map(str, line))

    def render(self) -> str:
        'Render tokens into a string with proper line wrapping.'
        return ''.join(list(self.iter_lines()))
1088
+
1089
def parse_args() -> dict:
    """Collect command-line options and return them as a plain dict."""
    parser = ArgumentParser()
    # Table-driven flag registration: (names, add_argument kwargs).
    flag_specs = [
        (('--fpath', '-f'), {'type': str, 'required': True, 'help': 'Path to the ANSI file to render.'}),
        (('--encoding', '-e'), {'type': str, 'help': 'Specify the file encoding (cp437, iso-8859-1, ascii, utf-8) if the auto-detection was incorrect.'}),
        (('--sauce-only', '-s'), {'action': 'store_true', 'default': False, 'help': 'Only output the SAUCE record information as JSON and exit.'}),
        (('--verbose', '-v'), {'action': 'store_true', 'default': False, 'help': 'Enable verbose debug output.'}),
        (('--ice-colours',), {'action': 'store_true', 'default': False, 'help': 'Force enabling ICE colours (non-blinking background).'}),
        (('--font-name',), {'type': str, 'help': 'Specify the font name to determine glyph offset (overrides SAUCE font).'}),
        (('--width', '-w'), {'type': int, 'help': 'Specify the output width (overrides SAUCE tinfo1).'}),
    ]
    for names, kwargs in flag_specs:
        parser.add_argument(*names, **kwargs)
    return vars(parser.parse_args())
1099
+
1100
def main():
    """Entry point: detect encoding, parse SAUCE, tokenise and render the file."""
    options = parse_args()

    global DEBUG
    DEBUG = options.pop('verbose')
    pp.enabled = not DEBUG

    # Explicit --encoding wins; otherwise run the byte-frequency heuristics.
    requested = options.get('encoding')
    if requested:
        encoding = SupportedEncoding.from_value(requested)
    else:
        encoding = detect_encoding(options['fpath'])
        dprint(f'Detected encoding: {encoding}')

    sauce_only = options.pop('sauce_only')
    sauce_record, data = SauceRecord.parse_record(options['fpath'], encoding.value)
    sauce_extended, data = SauceRecordExtended.parse(sauce_record, data, options['fpath'], encoding)

    # --sauce-only: dump the metadata and stop before any rendering.
    if sauce_only:
        pp.enabled = True
        pp.ppd(sauce_extended.asdict(), indent=2)
        return

    tokeniser = Tokeniser(**(options | {'encoding': encoding, 'sauce': sauce_extended, 'data': data}))
    renderer = Renderer(fpath=options['fpath'], tokeniser=tokeniser)
    dprint('\nRendered string:')
    try:
        print(renderer.render(), end='')
    except BrokenPipeError as e:
        # Downstream pager/pipe closed early — exit quietly but non-zero.
        dprint(f'BrokenPipeError: {e}')
        sys.exit(1)

    if DEBUG:
        dprint(pprint.pformat(tokeniser.counts.most_common()))
1132
+
1133
+
1134
# Allow running the module directly as a script.
if __name__ == '__main__':
    main()