PyCriCodecsEx 0.0.1__cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
PyCriCodecsEx/utf.py ADDED
@@ -0,0 +1,704 @@
1
+ from typing import BinaryIO, TypeVar, Type, List
2
+
3
+ T = TypeVar("T")
4
+ from io import BytesIO, FileIO
5
+ from struct import unpack, calcsize, pack
6
+ from PyCriCodecsEx.chunk import *
7
+
8
class UTF:
    """Parser for CRI middleware @UTF table payloads.

    Reads a (possibly encrypted) @UTF chunk and exposes its rows as a list of
    ``{column_name: (UTFTypeValues, value)}`` dictionaries (see ``dictarray``).
    """

    # Parsed rows; one dict per row.
    _dictarray: list

    magic: bytes
    table_size: int
    rows_offset: int
    string_offset: int
    data_offset: int
    num_columns: int
    row_length: int
    num_rows: int
    stream: BinaryIO
    recursive: bool
    encoding: str = 'utf-8'

    def __init__(self, stream, recursive=False):
        """Unpacks UTF table binary payload

        Args:
            stream (Union[str, bytes]): The input stream or file path to read the UTF table from.
            recursive (bool): Whether to recursively unpack nested UTF tables.
        """
        if type(stream) == str:
            self.stream = FileIO(stream)
        else:
            self.stream = BytesIO(stream)
        (
            self.magic,
            self.table_size,
            self.rows_offset,
            self.string_offset,
            self.data_offset,
            self.table_name,  # pointer into the string pool at this stage
            self.num_columns,
            self.row_length,
            self.num_rows,
        ) = UTFChunkHeader.unpack(self.stream.read(UTFChunkHeader.size))
        if self.magic == UTFType.UTF.value:
            self._read_rows_and_columns()
        elif self.magic == UTFType.EUTF.value:
            # Encrypted table: XOR-decrypt the whole buffer with CRI's
            # multiplicative keystream, then re-parse the header.
            self.stream.seek(0)
            data = memoryview(bytearray(self.stream.read()))
            m = 0x655F
            t = 0x4115
            for i in range(len(data)):
                data[i] ^= 0xFF & m
                m = (m * t) & 0xFFFFFFFF
            self.stream = BytesIO(bytearray(data))
            (
                self.magic,
                self.table_size,
                self.rows_offset,
                self.string_offset,
                self.data_offset,
                self.table_name,
                self.num_columns,
                self.row_length,
                self.num_rows,
            ) = UTFChunkHeader.unpack(self.stream.read(UTFChunkHeader.size))
            if self.magic != UTFType.UTF.value:
                raise Exception("Decryption error.")
            self._read_rows_and_columns()
        else:
            raise ValueError("UTF chunk is not present.")
        self.recursive = recursive
        if recursive:
            def dfs(payload: list[dict]) -> None:
                # Expand any embedded @UTF blob into a (table_name, dictarray) pair.
                for col in range(len(payload)):
                    for k, v in payload[col].items():
                        typeof, value = v
                        if typeof == UTFTypeValues.bytes:
                            # XXX: Recursive UTF tables doesn't seem to get encrypted (e.g. CPK, ACB)
                            # We can pass addition reconstruction flags alongside table names later on, but this is good enough for now
                            if value.startswith(UTFType.UTF.value) or value.startswith(
                                UTFType.EUTF.value
                            ):
                                table = UTF(value, recursive=False)
                                payload[col][k] = (table.table_name, table.dictarray)
                                dfs(table.dictarray)

            dfs(self.dictarray)

    def _read_rows_and_columns(self):
        """Parse the column schema, row data and string/binary pools into _dictarray."""
        stream = self.stream.read(self.data_offset - 0x18)
        stream = BytesIO(stream)
        # types[0]: per-row columns (0x50), types[1]: constant tuples (0x30),
        # types[2]: named constants (0x10), types[3]: reserved for 0x70.
        types = [[], [], [], []]
        target_data = []
        target_constant = []
        target_tuple = []
        s_offsets = []  # column-name string offsets, in schema order
        for i in range(self.num_columns):
            flag = stream.read(1)[0]
            stflag = flag >> 4     # storage class nibble
            typeflag = flag & 0xF  # value type nibble
            if stflag == 0x1:
                offset = int.from_bytes(stream.read(4), "big")
                s_offsets.append(offset)
                target_constant.append(offset)
                types[2].append((">" + self._stringtypes(typeflag), typeflag))
            elif stflag == 0x3:
                offset = int.from_bytes(stream.read(4), "big")
                s_offsets.append(offset)
                target_tuple.append(
                    (
                        offset,
                        unpack(
                            ">" + self._stringtypes(typeflag),
                            stream.read(calcsize(self._stringtypes(typeflag))),
                        ),
                    )
                )
                types[1].append((">" + self._stringtypes(typeflag), typeflag))
            elif stflag == 0x5:
                offset = int.from_bytes(stream.read(4), "big")
                s_offsets.append(offset)
                target_data.append(offset)
                types[0].append((">" + self._stringtypes(typeflag), typeflag))
            elif stflag == 0x7:  # Exists in old CPK's.
                raise NotImplementedError("Unsupported 0x70 storage flag.")
            else:
                raise Exception("Unknown storage flag.")

        rows = []
        for j in range(self.num_rows):
            for i in types[0]:
                rows.append(unpack(i[0], stream.read(calcsize(i[0]))))

        # Strip the ">" endianness prefix now that row data has been unpacked.
        for i in range(4):
            for j in range(len(types[i])):
                types[i][j] = (types[i][j][0][1:], types[i][j][1])
        strings = (stream.read()).split(b"\x00")
        strings_copy = strings[:]
        self._dictarray = []
        self.encoding = "utf-8"
        for i in range(len(strings)):
            try:
                strings_copy[i] = strings[i].decode("utf-8")
            except UnicodeDecodeError:
                # @UTF only supports these 3 encodings; try the remaining two.
                for x in ["shift-jis", "utf-16"]:
                    try:
                        strings_copy[i] = strings[i].decode(x)
                        self.encoding = x
                        break
                    except UnicodeDecodeError:
                        continue
                else:
                    # Bug fix: UnicodeDecodeError requires
                    # (encoding, object, start, end, reason); the old
                    # single-argument call raised TypeError instead.
                    raise UnicodeDecodeError(
                        "utf-8",
                        strings[i],
                        0,
                        len(strings[i]),
                        f"String of unknown encoding: {strings[i]}",
                    )
        t_t_dict = dict()  # constant (non per-row) columns
        self.table_name = strings_copy[self._finder(self.table_name, strings)]
        UTFTypeValuesList = list(UTFTypeValues)
        s_orders = [strings_copy[self._finder(i, strings)] for i in s_offsets]

        def ensure_order(d: dict) -> dict:
            # Re-emit keys in original schema order.
            return {k: d[k] for k in s_orders if k in d}

        for i in range(len(target_constant)):
            name = strings_copy[self._finder(target_constant[i], strings)]
            tflag = types[2][i][1]
            if tflag not in [0xA, 0xB]:
                t_t_dict[name] = (UTFTypeValuesList[tflag], None)
            elif tflag == 0xA:
                t_t_dict[name] = (UTFTypeValues.string, "<NULL>")
            else:
                # Most likely unreachable; the code doesn't seem to get here.
                t_t_dict[name] = (UTFTypeValues.bytes, b"")
        for i in range(len(target_tuple)):
            tflag = types[1][i % len(types[1])][1]
            name = strings_copy[self._finder(target_tuple[i][0], strings)]
            if tflag not in [0xA, 0xB]:
                t_t_dict[name] = (UTFTypeValuesList[tflag], target_tuple[i][1][0])
            elif tflag == 0xA:
                t_t_dict[name] = (
                    UTFTypeValues.string,
                    strings_copy[self._finder(target_tuple[i][1][0], strings)],
                )
            else:
                # 0xB: value is (offset, length) into the binary data region.
                self.stream.seek(self.data_offset + target_tuple[i][1][0] + 0x8, 0)
                bin_val = self.stream.read(target_tuple[i][1][1])
                t_t_dict[name] = (UTFTypeValues.bytes, bin_val)
        temp_dict = dict()
        if len(rows) == 0:
            self._dictarray.append(ensure_order(t_t_dict))
        for i in range(len(rows)):
            col_type = types[0][i % len(types[0])][1]
            col_name = strings_copy[
                self._finder(target_data[i % len(target_data)], strings)
            ]
            if col_type not in [0xA, 0xB]:
                temp_dict[col_name] = (UTFTypeValuesList[col_type], rows[i][0])
            elif col_type == 0xA:
                temp_dict[col_name] = (
                    UTFTypeValues.string,
                    strings_copy[self._finder(rows[i][0], strings)],
                )
            else:
                self.stream.seek(self.data_offset + rows[i][0] + 0x8, 0)
                bin_val = self.stream.read(rows[i][1])
                temp_dict[col_name] = (UTFTypeValues.bytes, bin_val)
            # End of a row: merge in the constant columns and emit.
            if not (i + 1) % len(types[0]):
                temp_dict.update(t_t_dict)
                self._dictarray.append(ensure_order(temp_dict))
                temp_dict = dict()

    def _stringtypes(self, type: int) -> str:
        """Map a @UTF type nibble to a struct format string."""
        types = "BbHhIiQqfdI"
        if type == 0xB:
            return "II"  # bytes: (offset, length) pair
        if 0 <= type < len(types):
            return types[type]
        # Previously an unreachable else (the if/elif covered every path);
        # now a real guard for out-of-range nibbles.
        raise Exception("Unknown data type.")

    def _finder(self, pointer, strings) -> int:
        """Return the index of the string whose pool offset is *pointer*."""
        offset = 0  # renamed from `sum` (shadowed the builtin)
        for i in range(len(strings)):
            if offset < pointer:
                offset += len(strings[i]) + 1  # +1 for the NUL separator
                continue
            return i
        else:
            raise Exception("Failed string lookup.")

    @property
    def table(self) -> dict:
        """Returns a dictionary representation of the UTF table.

        Effectively, this retrieves a transposed version of the dictarray. Whilst discarding
        type info.

        This is mostly here for cpk.py compatibility.
        """
        keys = self._dictarray[0].keys()
        return {key: [d[key][1] for d in self._dictarray] for key in keys}

    @property
    def dictarray(self) -> list[dict]:
        """Returns a list representation of the UTF table. """
        return self._dictarray
289
+
290
class UTFBuilder:
    """Serializes a UTF dictarray (see ``UTF.dictarray``) back into @UTF binary form."""

    encoding: str
    dictarray: list
    strings: bytes
    table_name: str
    binary: bytes
    table: bytearray
    stflag: list
    rows_data: bytearray
    column_data: bytearray
    data_offset: int
    # Declared here for consistency with the other attributes (was set in
    # __init__ but never declared).
    encrypt: bool

    def __init__(
        self,
        dictarray: list[dict],
        encrypt: bool = False,
        encoding: str = "utf-8",
        table_name: str = "PyCriCodecs_table",
        ignore_recursion: bool = False,
    ) -> None:
        """Packs UTF payload back into their binary form

        Args:
            dictarray: A list of dictionaries representing the UTF table.
            encrypt: Whether to encrypt the table (default: False).
            encoding: The character encoding to use (default: "utf-8").
            table_name: The name of the table (default: "PyCriCodecs_table").
            ignore_recursion: Whether to ignore recursion when packing (default: False).
        """
        assert type(dictarray) == list, "dictarray must be a list of dictionaries (see UTF.dictarray)."

        # Preprocess for nested dictarray types: depth-first, replace each
        # (table_name, sub_dictarray) pair with serialized @UTF bytes.
        # Bug fix: the old version also serialized the *top-level* table once
        # here and discarded the result; now only subtables are built.
        # ? Could subtables be encrypted at all?
        def expand(payload: list) -> None:
            for row in payload:
                for k, v in row.items():
                    typeof_or_name, value = v
                    if type(value) == list:
                        assert type(typeof_or_name) == str, "bogus payload data"
                        expand(value)  # innermost tables are packed first
                        row[k] = (
                            UTFTypeValues.bytes,
                            UTFBuilder(
                                value,
                                encoding=encoding,
                                table_name=typeof_or_name,
                                ignore_recursion=True,
                            ).bytes(),
                        )

        if not ignore_recursion:
            expand(dictarray)
        if not dictarray:
            raise ValueError("dictarray must contain at least one dictionary.")
        lengths = {len(x) for x in dictarray}
        if len(lengths) != 1:
            raise ValueError("All dictionaries must be equal in length.")
        matches = [(k, v[0]) for k, v in dictarray[0].items()]
        for i in range(1, len(dictarray)):
            if matches != [(k, v[0]) for k, v in dictarray[i].items()]:
                raise ValueError(
                    "Keys and/or value types are not matching across dictionaries."
                )
        self.dictarray = dictarray
        self.encrypt = encrypt
        self.encoding = encoding
        self.table_name = table_name
        self.binary = b""
        self._get_strings()

    def _write_header(self) -> bytearray:
        """Build the @UTF chunk header; must run after columns/rows/strings are built."""
        self.data_offset = (
            len(self.column_data)
            + len(self.rows_data)
            + len(self.strings)
            + len(self.binary)
            + 0x18
        )
        datalen = self.data_offset
        # The chunk size is padded up to an 8-byte boundary.
        if self.data_offset % 8 != 0:
            self.data_offset = self.data_offset + (8 - self.data_offset % 8)
        if len(self.binary) == 0:
            binary_offset = self.data_offset
        else:
            binary_offset = datalen - len(self.binary)
        header = UTFChunkHeader.pack(
            b"@UTF",  # Magic.
            self.data_offset,  # Chunk size.
            len(self.column_data) + 0x18,  # Rows offset.
            datalen - len(self.strings) - len(self.binary),  # String offset.
            binary_offset,  # Binary data offset.
            (
                0
                if self.strings.startswith(bytes(self.table_name, self.encoding))
                else self.strings.index(
                    b"\x00" + bytes(self.table_name, self.encoding) + b"\x00"
                )
                + 1
            ),  # Table name pointer.
            len(self.stflag),  # Num columns.
            # Bug fix (comments only): the next two fields were mislabeled as
            # "Num rows" / "Rows length"; the pack order matches UTF's unpack:
            # ..., num_columns, row_length, num_rows.
            sum(
                [calcsize(self._stringtypes(x[1])) for x in self.stflag if x[0] == 0x50]
            ),  # Row length (bytes per row).
            len(self.dictarray),  # Num rows.
        )
        return bytearray(header)

    def _write_rows(self) -> bytearray:
        """Serialize the per-row (0x50) column values for every row."""
        rows = bytearray()
        for row in self.dictarray:  # renamed from `dict` (shadowed the builtin)
            for data in self.stflag:
                if data[0] != 0x50:
                    continue  # constant columns carry no per-row data
                if data[1] not in [0xA, 0xB]:
                    rows += pack(">" + self._stringtypes(data[1]), row[data[2]][1])
                elif data[1] == 0xA:
                    # Strings are stored as offsets into the string pool.
                    if bytes(row[data[2]][1], self.encoding) == b"":
                        idx = self.strings.index(b"\x00\x00") + 1
                        rows += pack(">" + self._stringtypes(data[1]), idx)
                    else:
                        rows += pack(
                            ">" + self._stringtypes(data[1]),
                            self.strings.index(
                                b"\x00"
                                + bytes(row[data[2]][1], self.encoding)
                                + b"\x00"
                            )
                            + 1,
                        )
                else:
                    # Bytes are stored as an (offset, length) pair into the blob.
                    rows += pack(
                        ">" + self._stringtypes(data[1]),
                        self.binary.index(row[data[2]][1]),
                        len(row[data[2]][1]),
                    )
        return rows

    def _write_columns(self) -> bytearray:
        """Serialize the column schema (flag byte + name pointer + constant payload)."""
        columns = bytearray()
        for data in self.stflag:
            columns += int.to_bytes(data[0] | data[1], 1, "big")
            # Every branch starts with the column-name pool pointer.
            name_ptr = int.to_bytes(
                self.strings.index(
                    b"\x00" + bytes(data[2], self.encoding) + b"\x00"
                )
                + 1,
                4,
                "big",
            )
            if data[0] in [0x10, 0x50]:
                columns += name_ptr
            elif data[1] not in [0xA, 0xB]:
                # 0x30 constant scalar: value inlined after the name pointer.
                columns += name_ptr + int.to_bytes(
                    data[3], calcsize(self._stringtypes(data[1])), "big"
                )
            elif data[1] == 0xA:
                # 0x30 constant string: pool pointer (0 if it is the pool head).
                if self.strings.startswith(bytes(data[3], self.encoding) + b"\x00"):
                    columns += name_ptr + b"\x00\x00\x00\x00"
                else:
                    columns += name_ptr + int.to_bytes(
                        self.strings.index(
                            b"\x00" + bytes(data[3], self.encoding) + b"\x00"
                        )
                        + 1,
                        4,
                        "big",
                    )
            else:
                # 0x30 constant bytes: (offset, length) into the binary blob.
                columns += (
                    name_ptr
                    + int.to_bytes(self.binary.index(data[3]), 4, "big")
                    + int.to_bytes(len(data[3]), 4, "big")
                )
        return columns

    def _get_stflag(self):
        """Classify each column: 0x50 per-row, 0x30 shared constant, 0x10 null constant."""
        to_match = [(x, y) for x, y in self.dictarray[0].items()]
        UTFTypeValuesList = list(UTFTypeValues)
        self.stflag = []
        for val in to_match:
            if len(self.dictarray) != 1:
                for row in self.dictarray:
                    if row[val[0]][1] != val[1][1]:
                        # Value varies across rows -> per-row storage.
                        self.stflag.append(
                            (0x50, UTFTypeValuesList.index(val[1][0]), val[0])
                        )
                        break
                else:
                    if val[1][1] is None:
                        self.stflag.append(
                            (0x10, UTFTypeValuesList.index(val[1][0]), val[0])
                        )
                    else:
                        self.stflag.append(
                            (
                                0x30,
                                UTFTypeValuesList.index(val[1][0]),
                                val[0],
                                val[1][1],
                            )
                        )
            else:
                # It seems that when there is only one dictionary, there will be no element of type 0x30 flag
                # Otherwise all of them would be either 0x30 or 0x10 flags with no length to the rows.
                if val[1][1] is None or val[1][1] == "<NULL>":
                    self.stflag.append(
                        (0x10, UTFTypeValuesList.index(val[1][0]), val[0])
                    )
                else:
                    self.stflag.append(
                        (0x50, UTFTypeValuesList.index(val[1][0]), val[0])
                    )

    def _get_strings(self):
        """Build the NUL-separated string pool and the concatenated binary blob."""
        strings = []
        binary = b""

        # Column names first, in first-appearance order.
        for row in self.dictarray:
            for key in row:
                if key not in strings:
                    strings.append(key)
        # Then string values; bytes values are appended to the binary blob.
        for row in self.dictarray:
            for key, value in row.items():
                if type(value[1]) == str and value[1] not in strings:
                    strings.append(value[1])
                if (type(value[1]) == bytearray or type(value[1]) == bytes) and value[
                    1
                ] not in binary:
                    binary += value[1]
        self.binary = binary

        strings = [self.table_name] + strings

        # "<NULL>" (if present) must be the very first pool entry.
        if "<NULL>" in strings:
            strings.pop(strings.index("<NULL>"))
            strings = ["<NULL>"] + strings

        for i in range(len(strings)):
            val = strings[i].encode(self.encoding)
            if b"\x00" in val:
                # A NUL inside an entry would corrupt the pool's separators.
                raise ValueError(
                    f"Encoding of {self.encoding} for '{strings[i]}' results in string with a null byte."
                )
            else:
                strings[i] = val

        self.strings = b"\x00".join(strings) + b"\x00"

    def _stringtypes(self, type: int) -> str:
        """Map a @UTF type nibble to a struct format string."""
        types = "BbHhIiQqfdI"
        if type == 0xB:
            return "II"  # bytes: (offset, length) pair
        if 0 <= type < len(types):
            return types[type]
        # Previously an unreachable else (the if/elif covered every path);
        # now a real guard for out-of-range nibbles.
        raise Exception("Unknown data type.")

    def bytes(self) -> bytearray:
        """Returns a @UTF bytearray Table from the provided payload dict."""
        self._get_stflag()
        self.column_data = self._write_columns()
        self.rows_data = self._write_rows()
        header_data = self._write_header()
        dataarray = (
            header_data + self.column_data + self.rows_data + self.strings + self.binary
        )
        if len(dataarray) % 8 != 0:
            # Pad everything after the 8-byte magic+size prefix up to data_offset.
            dataarray = dataarray[:8] + dataarray[8:].ljust(
                self.data_offset, b"\x00"
            )  # Padding.
        if self.encrypt:
            # Same XOR keystream that UTF applies when decrypting EUTF chunks.
            view = memoryview(dataarray)
            m = 0x655F
            t = 0x4115
            for i in range(len(view)):
                view[i] ^= 0xFF & m
                m = (m * t) & 0xFFFFFFFF
            dataarray = bytearray(view)
        return dataarray
590
+
591
+ class UTFViewer:
592
+ _payload: dict
593
+
594
+ def __init__(self, payload):
595
+ """Construct a non-owning read-write, deletable view of a UTF table dictarray.
596
+ Nested classes are supported.
597
+ Sorting (using .sort()) is done in-place and affects the original payload.
598
+
599
+ Example:
600
+ ```python
601
+ class CueNameTable(UTFViewer):
602
+ CueName : str
603
+ CueIndex : int
604
+ class ACBTable(UTFViewer):
605
+ CueNameTable : List[CueNameTable]
606
+ Awb : AWB
607
+ src = ACB(ACB_sample)
608
+ payload = ACBTable(src.payload)
609
+ >>> Referencing items through Python is allowed
610
+ name = payload.CueNameTable
611
+ >>> Lists can be indexed
612
+ name_str = name[0].CueName
613
+ >>> Deleting items from lists is also allowed
614
+ src.view.CueNameTable.pop(1)
615
+ src.view.CueTable.pop(1)
616
+ >>> The changes will be reflected in the original UTF payload
617
+
618
+ See __new__ for the actual constructor.
619
+ ```
620
+ """
621
+ assert isinstance(payload, dict), "Payload must be a dictionary."
622
+ super().__setattr__("_payload", payload)
623
+
624
+ def __getattr__(self, item):
625
+ annotations = super().__getattribute__("__annotations__")
626
+ # Nested definitions
627
+ if item in annotations:
628
+ sub = annotations[item]
629
+ reduced = getattr(sub, "__args__", [None])[0]
630
+ reduced = reduced or sub
631
+ if issubclass(reduced, UTFViewer):
632
+ typeof_or_name, value = self._payload[item]
633
+ assert (
634
+ type(typeof_or_name) == str and type(value) == list
635
+ ), "payload is not expanded. parse with UTF(..., recursive=True)"
636
+ return self._view_as(value, reduced)
637
+ payload = super().__getattribute__("_payload")
638
+ if item not in payload:
639
+ return super().__getattribute__(item)
640
+ _, value = payload[item]
641
+ return value
642
+
643
+ def __setattr__(self, item, value):
644
+ payload = super().__getattribute__("_payload")
645
+ if item not in payload:
646
+ raise AttributeError(f"{item} not in payload")
647
+ typeof, _ = payload[item]
648
+ payload[item] = (typeof, value)
649
+
650
+ def __dir__(self):
651
+ annotations = super().__getattribute__("__annotations__")
652
+ return list(annotations.keys()) + list(super().__dir__())
653
+
654
+ @staticmethod
655
+ def _view_as(payload: dict, clazz: Type[T]) -> T:
656
+ if not issubclass(clazz, UTFViewer):
657
+ raise TypeError("class must be a subclass of UTFViewer")
658
+ return clazz(payload)
659
+
660
+ class ListView(list):
661
+ _payload : List[dict]
662
+ def __init__(self, payload: list[T]):
663
+ self._payload = payload
664
+ super().__init__([UTFViewer(item) for item in payload])
665
+
666
+ def pop(self, index = -1):
667
+ self._payload.pop(index)
668
+ return super().pop(index)
669
+
670
+ def append(self, o : "UTFViewer"):
671
+ if len(self):
672
+ assert type(self[0]) == type(o), "all items in the list must be of the same type."
673
+ self._payload.append(o._payload)
674
+ return super().append(o)
675
+
676
+ def extend(self, iterable):
677
+ for item in iterable:
678
+ self.append(item)
679
+
680
+ def insert(self, index, o : "UTFViewer"):
681
+ if len(self):
682
+ assert type(self[0]) == type(o), "all items in the list must be of the same type."
683
+ self._payload.insert(index, o._payload)
684
+ return super().insert(index, o)
685
+
686
+ def clear(self):
687
+ self._payload.clear()
688
+ return super().clear()
689
+
690
+ def count(self, value):
691
+ raise NotImplementedError("count is not supported on views")
692
+
693
+ def remove(self, value):
694
+ raise NotImplementedError("remove is not supported on views. use pop(index).")
695
+
696
+ def sort(self, key : callable):
697
+ p = sorted([(self[i], i) for i in range(len(self))], key=lambda x: key(x[0]))
698
+ self._payload[:] = [self._payload[i] for x,i in p]
699
+ self[:] = [x for x,i in p]
700
+
701
+ def __new__(cls: Type[T], payload: list | dict, **args) -> T | List[T]:
702
+ if isinstance(payload, list):
703
+ return UTFViewer.ListView(payload)
704
+ return super().__new__(cls)