PyCriCodecsEx 0.0.5__cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
PyCriCodecsEx/utf.py ADDED
@@ -0,0 +1,692 @@
1
+ from typing import BinaryIO, TypeVar, Type, List
2
+ from copy import deepcopy
3
+
4
+ T = TypeVar("T")
5
+ Ty = TypeVar("Ty", bound="UTFViewer")
6
+ from io import BytesIO, FileIO
7
+ from struct import unpack, calcsize, pack
8
+ from PyCriCodecsEx.chunk import *
9
+
10
+ class UTF:
11
+ """Use this class to unpack @UTF table binary payload."""
12
+
13
+ _dictarray: list
14
+
15
+ magic: bytes
16
+ table_size: int
17
+ rows_offset: int
18
+ string_offset: int
19
+ data_offset: int
20
+ num_columns: int
21
+ row_length: int
22
+ num_rows: int
23
+ stream: BinaryIO
24
+ recursive: bool
25
+ encoding : str = 'utf-8'
26
+
27
+ def __init__(self, stream : str | BinaryIO, recursive=False):
28
+ """Unpacks UTF table binary payload
29
+
30
+ Args:
31
+ stream (Union[str | BinaryIO]): The input stream or file path to read the UTF table from.
32
+ recursive (bool): Whether to recursively unpack nested UTF tables.
33
+ """
34
+ if type(stream) == str:
35
+ self.stream = FileIO(stream)
36
+ else:
37
+ self.stream = BytesIO(stream)
38
+ (
39
+ self.magic,
40
+ self.table_size,
41
+ self.rows_offset,
42
+ self.string_offset,
43
+ self.data_offset,
44
+ self.table_name,
45
+ self.num_columns,
46
+ self.row_length,
47
+ self.num_rows,
48
+ ) = UTFChunkHeader.unpack(self.stream.read(UTFChunkHeader.size))
49
+ if self.magic == UTFType.UTF.value:
50
+ self._read_rows_and_columns()
51
+ elif self.magic == UTFType.EUTF.value:
52
+ self.stream.seek(0)
53
+ data = memoryview(bytearray(self.stream.read()))
54
+ m = 0x655F
55
+ t = 0x4115
56
+ for i in range(len(data)):
57
+ data[i] ^= 0xFF & m
58
+ m = (m * t) & 0xFFFFFFFF
59
+ self.stream = BytesIO(bytearray(data))
60
+ (
61
+ self.magic,
62
+ self.table_size,
63
+ self.rows_offset,
64
+ self.string_offset,
65
+ self.data_offset,
66
+ self.table_name,
67
+ self.num_columns,
68
+ self.row_length,
69
+ self.num_rows,
70
+ ) = UTFChunkHeader.unpack(self.stream.read(UTFChunkHeader.size))
71
+ if self.magic != UTFType.UTF.value:
72
+ raise Exception("Decryption error.")
73
+ self._read_rows_and_columns()
74
+ else:
75
+ raise ValueError("UTF chunk is not present.")
76
+ self.recursive = recursive
77
+ if recursive:
78
+ def dfs(payload: list[dict]) -> None:
79
+ for col in range(len(payload)):
80
+ for k, v in payload[col].items():
81
+ typeof, value = v
82
+ if typeof == UTFTypeValues.bytes:
83
+ # XXX: Recursive UTF tables doesn't seem to get encrypted (e.g. CPK, ACB)
84
+ # We can pass addition reconstruction flags alongside table names later on, but this is good enough for now
85
+ if value.startswith(UTFType.UTF.value) or value.startswith(
86
+ UTFType.EUTF.value
87
+ ):
88
+ table = UTF(value, recursive=False)
89
+ payload[col][k] = (table.table_name, table.dictarray)
90
+ dfs(table.dictarray)
91
+
92
+ dfs(self.dictarray)
93
+
94
+ def _read_rows_and_columns(self):
95
+ stream = self.stream.read(self.data_offset - 0x18)
96
+ stream = BytesIO(stream)
97
+ types = [[], [], [], []]
98
+ target_data = []
99
+ target_constant = []
100
+ target_tuple = []
101
+ s_offsets = []
102
+ for i in range(self.num_columns):
103
+ flag = stream.read(1)[0]
104
+ stflag = flag >> 4
105
+ typeflag = flag & 0xF
106
+ if stflag == 0x1:
107
+ offset = int.from_bytes(stream.read(4), "big")
108
+ s_offsets.append(offset)
109
+ target_constant.append(offset)
110
+ types[2].append((">" + self._stringtypes(typeflag), typeflag))
111
+ elif stflag == 0x3:
112
+ offset = int.from_bytes(stream.read(4), "big")
113
+ s_offsets.append(offset)
114
+ target_tuple.append(
115
+ (
116
+ offset,
117
+ unpack(
118
+ ">" + self._stringtypes(typeflag),
119
+ stream.read(calcsize(self._stringtypes(typeflag))),
120
+ ),
121
+ )
122
+ )
123
+ types[1].append((">" + self._stringtypes(typeflag), typeflag))
124
+ elif stflag == 0x5:
125
+ offset = int.from_bytes(stream.read(4), "big")
126
+ s_offsets.append(offset)
127
+ target_data.append(offset)
128
+ types[0].append((">" + self._stringtypes(typeflag), typeflag))
129
+ elif stflag == 0x7: # Exists in old CPK's.
130
+ # target_tuple.append((int.from_bytes(stream.read(4), "big"), int.from_bytes(stream.read(calcsize(self.stringtypes(typeflag))), "big")))
131
+ # types[3].append((">"+self.stringtypes(typeflag), typeflag))
132
+ raise NotImplementedError("Unsupported 0x70 storage flag.")
133
+ else:
134
+ raise Exception("Unknown storage flag.")
135
+
136
+ rows = []
137
+ for j in range(self.num_rows):
138
+ for i in types[0]:
139
+ rows.append(unpack(i[0], stream.read(calcsize(i[0]))))
140
+
141
+ for i in range(4):
142
+ for j in range(len(types[i])):
143
+ types[i][j] = (types[i][j][0][1:], types[i][j][1])
144
+ strings = (stream.read()).split(b"\x00")
145
+ strings_copy = strings[:]
146
+ self._dictarray = []
147
+ self.encoding = "utf-8"
148
+ for i in range(len(strings)):
149
+ try:
150
+ strings_copy[i] = strings[i].decode("utf-8")
151
+ except:
152
+ for x in ["shift-jis", "utf-16"]:
153
+ try:
154
+ strings_copy[i] = strings[i].decode(x)
155
+ self.encoding = x
156
+ # This looks sketchy, but it will always work since @UTF only supports these 3 encodings.
157
+ break
158
+ except:
159
+ continue
160
+ else:
161
+ # Probably useless.
162
+ raise UnicodeDecodeError(
163
+ f"String of unknown encoding: {strings[i]}"
164
+ )
165
+ t_t_dict = dict()
166
+ self.table_name = strings_copy[self._finder(self.table_name, strings)]
167
+ UTFTypeValuesList = list(UTFTypeValues)
168
+ s_orders = [strings_copy[self._finder(i, strings)] for i in s_offsets]
169
+
170
+ def ensure_order(d: dict) -> dict:
171
+ return {k: d[k] for k in s_orders if k in d}
172
+
173
+ for i in range(len(target_constant)):
174
+ if types[2][i][1] not in [0xA, 0xB]:
175
+ val = self._finder(target_constant[i], strings)
176
+ t_t_dict.update(
177
+ {strings_copy[val]: (UTFTypeValuesList[types[2][i][1]], None)}
178
+ )
179
+ elif types[2][i][1] == 0xA:
180
+ val = self._finder(target_constant[i], strings)
181
+ t_t_dict.update({strings_copy[val]: (UTFTypeValues.string, "<NULL>")})
182
+ else:
183
+ # Most likely useless, since the code doesn seem to reach here.
184
+ val = self._finder(target_constant[i], strings)
185
+ t_t_dict.update({strings_copy[val]: (UTFTypeValues.bytes, b"")})
186
+ for i in range(len(target_tuple)):
187
+ if types[1][i % (len(types[1]))][1] not in [0xA, 0xB]:
188
+ t_t_dict.update(
189
+ {
190
+ strings_copy[self._finder(target_tuple[i][0], strings)]: (
191
+ UTFTypeValuesList[types[1][i % len(types[1])][1]],
192
+ target_tuple[i][1][0],
193
+ )
194
+ }
195
+ )
196
+ elif types[1][i % (len(types[1]))][1] == 0xA:
197
+ t_t_dict.update(
198
+ {
199
+ strings_copy[self._finder(target_tuple[i][0], strings)]: (
200
+ UTFTypeValues.string,
201
+ strings_copy[self._finder(target_tuple[i][1][0], strings)],
202
+ )
203
+ }
204
+ )
205
+ else:
206
+ self.stream.seek(self.data_offset + target_tuple[i][1][0] + 0x8, 0)
207
+ bin_val = self.stream.read((target_tuple[i][1][1]))
208
+ t_t_dict.update(
209
+ {
210
+ strings_copy[self._finder(target_tuple[i][0], strings)]: (
211
+ UTFTypeValues.bytes,
212
+ bin_val,
213
+ )
214
+ }
215
+ )
216
+ temp_dict = dict()
217
+ if len(rows) == 0:
218
+ self._dictarray.append(ensure_order(t_t_dict))
219
+ for i in range(len(rows)):
220
+ if types[0][i % (len(types[0]))][1] not in [0xA, 0xB]:
221
+ temp_dict.update(
222
+ {
223
+ strings_copy[
224
+ self._finder(target_data[i % (len(target_data))], strings)
225
+ ]: (
226
+ UTFTypeValuesList[types[0][i % (len(types[0]))][1]],
227
+ rows[i][0],
228
+ )
229
+ }
230
+ )
231
+ elif types[0][i % (len(types[0]))][1] == 0xA:
232
+ temp_dict.update(
233
+ {
234
+ strings_copy[
235
+ self._finder(target_data[i % (len(target_data))], strings)
236
+ ]: (
237
+ UTFTypeValues.string,
238
+ strings_copy[self._finder(rows[i][0], strings)],
239
+ )
240
+ }
241
+ )
242
+ else:
243
+ self.stream.seek(self.data_offset + rows[i][0] + 0x8, 0)
244
+ bin_val = self.stream.read((rows[i][1]))
245
+ temp_dict.update(
246
+ {
247
+ strings_copy[
248
+ self._finder(target_data[i % (len(target_data))], strings)
249
+ ]: (UTFTypeValues.bytes, bin_val)
250
+ }
251
+ )
252
+ if not (i + 1) % (len(types[0])):
253
+ temp_dict.update(t_t_dict)
254
+ self._dictarray.append(ensure_order(temp_dict))
255
+ temp_dict = dict()
256
+
257
+ def _stringtypes(self, type: int) -> str:
258
+ types = "BbHhIiQqfdI"
259
+ if type != 0xB:
260
+ return types[type]
261
+ elif type == 0xB:
262
+ return "II"
263
+ else:
264
+ raise Exception("Unkown data type.")
265
+
266
+ def _finder(self, pointer, strings) -> int:
267
+ sum = 0
268
+ for i in range(len(strings)):
269
+ if sum < pointer:
270
+ sum += len(strings[i]) + 1
271
+ continue
272
+ return i
273
+ else:
274
+ raise Exception("Failed string lookup.")
275
+
276
+ @property
277
+ def table(self) -> dict:
278
+ """Returns a dictionary representation of the UTF table.
279
+
280
+ Effectively, this retrieves a transposed version of the dictarray. Whilst discarding
281
+ type info.
282
+
283
+ This is mostly here for cpk.py compatibility.
284
+ """
285
+ keys = self._dictarray[0].keys()
286
+ return {key: [d[key][1] for d in self._dictarray] for key in keys}
287
+
288
+ @property
289
+ def dictarray(self) -> list[dict]:
290
+ """Returns a list representation of the UTF table. """
291
+ return self._dictarray
292
+
293
class UTFBuilder:
    """Use this class to build UTF table binary payloads from a `dictarray`."""

    encoding: str
    dictarray: list
    strings: bytes
    table_name: str
    binary: bytes
    table: bytearray
    stflag: list
    rows_data: bytearray
    column_data: bytearray
    data_offset: int

    def __init__(
        self,
        dictarray_src: list[dict],
        encrypt: bool = False,
        encoding: str = "utf-8",
        table_name: str = "PyCriCodecs_table",
        ignore_recursion: bool = False,
    ) -> None:
        """Packs UTF payload back into their binary form

        Args:
            dictarray_src: list[dict]: A list of dictionaries representing the UTF table.
            encrypt: Whether to encrypt the table (default: False).
            encoding: The character encoding to use (default: "utf-8").
            table_name: The name of the table (default: "PyCriCodecs_table").
            ignore_recursion: Whether to ignore recursion when packing (default: False).

        Raises:
            ValueError: If the dictionaries differ in length, keys, or value types.
        """
        assert type(dictarray_src) == list, "dictarray must be a list of dictionaries (see UTF.dictarray)."
        # Deep copy so nested tables can be serialized in place without
        # mutating the caller's payload.
        dictarray = deepcopy(dictarray_src)
        # Preprocess for nested dictarray types
        def dfs(payload: list[dict], name: str) -> None:
            # Depth-first: replace every (name, sub_dictarray) value with the
            # packed bytes of that sub-table, then pack this level.
            for dict in range(len(payload)):
                for k, v in payload[dict].items():
                    typeof_or_name, value = v
                    if type(value) == list:
                        assert type(typeof_or_name) == str, "bogus payload data"
                        payload[dict][k] = (
                            UTFTypeValues.bytes,
                            dfs(value, typeof_or_name),
                        )
            # ? Could subtables be encrypted at all?
            return UTFBuilder(
                payload, encoding=encoding, table_name=name, ignore_recursion=True
            ).bytes()

        if not ignore_recursion:
            dfs(dictarray, table_name)
        # All rows must share the same keys and value types.
        l = set([len(x) for x in dictarray])
        if len(l) != 1:
            raise ValueError("All dictionaries must be equal in length.")
        matches = [(k, v[0]) for k, v in dictarray[0].items()]
        for i in range(1, len(dictarray)):
            if matches != [(k, v[0]) for k, v in dictarray[i].items()]:
                raise ValueError(
                    "Keys and/or value types are not matching across dictionaries."
                )
        self.dictarray = dictarray
        self.encrypt = encrypt
        self.encoding = encoding
        self.table_name = table_name
        self.binary = b""
        self._get_strings()

    def _write_header(self) -> bytearray:
        # Packs the @UTF chunk header. All offsets are relative to byte 8 of
        # the chunk (after the magic + chunk-size fields).
        self.data_offset = (
            len(self.column_data)
            + len(self.rows_data)
            + len(self.strings)
            + len(self.binary)
            + 0x18
        )
        datalen = self.data_offset
        # Align the chunk size to an 8-byte boundary.
        if self.data_offset % 8 != 0:
            self.data_offset = self.data_offset + (8 - self.data_offset % 8)
        if len(self.binary) == 0:
            binary_offset = self.data_offset
        else:
            binary_offset = datalen - len(self.binary)
        header = UTFChunkHeader.pack(
            b"@UTF",  # @UTF
            self.data_offset,  # Chunk size.
            len(self.column_data) + 0x18,  # Rows offset.
            datalen - len(self.strings) - len(self.binary),  # String offset.
            binary_offset,  # Binary data offset.
            (
                0
                if self.strings.startswith(bytes(self.table_name, self.encoding))
                else self.strings.index(
                    b"\x00" + bytes(self.table_name, self.encoding) + b"\x00"
                )
                + 1
            ),  # Table name pointer.
            len(self.stflag),  # Num columns.
            sum(
                [calcsize(self._stringtypes(x[1])) for x in self.stflag if x[0] == 0x50]
            ),  # Row length in bytes (sum of per-row column sizes).
            len(self.dictarray),  # Num rows.
        )
        return bytearray(header)

    def _write_rows(self) -> bytearray:
        # Packs the per-row values (0x50 storage flag only), big-endian.
        rows = bytearray()
        for dict in self.dictarray:
            for data in self.stflag:
                if data[0] == 0x50:
                    if data[1] not in [0xA, 0xB]:
                        # Plain numeric value.
                        rows += pack(">" + self._stringtypes(data[1]), dict[data[2]][1])
                    elif data[1] == 0xA:
                        # String value: store its offset into the string pool.
                        if bytes(dict[data[2]][1], self.encoding) == b"":
                            # Empty string points at the shared empty entry.
                            idx = self.strings.index(b"\x00\x00") + 1
                            rows += pack(">" + self._stringtypes(data[1]), idx)
                        else:
                            rows += pack(
                                ">" + self._stringtypes(data[1]),
                                self.strings.index(
                                    b"\x00"
                                    + bytes(dict[data[2]][1], self.encoding)
                                    + b"\x00"
                                )
                                + 1,
                            )
                    else:
                        # 0xB bytes value: store (offset, length) into the binary pool.
                        rows += pack(
                            ">" + self._stringtypes(data[1]),
                            self.binary.index(dict[data[2]][1]),
                            len(dict[data[2]][1]),
                        )
        return rows

    def _write_columns(self) -> bytearray:
        # Packs the column schema: one flag byte per column followed by a
        # name pointer, and for 0x30 (constant) columns the inline value.
        columns = bytearray()
        for data in self.stflag:
            # Flag byte: storage class (high nibble) | value type (low nibble).
            columns += int.to_bytes(data[0] | data[1], 1, "big")
            if data[0] in [0x10, 0x50]:
                # Name pointer only (constant-null or per-row column).
                columns += int.to_bytes(
                    self.strings.index(
                        b"\x00" + bytes(data[2], self.encoding) + b"\x00"
                    )
                    + 1,
                    4,
                    "big",
                )
            else:
                # 0x30 constant column: name pointer + inline value.
                if data[1] not in [0xA, 0xB]:
                    columns += int.to_bytes(
                        self.strings.index(
                            b"\x00" + bytes(data[2], self.encoding) + b"\x00"
                        )
                        + 1,
                        4,
                        "big",
                    ) + int.to_bytes(
                        data[3], calcsize(self._stringtypes(data[1])), "big"
                    )
                elif data[1] == 0xA:
                    # Constant string: value is an offset into the string pool.
                    columns += int.to_bytes(
                        self.strings.index(
                            b"\x00" + bytes(data[2], self.encoding) + b"\x00"
                        )
                        + 1,
                        4,
                        "big",
                    ) + (
                        b"\x00\x00\x00\x00"
                        if self.strings.startswith(
                            bytes(data[3], self.encoding) + b"\x00"
                        )
                        else (
                            int.to_bytes(
                                self.strings.index(
                                    b"\x00" + bytes(data[3], self.encoding) + b"\x00"
                                )
                                + 1,
                                4,
                                "big",
                            )
                        )
                    )
                else:
                    # Constant bytes: (offset, length) into the binary pool.
                    columns += (
                        int.to_bytes(
                            self.strings.index(
                                b"\x00" + bytes(data[2], self.encoding) + b"\x00"
                            )
                            + 1,
                            4,
                            "big",
                        )
                        + int.to_bytes(self.binary.index(data[3]), 4, "big")
                        + int.to_bytes(len(data[3]), 4, "big")
                    )
        return columns

    def _get_stflag(self):
        # Derives one (storage_flag, type_index, name[, value]) tuple per
        # column. 0x10 = constant NULL, 0x30 = constant value, 0x50 = per-row.
        to_match = [(x, y) for x, y in self.dictarray[0].items()]
        UTFTypeValuesList = list(UTFTypeValues)
        self.stflag = []
        for val in to_match:
            if len(self.dictarray) != 1:
                for dict in self.dictarray:
                    if dict[val[0]][1] != val[1][1]:
                        # Value varies across rows -> per-row storage.
                        self.stflag.append(
                            (0x50, UTFTypeValuesList.index(val[1][0]), val[0])
                        )
                        break
                else:
                    # Same value in every row -> constant storage.
                    if val[1][1] == None:
                        self.stflag.append(
                            (0x10, UTFTypeValuesList.index(val[1][0]), val[0])
                        )
                    else:
                        self.stflag.append(
                            (
                                0x30,
                                UTFTypeValuesList.index(val[1][0]),
                                val[0],
                                val[1][1],
                            )
                        )
            else:
                # It seems that when there is only one dictionary, there will be no element of type 0x30 flag
                # Otherwise all of them would be either 0x30 or 0x10 flags with no length to the rows.
                if val[1][1] == None or val[1][1] == "<NULL>":
                    self.stflag.append(
                        (0x10, UTFTypeValuesList.index(val[1][0]), val[0])
                    )
                else:
                    self.stflag.append(
                        (0x50, UTFTypeValuesList.index(val[1][0]), val[0])
                    )

    def _get_strings(self):
        # Collects the NUL-separated string pool and concatenates all bytes
        # values into one binary blob (self.binary).
        strings = []
        binary = b""

        # Column names first, in order of first appearance...
        for dict in self.dictarray:
            for key, value in dict.items():
                if key not in strings:
                    strings.append(key)
        # ...then string values; bytes values go into the binary blob
        # (the `not in` test is a substring check, deduplicating blobs).
        for dict in self.dictarray:
            for key, value in dict.items():
                if type(value[1]) == str and value[1] not in strings:
                    strings.append(value[1])
                if (type(value[1]) == bytearray or type(value[1]) == bytes) and value[
                    1
                ] not in binary:
                    binary += value[1]
        self.binary = binary

        strings = [self.table_name] + strings

        # "<NULL>" must be the very first pool entry when present, so that
        # NULL string values resolve to pointer 0.
        if "<NULL>" in strings:
            strings.pop(strings.index("<NULL>"))
            strings = ["<NULL>"] + strings

        for i in range(len(strings)):
            val = strings[i].encode(self.encoding)
            if b"\x00" in val:
                # A NUL inside an encoded string would corrupt the pool.
                raise ValueError(
                    f"Encoding of {self.encoding} for '{strings[i]}' results in string with a null byte."
                )
            else:
                strings[i] = val

        self.strings = b"\x00".join(strings) + b"\x00"

    def _stringtypes(self, type: int) -> str:
        # Maps a @UTF type nibble to a struct format string; 0xB (bytes) is
        # an (offset, length) pair of uint32s.
        types = "BbHhIiQqfdI"
        if type != 0xB:
            return types[type]
        elif type == 0xB:
            return "II"
        else:
            # NOTE(review): unreachable — the two branches above cover all ints.
            raise Exception("Unkown data type.")

    def bytes(self) -> bytearray:
        """Returns a @UTF bytearray Table from the provided payload dict."""
        self._get_stflag()
        self.column_data = self._write_columns()
        self.rows_data = self._write_rows()
        header_data = self._write_header()
        dataarray = (
            header_data + self.column_data + self.rows_data + self.strings + self.binary
        )
        if len(dataarray) % 8 != 0:
            # Pad everything after the 8-byte magic+size prefix up to the
            # aligned chunk size computed in _write_header.
            dataarray = dataarray[:8] + dataarray[8:].ljust(
                self.data_offset, b"\x00"
            )  # Padding.
        if self.encrypt:
            # XOR with the same multiplicative keystream UTF uses to decrypt.
            dataarray = memoryview(dataarray)
            m = 0x655F
            t = 0x4115
            for i in range(len(dataarray)):
                dataarray[i] ^= 0xFF & m
                m = (m * t) & 0xFFFFFFFF
            dataarray = bytearray(dataarray)
        return dataarray
594
+
595
class UTFViewer:
    """Use this class to create dataclass-like access to `dictarray`s."""

    _payload: dict

    def __init__(self, payload):
        """Construct a non-owning read-write, deletable view of a UTF table dictarray.

        Nested classes are supported.

        Sorting (using .sort()) is done in-place and affects the original payload.
        """
        assert isinstance(payload, dict), "payload must be a dictionary."
        # object.__setattr__ bypasses our own guard below, which would reject
        # the not-yet-existing "_payload" attribute.
        super().__setattr__("_payload", payload)

    def __getattr__(self, item):
        # Subclass annotations double as the schema for the view.
        annotations = super().__getattribute__("__annotations__")
        # Nested definitions
        if item in annotations:
            sub = annotations[item]
            # Unwrap e.g. List[Sub] to its element type Sub; plain Sub as-is.
            reduced = getattr(sub, "__args__", [None])[0]
            reduced = reduced or sub
            if issubclass(reduced, UTFViewer):
                # Expanded nested tables are stored as (table_name, dictarray).
                typeof_or_name, value = self._payload[item]
                assert (
                    type(typeof_or_name) == str and type(value) == list
                ), "payload is not expanded. parse with UTF(..., recursive=True)"
                return self._view_as(value, reduced)
        payload = super().__getattribute__("_payload")
        if item not in payload:
            return super().__getattribute__(item)
        # Payload entries are (type, value) tuples; expose only the value.
        _, value = payload[item]
        return value

    def __setattr__(self, item, value):
        payload = super().__getattribute__("_payload")
        if item not in payload:
            raise AttributeError(f"{item} not in payload. UTFViewer should not store extra states")
        if isinstance(value, dict) or isinstance(value, list):
            raise AttributeError(f"Dict or list assignment is not allowed as this may potentially change the table layout. Access by elements and use list APIs instead")
        # Preserve the original (type, value) tuple shape; only the value changes.
        typeof, _ = payload[item]
        payload[item] = (typeof, value)

    def __dir__(self):
        # Surface the schema (annotations) alongside regular attributes.
        annotations = super().__getattribute__("__annotations__")
        return list(annotations.keys()) + list(super().__dir__())

    @staticmethod
    def _view_as(payload: dict, clazz: Type[T]) -> T:
        # Wraps a raw payload in the given viewer subclass.
        if not issubclass(clazz, UTFViewer):
            raise TypeError("class must be a subclass of UTFViewer")
        return clazz(payload)

    class ListView(list):
        """A list of viewers that mirrors every mutation onto the backing
        list of raw payload dicts, keeping both in sync."""

        # The raw dicts backing the viewer items, in the same order.
        _payload : List[dict]
        def __init__(self, clazz : Type[Ty], payload: list[Ty]):
            self._payload = payload
            super().__init__([clazz(item) for item in payload])

        def pop(self, index = -1):
            # Remove from the backing payload first, then from the view.
            self._payload.pop(index)
            return super().pop(index)

        def append(self, o : "UTFViewer"):
            if len(self):
                assert isinstance(o, UTFViewer) and type(self[0]) == type(o), "all items in the list must be of the same type, and must be an instance of UTFViewer."
            self._payload.append(o._payload)
            return super().append(o)

        def extend(self, iterable):
            # Delegates to append so the same type checks apply per item.
            for item in iterable:
                self.append(item)

        def insert(self, index, o : "UTFViewer"):
            if len(self):
                assert isinstance(o, UTFViewer) and type(self[0]) == type(o), "all items in the list must be of the same type, and must be an instance of UTFViewer."
            self._payload.insert(index, o._payload)
            return super().insert(index, o)

        def clear(self):
            self._payload.clear()
            return super().clear()

        def count(self, value):
            raise NotImplementedError("count is not supported on views")

        def remove(self, value):
            raise NotImplementedError("remove is not supported on views. use pop(index).")

        def sort(self, key : callable):
            # Sort viewers and backing payloads with one permutation so the
            # original dictarray is reordered in place as documented.
            p = sorted([(self[i], i) for i in range(len(self))], key=lambda x: key(x[0]))
            self._payload[:] = [self._payload[i] for x,i in p]
            self[:] = [x for x,i in p]

    def __new__(cls: Type[Ty], payload: list | dict, **args) -> Ty | List[Ty]:
        # Constructing a viewer from a list of payloads yields a ListView of
        # viewers instead of a single instance.
        if isinstance(payload, list):
            return UTFViewer.ListView(cls, payload)
        return super().__new__(cls)