PyCriCodecsEx 0.0.5__cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- CriCodecsEx.cpython-312-x86_64-linux-gnu.so +0 -0
- PyCriCodecsEx/__init__.py +1 -0
- PyCriCodecsEx/acb.py +306 -0
- PyCriCodecsEx/adx.py +158 -0
- PyCriCodecsEx/awb.py +165 -0
- PyCriCodecsEx/chunk.py +92 -0
- PyCriCodecsEx/cpk.py +743 -0
- PyCriCodecsEx/hca.py +454 -0
- PyCriCodecsEx/usm.py +1001 -0
- PyCriCodecsEx/utf.py +692 -0
- pycricodecsex-0.0.5.dist-info/METADATA +35 -0
- pycricodecsex-0.0.5.dist-info/RECORD +15 -0
- pycricodecsex-0.0.5.dist-info/WHEEL +6 -0
- pycricodecsex-0.0.5.dist-info/licenses/LICENSE +21 -0
- pycricodecsex-0.0.5.dist-info/top_level.txt +2 -0
PyCriCodecsEx/utf.py
ADDED
@@ -0,0 +1,692 @@
from typing import BinaryIO, TypeVar, Type, List
from copy import deepcopy
from io import BytesIO, FileIO
from struct import unpack, calcsize, pack

from PyCriCodecsEx.chunk import *

T = TypeVar("T")
Ty = TypeVar("Ty", bound="UTFViewer")

class UTF:
    """Use this class to unpack @UTF table binary payloads."""

    _dictarray: list

    magic: bytes
    table_size: int
    rows_offset: int
    string_offset: int
    data_offset: int
    num_columns: int
    row_length: int
    num_rows: int
    stream: BinaryIO
    recursive: bool
    encoding: str = "utf-8"

    def __init__(self, stream: str | bytes, recursive=False):
        """Unpacks a UTF table binary payload.

        Args:
            stream (str | bytes): The table bytes, or a file path to read the UTF table from.
            recursive (bool): Whether to recursively unpack nested UTF tables.
        """
        if type(stream) == str:
            self.stream = FileIO(stream)
        else:
            self.stream = BytesIO(stream)
        (
            self.magic,
            self.table_size,
            self.rows_offset,
            self.string_offset,
            self.data_offset,
            self.table_name,
            self.num_columns,
            self.row_length,
            self.num_rows,
        ) = UTFChunkHeader.unpack(self.stream.read(UTFChunkHeader.size))
        if self.magic == UTFType.UTF.value:
            self._read_rows_and_columns()
        elif self.magic == UTFType.EUTF.value:
            # Encrypted @UTF ("EUTF"): XOR every byte with a multiplicative
            # keystream (seed 0x655F, multiplier 0x4115), then re-read the header.
            self.stream.seek(0)
            data = memoryview(bytearray(self.stream.read()))
            m = 0x655F
            t = 0x4115
            for i in range(len(data)):
                data[i] ^= 0xFF & m
                m = (m * t) & 0xFFFFFFFF
            self.stream = BytesIO(bytearray(data))
            (
                self.magic,
                self.table_size,
                self.rows_offset,
                self.string_offset,
                self.data_offset,
                self.table_name,
                self.num_columns,
                self.row_length,
                self.num_rows,
            ) = UTFChunkHeader.unpack(self.stream.read(UTFChunkHeader.size))
            if self.magic != UTFType.UTF.value:
                raise Exception("Decryption error.")
            self._read_rows_and_columns()
        else:
            raise ValueError("UTF chunk is not present.")
        self.recursive = recursive
        if recursive:

            def dfs(payload: list[dict]) -> None:
                for row in range(len(payload)):
                    for k, v in payload[row].items():
                        typeof, value = v
                        if typeof == UTFTypeValues.bytes:
                            # XXX: Recursive UTF tables don't seem to be encrypted (e.g. CPK, ACB).
                            # We could pass additional reconstruction flags alongside table names
                            # later on, but this is good enough for now.
                            if value.startswith(UTFType.UTF.value) or value.startswith(
                                UTFType.EUTF.value
                            ):
                                table = UTF(value, recursive=False)
                                payload[row][k] = (table.table_name, table.dictarray)
                                dfs(table.dictarray)

            dfs(self.dictarray)
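
    # For reference, the EUTF keystream above is equivalent to this standalone
    # sketch (illustrative only, not part of the original module):
    #
    #   def eutf_xor(buf: bytes) -> bytes:
    #       out = bytearray(buf)
    #       m, t = 0x655F, 0x4115
    #       for i in range(len(out)):
    #           out[i] ^= m & 0xFF
    #           m = (m * t) & 0xFFFFFFFF
    #       return bytes(out)
    #
    # The transform is its own inverse, so the same function also encrypts.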

    def _read_rows_and_columns(self):
        stream = self.stream.read(self.data_offset - 0x18)
        stream = BytesIO(stream)
        types = [[], [], [], []]
        target_data = []
        target_constant = []
        target_tuple = []
        s_offsets = []
        for i in range(self.num_columns):
            flag = stream.read(1)[0]
            stflag = flag >> 4
            typeflag = flag & 0xF
            if stflag == 0x1:
                # Name-only column: the value is implicit.
                offset = int.from_bytes(stream.read(4), "big")
                s_offsets.append(offset)
                target_constant.append(offset)
                types[2].append((">" + self._stringtypes(typeflag), typeflag))
            elif stflag == 0x3:
                # Constant column: the value is stored inline in the column record.
                offset = int.from_bytes(stream.read(4), "big")
                s_offsets.append(offset)
                target_tuple.append(
                    (
                        offset,
                        unpack(
                            ">" + self._stringtypes(typeflag),
                            stream.read(calcsize(self._stringtypes(typeflag))),
                        ),
                    )
                )
                types[1].append((">" + self._stringtypes(typeflag), typeflag))
            elif stflag == 0x5:
                # Per-row column: the values live in the rows section.
                offset = int.from_bytes(stream.read(4), "big")
                s_offsets.append(offset)
                target_data.append(offset)
                types[0].append((">" + self._stringtypes(typeflag), typeflag))
            elif stflag == 0x7:  # Exists in old CPKs.
                # target_tuple.append((int.from_bytes(stream.read(4), "big"), int.from_bytes(stream.read(calcsize(self.stringtypes(typeflag))), "big")))
                # types[3].append((">"+self.stringtypes(typeflag), typeflag))
                raise NotImplementedError("Unsupported 0x70 storage flag.")
            else:
                raise Exception("Unknown storage flag.")

        rows = []
        for j in range(self.num_rows):
            for i in types[0]:
                rows.append(unpack(i[0], stream.read(calcsize(i[0]))))

        # Strip the byte-order prefix; only the type flags are needed from here on.
        for i in range(4):
            for j in range(len(types[i])):
                types[i][j] = (types[i][j][0][1:], types[i][j][1])
        strings = (stream.read()).split(b"\x00")
        strings_copy = strings[:]
        self._dictarray = []
        self.encoding = "utf-8"
        for i in range(len(strings)):
            try:
                strings_copy[i] = strings[i].decode("utf-8")
            except:
                for x in ["shift-jis", "utf-16"]:
                    try:
                        strings_copy[i] = strings[i].decode(x)
                        self.encoding = x
                        # This looks sketchy, but it will always work since @UTF
                        # only supports these three encodings.
                        break
                    except:
                        continue
                else:
                    # Probably unreachable. (UnicodeDecodeError cannot be raised
                    # with a single argument, so raise ValueError instead.)
                    raise ValueError(f"String of unknown encoding: {strings[i]}")
        t_t_dict = dict()
        self.table_name = strings_copy[self._finder(self.table_name, strings)]
        UTFTypeValuesList = list(UTFTypeValues)
        s_orders = [strings_copy[self._finder(i, strings)] for i in s_offsets]

        def ensure_order(d: dict) -> dict:
            return {k: d[k] for k in s_orders if k in d}

        for i in range(len(target_constant)):
            if types[2][i][1] not in [0xA, 0xB]:
                val = self._finder(target_constant[i], strings)
                t_t_dict.update(
                    {strings_copy[val]: (UTFTypeValuesList[types[2][i][1]], None)}
                )
            elif types[2][i][1] == 0xA:
                val = self._finder(target_constant[i], strings)
                t_t_dict.update({strings_copy[val]: (UTFTypeValues.string, "<NULL>")})
            else:
                # Most likely unreachable; the code doesn't seem to get here.
                val = self._finder(target_constant[i], strings)
                t_t_dict.update({strings_copy[val]: (UTFTypeValues.bytes, b"")})
        for i in range(len(target_tuple)):
            if types[1][i % (len(types[1]))][1] not in [0xA, 0xB]:
                t_t_dict.update(
                    {
                        strings_copy[self._finder(target_tuple[i][0], strings)]: (
                            UTFTypeValuesList[types[1][i % len(types[1])][1]],
                            target_tuple[i][1][0],
                        )
                    }
                )
            elif types[1][i % (len(types[1]))][1] == 0xA:
                t_t_dict.update(
                    {
                        strings_copy[self._finder(target_tuple[i][0], strings)]: (
                            UTFTypeValues.string,
                            strings_copy[self._finder(target_tuple[i][1][0], strings)],
                        )
                    }
                )
            else:
                self.stream.seek(self.data_offset + target_tuple[i][1][0] + 0x8, 0)
                bin_val = self.stream.read(target_tuple[i][1][1])
                t_t_dict.update(
                    {
                        strings_copy[self._finder(target_tuple[i][0], strings)]: (
                            UTFTypeValues.bytes,
                            bin_val,
                        )
                    }
                )
        temp_dict = dict()
        if len(rows) == 0:
            self._dictarray.append(ensure_order(t_t_dict))
        for i in range(len(rows)):
            if types[0][i % (len(types[0]))][1] not in [0xA, 0xB]:
                temp_dict.update(
                    {
                        strings_copy[
                            self._finder(target_data[i % (len(target_data))], strings)
                        ]: (
                            UTFTypeValuesList[types[0][i % (len(types[0]))][1]],
                            rows[i][0],
                        )
                    }
                )
            elif types[0][i % (len(types[0]))][1] == 0xA:
                temp_dict.update(
                    {
                        strings_copy[
                            self._finder(target_data[i % (len(target_data))], strings)
                        ]: (
                            UTFTypeValues.string,
                            strings_copy[self._finder(rows[i][0], strings)],
                        )
                    }
                )
            else:
                self.stream.seek(self.data_offset + rows[i][0] + 0x8, 0)
                bin_val = self.stream.read(rows[i][1])
                temp_dict.update(
                    {
                        strings_copy[
                            self._finder(target_data[i % (len(target_data))], strings)
                        ]: (UTFTypeValues.bytes, bin_val)
                    }
                )
            if not (i + 1) % (len(types[0])):
                temp_dict.update(t_t_dict)
                self._dictarray.append(ensure_order(temp_dict))
                temp_dict = dict()

    def _stringtypes(self, type: int) -> str:
        types = "BbHhIiQqfdI"
        if type == 0xB:
            return "II"  # An (offset, length) pair into the binary data section.
        elif 0 <= type < len(types):
            return types[type]
        raise Exception("Unknown data type.")

    def _finder(self, pointer, strings) -> int:
        # Maps a byte offset into the string pool onto an index in the split
        # string list; each entry costs its length plus one null terminator.
        total = 0
        for i in range(len(strings)):
            if total < pointer:
                total += len(strings[i]) + 1
                continue
            return i
        else:
            raise Exception("Failed string lookup.")
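
    # Worked example for _finder (illustrative): with the string pool split into
    # [b"<NULL>", b"name", b"value"], pointer 0 resolves to index 0, pointer 7 to
    # index 1 (past len(b"<NULL>") plus one terminator byte), and pointer 12 to
    # index 2.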

    @property
    def table(self) -> dict:
        """Returns a dictionary representation of the UTF table.

        Effectively, this retrieves a transposed version of the dictarray,
        discarding the type info.

        This is mostly here for cpk.py compatibility.
        """
        keys = self._dictarray[0].keys()
        return {key: [d[key][1] for d in self._dictarray] for key in keys}

    @property
    def dictarray(self) -> list[dict]:
        """Returns a list representation of the UTF table."""
        return self._dictarray
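
# Usage sketch (illustrative only; "table.acb" is a placeholder path):
#
#   from PyCriCodecsEx.utf import UTF
#
#   table = UTF("table.acb", recursive=True)  # a file path or raw table bytes
#   print(table.table_name)
#   for row in table.dictarray:               # one dict per row
#       for column, (typeof, value) in row.items():
#           print(column, typeof, value)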

class UTFBuilder:
    """Use this class to build UTF table binary payloads from a `dictarray`."""

    encoding: str
    dictarray: list
    strings: bytes
    table_name: str
    binary: bytes
    table: bytearray
    stflag: list
    rows_data: bytearray
    column_data: bytearray
    data_offset: int

    def __init__(
        self,
        dictarray_src: list[dict],
        encrypt: bool = False,
        encoding: str = "utf-8",
        table_name: str = "PyCriCodecs_table",
        ignore_recursion: bool = False,
    ) -> None:
        """Packs a UTF payload back into its binary form.

        Args:
            dictarray_src (list[dict]): A list of dictionaries representing the UTF table.
            encrypt (bool): Whether to encrypt the table (default: False).
            encoding (str): The character encoding to use (default: "utf-8").
            table_name (str): The name of the table (default: "PyCriCodecs_table").
            ignore_recursion (bool): Whether to ignore recursion when packing (default: False).
        """
        assert (
            type(dictarray_src) == list
        ), "dictarray must be a list of dictionaries (see UTF.dictarray)."
        dictarray = deepcopy(dictarray_src)

        # Preprocess nested dictarray values: pack every (table_name, dictarray)
        # pair back into an embedded @UTF table.
        def dfs(payload: list[dict], name: str) -> bytearray:
            for row in range(len(payload)):
                for k, v in payload[row].items():
                    typeof_or_name, value = v
                    if type(value) == list:
                        assert type(typeof_or_name) == str, "bogus payload data"
                        payload[row][k] = (
                            UTFTypeValues.bytes,
                            dfs(value, typeof_or_name),
                        )
            # ? Could subtables be encrypted at all?
            return UTFBuilder(
                payload, encoding=encoding, table_name=name, ignore_recursion=True
            ).bytes()

        if not ignore_recursion:
            dfs(dictarray, table_name)
        lengths = set([len(x) for x in dictarray])
        if len(lengths) != 1:
            raise ValueError("All dictionaries must be equal in length.")
        matches = [(k, v[0]) for k, v in dictarray[0].items()]
        for i in range(1, len(dictarray)):
            if matches != [(k, v[0]) for k, v in dictarray[i].items()]:
                raise ValueError(
                    "Keys and/or value types are not matching across dictionaries."
                )
        self.dictarray = dictarray
        self.encrypt = encrypt
        self.encoding = encoding
        self.table_name = table_name
        self.binary = b""
        self._get_strings()
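
    # A minimal input sketch (illustrative; the integer enum member name used
    # here, UTFTypeValues.uint, is an assumption, so check PyCriCodecsEx.chunk
    # for the actual member names):
    #
    #   payload = [
    #       {"Id": (UTFTypeValues.uint, 0), "Name": (UTFTypeValues.string, "first")},
    #       {"Id": (UTFTypeValues.uint, 1), "Name": (UTFTypeValues.string, "second")},
    #   ]
    #   blob = UTFBuilder(payload, table_name="Example").bytes()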

    def _write_header(self) -> bytearray:
        self.data_offset = (
            len(self.column_data)
            + len(self.rows_data)
            + len(self.strings)
            + len(self.binary)
            + 0x18
        )
        datalen = self.data_offset
        if self.data_offset % 8 != 0:
            self.data_offset = self.data_offset + (8 - self.data_offset % 8)
        if len(self.binary) == 0:
            binary_offset = self.data_offset
        else:
            binary_offset = datalen - len(self.binary)
        header = UTFChunkHeader.pack(
            b"@UTF",  # Magic.
            self.data_offset,  # Chunk size.
            len(self.column_data) + 0x18,  # Rows offset.
            datalen - len(self.strings) - len(self.binary),  # String offset.
            binary_offset,  # Binary data offset.
            (
                0
                if self.strings.startswith(bytes(self.table_name, self.encoding))
                else self.strings.index(
                    b"\x00" + bytes(self.table_name, self.encoding) + b"\x00"
                )
                + 1
            ),  # Table name pointer.
            len(self.stflag),  # Num columns.
            sum(
                [calcsize(self._stringtypes(x[1])) for x in self.stflag if x[0] == 0x50]
            ),  # Row length (bytes per row).
            len(self.dictarray),  # Num rows.
        )
        return bytearray(header)

    def _write_rows(self) -> bytearray:
        rows = bytearray()
        for row in self.dictarray:
            for data in self.stflag:
                if data[0] == 0x50:  # Only per-row columns occupy row storage.
                    if data[1] not in [0xA, 0xB]:
                        rows += pack(">" + self._stringtypes(data[1]), row[data[2]][1])
                    elif data[1] == 0xA:
                        # String: write a pointer into the string pool.
                        if bytes(row[data[2]][1], self.encoding) == b"":
                            idx = self.strings.index(b"\x00\x00") + 1
                            rows += pack(">" + self._stringtypes(data[1]), idx)
                        else:
                            rows += pack(
                                ">" + self._stringtypes(data[1]),
                                self.strings.index(
                                    b"\x00"
                                    + bytes(row[data[2]][1], self.encoding)
                                    + b"\x00"
                                )
                                + 1,
                            )
                    else:
                        # Bytes: write an (offset, length) pair into the binary section.
                        rows += pack(
                            ">" + self._stringtypes(data[1]),
                            self.binary.index(row[data[2]][1]),
                            len(row[data[2]][1]),
                        )
        return rows

    def _write_columns(self) -> bytearray:
        columns = bytearray()
        for data in self.stflag:
            columns += int.to_bytes(data[0] | data[1], 1, "big")  # Storage | type flag.
            if data[0] in [0x10, 0x50]:
                # Name-only (0x10) and per-row (0x50) columns store just a name pointer.
                columns += int.to_bytes(
                    self.strings.index(
                        b"\x00" + bytes(data[2], self.encoding) + b"\x00"
                    )
                    + 1,
                    4,
                    "big",
                )
            else:
                # Constant (0x30) columns also store their value inline.
                if data[1] not in [0xA, 0xB]:
                    columns += int.to_bytes(
                        self.strings.index(
                            b"\x00" + bytes(data[2], self.encoding) + b"\x00"
                        )
                        + 1,
                        4,
                        "big",
                    ) + int.to_bytes(
                        data[3], calcsize(self._stringtypes(data[1])), "big"
                    )
                elif data[1] == 0xA:
                    columns += int.to_bytes(
                        self.strings.index(
                            b"\x00" + bytes(data[2], self.encoding) + b"\x00"
                        )
                        + 1,
                        4,
                        "big",
                    ) + (
                        b"\x00\x00\x00\x00"
                        if self.strings.startswith(
                            bytes(data[3], self.encoding) + b"\x00"
                        )
                        else (
                            int.to_bytes(
                                self.strings.index(
                                    b"\x00" + bytes(data[3], self.encoding) + b"\x00"
                                )
                                + 1,
                                4,
                                "big",
                            )
                        )
                    )
                else:
                    columns += (
                        int.to_bytes(
                            self.strings.index(
                                b"\x00" + bytes(data[2], self.encoding) + b"\x00"
                            )
                            + 1,
                            4,
                            "big",
                        )
                        + int.to_bytes(self.binary.index(data[3]), 4, "big")
                        + int.to_bytes(len(data[3]), 4, "big")
                    )
        return columns

    def _get_stflag(self):
        to_match = [(x, y) for x, y in self.dictarray[0].items()]
        UTFTypeValuesList = list(UTFTypeValues)
        self.stflag = []
        for val in to_match:
            if len(self.dictarray) != 1:
                for row in self.dictarray:
                    if row[val[0]][1] != val[1][1]:
                        # Value varies across rows: per-row storage (0x50).
                        self.stflag.append(
                            (0x50, UTFTypeValuesList.index(val[1][0]), val[0])
                        )
                        break
                else:
                    if val[1][1] is None:
                        # No value at all: name-only column (0x10).
                        self.stflag.append(
                            (0x10, UTFTypeValuesList.index(val[1][0]), val[0])
                        )
                    else:
                        # Same value in every row: constant column (0x30).
                        self.stflag.append(
                            (
                                0x30,
                                UTFTypeValuesList.index(val[1][0]),
                                val[0],
                                val[1][1],
                            )
                        )
            else:
                # It seems that when there is only one dictionary, there are no
                # elements with the 0x30 flag; otherwise all of them would be
                # either 0x30 or 0x10 flags with no length to the rows.
                if val[1][1] is None or val[1][1] == "<NULL>":
                    self.stflag.append(
                        (0x10, UTFTypeValuesList.index(val[1][0]), val[0])
                    )
                else:
                    self.stflag.append(
                        (0x50, UTFTypeValuesList.index(val[1][0]), val[0])
                    )

    def _get_strings(self):
        strings = []
        binary = b""

        # Collect column names first, then string and bytes values.
        for row in self.dictarray:
            for key, value in row.items():
                if key not in strings:
                    strings.append(key)
        for row in self.dictarray:
            for key, value in row.items():
                if type(value[1]) == str and value[1] not in strings:
                    strings.append(value[1])
                if (
                    type(value[1]) == bytearray or type(value[1]) == bytes
                ) and value[1] not in binary:
                    binary += value[1]
        self.binary = binary

        strings = [self.table_name] + strings

        if "<NULL>" in strings:
            strings.pop(strings.index("<NULL>"))
            strings = ["<NULL>"] + strings

        for i in range(len(strings)):
            val = strings[i].encode(self.encoding)
            if b"\x00" in val:
                raise ValueError(
                    f"Encoding of {self.encoding} for '{strings[i]}' results in a string with a null byte."
                )
            else:
                strings[i] = val

        self.strings = b"\x00".join(strings) + b"\x00"

    def _stringtypes(self, type: int) -> str:
        types = "BbHhIiQqfdI"
        if type == 0xB:
            return "II"  # An (offset, length) pair into the binary data section.
        elif 0 <= type < len(types):
            return types[type]
        raise Exception("Unknown data type.")

    def bytes(self) -> bytearray:
        """Returns a @UTF bytearray table built from the provided payload dictarray."""
        self._get_stflag()
        self.column_data = self._write_columns()
        self.rows_data = self._write_rows()
        header_data = self._write_header()
        dataarray = (
            header_data + self.column_data + self.rows_data + self.strings + self.binary
        )
        if len(dataarray) % 8 != 0:
            dataarray = dataarray[:8] + dataarray[8:].ljust(
                self.data_offset, b"\x00"
            )  # Padding.
        if self.encrypt:
            # Same multiplicative XOR keystream as UTF's EUTF decryption.
            dataarray = memoryview(dataarray)
            m = 0x655F
            t = 0x4115
            for i in range(len(dataarray)):
                dataarray[i] ^= 0xFF & m
                m = (m * t) & 0xFFFFFFFF
            dataarray = bytearray(dataarray)
        return dataarray
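
# Round-trip sketch (illustrative only, not part of the original module):
#
#   src = UTF("table.acb").dictarray
#   blob = UTFBuilder(src, table_name="NewTable").bytes()
#   reparsed = UTF(bytes(blob))  # the rebuilt table parses back into a dictarray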

class UTFViewer:
    """Use this class to create dataclass-like access to `dictarray`s."""

    _payload: dict

    def __init__(self, payload):
        """Constructs a non-owning, read-write, deletable view of a UTF table dictarray.

        Nested classes are supported.

        Sorting (using .sort()) is done in-place and affects the original payload.
        """
        assert isinstance(payload, dict), "payload must be a dictionary."
        super().__setattr__("_payload", payload)

    def __getattr__(self, item):
        annotations = super().__getattribute__("__annotations__")
        # Nested definitions.
        if item in annotations:
            sub = annotations[item]
            reduced = getattr(sub, "__args__", [None])[0]
            reduced = reduced or sub
            if issubclass(reduced, UTFViewer):
                typeof_or_name, value = self._payload[item]
                assert (
                    type(typeof_or_name) == str and type(value) == list
                ), "payload is not expanded. parse with UTF(..., recursive=True)"
                return self._view_as(value, reduced)
        payload = super().__getattribute__("_payload")
        if item not in payload:
            return super().__getattribute__(item)
        _, value = payload[item]
        return value

    def __setattr__(self, item, value):
        payload = super().__getattribute__("_payload")
        if item not in payload:
            raise AttributeError(
                f"{item} not in payload. UTFViewer should not store extra state."
            )
        if isinstance(value, dict) or isinstance(value, list):
            raise AttributeError(
                "Dict or list assignment is not allowed, as this may potentially "
                "change the table layout. Access by elements and use the list APIs instead."
            )
        typeof, _ = payload[item]
        payload[item] = (typeof, value)

    def __dir__(self):
        annotations = super().__getattribute__("__annotations__")
        return list(annotations.keys()) + list(super().__dir__())

    @staticmethod
    def _view_as(payload: dict, clazz: Type[T]) -> T:
        if not issubclass(clazz, UTFViewer):
            raise TypeError("class must be a subclass of UTFViewer")
        return clazz(payload)

    class ListView(list):
        _payload: List[dict]

        def __init__(self, clazz: Type[Ty], payload: List[dict]):
            self._payload = payload
            super().__init__([clazz(item) for item in payload])

        def pop(self, index=-1):
            self._payload.pop(index)
            return super().pop(index)

        def append(self, o: "UTFViewer"):
            if len(self):
                assert isinstance(o, UTFViewer) and type(self[0]) == type(o), (
                    "all items in the list must be of the same type, "
                    "and must be instances of UTFViewer."
                )
            self._payload.append(o._payload)
            return super().append(o)

        def extend(self, iterable):
            for item in iterable:
                self.append(item)

        def insert(self, index, o: "UTFViewer"):
            if len(self):
                assert isinstance(o, UTFViewer) and type(self[0]) == type(o), (
                    "all items in the list must be of the same type, "
                    "and must be instances of UTFViewer."
                )
            self._payload.insert(index, o._payload)
            return super().insert(index, o)

        def clear(self):
            self._payload.clear()
            return super().clear()

        def count(self, value):
            raise NotImplementedError("count is not supported on views")

        def remove(self, value):
            raise NotImplementedError("remove is not supported on views. use pop(index).")

        def sort(self, key: callable):
            # Sort the views and reorder the underlying payload in lock-step.
            p = sorted([(self[i], i) for i in range(len(self))], key=lambda x: key(x[0]))
            self._payload[:] = [self._payload[i] for x, i in p]
            self[:] = [x for x, i in p]

    def __new__(cls: Type[Ty], payload: list | dict, **args) -> Ty | List[Ty]:
        # Viewing a list of rows yields a ListView of viewers; a single row
        # dict yields a plain viewer instance (handled by __init__).
        if isinstance(payload, list):
            return UTFViewer.ListView(cls, payload)
        return super().__new__(cls)
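
# Viewer sketch (illustrative; the Track class and its field names are made up
# and must match the table's actual column names):
#
#   class Track(UTFViewer):
#       Id: int
#       Name: str
#
#   rows = Track(UTF(data, recursive=True).dictarray)  # a ListView of Track views
#   rows.sort(key=lambda t: t.Id)                      # reorders the payload too
#   rows[0].Name = "renamed"                           # writes through to the payload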