fstdtools 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fstdtools/__init__.py +0 -0
- fstdtools/__main__.py +2 -0
- fstdtools/cli.py +130 -0
- fstdtools/convert.py +77 -0
- fstdtools/mdict/__init__.py +0 -0
- fstdtools/mdict/lzo.py +246 -0
- fstdtools/mdict/pureSalsa20.py +365 -0
- fstdtools/mdict/readmdict.py +802 -0
- fstdtools/mdict/ripemd128.py +130 -0
- fstdtools/mdict/writemdict.py +673 -0
- fstdtools-0.0.1.dist-info/METADATA +15 -0
- fstdtools-0.0.1.dist-info/RECORD +16 -0
- fstdtools-0.0.1.dist-info/WHEEL +5 -0
- fstdtools-0.0.1.dist-info/entry_points.txt +2 -0
- fstdtools-0.0.1.dist-info/licenses/LICENSE +21 -0
- fstdtools-0.0.1.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,673 @@
|
|
|
1
|
+
"""
|
|
2
|
+
writemdict.py - a library for creating dictionary files in the MDict file format.
|
|
3
|
+
|
|
4
|
+
Optional dependencies:
|
|
5
|
+
python-lzo: Required to write dictionaries using LZO compression. (Other compression schemes are available.)
|
|
6
|
+
|
|
7
|
+
Simple usage example:
|
|
8
|
+
|
|
9
|
+
from __future__ import unicode_literals
|
|
10
|
+
from writemdict import MDictWriter
|
|
11
|
+
|
|
12
|
+
dictionary = {"doe": "a deer, a female deer.",
|
|
13
|
+
"ray": "a drop of golden sun.",
|
|
14
|
+
"me": "a name I call myself.",
|
|
15
|
+
"far": "a long, long way to run."}
|
|
16
|
+
|
|
17
|
+
writer = MDictWriter(dictionary, title="Example Dictionary", description="This is an example dictionary.")
|
|
18
|
+
outfile = open("dictionary.mdx", "wb")
|
|
19
|
+
writer.write(outfile)
|
|
20
|
+
outfile.close()
|
|
21
|
+
|
|
22
|
+
This will create an MDX file called "dictionary.mdx", with four entries: "doe", "ray", "me", "far", and the
|
|
23
|
+
corresponding definitions.
|
|
24
|
+
|
|
25
|
+
For further options, see the documentation for MDictWriter.__init__().
|
|
26
|
+
"""
|
|
27
|
+
|
|
28
|
+
from __future__ import unicode_literals
|
|
29
|
+
|
|
30
|
+
import struct, zlib, operator, sys, datetime
|
|
31
|
+
|
|
32
|
+
from .ripemd128 import ripemd128
|
|
33
|
+
# from cgi import escape
|
|
34
|
+
from html import escape
|
|
35
|
+
from .pureSalsa20 import Salsa20
|
|
36
|
+
|
|
37
|
+
try:
|
|
38
|
+
from . import lzo
|
|
39
|
+
HAVE_LZO = True
|
|
40
|
+
except ImportError:
|
|
41
|
+
HAVE_LZO = False
|
|
42
|
+
|
|
43
|
+
class ParameterError(Exception):
    """Raised when some parameter to MDictWriter is invalid or uninterpretable."""
|
|
46
|
+
|
|
47
|
+
def _mdx_compress(data, compression_type=2):
|
|
48
|
+
header = (struct.pack(b"<L", compression_type) +
|
|
49
|
+
struct.pack(b">L", zlib.adler32(data) & 0xffffffff)) #depending on python version, zlib.adler32 may return a signed number.
|
|
50
|
+
if compression_type == 0: #no compression
|
|
51
|
+
return header + data
|
|
52
|
+
elif compression_type == 2:
|
|
53
|
+
return header + zlib.compress(data)
|
|
54
|
+
elif compression_type == 1:
|
|
55
|
+
if HAVE_LZO:
|
|
56
|
+
return header + lzo.compress(data)[5:] #python-lzo adds a 5-byte header.
|
|
57
|
+
else:
|
|
58
|
+
raise NotImplementedError()
|
|
59
|
+
else:
|
|
60
|
+
raise ParameterError("Unknown compression type")
|
|
61
|
+
|
|
62
|
+
def _fast_encrypt(data, key):
|
|
63
|
+
b = bytearray(data)
|
|
64
|
+
key = bytearray(key)
|
|
65
|
+
previous = 0x36
|
|
66
|
+
for i in range(len(b)):
|
|
67
|
+
t = b[i] ^ previous ^ (i&0xff) ^ key[i%len(key)]
|
|
68
|
+
previous = b[i] = ((t>>4)|(t<<4)) & 0xff
|
|
69
|
+
return bytes(b)
|
|
70
|
+
|
|
71
|
+
def _mdx_encrypt(comp_block):
    """Encrypt the payload of a compressed block, leaving its header readable.

    The first 8 bytes (compression type + checksum) are passed through
    unchanged. The key is the RIPEMD-128 digest of the 4 checksum bytes
    followed by the little-endian uint32 0x3695.
    """
    header, payload = comp_block[:8], comp_block[8:]
    key = ripemd128(header[4:8] + struct.pack(b"<L", 0x3695))
    return header + _fast_encrypt(payload, key)
|
|
74
|
+
|
|
75
|
+
def _salsa_encrypt(plaintext, dict_key):
    """Encrypt plaintext with 8-round Salsa20 keyed by RIPEMD-128(dict_key).

    Both arguments must be exactly of type bytes. The IV is eight zero bytes.
    """
    assert type(dict_key) == bytes
    assert type(plaintext) == bytes
    cipher = Salsa20(key=ripemd128(dict_key), IV=b"\x00" * 8, rounds=8)
    return cipher.encryptBytes(plaintext)
|
|
81
|
+
|
|
82
|
+
def _hexdump(bytes_blob):
|
|
83
|
+
# Returns a hexadecimal representation of bytes_blob, as a (unicode) string.
|
|
84
|
+
#
|
|
85
|
+
# bytes_blob should have type bytes.
|
|
86
|
+
|
|
87
|
+
# In Python 2.6+, bytes is an alias for str, and indexing into a bytes
|
|
88
|
+
# object gives a string of length 1.
|
|
89
|
+
# In Python 3, indexing into a bytes object gives a number.
|
|
90
|
+
# The following should work on both versions.
|
|
91
|
+
if bytes == str:
|
|
92
|
+
return "".join("{:02X}".format(ord(c)) for c in bytes_blob)
|
|
93
|
+
else:
|
|
94
|
+
return "".join("{:02X}".format(c) for c in bytes_blob)
|
|
95
|
+
|
|
96
|
+
def encrypt_key(dict_key, **kwargs):
    """
    Generates a hexadecimal key for use with the official MDict program.

    Parameters:
      dict_key: a bytes object, representing the dictionary password.

    Keyword parameters:
      Exactly one of email and device_id should be specified. They should be unicode strings,
      representing either the user's email address, or the device ID of the machine on which
      the dictionary is to be opened. The value must be ASCII-encodable.

    Return value:
      a string of 32 hexadecimal digits. This should be placed in a file of its own,
      with the same name and location as the mdx file but the extension changed to '.key'.

    Raises:
      ParameterError if neither or both of email and device_id are given.

    Example usage:
        key = encrypt_key(b"password", email="username@example.com")

        key = encrypt_key(b"password", device_id="12345678-9012-3456-7890-1234")
    """
    has_email = "email" in kwargs
    has_device_id = "device_id" in kwargs
    # Both present or both absent is an error: exactly one is required.
    if has_email == has_device_id:
        raise ParameterError("Expected exactly one of email and device_id as keyword argument")

    owner_info = kwargs["email"] if has_email else kwargs["device_id"]
    owner_info_digest = ripemd128(owner_info.encode("ascii"))
    dict_key_digest = ripemd128(dict_key)

    # The digest of the password is encrypted under the digest of the owner info.
    cipher = Salsa20(key=owner_info_digest, IV=b"\x00" * 8, rounds=8)
    return _hexdump(cipher.encryptBytes(dict_key_digest))
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
class _OffsetTableEntry(object):
|
|
135
|
+
# Each OffsetTableEntry represents one key/record pair of the dictionary.
|
|
136
|
+
# In addition to the values themselves, it contains information about
|
|
137
|
+
# the offset at which this entry will be placed (i.e. the total length
|
|
138
|
+
# of records before it) which is required by the MDX format.
|
|
139
|
+
def __init__(self, key, key_null, key_len, offset, record_null):
|
|
140
|
+
self.key = key
|
|
141
|
+
self.key_null = key_null
|
|
142
|
+
self.key_len = key_len
|
|
143
|
+
self.offset = offset
|
|
144
|
+
self.record_null = record_null
|
|
145
|
+
|
|
146
|
+
class MDictWriter(object):
    """Prepares dictionary data in memory and writes it as an mdx or mdd file.

    All blocks and indexes are built by __init__(); write() then emits the
    header, the key section and the record section to a binary file object.
    """

    # Maps the accepted (lower-cased) encoding names to a tuple of
    # (python codec name, name written in the mdx header, bytes per unit).
    _ENCODINGS = {
        "utf8": ("utf_8", "UTF-8", 1),
        "utf-8": ("utf_8", "UTF-8", 1),
        "utf16": ("utf_16_le", "UTF-16", 2),
        "utf-16": ("utf_16_le", "UTF-16", 2),
        "gbk": ("gbk", "GBK", 1),
        "big5": ("big5", "BIG5", 1),
    }

    def __init__(self, d, title, description,
                 block_size=65536,
                 encrypt_index=False,
                 encoding="utf8",
                 compression_type=2,
                 version="2.0",
                 encrypt_key=None,
                 register_by=None,
                 user_email=None,
                 user_device_id=None,
                 is_mdd=False):
        """
        Prepares the records. A subsequent call to write() writes
        the mdx or mdd file.

        d is a dictionary. The keys should be (unicode) strings. If used for an mdx
          file (the parameter is_mdd is False), then the values should also be
          (unicode) strings, containing HTML snippets. If used to write an mdd
          file (the parameter is_mdd is True), then the values should be binary
          strings (bytes objects), containing the raw data for the corresponding
          file object.

        title is a (unicode) string, with the title of the dictionary
          description is a (unicode) string, with a short description of the
          dictionary.

        block_size is the approximate number of bytes (uncompressed)
          before starting a new block.

        encrypt_index is true if the keyword index should be encrypted.

        encoding is the character encoding to use in the files. Valid options are
          "utf8", "utf16", "gbk", and "big5". If used to write an mdd file (the
          parameter is_mdd is True), then this is ignored.

        compression_type is an integer specifying the compression type to use.
          Valid options are 0 (no compression), 1 (LZO compression), or 2 (gzip
          compression).

        version specifies the version of the file format to use. Recognized options are
          "2.0" and "1.2".

        encrypt_key should be a bytes object, containing the dictionary key. If
          encrypt_key is None, no encryption will be applied. If encrypt_key is
          not None, you need to specify register_by.

        register_by should be either "email" or "device_id". Ignored unless
          encrypt_key is not None. Specifies whether the user's email or user's
          device ID should be used to encrypt the encryption key.

        user_email is ignored unless encrypt_key is not None and register_by is
          "email". If it is specified, an encrypted form of encrypt_key will be
          written into the dictionary header. The file can then be opened by
          anyone who has set their email (in the MDict client) to this value.
          If it is not specified, the MDict client will look for this encrypted
          key in a separate .key file.

        user_device_id is ignored unless encrypt_key is not None and register_by
          is "device_id". If it is specified, an encrypted form of encrypt_key
          will be written into the dictionary header. The file can then be opened
          by anyone whose device ID (as determined by the MDict client) equals this
          value. If it is not specified, the MDict client will look for this
          encrypted key in a separate .key file.

        is_mdd is a boolean specifying whether the file written will be an mdx file
          or an mdd file. By default this is False, meaning that an mdx file will
          be written.

        Raises ParameterError if register_by, encoding or version is not one
          of the recognized values, or if encrypt_index is requested together
          with version "1.2".
        """

        self._num_entries = len(d)
        self._title = title
        self._description = description
        self._block_size = block_size
        self._encrypt_index = encrypt_index
        self._encrypt = (encrypt_key is not None)
        self._encrypt_key = encrypt_key
        if register_by not in ["email", "device_id", None]:
            raise ParameterError("Unknown register_by type")
        self._register_by = register_by
        self._user_email = user_email
        self._user_device_id = user_device_id
        self._compression_type = compression_type
        self._is_mdd = is_mdd

        # _encoding is the string used in the mdx header.
        # _python_encoding is passed on to the python .encode()
        # function to encode the data.
        # _encoding_length is the size of one unit of the encoding,
        # used to calculate the length for keys in the key index.
        if not is_mdd:
            codec = self._ENCODINGS.get(encoding.lower())
            if codec is None:
                raise ParameterError("Unknown encoding")
            self._python_encoding, self._encoding, self._encoding_length = codec
        else:
            # mdd keys are always UTF-16; no Encoding attribute is written.
            self._python_encoding = "utf_16_le"
            self._encoding_length = 2
        if version not in ["2.0", "1.2"]:
            raise ParameterError("Unknown version")
        self._version = version
        self._build_offset_table(d)
        self._build_key_blocks()
        self._build_keyb_index()
        self._build_record_blocks()
        self._build_recordb_index()

    def _build_offset_table(self, d):
        # Sets self._offset_table to a list of _OffsetTableEntry objects e,
        # sorted by key, where:
        #  e.key: encoded version of the key, not null-terminated
        #  e.key_null: encoded version of the key, null-terminated
        #  e.key_len: the length of the key, in either bytes or 2-byte units,
        #             not counting the null character (as required by the MDX
        #             format in the keyword index)
        #  e.offset: the cumulative sum of len(record_null) for preceding records
        #  e.record_null: encoded version of the record, null-terminated
        #                 (for mdd files: the raw record, unchanged)
        #
        # Also sets self._total_record_len to the total length of all record fields.
        items = sorted(d.items(), key=operator.itemgetter(0))

        self._offset_table = []
        offset = 0
        for key, record in items:
            key_enc = key.encode(self._python_encoding)
            key_null = (key + "\0").encode(self._python_encoding)
            key_len = len(key_enc) // self._encoding_length

            # Set record_null to the value of the record. If it's
            # an MDX file, append an extra null character.
            if self._is_mdd:
                record_null = record
            else:
                record_null = (record + "\0").encode(self._python_encoding)
            self._offset_table.append(_OffsetTableEntry(
                key=key_enc,
                key_null=key_null,
                key_len=key_len,
                record_null=record_null,
                offset=offset))
            offset += len(record_null)
        self._total_record_len = offset

    def _split_blocks(self, block_type):
        # Split either the records or the keys into blocks for compression.
        #
        # Returns a list of _MdxBlock, where the decompressed size of each block is (as
        # far as practicable) less than self._block_size.
        #
        # block_type should be a subclass of _MdxBlock, i.e. either _MdxRecordBlock or
        # _MdxKeyBlock.
        table = self._offset_table
        blocks = []
        block_start = 0
        cur_size = 0
        for ind, entry in enumerate(table):
            entry_size = block_type._len_block_entry(entry)
            # Never flush before the first entry: a block must hold at least
            # one entry, even if that entry alone exceeds self._block_size.
            if ind > 0 and cur_size + entry_size > self._block_size:
                blocks.append(block_type(
                    table[block_start:ind], self._compression_type, self._version))
                cur_size = 0
                block_start = ind
            cur_size += entry_size
        if table:
            # Always flush the last (possibly partial) block.
            blocks.append(block_type(
                table[block_start:], self._compression_type, self._version))
        return blocks

    def _build_key_blocks(self):
        # Sets self._key_blocks to a list of _MdxKeyBlocks.
        self._key_blocks = self._split_blocks(_MdxKeyBlock)

    def _build_record_blocks(self):
        # Sets self._record_blocks to a list of _MdxRecordBlocks.
        self._record_blocks = self._split_blocks(_MdxRecordBlock)

    def _build_keyb_index(self):
        # Sets self._keyb_index to a bytes object, containing the index of key blocks, in
        # a format suitable for direct writing to the file.
        #
        # Also sets self._keyb_index_decomp_size and, for version 2.0 only,
        # self._keyb_index_comp_size. In version 1.2 the index is stored
        # uncompressed and cannot be encrypted.
        decomp_data = b"".join(b.get_index_entry() for b in self._key_blocks)
        self._keyb_index_decomp_size = len(decomp_data)
        if self._version == "2.0":
            self._keyb_index = _mdx_compress(decomp_data, self._compression_type)
            if self._encrypt_index:
                self._keyb_index = _mdx_encrypt(self._keyb_index)
            self._keyb_index_comp_size = len(self._keyb_index)
        elif self._encrypt_index:
            raise ParameterError("Key index encryption not supported in version 1.2")
        else:
            self._keyb_index = decomp_data

    def _build_recordb_index(self):
        # Sets self._recordb_index to a bytes object, containing the index of record
        # blocks, in a format suitable for direct writing to the file.
        #
        # Also sets self._recordb_index_size.
        self._recordb_index = b"".join(
            b.get_index_entry() for b in self._record_blocks)
        self._recordb_index_size = len(self._recordb_index)

    def _write_key_sect(self, outfile):
        # Writes the key section header, key block index, and all the key blocks to
        # outfile.
        #
        # outfile: a file-like object, opened in binary mode.
        keyblocks_total_size = sum(len(b.get_block()) for b in self._key_blocks)
        if self._version == "2.0":
            preamble = struct.pack(b">QQQQQ",
                                   len(self._key_blocks),
                                   self._num_entries,
                                   self._keyb_index_decomp_size,
                                   self._keyb_index_comp_size,
                                   keyblocks_total_size)
            # The checksum covers the plaintext preamble, so compute it before
            # any encryption. Mask to unsigned, as for every other checksum.
            preamble_checksum = struct.pack(
                b">L", zlib.adler32(preamble) & 0xffffffff)
            if self._encrypt:
                preamble = _salsa_encrypt(preamble, self._encrypt_key)
            outfile.write(preamble)
            outfile.write(preamble_checksum)
        else:
            # Version 1.2: 32-bit fields, no compressed index size, no checksum.
            preamble = struct.pack(b">LLLL",
                                   len(self._key_blocks),
                                   self._num_entries,
                                   self._keyb_index_decomp_size,
                                   keyblocks_total_size)
            if self._encrypt:
                preamble = _salsa_encrypt(preamble, self._encrypt_key)
            outfile.write(preamble)

        outfile.write(self._keyb_index)
        for b in self._key_blocks:
            outfile.write(b.get_block())

    def _write_record_sect(self, outfile):
        # Writes the record section header, record block index, and all the record blocks
        # to outfile.
        #
        # outfile: a file-like object, opened in binary mode.
        recordblocks_total_size = sum(
            len(b.get_block()) for b in self._record_blocks)
        # 64-bit fields in version 2.0, 32-bit fields in version 1.2.
        fmt = b">QQQQ" if self._version == "2.0" else b">LLLL"
        outfile.write(struct.pack(fmt,
                                  len(self._record_blocks),
                                  self._num_entries,
                                  self._recordb_index_size,
                                  recordblocks_total_size))
        outfile.write(self._recordb_index)
        for b in self._record_blocks:
            outfile.write(b.get_block())

    def write(self, outfile):
        """
        Write the mdx file to outfile.

        outfile: a file-like object, opened in binary mode.
        """
        self._write_header(outfile)
        self._write_key_sect(outfile)
        self._write_record_sect(outfile)

    def _write_header(self, f):
        # Writes the file header to f: a big-endian uint32 length, the
        # UTF-16-LE encoded XML-like header tag, and the little-endian
        # Adler-32 checksum of the encoded tag.
        #
        # f: a file-like object, opened in binary mode.

        # Encrypted attribute: value 2 is set when the key index is encrypted,
        # value 1 when a dictionary key (encrypt_key) is in use.
        encrypted = 0
        if self._encrypt_index:
            encrypted = encrypted | 2
        if self._encrypt:
            encrypted = encrypted | 1

        # The registration code is only embedded when the matching user
        # identifier was supplied; otherwise it is left empty.
        if self._encrypt and self._register_by == "email":
            register_by_str = "EMail"
            if self._user_email is not None:
                regcode = encrypt_key(self._encrypt_key, email=self._user_email)
            else:
                regcode = ""
        elif self._encrypt and self._register_by == "device_id":
            register_by_str = "DeviceID"
            if self._user_device_id is not None:
                regcode = encrypt_key(self._encrypt_key, device_id=self._user_device_id)
            else:
                regcode = ""
        else:
            register_by_str = ""
            regcode = ""

        # The mdx and mdd headers differ only in the root tag name and in
        # the Encoding/Format attributes.
        if self._is_mdd:
            root = "Library_Data"
            type_attrs = 'Format="" '
        else:
            root = "Dictionary"
            type_attrs = 'Encoding="{}" Format="Html" '.format(self._encoding)

        header_string = (
            '<{root} '
            'GeneratedByEngineVersion="{version}" '
            'RequiredEngineVersion="{version}" '
            'Encrypted="{encrypted}" '
            '{type_attrs}'
            'CreationDate="{date.year}-{date.month}-{date.day}" '
            'Compact="No" '
            'Compat="No" '
            'KeyCaseSensitive="No" '
            'Description="{description}" '
            'Title="{title}" '
            'DataSourceFormat="106" '
            'StyleSheet="" '
            'RegisterBy="{register_by_str}" '
            'RegCode="{regcode}"/>\r\n\x00').format(
                root=root,
                version=self._version,
                encrypted=encrypted,
                type_attrs=type_attrs,
                date=datetime.date.today(),
                description=escape(self._description, quote=True),
                title=escape(self._title, quote=True),
                register_by_str=register_by_str,
                regcode=regcode
            ).encode("utf_16_le")
        f.write(struct.pack(b">L", len(header_string)))
        f.write(header_string)
        f.write(struct.pack(b"<L", zlib.adler32(header_string) & 0xffffffff))
|
|
525
|
+
|
|
526
|
+
class _MdxBlock(object):
|
|
527
|
+
# Abstract base class for _MdxRecordBlock and _MdxKeyBlock.
|
|
528
|
+
#
|
|
529
|
+
# In the MDX file format, the keyword section and the record section have a
|
|
530
|
+
# similar structure:
|
|
531
|
+
#
|
|
532
|
+
# section header
|
|
533
|
+
# index entry for block 0
|
|
534
|
+
# ...
|
|
535
|
+
# index entry for block k
|
|
536
|
+
# block 0
|
|
537
|
+
# ...
|
|
538
|
+
# block k
|
|
539
|
+
#
|
|
540
|
+
# This class represents one such block. It defines a common interface for
|
|
541
|
+
# record blocks and keyword blocks, to allow the two sections to
|
|
542
|
+
# be built in a uniform manner.
|
|
543
|
+
#
|
|
544
|
+
|
|
545
|
+
def __init__(self, offset_table, compression_type, version):
|
|
546
|
+
# Builds the data from offset_table.
|
|
547
|
+
#
|
|
548
|
+
# offset_table is a iterable containing _OffsetTableEntry objects.
|
|
549
|
+
|
|
550
|
+
decomp_data = b"".join(
|
|
551
|
+
type(self)._block_entry(t, version)
|
|
552
|
+
for t in offset_table)
|
|
553
|
+
self._decomp_size = len(decomp_data)
|
|
554
|
+
self._comp_data = _mdx_compress(decomp_data, compression_type)
|
|
555
|
+
self._comp_size = len(self._comp_data)
|
|
556
|
+
self._version = version
|
|
557
|
+
|
|
558
|
+
def get_block(self):
|
|
559
|
+
# Returns a bytes object, containing the data for this block.
|
|
560
|
+
return self._comp_data
|
|
561
|
+
|
|
562
|
+
def get_index_entry(self):
|
|
563
|
+
# Returns a bytes object, containing the entry for this block in the
|
|
564
|
+
# corresponding key block index or record block index.
|
|
565
|
+
|
|
566
|
+
raise NotImplementedError()
|
|
567
|
+
|
|
568
|
+
@staticmethod
|
|
569
|
+
def _block_entry(t, version):
|
|
570
|
+
# Returns the data corresponding to a single entry in offset.
|
|
571
|
+
#
|
|
572
|
+
# t is an _OffsetTableEntry object
|
|
573
|
+
|
|
574
|
+
raise NotImplementedError()
|
|
575
|
+
|
|
576
|
+
@staticmethod
|
|
577
|
+
def _len_block_entry(t):
|
|
578
|
+
# Should be approximately equal to len(_block_entry(t)).
|
|
579
|
+
#
|
|
580
|
+
# Used by MdxWriter._split_blocks() to determine where to split into blocks."""
|
|
581
|
+
raise NotImplementedError()
|
|
582
|
+
|
|
583
|
+
class _MdxRecordBlock(_MdxBlock):
    """A record block.

    Can return (in the format suitable for insertion in an mdx file) both
    the block itself and the entry for that block in the record block index.
    """

    def __init__(self, offset_table, compression_type, version):
        """Build the block from offset_table.

        offset_table is an iterable containing _OffsetTableEntry objects;
        only their record parts are used.
        """
        super(_MdxRecordBlock, self).__init__(offset_table, compression_type, version)

    def get_index_entry(self):
        """Return this block's entry in the record block index.

        The entry is the compressed size followed by the decompressed size,
        as big-endian 64-bit (version 2.0) or 32-bit (otherwise) integers.
        """
        size_format = b">QQ" if self._version == "2.0" else b">LL"
        return struct.pack(size_format, self._comp_size, self._decomp_size)

    @staticmethod
    def _block_entry(t, version):
        """Return the stored record bytes of entry t."""
        return t.record_null

    @staticmethod
    def _len_block_entry(t):
        """Return the length of the stored record bytes of entry t."""
        return len(t.record_null)
|
|
616
|
+
|
|
617
|
+
class _MdxKeyBlock(_MdxBlock):
    """A key block.

    Can return (in the format suitable for insertion in an mdx file) both
    the block itself and the entry for that block in the key block index.
    """

    def __init__(self, offset_table, compression_type, version):
        """Build the block from offset_table.

        offset_table is a non-empty sequence of _OffsetTableEntry objects.
        Only the key, key_len, key_null and offset fields are used;
        record_null is effectively ignored.
        """
        super(_MdxKeyBlock, self).__init__(offset_table, compression_type, version)
        self._num_entries = len(offset_table)
        first = offset_table[0]
        last = offset_table[len(offset_table) - 1]
        # Version 2.0 stores the boundary keys null-terminated in the index;
        # other versions store them without the terminator.
        if version == "2.0":
            self._first_key = first.key_null
            self._last_key = last.key_null
        else:
            self._first_key = first.key
            self._last_key = last.key
        self._first_key_len = first.key_len
        self._last_key_len = last.key_len

    @staticmethod
    def _block_entry(t, version):
        """Return the record offset of t followed by its null-terminated key.

        The offset is a big-endian 64-bit integer in version 2.0 and a
        32-bit integer otherwise.
        """
        offset_format = b">Q" if version == "2.0" else b">L"
        return struct.pack(offset_format, t.offset) + t.key_null

    @staticmethod
    def _len_block_entry(t):
        """Return the approximate size of t's entry in a key block."""
        # This is only accurate for version 2.0, but we only need an
        # approximate size anyway.
        return 8 + len(t.key_null)

    def get_index_entry(self):
        """Return a bytes object containing the header data for this block.

        Layout: number of entries, first key length, first key, last key
        length, last key, compressed size, decompressed size.
        """
        if self._version == "2.0":
            long_format, short_format = b">Q", b">H"
        else:
            long_format, short_format = b">L", b">B"
        fields = [
            struct.pack(long_format, self._num_entries),
            struct.pack(short_format, self._first_key_len),
            self._first_key,
            struct.pack(short_format, self._last_key_len),
            self._last_key,
            struct.pack(long_format, self._comp_size),
            struct.pack(long_format, self._decomp_size),
        ]
        return b"".join(fields)
|
|
670
|
+
|
|
671
|
+
|
|
672
|
+
|
|
673
|
+
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: fstdtools
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Summary: CLI tools for fstd dictionary to pack/unpack/list/info/convert.
|
|
5
|
+
Author-email: Moujie Qin <moujieqin@gmail.com>
|
|
6
|
+
Requires-Python: >=3.9
|
|
7
|
+
Description-Content-Type: text/markdown
|
|
8
|
+
License-File: LICENSE
|
|
9
|
+
Requires-Dist: fstd>=0.1.3
|
|
10
|
+
Requires-Dist: click>=8.0
|
|
11
|
+
Requires-Dist: tqdm>=4.64.0
|
|
12
|
+
Dynamic: license-file
|
|
13
|
+
|
|
14
|
+
# fstdtools
|
|
15
|
+
A command line tool for fstd dictionary to pack/unpack/list/info/convert.
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
fstdtools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
+
fstdtools/__main__.py,sha256=vkUoLlkcsZRTbXSCvLBFR2M3YbgrVCZ6lmL1ioYYAtk,26
|
|
3
|
+
fstdtools/cli.py,sha256=jgBw5ML8wd_pXc2ShqfANCYRKmFRWUDZUrs9h-L3C40,6334
|
|
4
|
+
fstdtools/convert.py,sha256=-vtjBrTPm6bFvGwJkiUpVJC74XMbadDNYUf8H9j-5zA,2490
|
|
5
|
+
fstdtools/mdict/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
|
+
fstdtools/mdict/lzo.py,sha256=a_h-j97xwBw769qOtrKK23RlDLWdIR2V6C1ir0htkrs,6503
|
|
7
|
+
fstdtools/mdict/pureSalsa20.py,sha256=C8XmnkQJRdJqQ9ZSJwZ68Jr5jIdSwmad5Mvh5khp_uc,14236
|
|
8
|
+
fstdtools/mdict/readmdict.py,sha256=0nTqxMANy79CgBA7H26lTyzXwZle-YVNgefcYniqpC8,29522
|
|
9
|
+
fstdtools/mdict/ripemd128.py,sha256=wKvevXO17r6tpDDGI2UCydfMvyIKAiqmJ9QhYCkBdJw,3663
|
|
10
|
+
fstdtools/mdict/writemdict.py,sha256=VooTRwcFFnGSq3nL4wqbJteObIBYgjoCQ7Evp2z1H7c,22748
|
|
11
|
+
fstdtools-0.0.1.dist-info/licenses/LICENSE,sha256=TmFV_q9EIz5aZiT_DFSf6uKwdZYCryehAorcPXVciNM,1066
|
|
12
|
+
fstdtools-0.0.1.dist-info/METADATA,sha256=hL_bOeRH4ZDIfJScAmitdFuzY2MFdSYx6tWBjQPAyJk,448
|
|
13
|
+
fstdtools-0.0.1.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
14
|
+
fstdtools-0.0.1.dist-info/entry_points.txt,sha256=BqmW9Ge5iZg5WfqdBPTSp0uJ2deWqhv0Inzid6iXJ-8,48
|
|
15
|
+
fstdtools-0.0.1.dist-info/top_level.txt,sha256=feh-uj4hbmb7wF_o_Brxx9CjUeCIqT-xhQ21k9MjkA4,10
|
|
16
|
+
fstdtools-0.0.1.dist-info/RECORD,,
|