fstdtools 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,673 @@
1
+ """
2
+ writemdict.py - a library for creating dictionary files in the MDict file format.
3
+
4
+ Optional dependencies:
5
+ python-lzo: Required to write dictionaries using LZO compression. (Other compression schemes are available.)
6
+
7
+ Simple usage example:
8
+
9
+ from __future__ import unicode_literals
10
+ from writemdict import MDictWriter
11
+
12
+ dictionary = {"doe": "a deer, a female deer.",
13
+ "ray": "a drop of golden sun.",
14
+ "me": "a name I call myself.",
15
+ "far": "a long, long way to run."}
16
+
17
+ writer = MDictWriter(dictionary, title="Example Dictionary", description="This is an example dictionary.")
18
+ outfile = open("dictionary.mdx", "wb")
19
+ writer.write(outfile)
20
+ outfile.close()
21
+
22
+ This will create an MDX file called "dictionary.mdx", with four entries: "doe", "ray", "me", "far", and the
23
+ corresponding definitions.
24
+
25
+ For further options, see the documentation for MDictWriter.__init__().
26
+ """
27
+
28
+ from __future__ import unicode_literals
29
+
30
+ import struct, zlib, operator, sys, datetime
31
+
32
+ from .ripemd128 import ripemd128
33
+ # from cgi import escape
34
+ from html import escape
35
+ from .pureSalsa20 import Salsa20
36
+
37
+ try:
38
+ from . import lzo
39
+ HAVE_LZO = True
40
+ except ImportError:
41
+ HAVE_LZO = False
42
+
43
class ParameterError(Exception):
    """Raised when a parameter given to MDictWriter is invalid or uninterpretable."""
46
+
47
def _mdx_compress(data, compression_type=2):
    # Wraps data in an MDX block: an 8-byte header followed by the payload.
    #
    # data: a bytes object with the uncompressed content.
    # compression_type: 0 (no compression), 1 (LZO) or 2 (zlib).
    #
    # The header is the compression type as a little-endian 32-bit integer,
    # followed by the Adler-32 checksum of the *uncompressed* data as a
    # big-endian 32-bit integer.
    #
    # Raises ParameterError for an unknown compression type, and
    # NotImplementedError if LZO is requested but the lzo module is missing.

    # Validate before packing, so that a bad type is always reported as a
    # ParameterError rather than as a struct.error from struct.pack().
    if compression_type not in (0, 1, 2):
        raise ParameterError("Unknown compression type")
    if compression_type == 1 and not HAVE_LZO:
        raise NotImplementedError(
            "LZO compression requested, but the lzo module is not available")

    # The mask keeps the checksum unsigned regardless of what adler32 returns.
    header = (struct.pack(b"<L", compression_type) +
              struct.pack(b">L", zlib.adler32(data) & 0xffffffff))
    if compression_type == 0:
        return header + data
    if compression_type == 2:
        return header + zlib.compress(data)
    # compression_type == 1: python-lzo adds a 5-byte header, which is dropped.
    return header + lzo.compress(data)[5:]
61
+
62
def _fast_encrypt(data, key):
    # Applies the simple chained byte cipher used for the keyword index.
    #
    # Each output byte is the nibble-swapped XOR of the input byte, the
    # previous output byte (0x36 for the first byte), the low 8 bits of the
    # position, and the key byte at that position (the key repeats cyclically).
    #
    # data, key: bytes-like objects. Returns a bytes object of len(data).
    buf = bytearray(data)
    key_bytes = bytearray(key)
    prev = 0x36
    for pos, value in enumerate(buf):
        mixed = value ^ prev ^ (pos & 0xff) ^ key_bytes[pos % len(key_bytes)]
        # Swap the high and low nibbles, keeping the result within one byte.
        prev = ((mixed >> 4) | (mixed << 4)) & 0xff
        buf[pos] = prev
    return bytes(buf)
70
+
71
def _mdx_encrypt(comp_block):
    # Encrypts the payload of a block produced by _mdx_compress().
    #
    # The first 8 bytes (the block header) are left in the clear. The cipher
    # key is the RIPEMD-128 digest of bytes 4-8 of the block followed by the
    # constant 0x3695 packed as a little-endian 32-bit integer.
    header, payload = comp_block[:8], comp_block[8:]
    key_material = comp_block[4:8] + struct.pack(b"<L", 0x3695)
    return header + _fast_encrypt(payload, ripemd128(key_material))
74
+
75
def _salsa_encrypt(plaintext, dict_key):
    # Encrypts plaintext with Salsa20 (8 rounds, all-zero 8-byte IV), using
    # the RIPEMD-128 digest of dict_key as the cipher key.
    #
    # plaintext, dict_key: bytes objects.
    # Returns whatever Salsa20.encryptBytes() returns for plaintext.
    # Raises TypeError if either argument is not a bytes object.

    # Explicit checks instead of assert: asserts are stripped under "python -O",
    # which would let a str key reach the digest code unvalidated.
    if not isinstance(dict_key, bytes):
        raise TypeError("dict_key must be a bytes object")
    if not isinstance(plaintext, bytes):
        raise TypeError("plaintext must be a bytes object")
    # Named salsa_key so the module-level encrypt_key() function is not shadowed.
    salsa_key = ripemd128(dict_key)
    s20 = Salsa20(key=salsa_key, IV=b"\x00"*8, rounds=8)
    return s20.encryptBytes(plaintext)
81
+
82
def _hexdump(bytes_blob):
    # Returns an upper-case hexadecimal representation of bytes_blob, as a
    # string with two digits per byte.
    #
    # bytes_blob should have type bytes. Iterating over a bytes object yields
    # integers on Python 3, which is the only Python version this module can
    # be imported on, so no Python 2 fallback is needed.
    return "".join("{:02X}".format(c) for c in bytes_blob)
95
+
96
def encrypt_key(dict_key, **kwargs):
    """
    Generates a hexadecimal key for use with the official MDict program.

    Parameters:
      dict_key: a bytes object, representing the dictionary password.

    Keyword parameters:
      Exactly one of email and device_id must be given. Each is a string
      that must be encodable as ASCII: either the user's email address, or
      the device ID of the machine on which the dictionary is to be opened.

    Return value:
      a string of hexadecimal digits: the Salsa20 encryption (8 rounds, zero
      IV) of the RIPEMD-128 digest of dict_key, keyed with the RIPEMD-128
      digest of the email or device ID. This should be placed in a file of
      its own, with the same name and location as the mdx file but the
      extension changed to '.key'.

    Raises:
      ParameterError if neither or both of email and device_id are given.

    Example usage:
        key = encrypt_key(b"password", email="username@example.com")

        key = encrypt_key(b"password", device_id="12345678-9012-3456-7890-1234")
    """
    has_email = "email" in kwargs
    has_device_id = "device_id" in kwargs
    # Both present or both absent: either way the owner is not uniquely given.
    if has_email == has_device_id:
        raise ParameterError("Expected exactly one of email and device_id as keyword argument")

    owner_info = kwargs["email"] if has_email else kwargs["device_id"]
    owner_info_digest = ripemd128(owner_info.encode("ascii"))
    dict_key_digest = ripemd128(dict_key)

    cipher = Salsa20(key=owner_info_digest, IV=b"\x00"*8, rounds=8)
    return _hexdump(cipher.encryptBytes(dict_key_digest))
132
+
133
+
134
class _OffsetTableEntry(object):
    # One key/record pair of the dictionary, in encoded form.
    #
    # Besides the encoded key and record, an entry stores the offset at which
    # the record will be placed (i.e. the total length of the records before
    # it), which the MDX format requires.
    #
    #   key:         encoded key, not null-terminated
    #   key_null:    encoded key, null-terminated
    #   key_len:     length of the key, not counting the null terminator
    #   offset:      total length of all preceding records
    #   record_null: encoded record
    def __init__(self, key, key_null, key_len, offset, record_null):
        self.key, self.key_null, self.key_len = key, key_null, key_len
        self.offset = offset
        self.record_null = record_null
145
+
146
class MDictWriter(object):
    """
    Builds an MDict dictionary (mdx) or resource (mdd) file from a Python dict.

    The constructor encodes all entries and builds every key block, record
    block and index in memory. write() then serializes the header, the key
    section and the record section to a binary file object.
    """

    def __init__(self, d, title, description,
                 block_size=65536,
                 encrypt_index=False,
                 encoding="utf8",
                 compression_type=2,
                 version="2.0",
                 encrypt_key=None,
                 register_by=None,
                 user_email=None,
                 user_device_id=None,
                 is_mdd=False):
        """
        Prepares the records. A subsequent call to write() writes
        the mdx or mdd file.

        d is a dictionary. The keys should be (unicode) strings. If used for an mdx
          file (the parameter is_mdd is False), then the values should also be
          (unicode) strings, containing HTML snippets. If used to write an mdd
          file (the parameter is_mdd is True), then the values should be binary
          strings (bytes objects), containing the raw data for the corresponding
          file object.

        title is a (unicode) string, with the title of the dictionary.
        description is a (unicode) string, with a short description of the
          dictionary.

        block_size is the approximate number of bytes (uncompressed)
          before starting a new block.

        encrypt_index is true if the keyword index should be encrypted. This is
          only supported for version "2.0".

        encoding is the character encoding to use in the files. Valid options are
          "utf8", "utf16", "gbk", and "big5". If used to write an mdd file (the
          parameter is_mdd is True), then this is ignored.

        compression_type is an integer specifying the compression type to use.
          Valid options are 0 (no compression), 1 (LZO compression), or 2 (zlib
          compression).

        version specifies the version of the file format to use. Recognized options are
          "2.0" and "1.2".

        encrypt_key should be a bytes object, containing the dictionary key. If
          encrypt_key is None, no encryption will be applied. If encrypt_key is
          not None, you need to specify register_by.

        register_by should be either "email" or "device_id". Ignored unless
          encrypt_key is not None. Specifies whether the user's email or user's
          device ID should be used to encrypt the encryption key.

        user_email is ignored unless encrypt_key is not None and register_by is
          "email". If it is specified, an encrypted form of encrypt_key will be
          written into the dictionary header. The file can then be opened by
          anyone who has set their email (in the MDict client) to this value.
          If it is not specified, the MDict client will look for this encrypted
          key in a separate .key file.

        user_device_id is ignored unless encrypt_key is not None and register_by
          is "device_id". If it is specified, an encrypted form of encrypt_key
          will be written into the dictionary header. The file can then be opened
          by anyone whose device ID (as determined by the MDict client) equals this
          value. If it is not specified, the MDict client will look for this
          encrypted key in a separate .key file.

        is_mdd is a boolean specifying whether the file written will be an mdx file
          or an mdd file. By default this is False, meaning that an mdx file will
          be written.

        Raises ParameterError if register_by, encoding, version or
          compression_type is not recognized, or if encrypt_index is requested
          for version "1.2".
        """

        self._num_entries = len(d)
        self._title = title
        self._description = description
        self._block_size = block_size
        self._encrypt_index = encrypt_index
        self._encrypt = (encrypt_key is not None)
        self._encrypt_key = encrypt_key
        if register_by not in ["email", "device_id", None]:
            raise ParameterError("Unknown register_by type")
        self._register_by = register_by
        self._user_email = user_email
        self._user_device_id = user_device_id
        self._compression_type = compression_type
        self._is_mdd = is_mdd

        # _encoding is the string used in the mdx header.
        # _python_encoding is passed on to the python .encode()
        #     function to encode the data.
        # _encoding_length is the size of one unit of the encoding,
        #     used to calculate the length for keys in the key index.
        if not is_mdd:
            encoding = encoding.lower()
            if encoding in ["utf8", "utf-8"]:
                self._python_encoding = "utf_8"
                self._encoding = "UTF-8"
                self._encoding_length = 1
            elif encoding in ["utf16", "utf-16"]:
                self._python_encoding = "utf_16_le"
                self._encoding = "UTF-16"
                self._encoding_length = 2
            elif encoding == "gbk":
                self._python_encoding = "gbk"
                self._encoding = "GBK"
                self._encoding_length = 1
            elif encoding == "big5":
                self._python_encoding = "big5"
                self._encoding = "BIG5"
                self._encoding_length = 1
            else:
                raise ParameterError("Unknown encoding")
        else:
            # mdd keys are always UTF-16; the mdd header has no Encoding
            # attribute, so _encoding is not needed.
            self._python_encoding = "utf_16_le"
            self._encoding_length = 2
        if version not in ["2.0", "1.2"]:
            raise ParameterError("Unknown version")
        self._version = version
        self._build_offset_table(d)
        self._build_key_blocks()
        self._build_keyb_index()
        self._build_record_blocks()
        self._build_recordb_index()

    def _build_offset_table(self, d):
        # Sets self._offset_table to a list of _OffsetTableEntry objects e,
        # sorted by key, where:
        #   e.key: encoded version of the key, not null-terminated
        #   e.key_null: encoded version of the key, null-terminated
        #   e.key_len: the length of the key, in either bytes or 2-byte units,
        #              not counting the null character (as required by the MDX
        #              format in the keyword index)
        #   e.offset: the cumulative sum of len(record_null) for preceding records
        #   e.record_null: encoded version of the record; null-terminated for
        #              mdx files, the raw bytes value for mdd files
        #
        # Also sets self._total_record_len to the total length of all record fields.
        items = sorted(d.items(), key=operator.itemgetter(0))

        self._offset_table = []
        offset = 0
        for key, record in items:
            key_enc = key.encode(self._python_encoding)
            key_null = (key + "\0").encode(self._python_encoding)
            key_len = len(key_enc) // self._encoding_length

            # mdd records are stored as-is; mdx records are encoded text with
            # an extra null character appended.
            if self._is_mdd:
                record_null = record
            else:
                record_null = (record + "\0").encode(self._python_encoding)
            self._offset_table.append(_OffsetTableEntry(
                key=key_enc,
                key_null=key_null,
                key_len=key_len,
                record_null=record_null,
                offset=offset))
            offset += len(record_null)
        self._total_record_len = offset

    def _split_blocks(self, block_type):
        # Split either the records or the keys into blocks for compression.
        #
        # Returns a list of block_type instances, where the decompressed size
        # of each block is (as far as practicable) at most self._block_size.
        # An empty offset table yields an empty list.
        #
        # block_type should be a subclass of _MdxBlock, i.e. either
        # _MdxRecordBlock or _MdxKeyBlock.

        blocks = []
        block_start = 0
        cur_size = 0
        for ind, entry in enumerate(self._offset_table):
            entry_size = block_type._len_block_entry(entry)
            # Flush the entries collected so far if adding this one would make
            # the block larger than self._block_size. Never flush before the
            # first entry, so that an oversized first entry still gets a
            # (single-entry) block instead of producing an empty one.
            if ind > 0 and cur_size + entry_size > self._block_size:
                blocks.append(block_type(
                    self._offset_table[block_start:ind], self._compression_type, self._version))
                cur_size = 0
                block_start = ind
            cur_size += entry_size
        # Whatever remains after the last flush always forms the final block.
        if self._offset_table:
            blocks.append(block_type(
                self._offset_table[block_start:], self._compression_type, self._version))
        return blocks

    def _build_key_blocks(self):
        # Sets self._key_blocks to a list of _MdxKeyBlocks.
        self._key_blocks = self._split_blocks(_MdxKeyBlock)

    def _build_record_blocks(self):
        # Sets self._record_blocks to a list of _MdxRecordBlocks.
        self._record_blocks = self._split_blocks(_MdxRecordBlock)

    def _build_keyb_index(self):
        # Sets self._keyb_index to a bytes object, containing the index of key
        # blocks, in a format suitable for direct writing to the file.
        #
        # Also sets self._keyb_index_decomp_size and, for version 2.0 only,
        # self._keyb_index_comp_size (version 1.2 stores the index uncompressed).
        #
        # Raises ParameterError if index encryption is requested for version 1.2.

        decomp_data = b"".join(b.get_index_entry() for b in self._key_blocks)
        self._keyb_index_decomp_size = len(decomp_data)
        if self._version == "2.0":
            self._keyb_index = _mdx_compress(decomp_data, self._compression_type)
            if self._encrypt_index:
                self._keyb_index = _mdx_encrypt(self._keyb_index)
            self._keyb_index_comp_size = len(self._keyb_index)
        elif self._encrypt_index:
            raise ParameterError("Key index encryption not supported in version 1.2")
        else:
            self._keyb_index = decomp_data

    def _build_recordb_index(self):
        # Sets self._recordb_index to a bytes object, containing the index of
        # record blocks, in a format suitable for direct writing to the file.
        #
        # Also sets self._recordb_index_size.

        self._recordb_index = b"".join(
            b.get_index_entry() for b in self._record_blocks)
        self._recordb_index_size = len(self._recordb_index)

    def _write_key_sect(self, outfile):
        # Writes the key section header, key block index, and all the key
        # blocks to outfile.
        #
        # outfile: a file-like object, opened in binary mode.

        keyblocks_total_size = sum(len(b.get_block()) for b in self._key_blocks)
        if self._version == "2.0":
            preamble = struct.pack(b">QQQQQ",
                                   len(self._key_blocks),
                                   self._num_entries,
                                   self._keyb_index_decomp_size,
                                   self._keyb_index_comp_size,
                                   keyblocks_total_size)
            # The checksum is taken over the unencrypted preamble; masked like
            # every other adler32 use in this module.
            preamble_checksum = struct.pack(b">L", zlib.adler32(preamble) & 0xffffffff)
            if self._encrypt:
                preamble = _salsa_encrypt(preamble, self._encrypt_key)
            outfile.write(preamble)
            outfile.write(preamble_checksum)
        else:
            # Version 1.2: 32-bit fields, no compressed index size, no checksum.
            preamble = struct.pack(b">LLLL",
                                   len(self._key_blocks),
                                   self._num_entries,
                                   self._keyb_index_decomp_size,
                                   keyblocks_total_size)
            if self._encrypt:
                preamble = _salsa_encrypt(preamble, self._encrypt_key)
            outfile.write(preamble)

        outfile.write(self._keyb_index)
        for b in self._key_blocks:
            outfile.write(b.get_block())

    def _write_record_sect(self, outfile):
        # Writes the record section header, record block index, and all the
        # record blocks to outfile.
        #
        # outfile: a file-like object, opened in binary mode.

        recordblocks_total_size = sum(
            len(b.get_block()) for b in self._record_blocks)
        # 64-bit fields in version 2.0, 32-bit fields in version 1.2.
        header_format = b">QQQQ" if self._version == "2.0" else b">LLLL"
        outfile.write(struct.pack(header_format,
                                  len(self._record_blocks),
                                  self._num_entries,
                                  self._recordb_index_size,
                                  recordblocks_total_size))
        outfile.write(self._recordb_index)
        for b in self._record_blocks:
            outfile.write(b.get_block())

    def write(self, outfile):
        """
        Write the mdx (or mdd) file to outfile.

        outfile: a file-like object, opened in binary mode.
        """

        self._write_header(outfile)
        self._write_key_sect(outfile)
        self._write_record_sect(outfile)

    def _write_header(self, f):
        # Writes the file header to f: the byte length of the header text as a
        # big-endian 32-bit integer, the header text (an XML-like tag encoded
        # as UTF-16-LE), and the Adler-32 checksum of the encoded text as a
        # little-endian 32-bit integer.
        #
        # f: a file-like object, opened in binary mode.

        # Bit 0: the key section preamble is encrypted; bit 1: the keyword
        # index is encrypted.
        encrypted = 0
        if self._encrypt_index:
            encrypted |= 2
        if self._encrypt:
            encrypted |= 1

        # The registration code is only embedded when the owner's email or
        # device ID was supplied; otherwise it is left empty.
        register_by_str = ""
        regcode = ""
        if self._encrypt and self._register_by == "email":
            register_by_str = "EMail"
            if self._user_email is not None:
                regcode = encrypt_key(self._encrypt_key, email=self._user_email)
        elif self._encrypt and self._register_by == "device_id":
            register_by_str = "DeviceID"
            if self._user_device_id is not None:
                regcode = encrypt_key(self._encrypt_key, device_id=self._user_device_id)

        # The mdx and mdd headers differ only in the tag name and in the
        # Encoding/Format attributes.
        if self._is_mdd:
            root_tag = "Library_Data"
            format_attrs = [("Format", "")]
        else:
            root_tag = "Dictionary"
            format_attrs = [("Encoding", self._encoding), ("Format", "Html")]

        today = datetime.date.today()
        attributes = [
            ("GeneratedByEngineVersion", self._version),
            ("RequiredEngineVersion", self._version),
            ("Encrypted", encrypted),
        ] + format_attrs + [
            ("CreationDate", "{0.year}-{0.month}-{0.day}".format(today)),
            ("Compact", "No"),
            ("Compat", "No"),
            ("KeyCaseSensitive", "No"),
            # Title and description are free text, so they are HTML-escaped
            # (including quotes) before being placed in attribute values.
            ("Description", escape(self._description, quote=True)),
            ("Title", escape(self._title, quote=True)),
            ("DataSourceFormat", "106"),
            ("StyleSheet", ""),
            ("RegisterBy", register_by_str),
            ("RegCode", regcode),
        ]
        header_text = "<{} {}/>\r\n\x00".format(
            root_tag,
            " ".join('{}="{}"'.format(name, value) for name, value in attributes))
        header_string = header_text.encode("utf_16_le")

        f.write(struct.pack(b">L", len(header_string)))
        f.write(header_string)
        f.write(struct.pack(b"<L", zlib.adler32(header_string) & 0xffffffff))
525
+
526
class _MdxBlock(object):
    # Abstract base class for _MdxRecordBlock and _MdxKeyBlock.
    #
    # In the MDX file format, the keyword section and the record section have
    # a similar structure:
    #
    #    section header
    #    index entry for block 0
    #    ...
    #    index entry for block k
    #    block 0
    #    ...
    #    block k
    #
    # This class represents one such block. It gives record blocks and keyword
    # blocks a common interface, so that the two sections can be built in a
    # uniform manner. Subclasses provide get_index_entry(), _block_entry() and
    # _len_block_entry().

    def __init__(self, offset_table, compression_type, version):
        # Builds the block data from offset_table.
        #
        # offset_table is an iterable containing _OffsetTableEntry objects.
        # Each entry is serialized by the concrete subclass's _block_entry();
        # the concatenation is then passed through _mdx_compress().
        block_cls = type(self)
        pieces = [block_cls._block_entry(entry, version) for entry in offset_table]
        raw = b"".join(pieces)
        self._decomp_size = len(raw)
        self._comp_data = _mdx_compress(raw, compression_type)
        self._comp_size = len(self._comp_data)
        self._version = version

    def get_block(self):
        # Returns a bytes object, containing the (compressed) data for this block.
        return self._comp_data

    def get_index_entry(self):
        # Returns a bytes object, containing the entry for this block in the
        # corresponding key block index or record block index.
        # Must be overridden by subclasses.
        raise NotImplementedError()

    @staticmethod
    def _block_entry(t, version):
        # Returns the data corresponding to a single entry of the offset table.
        #
        # t is an _OffsetTableEntry object.
        # Must be overridden by subclasses.
        raise NotImplementedError()

    @staticmethod
    def _len_block_entry(t):
        # Should be approximately equal to len(_block_entry(t)).
        #
        # Used by MDictWriter._split_blocks() to determine where to split into
        # blocks. Must be overridden by subclasses.
        raise NotImplementedError()
582
+
583
class _MdxRecordBlock(_MdxBlock):
    # A class representing a record block.
    #
    # Can return (in the format suitable for insertion in an mdx file) both
    # the block itself and the entry for that block in the record block index.

    def __init__(self, offset_table, compression_type, version):
        # Builds the data for offset_table.
        #
        # offset_table is an iterable containing _OffsetTableEntry objects.
        #
        # Only the record_null field of each entry is used.
        super().__init__(offset_table, compression_type, version)

    def get_index_entry(self):
        # Returns a bytes object, containing the entry for this block in the
        # record block index: compressed size followed by decompressed size,
        # as big-endian 64-bit (version 2.0) or 32-bit (otherwise) integers.
        size_format = b">QQ" if self._version == "2.0" else b">LL"
        return struct.pack(size_format, self._comp_size, self._decomp_size)

    @staticmethod
    def _block_entry(t, version):
        # A record block is simply the concatenation of the encoded records.
        return t.record_null

    @staticmethod
    def _len_block_entry(t):
        # Exact length of what _block_entry() returns for t.
        return len(t.record_null)
616
+
617
class _MdxKeyBlock(_MdxBlock):
    # A class representing a key block.
    #
    # Can return (in the format suitable for insertion in an mdx file) both
    # the block itself and the entry for that block in the key block index.

    def __init__(self, offset_table, compression_type, version):
        # Builds the data for offset_table.
        #
        # offset_table is a non-empty sequence of _OffsetTableEntry objects.
        #
        # Only uses the key, key_len, key_null and offset fields, and
        # effectively ignores record_null.
        super().__init__(offset_table, compression_type, version)
        self._num_entries = len(offset_table)
        first, last = offset_table[0], offset_table[-1]
        # The index stores null-terminated keys in version 2.0 and
        # unterminated keys otherwise.
        if version == "2.0":
            self._first_key, self._last_key = first.key_null, last.key_null
        else:
            self._first_key, self._last_key = first.key, last.key
        self._first_key_len = first.key_len
        self._last_key_len = last.key_len

    @staticmethod
    def _block_entry(t, version):
        # One key entry: the record offset (64-bit in version 2.0, 32-bit
        # otherwise, big-endian) followed by the null-terminated key.
        offset_format = b">Q" if version == "2.0" else b">L"
        return struct.pack(offset_format, t.offset) + t.key_null

    @staticmethod
    def _len_block_entry(t):
        # This is only accurate for version 2.0 (8-byte offset), but only an
        # approximate size is needed anyway.
        return 8 + len(t.key_null)

    def get_index_entry(self):
        # Returns a bytes object, containing the header data for this block:
        # entry count, first key (length + key), last key (length + key),
        # compressed size and decompressed size.
        if self._version == "2.0":
            long_format, short_format = b">Q", b">H"
        else:
            long_format, short_format = b">L", b">B"
        fields = [
            struct.pack(long_format, self._num_entries),
            struct.pack(short_format, self._first_key_len),
            self._first_key,
            struct.pack(short_format, self._last_key_len),
            self._last_key,
            struct.pack(long_format, self._comp_size),
            struct.pack(long_format, self._decomp_size),
        ]
        return b"".join(fields)
670
+
671
+
672
+
673
+
@@ -0,0 +1,15 @@
1
+ Metadata-Version: 2.4
2
+ Name: fstdtools
3
+ Version: 0.0.1
4
+ Summary: CLI tools for fstd dictionary to pack/unpack/list/info/convert.
5
+ Author-email: Moujie Qin <moujieqin@gmail.com>
6
+ Requires-Python: >=3.9
7
+ Description-Content-Type: text/markdown
8
+ License-File: LICENSE
9
+ Requires-Dist: fstd>=0.1.3
10
+ Requires-Dist: click>=8.0
11
+ Requires-Dist: tqdm>=4.64.0
12
+ Dynamic: license-file
13
+
14
+ # fstdtools
15
+ A command line tool for fstd dictionary to pack/unpack/list/info/convert.
@@ -0,0 +1,16 @@
1
+ fstdtools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ fstdtools/__main__.py,sha256=vkUoLlkcsZRTbXSCvLBFR2M3YbgrVCZ6lmL1ioYYAtk,26
3
+ fstdtools/cli.py,sha256=jgBw5ML8wd_pXc2ShqfANCYRKmFRWUDZUrs9h-L3C40,6334
4
+ fstdtools/convert.py,sha256=-vtjBrTPm6bFvGwJkiUpVJC74XMbadDNYUf8H9j-5zA,2490
5
+ fstdtools/mdict/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
+ fstdtools/mdict/lzo.py,sha256=a_h-j97xwBw769qOtrKK23RlDLWdIR2V6C1ir0htkrs,6503
7
+ fstdtools/mdict/pureSalsa20.py,sha256=C8XmnkQJRdJqQ9ZSJwZ68Jr5jIdSwmad5Mvh5khp_uc,14236
8
+ fstdtools/mdict/readmdict.py,sha256=0nTqxMANy79CgBA7H26lTyzXwZle-YVNgefcYniqpC8,29522
9
+ fstdtools/mdict/ripemd128.py,sha256=wKvevXO17r6tpDDGI2UCydfMvyIKAiqmJ9QhYCkBdJw,3663
10
+ fstdtools/mdict/writemdict.py,sha256=VooTRwcFFnGSq3nL4wqbJteObIBYgjoCQ7Evp2z1H7c,22748
11
+ fstdtools-0.0.1.dist-info/licenses/LICENSE,sha256=TmFV_q9EIz5aZiT_DFSf6uKwdZYCryehAorcPXVciNM,1066
12
+ fstdtools-0.0.1.dist-info/METADATA,sha256=hL_bOeRH4ZDIfJScAmitdFuzY2MFdSYx6tWBjQPAyJk,448
13
+ fstdtools-0.0.1.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
14
+ fstdtools-0.0.1.dist-info/entry_points.txt,sha256=BqmW9Ge5iZg5WfqdBPTSp0uJ2deWqhv0Inzid6iXJ-8,48
15
+ fstdtools-0.0.1.dist-info/top_level.txt,sha256=feh-uj4hbmb7wF_o_Brxx9CjUeCIqT-xhQ21k9MjkA4,10
16
+ fstdtools-0.0.1.dist-info/RECORD,,