PyNeoFile 0.19.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pyneofile.py ADDED
@@ -0,0 +1,1430 @@
1
+
2
+ # -*- coding: utf-8 -*-
3
+ from __future__ import print_function, unicode_literals, division, absolute_import
4
+
5
+ """
6
+ pyneofile.py — Alternate ArchiveFile core with Py2/3 compatible logic.
7
+
8
+ Features:
9
+ - Pack / unpack / repack / archive_to_array
10
+ - Validation and listing helpers (lowercase names)
11
+ - INI-driven format detection (prefers PYNEOFILE_INI / pyneofile.ini)
12
+ - Compression: zlib, gzip, bz2 (stdlib), xz/lzma when available (Py3)
13
+ - Size-based 'auto' compression policy
14
+ - Checksums (header/json/content) using stored bytes (padded CRC-32)
15
+ - Optional converters: ZIP/TAR (stdlib), RAR via rarfile, 7z via py7zr
16
+ - In-memory mode: bytes input, and bytes output when outfile is None/"-"
17
+ """
18
+
19
+ import os, sys, io, stat, time, json, binascii, hashlib, re, codecs
20
+ try:
21
+ from io import open as _iopen
22
+ except Exception:
23
+ _iopen = open # Py2 fallback
24
+
25
+ # ---------------- Python 2/3 shims ----------------
26
+ try:
27
+ basestring
28
+ except NameError:
29
+ basestring = (str,)
30
+
31
+ try:
32
+ unicode
33
+ except NameError:
34
+ unicode = str # Py3 alias
35
+
36
+ try:
37
+ from io import BytesIO
38
+ except ImportError:
39
+ from cStringIO import StringIO as BytesIO # Py2 fallback
40
+
41
+ # INI support (Py2/3)
42
+ try:
43
+ import configparser as _cfg
44
+ except Exception:
45
+ import ConfigParser as _cfg # Py2
46
+
47
+ # --------------- Compression shim (stdlib only) ---------------
48
+ import zlib, bz2, gzip
49
+ try:
50
+ import lzma as _lzma # Py3
51
+ _HAVE_LZMA = True
52
+ except Exception:
53
+ _lzma = None
54
+ _HAVE_LZMA = False
55
+
56
+ __program_name__ = "PyNeoFile"
57
+ __project__ = __program_name__
58
+ __project_url__ = "https://github.com/GameMaker2k/PyNeoFile"
59
+ __version_info__ = (0, 19, 8, "RC 1", 1)
60
+ __version_date_info__ = (2025, 8, 14, "RC 1", 1)
61
+ __version_date__ = str(__version_date_info__[0]) + "." + str(
62
+ __version_date_info__[1]).zfill(2) + "." + str(__version_date_info__[2]).zfill(2)
63
+ __revision__ = __version_info__[3]
64
+ __revision_id__ = "$Id: c7f8ce877ea9b59fca86a4110cc1eec3cdee518e $"
65
+ if(__version_info__[4] is not None):
66
+ __version_date_plusrc__ = __version_date__ + \
67
+ "-" + str(__version_date_info__[4])
68
+ if(__version_info__[4] is None):
69
+ __version_date_plusrc__ = __version_date__
70
+ if(__version_info__[3] is not None):
71
+ __version__ = str(__version_info__[0]) + "." + str(__version_info__[
72
+ 1]) + "." + str(__version_info__[2]) + " " + str(__version_info__[3])
73
+ if(__version_info__[3] is None):
74
+ __version__ = str(__version_info__[0]) + "." + str(__version_info__[1]) + "." + str(__version_info__[2])
75
+
76
+ def _normalize_algo(algo):
77
+ if not algo:
78
+ return 'none'
79
+ a = (algo or 'none').lower()
80
+ if a in ('xz', 'lzma'):
81
+ return 'xz'
82
+ if a in ('gz', 'gzip'):
83
+ return 'gzip'
84
+ if a in ('deflate', 'z'):
85
+ return 'zlib'
86
+ if a in ('bzip2', 'bzip', 'bz'):
87
+ return 'bz2'
88
+ if a == 'auto':
89
+ return 'auto'
90
+ return a
91
+
92
def _compress_bytes(data, algo='none', level=None):
    """Compress *data* with *algo*; return (stored_bytes, used_algo).

    A *level* of None selects each codec's default.  Raises RuntimeError
    when xz is requested but the lzma module is unavailable, and
    ValueError for unrecognized algorithm names.
    """
    algo = _normalize_algo(algo)
    if algo in ('none', ''):
        return data, 'none'
    if algo == 'zlib':
        if level is None:
            return zlib.compress(data, zlib.Z_DEFAULT_COMPRESSION), 'zlib'
        return zlib.compress(data, int(level)), 'zlib'
    if algo == 'gzip':
        buf = BytesIO()
        writer = gzip.GzipFile(fileobj=buf, mode='wb',
                               compresslevel=(6 if level is None else int(level)))
        try:
            writer.write(data)
        finally:
            writer.close()
        return buf.getvalue(), 'gzip'
    if algo == 'bz2':
        extra = () if level is None else (int(level),)
        return bz2.compress(data, *extra), 'bz2'
    if algo == 'xz':
        if not _HAVE_LZMA:
            raise RuntimeError("xz/lzma compression not available on this Python (needs 3.x lzma)")
        if level is None:
            return _lzma.compress(data), 'xz'
        return _lzma.compress(data, preset=int(level)), 'xz'
    raise ValueError("Unknown compression algorithm: %r" % algo)
120
+
121
def _decompress_bytes(data, algo='none'):
    """Inverse of _compress_bytes: expand *data* stored with *algo*.

    Raises RuntimeError when xz data is given without lzma support and
    ValueError for unrecognized algorithm names.
    """
    algo = _normalize_algo(algo)
    if algo in ('none', ''):
        return data
    if algo == 'zlib':
        return zlib.decompress(data)
    if algo == 'gzip':
        reader = gzip.GzipFile(fileobj=BytesIO(data), mode='rb')
        try:
            return reader.read()
        finally:
            reader.close()
    if algo == 'bz2':
        return bz2.decompress(data)
    if algo == 'xz':
        if not _HAVE_LZMA:
            raise RuntimeError("xz/lzma decompression not available on this Python (needs 3.x lzma)")
        return _lzma.decompress(data)
    raise ValueError("Unknown compression algorithm: %r" % algo)
141
+
142
+ # --- Auto compression policy thresholds (bytes) ---
143
+ _AUTO_XZ_MIN = 2 * 1024 * 1024 # >= 2 MiB → prefer xz (Py3 only)
144
+ _AUTO_BZ2_MIN = 256 * 1024 # >= 256 KiB → prefer bz2 (Py2 or Py3)
145
+ _AUTO_ZLIB_MIN = 16 * 1024 # >= 16 KiB → zlib; smaller often not worth compressing
146
+
147
def _auto_pick_for_size(size_bytes):
    """Choose (algo, level_or_None) for the 'auto' compression policy.

    Tiny payloads stay uncompressed; large payloads prefer xz when the
    lzma module exists, then bz2, and mid-sized payloads get zlib.
    """
    if size_bytes < _AUTO_ZLIB_MIN:
        # Small payloads: compression overhead rarely pays off.
        return ('none', None)
    if size_bytes >= _AUTO_XZ_MIN and _HAVE_LZMA:
        return ('xz', 6)
    if size_bytes >= _AUTO_BZ2_MIN:
        return ('bz2', 9)
    return ('zlib', 6)
156
+
157
+ # -----------------------------------------------------------------------------
158
+ # In-memory I/O helpers
159
+ # -----------------------------------------------------------------------------
160
+
161
+ def _wrap_infile(infile):
162
+ """Return (fp, close_me). Accepts path, file-like, or bytes/bytearray."""
163
+ if isinstance(infile, (bytes, bytearray, memoryview)):
164
+ return BytesIO(bytes(infile)), True
165
+ if hasattr(infile, 'read'):
166
+ return infile, False
167
+ return _iopen(infile, 'rb'), True
168
+
169
+ def _wrap_outfile(outfile):
170
+ """Return (fp, close_me, to_bytes). If outfile is None or '-', buffer to bytes."""
171
+ if outfile in (None, '-', b'-'):
172
+ bio = BytesIO()
173
+ return bio, False, True
174
+ if hasattr(outfile, 'write'):
175
+ return outfile, False, False
176
+ return _iopen(outfile, 'wb'), True, False
177
+
178
+ def _normalize_pack_inputs(infiles):
179
+ """Normalize in-memory inputs into items for pack_iter_neo.
180
+ Supported forms:
181
+ - dict {name: bytes_or_None} (None => directory if name endswith('/'))
182
+ - list/tuple of (name, bytes) or (name, is_dir, bytes_or_None) or dicts
183
+ - single bytes/bytearray => [('memory.bin', False, bytes)]
184
+ - anything else => None (caller will do filesystem walk)
185
+ """
186
+ if isinstance(infiles, dict):
187
+ items = []
188
+ for k, v in infiles.items():
189
+ name = str(k)
190
+ is_dir = bool(v is None or name.endswith('/'))
191
+ items.append({'name': name, 'is_dir': is_dir,
192
+ 'data': (None if is_dir else (bytes(v) if v is not None else b''))})
193
+ return items
194
+ if isinstance(infiles, (bytes, bytearray, memoryview)):
195
+ return [{'name': 'memory.bin', 'is_dir': False, 'data': bytes(infiles)}]
196
+ if isinstance(infiles, (list, tuple)) and infiles:
197
+ def _as_item(x):
198
+ if isinstance(x, dict):
199
+ return x
200
+ if isinstance(x, (list, tuple)):
201
+ if len(x) == 2:
202
+ n, b = x
203
+ return {'name': n, 'is_dir': False, 'data': (bytes(b) if b is not None else b'')}
204
+ if len(x) >= 3:
205
+ n, is_dir, b = x[0], bool(x[1]), x[2]
206
+ return {'name': n, 'is_dir': is_dir,
207
+ 'data': (None if is_dir else (bytes(b) if b is not None else b''))}
208
+ return None
209
+ items = []
210
+ for it in infiles:
211
+ conv = _as_item(it)
212
+ if conv is None:
213
+ return None
214
+ items.append(conv)
215
+ return items
216
+ return None
217
+
218
+ # ---------------- Format helpers ----------------
219
def _ver_digits(verstr):
    """Keep only the digit characters of *verstr* ('v001' -> '001').

    Preserves zero-padding; empty input or no digits yields '001'.
    """
    if not verstr:
        return '001'
    text = unicode(verstr)
    kept = ''.join(ch for ch in text if ch.isdigit())
    return kept or '001'
225
+
226
+ def _default_formatspecs():
227
+ return {
228
+ 'format_magic': 'ArchiveFile',
229
+ 'format_ver': '001',
230
+ 'format_delimiter': '\x00',
231
+ 'new_style': True,
232
+ }
233
+
234
+ __formatspecs_ini_cache__ = None
235
+
236
+ def _decode_delim_escape(s):
237
+ try:
238
+ return codecs.decode(s, 'unicode_escape')
239
+ except Exception:
240
+ return s
241
+
242
def _load_formatspecs_from_ini(paths=None, prefer_section=None):
    """
    Load format definition from an INI file.
    Search order:
      - explicit 'paths'
      - env PYNEOFILE_INI, then PYARCHIVE_INI
      - ./pyneofile.ini, ./archivefile.ini, ./catfile.ini, ./foxfile.ini
    Section selection:
      - prefer_section
      - [config] default=... if present
      - first non-[config] section
    Returns a formatspecs dict, or None when no usable INI is found.
    """
    cands = []
    if paths:
        if isinstance(paths, basestring):
            cands.append(paths)
        else:
            cands.extend(paths)
    envp = os.environ.get('PYNEOFILE_INI') or os.environ.get('PYARCHIVE_INI')
    if envp:
        cands.append(envp)
    # Fallback candidates, kept in sync with the search order documented
    # above (previously only 'pyneofile.ini' was actually tried).
    cands.extend(['pyneofile.ini', 'archivefile.ini', 'catfile.ini', 'foxfile.ini'])

    picked = None
    for p in cands:
        if os.path.isfile(p):
            picked = p
            break
    if not picked:
        return None

    try:
        cp = _cfg.ConfigParser() if hasattr(_cfg, 'ConfigParser') else _cfg.RawConfigParser()
        if hasattr(cp, 'read_file'):
            with _iopen(picked, 'r') as fh:
                cp.read_file(fh)
        else:
            # Py2 ConfigParser has no read_file().
            cp.read(picked)
    except Exception:
        # Unparseable INI: fall back to defaults rather than failing.
        return None

    # Pick the section: explicit preference, then [config] default=, then
    # the first section that is not the [config] block itself.
    sec = None
    if prefer_section and cp.has_section(prefer_section):
        sec = prefer_section
    else:
        defname = None
        if cp.has_section('config'):
            try:
                defname = cp.get('config', 'default')
            except Exception:
                defname = None
        if defname and cp.has_section(defname):
            sec = defname
        else:
            for name in cp.sections():
                if name.lower() != 'config':
                    sec = name
                    break
    if not sec:
        return None

    def _get(name, default=None):
        # Missing options fall back to the provided default.
        try:
            return cp.get(sec, name)
        except Exception:
            return default

    magic = _get('magic', 'NeoFile')
    ver = _get('ver', '001')
    delim = _get('delimiter', '\\x00')
    newst = _get('newstyle', 'true')
    ext = _get('extension', '.neo')

    return {
        'format_magic': magic,
        'format_ver': _ver_digits(ver),
        'format_delimiter': _decode_delim_escape(delim),
        'new_style': (str(newst).lower() in ('1', 'true', 'yes', 'on')),
        'format_name': sec,
        'extension': ext,
    }
325
+
326
+ def _ensure_formatspecs(specs):
327
+ global __formatspecs_ini_cache__
328
+ if specs:
329
+ return specs
330
+ if __formatspecs_ini_cache__ is None:
331
+ __formatspecs_ini_cache__ = _load_formatspecs_from_ini()
332
+ return __formatspecs_ini_cache__ or _default_formatspecs()
333
+
334
+ def _to_bytes(s):
335
+ if isinstance(s, bytes):
336
+ return s
337
+ if isinstance(s, (bytearray, memoryview)):
338
+ return bytes(s)
339
+ if not isinstance(s, basestring):
340
+ s = str(s)
341
+ return s.encode('UTF-8')
342
+
343
def _append_null(b, delim):
    """Return *b* coerced to bytes with the delimiter appended."""
    payload = b if isinstance(b, bytes) else _to_bytes(b)
    return payload + _to_bytes(delim)
347
+
348
def _append_nulls(seq, delim):
    """Concatenate every item of *seq*, each terminated by the delimiter."""
    return b''.join(_append_null(item, delim) for item in seq)
353
+
354
+ def _hex(n):
355
+ return ("%x" % int(n)).lower()
356
+
357
+ def _crc32(data):
358
+ if not isinstance(data, bytes):
359
+ data = _to_bytes(data)
360
+ return ("%08x" % (binascii.crc32(data) & 0xffffffff)).lower()
361
+
362
+ def _sha_like(name, data):
363
+ if not isinstance(data, bytes):
364
+ data = _to_bytes(data)
365
+ try:
366
+ h = hashlib.new(name)
367
+ except ValueError:
368
+ raise ValueError("Unsupported checksum: %r" % name)
369
+ h.update(data)
370
+ return h.hexdigest()
371
+
372
+ def _checksum(data, cstype, text=False):
373
+ if cstype in (None, '', 'none'):
374
+ return '0'
375
+ if text and not isinstance(data, bytes):
376
+ data = _to_bytes(data)
377
+ if (cstype or '').lower() == 'crc32':
378
+ return _crc32(data)
379
+ return _sha_like(cstype.lower(), data)
380
+
381
+ # ---------------- Header builders ----------------
382
def _write_global_header(fp, numfiles, encoding, checksumtype, extradata, formatspecs):
    """Write the archive-level header to *fp*.

    Wire layout (each field delimiter-terminated): magic+version digits,
    header-size hex, field-count hex, encoding, platform, file-count hex,
    extras size hex, extras count hex (+ extra items), checksum type, and
    finally a checksum of everything written so far.

    *extradata* may be a list of strings, or a dict (JSON-encoded and
    base64-wrapped into a single extra item).
    """
    delim = formatspecs['format_delimiter']
    magic = formatspecs['format_magic']
    ver_digits = _ver_digits(formatspecs.get('format_ver','001'))

    # extras blob: count + items
    if isinstance(extradata, dict) and extradata:
        # Non-empty dict: serialize compactly, then base64-wrap so the
        # value survives delimiter-based field framing.
        payload = json.dumps(extradata, separators=(',', ':')).encode('UTF-8')
        try:
            import base64
            extradata = [base64.b64encode(payload).decode('UTF-8')]
        except Exception:
            extradata = []
    elif isinstance(extradata, dict):
        # Empty dict: same as "no extras".
        extradata = []

    extrafields = _hex(len(extradata))
    extras_blob = _append_null(extrafields, delim)
    if extradata:
        extras_blob += _append_nulls(extradata, delim)
    extras_size_hex = _hex(len(extras_blob))

    # Normalize the platform tag to 'nt'/'posix' when possible.
    platform_name = os.name if os.name in ('nt', 'posix') else sys.platform
    fnumfiles_hex = _hex(int(numfiles))

    tmpoutlist = [encoding, platform_name, fnumfiles_hex, extras_size_hex, extrafields]
    # Field count kept identical to the legacy writer's arithmetic.
    tmpoutlen = 3 + len(tmpoutlist) + len(extradata) + 1 # compatibility
    tmpoutlen_hex = _hex(tmpoutlen)

    body = _append_nulls([tmpoutlen_hex, encoding, platform_name, fnumfiles_hex, extras_size_hex, extrafields], delim)
    if extradata:
        body += _append_nulls(extradata, delim)
    body += _append_null(checksumtype, delim)

    prefix = _append_null(magic + ver_digits, delim)
    # headersize counts the body plus one empty trailing field, minus one
    # delimiter — matching what the reader expects to skip.
    tmpfileoutstr = body + _append_null('', delim)
    headersize_hex = _hex(len(tmpfileoutstr) - len(_to_bytes(delim)))
    out = prefix + _append_null(headersize_hex, delim) + body
    # The header checksum covers everything emitted so far (magic..body).
    header_cs = _checksum(out, checksumtype, text=True)
    out += _append_null(header_cs, delim)
    fp.write(out)
423
+
424
def _build_file_header_bytes(filemeta, jsondata, content_bytes_stored, checksumtypes, extradata, formatspecs):
    """Return full bytes for a record (header+json+NUL+content+NUL).

    *filemeta* supplies the 25 fixed metadata fields (missing keys get
    defaults); *jsondata* is an optional JSON-serializable payload;
    *content_bytes_stored* is the already-compressed content exactly as
    stored; *checksumtypes* is (header, content, json) checksum names.
    """
    delim = formatspecs['format_delimiter']
    # Shorthand: hex-encode an integer field.
    def H(x): return _hex(int(x))

    fname = filemeta['fname']
    if not re.match(r'^[\./]', fname):
        # Normalize bare names to './name' so readers see uniform paths.
        fname = './' + fname

    # The 25 fixed fields, in wire order; timestamps default to "now".
    fields = [
        H(filemeta.get('ftype', 0)),
        filemeta.get('fencoding', 'UTF-8'),
        filemeta.get('fcencoding', 'UTF-8'),
        fname,
        filemeta.get('flinkname', ''),
        H(filemeta.get('fsize', 0)),
        H(filemeta.get('fatime', int(time.time()))),
        H(filemeta.get('fmtime', int(time.time()))),
        H(filemeta.get('fctime', int(time.time()))),
        H(filemeta.get('fbtime', int(time.time()))),
        H(filemeta.get('fmode', stat.S_IFREG | 0o666)),
        H(filemeta.get('fwinattributes', 0)),
        filemeta.get('fcompression', ''),
        H(filemeta.get('fcsize', 0)),
        H(filemeta.get('fuid', 0)),
        filemeta.get('funame', ''),
        H(filemeta.get('fgid', 0)),
        filemeta.get('fgname', ''),
        H(filemeta.get('fid', filemeta.get('index', 0))),
        H(filemeta.get('finode', filemeta.get('index', 0))),
        H(filemeta.get('flinkcount', 1)),
        H(filemeta.get('fdev', 0)),
        H(filemeta.get('fdev_minor', 0)),
        H(filemeta.get('fdev_major', 0)),
        # "seek next" marker: '+' plus the delimiter length.
        "+" + str(len(delim)),
    ]

    # JSON payload
    fjsontype = 'json' if jsondata else 'none'
    if jsondata:
        raw_json = json.dumps(jsondata, separators=(',', ':')).encode('UTF-8')
        json_cs_type = checksumtypes[2]
        # fjsonlen is the element count of the payload, not its byte size.
        fjsonlen_hex = _hex(len(jsondata) if hasattr(jsondata, '__len__') else 0)
        fjsonsize_hex = _hex(len(raw_json))
        fjsoncs = _checksum(raw_json, json_cs_type, text=True)
    else:
        raw_json = b''
        json_cs_type = 'none'
        fjsonlen_hex = '0'
        fjsonsize_hex = '0'
        fjsoncs = '0'

    # extras (mirrors global)
    if isinstance(extradata, dict) and extradata:
        payload = json.dumps(extradata, separators=(',', ':')).encode('UTF-8')
        try:
            import base64
            extradata = [base64.b64encode(payload).decode('UTF-8')]
        except Exception:
            extradata = []
    elif isinstance(extradata, dict):
        extradata = []

    extrafields = _hex(len(extradata))
    extras_blob = _append_null(extrafields, delim)
    if extradata:
        extras_blob += _append_nulls(extradata, delim)
    extras_size_hex = _hex(len(extras_blob))

    # Assemble the full field list: fixed fields, JSON meta, extras,
    # then the checksum TYPE fields (values are appended after hashing).
    rec_fields = []
    rec_fields.extend(fields)
    rec_fields.extend([fjsontype, fjsonlen_hex, fjsonsize_hex, json_cs_type, fjsoncs])
    rec_fields.extend([extras_size_hex, extrafields])
    if extradata:
        rec_fields.extend(extradata)

    header_cs_type = checksumtypes[0]
    # Empty content gets no content checksum.
    content_cs_type = checksumtypes[1] if len(content_bytes_stored) > 0 else 'none'
    rec_fields.extend([header_cs_type, content_cs_type])

    record_fields_len_hex = _hex(len(rec_fields) + 2) # include two checksum VALUE fields
    header_no_cs = _append_nulls(rec_fields, delim)

    # Measure the header with two empty placeholder fields standing in
    # for the checksum values, so headersize is stable before hashing.
    tmp_with_placeholders = _append_null(record_fields_len_hex, delim) + header_no_cs
    tmp_with_placeholders += _append_null('', delim) + _append_null('', delim)
    headersize_hex = _hex(len(tmp_with_placeholders) - len(_to_bytes(delim)))

    header_with_sizes = _append_null(headersize_hex, delim) + _append_null(record_fields_len_hex, delim) + header_no_cs

    # Header checksum covers sizes + fields; content checksum covers the
    # stored (possibly compressed) bytes, not the expanded content.
    header_checksum = _checksum(header_with_sizes, header_cs_type, text=True)
    content_checksum = _checksum(content_bytes_stored, content_cs_type, text=False)

    header_full = header_with_sizes + _append_nulls([header_checksum, content_checksum], delim)

    out = header_full + raw_json + _to_bytes(delim) + content_bytes_stored + _to_bytes(delim)
    return out
520
+
521
+ # --------------- Reader helpers ---------------
522
+ def _read_cstring(fp, delim):
523
+ d = _to_bytes(delim)
524
+ out = []
525
+ while True:
526
+ b = fp.read(1)
527
+ if not b:
528
+ break
529
+ out.append(b)
530
+ if len(out) >= len(d) and b''.join(out[-len(d):]) == d:
531
+ return b''.join(out[:-len(d)])
532
+ return b''
533
+
534
def _read_fields(fp, n, delim):
    """Read *n* delimiter-terminated fields from *fp*, UTF-8 decoded."""
    return [_read_cstring(fp, delim).decode('UTF-8') for _ in range(int(n))]
539
+
540
def _parse_global_header(fp, formatspecs, skipchecksum=False):
    """Read the archive-level header from *fp* and return a summary dict.

    Fields are consumed strictly in the order the writer emitted them;
    the returned dict carries an empty 'ffilelist' for the caller to
    fill with records.

    NOTE(review): the trailing header checksum is read but never
    verified here (regardless of *skipchecksum*), and neither is the
    magic/version string — confirm whether validation is intended.
    """
    delim = formatspecs['format_delimiter']
    magicver = _read_cstring(fp, delim).decode('UTF-8')
    _ = _read_cstring(fp, delim) # headersize_hex

    tmpoutlenhex = _read_cstring(fp, delim).decode('UTF-8')
    fencoding = _read_cstring(fp, delim).decode('UTF-8')
    fostype = _read_cstring(fp, delim).decode('UTF-8')
    # Counts are stored as hex; empty field means zero.
    fnumfiles = int(_read_cstring(fp, delim).decode('UTF-8') or '0', 16)
    _ = _read_cstring(fp, delim) # extras_size
    extrafields = int(_read_cstring(fp, delim).decode('UTF-8') or '0', 16)
    extras = []
    for _i in range(extrafields):
        extras.append(_read_cstring(fp, delim).decode('UTF-8'))
    checksumtype = _read_cstring(fp, delim).decode('UTF-8')
    _header_cs = _read_cstring(fp, delim).decode('UTF-8')
    return {'fencoding': fencoding, 'fnumfiles': fnumfiles, 'fostype': fostype,
            'fextradata': extras, 'fchecksumtype': checksumtype,
            'ffilelist': [], 'fformatspecs': formatspecs}
559
+
560
def _index_json_and_checks(vals):
    """Index JSON meta and checksum positions for a header field list `vals`.

    The writer emits 25 fixed fields, then JSON metadata, extras, and
    checksum fields.  Older records omit the JSON element-count field,
    so this sniffs the shape: two hex fields followed by a known
    checksum-type name means the 5-field (new) JSON layout, otherwise
    the 4-field (old) layout is assumed.  Returns a dict of index
    tuples: 'json' -> (type, len_or_None, size, cs_type, cs_value),
    'cstypes' -> (header, content), 'csvals' -> (header, content).
    """
    def _is_hex(s):
        return bool(s) and all(c in '0123456789abcdefABCDEF' for c in s)

    if len(vals) < 25:
        raise ValueError("Record too short to index JSON/checksum meta; got %d fields" % len(vals))

    # JSON metadata starts right after the 25 fixed fields.
    idx = 25
    fjsontype = vals[idx]; idx += 1

    v2 = vals[idx] if idx < len(vals) else ''
    v3 = vals[idx + 1] if idx + 1 < len(vals) else ''
    v4 = vals[idx + 2] if idx + 2 < len(vals) else ''

    cs_candidates = set(['none','crc32','md5','sha1','sha224','sha256','sha384','sha512','blake2b','blake2s'])

    if _is_hex(v2) and _is_hex(v3) and v4.lower() in cs_candidates:
        # New layout: type, element-count, byte-size, cs-type, cs-value.
        idx_json_type = idx - 1
        idx_json_len = idx
        idx_json_size = idx + 1
        idx_json_cst = idx + 2
        idx_json_cs = idx + 3
        idx += 4
    else:
        # Old layout without the element-count field.
        idx_json_type = idx - 1
        idx_json_len = None
        idx_json_size = idx
        idx_json_cst = idx + 1
        idx_json_cs = idx + 2
        idx += 3

    if idx + 2 > len(vals):
        raise ValueError("Missing extras header fields")

    # Extras: size field, count field, then `count` inline items to skip.
    idx_extras_size = idx
    idx_extras_count = idx + 1
    try:
        count_int = int(vals[idx_extras_count] or '0', 16)
    except Exception:
        raise ValueError("Extras count not hex; got %r" % vals[idx_extras_count])
    idx = idx + 2 + count_int

    if idx + 4 > len(vals):
        raise ValueError("Missing checksum types/values in header")

    # Final four fields: header/content checksum types, then values.
    idx_header_cs_type = idx
    idx_content_cs_type = idx + 1
    idx_header_cs = idx + 2
    idx_content_cs = idx + 3

    return {
        'json': (idx_json_type, idx_json_len, idx_json_size, idx_json_cst, idx_json_cs),
        'cstypes': (idx_header_cs_type, idx_content_cs_type),
        'csvals': (idx_header_cs, idx_content_cs),
    }
616
+
617
def _parse_record(fp, formatspecs, listonly=False, skipchecksum=False, uncompress=True):
    """Read one file record from *fp*; return its dict, or None at the
    end-of-archive marker ('0','0').

    *listonly* seeks past content instead of reading it; *skipchecksum*
    disables JSON/content verification; *uncompress* expands the stored
    bytes when a compression algorithm is recorded.
    """
    delim = formatspecs['format_delimiter']
    dbytes = _to_bytes(delim)

    # Two consecutive '0' fields mark the end of the archive; otherwise
    # the first two fields are header size and field count.
    first = _read_cstring(fp, delim)
    if first == b'0':
        second = _read_cstring(fp, delim)
        if second == b'0':
            return None
        headersize_hex = first.decode('UTF-8')
        fields_len_hex = second.decode('UTF-8')
    else:
        headersize_hex = first.decode('UTF-8')
        fields_len_hex = _read_cstring(fp, delim).decode('UTF-8')

    try:
        n_fields = int(fields_len_hex, 16)
    except Exception:
        raise ValueError("Bad record field-count hex: %r" % fields_len_hex)

    vals = _read_fields(fp, n_fields, delim)
    if len(vals) < 25:
        raise ValueError("Record too short: expected >=25 header fields, got %d" % len(vals))

    # The 25 fixed fields, in the writer's wire order.
    (ftypehex, fencoding, fcencoding, fname, flinkname,
     fsize_hex, fatime_hex, fmtime_hex, fctime_hex, fbtime_hex,
     fmode_hex, fwinattrs_hex, fcompression, fcsize_hex,
     fuid_hex, funame, fgid_hex, fgname, fid_hex, finode_hex,
     flinkcount_hex, fdev_hex, fdev_minor_hex, fdev_major_hex,
     fseeknextfile) = vals[:25]

    # Locate the variable-position JSON/extras/checksum fields.
    idx = _index_json_and_checks(vals)
    (idx_json_type, idx_json_len, idx_json_size, idx_json_cst, idx_json_cs) = idx['json']
    (idx_header_cs_type, idx_content_cs_type) = idx['cstypes']
    (idx_header_cs, idx_content_cs) = idx['csvals']

    fjsonsize_hex = vals[idx_json_size] or '0'
    try:
        fjsonsize = int(fjsonsize_hex, 16)
    except Exception:
        raise ValueError("Bad JSON size hex: %r" % fjsonsize_hex)

    json_bytes = fp.read(fjsonsize)
    fp.read(len(dbytes))  # consume the delimiter after the JSON blob

    # Read content (stored bytes)
    fsize = int(fsize_hex, 16)
    fcsize = int(fcsize_hex, 16)
    # Compressed entries store fcsize bytes; everything else stores fsize.
    read_size = fcsize if (fcompression not in ('', 'none', 'auto') and fcsize > 0) else fsize

    content_stored = b''
    if read_size:
        if listonly:
            fp.seek(read_size, io.SEEK_CUR)
        else:
            content_stored = fp.read(read_size)
    fp.read(len(dbytes))  # consume the delimiter after the content

    # Verify checksums (header json/content)
    # NOTE(review): header_cs_val is extracted but never compared below —
    # confirm whether header verification was intended.
    header_cs_type = vals[idx_header_cs_type]
    content_cs_type = vals[idx_content_cs_type]
    header_cs_val = vals[idx_header_cs]
    content_cs_val = vals[idx_content_cs]
    json_cs_type = vals[idx_json_cst]
    json_cs_val = vals[idx_json_cs]

    if fjsonsize and not skipchecksum:
        if _checksum(json_bytes, json_cs_type, text=True) != json_cs_val:
            raise ValueError("JSON checksum mismatch for %s" % fname)

    # Content checksum is computed on the stored (compressed) bytes.
    if not skipchecksum and read_size and not listonly:
        if _checksum(content_stored, content_cs_type, text=False) != content_cs_val:
            raise ValueError("Content checksum mismatch for %s" % fname)

    # Optionally decompress for returned content
    content_ret = content_stored
    if not listonly and uncompress and fcompression not in ('', 'none', 'auto'):
        try:
            content_ret = _decompress_bytes(content_stored, fcompression)
        except RuntimeError:
            # Codec unavailable on this interpreter: hand back stored bytes.
            content_ret = content_stored

    if not re.match(r'^[\./]', fname):
        fname = './' + fname

    return {
        'fid': int(fid_hex, 16),
        'finode': int(finode_hex, 16),
        'fname': fname,
        'flinkname': flinkname,
        'ftype': int(ftypehex, 16),
        'fsize': fsize,
        'fcsize': fcsize,
        'fatime': int(fatime_hex, 16),
        'fmtime': int(fmtime_hex, 16),
        'fctime': int(fctime_hex, 16),
        'fbtime': int(fbtime_hex, 16),
        'fmode': int(fmode_hex, 16),
        'fwinattributes': int(fwinattrs_hex, 16),
        'fuid': int(fuid_hex, 16),
        'funame': funame,
        'fgid': int(fgid_hex, 16),
        'fgname': fgname,
        'fcompression': fcompression,
        'fseeknext': fseeknextfile,
        'fjson': (json.loads(json_bytes.decode('UTF-8') or 'null') if fjsonsize else {}),
        'fcontent': (None if listonly else content_ret),
    }
725
+
726
+ # ---------------- Public API ----------------
727
def pack_neo(infiles, outfile=None, formatspecs=None,
             checksumtypes=("crc32","crc32","crc32"),
             encoding="UTF-8",
             compression="auto",
             compression_level=None):
    """Pack files/dirs to an archive file or return bytes when outfile is None/'-'.

    *infiles* may be in-memory input (dict/list/bytes — delegated to
    pack_iter_neo), a single path, or an iterable of paths; directories
    are walked recursively.  *checksumtypes* is (header, content, json).

    NOTE(review): pack_iter_neo is referenced but not defined in this
    portion of the file — presumably defined elsewhere in the module.
    """
    fs = _ensure_formatspecs(formatspecs)
    delim = fs['format_delimiter']  # NOTE(review): local is unused below

    # In-memory sources?
    items = _normalize_pack_inputs(infiles)
    if items is not None:
        return pack_iter_neo(items, outfile, formatspecs=fs,
                             checksumtypes=checksumtypes, encoding=encoding,
                             compression=compression, compression_level=compression_level)

    if isinstance(infiles, basestring):
        paths = [infiles]
    else:
        paths = list(infiles)

    # Build file list (dirs recursively)
    filelist = []
    base_dir = None  # NOTE(review): assigned but never used afterwards
    if len(paths) == 1 and os.path.isdir(paths[0]):
        base_dir = os.path.abspath(paths[0])
    for p in paths:
        if os.path.isdir(p):
            for root, dirs, files in os.walk(p):
                # Record the directory itself (trailing separator form).
                filelist.append((os.path.join(root, ''), True))
                for name in files:
                    filelist.append((os.path.join(root, name), False))
        else:
            filelist.append((p, False))

    # open destination
    fp, close_me, to_bytes = _wrap_outfile(outfile)

    try:
        _write_global_header(fp, len(filelist), encoding, checksumtypes[0], extradata=[], formatspecs=fs)

        fid = 0
        for apath, is_dir in filelist:
            st = os.lstat(apath)
            mode = st.st_mode
            if is_dir or stat.S_ISDIR(mode):
                raw = b''
                ftype = 5  # directory record
            else:
                with _iopen(apath, 'rb') as f:
                    raw = f.read()
                ftype = 0  # regular file record

            # Decide compression
            algo = _normalize_algo(compression)
            if algo == 'auto':
                # Size-based policy; explicit level (if any) wins.
                algo, auto_level = _auto_pick_for_size(len(raw))
                level = compression_level if compression_level is not None else auto_level
            else:
                level = compression_level

            try:
                stored_bytes, used_algo = _compress_bytes(raw, algo, level=level)
            except RuntimeError:
                # Requested codec unavailable (e.g. xz on Py2): fall back to zlib.
                stored_bytes, used_algo = _compress_bytes(raw, 'zlib', level=(6 if level is None else level))

            meta = {
                'ftype': ftype,
                'fencoding': encoding,
                'fcencoding': encoding,
                'fname': './' + os.path.relpath(apath).replace('\\', '/') if not re.match(r'^[\./]', apath) else apath,
                'flinkname': '',
                'fsize': len(raw),
                'fatime': int(getattr(st, 'st_atime', time.time())),
                'fmtime': int(getattr(st, 'st_mtime', time.time())),
                'fctime': int(getattr(st, 'st_ctime', time.time())),
                'fbtime': int(getattr(st, 'st_mtime', time.time())),
                'fmode': int(mode),
                'fwinattributes': 0,
                'fcompression': used_algo,
                'fcsize': len(stored_bytes),
                'fuid': int(getattr(st, 'st_uid', 0)),
                'funame': '',
                'fgid': int(getattr(st, 'st_gid', 0)),
                'fgname': '',
                'fid': fid,
                'finode': int(getattr(st, 'st_ino', fid)),
                'flinkcount': int(getattr(st, 'st_nlink', 1)),
                'fdev': int(getattr(st, 'st_dev', 0)),
                'fdev_minor': 0,
                'fdev_major': 0,
                'index': fid,
            }
            fid += 1

            rec = _build_file_header_bytes(meta, jsondata={}, content_bytes_stored=stored_bytes,
                                           checksumtypes=checksumtypes, extradata=[], formatspecs=fs)
            fp.write(rec)

        # end marker
        fp.write(_append_nulls(['0','0'], fs['format_delimiter']))
        if to_bytes:
            return fp.getvalue()
    finally:
        if close_me:
            fp.close()
833
+
834
def archive_to_array_neo(infile, formatspecs=None,
                         listonly=False, skipchecksum=False, uncompress=True):
    """Parse an archive into a dict: global-header fields plus a
    'ffilelist' list of per-file record dicts.

    *infile* may be a path, file-like object, or bytes.  Flags are
    forwarded to the record parser.
    """
    fs = _ensure_formatspecs(formatspecs)
    fp, close_me = _wrap_infile(infile)
    try:
        result = _parse_global_header(fp, fs, skipchecksum=skipchecksum)
        while True:
            record = _parse_record(fp, fs, listonly=listonly,
                                   skipchecksum=skipchecksum, uncompress=uncompress)
            if record is None:
                return result
            result['ffilelist'].append(record)
    finally:
        if close_me:
            fp.close()
849
+
850
def unpack_neo(infile, outdir='.', formatspecs=None, skipchecksum=False, uncompress=True):
    """Extract an archive to *outdir*, or — when outdir is None/'-' — to a
    {name: bytes_or_None} dict (directories map to None).

    Returns True after filesystem extraction, the dict in in-memory
    mode, or False when the archive cannot be parsed.
    """
    arr = archive_to_array_neo(infile, formatspecs=formatspecs, listonly=False, skipchecksum=skipchecksum, uncompress=uncompress)
    if not arr:
        return False

    # In-memory extraction
    if outdir in (None, '-', b'-'):
        result = {}
        for ent in arr['ffilelist']:
            if ent['ftype'] == 5:
                result[ent['fname']] = None
            else:
                result[ent['fname']] = ent.get('fcontent') or b''
        return result

    if not os.path.isdir(outdir):
        if os.path.exists(outdir):
            raise IOError("not a directory: %r" % outdir)
        os.makedirs(outdir)
    for ent in arr['ffilelist']:
        # Strip only a leading './' prefix (and any leading slashes).
        # The previous str.lstrip('./') stripped the CHARACTER SET
        # {'.', '/'}, which mangled hidden files: './.config' -> 'config'.
        relname = ent['fname']
        while relname.startswith('./'):
            relname = relname[2:]
        relname = relname.lstrip('/')
        path = os.path.join(outdir, relname)
        if ent['ftype'] == 5: # directory
            if not os.path.isdir(path):
                os.makedirs(path)
            continue
        d = os.path.dirname(path)
        if d and not os.path.isdir(d):
            os.makedirs(d)
        with _iopen(path, 'wb') as f:
            f.write(ent.get('fcontent') or b'')
        try:
            # Best effort: mode bits may not be applicable on all platforms.
            os.chmod(path, ent.get('fmode', 0o666))
        except Exception:
            pass
    return True
885
+
886
def repack_neo(infile, outfile=None, formatspecs=None,
               checksumtypes=("crc32","crc32","crc32"),
               compression="auto",
               compression_level=None):
    """Rewrite an existing archive, optionally changing its compression.

    Reads *infile* WITHOUT decompressing entries (stored bytes are kept
    as-is when possible) and writes a new archive to *outfile*.

    Parameters:
        infile: source archive (path, bytes, or file-like).
        outfile: destination; None or '-' returns the archive as bytes.
        formatspecs: optional format specification dict.
        checksumtypes: (header, json, content) checksum algorithm names.
        compression: target algorithm name, or 'auto' for the
            size-based policy.
        compression_level: explicit level for the target algorithm
            (ignored when 'auto' picks its own level).

    Returns:
        bytes when outfile is None/'-' (in-memory mode), otherwise None.
    """
    # uncompress=False -> ent['fcontent'] holds the stored (possibly
    # compressed) bytes, so unchanged entries can be copied verbatim.
    arr = archive_to_array_neo(infile, formatspecs=formatspecs, listonly=False, skipchecksum=False, uncompress=False)
    fs = _ensure_formatspecs(formatspecs)
    fp, close_me, to_bytes = _wrap_outfile(outfile)
    try:
        _write_global_header(fp, len(arr['ffilelist']), arr.get('fencoding', 'UTF-8'), checksumtypes[0],
                             extradata=arr.get('fextradata', []), formatspecs=fs)
        for i, ent in enumerate(arr['ffilelist']):
            src_algo = _normalize_algo(ent.get('fcompression', 'none'))
            dst_algo = _normalize_algo(compression)

            stored_src = ent.get('fcontent') or b''  # we requested uncompress=False, so this is stored bytes

            if dst_algo == 'auto':
                # Need the raw size to drive the size-based policy; fall
                # back to the stored bytes if decompression fails.
                try:
                    raw = _decompress_bytes(stored_src, src_algo) if src_algo != 'none' else stored_src
                except RuntimeError:
                    raw = stored_src
                dst_algo, dst_level = _auto_pick_for_size(len(raw))
            else:
                if src_algo != 'none':
                    try:
                        raw = _decompress_bytes(stored_src, src_algo)
                    except RuntimeError:
                        # Codec unavailable: keep stored bytes as the "raw" view.
                        raw = stored_src
                else:
                    raw = stored_src
                dst_level = compression_level

            if dst_algo == src_algo or (dst_algo == 'none' and src_algo == 'none'):
                # Same algorithm: copy the stored bytes verbatim, but still
                # recover the uncompressed length for the fsize field.
                stored_bytes = stored_src
                used_algo = src_algo
                try:
                    raw_len = len(_decompress_bytes(stored_src, src_algo)) if src_algo != 'none' else len(stored_src)
                except RuntimeError:
                    raw_len = len(stored_src)
            else:
                # Algorithm changed: recompress the raw payload.
                stored_bytes, used_algo = _compress_bytes(raw, dst_algo, level=dst_level)
                raw_len = len(raw)

            # Per-entry header metadata; missing fields get safe defaults.
            meta = {
                'ftype': ent['ftype'],
                'fencoding': arr.get('fencoding', 'UTF-8'),
                'fcencoding': arr.get('fencoding', 'UTF-8'),
                'fname': ent['fname'],
                'flinkname': ent.get('flinkname',''),
                'fsize': raw_len,
                'fatime': ent.get('fatime', int(time.time())),
                'fmtime': ent.get('fmtime', int(time.time())),
                'fctime': ent.get('fctime', int(time.time())),
                'fbtime': ent.get('fbtime', int(time.time())),
                'fmode': ent.get('fmode', stat.S_IFREG | 0o666),
                'fwinattributes': ent.get('fwinattributes', 0),
                'fcompression': used_algo,
                'fcsize': len(stored_bytes),
                'fuid': ent.get('fuid', 0),
                'funame': ent.get('funame', ''),
                'fgid': ent.get('fgid', 0),
                'fgname': ent.get('fgname', ''),
                'fid': ent.get('fid', i),
                'finode': ent.get('finode', i),
                'flinkcount': ent.get('flinkcount', 1),
                'fdev': ent.get('fdev', 0),
                'fdev_minor': ent.get('fdev_minor', 0),
                'fdev_major': ent.get('fdev_major', 0),
                'index': i,
            }
            rec = _build_file_header_bytes(meta, jsondata=ent.get('fjson', {}), content_bytes_stored=stored_bytes,
                                           checksumtypes=checksumtypes, extradata=[], formatspecs=fs)
            fp.write(rec)
        # End-of-archive marker: two '0' fields.
        fp.write(_append_nulls(['0','0'], fs['format_delimiter']))
        if to_bytes:
            return fp.getvalue()
    finally:
        if close_me:
            fp.close()
965
+
966
+ # -----------------------------------------------------------------------------
967
+ # Alt validation and listing helpers (lowercase names for consistency)
968
+ # -----------------------------------------------------------------------------
969
+
970
def _read_record_raw(fp, formatspecs):
    """Low-level read of a single record returning header fields and stored blobs.

    Returns None at the end-of-archive marker (two consecutive '0'
    fields); otherwise a 5-tuple:
        (headersize_hex, fields_len_hex, vals, json_bytes, content_stored)
    where vals is the list of delimiter-separated header field strings,
    json_bytes is the raw JSON blob, and content_stored is the stored
    (possibly still compressed) file content.

    Raises ValueError when the field-count or JSON-size hex fields are
    malformed.
    """
    delim = formatspecs['format_delimiter']
    dbytes = _to_bytes(delim)

    first = _read_cstring(fp, delim)
    if first == b'0':
        # Could be the end marker ('0' followed by '0') or a record whose
        # header-size field happens to be '0'; look at the next field.
        second = _read_cstring(fp, delim)
        if second == b'0':
            return None
        headersize_hex = first.decode('UTF-8')
        fields_len_hex = second.decode('UTF-8')
    else:
        headersize_hex = first.decode('UTF-8')
        fields_len_hex = _read_cstring(fp, delim).decode('UTF-8')

    try:
        n_fields = int(fields_len_hex, 16)
    except Exception:
        raise ValueError("Bad record field-count hex: %r" % fields_len_hex)

    vals = _read_fields(fp, n_fields, delim)
    idxs = _index_json_and_checks(vals)
    (idx_json_type, idx_json_len, idx_json_size, idx_json_cst, idx_json_cs) = idxs['json']

    fjsonsize_hex = vals[idx_json_size] or '0'
    try:
        fjsonsize = int(fjsonsize_hex, 16)
    except Exception:
        raise ValueError("Bad JSON size hex: %r" % fjsonsize_hex)

    json_bytes = fp.read(fjsonsize)
    fp.read(len(dbytes))  # consume the delimiter trailing the JSON blob

    fcompression = vals[12]  # compression algo name; ''/'none' means stored raw
    fsize_hex = vals[5]      # uncompressed size, hex
    fcsize_hex = vals[13]    # stored/compressed size, hex
    fsize = int(fsize_hex, 16)
    fcsize = int(fcsize_hex, 16)
    # Compressed entries store fcsize bytes; uncompressed entries store fsize.
    read_size = fcsize if (fcompression not in ('', 'none', 'auto') and fcsize > 0) else fsize
    content_stored = b''
    if read_size:
        content_stored = fp.read(read_size)
        fp.read(len(dbytes))  # consume the delimiter trailing the content

    return headersize_hex, fields_len_hex, vals, json_bytes, content_stored
1016
+
1017
def archivefilevalidate_neo(infile, formatspecs=None, verbose=False, return_details=False):
    """Validate an ArchiveFile using the alt parser.

    Recomputes the header, JSON, and content checksums of every record
    and compares them to the stored values.

    Parameters:
        infile: archive source (path, bytes, or file-like).
        formatspecs: optional format specification dict.
        verbose: also collect per-entry results (same effect on the
            collected list as return_details).
        return_details: when true, return (ok_all, details) where
            details is a list of per-entry dicts with header_ok /
            json_ok / content_ok flags.

    Returns:
        bool, or (bool, list-of-dicts) when return_details is true.
    """
    fs = _ensure_formatspecs(formatspecs)
    details = []
    ok_all = True

    fp, close_me = _wrap_infile(infile)
    try:
        _ = _parse_global_header(fp, fs, skipchecksum=False)
        idx = 0
        while True:
            raw = _read_record_raw(fp, fs)
            if raw is None:
                break
            headersize_hex, fields_len_hex, vals, json_bytes, content_stored = raw

            idxs = _index_json_and_checks(vals)
            (idx_json_type, idx_json_len, idx_json_size, idx_json_cst, idx_json_cs) = idxs['json']
            (idx_header_cs_type, idx_content_cs_type) = idxs['cstypes']
            (idx_header_cs, idx_content_cs) = idxs['csvals']

            fname = vals[3]
            header_cs_type = vals[idx_header_cs_type]
            content_cs_type = vals[idx_content_cs_type]
            header_cs_val = vals[idx_header_cs]
            content_cs_val = vals[idx_content_cs]
            json_cs_type = vals[idx_json_cst]
            json_cs_val = vals[idx_json_cs]

            delim = fs['format_delimiter']
            # The header checksum covers the size/count prefix plus every
            # field EXCEPT the two trailing checksum-value fields.
            header_bytes = _append_null(headersize_hex, delim) + _append_null(fields_len_hex, delim) + _append_nulls(vals[:-2], delim)
            computed_hcs = _checksum(header_bytes, header_cs_type, text=True)
            h_ok = (computed_hcs == header_cs_val)

            # JSON checksum: only verified when a JSON blob is present.
            j_ok = True
            try:
                fjsonsize_hex = vals[idx_json_size] or '0'
                fjsonsize = int(fjsonsize_hex, 16) if fjsonsize_hex else 0
            except Exception:
                fjsonsize = 0
            if fjsonsize:
                computed_jcs = _checksum(json_bytes, json_cs_type, text=True)
                j_ok = (computed_jcs == json_cs_val)

            # Content checksum is computed over the STORED (possibly
            # compressed) bytes, not the decompressed payload.
            c_ok = True
            if content_stored:
                computed_ccs = _checksum(content_stored, content_cs_type, text=False)
                c_ok = (computed_ccs == content_cs_val)

            entry_ok = h_ok and j_ok and c_ok
            ok_all = ok_all and entry_ok
            if verbose or return_details:
                details.append({
                    'index': idx,
                    'name': fname,
                    'header_ok': h_ok,
                    'json_ok': j_ok,
                    'content_ok': c_ok,
                    'fcompression': vals[12],
                    'fsize_hex': vals[5],
                    'fcsize_hex': vals[13],
                })
            idx += 1
    finally:
        if close_me:
            fp.close()

    if return_details:
        return ok_all, details
    return ok_all
1087
+
1088
def archivefilelistfiles_neo(infile, formatspecs=None, advanced=False, include_dirs=True):
    """List entries in an archive without extracting.

    Returns a list of normalized names ('./'-prefixed), or, when
    *advanced* is true, a list of dicts with name/type/compression/
    size/stored_size/mtime/atime/mode per entry.  Directory entries
    are skipped when include_dirs is false.
    """
    fs = _ensure_formatspecs(formatspecs)
    entries = []

    fp, close_me = _wrap_infile(infile)
    try:
        _parse_global_header(fp, fs, skipchecksum=True)
        record = _read_record_raw(fp, fs)
        while record is not None:
            _hs, _fl, vals, _json_blob, _stored = record

            entry_type = int(vals[0], 16)
            dir_entry = (entry_type == 5)
            name = vals[3]

            if dir_entry and not include_dirs:
                record = _read_record_raw(fp, fs)
                continue

            # Normalize bare names to the archive's './' convention.
            if re.match(r'^[\./]', name) is None:
                name = './' + name

            if advanced:
                entries.append({
                    'name': name,
                    'type': 'dir' if dir_entry else 'file',
                    'compression': vals[12] or 'none',
                    'size': int(vals[5], 16),
                    'stored_size': int(vals[13], 16),
                    'mtime': int(vals[7], 16),
                    'atime': int(vals[6], 16),
                    'mode': int(vals[10], 16),
                })
            else:
                entries.append(name)
            record = _read_record_raw(fp, fs)
    finally:
        if close_me:
            fp.close()
    return entries
1137
+
1138
+ # -----------------------------------------------------------------------------
1139
+ # Pack from iterator + foreign-archive conversion (stdlib + optional deps)
1140
+ # -----------------------------------------------------------------------------
1141
+
1142
def pack_iter_neo(items, outfile, formatspecs=None,
                  checksumtypes=("crc32","crc32","crc32"),
                  encoding="UTF-8",
                  compression="auto",
                  compression_level=None):
    """
    Pack directly from an iterable of entries without touching the filesystem.
    Each item may be either a tuple (name, is_dir, data_bytes_or_None)
    or a dict with keys:
        name (str), is_dir (bool), data (bytes or None),
        mode (int, optional), mtime (int, optional),
        uid (int), gid (int), uname (str), gname (str)

    Returns the archive as bytes when outfile is None/'-' (in-memory
    mode per _wrap_outfile); otherwise writes to outfile and returns None.
    """
    fs = _ensure_formatspecs(formatspecs)
    fp, close_me, to_bytes = _wrap_outfile(outfile)

    try:
        # Count items first (may be a generator -> materialize)
        if not hasattr(items, '__len__'):
            items = list(items)
        _write_global_header(fp, len(items), encoding, checksumtypes[0], extradata=[], formatspecs=fs)

        fid = 0
        for it in items:
            if isinstance(it, dict):
                name = it.get('name')
                is_dir = bool(it.get('is_dir', False))
                data = it.get('data', None)
                mode = int(it.get('mode', stat.S_IFDIR | 0o755 if is_dir else stat.S_IFREG | 0o666))
                mtime = int(it.get('mtime', time.time()))
                uid = int(it.get('uid', 0)); gid = int(it.get('gid', 0))
                uname = it.get('uname', ''); gname = it.get('gname', '')
            else:
                # Tuple form carries no ownership metadata; synthesize defaults.
                name, is_dir, data = it
                mode = stat.S_IFDIR | 0o755 if is_dir or (name.endswith('/') and data is None) else stat.S_IFREG | 0o666
                mtime = int(time.time())
                uid = gid = 0; uname = gname = ''

            # Normalize name
            name = name.replace('\\', '/')
            if not re.match(r'^[\./]', name):
                name = './' + name

            if is_dir or name.endswith('/'):
                raw = b''
                ftype = 5  # directory record
            else:
                raw = data or b''
                ftype = 0  # regular file record

            # Decide compression
            algo = _normalize_algo(compression)
            if algo == 'auto':
                # Size-based policy; an explicit compression_level still wins.
                algo, auto_level = _auto_pick_for_size(len(raw))
                level = compression_level if compression_level is not None else auto_level
            else:
                level = compression_level

            try:
                stored_bytes, used_algo = _compress_bytes(raw, algo, level=level)
            except RuntimeError:
                # Requested codec unavailable -> fall back to zlib.
                stored_bytes, used_algo = _compress_bytes(raw, 'zlib', level=(6 if level is None else level))

            # Per-entry header metadata; fid doubles as id/inode/index.
            meta = {
                'ftype': ftype,
                'fencoding': encoding,
                'fcencoding': encoding,
                'fname': name,
                'flinkname': '',
                'fsize': len(raw),
                'fatime': mtime,
                'fmtime': mtime,
                'fctime': mtime,
                'fbtime': mtime,
                'fmode': int(mode),
                'fwinattributes': 0,
                'fcompression': used_algo,
                'fcsize': len(stored_bytes),
                'fuid': uid,
                'funame': uname,
                'fgid': gid,
                'fgname': gname,
                'fid': fid,
                'finode': fid,
                'flinkcount': 1,
                'fdev': 0,
                'fdev_minor': 0,
                'fdev_major': 0,
                'index': fid,
            }
            fid += 1

            rec = _build_file_header_bytes(meta, jsondata={}, content_bytes_stored=stored_bytes,
                                           checksumtypes=checksumtypes, extradata=[], formatspecs=fs)
            fp.write(rec)

        # end marker
        fp.write(_append_nulls(['0','0'], fs['format_delimiter']))
        if to_bytes:
            return fp.getvalue()
    finally:
        if close_me:
            fp.close()
1245
+
1246
def _sniff_foreign_type(path):
    """Guess a foreign archive kind from *path*.

    Checks the filename extension first, then falls back to stdlib
    content probes for zip/tar.

    Parameters:
        path: filesystem path as str or bytes (anything else skips the
            extension check).

    Returns:
        'zip', 'tar', 'rar', '7z', or None when nothing matches.
    """
    # BUGFIX: bytes paths must be decoded before the endswith() checks
    # below; comparing bytes against str suffixes raises TypeError on Py3.
    if isinstance(path, bytes):
        try:
            lower = os.path.basename(path).decode('UTF-8', 'replace').lower()
        except Exception:
            lower = ''
    elif isinstance(path, str):
        lower = os.path.basename(path).lower()
    else:
        lower = ''
    # Extension first
    if lower.endswith('.zip'):
        return 'zip'
    if lower.endswith(('.tar', '.tar.gz', '.tgz', '.tar.bz2', '.tbz2', '.tar.xz', '.txz')):
        return 'tar'
    if lower.endswith('.rar'):
        return 'rar'
    if lower.endswith('.7z'):
        return '7z'
    # Fallback: stdlib probes for zip/tar only
    try:
        import zipfile
        if isinstance(path, basestring) and zipfile.is_zipfile(path):
            return 'zip'
    except Exception:
        pass
    try:
        import tarfile
        if isinstance(path, basestring) and hasattr(tarfile, 'is_tarfile') and tarfile.is_tarfile(path):
            return 'tar'
    except Exception:
        pass
    return None
1271
+
1272
def _iter_tar_members(tarf):
    """Yield pack_iter_neo-style entry dicts for every member of *tarf*."""
    for member in tarf.getmembers():
        mtime = int(getattr(member, 'mtime', time.time()))
        uid = int(getattr(member, 'uid', 0))
        gid = int(getattr(member, 'gid', 0))
        uname = getattr(member, 'uname', '')
        gname = getattr(member, 'gname', '')
        if member.isdir():
            yield {
                'name': member.name.rstrip('/') + '/',
                'is_dir': True,
                'data': None,
                'mode': stat.S_IFDIR | (member.mode or 0o755),
                'mtime': mtime,
                'uid': uid, 'gid': gid,
                'uname': uname, 'gname': gname,
            }
            continue
        # Non-directory: read the payload, tolerating unreadable members.
        try:
            handle = tarf.extractfile(member)
            payload = handle.read() if handle is not None else b''
        except Exception:
            payload = b''
        yield {
            'name': member.name,
            'is_dir': False,
            'data': payload,
            'mode': stat.S_IFREG | (member.mode or 0o644),
            'mtime': mtime,
            'uid': uid, 'gid': gid,
            'uname': uname, 'gname': gname,
        }
1290
+
1291
def _iter_zip_members(zipf):
    """Yield pack_iter_neo-style entry dicts for every member of *zipf*."""
    for info in zipf.infolist():
        name = info.filename
        # Unix permission bits live in the high word of external_attr;
        # zero (e.g. Windows-made zips) falls back to defaults below.
        if hasattr(info, 'external_attr'):
            perm = (info.external_attr >> 16) & 0o777
        else:
            perm = 0o644
        stamp = getattr(info, 'date_time', (1980, 1, 1, 0, 0, 0))
        mtime = int(time.mktime(stamp + (0, 0, -1)))
        if name.endswith('/'):
            yield {'name': name, 'is_dir': True, 'data': None,
                   'mode': stat.S_IFDIR | (perm or 0o755), 'mtime': mtime}
            continue
        try:
            payload = zipf.read(info)
        except Exception:
            payload = b''
        yield {'name': name, 'is_dir': False, 'data': payload,
               'mode': stat.S_IFREG | (perm or 0o644), 'mtime': mtime}
1306
+
1307
def _iter_rar_members(rarf):
    """Yield pack_iter_neo-style entry dicts for every member of *rarf*."""
    for info in rarf.infolist():
        # rarfile exposes the name under different attributes across versions.
        name = getattr(info, 'filename', None) or getattr(info, 'arcname', None)
        if name is None:
            continue
        try:
            dir_entry = info.is_dir()
        except Exception:
            dir_entry = name.endswith('/') or name.endswith('\\')
        try:
            stamp = getattr(info, 'date_time', None)
            if stamp:
                mtime = int(time.mktime(tuple(stamp) + (0, 0, -1)))
            else:
                mtime = int(time.time())
        except Exception:
            mtime = int(time.time())
        if dir_entry:
            yield {'name': name, 'is_dir': True, 'data': None,
                   'mode': (stat.S_IFDIR | 0o755), 'mtime': mtime}
            continue
        try:
            payload = rarf.read(info)
        except Exception:
            payload = b''
        yield {'name': name, 'is_dir': False, 'data': payload,
               'mode': (stat.S_IFREG | 0o644), 'mtime': mtime}
1334
+
1335
def _iter_7z_members(z7):
    """Yield pack_iter_neo-style entry dicts for every member of a py7zr archive.

    py7zr's API differs across versions, so names are enumerated via
    z7.list() when available, falling back to z7.getnames(); payloads
    come from a single z7.readall() call.  7z entries expose no usable
    mode/mtime through this path, so defaults are synthesized.
    """
    names = []
    try:
        entries = z7.list()
        for e in entries:
            # Attribute name varies between py7zr versions.
            name = getattr(e, 'filename', None) or getattr(e, 'name', None)
            if name is None:
                continue
            is_dir = bool(getattr(e, 'is_directory', False)) or name.endswith('/') or name.endswith('\\')
            names.append((name, is_dir))
    except Exception:
        # list() unavailable/broken: fall back to a plain name listing,
        # inferring directories from trailing separators only.
        try:
            for n in z7.getnames():
                is_dir = n.endswith('/') or n.endswith('\\')
                names.append((n, is_dir))
        except Exception:
            names = []
    try:
        data_map = z7.readall()
    except Exception:
        data_map = {}

    for name, is_dir in names:
        if is_dir:
            yield {'name': name, 'is_dir': True, 'data': None,
                   'mode': (stat.S_IFDIR | 0o755), 'mtime': int(time.time())}
        else:
            try:
                blob = data_map.get(name, b'')
                if not isinstance(blob, (bytes, bytearray)):
                    # readall() may return file-like objects or chunk lists
                    # depending on the py7zr version; coerce best-effort.
                    try:
                        blob = b''.join(blob) if isinstance(blob, list) else bytes(blob)
                    except Exception:
                        blob = b''
            except Exception:
                blob = b''
            yield {'name': name, 'is_dir': False, 'data': blob,
                   'mode': (stat.S_IFREG | 0o644), 'mtime': int(time.time())}
1373
+
1374
def convert_foreign_to_neo(infile, outfile=None, formatspecs=None,
                           checksumtypes=("crc32","crc32","crc32"),
                           compression="auto",
                           compression_level=None):
    """
    Convert a foreign archive (zip/tar/rar/7z) into the alt ArchiveFile format.
    Uses stdlib for zip/tar; requires 'rarfile' for RAR and 'py7zr' for 7z.
    Returns bytes when outfile is None/'-'; otherwise writes a file.

    infile may be a filesystem path, raw bytes, or an open binary
    file-like object.  The kind is sniffed from the filename when a
    path is given; otherwise zip is tried first, then tar.
    """
    kind = _sniff_foreign_type(infile) if isinstance(infile, basestring) else None

    def _as_source(blob):
        # Normalize input: bytes -> BytesIO, path -> unchanged,
        # file-like -> wrapped binary stream.
        from io import BytesIO
        if isinstance(blob, (bytes, bytearray, memoryview)):
            return BytesIO(bytes(blob))
        if isinstance(blob, basestring):
            return blob
        return _wrap_infile(blob)[0]

    if kind == 'zip' or (not kind and not isinstance(infile, basestring)):
        import zipfile
        zsrc = _as_source(infile)
        try:
            with zipfile.ZipFile(zsrc, 'r') as zf:
                return pack_iter_neo(_iter_zip_members(zf), outfile, formatspecs=formatspecs,
                                     checksumtypes=checksumtypes, compression=compression,
                                     compression_level=compression_level)
        except zipfile.BadZipfile:
            pass  # maybe not a zip; try others

    # Also enter here for non-path input whose zip probe failed, so raw
    # tar bytes / tar streams are convertible too.
    if kind == 'tar' or (not kind and isinstance(infile, basestring) and os.path.splitext(infile)[1].startswith('.tar')) \
            or (not kind and not isinstance(infile, basestring)):
        import tarfile
        src = _as_source(infile)
        tf = None
        try:
            if isinstance(src, basestring):
                tf = tarfile.open(src, 'r:*')
            else:
                # BUGFIX: file objects must be passed via fileobj=;
                # positionally they are treated as a path name.
                try:
                    src.seek(0)  # the zip probe above may have consumed the stream
                except Exception:
                    pass
                tf = tarfile.open(fileobj=src, mode='r:*')
        except tarfile.ReadError:
            if kind == 'tar':
                raise  # explicitly declared tar: surface the real error
            tf = None  # speculative probe failed; fall through
        if tf is not None:
            with tf:
                return pack_iter_neo(_iter_tar_members(tf), outfile, formatspecs=formatspecs,
                                     checksumtypes=checksumtypes, compression=compression,
                                     compression_level=compression_level)

    if kind == 'rar':
        try:
            import rarfile
        except Exception as e:
            raise RuntimeError("RAR support requires 'rarfile' package: %s" % e)
        rsrc = _as_source(infile)
        with rarfile.RarFile(rsrc) as rf:
            return pack_iter_neo(_iter_rar_members(rf), outfile, formatspecs=formatspecs,
                                 checksumtypes=checksumtypes, compression=compression,
                                 compression_level=compression_level)

    if kind == '7z':
        try:
            import py7zr
        except Exception as e:
            raise RuntimeError("7z support requires 'py7zr' package: %s" % e)
        zsrc = _as_source(infile)
        with py7zr.SevenZipFile(zsrc, 'r') as z7:
            return pack_iter_neo(_iter_7z_members(z7), outfile, formatspecs=formatspecs,
                                 checksumtypes=checksumtypes, compression=compression,
                                 compression_level=compression_level)

    raise ValueError("Unsupported foreign archive (zip/tar/rar/7z only): %r" % (infile,))