PyNeoFile 0.19.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyneofile-0.19.8.data/scripts/neofile.py +249 -0
- pyneofile-0.19.8.dist-info/METADATA +24 -0
- pyneofile-0.19.8.dist-info/RECORD +8 -0
- pyneofile-0.19.8.dist-info/WHEEL +5 -0
- pyneofile-0.19.8.dist-info/licenses/LICENSE +28 -0
- pyneofile-0.19.8.dist-info/top_level.txt +1 -0
- pyneofile-0.19.8.dist-info/zip-safe +1 -0
- pyneofile.py +1430 -0
pyneofile.py
ADDED
|
@@ -0,0 +1,1430 @@
|
|
|
1
|
+
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
from __future__ import print_function, unicode_literals, division, absolute_import
|
|
4
|
+
|
|
5
|
+
"""
|
|
6
|
+
pyneofile.py — Alternate ArchiveFile core with Py2/3 compatible logic.
|
|
7
|
+
|
|
8
|
+
Features:
|
|
9
|
+
- Pack / unpack / repack / archive_to_array
|
|
10
|
+
- Validation and listing helpers (lowercase names)
|
|
11
|
+
- INI-driven format detection (prefers PYNEOFILE_INI / pyneofile.ini)
|
|
12
|
+
- Compression: zlib, gzip, bz2 (stdlib), xz/lzma when available (Py3)
|
|
13
|
+
- Size-based 'auto' compression policy
|
|
14
|
+
- Checksums (header/json/content) using stored bytes (padded CRC-32)
|
|
15
|
+
- Optional converters: ZIP/TAR (stdlib), RAR via rarfile, 7z via py7zr
|
|
16
|
+
- In-memory mode: bytes input, and bytes output when outfile is None/"-"
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
import os, sys, io, stat, time, json, binascii, hashlib, re, codecs
|
|
20
|
+
try:
|
|
21
|
+
from io import open as _iopen
|
|
22
|
+
except Exception:
|
|
23
|
+
_iopen = open # Py2 fallback
|
|
24
|
+
|
|
25
|
+
# ---------------- Python 2/3 shims ----------------
|
|
26
|
+
try:
|
|
27
|
+
basestring
|
|
28
|
+
except NameError:
|
|
29
|
+
basestring = (str,)
|
|
30
|
+
|
|
31
|
+
try:
|
|
32
|
+
unicode
|
|
33
|
+
except NameError:
|
|
34
|
+
unicode = str # Py3 alias
|
|
35
|
+
|
|
36
|
+
try:
|
|
37
|
+
from io import BytesIO
|
|
38
|
+
except ImportError:
|
|
39
|
+
from cStringIO import StringIO as BytesIO # Py2 fallback
|
|
40
|
+
|
|
41
|
+
# INI support (Py2/3)
|
|
42
|
+
try:
|
|
43
|
+
import configparser as _cfg
|
|
44
|
+
except Exception:
|
|
45
|
+
import ConfigParser as _cfg # Py2
|
|
46
|
+
|
|
47
|
+
# --------------- Compression shim (stdlib only) ---------------
|
|
48
|
+
import zlib, bz2, gzip
|
|
49
|
+
try:
|
|
50
|
+
import lzma as _lzma # Py3
|
|
51
|
+
_HAVE_LZMA = True
|
|
52
|
+
except Exception:
|
|
53
|
+
_lzma = None
|
|
54
|
+
_HAVE_LZMA = False
|
|
55
|
+
|
|
56
|
+
# ---- Package identity / version metadata ----
__program_name__ = "PyNeoFile"
__project__ = __program_name__
__project_url__ = "https://github.com/GameMaker2k/PyNeoFile"
# (major, minor, micro, release-tag, rc-number); tag/rc may be None for finals.
__version_info__ = (0, 19, 8, "RC 1", 1)
# (year, month, day, release-tag, rc-number) for the release date string.
__version_date_info__ = (2025, 8, 14, "RC 1", 1)
__version_date__ = str(__version_date_info__[0]) + "." + str(
    __version_date_info__[1]).zfill(2) + "." + str(__version_date_info__[2]).zfill(2)
__revision__ = __version_info__[3]
__revision_id__ = "$Id: c7f8ce877ea9b59fca86a4110cc1eec3cdee518e $"
# Date string with the RC number appended when one is set.
# NOTE(review): the condition tests __version_info__[4] but appends
# __version_date_info__[4]; both happen to be 1 here — confirm which is intended.
if(__version_info__[4] is not None):
    __version_date_plusrc__ = __version_date__ + \
        "-" + str(__version_date_info__[4])
if(__version_info__[4] is None):
    __version_date_plusrc__ = __version_date__
# Human-readable version string, with the release tag appended when present.
if(__version_info__[3] is not None):
    __version__ = str(__version_info__[0]) + "." + str(__version_info__[
        1]) + "." + str(__version_info__[2]) + " " + str(__version_info__[3])
if(__version_info__[3] is None):
    __version__ = str(__version_info__[0]) + "." + str(__version_info__[1]) + "." + str(__version_info__[2])
|
|
75
|
+
|
|
76
|
+
def _normalize_algo(algo):
|
|
77
|
+
if not algo:
|
|
78
|
+
return 'none'
|
|
79
|
+
a = (algo or 'none').lower()
|
|
80
|
+
if a in ('xz', 'lzma'):
|
|
81
|
+
return 'xz'
|
|
82
|
+
if a in ('gz', 'gzip'):
|
|
83
|
+
return 'gzip'
|
|
84
|
+
if a in ('deflate', 'z'):
|
|
85
|
+
return 'zlib'
|
|
86
|
+
if a in ('bzip2', 'bzip', 'bz'):
|
|
87
|
+
return 'bz2'
|
|
88
|
+
if a == 'auto':
|
|
89
|
+
return 'auto'
|
|
90
|
+
return a
|
|
91
|
+
|
|
92
|
+
def _compress_bytes(data, algo='none', level=None):
    """Compress *data* and return ``(stored_bytes, used_algo)``.

    *used_algo* is the canonical algorithm name actually applied ('none'
    when the data is stored as-is).  Raises RuntimeError when xz is asked
    for without the lzma module, ValueError for unknown algorithms
    (including an unresolved 'auto').
    """
    algo = _normalize_algo(algo)
    if algo in ('none', ''):
        return data, 'none'
    if algo == 'zlib':
        if level is None:
            return zlib.compress(data, zlib.Z_DEFAULT_COMPRESSION), 'zlib'
        return zlib.compress(data, int(level)), 'zlib'
    if algo == 'gzip':
        sink = BytesIO()
        writer = gzip.GzipFile(fileobj=sink, mode='wb',
                               compresslevel=(6 if level is None else int(level)))
        try:
            writer.write(data)
        finally:
            writer.close()
        return sink.getvalue(), 'gzip'
    if algo == 'bz2':
        extra = () if level is None else (int(level),)
        return bz2.compress(data, *extra), 'bz2'
    if algo == 'xz':
        if not _HAVE_LZMA:
            raise RuntimeError("xz/lzma compression not available on this Python (needs 3.x lzma)")
        if level is None:
            return _lzma.compress(data), 'xz'
        return _lzma.compress(data, preset=int(level)), 'xz'
    raise ValueError("Unknown compression algorithm: %r" % algo)
|
|
120
|
+
|
|
121
|
+
def _decompress_bytes(data, algo='none'):
    """Inverse of _compress_bytes: expand *data* stored with *algo*.

    Raises RuntimeError when xz is requested without the lzma module and
    ValueError for unrecognised algorithm names.
    """
    algo = _normalize_algo(algo)
    if algo in ('none', ''):
        return data
    if algo == 'zlib':
        return zlib.decompress(data)
    if algo == 'gzip':
        reader = gzip.GzipFile(fileobj=BytesIO(data), mode='rb')
        try:
            return reader.read()
        finally:
            reader.close()
    if algo == 'bz2':
        return bz2.decompress(data)
    if algo == 'xz':
        if not _HAVE_LZMA:
            raise RuntimeError("xz/lzma decompression not available on this Python (needs 3.x lzma)")
        return _lzma.decompress(data)
    raise ValueError("Unknown compression algorithm: %r" % algo)
|
|
141
|
+
|
|
142
|
+
# --- Auto compression policy thresholds (bytes) ---
|
|
143
|
+
_AUTO_XZ_MIN = 2 * 1024 * 1024 # >= 2 MiB → prefer xz (Py3 only)
|
|
144
|
+
_AUTO_BZ2_MIN = 256 * 1024 # >= 256 KiB → prefer bz2 (Py2 or Py3)
|
|
145
|
+
_AUTO_ZLIB_MIN = 16 * 1024 # >= 16 KiB → zlib; smaller often not worth compressing
|
|
146
|
+
|
|
147
|
+
def _auto_pick_for_size(size_bytes):
    """Choose a compression (algo, level) pair from the payload size.

    Returns ('none'|'zlib'|'bz2'|'xz', level_or_None): tiny payloads are
    stored raw, large ones prefer stronger codecs (xz only when the lzma
    module is available).
    """
    if size_bytes < _AUTO_ZLIB_MIN:
        return ('none', None)
    if size_bytes >= _AUTO_XZ_MIN and _HAVE_LZMA:
        return ('xz', 6)
    if size_bytes >= _AUTO_BZ2_MIN:
        return ('bz2', 9)
    return ('zlib', 6)
|
|
156
|
+
|
|
157
|
+
# -----------------------------------------------------------------------------
|
|
158
|
+
# In-memory I/O helpers
|
|
159
|
+
# -----------------------------------------------------------------------------
|
|
160
|
+
|
|
161
|
+
def _wrap_infile(infile):
|
|
162
|
+
"""Return (fp, close_me). Accepts path, file-like, or bytes/bytearray."""
|
|
163
|
+
if isinstance(infile, (bytes, bytearray, memoryview)):
|
|
164
|
+
return BytesIO(bytes(infile)), True
|
|
165
|
+
if hasattr(infile, 'read'):
|
|
166
|
+
return infile, False
|
|
167
|
+
return _iopen(infile, 'rb'), True
|
|
168
|
+
|
|
169
|
+
def _wrap_outfile(outfile):
|
|
170
|
+
"""Return (fp, close_me, to_bytes). If outfile is None or '-', buffer to bytes."""
|
|
171
|
+
if outfile in (None, '-', b'-'):
|
|
172
|
+
bio = BytesIO()
|
|
173
|
+
return bio, False, True
|
|
174
|
+
if hasattr(outfile, 'write'):
|
|
175
|
+
return outfile, False, False
|
|
176
|
+
return _iopen(outfile, 'wb'), True, False
|
|
177
|
+
|
|
178
|
+
def _normalize_pack_inputs(infiles):
|
|
179
|
+
"""Normalize in-memory inputs into items for pack_iter_neo.
|
|
180
|
+
Supported forms:
|
|
181
|
+
- dict {name: bytes_or_None} (None => directory if name endswith('/'))
|
|
182
|
+
- list/tuple of (name, bytes) or (name, is_dir, bytes_or_None) or dicts
|
|
183
|
+
- single bytes/bytearray => [('memory.bin', False, bytes)]
|
|
184
|
+
- anything else => None (caller will do filesystem walk)
|
|
185
|
+
"""
|
|
186
|
+
if isinstance(infiles, dict):
|
|
187
|
+
items = []
|
|
188
|
+
for k, v in infiles.items():
|
|
189
|
+
name = str(k)
|
|
190
|
+
is_dir = bool(v is None or name.endswith('/'))
|
|
191
|
+
items.append({'name': name, 'is_dir': is_dir,
|
|
192
|
+
'data': (None if is_dir else (bytes(v) if v is not None else b''))})
|
|
193
|
+
return items
|
|
194
|
+
if isinstance(infiles, (bytes, bytearray, memoryview)):
|
|
195
|
+
return [{'name': 'memory.bin', 'is_dir': False, 'data': bytes(infiles)}]
|
|
196
|
+
if isinstance(infiles, (list, tuple)) and infiles:
|
|
197
|
+
def _as_item(x):
|
|
198
|
+
if isinstance(x, dict):
|
|
199
|
+
return x
|
|
200
|
+
if isinstance(x, (list, tuple)):
|
|
201
|
+
if len(x) == 2:
|
|
202
|
+
n, b = x
|
|
203
|
+
return {'name': n, 'is_dir': False, 'data': (bytes(b) if b is not None else b'')}
|
|
204
|
+
if len(x) >= 3:
|
|
205
|
+
n, is_dir, b = x[0], bool(x[1]), x[2]
|
|
206
|
+
return {'name': n, 'is_dir': is_dir,
|
|
207
|
+
'data': (None if is_dir else (bytes(b) if b is not None else b''))}
|
|
208
|
+
return None
|
|
209
|
+
items = []
|
|
210
|
+
for it in infiles:
|
|
211
|
+
conv = _as_item(it)
|
|
212
|
+
if conv is None:
|
|
213
|
+
return None
|
|
214
|
+
items.append(conv)
|
|
215
|
+
return items
|
|
216
|
+
return None
|
|
217
|
+
|
|
218
|
+
# ---------------- Format helpers ----------------
|
|
219
|
+
def _ver_digits(verstr):
    """Reduce a version string to its digits, preserving '001'-style padding.

    Empty input, or input containing no digits, falls back to '001'.
    """
    if not verstr:
        return '001'
    digits = ''.join(ch for ch in unicode(verstr) if ch.isdigit())
    return digits if digits else '001'
|
|
225
|
+
|
|
226
|
+
def _default_formatspecs():
|
|
227
|
+
return {
|
|
228
|
+
'format_magic': 'ArchiveFile',
|
|
229
|
+
'format_ver': '001',
|
|
230
|
+
'format_delimiter': '\x00',
|
|
231
|
+
'new_style': True,
|
|
232
|
+
}
|
|
233
|
+
|
|
234
|
+
__formatspecs_ini_cache__ = None
|
|
235
|
+
|
|
236
|
+
def _decode_delim_escape(s):
|
|
237
|
+
try:
|
|
238
|
+
return codecs.decode(s, 'unicode_escape')
|
|
239
|
+
except Exception:
|
|
240
|
+
return s
|
|
241
|
+
|
|
242
|
+
def _load_formatspecs_from_ini(paths=None, prefer_section=None):
    """
    Load format definition from an INI file.
    Search order:
    - explicit 'paths'
    - env PYNEOFILE_INI, then PYARCHIVE_INI
    - ./pyneofile.ini, ./archivefile.ini, ./catfile.ini, ./foxfile.ini
    Section selection:
    - prefer_section
    - [config] default=... if present
    - first non-[config] section
    """
    # Build the ordered candidate list: explicit paths win over env vars,
    # which win over the conventional filename in the current directory.
    cands = []
    if paths:
        if isinstance(paths, basestring):
            cands.append(paths)
        else:
            cands.extend(paths)
    envp = os.environ.get('PYNEOFILE_INI') or os.environ.get('PYARCHIVE_INI')
    if envp:
        cands.append(envp)
    # NOTE(review): the docstring also promises archivefile.ini/catfile.ini/
    # foxfile.ini fallbacks, but only pyneofile.ini is searched — confirm.
    cands.extend(['pyneofile.ini'])

    # First existing file wins; no file found means "use defaults".
    picked = None
    for p in cands:
        if os.path.isfile(p):
            picked = p; break
    if not picked:
        return None

    # Parse the INI; any read/parse failure silently falls back to None
    # (caller then uses built-in defaults).
    try:
        cp = _cfg.ConfigParser() if hasattr(_cfg, 'ConfigParser') else _cfg.RawConfigParser()
        if hasattr(cp, 'read_file'):
            with _iopen(picked, 'r') as fh:
                cp.read_file(fh)
        else:
            # Py2 ConfigParser has no read_file
            cp.read(picked)
    except Exception:
        return None

    # Choose the section that carries the format definition.
    sec = None
    if prefer_section and cp.has_section(prefer_section):
        sec = prefer_section
    else:
        defname = None
        if cp.has_section('config'):
            try:
                defname = cp.get('config', 'default')
            except Exception:
                defname = None
        if defname and cp.has_section(defname):
            sec = defname
        else:
            # Fall back to the first section that isn't the [config] block.
            for name in cp.sections():
                if name.lower() != 'config':
                    sec = name; break
    if not sec:
        return None

    def _get(name, default=None):
        # Missing or unreadable option -> default rather than raising.
        try:
            return cp.get(sec, name)
        except Exception:
            return default

    magic = _get('magic', 'NeoFile')
    ver = _get('ver', '001')
    delim = _get('delimiter', '\\x00')
    newst = _get('newstyle', 'true')
    ext = _get('extension', '.neo')

    # Delimiter is stored escaped in the INI (e.g. '\x00' as text);
    # decode it into the real character(s).
    delim_real = _decode_delim_escape(delim)
    ver_digits = _ver_digits(ver)

    spec = {
        'format_magic': magic,
        'format_ver': ver_digits,
        'format_delimiter': delim_real,
        'new_style': (str(newst).lower() in ('1','true','yes','on')),
        'format_name': sec,
        'extension': ext,
    }
    return spec
|
|
325
|
+
|
|
326
|
+
def _ensure_formatspecs(specs):
    """Return *specs* if truthy, else the (cached) INI spec, else defaults."""
    global __formatspecs_ini_cache__
    if specs:
        return specs
    # Lazily probe for an INI file once per process; None means "not yet
    # probed", a falsy non-None result means "probed, nothing found".
    if __formatspecs_ini_cache__ is None:
        __formatspecs_ini_cache__ = _load_formatspecs_from_ini()
    if __formatspecs_ini_cache__:
        return __formatspecs_ini_cache__
    return _default_formatspecs()
|
|
333
|
+
|
|
334
|
+
def _to_bytes(s):
|
|
335
|
+
if isinstance(s, bytes):
|
|
336
|
+
return s
|
|
337
|
+
if isinstance(s, (bytearray, memoryview)):
|
|
338
|
+
return bytes(s)
|
|
339
|
+
if not isinstance(s, basestring):
|
|
340
|
+
s = str(s)
|
|
341
|
+
return s.encode('UTF-8')
|
|
342
|
+
|
|
343
|
+
def _append_null(b, delim):
    """Return *b* (coerced to bytes) followed by the delimiter bytes."""
    payload = b if isinstance(b, bytes) else _to_bytes(b)
    return payload + _to_bytes(delim)
|
|
347
|
+
|
|
348
|
+
def _append_nulls(seq, delim):
    """Concatenate every item of *seq*, each terminated by the delimiter.

    Builds the result with ``bytes.join`` instead of repeated ``+=`` so
    the cost is O(total size) rather than quadratic for long field lists.
    """
    return b''.join(_append_null(x, delim) for x in seq)
|
|
353
|
+
|
|
354
|
+
def _hex(n):
|
|
355
|
+
return ("%x" % int(n)).lower()
|
|
356
|
+
|
|
357
|
+
def _crc32(data):
|
|
358
|
+
if not isinstance(data, bytes):
|
|
359
|
+
data = _to_bytes(data)
|
|
360
|
+
return ("%08x" % (binascii.crc32(data) & 0xffffffff)).lower()
|
|
361
|
+
|
|
362
|
+
def _sha_like(name, data):
|
|
363
|
+
if not isinstance(data, bytes):
|
|
364
|
+
data = _to_bytes(data)
|
|
365
|
+
try:
|
|
366
|
+
h = hashlib.new(name)
|
|
367
|
+
except ValueError:
|
|
368
|
+
raise ValueError("Unsupported checksum: %r" % name)
|
|
369
|
+
h.update(data)
|
|
370
|
+
return h.hexdigest()
|
|
371
|
+
|
|
372
|
+
def _checksum(data, cstype, text=False):
|
|
373
|
+
if cstype in (None, '', 'none'):
|
|
374
|
+
return '0'
|
|
375
|
+
if text and not isinstance(data, bytes):
|
|
376
|
+
data = _to_bytes(data)
|
|
377
|
+
if (cstype or '').lower() == 'crc32':
|
|
378
|
+
return _crc32(data)
|
|
379
|
+
return _sha_like(cstype.lower(), data)
|
|
380
|
+
|
|
381
|
+
# ---------------- Header builders ----------------
|
|
382
|
+
def _write_global_header(fp, numfiles, encoding, checksumtype, extradata, formatspecs):
    """Write the archive-level header to *fp*.

    Layout (delimiter-terminated fields): magic+version, header size,
    field count, encoding, platform, file count, extras size, extras
    count [, extras items...], checksum type, then a checksum over all
    bytes written so far.
    """
    delim = formatspecs['format_delimiter']
    magic = formatspecs['format_magic']
    ver_digits = _ver_digits(formatspecs.get('format_ver','001'))

    # extras blob: count + items
    # A non-empty dict of extras is serialized to JSON and carried as a
    # single base64 field; an empty dict means "no extras".
    if isinstance(extradata, dict) and extradata:
        payload = json.dumps(extradata, separators=(',', ':')).encode('UTF-8')
        try:
            import base64
            extradata = [base64.b64encode(payload).decode('UTF-8')]
        except Exception:
            extradata = []
    elif isinstance(extradata, dict):
        extradata = []

    extrafields = _hex(len(extradata))
    extras_blob = _append_null(extrafields, delim)
    if extradata:
        extras_blob += _append_nulls(extradata, delim)
    # Size of the extras region (count field + items), recorded in hex.
    extras_size_hex = _hex(len(extras_blob))

    platform_name = os.name if os.name in ('nt', 'posix') else sys.platform
    fnumfiles_hex = _hex(int(numfiles))

    # Field count recorded in the header; the 3 + ... + 1 arithmetic
    # mirrors the on-disk layout for compatibility with the reader.
    tmpoutlist = [encoding, platform_name, fnumfiles_hex, extras_size_hex, extrafields]
    tmpoutlen = 3 + len(tmpoutlist) + len(extradata) + 1  # compatibility
    tmpoutlen_hex = _hex(tmpoutlen)

    body = _append_nulls([tmpoutlen_hex, encoding, platform_name, fnumfiles_hex, extras_size_hex, extrafields], delim)
    if extradata:
        body += _append_nulls(extradata, delim)
    body += _append_null(checksumtype, delim)

    prefix = _append_null(magic + ver_digits, delim)
    # Header size = body plus an empty placeholder field, minus one
    # trailing delimiter (the placeholder stands in for the checksum).
    tmpfileoutstr = body + _append_null('', delim)
    headersize_hex = _hex(len(tmpfileoutstr) - len(_to_bytes(delim)))
    out = prefix + _append_null(headersize_hex, delim) + body
    # Checksum covers everything written so far (magic through body).
    header_cs = _checksum(out, checksumtype, text=True)
    out += _append_null(header_cs, delim)
    fp.write(out)
|
|
423
|
+
|
|
424
|
+
def _build_file_header_bytes(filemeta, jsondata, content_bytes_stored, checksumtypes, extradata, formatspecs):
    """Return full bytes for a record (header+json+NUL+content+NUL).

    *filemeta* supplies per-file metadata (missing keys get defaults),
    *jsondata* an optional JSON-serializable payload, and
    *content_bytes_stored* the (possibly already compressed) file body.
    *checksumtypes* is (header_cs, content_cs, json_cs).
    """
    delim = formatspecs['format_delimiter']
    # Shorthand: integer -> lower-case hex field.
    def H(x): return _hex(int(x))

    # Archive entries are always rooted: force a './' prefix unless the
    # name already starts with '.' or '/'.
    fname = filemeta['fname']
    if not re.match(r'^[\./]', fname):
        fname = './' + fname

    # The 25 fixed metadata fields, in on-disk order (see _parse_record).
    fields = [
        H(filemeta.get('ftype', 0)),
        filemeta.get('fencoding', 'UTF-8'),
        filemeta.get('fcencoding', 'UTF-8'),
        fname,
        filemeta.get('flinkname', ''),
        H(filemeta.get('fsize', 0)),
        H(filemeta.get('fatime', int(time.time()))),
        H(filemeta.get('fmtime', int(time.time()))),
        H(filemeta.get('fctime', int(time.time()))),
        H(filemeta.get('fbtime', int(time.time()))),
        H(filemeta.get('fmode', stat.S_IFREG | 0o666)),
        H(filemeta.get('fwinattributes', 0)),
        filemeta.get('fcompression', ''),
        H(filemeta.get('fcsize', 0)),
        H(filemeta.get('fuid', 0)),
        filemeta.get('funame', ''),
        H(filemeta.get('fgid', 0)),
        filemeta.get('fgname', ''),
        H(filemeta.get('fid', filemeta.get('index', 0))),
        H(filemeta.get('finode', filemeta.get('index', 0))),
        H(filemeta.get('flinkcount', 1)),
        H(filemeta.get('fdev', 0)),
        H(filemeta.get('fdev_minor', 0)),
        H(filemeta.get('fdev_major', 0)),
        # 'seek-next' marker; presumably "+<delim length>" relative seek —
        # TODO confirm against the reader's use of fseeknextfile.
        "+" + str(len(delim)),
    ]

    # JSON payload
    fjsontype = 'json' if jsondata else 'none'
    if jsondata:
        raw_json = json.dumps(jsondata, separators=(',', ':')).encode('UTF-8')
        json_cs_type = checksumtypes[2]
        # Length = number of top-level entries, size = encoded byte count.
        fjsonlen_hex = _hex(len(jsondata) if hasattr(jsondata, '__len__') else 0)
        fjsonsize_hex = _hex(len(raw_json))
        fjsoncs = _checksum(raw_json, json_cs_type, text=True)
    else:
        raw_json = b''
        json_cs_type = 'none'
        fjsonlen_hex = '0'
        fjsonsize_hex = '0'
        fjsoncs = '0'

    # extras (mirrors global)
    if isinstance(extradata, dict) and extradata:
        payload = json.dumps(extradata, separators=(',', ':')).encode('UTF-8')
        try:
            import base64
            extradata = [base64.b64encode(payload).decode('UTF-8')]
        except Exception:
            extradata = []
    elif isinstance(extradata, dict):
        extradata = []

    extrafields = _hex(len(extradata))
    extras_blob = _append_null(extrafields, delim)
    if extradata:
        extras_blob += _append_nulls(extradata, delim)
    extras_size_hex = _hex(len(extras_blob))

    # Assemble the full field list: fixed fields, JSON meta, extras meta,
    # extras items, then the two checksum *type* fields.
    rec_fields = []
    rec_fields.extend(fields)
    rec_fields.extend([fjsontype, fjsonlen_hex, fjsonsize_hex, json_cs_type, fjsoncs])
    rec_fields.extend([extras_size_hex, extrafields])
    if extradata:
        rec_fields.extend(extradata)

    header_cs_type = checksumtypes[0]
    # Empty content gets checksum type 'none' so readers skip verification.
    content_cs_type = checksumtypes[1] if len(content_bytes_stored) > 0 else 'none'
    rec_fields.extend([header_cs_type, content_cs_type])

    record_fields_len_hex = _hex(len(rec_fields) + 2)  # include two checksum VALUE fields
    header_no_cs = _append_nulls(rec_fields, delim)

    # Compute the header size using empty placeholders where the two
    # checksum values will go, minus one trailing delimiter.
    tmp_with_placeholders = _append_null(record_fields_len_hex, delim) + header_no_cs
    tmp_with_placeholders += _append_null('', delim) + _append_null('', delim)
    headersize_hex = _hex(len(tmp_with_placeholders) - len(_to_bytes(delim)))

    header_with_sizes = _append_null(headersize_hex, delim) + _append_null(record_fields_len_hex, delim) + header_no_cs

    # Header checksum covers sizes + fields; content checksum covers the
    # stored (post-compression) bytes.
    header_checksum = _checksum(header_with_sizes, header_cs_type, text=True)
    content_checksum = _checksum(content_bytes_stored, content_cs_type, text=False)

    header_full = header_with_sizes + _append_nulls([header_checksum, content_checksum], delim)

    # Final layout: header, raw JSON, delimiter, content, delimiter.
    out = header_full + raw_json + _to_bytes(delim) + content_bytes_stored + _to_bytes(delim)
    return out
|
|
520
|
+
|
|
521
|
+
# --------------- Reader helpers ---------------
|
|
522
|
+
def _read_cstring(fp, delim):
    """Read bytes from *fp* up to (and consuming) the delimiter.

    Returns the bytes before the delimiter.  Returns b'' when EOF
    arrives before a complete delimiter is seen (any partial bytes
    already read are discarded).
    """
    d = _to_bytes(delim)
    buf = bytearray()
    while True:
        ch = fp.read(1)
        if not ch:
            # EOF without a terminating delimiter.
            return b''
        buf += ch
        if d and buf.endswith(d):
            return bytes(buf[:-len(d)])
|
|
533
|
+
|
|
534
|
+
def _read_fields(fp, n, delim):
    """Read *n* delimiter-terminated fields from *fp*, UTF-8 decoded."""
    return [_read_cstring(fp, delim).decode('UTF-8') for _ in range(int(n))]
|
|
539
|
+
|
|
540
|
+
def _parse_global_header(fp, formatspecs, skipchecksum=False):
    """Read and decode the archive-level header written by _write_global_header.

    Returns a dict skeleton (encoding, file count, platform, extras,
    checksum type) with an empty 'ffilelist' the caller fills in.
    NOTE(review): the header checksum and magic/version are read but not
    verified here, even when skipchecksum is False — confirm whether
    verification happens in a caller.
    """
    delim = formatspecs['format_delimiter']
    # Fields are consumed in exactly the order the writer emitted them.
    magicver = _read_cstring(fp, delim).decode('UTF-8')
    _ = _read_cstring(fp, delim)  # headersize_hex

    # Field count (read but unused here).
    tmpoutlenhex = _read_cstring(fp, delim).decode('UTF-8')
    fencoding = _read_cstring(fp, delim).decode('UTF-8')
    fostype = _read_cstring(fp, delim).decode('UTF-8')
    # Numeric fields are stored as hex; empty string counts as zero.
    fnumfiles = int(_read_cstring(fp, delim).decode('UTF-8') or '0', 16)
    _ = _read_cstring(fp, delim)  # extras_size
    extrafields = int(_read_cstring(fp, delim).decode('UTF-8') or '0', 16)
    extras = []
    for _i in range(extrafields):
        extras.append(_read_cstring(fp, delim).decode('UTF-8'))
    checksumtype = _read_cstring(fp, delim).decode('UTF-8')
    # Header checksum value (consumed to advance the stream; not checked).
    _header_cs = _read_cstring(fp, delim).decode('UTF-8')
    return {'fencoding': fencoding, 'fnumfiles': fnumfiles, 'fostype': fostype,
            'fextradata': extras, 'fchecksumtype': checksumtype,
            'ffilelist': [], 'fformatspecs': formatspecs}
|
|
559
|
+
|
|
560
|
+
def _index_json_and_checks(vals):
    """Index JSON meta and checksum positions for a header field list `vals`."""
    def _is_hex(s):
        # Non-empty and made only of hexadecimal characters.
        return bool(s) and all(c in '0123456789abcdefABCDEF' for c in s)

    if len(vals) < 25:
        raise ValueError("Record too short to index JSON/checksum meta; got %d fields" % len(vals))

    # Field 25 is the JSON-type tag; the fields after it are either the
    # 4-item layout (len, size, cs-type, cs) or an older 3-item layout
    # without the length field — sniffed below.
    idx = 25
    fjsontype = vals[idx]; idx += 1

    # Peek at the next three fields (safely, in case the list is short).
    v2 = vals[idx] if idx < len(vals) else ''
    v3 = vals[idx + 1] if idx + 1 < len(vals) else ''
    v4 = vals[idx + 2] if idx + 2 < len(vals) else ''

    # Names recognised as checksum types when sniffing the layout.
    cs_candidates = set(['none','crc32','md5','sha1','sha224','sha256','sha384','sha512','blake2b','blake2s'])

    # Two hex fields followed by a checksum-type name => the layout with
    # a JSON length field; otherwise assume the shorter layout.
    if _is_hex(v2) and _is_hex(v3) and v4.lower() in cs_candidates:
        idx_json_type = idx - 1
        idx_json_len = idx
        idx_json_size = idx + 1
        idx_json_cst = idx + 2
        idx_json_cs = idx + 3
        idx += 4
    else:
        idx_json_type = idx - 1
        idx_json_len = None  # no length field in this layout
        idx_json_size = idx
        idx_json_cst = idx + 1
        idx_json_cs = idx + 2
        idx += 3

    if idx + 2 > len(vals):
        raise ValueError("Missing extras header fields")

    # Extras region: a size field, a hex item count, then that many items.
    idx_extras_size = idx
    idx_extras_count = idx + 1
    try:
        count_int = int(vals[idx_extras_count] or '0', 16)
    except Exception:
        raise ValueError("Extras count not hex; got %r" % vals[idx_extras_count])
    # Skip past both meta fields and every extras item.
    idx = idx + 2 + count_int

    if idx + 4 > len(vals):
        raise ValueError("Missing checksum types/values in header")

    # Trailing four fields: header/content checksum types, then values.
    idx_header_cs_type = idx
    idx_content_cs_type = idx + 1
    idx_header_cs = idx + 2
    idx_content_cs = idx + 3

    return {
        'json': (idx_json_type, idx_json_len, idx_json_size, idx_json_cst, idx_json_cs),
        'cstypes': (idx_header_cs_type, idx_content_cs_type),
        'csvals': (idx_header_cs, idx_content_cs),
    }
|
|
616
|
+
|
|
617
|
+
def _parse_record(fp, formatspecs, listonly=False, skipchecksum=False, uncompress=True):
    """Parse one file record from *fp* and return its metadata dict.

    Returns None at the end-of-archive marker (two consecutive '0'
    fields).  With listonly=True the content is seeked over instead of
    read; with uncompress=True stored compression is undone for the
    returned 'fcontent'.  Raises ValueError on malformed fields or
    checksum mismatches (unless skipchecksum).
    """
    delim = formatspecs['format_delimiter']
    dbytes = _to_bytes(delim)

    # A record opens with headersize then field-count (both hex).  A '0'
    # followed by another '0' is the end-of-archive sentinel.
    first = _read_cstring(fp, delim)
    if first == b'0':
        second = _read_cstring(fp, delim)
        if second == b'0':
            return None
        headersize_hex = first.decode('UTF-8')
        fields_len_hex = second.decode('UTF-8')
    else:
        headersize_hex = first.decode('UTF-8')
        fields_len_hex = _read_cstring(fp, delim).decode('UTF-8')

    try:
        n_fields = int(fields_len_hex, 16)
    except Exception:
        raise ValueError("Bad record field-count hex: %r" % fields_len_hex)

    vals = _read_fields(fp, n_fields, delim)
    if len(vals) < 25:
        raise ValueError("Record too short: expected >=25 header fields, got %d" % len(vals))

    # The 25 fixed metadata fields, in the order the writer emits them.
    (ftypehex, fencoding, fcencoding, fname, flinkname,
     fsize_hex, fatime_hex, fmtime_hex, fctime_hex, fbtime_hex,
     fmode_hex, fwinattrs_hex, fcompression, fcsize_hex,
     fuid_hex, funame, fgid_hex, fgname, fid_hex, finode_hex,
     flinkcount_hex, fdev_hex, fdev_minor_hex, fdev_major_hex,
     fseeknextfile) = vals[:25]

    # Positions of the variable-layout JSON/extras/checksum fields.
    idx = _index_json_and_checks(vals)
    (idx_json_type, idx_json_len, idx_json_size, idx_json_cst, idx_json_cs) = idx['json']
    (idx_header_cs_type, idx_content_cs_type) = idx['cstypes']
    (idx_header_cs, idx_content_cs) = idx['csvals']

    fjsonsize_hex = vals[idx_json_size] or '0'
    try:
        fjsonsize = int(fjsonsize_hex, 16)
    except Exception:
        raise ValueError("Bad JSON size hex: %r" % fjsonsize_hex)

    # JSON payload is raw bytes followed by one delimiter.
    json_bytes = fp.read(fjsonsize)
    fp.read(len(dbytes))

    # Read content (stored bytes)
    fsize = int(fsize_hex, 16)
    fcsize = int(fcsize_hex, 16)
    # Compressed entries store fcsize bytes on disk; otherwise fsize.
    read_size = fcsize if (fcompression not in ('', 'none', 'auto') and fcsize > 0) else fsize

    content_stored = b''
    if read_size:
        if listonly:
            # Listing: skip the body without buffering it.
            fp.seek(read_size, io.SEEK_CUR)
        else:
            content_stored = fp.read(read_size)
    # Trailing delimiter after the content.
    fp.read(len(dbytes))

    # Verify checksums (header json/content)
    header_cs_type = vals[idx_header_cs_type]
    content_cs_type = vals[idx_content_cs_type]
    header_cs_val = vals[idx_header_cs]
    content_cs_val = vals[idx_content_cs]
    json_cs_type = vals[idx_json_cst]
    json_cs_val = vals[idx_json_cs]

    # NOTE(review): header_cs_val is extracted but the header checksum
    # itself is never re-verified here — confirm whether that's intended.
    if fjsonsize and not skipchecksum:
        if _checksum(json_bytes, json_cs_type, text=True) != json_cs_val:
            raise ValueError("JSON checksum mismatch for %s" % fname)

    # Content checksum is computed over the *stored* (pre-decompression)
    # bytes, matching how the writer produced it.
    if not skipchecksum and read_size and not listonly:
        if _checksum(content_stored, content_cs_type, text=False) != content_cs_val:
            raise ValueError("Content checksum mismatch for %s" % fname)

    # Optionally decompress for returned content
    content_ret = content_stored
    if not listonly and uncompress and fcompression not in ('', 'none', 'auto'):
        try:
            content_ret = _decompress_bytes(content_stored, fcompression)
        except RuntimeError:
            # e.g. xz data on a Python without lzma: fall back to stored bytes.
            content_ret = content_stored

    # Normalize the name to the archive's './'-rooted convention.
    if not re.match(r'^[\./]', fname):
        fname = './' + fname

    return {
        'fid': int(fid_hex, 16),
        'finode': int(finode_hex, 16),
        'fname': fname,
        'flinkname': flinkname,
        'ftype': int(ftypehex, 16),
        'fsize': fsize,
        'fcsize': fcsize,
        'fatime': int(fatime_hex, 16),
        'fmtime': int(fmtime_hex, 16),
        'fctime': int(fctime_hex, 16),
        'fbtime': int(fbtime_hex, 16),
        'fmode': int(fmode_hex, 16),
        'fwinattributes': int(fwinattrs_hex, 16),
        'fuid': int(fuid_hex, 16),
        'funame': funame,
        'fgid': int(fgid_hex, 16),
        'fgname': fgname,
        'fcompression': fcompression,
        'fseeknext': fseeknextfile,
        'fjson': (json.loads(json_bytes.decode('UTF-8') or 'null') if fjsonsize else {}),
        'fcontent': (None if listonly else content_ret),
    }
|
|
725
|
+
|
|
726
|
+
# ---------------- Public API ----------------
|
|
727
|
+
def pack_neo(infiles, outfile=None, formatspecs=None,
             checksumtypes=("crc32","crc32","crc32"),
             encoding="UTF-8",
             compression="auto",
             compression_level=None):
    """Pack files/dirs to an archive file or return bytes when outfile is None/'-'."""
    # Resolve the format description (delimiter, magic, etc.) once up front.
    fs = _ensure_formatspecs(formatspecs)
    delim = fs['format_delimiter']  # NOTE(review): currently unused in this function

    # In-memory sources? _normalize_pack_inputs returns an item list when the
    # caller passed bytes/dicts instead of filesystem paths; delegate those.
    items = _normalize_pack_inputs(infiles)
    if items is not None:
        return pack_iter_neo(items, outfile, formatspecs=fs,
                             checksumtypes=checksumtypes, encoding=encoding,
                             compression=compression, compression_level=compression_level)

    # A single path string is treated as a one-element list of inputs.
    if isinstance(infiles, basestring):
        paths = [infiles]
    else:
        paths = list(infiles)

    # Build file list (dirs recursively).  Each entry is (path, is_dir);
    # directories get a trailing separator via os.path.join(root, '').
    filelist = []
    base_dir = None  # NOTE(review): assigned but never read afterwards
    if len(paths) == 1 and os.path.isdir(paths[0]):
        base_dir = os.path.abspath(paths[0])
    for p in paths:
        if os.path.isdir(p):
            for root, dirs, files in os.walk(p):
                filelist.append((os.path.join(root, ''), True))
                for name in files:
                    filelist.append((os.path.join(root, name), False))
        else:
            filelist.append((p, False))

    # open destination; to_bytes is True when outfile is None/'-' (in-memory)
    fp, close_me, to_bytes = _wrap_outfile(outfile)

    try:
        _write_global_header(fp, len(filelist), encoding, checksumtypes[0], extradata=[], formatspecs=fs)

        fid = 0
        for apath, is_dir in filelist:
            # lstat: do not follow symlinks when reading metadata
            st = os.lstat(apath)
            mode = st.st_mode
            if is_dir or stat.S_ISDIR(mode):
                raw = b''
                ftype = 5  # directory entry type in this archive format
            else:
                with _iopen(apath, 'rb') as f:
                    raw = f.read()
                ftype = 0  # regular file

            # Decide compression: 'auto' picks algorithm + level by raw size.
            algo = _normalize_algo(compression)
            if algo == 'auto':
                algo, auto_level = _auto_pick_for_size(len(raw))
                level = compression_level if compression_level is not None else auto_level
            else:
                level = compression_level

            # Fall back to zlib if the chosen codec is unavailable at runtime.
            try:
                stored_bytes, used_algo = _compress_bytes(raw, algo, level=level)
            except RuntimeError:
                stored_bytes, used_algo = _compress_bytes(raw, 'zlib', level=(6 if level is None else level))

            # Per-entry metadata record; sizes are of the raw vs stored bytes.
            meta = {
                'ftype': ftype,
                'fencoding': encoding,
                'fcencoding': encoding,
                # Normalize to './'-prefixed forward-slash names unless the
                # path already starts with '.' or '/'.
                'fname': './' + os.path.relpath(apath).replace('\\', '/') if not re.match(r'^[\./]', apath) else apath,
                'flinkname': '',
                'fsize': len(raw),
                'fatime': int(getattr(st, 'st_atime', time.time())),
                'fmtime': int(getattr(st, 'st_mtime', time.time())),
                'fctime': int(getattr(st, 'st_ctime', time.time())),
                # No portable birth time: reuse mtime as an approximation.
                'fbtime': int(getattr(st, 'st_mtime', time.time())),
                'fmode': int(mode),
                'fwinattributes': 0,
                'fcompression': used_algo,
                'fcsize': len(stored_bytes),
                'fuid': int(getattr(st, 'st_uid', 0)),
                'funame': '',
                'fgid': int(getattr(st, 'st_gid', 0)),
                'fgname': '',
                'fid': fid,
                'finode': int(getattr(st, 'st_ino', fid)),
                'flinkcount': int(getattr(st, 'st_nlink', 1)),
                'fdev': int(getattr(st, 'st_dev', 0)),
                'fdev_minor': 0,
                'fdev_major': 0,
                'index': fid,
            }
            fid += 1

            rec = _build_file_header_bytes(meta, jsondata={}, content_bytes_stored=stored_bytes,
                                           checksumtypes=checksumtypes, extradata=[], formatspecs=fs)
            fp.write(rec)

        # end marker: two '0' fields terminate the record stream
        fp.write(_append_nulls(['0','0'], fs['format_delimiter']))
        if to_bytes:
            return fp.getvalue()
    finally:
        if close_me:
            fp.close()
|
|
833
|
+
|
|
834
|
+
def archive_to_array_neo(infile, formatspecs=None,
                         listonly=False, skipchecksum=False, uncompress=True):
    """Parse an archive into a dict: global-header fields plus an 'ffilelist'
    list holding one parsed record per entry.  ``infile`` may be a path,
    bytes, or an open stream (handled by ``_wrap_infile``)."""
    spec = _ensure_formatspecs(formatspecs)
    handle, should_close = _wrap_infile(infile)
    try:
        result = _parse_global_header(handle, spec, skipchecksum=skipchecksum)
        # Keep appending records until the parser signals the end marker.
        record = _parse_record(handle, spec, listonly=listonly,
                               skipchecksum=skipchecksum, uncompress=uncompress)
        while record is not None:
            result['ffilelist'].append(record)
            record = _parse_record(handle, spec, listonly=listonly,
                                   skipchecksum=skipchecksum, uncompress=uncompress)
        return result
    finally:
        if should_close:
            handle.close()
|
|
849
|
+
|
|
850
|
+
def unpack_neo(infile, outdir='.', formatspecs=None, skipchecksum=False, uncompress=True):
    """Extract an archive to ``outdir``.

    When ``outdir`` is None/'-' the archive is extracted in memory and a dict
    of {name: bytes} (directories map to None) is returned.  Otherwise files
    are written under ``outdir`` (created if missing) and True is returned;
    returns False when the archive parses to nothing.

    Fixes vs. the previous version:
    - entry names are stripped of leading './' *segments* instead of using
      str.lstrip('./'), which strips characters and mangled dot-files
      (e.g. './.gitignore' became 'gitignore');
    - entries whose resolved path would escape ``outdir`` (e.g. '../x')
      raise ValueError instead of being written outside the destination.
    """
    arr = archive_to_array_neo(infile, formatspecs=formatspecs, listonly=False,
                               skipchecksum=skipchecksum, uncompress=uncompress)
    if not arr:
        return False

    # In-memory extraction
    if outdir in (None, '-', b'-'):
        result = {}
        for ent in arr['ffilelist']:
            if ent['ftype'] == 5:
                result[ent['fname']] = None
            else:
                result[ent['fname']] = ent.get('fcontent') or b''
        return result

    if not os.path.isdir(outdir):
        if os.path.exists(outdir):
            raise IOError("not a directory: %r" % outdir)
        os.makedirs(outdir)

    out_root = os.path.abspath(outdir)
    for ent in arr['ffilelist']:
        relname = ent['fname']
        # Strip leading './' path segments (prefix-wise, not character-wise).
        while relname.startswith('./'):
            relname = relname[2:]
        relname = relname.lstrip('/')
        path = os.path.join(outdir, relname)

        # Refuse to write outside the destination directory (path traversal).
        abs_path = os.path.abspath(path)
        if abs_path != out_root and not abs_path.startswith(out_root + os.sep):
            raise ValueError("unsafe path in archive: %r" % ent['fname'])

        if ent['ftype'] == 5:  # directory entry
            if not os.path.isdir(path):
                os.makedirs(path)
            continue
        d = os.path.dirname(path)
        if d and not os.path.isdir(d):
            os.makedirs(d)
        with _iopen(path, 'wb') as f:
            f.write(ent.get('fcontent') or b'')
        # Best-effort permission restore; ignore failures (e.g. on Windows).
        try:
            os.chmod(path, ent.get('fmode', 0o666))
        except Exception:
            pass
    return True
|
|
885
|
+
|
|
886
|
+
def repack_neo(infile, outfile=None, formatspecs=None,
               checksumtypes=("crc32","crc32","crc32"),
               compression="auto",
               compression_level=None):
    """Re-write an existing archive, optionally changing the compression.

    Reads the source with uncompress=False so entry contents stay in their
    stored (possibly compressed) form; entries already in the target codec
    are copied through without a decompress/recompress round trip.
    Returns bytes when ``outfile`` is None/'-'; otherwise writes a file.
    """
    arr = archive_to_array_neo(infile, formatspecs=formatspecs, listonly=False, skipchecksum=False, uncompress=False)
    fs = _ensure_formatspecs(formatspecs)
    fp, close_me, to_bytes = _wrap_outfile(outfile)
    try:
        _write_global_header(fp, len(arr['ffilelist']), arr.get('fencoding', 'UTF-8'), checksumtypes[0],
                             extradata=arr.get('fextradata', []), formatspecs=fs)
        for i, ent in enumerate(arr['ffilelist']):
            src_algo = _normalize_algo(ent.get('fcompression', 'none'))
            dst_algo = _normalize_algo(compression)

            stored_src = ent.get('fcontent') or b''  # we requested uncompress=False, so this is stored bytes

            # Recover the raw payload (best effort: fall back to the stored
            # bytes if decompression is unavailable at runtime).
            if dst_algo == 'auto':
                try:
                    raw = _decompress_bytes(stored_src, src_algo) if src_algo != 'none' else stored_src
                except RuntimeError:
                    raw = stored_src
                # 'auto' re-picks algorithm and level from the raw size.
                dst_algo, dst_level = _auto_pick_for_size(len(raw))
            else:
                if src_algo != 'none':
                    try:
                        raw = _decompress_bytes(stored_src, src_algo)
                    except RuntimeError:
                        raw = stored_src
                else:
                    raw = stored_src
                dst_level = compression_level

            if dst_algo == src_algo or (dst_algo == 'none' and src_algo == 'none'):
                # Same codec: pass the stored bytes through unchanged, but
                # still recover the raw length for the header's fsize field.
                stored_bytes = stored_src
                used_algo = src_algo
                try:
                    raw_len = len(_decompress_bytes(stored_src, src_algo)) if src_algo != 'none' else len(stored_src)
                except RuntimeError:
                    raw_len = len(stored_src)
            else:
                stored_bytes, used_algo = _compress_bytes(raw, dst_algo, level=dst_level)
                raw_len = len(raw)

            # Rebuild the per-entry metadata, preserving source attributes
            # where present and substituting sensible defaults otherwise.
            meta = {
                'ftype': ent['ftype'],
                'fencoding': arr.get('fencoding', 'UTF-8'),
                'fcencoding': arr.get('fencoding', 'UTF-8'),
                'fname': ent['fname'],
                'flinkname': ent.get('flinkname',''),
                'fsize': raw_len,
                'fatime': ent.get('fatime', int(time.time())),
                'fmtime': ent.get('fmtime', int(time.time())),
                'fctime': ent.get('fctime', int(time.time())),
                'fbtime': ent.get('fbtime', int(time.time())),
                'fmode': ent.get('fmode', stat.S_IFREG | 0o666),
                'fwinattributes': ent.get('fwinattributes', 0),
                'fcompression': used_algo,
                'fcsize': len(stored_bytes),
                'fuid': ent.get('fuid', 0),
                'funame': ent.get('funame', ''),
                'fgid': ent.get('fgid', 0),
                'fgname': ent.get('fgname', ''),
                'fid': ent.get('fid', i),
                'finode': ent.get('finode', i),
                'flinkcount': ent.get('flinkcount', 1),
                'fdev': ent.get('fdev', 0),
                'fdev_minor': ent.get('fdev_minor', 0),
                'fdev_major': ent.get('fdev_major', 0),
                'index': i,
            }
            rec = _build_file_header_bytes(meta, jsondata=ent.get('fjson', {}), content_bytes_stored=stored_bytes,
                                           checksumtypes=checksumtypes, extradata=[], formatspecs=fs)
            fp.write(rec)
        # End marker: two '0' fields terminate the record stream.
        fp.write(_append_nulls(['0','0'], fs['format_delimiter']))
        if to_bytes:
            return fp.getvalue()
    finally:
        if close_me:
            fp.close()
|
|
965
|
+
|
|
966
|
+
# -----------------------------------------------------------------------------
|
|
967
|
+
# Alt validation and listing helpers (lowercase names for consistency)
|
|
968
|
+
# -----------------------------------------------------------------------------
|
|
969
|
+
|
|
970
|
+
def _read_record_raw(fp, formatspecs):
    """Low-level read of a single record returning header fields and stored blobs.

    Returns None at the end marker ('0','0'); otherwise a 5-tuple of
    (headersize_hex, fields_len_hex, vals, json_bytes, content_stored)
    where ``vals`` is the raw header field list and the blobs are the bytes
    exactly as stored in the archive (not decompressed).
    """
    delim = formatspecs['format_delimiter']
    dbytes = _to_bytes(delim)

    # First field is either the header size, or the first '0' of the
    # two-'0' end-of-archive marker.
    first = _read_cstring(fp, delim)
    if first == b'0':
        second = _read_cstring(fp, delim)
        if second == b'0':
            return None  # end marker reached
        headersize_hex = first.decode('UTF-8')
        fields_len_hex = second.decode('UTF-8')
    else:
        headersize_hex = first.decode('UTF-8')
        fields_len_hex = _read_cstring(fp, delim).decode('UTF-8')

    try:
        n_fields = int(fields_len_hex, 16)
    except Exception:
        raise ValueError("Bad record field-count hex: %r" % fields_len_hex)

    vals = _read_fields(fp, n_fields, delim)
    # JSON sub-block location varies with extradata; resolve indices first.
    idxs = _index_json_and_checks(vals)
    (idx_json_type, idx_json_len, idx_json_size, idx_json_cst, idx_json_cs) = idxs['json']

    fjsonsize_hex = vals[idx_json_size] or '0'
    try:
        fjsonsize = int(fjsonsize_hex, 16)
    except Exception:
        raise ValueError("Bad JSON size hex: %r" % fjsonsize_hex)

    json_bytes = fp.read(fjsonsize)
    fp.read(len(dbytes))  # consume the delimiter that follows the JSON blob

    # Fixed header field positions: 5=fsize, 12=fcompression, 13=fcsize.
    fcompression = vals[12]
    fsize_hex = vals[5]
    fcsize_hex = vals[13]
    fsize = int(fsize_hex, 16)
    fcsize = int(fcsize_hex, 16)
    # Compressed entries store fcsize bytes on disk; plain ones store fsize.
    read_size = fcsize if (fcompression not in ('', 'none', 'auto') and fcsize > 0) else fsize
    content_stored = b''
    if read_size:
        content_stored = fp.read(read_size)
        fp.read(len(dbytes))  # consume trailing delimiter after content

    return headersize_hex, fields_len_hex, vals, json_bytes, content_stored
|
|
1016
|
+
|
|
1017
|
+
def archivefilevalidate_neo(infile, formatspecs=None, verbose=False, return_details=False):
    """Validate an ArchiveFile using the alt parser.

    Recomputes and compares the header, JSON, and content checksums of every
    record.  Returns a bool (all entries valid), or (bool, details) when
    ``return_details`` is True, where details is a list of per-entry dicts.
    """
    fs = _ensure_formatspecs(formatspecs)
    details = []
    ok_all = True

    fp, close_me = _wrap_infile(infile)
    try:
        # Global header checksum is verified here (skipchecksum=False).
        _ = _parse_global_header(fp, fs, skipchecksum=False)
        idx = 0
        while True:
            raw = _read_record_raw(fp, fs)
            if raw is None:
                break  # end marker
            headersize_hex, fields_len_hex, vals, json_bytes, content_stored = raw

            # Locate the variable-position checksum fields in the header.
            idxs = _index_json_and_checks(vals)
            (idx_json_type, idx_json_len, idx_json_size, idx_json_cst, idx_json_cs) = idxs['json']
            (idx_header_cs_type, idx_content_cs_type) = idxs['cstypes']
            (idx_header_cs, idx_content_cs) = idxs['csvals']

            fname = vals[3]
            header_cs_type = vals[idx_header_cs_type]
            content_cs_type = vals[idx_content_cs_type]
            header_cs_val = vals[idx_header_cs]
            content_cs_val = vals[idx_content_cs]
            json_cs_type = vals[idx_json_cst]
            json_cs_val = vals[idx_json_cs]

            # Reconstruct the exact byte string that was checksummed when the
            # header was written: sizes + all fields except the two checksum
            # values themselves (vals[:-2]).
            delim = fs['format_delimiter']
            header_bytes = _append_null(headersize_hex, delim) + _append_null(fields_len_hex, delim) + _append_nulls(vals[:-2], delim)
            computed_hcs = _checksum(header_bytes, header_cs_type, text=True)
            h_ok = (computed_hcs == header_cs_val)

            # JSON blob checksum (only when a JSON blob is present).
            j_ok = True
            try:
                fjsonsize_hex = vals[idx_json_size] or '0'
                fjsonsize = int(fjsonsize_hex, 16) if fjsonsize_hex else 0
            except Exception:
                fjsonsize = 0
            if fjsonsize:
                computed_jcs = _checksum(json_bytes, json_cs_type, text=True)
                j_ok = (computed_jcs == json_cs_val)

            # Content checksum is computed over the stored (possibly
            # compressed) bytes, matching how it was written.
            c_ok = True
            if content_stored:
                computed_ccs = _checksum(content_stored, content_cs_type, text=False)
                c_ok = (computed_ccs == content_cs_val)

            entry_ok = h_ok and j_ok and c_ok
            ok_all = ok_all and entry_ok
            if verbose or return_details:
                details.append({
                    'index': idx,
                    'name': fname,
                    'header_ok': h_ok,
                    'json_ok': j_ok,
                    'content_ok': c_ok,
                    'fcompression': vals[12],
                    'fsize_hex': vals[5],
                    'fcsize_hex': vals[13],
                })
            idx += 1
    finally:
        if close_me:
            fp.close()

    if return_details:
        return ok_all, details
    return ok_all
|
|
1087
|
+
|
|
1088
|
+
def archivefilelistfiles_neo(infile, formatspecs=None, advanced=False, include_dirs=True):
    """List entries in an archive without extracting.

    Returns a list of normalized entry names, or — when ``advanced`` is
    True — a list of dicts with type, size, timestamps, and mode.
    Checksums are not verified while listing.
    """
    spec = _ensure_formatspecs(formatspecs)
    listing = []

    handle, should_close = _wrap_infile(infile)
    try:
        _ = _parse_global_header(handle, spec, skipchecksum=True)
        while True:
            record = _read_record_raw(handle, spec)
            if record is None:
                break
            _hs_hex, _nf_hex, fields, _json_blob, _content_blob = record

            # Fixed header positions: 0=type, 3=name, 5=size, 6=atime,
            # 7=mtime, 10=mode, 12=compression, 13=stored size.
            entry_type = int(fields[0], 16)
            directory = (entry_type == 5)
            if directory and not include_dirs:
                continue

            entry_name = fields[3]
            if not re.match(r'^[\./]', entry_name):
                entry_name = './' + entry_name

            if not advanced:
                listing.append(entry_name)
                continue

            listing.append({
                'name': entry_name,
                'type': 'dir' if directory else 'file',
                'compression': fields[12] or 'none',
                'size': int(fields[5], 16),
                'stored_size': int(fields[13], 16),
                'mtime': int(fields[7], 16),
                'atime': int(fields[6], 16),
                'mode': int(fields[10], 16),
            })
    finally:
        if should_close:
            handle.close()
    return listing
|
|
1137
|
+
|
|
1138
|
+
# -----------------------------------------------------------------------------
|
|
1139
|
+
# Pack from iterator + foreign-archive conversion (stdlib + optional deps)
|
|
1140
|
+
# -----------------------------------------------------------------------------
|
|
1141
|
+
|
|
1142
|
+
def pack_iter_neo(items, outfile, formatspecs=None,
                  checksumtypes=("crc32","crc32","crc32"),
                  encoding="UTF-8",
                  compression="auto",
                  compression_level=None):
    """
    Pack directly from an iterable of entries without touching the filesystem.
    Each item may be either a tuple (name, is_dir, data_bytes_or_None)
    or a dict with keys:
        name (str), is_dir (bool), data (bytes or None),
        mode (int, optional), mtime (int, optional),
        uid (int), gid (int), uname (str), gname (str)

    Returns bytes when ``outfile`` is None/'-'; otherwise writes a file.
    """
    fs = _ensure_formatspecs(formatspecs)
    fp, close_me, to_bytes = _wrap_outfile(outfile)

    try:
        # Count items first (may be a generator -> materialize)
        if not hasattr(items, '__len__'):
            items = list(items)
        _write_global_header(fp, len(items), encoding, checksumtypes[0], extradata=[], formatspecs=fs)

        fid = 0
        for it in items:
            # Accept both the dict form (full metadata) and the bare tuple.
            if isinstance(it, dict):
                name = it.get('name')
                is_dir = bool(it.get('is_dir', False))
                data = it.get('data', None)
                mode = int(it.get('mode', stat.S_IFDIR | 0o755 if is_dir else stat.S_IFREG | 0o666))
                mtime = int(it.get('mtime', time.time()))
                uid = int(it.get('uid', 0)); gid = int(it.get('gid', 0))
                uname = it.get('uname', ''); gname = it.get('gname', '')
            else:
                name, is_dir, data = it
                # Trailing-slash names with no data are also treated as dirs.
                mode = stat.S_IFDIR | 0o755 if is_dir or (name.endswith('/') and data is None) else stat.S_IFREG | 0o666
                mtime = int(time.time())
                uid = gid = 0; uname = gname = ''

            # Normalize name: forward slashes, './' prefix unless already
            # starting with '.' or '/'.
            name = name.replace('\\', '/')
            if not re.match(r'^[\./]', name):
                name = './' + name

            if is_dir or name.endswith('/'):
                raw = b''
                ftype = 5  # directory entry type
            else:
                raw = data or b''
                ftype = 0  # regular file

            # Decide compression: 'auto' picks algorithm + level by raw size.
            algo = _normalize_algo(compression)
            if algo == 'auto':
                algo, auto_level = _auto_pick_for_size(len(raw))
                level = compression_level if compression_level is not None else auto_level
            else:
                level = compression_level

            # Fall back to zlib if the chosen codec is unavailable at runtime.
            try:
                stored_bytes, used_algo = _compress_bytes(raw, algo, level=level)
            except RuntimeError:
                stored_bytes, used_algo = _compress_bytes(raw, 'zlib', level=(6 if level is None else level))

            meta = {
                'ftype': ftype,
                'fencoding': encoding,
                'fcencoding': encoding,
                'fname': name,
                'flinkname': '',
                'fsize': len(raw),
                # Only mtime is supplied by callers; reuse it for all stamps.
                'fatime': mtime,
                'fmtime': mtime,
                'fctime': mtime,
                'fbtime': mtime,
                'fmode': int(mode),
                'fwinattributes': 0,
                'fcompression': used_algo,
                'fcsize': len(stored_bytes),
                'fuid': uid,
                'funame': uname,
                'fgid': gid,
                'fgname': gname,
                'fid': fid,
                'finode': fid,
                'flinkcount': 1,
                'fdev': 0,
                'fdev_minor': 0,
                'fdev_major': 0,
                'index': fid,
            }
            fid += 1

            rec = _build_file_header_bytes(meta, jsondata={}, content_bytes_stored=stored_bytes,
                                           checksumtypes=checksumtypes, extradata=[], formatspecs=fs)
            fp.write(rec)

        # end marker: two '0' fields terminate the record stream
        fp.write(_append_nulls(['0','0'], fs['format_delimiter']))
        if to_bytes:
            return fp.getvalue()
    finally:
        if close_me:
            fp.close()
|
|
1245
|
+
|
|
1246
|
+
def _sniff_foreign_type(path):
|
|
1247
|
+
lower = os.path.basename(path).lower() if isinstance(path, (str, bytes)) else ''
|
|
1248
|
+
# Extension first
|
|
1249
|
+
if lower.endswith('.zip'):
|
|
1250
|
+
return 'zip'
|
|
1251
|
+
if lower.endswith(('.tar', '.tar.gz', '.tgz', '.tar.bz2', '.tbz2', '.tar.xz', '.txz')):
|
|
1252
|
+
return 'tar'
|
|
1253
|
+
if lower.endswith('.rar'):
|
|
1254
|
+
return 'rar'
|
|
1255
|
+
if lower.endswith('.7z'):
|
|
1256
|
+
return '7z'
|
|
1257
|
+
# Fallback: stdlib probes for zip/tar only
|
|
1258
|
+
try:
|
|
1259
|
+
import zipfile
|
|
1260
|
+
if isinstance(path, basestring) and zipfile.is_zipfile(path):
|
|
1261
|
+
return 'zip'
|
|
1262
|
+
except Exception:
|
|
1263
|
+
pass
|
|
1264
|
+
try:
|
|
1265
|
+
import tarfile
|
|
1266
|
+
if isinstance(path, basestring) and hasattr(tarfile, 'is_tarfile') and tarfile.is_tarfile(path):
|
|
1267
|
+
return 'tar'
|
|
1268
|
+
except Exception:
|
|
1269
|
+
pass
|
|
1270
|
+
return None
|
|
1271
|
+
|
|
1272
|
+
def _iter_tar_members(tarf):
|
|
1273
|
+
for m in tarf.getmembers():
|
|
1274
|
+
name = m.name
|
|
1275
|
+
if m.isdir():
|
|
1276
|
+
yield {'name': name.rstrip('/') + '/', 'is_dir': True, 'data': None,
|
|
1277
|
+
'mode': (stat.S_IFDIR | (m.mode or 0o755)), 'mtime': int(getattr(m, 'mtime', time.time())),
|
|
1278
|
+
'uid': int(getattr(m, 'uid', 0)), 'gid': int(getattr(m, 'gid', 0)),
|
|
1279
|
+
'uname': getattr(m, 'uname', ''), 'gname': getattr(m, 'gname', '')}
|
|
1280
|
+
else:
|
|
1281
|
+
try:
|
|
1282
|
+
fh = tarf.extractfile(m)
|
|
1283
|
+
data = fh.read() if fh is not None else b''
|
|
1284
|
+
except Exception:
|
|
1285
|
+
data = b''
|
|
1286
|
+
yield {'name': name, 'is_dir': False, 'data': data,
|
|
1287
|
+
'mode': (stat.S_IFREG | (m.mode or 0o644)), 'mtime': int(getattr(m, 'mtime', time.time())),
|
|
1288
|
+
'uid': int(getattr(m, 'uid', 0)), 'gid': int(getattr(m, 'gid', 0)),
|
|
1289
|
+
'uname': getattr(m, 'uname', ''), 'gname': getattr(m, 'gname', '')}
|
|
1290
|
+
|
|
1291
|
+
def _iter_zip_members(zipf):
|
|
1292
|
+
for zi in zipf.infolist():
|
|
1293
|
+
name = zi.filename
|
|
1294
|
+
mode = (zi.external_attr >> 16) & 0o777 if hasattr(zi, 'external_attr') else 0o644
|
|
1295
|
+
mtime = int(time.mktime(getattr(zi, 'date_time', (1980,1,1,0,0,0)) + (0,0,-1)))
|
|
1296
|
+
if name.endswith('/'):
|
|
1297
|
+
yield {'name': name, 'is_dir': True, 'data': None,
|
|
1298
|
+
'mode': (stat.S_IFDIR | (mode or 0o755)), 'mtime': mtime}
|
|
1299
|
+
else:
|
|
1300
|
+
try:
|
|
1301
|
+
data = zipf.read(zi)
|
|
1302
|
+
except Exception:
|
|
1303
|
+
data = b''
|
|
1304
|
+
yield {'name': name, 'is_dir': False, 'data': data,
|
|
1305
|
+
'mode': (stat.S_IFREG | (mode or 0o644)), 'mtime': mtime}
|
|
1306
|
+
|
|
1307
|
+
def _iter_rar_members(rarf):
|
|
1308
|
+
for ri in rarf.infolist():
|
|
1309
|
+
name = getattr(ri, 'filename', None) or getattr(ri, 'arcname', None)
|
|
1310
|
+
if name is None:
|
|
1311
|
+
continue
|
|
1312
|
+
try:
|
|
1313
|
+
is_dir = ri.is_dir()
|
|
1314
|
+
except Exception:
|
|
1315
|
+
is_dir = name.endswith('/') or name.endswith('\\')
|
|
1316
|
+
try:
|
|
1317
|
+
dt = getattr(ri, 'date_time', None)
|
|
1318
|
+
if dt:
|
|
1319
|
+
mtime = int(time.mktime(tuple(dt) + (0,0,-1)))
|
|
1320
|
+
else:
|
|
1321
|
+
mtime = int(time.time())
|
|
1322
|
+
except Exception:
|
|
1323
|
+
mtime = int(time.time())
|
|
1324
|
+
if is_dir:
|
|
1325
|
+
yield {'name': name, 'is_dir': True, 'data': None,
|
|
1326
|
+
'mode': (stat.S_IFDIR | 0o755), 'mtime': mtime}
|
|
1327
|
+
else:
|
|
1328
|
+
try:
|
|
1329
|
+
data = rarf.read(ri)
|
|
1330
|
+
except Exception:
|
|
1331
|
+
data = b''
|
|
1332
|
+
yield {'name': name, 'is_dir': False, 'data': data,
|
|
1333
|
+
'mode': (stat.S_IFREG | 0o644), 'mtime': mtime}
|
|
1334
|
+
|
|
1335
|
+
def _iter_7z_members(z7):
|
|
1336
|
+
names = []
|
|
1337
|
+
try:
|
|
1338
|
+
entries = z7.list()
|
|
1339
|
+
for e in entries:
|
|
1340
|
+
name = getattr(e, 'filename', None) or getattr(e, 'name', None)
|
|
1341
|
+
if name is None:
|
|
1342
|
+
continue
|
|
1343
|
+
is_dir = bool(getattr(e, 'is_directory', False)) or name.endswith('/') or name.endswith('\\')
|
|
1344
|
+
names.append((name, is_dir))
|
|
1345
|
+
except Exception:
|
|
1346
|
+
try:
|
|
1347
|
+
for n in z7.getnames():
|
|
1348
|
+
is_dir = n.endswith('/') or n.endswith('\\')
|
|
1349
|
+
names.append((n, is_dir))
|
|
1350
|
+
except Exception:
|
|
1351
|
+
names = []
|
|
1352
|
+
try:
|
|
1353
|
+
data_map = z7.readall()
|
|
1354
|
+
except Exception:
|
|
1355
|
+
data_map = {}
|
|
1356
|
+
|
|
1357
|
+
for name, is_dir in names:
|
|
1358
|
+
if is_dir:
|
|
1359
|
+
yield {'name': name, 'is_dir': True, 'data': None,
|
|
1360
|
+
'mode': (stat.S_IFDIR | 0o755), 'mtime': int(time.time())}
|
|
1361
|
+
else:
|
|
1362
|
+
try:
|
|
1363
|
+
blob = data_map.get(name, b'')
|
|
1364
|
+
if not isinstance(blob, (bytes, bytearray)):
|
|
1365
|
+
try:
|
|
1366
|
+
blob = b''.join(blob) if isinstance(blob, list) else bytes(blob)
|
|
1367
|
+
except Exception:
|
|
1368
|
+
blob = b''
|
|
1369
|
+
except Exception:
|
|
1370
|
+
blob = b''
|
|
1371
|
+
yield {'name': name, 'is_dir': False, 'data': blob,
|
|
1372
|
+
'mode': (stat.S_IFREG | 0o644), 'mtime': int(time.time())}
|
|
1373
|
+
|
|
1374
|
+
def convert_foreign_to_neo(infile, outfile=None, formatspecs=None,
                           checksumtypes=("crc32","crc32","crc32"),
                           compression="auto",
                           compression_level=None):
    """
    Convert a foreign archive (zip/tar/rar/7z) into the alt ArchiveFile format.

    Uses stdlib for zip/tar; requires 'rarfile' for RAR and 'py7zr' for 7z.
    ``infile`` may be a path, raw bytes, or an open binary stream.
    Returns bytes when outfile is None/'-'; otherwise writes a file.

    Fix: in-memory/stream tar input previously passed a BytesIO as the first
    positional argument of tarfile.open(), which is the *path* parameter;
    streams are now routed through ``fileobj=``.
    """
    kind = _sniff_foreign_type(infile) if isinstance(infile, basestring) else None

    # Non-path inputs with no sniffed kind are tried as ZIP first.
    if kind == 'zip' or (not kind and not isinstance(infile, basestring)):
        import zipfile
        from io import BytesIO
        zsrc = BytesIO(infile) if isinstance(infile, (bytes, bytearray, memoryview)) else (infile if isinstance(infile, basestring) else _wrap_infile(infile)[0])
        try:
            with zipfile.ZipFile(zsrc, 'r') as zf:
                return pack_iter_neo(_iter_zip_members(zf), outfile, formatspecs=formatspecs,
                                     checksumtypes=checksumtypes, compression=compression,
                                     compression_level=compression_level)
        except zipfile.BadZipfile:
            pass  # maybe not a zip; try others

    if kind == 'tar' or (not kind and isinstance(infile, basestring) and os.path.splitext(infile)[1].startswith('.tar')):
        import tarfile
        from io import BytesIO
        src = BytesIO(infile) if isinstance(infile, (bytes, bytearray, memoryview)) else (infile if isinstance(infile, basestring) else _wrap_infile(infile)[0])
        # tarfile.open()'s first positional argument is a filesystem path;
        # file objects must be supplied via the fileobj= keyword.
        if isinstance(src, basestring):
            tf = tarfile.open(src, 'r:*')
        else:
            tf = tarfile.open(fileobj=src, mode='r:*')
        with tf:
            return pack_iter_neo(_iter_tar_members(tf), outfile, formatspecs=formatspecs,
                                 checksumtypes=checksumtypes, compression=compression,
                                 compression_level=compression_level)

    if kind == 'rar':
        try:
            import rarfile
        except Exception as e:
            raise RuntimeError("RAR support requires 'rarfile' package: %s" % e)
        from io import BytesIO
        rsrc = BytesIO(infile) if isinstance(infile, (bytes, bytearray, memoryview)) else (infile if isinstance(infile, basestring) else _wrap_infile(infile)[0])
        with rarfile.RarFile(rsrc) as rf:
            return pack_iter_neo(_iter_rar_members(rf), outfile, formatspecs=formatspecs,
                                 checksumtypes=checksumtypes, compression=compression,
                                 compression_level=compression_level)

    if kind == '7z':
        try:
            import py7zr
        except Exception as e:
            raise RuntimeError("7z support requires 'py7zr' package: %s" % e)
        from io import BytesIO
        zsrc = BytesIO(infile) if isinstance(infile, (bytes, bytearray, memoryview)) else (infile if isinstance(infile, basestring) else _wrap_infile(infile)[0])
        with py7zr.SevenZipFile(zsrc, 'r') as z7:
            return pack_iter_neo(_iter_7z_members(z7), outfile, formatspecs=formatspecs,
                                 checksumtypes=checksumtypes, compression=compression,
                                 compression_level=compression_level)

    raise ValueError("Unsupported foreign archive (zip/tar/rar/7z only): %r" % (infile,))
|