pylhasa 0.1.1__cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pylhasa/__init__.py +18 -0
- pylhasa/_archive.py +423 -0
- pylhasa/_exceptions.py +19 -0
- pylhasa/_paths.py +71 -0
- pylhasa/_pylhasa.so +0 -0
- pylhasa-0.1.1.dist-info/METADATA +159 -0
- pylhasa-0.1.1.dist-info/RECORD +10 -0
- pylhasa-0.1.1.dist-info/WHEEL +6 -0
- pylhasa-0.1.1.dist-info/licenses/LICENSE +21 -0
- pylhasa-0.1.1.dist-info/licenses/native/vendor/lhasa/COPYING.md +17 -0
pylhasa/__init__.py
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
"""pylhasa - LHA/LZH archive reader with safe extraction and streaming."""
|
|
2
|
+
|
|
3
|
+
from ._archive import Archive, Entry, from_bytes, open, open_bytes, open_fileobj
|
|
4
|
+
from ._exceptions import BadArchiveError, PylhasaError, UnsafePathError
|
|
5
|
+
|
|
6
|
+
__all__: list[str] = [
|
|
7
|
+
"Archive",
|
|
8
|
+
"Entry",
|
|
9
|
+
"open",
|
|
10
|
+
"from_bytes",
|
|
11
|
+
"open_bytes",
|
|
12
|
+
"open_fileobj",
|
|
13
|
+
"PylhasaError",
|
|
14
|
+
"BadArchiveError",
|
|
15
|
+
"UnsafePathError",
|
|
16
|
+
]
|
|
17
|
+
|
|
18
|
+
__version__: str = "0.1.1"
|
pylhasa/_archive.py
ADDED
|
@@ -0,0 +1,423 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import io
|
|
4
|
+
import os
|
|
5
|
+
import shutil
|
|
6
|
+
import tempfile
|
|
7
|
+
from dataclasses import dataclass
|
|
8
|
+
from datetime import datetime, timedelta, timezone
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from types import TracebackType
|
|
11
|
+
from typing import Dict, Iterable, Iterator, Optional, Union
|
|
12
|
+
|
|
13
|
+
from ._exceptions import BadArchiveError, PylhasaError, UnsafePathError
|
|
14
|
+
from ._paths import NormalizedPath, normalize_path
|
|
15
|
+
|
|
16
|
+
try:
|
|
17
|
+
from . import _pylhasa
|
|
18
|
+
except ImportError as exc: # pragma: no cover - import error shown at runtime
|
|
19
|
+
raise ImportError("pylhasa native extension is not built") from exc
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@dataclass(frozen=True)
class Entry:
    """
    Metadata for a single archive entry.

    Instances are immutable snapshots built once when the archive is opened;
    they also act as handles back into the owning :class:`Archive` for
    streaming (`open()`) and whole-file (`read()`) access.

    Fields:
    - raw_path: best-effort decoded original path from the archive.
    - raw_path_bytes: raw path bytes from the archive.
    - safe_path: sanitized relative path (None if unsafe).
    - size: uncompressed size in bytes.
    - compressed_size: compressed size in bytes.
    - method: compression method string (e.g., "-lh5-").
    - crc: CRC-16 from header (None if absent).
    - timestamp: Unix timestamp if present (None if absent).
    - is_dir: True if entry is a directory.
    - is_symlink: True if entry is a symlink.
    - header_level: LHA header level (0-3).
    - os_type: OS type byte from header.
    - extra_flags: parsed extended header flags bitfield.
    - unix_perms: Unix permissions if present.
    - unix_uid: Unix UID if present.
    - unix_gid: Unix GID if present.
    - os9_perms: OS-9 permissions if present.
    - unix_username: Unix username if present.
    - unix_group: Unix group name if present.
    - common_crc: common header CRC if present.
    - win_creation_time: Windows FILETIME creation time if present.
    - win_modification_time: Windows FILETIME modification time if present.
    - win_access_time: Windows FILETIME access time if present.
    - datetime_utc(): best-effort UTC datetime (Windows FILETIME if present, otherwise Unix timestamp).
    - symlink_target: symlink target if present.
    - raw_header_bytes: raw header bytes if present.
    - path: directory path component if present.
    - filename: filename component if present.
    """
    raw_path: str
    raw_path_bytes: bytes
    safe_path: Optional[str]
    size: int
    compressed_size: int
    method: str
    crc: Optional[int]
    timestamp: Optional[int]
    is_dir: bool
    is_symlink: bool
    header_level: int
    os_type: int
    extra_flags: int
    unix_perms: Optional[int]
    unix_uid: Optional[int]
    unix_gid: Optional[int]
    os9_perms: Optional[int]
    unix_username: Optional[str]
    unix_group: Optional[str]
    common_crc: Optional[int]
    win_creation_time: Optional[int]
    win_modification_time: Optional[int]
    win_access_time: Optional[int]
    symlink_target: Optional[str]
    raw_header_bytes: Optional[bytes]
    path: Optional[str]
    filename: Optional[str]
    # Internal: position of this entry in the backend's entry list,
    # used to reopen the entry's data stream.
    _index: int
    # Internal: back-reference to the owning Archive that services
    # open()/read().  NOTE(review): included in dataclass eq/repr.
    _archive: "Archive"

    def open(self) -> io.BufferedReader:
        """Open the entry for streaming reads of decompressed data."""
        return self._archive._open_entry(self)

    def read(self) -> bytes:
        """Read the entry fully into memory (convenience API)."""
        return self._archive.read(self)

    def datetime_utc(self) -> Optional[datetime]:
        """
        Return the best available timestamp as a timezone-aware UTC datetime.

        Prefers Windows FILETIME modification time when present, otherwise
        falls back to the Unix timestamp.
        """
        # FILETIME carries higher precision and is preferred when the
        # extended header supplied one.
        if self.win_modification_time is not None:
            return _filetime_to_datetime(self.win_modification_time)
        if self.timestamp is None:
            return None
        return datetime.fromtimestamp(self.timestamp, tz=timezone.utc)
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
class _EntryRawIO(io.RawIOBase):
|
|
110
|
+
def __init__(self, reader: "_pylhasa.EntryReader") -> None:
|
|
111
|
+
self._reader = reader
|
|
112
|
+
|
|
113
|
+
def readable(self) -> bool:
|
|
114
|
+
return True
|
|
115
|
+
|
|
116
|
+
def read(self, size: int = -1) -> bytes:
|
|
117
|
+
return self._reader.read(size)
|
|
118
|
+
|
|
119
|
+
def readinto(self, b: bytearray) -> int:
|
|
120
|
+
return self._reader.readinto(b)
|
|
121
|
+
|
|
122
|
+
def close(self) -> None:
|
|
123
|
+
if self._reader is not None:
|
|
124
|
+
self._reader.close()
|
|
125
|
+
self._reader = None
|
|
126
|
+
super().close()
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
_CRC16_TABLE = [
|
|
130
|
+
0x0000, 0xC0C1, 0xC181, 0x0140, 0xC301, 0x03C0, 0x0280, 0xC241,
|
|
131
|
+
0xC601, 0x06C0, 0x0780, 0xC741, 0x0500, 0xC5C1, 0xC481, 0x0440,
|
|
132
|
+
0xCC01, 0x0CC0, 0x0D80, 0xCD41, 0x0F00, 0xCFC1, 0xCE81, 0x0E40,
|
|
133
|
+
0x0A00, 0xCAC1, 0xCB81, 0x0B40, 0xC901, 0x09C0, 0x0880, 0xC841,
|
|
134
|
+
0xD801, 0x18C0, 0x1980, 0xD941, 0x1B00, 0xDBC1, 0xDA81, 0x1A40,
|
|
135
|
+
0x1E00, 0xDEC1, 0xDF81, 0x1F40, 0xDD01, 0x1DC0, 0x1C80, 0xDC41,
|
|
136
|
+
0x1400, 0xD4C1, 0xD581, 0x1540, 0xD701, 0x17C0, 0x1680, 0xD641,
|
|
137
|
+
0xD201, 0x12C0, 0x1380, 0xD341, 0x1100, 0xD1C1, 0xD081, 0x1040,
|
|
138
|
+
0xF001, 0x30C0, 0x3180, 0xF141, 0x3300, 0xF3C1, 0xF281, 0x3240,
|
|
139
|
+
0x3600, 0xF6C1, 0xF781, 0x3740, 0xF501, 0x35C0, 0x3480, 0xF441,
|
|
140
|
+
0x3C00, 0xFCC1, 0xFD81, 0x3D40, 0xFF01, 0x3FC0, 0x3E80, 0xFE41,
|
|
141
|
+
0xFA01, 0x3AC0, 0x3B80, 0xFB41, 0x3900, 0xF9C1, 0xF881, 0x3840,
|
|
142
|
+
0x2800, 0xE8C1, 0xE981, 0x2940, 0xEB01, 0x2BC0, 0x2A80, 0xEA41,
|
|
143
|
+
0xEE01, 0x2EC0, 0x2F80, 0xEF41, 0x2D00, 0xEDC1, 0xEC81, 0x2C40,
|
|
144
|
+
0xE401, 0x24C0, 0x2580, 0xE541, 0x2700, 0xE7C1, 0xE681, 0x2640,
|
|
145
|
+
0x2200, 0xE2C1, 0xE381, 0x2340, 0xE101, 0x21C0, 0x2080, 0xE041,
|
|
146
|
+
0xA001, 0x60C0, 0x6180, 0xA141, 0x6300, 0xA3C1, 0xA281, 0x6240,
|
|
147
|
+
0x6600, 0xA6C1, 0xA781, 0x6740, 0xA501, 0x65C0, 0x6480, 0xA441,
|
|
148
|
+
0x6C00, 0xACC1, 0xAD81, 0x6D40, 0xAF01, 0x6FC0, 0x6E80, 0xAE41,
|
|
149
|
+
0xAA01, 0x6AC0, 0x6B80, 0xAB41, 0x6900, 0xA9C1, 0xA881, 0x6840,
|
|
150
|
+
0x7800, 0xB8C1, 0xB981, 0x7940, 0xBB01, 0x7BC0, 0x7A80, 0xBA41,
|
|
151
|
+
0xBE01, 0x7EC0, 0x7F80, 0xBF41, 0x7D00, 0xBDC1, 0xBC81, 0x7C40,
|
|
152
|
+
0xB401, 0x74C0, 0x7580, 0xB541, 0x7700, 0xB7C1, 0xB681, 0x7640,
|
|
153
|
+
0x7200, 0xB2C1, 0xB381, 0x7340, 0xB101, 0x71C0, 0x7080, 0xB041,
|
|
154
|
+
0x5000, 0x90C1, 0x9181, 0x5140, 0x9301, 0x53C0, 0x5280, 0x9241,
|
|
155
|
+
0x9601, 0x56C0, 0x5780, 0x9741, 0x5500, 0x95C1, 0x9481, 0x5440,
|
|
156
|
+
0x9C01, 0x5CC0, 0x5D80, 0x9D41, 0x5F00, 0x9FC1, 0x9E81, 0x5E40,
|
|
157
|
+
0x5A00, 0x9AC1, 0x9B81, 0x5B40, 0x9901, 0x59C0, 0x5880, 0x9841,
|
|
158
|
+
0x8801, 0x48C0, 0x4980, 0x8941, 0x4B00, 0x8BC1, 0x8A81, 0x4A40,
|
|
159
|
+
0x4E00, 0x8EC1, 0x8F81, 0x4F40, 0x8D01, 0x4DC0, 0x4C80, 0x8C41,
|
|
160
|
+
0x4400, 0x84C1, 0x8581, 0x4540, 0x8701, 0x47C0, 0x4680, 0x8641,
|
|
161
|
+
0x8201, 0x42C0, 0x4380, 0x8341, 0x4100, 0x81C1, 0x8081, 0x4040,
|
|
162
|
+
]
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def _crc16_update(crc: int, data: bytes) -> int:
|
|
166
|
+
for b in data:
|
|
167
|
+
crc = ((crc >> 8) ^ _CRC16_TABLE[(crc ^ b) & 0xFF]) & 0xFFFF
|
|
168
|
+
return crc
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def _filetime_to_datetime(filetime: Optional[int]) -> Optional[datetime]:
|
|
172
|
+
if filetime is None:
|
|
173
|
+
return None
|
|
174
|
+
# Windows FILETIME is 100-ns intervals since 1601-01-01 UTC.
|
|
175
|
+
seconds = filetime / 10_000_000
|
|
176
|
+
return datetime(1601, 1, 1, tzinfo=timezone.utc) + timedelta(seconds=seconds)
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
class Archive(Iterable[Entry]):
    """
    High-level archive wrapper that provides iteration and extraction.

    Entries are materialized on open. Use `read()` for convenience or
    `Entry.open()` to stream decompressed bytes.
    """
    def __init__(self, backend: "_pylhasa.Archive", temp_path: Optional[Path] = None) -> None:
        # backend: handle into the native extension.
        # temp_path: spool file this Archive owns (set by open_fileobj());
        # it is unlinked on close().
        self._backend = backend
        self._temp_path = temp_path
        self._closed = False
        # Entry metadata is read eagerly so iteration and lookups never
        # touch the backend again.
        self._entries = self._load_entries()
        self._entries_by_raw: Dict[str, Entry] = {entry.raw_path: entry for entry in self._entries}

    def _load_entries(self) -> list[Entry]:
        # Convert each backend metadata dict into an immutable Entry,
        # coercing every value defensively since it crosses the C boundary.
        entries = []
        for idx, meta in enumerate(self._backend.entries()):
            raw_bytes = meta["raw_path_bytes"]
            if not isinstance(raw_bytes, (bytes, bytearray)):
                raw_bytes = bytes(raw_bytes)
            # Sanitize the stored path once up front; Entry.safe_path is
            # None when extraction with safe=True must be refused.
            norm = normalize_path(bytes(raw_bytes))
            entry = Entry(
                raw_path=norm.raw_path,
                raw_path_bytes=norm.raw_path_bytes,
                safe_path=norm.safe_path,
                size=int(meta["size"]),
                compressed_size=int(meta["compressed_size"]),
                method=str(meta["method"]),
                crc=None if meta["crc"] is None else int(meta["crc"]),
                timestamp=None if meta["timestamp"] is None else int(meta["timestamp"]),
                is_dir=bool(meta["is_dir"]),
                is_symlink=bool(meta.get("is_symlink", False)),
                header_level=int(meta.get("header_level", 0)),
                os_type=int(meta.get("os_type", 0)),
                extra_flags=int(meta.get("extra_flags", 0)),
                unix_perms=None if meta.get("unix_perms") is None else int(meta["unix_perms"]),
                unix_uid=None if meta.get("unix_uid") is None else int(meta["unix_uid"]),
                unix_gid=None if meta.get("unix_gid") is None else int(meta["unix_gid"]),
                os9_perms=None if meta.get("os9_perms") is None else int(meta["os9_perms"]),
                unix_username=None if meta.get("unix_username") is None else str(meta["unix_username"]),
                unix_group=None if meta.get("unix_group") is None else str(meta["unix_group"]),
                common_crc=None if meta.get("common_crc") is None else int(meta["common_crc"]),
                win_creation_time=None if meta.get("win_creation_time") is None else int(meta["win_creation_time"]),
                win_modification_time=None if meta.get("win_modification_time") is None else int(meta["win_modification_time"]),
                win_access_time=None if meta.get("win_access_time") is None else int(meta["win_access_time"]),
                symlink_target=None if meta.get("symlink_target") is None else str(meta["symlink_target"]),
                raw_header_bytes=None if meta.get("raw_header_bytes") is None else bytes(meta["raw_header_bytes"]),
                path=None if meta.get("path") is None else str(meta["path"]),
                filename=None if meta.get("filename") is None else str(meta["filename"]),
                _index=idx,
                _archive=self,
            )
            entries.append(entry)
        return entries

    def __iter__(self) -> Iterator[Entry]:
        # Iterate the pre-materialized entry list.
        return iter(self._entries)

    def __enter__(self) -> "Archive":
        return self

    def __exit__(
        self,
        exc_type: Optional[type[BaseException]],
        exc: Optional[BaseException],
        tb: Optional[TracebackType],
    ) -> None:
        self.close()

    def __del__(self) -> None:  # pragma: no cover - best-effort cleanup
        # Best-effort close on garbage collection; __init__ may have failed
        # part-way, so swallow everything here.
        try:
            self.close()
        except Exception:
            pass

    def close(self) -> None:
        """Release the native backend and delete any owned spool file. Idempotent."""
        if self._closed:
            return
        self._backend.close()
        self._closed = True
        if self._temp_path is not None:
            # Spool file from open_fileobj(); removal is best-effort.
            try:
                self._temp_path.unlink(missing_ok=True)
            except OSError:
                pass

    def _open_entry(self, entry: Entry) -> io.BufferedReader:
        # Internal hook used by Entry.open(): wrap the native per-entry
        # reader so callers get a standard buffered file object.
        reader = self._backend.open_entry(entry._index)
        raw = _EntryRawIO(reader)
        return io.BufferedReader(raw)

    def read(self, name_or_entry: Union[str, Entry]) -> bytes:
        """Read an entry fully into memory."""
        entry = self._resolve_entry(name_or_entry)
        # Directories and symlinks carry no file data.
        if entry.is_dir or entry.is_symlink:
            return b""
        with entry.open() as fp:
            return fp.read()

    def extract(
        self,
        name_or_entry: Union[str, Entry],
        dest_dir: Union[str, Path],
        safe: bool = True,
        allow_symlinks: bool = False,
        verify_crc: bool = True,
    ) -> Path:
        """Extract a single entry to disk."""
        entry = self._resolve_entry(name_or_entry)
        return self._extract_entry(entry, Path(dest_dir), safe=safe, allow_symlinks=allow_symlinks, verify_crc=verify_crc)

    def extractall(
        self,
        dest_dir: Union[str, Path],
        safe: bool = True,
        allow_symlinks: bool = False,
        verify_crc: bool = True,
    ) -> list[Path]:
        """Extract all entries to disk."""
        dest = Path(dest_dir)
        extracted: list[Path] = []
        for entry in self._entries:
            extracted.append(self._extract_entry(entry, dest, safe=safe, allow_symlinks=allow_symlinks, verify_crc=verify_crc))
        return extracted

    def _resolve_entry(self, name_or_entry: Union[str, Entry]) -> Entry:
        # Accept an Entry directly, a raw archive path (fast dict lookup),
        # or a sanitized path (linear scan fallback).
        if isinstance(name_or_entry, Entry):
            return name_or_entry
        if not isinstance(name_or_entry, str):
            raise TypeError("expected entry name or Entry")
        if name_or_entry in self._entries_by_raw:
            return self._entries_by_raw[name_or_entry]
        for entry in self._entries:
            if entry.safe_path == name_or_entry:
                return entry
        raise KeyError(f"entry not found: {name_or_entry}")

    def _extract_entry(self, entry: Entry, dest_dir: Path, safe: bool, allow_symlinks: bool, verify_crc: bool) -> Path:
        # Core extraction: path policy, directory creation, streamed copy,
        # and optional CRC verification.
        if safe:
            if entry.safe_path is None:
                raise UnsafePathError(f"unsafe entry path: {entry.raw_path}")
            rel_path = Path(entry.safe_path)
        else:
            # safe=False trusts the archive's original path verbatim.
            rel_path = Path(entry.raw_path)

        if entry.is_symlink and not allow_symlinks:
            raise UnsafePathError(f"symlink entry blocked: {entry.raw_path}")

        dest_dir = dest_dir.resolve()
        target = (dest_dir / rel_path).resolve()

        if safe:
            # Containment check after resolve(): the final target must lie
            # inside dest_dir even through symlinked parents.  ValueError
            # from commonpath means different drives/roots on Windows.
            try:
                common = os.path.commonpath([str(dest_dir), str(target)])
            except ValueError:
                raise UnsafePathError(f"unsafe entry path: {entry.raw_path}")
            if common != str(dest_dir):
                raise UnsafePathError(f"unsafe entry path: {entry.raw_path}")

        if entry.is_dir:
            target.mkdir(parents=True, exist_ok=True)
            return target

        target.parent.mkdir(parents=True, exist_ok=True)
        crc = 0
        # CRC is checked only when requested and the header provided one.
        do_crc = verify_crc and entry.crc is not None
        with entry.open() as src, target.open("wb") as dst:
            while True:
                chunk = src.read(131072)
                if not chunk:
                    break
                if do_crc:
                    crc = _crc16_update(crc, chunk)
                dst.write(chunk)
        if do_crc:
            if crc != entry.crc:
                # Remove the corrupt partial output before reporting failure.
                try:
                    target.unlink()
                except OSError:
                    pass
                raise BadArchiveError(f"CRC mismatch for {entry.raw_path}")
        return target
|
|
361
|
+
|
|
362
|
+
|
|
363
|
+
def _open_from_path(path: Union[str, Path]) -> Archive:
    """Open an archive from a filesystem path, expanding a leading ``~``."""
    expanded = os.path.expanduser(os.fspath(path))
    return Archive(_pylhasa.open_path(expanded))
|
|
367
|
+
|
|
368
|
+
|
|
369
|
+
def _open_from_bytes(data: bytes) -> Archive:
    """Wrap an in-memory archive image in an :class:`Archive`."""
    return Archive(_pylhasa.open_bytes(data))
|
|
372
|
+
|
|
373
|
+
|
|
374
|
+
def _open_from_fileobj(fileobj: io.BufferedIOBase, buffering: int) -> Archive:
|
|
375
|
+
if buffering <= 0:
|
|
376
|
+
raise ValueError("buffering must be positive")
|
|
377
|
+
# Spool to a temp file so liblhasa can stream without loading all bytes.
|
|
378
|
+
temp = tempfile.NamedTemporaryFile(prefix="pylhasa_", suffix=".lha", delete=False)
|
|
379
|
+
temp_path = Path(temp.name)
|
|
380
|
+
try:
|
|
381
|
+
shutil.copyfileobj(fileobj, temp, length=buffering)
|
|
382
|
+
finally:
|
|
383
|
+
temp.close()
|
|
384
|
+
backend = _pylhasa.open_path(os.fspath(temp_path))
|
|
385
|
+
return Archive(backend, temp_path=temp_path)
|
|
386
|
+
|
|
387
|
+
|
|
388
|
+
def open(path: Union[str, Path]) -> Archive:
    """
    Open an LHA/LZH archive located at *path*.

    A leading ``~`` in the path is expanded to the user's home directory.
    All entry metadata is parsed eagerly, so the returned Archive can be
    iterated without further disk seeks for headers.
    """
    return _open_from_path(path)
|
|
396
|
+
|
|
397
|
+
|
|
398
|
+
def open_bytes(data: bytes) -> Archive:
    """
    Open an LHA/LZH archive held entirely in memory.

    A private `bytes` copy of *data* is made and kept alive for as long as
    the archive object exists.

    Raises:
        TypeError: if *data* is not a bytes-like object.
    """
    if isinstance(data, (bytes, bytearray, memoryview)):
        return _open_from_bytes(bytes(data))
    raise TypeError("data must be bytes-like")
|
|
407
|
+
|
|
408
|
+
|
|
409
|
+
def from_bytes(data: bytes) -> Archive:
    """Open an archive from in-memory bytes; alias for :func:`open_bytes`."""
    return open_bytes(data)
|
|
412
|
+
|
|
413
|
+
|
|
414
|
+
def open_fileobj(fileobj: io.BufferedIOBase, buffering: int = 131072) -> Archive:
    """
    Open an LHA/LZH archive from a readable file-like object.

    The stream is copied to a temporary file in *buffering*-byte chunks so
    the native decoder can work without the whole archive being resident in
    memory.

    Raises:
        TypeError: if *fileobj* has no ``read`` method.
    """
    if hasattr(fileobj, "read"):
        return _open_from_fileobj(fileobj, buffering)
    raise TypeError("fileobj must be file-like")
|
pylhasa/_exceptions.py
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
# The native extension defines PylhasaError/BadArchiveError so C code can
# raise them directly.  When the extension is importable we alias those
# classes here; otherwise (e.g. source checkout without a build) we fall
# back to pure-Python equivalents so this module always imports cleanly.
try:
    from . import _pylhasa
except Exception:  # pragma: no cover - native module missing
    _pylhasa = None


if _pylhasa is not None:
    # Re-export the native exception classes under the public names.
    PylhasaError = _pylhasa.PylhasaError
    BadArchiveError = _pylhasa.BadArchiveError
else:
    class PylhasaError(Exception):
        """Base exception for pylhasa."""

    class BadArchiveError(PylhasaError):
        """Raised when an archive is malformed or unsupported."""


# Raised purely from Python-side path policy checks, so it is always
# defined here regardless of whether the native module loaded.
class UnsafePathError(PylhasaError):
    """Raised when an entry path is unsafe to extract."""
|
pylhasa/_paths.py
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
import re
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from typing import Optional
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
# Device names that Windows refuses to use as regular file names.
# Matches the classic reserved set: CON/PRN/AUX/NUL plus COM1-9 and LPT1-9.
_WINDOWS_RESERVED = {"CON", "PRN", "AUX", "NUL"} | {
    f"{device}{digit}" for device in ("COM", "LPT") for digit in range(1, 10)
}


@dataclass(frozen=True)
class NormalizedPath:
    # Original decoded path and its raw bytes, plus either a sanitized
    # relative path or the reason it was rejected (exactly one is None).
    raw_path: str
    raw_path_bytes: bytes
    safe_path: Optional[str]
    unsafe_reason: Optional[str]


_drive_re = re.compile(r"^[A-Za-z]:")


def normalize_path(raw_path_bytes: bytes) -> NormalizedPath:
    """Normalize an archive path into a safe, platform-neutral form."""
    raw_path = raw_path_bytes.decode("utf-8", errors="replace")
    slashed = raw_path.replace("\\", "/")

    def rejected(reason: str) -> NormalizedPath:
        return NormalizedPath(raw_path, raw_path_bytes, None, reason)

    # Reject anything that escapes a relative extraction root outright.
    if slashed.startswith("//"):
        return rejected("UNC paths are not allowed")
    if slashed.startswith("/"):
        return rejected("absolute paths are not allowed")
    if _drive_re.match(slashed):
        return rejected("Windows drive paths are not allowed")

    # Drop empty and "." segments, then refuse any ".." traversal.
    components = [c for c in slashed.lstrip("/").split("/") if c not in ("", ".")]
    if ".." in components:
        return rejected("path traversal is not allowed")

    if os.name == "nt":
        # Reserved device names apply to the stem before the first dot
        # (e.g. "CON.txt" is still the console device).
        for component in components:
            if component.split(".")[0].upper() in _WINDOWS_RESERVED:
                return rejected("reserved Windows name")

    return NormalizedPath(raw_path, raw_path_bytes, "/".join(components), None)
|
pylhasa/_pylhasa.so
ADDED
|
Binary file
|
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
Metadata-Version: 2.2
|
|
2
|
+
Name: pylhasa
|
|
3
|
+
Version: 0.1.1
|
|
4
|
+
Summary: Cross-platform Python wrapper for liblhasa (LHA/LZH archives)
|
|
5
|
+
Author: pylhasa contributors
|
|
6
|
+
License: MIT License
|
|
7
|
+
|
|
8
|
+
Copyright (c) 2026 bwhitn
|
|
9
|
+
|
|
10
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
11
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
12
|
+
in the Software without restriction, including without limitation the rights
|
|
13
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
14
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
15
|
+
furnished to do so, subject to the following conditions:
|
|
16
|
+
|
|
17
|
+
The above copyright notice and this permission notice shall be included in all
|
|
18
|
+
copies or substantial portions of the Software.
|
|
19
|
+
|
|
20
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
21
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
22
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
23
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
24
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
25
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
26
|
+
SOFTWARE.
|
|
27
|
+
|
|
28
|
+
Classifier: Programming Language :: Python :: 3
|
|
29
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
30
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
31
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
32
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
33
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
34
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
35
|
+
Classifier: Programming Language :: C
|
|
36
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
37
|
+
Classifier: Operating System :: OS Independent
|
|
38
|
+
Project-URL: Homepage, https://github.com/bwhitn/pylhasa
|
|
39
|
+
Project-URL: Repository, https://github.com/bwhitn/pylhasa
|
|
40
|
+
Project-URL: Issues, https://github.com/bwhitn/pylhasa/issues
|
|
41
|
+
Requires-Python: >=3.9
|
|
42
|
+
Description-Content-Type: text/markdown
|
|
43
|
+
|
|
44
|
+
# pylhasa
|
|
45
|
+
|
|
46
|
+
`pylhasa` is a cross-platform Python wrapper for the LHA/LZH archive format. It vendors the liblhasa C sources and builds a CPython extension, producing wheels for Linux, macOS, and Windows.
|
|
47
|
+
|
|
48
|
+
## Install
|
|
49
|
+
|
|
50
|
+
From PyPI:
|
|
51
|
+
|
|
52
|
+
```bash
|
|
53
|
+
pip install pylhasa
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
Wheels are built for Python 3.9+.
|
|
57
|
+
|
|
58
|
+
## Usage
|
|
59
|
+
|
|
60
|
+
```python
|
|
61
|
+
import pylhasa
|
|
62
|
+
|
|
63
|
+
archive = pylhasa.open("example.lha")
|
|
64
|
+
for entry in archive:
|
|
65
|
+
print(entry.raw_path, entry.size)
|
|
66
|
+
|
|
67
|
+
# Read bytes directly (loads full file into memory)
|
|
68
|
+
payload = archive.read("hello.txt")
|
|
69
|
+
|
|
70
|
+
# Stream contents (incremental reads, avoids large memory usage)
|
|
71
|
+
entry = next(iter(archive))
|
|
72
|
+
with entry.open() as stream:
|
|
73
|
+
chunk = stream.read(1024)
|
|
74
|
+
|
|
75
|
+
# Extract safely (default)
|
|
76
|
+
archive.extractall("out")
|
|
77
|
+
archive.close()
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
## API overview
|
|
81
|
+
|
|
82
|
+
Top-level functions:
|
|
83
|
+
|
|
84
|
+
- `pylhasa.open(path)`: open an archive from a filesystem path.
|
|
85
|
+
- `pylhasa.open_bytes(data)` / `pylhasa.from_bytes(data)`: open from in-memory bytes.
|
|
86
|
+
- `pylhasa.open_fileobj(fileobj, buffering=131072)`: open from a stream by spooling to a temp file.
|
|
87
|
+
|
|
88
|
+
Archive behavior:
|
|
89
|
+
|
|
90
|
+
- `Archive` is iterable; each item is an `Entry`.
|
|
91
|
+
- `Archive.read(name_or_entry)` returns the full bytes of a file entry.
|
|
92
|
+
- `Entry.read()` returns the full bytes for that entry (same as `Archive.read(entry)`).
|
|
93
|
+
- `Archive.extract(name_or_entry, dest_dir, safe=True, allow_symlinks=False, verify_crc=True)` extracts a single entry.
|
|
94
|
+
- `Archive.extractall(dest_dir, safe=True, allow_symlinks=False, verify_crc=True)` extracts all entries.
|
|
95
|
+
|
|
96
|
+
Entry behavior:
|
|
97
|
+
|
|
98
|
+
- `Entry.open()` returns a binary file-like object for streaming decompressed data.
|
|
99
|
+
- `Entry.read()` loads the full entry into memory in one call.
|
|
100
|
+
- Calling `Entry.read()` (or `Archive.read`) on a directory or symlink entry returns `b""`.
|
|
101
|
+
- `Entry.raw_path` preserves the original path from the archive; `Entry.safe_path` is the sanitized path used for safe extraction.
|
|
102
|
+
|
|
103
|
+
## Examples
|
|
104
|
+
|
|
105
|
+
See `examples/` for runnable scripts:
|
|
106
|
+
|
|
107
|
+
- `examples/list_entries.py`
|
|
108
|
+
- `examples/extract_all.py`
|
|
109
|
+
- `examples/stream_read.py`
|
|
110
|
+
- `examples/all_functions.py`
|
|
111
|
+
|
|
112
|
+
### In-memory / streaming
|
|
113
|
+
|
|
114
|
+
```python
|
|
115
|
+
import pylhasa
|
|
116
|
+
|
|
117
|
+
# In-memory bytes
|
|
118
|
+
data = open("example.lha", "rb").read()
|
|
119
|
+
archive = pylhasa.open_bytes(data)
|
|
120
|
+
# or: archive = pylhasa.from_bytes(data)
|
|
121
|
+
|
|
122
|
+
# Streaming file-like object
|
|
123
|
+
with open("example.lha", "rb") as fp:
|
|
124
|
+
archive = pylhasa.open_fileobj(fp, buffering=131072)
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
## Safety notes
|
|
128
|
+
|
|
129
|
+
- Safe extraction is **on by default**. Unsafe paths raise `UnsafePathError`.
|
|
130
|
+
- `Entry.raw_path` preserves the original stored path (best-effort decoding).
|
|
131
|
+
- `Entry.safe_path` contains the sanitized path used for extraction when safe mode is enabled.
|
|
132
|
+
- Path traversal, absolute paths, Windows drive paths, and UNC paths are rejected when `safe=True`.
|
|
133
|
+
- Extraction verifies CRC by default; pass `verify_crc=False` to skip.
|
|
134
|
+
|
|
135
|
+
## Exceptions
|
|
136
|
+
|
|
137
|
+
- `PylhasaError`: base exception
|
|
138
|
+
- `BadArchiveError`: malformed or unsupported archive
|
|
139
|
+
- `UnsafePathError`: unsafe entry path for extraction
|
|
140
|
+
|
|
141
|
+
## Header metadata
|
|
142
|
+
|
|
143
|
+
Each `Entry` exposes the full parsed LHA header fields (for example `header_level`, `os_type`, `extra_flags`, Unix permissions, Windows timestamps, and `raw_header_bytes`). These are available for forensic and advanced use.
|
|
144
|
+
|
|
145
|
+
Time helper:
|
|
146
|
+
|
|
147
|
+
- `Entry.datetime_utc()` returns a best‑effort UTC `datetime` (prefers Windows FILETIME when present, otherwise Unix timestamp).
|
|
148
|
+
|
|
149
|
+
## Compression support
|
|
150
|
+
|
|
151
|
+
The vendored liblhasa core supports common LHA/LZH compression methods including `-lh1-` through `-lh7-`, `-lhd-`, and LArc `-lz*` variants.
|
|
152
|
+
|
|
153
|
+
**Warning (experimental):** `-lh2-` and `-lh3-` support is best‑effort and under‑documented. Treat results with caution and validate against trusted tools when possible.
|
|
154
|
+
|
|
155
|
+
Directory entries (`-lhd-`) and symlinks do not carry file data; `Archive.read()` returns `b""` for those entries.
|
|
156
|
+
|
|
157
|
+
## Third-party licenses
|
|
158
|
+
|
|
159
|
+
This project vendors liblhasa. Its license is included at `native/vendor/lhasa/COPYING.md` and applies to the vendored sources.
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
pylhasa/_paths.py,sha256=7SFViWdryBODhzKLTd5YF96bahuc-raQ5Jni-3wPcXE,1790
|
|
2
|
+
pylhasa/_archive.py,sha256=6kXiyMBYRH6YdH71_Bf8doasVq9prqVMVjjbpj9QTQ8,16512
|
|
3
|
+
pylhasa/_exceptions.py,sha256=rY6CpsShOXATIEzx2-yOtzTxORSnYd9-NkeRwwVHhHo,517
|
|
4
|
+
pylhasa/_pylhasa.so,sha256=vVBsBAhmegfc23T26gAV5OjKU12vxP_VZfuooLvy8-A,118904
|
|
5
|
+
pylhasa/__init__.py,sha256=CQfkFxnlTJ9v3z79ob5l2Lak0xq7O5xlIwOzmLKm1mA,445
|
|
6
|
+
pylhasa-0.1.1.dist-info/WHEEL,sha256=SdD_Ze46rbG8O82pDF4NTDXbsCKrpf8pf8aQc3IgDLU,156
|
|
7
|
+
pylhasa-0.1.1.dist-info/METADATA,sha256=De24iY9-fw-OMPuTscZ4hEvgtPZMwadJfcA9IsoxDvA,6085
|
|
8
|
+
pylhasa-0.1.1.dist-info/RECORD,,
|
|
9
|
+
pylhasa-0.1.1.dist-info/licenses/LICENSE,sha256=iXa4uBkH521dMnGiESJVafTsbNcxfh14J9ua0C6WaNs,1063
|
|
10
|
+
pylhasa-0.1.1.dist-info/licenses/native/vendor/lhasa/COPYING.md,sha256=v3sFXGMgXPwf6hdep2OPfn8je6s-q_Kp_Su1yIcu3vs,752
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 bwhitn
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
## ISC License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2011-2025, Simon Howard
|
|
4
|
+
|
|
5
|
+
Permission to use, copy, modify, and/or distribute this software
|
|
6
|
+
for any purpose with or without fee is hereby granted, provided
|
|
7
|
+
that the above copyright notice and this permission notice appear
|
|
8
|
+
in all copies.
|
|
9
|
+
|
|
10
|
+
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
|
|
11
|
+
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
|
|
12
|
+
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
|
|
13
|
+
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
|
|
14
|
+
CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
|
|
15
|
+
LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
|
|
16
|
+
NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
|
|
17
|
+
CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|