pylhasa 0.1.1__cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pylhasa/__init__.py ADDED
@@ -0,0 +1,18 @@
1
+ """pylhasa - LHA/LZH archive reader with safe extraction and streaming."""
2
+
3
+ from ._archive import Archive, Entry, from_bytes, open, open_bytes, open_fileobj
4
+ from ._exceptions import BadArchiveError, PylhasaError, UnsafePathError
5
+
6
+ __all__: list[str] = [
7
+ "Archive",
8
+ "Entry",
9
+ "open",
10
+ "from_bytes",
11
+ "open_bytes",
12
+ "open_fileobj",
13
+ "PylhasaError",
14
+ "BadArchiveError",
15
+ "UnsafePathError",
16
+ ]
17
+
18
+ __version__: str = "0.1.1"
pylhasa/_archive.py ADDED
@@ -0,0 +1,423 @@
1
+ from __future__ import annotations
2
+
3
+ import io
4
+ import os
5
+ import shutil
6
+ import tempfile
7
+ from dataclasses import dataclass
8
+ from datetime import datetime, timedelta, timezone
9
+ from pathlib import Path
10
+ from types import TracebackType
11
+ from typing import Dict, Iterable, Iterator, Optional, Union
12
+
13
+ from ._exceptions import BadArchiveError, PylhasaError, UnsafePathError
14
+ from ._paths import NormalizedPath, normalize_path
15
+
16
+ try:
17
+ from . import _pylhasa
18
+ except ImportError as exc: # pragma: no cover - import error shown at runtime
19
+ raise ImportError("pylhasa native extension is not built") from exc
20
+
21
+
22
@dataclass(frozen=True)
class Entry:
    """
    Immutable description of one member of an LHA/LZH archive.

    Attributes:
    - raw_path: original path from the archive, decoded on a best-effort basis.
    - raw_path_bytes: the undecoded path bytes from the archive.
    - safe_path: sanitized relative path, or None when the path is unsafe.
    - size: uncompressed size in bytes.
    - compressed_size: compressed size in bytes.
    - method: compression method string (e.g., "-lh5-").
    - crc: CRC-16 from the header, or None if absent.
    - timestamp: Unix timestamp, or None if absent.
    - is_dir: True for directory entries.
    - is_symlink: True for symlink entries.
    - header_level: LHA header level (0-3).
    - os_type: OS type byte from the header.
    - extra_flags: parsed extended header flags bitfield.
    - unix_perms / unix_uid / unix_gid: Unix permissions, UID and GID if present.
    - os9_perms: OS-9 permissions if present.
    - unix_username / unix_group: Unix user and group names if present.
    - common_crc: common header CRC if present.
    - win_creation_time / win_modification_time / win_access_time:
      Windows FILETIME values if present.
    - symlink_target: symlink target if present.
    - raw_header_bytes: raw header bytes if present.
    - path: directory path component if present.
    - filename: filename component if present.

    Methods:
    - open(): stream the decompressed data.
    - read(): load the decompressed data into memory.
    - datetime_utc(): best-effort UTC datetime (Windows FILETIME if present,
      otherwise Unix timestamp).
    """

    raw_path: str
    raw_path_bytes: bytes
    safe_path: Optional[str]
    size: int
    compressed_size: int
    method: str
    crc: Optional[int]
    timestamp: Optional[int]
    is_dir: bool
    is_symlink: bool
    header_level: int
    os_type: int
    extra_flags: int
    unix_perms: Optional[int]
    unix_uid: Optional[int]
    unix_gid: Optional[int]
    os9_perms: Optional[int]
    unix_username: Optional[str]
    unix_group: Optional[str]
    common_crc: Optional[int]
    win_creation_time: Optional[int]
    win_modification_time: Optional[int]
    win_access_time: Optional[int]
    symlink_target: Optional[str]
    raw_header_bytes: Optional[bytes]
    path: Optional[str]
    filename: Optional[str]
    # Position of this entry in the owning archive's entry list.
    _index: int
    # Archive that produced this entry; open() and read() delegate to it.
    _archive: "Archive"

    def open(self) -> io.BufferedReader:
        """Return a buffered stream over this entry's decompressed data."""
        owner = self._archive
        return owner._open_entry(self)

    def read(self) -> bytes:
        """Return the whole entry as bytes (delegates to the owning archive)."""
        owner = self._archive
        return owner.read(self)

    def datetime_utc(self) -> Optional[datetime]:
        """
        Return the best available timestamp as a timezone-aware UTC datetime.

        The Windows FILETIME modification time wins when it is present; the
        Unix timestamp is the fallback. Returns None when neither is set.
        """
        filetime = self.win_modification_time
        if filetime is not None:
            return _filetime_to_datetime(filetime)
        unix_seconds = self.timestamp
        if unix_seconds is not None:
            return datetime.fromtimestamp(unix_seconds, tz=timezone.utc)
        return None
107
+
108
+
109
class _EntryRawIO(io.RawIOBase):
    """
    Read-only raw stream that forwards to a native entry reader.

    The adapter exists so the reader can be wrapped in ``io.BufferedReader``.
    Once closed, ``read`` and ``readinto`` raise ``ValueError`` like other
    closed ``io`` streams instead of failing on the released reader.
    """

    def __init__(self, reader: "_pylhasa.EntryReader") -> None:
        super().__init__()
        self._reader = reader

    def _require_reader(self):
        """Return the wrapped reader, or raise ValueError if already closed."""
        reader = getattr(self, "_reader", None)
        if reader is None:
            raise ValueError("I/O operation on closed file")
        return reader

    def readable(self) -> bool:
        return True

    def read(self, size: int = -1) -> bytes:
        """Read up to *size* bytes from the wrapped reader."""
        return self._require_reader().read(size)

    def readinto(self, b: bytearray) -> int:
        """Fill *b* from the wrapped reader and return the byte count."""
        return self._require_reader().readinto(b)

    def close(self) -> None:
        """Close the wrapped reader once and mark this stream closed."""
        # getattr: IOBase may call close() during finalization even if
        # __init__ never assigned the reader.
        reader = getattr(self, "_reader", None)
        self._reader = None
        try:
            if reader is not None:
                reader.close()
        finally:
            # Always flag the stream as closed, even if the reader's close failed.
            super().close()
127
+
128
+
129
def _make_crc16_table() -> list[int]:
    """Build the 256-entry lookup table for the reflected polynomial 0xA001."""
    table = []
    for byte in range(256):
        remainder = byte
        for _ in range(8):
            if remainder & 1:
                remainder = (remainder >> 1) ^ 0xA001
            else:
                remainder >>= 1
        table.append(remainder)
    return table


# Byte-indexed CRC-16 lookup table used by _crc16_update().
_CRC16_TABLE = _make_crc16_table()


def _crc16_update(crc: int, data: bytes) -> int:
    """
    Fold *data* into a running CRC-16 value and return the new value.

    Start with ``crc == 0`` and feed chunks in order; the result is always
    masked to 16 bits.
    """
    table = _CRC16_TABLE
    value = crc
    for octet in data:
        index = (value ^ octet) & 0xFF
        value = ((value >> 8) ^ table[index]) & 0xFFFF
    return value
169
+
170
+
171
def _filetime_to_datetime(filetime: Optional[int]) -> Optional[datetime]:
    """
    Convert a Windows FILETIME value to a timezone-aware UTC datetime.

    FILETIME counts 100-nanosecond intervals since 1601-01-01 UTC. The
    conversion uses integer arithmetic so the result is exact to the
    microsecond; the sub-microsecond remainder is truncated because
    ``datetime`` cannot represent it.

    Returns None when *filetime* is None.
    """
    if filetime is None:
        return None
    epoch = datetime(1601, 1, 1, tzinfo=timezone.utc)
    # Ten 100-ns ticks per microsecond; floor division avoids float rounding.
    return epoch + timedelta(microseconds=filetime // 10)
177
+
178
+
179
class Archive(Iterable[Entry]):
    """
    High-level archive wrapper that provides iteration and extraction.

    Entry metadata is read from the backend when the archive is constructed.
    Use ``read()`` to load an entry into memory or ``Entry.open()`` to stream
    its decompressed bytes. The object is a context manager; leaving the
    ``with`` block closes it.
    """

    def __init__(self, backend: "_pylhasa.Archive", temp_path: Optional[Path] = None) -> None:
        """
        Wrap a native archive backend.

        backend: native object providing ``entries()``, ``open_entry()`` and
            ``close()``.
        temp_path: optional spool file that is deleted when the archive is
            closed.
        """
        self._backend = backend
        self._temp_path = temp_path
        self._closed = False
        self._entries = self._load_entries()
        # When raw paths repeat, the last entry with that path wins here.
        self._entries_by_raw: Dict[str, Entry] = {entry.raw_path: entry for entry in self._entries}

    def _load_entries(self) -> list[Entry]:
        """Build an ``Entry`` for every metadata mapping reported by the backend."""

        def optional(meta, key, convert):
            # A missing key and an explicit None both become None.
            value = meta.get(key)
            return None if value is None else convert(value)

        entries = []
        for idx, meta in enumerate(self._backend.entries()):
            norm = normalize_path(bytes(meta["raw_path_bytes"]))
            entries.append(
                Entry(
                    raw_path=norm.raw_path,
                    raw_path_bytes=norm.raw_path_bytes,
                    safe_path=norm.safe_path,
                    size=int(meta["size"]),
                    compressed_size=int(meta["compressed_size"]),
                    method=str(meta["method"]),
                    # "crc" and "timestamp" are required keys; only the value may be None.
                    crc=None if meta["crc"] is None else int(meta["crc"]),
                    timestamp=None if meta["timestamp"] is None else int(meta["timestamp"]),
                    is_dir=bool(meta["is_dir"]),
                    is_symlink=bool(meta.get("is_symlink", False)),
                    header_level=int(meta.get("header_level", 0)),
                    os_type=int(meta.get("os_type", 0)),
                    extra_flags=int(meta.get("extra_flags", 0)),
                    unix_perms=optional(meta, "unix_perms", int),
                    unix_uid=optional(meta, "unix_uid", int),
                    unix_gid=optional(meta, "unix_gid", int),
                    os9_perms=optional(meta, "os9_perms", int),
                    unix_username=optional(meta, "unix_username", str),
                    unix_group=optional(meta, "unix_group", str),
                    common_crc=optional(meta, "common_crc", int),
                    win_creation_time=optional(meta, "win_creation_time", int),
                    win_modification_time=optional(meta, "win_modification_time", int),
                    win_access_time=optional(meta, "win_access_time", int),
                    symlink_target=optional(meta, "symlink_target", str),
                    raw_header_bytes=optional(meta, "raw_header_bytes", bytes),
                    path=optional(meta, "path", str),
                    filename=optional(meta, "filename", str),
                    _index=idx,
                    _archive=self,
                )
            )
        return entries

    def __iter__(self) -> Iterator[Entry]:
        return iter(self._entries)

    def __enter__(self) -> "Archive":
        return self

    def __exit__(
        self,
        exc_type: Optional[type[BaseException]],
        exc: Optional[BaseException],
        tb: Optional[TracebackType],
    ) -> None:
        self.close()

    def __del__(self) -> None:  # pragma: no cover - best-effort cleanup
        try:
            self.close()
        except Exception:
            # Finalizers must not raise; this also covers a partially
            # initialized instance that lacks its attributes.
            pass

    def close(self) -> None:
        """
        Close the backend and delete the spool file, if any.

        Safe to call repeatedly. The archive is marked closed and the spool
        file is removed even when the backend's ``close()`` raises; the
        backend error still propagates.
        """
        if self._closed:
            return
        try:
            self._backend.close()
        finally:
            self._closed = True
            if self._temp_path is not None:
                try:
                    self._temp_path.unlink(missing_ok=True)
                except OSError:
                    # Best effort: the file may still be locked or already gone.
                    pass

    def _open_entry(self, entry: Entry) -> io.BufferedReader:
        """Open a buffered stream over the decompressed data of *entry*."""
        reader = self._backend.open_entry(entry._index)
        raw = _EntryRawIO(reader)
        return io.BufferedReader(raw)

    def read(self, name_or_entry: Union[str, Entry]) -> bytes:
        """
        Read an entry fully into memory.

        Directory and symlink entries yield ``b""``.
        Raises KeyError if a name does not match any entry.
        """
        entry = self._resolve_entry(name_or_entry)
        if entry.is_dir or entry.is_symlink:
            return b""
        with entry.open() as fp:
            return fp.read()

    def extract(
        self,
        name_or_entry: Union[str, Entry],
        dest_dir: Union[str, Path],
        safe: bool = True,
        allow_symlinks: bool = False,
        verify_crc: bool = True,
    ) -> Path:
        """Extract a single entry below *dest_dir* and return the path written."""
        entry = self._resolve_entry(name_or_entry)
        return self._extract_entry(entry, Path(dest_dir), safe=safe, allow_symlinks=allow_symlinks, verify_crc=verify_crc)

    def extractall(
        self,
        dest_dir: Union[str, Path],
        safe: bool = True,
        allow_symlinks: bool = False,
        verify_crc: bool = True,
    ) -> list[Path]:
        """Extract every entry below *dest_dir* and return the paths written, in archive order."""
        dest = Path(dest_dir)
        return [
            self._extract_entry(entry, dest, safe=safe, allow_symlinks=allow_symlinks, verify_crc=verify_crc)
            for entry in self._entries
        ]

    def _resolve_entry(self, name_or_entry: Union[str, Entry]) -> Entry:
        """
        Map a name or ``Entry`` to an ``Entry``.

        Names are matched against raw paths first, then against safe paths.
        Raises TypeError for other argument types and KeyError when nothing
        matches.
        """
        if isinstance(name_or_entry, Entry):
            return name_or_entry
        if not isinstance(name_or_entry, str):
            raise TypeError("expected entry name or Entry")
        if name_or_entry in self._entries_by_raw:
            return self._entries_by_raw[name_or_entry]
        for entry in self._entries:
            if entry.safe_path == name_or_entry:
                return entry
        raise KeyError(f"entry not found: {name_or_entry}")

    def _extract_entry(self, entry: Entry, dest_dir: Path, safe: bool, allow_symlinks: bool, verify_crc: bool) -> Path:
        """
        Write one entry below *dest_dir* and return the resolved target path.

        Raises UnsafePathError when ``safe`` is true and the path is unsafe or
        resolves outside *dest_dir*, or when the entry is a symlink and
        ``allow_symlinks`` is false. Raises BadArchiveError on a CRC mismatch.
        If extraction fails after the output file was opened, the partial
        file is removed before the error propagates.
        """
        if safe:
            if entry.safe_path is None:
                raise UnsafePathError(f"unsafe entry path: {entry.raw_path}")
            rel_path = Path(entry.safe_path)
        else:
            rel_path = Path(entry.raw_path)

        if entry.is_symlink and not allow_symlinks:
            raise UnsafePathError(f"symlink entry blocked: {entry.raw_path}")
        # NOTE(review): an allowed symlink entry is written as a regular file
        # below; no link is created here.

        dest_dir = dest_dir.resolve()
        target = (dest_dir / rel_path).resolve()

        if safe:
            # Re-check after resolve() so existing symlinks on disk cannot
            # redirect the write outside the destination.
            try:
                common = os.path.commonpath([str(dest_dir), str(target)])
            except ValueError as exc:
                raise UnsafePathError(f"unsafe entry path: {entry.raw_path}") from exc
            if common != str(dest_dir):
                raise UnsafePathError(f"unsafe entry path: {entry.raw_path}")

        if entry.is_dir:
            target.mkdir(parents=True, exist_ok=True)
            return target

        target.parent.mkdir(parents=True, exist_ok=True)
        do_crc = verify_crc and entry.crc is not None
        crc = 0
        output_opened = False
        try:
            with entry.open() as src, target.open("wb") as dst:
                output_opened = True
                while True:
                    chunk = src.read(131072)
                    if not chunk:
                        break
                    if do_crc:
                        crc = _crc16_update(crc, chunk)
                    dst.write(chunk)
            if do_crc and crc != entry.crc:
                raise BadArchiveError(f"CRC mismatch for {entry.raw_path}")
        except BaseException:
            # Only remove what this call created or truncated; if the output
            # could not be opened, leave any existing file alone.
            if output_opened:
                try:
                    target.unlink()
                except OSError:
                    pass
            raise
        return target
361
+
362
+
363
def _open_from_path(path: Union[str, Path]) -> Archive:
    """Open the archive at *path* (with ``~`` expanded) through the native backend."""
    as_text = os.fspath(path)
    return Archive(_pylhasa.open_path(os.path.expanduser(as_text)))
367
+
368
+
369
def _open_from_bytes(data: bytes) -> Archive:
    """Hand *data* to the native in-memory opener and wrap the result in an ``Archive``."""
    return Archive(_pylhasa.open_bytes(data))
372
+
373
+
374
def _open_from_fileobj(fileobj: io.BufferedIOBase, buffering: int) -> Archive:
    """
    Spool *fileobj* to a temporary file and open that file as an archive.

    fileobj: object with a ``read(size)`` method returning bytes.
    buffering: chunk size in bytes used while copying; must be positive.

    The returned archive owns the spool file and deletes it on close. If
    spooling or opening fails, the spool file is removed before the error
    propagates.

    Raises ValueError if *buffering* is not positive.
    """
    if buffering <= 0:
        raise ValueError("buffering must be positive")
    # Spool to a temp file so liblhasa can stream without loading all bytes.
    # delete=False because the file is reopened by name after this handle closes.
    temp = tempfile.NamedTemporaryFile(prefix="pylhasa_", suffix=".lha", delete=False)
    temp_path = Path(temp.name)
    try:
        try:
            shutil.copyfileobj(fileobj, temp, length=buffering)
        finally:
            temp.close()
        backend = _pylhasa.open_path(os.fspath(temp_path))
        return Archive(backend, temp_path=temp_path)
    except BaseException:
        # No archive took ownership of the spool file, so remove it here.
        try:
            temp_path.unlink(missing_ok=True)
        except OSError:
            pass
        raise
386
+
387
+
388
def open(path: Union[str, Path]) -> Archive:
    """
    Open an LHA/LZH archive stored at a filesystem path.

    A leading `~` in the path is expanded. Entry metadata is collected
    eagerly while the archive is opened.
    """
    archive = _open_from_path(path)
    return archive
396
+
397
+
398
def open_bytes(data: bytes) -> Archive:
    """
    Open an LHA/LZH archive held in memory.

    Accepts ``bytes``, ``bytearray`` or ``memoryview``; the input is
    converted with ``bytes(data)`` before it reaches the backend.
    NOTE(review): the earlier docstring said a reference to the bytes is
    kept for the lifetime of the archive; that happens in the native layer
    and is not visible here.

    Raises TypeError for any other input type.
    """
    accepted = (bytes, bytearray, memoryview)
    if isinstance(data, accepted):
        return _open_from_bytes(bytes(data))
    raise TypeError("data must be bytes-like")
407
+
408
+
409
def from_bytes(data: bytes) -> Archive:
    """Open an archive from in-memory bytes; equivalent to ``open_bytes(data)``."""
    archive = open_bytes(data)
    return archive
412
+
413
+
414
def open_fileobj(fileobj: io.BufferedIOBase, buffering: int = 131072) -> Archive:
    """
    Open an LHA/LZH archive from a readable file-like object.

    The stream is copied to a temporary file in chunks of *buffering* bytes
    so the whole archive never has to sit in memory.

    Raises TypeError if *fileobj* has no ``read`` attribute.
    """
    if hasattr(fileobj, "read"):
        return _open_from_fileobj(fileobj, buffering)
    raise TypeError("fileobj must be file-like")
pylhasa/_exceptions.py ADDED
@@ -0,0 +1,19 @@
1
# Exception types for pylhasa. When the native extension is importable its
# exception classes are re-exported; otherwise equivalent pure-Python classes
# are defined so that this module can still be imported.
try:
    from . import _pylhasa
except ImportError:  # pragma: no cover - native module missing
    # Only a failed import selects the fallback; any other error raised
    # while loading the extension propagates.
    _pylhasa = None


if _pylhasa is not None:
    PylhasaError = _pylhasa.PylhasaError
    BadArchiveError = _pylhasa.BadArchiveError
else:
    class PylhasaError(Exception):
        """Base exception for pylhasa."""

    class BadArchiveError(PylhasaError):
        """Raised when an archive is malformed or unsupported."""


class UnsafePathError(PylhasaError):
    """Raised when an entry path is unsafe to extract."""
pylhasa/_paths.py ADDED
@@ -0,0 +1,71 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ import re
5
+ from dataclasses import dataclass
6
+ from typing import Optional
7
+
8
+
9
# Upper-case device names matched by normalize_path() on Windows:
# CON, PRN, AUX, NUL, COM1-COM9 and LPT1-LPT9.
_WINDOWS_RESERVED = {"CON", "PRN", "AUX", "NUL"}
_WINDOWS_RESERVED.update(
    f"{prefix}{number}" for prefix in ("COM", "LPT") for number in range(1, 10)
)
33
+
34
+
35
@dataclass(frozen=True)
class NormalizedPath:
    """
    Outcome of normalizing one archive path.

    ``safe_path`` is the sanitized ``/``-separated relative path, or None
    when the path was rejected; ``unsafe_reason`` then says why.
    """

    raw_path: str
    raw_path_bytes: bytes
    safe_path: Optional[str]
    unsafe_reason: Optional[str]


# Matches a leading Windows drive specifier such as "C:".
_drive_re = re.compile(r"^[A-Za-z]:")


def normalize_path(raw_path_bytes: bytes) -> NormalizedPath:
    """
    Normalize an archive path into a safe, platform-neutral form.

    The bytes are decoded as UTF-8 (undecodable bytes are replaced) and
    backslashes are treated as separators. Paths are rejected when they
    contain NUL characters, are UNC or absolute paths, start with a Windows
    drive, or contain a ``..`` component. On Windows, components whose base
    name is a reserved device name are rejected too. Empty and ``.``
    components are dropped from the safe path.
    """
    raw_path = raw_path_bytes.decode("utf-8", errors="replace")
    path = raw_path.replace("\\", "/")

    def unsafe(reason: str) -> NormalizedPath:
        return NormalizedPath(raw_path, raw_path_bytes, None, reason)

    # Filesystem calls raise ValueError on embedded NULs; reject them here so
    # callers see a consistent "unsafe path" result instead.
    if "\x00" in path:
        return unsafe("NUL characters are not allowed")
    if path.startswith("//"):
        return unsafe("UNC paths are not allowed")
    if path.startswith("/"):
        return unsafe("absolute paths are not allowed")
    if _drive_re.match(path):
        return unsafe("Windows drive paths are not allowed")

    parts = [p for p in path.split("/") if p not in ("", ".")]
    if ".." in parts:
        return unsafe("path traversal is not allowed")

    if os.name == "nt":
        for part in parts:
            # Device names stay reserved with an extension and with trailing
            # spaces before it ("NUL.txt", "nul .txt").
            base = part.split(".")[0].rstrip(" ").upper()
            if base in _WINDOWS_RESERVED:
                return unsafe("reserved Windows name")

    return NormalizedPath(raw_path, raw_path_bytes, "/".join(parts), None)
pylhasa/_pylhasa.so ADDED
Binary file
@@ -0,0 +1,159 @@
1
+ Metadata-Version: 2.2
2
+ Name: pylhasa
3
+ Version: 0.1.1
4
+ Summary: Cross-platform Python wrapper for liblhasa (LHA/LZH archives)
5
+ Author: pylhasa contributors
6
+ License: MIT License
7
+
8
+ Copyright (c) 2026 bwhitn
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ SOFTWARE.
27
+
28
+ Classifier: Programming Language :: Python :: 3
29
+ Classifier: Programming Language :: Python :: 3 :: Only
30
+ Classifier: Programming Language :: Python :: 3.9
31
+ Classifier: Programming Language :: Python :: 3.10
32
+ Classifier: Programming Language :: Python :: 3.11
33
+ Classifier: Programming Language :: Python :: 3.12
34
+ Classifier: Programming Language :: Python :: 3.13
35
+ Classifier: Programming Language :: C
36
+ Classifier: License :: OSI Approved :: MIT License
37
+ Classifier: Operating System :: OS Independent
38
+ Project-URL: Homepage, https://github.com/bwhitn/pylhasa
39
+ Project-URL: Repository, https://github.com/bwhitn/pylhasa
40
+ Project-URL: Issues, https://github.com/bwhitn/pylhasa/issues
41
+ Requires-Python: >=3.9
42
+ Description-Content-Type: text/markdown
43
+
44
+ # pylhasa
45
+
46
+ `pylhasa` is a cross-platform Python wrapper for the LHA/LZH archive format. It vendors the liblhasa C sources and builds a CPython extension, producing wheels for Linux, macOS, and Windows.
47
+
48
+ ## Install
49
+
50
+ From PyPI:
51
+
52
+ ```bash
53
+ pip install pylhasa
54
+ ```
55
+
56
+ Wheels are built for Python 3.9+.
57
+
58
+ ## Usage
59
+
60
+ ```python
61
+ import pylhasa
62
+
63
+ archive = pylhasa.open("example.lha")
64
+ for entry in archive:
65
+ print(entry.raw_path, entry.size)
66
+
67
+ # Read bytes directly (loads full file into memory)
68
+ payload = archive.read("hello.txt")
69
+
70
+ # Stream contents (incremental reads, avoids large memory usage)
71
+ entry = next(iter(archive))
72
+ with entry.open() as stream:
73
+ chunk = stream.read(1024)
74
+
75
+ # Extract safely (default)
76
+ archive.extractall("out")
77
+ archive.close()
78
+ ```
79
+
80
+ ## API overview
81
+
82
+ Top-level functions:
83
+
84
+ - `pylhasa.open(path)`: open an archive from a filesystem path.
85
+ - `pylhasa.open_bytes(data)` / `pylhasa.from_bytes(data)`: open from in-memory bytes.
86
+ - `pylhasa.open_fileobj(fileobj, buffering=131072)`: open from a stream by spooling to a temp file.
87
+
88
+ Archive behavior:
89
+
90
+ - `Archive` is iterable; each item is an `Entry`.
91
+ - `Archive.read(name_or_entry)` returns the full bytes of a file entry.
92
+ - `Entry.read()` returns the full bytes for that entry (same as `Archive.read(entry)`).
93
+ - `Archive.extract(name_or_entry, dest_dir, safe=True, allow_symlinks=False, verify_crc=True)` extracts a single entry.
94
+ - `Archive.extractall(dest_dir, safe=True, allow_symlinks=False, verify_crc=True)` extracts all entries.
95
+
96
+ Entry behavior:
97
+
98
+ - `Entry.open()` returns a binary file-like object for streaming decompressed data.
99
+ - `Entry.read()` loads the full decompressed bytes of the entry into memory in one call.
101
+ - `Entry.raw_path` preserves the original path from the archive; `Entry.safe_path` is the sanitized path used for safe extraction.
102
+
103
+ ## Examples
104
+
105
+ See `examples/` for runnable scripts:
106
+
107
+ - `examples/list_entries.py`
108
+ - `examples/extract_all.py`
109
+ - `examples/stream_read.py`
110
+ - `examples/all_functions.py`
111
+
112
+ ### In-memory / streaming
113
+
114
+ ```python
115
+ import pylhasa
116
+
117
+ # In-memory bytes
118
+ data = open("example.lha", "rb").read()
119
+ archive = pylhasa.open_bytes(data)
120
+ # or: archive = pylhasa.from_bytes(data)
121
+
122
+ # Streaming file-like object
123
+ with open("example.lha", "rb") as fp:
124
+ archive = pylhasa.open_fileobj(fp, buffering=131072)
125
+ ```
126
+
127
+ ## Safety notes
128
+
129
+ - Safe extraction is **on by default**. Unsafe paths raise `UnsafePathError`.
130
+ - `Entry.raw_path` preserves the original stored path (best-effort decoding).
131
+ - `Entry.safe_path` contains the sanitized path used for extraction when safe mode is enabled.
132
+ - Path traversal, absolute paths, Windows drive paths, and UNC paths are rejected when `safe=True`.
133
+ - Extraction verifies CRC by default; pass `verify_crc=False` to skip.
134
+
135
+ ## Exceptions
136
+
137
+ - `PylhasaError`: base exception
138
+ - `BadArchiveError`: malformed or unsupported archive
139
+ - `UnsafePathError`: unsafe entry path for extraction
140
+
141
+ ## Header metadata
142
+
143
+ Each `Entry` exposes the full parsed LHA header fields (for example `header_level`, `os_type`, `extra_flags`, Unix permissions, Windows timestamps, and `raw_header_bytes`). These are available for forensic and advanced use.
144
+
145
+ Time helper:
146
+
147
+ - `Entry.datetime_utc()` returns a best‑effort UTC `datetime` (prefers Windows FILETIME when present, otherwise Unix timestamp).
148
+
149
+ ## Compression support
150
+
151
+ The vendored liblhasa core supports common LHA/LZH compression methods including `-lh1-` through `-lh7-`, `-lhd-`, and LArc `-lz*` variants.
152
+
153
+ **Warning (experimental):** `-lh2-` and `-lh3-` support is best‑effort and under‑documented. Treat results with caution and validate against trusted tools when possible.
154
+
155
+ Directory entries (`-lhd-`) and symlinks do not carry file data; `Archive.read()` returns `b""` for those entries.
156
+
157
+ ## Third-party licenses
158
+
159
+ This project vendors liblhasa. Its license is included at `native/vendor/lhasa/COPYING.md` and applies to the vendored sources.
@@ -0,0 +1,10 @@
1
+ pylhasa/_paths.py,sha256=7SFViWdryBODhzKLTd5YF96bahuc-raQ5Jni-3wPcXE,1790
2
+ pylhasa/_archive.py,sha256=6kXiyMBYRH6YdH71_Bf8doasVq9prqVMVjjbpj9QTQ8,16512
3
+ pylhasa/_exceptions.py,sha256=rY6CpsShOXATIEzx2-yOtzTxORSnYd9-NkeRwwVHhHo,517
4
+ pylhasa/_pylhasa.so,sha256=vVBsBAhmegfc23T26gAV5OjKU12vxP_VZfuooLvy8-A,118904
5
+ pylhasa/__init__.py,sha256=CQfkFxnlTJ9v3z79ob5l2Lak0xq7O5xlIwOzmLKm1mA,445
6
+ pylhasa-0.1.1.dist-info/WHEEL,sha256=SdD_Ze46rbG8O82pDF4NTDXbsCKrpf8pf8aQc3IgDLU,156
7
+ pylhasa-0.1.1.dist-info/METADATA,sha256=De24iY9-fw-OMPuTscZ4hEvgtPZMwadJfcA9IsoxDvA,6085
8
+ pylhasa-0.1.1.dist-info/RECORD,,
9
+ pylhasa-0.1.1.dist-info/licenses/LICENSE,sha256=iXa4uBkH521dMnGiESJVafTsbNcxfh14J9ua0C6WaNs,1063
10
+ pylhasa-0.1.1.dist-info/licenses/native/vendor/lhasa/COPYING.md,sha256=v3sFXGMgXPwf6hdep2OPfn8je6s-q_Kp_Su1yIcu3vs,752
@@ -0,0 +1,6 @@
1
+ Wheel-Version: 1.0
2
+ Generator: scikit-build-core 0.11.6
3
+ Root-Is-Purelib: false
4
+ Tag: cp311-cp311-manylinux_2_17_x86_64
5
+ Tag: cp311-cp311-manylinux2014_x86_64
6
+
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 bwhitn
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,17 @@
1
+ ## ISC License
2
+
3
+ Copyright (c) 2011-2025, Simon Howard
4
+
5
+ Permission to use, copy, modify, and/or distribute this software
6
+ for any purpose with or without fee is hereby granted, provided
7
+ that the above copyright notice and this permission notice appear
8
+ in all copies.
9
+
10
+ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
11
+ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
12
+ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
13
+ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
14
+ CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
15
+ LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
16
+ NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
17
+ CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.