amcx 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- amcx-0.3.0/LICENSE +22 -0
- amcx-0.3.0/PKG-INFO +53 -0
- amcx-0.3.0/README.md +32 -0
- amcx-0.3.0/amcx/__init__.py +31 -0
- amcx-0.3.0/amcx/compression.py +75 -0
- amcx-0.3.0/amcx/exceptions.py +30 -0
- amcx-0.3.0/amcx/format.py +64 -0
- amcx-0.3.0/amcx/mirror.py +379 -0
- amcx-0.3.0/amcx/reader.py +227 -0
- amcx-0.3.0/amcx/smart.py +349 -0
- amcx-0.3.0/amcx/writer.py +195 -0
- amcx-0.3.0/amcx.egg-info/PKG-INFO +53 -0
- amcx-0.3.0/amcx.egg-info/SOURCES.txt +19 -0
- amcx-0.3.0/amcx.egg-info/dependency_links.txt +1 -0
- amcx-0.3.0/amcx.egg-info/requires.txt +3 -0
- amcx-0.3.0/amcx.egg-info/top_level.txt +1 -0
- amcx-0.3.0/pyproject.toml +31 -0
- amcx-0.3.0/setup.cfg +4 -0
- amcx-0.3.0/tests/test_mirror.py +156 -0
- amcx-0.3.0/tests/test_smart.py +179 -0
- amcx-0.3.0/tests/test_writer_reader.py +163 -0
amcx-0.3.0/LICENSE
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
GNU LESSER GENERAL PUBLIC LICENSE
|
|
2
|
+
Version 3.0 or later, February 1999
|
|
3
|
+
|
|
4
|
+
Copyright (C) 1991, 1999 Free Software Foundation, Inc.
|
|
5
|
+
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
6
|
+
|
|
7
|
+
This library is free software; you can redistribute it and/or
|
|
8
|
+
modify it under the terms of the GNU Lesser General Public
|
|
9
|
+
License as published by the Free Software Foundation; either
|
|
10
|
+
version 2.1 of the License, or (at your option) any later version.
|
|
11
|
+
|
|
12
|
+
This library is distributed in the hope that it will be useful,
|
|
13
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
14
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
15
|
+
Lesser General Public License for more details.
|
|
16
|
+
|
|
17
|
+
You should have received a copy of the GNU Lesser General Public
|
|
18
|
+
License along with this library; if not, write to the Free Software
|
|
19
|
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
20
|
+
|
|
21
|
+
The full license text is available at:
|
|
22
|
+
https://www.gnu.org/licenses/lgpl-3.0.html#license-text
|
amcx-0.3.0/PKG-INFO
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: amcx
|
|
3
|
+
Version: 0.3.0
|
|
4
|
+
Summary: Compressed memory chunks for AI chat history - 60-70% space savings
|
|
5
|
+
License: LGPL-3.0-or-later
|
|
6
|
+
Keywords: ai,memory,chunks,compression,chatbot,llm
|
|
7
|
+
Classifier: Development Status :: 4 - Beta
|
|
8
|
+
Classifier: Intended Audience :: Developers
|
|
9
|
+
Classifier: License :: OSI Approved :: GNU Lesser General Public License v2 or later (LGPLv2+)
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
14
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
15
|
+
Requires-Python: >=3.10
|
|
16
|
+
Description-Content-Type: text/markdown
|
|
17
|
+
License-File: LICENSE
|
|
18
|
+
Provides-Extra: dev
|
|
19
|
+
Requires-Dist: pytest>=7; extra == "dev"
|
|
20
|
+
Dynamic: license-file
|
|
21
|
+
|
|
22
|
+
# Adaptive Memory Chunk eXtended (amcx)
|
|
23
|
+
|
|
24
|
+
A Python library for efficient memory chunking with adaptive algorithms.
|
|
25
|
+
|
|
26
|
+
### *Read the [wiki](https://github.com/hacko223/adaptive-memory-chunk-eXtended/wiki) for more info*
|
|
27
|
+
|
|
28
|
+
## Installation
|
|
29
|
+
|
|
30
|
+
```bash
|
|
31
|
+
pip install amcx
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
## Usage
|
|
35
|
+
|
|
36
|
+
```python
|
|
37
|
+
from amcx import AMCXReader, AMCXWriter
|
|
38
|
+
|
|
39
|
+
# Your code here
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
## License
|
|
43
|
+
|
|
44
|
+
This project is licensed under the GNU Lesser General Public License v3.0 or later (LGPL-3.0+).
|
|
45
|
+
See the [LICENSE](LICENSE) file for details.
|
|
46
|
+
|
|
47
|
+
## Author
|
|
48
|
+
|
|
49
|
+
- hacko223
|
|
50
|
+
|
|
51
|
+
## Contributing
|
|
52
|
+
|
|
53
|
+
Contributions are welcome! Please feel free to submit a Pull Request.
|
amcx-0.3.0/README.md
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
# Adaptive Memory Chunk eXtended (amcx)
|
|
2
|
+
|
|
3
|
+
A Python library for efficient memory chunking with adaptive algorithms.
|
|
4
|
+
|
|
5
|
+
### *Read the [wiki](https://github.com/hacko223/adaptive-memory-chunk-eXtended/wiki) for more info*
|
|
6
|
+
|
|
7
|
+
## Installation
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
pip install amcx
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
## Usage
|
|
14
|
+
|
|
15
|
+
```python
|
|
16
|
+
from amcx import AMCXReader, AMCXWriter
|
|
17
|
+
|
|
18
|
+
# Your code here
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
## License
|
|
22
|
+
|
|
23
|
+
This project is licensed under the GNU Lesser General Public License v3.0 or later (LGPL-3.0+).
|
|
24
|
+
See the [LICENSE](LICENSE) file for details.
|
|
25
|
+
|
|
26
|
+
## Author
|
|
27
|
+
|
|
28
|
+
- hacko223
|
|
29
|
+
|
|
30
|
+
## Contributing
|
|
31
|
+
|
|
32
|
+
Contributions are welcome! Please feel free to submit a Pull Request.
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# amcx/__init__.py
|
|
2
|
+
# Public API of the amcx library
|
|
3
|
+
|
|
4
|
+
from .format import (
|
|
5
|
+
COMPRESS_NONE, COMPRESS_ZLIB, COMPRESS_LZMA,
|
|
6
|
+
CHUNK_LORE, CHUNK_CHARACTER, CHUNK_EVENT, CHUNK_ACTIVE, CHUNK_GENERIC,
|
|
7
|
+
)
|
|
8
|
+
from .reader import AMCXReader, IndexEntry, AMCXHeader
|
|
9
|
+
from .writer import AMCXWriter, ChunkEntry
|
|
10
|
+
from .mirror import AMCXMirror, AMCXRecovery, MirrorMode, MirrorStatus, ChunkStatus
|
|
11
|
+
from .exceptions import (
|
|
12
|
+
AMCXError, AMCXInvalidFileError, AMCXVersionError,
|
|
13
|
+
AMCXCompressionError, AMCXChunkNotFoundError, AMCXCorruptError, AMCXReadOnlyError,
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
__version__ = "0.3.0"
|
|
17
|
+
__author__ = "Tu nombre aquí"
|
|
18
|
+
__all__ = [
|
|
19
|
+
"AMCXReader", "AMCXWriter",
|
|
20
|
+
"AMCXMirror", "AMCXRecovery", "MirrorMode", "MirrorStatus", "ChunkStatus",
|
|
21
|
+
"ChunkEntry", "IndexEntry", "AMCXHeader",
|
|
22
|
+
"COMPRESS_NONE", "COMPRESS_ZLIB", "COMPRESS_LZMA",
|
|
23
|
+
"CHUNK_LORE", "CHUNK_CHARACTER", "CHUNK_EVENT", "CHUNK_ACTIVE", "CHUNK_GENERIC",
|
|
24
|
+
"AMCXError", "AMCXInvalidFileError", "AMCXVersionError",
|
|
25
|
+
"AMCXCompressionError", "AMCXChunkNotFoundError", "AMCXCorruptError", "AMCXReadOnlyError",
|
|
26
|
+
]
|
|
27
|
+
|
|
28
|
+
# High-level API
|
|
29
|
+
from .smart import SmartMemory
|
|
30
|
+
|
|
31
|
+
__all__ += ["SmartMemory"]
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
# amcx/compression.py
|
|
2
|
+
# Compression abstraction — the rest of the library only calls compress/decompress
|
|
3
|
+
|
|
4
|
+
import zlib
|
|
5
|
+
import lzma
|
|
6
|
+
from .format import COMPRESS_NONE, COMPRESS_ZLIB, COMPRESS_LZMA
|
|
7
|
+
from .exceptions import AMCXCompressionError
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def compress(data: bytes, algorithm: int) -> bytes:
    """
    Encodes a payload with the requested compression algorithm.

    Args:
        data: raw bytes to encode
        algorithm: COMPRESS_NONE | COMPRESS_ZLIB | COMPRESS_LZMA

    Returns:
        the encoded bytes; with COMPRESS_NONE the input is handed back as-is

    Raises:
        AMCXCompressionError: the codec reported an error, or ``algorithm``
            is none of the three known identifiers
    """
    if algorithm == COMPRESS_NONE:
        return data

    if algorithm == COMPRESS_ZLIB:
        try:
            packed = zlib.compress(data, level=6)  # level 6: speed/size balance
        except zlib.error as exc:
            raise AMCXCompressionError(f"Error compressing with zlib: {exc}") from exc
        else:
            return packed

    if algorithm == COMPRESS_LZMA:
        try:
            packed = lzma.compress(data, preset=6)  # preset 6: good balance for old chunks
        except lzma.LZMAError as exc:
            raise AMCXCompressionError(f"Error compressing with lzma: {exc}") from exc
        else:
            return packed

    # Nothing matched: the identifier is not one this module knows how to handle.
    raise AMCXCompressionError(f"Unknown compression algorithm: {algorithm:#04x}")
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def decompress(data: bytes, algorithm: int) -> bytes:
    """
    Decodes a payload that was encoded with the given algorithm.

    Args:
        data: encoded bytes
        algorithm: COMPRESS_NONE | COMPRESS_ZLIB | COMPRESS_LZMA

    Returns:
        the decoded bytes; with COMPRESS_NONE the input is handed back as-is

    Raises:
        AMCXCompressionError: the codec rejected the payload, or
            ``algorithm`` is none of the three known identifiers
    """
    if algorithm == COMPRESS_NONE:
        return data

    if algorithm == COMPRESS_ZLIB:
        try:
            restored = zlib.decompress(data)
        except zlib.error as exc:
            raise AMCXCompressionError(f"Error decompressing with zlib: {exc}") from exc
        else:
            return restored

    if algorithm == COMPRESS_LZMA:
        try:
            restored = lzma.decompress(data)
        except lzma.LZMAError as exc:
            raise AMCXCompressionError(f"Error decompressing with lzma: {exc}") from exc
        else:
            return restored

    # Nothing matched: the identifier is not one this module knows how to handle.
    raise AMCXCompressionError(f"Unknown compression algorithm: {algorithm:#04x}")
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def algorithm_name(algorithm: int) -> str:
    """Maps a compression identifier to its display name ("none", "zlib", "lzma")."""
    known = {COMPRESS_NONE: "none", COMPRESS_ZLIB: "zlib", COMPRESS_LZMA: "lzma"}
    # The fallback label is formatted before the lookup, so a value that
    # cannot be rendered as hex fails here even if it would match a key.
    fallback = f"unknown({algorithm:#04x})"
    return known.get(algorithm, fallback)
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# amcx/exceptions.py
|
|
2
|
+
# Custom errors for the .amcx library
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class AMCXError(Exception):
    """Root of the library's exception hierarchy; every other AMCX error derives from it."""
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class AMCXInvalidFileError(AMCXError):
    """Raised for a file that is not a valid .amcx (wrong magic bytes, corrupt header, etc.)."""
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class AMCXVersionError(AMCXError):
    """Raised when the file's format version is not compatible with this library."""
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class AMCXCompressionError(AMCXError):
    """Raised when compressing or decompressing a chunk fails."""
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class AMCXChunkNotFoundError(AMCXError):
    """Raised when a requested chunk does not exist in the index."""
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class AMCXCorruptError(AMCXError):
    """Raised when a CRC32 does not match, i.e. the file is corrupt."""
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class AMCXReadOnlyError(AMCXError):
    """Raised on an attempt to write to a file marked as read-only."""
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
# amcx/format.py
|
|
2
|
+
# Binary format definition for .amcx
|
|
3
|
+
# AMCX = Adaptive Memory Chunks X
|
|
4
|
+
|
|
5
|
+
import struct
|
|
6
|
+
|
|
7
|
+
# ─── Magic & Version ───────────────────────────────────────────────────────────
|
|
8
|
+
MAGIC = b"AMC\x00"   # 4-byte signature that identifies the file
VERSION_MAJOR = 0
VERSION_MINOR = 2    # version 0.2 — adds per-chunk checksum

# ─── Compression ───────────────────────────────────────────────────────────────
COMPRESS_NONE = 0x00   # no compression
COMPRESS_ZLIB = 0x01   # zlib → active chunks (fast)
COMPRESS_LZMA = 0x02   # lzma → old chunks (maximum compression)

# ─── Chunk types ───────────────────────────────────────────────────────────────
CHUNK_LORE      = 0x00   # lore / world / rules
CHUNK_CHARACTER = 0x01   # character
CHUNK_EVENT     = 0x02   # narrative event
CHUNK_ACTIVE    = 0x03   # active chunk (the most recent)
CHUNK_GENERIC   = 0x04   # generic content

# ─── Header flags (2-byte bitfield) ───────────────────────────────────────────
FLAG_COMPRESSED = 0x0001   # bit 0: at least one chunk has compression
FLAG_ENCRYPTED  = 0x0002   # bit 1: reserved for future encryption
FLAG_READONLY   = 0x0004   # bit 2: read-only file
FLAG_HAS_ACTIVE = 0x0008   # bit 3: there is a chunk marked as active
FLAG_HAS_ASSETS = 0x0010   # bit 4: contains assets/images (future)

# ─── Fixed sizes in bytes ──────────────────────────────────────────────────────
HEADER_SIZE  = 32   # total size of the header
SUMMARY_SIZE = 64   # bytes reserved for the summary in the index

# ─── Header layout ─────────────────────────────────────────────────────────────
# Offset  Size  Field
# 0x00    4     Magic "AMC\0"
# 0x04    1     Version major
# 0x05    1     Version minor
# 0x06    4     Number of chunks
# 0x0A    8     Creation timestamp (unix, big-endian)
# 0x12    4     Offset where the index starts
# 0x16    4     Size of the index block
# 0x1A    2     Flags
# 0x1C    4     CRC32 of the header (first 28 bytes)
# The leading '>' selects big-endian with no alignment padding, so the
# packed size is the plain sum of the field sizes listed above.
HEADER_STRUCT = struct.Struct(">4sBBIQIIHI")
# Fields: magic, v_major, v_minor, num_chunks, timestamp, index_offset, index_size, flags, crc32

# ─── Layout of each index entry ───────────────────────────────────────────────
# Offset  Size  Field
# 0x00    4     Chunk ID
# 0x04    4     Chunk offset in the file
# 0x08    4     Compressed size
# 0x0C    4     Original size (before compression)
# 0x10    2     Chunk type
# 0x12    1     Compression algorithm
# 0x13    1     Reserved
# 0x14    8     Chunk timestamp
# 0x1C    4     CRC32 of the chunk (over compressed bytes)   ← NEW
# 0x20    64    Summary in UTF-8 (null-padded)
INDEX_ENTRY_STRUCT = struct.Struct(">IIIIHBBQI64s")
# Fields: chunk_id, offset, size_c, size_o, ctype, algo, reserved, ts, crc32, summary

INDEX_ENTRY_SIZE = INDEX_ENTRY_STRUCT.size   # automatically calculated from the struct
|
|
@@ -0,0 +1,379 @@
|
|
|
1
|
+
# amcx/mirror.py
|
|
2
|
+
# Mirror system embedded inside the .amcx — inspired by WinRAR.
|
|
3
|
+
#
|
|
4
|
+
# Final file structure:
|
|
5
|
+
# [ HEADER ] [ INDEX ] [ CHUNKS... ] [ XOR RECOVERY BLOCK ] [ SHA-1 MIRROR BLOCK ]
|
|
6
|
+
#
|
|
7
|
+
# Both blocks are optional and are detected by their magic bytes at the end of the file.
|
|
8
|
+
# The reader finds them with rfind() without affecting normal chunk reading.
|
|
9
|
+
|
|
10
|
+
import hashlib
|
|
11
|
+
import struct
|
|
12
|
+
import time
|
|
13
|
+
from dataclasses import dataclass, field
|
|
14
|
+
from enum import Enum, auto
|
|
15
|
+
from typing import Optional
|
|
16
|
+
|
|
17
|
+
from .exceptions import AMCXError, AMCXCorruptError
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
# ─── Magic bytes for each block ───────────────────────────────────────────────
|
|
21
|
+
RECOVERY_MAGIC = b"AMCXR\x00"   # marks the start of the XOR recovery block
MIRROR_MAGIC = b"AMCXM\x00"     # marks the start of the SHA-1 mirror block
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
# ─── Configuration ─────────────────────────────────────────────────────────────
|
|
26
|
+
|
|
27
|
+
class MirrorMode(Enum):
    """Policy for when the SHA-1 mirror block gets embedded."""

    # Values are spelled out; they are the same 1, 2, 3 that auto() assigns.
    NONE = 1     # no embedded mirror
    MANUAL = 2   # only when you call writer.embed_mirror()
    AUTO = 3     # automatic every time writer.save() is called
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class ChunkStatus(Enum):
    """Per-chunk result of comparing the file against its embedded mirror."""

    OK = "ok"
    MODIFIED = "modified"              # the chunk's SHA-1 differs from the mirror's
    MISSING_ORIG = "missing_orig"      # listed in the mirror, absent from the chunks
    MISSING_MIRROR = "missing_mirror"  # present in the chunks, absent from the mirror
    OUTDATED = "outdated"              # the mirror is older than the file
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
@dataclass
class ChunkReport:
    """Verification result for one chunk."""

    chunk_id: int
    summary: str
    status: ChunkStatus
    # Hex digests; each stays None when that side of the comparison is unavailable.
    sha1_original: Optional[str] = None
    sha1_mirror: Optional[str] = None

    @property
    def ok(self) -> bool:
        """Whether this chunk's status is ChunkStatus.OK."""
        is_ok = self.status == ChunkStatus.OK
        return is_ok
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
@dataclass
class MirrorStatus:
    """Overall result of checking a file against its embedded mirror block."""

    mirror_exists: bool
    chunks: list[ChunkReport] = field(default_factory=list)
    mirror_ts: Optional[int] = None
    file_ts: Optional[int] = None

    @property
    def all_ok(self) -> bool:
        """True when a mirror exists and no chunk report is flagged."""
        return self.mirror_exists and not self.problems

    @property
    def problems(self) -> list[ChunkReport]:
        """The chunk reports whose status is anything other than OK."""
        flagged = []
        for chunk in self.chunks:
            if not chunk.ok:
                flagged.append(chunk)
        return flagged

    def report(self) -> str:
        """Renders the status as a multi-line, human-readable text summary."""
        existence = "✓ exists" if self.mirror_exists else "✗ does not exist"
        out = [f"Embedded mirror: {existence}"]

        # The freshness line is emitted only when both timestamps are known.
        if self.mirror_ts and self.file_ts:
            out.append(
                "⚠ The mirror is older than the file"
                if self.mirror_ts < self.file_ts
                else "✓ Mirror up to date"
            )
        out.append("")

        if self.chunks:
            out.append(f"{'ID':>4} {'Status':<16} Summary")
            out.append("-" * 52)
            out.extend(
                f"{c.chunk_id:>4} {'✓' if c.ok else '✗'} {c.status.value:<14} {c.summary}"
                for c in self.chunks
            )
        out.append("")

        if self.all_ok:
            verdict = "✓ Everything is in order."
        else:
            verdict = f"✗ {len(self.problems)} problem(s) found."
        out.append(verdict)
        return "\n".join(out)
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
# ─── AMCXMirror — embedded SHA-1 block ───────────────────────────────────────
|
|
94
|
+
|
|
95
|
+
class AMCXMirror:
    """
    Reads and writes the SHA-1 mirror block inside the .amcx.

    Block format (integers are big-endian; fields are listed in the order
    build_block() writes them and read_block() parses them):
        AMCXM\\x00   magic (6 bytes)
        uint32      version
        uint32      num_entries
        uint64      timestamp
        [ for each chunk: ]
        uint32      chunk_id
        uint32      size_original
        20 bytes    SHA-1 (raw)
        uint8       summary_len
        N bytes     summary UTF-8

    The block is appended at the end of the file after the chunks
    and after the XOR recovery block if it exists.
    """

    @staticmethod
    def build_block(chunk_data: dict[int, tuple[bytes, str]]) -> bytes:
        """
        Builds the mirror block in bytes.

        Args:
            chunk_data: {chunk_id: (original_content, summary)}

        Returns:
            the serialized block, starting with MIRROR_MAGIC; entries are
            written in ascending chunk_id order
        """
        buf = bytearray()
        buf += MIRROR_MAGIC
        buf += struct.pack('>II', 1, len(chunk_data))  # version=1, num_entries
        buf += struct.pack('>Q', int(time.time()))     # timestamp

        for chunk_id, (data, summary) in sorted(chunk_data.items()):
            sha1 = hashlib.sha1(data).digest()  # 20 raw bytes
            summary_bytes = summary.encode("utf-8")
            if len(summary_bytes) > 255:
                # The length prefix is a single byte. Cut at 255 bytes, then
                # drop a multi-byte character split by the cut so the stored
                # summary is still valid UTF-8.
                summary_bytes = (
                    summary_bytes[:255].decode("utf-8", errors="ignore").encode("utf-8")
                )
            buf += struct.pack('>II', chunk_id, len(data))
            buf += sha1
            buf += struct.pack('>B', len(summary_bytes))
            buf += summary_bytes

        return bytes(buf)

    @staticmethod
    def read_block(amcx_path: str) -> Optional[dict]:
        """
        Reads the mirror block from the file if it exists.

        The block is located by searching for the last occurrence of
        MIRROR_MAGIC in the file.

        Returns:
            dict with 'timestamp' and
            'chunks': {chunk_id: {'sha1': hex, 'summary': str, 'size': int}}
            or None if there is no mirror block.

        Raises:
            struct.error: if the data after the magic is too short to parse
        """
        with open(amcx_path, "rb") as f:
            data = f.read()

        pos = data.rfind(MIRROR_MAGIC)
        if pos == -1:
            return None

        pos += len(MIRROR_MAGIC)
        # The version field is read to advance past it; its value is not checked.
        version, num_entries = struct.unpack_from('>II', data, pos)
        pos += 8
        timestamp, = struct.unpack_from('>Q', data, pos)
        pos += 8

        chunks = {}
        for _ in range(num_entries):
            chunk_id, size_orig = struct.unpack_from('>II', data, pos)
            pos += 8
            sha1_raw = data[pos:pos + 20]
            pos += 20
            summary_len, = struct.unpack_from('>B', data, pos)
            pos += 1
            summary = data[pos:pos + summary_len].decode("utf-8", errors="replace")
            pos += summary_len
            chunks[chunk_id] = {
                "sha1": sha1_raw.hex(),
                "summary": summary,
                "size": size_orig,
            }

        return {"timestamp": timestamp, "chunks": chunks}

    @staticmethod
    def verify(amcx_path: str) -> MirrorStatus:
        """
        Compares the mirror block with the current chunks in the file.

        Returns:
            a MirrorStatus with one ChunkReport per chunk found in the file
            or in the mirror, sorted by chunk_id. When the file has no mirror
            block the status has mirror_exists=False and no chunk reports.
        """
        import os
        from .reader import AMCXReader

        mirror_data = AMCXMirror.read_block(amcx_path)
        status = MirrorStatus(
            mirror_exists=mirror_data is not None,
            file_ts=int(os.path.getmtime(amcx_path)),
            mirror_ts=mirror_data["timestamp"] if mirror_data else None,
        )

        if not mirror_data:
            return status

        mirror_chunks = mirror_data["chunks"]

        with AMCXReader(amcx_path) as reader:
            entries = reader.list_chunks()  # fetched once, reused for both passes
            orig_ids = {e.chunk_id for e in entries}

            for entry in entries:
                cid = entry.chunk_id

                if cid not in mirror_chunks:
                    status.chunks.append(ChunkReport(
                        chunk_id=cid,
                        summary=entry.summary,
                        status=ChunkStatus.MISSING_MIRROR,
                    ))
                    continue

                # A chunk the reader reports as corrupt has no usable digest;
                # it is reported as MODIFIED below.
                try:
                    raw = reader.read_chunk(cid)
                    sha1 = hashlib.sha1(raw).hexdigest()
                except AMCXCorruptError:
                    sha1 = None

                mirror_sha1 = mirror_chunks[cid]["sha1"]

                if sha1 is None or sha1 != mirror_sha1:
                    chunk_status = ChunkStatus.MODIFIED
                elif status.mirror_ts and status.file_ts and status.mirror_ts < status.file_ts:
                    chunk_status = ChunkStatus.OUTDATED
                else:
                    chunk_status = ChunkStatus.OK

                status.chunks.append(ChunkReport(
                    chunk_id=cid,
                    summary=entry.summary,
                    status=chunk_status,
                    sha1_original=sha1,
                    sha1_mirror=mirror_sha1,
                ))

        # Chunks in the mirror that are no longer in the original
        for cid, info in mirror_chunks.items():
            if cid not in orig_ids:
                status.chunks.append(ChunkReport(
                    chunk_id=cid,
                    summary=info["summary"],
                    status=ChunkStatus.MISSING_ORIG,
                    sha1_mirror=info["sha1"],
                ))

        status.chunks.sort(key=lambda c: c.chunk_id)
        return status

    @staticmethod
    def embed(amcx_path: str, chunk_data: dict[int, tuple[bytes, str]]) -> None:
        """
        Adds or replaces the mirror block at the end of the file.

        Everything from the last MIRROR_MAGIC occurrence to the end of the
        file is discarded before the new block is written, so any bytes that
        were stored after an existing mirror block are dropped as well.
        """
        with open(amcx_path, "rb") as f:
            data = f.read()

        # If there is already a mirror block, remove it before adding the new one
        pos = data.rfind(MIRROR_MAGIC)
        if pos != -1:
            data = data[:pos]

        # Serialize before reopening the file for writing, so a failure while
        # building the block leaves the file on disk untouched.
        block = AMCXMirror.build_block(chunk_data)

        with open(amcx_path, "wb") as f:
            f.write(data)
            f.write(block)

    @staticmethod
    def update(amcx_path: str) -> None:
        """Regenerates the mirror block by reading the current state of the chunks."""
        from .reader import AMCXReader

        chunk_data = {}
        with AMCXReader(amcx_path) as reader:
            for entry in reader.list_chunks():
                raw = reader.read_chunk(entry.chunk_id)
                chunk_data[entry.chunk_id] = (raw, entry.summary)
        AMCXMirror.embed(amcx_path, chunk_data)
|
|
268
|
+
|
|
269
|
+
|
|
270
|
+
# ─── AMCXRecovery — embedded XOR blocks ──────────────────────────────────────
|
|
271
|
+
|
|
272
|
+
class AMCXRecovery:
    """
    XOR recovery blocks embedded in the .amcx, inspired by WinRAR.

    Each group of N chunks has a parity block (XOR of all of them).
    If a chunk is damaged, it can be reconstructed with the others + the parity.

    Block format (integers are big-endian):
        AMCXR\\x00   magic (6 bytes)
        uint32      num_groups
        [ for each group: ]
        uint32      group_id
        uint32      num_chunk_ids
        [uint32     chunk_id ...]
        uint32      parity_size
        [bytes      parity]
    """

    @staticmethod
    def _xor_padded(blocks, length: int) -> bytes:
        """XORs byte strings together after right-padding each with zeros to `length`."""
        acc = 0
        for block in blocks:
            # Big-endian integers of equal byte length XOR position by position,
            # which gives the same result as a byte-by-byte loop.
            acc ^= int.from_bytes(block + b'\x00' * (length - len(block)), "big")
        return acc.to_bytes(length, "big")

    @staticmethod
    def append(amcx_path: str, group_size: int = 3) -> None:
        """
        Appends XOR recovery blocks to the file.

        Chunks are grouped in index order, `group_size` per group (the last
        group may be smaller), and one parity block is stored per group.

        Args:
            amcx_path: path of the .amcx file to extend
            group_size: number of chunks covered by each parity block

        Raises:
            ValueError: if group_size is smaller than 1
        """
        if group_size < 1:
            raise ValueError(f"group_size must be >= 1, got {group_size}")

        from .reader import AMCXReader

        recovery_blocks = []
        with AMCXReader(amcx_path) as reader:
            entries = reader.list_chunks()
            for gidx, start in enumerate(range(0, len(entries), group_size)):
                chunk_ids = [e.chunk_id for e in entries[start:start + group_size]]
                chunks = [reader.read_chunk(cid) for cid in chunk_ids]
                # Parity is as long as the longest chunk in the group.
                max_len = max(len(c) for c in chunks)
                parity = AMCXRecovery._xor_padded(chunks, max_len)
                recovery_blocks.append((gidx, chunk_ids, parity))

        payload = bytearray(RECOVERY_MAGIC)
        payload += struct.pack('>I', len(recovery_blocks))
        for gidx, chunk_ids, parity in recovery_blocks:
            payload += struct.pack('>II', gidx, len(chunk_ids))
            for cid in chunk_ids:
                payload += struct.pack('>I', cid)
            payload += struct.pack('>I', len(parity))
            payload += parity

        with open(amcx_path, "ab") as f:
            f.write(payload)

    @staticmethod
    def can_recover(amcx_path: str, damaged_chunk_id: int) -> bool:
        """Tells whether some recovery group in the file lists the given chunk id."""
        blocks = AMCXRecovery._read_blocks(amcx_path)
        return any(damaged_chunk_id in ids for _, ids, _ in blocks)

    @staticmethod
    def recover_chunk(amcx_path: str, damaged_chunk_id: int) -> bytes:
        """
        Reconstructs a damaged chunk using XOR parity.

        Returns:
            the reconstructed content with trailing zero bytes stripped. The
            parity block does not record each chunk's own length, so zero
            bytes that genuinely ended the original chunk are stripped too.

        Raises:
            AMCXError: if no recovery group covers the chunk, or another
                chunk of the same group is also damaged
        """
        from .reader import AMCXReader

        for _, chunk_ids, parity in AMCXRecovery._read_blocks(amcx_path):
            if damaged_chunk_id not in chunk_ids:
                continue

            healthy = []
            with AMCXReader(amcx_path) as reader:
                for cid in chunk_ids:
                    if cid == damaged_chunk_id:
                        continue
                    try:
                        healthy.append(reader.read_chunk(cid))
                    except AMCXCorruptError as exc:
                        raise AMCXError(
                            f"Cannot recover chunk {damaged_chunk_id}: "
                            f"chunk {cid} in the same group is also damaged."
                        ) from exc

            # `healthy` is empty when the damaged chunk is alone in its group;
            # max() over a list still works there, whereas max(single_int)
            # raises TypeError.
            max_len = max([len(parity), *(len(h) for h in healthy)])
            result = AMCXRecovery._xor_padded([parity, *healthy], max_len)
            return result.rstrip(b'\x00')

        raise AMCXError(f"No recovery block found for chunk {damaged_chunk_id}.")

    @staticmethod
    def _read_blocks(amcx_path: str) -> list[tuple[int, list[int], bytes]]:
        """
        Parses the recovery block found at the last RECOVERY_MAGIC occurrence.

        Returns:
            a list of (group_id, chunk_ids, parity) tuples; empty when the
            file contains no recovery block
        """
        with open(amcx_path, "rb") as f:
            data = f.read()

        pos = data.rfind(RECOVERY_MAGIC)
        if pos == -1:
            return []

        pos += len(RECOVERY_MAGIC)
        num_g, = struct.unpack_from('>I', data, pos)
        pos += 4
        blocks = []

        for _ in range(num_g):
            gidx, num_ids = struct.unpack_from('>II', data, pos)
            pos += 8
            chunk_ids = list(struct.unpack_from(f'>{num_ids}I', data, pos))
            pos += 4 * num_ids
            parity_size, = struct.unpack_from('>I', data, pos)
            pos += 4
            parity = data[pos:pos + parity_size]
            pos += parity_size
            blocks.append((gidx, chunk_ids, parity))

        return blocks
|