amcx 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
amcx-0.3.0/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ GNU LESSER GENERAL PUBLIC LICENSE
2
+ Version 3.0 or later, February 1999
3
+
4
+ Copyright (C) 1991, 1999 Free Software Foundation, Inc.
5
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
6
+
7
+ This library is free software; you can redistribute it and/or
8
+ modify it under the terms of the GNU Lesser General Public
9
+ License as published by the Free Software Foundation; either
10
+ version 2.1 of the License, or (at your option) any later version.
11
+
12
+ This library is distributed in the hope that it will be useful,
13
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
+ Lesser General Public License for more details.
16
+
17
+ You should have received a copy of the GNU Lesser General Public
18
+ License along with this library; if not, write to the Free Software
19
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
+
21
+ The full license text is available at:
22
+ https://www.gnu.org/licenses/lgpl-3.0.html#license-text
amcx-0.3.0/PKG-INFO ADDED
@@ -0,0 +1,53 @@
1
+ Metadata-Version: 2.4
2
+ Name: amcx
3
+ Version: 0.3.0
4
+ Summary: Compressed memory chunks for AI chat history - 60-70% space savings
5
+ License: LGPL-3.0-or-later
6
+ Keywords: ai,memory,chunks,compression,chatbot,llm
7
+ Classifier: Development Status :: 4 - Beta
8
+ Classifier: Intended Audience :: Developers
9
+ Classifier: License :: OSI Approved :: GNU Lesser General Public License v3 or later (LGPLv3+)
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Programming Language :: Python :: 3.10
12
+ Classifier: Programming Language :: Python :: 3.11
13
+ Classifier: Programming Language :: Python :: 3.12
14
+ Classifier: Topic :: Software Development :: Libraries
15
+ Requires-Python: >=3.10
16
+ Description-Content-Type: text/markdown
17
+ License-File: LICENSE
18
+ Provides-Extra: dev
19
+ Requires-Dist: pytest>=7; extra == "dev"
20
+ Dynamic: license-file
21
+
22
+ # Adaptive Memory Chunk eXtended (amcx)
23
+
24
+ A Python library for efficient memory chunking with adaptive algorithms.
25
+
26
+ ### *read [wiki](https://github.com/hacko223/adaptive-memory-chunk-eXtended/wiki) for more info*
27
+
28
+ ## Installation
29
+
30
+ ```bash
31
+ pip install amcx
32
+ ```
33
+
34
+ ## Usage
35
+
36
+ ```python
37
+ from amcx import AMCXReader, AMCXWriter, SmartMemory
38
+
39
+ # Your code here
40
+ ```
41
+
42
+ ## License
43
+
44
+ This project is licensed under the GNU Lesser General Public License v3.0 or later (LGPL-3.0+).
45
+ See the [LICENSE](LICENSE) file for details.
46
+
47
+ ## Author
48
+
49
+ - hacko223
50
+
51
+ ## Contributing
52
+
53
+ Contributions are welcome! Please feel free to submit a Pull Request.
amcx-0.3.0/README.md ADDED
@@ -0,0 +1,32 @@
1
+ # Adaptive Memory Chunk eXtended (amcx)
2
+
3
+ A Python library for efficient memory chunking with adaptive algorithms.
4
+
5
+ ### *read [wiki](https://github.com/hacko223/adaptive-memory-chunk-eXtended/wiki) for more info*
6
+
7
+ ## Installation
8
+
9
+ ```bash
10
+ pip install amcx
11
+ ```
12
+
13
+ ## Usage
14
+
15
+ ```python
16
+ from amcx import AMCXReader, AMCXWriter, SmartMemory
17
+
18
+ # Your code here
19
+ ```
20
+
21
+ ## License
22
+
23
+ This project is licensed under the GNU Lesser General Public License v3.0 or later (LGPL-3.0+).
24
+ See the [LICENSE](LICENSE) file for details.
25
+
26
+ ## Author
27
+
28
+ - hacko223
29
+
30
+ ## Contributing
31
+
32
+ Contributions are welcome! Please feel free to submit a Pull Request.
@@ -0,0 +1,31 @@
1
+ # amcx/__init__.py
2
+ # Public API of the amcx library
3
+
4
+ from .format import (
5
+ COMPRESS_NONE, COMPRESS_ZLIB, COMPRESS_LZMA,
6
+ CHUNK_LORE, CHUNK_CHARACTER, CHUNK_EVENT, CHUNK_ACTIVE, CHUNK_GENERIC,
7
+ )
8
+ from .reader import AMCXReader, IndexEntry, AMCXHeader
9
+ from .writer import AMCXWriter, ChunkEntry
10
+ from .mirror import AMCXMirror, AMCXRecovery, MirrorMode, MirrorStatus, ChunkStatus
11
+ from .exceptions import (
12
+ AMCXError, AMCXInvalidFileError, AMCXVersionError,
13
+ AMCXCompressionError, AMCXChunkNotFoundError, AMCXCorruptError, AMCXReadOnlyError,
14
+ )
15
+
16
# Package metadata. The author matches the "Author" section of the README.
__version__ = "0.3.0"
__author__ = "hacko223"

# Names re-exported as the public API of the package.
__all__ = [
    # Readers / writers
    "AMCXReader", "AMCXWriter",
    # Integrity helpers (embedded mirror + XOR recovery)
    "AMCXMirror", "AMCXRecovery", "MirrorMode", "MirrorStatus", "ChunkStatus",
    # Plain data records
    "ChunkEntry", "IndexEntry", "AMCXHeader",
    # Compression algorithm identifiers
    "COMPRESS_NONE", "COMPRESS_ZLIB", "COMPRESS_LZMA",
    # Chunk type identifiers
    "CHUNK_LORE", "CHUNK_CHARACTER", "CHUNK_EVENT", "CHUNK_ACTIVE", "CHUNK_GENERIC",
    # Exception hierarchy
    "AMCXError", "AMCXInvalidFileError", "AMCXVersionError",
    "AMCXCompressionError", "AMCXChunkNotFoundError", "AMCXCorruptError", "AMCXReadOnlyError",
]
27
+
28
+ # High-level API
29
+ from .smart import SmartMemory
30
+
31
+ __all__ += ["SmartMemory"]
@@ -0,0 +1,75 @@
1
+ # amcx/compression.py
2
+ # Compression abstraction — the rest of the library only calls compress/decompress
3
+
4
+ import zlib
5
+ import lzma
6
+ from .format import COMPRESS_NONE, COMPRESS_ZLIB, COMPRESS_LZMA
7
+ from .exceptions import AMCXCompressionError
8
+
9
+
10
def compress(data: bytes, algorithm: int) -> bytes:
    """
    Compress *data* with the requested algorithm.

    Args:
        data: raw bytes to compress
        algorithm: COMPRESS_NONE | COMPRESS_ZLIB | COMPRESS_LZMA

    Returns:
        The compressed payload. With COMPRESS_NONE the input object is
        handed back untouched.

    Raises:
        AMCXCompressionError: the underlying codec failed, or *algorithm*
            is not one of the three known identifiers.
    """
    if algorithm == COMPRESS_NONE:
        return data
    elif algorithm == COMPRESS_ZLIB:
        try:
            packed = zlib.compress(data, level=6)  # level 6: speed/size balance
        except zlib.error as exc:
            raise AMCXCompressionError(f"Error compressing with zlib: {exc}") from exc
        return packed
    elif algorithm == COMPRESS_LZMA:
        try:
            packed = lzma.compress(data, preset=6)  # preset 6: good balance for old chunks
        except lzma.LZMAError as exc:
            raise AMCXCompressionError(f"Error compressing with lzma: {exc}") from exc
        return packed
    else:
        raise AMCXCompressionError(f"Unknown compression algorithm: {algorithm:#04x}")
37
+
38
+
39
def decompress(data: bytes, algorithm: int) -> bytes:
    """
    Reverse :func:`compress` for the given algorithm.

    Args:
        data: compressed bytes
        algorithm: COMPRESS_NONE | COMPRESS_ZLIB | COMPRESS_LZMA

    Returns:
        The original bytes. With COMPRESS_NONE the input object is handed
        back untouched.

    Raises:
        AMCXCompressionError: the underlying codec rejected the payload, or
            *algorithm* is not one of the three known identifiers.
    """
    if algorithm == COMPRESS_NONE:
        return data
    elif algorithm == COMPRESS_ZLIB:
        try:
            restored = zlib.decompress(data)
        except zlib.error as exc:
            raise AMCXCompressionError(f"Error decompressing with zlib: {exc}") from exc
        return restored
    elif algorithm == COMPRESS_LZMA:
        try:
            restored = lzma.decompress(data)
        except lzma.LZMAError as exc:
            raise AMCXCompressionError(f"Error decompressing with lzma: {exc}") from exc
        return restored
    else:
        raise AMCXCompressionError(f"Unknown compression algorithm: {algorithm:#04x}")
66
+
67
+
68
def algorithm_name(algorithm: int) -> str:
    """Map a compression identifier to a short human-readable label.

    Unknown identifiers are rendered as ``unknown(0x..)``.
    """
    # The fallback is formatted up front, before the lookup happens.
    fallback = f"unknown({algorithm:#04x})"
    labels = dict([
        (COMPRESS_NONE, "none"),
        (COMPRESS_ZLIB, "zlib"),
        (COMPRESS_LZMA, "lzma"),
    ])
    return labels.get(algorithm, fallback)
@@ -0,0 +1,30 @@
1
+ # amcx/exceptions.py
2
+ # Custom errors for the .amcx library
3
+
4
+
5
class AMCXError(Exception):
    """Root of the AMCX exception hierarchy; catch this to handle any library error."""


class AMCXInvalidFileError(AMCXError):
    """Raised when a file is not a valid .amcx (wrong magic bytes, corrupt header, etc.)."""


class AMCXVersionError(AMCXError):
    """Raised when the file's format version is not compatible with this library."""


class AMCXCompressionError(AMCXError):
    """Raised when compressing or decompressing a chunk fails."""


class AMCXChunkNotFoundError(AMCXError):
    """Raised when a requested chunk does not exist in the index."""


class AMCXCorruptError(AMCXError):
    """Raised when a CRC32 does not match, i.e. the file is corrupt."""


class AMCXReadOnlyError(AMCXError):
    """Raised on an attempt to write to a file marked as read-only."""
@@ -0,0 +1,64 @@
1
+ # amcx/format.py
2
+ # Binary format definition for .amcx
3
+ # AMCX = Adaptive Memory Chunks X
4
+
5
+ import struct
6
+
7
# ─── Magic & Version ───────────────────────────────────────────────────────────
MAGIC = b'AMC\x00'   # 4 bytes that identify the file
VERSION_MAJOR = 0
VERSION_MINOR = 2    # version 0.2 — adds per-chunk checksum

# ─── Compression ───────────────────────────────────────────────────────────────
COMPRESS_NONE = 0x00  # no compression
COMPRESS_ZLIB = 0x01  # zlib → active chunks (fast)
COMPRESS_LZMA = 0x02  # lzma → old chunks (maximum compression)

# ─── Chunk types ───────────────────────────────────────────────────────────────
CHUNK_LORE = 0x00       # lore / world / rules
CHUNK_CHARACTER = 0x01  # character
CHUNK_EVENT = 0x02      # narrative event
CHUNK_ACTIVE = 0x03     # active chunk (the most recent)
CHUNK_GENERIC = 0x04    # generic content

# ─── Header flags (2-byte bitfield) ───────────────────────────────────────────
FLAG_COMPRESSED = 1 << 0  # at least one chunk has compression
FLAG_ENCRYPTED = 1 << 1   # reserved for future encryption
FLAG_READONLY = 1 << 2    # read-only file
FLAG_HAS_ACTIVE = 1 << 3  # there is a chunk marked as active
FLAG_HAS_ASSETS = 1 << 4  # contains assets/images (future)

# ─── Fixed sizes in bytes ──────────────────────────────────────────────────────
SUMMARY_SIZE = 64  # bytes reserved for the summary in the index

# ─── Header layout ─────────────────────────────────────────────────────────────
# All fields are big-endian and packed without padding ('>' prefix).
# Offset  Size  Field
# 0x00    4     Magic "AMC\0"
# 0x04    1     Version major
# 0x05    1     Version minor
# 0x06    4     Number of chunks
# 0x0A    8     Creation timestamp (unix, big-endian)
# 0x12    4     Offset where the index starts
# 0x16    4     Size of the index block
# 0x1A    2     Flags
# 0x1C    4     CRC32 of the header (first 28 bytes)
HEADER_STRUCT = struct.Struct('>4sBBIQIIHI')
# Fields: magic, v_major, v_minor, num_chunks, timestamp, index_offset, index_size, flags, crc32

# Total size of the header (32 bytes), taken from the struct so that the
# constant can never disagree with the layout above.
HEADER_SIZE = HEADER_STRUCT.size

# ─── Layout of each index entry ───────────────────────────────────────────────
# Offset  Size  Field
# 0x00    4     Chunk ID
# 0x04    4     Chunk offset in the file
# 0x08    4     Compressed size
# 0x0C    4     Original size (before compression)
# 0x10    2     Chunk type
# 0x12    1     Compression algorithm
# 0x13    1     Reserved
# 0x14    8     Chunk timestamp
# 0x1C    4     CRC32 of the chunk (over compressed bytes)  ← NEW
# 0x20    64    Summary in UTF-8 (null-padded)
# The summary width comes from SUMMARY_SIZE instead of a second literal "64".
INDEX_ENTRY_STRUCT = struct.Struct(f'>IIIIHBBQI{SUMMARY_SIZE}s')
# Fields: chunk_id, offset, size_c, size_o, ctype, algo, reserved, ts, crc32, summary

INDEX_ENTRY_SIZE = INDEX_ENTRY_STRUCT.size  # automatically calculated from the struct
@@ -0,0 +1,379 @@
1
+ # amcx/mirror.py
2
+ # Mirror system embedded inside the .amcx — inspired by WinRAR.
3
+ #
4
+ # Final file structure:
5
+ # [ HEADER ] [ INDEX ] [ CHUNKS... ] [ XOR RECOVERY BLOCK ] [ SHA-1 MIRROR BLOCK ]
6
+ #
7
+ # Both blocks are optional and are detected by their magic bytes at the end of the file.
8
+ # The reader finds them with rfind() without affecting normal chunk reading.
9
+
10
+ import hashlib
11
+ import struct
12
+ import time
13
+ from dataclasses import dataclass, field
14
+ from enum import Enum, auto
15
+ from typing import Optional
16
+
17
+ from .exceptions import AMCXError, AMCXCorruptError
18
+
19
+
20
+ # ─── Magic bytes for each block ───────────────────────────────────────────────
21
# 6-byte markers; each one introduces an optional trailing block.
RECOVERY_MAGIC = b"AMCXR\x00"  # XOR recovery block
MIRROR_MAGIC = b"AMCXM\x00"    # SHA-1 mirror block
23
+
24
+
25
+ # ─── Configuration ─────────────────────────────────────────────────────────────
26
+
27
class MirrorMode(Enum):
    """Policy for when the embedded mirror block is written."""

    # Numbered consecutively from 1.
    NONE = 1    # no embedded mirror
    MANUAL = 2  # only when you call writer.embed_mirror()
    AUTO = 3    # automatic every time writer.save() is called
31
+
32
+
33
class ChunkStatus(Enum):
    """Outcome of comparing one chunk against its entry in the mirror block."""

    OK = "ok"
    # The chunk's SHA-1 no longer matches the one stored in the mirror.
    MODIFIED = "modified"
    # Listed in the mirror but absent from the file's chunks.
    MISSING_ORIG = "missing_orig"
    # Present in the file's chunks but absent from the mirror.
    MISSING_MIRROR = "missing_mirror"
    # The mirror is older than the file.
    OUTDATED = "outdated"
39
+
40
+
41
@dataclass
class ChunkReport:
    """Verification result for a single chunk."""

    chunk_id: int
    summary: str
    status: ChunkStatus
    # Hex SHA-1 digests; either may be None when that side is unavailable.
    sha1_original: Optional[str] = None
    sha1_mirror: Optional[str] = None

    @property
    def ok(self) -> bool:
        """True only when the chunk's status is ChunkStatus.OK."""
        is_ok = self.status == ChunkStatus.OK
        return is_ok
52
+
53
+
54
@dataclass
class MirrorStatus:
    """Aggregated result of checking a file against its embedded mirror."""

    mirror_exists: bool
    chunks: list[ChunkReport] = field(default_factory=list)
    mirror_ts: Optional[int] = None
    file_ts: Optional[int] = None

    @property
    def all_ok(self) -> bool:
        """True when a mirror exists and every chunk report is ok."""
        if not self.mirror_exists:
            return self.mirror_exists
        return all(item.ok for item in self.chunks)

    @property
    def problems(self) -> list[ChunkReport]:
        """The chunk reports whose status is anything other than OK."""
        failing = []
        for item in self.chunks:
            if not item.ok:
                failing.append(item)
        return failing

    def report(self) -> str:
        """Render a multi-line, human-readable summary of the verification."""
        presence = '✓ exists' if self.mirror_exists else '✗ does not exist'
        out = [f"Embedded mirror: {presence}"]

        # The freshness line is only shown when both timestamps are known.
        if self.mirror_ts and self.file_ts:
            out.append(
                "⚠ The mirror is older than the file"
                if self.mirror_ts < self.file_ts
                else "✓ Mirror up to date"
            )
        out.append("")

        if self.chunks:
            out += [f"{'ID':>4} {'Status':<16} Summary", "-" * 52]
            out.extend(
                f"{item.chunk_id:>4} {'✓' if item.ok else '✗'} {item.status.value:<14} {item.summary}"
                for item in self.chunks
            )
            out.append("")

        verdict = (
            "✓ Everything is in order."
            if self.all_ok
            else f"✗ {len(self.problems)} problem(s) found."
        )
        out.append(verdict)
        return "\n".join(out)
91
+
92
+
93
+ # ─── AMCXMirror — embedded SHA-1 block ───────────────────────────────────────
94
+
95
class AMCXMirror:
    """
    Reads and writes the SHA-1 mirror block inside the .amcx.

    Block format (integers are big-endian, fields in on-disk order):
        AMCXM\\x00   magic (6 bytes)
        uint32      version
        uint32      num_entries
        uint64      timestamp
        [ for each chunk: ]
        uint32    chunk_id
        uint32    size_original
        20 bytes  SHA-1 (raw)
        uint8     summary_len
        N bytes   summary UTF-8

    The block is appended at the end of the file after the chunks
    and after the XOR recovery block if it exists. It is located by
    searching for the last occurrence of the magic bytes in the file.
    """

    @staticmethod
    def build_block(chunk_data: dict[int, tuple[bytes, str]]) -> bytes:
        """
        Builds the mirror block in bytes.

        Args:
            chunk_data: {chunk_id: (original_content, summary)}

        Returns:
            The serialized block, starting with the magic bytes. Entries are
            written in ascending chunk_id order and stamped with the current
            time.
        """
        buf = bytearray()
        buf += MIRROR_MAGIC
        buf += struct.pack('>II', 1, len(chunk_data))  # version=1, num_entries
        buf += struct.pack('>Q', int(time.time()))     # timestamp

        for chunk_id, (data, summary) in sorted(chunk_data.items()):
            sha1 = hashlib.sha1(data).digest()  # 20 raw bytes
            # The length prefix is a single byte, so at most 255 bytes fit.
            # Cutting at a byte offset may split a multi-byte character; the
            # decode/encode round trip drops such a partial tail so the stored
            # summary is always valid UTF-8.
            summary_bytes = summary.encode("utf-8")[:255]
            summary_bytes = summary_bytes.decode("utf-8", errors="ignore").encode("utf-8")
            buf += struct.pack('>II', chunk_id, len(data))
            buf += sha1
            buf += struct.pack('>B', len(summary_bytes))
            buf += summary_bytes

        return bytes(buf)

    @staticmethod
    def _parse_block(data: bytes, pos: int) -> tuple[dict, int]:
        """
        Decodes the mirror block whose magic bytes start at ``data[pos]``.

        Returns:
            (info, end) where info is the dict documented in read_block() and
            end is the offset of the first byte after the block.

        Raises:
            struct.error: if the block is cut short before a fixed-size field.
        """
        pos += len(MIRROR_MAGIC)
        _version, num_entries = struct.unpack_from('>II', data, pos)
        pos += 8
        (timestamp,) = struct.unpack_from('>Q', data, pos)
        pos += 8

        chunks = {}
        for _ in range(num_entries):
            chunk_id, size_orig = struct.unpack_from('>II', data, pos)
            pos += 8
            sha1_raw = data[pos:pos + 20]
            pos += 20
            (summary_len,) = struct.unpack_from('>B', data, pos)
            pos += 1
            summary = data[pos:pos + summary_len].decode("utf-8", errors="replace")
            pos += summary_len
            chunks[chunk_id] = {
                "sha1": sha1_raw.hex(),
                "summary": summary,
                "size": size_orig,
            }

        return {"timestamp": timestamp, "chunks": chunks}, pos

    @staticmethod
    def read_block(amcx_path: str) -> Optional[dict]:
        """
        Reads the mirror block from the file if it exists.

        Returns:
            dict with 'timestamp' and 'chunks':
            {chunk_id: {'sha1': hex, 'summary': str, 'size': int}}
            or None if there is no mirror block.
        """
        with open(amcx_path, "rb") as f:
            data = f.read()

        pos = data.rfind(MIRROR_MAGIC)
        if pos == -1:
            return None

        info, _end = AMCXMirror._parse_block(data, pos)
        return info

    @staticmethod
    def verify(amcx_path: str) -> MirrorStatus:
        """
        Compares the mirror block with the current chunks in the file.

        A chunk is reported as MODIFIED when it cannot be read
        (AMCXCorruptError) or its SHA-1 differs from the mirror, as OUTDATED
        when the hashes match but the mirror timestamp is earlier than the
        file's modification time (whole seconds), and as OK otherwise.
        Chunks present on only one side are reported as MISSING_MIRROR or
        MISSING_ORIG. The returned reports are sorted by chunk_id.
        """
        import os
        from .reader import AMCXReader

        mirror_data = AMCXMirror.read_block(amcx_path)
        status = MirrorStatus(
            mirror_exists=mirror_data is not None,
            file_ts=int(os.path.getmtime(amcx_path)),
            mirror_ts=mirror_data["timestamp"] if mirror_data else None,
        )

        if not mirror_data:
            return status

        mirror_chunks = mirror_data["chunks"]
        mirror_is_older = bool(
            status.mirror_ts and status.file_ts and status.mirror_ts < status.file_ts
        )

        with AMCXReader(amcx_path) as reader:
            # Fetch the index once and reuse it for both passes.
            entries = list(reader.list_chunks())
            orig_ids = {e.chunk_id for e in entries}

            for entry in entries:
                cid = entry.chunk_id

                if cid not in mirror_chunks:
                    status.chunks.append(ChunkReport(
                        chunk_id=cid,
                        summary=entry.summary,
                        status=ChunkStatus.MISSING_MIRROR,
                    ))
                    continue

                try:
                    raw = reader.read_chunk(cid)
                    sha1 = hashlib.sha1(raw).hexdigest()
                except AMCXCorruptError:
                    sha1 = None  # unreadable chunk counts as modified

                mirror_sha1 = mirror_chunks[cid]["sha1"]

                if sha1 is None or sha1 != mirror_sha1:
                    chunk_status = ChunkStatus.MODIFIED
                elif mirror_is_older:
                    chunk_status = ChunkStatus.OUTDATED
                else:
                    chunk_status = ChunkStatus.OK

                status.chunks.append(ChunkReport(
                    chunk_id=cid,
                    summary=entry.summary,
                    status=chunk_status,
                    sha1_original=sha1,
                    sha1_mirror=mirror_sha1,
                ))

        # Chunks in the mirror that are no longer in the original
        for cid, info in mirror_chunks.items():
            if cid not in orig_ids:
                status.chunks.append(ChunkReport(
                    chunk_id=cid,
                    summary=info["summary"],
                    status=ChunkStatus.MISSING_ORIG,
                    sha1_mirror=info["sha1"],
                ))

        status.chunks.sort(key=lambda c: c.chunk_id)
        return status

    @staticmethod
    def embed(amcx_path: str, chunk_data: dict[int, tuple[bytes, str]]) -> None:
        """
        Adds or replaces the mirror block at the end of the file.

        Only the bytes of the previous mirror block are removed. Anything
        stored after it (for instance a recovery block that was appended
        later) is kept, and the new mirror block is written last.
        """
        # Serialize first so a bad chunk_data cannot leave the file half-written.
        new_block = AMCXMirror.build_block(chunk_data)

        with open(amcx_path, "rb") as f:
            data = f.read()

        pos = data.rfind(MIRROR_MAGIC)
        if pos != -1:
            try:
                _info, end = AMCXMirror._parse_block(data, pos)
            except struct.error:
                # The old block is truncated and its extent is unknown:
                # drop everything from its magic bytes onward.
                end = len(data)
            data = data[:pos] + data[end:]

        with open(amcx_path, "wb") as f:
            f.write(data)
            f.write(new_block)

    @staticmethod
    def update(amcx_path: str) -> None:
        """Regenerates the mirror block by reading the current state of the chunks."""
        from .reader import AMCXReader

        chunk_data = {}
        with AMCXReader(amcx_path) as reader:
            for entry in reader.list_chunks():
                raw = reader.read_chunk(entry.chunk_id)
                chunk_data[entry.chunk_id] = (raw, entry.summary)
        AMCXMirror.embed(amcx_path, chunk_data)
268
+
269
+
270
+ # ─── AMCXRecovery — embedded XOR blocks ──────────────────────────────────────
271
+
272
class AMCXRecovery:
    """
    XOR recovery blocks embedded in the .amcx, inspired by WinRAR.

    Each group of N chunks has a parity block (XOR of all of them).
    If a chunk is damaged, it can be reconstructed with the others + the parity.

    Block format (integers are big-endian):
        AMCXR\\x00   magic (6 bytes)
        uint32      num_groups
        [ for each group: ]
        uint32    group_id
        uint32    num_chunk_ids
        [uint32   chunk_id ...]
        uint32    parity_size
        [bytes    parity]

    The block is located by searching for the last occurrence of the magic
    bytes, so when append() has been called more than once only the most
    recently appended block is used.
    """

    @staticmethod
    def _xor_parity(blocks: list[bytes]) -> bytes:
        """XORs byte strings together, zero-padding shorter ones on the right."""
        width = max((len(b) for b in blocks), default=0)
        acc = 0
        for block in blocks:
            # Big-endian integers keep byte i of every padded block aligned,
            # so one integer XOR replaces a per-byte Python loop.
            acc ^= int.from_bytes(block.ljust(width, b'\x00'), 'big')
        return acc.to_bytes(width, 'big')

    @staticmethod
    def _trim_padding(amcx_path: str, chunk_id: int, rebuilt: bytes) -> bytes:
        """
        Removes the zero padding introduced by parity alignment.

        The recovery block does not store each chunk's length. When the file
        has a mirror block whose recorded size and SHA-1 match the rebuilt
        data, the data is cut to exactly that size. Otherwise trailing zero
        bytes are stripped, which also removes zero bytes that belonged to
        the chunk itself.
        """
        try:
            mirror = AMCXMirror.read_block(amcx_path)
        except struct.error:
            mirror = None  # truncated mirror block: use the heuristic below

        entry = mirror["chunks"].get(chunk_id) if mirror else None
        if entry is not None:
            candidate = rebuilt[:entry["size"]]
            # Trust the mirror only when the digest proves the result is exact.
            if (len(candidate) == entry["size"]
                    and hashlib.sha1(candidate).hexdigest() == entry["sha1"]):
                return candidate

        return rebuilt.rstrip(b'\x00')

    @staticmethod
    def append(amcx_path: str, group_size: int = 3) -> None:
        """
        Appends XOR recovery blocks to the file.

        Args:
            amcx_path: path of the .amcx file to extend
            group_size: number of chunks protected by each parity block

        Raises:
            ValueError: if group_size is smaller than 1.
        """
        from .reader import AMCXReader

        if group_size < 1:
            raise ValueError("group_size must be at least 1")

        recovery_blocks = []
        with AMCXReader(amcx_path) as reader:
            entries = reader.list_chunks()
            for gidx, start in enumerate(range(0, len(entries), group_size)):
                chunk_ids = [e.chunk_id for e in entries[start:start + group_size]]
                chunks = [reader.read_chunk(cid) for cid in chunk_ids]
                recovery_blocks.append((gidx, chunk_ids, AMCXRecovery._xor_parity(chunks)))

        # Serialize in memory, then append to the file with a single write.
        payload = bytearray(RECOVERY_MAGIC)
        payload += struct.pack('>I', len(recovery_blocks))
        for gidx, chunk_ids, parity in recovery_blocks:
            payload += struct.pack('>II', gidx, len(chunk_ids))
            payload += struct.pack(f'>{len(chunk_ids)}I', *chunk_ids)
            payload += struct.pack('>I', len(parity))
            payload += parity

        with open(amcx_path, "ab") as f:
            f.write(bytes(payload))

    @staticmethod
    def can_recover(amcx_path: str, damaged_chunk_id: int) -> bool:
        """Tells whether some recovery group in the file lists the given chunk."""
        blocks = AMCXRecovery._read_blocks(amcx_path)
        return any(damaged_chunk_id in ids for _, ids, _ in blocks)

    @staticmethod
    def recover_chunk(amcx_path: str, damaged_chunk_id: int) -> bytes:
        """
        Reconstructs a damaged chunk using XOR parity.

        Returns:
            The rebuilt chunk content.

        Raises:
            AMCXError: if no recovery group lists the chunk, or if another
                chunk of the same group is damaged as well.
        """
        from .reader import AMCXReader

        for _, chunk_ids, parity in AMCXRecovery._read_blocks(amcx_path):
            if damaged_chunk_id not in chunk_ids:
                continue

            with AMCXReader(amcx_path) as reader:
                healthy = []
                for cid in chunk_ids:
                    if cid == damaged_chunk_id:
                        continue
                    try:
                        healthy.append(reader.read_chunk(cid))
                    except AMCXCorruptError as exc:
                        raise AMCXError(
                            f"Cannot recover chunk {damaged_chunk_id}: "
                            f"chunk {cid} in the same group is also damaged."
                        ) from exc

            # XOR of the parity with every healthy member leaves the missing
            # one. This also covers a group that contains only the damaged
            # chunk, where the parity is the chunk itself.
            rebuilt = AMCXRecovery._xor_parity([parity, *healthy])
            return AMCXRecovery._trim_padding(amcx_path, damaged_chunk_id, rebuilt)

        raise AMCXError(f"No recovery block found for chunk {damaged_chunk_id}.")

    @staticmethod
    def _read_blocks(amcx_path: str) -> list[tuple[int, list[int], bytes]]:
        """
        Parses the last recovery block found in the file.

        Returns:
            A list of (group_id, chunk_ids, parity) tuples; empty when the
            file has no recovery block.
        """
        with open(amcx_path, "rb") as f:
            data = f.read()

        pos = data.rfind(RECOVERY_MAGIC)
        if pos == -1:
            return []

        pos += len(RECOVERY_MAGIC)
        (num_g,) = struct.unpack_from('>I', data, pos)
        pos += 4

        blocks = []
        for _ in range(num_g):
            gidx, num_ids = struct.unpack_from('>II', data, pos)
            pos += 8
            chunk_ids = list(struct.unpack_from(f'>{num_ids}I', data, pos))
            pos += 4 * num_ids
            (parity_size,) = struct.unpack_from('>I', data, pos)
            pos += 4
            parity = data[pos:pos + parity_size]
            pos += parity_size
            blocks.append((gidx, chunk_ids, parity))

        return blocks