memplex 3.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- memnex/__init__.py +31 -0
- memnex/__main__.py +6 -0
- memnex/_plugin/.claude-plugin/plugin.json +24 -0
- memnex/_plugin/.mcp.json +9 -0
- memnex/_plugin/__init__.py +0 -0
- memnex/_plugin/hooks/hooks.json +43 -0
- memnex/_plugin/scripts/hook-runner.py +166 -0
- memnex/_plugin/skills/mem-explore/SKILL.md +83 -0
- memnex/_plugin/skills/mem-manage/SKILL.md +92 -0
- memnex/_plugin/skills/mem-search/SKILL.md +85 -0
- memnex/_plugin/skills/mem-write/SKILL.md +78 -0
- memnex/adapters/__init__.py +14 -0
- memnex/adapters/claude_skill.py +169 -0
- memnex/adapters/cli.py +525 -0
- memnex/adapters/http_api.py +314 -0
- memnex/adapters/mcp_server.py +448 -0
- memnex/compaction.py +563 -0
- memnex/config.py +366 -0
- memnex/core/__init__.py +13 -0
- memnex/core/associator/__init__.py +8 -0
- memnex/core/associator/domain_classifier.py +75 -0
- memnex/core/associator/entity_aligner.py +127 -0
- memnex/core/associator/ref_linker.py +197 -0
- memnex/core/associator/term_mapper.py +77 -0
- memnex/core/dictionaries/__init__.py +50 -0
- memnex/core/engine.py +667 -0
- memnex/core/extractors/__init__.py +15 -0
- memnex/core/extractors/docx.py +97 -0
- memnex/core/extractors/image.py +233 -0
- memnex/core/extractors/markdown.py +139 -0
- memnex/core/extractors/pdf.py +133 -0
- memnex/core/extractors/vision_mapper.py +131 -0
- memnex/core/handlers/__init__.py +7 -0
- memnex/core/handlers/clipboard.py +40 -0
- memnex/core/handlers/file_handler.py +62 -0
- memnex/core/handlers/url_handler.py +132 -0
- memnex/llm/__init__.py +25 -0
- memnex/llm/enhancer.py +226 -0
- memnex/llm/fallback_chain.py +87 -0
- memnex/llm/injection_guard.py +178 -0
- memnex/llm/provider.py +130 -0
- memnex/llm/providers/__init__.py +22 -0
- memnex/llm/providers/anthropic.py +135 -0
- memnex/llm/providers/local.py +135 -0
- memnex/llm/providers/rule_based.py +68 -0
- memnex/llm/sanitizer.py +67 -0
- memnex/models/__init__.py +68 -0
- memnex/models/feedback.py +42 -0
- memnex/models/graph.py +33 -0
- memnex/models/memory.py +102 -0
- memnex/models/misc.py +185 -0
- memnex/models/paragraph.py +45 -0
- memnex/models/search.py +51 -0
- memnex/models/source.py +23 -0
- memnex/models/task.py +62 -0
- memnex/processing/__init__.py +1 -0
- memnex/processing/graph_builder.py +278 -0
- memnex/processing/merger/__init__.py +6 -0
- memnex/processing/merger/confidence_calculator.py +127 -0
- memnex/processing/merger/conflict_resolver.py +116 -0
- memnex/retrieval/__init__.py +1 -0
- memnex/retrieval/dedup.py +386 -0
- memnex/retrieval/embedding.py +289 -0
- memnex/retrieval/reranker.py +299 -0
- memnex/service.py +902 -0
- memnex/storage/__init__.py +65 -0
- memnex/storage/base.py +132 -0
- memnex/storage/changelog.py +106 -0
- memnex/storage/feedback.py +486 -0
- memnex/storage/lite/__init__.py +5 -0
- memnex/storage/lite/store.py +606 -0
- memnex/storage/vector.py +265 -0
- memnex/wiki/__init__.py +11 -0
- memnex/wiki/community.py +221 -0
- memnex/wiki/compiler.py +545 -0
- memnex/wiki/generator.py +270 -0
- memnex/wiki/search.py +282 -0
- memnex/worker.py +412 -0
- memplex-3.2.0.dist-info/METADATA +37 -0
- memplex-3.2.0.dist-info/RECORD +83 -0
- memplex-3.2.0.dist-info/WHEEL +5 -0
- memplex-3.2.0.dist-info/entry_points.txt +2 -0
- memplex-3.2.0.dist-info/top_level.txt +1 -0
memnex/compaction.py
ADDED
|
@@ -0,0 +1,563 @@
|
|
|
1
|
+
"""CompactionPipeline -- 5-stage memory compression pipeline.
|
|
2
|
+
|
|
3
|
+
Stages::
|
|
4
|
+
|
|
5
|
+
1. Extract -- extract atomic facts from history
|
|
6
|
+
2. Dedup -- remove exact + semantic duplicates
|
|
7
|
+
3. Summarize -- generate summaries, trim oversized FieldValue lists
|
|
8
|
+
4. Prune -- remove stale, low-confidence, deprecated entries
|
|
9
|
+
5. Archive -- move low-frequency memories to cold storage
|
|
10
|
+
|
|
11
|
+
Concurrency safety::
|
|
12
|
+
|
|
13
|
+
Compaction runs under a mutually-exclusive lock (FileLock for
|
|
14
|
+
Lite/Standard, PGAdvisoryLock for Enterprise). If the lock is
|
|
15
|
+
already held, ``run()`` returns immediately with ``skipped=True``.
|
|
16
|
+
|
|
17
|
+
Usage::
|
|
18
|
+
|
|
19
|
+
pipeline = CompactionPipeline(store, embedding_service, config)
|
|
20
|
+
result = await pipeline.run(CompactionScope.GLOBAL)
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
from __future__ import annotations
|
|
24
|
+
|
|
25
|
+
import abc
|
|
26
|
+
import asyncio
|
|
27
|
+
import fcntl
|
|
28
|
+
import hashlib
|
|
29
|
+
import logging
|
|
30
|
+
import math
|
|
31
|
+
import os
|
|
32
|
+
import time
|
|
33
|
+
from dataclasses import dataclass
|
|
34
|
+
from datetime import datetime
|
|
35
|
+
from pathlib import Path
|
|
36
|
+
from typing import Dict, List, Optional, TYPE_CHECKING
|
|
37
|
+
|
|
38
|
+
from memnex.config import MemNexConfig
|
|
39
|
+
from memnex.retrieval.dedup import MemoryDeduplicator
|
|
40
|
+
from memnex.retrieval.embedding import EmbeddingService
|
|
41
|
+
from memnex.models import (
|
|
42
|
+
CompactionResult,
|
|
43
|
+
CompactionScope,
|
|
44
|
+
CompactionStageResult,
|
|
45
|
+
FieldValue,
|
|
46
|
+
Memory,
|
|
47
|
+
)
|
|
48
|
+
from memnex.storage.base import MemoryStore
|
|
49
|
+
|
|
50
|
+
if TYPE_CHECKING:
|
|
51
|
+
pass
|
|
52
|
+
|
|
53
|
+
logger = logging.getLogger(__name__)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
# ── Compaction lock abstraction ────────────────────────────────────────
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
class CompactionLock(abc.ABC):
    """Interface for the mutual-exclusion lock that guards a compaction run.

    Implementations must make ``try_acquire`` non-blocking: when another
    holder already owns the lock it reports ``False`` straight away
    instead of waiting.
    """

    @abc.abstractmethod
    async def try_acquire(self) -> bool:
        """Try to take the lock without waiting; ``True`` means it is now held."""

    @abc.abstractmethod
    async def release(self) -> None:
        """Give the lock back. Must be a no-op when the lock is not held."""
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
class FileLock(CompactionLock):
    """POSIX ``fcntl.flock``-based file lock (Lite / Standard backends).

    Lock file: ``lock_dir / {key_sha1}.lock`` where ``key_sha1`` is the
    first 16 hex characters of the SHA-1 digest of *key*.
    Suitable for single-machine multi-process scenarios.

    Parameters
    ----------
    key:
        Logical lock name; only its hash appears in the file name.
    lock_dir:
        Directory holding the lock file. Created on first acquire.
    """

    def __init__(self, key: str, lock_dir: Path) -> None:
        # The digest is used purely to derive a filesystem-safe name.
        key_hash = hashlib.sha1(key.encode()).hexdigest()[:16]
        self._lock_path = lock_dir / f"{key_hash}.lock"
        self._lock_dir = lock_dir
        # Descriptor of the locked file while the lock is held, else None.
        self._fd: Optional[int] = None

    async def try_acquire(self) -> bool:
        """Try to take an exclusive, non-blocking ``flock`` on the lock file.

        Returns
        -------
        bool
            ``True`` when the lock was obtained, ``False`` when another
            open file description already holds it.

        Raises
        ------
        OSError
            If the directory or file cannot be created, or ``flock``
            fails for a reason other than contention.
        """
        self._lock_dir.mkdir(parents=True, exist_ok=True)
        # Explicit mode: a lock file has no reason to carry execute bits
        # (the os.open default is 0o777 before umask).
        fd = os.open(str(self._lock_path), os.O_CREAT | os.O_RDWR, 0o644)
        try:
            fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
        except BlockingIOError:
            # Contention: somebody else owns the lock.
            os.close(fd)
            return False
        except BaseException:
            # Any other failure must not leak the descriptor.
            os.close(fd)
            raise
        self._fd = fd
        return True

    async def release(self) -> None:
        """Unlock and close the lock file. No-op when the lock is not held."""
        # Clear the attribute first so a failing unlock cannot leave a
        # stale descriptor behind for a later release() call.
        fd, self._fd = self._fd, None
        if fd is None:
            return
        try:
            fcntl.flock(fd, fcntl.LOCK_UN)
        finally:
            # Closing the descriptor drops the flock even if LOCK_UN failed.
            os.close(fd)
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
class PGAdvisoryLock(CompactionLock):
    """PostgreSQL ``pg_try_advisory_lock`` (Enterprise backend).

    The lock ID is derived by hashing *key* with SHA-256 and reducing the
    digest modulo ``2**63`` so that it fits a non-negative int64.

    The lock is session-level: it is tied to the pooled connection that
    acquired it, so that connection is kept checked out until
    :meth:`release`. PostgreSQL drops session-level advisory locks when
    the owning session ends, so a crashed process cannot leave the lock
    stuck.

    Parameters
    ----------
    key:
        Logical lock name.
    pool:
        Connection pool exposing awaitable ``acquire()`` / ``release(conn)``
        whose connections provide an awaitable ``fetchval(query, *args)``.
        NOTE(review): this looks like an asyncpg pool -- confirm.
    """

    def __init__(self, key: str, pool: object) -> None:
        self._pool = pool
        raw = int(hashlib.sha256(key.encode()).hexdigest(), 16)
        self._lock_id: int = raw % (2 ** 63)
        # Connection that owns the advisory lock while it is held.
        self._conn = None

    async def try_acquire(self) -> bool:
        """Try to take the advisory lock without waiting.

        Returns
        -------
        bool
            ``True`` when the lock was obtained (the connection stays
            checked out), ``False`` when another session holds it.
        """
        conn = await self._pool.acquire()
        try:
            locked = await conn.fetchval(
                "SELECT pg_try_advisory_lock($1)", self._lock_id
            )
        except BaseException:
            # A failed or cancelled query must not strand the connection
            # outside the pool.
            await self._pool.release(conn)
            raise
        if locked:
            self._conn = conn
            return True
        await self._pool.release(conn)
        return False

    async def release(self) -> None:
        """Unlock and hand the connection back. No-op when not held."""
        conn, self._conn = self._conn, None
        if conn is None:
            return
        try:
            await conn.fetchval(
                "SELECT pg_advisory_unlock($1)", self._lock_id
            )
        finally:
            # Return the connection even when the unlock query failed.
            await self._pool.release(conn)
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
# ── Checkpoint ────────────────────────────────────────────────────────
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
@dataclass
class _Checkpoint:
    """Progress record persisted once a stage has finished (crash-recovery)."""

    # Name of the stage that just completed.
    stage_name: str
    # Number of items that stage reported as processed.
    processed_offset: int
    # Identifiers handled so far.
    processed_ids: List[str]
    # ISO-8601 string recording when the checkpoint was taken.
    timestamp: str
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
# ── CompactionPipeline ────────────────────────────────────────────────
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
class CompactionPipeline:
    """5-stage compaction pipeline.

    Stages run in the order given by :attr:`STAGES`; a stage result with
    a truthy ``abort`` stops the run.

    Parameters
    ----------
    store:
        The active :class:`MemoryStore` backend.
    embedding_service:
        Passed to :class:`MemoryDeduplicator` for semantic dedup.
    config:
        Full :class:`MemNexConfig` (the ``compaction`` and ``storage``
        sub-configs are read).
    """

    STAGES = ["extract", "dedup", "summarize", "prune", "archive"]

    # FieldValue-list attributes inspected on every stored function.
    _FIELD_ROLES = ("trigger", "condition", "action", "benefit")

    # Upper bound passed to ``list_functions`` by every stage.
    # NOTE(review): entries beyond this limit are presumably never
    # compacted -- confirm against the store implementation.
    _LIST_LIMIT = 100000

    def __init__(
        self,
        store: MemoryStore,
        embedding_service: EmbeddingService,
        config: MemNexConfig,
    ) -> None:
        self._store = store
        self._embedding = embedding_service
        self._config = config
        self._pg_pool: Optional[object] = None  # injected for Enterprise

    # ── Shared helpers ──────────────────────────────────────────────

    @staticmethod
    def _state_dir(name: str) -> Path:
        """Return ``~/.memnex/<name>`` (resolved at call time, not created)."""
        return Path.home() / ".memnex" / name

    @staticmethod
    def _coerce_datetime(value: object) -> Optional[datetime]:
        """Turn a stored timestamp into a ``datetime``.

        Accepts a ``datetime`` (returned unchanged) or an ISO-8601 string.
        Anything else, including ``None`` and unparsable strings, yields
        ``None`` so callers can skip the entry instead of crashing.
        """
        if isinstance(value, datetime):
            return value
        if isinstance(value, str):
            try:
                return datetime.fromisoformat(value)
            except ValueError:
                return None
        return None

    @staticmethod
    def _comparable_now(now: datetime, other: datetime) -> datetime:
        """Return *now* in a form that can be compared with *other*.

        Mixing naive and aware datetimes raises ``TypeError``. When
        *other* is timezone-aware, *now* is converted to that zone
        (``astimezone`` treats a naive value as local time); otherwise
        *now* is returned unchanged.
        """
        if other.utcoffset() is not None:
            return now.astimezone(other.tzinfo)
        return now

    @classmethod
    def _days_since(cls, now: datetime, then: datetime) -> int:
        """Whole days elapsed between *then* and *now*."""
        return (cls._comparable_now(now, then) - then).days

    @staticmethod
    def _write_json_atomic(
        path: Path, payload: dict, default: Optional[object] = None
    ) -> None:
        """Serialise *payload* to *path* without exposing a partial file.

        The JSON is written to a sibling ``.tmp`` file and then moved
        over *path* with ``os.replace``. On any failure the temporary
        file is removed and the exception re-raised, so *path* is either
        absent/unchanged or fully written.
        """
        import json

        tmp_path = Path(str(path) + ".tmp")
        try:
            with open(tmp_path, "w", encoding="utf-8") as fh:
                json.dump(payload, fh, default=default, indent=2)
            os.replace(tmp_path, path)
        except BaseException:
            try:
                os.unlink(tmp_path)
            except OSError:
                pass  # nothing to clean up (open() itself failed)
            raise

    @staticmethod
    def _field_score(fv: FieldValue) -> float:
        """Ranking score: ``weight * observation`` (observation defaults to 1)."""
        observation = fv.observation if fv.observation is not None else 1.0
        return fv.weight * observation

    # ── Lock helpers ────────────────────────────────────────────────

    @staticmethod
    def _lock_key(scope: CompactionScope) -> str:
        """Build the lock name for *scope*."""
        return f"compaction:{scope.value}"

    def _build_lock(self, scope: CompactionScope) -> CompactionLock:
        """Pick the lock implementation for the configured backend.

        A :class:`PGAdvisoryLock` is used only when the storage backend is
        ``"enterprise"`` *and* a PG pool has been injected; every other
        case falls back to a :class:`FileLock` under ``~/.memnex/locks``.
        """
        key = self._lock_key(scope)
        backend = getattr(self._config, "storage", None)
        backend_name = getattr(backend, "backend", "lite") if backend else "lite"
        if backend_name == "enterprise" and self._pg_pool is not None:
            return PGAdvisoryLock(key=key, pool=self._pg_pool)
        return FileLock(key=key, lock_dir=self._state_dir("locks"))

    # ── Public API ──────────────────────────────────────────────────

    async def run(self, scope: CompactionScope) -> CompactionResult:
        """Execute the compaction pipeline with mutual-exclusion.

        Returns ``CompactionResult(skipped=True)`` immediately when the
        lock cannot be acquired. The lock is released even when a stage
        raises.
        """
        lock = self._build_lock(scope)
        if not await lock.try_acquire():
            logger.warning(
                "Compaction skipped: another instance holds the lock for scope=%s",
                scope,
            )
            return CompactionResult(
                total_processed=0,
                total_removed=0,
                total_merged=0,
                duration_ms=0,
                stages=[],
                skipped=True,
            )
        try:
            return await self._run_pipeline(scope)
        finally:
            await lock.release()

    # ── Pipeline execution ──────────────────────────────────────────

    async def _run_pipeline(self, scope: CompactionScope) -> CompactionResult:
        """Execute each stage sequentially and aggregate the counters.

        A checkpoint is written after every stage that completes without
        requesting an abort; an aborting stage still counts in the totals.
        """
        start_time = time.monotonic()
        stage_results: List[CompactionStageResult] = []

        for stage in self.STAGES:
            result = await self._execute_stage(stage, scope)
            stage_results.append(result)

            if result.abort:
                logger.warning("Compaction aborted at stage %s", stage)
                break

            # Write checkpoint after each completed stage
            self._write_checkpoint(stage, result)

        elapsed_ms = int((time.monotonic() - start_time) * 1000)

        return CompactionResult(
            total_processed=sum(r.processed for r in stage_results),
            total_removed=sum(r.removed for r in stage_results),
            total_merged=sum(r.merged for r in stage_results),
            duration_ms=elapsed_ms,
            stages=stage_results,
            skipped=False,
        )

    async def _execute_stage(
        self, stage: str, scope: CompactionScope
    ) -> CompactionStageResult:
        """Dispatch to the correct stage handler.

        An unknown stage name yields an all-zero result rather than an
        error.
        """
        handlers = {
            "extract": self._execute_extract,
            "dedup": self._execute_dedup,
            "summarize": self._execute_summarize,
            "prune": self._execute_prune,
            "archive": self._execute_archive,
        }
        handler = handlers.get(stage)
        if handler is None:
            return CompactionStageResult(
                stage=stage, processed=0, removed=0, merged=0, duration_ms=0
            )
        return await handler(scope)

    # ── Stage: Extract ──────────────────────────────────────────────

    async def _execute_extract(
        self, scope: CompactionScope
    ) -> CompactionStageResult:
        """Extract atomic facts from stored memories.

        In the current implementation this is a no-op pass-through that
        counts the total number of functions. Full extraction logic is
        wired by the application layer via LLM providers.
        """
        t0 = time.monotonic()
        functions = self._store.list_functions(limit=self._LIST_LIMIT)
        elapsed = int((time.monotonic() - t0) * 1000)
        return CompactionStageResult(
            stage="extract",
            processed=len(functions),
            removed=0,
            merged=0,
            duration_ms=elapsed,
        )

    # ── Stage: Dedup ────────────────────────────────────────────────

    async def _execute_dedup(
        self, scope: CompactionScope
    ) -> CompactionStageResult:
        """Dedup stage: detect exact and semantic duplicates.

        NOTE(review): only the counters returned by the deduplicator are
        reported here; this method does not itself write anything back to
        the store -- confirm whether ``MemoryDeduplicator`` persists the
        result or whether the duplicates are left in place.
        """
        t0 = time.monotonic()
        memories: List[Memory] = list(
            self._store.list_functions(limit=self._LIST_LIMIT)
        )

        deduplicator = MemoryDeduplicator(
            self._embedding, threshold=self._config.compaction.dedup_threshold
        )
        result = deduplicator.deduplicate(memories)
        elapsed = int((time.monotonic() - t0) * 1000)

        return CompactionStageResult(
            stage="dedup",
            processed=result.original_count,
            removed=result.exact_removed + result.semantic_removed,
            merged=result.semantic_removed,
            duration_ms=elapsed,
        )

    # ── Stage: Summarize ────────────────────────────────────────────

    async def _execute_summarize(
        self, scope: CompactionScope
    ) -> CompactionStageResult:
        """Summarize stage: trim oversized FieldValue lists.

        When a role field holds more than ``compaction.field_max_values``
        entries:
        - Sort it in place by ``weight * observation`` descending
        - Mark the entries beyond the limit as ``status="deprecated"``
          so the Prune stage can drop them

        ``processed`` in the result counts the functions that had at
        least one entry newly deprecated.

        NOTE(review): the objects are mutated in memory only; nothing
        here saves them back to the store -- confirm that
        ``list_functions`` returns live objects.
        """
        t0 = time.monotonic()
        functions = self._store.list_functions(limit=self._LIST_LIMIT)
        max_values = self._config.compaction.field_max_values
        touched = 0

        for func in functions:
            changed = False
            for role in self._FIELD_ROLES:
                # A missing or None attribute is treated as an empty list.
                values: List[FieldValue] = getattr(func, role, None) or []
                if len(values) <= max_values:
                    continue
                values.sort(key=self._field_score, reverse=True)
                for fv in values[max_values:]:
                    if fv.status != "deprecated":
                        fv.status = "deprecated"
                        changed = True
            if changed:
                touched += 1

        elapsed = int((time.monotonic() - t0) * 1000)
        return CompactionStageResult(
            stage="summarize",
            processed=touched,
            removed=0,
            merged=0,
            duration_ms=elapsed,
        )

    # ── Stage: Prune ────────────────────────────────────────────────

    def _is_prunable(self, func: Memory, now: datetime, cfg: object) -> bool:
        """Decide whether *func* should be deleted outright by Prune.

        Criteria are checked in order: low confidence; stale and rarely
        accessed; ``needs_review`` whose deadline (or, when no deadline is
        set, the TTL counted from creation) has passed.
        """
        if func.confidence < cfg.prune_confidence_threshold:
            return True

        updated = self._coerce_datetime(func.updated_at)
        if (
            updated is not None
            and self._days_since(now, updated) > cfg.prune_max_age_days
            and func.access_count < cfg.prune_min_access_count
        ):
            return True

        if not func.needs_review:
            return False

        review_until = self._coerce_datetime(func.needs_review_until)
        if review_until is not None:
            return self._comparable_now(now, review_until) > review_until

        # No (usable) expiry set -- use TTL from creation
        created = self._coerce_datetime(func.created_at)
        return (
            created is not None
            and self._days_since(now, created) > cfg.needs_review_ttl_days
        )

    async def _execute_prune(
        self, scope: CompactionScope
    ) -> CompactionStageResult:
        """Prune stage: clean low-confidence, stale, and deprecated entries.

        Removal criteria (all thresholds from config):
        - ``confidence < prune_confidence_threshold``
        - Age > ``prune_max_age_days`` AND ``access_count < prune_min_access_count``
        - ``needs_review=True`` AND ``needs_review_until`` has expired
          (falls back to ``needs_review_ttl_days`` since creation)
        - FieldValue entries with ``status="deprecated"`` (only on
          functions that are kept)

        ``removed`` in the result adds deleted functions and dropped
        FieldValue entries together.

        NOTE(review): dropped FieldValue entries are replaced on the
        in-memory object only; nothing here saves the function back to
        the store -- confirm.
        """
        t0 = time.monotonic()
        functions = self._store.list_functions(limit=self._LIST_LIMIT)
        cfg = self._config.compaction
        now = datetime.now()
        removed = 0

        for func in functions:
            if self._is_prunable(func, now, cfg):
                self._store.delete(func.id)
                removed += 1
                continue

            # Prune deprecated FieldValue entries (not the whole Function)
            for role in self._FIELD_ROLES:
                values: List[FieldValue] = getattr(func, role, None) or []
                kept = [fv for fv in values if fv.status != "deprecated"]
                dropped = len(values) - len(kept)
                if dropped:
                    setattr(func, role, kept)
                    removed += dropped

        elapsed = int((time.monotonic() - t0) * 1000)
        return CompactionStageResult(
            stage="prune",
            processed=len(functions),
            removed=removed,
            merged=0,
            duration_ms=elapsed,
        )

    # ── Stage: Archive ──────────────────────────────────────────────

    async def _execute_archive(
        self, scope: CompactionScope
    ) -> CompactionStageResult:
        """Archive stage: move low-frequency memories to cold storage.

        Archive directory: ``~/.memnex/archive/``.
        A memory is archived when it is older than ``prune_max_age_days``
        and has ``access_count == 0``: a JSON record is written to
        ``<id>.json`` and the memory is then deleted from the store.
        A failure on one memory is logged and does not stop the stage.

        NOTE(review): the record holds only id, name, domain and
        timestamps -- the memory's field values are not written, so the
        delete that follows is not reversible from the archive. Confirm
        this is intended.
        """
        t0 = time.monotonic()
        archive_dir = self._state_dir("archive")
        archive_dir.mkdir(parents=True, exist_ok=True)

        functions = self._store.list_functions(limit=self._LIST_LIMIT)
        max_age_days = self._config.compaction.prune_max_age_days
        now = datetime.now()
        archived = 0

        for func in functions:
            # Only archive very old, never-accessed memories
            updated = self._coerce_datetime(func.updated_at)
            if updated is None:
                continue
            is_stale = self._days_since(now, updated) > max_age_days
            if not (is_stale and func.access_count == 0):
                continue

            try:
                from memnex.worker import _json_serializer

                self._write_json_atomic(
                    archive_dir / f"{func.id}.json",
                    {
                        "id": func.id,
                        "name": func.name,
                        "domain": func.domain,
                        "archived_at": now.isoformat(),
                        "original_updated_at": str(updated),
                    },
                    default=_json_serializer,
                )
                # Delete only after the record is fully on disk.
                self._store.delete(func.id)
                archived += 1
            except Exception as exc:
                # Best-effort per item: keep going with the next memory.
                logger.warning(
                    "Failed to archive %s: %s", func.id, exc
                )

        elapsed = int((time.monotonic() - t0) * 1000)
        return CompactionStageResult(
            stage="archive",
            processed=len(functions),
            removed=archived,
            merged=0,
            duration_ms=elapsed,
        )

    # ── Checkpoint ──────────────────────────────────────────────────

    def _write_checkpoint(
        self, stage: str, result: CompactionStageResult
    ) -> None:
        """Write a checkpoint after each stage for crash-recovery.

        The record goes to ``~/.memnex/checkpoints/latest.json`` and is
        replaced atomically so a crash mid-write cannot corrupt the
        previous checkpoint. Filesystem errors are logged, not raised.

        NOTE(review): the file name does not include the scope, while
        the lock is per scope -- runs for different scopes would share
        this file. Confirm whether that matters to the reader.
        """
        cp = _Checkpoint(
            stage_name=stage,
            processed_offset=result.processed,
            processed_ids=[],
            timestamp=datetime.now().isoformat(),
        )
        try:
            checkpoint_dir = self._state_dir("checkpoints")
            checkpoint_dir.mkdir(parents=True, exist_ok=True)
            self._write_json_atomic(
                checkpoint_dir / "latest.json",
                {
                    "stage_name": cp.stage_name,
                    "processed_offset": cp.processed_offset,
                    "processed_ids": cp.processed_ids,
                    "timestamp": cp.timestamp,
                },
            )
        except OSError as exc:
            logger.warning("Failed to write checkpoint: %s", exc)
|