agmem 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {agmem-0.1.1.dist-info → agmem-0.1.3.dist-info}/METADATA +157 -16
- agmem-0.1.3.dist-info/RECORD +105 -0
- memvcs/__init__.py +1 -1
- memvcs/cli.py +45 -31
- memvcs/commands/__init__.py +9 -9
- memvcs/commands/add.py +83 -76
- memvcs/commands/audit.py +59 -0
- memvcs/commands/blame.py +46 -53
- memvcs/commands/branch.py +13 -33
- memvcs/commands/checkout.py +27 -32
- memvcs/commands/clean.py +18 -23
- memvcs/commands/clone.py +11 -1
- memvcs/commands/commit.py +40 -39
- memvcs/commands/daemon.py +109 -76
- memvcs/commands/decay.py +77 -0
- memvcs/commands/diff.py +56 -57
- memvcs/commands/distill.py +90 -0
- memvcs/commands/federated.py +53 -0
- memvcs/commands/fsck.py +86 -61
- memvcs/commands/garden.py +40 -35
- memvcs/commands/gc.py +51 -0
- memvcs/commands/graph.py +41 -48
- memvcs/commands/init.py +16 -24
- memvcs/commands/log.py +25 -40
- memvcs/commands/merge.py +69 -27
- memvcs/commands/pack.py +129 -0
- memvcs/commands/prove.py +66 -0
- memvcs/commands/pull.py +31 -1
- memvcs/commands/push.py +4 -2
- memvcs/commands/recall.py +145 -0
- memvcs/commands/reflog.py +13 -22
- memvcs/commands/remote.py +1 -0
- memvcs/commands/repair.py +66 -0
- memvcs/commands/reset.py +23 -33
- memvcs/commands/resolve.py +130 -0
- memvcs/commands/resurrect.py +82 -0
- memvcs/commands/search.py +3 -4
- memvcs/commands/serve.py +2 -1
- memvcs/commands/show.py +66 -36
- memvcs/commands/stash.py +34 -34
- memvcs/commands/status.py +27 -35
- memvcs/commands/tag.py +23 -47
- memvcs/commands/test.py +30 -44
- memvcs/commands/timeline.py +111 -0
- memvcs/commands/tree.py +26 -27
- memvcs/commands/verify.py +110 -0
- memvcs/commands/when.py +115 -0
- memvcs/core/access_index.py +167 -0
- memvcs/core/audit.py +124 -0
- memvcs/core/config_loader.py +3 -1
- memvcs/core/consistency.py +214 -0
- memvcs/core/crypto_verify.py +280 -0
- memvcs/core/decay.py +185 -0
- memvcs/core/diff.py +158 -143
- memvcs/core/distiller.py +277 -0
- memvcs/core/encryption.py +169 -0
- memvcs/core/federated.py +86 -0
- memvcs/core/gardener.py +176 -145
- memvcs/core/hooks.py +48 -14
- memvcs/core/ipfs_remote.py +39 -0
- memvcs/core/knowledge_graph.py +135 -138
- memvcs/core/llm/__init__.py +10 -0
- memvcs/core/llm/anthropic_provider.py +50 -0
- memvcs/core/llm/base.py +27 -0
- memvcs/core/llm/factory.py +30 -0
- memvcs/core/llm/openai_provider.py +36 -0
- memvcs/core/merge.py +260 -170
- memvcs/core/objects.py +110 -101
- memvcs/core/pack.py +92 -0
- memvcs/core/pii_scanner.py +147 -146
- memvcs/core/privacy_budget.py +63 -0
- memvcs/core/refs.py +132 -115
- memvcs/core/remote.py +38 -0
- memvcs/core/repository.py +254 -164
- memvcs/core/schema.py +155 -113
- memvcs/core/staging.py +60 -65
- memvcs/core/storage/__init__.py +20 -18
- memvcs/core/storage/base.py +74 -70
- memvcs/core/storage/gcs.py +70 -68
- memvcs/core/storage/local.py +42 -40
- memvcs/core/storage/s3.py +105 -110
- memvcs/core/temporal_index.py +121 -0
- memvcs/core/test_runner.py +101 -93
- memvcs/core/trust.py +103 -0
- memvcs/core/vector_store.py +56 -36
- memvcs/core/zk_proofs.py +26 -0
- memvcs/integrations/mcp_server.py +1 -3
- memvcs/integrations/web_ui/server.py +25 -26
- memvcs/retrieval/__init__.py +22 -0
- memvcs/retrieval/base.py +54 -0
- memvcs/retrieval/pack.py +128 -0
- memvcs/retrieval/recaller.py +105 -0
- memvcs/retrieval/strategies.py +314 -0
- memvcs/utils/__init__.py +3 -3
- memvcs/utils/helpers.py +52 -52
- agmem-0.1.1.dist-info/RECORD +0 -67
- {agmem-0.1.1.dist-info → agmem-0.1.3.dist-info}/WHEEL +0 -0
- {agmem-0.1.1.dist-info → agmem-0.1.3.dist-info}/entry_points.txt +0 -0
- {agmem-0.1.1.dist-info → agmem-0.1.3.dist-info}/licenses/LICENSE +0 -0
- {agmem-0.1.1.dist-info → agmem-0.1.3.dist-info}/top_level.txt +0 -0
memvcs/core/vector_store.py
CHANGED
|
@@ -6,11 +6,11 @@ Requires: pip install agmem[vector]
|
|
|
6
6
|
"""
|
|
7
7
|
|
|
8
8
|
import logging
|
|
9
|
+
import struct
|
|
10
|
+
from pathlib import Path
|
|
9
11
|
from typing import List, Optional, Tuple
|
|
10
12
|
|
|
11
13
|
from .constants import MEMORY_TYPES
|
|
12
|
-
import struct
|
|
13
|
-
from pathlib import Path
|
|
14
14
|
|
|
15
15
|
logger = logging.getLogger("agmem.vector_store")
|
|
16
16
|
|
|
@@ -56,6 +56,19 @@ class VectorStore:
|
|
|
56
56
|
"On macOS, try: brew install python (for Homebrew SQLite)"
|
|
57
57
|
) from e
|
|
58
58
|
|
|
59
|
+
def _device(self) -> str:
|
|
60
|
+
"""Return device for embeddings: cuda/mps/cpu. GPU acceleration when available."""
|
|
61
|
+
try:
|
|
62
|
+
import torch
|
|
63
|
+
|
|
64
|
+
if torch.cuda.is_available():
|
|
65
|
+
return "cuda"
|
|
66
|
+
if hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
|
|
67
|
+
return "mps"
|
|
68
|
+
except ImportError:
|
|
69
|
+
pass
|
|
70
|
+
return "cpu"
|
|
71
|
+
|
|
59
72
|
def _get_model(self):
|
|
60
73
|
"""Lazy-load the sentence-transformers model."""
|
|
61
74
|
if self._model is not None:
|
|
@@ -64,7 +77,8 @@ class VectorStore:
|
|
|
64
77
|
try:
|
|
65
78
|
from sentence_transformers import SentenceTransformer
|
|
66
79
|
|
|
67
|
-
|
|
80
|
+
device = self._device()
|
|
81
|
+
self._model = SentenceTransformer("all-MiniLM-L6-v2", device=device)
|
|
68
82
|
return self._model
|
|
69
83
|
except ImportError as e:
|
|
70
84
|
raise ImportError(
|
|
@@ -75,7 +89,8 @@ class VectorStore:
|
|
|
75
89
|
def _ensure_tables(self):
|
|
76
90
|
"""Create vector and metadata tables if they don't exist."""
|
|
77
91
|
conn = self._get_connection()
|
|
78
|
-
conn.execute(
|
|
92
|
+
conn.execute(
|
|
93
|
+
"""
|
|
79
94
|
CREATE TABLE IF NOT EXISTS memory_meta (
|
|
80
95
|
rowid INTEGER PRIMARY KEY,
|
|
81
96
|
path TEXT NOT NULL,
|
|
@@ -85,18 +100,21 @@ class VectorStore:
|
|
|
85
100
|
author TEXT,
|
|
86
101
|
indexed_at TEXT
|
|
87
102
|
)
|
|
88
|
-
"""
|
|
103
|
+
"""
|
|
104
|
+
)
|
|
89
105
|
# Try to add new columns to existing tables (for upgrades)
|
|
90
|
-
for col in [
|
|
106
|
+
for col in ["commit_hash TEXT", "author TEXT", "indexed_at TEXT"]:
|
|
91
107
|
try:
|
|
92
108
|
conn.execute(f"ALTER TABLE memory_meta ADD COLUMN {col}")
|
|
93
109
|
except Exception:
|
|
94
110
|
pass # Column already exists
|
|
95
111
|
try:
|
|
96
|
-
conn.execute(
|
|
112
|
+
conn.execute(
|
|
113
|
+
f"""
|
|
97
114
|
CREATE VIRTUAL TABLE IF NOT EXISTS vec_memory
|
|
98
115
|
USING vec0(embedding float[{EMBEDDING_DIM}])
|
|
99
|
-
"""
|
|
116
|
+
"""
|
|
117
|
+
)
|
|
100
118
|
except Exception as e:
|
|
101
119
|
# vec0 might already exist with different schema
|
|
102
120
|
logger.debug("vec_memory creation: %s", e)
|
|
@@ -114,11 +132,11 @@ class VectorStore:
|
|
|
114
132
|
content: str,
|
|
115
133
|
blob_hash: Optional[str] = None,
|
|
116
134
|
commit_hash: Optional[str] = None,
|
|
117
|
-
author: Optional[str] = None
|
|
135
|
+
author: Optional[str] = None,
|
|
118
136
|
) -> None:
|
|
119
137
|
"""
|
|
120
138
|
Index a memory file for semantic search.
|
|
121
|
-
|
|
139
|
+
|
|
122
140
|
Args:
|
|
123
141
|
path: File path relative to current/
|
|
124
142
|
content: File content to index
|
|
@@ -127,13 +145,13 @@ class VectorStore:
|
|
|
127
145
|
author: Optional author string for provenance tracking
|
|
128
146
|
"""
|
|
129
147
|
from datetime import datetime
|
|
130
|
-
|
|
148
|
+
|
|
131
149
|
self._ensure_tables()
|
|
132
150
|
conn = self._get_connection()
|
|
133
151
|
|
|
134
152
|
embedding = self._embed(content)
|
|
135
153
|
emb_bytes = _serialize_f32(embedding)
|
|
136
|
-
indexed_at = datetime.utcnow().isoformat() +
|
|
154
|
+
indexed_at = datetime.utcnow().isoformat() + "Z"
|
|
137
155
|
|
|
138
156
|
with conn:
|
|
139
157
|
conn.execute(
|
|
@@ -203,13 +221,13 @@ class VectorStore:
|
|
|
203
221
|
results.append((path, snippet, float(distance)))
|
|
204
222
|
|
|
205
223
|
return results
|
|
206
|
-
|
|
224
|
+
|
|
207
225
|
def search_with_provenance(
|
|
208
226
|
self, query: str, limit: int = 10, min_score: Optional[float] = None
|
|
209
227
|
) -> List[dict]:
|
|
210
228
|
"""
|
|
211
229
|
Semantic search with provenance metadata.
|
|
212
|
-
|
|
230
|
+
|
|
213
231
|
Returns list of dicts with: path, content, distance, commit_hash, author, indexed_at
|
|
214
232
|
"""
|
|
215
233
|
self._ensure_tables()
|
|
@@ -235,51 +253,53 @@ class VectorStore:
|
|
|
235
253
|
if min_score is not None and distance > min_score:
|
|
236
254
|
continue
|
|
237
255
|
snippet = content[:500] + ("..." if len(content) > 500 else "")
|
|
238
|
-
results.append(
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
256
|
+
results.append(
|
|
257
|
+
{
|
|
258
|
+
"path": path,
|
|
259
|
+
"content": snippet,
|
|
260
|
+
"distance": float(distance),
|
|
261
|
+
"similarity": 1.0 - float(distance), # Convert to similarity score
|
|
262
|
+
"commit_hash": commit_hash,
|
|
263
|
+
"author": author,
|
|
264
|
+
"indexed_at": indexed_at,
|
|
265
|
+
"blob_hash": blob_hash,
|
|
266
|
+
}
|
|
267
|
+
)
|
|
248
268
|
|
|
249
269
|
return results
|
|
250
|
-
|
|
270
|
+
|
|
251
271
|
def get_all_entries(self) -> List[dict]:
    """
    Return the metadata of every indexed entry.

    Used for fsck operations to check for dangling vectors.

    Returns:
        One dict per ``memory_meta`` row with the keys ``rowid``, ``path``,
        ``blob_hash``, ``commit_hash``, ``author`` and ``indexed_at``.
    """
    self._ensure_tables()

    cursor = self._get_connection().execute(
        """
        SELECT rowid, path, blob_hash, commit_hash, author, indexed_at
        FROM memory_meta
        """
    )

    # Same order as the SELECT list so zip() pairs each value with its key.
    columns = ("rowid", "path", "blob_hash", "commit_hash", "author", "indexed_at")
    return [dict(zip(columns, record)) for record in cursor.fetchall()]
|
|
278
|
-
|
|
298
|
+
|
|
279
299
|
def delete_entry(self, rowid: int) -> bool:
|
|
280
300
|
"""
|
|
281
301
|
Delete an entry by rowid.
|
|
282
|
-
|
|
302
|
+
|
|
283
303
|
Used by fsck to remove dangling vectors.
|
|
284
304
|
"""
|
|
285
305
|
conn = self._get_connection()
|
memvcs/core/zk_proofs.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Zero-knowledge proof system for agmem (stub).
|
|
3
|
+
|
|
4
|
+
Planned: zk-SNARKs (Groth16) for keyword containment, memory freshness, competence verification.
|
|
5
|
+
Requires optional zk extra (circuit lib, proving system). Trusted setup: public ceremony or small multi-party.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Optional, Tuple
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def prove_keyword_containment(
    memory_path: Path,
    keyword: str,
    output_proof_path: Path,
) -> bool:
    """Prove that a memory file contains a keyword without revealing its content.

    Stub: no zk backend exists yet, so none of the arguments are used, no
    proof is written, and the function always returns False.

    Args:
        memory_path: Memory file the proof would be about.
        keyword: Keyword whose presence would be proven.
        output_proof_path: Where the proof would be written.

    Returns:
        Always False until a zk backend is added.
    """
    return False
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def prove_memory_freshness(memory_path: Path, after_timestamp: str, output_proof_path: Path) -> bool:
    """Prove that a memory was updated after a date without revealing its content.

    Stub: no zk backend exists yet, so none of the arguments are used, no
    proof is written, and the function always returns False.

    Args:
        memory_path: Memory file the proof would be about.
        after_timestamp: Lower bound for the memory's update time.
        output_proof_path: Where the proof would be written.

    Returns:
        Always False until a zk backend is added.
    """
    return False
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def verify_proof(
    proof_path: Path,
    statement_type: str,
    **kwargs,
) -> bool:
    """Verify a zk proof.

    Stub: no zk backend exists yet, so the arguments are ignored and every
    proof is reported as not verified.

    Args:
        proof_path: Location of the proof to check.
        statement_type: Kind of statement the proof claims.
        **kwargs: Accepted and ignored by the stub.

    Returns:
        Always False until a zk backend is added.
    """
    return False
|
|
@@ -219,9 +219,7 @@ def _create_mcp_server():
|
|
|
219
219
|
fp = Path(root) / f
|
|
220
220
|
rel = str(fp.relative_to(repo.current_dir))
|
|
221
221
|
working_files[rel] = fp.read_bytes()
|
|
222
|
-
tree_diff = engine.diff_working_dir(
|
|
223
|
-
head_commit.store(repo.object_store), working_files
|
|
224
|
-
)
|
|
222
|
+
tree_diff = engine.diff_working_dir(head_commit.store(repo.object_store), working_files)
|
|
225
223
|
return engine.format_diff(tree_diff, "HEAD", "working")
|
|
226
224
|
else:
|
|
227
225
|
base_ref = base or "HEAD~1"
|
|
@@ -50,7 +50,9 @@ def create_app(repo_path: Path) -> FastAPI:
|
|
|
50
50
|
if not repo.is_valid_repo():
|
|
51
51
|
raise HTTPException(status_code=400, detail="Not an agmem repository")
|
|
52
52
|
|
|
53
|
-
resolved = repo.resolve_ref(commit_hash) or (
|
|
53
|
+
resolved = repo.resolve_ref(commit_hash) or (
|
|
54
|
+
commit_hash if _valid_commit_hash(commit_hash) else None
|
|
55
|
+
)
|
|
54
56
|
if not resolved:
|
|
55
57
|
raise HTTPException(status_code=400, detail="Invalid revision or hash")
|
|
56
58
|
c = Commit.load(repo.object_store, resolved)
|
|
@@ -87,13 +89,15 @@ def create_app(repo_path: Path) -> FastAPI:
|
|
|
87
89
|
tree_diff = engine.diff_commits(c1, c2)
|
|
88
90
|
files = []
|
|
89
91
|
for fd in tree_diff.files:
|
|
90
|
-
files.append(
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
92
|
+
files.append(
|
|
93
|
+
{
|
|
94
|
+
"path": fd.path,
|
|
95
|
+
"diff_type": fd.diff_type.value,
|
|
96
|
+
"old_hash": fd.old_hash,
|
|
97
|
+
"new_hash": fd.new_hash,
|
|
98
|
+
"diff_lines": fd.diff_lines,
|
|
99
|
+
}
|
|
100
|
+
)
|
|
97
101
|
return {
|
|
98
102
|
"base": base,
|
|
99
103
|
"head": head,
|
|
@@ -138,37 +142,32 @@ def create_app(repo_path: Path) -> FastAPI:
|
|
|
138
142
|
if include_similarity:
|
|
139
143
|
try:
|
|
140
144
|
from memvcs.core.vector_store import VectorStore
|
|
141
|
-
|
|
145
|
+
|
|
146
|
+
vector_store = VectorStore(_repo_path / ".mem")
|
|
142
147
|
except ImportError:
|
|
143
148
|
pass
|
|
144
149
|
|
|
145
150
|
builder = KnowledgeGraphBuilder(repo, vector_store)
|
|
146
151
|
graph_data = builder.build_graph(
|
|
147
|
-
include_similarity=include_similarity,
|
|
148
|
-
similarity_threshold=threshold
|
|
152
|
+
include_similarity=include_similarity, similarity_threshold=threshold
|
|
149
153
|
)
|
|
150
154
|
|
|
151
155
|
# Return D3-compatible format
|
|
152
156
|
return {
|
|
153
|
-
|
|
157
|
+
"nodes": [
|
|
154
158
|
{
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
+
"id": n.id,
|
|
160
|
+
"name": n.label,
|
|
161
|
+
"group": n.memory_type,
|
|
162
|
+
"size": min(20, max(5, n.size // 100)),
|
|
159
163
|
}
|
|
160
164
|
for n in graph_data.nodes
|
|
161
165
|
],
|
|
162
|
-
|
|
163
|
-
{
|
|
164
|
-
'source': e.source,
|
|
165
|
-
'target': e.target,
|
|
166
|
-
'type': e.edge_type,
|
|
167
|
-
'value': e.weight
|
|
168
|
-
}
|
|
166
|
+
"links": [
|
|
167
|
+
{"source": e.source, "target": e.target, "type": e.edge_type, "value": e.weight}
|
|
169
168
|
for e in graph_data.edges
|
|
170
169
|
],
|
|
171
|
-
|
|
170
|
+
"metadata": graph_data.metadata,
|
|
172
171
|
}
|
|
173
172
|
|
|
174
173
|
@app.get("/graph", response_class=HTMLResponse)
|
|
@@ -185,7 +184,7 @@ def create_app(repo_path: Path) -> FastAPI:
|
|
|
185
184
|
|
|
186
185
|
|
|
187
186
|
# Embedded graph viewer template
|
|
188
|
-
GRAPH_HTML_TEMPLATE =
|
|
187
|
+
GRAPH_HTML_TEMPLATE = """<!DOCTYPE html>
|
|
189
188
|
<html>
|
|
190
189
|
<head>
|
|
191
190
|
<title>agmem Knowledge Graph</title>
|
|
@@ -349,4 +348,4 @@ GRAPH_HTML_TEMPLATE = '''<!DOCTYPE html>
|
|
|
349
348
|
</script>
|
|
350
349
|
</body>
|
|
351
350
|
</html>
|
|
352
|
-
|
|
351
|
+
"""
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Retrieval module for agmem - context-aware recall with pluggable strategies.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from .base import RetrievalStrategy, RecallResult
|
|
6
|
+
from .strategies import (
|
|
7
|
+
RecencyStrategy,
|
|
8
|
+
ImportanceStrategy,
|
|
9
|
+
SimilarityStrategy,
|
|
10
|
+
HybridStrategy,
|
|
11
|
+
)
|
|
12
|
+
from .recaller import RecallEngine
|
|
13
|
+
|
|
14
|
+
__all__ = [
|
|
15
|
+
"RetrievalStrategy",
|
|
16
|
+
"RecallResult",
|
|
17
|
+
"RecencyStrategy",
|
|
18
|
+
"ImportanceStrategy",
|
|
19
|
+
"SimilarityStrategy",
|
|
20
|
+
"HybridStrategy",
|
|
21
|
+
"RecallEngine",
|
|
22
|
+
]
|
memvcs/retrieval/base.py
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Base retrieval interfaces for agmem recall.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from abc import ABC, abstractmethod
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
from typing import List, Any, Optional
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@dataclass
class RecallResult:
    """One recalled memory together with its ranking and provenance data."""

    path: str
    content: str
    relevance_score: float
    source: dict  # commit_hash, author, indexed_at, etc.
    importance: Optional[float] = None

    def to_dict(self) -> dict:
        """Return the fields as a plain dict, in declaration order.

        The ``source`` dict is passed through as-is (not copied).
        """
        field_names = ("path", "content", "relevance_score", "source", "importance")
        return {name: getattr(self, name) for name in field_names}
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class RetrievalStrategy(ABC):
    """Interface that every recall strategy implements."""

    @abstractmethod
    def recall(
        self, context: str, limit: int, exclude: List[str], **kwargs: Any
    ) -> List[RecallResult]:
        """
        Find memories for ``context`` and return them ranked.

        Args:
            context: Current task description
            limit: Max results to return
            exclude: Tag/branch patterns to exclude (e.g., "experiment/*")
            **kwargs: Strategy-specific options

        Returns:
            Ranked list of RecallResult
        """
|
memvcs/retrieval/pack.py
ADDED
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Pack engine - context window budget manager for agmem.
|
|
3
|
+
|
|
4
|
+
Fills token budget with most relevant memories, optionally with summarization cascade.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from typing import List, Optional, Any
|
|
8
|
+
from dataclasses import dataclass
|
|
9
|
+
|
|
10
|
+
from .base import RecallResult
|
|
11
|
+
from .recaller import RecallEngine
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclass
class PackResult:
    """Outcome of packing recalled memories into a token budget."""

    content: str  # packed text: optional context header followed by the memories
    total_tokens: int  # token count reported for ``content``
    budget: int  # token budget the pack was built for
    items_used: int  # number of memories that made it into ``content``
    items_total: int  # number of candidate memories that were considered
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class PackEngine:
    """Packs recalled memories into a token budget.

    Candidates come from a ``RecallEngine``; they are added in the order the
    recall returned them until the next one no longer fits. That one is
    truncated if a useful part of it still fits, and packing then stops.
    """

    _SEPARATOR = "\n\n---\n\n"
    _ELLIPSIS = "..."
    # A truncated memory must keep more than this share of its text.
    _MIN_TRUNCATION_RATIO = 0.2

    def __init__(
        self,
        recall_engine: RecallEngine,
        model: str = "gpt-4o-mini",
        summarization_cascade: bool = False,
    ):
        """Store the recall engine and packing options.

        Args:
            recall_engine: Engine whose ``recall`` supplies the candidates.
            model: Model name handed to tiktoken to select the tokenizer.
            summarization_cascade: Stored on the instance; not used by
                ``pack`` in this class.
        """
        self.recall_engine = recall_engine
        self.model = model
        self.summarization_cascade = summarization_cascade

    def _count_tokens(self, text: str) -> int:
        """Count tokens with tiktoken, or estimate ~4 characters per token.

        The estimate is used whenever tiktoken cannot give an answer: the
        package is not installed, the model name is unknown to it, or
        encoding fails. Counting is best-effort and never raises.
        """
        try:
            import tiktoken

            enc = tiktoken.encoding_for_model(self.model)
            return len(enc.encode(text))
        except Exception:
            # Deliberate catch-all: a tokenizer problem must not break packing.
            return len(text) // 4

    def _body_tokens(self, parts: List[str]) -> int:
        """Token count of ``parts`` joined exactly as they will be emitted."""
        if not parts:
            return 0
        return self._count_tokens(self._SEPARATOR.join(parts))

    def _truncate_to_fit(
        self, parts: List[str], content: str, available: int, used: int
    ) -> Optional[str]:
        """Return a shortened ``content`` that still fits, or None.

        Args:
            parts: Texts already packed.
            content: Text of the memory that did not fit whole.
            available: Tokens available for the body (budget minus header).
            used: Tokens the already packed ``parts`` occupy.

        Returns:
            ``content`` cut short with an ellipsis appended, such that
            ``parts`` plus this text fit in ``available``; None when no cut
            keeping more than 20% of the text fits.
        """
        remaining = available - used
        content_tokens = self._count_tokens(content)
        if remaining <= 0 or content_tokens <= 0:
            return None

        ratio = remaining / content_tokens
        if ratio <= self._MIN_TRUNCATION_RATIO:
            return None

        min_cut = int(len(content) * self._MIN_TRUNCATION_RATIO)
        cut = int(len(content) * min(ratio, 1.0))
        # The first guess ignores the separator and the ellipsis, so it can
        # overshoot by a few tokens; shrink in 10% steps until it fits.
        while cut > min_cut:
            clipped = content[:cut] + self._ELLIPSIS
            if self._body_tokens(parts + [clipped]) <= available:
                return clipped
            cut = int(cut * 0.9)
        return None

    def pack(
        self,
        context: str,
        budget: int = 4000,
        strategy: str = "relevance",
        exclude: Optional[List[str]] = None,
    ) -> PackResult:
        """
        Pack memories into token budget.

        The budget is checked against the text that is actually emitted
        (memory contents joined by the separator), so the reported
        ``total_tokens`` stays within ``budget`` whenever the header alone
        fits.

        Args:
            context: Current task description for recall
            budget: Max tokens to use
            strategy: recall strategy (relevance=hybrid, recency, importance,
                similarity); any other value falls back to hybrid
            exclude: Path patterns to exclude

        Returns:
            PackResult with packed content and metadata
        """
        exclude = exclude or []
        recall_strategy = "hybrid" if strategy == "relevance" else strategy
        if recall_strategy not in ("hybrid", "recency", "importance", "similarity"):
            recall_strategy = "hybrid"

        results = self.recall_engine.recall(
            context=context,
            limit=50,  # Get more candidates than will usually fit
            strategy=recall_strategy,
            exclude=exclude,
        )

        header = f"# Context for: {context}\n\n" if context else ""
        header_tokens = self._count_tokens(header)
        available = budget - header_tokens

        # Results arrive ranked by the recall strategy; fill greedily.
        packed_items: List[RecallResult] = []
        parts: List[str] = []
        body_tokens = 0
        for r in results:
            candidate_tokens = self._body_tokens(parts + [r.content])
            if candidate_tokens <= available:
                packed_items.append(r)
                parts.append(r.content)
                body_tokens = candidate_tokens
                continue

            # First memory that does not fit whole: keep a truncated copy
            # if a useful part fits, then stop packing.
            clipped = self._truncate_to_fit(parts, r.content, available, body_tokens)
            if clipped is not None:
                packed_items.append(
                    RecallResult(
                        path=r.path,
                        content=clipped,
                        relevance_score=r.relevance_score,
                        source=r.source,
                        importance=r.importance,
                    )
                )
                parts.append(clipped)
                body_tokens = self._body_tokens(parts)
            break

        body = self._SEPARATOR.join(parts)

        return PackResult(
            content=header + body,
            total_tokens=header_tokens + body_tokens,
            budget=budget,
            items_used=len(packed_items),
            items_total=len(results),
        )
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Recall engine - orchestrates strategies and access tracking.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from typing import List, Optional, Any
|
|
6
|
+
|
|
7
|
+
from .base import RetrievalStrategy, RecallResult
|
|
8
|
+
from .strategies import RecencyStrategy, ImportanceStrategy, SimilarityStrategy, HybridStrategy
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class RecallEngine:
    """Orchestrates recall with pluggable strategies and access tracking.

    When an access index is supplied, every recalled path is recorded in it
    and (with ``use_cache``) results are cached per context, strategy, limit
    and exclude list.
    """

    STRATEGIES = ["recency", "importance", "similarity", "hybrid"]

    def __init__(
        self,
        repo: Any,
        vector_store: Optional[Any] = None,
        access_index: Optional[Any] = None,
        use_cache: bool = True,
    ):
        """Store the collaborators.

        Args:
            repo: Repository handed to the strategies and asked for the
                head commit when accesses are recorded.
            vector_store: Optional store required by the similarity
                strategy and passed to the hybrid one.
            access_index: Optional index used for access records and the
                recall cache.
            use_cache: Whether cached results may be read and written.
        """
        self.repo = repo
        self.vector_store = vector_store
        self.access_index = access_index
        self.use_cache = use_cache

    def _get_strategy(self, strategy_name: str) -> RetrievalStrategy:
        """Get strategy instance by name (case-insensitive).

        Raises:
            ImportError: "similarity" was requested without a vector store.
            ValueError: The name is not one of ``STRATEGIES``.
        """
        name = strategy_name.lower()
        if name == "recency":
            return RecencyStrategy(self.repo)
        if name == "importance":
            return ImportanceStrategy(self.repo)
        if name == "similarity":
            if not self.vector_store:
                raise ImportError(
                    "Similarity strategy requires agmem[vector]. Install with: pip install agmem[vector]"
                )
            return SimilarityStrategy(self.repo, self.vector_store)
        if name == "hybrid":
            return HybridStrategy(self.repo, self.vector_store)
        raise ValueError(f"Unknown strategy: {strategy_name}. Choose from {self.STRATEGIES}")

    def _resolve_strategy(self, strategy: str) -> str:
        """Return the normalized name of the strategy that will actually run.

        Names are lowercased, and "hybrid" degrades to "recency" when there
        is no vector store. The result is used for both the cache lookup and
        the cache write so the two always agree on the key.

        Raises:
            ValueError: The name is not one of ``STRATEGIES``.
        """
        name = strategy.lower()
        if name not in self.STRATEGIES:
            raise ValueError(f"Unknown strategy: {strategy}. Choose from {self.STRATEGIES}")
        if name == "hybrid" and not self.vector_store:
            return "recency"
        return name

    def recall(
        self,
        context: str,
        limit: int = 10,
        strategy: str = "hybrid",
        exclude: Optional[List[str]] = None,
    ) -> List[RecallResult]:
        """
        Recall memories for the given context.

        Args:
            context: Current task description
            limit: Max results
            strategy: recency, importance, similarity, or hybrid
            exclude: Tag/path patterns to exclude

        Returns:
            Ranked list of RecallResult

        Raises:
            ValueError: Unknown strategy name.
            ImportError: "similarity" requested without a vector store.
        """
        stripped = (pattern.strip() for pattern in (exclude or []))
        exclude_list = [pattern for pattern in stripped if pattern]

        # Resolve first: the cache must be read under the same strategy name
        # it is written under, otherwise a degraded "hybrid" never hits.
        effective_strategy = self._resolve_strategy(strategy)

        cached = self._get_cached_results(context, effective_strategy, limit, exclude_list)
        if cached is not None:
            return cached

        strat = self._get_strategy(effective_strategy)
        results = strat.recall(context=context, limit=limit, exclude=exclude_list)

        self._record_access_and_cache(context, effective_strategy, limit, exclude_list, results)
        return results

    def _get_cached_results(
        self, context: str, strategy: str, limit: int, exclude: List[str]
    ) -> Optional[List[RecallResult]]:
        """Return cached results for this query, or None on a miss.

        A miss is reported when caching is disabled, there is no access
        index, the context is empty, or the cache holds no results. Cached
        dict entries are rebuilt into ``RecallResult`` objects.
        """
        if not (self.use_cache and self.access_index and context):
            return None
        cached = self.access_index.get_cached_recall(context, strategy, limit, exclude)
        if not cached or not cached.get("results"):
            return None
        return [RecallResult(**r) if isinstance(r, dict) else r for r in cached["results"]]

    def _record_access_and_cache(
        self,
        context: str,
        strategy: str,
        limit: int,
        exclude: List[str],
        results: List[RecallResult],
    ) -> None:
        """Record an access for each result and cache the result list.

        Does nothing without an access index. The cache is only written when
        caching is enabled and both the context and the results are
        non-empty.
        """
        if not self.access_index:
            return

        head = self.repo.get_head_commit()
        commit_hash = head.store(self.repo.object_store) if head else ""
        for r in results:
            self.access_index.record_access(r.path, commit_hash)

        if self.use_cache and context and results:
            self.access_index.set_cached_recall(
                context, strategy, limit, exclude, [r.to_dict() for r in results]
            )
|