memoryagent_lib-0.1.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- memoryagent/__init__.py +35 -0
- memoryagent/confidence.py +82 -0
- memoryagent/config.py +35 -0
- memoryagent/consolidation.py +5 -0
- memoryagent/examples/export_memory.py +110 -0
- memoryagent/examples/memory_api_server.py +223 -0
- memoryagent/examples/minimal.py +47 -0
- memoryagent/examples/openai_agent.py +137 -0
- memoryagent/indexers.py +61 -0
- memoryagent/models.py +156 -0
- memoryagent/policy.py +171 -0
- memoryagent/retrieval.py +154 -0
- memoryagent/storage/base.py +86 -0
- memoryagent/storage/in_memory.py +88 -0
- memoryagent/storage/local_disk.py +415 -0
- memoryagent/system.py +182 -0
- memoryagent/utils.py +35 -0
- memoryagent/workers.py +169 -0
- memoryagent_lib-0.1.1.dist-info/METADATA +186 -0
- memoryagent_lib-0.1.1.dist-info/RECORD +22 -0
- memoryagent_lib-0.1.1.dist-info/WHEEL +5 -0
- memoryagent_lib-0.1.1.dist-info/top_level.txt +1 -0
memoryagent/storage/in_memory.py
ADDED
@@ -0,0 +1,88 @@
from __future__ import annotations

from collections import defaultdict
from datetime import datetime, timezone
from typing import Dict, List

from memoryagent.models import MemoryQuery, MemoryType, ScoredMemory, StorageTier
from memoryagent.storage.base import GraphStore, VectorIndex
from memoryagent.utils import tokenize


class SimpleVectorIndex(VectorIndex):
    """Local in-memory lexical index used for local mode."""

    def __init__(self) -> None:
        self._tokens: Dict[str, List[str]] = defaultdict(list)
        self._metadata: Dict[str, dict] = {}
        self._texts: Dict[str, str] = {}

    async def upsert(self, item_id, text: str, metadata: dict) -> None:
        item_id = str(item_id)
        self._texts[item_id] = text
        self._metadata[item_id] = metadata
        for token in set(tokenize(text)):
            if item_id not in self._tokens[token]:
                self._tokens[token].append(item_id)

    async def delete(self, item_id) -> None:
        item_id = str(item_id)
        self._texts.pop(item_id, None)
        self._metadata.pop(item_id, None)
        for token, ids in list(self._tokens.items()):
            if item_id in ids:
                ids.remove(item_id)
            if not ids:
                self._tokens.pop(token, None)

    async def query(self, query: MemoryQuery, filters: dict, limit: int) -> List[ScoredMemory]:
        query_tokens = set(tokenize(query.text))
        if not query_tokens:
            return []

        candidate_scores: Dict[str, int] = {}
        for token in query_tokens:
            for item_id in self._tokens.get(token, []):
                candidate_scores[item_id] = candidate_scores.get(item_id, 0) + 1

        scored: List[ScoredMemory] = []
        for item_id, overlap in candidate_scores.items():
            meta = self._metadata.get(item_id, {})
            if filters:
                if "owner" in filters and meta.get("owner") != filters["owner"]:
                    continue
                if "tier" in filters and meta.get("tier") != filters["tier"]:
                    continue
                if "types" in filters:
                    if meta.get("type") not in {t.value for t in filters["types"]}:
                        continue
            score = overlap / max(1, len(query_tokens))
            meta_tier = meta.get("tier")
            tier_value = StorageTier(meta_tier) if meta_tier else meta["item"].tier
            scored.append(
                ScoredMemory(
                    item=meta["item"],
                    score=score,
                    tier=tier_value,
                    explanation="token overlap",
                )
            )

        scored.sort(key=lambda item: item.score, reverse=True)
        return scored[:limit]


class SimpleGraphStore(GraphStore):
    def __init__(self) -> None:
        self._edges: Dict[str, List[str]] = defaultdict(list)

    async def upsert_fact(self, owner: str, subject: str, predicate: str, obj: str) -> None:
        key = f"{owner}:{subject}:{predicate}"
        self._edges[key].append(obj)

    async def query_related(self, owner: str, subject: str, limit: int) -> List[str]:
        results: List[str] = []
        for key, targets in self._edges.items():
            if key.startswith(f"{owner}:{subject}:"):
                results.extend(targets)
        return results[:limit]
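SimpleVectorIndex.query ranks candidates by lexical overlap: each matching item is scored as overlap / max(1, len(query_tokens)). A minimal standalone sketch of that scoring rule, using a plain lowercase/whitespace tokenizer as a stand-in for memoryagent.utils.tokenize (whose exact behaviour is not shown in this diff):

# Standalone sketch of SimpleVectorIndex-style token-overlap scoring.
# The tokenizer below is an assumption; the packaged tokenize() may differ.
from collections import defaultdict
from typing import Dict, List


def tokenize(text: str) -> List[str]:
    return text.lower().split()


docs = {
    "a": "user prefers dark mode in the editor",
    "b": "meeting notes from the retrieval design review",
}

# Build the same token -> item_id postings the index keeps in self._tokens.
postings: Dict[str, List[str]] = defaultdict(list)
for item_id, text in docs.items():
    for token in set(tokenize(text)):
        postings[token].append(item_id)

query_tokens = set(tokenize("dark mode editor"))
overlap: Dict[str, int] = defaultdict(int)
for token in query_tokens:
    for item_id in postings.get(token, []):
        overlap[item_id] += 1

# score = overlap / max(1, len(query_tokens)), as in SimpleVectorIndex.query
scores = {item_id: n / max(1, len(query_tokens)) for item_id, n in overlap.items()}
print(sorted(scores.items(), key=lambda kv: kv[1], reverse=True))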
memoryagent/storage/local_disk.py
ADDED
@@ -0,0 +1,415 @@
from __future__ import annotations

import asyncio
import json
import sqlite3
from datetime import datetime
from pathlib import Path
from typing import Iterable, List, Optional

from memoryagent.models import MemoryItem, MemoryQuery, MemoryType, ScoredMemory, StorageTier, utc_now
from memoryagent.storage.base import FeatureStore, MetadataStore, ObjectStore, VectorIndex
from memoryagent.utils import clamp, hash_embed


class SQLiteMetadataStore(MetadataStore):
    def __init__(self, path: Path) -> None:
        self.path = path
        self._init_db()

    def _init_db(self) -> None:
        with sqlite3.connect(self.path) as conn:
            conn.execute(
                """
                CREATE TABLE IF NOT EXISTS memory_items (
                    id TEXT PRIMARY KEY,
                    type TEXT,
                    owner TEXT,
                    summary TEXT,
                    content_json TEXT,
                    tags_json TEXT,
                    created_at TEXT,
                    updated_at TEXT,
                    last_accessed TEXT,
                    tier TEXT,
                    pointer_json TEXT,
                    ttl_seconds INTEGER,
                    confidence REAL,
                    authority REAL,
                    stability REAL
                )
                """
            )

    async def upsert(self, item: MemoryItem) -> None:
        await asyncio.to_thread(self._upsert_sync, item)

    def _upsert_sync(self, item: MemoryItem) -> None:
        now = utc_now().isoformat()
        item.updated_at = utc_now()
        with sqlite3.connect(self.path) as conn:
            conn.execute(
                """
                INSERT INTO memory_items (
                    id, type, owner, summary, content_json, tags_json,
                    created_at, updated_at, last_accessed, tier, pointer_json,
                    ttl_seconds, confidence, authority, stability
                ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                ON CONFLICT(id) DO UPDATE SET
                    type=excluded.type,
                    owner=excluded.owner,
                    summary=excluded.summary,
                    content_json=excluded.content_json,
                    tags_json=excluded.tags_json,
                    created_at=excluded.created_at,
                    updated_at=excluded.updated_at,
                    last_accessed=excluded.last_accessed,
                    tier=excluded.tier,
                    pointer_json=excluded.pointer_json,
                    ttl_seconds=excluded.ttl_seconds,
                    confidence=excluded.confidence,
                    authority=excluded.authority,
                    stability=excluded.stability
                """,
                (
                    str(item.id),
                    item.type.value,
                    item.owner,
                    item.summary,
                    json.dumps(item.content, ensure_ascii=True),
                    json.dumps(item.tags, ensure_ascii=True),
                    item.created_at.isoformat(),
                    item.updated_at.isoformat(),
                    item.last_accessed.isoformat() if item.last_accessed else None,
                    item.tier.value,
                    json.dumps(item.pointer, ensure_ascii=True),
                    item.ttl_seconds,
                    item.confidence,
                    item.authority,
                    item.stability,
                ),
            )

    async def get(self, item_id) -> Optional[MemoryItem]:
        return await asyncio.to_thread(self._get_sync, item_id)

    def _get_sync(self, item_id) -> Optional[MemoryItem]:
        with sqlite3.connect(self.path) as conn:
            row = conn.execute(
                "SELECT id, type, owner, summary, content_json, tags_json, created_at, updated_at, last_accessed, tier, pointer_json, ttl_seconds, confidence, authority, stability FROM memory_items WHERE id = ?",
                (str(item_id),),
            ).fetchone()
        if not row:
            return None
        return _row_to_item(row)

    async def delete(self, item_id) -> None:
        await asyncio.to_thread(self._delete_sync, item_id)

    def _delete_sync(self, item_id) -> None:
        with sqlite3.connect(self.path) as conn:
            conn.execute("DELETE FROM memory_items WHERE id = ?", (str(item_id),))

    async def list_by_owner(self, owner: str) -> List[MemoryItem]:
        return await asyncio.to_thread(self._list_by_owner_sync, owner)

    def _list_by_owner_sync(self, owner: str) -> List[MemoryItem]:
        with sqlite3.connect(self.path) as conn:
            rows = conn.execute(
                "SELECT id, type, owner, summary, content_json, tags_json, created_at, updated_at, last_accessed, tier, pointer_json, ttl_seconds, confidence, authority, stability FROM memory_items WHERE owner = ?",
                (owner,),
            ).fetchall()
        return [_row_to_item(row) for row in rows]

    async def list_by_owner_and_type(self, owner: str, types: Iterable[str]) -> List[MemoryItem]:
        return await asyncio.to_thread(self._list_by_owner_and_type_sync, owner, list(types))

    def _list_by_owner_and_type_sync(self, owner: str, types: List[str]) -> List[MemoryItem]:
        placeholders = ",".join("?" for _ in types)
        with sqlite3.connect(self.path) as conn:
            rows = conn.execute(
                f"SELECT id, type, owner, summary, content_json, tags_json, created_at, updated_at, last_accessed, tier, pointer_json, ttl_seconds, confidence, authority, stability FROM memory_items WHERE owner = ? AND type IN ({placeholders})",
                (owner, *types),
            ).fetchall()
        return [_row_to_item(row) for row in rows]

    async def update_access(self, item_id) -> None:
        await asyncio.to_thread(self._update_access_sync, item_id)

    def _update_access_sync(self, item_id) -> None:
        with sqlite3.connect(self.path) as conn:
            conn.execute(
                "UPDATE memory_items SET last_accessed = ? WHERE id = ?",
                (utc_now().isoformat(), str(item_id)),
            )


class FileObjectStore(ObjectStore):
    def __init__(self, root: Path) -> None:
        self.root = root
        self.root.mkdir(parents=True, exist_ok=True)

    async def put(self, key: str, payload: dict) -> str:
        return await asyncio.to_thread(self._put_sync, key, payload)

    def _put_sync(self, key: str, payload: dict) -> str:
        path = self._resolve_path(key)
        path.parent.mkdir(parents=True, exist_ok=True)
        tmp_path = path.with_suffix(".json.tmp")
        tmp_path.write_text(json.dumps(payload, ensure_ascii=True, indent=2), encoding="utf-8")
        tmp_path.replace(path)
        return str(path)

    async def get(self, key: str) -> Optional[dict]:
        return await asyncio.to_thread(self._get_sync, key)

    def _get_sync(self, key: str) -> Optional[dict]:
        path = self._resolve_path(key)
        if not path.exists():
            return None
        return json.loads(path.read_text(encoding="utf-8"))

    def _resolve_path(self, key: str) -> Path:
        if key.endswith(".json"):
            relative = Path(key)
        else:
            relative = Path(f"{key}.json")
        if relative.is_absolute():
            return relative
        return self.root / relative

    async def append(self, key: str, payload: dict) -> str:
        return await asyncio.to_thread(self._append_sync, key, payload)

    def _append_sync(self, key: str, payload: dict) -> str:
        path = self._resolve_path(key)
        path.parent.mkdir(parents=True, exist_ok=True)
        existing = []
        if path.exists():
            try:
                existing = json.loads(path.read_text(encoding="utf-8"))
            except Exception:
                existing = []
        if not isinstance(existing, list):
            existing = []
        existing.append(payload)
        tmp_path = path.with_suffix(".json.tmp")
        tmp_path.write_text(json.dumps(existing, ensure_ascii=True, indent=2), encoding="utf-8")
        tmp_path.replace(path)
        return str(path)


class SQLiteFeatureStore(FeatureStore):
    def __init__(self, path: Path) -> None:
        self.path = path
        self._init_db()

    def _init_db(self) -> None:
        with sqlite3.connect(self.path) as conn:
            conn.execute(
                """
                CREATE TABLE IF NOT EXISTS features (
                    owner TEXT,
                    created_at TEXT,
                    payload_json TEXT
                )
                """
            )

    async def write_feature(self, owner: str, payload: dict) -> None:
        await asyncio.to_thread(self._write_feature_sync, owner, payload)

    def _write_feature_sync(self, owner: str, payload: dict) -> None:
        with sqlite3.connect(self.path) as conn:
            conn.execute(
                "INSERT INTO features (owner, created_at, payload_json) VALUES (?, ?, ?)",
                (owner, utc_now().isoformat(), json.dumps(payload, ensure_ascii=True)),
            )

    async def query_features(self, owner: str, limit: int) -> List[dict]:
        return await asyncio.to_thread(self._query_features_sync, owner, limit)

    def _query_features_sync(self, owner: str, limit: int) -> List[dict]:
        with sqlite3.connect(self.path) as conn:
            rows = conn.execute(
                "SELECT payload_json FROM features WHERE owner = ? ORDER BY created_at DESC LIMIT ?",
                (owner, limit),
            ).fetchall()
        return [json.loads(row[0]) for row in rows]


class SQLiteVecIndex(VectorIndex):
    """Vector search via sqlite-vec (optional extension)."""

    def __init__(
        self,
        path: Path,
        dim: int,
        embedding_fn=None,
        extension_path: Optional[Path] = None,
    ) -> None:
        self.path = path
        self.dim = dim
        self.embedding_fn = embedding_fn or (lambda text: hash_embed(text, dim))
        self.extension_path = extension_path
        self._init_db()

    def _connect(self) -> sqlite3.Connection:
        conn = sqlite3.connect(self.path)
        conn.row_factory = sqlite3.Row
        conn.enable_load_extension(True)
        if not self._try_load_extension(conn):
            raise RuntimeError(
                "sqlite-vec extension not available. Install sqlite-vec or provide extension_path."
            )
        return conn

    def _try_load_extension(self, conn: sqlite3.Connection) -> bool:
        try:
            import sqlite_vec  # type: ignore

            sqlite_vec.load(conn)
            return True
        except Exception:
            if self.extension_path is None:
                return False
            conn.load_extension(str(self.extension_path))
            return True

    def _init_db(self) -> None:
        with self._connect() as conn:
            conn.execute(
                f"""
                CREATE VIRTUAL TABLE IF NOT EXISTS vec_items USING vec0(
                    item_id TEXT PRIMARY KEY,
                    owner TEXT,
                    tier TEXT,
                    type TEXT,
                    embedding FLOAT[{self.dim}],
                    +item_json TEXT
                )
                """
            )

    async def upsert(self, item_id, text: str, metadata: dict) -> None:
        await asyncio.to_thread(self._upsert_sync, item_id, text, metadata)

    def _serialize_embedding(self, embedding: List[float]):
        try:
            import sqlite_vec  # type: ignore

            return sqlite_vec.serialize_float32(embedding)
        except Exception:
            return json.dumps(embedding, ensure_ascii=True)

    def _upsert_sync(self, item_id, text: str, metadata: dict) -> None:
        item = metadata.get("item")
        if item is None:
            raise ValueError("SQLiteVecIndex expects metadata['item'] to be a MemoryItem")
        embedding = self.embedding_fn(text)
        item_json = item.model_dump_json(
            include={"id", "type", "owner", "summary", "tier", "pointer"}
        )
        with self._connect() as conn:
            conn.execute("DELETE FROM vec_items WHERE item_id = ?", (str(item_id),))
            conn.execute(
                """
                INSERT INTO vec_items (item_id, owner, tier, type, embedding, item_json)
                VALUES (?, ?, ?, ?, ?, ?)
                """,
                (
                    str(item_id),
                    metadata.get("owner"),
                    metadata.get("tier"),
                    metadata.get("type"),
                    self._serialize_embedding(embedding),
                    item_json,
                ),
            )

    async def delete(self, item_id) -> None:
        await asyncio.to_thread(self._delete_sync, item_id)

    def _delete_sync(self, item_id) -> None:
        with self._connect() as conn:
            conn.execute("DELETE FROM vec_items WHERE item_id = ?", (str(item_id),))

    async def query(self, query: MemoryQuery, filters: dict, limit: int) -> List[ScoredMemory]:
        return await asyncio.to_thread(self._query_sync, query, filters, limit)

    def _query_sync(self, query: MemoryQuery, filters: dict, limit: int) -> List[ScoredMemory]:
        embedding = self.embedding_fn(query.text)
        embedding_blob = self._serialize_embedding(embedding)

        clauses = ["embedding MATCH ?"]
        params: List[object] = [embedding_blob]

        if filters.get("owner"):
            clauses.append("owner = ?")
            params.append(filters["owner"])
        if filters.get("tier"):
            clauses.append("tier = ?")
            params.append(filters["tier"])
        if filters.get("types"):
            types = filters["types"]
            placeholders = ",".join("?" for _ in types)
            clauses.append(f"type IN ({placeholders})")
            params.extend([t.value for t in types])

        where_sql = " AND ".join(clauses)
        sql = f"SELECT item_json, distance FROM vec_items WHERE {where_sql} ORDER BY distance LIMIT ?"
        params.append(limit)

        with self._connect() as conn:
            rows = conn.execute(sql, params).fetchall()

        scored: List[ScoredMemory] = []
        for row in rows:
            item = MemoryItem.model_validate_json(row["item_json"])
            distance = row["distance"]
            score = clamp(1.0 / (1.0 + distance))
            scored.append(ScoredMemory(item=item, score=score, tier=item.tier, explanation="sqlite-vec"))
        return scored


def _row_to_item(row) -> MemoryItem:
    (
        item_id,
        item_type,
        owner,
        summary,
        content_json,
        tags_json,
        created_at,
        updated_at,
        last_accessed,
        tier,
        pointer_json,
        ttl_seconds,
        confidence,
        authority,
        stability,
    ) = row
    return MemoryItem(
        id=item_id,
        type=MemoryType(item_type),
        owner=owner,
        summary=summary,
        content=json.loads(content_json) if content_json else None,
        tags=json.loads(tags_json) if tags_json else [],
        created_at=datetime_from_iso(created_at),
        updated_at=datetime_from_iso(updated_at),
        last_accessed=datetime_from_iso(last_accessed) if last_accessed else None,
        tier=StorageTier(tier),
        pointer=json.loads(pointer_json) if pointer_json else {},
        ttl_seconds=ttl_seconds,
        confidence=confidence,
        authority=authority,
        stability=stability,
    )


def datetime_from_iso(value: Optional[str]):
    if not value:
        return utc_now()
    return datetime.fromisoformat(value)
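SQLiteMetadataStore._upsert_sync relies on SQLite's INSERT ... ON CONFLICT(id) DO UPDATE upsert (available in SQLite 3.24+). A minimal standalone sketch of the same pattern against a made-up two-column table:

# Standalone sketch of the ON CONFLICT upsert used by SQLiteMetadataStore.
import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE items (id TEXT PRIMARY KEY, summary TEXT)")

sql = (
    "INSERT INTO items (id, summary) VALUES (?, ?) "
    "ON CONFLICT(id) DO UPDATE SET summary=excluded.summary"
)
conn.execute(sql, ("item-1", "first version"))
conn.execute(sql, ("item-1", "second version"))  # same id: the row is updated, not duplicated

print(conn.execute("SELECT id, summary FROM items").fetchall())
# [('item-1', 'second version')]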
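FileObjectStore writes each JSON record to a sibling .json.tmp file and then replaces the target, so readers never observe a half-written file. A standalone sketch of that write-then-replace step, with a made-up payload and a temporary directory:

# Standalone sketch of the temp-file-then-replace write used by FileObjectStore._put_sync.
import json
import tempfile
from pathlib import Path

root = Path(tempfile.mkdtemp())
path = root / "records" / "item-1.json"
path.parent.mkdir(parents=True, exist_ok=True)

payload = {"summary": "example record"}
tmp_path = path.with_suffix(".json.tmp")
tmp_path.write_text(json.dumps(payload, ensure_ascii=True, indent=2), encoding="utf-8")
tmp_path.replace(path)  # os.replace is atomic on the same filesystem

print(json.loads(path.read_text(encoding="utf-8")))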
memoryagent/system.py
ADDED
@@ -0,0 +1,182 @@
from __future__ import annotations

import asyncio
from typing import Dict, Optional, Union

from memoryagent.config import MemorySystemConfig
from memoryagent.indexers import EpisodicIndexer, PerceptualIndexer, SemanticGraphIndexer
from memoryagent.models import MemoryEvent, MemoryItem, MemoryQuery, MemoryType, StorageTier
from memoryagent.policy import MemoryRoutingPolicy
from memoryagent.retrieval import RetrievalOrchestrator
from memoryagent.storage.base import FeatureStore, GraphStore, MetadataStore, ObjectStore, VectorIndex
from memoryagent.utils import tokenize
from memoryagent.storage.in_memory import SimpleGraphStore, SimpleVectorIndex
from memoryagent.storage.local_disk import (
    FileObjectStore,
    SQLiteFeatureStore,
    SQLiteMetadataStore,
    SQLiteVecIndex,
)
from memoryagent.workers import ArchiverWorker, Compactor, ConsolidationWorker, RehydratorWorker


class MemorySystem:
    """Entry point for the memory framework with local-mode defaults."""

    def __init__(
        self,
        config: Optional[MemorySystemConfig] = None,
        metadata_store: Optional[MetadataStore] = None,
        vector_index: Optional[VectorIndex] = None,
        graph_store: Optional[GraphStore] = None,
        object_store: Optional[ObjectStore] = None,
        feature_store: Optional[FeatureStore] = None,
        embedding_fn=None,
        routing_policy: Optional[MemoryRoutingPolicy] = None,
    ) -> None:
        self.config = config or MemorySystemConfig()
        self.metadata_store = metadata_store or SQLiteMetadataStore(self.config.metadata_db_path)
        if vector_index is not None:
            self.vector_index = vector_index
        elif self.config.use_sqlite_vec:
            self.vector_index = SQLiteVecIndex(
                path=self.config.vector_db_path,
                dim=self.config.vector_dim,
                embedding_fn=embedding_fn,
                extension_path=self.config.sqlite_vec_extension_path,
            )
        else:
            self.vector_index = SimpleVectorIndex()
        self.graph_store = graph_store or SimpleGraphStore()
        self.object_store = object_store or FileObjectStore(self.config.cold_store_path / "records")
        self.feature_store = feature_store or SQLiteFeatureStore(self.config.feature_db_path)

        self.episodic_indexer = EpisodicIndexer(self.vector_index)
        self.semantic_indexer = SemanticGraphIndexer(self.graph_store)
        self.perceptual_indexer = PerceptualIndexer(self.feature_store)
        self.routing_policy = routing_policy or MemoryRoutingPolicy()

        self.retrieval = RetrievalOrchestrator(
            metadata_store=self.metadata_store,
            vector_index=self.vector_index,
            object_store=self.object_store,
            plan=self.config.retrieval_plan,
        )

        self.consolidation_worker = ConsolidationWorker(
            metadata_store=self.metadata_store,
            vector_index=self.vector_index,
            config=self.config,
        )
        self.archiver_worker = ArchiverWorker(
            metadata_store=self.metadata_store,
            object_store=self.object_store,
            vector_index=self.vector_index,
        )
        self.rehydrator_worker = RehydratorWorker(
            metadata_store=self.metadata_store,
            vector_index=self.vector_index,
        )
        self.compactor = Compactor(self.metadata_store)

        self.metrics: Dict[str, int] = {
            "requests": 0,
            "hot_hit": 0,
            "archive_escalation": 0,
            "cold_fetch": 0,
            "thrash_detected": 0,
            "tokens_returned": 0,
            "tokens_saved_estimate": 0,
        }

    def write(self, event: Union[MemoryEvent, MemoryItem, dict]) -> None:
        self._run_async(self.write_async(event))

    async def write_async(self, event: Union[MemoryEvent, MemoryItem, dict]) -> MemoryItem:
        item = self._coerce_event(event)
        if item.type == MemoryType.WORKING and item.ttl_seconds is None:
            item.ttl_seconds = self.config.working_ttl_seconds
        decision = self.routing_policy.route(item)
        if decision.write_hot:
            await self.metadata_store.upsert(item)
        if decision.write_vector:
            await self.episodic_indexer.index_hot(item)
        if decision.write_features:
            await self.perceptual_indexer.index(item)
        await self.semantic_indexer.index(item)
        return item

    def write_perceptual(self, payload: Union[MemoryEvent, MemoryItem, dict]) -> None:
        self._run_async(self.write_perceptual_async(payload))

    async def write_perceptual_async(self, payload: Union[MemoryEvent, MemoryItem, dict]) -> MemoryItem:
        item = self._coerce_event(payload)
        item.type = MemoryType.PERCEPTUAL
        decision = self.routing_policy.route(item)
        if decision.write_hot:
            await self.metadata_store.upsert(item)
        if decision.write_vector:
            await self.episodic_indexer.index_hot(item)
        if decision.write_features:
            await self.perceptual_indexer.index(item)
        return item

    def retrieve(self, query: Union[MemoryQuery, str], owner: Optional[str] = None):
        return self._run_async(self.retrieve_async(query, owner))

    async def retrieve_async(self, query: Union[MemoryQuery, str], owner: Optional[str] = None):
        if isinstance(query, str):
            if not owner:
                raise ValueError("owner is required when query is a string")
            query = MemoryQuery(text=query, owner=owner)
        bundle = await self.retrieval.retrieve(query)
        self.metrics["requests"] += 1
        if StorageTier.ARCHIVE_INDEX in bundle.used_tiers:
            self.metrics["archive_escalation"] += 1
        if StorageTier.COLD in bundle.used_tiers:
            self.metrics["cold_fetch"] += 1
        if bundle.used_tiers and bundle.used_tiers[0] == StorageTier.HOT:
            self.metrics["hot_hit"] += 1
        returned_tokens = sum(len(tokenize(block.text)) for block in bundle.blocks)
        self.metrics["tokens_returned"] += returned_tokens
        baseline = self.config.retrieval_plan.max_results * 50
        self.metrics["tokens_saved_estimate"] += max(0, baseline - returned_tokens)
        return bundle

    def flush(self, owner: str):
        return self._run_async(self.flush_async(owner))

    async def flush_async(self, owner: str):
        new_items = await self.consolidation_worker.run_once(owner)
        if self.config.consolidation.archive_on_flush:
            await self.archiver_worker.run_once(owner)
        await self.compactor.run_once(owner)
        return new_items

    async def record_access(self, item_id) -> None:
        await self.rehydrator_worker.record_access(item_id)
        await self.metadata_store.update_access(item_id)

    async def rehydrate(self, owner: str):
        warmed = await self.rehydrator_worker.run_once(owner)
        if warmed:
            self.metrics["thrash_detected"] += 1
        return warmed

    def _coerce_event(self, event: Union[MemoryEvent, MemoryItem, dict]) -> MemoryItem:
        if isinstance(event, MemoryItem):
            return event
        if isinstance(event, MemoryEvent):
            return event.to_item()
        if isinstance(event, dict):
            return MemoryEvent(**event).to_item()
        raise TypeError("Unsupported event payload")

    def _run_async(self, coro):
        try:
            loop = asyncio.get_running_loop()
        except RuntimeError:
            loop = None
        if loop and loop.is_running():
            raise RuntimeError("MemorySystem sync API called inside an event loop; use *_async methods.")
        return asyncio.run(coro)