memuron 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- memuron/__init__.py +3 -0
- memuron/actions/__init__.py +12 -0
- memuron/actions/context.py +63 -0
- memuron/actions/helpers.py +88 -0
- memuron/actions/memory.py +340 -0
- memuron/actions/memory_write.py +290 -0
- memuron/actions/nodes.py +340 -0
- memuron/actions/registry.py +5 -0
- memuron/actions/runtime.py +37 -0
- memuron/actions/spaces_documents.py +720 -0
- memuron/actions/sync.py +155 -0
- memuron/application/__init__.py +1 -0
- memuron/application/api.py +206 -0
- memuron/application/app.py +103 -0
- memuron/application/capabilities.py +82 -0
- memuron/application/cli.py +35 -0
- memuron/application/config.py +176 -0
- memuron/application/mcp.py +44 -0
- memuron/application/mcp_oauth.py +290 -0
- memuron/application/registry.py +52 -0
- memuron/context.py +532 -0
- memuron/documents/__init__.py +1 -0
- memuron/documents/link_guardian.py +192 -0
- memuron/documents/linking.py +292 -0
- memuron/documents/parser.py +1152 -0
- memuron/documents/storage.py +151 -0
- memuron/documents/url_ingest.py +375 -0
- memuron/domain/__init__.py +1 -0
- memuron/domain/decoders.py +1 -0
- memuron/domain/encoders.py +185 -0
- memuron/domain/lifecycles.py +8 -0
- memuron/domain/limits.py +6 -0
- memuron/domain/representations.py +56 -0
- memuron/domain/schemas.py +581 -0
- memuron/domain/scope_filter.py +104 -0
- memuron/graphfs/__init__.py +1 -0
- memuron/graphfs/manual.py +635 -0
- memuron/graphfs/projection.py +578 -0
- memuron/graphfs/query.py +1782 -0
- memuron/graphfs/read_model.py +574 -0
- memuron/ingest/__init__.py +1 -0
- memuron/ingest/guardian.py +213 -0
- memuron/ingest/jobs.py +424 -0
- memuron/ingest/prompts.py +147 -0
- memuron/memory/__init__.py +1 -0
- memuron/memory/engine.py +35 -0
- memuron/memory/projections.py +452 -0
- memuron/memory/recipes.py +3247 -0
- memuron/persistence/__init__.py +1 -0
- memuron/persistence/db_pool.py +57 -0
- memuron/persistence/identity_store.py +918 -0
- memuron/persistence/store_helpers.py +16 -0
- memuron/search/__init__.py +1 -0
- memuron/search/fulltext.py +110 -0
- memuron/search/hybrid.py +284 -0
- memuron/search/pgvector.py +252 -0
- memuron/security/__init__.py +1 -0
- memuron/security/auth.py +143 -0
- memuron/security/auth_provider.py +119 -0
- memuron/security/authorization.py +53 -0
- memuron/security/clerk_scopes.py +94 -0
- memuron/security/clerk_webhooks.py +61 -0
- memuron/security/jwt_tokens.py +53 -0
- memuron/security/passwords.py +38 -0
- memuron/security/tenant.py +58 -0
- memuron/spaces/__init__.py +1 -0
- memuron/spaces/model.py +35 -0
- memuron/spaces/service.py +155 -0
- memuron/sync/__init__.py +25 -0
- memuron/sync/folder.py +828 -0
- memuron-0.1.1.dist-info/METADATA +242 -0
- memuron-0.1.1.dist-info/RECORD +74 -0
- memuron-0.1.1.dist-info/WHEEL +4 -0
- memuron-0.1.1.dist-info/entry_points.txt +4 -0
|
@@ -0,0 +1,3247 @@
|
|
|
1
|
+
"""Memuron memory recipes: ingest, CRUD, and graph helpers."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from collections import OrderedDict
|
|
7
|
+
from dataclasses import replace
|
|
8
|
+
from datetime import UTC, datetime
|
|
9
|
+
from typing import Any
|
|
10
|
+
from uuid import uuid4
|
|
11
|
+
|
|
12
|
+
from artha_engine import ArthaEngine, EmbeddingArthaanu, EmbeddingCandidate, EmbeddingSearchParams
|
|
13
|
+
from artha_engine.runtime.serde import arthaanu_from_dict, arthaanu_to_dict
|
|
14
|
+
from artha_engine.store.projection_sql import sql_store_fetchall, sql_store_has_tables
|
|
15
|
+
|
|
16
|
+
from memuron.domain.encoders import MemoryEncoderInput, MemoryLinkEncoderInput
|
|
17
|
+
from memuron.application.config import settings
|
|
18
|
+
from memuron.documents.parser import ParsedDocument, parse_source
|
|
19
|
+
from memuron.documents.storage import maybe_store_source_file, presign_source_object
|
|
20
|
+
from memuron.domain.scope_filter import parse_comma_scope, scope_matches_filter
|
|
21
|
+
from memuron.domain.schemas import merge_source_identity_metadata, source_identity_from_metadata
|
|
22
|
+
from memuron.search.pgvector import (
|
|
23
|
+
pgvector_is_ready,
|
|
24
|
+
pgvector_link_search,
|
|
25
|
+
pgvector_memory_search,
|
|
26
|
+
pgvector_memory_search_ids,
|
|
27
|
+
)
|
|
28
|
+
from memuron.ingest.guardian import AgnoGuardian, GuardianError
|
|
29
|
+
from memuron.ingest.prompts import ConnectionSpec, GuardianWritePlan
|
|
30
|
+
from memuron.spaces.service import apply_guardian_space_scope
|
|
31
|
+
from memuron.domain.representations import MemoryArthaanu, MemoryLinkArthaanu, MemoryLinkValue, MemoryValue
|
|
32
|
+
from memuron.security.tenant import merge_org_scope, org_scope_token
|
|
33
|
+
|
|
34
|
+
# --- Tunables for ingest, linking and search -------------------------------
MAX_GUARDIAN_LINKS = 2
# Default ``top_k`` used by ``find_candidates``.
CANDIDATE_TOP_K = 10
LINK_CANDIDATE_TOP_K = 15
AUTO_LINK_MIN_SCORE = 0.2
DOCUMENT_MARKDOWN_PREVIEW_CHARS = 12_000
DOCUMENT_CHUNK_SCOPE_PREFIX = "document:"
# Upper bound on entries kept in the per-engine query embedding cache.
QUERY_EMBEDDING_CACHE_SIZE = 128

# --- External identity keys -------------------------------------------------
# Keys recognised when collecting an external identity from keyword values.
EXTERNAL_ID_FIELDS = (
    "custom_id",
    "session_id",
    "thread_id",
    "source_id",
    "source_url",
)
# Subset of the keys above that is compared when looking for an existing
# memory with the same external identity.
IDEMPOTENCY_EXTERNAL_ID_FIELDS = (
    "custom_id",
    "source_id",
)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _now_stamp() -> str:
|
|
46
|
+
return datetime.now(UTC).strftime("%Y%m%d%H%M")
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def external_identity_from_values(**values: str | None) -> dict[str, str]:
    """Collect the non-blank external identity fields from keyword values.

    Only keys listed in ``EXTERNAL_ID_FIELDS`` are considered. Values that are
    not strings, or that are empty after stripping whitespace, are dropped.
    Kept values are returned stripped, in ``EXTERNAL_ID_FIELDS`` order.
    """
    candidates = ((field, values.get(field)) for field in EXTERNAL_ID_FIELDS)
    return {
        field: raw.strip()
        for field, raw in candidates
        if isinstance(raw, str) and raw.strip()
    }
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def _external_identity_from_metadata(metadata: dict[str, Any] | None) -> dict[str, str]:
    """Gather external identity fields from a metadata mapping.

    Sources are merged in this order, later ones overriding earlier ones:
    ``source_identity_from_metadata(metadata)``, ``metadata["system"]["source"]``,
    ``metadata["system"]["external_identity"]``, then the top-level metadata
    keys themselves. Anything that is not a dict yields an empty result.
    """
    if not isinstance(metadata, dict):
        return {}

    layers: list[dict[str, str]] = [source_identity_from_metadata(metadata)]
    system = metadata.get("system")
    if isinstance(system, dict):
        for section_name in ("source", "external_identity"):
            section = system.get(section_name)
            if isinstance(section, dict):
                layers.append(external_identity_from_values(**section))
    layers.append(external_identity_from_values(**metadata))

    merged: dict[str, str] = {}
    for layer in layers:
        merged.update(layer)
    return merged
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def _external_identity_from_payload(payload: dict[str, Any] | None) -> dict[str, str]:
    """Gather external identity fields from a payload mapping.

    Fields under ``payload["source_identity"]`` are read first; identity keys
    found at the top level of the payload override them. A non-dict payload
    yields an empty result.
    """
    if not isinstance(payload, dict):
        return {}
    nested = payload.get("source_identity")
    merged: dict[str, str] = (
        external_identity_from_values(**nested) if isinstance(nested, dict) else {}
    )
    merged.update(external_identity_from_values(**payload))
    return merged
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def _memory_external_identity(memory: dict[str, Any]) -> dict[str, str]:
    """Return the external identity of a memory dict.

    Payload-derived fields are collected first; metadata-derived fields
    override them on conflict.
    """
    from_payload = _external_identity_from_payload(memory.get("payload"))
    from_metadata = _external_identity_from_metadata(memory.get("metadata"))
    return {**from_payload, **from_metadata}
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def _metadata_with_external_identity(
    metadata: dict[str, Any] | None,
    external_identity: dict[str, str] | None,
) -> dict[str, Any]:
    """Return a copy of ``metadata`` with the external identity written in.

    The identity is whatever the metadata already carries, overridden by the
    cleaned ``external_identity`` values. When it is empty the plain copy is
    returned. Otherwise the identity is merged via
    ``merge_source_identity_metadata``, mirrored as top-level keys, and stored
    under ``system["external_identity"]``.
    """
    enriched = dict(metadata or {})
    identity = dict(_external_identity_from_metadata(enriched))
    identity.update(external_identity_from_values(**(external_identity or {})))
    if identity:
        enriched = merge_source_identity_metadata(enriched, **identity)
        enriched.update(identity)
        system_section = dict(enriched.get("system") or {})
        # Keep the old worker alias readable while standardizing on system.source.
        system_section["external_identity"] = identity
        enriched["system"] = system_section
    return enriched
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def _payload_with_external_identity(
    payload: dict[str, Any] | None,
    external_identity: dict[str, str] | None,
) -> dict[str, Any]:
    """Return a copy of ``payload`` with ``source_identity`` filled in.

    The identity is whatever the payload already carries, overridden by the
    cleaned ``external_identity`` values. The key is only written when the
    resulting identity is non-empty.
    """
    result = dict(payload or {})
    identity = dict(_external_identity_from_payload(result))
    identity.update(external_identity_from_values(**(external_identity or {})))
    if identity:
        result["source_identity"] = identity
    return result
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def refresh_memory_projections(engine: ArthaEngine) -> None:
    """Refresh the four Memuron projections on ``engine``, in a fixed order."""
    projection_names = (
        "memuron_memories",
        "memuron_links",
        "memuron_placements",
        "memuron_fs",
    )
    for projection_name in projection_names:
        engine.refresh_projection(projection_name)
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
# Attribute name set to True on the store object by ensure_memory_projections
# and read back by projections_are_ready.
_PROJECTIONS_READY_FLAG = "_memuron_projections_ready"
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def projections_are_ready(store: object) -> bool:
    """Report whether ``store`` carries a truthy projections-ready marker."""
    marker = getattr(store, _PROJECTIONS_READY_FLAG, False)
    return True if marker else False
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def _require_memory_projections(engine: ArthaEngine) -> None:
    """Raise ``RuntimeError`` unless the engine's store is marked as ready."""
    if projections_are_ready(engine.store):
        return
    message = (
        "Memuron projections are not initialized. "
        "Call ensure_memory_projections during app startup or test setup."
    )
    raise RuntimeError(message)
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
def ensure_memory_projections(engine: ArthaEngine) -> None:
    """Initialise the Memuron projections once per store.

    Does nothing when the store is already marked ready. Otherwise each
    projection is created through the engine registry and initialised against
    the store, the pgvector schema is ensured for the embedder's dimensions,
    and the store is marked ready.
    """
    store = engine.store
    if not projections_are_ready(store):
        projection_names = (
            "memuron_memories",
            "memuron_links",
            "memuron_placements",
            "memuron_fs",
        )
        for projection_name in projection_names:
            created = engine.registry.create_projection(projection_name)
            created.init(store)  # type: ignore[attr-defined]

        from memuron.search.pgvector import ensure_pgvector_schema

        ensure_pgvector_schema(store, engine.embedder.dimensions)
        setattr(store, _PROJECTIONS_READY_FLAG, True)
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def _memory_node_type(existing: MemoryArthaanu) -> str:
|
|
167
|
+
return str(getattr(existing.value, "node_type", "text") or "text")
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
def _rich_fields_from_memory(existing: MemoryArthaanu) -> dict[str, Any]:
|
|
171
|
+
return {
|
|
172
|
+
"node_type": _memory_node_type(existing),
|
|
173
|
+
"payload": dict(getattr(existing.value, "payload", {}) or {}),
|
|
174
|
+
"perception": getattr(existing.value, "perception", None),
|
|
175
|
+
"encoding": getattr(existing.value, "encoding", "memory"),
|
|
176
|
+
"metadata": dict(getattr(existing.value, "metadata", {}) or {}),
|
|
177
|
+
}
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def _coerce_memory_arthaanu(engine: ArthaEngine, item: object) -> MemoryArthaanu:
    """Return ``item`` as a ``MemoryArthaanu``.

    Instances pass through unchanged; anything else is round-tripped through
    ``arthaanu_to_dict`` / ``arthaanu_from_dict`` using the engine registry's
    arthaanu types.
    """
    if not isinstance(item, MemoryArthaanu):
        serialized = arthaanu_to_dict(item)
        return arthaanu_from_dict(
            serialized,
            type_registry=engine.registry.arthaanu_types,
        )
    return item
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
def _guardian_may_update_target(existing: MemoryArthaanu) -> bool:
|
|
190
|
+
"""Text ingest may merge into text memories only — not image/document/collection nodes."""
|
|
191
|
+
return _memory_node_type(existing) == "text"
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
def _encode_memory(
    engine: ArthaEngine,
    *,
    content: str,
    scope: list[str] | None = None,
    node_type: str = "text",
    payload: dict[str, Any] | None = None,
    perception: str | None = None,
    encoding: str = "memory",
    metadata: dict[str, Any] | None = None,
    artha_id: str | None = None,
    retrieval_count: int = 0,
    persist: bool = False,
) -> MemoryArthaanu:
    """Run the engine's ``"memory"`` encoder and return the encoded memory.

    ``scope``, ``payload`` and ``metadata`` are replaced by empty containers
    when falsy. ``persist`` is forwarded to ``engine.encode``.

    Raises:
        TypeError: if the encoder result is not a ``MemoryArthaanu``.
    """
    encoder_input = dict(
        content=content,
        scope=scope or [],
        node_type=node_type,
        payload=payload or {},
        perception=perception,
        encoding=encoding,
        metadata=metadata or {},
        artha_id=artha_id,
        retrieval_count=retrieval_count,
    )
    result = engine.encode("memory", encoder_input, persist=persist)
    if isinstance(result, MemoryArthaanu):
        return result
    raise TypeError("memory encoder must return MemoryArthaanu")
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
def _encode_link(
    engine: ArthaEngine,
    *,
    source_id: str,
    target_id: str,
    description: str,
    metadata: dict[str, Any] | None = None,
    persist: bool = False,
) -> MemoryLinkArthaanu:
    """Run the engine's ``"memory_link"`` encoder and return the encoded link.

    A falsy ``metadata`` is replaced by an empty dict. ``persist`` is
    forwarded to ``engine.encode``.

    Raises:
        TypeError: if the encoder result is not a ``MemoryLinkArthaanu``.
    """
    encoder_input = dict(
        source_id=source_id,
        target_id=target_id,
        description=description,
        metadata=metadata or {},
    )
    result = engine.encode("memory_link", encoder_input, persist=persist)
    if isinstance(result, MemoryLinkArthaanu):
        return result
    raise TypeError("memory_link encoder must return MemoryLinkArthaanu")
|
|
250
|
+
|
|
251
|
+
|
|
252
|
+
def _append_memory_event(
    engine: ArthaEngine,
    *,
    event_type: str,
    memory: MemoryArthaanu,
    component: str,
    extra_payload: dict[str, object] | None = None,
    event_metadata: dict[str, object] | None = None,
) -> str:
    """Append an event for ``memory`` to the engine's store.

    The event payload starts with a ``created_at`` stamp, then takes
    ``extra_payload`` and finally ``event_metadata``; later sources override
    earlier keys. Returns whatever ``store.append_event`` returns.
    """
    event_payload: dict[str, object] = {
        "created_at": _now_stamp(),
        **(extra_payload or {}),
        **(event_metadata or {}),
    }
    return engine.store.append_event(
        event_type=event_type,
        arthaanu=memory,
        component=component,
        payload=event_payload,
    )
|
|
272
|
+
|
|
273
|
+
|
|
274
|
+
def _append_link_event(
|
|
275
|
+
engine: ArthaEngine,
|
|
276
|
+
*,
|
|
277
|
+
event_type: str,
|
|
278
|
+
link: MemoryLinkArthaanu,
|
|
279
|
+
component: str,
|
|
280
|
+
extra_payload: dict[str, object] | None = None,
|
|
281
|
+
event_metadata: dict[str, object] | None = None,
|
|
282
|
+
) -> str:
|
|
283
|
+
payload: dict[str, object] = {}
|
|
284
|
+
if extra_payload:
|
|
285
|
+
payload.update(extra_payload)
|
|
286
|
+
if event_metadata:
|
|
287
|
+
payload.update(event_metadata)
|
|
288
|
+
return engine.store.append_event(
|
|
289
|
+
event_type=event_type,
|
|
290
|
+
arthaanu=link,
|
|
291
|
+
component=component,
|
|
292
|
+
payload=payload,
|
|
293
|
+
)
|
|
294
|
+
|
|
295
|
+
|
|
296
|
+
def _list_memory_rows(engine: ArthaEngine) -> list[dict[str, Any]]:
    """Return every row of the ``memuron_memories`` projection.

    SQL-backed stores are queried directly (newest sequence first). Otherwise
    rows are built from the store's ``memuron_memories`` mapping, if any.
    Requires the projections to be initialised.
    """
    _require_memory_projections(engine)
    store = engine.store
    if not sql_store_has_tables(store):
        bucket = getattr(store, "memuron_memories", {})
        return [
            {
                "artha_id": artha_id,
                "content": item["content"],
                "node_type": item.get("node_type", "text"),
                "payload_json": item.get("payload", {}),
                "perception": item.get("perception"),
                "encoding": item.get("encoding", "memory"),
                "metadata_json": item.get("metadata", {}),
                "scope_json": item.get("scope", []),
                "embedding_json": item.get("embedding", []),
                "created_at": item.get("created_at"),
                "updated_at": item.get("updated_at"),
                "sequence": item.get("sequence", 0),
            }
            for artha_id, item in bucket.items()
        ]
    query = """
            SELECT artha_id, content, scope_json, embedding_json,
                   node_type, payload_json, perception, encoding, metadata_json,
                   created_at, updated_at, sequence
            FROM memuron_memories
            ORDER BY sequence DESC
            """
    return sql_store_fetchall(store, query)
|
|
330
|
+
|
|
331
|
+
|
|
332
|
+
def _row_to_memory_dict(row: dict[str, Any]) -> dict[str, Any]:
|
|
333
|
+
scope = row.get("scope_json")
|
|
334
|
+
embedding = row.get("embedding_json")
|
|
335
|
+
payload = row.get("payload_json")
|
|
336
|
+
metadata = row.get("metadata_json")
|
|
337
|
+
if isinstance(scope, str):
|
|
338
|
+
scope = json.loads(scope)
|
|
339
|
+
if isinstance(embedding, str):
|
|
340
|
+
embedding = json.loads(embedding)
|
|
341
|
+
if isinstance(payload, str):
|
|
342
|
+
payload = json.loads(payload)
|
|
343
|
+
if isinstance(metadata, str):
|
|
344
|
+
metadata = json.loads(metadata)
|
|
345
|
+
return {
|
|
346
|
+
"id": row["artha_id"],
|
|
347
|
+
"content": row["content"],
|
|
348
|
+
"node_type": row.get("node_type") or "text",
|
|
349
|
+
"payload": payload if isinstance(payload, dict) else {},
|
|
350
|
+
"perception": row.get("perception") or row["content"],
|
|
351
|
+
"encoding": row.get("encoding") or "memory",
|
|
352
|
+
"metadata": metadata if isinstance(metadata, dict) else {},
|
|
353
|
+
"scope": scope if isinstance(scope, list) else [],
|
|
354
|
+
"embedding": embedding if isinstance(embedding, list) else [],
|
|
355
|
+
"timestamp": row.get("created_at") or row.get("updated_at") or _now_stamp(),
|
|
356
|
+
}
|
|
357
|
+
|
|
358
|
+
|
|
359
|
+
def _list_link_rows(engine: ArthaEngine) -> list[dict[str, Any]]:
    """Return every row of the ``memuron_links`` projection.

    SQL-backed stores are queried directly (newest sequence first). Otherwise
    rows are built from the store's ``memuron_links`` mapping, if any.
    Requires the projections to be initialised.
    """
    _require_memory_projections(engine)
    store = engine.store
    if sql_store_has_tables(store):
        query = """
            SELECT link_id, source_id, target_id, description, metadata_json,
                   embedding_json, sequence
            FROM memuron_links
            ORDER BY sequence DESC
            """
        return sql_store_fetchall(store, query)

    rows: list[dict[str, Any]] = []
    for link_id, item in getattr(store, "memuron_links", {}).items():
        rows.append(
            {
                "link_id": link_id,
                "source_id": item["source_id"],
                "target_id": item["target_id"],
                "description": item["description"],
                "metadata_json": item.get("metadata", {}),
                "embedding_json": item.get("embedding", []),
                "sequence": item.get("sequence", 0),
            }
        )
    return rows
|
|
385
|
+
|
|
386
|
+
|
|
387
|
+
def _list_placement_rows(engine: ArthaEngine) -> list[dict[str, Any]]:
    """Return every row of the ``memuron_placements`` projection.

    SQL-backed stores are queried directly (newest sequence first). Otherwise
    rows are built from the store's ``memuron_placements`` mapping, if any.
    Requires the projections to be initialised.
    """
    _require_memory_projections(engine)
    store = engine.store
    if sql_store_has_tables(store):
        query = """
            SELECT placement_id, parent_id, child_id, name, scope_json, metadata_json,
                   inherit_parent_scope, sequence
            FROM memuron_placements
            ORDER BY sequence DESC
            """
        return sql_store_fetchall(store, query)

    rows: list[dict[str, Any]] = []
    for placement_id, item in getattr(store, "memuron_placements", {}).items():
        rows.append(
            {
                "placement_id": placement_id,
                "parent_id": item["parent_id"],
                "child_id": item["child_id"],
                "name": item["name"],
                "scope_json": item.get("scope", []),
                "metadata_json": item.get("metadata", {}),
                "inherit_parent_scope": item.get("inherit_parent_scope", True),
                "sequence": item.get("sequence", 0),
            }
        )
    return rows
|
|
414
|
+
|
|
415
|
+
|
|
416
|
+
def _filtered_memory_rows(
    engine: ArthaEngine,
    *,
    scope: list[str] | None = None,
) -> list[dict[str, Any]]:
    """Return the memory projection rows whose scope matches ``scope``.

    With no scope patterns every row is returned as-is. With patterns, only
    each row's scope column is decoded (payload, metadata and embedding are
    left untouched) and tested with ``scope_matches_filter``.

    Args:
        engine: Engine whose store holds the projections.
        scope: Scope patterns to match; ``None`` or empty disables filtering.

    Returns:
        The raw projection rows that pass the filter, in listing order.
    """
    scope_patterns = list(scope or [])
    all_rows = _list_memory_rows(engine)
    if not scope_patterns:
        # Nothing to filter on: skip decoding entirely.
        return list(all_rows)

    kept: list[dict[str, Any]] = []
    for row in all_rows:
        row_scope = _parse_json_field(row.get("scope_json"), [])
        if not isinstance(row_scope, list):
            # A scope that is not a list is treated as empty.
            row_scope = []
        if scope_matches_filter(row_scope, scope_patterns):
            kept.append(row)
    return kept
|
|
429
|
+
|
|
430
|
+
|
|
431
|
+
def _scope_matches_filter(scope: list[str], scope_filter: list[str] | None) -> bool:
    """Delegate to ``scope_matches_filter`` with the same arguments."""
    matched = scope_matches_filter(scope, scope_filter)
    return matched
|
|
433
|
+
|
|
434
|
+
|
|
435
|
+
def _parse_json_field(value: Any, default: Any) -> Any:
|
|
436
|
+
if value is None:
|
|
437
|
+
return default
|
|
438
|
+
if isinstance(value, (list, dict)):
|
|
439
|
+
return value
|
|
440
|
+
try:
|
|
441
|
+
return json.loads(str(value))
|
|
442
|
+
except json.JSONDecodeError:
|
|
443
|
+
return default
|
|
444
|
+
|
|
445
|
+
|
|
446
|
+
def _cosine_score(left: list[float], right: list[float]) -> float:
|
|
447
|
+
if not left or not right or len(left) != len(right):
|
|
448
|
+
return 0.0
|
|
449
|
+
dot = sum(a * b for a, b in zip(left, right, strict=False))
|
|
450
|
+
left_norm = sum(a * a for a in left) ** 0.5
|
|
451
|
+
right_norm = sum(b * b for b in right) ** 0.5
|
|
452
|
+
if left_norm == 0 or right_norm == 0:
|
|
453
|
+
return 0.0
|
|
454
|
+
raw = dot / (left_norm * right_norm)
|
|
455
|
+
return max(0.0, min(1.0, (raw + 1.0) / 2.0))
|
|
456
|
+
|
|
457
|
+
|
|
458
|
+
def _embed_query_vector(engine: ArthaEngine, text: str) -> list[float]:
    """Embed ``text`` as a query vector, using a small per-engine LRU cache.

    The cache is an ``OrderedDict`` stored on the engine under
    ``_memuron_query_embedding_cache``. It is keyed by the stripped text and
    bounded by ``QUERY_EMBEDDING_CACHE_SIZE``. Vectors are cached as tuples,
    and every call returns a fresh list, so callers cannot mutate the cached
    value and always receive the same type.

    Args:
        engine: Engine providing ``embedder.embed_queries``.
        text: Query text; surrounding whitespace is ignored.

    Returns:
        The embedding of the stripped text as a new list.
    """
    normalized = text.strip()
    cache = getattr(engine, "_memuron_query_embedding_cache", None)
    if not isinstance(cache, OrderedDict):
        cache = OrderedDict()
        setattr(engine, "_memuron_query_embedding_cache", cache)

    cached = cache.get(normalized)
    if cached is not None:
        cache.move_to_end(normalized)
        return list(cached)

    vector = engine.embedder.embed_queries([normalized])[0]
    # A key that was absent lands at the most-recent end on insertion.
    cache[normalized] = tuple(vector)
    while len(cache) > QUERY_EMBEDDING_CACHE_SIZE:
        cache.popitem(last=False)
    # Return a list on a miss too, matching the cache-hit path.
    return list(vector)
|
|
474
|
+
|
|
475
|
+
|
|
476
|
+
def _list_memory_search_rows(engine: ArthaEngine) -> list[dict[str, Any]]:
    """Return slim memory rows for similarity search (id, scope, embedding only)."""
    _require_memory_projections(engine)
    store = engine.store
    if sql_store_has_tables(store):
        query = """
            SELECT artha_id, scope_json, embedding_json
            FROM memuron_memories
            """
        return sql_store_fetchall(store, query)

    slim_rows: list[dict[str, Any]] = []
    for artha_id, item in getattr(store, "memuron_memories", {}).items():
        slim_rows.append(
            {
                "artha_id": artha_id,
                "scope_json": item.get("scope", []),
                "embedding_json": item.get("embedding", []),
            }
        )
    return slim_rows
|
|
497
|
+
|
|
498
|
+
|
|
499
|
+
def _list_link_search_rows(engine: ArthaEngine) -> list[dict[str, Any]]:
    """Return link rows for similarity search (no ``sequence`` column, unordered)."""
    _require_memory_projections(engine)
    store = engine.store
    if sql_store_has_tables(store):
        query = """
            SELECT link_id, source_id, target_id, description, metadata_json, embedding_json
            FROM memuron_links
            """
        return sql_store_fetchall(store, query)

    link_rows: list[dict[str, Any]] = []
    for link_id, item in getattr(store, "memuron_links", {}).items():
        link_rows.append(
            {
                "link_id": link_id,
                "source_id": item["source_id"],
                "target_id": item["target_id"],
                "description": item["description"],
                "metadata_json": item.get("metadata", {}),
                "embedding_json": item.get("embedding", []),
            }
        )
    return link_rows
|
|
522
|
+
|
|
523
|
+
|
|
524
|
+
def _fetch_memory_rows_by_ids(
    engine: ArthaEngine,
    memory_ids: list[str],
) -> dict[str, dict[str, Any]]:
    """Fetch full memory projection rows for the given ids, keyed by id.

    An empty id list returns an empty dict without touching the store.
    SQL-backed stores use a parameterised ``IN`` query; otherwise the store's
    ``memuron_memories`` mapping is scanned. Ids with no row are omitted.
    """
    if not memory_ids:
        return {}

    wanted = set(memory_ids)
    store = engine.store
    if sql_store_has_tables(store):
        placeholders = ", ".join(["?"] * len(memory_ids))
        query = f"""
            SELECT artha_id, content, scope_json, embedding_json,
                   node_type, payload_json, perception, encoding, metadata_json,
                   created_at, updated_at, sequence
            FROM memuron_memories
            WHERE artha_id IN ({placeholders})
            """
        fetched = sql_store_fetchall(store, query, tuple(memory_ids))
        return {str(row["artha_id"]): row for row in fetched}

    bucket = getattr(store, "memuron_memories", {})
    return {
        artha_id: {
            "artha_id": artha_id,
            "content": item["content"],
            "node_type": item.get("node_type") or "text",
            "payload_json": item.get("payload", {}),
            "perception": item.get("perception"),
            "encoding": item.get("encoding", "memory"),
            "scope_json": item.get("scope", []),
            "embedding_json": item.get("embedding", []),
            "metadata_json": item.get("metadata", {}),
            "created_at": item.get("created_at"),
            "updated_at": item.get("updated_at"),
            "sequence": item.get("sequence", 0),
        }
        for artha_id, item in bucket.items()
        if artha_id in wanted
    }
|
|
567
|
+
|
|
568
|
+
|
|
569
|
+
def _cosine_similarity_hits(
    engine: ArthaEngine,
    query_vector: list[float],
    rows: list[dict[str, Any]],
    *,
    scope: list[str] | None = None,
    top_k: int,
    id_key: str,
    embedding_key: str = "embedding_json",
    scope_key: str | None = "scope_json",
) -> list[tuple[str, float]]:
    """Rank ``rows`` against ``query_vector`` with the ``cosine_similarity`` decoder.

    Rows are scope-filtered first when both ``scope_key`` and ``scope`` are
    given. Rows whose embedding is empty or has a different length than the
    query are skipped. Rows are wrapped as link or memory arthaanus depending
    on whether ``id_key`` is ``"link_id"``.

    Returns:
        ``(artha_id, score)`` pairs for the decoder's hits, or an empty list
        when there are no rows, no usable candidates, or the decoder result
        has no ``hits`` attribute.
    """
    if not rows:
        return []

    if scope_key is not None and scope:
        eligible = [
            row
            for row in rows
            if _scope_matches_filter(_parse_json_field(row.get(scope_key), []), scope)
        ]
    else:
        eligible = rows

    expected_dimensions = len(query_vector)
    query = EmbeddingArthaanu(
        name="memuron_query",
        value=query_vector,
        dimensions=expected_dimensions,
        model=engine.embedder.query_model_name,
    )

    def _as_candidate(row: dict[str, Any], vector: list[float]) -> EmbeddingCandidate:
        # Wrap one row plus its decoded embedding as a decoder candidate.
        item_id = str(row[id_key])
        if id_key == "link_id":
            link = _link_row_to_dict(row)
            item = MemoryLinkArthaanu(
                artha_id=item_id,
                name="memory_link",
                value=MemoryLinkValue(
                    source_id=link["source_id"],
                    target_id=link["target_id"],
                    description=link["description"],
                    embedding=vector,
                ),
            )
        else:
            item = MemoryArthaanu(
                artha_id=item_id,
                name="memory",
                value=MemoryValue(
                    content="",
                    scope=_parse_json_field(row.get(scope_key), []) if scope_key else [],
                    embedding=vector,
                ),
            )
        return EmbeddingCandidate(
            item=item,
            embedding=EmbeddingArthaanu(
                name=f"{item_id}_embedding",
                value=vector,
                dimensions=len(vector),
            ),
        )

    candidates: list[EmbeddingCandidate] = []
    for row in eligible:
        vector = _parse_json_field(row.get(embedding_key), [])
        if vector and len(vector) == expected_dimensions:
            candidates.append(_as_candidate(row, vector))
    if not candidates:
        return []

    result = engine.decode(
        "cosine_similarity",
        query,
        EmbeddingSearchParams(candidates=candidates, top_k=top_k),
    )
    hits = getattr(result, "hits", [])
    return [(hit.item.artha_id, float(hit.score)) for hit in hits]
|
|
653
|
+
|
|
654
|
+
|
|
655
|
+
def _memory_similarity_hits(
    engine: ArthaEngine,
    query_vector: list[float],
    *,
    scope: list[str] | None = None,
    top_k: int,
) -> list[tuple[str, float]]:
    """Return ``(memory_id, score)`` hits for ``query_vector``.

    Uses ``pgvector_memory_search_ids`` when pgvector is ready for the store;
    otherwise scores the slim projection rows with ``_cosine_similarity_hits``.
    """
    store = engine.store
    if not pgvector_is_ready(store):
        search_rows = _list_memory_search_rows(engine)
        return _cosine_similarity_hits(
            engine,
            query_vector,
            search_rows,
            scope=scope,
            top_k=top_k,
            id_key="artha_id",
        )
    return pgvector_memory_search_ids(store, query_vector, top_k=top_k, scope=scope)
|
|
674
|
+
|
|
675
|
+
|
|
676
|
+
def find_candidates(
    engine: ArthaEngine,
    content: str,
    *,
    scope: list[str] | None = None,
    top_k: int = CANDIDATE_TOP_K,
    query_vector: list[float] | None = None,
) -> list[tuple[str, float, dict[str, Any]]]:
    """Find the stored memories most similar to ``content``.

    Args:
        engine: Engine whose store is searched.
        content: Text to embed when ``query_vector`` is not supplied.
        scope: Optional scope patterns restricting the search.
        top_k: Maximum number of hits requested from the similarity search.
        query_vector: Precomputed query embedding; when falsy, ``content`` is
            embedded instead.

    Returns:
        ``(memory_id, score, memory_dict)`` tuples in hit order. Hits whose
        row can no longer be fetched are dropped. Empty when there is nothing
        to search or nothing matched.
    """
    if pgvector_is_ready(engine.store):
        vector = query_vector or _embed_query_vector(engine, content)
        hit_ids = _memory_similarity_hits(
            engine,
            vector,
            scope=scope,
            top_k=top_k,
        )
    else:
        # Load the slim rows once: they serve both the emptiness check (which
        # avoids embedding the query for nothing) and the scoring itself.
        rows = _list_memory_search_rows(engine)
        if not rows:
            return []
        vector = query_vector or _embed_query_vector(engine, content)
        hit_ids = _cosine_similarity_hits(
            engine,
            vector,
            rows,
            scope=scope,
            top_k=top_k,
            id_key="artha_id",
        )
    if not hit_ids:
        return []

    details = _fetch_memory_rows_by_ids(engine, [memory_id for memory_id, _score in hit_ids])
    output: list[tuple[str, float, dict[str, Any]]] = []
    for memory_id, score in hit_ids:
        row = details.get(memory_id)
        if not row:
            continue
        output.append((memory_id, score, _row_to_memory_dict(row)))
    return output
|
|
707
|
+
|
|
708
|
+
|
|
709
|
+
def _tenant_id_from_metadata(event_metadata: dict[str, object] | None) -> str | None:
|
|
710
|
+
if not event_metadata:
|
|
711
|
+
return None
|
|
712
|
+
tenant_id = event_metadata.get("tenant_id")
|
|
713
|
+
return str(tenant_id) if tenant_id else None
|
|
714
|
+
|
|
715
|
+
|
|
716
|
+
def _memory_belongs_to_tenant(memory: dict[str, Any], tenant_id: str | None) -> bool:
|
|
717
|
+
if not tenant_id:
|
|
718
|
+
return True
|
|
719
|
+
return scope_matches_filter(memory.get("scope") or [], [org_scope_token(tenant_id)])
|
|
720
|
+
|
|
721
|
+
|
|
722
|
+
def _space_tokens_for_identity_context(
|
|
723
|
+
scope: list[str],
|
|
724
|
+
space_context: dict[str, str] | None,
|
|
725
|
+
) -> list[str]:
|
|
726
|
+
tokens: list[str] = []
|
|
727
|
+
active_space = (space_context or {}).get("active_space_token")
|
|
728
|
+
if active_space:
|
|
729
|
+
tokens.append(active_space)
|
|
730
|
+
for token in scope:
|
|
731
|
+
if str(token).startswith("space.") and token not in tokens:
|
|
732
|
+
tokens.append(str(token))
|
|
733
|
+
return tokens
|
|
734
|
+
|
|
735
|
+
|
|
736
|
+
def _memory_matches_identity_context(
    memory: dict[str, Any],
    *,
    tenant_id: str | None,
    scope: list[str],
    space_context: dict[str, str] | None,
) -> bool:
    """Check that *memory* lives in the same tenant and spaces as a request.

    The memory must pass ``_memory_belongs_to_tenant`` and its ``scope`` list
    must contain every token returned by
    ``_space_tokens_for_identity_context(scope, space_context)``.
    """
    if not _memory_belongs_to_tenant(memory, tenant_id):
        return False
    memory_scope = memory.get("scope") or []
    required_spaces = _space_tokens_for_identity_context(scope, space_context)
    return all(space_token in memory_scope for space_token in required_spaces)
|
|
750
|
+
|
|
751
|
+
|
|
752
|
+
def _find_external_identity_match(
    engine: ArthaEngine,
    *,
    external_identity: dict[str, str] | None,
    tenant_id: str | None,
    scope: list[str],
    space_context: dict[str, str] | None,
    node_type: str | None = None,
) -> dict[str, Any] | None:
    """Find the stored memory that shares an external identifier with a request.

    Args:
        engine: Engine whose memory rows are scanned.
        external_identity: Raw external identity values of the request.
        tenant_id: Tenant the match must belong to (falsy disables the check).
        scope: Request scope, used to derive the required space tokens.
        space_context: Optional space context with ``active_space_token``.
        node_type: When given, only memories of this node type are considered
            (a memory without a node type counts as ``"text"``).

    Returns:
        The matching memory dict with the greatest ``timestamp`` string, or
        ``None`` when the request carries no idempotency identifier or nothing
        matches.
    """
    wanted = external_identity_from_values(**(external_identity or {}))
    if not any(wanted.get(field) for field in IDEMPOTENCY_EXTERNAL_ID_FIELDS):
        return None

    found: list[dict[str, Any]] = []
    for row in _list_memory_rows(engine):
        candidate = _row_to_memory_dict(row)
        if node_type is not None and str(candidate.get("node_type") or "text") != node_type:
            continue
        in_context = _memory_matches_identity_context(
            candidate,
            tenant_id=tenant_id,
            scope=scope,
            space_context=space_context,
        )
        if not in_context:
            continue
        stored = _memory_external_identity(candidate)
        for field in IDEMPOTENCY_EXTERNAL_ID_FIELDS:
            value = wanted.get(field)
            if value and value == stored.get(field):
                found.append(candidate)
                break

    if not found:
        return None
    # Stable sort + last element: among equal timestamps the latest-listed row wins.
    return sorted(found, key=lambda item: str(item.get("timestamp") or ""))[-1]
|
|
786
|
+
|
|
787
|
+
|
|
788
|
+
def _filter_candidates_by_tenant(
|
|
789
|
+
candidates: list[tuple[str, float, dict[str, Any]]],
|
|
790
|
+
tenant_id: str | None,
|
|
791
|
+
) -> list[tuple[str, float, dict[str, Any]]]:
|
|
792
|
+
if not tenant_id:
|
|
793
|
+
return candidates
|
|
794
|
+
return [
|
|
795
|
+
(memory_id, score, memory)
|
|
796
|
+
for memory_id, score, memory in candidates
|
|
797
|
+
if _memory_belongs_to_tenant(memory, tenant_id)
|
|
798
|
+
]
|
|
799
|
+
|
|
800
|
+
|
|
801
|
+
def _merge_memory_metadata(
|
|
802
|
+
existing: dict[str, Any] | None,
|
|
803
|
+
incoming: dict[str, Any] | None,
|
|
804
|
+
) -> dict[str, Any]:
|
|
805
|
+
output = dict(existing or {})
|
|
806
|
+
for key, value in (incoming or {}).items():
|
|
807
|
+
if key == "system" and isinstance(value, dict):
|
|
808
|
+
system = dict(output.get("system") or {})
|
|
809
|
+
for system_key, system_value in value.items():
|
|
810
|
+
if isinstance(system_value, dict) and isinstance(system.get(system_key), dict):
|
|
811
|
+
system[system_key] = {**system[system_key], **system_value}
|
|
812
|
+
else:
|
|
813
|
+
system[system_key] = system_value
|
|
814
|
+
output["system"] = system
|
|
815
|
+
else:
|
|
816
|
+
output[key] = value
|
|
817
|
+
return output
|
|
818
|
+
|
|
819
|
+
|
|
820
|
+
def _resolve_update_target(
    engine: ArthaEngine,
    *,
    target_id: str | None,
    tenant_id: str | None,
    candidates: list[tuple[str, float, dict[str, Any]]],
) -> str | None:
    """Decide whether *target_id* may be used as the target of an update.

    Returns:
        ``target_id`` when it is one of the candidate ids, when no tenant is
        given, or when the stored memory belongs to the tenant. ``None`` when
        ``target_id`` is falsy, when ``get_memory`` raises ``KeyError`` for
        it, or when the memory belongs to another tenant.
    """
    if not target_id:
        return None
    known_ids = {candidate_id for candidate_id, _score, _memory in candidates}
    # Candidates were already vetted by the caller; without a tenant there is
    # nothing further to check.
    if target_id in known_ids or not tenant_id:
        return target_id
    try:
        stored = get_memory(engine, target_id)
    except KeyError:
        return None
    return target_id if _memory_belongs_to_tenant(stored, tenant_id) else None
|
|
841
|
+
|
|
842
|
+
|
|
843
|
+
def _validate_connections(
    memory_id: str,
    scope: list[str],
    raw_connections: list[ConnectionSpec],
    candidate_ids: list[str],
    memories_by_id: dict[str, dict[str, Any]],
) -> list[ConnectionSpec]:
    """Filter proposed connections down to the ones that may be written.

    A connection is kept when it names a target present in *memories_by_id*,
    when that target is the memory itself, a candidate, or shares at least one
    scope token with *scope*, and when its stripped description is at least 20
    characters long and contains a ``"?"``. At most ``MAX_GUARDIAN_LINKS``
    connections are returned.
    """
    accepted: list[ConnectionSpec] = []
    for connection in raw_connections:
        target_id = connection.target_id
        description = connection.description.strip()
        if not target_id:
            continue
        if len(accepted) >= MAX_GUARDIAN_LINKS:
            break
        target = memories_by_id.get(target_id)
        if not target:
            continue
        is_outsider = target_id != memory_id and target_id not in candidate_ids
        # Outsiders (neither self nor a candidate) must overlap in scope.
        if is_outsider and not (set(scope) & set(target.get("scope") or [])):
            continue
        if not (len(description) >= 20 and "?" in description):
            continue
        accepted.append(connection)
    return accepted
|
|
869
|
+
|
|
870
|
+
|
|
871
|
+
def _find_link_ids_between(
    engine: ArthaEngine,
    memory_id_1: str,
    memory_id_2: str,
) -> list[str]:
    """Return the ids of all links joining two memories, in either direction.

    Uses the ``memuron_links`` SQL table when ``sql_store_has_tables`` reports
    one, otherwise the store's ``memuron_links`` mapping (treated as empty
    when the attribute is missing).
    """
    store = engine.store
    if not sql_store_has_tables(store):
        endpoints = {memory_id_1, memory_id_2}
        links = getattr(store, "memuron_links", {})
        return [
            link_id
            for link_id, item in links.items()
            if {item["source_id"], item["target_id"]} == endpoints
        ]
    rows = sql_store_fetchall(
        store,
        """
        SELECT link_id FROM memuron_links
        WHERE (source_id = ? AND target_id = ?)
           OR (source_id = ? AND target_id = ?)
        """,
        (memory_id_1, memory_id_2, memory_id_2, memory_id_1),
    )
    return [str(row["link_id"]) for row in rows]
|
|
896
|
+
|
|
897
|
+
|
|
898
|
+
def _normalize_link_description(description: str) -> str:
    """Return *description* case-folded with whitespace runs collapsed to one space."""
    folded = description.strip().casefold()
    words = folded.split()
    return " ".join(words)
|
|
900
|
+
|
|
901
|
+
|
|
902
|
+
def _find_existing_link(
    engine: ArthaEngine,
    *,
    source_id: str,
    target_id: str,
    description: str,
) -> dict[str, Any] | None:
    """Look up a stored link with the same endpoints and description.

    Direction matters (source and target must both match) and descriptions
    are compared after ``_normalize_link_description``.

    Returns:
        The first matching link as a dict, or ``None``.
    """
    wanted = _normalize_link_description(description)
    for row in _list_link_rows(engine):
        same_endpoints = (
            str(row["source_id"]) == source_id and str(row["target_id"]) == target_id
        )
        if not same_endpoints:
            continue
        stored = _normalize_link_description(str(row.get("description") or ""))
        if stored == wanted:
            return _link_row_to_dict(row)
    return None
|
|
916
|
+
|
|
917
|
+
|
|
918
|
+
def create_memory_link(
    engine: ArthaEngine,
    *,
    source_id: str,
    target_id: str,
    description: str,
    event_metadata: dict[str, object] | None = None,
) -> tuple[dict[str, Any], bool]:
    """Create a described link between two memories unless it already exists.

    Args:
        engine: Engine that owns the memories and links.
        source_id: Id of the memory the link starts from.
        target_id: Id of the memory the link points to.
        description: Link description; must not be blank.
        event_metadata: Extra metadata forwarded to the ``link.created`` event.

    Returns:
        ``(link, created)``. ``created`` is ``False`` when an equivalent link
        (same endpoints, same normalized description) was found and returned
        instead of writing a new one.

    Raises:
        ValueError: If *description* is empty or whitespace only.
        KeyError: If ``get_memory`` cannot find either memory.
    """
    _require_memory_projections(engine)
    cleaned = description.strip()
    if not cleaned:
        raise ValueError("description is required")

    # Called for their KeyError on unknown ids; the results are not needed.
    get_memory(engine, source_id)
    get_memory(engine, target_id)

    duplicate = _find_existing_link(
        engine,
        source_id=source_id,
        target_id=target_id,
        description=description,
    )
    if duplicate:
        return duplicate, False

    link = _encode_link(
        engine,
        source_id=source_id,
        target_id=target_id,
        description=cleaned,
        persist=False,
    )
    _append_link_event(
        engine,
        event_type="link.created",
        link=link,
        component="memuron.node.link",
        event_metadata=event_metadata,
    )
    refresh_memory_projections(engine)
    created = {
        "link_id": link.artha_id,
        "source_id": source_id,
        "target_id": target_id,
        "description": cleaned,
        "embedding": list(link.value.embedding),
    }
    return created, True
|
|
961
|
+
|
|
962
|
+
|
|
963
|
+
def _remove_link(
    engine: ArthaEngine,
    link_id: str,
    *,
    event_metadata: dict[str, object] | None = None,
) -> None:
    """Delete the link *link_id* through ``engine.semantic_delete``.

    Does nothing when the store has no object under that id. The delete event
    carries ``domain_event_type="link.removed"`` and the link id; entries of
    *event_metadata* are applied last and therefore win on key clashes.
    """
    stored = engine.store.get(link_id)
    if stored is None:
        return
    if not isinstance(stored, MemoryLinkArthaanu):
        # NOTE(review): the re-decoded object is not used afterwards; the call
        # is kept because decoding may raise for malformed data — confirm
        # whether that is the intent.
        stored = arthaanu_from_dict(
            arthaanu_to_dict(stored),
            type_registry=engine.registry.arthaanu_types,
        )
    delete_metadata = {
        "domain_event_type": "link.removed",
        "link_id": link_id,
        **(event_metadata or {}),
    }
    engine.semantic_delete(
        link_id,
        event_type="delete",
        component="memuron.unlink",
        metadata=delete_metadata,
    )
|
|
987
|
+
|
|
988
|
+
|
|
989
|
+
def _apply_guardian_links(
    engine: ArthaEngine,
    memory_id: str,
    scope: list[str],
    plan: GuardianWritePlan,
    candidate_ids: list[str],
    memories_by_id: dict[str, dict[str, Any]],
    *,
    event_metadata: dict[str, object] | None = None,
) -> int:
    """Write the link changes requested by a Guardian plan.

    Connections from ``plan.connections`` that pass ``_validate_connections``
    become ``link.created`` events with *memory_id* as source. Every
    two-element list in ``plan.links_to_remove`` names a pair of memories
    whose links (either direction) are removed; other entries are ignored.

    Returns:
        The number of validated connections.
    """
    accepted = _validate_connections(
        memory_id,
        scope,
        plan.connections,
        candidate_ids,
        memories_by_id,
    )
    for connection in accepted:
        new_link = _encode_link(
            engine,
            source_id=memory_id,
            target_id=connection.target_id,
            description=connection.description,
            persist=False,
        )
        _append_link_event(
            engine,
            event_type="link.created",
            link=new_link,
            component="memuron.ingest.guardian",
            event_metadata=event_metadata,
        )

    for pair in plan.links_to_remove:
        if isinstance(pair, list) and len(pair) == 2:
            first, second = str(pair[0]), str(pair[1])
            for link_id in _find_link_ids_between(engine, first, second):
                _remove_link(engine, link_id, event_metadata=event_metadata)
    return len(accepted)
|
|
1028
|
+
|
|
1029
|
+
|
|
1030
|
+
def _find_link_candidates(
    engine: ArthaEngine,
    content: str,
    *,
    tenant_id: str | None,
    exclude_memory_id: str | None = None,
    top_k: int = LINK_CANDIDATE_TOP_K,
) -> list[tuple[str, float, dict[str, Any]]]:
    """Similar memories for linking — org-wide within tenant, not AND-scoped to space tokens.

    The search runs with ``scope=None``; results are then restricted to
    *tenant_id* and, when given, *exclude_memory_id* is removed.
    """
    hits = _filter_candidates_by_tenant(
        find_candidates(engine, content, scope=None, top_k=top_k),
        tenant_id,
    )
    if not exclude_memory_id:
        return hits
    return [
        (memory_id, score, memory)
        for memory_id, score, memory in hits
        if memory_id != exclude_memory_id
    ]
|
|
1048
|
+
|
|
1049
|
+
|
|
1050
|
+
def _ingest_scope_context(
    *,
    scope: list[str] | None,
    event_metadata: dict[str, object] | None,
    space_context: dict[str, str] | None,
    candidate_scope: list[str] | None,
) -> tuple[list[str], str | None, list[str]]:
    """Derive the write scope, tenant and search scope for an ingest request.

    The tenant comes from ``event_metadata["tenant_id"]`` or, failing that,
    from the first ``"org:"`` token of *scope*. When *space_context* is
    ``None`` a dict under ``event_metadata["space_context"]`` with a truthy
    ``active_space_token`` is used instead.

    Returns:
        ``(request_scope, tenant_id, search_scope)`` where ``request_scope``
        is *scope* merged with the tenant's org token (when a tenant is
        known) and ``search_scope`` is *candidate_scope*, else the request
        scope, else the active space token — with the org token merged in
        when a tenant is known and no ``"org:"`` token is present.
    """
    request_scope = list(scope or [])

    if space_context is None and event_metadata:
        fallback_context = event_metadata.get("space_context")
        if isinstance(fallback_context, dict) and fallback_context.get("active_space_token"):
            space_context = fallback_context
    active_space_token = (space_context or {}).get("active_space_token")

    tenant_id = _tenant_id_from_metadata(event_metadata)
    if not tenant_id:
        org_tokens = (
            str(token) for token in request_scope if str(token).startswith("org:")
        )
        first_org = next(org_tokens, None)
        if first_org is not None:
            tenant_id = first_org[4:]  # drop the "org:" prefix
    if tenant_id:
        request_scope = merge_org_scope(request_scope, tenant_id)

    search_scope = list(candidate_scope or request_scope or [])
    if not search_scope and active_space_token:
        search_scope = [active_space_token]
    has_org_token = any(str(token).startswith("org:") for token in search_scope)
    if tenant_id and not has_org_token:
        search_scope = merge_org_scope(search_scope, tenant_id)
    return request_scope, tenant_id, search_scope
|
|
1077
|
+
|
|
1078
|
+
|
|
1079
|
+
def _auto_link_neighbors_by_similarity(
    engine: ArthaEngine,
    memory_id: str,
    content: str,
    *,
    scope: list[str] | None = None,
    event_metadata: dict[str, object] | None = None,
    tenant_id: str | None = None,
    min_score: float = AUTO_LINK_MIN_SCORE,
    top_k: int = LINK_CANDIDATE_TOP_K,
) -> int:
    """Create semantic links from an existing memory to similar neighbors (Guardian fallback).

    Args:
        engine: Engine that owns the memories and links.
        memory_id: Memory that becomes the source of the new links.
        content: Text used for the similarity search.
        scope: Request scope; merged with the tenant org token.
        event_metadata: Metadata forwarded to the link events.
        tenant_id: Tenant override; defaults to the tenant resolved from
            *event_metadata* / *scope*.
        min_score: Candidates scoring below this value are ignored.
        top_k: Maximum number of candidates to fetch.

    Returns:
        The number of links that passed validation and were written, as
        reported by ``_apply_guardian_links`` (``0`` when no candidate
        reaches *min_score*).
    """
    _require_memory_projections(engine)
    request_scope, resolved_tenant, _search_scope = _ingest_scope_context(
        scope=scope,
        event_metadata=event_metadata,
        space_context=None,
        candidate_scope=None,
    )
    tenant_id = tenant_id or resolved_tenant
    candidates = [
        (candidate_id, score, memory)
        for candidate_id, score, memory in _find_link_candidates(
            engine,
            content,
            tenant_id=tenant_id,
            exclude_memory_id=memory_id,
            top_k=top_k,
        )
        if score >= min_score
    ]
    if not candidates:
        return 0

    candidate_ids = [candidate_id for candidate_id, _score, _memory in candidates]
    memories_by_id = {candidate_id: memory for candidate_id, _score, memory in candidates}
    # The generated description is phrased as a question because
    # _validate_connections only accepts descriptions containing "?".
    connections: list[ConnectionSpec] = []
    for candidate_id, score, memory in candidates[:MAX_GUARDIAN_LINKS]:
        snippet = str(memory.get("content", "")).strip()[:80] or candidate_id
        connections.append(
            ConnectionSpec(
                target_id=candidate_id,
                description=(
                    f"How does this memory relate to '{snippet}'? "
                    f"What shared context connects them (similarity {score:.2f})?"
                ),
            )
        )
    plan = GuardianWritePlan(
        action="create",
        target_memory_id=None,
        reasoning="auto-linked by embedding similarity",
        final_content=content,
        scope=request_scope,
        connections=connections,
        links_to_remove=[],
    )
    # Report what was actually written rather than what was proposed, so the
    # count stays correct if validation drops a connection.
    linked = _apply_guardian_links(
        engine,
        memory_id,
        request_scope,
        plan,
        candidate_ids,
        memories_by_id,
        event_metadata=event_metadata,
    )
    refresh_memory_projections(engine)
    return linked
|
|
1147
|
+
|
|
1148
|
+
|
|
1149
|
+
async def link_memory_with_guardian(
    engine: ArthaEngine,
    guardian: AgnoGuardian,
    *,
    memory_id: str,
    content: str,
    scope: list[str] | None = None,
    event_metadata: dict[str, object] | None = None,
    space_context: dict[str, str] | None = None,
    candidate_scope: list[str] | None = None,  # noqa: ARG001 — ignored; linking is org-wide
) -> int:
    """Run Guardian link planning for an existing memory without changing its content.

    Args:
        engine: Engine that owns the memories and links.
        guardian: Planner whose ``plan_write`` proposes connections.
        memory_id: Memory to link from; it is excluded from the candidates.
        content: Text of the memory, used for candidate search and planning.
        scope: Request scope.
        event_metadata: Metadata forwarded to the link events.
        space_context: Optional space context passed to the planner.
        candidate_scope: Accepted for signature compatibility and ignored.

    Returns:
        The number of links created. ``0`` when no candidates exist. The
        similarity fallback is used when no OpenRouter API key is configured,
        when the planner raises ``GuardianError``, or when the plan yields no
        valid link.
    """
    _require_memory_projections(engine)
    request_scope, tenant_id, _search_scope = _ingest_scope_context(
        scope=scope,
        event_metadata=event_metadata,
        space_context=space_context,
        candidate_scope=None,
    )
    candidates = _find_link_candidates(
        engine,
        content,
        tenant_id=tenant_id,
        exclude_memory_id=memory_id,
    )
    if not candidates:
        return 0

    def _similarity_fallback() -> int:
        """Link by embedding similarity instead of a Guardian plan."""
        return _auto_link_neighbors_by_similarity(
            engine,
            memory_id,
            content,
            scope=request_scope,
            event_metadata=event_metadata,
            tenant_id=tenant_id,
        )

    # Decide this before building the lookup table below: the fallback does
    # its own candidate search, so the full memory scan would be thrown away.
    if not settings.openrouter_api_key:
        return _similarity_fallback()

    candidate_ids = [candidate_id for candidate_id, _score, _memory in candidates]
    memories_by_id = {candidate_id: memory for candidate_id, _score, memory in candidates}
    try:
        memories_by_id.setdefault(memory_id, get_memory(engine, memory_id))
    except KeyError:
        # The memory itself is unknown to the store; connections that target
        # it are then simply rejected by validation.
        pass
    if tenant_id:
        for row in _list_memory_rows(engine):
            memory = _row_to_memory_dict(row)
            if _memory_belongs_to_tenant(memory, tenant_id):
                memories_by_id.setdefault(row["artha_id"], memory)

    linked = 0
    try:
        plan = await guardian.plan_write(content, candidates, space_context=space_context)
        merged_scope = list(request_scope)
        for token in plan.scope or []:
            if token not in merged_scope:
                merged_scope.append(token)
        if space_context:
            merged_scope = apply_guardian_space_scope(merged_scope, space_context=space_context)
        if tenant_id:
            merged_scope = merge_org_scope(merged_scope, tenant_id)
        linked = _apply_guardian_links(
            engine,
            memory_id,
            merged_scope,
            plan,
            candidate_ids,
            memories_by_id,
            event_metadata=event_metadata,
        )
    except GuardianError:
        # Best effort: a failed plan falls through to the similarity fallback.
        linked = 0

    if linked == 0:
        linked = _similarity_fallback()

    refresh_memory_projections(engine)
    return linked
|
|
1233
|
+
|
|
1234
|
+
|
|
1235
|
+
async def ingest_memory(
    engine: ArthaEngine,
    guardian: AgnoGuardian,
    *,
    content: str,
    scope: list[str] | None = None,
    metadata: dict[str, Any] | None = None,
    external_identity: dict[str, str] | None = None,
    event_metadata: dict[str, object] | None = None,
    space_context: dict[str, str] | None = None,
    candidate_scope: list[str] | None = None,
) -> dict[str, Any]:
    """Ingest *content* as a new memory or as an update of an existing one.

    The Guardian plans the write from similar candidates. A memory that shares
    an external identifier with the request (node type ``"text"``) is added to
    the candidates and turns a planned ``"create"`` into an ``"update"`` of
    that memory. An update falls back to a create when its target cannot be
    resolved, is missing from the store, or may not be updated by the
    Guardian. Afterwards the planned links are applied and the projections
    refreshed.

    Args:
        engine: Engine that owns the memories and links.
        guardian: Planner whose ``plan_write`` decides action, content, scope
            and links.
        content: Raw text to ingest.
        scope: Request scope tokens.
        metadata: Memory metadata; may carry external identity values.
        external_identity: Explicit external identity values; these override
            the ones found in *metadata*.
        event_metadata: Metadata attached to every emitted event.
        space_context: Optional space context passed to the planner and used
            to adjust the final scope.
        candidate_scope: Optional scope override for the candidate search.

    Returns:
        ``{"status": "success", "memory_id": ..., "action": "created" |
        "updated", "memory": <memory payload>}``.
    """
    _require_memory_projections(engine)
    metadata = metadata or {}
    external_identity = {
        **_external_identity_from_metadata(metadata),
        **external_identity_from_values(**(external_identity or {})),
    }
    ingest_event_metadata = {**(event_metadata or {}), **external_identity}
    request_scope, tenant_id, search_scope = _ingest_scope_context(
        scope=scope,
        event_metadata=ingest_event_metadata,
        space_context=space_context,
        candidate_scope=candidate_scope,
    )

    candidates = _filter_candidates_by_tenant(
        find_candidates(engine, content, scope=search_scope or None),
        tenant_id,
    )
    external_match = _find_external_identity_match(
        engine,
        external_identity=external_identity,
        tenant_id=tenant_id,
        scope=request_scope,
        space_context=space_context,
        node_type="text",
    )
    if external_match is not None:
        already_listed = any(
            candidate_id == external_match["id"]
            for candidate_id, _score, _memory in candidates
        )
        if not already_listed:
            # Put the idempotency match first, with a perfect score.
            candidates.insert(0, (str(external_match["id"]), 1.0, external_match))

    candidate_ids = [candidate_id for candidate_id, _score, _memory in candidates]
    memories_by_id = {candidate_id: known for candidate_id, _score, known in candidates}
    # Make every memory visible to the tenant (or every memory at all when no
    # tenant is known) available as a link target.
    for row in _list_memory_rows(engine):
        row_memory = _row_to_memory_dict(row)
        if not tenant_id or _memory_belongs_to_tenant(row_memory, tenant_id):
            memories_by_id.setdefault(row["artha_id"], row_memory)

    plan = await guardian.plan_write(content, candidates, space_context=space_context)
    merged_scope = list(request_scope)
    for token in plan.scope or []:
        if token not in merged_scope:
            merged_scope.append(token)
    if space_context:
        merged_scope = apply_guardian_space_scope(merged_scope, space_context=space_context)

    external_target_id = str(external_match["id"]) if external_match is not None else None
    action = plan.action
    if action == "create" and external_target_id:
        action = "update"

    def _scope_for_write() -> list[str]:
        """Return the merged scope with tenant and space adjustments applied."""
        scoped = list(merged_scope)
        if tenant_id:
            scoped = merge_org_scope(scoped, tenant_id)
        if space_context:
            scoped = apply_guardian_space_scope(scoped, space_context=space_context)
        return scoped

    # NOTE(review): if the action is neither "update" nor "create", memory_id
    # and result_action are never bound and the code below fails — confirm
    # the planner can only return these two actions.
    if action == "update":
        target_id = _resolve_update_target(
            engine,
            target_id=external_target_id or plan.target_memory_id,
            tenant_id=tenant_id,
            candidates=candidates,
        )
        existing = None
        updatable = False
        if target_id:
            existing_raw = engine.store.get(target_id)
            if existing_raw is not None:
                existing = _coerce_memory_arthaanu(engine, existing_raw)
                updatable = bool(_guardian_may_update_target(existing))
        if not updatable:
            action = "create"
        else:
            rich = _rich_fields_from_memory(existing)
            retrieval_count = existing.value.retrieval_count
            memory = _encode_memory(
                engine,
                content=plan.final_content,
                scope=_scope_for_write(),
                node_type=rich["node_type"],
                payload=rich["payload"],
                perception=plan.final_content,
                encoding=rich["encoding"],
                metadata=_metadata_with_external_identity(
                    _merge_memory_metadata(rich["metadata"], metadata),
                    external_identity,
                ),
                artha_id=target_id,
                retrieval_count=retrieval_count,
                persist=False,
            )
            _append_memory_event(
                engine,
                event_type="memory.updated",
                memory=memory,
                component="memuron.ingest.guardian",
                extra_payload={
                    "reasoning": plan.reasoning,
                    "idempotency_match": bool(external_target_id),
                },
                event_metadata=ingest_event_metadata,
            )
            memory_id = target_id
            result_action = "updated"

    if action == "create":
        memory = _encode_memory(
            engine,
            content=plan.final_content,
            scope=_scope_for_write(),
            metadata=_metadata_with_external_identity(metadata, external_identity),
            persist=False,
        )
        _append_memory_event(
            engine,
            event_type="memory.created",
            memory=memory,
            component="memuron.ingest.guardian",
            extra_payload={"reasoning": plan.reasoning},
            event_metadata=ingest_event_metadata,
        )
        memory_id = memory.artha_id
        result_action = "created"

    final_scope = _scope_for_write()
    memories_by_id.setdefault(memory_id, memory_payload(engine, memory_id))
    _apply_guardian_links(
        engine,
        memory_id,
        final_scope,
        plan,
        candidate_ids,
        memories_by_id,
        event_metadata=ingest_event_metadata,
    )
    refresh_memory_projections(engine)
    return {
        "status": "success",
        "memory_id": memory_id,
        "action": result_action,
        "memory": memory_payload(engine, memory_id),
    }
|
|
1393
|
+
|
|
1394
|
+
|
|
1395
|
+
def _linked_memory_ids(engine: ArthaEngine, memory_id: str) -> list[str]:
    """Return the sorted ids of all memories linked to *memory_id*.

    Links are followed in both directions. They are read from the
    ``memuron_links`` SQL table when ``sql_store_has_tables`` reports one,
    otherwise from the store's ``memuron_links`` mapping (treated as empty
    when the attribute is missing).
    """
    store = engine.store
    if sql_store_has_tables(store):
        edges = sql_store_fetchall(
            store,
            """
            SELECT source_id, target_id FROM memuron_links
            WHERE source_id = ? OR target_id = ?
            """,
            (memory_id, memory_id),
        )
    else:
        edges = [
            item
            for item in getattr(store, "memuron_links", {}).values()
            if item["source_id"] == memory_id or item["target_id"] == memory_id
        ]

    neighbours: set[str] = set()
    for edge in edges:
        for endpoint in (edge["source_id"], edge["target_id"]):
            if endpoint != memory_id:
                neighbours.add(endpoint)
    return sorted(neighbours)
|
|
1420
|
+
|
|
1421
|
+
|
|
1422
|
+
def _evolution_history(engine: ArthaEngine, memory_id: str) -> list[dict[str, Any]]:
    """Summarise the create/update/delete events recorded for a memory.

    Up to 100 events are requested from ``engine.store.list_events`` and
    walked in reverse of the returned order. A ``"delete"`` event is reported
    under its payload's ``domain_event_type`` when that is set. Only
    ``memory.created``, ``memory.updated`` and ``memory.deleted`` entries are
    kept.

    Returns:
        One dict per kept event with ``event_type``, ``timestamp``,
        ``component``, ``reasoning``, ``actor_id`` and ``tenant_id``; an empty
        list when the store has no callable ``list_events``.
    """
    list_events = getattr(engine.store, "list_events", None)
    if not callable(list_events):
        return []

    tracked = {"memory.created", "memory.updated", "memory.deleted"}
    history: list[dict[str, Any]] = []
    for event in reversed(list_events(artha_id=memory_id, limit=100)):
        raw_payload = event.get("payload")
        payload = raw_payload if isinstance(raw_payload, dict) else {}
        event_type = str(event.get("event_type", ""))
        domain_event_type = str(payload.get("domain_event_type") or "")
        if event_type == "delete" and domain_event_type:
            event_type = domain_event_type
        if event_type in tracked:
            history.append(
                {
                    "event_type": event_type,
                    "timestamp": event.get("created_at"),
                    "component": event.get("component"),
                    "reasoning": payload.get("reasoning"),
                    "actor_id": payload.get("actor_id"),
                    "tenant_id": payload.get("tenant_id"),
                }
            )
    return history
|
|
1447
|
+
|
|
1448
|
+
|
|
1449
|
+
def memory_payload(engine: ArthaEngine, memory_id: str) -> dict[str, Any]:
    """Build the API-facing dict for the memory *memory_id*.

    The memory is read from the projected memory rows when a row with that id
    exists; otherwise it is loaded through ``engine.store.require`` and
    stamped with the current time. Document collections (node type
    ``"collection"`` with encoding ``"document_collection"``) get a payload
    enriched with ``collection_kind``, ``document_id`` and ``document``.

    Returns:
        A dict with the memory's content, a preview of at most 240 characters
        of whitespace-collapsed content, type, payload, metadata, the
        ``EXTERNAL_ID_FIELDS`` values, scope, linked memory ids and evolution
        history.
    """
    matching_rows = [
        row for row in _list_memory_rows(engine) if row["artha_id"] == memory_id
    ]
    if matching_rows:
        memory = _row_to_memory_dict(matching_rows[0])
    else:
        item = engine.store.require(memory_id)
        if not isinstance(item, MemoryArthaanu):
            item = arthaanu_from_dict(
                arthaanu_to_dict(item),
                type_registry=engine.registry.arthaanu_types,
            )
        value = item.value
        memory = {
            "id": item.artha_id,
            "content": value.content,
            "node_type": getattr(value, "node_type", "text"),
            "payload": getattr(value, "payload", {}),
            "perception": getattr(value, "perception", None) or value.content,
            "encoding": getattr(value, "encoding", "memory"),
            "metadata": getattr(value, "metadata", {}),
            "scope": list(value.scope),
            "embedding": list(value.embedding),
            "timestamp": _now_stamp(),
        }

    node_type = str(memory.get("node_type") or "text")
    content = str(memory["content"])
    preview = " ".join(content.split())
    payload = memory.get("payload") or {}
    # Identity values found in the metadata override those found in the payload.
    external_identity = {
        **_external_identity_from_payload(payload if isinstance(payload, dict) else {}),
        **_external_identity_from_metadata(memory.get("metadata")),
    }

    is_document_collection = (
        isinstance(payload, dict)
        and node_type == "collection"
        and memory.get("encoding") == "document_collection"
    )
    if is_document_collection:
        # First placement under this collection whose metadata role is "source".
        source_placement = None
        for row in _list_placement_rows(engine):
            if str(row["parent_id"]) != memory_id:
                continue
            placement_meta = _parse_json_field(row.get("metadata_json"), {})
            role = placement_meta.get("role") if isinstance(placement_meta, dict) else None
            if role == "source":
                source_placement = _placement_row_to_dict(row)
                break
        if source_placement:
            document_id = str(source_placement["child_id"])
        else:
            document_id = payload.get("document_id")
        # NOTE(review): ``**payload`` comes last, so a "document_id",
        # "document" or "collection_kind" already present in the stored
        # payload overrides the values computed here — confirm this is wanted.
        payload = {
            "collection_kind": "document",
            "document_id": document_id,
            "document": payload.get("document")
            or {
                "id": document_id,
                "file_name": payload.get("name"),
                "source_type": payload.get("source_type"),
                "chunk_count": payload.get("chunk_count", 0),
                "image_count": payload.get("image_count", 0),
                "graph_image_count": payload.get("graph_image_count", 0),
            },
            **payload,
        }

    is_truncated = len(preview) > 240
    return {
        "id": memory["id"],
        "content": content,
        "preview": preview[:240] + ("..." if is_truncated else ""),
        "content_length": len(content),
        "truncated": is_truncated,
        "type": node_type,
        "node_type": node_type,
        "payload": payload,
        "perception": memory.get("perception") or content,
        "encoding": memory.get("encoding") or "memory",
        "metadata": memory.get("metadata") or {},
        **{key: external_identity.get(key) for key in EXTERNAL_ID_FIELDS},
        "scope": memory.get("scope") or [],
        "links": _linked_memory_ids(engine, memory_id),
        "evolution_history": _evolution_history(engine, memory_id),
        # NOTE(review): always reported as 0, whatever the stored count is.
        "retrieval_count": 0,
        "timestamp": memory.get("timestamp") or _now_stamp(),
    }
|
|
1535
|
+
|
|
1536
|
+
|
|
1537
|
+
def get_memory(engine: ArthaEngine, memory_id: str) -> dict[str, Any]:
    """Return the payload dict of the memory *memory_id*.

    Raises:
        KeyError: If the id is neither in the store nor in the projected
            memory rows.
    """
    if engine.store.get(memory_id) is None:
        in_projection = any(
            row["artha_id"] == memory_id for row in _list_memory_rows(engine)
        )
        if not in_projection:
            raise KeyError(f"Memory not found: {memory_id}")
    return memory_payload(engine, memory_id)
|
|
1543
|
+
|
|
1544
|
+
|
|
1545
|
+
def get_memories(engine: ArthaEngine, memory_ids: list[str]) -> list[dict[str, Any]]:
    """Return payloads for every id in ``memory_ids`` that can be resolved.

    Ids for which ``get_memory`` raises ``KeyError`` are silently skipped, so
    the result may be shorter than the input; input order is preserved.
    """
    resolved: list[dict[str, Any]] = []
    for wanted_id in memory_ids:
        try:
            payload = get_memory(engine, wanted_id)
        except KeyError:
            # Unknown ids are dropped rather than failing the whole batch.
            continue
        resolved.append(payload)
    return resolved
|
|
1553
|
+
|
|
1554
|
+
|
|
1555
|
+
def update_memory(
    engine: ArthaEngine,
    memory_id: str,
    *,
    content: str | None = None,
    scope: list[str] | None = None,
    event_metadata: dict[str, object] | None = None,
) -> dict[str, Any]:
    """Re-encode a memory with new content and/or scope and log the change.

    Arguments left as ``None`` keep the stored value. New content also becomes
    the new perception. The re-encoded memory is not persisted directly
    (``persist=False``); it is recorded through a ``memory.updated`` event.
    When a scope is supplied for a ``collection`` node, the new scope is
    propagated to the collection's children. Projections are refreshed before
    the fresh payload is returned.
    """
    current = _coerce_memory_arthaanu(engine, engine.store.require(memory_id))
    rich = _rich_fields_from_memory(current)

    scope_supplied = scope is not None
    effective_scope = scope if scope_supplied else list(current.value.scope)

    if content is None:
        effective_content = current.value.content
        effective_perception = rich["perception"]
    else:
        effective_content = effective_perception = content

    updated = _encode_memory(
        engine,
        content=effective_content,
        scope=effective_scope,
        node_type=rich["node_type"],
        payload=rich["payload"],
        perception=effective_perception,
        encoding=rich["encoding"],
        metadata=rich["metadata"],
        artha_id=memory_id,
        retrieval_count=current.value.retrieval_count,
        persist=False,
    )
    _append_memory_event(
        engine,
        event_type="memory.updated",
        memory=updated,
        component="memuron.update",
        event_metadata=event_metadata,
    )

    # Only an explicit scope change on a collection cascades to its children.
    if scope_supplied and rich["node_type"] == "collection":
        _propagate_parent_scope_to_children(
            engine,
            parent_id=memory_id,
            parent_scope=effective_scope,
            event_metadata=event_metadata,
        )

    refresh_memory_projections(engine)
    return memory_payload(engine, memory_id)
|
|
1601
|
+
|
|
1602
|
+
|
|
1603
|
+
def _scope_tokens(scope: list[str], prefix: str) -> list[str]:
    """Return the tokens of ``scope`` that start with ``prefix``, in order."""
    matching: list[str] = []
    for entry in scope:
        if entry.startswith(prefix):
            matching.append(entry)
    return matching
|
|
1605
|
+
|
|
1606
|
+
|
|
1607
|
+
def _merge_inherited_scope(child_scope: list[str], parent_scope: list[str]) -> list[str]:
    """Replace the child's ``org:``/``space.`` tokens with the parent's.

    When the parent carries no ``org:`` or ``space.`` token, a copy of the
    child scope is returned untouched. Otherwise every ``org:``/``space.``
    token is removed from the child scope and the parent's ``org:`` tokens
    followed by its ``space.`` tokens are appended, skipping duplicates.
    """
    inherited = [
        *_scope_tokens(parent_scope, "org:"),
        *_scope_tokens(parent_scope, "space."),
    ]
    if not inherited:
        return list(child_scope)

    # Keep only the child's own, non-inheritable tokens.
    merged = [
        entry
        for entry in child_scope
        if not entry.startswith(("org:", "space."))
    ]
    for entry in inherited:
        if entry in merged:
            continue
        merged.append(entry)
    return merged
|
|
1622
|
+
|
|
1623
|
+
|
|
1624
|
+
def _append_scope_update(
    engine: ArthaEngine,
    *,
    memory_id: str,
    scope: list[str],
    component: str,
    event_metadata: dict[str, object] | None = None,
) -> dict[str, Any]:
    """Record a scope-only change for a memory as a ``memory.updated`` event.

    If the stored scope already equals ``scope`` nothing is appended and the
    full ``memory_payload`` is returned. Otherwise the memory is re-encoded
    with the new scope (all other rich fields carried over, ``persist=False``),
    the event is appended with reason ``placement_scope_inheritance``, and a
    compact ``{"id", "scope", "node_type"}`` summary is returned. Note that the
    two paths therefore return differently shaped dictionaries.
    """
    current = _coerce_memory_arthaanu(engine, engine.store.require(memory_id))
    already_current = list(current.value.scope) == scope
    if already_current:
        return memory_payload(engine, memory_id)

    rich = _rich_fields_from_memory(current)
    rescoped = _encode_memory(
        engine,
        content=current.value.content,
        scope=scope,
        node_type=rich["node_type"],
        payload=rich["payload"],
        perception=rich["perception"],
        encoding=rich["encoding"],
        metadata=rich["metadata"],
        artha_id=memory_id,
        retrieval_count=current.value.retrieval_count,
        persist=False,
    )
    _append_memory_event(
        engine,
        event_type="memory.updated",
        memory=rescoped,
        component=component,
        extra_payload={"reason": "placement_scope_inheritance"},
        event_metadata=event_metadata,
    )
    summary: dict[str, Any] = {
        "id": memory_id,
        "scope": scope,
        "node_type": rich["node_type"],
    }
    return summary
|
|
1662
|
+
|
|
1663
|
+
|
|
1664
|
+
def _child_placements(engine: ArthaEngine, parent_id: str) -> list[dict[str, Any]]:
    """Return every placement dict whose ``parent_id`` equals ``parent_id``.

    Each placement row is converted to a dict first; the stored parent id is
    compared through ``str()``.
    """
    as_dicts = (_placement_row_to_dict(row) for row in _list_placement_rows(engine))
    return [item for item in as_dicts if str(item["parent_id"]) == parent_id]
|
|
1671
|
+
|
|
1672
|
+
|
|
1673
|
+
def _append_placement_scope_update(
    engine: ArthaEngine,
    *,
    placement: dict[str, Any],
    parent_scope: list[str],
    event_metadata: dict[str, object] | None = None,
) -> dict[str, Any]:
    """Bring a placement's scope in line with its parent's scope.

    The placement's scope is merged with ``parent_scope`` via
    ``_merge_inherited_scope``. If the merge changes nothing, the placement is
    returned as-is and no event is appended. Otherwise the placement is
    re-encoded under the same id (``persist=False``) and an event is appended
    to the store; a copy of the placement with the new scope is returned.
    """
    current_scope = list(placement.get("scope") or [])
    merged_scope = _merge_inherited_scope(current_scope, parent_scope)
    if merged_scope == current_scope:
        return placement

    placement_fields = {
        "artha_id": placement["id"],
        "parent_id": placement["parent_id"],
        "child_id": placement["child_id"],
        "name": placement["name"],
        "scope": merged_scope,
        "metadata": placement.get("metadata") or {},
        "inherit_parent_scope": True,
    }
    re_encoded = engine.encode("memory_placement", placement_fields, persist=False)

    # The fixed "reason" key is written last so it wins over caller metadata.
    event_payload = {
        **(event_metadata or {}),
        "reason": "placement_scope_inheritance",
    }
    # NOTE(review): the re-scoped placement is logged as "placement.created";
    # presumably consumers treat that event as an upsert - confirm.
    engine.store.append_event(
        event_type="placement.created",
        arthaanu=re_encoded,
        component="memuron.placement_scope",
        payload=event_payload,
    )
    return {**placement, "scope": merged_scope}
|
|
1709
|
+
|
|
1710
|
+
|
|
1711
|
+
def _propagate_parent_scope_to_children(
    engine: ArthaEngine,
    *,
    parent_id: str,
    parent_scope: list[str],
    event_metadata: dict[str, object] | None = None,
) -> list[dict[str, Any]]:
    """Push ``parent_scope`` down to every inheriting child placement.

    Placements whose ``inherit_parent_scope`` flag is falsy are skipped (a
    missing flag counts as inheriting). For the others, the placement scope is
    updated and the change is then propagated to the child node and its own
    descendants. Returns the collected change records from those descendants.
    """
    changed: list[dict[str, Any]] = []
    for child_placement in _child_placements(engine, parent_id):
        if child_placement.get("inherit_parent_scope", True):
            refreshed = _append_placement_scope_update(
                engine,
                placement=child_placement,
                parent_scope=parent_scope,
                event_metadata=event_metadata,
            )
            changed += _propagate_scope_to_node_and_descendants(
                engine,
                node_id=str(refreshed["child_id"]),
                parent_scope=parent_scope,
                event_metadata=event_metadata,
            )
    return changed
|
|
1737
|
+
|
|
1738
|
+
|
|
1739
|
+
def _propagate_scope_to_node_and_descendants(
    engine: ArthaEngine,
    *,
    node_id: str,
    parent_scope: list[str],
    event_metadata: dict[str, object] | None = None,
) -> list[dict[str, Any]]:
    """Apply inherited scope to one node, then recurse into its children.

    The node's scope is merged with ``parent_scope``. A scope update is
    appended only when the merge actually changes the scope; its summary is
    then included in the result. The merged scope is always propagated further
    down through the node's own child placements.

    Returns:
        The change records for this node (if it changed) followed by those of
        its descendants. An empty list is returned when the node cannot be
        loaded (``KeyError``).
    """
    try:
        child = _coerce_memory_arthaanu(engine, engine.store.require(node_id))
    except KeyError:
        # A placement may point at a node that no longer exists; skip it.
        return []

    # Snapshot the scope before any update so the change test cannot be
    # affected by the update itself.
    current_scope = list(child.value.scope)
    new_scope = _merge_inherited_scope(current_scope, parent_scope)

    changed: list[dict[str, Any]] = []
    if current_scope != new_scope:
        # Only call the updater when there is something to record: on the
        # unchanged path it would build a full memory payload that is
        # discarded here.
        changed.append(
            _append_scope_update(
                engine,
                memory_id=node_id,
                scope=new_scope,
                component="memuron.placement_scope",
                event_metadata=event_metadata,
            )
        )

    # NOTE(review): this recursion follows placements without a visited set;
    # it assumes the placement graph is acyclic - confirm that is enforced.
    changed.extend(
        _propagate_parent_scope_to_children(
            engine,
            parent_id=node_id,
            parent_scope=new_scope,
            event_metadata=event_metadata,
        )
    )
    return changed
|
|
1770
|
+
|
|
1771
|
+
|
|
1772
|
+
def delete_memory(
    engine: ArthaEngine,
    memory_id: str,
    *,
    event_metadata: dict[str, object] | None = None,
) -> bool:
    """Delete a memory and every link that touches it.

    Returns ``False`` without side effects when the store has no entry for
    ``memory_id``. Otherwise all links found for the memory are removed, a
    semantic delete is issued for the memory, projections are refreshed and
    ``True`` is returned.
    """
    record = engine.store.get(memory_id)
    if record is None:
        return False

    if not isinstance(record, MemoryArthaanu):
        # Round-trip through the dict form using the engine's type registry.
        # The converted value is not used further below.
        record = arthaanu_from_dict(
            arthaanu_to_dict(record),
            type_registry=engine.registry.arthaanu_types,
        )

    attached_links = _find_links_for_memory(engine, memory_id)
    for link_id in attached_links:
        _remove_link(engine, link_id, event_metadata=event_metadata)

    # Caller metadata is applied after the default, so it can override it.
    delete_metadata: dict[str, object] = {"domain_event_type": "memory.deleted"}
    delete_metadata.update(event_metadata or {})
    engine.semantic_delete(
        memory_id,
        event_type="delete",
        component="memuron.delete",
        metadata=delete_metadata,
    )
    refresh_memory_projections(engine)
    return True
|
|
1799
|
+
|
|
1800
|
+
|
|
1801
|
+
def _find_links_for_memory(engine: ArthaEngine, memory_id: str) -> list[str]:
    """Return the ids of all links whose source or target is ``memory_id``.

    SQL-backed stores are queried through the ``memuron_links`` table with
    bound parameters. Other stores are expected to expose a ``memuron_links``
    mapping attribute; a missing attribute is treated as "no links".
    """
    store = engine.store
    if sql_store_has_tables(store):
        link_rows = sql_store_fetchall(
            store,
            """
            SELECT link_id FROM memuron_links
            WHERE source_id = ? OR target_id = ?
            """,
            (memory_id, memory_id),
        )
        return [str(link_row["link_id"]) for link_row in link_rows]

    in_memory_links = getattr(store, "memuron_links", {})
    touching: list[str] = []
    for key, link in in_memory_links.items():
        if link["source_id"] == memory_id or link["target_id"] == memory_id:
            touching.append(key)
    return touching
|
|
1819
|
+
|
|
1820
|
+
|
|
1821
|
+
def bulk_delete_memories(
    engine: ArthaEngine,
    *,
    scope: str | None = None,
    event_metadata: dict[str, object] | None = None,
) -> tuple[int, list[str]]:
    """Delete every memory whose scope matches the comma-separated ``scope``.

    Returns:
        ``(count, ids)`` for the memories that were actually deleted.

    Raises:
        ValueError: if ``scope`` yields no patterns; an unscoped bulk delete
            is refused.
    """
    patterns = parse_comma_scope(scope)
    if not patterns:
        raise ValueError("scope is required for bulk delete")

    removed: list[str] = []
    for row in _list_memory_rows(engine):
        candidate = _row_to_memory_dict(row)
        # ``patterns`` is known to be non-empty here, so only the match matters.
        in_scope = scope_matches_filter(candidate.get("scope") or [], patterns)
        if in_scope and delete_memory(engine, candidate["id"], event_metadata=event_metadata):
            removed.append(candidate["id"])
    return len(removed), removed
|
|
1838
|
+
|
|
1839
|
+
|
|
1840
|
+
def count_memories(
    engine: ArthaEngine,
    *,
    scope: str | None = None,
) -> tuple[int, dict[str, Any]]:
    """Count memories, optionally restricted by comma-separated scope patterns.

    Returns:
        ``(count, filters)`` where ``filters`` holds the parsed patterns under
        ``"scope"`` when any were given and is empty otherwise.
    """
    patterns: list[str] = []
    for piece in (scope or "").split(","):
        cleaned = piece.strip()
        if cleaned:
            patterns.append(cleaned)

    filters: dict[str, Any] = {"scope": patterns} if patterns else {}
    matching = _filtered_memory_rows(engine, scope=patterns or None)
    return len(matching), filters
|
|
1851
|
+
|
|
1852
|
+
|
|
1853
|
+
def list_memories(
    engine: ArthaEngine,
    *,
    scope: str | None = None,
    limit: int = 100,
    offset: int = 0,
) -> tuple[list[dict[str, Any]], int, dict[str, Any]]:
    """Return one page of memory payloads plus the total match count.

    ``scope`` is a comma-separated list of patterns; blanks are ignored.

    Returns:
        ``(memories, total, filters)``: the payloads for the rows in
        ``[offset, offset + limit)``, the number of rows matching the filter
        before paging, and the applied filters.
    """
    patterns = [piece for piece in map(str.strip, (scope or "").split(",")) if piece]
    filters: dict[str, Any] = {"scope": patterns} if patterns else {}

    matching = _filtered_memory_rows(engine, scope=patterns or None)
    window = slice(offset, offset + limit)
    memories = [memory_payload(engine, row["artha_id"]) for row in matching[window]]
    return memories, len(matching), filters
|
|
1869
|
+
|
|
1870
|
+
|
|
1871
|
+
def create_rich_node(
    engine: ArthaEngine,
    *,
    content: str,
    node_type: str = "text",
    payload: dict[str, Any] | None = None,
    perception: str | None = None,
    encoding: str = "memory",
    metadata: dict[str, Any] | None = None,
    external_identity: dict[str, str] | None = None,
    scope: list[str] | None = None,
    event_metadata: dict[str, object] | None = None,
) -> dict[str, Any]:
    """Create a memory node of any type and return its payload.

    The external identity is normalised and folded into the payload, the
    metadata and the event metadata. The node is encoded with
    ``persist=False`` and recorded through a creation event:
    ``collection.created`` for ``collection`` nodes, ``memory.created`` for
    everything else. Projections are refreshed before the payload is read.
    """
    identity = external_identity_from_values(**(external_identity or {}))
    node = _encode_memory(
        engine,
        content=content,
        scope=scope,
        node_type=node_type,
        payload=_payload_with_external_identity(payload, identity),
        perception=perception,
        encoding=encoding,
        metadata=_metadata_with_external_identity(metadata, identity),
        persist=False,
    )

    if node_type == "collection":
        creation_event = "collection.created"
    else:
        creation_event = "memory.created"

    # Identity keys are merged last so they win over caller-supplied keys.
    merged_event_metadata = {**(event_metadata or {}), **identity}
    _append_memory_event(
        engine,
        event_type=creation_event,
        memory=node,
        component="memuron.nodes",
        event_metadata=merged_event_metadata,
    )
    refresh_memory_projections(engine)
    return memory_payload(engine, node.artha_id)
|
|
1906
|
+
|
|
1907
|
+
|
|
1908
|
+
def create_collection(
    engine: ArthaEngine,
    *,
    name: str,
    summary: str,
    scope: list[str] | None = None,
    metadata: dict[str, Any] | None = None,
    event_metadata: dict[str, object] | None = None,
) -> dict[str, Any]:
    """Create a ``collection`` node whose content and perception are ``summary``.

    Thin wrapper around ``create_rich_node`` that stores ``name`` and
    ``summary`` in the payload and uses the ``collection_summary`` encoding.
    """
    return create_rich_node(
        engine,
        content=summary,
        node_type="collection",
        payload={"name": name, "summary": summary},
        perception=summary,
        encoding="collection_summary",
        metadata=metadata,
        scope=scope,
        event_metadata=event_metadata,
    )
|
|
1929
|
+
|
|
1930
|
+
|
|
1931
|
+
def ingest_document_source(
    engine: ArthaEngine,
    *,
    file_name: str,
    content_type: str | None,
    file_bytes: bytes,
    scope: list[str] | None = None,
    metadata: dict[str, Any] | None = None,
    external_identity: dict[str, str] | None = None,
    source_metadata: dict[str, Any] | None = None,
    event_metadata: dict[str, object] | None = None,
) -> dict[str, Any]:
    """Parse a source file and append document/collection/chunk semantic events.

    The file is parsed with ``parse_source``, handed to
    ``maybe_store_source_file`` and given a fresh UUID ``document_key``. Nodes
    are encoded with ``persist=False`` and recorded through events in this
    order:

    1. ``collection.created`` for the document's container collection.
    2. ``memory.created`` for the source node (``image`` or ``document``).
    3. ``memory.updated`` for the collection, now carrying the source node id.
    4. A placement of the source node under the collection.
    5. For each image flagged ``include_in_graph`` (skipped entirely when the
       source itself is an image): ``memory.created`` plus a placement.
    6. For each chunk: ``memory.created`` plus a placement.

    Projections are refreshed once at the end.

    Args:
        engine: Engine used for encoding and event storage.
        file_name: Name of the uploaded file, passed to the parser.
        content_type: Declared media type; the parsed media type is used as a
            fallback when storing the source file.
        file_bytes: Raw file content.
        scope: Base scope tokens; a per-document token is added to them.
        metadata: User metadata; may also carry external identity values.
        external_identity: Explicit external identity; overrides values
            derived from ``metadata``.
        source_metadata: Extra entries merged over the parser's
            ``source_metadata``.
        event_metadata: Extra fields attached to every appended event.

    Returns:
        A summary dict with ``status``, the document key, external identity
        fields, counts, the created ids, the payloads of the collection,
        document, images, chunks and placements, and ``image_attachments``
        (raw bytes of graph images that have them).
    """

    # Explicitly passed identity values win over those found in the metadata.
    external_identity = {
        **_external_identity_from_metadata(metadata),
        **external_identity_from_values(**(external_identity or {})),
    }
    parsed = parse_source(
        file_name=file_name,
        content_type=content_type,
        file_bytes=file_bytes,
        describe_images=settings.describe_images,
        vlm_api_key=settings.openrouter_api_key,
        vlm_model=settings.image_vlm_model,
        vlm_timeout_seconds=settings.image_vlm_timeout_seconds,
    )
    if source_metadata:
        # Caller-supplied source metadata overrides the parser's entries.
        parsed = replace(
            parsed,
            source_metadata={**parsed.source_metadata, **source_metadata},
        )
    document_key = str(uuid4())
    source_object = maybe_store_source_file(
        org_id=_tenant_id_from_metadata(event_metadata),
        document_key=document_key,
        file_name=parsed.file_name,
        content_type=content_type or parsed.media_type,
        file_bytes=file_bytes,
    )
    # Before the document node exists, the reference can only carry the key.
    source_object_ref = {"document_key": document_key}
    base_scope = list(scope or [])
    document_scope = _document_scope(base_scope, document_key)
    user_metadata = _metadata_with_external_identity(metadata, external_identity)
    # The fixed keys are listed last so they override caller-supplied ones.
    common_event_metadata = {
        **(event_metadata or {}),
        **external_identity,
        "document_key": document_key,
        "parser": "memuron.documents.parser",
        "source_type": parsed.source_type,
    }
    # Only images flagged for the graph become nodes; the rest are counted.
    graph_images = [image for image in parsed.images if image.include_in_graph]
    skipped_image_count = len(parsed.images) - len(graph_images)

    # --- 1. container collection -----------------------------------------
    summary = _document_summary(parsed)
    collection = _encode_memory(
        engine,
        content=summary,
        scope=document_scope,
        node_type="collection",
        payload={
            "name": parsed.file_name,
            "summary": summary,
            "document_key": document_key,
            "source_type": parsed.source_type,
            "chunk_count": len(parsed.chunks),
            "image_count": len(parsed.images),
            "graph_image_count": len(graph_images),
            "skipped_image_count": skipped_image_count,
            "source_object_ref": source_object_ref,
            "source_identity": external_identity,
        },
        perception=summary,
        encoding="document_collection",
        metadata=_document_metadata(
            user_metadata,
            document_key=document_key,
            role="document_collection",
            parsed=parsed,
        ),
        persist=False,
    )
    _append_memory_event(
        engine,
        event_type="collection.created",
        memory=collection,
        component="memuron.documents",
        event_metadata=common_event_metadata,
    )

    # --- 2. source node (the document or image itself) ---------------------
    source_node_type = "image" if parsed.source_type == "image" else "document"
    document = _encode_memory(
        engine,
        content=summary,
        scope=document_scope,
        node_type=source_node_type,
        payload={
            "file_name": parsed.file_name,
            "media_type": parsed.media_type,
            "source_type": parsed.source_type,
            "size_bytes": len(file_bytes),
            "document_key": document_key,
            "collection_id": collection.artha_id,
            "chunk_count": len(parsed.chunks),
            "image_count": len(parsed.images),
            "graph_image_count": len(graph_images),
            "skipped_image_count": skipped_image_count,
            "page_count": parsed.page_count,
            "unreadable_pages": parsed.unreadable_pages,
            "source_metadata": parsed.source_metadata,
            "markdown_preview": parsed.markdown[:DOCUMENT_MARKDOWN_PREVIEW_CHARS],
            "markdown_truncated": len(parsed.markdown) > DOCUMENT_MARKDOWN_PREVIEW_CHARS,
            "source_object": source_object,
            "source_identity": external_identity,
        },
        perception=summary,
        encoding=f"{parsed.source_type}_source",
        metadata=_document_metadata(
            user_metadata,
            document_key=document_key,
            role="source",
            parsed=parsed,
        ),
        persist=False,
    )
    _append_memory_event(
        engine,
        event_type="memory.created",
        memory=document,
        component="memuron.documents",
        event_metadata=common_event_metadata,
    )

    # --- 3. link the collection back to its source node --------------------
    # The collection was created before the document id existed, so it is
    # re-encoded under the same artha_id with the document reference added.
    collection_payload = {
        **dict(collection.value.payload),
        "collection_kind": "document",
        "document_id": document.artha_id,
        "document": {
            "id": document.artha_id,
            "file_name": parsed.file_name,
            "source_type": parsed.source_type,
            "media_type": parsed.media_type,
            "chunk_count": len(parsed.chunks),
            "image_count": len(parsed.images),
            "graph_image_count": len(graph_images),
            "source_object_ref": {
                "document_id": document.artha_id,
                "document_key": document_key,
            },
            "source_identity": external_identity,
        },
    }
    collection_metadata = _document_metadata(
        {
            **user_metadata,
            "document_id": document.artha_id,
            "collection_kind": "document",
        },
        document_key=document_key,
        role="document_collection",
        parsed=parsed,
    )
    collection_update = _encode_memory(
        engine,
        content=summary,
        scope=document_scope,
        node_type="collection",
        payload=collection_payload,
        perception=summary,
        encoding="document_collection",
        metadata=collection_metadata,
        artha_id=collection.artha_id,
        persist=False,
    )
    _append_memory_event(
        engine,
        event_type="memory.updated",
        memory=collection_update,
        component="memuron.documents",
        extra_payload={"reason": "document_container_source_link"},
        event_metadata=common_event_metadata,
    )

    # --- 4. place the source node under the collection ---------------------
    placements: list[dict[str, Any]] = [
        _append_placement(
            engine,
            parent_id=collection.artha_id,
            child_id=document.artha_id,
            name=f"source:{parsed.file_name}",
            scope=document_scope,
            metadata={"role": "source", "document_key": document_key, **external_identity},
            event_metadata=common_event_metadata,
        )
    ]

    # --- 5. embedded images -------------------------------------------------
    image_ids: list[str] = []
    image_attachments: list[dict[str, Any]] = []
    # When the source itself is an image, no separate image nodes are created.
    if parsed.source_type != "image":
        for image in graph_images:
            image_node = _encode_memory(
                engine,
                content=image.description,
                scope=[*document_scope, "kind:document_image"],
                node_type="image",
                payload={
                    "document_key": document_key,
                    "document_id": document.artha_id,
                    "collection_id": collection.artha_id,
                    "source_object_ref": {
                        "document_id": document.artha_id,
                        "document_key": document_key,
                    },
                    "source_identity": external_identity,
                    "file_name": image.file_name,
                    "media_type": image.media_type,
                    "source": image.source,
                    "page_number": image.page_number,
                    "image_index": image.index,
                    "size_bytes": image.size_bytes,
                    "description": image.description,
                    "include_in_graph": image.include_in_graph,
                    "image_kind": image.image_kind,
                    "reason": image.reason,
                    "metadata": image.metadata,
                },
                perception=image.description,
                # The encoding depends on the "parser" entry of the image metadata.
                encoding="image_vlm" if image.metadata.get("parser") == "openrouter_vlm" else "image_metadata",
                metadata=_document_metadata(
                    {
                        **user_metadata,
                        "image": {
                            "file_name": image.file_name,
                            "media_type": image.media_type,
                            "page_number": image.page_number,
                            "image_index": image.index,
                            "source": image.source,
                            "include_in_graph": image.include_in_graph,
                            "image_kind": image.image_kind,
                            "reason": image.reason,
                            **image.metadata,
                        },
                    },
                    document_key=document_key,
                    role="image",
                    parsed=parsed,
                ),
                persist=False,
            )
            image_event_metadata = {
                **common_event_metadata,
                "image_index": image.index,
                "page_number": image.page_number,
            }
            _append_memory_event(
                engine,
                event_type="memory.created",
                memory=image_node,
                component="memuron.documents",
                event_metadata=image_event_metadata,
            )
            image_ids.append(image_node.artha_id)
            if image.raw_bytes:
                # Raw bytes are returned to the caller, keyed by the new node id.
                image_attachments.append(
                    {
                        "memory_id": image_node.artha_id,
                        "media_type": image.media_type,
                        "bytes": image.raw_bytes,
                    }
                )
            placements.append(
                _append_placement(
                    engine,
                    parent_id=collection.artha_id,
                    child_id=image_node.artha_id,
                    # 1-based, zero-padded index in the placement name.
                    name=f"image-{image.index + 1:04d}:{image.file_name}",
                    scope=document_scope,
                    metadata={
                        "role": "image",
                        "document_key": document_key,
                        **external_identity,
                        "image_index": image.index,
                        "page_number": image.page_number,
                    },
                    event_metadata=image_event_metadata,
                )
            )

    # --- 6. text chunks -------------------------------------------------------
    chunk_ids: list[str] = []
    for chunk in parsed.chunks:
        chunk_node = _encode_memory(
            engine,
            content=chunk.text,
            scope=[*document_scope, "kind:document_chunk"],
            node_type="text",
            payload={
                "document_key": document_key,
                "document_id": document.artha_id,
                "collection_id": collection.artha_id,
                "source_object_ref": {
                    "document_id": document.artha_id,
                    "document_key": document_key,
                },
                "source_identity": external_identity,
                "file_name": parsed.file_name,
                "source_type": parsed.source_type,
                "chunk_index": chunk.index,
                "chunk_count": len(parsed.chunks),
                "location": chunk.to_location(),
            },
            # The perception is capped at the first 2,000 characters of the chunk.
            perception=chunk.text[:2_000],
            encoding="document_chunk",
            metadata=_document_metadata(
                user_metadata,
                document_key=document_key,
                role="chunk",
                parsed=parsed,
                chunk_index=chunk.index,
                chunk_count=len(parsed.chunks),
                location=chunk.to_location(),
            ),
            persist=False,
        )
        chunk_event_metadata = {
            **common_event_metadata,
            "chunk_index": chunk.index,
        }
        _append_memory_event(
            engine,
            event_type="memory.created",
            memory=chunk_node,
            component="memuron.documents",
            event_metadata=chunk_event_metadata,
        )
        chunk_ids.append(chunk_node.artha_id)
        placements.append(
            _append_placement(
                engine,
                parent_id=collection.artha_id,
                child_id=chunk_node.artha_id,
                # 1-based, zero-padded index in the placement name.
                name=f"chunk-{chunk.index + 1:04d}",
                scope=document_scope,
                metadata={
                    "role": "chunk",
                    "document_key": document_key,
                    **external_identity,
                    "chunk_index": chunk.index,
                    "location": chunk.to_location(),
                },
                event_metadata=chunk_event_metadata,
            )
        )

    # Refresh once, after all events, so the payload lookups below see them.
    refresh_memory_projections(engine)
    return {
        "status": "success",
        "document_key": document_key,
        **{key: external_identity.get(key) for key in EXTERNAL_ID_FIELDS},
        "source_type": parsed.source_type,
        "media_type": parsed.media_type,
        "file_name": parsed.file_name,
        "source_object": source_object,
        "page_count": parsed.page_count,
        "unreadable_pages": parsed.unreadable_pages,
        "image_count": len(parsed.images),
        "graph_image_count": len(graph_images),
        "skipped_image_count": skipped_image_count,
        "image_ids": image_ids,
        "chunk_count": len(parsed.chunks),
        "chunk_ids": chunk_ids,
        "collection": memory_payload(engine, collection.artha_id),
        "document": memory_payload(engine, document.artha_id),
        "images": [memory_payload(engine, image_id) for image_id in image_ids],
        "chunks": [memory_payload(engine, chunk_id) for chunk_id in chunk_ids],
        "placements": [placement_payload(engine, item["id"]) for item in placements],
        "image_attachments": image_attachments,
    }
|
|
2307
|
+
|
|
2308
|
+
|
|
2309
|
+
def document_source_payload(engine: ArthaEngine, node_id: str) -> dict[str, Any]:
    """Resolve any document-related node to its original source object.

    The owning document id is looked up, in order, from the node payload's
    ``document_id``, its ``source_object_ref``, its ``document`` descriptor,
    and finally the node itself when its payload carries a ``source_object``.

    Raises:
        KeyError: if no document id can be determined, or the document's
            payload has no non-empty ``source_object`` dict.
    """

    def _dict_or_empty(value: Any) -> dict[str, Any]:
        # Non-dict payload parts are treated as absent.
        return value if isinstance(value, dict) else {}

    requested = get_memory(engine, node_id)
    requested_payload = _dict_or_empty(requested.get("payload"))
    source_object_ref = _dict_or_empty(requested_payload.get("source_object_ref"))
    document_descriptor = _dict_or_empty(requested_payload.get("document"))

    document_id = (
        requested_payload.get("document_id")
        or source_object_ref.get("document_id")
        or document_descriptor.get("id")
    )
    if not document_id and requested_payload.get("source_object"):
        # The requested node is itself the source document.
        document_id = node_id
    if not document_id:
        raise KeyError(f"Document source not found for node: {node_id}")

    if document_id == node_id:
        document = requested
    else:
        document = get_memory(engine, str(document_id))
    document_payload = _dict_or_empty(document.get("payload"))

    source_object = document_payload.get("source_object")
    if not (isinstance(source_object, dict) and source_object):
        raise KeyError(f"Document source object not found for node: {node_id}")

    download_url = presign_source_object(source_object)
    document_key = document_payload.get("document_key") or source_object.get("document_key") or ""
    stored_name = source_object.get("file_name") or document_payload.get("file_name") or ""
    media_type = (
        source_object.get("content_type")
        or document_payload.get("media_type")
        or "application/octet-stream"
    )
    size_bytes = source_object.get("size_bytes") or document_payload.get("size_bytes") or 0
    # The expiry is only reported when a download URL was produced.
    expires_in = settings.object_storage_presign_seconds if download_url else None
    return {
        "status": "success",
        "requested_node_id": node_id,
        "document_id": str(document["id"]),
        "document_key": str(document_key),
        "file_name": str(stored_name),
        "content_type": str(media_type),
        "size_bytes": int(size_bytes),
        "sha256": str(source_object.get("sha256") or ""),
        "source_object": source_object,
        "download_url": download_url,
        "expires_in_seconds": expires_in,
    }
|
|
2350
|
+
|
|
2351
|
+
|
|
2352
|
+
def _document_scope(scope: list[str], document_key: str) -> list[str]:
    """Return a copy of ``scope`` that contains the document's chunk-scope token.

    The token is ``DOCUMENT_CHUNK_SCOPE_PREFIX`` immediately followed by
    ``document_key``. It is appended only when not already present, and the
    caller's list is never mutated.
    """
    tokens = list(scope)
    chunk_token = f"{DOCUMENT_CHUNK_SCOPE_PREFIX}{document_key}"
    return tokens if chunk_token in tokens else [*tokens, chunk_token]
|
|
2358
|
+
|
|
2359
|
+
|
|
2360
|
+
def _document_summary(parsed: ParsedDocument) -> str:
    """Build a short text summary for a parsed document.

    The summary is the document's file name, a blank line, and then the text
    of the first chunk (or the whole markdown when there are no chunks). That
    text is stripped, has double newlines collapsed to single ones, and is
    capped at 2,000 characters with a trailing ``...`` when truncated.
    """
    if parsed.chunks:
        lead_text = parsed.chunks[0].text
    else:
        lead_text = parsed.markdown
    body = lead_text.strip().replace("\n\n", "\n")
    if len(body) > 2_000:
        # 1,997 characters plus the three-character ellipsis stays within the cap.
        body = body[:1_997].rstrip() + "..."
    return f"{parsed.file_name}\n\n{body}"
|
|
2366
|
+
|
|
2367
|
+
|
|
2368
|
+
def _document_metadata(
    metadata: dict[str, Any],
    *,
    document_key: str,
    role: str,
    parsed: ParsedDocument,
    chunk_index: int | None = None,
    chunk_count: int | None = None,
    location: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Return a copy of ``metadata`` with a ``system.document`` section attached.

    Args:
        metadata: Caller metadata; copied shallowly and never mutated.
        document_key: Key recorded as ``document_key``.
        role: Role label recorded as ``role``.
        parsed: Parsed document supplying file name, source/media type, page
            count, unreadable pages and source metadata.
        chunk_index: Recorded as ``chunk_index`` when not ``None``.
        chunk_count: Recorded as ``chunk_count`` when not ``None``.
        location: Recorded as ``location`` when not ``None``.

    Returns:
        A new dict whose ``"system"`` entry is a copy of the existing system
        section (if any) with its ``"document"`` key replaced.
    """
    enriched = dict(metadata)
    system_section = dict(enriched.get("system") or {})
    document_section: dict[str, Any] = {
        "document_key": document_key,
        "role": role,
        "file_name": parsed.file_name,
        "source_type": parsed.source_type,
        "media_type": parsed.media_type,
        "page_count": parsed.page_count,
        "unreadable_pages": list(parsed.unreadable_pages),
        "source_metadata": dict(parsed.source_metadata),
    }
    # Optional chunk details are only recorded when the caller supplied them.
    optional_fields = (
        ("chunk_index", chunk_index),
        ("chunk_count", chunk_count),
        ("location", location),
    )
    for field_name, field_value in optional_fields:
        if field_value is not None:
            document_section[field_name] = field_value
    system_section["document"] = document_section
    enriched["system"] = system_section
    return enriched
|
|
2399
|
+
|
|
2400
|
+
|
|
2401
|
+
def _append_placement(
    engine: ArthaEngine,
    *,
    parent_id: str,
    child_id: str,
    name: str,
    scope: list[str],
    metadata: dict[str, Any],
    inherit_parent_scope: bool = True,
    event_metadata: dict[str, object] | None = None,
) -> dict[str, Any]:
    """Encode a placement and append a ``placement.created`` event for it.

    The placement is encoded with ``persist=False`` and written through
    ``engine.store.append_event`` under the ``memuron.documents`` component.
    No validation of the parent or child is performed here.

    Returns:
        The placement fields passed in, plus ``"id"`` set to the encoded
        placement's ``artha_id``.
    """
    fields: dict[str, Any] = {
        "parent_id": parent_id,
        "child_id": child_id,
        "name": name,
        "scope": scope,
        "metadata": metadata,
        "inherit_parent_scope": inherit_parent_scope,
    }
    # The encoder gets its own dict so the returned payload is a separate object.
    encoded = engine.encode("memory_placement", dict(fields), persist=False)
    engine.store.append_event(
        event_type="placement.created",
        arthaanu=encoded,
        component="memuron.documents",
        payload=event_metadata or {},
    )
    return {"id": encoded.artha_id, **fields}
|
|
2439
|
+
|
|
2440
|
+
|
|
2441
|
+
def place_node_in_collection(
    engine: ArthaEngine,
    *,
    parent_id: str,
    child_id: str,
    name: str,
    scope: list[str] | None = None,
    metadata: dict[str, Any] | None = None,
    inherit_parent_scope: bool = True,
    event_metadata: dict[str, object] | None = None,
) -> dict[str, Any]:
    """Place ``child_id`` inside the collection ``parent_id`` and return the placement.

    The parent must be a node whose ``node_type`` is ``"collection"``, the
    child is looked up via ``get_memory``, a node cannot be placed in itself,
    and the placement must not introduce a cycle. When ``inherit_parent_scope``
    is true the parent's scope is merged into the placement scope and
    propagated to the child and its descendants. Projections are refreshed
    before the stored placement is read back.

    Raises:
        ValueError: If the parent is not a collection, parent and child are
            the same node, or the placement would create a cycle.
    """
    collection = get_memory(engine, parent_id)
    if collection.get("node_type") != "collection":
        raise ValueError("parent_id must be a collection node")
    # Existence check only; the result is unused. Presumably get_memory raises
    # for unknown ids - confirm against its definition.
    get_memory(engine, child_id)
    if parent_id == child_id:
        raise ValueError("collection cannot contain itself")
    if _placement_would_create_cycle(engine, parent_id=parent_id, child_id=child_id):
        raise ValueError("placement would create a collection cycle")

    effective_scope = list(scope or [])
    if inherit_parent_scope:
        effective_scope = _merge_inherited_scope(
            effective_scope,
            list(collection.get("scope") or []),
        )

    encoded = engine.encode(
        "memory_placement",
        {
            "parent_id": parent_id,
            "child_id": child_id,
            "name": name,
            "scope": effective_scope,
            "metadata": metadata or {},
            "inherit_parent_scope": inherit_parent_scope,
        },
        persist=False,
    )
    engine.store.append_event(
        event_type="placement.created",
        arthaanu=encoded,
        component="memuron.collections",
        payload=event_metadata or {},
    )

    if inherit_parent_scope:
        # A fresh copy of the parent scope is handed to the propagation step.
        _propagate_scope_to_node_and_descendants(
            engine,
            node_id=child_id,
            parent_scope=list(collection.get("scope") or []),
            event_metadata=event_metadata,
        )
    refresh_memory_projections(engine)
    return placement_payload(engine, encoded.artha_id)
|
|
2493
|
+
|
|
2494
|
+
|
|
2495
|
+
def _placement_would_create_cycle(engine: ArthaEngine, *, parent_id: str, child_id: str) -> bool:
    """Return true when adding parent -> child would make child an ancestor of parent.

    Walks the existing placement graph depth-first from ``child_id`` and
    reports whether ``parent_id`` is reachable through parent -> child edges.
    """
    descendants: dict[str, list[str]] = {}
    for placement_row in _list_placement_rows(engine):
        entry = _placement_row_to_dict(placement_row)
        descendants.setdefault(str(entry["parent_id"]), []).append(str(entry["child_id"]))

    pending = [child_id]
    explored: set[str] = set()
    while pending:
        node_id = pending.pop()
        if node_id == parent_id:
            return True
        if node_id not in explored:
            explored.add(node_id)
            pending.extend(descendants.get(node_id, []))
    return False
|
|
2513
|
+
|
|
2514
|
+
|
|
2515
|
+
def _placement_row_to_dict(row: dict[str, Any]) -> dict[str, Any]:
    """Convert a placement projection row into a plain placement dict.

    ``scope_json`` and ``metadata_json`` are decoded with ``_parse_json_field``;
    a decoded value of the wrong container type falls back to an empty
    list/dict. Identifier and name columns are coerced to ``str``.
    """
    decoded_scope = _parse_json_field(row.get("scope_json"), [])
    decoded_metadata = _parse_json_field(row.get("metadata_json"), {})
    if not isinstance(decoded_scope, list):
        decoded_scope = []
    if not isinstance(decoded_metadata, dict):
        decoded_metadata = {}
    return {
        "id": str(row["placement_id"]),
        "parent_id": str(row["parent_id"]),
        "child_id": str(row["child_id"]),
        "name": str(row["name"]),
        "scope": decoded_scope,
        "metadata": decoded_metadata,
        # A missing column defaults to inheriting the parent scope.
        "inherit_parent_scope": bool(row.get("inherit_parent_scope", True)),
    }
|
|
2527
|
+
|
|
2528
|
+
|
|
2529
|
+
def placement_payload(engine: ArthaEngine, placement_id: str) -> dict[str, Any]:
    """Return the placement dict for ``placement_id``.

    Scans the rows from ``_list_placement_rows`` and converts the first one
    whose ``placement_id`` (as a string) equals the requested id.

    Raises:
        KeyError: If no placement row has that id.
    """
    matching_row = next(
        (
            candidate
            for candidate in _list_placement_rows(engine)
            if str(candidate["placement_id"]) == placement_id
        ),
        None,
    )
    if matching_row is None:
        raise KeyError(f"Placement not found: {placement_id}")
    return _placement_row_to_dict(matching_row)
|
|
2534
|
+
|
|
2535
|
+
|
|
2536
|
+
def collection_members(engine: ArthaEngine, collection_id: str) -> list[dict[str, Any]]:
    """List the direct members of a collection.

    Each entry pairs the placement dict (``"placement"``) with the child's
    memory payload (``"node"``). Placements whose child lookup raises
    ``KeyError`` are skipped.
    """
    # Looked up only for its side effect; the returned node is not used.
    get_memory(engine, collection_id)
    collected: list[dict[str, Any]] = []
    for placement_row in _list_placement_rows(engine):
        if str(placement_row["parent_id"]) == collection_id:
            placement = _placement_row_to_dict(placement_row)
            try:
                child_node = memory_payload(engine, placement["child_id"])
            except KeyError:
                continue
            else:
                collected.append({"placement": placement, "node": child_node})
    return collected
|
|
2549
|
+
|
|
2550
|
+
|
|
2551
|
+
def _graph_scope_patterns(scope: str | list[str] | None) -> list[str]:
    """Normalize a scope argument into a list of non-empty pattern strings.

    ``None`` yields an empty list, a string is split on commas, and any other
    iterable is used item by item. Every item is stringified and stripped;
    items that are empty after stripping are dropped.
    """
    if scope is None:
        return []
    raw_parts = scope.split(",") if isinstance(scope, str) else scope
    stripped_parts = (str(raw_part).strip() for raw_part in raw_parts)
    return [pattern for pattern in stripped_parts if pattern]
|
|
2557
|
+
|
|
2558
|
+
|
|
2559
|
+
def _graph_node_from_memory(
    memory: dict[str, Any],
    degree: int = 0,
    self_loop_count: int = 0,
) -> dict[str, Any]:
    """Build the graph-export node dict for a memory.

    The label is the first 50 characters of the content, with ``...`` appended
    when the content is longer. ``degree`` is reported under both
    ``"importance"`` and ``"degree"``, and the node type under both
    ``"node_type"`` and ``"type"``. Missing optional fields fall back to
    empty values or the defaults ``"text"`` / ``"memory"``.
    """
    content = str(memory.get("content") or "")
    label = content if len(content) <= 50 else content[:50] + "..."
    node_id = str(memory["id"])
    node_type = memory.get("node_type") or "text"
    return {
        "id": node_id,
        "label": label,
        "content": content,
        "node_type": node_type,
        "payload": memory.get("payload") or {},
        "perception": memory.get("perception") or content,
        "encoding": memory.get("encoding") or "memory",
        "metadata": memory.get("metadata") or {},
        "scope": list(memory.get("scope") or []),
        "importance": degree,
        "degree": degree,
        "self_loop_count": self_loop_count,
        "type": node_type,
        "timestamp": memory.get("timestamp"),
    }
|
|
2582
|
+
|
|
2583
|
+
|
|
2584
|
+
def _graph_edge_from_link(link: dict[str, Any]) -> dict[str, Any]:
    """Build the graph-export edge dict for a semantic link.

    The link id is reported as ``"id"``, ``"link_id"`` and ``"parallel_key"``.
    Edges are marked undirected, and ``"is_self_loop"`` is true when source
    and target are the same node. Non-dict metadata is replaced by ``{}``.
    """
    link_id = str(link["link_id"])
    source_id = str(link["source_id"])
    target_id = str(link["target_id"])
    raw_metadata = link.get("metadata")
    edge: dict[str, Any] = {
        "id": link_id,
        "link_id": link_id,
        "source": source_id,
        "target": target_id,
        "description": str(link.get("description") or ""),
        "metadata": raw_metadata if isinstance(raw_metadata, dict) else {},
        "type": "semantic_link",
        "edge_type": "semantic_link",
        "directed": False,
        "is_self_loop": source_id == target_id,
        "parallel_key": link_id,
    }
    return edge
|
|
2601
|
+
|
|
2602
|
+
|
|
2603
|
+
def _graph_edge_from_placement(placement: dict[str, Any]) -> dict[str, Any]:
    """Build the graph-export edge dict for a collection placement.

    The edge runs from the placement's parent to its child, and the placement
    name is used for both ``"description"`` and ``"name"``.
    """
    placement_name = str(placement["name"])
    edge: dict[str, Any] = {
        "id": str(placement["id"]),
        "source": str(placement["parent_id"]),
        "target": str(placement["child_id"]),
        "description": placement_name,
        "type": "placement",
        "name": placement_name,
        "scope": list(placement.get("scope") or []),
        "metadata": placement.get("metadata") or {},
    }
    return edge
|
|
2614
|
+
|
|
2615
|
+
|
|
2616
|
+
def export_graph(
    engine: ArthaEngine,
    *,
    scope: str | list[str] | None = None,
    limit: int = 1000,
) -> dict[str, Any]:
    """Export memories, links and placements as a node/edge graph.

    At most ``limit`` scope-filtered memories seed the graph. Every link that
    touches a seeded memory is exported, and the memory at its other end is
    fetched and added as a node even when it was not among the seeds.
    Placements are exported only when both parent and child are nodes.

    Returns:
        A dict with ``"nodes"``, ``"edges"`` and a ``"metadata"`` section
        holding the counts, the normalized scope patterns and the names of
        the projections used.
    """
    _require_memory_projections(engine)
    scope_patterns = _graph_scope_patterns(scope)
    seed_rows = _filtered_memory_rows(engine, scope=scope_patterns or None)[:limit]
    memories = [_row_to_memory_dict(seed_row) for seed_row in seed_rows]
    known_ids = {memory["id"] for memory in memories}

    # Keep links with at least one seeded endpoint; remember the other endpoint.
    touching_links: list[dict[str, Any]] = []
    missing_ids: set[str] = set()
    for link_row in _list_link_rows(engine):
        endpoints = (str(link_row["source_id"]), str(link_row["target_id"]))
        if not any(endpoint in known_ids for endpoint in endpoints):
            continue
        touching_links.append(link_row)
        missing_ids.update(endpoint for endpoint in endpoints if endpoint not in known_ids)

    if missing_ids:
        fetched = _fetch_memory_rows_by_ids(engine, list(missing_ids))
        for fetched_id, fetched_row in fetched.items():
            memories.append(_row_to_memory_dict(fetched_row))
            known_ids.add(fetched_id)

    degree: dict[str, int] = dict.fromkeys(known_ids, 0)
    self_loop_count: dict[str, int] = dict.fromkeys(known_ids, 0)
    edges: list[dict[str, Any]] = []
    for link_row in touching_links:
        source_id = str(link_row["source_id"])
        target_id = str(link_row["target_id"])
        if source_id != target_id:
            for endpoint in (source_id, target_id):
                degree[endpoint] = degree.get(endpoint, 0) + 1
        else:
            # Self loops are tallied separately and do not add to the degree.
            self_loop_count[source_id] = self_loop_count.get(source_id, 0) + 1
        edges.append(_graph_edge_from_link(_link_row_to_dict(link_row)))

    for placement_row in _list_placement_rows(engine):
        placement = _placement_row_to_dict(placement_row)
        parent_id = str(placement["parent_id"])
        child_id = str(placement["child_id"])
        if parent_id in known_ids and child_id in known_ids:
            degree[parent_id] = degree.get(parent_id, 0) + 1
            degree[child_id] = degree.get(child_id, 0) + 1
            edges.append(_graph_edge_from_placement(placement))

    nodes = []
    for memory in memories:
        node_key = str(memory["id"])
        nodes.append(
            _graph_node_from_memory(
                memory,
                degree=degree.get(node_key, 0),
                self_loop_count=self_loop_count.get(node_key, 0),
            )
        )
    return {
        "nodes": nodes,
        "edges": edges,
        "metadata": {
            "node_count": len(nodes),
            "edge_count": len(edges),
            "scope": scope_patterns,
            "projection_sources": ["memuron_memories", "memuron_links", "memuron_placements"],
        },
    }
|
|
2687
|
+
|
|
2688
|
+
|
|
2689
|
+
def graph_hubs(
    engine: ArthaEngine,
    *,
    scope: str | list[str] | None = None,
    limit: int = 10,
) -> tuple[list[dict[str, Any]], int]:
    """Return the highest-degree nodes of the exported graph.

    Nodes are ordered by descending degree, with the node id (as a string)
    breaking ties.

    Returns:
        ``(hubs, total)`` where ``hubs`` holds up to ``limit`` summaries and
        ``total`` is the number of nodes in the exported graph.
    """

    def _by_degree_then_id(node: dict[str, Any]) -> tuple[int, str]:
        return (-int(node.get("degree") or 0), str(node.get("id")))

    ranked = sorted(export_graph(engine, scope=scope)["nodes"], key=_by_degree_then_id)
    hubs = []
    for node in ranked[:limit]:
        hubs.append(
            {
                "id": node["id"],
                "content": node["content"],
                "scope": node["scope"],
                "degree": node["degree"],
                "hub_score": node["degree"],
            }
        )
    return hubs, len(ranked)
|
|
2711
|
+
|
|
2712
|
+
|
|
2713
|
+
def graph_neighborhood(
    engine: ArthaEngine,
    *,
    memory_id: str,
    hops: int = 2,
    scope: str | list[str] | None = None,
) -> dict[str, Any]:
    """Return every node within ``hops`` edges of ``memory_id``.

    The exported graph is treated as undirected and searched breadth-first.
    Self loops are ignored. Edges that reference a node missing from the
    export are ignored too, so a dangling link cannot surface as a
    ``KeyError`` while the result is assembled.

    Args:
        engine: Engine passed through to ``export_graph``.
        memory_id: Id of the center node.
        hops: Maximum number of edges to follow from the center.
        scope: Scope filter forwarded to ``export_graph``.

    Returns:
        A dict with the center id, the hop limit, the ``"neighborhood"`` list
        (sorted by hop distance, then id) and its length.

    Raises:
        KeyError: If ``memory_id`` is not a node of the exported graph.
    """
    # Function-scope import keeps this block self-contained.
    from collections import deque

    graph = export_graph(engine, scope=scope)
    nodes_by_id = {str(node["id"]): node for node in graph["nodes"]}
    if memory_id not in nodes_by_id:
        raise KeyError(f"Memory not found: {memory_id}")

    adjacency: dict[str, set[str]] = {node_id: set() for node_id in nodes_by_id}
    for edge in graph["edges"]:
        source = str(edge["source"])
        target = str(edge["target"])
        if source == target:
            continue
        # Skip edges whose endpoint was not exported as a node; following them
        # would reach an id that cannot be resolved below.
        if source not in nodes_by_id or target not in nodes_by_id:
            continue
        adjacency[source].add(target)
        adjacency[target].add(source)

    visited = {memory_id: 0}
    queue: deque[tuple[str, int]] = deque([(memory_id, 0)])
    while queue:
        current_id, current_hop = queue.popleft()
        if current_hop >= hops:
            continue
        for neighbor_id in sorted(adjacency[current_id]):
            if neighbor_id in visited:
                continue
            visited[neighbor_id] = current_hop + 1
            queue.append((neighbor_id, current_hop + 1))

    neighborhood = []
    for node_id, hop_distance in sorted(visited.items(), key=lambda item: (item[1], item[0])):
        node = nodes_by_id[node_id]
        neighborhood.append(
            {
                "id": node["id"],
                "content": node["content"],
                "scope": node["scope"],
                "hop_distance": hop_distance,
                "is_center": node_id == memory_id,
            }
        )
    return {
        "center_memory_id": memory_id,
        "hops": hops,
        "neighborhood": neighborhood,
        "total_in_neighborhood": len(neighborhood),
    }
|
|
2763
|
+
|
|
2764
|
+
|
|
2765
|
+
def graph_path(
    engine: ArthaEngine,
    *,
    from_id: str,
    to_id: str,
    scope: str | list[str] | None = None,
) -> dict[str, Any]:
    """Find a shortest path between two nodes of the exported graph.

    The graph is treated as undirected and searched breadth-first, visiting
    neighbors in sorted id order. Self loops are ignored. Edges that
    reference a node missing from the export are ignored too, so a path can
    never pass through an id that has no node to describe.

    Args:
        engine: Engine passed through to ``export_graph``.
        from_id: Id of the start node.
        to_id: Id of the destination node.
        scope: Scope filter forwarded to ``export_graph``.

    Returns:
        On success, a dict with ``"status": "success"``, the ``"path"`` of
        ids, its ``"length"`` in edges, and ``"memories"`` holding each id
        with the first 100 characters of its content. Otherwise a dict with
        ``"status": "no_path"`` and a message.

    Raises:
        KeyError: If either endpoint is not a node of the exported graph.
    """
    # Function-scope import keeps this block self-contained.
    from collections import deque

    graph = export_graph(engine, scope=scope)
    nodes_by_id = {str(node["id"]): node for node in graph["nodes"]}
    if from_id not in nodes_by_id or to_id not in nodes_by_id:
        raise KeyError("One or both memories not found")

    def _describe(path_ids: list[str]) -> list[dict[str, Any]]:
        return [
            {"id": node_id, "content": nodes_by_id[node_id]["content"][:100]}
            for node_id in path_ids
        ]

    if from_id == to_id:
        return {
            "status": "success",
            "path": [from_id],
            "length": 0,
            "memories": _describe([from_id]),
        }

    adjacency: dict[str, set[str]] = {node_id: set() for node_id in nodes_by_id}
    for edge in graph["edges"]:
        source = str(edge["source"])
        target = str(edge["target"])
        if source == target:
            continue
        # Skip edges whose endpoint was not exported as a node; a path through
        # one could not be described below.
        if source not in nodes_by_id or target not in nodes_by_id:
            continue
        adjacency[source].add(target)
        adjacency[target].add(source)

    queue: deque[tuple[str, list[str]]] = deque([(from_id, [from_id])])
    visited = {from_id}
    while queue:
        current_id, path = queue.popleft()
        for neighbor_id in sorted(adjacency[current_id]):
            if neighbor_id == to_id:
                full_path = [*path, neighbor_id]
                return {
                    "status": "success",
                    "path": full_path,
                    "length": len(full_path) - 1,
                    "memories": _describe(full_path),
                }
            if neighbor_id not in visited:
                visited.add(neighbor_id)
                queue.append((neighbor_id, [*path, neighbor_id]))
    return {
        "status": "no_path",
        "message": f"No path found between {from_id[:8]}... and {to_id[:8]}...",
    }
|
|
2815
|
+
|
|
2816
|
+
|
|
2817
|
+
def semantic_traverse_graph(
    engine: ArthaEngine,
    *,
    start_memory_id: str,
    query: str,
    max_hops: int = 2,
    edge_similarity_threshold: float = 0.7,
    scope: str | list[str] | None = None,
) -> dict[str, Any]:
    """Walk the graph from ``start_memory_id`` along edges that match ``query``.

    Each exported edge is scored with ``_cosine_score`` between the query
    vector and the stored embedding of the link row with the same id. Edges
    with no matching link row are scored against an empty vector. The walk is
    breadth-first, limited to ``max_hops``, and only follows edges whose score
    is at least ``edge_similarity_threshold``. Self loops are reported as
    traversed edges but never advance the walk.

    Args:
        engine: Engine used for the export, the query embedding and link rows.
        start_memory_id: Id of the node to start from.
        query: Text embedded and compared against edge embeddings.
        max_hops: Maximum number of edges to follow from the start node.
        edge_similarity_threshold: Minimum score for an edge to be followed.
        scope: Scope filter forwarded to ``export_graph``.

    Returns:
        A dict echoing the inputs, plus ``"memories"`` (sorted by hop
        distance, then id), ``"traversed_edges"`` (sorted by descending
        similarity) and their counts.

    Raises:
        KeyError: If ``start_memory_id`` is not a node of the exported graph.
    """
    # Function-scope import keeps this block self-contained.
    from collections import deque

    graph = export_graph(engine, scope=scope)
    nodes_by_id = {str(node["id"]): node for node in graph["nodes"]}
    if start_memory_id not in nodes_by_id:
        raise KeyError(f"Memory not found: {start_memory_id}")

    query_vector = _embed_query_vector(engine, query)
    # Key by the stringified id: the lookup below uses str(edge["id"]), so a
    # non-string link_id would otherwise never match and lose its embedding.
    rows_by_id = {str(row["link_id"]): row for row in _list_link_rows(engine)}
    adjacency: dict[str, list[dict[str, Any]]] = {node_id: [] for node_id in nodes_by_id}
    for edge in graph["edges"]:
        source = str(edge["source"])
        target = str(edge["target"])
        row = rows_by_id.get(str(edge["id"]))
        embedding = _parse_json_field(row.get("embedding_json") if row else None, [])
        similarity = _cosine_score(
            query_vector,
            [float(value) for value in embedding] if isinstance(embedding, list) else [],
        )
        edge_with_similarity = {**edge, "similarity": similarity}
        adjacency.setdefault(source, []).append(edge_with_similarity)
        if target != source:
            adjacency.setdefault(target, []).append(edge_with_similarity)

    visited = {start_memory_id: 0}
    queue: deque[tuple[str, int]] = deque([(start_memory_id, 0)])
    traversed_edges: list[dict[str, Any]] = []
    traversed_edge_ids: set[str] = set()
    while queue:
        current_id, current_hop = queue.popleft()
        if current_hop >= max_hops:
            continue
        # Best-matching edges first; the edge id breaks ties deterministically.
        for edge in sorted(
            adjacency.get(current_id, []),
            key=lambda item: (-float(item.get("similarity") or 0), str(item.get("id"))),
        ):
            similarity = float(edge.get("similarity") or 0)
            if similarity < edge_similarity_threshold:
                continue
            source = str(edge["source"])
            target = str(edge["target"])
            neighbor_id = target if source == current_id else source
            is_self_loop = source == target
            # Edges may point at nodes that were not exported; ignore those.
            if neighbor_id not in nodes_by_id:
                continue
            edge_id = str(edge.get("id") or f"{source}:{target}")
            if edge_id not in traversed_edge_ids:
                traversed_edge_ids.add(edge_id)
                traversed_edges.append(
                    {
                        "id": edge_id,
                        "source_id": source,
                        "target_id": target,
                        "description": edge.get("description") or "",
                        "similarity": similarity,
                        "from_memory_id": current_id,
                        "to_memory_id": neighbor_id,
                        "is_self_loop": is_self_loop,
                    }
                )
            if is_self_loop:
                continue
            next_hop = current_hop + 1
            previous_hop = visited.get(neighbor_id)
            if previous_hop is None or next_hop < previous_hop:
                visited[neighbor_id] = next_hop
                queue.append((neighbor_id, next_hop))

    memories = []
    for memory_id, hop_distance in sorted(visited.items(), key=lambda item: (item[1], item[0])):
        node = nodes_by_id[memory_id]
        memories.append(
            {
                "id": node["id"],
                "content": node["content"],
                "scope": node["scope"],
                "hop_distance": hop_distance,
                "is_start": memory_id == start_memory_id,
            }
        )
    traversed_edges.sort(key=lambda edge: float(edge["similarity"]), reverse=True)
    return {
        "start_memory_id": start_memory_id,
        "query": query,
        "max_hops": max_hops,
        "edge_similarity_threshold": edge_similarity_threshold,
        "scope": _graph_scope_patterns(scope),
        "memories": memories,
        "traversed_edges": traversed_edges,
        "total_memories": len(memories),
        "total_edges": len(traversed_edges),
    }
|
|
2916
|
+
|
|
2917
|
+
|
|
2918
|
+
def _link_row_to_dict(row: dict[str, Any]) -> dict[str, Any]:
    """Convert a link projection row into a plain link dict.

    A string ``embedding_json`` is decoded with ``json.loads``; anything that
    is not a list afterwards becomes ``[]``. ``metadata_json`` is decoded with
    ``_parse_json_field`` and replaced by ``{}`` when it is not a dict.
    """
    raw_embedding = row.get("embedding_json")
    decoded_embedding = json.loads(raw_embedding) if isinstance(raw_embedding, str) else raw_embedding
    decoded_metadata = _parse_json_field(row.get("metadata_json"), {})
    if not isinstance(decoded_metadata, dict):
        decoded_metadata = {}
    if not isinstance(decoded_embedding, list):
        decoded_embedding = []
    return {
        "link_id": row["link_id"],
        "source_id": row["source_id"],
        "target_id": row["target_id"],
        "description": row["description"],
        "metadata": decoded_metadata,
        "embedding": decoded_embedding,
    }
|
|
2931
|
+
|
|
2932
|
+
|
|
2933
|
+
def _memory_endpoint_payload(row: dict[str, Any]) -> dict[str, Any]:
    """Summarize a memory row for use as a relationship-edge endpoint.

    Returns the memory's id, content and scope, plus the source identity
    derived from its metadata via ``source_identity_from_metadata``.
    """
    memory = _row_to_memory_dict(row)
    endpoint: dict[str, Any] = {
        "id": memory["id"],
        "content": memory["content"],
        "scope": memory.get("scope") or [],
    }
    endpoint["source_metadata"] = source_identity_from_metadata(memory.get("metadata") or {})
    return endpoint
|
|
2941
|
+
|
|
2942
|
+
|
|
2943
|
+
def _relationship_edge_result(
    link: dict[str, Any],
    *,
    score: float,
    source_row: dict[str, Any] | None,
    target_row: dict[str, Any] | None,
) -> dict[str, Any] | None:
    """Build the search result for a link hit, or ``None`` if an endpoint is missing.

    Args:
        link: Link dict with ``link_id``, ``source_id`` and ``target_id``, and
            optional ``description`` / ``metadata``.
        score: Semantic score reported as ``"semantic_score"``.
        source_row: Memory row of the source endpoint; falsy means missing.
        target_row: Memory row of the target endpoint; falsy means missing.

    Returns:
        A ``"relationship_edge"`` result with both endpoints expanded, or
        ``None`` when either endpoint row is missing or empty.
    """
    if not (source_row and target_row):
        return None
    link_id = str(link["link_id"])
    raw_metadata = link.get("metadata")
    metadata = raw_metadata if isinstance(raw_metadata, dict) else {}
    is_self_loop = str(link["source_id"]) == str(link["target_id"])
    return {
        "type": "relationship_edge",
        "id": link_id,
        "link_id": link_id,
        "description": str(link.get("description") or ""),
        "metadata": metadata,
        "semantic_score": score,
        "matched_via": "link",
        "matched_via_link_id": link_id,
        "is_self_loop": is_self_loop,
        "parallel_key": link_id,
        "source": _memory_endpoint_payload(source_row),
        "target": _memory_endpoint_payload(target_row),
    }
|
|
2968
|
+
|
|
2969
|
+
|
|
2970
|
+
def _self_loop_link_result(
    link: dict[str, Any],
    *,
    score: float,
    memory_row: dict[str, Any] | None,
) -> dict[str, Any] | None:
    """Build a memory-node search result for a link that points at itself.

    Returns ``None`` when ``memory_row`` is missing or empty. Otherwise the
    result describes the memory and records that it matched through the
    given link (``matched_via`` is ``"semantic_link"``).
    """
    if not memory_row:
        return None
    memory = _row_to_memory_dict(memory_row)
    result: dict[str, Any] = {
        "type": "memory_node",
        "id": memory["id"],
        "content": memory["content"],
        "scope": memory.get("scope") or [],
        "semantic_score": score,
        "matched_via": "semantic_link",
    }
    result["matched_via_link_id"] = str(link["link_id"])
    result["matched_link_description"] = str(link.get("description") or "")
    return result
|
|
2989
|
+
|
|
2990
|
+
|
|
2991
|
+
def _filter_links_by_endpoint_scope(
    link_rows: list[dict[str, Any]],
    scope_index: dict[str, list[str]],
    scope: list[str],
) -> list[dict[str, Any]]:
    """MemBrain-style: both link endpoints must match every scope pattern.

    ``scope_index`` maps memory ids to their scope tokens; an endpoint that is
    absent from the index is treated as having an empty scope. Matching is
    delegated to ``_scope_matches_filter``.
    """

    def _both_endpoints_match(link_row: dict[str, Any]) -> bool:
        source_scope = scope_index.get(str(link_row["source_id"]), [])
        target_scope = scope_index.get(str(link_row["target_id"]), [])
        if not _scope_matches_filter(source_scope, scope):
            return False
        return bool(_scope_matches_filter(target_scope, scope))

    return [link_row for link_row in link_rows if _both_endpoints_match(link_row)]
|
|
3006
|
+
|
|
3007
|
+
|
|
3008
|
+
def search_memories(
    engine: ArthaEngine,
    query: str,
    *,
    k: int = 5,
    scope: list[str] | None = None,
    include_links: bool | None = None,
) -> tuple[list[dict[str, Any]], list[str] | None]:
    """Search memories, and optionally links, for ``query``.

    Memory candidates come from exactly one back end: hybrid search when
    ``settings.search_hybrid`` is set, otherwise pgvector when it is ready,
    otherwise the in-process similarity fallback. When links are included,
    link hits are added as relationship edges. A link whose source and target
    are the same memory is added as a memory-node hit instead. Candidates are
    then filtered by ``settings.search_min_semantic_score``, ordered by score,
    de-duplicated by memory content and cut to ``k`` entries.

    Args:
        engine: Engine providing the store and embeddings.
        query: Text to search for.
        k: Maximum number of results.
        scope: Optional scope patterns forwarded to the back ends.
        include_links: Whether to search links as well; ``None`` defers to
            ``settings.search_include_links``.

    Returns:
        ``(results, scope)`` where ``results`` is a list of memory-node and
        relationship-edge dicts and ``scope`` is the argument passed in.
    """
    from memuron.application.config import settings
    from memuron.search.hybrid import hybrid_memory_search, retrieve_pool_size

    _require_memory_projections(engine)
    if include_links is None:
        include_links = settings.search_include_links

    query_vector = _embed_query_vector(engine, query)
    store = engine.store
    # The pool is sized from 2 * k when links are included, otherwise from k.
    pool_size = retrieve_pool_size(k * 2 if include_links else k)
    candidates: list[tuple[str, dict[str, Any], float]] = []

    # --- memory candidates -------------------------------------------------
    if settings.search_hybrid:
        candidates.extend(
            (kind, payload, score)
            for kind, payload, score in hybrid_memory_search(
                engine,
                query,
                query_vector,
                k=pool_size,
                scope=scope,
            )
        )
    elif pgvector_is_ready(store):
        vector_rows = pgvector_memory_search(
            store,
            query_vector,
            top_k=pool_size,
            scope=scope,
            include_content=True,
        )
        for row in vector_rows:
            row_scope = _parse_json_field(row.get("scope_json"), [])
            node_payload = {
                "type": "memory_node",
                "id": row["artha_id"],
                "content": str(row.get("content") or ""),
                "scope": row_scope if isinstance(row_scope, list) else [],
                "source_metadata": source_identity_from_metadata(
                    _parse_json_field(row.get("metadata_json"), {})
                ),
                "matched_via": "memory",
                "matched_via_link_id": None,
            }
            candidates.append(("memory_node", node_payload, float(row["semantic_score"])))
    else:
        hits = _memory_similarity_hits(
            engine,
            query_vector,
            scope=scope,
            top_k=pool_size,
        )
        rows_by_id = _fetch_memory_rows_by_ids(
            engine,
            [hit_id for hit_id, _hit_score in hits],
        )
        for hit_id, hit_score in hits:
            row = rows_by_id.get(hit_id)
            if not row:
                continue
            memory = _row_to_memory_dict(row)
            raw_metadata = memory.get("metadata")
            node_payload = {
                "type": "memory_node",
                "id": hit_id,
                "content": memory["content"],
                "scope": memory.get("scope") or [],
                "source_metadata": source_identity_from_metadata(
                    raw_metadata if isinstance(raw_metadata, dict) else {}
                ),
                "matched_via": "memory",
                "matched_via_link_id": None,
            }
            candidates.append(("memory_node", node_payload, hit_score))

    # --- link candidates ---------------------------------------------------
    if include_links:
        link_limit = k
        loop_memory_ids: set[str] = set()
        link_hits: list[tuple[dict[str, Any], float]] = []

        def _remember_link(link: dict[str, Any], link_score: float) -> None:
            # Queue the hit and note self loops so their memory can be fetched.
            link_hits.append((link, link_score))
            if str(link["source_id"]) == str(link["target_id"]):
                loop_memory_ids.add(str(link["source_id"]))

        if pgvector_is_ready(store):
            for row in pgvector_link_search(store, query_vector, top_k=link_limit, scope=scope):
                _remember_link(
                    {
                        "link_id": row["link_id"],
                        "source_id": row["source_id"],
                        "target_id": row["target_id"],
                        "description": row["description"],
                        "metadata": _parse_json_field(row.get("metadata_json"), {}),
                    },
                    float(row["semantic_score"]),
                )
        else:
            all_link_rows = _list_link_search_rows(engine)
            if all_link_rows:
                searchable_links = all_link_rows
                if scope:
                    scope_by_memory = {
                        str(memory_row["artha_id"]): _parse_json_field(memory_row.get("scope_json"), [])
                        for memory_row in _list_memory_search_rows(engine)
                    }
                    searchable_links = _filter_links_by_endpoint_scope(
                        all_link_rows, scope_by_memory, scope
                    )
                scored_links = _cosine_similarity_hits(
                    engine,
                    query_vector,
                    searchable_links,
                    top_k=link_limit,
                    id_key="link_id",
                    scope_key=None,
                )
                links_by_id = {
                    str(link_row["link_id"]): _link_row_to_dict(link_row)
                    for link_row in all_link_rows
                }
                for link_id, link_score in scored_links:
                    known_link = links_by_id.get(link_id)
                    if not known_link:
                        continue
                    _remember_link(
                        {
                            "link_id": known_link["link_id"],
                            "source_id": known_link["source_id"],
                            "target_id": known_link["target_id"],
                            "description": known_link["description"],
                            "metadata": known_link.get("metadata") or {},
                        },
                        link_score,
                    )

        loop_rows = _fetch_memory_rows_by_ids(engine, list(loop_memory_ids))
        for link, link_score in link_hits:
            if str(link["source_id"]) != str(link["target_id"]):
                candidates.append(("relationship_edge", link, link_score))
                continue
            loop_result = _self_loop_link_result(
                link,
                score=link_score,
                memory_row=loop_rows.get(str(link["source_id"])),
            )
            if loop_result:
                candidates.append(("memory_node", loop_result, link_score))

    # --- score threshold ---------------------------------------------------
    min_score = settings.search_min_semantic_score
    if settings.search_hybrid:
        # In hybrid mode only relationship edges are held to the minimum score.
        candidates = [
            candidate
            for candidate in candidates
            if candidate[0] != "relationship_edge" or candidate[2] >= min_score
        ]
    else:
        candidates = [candidate for candidate in candidates if candidate[2] >= min_score]

    # --- de-duplicate memory nodes by content ------------------------------
    first_index_by_content: dict[str, int] = {}
    ranked: list[tuple[str, dict[str, Any], float]] = []
    for kind, payload, score in sorted(candidates, key=lambda item: item[2], reverse=True):
        content_key = ""
        if kind == "memory_node":
            content_key = str(payload.get("content") or "").strip().lower()[:500]
        duplicate_of = first_index_by_content.get(content_key) if content_key else None
        if duplicate_of is None:
            if content_key:
                first_index_by_content[content_key] = len(ranked)
            ranked.append((kind, payload, score))
            continue
        kept_kind, kept_payload, kept_score = ranked[duplicate_of]
        incoming_via = payload.get("matched_via")
        # A duplicate found through a link annotates the kept plain-memory hit.
        if incoming_via and incoming_via != "memory" and kept_payload.get("matched_via") == "memory":
            ranked[duplicate_of] = (
                kept_kind,
                {
                    **kept_payload,
                    "matched_via": payload.get("matched_via"),
                    "matched_via_link_id": payload.get("matched_via_link_id"),
                    "matched_link_description": payload.get("matched_link_description"),
                },
                kept_score,
            )

    top = ranked[:k]

    # --- expand relationship edges with their endpoint memories ------------
    edge_payloads = [payload for kind, payload, _score in top if kind == "relationship_edge"]
    endpoint_ids = {str(payload["source_id"]) for payload in edge_payloads} | {
        str(payload["target_id"]) for payload in edge_payloads
    }
    endpoint_rows = _fetch_memory_rows_by_ids(engine, list(endpoint_ids))

    results: list[dict[str, Any]] = []
    for kind, payload, score in top:
        if kind == "memory_node":
            results.append({**payload, "semantic_score": payload.get("semantic_score", score)})
        else:
            edge = _relationship_edge_result(
                payload,
                score=score,
                source_row=endpoint_rows.get(str(payload["source_id"])),
                target_row=endpoint_rows.get(str(payload["target_id"])),
            )
            if edge:
                results.append(edge)

    return results, scope
|
|
3232
|
+
|
|
3233
|
+
|
|
3234
|
+
def unlink_memories(
    engine: ArthaEngine,
    memory_id_1: str,
    memory_id_2: str,
    *,
    event_metadata: dict[str, object] | None = None,
) -> int:
    """Remove every link found between two memories and return how many were removed.

    Link ids come from ``_find_link_ids_between`` and each is removed with
    ``_remove_link``. Memory projections are refreshed only when at least one
    link was removed.
    """
    removed_count = 0
    link_ids = _find_link_ids_between(engine, memory_id_1, memory_id_2)
    for removed_count, link_id in enumerate(link_ids, start=1):
        _remove_link(engine, link_id, event_metadata=event_metadata)
    if removed_count:
        refresh_memory_projections(engine)
    return removed_count
|