memuron 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. memuron/__init__.py +3 -0
  2. memuron/actions/__init__.py +12 -0
  3. memuron/actions/context.py +63 -0
  4. memuron/actions/helpers.py +88 -0
  5. memuron/actions/memory.py +340 -0
  6. memuron/actions/memory_write.py +290 -0
  7. memuron/actions/nodes.py +340 -0
  8. memuron/actions/registry.py +5 -0
  9. memuron/actions/runtime.py +37 -0
  10. memuron/actions/spaces_documents.py +720 -0
  11. memuron/actions/sync.py +155 -0
  12. memuron/application/__init__.py +1 -0
  13. memuron/application/api.py +206 -0
  14. memuron/application/app.py +103 -0
  15. memuron/application/capabilities.py +82 -0
  16. memuron/application/cli.py +35 -0
  17. memuron/application/config.py +176 -0
  18. memuron/application/mcp.py +44 -0
  19. memuron/application/mcp_oauth.py +290 -0
  20. memuron/application/registry.py +52 -0
  21. memuron/context.py +532 -0
  22. memuron/documents/__init__.py +1 -0
  23. memuron/documents/link_guardian.py +192 -0
  24. memuron/documents/linking.py +292 -0
  25. memuron/documents/parser.py +1152 -0
  26. memuron/documents/storage.py +151 -0
  27. memuron/documents/url_ingest.py +375 -0
  28. memuron/domain/__init__.py +1 -0
  29. memuron/domain/decoders.py +1 -0
  30. memuron/domain/encoders.py +185 -0
  31. memuron/domain/lifecycles.py +8 -0
  32. memuron/domain/limits.py +6 -0
  33. memuron/domain/representations.py +56 -0
  34. memuron/domain/schemas.py +581 -0
  35. memuron/domain/scope_filter.py +104 -0
  36. memuron/graphfs/__init__.py +1 -0
  37. memuron/graphfs/manual.py +635 -0
  38. memuron/graphfs/projection.py +578 -0
  39. memuron/graphfs/query.py +1782 -0
  40. memuron/graphfs/read_model.py +574 -0
  41. memuron/ingest/__init__.py +1 -0
  42. memuron/ingest/guardian.py +213 -0
  43. memuron/ingest/jobs.py +424 -0
  44. memuron/ingest/prompts.py +147 -0
  45. memuron/memory/__init__.py +1 -0
  46. memuron/memory/engine.py +35 -0
  47. memuron/memory/projections.py +452 -0
  48. memuron/memory/recipes.py +3247 -0
  49. memuron/persistence/__init__.py +1 -0
  50. memuron/persistence/db_pool.py +57 -0
  51. memuron/persistence/identity_store.py +918 -0
  52. memuron/persistence/store_helpers.py +16 -0
  53. memuron/search/__init__.py +1 -0
  54. memuron/search/fulltext.py +110 -0
  55. memuron/search/hybrid.py +284 -0
  56. memuron/search/pgvector.py +252 -0
  57. memuron/security/__init__.py +1 -0
  58. memuron/security/auth.py +143 -0
  59. memuron/security/auth_provider.py +119 -0
  60. memuron/security/authorization.py +53 -0
  61. memuron/security/clerk_scopes.py +94 -0
  62. memuron/security/clerk_webhooks.py +61 -0
  63. memuron/security/jwt_tokens.py +53 -0
  64. memuron/security/passwords.py +38 -0
  65. memuron/security/tenant.py +58 -0
  66. memuron/spaces/__init__.py +1 -0
  67. memuron/spaces/model.py +35 -0
  68. memuron/spaces/service.py +155 -0
  69. memuron/sync/__init__.py +25 -0
  70. memuron/sync/folder.py +828 -0
  71. memuron-0.1.1.dist-info/METADATA +242 -0
  72. memuron-0.1.1.dist-info/RECORD +74 -0
  73. memuron-0.1.1.dist-info/WHEEL +4 -0
  74. memuron-0.1.1.dist-info/entry_points.txt +4 -0
@@ -0,0 +1,3247 @@
1
+ """Memuron memory recipes: ingest, CRUD, and graph helpers."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ from collections import OrderedDict
7
+ from dataclasses import replace
8
+ from datetime import UTC, datetime
9
+ from typing import Any
10
+ from uuid import uuid4
11
+
12
+ from artha_engine import ArthaEngine, EmbeddingArthaanu, EmbeddingCandidate, EmbeddingSearchParams
13
+ from artha_engine.runtime.serde import arthaanu_from_dict, arthaanu_to_dict
14
+ from artha_engine.store.projection_sql import sql_store_fetchall, sql_store_has_tables
15
+
16
+ from memuron.domain.encoders import MemoryEncoderInput, MemoryLinkEncoderInput
17
+ from memuron.application.config import settings
18
+ from memuron.documents.parser import ParsedDocument, parse_source
19
+ from memuron.documents.storage import maybe_store_source_file, presign_source_object
20
+ from memuron.domain.scope_filter import parse_comma_scope, scope_matches_filter
21
+ from memuron.domain.schemas import merge_source_identity_metadata, source_identity_from_metadata
22
+ from memuron.search.pgvector import (
23
+ pgvector_is_ready,
24
+ pgvector_link_search,
25
+ pgvector_memory_search,
26
+ pgvector_memory_search_ids,
27
+ )
28
+ from memuron.ingest.guardian import AgnoGuardian, GuardianError
29
+ from memuron.ingest.prompts import ConnectionSpec, GuardianWritePlan
30
+ from memuron.spaces.service import apply_guardian_space_scope
31
+ from memuron.domain.representations import MemoryArthaanu, MemoryLinkArthaanu, MemoryLinkValue, MemoryValue
32
+ from memuron.security.tenant import merge_org_scope, org_scope_token
33
+
34
# Cap on the number of guardian-proposed connections kept by
# ``_validate_connections``.
MAX_GUARDIAN_LINKS = 2
# Default ``top_k`` for ``find_candidates``.
CANDIDATE_TOP_K = 10
LINK_CANDIDATE_TOP_K = 15
AUTO_LINK_MIN_SCORE = 0.2
DOCUMENT_MARKDOWN_PREVIEW_CHARS = 12_000
DOCUMENT_CHUNK_SCOPE_PREFIX = "document:"
# Maximum number of entries kept in the per-engine query-embedding cache.
QUERY_EMBEDDING_CACHE_SIZE = 128
# Keys recognised as external identifiers by ``external_identity_from_values``.
EXTERNAL_ID_FIELDS = (
    "custom_id",
    "session_id",
    "thread_id",
    "source_id",
    "source_url",
)
# Subset of identifier keys compared when looking for an existing memory with
# the same external identity.
IDEMPOTENCY_EXTERNAL_ID_FIELDS = (
    "custom_id",
    "source_id",
)
43
+
44
+
45
+ def _now_stamp() -> str:
46
+ return datetime.now(UTC).strftime("%Y%m%d%H%M")
47
+
48
+
49
def external_identity_from_values(**values: str | None) -> dict[str, str]:
    """Pick the recognised external-identifier fields out of ``values``.

    Only keys listed in ``EXTERNAL_ID_FIELDS`` are considered. A value is kept
    when it is a string that is still non-empty after stripping surrounding
    whitespace; the stripped form is what gets stored.
    """
    candidates = ((field, values.get(field)) for field in EXTERNAL_ID_FIELDS)
    return {
        field: raw.strip()
        for field, raw in candidates
        if isinstance(raw, str) and raw.strip()
    }
59
+
60
+
61
def _external_identity_from_metadata(metadata: dict[str, Any] | None) -> dict[str, str]:
    """Gather external identifiers from every place metadata may carry them.

    Layers are applied in increasing precedence: the result of
    ``source_identity_from_metadata``, then ``system["source"]``, then
    ``system["external_identity"]``, and finally the top-level metadata keys.
    Returns an empty dict when ``metadata`` is not a dict.
    """
    if not isinstance(metadata, dict):
        return {}

    identity: dict[str, str] = {}
    identity.update(source_identity_from_metadata(metadata))

    layers: list[dict[str, Any]] = []
    system = metadata.get("system")
    if isinstance(system, dict):
        for alias in ("source", "external_identity"):
            section = system.get(alias)
            if isinstance(section, dict):
                layers.append(section)
    # Top-level keys are applied last, so they override the nested aliases.
    layers.append(metadata)

    for layer in layers:
        identity.update(external_identity_from_values(**layer))
    return identity
76
+
77
+
78
def _external_identity_from_payload(payload: dict[str, Any] | None) -> dict[str, str]:
    """Gather external identifiers from a memory payload.

    Identifiers nested under ``payload["source_identity"]`` are read first and
    then overridden by identifier keys found at the top level of the payload.
    Returns an empty dict when ``payload`` is not a dict.
    """
    if not isinstance(payload, dict):
        return {}
    nested = payload.get("source_identity")
    from_nested = external_identity_from_values(**nested) if isinstance(nested, dict) else {}
    from_top_level = external_identity_from_values(**payload)
    return {**from_nested, **from_top_level}
87
+
88
+
89
def _memory_external_identity(memory: dict[str, Any]) -> dict[str, str]:
    """Merge a memory's payload and metadata identifiers; metadata wins on conflict."""
    from_payload = _external_identity_from_payload(memory.get("payload"))
    from_metadata = _external_identity_from_metadata(memory.get("metadata"))
    return {**from_payload, **from_metadata}
93
+
94
+
95
def _metadata_with_external_identity(
    metadata: dict[str, Any] | None,
    external_identity: dict[str, str] | None,
) -> dict[str, Any]:
    """Return a copy of ``metadata`` with external identifiers written into it.

    Identifiers already discoverable in the metadata are combined with the
    explicitly supplied ``external_identity`` (supplied values win). When no
    identifier is found, the shallow copy is returned untouched. Otherwise the
    identity is passed through ``merge_source_identity_metadata``, mirrored as
    top-level keys, and stored under ``system["external_identity"]``.
    """
    enriched = dict(metadata or {})
    known = _external_identity_from_metadata(enriched)
    supplied = external_identity_from_values(**(external_identity or {}))
    identity = {**known, **supplied}
    if not identity:
        return enriched

    enriched = merge_source_identity_metadata(enriched, **identity)
    enriched.update(identity)
    system = dict(enriched.get("system") or {})
    # Keep the old worker alias readable while standardizing on system.source.
    system["external_identity"] = identity
    enriched["system"] = system
    return enriched
114
+
115
+
116
def _payload_with_external_identity(
    payload: dict[str, Any] | None,
    external_identity: dict[str, str] | None,
) -> dict[str, Any]:
    """Return a copy of ``payload`` carrying the merged identity as ``source_identity``.

    Identifiers already present in the payload are combined with the supplied
    ``external_identity`` (supplied values win). The ``source_identity`` key is
    only written when at least one identifier exists.
    """
    enriched = dict(payload or {})
    known = _external_identity_from_payload(enriched)
    supplied = external_identity_from_values(**(external_identity or {}))
    identity = {**known, **supplied}
    if not identity:
        return enriched
    enriched["source_identity"] = identity
    return enriched
128
+
129
+
130
def refresh_memory_projections(engine: ArthaEngine) -> None:
    """Refresh the four Memuron projections on ``engine``, in a fixed order."""
    projection_names = (
        "memuron_memories",
        "memuron_links",
        "memuron_placements",
        "memuron_fs",
    )
    for projection_name in projection_names:
        engine.refresh_projection(projection_name)
135
+
136
+
137
# Attribute name set on a store object once its projections are initialized.
_PROJECTIONS_READY_FLAG = "_memuron_projections_ready"


def projections_are_ready(store: object) -> bool:
    """Report whether ``store`` carries a truthy projections-ready marker."""
    marker = getattr(store, _PROJECTIONS_READY_FLAG, False)
    return bool(marker)
142
+
143
+
144
def _require_memory_projections(engine: ArthaEngine) -> None:
    """Raise ``RuntimeError`` unless the engine's store is marked projections-ready."""
    if projections_are_ready(engine.store):
        return
    raise RuntimeError(
        "Memuron projections are not initialized. "
        "Call ensure_memory_projections during app startup or test setup."
    )
150
+
151
+
152
def ensure_memory_projections(engine: ArthaEngine) -> None:
    """Initialize the Memuron projections for the engine's store once.

    Does nothing when the store is already marked ready. Otherwise each of the
    four projections is created from the registry and initialized against the
    store, the pgvector schema is ensured for the embedder's dimensionality,
    and the store is marked ready.
    """
    store = engine.store
    if projections_are_ready(store):
        return

    projection_names = (
        "memuron_memories",
        "memuron_links",
        "memuron_placements",
        "memuron_fs",
    )
    for projection_name in projection_names:
        engine.registry.create_projection(projection_name).init(store)  # type: ignore[attr-defined]

    from memuron.search.pgvector import ensure_pgvector_schema

    ensure_pgvector_schema(store, engine.embedder.dimensions)
    setattr(store, _PROJECTIONS_READY_FLAG, True)
164
+
165
+
166
+ def _memory_node_type(existing: MemoryArthaanu) -> str:
167
+ return str(getattr(existing.value, "node_type", "text") or "text")
168
+
169
+
170
def _rich_fields_from_memory(existing: MemoryArthaanu) -> dict[str, Any]:
    """Extract node type, payload, perception, encoding and metadata from a memory.

    ``payload`` and ``metadata`` are returned as shallow copies (empty dicts
    when missing or falsy); ``encoding`` falls back to ``"memory"`` and
    ``perception`` to ``None`` when the attribute is absent.
    """
    value = existing.value
    payload_copy = dict(getattr(value, "payload", {}) or {})
    metadata_copy = dict(getattr(value, "metadata", {}) or {})
    return {
        "node_type": _memory_node_type(existing),
        "payload": payload_copy,
        "perception": getattr(value, "perception", None),
        "encoding": getattr(value, "encoding", "memory"),
        "metadata": metadata_copy,
    }
178
+
179
+
180
def _coerce_memory_arthaanu(engine: ArthaEngine, item: object) -> MemoryArthaanu:
    """Return ``item`` as a ``MemoryArthaanu``.

    Instances of ``MemoryArthaanu`` are returned unchanged; anything else is
    round-tripped through ``arthaanu_to_dict`` / ``arthaanu_from_dict`` using
    the engine registry's arthaanu types.
    """
    if not isinstance(item, MemoryArthaanu):
        serialized = arthaanu_to_dict(item)
        return arthaanu_from_dict(
            serialized,
            type_registry=engine.registry.arthaanu_types,
        )
    return item
187
+
188
+
189
def _guardian_may_update_target(existing: MemoryArthaanu) -> bool:
    """Text ingest may merge into text memories only — not image/document/collection nodes."""
    node_type = _memory_node_type(existing)
    return node_type == "text"
192
+
193
+
194
def _encode_memory(
    engine: ArthaEngine,
    *,
    content: str,
    scope: list[str] | None = None,
    node_type: str = "text",
    payload: dict[str, Any] | None = None,
    perception: str | None = None,
    encoding: str = "memory",
    metadata: dict[str, Any] | None = None,
    artha_id: str | None = None,
    retrieval_count: int = 0,
    persist: bool = False,
) -> MemoryArthaanu:
    """Run the engine's ``"memory"`` encoder and return the resulting arthaanu.

    ``scope``, ``payload`` and ``metadata`` are replaced by empty containers
    when falsy. ``persist`` is forwarded to ``engine.encode``.

    Raises:
        TypeError: if the encoder does not return a ``MemoryArthaanu``.
    """
    encoder_input: dict[str, Any] = {
        "content": content,
        "scope": scope or [],
        "node_type": node_type,
        "payload": payload or {},
        "perception": perception,
        "encoding": encoding,
        "metadata": metadata or {},
        "artha_id": artha_id,
        "retrieval_count": retrieval_count,
    }
    result = engine.encode("memory", encoder_input, persist=persist)
    if isinstance(result, MemoryArthaanu):
        return result
    raise TypeError("memory encoder must return MemoryArthaanu")
226
+
227
+
228
def _encode_link(
    engine: ArthaEngine,
    *,
    source_id: str,
    target_id: str,
    description: str,
    metadata: dict[str, Any] | None = None,
    persist: bool = False,
) -> MemoryLinkArthaanu:
    """Run the engine's ``"memory_link"`` encoder and return the resulting arthaanu.

    ``metadata`` is replaced by an empty dict when falsy. ``persist`` is
    forwarded to ``engine.encode``.

    Raises:
        TypeError: if the encoder does not return a ``MemoryLinkArthaanu``.
    """
    encoder_input: dict[str, Any] = {
        "source_id": source_id,
        "target_id": target_id,
        "description": description,
        "metadata": metadata or {},
    }
    result = engine.encode("memory_link", encoder_input, persist=persist)
    if isinstance(result, MemoryLinkArthaanu):
        return result
    raise TypeError("memory_link encoder must return MemoryLinkArthaanu")
250
+
251
+
252
def _append_memory_event(
    engine: ArthaEngine,
    *,
    event_type: str,
    memory: MemoryArthaanu,
    component: str,
    extra_payload: dict[str, object] | None = None,
    event_metadata: dict[str, object] | None = None,
) -> str:
    """Append an event for ``memory`` to the engine's store and return its result.

    The event payload starts with a ``created_at`` stamp, then takes the keys
    of ``extra_payload`` and finally those of ``event_metadata``, so later
    sources override earlier ones on key collisions.
    """
    payload: dict[str, object] = {
        "created_at": _now_stamp(),
        **(extra_payload or {}),
        **(event_metadata or {}),
    }
    return engine.store.append_event(
        event_type=event_type,
        arthaanu=memory,
        component=component,
        payload=payload,
    )
272
+
273
+
274
+ def _append_link_event(
275
+ engine: ArthaEngine,
276
+ *,
277
+ event_type: str,
278
+ link: MemoryLinkArthaanu,
279
+ component: str,
280
+ extra_payload: dict[str, object] | None = None,
281
+ event_metadata: dict[str, object] | None = None,
282
+ ) -> str:
283
+ payload: dict[str, object] = {}
284
+ if extra_payload:
285
+ payload.update(extra_payload)
286
+ if event_metadata:
287
+ payload.update(event_metadata)
288
+ return engine.store.append_event(
289
+ event_type=event_type,
290
+ arthaanu=link,
291
+ component=component,
292
+ payload=payload,
293
+ )
294
+
295
+
296
def _list_memory_rows(engine: ArthaEngine) -> list[dict[str, Any]]:
    """Return every memory projection row in the column-named row shape.

    SQL-backed stores are queried from ``memuron_memories`` ordered by
    ``sequence`` descending. Otherwise rows are built from the store's
    ``memuron_memories`` mapping (in its iteration order), using the same
    ``*_json`` key names as the SQL columns.

    Raises:
        RuntimeError: if the projections have not been initialized.
    """
    _require_memory_projections(engine)
    store = engine.store
    if sql_store_has_tables(store):
        return sql_store_fetchall(
            store,
            """
            SELECT artha_id, content, scope_json, embedding_json,
            node_type, payload_json, perception, encoding, metadata_json,
            created_at, updated_at, sequence
            FROM memuron_memories
            ORDER BY sequence DESC
            """,
        )

    bucket = getattr(store, "memuron_memories", {})
    return [
        {
            "artha_id": artha_id,
            "content": item["content"],
            "node_type": item.get("node_type", "text"),
            "payload_json": item.get("payload", {}),
            "perception": item.get("perception"),
            "encoding": item.get("encoding", "memory"),
            "metadata_json": item.get("metadata", {}),
            "scope_json": item.get("scope", []),
            "embedding_json": item.get("embedding", []),
            "created_at": item.get("created_at"),
            "updated_at": item.get("updated_at"),
            "sequence": item.get("sequence", 0),
        }
        for artha_id, item in bucket.items()
    ]
330
+
331
+
332
+ def _row_to_memory_dict(row: dict[str, Any]) -> dict[str, Any]:
333
+ scope = row.get("scope_json")
334
+ embedding = row.get("embedding_json")
335
+ payload = row.get("payload_json")
336
+ metadata = row.get("metadata_json")
337
+ if isinstance(scope, str):
338
+ scope = json.loads(scope)
339
+ if isinstance(embedding, str):
340
+ embedding = json.loads(embedding)
341
+ if isinstance(payload, str):
342
+ payload = json.loads(payload)
343
+ if isinstance(metadata, str):
344
+ metadata = json.loads(metadata)
345
+ return {
346
+ "id": row["artha_id"],
347
+ "content": row["content"],
348
+ "node_type": row.get("node_type") or "text",
349
+ "payload": payload if isinstance(payload, dict) else {},
350
+ "perception": row.get("perception") or row["content"],
351
+ "encoding": row.get("encoding") or "memory",
352
+ "metadata": metadata if isinstance(metadata, dict) else {},
353
+ "scope": scope if isinstance(scope, list) else [],
354
+ "embedding": embedding if isinstance(embedding, list) else [],
355
+ "timestamp": row.get("created_at") or row.get("updated_at") or _now_stamp(),
356
+ }
357
+
358
+
359
def _list_link_rows(engine: ArthaEngine) -> list[dict[str, Any]]:
    """Return every link projection row in the column-named row shape.

    SQL-backed stores are queried from ``memuron_links`` ordered by
    ``sequence`` descending; otherwise rows are built from the store's
    ``memuron_links`` mapping in its iteration order.

    Raises:
        RuntimeError: if the projections have not been initialized.
    """
    _require_memory_projections(engine)
    store = engine.store
    if not sql_store_has_tables(store):
        bucket = getattr(store, "memuron_links", {})
        rows: list[dict[str, Any]] = []
        for link_id, item in bucket.items():
            rows.append(
                {
                    "link_id": link_id,
                    "source_id": item["source_id"],
                    "target_id": item["target_id"],
                    "description": item["description"],
                    "metadata_json": item.get("metadata", {}),
                    "embedding_json": item.get("embedding", []),
                    "sequence": item.get("sequence", 0),
                }
            )
        return rows
    return sql_store_fetchall(
        store,
        """
        SELECT link_id, source_id, target_id, description, metadata_json,
        embedding_json, sequence
        FROM memuron_links
        ORDER BY sequence DESC
        """,
    )
385
+
386
+
387
def _list_placement_rows(engine: ArthaEngine) -> list[dict[str, Any]]:
    """Return every placement projection row in the column-named row shape.

    SQL-backed stores are queried from ``memuron_placements`` ordered by
    ``sequence`` descending; otherwise rows are built from the store's
    ``memuron_placements`` mapping in its iteration order, with
    ``inherit_parent_scope`` defaulting to ``True``.

    Raises:
        RuntimeError: if the projections have not been initialized.
    """
    _require_memory_projections(engine)
    store = engine.store
    if not sql_store_has_tables(store):
        bucket = getattr(store, "memuron_placements", {})
        rows: list[dict[str, Any]] = []
        for placement_id, item in bucket.items():
            rows.append(
                {
                    "placement_id": placement_id,
                    "parent_id": item["parent_id"],
                    "child_id": item["child_id"],
                    "name": item["name"],
                    "scope_json": item.get("scope", []),
                    "metadata_json": item.get("metadata", {}),
                    "inherit_parent_scope": item.get("inherit_parent_scope", True),
                    "sequence": item.get("sequence", 0),
                }
            )
        return rows
    return sql_store_fetchall(
        store,
        """
        SELECT placement_id, parent_id, child_id, name, scope_json, metadata_json,
        inherit_parent_scope, sequence
        FROM memuron_placements
        ORDER BY sequence DESC
        """,
    )
414
+
415
+
416
def _filtered_memory_rows(
    engine: ArthaEngine,
    *,
    scope: list[str] | None = None,
) -> list[dict[str, Any]]:
    """Return the raw memory rows whose scope matches ``scope``.

    Every row is converted with ``_row_to_memory_dict`` to obtain its decoded
    scope; with no scope patterns all rows are returned. The original row
    dicts (not the converted memories) are what the result contains.
    """
    patterns = list(scope or [])

    def _keep(row: dict[str, Any]) -> bool:
        memory = _row_to_memory_dict(row)
        if not patterns:
            return True
        return scope_matches_filter(memory.get("scope") or [], patterns)

    return [row for row in _list_memory_rows(engine) if _keep(row)]
429
+
430
+
431
def _scope_matches_filter(scope: list[str], scope_filter: list[str] | None) -> bool:
    """Module-private alias that delegates to ``scope_matches_filter``."""
    matched = scope_matches_filter(scope, scope_filter)
    return matched
433
+
434
+
435
+ def _parse_json_field(value: Any, default: Any) -> Any:
436
+ if value is None:
437
+ return default
438
+ if isinstance(value, (list, dict)):
439
+ return value
440
+ try:
441
+ return json.loads(str(value))
442
+ except json.JSONDecodeError:
443
+ return default
444
+
445
+
446
+ def _cosine_score(left: list[float], right: list[float]) -> float:
447
+ if not left or not right or len(left) != len(right):
448
+ return 0.0
449
+ dot = sum(a * b for a, b in zip(left, right, strict=False))
450
+ left_norm = sum(a * a for a in left) ** 0.5
451
+ right_norm = sum(b * b for b in right) ** 0.5
452
+ if left_norm == 0 or right_norm == 0:
453
+ return 0.0
454
+ raw = dot / (left_norm * right_norm)
455
+ return max(0.0, min(1.0, (raw + 1.0) / 2.0))
456
+
457
+
458
def _embed_query_vector(engine: ArthaEngine, text: str) -> list[float]:
    """Embed ``text`` as a query vector, memoized per engine with LRU eviction.

    The cache is an ``OrderedDict`` stored on the engine under
    ``_memuron_query_embedding_cache`` and keyed by the stripped text. Vectors
    are cached as tuples and a fresh list is returned on every call — both on
    a hit and on a miss — so callers always get the same type and can mutate
    the result without affecting the cache or the embedder's own object.

    Args:
        engine: Engine whose ``embedder.embed_queries`` produces the vector.
        text: Query text; surrounding whitespace is ignored.

    Returns:
        The embedding as a new list.
    """
    normalized = text.strip()
    cache = getattr(engine, "_memuron_query_embedding_cache", None)
    if not isinstance(cache, OrderedDict):
        cache = OrderedDict()
        setattr(engine, "_memuron_query_embedding_cache", cache)

    cached = cache.get(normalized)
    if cached is None:
        cached = tuple(engine.embedder.embed_queries([normalized])[0])
        # A newly inserted key already sits at the most-recent end.
        cache[normalized] = cached
        while len(cache) > QUERY_EMBEDDING_CACHE_SIZE:
            cache.popitem(last=False)
    else:
        cache.move_to_end(normalized)
    return list(cached)
474
+
475
+
476
def _list_memory_search_rows(engine: ArthaEngine) -> list[dict[str, Any]]:
    """Slim projection rows for similarity search (no content text).

    Each row carries only ``artha_id``, ``scope_json`` and ``embedding_json``,
    read either from the ``memuron_memories`` table or from the store's
    ``memuron_memories`` mapping.

    Raises:
        RuntimeError: if the projections have not been initialized.
    """
    _require_memory_projections(engine)
    store = engine.store
    if not sql_store_has_tables(store):
        bucket = getattr(store, "memuron_memories", {})
        slim_rows: list[dict[str, Any]] = []
        for artha_id, item in bucket.items():
            slim_rows.append(
                {
                    "artha_id": artha_id,
                    "scope_json": item.get("scope", []),
                    "embedding_json": item.get("embedding", []),
                }
            )
        return slim_rows
    return sql_store_fetchall(
        store,
        """
        SELECT artha_id, scope_json, embedding_json
        FROM memuron_memories
        """,
    )
497
+
498
+
499
def _list_link_search_rows(engine: ArthaEngine) -> list[dict[str, Any]]:
    """Return link rows for similarity search, without ordering or sequence.

    Rows come from the ``memuron_links`` table on SQL-backed stores, or from
    the store's ``memuron_links`` mapping otherwise.

    Raises:
        RuntimeError: if the projections have not been initialized.
    """
    _require_memory_projections(engine)
    store = engine.store
    if not sql_store_has_tables(store):
        bucket = getattr(store, "memuron_links", {})
        link_rows: list[dict[str, Any]] = []
        for link_id, item in bucket.items():
            link_rows.append(
                {
                    "link_id": link_id,
                    "source_id": item["source_id"],
                    "target_id": item["target_id"],
                    "description": item["description"],
                    "metadata_json": item.get("metadata", {}),
                    "embedding_json": item.get("embedding", []),
                }
            )
        return link_rows
    return sql_store_fetchall(
        store,
        """
        SELECT link_id, source_id, target_id, description, metadata_json, embedding_json
        FROM memuron_links
        """,
    )
522
+
523
+
524
def _fetch_memory_rows_by_ids(
    engine: ArthaEngine,
    memory_ids: list[str],
) -> dict[str, dict[str, Any]]:
    """Load full memory rows for the given ids, keyed by ``artha_id``.

    Returns an empty dict for an empty id list. SQL-backed stores are queried
    with one ``?`` placeholder per requested id; otherwise the store's
    ``memuron_memories`` mapping is scanned for matching keys. Ids that are
    not found are simply absent from the result.
    """
    if not memory_ids:
        return {}

    wanted = set(memory_ids)
    store = engine.store
    if sql_store_has_tables(store):
        placeholders = ", ".join("?" for _ in memory_ids)
        rows = sql_store_fetchall(
            store,
            f"""
            SELECT artha_id, content, scope_json, embedding_json,
            node_type, payload_json, perception, encoding, metadata_json,
            created_at, updated_at, sequence
            FROM memuron_memories
            WHERE artha_id IN ({placeholders})
            """,
            tuple(memory_ids),
        )
        return {str(row["artha_id"]): row for row in rows}

    bucket = getattr(store, "memuron_memories", {})
    return {
        artha_id: {
            "artha_id": artha_id,
            "content": item["content"],
            "node_type": item.get("node_type") or "text",
            "payload_json": item.get("payload", {}),
            "perception": item.get("perception"),
            "encoding": item.get("encoding", "memory"),
            "scope_json": item.get("scope", []),
            "embedding_json": item.get("embedding", []),
            "metadata_json": item.get("metadata", {}),
            "created_at": item.get("created_at"),
            "updated_at": item.get("updated_at"),
            "sequence": item.get("sequence", 0),
        }
        for artha_id, item in bucket.items()
        if artha_id in wanted
    }
567
+
568
+
569
def _cosine_similarity_hits(
    engine: ArthaEngine,
    query_vector: list[float],
    rows: list[dict[str, Any]],
    *,
    scope: list[str] | None = None,
    top_k: int,
    id_key: str,
    embedding_key: str = "embedding_json",
    scope_key: str | None = "scope_json",
) -> list[tuple[str, float]]:
    """Rank ``rows`` against ``query_vector`` via the engine's cosine decoder.

    Rows are first filtered by ``scope`` (only when both ``scope_key`` and
    ``scope`` are given). Rows whose embedding is missing or whose length
    differs from the query vector are skipped. Remaining rows are wrapped as
    link or memory arthaanus (chosen by ``id_key == "link_id"``) and passed to
    ``engine.decode("cosine_similarity", ...)``.

    Returns:
        ``(artha_id, score)`` pairs in the order the decoder reports its hits;
        an empty list when there is nothing to rank.
    """
    if not rows:
        return []

    if scope_key is None or not scope:
        eligible_rows = rows
    else:
        eligible_rows = [
            row
            for row in rows
            if _scope_matches_filter(_parse_json_field(row.get(scope_key), []), scope)
        ]

    query = EmbeddingArthaanu(
        name="memuron_query",
        value=query_vector,
        dimensions=len(query_vector),
        model=engine.embedder.query_model_name,
    )
    expected_dimensions = len(query_vector)

    def _wrap(row: dict[str, Any], item_id: str, embedding: Any) -> Any:
        # Links carry their endpoints and description; memories only need
        # scope and embedding for ranking, so content stays empty.
        if id_key == "link_id":
            link = _link_row_to_dict(row)
            return MemoryLinkArthaanu(
                artha_id=item_id,
                name="memory_link",
                value=MemoryLinkValue(
                    source_id=link["source_id"],
                    target_id=link["target_id"],
                    description=link["description"],
                    embedding=embedding,
                ),
            )
        return MemoryArthaanu(
            artha_id=item_id,
            name="memory",
            value=MemoryValue(
                content="",
                scope=_parse_json_field(row.get(scope_key), []) if scope_key else [],
                embedding=embedding,
            ),
        )

    candidates: list[EmbeddingCandidate] = []
    for row in eligible_rows:
        embedding = _parse_json_field(row.get(embedding_key), [])
        if not embedding or len(embedding) != expected_dimensions:
            continue
        item_id = str(row[id_key])
        candidates.append(
            EmbeddingCandidate(
                item=_wrap(row, item_id, embedding),
                embedding=EmbeddingArthaanu(
                    name=f"{item_id}_embedding",
                    value=embedding,
                    dimensions=len(embedding),
                ),
            )
        )
    if not candidates:
        return []

    result = engine.decode(
        "cosine_similarity",
        query,
        EmbeddingSearchParams(candidates=candidates, top_k=top_k),
    )
    hits = getattr(result, "hits", [])
    return [(hit.item.artha_id, float(hit.score)) for hit in hits]
653
+
654
+
655
def _memory_similarity_hits(
    engine: ArthaEngine,
    query_vector: list[float],
    *,
    scope: list[str] | None = None,
    top_k: int,
) -> list[tuple[str, float]]:
    """Return ``(memory_id, score)`` hits for ``query_vector``.

    Uses ``pgvector_memory_search_ids`` when pgvector is ready for the store;
    otherwise loads the slim search rows and ranks them with
    ``_cosine_similarity_hits``.
    """
    store = engine.store
    if not pgvector_is_ready(store):
        search_rows = _list_memory_search_rows(engine)
        return _cosine_similarity_hits(
            engine,
            query_vector,
            search_rows,
            scope=scope,
            top_k=top_k,
            id_key="artha_id",
        )
    return pgvector_memory_search_ids(store, query_vector, top_k=top_k, scope=scope)
674
+
675
+
676
def find_candidates(
    engine: ArthaEngine,
    content: str,
    *,
    scope: list[str] | None = None,
    top_k: int = CANDIDATE_TOP_K,
    query_vector: list[float] | None = None,
) -> list[tuple[str, float, dict[str, Any]]]:
    """Find the stored memories most similar to ``content``.

    Args:
        engine: Engine providing the store, embedder and decoders.
        content: Text to embed when ``query_vector`` is not supplied.
        scope: Optional scope patterns restricting the search.
        top_k: Maximum number of hits requested from the similarity search.
        query_vector: Pre-computed query embedding; when falsy the content is
            embedded via ``_embed_query_vector``.

    Returns:
        ``(memory_id, score, memory_dict)`` tuples in hit order. Hits whose
        row can no longer be loaded are dropped.
    """
    store = engine.store
    if pgvector_is_ready(store):
        vector = query_vector or _embed_query_vector(engine, content)
        hit_ids = pgvector_memory_search_ids(store, vector, top_k=top_k, scope=scope)
    else:
        rows = _list_memory_search_rows(engine)
        if not rows:
            # Nothing stored: skip the embedding call entirely.
            return []
        vector = query_vector or _embed_query_vector(engine, content)
        # Rank the rows already loaded for the emptiness check instead of
        # fetching the whole table a second time.
        hit_ids = _cosine_similarity_hits(
            engine,
            vector,
            rows,
            scope=scope,
            top_k=top_k,
            id_key="artha_id",
        )
    if not hit_ids:
        return []

    details = _fetch_memory_rows_by_ids(engine, [memory_id for memory_id, _score in hit_ids])
    output: list[tuple[str, float, dict[str, Any]]] = []
    for memory_id, score in hit_ids:
        row = details.get(memory_id)
        if not row:
            continue
        output.append((memory_id, score, _row_to_memory_dict(row)))
    return output
707
+
708
+
709
+ def _tenant_id_from_metadata(event_metadata: dict[str, object] | None) -> str | None:
710
+ if not event_metadata:
711
+ return None
712
+ tenant_id = event_metadata.get("tenant_id")
713
+ return str(tenant_id) if tenant_id else None
714
+
715
+
716
def _memory_belongs_to_tenant(memory: dict[str, Any], tenant_id: str | None) -> bool:
    """Check that the memory's scope matches the tenant's org scope token.

    Every memory is accepted when ``tenant_id`` is falsy.
    """
    if not tenant_id:
        return True
    memory_scope = memory.get("scope") or []
    required = [org_scope_token(tenant_id)]
    return scope_matches_filter(memory_scope, required)
720
+
721
+
722
+ def _space_tokens_for_identity_context(
723
+ scope: list[str],
724
+ space_context: dict[str, str] | None,
725
+ ) -> list[str]:
726
+ tokens: list[str] = []
727
+ active_space = (space_context or {}).get("active_space_token")
728
+ if active_space:
729
+ tokens.append(active_space)
730
+ for token in scope:
731
+ if str(token).startswith("space.") and token not in tokens:
732
+ tokens.append(str(token))
733
+ return tokens
734
+
735
+
736
def _memory_matches_identity_context(
    memory: dict[str, Any],
    *,
    tenant_id: str | None,
    scope: list[str],
    space_context: dict[str, str] | None,
) -> bool:
    """Check that a memory belongs to the tenant and to every relevant space.

    The memory must pass ``_memory_belongs_to_tenant`` and its scope must
    contain each token produced by ``_space_tokens_for_identity_context``.
    """
    if not _memory_belongs_to_tenant(memory, tenant_id):
        return False
    memory_scope = memory.get("scope") or []
    required_spaces = _space_tokens_for_identity_context(scope, space_context)
    return all(space_token in memory_scope for space_token in required_spaces)
750
+
751
+
752
def _find_external_identity_match(
    engine: ArthaEngine,
    *,
    external_identity: dict[str, str] | None,
    tenant_id: str | None,
    scope: list[str],
    space_context: dict[str, str] | None,
    node_type: str | None = None,
) -> dict[str, Any] | None:
    """Find an existing memory sharing an idempotency identifier with the input.

    Only the keys in ``IDEMPOTENCY_EXTERNAL_ID_FIELDS`` are compared. A memory
    qualifies when it has the requested ``node_type`` (if given), matches the
    tenant/space context, and shares at least one identifier value. Among the
    qualifying memories, the last one after a stable sort by ``timestamp`` is
    returned; ``None`` when there is no identifier or no match.
    """
    identity = external_identity_from_values(**(external_identity or {}))
    wanted = {
        key: identity[key]
        for key in IDEMPOTENCY_EXTERNAL_ID_FIELDS
        if identity.get(key)
    }
    if not wanted:
        return None

    matches: list[dict[str, Any]] = []
    for row in _list_memory_rows(engine):
        memory = _row_to_memory_dict(row)
        if node_type is not None and str(memory.get("node_type") or "text") != node_type:
            continue
        in_context = _memory_matches_identity_context(
            memory,
            tenant_id=tenant_id,
            scope=scope,
            space_context=space_context,
        )
        if not in_context:
            continue
        existing_identity = _memory_external_identity(memory)
        if any(existing_identity.get(key) == value for key, value in wanted.items()):
            matches.append(memory)

    if not matches:
        return None
    # NOTE(review): timestamps written by _now_stamp() have minute resolution,
    # so ties are broken by row order; confirm that is the intended choice.
    ordered = sorted(matches, key=lambda item: str(item.get("timestamp") or ""))
    return ordered[-1]
786
+
787
+
788
def _filter_candidates_by_tenant(
    candidates: list[tuple[str, float, dict[str, Any]]],
    tenant_id: str | None,
) -> list[tuple[str, float, dict[str, Any]]]:
    """Keep only the candidates whose memory belongs to ``tenant_id``.

    The input list is returned unchanged when ``tenant_id`` is falsy.
    """
    if not tenant_id:
        return candidates
    kept: list[tuple[str, float, dict[str, Any]]] = []
    for memory_id, score, memory in candidates:
        if _memory_belongs_to_tenant(memory, tenant_id):
            kept.append((memory_id, score, memory))
    return kept
799
+
800
+
801
+ def _merge_memory_metadata(
802
+ existing: dict[str, Any] | None,
803
+ incoming: dict[str, Any] | None,
804
+ ) -> dict[str, Any]:
805
+ output = dict(existing or {})
806
+ for key, value in (incoming or {}).items():
807
+ if key == "system" and isinstance(value, dict):
808
+ system = dict(output.get("system") or {})
809
+ for system_key, system_value in value.items():
810
+ if isinstance(system_value, dict) and isinstance(system.get(system_key), dict):
811
+ system[system_key] = {**system[system_key], **system_value}
812
+ else:
813
+ system[system_key] = system_value
814
+ output["system"] = system
815
+ else:
816
+ output[key] = value
817
+ return output
818
+
819
+
820
def _resolve_update_target(
    engine: ArthaEngine,
    *,
    target_id: str | None,
    tenant_id: str | None,
    candidates: list[tuple[str, float, dict[str, Any]]],
) -> str | None:
    """Decide whether ``target_id`` may be used as an update target.

    The id is accepted as-is when it is one of the candidates or when no
    tenant is given. Otherwise the memory is loaded with ``get_memory`` and
    accepted only if it belongs to the tenant. Returns ``None`` for a missing
    id, an unknown memory (``KeyError``) or a memory of another tenant.
    """
    if not target_id:
        return None
    if any(memory_id == target_id for memory_id, _score, _memory in candidates):
        return target_id
    if not tenant_id:
        return target_id
    try:
        existing = get_memory(engine, target_id)
    except KeyError:
        return None
    return target_id if _memory_belongs_to_tenant(existing, tenant_id) else None
841
+
842
+
843
def _validate_connections(
    memory_id: str,
    scope: list[str],
    raw_connections: list[ConnectionSpec],
    candidate_ids: list[str],
    memories_by_id: dict[str, dict[str, Any]],
) -> list[ConnectionSpec]:
    """Filter proposed connections down to those that pass all checks.

    A connection is kept when its target id is set and known in
    ``memories_by_id``, the target is the memory itself, a candidate, or shares
    at least one scope token with ``scope``, and its stripped description is at
    least 20 characters long and contains a ``"?"``. Processing stops once
    ``MAX_GUARDIAN_LINKS`` connections have been accepted.
    """
    accepted: list[ConnectionSpec] = []
    for conn in raw_connections:
        target_id = conn.target_id
        description = conn.description.strip()
        if not target_id:
            continue
        if len(accepted) >= MAX_GUARDIAN_LINKS:
            break
        target = memories_by_id.get(target_id)
        if not target:
            continue
        already_related = target_id == memory_id or target_id in candidate_ids
        if not already_related:
            shared_scope = set(scope) & set(target.get("scope") or [])
            if not shared_scope:
                continue
        description_ok = len(description) >= 20 and "?" in description
        if not description_ok:
            continue
        accepted.append(conn)
    return accepted
869
+
870
+
871
def _find_link_ids_between(
    engine: ArthaEngine,
    memory_id_1: str,
    memory_id_2: str,
) -> list[str]:
    """Return the ids of links joining the two memories, in either direction.

    Uses the ``memuron_links`` SQL table when ``sql_store_has_tables`` reports
    it is available; otherwise scans the store's ``memuron_links`` mapping
    (treated as empty when the attribute is missing).
    """
    store = engine.store
    if not sql_store_has_tables(store):
        endpoints = {memory_id_1, memory_id_2}
        return [
            link_id
            for link_id, item in getattr(store, "memuron_links", {}).items()
            if {item["source_id"], item["target_id"]} == endpoints
        ]
    rows = sql_store_fetchall(
        store,
        """
        SELECT link_id FROM memuron_links
        WHERE (source_id = ? AND target_id = ?)
        OR (source_id = ? AND target_id = ?)
        """,
        (memory_id_1, memory_id_2, memory_id_2, memory_id_1),
    )
    return [str(row["link_id"]) for row in rows]
896
+
897
+
898
def _normalize_link_description(description: str) -> str:
    """Return ``description`` trimmed, case-folded, with whitespace runs collapsed to one space."""
    folded = description.strip().casefold()
    words = folded.split()
    return " ".join(words)
900
+
901
+
902
def _find_existing_link(
    engine: ArthaEngine,
    *,
    source_id: str,
    target_id: str,
    description: str,
) -> dict[str, Any] | None:
    """Return the first stored link with the same direction and description.

    Descriptions are compared after ``_normalize_link_description``; a row
    without a description is compared as the empty string. Returns ``None``
    when no row matches.
    """
    wanted = _normalize_link_description(description)
    for row in _list_link_rows(engine):
        same_endpoints = (
            str(row["source_id"]) == source_id
            and str(row["target_id"]) == target_id
        )
        if not same_endpoints:
            continue
        stored = _normalize_link_description(str(row.get("description") or ""))
        if stored == wanted:
            return _link_row_to_dict(row)
    return None
916
+
917
+
918
def create_memory_link(
    engine: ArthaEngine,
    *,
    source_id: str,
    target_id: str,
    description: str,
    event_metadata: dict[str, object] | None = None,
) -> tuple[dict[str, Any], bool]:
    """Create a link from ``source_id`` to ``target_id`` unless an equal one exists.

    Returns ``(link, created)``. When ``_find_existing_link`` finds a link
    with the same endpoints and normalized description, that link is returned
    with ``created`` set to ``False`` and nothing is written. Otherwise a
    ``link.created`` event is appended, projections are refreshed and the new
    link is returned with ``created`` set to ``True``.

    Raises:
        ValueError: if ``description`` is empty or whitespace only.
        KeyError: if either endpoint is unknown to ``get_memory``.
    """
    _require_memory_projections(engine)
    text = description.strip()
    if not text:
        raise ValueError("description is required")
    # Called only for their KeyError on a missing endpoint.
    for endpoint_id in (source_id, target_id):
        get_memory(engine, endpoint_id)
    duplicate = _find_existing_link(
        engine,
        source_id=source_id,
        target_id=target_id,
        description=description,
    )
    if duplicate:
        return duplicate, False
    link = _encode_link(
        engine,
        source_id=source_id,
        target_id=target_id,
        description=text,
        persist=False,
    )
    _append_link_event(
        engine,
        event_type="link.created",
        link=link,
        component="memuron.node.link",
        event_metadata=event_metadata,
    )
    refresh_memory_projections(engine)
    created = {
        "link_id": link.artha_id,
        "source_id": source_id,
        "target_id": target_id,
        "description": text,
        "embedding": list(link.value.embedding),
    }
    return created, True
961
+
962
+
963
def _remove_link(
    engine: ArthaEngine,
    link_id: str,
    *,
    event_metadata: dict[str, object] | None = None,
) -> None:
    """Semantically delete the link ``link_id``; do nothing if the store lacks it.

    The delete carries ``domain_event_type`` ``"link.removed"`` and the link
    id in its metadata; entries in ``event_metadata`` are merged last and so
    take precedence on key collisions.
    """
    stored = engine.store.get(link_id)
    if stored is None:
        return
    if not isinstance(stored, MemoryLinkArthaanu):
        # Round-trip through the registry; the decoded object is not used
        # afterwards, so this only has an effect if decoding raises.
        arthaanu_from_dict(
            arthaanu_to_dict(stored),
            type_registry=engine.registry.arthaanu_types,
        )
    delete_metadata: dict[str, object] = {
        "domain_event_type": "link.removed",
        "link_id": link_id,
    }
    delete_metadata.update(event_metadata or {})
    engine.semantic_delete(
        link_id,
        event_type="delete",
        component="memuron.unlink",
        metadata=delete_metadata,
    )
987
+
988
+
989
def _apply_guardian_links(
    engine: ArthaEngine,
    memory_id: str,
    scope: list[str],
    plan: GuardianWritePlan,
    candidate_ids: list[str],
    memories_by_id: dict[str, dict[str, Any]],
    *,
    event_metadata: dict[str, object] | None = None,
) -> int:
    """Apply the link changes of a Guardian plan for ``memory_id``.

    Appends one ``link.created`` event per connection that survives
    ``_validate_connections``, then removes every link between each
    well-formed pair in ``plan.links_to_remove`` (entries that are not
    two-element lists are ignored).

    Returns:
        The number of validated connections.
    """
    accepted = _validate_connections(
        memory_id,
        scope,
        plan.connections,
        candidate_ids,
        memories_by_id,
    )
    for spec in accepted:
        new_link = _encode_link(
            engine,
            source_id=memory_id,
            target_id=spec.target_id,
            description=spec.description,
            persist=False,
        )
        _append_link_event(
            engine,
            event_type="link.created",
            link=new_link,
            component="memuron.ingest.guardian",
            event_metadata=event_metadata,
        )

    for pair in plan.links_to_remove:
        if isinstance(pair, list) and len(pair) == 2:
            first, second = pair
            for link_id in _find_link_ids_between(engine, str(first), str(second)):
                _remove_link(engine, link_id, event_metadata=event_metadata)
    return len(accepted)
1028
+
1029
+
1030
def _find_link_candidates(
    engine: ArthaEngine,
    content: str,
    *,
    tenant_id: str | None,
    exclude_memory_id: str | None = None,
    top_k: int = LINK_CANDIDATE_TOP_K,
) -> list[tuple[str, float, dict[str, Any]]]:
    """Similar memories for linking — org-wide within tenant, not AND-scoped to space tokens.

    Candidates come from ``find_candidates`` with ``scope=None`` and are then
    passed through ``_filter_candidates_by_tenant``. When
    ``exclude_memory_id`` is given, that memory is dropped from the result.
    """
    found = _filter_candidates_by_tenant(
        find_candidates(engine, content, scope=None, top_k=top_k),
        tenant_id,
    )
    if not exclude_memory_id:
        return found
    return [
        (memory_id, score, memory)
        for memory_id, score, memory in found
        if memory_id != exclude_memory_id
    ]
1048
+
1049
+
1050
def _ingest_scope_context(
    *,
    scope: list[str] | None,
    event_metadata: dict[str, object] | None,
    space_context: dict[str, str] | None,
    candidate_scope: list[str] | None,
) -> tuple[list[str], str | None, list[str]]:
    """Derive ``(request_scope, tenant_id, search_scope)`` for an ingest call.

    The tenant comes from ``_tenant_id_from_metadata`` or, failing that, from
    the first ``org:`` token in ``scope``. When a tenant is known it is merged
    into the request scope with ``merge_org_scope``. The search scope is
    ``candidate_scope``, else the request scope, else the active space token;
    the tenant is merged into it when it carries no ``org:`` token yet.
    """
    request_scope = list(scope or [])

    # Fall back to a space context carried in the event metadata.
    if space_context is None and event_metadata:
        fallback = event_metadata.get("space_context")
        if isinstance(fallback, dict) and fallback.get("active_space_token"):
            space_context = fallback
    active_space_token = (space_context or {}).get("active_space_token")

    tenant_id = _tenant_id_from_metadata(event_metadata)
    if not tenant_id:
        org_token = next(
            (str(token) for token in request_scope if str(token).startswith("org:")),
            None,
        )
        if org_token is not None:
            tenant_id = org_token[4:]
    if tenant_id:
        request_scope = merge_org_scope(request_scope, tenant_id)

    search_scope = list(candidate_scope or request_scope or [])
    if not search_scope and active_space_token:
        search_scope = [active_space_token]
    lacks_org_token = all(
        not str(token).startswith("org:") for token in search_scope
    )
    if tenant_id and lacks_org_token:
        search_scope = merge_org_scope(search_scope, tenant_id)
    return request_scope, tenant_id, search_scope
1077
+
1078
+
1079
def _auto_link_neighbors_by_similarity(
    engine: ArthaEngine,
    memory_id: str,
    content: str,
    *,
    scope: list[str] | None = None,
    event_metadata: dict[str, object] | None = None,
    tenant_id: str | None = None,
    min_score: float = AUTO_LINK_MIN_SCORE,
    top_k: int = LINK_CANDIDATE_TOP_K,
) -> int:
    """Create semantic links from an existing memory to similar neighbors (Guardian fallback).

    Candidates are looked up with ``_find_link_candidates`` (excluding
    ``memory_id``) and kept when their score is at least ``min_score``. Up to
    ``MAX_GUARDIAN_LINKS`` of them are turned into generated question-style
    connections and applied through ``_apply_guardian_links``.

    Returns:
        The number of links applied, as reported by ``_apply_guardian_links``
        (``0`` when no candidate reaches ``min_score``). Previously the number
        of *proposed* connections was returned even if validation dropped
        some of them.
    """
    _require_memory_projections(engine)
    request_scope, resolved_tenant, _search_scope = _ingest_scope_context(
        scope=scope,
        event_metadata=event_metadata,
        space_context=None,
        candidate_scope=None,
    )
    tenant_id = tenant_id or resolved_tenant
    candidates = [
        (candidate_id, score, memory)
        for candidate_id, score, memory in _find_link_candidates(
            engine,
            content,
            tenant_id=tenant_id,
            exclude_memory_id=memory_id,
            top_k=top_k,
        )
        if score >= min_score
    ]
    if not candidates:
        return 0
    candidate_ids = [candidate_id for candidate_id, _score, _memory in candidates]
    memories_by_id = {candidate_id: memory for candidate_id, _score, memory in candidates}
    connections: list[ConnectionSpec] = []
    for candidate_id, score, memory in candidates[:MAX_GUARDIAN_LINKS]:
        # Fall back to the id when the neighbor has no usable content.
        snippet = str(memory.get("content", "")).strip()[:80] or candidate_id
        connections.append(
            ConnectionSpec(
                target_id=candidate_id,
                description=(
                    f"How does this memory relate to '{snippet}'? "
                    f"What shared context connects them (similarity {score:.2f})?"
                ),
            )
        )
    plan = GuardianWritePlan(
        action="create",
        target_memory_id=None,
        reasoning="auto-linked by embedding similarity",
        final_content=content,
        scope=request_scope,
        connections=connections,
        links_to_remove=[],
    )
    # Report what was actually applied, not what was proposed.
    applied = _apply_guardian_links(
        engine,
        memory_id,
        request_scope,
        plan,
        candidate_ids,
        memories_by_id,
        event_metadata=event_metadata,
    )
    refresh_memory_projections(engine)
    return applied
1147
+
1148
+
1149
async def link_memory_with_guardian(
    engine: ArthaEngine,
    guardian: AgnoGuardian,
    *,
    memory_id: str,
    content: str,
    scope: list[str] | None = None,
    event_metadata: dict[str, object] | None = None,
    space_context: dict[str, str] | None = None,
    candidate_scope: list[str] | None = None,  # noqa: ARG001 — ignored; linking is org-wide
) -> int:
    """Run Guardian link planning for an existing memory without changing its content.

    When ``settings.openrouter_api_key`` is not set, or when the Guardian
    raises ``GuardianError`` or applies no links, linking falls back to
    ``_auto_link_neighbors_by_similarity``.

    Returns:
        The number of links applied; ``0`` when there are no candidates.
    """
    _require_memory_projections(engine)
    request_scope, tenant_id, _search_scope = _ingest_scope_context(
        scope=scope,
        event_metadata=event_metadata,
        space_context=space_context,
        candidate_scope=None,
    )
    candidates = _find_link_candidates(
        engine,
        content,
        tenant_id=tenant_id,
        exclude_memory_id=memory_id,
    )
    if not candidates:
        return 0

    def _similarity_fallback() -> int:
        # Shared by the "no API key" path and the "Guardian linked nothing" path.
        return _auto_link_neighbors_by_similarity(
            engine,
            memory_id,
            content,
            scope=request_scope,
            event_metadata=event_metadata,
            tenant_id=tenant_id,
        )

    # Decide on the fallback before building the lookup below: the fallback
    # computes its own candidates, so the row scan would be wasted work.
    if not settings.openrouter_api_key:
        return _similarity_fallback()

    candidate_ids = [candidate_id for candidate_id, _score, _memory in candidates]
    memories_by_id = {candidate_id: memory for candidate_id, _score, memory in candidates}
    try:
        memories_by_id.setdefault(memory_id, get_memory(engine, memory_id))
    except KeyError:
        # Best effort: the memory being linked may not be readable yet.
        pass
    if tenant_id:
        for row in _list_memory_rows(engine):
            memory = _row_to_memory_dict(row)
            if _memory_belongs_to_tenant(memory, tenant_id):
                memories_by_id.setdefault(row["artha_id"], memory)

    linked = 0
    try:
        plan = await guardian.plan_write(content, candidates, space_context=space_context)
        merged_scope = list(request_scope)
        for token in plan.scope or []:
            if token not in merged_scope:
                merged_scope.append(token)
        if space_context:
            merged_scope = apply_guardian_space_scope(merged_scope, space_context=space_context)
        if tenant_id:
            merged_scope = merge_org_scope(merged_scope, tenant_id)
        linked = _apply_guardian_links(
            engine,
            memory_id,
            merged_scope,
            plan,
            candidate_ids,
            memories_by_id,
            event_metadata=event_metadata,
        )
    except GuardianError:
        linked = 0

    if linked == 0:
        linked = _similarity_fallback()

    refresh_memory_projections(engine)
    return linked
1233
+
1234
+
1235
async def ingest_memory(
    engine: ArthaEngine,
    guardian: AgnoGuardian,
    *,
    content: str,
    scope: list[str] | None = None,
    metadata: dict[str, Any] | None = None,
    external_identity: dict[str, str] | None = None,
    event_metadata: dict[str, object] | None = None,
    space_context: dict[str, str] | None = None,
    candidate_scope: list[str] | None = None,
) -> dict[str, Any]:
    """Ingest ``content`` as a new memory or as an update, as planned by the Guardian.

    Similar memories (plus any memory matching the external identity) are
    handed to ``guardian.plan_write``. A planned ``create`` becomes an update
    when an external-identity match exists; a planned ``update`` falls back to
    ``create`` when the target cannot be resolved, is missing from the store,
    or is rejected by ``_guardian_may_update_target``. The plan's link changes
    are applied afterwards and projections are refreshed.

    Returns:
        A dict with ``status``, ``memory_id``, ``action`` (``"created"`` or
        ``"updated"``) and the resulting ``memory`` payload.
    """
    _require_memory_projections(engine)
    metadata = metadata or {}
    external_identity = {
        **_external_identity_from_metadata(metadata),
        **external_identity_from_values(**(external_identity or {})),
    }
    ingest_event_metadata = {**(event_metadata or {}), **external_identity}
    request_scope, tenant_id, search_scope = _ingest_scope_context(
        scope=scope,
        event_metadata=ingest_event_metadata,
        space_context=space_context,
        candidate_scope=candidate_scope,
    )
    candidates = _filter_candidates_by_tenant(
        find_candidates(engine, content, scope=search_scope or None),
        tenant_id,
    )
    external_match = _find_external_identity_match(
        engine,
        external_identity=external_identity,
        tenant_id=tenant_id,
        scope=request_scope,
        space_context=space_context,
        node_type="text",
    )
    if external_match is not None:
        already_listed = any(
            candidate_id == external_match["id"]
            for candidate_id, _score, _memory in candidates
        )
        if not already_listed:
            # Put the idempotency match first with a perfect score.
            candidates.insert(0, (str(external_match["id"]), 1.0, external_match))
    candidate_ids = [candidate_id for candidate_id, _score, _memory in candidates]
    memories_by_id = {candidate_id: memory for candidate_id, _score, memory in candidates}
    # Extend the lookup with every stored memory (tenant-filtered when a
    # tenant is known) without overwriting the candidate entries.
    for row in _list_memory_rows(engine):
        row_memory = _row_to_memory_dict(row)
        if tenant_id and not _memory_belongs_to_tenant(row_memory, tenant_id):
            continue
        memories_by_id.setdefault(row["artha_id"], row_memory)

    plan = await guardian.plan_write(content, candidates, space_context=space_context)
    merged_scope = list(request_scope)
    for token in plan.scope or []:
        if token not in merged_scope:
            merged_scope.append(token)
    if space_context:
        merged_scope = apply_guardian_space_scope(merged_scope, space_context=space_context)
    external_target_id = str(external_match["id"]) if external_match is not None else None
    action = plan.action
    if action == "create" and external_target_id:
        action = "update"

    def _final_scope() -> list[str]:
        scoped = list(merged_scope)
        if tenant_id:
            scoped = merge_org_scope(scoped, tenant_id)
        if space_context:
            scoped = apply_guardian_space_scope(scoped, space_context=space_context)
        return scoped

    if action == "update":
        target_id = _resolve_update_target(
            engine,
            target_id=external_target_id or plan.target_memory_id,
            tenant_id=tenant_id,
            candidates=candidates,
        )
        existing = None
        may_update = False
        if target_id:
            existing_raw = engine.store.get(target_id)
            if existing_raw is not None:
                existing = _coerce_memory_arthaanu(engine, existing_raw)
                may_update = bool(_guardian_may_update_target(existing))
        if not may_update:
            # No usable target: store the content as a new memory instead.
            action = "create"
        else:
            rich = _rich_fields_from_memory(existing)
            retrieval_count = existing.value.retrieval_count
            memory = _encode_memory(
                engine,
                content=plan.final_content,
                scope=_final_scope(),
                node_type=rich["node_type"],
                payload=rich["payload"],
                perception=plan.final_content,
                encoding=rich["encoding"],
                metadata=_metadata_with_external_identity(
                    _merge_memory_metadata(rich["metadata"], metadata),
                    external_identity,
                ),
                artha_id=target_id,
                retrieval_count=retrieval_count,
                persist=False,
            )
            _append_memory_event(
                engine,
                event_type="memory.updated",
                memory=memory,
                component="memuron.ingest.guardian",
                extra_payload={
                    "reasoning": plan.reasoning,
                    "idempotency_match": bool(external_target_id),
                },
                event_metadata=ingest_event_metadata,
            )
            memory_id = target_id
            result_action = "updated"

    if action == "create":
        memory = _encode_memory(
            engine,
            content=plan.final_content,
            scope=_final_scope(),
            metadata=_metadata_with_external_identity(metadata, external_identity),
            persist=False,
        )
        _append_memory_event(
            engine,
            event_type="memory.created",
            memory=memory,
            component="memuron.ingest.guardian",
            extra_payload={"reasoning": plan.reasoning},
            event_metadata=ingest_event_metadata,
        )
        memory_id = memory.artha_id
        result_action = "created"

    final_scope = _final_scope()
    memories_by_id.setdefault(memory_id, memory_payload(engine, memory_id))
    _apply_guardian_links(
        engine,
        memory_id,
        final_scope,
        plan,
        candidate_ids,
        memories_by_id,
        event_metadata=ingest_event_metadata,
    )
    refresh_memory_projections(engine)
    return {
        "status": "success",
        "memory_id": memory_id,
        "action": result_action,
        "memory": memory_payload(engine, memory_id),
    }
1393
+
1394
+
1395
def _linked_memory_ids(engine: ArthaEngine, memory_id: str) -> list[str]:
    """Return the sorted, de-duplicated ids of memories linked to ``memory_id``.

    Links are read from the ``memuron_links`` SQL table when available,
    otherwise from the store's ``memuron_links`` mapping. ``memory_id`` itself
    never appears in the result.
    """
    store = engine.store
    if sql_store_has_tables(store):
        rows = sql_store_fetchall(
            store,
            """
            SELECT source_id, target_id FROM memuron_links
            WHERE source_id = ? OR target_id = ?
            """,
            (memory_id, memory_id),
        )
    else:
        rows = [
            item
            for item in getattr(store, "memuron_links", {}).values()
            if item["source_id"] == memory_id or item["target_id"] == memory_id
        ]
    neighbours = {
        endpoint
        for row in rows
        for endpoint in (row["source_id"], row["target_id"])
        if endpoint != memory_id
    }
    return sorted(neighbours)
1420
+
1421
+
1422
def _evolution_history(engine: ArthaEngine, memory_id: str) -> list[dict[str, Any]]:
    """Summarize the create/update/delete events recorded for ``memory_id``.

    Reads up to 100 events through the store's ``list_events`` (returning an
    empty list when the store has no such callable) and walks them in reverse
    of the order returned. A ``delete`` event is reported under its payload's
    ``domain_event_type`` when one is set; events other than
    ``memory.created``, ``memory.updated`` and ``memory.deleted`` are dropped.
    """
    list_events = getattr(engine.store, "list_events", None)
    if not callable(list_events):
        return []
    tracked = {"memory.created", "memory.updated", "memory.deleted"}
    entries: list[dict[str, Any]] = []
    for event in reversed(list_events(artha_id=memory_id, limit=100)):
        raw_payload = event.get("payload")
        payload = raw_payload if isinstance(raw_payload, dict) else {}
        kind = str(event.get("event_type", ""))
        domain_kind = str(payload.get("domain_event_type") or "")
        if kind == "delete" and domain_kind:
            kind = domain_kind
        if kind in tracked:
            entries.append(
                {
                    "event_type": kind,
                    "timestamp": event.get("created_at"),
                    "component": event.get("component"),
                    "reasoning": payload.get("reasoning"),
                    "actor_id": payload.get("actor_id"),
                    "tenant_id": payload.get("tenant_id"),
                }
            )
    return entries
1447
+
1448
+
1449
def memory_payload(engine: ArthaEngine, memory_id: str) -> dict[str, Any]:
    """Build the API-facing payload for the memory ``memory_id``.

    The memory is read from the projected memory rows when a row with that
    id exists; otherwise it is loaded with ``engine.store.require`` and
    decoded through the type registry if needed. For document collections the
    payload is filled in with ``collection_kind``, ``document_id`` and a
    ``document`` summary; keys already present in the stored payload win over
    these defaults because the stored payload is merged last.

    NOTE(review): ``retrieval_count`` is always reported as ``0`` here —
    confirm whether that is intended.
    """
    matching_rows = [
        row for row in _list_memory_rows(engine) if row["artha_id"] == memory_id
    ]
    if matching_rows:
        memory = _row_to_memory_dict(matching_rows[0])
    else:
        item = engine.store.require(memory_id)
        if not isinstance(item, MemoryArthaanu):
            item = arthaanu_from_dict(
                arthaanu_to_dict(item),
                type_registry=engine.registry.arthaanu_types,
            )
        memory = {
            "id": item.artha_id,
            "content": item.value.content,
            "node_type": getattr(item.value, "node_type", "text"),
            "payload": getattr(item.value, "payload", {}),
            "perception": getattr(item.value, "perception", None) or item.value.content,
            "encoding": getattr(item.value, "encoding", "memory"),
            "metadata": getattr(item.value, "metadata", {}),
            "scope": list(item.value.scope),
            "embedding": list(item.value.embedding),
            "timestamp": _now_stamp(),
        }

    node_type = str(memory.get("node_type") or "text")
    content = str(memory["content"])
    preview = " ".join(content.split())
    is_long = len(preview) > 240
    payload = memory.get("payload") or {}
    external_identity = {
        **_external_identity_from_payload(payload if isinstance(payload, dict) else {}),
        **_external_identity_from_metadata(memory.get("metadata")),
    }

    is_document_collection = (
        isinstance(payload, dict)
        and node_type == "collection"
        and memory.get("encoding") == "document_collection"
    )
    if is_document_collection:

        def _placement_role(row: Any) -> Any:
            # Role stored in the placement's JSON metadata, if it is a dict.
            parsed = _parse_json_field(row.get("metadata_json"), {})
            return parsed.get("role") if isinstance(parsed, dict) else None

        # First placement under this collection whose role is "source".
        source_placement = None
        for row in _list_placement_rows(engine):
            if str(row["parent_id"]) == memory_id and _placement_role(row) == "source":
                source_placement = _placement_row_to_dict(row)
                break
        if source_placement:
            document_id = str(source_placement["child_id"])
        else:
            document_id = payload.get("document_id")
        payload = {
            "collection_kind": "document",
            "document_id": document_id,
            "document": payload.get("document")
            or {
                "id": document_id,
                "file_name": payload.get("name"),
                "source_type": payload.get("source_type"),
                "chunk_count": payload.get("chunk_count", 0),
                "image_count": payload.get("image_count", 0),
                "graph_image_count": payload.get("graph_image_count", 0),
            },
            **payload,
        }

    return {
        "id": memory["id"],
        "content": content,
        "preview": preview[:240] + ("..." if is_long else ""),
        "content_length": len(content),
        "truncated": is_long,
        "type": node_type,
        "node_type": node_type,
        "payload": payload,
        "perception": memory.get("perception") or content,
        "encoding": memory.get("encoding") or "memory",
        "metadata": memory.get("metadata") or {},
        **{key: external_identity.get(key) for key in EXTERNAL_ID_FIELDS},
        "scope": memory.get("scope") or [],
        "links": _linked_memory_ids(engine, memory_id),
        "evolution_history": _evolution_history(engine, memory_id),
        "retrieval_count": 0,
        "timestamp": memory.get("timestamp") or _now_stamp(),
    }
1535
+
1536
+
1537
def get_memory(engine: ArthaEngine, memory_id: str) -> dict[str, Any]:
    """Return the payload of ``memory_id``.

    Raises:
        KeyError: if the id is neither in the store nor in the memory rows.
    """
    if engine.store.get(memory_id) is None:
        projected = [
            row for row in _list_memory_rows(engine) if row["artha_id"] == memory_id
        ]
        if len(projected) == 0:
            raise KeyError(f"Memory not found: {memory_id}")
    return memory_payload(engine, memory_id)
1543
+
1544
+
1545
def get_memories(engine: ArthaEngine, memory_ids: list[str]) -> list[dict[str, Any]]:
    """Return payloads for ``memory_ids`` in order, skipping ids ``get_memory`` cannot find."""
    found: list[dict[str, Any]] = []
    for memory_id in memory_ids:
        try:
            payload = get_memory(engine, memory_id)
        except KeyError:
            # Unknown ids are skipped rather than reported.
            continue
        found.append(payload)
    return found
1553
+
1554
+
1555
def update_memory(
    engine: ArthaEngine,
    memory_id: str,
    *,
    content: str | None = None,
    scope: list[str] | None = None,
    event_metadata: dict[str, object] | None = None,
) -> dict[str, Any]:
    """Update the content and/or scope of ``memory_id`` and return its payload.

    Arguments left as ``None`` keep the stored value. New content also
    replaces the perception. When a scope is supplied for a ``collection``
    node, it is propagated to the node's children. A ``memory.updated`` event
    is appended and projections are refreshed.
    """
    existing = _coerce_memory_arthaanu(engine, engine.store.require(memory_id))
    rich = _rich_fields_from_memory(existing)
    scope_given = scope is not None
    new_scope = scope if scope_given else list(existing.value.scope)
    content_given = content is not None
    new_content = content if content_given else existing.value.content
    new_perception = content if content_given else rich["perception"]
    updated = _encode_memory(
        engine,
        content=new_content,
        scope=new_scope,
        node_type=rich["node_type"],
        payload=rich["payload"],
        perception=new_perception,
        encoding=rich["encoding"],
        metadata=rich["metadata"],
        artha_id=memory_id,
        retrieval_count=existing.value.retrieval_count,
        persist=False,
    )
    _append_memory_event(
        engine,
        event_type="memory.updated",
        memory=updated,
        component="memuron.update",
        event_metadata=event_metadata,
    )
    if scope_given and rich["node_type"] == "collection":
        _propagate_parent_scope_to_children(
            engine,
            parent_id=memory_id,
            parent_scope=new_scope,
            event_metadata=event_metadata,
        )
    refresh_memory_projections(engine)
    return memory_payload(engine, memory_id)
1601
+
1602
+
1603
def _scope_tokens(scope: list[str], prefix: str) -> list[str]:
    """Return the tokens of ``scope`` that start with ``prefix``, in order."""
    return list(filter(lambda token: token.startswith(prefix), scope))


def _merge_inherited_scope(child_scope: list[str], parent_scope: list[str]) -> list[str]:
    """Replace the child's ``org:``/``space.`` tokens with the parent's.

    When the parent has no ``org:`` or ``space.`` tokens, a copy of
    ``child_scope`` is returned unchanged. Otherwise the child's own tokens
    with those prefixes are dropped and the parent's are appended (``org:``
    first, then ``space.``) without duplicates.
    """
    inherited = _scope_tokens(parent_scope, "org:") + _scope_tokens(parent_scope, "space.")
    if not inherited:
        return list(child_scope)
    merged = [
        token
        for token in child_scope
        if not token.startswith(("org:", "space."))
    ]
    for token in inherited:
        if token not in merged:
            merged.append(token)
    return merged
1622
+
1623
+
1624
def _append_scope_update(
    engine: ArthaEngine,
    *,
    memory_id: str,
    scope: list[str],
    component: str,
    event_metadata: dict[str, object] | None = None,
) -> dict[str, Any]:
    """Record a scope change for ``memory_id`` as a ``memory.updated`` event.

    When the stored scope already equals ``scope`` nothing is written and the
    full ``memory_payload`` is returned. Otherwise the memory is re-encoded
    with the new scope and a compact dict with ``id``, ``scope`` and
    ``node_type`` is returned.
    """
    current = _coerce_memory_arthaanu(engine, engine.store.require(memory_id))
    if list(current.value.scope) == scope:
        return memory_payload(engine, memory_id)
    rich = _rich_fields_from_memory(current)
    rescoped = _encode_memory(
        engine,
        content=current.value.content,
        scope=scope,
        node_type=rich["node_type"],
        payload=rich["payload"],
        perception=rich["perception"],
        encoding=rich["encoding"],
        metadata=rich["metadata"],
        artha_id=memory_id,
        retrieval_count=current.value.retrieval_count,
        persist=False,
    )
    _append_memory_event(
        engine,
        event_type="memory.updated",
        memory=rescoped,
        component=component,
        extra_payload={"reason": "placement_scope_inheritance"},
        event_metadata=event_metadata,
    )
    summary = {"id": memory_id, "scope": scope, "node_type": rich["node_type"]}
    return summary
1662
+
1663
+
1664
def _child_placements(engine: ArthaEngine, parent_id: str) -> list[dict[str, Any]]:
    """Return the placement dicts whose ``parent_id`` equals ``parent_id``."""
    converted = map(_placement_row_to_dict, _list_placement_rows(engine))
    return [
        placement
        for placement in converted
        if str(placement["parent_id"]) == parent_id
    ]
1671
+
1672
+
1673
def _append_placement_scope_update(
    engine: ArthaEngine,
    *,
    placement: dict[str, Any],
    parent_scope: list[str],
    event_metadata: dict[str, object] | None = None,
) -> dict[str, Any]:
    """Re-scope ``placement`` so it inherits ``parent_scope``.

    Returns ``placement`` untouched when ``_merge_inherited_scope`` yields the
    scope it already has. Otherwise the placement is re-encoded with the new
    scope, an event of type ``placement.created`` is appended for it, and a
    copy of ``placement`` carrying the new scope is returned.
    """
    current_scope = list(placement.get("scope") or [])
    new_scope = _merge_inherited_scope(current_scope, parent_scope)
    if new_scope == current_scope:
        return placement
    fields = {
        "artha_id": placement["id"],
        "parent_id": placement["parent_id"],
        "child_id": placement["child_id"],
        "name": placement["name"],
        "scope": new_scope,
        "metadata": placement.get("metadata") or {},
        "inherit_parent_scope": True,
    }
    rescoped = engine.encode("memory_placement", fields, persist=False)
    event_payload = dict(event_metadata or {})
    # Set last so the reason cannot be overridden by caller metadata.
    event_payload["reason"] = "placement_scope_inheritance"
    engine.store.append_event(
        event_type="placement.created",
        arthaanu=rescoped,
        component="memuron.placement_scope",
        payload=event_payload,
    )
    return {**placement, "scope": new_scope}
1709
+
1710
+
1711
def _propagate_parent_scope_to_children(
    engine: ArthaEngine,
    *,
    parent_id: str,
    parent_scope: list[str],
    event_metadata: dict[str, object] | None = None,
) -> list[dict[str, Any]]:
    """Push ``parent_scope`` down to every inheriting child placement of ``parent_id``.

    Placements whose ``inherit_parent_scope`` flag is falsy are skipped (a
    missing flag counts as inheriting). Each remaining placement is
    re-scoped, then its child node and that node's descendants are updated.

    Returns:
        The change records collected from the child nodes and descendants.
    """
    changed: list[dict[str, Any]] = []
    for placement in _child_placements(engine, parent_id):
        if placement.get("inherit_parent_scope", True):
            rescoped = _append_placement_scope_update(
                engine,
                placement=placement,
                parent_scope=parent_scope,
                event_metadata=event_metadata,
            )
            changed += _propagate_scope_to_node_and_descendants(
                engine,
                node_id=str(rescoped["child_id"]),
                parent_scope=parent_scope,
                event_metadata=event_metadata,
            )
    return changed
1737
+
1738
+
1739
def _propagate_scope_to_node_and_descendants(
    engine: ArthaEngine,
    *,
    node_id: str,
    parent_scope: list[str],
    event_metadata: dict[str, object] | None = None,
) -> list[dict[str, Any]]:
    """Apply inherited scope to ``node_id`` and then to its own children.

    Returns an empty list when loading the node raises ``KeyError``.
    Otherwise returns the node's change record (only if its scope differs
    from the merged scope) followed by the records from its descendants,
    which inherit from the node's merged scope.
    """
    try:
        node = _coerce_memory_arthaanu(engine, engine.store.require(node_id))
    except KeyError:
        return []
    new_scope = _merge_inherited_scope(list(node.value.scope), parent_scope)
    node_result = _append_scope_update(
        engine,
        memory_id=node_id,
        scope=new_scope,
        component="memuron.placement_scope",
        event_metadata=event_metadata,
    )
    # Compared after the update call, as in the original statement order.
    scope_differs = list(node.value.scope) != new_scope
    changed: list[dict[str, Any]] = [node_result] if scope_differs else []
    descendants = _propagate_parent_scope_to_children(
        engine,
        parent_id=node_id,
        parent_scope=new_scope,
        event_metadata=event_metadata,
    )
    changed.extend(descendants)
    return changed
1770
+
1771
+
1772
def delete_memory(
    engine: ArthaEngine,
    memory_id: str,
    *,
    event_metadata: dict[str, object] | None = None,
) -> bool:
    """Delete ``memory_id`` together with every link that touches it.

    Returns ``False`` without side effects when the store has no such id.
    Otherwise the memory's links are removed, the memory is semantically
    deleted with ``domain_event_type`` ``"memory.deleted"`` (entries in
    ``event_metadata`` are merged last), projections are refreshed and
    ``True`` is returned.
    """
    stored = engine.store.get(memory_id)
    if stored is None:
        return False
    if not isinstance(stored, MemoryArthaanu):
        # Round-trip through the registry; the decoded object is not used
        # afterwards, so this only has an effect if decoding raises.
        arthaanu_from_dict(
            arthaanu_to_dict(stored),
            type_registry=engine.registry.arthaanu_types,
        )
    for link_id in _find_links_for_memory(engine, memory_id):
        _remove_link(engine, link_id, event_metadata=event_metadata)
    delete_metadata: dict[str, object] = {"domain_event_type": "memory.deleted"}
    delete_metadata.update(event_metadata or {})
    engine.semantic_delete(
        memory_id,
        event_type="delete",
        component="memuron.delete",
        metadata=delete_metadata,
    )
    refresh_memory_projections(engine)
    return True
1799
+
1800
+
1801
def _find_links_for_memory(engine: ArthaEngine, memory_id: str) -> list[str]:
    """Return the ids of all links that have ``memory_id`` as source or target.

    Uses the ``memuron_links`` SQL table when available, otherwise the
    store's ``memuron_links`` mapping (treated as empty when missing).
    """
    store = engine.store
    if not sql_store_has_tables(store):
        links = getattr(store, "memuron_links", {})
        return [
            link_id
            for link_id, item in links.items()
            if item["source_id"] == memory_id or item["target_id"] == memory_id
        ]
    rows = sql_store_fetchall(
        store,
        """
        SELECT link_id FROM memuron_links
        WHERE source_id = ? OR target_id = ?
        """,
        (memory_id, memory_id),
    )
    return [str(row["link_id"]) for row in rows]
1819
+
1820
+
1821
def bulk_delete_memories(
    engine: ArthaEngine,
    *,
    scope: str | None = None,
    event_metadata: dict[str, object] | None = None,
) -> tuple[int, list[str]]:
    """Delete every memory whose scope matches the comma-separated ``scope`` filter.

    Returns:
        ``(count, ids)`` for the memories that ``delete_memory`` reported as
        deleted.

    Raises:
        ValueError: if ``scope`` yields no patterns, so an unfiltered
            delete-all cannot happen by accident.
    """
    scope_patterns = parse_comma_scope(scope)
    if not scope_patterns:
        raise ValueError("scope is required for bulk delete")
    # Resolve the matching ids from a snapshot first: delete_memory refreshes
    # the projections, so the listing must not be iterated while deleting.
    matching_ids = [
        memory["id"]
        for memory in map(_row_to_memory_dict, list(_list_memory_rows(engine)))
        if scope_matches_filter(memory.get("scope") or [], scope_patterns)
    ]
    deleted_ids: list[str] = [
        memory_id
        for memory_id in matching_ids
        if delete_memory(engine, memory_id, event_metadata=event_metadata)
    ]
    return len(deleted_ids), deleted_ids
1838
+
1839
+
1840
def count_memories(
    engine: ArthaEngine,
    *,
    scope: str | None = None,
) -> tuple[int, dict[str, Any]]:
    """Count memories, optionally restricted by a comma-separated scope.

    Returns ``(count, filters)`` where ``filters`` echoes the parsed scope
    patterns under ``"scope"`` when any were supplied and is empty otherwise.
    """
    patterns = [token for token in map(str.strip, (scope or "").split(",")) if token]
    applied_filters: dict[str, Any] = {"scope": patterns} if patterns else {}
    matching_rows = _filtered_memory_rows(engine, scope=patterns or None)
    return len(matching_rows), applied_filters
1851
+
1852
+
1853
def list_memories(
    engine: ArthaEngine,
    *,
    scope: str | None = None,
    limit: int = 100,
    offset: int = 0,
) -> tuple[list[dict[str, Any]], int, dict[str, Any]]:
    """Return one page of memories, optionally restricted by scope.

    Args:
        engine: Engine whose memory rows are listed.
        scope: Comma-separated scope patterns; blank entries are ignored.
        limit: Maximum number of memories in the page. Negative values are
            treated as ``0``.
        offset: Number of matching rows to skip. Negative values are treated
            as ``0``.

    Returns:
        ``(memories, total, filters)`` where ``total`` is the number of
        matching rows before paging and ``filters`` echoes the parsed scope
        patterns under ``"scope"`` when any were supplied.
    """
    scope_patterns = [part.strip() for part in (scope or "").split(",") if part.strip()]
    filters: dict[str, Any] = {}
    if scope_patterns:
        filters["scope"] = scope_patterns

    rows = _filtered_memory_rows(engine, scope=scope_patterns or None)
    # Clamp paging inputs: a negative offset or limit would otherwise be
    # interpreted as "count from the end" by the slice and return an
    # unrelated window of rows.
    start = max(offset, 0)
    page_size = max(limit, 0)
    page = rows[start : start + page_size]
    memories = [memory_payload(engine, row["artha_id"]) for row in page]
    return memories, len(rows), filters
1869
+
1870
+
1871
def create_rich_node(
    engine: ArthaEngine,
    *,
    content: str,
    node_type: str = "text",
    payload: dict[str, Any] | None = None,
    perception: str | None = None,
    encoding: str = "memory",
    metadata: dict[str, Any] | None = None,
    external_identity: dict[str, str] | None = None,
    scope: list[str] | None = None,
    event_metadata: dict[str, object] | None = None,
) -> dict[str, Any]:
    """Encode a node, append its creation event and return its payload.

    The external identity is folded into both the node payload and its
    metadata, and is also merged over ``event_metadata`` on the event. The
    event type is ``collection.created`` for ``node_type == "collection"``
    and ``memory.created`` for everything else.
    """
    identity = external_identity_from_values(**(external_identity or {}))
    node_payload = _payload_with_external_identity(payload, identity)
    node_metadata = _metadata_with_external_identity(metadata, identity)
    node = _encode_memory(
        engine,
        content=content,
        scope=scope,
        node_type=node_type,
        payload=node_payload,
        perception=perception,
        encoding=encoding,
        metadata=node_metadata,
        persist=False,
    )

    if node_type == "collection":
        created_event = "collection.created"
    else:
        created_event = "memory.created"
    _append_memory_event(
        engine,
        event_type=created_event,
        memory=node,
        component="memuron.nodes",
        event_metadata={**(event_metadata or {}), **identity},
    )
    refresh_memory_projections(engine)
    return memory_payload(engine, node.artha_id)
1906
+
1907
+
1908
def create_collection(
    engine: ArthaEngine,
    *,
    name: str,
    summary: str,
    scope: list[str] | None = None,
    metadata: dict[str, Any] | None = None,
    event_metadata: dict[str, object] | None = None,
) -> dict[str, Any]:
    """Create a collection node via ``create_rich_node``.

    ``summary`` is used as the node content and as its perception; the
    payload carries ``name`` and ``summary``.
    """
    return create_rich_node(
        engine,
        content=summary,
        node_type="collection",
        payload={"name": name, "summary": summary},
        perception=summary,
        encoding="collection_summary",
        metadata=metadata,
        scope=scope,
        event_metadata=event_metadata,
    )
1929
+
1930
+
1931
def ingest_document_source(
    engine: ArthaEngine,
    *,
    file_name: str,
    content_type: str | None,
    file_bytes: bytes,
    scope: list[str] | None = None,
    metadata: dict[str, Any] | None = None,
    external_identity: dict[str, str] | None = None,
    source_metadata: dict[str, Any] | None = None,
    event_metadata: dict[str, object] | None = None,
) -> dict[str, Any]:
    """Parse a source file and append document/collection/chunk semantic events.

    Events are appended in this order: collection created, source node
    created, collection updated (to carry the source node id), source
    placement, then per image (non-image sources only) a node plus placement,
    then per chunk a node plus placement. Projections are refreshed once at
    the end.

    Returns a summary dict with the generated ``document_key``, counts, ids,
    the payloads of every node and placement created here, and
    ``image_attachments`` (raw bytes for images that carried any).
    """

    # Identity values passed explicitly override those found in metadata.
    external_identity = {
        **_external_identity_from_metadata(metadata),
        **external_identity_from_values(**(external_identity or {})),
    }
    parsed = parse_source(
        file_name=file_name,
        content_type=content_type,
        file_bytes=file_bytes,
        describe_images=settings.describe_images,
        vlm_api_key=settings.openrouter_api_key,
        vlm_model=settings.image_vlm_model,
        vlm_timeout_seconds=settings.image_vlm_timeout_seconds,
    )
    if source_metadata:
        # Caller-supplied source metadata wins over what the parser produced.
        parsed = replace(
            parsed,
            source_metadata={**parsed.source_metadata, **source_metadata},
        )
    # Fresh random key that ties together every node created for this source.
    document_key = str(uuid4())
    source_object = maybe_store_source_file(
        org_id=_tenant_id_from_metadata(event_metadata),
        document_key=document_key,
        file_name=parsed.file_name,
        content_type=content_type or parsed.media_type,
        file_bytes=file_bytes,
    )
    source_object_ref = {"document_key": document_key}
    base_scope = list(scope or [])
    document_scope = _document_scope(base_scope, document_key)
    user_metadata = _metadata_with_external_identity(metadata, external_identity)
    # Metadata attached to every event appended below; the fixed keys at the
    # end override same-named caller/identity keys.
    common_event_metadata = {
        **(event_metadata or {}),
        **external_identity,
        "document_key": document_key,
        "parser": "memuron.documents.parser",
        "source_type": parsed.source_type,
    }
    # Only images flagged include_in_graph become nodes; the rest are counted
    # as skipped.
    graph_images = [image for image in parsed.images if image.include_in_graph]
    skipped_image_count = len(parsed.images) - len(graph_images)

    summary = _document_summary(parsed)
    # Step 1: the collection that acts as the container for this document.
    collection = _encode_memory(
        engine,
        content=summary,
        scope=document_scope,
        node_type="collection",
        payload={
            "name": parsed.file_name,
            "summary": summary,
            "document_key": document_key,
            "source_type": parsed.source_type,
            "chunk_count": len(parsed.chunks),
            "image_count": len(parsed.images),
            "graph_image_count": len(graph_images),
            "skipped_image_count": skipped_image_count,
            "source_object_ref": source_object_ref,
            "source_identity": external_identity,
        },
        perception=summary,
        encoding="document_collection",
        metadata=_document_metadata(
            user_metadata,
            document_key=document_key,
            role="document_collection",
            parsed=parsed,
        ),
        persist=False,
    )
    _append_memory_event(
        engine,
        event_type="collection.created",
        memory=collection,
        component="memuron.documents",
        event_metadata=common_event_metadata,
    )

    # Step 2: the source node itself; an image source is typed "image",
    # anything else "document".
    source_node_type = "image" if parsed.source_type == "image" else "document"
    document = _encode_memory(
        engine,
        content=summary,
        scope=document_scope,
        node_type=source_node_type,
        payload={
            "file_name": parsed.file_name,
            "media_type": parsed.media_type,
            "source_type": parsed.source_type,
            "size_bytes": len(file_bytes),
            "document_key": document_key,
            "collection_id": collection.artha_id,
            "chunk_count": len(parsed.chunks),
            "image_count": len(parsed.images),
            "graph_image_count": len(graph_images),
            "skipped_image_count": skipped_image_count,
            "page_count": parsed.page_count,
            "unreadable_pages": parsed.unreadable_pages,
            "source_metadata": parsed.source_metadata,
            "markdown_preview": parsed.markdown[:DOCUMENT_MARKDOWN_PREVIEW_CHARS],
            "markdown_truncated": len(parsed.markdown) > DOCUMENT_MARKDOWN_PREVIEW_CHARS,
            "source_object": source_object,
            "source_identity": external_identity,
        },
        perception=summary,
        encoding=f"{parsed.source_type}_source",
        metadata=_document_metadata(
            user_metadata,
            document_key=document_key,
            role="source",
            parsed=parsed,
        ),
        persist=False,
    )
    _append_memory_event(
        engine,
        event_type="memory.created",
        memory=document,
        component="memuron.documents",
        event_metadata=common_event_metadata,
    )

    # Step 3: re-encode the collection under the same artha_id so that it
    # carries the id of the source node, which did not exist in step 1.
    collection_payload = {
        **dict(collection.value.payload),
        "collection_kind": "document",
        "document_id": document.artha_id,
        "document": {
            "id": document.artha_id,
            "file_name": parsed.file_name,
            "source_type": parsed.source_type,
            "media_type": parsed.media_type,
            "chunk_count": len(parsed.chunks),
            "image_count": len(parsed.images),
            "graph_image_count": len(graph_images),
            "source_object_ref": {
                "document_id": document.artha_id,
                "document_key": document_key,
            },
            "source_identity": external_identity,
        },
    }
    collection_metadata = _document_metadata(
        {
            **user_metadata,
            "document_id": document.artha_id,
            "collection_kind": "document",
        },
        document_key=document_key,
        role="document_collection",
        parsed=parsed,
    )
    collection_update = _encode_memory(
        engine,
        content=summary,
        scope=document_scope,
        node_type="collection",
        payload=collection_payload,
        perception=summary,
        encoding="document_collection",
        metadata=collection_metadata,
        artha_id=collection.artha_id,
        persist=False,
    )
    _append_memory_event(
        engine,
        event_type="memory.updated",
        memory=collection_update,
        component="memuron.documents",
        extra_payload={"reason": "document_container_source_link"},
        event_metadata=common_event_metadata,
    )

    # Step 4: place the source node inside the collection. Further placements
    # for images and chunks are appended to this list below.
    placements: list[dict[str, Any]] = [
        _append_placement(
            engine,
            parent_id=collection.artha_id,
            child_id=document.artha_id,
            name=f"source:{parsed.file_name}",
            scope=document_scope,
            metadata={"role": "source", "document_key": document_key, **external_identity},
            event_metadata=common_event_metadata,
        )
    ]

    image_ids: list[str] = []
    image_attachments: list[dict[str, Any]] = []
    # Step 5: one node per graph image, skipped entirely when the source is
    # itself an image (the source node is then already typed "image").
    if parsed.source_type != "image":
        for image in graph_images:
            image_node = _encode_memory(
                engine,
                content=image.description,
                scope=[*document_scope, "kind:document_image"],
                node_type="image",
                payload={
                    "document_key": document_key,
                    "document_id": document.artha_id,
                    "collection_id": collection.artha_id,
                    "source_object_ref": {
                        "document_id": document.artha_id,
                        "document_key": document_key,
                    },
                    "source_identity": external_identity,
                    "file_name": image.file_name,
                    "media_type": image.media_type,
                    "source": image.source,
                    "page_number": image.page_number,
                    "image_index": image.index,
                    "size_bytes": image.size_bytes,
                    "description": image.description,
                    "include_in_graph": image.include_in_graph,
                    "image_kind": image.image_kind,
                    "reason": image.reason,
                    "metadata": image.metadata,
                },
                perception=image.description,
                # The encoding label depends on which parser the image
                # metadata names.
                encoding="image_vlm" if image.metadata.get("parser") == "openrouter_vlm" else "image_metadata",
                metadata=_document_metadata(
                    {
                        **user_metadata,
                        "image": {
                            "file_name": image.file_name,
                            "media_type": image.media_type,
                            "page_number": image.page_number,
                            "image_index": image.index,
                            "source": image.source,
                            "include_in_graph": image.include_in_graph,
                            "image_kind": image.image_kind,
                            "reason": image.reason,
                            **image.metadata,
                        },
                    },
                    document_key=document_key,
                    role="image",
                    parsed=parsed,
                ),
                persist=False,
            )
            image_event_metadata = {
                **common_event_metadata,
                "image_index": image.index,
                "page_number": image.page_number,
            }
            _append_memory_event(
                engine,
                event_type="memory.created",
                memory=image_node,
                component="memuron.documents",
                event_metadata=image_event_metadata,
            )
            image_ids.append(image_node.artha_id)
            # Raw bytes are handed back to the caller rather than stored on
            # the node.
            if image.raw_bytes:
                image_attachments.append(
                    {
                        "memory_id": image_node.artha_id,
                        "media_type": image.media_type,
                        "bytes": image.raw_bytes,
                    }
                )
            placements.append(
                _append_placement(
                    engine,
                    parent_id=collection.artha_id,
                    child_id=image_node.artha_id,
                    # Placement names are 1-based and zero-padded to 4 digits.
                    name=f"image-{image.index + 1:04d}:{image.file_name}",
                    scope=document_scope,
                    metadata={
                        "role": "image",
                        "document_key": document_key,
                        **external_identity,
                        "image_index": image.index,
                        "page_number": image.page_number,
                    },
                    event_metadata=image_event_metadata,
                )
            )

    chunk_ids: list[str] = []
    # Step 6: one text node per parsed chunk, each placed in the collection.
    for chunk in parsed.chunks:
        chunk_node = _encode_memory(
            engine,
            content=chunk.text,
            scope=[*document_scope, "kind:document_chunk"],
            node_type="text",
            payload={
                "document_key": document_key,
                "document_id": document.artha_id,
                "collection_id": collection.artha_id,
                "source_object_ref": {
                    "document_id": document.artha_id,
                    "document_key": document_key,
                },
                "source_identity": external_identity,
                "file_name": parsed.file_name,
                "source_type": parsed.source_type,
                "chunk_index": chunk.index,
                "chunk_count": len(parsed.chunks),
                "location": chunk.to_location(),
            },
            # Perception is capped at the first 2,000 characters of the chunk.
            perception=chunk.text[:2_000],
            encoding="document_chunk",
            metadata=_document_metadata(
                user_metadata,
                document_key=document_key,
                role="chunk",
                parsed=parsed,
                chunk_index=chunk.index,
                chunk_count=len(parsed.chunks),
                location=chunk.to_location(),
            ),
            persist=False,
        )
        chunk_event_metadata = {
            **common_event_metadata,
            "chunk_index": chunk.index,
        }
        _append_memory_event(
            engine,
            event_type="memory.created",
            memory=chunk_node,
            component="memuron.documents",
            event_metadata=chunk_event_metadata,
        )
        chunk_ids.append(chunk_node.artha_id)
        placements.append(
            _append_placement(
                engine,
                parent_id=collection.artha_id,
                child_id=chunk_node.artha_id,
                name=f"chunk-{chunk.index + 1:04d}",
                scope=document_scope,
                metadata={
                    "role": "chunk",
                    "document_key": document_key,
                    **external_identity,
                    "chunk_index": chunk.index,
                    "location": chunk.to_location(),
                },
                event_metadata=chunk_event_metadata,
            )
        )

    # Refresh once after all events so the payload lookups below can resolve
    # the nodes and placements just appended (presumably they read from the
    # projections -- TODO confirm).
    refresh_memory_projections(engine)
    return {
        "status": "success",
        "document_key": document_key,
        # Every known external-id field is present in the result, as None
        # when it was not supplied.
        **{key: external_identity.get(key) for key in EXTERNAL_ID_FIELDS},
        "source_type": parsed.source_type,
        "media_type": parsed.media_type,
        "file_name": parsed.file_name,
        "source_object": source_object,
        "page_count": parsed.page_count,
        "unreadable_pages": parsed.unreadable_pages,
        "image_count": len(parsed.images),
        "graph_image_count": len(graph_images),
        "skipped_image_count": skipped_image_count,
        "image_ids": image_ids,
        "chunk_count": len(parsed.chunks),
        "chunk_ids": chunk_ids,
        "collection": memory_payload(engine, collection.artha_id),
        "document": memory_payload(engine, document.artha_id),
        "images": [memory_payload(engine, image_id) for image_id in image_ids],
        "chunks": [memory_payload(engine, chunk_id) for chunk_id in chunk_ids],
        "placements": [placement_payload(engine, item["id"]) for item in placements],
        "image_attachments": image_attachments,
    }
2307
+
2308
+
2309
def document_source_payload(engine: ArthaEngine, node_id: str) -> dict[str, Any]:
    """Resolve any document-related node to its original source object.

    The owning document id is taken, in order of preference, from the node
    payload's ``document_id``, its ``source_object_ref``, its ``document``
    descriptor, or the node itself when it carries a ``source_object``.

    Raises:
        KeyError: if no document id can be derived, or the resolved document
            has no non-empty ``source_object`` dict.
    """

    def _dict_or_empty(value: Any) -> dict[str, Any]:
        return value if isinstance(value, dict) else {}

    requested = get_memory(engine, node_id)
    node_payload = _dict_or_empty(requested.get("payload"))
    source_ref = _dict_or_empty(node_payload.get("source_object_ref"))
    descriptor = _dict_or_empty(node_payload.get("document"))

    document_id = (
        node_payload.get("document_id")
        or source_ref.get("document_id")
        or descriptor.get("id")
    )
    if not document_id and node_payload.get("source_object"):
        # The requested node is itself the source document.
        document_id = node_id
    if not document_id:
        raise KeyError(f"Document source not found for node: {node_id}")

    if document_id == node_id:
        document = requested
    else:
        document = get_memory(engine, str(document_id))
    document_payload = _dict_or_empty(document.get("payload"))

    source_object = document_payload.get("source_object")
    if not (isinstance(source_object, dict) and source_object):
        raise KeyError(f"Document source object not found for node: {node_id}")

    download_url = presign_source_object(source_object)
    document_key = document_payload.get("document_key") or source_object.get("document_key") or ""
    file_name = source_object.get("file_name") or document_payload.get("file_name") or ""
    content_type = (
        source_object.get("content_type")
        or document_payload.get("media_type")
        or "application/octet-stream"
    )
    size_bytes = source_object.get("size_bytes") or document_payload.get("size_bytes") or 0
    # The expiry is only reported when a download URL was produced.
    expires_in = settings.object_storage_presign_seconds if download_url else None
    return {
        "status": "success",
        "requested_node_id": node_id,
        "document_id": str(document["id"]),
        "document_key": str(document_key),
        "file_name": str(file_name),
        "content_type": str(content_type),
        "size_bytes": int(size_bytes),
        "sha256": str(source_object.get("sha256") or ""),
        "source_object": source_object,
        "download_url": download_url,
        "expires_in_seconds": expires_in,
    }
2350
+
2351
+
2352
def _document_scope(scope: list[str], document_key: str) -> list[str]:
    """Return a copy of ``scope`` that contains the per-document scope token.

    The token is ``DOCUMENT_CHUNK_SCOPE_PREFIX`` followed by ``document_key``;
    it is appended only when not already present. The input list is never
    modified.
    """
    document_token = f"{DOCUMENT_CHUNK_SCOPE_PREFIX}{document_key}"
    if document_token in scope:
        return list(scope)
    return [*scope, document_token]
2358
+
2359
+
2360
def _document_summary(parsed: ParsedDocument) -> str:
    """Build a short summary: the file name, a blank line, then leading text.

    The leading text is the first chunk when there is one, otherwise the
    whole markdown. It is stripped, has each blank-line pair collapsed to a
    single newline, and is cut to 2,000 characters (ending in ``...``) when
    longer.
    """
    if parsed.chunks:
        lead_text = parsed.chunks[0].text
    else:
        lead_text = parsed.markdown
    body = lead_text.strip().replace("\n\n", "\n")
    if len(body) > 2_000:
        # 1,997 characters plus the three-character ellipsis.
        body = f"{body[:1_997].rstrip()}..."
    return f"{parsed.file_name}\n\n{body}"
2366
+
2367
+
2368
def _document_metadata(
    metadata: dict[str, Any],
    *,
    document_key: str,
    role: str,
    parsed: ParsedDocument,
    chunk_index: int | None = None,
    chunk_count: int | None = None,
    location: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Return a copy of ``metadata`` with document details under ``system``.

    The result's ``system["document"]`` entry describes the parsed source
    (key, role, file name, types, page info, source metadata) and includes
    ``chunk_index``, ``chunk_count`` and ``location`` only when they are not
    ``None``. Other ``system`` entries are kept; the input dict and its
    ``system`` dict are not modified.
    """
    enriched = dict(metadata)
    system_section = dict(enriched.get("system") or {})
    document_section: dict[str, Any] = {
        "document_key": document_key,
        "role": role,
        "file_name": parsed.file_name,
        "source_type": parsed.source_type,
        "media_type": parsed.media_type,
        "page_count": parsed.page_count,
        "unreadable_pages": list(parsed.unreadable_pages),
        "source_metadata": dict(parsed.source_metadata),
    }
    optional_fields = (
        ("chunk_index", chunk_index),
        ("chunk_count", chunk_count),
        ("location", location),
    )
    for field_name, field_value in optional_fields:
        if field_value is not None:
            document_section[field_name] = field_value
    system_section["document"] = document_section
    enriched["system"] = system_section
    return enriched
2399
+
2400
+
2401
def _append_placement(
    engine: ArthaEngine,
    *,
    parent_id: str,
    child_id: str,
    name: str,
    scope: list[str],
    metadata: dict[str, Any],
    inherit_parent_scope: bool = True,
    event_metadata: dict[str, object] | None = None,
) -> dict[str, Any]:
    """Encode a placement, append its creation event and describe it.

    The placement is encoded as ``memory_placement`` with ``persist=False``
    and then appended to the store as a ``placement.created`` event from the
    ``memuron.documents`` component. No parent/child validation is done here.

    Returns a dict with the new placement's ``id`` and the fields it was
    created with.
    """
    placement_fields = {
        "parent_id": parent_id,
        "child_id": child_id,
        "name": name,
        "scope": scope,
        "metadata": metadata,
        "inherit_parent_scope": inherit_parent_scope,
    }
    placement = engine.encode("memory_placement", placement_fields, persist=False)
    event_payload = event_metadata or {}
    engine.store.append_event(
        event_type="placement.created",
        arthaanu=placement,
        component="memuron.documents",
        payload=event_payload,
    )
    # Built from the arguments rather than from placement_fields so the
    # result does not depend on what encode() did with that dict.
    description: dict[str, Any] = {"id": placement.artha_id}
    description["parent_id"] = parent_id
    description["child_id"] = child_id
    description["name"] = name
    description["scope"] = scope
    description["metadata"] = metadata
    description["inherit_parent_scope"] = inherit_parent_scope
    return description
2439
+
2440
+
2441
def place_node_in_collection(
    engine: ArthaEngine,
    *,
    parent_id: str,
    child_id: str,
    name: str,
    scope: list[str] | None = None,
    metadata: dict[str, Any] | None = None,
    inherit_parent_scope: bool = True,
    event_metadata: dict[str, object] | None = None,
) -> dict[str, Any]:
    """Place an existing node inside a collection and return the placement.

    When ``inherit_parent_scope`` is true, the parent's scope is merged into
    the placement scope and then propagated to the child and its descendants.

    Raises:
        ValueError: if the parent is not a collection, if parent and child
            are the same node, or if the placement would create a cycle.
    """
    parent = get_memory(engine, parent_id)
    if parent.get("node_type") != "collection":
        raise ValueError("parent_id must be a collection node")
    # Looked up only for its side effect of failing on an unknown child
    # (presumably get_memory raises for missing ids -- TODO confirm).
    get_memory(engine, child_id)
    if parent_id == child_id:
        raise ValueError("collection cannot contain itself")
    if _placement_would_create_cycle(engine, parent_id=parent_id, child_id=child_id):
        raise ValueError("placement would create a collection cycle")

    inherited_scope = parent.get("scope") or []
    effective_scope = list(scope or [])
    if inherit_parent_scope:
        effective_scope = _merge_inherited_scope(effective_scope, list(inherited_scope))

    placement_fields = {
        "parent_id": parent_id,
        "child_id": child_id,
        "name": name,
        "scope": effective_scope,
        "metadata": metadata or {},
        "inherit_parent_scope": inherit_parent_scope,
    }
    placement = engine.encode("memory_placement", placement_fields, persist=False)
    engine.store.append_event(
        event_type="placement.created",
        arthaanu=placement,
        component="memuron.collections",
        payload=event_metadata or {},
    )

    if inherit_parent_scope:
        _propagate_scope_to_node_and_descendants(
            engine,
            node_id=child_id,
            parent_scope=list(inherited_scope),
            event_metadata=event_metadata,
        )
    refresh_memory_projections(engine)
    return placement_payload(engine, placement.artha_id)
2493
+
2494
+
2495
def _placement_would_create_cycle(engine: ArthaEngine, *, parent_id: str, child_id: str) -> bool:
    """Return true when adding parent -> child would make child an ancestor of parent.

    Walks the existing placements downward from ``child_id``; reaching
    ``parent_id`` means the new edge would close a loop.
    """
    children_of: dict[str, list[str]] = {}
    for row in _list_placement_rows(engine):
        edge = _placement_row_to_dict(row)
        parent_key = str(edge["parent_id"])
        if parent_key not in children_of:
            children_of[parent_key] = []
        children_of[parent_key].append(str(edge["child_id"]))

    pending = [child_id]
    visited: set[str] = set()
    while pending:
        node_id = pending.pop()
        if node_id == parent_id:
            return True
        if node_id not in visited:
            visited.add(node_id)
            pending.extend(children_of.get(node_id, []))
    return False
2513
+
2514
+
2515
def _placement_row_to_dict(row: dict[str, Any]) -> dict[str, Any]:
    """Convert a placement row into the placement dict used by this module.

    ``scope_json`` and ``metadata_json`` are parsed with ``_parse_json_field``;
    a parsed value of the wrong type falls back to an empty list / dict. A
    missing ``inherit_parent_scope`` column counts as true.
    """
    parsed_scope = _parse_json_field(row.get("scope_json"), [])
    parsed_metadata = _parse_json_field(row.get("metadata_json"), {})
    if not isinstance(parsed_scope, list):
        parsed_scope = []
    if not isinstance(parsed_metadata, dict):
        parsed_metadata = {}
    return {
        "id": str(row["placement_id"]),
        "parent_id": str(row["parent_id"]),
        "child_id": str(row["child_id"]),
        "name": str(row["name"]),
        "scope": parsed_scope,
        "metadata": parsed_metadata,
        "inherit_parent_scope": bool(row.get("inherit_parent_scope", True)),
    }
2527
+
2528
+
2529
def placement_payload(engine: ArthaEngine, placement_id: str) -> dict[str, Any]:
    """Return the placement dict for ``placement_id``.

    Raises:
        KeyError: if no placement row has that id.
    """
    matching_row = next(
        (
            row
            for row in _list_placement_rows(engine)
            if str(row["placement_id"]) == placement_id
        ),
        None,
    )
    if matching_row is None:
        raise KeyError(f"Placement not found: {placement_id}")
    return _placement_row_to_dict(matching_row)
2534
+
2535
+
2536
def collection_members(engine: ArthaEngine, collection_id: str) -> list[dict[str, Any]]:
    """List the direct members of a collection.

    Each entry pairs the ``placement`` with the child ``node`` payload.
    Placements whose child payload lookup raises ``KeyError`` are skipped.
    """
    # Result unused: the lookup is made before any rows are scanned
    # (presumably to fail early on an unknown collection -- TODO confirm).
    get_memory(engine, collection_id)

    owned_rows = (
        row for row in _list_placement_rows(engine) if str(row["parent_id"]) == collection_id
    )
    members: list[dict[str, Any]] = []
    for row in owned_rows:
        placement = _placement_row_to_dict(row)
        try:
            child_node = memory_payload(engine, placement["child_id"])
        except KeyError:
            continue
        else:
            members.append({"placement": placement, "node": child_node})
    return members
2549
+
2550
+
2551
def _graph_scope_patterns(scope: str | list[str] | None) -> list[str]:
    """Normalise a scope argument into a list of non-empty, stripped patterns.

    A string is split on commas; a list has each item converted with
    ``str``. ``None`` yields an empty list.
    """
    if scope is None:
        return []
    if isinstance(scope, str):
        raw_parts = scope.split(",")
    else:
        raw_parts = [str(part) for part in scope]
    return [cleaned for cleaned in (part.strip() for part in raw_parts) if cleaned]
2557
+
2558
+
2559
def _graph_node_from_memory(
    memory: dict[str, Any],
    degree: int = 0,
    self_loop_count: int = 0,
) -> dict[str, Any]:
    """Build the graph-node dict for a memory.

    The label is the first 50 characters of the content, with ``...``
    appended when the content is longer. Missing or empty fields fall back
    to defaults (``"text"`` node type, ``"memory"`` encoding, content as
    perception). ``importance`` mirrors ``degree``.
    """
    content = str(memory.get("content") or "")
    if len(content) > 50:
        label = content[:50] + "..."
    else:
        label = content[:50]
    node: dict[str, Any] = {"id": str(memory["id"])}
    node["label"] = label
    node["content"] = content
    node["node_type"] = memory.get("node_type") or "text"
    node["payload"] = memory.get("payload") or {}
    node["perception"] = memory.get("perception") or content
    node["encoding"] = memory.get("encoding") or "memory"
    node["metadata"] = memory.get("metadata") or {}
    node["scope"] = list(memory.get("scope") or [])
    node["importance"] = degree
    node["degree"] = degree
    node["self_loop_count"] = self_loop_count
    node["type"] = memory.get("node_type") or "text"
    node["timestamp"] = memory.get("timestamp")
    return node
2582
+
2583
+
2584
def _graph_edge_from_link(link: dict[str, Any]) -> dict[str, Any]:
    """Build the graph-edge dict for a semantic link.

    The edge is marked undirected, flagged as a self loop when source and
    target coincide, and uses the link id as both ``id`` and
    ``parallel_key``. Non-dict metadata is replaced by an empty dict.
    """
    source_id = str(link["source_id"])
    target_id = str(link["target_id"])
    raw_metadata = link.get("metadata")
    if isinstance(raw_metadata, dict):
        metadata = raw_metadata
    else:
        metadata = {}
    link_id = str(link["link_id"])
    return {
        "id": link_id,
        "link_id": link_id,
        "source": source_id,
        "target": target_id,
        "description": str(link.get("description") or ""),
        "metadata": metadata,
        "type": "semantic_link",
        "edge_type": "semantic_link",
        "directed": False,
        "is_self_loop": source_id == target_id,
        "parallel_key": link_id,
    }
2601
+
2602
+
2603
def _graph_edge_from_placement(placement: dict[str, Any]) -> dict[str, Any]:
    """Build the graph-edge dict for a placement (parent -> child).

    The placement name is used as both ``description`` and ``name``.
    """
    edge: dict[str, Any] = {"id": str(placement["id"])}
    edge["source"] = str(placement["parent_id"])
    edge["target"] = str(placement["child_id"])
    edge["description"] = str(placement["name"])
    edge["type"] = "placement"
    edge["name"] = str(placement["name"])
    edge["scope"] = list(placement.get("scope") or [])
    edge["metadata"] = placement.get("metadata") or {}
    return edge
2614
+
2615
+
2616
def export_graph(
    engine: ArthaEngine,
    *,
    scope: str | list[str] | None = None,
    limit: int = 1000,
) -> dict[str, Any]:
    """Export memories, links and placements as a node/edge graph.

    At most ``limit`` scope-matching memories seed the graph. Every link
    touching a seed is included, and the memory at its other end is fetched
    as an extra node. Placements are included only when both ends are
    already nodes. Node ``degree`` counts non-loop links and placements;
    self loops are counted separately.
    """
    _require_memory_projections(engine)
    patterns = _graph_scope_patterns(scope)
    seed_rows = _filtered_memory_rows(engine, scope=patterns or None)[:limit]
    memories = [_row_to_memory_dict(seed_row) for seed_row in seed_rows]
    known_ids = {memory["id"] for memory in memories}

    touching_links: list[dict[str, Any]] = []
    missing_ids: set[str] = set()
    for link_row in _list_link_rows(engine):
        endpoints = (str(link_row["source_id"]), str(link_row["target_id"]))
        if not any(endpoint in known_ids for endpoint in endpoints):
            continue
        touching_links.append(link_row)
        missing_ids.update(endpoint for endpoint in endpoints if endpoint not in known_ids)

    if missing_ids:
        fetched_rows = _fetch_memory_rows_by_ids(engine, list(missing_ids))
        for fetched_id, fetched_row in fetched_rows.items():
            memories.append(_row_to_memory_dict(fetched_row))
            known_ids.add(fetched_id)

    degree: dict[str, int] = dict.fromkeys(known_ids, 0)
    self_loop_count: dict[str, int] = dict.fromkeys(known_ids, 0)
    edges: list[dict[str, Any]] = []
    for link_row in touching_links:
        link_source = str(link_row["source_id"])
        link_target = str(link_row["target_id"])
        if link_source != link_target:
            # .get() because an endpoint may not have been fetched above.
            degree[link_source] = degree.get(link_source, 0) + 1
            degree[link_target] = degree.get(link_target, 0) + 1
        else:
            self_loop_count[link_source] = self_loop_count.get(link_source, 0) + 1
        edges.append(_graph_edge_from_link(_link_row_to_dict(link_row)))

    for placement_row in _list_placement_rows(engine):
        placement = _placement_row_to_dict(placement_row)
        placement_parent = str(placement["parent_id"])
        placement_child = str(placement["child_id"])
        if placement_parent in known_ids and placement_child in known_ids:
            degree[placement_parent] = degree.get(placement_parent, 0) + 1
            degree[placement_child] = degree.get(placement_child, 0) + 1
            edges.append(_graph_edge_from_placement(placement))

    nodes: list[dict[str, Any]] = []
    for memory in memories:
        node_key = str(memory["id"])
        nodes.append(
            _graph_node_from_memory(
                memory,
                degree=degree.get(node_key, 0),
                self_loop_count=self_loop_count.get(node_key, 0),
            )
        )

    graph_metadata = {
        "node_count": len(nodes),
        "edge_count": len(edges),
        "scope": patterns,
        "projection_sources": ["memuron_memories", "memuron_links", "memuron_placements"],
    }
    return {"nodes": nodes, "edges": edges, "metadata": graph_metadata}
2687
+
2688
+
2689
def graph_hubs(
    engine: ArthaEngine,
    *,
    scope: str | list[str] | None = None,
    limit: int = 10,
) -> tuple[list[dict[str, Any]], int]:
    """Return the highest-degree nodes of the exported graph.

    Nodes are ordered by descending degree, ties broken by id. The result is
    ``(hubs, total_nodes)`` where ``hubs`` holds at most ``limit`` entries
    and ``hub_score`` equals the degree. The graph comes from
    ``export_graph`` with its default node limit.
    """

    def _rank(node: dict[str, Any]) -> tuple[int, str]:
        return (-int(node.get("degree") or 0), str(node.get("id")))

    graph = export_graph(engine, scope=scope)
    ranked_nodes = sorted(graph["nodes"], key=_rank)
    hubs: list[dict[str, Any]] = []
    for node in ranked_nodes[:limit]:
        hubs.append(
            {
                "id": node["id"],
                "content": node["content"],
                "scope": node["scope"],
                "degree": node["degree"],
                "hub_score": node["degree"],
            }
        )
    return hubs, len(ranked_nodes)
2711
+
2712
+
2713
def graph_neighborhood(
    engine: ArthaEngine,
    *,
    memory_id: str,
    hops: int = 2,
    scope: str | list[str] | None = None,
) -> dict[str, Any]:
    """Return the nodes within ``hops`` edges of ``memory_id``.

    The graph from ``export_graph`` is treated as undirected and self loops
    are ignored. Edges that point at an id with no exported node are skipped,
    so a dangling link cannot fail the lookup while results are assembled.

    Args:
        engine: Engine whose graph is explored.
        memory_id: Id of the centre node.
        hops: Maximum distance from the centre; ``0`` or less yields only the
            centre itself.
        scope: Optional scope filter forwarded to ``export_graph``.

    Returns:
        A dict with ``center_memory_id``, ``hops``, ``neighborhood`` (entries
        ordered by hop distance, then id) and ``total_in_neighborhood``.

    Raises:
        KeyError: if ``memory_id`` is not a node of the exported graph.
    """
    # Local import: only the breadth-first walk below needs it.
    from collections import deque

    graph = export_graph(engine, scope=scope)
    nodes_by_id = {str(node["id"]): node for node in graph["nodes"]}
    if memory_id not in nodes_by_id:
        raise KeyError(f"Memory not found: {memory_id}")

    adjacency: dict[str, set[str]] = {node_id: set() for node_id in nodes_by_id}
    for edge in graph["edges"]:
        source = str(edge["source"])
        target = str(edge["target"])
        if source == target:
            continue
        # An edge can reference a memory that was not exported as a node;
        # following it would end in a failed nodes_by_id lookup below.
        if source not in nodes_by_id or target not in nodes_by_id:
            continue
        adjacency[source].add(target)
        adjacency[target].add(source)

    # Breadth-first walk; the deque gives O(1) pops from the front.
    visited = {memory_id: 0}
    queue: deque[tuple[str, int]] = deque([(memory_id, 0)])
    while queue:
        current_id, current_hop = queue.popleft()
        if current_hop >= hops:
            continue
        for neighbor_id in sorted(adjacency[current_id]):
            if neighbor_id in visited:
                continue
            visited[neighbor_id] = current_hop + 1
            queue.append((neighbor_id, current_hop + 1))

    neighborhood = []
    for node_id, hop_distance in sorted(visited.items(), key=lambda item: (item[1], item[0])):
        node = nodes_by_id[node_id]
        neighborhood.append(
            {
                "id": node["id"],
                "content": node["content"],
                "scope": node["scope"],
                "hop_distance": hop_distance,
                "is_center": node_id == memory_id,
            }
        )
    return {
        "center_memory_id": memory_id,
        "hops": hops,
        "neighborhood": neighborhood,
        "total_in_neighborhood": len(neighborhood),
    }
2763
+
2764
+
2765
def graph_path(
    engine: ArthaEngine,
    *,
    from_id: str,
    to_id: str,
    scope: str | list[str] | None = None,
) -> dict[str, Any]:
    """Find a shortest path between two nodes of the exported graph.

    The graph is treated as undirected and self-loops are ignored. Edges
    that reference a node id missing from the exported node list are skipped,
    so a returned path only contains nodes whose content can be looked up.
    Neighbors are explored in sorted id order, which makes the chosen path
    deterministic when several shortest paths exist.

    Args:
        engine: Engine handed to ``export_graph``.
        from_id: Id of the start node.
        to_id: Id of the destination node.
        scope: Optional scope filter forwarded to ``export_graph``.

    Returns:
        On success a dict with ``status`` ``"success"``, the ``path`` (list of
        ids), its ``length`` in edges and ``memories`` (id plus the first 100
        characters of content per node). Otherwise a dict with ``status``
        ``"no_path"`` and a ``message``.

    Raises:
        KeyError: If either id is not among the exported nodes.
    """
    from collections import deque

    graph = export_graph(engine, scope=scope)
    nodes_by_id = {str(node["id"]): node for node in graph["nodes"]}
    if from_id not in nodes_by_id or to_id not in nodes_by_id:
        raise KeyError("One or both memories not found")

    def _success(path: list[str]) -> dict[str, Any]:
        # Shared shape for the trivial and the searched result.
        return {
            "status": "success",
            "path": path,
            "length": len(path) - 1,
            "memories": [
                {"id": node_id, "content": nodes_by_id[node_id]["content"][:100]}
                for node_id in path
            ],
        }

    if from_id == to_id:
        return _success([from_id])

    adjacency: dict[str, set[str]] = {node_id: set() for node_id in nodes_by_id}
    for edge in graph["edges"]:
        source = str(edge["source"])
        target = str(edge["target"])
        # Skip self-loops and edges with an endpoint outside the node list.
        if source == target or source not in adjacency or target not in adjacency:
            continue
        adjacency[source].add(target)
        adjacency[target].add(source)

    # Breadth-first search with predecessor links instead of per-entry path
    # copies; the path is rebuilt once, when the destination is reached.
    parents: dict[str, str | None] = {from_id: None}
    queue: deque[str] = deque([from_id])
    while queue:
        current_id = queue.popleft()
        for neighbor_id in sorted(adjacency[current_id]):
            if neighbor_id in parents:
                continue
            parents[neighbor_id] = current_id
            if neighbor_id == to_id:
                path = [to_id]
                step = parents[to_id]
                while step is not None:
                    path.append(step)
                    step = parents[step]
                path.reverse()
                return _success(path)
            queue.append(neighbor_id)
    return {
        "status": "no_path",
        "message": f"No path found between {from_id[:8]}... and {to_id[:8]}...",
    }
2815
+
2816
+
2817
def semantic_traverse_graph(
    engine: ArthaEngine,
    *,
    start_memory_id: str,
    query: str,
    max_hops: int = 2,
    edge_similarity_threshold: float = 0.7,
    scope: str | list[str] | None = None,
) -> dict[str, Any]:
    """Walk the graph from ``start_memory_id`` along edges similar to ``query``.

    Each exported edge is scored with ``_cosine_score`` between the embedded
    query and the embedding stored on the matching link row (an empty vector
    when no row or no list-shaped embedding is available). The breadth-first
    walk only follows edges whose score is at least
    ``edge_similarity_threshold`` and stops expanding at ``max_hops``.

    Args:
        engine: Engine used for graph export, query embedding and link rows.
        start_memory_id: Id of the node the walk starts from.
        query: Text embedded via ``_embed_query_vector``.
        max_hops: Maximum hop distance from the start node.
        edge_similarity_threshold: Minimum edge score required to follow it.
        scope: Optional scope filter forwarded to ``export_graph``.

    Returns:
        A dict echoing the inputs plus ``memories`` (ordered by hop distance,
        then id), ``traversed_edges`` (ordered by descending similarity) and
        the two totals.

    Raises:
        KeyError: If ``start_memory_id`` is not among the exported nodes.
    """
    from collections import deque

    graph = export_graph(engine, scope=scope)
    nodes_by_id = {str(node["id"]): node for node in graph["nodes"]}
    if start_memory_id not in nodes_by_id:
        raise KeyError(f"Memory not found: {start_memory_id}")

    query_vector = _embed_query_vector(engine, query)
    # Keyed by the string form of the link id because the lookup below uses
    # ``str(edge["id"])``; only the raw row is needed here, so rows are not
    # decoded into full link dicts first.
    rows_by_id = {str(row["link_id"]): row for row in _list_link_rows(engine)}
    adjacency: dict[str, list[dict[str, Any]]] = {node_id: [] for node_id in nodes_by_id}
    for edge in graph["edges"]:
        source = str(edge["source"])
        target = str(edge["target"])
        row = rows_by_id.get(str(edge["id"]))
        embedding = _parse_json_field(row.get("embedding_json") if row else None, [])
        similarity = _cosine_score(
            query_vector,
            [float(value) for value in embedding] if isinstance(embedding, list) else [],
        )
        edge_with_similarity = {**edge, "similarity": similarity}
        adjacency.setdefault(source, []).append(edge_with_similarity)
        if target != source:
            adjacency.setdefault(target, []).append(edge_with_similarity)

    # ``visited`` maps node id -> hop distance at first discovery.
    visited: dict[str, int] = {start_memory_id: 0}
    queue: deque[tuple[str, int]] = deque([(start_memory_id, 0)])
    traversed_edges: list[dict[str, Any]] = []
    traversed_edge_ids: set[str] = set()
    while queue:
        current_id, current_hop = queue.popleft()
        if current_hop >= max_hops:
            continue
        # Most similar edges first; ties broken by edge id for determinism.
        ranked_edges = sorted(
            adjacency.get(current_id, []),
            key=lambda item: (-float(item.get("similarity") or 0), str(item.get("id"))),
        )
        for edge in ranked_edges:
            similarity = float(edge.get("similarity") or 0)
            if similarity < edge_similarity_threshold:
                continue
            source = str(edge["source"])
            target = str(edge["target"])
            neighbor_id = target if source == current_id else source
            is_self_loop = source == target
            # Ignore edges that lead to a node outside the exported node list.
            if neighbor_id not in nodes_by_id:
                continue
            edge_id = str(edge.get("id") or f"{source}:{target}")
            if edge_id not in traversed_edge_ids:
                traversed_edge_ids.add(edge_id)
                traversed_edges.append(
                    {
                        "id": edge_id,
                        "source_id": source,
                        "target_id": target,
                        "description": edge.get("description") or "",
                        "similarity": similarity,
                        "from_memory_id": current_id,
                        "to_memory_id": neighbor_id,
                        "is_self_loop": is_self_loop,
                    }
                )
            if is_self_loop:
                continue
            # A FIFO breadth-first walk discovers every node at its minimum
            # hop distance first, so a node already seen never needs updating.
            if neighbor_id not in visited:
                next_hop = current_hop + 1
                visited[neighbor_id] = next_hop
                queue.append((neighbor_id, next_hop))

    memories = []
    for memory_id, hop_distance in sorted(visited.items(), key=lambda item: (item[1], item[0])):
        node = nodes_by_id[memory_id]
        memories.append(
            {
                "id": node["id"],
                "content": node["content"],
                "scope": node["scope"],
                "hop_distance": hop_distance,
                "is_start": memory_id == start_memory_id,
            }
        )
    traversed_edges.sort(key=lambda edge: float(edge["similarity"]), reverse=True)
    return {
        "start_memory_id": start_memory_id,
        "query": query,
        "max_hops": max_hops,
        "edge_similarity_threshold": edge_similarity_threshold,
        "scope": _graph_scope_patterns(scope),
        "memories": memories,
        "traversed_edges": traversed_edges,
        "total_memories": len(memories),
        "total_edges": len(traversed_edges),
    }
2916
+
2917
+
2918
def _link_row_to_dict(row: dict[str, Any]) -> dict[str, Any]:
    """Convert a raw link row into a plain link dict.

    ``embedding_json`` is decoded with ``json.loads`` when it is a string and
    ``metadata_json`` goes through ``_parse_json_field``; a decoded value of
    the wrong shape is replaced by an empty list / empty dict respectively.
    """
    raw_embedding = row.get("embedding_json")
    decoded_embedding = (
        json.loads(raw_embedding) if isinstance(raw_embedding, str) else raw_embedding
    )
    decoded_metadata = _parse_json_field(row.get("metadata_json"), {})

    link: dict[str, Any] = {
        "link_id": row["link_id"],
        "source_id": row["source_id"],
        "target_id": row["target_id"],
        "description": row["description"],
    }
    link["metadata"] = decoded_metadata if isinstance(decoded_metadata, dict) else {}
    link["embedding"] = decoded_embedding if isinstance(decoded_embedding, list) else []
    return link
2931
+
2932
+
2933
def _memory_endpoint_payload(row: dict[str, Any]) -> dict[str, Any]:
    """Build the endpoint summary of a memory row used inside edge results.

    The row is converted with ``_row_to_memory_dict``; the summary carries its
    id, content, scope (empty list when falsy) and the source identity that
    ``source_identity_from_metadata`` derives from its metadata.
    """
    memory = _row_to_memory_dict(row)
    endpoint: dict[str, Any] = {"id": memory["id"], "content": memory["content"]}
    endpoint["scope"] = memory.get("scope") or []
    endpoint["source_metadata"] = source_identity_from_metadata(memory.get("metadata") or {})
    return endpoint
2941
+
2942
+
2943
def _relationship_edge_result(
    link: dict[str, Any],
    *,
    score: float,
    source_row: dict[str, Any] | None,
    target_row: dict[str, Any] | None,
) -> dict[str, Any] | None:
    """Shape a link hit as a ``relationship_edge`` search result.

    Returns ``None`` when either endpoint row is missing or empty; otherwise
    a result dict that embeds both endpoints via ``_memory_endpoint_payload``.
    """
    if not (source_row and target_row):
        return None

    edge_id = str(link["link_id"])
    raw_metadata = link.get("metadata")
    # Anything other than a dict is reported as empty metadata.
    edge_metadata = raw_metadata if isinstance(raw_metadata, dict) else {}
    description = str(link.get("description") or "")
    same_endpoint = str(link["source_id"]) == str(link["target_id"])

    return {
        "type": "relationship_edge",
        "id": edge_id,
        "link_id": edge_id,
        "description": description,
        "metadata": edge_metadata,
        "semantic_score": score,
        "matched_via": "link",
        "matched_via_link_id": edge_id,
        "is_self_loop": same_endpoint,
        "parallel_key": edge_id,
        "source": _memory_endpoint_payload(source_row),
        "target": _memory_endpoint_payload(target_row),
    }
2968
+
2969
+
2970
def _self_loop_link_result(
    link: dict[str, Any],
    *,
    score: float,
    memory_row: dict[str, Any] | None,
) -> dict[str, Any] | None:
    """Shape a self-loop link hit as a ``memory_node`` search result.

    Returns ``None`` when ``memory_row`` is missing or empty. Otherwise the
    result describes the memory itself and records the matching link's id and
    description under the ``matched_*`` keys.
    """
    if memory_row:
        memory = _row_to_memory_dict(memory_row)
        node_result: dict[str, Any] = {"type": "memory_node"}
        node_result["id"] = memory["id"]
        node_result["content"] = memory["content"]
        node_result["scope"] = memory.get("scope") or []
        node_result["semantic_score"] = score
        node_result["matched_via"] = "semantic_link"
        node_result["matched_via_link_id"] = str(link["link_id"])
        node_result["matched_link_description"] = str(link.get("description") or "")
        return node_result
    return None
2989
+
2990
+
2991
def _filter_links_by_endpoint_scope(
    link_rows: list[dict[str, Any]],
    scope_index: dict[str, list[str]],
    scope: list[str],
) -> list[dict[str, Any]]:
    """MemBrain-style endpoint filtering of link rows.

    A link is kept only when both its source and its target satisfy
    ``_scope_matches_filter`` for ``scope``. Endpoint scopes are looked up in
    ``scope_index`` by the string form of the id; an id missing from the
    index is treated as having an empty scope. Input order is preserved.
    """

    def _both_endpoints_match(link_row: dict[str, Any]) -> bool:
        # Resolve both endpoint scopes first, then test source before target.
        endpoint_scopes = [
            scope_index.get(str(link_row[endpoint_key]), [])
            for endpoint_key in ("source_id", "target_id")
        ]
        return all(
            _scope_matches_filter(endpoint_scope, scope) for endpoint_scope in endpoint_scopes
        )

    return [link_row for link_row in link_rows if _both_endpoints_match(link_row)]
3006
+
3007
+
3008
def search_memories(
    engine: ArthaEngine,
    query: str,
    *,
    k: int = 5,
    scope: list[str] | None = None,
    include_links: bool | None = None,
) -> tuple[list[dict[str, Any]], list[str] | None]:
    """Search memories, and optionally links, for ``query``.

    Steps:
      1. Gather memory candidates through hybrid search, pgvector, or the
         in-process similarity fallback, depending on settings and store.
      2. When links are included, gather link candidates. Self-loop links are
         reported as ``memory_node`` results, all others as
         ``relationship_edge`` candidates.
      3. Apply ``settings.search_min_semantic_score`` (in hybrid mode only to
         relationship edges).
      4. De-duplicate memory nodes by normalised content, keeping the
         best-scored entry and copying link-match details onto it when the
         kept entry was a plain memory match.
      5. Keep the first ``k`` candidates and resolve edge endpoints.

    Args:
        engine: Engine providing the store and projections.
        query: Search text.
        k: Number of candidates kept after de-duplication.
        scope: Optional scope filter passed to the search backends.
        include_links: Whether to search links too; ``None`` falls back to
            ``settings.search_include_links``.

    Returns:
        ``(results, scope)`` where ``scope`` is the argument echoed back.
        ``results`` may hold fewer than ``k`` entries, since edges whose
        endpoint rows cannot be fetched are dropped after the cut.
    """
    # Function-local imports, as in the original implementation.
    from memuron.application.config import settings
    from memuron.search.hybrid import hybrid_memory_search, retrieve_pool_size

    _require_memory_projections(engine)
    if include_links is None:
        include_links = settings.search_include_links

    query_vector = _embed_query_vector(engine, query)
    store = engine.store
    # The requested pool size is doubled when links are searched as well.
    memory_pool = retrieve_pool_size(k * 2 if include_links else k)
    # Candidates are (kind, payload, score) triples.
    unified: list[tuple[str, dict[str, Any], float]] = []

    # --- 1. memory candidates -------------------------------------------
    if settings.search_hybrid:
        for kind, payload, score in hybrid_memory_search(
            engine,
            query,
            query_vector,
            k=memory_pool,
            scope=scope,
        ):
            unified.append((kind, payload, score))
    elif pgvector_is_ready(store):
        for row in pgvector_memory_search(
            store,
            query_vector,
            top_k=memory_pool,
            scope=scope,
            include_content=True,
        ):
            scope_tokens = _parse_json_field(row.get("scope_json"), [])
            unified.append(
                (
                    "memory_node",
                    {
                        "type": "memory_node",
                        "id": row["artha_id"],
                        "content": str(row.get("content") or ""),
                        "scope": scope_tokens if isinstance(scope_tokens, list) else [],
                        "source_metadata": source_identity_from_metadata(
                            _parse_json_field(row.get("metadata_json"), {})
                        ),
                        "matched_via": "memory",
                        "matched_via_link_id": None,
                    },
                    float(row["semantic_score"]),
                )
            )
    else:
        memory_hits = _memory_similarity_hits(
            engine,
            query_vector,
            scope=scope,
            top_k=memory_pool,
        )
        memory_details = _fetch_memory_rows_by_ids(
            engine,
            [memory_id for memory_id, _score in memory_hits],
        )
        for memory_id, score in memory_hits:
            row = memory_details.get(memory_id)
            if not row:
                continue
            memory = _row_to_memory_dict(row)
            unified.append(
                (
                    "memory_node",
                    {
                        "type": "memory_node",
                        "id": memory_id,
                        "content": memory["content"],
                        "scope": memory.get("scope") or [],
                        "source_metadata": source_identity_from_metadata(
                            memory.get("metadata") if isinstance(memory.get("metadata"), dict) else {}
                        ),
                        "matched_via": "memory",
                        "matched_via_link_id": None,
                    },
                    score,
                )
            )

    # --- 2. link candidates ---------------------------------------------
    if include_links:
        link_limit = k
        self_loop_memory_ids: set[str] = set()
        pending_link_hits: list[tuple[dict[str, Any], float]] = []
        if pgvector_is_ready(store):
            for row in pgvector_link_search(store, query_vector, top_k=link_limit, scope=scope):
                link = {
                    "link_id": row["link_id"],
                    "source_id": row["source_id"],
                    "target_id": row["target_id"],
                    "description": row["description"],
                    "metadata": _parse_json_field(row.get("metadata_json"), {}),
                }
                pending_link_hits.append((link, float(row["semantic_score"])))
                if str(link["source_id"]) == str(link["target_id"]):
                    self_loop_memory_ids.add(str(link["source_id"]))
        else:
            link_rows = _list_link_search_rows(engine)
            if link_rows:
                filtered_links = link_rows
                if scope:
                    memory_rows = _list_memory_search_rows(engine)
                    scope_index = {
                        str(row["artha_id"]): _parse_json_field(row.get("scope_json"), [])
                        for row in memory_rows
                    }
                    filtered_links = _filter_links_by_endpoint_scope(
                        link_rows, scope_index, scope
                    )
                link_hits = _cosine_similarity_hits(
                    engine,
                    query_vector,
                    filtered_links,
                    top_k=link_limit,
                    id_key="link_id",
                    scope_key=None,
                )
                # Index the raw rows and decode only the rows that were hit,
                # rather than decoding every link (embedding JSON included)
                # just to serve at most ``link_limit`` results.
                rows_by_link_id = {str(row["link_id"]): row for row in link_rows}
                for link_id, score in link_hits:
                    # The index is keyed by str(link_id); normalise the lookup
                    # key too so non-string ids are not silently dropped.
                    hit_row = rows_by_link_id.get(str(link_id))
                    if hit_row is None:
                        continue
                    link = _link_row_to_dict(hit_row)
                    pending_link_hits.append(
                        (
                            {
                                "link_id": link["link_id"],
                                "source_id": link["source_id"],
                                "target_id": link["target_id"],
                                "description": link["description"],
                                "metadata": link.get("metadata") or {},
                            },
                            score,
                        )
                    )
                    if str(link["source_id"]) == str(link["target_id"]):
                        self_loop_memory_ids.add(str(link["source_id"]))
        self_loop_rows = _fetch_memory_rows_by_ids(engine, list(self_loop_memory_ids))
        for link, score in pending_link_hits:
            if str(link["source_id"]) == str(link["target_id"]):
                # A self-loop link is reported as a hit on its memory; it is
                # dropped when that memory row cannot be fetched.
                result = _self_loop_link_result(
                    link,
                    score=score,
                    memory_row=self_loop_rows.get(str(link["source_id"])),
                )
                if result:
                    unified.append(("memory_node", result, score))
                continue
            unified.append(("relationship_edge", link, score))

    # --- 3. minimum-score filter ----------------------------------------
    min_score = settings.search_min_semantic_score
    if not settings.search_hybrid:
        unified = [item for item in unified if item[2] >= min_score]
    else:
        # In hybrid mode only relationship edges are held to the threshold.
        unified = [
            item
            for item in unified
            if item[0] != "relationship_edge" or item[2] >= min_score
        ]

    # --- 4. de-duplicate memory nodes by content ------------------------
    seen_content: dict[str, int] = {}  # content key -> index in ``deduped``
    deduped: list[tuple[str, dict[str, Any], float]] = []
    for kind, payload, score in sorted(unified, key=lambda item: item[2], reverse=True):
        if kind != "memory_node":
            deduped.append((kind, payload, score))
            continue
        content_key = str(payload.get("content") or "").strip().lower()[:500]
        if content_key and content_key in seen_content:
            existing_index = seen_content[content_key]
            existing_kind, existing_payload, existing_score = deduped[existing_index]
            incoming_match = payload.get("matched_via")
            existing_match = existing_payload.get("matched_via")
            if incoming_match and incoming_match != "memory" and existing_match == "memory":
                # Keep the better-scored entry but record that a link also
                # matched this memory.
                deduped[existing_index] = (
                    existing_kind,
                    {
                        **existing_payload,
                        "matched_via": payload.get("matched_via"),
                        "matched_via_link_id": payload.get("matched_via_link_id"),
                        "matched_link_description": payload.get("matched_link_description"),
                    },
                    existing_score,
                )
            continue
        if content_key:
            seen_content[content_key] = len(deduped)
        deduped.append((kind, payload, score))

    # --- 5. cut to k and resolve edge endpoints -------------------------
    top = deduped[:k]

    endpoint_ids = {
        str(item["source_id"])
        for kind, item, _score in top
        if kind == "relationship_edge"
    } | {
        str(item["target_id"])
        for kind, item, _score in top
        if kind == "relationship_edge"
    }
    endpoint_rows = _fetch_memory_rows_by_ids(engine, list(endpoint_ids))

    results: list[dict[str, Any]] = []
    for kind, payload, score in top:
        if kind == "memory_node":
            # A score already present on the payload wins over the tuple score.
            results.append({**payload, "semantic_score": payload.get("semantic_score", score)})
            continue
        edge = _relationship_edge_result(
            payload,
            score=score,
            source_row=endpoint_rows.get(str(payload["source_id"])),
            target_row=endpoint_rows.get(str(payload["target_id"])),
        )
        if edge:
            results.append(edge)

    return results, scope
3232
+
3233
+
3234
def unlink_memories(
    engine: ArthaEngine,
    memory_id_1: str,
    memory_id_2: str,
    *,
    event_metadata: dict[str, object] | None = None,
) -> int:
    """Remove every link found between two memories.

    Each id yielded by ``_find_link_ids_between`` is passed to
    ``_remove_link``; the memory projections are refreshed once afterwards,
    and only when at least one link was removed.

    Returns:
        The number of links removed.
    """
    removed_count = 0
    link_ids = _find_link_ids_between(engine, memory_id_1, memory_id_2)
    for removed_count, link_id in enumerate(link_ids, start=1):
        _remove_link(engine, link_id, event_metadata=event_metadata)
    if removed_count == 0:
        return removed_count
    refresh_memory_projections(engine)
    return removed_count