MemoryOS memoryos-0.2.1-py3-none-any.whl → memoryos-0.2.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of MemoryOS might be problematic.

Files changed (74):
  1. {memoryos-0.2.1.dist-info → memoryos-0.2.2.dist-info}/METADATA +2 -1
  2. {memoryos-0.2.1.dist-info → memoryos-0.2.2.dist-info}/RECORD +72 -55
  3. memos/__init__.py +1 -1
  4. memos/api/config.py +156 -65
  5. memos/api/context/context.py +147 -0
  6. memos/api/context/dependencies.py +90 -0
  7. memos/api/product_models.py +5 -1
  8. memos/api/routers/product_router.py +54 -26
  9. memos/configs/graph_db.py +49 -1
  10. memos/configs/internet_retriever.py +6 -0
  11. memos/configs/mem_os.py +5 -0
  12. memos/configs/mem_reader.py +9 -0
  13. memos/configs/mem_scheduler.py +18 -4
  14. memos/configs/mem_user.py +58 -0
  15. memos/graph_dbs/base.py +9 -1
  16. memos/graph_dbs/factory.py +2 -0
  17. memos/graph_dbs/nebular.py +1364 -0
  18. memos/graph_dbs/neo4j.py +4 -4
  19. memos/log.py +1 -1
  20. memos/mem_cube/utils.py +13 -6
  21. memos/mem_os/core.py +140 -30
  22. memos/mem_os/main.py +1 -1
  23. memos/mem_os/product.py +266 -152
  24. memos/mem_os/utils/format_utils.py +314 -67
  25. memos/mem_reader/simple_struct.py +13 -5
  26. memos/mem_scheduler/base_scheduler.py +220 -250
  27. memos/mem_scheduler/general_scheduler.py +193 -73
  28. memos/mem_scheduler/modules/base.py +5 -5
  29. memos/mem_scheduler/modules/dispatcher.py +6 -9
  30. memos/mem_scheduler/modules/misc.py +81 -16
  31. memos/mem_scheduler/modules/monitor.py +52 -41
  32. memos/mem_scheduler/modules/rabbitmq_service.py +9 -7
  33. memos/mem_scheduler/modules/retriever.py +108 -191
  34. memos/mem_scheduler/modules/scheduler_logger.py +255 -0
  35. memos/mem_scheduler/mos_for_test_scheduler.py +16 -19
  36. memos/mem_scheduler/schemas/__init__.py +0 -0
  37. memos/mem_scheduler/schemas/general_schemas.py +43 -0
  38. memos/mem_scheduler/schemas/message_schemas.py +148 -0
  39. memos/mem_scheduler/schemas/monitor_schemas.py +329 -0
  40. memos/mem_scheduler/utils/__init__.py +0 -0
  41. memos/mem_scheduler/utils/filter_utils.py +176 -0
  42. memos/mem_scheduler/utils/misc_utils.py +61 -0
  43. memos/mem_user/factory.py +94 -0
  44. memos/mem_user/mysql_persistent_user_manager.py +271 -0
  45. memos/mem_user/mysql_user_manager.py +500 -0
  46. memos/mem_user/persistent_factory.py +96 -0
  47. memos/mem_user/user_manager.py +4 -4
  48. memos/memories/activation/item.py +4 -0
  49. memos/memories/textual/base.py +1 -1
  50. memos/memories/textual/general.py +35 -91
  51. memos/memories/textual/item.py +5 -33
  52. memos/memories/textual/tree.py +13 -7
  53. memos/memories/textual/tree_text_memory/organize/conflict.py +4 -2
  54. memos/memories/textual/tree_text_memory/organize/relation_reason_detector.py +47 -43
  55. memos/memories/textual/tree_text_memory/organize/reorganizer.py +8 -5
  56. memos/memories/textual/tree_text_memory/retrieve/internet_retriever.py +6 -3
  57. memos/memories/textual/tree_text_memory/retrieve/internet_retriever_factory.py +2 -0
  58. memos/memories/textual/tree_text_memory/retrieve/retrieval_mid_structs.py +2 -0
  59. memos/memories/textual/tree_text_memory/retrieve/searcher.py +46 -23
  60. memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py +42 -15
  61. memos/memories/textual/tree_text_memory/retrieve/utils.py +11 -7
  62. memos/memories/textual/tree_text_memory/retrieve/xinyusearch.py +62 -58
  63. memos/memos_tools/dinding_report_bot.py +422 -0
  64. memos/memos_tools/notification_service.py +44 -0
  65. memos/memos_tools/notification_utils.py +96 -0
  66. memos/settings.py +3 -1
  67. memos/templates/mem_reader_prompts.py +2 -1
  68. memos/templates/mem_scheduler_prompts.py +41 -7
  69. memos/templates/mos_prompts.py +87 -0
  70. memos/mem_scheduler/modules/schemas.py +0 -328
  71. memos/mem_scheduler/utils.py +0 -75
  72. {memoryos-0.2.1.dist-info → memoryos-0.2.2.dist-info}/LICENSE +0 -0
  73. {memoryos-0.2.1.dist-info → memoryos-0.2.2.dist-info}/WHEEL +0 -0
  74. {memoryos-0.2.1.dist-info → memoryos-0.2.2.dist-info}/entry_points.txt +0 -0
memos/graph_dbs/nebular.py (new file)
@@ -0,0 +1,1364 @@
+ import traceback
+
+ from contextlib import suppress
+ from datetime import datetime
+ from queue import Empty, Queue
+ from threading import Lock
+ from typing import Any, Literal
+
+ import numpy as np
+
+ from memos.configs.graph_db import NebulaGraphDBConfig
+ from memos.dependency import require_python_package
+ from memos.graph_dbs.base import BaseGraphDB
+ from memos.log import get_logger
+
+
+ logger = get_logger(__name__)
+
+
+ def _normalize(vec: list[float]) -> list[float]:
+     v = np.asarray(vec, dtype=np.float32)
+     norm = np.linalg.norm(v)
+     return (v / (norm if norm else 1.0)).tolist()
+
+
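`_normalize` rescales an embedding to unit L2 norm so that the inner-product scoring used further down behaves like cosine similarity; an all-zero vector is returned unchanged rather than raising. A quick sanity check with made-up values:

import numpy as np

v = [3.0, 4.0]
norm = np.linalg.norm(v)                  # 5.0
print((np.asarray(v) / norm).tolist())    # [0.6, 0.8], unit length
# _normalize([0.0, 0.0]) returns [0.0, 0.0] thanks to the norm-or-1 guard
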
+ def _compose_node(item: dict[str, Any]) -> tuple[str, str, dict[str, Any]]:
+     node_id = item["id"]
+     memory = item["memory"]
+     metadata = item.get("metadata", {})
+     return node_id, memory, metadata
+
+
+ def _prepare_node_metadata(metadata: dict[str, Any]) -> dict[str, Any]:
+     """
+     Ensure metadata has proper datetime fields and normalized types.
+
+     - Fill `created_at` and `updated_at` if missing (in ISO 8601 format).
+     - Convert embedding to list of float if present.
+     """
+     now = datetime.utcnow().isoformat()
+     metadata["node_type"] = metadata.pop("type")
+
+     # Fill timestamps if missing
+     metadata.setdefault("created_at", now)
+     metadata.setdefault("updated_at", now)
+
+     # Normalize embedding type
+     embedding = metadata.get("embedding")
+     if embedding and isinstance(embedding, list):
+         metadata["embedding"] = _normalize([float(x) for x in embedding])
+
+     return metadata
+
+
+ def _metadata_filter(metadata: dict[str, Any]) -> dict[str, Any]:
+     """
+     Filter and validate metadata dictionary against the Memory node schema.
+
+     - Removes keys not in schema.
+     - Warns if required fields are missing.
+     """
+     allowed_fields = {
+         "id",
+         "memory",
+         "user_name",
+         "user_id",
+         "session_id",
+         "status",
+         "key",
+         "confidence",
+         "tags",
+         "created_at",
+         "updated_at",
+         "memory_type",
+         "sources",
+         "source",
+         "node_type",
+         "visibility",
+         "usage",
+         "background",
+         "embedding",
+     }
+
+     missing_fields = allowed_fields - metadata.keys()
+     if missing_fields:
+         logger.warning(f"Metadata missing required fields: {sorted(missing_fields)}")
+
+     filtered_metadata = {k: v for k, v in metadata.items() if k in allowed_fields}
+
+     return filtered_metadata
+
+
+ def _escape_str(value: str) -> str:
+     return value.replace('"', '\\"')
+
+
+ def _format_value(val: Any, key: str = "") -> str:
+     from nebulagraph_python.py_data_types import NVector
+
+     if isinstance(val, str):
+         return f'"{_escape_str(val)}"'
+     elif isinstance(val, int | float):
+         return str(val)
+     elif isinstance(val, datetime):
+         return f'datetime("{val.isoformat()}")'
+     elif isinstance(val, list):
+         if key == "embedding":
+             dim = len(val)
+             joined = ",".join(str(float(x)) for x in val)
+             return f"VECTOR<{dim}, FLOAT>([{joined}])"
+         else:
+             return f"[{', '.join(_format_value(v) for v in val)}]"
+     elif isinstance(val, NVector):
+         # Format NVector values as vector literals for any key, so non-embedding
+         # vector fields do not silently fall through and return None.
+         dim = len(val)
+         joined = ",".join(str(float(x)) for x in val)
+         return f"VECTOR<{dim}, FLOAT>([{joined}])"
+     elif val is None:
+         return "NULL"
+     else:
+         return f'"{_escape_str(str(val))}"'
+
+
+ def _format_datetime(value: str | datetime) -> str:
+     """Ensure datetime is in ISO 8601 format string."""
+     if isinstance(value, datetime):
+         return value.isoformat()
+     return str(value)
+
+
+ class SessionPoolError(Exception):
+     pass
+
+
+ class SessionPool:
+     @require_python_package(
+         import_name="nebulagraph_python",
+         install_command="pip install ... @Tianxing",
+         install_link=".....",
+     )
+     def __init__(
+         self,
+         hosts: list[str],
+         user: str,
+         password: str,
+         minsize: int = 1,
+         maxsize: int = 10000,
+     ):
+         self.hosts = hosts
+         self.user = user
+         self.password = password
+         self.maxsize = maxsize
+         self.pool = Queue(maxsize)
+         self.lock = Lock()
+
+         self.clients = []
+
+         for _ in range(minsize):
+             self._create_and_add_client()
+
+     def _create_and_add_client(self):
+         from nebulagraph_python import NebulaClient
+
+         client = NebulaClient(self.hosts, self.user, self.password)
+         self.pool.put(client)
+         self.clients.append(client)
+
+     def get_client(self, timeout: float = 5.0):
+         from nebulagraph_python import NebulaClient
+
+         try:
+             return self.pool.get(timeout=timeout)
+         except Empty:
+             with self.lock:
+                 if len(self.clients) < self.maxsize:
+                     client = NebulaClient(self.hosts, self.user, self.password)
+                     self.clients.append(client)
+                     return client
+             raise RuntimeError("NebulaClientPool exhausted") from None
+
+     def return_client(self, client):
+         self.pool.put(client)
+
+     def close(self):
+         for client in self.clients:
+             with suppress(Exception):
+                 client.close()
+         self.clients.clear()
+
+     def get(self):
+         """
+         Context manager: with pool.get() as client:
+         """
+
+         class _ClientContext:
+             def __init__(self, outer):
+                 self.outer = outer
+                 self.client = None
+
+             def __enter__(self):
+                 self.client = self.outer.get_client()
+                 return self.client
+
+             def __exit__(self, exc_type, exc_val, exc_tb):
+                 if self.client:
+                     self.outer.return_client(self.client)
+
+         return _ClientContext(self)
+
+
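A minimal usage sketch for SessionPool; the endpoint and credentials are placeholders, and "SHOW VECTOR INDEXES" is a statement used elsewhere in this file:

pool = SessionPool(hosts=["127.0.0.1:9669"], user="root", password="nebula", minsize=1, maxsize=8)
with pool.get() as client:            # borrows a NebulaClient, returned on exit
    result = client.execute("SHOW VECTOR INDEXES")
pool.close()                          # closes every client the pool has created
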
+ class NebulaGraphDB(BaseGraphDB):
+     """
+     NebulaGraph-based implementation of a graph memory store.
+     """
+
+     @require_python_package(
+         import_name="nebulagraph_python",
+         install_command="pip install ... @Tianxing",
+         install_link=".....",
+     )
+     def __init__(self, config: NebulaGraphDBConfig):
+         """
+         NebulaGraph DB client initialization.
+
+         Required config attributes:
+         - hosts: list[str] like ["host1:port", "host2:port"]
+         - user: str
+         - password: str
+         - db_name: str (optional for basic commands)
+
+         Example config:
+         {
+             "hosts": ["xxx.xx.xx.xxx:xxxx"],
+             "user": "root",
+             "password": "nebula",
+             "space": "test"
+         }
+         """
+         self.config = config
+         self.db_name = config.space
+         self.user_name = config.user_name
+         self.system_db_name = "system" if config.use_multi_db else config.space
+         self.pool = SessionPool(
+             hosts=config.get("uri"),
+             user=config.get("user"),
+             password=config.get("password"),
+             minsize=1,
+             maxsize=config.get("max_client", 1000),
+         )
+
+         if config.auto_create:
+             self._ensure_database_exists()
+
+         self.execute_query(f"SESSION SET GRAPH `{self.db_name}`")
+
+         # Create only if not exists
+         self.create_index(dimensions=config.embedding_dimension)
+
+         logger.info("Connected to NebulaGraph successfully.")
+
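A hedged construction sketch; the field names below are inferred from how the constructor reads its config (space, user_name, use_multi_db, auto_create, embedding_dimension, plus uri/user/password/max_client via config.get), the values are placeholders, and keyword construction assumes the config class accepts them directly:

from memos.configs.graph_db import NebulaGraphDBConfig
from memos.graph_dbs.nebular import NebulaGraphDB

config = NebulaGraphDBConfig(
    uri=["127.0.0.1:9669"],       # hosts, consumed via config.get("uri")
    user="root",
    password="nebula",
    space="test",
    user_name="alice",
    use_multi_db=False,
    auto_create=True,
    embedding_dimension=3072,
)
db = NebulaGraphDB(config)
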
+     def execute_query(self, gql: str, timeout: float = 5.0, auto_set_db: bool = True):
+         with self.pool.get() as client:
+             if auto_set_db and self.db_name:
+                 client.execute(f"SESSION SET GRAPH `{self.db_name}`")
+             return client.execute(gql, timeout=timeout)
+
+     def close(self):
+         self.pool.close()
+
+     def create_index(
+         self,
+         label: str = "Memory",
+         vector_property: str = "embedding",
+         dimensions: int = 3072,
+         index_name: str = "memory_vector_index",
+     ) -> None:
+         # Create vector index
+         self._create_vector_index(label, vector_property, dimensions, index_name)
+         # Create basic property indexes
+         self._create_basic_property_indexes()
+
+     def remove_oldest_memory(self, memory_type: str, keep_latest: int) -> None:
+         """
+         Remove all memory nodes of the given type except the latest `keep_latest` entries.
+
+         Args:
+             memory_type (str): Memory type (e.g., 'WorkingMemory', 'LongTermMemory').
+             keep_latest (int): Number of latest entries to keep.
+         """
+         optional_condition = ""
+         if not self.config.use_multi_db and self.config.user_name:
+             optional_condition = f"AND n.user_name = '{self.config.user_name}'"
+
+         query = f"""
+         MATCH (n@Memory)
+         WHERE n.memory_type = '{memory_type}'
+         {optional_condition}
+         ORDER BY n.updated_at DESC
+         OFFSET {keep_latest}
+         DETACH DELETE n
+         """
+         self.execute_query(query)
+
+     def add_node(self, id: str, memory: str, metadata: dict[str, Any]) -> None:
+         """
+         Insert or update a Memory node in NebulaGraph.
+         """
+         if not self.config.use_multi_db and self.config.user_name:
+             metadata["user_name"] = self.config.user_name
+
+         now = datetime.utcnow()
+         metadata = metadata.copy()
+         metadata.setdefault("created_at", now)
+         metadata.setdefault("updated_at", now)
+         metadata["node_type"] = metadata.pop("type")
+         metadata["id"] = id
+         metadata["memory"] = memory
+
+         if "embedding" in metadata and isinstance(metadata["embedding"], list):
+             metadata["embedding"] = _normalize(metadata["embedding"])
+
+         metadata = _metadata_filter(metadata)
+         properties = ", ".join(f"{k}: {_format_value(v, k)}" for k, v in metadata.items())
+         gql = f"INSERT OR IGNORE (n@Memory {{{properties}}})"
+
+         try:
+             self.execute_query(gql)
+             logger.info("insert success")
+         except Exception as e:
+             logger.error(
+                 f"Failed to insert vertex {id}: gql: {gql}, {e}\ntrace: {traceback.format_exc()}"
+             )
+
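A hedged call sketch for add_node; the ID, text, tags, and embedding are invented, and the embedding length must match the dimension passed to create_index above:

db.add_node(
    id="mem-001",
    memory="User prefers dark roast coffee.",
    metadata={
        "type": "fact",                   # popped and stored as node_type
        "memory_type": "LongTermMemory",
        "status": "activated",
        "tags": ["preference", "coffee"],
        "embedding": [0.01] * 3072,       # normalized to unit length before insert
    },
)
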
+     def node_not_exist(self, scope: str) -> bool:
+         if not self.config.use_multi_db and self.config.user_name:
+             filter_clause = f'n.memory_type = "{scope}" AND n.user_name = "{self.config.user_name}"'
+         else:
+             filter_clause = f'n.memory_type = "{scope}"'
+
+         query = f"""
+         MATCH (n@Memory)
+         WHERE {filter_clause}
+         RETURN n
+         LIMIT 1
+         """
+
+         try:
+             result = self.execute_query(query)
+             return result.size == 0
+         except Exception as e:
+             logger.error(f"[node_not_exist] Query failed: {e}", exc_info=True)
+             raise
+
+     def update_node(self, id: str, fields: dict[str, Any]) -> None:
+         """
+         Update node fields in NebulaGraph, auto-converting `created_at` and
+         `updated_at` to datetime type if present.
+         """
+         fields = fields.copy()
+         set_clauses = []
+         for k, v in fields.items():
+             set_clauses.append(f"n.{k} = {_format_value(v, k)}")
+
+         set_clause_str = ",\n ".join(set_clauses)
+
+         query = f"""
+         MATCH (n@Memory {{id: "{id}"}})
+         """
+
+         if not self.config.use_multi_db and self.config.user_name:
+             query += f'WHERE n.user_name = "{self.config.user_name}"'
+
+         query += f"\nSET {set_clause_str}"
+         self.execute_query(query)
+
+     def delete_node(self, id: str) -> None:
+         """
+         Delete a node from the graph.
+
+         Args:
+             id: Node identifier to delete.
+         """
+         query = f"""
+         MATCH (n@Memory {{id: "{id}"}})
+         """
+         if not self.config.use_multi_db and self.config.user_name:
+             user_name = self.config.user_name
+             query += f" WHERE n.user_name = {_format_value(user_name)}"
+         query += "\n DETACH DELETE n"
+         self.execute_query(query)
+
+     def add_edge(self, source_id: str, target_id: str, type: str):
+         """
+         Create an edge from source node to target node.
+
+         Args:
+             source_id: ID of the source node.
+             target_id: ID of the target node.
+             type: Relationship type (e.g., 'RELATE_TO', 'PARENT').
+         """
+         if not source_id or not target_id:
+             raise ValueError("[add_edge] source_id and target_id must be provided")
+
+         props = ""
+         if not self.config.use_multi_db and self.config.user_name:
+             props = f'{{user_name: "{self.config.user_name}"}}'
+
+         insert_stmt = f'''
+         MATCH (a@Memory {{id: "{source_id}"}}), (b@Memory {{id: "{target_id}"}})
+         INSERT (a) -[e@{type} {props}]-> (b)
+         '''
+         try:
+             self.execute_query(insert_stmt)
+         except Exception as e:
+             logger.error(f"Failed to insert edge: {e}", exc_info=True)
+
+     def delete_edge(self, source_id: str, target_id: str, type: str) -> None:
+         """
+         Delete a specific edge between two nodes.
+
+         Args:
+             source_id: ID of the source node.
+             target_id: ID of the target node.
+             type: Relationship type to remove.
+         """
+         query = f"""
+         MATCH (a@Memory) -[r@{type}]-> (b@Memory)
+         WHERE a.id = {_format_value(source_id)} AND b.id = {_format_value(target_id)}
+         """
+
+         if not self.config.use_multi_db and self.config.user_name:
+             user_name = self.config.user_name
+             query += f" AND a.user_name = {_format_value(user_name)} AND b.user_name = {_format_value(user_name)}"
+
+         query += "\nDELETE r"
+         self.execute_query(query)
+
+     def get_memory_count(self, memory_type: str) -> int:
+         query = f"""
+         MATCH (n@Memory)
+         WHERE n.memory_type = "{memory_type}"
+         """
+         if not self.config.use_multi_db and self.config.user_name:
+             user_name = self.config.user_name
+             query += f"\nAND n.user_name = '{user_name}'"
+         query += "\nRETURN COUNT(n) AS count"
+
+         try:
+             result = self.execute_query(query)
+             return result.one_or_none()["count"].value
+         except Exception as e:
+             logger.error(f"[get_memory_count] Failed: {e}")
+             return -1
+
+     def count_nodes(self, scope: str) -> int:
+         query = f"""
+         MATCH (n@Memory)
+         WHERE n.memory_type = "{scope}"
+         """
+         if not self.config.use_multi_db and self.config.user_name:
+             user_name = self.config.user_name
+             query += f"\nAND n.user_name = '{user_name}'"
+         query += "\nRETURN count(n) AS count"
+
+         result = self.execute_query(query)
+         return result.one_or_none()["count"].value
+
+     def edge_exists(
+         self, source_id: str, target_id: str, type: str = "ANY", direction: str = "OUTGOING"
+     ) -> bool:
+         """
+         Check if an edge exists between two nodes.
+
+         Args:
+             source_id: ID of the source node.
+             target_id: ID of the target node.
+             type: Relationship type. Use "ANY" to match any relationship type.
+             direction: Direction of the edge.
+                 Use "OUTGOING" (default), "INCOMING", or "ANY".
+
+         Returns:
+             True if the edge exists, otherwise False.
+         """
+         # Prepare the relationship pattern
+         rel = "r" if type == "ANY" else f"r@{type}"
+
+         # Prepare the match pattern with direction
+         if direction == "OUTGOING":
+             pattern = f"(a@Memory {{id: '{source_id}'}})-[{rel}]->(b@Memory {{id: '{target_id}'}})"
+         elif direction == "INCOMING":
+             pattern = f"(a@Memory {{id: '{source_id}'}})<-[{rel}]-(b@Memory {{id: '{target_id}'}})"
+         elif direction == "ANY":
+             pattern = f"(a@Memory {{id: '{source_id}'}})-[{rel}]-(b@Memory {{id: '{target_id}'}})"
+         else:
+             raise ValueError(
+                 f"Invalid direction: {direction}. Must be 'OUTGOING', 'INCOMING', or 'ANY'."
+             )
+         query = f"MATCH {pattern}"
+         if not self.config.use_multi_db and self.config.user_name:
+             user_name = self.config.user_name
+             query += f"\nWHERE a.user_name = '{user_name}' AND b.user_name = '{user_name}'"
+         query += "\nRETURN r"
+
+         # Run the GQL query
+         result = self.execute_query(query)
+         record = result.one_or_none()
+         if record is None:
+             return False
+         return record.values() is not None
+
+     # Graph Query & Reasoning
+     def get_node(self, id: str) -> dict[str, Any] | None:
+         """
+         Retrieve a Memory node by its unique ID.
+
+         Args:
+             id (str): Node ID (Memory.id)
+
+         Returns:
+             dict: Node properties as key-value pairs, or None if not found.
+         """
+         gql = f"""
+         USE `{self.db_name}`
+         MATCH (v {{id: '{id}'}})
+         RETURN v
+         """
+
+         try:
+             result = self.execute_query(gql)
+             record = result.one_or_none()
+             if record is None:
+                 return None
+
+             node_wrapper = record["v"].as_node()
+             props = node_wrapper.get_properties()
+             node = self._parse_node(props)
+             return node
+
+         except Exception as e:
+             logger.error(f"[get_node] Failed to retrieve node '{id}': {e}")
+             return None
+
+     def get_nodes(self, ids: list[str]) -> list[dict[str, Any]]:
+         """
+         Retrieve the metadata and memory of a list of nodes.
+
+         Args:
+             ids: List of node identifiers.
+
+         Returns:
+             list[dict]: Parsed node records containing 'id', 'memory', and 'metadata'.
+
+         Notes:
+             - Assumes all provided IDs are valid and exist.
+             - Returns an empty list if input is empty.
+         """
+         if not ids:
+             return []
+
+         where_user = ""
+         if not self.config.use_multi_db and self.config.user_name:
+             where_user = f" AND n.user_name = '{self.config.user_name}'"
+
+         query = f"MATCH (n@Memory) WHERE n.id IN {ids} {where_user} RETURN n"
+
+         results = self.execute_query(query)
+         nodes = []
+         for rec in results:
+             node_props = rec["n"].as_node().get_properties()
+             nodes.append(self._parse_node(node_props))
+
+         return nodes
+
+     def get_edges(self, id: str, type: str = "ANY", direction: str = "ANY") -> list[dict[str, str]]:
+         """
+         Get edges connected to a node, with optional type and direction filter.
+
+         Args:
+             id: Node ID to retrieve edges for.
+             type: Relationship type to match, or 'ANY' to match all.
+             direction: 'OUTGOING', 'INCOMING', or 'ANY'.
+
+         Returns:
+             List of edges:
+             [
+                 {"from": "source_id", "to": "target_id", "type": "RELATE"},
+                 ...
+             ]
+         """
+         # Build relationship type filter
+         rel_type = "" if type == "ANY" else f"@{type}"
+
+         # Build match pattern based on direction
+         if direction == "OUTGOING":
+             pattern = f"(a@Memory)-[r{rel_type}]->(b@Memory)"
+             where_clause = f"a.id = '{id}'"
+         elif direction == "INCOMING":
+             pattern = f"(a@Memory)<-[r{rel_type}]-(b@Memory)"
+             where_clause = f"a.id = '{id}'"
+         elif direction == "ANY":
+             pattern = f"(a@Memory)-[r{rel_type}]-(b@Memory)"
+             where_clause = f"a.id = '{id}' OR b.id = '{id}'"
+         else:
+             raise ValueError("Invalid direction. Must be 'OUTGOING', 'INCOMING', or 'ANY'.")
+
+         if not self.config.use_multi_db and self.config.user_name:
+             where_clause += f" AND a.user_name = '{self.config.user_name}' AND b.user_name = '{self.config.user_name}'"
+
+         query = f"""
+         MATCH {pattern}
+         WHERE {where_clause}
+         RETURN a.id AS from_id, b.id AS to_id, type(r) AS edge_type
+         """
+
+         result = self.execute_query(query)
+         edges = []
+         for record in result:
+             edges.append(
+                 {
+                     "from": record["from_id"].value,
+                     "to": record["to_id"].value,
+                     "type": record["edge_type"].value,
+                 }
+             )
+         return edges
+
+     def get_neighbors_by_tag(
+         self,
+         tags: list[str],
+         exclude_ids: list[str],
+         top_k: int = 5,
+         min_overlap: int = 1,
+     ) -> list[dict[str, Any]]:
+         """
+         Find top-K neighbor nodes with maximum tag overlap.
+
+         Args:
+             tags: The list of tags to match.
+             exclude_ids: Node IDs to exclude (e.g., local cluster).
+             top_k: Max number of neighbors to return.
+             min_overlap: Minimum number of overlapping tags required.
+
+         Returns:
+             List of dicts with node details and overlap count.
+         """
+         if not tags:
+             return []
+
+         where_clauses = [
+             'n.status = "activated"',
+             'NOT (n.node_type = "reasoning")',
+             'NOT (n.memory_type = "WorkingMemory")',
+         ]
+         if exclude_ids:
+             where_clauses.append(f"NOT (n.id IN {exclude_ids})")
+
+         if not self.config.use_multi_db and self.config.user_name:
+             where_clauses.append(f'n.user_name = "{self.config.user_name}"')
+
+         where_clause = " AND ".join(where_clauses)
+         tag_list_literal = "[" + ", ".join(f'"{_escape_str(t)}"' for t in tags) + "]"
+
+         query = f"""
+         LET tag_list = {tag_list_literal}
+
+         MATCH (n@Memory)
+         WHERE {where_clause}
+         RETURN n,
+                size( filter( n.tags, t -> t IN tag_list ) ) AS overlap_count
+         ORDER BY overlap_count DESC
+         LIMIT {top_k}
+         """
+
+         result = self.execute_query(query)
+         neighbors: list[dict[str, Any]] = []
+         for r in result:
+             node_props = r["n"].as_node().get_properties()
+             parsed = self._parse_node(node_props)  # --> {id, memory, metadata}
+
+             parsed["overlap_count"] = r["overlap_count"].value
+             neighbors.append(parsed)
+
+         neighbors.sort(key=lambda x: x["overlap_count"], reverse=True)
+         result = []
+         for neighbor in neighbors[:top_k]:
+             neighbor.pop("overlap_count")
+             result.append(neighbor)
+         return result
+
+     def get_children_with_embeddings(self, id: str) -> list[dict[str, Any]]:
+         where_user = ""
+
+         if not self.config.use_multi_db and self.config.user_name:
+             user_name = self.config.user_name
+             where_user = f"AND p.user_name = '{user_name}' AND c.user_name = '{user_name}'"
+
+         query = f"""
+         MATCH (p@Memory)-[@PARENT]->(c@Memory)
+         WHERE p.id = "{id}" {where_user}
+         RETURN c.id AS id, c.embedding AS embedding, c.memory AS memory
+         """
+         result = self.execute_query(query)
+         children = []
+         for row in result:
+             eid = row["id"].value  # STRING
+             emb_v = row["embedding"].value  # NVector
+             emb = list(emb_v.values) if emb_v else []
+             mem = row["memory"].value  # STRING
+
+             children.append({"id": eid, "embedding": emb, "memory": mem})
+         return children
+
+     def get_subgraph(
+         self, center_id: str, depth: int = 2, center_status: str = "activated"
+     ) -> dict[str, Any]:
+         """
+         Retrieve a local subgraph centered at a given node.
+
+         Args:
+             center_id: The ID of the center node.
+             depth: The hop distance for neighbors.
+             center_status: Required status for the center node.
+
+         Returns:
+             {
+                 "core_node": {...},
+                 "neighbors": [...],
+                 "edges": [...]
+             }
+         """
+         if not 1 <= depth <= 5:
+             raise ValueError("depth must be 1-5")
+
+         user_name = self.config.user_name
+         gql = f"""
+         MATCH (center@Memory)
+         WHERE center.id = '{center_id}'
+           AND center.status = '{center_status}'
+           AND center.user_name = '{user_name}'
+         OPTIONAL MATCH p = (center)-[e]->{{1,{depth}}}(neighbor@Memory)
+         WHERE neighbor.user_name = '{user_name}'
+         RETURN center,
+                collect(DISTINCT neighbor) AS neighbors,
+                collect(EDGES(p)) AS edge_chains
+         """
+
+         result = self.execute_query(gql).one_or_none()
+         if not result or result.size == 0:
+             return {"core_node": None, "neighbors": [], "edges": []}
+
+         core_node_props = result["center"].as_node().get_properties()
+         core_node = self._parse_node(core_node_props)
+         neighbors = []
+         vid_to_id_map = {result["center"].as_node().node_id: core_node["id"]}
+         for n in result["neighbors"].value:
+             n_node = n.as_node()
+             n_props = n_node.get_properties()
+             node_parsed = self._parse_node(n_props)
+             neighbors.append(node_parsed)
+             vid_to_id_map[n_node.node_id] = node_parsed["id"]
+
+         edges = []
+         for chain_group in result["edge_chains"].value:
+             for edge_wr in chain_group.value:
+                 edge = edge_wr.value
+                 edges.append(
+                     {
+                         "type": edge.get_type(),
+                         "source": vid_to_id_map.get(edge.get_src_id()),
+                         "target": vid_to_id_map.get(edge.get_dst_id()),
+                     }
+                 )
+
+         return {"core_node": core_node, "neighbors": neighbors, "edges": edges}
+
+     # Search / recall operations
+     def search_by_embedding(
+         self,
+         vector: list[float],
+         top_k: int = 5,
+         scope: str | None = None,
+         status: str | None = None,
+         threshold: float | None = None,
+     ) -> list[dict]:
+         """
+         Retrieve node IDs based on vector similarity.
+
+         Args:
+             vector (list[float]): The embedding vector representing query semantics.
+             top_k (int): Number of top similar nodes to retrieve.
+             scope (str, optional): Memory type filter (e.g., 'WorkingMemory', 'LongTermMemory').
+             status (str, optional): Node status filter (e.g., 'active', 'archived').
+                 If provided, restricts results to nodes with matching status.
+             threshold (float, optional): Minimum similarity score threshold (0 ~ 1).
+
+         Returns:
+             list[dict]: A list of dicts with 'id' and 'score', ordered by similarity.
+
+         Notes:
+             - This method uses NebulaGraph's native vector index to search for similar nodes.
+             - If scope is provided, it restricts results to nodes with matching memory_type.
+             - If 'status' is provided, only nodes with the matching status will be returned.
+             - If threshold is provided, only results with score >= threshold will be returned.
+             - Typical use case: restrict to 'status = activated' to avoid
+               matching archived or merged nodes.
+         """
+         vector = _normalize(vector)
+         dim = len(vector)
+         vector_str = ",".join(f"{float(x)}" for x in vector)
+         gql_vector = f"VECTOR<{dim}, FLOAT>([{vector_str}])"
+
+         where_clauses = []
+         if scope:
+             where_clauses.append(f'n.memory_type = "{scope}"')
+         if status:
+             where_clauses.append(f'n.status = "{status}"')
+         if not self.config.use_multi_db and self.config.user_name:
+             where_clauses.append(f'n.user_name = "{self.config.user_name}"')
+
+         where_clause = f"WHERE {' AND '.join(where_clauses)}" if where_clauses else ""
+
+         gql = f"""
+         USE `{self.db_name}`
+         MATCH (n@Memory)
+         {where_clause}
+         ORDER BY inner_product(n.embedding, {gql_vector}) DESC
+         APPROXIMATE
+         LIMIT {top_k}
+         OPTIONS {{ METRIC: IP, TYPE: IVF, NPROBE: 8 }}
+         RETURN n.id AS id, inner_product(n.embedding, {gql_vector}) AS score
+         """
+
+         try:
+             result = self.execute_query(gql)
+         except Exception as e:
+             logger.error(f"[search_by_embedding] Query failed: {e}")
+             return []
+
+         try:
+             output = []
+             for row in result:
+                 values = row.values()
+                 id_val = values[0].as_string()
+                 score_val = values[1].as_double()
+                 score_val = (score_val + 1) / 2  # align to Neo4j's normalized cosine score
+                 if threshold is None or score_val >= threshold:
+                     output.append({"id": id_val, "score": score_val})
+             return output
+         except Exception as e:
+             logger.error(f"[search_by_embedding] Result parse failed: {e}")
+             return []
+
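A retrieval sketch; the query vector is fabricated and would normally come from the same embedder that produced the stored embeddings:

query_vec = [0.02] * 3072   # placeholder; use a real embedding in practice
hits = db.search_by_embedding(
    query_vec, top_k=5, scope="LongTermMemory", status="activated", threshold=0.5
)
for hit in hits:
    print(hit["id"], round(hit["score"], 3))   # scores are rescaled into [0, 1]
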
+     def get_by_metadata(self, filters: list[dict[str, Any]]) -> list[str]:
+         """
+         Retrieve node IDs that match the given metadata filters (AND logic).
+         Supports exact match, list membership, containment, prefix/suffix,
+         and numeric comparisons.
+
+         TODO: support "AND"/"OR" logic combinations and nested conditional expressions.
+
+         Args:
+             filters: List of filter dicts like:
+                 [
+                     {"field": "key", "op": "in", "value": ["A", "B"]},
+                     {"field": "confidence", "op": ">=", "value": 80},
+                     {"field": "tags", "op": "contains", "value": "AI"},
+                     ...
+                 ]
+
+         Returns:
+             list[str]: Node IDs whose metadata match all filter conditions.
+
+         Notes:
+             - Supports structured querying such as tag/category/importance/time filtering.
+             - Can be used for faceted recall or prefiltering before embedding rerank.
+         """
+         where_clauses = []
+
+         def _escape_value(value):
+             if isinstance(value, str):
+                 return f'"{value}"'
+             elif isinstance(value, list):
+                 return "[" + ", ".join(_escape_value(v) for v in value) + "]"
+             else:
+                 return str(value)
+
+         for _i, f in enumerate(filters):
+             field = f["field"]
+             op = f.get("op", "=")
+             value = f["value"]
+
+             escaped_value = _escape_value(value)
+
+             # Build WHERE clause
+             if op == "=":
+                 where_clauses.append(f"n.{field} = {escaped_value}")
+             elif op == "in":
+                 where_clauses.append(f"n.{field} IN {escaped_value}")
+             elif op == "contains":
+                 where_clauses.append(f"size(filter(n.{field}, t -> t IN {escaped_value})) > 0")
+             elif op == "starts_with":
+                 where_clauses.append(f"n.{field} STARTS WITH {escaped_value}")
+             elif op == "ends_with":
+                 where_clauses.append(f"n.{field} ENDS WITH {escaped_value}")
+             elif op in [">", ">=", "<", "<="]:
+                 where_clauses.append(f"n.{field} {op} {escaped_value}")
+             else:
+                 raise ValueError(f"Unsupported operator: {op}")
+
+         if not self.config.use_multi_db and self.user_name:
+             where_clauses.append(f'n.user_name = "{self.config.user_name}"')
+
+         where_str = " AND ".join(where_clauses)
+         gql = f"MATCH (n@Memory) WHERE {where_str} RETURN n.id AS id"
+         ids = []
+         try:
+             result = self.execute_query(gql)
+             ids = [record["id"].value for record in result]
+         except Exception as e:
+             logger.error(f"Failed to get metadata: {e}, gql is {gql}")
+         return ids
+
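A filter sketch using the operators handled above; the field names follow the Memory schema and the values are invented. Note that "contains" compiles to a list-overlap test, so its value should be a list:

ids = db.get_by_metadata([
    {"field": "memory_type", "op": "=", "value": "LongTermMemory"},
    {"field": "confidence", "op": ">=", "value": 80},
    {"field": "tags", "op": "contains", "value": ["coffee"]},
])
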
+     def get_grouped_counts(
+         self,
+         group_fields: list[str],
+         where_clause: str = "",
+         params: dict[str, Any] | None = None,
+     ) -> list[dict[str, Any]]:
+         """
+         Count nodes grouped by arbitrary fields.
+
+         Args:
+             group_fields (list[str]): Fields to group by, e.g., ["memory_type", "status"]
+             where_clause (str, optional): Extra WHERE condition, e.g.,
+                 "WHERE n.status = 'activated'"
+             params (dict, optional): Parameters for the WHERE clause.
+
+         Returns:
+             list[dict]: e.g., [{'memory_type': 'WorkingMemory', 'status': 'active', 'count': 10}, ...]
+         """
+         if not group_fields:
+             raise ValueError("group_fields cannot be empty")
+
+         # GQL-specific modifications
+         if not self.config.use_multi_db and self.config.user_name:
+             user_clause = f"n.user_name = '{self.config.user_name}'"
+             if where_clause:
+                 where_clause = where_clause.strip()
+                 if where_clause.upper().startswith("WHERE"):
+                     where_clause += f" AND {user_clause}"
+                 else:
+                     where_clause = f"WHERE {where_clause} AND {user_clause}"
+             else:
+                 where_clause = f"WHERE {user_clause}"
+
+         # Inline parameters if provided
+         if params:
+             for key, value in params.items():
+                 # Handle different value types appropriately
+                 if isinstance(value, str):
+                     value = f"'{value}'"
+                 where_clause = where_clause.replace(f"${key}", str(value))
+
+         return_fields = []
+         group_by_fields = []
+
+         for field in group_fields:
+             alias = field.replace(".", "_")
+             return_fields.append(f"n.{field} AS {alias}")
+             group_by_fields.append(alias)
+
+         # Full GQL query construction
+         gql = f"""
+         MATCH (n)
+         {where_clause}
+         RETURN {", ".join(return_fields)}, COUNT(n) AS count
+         GROUP BY {", ".join(group_by_fields)}
+         """
+         result = self.execute_query(gql)
+
+         output = []
+         for record in result:
+             group_values = {}
+             for i, field in enumerate(group_fields):
+                 value = record.values()[i].as_string()
+                 group_values[field] = value
+             count_value = record["count"].value
+             output.append({**group_values, "count": count_value})
+
+         return output
+
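A grouped-count sketch over two schema fields; the output shape follows the docstring above:

rows = db.get_grouped_counts(group_fields=["memory_type", "status"])
# e.g. [{"memory_type": "WorkingMemory", "status": "activated", "count": 10}, ...]
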
+     def clear(self) -> None:
+         """
+         Clear the entire graph if the target database exists.
+         """
+         try:
+             if not self.config.use_multi_db and self.config.user_name:
+                 query = f"MATCH (n@Memory) WHERE n.user_name = '{self.config.user_name}' DETACH DELETE n"
+             else:
+                 query = "MATCH (n) DETACH DELETE n"
+
+             self.execute_query(query)
+             logger.info("Cleared all nodes from database.")
+
+         except Exception as e:
+             logger.error(f"[ERROR] Failed to clear database: {e}")
+
+     def export_graph(self) -> dict[str, Any]:
+         """
+         Export all graph nodes and edges in a structured form.
+
+         Returns:
+             {
+                 "nodes": [ { "id": ..., "memory": ..., "metadata": {...} }, ... ],
+                 "edges": [ { "source": ..., "target": ..., "type": ... }, ... ]
+             }
+         """
+         node_query = "MATCH (n@Memory)"
+         edge_query = "MATCH (a@Memory)-[r]->(b@Memory)"
+
+         if not self.config.use_multi_db and self.config.user_name:
+             username = self.config.user_name
+             node_query += f' WHERE n.user_name = "{username}"'
+             edge_query += f' WHERE r.user_name = "{username}"'
+
+         try:
+             full_node_query = f"{node_query} RETURN n"
+             node_result = self.execute_query(full_node_query)
+             nodes = []
+             for row in node_result:
+                 node_wrapper = row.values()[0].as_node()
+                 props = node_wrapper.get_properties()
+
+                 node = self._parse_node(props)
+                 nodes.append(node)
+         except Exception as e:
+             raise RuntimeError(f"[EXPORT GRAPH - NODES] Exception: {e}") from e
+
+         try:
+             full_edge_query = f"{edge_query} RETURN a.id AS source, b.id AS target, type(r) AS edge"
+             edge_result = self.execute_query(full_edge_query)
+             edges = [
+                 {
+                     "source": row.values()[0].value,
+                     "target": row.values()[1].value,
+                     "type": row.values()[2].value,
+                 }
+                 for row in edge_result
+             ]
+         except Exception as e:
+             raise RuntimeError(f"[EXPORT GRAPH - EDGES] Exception: {e}") from e
+
+         return {"nodes": nodes, "edges": edges}
+
+     def import_graph(self, data: dict[str, Any]) -> None:
+         """
+         Import the entire graph from a serialized dictionary.
+
+         Args:
+             data: A dictionary containing all nodes and edges to be loaded.
+         """
+         for node in data.get("nodes", []):
+             id, memory, metadata = _compose_node(node)
+
+             if not self.config.use_multi_db and self.config.user_name:
+                 metadata["user_name"] = self.config.user_name
+
+             metadata = _prepare_node_metadata(metadata)
+             metadata.update({"id": id, "memory": memory})
+             properties = ", ".join(f"{k}: {_format_value(v, k)}" for k, v in metadata.items())
+             node_gql = f"INSERT OR IGNORE (n@Memory {{{properties}}})"
+             self.execute_query(node_gql)
+
+         for edge in data.get("edges", []):
+             source_id, target_id = edge["source"], edge["target"]
+             edge_type = edge["type"]
+             props = ""
+             if not self.config.use_multi_db and self.config.user_name:
+                 props = f'{{user_name: "{self.config.user_name}"}}'
+             edge_gql = f'''
+             MATCH (a@Memory {{id: "{source_id}"}}), (b@Memory {{id: "{target_id}"}})
+             INSERT OR IGNORE (a) -[e@{edge_type} {props}]-> (b)
+             '''
+             self.execute_query(edge_gql)
+
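A backup-and-restore sketch built on the two methods above; other_db stands in for a second, hypothetical NebulaGraphDB instance sharing the same graph type:

snapshot = db.export_graph()        # {"nodes": [...], "edges": [...]}
other_db.import_graph(snapshot)     # re-inserts nodes first, then edges
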
+     def get_all_memory_items(self, scope: str) -> list[dict]:
+         """
+         Retrieve all memory items of a specific memory_type.
+
+         Args:
+             scope (str): Must be one of 'WorkingMemory', 'LongTermMemory',
+                 'UserMemory', or 'OuterMemory'.
+
+         Returns:
+             list[dict]: Full list of memory items under this scope.
+         """
+         if scope not in {"WorkingMemory", "LongTermMemory", "UserMemory", "OuterMemory"}:
+             raise ValueError(f"Unsupported memory type scope: {scope}")
+
+         where_clause = f"WHERE n.memory_type = '{scope}'"
+
+         if not self.config.use_multi_db and self.config.user_name:
+             where_clause += f" AND n.user_name = '{self.config.user_name}'"
+
+         query = f"""
+         MATCH (n@Memory)
+         {where_clause}
+         RETURN n
+         """
+         nodes = []
+         try:
+             results = self.execute_query(query)
+             for rec in results:
+                 node_props = rec["n"].as_node().get_properties()
+                 nodes.append(self._parse_node(node_props))
+         except Exception as e:
+             logger.error(f"Failed to get memories: {e}")
+         return nodes
+
+     def get_structure_optimization_candidates(self, scope: str) -> list[dict]:
+         """
+         Find nodes that are likely candidates for structure optimization:
+         - Isolated nodes, nodes with empty background, or nodes with exactly one child.
+         - Plus: the child of any parent node that has exactly one child.
+         """
+         where_clause = f'''
+         n.memory_type = "{scope}"
+         AND n.status = "activated"
+         '''
+         if not self.config.use_multi_db and self.config.user_name:
+             where_clause += f' AND n.user_name = "{self.config.user_name}"'
+
+         query = f"""
+         USE `{self.db_name}`
+         MATCH (n@Memory)
+         WHERE {where_clause}
+         OPTIONAL MATCH (n)-[@PARENT]->(c@Memory)
+         OPTIONAL MATCH (p@Memory)-[@PARENT]->(n)
+         WHERE c IS NULL AND p IS NULL
+         RETURN n
+         """
+
+         candidates = []
+         try:
+             results = self.execute_query(query)
+             for rec in results:
+                 node_props = rec["n"].as_node().get_properties()
+                 candidates.append(self._parse_node(node_props))
+         except Exception as e:
+             logger.error(f"Failed to get structure optimization candidates: {e}")
+         return candidates
+
+     def drop_database(self) -> None:
+         """
+         Permanently delete the entire database this instance is using.
+         WARNING: This operation is destructive and cannot be undone.
+         """
+         if self.config.use_multi_db:
+             self.execute_query(f"DROP GRAPH `{self.db_name}`")
+             logger.info(f"Database `{self.db_name}` has been dropped.")
+         else:
+             raise ValueError(
+                 f"Refusing to drop protected database `{self.db_name}` in "
+                 f"shared-database multi-tenant mode"
+             )
+
+     def detect_conflicts(self) -> list[tuple[str, str]]:
+         """
+         Detect conflicting nodes based on logical or semantic inconsistency.
+
+         Returns:
+             A list of (node_id1, node_id2) tuples that conflict.
+         """
+         raise NotImplementedError
+
+     # Structure Maintenance
+     def deduplicate_nodes(self) -> None:
+         """
+         Deduplicate redundant or semantically similar nodes.
+         This typically involves identifying nodes with identical or near-identical memory.
+         """
+         raise NotImplementedError
+
+     def get_context_chain(self, id: str, type: str = "FOLLOWS") -> list[str]:
+         """
+         Get the ordered context chain starting from a node, following a relationship type.
+
+         Args:
+             id: Starting node ID.
+             type: Relationship type to follow (e.g., 'FOLLOWS').
+
+         Returns:
+             List of ordered node IDs in the chain.
+         """
+         raise NotImplementedError
+
+     def get_neighbors(
+         self, id: str, type: str, direction: Literal["in", "out", "both"] = "out"
+     ) -> list[str]:
+         """
+         Get connected node IDs in a specific direction and relationship type.
+
+         Args:
+             id: Source node ID.
+             type: Relationship type.
+             direction: Edge direction to follow ('out', 'in', or 'both').
+
+         Returns:
+             List of neighboring node IDs.
+         """
+         raise NotImplementedError
+
+     def get_path(self, source_id: str, target_id: str, max_depth: int = 3) -> list[str]:
+         """
+         Get the path of nodes from source to target within a limited depth.
+
+         Args:
+             source_id: Starting node ID.
+             target_id: Target node ID.
+             max_depth: Maximum path length to traverse.
+
+         Returns:
+             Ordered list of node IDs along the path.
+         """
+         raise NotImplementedError
+
+     def merge_nodes(self, id1: str, id2: str) -> str:
+         """
+         Merge two similar or duplicate nodes into one.
+
+         Args:
+             id1: First node ID.
+             id2: Second node ID.
+
+         Returns:
+             ID of the resulting merged node.
+         """
+         raise NotImplementedError
+
+     def _ensure_database_exists(self):
+         create_tag = """
+         CREATE GRAPH TYPE IF NOT EXISTS MemOSType AS {
+             NODE Memory (:MemoryTag {
+                 id STRING,
+                 memory STRING,
+                 user_name STRING,
+                 user_id STRING,
+                 session_id STRING,
+                 status STRING,
+                 key STRING,
+                 confidence FLOAT,
+                 tags LIST<STRING>,
+                 created_at STRING,
+                 updated_at STRING,
+                 memory_type STRING,
+                 sources LIST<STRING>,
+                 source STRING,
+                 node_type STRING,
+                 visibility STRING,
+                 usage LIST<STRING>,
+                 background STRING,
+                 embedding VECTOR<3072, FLOAT>,
+                 PRIMARY KEY(id)
+             }),
+             EDGE RELATE_TO (Memory) -[{user_name STRING}]-> (Memory),
+             EDGE PARENT (Memory) -[{user_name STRING}]-> (Memory),
+             EDGE AGGREGATE_TO (Memory) -[{user_name STRING}]-> (Memory),
+             EDGE MERGED_TO (Memory) -[{user_name STRING}]-> (Memory),
+             EDGE INFERS (Memory) -[{user_name STRING}]-> (Memory),
+             EDGE FOLLOWS (Memory) -[{user_name STRING}]-> (Memory)
+         }
+         """
+         create_graph = f"CREATE GRAPH IF NOT EXISTS `{self.db_name}` TYPED MemOSType"
+         set_graph_working = f"SESSION SET GRAPH `{self.db_name}`"
+
+         try:
+             self.execute_query(create_tag, auto_set_db=False)
+             self.execute_query(create_graph, auto_set_db=False)
+             self.execute_query(set_graph_working)
+             logger.info(f"✅ Graph `{self.db_name}` is now the working graph.")
+         except Exception as e:
+             logger.error(f"❌ Failed to create graph type: {e} trace: {traceback.format_exc()}")
+
+     def _create_vector_index(
+         self, label: str, vector_property: str, dimensions: int, index_name: str
+     ) -> None:
+         """
+         Create a vector index for the specified property on the label.
+         """
+         create_vector_index = f"""
+         CREATE VECTOR INDEX IF NOT EXISTS {index_name}
+         ON NODE Memory::{vector_property}
+         OPTIONS {{
+             DIM: {dimensions},
+             METRIC: IP,
+             TYPE: IVF,
+             NLIST: 100,
+             TRAINSIZE: 1000
+         }}
+         FOR `{self.db_name}`
+         """
+         self.execute_query(create_vector_index)
+
+     def _create_basic_property_indexes(self) -> None:
+         """
+         Create standard B-tree indexes on the status, memory_type, created_at,
+         and updated_at fields, plus user_name when running in shared-database
+         multi-tenant mode.
+         """
+         fields = ["status", "memory_type", "created_at", "updated_at"]
+         if not self.config.use_multi_db:
+             fields.append("user_name")
+
+         for field in fields:
+             index_name = f"idx_memory_{field}"
+             gql = f"""
+             CREATE INDEX IF NOT EXISTS {index_name} ON NODE Memory({field})
+             FOR `{self.db_name}`
+             """
+             try:
+                 self.execute_query(gql)
+                 logger.info(f"✅ Created index: {index_name} on field {field}")
+             except Exception as e:
+                 logger.error(f"❌ Failed to create index {index_name}: {e}")
+
+     def _index_exists(self, index_name: str) -> bool:
+         """
+         Check if a vector index with the given name exists in NebulaGraph.
+
+         Args:
+             index_name (str): The name of the index to check.
+
+         Returns:
+             bool: True if the index exists, False otherwise.
+         """
+         query = "SHOW VECTOR INDEXES"
+         try:
+             result = self.execute_query(query)
+             return any(row.values()[0].as_string() == index_name for row in result)
+         except Exception as e:
+             logger.error(f"[Nebula] Failed to check index existence: {e}")
+             return False
+
+     def _parse_value(self, value: Any) -> Any:
+         """Convert a Nebula ValueWrapper into a plain Python value."""
+         from nebulagraph_python.value_wrapper import ValueWrapper
+
+         if value is None or (hasattr(value, "is_null") and value.is_null()):
+             return None
+         try:
+             prim = value.cast_primitive() if isinstance(value, ValueWrapper) else value
+         except Exception as e:
+             logger.warning(f"Failed to decode Nebula ValueWrapper: {e}")
+             prim = value.cast() if isinstance(value, ValueWrapper) else value
+
+         if isinstance(prim, ValueWrapper):
+             return self._parse_value(prim)
+         if isinstance(prim, list):
+             return [self._parse_value(v) for v in prim]
+         if type(prim).__name__ == "NVector":
+             return list(prim.values)
+
+         return prim  # already a Python primitive
+
+     def _parse_node(self, props: dict[str, Any]) -> dict[str, Any]:
+         parsed = {k: self._parse_value(v) for k, v in props.items()}
+
+         for tf in ("created_at", "updated_at"):
+             if tf in parsed and hasattr(parsed[tf], "isoformat"):
+                 parsed[tf] = parsed[tf].isoformat()
+
+         node_id = parsed.pop("id")
+         memory = parsed.pop("memory", "")
+         parsed.pop("user_name", None)
+         metadata = parsed
+         metadata["type"] = metadata.pop("node_type")
+
+         return {"id": node_id, "memory": memory, "metadata": metadata}