@gralkor/openclaw 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115) hide show
  1. package/.env.example +32 -0
  2. package/README.md +77 -0
  3. package/config.yaml +16 -0
  4. package/dist/config.d.ts +33 -0
  5. package/dist/config.d.ts.map +1 -0
  6. package/dist/config.js +49 -0
  7. package/dist/config.js.map +1 -0
  8. package/dist/ctx-to-messages.d.ts +36 -0
  9. package/dist/ctx-to-messages.d.ts.map +1 -0
  10. package/dist/ctx-to-messages.js +120 -0
  11. package/dist/ctx-to-messages.js.map +1 -0
  12. package/dist/ctx-to-turn.d.ts +32 -0
  13. package/dist/ctx-to-turn.d.ts.map +1 -0
  14. package/dist/ctx-to-turn.js +55 -0
  15. package/dist/ctx-to-turn.js.map +1 -0
  16. package/dist/gralkor/client/http.d.ts +55 -0
  17. package/dist/gralkor/client/http.d.ts.map +1 -0
  18. package/dist/gralkor/client/http.js +150 -0
  19. package/dist/gralkor/client/http.js.map +1 -0
  20. package/dist/gralkor/client/in-memory.d.ts +38 -0
  21. package/dist/gralkor/client/in-memory.d.ts.map +1 -0
  22. package/dist/gralkor/client/in-memory.js +72 -0
  23. package/dist/gralkor/client/in-memory.js.map +1 -0
  24. package/dist/gralkor/client.d.ts +64 -0
  25. package/dist/gralkor/client.d.ts.map +1 -0
  26. package/dist/gralkor/client.js +32 -0
  27. package/dist/gralkor/client.js.map +1 -0
  28. package/dist/gralkor/config.d.ts +33 -0
  29. package/dist/gralkor/config.d.ts.map +1 -0
  30. package/dist/gralkor/config.js +58 -0
  31. package/dist/gralkor/config.js.map +1 -0
  32. package/dist/gralkor/connection.d.ts +20 -0
  33. package/dist/gralkor/connection.d.ts.map +1 -0
  34. package/dist/gralkor/connection.js +31 -0
  35. package/dist/gralkor/connection.js.map +1 -0
  36. package/dist/gralkor/index.d.ts +11 -0
  37. package/dist/gralkor/index.d.ts.map +1 -0
  38. package/dist/gralkor/index.js +6 -0
  39. package/dist/gralkor/index.js.map +1 -0
  40. package/dist/gralkor/server-env.d.ts +11 -0
  41. package/dist/gralkor/server-env.d.ts.map +1 -0
  42. package/dist/gralkor/server-env.js +26 -0
  43. package/dist/gralkor/server-env.js.map +1 -0
  44. package/dist/gralkor/server-manager.d.ts +58 -0
  45. package/dist/gralkor/server-manager.d.ts.map +1 -0
  46. package/dist/gralkor/server-manager.js +390 -0
  47. package/dist/gralkor/server-manager.js.map +1 -0
  48. package/dist/gralkor/testing.d.ts +10 -0
  49. package/dist/gralkor/testing.d.ts.map +1 -0
  50. package/dist/gralkor/testing.js +10 -0
  51. package/dist/gralkor/testing.js.map +1 -0
  52. package/dist/hooks/agent-end.d.ts +25 -0
  53. package/dist/hooks/agent-end.d.ts.map +1 -0
  54. package/dist/hooks/agent-end.js +51 -0
  55. package/dist/hooks/agent-end.js.map +1 -0
  56. package/dist/hooks/before-prompt-build.d.ts +12 -0
  57. package/dist/hooks/before-prompt-build.d.ts.map +1 -0
  58. package/dist/hooks/before-prompt-build.js +15 -0
  59. package/dist/hooks/before-prompt-build.js.map +1 -0
  60. package/dist/hooks/session-end.d.ts +18 -0
  61. package/dist/hooks/session-end.d.ts.map +1 -0
  62. package/dist/hooks/session-end.js +19 -0
  63. package/dist/hooks/session-end.js.map +1 -0
  64. package/dist/index.d.ts +130 -0
  65. package/dist/index.d.ts.map +1 -0
  66. package/dist/index.js +133 -0
  67. package/dist/index.js.map +1 -0
  68. package/dist/native-indexer.d.ts +43 -0
  69. package/dist/native-indexer.d.ts.map +1 -0
  70. package/dist/native-indexer.js +107 -0
  71. package/dist/native-indexer.js.map +1 -0
  72. package/dist/register.d.ts +25 -0
  73. package/dist/register.d.ts.map +1 -0
  74. package/dist/register.js +184 -0
  75. package/dist/register.js.map +1 -0
  76. package/dist/session-map.d.ts +13 -0
  77. package/dist/session-map.d.ts.map +1 -0
  78. package/dist/session-map.js +32 -0
  79. package/dist/session-map.js.map +1 -0
  80. package/dist/tools/memory-add.d.ts +15 -0
  81. package/dist/tools/memory-add.d.ts.map +1 -0
  82. package/dist/tools/memory-add.js +15 -0
  83. package/dist/tools/memory-add.js.map +1 -0
  84. package/dist/tools/memory-build-communities.d.ts +19 -0
  85. package/dist/tools/memory-build-communities.d.ts.map +1 -0
  86. package/dist/tools/memory-build-communities.js +18 -0
  87. package/dist/tools/memory-build-communities.js.map +1 -0
  88. package/dist/tools/memory-build-indices.d.ts +12 -0
  89. package/dist/tools/memory-build-indices.d.ts.map +1 -0
  90. package/dist/tools/memory-build-indices.js +11 -0
  91. package/dist/tools/memory-build-indices.js.map +1 -0
  92. package/dist/tools/memory-search.d.ts +20 -0
  93. package/dist/tools/memory-search.d.ts.map +1 -0
  94. package/dist/tools/memory-search.js +18 -0
  95. package/dist/tools/memory-search.js.map +1 -0
  96. package/dist/types.d.ts +62 -0
  97. package/dist/types.d.ts.map +1 -0
  98. package/dist/types.js +8 -0
  99. package/dist/types.js.map +1 -0
  100. package/openclaw.plugin.json +130 -0
  101. package/package.json +75 -0
  102. package/server/server/.python-version +1 -0
  103. package/server/server/main.py +902 -0
  104. package/server/server/pipelines/__init__.py +0 -0
  105. package/server/server/pipelines/capture_buffer.py +170 -0
  106. package/server/server/pipelines/distill.py +122 -0
  107. package/server/server/pipelines/formatting.py +48 -0
  108. package/server/server/pipelines/interpret.py +165 -0
  109. package/server/server/pipelines/messages.py +13 -0
  110. package/server/server/pyproject.toml +19 -0
  111. package/server/server/pytest.ini +4 -0
  112. package/server/server/requirements-dev.txt +3 -0
  113. package/server/server/requirements.txt +5 -0
  114. package/server/server/uv.lock +1162 -0
  115. package/server/wheels/falkordblite-0.9.0-py3-none-manylinux_2_36_aarch64.whl +0 -0
@@ -0,0 +1,902 @@
1
+ """Thin FastAPI server wrapping graphiti-core for the Gralkor plugin."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ import logging
7
+ import os
8
+ import time
9
+ from contextlib import asynccontextmanager
10
+ from copy import deepcopy
11
+ from datetime import datetime, timezone
12
+ from typing import Any, Literal
13
+
14
+ import uuid
15
+
16
+ import yaml
17
+ from fastapi import APIRouter, FastAPI, HTTPException, Response, status
18
+ from fastapi.responses import JSONResponse
19
+ from pydantic import BaseModel, Field, create_model
20
+
21
+ from pipelines.capture_buffer import CaptureBuffer, CaptureClientError
22
+ from pipelines.distill import format_transcript
23
+ from pipelines.formatting import format_fact, format_node
24
+ from pipelines.interpret import interpret_facts
25
+ from pipelines.messages import Message
26
+
27
+
28
+
29
+ from graphiti_core import Graphiti
30
+ from graphiti_core.driver.falkordb_driver import FalkorDriver
31
+ from graphiti_core.edges import EntityEdge
32
+ from graphiti_core.nodes import EpisodicNode, EpisodeType
33
+ from graphiti_core.llm_client import LLMConfig
34
+ from graphiti_core.search.search_config_recipes import COMBINED_HYBRID_SEARCH_CROSS_ENCODER
35
+
36
+ DEFAULT_DATABASE = "default_db"
37
+
38
+
39
+ # ── Config ────────────────────────────────────────────────────
40
+
41
+
42
+ def _load_config() -> dict:
43
+ path = os.getenv("CONFIG_PATH", "/app/config.yaml")
44
+ if os.path.exists(path):
45
+ with open(path) as f:
46
+ return yaml.safe_load(f) or {}
47
+ return {}
48
+
49
+
50
+ DEFAULT_LLM_PROVIDER = "gemini"
51
+ DEFAULT_LLM_MODEL = "gemini-3.1-flash-lite"
52
+ DEFAULT_EMBEDDER_PROVIDER = "gemini"
53
+ DEFAULT_EMBEDDER_MODEL = "gemini-embedding-2-preview"
54
+
55
+
56
def _build_genai_client():
    """Create the google-genai client used as a plain transport.

    The client is constructed with no arguments on purpose:

    * ``HttpOptions.timeout`` is NEVER set. The SDK serialises that field
      on the wire as a Vertex-side deadline, and Gemini 3.x rejects values
      below 10_000 ms with 400 INVALID_ARGUMENT (see MENTAL_MODEL.md ›
      Invariants › Vertex deadline floor). Local per-request bounds live
      above the SDK: /recall's deadline and its per-call 429 retry, both in
      this file.
    * ``HttpRetryOptions`` is not set either. Retry ownership for 429 lives
      in /recall's handler, not in the SDK (see TEST_TREES.md › Retry
      ownership). No layer retries 429 above /recall; other endpoints
      surface 429 immediately through rate_limit_middleware.
    """
    # Imported lazily so the SDK is only loaded when a client is built.
    from google import genai as genai_sdk

    client = genai_sdk.Client()
    return client
74
+
75
+
76
def _build_llm_client(cfg: dict, genai_client=None):
    """Build the graphiti-core LLM client selected by ``cfg["llm"]``.

    Args:
        cfg: Parsed server configuration. ``cfg["llm"]`` may be missing or
            null; both are treated as an empty section.
        genai_client: Shared google-genai client; only passed to the Gemini
            client.

    Returns:
        An Anthropic, Gemini or Groq client when the provider names one of
        those; otherwise an OpenAI client.
    """
    # ``llm:`` with no value parses to None in YAML, so ``cfg.get("llm", {})``
    # would hand back None and crash on ``.get``; normalise to a dict first.
    llm_section = cfg.get("llm") or {}
    provider = llm_section.get("provider") or DEFAULT_LLM_PROVIDER
    # The default model only applies to the default provider; for any other
    # provider an unset model leaves the choice to the client itself.
    model = llm_section.get("model") or (
        DEFAULT_LLM_MODEL if provider == DEFAULT_LLM_PROVIDER else None
    )
    llm_cfg = LLMConfig(model=model) if model else None

    if provider == "anthropic":
        from graphiti_core.llm_client.anthropic_client import AnthropicClient

        return AnthropicClient(config=llm_cfg)
    if provider == "gemini":
        from graphiti_core.llm_client.gemini_client import GeminiClient

        return GeminiClient(config=llm_cfg, client=genai_client)
    if provider == "groq":
        from graphiti_core.llm_client.groq_client import GroqClient

        return GroqClient(config=llm_cfg)

    # Default: openai (also covers azure_openai with base_url set via env)
    from graphiti_core.llm_client import OpenAIClient

    return OpenAIClient(config=llm_cfg)
100
+
101
+
102
def _build_embedder(cfg: dict, genai_client=None):
    """Build the graphiti-core embedder selected by ``cfg["embedder"]``.

    Args:
        cfg: Parsed server configuration. ``cfg["embedder"]`` may be missing
            or null; both are treated as an empty section.
        genai_client: Shared google-genai client; only passed to the Gemini
            embedder.

    Returns:
        A Gemini embedder when the provider is ``"gemini"``, otherwise an
        OpenAI embedder.
    """
    # ``embedder:`` with no value parses to None in YAML; normalise it so the
    # lookups below cannot fail with AttributeError.
    embedder_section = cfg.get("embedder") or {}
    provider = embedder_section.get("provider") or DEFAULT_EMBEDDER_PROVIDER
    # The default model only applies to the default provider.
    model = embedder_section.get("model") or (
        DEFAULT_EMBEDDER_MODEL if provider == DEFAULT_EMBEDDER_PROVIDER else None
    )

    if provider == "gemini":
        from graphiti_core.embedder.gemini import GeminiEmbedder, GeminiEmbedderConfig

        ecfg = GeminiEmbedderConfig(embedding_model=model) if model else GeminiEmbedderConfig()
        return GeminiEmbedder(ecfg, client=genai_client)

    from graphiti_core.embedder import OpenAIEmbedder, OpenAIEmbedderConfig

    ecfg = OpenAIEmbedderConfig(embedding_model=model) if model else OpenAIEmbedderConfig()
    return OpenAIEmbedder(ecfg)
118
+
119
+
120
+ def _build_cross_encoder(cfg: dict, genai_client=None):
121
+ """Match cross-encoder to LLM provider; fall back to OpenAI only if key is present."""
122
+ provider = cfg.get("llm", {}).get("provider", "gemini")
123
+
124
+ if provider == "gemini":
125
+ from graphiti_core.cross_encoder.gemini_reranker_client import GeminiRerankerClient
126
+ return GeminiRerankerClient(client=genai_client)
127
+
128
+ if os.environ.get("OPENAI_API_KEY"):
129
+ from graphiti_core.cross_encoder.openai_reranker_client import OpenAIRerankerClient
130
+ return OpenAIRerankerClient()
131
+
132
+ return None
133
+
134
+
135
+ _TYPE_MAP: dict[str, type] = {
136
+ "string": str,
137
+ "int": int,
138
+ "float": float,
139
+ "bool": bool,
140
+ "datetime": datetime,
141
+ }
142
+
143
+
144
+ def _build_type_defs(
145
+ defs: dict[str, Any],
146
+ ) -> dict[str, type[BaseModel]]:
147
+ """Build Pydantic models from ontology type definitions."""
148
+ models: dict[str, type[BaseModel]] = {}
149
+ for name, defn in defs.items():
150
+ fields: dict[str, Any] = {}
151
+ for attr_name, attr_val in (defn.get("attributes") or {}).items():
152
+ if isinstance(attr_val, str):
153
+ fields[attr_name] = (str, Field(description=attr_val))
154
+ elif isinstance(attr_val, list):
155
+ lit_type = Literal[tuple(attr_val)] # type: ignore[valid-type]
156
+ fields[attr_name] = (lit_type, Field())
157
+ elif isinstance(attr_val, dict):
158
+ if "enum" in attr_val:
159
+ lit_type = Literal[tuple(attr_val["enum"])] # type: ignore[valid-type]
160
+ fields[attr_name] = (lit_type, Field(description=attr_val.get("description", "")))
161
+ else:
162
+ py_type = _TYPE_MAP[attr_val["type"]]
163
+ fields[attr_name] = (py_type, Field(description=attr_val.get("description", "")))
164
+ model = create_model(name, **fields)
165
+ model.__doc__ = defn.get("description", "")
166
+ models[name] = model
167
+ return models
168
+
169
+
170
+ def _build_ontology(
171
+ cfg: dict,
172
+ ) -> tuple[
173
+ dict[str, type[BaseModel]] | None,
174
+ dict[str, type[BaseModel]] | None,
175
+ dict[tuple[str, str], list[str]] | None,
176
+ list[str] | None,
177
+ ]:
178
+ """Build ontology from config. Returns (entity_types, edge_types, edge_type_map)."""
179
+ raw = cfg.get("ontology")
180
+ if not raw:
181
+ return None, None, None
182
+
183
+ entity_defs = raw.get("entities") or {}
184
+ edge_defs = raw.get("edges") or {}
185
+ edge_map_raw = raw.get("edgeMap") or {}
186
+ entity_types = _build_type_defs(entity_defs) if entity_defs else None
187
+ edge_types = _build_type_defs(edge_defs) if edge_defs else None
188
+
189
+ edge_type_map: dict[tuple[str, str], list[str]] | None = None
190
+ if edge_map_raw:
191
+ edge_type_map = {}
192
+ for key, values in edge_map_raw.items():
193
+ parts = key.split(",")
194
+ edge_type_map[(parts[0], parts[1])] = values
195
+
196
+ if not entity_types and not edge_types and not edge_type_map:
197
+ return None, None, None
198
+
199
+ return entity_types, edge_types, edge_type_map
200
+
201
+
202
+ def _log_falkordblite_diagnostics(error: Exception) -> None:
203
+ """Log diagnostic info when FalkorDBLite fails to start."""
204
+ import platform
205
+ import subprocess
206
+
207
+ print(f"[gralkor] FalkorDBLite startup failed: {error}", flush=True)
208
+ print(f"[gralkor] Platform: {platform.platform()}, arch: {platform.machine()}", flush=True)
209
+ try:
210
+ from redislite import __redis_executable__, __falkordb_module__
211
+
212
+ for label, path in [("redis-server", __redis_executable__), ("FalkorDB module", __falkordb_module__)]:
213
+ if not path:
214
+ print(f"[gralkor] {label}: not found", flush=True)
215
+ continue
216
+ print(f"[gralkor] {label}: {path}", flush=True)
217
+ for cmd in [[path, "--version"] if "redis" in label else [], ["file", path], ["ldd", path]]:
218
+ if not cmd:
219
+ continue
220
+ try:
221
+ r = subprocess.run(cmd, capture_output=True, text=True, timeout=5)
222
+ out = r.stdout.strip() or r.stderr.strip()
223
+ if out:
224
+ for line in out.split("\n"):
225
+ print(f"[gralkor] {line}", flush=True)
226
+ except FileNotFoundError:
227
+ pass
228
+ except Exception:
229
+ pass
230
+ except Exception as diag_err:
231
+ print(f"[gralkor] Diagnostic collection failed: {diag_err}", flush=True)
232
+
233
+
234
+ _falkor_db = None
235
+ _llm_client = None
236
+ _embedder = None
237
+ _cross_encoder = None
238
+ _graphiti_instances: dict[str, Graphiti] = {}
239
+ ontology_entity_types: dict[str, type[BaseModel]] | None = None
240
+ ontology_edge_types: dict[str, type[BaseModel]] | None = None
241
+ ontology_edge_type_map: dict[tuple[str, str], list[str]] | None = None
242
+
243
+
244
def _graphiti_for(group_id: str) -> Graphiti:
    """Return the cached Graphiti instance bound to one FalkorDB graph.

    The caller must sanitise ``group_id`` first (FalkorDB rejects hyphens).
    An instance is built lazily the first time a group_id is seen and reused
    for every later call with that group_id. Pinning each instance to one
    group_id keeps graphiti-core's add_episode driver-clone branch (which
    mutates self.driver when group_id != self.driver._database) inert.
    """
    cached = _graphiti_instances.get(group_id)
    if cached is not None:
        return cached

    graph_driver = FalkorDriver(falkor_db=_falkor_db, database=group_id)
    instance = Graphiti(
        graph_driver=graph_driver,
        llm_client=_llm_client,
        embedder=_embedder,
        cross_encoder=_cross_encoder,
    )
    _graphiti_instances[group_id] = instance
    return instance
264
+
265
+
266
@asynccontextmanager
async def lifespan(_app: FastAPI):
    """Start the embedded database and model clients, then tear them down.

    Startup, in order: load the config; remove a leftover
    ``gralkor.db.settings`` file and open FalkorDBLite under
    ``FALKORDB_DATA_DIR``; build the LLM, embedder and cross-encoder clients
    around one shared google-genai client; configure the module logger; build
    the ontology; make sure indices exist on the default graph; create the
    capture buffer; run the warmup.

    Shutdown: flush the capture buffer, drop the cached Graphiti instances
    and close the FalkorDB connection. The close runs in a ``finally`` so a
    failing flush cannot leave the database handle open.
    """
    global _falkor_db, _llm_client, _embedder, _cross_encoder
    global ontology_entity_types, ontology_edge_types, ontology_edge_type_map
    global capture_buffer
    cfg = _load_config()

    # Embedded FalkorDBLite (no Docker needed)
    logging.getLogger("redislite").setLevel(logging.DEBUG)

    from redislite.async_falkordb_client import AsyncFalkorDB

    data_dir = os.getenv("FALKORDB_DATA_DIR", "./data/falkordb")
    os.makedirs(data_dir, exist_ok=True)
    db_path = os.path.join(data_dir, "gralkor.db")
    # See `server-falkordb-bootstrap` in TEST_TREES.md and the contract at
    # tests/test_redislite_resume_trap.py.
    settings_path = os.path.join(data_dir, "gralkor.db.settings")
    try:
        os.unlink(settings_path)
    except FileNotFoundError:
        pass
    try:
        _falkor_db = AsyncFalkorDB(db_path)
    except Exception as e:
        _log_falkordblite_diagnostics(e)
        raise

    genai_client = _build_genai_client()
    _llm_client = _build_llm_client(cfg, genai_client=genai_client)
    _embedder = _build_embedder(cfg, genai_client=genai_client)
    _cross_encoder = _build_cross_encoder(cfg, genai_client=genai_client)

    # Configure logging level: DEBUG in test mode for full data visibility
    log_level = logging.DEBUG if cfg.get("test") else logging.INFO
    logger.setLevel(log_level)
    if not logger.handlers:
        handler = logging.StreamHandler()
        handler.setFormatter(logging.Formatter("%(message)s"))
        logger.addHandler(handler)

    ontology_entity_types, ontology_edge_types, ontology_edge_type_map = _build_ontology(cfg)
    if ontology_entity_types or ontology_edge_types:
        entity_names = list(ontology_entity_types or {})
        edge_names = list(ontology_edge_types or {})
        print(f"[gralkor] ontology: entities={entity_names} edges={edge_names}", flush=True)

    # Build indices on the default graph if not already present. Per-group
    # graphs get their own indices on first FalkorDriver instantiation
    # (graphiti-core schedules build_indices_and_constraints from the
    # FalkorDriver constructor; CREATE INDEX is idempotent in FalkorDB).
    boot_g = _graphiti_for(DEFAULT_DATABASE)
    existing = await boot_g.driver.execute_query("CALL db.indexes()")
    if existing and existing[0]:
        print(f"[gralkor] indices already exist ({len(existing[0])} found), skipping build", flush=True)
    else:
        print("[gralkor] building indices and constraints...", flush=True)
        t0_idx = time.monotonic()
        await boot_g.build_indices_and_constraints()
        idx_ms = (time.monotonic() - t0_idx) * 1000
        print(f"[gralkor] indices ready — {idx_ms:.0f}ms", flush=True)

    capture_buffer = CaptureBuffer(flush_callback=_capture_flush)

    await _warmup()

    yield

    try:
        await capture_buffer.flush_all()
    finally:
        # Always release the database, even when the final flush raises;
        # the flush error still propagates after cleanup.
        _graphiti_instances.clear()
        if _falkor_db is not None:
            try:
                # Use whichever close method this client version exposes.
                if hasattr(_falkor_db, "aclose"):
                    await _falkor_db.aclose()
                elif hasattr(_falkor_db.connection, "aclose"):
                    await _falkor_db.connection.aclose()
                elif hasattr(_falkor_db.connection, "close"):
                    await _falkor_db.connection.close()
            except Exception as e:
                logger.warning("[gralkor] FalkorDB shutdown failed: %s", e)
346
+
347
+
348
+ app = FastAPI(title="Gralkor Graphiti Server", lifespan=lifespan)
349
+
350
+
351
+ # ── Rate-limit passthrough ───────────────────────────────────
352
+
353
+
354
+ def _find_rate_limit_error(exc: Exception) -> Exception | None:
355
+ """Walk exception chain to find an upstream rate-limit error."""
356
+ current: Exception | None = exc
357
+ seen: set[int] = set()
358
+ while current is not None and id(current) not in seen:
359
+ seen.add(id(current))
360
+ # Match openai.RateLimitError, anthropic.RateLimitError, google.genai.errors.ClientError, etc.
361
+ # Note: Google's APIError uses .code, most others use .status_code.
362
+ http_code = getattr(current, "status_code", None) or getattr(current, "code", None)
363
+ if type(current).__name__ == "RateLimitError" or http_code == 429:
364
+ return current
365
+ current = current.__cause__ or current.__context__
366
+ return None
367
+
368
+
369
+ _CREDENTIAL_HINTS = ("api key", "apikey", "credential", "authentication", "expired", "unauthorized")
370
+
371
+
372
def _downstream_llm_response(exc: Exception) -> JSONResponse:
    """Map a downstream LLM provider error to an appropriate HTTP response.

    The provider's HTTP code is read from ``exc.status_code`` or ``exc.code``
    and translated as follows:

    * 400 — 503 when the first line of the message contains one of
      ``_CREDENTIAL_HINTS``, otherwise 500.
    * 401 / 403 — 503.
    * 404 / 422 — 500.
    * anything else, including a missing or non-numeric code — 502.

    Returns:
        A JSON response whose body carries ``"error": "provider error"`` and
        the first line of the exception message, truncated to 200 characters.
    """
    raw_code = getattr(exc, "status_code", None) or getattr(exc, "code", None)
    try:
        http_code = int(raw_code)
    except (TypeError, ValueError):
        # No usable code: never fail inside the error path, fall through to
        # the generic 502 branch instead.
        http_code = 0
    msg = str(exc).split("\n")[0][:200]

    # Named ``mapped`` rather than ``status`` so the module-level
    # ``fastapi.status`` import is not shadowed.
    if http_code == 400:
        mapped = 503 if any(hint in msg.lower() for hint in _CREDENTIAL_HINTS) else 500
    elif http_code in (401, 403):
        mapped = 503
    elif http_code in (404, 422):
        mapped = 500
    else:
        mapped = 502

    return JSONResponse(status_code=mapped, content={"error": "provider error", "detail": msg})
390
+
391
+
392
+ def _find_downstream_llm_error(exc: Exception) -> Exception | None:
393
+ """Walk the exception chain to find a downstream LLM provider error with an HTTP status code."""
394
+ current: Exception | None = exc
395
+ seen: set[int] = set()
396
+ while current is not None and id(current) not in seen:
397
+ seen.add(id(current))
398
+ http_code = getattr(current, "status_code", None) or getattr(current, "code", None)
399
+ if http_code is not None and int(http_code) != 429:
400
+ return current
401
+ current = current.__cause__ or current.__context__
402
+ return None
403
+
404
+
405
+ _DEFAULT_RETRY_AFTER = 5 # seconds
406
+
407
+
408
@app.middleware("http")
async def rate_limit_middleware(request, call_next):
    """Translate upstream provider failures into HTTP responses.

    A rate-limit error anywhere in the exception chain becomes a 429 with a
    ``retry-after`` header, taken from the error's ``retry_after`` attribute
    or ``_DEFAULT_RETRY_AFTER`` when that is absent. Any other provider error
    carrying an HTTP code is mapped by ``_downstream_llm_response``.
    Everything else is re-raised unchanged.
    """
    try:
        response = await call_next(request)
    except Exception as exc:
        throttled = _find_rate_limit_error(exc)
        if throttled is None:
            provider_failure = _find_downstream_llm_error(exc)
            if provider_failure is None:
                raise
            return _downstream_llm_response(provider_failure)

        # Only the first line of the provider message is exposed, capped at
        # 200 characters.
        detail = str(throttled).split("\n")[0][:200]
        wait_seconds = getattr(throttled, "retry_after", None)
        if wait_seconds is None:
            wait_seconds = _DEFAULT_RETRY_AFTER
        return JSONResponse(
            status_code=429,
            content={"detail": detail},
            headers={"retry-after": str(int(wait_seconds))},
        )
    return response
428
+
429
+
430
+ # ── Auth ─────────────────────────────────────────────────────
431
+
432
+
433
+ # ── Capture buffer ───────────────────────────────────────────
434
+
435
+ capture_buffer: CaptureBuffer | None = None
436
+
437
+
438
+ _WARMUP_GROUP_ID = "_warmup"
439
+ _WARMUP_QUERY = "warmup"
440
+
441
+
442
async def _warmup() -> None:
    """Run one throwaway search and one interpretation call at startup.

    Both calls target the dedicated warmup group and their timings are
    logged. Any failure is logged as a warning and otherwise ignored, so a
    failing warmup never prevents startup.
    """
    started = time.monotonic()
    try:
        graphiti = _graphiti_for(_WARMUP_GROUP_ID)

        search_started = time.monotonic()
        await graphiti.search(
            query=_WARMUP_QUERY,
            group_ids=[_WARMUP_GROUP_ID],
            num_results=1,
        )
        search_finished = time.monotonic()

        await interpret_facts([], _WARMUP_QUERY, graphiti.llm_client, "Warmup")
        interpret_finished = time.monotonic()

        search_ms = (search_finished - search_started) * 1000
        interpret_ms = (interpret_finished - search_finished) * 1000
        total_ms = (time.monotonic() - started) * 1000
        logger.info(
            "[gralkor] warmup — search:%.0f interpret:%.0f %.0fms",
            search_ms,
            interpret_ms,
            total_ms,
        )
    except Exception as e:
        logger.warning("[gralkor] warmup failed (non-fatal): %s", e)
459
+
460
+
461
async def _capture_flush(group_id: str, agent_name: str, turns: list[list[Message]]) -> None:
    """Distil buffered turns into one episode and store it in the graph.

    This is the flush callback handed to ``CaptureBuffer``. It does nothing
    when the LLM client or the database is not initialised, or when the
    formatted transcript is blank. Otherwise the transcript is added as a
    ``message`` episode to the graph named by the sanitised ``group_id``.
    """
    clients_ready = _llm_client is not None and _falkor_db is not None
    if not clients_ready:
        return

    started = time.monotonic()
    graph_name = _sanitize_group_id(group_id)
    graphiti = _graphiti_for(graph_name)

    transcript = await format_transcript(turns, graphiti.llm_client, agent_name)
    if not transcript.strip():
        return
    logger.debug("[gralkor] [test] capture flush body: %s", transcript)

    episode_name = f"conversation-{int(time.time() * 1000)}"
    outcome = await graphiti.add_episode(
        name=episode_name,
        episode_body=transcript,
        source_description="auto-capture",
        group_id=graph_name,
        reference_time=datetime.now(timezone.utc),
        source=EpisodeType.message,
        entity_types=ontology_entity_types,
        edge_types=ontology_edge_types,
        edge_type_map=ontology_edge_type_map,
    )

    elapsed_ms = (time.monotonic() - started) * 1000
    # The log line reports the caller's group_id, not the sanitised one.
    logger.info(
        "[gralkor] capture flushed — group:%s uuid:%s bodyChars:%d %.0fms",
        group_id,
        outcome.episode.uuid,
        len(transcript),
        elapsed_ms,
    )
485
+
486
+
487
+ # ── Idempotency store ────────────────────────────────────────
488
+
489
+ # In-memory store: idempotency_key -> serialized_episode
490
# Upper bound on cached results. Without a cap the store grows by one entry
# per distinct idempotency key for the lifetime of the process.
_IDEMPOTENCY_MAX_ENTRIES = 10_000

_idempotency_store: dict[str, dict[str, Any]] = {}


def _idempotency_check(key: str) -> dict[str, Any] | None:
    """Return cached episode if key has been seen (and not evicted), else None."""
    return _idempotency_store.get(key)


def _idempotency_store_result(key: str, result: dict[str, Any]) -> None:
    """Cache the result under the idempotency key.

    The store keeps at most ``_IDEMPOTENCY_MAX_ENTRIES`` entries; once the
    limit is exceeded the oldest entries are evicted first.
    """
    # Drop any previous entry first so a re-stored key counts as the newest
    # (dicts iterate in insertion order).
    _idempotency_store.pop(key, None)
    _idempotency_store[key] = result
    while len(_idempotency_store) > _IDEMPOTENCY_MAX_ENTRIES:
        oldest_key = next(iter(_idempotency_store))
        del _idempotency_store[oldest_key]
501
+
502
+
503
+ # ── Request / response models ────────────────────────────────
504
+
505
+
506
# Documentation for the models below is kept in ``#`` comments rather than
# class docstrings so the generated schema descriptions stay unchanged.


# Body of POST /episodes.
class AddEpisodeRequest(BaseModel):
    name: str
    episode_body: str
    source_description: str
    group_id: str
    # ISO-8601 timestamp; the handler uses the current UTC time when omitted.
    reference_time: str | None = None
    # EpisodeType value; the handler uses EpisodeType.message when omitted.
    source: str | None = None
    # A repeated key returns the cached episode instead of adding a new one.
    idempotency_key: str


# Body of POST /search.
class SearchRequest(BaseModel):
    query: str
    group_ids: list[str]
    num_results: int = 10
    # "slow" selects the cross-encoder search, which also returns nodes.
    mode: Literal["fast", "slow"] = "fast"


# Body of POST /build-communities.
class GroupIdRequest(BaseModel):
    group_id: str


# Body of POST /recall.
class RecallRequest(BaseModel):
    session_id: str | None = Field(default=None, min_length=1)
    group_id: str
    agent_name: str = Field(min_length=1)
    query: str
    max_results: int = 10
    interpret_max_output_tokens: int | None = Field(default=None, gt=0)


# Response model of POST /recall.
class RecallResponse(BaseModel):
    memory_block: str


# NOTE(review): the endpoints using the remaining models are not visible in
# this part of the file — confirm their routes before relying on the names.
class DistillRequest(BaseModel):
    agent_name: str = Field(min_length=1)
    # One inner list of messages per turn.
    turns: list[list[Message]]


class DistillResponse(BaseModel):
    episode_body: str


class CaptureRequest(BaseModel):
    session_id: str = Field(min_length=1)
    group_id: str
    agent_name: str = Field(min_length=1)
    messages: list[Message]


class SessionEndRequest(BaseModel):
    session_id: str = Field(min_length=1)


class MemoryAddRequest(BaseModel):
    group_id: str
    content: str
    source_description: str = "manual"


class MemoryAddResponse(BaseModel):
    status: Literal["stored"]
568
+
569
+
570
+ # ── Serializers ───────────────────────────────────────────────
571
+
572
+
573
+ def _ts(dt: datetime | None) -> str | None:
574
+ return dt.isoformat() if dt else None
575
+
576
+
577
+ def _serialize_node(node) -> dict[str, Any]:
578
+ return {
579
+ "uuid": node.uuid,
580
+ "name": node.name,
581
+ "summary": node.summary,
582
+ "group_id": node.group_id,
583
+ }
584
+
585
+
586
def _serialize_fact(edge: EntityEdge) -> dict[str, Any]:
    """Reduce an entity edge to its identifying fields plus ISO-formatted timestamps."""
    payload: dict[str, Any] = {
        "uuid": edge.uuid,
        "name": edge.name,
        "fact": edge.fact,
        "group_id": edge.group_id,
    }
    # Timestamps are appended in a fixed order; unset ones serialise as None.
    for stamp in ("valid_at", "invalid_at", "expired_at", "created_at"):
        payload[stamp] = _ts(getattr(edge, stamp))
    return payload
597
+
598
+
599
def _serialize_episode(ep: EpisodicNode) -> dict[str, Any]:
    """Reduce an episodic node to the fields returned to clients."""
    payload: dict[str, Any] = {
        field: getattr(ep, field)
        for field in ("uuid", "name", "content", "source_description", "group_id")
    }
    payload["created_at"] = _ts(ep.created_at)
    return payload
608
+
609
+
610
+ # ── Endpoints ─────────────────────────────────────────────────
611
+
612
+
613
+ logger = logging.getLogger(__name__)
614
+
615
+
616
+ router = APIRouter()
617
+
618
+
619
def _conversation_for_session(session_id: str) -> list[Message]:
    """Return the session's buffered turns flattened into one message list.

    Yields an empty list while the capture buffer has not been created.
    """
    if capture_buffer is None:
        return []
    return [
        message
        for turn in capture_buffer.turns_for(session_id)
        for message in turn
    ]
626
+
627
+
628
+ FURTHER_QUERYING_INSTRUCTION = (
629
+ "Search memory (up to 3 times, diverse queries) if you need more detail."
630
+ )
631
+
632
+ NO_RELEVANT_MEMORIES_BODY = "No relevant memories found."
633
+
634
+
635
@router.get("/health")
async def health():
    """Report liveness by running a trivial query against the default graph.

    Raises:
        HTTPException: 503 with the error text when the query fails.
    """
    try:
        probe = _graphiti_for(DEFAULT_DATABASE)
        await probe.driver.execute_query("RETURN 1")
    except Exception as e:
        raise HTTPException(status_code=503, detail=str(e))
    else:
        return {"status": "ok"}
643
+
644
+
645
@router.post("/episodes")
async def add_episode(req: AddEpisodeRequest):
    """Add one episode to the graph of the request's group.

    A request whose ``idempotency_key`` has already been stored returns the
    cached serialized episode without touching the graph.

    Raises:
        HTTPException: 422 when ``reference_time`` is not a valid ISO-8601
            timestamp or ``source`` is not a valid ``EpisodeType`` value.
    """
    cached = _idempotency_check(req.idempotency_key)
    if cached is not None:
        return cached

    # Both conversions raise ValueError on bad client input; report that as a
    # client error instead of letting it surface as a 500.
    try:
        ref_time = (
            datetime.fromisoformat(req.reference_time)
            if req.reference_time
            else datetime.now(timezone.utc)
        )
        episode_type = EpisodeType(req.source) if req.source else EpisodeType.message
    except ValueError as err:
        raise HTTPException(status_code=422, detail=f"invalid episode request: {err}") from err

    sanitized = _sanitize_group_id(req.group_id)
    g = _graphiti_for(sanitized)
    result = await g.add_episode(
        name=req.name,
        episode_body=req.episode_body,
        source_description=req.source_description,
        group_id=sanitized,
        reference_time=ref_time,
        source=episode_type,
        entity_types=ontology_entity_types,
        edge_types=ontology_edge_types,
        edge_type_map=ontology_edge_type_map,
        excluded_entity_types=None,
    )
    episode = result.episode
    serialized = _serialize_episode(episode)
    # Cache only after a successful write so a failed attempt can be retried
    # with the same key.
    _idempotency_store_result(req.idempotency_key, serialized)
    return serialized
675
+
676
+
677
+ def _sanitize_query(query: str) -> str:
678
+ """Strip backticks that cause RediSearch syntax errors.
679
+
680
+ graphiti-core's _SEPARATOR_MAP handles most special characters
681
+ but misses backticks. We strip them at the API boundary.
682
+ """
683
+ return query.replace("`", " ")
684
+
685
+
686
+ def _sanitize_group_id(group_id: str) -> str:
687
+ """Replace hyphens with underscores to avoid RediSearch syntax errors.
688
+
689
+ graphiti-core embeds group_id verbatim in RediSearch queries like
690
+ (@group_id:"my-hyphen-agent") where hyphens break the parser.
691
+ The plugin-side sanitizeGroupId() handles this at write time, but
692
+ direct API callers (e.g. functional tests) may pass raw hyphens.
693
+ """
694
+ return group_id.replace("-", "_")
695
+
696
+
697
@router.post("/search")
async def search(req: SearchRequest):
    """Search one graph for facts and, in slow mode, entity nodes.

    Group ids are sanitised first (hyphens cause RediSearch syntax errors in
    graphiti-core). The Graphiti driver targets one FalkorDB graph, so a
    multi-group search currently runs against the first group's graph, or
    the default graph when no group is given; multi-graph fanout is a
    separate feature.

    Returns:
        ``{"facts": [...], "nodes": [...]}``; ``nodes`` is empty in fast mode.
    """
    group_ids = [_sanitize_group_id(raw) for raw in req.group_ids]
    graph_name = group_ids[0] if group_ids else DEFAULT_DATABASE
    started = time.monotonic()
    try:
        graphiti = _graphiti_for(graph_name)
        if req.mode != "slow":
            edges = await graphiti.search(
                query=_sanitize_query(req.query),
                group_ids=group_ids,
                num_results=req.num_results,
            )
            nodes = []
        else:
            # Cross-encoder + BFS: higher quality, also returns entity node summaries.
            # deepcopy required — COMBINED_HYBRID_SEARCH_CROSS_ENCODER is a module-level
            # constant; mutating .limit directly would corrupt it across requests.
            recipe = deepcopy(COMBINED_HYBRID_SEARCH_CROSS_ENCODER)
            recipe.limit = req.num_results
            found = await graphiti.search_(
                query=_sanitize_query(req.query),
                group_ids=group_ids,
                config=recipe,
            )
            edges = found.edges
            nodes = found.nodes
    except Exception as e:
        elapsed_ms = (time.monotonic() - started) * 1000
        logger.error("[gralkor] search failed — mode:%s %.0fms: %s", req.mode, elapsed_ms, e)
        raise

    facts = [_serialize_fact(edge) for edge in edges]
    node_payloads = [_serialize_node(node) for node in nodes]
    return {"facts": facts, "nodes": node_payloads}
735
+
736
+
737
+
738
@router.post("/build-indices")
async def build_indices():
    """Build indices and constraints on the DEFAULT_DATABASE graph.

    Returns {"status": "ok"} once the build call has completed.
    """
    await _graphiti_for(DEFAULT_DATABASE).build_indices_and_constraints()
    return {"status": "ok"}
743
+
744
+
745
@router.post("/build-communities")
async def build_communities(req: GroupIdRequest):
    """Build communities for one group and report how many were produced.

    The group id is sanitized (hyphens -> underscores) and used both to
    pick the graph and as the single entry of ``group_ids``. Returns the
    counts of communities and edges returned by the build call.
    """
    group = _sanitize_group_id(req.group_id)
    graphiti = _graphiti_for(group)
    communities, edges = await graphiti.build_communities(group_ids=[group])
    return {"communities": len(communities), "edges": len(edges)}
753
+
754
+
755
+ # ── New endpoints ────────────────────────────────────────────
756
+
757
+
758
# Overall time budget for one /recall request: recall() passes this to
# asyncio.wait_for as the timeout and answers 504 when it expires.
RECALL_DEADLINE_SECONDS = 12.0
# Fixed pause _recall_vertex_call sleeps before its single retry after a
# rate-limit error.
RECALL_RETRY_DELAY_SECONDS = 1.0
760
+
761
+
762
+ async def _recall_vertex_call(factory):
763
+ """Run `factory()` (zero-arg coroutine) with one 429 retry.
764
+
765
+ Reifies Retry ownership > Vertex-upstream rate-limit: /recall owns
766
+ retry for this class. The first 429 from an upstream Gemini call
767
+ during /recall is absorbed by a single retry after a fixed delay.
768
+ A second 429 — or any non-429 failure on either attempt — surfaces
769
+ immediately and is mapped to an HTTP response by the request-level
770
+ middleware (rate_limit_middleware / downstream_error_handling).
771
+ """
772
+ try:
773
+ return await factory()
774
+ except Exception as err:
775
+ if _find_rate_limit_error(err) is None:
776
+ raise
777
+ await asyncio.sleep(RECALL_RETRY_DELAY_SECONDS)
778
+ return await factory()
779
+
780
+
781
@router.post("/recall", response_model=RecallResponse)
async def recall(req: RecallRequest) -> Response:
    """Run the recall pipeline under a hard deadline.

    Delegates to ``_recall_body`` and bounds it with
    RECALL_DEADLINE_SECONDS. If the deadline expires, a warning naming the
    session and group is logged and a 504 JSON response with
    {"error": "recall deadline expired"} is returned instead of the
    memory block.
    """
    try:
        outcome = await asyncio.wait_for(_recall_body(req), timeout=RECALL_DEADLINE_SECONDS)
    except asyncio.TimeoutError:
        logger.warning(
            "[gralkor] recall deadline expired — session:%s group:%s",
            req.session_id, req.group_id,
        )
        timeout_payload = {"error": "recall deadline expired"}
        return JSONResponse(status_code=504, content=timeout_payload)
    return outcome
794
+
795
+
796
async def _recall_body(req: RecallRequest) -> RecallResponse:
    """Search the group's graph, interpret the hits, and wrap them in a memory block.

    Steps:
      1. Sanitize the group id and load the session's conversation (empty
         when the request carries no session id).
      2. Search the graph for up to ``req.max_results`` edges.
      3. If any facts came back, ask ``interpret_facts`` which are relevant
         to the conversation, optionally capping its output tokens with
         ``req.interpret_max_output_tokens``.
      4. Wrap the result (or NO_RELEVANT_MEMORIES_BODY when nothing is
         relevant) plus FURTHER_QUERYING_INSTRUCTION in a
         ``<gralkor-memory trust="untrusted">`` block.

    Both upstream calls go through ``_recall_vertex_call`` so a single
    rate-limit failure is retried once. Timings for the search and
    interpret phases are logged at info level.
    """
    group = _sanitize_group_id(req.group_id)
    if req.session_id is None:
        conversation = []
    else:
        conversation = _conversation_for_session(req.session_id)
    logger.info(
        "[gralkor] recall — session:%s group:%s queryChars:%d max:%d",
        req.session_id, group, len(req.query), req.max_results,
    )
    logger.debug("[gralkor] [test] recall query: %s", req.query)
    started = time.monotonic()

    graphiti = _graphiti_for(group)

    def _run_search():
        # Factory for _recall_vertex_call: builds a fresh search awaitable
        # on every invocation so the retry does not reuse a spent one.
        return graphiti.search(
            query=_sanitize_query(req.query),
            group_ids=[group],
            num_results=req.max_results,
        )

    edges = await _recall_vertex_call(_run_search)
    search_done = time.monotonic()

    facts = [_serialize_fact(edge) for edge in edges]
    if facts:
        facts_text = "\n".join(map(format_fact, facts))
        budget = req.interpret_max_output_tokens
        # Only forward the token budget when the caller supplied one, so
        # interpret_facts keeps its own default otherwise.
        interpret_kwargs: dict[str, int] = (
            {} if budget is None else {"output_token_budget": budget}
        )

        def _run_interpret():
            # Factory for _recall_vertex_call, same retry contract as above.
            return interpret_facts(
                conversation,
                facts_text,
                graphiti.llm_client,
                req.agent_name,
                **interpret_kwargs,
            )

        relevant_facts = await _recall_vertex_call(_run_interpret)
        interpret_done = time.monotonic()
        if relevant_facts:
            body = "\n".join(relevant_facts)
        else:
            body = NO_RELEVANT_MEMORIES_BODY
    else:
        # Nothing found: skip interpretation entirely.
        body = NO_RELEVANT_MEMORIES_BODY
        interpret_done = search_done

    block = (
        f'<gralkor-memory trust="untrusted">\n{body}\n\n'
        f"{FURTHER_QUERYING_INSTRUCTION}\n</gralkor-memory>"
    )
    total_ms = (time.monotonic() - started) * 1000
    search_ms = (search_done - started) * 1000
    if facts:
        logger.info(
            "[gralkor] recall result — %d facts blockChars:%d %.0fms (search:%.0f interpret:%.0f)",
            len(facts), len(block), total_ms,
            search_ms, (interpret_done - search_done) * 1000,
        )
    else:
        logger.info(
            "[gralkor] recall result — 0 facts blockChars:%d %.0fms (search:%.0f interpret:0)",
            len(block), total_ms, search_ms,
        )
    logger.debug("[gralkor] [test] recall block: %s", block)
    return RecallResponse(memory_block=block)
855
+
856
+
857
@router.post("/distill", response_model=DistillResponse)
async def distill(req: DistillRequest) -> DistillResponse:
    """Format the request's turns into an episode body.

    Passes ``req.turns``, the module-level ``_llm_client`` and
    ``req.agent_name`` to ``format_transcript`` and returns its result as
    the response's ``episode_body``.
    """
    return DistillResponse(
        episode_body=await format_transcript(req.turns, _llm_client, req.agent_name),
    )
861
+
862
+
863
@router.post("/capture", status_code=status.HTTP_204_NO_CONTENT)
async def capture(req: CaptureRequest) -> Response:
    """Append the request's messages to the capture buffer.

    Responds 204 with no body on success. Raises HTTPException (503) when
    the capture buffer has not been initialized. The group id is sanitized
    (hyphens -> underscores) before it is stored.
    """
    if capture_buffer is None:
        raise HTTPException(status.HTTP_503_SERVICE_UNAVAILABLE, "capture buffer not initialized")
    group = _sanitize_group_id(req.group_id)
    capture_buffer.append(req.session_id, group, req.agent_name, req.messages)
    logged_messages = [(message.role, message.content) for message in req.messages]
    logger.debug("[gralkor] [test] capture messages: %s", logged_messages)
    return Response(status_code=status.HTTP_204_NO_CONTENT)
872
+
873
+
874
@router.post("/session_end", status_code=status.HTTP_204_NO_CONTENT)
async def session_end(req: SessionEndRequest) -> Response:
    """Flush the capture buffer for a session that has ended.

    Responds 204 with no body on success. Raises HTTPException (503) when
    the capture buffer has not been initialized.
    """
    if capture_buffer is None:
        raise HTTPException(status.HTTP_503_SERVICE_UNAVAILABLE, "capture buffer not initialized")
    session = req.session_id
    # Count the buffered turns before flushing so the log line reports
    # what was pending for this session.
    turn_count = len(capture_buffer.turns_for(session))
    capture_buffer.flush(session)
    logger.info("[gralkor] session_end session:%s turns:%d", session, turn_count)
    return Response(status_code=status.HTTP_204_NO_CONTENT)
882
+
883
+
884
@router.post("/tools/memory_add", response_model=MemoryAddResponse)
async def tools_memory_add(req: MemoryAddRequest) -> MemoryAddResponse:
    """Store ``req.content`` as a text episode in the group's graph.

    The episode is named ``manual-add-<epoch milliseconds>``, stamped with
    the current UTC time as its reference time, and added with the
    module's ontology entity/edge types. Returns a response with
    status "stored" once ``add_episode`` completes.
    """
    group = _sanitize_group_id(req.group_id)
    graphiti = _graphiti_for(group)
    episode_name = f"manual-add-{int(time.time() * 1000)}"
    await graphiti.add_episode(
        name=episode_name,
        episode_body=req.content,
        source_description=req.source_description,
        group_id=group,
        reference_time=datetime.now(timezone.utc),
        source=EpisodeType.text,
        entity_types=ontology_entity_types,
        edge_types=ontology_edge_types,
        edge_type_map=ontology_edge_type_map,
    )
    return MemoryAddResponse(status="stored")
900
+
901
+
902
+ app.include_router(router)